// polyglot_sql/dialects/mod.rs
1//! SQL Dialect System
2//!
3//! This module implements the dialect abstraction layer that enables SQL transpilation
4//! between 30+ database engines. Each dialect encapsulates three concerns:
5//!
6//! - **Tokenization**: Dialect-specific lexing rules (e.g., BigQuery uses backtick quoting,
7//! MySQL uses backtick for identifiers, TSQL uses square brackets).
8//! - **Generation**: How AST nodes are rendered back to SQL text, including identifier quoting
9//! style, function name casing, and syntax variations.
10//! - **Transformation**: AST-level rewrites that convert dialect-specific constructs to/from
11//! a normalized form (e.g., Snowflake `SQUARE(x)` becomes `POWER(x, 2)`).
12//!
13//! The primary entry point is [`Dialect::get`], which returns a configured [`Dialect`] instance
14//! for a given [`DialectType`]. From there, callers can [`parse`](Dialect::parse),
15//! [`generate`](Dialect::generate), [`transform`](Dialect::transform), or
16//! [`transpile_to`](Dialect::transpile_to) another dialect in a single call.
17//!
18//! Each concrete dialect (e.g., `PostgresDialect`, `BigQueryDialect`) implements the
19//! [`DialectImpl`] trait, which provides configuration hooks and expression-level transforms.
20//! Dialect modules live in submodules of this module and are re-exported here.
21
22mod generic; // Always compiled
23
24#[cfg(feature = "dialect-athena")]
25mod athena;
26#[cfg(feature = "dialect-bigquery")]
27mod bigquery;
28#[cfg(feature = "dialect-clickhouse")]
29mod clickhouse;
30#[cfg(feature = "dialect-cockroachdb")]
31mod cockroachdb;
32#[cfg(feature = "dialect-databricks")]
33mod databricks;
34#[cfg(feature = "dialect-datafusion")]
35mod datafusion;
36#[cfg(feature = "dialect-doris")]
37mod doris;
38#[cfg(feature = "dialect-dremio")]
39mod dremio;
40#[cfg(feature = "dialect-drill")]
41mod drill;
42#[cfg(feature = "dialect-druid")]
43mod druid;
44#[cfg(feature = "dialect-duckdb")]
45mod duckdb;
46#[cfg(feature = "dialect-dune")]
47mod dune;
48#[cfg(feature = "dialect-exasol")]
49mod exasol;
50#[cfg(feature = "dialect-fabric")]
51mod fabric;
52#[cfg(feature = "dialect-hive")]
53mod hive;
54#[cfg(feature = "dialect-materialize")]
55mod materialize;
56#[cfg(feature = "dialect-mysql")]
57mod mysql;
58#[cfg(feature = "dialect-oracle")]
59mod oracle;
60#[cfg(feature = "dialect-postgresql")]
61mod postgres;
62#[cfg(feature = "dialect-presto")]
63mod presto;
64#[cfg(feature = "dialect-redshift")]
65mod redshift;
66#[cfg(feature = "dialect-risingwave")]
67mod risingwave;
68#[cfg(feature = "dialect-singlestore")]
69mod singlestore;
70#[cfg(feature = "dialect-snowflake")]
71mod snowflake;
72#[cfg(feature = "dialect-solr")]
73mod solr;
74#[cfg(feature = "dialect-spark")]
75mod spark;
76#[cfg(feature = "dialect-sqlite")]
77mod sqlite;
78#[cfg(feature = "dialect-starrocks")]
79mod starrocks;
80#[cfg(feature = "dialect-tableau")]
81mod tableau;
82#[cfg(feature = "dialect-teradata")]
83mod teradata;
84#[cfg(feature = "dialect-tidb")]
85mod tidb;
86#[cfg(feature = "dialect-trino")]
87mod trino;
88#[cfg(feature = "dialect-tsql")]
89mod tsql;
90
91pub use generic::GenericDialect; // Always available
92
93#[cfg(feature = "dialect-athena")]
94pub use athena::AthenaDialect;
95#[cfg(feature = "dialect-bigquery")]
96pub use bigquery::BigQueryDialect;
97#[cfg(feature = "dialect-clickhouse")]
98pub use clickhouse::ClickHouseDialect;
99#[cfg(feature = "dialect-cockroachdb")]
100pub use cockroachdb::CockroachDBDialect;
101#[cfg(feature = "dialect-databricks")]
102pub use databricks::DatabricksDialect;
103#[cfg(feature = "dialect-datafusion")]
104pub use datafusion::DataFusionDialect;
105#[cfg(feature = "dialect-doris")]
106pub use doris::DorisDialect;
107#[cfg(feature = "dialect-dremio")]
108pub use dremio::DremioDialect;
109#[cfg(feature = "dialect-drill")]
110pub use drill::DrillDialect;
111#[cfg(feature = "dialect-druid")]
112pub use druid::DruidDialect;
113#[cfg(feature = "dialect-duckdb")]
114pub use duckdb::DuckDBDialect;
115#[cfg(feature = "dialect-dune")]
116pub use dune::DuneDialect;
117#[cfg(feature = "dialect-exasol")]
118pub use exasol::ExasolDialect;
119#[cfg(feature = "dialect-fabric")]
120pub use fabric::FabricDialect;
121#[cfg(feature = "dialect-hive")]
122pub use hive::HiveDialect;
123#[cfg(feature = "dialect-materialize")]
124pub use materialize::MaterializeDialect;
125#[cfg(feature = "dialect-mysql")]
126pub use mysql::MySQLDialect;
127#[cfg(feature = "dialect-oracle")]
128pub use oracle::OracleDialect;
129#[cfg(feature = "dialect-postgresql")]
130pub use postgres::PostgresDialect;
131#[cfg(feature = "dialect-presto")]
132pub use presto::PrestoDialect;
133#[cfg(feature = "dialect-redshift")]
134pub use redshift::RedshiftDialect;
135#[cfg(feature = "dialect-risingwave")]
136pub use risingwave::RisingWaveDialect;
137#[cfg(feature = "dialect-singlestore")]
138pub use singlestore::SingleStoreDialect;
139#[cfg(feature = "dialect-snowflake")]
140pub use snowflake::SnowflakeDialect;
141#[cfg(feature = "dialect-solr")]
142pub use solr::SolrDialect;
143#[cfg(feature = "dialect-spark")]
144pub use spark::SparkDialect;
145#[cfg(feature = "dialect-sqlite")]
146pub use sqlite::SQLiteDialect;
147#[cfg(feature = "dialect-starrocks")]
148pub use starrocks::StarRocksDialect;
149#[cfg(feature = "dialect-tableau")]
150pub use tableau::TableauDialect;
151#[cfg(feature = "dialect-teradata")]
152pub use teradata::TeradataDialect;
153#[cfg(feature = "dialect-tidb")]
154pub use tidb::TiDBDialect;
155#[cfg(feature = "dialect-trino")]
156pub use trino::TrinoDialect;
157#[cfg(feature = "dialect-tsql")]
158pub use tsql::TSQLDialect;
159
160use crate::error::Result;
161use crate::expressions::{Expression, FunctionBody};
162use crate::generator::{Generator, GeneratorConfig};
163use crate::parser::Parser;
164use crate::tokens::{Token, Tokenizer, TokenizerConfig};
165use serde::{Deserialize, Serialize};
166use std::collections::HashMap;
167use std::sync::{Arc, LazyLock, RwLock};
168
/// Enumeration of all supported SQL dialects.
///
/// Each variant corresponds to a specific SQL database engine or query language.
/// The `Generic` variant represents standard SQL with no dialect-specific behavior,
/// and is used as the default when no dialect is specified.
///
/// Dialect names are case-insensitive when parsed from strings via [`FromStr`].
/// Some dialects accept aliases (e.g., "mssql" and "sqlserver" both resolve to [`TSQL`](DialectType::TSQL)).
// NOTE: `rename_all = "lowercase"` keeps the serde representation aligned with the
// strings produced by `Display` and accepted by `FromStr` below — keep all three in sync
// when adding a variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum DialectType {
    /// Standard SQL with no dialect-specific behavior (default).
    Generic,
    /// PostgreSQL -- advanced open-source relational database.
    PostgreSQL,
    /// MySQL -- widely-used open-source relational database (also accepts "mysql").
    MySQL,
    /// Google BigQuery -- serverless cloud data warehouse with unique syntax (backtick quoting, STRUCT types, QUALIFY).
    BigQuery,
    /// Snowflake -- cloud data platform with QUALIFY clause, FLATTEN, and variant types.
    Snowflake,
    /// DuckDB -- in-process analytical database with modern SQL extensions.
    DuckDB,
    /// SQLite -- lightweight embedded relational database.
    SQLite,
    /// Apache Hive -- data warehouse on Hadoop with HiveQL syntax.
    Hive,
    /// Apache Spark SQL -- distributed query engine (also accepts "spark2").
    Spark,
    /// Trino -- distributed SQL query engine (formerly PrestoSQL).
    Trino,
    /// PrestoDB -- distributed SQL query engine for big data.
    Presto,
    /// Amazon Redshift -- cloud data warehouse based on PostgreSQL.
    Redshift,
    /// Transact-SQL (T-SQL) -- Microsoft SQL Server and Azure SQL (also accepts "mssql", "sqlserver").
    TSQL,
    /// Oracle Database -- commercial relational database with PL/SQL extensions.
    Oracle,
    /// ClickHouse -- column-oriented OLAP database for real-time analytics.
    ClickHouse,
    /// Databricks SQL -- Spark-based lakehouse platform with QUALIFY support.
    Databricks,
    /// Amazon Athena -- serverless query service (hybrid Trino/Hive engine).
    Athena,
    /// Teradata -- enterprise data warehouse with proprietary SQL extensions.
    Teradata,
    /// Apache Doris -- real-time analytical database (MySQL-compatible).
    Doris,
    /// StarRocks -- sub-second OLAP database (MySQL-compatible).
    StarRocks,
    /// Materialize -- streaming SQL database built on differential dataflow.
    Materialize,
    /// RisingWave -- distributed streaming database with PostgreSQL compatibility.
    RisingWave,
    /// SingleStore (formerly MemSQL) -- distributed SQL database (also accepts "memsql").
    SingleStore,
    /// CockroachDB -- distributed SQL database with PostgreSQL compatibility (also accepts "cockroach").
    CockroachDB,
    /// TiDB -- distributed HTAP database with MySQL compatibility.
    TiDB,
    /// Apache Druid -- real-time analytics database.
    Druid,
    /// Apache Solr -- search platform with SQL interface.
    Solr,
    /// Tableau -- data visualization platform with its own SQL dialect.
    Tableau,
    /// Dune Analytics -- blockchain analytics SQL engine.
    Dune,
    /// Microsoft Fabric -- unified analytics platform (T-SQL based).
    Fabric,
    /// Apache Drill -- schema-free SQL query engine for big data.
    Drill,
    /// Dremio -- data lakehouse platform with Arrow-based query engine.
    Dremio,
    /// Exasol -- in-memory analytic database.
    Exasol,
    /// Apache DataFusion -- Arrow-based query engine with modern SQL extensions.
    DataFusion,
}
249
250impl Default for DialectType {
251 fn default() -> Self {
252 DialectType::Generic
253 }
254}
255
256impl std::fmt::Display for DialectType {
257 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
258 match self {
259 DialectType::Generic => write!(f, "generic"),
260 DialectType::PostgreSQL => write!(f, "postgresql"),
261 DialectType::MySQL => write!(f, "mysql"),
262 DialectType::BigQuery => write!(f, "bigquery"),
263 DialectType::Snowflake => write!(f, "snowflake"),
264 DialectType::DuckDB => write!(f, "duckdb"),
265 DialectType::SQLite => write!(f, "sqlite"),
266 DialectType::Hive => write!(f, "hive"),
267 DialectType::Spark => write!(f, "spark"),
268 DialectType::Trino => write!(f, "trino"),
269 DialectType::Presto => write!(f, "presto"),
270 DialectType::Redshift => write!(f, "redshift"),
271 DialectType::TSQL => write!(f, "tsql"),
272 DialectType::Oracle => write!(f, "oracle"),
273 DialectType::ClickHouse => write!(f, "clickhouse"),
274 DialectType::Databricks => write!(f, "databricks"),
275 DialectType::Athena => write!(f, "athena"),
276 DialectType::Teradata => write!(f, "teradata"),
277 DialectType::Doris => write!(f, "doris"),
278 DialectType::StarRocks => write!(f, "starrocks"),
279 DialectType::Materialize => write!(f, "materialize"),
280 DialectType::RisingWave => write!(f, "risingwave"),
281 DialectType::SingleStore => write!(f, "singlestore"),
282 DialectType::CockroachDB => write!(f, "cockroachdb"),
283 DialectType::TiDB => write!(f, "tidb"),
284 DialectType::Druid => write!(f, "druid"),
285 DialectType::Solr => write!(f, "solr"),
286 DialectType::Tableau => write!(f, "tableau"),
287 DialectType::Dune => write!(f, "dune"),
288 DialectType::Fabric => write!(f, "fabric"),
289 DialectType::Drill => write!(f, "drill"),
290 DialectType::Dremio => write!(f, "dremio"),
291 DialectType::Exasol => write!(f, "exasol"),
292 DialectType::DataFusion => write!(f, "datafusion"),
293 }
294 }
295}
296
297impl std::str::FromStr for DialectType {
298 type Err = crate::error::Error;
299
300 fn from_str(s: &str) -> Result<Self> {
301 match s.to_lowercase().as_str() {
302 "generic" | "" => Ok(DialectType::Generic),
303 "postgres" | "postgresql" => Ok(DialectType::PostgreSQL),
304 "mysql" => Ok(DialectType::MySQL),
305 "bigquery" => Ok(DialectType::BigQuery),
306 "snowflake" => Ok(DialectType::Snowflake),
307 "duckdb" => Ok(DialectType::DuckDB),
308 "sqlite" => Ok(DialectType::SQLite),
309 "hive" => Ok(DialectType::Hive),
310 "spark" | "spark2" => Ok(DialectType::Spark),
311 "trino" => Ok(DialectType::Trino),
312 "presto" => Ok(DialectType::Presto),
313 "redshift" => Ok(DialectType::Redshift),
314 "tsql" | "mssql" | "sqlserver" => Ok(DialectType::TSQL),
315 "oracle" => Ok(DialectType::Oracle),
316 "clickhouse" => Ok(DialectType::ClickHouse),
317 "databricks" => Ok(DialectType::Databricks),
318 "athena" => Ok(DialectType::Athena),
319 "teradata" => Ok(DialectType::Teradata),
320 "doris" => Ok(DialectType::Doris),
321 "starrocks" => Ok(DialectType::StarRocks),
322 "materialize" => Ok(DialectType::Materialize),
323 "risingwave" => Ok(DialectType::RisingWave),
324 "singlestore" | "memsql" => Ok(DialectType::SingleStore),
325 "cockroachdb" | "cockroach" => Ok(DialectType::CockroachDB),
326 "tidb" => Ok(DialectType::TiDB),
327 "druid" => Ok(DialectType::Druid),
328 "solr" => Ok(DialectType::Solr),
329 "tableau" => Ok(DialectType::Tableau),
330 "dune" => Ok(DialectType::Dune),
331 "fabric" => Ok(DialectType::Fabric),
332 "drill" => Ok(DialectType::Drill),
333 "dremio" => Ok(DialectType::Dremio),
334 "exasol" => Ok(DialectType::Exasol),
335 "datafusion" | "arrow-datafusion" | "arrow_datafusion" => Ok(DialectType::DataFusion),
336 _ => Err(crate::error::Error::parse(
337 format!("Unknown dialect: {}", s),
338 0,
339 0,
340 )),
341 }
342 }
343}
344
345/// Trait that each concrete SQL dialect must implement.
346///
347/// `DialectImpl` provides the configuration hooks and per-expression transform logic
348/// that distinguish one dialect from another. Implementors supply:
349///
350/// - A [`DialectType`] identifier.
351/// - Optional overrides for tokenizer and generator configuration (defaults to generic SQL).
352/// - An expression-level transform function ([`transform_expr`](DialectImpl::transform_expr))
353/// that rewrites individual AST nodes for this dialect (e.g., converting `NVL` to `COALESCE`).
354/// - An optional preprocessing step ([`preprocess`](DialectImpl::preprocess)) for whole-tree
355/// rewrites that must run before the recursive per-node transform (e.g., eliminating QUALIFY).
356///
357/// The default implementations are no-ops, so a minimal dialect only needs to provide
358/// [`dialect_type`](DialectImpl::dialect_type) and override the methods that differ from
359/// standard SQL.
360pub trait DialectImpl {
361 /// Returns the [`DialectType`] that identifies this dialect.
362 fn dialect_type(&self) -> DialectType;
363
364 /// Returns the tokenizer configuration for this dialect.
365 ///
366 /// Override to customize identifier quoting characters, string escape rules,
367 /// comment styles, and other lexing behavior.
368 fn tokenizer_config(&self) -> TokenizerConfig {
369 TokenizerConfig::default()
370 }
371
372 /// Returns the generator configuration for this dialect.
373 ///
374 /// Override to customize identifier quoting style, function name casing,
375 /// keyword casing, and other SQL generation behavior.
376 fn generator_config(&self) -> GeneratorConfig {
377 GeneratorConfig::default()
378 }
379
380 /// Returns a generator configuration tailored to a specific expression.
381 ///
382 /// Override this for hybrid dialects like Athena that route to different SQL engines
383 /// based on expression type (e.g., Hive-style generation for DDL, Trino-style for DML).
384 /// The default delegates to [`generator_config`](DialectImpl::generator_config).
385 fn generator_config_for_expr(&self, _expr: &Expression) -> GeneratorConfig {
386 self.generator_config()
387 }
388
389 /// Transforms a single expression node for this dialect, without recursing into children.
390 ///
391 /// This is the per-node rewrite hook invoked by [`transform_recursive`]. Return the
392 /// expression unchanged if no dialect-specific rewrite is needed. Transformations
393 /// typically include function renaming, operator substitution, and type mapping.
394 fn transform_expr(&self, expr: Expression) -> Result<Expression> {
395 Ok(expr)
396 }
397
398 /// Applies whole-tree preprocessing transforms before the recursive per-node pass.
399 ///
400 /// Override this to apply structural rewrites that must see the entire tree at once,
401 /// such as `eliminate_qualify`, `eliminate_distinct_on`, `ensure_bools`, or
402 /// `explode_projection_to_unnest`. The default is a no-op pass-through.
403 fn preprocess(&self, expr: Expression) -> Result<Expression> {
404 Ok(expr)
405 }
406}
407
408/// Recursively transforms a [`DataType`](crate::expressions::DataType), handling nested
409/// parametric types such as `ARRAY<INT>`, `STRUCT<a INT, b TEXT>`, and `MAP<STRING, INT>`.
410///
411/// The outer type is first passed through `transform_fn` as an `Expression::DataType`,
412/// and then nested element/field types are recursed into. This ensures that dialect-level
413/// type mappings (e.g., `INT` to `INTEGER`) propagate into complex nested types.
414fn transform_data_type_recursive<F>(
415 dt: crate::expressions::DataType,
416 transform_fn: &F,
417) -> Result<crate::expressions::DataType>
418where
419 F: Fn(Expression) -> Result<Expression>,
420{
421 use crate::expressions::DataType;
422 // First, transform the outermost type through the expression system
423 let dt_expr = transform_fn(Expression::DataType(dt))?;
424 let dt = match dt_expr {
425 Expression::DataType(d) => d,
426 _ => {
427 return Ok(match dt_expr {
428 _ => DataType::Custom {
429 name: "UNKNOWN".to_string(),
430 },
431 })
432 }
433 };
434 // Then recurse into nested types
435 match dt {
436 DataType::Array {
437 element_type,
438 dimension,
439 } => {
440 let inner = transform_data_type_recursive(*element_type, transform_fn)?;
441 Ok(DataType::Array {
442 element_type: Box::new(inner),
443 dimension,
444 })
445 }
446 DataType::List { element_type } => {
447 let inner = transform_data_type_recursive(*element_type, transform_fn)?;
448 Ok(DataType::List {
449 element_type: Box::new(inner),
450 })
451 }
452 DataType::Struct { fields, nested } => {
453 let mut new_fields = Vec::new();
454 for mut field in fields {
455 field.data_type = transform_data_type_recursive(field.data_type, transform_fn)?;
456 new_fields.push(field);
457 }
458 Ok(DataType::Struct {
459 fields: new_fields,
460 nested,
461 })
462 }
463 DataType::Map {
464 key_type,
465 value_type,
466 } => {
467 let k = transform_data_type_recursive(*key_type, transform_fn)?;
468 let v = transform_data_type_recursive(*value_type, transform_fn)?;
469 Ok(DataType::Map {
470 key_type: Box::new(k),
471 value_type: Box::new(v),
472 })
473 }
474 other => Ok(other),
475 }
476}
477
/// Convert DuckDB C-style format strings to Presto C-style format strings.
///
/// DuckDB and Presto both use C-style `%` directives but differ on a few
/// specifiers (minutes/seconds, and the no-padding `%-x` forms). The rewrite is
/// a single ordered pass of literal replacements; ordering is what guarantees
/// correctness, so multi-character patterns are listed before the bare `%M`/`%S`
/// rewrites that would otherwise clobber their components.
///
/// Note: the previous implementation also round-tripped `%Y-%m-%d` through a
/// placeholder, but nothing in the pipeline ever touches lowercase `%m`/`%d`,
/// so that step was a no-op and has been removed.
#[cfg(feature = "transpile")]
fn duckdb_to_presto_format(fmt: &str) -> String {
    /// Ordered (DuckDB pattern, Presto replacement) pairs — longest-first.
    const REPLACEMENTS: &[(&str, &str)] = &[
        // DuckDB no-padding variants map to Presto's dedicated specifiers.
        ("%-m", "%c"),
        ("%-d", "%e"),
        ("%-I", "%l"),
        ("%-H", "%k"),
        // Full time pattern must collapse to `%T` before the `%M`/`%S` rewrites.
        ("%H:%M:%S", "%T"),
        // Presto uses `%i` for minutes and `%s` for seconds.
        ("%M", "%i"),
        ("%S", "%s"),
    ];
    REPLACEMENTS
        .iter()
        .fold(fmt.to_string(), |acc, (from, to)| acc.replace(from, to))
}
503
/// Convert DuckDB C-style format strings to BigQuery format strings.
///
/// BigQuery uses a mix of strftime-like directives. Replacements run in order,
/// longest pattern first, so compound date/time patterns are rewritten before
/// their components could be matched individually.
#[cfg(feature = "transpile")]
fn duckdb_to_bigquery_format(fmt: &str) -> String {
    // (DuckDB pattern, BigQuery replacement) pairs, applied sequentially.
    [
        ("%-d", "%e"),
        ("%Y-%m-%d %H:%M:%S", "%F %T"),
        ("%Y-%m-%d", "%F"),
        ("%H:%M:%S", "%T"),
    ]
    .iter()
    .fold(fmt.to_string(), |acc, (from, to)| acc.replace(from, to))
}
516
517/// Applies a transform function bottom-up through an entire expression tree.
518///
519/// This is the core tree-rewriting engine used by the dialect system. It performs
520/// a post-order (children-first) traversal: for each node, all children are recursively
521/// transformed before the node itself is passed to `transform_fn`. This bottom-up
522/// strategy means that when `transform_fn` sees a node, its children have already
523/// been rewritten, which simplifies pattern matching on sub-expressions.
524///
525/// The function handles all expression variants including SELECT clauses (FROM, WHERE,
526/// GROUP BY, HAVING, ORDER BY, QUALIFY, WITH/CTEs, WINDOW), binary operators,
527/// function calls, CASE expressions, date/time functions, and more.
528///
529/// # Arguments
530///
531/// * `expr` - The root expression to transform (consumed).
532/// * `transform_fn` - A closure that receives each expression node (after its children
533/// have been transformed) and returns a possibly-rewritten expression.
534///
535/// # Errors
536///
537/// Returns an error if `transform_fn` returns an error for any node.
538pub fn transform_recursive<F>(expr: Expression, transform_fn: &F) -> Result<Expression>
539where
540 F: Fn(Expression) -> Result<Expression>,
541{
542 use crate::expressions::BinaryOp;
543
544 // Helper macro to transform binary ops with Box<BinaryOp>
545 macro_rules! transform_binary {
546 ($variant:ident, $op:expr) => {{
547 let left = transform_recursive($op.left, transform_fn)?;
548 let right = transform_recursive($op.right, transform_fn)?;
549 Expression::$variant(Box::new(BinaryOp {
550 left,
551 right,
552 left_comments: $op.left_comments,
553 operator_comments: $op.operator_comments,
554 trailing_comments: $op.trailing_comments,
555 }))
556 }};
557 }
558
559 // First recursively transform children, then apply the transform function
560 let expr = match expr {
561 Expression::Select(mut select) => {
562 select.expressions = select
563 .expressions
564 .into_iter()
565 .map(|e| transform_recursive(e, transform_fn))
566 .collect::<Result<Vec<_>>>()?;
567
568 // Transform FROM clause
569 if let Some(mut from) = select.from.take() {
570 from.expressions = from
571 .expressions
572 .into_iter()
573 .map(|e| transform_recursive(e, transform_fn))
574 .collect::<Result<Vec<_>>>()?;
575 select.from = Some(from);
576 }
577
578 // Transform JOINs - important for CROSS APPLY / LATERAL transformations
579 select.joins = select
580 .joins
581 .into_iter()
582 .map(|mut join| {
583 join.this = transform_recursive(join.this, transform_fn)?;
584 if let Some(on) = join.on.take() {
585 join.on = Some(transform_recursive(on, transform_fn)?);
586 }
587 // Wrap join in Expression::Join to allow transform_fn to transform it
588 match transform_fn(Expression::Join(Box::new(join)))? {
589 Expression::Join(j) => Ok(*j),
590 _ => Err(crate::error::Error::parse(
591 "Join transformation returned non-join expression",
592 0,
593 0,
594 )),
595 }
596 })
597 .collect::<Result<Vec<_>>>()?;
598
599 // Transform LATERAL VIEW expressions (Hive/Spark)
600 select.lateral_views = select
601 .lateral_views
602 .into_iter()
603 .map(|mut lv| {
604 lv.this = transform_recursive(lv.this, transform_fn)?;
605 Ok(lv)
606 })
607 .collect::<Result<Vec<_>>>()?;
608
609 // Transform WHERE clause
610 if let Some(mut where_clause) = select.where_clause.take() {
611 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
612 select.where_clause = Some(where_clause);
613 }
614
615 // Transform GROUP BY
616 if let Some(mut group_by) = select.group_by.take() {
617 group_by.expressions = group_by
618 .expressions
619 .into_iter()
620 .map(|e| transform_recursive(e, transform_fn))
621 .collect::<Result<Vec<_>>>()?;
622 select.group_by = Some(group_by);
623 }
624
625 // Transform HAVING
626 if let Some(mut having) = select.having.take() {
627 having.this = transform_recursive(having.this, transform_fn)?;
628 select.having = Some(having);
629 }
630
631 // Transform WITH (CTEs)
632 if let Some(mut with) = select.with.take() {
633 with.ctes = with
634 .ctes
635 .into_iter()
636 .map(|mut cte| {
637 let original = cte.this.clone();
638 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
639 cte
640 })
641 .collect();
642 select.with = Some(with);
643 }
644
645 // Transform ORDER BY
646 if let Some(mut order) = select.order_by.take() {
647 order.expressions = order
648 .expressions
649 .into_iter()
650 .map(|o| {
651 let mut o = o;
652 let original = o.this.clone();
653 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
654 // Also apply transform to the Ordered wrapper itself (for NULLS FIRST etc.)
655 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
656 Ok(Expression::Ordered(transformed)) => *transformed,
657 Ok(_) | Err(_) => o,
658 }
659 })
660 .collect();
661 select.order_by = Some(order);
662 }
663
664 // Transform WINDOW clause order_by
665 if let Some(ref mut windows) = select.windows {
666 for nw in windows.iter_mut() {
667 nw.spec.order_by = std::mem::take(&mut nw.spec.order_by)
668 .into_iter()
669 .map(|o| {
670 let mut o = o;
671 let original = o.this.clone();
672 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
673 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
674 Ok(Expression::Ordered(transformed)) => *transformed,
675 Ok(_) | Err(_) => o,
676 }
677 })
678 .collect();
679 }
680 }
681
682 // Transform QUALIFY
683 if let Some(mut qual) = select.qualify.take() {
684 qual.this = transform_recursive(qual.this, transform_fn)?;
685 select.qualify = Some(qual);
686 }
687
688 Expression::Select(select)
689 }
690 Expression::Function(mut f) => {
691 f.args = f
692 .args
693 .into_iter()
694 .map(|e| transform_recursive(e, transform_fn))
695 .collect::<Result<Vec<_>>>()?;
696 Expression::Function(f)
697 }
698 Expression::AggregateFunction(mut f) => {
699 f.args = f
700 .args
701 .into_iter()
702 .map(|e| transform_recursive(e, transform_fn))
703 .collect::<Result<Vec<_>>>()?;
704 if let Some(filter) = f.filter {
705 f.filter = Some(transform_recursive(filter, transform_fn)?);
706 }
707 Expression::AggregateFunction(f)
708 }
709 Expression::WindowFunction(mut wf) => {
710 wf.this = transform_recursive(wf.this, transform_fn)?;
711 wf.over.partition_by = wf
712 .over
713 .partition_by
714 .into_iter()
715 .map(|e| transform_recursive(e, transform_fn))
716 .collect::<Result<Vec<_>>>()?;
717 // Transform order_by items through Expression::Ordered wrapper
718 wf.over.order_by = wf
719 .over
720 .order_by
721 .into_iter()
722 .map(|o| {
723 let mut o = o;
724 o.this = transform_recursive(o.this, transform_fn)?;
725 match transform_fn(Expression::Ordered(Box::new(o)))? {
726 Expression::Ordered(transformed) => Ok(*transformed),
727 _ => Err(crate::error::Error::parse(
728 "Ordered transformation returned non-Ordered expression",
729 0,
730 0,
731 )),
732 }
733 })
734 .collect::<Result<Vec<_>>>()?;
735 Expression::WindowFunction(wf)
736 }
737 Expression::Alias(mut a) => {
738 a.this = transform_recursive(a.this, transform_fn)?;
739 Expression::Alias(a)
740 }
741 Expression::Cast(mut c) => {
742 c.this = transform_recursive(c.this, transform_fn)?;
743 // Also transform the target data type (recursively for nested types like ARRAY<INT>, STRUCT<a INT>)
744 c.to = transform_data_type_recursive(c.to, transform_fn)?;
745 Expression::Cast(c)
746 }
747 Expression::And(op) => transform_binary!(And, *op),
748 Expression::Or(op) => transform_binary!(Or, *op),
749 Expression::Add(op) => transform_binary!(Add, *op),
750 Expression::Sub(op) => transform_binary!(Sub, *op),
751 Expression::Mul(op) => transform_binary!(Mul, *op),
752 Expression::Div(op) => transform_binary!(Div, *op),
753 Expression::Eq(op) => transform_binary!(Eq, *op),
754 Expression::Lt(op) => transform_binary!(Lt, *op),
755 Expression::Gt(op) => transform_binary!(Gt, *op),
756 Expression::Paren(mut p) => {
757 p.this = transform_recursive(p.this, transform_fn)?;
758 Expression::Paren(p)
759 }
760 Expression::Coalesce(mut f) => {
761 f.expressions = f
762 .expressions
763 .into_iter()
764 .map(|e| transform_recursive(e, transform_fn))
765 .collect::<Result<Vec<_>>>()?;
766 Expression::Coalesce(f)
767 }
768 Expression::IfNull(mut f) => {
769 f.this = transform_recursive(f.this, transform_fn)?;
770 f.expression = transform_recursive(f.expression, transform_fn)?;
771 Expression::IfNull(f)
772 }
773 Expression::Nvl(mut f) => {
774 f.this = transform_recursive(f.this, transform_fn)?;
775 f.expression = transform_recursive(f.expression, transform_fn)?;
776 Expression::Nvl(f)
777 }
778 Expression::In(mut i) => {
779 i.this = transform_recursive(i.this, transform_fn)?;
780 i.expressions = i
781 .expressions
782 .into_iter()
783 .map(|e| transform_recursive(e, transform_fn))
784 .collect::<Result<Vec<_>>>()?;
785 if let Some(query) = i.query {
786 i.query = Some(transform_recursive(query, transform_fn)?);
787 }
788 Expression::In(i)
789 }
790 Expression::Not(mut n) => {
791 n.this = transform_recursive(n.this, transform_fn)?;
792 Expression::Not(n)
793 }
794 Expression::ArraySlice(mut s) => {
795 s.this = transform_recursive(s.this, transform_fn)?;
796 if let Some(start) = s.start {
797 s.start = Some(transform_recursive(start, transform_fn)?);
798 }
799 if let Some(end) = s.end {
800 s.end = Some(transform_recursive(end, transform_fn)?);
801 }
802 Expression::ArraySlice(s)
803 }
804 Expression::Subscript(mut s) => {
805 s.this = transform_recursive(s.this, transform_fn)?;
806 s.index = transform_recursive(s.index, transform_fn)?;
807 Expression::Subscript(s)
808 }
809 Expression::Array(mut a) => {
810 a.expressions = a
811 .expressions
812 .into_iter()
813 .map(|e| transform_recursive(e, transform_fn))
814 .collect::<Result<Vec<_>>>()?;
815 Expression::Array(a)
816 }
817 Expression::Struct(mut s) => {
818 let mut new_fields = Vec::new();
819 for (name, expr) in s.fields {
820 let transformed = transform_recursive(expr, transform_fn)?;
821 new_fields.push((name, transformed));
822 }
823 s.fields = new_fields;
824 Expression::Struct(s)
825 }
826 Expression::NamedArgument(mut na) => {
827 na.value = transform_recursive(na.value, transform_fn)?;
828 Expression::NamedArgument(na)
829 }
830 Expression::MapFunc(mut m) => {
831 m.keys = m
832 .keys
833 .into_iter()
834 .map(|e| transform_recursive(e, transform_fn))
835 .collect::<Result<Vec<_>>>()?;
836 m.values = m
837 .values
838 .into_iter()
839 .map(|e| transform_recursive(e, transform_fn))
840 .collect::<Result<Vec<_>>>()?;
841 Expression::MapFunc(m)
842 }
843 Expression::ArrayFunc(mut a) => {
844 a.expressions = a
845 .expressions
846 .into_iter()
847 .map(|e| transform_recursive(e, transform_fn))
848 .collect::<Result<Vec<_>>>()?;
849 Expression::ArrayFunc(a)
850 }
851 Expression::Lambda(mut l) => {
852 l.body = transform_recursive(l.body, transform_fn)?;
853 Expression::Lambda(l)
854 }
855 Expression::JsonExtract(mut f) => {
856 f.this = transform_recursive(f.this, transform_fn)?;
857 f.path = transform_recursive(f.path, transform_fn)?;
858 Expression::JsonExtract(f)
859 }
860 Expression::JsonExtractScalar(mut f) => {
861 f.this = transform_recursive(f.this, transform_fn)?;
862 f.path = transform_recursive(f.path, transform_fn)?;
863 Expression::JsonExtractScalar(f)
864 }
865
866 // ===== UnaryFunc-based expressions =====
867 // These all have a single `this: Expression` child
868 Expression::Length(mut f) => {
869 f.this = transform_recursive(f.this, transform_fn)?;
870 Expression::Length(f)
871 }
872 Expression::Upper(mut f) => {
873 f.this = transform_recursive(f.this, transform_fn)?;
874 Expression::Upper(f)
875 }
876 Expression::Lower(mut f) => {
877 f.this = transform_recursive(f.this, transform_fn)?;
878 Expression::Lower(f)
879 }
880 Expression::LTrim(mut f) => {
881 f.this = transform_recursive(f.this, transform_fn)?;
882 Expression::LTrim(f)
883 }
884 Expression::RTrim(mut f) => {
885 f.this = transform_recursive(f.this, transform_fn)?;
886 Expression::RTrim(f)
887 }
888 Expression::Reverse(mut f) => {
889 f.this = transform_recursive(f.this, transform_fn)?;
890 Expression::Reverse(f)
891 }
892 Expression::Abs(mut f) => {
893 f.this = transform_recursive(f.this, transform_fn)?;
894 Expression::Abs(f)
895 }
896 Expression::Ceil(mut f) => {
897 f.this = transform_recursive(f.this, transform_fn)?;
898 Expression::Ceil(f)
899 }
900 Expression::Floor(mut f) => {
901 f.this = transform_recursive(f.this, transform_fn)?;
902 Expression::Floor(f)
903 }
904 Expression::Sign(mut f) => {
905 f.this = transform_recursive(f.this, transform_fn)?;
906 Expression::Sign(f)
907 }
908 Expression::Sqrt(mut f) => {
909 f.this = transform_recursive(f.this, transform_fn)?;
910 Expression::Sqrt(f)
911 }
912 Expression::Cbrt(mut f) => {
913 f.this = transform_recursive(f.this, transform_fn)?;
914 Expression::Cbrt(f)
915 }
916 Expression::Ln(mut f) => {
917 f.this = transform_recursive(f.this, transform_fn)?;
918 Expression::Ln(f)
919 }
920 Expression::Log(mut f) => {
921 f.this = transform_recursive(f.this, transform_fn)?;
922 if let Some(base) = f.base {
923 f.base = Some(transform_recursive(base, transform_fn)?);
924 }
925 Expression::Log(f)
926 }
927 Expression::Exp(mut f) => {
928 f.this = transform_recursive(f.this, transform_fn)?;
929 Expression::Exp(f)
930 }
931 Expression::Date(mut f) => {
932 f.this = transform_recursive(f.this, transform_fn)?;
933 Expression::Date(f)
934 }
935 Expression::Stddev(mut f) => {
936 f.this = transform_recursive(f.this, transform_fn)?;
937 Expression::Stddev(f)
938 }
939 Expression::Variance(mut f) => {
940 f.this = transform_recursive(f.this, transform_fn)?;
941 Expression::Variance(f)
942 }
943
944 // ===== BinaryFunc-based expressions =====
945 Expression::ModFunc(mut f) => {
946 f.this = transform_recursive(f.this, transform_fn)?;
947 f.expression = transform_recursive(f.expression, transform_fn)?;
948 Expression::ModFunc(f)
949 }
950 Expression::Power(mut f) => {
951 f.this = transform_recursive(f.this, transform_fn)?;
952 f.expression = transform_recursive(f.expression, transform_fn)?;
953 Expression::Power(f)
954 }
955 Expression::MapFromArrays(mut f) => {
956 f.this = transform_recursive(f.this, transform_fn)?;
957 f.expression = transform_recursive(f.expression, transform_fn)?;
958 Expression::MapFromArrays(f)
959 }
960 Expression::ElementAt(mut f) => {
961 f.this = transform_recursive(f.this, transform_fn)?;
962 f.expression = transform_recursive(f.expression, transform_fn)?;
963 Expression::ElementAt(f)
964 }
965 Expression::MapContainsKey(mut f) => {
966 f.this = transform_recursive(f.this, transform_fn)?;
967 f.expression = transform_recursive(f.expression, transform_fn)?;
968 Expression::MapContainsKey(f)
969 }
970 Expression::Left(mut f) => {
971 f.this = transform_recursive(f.this, transform_fn)?;
972 f.length = transform_recursive(f.length, transform_fn)?;
973 Expression::Left(f)
974 }
975 Expression::Right(mut f) => {
976 f.this = transform_recursive(f.this, transform_fn)?;
977 f.length = transform_recursive(f.length, transform_fn)?;
978 Expression::Right(f)
979 }
980 Expression::Repeat(mut f) => {
981 f.this = transform_recursive(f.this, transform_fn)?;
982 f.times = transform_recursive(f.times, transform_fn)?;
983 Expression::Repeat(f)
984 }
985
986 // ===== Complex function expressions =====
987 Expression::Substring(mut f) => {
988 f.this = transform_recursive(f.this, transform_fn)?;
989 f.start = transform_recursive(f.start, transform_fn)?;
990 if let Some(len) = f.length {
991 f.length = Some(transform_recursive(len, transform_fn)?);
992 }
993 Expression::Substring(f)
994 }
995 Expression::Replace(mut f) => {
996 f.this = transform_recursive(f.this, transform_fn)?;
997 f.old = transform_recursive(f.old, transform_fn)?;
998 f.new = transform_recursive(f.new, transform_fn)?;
999 Expression::Replace(f)
1000 }
1001 Expression::ConcatWs(mut f) => {
1002 f.separator = transform_recursive(f.separator, transform_fn)?;
1003 f.expressions = f
1004 .expressions
1005 .into_iter()
1006 .map(|e| transform_recursive(e, transform_fn))
1007 .collect::<Result<Vec<_>>>()?;
1008 Expression::ConcatWs(f)
1009 }
1010 Expression::Trim(mut f) => {
1011 f.this = transform_recursive(f.this, transform_fn)?;
1012 if let Some(chars) = f.characters {
1013 f.characters = Some(transform_recursive(chars, transform_fn)?);
1014 }
1015 Expression::Trim(f)
1016 }
1017 Expression::Split(mut f) => {
1018 f.this = transform_recursive(f.this, transform_fn)?;
1019 f.delimiter = transform_recursive(f.delimiter, transform_fn)?;
1020 Expression::Split(f)
1021 }
1022 Expression::Lpad(mut f) => {
1023 f.this = transform_recursive(f.this, transform_fn)?;
1024 f.length = transform_recursive(f.length, transform_fn)?;
1025 if let Some(fill) = f.fill {
1026 f.fill = Some(transform_recursive(fill, transform_fn)?);
1027 }
1028 Expression::Lpad(f)
1029 }
1030 Expression::Rpad(mut f) => {
1031 f.this = transform_recursive(f.this, transform_fn)?;
1032 f.length = transform_recursive(f.length, transform_fn)?;
1033 if let Some(fill) = f.fill {
1034 f.fill = Some(transform_recursive(fill, transform_fn)?);
1035 }
1036 Expression::Rpad(f)
1037 }
1038
1039 // ===== Conditional expressions =====
1040 Expression::Case(mut c) => {
1041 if let Some(operand) = c.operand {
1042 c.operand = Some(transform_recursive(operand, transform_fn)?);
1043 }
1044 c.whens = c
1045 .whens
1046 .into_iter()
1047 .map(|(cond, then)| {
1048 let new_cond = transform_recursive(cond.clone(), transform_fn).unwrap_or(cond);
1049 let new_then = transform_recursive(then.clone(), transform_fn).unwrap_or(then);
1050 (new_cond, new_then)
1051 })
1052 .collect();
1053 if let Some(else_expr) = c.else_ {
1054 c.else_ = Some(transform_recursive(else_expr, transform_fn)?);
1055 }
1056 Expression::Case(c)
1057 }
1058 Expression::IfFunc(mut f) => {
1059 f.condition = transform_recursive(f.condition, transform_fn)?;
1060 f.true_value = transform_recursive(f.true_value, transform_fn)?;
1061 if let Some(false_val) = f.false_value {
1062 f.false_value = Some(transform_recursive(false_val, transform_fn)?);
1063 }
1064 Expression::IfFunc(f)
1065 }
1066
1067 // ===== Date/Time expressions =====
1068 Expression::DateAdd(mut f) => {
1069 f.this = transform_recursive(f.this, transform_fn)?;
1070 f.interval = transform_recursive(f.interval, transform_fn)?;
1071 Expression::DateAdd(f)
1072 }
1073 Expression::DateSub(mut f) => {
1074 f.this = transform_recursive(f.this, transform_fn)?;
1075 f.interval = transform_recursive(f.interval, transform_fn)?;
1076 Expression::DateSub(f)
1077 }
1078 Expression::DateDiff(mut f) => {
1079 f.this = transform_recursive(f.this, transform_fn)?;
1080 f.expression = transform_recursive(f.expression, transform_fn)?;
1081 Expression::DateDiff(f)
1082 }
1083 Expression::DateTrunc(mut f) => {
1084 f.this = transform_recursive(f.this, transform_fn)?;
1085 Expression::DateTrunc(f)
1086 }
1087 Expression::Extract(mut f) => {
1088 f.this = transform_recursive(f.this, transform_fn)?;
1089 Expression::Extract(f)
1090 }
1091
1092 // ===== JSON expressions =====
1093 Expression::JsonObject(mut f) => {
1094 f.pairs = f
1095 .pairs
1096 .into_iter()
1097 .map(|(k, v)| {
1098 let new_k = transform_recursive(k, transform_fn)?;
1099 let new_v = transform_recursive(v, transform_fn)?;
1100 Ok((new_k, new_v))
1101 })
1102 .collect::<Result<Vec<_>>>()?;
1103 Expression::JsonObject(f)
1104 }
1105
1106 // ===== Subquery expressions =====
1107 Expression::Subquery(mut s) => {
1108 s.this = transform_recursive(s.this, transform_fn)?;
1109 Expression::Subquery(s)
1110 }
1111 Expression::Exists(mut e) => {
1112 e.this = transform_recursive(e.this, transform_fn)?;
1113 Expression::Exists(e)
1114 }
1115
1116 // ===== Set operations =====
1117 Expression::Union(mut u) => {
1118 u.left = transform_recursive(u.left, transform_fn)?;
1119 u.right = transform_recursive(u.right, transform_fn)?;
1120 Expression::Union(u)
1121 }
1122 Expression::Intersect(mut i) => {
1123 i.left = transform_recursive(i.left, transform_fn)?;
1124 i.right = transform_recursive(i.right, transform_fn)?;
1125 Expression::Intersect(i)
1126 }
1127 Expression::Except(mut e) => {
1128 e.left = transform_recursive(e.left, transform_fn)?;
1129 e.right = transform_recursive(e.right, transform_fn)?;
1130 Expression::Except(e)
1131 }
1132
1133 // ===== DML expressions =====
1134 Expression::Insert(mut ins) => {
1135 // Transform VALUES clause expressions
1136 let mut new_values = Vec::new();
1137 for row in ins.values {
1138 let mut new_row = Vec::new();
1139 for e in row {
1140 new_row.push(transform_recursive(e, transform_fn)?);
1141 }
1142 new_values.push(new_row);
1143 }
1144 ins.values = new_values;
1145
1146 // Transform query (for INSERT ... SELECT)
1147 if let Some(query) = ins.query {
1148 ins.query = Some(transform_recursive(query, transform_fn)?);
1149 }
1150
1151 // Transform RETURNING clause
1152 let mut new_returning = Vec::new();
1153 for e in ins.returning {
1154 new_returning.push(transform_recursive(e, transform_fn)?);
1155 }
1156 ins.returning = new_returning;
1157
1158 // Transform ON CONFLICT clause
1159 if let Some(on_conflict) = ins.on_conflict {
1160 ins.on_conflict = Some(Box::new(transform_recursive(*on_conflict, transform_fn)?));
1161 }
1162
1163 Expression::Insert(ins)
1164 }
1165 Expression::Update(mut upd) => {
1166 upd.set = upd
1167 .set
1168 .into_iter()
1169 .map(|(id, val)| {
1170 let new_val = transform_recursive(val.clone(), transform_fn).unwrap_or(val);
1171 (id, new_val)
1172 })
1173 .collect();
1174 if let Some(mut where_clause) = upd.where_clause.take() {
1175 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
1176 upd.where_clause = Some(where_clause);
1177 }
1178 Expression::Update(upd)
1179 }
1180 Expression::Delete(mut del) => {
1181 if let Some(mut where_clause) = del.where_clause.take() {
1182 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
1183 del.where_clause = Some(where_clause);
1184 }
1185 Expression::Delete(del)
1186 }
1187
1188 // ===== CTE expressions =====
1189 Expression::With(mut w) => {
1190 w.ctes = w
1191 .ctes
1192 .into_iter()
1193 .map(|mut cte| {
1194 let original = cte.this.clone();
1195 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
1196 cte
1197 })
1198 .collect();
1199 Expression::With(w)
1200 }
1201 Expression::Cte(mut c) => {
1202 c.this = transform_recursive(c.this, transform_fn)?;
1203 Expression::Cte(c)
1204 }
1205
1206 // ===== Order expressions =====
1207 Expression::Ordered(mut o) => {
1208 o.this = transform_recursive(o.this, transform_fn)?;
1209 Expression::Ordered(o)
1210 }
1211
1212 // ===== Negation =====
1213 Expression::Neg(mut n) => {
1214 n.this = transform_recursive(n.this, transform_fn)?;
1215 Expression::Neg(n)
1216 }
1217
1218 // ===== Between =====
1219 Expression::Between(mut b) => {
1220 b.this = transform_recursive(b.this, transform_fn)?;
1221 b.low = transform_recursive(b.low, transform_fn)?;
1222 b.high = transform_recursive(b.high, transform_fn)?;
1223 Expression::Between(b)
1224 }
1225
1226 // ===== Like expressions =====
1227 Expression::Like(mut l) => {
1228 l.left = transform_recursive(l.left, transform_fn)?;
1229 l.right = transform_recursive(l.right, transform_fn)?;
1230 Expression::Like(l)
1231 }
1232 Expression::ILike(mut l) => {
1233 l.left = transform_recursive(l.left, transform_fn)?;
1234 l.right = transform_recursive(l.right, transform_fn)?;
1235 Expression::ILike(l)
1236 }
1237
1238 // ===== Additional binary ops not covered by macro =====
1239 Expression::Neq(op) => transform_binary!(Neq, *op),
1240 Expression::Lte(op) => transform_binary!(Lte, *op),
1241 Expression::Gte(op) => transform_binary!(Gte, *op),
1242 Expression::Mod(op) => transform_binary!(Mod, *op),
1243 Expression::Concat(op) => transform_binary!(Concat, *op),
1244 Expression::BitwiseAnd(op) => transform_binary!(BitwiseAnd, *op),
1245 Expression::BitwiseOr(op) => transform_binary!(BitwiseOr, *op),
1246 Expression::BitwiseXor(op) => transform_binary!(BitwiseXor, *op),
1247 Expression::Is(op) => transform_binary!(Is, *op),
1248
1249 // ===== TryCast / SafeCast =====
1250 Expression::TryCast(mut c) => {
1251 c.this = transform_recursive(c.this, transform_fn)?;
1252 c.to = transform_data_type_recursive(c.to, transform_fn)?;
1253 Expression::TryCast(c)
1254 }
1255 Expression::SafeCast(mut c) => {
1256 c.this = transform_recursive(c.this, transform_fn)?;
1257 c.to = transform_data_type_recursive(c.to, transform_fn)?;
1258 Expression::SafeCast(c)
1259 }
1260
1261 // ===== Misc =====
1262 Expression::Unnest(mut f) => {
1263 f.this = transform_recursive(f.this, transform_fn)?;
1264 f.expressions = f
1265 .expressions
1266 .into_iter()
1267 .map(|e| transform_recursive(e, transform_fn))
1268 .collect::<Result<Vec<_>>>()?;
1269 Expression::Unnest(f)
1270 }
1271 Expression::Explode(mut f) => {
1272 f.this = transform_recursive(f.this, transform_fn)?;
1273 Expression::Explode(f)
1274 }
1275 Expression::GroupConcat(mut f) => {
1276 f.this = transform_recursive(f.this, transform_fn)?;
1277 Expression::GroupConcat(f)
1278 }
1279 Expression::StringAgg(mut f) => {
1280 f.this = transform_recursive(f.this, transform_fn)?;
1281 Expression::StringAgg(f)
1282 }
1283 Expression::ListAgg(mut f) => {
1284 f.this = transform_recursive(f.this, transform_fn)?;
1285 Expression::ListAgg(f)
1286 }
1287 Expression::ArrayAgg(mut f) => {
1288 f.this = transform_recursive(f.this, transform_fn)?;
1289 Expression::ArrayAgg(f)
1290 }
1291 Expression::ParseJson(mut f) => {
1292 f.this = transform_recursive(f.this, transform_fn)?;
1293 Expression::ParseJson(f)
1294 }
1295 Expression::ToJson(mut f) => {
1296 f.this = transform_recursive(f.this, transform_fn)?;
1297 Expression::ToJson(f)
1298 }
1299 Expression::JSONExtract(mut e) => {
1300 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1301 e.expression = Box::new(transform_recursive(*e.expression, transform_fn)?);
1302 Expression::JSONExtract(e)
1303 }
1304 Expression::JSONExtractScalar(mut e) => {
1305 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1306 e.expression = Box::new(transform_recursive(*e.expression, transform_fn)?);
1307 Expression::JSONExtractScalar(e)
1308 }
1309
1310 // StrToTime: recurse into this
1311 Expression::StrToTime(mut e) => {
1312 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1313 Expression::StrToTime(e)
1314 }
1315
1316 // UnixToTime: recurse into this
1317 Expression::UnixToTime(mut e) => {
1318 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1319 Expression::UnixToTime(e)
1320 }
1321
1322 // CreateTable: recurse into column defaults, on_update expressions, and data types
1323 Expression::CreateTable(mut ct) => {
1324 for col in &mut ct.columns {
1325 if let Some(default_expr) = col.default.take() {
1326 col.default = Some(transform_recursive(default_expr, transform_fn)?);
1327 }
1328 if let Some(on_update_expr) = col.on_update.take() {
1329 col.on_update = Some(transform_recursive(on_update_expr, transform_fn)?);
1330 }
1331 // Note: Column data type transformations (INT -> INT64 for BigQuery, etc.)
1332 // are NOT applied here because per-dialect transforms are designed for CAST/expression
1333 // contexts and may not produce correct results for DDL column definitions.
1334 // The DDL type mappings would need dedicated handling per source/target pair.
1335 }
1336 if let Some(as_select) = ct.as_select.take() {
1337 ct.as_select = Some(transform_recursive(as_select, transform_fn)?);
1338 }
1339 Expression::CreateTable(ct)
1340 }
1341
1342 // CreateProcedure: recurse into body expressions
1343 Expression::CreateProcedure(mut cp) => {
1344 if let Some(body) = cp.body.take() {
1345 cp.body = Some(match body {
1346 FunctionBody::Expression(expr) => {
1347 FunctionBody::Expression(transform_recursive(expr, transform_fn)?)
1348 }
1349 FunctionBody::Return(expr) => {
1350 FunctionBody::Return(transform_recursive(expr, transform_fn)?)
1351 }
1352 FunctionBody::Statements(stmts) => {
1353 let transformed_stmts = stmts
1354 .into_iter()
1355 .map(|s| transform_recursive(s, transform_fn))
1356 .collect::<Result<Vec<_>>>()?;
1357 FunctionBody::Statements(transformed_stmts)
1358 }
1359 other => other,
1360 });
1361 }
1362 Expression::CreateProcedure(cp)
1363 }
1364
1365 // CreateFunction: recurse into body expressions
1366 Expression::CreateFunction(mut cf) => {
1367 if let Some(body) = cf.body.take() {
1368 cf.body = Some(match body {
1369 FunctionBody::Expression(expr) => {
1370 FunctionBody::Expression(transform_recursive(expr, transform_fn)?)
1371 }
1372 FunctionBody::Return(expr) => {
1373 FunctionBody::Return(transform_recursive(expr, transform_fn)?)
1374 }
1375 FunctionBody::Statements(stmts) => {
1376 let transformed_stmts = stmts
1377 .into_iter()
1378 .map(|s| transform_recursive(s, transform_fn))
1379 .collect::<Result<Vec<_>>>()?;
1380 FunctionBody::Statements(transformed_stmts)
1381 }
1382 other => other,
1383 });
1384 }
1385 Expression::CreateFunction(cf)
1386 }
1387
1388 // MemberOf: recurse into left and right operands
1389 Expression::MemberOf(op) => transform_binary!(MemberOf, *op),
1390 // ArrayContainsAll (@>): recurse into left and right operands
1391 Expression::ArrayContainsAll(op) => transform_binary!(ArrayContainsAll, *op),
1392 // ArrayContainedBy (<@): recurse into left and right operands
1393 Expression::ArrayContainedBy(op) => transform_binary!(ArrayContainedBy, *op),
1394 // ArrayOverlaps (&&): recurse into left and right operands
1395 Expression::ArrayOverlaps(op) => transform_binary!(ArrayOverlaps, *op),
1396 // TsMatch (@@): recurse into left and right operands
1397 Expression::TsMatch(op) => transform_binary!(TsMatch, *op),
1398 // Adjacent (-|-): recurse into left and right operands
1399 Expression::Adjacent(op) => transform_binary!(Adjacent, *op),
1400
1401 // Table: recurse into when (HistoricalData) and changes fields
1402 Expression::Table(mut t) => {
1403 if let Some(when) = t.when.take() {
1404 let transformed =
1405 transform_recursive(Expression::HistoricalData(when), transform_fn)?;
1406 if let Expression::HistoricalData(hd) = transformed {
1407 t.when = Some(hd);
1408 }
1409 }
1410 if let Some(changes) = t.changes.take() {
1411 let transformed = transform_recursive(Expression::Changes(changes), transform_fn)?;
1412 if let Expression::Changes(c) = transformed {
1413 t.changes = Some(c);
1414 }
1415 }
1416 Expression::Table(t)
1417 }
1418
1419 // HistoricalData (Snowflake time travel): recurse into expression
1420 Expression::HistoricalData(mut hd) => {
1421 *hd.expression = transform_recursive(*hd.expression, transform_fn)?;
1422 Expression::HistoricalData(hd)
1423 }
1424
1425 // Changes (Snowflake CHANGES clause): recurse into at_before and end
1426 Expression::Changes(mut c) => {
1427 if let Some(at_before) = c.at_before.take() {
1428 c.at_before = Some(Box::new(transform_recursive(*at_before, transform_fn)?));
1429 }
1430 if let Some(end) = c.end.take() {
1431 c.end = Some(Box::new(transform_recursive(*end, transform_fn)?));
1432 }
1433 Expression::Changes(c)
1434 }
1435
1436 // TableArgument: TABLE(expr) or MODEL(expr)
1437 Expression::TableArgument(mut ta) => {
1438 ta.this = transform_recursive(ta.this, transform_fn)?;
1439 Expression::TableArgument(ta)
1440 }
1441
1442 // JoinedTable: (tbl1 JOIN tbl2 ON ...) - recurse into left and join tables
1443 Expression::JoinedTable(mut jt) => {
1444 jt.left = transform_recursive(jt.left, transform_fn)?;
1445 for join in &mut jt.joins {
1446 join.this = transform_recursive(
1447 std::mem::replace(&mut join.this, Expression::Null(crate::expressions::Null)),
1448 transform_fn,
1449 )?;
1450 if let Some(on) = join.on.take() {
1451 join.on = Some(transform_recursive(on, transform_fn)?);
1452 }
1453 }
1454 jt.lateral_views = jt
1455 .lateral_views
1456 .into_iter()
1457 .map(|mut lv| {
1458 lv.this = transform_recursive(lv.this, transform_fn)?;
1459 Ok(lv)
1460 })
1461 .collect::<Result<Vec<_>>>()?;
1462 Expression::JoinedTable(jt)
1463 }
1464
1465 // Lateral: LATERAL func() - recurse into the function expression
1466 Expression::Lateral(mut lat) => {
1467 *lat.this = transform_recursive(*lat.this, transform_fn)?;
1468 Expression::Lateral(lat)
1469 }
1470
1471 // WithinGroup: recurse into order_by items (for NULLS FIRST/LAST etc.)
1472 // but NOT into wg.this - the inner function is handled by StringAggConvert/GroupConcatConvert
1473 // as a unit together with the WithinGroup wrapper
1474 Expression::WithinGroup(mut wg) => {
1475 wg.order_by = wg
1476 .order_by
1477 .into_iter()
1478 .map(|mut o| {
1479 let original = o.this.clone();
1480 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
1481 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
1482 Ok(Expression::Ordered(transformed)) => *transformed,
1483 Ok(_) | Err(_) => o,
1484 }
1485 })
1486 .collect();
1487 Expression::WithinGroup(wg)
1488 }
1489
1490 // Filter: recurse into both the aggregate and the filter condition
1491 Expression::Filter(mut f) => {
1492 f.this = Box::new(transform_recursive(*f.this, transform_fn)?);
1493 f.expression = Box::new(transform_recursive(*f.expression, transform_fn)?);
1494 Expression::Filter(f)
1495 }
1496
1497 // BitwiseOrAgg/BitwiseAndAgg/BitwiseXorAgg: recurse into the aggregate argument
1498 Expression::BitwiseOrAgg(mut f) => {
1499 f.this = transform_recursive(f.this, transform_fn)?;
1500 Expression::BitwiseOrAgg(f)
1501 }
1502 Expression::BitwiseAndAgg(mut f) => {
1503 f.this = transform_recursive(f.this, transform_fn)?;
1504 Expression::BitwiseAndAgg(f)
1505 }
1506 Expression::BitwiseXorAgg(mut f) => {
1507 f.this = transform_recursive(f.this, transform_fn)?;
1508 Expression::BitwiseXorAgg(f)
1509 }
1510 Expression::PipeOperator(mut pipe) => {
1511 pipe.this = transform_recursive(pipe.this, transform_fn)?;
1512 pipe.expression = transform_recursive(pipe.expression, transform_fn)?;
1513 Expression::PipeOperator(pipe)
1514 }
1515
1516 // Pass through leaf nodes unchanged
1517 other => other,
1518 };
1519
1520 // Then apply the transform function
1521 transform_fn(expr)
1522}
1523
/// Returns the tokenizer config, generator config, and expression transform closure
/// for a built-in dialect type. This is the shared implementation used by both
/// `Dialect::get()` and custom dialect construction.
///
/// The transform closure is boxed behind `Fn + Send + Sync` so a `Dialect` holding
/// it can be shared across threads regardless of which concrete dialect it wraps.
/// Any `DialectType` whose Cargo feature is disabled falls through to the
/// `GenericDialect` catch-all arm, so this function is total over `DialectType`
/// for every feature combination.
fn configs_for_dialect_type(
    dt: DialectType,
) -> (
    TokenizerConfig,
    GeneratorConfig,
    Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>,
) {
    // Expands to the (tokenizer, generator, transform) triple for one dialect.
    // The dialect is named as a bare value expression both here and inside the
    // closure, so a fresh value is produced per `transform_expr` call — the
    // dialect structs carry no constructor arguments, making this cheap.
    macro_rules! dialect_configs {
        ($dialect_struct:ident) => {{
            let d = $dialect_struct;
            (
                d.tokenizer_config(),
                d.generator_config(),
                Box::new(move |e| $dialect_struct.transform_expr(e)),
            )
        }};
    }
    match dt {
        #[cfg(feature = "dialect-postgresql")]
        DialectType::PostgreSQL => dialect_configs!(PostgresDialect),
        #[cfg(feature = "dialect-mysql")]
        DialectType::MySQL => dialect_configs!(MySQLDialect),
        #[cfg(feature = "dialect-bigquery")]
        DialectType::BigQuery => dialect_configs!(BigQueryDialect),
        #[cfg(feature = "dialect-snowflake")]
        DialectType::Snowflake => dialect_configs!(SnowflakeDialect),
        #[cfg(feature = "dialect-duckdb")]
        DialectType::DuckDB => dialect_configs!(DuckDBDialect),
        #[cfg(feature = "dialect-tsql")]
        DialectType::TSQL => dialect_configs!(TSQLDialect),
        #[cfg(feature = "dialect-oracle")]
        DialectType::Oracle => dialect_configs!(OracleDialect),
        #[cfg(feature = "dialect-hive")]
        DialectType::Hive => dialect_configs!(HiveDialect),
        #[cfg(feature = "dialect-spark")]
        DialectType::Spark => dialect_configs!(SparkDialect),
        #[cfg(feature = "dialect-sqlite")]
        DialectType::SQLite => dialect_configs!(SQLiteDialect),
        #[cfg(feature = "dialect-presto")]
        DialectType::Presto => dialect_configs!(PrestoDialect),
        #[cfg(feature = "dialect-trino")]
        DialectType::Trino => dialect_configs!(TrinoDialect),
        #[cfg(feature = "dialect-redshift")]
        DialectType::Redshift => dialect_configs!(RedshiftDialect),
        #[cfg(feature = "dialect-clickhouse")]
        DialectType::ClickHouse => dialect_configs!(ClickHouseDialect),
        #[cfg(feature = "dialect-databricks")]
        DialectType::Databricks => dialect_configs!(DatabricksDialect),
        #[cfg(feature = "dialect-athena")]
        DialectType::Athena => dialect_configs!(AthenaDialect),
        #[cfg(feature = "dialect-teradata")]
        DialectType::Teradata => dialect_configs!(TeradataDialect),
        #[cfg(feature = "dialect-doris")]
        DialectType::Doris => dialect_configs!(DorisDialect),
        #[cfg(feature = "dialect-starrocks")]
        DialectType::StarRocks => dialect_configs!(StarRocksDialect),
        #[cfg(feature = "dialect-materialize")]
        DialectType::Materialize => dialect_configs!(MaterializeDialect),
        #[cfg(feature = "dialect-risingwave")]
        DialectType::RisingWave => dialect_configs!(RisingWaveDialect),
        #[cfg(feature = "dialect-singlestore")]
        DialectType::SingleStore => dialect_configs!(SingleStoreDialect),
        #[cfg(feature = "dialect-cockroachdb")]
        DialectType::CockroachDB => dialect_configs!(CockroachDBDialect),
        #[cfg(feature = "dialect-tidb")]
        DialectType::TiDB => dialect_configs!(TiDBDialect),
        #[cfg(feature = "dialect-druid")]
        DialectType::Druid => dialect_configs!(DruidDialect),
        #[cfg(feature = "dialect-solr")]
        DialectType::Solr => dialect_configs!(SolrDialect),
        #[cfg(feature = "dialect-tableau")]
        DialectType::Tableau => dialect_configs!(TableauDialect),
        #[cfg(feature = "dialect-dune")]
        DialectType::Dune => dialect_configs!(DuneDialect),
        #[cfg(feature = "dialect-fabric")]
        DialectType::Fabric => dialect_configs!(FabricDialect),
        #[cfg(feature = "dialect-drill")]
        DialectType::Drill => dialect_configs!(DrillDialect),
        #[cfg(feature = "dialect-dremio")]
        DialectType::Dremio => dialect_configs!(DremioDialect),
        #[cfg(feature = "dialect-exasol")]
        DialectType::Exasol => dialect_configs!(ExasolDialect),
        #[cfg(feature = "dialect-datafusion")]
        DialectType::DataFusion => dialect_configs!(DataFusionDialect),
        // Fallback: `DialectType::Generic` plus any variant whose feature was
        // not enabled at compile time.
        _ => dialect_configs!(GenericDialect),
    }
}
1614
1615// ---------------------------------------------------------------------------
1616// Custom dialect registry
1617// ---------------------------------------------------------------------------
1618
/// Process-wide registry of user-registered custom dialects, keyed by dialect
/// name. `LazyLock` defers construction until first access; `RwLock` allows
/// concurrent lookups while registration/removal takes the write lock.
static CUSTOM_DIALECT_REGISTRY: LazyLock<RwLock<HashMap<String, Arc<CustomDialectConfig>>>> =
    LazyLock::new(|| RwLock::new(HashMap::new()));
1621
/// Resolved configuration for a registered custom dialect, stored in the
/// global registry behind an `Arc`.
struct CustomDialectConfig {
    /// Registry key; the name callers pass to look the dialect up.
    name: String,
    /// The built-in dialect this custom dialect was derived from.
    base_dialect: DialectType,
    /// Tokenizer config inherited from the base dialect, with any builder
    /// modifier already applied.
    tokenizer_config: TokenizerConfig,
    /// Generator config inherited from the base dialect, with any builder
    /// modifier already applied.
    generator_config: GeneratorConfig,
    /// Optional per-node expression transform; when set, it replaces the base
    /// dialect's transform.
    transform: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
    /// Optional whole-tree preprocessing pass; when set, it replaces the base
    /// dialect's preprocessing.
    preprocess: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
1630
1631/// Fluent builder for creating and registering custom SQL dialects.
1632///
1633/// A custom dialect is based on an existing built-in dialect and allows selective
1634/// overrides of tokenizer configuration, generator configuration, and expression
1635/// transforms.
1636///
1637/// # Example
1638///
1639/// ```rust,ignore
1640/// use polyglot_sql::dialects::{CustomDialectBuilder, DialectType, Dialect};
1641/// use polyglot_sql::generator::NormalizeFunctions;
1642///
1643/// CustomDialectBuilder::new("my_postgres")
1644/// .based_on(DialectType::PostgreSQL)
1645/// .generator_config_modifier(|gc| {
1646/// gc.normalize_functions = NormalizeFunctions::Lower;
1647/// })
1648/// .register()
1649/// .unwrap();
1650///
1651/// let d = Dialect::get_by_name("my_postgres").unwrap();
1652/// let exprs = d.parse("SELECT COUNT(*)").unwrap();
1653/// let sql = d.generate(&exprs[0]).unwrap();
1654/// assert_eq!(sql, "select count(*)");
1655///
1656/// polyglot_sql::unregister_custom_dialect("my_postgres");
1657/// ```
pub struct CustomDialectBuilder {
    // Registry key for the dialect being built.
    name: String,
    // Built-in dialect whose configs are inherited (defaults to Generic).
    base_dialect: DialectType,
    // One-shot tweak applied to the inherited tokenizer config at register time.
    tokenizer_modifier: Option<Box<dyn FnOnce(&mut TokenizerConfig)>>,
    // One-shot tweak applied to the inherited generator config at register time.
    generator_modifier: Option<Box<dyn FnOnce(&mut GeneratorConfig)>>,
    // Replacement per-node expression transform (overrides the base dialect's).
    transform: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
    // Replacement whole-tree preprocessing pass (overrides the base dialect's).
    preprocess: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
1666
1667impl CustomDialectBuilder {
1668 /// Create a new builder with the given name. Defaults to `Generic` as the base dialect.
1669 pub fn new(name: impl Into<String>) -> Self {
1670 Self {
1671 name: name.into(),
1672 base_dialect: DialectType::Generic,
1673 tokenizer_modifier: None,
1674 generator_modifier: None,
1675 transform: None,
1676 preprocess: None,
1677 }
1678 }
1679
1680 /// Set the base built-in dialect to inherit configuration from.
1681 pub fn based_on(mut self, dialect: DialectType) -> Self {
1682 self.base_dialect = dialect;
1683 self
1684 }
1685
1686 /// Provide a closure that modifies the tokenizer configuration inherited from the base dialect.
1687 pub fn tokenizer_config_modifier<F>(mut self, f: F) -> Self
1688 where
1689 F: FnOnce(&mut TokenizerConfig) + 'static,
1690 {
1691 self.tokenizer_modifier = Some(Box::new(f));
1692 self
1693 }
1694
1695 /// Provide a closure that modifies the generator configuration inherited from the base dialect.
1696 pub fn generator_config_modifier<F>(mut self, f: F) -> Self
1697 where
1698 F: FnOnce(&mut GeneratorConfig) + 'static,
1699 {
1700 self.generator_modifier = Some(Box::new(f));
1701 self
1702 }
1703
1704 /// Set a custom per-node expression transform function.
1705 ///
1706 /// This replaces the base dialect's transform. It is called on every expression
1707 /// node during the recursive transform pass.
1708 pub fn transform_fn<F>(mut self, f: F) -> Self
1709 where
1710 F: Fn(Expression) -> Result<Expression> + Send + Sync + 'static,
1711 {
1712 self.transform = Some(Arc::new(f));
1713 self
1714 }
1715
1716 /// Set a custom whole-tree preprocessing function.
1717 ///
1718 /// This replaces the base dialect's built-in preprocessing. It is called once
1719 /// on the entire expression tree before the recursive per-node transform.
1720 pub fn preprocess_fn<F>(mut self, f: F) -> Self
1721 where
1722 F: Fn(Expression) -> Result<Expression> + Send + Sync + 'static,
1723 {
1724 self.preprocess = Some(Arc::new(f));
1725 self
1726 }
1727
1728 /// Build the custom dialect configuration and register it in the global registry.
1729 ///
1730 /// Returns an error if:
1731 /// - The name collides with a built-in dialect name
1732 /// - A custom dialect with the same name is already registered
1733 pub fn register(self) -> Result<()> {
1734 // Reject names that collide with built-in dialects
1735 if DialectType::from_str(&self.name).is_ok() {
1736 return Err(crate::error::Error::parse(
1737 format!(
1738 "Cannot register custom dialect '{}': name collides with built-in dialect",
1739 self.name
1740 ),
1741 0,
1742 0,
1743 ));
1744 }
1745
1746 // Get base configs
1747 let (mut tok_config, mut gen_config, _base_transform) =
1748 configs_for_dialect_type(self.base_dialect);
1749
1750 // Apply modifiers
1751 if let Some(tok_mod) = self.tokenizer_modifier {
1752 tok_mod(&mut tok_config);
1753 }
1754 if let Some(gen_mod) = self.generator_modifier {
1755 gen_mod(&mut gen_config);
1756 }
1757
1758 let config = CustomDialectConfig {
1759 name: self.name.clone(),
1760 base_dialect: self.base_dialect,
1761 tokenizer_config: tok_config,
1762 generator_config: gen_config,
1763 transform: self.transform,
1764 preprocess: self.preprocess,
1765 };
1766
1767 register_custom_dialect(config)
1768 }
1769}
1770
1771use std::str::FromStr;
1772
1773fn register_custom_dialect(config: CustomDialectConfig) -> Result<()> {
1774 let mut registry = CUSTOM_DIALECT_REGISTRY
1775 .write()
1776 .map_err(|e| crate::error::Error::parse(format!("Registry lock poisoned: {}", e), 0, 0))?;
1777
1778 if registry.contains_key(&config.name) {
1779 return Err(crate::error::Error::parse(
1780 format!("Custom dialect '{}' is already registered", config.name),
1781 0,
1782 0,
1783 ));
1784 }
1785
1786 registry.insert(config.name.clone(), Arc::new(config));
1787 Ok(())
1788}
1789
1790/// Remove a custom dialect from the global registry.
1791///
1792/// Returns `true` if a dialect with that name was found and removed,
1793/// `false` if no such custom dialect existed.
1794pub fn unregister_custom_dialect(name: &str) -> bool {
1795 if let Ok(mut registry) = CUSTOM_DIALECT_REGISTRY.write() {
1796 registry.remove(name).is_some()
1797 } else {
1798 false
1799 }
1800}
1801
1802fn get_custom_dialect_config(name: &str) -> Option<Arc<CustomDialectConfig>> {
1803 CUSTOM_DIALECT_REGISTRY
1804 .read()
1805 .ok()
1806 .and_then(|registry| registry.get(name).cloned())
1807}
1808
1809/// Main entry point for dialect-specific SQL operations.
1810///
1811/// A `Dialect` bundles together a tokenizer, generator configuration, and expression
1812/// transformer for a specific SQL database engine. It is the high-level API through
1813/// which callers parse, generate, transform, and transpile SQL.
1814///
1815/// # Usage
1816///
1817/// ```rust,ignore
1818/// use polyglot_sql::dialects::{Dialect, DialectType};
1819///
1820/// // Parse PostgreSQL SQL into an AST
1821/// let pg = Dialect::get(DialectType::PostgreSQL);
1822/// let exprs = pg.parse("SELECT id, name FROM users WHERE active")?;
1823///
1824/// // Transpile from PostgreSQL to BigQuery
1825/// let results = pg.transpile_to("SELECT NOW()", DialectType::BigQuery)?;
1826/// assert_eq!(results[0], "SELECT CURRENT_TIMESTAMP()");
1827/// ```
1828///
1829/// Obtain an instance via [`Dialect::get`] or [`Dialect::get_by_name`].
/// The struct is `Send + Sync`, so it can be shared freely across threads.
pub struct Dialect {
    /// Which engine this instance is configured for. For custom dialects this
    /// is the base dialect the custom one was derived from.
    dialect_type: DialectType,
    /// Tokenizer built from this dialect's lexing configuration.
    tokenizer: Tokenizer,
    /// Base generator configuration (quoting style, casing, syntax flags).
    generator_config: GeneratorConfig,
    /// Per-node expression transform applied during [`Dialect::transform`].
    transformer: Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>,
    /// Optional function to get expression-specific generator config (for hybrid dialects like Athena).
    generator_config_for_expr: Option<Box<dyn Fn(&Expression) -> GeneratorConfig + Send + Sync>>,
    /// Optional custom preprocessing function (overrides built-in preprocess for custom dialects).
    custom_preprocess: Option<Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
1841
1842impl Dialect {
1843 /// Creates a fully configured [`Dialect`] instance for the given [`DialectType`].
1844 ///
1845 /// This is the primary constructor. It initializes the tokenizer, generator config,
1846 /// and expression transformer based on the dialect's [`DialectImpl`] implementation.
1847 /// For hybrid dialects like Athena, it also sets up expression-specific generator
1848 /// config routing.
1849 pub fn get(dialect_type: DialectType) -> Self {
1850 let (tokenizer_config, generator_config, transformer) =
1851 configs_for_dialect_type(dialect_type);
1852
1853 // Set up expression-specific generator config for hybrid dialects
1854 let generator_config_for_expr: Option<
1855 Box<dyn Fn(&Expression) -> GeneratorConfig + Send + Sync>,
1856 > = match dialect_type {
1857 #[cfg(feature = "dialect-athena")]
1858 DialectType::Athena => Some(Box::new(|expr| {
1859 AthenaDialect.generator_config_for_expr(expr)
1860 })),
1861 _ => None,
1862 };
1863
1864 Self {
1865 dialect_type,
1866 tokenizer: Tokenizer::new(tokenizer_config),
1867 generator_config,
1868 transformer,
1869 generator_config_for_expr,
1870 custom_preprocess: None,
1871 }
1872 }
1873
1874 /// Look up a dialect by string name.
1875 ///
1876 /// Checks built-in dialect names first (via [`DialectType::from_str`]), then
1877 /// falls back to the custom dialect registry. Returns `None` if no dialect
1878 /// with the given name exists.
1879 pub fn get_by_name(name: &str) -> Option<Self> {
1880 // Try built-in first
1881 if let Ok(dt) = DialectType::from_str(name) {
1882 return Some(Self::get(dt));
1883 }
1884
1885 // Try custom registry
1886 let config = get_custom_dialect_config(name)?;
1887 Some(Self::from_custom_config(&config))
1888 }
1889
1890 /// Construct a `Dialect` from a custom dialect configuration.
1891 fn from_custom_config(config: &CustomDialectConfig) -> Self {
1892 // Build the transformer: use custom if provided, else use base dialect's
1893 let transformer: Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync> =
1894 if let Some(ref custom_transform) = config.transform {
1895 let t = Arc::clone(custom_transform);
1896 Box::new(move |e| t(e))
1897 } else {
1898 let (_, _, base_transform) = configs_for_dialect_type(config.base_dialect);
1899 base_transform
1900 };
1901
1902 // Build the custom preprocess: use custom if provided
1903 let custom_preprocess: Option<Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>> =
1904 config.preprocess.as_ref().map(|p| {
1905 let p = Arc::clone(p);
1906 Box::new(move |e: Expression| p(e))
1907 as Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>
1908 });
1909
1910 Self {
1911 dialect_type: config.base_dialect,
1912 tokenizer: Tokenizer::new(config.tokenizer_config.clone()),
1913 generator_config: config.generator_config.clone(),
1914 transformer,
1915 generator_config_for_expr: None,
1916 custom_preprocess,
1917 }
1918 }
1919
1920 /// Get the dialect type
    pub fn dialect_type(&self) -> DialectType {
        // `DialectType` is passed around by value throughout this module, so
        // return a copy rather than a borrow.
        self.dialect_type
    }
1924
1925 /// Get the generator configuration
    pub fn generator_config(&self) -> &GeneratorConfig {
        // Borrow of the base config; per-expression overrides for hybrid
        // dialects (see `get_config_for_expr`) are NOT applied here.
        &self.generator_config
    }
1929
1930 /// Parses a SQL string into a list of [`Expression`] AST nodes.
1931 ///
1932 /// The input may contain multiple semicolon-separated statements; each one
1933 /// produces a separate element in the returned vector. Tokenization uses
1934 /// this dialect's configured tokenizer, and parsing uses the dialect-aware parser.
1935 pub fn parse(&self, sql: &str) -> Result<Vec<Expression>> {
1936 let tokens = self.tokenizer.tokenize(sql)?;
1937 let config = crate::parser::ParserConfig {
1938 dialect: Some(self.dialect_type),
1939 ..Default::default()
1940 };
1941 let mut parser = Parser::with_source(tokens, config, sql.to_string());
1942 parser.parse()
1943 }
1944
1945 /// Tokenize SQL using this dialect's tokenizer configuration.
    pub fn tokenize(&self, sql: &str) -> Result<Vec<Token>> {
        // Thin delegate to the dialect-configured tokenizer.
        self.tokenizer.tokenize(sql)
    }
1949
1950 /// Get the generator config for a specific expression (supports hybrid dialects)
1951 fn get_config_for_expr(&self, expr: &Expression) -> GeneratorConfig {
1952 if let Some(ref config_fn) = self.generator_config_for_expr {
1953 config_fn(expr)
1954 } else {
1955 self.generator_config.clone()
1956 }
1957 }
1958
1959 /// Generates a SQL string from an [`Expression`] AST node.
1960 ///
1961 /// The output uses this dialect's generator configuration for identifier quoting,
1962 /// keyword casing, function name normalization, and syntax style. The result is
1963 /// a single-line (non-pretty) SQL string.
1964 pub fn generate(&self, expr: &Expression) -> Result<String> {
1965 let config = self.get_config_for_expr(expr);
1966 let mut generator = Generator::with_config(config);
1967 generator.generate(expr)
1968 }
1969
1970 /// Generate SQL from an expression with pretty printing enabled
1971 pub fn generate_pretty(&self, expr: &Expression) -> Result<String> {
1972 let mut config = self.get_config_for_expr(expr);
1973 config.pretty = true;
1974 let mut generator = Generator::with_config(config);
1975 generator.generate(expr)
1976 }
1977
1978 /// Generate SQL from an expression with source dialect info (for transpilation)
1979 pub fn generate_with_source(&self, expr: &Expression, source: DialectType) -> Result<String> {
1980 let mut config = self.get_config_for_expr(expr);
1981 config.source_dialect = Some(source);
1982 let mut generator = Generator::with_config(config);
1983 generator.generate(expr)
1984 }
1985
1986 /// Generate SQL from an expression with pretty printing and source dialect info
1987 pub fn generate_pretty_with_source(
1988 &self,
1989 expr: &Expression,
1990 source: DialectType,
1991 ) -> Result<String> {
1992 let mut config = self.get_config_for_expr(expr);
1993 config.pretty = true;
1994 config.source_dialect = Some(source);
1995 let mut generator = Generator::with_config(config);
1996 generator.generate(expr)
1997 }
1998
1999 /// Generate SQL from an expression with forced identifier quoting (identify=True)
2000 pub fn generate_with_identify(&self, expr: &Expression) -> Result<String> {
2001 let mut config = self.get_config_for_expr(expr);
2002 config.always_quote_identifiers = true;
2003 let mut generator = Generator::with_config(config);
2004 generator.generate(expr)
2005 }
2006
2007 /// Generate SQL from an expression with pretty printing and forced identifier quoting
2008 pub fn generate_pretty_with_identify(&self, expr: &Expression) -> Result<String> {
2009 let mut config = self.generator_config.clone();
2010 config.pretty = true;
2011 config.always_quote_identifiers = true;
2012 let mut generator = Generator::with_config(config);
2013 generator.generate(expr)
2014 }
2015
2016 /// Generate SQL from an expression with caller-specified config overrides
2017 pub fn generate_with_overrides(
2018 &self,
2019 expr: &Expression,
2020 overrides: impl FnOnce(&mut GeneratorConfig),
2021 ) -> Result<String> {
2022 let mut config = self.get_config_for_expr(expr);
2023 overrides(&mut config);
2024 let mut generator = Generator::with_config(config);
2025 generator.generate(expr)
2026 }
2027
2028 /// Transforms an expression tree to conform to this dialect's syntax and semantics.
2029 ///
2030 /// The transformation proceeds in two phases:
2031 /// 1. **Preprocessing** -- whole-tree structural rewrites such as eliminating QUALIFY,
2032 /// ensuring boolean predicates, or converting DISTINCT ON to a window-function pattern.
2033 /// 2. **Recursive per-node transform** -- a bottom-up pass via [`transform_recursive`]
2034 /// that applies this dialect's [`DialectImpl::transform_expr`] to every node.
2035 ///
2036 /// This method is used both during transpilation (to rewrite an AST for a target dialect)
2037 /// and for identity transforms (normalizing SQL within the same dialect).
2038 pub fn transform(&self, expr: Expression) -> Result<Expression> {
2039 // Apply preprocessing transforms based on dialect
2040 let preprocessed = self.preprocess(expr)?;
2041 // Then apply recursive transformation
2042 transform_recursive(preprocessed, &self.transformer)
2043 }
2044
2045 /// Apply dialect-specific preprocessing transforms
    fn preprocess(&self, expr: Expression) -> Result<Expression> {
        // If a custom preprocess function is set, use it instead of the built-in logic
        if let Some(ref custom_preprocess) = self.custom_preprocess {
            return custom_preprocess(expr);
        }

        // Only import `transforms` when at least one dialect that uses it is
        // compiled in; otherwise the import would trigger an unused warning.
        #[cfg(any(
            feature = "dialect-mysql",
            feature = "dialect-postgresql",
            feature = "dialect-bigquery",
            feature = "dialect-snowflake",
            feature = "dialect-tsql",
            feature = "dialect-spark",
            feature = "dialect-databricks",
            feature = "dialect-hive",
            feature = "dialect-sqlite",
            feature = "dialect-trino",
            feature = "dialect-presto",
            feature = "dialect-duckdb",
            feature = "dialect-redshift",
            feature = "dialect-starrocks",
            feature = "dialect-oracle",
            feature = "dialect-clickhouse",
        ))]
        use crate::transforms;

        // NOTE: within each arm the order of transform calls is significant —
        // later passes assume the shapes produced by earlier ones.
        match self.dialect_type {
            // MySQL doesn't support QUALIFY, DISTINCT ON, FULL OUTER JOIN
            // MySQL doesn't natively support GENERATE_DATE_ARRAY (expand to recursive CTE)
            #[cfg(feature = "dialect-mysql")]
            DialectType::MySQL => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_full_outer_join(expr)?;
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
                Ok(expr)
            }
            // PostgreSQL doesn't support QUALIFY
            // PostgreSQL: UNNEST(GENERATE_SERIES) -> subquery wrapping
            // PostgreSQL: Normalize SET ... TO to SET ... = in CREATE FUNCTION
            #[cfg(feature = "dialect-postgresql")]
            DialectType::PostgreSQL => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::unwrap_unnest_generate_series_for_postgres(expr)?;
                // Normalize SET ... TO to SET ... = in CREATE FUNCTION
                // Only normalize when sqlglot would fully parse (no body) —
                // sqlglot falls back to Command for complex function bodies,
                // preserving the original text including TO.
                let expr = if let Expression::CreateFunction(mut cf) = expr {
                    if cf.body.is_none() {
                        for opt in &mut cf.set_options {
                            if let crate::expressions::FunctionSetValue::Value { use_to, .. } =
                                &mut opt.value
                            {
                                *use_to = false;
                            }
                        }
                    }
                    Expression::CreateFunction(cf)
                } else {
                    expr
                };
                Ok(expr)
            }
            // BigQuery doesn't support DISTINCT ON or CTE column aliases
            #[cfg(feature = "dialect-bigquery")]
            DialectType::BigQuery => {
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::pushdown_cte_column_names(expr)?;
                let expr = transforms::explode_projection_to_unnest(expr, DialectType::BigQuery)?;
                Ok(expr)
            }
            // Snowflake
            #[cfg(feature = "dialect-snowflake")]
            DialectType::Snowflake => {
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::eliminate_window_clause(expr)?;
                let expr = transforms::snowflake_flatten_projection_to_unnest(expr)?;
                Ok(expr)
            }
            // TSQL doesn't support QUALIFY
            // TSQL requires boolean expressions in WHERE/HAVING (no implicit truthiness)
            // TSQL doesn't support CTEs in subqueries (hoist to top level)
            // NOTE: no_limit_order_by_union is handled in cross_dialect_normalize (not preprocess)
            // to avoid breaking TSQL identity tests where ORDER BY on UNION is valid
            #[cfg(feature = "dialect-tsql")]
            DialectType::TSQL => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::ensure_bools(expr)?;
                let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
                let expr = transforms::move_ctes_to_top_level(expr)?;
                let expr = transforms::qualify_derived_table_outputs(expr)?;
                Ok(expr)
            }
            // Spark doesn't support QUALIFY (but Databricks does)
            // Spark doesn't support CTEs in subqueries (hoist to top level)
            #[cfg(feature = "dialect-spark")]
            DialectType::Spark => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::add_auto_table_alias(expr)?;
                let expr = transforms::simplify_nested_paren_values(expr)?;
                let expr = transforms::move_ctes_to_top_level(expr)?;
                Ok(expr)
            }
            // Databricks supports QUALIFY natively
            // Databricks doesn't support CTEs in subqueries (hoist to top level)
            #[cfg(feature = "dialect-databricks")]
            DialectType::Databricks => {
                let expr = transforms::add_auto_table_alias(expr)?;
                let expr = transforms::simplify_nested_paren_values(expr)?;
                let expr = transforms::move_ctes_to_top_level(expr)?;
                Ok(expr)
            }
            // Hive doesn't support QUALIFY or CTEs in subqueries
            #[cfg(feature = "dialect-hive")]
            DialectType::Hive => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::move_ctes_to_top_level(expr)?;
                Ok(expr)
            }
            // SQLite doesn't support QUALIFY
            #[cfg(feature = "dialect-sqlite")]
            DialectType::SQLite => {
                let expr = transforms::eliminate_qualify(expr)?;
                Ok(expr)
            }
            // Trino doesn't support QUALIFY
            #[cfg(feature = "dialect-trino")]
            DialectType::Trino => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::explode_projection_to_unnest(expr, DialectType::Trino)?;
                Ok(expr)
            }
            // Presto doesn't support QUALIFY or WINDOW clause
            #[cfg(feature = "dialect-presto")]
            DialectType::Presto => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_window_clause(expr)?;
                let expr = transforms::explode_projection_to_unnest(expr, DialectType::Presto)?;
                Ok(expr)
            }
            // DuckDB supports QUALIFY - no elimination needed
            // Expand POSEXPLODE to GENERATE_SUBSCRIPTS + UNNEST
            // Expand LIKE ANY / ILIKE ANY to OR chains (DuckDB doesn't support quantifiers)
            #[cfg(feature = "dialect-duckdb")]
            DialectType::DuckDB => {
                let expr = transforms::expand_posexplode_duckdb(expr)?;
                let expr = transforms::expand_like_any(expr)?;
                Ok(expr)
            }
            // Redshift doesn't support QUALIFY, WINDOW clause, or GENERATE_DATE_ARRAY
            #[cfg(feature = "dialect-redshift")]
            DialectType::Redshift => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_window_clause(expr)?;
                let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
                Ok(expr)
            }
            // StarRocks doesn't support BETWEEN in DELETE statements or QUALIFY
            #[cfg(feature = "dialect-starrocks")]
            DialectType::StarRocks => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::expand_between_in_delete(expr)?;
                Ok(expr)
            }
            // DataFusion supports QUALIFY and semi/anti joins natively
            #[cfg(feature = "dialect-datafusion")]
            DialectType::DataFusion => Ok(expr),
            // Oracle doesn't support QUALIFY
            #[cfg(feature = "dialect-oracle")]
            DialectType::Oracle => {
                let expr = transforms::eliminate_qualify(expr)?;
                Ok(expr)
            }
            // Drill - no special preprocessing needed
            #[cfg(feature = "dialect-drill")]
            DialectType::Drill => Ok(expr),
            // Teradata - no special preprocessing needed
            #[cfg(feature = "dialect-teradata")]
            DialectType::Teradata => Ok(expr),
            // ClickHouse doesn't support ORDER BY/LIMIT directly on UNION
            #[cfg(feature = "dialect-clickhouse")]
            DialectType::ClickHouse => {
                let expr = transforms::no_limit_order_by_union(expr)?;
                Ok(expr)
            }
            // Other dialects - no preprocessing
            _ => Ok(expr),
        }
    }
2238
2239 /// Transpile SQL from this dialect to another
    pub fn transpile_to(&self, sql: &str, target: DialectType) -> Result<Vec<String>> {
        // Non-pretty variant; one output string per input statement.
        self.transpile_to_inner(sql, target, false)
    }
2243
2244 /// Transpile SQL from this dialect to another with pretty printing enabled
    pub fn transpile_to_pretty(&self, sql: &str, target: DialectType) -> Result<Vec<String>> {
        // Same pipeline as `transpile_to`, but with pretty printing enabled.
        self.transpile_to_inner(sql, target, true)
    }
2248
2249 #[cfg(not(feature = "transpile"))]
2250 fn transpile_to_inner(
2251 &self,
2252 sql: &str,
2253 target: DialectType,
2254 pretty: bool,
2255 ) -> Result<Vec<String>> {
2256 // Without the transpile feature, only same-dialect or to/from generic is supported
2257 if self.dialect_type != target
2258 && self.dialect_type != DialectType::Generic
2259 && target != DialectType::Generic
2260 {
2261 return Err(crate::error::Error::parse(
2262 "Cross-dialect transpilation not available in this build",
2263 0,
2264 0,
2265 ));
2266 }
2267
2268 let expressions = self.parse(sql)?;
2269 let target_dialect = Dialect::get(target);
2270 let generic_identity =
2271 self.dialect_type == DialectType::Generic && target == DialectType::Generic;
2272
2273 if generic_identity {
2274 return expressions
2275 .into_iter()
2276 .map(|expr| {
2277 if pretty {
2278 target_dialect.generate_pretty_with_source(&expr, self.dialect_type)
2279 } else {
2280 target_dialect.generate_with_source(&expr, self.dialect_type)
2281 }
2282 })
2283 .collect();
2284 }
2285
2286 expressions
2287 .into_iter()
2288 .map(|expr| {
2289 let transformed = target_dialect.transform(expr)?;
2290 if pretty {
2291 target_dialect.generate_pretty_with_source(&transformed, self.dialect_type)
2292 } else {
2293 target_dialect.generate_with_source(&transformed, self.dialect_type)
2294 }
2295 })
2296 .collect()
2297 }
2298
    /// Full transpilation pipeline (enabled by the `transpile` feature).
    ///
    /// Parses `sql` with this (source) dialect, applies an ordered chain of
    /// source/target-specific AST normalizations, then transforms and renders
    /// with the `target` dialect. Returns one SQL string per input statement.
    /// The normalization steps below are order-dependent — see the inline
    /// comments on each step.
    #[cfg(feature = "transpile")]
    fn transpile_to_inner(
        &self,
        sql: &str,
        target: DialectType,
        pretty: bool,
    ) -> Result<Vec<String>> {
        let expressions = self.parse(sql)?;
        let target_dialect = Dialect::get(target);
        let generic_identity =
            self.dialect_type == DialectType::Generic && target == DialectType::Generic;

        // Generic -> Generic: skip all normalization and transform passes.
        if generic_identity {
            return expressions
                .into_iter()
                .map(|expr| {
                    if pretty {
                        target_dialect.generate_pretty_with_source(&expr, self.dialect_type)
                    } else {
                        target_dialect.generate_with_source(&expr, self.dialect_type)
                    }
                })
                .collect();
        }

        expressions
            .into_iter()
            .map(|expr| {
                // DuckDB source: normalize VARCHAR/CHAR to TEXT (DuckDB doesn't support
                // VARCHAR length constraints). This emulates Python sqlglot's DuckDB parser
                // where VARCHAR_LENGTH = None and VARCHAR maps to TEXT.
                let expr = if matches!(self.dialect_type, DialectType::DuckDB) {
                    use crate::expressions::DataType as DT;
                    transform_recursive(expr, &|e| match e {
                        Expression::DataType(DT::VarChar { .. }) => {
                            Ok(Expression::DataType(DT::Text))
                        }
                        Expression::DataType(DT::Char { .. }) => Ok(Expression::DataType(DT::Text)),
                        _ => Ok(e),
                    })?
                } else {
                    expr
                };

                // When source and target differ, first normalize the source dialect's
                // AST constructs to standard SQL, so that the target dialect can handle them.
                // This handles cases like Snowflake's SQUARE -> POWER, DIV0 -> CASE, etc.
                let normalized =
                    if self.dialect_type != target && self.dialect_type != DialectType::Generic {
                        self.transform(expr)?
                    } else {
                        expr
                    };

                // For TSQL source targeting non-TSQL: unwrap ISNULL(JSON_QUERY(...), JSON_VALUE(...))
                // to just JSON_QUERY(...) so cross_dialect_normalize can convert it cleanly.
                // The TSQL read transform wraps JsonQuery in ISNULL for identity, but for
                // cross-dialect transpilation we need the unwrapped JSON_QUERY.
                let normalized =
                    if matches!(self.dialect_type, DialectType::TSQL | DialectType::Fabric)
                        && !matches!(target, DialectType::TSQL | DialectType::Fabric)
                    {
                        transform_recursive(normalized, &|e| {
                            if let Expression::Function(ref f) = e {
                                if f.name.eq_ignore_ascii_case("ISNULL") && f.args.len() == 2 {
                                    // Check if first arg is JSON_QUERY and second is JSON_VALUE
                                    if let (
                                        Expression::Function(ref jq),
                                        Expression::Function(ref jv),
                                    ) = (&f.args[0], &f.args[1])
                                    {
                                        if jq.name.eq_ignore_ascii_case("JSON_QUERY")
                                            && jv.name.eq_ignore_ascii_case("JSON_VALUE")
                                        {
                                            // Unwrap: return just JSON_QUERY(...)
                                            return Ok(f.args[0].clone());
                                        }
                                    }
                                }
                            }
                            Ok(e)
                        })?
                    } else {
                        normalized
                    };

                // Snowflake source to non-Snowflake target: CURRENT_TIME -> LOCALTIME
                // Snowflake's CURRENT_TIME is equivalent to LOCALTIME in other dialects.
                // Python sqlglot parses Snowflake's CURRENT_TIME as Localtime expression.
                let normalized = if matches!(self.dialect_type, DialectType::Snowflake)
                    && !matches!(target, DialectType::Snowflake)
                {
                    transform_recursive(normalized, &|e| {
                        if let Expression::Function(ref f) = e {
                            if f.name.eq_ignore_ascii_case("CURRENT_TIME") {
                                return Ok(Expression::Localtime(Box::new(
                                    crate::expressions::Localtime { this: None },
                                )));
                            }
                        }
                        Ok(e)
                    })?
                } else {
                    normalized
                };

                // Snowflake source to DuckDB target: REPEAT(' ', n) -> REPEAT(' ', CAST(n AS BIGINT))
                // Snowflake's SPACE(n) is converted to REPEAT(' ', n) by the Snowflake source
                // transform. DuckDB requires the count argument to be BIGINT.
                let normalized = if matches!(self.dialect_type, DialectType::Snowflake)
                    && matches!(target, DialectType::DuckDB)
                {
                    transform_recursive(normalized, &|e| {
                        if let Expression::Function(ref f) = e {
                            if f.name.eq_ignore_ascii_case("REPEAT") && f.args.len() == 2 {
                                // Check if first arg is space string literal
                                if let Expression::Literal(crate::expressions::Literal::String(
                                    ref s,
                                )) = f.args[0]
                                {
                                    if s == " " {
                                        // Wrap second arg in CAST(... AS BIGINT) if not already
                                        if !matches!(f.args[1], Expression::Cast(_)) {
                                            let mut new_args = f.args.clone();
                                            new_args[1] = Expression::Cast(Box::new(
                                                crate::expressions::Cast {
                                                    this: new_args[1].clone(),
                                                    to: crate::expressions::DataType::BigInt {
                                                        length: None,
                                                    },
                                                    trailing_comments: Vec::new(),
                                                    double_colon_syntax: false,
                                                    format: None,
                                                    default: None,
                                                },
                                            ));
                                            return Ok(Expression::Function(Box::new(
                                                crate::expressions::Function {
                                                    name: f.name.clone(),
                                                    args: new_args,
                                                    distinct: f.distinct,
                                                    trailing_comments: f.trailing_comments.clone(),
                                                    use_bracket_syntax: f.use_bracket_syntax,
                                                    no_parens: f.no_parens,
                                                    quoted: f.quoted,
                                                },
                                            )));
                                        }
                                    }
                                }
                            }
                        }
                        Ok(e)
                    })?
                } else {
                    normalized
                };

                // Propagate struct field names in arrays (for BigQuery source to non-BigQuery target)
                // BigQuery->BigQuery should NOT propagate names (BigQuery handles implicit inheritance)
                let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
                    && !matches!(target, DialectType::BigQuery)
                {
                    crate::transforms::propagate_struct_field_names(normalized)?
                } else {
                    normalized
                };

                // Apply cross-dialect semantic normalizations
                let normalized =
                    Self::cross_dialect_normalize(normalized, self.dialect_type, target)?;

                // For DuckDB target from BigQuery source: wrap UNNEST of struct arrays in
                // (SELECT UNNEST(..., max_depth => 2)) subquery
                // Must run BEFORE unnest_alias_to_column_alias since it changes alias structure
                let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
                    && matches!(target, DialectType::DuckDB)
                {
                    crate::transforms::wrap_duckdb_unnest_struct(normalized)?
                } else {
                    normalized
                };

                // Convert BigQuery UNNEST aliases to column-alias format for DuckDB/Presto/Spark
                // UNNEST(arr) AS x -> UNNEST(arr) AS _t0(x)
                let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
                    && matches!(
                        target,
                        DialectType::DuckDB
                            | DialectType::Presto
                            | DialectType::Trino
                            | DialectType::Athena
                            | DialectType::Spark
                            | DialectType::Databricks
                    ) {
                    crate::transforms::unnest_alias_to_column_alias(normalized)?
                } else if matches!(self.dialect_type, DialectType::BigQuery)
                    && matches!(target, DialectType::BigQuery | DialectType::Redshift)
                {
                    // For BigQuery/Redshift targets: move UNNEST FROM items to CROSS JOINs
                    // but don't convert alias format (no _t0 wrapper)
                    let result = crate::transforms::unnest_from_to_cross_join(normalized)?;
                    // For Redshift: strip UNNEST when arg is a column reference path
                    if matches!(target, DialectType::Redshift) {
                        crate::transforms::strip_unnest_column_refs(result)?
                    } else {
                        result
                    }
                } else {
                    normalized
                };

                // For Presto/Trino targets from PostgreSQL/Redshift source:
                // Wrap UNNEST aliases from GENERATE_SERIES conversion: AS s -> AS _u(s)
                let normalized = if matches!(
                    self.dialect_type,
                    DialectType::PostgreSQL | DialectType::Redshift
                ) && matches!(
                    target,
                    DialectType::Presto | DialectType::Trino | DialectType::Athena
                ) {
                    crate::transforms::wrap_unnest_join_aliases(normalized)?
                } else {
                    normalized
                };

                // Eliminate DISTINCT ON with target-dialect awareness
                // This must happen after source transform (which may produce DISTINCT ON)
                // and before target transform, with knowledge of the target dialect's NULL ordering behavior
                let normalized =
                    crate::transforms::eliminate_distinct_on_for_dialect(normalized, Some(target))?;

                // GENERATE_DATE_ARRAY in UNNEST -> Snowflake ARRAY_GENERATE_RANGE + DATEADD
                let normalized = if matches!(target, DialectType::Snowflake) {
                    Self::transform_generate_date_array_snowflake(normalized)?
                } else {
                    normalized
                };

                // CROSS JOIN UNNEST -> LATERAL VIEW EXPLODE/INLINE for Spark/Hive/Databricks
                let normalized = if matches!(
                    target,
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive
                ) {
                    crate::transforms::unnest_to_explode_select(normalized)?
                } else {
                    normalized
                };

                // Wrap UNION with ORDER BY/LIMIT in a subquery for dialects that require it
                let normalized = if matches!(target, DialectType::ClickHouse | DialectType::TSQL) {
                    crate::transforms::no_limit_order_by_union(normalized)?
                } else {
                    normalized
                };

                // TSQL: Convert COUNT(*) -> COUNT_BIG(*) when source is not TSQL/Fabric
                // Python sqlglot does this in the TSQL generator, but we can't do it there
                // because it would break TSQL -> TSQL identity
                let normalized = if matches!(target, DialectType::TSQL | DialectType::Fabric)
                    && !matches!(self.dialect_type, DialectType::TSQL | DialectType::Fabric)
                {
                    transform_recursive(normalized, &|e| {
                        if let Expression::Count(ref c) = e {
                            // Build COUNT_BIG(...) as an AggregateFunction
                            let args = if c.star {
                                vec![Expression::Star(crate::expressions::Star {
                                    table: None,
                                    except: None,
                                    replace: None,
                                    rename: None,
                                    trailing_comments: Vec::new(),
                                })]
                            } else if let Some(ref this) = c.this {
                                vec![this.clone()]
                            } else {
                                vec![]
                            };
                            Ok(Expression::AggregateFunction(Box::new(
                                crate::expressions::AggregateFunction {
                                    name: "COUNT_BIG".to_string(),
                                    args,
                                    distinct: c.distinct,
                                    filter: c.filter.clone(),
                                    order_by: Vec::new(),
                                    limit: None,
                                    ignore_nulls: None,
                                },
                            )))
                        } else {
                            Ok(e)
                        }
                    })?
                } else {
                    normalized
                };

                // Finally: target-dialect transform, then render with source info.
                let transformed = target_dialect.transform(normalized)?;
                let mut sql = if pretty {
                    target_dialect.generate_pretty_with_source(&transformed, self.dialect_type)?
                } else {
                    target_dialect.generate_with_source(&transformed, self.dialect_type)?
                };

                // Align a known Snowflake pretty-print edge case with Python sqlglot output.
                if pretty && target == DialectType::Snowflake {
                    sql = Self::normalize_snowflake_pretty(sql);
                }

                Ok(sql)
            })
            .collect()
    }
2612}
2613
2614// Transpile-only methods: cross-dialect normalization and helpers
2615#[cfg(feature = "transpile")]
2616impl Dialect {
    /// Transform BigQuery GENERATE_DATE_ARRAY in UNNEST for Snowflake target.
    /// Converts:
    /// SELECT ..., alias, ... FROM t CROSS JOIN UNNEST(GENERATE_DATE_ARRAY(start, end, INTERVAL '1' unit)) AS alias
    /// To:
    /// SELECT ..., DATEADD(unit, CAST(alias AS INT), CAST(start AS DATE)) AS alias, ...
    /// FROM t, LATERAL FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (DATEDIFF(unit, start, end) + 1 - 1) + 1)) AS _t0(seq, key, path, index, alias, this)
    ///
    /// Snowflake has no native date-array generator, so the date sequence is
    /// emulated by flattening an integer range (ARRAY_GENERATE_RANGE) via
    /// LATERAL FLATTEN and mapping each index back to a date with DATEADD in
    /// the projection. The `(... + 1 - 1) + 1` arithmetic intentionally
    /// mirrors Python sqlglot's output so transpiled SQL matches it exactly.
    fn transform_generate_date_array_snowflake(expr: Expression) -> Result<Expression> {
        use crate::expressions::*;
        transform_recursive(expr, &|e| {
            // Handle ARRAY_SIZE(GENERATE_DATE_ARRAY(...)) -> ARRAY_SIZE((SELECT ARRAY_AGG(*) FROM subquery))
            if let Expression::ArraySize(ref af) = e {
                if let Expression::Function(ref f) = af.this {
                    if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY") && f.args.len() >= 2 {
                        let result = Self::convert_array_size_gda_snowflake(f)?;
                        return Ok(result);
                    }
                }
            }

            // Only SELECT nodes can carry the UNNEST-in-JOIN pattern handled
            // below; everything else passes through unchanged.
            let Expression::Select(mut sel) = e else {
                return Ok(e);
            };

            // Find joins with UNNEST containing GenerateSeries (from GENERATE_DATE_ARRAY conversion)
            let mut gda_info: Option<(String, Expression, Expression, String)> = None; // (alias_name, start_expr, end_expr, unit)
            let mut gda_join_idx: Option<usize> = None;

            for (idx, join) in sel.joins.iter().enumerate() {
                // The join.this may be:
                // 1. Unnest(UnnestFunc { alias: Some("mnth"), ... })
                // 2. Alias(Alias { this: Unnest(UnnestFunc { alias: None, ... }), alias: "mnth", ... })
                let (unnest_ref, alias_name) = match &join.this {
                    Expression::Unnest(ref unnest) => {
                        let alias = unnest.alias.as_ref().map(|id| id.name.clone());
                        (Some(unnest.as_ref()), alias)
                    }
                    Expression::Alias(ref a) => {
                        if let Expression::Unnest(ref unnest) = a.this {
                            (Some(unnest.as_ref()), Some(a.alias.name.clone()))
                        } else {
                            (None, None)
                        }
                    }
                    _ => (None, None),
                };

                if let (Some(unnest), Some(alias)) = (unnest_ref, alias_name) {
                    // Check the main expression (this) of the UNNEST for GENERATE_DATE_ARRAY function
                    if let Expression::Function(ref f) = unnest.this {
                        if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY") && f.args.len() >= 2 {
                            let start_expr = f.args[0].clone();
                            let end_expr = f.args[1].clone();
                            let step = f.args.get(2).cloned();

                            // Extract unit from step interval
                            let unit = if let Some(Expression::Interval(ref iv)) = step {
                                if let Some(IntervalUnitSpec::Simple { ref unit, .. }) = iv.unit {
                                    Some(format!("{:?}", unit).to_uppercase())
                                } else if let Some(ref this) = iv.this {
                                    // The interval may be stored as a string like "1 MONTH"
                                    if let Expression::Literal(Literal::String(ref s)) = this {
                                        let parts: Vec<&str> = s.split_whitespace().collect();
                                        if parts.len() == 2 {
                                            // "<amount> <unit>" -> take the unit token.
                                            Some(parts[1].to_uppercase())
                                        } else if parts.len() == 1 {
                                            // Single word like "MONTH" or just "1"
                                            let upper = parts[0].to_uppercase();
                                            // Only accept a lone token if it names a
                                            // recognized date/time unit (rejects "1").
                                            if matches!(
                                                upper.as_str(),
                                                "YEAR"
                                                    | "QUARTER"
                                                    | "MONTH"
                                                    | "WEEK"
                                                    | "DAY"
                                                    | "HOUR"
                                                    | "MINUTE"
                                                    | "SECOND"
                                            ) {
                                                Some(upper)
                                            } else {
                                                None
                                            }
                                        } else {
                                            None
                                        }
                                    } else {
                                        None
                                    }
                                } else {
                                    None
                                }
                            } else {
                                None
                            };

                            // Without a usable unit we leave the join untouched.
                            if let Some(unit_str) = unit {
                                gda_info = Some((alias, start_expr, end_expr, unit_str));
                                gda_join_idx = Some(idx);
                            }
                        }
                    }
                }
                // Only the first matching join is rewritten per SELECT.
                if gda_info.is_some() {
                    break;
                }
            }

            let Some((alias_name, start_expr, end_expr, unit_str)) = gda_info else {
                // Also check FROM clause for UNNEST(GENERATE_DATE_ARRAY(...)) patterns
                // This handles Generic->Snowflake where GENERATE_DATE_ARRAY is in FROM, not in JOIN
                let result = Self::try_transform_from_gda_snowflake(sel);
                return result;
            };
            // Safe: gda_join_idx is always set together with gda_info above.
            let join_idx = gda_join_idx.unwrap();

            // Build ARRAY_GENERATE_RANGE(0, (DATEDIFF(unit, start, end) + 1 - 1) + 1)
            let datediff = Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![
                    // The unit is emitted as a bare column reference (Snowflake's
                    // DATEDIFF takes an unquoted unit keyword in arg position).
                    Expression::Column(Column {
                        name: Identifier::new(&unit_str),
                        table: None,
                        join_mark: false,
                        trailing_comments: vec![],
                    }),
                    start_expr.clone(),
                    end_expr.clone(),
                ],
            )));
            // (DATEDIFF(...) + 1 - 1) + 1
            let plus_one = Expression::Add(Box::new(BinaryOp {
                left: datediff,
                right: Expression::Literal(Literal::Number("1".to_string())),
                left_comments: vec![],
                operator_comments: vec![],
                trailing_comments: vec![],
            }));
            let minus_one = Expression::Sub(Box::new(BinaryOp {
                left: plus_one,
                right: Expression::Literal(Literal::Number("1".to_string())),
                left_comments: vec![],
                operator_comments: vec![],
                trailing_comments: vec![],
            }));
            let paren_inner = Expression::Paren(Box::new(Paren {
                this: minus_one,
                trailing_comments: vec![],
            }));
            let outer_plus_one = Expression::Add(Box::new(BinaryOp {
                left: paren_inner,
                right: Expression::Literal(Literal::Number("1".to_string())),
                left_comments: vec![],
                operator_comments: vec![],
                trailing_comments: vec![],
            }));

            let array_gen_range = Expression::Function(Box::new(Function::new(
                "ARRAY_GENERATE_RANGE".to_string(),
                vec![
                    Expression::Literal(Literal::Number("0".to_string())),
                    outer_plus_one,
                ],
            )));

            // Build FLATTEN(INPUT => ARRAY_GENERATE_RANGE(...))
            let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
                name: Identifier::new("INPUT"),
                value: array_gen_range,
                separator: crate::expressions::NamedArgSeparator::DArrow,
            }));
            let flatten = Expression::Function(Box::new(Function::new(
                "FLATTEN".to_string(),
                vec![flatten_input],
            )));

            // Build LATERAL FLATTEN(...) AS _t0(seq, key, path, index, alias, this)
            // The six column aliases follow FLATTEN's output columns; the
            // original UNNEST alias is placed in the VALUE position.
            let alias_table = Alias {
                this: flatten,
                alias: Identifier::new("_t0"),
                column_aliases: vec![
                    Identifier::new("seq"),
                    Identifier::new("key"),
                    Identifier::new("path"),
                    Identifier::new("index"),
                    Identifier::new(&alias_name),
                    Identifier::new("this"),
                ],
                pre_alias_comments: vec![],
                trailing_comments: vec![],
            };
            let lateral_expr = Expression::Lateral(Box::new(Lateral {
                this: Box::new(Expression::Alias(Box::new(alias_table))),
                view: None,
                outer: None,
                alias: None,
                alias_quoted: false,
                cross_apply: None,
                ordinality: None,
                column_aliases: vec![],
            }));

            // Remove the original join and add to FROM expressions
            sel.joins.remove(join_idx);
            if let Some(ref mut from) = sel.from {
                from.expressions.push(lateral_expr);
            }

            // Build DATEADD(unit, CAST(alias AS INT), CAST(start AS DATE))
            let dateadd_expr = Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Column(Column {
                        name: Identifier::new(&unit_str),
                        table: None,
                        join_mark: false,
                        trailing_comments: vec![],
                    }),
                    Expression::Cast(Box::new(Cast {
                        this: Expression::Column(Column {
                            name: Identifier::new(&alias_name),
                            table: None,
                            join_mark: false,
                            trailing_comments: vec![],
                        }),
                        to: DataType::Int {
                            length: None,
                            integer_spelling: false,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    })),
                    Expression::Cast(Box::new(Cast {
                        this: start_expr.clone(),
                        to: DataType::Date,
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    })),
                ],
            )));

            // Replace references to the alias in the SELECT list
            let new_exprs: Vec<Expression> = sel
                .expressions
                .iter()
                .map(|expr| Self::replace_column_ref_with_dateadd(expr, &alias_name, &dateadd_expr))
                .collect();
            sel.expressions = new_exprs;

            Ok(Expression::Select(sel))
        })
    }
2872
2873 /// Helper: replace column references to `alias_name` with dateadd expression
2874 fn replace_column_ref_with_dateadd(
2875 expr: &Expression,
2876 alias_name: &str,
2877 dateadd: &Expression,
2878 ) -> Expression {
2879 use crate::expressions::*;
2880 match expr {
2881 Expression::Column(c) if c.name.name == alias_name && c.table.is_none() => {
2882 // Plain column reference -> DATEADD(...) AS alias_name
2883 Expression::Alias(Box::new(Alias {
2884 this: dateadd.clone(),
2885 alias: Identifier::new(alias_name),
2886 column_aliases: vec![],
2887 pre_alias_comments: vec![],
2888 trailing_comments: vec![],
2889 }))
2890 }
2891 Expression::Alias(a) => {
2892 // Check if the inner expression references the alias
2893 let new_this = Self::replace_column_ref_inner(&a.this, alias_name, dateadd);
2894 Expression::Alias(Box::new(Alias {
2895 this: new_this,
2896 alias: a.alias.clone(),
2897 column_aliases: a.column_aliases.clone(),
2898 pre_alias_comments: a.pre_alias_comments.clone(),
2899 trailing_comments: a.trailing_comments.clone(),
2900 }))
2901 }
2902 _ => expr.clone(),
2903 }
2904 }
2905
2906 /// Helper: replace column references in inner expression (not top-level)
2907 fn replace_column_ref_inner(
2908 expr: &Expression,
2909 alias_name: &str,
2910 dateadd: &Expression,
2911 ) -> Expression {
2912 use crate::expressions::*;
2913 match expr {
2914 Expression::Column(c) if c.name.name == alias_name && c.table.is_none() => {
2915 dateadd.clone()
2916 }
2917 Expression::Add(op) => {
2918 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
2919 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
2920 Expression::Add(Box::new(BinaryOp {
2921 left,
2922 right,
2923 left_comments: op.left_comments.clone(),
2924 operator_comments: op.operator_comments.clone(),
2925 trailing_comments: op.trailing_comments.clone(),
2926 }))
2927 }
2928 Expression::Sub(op) => {
2929 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
2930 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
2931 Expression::Sub(Box::new(BinaryOp {
2932 left,
2933 right,
2934 left_comments: op.left_comments.clone(),
2935 operator_comments: op.operator_comments.clone(),
2936 trailing_comments: op.trailing_comments.clone(),
2937 }))
2938 }
2939 Expression::Mul(op) => {
2940 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
2941 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
2942 Expression::Mul(Box::new(BinaryOp {
2943 left,
2944 right,
2945 left_comments: op.left_comments.clone(),
2946 operator_comments: op.operator_comments.clone(),
2947 trailing_comments: op.trailing_comments.clone(),
2948 }))
2949 }
2950 _ => expr.clone(),
2951 }
2952 }
2953
    /// Handle UNNEST(GENERATE_DATE_ARRAY(...)) in FROM clause for Snowflake target.
    /// Converts to a subquery with DATEADD + TABLE(FLATTEN(ARRAY_GENERATE_RANGE(...))).
    ///
    /// Covers the Generic->Snowflake case where the date array sits directly
    /// in FROM rather than in a JOIN (the JOIN case is handled by
    /// `transform_generate_date_array_snowflake`). If no matching FROM entry
    /// is found, the SELECT is returned unchanged.
    fn try_transform_from_gda_snowflake(
        mut sel: Box<crate::expressions::Select>,
    ) -> Result<Expression> {
        use crate::expressions::*;

        // Extract GDA info from FROM clause
        let mut gda_info: Option<(
            usize,
            String,
            Expression,
            Expression,
            String,
            Option<(String, Vec<Identifier>)>,
        )> = None; // (from_idx, col_name, start, end, unit, outer_alias)

        if let Some(ref from) = sel.from {
            for (idx, table_expr) in from.expressions.iter().enumerate() {
                // Pattern 1: UNNEST(GENERATE_DATE_ARRAY(...))
                // Pattern 2: Alias(UNNEST(GENERATE_DATE_ARRAY(...))) AS _q(date_week)
                let (unnest_opt, outer_alias_info) = match table_expr {
                    Expression::Unnest(ref unnest) => (Some(unnest.as_ref()), None),
                    Expression::Alias(ref a) => {
                        if let Expression::Unnest(ref unnest) = a.this {
                            let alias_info = (a.alias.name.clone(), a.column_aliases.clone());
                            (Some(unnest.as_ref()), Some(alias_info))
                        } else {
                            (None, None)
                        }
                    }
                    _ => (None, None),
                };

                if let Some(unnest) = unnest_opt {
                    // Check for GENERATE_DATE_ARRAY function
                    let func_opt = match &unnest.this {
                        Expression::Function(ref f)
                            if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY")
                                && f.args.len() >= 2 =>
                        {
                            Some(f)
                        }
                        // Also check for GenerateSeries (from earlier normalization)
                        _ => None,
                    };

                    if let Some(f) = func_opt {
                        let start_expr = f.args[0].clone();
                        let end_expr = f.args[1].clone();
                        let step = f.args.get(2).cloned();

                        // Extract unit and column name
                        let unit = Self::extract_interval_unit_str(&step);
                        // Column name comes from the first column alias when given
                        // (e.g. AS _q(date_week)); otherwise default to "value".
                        let col_name = outer_alias_info
                            .as_ref()
                            .and_then(|(_, cols)| cols.first().map(|id| id.name.clone()))
                            .unwrap_or_else(|| "value".to_string());

                        if let Some(unit_str) = unit {
                            gda_info = Some((
                                idx,
                                col_name,
                                start_expr,
                                end_expr,
                                unit_str,
                                outer_alias_info,
                            ));
                            // Only the first matching FROM entry is rewritten.
                            break;
                        }
                    }
                }
            }
        }

        let Some((from_idx, col_name, start_expr, end_expr, unit_str, outer_alias_info)) = gda_info
        else {
            return Ok(Expression::Select(sel));
        };

        // Build the Snowflake subquery:
        // (SELECT DATEADD(unit, CAST(col_name AS INT), CAST(start AS DATE)) AS col_name
        //  FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (DATEDIFF(unit, start, end) + 1 - 1) + 1))) AS _t0(seq, key, path, index, col_name, this))

        // DATEDIFF(unit, start, end)
        let datediff = Expression::Function(Box::new(Function::new(
            "DATEDIFF".to_string(),
            vec![
                // Unit rendered as a bare identifier in argument position.
                Expression::Column(Column {
                    name: Identifier::new(&unit_str),
                    table: None,
                    join_mark: false,
                    trailing_comments: vec![],
                }),
                start_expr.clone(),
                end_expr.clone(),
            ],
        )));
        // (DATEDIFF(...) + 1 - 1) + 1
        // The redundant-looking arithmetic matches Python sqlglot's output.
        let plus_one = Expression::Add(Box::new(BinaryOp {
            left: datediff,
            right: Expression::Literal(Literal::Number("1".to_string())),
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
        }));
        let minus_one = Expression::Sub(Box::new(BinaryOp {
            left: plus_one,
            right: Expression::Literal(Literal::Number("1".to_string())),
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
        }));
        let paren_inner = Expression::Paren(Box::new(Paren {
            this: minus_one,
            trailing_comments: vec![],
        }));
        let outer_plus_one = Expression::Add(Box::new(BinaryOp {
            left: paren_inner,
            right: Expression::Literal(Literal::Number("1".to_string())),
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
        }));

        let array_gen_range = Expression::Function(Box::new(Function::new(
            "ARRAY_GENERATE_RANGE".to_string(),
            vec![
                Expression::Literal(Literal::Number("0".to_string())),
                outer_plus_one,
            ],
        )));

        // TABLE(FLATTEN(INPUT => ...))
        let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
            name: Identifier::new("INPUT"),
            value: array_gen_range,
            separator: crate::expressions::NamedArgSeparator::DArrow,
        }));
        let flatten = Expression::Function(Box::new(Function::new(
            "FLATTEN".to_string(),
            vec![flatten_input],
        )));

        // Determine alias name for the table: use outer alias or _t0
        let table_alias_name = outer_alias_info
            .as_ref()
            .map(|(name, _)| name.clone())
            .unwrap_or_else(|| "_t0".to_string());

        // TABLE(FLATTEN(...)) AS _t0(seq, key, path, index, col_name, this)
        // Column aliases follow FLATTEN's output columns; the requested column
        // name occupies the VALUE position.
        let table_func =
            Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])));
        let flatten_aliased = Expression::Alias(Box::new(Alias {
            this: table_func,
            alias: Identifier::new(&table_alias_name),
            column_aliases: vec![
                Identifier::new("seq"),
                Identifier::new("key"),
                Identifier::new("path"),
                Identifier::new("index"),
                Identifier::new(&col_name),
                Identifier::new("this"),
            ],
            pre_alias_comments: vec![],
            trailing_comments: vec![],
        }));

        // SELECT DATEADD(unit, CAST(col_name AS INT), CAST(start AS DATE)) AS col_name
        let dateadd_expr = Expression::Function(Box::new(Function::new(
            "DATEADD".to_string(),
            vec![
                Expression::Column(Column {
                    name: Identifier::new(&unit_str),
                    table: None,
                    join_mark: false,
                    trailing_comments: vec![],
                }),
                Expression::Cast(Box::new(Cast {
                    this: Expression::Column(Column {
                        name: Identifier::new(&col_name),
                        table: None,
                        join_mark: false,
                        trailing_comments: vec![],
                    }),
                    to: DataType::Int {
                        length: None,
                        integer_spelling: false,
                    },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                })),
                // Use start_expr directly - it's already been normalized (DATE literal -> CAST)
                start_expr.clone(),
            ],
        )));
        let dateadd_aliased = Expression::Alias(Box::new(Alias {
            this: dateadd_expr,
            alias: Identifier::new(&col_name),
            column_aliases: vec![],
            pre_alias_comments: vec![],
            trailing_comments: vec![],
        }));

        // Build inner SELECT
        let mut inner_select = Select::new();
        inner_select.expressions = vec![dateadd_aliased];
        inner_select.from = Some(From {
            expressions: vec![flatten_aliased],
        });

        let inner_select_expr = Expression::Select(Box::new(inner_select));
        let subquery = Expression::Subquery(Box::new(Subquery {
            this: inner_select_expr,
            alias: None,
            column_aliases: vec![],
            order_by: None,
            limit: None,
            offset: None,
            distribute_by: None,
            sort_by: None,
            cluster_by: None,
            lateral: false,
            modifiers_inside: false,
            trailing_comments: vec![],
        }));

        // If there was an outer alias (e.g., AS _q(date_week)), wrap with alias
        let replacement = if let Some((alias_name, col_aliases)) = outer_alias_info {
            Expression::Alias(Box::new(Alias {
                this: subquery,
                alias: Identifier::new(&alias_name),
                column_aliases: col_aliases,
                pre_alias_comments: vec![],
                trailing_comments: vec![],
            }))
        } else {
            subquery
        };

        // Replace the FROM expression in place at the index found above.
        if let Some(ref mut from) = sel.from {
            from.expressions[from_idx] = replacement;
        }

        Ok(Expression::Select(sel))
    }
3203
    /// Convert ARRAY_SIZE(GENERATE_DATE_ARRAY(start, end, step)) for Snowflake.
    /// Produces: ARRAY_SIZE((SELECT ARRAY_AGG(*) FROM (SELECT DATEADD(unit, CAST(value AS INT), start) AS value
    /// FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (DATEDIFF(unit, start, end) + 1 - 1) + 1))) AS _t0(...))))
    ///
    /// Caller guarantees `f.args.len() >= 2` (checked at both call sites), so
    /// the direct indexing of args[0]/args[1] below is safe.
    fn convert_array_size_gda_snowflake(f: &crate::expressions::Function) -> Result<Expression> {
        use crate::expressions::*;

        let start_expr = f.args[0].clone();
        let end_expr = f.args[1].clone();
        let step = f.args.get(2).cloned();
        // Missing/unrecognized step falls back to DAY here (unlike the JOIN
        // rewrite, which skips the transform when no unit can be extracted).
        let unit_str = Self::extract_interval_unit_str(&step).unwrap_or_else(|| "DAY".to_string());
        let col_name = "value";

        // Build the inner subquery: same as try_transform_from_gda_snowflake
        let datediff = Expression::Function(Box::new(Function::new(
            "DATEDIFF".to_string(),
            vec![
                Expression::Column(Column {
                    name: Identifier::new(&unit_str),
                    table: None,
                    join_mark: false,
                    trailing_comments: vec![],
                }),
                start_expr.clone(),
                end_expr.clone(),
            ],
        )));
        // (DATEDIFF(...) + 1 - 1) + 1 — mirrors Python sqlglot's output shape.
        let plus_one = Expression::Add(Box::new(BinaryOp {
            left: datediff,
            right: Expression::Literal(Literal::Number("1".to_string())),
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
        }));
        let minus_one = Expression::Sub(Box::new(BinaryOp {
            left: plus_one,
            right: Expression::Literal(Literal::Number("1".to_string())),
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
        }));
        let paren_inner = Expression::Paren(Box::new(Paren {
            this: minus_one,
            trailing_comments: vec![],
        }));
        let outer_plus_one = Expression::Add(Box::new(BinaryOp {
            left: paren_inner,
            right: Expression::Literal(Literal::Number("1".to_string())),
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
        }));

        let array_gen_range = Expression::Function(Box::new(Function::new(
            "ARRAY_GENERATE_RANGE".to_string(),
            vec![
                Expression::Literal(Literal::Number("0".to_string())),
                outer_plus_one,
            ],
        )));

        // FLATTEN(INPUT => ARRAY_GENERATE_RANGE(...))
        let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
            name: Identifier::new("INPUT"),
            value: array_gen_range,
            separator: crate::expressions::NamedArgSeparator::DArrow,
        }));
        let flatten = Expression::Function(Box::new(Function::new(
            "FLATTEN".to_string(),
            vec![flatten_input],
        )));

        // TABLE(FLATTEN(...)) AS _t0(seq, key, path, index, value, this)
        let table_func =
            Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])));
        let flatten_aliased = Expression::Alias(Box::new(Alias {
            this: table_func,
            alias: Identifier::new("_t0"),
            column_aliases: vec![
                Identifier::new("seq"),
                Identifier::new("key"),
                Identifier::new("path"),
                Identifier::new("index"),
                Identifier::new(col_name),
                Identifier::new("this"),
            ],
            pre_alias_comments: vec![],
            trailing_comments: vec![],
        }));

        // DATEADD(unit, CAST(value AS INT), start)
        let dateadd_expr = Expression::Function(Box::new(Function::new(
            "DATEADD".to_string(),
            vec![
                Expression::Column(Column {
                    name: Identifier::new(&unit_str),
                    table: None,
                    join_mark: false,
                    trailing_comments: vec![],
                }),
                Expression::Cast(Box::new(Cast {
                    this: Expression::Column(Column {
                        name: Identifier::new(col_name),
                        table: None,
                        join_mark: false,
                        trailing_comments: vec![],
                    }),
                    to: DataType::Int {
                        length: None,
                        integer_spelling: false,
                    },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                })),
                start_expr.clone(),
            ],
        )));
        let dateadd_aliased = Expression::Alias(Box::new(Alias {
            this: dateadd_expr,
            alias: Identifier::new(col_name),
            column_aliases: vec![],
            pre_alias_comments: vec![],
            trailing_comments: vec![],
        }));

        // Inner SELECT: SELECT DATEADD(...) AS value FROM TABLE(FLATTEN(...)) AS _t0(...)
        let mut inner_select = Select::new();
        inner_select.expressions = vec![dateadd_aliased];
        inner_select.from = Some(From {
            expressions: vec![flatten_aliased],
        });

        // Wrap in subquery for the inner part
        let inner_subquery = Expression::Subquery(Box::new(Subquery {
            this: Expression::Select(Box::new(inner_select)),
            alias: None,
            column_aliases: vec![],
            order_by: None,
            limit: None,
            offset: None,
            distribute_by: None,
            sort_by: None,
            cluster_by: None,
            lateral: false,
            modifiers_inside: false,
            trailing_comments: vec![],
        }));

        // Outer: SELECT ARRAY_AGG(*) FROM (inner_subquery)
        // ARRAY_AGG(*) re-materializes the generated rows as an array so the
        // surrounding ARRAY_SIZE keeps its original meaning.
        let star = Expression::Star(Star {
            table: None,
            except: None,
            replace: None,
            rename: None,
            trailing_comments: vec![],
        });
        let array_agg = Expression::ArrayAgg(Box::new(AggFunc {
            this: star,
            distinct: false,
            filter: None,
            order_by: vec![],
            name: Some("ARRAY_AGG".to_string()),
            ignore_nulls: None,
            having_max: None,
            limit: None,
        }));

        let mut outer_select = Select::new();
        outer_select.expressions = vec![array_agg];
        outer_select.from = Some(From {
            expressions: vec![inner_subquery],
        });

        // Wrap in a subquery
        let outer_subquery = Expression::Subquery(Box::new(Subquery {
            this: Expression::Select(Box::new(outer_select)),
            alias: None,
            column_aliases: vec![],
            order_by: None,
            limit: None,
            offset: None,
            distribute_by: None,
            sort_by: None,
            cluster_by: None,
            lateral: false,
            modifiers_inside: false,
            trailing_comments: vec![],
        }));

        // ARRAY_SIZE(subquery)
        Ok(Expression::ArraySize(Box::new(UnaryFunc::new(
            outer_subquery,
        ))))
    }
3396
3397 /// Extract interval unit string from an optional step expression.
3398 fn extract_interval_unit_str(step: &Option<Expression>) -> Option<String> {
3399 use crate::expressions::*;
3400 if let Some(Expression::Interval(ref iv)) = step {
3401 if let Some(IntervalUnitSpec::Simple { ref unit, .. }) = iv.unit {
3402 return Some(format!("{:?}", unit).to_uppercase());
3403 }
3404 if let Some(ref this) = iv.this {
3405 if let Expression::Literal(Literal::String(ref s)) = this {
3406 let parts: Vec<&str> = s.split_whitespace().collect();
3407 if parts.len() == 2 {
3408 return Some(parts[1].to_uppercase());
3409 } else if parts.len() == 1 {
3410 let upper = parts[0].to_uppercase();
3411 if matches!(
3412 upper.as_str(),
3413 "YEAR"
3414 | "QUARTER"
3415 | "MONTH"
3416 | "WEEK"
3417 | "DAY"
3418 | "HOUR"
3419 | "MINUTE"
3420 | "SECOND"
3421 ) {
3422 return Some(upper);
3423 }
3424 }
3425 }
3426 }
3427 }
3428 // Default to DAY if no step or no interval
3429 if step.is_none() {
3430 return Some("DAY".to_string());
3431 }
3432 None
3433 }
3434
3435 fn normalize_snowflake_pretty(mut sql: String) -> String {
3436 if sql.contains("LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)")
3437 && sql.contains("ARRAY_GENERATE_RANGE(0, (GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1) + 1)")
3438 {
3439 sql = sql.replace(
3440 "AND uc.user_id <> ALL (SELECT DISTINCT\n _id\n FROM users, LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)\n WHERE\n GET_PATH(datasource.value, 'name') = 'something')",
3441 "AND uc.user_id <> ALL (\n SELECT DISTINCT\n _id\n FROM users, LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)\n WHERE\n GET_PATH(datasource.value, 'name') = 'something'\n )",
3442 );
3443
3444 sql = sql.replace(
3445 "CROSS JOIN TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1) + 1))) AS _u(seq, key, path, index, pos, this)",
3446 "CROSS JOIN TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (\n GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1\n) + 1))) AS _u(seq, key, path, index, pos, this)",
3447 );
3448
3449 sql = sql.replace(
3450 "OR (_u.pos > (ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1)\n AND _u_2.pos_2 = (ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1))",
3451 "OR (\n _u.pos > (\n ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1\n )\n AND _u_2.pos_2 = (\n ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1\n )\n )",
3452 );
3453 }
3454
3455 sql
3456 }
3457
3458 /// Apply cross-dialect semantic normalizations that depend on knowing both source and target.
3459 /// This handles cases where the same syntax has different semantics across dialects.
3460 fn cross_dialect_normalize(
3461 expr: Expression,
3462 source: DialectType,
3463 target: DialectType,
3464 ) -> Result<Expression> {
3465 use crate::expressions::{
3466 AggFunc, BinaryOp, Case, Cast, ConvertTimezone, DataType, DateTimeField, DateTruncFunc,
3467 Function, Identifier, IsNull, Literal, Null, Paren,
3468 };
3469
3470 // Helper to tag which kind of transform to apply
3471 #[derive(Debug)]
3472 enum Action {
3473 None,
3474 GreatestLeastNull,
3475 ArrayGenerateRange,
3476 Div0TypedDivision,
3477 ArrayAggCollectList,
3478 ArrayAggWithinGroupFilter,
3479 ArrayAggFilter,
3480 CastTimestampToDatetime,
3481 DateTruncWrapCast,
3482 ToDateToCast,
3483 ConvertTimezoneToExpr,
3484 SetToVariable,
3485 RegexpReplaceSnowflakeToDuckDB,
3486 BigQueryFunctionNormalize,
3487 BigQuerySafeDivide,
3488 BigQueryCastType,
3489 BigQueryToHexBare, // _BQ_TO_HEX(x) with no LOWER/UPPER wrapper
3490 BigQueryToHexLower, // LOWER(_BQ_TO_HEX(x))
3491 BigQueryToHexUpper, // UPPER(_BQ_TO_HEX(x))
3492 BigQueryLastDayStripUnit, // LAST_DAY(date, MONTH) -> LAST_DAY(date)
3493 BigQueryCastFormat, // CAST(x AS type FORMAT 'fmt') -> PARSE_DATE/PARSE_TIMESTAMP etc.
3494 BigQueryAnyValueHaving, // ANY_VALUE(x HAVING MAX/MIN y) -> ARG_MAX_NULL/ARG_MIN_NULL for DuckDB
3495 BigQueryApproxQuantiles, // APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
3496 GenericFunctionNormalize, // Cross-dialect function renaming (non-BigQuery sources)
3497 RegexpLikeToDuckDB, // RegexpLike -> REGEXP_MATCHES for DuckDB target (partial match)
3498 EpochConvert, // Expression::Epoch -> target-specific epoch function
3499 EpochMsConvert, // Expression::EpochMs -> target-specific epoch ms function
3500 TSQLTypeNormalize, // TSQL types (MONEY, SMALLMONEY, REAL, DATETIME2) -> standard types
3501 MySQLSafeDivide, // MySQL a/b -> a / NULLIF(b, 0) with optional CAST
3502 NullsOrdering, // Add NULLS FIRST/LAST for ORDER BY
3503 AlterTableRenameStripSchema, // ALTER TABLE db.t1 RENAME TO db.t2 -> ALTER TABLE db.t1 RENAME TO t2
3504 StringAggConvert, // STRING_AGG/WITHIN GROUP -> target-specific aggregate
3505 GroupConcatConvert, // GROUP_CONCAT -> target-specific aggregate
3506 TempTableHash, // TSQL #table -> temp table normalization
3507 ArrayLengthConvert, // CARDINALITY/ARRAY_LENGTH/ARRAY_SIZE -> target-specific
3508 DatePartUnquote, // DATE_PART('month', x) -> DATE_PART(month, x) for Snowflake target
3509 NvlClearOriginal, // Clear NVL original_name for cross-dialect transpilation
3510 HiveCastToTryCast, // Hive/Spark CAST -> TRY_CAST for targets that support it
3511 XorExpand, // MySQL XOR -> (a AND NOT b) OR (NOT a AND b) for non-XOR targets
3512 CastTimestampStripTz, // CAST(x AS TIMESTAMP WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark
3513 JsonExtractToGetJsonObject, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
3514 JsonExtractScalarToGetJsonObject, // JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
3515 JsonQueryValueConvert, // JsonQuery/JsonValue -> target-specific (ISNULL wrapper for TSQL, GET_JSON_OBJECT for Spark, etc.)
3516 JsonLiteralToJsonParse, // JSON 'x' -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake
3517 ArraySyntaxConvert, // ARRAY[x] -> ARRAY(x) for Spark, [x] for BigQuery/DuckDB
3518 AtTimeZoneConvert, // AT TIME ZONE -> AT_TIMEZONE (Presto) / FROM_UTC_TIMESTAMP (Spark)
3519 DayOfWeekConvert, // DAY_OF_WEEK -> dialect-specific
3520 MaxByMinByConvert, // MAX_BY/MIN_BY -> argMax/argMin for ClickHouse
3521 ArrayAggToCollectList, // ARRAY_AGG(x ORDER BY ...) -> COLLECT_LIST(x) for Hive/Spark
3522 ElementAtConvert, // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
3523 CurrentUserParens, // CURRENT_USER -> CURRENT_USER() for Snowflake
3524 CastToJsonForSpark, // CAST(x AS JSON) -> TO_JSON(x) for Spark
3525 CastJsonToFromJson, // CAST(JSON_PARSE(literal) AS ARRAY/MAP) -> FROM_JSON(literal, type_string)
3526 ToJsonConvert, // TO_JSON(x) -> JSON_FORMAT(CAST(x AS JSON)) for Presto etc.
3527 ArrayAggNullFilter, // ARRAY_AGG(x) FILTER(WHERE cond) -> add AND NOT x IS NULL for DuckDB
3528 ArrayAggIgnoreNullsDuckDB, // ARRAY_AGG(x IGNORE NULLS ORDER BY ...) -> ARRAY_AGG(x ORDER BY a NULLS FIRST, ...) for DuckDB
3529 BigQueryPercentileContToDuckDB, // PERCENTILE_CONT(x, frac RESPECT NULLS) -> QUANTILE_CONT(x, frac) for DuckDB
3530 BigQueryArraySelectAsStructToSnowflake, // ARRAY(SELECT AS STRUCT ...) -> (SELECT ARRAY_AGG(OBJECT_CONSTRUCT(...)))
3531 CountDistinctMultiArg, // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN ... END)
3532 VarianceToClickHouse, // Expression::Variance -> varSamp for ClickHouse
3533 StddevToClickHouse, // Expression::Stddev -> stddevSamp for ClickHouse
3534 ApproxQuantileConvert, // Expression::ApproxQuantile -> APPROX_PERCENTILE for Snowflake
3535 ArrayIndexConvert, // array[1] -> array[0] for BigQuery (1-based to 0-based)
3536 DollarParamConvert, // $foo -> @foo for BigQuery
3537 TablesampleReservoir, // TABLESAMPLE (n ROWS) -> TABLESAMPLE RESERVOIR (n ROWS) for DuckDB
3538 BitAggFloatCast, // BIT_OR/BIT_AND/BIT_XOR float arg -> CAST(ROUND(CAST(arg)) AS INT) for DuckDB
3539 BitAggSnowflakeRename, // BIT_OR -> BITORAGG, BIT_AND -> BITANDAGG etc. for Snowflake
3540 StrftimeCastTimestamp, // CAST TIMESTAMP -> TIMESTAMP_NTZ for Spark in STRFTIME
3541 AnyValueIgnoreNulls, // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
3542 CreateTableStripComment, // Strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
3543 EscapeStringNormalize, // e'Hello\nworld' literal newline -> \n
3544 AnyToExists, // PostgreSQL x <op> ANY(array) -> EXISTS(array, x -> ...)
3545 ArrayConcatBracketConvert, // [1,2] -> ARRAY[1,2] for PostgreSQL in ARRAY_CAT
3546 SnowflakeIntervalFormat, // INTERVAL '2' HOUR -> INTERVAL '2 HOUR' for Snowflake
3547 AlterTableToSpRename, // ALTER TABLE RENAME -> EXEC sp_rename for TSQL
3548 StraightJoinCase, // STRAIGHT_JOIN -> straight_join for DuckDB
3549 RespectNullsConvert, // RESPECT NULLS window function handling
3550 MysqlNullsOrdering, // MySQL doesn't support NULLS ordering
3551 MysqlNullsLastRewrite, // Add CASE WHEN to ORDER BY for DuckDB -> MySQL (NULLS LAST simulation)
3552 BigQueryNullsOrdering, // BigQuery doesn't support NULLS FIRST/LAST - strip
3553 SnowflakeFloatProtect, // Protect FLOAT from being converted to DOUBLE by Snowflake target transform
3554 JsonToGetPath, // JSON arrow -> GET_PATH/PARSE_JSON for Snowflake
3555 FilterToIff, // FILTER(WHERE) -> IFF wrapping for Snowflake
3556 AggFilterToIff, // AggFunc.filter -> IFF wrapping for Snowflake (e.g., AVG(x) FILTER(WHERE cond))
3557 StructToRow, // DuckDB struct -> Presto ROW / BigQuery STRUCT
3558 SparkStructConvert, // Spark STRUCT(x AS col1, ...) -> ROW/DuckDB struct
3559 DecimalDefaultPrecision, // DECIMAL -> DECIMAL(18, 3) for Snowflake in BIT agg
3560 ApproxCountDistinctToApproxDistinct, // APPROX_COUNT_DISTINCT -> APPROX_DISTINCT for Presto/Trino
3561 CollectListToArrayAgg, // COLLECT_LIST -> ARRAY_AGG for Presto/DuckDB
3562 CollectSetConvert, // COLLECT_SET -> SET_AGG/ARRAY_AGG(DISTINCT)/ARRAY_UNIQUE_AGG
3563 PercentileConvert, // PERCENTILE -> QUANTILE/APPROX_PERCENTILE
3564 CorrIsnanWrap, // CORR(a,b) -> CASE WHEN ISNAN(CORR(a,b)) THEN NULL ELSE CORR(a,b) END
3565 TruncToDateTrunc, // TRUNC(ts, unit) -> DATE_TRUNC(unit, ts)
3566 ArrayContainsConvert, // ARRAY_CONTAINS -> CONTAINS/target-specific
3567 StrPositionExpand, // StrPosition with position -> complex STRPOS expansion for Presto/DuckDB
3568 TablesampleSnowflakeStrip, // Strip method and PERCENT for Snowflake target
3569 FirstToAnyValue, // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
3570 MonthsBetweenConvert, // Expression::MonthsBetween -> target-specific
3571 CurrentUserSparkParens, // CURRENT_USER -> CURRENT_USER() for Spark
3572 SparkDateFuncCast, // MONTH/YEAR/DAY('str') -> MONTH/YEAR/DAY(CAST('str' AS DATE)) from Spark
3573 MapFromArraysConvert, // Expression::MapFromArrays -> MAP/OBJECT_CONSTRUCT/MAP_FROM_ARRAYS
3574 AddMonthsConvert, // Expression::AddMonths -> target-specific DATEADD/DATE_ADD
3575 PercentileContConvert, // PERCENTILE_CONT/DISC WITHIN GROUP -> APPROX_PERCENTILE/PERCENTILE_APPROX
3576 GenerateSeriesConvert, // GENERATE_SERIES -> SEQUENCE/UNNEST(SEQUENCE)/EXPLODE(SEQUENCE)
3577 ConcatCoalesceWrap, // CONCAT(a, b) -> CONCAT(COALESCE(CAST(a), ''), ...) for Presto/ClickHouse
3578 PipeConcatToConcat, // a || b -> CONCAT(CAST(a), CAST(b)) for Presto
3579 DivFuncConvert, // DIV(a, b) -> a // b for DuckDB, CAST for BigQuery
3580 JsonObjectAggConvert, // JSON_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
3581 JsonbExistsConvert, // JSONB_EXISTS -> JSON_EXISTS for DuckDB
3582 DateBinConvert, // DATE_BIN -> TIME_BUCKET for DuckDB
3583 MysqlCastCharToText, // MySQL CAST(x AS CHAR) -> CAST(x AS TEXT/VARCHAR/STRING) for targets
3584 SparkCastVarcharToString, // Spark CAST(x AS VARCHAR/CHAR) -> CAST(x AS STRING) for Spark targets
3585 JsonExtractToArrow, // JSON_EXTRACT(x, path) -> x -> path for SQLite/DuckDB
3586 JsonExtractToTsql, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
3587 JsonExtractToClickHouse, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> JSONExtractString for ClickHouse
3588 JsonExtractScalarConvert, // JSON_EXTRACT_SCALAR -> target-specific (PostgreSQL, Snowflake, SQLite)
3589 JsonPathNormalize, // Normalize JSON path format (brackets, wildcards, quotes) for various dialects
3590 MinMaxToLeastGreatest, // Multi-arg MIN(a,b,c) -> LEAST(a,b,c), MAX(a,b,c) -> GREATEST(a,b,c)
3591 ClickHouseUniqToApproxCountDistinct, // uniq(x) -> APPROX_COUNT_DISTINCT(x) for non-ClickHouse targets
3592 ClickHouseAnyToAnyValue, // any(x) -> ANY_VALUE(x) for non-ClickHouse targets
3593 OracleVarchar2ToVarchar, // VARCHAR2(N CHAR/BYTE) -> VARCHAR(N) for non-Oracle targets
3594 Nvl2Expand, // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b ELSE c END
3595 IfnullToCoalesce, // IFNULL(a, b) -> COALESCE(a, b)
3596 IsAsciiConvert, // IS_ASCII(x) -> dialect-specific ASCII check
3597 StrPositionConvert, // STR_POSITION(haystack, needle[, pos]) -> dialect-specific
3598 DecodeSimplify, // DECODE with null-safe -> simple = comparison
3599 ArraySumConvert, // ARRAY_SUM -> target-specific
3600 ArraySizeConvert, // ARRAY_SIZE -> target-specific
3601 ArrayAnyConvert, // ARRAY_ANY -> target-specific
3602 CastTimestamptzToFunc, // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) for MySQL/StarRocks
3603 TsOrDsToDateConvert, // TS_OR_DS_TO_DATE(x[, fmt]) -> dialect-specific
3604 TsOrDsToDateStrConvert, // TS_OR_DS_TO_DATE_STR(x) -> SUBSTRING(CAST(x AS type), 1, 10)
3605 DateStrToDateConvert, // DATE_STR_TO_DATE(x) -> CAST(x AS DATE)
3606 TimeStrToDateConvert, // TIME_STR_TO_DATE(x) -> CAST(x AS DATE)
3607 TimeStrToTimeConvert, // TIME_STR_TO_TIME(x) -> CAST(x AS TIMESTAMP)
3608 DateToDateStrConvert, // DATE_TO_DATE_STR(x) -> CAST(x AS TEXT/VARCHAR/STRING)
3609 DateToDiConvert, // DATE_TO_DI(x) -> dialect-specific (CAST date to YYYYMMDD integer)
3610 DiToDateConvert, // DI_TO_DATE(x) -> dialect-specific (integer YYYYMMDD to date)
3611 TsOrDiToDiConvert, // TS_OR_DI_TO_DI(x) -> dialect-specific
3612 UnixToStrConvert, // UNIX_TO_STR(x, fmt) -> dialect-specific
3613 UnixToTimeConvert, // UNIX_TO_TIME(x) -> dialect-specific
3614 UnixToTimeStrConvert, // UNIX_TO_TIME_STR(x) -> dialect-specific
3615 TimeToUnixConvert, // TIME_TO_UNIX(x) -> dialect-specific
3616 TimeToStrConvert, // TIME_TO_STR(x, fmt) -> dialect-specific
3617 StrToUnixConvert, // STR_TO_UNIX(x, fmt) -> dialect-specific
3618 DateTruncSwapArgs, // DATE_TRUNC('unit', x) -> DATE_TRUNC(x, unit) / TRUNC(x, unit)
3619 TimestampTruncConvert, // TIMESTAMP_TRUNC(x, UNIT[, tz]) -> dialect-specific
3620 StrToDateConvert, // STR_TO_DATE(x, fmt) from Generic -> CAST(StrToTime(x,fmt) AS DATE)
3621 TsOrDsAddConvert, // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> DATE_ADD per dialect
3622 DateFromUnixDateConvert, // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, '1970-01-01')
3623 TimeStrToUnixConvert, // TIME_STR_TO_UNIX(x) -> dialect-specific
3624 TimeToTimeStrConvert, // TIME_TO_TIME_STR(x) -> CAST(x AS type)
3625 CreateTableLikeToCtas, // CREATE TABLE a LIKE b -> CREATE TABLE a AS SELECT * FROM b LIMIT 0
3626 CreateTableLikeToSelectInto, // CREATE TABLE a LIKE b -> SELECT TOP 0 * INTO a FROM b AS temp
3627 CreateTableLikeToAs, // CREATE TABLE a LIKE b -> CREATE TABLE a AS b (ClickHouse)
3628 ArrayRemoveConvert, // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter/ARRAY subquery
3629 ArrayReverseConvert, // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
3630 JsonKeysConvert, // JSON_KEYS -> JSON_OBJECT_KEYS/OBJECT_KEYS
3631 ParseJsonStrip, // PARSE_JSON(x) -> x (strip wrapper)
3632 ArraySizeDrill, // ARRAY_SIZE -> REPEATED_COUNT for Drill
3633 WeekOfYearToWeekIso, // WEEKOFYEAR -> WEEKISO for Snowflake cross-dialect
3634 }
3635
        // Handle SELECT INTO -> CREATE TABLE AS for DuckDB/Snowflake/etc.
        // Gated on TSQL-family sources, since SELECT ... INTO is TSQL syntax.
        let expr = if matches!(source, DialectType::TSQL | DialectType::Fabric) {
            Self::transform_select_into(expr, source, target)
        } else {
            expr
        };

        // Strip OFFSET ROWS for non-TSQL/Oracle targets
        // Only the `ROWS` keyword marker is cleared; the offset count itself is kept,
        // so targets render a plain `OFFSET n`.
        let expr = if !matches!(
            target,
            DialectType::TSQL | DialectType::Oracle | DialectType::Fabric
        ) {
            if let Expression::Select(mut select) = expr {
                if let Some(ref mut offset) = select.offset {
                    offset.rows = None;
                }
                Expression::Select(select)
            } else {
                expr
            }
        } else {
            expr
        };

        // Oracle: LIMIT -> FETCH FIRST, OFFSET -> OFFSET ROWS
        let expr = if matches!(target, DialectType::Oracle) {
            if let Expression::Select(mut select) = expr {
                // `take()` clears the LIMIT so it is not also rendered by the generator.
                if let Some(limit) = select.limit.take() {
                    // Convert LIMIT to FETCH FIRST n ROWS ONLY
                    select.fetch = Some(crate::expressions::Fetch {
                        direction: "FIRST".to_string(),
                        count: Some(limit.this),
                        percent: false,
                        rows: true,
                        with_ties: false,
                    });
                }
                // Add ROWS to OFFSET if present
                if let Some(ref mut offset) = select.offset {
                    offset.rows = Some(true);
                }
                Expression::Select(select)
            } else {
                expr
            }
        } else {
            expr
        };
3684
        // Handle CreateTable WITH properties transformation before recursive transforms
        // This stage mutates the CreateTable node in place: partition handling, property
        // stripping, per-target column-type fixes, and constraint-order normalization.
        let expr = if let Expression::CreateTable(mut ct) = expr {
            Self::transform_create_table_properties(&mut ct, source, target);

            // Handle Hive-style PARTITIONED BY (col_name type, ...) -> target-specific
            // When the PARTITIONED BY clause contains column definitions, merge them into the
            // main column list and adjust the PARTITIONED BY clause for the target dialect.
            if matches!(
                source,
                DialectType::Hive | DialectType::Spark | DialectType::Databricks
            ) {
                let mut partition_col_names: Vec<String> = Vec::new();
                let mut partition_col_defs: Vec<crate::expressions::ColumnDef> = Vec::new();
                let mut has_col_def_partitions = false;

                // Check if any PARTITIONED BY property contains ColumnDef expressions
                for prop in &ct.properties {
                    if let Expression::PartitionedByProperty(ref pbp) = prop {
                        if let Expression::Tuple(ref tuple) = *pbp.this {
                            // NOTE: this `expr` intentionally shadows the outer binding
                            // within the loop body only.
                            for expr in &tuple.expressions {
                                if let Expression::ColumnDef(ref cd) = expr {
                                    has_col_def_partitions = true;
                                    partition_col_names.push(cd.name.name.clone());
                                    partition_col_defs.push(*cd.clone());
                                }
                            }
                        }
                    }
                }

                // Hive target keeps the original PARTITIONED BY (col type) form unchanged.
                if has_col_def_partitions && !matches!(target, DialectType::Hive) {
                    // Merge partition columns into main column list
                    for cd in partition_col_defs {
                        ct.columns.push(cd);
                    }

                    // Replace PARTITIONED BY property with column-name-only version
                    ct.properties
                        .retain(|p| !matches!(p, Expression::PartitionedByProperty(_)));

                    if matches!(
                        target,
                        DialectType::Presto | DialectType::Trino | DialectType::Athena
                    ) {
                        // Presto: WITH (PARTITIONED_BY=ARRAY['y', 'z'])
                        let array_elements: Vec<String> = partition_col_names
                            .iter()
                            .map(|n| format!("'{}'", n))
                            .collect();
                        let array_value = format!("ARRAY[{}]", array_elements.join(", "));
                        ct.with_properties
                            .push(("PARTITIONED_BY".to_string(), array_value));
                    } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
                        // Spark: PARTITIONED BY (y, z) - just column names
                        let name_exprs: Vec<Expression> = partition_col_names
                            .iter()
                            .map(|n| {
                                Expression::Column(crate::expressions::Column {
                                    name: crate::expressions::Identifier::new(n.clone()),
                                    table: None,
                                    join_mark: false,
                                    trailing_comments: Vec::new(),
                                })
                            })
                            .collect();
                        // Insert at index 0 so PARTITIONED BY renders before other properties.
                        ct.properties.insert(
                            0,
                            Expression::PartitionedByProperty(Box::new(
                                crate::expressions::PartitionedByProperty {
                                    this: Box::new(Expression::Tuple(Box::new(
                                        crate::expressions::Tuple {
                                            expressions: name_exprs,
                                        },
                                    ))),
                                },
                            )),
                        );
                    }
                    // For DuckDB and other targets, just drop the PARTITIONED BY (already retained above)
                }

                // Note: Non-ColumnDef partitions (e.g., function expressions like MONTHS(y))
                // are handled by transform_create_table_properties which runs first
            }

            // Strip LOCATION property for Presto/Trino (not supported)
            if matches!(
                target,
                DialectType::Presto | DialectType::Trino | DialectType::Athena
            ) {
                ct.properties
                    .retain(|p| !matches!(p, Expression::LocationProperty(_)));
            }

            // Strip table-level constraints for Spark/Hive/Databricks
            // Keep PRIMARY KEY and LIKE constraints but strip TSQL-specific modifiers; remove all others
            if matches!(
                target,
                DialectType::Spark | DialectType::Databricks | DialectType::Hive
            ) {
                ct.constraints.retain(|c| {
                    matches!(
                        c,
                        crate::expressions::TableConstraint::PrimaryKey { .. }
                            | crate::expressions::TableConstraint::Like { .. }
                    )
                });
                for constraint in &mut ct.constraints {
                    if let crate::expressions::TableConstraint::PrimaryKey {
                        columns,
                        modifiers,
                        ..
                    } = constraint
                    {
                        // Strip ASC/DESC from column names
                        // (4 and 5 are the lengths of the " ASC" / " DESC" suffixes).
                        for col in columns.iter_mut() {
                            if col.name.ends_with(" ASC") {
                                col.name = col.name[..col.name.len() - 4].to_string();
                            } else if col.name.ends_with(" DESC") {
                                col.name = col.name[..col.name.len() - 5].to_string();
                            }
                        }
                        // Strip TSQL-specific modifiers
                        modifiers.clustered = None;
                        modifiers.with_options.clear();
                        modifiers.on_filegroup = None;
                    }
                }
            }

            // Databricks: IDENTITY columns with INT/INTEGER -> BIGINT
            if matches!(target, DialectType::Databricks) {
                for col in &mut ct.columns {
                    if col.auto_increment {
                        if matches!(col.data_type, crate::expressions::DataType::Int { .. }) {
                            col.data_type = crate::expressions::DataType::BigInt { length: None };
                        }
                    }
                }
            }

            // Spark/Databricks: INTEGER -> INT in column definitions
            // Python sqlglot always outputs INT for Spark/Databricks
            if matches!(target, DialectType::Spark | DialectType::Databricks) {
                for col in &mut ct.columns {
                    if let crate::expressions::DataType::Int {
                        integer_spelling, ..
                    } = &mut col.data_type
                    {
                        // `integer_spelling == false` makes the generator emit INT, not INTEGER.
                        *integer_spelling = false;
                    }
                }
            }

            // Strip explicit NULL constraints for Hive/Spark (B INTEGER NULL -> B INTEGER)
            if matches!(target, DialectType::Hive | DialectType::Spark) {
                for col in &mut ct.columns {
                    // If nullable is explicitly true (NULL), change to None (omit it)
                    if col.nullable == Some(true) {
                        col.nullable = None;
                    }
                    // Also remove from constraints if stored there
                    col.constraints
                        .retain(|c| !matches!(c, crate::expressions::ColumnConstraint::Null));
                }
            }

            // Strip TSQL ON filegroup for non-TSQL/Fabric targets
            if ct.on_property.is_some()
                && !matches!(target, DialectType::TSQL | DialectType::Fabric)
            {
                ct.on_property = None;
            }

            // Snowflake: strip ARRAY type parameters (ARRAY<INT> -> ARRAY, ARRAY<ARRAY<INT>> -> ARRAY)
            // Snowflake doesn't support typed arrays in DDL
            if matches!(target, DialectType::Snowflake) {
                // Replaces the whole Array type (element type included) with the bare
                // Custom "ARRAY" name; only the outermost type is inspected.
                fn strip_array_type_params(dt: &mut crate::expressions::DataType) {
                    if let crate::expressions::DataType::Array { .. } = dt {
                        *dt = crate::expressions::DataType::Custom {
                            name: "ARRAY".to_string(),
                        };
                    }
                }
                for col in &mut ct.columns {
                    strip_array_type_params(&mut col.data_type);
                }
            }

            // PostgreSQL target: ensure IDENTITY columns have NOT NULL
            // If NOT NULL was explicit in source (present in constraint_order), preserve original order.
            // If NOT NULL was not explicit, add it after IDENTITY (GENERATED BY DEFAULT AS IDENTITY NOT NULL).
            if matches!(target, DialectType::PostgreSQL) {
                for col in &mut ct.columns {
                    if col.auto_increment && !col.constraint_order.is_empty() {
                        use crate::expressions::ConstraintType;
                        let has_explicit_not_null = col
                            .constraint_order
                            .iter()
                            .any(|ct| *ct == ConstraintType::NotNull);

                        if has_explicit_not_null {
                            // Source had explicit NOT NULL - preserve original order
                            // Just ensure nullable is set
                            if col.nullable != Some(false) {
                                col.nullable = Some(false);
                            }
                        } else {
                            // Source didn't have explicit NOT NULL - build order with
                            // AutoIncrement + NotNull first, then remaining constraints
                            let mut new_order = Vec::new();
                            // Put AutoIncrement (IDENTITY) first, followed by synthetic NotNull
                            new_order.push(ConstraintType::AutoIncrement);
                            new_order.push(ConstraintType::NotNull);
                            // Add remaining constraints in original order (except AutoIncrement)
                            for ct_type in &col.constraint_order {
                                if *ct_type != ConstraintType::AutoIncrement {
                                    new_order.push(ct_type.clone());
                                }
                            }
                            col.constraint_order = new_order;
                            col.nullable = Some(false);
                        }
                    }
                }
            }

            Expression::CreateTable(ct)
        } else {
            expr
        };
3916
3917 // Handle CreateView column stripping for Presto/Trino target
3918 let expr = if let Expression::CreateView(mut cv) = expr {
3919 // Presto/Trino: drop column list when view has a SELECT body
3920 if matches!(target, DialectType::Presto | DialectType::Trino) && !cv.columns.is_empty()
3921 {
3922 if !matches!(&cv.query, Expression::Null(_)) {
3923 cv.columns.clear();
3924 }
3925 }
3926 Expression::CreateView(cv)
3927 } else {
3928 expr
3929 };
3930
        // Wrap bare VALUES in CTE bodies with SELECT * FROM (...) AS _values for generic/non-Presto targets
        // Presto-family targets accept a bare VALUES as a CTE body, so they are excluded.
        let expr = if !matches!(
            target,
            DialectType::Presto | DialectType::Trino | DialectType::Athena
        ) {
            if let Expression::Select(mut select) = expr {
                if let Some(ref mut with) = select.with {
                    for cte in &mut with.ctes {
                        if let Expression::Values(ref vals) = cte.this {
                            // Build: SELECT * FROM (VALUES ...) AS _values
                            let values_subquery =
                                Expression::Subquery(Box::new(crate::expressions::Subquery {
                                    this: Expression::Values(vals.clone()),
                                    alias: Some(Identifier::new("_values".to_string())),
                                    column_aliases: Vec::new(),
                                    order_by: None,
                                    limit: None,
                                    offset: None,
                                    distribute_by: None,
                                    sort_by: None,
                                    cluster_by: None,
                                    lateral: false,
                                    modifiers_inside: false,
                                    trailing_comments: Vec::new(),
                                }));
                            // A fresh SELECT * whose FROM is the aliased VALUES subquery.
                            let mut new_select = crate::expressions::Select::new();
                            new_select.expressions =
                                vec![Expression::Star(crate::expressions::Star {
                                    table: None,
                                    except: None,
                                    replace: None,
                                    rename: None,
                                    trailing_comments: Vec::new(),
                                })];
                            new_select.from = Some(crate::expressions::From {
                                expressions: vec![values_subquery],
                            });
                            cte.this = Expression::Select(Box::new(new_select));
                        }
                    }
                }
                Expression::Select(select)
            } else {
                expr
            }
        } else {
            expr
        };
3979
3980 // PostgreSQL CREATE INDEX: add NULLS FIRST to index columns that don't have nulls ordering
3981 let expr = if matches!(target, DialectType::PostgreSQL) {
3982 if let Expression::CreateIndex(mut ci) = expr {
3983 for col in &mut ci.columns {
3984 if col.nulls_first.is_none() {
3985 col.nulls_first = Some(true);
3986 }
3987 }
3988 Expression::CreateIndex(ci)
3989 } else {
3990 expr
3991 }
3992 } else {
3993 expr
3994 };
3995
3996 transform_recursive(expr, &|e| {
            // BigQuery CAST(ARRAY[STRUCT(...)] AS STRUCT_TYPE[]) -> DuckDB: convert unnamed Structs to ROW()
            // This converts auto-named struct literals {'_0': x, '_1': y} inside typed arrays to ROW(x, y)
            if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
                if let Expression::Cast(ref c) = e {
                    // Check if this is a CAST of an array to a struct array type
                    let is_struct_array_cast =
                        matches!(&c.to, crate::expressions::DataType::Array { .. });
                    if is_struct_array_cast {
                        // A struct counts as "auto-named" when every field name is either
                        // absent or of the form `_<digits>` (the parser's positional names).
                        let has_auto_named_structs = match &c.this {
                            Expression::Array(arr) => arr.expressions.iter().any(|elem| {
                                if let Expression::Struct(s) = elem {
                                    s.fields.iter().all(|(name, _)| {
                                        name.as_ref().map_or(true, |n| {
                                            n.starts_with('_') && n[1..].parse::<usize>().is_ok()
                                        })
                                    })
                                } else {
                                    false
                                }
                            }),
                            Expression::ArrayFunc(arr) => arr.expressions.iter().any(|elem| {
                                if let Expression::Struct(s) = elem {
                                    s.fields.iter().all(|(name, _)| {
                                        name.as_ref().map_or(true, |n| {
                                            n.starts_with('_') && n[1..].parse::<usize>().is_ok()
                                        })
                                    })
                                } else {
                                    false
                                }
                            }),
                            _ => false,
                        };
                        if has_auto_named_structs {
                            // Drop the field names and keep only the values: ROW(x, y).
                            let convert_struct_to_row = |elem: Expression| -> Expression {
                                if let Expression::Struct(s) = elem {
                                    let row_args: Vec<Expression> =
                                        s.fields.into_iter().map(|(_, v)| v).collect();
                                    Expression::Function(Box::new(Function::new(
                                        "ROW".to_string(),
                                        row_args,
                                    )))
                                } else {
                                    elem
                                }
                            };
                            let mut c_clone = c.as_ref().clone();
                            match &mut c_clone.this {
                                Expression::Array(arr) => {
                                    arr.expressions = arr
                                        .expressions
                                        .drain(..)
                                        .map(convert_struct_to_row)
                                        .collect();
                                }
                                Expression::ArrayFunc(arr) => {
                                    arr.expressions = arr
                                        .expressions
                                        .drain(..)
                                        .map(convert_struct_to_row)
                                        .collect();
                                }
                                _ => {}
                            }
                            return Ok(Expression::Cast(Box::new(c_clone)));
                        }
                    }
                }
            }

            // BigQuery SELECT AS STRUCT -> DuckDB struct literal {'key': value, ...}
            if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
                if let Expression::Select(ref sel) = e {
                    if sel.kind.as_deref() == Some("STRUCT") {
                        // Collect projection items into struct fields: aliases and columns
                        // keep their names; other expressions become unnamed fields.
                        let mut fields = Vec::new();
                        for expr in &sel.expressions {
                            match expr {
                                Expression::Alias(a) => {
                                    fields.push((Some(a.alias.name.clone()), a.this.clone()));
                                }
                                Expression::Column(c) => {
                                    fields.push((Some(c.name.name.clone()), expr.clone()));
                                }
                                _ => {
                                    fields.push((None, expr.clone()));
                                }
                            }
                        }
                        let struct_lit =
                            Expression::Struct(Box::new(crate::expressions::Struct { fields }));
                        // Replace the projection with the single struct literal and clear
                        // the STRUCT kind so the generator emits a plain SELECT.
                        let mut new_select = sel.as_ref().clone();
                        new_select.kind = None;
                        new_select.expressions = vec![struct_lit];
                        return Ok(Expression::Select(Box::new(new_select)));
                    }
                }
            }
4094
            // Convert @variable -> ${variable} for Spark/Hive/Databricks
            if matches!(source, DialectType::TSQL | DialectType::Fabric)
                && matches!(
                    target,
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive
                )
            {
                // Proper Parameter nodes with the At style: rebuild with DollarBrace style.
                if let Expression::Parameter(ref p) = e {
                    if p.style == crate::expressions::ParameterStyle::At {
                        if let Some(ref name) = p.name {
                            return Ok(Expression::Parameter(Box::new(
                                crate::expressions::Parameter {
                                    name: Some(name.clone()),
                                    index: p.index,
                                    style: crate::expressions::ParameterStyle::DollarBrace,
                                    quoted: p.quoted,
                                    string_quoted: p.string_quoted,
                                    expression: None,
                                },
                            )));
                        }
                    }
                }
                // Also handle Column("@x") -> Parameter("x", DollarBrace) for TSQL vars
                // (only bare columns; a table qualifier means it is not a variable).
                if let Expression::Column(ref col) = e {
                    if col.name.name.starts_with('@') && col.table.is_none() {
                        let var_name = col.name.name.trim_start_matches('@').to_string();
                        return Ok(Expression::Parameter(Box::new(
                            crate::expressions::Parameter {
                                name: Some(var_name),
                                index: None,
                                style: crate::expressions::ParameterStyle::DollarBrace,
                                quoted: false,
                                string_quoted: false,
                                expression: None,
                            },
                        )));
                    }
                }
            }

            // Convert @variable -> variable in SET statements for Spark/Databricks
            // The SET name may arrive as a Parameter, Identifier, or Column depending on
            // which parser path produced it; all three spellings are normalized here.
            if matches!(source, DialectType::TSQL | DialectType::Fabric)
                && matches!(target, DialectType::Spark | DialectType::Databricks)
            {
                if let Expression::SetStatement(ref s) = e {
                    let mut new_items = s.items.clone();
                    let mut changed = false;
                    for item in &mut new_items {
                        // Strip @ from the SET name (Parameter style)
                        if let Expression::Parameter(ref p) = item.name {
                            if p.style == crate::expressions::ParameterStyle::At {
                                if let Some(ref name) = p.name {
                                    item.name = Expression::Identifier(Identifier::new(name));
                                    changed = true;
                                }
                            }
                        }
                        // Strip @ from the SET name (Identifier style - SET parser)
                        if let Expression::Identifier(ref id) = item.name {
                            if id.name.starts_with('@') {
                                let var_name = id.name.trim_start_matches('@').to_string();
                                item.name = Expression::Identifier(Identifier::new(&var_name));
                                changed = true;
                            }
                        }
                        // Strip @ from the SET name (Column style - alternative parsing)
                        if let Expression::Column(ref col) = item.name {
                            if col.name.name.starts_with('@') && col.table.is_none() {
                                let var_name = col.name.name.trim_start_matches('@').to_string();
                                item.name = Expression::Identifier(Identifier::new(&var_name));
                                changed = true;
                            }
                        }
                    }
                    // Only rebuild the statement when at least one item was rewritten.
                    if changed {
                        let mut new_set = (**s).clone();
                        new_set.items = new_items;
                        return Ok(Expression::SetStatement(Box::new(new_set)));
                    }
                }
            }
4177
4178 // Strip NOLOCK hint for non-TSQL targets
4179 if matches!(source, DialectType::TSQL | DialectType::Fabric)
4180 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
4181 {
4182 if let Expression::Table(ref tr) = e {
4183 if !tr.hints.is_empty() {
4184 let mut new_tr = tr.clone();
4185 new_tr.hints.clear();
4186 return Ok(Expression::Table(new_tr));
4187 }
4188 }
4189 }
4190
4191 // Snowflake: TRUE IS TRUE -> TRUE, FALSE IS FALSE -> FALSE
4192 // Snowflake simplifies IS TRUE/IS FALSE on boolean literals
4193 if matches!(target, DialectType::Snowflake) {
4194 if let Expression::IsTrue(ref itf) = e {
4195 if let Expression::Boolean(ref b) = itf.this {
4196 if !itf.not {
4197 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
4198 value: b.value,
4199 }));
4200 } else {
4201 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
4202 value: !b.value,
4203 }));
4204 }
4205 }
4206 }
4207 if let Expression::IsFalse(ref itf) = e {
4208 if let Expression::Boolean(ref b) = itf.this {
4209 if !itf.not {
4210 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
4211 value: !b.value,
4212 }));
4213 } else {
4214 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
4215 value: b.value,
4216 }));
4217 }
4218 }
4219 }
4220 }
4221
            // BigQuery: split dotted backtick identifiers in table names
            // e.g., `a.b.c` -> "a"."b"."c" when source is BigQuery and target is not BigQuery
            // Only names with no schema already set are split; 2 parts map to schema.name,
            // 3 parts to catalog.schema.name, anything else is left untouched.
            if matches!(source, DialectType::BigQuery) && !matches!(target, DialectType::BigQuery) {
                if let Expression::CreateTable(ref ct) = e {
                    let mut changed = false;
                    let mut new_ct = ct.clone();
                    // Split the table name
                    if ct.name.schema.is_none() && ct.name.name.name.contains('.') {
                        let parts: Vec<&str> = ct.name.name.name.split('.').collect();
                        // Use quoted identifiers when the original was quoted (backtick in BigQuery)
                        let was_quoted = ct.name.name.quoted;
                        let mk_id = |s: &str| {
                            if was_quoted {
                                Identifier::quoted(s)
                            } else {
                                Identifier::new(s)
                            }
                        };
                        if parts.len() == 3 {
                            new_ct.name.catalog = Some(mk_id(parts[0]));
                            new_ct.name.schema = Some(mk_id(parts[1]));
                            new_ct.name.name = mk_id(parts[2]);
                            changed = true;
                        } else if parts.len() == 2 {
                            new_ct.name.schema = Some(mk_id(parts[0]));
                            new_ct.name.name = mk_id(parts[1]);
                            changed = true;
                        }
                    }
                    // Split the clone source name
                    // (same rules as above, applied to CREATE TABLE ... CLONE sources).
                    if let Some(ref clone_src) = ct.clone_source {
                        if clone_src.schema.is_none() && clone_src.name.name.contains('.') {
                            let parts: Vec<&str> = clone_src.name.name.split('.').collect();
                            let was_quoted = clone_src.name.quoted;
                            let mk_id = |s: &str| {
                                if was_quoted {
                                    Identifier::quoted(s)
                                } else {
                                    Identifier::new(s)
                                }
                            };
                            let mut new_src = clone_src.clone();
                            if parts.len() == 3 {
                                new_src.catalog = Some(mk_id(parts[0]));
                                new_src.schema = Some(mk_id(parts[1]));
                                new_src.name = mk_id(parts[2]);
                                new_ct.clone_source = Some(new_src);
                                changed = true;
                            } else if parts.len() == 2 {
                                new_src.schema = Some(mk_id(parts[0]));
                                new_src.name = mk_id(parts[1]);
                                new_ct.clone_source = Some(new_src);
                                changed = true;
                            }
                        }
                    }
                    if changed {
                        return Ok(Expression::CreateTable(new_ct));
                    }
                }
            }
4283
4284 // BigQuery array subscript: a[1], b[OFFSET(1)], c[ORDINAL(1)], d[SAFE_OFFSET(1)], e[SAFE_ORDINAL(1)]
4285 // -> DuckDB/Presto: convert 0-based to 1-based, handle SAFE_* -> ELEMENT_AT for Presto
4286 if matches!(source, DialectType::BigQuery)
4287 && matches!(
4288 target,
4289 DialectType::DuckDB
4290 | DialectType::Presto
4291 | DialectType::Trino
4292 | DialectType::Athena
4293 )
4294 {
4295 if let Expression::Subscript(ref sub) = e {
4296 let (new_index, is_safe) = match &sub.index {
4297 // a[1] -> a[1+1] = a[2] (plain index is 0-based in BQ)
4298 Expression::Literal(Literal::Number(n)) => {
4299 if let Ok(val) = n.parse::<i64>() {
4300 (
4301 Some(Expression::Literal(Literal::Number(
4302 (val + 1).to_string(),
4303 ))),
4304 false,
4305 )
4306 } else {
4307 (None, false)
4308 }
4309 }
4310 // OFFSET(n) -> n+1 (0-based)
4311 Expression::Function(ref f)
4312 if f.name.eq_ignore_ascii_case("OFFSET") && f.args.len() == 1 =>
4313 {
4314 if let Expression::Literal(Literal::Number(n)) = &f.args[0] {
4315 if let Ok(val) = n.parse::<i64>() {
4316 (
4317 Some(Expression::Literal(Literal::Number(
4318 (val + 1).to_string(),
4319 ))),
4320 false,
4321 )
4322 } else {
4323 (
4324 Some(Expression::Add(Box::new(
4325 crate::expressions::BinaryOp::new(
4326 f.args[0].clone(),
4327 Expression::number(1),
4328 ),
4329 ))),
4330 false,
4331 )
4332 }
4333 } else {
4334 (
4335 Some(Expression::Add(Box::new(
4336 crate::expressions::BinaryOp::new(
4337 f.args[0].clone(),
4338 Expression::number(1),
4339 ),
4340 ))),
4341 false,
4342 )
4343 }
4344 }
4345 // ORDINAL(n) -> n (already 1-based)
4346 Expression::Function(ref f)
4347 if f.name.eq_ignore_ascii_case("ORDINAL") && f.args.len() == 1 =>
4348 {
4349 (Some(f.args[0].clone()), false)
4350 }
4351 // SAFE_OFFSET(n) -> n+1 (0-based, safe)
4352 Expression::Function(ref f)
4353 if f.name.eq_ignore_ascii_case("SAFE_OFFSET") && f.args.len() == 1 =>
4354 {
4355 if let Expression::Literal(Literal::Number(n)) = &f.args[0] {
4356 if let Ok(val) = n.parse::<i64>() {
4357 (
4358 Some(Expression::Literal(Literal::Number(
4359 (val + 1).to_string(),
4360 ))),
4361 true,
4362 )
4363 } else {
4364 (
4365 Some(Expression::Add(Box::new(
4366 crate::expressions::BinaryOp::new(
4367 f.args[0].clone(),
4368 Expression::number(1),
4369 ),
4370 ))),
4371 true,
4372 )
4373 }
4374 } else {
4375 (
4376 Some(Expression::Add(Box::new(
4377 crate::expressions::BinaryOp::new(
4378 f.args[0].clone(),
4379 Expression::number(1),
4380 ),
4381 ))),
4382 true,
4383 )
4384 }
4385 }
4386 // SAFE_ORDINAL(n) -> n (already 1-based, safe)
4387 Expression::Function(ref f)
4388 if f.name.eq_ignore_ascii_case("SAFE_ORDINAL") && f.args.len() == 1 =>
4389 {
4390 (Some(f.args[0].clone()), true)
4391 }
4392 _ => (None, false),
4393 };
4394 if let Some(idx) = new_index {
4395 if is_safe
4396 && matches!(
4397 target,
4398 DialectType::Presto | DialectType::Trino | DialectType::Athena
4399 )
4400 {
4401 // Presto: SAFE_OFFSET/SAFE_ORDINAL -> ELEMENT_AT(arr, idx)
4402 return Ok(Expression::Function(Box::new(Function::new(
4403 "ELEMENT_AT".to_string(),
4404 vec![sub.this.clone(), idx],
4405 ))));
4406 } else {
4407 // DuckDB or non-safe: just use subscript with converted index
4408 return Ok(Expression::Subscript(Box::new(
4409 crate::expressions::Subscript {
4410 this: sub.this.clone(),
4411 index: idx,
4412 },
4413 )));
4414 }
4415 }
4416 }
4417 }
4418
        // BigQuery LENGTH(x) -> DuckDB CASE TYPEOF(x) WHEN 'BLOB' THEN OCTET_LENGTH(...) ELSE LENGTH(...) END
        //
        // BigQuery's LENGTH accepts both STRING and BYTES, so DuckDB needs a
        // runtime dispatch on the value's type: BLOB values are measured with
        // OCTET_LENGTH (byte count), everything else is cast to TEXT and
        // measured with LENGTH.
        if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
            if let Expression::Length(ref uf) = e {
                let arg = uf.this.clone();
                // CASE operand: TYPEOF(x)
                let typeof_func = Expression::Function(Box::new(Function::new(
                    "TYPEOF".to_string(),
                    vec![arg.clone()],
                )));
                // WHEN 'BLOB' branch: OCTET_LENGTH(CAST(x AS VARBINARY))
                let blob_cast = Expression::Cast(Box::new(Cast {
                    this: arg.clone(),
                    to: DataType::VarBinary { length: None },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                }));
                let octet_length = Expression::Function(Box::new(Function::new(
                    "OCTET_LENGTH".to_string(),
                    vec![blob_cast],
                )));
                // ELSE branch: LENGTH(CAST(x AS TEXT))
                let text_cast = Expression::Cast(Box::new(Cast {
                    this: arg,
                    to: DataType::Text,
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                }));
                let length_text = Expression::Length(Box::new(crate::expressions::UnaryFunc {
                    this: text_cast,
                    original_name: None,
                }));
                return Ok(Expression::Case(Box::new(Case {
                    operand: Some(typeof_func),
                    whens: vec![(
                        Expression::Literal(Literal::String("BLOB".to_string())),
                        octet_length,
                    )],
                    else_: Some(length_text),
                    comments: Vec::new(),
                })));
            }
        }
4462
4463 // BigQuery UNNEST alias handling (only for non-BigQuery sources):
4464 // UNNEST(...) AS x -> UNNEST(...) (drop unused table alias)
4465 // UNNEST(...) AS x(y) -> UNNEST(...) AS y (use column alias as main alias)
4466 if matches!(target, DialectType::BigQuery) && !matches!(source, DialectType::BigQuery) {
4467 if let Expression::Alias(ref a) = e {
4468 if matches!(&a.this, Expression::Unnest(_)) {
4469 if a.column_aliases.is_empty() {
4470 // Drop the entire alias, return just the UNNEST expression
4471 return Ok(a.this.clone());
4472 } else {
4473 // Use first column alias as the main alias
4474 let mut new_alias = a.as_ref().clone();
4475 new_alias.alias = a.column_aliases[0].clone();
4476 new_alias.column_aliases.clear();
4477 return Ok(Expression::Alias(Box::new(new_alias)));
4478 }
4479 }
4480 }
4481 }
4482
        // BigQuery IN UNNEST(expr) -> IN (SELECT UNNEST/EXPLODE(expr)) for non-BigQuery targets
        //
        // The parsed In node carries the UNNEST operand in `unnest`; here it is
        // rewritten into the generic `query` form (IN <subquery>) so targets
        // without BigQuery's IN UNNEST syntax can generate it.
        if matches!(source, DialectType::BigQuery) && !matches!(target, DialectType::BigQuery) {
            if let Expression::In(ref in_expr) = e {
                if let Some(ref unnest_inner) = in_expr.unnest {
                    // Build the function call for the target dialect
                    let func_expr = if matches!(
                        target,
                        DialectType::Hive | DialectType::Spark | DialectType::Databricks
                    ) {
                        // Use EXPLODE for Hive/Spark
                        Expression::Function(Box::new(Function::new(
                            "EXPLODE".to_string(),
                            vec![*unnest_inner.clone()],
                        )))
                    } else {
                        // Use UNNEST for Presto/Trino/DuckDB/etc.
                        Expression::Unnest(Box::new(crate::expressions::UnnestFunc {
                            this: *unnest_inner.clone(),
                            expressions: Vec::new(),
                            with_ordinality: false,
                            alias: None,
                            offset_alias: None,
                        }))
                    };

                    // Wrap in SELECT
                    let mut inner_select = crate::expressions::Select::new();
                    inner_select.expressions = vec![func_expr];

                    let subquery_expr = Expression::Select(Box::new(inner_select));

                    // Rebuild the In node: `unnest` is cleared and the subquery
                    // takes its place; NOT/GLOBAL flags are preserved.
                    return Ok(Expression::In(Box::new(crate::expressions::In {
                        this: in_expr.this.clone(),
                        expressions: Vec::new(),
                        query: Some(subquery_expr),
                        not: in_expr.not,
                        global: in_expr.global,
                        unnest: None,
                        is_field: false,
                    })));
                }
            }
        }
4526
        // SQLite: GENERATE_SERIES AS t(i) -> (SELECT value AS i FROM GENERATE_SERIES(...)) AS t
        // This handles the subquery wrapping for RANGE -> GENERATE_SERIES in FROM context
        //
        // SQLite's generate_series table-valued function exposes its output
        // column as "value", so the column alias must be applied via a
        // projection rather than a column-alias list.
        if matches!(target, DialectType::SQLite) && matches!(source, DialectType::DuckDB) {
            if let Expression::Alias(ref a) = e {
                if let Expression::Function(ref f) = a.this {
                    if f.name.eq_ignore_ascii_case("GENERATE_SERIES")
                        && !a.column_aliases.is_empty()
                    {
                        // Build: (SELECT value AS col_alias FROM GENERATE_SERIES(start, end)) AS table_alias
                        let col_alias = a.column_aliases[0].clone();
                        let mut inner_select = crate::expressions::Select::new();
                        inner_select.expressions =
                            vec![Expression::Alias(Box::new(crate::expressions::Alias::new(
                                Expression::Identifier(Identifier::new("value".to_string())),
                                col_alias,
                            )))];
                        inner_select.from = Some(crate::expressions::From {
                            expressions: vec![a.this.clone()],
                        });
                        // The original table alias moves onto the wrapping subquery.
                        let subquery =
                            Expression::Subquery(Box::new(crate::expressions::Subquery {
                                this: Expression::Select(Box::new(inner_select)),
                                alias: Some(a.alias.clone()),
                                column_aliases: Vec::new(),
                                order_by: None,
                                limit: None,
                                offset: None,
                                lateral: false,
                                modifiers_inside: false,
                                trailing_comments: Vec::new(),
                                distribute_by: None,
                                sort_by: None,
                                cluster_by: None,
                            }));
                        return Ok(subquery);
                    }
                }
            }
        }
4566
4567 // BigQuery implicit UNNEST: comma-join on array path -> CROSS JOIN UNNEST
4568 // e.g., SELECT results FROM Coordinates, Coordinates.position AS results
4569 // -> SELECT results FROM Coordinates CROSS JOIN UNNEST(Coordinates.position) AS results
4570 if matches!(source, DialectType::BigQuery) {
4571 if let Expression::Select(ref s) = e {
4572 if let Some(ref from) = s.from {
4573 if from.expressions.len() >= 2 {
4574 // Collect table names from first expression
4575 let first_tables: Vec<String> = from
4576 .expressions
4577 .iter()
4578 .take(1)
4579 .filter_map(|expr| {
4580 if let Expression::Table(t) = expr {
4581 Some(t.name.name.to_lowercase())
4582 } else {
4583 None
4584 }
4585 })
4586 .collect();
4587
4588 // Check if any subsequent FROM expressions are schema-qualified with a matching table name
4589 // or have a dotted name matching a table
4590 let mut needs_rewrite = false;
4591 for expr in from.expressions.iter().skip(1) {
4592 if let Expression::Table(t) = expr {
4593 if let Some(ref schema) = t.schema {
4594 if first_tables.contains(&schema.name.to_lowercase()) {
4595 needs_rewrite = true;
4596 break;
4597 }
4598 }
4599 // Also check dotted names in quoted identifiers (e.g., `Coordinates.position`)
4600 if t.schema.is_none() && t.name.name.contains('.') {
4601 let parts: Vec<&str> = t.name.name.split('.').collect();
4602 if parts.len() >= 2
4603 && first_tables.contains(&parts[0].to_lowercase())
4604 {
4605 needs_rewrite = true;
4606 break;
4607 }
4608 }
4609 }
4610 }
4611
4612 if needs_rewrite {
4613 let mut new_select = s.clone();
4614 let mut new_from_exprs = vec![from.expressions[0].clone()];
4615 let mut new_joins = s.joins.clone();
4616
4617 for expr in from.expressions.iter().skip(1) {
4618 if let Expression::Table(ref t) = expr {
4619 if let Some(ref schema) = t.schema {
4620 if first_tables.contains(&schema.name.to_lowercase()) {
4621 // This is an array path reference, convert to CROSS JOIN UNNEST
4622 let col_expr = Expression::Column(
4623 crate::expressions::Column {
4624 name: t.name.clone(),
4625 table: Some(schema.clone()),
4626 join_mark: false,
4627 trailing_comments: vec![],
4628 },
4629 );
4630 let unnest_expr = Expression::Unnest(Box::new(
4631 crate::expressions::UnnestFunc {
4632 this: col_expr,
4633 expressions: Vec::new(),
4634 with_ordinality: false,
4635 alias: None,
4636 offset_alias: None,
4637 },
4638 ));
4639 let join_this = if let Some(ref alias) = t.alias {
4640 if matches!(
4641 target,
4642 DialectType::Presto
4643 | DialectType::Trino
4644 | DialectType::Athena
4645 ) {
4646 // Presto: UNNEST(x) AS _t0(results)
4647 Expression::Alias(Box::new(
4648 crate::expressions::Alias {
4649 this: unnest_expr,
4650 alias: Identifier::new("_t0"),
4651 column_aliases: vec![alias.clone()],
4652 pre_alias_comments: vec![],
4653 trailing_comments: vec![],
4654 },
4655 ))
4656 } else {
4657 // BigQuery: UNNEST(x) AS results
4658 Expression::Alias(Box::new(
4659 crate::expressions::Alias {
4660 this: unnest_expr,
4661 alias: alias.clone(),
4662 column_aliases: vec![],
4663 pre_alias_comments: vec![],
4664 trailing_comments: vec![],
4665 },
4666 ))
4667 }
4668 } else {
4669 unnest_expr
4670 };
4671 new_joins.push(crate::expressions::Join {
4672 kind: crate::expressions::JoinKind::Cross,
4673 this: join_this,
4674 on: None,
4675 using: Vec::new(),
4676 use_inner_keyword: false,
4677 use_outer_keyword: false,
4678 deferred_condition: false,
4679 join_hint: None,
4680 match_condition: None,
4681 pivots: Vec::new(),
4682 comments: Vec::new(),
4683 nesting_group: 0,
4684 directed: false,
4685 });
4686 } else {
4687 new_from_exprs.push(expr.clone());
4688 }
4689 } else if t.schema.is_none() && t.name.name.contains('.') {
4690 // Dotted name in quoted identifier: `Coordinates.position`
4691 let parts: Vec<&str> = t.name.name.split('.').collect();
4692 if parts.len() >= 2
4693 && first_tables.contains(&parts[0].to_lowercase())
4694 {
4695 let join_this =
4696 if matches!(target, DialectType::BigQuery) {
4697 // BigQuery: keep as single quoted identifier, just convert comma -> CROSS JOIN
4698 Expression::Table(t.clone())
4699 } else {
4700 // Other targets: split into "schema"."name"
4701 let mut new_t = t.clone();
4702 new_t.schema =
4703 Some(Identifier::quoted(parts[0]));
4704 new_t.name = Identifier::quoted(parts[1]);
4705 Expression::Table(new_t)
4706 };
4707 new_joins.push(crate::expressions::Join {
4708 kind: crate::expressions::JoinKind::Cross,
4709 this: join_this,
4710 on: None,
4711 using: Vec::new(),
4712 use_inner_keyword: false,
4713 use_outer_keyword: false,
4714 deferred_condition: false,
4715 join_hint: None,
4716 match_condition: None,
4717 pivots: Vec::new(),
4718 comments: Vec::new(),
4719 nesting_group: 0,
4720 directed: false,
4721 });
4722 } else {
4723 new_from_exprs.push(expr.clone());
4724 }
4725 } else {
4726 new_from_exprs.push(expr.clone());
4727 }
4728 } else {
4729 new_from_exprs.push(expr.clone());
4730 }
4731 }
4732
4733 new_select.from = Some(crate::expressions::From {
4734 expressions: new_from_exprs,
4735 ..from.clone()
4736 });
4737 new_select.joins = new_joins;
4738 return Ok(Expression::Select(new_select));
4739 }
4740 }
4741 }
4742 }
4743 }
4744
4745 // CROSS JOIN UNNEST -> LATERAL VIEW EXPLODE for Hive/Spark
4746 if matches!(
4747 target,
4748 DialectType::Hive | DialectType::Spark | DialectType::Databricks
4749 ) {
4750 if let Expression::Select(ref s) = e {
4751 // Check if any joins are CROSS JOIN with UNNEST/EXPLODE
4752 let is_unnest_or_explode_expr = |expr: &Expression| -> bool {
4753 matches!(expr, Expression::Unnest(_))
4754 || matches!(expr, Expression::Function(f) if f.name.eq_ignore_ascii_case("EXPLODE"))
4755 };
4756 let has_unnest_join = s.joins.iter().any(|j| {
4757 j.kind == crate::expressions::JoinKind::Cross && (
4758 matches!(&j.this, Expression::Alias(a) if is_unnest_or_explode_expr(&a.this))
4759 || is_unnest_or_explode_expr(&j.this)
4760 )
4761 });
4762 if has_unnest_join {
4763 let mut select = s.clone();
4764 let mut new_joins = Vec::new();
4765 for join in select.joins.drain(..) {
4766 if join.kind == crate::expressions::JoinKind::Cross {
4767 // Extract the UNNEST/EXPLODE from the join
4768 let (func_expr, table_alias, col_aliases) = match &join.this {
4769 Expression::Alias(a) => {
4770 let ta = if a.alias.is_empty() {
4771 None
4772 } else {
4773 Some(a.alias.clone())
4774 };
4775 let cas = a.column_aliases.clone();
4776 match &a.this {
4777 Expression::Unnest(u) => {
4778 // Multi-arg UNNEST(y, z) -> INLINE(ARRAYS_ZIP(y, z))
4779 if !u.expressions.is_empty() {
4780 let mut all_args = vec![u.this.clone()];
4781 all_args.extend(u.expressions.clone());
4782 let arrays_zip =
4783 Expression::Function(Box::new(
4784 crate::expressions::Function::new(
4785 "ARRAYS_ZIP".to_string(),
4786 all_args,
4787 ),
4788 ));
4789 let inline = Expression::Function(Box::new(
4790 crate::expressions::Function::new(
4791 "INLINE".to_string(),
4792 vec![arrays_zip],
4793 ),
4794 ));
4795 (Some(inline), ta, a.column_aliases.clone())
4796 } else {
4797 // Convert UNNEST(x) to EXPLODE(x) or POSEXPLODE(x)
4798 let func_name = if u.with_ordinality {
4799 "POSEXPLODE"
4800 } else {
4801 "EXPLODE"
4802 };
4803 let explode = Expression::Function(Box::new(
4804 crate::expressions::Function::new(
4805 func_name.to_string(),
4806 vec![u.this.clone()],
4807 ),
4808 ));
4809 // For POSEXPLODE, add 'pos' to column aliases
4810 let cas = if u.with_ordinality {
4811 let mut pos_aliases =
4812 vec![Identifier::new(
4813 "pos".to_string(),
4814 )];
4815 pos_aliases
4816 .extend(a.column_aliases.clone());
4817 pos_aliases
4818 } else {
4819 a.column_aliases.clone()
4820 };
4821 (Some(explode), ta, cas)
4822 }
4823 }
4824 Expression::Function(f)
4825 if f.name.eq_ignore_ascii_case("EXPLODE") =>
4826 {
4827 (Some(Expression::Function(f.clone())), ta, cas)
4828 }
4829 _ => (None, None, Vec::new()),
4830 }
4831 }
4832 Expression::Unnest(u) => {
4833 let func_name = if u.with_ordinality {
4834 "POSEXPLODE"
4835 } else {
4836 "EXPLODE"
4837 };
4838 let explode = Expression::Function(Box::new(
4839 crate::expressions::Function::new(
4840 func_name.to_string(),
4841 vec![u.this.clone()],
4842 ),
4843 ));
4844 let ta = u.alias.clone();
4845 let col_aliases = if u.with_ordinality {
4846 vec![Identifier::new("pos".to_string())]
4847 } else {
4848 Vec::new()
4849 };
4850 (Some(explode), ta, col_aliases)
4851 }
4852 _ => (None, None, Vec::new()),
4853 };
4854 if let Some(func) = func_expr {
4855 select.lateral_views.push(crate::expressions::LateralView {
4856 this: func,
4857 table_alias,
4858 column_aliases: col_aliases,
4859 outer: false,
4860 });
4861 } else {
4862 new_joins.push(join);
4863 }
4864 } else {
4865 new_joins.push(join);
4866 }
4867 }
4868 select.joins = new_joins;
4869 return Ok(Expression::Select(select));
4870 }
4871 }
4872 }
4873
4874 // UNNEST expansion: DuckDB SELECT UNNEST(arr) in SELECT list -> expanded query
4875 // for BigQuery, Presto/Trino, Snowflake
4876 if matches!(source, DialectType::DuckDB | DialectType::PostgreSQL)
4877 && matches!(
4878 target,
4879 DialectType::BigQuery
4880 | DialectType::Presto
4881 | DialectType::Trino
4882 | DialectType::Snowflake
4883 )
4884 {
4885 if let Expression::Select(ref s) = e {
4886 // Check if any SELECT expressions contain UNNEST
4887 // Note: UNNEST can appear as Expression::Unnest OR Expression::Function("UNNEST")
4888 let has_unnest_in_select = s.expressions.iter().any(|expr| {
4889 fn contains_unnest(e: &Expression) -> bool {
4890 match e {
4891 Expression::Unnest(_) => true,
4892 Expression::Function(f)
4893 if f.name.eq_ignore_ascii_case("UNNEST") =>
4894 {
4895 true
4896 }
4897 Expression::Alias(a) => contains_unnest(&a.this),
4898 Expression::Add(op)
4899 | Expression::Sub(op)
4900 | Expression::Mul(op)
4901 | Expression::Div(op) => {
4902 contains_unnest(&op.left) || contains_unnest(&op.right)
4903 }
4904 _ => false,
4905 }
4906 }
4907 contains_unnest(expr)
4908 });
4909
4910 if has_unnest_in_select {
4911 let rewritten = Self::rewrite_unnest_expansion(s, target);
4912 if let Some(new_select) = rewritten {
4913 return Ok(Expression::Select(Box::new(new_select)));
4914 }
4915 }
4916 }
4917 }
4918
4919 // BigQuery -> PostgreSQL: convert escape sequences in string literals to actual characters
4920 // BigQuery '\n' -> PostgreSQL literal newline in string
4921 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::PostgreSQL)
4922 {
4923 if let Expression::Literal(Literal::String(ref s)) = e {
4924 if s.contains("\\n")
4925 || s.contains("\\t")
4926 || s.contains("\\r")
4927 || s.contains("\\\\")
4928 {
4929 let converted = s
4930 .replace("\\n", "\n")
4931 .replace("\\t", "\t")
4932 .replace("\\r", "\r")
4933 .replace("\\\\", "\\");
4934 return Ok(Expression::Literal(Literal::String(converted)));
4935 }
4936 }
4937 }
4938
4939 // Cross-dialect: convert Literal::Timestamp to target-specific CAST form
4940 // when source != target (identity tests keep the Literal::Timestamp for native handling)
4941 if source != target {
4942 if let Expression::Literal(Literal::Timestamp(ref s)) = e {
4943 let s = s.clone();
4944 // MySQL: TIMESTAMP handling depends on source dialect
4945 // BigQuery TIMESTAMP is timezone-aware -> TIMESTAMP() function in MySQL
4946 // Other sources' TIMESTAMP is non-timezone -> CAST('x' AS DATETIME) in MySQL
4947 if matches!(target, DialectType::MySQL) {
4948 if matches!(source, DialectType::BigQuery) {
4949 // BigQuery TIMESTAMP is timezone-aware -> MySQL TIMESTAMP() function
4950 return Ok(Expression::Function(Box::new(Function::new(
4951 "TIMESTAMP".to_string(),
4952 vec![Expression::Literal(Literal::String(s))],
4953 ))));
4954 } else {
4955 // Non-timezone TIMESTAMP -> CAST('x' AS DATETIME) in MySQL
4956 return Ok(Expression::Cast(Box::new(Cast {
4957 this: Expression::Literal(Literal::String(s)),
4958 to: DataType::Custom {
4959 name: "DATETIME".to_string(),
4960 },
4961 trailing_comments: Vec::new(),
4962 double_colon_syntax: false,
4963 format: None,
4964 default: None,
4965 })));
4966 }
4967 }
4968 let dt = match target {
4969 DialectType::BigQuery | DialectType::StarRocks => DataType::Custom {
4970 name: "DATETIME".to_string(),
4971 },
4972 DialectType::Snowflake => {
4973 // BigQuery TIMESTAMP is timezone-aware -> use TIMESTAMPTZ for Snowflake
4974 if matches!(source, DialectType::BigQuery) {
4975 DataType::Custom {
4976 name: "TIMESTAMPTZ".to_string(),
4977 }
4978 } else if matches!(
4979 source,
4980 DialectType::PostgreSQL
4981 | DialectType::Redshift
4982 | DialectType::Snowflake
4983 ) {
4984 DataType::Timestamp {
4985 precision: None,
4986 timezone: false,
4987 }
4988 } else {
4989 DataType::Custom {
4990 name: "TIMESTAMPNTZ".to_string(),
4991 }
4992 }
4993 }
4994 DialectType::Spark | DialectType::Databricks => {
4995 // BigQuery TIMESTAMP is timezone-aware -> use plain TIMESTAMP for Spark/Databricks
4996 if matches!(source, DialectType::BigQuery) {
4997 DataType::Timestamp {
4998 precision: None,
4999 timezone: false,
5000 }
5001 } else {
5002 DataType::Custom {
5003 name: "TIMESTAMP_NTZ".to_string(),
5004 }
5005 }
5006 }
5007 DialectType::ClickHouse => DataType::Nullable {
5008 inner: Box::new(DataType::Custom {
5009 name: "DateTime".to_string(),
5010 }),
5011 },
5012 DialectType::TSQL | DialectType::Fabric => DataType::Custom {
5013 name: "DATETIME2".to_string(),
5014 },
5015 DialectType::DuckDB => {
5016 // DuckDB: use TIMESTAMPTZ when source is BigQuery (BQ TIMESTAMP is always UTC/tz-aware)
5017 // or when the timestamp string explicitly has timezone info
5018 if matches!(source, DialectType::BigQuery)
5019 || Self::timestamp_string_has_timezone(&s)
5020 {
5021 DataType::Custom {
5022 name: "TIMESTAMPTZ".to_string(),
5023 }
5024 } else {
5025 DataType::Timestamp {
5026 precision: None,
5027 timezone: false,
5028 }
5029 }
5030 }
5031 _ => DataType::Timestamp {
5032 precision: None,
5033 timezone: false,
5034 },
5035 };
5036 return Ok(Expression::Cast(Box::new(Cast {
5037 this: Expression::Literal(Literal::String(s)),
5038 to: dt,
5039 trailing_comments: vec![],
5040 double_colon_syntax: false,
5041 format: None,
5042 default: None,
5043 })));
5044 }
5045 }
5046
5047 // PostgreSQL DELETE requires explicit AS for table aliases
5048 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
5049 if let Expression::Delete(ref del) = e {
5050 if del.alias.is_some() && !del.alias_explicit_as {
5051 let mut new_del = del.clone();
5052 new_del.alias_explicit_as = true;
5053 return Ok(Expression::Delete(new_del));
5054 }
5055 }
5056 }
5057
        // UNION/INTERSECT/EXCEPT DISTINCT handling:
        // Some dialects require explicit DISTINCT (BigQuery, ClickHouse),
        // while others don't support it (Presto, Spark, DuckDB, etc.)
        {
            // Targets where a set operation without ALL must spell out DISTINCT.
            let needs_distinct =
                matches!(target, DialectType::BigQuery | DialectType::ClickHouse);
            // Targets where DISTINCT is implied and the keyword must be dropped.
            let drop_distinct = matches!(
                target,
                DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Athena
                    | DialectType::Spark
                    | DialectType::Databricks
                    | DialectType::DuckDB
                    | DialectType::Hive
                    | DialectType::MySQL
                    | DialectType::PostgreSQL
                    | DialectType::SQLite
                    | DialectType::TSQL
                    | DialectType::Redshift
                    | DialectType::Snowflake
                    | DialectType::Oracle
                    | DialectType::Teradata
                    | DialectType::Drill
                    | DialectType::Doris
                    | DialectType::StarRocks
            );
            match &e {
                // First three arms: add the explicit DISTINCT keyword to a
                // non-ALL set operation that doesn't already carry it.
                Expression::Union(u) if !u.all && needs_distinct && !u.distinct => {
                    let mut new_u = (**u).clone();
                    new_u.distinct = true;
                    return Ok(Expression::Union(Box::new(new_u)));
                }
                Expression::Intersect(i) if !i.all && needs_distinct && !i.distinct => {
                    let mut new_i = (**i).clone();
                    new_i.distinct = true;
                    return Ok(Expression::Intersect(Box::new(new_i)));
                }
                Expression::Except(ex) if !ex.all && needs_distinct && !ex.distinct => {
                    let mut new_ex = (**ex).clone();
                    new_ex.distinct = true;
                    return Ok(Expression::Except(Box::new(new_ex)));
                }
                // Last three arms: strip an explicit DISTINCT for targets that
                // treat a bare set operation as DISTINCT already.
                Expression::Union(u) if u.distinct && drop_distinct => {
                    let mut new_u = (**u).clone();
                    new_u.distinct = false;
                    return Ok(Expression::Union(Box::new(new_u)));
                }
                Expression::Intersect(i) if i.distinct && drop_distinct => {
                    let mut new_i = (**i).clone();
                    new_i.distinct = false;
                    return Ok(Expression::Intersect(Box::new(new_i)));
                }
                Expression::Except(ex) if ex.distinct && drop_distinct => {
                    let mut new_ex = (**ex).clone();
                    new_ex.distinct = false;
                    return Ok(Expression::Except(Box::new(new_ex)));
                }
                _ => {}
            }
        }
5119
5120 // ClickHouse: MAP('a', '1') -> map('a', '1') (lowercase function name)
5121 if matches!(target, DialectType::ClickHouse) {
5122 if let Expression::Function(ref f) = e {
5123 if f.name.eq_ignore_ascii_case("MAP") && !f.args.is_empty() {
5124 let mut new_f = f.as_ref().clone();
5125 new_f.name = "map".to_string();
5126 return Ok(Expression::Function(Box::new(new_f)));
5127 }
5128 }
5129 }
5130
5131 // ClickHouse: INTERSECT ALL -> INTERSECT (ClickHouse doesn't support ALL on INTERSECT)
5132 if matches!(target, DialectType::ClickHouse) {
5133 if let Expression::Intersect(ref i) = e {
5134 if i.all {
5135 let mut new_i = (**i).clone();
5136 new_i.all = false;
5137 return Ok(Expression::Intersect(Box::new(new_i)));
5138 }
5139 }
5140 }
5141
        // Integer division: a / b -> CAST(a AS DOUBLE) / b for dialects that need it
        // Only from Generic source, to prevent double-wrapping
        //
        // The listed targets perform integer (truncating) division when both
        // operands are integers, so the left operand is cast to a floating
        // type to force true division.
        if matches!(source, DialectType::Generic) {
            if let Expression::Div(ref op) = e {
                // None means the target already does true division; no cast.
                let cast_type = match target {
                    DialectType::TSQL | DialectType::Fabric => Some(DataType::Float {
                        precision: None,
                        scale: None,
                        real_spelling: false,
                    }),
                    DialectType::Drill
                    | DialectType::Trino
                    | DialectType::Athena
                    | DialectType::Presto => Some(DataType::Double {
                        precision: None,
                        scale: None,
                    }),
                    DialectType::PostgreSQL
                    | DialectType::Redshift
                    | DialectType::Materialize
                    | DialectType::Teradata
                    | DialectType::RisingWave => Some(DataType::Double {
                        precision: None,
                        scale: None,
                    }),
                    _ => None,
                };
                if let Some(dt) = cast_type {
                    // Wrap only the left operand; comments are preserved so the
                    // generator keeps them attached to the same positions.
                    let cast_left = Expression::Cast(Box::new(Cast {
                        this: op.left.clone(),
                        to: dt,
                        double_colon_syntax: false,
                        trailing_comments: Vec::new(),
                        format: None,
                        default: None,
                    }));
                    let new_op = crate::expressions::BinaryOp {
                        left: cast_left,
                        right: op.right.clone(),
                        left_comments: op.left_comments.clone(),
                        operator_comments: op.operator_comments.clone(),
                        trailing_comments: op.trailing_comments.clone(),
                    };
                    return Ok(Expression::Div(Box::new(new_op)));
                }
            }
        }
5189
5190 // CREATE DATABASE -> CREATE SCHEMA for DuckDB target
5191 if matches!(target, DialectType::DuckDB) {
5192 if let Expression::CreateDatabase(db) = e {
5193 let mut schema = crate::expressions::CreateSchema::new(db.name.name.clone());
5194 schema.if_not_exists = db.if_not_exists;
5195 return Ok(Expression::CreateSchema(Box::new(schema)));
5196 }
5197 if let Expression::DropDatabase(db) = e {
5198 let mut schema = crate::expressions::DropSchema::new(db.name.name.clone());
5199 schema.if_exists = db.if_exists;
5200 return Ok(Expression::DropSchema(Box::new(schema)));
5201 }
5202 }
5203
        // Strip ClickHouse Nullable(...) wrapper for non-ClickHouse targets
        //
        // A CAST to a custom type named "Nullable(<inner>)" is unwrapped and
        // the inner ClickHouse type name is mapped onto the portable DataType
        // variant; unrecognized inner names are kept as Custom.
        if matches!(source, DialectType::ClickHouse)
            && !matches!(target, DialectType::ClickHouse)
        {
            if let Expression::Cast(ref c) = e {
                if let DataType::Custom { ref name } = c.to {
                    let upper = name.to_uppercase();
                    if upper.starts_with("NULLABLE(") && upper.ends_with(")") {
                        // Byte slicing is safe here: "Nullable(" is 9 ASCII
                        // bytes and the prefix match guarantees it is present.
                        let inner = &name[9..name.len() - 1]; // strip "Nullable(" and ")"
                        let inner_upper = inner.to_uppercase();
                        let new_dt = match inner_upper.as_str() {
                            "DATETIME" | "DATETIME64" => DataType::Timestamp {
                                precision: None,
                                timezone: false,
                            },
                            "DATE" => DataType::Date,
                            "INT64" | "BIGINT" => DataType::BigInt { length: None },
                            "INT32" | "INT" | "INTEGER" => DataType::Int {
                                length: None,
                                integer_spelling: false,
                            },
                            "FLOAT64" | "DOUBLE" => DataType::Double {
                                precision: None,
                                scale: None,
                            },
                            "STRING" => DataType::Text,
                            // Unknown inner type: preserve its original casing.
                            _ => DataType::Custom {
                                name: inner.to_string(),
                            },
                        };
                        let mut new_cast = c.clone();
                        new_cast.to = new_dt;
                        return Ok(Expression::Cast(new_cast));
                    }
                }
            }
        }
5241
5242 // ARRAY_CONCAT_AGG -> Snowflake: ARRAY_FLATTEN(ARRAY_AGG(...))
5243 if matches!(target, DialectType::Snowflake) {
5244 if let Expression::ArrayConcatAgg(ref agg) = e {
5245 let mut agg_clone = agg.as_ref().clone();
5246 agg_clone.name = None; // Clear name so generator uses default "ARRAY_AGG"
5247 let array_agg = Expression::ArrayAgg(Box::new(agg_clone));
5248 let flatten = Expression::Function(Box::new(Function::new(
5249 "ARRAY_FLATTEN".to_string(),
5250 vec![array_agg],
5251 )));
5252 return Ok(flatten);
5253 }
5254 }
5255
        // ARRAY_CONCAT_AGG -> others: keep as function for cross-dialect
        //
        // For targets without a dedicated rewrite (i.e. anything other than
        // BigQuery/Snowflake), emit the call as a plain ARRAY_CONCAT_AGG(arg)
        // function node and let the target's own generator handle it.
        if !matches!(target, DialectType::BigQuery | DialectType::Snowflake) {
            if let Expression::ArrayConcatAgg(agg) = e {
                // Take ownership of the aggregated argument expression.
                let arg = agg.this;
                return Ok(Expression::Function(Box::new(Function::new(
                    "ARRAY_CONCAT_AGG".to_string(),
                    vec![arg],
                ))));
            }
        }
5266
5267 // Determine what action to take by inspecting e immutably
5268 let action = {
5269 let source_propagates_nulls =
5270 matches!(source, DialectType::Snowflake | DialectType::BigQuery);
5271 let target_ignores_nulls =
5272 matches!(target, DialectType::DuckDB | DialectType::PostgreSQL);
5273
5274 match &e {
5275 Expression::Function(f) => {
5276 let name = f.name.to_uppercase();
5277 // DATE_PART: strip quotes from first arg when target is Snowflake (source != Snowflake)
5278 if (name == "DATE_PART" || name == "DATEPART")
5279 && f.args.len() == 2
5280 && matches!(target, DialectType::Snowflake)
5281 && !matches!(source, DialectType::Snowflake)
5282 && matches!(
5283 &f.args[0],
5284 Expression::Literal(crate::expressions::Literal::String(_))
5285 )
5286 {
5287 Action::DatePartUnquote
5288 } else if source_propagates_nulls
5289 && target_ignores_nulls
5290 && (name == "GREATEST" || name == "LEAST")
5291 && f.args.len() >= 2
5292 {
5293 Action::GreatestLeastNull
5294 } else if matches!(source, DialectType::Snowflake)
5295 && name == "ARRAY_GENERATE_RANGE"
5296 && f.args.len() >= 2
5297 {
5298 Action::ArrayGenerateRange
5299 } else if matches!(source, DialectType::Snowflake)
5300 && matches!(target, DialectType::DuckDB)
5301 && name == "DATE_TRUNC"
5302 && f.args.len() == 2
5303 {
5304 // Determine if DuckDB DATE_TRUNC needs CAST wrapping to preserve input type.
5305 // Logic based on Python sqlglot's input_type_preserved flag:
5306 // - DATE + non-date-unit (HOUR, MINUTE, etc.) -> wrap
5307 // - TIMESTAMP + date-unit (YEAR, QUARTER, MONTH, WEEK, DAY) -> wrap
5308 // - TIMESTAMPTZ/TIMESTAMPLTZ/TIME -> always wrap
5309 let unit_str = match &f.args[0] {
5310 Expression::Literal(crate::expressions::Literal::String(s)) => {
5311 Some(s.to_uppercase())
5312 }
5313 _ => None,
5314 };
5315 let is_date_unit = unit_str.as_ref().map_or(false, |u| {
5316 matches!(u.as_str(), "YEAR" | "QUARTER" | "MONTH" | "WEEK" | "DAY")
5317 });
5318 match &f.args[1] {
5319 Expression::Cast(c) => match &c.to {
5320 DataType::Time { .. } => Action::DateTruncWrapCast,
5321 DataType::Custom { name }
5322 if name.eq_ignore_ascii_case("TIMESTAMPTZ")
5323 || name.eq_ignore_ascii_case("TIMESTAMPLTZ") =>
5324 {
5325 Action::DateTruncWrapCast
5326 }
5327 DataType::Timestamp { timezone: true, .. } => {
5328 Action::DateTruncWrapCast
5329 }
5330 DataType::Date if !is_date_unit => Action::DateTruncWrapCast,
5331 DataType::Timestamp {
5332 timezone: false, ..
5333 } if is_date_unit => Action::DateTruncWrapCast,
5334 _ => Action::None,
5335 },
5336 _ => Action::None,
5337 }
5338 } else if matches!(source, DialectType::Snowflake)
5339 && matches!(target, DialectType::DuckDB)
5340 && name == "TO_DATE"
5341 && f.args.len() == 1
5342 && !matches!(
5343 &f.args[0],
5344 Expression::Literal(crate::expressions::Literal::String(_))
5345 )
5346 {
5347 Action::ToDateToCast
5348 } else if !matches!(source, DialectType::Redshift)
5349 && matches!(target, DialectType::Redshift)
5350 && name == "CONVERT_TIMEZONE"
5351 && (f.args.len() == 2 || f.args.len() == 3)
5352 {
5353 // Convert Function("CONVERT_TIMEZONE") to Expression::ConvertTimezone
5354 // so Redshift's transform_expr won't expand 2-arg to 3-arg with 'UTC'.
5355 // The Redshift parser adds 'UTC' as default source_tz, but when
5356 // transpiling from other dialects, we should preserve the original form.
5357 Action::ConvertTimezoneToExpr
5358 } else if matches!(source, DialectType::Snowflake)
5359 && matches!(target, DialectType::DuckDB)
5360 && name == "REGEXP_REPLACE"
5361 && f.args.len() == 4
5362 && !matches!(
5363 &f.args[3],
5364 Expression::Literal(crate::expressions::Literal::String(_))
5365 )
5366 {
5367 // Snowflake REGEXP_REPLACE with position arg -> DuckDB needs 'g' flag
5368 Action::RegexpReplaceSnowflakeToDuckDB
5369 } else if name == "_BQ_TO_HEX" {
5370 // Internal marker from TO_HEX conversion - bare (no LOWER/UPPER wrapper)
5371 Action::BigQueryToHexBare
5372 } else if matches!(source, DialectType::BigQuery)
5373 && !matches!(target, DialectType::BigQuery)
5374 {
5375 // BigQuery-specific functions that need to be converted to standard forms
5376 match name.as_str() {
5377 "TIMESTAMP_DIFF" | "DATETIME_DIFF" | "TIME_DIFF"
5378 | "DATE_DIFF"
5379 | "TIMESTAMP_ADD" | "TIMESTAMP_SUB"
5380 | "DATETIME_ADD" | "DATETIME_SUB"
5381 | "TIME_ADD" | "TIME_SUB"
5382 | "DATE_ADD" | "DATE_SUB"
5383 | "SAFE_DIVIDE"
5384 | "GENERATE_UUID"
5385 | "COUNTIF"
5386 | "EDIT_DISTANCE"
5387 | "TIMESTAMP_SECONDS" | "TIMESTAMP_MILLIS" | "TIMESTAMP_MICROS"
5388 | "TIMESTAMP_TRUNC" | "DATETIME_TRUNC" | "DATE_TRUNC"
5389 | "TO_HEX"
5390 | "TO_JSON_STRING"
5391 | "GENERATE_ARRAY" | "GENERATE_TIMESTAMP_ARRAY"
5392 | "DIV"
5393 | "UNIX_DATE" | "UNIX_SECONDS" | "UNIX_MILLIS" | "UNIX_MICROS"
5394 | "LAST_DAY"
5395 | "TIME" | "DATETIME" | "TIMESTAMP" | "STRING"
5396 | "REGEXP_CONTAINS"
5397 | "CONTAINS_SUBSTR"
5398 | "SAFE_ADD" | "SAFE_SUBTRACT" | "SAFE_MULTIPLY"
5399 | "SAFE_CAST"
5400 | "GENERATE_DATE_ARRAY"
5401 | "PARSE_DATE" | "PARSE_TIMESTAMP"
5402 | "FORMAT_DATE" | "FORMAT_DATETIME" | "FORMAT_TIMESTAMP"
5403 | "ARRAY_CONCAT"
5404 | "JSON_QUERY" | "JSON_VALUE_ARRAY"
5405 | "INSTR"
5406 | "MD5" | "SHA1" | "SHA256" | "SHA512"
5407 | "GENERATE_UUID()" // just in case
5408 | "REGEXP_EXTRACT_ALL"
5409 | "REGEXP_EXTRACT"
5410 | "INT64"
5411 | "ARRAY_CONCAT_AGG"
5412 | "DATE_DIFF(" // just in case
5413 | "TO_HEX_MD5" // internal
5414 | "MOD"
5415 | "CONCAT"
5416 | "CURRENT_TIMESTAMP" | "CURRENT_DATE" | "CURRENT_DATETIME" | "CURRENT_TIME"
5417 | "STRUCT"
5418 | "ROUND"
5419 | "MAKE_INTERVAL"
5420 | "ARRAY_TO_STRING"
5421 | "PERCENTILE_CONT"
5422 => Action::BigQueryFunctionNormalize,
5423 "ARRAY" if matches!(target, DialectType::Snowflake)
5424 && f.args.len() == 1
5425 && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT"))
5426 => Action::BigQueryArraySelectAsStructToSnowflake,
5427 _ => Action::None,
5428 }
5429 } else if matches!(source, DialectType::BigQuery)
5430 && matches!(target, DialectType::BigQuery)
5431 {
5432 // BigQuery -> BigQuery normalizations
5433 match name.as_str() {
5434 "TIMESTAMP_DIFF"
5435 | "DATETIME_DIFF"
5436 | "TIME_DIFF"
5437 | "DATE_DIFF"
5438 | "DATE_ADD"
5439 | "TO_HEX"
5440 | "CURRENT_TIMESTAMP"
5441 | "CURRENT_DATE"
5442 | "CURRENT_TIME"
5443 | "CURRENT_DATETIME"
5444 | "GENERATE_DATE_ARRAY"
5445 | "INSTR"
5446 | "FORMAT_DATETIME"
5447 | "DATETIME"
5448 | "MAKE_INTERVAL" => Action::BigQueryFunctionNormalize,
5449 _ => Action::None,
5450 }
5451 } else {
5452 // Generic function normalization for non-BigQuery sources
5453 match name.as_str() {
5454 "ARBITRARY" | "AGGREGATE"
5455 | "REGEXP_MATCHES" | "REGEXP_FULL_MATCH"
5456 | "STRUCT_EXTRACT"
5457 | "LIST_FILTER" | "LIST_TRANSFORM" | "LIST_SORT" | "LIST_REVERSE_SORT"
5458 | "STRING_TO_ARRAY" | "STR_SPLIT" | "STR_SPLIT_REGEX" | "SPLIT_TO_ARRAY"
5459 | "SUBSTRINGINDEX"
5460 | "ARRAY_LENGTH" | "SIZE" | "CARDINALITY"
5461 | "UNICODE"
5462 | "XOR"
5463 | "ARRAY_REVERSE_SORT"
5464 | "ENCODE" | "DECODE"
5465 | "QUANTILE"
5466 | "EPOCH" | "EPOCH_MS"
5467 | "HASHBYTES"
5468 | "JSON_EXTRACT_PATH" | "JSON_EXTRACT_PATH_TEXT"
5469 | "APPROX_DISTINCT"
5470 | "DATE_PARSE" | "FORMAT_DATETIME"
5471 | "REGEXP_EXTRACT" | "REGEXP_SUBSTR" | "TO_DAYS"
5472 | "RLIKE"
5473 | "DATEDIFF" | "DATE_DIFF" | "MONTHS_BETWEEN"
5474 | "ADD_MONTHS" | "DATEADD" | "DATE_ADD" | "DATE_SUB" | "DATETRUNC"
5475 | "LAST_DAY" | "LAST_DAY_OF_MONTH" | "EOMONTH"
5476 | "ARRAY_CONSTRUCT" | "ARRAY_CAT" | "ARRAY_COMPACT"
5477 | "ARRAY_FILTER" | "FILTER" | "REDUCE" | "ARRAY_REVERSE"
5478 | "MAP" | "MAP_FROM_ENTRIES"
5479 | "COLLECT_LIST" | "COLLECT_SET"
5480 | "ISNAN" | "IS_NAN"
5481 | "TO_UTC_TIMESTAMP" | "FROM_UTC_TIMESTAMP"
5482 | "FORMAT_NUMBER"
5483 | "TOMONDAY" | "TOSTARTOFWEEK" | "TOSTARTOFMONTH" | "TOSTARTOFYEAR"
5484 | "ELEMENT_AT"
5485 | "EXPLODE" | "EXPLODE_OUTER" | "POSEXPLODE"
5486 | "SPLIT_PART"
5487 // GENERATE_SERIES: handled separately below
5488 | "JSON_EXTRACT" | "JSON_EXTRACT_SCALAR"
5489 | "JSON_QUERY" | "JSON_VALUE"
5490 | "JSON_EXTRACT_JSON" | "BSON_EXTRACT_BSON"
5491 | "TO_UNIX_TIMESTAMP" | "UNIX_TIMESTAMP"
5492 | "CURDATE" | "CURTIME"
5493 | "ARRAY_TO_STRING"
5494 | "ARRAY_SORT" | "SORT_ARRAY"
5495 | "LEFT" | "RIGHT"
5496 | "MAP_FROM_ARRAYS"
5497 | "LIKE" | "ILIKE"
5498 | "ARRAY_CONCAT" | "LIST_CONCAT"
5499 | "QUANTILE_CONT" | "QUANTILE_DISC"
5500 | "PERCENTILE_CONT" | "PERCENTILE_DISC"
5501 | "PERCENTILE_APPROX" | "APPROX_PERCENTILE"
5502 | "LOCATE" | "STRPOS" | "INSTR"
5503 | "CHAR"
5504 // CONCAT: handled separately for COALESCE wrapping
5505 | "ARRAY_JOIN"
5506 | "ARRAY_CONTAINS" | "HAS" | "CONTAINS"
5507 | "ISNULL"
5508 | "MONTHNAME"
5509 | "TO_TIMESTAMP"
5510 | "TO_DATE"
5511 | "TO_JSON"
5512 | "REGEXP_SPLIT"
5513 | "SPLIT"
5514 | "FORMATDATETIME"
5515 | "ARRAYJOIN"
5516 | "SPLITBYSTRING" | "SPLITBYREGEXP"
5517 | "NVL"
5518 | "TO_CHAR"
5519 | "DBMS_RANDOM.VALUE"
5520 | "REGEXP_LIKE"
5521 | "REPLICATE"
5522 | "LEN"
5523 | "COUNT_BIG"
5524 | "DATEFROMPARTS"
5525 | "DATETIMEFROMPARTS"
5526 | "CONVERT" | "TRY_CONVERT"
5527 | "STRFTIME" | "STRPTIME"
5528 | "DATE_FORMAT" | "FORMAT_DATE"
5529 | "PARSE_TIMESTAMP" | "PARSE_DATE"
5530 | "FROM_BASE64" | "TO_BASE64"
5531 | "GETDATE"
5532 | "TO_HEX" | "FROM_HEX" | "UNHEX" | "HEX"
5533 | "TO_UTF8" | "FROM_UTF8"
5534 | "STARTS_WITH" | "STARTSWITH"
5535 | "APPROX_COUNT_DISTINCT"
5536 | "JSON_FORMAT"
5537 | "SYSDATE"
5538 | "LOGICAL_OR" | "LOGICAL_AND"
5539 | "MONTHS_ADD"
5540 | "SCHEMA_NAME"
5541 | "STRTOL"
5542 | "EDITDIST3"
5543 | "FORMAT"
5544 | "LIST_CONTAINS" | "LIST_HAS"
5545 | "VARIANCE" | "STDDEV"
5546 | "ISINF"
5547 | "TO_UNIXTIME"
5548 | "FROM_UNIXTIME"
5549 | "DATEPART" | "DATE_PART"
5550 | "DATENAME"
5551 | "STRING_AGG"
5552 | "JSON_ARRAYAGG"
5553 | "APPROX_QUANTILE"
5554 | "MAKE_DATE"
5555 | "LIST_HAS_ANY" | "ARRAY_HAS_ANY"
5556 | "RANGE"
5557 | "TRY_ELEMENT_AT"
5558 | "STR_TO_MAP"
5559 | "STRING"
5560 | "STR_TO_TIME"
5561 | "CURRENT_SCHEMA"
5562 | "LTRIM" | "RTRIM"
5563 | "UUID"
5564 | "FARM_FINGERPRINT"
5565 | "JSON_KEYS"
5566 | "WEEKOFYEAR"
5567 | "CONCAT_WS"
5568 | "ARRAY_SLICE"
5569 | "ARRAY_PREPEND"
5570 | "ARRAY_REMOVE"
5571 | "GENERATE_DATE_ARRAY"
5572 | "PARSE_JSON"
5573 | "JSON_REMOVE"
5574 | "JSON_SET"
5575 | "LEVENSHTEIN"
5576 => Action::GenericFunctionNormalize,
5577 // Canonical date functions -> dialect-specific
5578 "TS_OR_DS_TO_DATE" => Action::TsOrDsToDateConvert,
5579 "TS_OR_DS_TO_DATE_STR" if f.args.len() == 1 => Action::TsOrDsToDateStrConvert,
5580 "DATE_STR_TO_DATE" if f.args.len() == 1 => Action::DateStrToDateConvert,
5581 "TIME_STR_TO_DATE" if f.args.len() == 1 => Action::TimeStrToDateConvert,
5582 "TIME_STR_TO_TIME" if f.args.len() <= 2 => Action::TimeStrToTimeConvert,
5583 "TIME_STR_TO_UNIX" if f.args.len() == 1 => Action::TimeStrToUnixConvert,
5584 "TIME_TO_TIME_STR" if f.args.len() == 1 => Action::TimeToTimeStrConvert,
5585 "DATE_TO_DATE_STR" if f.args.len() == 1 => Action::DateToDateStrConvert,
5586 "DATE_TO_DI" if f.args.len() == 1 => Action::DateToDiConvert,
5587 "DI_TO_DATE" if f.args.len() == 1 => Action::DiToDateConvert,
5588 "TS_OR_DI_TO_DI" if f.args.len() == 1 => Action::TsOrDiToDiConvert,
5589 "UNIX_TO_STR" if f.args.len() == 2 => Action::UnixToStrConvert,
5590 "UNIX_TO_TIME" if f.args.len() == 1 => Action::UnixToTimeConvert,
5591 "UNIX_TO_TIME_STR" if f.args.len() == 1 => Action::UnixToTimeStrConvert,
5592 "TIME_TO_UNIX" if f.args.len() == 1 => Action::TimeToUnixConvert,
5593 "TIME_TO_STR" if f.args.len() == 2 => Action::TimeToStrConvert,
5594 "STR_TO_UNIX" if f.args.len() == 2 => Action::StrToUnixConvert,
5595 // STR_TO_DATE(x, fmt) -> dialect-specific
5596 "STR_TO_DATE" if f.args.len() == 2
5597 && matches!(source, DialectType::Generic) => Action::StrToDateConvert,
5598 "STR_TO_DATE" => Action::GenericFunctionNormalize,
5599 // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> dialect-specific DATE_ADD
5600 "TS_OR_DS_ADD" if f.args.len() == 3
5601 && matches!(source, DialectType::Generic) => Action::TsOrDsAddConvert,
5602 // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, '1970-01-01')
5603 "DATE_FROM_UNIX_DATE" if f.args.len() == 1 => Action::DateFromUnixDateConvert,
5604 // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b [ELSE c] END
5605 "NVL2" if (f.args.len() == 2 || f.args.len() == 3) => Action::Nvl2Expand,
5606 // IFNULL(a, b) -> COALESCE(a, b) when coming from Generic source
5607 "IFNULL" if f.args.len() == 2 => Action::IfnullToCoalesce,
5608 // IS_ASCII(x) -> dialect-specific
5609 "IS_ASCII" if f.args.len() == 1 => Action::IsAsciiConvert,
5610 // STR_POSITION(haystack, needle[, pos[, occ]]) -> dialect-specific
5611 "STR_POSITION" => Action::StrPositionConvert,
5612 // ARRAY_SUM -> dialect-specific
5613 "ARRAY_SUM" => Action::ArraySumConvert,
5614 // ARRAY_SIZE -> dialect-specific (Drill only)
5615 "ARRAY_SIZE" if matches!(target, DialectType::Drill) => Action::ArraySizeConvert,
5616 // ARRAY_ANY -> dialect-specific
5617 "ARRAY_ANY" if f.args.len() == 2 => Action::ArrayAnyConvert,
5618 // Functions needing specific cross-dialect transforms
5619 "MAX_BY" | "MIN_BY" if matches!(target, DialectType::ClickHouse | DialectType::Spark | DialectType::Databricks | DialectType::DuckDB) => Action::MaxByMinByConvert,
5620 "STRUCT" if matches!(source, DialectType::Spark | DialectType::Databricks)
5621 && !matches!(target, DialectType::Spark | DialectType::Databricks | DialectType::Hive) => Action::SparkStructConvert,
5622 "ARRAY" if matches!(source, DialectType::BigQuery)
5623 && matches!(target, DialectType::Snowflake)
5624 && f.args.len() == 1
5625 && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT")) => Action::BigQueryArraySelectAsStructToSnowflake,
5626 "ARRAY" if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena | DialectType::BigQuery | DialectType::DuckDB | DialectType::ClickHouse | DialectType::StarRocks) => Action::ArraySyntaxConvert,
5627 "TRUNC" if f.args.len() == 2 && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::ClickHouse) => Action::TruncToDateTrunc,
5628 // DATE_TRUNC('unit', x) from Generic source -> arg swap for BigQuery/Doris/Spark/MySQL
5629 "DATE_TRUNC" if f.args.len() == 2
5630 && matches!(source, DialectType::Generic)
5631 && matches!(target, DialectType::BigQuery | DialectType::Doris | DialectType::StarRocks
5632 | DialectType::Spark | DialectType::Databricks | DialectType::MySQL) => Action::DateTruncSwapArgs,
5633 // TIMESTAMP_TRUNC(x, UNIT) from Generic source -> convert to per-dialect
5634 "TIMESTAMP_TRUNC" if f.args.len() >= 2
5635 && matches!(source, DialectType::Generic) => Action::TimestampTruncConvert,
5636 "UNIFORM" if matches!(target, DialectType::Snowflake) => Action::GenericFunctionNormalize,
5637 // GENERATE_SERIES -> SEQUENCE/UNNEST/EXPLODE for target dialects
5638 "GENERATE_SERIES" if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
5639 && !matches!(target, DialectType::PostgreSQL | DialectType::Redshift | DialectType::TSQL | DialectType::Fabric) => Action::GenerateSeriesConvert,
5640 // GENERATE_SERIES with interval normalization for PG target
5641 "GENERATE_SERIES" if f.args.len() >= 3
5642 && matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
5643 && matches!(target, DialectType::PostgreSQL | DialectType::Redshift) => Action::GenerateSeriesConvert,
5644 "GENERATE_SERIES" => Action::None, // passthrough for other cases
5645 // CONCAT(a, b) -> COALESCE wrapping for Presto/ClickHouse from PostgreSQL
5646 "CONCAT" if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
5647 && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::ClickHouse) => Action::ConcatCoalesceWrap,
5648 "CONCAT" => Action::GenericFunctionNormalize,
5649 // DIV(a, b) -> target-specific integer division
5650 "DIV" if f.args.len() == 2
5651 && matches!(source, DialectType::PostgreSQL)
5652 && matches!(target, DialectType::DuckDB | DialectType::BigQuery | DialectType::SQLite) => Action::DivFuncConvert,
5653 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
5654 "JSON_OBJECT_AGG" | "JSONB_OBJECT_AGG" if f.args.len() == 2
5655 && matches!(target, DialectType::DuckDB) => Action::JsonObjectAggConvert,
5656 // JSONB_EXISTS -> JSON_EXISTS for DuckDB
5657 "JSONB_EXISTS" if f.args.len() == 2
5658 && matches!(target, DialectType::DuckDB) => Action::JsonbExistsConvert,
5659 // DATE_BIN -> TIME_BUCKET for DuckDB
5660 "DATE_BIN" if matches!(target, DialectType::DuckDB) => Action::DateBinConvert,
5661 // Multi-arg MIN(a,b,c) -> LEAST, MAX(a,b,c) -> GREATEST
5662 "MIN" | "MAX" if f.args.len() > 1 && !matches!(target, DialectType::SQLite) => Action::MinMaxToLeastGreatest,
5663 // ClickHouse uniq -> APPROX_COUNT_DISTINCT for other dialects
5664 "UNIQ" if matches!(source, DialectType::ClickHouse) && !matches!(target, DialectType::ClickHouse) => Action::ClickHouseUniqToApproxCountDistinct,
5665 // ClickHouse any -> ANY_VALUE for other dialects
5666 "ANY" if f.args.len() == 1 && matches!(source, DialectType::ClickHouse) && !matches!(target, DialectType::ClickHouse) => Action::ClickHouseAnyToAnyValue,
5667 _ => Action::None,
5668 }
5669 }
5670 }
// Aggregate calls the parser recognized as dedicated AggregateFunction nodes.
// Dispatch on the upper-cased name so the lookup is case-insensitive.
Expression::AggregateFunction(af) => {
    let name = af.name.to_uppercase();
    match name.as_str() {
        // Handled by the generic per-dialect function normalizer.
        "ARBITRARY" | "AGGREGATE" => Action::GenericFunctionNormalize,
        "JSON_ARRAYAGG" => Action::GenericFunctionNormalize,
        // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
        "JSON_OBJECT_AGG" | "JSONB_OBJECT_AGG"
            if matches!(target, DialectType::DuckDB) =>
        {
            Action::JsonObjectAggConvert
        }
        // ARRAY_AGG -> COLLECT_LIST for the Hive family of targets.
        "ARRAY_AGG"
            if matches!(
                target,
                DialectType::Hive
                    | DialectType::Spark
                    | DialectType::Databricks
            ) =>
        {
            Action::ArrayAggToCollectList
        }
        // MAX_BY/MIN_BY need a target-specific rewrite on these dialects.
        "MAX_BY" | "MIN_BY"
            if matches!(
                target,
                DialectType::ClickHouse
                    | DialectType::Spark
                    | DialectType::Databricks
                    | DialectType::DuckDB
            ) =>
        {
            Action::MaxByMinByConvert
        }
        // Hive-style COLLECT_LIST -> ARRAY_AGG on these targets.
        "COLLECT_LIST"
            if matches!(
                target,
                DialectType::Presto | DialectType::Trino | DialectType::DuckDB
            ) =>
        {
            Action::CollectListToArrayAgg
        }
        // COLLECT_SET -> distinct-aggregation equivalent on these targets.
        "COLLECT_SET"
            if matches!(
                target,
                DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Snowflake
                    | DialectType::DuckDB
            ) =>
        {
            Action::CollectSetConvert
        }
        // PERCENTILE -> target-specific percentile spelling.
        "PERCENTILE"
            if matches!(
                target,
                DialectType::DuckDB | DialectType::Presto | DialectType::Trino
            ) =>
        {
            Action::PercentileConvert
        }
        // CORR -> CASE WHEN ISNAN(CORR(a,b)) THEN NULL ELSE CORR(a,b) END for DuckDB
        "CORR"
            if matches!(target, DialectType::DuckDB)
                && matches!(source, DialectType::Snowflake) =>
        {
            Action::CorrIsnanWrap
        }
        // BigQuery APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
        "APPROX_QUANTILES"
            if matches!(source, DialectType::BigQuery)
                && matches!(target, DialectType::DuckDB) =>
        {
            Action::BigQueryApproxQuantiles
        }
        // BigQuery PERCENTILE_CONT(x, frac RESPECT NULLS) -> QUANTILE_CONT(x, frac) for DuckDB
        // (requires at least the value and fraction arguments)
        "PERCENTILE_CONT"
            if matches!(source, DialectType::BigQuery)
                && matches!(target, DialectType::DuckDB)
                && af.args.len() >= 2 =>
        {
            Action::BigQueryPercentileContToDuckDB
        }
        _ => Action::None,
    }
}
// JSON_ARRAYAGG nodes are only rewritten when targeting PostgreSQL;
// all other targets keep the node as parsed.
Expression::JSONArrayAgg(_) => match target {
    DialectType::PostgreSQL => Action::GenericFunctionNormalize,
    _ => Action::None,
},
Expression::ToNumber(tn) => {
    // TO_NUMBER(x) with 1 arg -> CAST(x AS DOUBLE) for most targets
    if tn.format.is_none() && tn.precision.is_none() && tn.scale.is_none() {
        match target {
            // These dialects accept bare TO_NUMBER natively; leave untouched.
            DialectType::Oracle
            | DialectType::Snowflake
            | DialectType::Teradata => Action::None,
            _ => Action::GenericFunctionNormalize,
        }
    } else {
        // A format/precision/scale is present: no rewrite at this level.
        Action::None
    }
}
Expression::Nvl2(_) => {
    // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b ELSE c END for most dialects
    // Keep as NVL2 for dialects that support it natively
    match target {
        DialectType::Oracle
        | DialectType::Snowflake
        | DialectType::Teradata
        | DialectType::Spark
        | DialectType::Databricks
        | DialectType::Redshift => Action::None,
        _ => Action::Nvl2Expand,
    }
}
Expression::Decode(_) | Expression::DecodeCase(_) => {
    // DECODE(a, b, c[, d, e[, ...]]) -> CASE WHEN with null-safe comparisons
    // Keep as DECODE for Oracle/Snowflake
    match target {
        DialectType::Oracle | DialectType::Snowflake => Action::None,
        _ => Action::DecodeSimplify,
    }
}
Expression::Coalesce(ref cf) => {
    // IFNULL(a, b) -> COALESCE(a, b): clear original_name for cross-dialect
    // BigQuery keeps IFNULL natively when source is also BigQuery
    // (original_name records the spelling the parser saw).
    if cf.original_name.as_deref() == Some("IFNULL")
        && !(matches!(source, DialectType::BigQuery)
            && matches!(target, DialectType::BigQuery))
    {
        Action::IfnullToCoalesce
    } else {
        Action::None
    }
}
Expression::IfFunc(if_func) => {
    // Snowflake IF(cond, x, a / b) whose false branch is a division gets a
    // typed-division rewrite for Presto/Trino/SQLite targets
    // (presumably produced by DIV0 expansion — confirm upstream).
    if matches!(source, DialectType::Snowflake)
        && matches!(
            target,
            DialectType::Presto | DialectType::Trino | DialectType::SQLite
        )
        && matches!(if_func.false_value, Some(Expression::Div(_)))
    {
        Action::Div0TypedDivision
    } else {
        Action::None
    }
}
// TO_JSON needs a target-specific spelling on these dialects; all three
// branches map to the same conversion action.
Expression::ToJson(_) => match target {
    DialectType::Presto | DialectType::Trino => Action::ToJsonConvert,
    DialectType::BigQuery => Action::ToJsonConvert,
    DialectType::DuckDB => Action::ToJsonConvert,
    _ => Action::None,
},
// ARRAY_AGG rewrites; the branches below are checked in order, so earlier
// target matches shadow later ones.
Expression::ArrayAgg(ref agg) => {
    if matches!(
        target,
        DialectType::Hive | DialectType::Spark | DialectType::Databricks
    ) {
        // Any source -> Hive/Spark: convert ARRAY_AGG to COLLECT_LIST
        Action::ArrayAggToCollectList
    } else if matches!(
        source,
        DialectType::Spark | DialectType::Databricks | DialectType::Hive
    ) && matches!(target, DialectType::DuckDB)
        && agg.filter.is_some()
    {
        // Spark/Hive ARRAY_AGG excludes NULLs, DuckDB includes them
        // Need to add NOT x IS NULL to existing filter
        Action::ArrayAggNullFilter
    } else if matches!(target, DialectType::DuckDB)
        && agg.ignore_nulls == Some(true)
        && !agg.order_by.is_empty()
    {
        // BigQuery ARRAY_AGG(x IGNORE NULLS ORDER BY ...) -> DuckDB ARRAY_AGG(x ORDER BY a NULLS FIRST, ...)
        Action::ArrayAggIgnoreNullsDuckDB
    } else if !matches!(source, DialectType::Snowflake) {
        // Everything below applies to Snowflake sources only.
        Action::None
    } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
        // NOTE(review): this branch is unreachable — a Spark/Databricks
        // target is already consumed by the first branch above, which
        // returns ArrayAggToCollectList for any source. Confirm intent
        // before removing.
        let is_array_agg = agg.name.as_deref().map(|n| n.to_uppercase())
            == Some("ARRAY_AGG".to_string())
            || agg.name.is_none();
        if is_array_agg {
            Action::ArrayAggCollectList
        } else {
            Action::None
        }
    } else if matches!(
        target,
        DialectType::DuckDB | DialectType::Presto | DialectType::Trino
    ) && agg.filter.is_none()
    {
        // Snowflake ARRAY_AGG without an existing FILTER clause.
        Action::ArrayAggFilter
    } else {
        Action::None
    }
}
// WITHIN GROUP (ORDER BY ...) wrappers: route based on the wrapped aggregate.
Expression::WithinGroup(wg) => {
    // Snowflake ARRAY_AGG(...) WITHIN GROUP -> filter-based form on these targets.
    if matches!(source, DialectType::Snowflake)
        && matches!(
            target,
            DialectType::DuckDB | DialectType::Presto | DialectType::Trino
        )
        && matches!(wg.this, Expression::ArrayAgg(_))
    {
        Action::ArrayAggWithinGroupFilter
    // STRING_AGG may arrive as any of three node shapes depending on the parser path.
    } else if matches!(&wg.this, Expression::AggregateFunction(af) if af.name.eq_ignore_ascii_case("STRING_AGG"))
        || matches!(&wg.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("STRING_AGG"))
        || matches!(&wg.this, Expression::StringAgg(_))
    {
        Action::StringAggConvert
    // PERCENTILE_CONT/PERCENTILE_DISC (also any of three node shapes) for
    // targets that express percentiles without WITHIN GROUP.
    } else if matches!(
        target,
        DialectType::Presto
            | DialectType::Trino
            | DialectType::Athena
            | DialectType::Spark
            | DialectType::Databricks
    ) && (matches!(&wg.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("PERCENTILE_CONT") || f.name.eq_ignore_ascii_case("PERCENTILE_DISC"))
        || matches!(&wg.this, Expression::AggregateFunction(af) if af.name.eq_ignore_ascii_case("PERCENTILE_CONT") || af.name.eq_ignore_ascii_case("PERCENTILE_DISC"))
        || matches!(&wg.this, Expression::PercentileCont(_)))
    {
        Action::PercentileContConvert
    } else {
        Action::None
    }
}
// For BigQuery: CAST(x AS TIMESTAMP) -> CAST(x AS DATETIME)
// because BigQuery's TIMESTAMP is really TIMESTAMPTZ, and
// DATETIME is the timezone-unaware type
Expression::Cast(ref c) => {
    // CAST(x AS type FORMAT 'fmt') only exists in BigQuery/Teradata syntax.
    if c.format.is_some()
        && (matches!(source, DialectType::BigQuery)
            || matches!(source, DialectType::Teradata))
    {
        Action::BigQueryCastFormat
    } else if matches!(target, DialectType::BigQuery)
        && !matches!(source, DialectType::BigQuery)
        && matches!(
            c.to,
            DataType::Timestamp {
                timezone: false,
                ..
            }
        )
    {
        Action::CastTimestampToDatetime
    } else if matches!(target, DialectType::MySQL | DialectType::StarRocks)
        && !matches!(source, DialectType::MySQL | DialectType::StarRocks)
        && matches!(
            c.to,
            DataType::Timestamp {
                timezone: false,
                ..
            }
        )
    {
        // Generic/other -> MySQL/StarRocks: CAST(x AS TIMESTAMP) -> CAST(x AS DATETIME)
        // but MySQL-native CAST(x AS TIMESTAMP) stays as TIMESTAMP(x) via transform_cast
        Action::CastTimestampToDatetime
    } else if matches!(
        source,
        DialectType::Hive | DialectType::Spark | DialectType::Databricks
    ) && matches!(
        target,
        DialectType::Presto
            | DialectType::Trino
            | DialectType::Athena
            | DialectType::DuckDB
            | DialectType::Snowflake
            | DialectType::BigQuery
            | DialectType::Databricks
            | DialectType::TSQL
    ) {
        // Hive-family CAST is lenient (returns NULL on failure); map to TRY_CAST.
        Action::HiveCastToTryCast
    } else if matches!(c.to, DataType::Timestamp { timezone: true, .. })
        && matches!(target, DialectType::MySQL | DialectType::StarRocks)
    {
        // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) function for MySQL/StarRocks
        Action::CastTimestamptzToFunc
    } else if matches!(c.to, DataType::Timestamp { timezone: true, .. })
        && matches!(
            target,
            DialectType::Hive
                | DialectType::Spark
                | DialectType::Databricks
                | DialectType::BigQuery
        )
    {
        // CAST(x AS TIMESTAMP WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark/BigQuery
        Action::CastTimestampStripTz
    } else if matches!(&c.to, DataType::Json)
        && matches!(&c.this, Expression::Literal(Literal::String(_)))
        && matches!(
            target,
            DialectType::Presto
                | DialectType::Trino
                | DialectType::Athena
                | DialectType::Snowflake
        )
    {
        // CAST('x' AS JSON) -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake
        // Only when the input is a string literal (JSON 'value' syntax)
        Action::JsonLiteralToJsonParse
    } else if matches!(&c.to, DataType::Json | DataType::JsonB)
        && matches!(target, DialectType::Spark | DialectType::Databricks)
    {
        // CAST(x AS JSON) -> TO_JSON(x) for Spark
        Action::CastToJsonForSpark
    } else if (matches!(
        &c.to,
        DataType::Array { .. } | DataType::Map { .. } | DataType::Struct { .. }
    )) && matches!(
        target,
        DialectType::Spark | DialectType::Databricks
    ) && (matches!(&c.this, Expression::ParseJson(_))
        || matches!(
            &c.this,
            Expression::Function(f)
                if f.name.eq_ignore_ascii_case("JSON_EXTRACT")
                    || f.name.eq_ignore_ascii_case("JSON_EXTRACT_SCALAR")
                    || f.name.eq_ignore_ascii_case("GET_JSON_OBJECT")
        ))
    {
        // CAST(JSON_PARSE(...) AS ARRAY/MAP) or CAST(JSON_EXTRACT/GET_JSON_OBJECT(...) AS ARRAY/MAP)
        // -> FROM_JSON(..., type_string) for Spark
        Action::CastJsonToFromJson
    } else if matches!(target, DialectType::Spark | DialectType::Databricks)
        && matches!(
            c.to,
            DataType::Timestamp {
                timezone: false,
                ..
            }
        )
        && matches!(source, DialectType::DuckDB)
    {
        // DuckDB -> Spark timestamp cast rewrite.
        Action::StrftimeCastTimestamp
    } else if matches!(source, DialectType::DuckDB)
        && matches!(
            c.to,
            DataType::Decimal {
                precision: None,
                ..
            }
        )
    {
        // DuckDB DECIMAL without an explicit precision gets a default filled in.
        Action::DecimalDefaultPrecision
    } else if matches!(source, DialectType::MySQL | DialectType::SingleStore)
        && matches!(c.to, DataType::Char { length: None })
        && !matches!(target, DialectType::MySQL | DialectType::SingleStore)
    {
        // MySQL CAST(x AS CHAR) was originally TEXT - convert to target text type
        Action::MysqlCastCharToText
    } else if matches!(
        source,
        DialectType::Spark | DialectType::Databricks | DialectType::Hive
    ) && matches!(
        target,
        DialectType::Spark | DialectType::Databricks | DialectType::Hive
    ) && Self::has_varchar_char_type(&c.to)
    {
        // Spark parses VARCHAR(n)/CHAR(n) as TEXT, so normalize back to STRING
        Action::SparkCastVarcharToString
    } else {
        Action::None
    }
}
Expression::SafeCast(ref c) => {
    // SAFE_CAST with a FORMAT clause is BigQuery-only syntax; rewrite it
    // when leaving BigQuery.
    if c.format.is_some()
        && matches!(source, DialectType::BigQuery)
        && !matches!(target, DialectType::BigQuery)
    {
        Action::BigQueryCastFormat
    } else {
        Action::None
    }
}
// For DuckDB: DATE_TRUNC should preserve the input type
Expression::DateTrunc(_) | Expression::TimestampTrunc(_) => {
    if matches!(source, DialectType::Snowflake)
        && matches!(target, DialectType::DuckDB)
    {
        // Wrap in a CAST so the truncated value keeps its original type.
        Action::DateTruncWrapCast
    } else {
        Action::None
    }
}
// For DuckDB: SET a = 1 -> SET VARIABLE a = 1
Expression::SetStatement(s) => {
    // Only items without an explicit kind are plain assignments that need
    // the VARIABLE keyword; TSQL/Fabric SET statements are excluded.
    if matches!(target, DialectType::DuckDB)
        && !matches!(source, DialectType::TSQL | DialectType::Fabric)
        && s.items.iter().any(|item| item.kind.is_none())
    {
        Action::SetToVariable
    } else {
        Action::None
    }
}
// Cross-dialect NULL ordering normalization.
// When nulls_first is not specified, fill in the source dialect's implied
// default so the target generator can correctly add/strip NULLS FIRST/LAST.
Expression::Ordered(o) => {
    // MySQL doesn't support NULLS FIRST/LAST - strip or rewrite
    if matches!(target, DialectType::MySQL) && o.nulls_first.is_some() {
        Action::MysqlNullsOrdering
    } else {
        // Skip targets that don't support NULLS FIRST/LAST syntax
        let target_supports_nulls = !matches!(
            target,
            DialectType::MySQL
                | DialectType::TSQL
                | DialectType::StarRocks
                | DialectType::Doris
        );
        // Only fill in the implied default when actually crossing dialects.
        if o.nulls_first.is_none() && source != target && target_supports_nulls
        {
            Action::NullsOrdering
        } else {
            Action::None
        }
    }
}
// BigQuery data types: convert INT64, BYTES, NUMERIC etc. to standard types
Expression::DataType(dt) => {
    if matches!(source, DialectType::BigQuery)
        && !matches!(target, DialectType::BigQuery)
    {
        // BigQuery spells its scalar types as custom identifiers; map the
        // known ones to standard types when leaving BigQuery.
        match dt {
            DataType::Custom { ref name }
                if name.eq_ignore_ascii_case("INT64")
                    || name.eq_ignore_ascii_case("FLOAT64")
                    || name.eq_ignore_ascii_case("BOOL")
                    || name.eq_ignore_ascii_case("BYTES")
                    || name.eq_ignore_ascii_case("NUMERIC")
                    || name.eq_ignore_ascii_case("STRING")
                    || name.eq_ignore_ascii_case("DATETIME") =>
            {
                Action::BigQueryCastType
            }
            _ => Action::None,
        }
    } else if matches!(source, DialectType::TSQL) {
        // For TSQL source -> any target (including TSQL itself for REAL)
        match dt {
            // REAL -> FLOAT even for TSQL->TSQL
            DataType::Custom { ref name }
                if name.eq_ignore_ascii_case("REAL") =>
            {
                Action::TSQLTypeNormalize
            }
            // REAL may also have been parsed into a Float node that remembers
            // the REAL spelling.
            DataType::Float {
                real_spelling: true,
                ..
            } => Action::TSQLTypeNormalize,
            // Other TSQL type normalizations only for non-TSQL targets.
            // starts_with covers the parameterized spellings, e.g.
            // NUMERIC(p,s), DATETIME2(n), TIME(n).
            DataType::Custom { ref name }
                if !matches!(target, DialectType::TSQL)
                    && (name.eq_ignore_ascii_case("MONEY")
                        || name.eq_ignore_ascii_case("SMALLMONEY")
                        || name.eq_ignore_ascii_case("DATETIME2")
                        || name.eq_ignore_ascii_case("IMAGE")
                        || name.eq_ignore_ascii_case("BIT")
                        || name.eq_ignore_ascii_case("ROWVERSION")
                        || name.eq_ignore_ascii_case("UNIQUEIDENTIFIER")
                        || name.eq_ignore_ascii_case("DATETIMEOFFSET")
                        || name.to_uppercase().starts_with("NUMERIC")
                        || name.to_uppercase().starts_with("DATETIME2(")
                        || name.to_uppercase().starts_with("TIME(")) =>
            {
                Action::TSQLTypeNormalize
            }
            // FLOAT(p) -> normalized float for non-TSQL targets.
            DataType::Float {
                precision: Some(_), ..
            } if !matches!(target, DialectType::TSQL) => {
                Action::TSQLTypeNormalize
            }
            DataType::TinyInt { .. }
                if !matches!(target, DialectType::TSQL) =>
            {
                Action::TSQLTypeNormalize
            }
            // INTEGER -> INT for Databricks/Spark targets
            DataType::Int {
                integer_spelling: true,
                ..
            } if matches!(
                target,
                DialectType::Databricks | DialectType::Spark
            ) =>
            {
                Action::TSQLTypeNormalize
            }
            _ => Action::None,
        }
    } else if (matches!(source, DialectType::Oracle)
        || matches!(source, DialectType::Generic))
        && !matches!(target, DialectType::Oracle)
    {
        // Oracle VARCHAR2/NVARCHAR2 (bare or parameterized) -> VARCHAR.
        match dt {
            DataType::Custom { ref name }
                if name.to_uppercase().starts_with("VARCHAR2(")
                    || name.to_uppercase().starts_with("NVARCHAR2(")
                    || name.eq_ignore_ascii_case("VARCHAR2")
                    || name.eq_ignore_ascii_case("NVARCHAR2") =>
            {
                Action::OracleVarchar2ToVarchar
            }
            _ => Action::None,
        }
    } else if matches!(target, DialectType::Snowflake)
        && !matches!(source, DialectType::Snowflake)
    {
        // When target is Snowflake but source is NOT Snowflake,
        // protect FLOAT from being converted to DOUBLE by Snowflake's transform.
        // Snowflake treats FLOAT=DOUBLE internally, but non-Snowflake sources
        // should keep their FLOAT spelling.
        match dt {
            DataType::Float { .. } => Action::SnowflakeFloatProtect,
            _ => Action::None,
        }
    } else {
        Action::None
    }
}
// LOWER patterns from BigQuery TO_HEX conversions:
// - LOWER(LOWER(HEX(x))) from non-BQ targets: flatten
// - LOWER(Function("TO_HEX")) for BQ->BQ: strip LOWER
Expression::Lower(uf) => {
    if matches!(source, DialectType::BigQuery) {
        match &uf.this {
            Expression::Lower(_) => Action::BigQueryToHexLower,
            Expression::Function(f)
                if f.name == "TO_HEX"
                    && matches!(target, DialectType::BigQuery) =>
            {
                // BQ->BQ: LOWER(TO_HEX(x)) -> TO_HEX(x)
                // NOTE(review): this name check is case-sensitive, unlike the
                // eq_ignore_ascii_case comparisons used elsewhere — confirm
                // the parser always emits this marker upper-cased.
                Action::BigQueryToHexLower
            }
            _ => Action::None,
        }
    } else {
        Action::None
    }
}
// UPPER patterns from BigQuery TO_HEX conversions:
// - UPPER(LOWER(HEX(x))) from non-BQ targets: extract inner
// - UPPER(Function("TO_HEX")) for BQ->BQ: keep as UPPER(TO_HEX(x))
Expression::Upper(uf) => {
    if matches!(source, DialectType::BigQuery) {
        match &uf.this {
            Expression::Lower(_) => Action::BigQueryToHexUpper,
            _ => Action::None,
        }
    } else {
        Action::None
    }
}
// BigQuery LAST_DAY(date, unit) -> strip unit for non-BigQuery targets
// Snowflake supports LAST_DAY with unit, so keep it there
Expression::LastDay(ld) => {
    if matches!(source, DialectType::BigQuery)
        && !matches!(target, DialectType::BigQuery | DialectType::Snowflake)
        && ld.unit.is_some()
    {
        Action::BigQueryLastDayStripUnit
    } else {
        Action::None
    }
}
// BigQuery SafeDivide expressions (already parsed as SafeDivide)
// need expansion into NULL-safe division when leaving BigQuery.
Expression::SafeDivide(_) => {
    if matches!(source, DialectType::BigQuery)
        && !matches!(target, DialectType::BigQuery)
    {
        Action::BigQuerySafeDivide
    } else {
        Action::None
    }
}
// BigQuery ANY_VALUE(x HAVING MAX/MIN y) -> ARG_MAX_NULL/ARG_MIN_NULL for DuckDB
// ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
Expression::AnyValue(ref agg) => {
    if matches!(source, DialectType::BigQuery)
        && matches!(target, DialectType::DuckDB)
        && agg.having_max.is_some()
    {
        Action::BigQueryAnyValueHaving
    } else if matches!(target, DialectType::Spark | DialectType::Databricks)
        && !matches!(source, DialectType::Spark | DialectType::Databricks)
        && agg.ignore_nulls.is_none()
    {
        // Only add IGNORE NULLS when no explicit nulls mode was parsed.
        Action::AnyValueIgnoreNulls
    } else {
        Action::None
    }
}
Expression::Any(ref q) => {
    // PostgreSQL `expr op ANY(operand)` -> EXISTS rewrite for the Hive
    // family, but only when the operand is NOT a subquery.
    if matches!(source, DialectType::PostgreSQL)
        && matches!(
            target,
            DialectType::Spark | DialectType::Databricks | DialectType::Hive
        )
        && q.op.is_some()
        && !matches!(
            q.subquery,
            Expression::Select(_) | Expression::Subquery(_)
        )
    {
        Action::AnyToExists
    } else {
        Action::None
    }
}
6284 // BigQuery APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
6285 // RegexpLike from non-DuckDB sources -> REGEXP_MATCHES for DuckDB target
6286 // DuckDB's ~ is a full match, but other dialects' REGEXP/RLIKE is a partial match
6287 Expression::RegexpLike(_)
6288 if !matches!(source, DialectType::DuckDB)
6289 && matches!(target, DialectType::DuckDB) =>
6290 {
6291 Action::RegexpLikeToDuckDB
6292 }
6293 // Safe-division source -> non-safe target: NULLIF wrapping and/or CAST
6294 // Safe-division dialects: MySQL, DuckDB, SingleStore, TiDB, ClickHouse, Doris
6295 Expression::Div(ref op)
6296 if matches!(
6297 source,
6298 DialectType::MySQL
6299 | DialectType::DuckDB
6300 | DialectType::SingleStore
6301 | DialectType::TiDB
6302 | DialectType::ClickHouse
6303 | DialectType::Doris
6304 ) && matches!(
6305 target,
6306 DialectType::PostgreSQL
6307 | DialectType::Redshift
6308 | DialectType::Drill
6309 | DialectType::Trino
6310 | DialectType::Presto
6311 | DialectType::Athena
6312 | DialectType::TSQL
6313 | DialectType::Teradata
6314 | DialectType::SQLite
6315 | DialectType::BigQuery
6316 | DialectType::Snowflake
6317 | DialectType::Databricks
6318 | DialectType::Oracle
6319 | DialectType::Materialize
6320 | DialectType::RisingWave
6321 ) =>
6322 {
6323 // Only wrap if RHS is not already NULLIF
6324 if !matches!(&op.right, Expression::Function(f) if f.name.eq_ignore_ascii_case("NULLIF"))
6325 {
6326 Action::MySQLSafeDivide
6327 } else {
6328 Action::None
6329 }
6330 }
6331 // ALTER TABLE ... RENAME TO <schema>.<table> -> strip schema for most targets
6332 // For TSQL/Fabric, convert to sp_rename instead
6333 Expression::AlterTable(ref at) if !at.actions.is_empty() => {
6334 if let Some(crate::expressions::AlterTableAction::RenameTable(
6335 ref new_tbl,
6336 )) = at.actions.first()
6337 {
6338 if matches!(target, DialectType::TSQL | DialectType::Fabric) {
6339 // TSQL: ALTER TABLE RENAME -> EXEC sp_rename
6340 Action::AlterTableToSpRename
6341 } else if new_tbl.schema.is_some()
6342 && matches!(
6343 target,
6344 DialectType::BigQuery
6345 | DialectType::Doris
6346 | DialectType::StarRocks
6347 | DialectType::DuckDB
6348 | DialectType::PostgreSQL
6349 | DialectType::Redshift
6350 )
6351 {
6352 Action::AlterTableRenameStripSchema
6353 } else {
6354 Action::None
6355 }
6356 } else {
6357 Action::None
6358 }
6359 }
6360 // EPOCH(x) expression -> target-specific epoch conversion
6361 Expression::Epoch(_) if !matches!(target, DialectType::DuckDB) => {
6362 Action::EpochConvert
6363 }
6364 // EPOCH_MS(x) expression -> target-specific epoch ms conversion
6365 Expression::EpochMs(_) if !matches!(target, DialectType::DuckDB) => {
6366 Action::EpochMsConvert
6367 }
6368 // STRING_AGG -> GROUP_CONCAT for MySQL/SQLite
6369 Expression::StringAgg(_) => {
6370 if matches!(
6371 target,
6372 DialectType::MySQL
6373 | DialectType::SingleStore
6374 | DialectType::Doris
6375 | DialectType::StarRocks
6376 | DialectType::SQLite
6377 ) {
6378 Action::StringAggConvert
6379 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
6380 Action::StringAggConvert
6381 } else {
6382 Action::None
6383 }
6384 }
6385 // GROUP_CONCAT -> STRING_AGG for PostgreSQL/Presto/etc.
6386 // Also handles GROUP_CONCAT normalization for MySQL/SQLite targets
6387 Expression::GroupConcat(_) => Action::GroupConcatConvert,
6388 // CARDINALITY/ARRAY_LENGTH/ARRAY_SIZE -> target-specific array length
6389 Expression::Cardinality(_) | Expression::ArrayLength(_) => {
6390 Action::ArrayLengthConvert
6391 }
6392 Expression::ArraySize(_) => {
6393 if matches!(target, DialectType::Drill) {
6394 Action::ArraySizeDrill
6395 } else {
6396 Action::ArrayLengthConvert
6397 }
6398 }
6399 // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter/ARRAY subquery
6400 Expression::ArrayRemove(_) => match target {
6401 DialectType::DuckDB | DialectType::ClickHouse | DialectType::BigQuery => {
6402 Action::ArrayRemoveConvert
6403 }
6404 _ => Action::None,
6405 },
6406 // ARRAY_REVERSE(x) -> arrayReverse for ClickHouse
6407 Expression::ArrayReverse(_) => match target {
6408 DialectType::ClickHouse => Action::ArrayReverseConvert,
6409 _ => Action::None,
6410 },
6411 // JSON_KEYS(x) -> JSON_OBJECT_KEYS/OBJECT_KEYS for Spark/Databricks/Snowflake
6412 Expression::JsonKeys(_) => match target {
6413 DialectType::Spark | DialectType::Databricks | DialectType::Snowflake => {
6414 Action::JsonKeysConvert
6415 }
6416 _ => Action::None,
6417 },
6418 // PARSE_JSON(x) -> strip for SQLite/Doris/MySQL/StarRocks
6419 Expression::ParseJson(_) => match target {
6420 DialectType::SQLite
6421 | DialectType::Doris
6422 | DialectType::MySQL
6423 | DialectType::StarRocks => Action::ParseJsonStrip,
6424 _ => Action::None,
6425 },
6426 // WeekOfYear -> WEEKISO for Snowflake (cross-dialect only)
6427 Expression::WeekOfYear(_)
6428 if matches!(target, DialectType::Snowflake)
6429 && !matches!(source, DialectType::Snowflake) =>
6430 {
6431 Action::WeekOfYearToWeekIso
6432 }
6433 // NVL: clear original_name so generator uses dialect-specific function names
6434 Expression::Nvl(f) if f.original_name.is_some() => Action::NvlClearOriginal,
6435 // XOR: expand for dialects that don't support the XOR keyword
6436 Expression::Xor(_) => {
6437 let target_supports_xor = matches!(
6438 target,
6439 DialectType::MySQL
6440 | DialectType::SingleStore
6441 | DialectType::Doris
6442 | DialectType::StarRocks
6443 );
6444 if !target_supports_xor {
6445 Action::XorExpand
6446 } else {
6447 Action::None
6448 }
6449 }
6450 // TSQL #table -> temp table normalization (CREATE TABLE)
6451 Expression::CreateTable(ct)
6452 if matches!(source, DialectType::TSQL | DialectType::Fabric)
6453 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
6454 && ct.name.name.name.starts_with('#') =>
6455 {
6456 Action::TempTableHash
6457 }
6458 // TSQL #table -> strip # from table references in SELECT/etc.
6459 Expression::Table(tr)
6460 if matches!(source, DialectType::TSQL | DialectType::Fabric)
6461 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
6462 && tr.name.name.starts_with('#') =>
6463 {
6464 Action::TempTableHash
6465 }
6466 // TSQL #table -> strip # from DROP TABLE names
6467 Expression::DropTable(ref dt)
6468 if matches!(source, DialectType::TSQL | DialectType::Fabric)
6469 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
6470 && dt.names.iter().any(|n| n.name.name.starts_with('#')) =>
6471 {
6472 Action::TempTableHash
6473 }
6474 // JSON_EXTRACT -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
6475 Expression::JsonExtract(_)
6476 if matches!(target, DialectType::TSQL | DialectType::Fabric) =>
6477 {
6478 Action::JsonExtractToTsql
6479 }
6480 // JSON_EXTRACT_SCALAR -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
6481 Expression::JsonExtractScalar(_)
6482 if matches!(target, DialectType::TSQL | DialectType::Fabric) =>
6483 {
6484 Action::JsonExtractToTsql
6485 }
6486 // JSON_EXTRACT -> JSONExtractString for ClickHouse
6487 Expression::JsonExtract(_) if matches!(target, DialectType::ClickHouse) => {
6488 Action::JsonExtractToClickHouse
6489 }
6490 // JSON_EXTRACT_SCALAR -> JSONExtractString for ClickHouse
6491 Expression::JsonExtractScalar(_)
6492 if matches!(target, DialectType::ClickHouse) =>
6493 {
6494 Action::JsonExtractToClickHouse
6495 }
6496 // JSON_EXTRACT -> arrow syntax for SQLite/DuckDB
6497 Expression::JsonExtract(ref f)
6498 if !f.arrow_syntax
6499 && matches!(target, DialectType::SQLite | DialectType::DuckDB) =>
6500 {
6501 Action::JsonExtractToArrow
6502 }
6503 // JSON_EXTRACT with JSONPath -> JSON_EXTRACT_PATH for PostgreSQL (non-PG sources only)
6504 Expression::JsonExtract(ref f)
6505 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift)
6506 && !matches!(
6507 source,
6508 DialectType::PostgreSQL
6509 | DialectType::Redshift
6510 | DialectType::Materialize
6511 )
6512 && matches!(&f.path, Expression::Literal(Literal::String(s)) if s.starts_with('$')) =>
6513 {
6514 Action::JsonExtractToGetJsonObject
6515 }
6516 // JSON_EXTRACT -> GET_JSON_OBJECT for Hive/Spark
6517 Expression::JsonExtract(_)
6518 if matches!(
6519 target,
6520 DialectType::Hive | DialectType::Spark | DialectType::Databricks
6521 ) =>
6522 {
6523 Action::JsonExtractToGetJsonObject
6524 }
6525 // JSON_EXTRACT_SCALAR -> target-specific for PostgreSQL, Snowflake, SQLite
6526 // Skip if already in arrow/hash_arrow syntax (same-dialect identity case)
6527 Expression::JsonExtractScalar(ref f)
6528 if !f.arrow_syntax
6529 && !f.hash_arrow_syntax
6530 && matches!(
6531 target,
6532 DialectType::PostgreSQL
6533 | DialectType::Redshift
6534 | DialectType::Snowflake
6535 | DialectType::SQLite
6536 | DialectType::DuckDB
6537 ) =>
6538 {
6539 Action::JsonExtractScalarConvert
6540 }
6541 // JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
6542 Expression::JsonExtractScalar(_)
6543 if matches!(
6544 target,
6545 DialectType::Hive | DialectType::Spark | DialectType::Databricks
6546 ) =>
6547 {
6548 Action::JsonExtractScalarToGetJsonObject
6549 }
6550 // JSON_EXTRACT path normalization for BigQuery, MySQL (bracket/wildcard handling)
6551 Expression::JsonExtract(ref f)
6552 if !f.arrow_syntax
6553 && matches!(target, DialectType::BigQuery | DialectType::MySQL) =>
6554 {
6555 Action::JsonPathNormalize
6556 }
6557 // JsonQuery (parsed JSON_QUERY) -> target-specific
6558 Expression::JsonQuery(_) => Action::JsonQueryValueConvert,
6559 // JsonValue (parsed JSON_VALUE) -> target-specific
6560 Expression::JsonValue(_) => Action::JsonQueryValueConvert,
6561 // AT TIME ZONE -> AT_TIMEZONE for Presto, FROM_UTC_TIMESTAMP for Spark,
6562 // TIMESTAMP(DATETIME(...)) for BigQuery, CONVERT_TIMEZONE for Snowflake
6563 Expression::AtTimeZone(_)
6564 if matches!(
6565 target,
6566 DialectType::Presto
6567 | DialectType::Trino
6568 | DialectType::Athena
6569 | DialectType::Spark
6570 | DialectType::Databricks
6571 | DialectType::BigQuery
6572 | DialectType::Snowflake
6573 ) =>
6574 {
6575 Action::AtTimeZoneConvert
6576 }
6577 // DAY_OF_WEEK -> dialect-specific
6578 Expression::DayOfWeek(_)
6579 if matches!(
6580 target,
6581 DialectType::DuckDB | DialectType::Spark | DialectType::Databricks
6582 ) =>
6583 {
6584 Action::DayOfWeekConvert
6585 }
6586 // CURRENT_USER -> CURRENT_USER() for Snowflake
6587 Expression::CurrentUser(_) if matches!(target, DialectType::Snowflake) => {
6588 Action::CurrentUserParens
6589 }
6590 // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
6591 Expression::ElementAt(_)
6592 if matches!(target, DialectType::PostgreSQL | DialectType::BigQuery) =>
6593 {
6594 Action::ElementAtConvert
6595 }
6596 // ARRAY[...] (ArrayFunc bracket_notation=false) -> convert for target dialect
6597 Expression::ArrayFunc(ref arr)
6598 if !arr.bracket_notation
6599 && matches!(
6600 target,
6601 DialectType::Spark
6602 | DialectType::Databricks
6603 | DialectType::Hive
6604 | DialectType::BigQuery
6605 | DialectType::DuckDB
6606 | DialectType::Snowflake
6607 | DialectType::Presto
6608 | DialectType::Trino
6609 | DialectType::Athena
6610 | DialectType::ClickHouse
6611 | DialectType::StarRocks
6612 ) =>
6613 {
6614 Action::ArraySyntaxConvert
6615 }
6616 // VARIANCE expression -> varSamp for ClickHouse
6617 Expression::Variance(_) if matches!(target, DialectType::ClickHouse) => {
6618 Action::VarianceToClickHouse
6619 }
6620 // STDDEV expression -> stddevSamp for ClickHouse
6621 Expression::Stddev(_) if matches!(target, DialectType::ClickHouse) => {
6622 Action::StddevToClickHouse
6623 }
6624 // ApproxQuantile -> APPROX_PERCENTILE for Snowflake
6625 Expression::ApproxQuantile(_) if matches!(target, DialectType::Snowflake) => {
6626 Action::ApproxQuantileConvert
6627 }
6628 // MonthsBetween -> target-specific
6629 Expression::MonthsBetween(_)
6630 if !matches!(
6631 target,
6632 DialectType::Spark | DialectType::Databricks | DialectType::Hive
6633 ) =>
6634 {
6635 Action::MonthsBetweenConvert
6636 }
6637 // AddMonths -> target-specific DATEADD/DATE_ADD
6638 Expression::AddMonths(_) => Action::AddMonthsConvert,
6639 // MapFromArrays -> target-specific (MAP, OBJECT_CONSTRUCT, MAP_FROM_ARRAYS)
6640 Expression::MapFromArrays(_)
6641 if !matches!(target, DialectType::Spark | DialectType::Databricks) =>
6642 {
6643 Action::MapFromArraysConvert
6644 }
6645 // CURRENT_USER -> CURRENT_USER() for Spark
6646 Expression::CurrentUser(_)
6647 if matches!(target, DialectType::Spark | DialectType::Databricks) =>
6648 {
6649 Action::CurrentUserSparkParens
6650 }
6651 // MONTH/YEAR/DAY('string') from Spark -> cast string to DATE for DuckDB/Presto
6652 Expression::Month(ref f) | Expression::Year(ref f) | Expression::Day(ref f)
6653 if matches!(
6654 source,
6655 DialectType::Spark | DialectType::Databricks | DialectType::Hive
6656 ) && matches!(&f.this, Expression::Literal(Literal::String(_)))
6657 && matches!(
6658 target,
6659 DialectType::DuckDB
6660 | DialectType::Presto
6661 | DialectType::Trino
6662 | DialectType::Athena
6663 | DialectType::PostgreSQL
6664 | DialectType::Redshift
6665 ) =>
6666 {
6667 Action::SparkDateFuncCast
6668 }
6669 // $parameter -> @parameter for BigQuery
6670 Expression::Parameter(ref p)
6671 if matches!(target, DialectType::BigQuery)
6672 && matches!(source, DialectType::DuckDB)
6673 && (p.style == crate::expressions::ParameterStyle::Dollar
6674 || p.style == crate::expressions::ParameterStyle::DoubleDollar) =>
6675 {
6676 Action::DollarParamConvert
6677 }
6678 // EscapeString literal: normalize literal newlines to \n
6679 Expression::Literal(Literal::EscapeString(ref s))
6680 if s.contains('\n') || s.contains('\r') || s.contains('\t') =>
6681 {
6682 Action::EscapeStringNormalize
6683 }
6684 // straight_join: keep lowercase for DuckDB, quote for MySQL
6685 Expression::Column(ref col)
6686 if col.name.name == "STRAIGHT_JOIN"
6687 && col.table.is_none()
6688 && matches!(source, DialectType::DuckDB)
6689 && matches!(target, DialectType::DuckDB | DialectType::MySQL) =>
6690 {
6691 Action::StraightJoinCase
6692 }
6693 // DATE and TIMESTAMP literal type conversions are now handled in the generator directly
6694 // Snowflake INTERVAL format: INTERVAL '2' HOUR -> INTERVAL '2 HOUR'
6695 Expression::Interval(ref iv)
6696 if matches!(
6697 target,
6698 DialectType::Snowflake
6699 | DialectType::PostgreSQL
6700 | DialectType::Redshift
6701 ) && iv.unit.is_some()
6702 && matches!(
6703 &iv.this,
6704 Some(Expression::Literal(Literal::String(_)))
6705 ) =>
6706 {
6707 Action::SnowflakeIntervalFormat
6708 }
6709 // TABLESAMPLE -> TABLESAMPLE RESERVOIR for DuckDB target
6710 Expression::TableSample(ref ts) if matches!(target, DialectType::DuckDB) => {
6711 if let Some(ref sample) = ts.sample {
6712 if !sample.explicit_method {
6713 Action::TablesampleReservoir
6714 } else {
6715 Action::None
6716 }
6717 } else {
6718 Action::None
6719 }
6720 }
6721 // TABLESAMPLE from non-Snowflake source to Snowflake: strip method and PERCENT
6722 // Handles both Expression::TableSample wrapper and Expression::Table with table_sample
6723 Expression::TableSample(ref ts)
6724 if matches!(target, DialectType::Snowflake)
6725 && !matches!(source, DialectType::Snowflake)
6726 && ts.sample.is_some() =>
6727 {
6728 if let Some(ref sample) = ts.sample {
6729 if !sample.explicit_method {
6730 Action::TablesampleSnowflakeStrip
6731 } else {
6732 Action::None
6733 }
6734 } else {
6735 Action::None
6736 }
6737 }
6738 Expression::Table(ref t)
6739 if matches!(target, DialectType::Snowflake)
6740 && !matches!(source, DialectType::Snowflake)
6741 && t.table_sample.is_some() =>
6742 {
6743 if let Some(ref sample) = t.table_sample {
6744 if !sample.explicit_method {
6745 Action::TablesampleSnowflakeStrip
6746 } else {
6747 Action::None
6748 }
6749 } else {
6750 Action::None
6751 }
6752 }
6753 // ALTER TABLE RENAME -> EXEC sp_rename for TSQL
6754 Expression::AlterTable(ref at)
6755 if matches!(target, DialectType::TSQL | DialectType::Fabric)
6756 && !at.actions.is_empty()
6757 && matches!(
6758 at.actions.first(),
6759 Some(crate::expressions::AlterTableAction::RenameTable(_))
6760 ) =>
6761 {
6762 Action::AlterTableToSpRename
6763 }
6764 // Subscript index: 1-based to 0-based for BigQuery/Hive/Spark
6765 Expression::Subscript(ref sub)
6766 if matches!(
6767 target,
6768 DialectType::BigQuery
6769 | DialectType::Hive
6770 | DialectType::Spark
6771 | DialectType::Databricks
6772 ) && matches!(
6773 source,
6774 DialectType::DuckDB
6775 | DialectType::PostgreSQL
6776 | DialectType::Presto
6777 | DialectType::Trino
6778 | DialectType::Redshift
6779 | DialectType::ClickHouse
6780 ) && matches!(&sub.index, Expression::Literal(Literal::Number(ref n)) if n.parse::<i64>().unwrap_or(0) > 0) =>
6781 {
6782 Action::ArrayIndexConvert
6783 }
6784 // ANY_VALUE IGNORE NULLS detection moved to the AnyValue arm above
6785 // MysqlNullsOrdering for Ordered is now handled in the Ordered arm above
6786 // RESPECT NULLS handling for SQLite (strip it, add NULLS LAST to ORDER BY)
6787 // and for MySQL (rewrite ORDER BY with CASE WHEN for null ordering)
6788 Expression::WindowFunction(ref wf) => {
6789 // BigQuery doesn't support NULLS FIRST/LAST in window function ORDER BY
6790 // EXCEPT for ROW_NUMBER which keeps NULLS LAST
6791 let is_row_number = matches!(wf.this, Expression::RowNumber(_));
6792 if matches!(target, DialectType::BigQuery)
6793 && !is_row_number
6794 && !wf.over.order_by.is_empty()
6795 && wf.over.order_by.iter().any(|o| o.nulls_first.is_some())
6796 {
6797 Action::BigQueryNullsOrdering
6798 // DuckDB -> MySQL: Add CASE WHEN for NULLS LAST simulation in window ORDER BY
6799 // But NOT when frame is RANGE/GROUPS, since adding CASE WHEN would break value-based frames
6800 } else {
6801 let source_nulls_last = matches!(source, DialectType::DuckDB);
6802 let has_range_frame = wf.over.frame.as_ref().map_or(false, |f| {
6803 matches!(
6804 f.kind,
6805 crate::expressions::WindowFrameKind::Range
6806 | crate::expressions::WindowFrameKind::Groups
6807 )
6808 });
6809 if source_nulls_last
6810 && matches!(target, DialectType::MySQL)
6811 && !wf.over.order_by.is_empty()
6812 && wf.over.order_by.iter().any(|o| !o.desc)
6813 && !has_range_frame
6814 {
6815 Action::MysqlNullsLastRewrite
6816 } else {
6817 match &wf.this {
6818 Expression::FirstValue(ref vf)
6819 | Expression::LastValue(ref vf)
6820 if vf.ignore_nulls == Some(false) =>
6821 {
6822 // RESPECT NULLS
6823 match target {
6824 DialectType::SQLite => Action::RespectNullsConvert,
6825 _ => Action::None,
6826 }
6827 }
6828 _ => Action::None,
6829 }
6830 }
6831 }
6832 }
6833 // CREATE TABLE a LIKE b -> dialect-specific transformations
6834 Expression::CreateTable(ref ct)
6835 if ct.columns.is_empty()
6836 && ct.constraints.iter().any(|c| {
6837 matches!(c, crate::expressions::TableConstraint::Like { .. })
6838 })
6839 && matches!(
6840 target,
6841 DialectType::DuckDB | DialectType::SQLite | DialectType::Drill
6842 ) =>
6843 {
6844 Action::CreateTableLikeToCtas
6845 }
6846 Expression::CreateTable(ref ct)
6847 if ct.columns.is_empty()
6848 && ct.constraints.iter().any(|c| {
6849 matches!(c, crate::expressions::TableConstraint::Like { .. })
6850 })
6851 && matches!(target, DialectType::TSQL | DialectType::Fabric) =>
6852 {
6853 Action::CreateTableLikeToSelectInto
6854 }
6855 Expression::CreateTable(ref ct)
6856 if ct.columns.is_empty()
6857 && ct.constraints.iter().any(|c| {
6858 matches!(c, crate::expressions::TableConstraint::Like { .. })
6859 })
6860 && matches!(target, DialectType::ClickHouse) =>
6861 {
6862 Action::CreateTableLikeToAs
6863 }
6864 // CREATE TABLE: strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
6865 Expression::CreateTable(ref ct)
6866 if matches!(target, DialectType::DuckDB)
6867 && matches!(
6868 source,
6869 DialectType::DuckDB
6870 | DialectType::Spark
6871 | DialectType::Databricks
6872 | DialectType::Hive
6873 ) =>
6874 {
6875 let has_comment = ct.columns.iter().any(|c| {
6876 c.comment.is_some()
6877 || c.constraints.iter().any(|con| {
6878 matches!(con, crate::expressions::ColumnConstraint::Comment(_))
6879 })
6880 });
6881 let has_props = !ct.properties.is_empty();
6882 if has_comment || has_props {
6883 Action::CreateTableStripComment
6884 } else {
6885 Action::None
6886 }
6887 }
6888 // Array conversion: Expression::Array -> Expression::ArrayFunc for PostgreSQL
6889 Expression::Array(_)
6890 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) =>
6891 {
6892 Action::ArrayConcatBracketConvert
6893 }
6894 // ArrayFunc (bracket notation) -> Function("ARRAY") for Redshift (from BigQuery source)
6895 Expression::ArrayFunc(ref arr)
6896 if arr.bracket_notation
6897 && matches!(source, DialectType::BigQuery)
6898 && matches!(target, DialectType::Redshift) =>
6899 {
6900 Action::ArrayConcatBracketConvert
6901 }
6902 // BIT_OR/BIT_AND/BIT_XOR: float/decimal arg cast for DuckDB, or rename for Snowflake
6903 Expression::BitwiseOrAgg(ref f)
6904 | Expression::BitwiseAndAgg(ref f)
6905 | Expression::BitwiseXorAgg(ref f) => {
6906 if matches!(target, DialectType::DuckDB) {
6907 // Check if the arg is CAST(val AS FLOAT/DOUBLE/DECIMAL/REAL)
6908 if let Expression::Cast(ref c) = f.this {
6909 match &c.to {
6910 DataType::Float { .. }
6911 | DataType::Double { .. }
6912 | DataType::Decimal { .. } => Action::BitAggFloatCast,
6913 DataType::Custom { ref name }
6914 if name.eq_ignore_ascii_case("REAL") =>
6915 {
6916 Action::BitAggFloatCast
6917 }
6918 _ => Action::None,
6919 }
6920 } else {
6921 Action::None
6922 }
6923 } else if matches!(target, DialectType::Snowflake) {
6924 Action::BitAggSnowflakeRename
6925 } else {
6926 Action::None
6927 }
6928 }
6929 // FILTER -> IFF for Snowflake (aggregate functions with FILTER clause)
6930 Expression::Filter(ref _f) if matches!(target, DialectType::Snowflake) => {
6931 Action::FilterToIff
6932 }
6933 // AggFunc.filter -> IFF wrapping for Snowflake (e.g., AVG(x) FILTER(WHERE cond))
6934 Expression::Avg(ref f)
6935 | Expression::Sum(ref f)
6936 | Expression::Min(ref f)
6937 | Expression::Max(ref f)
6938 | Expression::CountIf(ref f)
6939 | Expression::Stddev(ref f)
6940 | Expression::StddevPop(ref f)
6941 | Expression::StddevSamp(ref f)
6942 | Expression::Variance(ref f)
6943 | Expression::VarPop(ref f)
6944 | Expression::VarSamp(ref f)
6945 | Expression::Median(ref f)
6946 | Expression::Mode(ref f)
6947 | Expression::First(ref f)
6948 | Expression::Last(ref f)
6949 | Expression::ApproxDistinct(ref f)
6950 if f.filter.is_some() && matches!(target, DialectType::Snowflake) =>
6951 {
6952 Action::AggFilterToIff
6953 }
6954 Expression::Count(ref c)
6955 if c.filter.is_some() && matches!(target, DialectType::Snowflake) =>
6956 {
6957 Action::AggFilterToIff
6958 }
6959 // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN ... END) for dialects that don't support multi-arg DISTINCT
6960 Expression::Count(ref c)
6961 if c.distinct
6962 && matches!(&c.this, Some(Expression::Tuple(_)))
6963 && matches!(
6964 target,
6965 DialectType::Presto
6966 | DialectType::Trino
6967 | DialectType::DuckDB
6968 | DialectType::PostgreSQL
6969 ) =>
6970 {
6971 Action::CountDistinctMultiArg
6972 }
6973 // JSON arrow -> GET_PATH/PARSE_JSON for Snowflake
6974 Expression::JsonExtract(_) if matches!(target, DialectType::Snowflake) => {
6975 Action::JsonToGetPath
6976 }
6977 // DuckDB struct/dict -> BigQuery STRUCT / Presto ROW
6978 Expression::Struct(_)
6979 if matches!(
6980 target,
6981 DialectType::BigQuery | DialectType::Presto | DialectType::Trino
6982 ) && matches!(source, DialectType::DuckDB) =>
6983 {
6984 Action::StructToRow
6985 }
6986 // DuckDB curly-brace dict {'key': value} -> BigQuery STRUCT / Presto ROW
6987 Expression::MapFunc(ref m)
6988 if m.curly_brace_syntax
6989 && matches!(
6990 target,
6991 DialectType::BigQuery | DialectType::Presto | DialectType::Trino
6992 )
6993 && matches!(source, DialectType::DuckDB) =>
6994 {
6995 Action::StructToRow
6996 }
6997 // APPROX_COUNT_DISTINCT -> APPROX_DISTINCT for Presto/Trino
6998 Expression::ApproxCountDistinct(_)
6999 if matches!(
7000 target,
7001 DialectType::Presto | DialectType::Trino | DialectType::Athena
7002 ) =>
7003 {
7004 Action::ApproxCountDistinctToApproxDistinct
7005 }
7006 // ARRAY_CONTAINS(arr, val) -> CONTAINS(arr, val) for Presto, ARRAY_CONTAINS(CAST(val AS VARIANT), arr) for Snowflake
7007 Expression::ArrayContains(_)
7008 if matches!(
7009 target,
7010 DialectType::Presto | DialectType::Trino | DialectType::Snowflake
7011 ) =>
7012 {
7013 Action::ArrayContainsConvert
7014 }
7015 // StrPosition with position -> complex expansion for Presto/DuckDB
7016 // STRPOS doesn't support a position arg in these dialects
7017 Expression::StrPosition(ref sp)
7018 if sp.position.is_some()
7019 && matches!(
7020 target,
7021 DialectType::Presto
7022 | DialectType::Trino
7023 | DialectType::Athena
7024 | DialectType::DuckDB
7025 ) =>
7026 {
7027 Action::StrPositionExpand
7028 }
7029 // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
7030 Expression::First(ref f)
7031 if f.ignore_nulls == Some(true)
7032 && matches!(target, DialectType::DuckDB) =>
7033 {
7034 Action::FirstToAnyValue
7035 }
7036 // BEGIN -> START TRANSACTION for Presto/Trino
7037 Expression::Command(ref cmd)
7038 if cmd.this.eq_ignore_ascii_case("BEGIN")
7039 && matches!(
7040 target,
7041 DialectType::Presto | DialectType::Trino | DialectType::Athena
7042 ) =>
7043 {
7044 // Handled inline below
7045 Action::None // We'll handle it directly
7046 }
7047 // Note: PostgreSQL ^ is now parsed as Power directly (not BitwiseXor).
7048 // PostgreSQL # is parsed as BitwiseXor (which is correct).
7049 // a || b (Concat operator) -> CONCAT function for Presto/Trino
7050 Expression::Concat(ref _op)
7051 if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
7052 && matches!(target, DialectType::Presto | DialectType::Trino) =>
7053 {
7054 Action::PipeConcatToConcat
7055 }
7056 _ => Action::None,
7057 }
7058 };
7059
7060 match action {
7061 Action::None => {
7062 // Handle inline transforms that don't need a dedicated action
7063
7064 // BETWEEN SYMMETRIC/ASYMMETRIC expansion for non-PostgreSQL/Dremio targets
7065 if let Expression::Between(ref b) = e {
7066 if let Some(sym) = b.symmetric {
7067 let keeps_symmetric =
7068 matches!(target, DialectType::PostgreSQL | DialectType::Dremio);
7069 if !keeps_symmetric {
7070 if sym {
7071 // SYMMETRIC: expand to (x BETWEEN a AND b OR x BETWEEN b AND a)
7072 let b = if let Expression::Between(b) = e {
7073 *b
7074 } else {
7075 unreachable!()
7076 };
7077 let between1 = Expression::Between(Box::new(
7078 crate::expressions::Between {
7079 this: b.this.clone(),
7080 low: b.low.clone(),
7081 high: b.high.clone(),
7082 not: b.not,
7083 symmetric: None,
7084 },
7085 ));
7086 let between2 = Expression::Between(Box::new(
7087 crate::expressions::Between {
7088 this: b.this,
7089 low: b.high,
7090 high: b.low,
7091 not: b.not,
7092 symmetric: None,
7093 },
7094 ));
7095 return Ok(Expression::Paren(Box::new(
7096 crate::expressions::Paren {
7097 this: Expression::Or(Box::new(
7098 crate::expressions::BinaryOp::new(
7099 between1, between2,
7100 ),
7101 )),
7102 trailing_comments: vec![],
7103 },
7104 )));
7105 } else {
7106 // ASYMMETRIC: strip qualifier, keep as regular BETWEEN
7107 let b = if let Expression::Between(b) = e {
7108 *b
7109 } else {
7110 unreachable!()
7111 };
7112 return Ok(Expression::Between(Box::new(
7113 crate::expressions::Between {
7114 this: b.this,
7115 low: b.low,
7116 high: b.high,
7117 not: b.not,
7118 symmetric: None,
7119 },
7120 )));
7121 }
7122 }
7123 }
7124 }
7125
7126 // ILIKE -> LOWER(x) LIKE LOWER(y) for StarRocks/Doris
7127 if let Expression::ILike(ref _like) = e {
7128 if matches!(target, DialectType::StarRocks | DialectType::Doris) {
7129 let like = if let Expression::ILike(l) = e {
7130 *l
7131 } else {
7132 unreachable!()
7133 };
7134 let lower_left = Expression::Function(Box::new(Function::new(
7135 "LOWER".to_string(),
7136 vec![like.left],
7137 )));
7138 let lower_right = Expression::Function(Box::new(Function::new(
7139 "LOWER".to_string(),
7140 vec![like.right],
7141 )));
7142 return Ok(Expression::Like(Box::new(crate::expressions::LikeOp {
7143 left: lower_left,
7144 right: lower_right,
7145 escape: like.escape,
7146 quantifier: like.quantifier,
7147 })));
7148 }
7149 }
7150
7151 // Oracle DBMS_RANDOM.VALUE() -> RANDOM() for PostgreSQL, RAND() for others
7152 if let Expression::MethodCall(ref mc) = e {
7153 if matches!(source, DialectType::Oracle)
7154 && mc.method.name.eq_ignore_ascii_case("VALUE")
7155 && mc.args.is_empty()
7156 {
7157 let is_dbms_random = match &mc.this {
7158 Expression::Identifier(id) => {
7159 id.name.eq_ignore_ascii_case("DBMS_RANDOM")
7160 }
7161 Expression::Column(col) => {
7162 col.table.is_none()
7163 && col.name.name.eq_ignore_ascii_case("DBMS_RANDOM")
7164 }
7165 _ => false,
7166 };
7167 if is_dbms_random {
7168 let func_name = match target {
7169 DialectType::PostgreSQL
7170 | DialectType::Redshift
7171 | DialectType::DuckDB
7172 | DialectType::SQLite => "RANDOM",
7173 DialectType::Oracle => "DBMS_RANDOM.VALUE",
7174 _ => "RAND",
7175 };
7176 return Ok(Expression::Function(Box::new(Function::new(
7177 func_name.to_string(),
7178 vec![],
7179 ))));
7180 }
7181 }
7182 }
7183 // TRIM without explicit position -> add BOTH for ClickHouse
7184 if let Expression::Trim(ref trim) = e {
7185 if matches!(target, DialectType::ClickHouse)
7186 && trim.sql_standard_syntax
7187 && trim.characters.is_some()
7188 && !trim.position_explicit
7189 {
7190 let mut new_trim = (**trim).clone();
7191 new_trim.position_explicit = true;
7192 return Ok(Expression::Trim(Box::new(new_trim)));
7193 }
7194 }
7195 // BEGIN -> START TRANSACTION for Presto/Trino
7196 if let Expression::Transaction(ref txn) = e {
7197 if matches!(
7198 target,
7199 DialectType::Presto | DialectType::Trino | DialectType::Athena
7200 ) {
7201 // Convert BEGIN to START TRANSACTION by setting mark to "START"
7202 let mut txn = txn.clone();
7203 txn.mark = Some(Box::new(Expression::Identifier(Identifier::new(
7204 "START".to_string(),
7205 ))));
7206 return Ok(Expression::Transaction(Box::new(*txn)));
7207 }
7208 }
7209 // IS TRUE/FALSE -> simplified forms for Presto/Trino
7210 if matches!(
7211 target,
7212 DialectType::Presto | DialectType::Trino | DialectType::Athena
7213 ) {
7214 match &e {
7215 Expression::IsTrue(itf) if !itf.not => {
7216 // x IS TRUE -> x
7217 return Ok(itf.this.clone());
7218 }
7219 Expression::IsTrue(itf) if itf.not => {
7220 // x IS NOT TRUE -> NOT x
7221 return Ok(Expression::Not(Box::new(
7222 crate::expressions::UnaryOp {
7223 this: itf.this.clone(),
7224 },
7225 )));
7226 }
7227 Expression::IsFalse(itf) if !itf.not => {
7228 // x IS FALSE -> NOT x
7229 return Ok(Expression::Not(Box::new(
7230 crate::expressions::UnaryOp {
7231 this: itf.this.clone(),
7232 },
7233 )));
7234 }
7235 Expression::IsFalse(itf) if itf.not => {
7236 // x IS NOT FALSE -> NOT NOT x
7237 let not_x =
7238 Expression::Not(Box::new(crate::expressions::UnaryOp {
7239 this: itf.this.clone(),
7240 }));
7241 return Ok(Expression::Not(Box::new(
7242 crate::expressions::UnaryOp { this: not_x },
7243 )));
7244 }
7245 _ => {}
7246 }
7247 }
7248 // x IS NOT FALSE -> NOT x IS FALSE for Redshift
7249 if matches!(target, DialectType::Redshift) {
7250 if let Expression::IsFalse(ref itf) = e {
7251 if itf.not {
7252 return Ok(Expression::Not(Box::new(
7253 crate::expressions::UnaryOp {
7254 this: Expression::IsFalse(Box::new(
7255 crate::expressions::IsTrueFalse {
7256 this: itf.this.clone(),
7257 not: false,
7258 },
7259 )),
7260 },
7261 )));
7262 }
7263 }
7264 }
7265 // REGEXP_REPLACE: add 'g' flag when source defaults to global replacement
7266 // Snowflake default is global, PostgreSQL/DuckDB default is first-match-only
7267 if let Expression::Function(ref f) = e {
7268 if f.name.eq_ignore_ascii_case("REGEXP_REPLACE")
7269 && matches!(source, DialectType::Snowflake)
7270 && matches!(target, DialectType::PostgreSQL | DialectType::DuckDB)
7271 {
7272 if f.args.len() == 3 {
7273 let mut args = f.args.clone();
7274 args.push(Expression::string("g"));
7275 return Ok(Expression::Function(Box::new(Function::new(
7276 "REGEXP_REPLACE".to_string(),
7277 args,
7278 ))));
7279 } else if f.args.len() == 4 {
7280 // 4th arg might be position, add 'g' as 5th
7281 let mut args = f.args.clone();
7282 args.push(Expression::string("g"));
7283 return Ok(Expression::Function(Box::new(Function::new(
7284 "REGEXP_REPLACE".to_string(),
7285 args,
7286 ))));
7287 }
7288 }
7289 }
7290 Ok(e)
7291 }
7292
7293 Action::GreatestLeastNull => {
7294 let f = if let Expression::Function(f) = e {
7295 *f
7296 } else {
7297 unreachable!("action only triggered for Function expressions")
7298 };
7299 let mut null_checks: Vec<Expression> = f
7300 .args
7301 .iter()
7302 .map(|a| {
7303 Expression::IsNull(Box::new(IsNull {
7304 this: a.clone(),
7305 not: false,
7306 postfix_form: false,
7307 }))
7308 })
7309 .collect();
7310 let condition = if null_checks.len() == 1 {
7311 null_checks.remove(0)
7312 } else {
7313 let first = null_checks.remove(0);
7314 null_checks.into_iter().fold(first, |acc, check| {
7315 Expression::Or(Box::new(BinaryOp::new(acc, check)))
7316 })
7317 };
7318 Ok(Expression::Case(Box::new(Case {
7319 operand: None,
7320 whens: vec![(condition, Expression::Null(Null))],
7321 else_: Some(Expression::Function(Box::new(Function::new(
7322 f.name, f.args,
7323 )))),
7324 comments: Vec::new(),
7325 })))
7326 }
7327
7328 Action::ArrayGenerateRange => {
7329 let f = if let Expression::Function(f) = e {
7330 *f
7331 } else {
7332 unreachable!("action only triggered for Function expressions")
7333 };
7334 let start = f.args[0].clone();
7335 let end = f.args[1].clone();
7336 let step = f.args.get(2).cloned();
7337
7338 let end_minus_1 = Expression::Sub(Box::new(BinaryOp::new(
7339 end.clone(),
7340 Expression::number(1),
7341 )));
7342
7343 match target {
7344 DialectType::PostgreSQL | DialectType::Redshift => {
7345 let mut args = vec![start, end_minus_1];
7346 if let Some(s) = step {
7347 args.push(s);
7348 }
7349 Ok(Expression::Function(Box::new(Function::new(
7350 "GENERATE_SERIES".to_string(),
7351 args,
7352 ))))
7353 }
7354 DialectType::Presto | DialectType::Trino => {
7355 let mut args = vec![start, end_minus_1];
7356 if let Some(s) = step {
7357 args.push(s);
7358 }
7359 Ok(Expression::Function(Box::new(Function::new(
7360 "SEQUENCE".to_string(),
7361 args,
7362 ))))
7363 }
7364 DialectType::BigQuery => {
7365 let mut args = vec![start, end_minus_1];
7366 if let Some(s) = step {
7367 args.push(s);
7368 }
7369 Ok(Expression::Function(Box::new(Function::new(
7370 "GENERATE_ARRAY".to_string(),
7371 args,
7372 ))))
7373 }
7374 DialectType::Snowflake => {
7375 let normalized_end = Expression::Add(Box::new(BinaryOp::new(
7376 Expression::Paren(Box::new(Paren {
7377 this: end_minus_1,
7378 trailing_comments: vec![],
7379 })),
7380 Expression::number(1),
7381 )));
7382 let mut args = vec![start, normalized_end];
7383 if let Some(s) = step {
7384 args.push(s);
7385 }
7386 Ok(Expression::Function(Box::new(Function::new(
7387 "ARRAY_GENERATE_RANGE".to_string(),
7388 args,
7389 ))))
7390 }
7391 _ => Ok(Expression::Function(Box::new(Function::new(
7392 f.name, f.args,
7393 )))),
7394 }
7395 }
7396
Action::Div0TypedDivision => {
    // Force the division inside an IF(..., ..., a / b) to be carried out in
    // floating point by casting the numerator — presumably this IF was
    // produced from Snowflake's DIV0 earlier in the pipeline (TODO confirm).
    let if_func = if let Expression::IfFunc(f) = e {
        *f
    } else {
        unreachable!("action only triggered for IfFunc expressions")
    };
    // NOTE: this pattern conditionally moves `false_value` out of `if_func`;
    // the else branch below relies on the move NOT happening on mismatch.
    if let Some(Expression::Div(div)) = if_func.false_value {
        // SQLite has no DOUBLE type, so spell the cast target as REAL there.
        let cast_type = if matches!(target, DialectType::SQLite) {
            DataType::Float {
                precision: None,
                scale: None,
                real_spelling: true,
            }
        } else {
            DataType::Double {
                precision: None,
                scale: None,
            }
        };
        // CAST(numerator AS DOUBLE/REAL) so the quotient is non-integer.
        let casted_left = Expression::Cast(Box::new(Cast {
            this: div.left,
            to: cast_type,
            trailing_comments: vec![],
            double_colon_syntax: false,
            format: None,
            default: None,
        }));
        // Rebuild the IF with only the false branch's division retyped.
        Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
            condition: if_func.condition,
            true_value: if_func.true_value,
            false_value: Some(Expression::Div(Box::new(BinaryOp::new(
                casted_left,
                div.right,
            )))),
            original_name: if_func.original_name,
        })))
    } else {
        // Not actually a Div, reconstruct
        Ok(Expression::IfFunc(Box::new(if_func)))
    }
}
7438
7439 Action::ArrayAggCollectList => {
7440 let agg = if let Expression::ArrayAgg(a) = e {
7441 *a
7442 } else {
7443 unreachable!("action only triggered for ArrayAgg expressions")
7444 };
7445 Ok(Expression::ArrayAgg(Box::new(AggFunc {
7446 name: Some("COLLECT_LIST".to_string()),
7447 ..agg
7448 })))
7449 }
7450
Action::ArrayAggWithinGroupFilter => {
    // ARRAY_AGG(x) WITHIN GROUP (ORDER BY ...) -> ARRAY_AGG(x ORDER BY ...)
    // FILTER (WHERE x IS NOT NULL), for targets without WITHIN GROUP syntax.
    let wg = if let Expression::WithinGroup(w) = e {
        *w
    } else {
        unreachable!("action only triggered for WithinGroup expressions")
    };
    if let Expression::ArrayAgg(inner_agg) = wg.this {
        // Filter NULL elements out so the target matches the source's
        // NULL-skipping aggregation semantics.
        let col = inner_agg.this.clone();
        let filter = Expression::IsNull(Box::new(IsNull {
            this: col,
            not: true,
            postfix_form: false,
        }));
        // For DuckDB, add explicit NULLS FIRST for DESC ordering
        let order_by = if matches!(target, DialectType::DuckDB) {
            wg.order_by
                .into_iter()
                .map(|mut o| {
                    // Only annotate keys that are DESC and have no explicit
                    // NULLS placement already.
                    if o.desc && o.nulls_first.is_none() {
                        o.nulls_first = Some(true);
                    }
                    o
                })
                .collect()
        } else {
            wg.order_by
        };
        // Rebuild the aggregate field-by-field, moving the WITHIN GROUP
        // ordering onto the aggregate itself and attaching the NULL filter.
        Ok(Expression::ArrayAgg(Box::new(AggFunc {
            this: inner_agg.this,
            distinct: inner_agg.distinct,
            filter: Some(filter),
            order_by,
            name: inner_agg.name,
            ignore_nulls: inner_agg.ignore_nulls,
            having_max: inner_agg.having_max,
            limit: inner_agg.limit,
        })))
    } else {
        // WITHIN GROUP over something other than ARRAY_AGG: leave untouched.
        Ok(Expression::WithinGroup(Box::new(wg)))
    }
}
7492
7493 Action::ArrayAggFilter => {
7494 let agg = if let Expression::ArrayAgg(a) = e {
7495 *a
7496 } else {
7497 unreachable!("action only triggered for ArrayAgg expressions")
7498 };
7499 let col = agg.this.clone();
7500 let filter = Expression::IsNull(Box::new(IsNull {
7501 this: col,
7502 not: true,
7503 postfix_form: false,
7504 }));
7505 Ok(Expression::ArrayAgg(Box::new(AggFunc {
7506 filter: Some(filter),
7507 ..agg
7508 })))
7509 }
7510
7511 Action::ArrayAggNullFilter => {
7512 // ARRAY_AGG(x) FILTER(WHERE cond) -> ARRAY_AGG(x) FILTER(WHERE cond AND NOT x IS NULL)
7513 // For source dialects that exclude NULLs (Spark/Hive) targeting DuckDB which includes them
7514 let agg = if let Expression::ArrayAgg(a) = e {
7515 *a
7516 } else {
7517 unreachable!("action only triggered for ArrayAgg expressions")
7518 };
7519 let col = agg.this.clone();
7520 let not_null = Expression::IsNull(Box::new(IsNull {
7521 this: col,
7522 not: true,
7523 postfix_form: true, // Use "NOT x IS NULL" form (prefix NOT)
7524 }));
7525 let new_filter = if let Some(existing_filter) = agg.filter {
7526 // AND the NOT IS NULL with existing filter
7527 Expression::And(Box::new(crate::expressions::BinaryOp::new(
7528 existing_filter,
7529 not_null,
7530 )))
7531 } else {
7532 not_null
7533 };
7534 Ok(Expression::ArrayAgg(Box::new(AggFunc {
7535 filter: Some(new_filter),
7536 ..agg
7537 })))
7538 }
7539
Action::BigQueryArraySelectAsStructToSnowflake => {
    // ARRAY(SELECT AS STRUCT x1 AS x1, x2 AS x2 FROM t)
    // -> (SELECT ARRAY_AGG(OBJECT_CONSTRUCT('x1', x1, 'x2', x2)) FROM t)
    if let Expression::Function(mut f) = e {
        // Only fires for ARRAY(...) whose single argument is a
        // SELECT AS STRUCT subquery.
        let is_match = f.args.len() == 1
            && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT"));
        if is_match {
            let inner_select = match f.args.remove(0) {
                Expression::Select(s) => *s,
                _ => unreachable!(
                    "argument already verified to be a Select expression"
                ),
            };
            // Build OBJECT_CONSTRUCT args from SELECT expressions:
            // alternating 'key' string literal, value expression pairs.
            let mut oc_args = Vec::new();
            for expr in &inner_select.expressions {
                match expr {
                    Expression::Alias(a) => {
                        // Aliased item: key is the alias name.
                        let key = Expression::Literal(Literal::String(
                            a.alias.name.clone(),
                        ));
                        let value = a.this.clone();
                        oc_args.push(key);
                        oc_args.push(value);
                    }
                    Expression::Column(c) => {
                        // Bare column: key is the column name.
                        let key = Expression::Literal(Literal::String(
                            c.name.name.clone(),
                        ));
                        oc_args.push(key);
                        oc_args.push(expr.clone());
                    }
                    _ => {
                        // NOTE(review): any other expression is pushed without
                        // a key, which makes the OBJECT_CONSTRUCT argument list
                        // odd-length — confirm this path is intended.
                        oc_args.push(expr.clone());
                    }
                }
            }
            let object_construct = Expression::Function(Box::new(Function::new(
                "OBJECT_CONSTRUCT".to_string(),
                oc_args,
            )));
            let array_agg = Expression::Function(Box::new(Function::new(
                "ARRAY_AGG".to_string(),
                vec![object_construct],
            )));
            // Rebuild the SELECT around ARRAY_AGG, carrying over the clauses
            // that affect which rows are aggregated.
            let mut new_select = crate::expressions::Select::new();
            new_select.expressions = vec![array_agg];
            new_select.from = inner_select.from.clone();
            new_select.where_clause = inner_select.where_clause.clone();
            new_select.group_by = inner_select.group_by.clone();
            new_select.having = inner_select.having.clone();
            new_select.joins = inner_select.joins.clone();
            // Wrap in a subquery so it can be used in expression position.
            Ok(Expression::Subquery(Box::new(
                crate::expressions::Subquery {
                    this: Expression::Select(Box::new(new_select)),
                    alias: None,
                    column_aliases: Vec::new(),
                    order_by: None,
                    limit: None,
                    offset: None,
                    distribute_by: None,
                    sort_by: None,
                    cluster_by: None,
                    lateral: false,
                    modifiers_inside: false,
                    trailing_comments: Vec::new(),
                },
            )))
        } else {
            Ok(Expression::Function(f))
        }
    } else {
        Ok(e)
    }
}
7615
7616 Action::BigQueryPercentileContToDuckDB => {
7617 // PERCENTILE_CONT(x, frac [RESPECT NULLS]) -> QUANTILE_CONT(x, frac) for DuckDB
7618 if let Expression::AggregateFunction(mut af) = e {
7619 af.name = "QUANTILE_CONT".to_string();
7620 af.ignore_nulls = None; // Strip RESPECT/IGNORE NULLS
7621 // Keep only first 2 args
7622 if af.args.len() > 2 {
7623 af.args.truncate(2);
7624 }
7625 Ok(Expression::AggregateFunction(af))
7626 } else {
7627 Ok(e)
7628 }
7629 }
7630
7631 Action::ArrayAggIgnoreNullsDuckDB => {
7632 // ARRAY_AGG(x IGNORE NULLS ORDER BY a, b DESC) -> ARRAY_AGG(x ORDER BY a NULLS FIRST, b DESC)
7633 // Strip IGNORE NULLS, add NULLS FIRST to first ORDER BY column
7634 let mut agg = if let Expression::ArrayAgg(a) = e {
7635 *a
7636 } else {
7637 unreachable!("action only triggered for ArrayAgg expressions")
7638 };
7639 agg.ignore_nulls = None; // Strip IGNORE NULLS
7640 if !agg.order_by.is_empty() {
7641 agg.order_by[0].nulls_first = Some(true);
7642 }
7643 Ok(Expression::ArrayAgg(Box::new(agg)))
7644 }
7645
Action::CountDistinctMultiArg => {
    // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN a IS NULL THEN NULL WHEN b IS NULL THEN NULL ELSE (a, b) END)
    // For targets that only accept a single DISTINCT argument: the CASE
    // yields NULL (which COUNT skips) whenever any tuple member is NULL.
    if let Expression::Count(c) = e {
        // Multi-arg COUNT DISTINCT arrives with its arguments as a Tuple.
        if let Some(Expression::Tuple(t)) = c.this {
            let args = t.expressions;
            // Build CASE expression:
            // WHEN a IS NULL THEN NULL WHEN b IS NULL THEN NULL ELSE (a, b) END
            let mut whens = Vec::new();
            for arg in &args {
                whens.push((
                    Expression::IsNull(Box::new(IsNull {
                        this: arg.clone(),
                        not: false,
                        postfix_form: false,
                    })),
                    Expression::Null(crate::expressions::Null),
                ));
            }
            // Build the tuple for ELSE
            let tuple_expr =
                Expression::Tuple(Box::new(crate::expressions::Tuple {
                    expressions: args,
                }));
            let case_expr = Expression::Case(Box::new(crate::expressions::Case {
                operand: None,
                whens,
                else_: Some(tuple_expr),
                comments: Vec::new(),
            }));
            // distinct is hardcoded true — this action targets the
            // COUNT(DISTINCT ...) form (TODO confirm trigger site).
            Ok(Expression::Count(Box::new(crate::expressions::CountFunc {
                this: Some(case_expr),
                star: false,
                distinct: true,
                filter: c.filter,
                ignore_nulls: c.ignore_nulls,
                original_name: c.original_name,
            })))
        } else {
            // Single (non-tuple) argument: nothing to rewrite.
            Ok(Expression::Count(c))
        }
    } else {
        Ok(e)
    }
}
7690
7691 Action::CastTimestampToDatetime => {
7692 let c = if let Expression::Cast(c) = e {
7693 *c
7694 } else {
7695 unreachable!("action only triggered for Cast expressions")
7696 };
7697 Ok(Expression::Cast(Box::new(Cast {
7698 to: DataType::Custom {
7699 name: "DATETIME".to_string(),
7700 },
7701 ..c
7702 })))
7703 }
7704
7705 Action::CastTimestampStripTz => {
7706 // CAST(x AS TIMESTAMP(n) WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark/BigQuery
7707 let c = if let Expression::Cast(c) = e {
7708 *c
7709 } else {
7710 unreachable!("action only triggered for Cast expressions")
7711 };
7712 Ok(Expression::Cast(Box::new(Cast {
7713 to: DataType::Timestamp {
7714 precision: None,
7715 timezone: false,
7716 },
7717 ..c
7718 })))
7719 }
7720
7721 Action::CastTimestamptzToFunc => {
7722 // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) function for MySQL/StarRocks
7723 let c = if let Expression::Cast(c) = e {
7724 *c
7725 } else {
7726 unreachable!("action only triggered for Cast expressions")
7727 };
7728 Ok(Expression::Function(Box::new(Function::new(
7729 "TIMESTAMP".to_string(),
7730 vec![c.this],
7731 ))))
7732 }
7733
7734 Action::ToDateToCast => {
7735 // Convert TO_DATE(x) -> CAST(x AS DATE) for DuckDB
7736 if let Expression::Function(f) = e {
7737 let arg = f.args.into_iter().next().unwrap();
7738 Ok(Expression::Cast(Box::new(Cast {
7739 this: arg,
7740 to: DataType::Date,
7741 double_colon_syntax: false,
7742 trailing_comments: vec![],
7743 format: None,
7744 default: None,
7745 })))
7746 } else {
7747 Ok(e)
7748 }
7749 }
7750 Action::DateTruncWrapCast => {
7751 // Handle both Expression::DateTrunc/TimestampTrunc and
7752 // Expression::Function("DATE_TRUNC", [unit, expr])
7753 match e {
7754 Expression::DateTrunc(d) | Expression::TimestampTrunc(d) => {
7755 let input_type = match &d.this {
7756 Expression::Cast(c) => Some(c.to.clone()),
7757 _ => None,
7758 };
7759 if let Some(cast_type) = input_type {
7760 let is_time = matches!(cast_type, DataType::Time { .. });
7761 if is_time {
7762 let date_expr = Expression::Cast(Box::new(Cast {
7763 this: Expression::Literal(
7764 crate::expressions::Literal::String(
7765 "1970-01-01".to_string(),
7766 ),
7767 ),
7768 to: DataType::Date,
7769 double_colon_syntax: false,
7770 trailing_comments: vec![],
7771 format: None,
7772 default: None,
7773 }));
7774 let add_expr =
7775 Expression::Add(Box::new(BinaryOp::new(date_expr, d.this)));
7776 let inner = Expression::DateTrunc(Box::new(DateTruncFunc {
7777 this: add_expr,
7778 unit: d.unit,
7779 }));
7780 Ok(Expression::Cast(Box::new(Cast {
7781 this: inner,
7782 to: cast_type,
7783 double_colon_syntax: false,
7784 trailing_comments: vec![],
7785 format: None,
7786 default: None,
7787 })))
7788 } else {
7789 let inner = Expression::DateTrunc(Box::new(*d));
7790 Ok(Expression::Cast(Box::new(Cast {
7791 this: inner,
7792 to: cast_type,
7793 double_colon_syntax: false,
7794 trailing_comments: vec![],
7795 format: None,
7796 default: None,
7797 })))
7798 }
7799 } else {
7800 Ok(Expression::DateTrunc(d))
7801 }
7802 }
7803 Expression::Function(f) if f.args.len() == 2 => {
7804 // Function-based DATE_TRUNC(unit, expr)
7805 let input_type = match &f.args[1] {
7806 Expression::Cast(c) => Some(c.to.clone()),
7807 _ => None,
7808 };
7809 if let Some(cast_type) = input_type {
7810 let is_time = matches!(cast_type, DataType::Time { .. });
7811 if is_time {
7812 let date_expr = Expression::Cast(Box::new(Cast {
7813 this: Expression::Literal(
7814 crate::expressions::Literal::String(
7815 "1970-01-01".to_string(),
7816 ),
7817 ),
7818 to: DataType::Date,
7819 double_colon_syntax: false,
7820 trailing_comments: vec![],
7821 format: None,
7822 default: None,
7823 }));
7824 let mut args = f.args;
7825 let unit_arg = args.remove(0);
7826 let time_expr = args.remove(0);
7827 let add_expr = Expression::Add(Box::new(BinaryOp::new(
7828 date_expr, time_expr,
7829 )));
7830 let inner = Expression::Function(Box::new(Function::new(
7831 "DATE_TRUNC".to_string(),
7832 vec![unit_arg, add_expr],
7833 )));
7834 Ok(Expression::Cast(Box::new(Cast {
7835 this: inner,
7836 to: cast_type,
7837 double_colon_syntax: false,
7838 trailing_comments: vec![],
7839 format: None,
7840 default: None,
7841 })))
7842 } else {
7843 // Wrap the function in CAST
7844 Ok(Expression::Cast(Box::new(Cast {
7845 this: Expression::Function(f),
7846 to: cast_type,
7847 double_colon_syntax: false,
7848 trailing_comments: vec![],
7849 format: None,
7850 default: None,
7851 })))
7852 }
7853 } else {
7854 Ok(Expression::Function(f))
7855 }
7856 }
7857 other => Ok(other),
7858 }
7859 }
7860
7861 Action::RegexpReplaceSnowflakeToDuckDB => {
7862 // Snowflake REGEXP_REPLACE(s, p, r, position) -> REGEXP_REPLACE(s, p, r, 'g')
7863 if let Expression::Function(f) = e {
7864 let mut args = f.args;
7865 let subject = args.remove(0);
7866 let pattern = args.remove(0);
7867 let replacement = args.remove(0);
7868 Ok(Expression::Function(Box::new(Function::new(
7869 "REGEXP_REPLACE".to_string(),
7870 vec![
7871 subject,
7872 pattern,
7873 replacement,
7874 Expression::Literal(crate::expressions::Literal::String(
7875 "g".to_string(),
7876 )),
7877 ],
7878 ))))
7879 } else {
7880 Ok(e)
7881 }
7882 }
7883
Action::SetToVariable => {
    // For DuckDB: SET a = 1 -> SET VARIABLE a = 1
    if let Expression::SetStatement(mut s) = e {
        for item in &mut s.items {
            // Only annotate items with no kind yet; items that already carry
            // a kind (e.g. SESSION/GLOBAL) are left alone.
            if item.kind.is_none() {
                // Check if name already has VARIABLE prefix (from DuckDB source parsing)
                // NOTE(review): this assumes the DuckDB parser encodes
                // `SET VARIABLE a` as an identifier literally named
                // "VARIABLE a" — confirm against the parser.
                let already_variable = match &item.name {
                    Expression::Identifier(id) => id.name.starts_with("VARIABLE "),
                    _ => false,
                };
                if already_variable {
                    // Extract the actual name and set kind, so VARIABLE is
                    // not emitted twice when generating.
                    if let Expression::Identifier(ref mut id) = item.name {
                        let actual_name = id.name["VARIABLE ".len()..].to_string();
                        id.name = actual_name;
                    }
                }
                item.kind = Some("VARIABLE".to_string());
            }
        }
        Ok(Expression::SetStatement(s))
    } else {
        Ok(e)
    }
}
7909
7910 Action::ConvertTimezoneToExpr => {
7911 // Convert Function("CONVERT_TIMEZONE", args) to Expression::ConvertTimezone
7912 // This prevents Redshift's transform_expr from expanding 2-arg to 3-arg with 'UTC'
7913 if let Expression::Function(f) = e {
7914 if f.args.len() == 2 {
7915 let mut args = f.args;
7916 let target_tz = args.remove(0);
7917 let timestamp = args.remove(0);
7918 Ok(Expression::ConvertTimezone(Box::new(ConvertTimezone {
7919 source_tz: None,
7920 target_tz: Some(Box::new(target_tz)),
7921 timestamp: Some(Box::new(timestamp)),
7922 options: vec![],
7923 })))
7924 } else if f.args.len() == 3 {
7925 let mut args = f.args;
7926 let source_tz = args.remove(0);
7927 let target_tz = args.remove(0);
7928 let timestamp = args.remove(0);
7929 Ok(Expression::ConvertTimezone(Box::new(ConvertTimezone {
7930 source_tz: Some(Box::new(source_tz)),
7931 target_tz: Some(Box::new(target_tz)),
7932 timestamp: Some(Box::new(timestamp)),
7933 options: vec![],
7934 })))
7935 } else {
7936 Ok(Expression::Function(f))
7937 }
7938 } else {
7939 Ok(e)
7940 }
7941 }
7942
7943 Action::BigQueryCastType => {
7944 // Convert BigQuery types to standard SQL types
7945 if let Expression::DataType(dt) = e {
7946 match dt {
7947 DataType::Custom { ref name } if name.eq_ignore_ascii_case("INT64") => {
7948 Ok(Expression::DataType(DataType::BigInt { length: None }))
7949 }
7950 DataType::Custom { ref name }
7951 if name.eq_ignore_ascii_case("FLOAT64") =>
7952 {
7953 Ok(Expression::DataType(DataType::Double {
7954 precision: None,
7955 scale: None,
7956 }))
7957 }
7958 DataType::Custom { ref name } if name.eq_ignore_ascii_case("BOOL") => {
7959 Ok(Expression::DataType(DataType::Boolean))
7960 }
7961 DataType::Custom { ref name } if name.eq_ignore_ascii_case("BYTES") => {
7962 Ok(Expression::DataType(DataType::VarBinary { length: None }))
7963 }
7964 DataType::Custom { ref name }
7965 if name.eq_ignore_ascii_case("NUMERIC") =>
7966 {
7967 // For DuckDB target, use Custom("DECIMAL") to avoid DuckDB's
7968 // default precision (18, 3) being added to bare DECIMAL
7969 if matches!(target, DialectType::DuckDB) {
7970 Ok(Expression::DataType(DataType::Custom {
7971 name: "DECIMAL".to_string(),
7972 }))
7973 } else {
7974 Ok(Expression::DataType(DataType::Decimal {
7975 precision: None,
7976 scale: None,
7977 }))
7978 }
7979 }
7980 DataType::Custom { ref name }
7981 if name.eq_ignore_ascii_case("STRING") =>
7982 {
7983 Ok(Expression::DataType(DataType::String { length: None }))
7984 }
7985 DataType::Custom { ref name }
7986 if name.eq_ignore_ascii_case("DATETIME") =>
7987 {
7988 Ok(Expression::DataType(DataType::Timestamp {
7989 precision: None,
7990 timezone: false,
7991 }))
7992 }
7993 _ => Ok(Expression::DataType(dt)),
7994 }
7995 } else {
7996 Ok(e)
7997 }
7998 }
7999
8000 Action::BigQuerySafeDivide => {
8001 // Convert SafeDivide expression to IF/CASE form for most targets
8002 if let Expression::SafeDivide(sd) = e {
8003 let x = *sd.this;
8004 let y = *sd.expression;
8005 // Wrap x and y in parens if they're complex expressions
8006 let y_ref = match &y {
8007 Expression::Column(_)
8008 | Expression::Literal(_)
8009 | Expression::Identifier(_) => y.clone(),
8010 _ => Expression::Paren(Box::new(Paren {
8011 this: y.clone(),
8012 trailing_comments: vec![],
8013 })),
8014 };
8015 let x_ref = match &x {
8016 Expression::Column(_)
8017 | Expression::Literal(_)
8018 | Expression::Identifier(_) => x.clone(),
8019 _ => Expression::Paren(Box::new(Paren {
8020 this: x.clone(),
8021 trailing_comments: vec![],
8022 })),
8023 };
8024 let condition = Expression::Neq(Box::new(BinaryOp::new(
8025 y_ref.clone(),
8026 Expression::number(0),
8027 )));
8028 let div_expr = Expression::Div(Box::new(BinaryOp::new(x_ref, y_ref)));
8029
8030 if matches!(target, DialectType::Presto | DialectType::Trino) {
8031 // Presto/Trino: IF(y <> 0, CAST(x AS DOUBLE) / y, NULL)
8032 let cast_x = Expression::Cast(Box::new(Cast {
8033 this: match &x {
8034 Expression::Column(_)
8035 | Expression::Literal(_)
8036 | Expression::Identifier(_) => x,
8037 _ => Expression::Paren(Box::new(Paren {
8038 this: x,
8039 trailing_comments: vec![],
8040 })),
8041 },
8042 to: DataType::Double {
8043 precision: None,
8044 scale: None,
8045 },
8046 trailing_comments: vec![],
8047 double_colon_syntax: false,
8048 format: None,
8049 default: None,
8050 }));
8051 let cast_div = Expression::Div(Box::new(BinaryOp::new(
8052 cast_x,
8053 match &y {
8054 Expression::Column(_)
8055 | Expression::Literal(_)
8056 | Expression::Identifier(_) => y,
8057 _ => Expression::Paren(Box::new(Paren {
8058 this: y,
8059 trailing_comments: vec![],
8060 })),
8061 },
8062 )));
8063 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
8064 condition,
8065 true_value: cast_div,
8066 false_value: Some(Expression::Null(Null)),
8067 original_name: None,
8068 })))
8069 } else if matches!(target, DialectType::PostgreSQL) {
8070 // PostgreSQL: CASE WHEN y <> 0 THEN CAST(x AS DOUBLE PRECISION) / y ELSE NULL END
8071 let cast_x = Expression::Cast(Box::new(Cast {
8072 this: match &x {
8073 Expression::Column(_)
8074 | Expression::Literal(_)
8075 | Expression::Identifier(_) => x,
8076 _ => Expression::Paren(Box::new(Paren {
8077 this: x,
8078 trailing_comments: vec![],
8079 })),
8080 },
8081 to: DataType::Custom {
8082 name: "DOUBLE PRECISION".to_string(),
8083 },
8084 trailing_comments: vec![],
8085 double_colon_syntax: false,
8086 format: None,
8087 default: None,
8088 }));
8089 let y_paren = match &y {
8090 Expression::Column(_)
8091 | Expression::Literal(_)
8092 | Expression::Identifier(_) => y,
8093 _ => Expression::Paren(Box::new(Paren {
8094 this: y,
8095 trailing_comments: vec![],
8096 })),
8097 };
8098 let cast_div =
8099 Expression::Div(Box::new(BinaryOp::new(cast_x, y_paren)));
8100 Ok(Expression::Case(Box::new(Case {
8101 operand: None,
8102 whens: vec![(condition, cast_div)],
8103 else_: Some(Expression::Null(Null)),
8104 comments: Vec::new(),
8105 })))
8106 } else if matches!(target, DialectType::DuckDB) {
8107 // DuckDB: CASE WHEN y <> 0 THEN x / y ELSE NULL END
8108 Ok(Expression::Case(Box::new(Case {
8109 operand: None,
8110 whens: vec![(condition, div_expr)],
8111 else_: Some(Expression::Null(Null)),
8112 comments: Vec::new(),
8113 })))
8114 } else if matches!(target, DialectType::Snowflake) {
8115 // Snowflake: IFF(y <> 0, x / y, NULL)
8116 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
8117 condition,
8118 true_value: div_expr,
8119 false_value: Some(Expression::Null(Null)),
8120 original_name: Some("IFF".to_string()),
8121 })))
8122 } else {
8123 // All others: IF(y <> 0, x / y, NULL)
8124 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
8125 condition,
8126 true_value: div_expr,
8127 false_value: Some(Expression::Null(Null)),
8128 original_name: None,
8129 })))
8130 }
8131 } else {
8132 Ok(e)
8133 }
8134 }
8135
Action::BigQueryLastDayStripUnit => {
    // BigQuery LAST_DAY(date[, unit]): MONTH is the default unit, so it is
    // stripped here and the call is rewritten per target dialect.
    if let Expression::LastDay(mut ld) = e {
        ld.unit = None; // Strip the unit (MONTH is default)
        match target {
            DialectType::PostgreSQL => {
                // LAST_DAY(date) -> CAST(DATE_TRUNC('MONTH', date) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)
                let date_trunc = Expression::Function(Box::new(Function::new(
                    "DATE_TRUNC".to_string(),
                    vec![
                        Expression::Literal(crate::expressions::Literal::String(
                            "MONTH".to_string(),
                        )),
                        ld.this.clone(),
                    ],
                )));
                // First day of the following month...
                let plus_month =
                    Expression::Add(Box::new(crate::expressions::BinaryOp::new(
                        date_trunc,
                        Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(Expression::Literal(
                                    crate::expressions::Literal::String(
                                        "1 MONTH".to_string(),
                                    ),
                                )),
                                unit: None,
                            },
                        )),
                    )));
                // ...minus one day is the last day of the input's month.
                let minus_day =
                    Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
                        plus_month,
                        Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(Expression::Literal(
                                    crate::expressions::Literal::String(
                                        "1 DAY".to_string(),
                                    ),
                                )),
                                unit: None,
                            },
                        )),
                    )));
                Ok(Expression::Cast(Box::new(Cast {
                    this: minus_day,
                    to: DataType::Date,
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                })))
            }
            DialectType::Presto => {
                // LAST_DAY(date) -> LAST_DAY_OF_MONTH(date)
                Ok(Expression::Function(Box::new(Function::new(
                    "LAST_DAY_OF_MONTH".to_string(),
                    vec![ld.this],
                ))))
            }
            DialectType::ClickHouse => {
                // ClickHouse LAST_DAY(CAST(x AS Nullable(DATE)))
                // Need to wrap the DATE type in Nullable
                let nullable_date = match ld.this {
                    Expression::Cast(mut c) => {
                        c.to = DataType::Nullable {
                            inner: Box::new(DataType::Date),
                        };
                        Expression::Cast(c)
                    }
                    // Operand is not a cast: leave it unwrapped.
                    other => other,
                };
                ld.this = nullable_date;
                Ok(Expression::LastDay(ld))
            }
            // Other targets keep the LastDay node, just without the unit.
            _ => Ok(Expression::LastDay(ld)),
        }
    } else {
        Ok(e)
    }
}

Action::BigQueryCastFormat => {
    // CAST(x AS DATE FORMAT 'fmt') -> PARSE_DATE('%m/%d/%Y', x) for BigQuery
    // CAST(x AS TIMESTAMP FORMAT 'fmt') -> PARSE_TIMESTAMP(...) for BigQuery
    // SAFE_CAST(x AS DATE FORMAT 'fmt') -> CAST(TRY_STRPTIME(x, ...) AS DATE) for DuckDB
    // Pull (operand, target type, FORMAT expr, SAFE?) out of either cast
    // flavor; anything without a FORMAT clause passes through untouched.
    let (this, to, format_expr, is_safe) = match e {
        Expression::Cast(ref c) if c.format.is_some() => (
            c.this.clone(),
            c.to.clone(),
            c.format.as_ref().unwrap().as_ref().clone(),
            false,
        ),
        Expression::SafeCast(ref c) if c.format.is_some() => (
            c.this.clone(),
            c.to.clone(),
            c.format.as_ref().unwrap().as_ref().clone(),
            true,
        ),
        _ => return Ok(e),
    };
    // For CAST(x AS STRING FORMAT ...) when target is BigQuery, keep as-is
    if matches!(target, DialectType::BigQuery) {
        match &to {
            DataType::String { .. } | DataType::VarChar { .. } | DataType::Text => {
                // CAST(x AS STRING FORMAT 'fmt') stays as CAST expression for BigQuery
                return Ok(e);
            }
            _ => {}
        }
    }
    // Extract timezone from format if AT TIME ZONE is present
    let (actual_format_expr, timezone) = match &format_expr {
        Expression::AtTimeZone(ref atz) => {
            (atz.this.clone(), Some(atz.zone.clone()))
        }
        _ => (format_expr.clone(), None),
    };
    // Translate the cast-FORMAT picture into a strftime-style pattern for
    // the parse functions below.
    let strftime_fmt = Self::bq_cast_format_to_strftime(&actual_format_expr);
    match target {
        DialectType::BigQuery => {
            // CAST(x AS DATE FORMAT 'fmt') -> PARSE_DATE(strftime_fmt, x)
            // CAST(x AS TIMESTAMP FORMAT 'fmt' AT TIME ZONE 'tz') -> PARSE_TIMESTAMP(strftime_fmt, x, tz)
            let func_name = match &to {
                DataType::Date => "PARSE_DATE",
                DataType::Timestamp { .. } => "PARSE_TIMESTAMP",
                DataType::Time { .. } => "PARSE_TIMESTAMP",
                // Fallback for any other type with a FORMAT clause.
                _ => "PARSE_TIMESTAMP",
            };
            let mut func_args = vec![strftime_fmt, this];
            if let Some(tz) = timezone {
                func_args.push(tz);
            }
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                func_args,
            ))))
        }
        DialectType::DuckDB => {
            // SAFE_CAST(x AS DATE FORMAT 'fmt') -> CAST(TRY_STRPTIME(x, fmt) AS DATE)
            // CAST(x AS DATE FORMAT 'fmt') -> CAST(STRPTIME(x, fmt) AS DATE)
            let duck_fmt = Self::bq_format_to_duckdb(&strftime_fmt);
            // TRY_STRPTIME yields NULL on parse failure, matching SAFE_CAST.
            let parse_fn_name = if is_safe { "TRY_STRPTIME" } else { "STRPTIME" };
            let parse_call = Expression::Function(Box::new(Function::new(
                parse_fn_name.to_string(),
                vec![this, duck_fmt],
            )));
            Ok(Expression::Cast(Box::new(Cast {
                this: parse_call,
                to,
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            })))
        }
        // Other targets: keep the original cast-with-format expression.
        _ => Ok(e),
    }
}
8294
Action::BigQueryFunctionNormalize => {
    // Delegate to the shared BigQuery function-name/argument normalizer.
    Self::normalize_bigquery_function(e, source, target)
}

Action::BigQueryToHexBare => {
    // Not used anymore - handled directly in normalize_bigquery_function.
    // The arm is kept as a no-op so the Action match stays exhaustive;
    // TODO(review): consider retiring the enum variant itself.
    Ok(e)
}
8303
Action::BigQueryToHexLower => {
    // Normalize LOWER(...) wrappers around hex-conversion calls.
    if let Expression::Lower(uf) = e {
        match uf.this {
            // BQ->BQ: LOWER(TO_HEX(x)) -> TO_HEX(x)
            // NOTE(review): this name check is case-sensitive, unlike the
            // eq_ignore_ascii_case comparisons used elsewhere — confirm the
            // parser normalizes function names before this point.
            Expression::Function(f)
                if matches!(target, DialectType::BigQuery)
                    && f.name == "TO_HEX" =>
            {
                Ok(Expression::Function(f))
            }
            // LOWER(LOWER(HEX/TO_HEX(x))) patterns
            Expression::Lower(inner_uf) => {
                if matches!(target, DialectType::BigQuery) {
                    // BQ->BQ: extract TO_HEX
                    if let Expression::Function(f) = inner_uf.this {
                        Ok(Expression::Function(Box::new(Function::new(
                            "TO_HEX".to_string(),
                            f.args,
                        ))))
                    } else {
                        Ok(Expression::Lower(inner_uf))
                    }
                } else {
                    // Flatten: LOWER(LOWER(x)) -> LOWER(x)
                    Ok(Expression::Lower(inner_uf))
                }
            }
            // Any other operand: rebuild the LOWER around it unchanged.
            other => {
                Ok(Expression::Lower(Box::new(crate::expressions::UnaryFunc {
                    this: other,
                    original_name: None,
                })))
            }
        }
    } else {
        Ok(e)
    }
}

Action::BigQueryToHexUpper => {
    // UPPER(LOWER(HEX(x))) -> HEX(x) (UPPER cancels LOWER, HEX is already uppercase)
    // UPPER(LOWER(TO_HEX(x))) -> TO_HEX(x) for Presto/Trino
    if let Expression::Upper(uf) = e {
        if let Expression::Lower(inner_uf) = uf.this {
            // For BQ->BQ: UPPER(TO_HEX(x)) should stay as UPPER(TO_HEX(x))
            if matches!(target, DialectType::BigQuery) {
                // Restore TO_HEX name in inner function
                if let Expression::Function(f) = inner_uf.this {
                    let restored = Expression::Function(Box::new(Function::new(
                        "TO_HEX".to_string(),
                        f.args,
                    )));
                    Ok(Expression::Upper(Box::new(
                        crate::expressions::UnaryFunc::new(restored),
                    )))
                } else {
                    Ok(Expression::Upper(inner_uf))
                }
            } else {
                // Extract the inner HEX/TO_HEX function (UPPER(LOWER(x)) = x when HEX is uppercase)
                Ok(inner_uf.this)
            }
        } else {
            // No LOWER underneath: keep the UPPER call as-is.
            Ok(Expression::Upper(uf))
        }
    } else {
        Ok(e)
    }
}
8373
8374 Action::BigQueryAnyValueHaving => {
8375 // ANY_VALUE(x HAVING MAX y) -> ARG_MAX_NULL(x, y)
8376 // ANY_VALUE(x HAVING MIN y) -> ARG_MIN_NULL(x, y)
8377 if let Expression::AnyValue(agg) = e {
8378 if let Some((having_expr, is_max)) = agg.having_max {
8379 let func_name = if is_max {
8380 "ARG_MAX_NULL"
8381 } else {
8382 "ARG_MIN_NULL"
8383 };
8384 Ok(Expression::Function(Box::new(Function::new(
8385 func_name.to_string(),
8386 vec![agg.this, *having_expr],
8387 ))))
8388 } else {
8389 Ok(Expression::AnyValue(agg))
8390 }
8391 } else {
8392 Ok(e)
8393 }
8394 }
8395
8396 Action::BigQueryApproxQuantiles => {
8397 // APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [0, 1/n, 2/n, ..., 1])
8398 // APPROX_QUANTILES(DISTINCT x, n) -> APPROX_QUANTILE(DISTINCT x, [0, 1/n, ..., 1])
8399 if let Expression::AggregateFunction(agg) = e {
8400 if agg.args.len() >= 2 {
8401 let x_expr = agg.args[0].clone();
8402 let n_expr = &agg.args[1];
8403
8404 // Extract the numeric value from n_expr
8405 let n = match n_expr {
8406 Expression::Literal(crate::expressions::Literal::Number(s)) => {
8407 s.parse::<usize>().unwrap_or(2)
8408 }
8409 _ => 2,
8410 };
8411
8412 // Generate quantile array: [0, 1/n, 2/n, ..., 1]
8413 let mut quantiles = Vec::new();
8414 for i in 0..=n {
8415 let q = i as f64 / n as f64;
8416 // Format nicely: 0 -> 0, 0.25 -> 0.25, 1 -> 1
8417 if q == 0.0 {
8418 quantiles.push(Expression::number(0));
8419 } else if q == 1.0 {
8420 quantiles.push(Expression::number(1));
8421 } else {
8422 quantiles.push(Expression::Literal(
8423 crate::expressions::Literal::Number(format!("{}", q)),
8424 ));
8425 }
8426 }
8427
8428 let array_expr =
8429 Expression::Array(Box::new(crate::expressions::Array {
8430 expressions: quantiles,
8431 }));
8432
8433 // Preserve DISTINCT modifier
8434 let mut new_func = Function::new(
8435 "APPROX_QUANTILE".to_string(),
8436 vec![x_expr, array_expr],
8437 );
8438 new_func.distinct = agg.distinct;
8439 Ok(Expression::Function(Box::new(new_func)))
8440 } else {
8441 Ok(Expression::AggregateFunction(agg))
8442 }
8443 } else {
8444 Ok(e)
8445 }
8446 }
8447
8448 Action::GenericFunctionNormalize => {
8449 // Helper closure to convert ARBITRARY to target-specific function
8450 fn convert_arbitrary(arg: Expression, target: DialectType) -> Expression {
8451 let name = match target {
8452 DialectType::ClickHouse => "any",
8453 DialectType::TSQL | DialectType::SQLite => "MAX",
8454 DialectType::Hive => "FIRST",
8455 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
8456 "ARBITRARY"
8457 }
8458 _ => "ANY_VALUE",
8459 };
8460 Expression::Function(Box::new(Function::new(name.to_string(), vec![arg])))
8461 }
8462
8463 if let Expression::Function(f) = e {
8464 let name = f.name.to_uppercase();
8465 match name.as_str() {
8466 "ARBITRARY" if f.args.len() == 1 => {
8467 let arg = f.args.into_iter().next().unwrap();
8468 Ok(convert_arbitrary(arg, target))
8469 }
8470 "TO_NUMBER" if f.args.len() == 1 => {
8471 let arg = f.args.into_iter().next().unwrap();
8472 match target {
8473 DialectType::Oracle | DialectType::Snowflake => {
8474 Ok(Expression::Function(Box::new(Function::new(
8475 "TO_NUMBER".to_string(),
8476 vec![arg],
8477 ))))
8478 }
8479 _ => Ok(Expression::Cast(Box::new(crate::expressions::Cast {
8480 this: arg,
8481 to: crate::expressions::DataType::Double {
8482 precision: None,
8483 scale: None,
8484 },
8485 double_colon_syntax: false,
8486 trailing_comments: Vec::new(),
8487 format: None,
8488 default: None,
8489 }))),
8490 }
8491 }
8492 "AGGREGATE" if f.args.len() >= 3 => match target {
8493 DialectType::DuckDB
8494 | DialectType::Hive
8495 | DialectType::Presto
8496 | DialectType::Trino => Ok(Expression::Function(Box::new(
8497 Function::new("REDUCE".to_string(), f.args),
8498 ))),
8499 _ => Ok(Expression::Function(f)),
8500 },
8501 // REGEXP_MATCHES(x, y) -> RegexpLike for most targets, keep for DuckDB
8502 "REGEXP_MATCHES" if f.args.len() >= 2 => {
8503 if matches!(target, DialectType::DuckDB) {
8504 Ok(Expression::Function(f))
8505 } else {
8506 let mut args = f.args;
8507 let this = args.remove(0);
8508 let pattern = args.remove(0);
8509 let flags = if args.is_empty() {
8510 None
8511 } else {
8512 Some(args.remove(0))
8513 };
8514 Ok(Expression::RegexpLike(Box::new(
8515 crate::expressions::RegexpFunc {
8516 this,
8517 pattern,
8518 flags,
8519 },
8520 )))
8521 }
8522 }
8523 // REGEXP_FULL_MATCH (Hive REGEXP) -> RegexpLike
8524 "REGEXP_FULL_MATCH" if f.args.len() >= 2 => {
8525 if matches!(target, DialectType::DuckDB) {
8526 Ok(Expression::Function(f))
8527 } else {
8528 let mut args = f.args;
8529 let this = args.remove(0);
8530 let pattern = args.remove(0);
8531 let flags = if args.is_empty() {
8532 None
8533 } else {
8534 Some(args.remove(0))
8535 };
8536 Ok(Expression::RegexpLike(Box::new(
8537 crate::expressions::RegexpFunc {
8538 this,
8539 pattern,
8540 flags,
8541 },
8542 )))
8543 }
8544 }
8545 // STRUCT_EXTRACT(x, 'field') -> x.field (StructExtract expression)
8546 "STRUCT_EXTRACT" if f.args.len() == 2 => {
8547 let mut args = f.args;
8548 let this = args.remove(0);
8549 let field_expr = args.remove(0);
8550 // Extract string literal to get field name
8551 let field_name = match &field_expr {
8552 Expression::Literal(crate::expressions::Literal::String(s)) => {
8553 s.clone()
8554 }
8555 Expression::Identifier(id) => id.name.clone(),
8556 _ => {
8557 return Ok(Expression::Function(Box::new(Function::new(
8558 "STRUCT_EXTRACT".to_string(),
8559 vec![this, field_expr],
8560 ))))
8561 }
8562 };
8563 Ok(Expression::StructExtract(Box::new(
8564 crate::expressions::StructExtractFunc {
8565 this,
8566 field: crate::expressions::Identifier::new(field_name),
8567 },
8568 )))
8569 }
8570 // LIST_FILTER([4,5,6], x -> x > 4) -> FILTER(ARRAY(4,5,6), x -> x > 4)
8571 "LIST_FILTER" if f.args.len() == 2 => {
8572 let name = match target {
8573 DialectType::DuckDB => "LIST_FILTER",
8574 _ => "FILTER",
8575 };
8576 Ok(Expression::Function(Box::new(Function::new(
8577 name.to_string(),
8578 f.args,
8579 ))))
8580 }
8581 // LIST_TRANSFORM(x, y -> y + 1) -> TRANSFORM(x, y -> y + 1)
8582 "LIST_TRANSFORM" if f.args.len() == 2 => {
8583 let name = match target {
8584 DialectType::DuckDB => "LIST_TRANSFORM",
8585 _ => "TRANSFORM",
8586 };
8587 Ok(Expression::Function(Box::new(Function::new(
8588 name.to_string(),
8589 f.args,
8590 ))))
8591 }
8592 // LIST_SORT(x) -> SORT_ARRAY(x) / ARRAY_SORT(x)
8593 "LIST_SORT" if f.args.len() >= 1 => {
8594 let name = match target {
8595 DialectType::DuckDB
8596 | DialectType::Presto
8597 | DialectType::Trino => "ARRAY_SORT",
8598 _ => "SORT_ARRAY",
8599 };
8600 Ok(Expression::Function(Box::new(Function::new(
8601 name.to_string(),
8602 f.args,
8603 ))))
8604 }
8605 // LIST_REVERSE_SORT(x) -> SORT_ARRAY(x, FALSE) for Spark/Hive, ARRAY_SORT(x, lambda) for Presto
8606 "LIST_REVERSE_SORT" if f.args.len() >= 1 => {
8607 match target {
8608 DialectType::DuckDB => Ok(Expression::Function(Box::new(
8609 Function::new("ARRAY_REVERSE_SORT".to_string(), f.args),
8610 ))),
8611 DialectType::Spark
8612 | DialectType::Databricks
8613 | DialectType::Hive => {
8614 let mut args = f.args;
8615 args.push(Expression::Identifier(
8616 crate::expressions::Identifier::new("FALSE"),
8617 ));
8618 Ok(Expression::Function(Box::new(Function::new(
8619 "SORT_ARRAY".to_string(),
8620 args,
8621 ))))
8622 }
8623 DialectType::Presto
8624 | DialectType::Trino
8625 | DialectType::Athena => {
8626 // ARRAY_SORT(x, (a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END)
8627 let arr = f.args.into_iter().next().unwrap();
8628 let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
8629 parameters: vec![
8630 crate::expressions::Identifier::new("a"),
8631 crate::expressions::Identifier::new("b"),
8632 ],
8633 body: Expression::Case(Box::new(Case {
8634 operand: None,
8635 whens: vec![
8636 (
8637 Expression::Lt(Box::new(BinaryOp::new(
8638 Expression::Identifier(crate::expressions::Identifier::new("a")),
8639 Expression::Identifier(crate::expressions::Identifier::new("b")),
8640 ))),
8641 Expression::number(1),
8642 ),
8643 (
8644 Expression::Gt(Box::new(BinaryOp::new(
8645 Expression::Identifier(crate::expressions::Identifier::new("a")),
8646 Expression::Identifier(crate::expressions::Identifier::new("b")),
8647 ))),
8648 Expression::Literal(Literal::Number("-1".to_string())),
8649 ),
8650 ],
8651 else_: Some(Expression::number(0)),
8652 comments: Vec::new(),
8653 })),
8654 colon: false,
8655 parameter_types: Vec::new(),
8656 }));
8657 Ok(Expression::Function(Box::new(Function::new(
8658 "ARRAY_SORT".to_string(),
8659 vec![arr, lambda],
8660 ))))
8661 }
8662 _ => Ok(Expression::Function(Box::new(Function::new(
8663 "LIST_REVERSE_SORT".to_string(),
8664 f.args,
8665 )))),
8666 }
8667 }
8668 // SPLIT_TO_ARRAY(x) with 1 arg -> add default ',' separator and rename
8669 "SPLIT_TO_ARRAY" if f.args.len() == 1 => {
8670 let mut args = f.args;
8671 args.push(Expression::string(","));
8672 let name = match target {
8673 DialectType::DuckDB => "STR_SPLIT",
8674 DialectType::Presto | DialectType::Trino => "SPLIT",
8675 DialectType::Spark
8676 | DialectType::Databricks
8677 | DialectType::Hive => "SPLIT",
8678 DialectType::PostgreSQL => "STRING_TO_ARRAY",
8679 DialectType::Redshift => "SPLIT_TO_ARRAY",
8680 _ => "SPLIT",
8681 };
8682 Ok(Expression::Function(Box::new(Function::new(
8683 name.to_string(),
8684 args,
8685 ))))
8686 }
8687 // SPLIT_TO_ARRAY(x, sep) with 2 args -> rename based on target
8688 "SPLIT_TO_ARRAY" if f.args.len() == 2 => {
8689 let name = match target {
8690 DialectType::DuckDB => "STR_SPLIT",
8691 DialectType::Presto | DialectType::Trino => "SPLIT",
8692 DialectType::Spark
8693 | DialectType::Databricks
8694 | DialectType::Hive => "SPLIT",
8695 DialectType::PostgreSQL => "STRING_TO_ARRAY",
8696 DialectType::Redshift => "SPLIT_TO_ARRAY",
8697 _ => "SPLIT",
8698 };
8699 Ok(Expression::Function(Box::new(Function::new(
8700 name.to_string(),
8701 f.args,
8702 ))))
8703 }
8704 // STRING_TO_ARRAY/STR_SPLIT -> target-specific split function
8705 "STRING_TO_ARRAY" | "STR_SPLIT" if f.args.len() >= 2 => {
8706 let name = match target {
8707 DialectType::DuckDB => "STR_SPLIT",
8708 DialectType::Presto | DialectType::Trino => "SPLIT",
8709 DialectType::Spark
8710 | DialectType::Databricks
8711 | DialectType::Hive => "SPLIT",
8712 DialectType::Doris | DialectType::StarRocks => {
8713 "SPLIT_BY_STRING"
8714 }
8715 DialectType::PostgreSQL | DialectType::Redshift => {
8716 "STRING_TO_ARRAY"
8717 }
8718 _ => "SPLIT",
8719 };
8720 // For Spark/Hive, SPLIT uses regex - need to escape literal with \Q...\E
8721 if matches!(
8722 target,
8723 DialectType::Spark
8724 | DialectType::Databricks
8725 | DialectType::Hive
8726 ) {
8727 let mut args = f.args;
8728 let x = args.remove(0);
8729 let sep = args.remove(0);
8730 // Wrap separator in CONCAT('\\Q', sep, '\\E')
8731 let escaped_sep =
8732 Expression::Function(Box::new(Function::new(
8733 "CONCAT".to_string(),
8734 vec![
8735 Expression::string("\\Q"),
8736 sep,
8737 Expression::string("\\E"),
8738 ],
8739 )));
8740 Ok(Expression::Function(Box::new(Function::new(
8741 name.to_string(),
8742 vec![x, escaped_sep],
8743 ))))
8744 } else {
8745 Ok(Expression::Function(Box::new(Function::new(
8746 name.to_string(),
8747 f.args,
8748 ))))
8749 }
8750 }
8751 // STR_SPLIT_REGEX(x, 'a') / REGEXP_SPLIT(x, 'a') -> target-specific regex split
8752 "STR_SPLIT_REGEX" | "REGEXP_SPLIT" if f.args.len() == 2 => {
8753 let name = match target {
8754 DialectType::DuckDB => "STR_SPLIT_REGEX",
8755 DialectType::Presto | DialectType::Trino => "REGEXP_SPLIT",
8756 DialectType::Spark
8757 | DialectType::Databricks
8758 | DialectType::Hive => "SPLIT",
8759 _ => "REGEXP_SPLIT",
8760 };
8761 Ok(Expression::Function(Box::new(Function::new(
8762 name.to_string(),
8763 f.args,
8764 ))))
8765 }
8766 // SPLIT(x, sep) from Presto/StarRocks/Doris -> target-specific split with regex escaping for Hive/Spark
8767 "SPLIT"
8768 if f.args.len() == 2
8769 && matches!(
8770 source,
8771 DialectType::Presto
8772 | DialectType::Trino
8773 | DialectType::Athena
8774 | DialectType::StarRocks
8775 | DialectType::Doris
8776 )
8777 && matches!(
8778 target,
8779 DialectType::Spark
8780 | DialectType::Databricks
8781 | DialectType::Hive
8782 ) =>
8783 {
8784 // Presto/StarRocks SPLIT is literal, Hive/Spark SPLIT is regex
8785 let mut args = f.args;
8786 let x = args.remove(0);
8787 let sep = args.remove(0);
8788 let escaped_sep = Expression::Function(Box::new(Function::new(
8789 "CONCAT".to_string(),
8790 vec![Expression::string("\\Q"), sep, Expression::string("\\E")],
8791 )));
8792 Ok(Expression::Function(Box::new(Function::new(
8793 "SPLIT".to_string(),
8794 vec![x, escaped_sep],
8795 ))))
8796 }
8797 // SUBSTRINGINDEX -> SUBSTRING_INDEX (ClickHouse camelCase to standard)
8798 // For ClickHouse target, preserve original name to maintain camelCase
8799 "SUBSTRINGINDEX" => {
8800 let name = if matches!(target, DialectType::ClickHouse) {
8801 f.name.clone()
8802 } else {
8803 "SUBSTRING_INDEX".to_string()
8804 };
8805 Ok(Expression::Function(Box::new(Function::new(name, f.args))))
8806 }
8807 // ARRAY_LENGTH/SIZE/CARDINALITY -> target-specific array length function
8808 "ARRAY_LENGTH" | "SIZE" | "CARDINALITY" => {
8809 // Get the array argument (first arg, drop dimension args)
8810 let mut args = f.args;
8811 let arr = if args.is_empty() {
8812 return Ok(Expression::Function(Box::new(Function::new(
8813 name.to_string(),
8814 args,
8815 ))));
8816 } else {
8817 args.remove(0)
8818 };
8819 let name =
8820 match target {
8821 DialectType::Spark
8822 | DialectType::Databricks
8823 | DialectType::Hive => "SIZE",
8824 DialectType::Presto | DialectType::Trino => "CARDINALITY",
8825 DialectType::BigQuery => "ARRAY_LENGTH",
8826 DialectType::DuckDB => {
8827 // DuckDB: use ARRAY_LENGTH with all args
8828 let mut all_args = vec![arr];
8829 all_args.extend(args);
8830 return Ok(Expression::Function(Box::new(
8831 Function::new("ARRAY_LENGTH".to_string(), all_args),
8832 )));
8833 }
8834 DialectType::PostgreSQL | DialectType::Redshift => {
8835 // Keep ARRAY_LENGTH with dimension arg
8836 let mut all_args = vec![arr];
8837 all_args.extend(args);
8838 return Ok(Expression::Function(Box::new(
8839 Function::new("ARRAY_LENGTH".to_string(), all_args),
8840 )));
8841 }
8842 DialectType::ClickHouse => "LENGTH",
8843 _ => "ARRAY_LENGTH",
8844 };
8845 Ok(Expression::Function(Box::new(Function::new(
8846 name.to_string(),
8847 vec![arr],
8848 ))))
8849 }
8850 // UNICODE(x) -> target-specific codepoint function
8851 "UNICODE" if f.args.len() == 1 => {
8852 match target {
8853 DialectType::SQLite | DialectType::DuckDB => {
8854 Ok(Expression::Function(Box::new(Function::new(
8855 "UNICODE".to_string(),
8856 f.args,
8857 ))))
8858 }
8859 DialectType::Oracle => {
8860 // ASCII(UNISTR(x))
8861 let inner = Expression::Function(Box::new(Function::new(
8862 "UNISTR".to_string(),
8863 f.args,
8864 )));
8865 Ok(Expression::Function(Box::new(Function::new(
8866 "ASCII".to_string(),
8867 vec![inner],
8868 ))))
8869 }
8870 DialectType::MySQL => {
8871 // ORD(CONVERT(x USING utf32))
8872 let arg = f.args.into_iter().next().unwrap();
8873 let convert_expr = Expression::ConvertToCharset(Box::new(
8874 crate::expressions::ConvertToCharset {
8875 this: Box::new(arg),
8876 dest: Some(Box::new(Expression::Identifier(
8877 crate::expressions::Identifier::new("utf32"),
8878 ))),
8879 source: None,
8880 },
8881 ));
8882 Ok(Expression::Function(Box::new(Function::new(
8883 "ORD".to_string(),
8884 vec![convert_expr],
8885 ))))
8886 }
8887 _ => Ok(Expression::Function(Box::new(Function::new(
8888 "ASCII".to_string(),
8889 f.args,
8890 )))),
8891 }
8892 }
8893 // XOR(a, b, ...) -> a XOR b XOR ... for MySQL, BITWISE_XOR for Presto/Trino, # for PostgreSQL, ^ for BigQuery
8894 "XOR" if f.args.len() >= 2 => {
8895 match target {
8896 DialectType::ClickHouse => {
8897 // ClickHouse: keep as xor() function with lowercase name
8898 Ok(Expression::Function(Box::new(Function::new(
8899 "xor".to_string(),
8900 f.args,
8901 ))))
8902 }
8903 DialectType::Presto | DialectType::Trino => {
8904 if f.args.len() == 2 {
8905 Ok(Expression::Function(Box::new(Function::new(
8906 "BITWISE_XOR".to_string(),
8907 f.args,
8908 ))))
8909 } else {
8910 // Nest: BITWISE_XOR(BITWISE_XOR(a, b), c)
8911 let mut args = f.args;
8912 let first = args.remove(0);
8913 let second = args.remove(0);
8914 let mut result =
8915 Expression::Function(Box::new(Function::new(
8916 "BITWISE_XOR".to_string(),
8917 vec![first, second],
8918 )));
8919 for arg in args {
8920 result =
8921 Expression::Function(Box::new(Function::new(
8922 "BITWISE_XOR".to_string(),
8923 vec![result, arg],
8924 )));
8925 }
8926 Ok(result)
8927 }
8928 }
8929 DialectType::MySQL
8930 | DialectType::SingleStore
8931 | DialectType::Doris
8932 | DialectType::StarRocks => {
8933 // Convert XOR(a, b, c) -> Expression::Xor with expressions list
8934 let args = f.args;
8935 Ok(Expression::Xor(Box::new(crate::expressions::Xor {
8936 this: None,
8937 expression: None,
8938 expressions: args,
8939 })))
8940 }
8941 DialectType::PostgreSQL | DialectType::Redshift => {
8942 // PostgreSQL: a # b (hash operator for XOR)
8943 let mut args = f.args;
8944 let first = args.remove(0);
8945 let second = args.remove(0);
8946 let mut result = Expression::BitwiseXor(Box::new(
8947 BinaryOp::new(first, second),
8948 ));
8949 for arg in args {
8950 result = Expression::BitwiseXor(Box::new(
8951 BinaryOp::new(result, arg),
8952 ));
8953 }
8954 Ok(result)
8955 }
8956 DialectType::DuckDB => {
8957 // DuckDB: keep as XOR function (DuckDB ^ is Power, not XOR)
8958 Ok(Expression::Function(Box::new(Function::new(
8959 "XOR".to_string(),
8960 f.args,
8961 ))))
8962 }
8963 DialectType::BigQuery => {
8964 // BigQuery: a ^ b (caret operator for XOR)
8965 let mut args = f.args;
8966 let first = args.remove(0);
8967 let second = args.remove(0);
8968 let mut result = Expression::BitwiseXor(Box::new(
8969 BinaryOp::new(first, second),
8970 ));
8971 for arg in args {
8972 result = Expression::BitwiseXor(Box::new(
8973 BinaryOp::new(result, arg),
8974 ));
8975 }
8976 Ok(result)
8977 }
8978 _ => Ok(Expression::Function(Box::new(Function::new(
8979 "XOR".to_string(),
8980 f.args,
8981 )))),
8982 }
8983 }
8984 // ARRAY_REVERSE_SORT(x) -> SORT_ARRAY(x, FALSE) for Spark/Hive, ARRAY_SORT(x, lambda) for Presto
8985 "ARRAY_REVERSE_SORT" if f.args.len() >= 1 => {
8986 match target {
8987 DialectType::Spark
8988 | DialectType::Databricks
8989 | DialectType::Hive => {
8990 let mut args = f.args;
8991 args.push(Expression::Identifier(
8992 crate::expressions::Identifier::new("FALSE"),
8993 ));
8994 Ok(Expression::Function(Box::new(Function::new(
8995 "SORT_ARRAY".to_string(),
8996 args,
8997 ))))
8998 }
8999 DialectType::Presto
9000 | DialectType::Trino
9001 | DialectType::Athena => {
9002 // ARRAY_SORT(x, (a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END)
9003 let arr = f.args.into_iter().next().unwrap();
9004 let lambda = Expression::Lambda(Box::new(
9005 crate::expressions::LambdaExpr {
9006 parameters: vec![
9007 Identifier::new("a"),
9008 Identifier::new("b"),
9009 ],
9010 colon: false,
9011 parameter_types: Vec::new(),
9012 body: Expression::Case(Box::new(Case {
9013 operand: None,
9014 whens: vec![
9015 (
9016 Expression::Lt(Box::new(
9017 BinaryOp::new(
9018 Expression::Identifier(
9019 Identifier::new("a"),
9020 ),
9021 Expression::Identifier(
9022 Identifier::new("b"),
9023 ),
9024 ),
9025 )),
9026 Expression::number(1),
9027 ),
9028 (
9029 Expression::Gt(Box::new(
9030 BinaryOp::new(
9031 Expression::Identifier(
9032 Identifier::new("a"),
9033 ),
9034 Expression::Identifier(
9035 Identifier::new("b"),
9036 ),
9037 ),
9038 )),
9039 Expression::Neg(Box::new(
9040 crate::expressions::UnaryOp {
9041 this: Expression::number(1),
9042 },
9043 )),
9044 ),
9045 ],
9046 else_: Some(Expression::number(0)),
9047 comments: Vec::new(),
9048 })),
9049 },
9050 ));
9051 Ok(Expression::Function(Box::new(Function::new(
9052 "ARRAY_SORT".to_string(),
9053 vec![arr, lambda],
9054 ))))
9055 }
9056 _ => Ok(Expression::Function(Box::new(Function::new(
9057 "ARRAY_REVERSE_SORT".to_string(),
9058 f.args,
9059 )))),
9060 }
9061 }
9062 // ENCODE(x) -> ENCODE(x, 'utf-8') for Spark/Hive, TO_UTF8(x) for Presto
9063 "ENCODE" if f.args.len() == 1 => match target {
9064 DialectType::Spark
9065 | DialectType::Databricks
9066 | DialectType::Hive => {
9067 let mut args = f.args;
9068 args.push(Expression::string("utf-8"));
9069 Ok(Expression::Function(Box::new(Function::new(
9070 "ENCODE".to_string(),
9071 args,
9072 ))))
9073 }
9074 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
9075 Ok(Expression::Function(Box::new(Function::new(
9076 "TO_UTF8".to_string(),
9077 f.args,
9078 ))))
9079 }
9080 _ => Ok(Expression::Function(Box::new(Function::new(
9081 "ENCODE".to_string(),
9082 f.args,
9083 )))),
9084 },
9085 // DECODE(x) -> DECODE(x, 'utf-8') for Spark/Hive, FROM_UTF8(x) for Presto
9086 "DECODE" if f.args.len() == 1 => match target {
9087 DialectType::Spark
9088 | DialectType::Databricks
9089 | DialectType::Hive => {
9090 let mut args = f.args;
9091 args.push(Expression::string("utf-8"));
9092 Ok(Expression::Function(Box::new(Function::new(
9093 "DECODE".to_string(),
9094 args,
9095 ))))
9096 }
9097 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
9098 Ok(Expression::Function(Box::new(Function::new(
9099 "FROM_UTF8".to_string(),
9100 f.args,
9101 ))))
9102 }
9103 _ => Ok(Expression::Function(Box::new(Function::new(
9104 "DECODE".to_string(),
9105 f.args,
9106 )))),
9107 },
9108 // QUANTILE(x, p) -> PERCENTILE(x, p) for Spark/Hive
9109 "QUANTILE" if f.args.len() == 2 => {
9110 let name = match target {
9111 DialectType::Spark
9112 | DialectType::Databricks
9113 | DialectType::Hive => "PERCENTILE",
9114 DialectType::Presto | DialectType::Trino => "APPROX_PERCENTILE",
9115 DialectType::BigQuery => "PERCENTILE_CONT",
9116 _ => "QUANTILE",
9117 };
9118 Ok(Expression::Function(Box::new(Function::new(
9119 name.to_string(),
9120 f.args,
9121 ))))
9122 }
9123 // QUANTILE_CONT(x, q) -> PERCENTILE_CONT(q) WITHIN GROUP (ORDER BY x) for PostgreSQL/Snowflake
9124 "QUANTILE_CONT" if f.args.len() == 2 => {
9125 let mut args = f.args;
9126 let column = args.remove(0);
9127 let quantile = args.remove(0);
9128 match target {
9129 DialectType::DuckDB => {
9130 Ok(Expression::Function(Box::new(Function::new(
9131 "QUANTILE_CONT".to_string(),
9132 vec![column, quantile],
9133 ))))
9134 }
9135 DialectType::PostgreSQL
9136 | DialectType::Redshift
9137 | DialectType::Snowflake => {
9138 // PERCENTILE_CONT(q) WITHIN GROUP (ORDER BY x)
9139 let inner = Expression::PercentileCont(Box::new(
9140 crate::expressions::PercentileFunc {
9141 this: column.clone(),
9142 percentile: quantile,
9143 order_by: None,
9144 filter: None,
9145 },
9146 ));
9147 Ok(Expression::WithinGroup(Box::new(
9148 crate::expressions::WithinGroup {
9149 this: inner,
9150 order_by: vec![crate::expressions::Ordered {
9151 this: column,
9152 desc: false,
9153 nulls_first: None,
9154 explicit_asc: false,
9155 with_fill: None,
9156 }],
9157 },
9158 )))
9159 }
9160 _ => Ok(Expression::Function(Box::new(Function::new(
9161 "QUANTILE_CONT".to_string(),
9162 vec![column, quantile],
9163 )))),
9164 }
9165 }
9166 // QUANTILE_DISC(x, q) -> PERCENTILE_DISC(q) WITHIN GROUP (ORDER BY x) for PostgreSQL/Snowflake
9167 "QUANTILE_DISC" if f.args.len() == 2 => {
9168 let mut args = f.args;
9169 let column = args.remove(0);
9170 let quantile = args.remove(0);
9171 match target {
9172 DialectType::DuckDB => {
9173 Ok(Expression::Function(Box::new(Function::new(
9174 "QUANTILE_DISC".to_string(),
9175 vec![column, quantile],
9176 ))))
9177 }
9178 DialectType::PostgreSQL
9179 | DialectType::Redshift
9180 | DialectType::Snowflake => {
9181 // PERCENTILE_DISC(q) WITHIN GROUP (ORDER BY x)
9182 let inner = Expression::PercentileDisc(Box::new(
9183 crate::expressions::PercentileFunc {
9184 this: column.clone(),
9185 percentile: quantile,
9186 order_by: None,
9187 filter: None,
9188 },
9189 ));
9190 Ok(Expression::WithinGroup(Box::new(
9191 crate::expressions::WithinGroup {
9192 this: inner,
9193 order_by: vec![crate::expressions::Ordered {
9194 this: column,
9195 desc: false,
9196 nulls_first: None,
9197 explicit_asc: false,
9198 with_fill: None,
9199 }],
9200 },
9201 )))
9202 }
9203 _ => Ok(Expression::Function(Box::new(Function::new(
9204 "QUANTILE_DISC".to_string(),
9205 vec![column, quantile],
9206 )))),
9207 }
9208 }
9209 // PERCENTILE_APPROX(x, p) / APPROX_PERCENTILE(x, p) -> target-specific
9210 "PERCENTILE_APPROX" | "APPROX_PERCENTILE" if f.args.len() >= 2 => {
9211 let name = match target {
9212 DialectType::Presto
9213 | DialectType::Trino
9214 | DialectType::Athena => "APPROX_PERCENTILE",
9215 DialectType::Spark
9216 | DialectType::Databricks
9217 | DialectType::Hive => "PERCENTILE_APPROX",
9218 DialectType::DuckDB => "APPROX_QUANTILE",
9219 DialectType::PostgreSQL | DialectType::Redshift => {
9220 "PERCENTILE_CONT"
9221 }
9222 _ => &f.name,
9223 };
9224 Ok(Expression::Function(Box::new(Function::new(
9225 name.to_string(),
9226 f.args,
9227 ))))
9228 }
9229 // EPOCH(x) -> UNIX_TIMESTAMP(x) for Spark/Hive
9230 "EPOCH" if f.args.len() == 1 => {
9231 let name = match target {
9232 DialectType::Spark
9233 | DialectType::Databricks
9234 | DialectType::Hive => "UNIX_TIMESTAMP",
9235 DialectType::Presto | DialectType::Trino => "TO_UNIXTIME",
9236 _ => "EPOCH",
9237 };
9238 Ok(Expression::Function(Box::new(Function::new(
9239 name.to_string(),
9240 f.args,
9241 ))))
9242 }
9243 // EPOCH_MS(x) -> target-specific epoch milliseconds conversion
9244 "EPOCH_MS" if f.args.len() == 1 => {
9245 match target {
9246 DialectType::Spark | DialectType::Databricks => {
9247 Ok(Expression::Function(Box::new(Function::new(
9248 "TIMESTAMP_MILLIS".to_string(),
9249 f.args,
9250 ))))
9251 }
9252 DialectType::Hive => {
9253 // Hive: FROM_UNIXTIME(x / 1000)
9254 let arg = f.args.into_iter().next().unwrap();
9255 let div_expr = Expression::Div(Box::new(
9256 crate::expressions::BinaryOp::new(
9257 arg,
9258 Expression::number(1000),
9259 ),
9260 ));
9261 Ok(Expression::Function(Box::new(Function::new(
9262 "FROM_UNIXTIME".to_string(),
9263 vec![div_expr],
9264 ))))
9265 }
9266 DialectType::Presto | DialectType::Trino => {
9267 Ok(Expression::Function(Box::new(Function::new(
9268 "FROM_UNIXTIME".to_string(),
9269 vec![Expression::Div(Box::new(
9270 crate::expressions::BinaryOp::new(
9271 f.args.into_iter().next().unwrap(),
9272 Expression::number(1000),
9273 ),
9274 ))],
9275 ))))
9276 }
9277 _ => Ok(Expression::Function(Box::new(Function::new(
9278 "EPOCH_MS".to_string(),
9279 f.args,
9280 )))),
9281 }
9282 }
9283 // HASHBYTES('algorithm', x) -> target-specific hash function
9284 "HASHBYTES" if f.args.len() == 2 => {
9285 // Keep HASHBYTES as-is for TSQL target
9286 if matches!(target, DialectType::TSQL) {
9287 return Ok(Expression::Function(f));
9288 }
9289 let algo_expr = &f.args[0];
9290 let algo = match algo_expr {
9291 Expression::Literal(crate::expressions::Literal::String(s)) => {
9292 s.to_uppercase()
9293 }
9294 _ => return Ok(Expression::Function(f)),
9295 };
9296 let data_arg = f.args.into_iter().nth(1).unwrap();
9297 match algo.as_str() {
9298 "SHA1" => {
9299 let name = match target {
9300 DialectType::Spark | DialectType::Databricks => "SHA",
9301 DialectType::Hive => "SHA1",
9302 _ => "SHA1",
9303 };
9304 Ok(Expression::Function(Box::new(Function::new(
9305 name.to_string(),
9306 vec![data_arg],
9307 ))))
9308 }
9309 "SHA2_256" => {
9310 Ok(Expression::Function(Box::new(Function::new(
9311 "SHA2".to_string(),
9312 vec![data_arg, Expression::number(256)],
9313 ))))
9314 }
9315 "SHA2_512" => {
9316 Ok(Expression::Function(Box::new(Function::new(
9317 "SHA2".to_string(),
9318 vec![data_arg, Expression::number(512)],
9319 ))))
9320 }
9321 "MD5" => Ok(Expression::Function(Box::new(Function::new(
9322 "MD5".to_string(),
9323 vec![data_arg],
9324 )))),
9325 _ => Ok(Expression::Function(Box::new(Function::new(
9326 "HASHBYTES".to_string(),
9327 vec![Expression::string(&algo), data_arg],
9328 )))),
9329 }
9330 }
9331 // JSON_EXTRACT_PATH(json, key1, key2, ...) -> target-specific JSON extraction
9332 "JSON_EXTRACT_PATH" | "JSON_EXTRACT_PATH_TEXT" if f.args.len() >= 2 => {
9333 let is_text = name == "JSON_EXTRACT_PATH_TEXT";
9334 let mut args = f.args;
9335 let json_expr = args.remove(0);
9336 // Build JSON path from remaining keys: $.key1.key2 or $.key1[0]
9337 let mut json_path = "$".to_string();
9338 for a in &args {
9339 match a {
9340 Expression::Literal(
9341 crate::expressions::Literal::String(s),
9342 ) => {
9343 // Numeric string keys become array indices: [0]
9344 if s.chars().all(|c| c.is_ascii_digit()) {
9345 json_path.push('[');
9346 json_path.push_str(s);
9347 json_path.push(']');
9348 } else {
9349 json_path.push('.');
9350 json_path.push_str(s);
9351 }
9352 }
9353 _ => {
9354 json_path.push_str(".?");
9355 }
9356 }
9357 }
9358 match target {
9359 DialectType::Spark
9360 | DialectType::Databricks
9361 | DialectType::Hive => {
9362 Ok(Expression::Function(Box::new(Function::new(
9363 "GET_JSON_OBJECT".to_string(),
9364 vec![json_expr, Expression::string(&json_path)],
9365 ))))
9366 }
9367 DialectType::Presto | DialectType::Trino => {
9368 let func_name = if is_text {
9369 "JSON_EXTRACT_SCALAR"
9370 } else {
9371 "JSON_EXTRACT"
9372 };
9373 Ok(Expression::Function(Box::new(Function::new(
9374 func_name.to_string(),
9375 vec![json_expr, Expression::string(&json_path)],
9376 ))))
9377 }
9378 DialectType::BigQuery | DialectType::MySQL => {
9379 let func_name = if is_text {
9380 "JSON_EXTRACT_SCALAR"
9381 } else {
9382 "JSON_EXTRACT"
9383 };
9384 Ok(Expression::Function(Box::new(Function::new(
9385 func_name.to_string(),
9386 vec![json_expr, Expression::string(&json_path)],
9387 ))))
9388 }
9389 DialectType::PostgreSQL | DialectType::Materialize => {
9390 // Keep as JSON_EXTRACT_PATH_TEXT / JSON_EXTRACT_PATH for PostgreSQL/Materialize
9391 let func_name = if is_text {
9392 "JSON_EXTRACT_PATH_TEXT"
9393 } else {
9394 "JSON_EXTRACT_PATH"
9395 };
9396 let mut new_args = vec![json_expr];
9397 new_args.extend(args);
9398 Ok(Expression::Function(Box::new(Function::new(
9399 func_name.to_string(),
9400 new_args,
9401 ))))
9402 }
9403 DialectType::DuckDB | DialectType::SQLite => {
9404 // Use -> for JSON_EXTRACT_PATH, ->> for JSON_EXTRACT_PATH_TEXT
9405 if is_text {
9406 Ok(Expression::JsonExtractScalar(Box::new(
9407 crate::expressions::JsonExtractFunc {
9408 this: json_expr,
9409 path: Expression::string(&json_path),
9410 returning: None,
9411 arrow_syntax: true,
9412 hash_arrow_syntax: false,
9413 wrapper_option: None,
9414 quotes_option: None,
9415 on_scalar_string: false,
9416 on_error: None,
9417 },
9418 )))
9419 } else {
9420 Ok(Expression::JsonExtract(Box::new(
9421 crate::expressions::JsonExtractFunc {
9422 this: json_expr,
9423 path: Expression::string(&json_path),
9424 returning: None,
9425 arrow_syntax: true,
9426 hash_arrow_syntax: false,
9427 wrapper_option: None,
9428 quotes_option: None,
9429 on_scalar_string: false,
9430 on_error: None,
9431 },
9432 )))
9433 }
9434 }
9435 DialectType::Redshift => {
9436 // Keep as JSON_EXTRACT_PATH_TEXT for Redshift
9437 let mut new_args = vec![json_expr];
9438 new_args.extend(args);
9439 Ok(Expression::Function(Box::new(Function::new(
9440 "JSON_EXTRACT_PATH_TEXT".to_string(),
9441 new_args,
9442 ))))
9443 }
9444 DialectType::TSQL => {
9445 // ISNULL(JSON_QUERY(json, '$.path'), JSON_VALUE(json, '$.path'))
9446 let jq = Expression::Function(Box::new(Function::new(
9447 "JSON_QUERY".to_string(),
9448 vec![json_expr.clone(), Expression::string(&json_path)],
9449 )));
9450 let jv = Expression::Function(Box::new(Function::new(
9451 "JSON_VALUE".to_string(),
9452 vec![json_expr, Expression::string(&json_path)],
9453 )));
9454 Ok(Expression::Function(Box::new(Function::new(
9455 "ISNULL".to_string(),
9456 vec![jq, jv],
9457 ))))
9458 }
9459 DialectType::ClickHouse => {
9460 let func_name = if is_text {
9461 "JSONExtractString"
9462 } else {
9463 "JSONExtractRaw"
9464 };
9465 let mut new_args = vec![json_expr];
9466 new_args.extend(args);
9467 Ok(Expression::Function(Box::new(Function::new(
9468 func_name.to_string(),
9469 new_args,
9470 ))))
9471 }
9472 _ => {
9473 let func_name = if is_text {
9474 "JSON_EXTRACT_SCALAR"
9475 } else {
9476 "JSON_EXTRACT"
9477 };
9478 Ok(Expression::Function(Box::new(Function::new(
9479 func_name.to_string(),
9480 vec![json_expr, Expression::string(&json_path)],
9481 ))))
9482 }
9483 }
9484 }
9485 // APPROX_DISTINCT(x) -> APPROX_COUNT_DISTINCT(x) for Spark/Hive/BigQuery
9486 "APPROX_DISTINCT" if f.args.len() >= 1 => {
9487 let name = match target {
9488 DialectType::Spark
9489 | DialectType::Databricks
9490 | DialectType::Hive
9491 | DialectType::BigQuery => "APPROX_COUNT_DISTINCT",
9492 _ => "APPROX_DISTINCT",
9493 };
9494 let mut args = f.args;
9495 // Hive doesn't support the accuracy parameter
9496 if name == "APPROX_COUNT_DISTINCT"
9497 && matches!(target, DialectType::Hive)
9498 {
9499 args.truncate(1);
9500 }
9501 Ok(Expression::Function(Box::new(Function::new(
9502 name.to_string(),
9503 args,
9504 ))))
9505 }
9506 // REGEXP_EXTRACT(x, pattern) - normalize default group index
9507 "REGEXP_EXTRACT" if f.args.len() == 2 => {
9508 // Determine source default group index
9509 let source_default = match source {
9510 DialectType::Presto
9511 | DialectType::Trino
9512 | DialectType::DuckDB => 0,
9513 _ => 1, // Hive/Spark/Databricks default = 1
9514 };
9515 // Determine target default group index
9516 let target_default = match target {
9517 DialectType::Presto
9518 | DialectType::Trino
9519 | DialectType::DuckDB
9520 | DialectType::BigQuery => 0,
9521 DialectType::Snowflake => {
9522 // Snowflake uses REGEXP_SUBSTR
9523 return Ok(Expression::Function(Box::new(Function::new(
9524 "REGEXP_SUBSTR".to_string(),
9525 f.args,
9526 ))));
9527 }
9528 _ => 1, // Hive/Spark/Databricks default = 1
9529 };
9530 if source_default != target_default {
9531 let mut args = f.args;
9532 args.push(Expression::number(source_default));
9533 Ok(Expression::Function(Box::new(Function::new(
9534 "REGEXP_EXTRACT".to_string(),
9535 args,
9536 ))))
9537 } else {
9538 Ok(Expression::Function(Box::new(Function::new(
9539 "REGEXP_EXTRACT".to_string(),
9540 f.args,
9541 ))))
9542 }
9543 }
9544 // RLIKE(str, pattern) -> RegexpLike expression (generates as target-specific form)
9545 "RLIKE" if f.args.len() == 2 => {
9546 let mut args = f.args;
9547 let str_expr = args.remove(0);
9548 let pattern = args.remove(0);
9549 match target {
9550 DialectType::DuckDB => {
9551 // REGEXP_MATCHES(str, pattern)
9552 Ok(Expression::Function(Box::new(Function::new(
9553 "REGEXP_MATCHES".to_string(),
9554 vec![str_expr, pattern],
9555 ))))
9556 }
9557 _ => {
9558 // Convert to RegexpLike which generates as RLIKE/~/REGEXP_LIKE per dialect
9559 Ok(Expression::RegexpLike(Box::new(
9560 crate::expressions::RegexpFunc {
9561 this: str_expr,
9562 pattern,
9563 flags: None,
9564 },
9565 )))
9566 }
9567 }
9568 }
9569 // EOMONTH(date[, month_offset]) -> target-specific
9570 "EOMONTH" if f.args.len() >= 1 => {
9571 let mut args = f.args;
9572 let date_arg = args.remove(0);
9573 let month_offset = if !args.is_empty() {
9574 Some(args.remove(0))
9575 } else {
9576 None
9577 };
9578
                // Helper: wrap date in CAST to DATE.
                // Produces a plain `CAST(e AS DATE)` node: `double_colon_syntax: false`
                // selects CAST(...) rendering rather than Postgres-style `e::DATE`,
                // and no FORMAT / DEFAULT clause is attached.
                let cast_to_date = |e: Expression| -> Expression {
                    Expression::Cast(Box::new(Cast {
                        this: e,
                        to: DataType::Date,
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    }))
                };
9590
9591 match target {
9592 DialectType::TSQL | DialectType::Fabric => {
9593 // TSQL: EOMONTH(CAST(date AS DATE)) or EOMONTH(DATEADD(MONTH, offset, CAST(date AS DATE)))
9594 let date = cast_to_date(date_arg);
9595 let date = if let Some(offset) = month_offset {
9596 Expression::Function(Box::new(Function::new(
9597 "DATEADD".to_string(),
9598 vec![
9599 Expression::Identifier(Identifier::new(
9600 "MONTH",
9601 )),
9602 offset,
9603 date,
9604 ],
9605 )))
9606 } else {
9607 date
9608 };
9609 Ok(Expression::Function(Box::new(Function::new(
9610 "EOMONTH".to_string(),
9611 vec![date],
9612 ))))
9613 }
9614 DialectType::Presto
9615 | DialectType::Trino
9616 | DialectType::Athena => {
9617 // Presto: LAST_DAY_OF_MONTH(CAST(CAST(date AS TIMESTAMP) AS DATE))
9618 // or with offset: LAST_DAY_OF_MONTH(DATE_ADD('MONTH', offset, CAST(CAST(date AS TIMESTAMP) AS DATE)))
9619 let cast_ts = Expression::Cast(Box::new(Cast {
9620 this: date_arg,
9621 to: DataType::Timestamp {
9622 timezone: false,
9623 precision: None,
9624 },
9625 trailing_comments: vec![],
9626 double_colon_syntax: false,
9627 format: None,
9628 default: None,
9629 }));
9630 let date = cast_to_date(cast_ts);
9631 let date = if let Some(offset) = month_offset {
9632 Expression::Function(Box::new(Function::new(
9633 "DATE_ADD".to_string(),
9634 vec![Expression::string("MONTH"), offset, date],
9635 )))
9636 } else {
9637 date
9638 };
9639 Ok(Expression::Function(Box::new(Function::new(
9640 "LAST_DAY_OF_MONTH".to_string(),
9641 vec![date],
9642 ))))
9643 }
9644 DialectType::PostgreSQL => {
9645 // PostgreSQL: CAST(DATE_TRUNC('MONTH', CAST(date AS DATE) [+ INTERVAL 'offset MONTH']) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)
9646 let date = cast_to_date(date_arg);
9647 let date = if let Some(offset) = month_offset {
9648 let interval_str = format!(
9649 "{} MONTH",
9650 Self::expr_to_string_static(&offset)
9651 );
9652 Expression::Add(Box::new(
9653 crate::expressions::BinaryOp::new(
9654 date,
9655 Expression::Interval(Box::new(
9656 crate::expressions::Interval {
9657 this: Some(Expression::string(
9658 &interval_str,
9659 )),
9660 unit: None,
9661 },
9662 )),
9663 ),
9664 ))
9665 } else {
9666 date
9667 };
9668 let truncated =
9669 Expression::Function(Box::new(Function::new(
9670 "DATE_TRUNC".to_string(),
9671 vec![Expression::string("MONTH"), date],
9672 )));
9673 let plus_month = Expression::Add(Box::new(
9674 crate::expressions::BinaryOp::new(
9675 truncated,
9676 Expression::Interval(Box::new(
9677 crate::expressions::Interval {
9678 this: Some(Expression::string("1 MONTH")),
9679 unit: None,
9680 },
9681 )),
9682 ),
9683 ));
9684 let minus_day = Expression::Sub(Box::new(
9685 crate::expressions::BinaryOp::new(
9686 plus_month,
9687 Expression::Interval(Box::new(
9688 crate::expressions::Interval {
9689 this: Some(Expression::string("1 DAY")),
9690 unit: None,
9691 },
9692 )),
9693 ),
9694 ));
9695 Ok(Expression::Cast(Box::new(Cast {
9696 this: minus_day,
9697 to: DataType::Date,
9698 trailing_comments: vec![],
9699 double_colon_syntax: false,
9700 format: None,
9701 default: None,
9702 })))
9703 }
9704 DialectType::DuckDB => {
9705 // DuckDB: LAST_DAY(CAST(date AS DATE) [+ INTERVAL (offset) MONTH])
9706 let date = cast_to_date(date_arg);
9707 let date = if let Some(offset) = month_offset {
9708 // Wrap negative numbers in parentheses for DuckDB INTERVAL
9709 let interval_val =
9710 if matches!(&offset, Expression::Neg(_)) {
9711 Expression::Paren(Box::new(
9712 crate::expressions::Paren {
9713 this: offset,
9714 trailing_comments: Vec::new(),
9715 },
9716 ))
9717 } else {
9718 offset
9719 };
9720 Expression::Add(Box::new(crate::expressions::BinaryOp::new(
9721 date,
9722 Expression::Interval(Box::new(crate::expressions::Interval {
9723 this: Some(interval_val),
9724 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
9725 unit: crate::expressions::IntervalUnit::Month,
9726 use_plural: false,
9727 }),
9728 })),
9729 )))
9730 } else {
9731 date
9732 };
9733 Ok(Expression::Function(Box::new(Function::new(
9734 "LAST_DAY".to_string(),
9735 vec![date],
9736 ))))
9737 }
9738 DialectType::Snowflake | DialectType::Redshift => {
9739 // Snowflake/Redshift: LAST_DAY(TO_DATE(date) or CAST(date AS DATE))
9740 // With offset: LAST_DAY(DATEADD(MONTH, offset, TO_DATE(date)))
9741 let date = if matches!(target, DialectType::Snowflake) {
9742 Expression::Function(Box::new(Function::new(
9743 "TO_DATE".to_string(),
9744 vec![date_arg],
9745 )))
9746 } else {
9747 cast_to_date(date_arg)
9748 };
9749 let date = if let Some(offset) = month_offset {
9750 Expression::Function(Box::new(Function::new(
9751 "DATEADD".to_string(),
9752 vec![
9753 Expression::Identifier(Identifier::new(
9754 "MONTH",
9755 )),
9756 offset,
9757 date,
9758 ],
9759 )))
9760 } else {
9761 date
9762 };
9763 Ok(Expression::Function(Box::new(Function::new(
9764 "LAST_DAY".to_string(),
9765 vec![date],
9766 ))))
9767 }
9768 DialectType::Spark | DialectType::Databricks => {
9769 // Spark: LAST_DAY(TO_DATE(date))
9770 // With offset: LAST_DAY(ADD_MONTHS(TO_DATE(date), offset))
9771 let date = Expression::Function(Box::new(Function::new(
9772 "TO_DATE".to_string(),
9773 vec![date_arg],
9774 )));
9775 let date = if let Some(offset) = month_offset {
9776 Expression::Function(Box::new(Function::new(
9777 "ADD_MONTHS".to_string(),
9778 vec![date, offset],
9779 )))
9780 } else {
9781 date
9782 };
9783 Ok(Expression::Function(Box::new(Function::new(
9784 "LAST_DAY".to_string(),
9785 vec![date],
9786 ))))
9787 }
9788 DialectType::MySQL => {
9789 // MySQL: LAST_DAY(DATE(date)) - no offset
9790 // With offset: LAST_DAY(DATE_ADD(date, INTERVAL offset MONTH)) - no DATE() wrapper
9791 let date = if let Some(offset) = month_offset {
9792 let iu = crate::expressions::IntervalUnit::Month;
9793 Expression::DateAdd(Box::new(
9794 crate::expressions::DateAddFunc {
9795 this: date_arg,
9796 interval: offset,
9797 unit: iu,
9798 },
9799 ))
9800 } else {
9801 Expression::Function(Box::new(Function::new(
9802 "DATE".to_string(),
9803 vec![date_arg],
9804 )))
9805 };
9806 Ok(Expression::Function(Box::new(Function::new(
9807 "LAST_DAY".to_string(),
9808 vec![date],
9809 ))))
9810 }
9811 DialectType::BigQuery => {
9812 // BigQuery: LAST_DAY(CAST(date AS DATE))
9813 // With offset: LAST_DAY(DATE_ADD(CAST(date AS DATE), INTERVAL offset MONTH))
9814 let date = cast_to_date(date_arg);
9815 let date = if let Some(offset) = month_offset {
9816 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
9817 this: Some(offset),
9818 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
9819 unit: crate::expressions::IntervalUnit::Month,
9820 use_plural: false,
9821 }),
9822 }));
9823 Expression::Function(Box::new(Function::new(
9824 "DATE_ADD".to_string(),
9825 vec![date, interval],
9826 )))
9827 } else {
9828 date
9829 };
9830 Ok(Expression::Function(Box::new(Function::new(
9831 "LAST_DAY".to_string(),
9832 vec![date],
9833 ))))
9834 }
9835 DialectType::ClickHouse => {
9836 // ClickHouse: LAST_DAY(CAST(date AS Nullable(DATE)))
9837 let date = Expression::Cast(Box::new(Cast {
9838 this: date_arg,
9839 to: DataType::Nullable {
9840 inner: Box::new(DataType::Date),
9841 },
9842 trailing_comments: vec![],
9843 double_colon_syntax: false,
9844 format: None,
9845 default: None,
9846 }));
9847 let date = if let Some(offset) = month_offset {
9848 Expression::Function(Box::new(Function::new(
9849 "DATE_ADD".to_string(),
9850 vec![
9851 Expression::Identifier(Identifier::new(
9852 "MONTH",
9853 )),
9854 offset,
9855 date,
9856 ],
9857 )))
9858 } else {
9859 date
9860 };
9861 Ok(Expression::Function(Box::new(Function::new(
9862 "LAST_DAY".to_string(),
9863 vec![date],
9864 ))))
9865 }
9866 DialectType::Hive => {
9867 // Hive: LAST_DAY(date)
9868 let date = if let Some(offset) = month_offset {
9869 Expression::Function(Box::new(Function::new(
9870 "ADD_MONTHS".to_string(),
9871 vec![date_arg, offset],
9872 )))
9873 } else {
9874 date_arg
9875 };
9876 Ok(Expression::Function(Box::new(Function::new(
9877 "LAST_DAY".to_string(),
9878 vec![date],
9879 ))))
9880 }
9881 _ => {
9882 // Default: LAST_DAY(date)
9883 let date = if let Some(offset) = month_offset {
9884 let unit =
9885 Expression::Identifier(Identifier::new("MONTH"));
9886 Expression::Function(Box::new(Function::new(
9887 "DATEADD".to_string(),
9888 vec![unit, offset, date_arg],
9889 )))
9890 } else {
9891 date_arg
9892 };
9893 Ok(Expression::Function(Box::new(Function::new(
9894 "LAST_DAY".to_string(),
9895 vec![date],
9896 ))))
9897 }
9898 }
9899 }
9900 // LAST_DAY(x) / LAST_DAY_OF_MONTH(x) -> target-specific
9901 "LAST_DAY" | "LAST_DAY_OF_MONTH"
9902 if !matches!(source, DialectType::BigQuery)
9903 && f.args.len() >= 1 =>
9904 {
9905 let first_arg = f.args.into_iter().next().unwrap();
9906 match target {
9907 DialectType::TSQL | DialectType::Fabric => {
9908 Ok(Expression::Function(Box::new(Function::new(
9909 "EOMONTH".to_string(),
9910 vec![first_arg],
9911 ))))
9912 }
9913 DialectType::Presto
9914 | DialectType::Trino
9915 | DialectType::Athena => {
9916 Ok(Expression::Function(Box::new(Function::new(
9917 "LAST_DAY_OF_MONTH".to_string(),
9918 vec![first_arg],
9919 ))))
9920 }
9921 _ => Ok(Expression::Function(Box::new(Function::new(
9922 "LAST_DAY".to_string(),
9923 vec![first_arg],
9924 )))),
9925 }
9926 }
9927 // MAP(keys_array, vals_array) from Presto (2-arg form) -> target-specific
9928 "MAP"
9929 if f.args.len() == 2
9930 && matches!(
9931 source,
9932 DialectType::Presto
9933 | DialectType::Trino
9934 | DialectType::Athena
9935 ) =>
9936 {
9937 let keys_arg = f.args[0].clone();
9938 let vals_arg = f.args[1].clone();
9939
9940 // Helper: extract array elements from Array/ArrayFunc/Function("ARRAY") expressions
9941 fn extract_array_elements(
9942 expr: &Expression,
9943 ) -> Option<&Vec<Expression>> {
9944 match expr {
9945 Expression::Array(arr) => Some(&arr.expressions),
9946 Expression::ArrayFunc(arr) => Some(&arr.expressions),
9947 Expression::Function(f)
9948 if f.name.eq_ignore_ascii_case("ARRAY") =>
9949 {
9950 Some(&f.args)
9951 }
9952 _ => None,
9953 }
9954 }
9955
9956 match target {
9957 DialectType::Spark | DialectType::Databricks => {
9958 // Presto MAP(keys, vals) -> Spark MAP_FROM_ARRAYS(keys, vals)
9959 Ok(Expression::Function(Box::new(Function::new(
9960 "MAP_FROM_ARRAYS".to_string(),
9961 f.args,
9962 ))))
9963 }
9964 DialectType::Hive => {
9965 // Presto MAP(ARRAY[k1,k2], ARRAY[v1,v2]) -> Hive MAP(k1, v1, k2, v2)
9966 if let (Some(keys), Some(vals)) = (
9967 extract_array_elements(&keys_arg),
9968 extract_array_elements(&vals_arg),
9969 ) {
9970 if keys.len() == vals.len() {
9971 let mut interleaved = Vec::new();
9972 for (k, v) in keys.iter().zip(vals.iter()) {
9973 interleaved.push(k.clone());
9974 interleaved.push(v.clone());
9975 }
9976 Ok(Expression::Function(Box::new(Function::new(
9977 "MAP".to_string(),
9978 interleaved,
9979 ))))
9980 } else {
9981 Ok(Expression::Function(Box::new(Function::new(
9982 "MAP".to_string(),
9983 f.args,
9984 ))))
9985 }
9986 } else {
9987 Ok(Expression::Function(Box::new(Function::new(
9988 "MAP".to_string(),
9989 f.args,
9990 ))))
9991 }
9992 }
9993 DialectType::Snowflake => {
9994 // Presto MAP(ARRAY[k1,k2], ARRAY[v1,v2]) -> Snowflake OBJECT_CONSTRUCT(k1, v1, k2, v2)
9995 if let (Some(keys), Some(vals)) = (
9996 extract_array_elements(&keys_arg),
9997 extract_array_elements(&vals_arg),
9998 ) {
9999 if keys.len() == vals.len() {
10000 let mut interleaved = Vec::new();
10001 for (k, v) in keys.iter().zip(vals.iter()) {
10002 interleaved.push(k.clone());
10003 interleaved.push(v.clone());
10004 }
10005 Ok(Expression::Function(Box::new(Function::new(
10006 "OBJECT_CONSTRUCT".to_string(),
10007 interleaved,
10008 ))))
10009 } else {
10010 Ok(Expression::Function(Box::new(Function::new(
10011 "MAP".to_string(),
10012 f.args,
10013 ))))
10014 }
10015 } else {
10016 Ok(Expression::Function(Box::new(Function::new(
10017 "MAP".to_string(),
10018 f.args,
10019 ))))
10020 }
10021 }
10022 _ => Ok(Expression::Function(f)),
10023 }
10024 }
10025 // MAP() with 0 args from Spark -> MAP(ARRAY[], ARRAY[]) for Presto/Trino
10026 "MAP"
10027 if f.args.is_empty()
10028 && matches!(
10029 source,
10030 DialectType::Hive
10031 | DialectType::Spark
10032 | DialectType::Databricks
10033 )
10034 && matches!(
10035 target,
10036 DialectType::Presto
10037 | DialectType::Trino
10038 | DialectType::Athena
10039 ) =>
10040 {
10041 let empty_keys =
10042 Expression::Array(Box::new(crate::expressions::Array {
10043 expressions: vec![],
10044 }));
10045 let empty_vals =
10046 Expression::Array(Box::new(crate::expressions::Array {
10047 expressions: vec![],
10048 }));
10049 Ok(Expression::Function(Box::new(Function::new(
10050 "MAP".to_string(),
10051 vec![empty_keys, empty_vals],
10052 ))))
10053 }
10054 // MAP(k1, v1, k2, v2, ...) from Hive/Spark -> target-specific
10055 "MAP"
10056 if f.args.len() >= 2
10057 && f.args.len() % 2 == 0
10058 && matches!(
10059 source,
10060 DialectType::Hive
10061 | DialectType::Spark
10062 | DialectType::Databricks
10063 | DialectType::ClickHouse
10064 ) =>
10065 {
10066 let args = f.args;
10067 match target {
10068 DialectType::DuckDB => {
10069 // MAP([k1, k2], [v1, v2])
10070 let mut keys = Vec::new();
10071 let mut vals = Vec::new();
10072 for (i, arg) in args.into_iter().enumerate() {
10073 if i % 2 == 0 {
10074 keys.push(arg);
10075 } else {
10076 vals.push(arg);
10077 }
10078 }
10079 let keys_arr = Expression::Array(Box::new(
10080 crate::expressions::Array { expressions: keys },
10081 ));
10082 let vals_arr = Expression::Array(Box::new(
10083 crate::expressions::Array { expressions: vals },
10084 ));
10085 Ok(Expression::Function(Box::new(Function::new(
10086 "MAP".to_string(),
10087 vec![keys_arr, vals_arr],
10088 ))))
10089 }
10090 DialectType::Presto | DialectType::Trino => {
10091 // MAP(ARRAY[k1, k2], ARRAY[v1, v2])
10092 let mut keys = Vec::new();
10093 let mut vals = Vec::new();
10094 for (i, arg) in args.into_iter().enumerate() {
10095 if i % 2 == 0 {
10096 keys.push(arg);
10097 } else {
10098 vals.push(arg);
10099 }
10100 }
10101 let keys_arr = Expression::Array(Box::new(
10102 crate::expressions::Array { expressions: keys },
10103 ));
10104 let vals_arr = Expression::Array(Box::new(
10105 crate::expressions::Array { expressions: vals },
10106 ));
10107 Ok(Expression::Function(Box::new(Function::new(
10108 "MAP".to_string(),
10109 vec![keys_arr, vals_arr],
10110 ))))
10111 }
10112 DialectType::Snowflake => Ok(Expression::Function(Box::new(
10113 Function::new("OBJECT_CONSTRUCT".to_string(), args),
10114 ))),
10115 DialectType::ClickHouse => Ok(Expression::Function(Box::new(
10116 Function::new("map".to_string(), args),
10117 ))),
10118 _ => Ok(Expression::Function(Box::new(Function::new(
10119 "MAP".to_string(),
10120 args,
10121 )))),
10122 }
10123 }
10124 // COLLECT_LIST(x) -> ARRAY_AGG(x) for most targets
10125 "COLLECT_LIST" if f.args.len() >= 1 => {
10126 let name = match target {
10127 DialectType::Spark
10128 | DialectType::Databricks
10129 | DialectType::Hive => "COLLECT_LIST",
10130 DialectType::DuckDB
10131 | DialectType::PostgreSQL
10132 | DialectType::Redshift
10133 | DialectType::Snowflake
10134 | DialectType::BigQuery => "ARRAY_AGG",
10135 DialectType::Presto | DialectType::Trino => "ARRAY_AGG",
10136 _ => "ARRAY_AGG",
10137 };
10138 Ok(Expression::Function(Box::new(Function::new(
10139 name.to_string(),
10140 f.args,
10141 ))))
10142 }
10143 // COLLECT_SET(x) -> target-specific distinct array aggregation
10144 "COLLECT_SET" if f.args.len() >= 1 => {
10145 let name = match target {
10146 DialectType::Spark
10147 | DialectType::Databricks
10148 | DialectType::Hive => "COLLECT_SET",
10149 DialectType::Presto
10150 | DialectType::Trino
10151 | DialectType::Athena => "SET_AGG",
10152 DialectType::Snowflake => "ARRAY_UNIQUE_AGG",
10153 _ => "ARRAY_AGG",
10154 };
10155 Ok(Expression::Function(Box::new(Function::new(
10156 name.to_string(),
10157 f.args,
10158 ))))
10159 }
10160 // ISNAN(x) / IS_NAN(x) - normalize
10161 "ISNAN" | "IS_NAN" => {
10162 let name = match target {
10163 DialectType::Spark
10164 | DialectType::Databricks
10165 | DialectType::Hive => "ISNAN",
10166 DialectType::Presto
10167 | DialectType::Trino
10168 | DialectType::Athena => "IS_NAN",
10169 DialectType::BigQuery
10170 | DialectType::PostgreSQL
10171 | DialectType::Redshift => "IS_NAN",
10172 DialectType::ClickHouse => "IS_NAN",
10173 _ => "ISNAN",
10174 };
10175 Ok(Expression::Function(Box::new(Function::new(
10176 name.to_string(),
10177 f.args,
10178 ))))
10179 }
10180 // SPLIT_PART(str, delim, index) -> target-specific
10181 "SPLIT_PART" if f.args.len() == 3 => {
10182 match target {
10183 DialectType::Spark | DialectType::Databricks => {
10184 // Keep as SPLIT_PART (Spark 3.4+)
10185 Ok(Expression::Function(Box::new(Function::new(
10186 "SPLIT_PART".to_string(),
10187 f.args,
10188 ))))
10189 }
10190 DialectType::DuckDB
10191 | DialectType::PostgreSQL
10192 | DialectType::Snowflake
10193 | DialectType::Redshift
10194 | DialectType::Trino
10195 | DialectType::Presto => Ok(Expression::Function(Box::new(
10196 Function::new("SPLIT_PART".to_string(), f.args),
10197 ))),
10198 DialectType::Hive => {
10199 // SPLIT(str, delim)[index]
10200 // Complex conversion, just keep as-is for now
10201 Ok(Expression::Function(Box::new(Function::new(
10202 "SPLIT_PART".to_string(),
10203 f.args,
10204 ))))
10205 }
10206 _ => Ok(Expression::Function(Box::new(Function::new(
10207 "SPLIT_PART".to_string(),
10208 f.args,
10209 )))),
10210 }
10211 }
10212 // JSON_EXTRACT(json, path) -> target-specific JSON extraction
10213 "JSON_EXTRACT" | "JSON_EXTRACT_SCALAR" if f.args.len() == 2 => {
10214 let is_scalar = name == "JSON_EXTRACT_SCALAR";
10215 match target {
10216 DialectType::Spark
10217 | DialectType::Databricks
10218 | DialectType::Hive => {
10219 let mut args = f.args;
10220 // Spark/Hive don't support Presto's TRY(expr) wrapper form here.
10221 // Mirror sqlglot by unwrapping TRY(expr) to expr before GET_JSON_OBJECT.
10222 if let Some(Expression::Function(inner)) = args.first() {
10223 if inner.name.eq_ignore_ascii_case("TRY")
10224 && inner.args.len() == 1
10225 {
10226 let mut inner_args = inner.args.clone();
10227 args[0] = inner_args.remove(0);
10228 }
10229 }
10230 Ok(Expression::Function(Box::new(Function::new(
10231 "GET_JSON_OBJECT".to_string(),
10232 args,
10233 ))))
10234 }
10235 DialectType::DuckDB | DialectType::SQLite => {
10236 // json -> path syntax
10237 let mut args = f.args;
10238 let json_expr = args.remove(0);
10239 let path = args.remove(0);
10240 Ok(Expression::JsonExtract(Box::new(
10241 crate::expressions::JsonExtractFunc {
10242 this: json_expr,
10243 path,
10244 returning: None,
10245 arrow_syntax: true,
10246 hash_arrow_syntax: false,
10247 wrapper_option: None,
10248 quotes_option: None,
10249 on_scalar_string: false,
10250 on_error: None,
10251 },
10252 )))
10253 }
10254 DialectType::TSQL => {
10255 let func_name = if is_scalar {
10256 "JSON_VALUE"
10257 } else {
10258 "JSON_QUERY"
10259 };
10260 Ok(Expression::Function(Box::new(Function::new(
10261 func_name.to_string(),
10262 f.args,
10263 ))))
10264 }
10265 DialectType::PostgreSQL | DialectType::Redshift => {
10266 let func_name = if is_scalar {
10267 "JSON_EXTRACT_PATH_TEXT"
10268 } else {
10269 "JSON_EXTRACT_PATH"
10270 };
10271 Ok(Expression::Function(Box::new(Function::new(
10272 func_name.to_string(),
10273 f.args,
10274 ))))
10275 }
10276 _ => Ok(Expression::Function(Box::new(Function::new(
10277 name.to_string(),
10278 f.args,
10279 )))),
10280 }
10281 }
10282 // SingleStore JSON_EXTRACT_JSON(json, key1, key2, ...) -> JSON_EXTRACT(json, '$.key1.key2' or '$.key1[key2]')
10283 // BSON_EXTRACT_BSON(json, key1, ...) -> JSONB_EXTRACT(json, '$.key1')
10284 "JSON_EXTRACT_JSON" | "BSON_EXTRACT_BSON"
10285 if f.args.len() >= 2
10286 && matches!(source, DialectType::SingleStore) =>
10287 {
10288 let is_bson = name == "BSON_EXTRACT_BSON";
10289 let mut args = f.args;
10290 let json_expr = args.remove(0);
10291
10292 // Build JSONPath from remaining arguments
10293 let mut path = String::from("$");
10294 for arg in &args {
10295 if let Expression::Literal(
10296 crate::expressions::Literal::String(s),
10297 ) = arg
10298 {
10299 // Check if it's a numeric string (array index)
10300 if s.parse::<i64>().is_ok() {
10301 path.push('[');
10302 path.push_str(s);
10303 path.push(']');
10304 } else {
10305 path.push('.');
10306 path.push_str(s);
10307 }
10308 }
10309 }
10310
10311 let target_func = if is_bson {
10312 "JSONB_EXTRACT"
10313 } else {
10314 "JSON_EXTRACT"
10315 };
10316 Ok(Expression::Function(Box::new(Function::new(
10317 target_func.to_string(),
10318 vec![json_expr, Expression::string(&path)],
10319 ))))
10320 }
10321 // ARRAY_SUM(lambda, array) from Doris -> ClickHouse arraySum
10322 "ARRAY_SUM" if matches!(target, DialectType::ClickHouse) => {
10323 Ok(Expression::Function(Box::new(Function {
10324 name: "arraySum".to_string(),
10325 args: f.args,
10326 distinct: f.distinct,
10327 trailing_comments: f.trailing_comments,
10328 use_bracket_syntax: f.use_bracket_syntax,
10329 no_parens: f.no_parens,
10330 quoted: f.quoted,
10331 })))
10332 }
10333 // TSQL JSON_QUERY/JSON_VALUE -> target-specific
10334 // Note: For TSQL->TSQL, JsonQuery stays as Expression::JsonQuery (source transform not called)
10335 // and is handled by JsonQueryValueConvert action. This handles the case where
10336 // TSQL read transform converted JsonQuery to Function("JSON_QUERY") for cross-dialect.
10337 "JSON_QUERY" | "JSON_VALUE"
10338 if f.args.len() == 2
10339 && matches!(
10340 source,
10341 DialectType::TSQL | DialectType::Fabric
10342 ) =>
10343 {
10344 match target {
10345 DialectType::Spark
10346 | DialectType::Databricks
10347 | DialectType::Hive => Ok(Expression::Function(Box::new(
10348 Function::new("GET_JSON_OBJECT".to_string(), f.args),
10349 ))),
10350 _ => Ok(Expression::Function(Box::new(Function::new(
10351 name.to_string(),
10352 f.args,
10353 )))),
10354 }
10355 }
10356 // UNIX_TIMESTAMP(x) -> TO_UNIXTIME(x) for Presto
10357 "UNIX_TIMESTAMP" if f.args.len() == 1 => {
10358 let arg = f.args.into_iter().next().unwrap();
10359 let is_hive_source = matches!(
10360 source,
10361 DialectType::Hive
10362 | DialectType::Spark
10363 | DialectType::Databricks
10364 );
10365 match target {
10366 DialectType::DuckDB if is_hive_source => {
10367 // DuckDB: EPOCH(STRPTIME(x, '%Y-%m-%d %H:%M:%S'))
10368 let strptime =
10369 Expression::Function(Box::new(Function::new(
10370 "STRPTIME".to_string(),
10371 vec![arg, Expression::string("%Y-%m-%d %H:%M:%S")],
10372 )));
10373 Ok(Expression::Function(Box::new(Function::new(
10374 "EPOCH".to_string(),
10375 vec![strptime],
10376 ))))
10377 }
10378 DialectType::Presto | DialectType::Trino if is_hive_source => {
10379 // Presto: TO_UNIXTIME(COALESCE(TRY(DATE_PARSE(CAST(x AS VARCHAR), '%Y-%m-%d %T')), PARSE_DATETIME(DATE_FORMAT(x, '%Y-%m-%d %T'), 'yyyy-MM-dd HH:mm:ss')))
10380 let cast_varchar =
10381 Expression::Cast(Box::new(crate::expressions::Cast {
10382 this: arg.clone(),
10383 to: DataType::VarChar {
10384 length: None,
10385 parenthesized_length: false,
10386 },
10387 trailing_comments: vec![],
10388 double_colon_syntax: false,
10389 format: None,
10390 default: None,
10391 }));
10392 let date_parse =
10393 Expression::Function(Box::new(Function::new(
10394 "DATE_PARSE".to_string(),
10395 vec![
10396 cast_varchar,
10397 Expression::string("%Y-%m-%d %T"),
10398 ],
10399 )));
10400 let try_expr = Expression::Function(Box::new(
10401 Function::new("TRY".to_string(), vec![date_parse]),
10402 ));
10403 let date_format =
10404 Expression::Function(Box::new(Function::new(
10405 "DATE_FORMAT".to_string(),
10406 vec![arg, Expression::string("%Y-%m-%d %T")],
10407 )));
10408 let parse_datetime =
10409 Expression::Function(Box::new(Function::new(
10410 "PARSE_DATETIME".to_string(),
10411 vec![
10412 date_format,
10413 Expression::string("yyyy-MM-dd HH:mm:ss"),
10414 ],
10415 )));
10416 let coalesce =
10417 Expression::Function(Box::new(Function::new(
10418 "COALESCE".to_string(),
10419 vec![try_expr, parse_datetime],
10420 )));
10421 Ok(Expression::Function(Box::new(Function::new(
10422 "TO_UNIXTIME".to_string(),
10423 vec![coalesce],
10424 ))))
10425 }
10426 DialectType::Presto | DialectType::Trino => {
10427 Ok(Expression::Function(Box::new(Function::new(
10428 "TO_UNIXTIME".to_string(),
10429 vec![arg],
10430 ))))
10431 }
10432 _ => Ok(Expression::Function(Box::new(Function::new(
10433 "UNIX_TIMESTAMP".to_string(),
10434 vec![arg],
10435 )))),
10436 }
10437 }
10438 // TO_UNIX_TIMESTAMP(x) -> UNIX_TIMESTAMP(x) for Spark/Hive
10439 "TO_UNIX_TIMESTAMP" if f.args.len() >= 1 => match target {
10440 DialectType::Spark
10441 | DialectType::Databricks
10442 | DialectType::Hive => Ok(Expression::Function(Box::new(
10443 Function::new("UNIX_TIMESTAMP".to_string(), f.args),
10444 ))),
10445 _ => Ok(Expression::Function(Box::new(Function::new(
10446 "TO_UNIX_TIMESTAMP".to_string(),
10447 f.args,
10448 )))),
10449 },
10450 // CURDATE() -> CURRENT_DATE
10451 "CURDATE" => {
10452 Ok(Expression::CurrentDate(crate::expressions::CurrentDate))
10453 }
10454 // CURTIME() -> CURRENT_TIME
10455 "CURTIME" => {
10456 Ok(Expression::CurrentTime(crate::expressions::CurrentTime {
10457 precision: None,
10458 }))
10459 }
10460 // ARRAY_SORT(x) or ARRAY_SORT(x, lambda) -> SORT_ARRAY(x) for Hive (drop lambda)
10461 "ARRAY_SORT" if f.args.len() >= 1 => {
10462 match target {
10463 DialectType::Hive => {
10464 let mut args = f.args;
10465 args.truncate(1); // Drop lambda comparator
10466 Ok(Expression::Function(Box::new(Function::new(
10467 "SORT_ARRAY".to_string(),
10468 args,
10469 ))))
10470 }
10471 _ => Ok(Expression::Function(f)),
10472 }
10473 }
10474 // SORT_ARRAY(x) -> ARRAY_SORT(x) for non-Hive/Spark
10475 "SORT_ARRAY" if f.args.len() == 1 => match target {
10476 DialectType::Hive
10477 | DialectType::Spark
10478 | DialectType::Databricks => Ok(Expression::Function(f)),
10479 _ => Ok(Expression::Function(Box::new(Function::new(
10480 "ARRAY_SORT".to_string(),
10481 f.args,
10482 )))),
10483 },
10484 // SORT_ARRAY(x, FALSE) -> ARRAY_REVERSE_SORT(x) for DuckDB, ARRAY_SORT(x, lambda) for Presto
10485 "SORT_ARRAY" if f.args.len() == 2 => {
10486 let is_desc =
10487 matches!(&f.args[1], Expression::Boolean(b) if !b.value);
10488 if is_desc {
10489 match target {
10490 DialectType::DuckDB => {
10491 Ok(Expression::Function(Box::new(Function::new(
10492 "ARRAY_REVERSE_SORT".to_string(),
10493 vec![f.args.into_iter().next().unwrap()],
10494 ))))
10495 }
10496 DialectType::Presto | DialectType::Trino => {
10497 let arr_arg = f.args.into_iter().next().unwrap();
10498 let a =
10499 Expression::Column(crate::expressions::Column {
10500 name: crate::expressions::Identifier::new("a"),
10501 table: None,
10502 join_mark: false,
10503 trailing_comments: Vec::new(),
10504 });
10505 let b =
10506 Expression::Column(crate::expressions::Column {
10507 name: crate::expressions::Identifier::new("b"),
10508 table: None,
10509 join_mark: false,
10510 trailing_comments: Vec::new(),
10511 });
10512 let case_expr = Expression::Case(Box::new(
10513 crate::expressions::Case {
10514 operand: None,
10515 whens: vec![
10516 (
10517 Expression::Lt(Box::new(
10518 BinaryOp::new(a.clone(), b.clone()),
10519 )),
10520 Expression::Literal(Literal::Number(
10521 "1".to_string(),
10522 )),
10523 ),
10524 (
10525 Expression::Gt(Box::new(
10526 BinaryOp::new(a.clone(), b.clone()),
10527 )),
10528 Expression::Literal(Literal::Number(
10529 "-1".to_string(),
10530 )),
10531 ),
10532 ],
10533 else_: Some(Expression::Literal(
10534 Literal::Number("0".to_string()),
10535 )),
10536 comments: Vec::new(),
10537 },
10538 ));
10539 let lambda = Expression::Lambda(Box::new(
10540 crate::expressions::LambdaExpr {
10541 parameters: vec![
10542 crate::expressions::Identifier::new("a"),
10543 crate::expressions::Identifier::new("b"),
10544 ],
10545 body: case_expr,
10546 colon: false,
10547 parameter_types: Vec::new(),
10548 },
10549 ));
10550 Ok(Expression::Function(Box::new(Function::new(
10551 "ARRAY_SORT".to_string(),
10552 vec![arr_arg, lambda],
10553 ))))
10554 }
10555 _ => Ok(Expression::Function(f)),
10556 }
10557 } else {
10558 // SORT_ARRAY(x, TRUE) -> ARRAY_SORT(x)
10559 match target {
10560 DialectType::Hive => Ok(Expression::Function(f)),
10561 _ => Ok(Expression::Function(Box::new(Function::new(
10562 "ARRAY_SORT".to_string(),
10563 vec![f.args.into_iter().next().unwrap()],
10564 )))),
10565 }
10566 }
10567 }
10568 // LEFT(x, n), RIGHT(x, n) -> SUBSTRING for targets without LEFT/RIGHT
10569 "LEFT" if f.args.len() == 2 => {
10570 match target {
10571 DialectType::Hive
10572 | DialectType::Presto
10573 | DialectType::Trino
10574 | DialectType::Athena => {
10575 let x = f.args[0].clone();
10576 let n = f.args[1].clone();
10577 Ok(Expression::Function(Box::new(Function::new(
10578 "SUBSTRING".to_string(),
10579 vec![x, Expression::number(1), n],
10580 ))))
10581 }
10582 DialectType::Spark | DialectType::Databricks
10583 if matches!(
10584 source,
10585 DialectType::TSQL | DialectType::Fabric
10586 ) =>
10587 {
10588 // TSQL LEFT(x, n) -> LEFT(CAST(x AS STRING), n) for Spark
10589 let x = f.args[0].clone();
10590 let n = f.args[1].clone();
10591 let cast_x = Expression::Cast(Box::new(Cast {
10592 this: x,
10593 to: DataType::VarChar {
10594 length: None,
10595 parenthesized_length: false,
10596 },
10597 double_colon_syntax: false,
10598 trailing_comments: Vec::new(),
10599 format: None,
10600 default: None,
10601 }));
10602 Ok(Expression::Function(Box::new(Function::new(
10603 "LEFT".to_string(),
10604 vec![cast_x, n],
10605 ))))
10606 }
10607 _ => Ok(Expression::Function(f)),
10608 }
10609 }
10610 "RIGHT" if f.args.len() == 2 => {
10611 match target {
10612 DialectType::Hive
10613 | DialectType::Presto
10614 | DialectType::Trino
10615 | DialectType::Athena => {
10616 let x = f.args[0].clone();
10617 let n = f.args[1].clone();
10618 // SUBSTRING(x, LENGTH(x) - (n - 1))
10619 let len_x = Expression::Function(Box::new(Function::new(
10620 "LENGTH".to_string(),
10621 vec![x.clone()],
10622 )));
10623 let n_minus_1 = Expression::Sub(Box::new(
10624 crate::expressions::BinaryOp::new(
10625 n,
10626 Expression::number(1),
10627 ),
10628 ));
10629 let n_minus_1_paren = Expression::Paren(Box::new(
10630 crate::expressions::Paren {
10631 this: n_minus_1,
10632 trailing_comments: Vec::new(),
10633 },
10634 ));
10635 let offset = Expression::Sub(Box::new(
10636 crate::expressions::BinaryOp::new(
10637 len_x,
10638 n_minus_1_paren,
10639 ),
10640 ));
10641 Ok(Expression::Function(Box::new(Function::new(
10642 "SUBSTRING".to_string(),
10643 vec![x, offset],
10644 ))))
10645 }
10646 DialectType::Spark | DialectType::Databricks
10647 if matches!(
10648 source,
10649 DialectType::TSQL | DialectType::Fabric
10650 ) =>
10651 {
10652 // TSQL RIGHT(x, n) -> RIGHT(CAST(x AS STRING), n) for Spark
10653 let x = f.args[0].clone();
10654 let n = f.args[1].clone();
10655 let cast_x = Expression::Cast(Box::new(Cast {
10656 this: x,
10657 to: DataType::VarChar {
10658 length: None,
10659 parenthesized_length: false,
10660 },
10661 double_colon_syntax: false,
10662 trailing_comments: Vec::new(),
10663 format: None,
10664 default: None,
10665 }));
10666 Ok(Expression::Function(Box::new(Function::new(
10667 "RIGHT".to_string(),
10668 vec![cast_x, n],
10669 ))))
10670 }
10671 _ => Ok(Expression::Function(f)),
10672 }
10673 }
10674 // MAP_FROM_ARRAYS(keys, vals) -> target-specific map construction
10675 "MAP_FROM_ARRAYS" if f.args.len() == 2 => match target {
10676 DialectType::Snowflake => Ok(Expression::Function(Box::new(
10677 Function::new("OBJECT_CONSTRUCT".to_string(), f.args),
10678 ))),
10679 DialectType::Spark | DialectType::Databricks => {
10680 Ok(Expression::Function(Box::new(Function::new(
10681 "MAP_FROM_ARRAYS".to_string(),
10682 f.args,
10683 ))))
10684 }
10685 _ => Ok(Expression::Function(Box::new(Function::new(
10686 "MAP".to_string(),
10687 f.args,
10688 )))),
10689 },
10690 // LIKE(foo, 'pat') -> foo LIKE 'pat'; LIKE(foo, 'pat', '!') -> foo LIKE 'pat' ESCAPE '!'
10691 // SQLite uses LIKE(pattern, string[, escape]) with args in reverse order
10692 "LIKE" if f.args.len() >= 2 => {
10693 let (this, pattern) = if matches!(source, DialectType::SQLite) {
10694 // SQLite: LIKE(pattern, string) -> string LIKE pattern
10695 (f.args[1].clone(), f.args[0].clone())
10696 } else {
10697 // Standard: LIKE(string, pattern) -> string LIKE pattern
10698 (f.args[0].clone(), f.args[1].clone())
10699 };
10700 let escape = if f.args.len() >= 3 {
10701 Some(f.args[2].clone())
10702 } else {
10703 None
10704 };
10705 Ok(Expression::Like(Box::new(crate::expressions::LikeOp {
10706 left: this,
10707 right: pattern,
10708 escape,
10709 quantifier: None,
10710 })))
10711 }
10712 // ILIKE(foo, 'pat') -> foo ILIKE 'pat'
10713 "ILIKE" if f.args.len() >= 2 => {
10714 let this = f.args[0].clone();
10715 let pattern = f.args[1].clone();
10716 let escape = if f.args.len() >= 3 {
10717 Some(f.args[2].clone())
10718 } else {
10719 None
10720 };
10721 Ok(Expression::ILike(Box::new(crate::expressions::LikeOp {
10722 left: this,
10723 right: pattern,
10724 escape,
10725 quantifier: None,
10726 })))
10727 }
10728 // CHAR(n) -> CHR(n) for non-MySQL/non-TSQL targets
10729 "CHAR" if f.args.len() == 1 => match target {
10730 DialectType::MySQL
10731 | DialectType::SingleStore
10732 | DialectType::TSQL => Ok(Expression::Function(f)),
10733 _ => Ok(Expression::Function(Box::new(Function::new(
10734 "CHR".to_string(),
10735 f.args,
10736 )))),
10737 },
10738 // CONCAT(a, b) -> a || b for PostgreSQL
10739 "CONCAT"
10740 if f.args.len() == 2
10741 && matches!(target, DialectType::PostgreSQL)
10742 && matches!(
10743 source,
10744 DialectType::ClickHouse | DialectType::MySQL
10745 ) =>
10746 {
10747 let mut args = f.args;
10748 let right = args.pop().unwrap();
10749 let left = args.pop().unwrap();
10750 Ok(Expression::DPipe(Box::new(crate::expressions::DPipe {
10751 this: Box::new(left),
10752 expression: Box::new(right),
10753 safe: None,
10754 })))
10755 }
10756 // ARRAY_TO_STRING(arr, delim) -> target-specific
10757 "ARRAY_TO_STRING" if f.args.len() >= 2 => match target {
10758 DialectType::Presto | DialectType::Trino => {
10759 Ok(Expression::Function(Box::new(Function::new(
10760 "ARRAY_JOIN".to_string(),
10761 f.args,
10762 ))))
10763 }
10764 DialectType::TSQL => Ok(Expression::Function(Box::new(
10765 Function::new("STRING_AGG".to_string(), f.args),
10766 ))),
10767 _ => Ok(Expression::Function(f)),
10768 },
10769 // ARRAY_CONCAT / LIST_CONCAT -> target-specific
10770 "ARRAY_CONCAT" | "LIST_CONCAT" if f.args.len() == 2 => match target {
10771 DialectType::Spark
10772 | DialectType::Databricks
10773 | DialectType::Hive => Ok(Expression::Function(Box::new(
10774 Function::new("CONCAT".to_string(), f.args),
10775 ))),
10776 DialectType::Snowflake => Ok(Expression::Function(Box::new(
10777 Function::new("ARRAY_CAT".to_string(), f.args),
10778 ))),
10779 DialectType::Redshift => Ok(Expression::Function(Box::new(
10780 Function::new("ARRAY_CONCAT".to_string(), f.args),
10781 ))),
10782 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
10783 Function::new("ARRAY_CAT".to_string(), f.args),
10784 ))),
10785 DialectType::DuckDB => Ok(Expression::Function(Box::new(
10786 Function::new("LIST_CONCAT".to_string(), f.args),
10787 ))),
10788 DialectType::Presto | DialectType::Trino => {
10789 Ok(Expression::Function(Box::new(Function::new(
10790 "CONCAT".to_string(),
10791 f.args,
10792 ))))
10793 }
10794 DialectType::BigQuery => Ok(Expression::Function(Box::new(
10795 Function::new("ARRAY_CONCAT".to_string(), f.args),
10796 ))),
10797 _ => Ok(Expression::Function(f)),
10798 },
10799 // ARRAY_CONTAINS(arr, x) / HAS(arr, x) / CONTAINS(arr, x) normalization
10800 "HAS" if f.args.len() == 2 => match target {
10801 DialectType::Spark
10802 | DialectType::Databricks
10803 | DialectType::Hive => Ok(Expression::Function(Box::new(
10804 Function::new("ARRAY_CONTAINS".to_string(), f.args),
10805 ))),
10806 DialectType::Presto | DialectType::Trino => {
10807 Ok(Expression::Function(Box::new(Function::new(
10808 "CONTAINS".to_string(),
10809 f.args,
10810 ))))
10811 }
10812 _ => Ok(Expression::Function(f)),
10813 },
10814 // NVL(a, b, c, d) -> COALESCE(a, b, c, d) - NVL should keep all args
10815 "NVL" if f.args.len() > 2 => Ok(Expression::Function(Box::new(
10816 Function::new("COALESCE".to_string(), f.args),
10817 ))),
10818 // ISNULL(x) in MySQL -> (x IS NULL)
10819 "ISNULL"
10820 if f.args.len() == 1
10821 && matches!(source, DialectType::MySQL)
10822 && matches!(target, DialectType::MySQL) =>
10823 {
10824 let arg = f.args.into_iter().next().unwrap();
10825 Ok(Expression::Paren(Box::new(crate::expressions::Paren {
10826 this: Expression::IsNull(Box::new(
10827 crate::expressions::IsNull {
10828 this: arg,
10829 not: false,
10830 postfix_form: false,
10831 },
10832 )),
10833 trailing_comments: Vec::new(),
10834 })))
10835 }
10836 // MONTHNAME(x) -> DATE_FORMAT(x, '%M') for MySQL -> MySQL
10837 "MONTHNAME"
10838 if f.args.len() == 1 && matches!(target, DialectType::MySQL) =>
10839 {
10840 let arg = f.args.into_iter().next().unwrap();
10841 Ok(Expression::Function(Box::new(Function::new(
10842 "DATE_FORMAT".to_string(),
10843 vec![arg, Expression::string("%M")],
10844 ))))
10845 }
10846 // ClickHouse splitByString('s', x) -> DuckDB STR_SPLIT(x, 's') / Hive SPLIT(x, CONCAT('\\Q', 's', '\\E'))
10847 "SPLITBYSTRING" if f.args.len() == 2 => {
10848 let sep = f.args[0].clone();
10849 let str_arg = f.args[1].clone();
10850 match target {
10851 DialectType::DuckDB => Ok(Expression::Function(Box::new(
10852 Function::new("STR_SPLIT".to_string(), vec![str_arg, sep]),
10853 ))),
10854 DialectType::Doris => {
10855 Ok(Expression::Function(Box::new(Function::new(
10856 "SPLIT_BY_STRING".to_string(),
10857 vec![str_arg, sep],
10858 ))))
10859 }
10860 DialectType::Hive
10861 | DialectType::Spark
10862 | DialectType::Databricks => {
10863 // SPLIT(x, CONCAT('\\Q', sep, '\\E'))
10864 let escaped =
10865 Expression::Function(Box::new(Function::new(
10866 "CONCAT".to_string(),
10867 vec![
10868 Expression::string("\\Q"),
10869 sep,
10870 Expression::string("\\E"),
10871 ],
10872 )));
10873 Ok(Expression::Function(Box::new(Function::new(
10874 "SPLIT".to_string(),
10875 vec![str_arg, escaped],
10876 ))))
10877 }
10878 _ => Ok(Expression::Function(f)),
10879 }
10880 }
10881 // ClickHouse splitByRegexp('pattern', x) -> DuckDB STR_SPLIT_REGEX(x, 'pattern')
10882 "SPLITBYREGEXP" if f.args.len() == 2 => {
10883 let sep = f.args[0].clone();
10884 let str_arg = f.args[1].clone();
10885 match target {
10886 DialectType::DuckDB => {
10887 Ok(Expression::Function(Box::new(Function::new(
10888 "STR_SPLIT_REGEX".to_string(),
10889 vec![str_arg, sep],
10890 ))))
10891 }
10892 DialectType::Hive
10893 | DialectType::Spark
10894 | DialectType::Databricks => {
10895 Ok(Expression::Function(Box::new(Function::new(
10896 "SPLIT".to_string(),
10897 vec![str_arg, sep],
10898 ))))
10899 }
10900 _ => Ok(Expression::Function(f)),
10901 }
10902 }
10903 // ClickHouse toMonday(x) -> DATE_TRUNC('WEEK', x) / DATE_TRUNC(x, 'WEEK') for Doris
10904 "TOMONDAY" => {
10905 if f.args.len() == 1 {
10906 let arg = f.args.into_iter().next().unwrap();
10907 match target {
10908 DialectType::Doris => {
10909 Ok(Expression::Function(Box::new(Function::new(
10910 "DATE_TRUNC".to_string(),
10911 vec![arg, Expression::string("WEEK")],
10912 ))))
10913 }
10914 _ => Ok(Expression::Function(Box::new(Function::new(
10915 "DATE_TRUNC".to_string(),
10916 vec![Expression::string("WEEK"), arg],
10917 )))),
10918 }
10919 } else {
10920 Ok(Expression::Function(f))
10921 }
10922 }
10923 // COLLECT_LIST with FILTER(WHERE x IS NOT NULL) for targets that need it
10924 "COLLECT_LIST" if f.args.len() == 1 => match target {
10925 DialectType::Spark
10926 | DialectType::Databricks
10927 | DialectType::Hive => Ok(Expression::Function(f)),
10928 _ => Ok(Expression::Function(Box::new(Function::new(
10929 "ARRAY_AGG".to_string(),
10930 f.args,
10931 )))),
10932 },
10933 // TO_CHAR(x) with 1 arg -> CAST(x AS STRING) for Doris
10934 "TO_CHAR"
10935 if f.args.len() == 1 && matches!(target, DialectType::Doris) =>
10936 {
10937 let arg = f.args.into_iter().next().unwrap();
10938 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
10939 this: arg,
10940 to: DataType::Custom {
10941 name: "STRING".to_string(),
10942 },
10943 double_colon_syntax: false,
10944 trailing_comments: Vec::new(),
10945 format: None,
10946 default: None,
10947 })))
10948 }
10949 // DBMS_RANDOM.VALUE() -> RANDOM() for PostgreSQL
10950 "DBMS_RANDOM.VALUE" if f.args.is_empty() => match target {
10951 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
10952 Function::new("RANDOM".to_string(), vec![]),
10953 ))),
10954 _ => Ok(Expression::Function(f)),
10955 },
10956 // ClickHouse formatDateTime -> target-specific
10957 "FORMATDATETIME" if f.args.len() >= 2 => match target {
10958 DialectType::MySQL => Ok(Expression::Function(Box::new(
10959 Function::new("DATE_FORMAT".to_string(), f.args),
10960 ))),
10961 _ => Ok(Expression::Function(f)),
10962 },
10963 // REPLICATE('x', n) -> REPEAT('x', n) for non-TSQL targets
10964 "REPLICATE" if f.args.len() == 2 => match target {
10965 DialectType::TSQL => Ok(Expression::Function(f)),
10966 _ => Ok(Expression::Function(Box::new(Function::new(
10967 "REPEAT".to_string(),
10968 f.args,
10969 )))),
10970 },
10971 // LEN(x) -> LENGTH(x) for non-TSQL targets
10972 // No CAST needed when arg is already a string literal
10973 "LEN" if f.args.len() == 1 => {
10974 match target {
10975 DialectType::TSQL => Ok(Expression::Function(f)),
10976 DialectType::Spark | DialectType::Databricks => {
10977 let arg = f.args.into_iter().next().unwrap();
10978 // Don't wrap string literals with CAST - they're already strings
10979 let is_string = matches!(
10980 &arg,
10981 Expression::Literal(
10982 crate::expressions::Literal::String(_)
10983 )
10984 );
10985 let final_arg = if is_string {
10986 arg
10987 } else {
10988 Expression::Cast(Box::new(Cast {
10989 this: arg,
10990 to: DataType::VarChar {
10991 length: None,
10992 parenthesized_length: false,
10993 },
10994 double_colon_syntax: false,
10995 trailing_comments: Vec::new(),
10996 format: None,
10997 default: None,
10998 }))
10999 };
11000 Ok(Expression::Function(Box::new(Function::new(
11001 "LENGTH".to_string(),
11002 vec![final_arg],
11003 ))))
11004 }
11005 _ => {
11006 let arg = f.args.into_iter().next().unwrap();
11007 Ok(Expression::Function(Box::new(Function::new(
11008 "LENGTH".to_string(),
11009 vec![arg],
11010 ))))
11011 }
11012 }
11013 }
11014 // COUNT_BIG(x) -> COUNT(x) for non-TSQL targets
11015 "COUNT_BIG" if f.args.len() == 1 => match target {
11016 DialectType::TSQL => Ok(Expression::Function(f)),
11017 _ => Ok(Expression::Function(Box::new(Function::new(
11018 "COUNT".to_string(),
11019 f.args,
11020 )))),
11021 },
11022 // DATEFROMPARTS(y, m, d) -> MAKE_DATE(y, m, d) for non-TSQL targets
11023 "DATEFROMPARTS" if f.args.len() == 3 => match target {
11024 DialectType::TSQL => Ok(Expression::Function(f)),
11025 _ => Ok(Expression::Function(Box::new(Function::new(
11026 "MAKE_DATE".to_string(),
11027 f.args,
11028 )))),
11029 },
11030 // REGEXP_LIKE(str, pattern) -> RegexpLike expression (target-specific output)
11031 "REGEXP_LIKE" if f.args.len() >= 2 => {
11032 let str_expr = f.args[0].clone();
11033 let pattern = f.args[1].clone();
11034 let flags = if f.args.len() >= 3 {
11035 Some(f.args[2].clone())
11036 } else {
11037 None
11038 };
11039 match target {
11040 DialectType::DuckDB => {
11041 let mut new_args = vec![str_expr, pattern];
11042 if let Some(fl) = flags {
11043 new_args.push(fl);
11044 }
11045 Ok(Expression::Function(Box::new(Function::new(
11046 "REGEXP_MATCHES".to_string(),
11047 new_args,
11048 ))))
11049 }
11050 _ => Ok(Expression::RegexpLike(Box::new(
11051 crate::expressions::RegexpFunc {
11052 this: str_expr,
11053 pattern,
11054 flags,
11055 },
11056 ))),
11057 }
11058 }
11059 // ClickHouse arrayJoin -> UNNEST for PostgreSQL
11060 "ARRAYJOIN" if f.args.len() == 1 => match target {
11061 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
11062 Function::new("UNNEST".to_string(), f.args),
11063 ))),
11064 _ => Ok(Expression::Function(f)),
11065 },
11066 // DATETIMEFROMPARTS(y, m, d, h, mi, s, ms) -> MAKE_TIMESTAMP / TIMESTAMP_FROM_PARTS
11067 "DATETIMEFROMPARTS" if f.args.len() == 7 => {
11068 match target {
11069 DialectType::TSQL => Ok(Expression::Function(f)),
11070 DialectType::DuckDB => {
11071 // MAKE_TIMESTAMP(y, m, d, h, mi, s + (ms / 1000.0))
11072 let mut args = f.args;
11073 let ms = args.pop().unwrap();
11074 let s = args.pop().unwrap();
11075 // s + (ms / 1000.0)
11076 let ms_frac = Expression::Div(Box::new(BinaryOp::new(
11077 ms,
11078 Expression::Literal(
11079 crate::expressions::Literal::Number(
11080 "1000.0".to_string(),
11081 ),
11082 ),
11083 )));
11084 let s_with_ms = Expression::Add(Box::new(BinaryOp::new(
11085 s,
11086 Expression::Paren(Box::new(Paren {
11087 this: ms_frac,
11088 trailing_comments: vec![],
11089 })),
11090 )));
11091 args.push(s_with_ms);
11092 Ok(Expression::Function(Box::new(Function::new(
11093 "MAKE_TIMESTAMP".to_string(),
11094 args,
11095 ))))
11096 }
11097 DialectType::Snowflake => {
11098 // TIMESTAMP_FROM_PARTS(y, m, d, h, mi, s, ms * 1000000)
11099 let mut args = f.args;
11100 let ms = args.pop().unwrap();
11101 // ms * 1000000
11102 let ns = Expression::Mul(Box::new(BinaryOp::new(
11103 ms,
11104 Expression::number(1000000),
11105 )));
11106 args.push(ns);
11107 Ok(Expression::Function(Box::new(Function::new(
11108 "TIMESTAMP_FROM_PARTS".to_string(),
11109 args,
11110 ))))
11111 }
11112 _ => {
11113 // Default: keep function name for other targets
11114 Ok(Expression::Function(Box::new(Function::new(
11115 "DATETIMEFROMPARTS".to_string(),
11116 f.args,
11117 ))))
11118 }
11119 }
11120 }
11121 // CONVERT(type, expr [, style]) -> CAST(expr AS type) for non-TSQL targets
11122 // TRY_CONVERT(type, expr [, style]) -> TRY_CAST(expr AS type) for non-TSQL targets
11123 "CONVERT" | "TRY_CONVERT" if f.args.len() >= 2 => {
11124 let is_try = name == "TRY_CONVERT";
11125 let type_expr = f.args[0].clone();
11126 let value_expr = f.args[1].clone();
11127 let style = if f.args.len() >= 3 {
11128 Some(&f.args[2])
11129 } else {
11130 None
11131 };
11132
11133 // For TSQL->TSQL, normalize types and preserve CONVERT/TRY_CONVERT
11134 if matches!(target, DialectType::TSQL) {
11135 let normalized_type = match &type_expr {
11136 Expression::DataType(dt) => {
11137 let new_dt = match dt {
11138 DataType::Int { .. } => DataType::Custom {
11139 name: "INTEGER".to_string(),
11140 },
11141 _ => dt.clone(),
11142 };
11143 Expression::DataType(new_dt)
11144 }
11145 Expression::Identifier(id) => {
11146 let upper = id.name.to_uppercase();
11147 let normalized = match upper.as_str() {
11148 "INT" => "INTEGER",
11149 _ => &upper,
11150 };
11151 Expression::Identifier(
11152 crate::expressions::Identifier::new(normalized),
11153 )
11154 }
11155 Expression::Column(col) => {
11156 let upper = col.name.name.to_uppercase();
11157 let normalized = match upper.as_str() {
11158 "INT" => "INTEGER",
11159 _ => &upper,
11160 };
11161 Expression::Identifier(
11162 crate::expressions::Identifier::new(normalized),
11163 )
11164 }
11165 _ => type_expr.clone(),
11166 };
11167 let func_name = if is_try { "TRY_CONVERT" } else { "CONVERT" };
11168 let mut new_args = vec![normalized_type, value_expr];
11169 if let Some(s) = style {
11170 new_args.push(s.clone());
11171 }
11172 return Ok(Expression::Function(Box::new(Function::new(
11173 func_name.to_string(),
11174 new_args,
11175 ))));
11176 }
11177
11178 // For other targets: CONVERT(type, expr) -> CAST(expr AS type)
11179 fn expr_to_datatype(e: &Expression) -> Option<DataType> {
11180 match e {
11181 Expression::DataType(dt) => {
11182 // Convert NVARCHAR/NCHAR Custom types to standard VarChar/Char
11183 match dt {
11184 DataType::Custom { name }
11185 if name.starts_with("NVARCHAR(")
11186 || name.starts_with("NCHAR(") =>
11187 {
11188 // Extract the length from "NVARCHAR(200)" or "NCHAR(40)"
11189 let inner = &name[name.find('(').unwrap() + 1
11190 ..name.len() - 1];
11191 if inner.eq_ignore_ascii_case("MAX") {
11192 Some(DataType::Text)
11193 } else if let Ok(len) = inner.parse::<u32>() {
11194 if name.starts_with("NCHAR") {
11195 Some(DataType::Char {
11196 length: Some(len),
11197 })
11198 } else {
11199 Some(DataType::VarChar {
11200 length: Some(len),
11201 parenthesized_length: false,
11202 })
11203 }
11204 } else {
11205 Some(dt.clone())
11206 }
11207 }
11208 DataType::Custom { name } if name == "NVARCHAR" => {
11209 Some(DataType::VarChar {
11210 length: None,
11211 parenthesized_length: false,
11212 })
11213 }
11214 DataType::Custom { name } if name == "NCHAR" => {
11215 Some(DataType::Char { length: None })
11216 }
11217 DataType::Custom { name }
11218 if name == "NVARCHAR(MAX)"
11219 || name == "VARCHAR(MAX)" =>
11220 {
11221 Some(DataType::Text)
11222 }
11223 _ => Some(dt.clone()),
11224 }
11225 }
11226 Expression::Identifier(id) => {
11227 let name = id.name.to_uppercase();
11228 match name.as_str() {
11229 "INT" | "INTEGER" => Some(DataType::Int {
11230 length: None,
11231 integer_spelling: false,
11232 }),
11233 "BIGINT" => Some(DataType::BigInt { length: None }),
11234 "SMALLINT" => {
11235 Some(DataType::SmallInt { length: None })
11236 }
11237 "TINYINT" => {
11238 Some(DataType::TinyInt { length: None })
11239 }
11240 "FLOAT" => Some(DataType::Float {
11241 precision: None,
11242 scale: None,
11243 real_spelling: false,
11244 }),
11245 "REAL" => Some(DataType::Float {
11246 precision: None,
11247 scale: None,
11248 real_spelling: true,
11249 }),
11250 "DATETIME" | "DATETIME2" => {
11251 Some(DataType::Timestamp {
11252 timezone: false,
11253 precision: None,
11254 })
11255 }
11256 "DATE" => Some(DataType::Date),
11257 "BIT" => Some(DataType::Boolean),
11258 "TEXT" => Some(DataType::Text),
11259 "NUMERIC" => Some(DataType::Decimal {
11260 precision: None,
11261 scale: None,
11262 }),
11263 "MONEY" => Some(DataType::Decimal {
11264 precision: Some(15),
11265 scale: Some(4),
11266 }),
11267 "SMALLMONEY" => Some(DataType::Decimal {
11268 precision: Some(6),
11269 scale: Some(4),
11270 }),
11271 "VARCHAR" => Some(DataType::VarChar {
11272 length: None,
11273 parenthesized_length: false,
11274 }),
11275 "NVARCHAR" => Some(DataType::VarChar {
11276 length: None,
11277 parenthesized_length: false,
11278 }),
11279 "CHAR" => Some(DataType::Char { length: None }),
11280 "NCHAR" => Some(DataType::Char { length: None }),
11281 _ => Some(DataType::Custom { name }),
11282 }
11283 }
11284 Expression::Column(col) => {
11285 let name = col.name.name.to_uppercase();
11286 match name.as_str() {
11287 "INT" | "INTEGER" => Some(DataType::Int {
11288 length: None,
11289 integer_spelling: false,
11290 }),
11291 "BIGINT" => Some(DataType::BigInt { length: None }),
11292 "FLOAT" => Some(DataType::Float {
11293 precision: None,
11294 scale: None,
11295 real_spelling: false,
11296 }),
11297 "DATETIME" | "DATETIME2" => {
11298 Some(DataType::Timestamp {
11299 timezone: false,
11300 precision: None,
11301 })
11302 }
11303 "DATE" => Some(DataType::Date),
11304 "NUMERIC" => Some(DataType::Decimal {
11305 precision: None,
11306 scale: None,
11307 }),
11308 "VARCHAR" => Some(DataType::VarChar {
11309 length: None,
11310 parenthesized_length: false,
11311 }),
11312 "NVARCHAR" => Some(DataType::VarChar {
11313 length: None,
11314 parenthesized_length: false,
11315 }),
11316 "CHAR" => Some(DataType::Char { length: None }),
11317 "NCHAR" => Some(DataType::Char { length: None }),
11318 _ => Some(DataType::Custom { name }),
11319 }
11320 }
11321 // NVARCHAR(200) parsed as Function("NVARCHAR", [200])
11322 Expression::Function(f) => {
11323 let fname = f.name.to_uppercase();
11324 match fname.as_str() {
11325 "VARCHAR" | "NVARCHAR" => {
11326 let len = f.args.first().and_then(|a| {
11327 if let Expression::Literal(
11328 crate::expressions::Literal::Number(n),
11329 ) = a
11330 {
11331 n.parse::<u32>().ok()
11332 } else if let Expression::Identifier(id) = a
11333 {
11334 if id.name.eq_ignore_ascii_case("MAX") {
11335 None
11336 } else {
11337 None
11338 }
11339 } else {
11340 None
11341 }
11342 });
11343 // Check for VARCHAR(MAX) -> TEXT
11344 let is_max = f.args.first().map_or(false, |a| {
11345 matches!(a, Expression::Identifier(id) if id.name.eq_ignore_ascii_case("MAX"))
11346 || matches!(a, Expression::Column(col) if col.name.name.eq_ignore_ascii_case("MAX"))
11347 });
11348 if is_max {
11349 Some(DataType::Text)
11350 } else {
11351 Some(DataType::VarChar {
11352 length: len,
11353 parenthesized_length: false,
11354 })
11355 }
11356 }
11357 "NCHAR" | "CHAR" => {
11358 let len = f.args.first().and_then(|a| {
11359 if let Expression::Literal(
11360 crate::expressions::Literal::Number(n),
11361 ) = a
11362 {
11363 n.parse::<u32>().ok()
11364 } else {
11365 None
11366 }
11367 });
11368 Some(DataType::Char { length: len })
11369 }
11370 "NUMERIC" | "DECIMAL" => {
11371 let precision = f.args.first().and_then(|a| {
11372 if let Expression::Literal(
11373 crate::expressions::Literal::Number(n),
11374 ) = a
11375 {
11376 n.parse::<u32>().ok()
11377 } else {
11378 None
11379 }
11380 });
11381 let scale = f.args.get(1).and_then(|a| {
11382 if let Expression::Literal(
11383 crate::expressions::Literal::Number(n),
11384 ) = a
11385 {
11386 n.parse::<u32>().ok()
11387 } else {
11388 None
11389 }
11390 });
11391 Some(DataType::Decimal { precision, scale })
11392 }
11393 _ => None,
11394 }
11395 }
11396 _ => None,
11397 }
11398 }
11399
11400 if let Some(mut dt) = expr_to_datatype(&type_expr) {
11401 // For TSQL source: VARCHAR/CHAR without length defaults to 30
11402 let is_tsql_source =
11403 matches!(source, DialectType::TSQL | DialectType::Fabric);
11404 if is_tsql_source {
11405 match &dt {
11406 DataType::VarChar { length: None, .. } => {
11407 dt = DataType::VarChar {
11408 length: Some(30),
11409 parenthesized_length: false,
11410 };
11411 }
11412 DataType::Char { length: None } => {
11413 dt = DataType::Char { length: Some(30) };
11414 }
11415 _ => {}
11416 }
11417 }
11418
11419 // Determine if this is a string type
11420 let is_string_type = matches!(
11421 dt,
11422 DataType::VarChar { .. }
11423 | DataType::Char { .. }
11424 | DataType::Text
11425 ) || matches!(&dt, DataType::Custom { name } if name == "NVARCHAR" || name == "NCHAR"
11426 || name.starts_with("NVARCHAR(") || name.starts_with("NCHAR(")
11427 || name.starts_with("VARCHAR(") || name == "VARCHAR"
11428 || name == "STRING");
11429
11430 // Determine if this is a date/time type
11431 let is_datetime_type = matches!(
11432 dt,
11433 DataType::Timestamp { .. } | DataType::Date
11434 ) || matches!(&dt, DataType::Custom { name } if name == "DATETIME"
11435 || name == "DATETIME2" || name == "SMALLDATETIME");
11436
11437 // Check for date conversion with style
11438 if style.is_some() {
11439 let style_num = style.and_then(|s| {
11440 if let Expression::Literal(
11441 crate::expressions::Literal::Number(n),
11442 ) = s
11443 {
11444 n.parse::<u32>().ok()
11445 } else {
11446 None
11447 }
11448 });
11449
11450 // TSQL CONVERT date styles (Java format)
11451 let format_str = style_num.and_then(|n| match n {
11452 101 => Some("MM/dd/yyyy"),
11453 102 => Some("yyyy.MM.dd"),
11454 103 => Some("dd/MM/yyyy"),
11455 104 => Some("dd.MM.yyyy"),
11456 105 => Some("dd-MM-yyyy"),
11457 108 => Some("HH:mm:ss"),
11458 110 => Some("MM-dd-yyyy"),
11459 112 => Some("yyyyMMdd"),
11460 120 | 20 => Some("yyyy-MM-dd HH:mm:ss"),
11461 121 | 21 => Some("yyyy-MM-dd HH:mm:ss.SSSSSS"),
11462 126 | 127 => Some("yyyy-MM-dd'T'HH:mm:ss.SSS"),
11463 _ => None,
11464 });
11465
11466 // Non-string, non-datetime types with style: just CAST, ignore the style
11467 if !is_string_type && !is_datetime_type {
11468 let cast_expr = if is_try {
11469 Expression::TryCast(Box::new(
11470 crate::expressions::Cast {
11471 this: value_expr,
11472 to: dt,
11473 trailing_comments: Vec::new(),
11474 double_colon_syntax: false,
11475 format: None,
11476 default: None,
11477 },
11478 ))
11479 } else {
11480 Expression::Cast(Box::new(
11481 crate::expressions::Cast {
11482 this: value_expr,
11483 to: dt,
11484 trailing_comments: Vec::new(),
11485 double_colon_syntax: false,
11486 format: None,
11487 default: None,
11488 },
11489 ))
11490 };
11491 return Ok(cast_expr);
11492 }
11493
11494 if let Some(java_fmt) = format_str {
11495 let c_fmt = java_fmt
11496 .replace("yyyy", "%Y")
11497 .replace("MM", "%m")
11498 .replace("dd", "%d")
11499 .replace("HH", "%H")
11500 .replace("mm", "%M")
11501 .replace("ss", "%S")
11502 .replace("SSSSSS", "%f")
11503 .replace("SSS", "%f")
11504 .replace("'T'", "T");
11505
11506 // For datetime target types: style is the INPUT format for parsing strings -> dates
11507 if is_datetime_type {
11508 match target {
11509 DialectType::DuckDB => {
11510 return Ok(Expression::Function(Box::new(
11511 Function::new(
11512 "STRPTIME".to_string(),
11513 vec![
11514 value_expr,
11515 Expression::string(&c_fmt),
11516 ],
11517 ),
11518 )));
11519 }
11520 DialectType::Spark
11521 | DialectType::Databricks => {
11522 // CONVERT(DATETIME, x, style) -> TO_TIMESTAMP(x, fmt)
11523 // CONVERT(DATE, x, style) -> TO_DATE(x, fmt)
11524 let func_name =
11525 if matches!(dt, DataType::Date) {
11526 "TO_DATE"
11527 } else {
11528 "TO_TIMESTAMP"
11529 };
11530 return Ok(Expression::Function(Box::new(
11531 Function::new(
11532 func_name.to_string(),
11533 vec![
11534 value_expr,
11535 Expression::string(java_fmt),
11536 ],
11537 ),
11538 )));
11539 }
11540 DialectType::Hive => {
11541 return Ok(Expression::Function(Box::new(
11542 Function::new(
11543 "TO_TIMESTAMP".to_string(),
11544 vec![
11545 value_expr,
11546 Expression::string(java_fmt),
11547 ],
11548 ),
11549 )));
11550 }
11551 _ => {
11552 return Ok(Expression::Cast(Box::new(
11553 crate::expressions::Cast {
11554 this: value_expr,
11555 to: dt,
11556 trailing_comments: Vec::new(),
11557 double_colon_syntax: false,
11558 format: None,
11559 default: None,
11560 },
11561 )));
11562 }
11563 }
11564 }
11565
11566 // For string target types: style is the OUTPUT format for dates -> strings
11567 match target {
11568 DialectType::DuckDB => Ok(Expression::Function(
11569 Box::new(Function::new(
11570 "STRPTIME".to_string(),
11571 vec![
11572 value_expr,
11573 Expression::string(&c_fmt),
11574 ],
11575 )),
11576 )),
11577 DialectType::Spark | DialectType::Databricks => {
11578 // For string target types with style: CAST(DATE_FORMAT(x, fmt) AS type)
11579 // Determine the target string type
11580 let string_dt = match &dt {
11581 DataType::VarChar {
11582 length: Some(l),
11583 ..
11584 } => DataType::VarChar {
11585 length: Some(*l),
11586 parenthesized_length: false,
11587 },
11588 DataType::Text => DataType::Custom {
11589 name: "STRING".to_string(),
11590 },
11591 _ => DataType::Custom {
11592 name: "STRING".to_string(),
11593 },
11594 };
11595 let date_format_expr = Expression::Function(
11596 Box::new(Function::new(
11597 "DATE_FORMAT".to_string(),
11598 vec![
11599 value_expr,
11600 Expression::string(java_fmt),
11601 ],
11602 )),
11603 );
11604 let cast_expr = if is_try {
11605 Expression::TryCast(Box::new(
11606 crate::expressions::Cast {
11607 this: date_format_expr,
11608 to: string_dt,
11609 trailing_comments: Vec::new(),
11610 double_colon_syntax: false,
11611 format: None,
11612 default: None,
11613 },
11614 ))
11615 } else {
11616 Expression::Cast(Box::new(
11617 crate::expressions::Cast {
11618 this: date_format_expr,
11619 to: string_dt,
11620 trailing_comments: Vec::new(),
11621 double_colon_syntax: false,
11622 format: None,
11623 default: None,
11624 },
11625 ))
11626 };
11627 Ok(cast_expr)
11628 }
11629 DialectType::MySQL | DialectType::SingleStore => {
11630 // For MySQL: CAST(DATE_FORMAT(x, mysql_fmt) AS CHAR(n))
11631 let mysql_fmt = java_fmt
11632 .replace("yyyy", "%Y")
11633 .replace("MM", "%m")
11634 .replace("dd", "%d")
11635 .replace("HH:mm:ss.SSSSSS", "%T")
11636 .replace("HH:mm:ss", "%T")
11637 .replace("HH", "%H")
11638 .replace("mm", "%i")
11639 .replace("ss", "%S");
11640 let date_format_expr = Expression::Function(
11641 Box::new(Function::new(
11642 "DATE_FORMAT".to_string(),
11643 vec![
11644 value_expr,
11645 Expression::string(&mysql_fmt),
11646 ],
11647 )),
11648 );
11649 // MySQL uses CHAR for string casts
11650 let mysql_dt = match &dt {
11651 DataType::VarChar { length, .. } => {
11652 DataType::Char { length: *length }
11653 }
11654 _ => dt,
11655 };
11656 Ok(Expression::Cast(Box::new(
11657 crate::expressions::Cast {
11658 this: date_format_expr,
11659 to: mysql_dt,
11660 trailing_comments: Vec::new(),
11661 double_colon_syntax: false,
11662 format: None,
11663 default: None,
11664 },
11665 )))
11666 }
11667 DialectType::Hive => {
11668 let func_name = "TO_TIMESTAMP";
11669 Ok(Expression::Function(Box::new(
11670 Function::new(
11671 func_name.to_string(),
11672 vec![
11673 value_expr,
11674 Expression::string(java_fmt),
11675 ],
11676 ),
11677 )))
11678 }
11679 _ => Ok(Expression::Cast(Box::new(
11680 crate::expressions::Cast {
11681 this: value_expr,
11682 to: dt,
11683 trailing_comments: Vec::new(),
11684 double_colon_syntax: false,
11685 format: None,
11686 default: None,
11687 },
11688 ))),
11689 }
11690 } else {
11691 // Unknown style, just CAST
11692 let cast_expr = if is_try {
11693 Expression::TryCast(Box::new(
11694 crate::expressions::Cast {
11695 this: value_expr,
11696 to: dt,
11697 trailing_comments: Vec::new(),
11698 double_colon_syntax: false,
11699 format: None,
11700 default: None,
11701 },
11702 ))
11703 } else {
11704 Expression::Cast(Box::new(
11705 crate::expressions::Cast {
11706 this: value_expr,
11707 to: dt,
11708 trailing_comments: Vec::new(),
11709 double_colon_syntax: false,
11710 format: None,
11711 default: None,
11712 },
11713 ))
11714 };
11715 Ok(cast_expr)
11716 }
11717 } else {
11718 // No style - simple CAST
11719 let final_dt = if matches!(
11720 target,
11721 DialectType::MySQL | DialectType::SingleStore
11722 ) {
11723 match &dt {
11724 DataType::Int { .. }
11725 | DataType::BigInt { .. }
11726 | DataType::SmallInt { .. }
11727 | DataType::TinyInt { .. } => DataType::Custom {
11728 name: "SIGNED".to_string(),
11729 },
11730 DataType::VarChar { length, .. } => {
11731 DataType::Char { length: *length }
11732 }
11733 _ => dt,
11734 }
11735 } else {
11736 dt
11737 };
11738 let cast_expr = if is_try {
11739 Expression::TryCast(Box::new(
11740 crate::expressions::Cast {
11741 this: value_expr,
11742 to: final_dt,
11743 trailing_comments: Vec::new(),
11744 double_colon_syntax: false,
11745 format: None,
11746 default: None,
11747 },
11748 ))
11749 } else {
11750 Expression::Cast(Box::new(crate::expressions::Cast {
11751 this: value_expr,
11752 to: final_dt,
11753 trailing_comments: Vec::new(),
11754 double_colon_syntax: false,
11755 format: None,
11756 default: None,
11757 }))
11758 };
11759 Ok(cast_expr)
11760 }
11761 } else {
11762 // Can't convert type expression - keep as CONVERT/TRY_CONVERT function
11763 Ok(Expression::Function(f))
11764 }
11765 }
11766 // STRFTIME(val, fmt) from DuckDB / STRFTIME(fmt, val) from SQLite -> target-specific
11767 "STRFTIME" if f.args.len() == 2 => {
11768 // SQLite uses STRFTIME(fmt, val); DuckDB uses STRFTIME(val, fmt)
11769 let (val, fmt_expr) = if matches!(source, DialectType::SQLite) {
11770 // SQLite: args[0] = format, args[1] = value
11771 (f.args[1].clone(), &f.args[0])
11772 } else {
11773 // DuckDB and others: args[0] = value, args[1] = format
11774 (f.args[0].clone(), &f.args[1])
11775 };
11776
11777 // Helper to convert C-style format to Java-style
11778 fn c_to_java_format(fmt: &str) -> String {
11779 fmt.replace("%Y", "yyyy")
11780 .replace("%m", "MM")
11781 .replace("%d", "dd")
11782 .replace("%H", "HH")
11783 .replace("%M", "mm")
11784 .replace("%S", "ss")
11785 .replace("%f", "SSSSSS")
11786 .replace("%y", "yy")
11787 .replace("%-m", "M")
11788 .replace("%-d", "d")
11789 .replace("%-H", "H")
11790 .replace("%-I", "h")
11791 .replace("%I", "hh")
11792 .replace("%p", "a")
11793 .replace("%j", "DDD")
11794 .replace("%a", "EEE")
11795 .replace("%b", "MMM")
11796 .replace("%F", "yyyy-MM-dd")
11797 .replace("%T", "HH:mm:ss")
11798 }
11799
11800 // Helper: recursively convert format strings within expressions (handles CONCAT)
11801 fn convert_fmt_expr(
11802 expr: &Expression,
11803 converter: &dyn Fn(&str) -> String,
11804 ) -> Expression {
11805 match expr {
11806 Expression::Literal(
11807 crate::expressions::Literal::String(s),
11808 ) => Expression::string(&converter(s)),
11809 Expression::Function(func)
11810 if func.name.eq_ignore_ascii_case("CONCAT") =>
11811 {
11812 let new_args: Vec<Expression> = func
11813 .args
11814 .iter()
11815 .map(|a| convert_fmt_expr(a, converter))
11816 .collect();
11817 Expression::Function(Box::new(Function::new(
11818 "CONCAT".to_string(),
11819 new_args,
11820 )))
11821 }
11822 other => other.clone(),
11823 }
11824 }
11825
11826 match target {
11827 DialectType::DuckDB => {
11828 if matches!(source, DialectType::SQLite) {
11829 // SQLite STRFTIME(fmt, val) -> DuckDB STRFTIME(CAST(val AS TIMESTAMP), fmt)
11830 let cast_val = Expression::Cast(Box::new(Cast {
11831 this: val,
11832 to: crate::expressions::DataType::Timestamp {
11833 precision: None,
11834 timezone: false,
11835 },
11836 trailing_comments: Vec::new(),
11837 double_colon_syntax: false,
11838 format: None,
11839 default: None,
11840 }));
11841 Ok(Expression::Function(Box::new(Function::new(
11842 "STRFTIME".to_string(),
11843 vec![cast_val, fmt_expr.clone()],
11844 ))))
11845 } else {
11846 Ok(Expression::Function(f))
11847 }
11848 }
11849 DialectType::Spark
11850 | DialectType::Databricks
11851 | DialectType::Hive => {
11852 // STRFTIME(val, fmt) -> DATE_FORMAT(val, java_fmt)
11853 let converted_fmt =
11854 convert_fmt_expr(fmt_expr, &c_to_java_format);
11855 Ok(Expression::Function(Box::new(Function::new(
11856 "DATE_FORMAT".to_string(),
11857 vec![val, converted_fmt],
11858 ))))
11859 }
11860 DialectType::TSQL | DialectType::Fabric => {
11861 // STRFTIME(val, fmt) -> FORMAT(val, java_fmt)
11862 let converted_fmt =
11863 convert_fmt_expr(fmt_expr, &c_to_java_format);
11864 Ok(Expression::Function(Box::new(Function::new(
11865 "FORMAT".to_string(),
11866 vec![val, converted_fmt],
11867 ))))
11868 }
11869 DialectType::Presto
11870 | DialectType::Trino
11871 | DialectType::Athena => {
11872 // STRFTIME(val, fmt) -> DATE_FORMAT(val, presto_fmt) (convert DuckDB format to Presto)
11873 if let Expression::Literal(
11874 crate::expressions::Literal::String(s),
11875 ) = fmt_expr
11876 {
11877 let presto_fmt = duckdb_to_presto_format(s);
11878 Ok(Expression::Function(Box::new(Function::new(
11879 "DATE_FORMAT".to_string(),
11880 vec![val, Expression::string(&presto_fmt)],
11881 ))))
11882 } else {
11883 Ok(Expression::Function(Box::new(Function::new(
11884 "DATE_FORMAT".to_string(),
11885 vec![val, fmt_expr.clone()],
11886 ))))
11887 }
11888 }
11889 DialectType::BigQuery => {
11890 // STRFTIME(val, fmt) -> FORMAT_DATE(bq_fmt, val) - note reversed arg order
11891 if let Expression::Literal(
11892 crate::expressions::Literal::String(s),
11893 ) = fmt_expr
11894 {
11895 let bq_fmt = duckdb_to_bigquery_format(s);
11896 Ok(Expression::Function(Box::new(Function::new(
11897 "FORMAT_DATE".to_string(),
11898 vec![Expression::string(&bq_fmt), val],
11899 ))))
11900 } else {
11901 Ok(Expression::Function(Box::new(Function::new(
11902 "FORMAT_DATE".to_string(),
11903 vec![fmt_expr.clone(), val],
11904 ))))
11905 }
11906 }
11907 DialectType::PostgreSQL | DialectType::Redshift => {
11908 // STRFTIME(val, fmt) -> TO_CHAR(val, pg_fmt)
11909 if let Expression::Literal(
11910 crate::expressions::Literal::String(s),
11911 ) = fmt_expr
11912 {
11913 let pg_fmt = s
11914 .replace("%Y", "YYYY")
11915 .replace("%m", "MM")
11916 .replace("%d", "DD")
11917 .replace("%H", "HH24")
11918 .replace("%M", "MI")
11919 .replace("%S", "SS")
11920 .replace("%y", "YY")
11921 .replace("%-m", "FMMM")
11922 .replace("%-d", "FMDD")
11923 .replace("%-H", "FMHH24")
11924 .replace("%-I", "FMHH12")
11925 .replace("%p", "AM")
11926 .replace("%F", "YYYY-MM-DD")
11927 .replace("%T", "HH24:MI:SS");
11928 Ok(Expression::Function(Box::new(Function::new(
11929 "TO_CHAR".to_string(),
11930 vec![val, Expression::string(&pg_fmt)],
11931 ))))
11932 } else {
11933 Ok(Expression::Function(Box::new(Function::new(
11934 "TO_CHAR".to_string(),
11935 vec![val, fmt_expr.clone()],
11936 ))))
11937 }
11938 }
11939 _ => Ok(Expression::Function(f)),
11940 }
11941 }
11942 // STRPTIME(val, fmt) from DuckDB -> target-specific date parse function
11943 "STRPTIME" if f.args.len() == 2 => {
11944 let val = f.args[0].clone();
11945 let fmt_expr = &f.args[1];
11946
11947 fn c_to_java_format_parse(fmt: &str) -> String {
11948 fmt.replace("%Y", "yyyy")
11949 .replace("%m", "MM")
11950 .replace("%d", "dd")
11951 .replace("%H", "HH")
11952 .replace("%M", "mm")
11953 .replace("%S", "ss")
11954 .replace("%f", "SSSSSS")
11955 .replace("%y", "yy")
11956 .replace("%-m", "M")
11957 .replace("%-d", "d")
11958 .replace("%-H", "H")
11959 .replace("%-I", "h")
11960 .replace("%I", "hh")
11961 .replace("%p", "a")
11962 .replace("%F", "yyyy-MM-dd")
11963 .replace("%T", "HH:mm:ss")
11964 }
11965
11966 match target {
11967 DialectType::DuckDB => Ok(Expression::Function(f)),
11968 DialectType::Spark | DialectType::Databricks => {
11969 // STRPTIME(val, fmt) -> TO_TIMESTAMP(val, java_fmt)
11970 if let Expression::Literal(
11971 crate::expressions::Literal::String(s),
11972 ) = fmt_expr
11973 {
11974 let java_fmt = c_to_java_format_parse(s);
11975 Ok(Expression::Function(Box::new(Function::new(
11976 "TO_TIMESTAMP".to_string(),
11977 vec![val, Expression::string(&java_fmt)],
11978 ))))
11979 } else {
11980 Ok(Expression::Function(Box::new(Function::new(
11981 "TO_TIMESTAMP".to_string(),
11982 vec![val, fmt_expr.clone()],
11983 ))))
11984 }
11985 }
11986 DialectType::Hive => {
11987 // STRPTIME(val, fmt) -> CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(val, java_fmt)) AS TIMESTAMP)
11988 if let Expression::Literal(
11989 crate::expressions::Literal::String(s),
11990 ) = fmt_expr
11991 {
11992 let java_fmt = c_to_java_format_parse(s);
11993 let unix_ts =
11994 Expression::Function(Box::new(Function::new(
11995 "UNIX_TIMESTAMP".to_string(),
11996 vec![val, Expression::string(&java_fmt)],
11997 )));
11998 let from_unix =
11999 Expression::Function(Box::new(Function::new(
12000 "FROM_UNIXTIME".to_string(),
12001 vec![unix_ts],
12002 )));
12003 Ok(Expression::Cast(Box::new(
12004 crate::expressions::Cast {
12005 this: from_unix,
12006 to: DataType::Timestamp {
12007 timezone: false,
12008 precision: None,
12009 },
12010 trailing_comments: Vec::new(),
12011 double_colon_syntax: false,
12012 format: None,
12013 default: None,
12014 },
12015 )))
12016 } else {
12017 Ok(Expression::Function(f))
12018 }
12019 }
12020 DialectType::Presto
12021 | DialectType::Trino
12022 | DialectType::Athena => {
12023 // STRPTIME(val, fmt) -> DATE_PARSE(val, presto_fmt) (convert DuckDB format to Presto)
12024 if let Expression::Literal(
12025 crate::expressions::Literal::String(s),
12026 ) = fmt_expr
12027 {
12028 let presto_fmt = duckdb_to_presto_format(s);
12029 Ok(Expression::Function(Box::new(Function::new(
12030 "DATE_PARSE".to_string(),
12031 vec![val, Expression::string(&presto_fmt)],
12032 ))))
12033 } else {
12034 Ok(Expression::Function(Box::new(Function::new(
12035 "DATE_PARSE".to_string(),
12036 vec![val, fmt_expr.clone()],
12037 ))))
12038 }
12039 }
12040 DialectType::BigQuery => {
12041 // STRPTIME(val, fmt) -> PARSE_TIMESTAMP(bq_fmt, val) - note reversed arg order
12042 if let Expression::Literal(
12043 crate::expressions::Literal::String(s),
12044 ) = fmt_expr
12045 {
12046 let bq_fmt = duckdb_to_bigquery_format(s);
12047 Ok(Expression::Function(Box::new(Function::new(
12048 "PARSE_TIMESTAMP".to_string(),
12049 vec![Expression::string(&bq_fmt), val],
12050 ))))
12051 } else {
12052 Ok(Expression::Function(Box::new(Function::new(
12053 "PARSE_TIMESTAMP".to_string(),
12054 vec![fmt_expr.clone(), val],
12055 ))))
12056 }
12057 }
12058 _ => Ok(Expression::Function(f)),
12059 }
12060 }
12061 // DATE_FORMAT(val, fmt) from Presto source (C-style format) -> target-specific
12062 "DATE_FORMAT"
12063 if f.args.len() >= 2
12064 && matches!(
12065 source,
12066 DialectType::Presto
12067 | DialectType::Trino
12068 | DialectType::Athena
12069 ) =>
12070 {
12071 let val = f.args[0].clone();
12072 let fmt_expr = &f.args[1];
12073
12074 match target {
12075 DialectType::Presto
12076 | DialectType::Trino
12077 | DialectType::Athena => {
12078 // Presto -> Presto: normalize format (e.g., %H:%i:%S -> %T)
12079 if let Expression::Literal(
12080 crate::expressions::Literal::String(s),
12081 ) = fmt_expr
12082 {
12083 let normalized = crate::dialects::presto::PrestoDialect::normalize_presto_format(s);
12084 Ok(Expression::Function(Box::new(Function::new(
12085 "DATE_FORMAT".to_string(),
12086 vec![val, Expression::string(&normalized)],
12087 ))))
12088 } else {
12089 Ok(Expression::Function(f))
12090 }
12091 }
12092 DialectType::Hive
12093 | DialectType::Spark
12094 | DialectType::Databricks => {
12095 // Convert Presto C-style to Java-style format
12096 if let Expression::Literal(
12097 crate::expressions::Literal::String(s),
12098 ) = fmt_expr
12099 {
12100 let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
12101 Ok(Expression::Function(Box::new(Function::new(
12102 "DATE_FORMAT".to_string(),
12103 vec![val, Expression::string(&java_fmt)],
12104 ))))
12105 } else {
12106 Ok(Expression::Function(f))
12107 }
12108 }
12109 DialectType::DuckDB => {
12110 // Convert to STRFTIME(val, duckdb_fmt)
12111 if let Expression::Literal(
12112 crate::expressions::Literal::String(s),
12113 ) = fmt_expr
12114 {
12115 let duckdb_fmt = crate::dialects::presto::PrestoDialect::presto_to_duckdb_format(s);
12116 Ok(Expression::Function(Box::new(Function::new(
12117 "STRFTIME".to_string(),
12118 vec![val, Expression::string(&duckdb_fmt)],
12119 ))))
12120 } else {
12121 Ok(Expression::Function(Box::new(Function::new(
12122 "STRFTIME".to_string(),
12123 vec![val, fmt_expr.clone()],
12124 ))))
12125 }
12126 }
12127 DialectType::BigQuery => {
12128 // Convert to FORMAT_DATE(bq_fmt, val) - reversed args
12129 if let Expression::Literal(
12130 crate::expressions::Literal::String(s),
12131 ) = fmt_expr
12132 {
12133 let bq_fmt = crate::dialects::presto::PrestoDialect::presto_to_bigquery_format(s);
12134 Ok(Expression::Function(Box::new(Function::new(
12135 "FORMAT_DATE".to_string(),
12136 vec![Expression::string(&bq_fmt), val],
12137 ))))
12138 } else {
12139 Ok(Expression::Function(Box::new(Function::new(
12140 "FORMAT_DATE".to_string(),
12141 vec![fmt_expr.clone(), val],
12142 ))))
12143 }
12144 }
12145 _ => Ok(Expression::Function(f)),
12146 }
12147 }
12148 // DATE_PARSE(val, fmt) from Presto source -> target-specific parse function
12149 "DATE_PARSE"
12150 if f.args.len() >= 2
12151 && matches!(
12152 source,
12153 DialectType::Presto
12154 | DialectType::Trino
12155 | DialectType::Athena
12156 ) =>
12157 {
12158 let val = f.args[0].clone();
12159 let fmt_expr = &f.args[1];
12160
12161 match target {
12162 DialectType::Presto
12163 | DialectType::Trino
12164 | DialectType::Athena => {
12165 // Presto -> Presto: normalize format
12166 if let Expression::Literal(
12167 crate::expressions::Literal::String(s),
12168 ) = fmt_expr
12169 {
12170 let normalized = crate::dialects::presto::PrestoDialect::normalize_presto_format(s);
12171 Ok(Expression::Function(Box::new(Function::new(
12172 "DATE_PARSE".to_string(),
12173 vec![val, Expression::string(&normalized)],
12174 ))))
12175 } else {
12176 Ok(Expression::Function(f))
12177 }
12178 }
12179 DialectType::Hive => {
12180 // Presto -> Hive: if default format, just CAST(x AS TIMESTAMP)
12181 if let Expression::Literal(
12182 crate::expressions::Literal::String(s),
12183 ) = fmt_expr
12184 {
12185 if crate::dialects::presto::PrestoDialect::is_default_timestamp_format(s)
12186 || crate::dialects::presto::PrestoDialect::is_default_date_format(s) {
12187 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
12188 this: val,
12189 to: DataType::Timestamp { timezone: false, precision: None },
12190 trailing_comments: Vec::new(),
12191 double_colon_syntax: false,
12192 format: None,
12193 default: None,
12194 })))
12195 } else {
12196 let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
12197 Ok(Expression::Function(Box::new(Function::new(
12198 "TO_TIMESTAMP".to_string(),
12199 vec![val, Expression::string(&java_fmt)],
12200 ))))
12201 }
12202 } else {
12203 Ok(Expression::Function(f))
12204 }
12205 }
12206 DialectType::Spark | DialectType::Databricks => {
12207 // Presto -> Spark: TO_TIMESTAMP(val, java_fmt)
12208 if let Expression::Literal(
12209 crate::expressions::Literal::String(s),
12210 ) = fmt_expr
12211 {
12212 let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
12213 Ok(Expression::Function(Box::new(Function::new(
12214 "TO_TIMESTAMP".to_string(),
12215 vec![val, Expression::string(&java_fmt)],
12216 ))))
12217 } else {
12218 Ok(Expression::Function(f))
12219 }
12220 }
12221 DialectType::DuckDB => {
12222 // Presto -> DuckDB: STRPTIME(val, duckdb_fmt)
12223 if let Expression::Literal(
12224 crate::expressions::Literal::String(s),
12225 ) = fmt_expr
12226 {
12227 let duckdb_fmt = crate::dialects::presto::PrestoDialect::presto_to_duckdb_format(s);
12228 Ok(Expression::Function(Box::new(Function::new(
12229 "STRPTIME".to_string(),
12230 vec![val, Expression::string(&duckdb_fmt)],
12231 ))))
12232 } else {
12233 Ok(Expression::Function(Box::new(Function::new(
12234 "STRPTIME".to_string(),
12235 vec![val, fmt_expr.clone()],
12236 ))))
12237 }
12238 }
12239 _ => Ok(Expression::Function(f)),
12240 }
12241 }
12242 // FROM_BASE64(x) / TO_BASE64(x) from Presto -> Hive-specific renames
12243 "FROM_BASE64"
12244 if f.args.len() == 1 && matches!(target, DialectType::Hive) =>
12245 {
12246 Ok(Expression::Function(Box::new(Function::new(
12247 "UNBASE64".to_string(),
12248 f.args,
12249 ))))
12250 }
12251 "TO_BASE64"
12252 if f.args.len() == 1 && matches!(target, DialectType::Hive) =>
12253 {
12254 Ok(Expression::Function(Box::new(Function::new(
12255 "BASE64".to_string(),
12256 f.args,
12257 ))))
12258 }
12259 // FROM_UNIXTIME(x) -> CAST(FROM_UNIXTIME(x) AS TIMESTAMP) for Spark
12260 "FROM_UNIXTIME"
12261 if f.args.len() == 1
12262 && matches!(
12263 source,
12264 DialectType::Presto
12265 | DialectType::Trino
12266 | DialectType::Athena
12267 )
12268 && matches!(
12269 target,
12270 DialectType::Spark | DialectType::Databricks
12271 ) =>
12272 {
12273 // Wrap FROM_UNIXTIME(x) in CAST(... AS TIMESTAMP)
12274 let from_unix = Expression::Function(Box::new(Function::new(
12275 "FROM_UNIXTIME".to_string(),
12276 f.args,
12277 )));
12278 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
12279 this: from_unix,
12280 to: DataType::Timestamp {
12281 timezone: false,
12282 precision: None,
12283 },
12284 trailing_comments: Vec::new(),
12285 double_colon_syntax: false,
12286 format: None,
12287 default: None,
12288 })))
12289 }
12290 // DATE_FORMAT(val, fmt) from Hive/Spark/MySQL -> target-specific format function
12291 "DATE_FORMAT"
12292 if f.args.len() >= 2
12293 && !matches!(
12294 target,
12295 DialectType::Hive
12296 | DialectType::Spark
12297 | DialectType::Databricks
12298 | DialectType::MySQL
12299 | DialectType::SingleStore
12300 ) =>
12301 {
12302 let val = f.args[0].clone();
12303 let fmt_expr = &f.args[1];
12304 let is_hive_source = matches!(
12305 source,
12306 DialectType::Hive
12307 | DialectType::Spark
12308 | DialectType::Databricks
12309 );
12310
12311 fn java_to_c_format(fmt: &str) -> String {
12312 // Replace Java patterns with C strftime patterns.
12313 // Uses multi-pass to handle patterns that conflict.
12314 // First pass: replace multi-char patterns (longer first)
12315 let result = fmt
12316 .replace("yyyy", "%Y")
12317 .replace("SSSSSS", "%f")
12318 .replace("EEEE", "%W")
12319 .replace("MM", "%m")
12320 .replace("dd", "%d")
12321 .replace("HH", "%H")
12322 .replace("mm", "%M")
12323 .replace("ss", "%S")
12324 .replace("yy", "%y");
12325 // Second pass: handle single-char timezone patterns
12326 // z -> %Z (timezone name), Z -> %z (timezone offset)
12327 // Must be careful not to replace 'z'/'Z' inside already-replaced %Y, %M etc.
12328 let mut out = String::new();
12329 let chars: Vec<char> = result.chars().collect();
12330 let mut i = 0;
12331 while i < chars.len() {
12332 if chars[i] == '%' && i + 1 < chars.len() {
12333 // Already a format specifier, skip both chars
12334 out.push(chars[i]);
12335 out.push(chars[i + 1]);
12336 i += 2;
12337 } else if chars[i] == 'z' {
12338 out.push_str("%Z");
12339 i += 1;
12340 } else if chars[i] == 'Z' {
12341 out.push_str("%z");
12342 i += 1;
12343 } else {
12344 out.push(chars[i]);
12345 i += 1;
12346 }
12347 }
12348 out
12349 }
12350
12351 fn java_to_presto_format(fmt: &str) -> String {
12352 // Presto uses %T for HH:MM:SS
12353 let c_fmt = java_to_c_format(fmt);
12354 c_fmt.replace("%H:%M:%S", "%T")
12355 }
12356
12357 fn java_to_bq_format(fmt: &str) -> String {
12358 // BigQuery uses %F for yyyy-MM-dd and %T for HH:mm:ss
12359 let c_fmt = java_to_c_format(fmt);
12360 c_fmt.replace("%Y-%m-%d", "%F").replace("%H:%M:%S", "%T")
12361 }
12362
12363 // For Hive source, CAST string literals to appropriate type
12364 let cast_val = if is_hive_source {
12365 match &val {
12366 Expression::Literal(
12367 crate::expressions::Literal::String(_),
12368 ) => {
12369 match target {
12370 DialectType::DuckDB
12371 | DialectType::Presto
12372 | DialectType::Trino
12373 | DialectType::Athena => {
12374 Self::ensure_cast_timestamp(val.clone())
12375 }
12376 DialectType::BigQuery => {
12377 // BigQuery: CAST(val AS DATETIME)
12378 Expression::Cast(Box::new(
12379 crate::expressions::Cast {
12380 this: val.clone(),
12381 to: DataType::Custom {
12382 name: "DATETIME".to_string(),
12383 },
12384 trailing_comments: vec![],
12385 double_colon_syntax: false,
12386 format: None,
12387 default: None,
12388 },
12389 ))
12390 }
12391 _ => val.clone(),
12392 }
12393 }
12394 // For CAST(x AS DATE) or DATE literal, Presto needs CAST(CAST(x AS DATE) AS TIMESTAMP)
12395 Expression::Cast(c)
12396 if matches!(c.to, DataType::Date)
12397 && matches!(
12398 target,
12399 DialectType::Presto
12400 | DialectType::Trino
12401 | DialectType::Athena
12402 ) =>
12403 {
12404 Expression::Cast(Box::new(crate::expressions::Cast {
12405 this: val.clone(),
12406 to: DataType::Timestamp {
12407 timezone: false,
12408 precision: None,
12409 },
12410 trailing_comments: vec![],
12411 double_colon_syntax: false,
12412 format: None,
12413 default: None,
12414 }))
12415 }
12416 Expression::Literal(crate::expressions::Literal::Date(
12417 _,
12418 )) if matches!(
12419 target,
12420 DialectType::Presto
12421 | DialectType::Trino
12422 | DialectType::Athena
12423 ) =>
12424 {
12425 // DATE 'x' -> CAST(CAST('x' AS DATE) AS TIMESTAMP)
12426 let cast_date = Self::date_literal_to_cast(val.clone());
12427 Expression::Cast(Box::new(crate::expressions::Cast {
12428 this: cast_date,
12429 to: DataType::Timestamp {
12430 timezone: false,
12431 precision: None,
12432 },
12433 trailing_comments: vec![],
12434 double_colon_syntax: false,
12435 format: None,
12436 default: None,
12437 }))
12438 }
12439 _ => val.clone(),
12440 }
12441 } else {
12442 val.clone()
12443 };
12444
12445 match target {
12446 DialectType::DuckDB => {
12447 if let Expression::Literal(
12448 crate::expressions::Literal::String(s),
12449 ) = fmt_expr
12450 {
12451 let c_fmt = if is_hive_source {
12452 java_to_c_format(s)
12453 } else {
12454 s.clone()
12455 };
12456 Ok(Expression::Function(Box::new(Function::new(
12457 "STRFTIME".to_string(),
12458 vec![cast_val, Expression::string(&c_fmt)],
12459 ))))
12460 } else {
12461 Ok(Expression::Function(Box::new(Function::new(
12462 "STRFTIME".to_string(),
12463 vec![cast_val, fmt_expr.clone()],
12464 ))))
12465 }
12466 }
12467 DialectType::Presto
12468 | DialectType::Trino
12469 | DialectType::Athena => {
12470 if is_hive_source {
12471 if let Expression::Literal(
12472 crate::expressions::Literal::String(s),
12473 ) = fmt_expr
12474 {
12475 let p_fmt = java_to_presto_format(s);
12476 Ok(Expression::Function(Box::new(Function::new(
12477 "DATE_FORMAT".to_string(),
12478 vec![cast_val, Expression::string(&p_fmt)],
12479 ))))
12480 } else {
12481 Ok(Expression::Function(Box::new(Function::new(
12482 "DATE_FORMAT".to_string(),
12483 vec![cast_val, fmt_expr.clone()],
12484 ))))
12485 }
12486 } else {
12487 Ok(Expression::Function(Box::new(Function::new(
12488 "DATE_FORMAT".to_string(),
12489 f.args,
12490 ))))
12491 }
12492 }
12493 DialectType::BigQuery => {
12494 // DATE_FORMAT(val, fmt) -> FORMAT_DATE(fmt, val)
12495 if let Expression::Literal(
12496 crate::expressions::Literal::String(s),
12497 ) = fmt_expr
12498 {
12499 let bq_fmt = if is_hive_source {
12500 java_to_bq_format(s)
12501 } else {
12502 java_to_c_format(s)
12503 };
12504 Ok(Expression::Function(Box::new(Function::new(
12505 "FORMAT_DATE".to_string(),
12506 vec![Expression::string(&bq_fmt), cast_val],
12507 ))))
12508 } else {
12509 Ok(Expression::Function(Box::new(Function::new(
12510 "FORMAT_DATE".to_string(),
12511 vec![fmt_expr.clone(), cast_val],
12512 ))))
12513 }
12514 }
12515 DialectType::PostgreSQL | DialectType::Redshift => {
12516 if let Expression::Literal(
12517 crate::expressions::Literal::String(s),
12518 ) = fmt_expr
12519 {
12520 let pg_fmt = s
12521 .replace("yyyy", "YYYY")
12522 .replace("MM", "MM")
12523 .replace("dd", "DD")
12524 .replace("HH", "HH24")
12525 .replace("mm", "MI")
12526 .replace("ss", "SS")
12527 .replace("yy", "YY");
12528 Ok(Expression::Function(Box::new(Function::new(
12529 "TO_CHAR".to_string(),
12530 vec![val, Expression::string(&pg_fmt)],
12531 ))))
12532 } else {
12533 Ok(Expression::Function(Box::new(Function::new(
12534 "TO_CHAR".to_string(),
12535 vec![val, fmt_expr.clone()],
12536 ))))
12537 }
12538 }
12539 _ => Ok(Expression::Function(f)),
12540 }
12541 }
12542 // DATEDIFF(unit, start, end) - 3-arg form
12543 // SQLite uses DATEDIFF(date1, date2, unit_string) instead
12544 "DATEDIFF" if f.args.len() == 3 => {
12545 let mut args = f.args;
12546 // SQLite source: args = (date1, date2, unit_string)
12547 // Standard source: args = (unit, start, end)
12548 let (_arg0, arg1, arg2, unit_str) =
12549 if matches!(source, DialectType::SQLite) {
12550 let date1 = args.remove(0);
12551 let date2 = args.remove(0);
12552 let unit_expr = args.remove(0);
12553 let unit_s = Self::get_unit_str_static(&unit_expr);
12554
12555 // For SQLite target, generate JULIANDAY arithmetic directly
12556 if matches!(target, DialectType::SQLite) {
12557 let jd_first = Expression::Function(Box::new(
12558 Function::new("JULIANDAY".to_string(), vec![date1]),
12559 ));
12560 let jd_second = Expression::Function(Box::new(
12561 Function::new("JULIANDAY".to_string(), vec![date2]),
12562 ));
12563 let diff = Expression::Sub(Box::new(
12564 crate::expressions::BinaryOp::new(
12565 jd_first, jd_second,
12566 ),
12567 ));
12568 let paren_diff = Expression::Paren(Box::new(
12569 crate::expressions::Paren {
12570 this: diff,
12571 trailing_comments: Vec::new(),
12572 },
12573 ));
12574 let adjusted = match unit_s.as_str() {
12575 "HOUR" => Expression::Mul(Box::new(
12576 crate::expressions::BinaryOp::new(
12577 paren_diff,
12578 Expression::Literal(Literal::Number(
12579 "24.0".to_string(),
12580 )),
12581 ),
12582 )),
12583 "MINUTE" => Expression::Mul(Box::new(
12584 crate::expressions::BinaryOp::new(
12585 paren_diff,
12586 Expression::Literal(Literal::Number(
12587 "1440.0".to_string(),
12588 )),
12589 ),
12590 )),
12591 "SECOND" => Expression::Mul(Box::new(
12592 crate::expressions::BinaryOp::new(
12593 paren_diff,
12594 Expression::Literal(Literal::Number(
12595 "86400.0".to_string(),
12596 )),
12597 ),
12598 )),
12599 "MONTH" => Expression::Div(Box::new(
12600 crate::expressions::BinaryOp::new(
12601 paren_diff,
12602 Expression::Literal(Literal::Number(
12603 "30.0".to_string(),
12604 )),
12605 ),
12606 )),
12607 "YEAR" => Expression::Div(Box::new(
12608 crate::expressions::BinaryOp::new(
12609 paren_diff,
12610 Expression::Literal(Literal::Number(
12611 "365.0".to_string(),
12612 )),
12613 ),
12614 )),
12615 _ => paren_diff,
12616 };
12617 return Ok(Expression::Cast(Box::new(Cast {
12618 this: adjusted,
12619 to: DataType::Int {
12620 length: None,
12621 integer_spelling: true,
12622 },
12623 trailing_comments: vec![],
12624 double_colon_syntax: false,
12625 format: None,
12626 default: None,
12627 })));
12628 }
12629
12630 // For other targets, remap to standard (unit, start, end) form
12631 let unit_ident =
12632 Expression::Identifier(Identifier::new(&unit_s));
12633 (unit_ident, date1, date2, unit_s)
12634 } else {
12635 let arg0 = args.remove(0);
12636 let arg1 = args.remove(0);
12637 let arg2 = args.remove(0);
12638 let unit_s = Self::get_unit_str_static(&arg0);
12639 (arg0, arg1, arg2, unit_s)
12640 };
12641
12642 // For Hive/Spark source, string literal dates need to be cast
12643 // Note: Databricks is excluded - it handles string args like standard SQL
12644 let is_hive_spark =
12645 matches!(source, DialectType::Hive | DialectType::Spark);
12646
12647 match target {
12648 DialectType::Snowflake => {
12649 let unit =
12650 Expression::Identifier(Identifier::new(&unit_str));
12651 // Use ensure_to_date_preserved to add TO_DATE with a marker
12652 // that prevents the Snowflake TO_DATE handler from converting it to CAST
12653 let d1 = if is_hive_spark {
12654 Self::ensure_to_date_preserved(arg1)
12655 } else {
12656 arg1
12657 };
12658 let d2 = if is_hive_spark {
12659 Self::ensure_to_date_preserved(arg2)
12660 } else {
12661 arg2
12662 };
12663 Ok(Expression::Function(Box::new(Function::new(
12664 "DATEDIFF".to_string(),
12665 vec![unit, d1, d2],
12666 ))))
12667 }
12668 DialectType::Redshift => {
12669 let unit =
12670 Expression::Identifier(Identifier::new(&unit_str));
12671 let d1 = if is_hive_spark {
12672 Self::ensure_cast_date(arg1)
12673 } else {
12674 arg1
12675 };
12676 let d2 = if is_hive_spark {
12677 Self::ensure_cast_date(arg2)
12678 } else {
12679 arg2
12680 };
12681 Ok(Expression::Function(Box::new(Function::new(
12682 "DATEDIFF".to_string(),
12683 vec![unit, d1, d2],
12684 ))))
12685 }
12686 DialectType::TSQL => {
12687 let unit =
12688 Expression::Identifier(Identifier::new(&unit_str));
12689 Ok(Expression::Function(Box::new(Function::new(
12690 "DATEDIFF".to_string(),
12691 vec![unit, arg1, arg2],
12692 ))))
12693 }
12694 DialectType::DuckDB => {
12695 let is_redshift_tsql = matches!(
12696 source,
12697 DialectType::Redshift | DialectType::TSQL
12698 );
12699 if is_hive_spark {
12700 // For Hive/Spark source, CAST string args to DATE and emit DATE_DIFF directly
12701 let d1 = Self::ensure_cast_date(arg1);
12702 let d2 = Self::ensure_cast_date(arg2);
12703 Ok(Expression::Function(Box::new(Function::new(
12704 "DATE_DIFF".to_string(),
12705 vec![Expression::string(&unit_str), d1, d2],
12706 ))))
12707 } else if matches!(source, DialectType::Snowflake) {
12708 // For Snowflake source: special handling per unit
12709 match unit_str.as_str() {
12710 "NANOSECOND" => {
12711 // DATEDIFF(NANOSECOND, start, end) -> EPOCH_NS(CAST(end AS TIMESTAMP_NS)) - EPOCH_NS(CAST(start AS TIMESTAMP_NS))
12712 fn cast_to_timestamp_ns(
12713 expr: Expression,
12714 ) -> Expression
12715 {
12716 Expression::Cast(Box::new(Cast {
12717 this: expr,
12718 to: DataType::Custom {
12719 name: "TIMESTAMP_NS".to_string(),
12720 },
12721 trailing_comments: vec![],
12722 double_colon_syntax: false,
12723 format: None,
12724 default: None,
12725 }))
12726 }
12727 let epoch_end = Expression::Function(Box::new(
12728 Function::new(
12729 "EPOCH_NS".to_string(),
12730 vec![cast_to_timestamp_ns(arg2)],
12731 ),
12732 ));
12733 let epoch_start = Expression::Function(
12734 Box::new(Function::new(
12735 "EPOCH_NS".to_string(),
12736 vec![cast_to_timestamp_ns(arg1)],
12737 )),
12738 );
12739 Ok(Expression::Sub(Box::new(BinaryOp::new(
12740 epoch_end,
12741 epoch_start,
12742 ))))
12743 }
12744 "WEEK" => {
12745 // DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST(x AS DATE)), DATE_TRUNC('WEEK', CAST(y AS DATE)))
12746 let d1 = Self::force_cast_date(arg1);
12747 let d2 = Self::force_cast_date(arg2);
12748 let dt1 = Expression::Function(Box::new(
12749 Function::new(
12750 "DATE_TRUNC".to_string(),
12751 vec![Expression::string("WEEK"), d1],
12752 ),
12753 ));
12754 let dt2 = Expression::Function(Box::new(
12755 Function::new(
12756 "DATE_TRUNC".to_string(),
12757 vec![Expression::string("WEEK"), d2],
12758 ),
12759 ));
12760 Ok(Expression::Function(Box::new(
12761 Function::new(
12762 "DATE_DIFF".to_string(),
12763 vec![
12764 Expression::string(&unit_str),
12765 dt1,
12766 dt2,
12767 ],
12768 ),
12769 )))
12770 }
12771 _ => {
12772 // YEAR, MONTH, QUARTER, DAY, etc.: CAST to DATE
12773 let d1 = Self::force_cast_date(arg1);
12774 let d2 = Self::force_cast_date(arg2);
12775 Ok(Expression::Function(Box::new(
12776 Function::new(
12777 "DATE_DIFF".to_string(),
12778 vec![
12779 Expression::string(&unit_str),
12780 d1,
12781 d2,
12782 ],
12783 ),
12784 )))
12785 }
12786 }
12787 } else if is_redshift_tsql {
12788 // For Redshift/TSQL source, CAST args to TIMESTAMP (always)
12789 let d1 = Self::force_cast_timestamp(arg1);
12790 let d2 = Self::force_cast_timestamp(arg2);
12791 Ok(Expression::Function(Box::new(Function::new(
12792 "DATE_DIFF".to_string(),
12793 vec![Expression::string(&unit_str), d1, d2],
12794 ))))
12795 } else {
12796 // Keep as DATEDIFF so DuckDB's transform_datediff handles
12797 // DATE_TRUNC for WEEK, CAST for string literals, etc.
12798 let unit =
12799 Expression::Identifier(Identifier::new(&unit_str));
12800 Ok(Expression::Function(Box::new(Function::new(
12801 "DATEDIFF".to_string(),
12802 vec![unit, arg1, arg2],
12803 ))))
12804 }
12805 }
12806 DialectType::BigQuery => {
12807 let is_redshift_tsql = matches!(
12808 source,
12809 DialectType::Redshift
12810 | DialectType::TSQL
12811 | DialectType::Snowflake
12812 );
12813 let cast_d1 = if is_hive_spark {
12814 Self::ensure_cast_date(arg1)
12815 } else if is_redshift_tsql {
12816 Self::force_cast_datetime(arg1)
12817 } else {
12818 Self::ensure_cast_datetime(arg1)
12819 };
12820 let cast_d2 = if is_hive_spark {
12821 Self::ensure_cast_date(arg2)
12822 } else if is_redshift_tsql {
12823 Self::force_cast_datetime(arg2)
12824 } else {
12825 Self::ensure_cast_datetime(arg2)
12826 };
12827 let unit =
12828 Expression::Identifier(Identifier::new(&unit_str));
12829 Ok(Expression::Function(Box::new(Function::new(
12830 "DATE_DIFF".to_string(),
12831 vec![cast_d2, cast_d1, unit],
12832 ))))
12833 }
12834 DialectType::Presto
12835 | DialectType::Trino
12836 | DialectType::Athena => {
12837 // For Hive/Spark source, string literals need double-cast: CAST(CAST(x AS TIMESTAMP) AS DATE)
12838 // For Redshift/TSQL source, args need CAST to TIMESTAMP (always)
12839 let is_redshift_tsql = matches!(
12840 source,
12841 DialectType::Redshift
12842 | DialectType::TSQL
12843 | DialectType::Snowflake
12844 );
12845 let d1 = if is_hive_spark {
12846 Self::double_cast_timestamp_date(arg1)
12847 } else if is_redshift_tsql {
12848 Self::force_cast_timestamp(arg1)
12849 } else {
12850 arg1
12851 };
12852 let d2 = if is_hive_spark {
12853 Self::double_cast_timestamp_date(arg2)
12854 } else if is_redshift_tsql {
12855 Self::force_cast_timestamp(arg2)
12856 } else {
12857 arg2
12858 };
12859 Ok(Expression::Function(Box::new(Function::new(
12860 "DATE_DIFF".to_string(),
12861 vec![Expression::string(&unit_str), d1, d2],
12862 ))))
12863 }
12864 DialectType::Hive => match unit_str.as_str() {
12865 "MONTH" => Ok(Expression::Cast(Box::new(Cast {
12866 this: Expression::Function(Box::new(Function::new(
12867 "MONTHS_BETWEEN".to_string(),
12868 vec![arg2, arg1],
12869 ))),
12870 to: DataType::Int {
12871 length: None,
12872 integer_spelling: false,
12873 },
12874 trailing_comments: vec![],
12875 double_colon_syntax: false,
12876 format: None,
12877 default: None,
12878 }))),
12879 "WEEK" => Ok(Expression::Cast(Box::new(Cast {
12880 this: Expression::Div(Box::new(
12881 crate::expressions::BinaryOp::new(
12882 Expression::Function(Box::new(Function::new(
12883 "DATEDIFF".to_string(),
12884 vec![arg2, arg1],
12885 ))),
12886 Expression::number(7),
12887 ),
12888 )),
12889 to: DataType::Int {
12890 length: None,
12891 integer_spelling: false,
12892 },
12893 trailing_comments: vec![],
12894 double_colon_syntax: false,
12895 format: None,
12896 default: None,
12897 }))),
12898 _ => Ok(Expression::Function(Box::new(Function::new(
12899 "DATEDIFF".to_string(),
12900 vec![arg2, arg1],
12901 )))),
12902 },
12903 DialectType::Spark | DialectType::Databricks => {
12904 let unit =
12905 Expression::Identifier(Identifier::new(&unit_str));
12906 Ok(Expression::Function(Box::new(Function::new(
12907 "DATEDIFF".to_string(),
12908 vec![unit, arg1, arg2],
12909 ))))
12910 }
12911 _ => {
12912 // For Hive/Spark source targeting PostgreSQL etc., cast string literals to DATE
12913 let d1 = if is_hive_spark {
12914 Self::ensure_cast_date(arg1)
12915 } else {
12916 arg1
12917 };
12918 let d2 = if is_hive_spark {
12919 Self::ensure_cast_date(arg2)
12920 } else {
12921 arg2
12922 };
12923 let unit =
12924 Expression::Identifier(Identifier::new(&unit_str));
12925 Ok(Expression::Function(Box::new(Function::new(
12926 "DATEDIFF".to_string(),
12927 vec![unit, d1, d2],
12928 ))))
12929 }
12930 }
12931 }
12932 // DATEDIFF(end, start) - 2-arg form from Hive/MySQL
12933 "DATEDIFF" if f.args.len() == 2 => {
12934 let mut args = f.args;
12935 let arg0 = args.remove(0);
12936 let arg1 = args.remove(0);
12937
12938 // Helper: unwrap TO_DATE(x) -> x (extracts inner arg)
12939 // Also recognizes TryCast/Cast to DATE that may have been produced by
12940 // cross-dialect TO_DATE -> TRY_CAST conversion
                // Returns (inner_expr, was_date): `was_date` is true when the argument
                // is already date-valued. Two shapes qualify:
                //   1. TO_DATE(x)            -> returns (x, true), stripping the wrapper
                //   2. TRY_CAST(x AS DATE)   -> returns the whole TryCast unchanged,
                //      since it is already the converted form (no unwrap needed)
                // Anything else is passed through as (e, false).
                let unwrap_to_date = |e: Expression| -> (Expression, bool) {
                    if let Expression::Function(ref f) = e {
                        // Case-insensitive match: dialects may emit to_date/TO_DATE.
                        if f.name.eq_ignore_ascii_case("TO_DATE")
                            && f.args.len() == 1
                        {
                            return (f.args[0].clone(), true);
                        }
                    }
                    // Also recognize TryCast(x, Date) as an already-converted TO_DATE
                    // (produced earlier by cross-dialect TO_DATE -> TRY_CAST rewriting).
                    if let Expression::TryCast(ref c) = e {
                        if matches!(c.to, DataType::Date) {
                            return (e, true); // Already properly cast, return as-is
                        }
                    }
                    (e, false)
                };
12957
12958 match target {
12959 DialectType::DuckDB => {
12960 // For Hive source, always CAST to DATE
12961 // If arg is TO_DATE(x) or TRY_CAST(x AS DATE), use it directly
12962 let cast_d0 = if matches!(
12963 source,
12964 DialectType::Hive
12965 | DialectType::Spark
12966 | DialectType::Databricks
12967 ) {
12968 let (inner, was_to_date) = unwrap_to_date(arg1);
12969 if was_to_date {
12970 // Already a date expression, use directly
12971 if matches!(&inner, Expression::TryCast(_)) {
12972 inner // Already TRY_CAST(x AS DATE)
12973 } else {
12974 Self::try_cast_date(inner)
12975 }
12976 } else {
12977 Self::force_cast_date(inner)
12978 }
12979 } else {
12980 Self::ensure_cast_date(arg1)
12981 };
12982 let cast_d1 = if matches!(
12983 source,
12984 DialectType::Hive
12985 | DialectType::Spark
12986 | DialectType::Databricks
12987 ) {
12988 let (inner, was_to_date) = unwrap_to_date(arg0);
12989 if was_to_date {
12990 if matches!(&inner, Expression::TryCast(_)) {
12991 inner
12992 } else {
12993 Self::try_cast_date(inner)
12994 }
12995 } else {
12996 Self::force_cast_date(inner)
12997 }
12998 } else {
12999 Self::ensure_cast_date(arg0)
13000 };
13001 Ok(Expression::Function(Box::new(Function::new(
13002 "DATE_DIFF".to_string(),
13003 vec![Expression::string("DAY"), cast_d0, cast_d1],
13004 ))))
13005 }
13006 DialectType::Presto
13007 | DialectType::Trino
13008 | DialectType::Athena => {
13009 // For Hive/Spark source, apply double_cast_timestamp_date
13010 // For other sources (MySQL etc.), just swap args without casting
13011 if matches!(
13012 source,
13013 DialectType::Hive
13014 | DialectType::Spark
13015 | DialectType::Databricks
13016 ) {
                        // Normalize one DATEDIFF argument for Presto's DATE_DIFF.
                        // NOTE(review): when the arg was TO_DATE(x),
                        // double_cast_timestamp_date is applied TWICE, producing a
                        // quadruple cast (CAST(CAST(CAST(CAST(x AS TIMESTAMP) AS DATE)
                        // AS TIMESTAMP) AS DATE)) — presumably matching reference
                        // transpiler output for Hive TO_DATE; confirm this is intended
                        // and not an accidental double application.
                        let cast_fn = |e: Expression| -> Expression {
                            let (inner, was_to_date) = unwrap_to_date(e);
                            if was_to_date {
                                let first_cast =
                                    Self::double_cast_timestamp_date(inner);
                                Self::double_cast_timestamp_date(first_cast)
                            } else {
                                Self::double_cast_timestamp_date(inner)
                            }
                        };
13027 Ok(Expression::Function(Box::new(Function::new(
13028 "DATE_DIFF".to_string(),
13029 vec![
13030 Expression::string("DAY"),
13031 cast_fn(arg1),
13032 cast_fn(arg0),
13033 ],
13034 ))))
13035 } else {
13036 Ok(Expression::Function(Box::new(Function::new(
13037 "DATE_DIFF".to_string(),
13038 vec![Expression::string("DAY"), arg1, arg0],
13039 ))))
13040 }
13041 }
13042 DialectType::Redshift => {
13043 let unit = Expression::Identifier(Identifier::new("DAY"));
13044 Ok(Expression::Function(Box::new(Function::new(
13045 "DATEDIFF".to_string(),
13046 vec![unit, arg1, arg0],
13047 ))))
13048 }
13049 _ => Ok(Expression::Function(Box::new(Function::new(
13050 "DATEDIFF".to_string(),
13051 vec![arg0, arg1],
13052 )))),
13053 }
13054 }
13055 // DATE_DIFF(unit, start, end) - 3-arg with string unit (ClickHouse/DuckDB style)
13056 "DATE_DIFF" if f.args.len() == 3 => {
13057 let mut args = f.args;
13058 let arg0 = args.remove(0);
13059 let arg1 = args.remove(0);
13060 let arg2 = args.remove(0);
13061 let unit_str = Self::get_unit_str_static(&arg0);
13062
13063 match target {
13064 DialectType::DuckDB => {
13065 // DuckDB: DATE_DIFF('UNIT', start, end)
13066 Ok(Expression::Function(Box::new(Function::new(
13067 "DATE_DIFF".to_string(),
13068 vec![Expression::string(&unit_str), arg1, arg2],
13069 ))))
13070 }
13071 DialectType::Presto
13072 | DialectType::Trino
13073 | DialectType::Athena => {
13074 Ok(Expression::Function(Box::new(Function::new(
13075 "DATE_DIFF".to_string(),
13076 vec![Expression::string(&unit_str), arg1, arg2],
13077 ))))
13078 }
13079 DialectType::ClickHouse => {
13080 // ClickHouse: DATE_DIFF(UNIT, start, end) - identifier unit
13081 let unit =
13082 Expression::Identifier(Identifier::new(&unit_str));
13083 Ok(Expression::Function(Box::new(Function::new(
13084 "DATE_DIFF".to_string(),
13085 vec![unit, arg1, arg2],
13086 ))))
13087 }
13088 DialectType::Snowflake | DialectType::Redshift => {
13089 let unit =
13090 Expression::Identifier(Identifier::new(&unit_str));
13091 Ok(Expression::Function(Box::new(Function::new(
13092 "DATEDIFF".to_string(),
13093 vec![unit, arg1, arg2],
13094 ))))
13095 }
13096 _ => {
13097 let unit =
13098 Expression::Identifier(Identifier::new(&unit_str));
13099 Ok(Expression::Function(Box::new(Function::new(
13100 "DATEDIFF".to_string(),
13101 vec![unit, arg1, arg2],
13102 ))))
13103 }
13104 }
13105 }
13106 // DATEADD(unit, val, date) - 3-arg form
13107 "DATEADD" if f.args.len() == 3 => {
13108 let mut args = f.args;
13109 let arg0 = args.remove(0);
13110 let arg1 = args.remove(0);
13111 let arg2 = args.remove(0);
13112 let unit_str = Self::get_unit_str_static(&arg0);
13113
13114 // Normalize TSQL unit abbreviations to standard names
13115 let unit_str = match unit_str.as_str() {
13116 "YY" | "YYYY" => "YEAR".to_string(),
13117 "QQ" | "Q" => "QUARTER".to_string(),
13118 "MM" | "M" => "MONTH".to_string(),
13119 "WK" | "WW" => "WEEK".to_string(),
13120 "DD" | "D" | "DY" => "DAY".to_string(),
13121 "HH" => "HOUR".to_string(),
13122 "MI" | "N" => "MINUTE".to_string(),
13123 "SS" | "S" => "SECOND".to_string(),
13124 "MS" => "MILLISECOND".to_string(),
13125 "MCS" | "US" => "MICROSECOND".to_string(),
13126 _ => unit_str,
13127 };
13128 match target {
13129 DialectType::Snowflake => {
13130 let unit =
13131 Expression::Identifier(Identifier::new(&unit_str));
13132 // Cast string literal to TIMESTAMP, but not for Snowflake source
13133 // (Snowflake natively accepts string literals in DATEADD)
13134 let arg2 = if matches!(
13135 &arg2,
13136 Expression::Literal(Literal::String(_))
13137 ) && !matches!(source, DialectType::Snowflake)
13138 {
13139 Expression::Cast(Box::new(Cast {
13140 this: arg2,
13141 to: DataType::Timestamp {
13142 precision: None,
13143 timezone: false,
13144 },
13145 trailing_comments: Vec::new(),
13146 double_colon_syntax: false,
13147 format: None,
13148 default: None,
13149 }))
13150 } else {
13151 arg2
13152 };
13153 Ok(Expression::Function(Box::new(Function::new(
13154 "DATEADD".to_string(),
13155 vec![unit, arg1, arg2],
13156 ))))
13157 }
13158 DialectType::TSQL => {
13159 let unit =
13160 Expression::Identifier(Identifier::new(&unit_str));
13161 // Cast string literal to DATETIME2, but not when source is Spark/Databricks family
13162 let arg2 = if matches!(
13163 &arg2,
13164 Expression::Literal(Literal::String(_))
13165 ) && !matches!(
13166 source,
13167 DialectType::Spark
13168 | DialectType::Databricks
13169 | DialectType::Hive
13170 ) {
13171 Expression::Cast(Box::new(Cast {
13172 this: arg2,
13173 to: DataType::Custom {
13174 name: "DATETIME2".to_string(),
13175 },
13176 trailing_comments: Vec::new(),
13177 double_colon_syntax: false,
13178 format: None,
13179 default: None,
13180 }))
13181 } else {
13182 arg2
13183 };
13184 Ok(Expression::Function(Box::new(Function::new(
13185 "DATEADD".to_string(),
13186 vec![unit, arg1, arg2],
13187 ))))
13188 }
13189 DialectType::Redshift => {
13190 let unit =
13191 Expression::Identifier(Identifier::new(&unit_str));
13192 Ok(Expression::Function(Box::new(Function::new(
13193 "DATEADD".to_string(),
13194 vec![unit, arg1, arg2],
13195 ))))
13196 }
13197 DialectType::Databricks => {
13198 let unit =
13199 Expression::Identifier(Identifier::new(&unit_str));
13200 // Sources with native DATEADD (TSQL, Databricks, Snowflake) -> DATEADD
13201 // Other sources (Redshift TsOrDsAdd, etc.) -> DATE_ADD
13202 let func_name = if matches!(
13203 source,
13204 DialectType::TSQL
13205 | DialectType::Fabric
13206 | DialectType::Databricks
13207 | DialectType::Snowflake
13208 ) {
13209 "DATEADD"
13210 } else {
13211 "DATE_ADD"
13212 };
13213 Ok(Expression::Function(Box::new(Function::new(
13214 func_name.to_string(),
13215 vec![unit, arg1, arg2],
13216 ))))
13217 }
13218 DialectType::DuckDB => {
13219 // Special handling for NANOSECOND from Snowflake
13220 if unit_str == "NANOSECOND"
13221 && matches!(source, DialectType::Snowflake)
13222 {
13223 // DATEADD(NANOSECOND, offset, ts) -> MAKE_TIMESTAMP_NS(EPOCH_NS(CAST(ts AS TIMESTAMP_NS)) + offset)
13224 let cast_ts = Expression::Cast(Box::new(Cast {
13225 this: arg2,
13226 to: DataType::Custom {
13227 name: "TIMESTAMP_NS".to_string(),
13228 },
13229 trailing_comments: vec![],
13230 double_colon_syntax: false,
13231 format: None,
13232 default: None,
13233 }));
13234 let epoch_ns =
13235 Expression::Function(Box::new(Function::new(
13236 "EPOCH_NS".to_string(),
13237 vec![cast_ts],
13238 )));
13239 let sum = Expression::Add(Box::new(BinaryOp::new(
13240 epoch_ns, arg1,
13241 )));
13242 Ok(Expression::Function(Box::new(Function::new(
13243 "MAKE_TIMESTAMP_NS".to_string(),
13244 vec![sum],
13245 ))))
13246 } else {
13247 // DuckDB: convert to date + INTERVAL syntax with CAST
13248 let iu = Self::parse_interval_unit_static(&unit_str);
13249 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
13250 this: Some(arg1),
13251 unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: iu, use_plural: false }),
13252 }));
13253 // Cast string literal to TIMESTAMP
13254 let arg2 = if matches!(
13255 &arg2,
13256 Expression::Literal(Literal::String(_))
13257 ) {
13258 Expression::Cast(Box::new(Cast {
13259 this: arg2,
13260 to: DataType::Timestamp {
13261 precision: None,
13262 timezone: false,
13263 },
13264 trailing_comments: Vec::new(),
13265 double_colon_syntax: false,
13266 format: None,
13267 default: None,
13268 }))
13269 } else {
13270 arg2
13271 };
13272 Ok(Expression::Add(Box::new(
13273 crate::expressions::BinaryOp::new(arg2, interval),
13274 )))
13275 }
13276 }
13277 DialectType::Spark => {
13278 // For TSQL source: convert to ADD_MONTHS/DATE_ADD(date, val)
13279 // For other sources: keep 3-arg DATE_ADD(UNIT, val, date) form
13280 if matches!(source, DialectType::TSQL | DialectType::Fabric)
13281 {
13282 fn multiply_expr_spark(
13283 expr: Expression,
13284 factor: i64,
13285 ) -> Expression
13286 {
13287 if let Expression::Literal(
13288 crate::expressions::Literal::Number(n),
13289 ) = &expr
13290 {
13291 if let Ok(val) = n.parse::<i64>() {
13292 return Expression::Literal(
13293 crate::expressions::Literal::Number(
13294 (val * factor).to_string(),
13295 ),
13296 );
13297 }
13298 }
13299 Expression::Mul(Box::new(
13300 crate::expressions::BinaryOp::new(
13301 expr,
13302 Expression::Literal(
13303 crate::expressions::Literal::Number(
13304 factor.to_string(),
13305 ),
13306 ),
13307 ),
13308 ))
13309 }
13310 let normalized_unit = match unit_str.as_str() {
13311 "YEAR" | "YY" | "YYYY" => "YEAR",
13312 "QUARTER" | "QQ" | "Q" => "QUARTER",
13313 "MONTH" | "MM" | "M" => "MONTH",
13314 "WEEK" | "WK" | "WW" => "WEEK",
13315 "DAY" | "DD" | "D" | "DY" => "DAY",
13316 _ => &unit_str,
13317 };
13318 match normalized_unit {
13319 "YEAR" => {
13320 let months = multiply_expr_spark(arg1, 12);
13321 Ok(Expression::Function(Box::new(
13322 Function::new(
13323 "ADD_MONTHS".to_string(),
13324 vec![arg2, months],
13325 ),
13326 )))
13327 }
13328 "QUARTER" => {
13329 let months = multiply_expr_spark(arg1, 3);
13330 Ok(Expression::Function(Box::new(
13331 Function::new(
13332 "ADD_MONTHS".to_string(),
13333 vec![arg2, months],
13334 ),
13335 )))
13336 }
13337 "MONTH" => Ok(Expression::Function(Box::new(
13338 Function::new(
13339 "ADD_MONTHS".to_string(),
13340 vec![arg2, arg1],
13341 ),
13342 ))),
13343 "WEEK" => {
13344 let days = multiply_expr_spark(arg1, 7);
13345 Ok(Expression::Function(Box::new(
13346 Function::new(
13347 "DATE_ADD".to_string(),
13348 vec![arg2, days],
13349 ),
13350 )))
13351 }
13352 "DAY" => Ok(Expression::Function(Box::new(
13353 Function::new(
13354 "DATE_ADD".to_string(),
13355 vec![arg2, arg1],
13356 ),
13357 ))),
13358 _ => {
13359 let unit = Expression::Identifier(
13360 Identifier::new(&unit_str),
13361 );
13362 Ok(Expression::Function(Box::new(
13363 Function::new(
13364 "DATE_ADD".to_string(),
13365 vec![unit, arg1, arg2],
13366 ),
13367 )))
13368 }
13369 }
13370 } else {
13371 // Non-TSQL source: keep 3-arg DATE_ADD(UNIT, val, date)
13372 let unit =
13373 Expression::Identifier(Identifier::new(&unit_str));
13374 Ok(Expression::Function(Box::new(Function::new(
13375 "DATE_ADD".to_string(),
13376 vec![unit, arg1, arg2],
13377 ))))
13378 }
13379 }
13380 DialectType::Hive => match unit_str.as_str() {
13381 "MONTH" => {
13382 Ok(Expression::Function(Box::new(Function::new(
13383 "ADD_MONTHS".to_string(),
13384 vec![arg2, arg1],
13385 ))))
13386 }
13387 _ => Ok(Expression::Function(Box::new(Function::new(
13388 "DATE_ADD".to_string(),
13389 vec![arg2, arg1],
13390 )))),
13391 },
13392 DialectType::Presto
13393 | DialectType::Trino
13394 | DialectType::Athena => {
13395 // Cast string literal date to TIMESTAMP
13396 let arg2 = if matches!(
13397 &arg2,
13398 Expression::Literal(Literal::String(_))
13399 ) {
13400 Expression::Cast(Box::new(Cast {
13401 this: arg2,
13402 to: DataType::Timestamp {
13403 precision: None,
13404 timezone: false,
13405 },
13406 trailing_comments: Vec::new(),
13407 double_colon_syntax: false,
13408 format: None,
13409 default: None,
13410 }))
13411 } else {
13412 arg2
13413 };
13414 Ok(Expression::Function(Box::new(Function::new(
13415 "DATE_ADD".to_string(),
13416 vec![Expression::string(&unit_str), arg1, arg2],
13417 ))))
13418 }
13419 DialectType::MySQL => {
13420 let iu = Self::parse_interval_unit_static(&unit_str);
13421 Ok(Expression::DateAdd(Box::new(
13422 crate::expressions::DateAddFunc {
13423 this: arg2,
13424 interval: arg1,
13425 unit: iu,
13426 },
13427 )))
13428 }
13429 DialectType::PostgreSQL => {
13430 // Cast string literal date to TIMESTAMP
13431 let arg2 = if matches!(
13432 &arg2,
13433 Expression::Literal(Literal::String(_))
13434 ) {
13435 Expression::Cast(Box::new(Cast {
13436 this: arg2,
13437 to: DataType::Timestamp {
13438 precision: None,
13439 timezone: false,
13440 },
13441 trailing_comments: Vec::new(),
13442 double_colon_syntax: false,
13443 format: None,
13444 default: None,
13445 }))
13446 } else {
13447 arg2
13448 };
13449 let interval = Expression::Interval(Box::new(
13450 crate::expressions::Interval {
13451 this: Some(Expression::string(&format!(
13452 "{} {}",
13453 Self::expr_to_string_static(&arg1),
13454 unit_str
13455 ))),
13456 unit: None,
13457 },
13458 ));
13459 Ok(Expression::Add(Box::new(
13460 crate::expressions::BinaryOp::new(arg2, interval),
13461 )))
13462 }
13463 DialectType::BigQuery => {
13464 let iu = Self::parse_interval_unit_static(&unit_str);
13465 let interval = Expression::Interval(Box::new(
13466 crate::expressions::Interval {
13467 this: Some(arg1),
13468 unit: Some(
13469 crate::expressions::IntervalUnitSpec::Simple {
13470 unit: iu,
13471 use_plural: false,
13472 },
13473 ),
13474 },
13475 ));
13476 // Non-TSQL sources: CAST string literal to DATETIME
13477 let arg2 = if !matches!(
13478 source,
13479 DialectType::TSQL | DialectType::Fabric
13480 ) && matches!(
13481 &arg2,
13482 Expression::Literal(Literal::String(_))
13483 ) {
13484 Expression::Cast(Box::new(Cast {
13485 this: arg2,
13486 to: DataType::Custom {
13487 name: "DATETIME".to_string(),
13488 },
13489 trailing_comments: Vec::new(),
13490 double_colon_syntax: false,
13491 format: None,
13492 default: None,
13493 }))
13494 } else {
13495 arg2
13496 };
13497 Ok(Expression::Function(Box::new(Function::new(
13498 "DATE_ADD".to_string(),
13499 vec![arg2, interval],
13500 ))))
13501 }
13502 _ => {
13503 let unit =
13504 Expression::Identifier(Identifier::new(&unit_str));
13505 Ok(Expression::Function(Box::new(Function::new(
13506 "DATEADD".to_string(),
13507 vec![unit, arg1, arg2],
13508 ))))
13509 }
13510 }
13511 }
13512 // DATE_ADD - 3-arg: either (unit, val, date) from Presto/ClickHouse
13513 // or (date, val, 'UNIT') from Generic canonical form
13514 "DATE_ADD" if f.args.len() == 3 => {
13515 let mut args = f.args;
13516 let arg0 = args.remove(0);
13517 let arg1 = args.remove(0);
13518 let arg2 = args.remove(0);
13519 // Detect Generic canonical form: DATE_ADD(date, amount, 'UNIT')
13520 // where arg2 is a string literal matching a unit name
13521 let arg2_unit = match &arg2 {
13522 Expression::Literal(Literal::String(s)) => {
13523 let u = s.to_uppercase();
13524 if matches!(
13525 u.as_str(),
13526 "DAY"
13527 | "MONTH"
13528 | "YEAR"
13529 | "HOUR"
13530 | "MINUTE"
13531 | "SECOND"
13532 | "WEEK"
13533 | "QUARTER"
13534 | "MILLISECOND"
13535 | "MICROSECOND"
13536 ) {
13537 Some(u)
13538 } else {
13539 None
13540 }
13541 }
13542 _ => None,
13543 };
13544 // Reorder: if arg2 is the unit, swap to (unit, val, date) form
13545 let (unit_str, val, date) = if let Some(u) = arg2_unit {
13546 (u, arg1, arg0)
13547 } else {
13548 (Self::get_unit_str_static(&arg0), arg1, arg2)
13549 };
13550 // Alias for backward compat with the rest of the match
13551 let arg1 = val;
13552 let arg2 = date;
13553
13554 match target {
13555 DialectType::Presto
13556 | DialectType::Trino
13557 | DialectType::Athena => {
13558 Ok(Expression::Function(Box::new(Function::new(
13559 "DATE_ADD".to_string(),
13560 vec![Expression::string(&unit_str), arg1, arg2],
13561 ))))
13562 }
13563 DialectType::DuckDB => {
13564 let iu = Self::parse_interval_unit_static(&unit_str);
13565 let interval = Expression::Interval(Box::new(
13566 crate::expressions::Interval {
13567 this: Some(arg1),
13568 unit: Some(
13569 crate::expressions::IntervalUnitSpec::Simple {
13570 unit: iu,
13571 use_plural: false,
13572 },
13573 ),
13574 },
13575 ));
13576 Ok(Expression::Add(Box::new(
13577 crate::expressions::BinaryOp::new(arg2, interval),
13578 )))
13579 }
13580 DialectType::PostgreSQL
13581 | DialectType::Materialize
13582 | DialectType::RisingWave => {
13583 // PostgreSQL: x + INTERVAL '1 DAY'
13584 let amount_str = Self::expr_to_string_static(&arg1);
13585 let interval = Expression::Interval(Box::new(
13586 crate::expressions::Interval {
13587 this: Some(Expression::string(&format!(
13588 "{} {}",
13589 amount_str, unit_str
13590 ))),
13591 unit: None,
13592 },
13593 ));
13594 Ok(Expression::Add(Box::new(
13595 crate::expressions::BinaryOp::new(arg2, interval),
13596 )))
13597 }
13598 DialectType::Snowflake
13599 | DialectType::TSQL
13600 | DialectType::Redshift => {
13601 let unit =
13602 Expression::Identifier(Identifier::new(&unit_str));
13603 Ok(Expression::Function(Box::new(Function::new(
13604 "DATEADD".to_string(),
13605 vec![unit, arg1, arg2],
13606 ))))
13607 }
13608 DialectType::BigQuery
13609 | DialectType::MySQL
13610 | DialectType::Doris
13611 | DialectType::StarRocks
13612 | DialectType::Drill => {
13613 // DATE_ADD(date, INTERVAL amount UNIT)
13614 let iu = Self::parse_interval_unit_static(&unit_str);
13615 let interval = Expression::Interval(Box::new(
13616 crate::expressions::Interval {
13617 this: Some(arg1),
13618 unit: Some(
13619 crate::expressions::IntervalUnitSpec::Simple {
13620 unit: iu,
13621 use_plural: false,
13622 },
13623 ),
13624 },
13625 ));
13626 Ok(Expression::Function(Box::new(Function::new(
13627 "DATE_ADD".to_string(),
13628 vec![arg2, interval],
13629 ))))
13630 }
13631 DialectType::SQLite => {
13632 // SQLite: DATE(x, '1 DAY')
13633 // Build the string '1 DAY' from amount and unit
13634 let amount_str = match &arg1 {
13635 Expression::Literal(Literal::Number(n)) => n.clone(),
13636 _ => "1".to_string(),
13637 };
13638 Ok(Expression::Function(Box::new(Function::new(
13639 "DATE".to_string(),
13640 vec![
13641 arg2,
13642 Expression::string(format!(
13643 "{} {}",
13644 amount_str, unit_str
13645 )),
13646 ],
13647 ))))
13648 }
13649 DialectType::Dremio => {
13650 // Dremio: DATE_ADD(date, amount) - drops unit
13651 Ok(Expression::Function(Box::new(Function::new(
13652 "DATE_ADD".to_string(),
13653 vec![arg2, arg1],
13654 ))))
13655 }
13656 DialectType::Spark => {
13657 // Spark: DATE_ADD(date, val) for DAY, or DATEADD(UNIT, val, date)
13658 if unit_str == "DAY" {
13659 Ok(Expression::Function(Box::new(Function::new(
13660 "DATE_ADD".to_string(),
13661 vec![arg2, arg1],
13662 ))))
13663 } else {
13664 let unit =
13665 Expression::Identifier(Identifier::new(&unit_str));
13666 Ok(Expression::Function(Box::new(Function::new(
13667 "DATE_ADD".to_string(),
13668 vec![unit, arg1, arg2],
13669 ))))
13670 }
13671 }
13672 DialectType::Databricks => {
13673 let unit =
13674 Expression::Identifier(Identifier::new(&unit_str));
13675 Ok(Expression::Function(Box::new(Function::new(
13676 "DATE_ADD".to_string(),
13677 vec![unit, arg1, arg2],
13678 ))))
13679 }
13680 DialectType::Hive => {
13681 // Hive: DATE_ADD(date, val) for DAY
13682 Ok(Expression::Function(Box::new(Function::new(
13683 "DATE_ADD".to_string(),
13684 vec![arg2, arg1],
13685 ))))
13686 }
13687 _ => {
13688 let unit =
13689 Expression::Identifier(Identifier::new(&unit_str));
13690 Ok(Expression::Function(Box::new(Function::new(
13691 "DATE_ADD".to_string(),
13692 vec![unit, arg1, arg2],
13693 ))))
13694 }
13695 }
13696 }
13697 // DATE_ADD(date, days) - 2-arg Hive/Spark/Generic form (add days)
13698 "DATE_ADD"
13699 if f.args.len() == 2
13700 && matches!(
13701 source,
13702 DialectType::Hive
13703 | DialectType::Spark
13704 | DialectType::Databricks
13705 | DialectType::Generic
13706 ) =>
13707 {
13708 let mut args = f.args;
13709 let date = args.remove(0);
13710 let days = args.remove(0);
13711 match target {
13712 DialectType::Hive | DialectType::Spark => {
13713 // Keep as DATE_ADD(date, days) for Hive/Spark
13714 Ok(Expression::Function(Box::new(Function::new(
13715 "DATE_ADD".to_string(),
13716 vec![date, days],
13717 ))))
13718 }
13719 DialectType::Databricks => {
13720 // Databricks: DATEADD(DAY, days, date)
13721 Ok(Expression::Function(Box::new(Function::new(
13722 "DATEADD".to_string(),
13723 vec![
13724 Expression::Identifier(Identifier::new("DAY")),
13725 days,
13726 date,
13727 ],
13728 ))))
13729 }
13730 DialectType::DuckDB => {
13731 // DuckDB: CAST(date AS DATE) + INTERVAL days DAY
13732 let cast_date = Self::ensure_cast_date(date);
13733 // Wrap complex expressions (like Mul from DATE_SUB negation) in Paren
13734 let interval_val = if matches!(
13735 days,
13736 Expression::Mul(_)
13737 | Expression::Sub(_)
13738 | Expression::Add(_)
13739 ) {
13740 Expression::Paren(Box::new(crate::expressions::Paren {
13741 this: days,
13742 trailing_comments: vec![],
13743 }))
13744 } else {
13745 days
13746 };
13747 let interval = Expression::Interval(Box::new(
13748 crate::expressions::Interval {
13749 this: Some(interval_val),
13750 unit: Some(
13751 crate::expressions::IntervalUnitSpec::Simple {
13752 unit: crate::expressions::IntervalUnit::Day,
13753 use_plural: false,
13754 },
13755 ),
13756 },
13757 ));
13758 Ok(Expression::Add(Box::new(
13759 crate::expressions::BinaryOp::new(cast_date, interval),
13760 )))
13761 }
13762 DialectType::Snowflake => {
13763 // For Hive source with string literal date, use CAST(CAST(date AS TIMESTAMP) AS DATE)
13764 let cast_date = if matches!(
13765 source,
13766 DialectType::Hive
13767 | DialectType::Spark
13768 | DialectType::Databricks
13769 ) {
13770 if matches!(
13771 date,
13772 Expression::Literal(Literal::String(_))
13773 ) {
13774 Self::double_cast_timestamp_date(date)
13775 } else {
13776 date
13777 }
13778 } else {
13779 date
13780 };
13781 Ok(Expression::Function(Box::new(Function::new(
13782 "DATEADD".to_string(),
13783 vec![
13784 Expression::Identifier(Identifier::new("DAY")),
13785 days,
13786 cast_date,
13787 ],
13788 ))))
13789 }
13790 DialectType::Redshift => {
13791 Ok(Expression::Function(Box::new(Function::new(
13792 "DATEADD".to_string(),
13793 vec![
13794 Expression::Identifier(Identifier::new("DAY")),
13795 days,
13796 date,
13797 ],
13798 ))))
13799 }
13800 DialectType::TSQL | DialectType::Fabric => {
13801 // For Hive source with string literal date, use CAST(CAST(date AS DATETIME2) AS DATE)
13802 // But Databricks DATE_ADD doesn't need this wrapping for TSQL
13803 let cast_date = if matches!(
13804 source,
13805 DialectType::Hive | DialectType::Spark
13806 ) {
13807 if matches!(
13808 date,
13809 Expression::Literal(Literal::String(_))
13810 ) {
13811 Self::double_cast_datetime2_date(date)
13812 } else {
13813 date
13814 }
13815 } else {
13816 date
13817 };
13818 Ok(Expression::Function(Box::new(Function::new(
13819 "DATEADD".to_string(),
13820 vec![
13821 Expression::Identifier(Identifier::new("DAY")),
13822 days,
13823 cast_date,
13824 ],
13825 ))))
13826 }
13827 DialectType::Presto
13828 | DialectType::Trino
13829 | DialectType::Athena => {
13830 // For Hive source with string literal date, use CAST(CAST(date AS TIMESTAMP) AS DATE)
13831 let cast_date = if matches!(
13832 source,
13833 DialectType::Hive
13834 | DialectType::Spark
13835 | DialectType::Databricks
13836 ) {
13837 if matches!(
13838 date,
13839 Expression::Literal(Literal::String(_))
13840 ) {
13841 Self::double_cast_timestamp_date(date)
13842 } else {
13843 date
13844 }
13845 } else {
13846 date
13847 };
13848 Ok(Expression::Function(Box::new(Function::new(
13849 "DATE_ADD".to_string(),
13850 vec![Expression::string("DAY"), days, cast_date],
13851 ))))
13852 }
13853 DialectType::BigQuery => {
13854 // For Hive/Spark source, wrap date in CAST(CAST(date AS DATETIME) AS DATE)
13855 let cast_date = if matches!(
13856 source,
13857 DialectType::Hive
13858 | DialectType::Spark
13859 | DialectType::Databricks
13860 ) {
13861 Self::double_cast_datetime_date(date)
13862 } else {
13863 date
13864 };
13865 // Wrap complex expressions in Paren for interval
13866 let interval_val = if matches!(
13867 days,
13868 Expression::Mul(_)
13869 | Expression::Sub(_)
13870 | Expression::Add(_)
13871 ) {
13872 Expression::Paren(Box::new(crate::expressions::Paren {
13873 this: days,
13874 trailing_comments: vec![],
13875 }))
13876 } else {
13877 days
13878 };
13879 let interval = Expression::Interval(Box::new(
13880 crate::expressions::Interval {
13881 this: Some(interval_val),
13882 unit: Some(
13883 crate::expressions::IntervalUnitSpec::Simple {
13884 unit: crate::expressions::IntervalUnit::Day,
13885 use_plural: false,
13886 },
13887 ),
13888 },
13889 ));
13890 Ok(Expression::Function(Box::new(Function::new(
13891 "DATE_ADD".to_string(),
13892 vec![cast_date, interval],
13893 ))))
13894 }
13895 DialectType::MySQL => {
13896 let iu = crate::expressions::IntervalUnit::Day;
13897 Ok(Expression::DateAdd(Box::new(
13898 crate::expressions::DateAddFunc {
13899 this: date,
13900 interval: days,
13901 unit: iu,
13902 },
13903 )))
13904 }
13905 DialectType::PostgreSQL => {
13906 let interval = Expression::Interval(Box::new(
13907 crate::expressions::Interval {
13908 this: Some(Expression::string(&format!(
13909 "{} DAY",
13910 Self::expr_to_string_static(&days)
13911 ))),
13912 unit: None,
13913 },
13914 ));
13915 Ok(Expression::Add(Box::new(
13916 crate::expressions::BinaryOp::new(date, interval),
13917 )))
13918 }
13919 DialectType::Doris
13920 | DialectType::StarRocks
13921 | DialectType::Drill => {
13922 // DATE_ADD(date, INTERVAL days DAY)
13923 let interval = Expression::Interval(Box::new(
13924 crate::expressions::Interval {
13925 this: Some(days),
13926 unit: Some(
13927 crate::expressions::IntervalUnitSpec::Simple {
13928 unit: crate::expressions::IntervalUnit::Day,
13929 use_plural: false,
13930 },
13931 ),
13932 },
13933 ));
13934 Ok(Expression::Function(Box::new(Function::new(
13935 "DATE_ADD".to_string(),
13936 vec![date, interval],
13937 ))))
13938 }
13939 _ => Ok(Expression::Function(Box::new(Function::new(
13940 "DATE_ADD".to_string(),
13941 vec![date, days],
13942 )))),
13943 }
13944 }
13945 // DATE_SUB(date, days) - 2-arg Hive/Spark form (subtract days)
13946 "DATE_SUB"
13947 if f.args.len() == 2
13948 && matches!(
13949 source,
13950 DialectType::Hive
13951 | DialectType::Spark
13952 | DialectType::Databricks
13953 ) =>
13954 {
13955 let mut args = f.args;
13956 let date = args.remove(0);
13957 let days = args.remove(0);
13958 // Helper to create days * -1
13959 let make_neg_days = |d: Expression| -> Expression {
13960 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
13961 d,
13962 Expression::Literal(Literal::Number("-1".to_string())),
13963 )))
13964 };
13965 let is_string_literal =
13966 matches!(date, Expression::Literal(Literal::String(_)));
13967 match target {
13968 DialectType::Hive
13969 | DialectType::Spark
13970 | DialectType::Databricks => {
13971 // Keep as DATE_SUB(date, days) for Hive/Spark
13972 Ok(Expression::Function(Box::new(Function::new(
13973 "DATE_SUB".to_string(),
13974 vec![date, days],
13975 ))))
13976 }
13977 DialectType::DuckDB => {
13978 let cast_date = Self::ensure_cast_date(date);
13979 let neg = make_neg_days(days);
13980 let interval = Expression::Interval(Box::new(
13981 crate::expressions::Interval {
13982 this: Some(Expression::Paren(Box::new(
13983 crate::expressions::Paren {
13984 this: neg,
13985 trailing_comments: vec![],
13986 },
13987 ))),
13988 unit: Some(
13989 crate::expressions::IntervalUnitSpec::Simple {
13990 unit: crate::expressions::IntervalUnit::Day,
13991 use_plural: false,
13992 },
13993 ),
13994 },
13995 ));
13996 Ok(Expression::Add(Box::new(
13997 crate::expressions::BinaryOp::new(cast_date, interval),
13998 )))
13999 }
14000 DialectType::Snowflake => {
14001 let cast_date = if is_string_literal {
14002 Self::double_cast_timestamp_date(date)
14003 } else {
14004 date
14005 };
14006 let neg = make_neg_days(days);
14007 Ok(Expression::Function(Box::new(Function::new(
14008 "DATEADD".to_string(),
14009 vec![
14010 Expression::Identifier(Identifier::new("DAY")),
14011 neg,
14012 cast_date,
14013 ],
14014 ))))
14015 }
14016 DialectType::Redshift => {
14017 let neg = make_neg_days(days);
14018 Ok(Expression::Function(Box::new(Function::new(
14019 "DATEADD".to_string(),
14020 vec![
14021 Expression::Identifier(Identifier::new("DAY")),
14022 neg,
14023 date,
14024 ],
14025 ))))
14026 }
14027 DialectType::TSQL | DialectType::Fabric => {
14028 let cast_date = if is_string_literal {
14029 Self::double_cast_datetime2_date(date)
14030 } else {
14031 date
14032 };
14033 let neg = make_neg_days(days);
14034 Ok(Expression::Function(Box::new(Function::new(
14035 "DATEADD".to_string(),
14036 vec![
14037 Expression::Identifier(Identifier::new("DAY")),
14038 neg,
14039 cast_date,
14040 ],
14041 ))))
14042 }
14043 DialectType::Presto
14044 | DialectType::Trino
14045 | DialectType::Athena => {
14046 let cast_date = if is_string_literal {
14047 Self::double_cast_timestamp_date(date)
14048 } else {
14049 date
14050 };
14051 let neg = make_neg_days(days);
14052 Ok(Expression::Function(Box::new(Function::new(
14053 "DATE_ADD".to_string(),
14054 vec![Expression::string("DAY"), neg, cast_date],
14055 ))))
14056 }
14057 DialectType::BigQuery => {
14058 let cast_date = if is_string_literal {
14059 Self::double_cast_datetime_date(date)
14060 } else {
14061 date
14062 };
14063 let neg = make_neg_days(days);
14064 let interval = Expression::Interval(Box::new(
14065 crate::expressions::Interval {
14066 this: Some(Expression::Paren(Box::new(
14067 crate::expressions::Paren {
14068 this: neg,
14069 trailing_comments: vec![],
14070 },
14071 ))),
14072 unit: Some(
14073 crate::expressions::IntervalUnitSpec::Simple {
14074 unit: crate::expressions::IntervalUnit::Day,
14075 use_plural: false,
14076 },
14077 ),
14078 },
14079 ));
14080 Ok(Expression::Function(Box::new(Function::new(
14081 "DATE_ADD".to_string(),
14082 vec![cast_date, interval],
14083 ))))
14084 }
14085 _ => Ok(Expression::Function(Box::new(Function::new(
14086 "DATE_SUB".to_string(),
14087 vec![date, days],
14088 )))),
14089 }
14090 }
14091 // ADD_MONTHS(date, val) -> target-specific
14092 "ADD_MONTHS" if f.args.len() == 2 => {
14093 let mut args = f.args;
14094 let date = args.remove(0);
14095 let val = args.remove(0);
14096 match target {
14097 DialectType::TSQL => {
14098 let cast_date = Self::ensure_cast_datetime2(date);
14099 Ok(Expression::Function(Box::new(Function::new(
14100 "DATEADD".to_string(),
14101 vec![
14102 Expression::Identifier(Identifier::new("MONTH")),
14103 val,
14104 cast_date,
14105 ],
14106 ))))
14107 }
14108 DialectType::DuckDB => {
14109 let interval = Expression::Interval(Box::new(
14110 crate::expressions::Interval {
14111 this: Some(val),
14112 unit: Some(
14113 crate::expressions::IntervalUnitSpec::Simple {
14114 unit:
14115 crate::expressions::IntervalUnit::Month,
14116 use_plural: false,
14117 },
14118 ),
14119 },
14120 ));
14121 Ok(Expression::Add(Box::new(
14122 crate::expressions::BinaryOp::new(date, interval),
14123 )))
14124 }
14125 DialectType::Snowflake => {
14126 // Keep ADD_MONTHS when source is Snowflake
14127 if matches!(source, DialectType::Snowflake) {
14128 Ok(Expression::Function(Box::new(Function::new(
14129 "ADD_MONTHS".to_string(),
14130 vec![date, val],
14131 ))))
14132 } else {
14133 Ok(Expression::Function(Box::new(Function::new(
14134 "DATEADD".to_string(),
14135 vec![
14136 Expression::Identifier(Identifier::new(
14137 "MONTH",
14138 )),
14139 val,
14140 date,
14141 ],
14142 ))))
14143 }
14144 }
14145 DialectType::Redshift => {
14146 Ok(Expression::Function(Box::new(Function::new(
14147 "DATEADD".to_string(),
14148 vec![
14149 Expression::Identifier(Identifier::new("MONTH")),
14150 val,
14151 date,
14152 ],
14153 ))))
14154 }
14155 DialectType::Presto
14156 | DialectType::Trino
14157 | DialectType::Athena => {
14158 Ok(Expression::Function(Box::new(Function::new(
14159 "DATE_ADD".to_string(),
14160 vec![Expression::string("MONTH"), val, date],
14161 ))))
14162 }
14163 DialectType::BigQuery => {
14164 let interval = Expression::Interval(Box::new(
14165 crate::expressions::Interval {
14166 this: Some(val),
14167 unit: Some(
14168 crate::expressions::IntervalUnitSpec::Simple {
14169 unit:
14170 crate::expressions::IntervalUnit::Month,
14171 use_plural: false,
14172 },
14173 ),
14174 },
14175 ));
14176 Ok(Expression::Function(Box::new(Function::new(
14177 "DATE_ADD".to_string(),
14178 vec![date, interval],
14179 ))))
14180 }
14181 _ => Ok(Expression::Function(Box::new(Function::new(
14182 "ADD_MONTHS".to_string(),
14183 vec![date, val],
14184 )))),
14185 }
14186 }
14187 // DATETRUNC(unit, date) - TSQL form -> DATE_TRUNC for other targets
14188 "DATETRUNC" if f.args.len() == 2 => {
14189 let mut args = f.args;
14190 let arg0 = args.remove(0);
14191 let arg1 = args.remove(0);
14192 let unit_str = Self::get_unit_str_static(&arg0);
14193 match target {
14194 DialectType::TSQL | DialectType::Fabric => {
14195 // Keep as DATETRUNC for TSQL - the target handler will uppercase the unit
14196 Ok(Expression::Function(Box::new(Function::new(
14197 "DATETRUNC".to_string(),
14198 vec![
14199 Expression::Identifier(Identifier::new(&unit_str)),
14200 arg1,
14201 ],
14202 ))))
14203 }
14204 DialectType::DuckDB => {
14205 // DuckDB: DATE_TRUNC('UNIT', expr) with CAST for string literals
14206 let date = Self::ensure_cast_timestamp(arg1);
14207 Ok(Expression::Function(Box::new(Function::new(
14208 "DATE_TRUNC".to_string(),
14209 vec![Expression::string(&unit_str), date],
14210 ))))
14211 }
14212 DialectType::ClickHouse => {
14213 // ClickHouse: dateTrunc('UNIT', expr)
14214 Ok(Expression::Function(Box::new(Function::new(
14215 "dateTrunc".to_string(),
14216 vec![Expression::string(&unit_str), arg1],
14217 ))))
14218 }
14219 _ => {
14220 // Standard: DATE_TRUNC('UNIT', expr)
14221 let unit = Expression::string(&unit_str);
14222 Ok(Expression::Function(Box::new(Function::new(
14223 "DATE_TRUNC".to_string(),
14224 vec![unit, arg1],
14225 ))))
14226 }
14227 }
14228 }
14229 // GETDATE() -> CURRENT_TIMESTAMP for non-TSQL targets
14230 "GETDATE" if f.args.is_empty() => match target {
14231 DialectType::TSQL => Ok(Expression::Function(f)),
14232 DialectType::Redshift => Ok(Expression::Function(Box::new(
14233 Function::new("GETDATE".to_string(), vec![]),
14234 ))),
14235 _ => Ok(Expression::CurrentTimestamp(
14236 crate::expressions::CurrentTimestamp {
14237 precision: None,
14238 sysdate: false,
14239 },
14240 )),
14241 },
14242 // TO_HEX(x) / HEX(x) -> target-specific hex function
14243 "TO_HEX" | "HEX" if f.args.len() == 1 => {
14244 let name = match target {
14245 DialectType::Presto | DialectType::Trino => "TO_HEX",
14246 DialectType::Spark
14247 | DialectType::Databricks
14248 | DialectType::Hive => "HEX",
14249 DialectType::DuckDB
14250 | DialectType::PostgreSQL
14251 | DialectType::Redshift => "TO_HEX",
14252 _ => &f.name,
14253 };
14254 Ok(Expression::Function(Box::new(Function::new(
14255 name.to_string(),
14256 f.args,
14257 ))))
14258 }
14259 // FROM_HEX(x) / UNHEX(x) -> target-specific hex decode function
14260 "FROM_HEX" | "UNHEX" if f.args.len() == 1 => {
14261 match target {
14262 DialectType::BigQuery => {
14263 // BigQuery: UNHEX(x) -> FROM_HEX(x)
14264 // Special case: UNHEX(MD5(x)) -> FROM_HEX(TO_HEX(MD5(x)))
14265 // because BigQuery MD5 returns BYTES, not hex string
14266 let arg = &f.args[0];
14267 let wrapped_arg = match arg {
14268 Expression::Function(inner_f)
14269 if inner_f.name.to_uppercase() == "MD5"
14270 || inner_f.name.to_uppercase() == "SHA1"
14271 || inner_f.name.to_uppercase() == "SHA256"
14272 || inner_f.name.to_uppercase() == "SHA512" =>
14273 {
14274 // Wrap hash function in TO_HEX for BigQuery
14275 Expression::Function(Box::new(Function::new(
14276 "TO_HEX".to_string(),
14277 vec![arg.clone()],
14278 )))
14279 }
14280 _ => f.args.into_iter().next().unwrap(),
14281 };
14282 Ok(Expression::Function(Box::new(Function::new(
14283 "FROM_HEX".to_string(),
14284 vec![wrapped_arg],
14285 ))))
14286 }
14287 _ => {
14288 let name = match target {
14289 DialectType::Presto | DialectType::Trino => "FROM_HEX",
14290 DialectType::Spark
14291 | DialectType::Databricks
14292 | DialectType::Hive => "UNHEX",
14293 _ => &f.name,
14294 };
14295 Ok(Expression::Function(Box::new(Function::new(
14296 name.to_string(),
14297 f.args,
14298 ))))
14299 }
14300 }
14301 }
14302 // TO_UTF8(x) -> ENCODE(x, 'utf-8') for Spark
14303 "TO_UTF8" if f.args.len() == 1 => match target {
14304 DialectType::Spark | DialectType::Databricks => {
14305 let mut args = f.args;
14306 args.push(Expression::string("utf-8"));
14307 Ok(Expression::Function(Box::new(Function::new(
14308 "ENCODE".to_string(),
14309 args,
14310 ))))
14311 }
14312 _ => Ok(Expression::Function(f)),
14313 },
14314 // FROM_UTF8(x) -> DECODE(x, 'utf-8') for Spark
14315 "FROM_UTF8" if f.args.len() == 1 => match target {
14316 DialectType::Spark | DialectType::Databricks => {
14317 let mut args = f.args;
14318 args.push(Expression::string("utf-8"));
14319 Ok(Expression::Function(Box::new(Function::new(
14320 "DECODE".to_string(),
14321 args,
14322 ))))
14323 }
14324 _ => Ok(Expression::Function(f)),
14325 },
14326 // STARTS_WITH(x, y) / STARTSWITH(x, y) -> target-specific
14327 "STARTS_WITH" | "STARTSWITH" if f.args.len() == 2 => {
14328 let name = match target {
14329 DialectType::Spark | DialectType::Databricks => "STARTSWITH",
14330 DialectType::Presto | DialectType::Trino => "STARTS_WITH",
14331 DialectType::PostgreSQL | DialectType::Redshift => {
14332 "STARTS_WITH"
14333 }
14334 _ => &f.name,
14335 };
14336 Ok(Expression::Function(Box::new(Function::new(
14337 name.to_string(),
14338 f.args,
14339 ))))
14340 }
14341 // APPROX_COUNT_DISTINCT(x) <-> APPROX_DISTINCT(x)
14342 "APPROX_COUNT_DISTINCT" if f.args.len() >= 1 => {
14343 let name = match target {
14344 DialectType::Presto
14345 | DialectType::Trino
14346 | DialectType::Athena => "APPROX_DISTINCT",
14347 _ => "APPROX_COUNT_DISTINCT",
14348 };
14349 Ok(Expression::Function(Box::new(Function::new(
14350 name.to_string(),
14351 f.args,
14352 ))))
14353 }
14354 // JSON_EXTRACT -> GET_JSON_OBJECT for Spark/Hive
14355 "JSON_EXTRACT"
14356 if f.args.len() == 2
14357 && !matches!(source, DialectType::BigQuery)
14358 && matches!(
14359 target,
14360 DialectType::Spark
14361 | DialectType::Databricks
14362 | DialectType::Hive
14363 ) =>
14364 {
14365 Ok(Expression::Function(Box::new(Function::new(
14366 "GET_JSON_OBJECT".to_string(),
14367 f.args,
14368 ))))
14369 }
14370 // JSON_EXTRACT(x, path) -> x -> path for SQLite (arrow syntax)
14371 "JSON_EXTRACT"
14372 if f.args.len() == 2 && matches!(target, DialectType::SQLite) =>
14373 {
14374 let mut args = f.args;
14375 let path = args.remove(1);
14376 let this = args.remove(0);
14377 Ok(Expression::JsonExtract(Box::new(
14378 crate::expressions::JsonExtractFunc {
14379 this,
14380 path,
14381 returning: None,
14382 arrow_syntax: true,
14383 hash_arrow_syntax: false,
14384 wrapper_option: None,
14385 quotes_option: None,
14386 on_scalar_string: false,
14387 on_error: None,
14388 },
14389 )))
14390 }
14391 // JSON_FORMAT(x) -> TO_JSON(x) for Spark, TO_JSON_STRING for BigQuery, CAST(TO_JSON(x) AS TEXT) for DuckDB
14392 "JSON_FORMAT" if f.args.len() == 1 => {
14393 match target {
14394 DialectType::Spark | DialectType::Databricks => {
14395 // Presto JSON_FORMAT(JSON '...') needs Spark's string-unquoting flow:
14396 // REGEXP_EXTRACT(TO_JSON(FROM_JSON('[...]', SCHEMA_OF_JSON('[...]'))), '^.(.*).$', 1)
14397 if matches!(
14398 source,
14399 DialectType::Presto
14400 | DialectType::Trino
14401 | DialectType::Athena
14402 ) {
14403 if let Some(Expression::ParseJson(pj)) = f.args.first()
14404 {
14405 if let Expression::Literal(Literal::String(s)) =
14406 &pj.this
14407 {
14408 let wrapped = Expression::Literal(
14409 Literal::String(format!("[{}]", s)),
14410 );
14411 let schema_of_json = Expression::Function(
14412 Box::new(Function::new(
14413 "SCHEMA_OF_JSON".to_string(),
14414 vec![wrapped.clone()],
14415 )),
14416 );
14417 let from_json = Expression::Function(Box::new(
14418 Function::new(
14419 "FROM_JSON".to_string(),
14420 vec![wrapped, schema_of_json],
14421 ),
14422 ));
14423 let to_json = Expression::Function(Box::new(
14424 Function::new(
14425 "TO_JSON".to_string(),
14426 vec![from_json],
14427 ),
14428 ));
14429 return Ok(Expression::Function(Box::new(
14430 Function::new(
14431 "REGEXP_EXTRACT".to_string(),
14432 vec![
14433 to_json,
14434 Expression::Literal(
14435 Literal::String(
14436 "^.(.*).$".to_string(),
14437 ),
14438 ),
14439 Expression::Literal(
14440 Literal::Number(
14441 "1".to_string(),
14442 ),
14443 ),
14444 ],
14445 ),
14446 )));
14447 }
14448 }
14449 }
14450
14451 // Strip inner CAST(... AS JSON) or TO_JSON() if present
14452 // The CastToJsonForSpark may have already converted CAST(x AS JSON) to TO_JSON(x)
14453 let mut args = f.args;
14454 if let Some(Expression::Cast(ref c)) = args.first() {
14455 if matches!(&c.to, DataType::Json | DataType::JsonB) {
14456 args = vec![c.this.clone()];
14457 }
14458 } else if let Some(Expression::Function(ref inner_f)) =
14459 args.first()
14460 {
14461 if inner_f.name.eq_ignore_ascii_case("TO_JSON")
14462 && inner_f.args.len() == 1
14463 {
14464 // Already TO_JSON(x) from CastToJsonForSpark, just use the inner arg
14465 args = inner_f.args.clone();
14466 }
14467 }
14468 Ok(Expression::Function(Box::new(Function::new(
14469 "TO_JSON".to_string(),
14470 args,
14471 ))))
14472 }
14473 DialectType::BigQuery => Ok(Expression::Function(Box::new(
14474 Function::new("TO_JSON_STRING".to_string(), f.args),
14475 ))),
14476 DialectType::DuckDB => {
14477 // CAST(TO_JSON(x) AS TEXT)
14478 let to_json = Expression::Function(Box::new(
14479 Function::new("TO_JSON".to_string(), f.args),
14480 ));
14481 Ok(Expression::Cast(Box::new(Cast {
14482 this: to_json,
14483 to: DataType::Text,
14484 trailing_comments: Vec::new(),
14485 double_colon_syntax: false,
14486 format: None,
14487 default: None,
14488 })))
14489 }
14490 _ => Ok(Expression::Function(f)),
14491 }
14492 }
14493 // SYSDATE -> CURRENT_TIMESTAMP for non-Oracle/Redshift/Snowflake targets
14494 "SYSDATE" if f.args.is_empty() => {
14495 match target {
14496 DialectType::Oracle | DialectType::Redshift => {
14497 Ok(Expression::Function(f))
14498 }
14499 DialectType::Snowflake => {
14500 // Snowflake uses SYSDATE() with parens
14501 let mut f = *f;
14502 f.no_parens = false;
14503 Ok(Expression::Function(Box::new(f)))
14504 }
14505 DialectType::DuckDB => {
14506 // DuckDB: SYSDATE() -> CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
14507 Ok(Expression::AtTimeZone(Box::new(
14508 crate::expressions::AtTimeZone {
14509 this: Expression::CurrentTimestamp(
14510 crate::expressions::CurrentTimestamp {
14511 precision: None,
14512 sysdate: false,
14513 },
14514 ),
14515 zone: Expression::Literal(Literal::String(
14516 "UTC".to_string(),
14517 )),
14518 },
14519 )))
14520 }
14521 _ => Ok(Expression::CurrentTimestamp(
14522 crate::expressions::CurrentTimestamp {
14523 precision: None,
14524 sysdate: true,
14525 },
14526 )),
14527 }
14528 }
14529 // LOGICAL_OR(x) -> BOOL_OR(x)
14530 "LOGICAL_OR" if f.args.len() == 1 => {
14531 let name = match target {
14532 DialectType::Spark | DialectType::Databricks => "BOOL_OR",
14533 _ => &f.name,
14534 };
14535 Ok(Expression::Function(Box::new(Function::new(
14536 name.to_string(),
14537 f.args,
14538 ))))
14539 }
14540 // LOGICAL_AND(x) -> BOOL_AND(x)
14541 "LOGICAL_AND" if f.args.len() == 1 => {
14542 let name = match target {
14543 DialectType::Spark | DialectType::Databricks => "BOOL_AND",
14544 _ => &f.name,
14545 };
14546 Ok(Expression::Function(Box::new(Function::new(
14547 name.to_string(),
14548 f.args,
14549 ))))
14550 }
14551 // MONTHS_ADD(d, n) -> ADD_MONTHS(d, n) for Oracle
14552 "MONTHS_ADD" if f.args.len() == 2 => match target {
14553 DialectType::Oracle => Ok(Expression::Function(Box::new(
14554 Function::new("ADD_MONTHS".to_string(), f.args),
14555 ))),
14556 _ => Ok(Expression::Function(f)),
14557 },
14558 // ARRAY_JOIN(arr, sep[, null_replacement]) -> target-specific
14559 "ARRAY_JOIN" if f.args.len() >= 2 => {
14560 match target {
14561 DialectType::Spark | DialectType::Databricks => {
14562 // Keep as ARRAY_JOIN for Spark (it supports null_replacement)
14563 Ok(Expression::Function(f))
14564 }
14565 DialectType::Hive => {
14566 // ARRAY_JOIN(arr, sep[, null_rep]) -> CONCAT_WS(sep, arr) (drop null_replacement)
14567 let mut args = f.args;
14568 let arr = args.remove(0);
14569 let sep = args.remove(0);
14570 // Drop any remaining args (null_replacement)
14571 Ok(Expression::Function(Box::new(Function::new(
14572 "CONCAT_WS".to_string(),
14573 vec![sep, arr],
14574 ))))
14575 }
14576 DialectType::Presto | DialectType::Trino => {
14577 Ok(Expression::Function(f))
14578 }
14579 _ => Ok(Expression::Function(f)),
14580 }
14581 }
14582 // LOCATE(substr, str, pos) 3-arg -> target-specific
14583 // For Presto/DuckDB: STRPOS doesn't support 3-arg, need complex expansion
14584 "LOCATE"
14585 if f.args.len() == 3
14586 && matches!(
14587 target,
14588 DialectType::Presto
14589 | DialectType::Trino
14590 | DialectType::Athena
14591 | DialectType::DuckDB
14592 ) =>
14593 {
14594 let mut args = f.args;
14595 let substr = args.remove(0);
14596 let string = args.remove(0);
14597 let pos = args.remove(0);
14598 // STRPOS(SUBSTRING(string, pos), substr)
14599 let substring_call = Expression::Function(Box::new(Function::new(
14600 "SUBSTRING".to_string(),
14601 vec![string.clone(), pos.clone()],
14602 )));
14603 let strpos_call = Expression::Function(Box::new(Function::new(
14604 "STRPOS".to_string(),
14605 vec![substring_call, substr.clone()],
14606 )));
14607 // STRPOS(...) + pos - 1
14608 let pos_adjusted =
14609 Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
14610 Expression::Add(Box::new(
14611 crate::expressions::BinaryOp::new(
14612 strpos_call.clone(),
14613 pos.clone(),
14614 ),
14615 )),
14616 Expression::number(1),
14617 )));
14618 // STRPOS(...) = 0
14619 let is_zero =
14620 Expression::Eq(Box::new(crate::expressions::BinaryOp::new(
14621 strpos_call.clone(),
14622 Expression::number(0),
14623 )));
14624
14625 match target {
14626 DialectType::Presto
14627 | DialectType::Trino
14628 | DialectType::Athena => {
14629 // IF(STRPOS(...) = 0, 0, STRPOS(...) + pos - 1)
14630 Ok(Expression::Function(Box::new(Function::new(
14631 "IF".to_string(),
14632 vec![is_zero, Expression::number(0), pos_adjusted],
14633 ))))
14634 }
14635 DialectType::DuckDB => {
14636 // CASE WHEN STRPOS(...) = 0 THEN 0 ELSE STRPOS(...) + pos - 1 END
14637 Ok(Expression::Case(Box::new(crate::expressions::Case {
14638 operand: None,
14639 whens: vec![(is_zero, Expression::number(0))],
14640 else_: Some(pos_adjusted),
14641 comments: Vec::new(),
14642 })))
14643 }
14644 _ => Ok(Expression::Function(Box::new(Function::new(
14645 "LOCATE".to_string(),
14646 vec![substr, string, pos],
14647 )))),
14648 }
14649 }
14650 // STRPOS(haystack, needle, occurrence) 3-arg -> INSTR(haystack, needle, 1, occurrence)
14651 "STRPOS"
14652 if f.args.len() == 3
14653 && matches!(
14654 target,
14655 DialectType::BigQuery
14656 | DialectType::Oracle
14657 | DialectType::Teradata
14658 ) =>
14659 {
14660 let mut args = f.args;
14661 let haystack = args.remove(0);
14662 let needle = args.remove(0);
14663 let occurrence = args.remove(0);
14664 Ok(Expression::Function(Box::new(Function::new(
14665 "INSTR".to_string(),
14666 vec![haystack, needle, Expression::number(1), occurrence],
14667 ))))
14668 }
14669 // SCHEMA_NAME(id) -> target-specific
14670 "SCHEMA_NAME" if f.args.len() <= 1 => match target {
14671 DialectType::MySQL | DialectType::SingleStore => {
14672 Ok(Expression::Function(Box::new(Function::new(
14673 "SCHEMA".to_string(),
14674 vec![],
14675 ))))
14676 }
14677 DialectType::PostgreSQL => Ok(Expression::CurrentSchema(Box::new(
14678 crate::expressions::CurrentSchema { this: None },
14679 ))),
14680 DialectType::SQLite => Ok(Expression::string("main")),
14681 _ => Ok(Expression::Function(f)),
14682 },
14683 // STRTOL(str, base) -> FROM_BASE(str, base) for Trino/Presto
14684 "STRTOL" if f.args.len() == 2 => match target {
14685 DialectType::Presto | DialectType::Trino => {
14686 Ok(Expression::Function(Box::new(Function::new(
14687 "FROM_BASE".to_string(),
14688 f.args,
14689 ))))
14690 }
14691 _ => Ok(Expression::Function(f)),
14692 },
14693 // EDITDIST3(a, b) -> LEVENSHTEIN(a, b) for Spark
14694 "EDITDIST3" if f.args.len() == 2 => match target {
14695 DialectType::Spark | DialectType::Databricks => {
14696 Ok(Expression::Function(Box::new(Function::new(
14697 "LEVENSHTEIN".to_string(),
14698 f.args,
14699 ))))
14700 }
14701 _ => Ok(Expression::Function(f)),
14702 },
14703 // FORMAT(num, decimals) from MySQL -> DuckDB FORMAT('{:,.Xf}', num)
14704 "FORMAT"
14705 if f.args.len() == 2
14706 && matches!(
14707 source,
14708 DialectType::MySQL | DialectType::SingleStore
14709 )
14710 && matches!(target, DialectType::DuckDB) =>
14711 {
14712 let mut args = f.args;
14713 let num_expr = args.remove(0);
14714 let decimals_expr = args.remove(0);
14715 // Extract decimal count
14716 let dec_count = match &decimals_expr {
14717 Expression::Literal(Literal::Number(n)) => n.clone(),
14718 _ => "0".to_string(),
14719 };
14720 let fmt_str = format!("{{:,.{}f}}", dec_count);
14721 Ok(Expression::Function(Box::new(Function::new(
14722 "FORMAT".to_string(),
14723 vec![Expression::string(&fmt_str), num_expr],
14724 ))))
14725 }
14726 // FORMAT(x, fmt) from TSQL -> DATE_FORMAT for Spark, or expand short codes
14727 "FORMAT"
14728 if f.args.len() == 2
14729 && matches!(
14730 source,
14731 DialectType::TSQL | DialectType::Fabric
14732 ) =>
14733 {
14734 let val_expr = f.args[0].clone();
14735 let fmt_expr = f.args[1].clone();
14736 // Expand unambiguous .NET single-char date format shortcodes to full patterns.
14737 // Only expand shortcodes that are NOT also valid numeric format specifiers.
14738 // Ambiguous: d, D, f, F, g, G (used for both dates and numbers)
14739 // Unambiguous date: m/M (Month day), t/T (Time), y/Y (Year month)
14740 let (expanded_fmt, is_shortcode) = match &fmt_expr {
14741 Expression::Literal(crate::expressions::Literal::String(s)) => {
14742 match s.as_str() {
14743 "m" | "M" => (Expression::string("MMMM d"), true),
14744 "t" => (Expression::string("h:mm tt"), true),
14745 "T" => (Expression::string("h:mm:ss tt"), true),
14746 "y" | "Y" => (Expression::string("MMMM yyyy"), true),
14747 _ => (fmt_expr.clone(), false),
14748 }
14749 }
14750 _ => (fmt_expr.clone(), false),
14751 };
14752 // Check if the format looks like a date format
14753 let is_date_format = is_shortcode
14754 || match &expanded_fmt {
14755 Expression::Literal(
14756 crate::expressions::Literal::String(s),
14757 ) => {
14758 // Date formats typically contain yyyy, MM, dd, MMMM, HH, etc.
14759 s.contains("yyyy")
14760 || s.contains("YYYY")
14761 || s.contains("MM")
14762 || s.contains("dd")
14763 || s.contains("MMMM")
14764 || s.contains("HH")
14765 || s.contains("hh")
14766 || s.contains("ss")
14767 }
14768 _ => false,
14769 };
14770 match target {
14771 DialectType::Spark | DialectType::Databricks => {
14772 let func_name = if is_date_format {
14773 "DATE_FORMAT"
14774 } else {
14775 "FORMAT_NUMBER"
14776 };
14777 Ok(Expression::Function(Box::new(Function::new(
14778 func_name.to_string(),
14779 vec![val_expr, expanded_fmt],
14780 ))))
14781 }
14782 _ => {
14783 // For TSQL and other targets, expand shortcodes but keep FORMAT
14784 if is_shortcode {
14785 Ok(Expression::Function(Box::new(Function::new(
14786 "FORMAT".to_string(),
14787 vec![val_expr, expanded_fmt],
14788 ))))
14789 } else {
14790 Ok(Expression::Function(f))
14791 }
14792 }
14793 }
14794 }
14795 // FORMAT('%s', x) from Trino/Presto -> target-specific
14796 "FORMAT"
14797 if f.args.len() >= 2
14798 && matches!(
14799 source,
14800 DialectType::Trino
14801 | DialectType::Presto
14802 | DialectType::Athena
14803 ) =>
14804 {
14805 let fmt_expr = f.args[0].clone();
14806 let value_args: Vec<Expression> = f.args[1..].to_vec();
14807 match target {
14808 // DuckDB: replace %s with {} in format string
14809 DialectType::DuckDB => {
14810 let new_fmt = match &fmt_expr {
14811 Expression::Literal(Literal::String(s)) => {
14812 Expression::Literal(Literal::String(
14813 s.replace("%s", "{}"),
14814 ))
14815 }
14816 _ => fmt_expr,
14817 };
14818 let mut args = vec![new_fmt];
14819 args.extend(value_args);
14820 Ok(Expression::Function(Box::new(Function::new(
14821 "FORMAT".to_string(),
14822 args,
14823 ))))
14824 }
14825 // Snowflake: FORMAT('%s', x) -> TO_CHAR(x) when just %s
14826 DialectType::Snowflake => match &fmt_expr {
14827 Expression::Literal(Literal::String(s))
14828 if s == "%s" && value_args.len() == 1 =>
14829 {
14830 Ok(Expression::Function(Box::new(Function::new(
14831 "TO_CHAR".to_string(),
14832 value_args,
14833 ))))
14834 }
14835 _ => Ok(Expression::Function(f)),
14836 },
14837 // Default: keep FORMAT as-is
14838 _ => Ok(Expression::Function(f)),
14839 }
14840 }
14841 // LIST_CONTAINS / LIST_HAS / ARRAY_CONTAINS -> target-specific
14842 "LIST_CONTAINS" | "LIST_HAS" | "ARRAY_CONTAINS"
14843 if f.args.len() == 2 =>
14844 {
14845 match target {
14846 DialectType::PostgreSQL | DialectType::Redshift => {
14847 // CASE WHEN needle IS NULL THEN NULL ELSE COALESCE(needle = ANY(arr), FALSE) END
14848 let arr = f.args[0].clone();
14849 let needle = f.args[1].clone();
14850 // Convert [] to ARRAY[] for PostgreSQL
14851 let pg_arr = match arr {
14852 Expression::Array(a) => Expression::ArrayFunc(
14853 Box::new(crate::expressions::ArrayConstructor {
14854 expressions: a.expressions,
14855 bracket_notation: false,
14856 use_list_keyword: false,
14857 }),
14858 ),
14859 _ => arr,
14860 };
14861 // needle = ANY(arr) using the Any quantified expression
14862 let any_expr = Expression::Any(Box::new(
14863 crate::expressions::QuantifiedExpr {
14864 this: needle.clone(),
14865 subquery: pg_arr,
14866 op: Some(crate::expressions::QuantifiedOp::Eq),
14867 },
14868 ));
14869 let coalesce = Expression::Coalesce(Box::new(
14870 crate::expressions::VarArgFunc {
14871 expressions: vec![
14872 any_expr,
14873 Expression::Boolean(
14874 crate::expressions::BooleanLiteral {
14875 value: false,
14876 },
14877 ),
14878 ],
14879 original_name: None,
14880 },
14881 ));
14882 let is_null_check = Expression::IsNull(Box::new(
14883 crate::expressions::IsNull {
14884 this: needle,
14885 not: false,
14886 postfix_form: false,
14887 },
14888 ));
14889 Ok(Expression::Case(Box::new(Case {
14890 operand: None,
14891 whens: vec![(
14892 is_null_check,
14893 Expression::Null(crate::expressions::Null),
14894 )],
14895 else_: Some(coalesce),
14896 comments: Vec::new(),
14897 })))
14898 }
14899 _ => Ok(Expression::Function(Box::new(Function::new(
14900 "ARRAY_CONTAINS".to_string(),
14901 f.args,
14902 )))),
14903 }
14904 }
14905 // LIST_HAS_ANY / ARRAY_HAS_ANY -> target-specific overlap operator
14906 "LIST_HAS_ANY" | "ARRAY_HAS_ANY" if f.args.len() == 2 => {
14907 match target {
14908 DialectType::PostgreSQL | DialectType::Redshift => {
14909 // arr1 && arr2 with ARRAY[] syntax
14910 let mut args = f.args;
14911 let arr1 = args.remove(0);
14912 let arr2 = args.remove(0);
14913 let pg_arr1 = match arr1 {
14914 Expression::Array(a) => Expression::ArrayFunc(
14915 Box::new(crate::expressions::ArrayConstructor {
14916 expressions: a.expressions,
14917 bracket_notation: false,
14918 use_list_keyword: false,
14919 }),
14920 ),
14921 _ => arr1,
14922 };
14923 let pg_arr2 = match arr2 {
14924 Expression::Array(a) => Expression::ArrayFunc(
14925 Box::new(crate::expressions::ArrayConstructor {
14926 expressions: a.expressions,
14927 bracket_notation: false,
14928 use_list_keyword: false,
14929 }),
14930 ),
14931 _ => arr2,
14932 };
14933 Ok(Expression::ArrayOverlaps(Box::new(BinaryOp::new(
14934 pg_arr1, pg_arr2,
14935 ))))
14936 }
14937 DialectType::DuckDB => {
14938 // DuckDB: arr1 && arr2 (native support)
14939 let mut args = f.args;
14940 let arr1 = args.remove(0);
14941 let arr2 = args.remove(0);
14942 Ok(Expression::ArrayOverlaps(Box::new(BinaryOp::new(
14943 arr1, arr2,
14944 ))))
14945 }
14946 _ => Ok(Expression::Function(Box::new(Function::new(
14947 "LIST_HAS_ANY".to_string(),
14948 f.args,
14949 )))),
14950 }
14951 }
14952 // APPROX_QUANTILE(x, q) -> target-specific
14953 "APPROX_QUANTILE" if f.args.len() == 2 => match target {
14954 DialectType::Snowflake => Ok(Expression::Function(Box::new(
14955 Function::new("APPROX_PERCENTILE".to_string(), f.args),
14956 ))),
14957 DialectType::DuckDB => Ok(Expression::Function(f)),
14958 _ => Ok(Expression::Function(f)),
14959 },
14960 // MAKE_DATE(y, m, d) -> DATE(y, m, d) for BigQuery
14961 "MAKE_DATE" if f.args.len() == 3 => match target {
14962 DialectType::BigQuery => Ok(Expression::Function(Box::new(
14963 Function::new("DATE".to_string(), f.args),
14964 ))),
14965 _ => Ok(Expression::Function(f)),
14966 },
14967 // RANGE(start, end[, step]) -> target-specific
14968 "RANGE"
14969 if f.args.len() >= 2 && !matches!(target, DialectType::DuckDB) =>
14970 {
14971 let start = f.args[0].clone();
14972 let end = f.args[1].clone();
14973 let step = f.args.get(2).cloned();
14974 match target {
14975 DialectType::Spark | DialectType::Databricks => {
14976 // RANGE(start, end) -> SEQUENCE(start, end-1)
14977 // RANGE(start, end, step) -> SEQUENCE(start, end-step, step) when step constant
14978 // RANGE(start, start) -> ARRAY() (empty)
14979 // RANGE(start, end, 0) -> ARRAY() (empty)
14980 // When end is variable: IF((end - 1) <= start, ARRAY(), SEQUENCE(start, (end - 1)))
14981
14982 // Check for constant args
14983 fn extract_i64(e: &Expression) -> Option<i64> {
14984 match e {
14985 Expression::Literal(Literal::Number(n)) => {
14986 n.parse::<i64>().ok()
14987 }
14988 Expression::Neg(u) => {
14989 if let Expression::Literal(Literal::Number(n)) =
14990 &u.this
14991 {
14992 n.parse::<i64>().ok().map(|v| -v)
14993 } else {
14994 None
14995 }
14996 }
14997 _ => None,
14998 }
14999 }
15000 let start_val = extract_i64(&start);
15001 let end_val = extract_i64(&end);
15002 let step_val = step.as_ref().and_then(|s| extract_i64(s));
15003
15004 // Check for RANGE(x, x) or RANGE(x, y, 0) -> empty array
15005 if step_val == Some(0) {
15006 return Ok(Expression::Function(Box::new(
15007 Function::new("ARRAY".to_string(), vec![]),
15008 )));
15009 }
15010 if let (Some(s), Some(e_val)) = (start_val, end_val) {
15011 if s == e_val {
15012 return Ok(Expression::Function(Box::new(
15013 Function::new("ARRAY".to_string(), vec![]),
15014 )));
15015 }
15016 }
15017
15018 if let (Some(_s_val), Some(e_val)) = (start_val, end_val) {
15019 // All constants - compute new end = end - step (if step provided) or end - 1
15020 match step_val {
15021 Some(st) if st < 0 => {
15022 // Negative step: SEQUENCE(start, end - step, step)
15023 let new_end = e_val - st; // end - step (= end + |step|)
15024 let mut args =
15025 vec![start, Expression::number(new_end)];
15026 if let Some(s) = step {
15027 args.push(s);
15028 }
15029 Ok(Expression::Function(Box::new(
15030 Function::new("SEQUENCE".to_string(), args),
15031 )))
15032 }
15033 Some(st) => {
15034 let new_end = e_val - st;
15035 let mut args =
15036 vec![start, Expression::number(new_end)];
15037 if let Some(s) = step {
15038 args.push(s);
15039 }
15040 Ok(Expression::Function(Box::new(
15041 Function::new("SEQUENCE".to_string(), args),
15042 )))
15043 }
15044 None => {
15045 // No step: SEQUENCE(start, end - 1)
15046 let new_end = e_val - 1;
15047 Ok(Expression::Function(Box::new(
15048 Function::new(
15049 "SEQUENCE".to_string(),
15050 vec![
15051 start,
15052 Expression::number(new_end),
15053 ],
15054 ),
15055 )))
15056 }
15057 }
15058 } else {
15059 // Variable end: IF((end - 1) <= start, ARRAY(), SEQUENCE(start, (end - 1)))
15060 let end_m1 = Expression::Sub(Box::new(BinaryOp::new(
15061 end.clone(),
15062 Expression::number(1),
15063 )));
15064 let cond = Expression::Lte(Box::new(BinaryOp::new(
15065 Expression::Paren(Box::new(Paren {
15066 this: end_m1.clone(),
15067 trailing_comments: Vec::new(),
15068 })),
15069 start.clone(),
15070 )));
15071 let empty = Expression::Function(Box::new(
15072 Function::new("ARRAY".to_string(), vec![]),
15073 ));
15074 let mut seq_args = vec![
15075 start,
15076 Expression::Paren(Box::new(Paren {
15077 this: end_m1,
15078 trailing_comments: Vec::new(),
15079 })),
15080 ];
15081 if let Some(s) = step {
15082 seq_args.push(s);
15083 }
15084 let seq = Expression::Function(Box::new(
15085 Function::new("SEQUENCE".to_string(), seq_args),
15086 ));
15087 Ok(Expression::IfFunc(Box::new(
15088 crate::expressions::IfFunc {
15089 condition: cond,
15090 true_value: empty,
15091 false_value: Some(seq),
15092 original_name: None,
15093 },
15094 )))
15095 }
15096 }
15097 DialectType::SQLite => {
15098 // RANGE(start, end) -> GENERATE_SERIES(start, end)
15099 // The subquery wrapping is handled at the Alias level
15100 let mut args = vec![start, end];
15101 if let Some(s) = step {
15102 args.push(s);
15103 }
15104 Ok(Expression::Function(Box::new(Function::new(
15105 "GENERATE_SERIES".to_string(),
15106 args,
15107 ))))
15108 }
15109 _ => Ok(Expression::Function(f)),
15110 }
15111 }
15112 // ARRAY_REVERSE_SORT -> target-specific
15113 // (handled above as well, but also need DuckDB self-normalization)
15114 // MAP_FROM_ARRAYS(keys, values) -> target-specific map construction
15115 "MAP_FROM_ARRAYS" if f.args.len() == 2 => match target {
15116 DialectType::Snowflake => Ok(Expression::Function(Box::new(
15117 Function::new("OBJECT_CONSTRUCT".to_string(), f.args),
15118 ))),
15119 DialectType::Spark | DialectType::Databricks => {
15120 Ok(Expression::Function(Box::new(Function::new(
15121 "MAP_FROM_ARRAYS".to_string(),
15122 f.args,
15123 ))))
15124 }
15125 _ => Ok(Expression::Function(Box::new(Function::new(
15126 "MAP".to_string(),
15127 f.args,
15128 )))),
15129 },
15130 // VARIANCE(x) -> varSamp(x) for ClickHouse
15131 "VARIANCE" if f.args.len() == 1 => match target {
15132 DialectType::ClickHouse => Ok(Expression::Function(Box::new(
15133 Function::new("varSamp".to_string(), f.args),
15134 ))),
15135 _ => Ok(Expression::Function(f)),
15136 },
15137 // STDDEV(x) -> stddevSamp(x) for ClickHouse
15138 "STDDEV" if f.args.len() == 1 => match target {
15139 DialectType::ClickHouse => Ok(Expression::Function(Box::new(
15140 Function::new("stddevSamp".to_string(), f.args),
15141 ))),
15142 _ => Ok(Expression::Function(f)),
15143 },
15144 // ISINF(x) -> IS_INF(x) for BigQuery
15145 "ISINF" if f.args.len() == 1 => match target {
15146 DialectType::BigQuery => Ok(Expression::Function(Box::new(
15147 Function::new("IS_INF".to_string(), f.args),
15148 ))),
15149 _ => Ok(Expression::Function(f)),
15150 },
15151 // CONTAINS(arr, x) -> ARRAY_CONTAINS(arr, x) for Spark/Hive
15152 "CONTAINS" if f.args.len() == 2 => match target {
15153 DialectType::Spark
15154 | DialectType::Databricks
15155 | DialectType::Hive => Ok(Expression::Function(Box::new(
15156 Function::new("ARRAY_CONTAINS".to_string(), f.args),
15157 ))),
15158 _ => Ok(Expression::Function(f)),
15159 },
15160 // ARRAY_CONTAINS(arr, x) -> CONTAINS(arr, x) for Presto
15161 "ARRAY_CONTAINS" if f.args.len() == 2 => match target {
15162 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
15163 Ok(Expression::Function(Box::new(Function::new(
15164 "CONTAINS".to_string(),
15165 f.args,
15166 ))))
15167 }
15168 DialectType::DuckDB => Ok(Expression::Function(Box::new(
15169 Function::new("ARRAY_CONTAINS".to_string(), f.args),
15170 ))),
15171 _ => Ok(Expression::Function(f)),
15172 },
15173 // TO_UNIXTIME(x) -> UNIX_TIMESTAMP(x) for Hive/Spark
15174 "TO_UNIXTIME" if f.args.len() == 1 => match target {
15175 DialectType::Hive
15176 | DialectType::Spark
15177 | DialectType::Databricks => Ok(Expression::Function(Box::new(
15178 Function::new("UNIX_TIMESTAMP".to_string(), f.args),
15179 ))),
15180 _ => Ok(Expression::Function(f)),
15181 },
15182 // FROM_UNIXTIME(x) -> target-specific
15183 "FROM_UNIXTIME" if f.args.len() == 1 => {
15184 match target {
15185 DialectType::Hive
15186 | DialectType::Spark
15187 | DialectType::Databricks
15188 | DialectType::Presto
15189 | DialectType::Trino => Ok(Expression::Function(f)),
15190 DialectType::DuckDB => {
15191 // DuckDB: TO_TIMESTAMP(x)
15192 let arg = f.args.into_iter().next().unwrap();
15193 Ok(Expression::Function(Box::new(Function::new(
15194 "TO_TIMESTAMP".to_string(),
15195 vec![arg],
15196 ))))
15197 }
15198 DialectType::PostgreSQL => {
15199 // PG: TO_TIMESTAMP(col)
15200 let arg = f.args.into_iter().next().unwrap();
15201 Ok(Expression::Function(Box::new(Function::new(
15202 "TO_TIMESTAMP".to_string(),
15203 vec![arg],
15204 ))))
15205 }
15206 DialectType::Redshift => {
15207 // Redshift: (TIMESTAMP 'epoch' + col * INTERVAL '1 SECOND')
15208 let arg = f.args.into_iter().next().unwrap();
15209 let epoch_ts = Expression::Literal(Literal::Timestamp(
15210 "epoch".to_string(),
15211 ));
15212 let interval = Expression::Interval(Box::new(
15213 crate::expressions::Interval {
15214 this: Some(Expression::string("1 SECOND")),
15215 unit: None,
15216 },
15217 ));
15218 let mul =
15219 Expression::Mul(Box::new(BinaryOp::new(arg, interval)));
15220 let add =
15221 Expression::Add(Box::new(BinaryOp::new(epoch_ts, mul)));
15222 Ok(Expression::Paren(Box::new(crate::expressions::Paren {
15223 this: add,
15224 trailing_comments: Vec::new(),
15225 })))
15226 }
15227 _ => Ok(Expression::Function(f)),
15228 }
15229 }
15230 // FROM_UNIXTIME(x, fmt) with 2 args from Hive/Spark -> target-specific
15231 "FROM_UNIXTIME"
15232 if f.args.len() == 2
15233 && matches!(
15234 source,
15235 DialectType::Hive
15236 | DialectType::Spark
15237 | DialectType::Databricks
15238 ) =>
15239 {
15240 let mut args = f.args;
15241 let unix_ts = args.remove(0);
15242 let fmt_expr = args.remove(0);
15243 match target {
15244 DialectType::DuckDB => {
15245 // DuckDB: STRFTIME(TO_TIMESTAMP(x), c_fmt)
15246 let to_ts = Expression::Function(Box::new(Function::new(
15247 "TO_TIMESTAMP".to_string(),
15248 vec![unix_ts],
15249 )));
15250 if let Expression::Literal(
15251 crate::expressions::Literal::String(s),
15252 ) = &fmt_expr
15253 {
15254 let c_fmt = Self::hive_format_to_c_format(s);
15255 Ok(Expression::Function(Box::new(Function::new(
15256 "STRFTIME".to_string(),
15257 vec![to_ts, Expression::string(&c_fmt)],
15258 ))))
15259 } else {
15260 Ok(Expression::Function(Box::new(Function::new(
15261 "STRFTIME".to_string(),
15262 vec![to_ts, fmt_expr],
15263 ))))
15264 }
15265 }
15266 DialectType::Presto
15267 | DialectType::Trino
15268 | DialectType::Athena => {
15269 // Presto: DATE_FORMAT(FROM_UNIXTIME(x), presto_fmt)
15270 let from_unix =
15271 Expression::Function(Box::new(Function::new(
15272 "FROM_UNIXTIME".to_string(),
15273 vec![unix_ts],
15274 )));
15275 if let Expression::Literal(
15276 crate::expressions::Literal::String(s),
15277 ) = &fmt_expr
15278 {
15279 let p_fmt = Self::hive_format_to_presto_format(s);
15280 Ok(Expression::Function(Box::new(Function::new(
15281 "DATE_FORMAT".to_string(),
15282 vec![from_unix, Expression::string(&p_fmt)],
15283 ))))
15284 } else {
15285 Ok(Expression::Function(Box::new(Function::new(
15286 "DATE_FORMAT".to_string(),
15287 vec![from_unix, fmt_expr],
15288 ))))
15289 }
15290 }
15291 _ => {
15292 // Keep as FROM_UNIXTIME(x, fmt) for other targets
15293 Ok(Expression::Function(Box::new(Function::new(
15294 "FROM_UNIXTIME".to_string(),
15295 vec![unix_ts, fmt_expr],
15296 ))))
15297 }
15298 }
15299 }
15300 // DATEPART(unit, expr) -> EXTRACT(unit FROM expr) for Spark
15301 "DATEPART" | "DATE_PART" if f.args.len() == 2 => {
15302 let unit_str = Self::get_unit_str_static(&f.args[0]);
15303 // Get the raw unit text preserving original case
15304 let raw_unit = match &f.args[0] {
15305 Expression::Identifier(id) => id.name.clone(),
15306 Expression::Literal(crate::expressions::Literal::String(s)) => {
15307 s.clone()
15308 }
15309 Expression::Column(col) => col.name.name.clone(),
15310 _ => unit_str.clone(),
15311 };
15312 match target {
15313 DialectType::TSQL | DialectType::Fabric => {
15314 // Preserve original case of unit for TSQL
15315 let unit_name = match unit_str.as_str() {
15316 "YY" | "YYYY" => "YEAR".to_string(),
15317 "QQ" | "Q" => "QUARTER".to_string(),
15318 "MM" | "M" => "MONTH".to_string(),
15319 "WK" | "WW" => "WEEK".to_string(),
15320 "DD" | "D" | "DY" => "DAY".to_string(),
15321 "HH" => "HOUR".to_string(),
15322 "MI" | "N" => "MINUTE".to_string(),
15323 "SS" | "S" => "SECOND".to_string(),
15324 _ => raw_unit.clone(), // preserve original case
15325 };
15326 let mut args = f.args;
15327 args[0] =
15328 Expression::Identifier(Identifier::new(&unit_name));
15329 Ok(Expression::Function(Box::new(Function::new(
15330 "DATEPART".to_string(),
15331 args,
15332 ))))
15333 }
15334 DialectType::Spark | DialectType::Databricks => {
15335 // DATEPART(unit, expr) -> EXTRACT(unit FROM expr)
15336 // Preserve original case for non-abbreviation units
15337 let unit = match unit_str.as_str() {
15338 "YY" | "YYYY" => "YEAR".to_string(),
15339 "QQ" | "Q" => "QUARTER".to_string(),
15340 "MM" | "M" => "MONTH".to_string(),
15341 "WK" | "WW" => "WEEK".to_string(),
15342 "DD" | "D" | "DY" => "DAY".to_string(),
15343 "HH" => "HOUR".to_string(),
15344 "MI" | "N" => "MINUTE".to_string(),
15345 "SS" | "S" => "SECOND".to_string(),
15346 _ => raw_unit, // preserve original case
15347 };
15348 Ok(Expression::Extract(Box::new(
15349 crate::expressions::ExtractFunc {
15350 this: f.args[1].clone(),
15351 field: crate::expressions::DateTimeField::Custom(
15352 unit,
15353 ),
15354 },
15355 )))
15356 }
15357 _ => Ok(Expression::Function(Box::new(Function::new(
15358 "DATE_PART".to_string(),
15359 f.args,
15360 )))),
15361 }
15362 }
15363 // DATENAME(mm, date) -> FORMAT(CAST(date AS DATETIME2), 'MMMM') for TSQL
15364 // DATENAME(dw, date) -> FORMAT(CAST(date AS DATETIME2), 'dddd') for TSQL
15365 // DATENAME(mm, date) -> DATE_FORMAT(CAST(date AS TIMESTAMP), 'MMMM') for Spark
15366 // DATENAME(dw, date) -> DATE_FORMAT(CAST(date AS TIMESTAMP), 'EEEE') for Spark
15367 "DATENAME" if f.args.len() == 2 => {
15368 let unit_str = Self::get_unit_str_static(&f.args[0]);
15369 let date_expr = f.args[1].clone();
15370 match unit_str.as_str() {
15371 "MM" | "M" | "MONTH" => match target {
15372 DialectType::TSQL => {
15373 let cast_date = Expression::Cast(Box::new(
15374 crate::expressions::Cast {
15375 this: date_expr,
15376 to: DataType::Custom {
15377 name: "DATETIME2".to_string(),
15378 },
15379 trailing_comments: Vec::new(),
15380 double_colon_syntax: false,
15381 format: None,
15382 default: None,
15383 },
15384 ));
15385 Ok(Expression::Function(Box::new(Function::new(
15386 "FORMAT".to_string(),
15387 vec![cast_date, Expression::string("MMMM")],
15388 ))))
15389 }
15390 DialectType::Spark | DialectType::Databricks => {
15391 let cast_date = Expression::Cast(Box::new(
15392 crate::expressions::Cast {
15393 this: date_expr,
15394 to: DataType::Timestamp {
15395 timezone: false,
15396 precision: None,
15397 },
15398 trailing_comments: Vec::new(),
15399 double_colon_syntax: false,
15400 format: None,
15401 default: None,
15402 },
15403 ));
15404 Ok(Expression::Function(Box::new(Function::new(
15405 "DATE_FORMAT".to_string(),
15406 vec![cast_date, Expression::string("MMMM")],
15407 ))))
15408 }
15409 _ => Ok(Expression::Function(f)),
15410 },
15411 "DW" | "WEEKDAY" => match target {
15412 DialectType::TSQL => {
15413 let cast_date = Expression::Cast(Box::new(
15414 crate::expressions::Cast {
15415 this: date_expr,
15416 to: DataType::Custom {
15417 name: "DATETIME2".to_string(),
15418 },
15419 trailing_comments: Vec::new(),
15420 double_colon_syntax: false,
15421 format: None,
15422 default: None,
15423 },
15424 ));
15425 Ok(Expression::Function(Box::new(Function::new(
15426 "FORMAT".to_string(),
15427 vec![cast_date, Expression::string("dddd")],
15428 ))))
15429 }
15430 DialectType::Spark | DialectType::Databricks => {
15431 let cast_date = Expression::Cast(Box::new(
15432 crate::expressions::Cast {
15433 this: date_expr,
15434 to: DataType::Timestamp {
15435 timezone: false,
15436 precision: None,
15437 },
15438 trailing_comments: Vec::new(),
15439 double_colon_syntax: false,
15440 format: None,
15441 default: None,
15442 },
15443 ));
15444 Ok(Expression::Function(Box::new(Function::new(
15445 "DATE_FORMAT".to_string(),
15446 vec![cast_date, Expression::string("EEEE")],
15447 ))))
15448 }
15449 _ => Ok(Expression::Function(f)),
15450 },
15451 _ => Ok(Expression::Function(f)),
15452 }
15453 }
15454 // STRING_AGG(x, sep) without WITHIN GROUP -> target-specific
15455 "STRING_AGG" if f.args.len() >= 2 => {
15456 let x = f.args[0].clone();
15457 let sep = f.args[1].clone();
15458 match target {
15459 DialectType::MySQL
15460 | DialectType::SingleStore
15461 | DialectType::Doris
15462 | DialectType::StarRocks => Ok(Expression::GroupConcat(
15463 Box::new(crate::expressions::GroupConcatFunc {
15464 this: x,
15465 separator: Some(sep),
15466 order_by: None,
15467 distinct: false,
15468 filter: None,
15469 }),
15470 )),
15471 DialectType::SQLite => Ok(Expression::GroupConcat(Box::new(
15472 crate::expressions::GroupConcatFunc {
15473 this: x,
15474 separator: Some(sep),
15475 order_by: None,
15476 distinct: false,
15477 filter: None,
15478 },
15479 ))),
15480 DialectType::PostgreSQL | DialectType::Redshift => {
15481 Ok(Expression::StringAgg(Box::new(
15482 crate::expressions::StringAggFunc {
15483 this: x,
15484 separator: Some(sep),
15485 order_by: None,
15486 distinct: false,
15487 filter: None,
15488 limit: None,
15489 },
15490 )))
15491 }
15492 _ => Ok(Expression::Function(f)),
15493 }
15494 }
15495 // JSON_ARRAYAGG -> JSON_AGG for PostgreSQL
15496 "JSON_ARRAYAGG" => match target {
15497 DialectType::PostgreSQL => {
15498 Ok(Expression::Function(Box::new(Function {
15499 name: "JSON_AGG".to_string(),
15500 ..(*f)
15501 })))
15502 }
15503 _ => Ok(Expression::Function(f)),
15504 },
15505 // SCHEMA_NAME(id) -> CURRENT_SCHEMA for PostgreSQL, 'main' for SQLite
15506 "SCHEMA_NAME" => match target {
15507 DialectType::PostgreSQL => Ok(Expression::CurrentSchema(Box::new(
15508 crate::expressions::CurrentSchema { this: None },
15509 ))),
15510 DialectType::SQLite => Ok(Expression::string("main")),
15511 _ => Ok(Expression::Function(f)),
15512 },
15513 // TO_TIMESTAMP(x, fmt) 2-arg from Spark/Hive: convert Java format to target format
15514 "TO_TIMESTAMP"
15515 if f.args.len() == 2
15516 && matches!(
15517 source,
15518 DialectType::Spark
15519 | DialectType::Databricks
15520 | DialectType::Hive
15521 )
15522 && matches!(target, DialectType::DuckDB) =>
15523 {
15524 let mut args = f.args;
15525 let val = args.remove(0);
15526 let fmt_expr = args.remove(0);
15527 if let Expression::Literal(Literal::String(ref s)) = fmt_expr {
15528 // Convert Java/Spark format to C strptime format
/// Converts a Java/Spark `SimpleDateFormat`-style pattern into the C
/// `strptime`/`strftime` pattern dialect that DuckDB understands.
///
/// Multi-letter tokens are rewritten first (longer tokens before their
/// prefixes, e.g. `yyyy` before `yy`), then a character scan handles the
/// single-letter timezone tokens while copying already-emitted `%` escapes
/// verbatim so their letters are not re-interpreted.
fn java_to_c_fmt(fmt: &str) -> String {
    let result = fmt
        .replace("yyyy", "%Y")
        .replace("SSSSSS", "%f")
        // Java `EEEE` is the full weekday name. In C/DuckDB strftime that
        // is `%A`; `%W` (the previous mapping) is the week-of-year number,
        // a MySQL/Presto date_format convention that is wrong here.
        .replace("EEEE", "%A")
        .replace("MM", "%m")
        .replace("dd", "%d")
        .replace("HH", "%H")
        .replace("mm", "%M")
        .replace("ss", "%S")
        .replace("yy", "%y");
    let mut out = String::new();
    let chars: Vec<char> = result.chars().collect();
    let mut i = 0;
    while i < chars.len() {
        if chars[i] == '%' && i + 1 < chars.len() {
            // Copy an existing escape untouched (e.g. the `%A` above).
            out.push(chars[i]);
            out.push(chars[i + 1]);
            i += 2;
        } else if chars[i] == 'z' {
            // Java `z`: timezone name (e.g. PST) -> `%Z`.
            out.push_str("%Z");
            i += 1;
        } else if chars[i] == 'Z' {
            // Java `Z`: RFC-822 numeric offset (e.g. +0100) -> `%z`.
            out.push_str("%z");
            i += 1;
        } else {
            out.push(chars[i]);
            i += 1;
        }
    }
    out
}
15561 let c_fmt = java_to_c_fmt(s);
15562 Ok(Expression::Function(Box::new(Function::new(
15563 "STRPTIME".to_string(),
15564 vec![val, Expression::string(&c_fmt)],
15565 ))))
15566 } else {
15567 Ok(Expression::Function(Box::new(Function::new(
15568 "STRPTIME".to_string(),
15569 vec![val, fmt_expr],
15570 ))))
15571 }
15572 }
15573 // TO_DATE(x) 1-arg from Doris: date conversion
15574 "TO_DATE"
15575 if f.args.len() == 1
15576 && matches!(
15577 source,
15578 DialectType::Doris | DialectType::StarRocks
15579 ) =>
15580 {
15581 let arg = f.args.into_iter().next().unwrap();
15582 match target {
15583 DialectType::Oracle
15584 | DialectType::DuckDB
15585 | DialectType::TSQL => {
15586 // CAST(x AS DATE)
15587 Ok(Expression::Cast(Box::new(Cast {
15588 this: arg,
15589 to: DataType::Date,
15590 double_colon_syntax: false,
15591 trailing_comments: vec![],
15592 format: None,
15593 default: None,
15594 })))
15595 }
15596 DialectType::MySQL | DialectType::SingleStore => {
15597 // DATE(x)
15598 Ok(Expression::Function(Box::new(Function::new(
15599 "DATE".to_string(),
15600 vec![arg],
15601 ))))
15602 }
15603 _ => {
15604 // Default: keep as TO_DATE(x) (Spark, PostgreSQL, etc.)
15605 Ok(Expression::Function(Box::new(Function::new(
15606 "TO_DATE".to_string(),
15607 vec![arg],
15608 ))))
15609 }
15610 }
15611 }
15612 // TO_DATE(x) 1-arg from Spark/Hive: safe date conversion
15613 "TO_DATE"
15614 if f.args.len() == 1
15615 && matches!(
15616 source,
15617 DialectType::Spark
15618 | DialectType::Databricks
15619 | DialectType::Hive
15620 ) =>
15621 {
15622 let arg = f.args.into_iter().next().unwrap();
15623 match target {
15624 DialectType::DuckDB => {
15625 // Spark TO_DATE is safe -> TRY_CAST(x AS DATE)
15626 Ok(Expression::TryCast(Box::new(Cast {
15627 this: arg,
15628 to: DataType::Date,
15629 double_colon_syntax: false,
15630 trailing_comments: vec![],
15631 format: None,
15632 default: None,
15633 })))
15634 }
15635 DialectType::Presto
15636 | DialectType::Trino
15637 | DialectType::Athena => {
15638 // CAST(CAST(x AS TIMESTAMP) AS DATE)
15639 Ok(Self::double_cast_timestamp_date(arg))
15640 }
15641 DialectType::Snowflake => {
15642 // Spark's TO_DATE is safe -> TRY_TO_DATE(x, 'yyyy-mm-DD')
15643 // The default Spark format 'yyyy-MM-dd' maps to Snowflake 'yyyy-mm-DD'
15644 Ok(Expression::Function(Box::new(Function::new(
15645 "TRY_TO_DATE".to_string(),
15646 vec![arg, Expression::string("yyyy-mm-DD")],
15647 ))))
15648 }
15649 _ => {
15650 // Default: keep as TO_DATE(x)
15651 Ok(Expression::Function(Box::new(Function::new(
15652 "TO_DATE".to_string(),
15653 vec![arg],
15654 ))))
15655 }
15656 }
15657 }
15658 // TO_DATE(x, fmt) 2-arg from Spark/Hive: format-based date conversion
15659 "TO_DATE"
15660 if f.args.len() == 2
15661 && matches!(
15662 source,
15663 DialectType::Spark
15664 | DialectType::Databricks
15665 | DialectType::Hive
15666 ) =>
15667 {
15668 let mut args = f.args;
15669 let val = args.remove(0);
15670 let fmt_expr = args.remove(0);
15671 let is_default_format = matches!(&fmt_expr, Expression::Literal(Literal::String(s)) if s == "yyyy-MM-dd");
15672
15673 if is_default_format {
15674 // Default format: same as 1-arg form
15675 match target {
15676 DialectType::DuckDB => {
15677 Ok(Expression::TryCast(Box::new(Cast {
15678 this: val,
15679 to: DataType::Date,
15680 double_colon_syntax: false,
15681 trailing_comments: vec![],
15682 format: None,
15683 default: None,
15684 })))
15685 }
15686 DialectType::Presto
15687 | DialectType::Trino
15688 | DialectType::Athena => {
15689 Ok(Self::double_cast_timestamp_date(val))
15690 }
15691 DialectType::Snowflake => {
15692 // TRY_TO_DATE(x, format) with Snowflake format mapping
15693 let sf_fmt = "yyyy-MM-dd"
15694 .replace("yyyy", "yyyy")
15695 .replace("MM", "mm")
15696 .replace("dd", "DD");
15697 Ok(Expression::Function(Box::new(Function::new(
15698 "TRY_TO_DATE".to_string(),
15699 vec![val, Expression::string(&sf_fmt)],
15700 ))))
15701 }
15702 _ => Ok(Expression::Function(Box::new(Function::new(
15703 "TO_DATE".to_string(),
15704 vec![val],
15705 )))),
15706 }
15707 } else {
15708 // Non-default format: use format-based parsing
15709 if let Expression::Literal(Literal::String(ref s)) = fmt_expr {
15710 match target {
15711 DialectType::DuckDB => {
15712 // CAST(CAST(TRY_STRPTIME(x, c_fmt) AS TIMESTAMP) AS DATE)
/// Converts a Java/Spark `SimpleDateFormat`-style pattern into the C
/// `strptime` pattern dialect used by DuckDB's `TRY_STRPTIME`.
///
/// Multi-letter tokens are rewritten first (longer tokens before their
/// prefixes, e.g. `yyyy` before `yy`), then a character scan handles the
/// single-letter timezone tokens while copying already-emitted `%` escapes
/// verbatim so their letters are not re-interpreted.
fn java_to_c_fmt_todate(fmt: &str) -> String {
    let result = fmt
        .replace("yyyy", "%Y")
        .replace("SSSSSS", "%f")
        // Java `EEEE` is the full weekday name. In C/DuckDB strptime that
        // is `%A`; `%W` (the previous mapping) is the week-of-year number,
        // a MySQL/Presto date_format convention that is wrong here.
        .replace("EEEE", "%A")
        .replace("MM", "%m")
        .replace("dd", "%d")
        .replace("HH", "%H")
        .replace("mm", "%M")
        .replace("ss", "%S")
        .replace("yy", "%y");
    let mut out = String::new();
    let chars: Vec<char> = result.chars().collect();
    let mut i = 0;
    while i < chars.len() {
        if chars[i] == '%' && i + 1 < chars.len() {
            // Copy an existing escape untouched (e.g. the `%A` above).
            out.push(chars[i]);
            out.push(chars[i + 1]);
            i += 2;
        } else if chars[i] == 'z' {
            // Java `z`: timezone name (e.g. PST) -> `%Z`.
            out.push_str("%Z");
            i += 1;
        } else if chars[i] == 'Z' {
            // Java `Z`: RFC-822 numeric offset (e.g. +0100) -> `%z`.
            out.push_str("%z");
            i += 1;
        } else {
            out.push(chars[i]);
            i += 1;
        }
    }
    out
}
15745 let c_fmt = java_to_c_fmt_todate(s);
15746 // CAST(CAST(TRY_STRPTIME(x, fmt) AS TIMESTAMP) AS DATE)
15747 let try_strptime =
15748 Expression::Function(Box::new(Function::new(
15749 "TRY_STRPTIME".to_string(),
15750 vec![val, Expression::string(&c_fmt)],
15751 )));
15752 let cast_ts = Expression::Cast(Box::new(Cast {
15753 this: try_strptime,
15754 to: DataType::Timestamp {
15755 precision: None,
15756 timezone: false,
15757 },
15758 double_colon_syntax: false,
15759 trailing_comments: vec![],
15760 format: None,
15761 default: None,
15762 }));
15763 Ok(Expression::Cast(Box::new(Cast {
15764 this: cast_ts,
15765 to: DataType::Date,
15766 double_colon_syntax: false,
15767 trailing_comments: vec![],
15768 format: None,
15769 default: None,
15770 })))
15771 }
15772 DialectType::Presto
15773 | DialectType::Trino
15774 | DialectType::Athena => {
15775 // CAST(DATE_PARSE(x, presto_fmt) AS DATE)
15776 let p_fmt = s
15777 .replace("yyyy", "%Y")
15778 .replace("SSSSSS", "%f")
15779 .replace("MM", "%m")
15780 .replace("dd", "%d")
15781 .replace("HH", "%H")
15782 .replace("mm", "%M")
15783 .replace("ss", "%S")
15784 .replace("yy", "%y");
15785 let date_parse =
15786 Expression::Function(Box::new(Function::new(
15787 "DATE_PARSE".to_string(),
15788 vec![val, Expression::string(&p_fmt)],
15789 )));
15790 Ok(Expression::Cast(Box::new(Cast {
15791 this: date_parse,
15792 to: DataType::Date,
15793 double_colon_syntax: false,
15794 trailing_comments: vec![],
15795 format: None,
15796 default: None,
15797 })))
15798 }
15799 DialectType::Snowflake => {
15800 // TRY_TO_DATE(x, snowflake_fmt)
15801 Ok(Expression::Function(Box::new(Function::new(
15802 "TRY_TO_DATE".to_string(),
15803 vec![val, Expression::string(s)],
15804 ))))
15805 }
15806 _ => Ok(Expression::Function(Box::new(Function::new(
15807 "TO_DATE".to_string(),
15808 vec![val, fmt_expr],
15809 )))),
15810 }
15811 } else {
15812 Ok(Expression::Function(Box::new(Function::new(
15813 "TO_DATE".to_string(),
15814 vec![val, fmt_expr],
15815 ))))
15816 }
15817 }
15818 }
15819 // TO_TIMESTAMP(x) 1-arg: epoch conversion
15820 "TO_TIMESTAMP"
15821 if f.args.len() == 1
15822 && matches!(source, DialectType::DuckDB)
15823 && matches!(
15824 target,
15825 DialectType::BigQuery
15826 | DialectType::Presto
15827 | DialectType::Trino
15828 | DialectType::Hive
15829 | DialectType::Spark
15830 | DialectType::Databricks
15831 | DialectType::Athena
15832 ) =>
15833 {
15834 let arg = f.args.into_iter().next().unwrap();
15835 let func_name = match target {
15836 DialectType::BigQuery => "TIMESTAMP_SECONDS",
15837 DialectType::Presto
15838 | DialectType::Trino
15839 | DialectType::Athena
15840 | DialectType::Hive
15841 | DialectType::Spark
15842 | DialectType::Databricks => "FROM_UNIXTIME",
15843 _ => "TO_TIMESTAMP",
15844 };
15845 Ok(Expression::Function(Box::new(Function::new(
15846 func_name.to_string(),
15847 vec![arg],
15848 ))))
15849 }
15850 // CONCAT(x) single-arg: -> CONCAT(COALESCE(x, '')) for Spark
15851 "CONCAT" if f.args.len() == 1 => {
15852 let arg = f.args.into_iter().next().unwrap();
15853 match target {
15854 DialectType::Presto
15855 | DialectType::Trino
15856 | DialectType::Athena => {
15857 // CONCAT(a) -> CAST(a AS VARCHAR)
15858 Ok(Expression::Cast(Box::new(Cast {
15859 this: arg,
15860 to: DataType::VarChar {
15861 length: None,
15862 parenthesized_length: false,
15863 },
15864 trailing_comments: vec![],
15865 double_colon_syntax: false,
15866 format: None,
15867 default: None,
15868 })))
15869 }
15870 DialectType::TSQL => {
15871 // CONCAT(a) -> a
15872 Ok(arg)
15873 }
15874 DialectType::DuckDB => {
15875 // Keep CONCAT(a) for DuckDB (native support)
15876 Ok(Expression::Function(Box::new(Function::new(
15877 "CONCAT".to_string(),
15878 vec![arg],
15879 ))))
15880 }
15881 DialectType::Spark | DialectType::Databricks => {
15882 let coalesced = Expression::Coalesce(Box::new(
15883 crate::expressions::VarArgFunc {
15884 expressions: vec![arg, Expression::string("")],
15885 original_name: None,
15886 },
15887 ));
15888 Ok(Expression::Function(Box::new(Function::new(
15889 "CONCAT".to_string(),
15890 vec![coalesced],
15891 ))))
15892 }
15893 _ => Ok(Expression::Function(Box::new(Function::new(
15894 "CONCAT".to_string(),
15895 vec![arg],
15896 )))),
15897 }
15898 }
15899 // REGEXP_EXTRACT(a, p) 2-arg: BigQuery default group is 0 (no 3rd arg needed)
15900 "REGEXP_EXTRACT"
15901 if f.args.len() == 3 && matches!(target, DialectType::BigQuery) =>
15902 {
15903 // If group_index is 0, drop it
15904 let drop_group = match &f.args[2] {
15905 Expression::Literal(Literal::Number(n)) => n == "0",
15906 _ => false,
15907 };
15908 if drop_group {
15909 let mut args = f.args;
15910 args.truncate(2);
15911 Ok(Expression::Function(Box::new(Function::new(
15912 "REGEXP_EXTRACT".to_string(),
15913 args,
15914 ))))
15915 } else {
15916 Ok(Expression::Function(f))
15917 }
15918 }
15919 // REGEXP_EXTRACT(a, pattern, group, flags) 4-arg -> REGEXP_SUBSTR for Snowflake
15920 "REGEXP_EXTRACT"
15921 if f.args.len() == 4
15922 && matches!(target, DialectType::Snowflake) =>
15923 {
15924 // REGEXP_EXTRACT(a, 'pattern', 2, 'i') -> REGEXP_SUBSTR(a, 'pattern', 1, 1, 'i', 2)
15925 let mut args = f.args;
15926 let this = args.remove(0);
15927 let pattern = args.remove(0);
15928 let group = args.remove(0);
15929 let flags = args.remove(0);
15930 Ok(Expression::Function(Box::new(Function::new(
15931 "REGEXP_SUBSTR".to_string(),
15932 vec![
15933 this,
15934 pattern,
15935 Expression::number(1),
15936 Expression::number(1),
15937 flags,
15938 group,
15939 ],
15940 ))))
15941 }
15942 // REGEXP_SUBSTR(a, pattern, position) 3-arg -> REGEXP_EXTRACT(SUBSTRING(a, pos), pattern)
15943 "REGEXP_SUBSTR"
15944 if f.args.len() == 3
15945 && matches!(
15946 target,
15947 DialectType::DuckDB
15948 | DialectType::Presto
15949 | DialectType::Trino
15950 | DialectType::Spark
15951 | DialectType::Databricks
15952 ) =>
15953 {
15954 let mut args = f.args;
15955 let this = args.remove(0);
15956 let pattern = args.remove(0);
15957 let position = args.remove(0);
15958 // Wrap subject in SUBSTRING(this, position) to apply the offset
15959 let substring_expr = Expression::Function(Box::new(Function::new(
15960 "SUBSTRING".to_string(),
15961 vec![this, position],
15962 )));
15963 let target_name = match target {
15964 DialectType::DuckDB => "REGEXP_EXTRACT",
15965 _ => "REGEXP_EXTRACT",
15966 };
15967 Ok(Expression::Function(Box::new(Function::new(
15968 target_name.to_string(),
15969 vec![substring_expr, pattern],
15970 ))))
15971 }
            // TO_DAYS(x) -> (DATEDIFF(x, '0000-01-01') + 1) or target-specific
            "TO_DAYS" if f.args.len() == 1 => {
                let x = f.args.into_iter().next().unwrap();
                // Emulated everywhere as "days since 0000-01-01, plus one".
                let epoch = Expression::string("0000-01-01");
                // Build the final target-specific expression directly
                let datediff_expr = match target {
                    DialectType::MySQL | DialectType::SingleStore => {
                        // MySQL: (DATEDIFF(x, '0000-01-01') + 1)
                        Expression::Function(Box::new(Function::new(
                            "DATEDIFF".to_string(),
                            vec![x, epoch],
                        )))
                    }
                    DialectType::DuckDB => {
                        // DuckDB: (DATE_DIFF('DAY', CAST('0000-01-01' AS DATE), CAST(x AS DATE)) + 1)
                        let cast_epoch = Expression::Cast(Box::new(Cast {
                            this: epoch,
                            to: DataType::Date,
                            trailing_comments: Vec::new(),
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        let cast_x = Expression::Cast(Box::new(Cast {
                            this: x,
                            to: DataType::Date,
                            trailing_comments: Vec::new(),
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        Expression::Function(Box::new(Function::new(
                            "DATE_DIFF".to_string(),
                            vec![Expression::string("DAY"), cast_epoch, cast_x],
                        )))
                    }
                    DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Athena => {
                        // Presto: (DATE_DIFF('DAY', CAST(CAST('0000-01-01' AS TIMESTAMP) AS DATE), CAST(CAST(x AS TIMESTAMP) AS DATE)) + 1)
                        // The shared helper produces the TIMESTAMP->DATE double cast.
                        let cast_epoch = Self::double_cast_timestamp_date(epoch);
                        let cast_x = Self::double_cast_timestamp_date(x);
                        Expression::Function(Box::new(Function::new(
                            "DATE_DIFF".to_string(),
                            vec![Expression::string("DAY"), cast_epoch, cast_x],
                        )))
                    }
                    _ => {
                        // Default: (DATEDIFF(x, '0000-01-01') + 1)
                        Expression::Function(Box::new(Function::new(
                            "DATEDIFF".to_string(),
                            vec![x, epoch],
                        )))
                    }
                };
                // Parenthesize so the "+ 1" binds correctly when this node is embedded
                // in a larger expression.
                let add_one = Expression::Add(Box::new(BinaryOp::new(
                    datediff_expr,
                    Expression::number(1),
                )));
                Ok(Expression::Paren(Box::new(crate::expressions::Paren {
                    this: add_one,
                    trailing_comments: Vec::new(),
                })))
            }
            // STR_TO_DATE(x, format) -> DATE_PARSE / STRPTIME / TO_DATE etc.
            "STR_TO_DATE"
                if f.args.len() == 2
                    && matches!(
                        target,
                        DialectType::Presto | DialectType::Trino
                    ) =>
            {
                let mut args = f.args;
                let x = args.remove(0);
                let format_expr = args.remove(0);
                // Check if the format contains time components. Only a literal format
                // string can be inspected; a non-literal format conservatively counts
                // as date-only.
                let has_time =
                    if let Expression::Literal(Literal::String(ref fmt)) =
                        format_expr
                    {
                        fmt.contains("%H")
                            || fmt.contains("%T")
                            || fmt.contains("%M")
                            || fmt.contains("%S")
                            || fmt.contains("%I")
                            || fmt.contains("%p")
                    } else {
                        false
                    };
                let date_parse = Expression::Function(Box::new(Function::new(
                    "DATE_PARSE".to_string(),
                    vec![x, format_expr],
                )));
                if has_time {
                    // Has time components: just DATE_PARSE
                    Ok(date_parse)
                } else {
                    // Date-only: CAST(DATE_PARSE(...) AS DATE)
                    Ok(Expression::Cast(Box::new(Cast {
                        this: date_parse,
                        to: DataType::Date,
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    })))
                }
            }
            // STR_TO_DATE(x, format) -> CAST(TO_DATE(x, fmt) AS TIMESTAMP) for Postgres/Redshift
            "STR_TO_DATE"
                if f.args.len() == 2
                    && matches!(
                        target,
                        DialectType::PostgreSQL | DialectType::Redshift
                    ) =>
            {
                let mut args = f.args;
                let x = args.remove(0);
                let fmt = args.remove(0);
                // Translate strftime-style tokens into Postgres TO_DATE tokens.
                // NOTE(review): %M is mapped to MI (minutes, strftime convention);
                // MySQL's STR_TO_DATE uses %i for minutes and %M for month name —
                // confirm which format dialect source queries use here.
                let pg_fmt = match fmt {
                    Expression::Literal(Literal::String(s)) => Expression::string(
                        &s.replace("%Y", "YYYY")
                            .replace("%m", "MM")
                            .replace("%d", "DD")
                            .replace("%H", "HH24")
                            .replace("%M", "MI")
                            .replace("%S", "SS"),
                    ),
                    // Non-literal formats pass through untranslated.
                    other => other,
                };
                let to_date = Expression::Function(Box::new(Function::new(
                    "TO_DATE".to_string(),
                    vec![x, pg_fmt],
                )));
                Ok(Expression::Cast(Box::new(Cast {
                    this: to_date,
                    to: DataType::Timestamp {
                        timezone: false,
                        precision: None,
                    },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                })))
            }
            // RANGE(start, end) -> GENERATE_SERIES for SQLite
            "RANGE"
                if (f.args.len() == 1 || f.args.len() == 2)
                    && matches!(target, DialectType::SQLite) =>
            {
                if f.args.len() == 2 {
                    // RANGE(start, end) -> (SELECT value AS col_alias FROM GENERATE_SERIES(start, end))
                    // For SQLite, RANGE is exclusive on end, GENERATE_SERIES is inclusive
                    // NOTE(review): despite the note above, `end` is forwarded without
                    // an `end - 1` adjustment — confirm whether the off-by-one is
                    // compensated elsewhere or is a latent bug.
                    let mut args = f.args;
                    let start = args.remove(0);
                    let end = args.remove(0);
                    Ok(Expression::Function(Box::new(Function::new(
                        "GENERATE_SERIES".to_string(),
                        vec![start, end],
                    ))))
                } else {
                    // 1-arg RANGE is passed through unchanged.
                    Ok(Expression::Function(f))
                }
            }
            // UNIFORM(low, high[, seed]) -> UNIFORM(low, high, RANDOM([seed])) for Snowflake
            // When source is Snowflake, keep as-is (args already in correct form)
            "UNIFORM"
                if matches!(target, DialectType::Snowflake)
                    && (f.args.len() == 2 || f.args.len() == 3) =>
            {
                if matches!(source, DialectType::Snowflake) {
                    // Snowflake -> Snowflake: keep as-is
                    Ok(Expression::Function(f))
                } else {
                    let mut args = f.args;
                    let low = args.remove(0);
                    let high = args.remove(0);
                    // Snowflake's UNIFORM takes a generator expression as its third
                    // argument: reuse the caller's seed via RANDOM(seed) when one was
                    // provided, otherwise an unseeded RANDOM().
                    let random = if !args.is_empty() {
                        let seed = args.remove(0);
                        Expression::Function(Box::new(Function::new(
                            "RANDOM".to_string(),
                            vec![seed],
                        )))
                    } else {
                        Expression::Function(Box::new(Function::new(
                            "RANDOM".to_string(),
                            vec![],
                        )))
                    };
                    Ok(Expression::Function(Box::new(Function::new(
                        "UNIFORM".to_string(),
                        vec![low, high, random],
                    ))))
                }
            }
            // TO_UTC_TIMESTAMP(ts, tz) -> target-specific UTC conversion
            "TO_UTC_TIMESTAMP" if f.args.len() == 2 => {
                let mut args = f.args;
                let ts_arg = args.remove(0);
                let tz_arg = args.remove(0);
                // Cast string literal to TIMESTAMP for all targets; non-literal
                // expressions are forwarded as-is.
                let ts_cast =
                    if matches!(&ts_arg, Expression::Literal(Literal::String(_))) {
                        Expression::Cast(Box::new(Cast {
                            this: ts_arg,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }))
                    } else {
                        ts_arg
                    };
                match target {
                    DialectType::Spark | DialectType::Databricks => {
                        // Spark-family targets keep the function name unchanged.
                        Ok(Expression::Function(Box::new(Function::new(
                            "TO_UTC_TIMESTAMP".to_string(),
                            vec![ts_cast, tz_arg],
                        ))))
                    }
                    DialectType::Snowflake => {
                        // CONVERT_TIMEZONE(tz, 'UTC', CAST(ts AS TIMESTAMP))
                        Ok(Expression::Function(Box::new(Function::new(
                            "CONVERT_TIMEZONE".to_string(),
                            vec![tz_arg, Expression::string("UTC"), ts_cast],
                        ))))
                    }
                    DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Athena => {
                        // WITH_TIMEZONE(CAST(ts AS TIMESTAMP), tz) AT TIME ZONE 'UTC'
                        let wtz = Expression::Function(Box::new(Function::new(
                            "WITH_TIMEZONE".to_string(),
                            vec![ts_cast, tz_arg],
                        )));
                        Ok(Expression::AtTimeZone(Box::new(
                            crate::expressions::AtTimeZone {
                                this: wtz,
                                zone: Expression::string("UTC"),
                            },
                        )))
                    }
                    DialectType::BigQuery => {
                        // DATETIME(TIMESTAMP(CAST(ts AS DATETIME), tz), 'UTC')
                        // If ts was wrapped in a CAST above, unwrap it and re-cast to
                        // DATETIME rather than double-casting.
                        let cast_dt = Expression::Cast(Box::new(Cast {
                            this: if let Expression::Cast(c) = ts_cast {
                                c.this
                            } else {
                                // NOTE(review): in this branch ts_cast was not moved
                                // by the pattern, so the clone looks avoidable —
                                // confirm and simplify.
                                ts_cast.clone()
                            },
                            to: DataType::Custom {
                                name: "DATETIME".to_string(),
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        let ts_func =
                            Expression::Function(Box::new(Function::new(
                                "TIMESTAMP".to_string(),
                                vec![cast_dt, tz_arg],
                            )));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATETIME".to_string(),
                            vec![ts_func, Expression::string("UTC")],
                        ))))
                    }
                    _ => {
                        // DuckDB, PostgreSQL, Redshift: CAST(ts AS TIMESTAMP) AT TIME ZONE tz AT TIME ZONE 'UTC'
                        let atz1 = Expression::AtTimeZone(Box::new(
                            crate::expressions::AtTimeZone {
                                this: ts_cast,
                                zone: tz_arg,
                            },
                        ));
                        Ok(Expression::AtTimeZone(Box::new(
                            crate::expressions::AtTimeZone {
                                this: atz1,
                                zone: Expression::string("UTC"),
                            },
                        )))
                    }
                }
            }
            // FROM_UTC_TIMESTAMP(ts, tz) -> target-specific UTC conversion
            "FROM_UTC_TIMESTAMP" if f.args.len() == 2 => {
                let mut args = f.args;
                let ts_arg = args.remove(0);
                let tz_arg = args.remove(0);
                // Cast string literal to TIMESTAMP (same handling as the
                // TO_UTC_TIMESTAMP arm).
                let ts_cast =
                    if matches!(&ts_arg, Expression::Literal(Literal::String(_))) {
                        Expression::Cast(Box::new(Cast {
                            this: ts_arg,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }))
                    } else {
                        ts_arg
                    };
                match target {
                    DialectType::Spark | DialectType::Databricks => {
                        // Spark-family targets keep the function name unchanged.
                        Ok(Expression::Function(Box::new(Function::new(
                            "FROM_UTC_TIMESTAMP".to_string(),
                            vec![ts_cast, tz_arg],
                        ))))
                    }
                    DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Athena => {
                        // AT_TIMEZONE(CAST(ts AS TIMESTAMP), tz)
                        Ok(Expression::Function(Box::new(Function::new(
                            "AT_TIMEZONE".to_string(),
                            vec![ts_cast, tz_arg],
                        ))))
                    }
                    DialectType::Snowflake => {
                        // CONVERT_TIMEZONE('UTC', tz, CAST(ts AS TIMESTAMP))
                        Ok(Expression::Function(Box::new(Function::new(
                            "CONVERT_TIMEZONE".to_string(),
                            vec![Expression::string("UTC"), tz_arg, ts_cast],
                        ))))
                    }
                    _ => {
                        // DuckDB, PostgreSQL, Redshift: CAST(ts AS TIMESTAMP) AT TIME ZONE tz
                        Ok(Expression::AtTimeZone(Box::new(
                            crate::expressions::AtTimeZone {
                                this: ts_cast,
                                zone: tz_arg,
                            },
                        )))
                    }
                }
            }
16317 // MAP_FROM_ARRAYS(keys, values) -> target-specific map construction
16318 "MAP_FROM_ARRAYS" if f.args.len() == 2 => {
16319 let name = match target {
16320 DialectType::Snowflake => "OBJECT_CONSTRUCT",
16321 _ => "MAP",
16322 };
16323 Ok(Expression::Function(Box::new(Function::new(
16324 name.to_string(),
16325 f.args,
16326 ))))
16327 }
16328 // STR_TO_MAP(s, pair_delim, kv_delim) -> SPLIT_TO_MAP for Presto
16329 "STR_TO_MAP" if f.args.len() >= 1 => match target {
16330 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
16331 Ok(Expression::Function(Box::new(Function::new(
16332 "SPLIT_TO_MAP".to_string(),
16333 f.args,
16334 ))))
16335 }
16336 _ => Ok(Expression::Function(f)),
16337 },
            // TIME_TO_STR(x, fmt) -> Expression::TimeToStr for proper generation
            "TIME_TO_STR" if f.args.len() == 2 => {
                let mut args = f.args;
                let this = args.remove(0);
                let fmt_expr = args.remove(0);
                // Only a literal format string can be captured; anything else falls
                // back to the canonical "%Y-%m-%d %H:%M:%S" default.
                let format =
                    if let Expression::Literal(Literal::String(s)) = fmt_expr {
                        s
                    } else {
                        "%Y-%m-%d %H:%M:%S".to_string()
                    };
                Ok(Expression::TimeToStr(Box::new(
                    crate::expressions::TimeToStr {
                        this: Box::new(this),
                        format,
                        culture: None,
                        zone: None,
                    },
                )))
            }
            // STR_TO_TIME(x, fmt) -> Expression::StrToTime for proper generation
            "STR_TO_TIME" if f.args.len() == 2 => {
                let mut args = f.args;
                let this = args.remove(0);
                let fmt_expr = args.remove(0);
                // Same literal-or-default format handling as TIME_TO_STR.
                let format =
                    if let Expression::Literal(Literal::String(s)) = fmt_expr {
                        s
                    } else {
                        "%Y-%m-%d %H:%M:%S".to_string()
                    };
                Ok(Expression::StrToTime(Box::new(
                    crate::expressions::StrToTime {
                        this: Box::new(this),
                        format,
                        zone: None,
                        safe: None,
                        target_type: None,
                    },
                )))
            }
            // STR_TO_UNIX(x[, fmt]) -> Expression::StrToUnix for proper generation
            "STR_TO_UNIX" if f.args.len() >= 1 => {
                let mut args = f.args;
                let this = args.remove(0);
                // The optional format is kept only when it is a string literal;
                // a non-literal second argument is dropped (None).
                let format = if !args.is_empty() {
                    if let Expression::Literal(Literal::String(s)) = args.remove(0)
                    {
                        Some(s)
                    } else {
                        None
                    }
                } else {
                    None
                };
                Ok(Expression::StrToUnix(Box::new(
                    crate::expressions::StrToUnix {
                        this: Some(Box::new(this)),
                        format,
                    },
                )))
            }
16400 // TIME_TO_UNIX(x) -> Expression::TimeToUnix for proper generation
16401 "TIME_TO_UNIX" if f.args.len() == 1 => {
16402 let mut args = f.args;
16403 let this = args.remove(0);
16404 Ok(Expression::TimeToUnix(Box::new(
16405 crate::expressions::UnaryFunc {
16406 this,
16407 original_name: None,
16408 },
16409 )))
16410 }
            // UNIX_TO_STR(x[, fmt]) -> Expression::UnixToStr for proper generation
            "UNIX_TO_STR" if f.args.len() >= 1 => {
                let mut args = f.args;
                let this = args.remove(0);
                // Keep the optional format only when it is a string literal;
                // a non-literal format argument is dropped (None).
                let format = if !args.is_empty() {
                    if let Expression::Literal(Literal::String(s)) = args.remove(0)
                    {
                        Some(s)
                    } else {
                        None
                    }
                } else {
                    None
                };
                Ok(Expression::UnixToStr(Box::new(
                    crate::expressions::UnixToStr {
                        this: Box::new(this),
                        format,
                    },
                )))
            }
            // UNIX_TO_TIME(x) -> Expression::UnixToTime for proper generation
            "UNIX_TO_TIME" if f.args.len() == 1 => {
                let mut args = f.args;
                let this = args.remove(0);
                // All optional knobs (scale, zone, format, ...) are left unset; the
                // generator fills in target-appropriate behavior.
                Ok(Expression::UnixToTime(Box::new(
                    crate::expressions::UnixToTime {
                        this: Box::new(this),
                        scale: None,
                        zone: None,
                        hours: None,
                        minutes: None,
                        format: None,
                        target_type: None,
                    },
                )))
            }
16448 // TIME_STR_TO_DATE(x) -> Expression::TimeStrToDate for proper generation
16449 "TIME_STR_TO_DATE" if f.args.len() == 1 => {
16450 let mut args = f.args;
16451 let this = args.remove(0);
16452 Ok(Expression::TimeStrToDate(Box::new(
16453 crate::expressions::UnaryFunc {
16454 this,
16455 original_name: None,
16456 },
16457 )))
16458 }
16459 // TIME_STR_TO_TIME(x) -> Expression::TimeStrToTime for proper generation
16460 "TIME_STR_TO_TIME" if f.args.len() == 1 => {
16461 let mut args = f.args;
16462 let this = args.remove(0);
16463 Ok(Expression::TimeStrToTime(Box::new(
16464 crate::expressions::TimeStrToTime {
16465 this: Box::new(this),
16466 zone: None,
16467 },
16468 )))
16469 }
            // MONTHS_BETWEEN(end, start) -> DuckDB complex expansion
            "MONTHS_BETWEEN" if f.args.len() == 2 => {
                match target {
                    DialectType::DuckDB => {
                        let mut args = f.args;
                        let end_date = args.remove(0);
                        let start_date = args.remove(0);
                        let cast_end = Self::ensure_cast_date(end_date);
                        let cast_start = Self::ensure_cast_date(start_date);
                        // DATE_DIFF('MONTH', start, end) + CASE WHEN DAY(end) = DAY(LAST_DAY(end)) AND DAY(start) = DAY(LAST_DAY(start)) THEN 0 ELSE (DAY(end) - DAY(start)) / 31.0 END
                        // i.e. whole months plus a 31-day-based fraction, except when
                        // both dates fall on the last day of their month.
                        let dd = Expression::Function(Box::new(Function::new(
                            "DATE_DIFF".to_string(),
                            vec![
                                Expression::string("MONTH"),
                                cast_start.clone(),
                                cast_end.clone(),
                            ],
                        )));
                        // DAY(end) / DAY(start)
                        let day_end =
                            Expression::Function(Box::new(Function::new(
                                "DAY".to_string(),
                                vec![cast_end.clone()],
                            )));
                        let day_start =
                            Expression::Function(Box::new(Function::new(
                                "DAY".to_string(),
                                vec![cast_start.clone()],
                            )));
                        // LAST_DAY(end) / LAST_DAY(start)
                        let last_day_end =
                            Expression::Function(Box::new(Function::new(
                                "LAST_DAY".to_string(),
                                vec![cast_end.clone()],
                            )));
                        let last_day_start =
                            Expression::Function(Box::new(Function::new(
                                "LAST_DAY".to_string(),
                                vec![cast_start.clone()],
                            )));
                        let day_last_end = Expression::Function(Box::new(
                            Function::new("DAY".to_string(), vec![last_day_end]),
                        ));
                        let day_last_start = Expression::Function(Box::new(
                            Function::new("DAY".to_string(), vec![last_day_start]),
                        ));
                        // Are both dates on the last day of their month?
                        let cond1 = Expression::Eq(Box::new(BinaryOp::new(
                            day_end.clone(),
                            day_last_end,
                        )));
                        let cond2 = Expression::Eq(Box::new(BinaryOp::new(
                            day_start.clone(),
                            day_last_start,
                        )));
                        let both_cond =
                            Expression::And(Box::new(BinaryOp::new(cond1, cond2)));
                        // Fractional part: (DAY(end) - DAY(start)) / 31.0
                        let day_diff = Expression::Sub(Box::new(BinaryOp::new(
                            day_end, day_start,
                        )));
                        let day_diff_paren = Expression::Paren(Box::new(
                            crate::expressions::Paren {
                                this: day_diff,
                                trailing_comments: Vec::new(),
                            },
                        ));
                        let frac = Expression::Div(Box::new(BinaryOp::new(
                            day_diff_paren,
                            Expression::Literal(Literal::Number(
                                "31.0".to_string(),
                            )),
                        )));
                        let case_expr = Expression::Case(Box::new(Case {
                            operand: None,
                            whens: vec![(both_cond, Expression::number(0))],
                            else_: Some(frac),
                            comments: Vec::new(),
                        }));
                        Ok(Expression::Add(Box::new(BinaryOp::new(dd, case_expr))))
                    }
                    DialectType::Snowflake | DialectType::Redshift => {
                        // DATEDIFF(MONTH, start, end) — whole-month difference; the
                        // unit is a bare identifier, not a string literal.
                        let mut args = f.args;
                        let end_date = args.remove(0);
                        let start_date = args.remove(0);
                        let unit = Expression::Identifier(Identifier::new("MONTH"));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEDIFF".to_string(),
                            vec![unit, start_date, end_date],
                        ))))
                    }
                    DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Athena => {
                        // DATE_DIFF('MONTH', start, end) — unit as a string literal.
                        let mut args = f.args;
                        let end_date = args.remove(0);
                        let start_date = args.remove(0);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_DIFF".to_string(),
                            vec![Expression::string("MONTH"), start_date, end_date],
                        ))))
                    }
                    _ => Ok(Expression::Function(f)),
                }
            }
            // MONTHS_BETWEEN(end, start, roundOff) - 3-arg form (Spark-specific)
            // Drop the roundOff arg for non-Spark targets, keep it for Spark
            "MONTHS_BETWEEN" if f.args.len() == 3 => {
                match target {
                    DialectType::Spark | DialectType::Databricks => {
                        // Spark-family targets accept the 3-arg form natively.
                        Ok(Expression::Function(f))
                    }
                    _ => {
                        // Drop the 3rd arg and delegate to the 2-arg logic
                        let mut args = f.args;
                        let end_date = args.remove(0);
                        let start_date = args.remove(0);
                        // Re-create as 2-arg and process — the recursive call lets the
                        // 2-arg arm apply its per-target expansions.
                        let f2 = Function::new(
                            "MONTHS_BETWEEN".to_string(),
                            vec![end_date, start_date],
                        );
                        let e2 = Expression::Function(Box::new(f2));
                        Self::cross_dialect_normalize(e2, source, target)
                    }
                }
            }
            // TO_TIMESTAMP(x) with 1 arg -> CAST(x AS TIMESTAMP) for most targets
            // (only when the source is a Spark-family dialect).
            "TO_TIMESTAMP"
                if f.args.len() == 1
                    && matches!(
                        source,
                        DialectType::Spark
                            | DialectType::Databricks
                            | DialectType::Hive
                    ) =>
            {
                let arg = f.args.into_iter().next().unwrap();
                Ok(Expression::Cast(Box::new(Cast {
                    this: arg,
                    to: DataType::Timestamp {
                        timezone: false,
                        precision: None,
                    },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                })))
            }
            // STRING(x) -> CAST(x AS STRING) for Spark target
            "STRING"
                if f.args.len() == 1
                    && matches!(
                        source,
                        DialectType::Spark | DialectType::Databricks
                    ) =>
            {
                let arg = f.args.into_iter().next().unwrap();
                // Spark-family targets keep the STRING type name; all other targets
                // get the portable TEXT type.
                let dt = match target {
                    DialectType::Spark
                    | DialectType::Databricks
                    | DialectType::Hive => DataType::Custom {
                        name: "STRING".to_string(),
                    },
                    _ => DataType::Text,
                };
                Ok(Expression::Cast(Box::new(Cast {
                    this: arg,
                    to: dt,
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                })))
            }
16642 // LOGICAL_OR(x) -> BOOL_OR(x) for Spark target
16643 "LOGICAL_OR" if f.args.len() == 1 => {
16644 let name = match target {
16645 DialectType::Spark | DialectType::Databricks => "BOOL_OR",
16646 _ => "LOGICAL_OR",
16647 };
16648 Ok(Expression::Function(Box::new(Function::new(
16649 name.to_string(),
16650 f.args,
16651 ))))
16652 }
16653 // SPLIT(x, pattern) from Spark -> STR_SPLIT_REGEX for DuckDB, REGEXP_SPLIT for Presto
16654 "SPLIT"
16655 if f.args.len() == 2
16656 && matches!(
16657 source,
16658 DialectType::Spark
16659 | DialectType::Databricks
16660 | DialectType::Hive
16661 ) =>
16662 {
16663 let name = match target {
16664 DialectType::DuckDB => "STR_SPLIT_REGEX",
16665 DialectType::Presto
16666 | DialectType::Trino
16667 | DialectType::Athena => "REGEXP_SPLIT",
16668 DialectType::Spark
16669 | DialectType::Databricks
16670 | DialectType::Hive => "SPLIT",
16671 _ => "SPLIT",
16672 };
16673 Ok(Expression::Function(Box::new(Function::new(
16674 name.to_string(),
16675 f.args,
16676 ))))
16677 }
            // TRY_ELEMENT_AT -> ELEMENT_AT for Presto, array[idx] for DuckDB
            "TRY_ELEMENT_AT" if f.args.len() == 2 => match target {
                DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                    // Presto-family: plain rename, argument order unchanged.
                    Ok(Expression::Function(Box::new(Function::new(
                        "ELEMENT_AT".to_string(),
                        f.args,
                    ))))
                }
                DialectType::DuckDB => {
                    // DuckDB: render as subscript syntax arr[idx] instead of a
                    // function call.
                    let mut args = f.args;
                    let arr = args.remove(0);
                    let idx = args.remove(0);
                    Ok(Expression::Subscript(Box::new(
                        crate::expressions::Subscript {
                            this: arr,
                            index: idx,
                        },
                    )))
                }
                _ => Ok(Expression::Function(f)),
            },
16699 // ARRAY_FILTER(arr, lambda) -> FILTER for Hive/Spark/Presto, LIST_FILTER for DuckDB
16700 "ARRAY_FILTER" if f.args.len() == 2 => {
16701 let name = match target {
16702 DialectType::DuckDB => "LIST_FILTER",
16703 DialectType::StarRocks => "ARRAY_FILTER",
16704 _ => "FILTER",
16705 };
16706 Ok(Expression::Function(Box::new(Function::new(
16707 name.to_string(),
16708 f.args,
16709 ))))
16710 }
16711 // FILTER(arr, lambda) -> ARRAY_FILTER for StarRocks, LIST_FILTER for DuckDB
16712 "FILTER" if f.args.len() == 2 => {
16713 let name = match target {
16714 DialectType::DuckDB => "LIST_FILTER",
16715 DialectType::StarRocks => "ARRAY_FILTER",
16716 _ => "FILTER",
16717 };
16718 Ok(Expression::Function(Box::new(Function::new(
16719 name.to_string(),
16720 f.args,
16721 ))))
16722 }
16723 // REDUCE(arr, init, lambda1, lambda2) -> AGGREGATE for Spark
16724 "REDUCE" if f.args.len() >= 3 => {
16725 let name = match target {
16726 DialectType::Spark | DialectType::Databricks => "AGGREGATE",
16727 _ => "REDUCE",
16728 };
16729 Ok(Expression::Function(Box::new(Function::new(
16730 name.to_string(),
16731 f.args,
16732 ))))
16733 }
            // CURRENT_SCHEMA() -> dialect-specific
            "CURRENT_SCHEMA" => {
                match target {
                    DialectType::PostgreSQL => {
                        // PostgreSQL: CURRENT_SCHEMA (no parens) — the Function node
                        // is built field-by-field so no_parens can be set.
                        Ok(Expression::Function(Box::new(Function {
                            name: "CURRENT_SCHEMA".to_string(),
                            args: vec![],
                            distinct: false,
                            trailing_comments: vec![],
                            use_bracket_syntax: false,
                            no_parens: true,
                            quoted: false,
                        })))
                    }
                    // MySQL-family targets: SCHEMA()
                    DialectType::MySQL
                    | DialectType::Doris
                    | DialectType::StarRocks => Ok(Expression::Function(Box::new(
                        Function::new("SCHEMA".to_string(), vec![]),
                    ))),
                    // T-SQL: SCHEMA_NAME()
                    DialectType::TSQL => Ok(Expression::Function(Box::new(
                        Function::new("SCHEMA_NAME".to_string(), vec![]),
                    ))),
                    DialectType::SQLite => {
                        // SQLite: replaced by the literal 'main' (its primary database
                        // name).
                        Ok(Expression::Literal(Literal::String("main".to_string())))
                    }
                    _ => Ok(Expression::Function(f)),
                }
            }
            // LTRIM(str, chars) 2-arg -> TRIM(LEADING chars FROM str) for Spark/Hive/Databricks/ClickHouse
            "LTRIM" if f.args.len() == 2 => match target {
                DialectType::Spark
                | DialectType::Hive
                | DialectType::Databricks
                | DialectType::ClickHouse => {
                    let mut args = f.args;
                    let str_expr = args.remove(0);
                    let chars = args.remove(0);
                    // Emit SQL-standard TRIM syntax with an explicit LEADING keyword.
                    Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
                        this: str_expr,
                        characters: Some(chars),
                        position: crate::expressions::TrimPosition::Leading,
                        sql_standard_syntax: true,
                        position_explicit: true,
                    })))
                }
                _ => Ok(Expression::Function(f)),
            },
            // RTRIM(str, chars) 2-arg -> TRIM(TRAILING chars FROM str) for Spark/Hive/Databricks/ClickHouse
            "RTRIM" if f.args.len() == 2 => match target {
                DialectType::Spark
                | DialectType::Hive
                | DialectType::Databricks
                | DialectType::ClickHouse => {
                    let mut args = f.args;
                    let str_expr = args.remove(0);
                    let chars = args.remove(0);
                    // Identical to the LTRIM arm except for the TRAILING position.
                    Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
                        this: str_expr,
                        characters: Some(chars),
                        position: crate::expressions::TrimPosition::Trailing,
                        sql_standard_syntax: true,
                        position_explicit: true,
                    })))
                }
                _ => Ok(Expression::Function(f)),
            },
16801 // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
16802 "ARRAY_REVERSE" if f.args.len() == 1 => match target {
16803 DialectType::ClickHouse => {
16804 let mut new_f = *f;
16805 new_f.name = "arrayReverse".to_string();
16806 Ok(Expression::Function(Box::new(new_f)))
16807 }
16808 _ => Ok(Expression::Function(f)),
16809 },
16810 // UUID() -> NEWID() for TSQL
16811 "UUID" if f.args.is_empty() => match target {
16812 DialectType::TSQL | DialectType::Fabric => {
16813 Ok(Expression::Function(Box::new(Function::new(
16814 "NEWID".to_string(),
16815 vec![],
16816 ))))
16817 }
16818 _ => Ok(Expression::Function(f)),
16819 },
            // FARM_FINGERPRINT(x) -> farmFingerprint64(x) for ClickHouse, FARMFINGERPRINT64(x) for Redshift
            "FARM_FINGERPRINT" if f.args.len() == 1 => match target {
                DialectType::ClickHouse => {
                    // Rename in place (ClickHouse uses camelCase function names);
                    // the argument is preserved as-is.
                    let mut new_f = *f;
                    new_f.name = "farmFingerprint64".to_string();
                    Ok(Expression::Function(Box::new(new_f)))
                }
                DialectType::Redshift => {
                    let mut new_f = *f;
                    new_f.name = "FARMFINGERPRINT64".to_string();
                    Ok(Expression::Function(Box::new(new_f)))
                }
                _ => Ok(Expression::Function(f)),
            },
            // JSON_KEYS(x) -> JSON_OBJECT_KEYS(x) for Databricks/Spark, OBJECT_KEYS(x) for Snowflake
            "JSON_KEYS" => match target {
                DialectType::Databricks | DialectType::Spark => {
                    // Pure rename; arguments preserved.
                    let mut new_f = *f;
                    new_f.name = "JSON_OBJECT_KEYS".to_string();
                    Ok(Expression::Function(Box::new(new_f)))
                }
                DialectType::Snowflake => {
                    let mut new_f = *f;
                    new_f.name = "OBJECT_KEYS".to_string();
                    Ok(Expression::Function(Box::new(new_f)))
                }
                _ => Ok(Expression::Function(f)),
            },
            // WEEKOFYEAR(x) -> WEEKISO(x) for Snowflake
            "WEEKOFYEAR" => match target {
                DialectType::Snowflake => {
                    let mut new_f = *f;
                    new_f.name = "WEEKISO".to_string();
                    Ok(Expression::Function(Box::new(new_f)))
                }
                _ => Ok(Expression::Function(f)),
            },
            // FORMAT(fmt, args...) -> FORMAT_STRING(fmt, args...) for Databricks.
            // Only applies when the source is the Generic dialect.
            "FORMAT"
                if f.args.len() >= 2 && matches!(source, DialectType::Generic) =>
            {
                match target {
                    DialectType::Databricks | DialectType::Spark => {
                        // Rename only; argument list carries over unchanged.
                        let mut new_f = *f;
                        new_f.name = "FORMAT_STRING".to_string();
                        Ok(Expression::Function(Box::new(new_f)))
                    }
                    _ => Ok(Expression::Function(f)),
                }
            }
            // CONCAT_WS('-', args...) -> CONCAT_WS('-', CAST(arg AS VARCHAR), ...) for Presto/Trino
            "CONCAT_WS" if f.args.len() >= 2 => match target {
                DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                    let mut args = f.args;
                    // The first argument is the separator and is left uncast; every
                    // remaining argument is wrapped in CAST(... AS VARCHAR) for the
                    // Presto family.
                    let sep = args.remove(0);
                    let cast_args: Vec<Expression> = args
                        .into_iter()
                        .map(|a| {
                            Expression::Cast(Box::new(Cast {
                                this: a,
                                to: DataType::VarChar {
                                    length: None,
                                    parenthesized_length: false,
                                },
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                            }))
                        })
                        .collect();
                    let mut new_args = vec![sep];
                    new_args.extend(cast_args);
                    Ok(Expression::Function(Box::new(Function::new(
                        "CONCAT_WS".to_string(),
                        new_args,
                    ))))
                }
                _ => Ok(Expression::Function(f)),
            },
            // ARRAY_SLICE(x, start, end) -> SLICE(x, start, end) for Presto/Trino/Databricks, arraySlice for ClickHouse
            "ARRAY_SLICE" if f.args.len() >= 2 => match target {
                DialectType::Presto
                | DialectType::Trino
                | DialectType::Athena
                | DialectType::Databricks
                | DialectType::Spark => {
                    // Pure rename; the argument list is preserved as-is.
                    let mut new_f = *f;
                    new_f.name = "SLICE".to_string();
                    Ok(Expression::Function(Box::new(new_f)))
                }
                DialectType::ClickHouse => {
                    let mut new_f = *f;
                    new_f.name = "arraySlice".to_string();
                    Ok(Expression::Function(Box::new(new_f)))
                }
                _ => Ok(Expression::Function(f)),
            },
16918 // ARRAY_PREPEND(arr, x) -> LIST_PREPEND(x, arr) for DuckDB (swap args)
16919 "ARRAY_PREPEND" if f.args.len() == 2 => match target {
16920 DialectType::DuckDB => {
16921 let mut args = f.args;
16922 let arr = args.remove(0);
16923 let val = args.remove(0);
16924 Ok(Expression::Function(Box::new(Function::new(
16925 "LIST_PREPEND".to_string(),
16926 vec![val, arr],
16927 ))))
16928 }
16929 _ => Ok(Expression::Function(f)),
16930 },
            // ARRAY_REMOVE(arr, target) -> dialect-specific
            "ARRAY_REMOVE" if f.args.len() == 2 => {
                match target {
                    DialectType::DuckDB => {
                        let mut args = f.args;
                        let arr = args.remove(0);
                        let target_val = args.remove(0);
                        // `_u` is the synthetic lambda parameter used by the rewrite.
                        let u_id = crate::expressions::Identifier::new("_u");
                        // LIST_FILTER(arr, _u -> _u <> target)
                        // NOTE(review): `_u <> target` also filters out NULL elements
                        // (NULL comparisons are not true) — confirm this matches the
                        // source dialect's ARRAY_REMOVE NULL handling.
                        let lambda = Expression::Lambda(Box::new(
                            crate::expressions::LambdaExpr {
                                parameters: vec![u_id.clone()],
                                body: Expression::Neq(Box::new(BinaryOp {
                                    left: Expression::Identifier(u_id),
                                    right: target_val,
                                    left_comments: Vec::new(),
                                    operator_comments: Vec::new(),
                                    trailing_comments: Vec::new(),
                                })),
                                colon: false,
                                parameter_types: Vec::new(),
                            },
                        ));
                        Ok(Expression::Function(Box::new(Function::new(
                            "LIST_FILTER".to_string(),
                            vec![arr, lambda],
                        ))))
                    }
                    DialectType::ClickHouse => {
                        let mut args = f.args;
                        let arr = args.remove(0);
                        let target_val = args.remove(0);
                        let u_id = crate::expressions::Identifier::new("_u");
                        // arrayFilter(_u -> _u <> target, arr) — note the lambda comes
                        // first here, unlike DuckDB's LIST_FILTER.
                        let lambda = Expression::Lambda(Box::new(
                            crate::expressions::LambdaExpr {
                                parameters: vec![u_id.clone()],
                                body: Expression::Neq(Box::new(BinaryOp {
                                    left: Expression::Identifier(u_id),
                                    right: target_val,
                                    left_comments: Vec::new(),
                                    operator_comments: Vec::new(),
                                    trailing_comments: Vec::new(),
                                })),
                                colon: false,
                                parameter_types: Vec::new(),
                            },
                        ));
                        Ok(Expression::Function(Box::new(Function::new(
                            "arrayFilter".to_string(),
                            vec![lambda, arr],
                        ))))
                    }
                    DialectType::BigQuery => {
                        // ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)
                        let mut args = f.args;
                        let arr = args.remove(0);
                        let target_val = args.remove(0);
                        let u_id = crate::expressions::Identifier::new("_u");
                        // Column reference for `_u` inside the subquery.
                        let u_col =
                            Expression::Column(crate::expressions::Column {
                                name: u_id.clone(),
                                table: None,
                                join_mark: false,
                                trailing_comments: Vec::new(),
                            });
                        // UNNEST(the_array) AS _u
                        let unnest_expr = Expression::Unnest(Box::new(
                            crate::expressions::UnnestFunc {
                                this: arr,
                                expressions: Vec::new(),
                                with_ordinality: false,
                                alias: None,
                                offset_alias: None,
                            },
                        ));
                        let aliased_unnest = Expression::Alias(Box::new(
                            crate::expressions::Alias {
                                this: unnest_expr,
                                alias: u_id.clone(),
                                column_aliases: Vec::new(),
                                pre_alias_comments: Vec::new(),
                                trailing_comments: Vec::new(),
                            },
                        ));
                        // _u <> target
                        let where_cond = Expression::Neq(Box::new(BinaryOp {
                            left: u_col.clone(),
                            right: target_val,
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                        }));
                        // SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target
                        let subquery = Expression::Select(Box::new(
                            crate::expressions::Select::new()
                                .column(u_col)
                                .from(aliased_unnest)
                                .where_(where_cond),
                        ));
                        // ARRAY(subquery) -- use ArrayFunc with subquery as single element
                        Ok(Expression::ArrayFunc(Box::new(
                            crate::expressions::ArrayConstructor {
                                expressions: vec![subquery],
                                bracket_notation: false,
                                use_list_keyword: false,
                            },
                        )))
                    }
                    _ => Ok(Expression::Function(f)),
                }
            }
17043 // PARSE_JSON(str) -> remove for SQLite/Doris (just use the string literal)
17044 "PARSE_JSON" if f.args.len() == 1 => {
17045 match target {
17046 DialectType::SQLite
17047 | DialectType::Doris
17048 | DialectType::MySQL
17049 | DialectType::StarRocks => {
17050 // Strip PARSE_JSON, return the inner argument
17051 Ok(f.args.into_iter().next().unwrap())
17052 }
17053 _ => Ok(Expression::Function(f)),
17054 }
17055 }
17056 // JSON_REMOVE(PARSE_JSON(str), path...) -> for SQLite strip PARSE_JSON
17057 // This is handled by PARSE_JSON stripping above; JSON_REMOVE is passed through
17058 "JSON_REMOVE" => Ok(Expression::Function(f)),
17059 // JSON_SET(PARSE_JSON(str), path, PARSE_JSON(val)) -> for SQLite strip PARSE_JSON
17060 // This is handled by PARSE_JSON stripping above; JSON_SET is passed through
17061 "JSON_SET" => Ok(Expression::Function(f)),
17062 // DECODE(x, search1, result1, ..., default) -> CASE WHEN
17063 // Behavior per search value type:
17064 // NULL literal -> CASE WHEN x IS NULL THEN result
17065 // Literal (number, string, bool) -> CASE WHEN x = literal THEN result
17066 // Non-literal (column, expr) -> CASE WHEN x = search OR (x IS NULL AND search IS NULL) THEN result
17067 "DECODE" if f.args.len() >= 3 => {
17068 // Keep as DECODE for targets that support it natively
17069 let keep_as_decode = matches!(
17070 target,
17071 DialectType::Oracle
17072 | DialectType::Snowflake
17073 | DialectType::Redshift
17074 | DialectType::Teradata
17075 | DialectType::Spark
17076 | DialectType::Databricks
17077 );
17078 if keep_as_decode {
17079 return Ok(Expression::Function(f));
17080 }
17081
17082 let mut args = f.args;
17083 let this_expr = args.remove(0);
17084 let mut pairs = Vec::new();
17085 let mut default = None;
17086 let mut i = 0;
17087 while i + 1 < args.len() {
17088 pairs.push((args[i].clone(), args[i + 1].clone()));
17089 i += 2;
17090 }
17091 if i < args.len() {
17092 default = Some(args[i].clone());
17093 }
17094 // Helper: check if expression is a literal value
17095 fn is_literal(e: &Expression) -> bool {
17096 matches!(
17097 e,
17098 Expression::Literal(_)
17099 | Expression::Boolean(_)
17100 | Expression::Neg(_)
17101 )
17102 }
17103 let whens: Vec<(Expression, Expression)> = pairs
17104 .into_iter()
17105 .map(|(search, result)| {
17106 if matches!(&search, Expression::Null(_)) {
17107 // NULL search -> IS NULL
17108 let condition = Expression::Is(Box::new(BinaryOp {
17109 left: this_expr.clone(),
17110 right: Expression::Null(crate::expressions::Null),
17111 left_comments: Vec::new(),
17112 operator_comments: Vec::new(),
17113 trailing_comments: Vec::new(),
17114 }));
17115 (condition, result)
17116 } else if is_literal(&search) {
17117 // Literal search -> simple equality
17118 let eq = Expression::Eq(Box::new(BinaryOp {
17119 left: this_expr.clone(),
17120 right: search,
17121 left_comments: Vec::new(),
17122 operator_comments: Vec::new(),
17123 trailing_comments: Vec::new(),
17124 }));
17125 (eq, result)
17126 } else {
17127 // Non-literal (column ref, expression) -> null-safe comparison
17128 let needs_paren = matches!(
17129 &search,
17130 Expression::Eq(_)
17131 | Expression::Neq(_)
17132 | Expression::Gt(_)
17133 | Expression::Gte(_)
17134 | Expression::Lt(_)
17135 | Expression::Lte(_)
17136 );
17137 let search_for_eq = if needs_paren {
17138 Expression::Paren(Box::new(
17139 crate::expressions::Paren {
17140 this: search.clone(),
17141 trailing_comments: Vec::new(),
17142 },
17143 ))
17144 } else {
17145 search.clone()
17146 };
17147 let eq = Expression::Eq(Box::new(BinaryOp {
17148 left: this_expr.clone(),
17149 right: search_for_eq,
17150 left_comments: Vec::new(),
17151 operator_comments: Vec::new(),
17152 trailing_comments: Vec::new(),
17153 }));
17154 let search_for_null = if needs_paren {
17155 Expression::Paren(Box::new(
17156 crate::expressions::Paren {
17157 this: search.clone(),
17158 trailing_comments: Vec::new(),
17159 },
17160 ))
17161 } else {
17162 search.clone()
17163 };
17164 let x_is_null = Expression::Is(Box::new(BinaryOp {
17165 left: this_expr.clone(),
17166 right: Expression::Null(crate::expressions::Null),
17167 left_comments: Vec::new(),
17168 operator_comments: Vec::new(),
17169 trailing_comments: Vec::new(),
17170 }));
17171 let s_is_null = Expression::Is(Box::new(BinaryOp {
17172 left: search_for_null,
17173 right: Expression::Null(crate::expressions::Null),
17174 left_comments: Vec::new(),
17175 operator_comments: Vec::new(),
17176 trailing_comments: Vec::new(),
17177 }));
17178 let both_null = Expression::And(Box::new(BinaryOp {
17179 left: x_is_null,
17180 right: s_is_null,
17181 left_comments: Vec::new(),
17182 operator_comments: Vec::new(),
17183 trailing_comments: Vec::new(),
17184 }));
17185 let condition = Expression::Or(Box::new(BinaryOp {
17186 left: eq,
17187 right: Expression::Paren(Box::new(
17188 crate::expressions::Paren {
17189 this: both_null,
17190 trailing_comments: Vec::new(),
17191 },
17192 )),
17193 left_comments: Vec::new(),
17194 operator_comments: Vec::new(),
17195 trailing_comments: Vec::new(),
17196 }));
17197 (condition, result)
17198 }
17199 })
17200 .collect();
17201 Ok(Expression::Case(Box::new(Case {
17202 operand: None,
17203 whens,
17204 else_: default,
17205 comments: Vec::new(),
17206 })))
17207 }
17208 // LEVENSHTEIN(a, b, ...) -> dialect-specific
17209 "LEVENSHTEIN" => {
17210 match target {
17211 DialectType::BigQuery => {
17212 let mut new_f = *f;
17213 new_f.name = "EDIT_DISTANCE".to_string();
17214 Ok(Expression::Function(Box::new(new_f)))
17215 }
17216 DialectType::Drill => {
17217 let mut new_f = *f;
17218 new_f.name = "LEVENSHTEIN_DISTANCE".to_string();
17219 Ok(Expression::Function(Box::new(new_f)))
17220 }
17221 DialectType::PostgreSQL if f.args.len() == 6 => {
17222 // PostgreSQL: LEVENSHTEIN(src, tgt, ins, del, sub, max_d) -> LEVENSHTEIN_LESS_EQUAL
17223 // 2 args: basic, 5 args: with costs, 6 args: with costs + max_distance
17224 let mut new_f = *f;
17225 new_f.name = "LEVENSHTEIN_LESS_EQUAL".to_string();
17226 Ok(Expression::Function(Box::new(new_f)))
17227 }
17228 _ => Ok(Expression::Function(f)),
17229 }
17230 }
17231 // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
17232 "ARRAY_REVERSE" => match target {
17233 DialectType::ClickHouse => {
17234 let mut new_f = *f;
17235 new_f.name = "arrayReverse".to_string();
17236 Ok(Expression::Function(Box::new(new_f)))
17237 }
17238 _ => Ok(Expression::Function(f)),
17239 },
17240 // GENERATE_DATE_ARRAY(start, end[, step]) -> target-specific
17241 "GENERATE_DATE_ARRAY" => {
17242 let mut args = f.args;
17243 if matches!(target, DialectType::BigQuery) {
17244 // BigQuery keeps GENERATE_DATE_ARRAY; add default interval if not present
17245 if args.len() == 2 {
17246 let default_interval = Expression::Interval(Box::new(
17247 crate::expressions::Interval {
17248 this: Some(Expression::Literal(Literal::String(
17249 "1".to_string(),
17250 ))),
17251 unit: Some(
17252 crate::expressions::IntervalUnitSpec::Simple {
17253 unit: crate::expressions::IntervalUnit::Day,
17254 use_plural: false,
17255 },
17256 ),
17257 },
17258 ));
17259 args.push(default_interval);
17260 }
17261 Ok(Expression::Function(Box::new(Function::new(
17262 "GENERATE_DATE_ARRAY".to_string(),
17263 args,
17264 ))))
17265 } else if matches!(target, DialectType::DuckDB) {
17266 // DuckDB: CAST(GENERATE_SERIES(start, end, step) AS DATE[])
17267 let start = args.get(0).cloned();
17268 let end = args.get(1).cloned();
17269 let step = args.get(2).cloned().or_else(|| {
17270 Some(Expression::Interval(Box::new(
17271 crate::expressions::Interval {
17272 this: Some(Expression::Literal(Literal::String(
17273 "1".to_string(),
17274 ))),
17275 unit: Some(
17276 crate::expressions::IntervalUnitSpec::Simple {
17277 unit: crate::expressions::IntervalUnit::Day,
17278 use_plural: false,
17279 },
17280 ),
17281 },
17282 )))
17283 });
17284 let gen_series = Expression::GenerateSeries(Box::new(
17285 crate::expressions::GenerateSeries {
17286 start: start.map(Box::new),
17287 end: end.map(Box::new),
17288 step: step.map(Box::new),
17289 is_end_exclusive: None,
17290 },
17291 ));
17292 Ok(Expression::Cast(Box::new(Cast {
17293 this: gen_series,
17294 to: DataType::Array {
17295 element_type: Box::new(DataType::Date),
17296 dimension: None,
17297 },
17298 trailing_comments: vec![],
17299 double_colon_syntax: false,
17300 format: None,
17301 default: None,
17302 })))
17303 } else if matches!(
17304 target,
17305 DialectType::Presto | DialectType::Trino | DialectType::Athena
17306 ) {
17307 // Presto/Trino: SEQUENCE(start, end, interval) with interval normalization
17308 let start = args.get(0).cloned();
17309 let end = args.get(1).cloned();
17310 let step = args.get(2).cloned().or_else(|| {
17311 Some(Expression::Interval(Box::new(
17312 crate::expressions::Interval {
17313 this: Some(Expression::Literal(Literal::String(
17314 "1".to_string(),
17315 ))),
17316 unit: Some(
17317 crate::expressions::IntervalUnitSpec::Simple {
17318 unit: crate::expressions::IntervalUnit::Day,
17319 use_plural: false,
17320 },
17321 ),
17322 },
17323 )))
17324 });
17325 let gen_series = Expression::GenerateSeries(Box::new(
17326 crate::expressions::GenerateSeries {
17327 start: start.map(Box::new),
17328 end: end.map(Box::new),
17329 step: step.map(Box::new),
17330 is_end_exclusive: None,
17331 },
17332 ));
17333 Ok(gen_series)
17334 } else if matches!(
17335 target,
17336 DialectType::Spark | DialectType::Databricks
17337 ) {
17338 // Spark/Databricks: SEQUENCE(start, end, step) - keep step as-is
17339 let start = args.get(0).cloned();
17340 let end = args.get(1).cloned();
17341 let step = args.get(2).cloned().or_else(|| {
17342 Some(Expression::Interval(Box::new(
17343 crate::expressions::Interval {
17344 this: Some(Expression::Literal(Literal::String(
17345 "1".to_string(),
17346 ))),
17347 unit: Some(
17348 crate::expressions::IntervalUnitSpec::Simple {
17349 unit: crate::expressions::IntervalUnit::Day,
17350 use_plural: false,
17351 },
17352 ),
17353 },
17354 )))
17355 });
17356 let gen_series = Expression::GenerateSeries(Box::new(
17357 crate::expressions::GenerateSeries {
17358 start: start.map(Box::new),
17359 end: end.map(Box::new),
17360 step: step.map(Box::new),
17361 is_end_exclusive: None,
17362 },
17363 ));
17364 Ok(gen_series)
17365 } else if matches!(target, DialectType::Snowflake) {
17366 // Snowflake: keep as GENERATE_DATE_ARRAY for later transform
17367 if args.len() == 2 {
17368 let default_interval = Expression::Interval(Box::new(
17369 crate::expressions::Interval {
17370 this: Some(Expression::Literal(Literal::String(
17371 "1".to_string(),
17372 ))),
17373 unit: Some(
17374 crate::expressions::IntervalUnitSpec::Simple {
17375 unit: crate::expressions::IntervalUnit::Day,
17376 use_plural: false,
17377 },
17378 ),
17379 },
17380 ));
17381 args.push(default_interval);
17382 }
17383 Ok(Expression::Function(Box::new(Function::new(
17384 "GENERATE_DATE_ARRAY".to_string(),
17385 args,
17386 ))))
17387 } else if matches!(
17388 target,
17389 DialectType::MySQL
17390 | DialectType::TSQL
17391 | DialectType::Fabric
17392 | DialectType::Redshift
17393 ) {
17394 // MySQL/TSQL/Redshift: keep as GENERATE_DATE_ARRAY for the preprocess
17395 // step (unnest_generate_date_array_using_recursive_cte) to convert to CTE
17396 Ok(Expression::Function(Box::new(Function::new(
17397 "GENERATE_DATE_ARRAY".to_string(),
17398 args,
17399 ))))
17400 } else {
17401 // PostgreSQL/others: convert to GenerateSeries
17402 let start = args.get(0).cloned();
17403 let end = args.get(1).cloned();
17404 let step = args.get(2).cloned().or_else(|| {
17405 Some(Expression::Interval(Box::new(
17406 crate::expressions::Interval {
17407 this: Some(Expression::Literal(Literal::String(
17408 "1".to_string(),
17409 ))),
17410 unit: Some(
17411 crate::expressions::IntervalUnitSpec::Simple {
17412 unit: crate::expressions::IntervalUnit::Day,
17413 use_plural: false,
17414 },
17415 ),
17416 },
17417 )))
17418 });
17419 Ok(Expression::GenerateSeries(Box::new(
17420 crate::expressions::GenerateSeries {
17421 start: start.map(Box::new),
17422 end: end.map(Box::new),
17423 step: step.map(Box::new),
17424 is_end_exclusive: None,
17425 },
17426 )))
17427 }
17428 }
17429 _ => Ok(Expression::Function(f)),
17430 }
17431 } else if let Expression::AggregateFunction(mut af) = e {
17432 let name = af.name.to_uppercase();
17433 match name.as_str() {
17434 "ARBITRARY" if af.args.len() == 1 => {
17435 let arg = af.args.into_iter().next().unwrap();
17436 Ok(convert_arbitrary(arg, target))
17437 }
17438 "JSON_ARRAYAGG" => {
17439 match target {
17440 DialectType::PostgreSQL => {
17441 af.name = "JSON_AGG".to_string();
17442 // Add NULLS FIRST to ORDER BY items for PostgreSQL
17443 for ordered in af.order_by.iter_mut() {
17444 if ordered.nulls_first.is_none() {
17445 ordered.nulls_first = Some(true);
17446 }
17447 }
17448 Ok(Expression::AggregateFunction(af))
17449 }
17450 _ => Ok(Expression::AggregateFunction(af)),
17451 }
17452 }
17453 _ => Ok(Expression::AggregateFunction(af)),
17454 }
        } else if let Expression::JSONArrayAgg(ja) = e {
            // JSONArrayAgg -> JSON_AGG for PostgreSQL, JSON_ARRAYAGG for others
            match target {
                DialectType::PostgreSQL => {
                    // Flatten the optional ORDER BY expression into the aggregate's
                    // own order_by list, forcing NULLS FIRST where unspecified.
                    let mut order_by = Vec::new();
                    if let Some(order_expr) = ja.order {
                        if let Expression::OrderBy(ob) = *order_expr {
                            for mut ordered in ob.expressions {
                                if ordered.nulls_first.is_none() {
                                    ordered.nulls_first = Some(true);
                                }
                                order_by.push(ordered);
                            }
                        }
                        // NOTE(review): a non-OrderBy `order` expression is
                        // silently dropped here — confirm that is intended.
                    }
                    // Emit JSON_AGG(x [ORDER BY ...]) as a generic aggregate call.
                    Ok(Expression::AggregateFunction(Box::new(
                        crate::expressions::AggregateFunction {
                            name: "JSON_AGG".to_string(),
                            args: vec![*ja.this],
                            distinct: false,
                            filter: None,
                            order_by,
                            limit: None,
                            ignore_nulls: None,
                        },
                    )))
                }
                _ => Ok(Expression::JSONArrayAgg(ja)),
            }
17484 } else if let Expression::ToNumber(tn) = e {
17485 // TO_NUMBER(x) with no format/precision/scale -> CAST(x AS DOUBLE)
17486 let arg = *tn.this;
17487 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
17488 this: arg,
17489 to: crate::expressions::DataType::Double {
17490 precision: None,
17491 scale: None,
17492 },
17493 double_colon_syntax: false,
17494 trailing_comments: Vec::new(),
17495 format: None,
17496 default: None,
17497 })))
17498 } else {
17499 Ok(e)
17500 }
17501 }
17502
17503 Action::RegexpLikeToDuckDB => {
17504 if let Expression::RegexpLike(f) = e {
17505 let mut args = vec![f.this, f.pattern];
17506 if let Some(flags) = f.flags {
17507 args.push(flags);
17508 }
17509 Ok(Expression::Function(Box::new(Function::new(
17510 "REGEXP_MATCHES".to_string(),
17511 args,
17512 ))))
17513 } else {
17514 Ok(e)
17515 }
17516 }
17517 Action::EpochConvert => {
17518 if let Expression::Epoch(f) = e {
17519 let arg = f.this;
17520 let name = match target {
17521 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
17522 "UNIX_TIMESTAMP"
17523 }
17524 DialectType::Presto | DialectType::Trino => "TO_UNIXTIME",
17525 DialectType::BigQuery => "TIME_TO_UNIX",
17526 _ => "EPOCH",
17527 };
17528 Ok(Expression::Function(Box::new(Function::new(
17529 name.to_string(),
17530 vec![arg],
17531 ))))
17532 } else {
17533 Ok(e)
17534 }
17535 }
        Action::EpochMsConvert => {
            use crate::expressions::{BinaryOp, Cast};
            // EPOCH_MS(x): treat x as milliseconds since the Unix epoch and build
            // the target dialect's timestamp-from-millis expression.
            if let Expression::EpochMs(f) = e {
                let arg = f.this;
                match target {
                    // Spark/Databricks and BigQuery share a native TIMESTAMP_MILLIS.
                    DialectType::Spark | DialectType::Databricks => {
                        Ok(Expression::Function(Box::new(Function::new(
                            "TIMESTAMP_MILLIS".to_string(),
                            vec![arg],
                        ))))
                    }
                    DialectType::BigQuery => Ok(Expression::Function(Box::new(
                        Function::new("TIMESTAMP_MILLIS".to_string(), vec![arg]),
                    ))),
                    DialectType::Presto | DialectType::Trino => {
                        // FROM_UNIXTIME(CAST(x AS DOUBLE) / POW(10, 3))
                        let cast_arg = Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Double {
                                precision: None,
                                scale: None,
                            },
                            trailing_comments: Vec::new(),
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        let div = Expression::Div(Box::new(BinaryOp::new(
                            cast_arg,
                            Expression::Function(Box::new(Function::new(
                                "POW".to_string(),
                                vec![Expression::number(10), Expression::number(3)],
                            ))),
                        )));
                        Ok(Expression::Function(Box::new(Function::new(
                            "FROM_UNIXTIME".to_string(),
                            vec![div],
                        ))))
                    }
                    DialectType::MySQL => {
                        // FROM_UNIXTIME(x / POWER(10, 3))
                        // NOTE(review): no CAST on x here, unlike the Presto and
                        // PostgreSQL paths — presumably MySQL `/` already yields a
                        // non-integer result; confirm.
                        let div = Expression::Div(Box::new(BinaryOp::new(
                            arg,
                            Expression::Function(Box::new(Function::new(
                                "POWER".to_string(),
                                vec![Expression::number(10), Expression::number(3)],
                            ))),
                        )));
                        Ok(Expression::Function(Box::new(Function::new(
                            "FROM_UNIXTIME".to_string(),
                            vec![div],
                        ))))
                    }
                    DialectType::PostgreSQL | DialectType::Redshift => {
                        // TO_TIMESTAMP(CAST(x AS DOUBLE PRECISION) / POWER(10, 3))
                        // DOUBLE PRECISION is spelled via DataType::Custom because
                        // the two-word type name has no dedicated variant here.
                        let cast_arg = Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Custom {
                                name: "DOUBLE PRECISION".to_string(),
                            },
                            trailing_comments: Vec::new(),
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        let div = Expression::Div(Box::new(BinaryOp::new(
                            cast_arg,
                            Expression::Function(Box::new(Function::new(
                                "POWER".to_string(),
                                vec![Expression::number(10), Expression::number(3)],
                            ))),
                        )));
                        Ok(Expression::Function(Box::new(Function::new(
                            "TO_TIMESTAMP".to_string(),
                            vec![div],
                        ))))
                    }
                    DialectType::ClickHouse => {
                        // fromUnixTimestamp64Milli(CAST(x AS Nullable(Int64)))
                        let cast_arg = Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Nullable {
                                inner: Box::new(DataType::BigInt { length: None }),
                            },
                            trailing_comments: Vec::new(),
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "fromUnixTimestamp64Milli".to_string(),
                            vec![cast_arg],
                        ))))
                    }
                    // Fallback: keep the DuckDB-style EPOCH_MS(x) call.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "EPOCH_MS".to_string(),
                        vec![arg],
                    )))),
                }
            } else {
                Ok(e)
            }
        }
        Action::TSQLTypeNormalize => {
            // Map TSQL-specific type names (parsed as DataType::Custom or with
            // TSQL spellings) onto portable DataType variants for the target.
            // Arms that should keep the original type `return` early with `dt`.
            if let Expression::DataType(dt) = e {
                let new_dt = match &dt {
                    // MONEY -> DECIMAL(15, 4)
                    DataType::Custom { name } if name.eq_ignore_ascii_case("MONEY") => {
                        DataType::Decimal {
                            precision: Some(15),
                            scale: Some(4),
                        }
                    }
                    // SMALLMONEY -> DECIMAL(6, 4)
                    DataType::Custom { name }
                        if name.eq_ignore_ascii_case("SMALLMONEY") =>
                    {
                        DataType::Decimal {
                            precision: Some(6),
                            scale: Some(4),
                        }
                    }
                    // DATETIME2 (no precision) -> TIMESTAMP
                    DataType::Custom { name } if name.eq_ignore_ascii_case("DATETIME2") => {
                        DataType::Timestamp {
                            timezone: false,
                            precision: None,
                        }
                    }
                    // REAL -> FLOAT without the REAL spelling
                    DataType::Custom { name } if name.eq_ignore_ascii_case("REAL") => {
                        DataType::Float {
                            precision: None,
                            scale: None,
                            real_spelling: false,
                        }
                    }
                    // A parsed FLOAT carrying the REAL spelling is normalized too.
                    DataType::Float {
                        real_spelling: true,
                        ..
                    } => DataType::Float {
                        precision: None,
                        scale: None,
                        real_spelling: false,
                    },
                    // IMAGE -> BLOB
                    DataType::Custom { name } if name.eq_ignore_ascii_case("IMAGE") => {
                        DataType::Custom {
                            name: "BLOB".to_string(),
                        }
                    }
                    // BIT -> BOOLEAN
                    DataType::Custom { name } if name.eq_ignore_ascii_case("BIT") => {
                        DataType::Boolean
                    }
                    // ROWVERSION -> BINARY
                    DataType::Custom { name }
                        if name.eq_ignore_ascii_case("ROWVERSION") =>
                    {
                        DataType::Custom {
                            name: "BINARY".to_string(),
                        }
                    }
                    // UNIQUEIDENTIFIER -> STRING on Hive-family targets,
                    // VARCHAR(36) elsewhere (36 chars fits a textual UUID).
                    DataType::Custom { name }
                        if name.eq_ignore_ascii_case("UNIQUEIDENTIFIER") =>
                    {
                        match target {
                            DialectType::Spark
                            | DialectType::Databricks
                            | DialectType::Hive => DataType::Custom {
                                name: "STRING".to_string(),
                            },
                            _ => DataType::VarChar {
                                length: Some(36),
                                parenthesized_length: true,
                            },
                        }
                    }
                    // DATETIMEOFFSET -> TIMESTAMP (tz-less on Hive-family targets,
                    // WITH TIME ZONE elsewhere).
                    DataType::Custom { name }
                        if name.eq_ignore_ascii_case("DATETIMEOFFSET") =>
                    {
                        match target {
                            DialectType::Spark
                            | DialectType::Databricks
                            | DialectType::Hive => DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            _ => DataType::Timestamp {
                                timezone: true,
                                precision: None,
                            },
                        }
                    }
                    DataType::Custom { ref name }
                        if name.to_uppercase().starts_with("DATETIME2(") =>
                    {
                        // DATETIME2(n) -> TIMESTAMP (precision is discarded)
                        DataType::Timestamp {
                            timezone: false,
                            precision: None,
                        }
                    }
                    DataType::Custom { ref name }
                        if name.to_uppercase().starts_with("TIME(") =>
                    {
                        // TIME(n) -> TIMESTAMP for Spark, keep as TIME for others
                        match target {
                            DialectType::Spark
                            | DialectType::Databricks
                            | DialectType::Hive => DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            _ => return Ok(Expression::DataType(dt)),
                        }
                    }
                    DataType::Custom { ref name }
                        if name.to_uppercase().starts_with("NUMERIC") =>
                    {
                        // Parse NUMERIC(p,s) back to Decimal(p,s); bare NUMERIC maps
                        // to Decimal with no precision/scale. Unparseable forms
                        // (e.g. "NUMERICFOO") are left untouched.
                        let upper = name.to_uppercase();
                        if let Some(inner) = upper
                            .strip_prefix("NUMERIC(")
                            .and_then(|s| s.strip_suffix(')'))
                        {
                            let parts: Vec<&str> = inner.split(',').collect();
                            let precision =
                                parts.first().and_then(|s| s.trim().parse::<u32>().ok());
                            let scale =
                                parts.get(1).and_then(|s| s.trim().parse::<u32>().ok());
                            DataType::Decimal { precision, scale }
                        } else if upper == "NUMERIC" {
                            DataType::Decimal {
                                precision: None,
                                scale: None,
                            }
                        } else {
                            return Ok(Expression::DataType(dt));
                        }
                    }
                    DataType::Float {
                        precision: Some(p), ..
                    } => {
                        // For Hive/Spark: FLOAT(1-32) -> FLOAT, FLOAT(33+) -> DOUBLE (IEEE 754 boundary)
                        // For other targets: FLOAT(1-24) -> FLOAT, FLOAT(25+) -> DOUBLE (TSQL boundary)
                        let boundary = match target {
                            DialectType::Hive
                            | DialectType::Spark
                            | DialectType::Databricks => 32,
                            _ => 24,
                        };
                        if *p <= boundary {
                            DataType::Float {
                                precision: None,
                                scale: None,
                                real_spelling: false,
                            }
                        } else {
                            DataType::Double {
                                precision: None,
                                scale: None,
                            }
                        }
                    }
                    // TSQL TINYINT is unsigned 0-255: DuckDB gets UTINYINT, the
                    // Hive family widens to SMALLINT, others keep TINYINT.
                    DataType::TinyInt { .. } => match target {
                        DialectType::DuckDB => DataType::Custom {
                            name: "UTINYINT".to_string(),
                        },
                        DialectType::Hive
                        | DialectType::Spark
                        | DialectType::Databricks => DataType::SmallInt { length: None },
                        _ => return Ok(Expression::DataType(dt)),
                    },
                    // INTEGER -> INT for Spark/Databricks
                    DataType::Int {
                        length,
                        integer_spelling: true,
                    } => DataType::Int {
                        length: *length,
                        integer_spelling: false,
                    },
                    // Everything else is already portable.
                    _ => return Ok(Expression::DataType(dt)),
                };
                Ok(Expression::DataType(new_dt))
            } else {
                Ok(e)
            }
        }
        Action::MySQLSafeDivide => {
            use crate::expressions::{BinaryOp, Cast};
            // Rewrite `a / b` into a "safe" division for the target:
            //   - wrap the divisor in NULLIF(b, 0) so division by zero yields NULL
            //     instead of an error, and
            //   - cast the dividend to a floating type on dialects whose `/` would
            //     otherwise perform integer division.
            // NOTE(review): despite the action name, this handles many targets,
            // not only MySQL.
            if let Expression::Div(op) = e {
                let left = op.left;
                let right = op.right;
                // For SQLite: CAST left as REAL but NO NULLIF wrapping
                // (NOTE(review): presumably because SQLite natively yields NULL
                // for x / 0 — confirm.)
                if matches!(target, DialectType::SQLite) {
                    let new_left = Expression::Cast(Box::new(Cast {
                        this: left,
                        to: DataType::Float {
                            precision: None,
                            scale: None,
                            real_spelling: true,
                        },
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    }));
                    return Ok(Expression::Div(Box::new(BinaryOp::new(new_left, right))));
                }
                // Wrap right in NULLIF(right, 0)
                let nullif_right = Expression::Function(Box::new(Function::new(
                    "NULLIF".to_string(),
                    vec![right, Expression::number(0)],
                )));
                // For some dialects, also CAST the left side
                let new_left = match target {
                    // Targets that spell the type "DOUBLE PRECISION".
                    DialectType::PostgreSQL
                    | DialectType::Redshift
                    | DialectType::Teradata
                    | DialectType::Materialize
                    | DialectType::RisingWave => Expression::Cast(Box::new(Cast {
                        this: left,
                        to: DataType::Custom {
                            name: "DOUBLE PRECISION".to_string(),
                        },
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    })),
                    // Targets that use a plain DOUBLE type.
                    DialectType::Drill
                    | DialectType::Trino
                    | DialectType::Presto
                    | DialectType::Athena => Expression::Cast(Box::new(Cast {
                        this: left,
                        to: DataType::Double {
                            precision: None,
                            scale: None,
                        },
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    })),
                    // TSQL uses FLOAT.
                    DialectType::TSQL => Expression::Cast(Box::new(Cast {
                        this: left,
                        to: DataType::Float {
                            precision: None,
                            scale: None,
                            real_spelling: false,
                        },
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    })),
                    // Remaining targets keep the dividend as-is.
                    _ => left,
                };
                Ok(Expression::Div(Box::new(BinaryOp::new(
                    new_left,
                    nullif_right,
                ))))
            } else {
                Ok(e)
            }
        }
17896 Action::AlterTableRenameStripSchema => {
17897 if let Expression::AlterTable(mut at) = e {
17898 if let Some(crate::expressions::AlterTableAction::RenameTable(
17899 ref mut new_tbl,
17900 )) = at.actions.first_mut()
17901 {
17902 new_tbl.schema = None;
17903 new_tbl.catalog = None;
17904 }
17905 Ok(Expression::AlterTable(at))
17906 } else {
17907 Ok(e)
17908 }
17909 }
17910 Action::NullsOrdering => {
17911 // Fill in the source dialect's implied null ordering default.
17912 // This makes implicit null ordering explicit so the target generator
17913 // can correctly strip or keep it.
17914 //
17915 // Dialect null ordering categories:
17916 // nulls_are_large (Oracle, PostgreSQL, Redshift, Snowflake):
17917 // ASC -> NULLS LAST, DESC -> NULLS FIRST
17918 // nulls_are_small (Spark, Hive, BigQuery, MySQL, Databricks, ClickHouse, etc.):
17919 // ASC -> NULLS FIRST, DESC -> NULLS LAST
17920 // nulls_are_last (DuckDB, Presto, Trino, Dremio, Athena):
17921 // NULLS LAST always (both ASC and DESC)
17922 if let Expression::Ordered(mut o) = e {
17923 let is_asc = !o.desc;
17924
17925 let is_source_nulls_large = matches!(
17926 source,
17927 DialectType::Oracle
17928 | DialectType::PostgreSQL
17929 | DialectType::Redshift
17930 | DialectType::Snowflake
17931 );
17932 let is_source_nulls_last = matches!(
17933 source,
17934 DialectType::DuckDB
17935 | DialectType::Presto
17936 | DialectType::Trino
17937 | DialectType::Dremio
17938 | DialectType::Athena
17939 | DialectType::ClickHouse
17940 | DialectType::Drill
17941 | DialectType::Exasol
17942 | DialectType::DataFusion
17943 );
17944
17945 // Determine target category to check if default matches
17946 let is_target_nulls_large = matches!(
17947 target,
17948 DialectType::Oracle
17949 | DialectType::PostgreSQL
17950 | DialectType::Redshift
17951 | DialectType::Snowflake
17952 );
17953 let is_target_nulls_last = matches!(
17954 target,
17955 DialectType::DuckDB
17956 | DialectType::Presto
17957 | DialectType::Trino
17958 | DialectType::Dremio
17959 | DialectType::Athena
17960 | DialectType::ClickHouse
17961 | DialectType::Drill
17962 | DialectType::Exasol
17963 | DialectType::DataFusion
17964 );
17965
17966 // Compute the implied nulls_first for source
17967 let source_nulls_first = if is_source_nulls_large {
17968 !is_asc // ASC -> NULLS LAST (false), DESC -> NULLS FIRST (true)
17969 } else if is_source_nulls_last {
17970 false // NULLS LAST always
17971 } else {
17972 is_asc // nulls_are_small: ASC -> NULLS FIRST (true), DESC -> NULLS LAST (false)
17973 };
17974
17975 // Compute the target's default
17976 let target_nulls_first = if is_target_nulls_large {
17977 !is_asc
17978 } else if is_target_nulls_last {
17979 false
17980 } else {
17981 is_asc
17982 };
17983
17984 // Only add explicit nulls ordering if source and target defaults differ
17985 if source_nulls_first != target_nulls_first {
17986 o.nulls_first = Some(source_nulls_first);
17987 }
17988 // If they match, leave nulls_first as None so the generator won't output it
17989
17990 Ok(Expression::Ordered(o))
17991 } else {
17992 Ok(e)
17993 }
17994 }
        Action::StringAggConvert => {
            // Convert STRING_AGG in its several parsed shapes (WITHIN GROUP
            // wrapper or a plain StringAgg node) to the target's aggregate form.
            match e {
                Expression::WithinGroup(wg) => {
                    // STRING_AGG(x, sep) WITHIN GROUP (ORDER BY z) -> target-specific
                    // Extract args and distinct flag from either Function, AggregateFunction, or StringAgg
                    let (x_opt, sep_opt, distinct) = match wg.this {
                        Expression::AggregateFunction(ref af)
                            if af.name.eq_ignore_ascii_case("STRING_AGG")
                                && af.args.len() >= 2 =>
                        {
                            (
                                Some(af.args[0].clone()),
                                Some(af.args[1].clone()),
                                af.distinct,
                            )
                        }
                        // A plain Function node carries no DISTINCT flag.
                        Expression::Function(ref f)
                            if f.name.eq_ignore_ascii_case("STRING_AGG")
                                && f.args.len() >= 2 =>
                        {
                            (Some(f.args[0].clone()), Some(f.args[1].clone()), false)
                        }
                        Expression::StringAgg(ref sa) => {
                            (Some(sa.this.clone()), sa.separator.clone(), sa.distinct)
                        }
                        // Not a STRING_AGG wrapper; fall through unchanged below.
                        _ => (None, None, false),
                    };
                    if let (Some(x), Some(sep)) = (x_opt, sep_opt) {
                        let order_by = wg.order_by;

                        match target {
                            DialectType::TSQL | DialectType::Fabric => {
                                // Keep as WithinGroup(StringAgg) for TSQL
                                Ok(Expression::WithinGroup(Box::new(
                                    crate::expressions::WithinGroup {
                                        this: Expression::StringAgg(Box::new(
                                            crate::expressions::StringAggFunc {
                                                this: x,
                                                separator: Some(sep),
                                                order_by: None, // order_by goes in WithinGroup, not StringAgg
                                                distinct,
                                                filter: None,
                                                limit: None,
                                            },
                                        )),
                                        order_by,
                                    },
                                )))
                            }
                            DialectType::MySQL
                            | DialectType::SingleStore
                            | DialectType::Doris
                            | DialectType::StarRocks => {
                                // GROUP_CONCAT(x ORDER BY z SEPARATOR sep)
                                Ok(Expression::GroupConcat(Box::new(
                                    crate::expressions::GroupConcatFunc {
                                        this: x,
                                        separator: Some(sep),
                                        order_by: Some(order_by),
                                        distinct,
                                        filter: None,
                                    },
                                )))
                            }
                            DialectType::SQLite => {
                                // GROUP_CONCAT(x, sep) - no ORDER BY support
                                Ok(Expression::GroupConcat(Box::new(
                                    crate::expressions::GroupConcatFunc {
                                        this: x,
                                        separator: Some(sep),
                                        order_by: None,
                                        distinct,
                                        filter: None,
                                    },
                                )))
                            }
                            DialectType::PostgreSQL | DialectType::Redshift => {
                                // STRING_AGG(x, sep ORDER BY z)
                                Ok(Expression::StringAgg(Box::new(
                                    crate::expressions::StringAggFunc {
                                        this: x,
                                        separator: Some(sep),
                                        order_by: Some(order_by),
                                        distinct,
                                        filter: None,
                                        limit: None,
                                    },
                                )))
                            }
                            _ => {
                                // Default: keep as STRING_AGG(x, sep) with ORDER BY inside
                                Ok(Expression::StringAgg(Box::new(
                                    crate::expressions::StringAggFunc {
                                        this: x,
                                        separator: Some(sep),
                                        order_by: Some(order_by),
                                        distinct,
                                        filter: None,
                                        limit: None,
                                    },
                                )))
                            }
                        }
                    } else {
                        // Inner expression was not STRING_AGG; keep the wrapper.
                        Ok(Expression::WithinGroup(wg))
                    }
                }
                Expression::StringAgg(sa) => {
                    match target {
                        DialectType::MySQL
                        | DialectType::SingleStore
                        | DialectType::Doris
                        | DialectType::StarRocks => {
                            // STRING_AGG(x, sep) -> GROUP_CONCAT(x SEPARATOR sep)
                            Ok(Expression::GroupConcat(Box::new(
                                crate::expressions::GroupConcatFunc {
                                    this: sa.this,
                                    separator: sa.separator,
                                    order_by: sa.order_by,
                                    distinct: sa.distinct,
                                    filter: sa.filter,
                                },
                            )))
                        }
                        DialectType::SQLite => {
                            // STRING_AGG(x, sep) -> GROUP_CONCAT(x, sep)
                            Ok(Expression::GroupConcat(Box::new(
                                crate::expressions::GroupConcatFunc {
                                    this: sa.this,
                                    separator: sa.separator,
                                    order_by: None, // SQLite doesn't support ORDER BY in GROUP_CONCAT
                                    distinct: sa.distinct,
                                    filter: sa.filter,
                                },
                            )))
                        }
                        DialectType::Spark | DialectType::Databricks => {
                            // STRING_AGG(x, sep) -> LISTAGG(x, sep)
                            Ok(Expression::ListAgg(Box::new(
                                crate::expressions::ListAggFunc {
                                    this: sa.this,
                                    separator: sa.separator,
                                    on_overflow: None,
                                    order_by: sa.order_by,
                                    distinct: sa.distinct,
                                    filter: None,
                                },
                            )))
                        }
                        _ => Ok(Expression::StringAgg(sa)),
                    }
                }
                _ => Ok(e),
            }
        }
18150 Action::GroupConcatConvert => {
18151 // Helper to expand CONCAT(a, b, c) -> a || b || c (for PostgreSQL/SQLite)
18152 // or CONCAT(a, b, c) -> a + b + c (for TSQL)
18153 fn expand_concat_to_dpipe(expr: Expression) -> Expression {
18154 if let Expression::Function(ref f) = expr {
18155 if f.name.to_uppercase() == "CONCAT" && f.args.len() > 1 {
18156 let mut result = f.args[0].clone();
18157 for arg in &f.args[1..] {
18158 result = Expression::Concat(Box::new(BinaryOp {
18159 left: result,
18160 right: arg.clone(),
18161 left_comments: vec![],
18162 operator_comments: vec![],
18163 trailing_comments: vec![],
18164 }));
18165 }
18166 return result;
18167 }
18168 }
18169 expr
18170 }
18171 fn expand_concat_to_plus(expr: Expression) -> Expression {
18172 if let Expression::Function(ref f) = expr {
18173 if f.name.to_uppercase() == "CONCAT" && f.args.len() > 1 {
18174 let mut result = f.args[0].clone();
18175 for arg in &f.args[1..] {
18176 result = Expression::Add(Box::new(BinaryOp {
18177 left: result,
18178 right: arg.clone(),
18179 left_comments: vec![],
18180 operator_comments: vec![],
18181 trailing_comments: vec![],
18182 }));
18183 }
18184 return result;
18185 }
18186 }
18187 expr
18188 }
18189 // Helper to wrap each arg in CAST(arg AS VARCHAR) for Presto/Trino CONCAT
18190 fn wrap_concat_args_in_varchar_cast(expr: Expression) -> Expression {
18191 if let Expression::Function(ref f) = expr {
18192 if f.name.to_uppercase() == "CONCAT" && f.args.len() > 1 {
18193 let new_args: Vec<Expression> = f
18194 .args
18195 .iter()
18196 .map(|arg| {
18197 Expression::Cast(Box::new(crate::expressions::Cast {
18198 this: arg.clone(),
18199 to: crate::expressions::DataType::VarChar {
18200 length: None,
18201 parenthesized_length: false,
18202 },
18203 trailing_comments: Vec::new(),
18204 double_colon_syntax: false,
18205 format: None,
18206 default: None,
18207 }))
18208 })
18209 .collect();
18210 return Expression::Function(Box::new(
18211 crate::expressions::Function::new(
18212 "CONCAT".to_string(),
18213 new_args,
18214 ),
18215 ));
18216 }
18217 }
18218 expr
18219 }
18220 if let Expression::GroupConcat(gc) = e {
18221 match target {
18222 DialectType::Presto => {
18223 // GROUP_CONCAT(x [, sep]) -> ARRAY_JOIN(ARRAY_AGG(x), sep)
18224 let sep = gc.separator.unwrap_or(Expression::string(","));
18225 // For multi-arg CONCAT, wrap each arg in CAST(... AS VARCHAR)
18226 let this = wrap_concat_args_in_varchar_cast(gc.this);
18227 let array_agg =
18228 Expression::ArrayAgg(Box::new(crate::expressions::AggFunc {
18229 this,
18230 distinct: gc.distinct,
18231 filter: gc.filter,
18232 order_by: gc.order_by.unwrap_or_default(),
18233 name: None,
18234 ignore_nulls: None,
18235 having_max: None,
18236 limit: None,
18237 }));
18238 Ok(Expression::ArrayJoin(Box::new(
18239 crate::expressions::ArrayJoinFunc {
18240 this: array_agg,
18241 separator: sep,
18242 null_replacement: None,
18243 },
18244 )))
18245 }
18246 DialectType::Trino => {
18247 // GROUP_CONCAT(x [, sep]) -> LISTAGG(x, sep)
18248 let sep = gc.separator.unwrap_or(Expression::string(","));
18249 // For multi-arg CONCAT, wrap each arg in CAST(... AS VARCHAR)
18250 let this = wrap_concat_args_in_varchar_cast(gc.this);
18251 Ok(Expression::ListAgg(Box::new(
18252 crate::expressions::ListAggFunc {
18253 this,
18254 separator: Some(sep),
18255 on_overflow: None,
18256 order_by: gc.order_by,
18257 distinct: gc.distinct,
18258 filter: gc.filter,
18259 },
18260 )))
18261 }
18262 DialectType::PostgreSQL
18263 | DialectType::Redshift
18264 | DialectType::Snowflake
18265 | DialectType::DuckDB
18266 | DialectType::Hive
18267 | DialectType::ClickHouse => {
18268 // GROUP_CONCAT(x [, sep]) -> STRING_AGG(x, sep)
18269 let sep = gc.separator.unwrap_or(Expression::string(","));
18270 // Expand CONCAT(a,b,c) -> a || b || c for || dialects
18271 let this = expand_concat_to_dpipe(gc.this);
18272 // For PostgreSQL, add NULLS LAST for DESC / NULLS FIRST for ASC
18273 let order_by = if target == DialectType::PostgreSQL {
18274 gc.order_by.map(|ords| {
18275 ords.into_iter()
18276 .map(|mut o| {
18277 if o.nulls_first.is_none() {
18278 if o.desc {
18279 o.nulls_first = Some(false);
18280 // NULLS LAST
18281 } else {
18282 o.nulls_first = Some(true);
18283 // NULLS FIRST
18284 }
18285 }
18286 o
18287 })
18288 .collect()
18289 })
18290 } else {
18291 gc.order_by
18292 };
18293 Ok(Expression::StringAgg(Box::new(
18294 crate::expressions::StringAggFunc {
18295 this,
18296 separator: Some(sep),
18297 order_by,
18298 distinct: gc.distinct,
18299 filter: gc.filter,
18300 limit: None,
18301 },
18302 )))
18303 }
18304 DialectType::TSQL => {
18305 // GROUP_CONCAT(x [, sep]) -> STRING_AGG(x, sep) WITHIN GROUP (ORDER BY ...)
18306 // TSQL doesn't support DISTINCT in STRING_AGG
18307 let sep = gc.separator.unwrap_or(Expression::string(","));
18308 // Expand CONCAT(a,b,c) -> a + b + c for TSQL
18309 let this = expand_concat_to_plus(gc.this);
18310 Ok(Expression::StringAgg(Box::new(
18311 crate::expressions::StringAggFunc {
18312 this,
18313 separator: Some(sep),
18314 order_by: gc.order_by,
18315 distinct: false, // TSQL doesn't support DISTINCT in STRING_AGG
18316 filter: gc.filter,
18317 limit: None,
18318 },
18319 )))
18320 }
18321 DialectType::SQLite => {
18322 // GROUP_CONCAT stays as GROUP_CONCAT but ORDER BY is removed
18323 // SQLite GROUP_CONCAT doesn't support ORDER BY
18324 // Expand CONCAT(a,b,c) -> a || b || c
18325 let this = expand_concat_to_dpipe(gc.this);
18326 Ok(Expression::GroupConcat(Box::new(
18327 crate::expressions::GroupConcatFunc {
18328 this,
18329 separator: gc.separator,
18330 order_by: None, // SQLite doesn't support ORDER BY in GROUP_CONCAT
18331 distinct: gc.distinct,
18332 filter: gc.filter,
18333 },
18334 )))
18335 }
18336 DialectType::Spark | DialectType::Databricks => {
18337 // GROUP_CONCAT(x [, sep]) -> LISTAGG(x, sep)
18338 let sep = gc.separator.unwrap_or(Expression::string(","));
18339 Ok(Expression::ListAgg(Box::new(
18340 crate::expressions::ListAggFunc {
18341 this: gc.this,
18342 separator: Some(sep),
18343 on_overflow: None,
18344 order_by: gc.order_by,
18345 distinct: gc.distinct,
18346 filter: None,
18347 },
18348 )))
18349 }
18350 DialectType::MySQL
18351 | DialectType::SingleStore
18352 | DialectType::StarRocks => {
18353 // MySQL GROUP_CONCAT should have explicit SEPARATOR (default ',')
18354 if gc.separator.is_none() {
18355 let mut gc = gc;
18356 gc.separator = Some(Expression::string(","));
18357 Ok(Expression::GroupConcat(gc))
18358 } else {
18359 Ok(Expression::GroupConcat(gc))
18360 }
18361 }
18362 _ => Ok(Expression::GroupConcat(gc)),
18363 }
18364 } else {
18365 Ok(e)
18366 }
18367 }
18368 Action::TempTableHash => {
18369 match e {
18370 Expression::CreateTable(mut ct) => {
18371 // TSQL #table -> TEMPORARY TABLE with # stripped from name
18372 let name = &ct.name.name.name;
18373 if name.starts_with('#') {
18374 ct.name.name.name = name.trim_start_matches('#').to_string();
18375 }
18376 // Set temporary flag
18377 ct.temporary = true;
18378 Ok(Expression::CreateTable(ct))
18379 }
18380 Expression::Table(mut tr) => {
18381 // Strip # from table references
18382 let name = &tr.name.name;
18383 if name.starts_with('#') {
18384 tr.name.name = name.trim_start_matches('#').to_string();
18385 }
18386 Ok(Expression::Table(tr))
18387 }
18388 Expression::DropTable(mut dt) => {
18389 // Strip # from DROP TABLE names
18390 for table_ref in &mut dt.names {
18391 if table_ref.name.name.starts_with('#') {
18392 table_ref.name.name =
18393 table_ref.name.name.trim_start_matches('#').to_string();
18394 }
18395 }
18396 Ok(Expression::DropTable(dt))
18397 }
18398 _ => Ok(e),
18399 }
18400 }
18401 Action::NvlClearOriginal => {
18402 if let Expression::Nvl(mut f) = e {
18403 f.original_name = None;
18404 Ok(Expression::Nvl(f))
18405 } else {
18406 Ok(e)
18407 }
18408 }
18409 Action::HiveCastToTryCast => {
18410 // Convert Hive/Spark CAST to TRY_CAST for targets that support it
18411 if let Expression::Cast(mut c) = e {
18412 // For Spark/Hive -> DuckDB: TIMESTAMP -> TIMESTAMPTZ
18413 // (Spark's TIMESTAMP is always timezone-aware)
18414 if matches!(target, DialectType::DuckDB)
18415 && matches!(source, DialectType::Spark | DialectType::Databricks)
18416 && matches!(
18417 c.to,
18418 DataType::Timestamp {
18419 timezone: false,
18420 ..
18421 }
18422 )
18423 {
18424 c.to = DataType::Custom {
18425 name: "TIMESTAMPTZ".to_string(),
18426 };
18427 }
18428 // For Spark source -> Databricks: VARCHAR/CHAR -> STRING
18429 // Spark parses VARCHAR(n)/CHAR(n) as TEXT, normalize to STRING
18430 if matches!(target, DialectType::Databricks | DialectType::Spark)
18431 && matches!(
18432 source,
18433 DialectType::Spark | DialectType::Databricks | DialectType::Hive
18434 )
18435 && Self::has_varchar_char_type(&c.to)
18436 {
18437 c.to = Self::normalize_varchar_to_string(c.to);
18438 }
18439 Ok(Expression::TryCast(c))
18440 } else {
18441 Ok(e)
18442 }
18443 }
18444 Action::XorExpand => {
18445 // Expand XOR to (a AND NOT b) OR (NOT a AND b) for dialects without XOR keyword
18446 // Snowflake: use BOOLXOR(a, b) instead
18447 if let Expression::Xor(xor) = e {
18448 // Collect all XOR operands
18449 let mut operands = Vec::new();
18450 if let Some(this) = xor.this {
18451 operands.push(*this);
18452 }
18453 if let Some(expr) = xor.expression {
18454 operands.push(*expr);
18455 }
18456 operands.extend(xor.expressions);
18457
18458 // Snowflake: use BOOLXOR(a, b)
18459 if matches!(target, DialectType::Snowflake) && operands.len() == 2 {
18460 let a = operands.remove(0);
18461 let b = operands.remove(0);
18462 return Ok(Expression::Function(Box::new(Function::new(
18463 "BOOLXOR".to_string(),
18464 vec![a, b],
18465 ))));
18466 }
18467
18468 // Helper to build (a AND NOT b) OR (NOT a AND b)
18469 let make_xor = |a: Expression, b: Expression| -> Expression {
18470 let not_b = Expression::Not(Box::new(
18471 crate::expressions::UnaryOp::new(b.clone()),
18472 ));
18473 let not_a = Expression::Not(Box::new(
18474 crate::expressions::UnaryOp::new(a.clone()),
18475 ));
18476 let left_and = Expression::And(Box::new(BinaryOp {
18477 left: a,
18478 right: Expression::Paren(Box::new(Paren {
18479 this: not_b,
18480 trailing_comments: Vec::new(),
18481 })),
18482 left_comments: Vec::new(),
18483 operator_comments: Vec::new(),
18484 trailing_comments: Vec::new(),
18485 }));
18486 let right_and = Expression::And(Box::new(BinaryOp {
18487 left: Expression::Paren(Box::new(Paren {
18488 this: not_a,
18489 trailing_comments: Vec::new(),
18490 })),
18491 right: b,
18492 left_comments: Vec::new(),
18493 operator_comments: Vec::new(),
18494 trailing_comments: Vec::new(),
18495 }));
18496 Expression::Or(Box::new(BinaryOp {
18497 left: Expression::Paren(Box::new(Paren {
18498 this: left_and,
18499 trailing_comments: Vec::new(),
18500 })),
18501 right: Expression::Paren(Box::new(Paren {
18502 this: right_and,
18503 trailing_comments: Vec::new(),
18504 })),
18505 left_comments: Vec::new(),
18506 operator_comments: Vec::new(),
18507 trailing_comments: Vec::new(),
18508 }))
18509 };
18510
18511 if operands.len() >= 2 {
18512 let mut result = make_xor(operands.remove(0), operands.remove(0));
18513 for operand in operands {
18514 result = make_xor(result, operand);
18515 }
18516 Ok(result)
18517 } else if operands.len() == 1 {
18518 Ok(operands.remove(0))
18519 } else {
18520 // No operands - return FALSE (shouldn't happen)
18521 Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
18522 value: false,
18523 }))
18524 }
18525 } else {
18526 Ok(e)
18527 }
18528 }
18529 Action::DatePartUnquote => {
18530 // DATE_PART('month', x) -> DATE_PART(month, x) for Snowflake target
18531 // Convert the quoted string first arg to a bare Column/Identifier
18532 if let Expression::Function(mut f) = e {
18533 if let Some(Expression::Literal(crate::expressions::Literal::String(s))) =
18534 f.args.first()
18535 {
18536 let bare_name = s.to_lowercase();
18537 f.args[0] = Expression::Column(crate::expressions::Column {
18538 name: Identifier::new(bare_name),
18539 table: None,
18540 join_mark: false,
18541 trailing_comments: Vec::new(),
18542 });
18543 }
18544 Ok(Expression::Function(f))
18545 } else {
18546 Ok(e)
18547 }
18548 }
18549 Action::ArrayLengthConvert => {
18550 // Extract the argument from the expression
18551 let arg = match e {
18552 Expression::Cardinality(ref f) => f.this.clone(),
18553 Expression::ArrayLength(ref f) => f.this.clone(),
18554 Expression::ArraySize(ref f) => f.this.clone(),
18555 _ => return Ok(e),
18556 };
18557 match target {
18558 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
18559 Ok(Expression::Function(Box::new(Function::new(
18560 "SIZE".to_string(),
18561 vec![arg],
18562 ))))
18563 }
18564 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
18565 Ok(Expression::Cardinality(Box::new(
18566 crate::expressions::UnaryFunc::new(arg),
18567 )))
18568 }
18569 DialectType::BigQuery => Ok(Expression::ArrayLength(Box::new(
18570 crate::expressions::UnaryFunc::new(arg),
18571 ))),
18572 DialectType::DuckDB => Ok(Expression::ArrayLength(Box::new(
18573 crate::expressions::UnaryFunc::new(arg),
18574 ))),
18575 DialectType::PostgreSQL | DialectType::Redshift => {
18576 // PostgreSQL ARRAY_LENGTH requires dimension arg
18577 Ok(Expression::Function(Box::new(Function::new(
18578 "ARRAY_LENGTH".to_string(),
18579 vec![arg, Expression::number(1)],
18580 ))))
18581 }
18582 DialectType::Snowflake => Ok(Expression::ArraySize(Box::new(
18583 crate::expressions::UnaryFunc::new(arg),
18584 ))),
18585 _ => Ok(e), // Keep original
18586 }
18587 }
18588
18589 Action::JsonExtractToArrow => {
18590 // JSON_EXTRACT(x, path) -> x -> path for SQLite/DuckDB (set arrow_syntax = true)
18591 if let Expression::JsonExtract(mut f) = e {
18592 f.arrow_syntax = true;
18593 // Transform path: convert bracket notation to dot notation
18594 // SQLite strips wildcards, DuckDB preserves them
18595 if let Expression::Literal(Literal::String(ref s)) = f.path {
18596 let mut transformed = s.clone();
18597 if matches!(target, DialectType::SQLite) {
18598 transformed = Self::strip_json_wildcards(&transformed);
18599 }
18600 transformed = Self::bracket_to_dot_notation(&transformed);
18601 if transformed != *s {
18602 f.path = Expression::string(&transformed);
18603 }
18604 }
18605 Ok(Expression::JsonExtract(f))
18606 } else {
18607 Ok(e)
18608 }
18609 }
18610
18611 Action::JsonExtractToGetJsonObject => {
18612 if let Expression::JsonExtract(f) = e {
18613 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
18614 // JSON_EXTRACT(x, '$.key') -> JSON_EXTRACT_PATH(x, 'key') for PostgreSQL
18615 // Use proper decomposition that handles brackets
18616 let keys: Vec<Expression> =
18617 if let Expression::Literal(Literal::String(ref s)) = f.path {
18618 let parts = Self::decompose_json_path(s);
18619 parts.into_iter().map(|k| Expression::string(&k)).collect()
18620 } else {
18621 vec![f.path]
18622 };
18623 let func_name = if matches!(target, DialectType::Redshift) {
18624 "JSON_EXTRACT_PATH_TEXT"
18625 } else {
18626 "JSON_EXTRACT_PATH"
18627 };
18628 let mut args = vec![f.this];
18629 args.extend(keys);
18630 Ok(Expression::Function(Box::new(Function::new(
18631 func_name.to_string(),
18632 args,
18633 ))))
18634 } else {
18635 // GET_JSON_OBJECT(x, '$.path') for Hive/Spark
18636 // Convert bracket double quotes to single quotes
18637 let path = if let Expression::Literal(Literal::String(ref s)) = f.path {
18638 let normalized = Self::bracket_to_single_quotes(s);
18639 if normalized != *s {
18640 Expression::string(&normalized)
18641 } else {
18642 f.path
18643 }
18644 } else {
18645 f.path
18646 };
18647 Ok(Expression::Function(Box::new(Function::new(
18648 "GET_JSON_OBJECT".to_string(),
18649 vec![f.this, path],
18650 ))))
18651 }
18652 } else {
18653 Ok(e)
18654 }
18655 }
18656
18657 Action::JsonExtractScalarToGetJsonObject => {
18658 // JSON_EXTRACT_SCALAR(x, '$.path') -> GET_JSON_OBJECT(x, '$.path') for Hive/Spark
18659 if let Expression::JsonExtractScalar(f) = e {
18660 Ok(Expression::Function(Box::new(Function::new(
18661 "GET_JSON_OBJECT".to_string(),
18662 vec![f.this, f.path],
18663 ))))
18664 } else {
18665 Ok(e)
18666 }
18667 }
18668
18669 Action::JsonExtractToTsql => {
18670 // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> ISNULL(JSON_QUERY(x, path), JSON_VALUE(x, path)) for TSQL
18671 let (this, path) = match e {
18672 Expression::JsonExtract(f) => (f.this, f.path),
18673 Expression::JsonExtractScalar(f) => (f.this, f.path),
18674 _ => return Ok(e),
18675 };
18676 // Transform path: strip wildcards, convert bracket notation to dot notation
18677 let transformed_path = if let Expression::Literal(Literal::String(ref s)) = path
18678 {
18679 let stripped = Self::strip_json_wildcards(s);
18680 let dotted = Self::bracket_to_dot_notation(&stripped);
18681 Expression::string(&dotted)
18682 } else {
18683 path
18684 };
18685 let json_query = Expression::Function(Box::new(Function::new(
18686 "JSON_QUERY".to_string(),
18687 vec![this.clone(), transformed_path.clone()],
18688 )));
18689 let json_value = Expression::Function(Box::new(Function::new(
18690 "JSON_VALUE".to_string(),
18691 vec![this, transformed_path],
18692 )));
18693 Ok(Expression::Function(Box::new(Function::new(
18694 "ISNULL".to_string(),
18695 vec![json_query, json_value],
18696 ))))
18697 }
18698
18699 Action::JsonExtractToClickHouse => {
18700 // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> JSONExtractString(x, 'key1', idx, 'key2') for ClickHouse
18701 let (this, path) = match e {
18702 Expression::JsonExtract(f) => (f.this, f.path),
18703 Expression::JsonExtractScalar(f) => (f.this, f.path),
18704 _ => return Ok(e),
18705 };
18706 let args: Vec<Expression> =
18707 if let Expression::Literal(Literal::String(ref s)) = path {
18708 let parts = Self::decompose_json_path(s);
18709 let mut result = vec![this];
18710 for part in parts {
18711 // ClickHouse uses 1-based integer indices for array access
18712 if let Ok(idx) = part.parse::<i64>() {
18713 result.push(Expression::number(idx + 1));
18714 } else {
18715 result.push(Expression::string(&part));
18716 }
18717 }
18718 result
18719 } else {
18720 vec![this, path]
18721 };
18722 Ok(Expression::Function(Box::new(Function::new(
18723 "JSONExtractString".to_string(),
18724 args,
18725 ))))
18726 }
18727
18728 Action::JsonExtractScalarConvert => {
18729 // JSON_EXTRACT_SCALAR -> target-specific
18730 if let Expression::JsonExtractScalar(f) = e {
18731 match target {
18732 DialectType::PostgreSQL | DialectType::Redshift => {
18733 // JSON_EXTRACT_SCALAR(x, '$.path') -> JSON_EXTRACT_PATH_TEXT(x, 'key1', 'key2')
18734 let keys: Vec<Expression> =
18735 if let Expression::Literal(Literal::String(ref s)) = f.path {
18736 let parts = Self::decompose_json_path(s);
18737 parts.into_iter().map(|k| Expression::string(&k)).collect()
18738 } else {
18739 vec![f.path]
18740 };
18741 let mut args = vec![f.this];
18742 args.extend(keys);
18743 Ok(Expression::Function(Box::new(Function::new(
18744 "JSON_EXTRACT_PATH_TEXT".to_string(),
18745 args,
18746 ))))
18747 }
18748 DialectType::Snowflake => {
18749 // JSON_EXTRACT_SCALAR(x, '$.path') -> JSON_EXTRACT_PATH_TEXT(x, 'stripped_path')
18750 let stripped_path =
18751 if let Expression::Literal(Literal::String(ref s)) = f.path {
18752 let stripped = Self::strip_json_dollar_prefix(s);
18753 Expression::string(&stripped)
18754 } else {
18755 f.path
18756 };
18757 Ok(Expression::Function(Box::new(Function::new(
18758 "JSON_EXTRACT_PATH_TEXT".to_string(),
18759 vec![f.this, stripped_path],
18760 ))))
18761 }
18762 DialectType::SQLite | DialectType::DuckDB => {
18763 // JSON_EXTRACT_SCALAR(x, '$.path') -> x ->> '$.path'
18764 Ok(Expression::JsonExtractScalar(Box::new(
18765 crate::expressions::JsonExtractFunc {
18766 this: f.this,
18767 path: f.path,
18768 returning: f.returning,
18769 arrow_syntax: true,
18770 hash_arrow_syntax: false,
18771 wrapper_option: None,
18772 quotes_option: None,
18773 on_scalar_string: false,
18774 on_error: None,
18775 },
18776 )))
18777 }
18778 _ => Ok(Expression::JsonExtractScalar(f)),
18779 }
18780 } else {
18781 Ok(e)
18782 }
18783 }
18784
18785 Action::JsonPathNormalize => {
18786 // Normalize JSON path format for BigQuery, MySQL, etc.
18787 if let Expression::JsonExtract(mut f) = e {
18788 if let Expression::Literal(Literal::String(ref s)) = f.path {
18789 let mut normalized = s.clone();
18790 // Convert bracket notation and handle wildcards per dialect
18791 match target {
18792 DialectType::BigQuery => {
18793 // BigQuery strips wildcards and uses single quotes in brackets
18794 normalized = Self::strip_json_wildcards(&normalized);
18795 normalized = Self::bracket_to_single_quotes(&normalized);
18796 }
18797 DialectType::MySQL => {
18798 // MySQL preserves wildcards, converts brackets to dot notation
18799 normalized = Self::bracket_to_dot_notation(&normalized);
18800 }
18801 _ => {}
18802 }
18803 if normalized != *s {
18804 f.path = Expression::string(&normalized);
18805 }
18806 }
18807 Ok(Expression::JsonExtract(f))
18808 } else {
18809 Ok(e)
18810 }
18811 }
18812
18813 Action::JsonQueryValueConvert => {
18814 // JsonQuery/JsonValue -> target-specific
18815 let (f, is_query) = match e {
18816 Expression::JsonQuery(f) => (f, true),
18817 Expression::JsonValue(f) => (f, false),
18818 _ => return Ok(e),
18819 };
18820 match target {
18821 DialectType::TSQL | DialectType::Fabric => {
18822 // ISNULL(JSON_QUERY(...), JSON_VALUE(...))
18823 let json_query = Expression::Function(Box::new(Function::new(
18824 "JSON_QUERY".to_string(),
18825 vec![f.this.clone(), f.path.clone()],
18826 )));
18827 let json_value = Expression::Function(Box::new(Function::new(
18828 "JSON_VALUE".to_string(),
18829 vec![f.this, f.path],
18830 )));
18831 Ok(Expression::Function(Box::new(Function::new(
18832 "ISNULL".to_string(),
18833 vec![json_query, json_value],
18834 ))))
18835 }
18836 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
18837 Ok(Expression::Function(Box::new(Function::new(
18838 "GET_JSON_OBJECT".to_string(),
18839 vec![f.this, f.path],
18840 ))))
18841 }
18842 DialectType::PostgreSQL | DialectType::Redshift => {
18843 Ok(Expression::Function(Box::new(Function::new(
18844 "JSON_EXTRACT_PATH_TEXT".to_string(),
18845 vec![f.this, f.path],
18846 ))))
18847 }
18848 DialectType::DuckDB | DialectType::SQLite => {
18849 // json -> path arrow syntax
18850 Ok(Expression::JsonExtract(Box::new(
18851 crate::expressions::JsonExtractFunc {
18852 this: f.this,
18853 path: f.path,
18854 returning: f.returning,
18855 arrow_syntax: true,
18856 hash_arrow_syntax: false,
18857 wrapper_option: f.wrapper_option,
18858 quotes_option: f.quotes_option,
18859 on_scalar_string: f.on_scalar_string,
18860 on_error: f.on_error,
18861 },
18862 )))
18863 }
18864 DialectType::Snowflake => {
18865 // GET_PATH(PARSE_JSON(json), 'path')
18866 // Strip $. prefix from path
18867 // Only wrap in PARSE_JSON if not already a PARSE_JSON call or ParseJson expression
18868 let json_expr = match &f.this {
18869 Expression::Function(ref inner_f)
18870 if inner_f.name.eq_ignore_ascii_case("PARSE_JSON") =>
18871 {
18872 f.this
18873 }
18874 Expression::ParseJson(_) => {
18875 // Already a ParseJson expression, which generates as PARSE_JSON(...)
18876 f.this
18877 }
18878 _ => Expression::Function(Box::new(Function::new(
18879 "PARSE_JSON".to_string(),
18880 vec![f.this],
18881 ))),
18882 };
18883 let path_str = match &f.path {
18884 Expression::Literal(Literal::String(s)) => {
18885 let stripped = s.strip_prefix("$.").unwrap_or(s);
18886 Expression::Literal(Literal::String(stripped.to_string()))
18887 }
18888 other => other.clone(),
18889 };
18890 Ok(Expression::Function(Box::new(Function::new(
18891 "GET_PATH".to_string(),
18892 vec![json_expr, path_str],
18893 ))))
18894 }
18895 _ => {
18896 // Default: keep as JSON_QUERY/JSON_VALUE function
18897 let func_name = if is_query { "JSON_QUERY" } else { "JSON_VALUE" };
18898 Ok(Expression::Function(Box::new(Function::new(
18899 func_name.to_string(),
18900 vec![f.this, f.path],
18901 ))))
18902 }
18903 }
18904 }
18905
18906 Action::JsonLiteralToJsonParse => {
18907 // CAST('x' AS JSON) -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake
18908 if let Expression::Cast(c) = e {
18909 let func_name = if matches!(target, DialectType::Snowflake) {
18910 "PARSE_JSON"
18911 } else {
18912 "JSON_PARSE"
18913 };
18914 Ok(Expression::Function(Box::new(Function::new(
18915 func_name.to_string(),
18916 vec![c.this],
18917 ))))
18918 } else {
18919 Ok(e)
18920 }
18921 }
18922
18923 Action::AtTimeZoneConvert => {
18924 // AT TIME ZONE -> target-specific conversion
18925 if let Expression::AtTimeZone(atz) = e {
18926 match target {
18927 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
18928 Ok(Expression::Function(Box::new(Function::new(
18929 "AT_TIMEZONE".to_string(),
18930 vec![atz.this, atz.zone],
18931 ))))
18932 }
18933 DialectType::Spark | DialectType::Databricks => {
18934 Ok(Expression::Function(Box::new(Function::new(
18935 "FROM_UTC_TIMESTAMP".to_string(),
18936 vec![atz.this, atz.zone],
18937 ))))
18938 }
18939 DialectType::Snowflake => {
18940 // CONVERT_TIMEZONE('zone', expr)
18941 Ok(Expression::Function(Box::new(Function::new(
18942 "CONVERT_TIMEZONE".to_string(),
18943 vec![atz.zone, atz.this],
18944 ))))
18945 }
18946 DialectType::BigQuery => {
18947 // TIMESTAMP(DATETIME(expr, 'zone'))
18948 let datetime_call = Expression::Function(Box::new(Function::new(
18949 "DATETIME".to_string(),
18950 vec![atz.this, atz.zone],
18951 )));
18952 Ok(Expression::Function(Box::new(Function::new(
18953 "TIMESTAMP".to_string(),
18954 vec![datetime_call],
18955 ))))
18956 }
18957 _ => Ok(Expression::Function(Box::new(Function::new(
18958 "AT_TIMEZONE".to_string(),
18959 vec![atz.this, atz.zone],
18960 )))),
18961 }
18962 } else {
18963 Ok(e)
18964 }
18965 }
18966
18967 Action::DayOfWeekConvert => {
18968 // DAY_OF_WEEK -> ISODOW for DuckDB, ((DAYOFWEEK(x) % 7) + 1) for Spark
18969 if let Expression::DayOfWeek(f) = e {
18970 match target {
18971 DialectType::DuckDB => Ok(Expression::Function(Box::new(
18972 Function::new("ISODOW".to_string(), vec![f.this]),
18973 ))),
18974 DialectType::Spark | DialectType::Databricks => {
18975 // ((DAYOFWEEK(x) % 7) + 1)
18976 let dayofweek = Expression::Function(Box::new(Function::new(
18977 "DAYOFWEEK".to_string(),
18978 vec![f.this],
18979 )));
18980 let modulo = Expression::Mod(Box::new(BinaryOp {
18981 left: dayofweek,
18982 right: Expression::number(7),
18983 left_comments: Vec::new(),
18984 operator_comments: Vec::new(),
18985 trailing_comments: Vec::new(),
18986 }));
18987 let paren_mod = Expression::Paren(Box::new(Paren {
18988 this: modulo,
18989 trailing_comments: Vec::new(),
18990 }));
18991 let add_one = Expression::Add(Box::new(BinaryOp {
18992 left: paren_mod,
18993 right: Expression::number(1),
18994 left_comments: Vec::new(),
18995 operator_comments: Vec::new(),
18996 trailing_comments: Vec::new(),
18997 }));
18998 Ok(Expression::Paren(Box::new(Paren {
18999 this: add_one,
19000 trailing_comments: Vec::new(),
19001 })))
19002 }
19003 _ => Ok(Expression::DayOfWeek(f)),
19004 }
19005 } else {
19006 Ok(e)
19007 }
19008 }
19009
19010 Action::MaxByMinByConvert => {
19011 // MAX_BY -> argMax for ClickHouse, drop 3rd arg for Spark
19012 // MIN_BY -> argMin for ClickHouse, ARG_MIN for DuckDB, drop 3rd arg for Spark/ClickHouse
19013 // Handle both Expression::Function and Expression::AggregateFunction
19014 let (is_max, args) = match &e {
19015 Expression::Function(f) => {
19016 (f.name.eq_ignore_ascii_case("MAX_BY"), f.args.clone())
19017 }
19018 Expression::AggregateFunction(af) => {
19019 (af.name.eq_ignore_ascii_case("MAX_BY"), af.args.clone())
19020 }
19021 _ => return Ok(e),
19022 };
19023 match target {
19024 DialectType::ClickHouse => {
19025 let name = if is_max { "argMax" } else { "argMin" };
19026 let mut args = args;
19027 args.truncate(2);
19028 Ok(Expression::Function(Box::new(Function::new(
19029 name.to_string(),
19030 args,
19031 ))))
19032 }
19033 DialectType::DuckDB => {
19034 let name = if is_max { "ARG_MAX" } else { "ARG_MIN" };
19035 Ok(Expression::Function(Box::new(Function::new(
19036 name.to_string(),
19037 args,
19038 ))))
19039 }
19040 DialectType::Spark | DialectType::Databricks => {
19041 let mut args = args;
19042 args.truncate(2);
19043 let name = if is_max { "MAX_BY" } else { "MIN_BY" };
19044 Ok(Expression::Function(Box::new(Function::new(
19045 name.to_string(),
19046 args,
19047 ))))
19048 }
19049 _ => Ok(e),
19050 }
19051 }
19052
19053 Action::ElementAtConvert => {
19054 // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
19055 let (arr, idx) = if let Expression::ElementAt(bf) = e {
19056 (bf.this, bf.expression)
19057 } else if let Expression::Function(ref f) = e {
19058 if f.args.len() >= 2 {
19059 if let Expression::Function(f) = e {
19060 let mut args = f.args;
19061 let arr = args.remove(0);
19062 let idx = args.remove(0);
19063 (arr, idx)
19064 } else {
19065 unreachable!("outer condition already matched Expression::Function")
19066 }
19067 } else {
19068 return Ok(e);
19069 }
19070 } else {
19071 return Ok(e);
19072 };
19073 match target {
19074 DialectType::PostgreSQL => {
19075 // Wrap array in parens for PostgreSQL: (ARRAY[1,2,3])[4]
19076 let arr_expr = Expression::Paren(Box::new(Paren {
19077 this: arr,
19078 trailing_comments: vec![],
19079 }));
19080 Ok(Expression::Subscript(Box::new(
19081 crate::expressions::Subscript {
19082 this: arr_expr,
19083 index: idx,
19084 },
19085 )))
19086 }
19087 DialectType::BigQuery => {
19088 // BigQuery: convert ARRAY[...] to bare [...] for subscript
19089 let arr_expr = match arr {
19090 Expression::ArrayFunc(af) => Expression::ArrayFunc(Box::new(
19091 crate::expressions::ArrayConstructor {
19092 expressions: af.expressions,
19093 bracket_notation: true,
19094 use_list_keyword: false,
19095 },
19096 )),
19097 other => other,
19098 };
19099 let safe_ordinal = Expression::Function(Box::new(Function::new(
19100 "SAFE_ORDINAL".to_string(),
19101 vec![idx],
19102 )));
19103 Ok(Expression::Subscript(Box::new(
19104 crate::expressions::Subscript {
19105 this: arr_expr,
19106 index: safe_ordinal,
19107 },
19108 )))
19109 }
19110 _ => Ok(Expression::Function(Box::new(Function::new(
19111 "ELEMENT_AT".to_string(),
19112 vec![arr, idx],
19113 )))),
19114 }
19115 }
19116
19117 Action::CurrentUserParens => {
19118 // CURRENT_USER -> CURRENT_USER() for Snowflake
19119 Ok(Expression::Function(Box::new(Function::new(
19120 "CURRENT_USER".to_string(),
19121 vec![],
19122 ))))
19123 }
19124
                Action::ArrayAggToCollectList => {
                    // ARRAY_AGG(x ORDER BY ...) -> COLLECT_LIST(x) for Hive/Spark.
                    // Python sqlglot Hive.arrayagg_sql strips ORDER BY for simple cases
                    // but preserves it when DISTINCT/IGNORE NULLS/LIMIT are present.
                    match e {
                        Expression::AggregateFunction(mut af) => {
                            // "Simple" = none of the modifiers that affect which/how
                            // many elements get collected; only then can ORDER BY be
                            // dropped without changing the result set.
                            let is_simple =
                                !af.distinct && af.ignore_nulls.is_none() && af.limit.is_none();
                            // COLLECT_LIST takes a single argument; keep only the first.
                            let args = if af.args.is_empty() {
                                vec![]
                            } else {
                                vec![af.args[0].clone()]
                            };
                            af.name = "COLLECT_LIST".to_string();
                            af.args = args;
                            if is_simple {
                                af.order_by = Vec::new();
                            }
                            Ok(Expression::AggregateFunction(af))
                        }
                        Expression::ArrayAgg(agg) => {
                            // Dedicated ArrayAgg node: rebuild as a generic aggregate
                            // under the COLLECT_LIST name, applying the same ORDER BY
                            // stripping rule as above.
                            let is_simple =
                                !agg.distinct && agg.ignore_nulls.is_none() && agg.limit.is_none();
                            Ok(Expression::AggregateFunction(Box::new(
                                crate::expressions::AggregateFunction {
                                    name: "COLLECT_LIST".to_string(),
                                    args: vec![agg.this.clone()],
                                    distinct: agg.distinct,
                                    filter: agg.filter.clone(),
                                    order_by: if is_simple {
                                        Vec::new()
                                    } else {
                                        agg.order_by.clone()
                                    },
                                    limit: agg.limit.clone(),
                                    ignore_nulls: agg.ignore_nulls,
                                },
                            )))
                        }
                        // Anything else passes through untouched.
                        _ => Ok(e),
                    }
                }
19167
                Action::ArraySyntaxConvert => {
                    // Normalize the two array-constructor spellings for the target dialect.
                    match e {
                        // ARRAY[1, 2] (ArrayFunc bracket_notation=false) -> set bracket_notation=true
                        // so the generator uses dialect-specific output (ARRAY() for Spark, [] for BigQuery)
                        Expression::ArrayFunc(arr) if !arr.bracket_notation => Ok(
                            Expression::ArrayFunc(Box::new(crate::expressions::ArrayConstructor {
                                expressions: arr.expressions,
                                bracket_notation: true,
                                use_list_keyword: false,
                            })),
                        ),
                        // ARRAY(y) function style -> ArrayFunc for target dialect
                        // bracket_notation=true for BigQuery/DuckDB/ClickHouse/StarRocks (output []), false for Presto (output ARRAY[])
                        Expression::Function(f) if f.name.eq_ignore_ascii_case("ARRAY") => {
                            let bracket = matches!(
                                target,
                                DialectType::BigQuery
                                    | DialectType::DuckDB
                                    | DialectType::ClickHouse
                                    | DialectType::StarRocks
                            );
                            Ok(Expression::ArrayFunc(Box::new(
                                crate::expressions::ArrayConstructor {
                                    expressions: f.args,
                                    bracket_notation: bracket,
                                    use_list_keyword: false,
                                },
                            )))
                        }
                        // Any other expression is left untouched.
                        _ => Ok(e),
                    }
                }
19200
19201 Action::CastToJsonForSpark => {
19202 // CAST(x AS JSON) -> TO_JSON(x) for Spark
19203 if let Expression::Cast(c) = e {
19204 Ok(Expression::Function(Box::new(Function::new(
19205 "TO_JSON".to_string(),
19206 vec![c.this],
19207 ))))
19208 } else {
19209 Ok(e)
19210 }
19211 }
19212
19213 Action::CastJsonToFromJson => {
19214 // CAST(ParseJson(literal) AS ARRAY/MAP/STRUCT) -> FROM_JSON(literal, type_string) for Spark
19215 if let Expression::Cast(c) = e {
19216 // Extract the string literal from ParseJson
19217 let literal_expr = if let Expression::ParseJson(pj) = c.this {
19218 pj.this
19219 } else {
19220 c.this
19221 };
19222 // Convert the target DataType to Spark's type string format
19223 let type_str = Self::data_type_to_spark_string(&c.to);
19224 Ok(Expression::Function(Box::new(Function::new(
19225 "FROM_JSON".to_string(),
19226 vec![literal_expr, Expression::Literal(Literal::String(type_str))],
19227 ))))
19228 } else {
19229 Ok(e)
19230 }
19231 }
19232
                Action::ToJsonConvert => {
                    // TO_JSON(x) -> target-specific conversion.
                    if let Expression::ToJson(f) = e {
                        let arg = f.this;
                        match target {
                            DialectType::Presto | DialectType::Trino => {
                                // JSON_FORMAT(CAST(x AS JSON))
                                let cast_json = Expression::Cast(Box::new(Cast {
                                    this: arg,
                                    // JSON is not a first-class DataType here, so it is
                                    // carried as a Custom type name.
                                    to: DataType::Custom {
                                        name: "JSON".to_string(),
                                    },
                                    trailing_comments: vec![],
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                }));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "JSON_FORMAT".to_string(),
                                    vec![cast_json],
                                ))))
                            }
                            // BigQuery has a direct equivalent function.
                            DialectType::BigQuery => Ok(Expression::Function(Box::new(
                                Function::new("TO_JSON_STRING".to_string(), vec![arg]),
                            ))),
                            DialectType::DuckDB => {
                                // CAST(TO_JSON(x) AS TEXT)
                                let to_json =
                                    Expression::ToJson(Box::new(crate::expressions::UnaryFunc {
                                        this: arg,
                                        original_name: None,
                                    }));
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: to_json,
                                    to: DataType::Text,
                                    trailing_comments: vec![],
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                })))
                            }
                            // Other targets: rebuild the ToJson node unchanged
                            // (the match consumed `f`, so it must be reconstructed).
                            _ => Ok(Expression::ToJson(Box::new(
                                crate::expressions::UnaryFunc {
                                    this: arg,
                                    original_name: None,
                                },
                            ))),
                        }
                    } else {
                        Ok(e)
                    }
                }
19285
19286 Action::VarianceToClickHouse => {
19287 if let Expression::Variance(f) = e {
19288 Ok(Expression::Function(Box::new(Function::new(
19289 "varSamp".to_string(),
19290 vec![f.this],
19291 ))))
19292 } else {
19293 Ok(e)
19294 }
19295 }
19296
19297 Action::StddevToClickHouse => {
19298 if let Expression::Stddev(f) = e {
19299 Ok(Expression::Function(Box::new(Function::new(
19300 "stddevSamp".to_string(),
19301 vec![f.this],
19302 ))))
19303 } else {
19304 Ok(e)
19305 }
19306 }
19307
19308 Action::ApproxQuantileConvert => {
19309 if let Expression::ApproxQuantile(aq) = e {
19310 let mut args = vec![*aq.this];
19311 if let Some(q) = aq.quantile {
19312 args.push(*q);
19313 }
19314 Ok(Expression::Function(Box::new(Function::new(
19315 "APPROX_PERCENTILE".to_string(),
19316 args,
19317 ))))
19318 } else {
19319 Ok(e)
19320 }
19321 }
19322
19323 Action::DollarParamConvert => {
19324 if let Expression::Parameter(p) = e {
19325 Ok(Expression::Parameter(Box::new(
19326 crate::expressions::Parameter {
19327 name: p.name,
19328 index: p.index,
19329 style: crate::expressions::ParameterStyle::At,
19330 quoted: p.quoted,
19331 string_quoted: p.string_quoted,
19332 expression: p.expression,
19333 },
19334 )))
19335 } else {
19336 Ok(e)
19337 }
19338 }
19339
19340 Action::EscapeStringNormalize => {
19341 if let Expression::Literal(Literal::EscapeString(s)) = e {
19342 // Strip prefix (e.g., "e:" or "E:") if present from tokenizer
19343 let stripped = if s.starts_with("e:") || s.starts_with("E:") {
19344 s[2..].to_string()
19345 } else {
19346 s
19347 };
19348 let normalized = stripped
19349 .replace('\n', "\\n")
19350 .replace('\r', "\\r")
19351 .replace('\t', "\\t");
19352 match target {
19353 DialectType::BigQuery => {
19354 // BigQuery: e'...' -> CAST(b'...' AS STRING)
19355 // Use Raw for the b'...' part to avoid double-escaping
19356 let raw_sql = format!("CAST(b'{}' AS STRING)", normalized);
19357 Ok(Expression::Raw(crate::expressions::Raw { sql: raw_sql }))
19358 }
19359 _ => Ok(Expression::Literal(Literal::EscapeString(normalized))),
19360 }
19361 } else {
19362 Ok(e)
19363 }
19364 }
19365
19366 Action::StraightJoinCase => {
19367 // straight_join: keep lowercase for DuckDB, quote for MySQL
19368 if let Expression::Column(col) = e {
19369 if col.name.name == "STRAIGHT_JOIN" {
19370 let mut new_col = col;
19371 new_col.name.name = "straight_join".to_string();
19372 if matches!(target, DialectType::MySQL) {
19373 // MySQL: needs quoting since it's a reserved keyword
19374 new_col.name.quoted = true;
19375 }
19376 Ok(Expression::Column(new_col))
19377 } else {
19378 Ok(Expression::Column(col))
19379 }
19380 } else {
19381 Ok(e)
19382 }
19383 }
19384
19385 Action::TablesampleReservoir => {
19386 // TABLESAMPLE -> TABLESAMPLE RESERVOIR for DuckDB
19387 if let Expression::TableSample(mut ts) = e {
19388 if let Some(ref mut sample) = ts.sample {
19389 sample.method = crate::expressions::SampleMethod::Reservoir;
19390 sample.explicit_method = true;
19391 }
19392 Ok(Expression::TableSample(ts))
19393 } else {
19394 Ok(e)
19395 }
19396 }
19397
                Action::TablesampleSnowflakeStrip => {
                    // Strip method and PERCENT for Snowflake target from non-Snowflake source.
                    // The sample clause may live on a dedicated TableSample node or be
                    // attached directly to a Table node; both get the same normalization.
                    match e {
                        Expression::TableSample(mut ts) => {
                            if let Some(ref mut sample) = ts.sample {
                                sample.suppress_method_output = true;
                                sample.unit_after_size = false;
                                sample.is_percent = false;
                            }
                            Ok(Expression::TableSample(ts))
                        }
                        Expression::Table(mut t) => {
                            if let Some(ref mut sample) = t.table_sample {
                                sample.suppress_method_output = true;
                                sample.unit_after_size = false;
                                sample.is_percent = false;
                            }
                            Ok(Expression::Table(t))
                        }
                        // Expressions without a sample clause pass through untouched.
                        _ => Ok(e),
                    }
                }
19420
19421 Action::FirstToAnyValue => {
19422 // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
19423 if let Expression::First(mut agg) = e {
19424 agg.ignore_nulls = None;
19425 agg.name = Some("ANY_VALUE".to_string());
19426 Ok(Expression::AnyValue(agg))
19427 } else {
19428 Ok(e)
19429 }
19430 }
19431
                Action::ArrayIndexConvert => {
                    // Subscript index: 1-based to 0-based for BigQuery.
                    // Only literal integer indexes are rewritten; computed indexes
                    // (or literals that fail to parse as i64) pass through unchanged.
                    // NOTE(review): a literal 0 becomes -1 here — assumes the source
                    // dialect guarantees 1-based positive indexes; confirm upstream.
                    if let Expression::Subscript(mut sub) = e {
                        if let Expression::Literal(Literal::Number(ref n)) = sub.index {
                            if let Ok(val) = n.parse::<i64>() {
                                sub.index =
                                    Expression::Literal(Literal::Number((val - 1).to_string()));
                            }
                        }
                        Ok(Expression::Subscript(sub))
                    } else {
                        Ok(e)
                    }
                }
19446
19447 Action::AnyValueIgnoreNulls => {
19448 // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
19449 if let Expression::AnyValue(mut av) = e {
19450 if av.ignore_nulls.is_none() {
19451 av.ignore_nulls = Some(true);
19452 }
19453 Ok(Expression::AnyValue(av))
19454 } else {
19455 Ok(e)
19456 }
19457 }
19458
19459 Action::BigQueryNullsOrdering => {
19460 // BigQuery doesn't support NULLS FIRST/LAST in window function ORDER BY
19461 if let Expression::WindowFunction(mut wf) = e {
19462 for o in &mut wf.over.order_by {
19463 o.nulls_first = None;
19464 }
19465 Ok(Expression::WindowFunction(wf))
19466 } else if let Expression::Ordered(mut o) = e {
19467 o.nulls_first = None;
19468 Ok(Expression::Ordered(o))
19469 } else {
19470 Ok(e)
19471 }
19472 }
19473
19474 Action::SnowflakeFloatProtect => {
19475 // Convert DataType::Float to DataType::Custom("FLOAT") to prevent
19476 // Snowflake's target transform from converting it to DOUBLE.
19477 // Non-Snowflake sources should keep their FLOAT spelling.
19478 if let Expression::DataType(DataType::Float { .. }) = e {
19479 Ok(Expression::DataType(DataType::Custom {
19480 name: "FLOAT".to_string(),
19481 }))
19482 } else {
19483 Ok(e)
19484 }
19485 }
19486
19487 Action::MysqlNullsOrdering => {
19488 // MySQL doesn't support NULLS FIRST/LAST - strip or rewrite
19489 if let Expression::Ordered(mut o) = e {
19490 let nulls_last = o.nulls_first == Some(false);
19491 let desc = o.desc;
19492 // MySQL default: ASC -> NULLS LAST, DESC -> NULLS FIRST
19493 // If requested ordering matches default, just strip NULLS clause
19494 let matches_default = if desc {
19495 // DESC default is NULLS FIRST, so nulls_first=true matches
19496 o.nulls_first == Some(true)
19497 } else {
19498 // ASC default is NULLS LAST, so nulls_first=false matches
19499 nulls_last
19500 };
19501 if matches_default {
19502 o.nulls_first = None;
19503 Ok(Expression::Ordered(o))
19504 } else {
19505 // Need CASE WHEN x IS NULL THEN 0/1 ELSE 0/1 END, x
19506 // For ASC NULLS FIRST: ORDER BY CASE WHEN x IS NULL THEN 0 ELSE 1 END, x ASC
19507 // For DESC NULLS LAST: ORDER BY CASE WHEN x IS NULL THEN 1 ELSE 0 END, x DESC
19508 let null_val = if desc { 1 } else { 0 };
19509 let non_null_val = if desc { 0 } else { 1 };
19510 let _case_expr = Expression::Case(Box::new(Case {
19511 operand: None,
19512 whens: vec![(
19513 Expression::IsNull(Box::new(crate::expressions::IsNull {
19514 this: o.this.clone(),
19515 not: false,
19516 postfix_form: false,
19517 })),
19518 Expression::number(null_val),
19519 )],
19520 else_: Some(Expression::number(non_null_val)),
19521 comments: Vec::new(),
19522 }));
19523 o.nulls_first = None;
19524 // Return a tuple of [case_expr, ordered_expr]
19525 // We need to return both as part of the ORDER BY
19526 // But since transform_recursive processes individual expressions,
19527 // we can't easily add extra ORDER BY items here.
19528 // Instead, strip the nulls_first
19529 o.nulls_first = None;
19530 Ok(Expression::Ordered(o))
19531 }
19532 } else {
19533 Ok(e)
19534 }
19535 }
19536
                Action::MysqlNullsLastRewrite => {
                    // DuckDB -> MySQL: Add CASE WHEN IS NULL THEN 1 ELSE 0 END to ORDER BY
                    // to simulate NULLS LAST for ASC ordering. Only window-function
                    // ORDER BY lists are rewritten here; the list is rebuilt key by key.
                    if let Expression::WindowFunction(mut wf) = e {
                        let mut new_order_by = Vec::new();
                        for o in wf.over.order_by {
                            if !o.desc {
                                // ASC: DuckDB has NULLS LAST, MySQL has NULLS FIRST
                                // Add CASE WHEN expr IS NULL THEN 1 ELSE 0 END before expr
                                // so NULL rows sort after all non-NULL rows.
                                let case_expr = Expression::Case(Box::new(Case {
                                    operand: None,
                                    whens: vec![(
                                        Expression::IsNull(Box::new(crate::expressions::IsNull {
                                            this: o.this.clone(),
                                            not: false,
                                            postfix_form: false,
                                        })),
                                        Expression::Literal(Literal::Number("1".to_string())),
                                    )],
                                    else_: Some(Expression::Literal(Literal::Number(
                                        "0".to_string(),
                                    ))),
                                    comments: Vec::new(),
                                }));
                                new_order_by.push(crate::expressions::Ordered {
                                    this: case_expr,
                                    desc: false,
                                    nulls_first: None,
                                    explicit_asc: false,
                                    with_fill: None,
                                });
                                // The original key follows the synthetic CASE key, with
                                // its (unsupported in MySQL) NULLS clause removed.
                                let mut ordered = o;
                                ordered.nulls_first = None;
                                new_order_by.push(ordered);
                            } else {
                                // DESC: DuckDB has NULLS LAST, MySQL also has NULLS LAST (NULLs smallest in DESC)
                                // No change needed beyond dropping the NULLS clause.
                                let mut ordered = o;
                                ordered.nulls_first = None;
                                new_order_by.push(ordered);
                            }
                        }
                        wf.over.order_by = new_order_by;
                        Ok(Expression::WindowFunction(wf))
                    } else {
                        Ok(e)
                    }
                }
19585
                Action::RespectNullsConvert => {
                    // RESPECT NULLS -> strip for SQLite (FIRST_VALUE(c) OVER (...)).
                    // ignore_nulls == Some(false) encodes an explicit RESPECT NULLS;
                    // clearing it to None drops the clause from the output while
                    // keeping the default (respect-nulls) semantics.
                    if let Expression::WindowFunction(mut wf) = e {
                        match &mut wf.this {
                            Expression::FirstValue(ref mut vf) => {
                                if vf.ignore_nulls == Some(false) {
                                    vf.ignore_nulls = None;
                                    // For SQLite, we'd need to add NULLS LAST to ORDER BY in the OVER clause
                                    // but that's handled by the generator's NULLS ordering
                                }
                            }
                            Expression::LastValue(ref mut vf) => {
                                if vf.ignore_nulls == Some(false) {
                                    vf.ignore_nulls = None;
                                }
                            }
                            // Other window-function bodies carry no RESPECT NULLS flag here.
                            _ => {}
                        }
                        Ok(Expression::WindowFunction(wf))
                    } else {
                        Ok(e)
                    }
                }
19609
19610 Action::CreateTableStripComment => {
19611 // Strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
19612 if let Expression::CreateTable(mut ct) = e {
19613 for col in &mut ct.columns {
19614 col.comment = None;
19615 col.constraints.retain(|c| {
19616 !matches!(c, crate::expressions::ColumnConstraint::Comment(_))
19617 });
19618 // Also remove Comment from constraint_order
19619 col.constraint_order.retain(|c| {
19620 !matches!(c, crate::expressions::ConstraintType::Comment)
19621 });
19622 }
19623 // Strip properties (USING, PARTITIONED BY, etc.)
19624 ct.properties.clear();
19625 Ok(Expression::CreateTable(ct))
19626 } else {
19627 Ok(e)
19628 }
19629 }
19630
19631 Action::AlterTableToSpRename => {
19632 // ALTER TABLE db.t1 RENAME TO db.t2 -> EXEC sp_rename 'db.t1', 't2'
19633 if let Expression::AlterTable(ref at) = e {
19634 if let Some(crate::expressions::AlterTableAction::RenameTable(
19635 ref new_tbl,
19636 )) = at.actions.first()
19637 {
19638 // Build the old table name using TSQL bracket quoting
19639 let old_name = if let Some(ref schema) = at.name.schema {
19640 if at.name.name.quoted || schema.quoted {
19641 format!("[{}].[{}]", schema.name, at.name.name.name)
19642 } else {
19643 format!("{}.{}", schema.name, at.name.name.name)
19644 }
19645 } else {
19646 if at.name.name.quoted {
19647 format!("[{}]", at.name.name.name)
19648 } else {
19649 at.name.name.name.clone()
19650 }
19651 };
19652 let new_name = new_tbl.name.name.clone();
19653 // EXEC sp_rename 'old_name', 'new_name'
19654 let sql = format!("EXEC sp_rename '{}', '{}'", old_name, new_name);
19655 Ok(Expression::Raw(crate::expressions::Raw { sql }))
19656 } else {
19657 Ok(e)
19658 }
19659 } else {
19660 Ok(e)
19661 }
19662 }
19663
                Action::SnowflakeIntervalFormat => {
                    // INTERVAL '2' HOUR -> INTERVAL '2 HOUR' for Snowflake, which
                    // expects the unit inside the quoted string.
                    if let Expression::Interval(mut iv) = e {
                        // Only rewrite when the value is a string literal AND a unit
                        // spec is attached; anything else is left untouched.
                        if let (
                            Some(Expression::Literal(Literal::String(ref val))),
                            Some(ref unit_spec),
                        ) = (&iv.this, &iv.unit)
                        {
                            // Map the structured unit to its SQL keyword. Non-Simple
                            // unit specs are not handled and fall through as "".
                            let unit_str = match unit_spec {
                                crate::expressions::IntervalUnitSpec::Simple { unit, .. } => {
                                    match unit {
                                        crate::expressions::IntervalUnit::Year => "YEAR",
                                        crate::expressions::IntervalUnit::Quarter => "QUARTER",
                                        crate::expressions::IntervalUnit::Month => "MONTH",
                                        crate::expressions::IntervalUnit::Week => "WEEK",
                                        crate::expressions::IntervalUnit::Day => "DAY",
                                        crate::expressions::IntervalUnit::Hour => "HOUR",
                                        crate::expressions::IntervalUnit::Minute => "MINUTE",
                                        crate::expressions::IntervalUnit::Second => "SECOND",
                                        crate::expressions::IntervalUnit::Millisecond => {
                                            "MILLISECOND"
                                        }
                                        crate::expressions::IntervalUnit::Microsecond => {
                                            "MICROSECOND"
                                        }
                                        crate::expressions::IntervalUnit::Nanosecond => {
                                            "NANOSECOND"
                                        }
                                    }
                                }
                                _ => "",
                            };
                            if !unit_str.is_empty() {
                                // Fold the unit into the literal and clear the unit
                                // field so the generator emits INTERVAL '<n> <unit>'.
                                let combined = format!("{} {}", val, unit_str);
                                iv.this = Some(Expression::Literal(Literal::String(combined)));
                                iv.unit = None;
                            }
                        }
                        Ok(Expression::Interval(iv))
                    } else {
                        Ok(e)
                    }
                }
19707
19708 Action::ArrayConcatBracketConvert => {
19709 // Expression::Array/ArrayFunc -> target-specific
19710 // For PostgreSQL: Array -> ArrayFunc (bracket_notation: false)
19711 // For Redshift: Array/ArrayFunc -> Function("ARRAY", args) to produce ARRAY(1, 2) with parens
19712 match e {
19713 Expression::Array(arr) => {
19714 if matches!(target, DialectType::Redshift) {
19715 Ok(Expression::Function(Box::new(Function::new(
19716 "ARRAY".to_string(),
19717 arr.expressions,
19718 ))))
19719 } else {
19720 Ok(Expression::ArrayFunc(Box::new(
19721 crate::expressions::ArrayConstructor {
19722 expressions: arr.expressions,
19723 bracket_notation: false,
19724 use_list_keyword: false,
19725 },
19726 )))
19727 }
19728 }
19729 Expression::ArrayFunc(arr) => {
19730 // Only for Redshift: convert bracket-notation ArrayFunc to Function("ARRAY")
19731 if matches!(target, DialectType::Redshift) {
19732 Ok(Expression::Function(Box::new(Function::new(
19733 "ARRAY".to_string(),
19734 arr.expressions,
19735 ))))
19736 } else {
19737 Ok(Expression::ArrayFunc(arr))
19738 }
19739 }
19740 _ => Ok(e),
19741 }
19742 }
19743
19744 Action::BitAggFloatCast => {
19745 // BIT_OR/BIT_AND/BIT_XOR with float/decimal cast arg -> wrap with ROUND+INT cast for DuckDB
19746 // For FLOAT/DOUBLE/REAL: CAST(ROUND(CAST(val AS type)) AS INT)
19747 // For DECIMAL: CAST(CAST(val AS DECIMAL(p,s)) AS INT)
19748 let int_type = DataType::Int {
19749 length: None,
19750 integer_spelling: false,
19751 };
19752 let wrap_agg = |agg_this: Expression, int_dt: DataType| -> Expression {
19753 if let Expression::Cast(c) = agg_this {
19754 match &c.to {
19755 DataType::Float { .. }
19756 | DataType::Double { .. }
19757 | DataType::Custom { .. } => {
19758 // FLOAT/DOUBLE/REAL: CAST(ROUND(CAST(val AS type)) AS INT)
19759 // Change FLOAT to REAL (Float with real_spelling=true) for DuckDB generator
19760 let inner_type = match &c.to {
19761 DataType::Float {
19762 precision, scale, ..
19763 } => DataType::Float {
19764 precision: *precision,
19765 scale: *scale,
19766 real_spelling: true,
19767 },
19768 other => other.clone(),
19769 };
19770 let inner_cast =
19771 Expression::Cast(Box::new(crate::expressions::Cast {
19772 this: c.this.clone(),
19773 to: inner_type,
19774 trailing_comments: Vec::new(),
19775 double_colon_syntax: false,
19776 format: None,
19777 default: None,
19778 }));
19779 let rounded = Expression::Function(Box::new(Function::new(
19780 "ROUND".to_string(),
19781 vec![inner_cast],
19782 )));
19783 Expression::Cast(Box::new(crate::expressions::Cast {
19784 this: rounded,
19785 to: int_dt,
19786 trailing_comments: Vec::new(),
19787 double_colon_syntax: false,
19788 format: None,
19789 default: None,
19790 }))
19791 }
19792 DataType::Decimal { .. } => {
19793 // DECIMAL: CAST(CAST(val AS DECIMAL(p,s)) AS INT)
19794 Expression::Cast(Box::new(crate::expressions::Cast {
19795 this: Expression::Cast(c),
19796 to: int_dt,
19797 trailing_comments: Vec::new(),
19798 double_colon_syntax: false,
19799 format: None,
19800 default: None,
19801 }))
19802 }
19803 _ => Expression::Cast(c),
19804 }
19805 } else {
19806 agg_this
19807 }
19808 };
19809 match e {
19810 Expression::BitwiseOrAgg(mut f) => {
19811 f.this = wrap_agg(f.this, int_type);
19812 Ok(Expression::BitwiseOrAgg(f))
19813 }
19814 Expression::BitwiseAndAgg(mut f) => {
19815 let int_type = DataType::Int {
19816 length: None,
19817 integer_spelling: false,
19818 };
19819 f.this = wrap_agg(f.this, int_type);
19820 Ok(Expression::BitwiseAndAgg(f))
19821 }
19822 Expression::BitwiseXorAgg(mut f) => {
19823 let int_type = DataType::Int {
19824 length: None,
19825 integer_spelling: false,
19826 };
19827 f.this = wrap_agg(f.this, int_type);
19828 Ok(Expression::BitwiseXorAgg(f))
19829 }
19830 _ => Ok(e),
19831 }
19832 }
19833
19834 Action::BitAggSnowflakeRename => {
19835 // BIT_OR -> BITORAGG, BIT_AND -> BITANDAGG, BIT_XOR -> BITXORAGG for Snowflake
19836 match e {
19837 Expression::BitwiseOrAgg(f) => Ok(Expression::Function(Box::new(
19838 Function::new("BITORAGG".to_string(), vec![f.this]),
19839 ))),
19840 Expression::BitwiseAndAgg(f) => Ok(Expression::Function(Box::new(
19841 Function::new("BITANDAGG".to_string(), vec![f.this]),
19842 ))),
19843 Expression::BitwiseXorAgg(f) => Ok(Expression::Function(Box::new(
19844 Function::new("BITXORAGG".to_string(), vec![f.this]),
19845 ))),
19846 _ => Ok(e),
19847 }
19848 }
19849
19850 Action::StrftimeCastTimestamp => {
19851 // CAST(x AS TIMESTAMP) -> CAST(x AS TIMESTAMP_NTZ) for Spark
19852 if let Expression::Cast(mut c) = e {
19853 if matches!(
19854 c.to,
19855 DataType::Timestamp {
19856 timezone: false,
19857 ..
19858 }
19859 ) {
19860 c.to = DataType::Custom {
19861 name: "TIMESTAMP_NTZ".to_string(),
19862 };
19863 }
19864 Ok(Expression::Cast(c))
19865 } else {
19866 Ok(e)
19867 }
19868 }
19869
19870 Action::DecimalDefaultPrecision => {
19871 // DECIMAL without precision -> DECIMAL(18, 3) for Snowflake
19872 if let Expression::Cast(mut c) = e {
19873 if matches!(
19874 c.to,
19875 DataType::Decimal {
19876 precision: None,
19877 ..
19878 }
19879 ) {
19880 c.to = DataType::Decimal {
19881 precision: Some(18),
19882 scale: Some(3),
19883 };
19884 }
19885 Ok(Expression::Cast(c))
19886 } else {
19887 Ok(e)
19888 }
19889 }
19890
19891 Action::FilterToIff => {
19892 // FILTER(WHERE cond) -> rewrite aggregate: AGG(IFF(cond, val, NULL))
19893 if let Expression::Filter(f) = e {
19894 let condition = *f.expression;
19895 let agg = *f.this;
19896 // Strip WHERE from condition
19897 let cond = match condition {
19898 Expression::Where(w) => w.this,
19899 other => other,
19900 };
19901 // Extract the aggregate function and its argument
19902 // We want AVG(IFF(condition, x, NULL))
19903 match agg {
19904 Expression::Function(mut func) => {
19905 if !func.args.is_empty() {
19906 let orig_arg = func.args[0].clone();
19907 let iff_call = Expression::Function(Box::new(Function::new(
19908 "IFF".to_string(),
19909 vec![cond, orig_arg, Expression::Null(Null)],
19910 )));
19911 func.args[0] = iff_call;
19912 Ok(Expression::Function(func))
19913 } else {
19914 Ok(Expression::Filter(Box::new(crate::expressions::Filter {
19915 this: Box::new(Expression::Function(func)),
19916 expression: Box::new(cond),
19917 })))
19918 }
19919 }
19920 Expression::Avg(mut avg) => {
19921 let iff_call = Expression::Function(Box::new(Function::new(
19922 "IFF".to_string(),
19923 vec![cond, avg.this.clone(), Expression::Null(Null)],
19924 )));
19925 avg.this = iff_call;
19926 Ok(Expression::Avg(avg))
19927 }
19928 Expression::Sum(mut s) => {
19929 let iff_call = Expression::Function(Box::new(Function::new(
19930 "IFF".to_string(),
19931 vec![cond, s.this.clone(), Expression::Null(Null)],
19932 )));
19933 s.this = iff_call;
19934 Ok(Expression::Sum(s))
19935 }
19936 Expression::Count(mut c) => {
19937 if let Some(ref this_expr) = c.this {
19938 let iff_call = Expression::Function(Box::new(Function::new(
19939 "IFF".to_string(),
19940 vec![cond, this_expr.clone(), Expression::Null(Null)],
19941 )));
19942 c.this = Some(iff_call);
19943 }
19944 Ok(Expression::Count(c))
19945 }
19946 other => {
19947 // Fallback: keep as Filter
19948 Ok(Expression::Filter(Box::new(crate::expressions::Filter {
19949 this: Box::new(other),
19950 expression: Box::new(cond),
19951 })))
19952 }
19953 }
19954 } else {
19955 Ok(e)
19956 }
19957 }
19958
                Action::AggFilterToIff => {
                    // AggFunc.filter -> IFF wrapping: AVG(x) FILTER(WHERE cond) -> AVG(IFF(cond, x, NULL))
                    // Helper macro to handle the common AggFunc case: take the filter
                    // predicate off the node (if any) and push it into the argument as
                    // IFF(cond, arg, NULL), leaving the aggregate otherwise intact.
                    macro_rules! handle_agg_filter_to_iff {
                        ($variant:ident, $agg:expr) => {{
                            let mut agg = $agg;
                            if let Some(filter_cond) = agg.filter.take() {
                                let iff_call = Expression::Function(Box::new(Function::new(
                                    "IFF".to_string(),
                                    vec![filter_cond, agg.this.clone(), Expression::Null(Null)],
                                )));
                                agg.this = iff_call;
                            }
                            Ok(Expression::$variant(agg))
                        }};
                    }

                    match e {
                        Expression::Avg(agg) => handle_agg_filter_to_iff!(Avg, agg),
                        Expression::Sum(agg) => handle_agg_filter_to_iff!(Sum, agg),
                        Expression::Min(agg) => handle_agg_filter_to_iff!(Min, agg),
                        Expression::Max(agg) => handle_agg_filter_to_iff!(Max, agg),
                        Expression::ArrayAgg(agg) => handle_agg_filter_to_iff!(ArrayAgg, agg),
                        Expression::CountIf(agg) => handle_agg_filter_to_iff!(CountIf, agg),
                        Expression::Stddev(agg) => handle_agg_filter_to_iff!(Stddev, agg),
                        Expression::StddevPop(agg) => handle_agg_filter_to_iff!(StddevPop, agg),
                        Expression::StddevSamp(agg) => handle_agg_filter_to_iff!(StddevSamp, agg),
                        Expression::Variance(agg) => handle_agg_filter_to_iff!(Variance, agg),
                        Expression::VarPop(agg) => handle_agg_filter_to_iff!(VarPop, agg),
                        Expression::VarSamp(agg) => handle_agg_filter_to_iff!(VarSamp, agg),
                        Expression::Median(agg) => handle_agg_filter_to_iff!(Median, agg),
                        Expression::Mode(agg) => handle_agg_filter_to_iff!(Mode, agg),
                        Expression::First(agg) => handle_agg_filter_to_iff!(First, agg),
                        Expression::Last(agg) => handle_agg_filter_to_iff!(Last, agg),
                        Expression::AnyValue(agg) => handle_agg_filter_to_iff!(AnyValue, agg),
                        Expression::ApproxDistinct(agg) => {
                            handle_agg_filter_to_iff!(ApproxDistinct, agg)
                        }
                        // COUNT's argument is Option-typed, so it needs bespoke
                        // handling: wrap only when an argument is present.
                        Expression::Count(mut c) => {
                            if let Some(filter_cond) = c.filter.take() {
                                if let Some(ref this_expr) = c.this {
                                    let iff_call = Expression::Function(Box::new(Function::new(
                                        "IFF".to_string(),
                                        vec![
                                            filter_cond,
                                            this_expr.clone(),
                                            Expression::Null(Null),
                                        ],
                                    )));
                                    c.this = Some(iff_call);
                                }
                            }
                            Ok(Expression::Count(c))
                        }
                        other => Ok(other),
                    }
                }
20016
                Action::JsonToGetPath => {
                    // JSON_EXTRACT(x, '$.key') -> GET_PATH(PARSE_JSON(x), 'key')
                    if let Expression::JsonExtract(je) = e {
                        // Convert to PARSE_JSON() wrapper:
                        // - JSON(x) -> PARSE_JSON(x)
                        // - PARSE_JSON(x) -> keep as-is
                        // - anything else -> wrap in PARSE_JSON()
                        let this = match &je.this {
                            Expression::Function(f)
                                if f.name.eq_ignore_ascii_case("JSON") && f.args.len() == 1 =>
                            {
                                Expression::Function(Box::new(Function::new(
                                    "PARSE_JSON".to_string(),
                                    f.args.clone(),
                                )))
                            }
                            Expression::Function(f)
                                if f.name.eq_ignore_ascii_case("PARSE_JSON") =>
                            {
                                je.this.clone()
                            }
                            // GET_PATH result is already JSON, don't wrap
                            Expression::Function(f) if f.name.eq_ignore_ascii_case("GET_PATH") => {
                                je.this.clone()
                            }
                            other => {
                                // Wrap non-JSON expressions in PARSE_JSON()
                                Expression::Function(Box::new(Function::new(
                                    "PARSE_JSON".to_string(),
                                    vec![other.clone()],
                                )))
                            }
                        };
                        // Convert path: extract key from JSONPath or strip $. prefix from string
                        let path = match &je.path {
                            Expression::JSONPath(jp) => {
                                // Extract the key from JSONPath: $root.key -> 'key'
                                // Multiple keys are joined with '.' for GET_PATH.
                                let mut key_parts = Vec::new();
                                for expr in &jp.expressions {
                                    match expr {
                                        Expression::JSONPathRoot(_) => {} // skip root
                                        Expression::JSONPathKey(k) => {
                                            if let Expression::Literal(Literal::String(s)) =
                                                &*k.this
                                            {
                                                key_parts.push(s.clone());
                                            }
                                        }
                                        // Non-key path steps (wildcards, subscripts)
                                        // are skipped here.
                                        _ => {}
                                    }
                                }
                                if !key_parts.is_empty() {
                                    Expression::Literal(Literal::String(key_parts.join(".")))
                                } else {
                                    je.path.clone()
                                }
                            }
                            // String path: drop the leading "$." or "$" and remove
                            // wildcard segments GET_PATH cannot express.
                            Expression::Literal(Literal::String(s)) if s.starts_with("$.") => {
                                let stripped = Self::strip_json_wildcards(&s[2..].to_string());
                                Expression::Literal(Literal::String(stripped))
                            }
                            Expression::Literal(Literal::String(s)) if s.starts_with('$') => {
                                let stripped = Self::strip_json_wildcards(&s[1..].to_string());
                                Expression::Literal(Literal::String(stripped))
                            }
                            _ => je.path.clone(),
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "GET_PATH".to_string(),
                            vec![this, path],
                        ))))
                    } else {
                        Ok(e)
                    }
                }
20092
20093 Action::StructToRow => {
20094 // DuckDB struct/dict -> BigQuery STRUCT(value AS key, ...) / Presto ROW
20095 // Handles both Expression::Struct and Expression::MapFunc(curly_brace_syntax=true)
20096
20097 // Extract key-value pairs from either Struct or MapFunc
20098 let kv_pairs: Option<Vec<(String, Expression)>> = match &e {
20099 Expression::Struct(s) => Some(
20100 s.fields
20101 .iter()
20102 .map(|(opt_name, field_expr)| {
20103 if let Some(name) = opt_name {
20104 (name.clone(), field_expr.clone())
20105 } else if let Expression::NamedArgument(na) = field_expr {
20106 (na.name.name.clone(), na.value.clone())
20107 } else {
20108 (String::new(), field_expr.clone())
20109 }
20110 })
20111 .collect(),
20112 ),
20113 Expression::MapFunc(m) if m.curly_brace_syntax => Some(
20114 m.keys
20115 .iter()
20116 .zip(m.values.iter())
20117 .map(|(key, value)| {
20118 let key_name = match key {
20119 Expression::Literal(Literal::String(s)) => s.clone(),
20120 Expression::Identifier(id) => id.name.clone(),
20121 _ => String::new(),
20122 };
20123 (key_name, value.clone())
20124 })
20125 .collect(),
20126 ),
20127 _ => None,
20128 };
20129
20130 if let Some(pairs) = kv_pairs {
20131 let mut named_args = Vec::new();
20132 for (key_name, value) in pairs {
20133 if matches!(target, DialectType::BigQuery) && !key_name.is_empty() {
20134 named_args.push(Expression::Alias(Box::new(
20135 crate::expressions::Alias::new(
20136 value,
20137 Identifier::new(key_name),
20138 ),
20139 )));
20140 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
20141 named_args.push(value);
20142 } else {
20143 named_args.push(value);
20144 }
20145 }
20146
20147 if matches!(target, DialectType::BigQuery) {
20148 Ok(Expression::Function(Box::new(Function::new(
20149 "STRUCT".to_string(),
20150 named_args,
20151 ))))
20152 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
20153 // For Presto/Trino, infer types and wrap in CAST(ROW(...) AS ROW(name TYPE, ...))
20154 let row_func = Expression::Function(Box::new(Function::new(
20155 "ROW".to_string(),
20156 named_args,
20157 )));
20158
20159 // Try to infer types for each pair
20160 let kv_pairs_again: Option<Vec<(String, Expression)>> = match &e {
20161 Expression::Struct(s) => Some(
20162 s.fields
20163 .iter()
20164 .map(|(opt_name, field_expr)| {
20165 if let Some(name) = opt_name {
20166 (name.clone(), field_expr.clone())
20167 } else if let Expression::NamedArgument(na) = field_expr
20168 {
20169 (na.name.name.clone(), na.value.clone())
20170 } else {
20171 (String::new(), field_expr.clone())
20172 }
20173 })
20174 .collect(),
20175 ),
20176 Expression::MapFunc(m) if m.curly_brace_syntax => Some(
20177 m.keys
20178 .iter()
20179 .zip(m.values.iter())
20180 .map(|(key, value)| {
20181 let key_name = match key {
20182 Expression::Literal(Literal::String(s)) => {
20183 s.clone()
20184 }
20185 Expression::Identifier(id) => id.name.clone(),
20186 _ => String::new(),
20187 };
20188 (key_name, value.clone())
20189 })
20190 .collect(),
20191 ),
20192 _ => None,
20193 };
20194
20195 if let Some(pairs) = kv_pairs_again {
20196 // Infer types for all values
20197 let mut all_inferred = true;
20198 let mut fields = Vec::new();
20199 for (name, value) in &pairs {
20200 let inferred_type = match value {
20201 Expression::Literal(Literal::Number(n)) => {
20202 if n.contains('.') {
20203 Some(DataType::Double {
20204 precision: None,
20205 scale: None,
20206 })
20207 } else {
20208 Some(DataType::Int {
20209 length: None,
20210 integer_spelling: true,
20211 })
20212 }
20213 }
20214 Expression::Literal(Literal::String(_)) => {
20215 Some(DataType::VarChar {
20216 length: None,
20217 parenthesized_length: false,
20218 })
20219 }
20220 Expression::Boolean(_) => Some(DataType::Boolean),
20221 _ => None,
20222 };
20223 if let Some(dt) = inferred_type {
20224 fields.push(crate::expressions::StructField::new(
20225 name.clone(),
20226 dt,
20227 ));
20228 } else {
20229 all_inferred = false;
20230 break;
20231 }
20232 }
20233
20234 if all_inferred && !fields.is_empty() {
20235 let row_type = DataType::Struct {
20236 fields,
20237 nested: true,
20238 };
20239 Ok(Expression::Cast(Box::new(Cast {
20240 this: row_func,
20241 to: row_type,
20242 trailing_comments: Vec::new(),
20243 double_colon_syntax: false,
20244 format: None,
20245 default: None,
20246 })))
20247 } else {
20248 Ok(row_func)
20249 }
20250 } else {
20251 Ok(row_func)
20252 }
20253 } else {
20254 Ok(Expression::Function(Box::new(Function::new(
20255 "ROW".to_string(),
20256 named_args,
20257 ))))
20258 }
20259 } else {
20260 Ok(e)
20261 }
20262 }
20263
            Action::SparkStructConvert => {
                // Spark STRUCT(val AS name, ...) -> Presto CAST(ROW(...) AS ROW(name TYPE, ...))
                // or DuckDB {'name': val, ...}
                if let Expression::Function(f) = e {
                    // Extract name-value pairs from aliased args; unaliased args get an
                    // empty-string name (positional field).
                    let mut pairs: Vec<(String, Expression)> = Vec::new();
                    for arg in &f.args {
                        match arg {
                            Expression::Alias(a) => {
                                pairs.push((a.alias.name.clone(), a.this.clone()));
                            }
                            _ => {
                                pairs.push((String::new(), arg.clone()));
                            }
                        }
                    }

                    match target {
                        DialectType::DuckDB => {
                            // Convert to DuckDB struct literal {'name': value, ...}:
                            // keys become string literals, values pass through unchanged.
                            let mut keys = Vec::new();
                            let mut values = Vec::new();
                            for (name, value) in &pairs {
                                keys.push(Expression::Literal(Literal::String(name.clone())));
                                values.push(value.clone());
                            }
                            Ok(Expression::MapFunc(Box::new(
                                crate::expressions::MapConstructor {
                                    keys,
                                    values,
                                    curly_brace_syntax: true,
                                    with_map_keyword: false,
                                },
                            )))
                        }
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // Convert to CAST(ROW(val1, val2) AS ROW(name1 TYPE1, name2 TYPE2)).
                            // ROW(...) takes only the values; names live in the cast type.
                            let row_args: Vec<Expression> =
                                pairs.iter().map(|(_, v)| v.clone()).collect();
                            let row_func = Expression::Function(Box::new(Function::new(
                                "ROW".to_string(),
                                row_args,
                            )));

                            // Infer a type for each literal value; a single non-literal
                            // aborts inference and we fall back to the bare ROW(...).
                            let mut all_inferred = true;
                            let mut fields = Vec::new();
                            for (name, value) in &pairs {
                                let inferred_type = match value {
                                    Expression::Literal(Literal::Number(n)) => {
                                        // Decimal point distinguishes DOUBLE from INT.
                                        if n.contains('.') {
                                            Some(DataType::Double {
                                                precision: None,
                                                scale: None,
                                            })
                                        } else {
                                            Some(DataType::Int {
                                                length: None,
                                                integer_spelling: true,
                                            })
                                        }
                                    }
                                    Expression::Literal(Literal::String(_)) => {
                                        Some(DataType::VarChar {
                                            length: None,
                                            parenthesized_length: false,
                                        })
                                    }
                                    Expression::Boolean(_) => Some(DataType::Boolean),
                                    _ => None,
                                };
                                if let Some(dt) = inferred_type {
                                    fields.push(crate::expressions::StructField::new(
                                        name.clone(),
                                        dt,
                                    ));
                                } else {
                                    all_inferred = false;
                                    break;
                                }
                            }

                            if all_inferred && !fields.is_empty() {
                                let row_type = DataType::Struct {
                                    fields,
                                    nested: true,
                                };
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: row_func,
                                    to: row_type,
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                })))
                            } else {
                                // Type inference failed: emit ROW(...) without a cast.
                                Ok(row_func)
                            }
                        }
                        // Other targets keep the STRUCT(...) call as-is.
                        _ => Ok(Expression::Function(f)),
                    }
                } else {
                    Ok(e)
                }
            }
20369
20370 Action::ApproxCountDistinctToApproxDistinct => {
20371 // APPROX_COUNT_DISTINCT(x) -> APPROX_DISTINCT(x)
20372 if let Expression::ApproxCountDistinct(f) = e {
20373 Ok(Expression::ApproxDistinct(f))
20374 } else {
20375 Ok(e)
20376 }
20377 }
20378
20379 Action::CollectListToArrayAgg => {
20380 // COLLECT_LIST(x) -> ARRAY_AGG(x) FILTER(WHERE x IS NOT NULL)
20381 if let Expression::AggregateFunction(f) = e {
20382 let filter_expr = if !f.args.is_empty() {
20383 let arg = f.args[0].clone();
20384 Some(Expression::IsNull(Box::new(crate::expressions::IsNull {
20385 this: arg,
20386 not: true,
20387 postfix_form: false,
20388 })))
20389 } else {
20390 None
20391 };
20392 let agg = crate::expressions::AggFunc {
20393 this: if f.args.is_empty() {
20394 Expression::Null(crate::expressions::Null)
20395 } else {
20396 f.args[0].clone()
20397 },
20398 distinct: f.distinct,
20399 order_by: f.order_by.clone(),
20400 filter: filter_expr,
20401 ignore_nulls: None,
20402 name: None,
20403 having_max: None,
20404 limit: None,
20405 };
20406 Ok(Expression::ArrayAgg(Box::new(agg)))
20407 } else {
20408 Ok(e)
20409 }
20410 }
20411
20412 Action::CollectSetConvert => {
20413 // COLLECT_SET(x) -> target-specific
20414 if let Expression::AggregateFunction(f) = e {
20415 match target {
20416 DialectType::Presto => Ok(Expression::AggregateFunction(Box::new(
20417 crate::expressions::AggregateFunction {
20418 name: "SET_AGG".to_string(),
20419 args: f.args,
20420 distinct: false,
20421 order_by: f.order_by,
20422 filter: f.filter,
20423 limit: f.limit,
20424 ignore_nulls: f.ignore_nulls,
20425 },
20426 ))),
20427 DialectType::Snowflake => Ok(Expression::AggregateFunction(Box::new(
20428 crate::expressions::AggregateFunction {
20429 name: "ARRAY_UNIQUE_AGG".to_string(),
20430 args: f.args,
20431 distinct: false,
20432 order_by: f.order_by,
20433 filter: f.filter,
20434 limit: f.limit,
20435 ignore_nulls: f.ignore_nulls,
20436 },
20437 ))),
20438 DialectType::Trino | DialectType::DuckDB => {
20439 let agg = crate::expressions::AggFunc {
20440 this: if f.args.is_empty() {
20441 Expression::Null(crate::expressions::Null)
20442 } else {
20443 f.args[0].clone()
20444 },
20445 distinct: true,
20446 order_by: Vec::new(),
20447 filter: None,
20448 ignore_nulls: None,
20449 name: None,
20450 having_max: None,
20451 limit: None,
20452 };
20453 Ok(Expression::ArrayAgg(Box::new(agg)))
20454 }
20455 _ => Ok(Expression::AggregateFunction(f)),
20456 }
20457 } else {
20458 Ok(e)
20459 }
20460 }
20461
20462 Action::PercentileConvert => {
20463 // PERCENTILE(x, 0.5) -> QUANTILE(x, 0.5) / APPROX_PERCENTILE(x, 0.5)
20464 if let Expression::AggregateFunction(f) = e {
20465 let name = match target {
20466 DialectType::DuckDB => "QUANTILE",
20467 DialectType::Presto | DialectType::Trino => "APPROX_PERCENTILE",
20468 _ => "PERCENTILE",
20469 };
20470 Ok(Expression::AggregateFunction(Box::new(
20471 crate::expressions::AggregateFunction {
20472 name: name.to_string(),
20473 args: f.args,
20474 distinct: f.distinct,
20475 order_by: f.order_by,
20476 filter: f.filter,
20477 limit: f.limit,
20478 ignore_nulls: f.ignore_nulls,
20479 },
20480 )))
20481 } else {
20482 Ok(e)
20483 }
20484 }
20485
20486 Action::CorrIsnanWrap => {
20487 // CORR(a, b) -> CASE WHEN ISNAN(CORR(a, b)) THEN NULL ELSE CORR(a, b) END
20488 // The CORR expression could be AggregateFunction, WindowFunction, or Filter-wrapped
20489 let corr_clone = e.clone();
20490 let isnan = Expression::Function(Box::new(Function::new(
20491 "ISNAN".to_string(),
20492 vec![corr_clone.clone()],
20493 )));
20494 let case_expr = Expression::Case(Box::new(Case {
20495 operand: None,
20496 whens: vec![(isnan, Expression::Null(crate::expressions::Null))],
20497 else_: Some(corr_clone),
20498 comments: Vec::new(),
20499 }));
20500 Ok(case_expr)
20501 }
20502
20503 Action::TruncToDateTrunc => {
20504 // TRUNC(timestamp, 'MONTH') -> DATE_TRUNC('MONTH', timestamp)
20505 if let Expression::Function(f) = e {
20506 if f.args.len() == 2 {
20507 let timestamp = f.args[0].clone();
20508 let unit_expr = f.args[1].clone();
20509
20510 if matches!(target, DialectType::ClickHouse) {
20511 // For ClickHouse, produce Expression::DateTrunc which the generator
20512 // outputs as DATE_TRUNC(...) without going through the ClickHouse
20513 // target transform that would convert it to dateTrunc
20514 let unit_str = Self::get_unit_str_static(&unit_expr);
20515 let dt_field = match unit_str.as_str() {
20516 "YEAR" => DateTimeField::Year,
20517 "MONTH" => DateTimeField::Month,
20518 "DAY" => DateTimeField::Day,
20519 "HOUR" => DateTimeField::Hour,
20520 "MINUTE" => DateTimeField::Minute,
20521 "SECOND" => DateTimeField::Second,
20522 "WEEK" => DateTimeField::Week,
20523 "QUARTER" => DateTimeField::Quarter,
20524 _ => DateTimeField::Custom(unit_str),
20525 };
20526 Ok(Expression::DateTrunc(Box::new(
20527 crate::expressions::DateTruncFunc {
20528 this: timestamp,
20529 unit: dt_field,
20530 },
20531 )))
20532 } else {
20533 let new_args = vec![unit_expr, timestamp];
20534 Ok(Expression::Function(Box::new(Function::new(
20535 "DATE_TRUNC".to_string(),
20536 new_args,
20537 ))))
20538 }
20539 } else {
20540 Ok(Expression::Function(f))
20541 }
20542 } else {
20543 Ok(e)
20544 }
20545 }
20546
20547 Action::ArrayContainsConvert => {
20548 if let Expression::ArrayContains(f) = e {
20549 match target {
20550 DialectType::Presto | DialectType::Trino => {
20551 // ARRAY_CONTAINS(arr, val) -> CONTAINS(arr, val)
20552 Ok(Expression::Function(Box::new(Function::new(
20553 "CONTAINS".to_string(),
20554 vec![f.this, f.expression],
20555 ))))
20556 }
20557 DialectType::Snowflake => {
20558 // ARRAY_CONTAINS(arr, val) -> ARRAY_CONTAINS(CAST(val AS VARIANT), arr)
20559 let cast_val =
20560 Expression::Cast(Box::new(crate::expressions::Cast {
20561 this: f.expression,
20562 to: crate::expressions::DataType::Custom {
20563 name: "VARIANT".to_string(),
20564 },
20565 trailing_comments: Vec::new(),
20566 double_colon_syntax: false,
20567 format: None,
20568 default: None,
20569 }));
20570 Ok(Expression::Function(Box::new(Function::new(
20571 "ARRAY_CONTAINS".to_string(),
20572 vec![cast_val, f.this],
20573 ))))
20574 }
20575 _ => Ok(Expression::ArrayContains(f)),
20576 }
20577 } else {
20578 Ok(e)
20579 }
20580 }
20581
            Action::StrPositionExpand => {
                // StrPosition with position arg -> complex STRPOS expansion for Presto/DuckDB
                // LOCATE(substr, str, pos) / STRPOS(str, substr, pos) ->
                // For Presto: IF(STRPOS(SUBSTRING(str, pos), substr) = 0, 0, STRPOS(SUBSTRING(str, pos), substr) + pos - 1)
                // For DuckDB: CASE WHEN STRPOS(SUBSTRING(str, pos), substr) = 0 THEN 0 ELSE STRPOS(SUBSTRING(str, pos), substr) + pos - 1 END
                //
                // NOTE(review): `occurrence` is preserved only in the fallback branch;
                // the Presto/DuckDB expansions silently drop it — confirm callers never
                // reach this action with an occurrence argument on those targets.
                if let Expression::StrPosition(sp) = e {
                    // Destructure the node into its four components.
                    let crate::expressions::StrPosition {
                        this,
                        substr,
                        position,
                        occurrence,
                    } = *sp;
                    let string = *this;
                    // A missing substring degrades to a NULL literal.
                    let substr_expr = match substr {
                        Some(s) => *s,
                        None => Expression::Null(Null),
                    };
                    // SQL string positions are 1-based, so the default start is 1.
                    let pos = match position {
                        Some(p) => *p,
                        None => Expression::number(1),
                    };

                    // SUBSTRING(string, pos)
                    let substring_call = Expression::Function(Box::new(Function::new(
                        "SUBSTRING".to_string(),
                        vec![string.clone(), pos.clone()],
                    )));
                    // STRPOS(SUBSTRING(string, pos), substr)
                    let strpos_call = Expression::Function(Box::new(Function::new(
                        "STRPOS".to_string(),
                        vec![substring_call, substr_expr.clone()],
                    )));
                    // STRPOS(...) + pos - 1 : re-bases the match position from the
                    // truncated string back onto the original string.
                    let pos_adjusted =
                        Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
                            Expression::Add(Box::new(crate::expressions::BinaryOp::new(
                                strpos_call.clone(),
                                pos.clone(),
                            ))),
                            Expression::number(1),
                        )));
                    // STRPOS(...) = 0 : "not found" test on the truncated string.
                    let is_zero = Expression::Eq(Box::new(crate::expressions::BinaryOp::new(
                        strpos_call.clone(),
                        Expression::number(0),
                    )));

                    match target {
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // IF(STRPOS(SUBSTRING(str, pos), substr) = 0, 0, STRPOS(SUBSTRING(str, pos), substr) + pos - 1)
                            Ok(Expression::Function(Box::new(Function::new(
                                "IF".to_string(),
                                vec![is_zero, Expression::number(0), pos_adjusted],
                            ))))
                        }
                        DialectType::DuckDB => {
                            // CASE WHEN STRPOS(SUBSTRING(str, pos), substr) = 0 THEN 0 ELSE STRPOS(SUBSTRING(str, pos), substr) + pos - 1 END
                            Ok(Expression::Case(Box::new(Case {
                                operand: None,
                                whens: vec![(is_zero, Expression::number(0))],
                                else_: Some(pos_adjusted),
                                comments: Vec::new(),
                            })))
                        }
                        _ => {
                            // Reconstruct StrPosition unchanged (including occurrence)
                            // for targets that support the positional form natively.
                            Ok(Expression::StrPosition(Box::new(
                                crate::expressions::StrPosition {
                                    this: Box::new(string),
                                    substr: Some(Box::new(substr_expr)),
                                    position: Some(Box::new(pos)),
                                    occurrence,
                                },
                            )))
                        }
                    }
                } else {
                    Ok(e)
                }
            }
20662
            Action::MonthsBetweenConvert => {
                // MONTHS_BETWEEN(end, start) -> target-specific month difference.
                if let Expression::MonthsBetween(mb) = e {
                    // Note the argument order: `this` is the end date, `expression`
                    // the start date. `original_name` is discarded here and the
                    // fallback arm rebuilds the node with original_name: None —
                    // NOTE(review): confirm losing the original spelling is intended.
                    let crate::expressions::BinaryFunc {
                        this: end_date,
                        expression: start_date,
                        ..
                    } = *mb;
                    match target {
                        DialectType::DuckDB => {
                            // DuckDB has no MONTHS_BETWEEN, so emulate the fractional
                            // semantics: whole months via DATE_DIFF plus a day-based
                            // fraction, except when both dates are month-end (then the
                            // fraction is 0). Presumably this mirrors the Snowflake/Spark
                            // MONTHS_BETWEEN contract — verify against those docs.
                            let cast_end = Self::ensure_cast_date(end_date);
                            let cast_start = Self::ensure_cast_date(start_date);
                            // DATE_DIFF('MONTH', start, end): whole-month component.
                            let dd = Expression::Function(Box::new(Function::new(
                                "DATE_DIFF".to_string(),
                                vec![
                                    Expression::string("MONTH"),
                                    cast_start.clone(),
                                    cast_end.clone(),
                                ],
                            )));
                            // Day-of-month of each endpoint.
                            let day_end = Expression::Function(Box::new(Function::new(
                                "DAY".to_string(),
                                vec![cast_end.clone()],
                            )));
                            let day_start = Expression::Function(Box::new(Function::new(
                                "DAY".to_string(),
                                vec![cast_start.clone()],
                            )));
                            // LAST_DAY(...) and DAY(LAST_DAY(...)) — used to detect
                            // whether each endpoint falls on the last day of its month.
                            let last_day_end = Expression::Function(Box::new(Function::new(
                                "LAST_DAY".to_string(),
                                vec![cast_end.clone()],
                            )));
                            let last_day_start = Expression::Function(Box::new(Function::new(
                                "LAST_DAY".to_string(),
                                vec![cast_start.clone()],
                            )));
                            let day_last_end = Expression::Function(Box::new(Function::new(
                                "DAY".to_string(),
                                vec![last_day_end],
                            )));
                            let day_last_start = Expression::Function(Box::new(Function::new(
                                "DAY".to_string(),
                                vec![last_day_start],
                            )));
                            // cond1/cond2: endpoint is the last day of its month.
                            let cond1 = Expression::Eq(Box::new(BinaryOp::new(
                                day_end.clone(),
                                day_last_end,
                            )));
                            let cond2 = Expression::Eq(Box::new(BinaryOp::new(
                                day_start.clone(),
                                day_last_start,
                            )));
                            let both_cond =
                                Expression::And(Box::new(BinaryOp::new(cond1, cond2)));
                            // (DAY(end) - DAY(start)) / 31.0 : fractional-month part.
                            // Parenthesized so the division binds correctly in output SQL.
                            let day_diff =
                                Expression::Sub(Box::new(BinaryOp::new(day_end, day_start)));
                            let day_diff_paren =
                                Expression::Paren(Box::new(crate::expressions::Paren {
                                    this: day_diff,
                                    trailing_comments: Vec::new(),
                                }));
                            let frac = Expression::Div(Box::new(BinaryOp::new(
                                day_diff_paren,
                                Expression::Literal(Literal::Number("31.0".to_string())),
                            )));
                            // CASE WHEN both month-ends THEN 0 ELSE fraction END.
                            let case_expr = Expression::Case(Box::new(Case {
                                operand: None,
                                whens: vec![(both_cond, Expression::number(0))],
                                else_: Some(frac),
                                comments: Vec::new(),
                            }));
                            // Final result: whole months + fractional adjustment.
                            Ok(Expression::Add(Box::new(BinaryOp::new(dd, case_expr))))
                        }
                        DialectType::Snowflake | DialectType::Redshift => {
                            // DATEDIFF(MONTH, start, end) — unit as a bare identifier.
                            let unit = Expression::Identifier(Identifier::new("MONTH"));
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATEDIFF".to_string(),
                                vec![unit, start_date, end_date],
                            ))))
                        }
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // DATE_DIFF('MONTH', start, end) — unit as a string literal.
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_DIFF".to_string(),
                                vec![Expression::string("MONTH"), start_date, end_date],
                            ))))
                        }
                        // Default: rebuild the MonthsBetween node unchanged.
                        _ => Ok(Expression::MonthsBetween(Box::new(
                            crate::expressions::BinaryFunc {
                                this: end_date,
                                expression: start_date,
                                original_name: None,
                            },
                        ))),
                    }
                } else {
                    Ok(e)
                }
            }
20760
20761 Action::AddMonthsConvert => {
20762 if let Expression::AddMonths(am) = e {
20763 let date = am.this;
20764 let val = am.expression;
20765 match target {
20766 DialectType::TSQL | DialectType::Fabric => {
20767 let cast_date = Self::ensure_cast_datetime2(date);
20768 Ok(Expression::Function(Box::new(Function::new(
20769 "DATEADD".to_string(),
20770 vec![
20771 Expression::Identifier(Identifier::new("MONTH")),
20772 val,
20773 cast_date,
20774 ],
20775 ))))
20776 }
20777 DialectType::DuckDB if matches!(source, DialectType::Snowflake) => {
20778 // DuckDB ADD_MONTHS from Snowflake: CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(date + interval) ELSE date + interval END
20779 // Optionally wrapped in CAST(... AS type) if the input had a specific type
20780
20781 // Determine the cast type from the date expression
20782 let (cast_date, return_type) = match &date {
20783 Expression::Literal(Literal::String(_)) => {
20784 // String literal: CAST(str AS TIMESTAMP), no outer CAST
20785 (
20786 Expression::Cast(Box::new(Cast {
20787 this: date.clone(),
20788 to: DataType::Timestamp {
20789 precision: None,
20790 timezone: false,
20791 },
20792 trailing_comments: Vec::new(),
20793 double_colon_syntax: false,
20794 format: None,
20795 default: None,
20796 })),
20797 None,
20798 )
20799 }
20800 Expression::Cast(c) => {
20801 // Already cast (e.g., '2023-01-31'::DATE) - keep the cast, wrap result in CAST(... AS type)
20802 (date.clone(), Some(c.to.clone()))
20803 }
20804 _ => {
20805 // Expression or NULL::TYPE - keep as-is, check for cast type
20806 if let Expression::Cast(c) = &date {
20807 (date.clone(), Some(c.to.clone()))
20808 } else {
20809 (date.clone(), None)
20810 }
20811 }
20812 };
20813
20814 // Build the interval expression
20815 // For non-integer values (float, decimal, cast), use TO_MONTHS(CAST(ROUND(val) AS INT))
20816 // For integer values, use INTERVAL val MONTH
20817 let is_non_integer_val = match &val {
20818 Expression::Literal(Literal::Number(n)) => n.contains('.'),
20819 Expression::Cast(_) => true, // e.g., 3.2::DECIMAL(10,2)
20820 Expression::Neg(n) => {
20821 if let Expression::Literal(Literal::Number(s)) = &n.this {
20822 s.contains('.')
20823 } else {
20824 false
20825 }
20826 }
20827 _ => false,
20828 };
20829
20830 let add_interval = if is_non_integer_val {
20831 // TO_MONTHS(CAST(ROUND(val) AS INT))
20832 let round_val = Expression::Function(Box::new(Function::new(
20833 "ROUND".to_string(),
20834 vec![val.clone()],
20835 )));
20836 let cast_int = Expression::Cast(Box::new(Cast {
20837 this: round_val,
20838 to: DataType::Int {
20839 length: None,
20840 integer_spelling: false,
20841 },
20842 trailing_comments: Vec::new(),
20843 double_colon_syntax: false,
20844 format: None,
20845 default: None,
20846 }));
20847 Expression::Function(Box::new(Function::new(
20848 "TO_MONTHS".to_string(),
20849 vec![cast_int],
20850 )))
20851 } else {
20852 // INTERVAL val MONTH
20853 // For negative numbers, wrap in parens
20854 let interval_val = match &val {
20855 Expression::Literal(Literal::Number(n))
20856 if n.starts_with('-') =>
20857 {
20858 Expression::Paren(Box::new(Paren {
20859 this: val.clone(),
20860 trailing_comments: Vec::new(),
20861 }))
20862 }
20863 Expression::Neg(_) => Expression::Paren(Box::new(Paren {
20864 this: val.clone(),
20865 trailing_comments: Vec::new(),
20866 })),
20867 Expression::Null(_) => Expression::Paren(Box::new(Paren {
20868 this: val.clone(),
20869 trailing_comments: Vec::new(),
20870 })),
20871 _ => val.clone(),
20872 };
20873 Expression::Interval(Box::new(crate::expressions::Interval {
20874 this: Some(interval_val),
20875 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
20876 unit: crate::expressions::IntervalUnit::Month,
20877 use_plural: false,
20878 }),
20879 }))
20880 };
20881
20882 // Build: date + interval
20883 let date_plus_interval = Expression::Add(Box::new(BinaryOp::new(
20884 cast_date.clone(),
20885 add_interval.clone(),
20886 )));
20887
20888 // Build LAST_DAY(date)
20889 let last_day_date = Expression::Function(Box::new(Function::new(
20890 "LAST_DAY".to_string(),
20891 vec![cast_date.clone()],
20892 )));
20893
20894 // Build LAST_DAY(date + interval)
20895 let last_day_date_plus =
20896 Expression::Function(Box::new(Function::new(
20897 "LAST_DAY".to_string(),
20898 vec![date_plus_interval.clone()],
20899 )));
20900
20901 // Build: CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(date + interval) ELSE date + interval END
20902 let case_expr = Expression::Case(Box::new(Case {
20903 operand: None,
20904 whens: vec![(
20905 Expression::Eq(Box::new(BinaryOp::new(
20906 last_day_date,
20907 cast_date.clone(),
20908 ))),
20909 last_day_date_plus,
20910 )],
20911 else_: Some(date_plus_interval),
20912 comments: Vec::new(),
20913 }));
20914
20915 // Wrap in CAST(... AS type) if needed
20916 if let Some(dt) = return_type {
20917 Ok(Expression::Cast(Box::new(Cast {
20918 this: case_expr,
20919 to: dt,
20920 trailing_comments: Vec::new(),
20921 double_colon_syntax: false,
20922 format: None,
20923 default: None,
20924 })))
20925 } else {
20926 Ok(case_expr)
20927 }
20928 }
20929 DialectType::DuckDB => {
20930 // Non-Snowflake source: simple date + INTERVAL
20931 let cast_date =
20932 if matches!(&date, Expression::Literal(Literal::String(_))) {
20933 Expression::Cast(Box::new(Cast {
20934 this: date,
20935 to: DataType::Timestamp {
20936 precision: None,
20937 timezone: false,
20938 },
20939 trailing_comments: Vec::new(),
20940 double_colon_syntax: false,
20941 format: None,
20942 default: None,
20943 }))
20944 } else {
20945 date
20946 };
20947 let interval =
20948 Expression::Interval(Box::new(crate::expressions::Interval {
20949 this: Some(val),
20950 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
20951 unit: crate::expressions::IntervalUnit::Month,
20952 use_plural: false,
20953 }),
20954 }));
20955 Ok(Expression::Add(Box::new(BinaryOp::new(
20956 cast_date, interval,
20957 ))))
20958 }
20959 DialectType::Snowflake => {
20960 // Keep ADD_MONTHS when source is also Snowflake
20961 if matches!(source, DialectType::Snowflake) {
20962 Ok(Expression::Function(Box::new(Function::new(
20963 "ADD_MONTHS".to_string(),
20964 vec![date, val],
20965 ))))
20966 } else {
20967 Ok(Expression::Function(Box::new(Function::new(
20968 "DATEADD".to_string(),
20969 vec![
20970 Expression::Identifier(Identifier::new("MONTH")),
20971 val,
20972 date,
20973 ],
20974 ))))
20975 }
20976 }
20977 DialectType::Redshift => {
20978 Ok(Expression::Function(Box::new(Function::new(
20979 "DATEADD".to_string(),
20980 vec![
20981 Expression::Identifier(Identifier::new("MONTH")),
20982 val,
20983 date,
20984 ],
20985 ))))
20986 }
20987 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
20988 let cast_date =
20989 if matches!(&date, Expression::Literal(Literal::String(_))) {
20990 Expression::Cast(Box::new(Cast {
20991 this: date,
20992 to: DataType::Timestamp {
20993 precision: None,
20994 timezone: false,
20995 },
20996 trailing_comments: Vec::new(),
20997 double_colon_syntax: false,
20998 format: None,
20999 default: None,
21000 }))
21001 } else {
21002 date
21003 };
21004 Ok(Expression::Function(Box::new(Function::new(
21005 "DATE_ADD".to_string(),
21006 vec![Expression::string("MONTH"), val, cast_date],
21007 ))))
21008 }
21009 DialectType::BigQuery => {
21010 let interval =
21011 Expression::Interval(Box::new(crate::expressions::Interval {
21012 this: Some(val),
21013 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
21014 unit: crate::expressions::IntervalUnit::Month,
21015 use_plural: false,
21016 }),
21017 }));
21018 let cast_date =
21019 if matches!(&date, Expression::Literal(Literal::String(_))) {
21020 Expression::Cast(Box::new(Cast {
21021 this: date,
21022 to: DataType::Custom {
21023 name: "DATETIME".to_string(),
21024 },
21025 trailing_comments: Vec::new(),
21026 double_colon_syntax: false,
21027 format: None,
21028 default: None,
21029 }))
21030 } else {
21031 date
21032 };
21033 Ok(Expression::Function(Box::new(Function::new(
21034 "DATE_ADD".to_string(),
21035 vec![cast_date, interval],
21036 ))))
21037 }
21038 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
21039 Ok(Expression::Function(Box::new(Function::new(
21040 "ADD_MONTHS".to_string(),
21041 vec![date, val],
21042 ))))
21043 }
21044 _ => {
21045 // Default: keep as AddMonths expression
21046 Ok(Expression::AddMonths(Box::new(
21047 crate::expressions::BinaryFunc {
21048 this: date,
21049 expression: val,
21050 original_name: None,
21051 },
21052 )))
21053 }
21054 }
21055 } else {
21056 Ok(e)
21057 }
21058 }
21059
21060 Action::PercentileContConvert => {
21061 // PERCENTILE_CONT(p) WITHIN GROUP (ORDER BY col) ->
21062 // Presto/Trino: APPROX_PERCENTILE(col, p)
21063 // Spark/Databricks: PERCENTILE_APPROX(col, p)
21064 if let Expression::WithinGroup(wg) = e {
21065 // Extract percentile value and order by column
21066 let (percentile, _is_disc) = match &wg.this {
21067 Expression::Function(f) => {
21068 let is_disc = f.name.eq_ignore_ascii_case("PERCENTILE_DISC");
21069 let pct = f.args.first().cloned().unwrap_or(Expression::Literal(
21070 Literal::Number("0.5".to_string()),
21071 ));
21072 (pct, is_disc)
21073 }
21074 Expression::AggregateFunction(af) => {
21075 let is_disc = af.name.eq_ignore_ascii_case("PERCENTILE_DISC");
21076 let pct = af.args.first().cloned().unwrap_or(Expression::Literal(
21077 Literal::Number("0.5".to_string()),
21078 ));
21079 (pct, is_disc)
21080 }
21081 Expression::PercentileCont(pc) => (pc.percentile.clone(), false),
21082 _ => return Ok(Expression::WithinGroup(wg)),
21083 };
21084 let col = wg
21085 .order_by
21086 .first()
21087 .map(|o| o.this.clone())
21088 .unwrap_or(Expression::Literal(Literal::Number("1".to_string())));
21089
21090 let func_name = match target {
21091 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
21092 "APPROX_PERCENTILE"
21093 }
21094 _ => "PERCENTILE_APPROX", // Spark, Databricks
21095 };
21096 Ok(Expression::Function(Box::new(Function::new(
21097 func_name.to_string(),
21098 vec![col, percentile],
21099 ))))
21100 } else {
21101 Ok(e)
21102 }
21103 }
21104
21105 Action::CurrentUserSparkParens => {
21106 // CURRENT_USER -> CURRENT_USER() for Spark
21107 if let Expression::CurrentUser(_) = e {
21108 Ok(Expression::Function(Box::new(Function::new(
21109 "CURRENT_USER".to_string(),
21110 vec![],
21111 ))))
21112 } else {
21113 Ok(e)
21114 }
21115 }
21116
21117 Action::SparkDateFuncCast => {
21118 // MONTH/YEAR/DAY('string') from Spark -> wrap arg in CAST to DATE
21119 let cast_arg = |arg: Expression| -> Expression {
21120 match target {
21121 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
21122 Self::double_cast_timestamp_date(arg)
21123 }
21124 _ => {
21125 // DuckDB, PostgreSQL, etc: CAST(arg AS DATE)
21126 Self::ensure_cast_date(arg)
21127 }
21128 }
21129 };
21130 match e {
21131 Expression::Month(f) => Ok(Expression::Month(Box::new(
21132 crate::expressions::UnaryFunc::new(cast_arg(f.this)),
21133 ))),
21134 Expression::Year(f) => Ok(Expression::Year(Box::new(
21135 crate::expressions::UnaryFunc::new(cast_arg(f.this)),
21136 ))),
21137 Expression::Day(f) => Ok(Expression::Day(Box::new(
21138 crate::expressions::UnaryFunc::new(cast_arg(f.this)),
21139 ))),
21140 other => Ok(other),
21141 }
21142 }
21143
21144 Action::MapFromArraysConvert => {
21145 // Expression::MapFromArrays -> target-specific
21146 if let Expression::MapFromArrays(mfa) = e {
21147 let keys = mfa.this;
21148 let values = mfa.expression;
21149 match target {
21150 DialectType::Snowflake => Ok(Expression::Function(Box::new(
21151 Function::new("OBJECT_CONSTRUCT".to_string(), vec![keys, values]),
21152 ))),
21153 _ => {
21154 // Hive, Presto, DuckDB, etc.: MAP(keys, values)
21155 Ok(Expression::Function(Box::new(Function::new(
21156 "MAP".to_string(),
21157 vec![keys, values],
21158 ))))
21159 }
21160 }
21161 } else {
21162 Ok(e)
21163 }
21164 }
21165
21166 Action::AnyToExists => {
21167 if let Expression::Any(q) = e {
21168 if let Some(op) = q.op.clone() {
21169 let lambda_param = crate::expressions::Identifier::new("x");
21170 let rhs = Expression::Identifier(lambda_param.clone());
21171 let body = match op {
21172 crate::expressions::QuantifiedOp::Eq => {
21173 Expression::Eq(Box::new(BinaryOp::new(q.this, rhs)))
21174 }
21175 crate::expressions::QuantifiedOp::Neq => {
21176 Expression::Neq(Box::new(BinaryOp::new(q.this, rhs)))
21177 }
21178 crate::expressions::QuantifiedOp::Lt => {
21179 Expression::Lt(Box::new(BinaryOp::new(q.this, rhs)))
21180 }
21181 crate::expressions::QuantifiedOp::Lte => {
21182 Expression::Lte(Box::new(BinaryOp::new(q.this, rhs)))
21183 }
21184 crate::expressions::QuantifiedOp::Gt => {
21185 Expression::Gt(Box::new(BinaryOp::new(q.this, rhs)))
21186 }
21187 crate::expressions::QuantifiedOp::Gte => {
21188 Expression::Gte(Box::new(BinaryOp::new(q.this, rhs)))
21189 }
21190 };
21191 let lambda =
21192 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
21193 parameters: vec![lambda_param],
21194 body,
21195 colon: false,
21196 parameter_types: Vec::new(),
21197 }));
21198 Ok(Expression::Function(Box::new(Function::new(
21199 "EXISTS".to_string(),
21200 vec![q.subquery, lambda],
21201 ))))
21202 } else {
21203 Ok(Expression::Any(q))
21204 }
21205 } else {
21206 Ok(e)
21207 }
21208 }
21209
            Action::GenerateSeriesConvert => {
                // GENERATE_SERIES(start, end[, step]) -> SEQUENCE for Spark/Databricks/Hive, wrapped in UNNEST/EXPLODE
                // For DuckDB target: wrap in UNNEST(GENERATE_SERIES(...))
                // For PG/Redshift target: keep as GENERATE_SERIES but normalize interval string step
                if let Expression::Function(f) = e {
                    // Only rewrite a GENERATE_SERIES call with at least (start, end).
                    if f.name.eq_ignore_ascii_case("GENERATE_SERIES") && f.args.len() >= 2 {
                        let start = f.args[0].clone();
                        let end = f.args[1].clone();
                        // Optional third argument: the step.
                        let step = f.args.get(2).cloned();

                        // Normalize step: convert string interval like '1day' or ' 2 days ' to INTERVAL expression
                        let step = step.map(|s| Self::normalize_interval_string(s, target));

                        // Helper: wrap CURRENT_TIMESTAMP in CAST(... AS TIMESTAMP) for Presto/Trino/Spark
                        // family targets; other expressions pass through untouched.
                        let maybe_cast_timestamp = |arg: Expression| -> Expression {
                            if matches!(
                                target,
                                DialectType::Presto
                                    | DialectType::Trino
                                    | DialectType::Athena
                                    | DialectType::Spark
                                    | DialectType::Databricks
                                    | DialectType::Hive
                            ) {
                                match &arg {
                                    Expression::CurrentTimestamp(_) => {
                                        Expression::Cast(Box::new(Cast {
                                            this: arg,
                                            to: DataType::Timestamp {
                                                precision: None,
                                                timezone: false,
                                            },
                                            trailing_comments: Vec::new(),
                                            double_colon_syntax: false,
                                            format: None,
                                            default: None,
                                        }))
                                    }
                                    _ => arg,
                                }
                            } else {
                                arg
                            }
                        };

                        let start = maybe_cast_timestamp(start);
                        let end = maybe_cast_timestamp(end);

                        // For PostgreSQL/Redshift target, keep as GENERATE_SERIES
                        // (only the step normalization above applies).
                        if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
                            let mut gs_args = vec![start, end];
                            if let Some(step) = step {
                                gs_args.push(step);
                            }
                            return Ok(Expression::Function(Box::new(Function::new(
                                "GENERATE_SERIES".to_string(),
                                gs_args,
                            ))));
                        }

                        // For DuckDB target: wrap in UNNEST(GENERATE_SERIES(...))
                        if matches!(target, DialectType::DuckDB) {
                            let mut gs_args = vec![start, end];
                            if let Some(step) = step {
                                gs_args.push(step);
                            }
                            let gs = Expression::Function(Box::new(Function::new(
                                "GENERATE_SERIES".to_string(),
                                gs_args,
                            )));
                            return Ok(Expression::Function(Box::new(Function::new(
                                "UNNEST".to_string(),
                                vec![gs],
                            ))));
                        }

                        // Remaining targets use SEQUENCE(start, end[, step]) ...
                        let mut seq_args = vec![start, end];
                        if let Some(step) = step {
                            seq_args.push(step);
                        }

                        let seq = Expression::Function(Box::new(Function::new(
                            "SEQUENCE".to_string(),
                            seq_args,
                        )));

                        // ... optionally wrapped in the target's table-function form.
                        match target {
                            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                                // Wrap in UNNEST
                                Ok(Expression::Function(Box::new(Function::new(
                                    "UNNEST".to_string(),
                                    vec![seq],
                                ))))
                            }
                            DialectType::Spark
                            | DialectType::Databricks
                            | DialectType::Hive => {
                                // Wrap in EXPLODE
                                Ok(Expression::Function(Box::new(Function::new(
                                    "EXPLODE".to_string(),
                                    vec![seq],
                                ))))
                            }
                            _ => {
                                // Just SEQUENCE for others
                                Ok(seq)
                            }
                        }
                    } else {
                        Ok(Expression::Function(f))
                    }
                } else {
                    Ok(e)
                }
            }
21325
21326 Action::ConcatCoalesceWrap => {
21327 // CONCAT(a, b) function -> CONCAT(COALESCE(CAST(a AS VARCHAR), ''), ...) for Presto
21328 // CONCAT(a, b) function -> CONCAT(COALESCE(a, ''), ...) for ClickHouse
21329 if let Expression::Function(f) = e {
21330 if f.name.eq_ignore_ascii_case("CONCAT") {
21331 let new_args: Vec<Expression> = f
21332 .args
21333 .into_iter()
21334 .map(|arg| {
21335 let cast_arg = if matches!(
21336 target,
21337 DialectType::Presto
21338 | DialectType::Trino
21339 | DialectType::Athena
21340 ) {
21341 Expression::Cast(Box::new(Cast {
21342 this: arg,
21343 to: DataType::VarChar {
21344 length: None,
21345 parenthesized_length: false,
21346 },
21347 trailing_comments: Vec::new(),
21348 double_colon_syntax: false,
21349 format: None,
21350 default: None,
21351 }))
21352 } else {
21353 arg
21354 };
21355 Expression::Function(Box::new(Function::new(
21356 "COALESCE".to_string(),
21357 vec![cast_arg, Expression::string("")],
21358 )))
21359 })
21360 .collect();
21361 Ok(Expression::Function(Box::new(Function::new(
21362 "CONCAT".to_string(),
21363 new_args,
21364 ))))
21365 } else {
21366 Ok(Expression::Function(f))
21367 }
21368 } else {
21369 Ok(e)
21370 }
21371 }
21372
21373 Action::PipeConcatToConcat => {
21374 // a || b (Concat operator) -> CONCAT(CAST(a AS VARCHAR), CAST(b AS VARCHAR)) for Presto/Trino
21375 if let Expression::Concat(op) = e {
21376 let cast_left = Expression::Cast(Box::new(Cast {
21377 this: op.left,
21378 to: DataType::VarChar {
21379 length: None,
21380 parenthesized_length: false,
21381 },
21382 trailing_comments: Vec::new(),
21383 double_colon_syntax: false,
21384 format: None,
21385 default: None,
21386 }));
21387 let cast_right = Expression::Cast(Box::new(Cast {
21388 this: op.right,
21389 to: DataType::VarChar {
21390 length: None,
21391 parenthesized_length: false,
21392 },
21393 trailing_comments: Vec::new(),
21394 double_colon_syntax: false,
21395 format: None,
21396 default: None,
21397 }));
21398 Ok(Expression::Function(Box::new(Function::new(
21399 "CONCAT".to_string(),
21400 vec![cast_left, cast_right],
21401 ))))
21402 } else {
21403 Ok(e)
21404 }
21405 }
21406
            Action::DivFuncConvert => {
                // DIV(a, b) -> target-specific integer division
                if let Expression::Function(f) = e {
                    // Only the exact two-argument DIV(a, b) form is rewritten.
                    if f.name.eq_ignore_ascii_case("DIV") && f.args.len() == 2 {
                        let a = f.args[0].clone();
                        let b = f.args[1].clone();
                        match target {
                            DialectType::DuckDB => {
                                // DIV(a, b) -> CAST(a // b AS DECIMAL)
                                let int_div = Expression::IntDiv(Box::new(
                                    crate::expressions::BinaryFunc {
                                        this: a,
                                        expression: b,
                                        original_name: None,
                                    },
                                ));
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: int_div,
                                    to: DataType::Decimal {
                                        precision: None,
                                        scale: None,
                                    },
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                })))
                            }
                            DialectType::BigQuery => {
                                // DIV(a, b) -> CAST(DIV(a, b) AS NUMERIC)
                                // BigQuery has a native DIV; only the result type is adjusted.
                                let div_func = Expression::Function(Box::new(Function::new(
                                    "DIV".to_string(),
                                    vec![a, b],
                                )));
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: div_func,
                                    to: DataType::Custom {
                                        name: "NUMERIC".to_string(),
                                    },
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                })))
                            }
                            DialectType::SQLite => {
                                // DIV(a, b) -> CAST(CAST(CAST(a AS REAL) / b AS INTEGER) AS REAL)
                                // SQLite has no integer-division operator; emulate truncation
                                // via a REAL -> INTEGER -> REAL cast chain.
                                let cast_a = Expression::Cast(Box::new(Cast {
                                    this: a,
                                    to: DataType::Custom {
                                        name: "REAL".to_string(),
                                    },
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                }));
                                let div = Expression::Div(Box::new(BinaryOp::new(cast_a, b)));
                                let cast_int = Expression::Cast(Box::new(Cast {
                                    this: div,
                                    to: DataType::Int {
                                        length: None,
                                        integer_spelling: true,
                                    },
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                }));
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: cast_int,
                                    to: DataType::Custom {
                                        name: "REAL".to_string(),
                                    },
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                })))
                            }
                            // All other targets keep DIV(a, b) as-is.
                            _ => Ok(Expression::Function(f)),
                        }
                    } else {
                        Ok(Expression::Function(f))
                    }
                } else {
                    Ok(e)
                }
            }
21496
21497 Action::JsonObjectAggConvert => {
21498 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
21499 match e {
21500 Expression::Function(f) => Ok(Expression::Function(Box::new(
21501 Function::new("JSON_GROUP_OBJECT".to_string(), f.args),
21502 ))),
21503 Expression::AggregateFunction(af) => {
21504 // AggregateFunction stores all args in the `args` vec
21505 Ok(Expression::Function(Box::new(Function::new(
21506 "JSON_GROUP_OBJECT".to_string(),
21507 af.args,
21508 ))))
21509 }
21510 other => Ok(other),
21511 }
21512 }
21513
21514 Action::JsonbExistsConvert => {
21515 // JSONB_EXISTS('json', 'key') -> JSON_EXISTS('json', '$.key') for DuckDB
21516 if let Expression::Function(f) = e {
21517 if f.args.len() == 2 {
21518 let json_expr = f.args[0].clone();
21519 let key = match &f.args[1] {
21520 Expression::Literal(crate::expressions::Literal::String(s)) => {
21521 format!("$.{}", s)
21522 }
21523 _ => return Ok(Expression::Function(f)),
21524 };
21525 Ok(Expression::Function(Box::new(Function::new(
21526 "JSON_EXISTS".to_string(),
21527 vec![json_expr, Expression::string(&key)],
21528 ))))
21529 } else {
21530 Ok(Expression::Function(f))
21531 }
21532 } else {
21533 Ok(e)
21534 }
21535 }
21536
21537 Action::DateBinConvert => {
21538 // DATE_BIN('interval', ts, origin) -> TIME_BUCKET('interval', ts, origin) for DuckDB
21539 if let Expression::Function(f) = e {
21540 Ok(Expression::Function(Box::new(Function::new(
21541 "TIME_BUCKET".to_string(),
21542 f.args,
21543 ))))
21544 } else {
21545 Ok(e)
21546 }
21547 }
21548
21549 Action::MysqlCastCharToText => {
21550 // MySQL CAST(x AS CHAR) was originally TEXT -> convert to target text type
21551 if let Expression::Cast(mut c) = e {
21552 c.to = DataType::Text;
21553 Ok(Expression::Cast(c))
21554 } else {
21555 Ok(e)
21556 }
21557 }
21558
21559 Action::SparkCastVarcharToString => {
21560 // Spark parses VARCHAR(n)/CHAR(n) as TEXT -> normalize to STRING
21561 match e {
21562 Expression::Cast(mut c) => {
21563 c.to = Self::normalize_varchar_to_string(c.to);
21564 Ok(Expression::Cast(c))
21565 }
21566 Expression::TryCast(mut c) => {
21567 c.to = Self::normalize_varchar_to_string(c.to);
21568 Ok(Expression::TryCast(c))
21569 }
21570 _ => Ok(e),
21571 }
21572 }
21573
21574 Action::MinMaxToLeastGreatest => {
21575 // Multi-arg MIN(a,b,c) -> LEAST(a,b,c), MAX(a,b,c) -> GREATEST(a,b,c)
21576 if let Expression::Function(f) = e {
21577 let name = f.name.to_uppercase();
21578 let new_name = match name.as_str() {
21579 "MIN" => "LEAST",
21580 "MAX" => "GREATEST",
21581 _ => return Ok(Expression::Function(f)),
21582 };
21583 Ok(Expression::Function(Box::new(Function::new(
21584 new_name.to_string(),
21585 f.args,
21586 ))))
21587 } else {
21588 Ok(e)
21589 }
21590 }
21591
21592 Action::ClickHouseUniqToApproxCountDistinct => {
21593 // ClickHouse uniq(x) -> APPROX_COUNT_DISTINCT(x) for non-ClickHouse targets
21594 if let Expression::Function(f) = e {
21595 Ok(Expression::Function(Box::new(Function::new(
21596 "APPROX_COUNT_DISTINCT".to_string(),
21597 f.args,
21598 ))))
21599 } else {
21600 Ok(e)
21601 }
21602 }
21603
21604 Action::ClickHouseAnyToAnyValue => {
21605 // ClickHouse any(x) -> ANY_VALUE(x) for non-ClickHouse targets
21606 if let Expression::Function(f) = e {
21607 Ok(Expression::Function(Box::new(Function::new(
21608 "ANY_VALUE".to_string(),
21609 f.args,
21610 ))))
21611 } else {
21612 Ok(e)
21613 }
21614 }
21615
21616 Action::OracleVarchar2ToVarchar => {
21617 // Oracle VARCHAR2(N CHAR/BYTE) / NVARCHAR2(N) -> VarChar(N) for non-Oracle targets
21618 if let Expression::DataType(DataType::Custom { ref name }) = e {
21619 let upper = name.to_uppercase();
21620 // Extract length from VARCHAR2(N ...) or NVARCHAR2(N ...)
21621 let inner =
21622 if upper.starts_with("VARCHAR2(") || upper.starts_with("NVARCHAR2(") {
21623 let start = if upper.starts_with("N") { 10 } else { 9 }; // skip "NVARCHAR2(" or "VARCHAR2("
21624 let end = name.len() - 1; // skip trailing ")"
21625 Some(&name[start..end])
21626 } else {
21627 Option::None
21628 };
21629 if let Some(inner_str) = inner {
21630 // Parse the number part, ignoring BYTE/CHAR qualifier
21631 let num_str = inner_str.split_whitespace().next().unwrap_or("");
21632 if let Ok(n) = num_str.parse::<u32>() {
21633 Ok(Expression::DataType(DataType::VarChar {
21634 length: Some(n),
21635 parenthesized_length: false,
21636 }))
21637 } else {
21638 Ok(e)
21639 }
21640 } else {
21641 // Plain VARCHAR2 / NVARCHAR2 without parens
21642 Ok(Expression::DataType(DataType::VarChar {
21643 length: Option::None,
21644 parenthesized_length: false,
21645 }))
21646 }
21647 } else {
21648 Ok(e)
21649 }
21650 }
21651
            Action::Nvl2Expand => {
                // NVL2(a, b[, c]) -> CASE WHEN NOT a IS NULL THEN b [ELSE c] END
                // But keep as NVL2 for dialects that support it natively
                let nvl2_native = matches!(
                    target,
                    DialectType::Oracle
                        | DialectType::Snowflake
                        | DialectType::Redshift
                        | DialectType::Teradata
                        | DialectType::Spark
                        | DialectType::Databricks
                );
                // Pull (a, b, Option<c>) out of either the dedicated Nvl2 node or a
                // generic NVL2(...) function call, returning early for native targets
                // and for expressions this action does not apply to.
                let (a, b, c) = if let Expression::Nvl2(nvl2) = e {
                    if nvl2_native {
                        return Ok(Expression::Nvl2(nvl2));
                    }
                    // The dedicated node always carries a false branch.
                    (nvl2.this, nvl2.true_value, Some(nvl2.false_value))
                } else if let Expression::Function(f) = e {
                    if nvl2_native {
                        return Ok(Expression::Function(Box::new(Function::new(
                            "NVL2".to_string(),
                            f.args,
                        ))));
                    }
                    if f.args.len() < 2 {
                        return Ok(Expression::Function(f));
                    }
                    let mut args = f.args;
                    let a = args.remove(0);
                    let b = args.remove(0);
                    let c = if !args.is_empty() {
                        Some(args.remove(0))
                    } else {
                        Option::None
                    };
                    (a, b, c)
                } else {
                    return Ok(e);
                };
                // Build: NOT (a IS NULL)
                let is_null = Expression::IsNull(Box::new(IsNull {
                    this: a,
                    not: false,
                    postfix_form: false,
                }));
                let not_null =
                    Expression::Not(Box::new(crate::expressions::UnaryOp { this: is_null }));
                // CASE WHEN NOT a IS NULL THEN b [ELSE c] END
                Ok(Expression::Case(Box::new(Case {
                    operand: Option::None,
                    whens: vec![(not_null, b)],
                    else_: c,
                    comments: Vec::new(),
                })))
            }
21706
21707 Action::IfnullToCoalesce => {
21708 // IFNULL(a, b) -> COALESCE(a, b): clear original_name to output COALESCE
21709 if let Expression::Coalesce(mut cf) = e {
21710 cf.original_name = Option::None;
21711 Ok(Expression::Coalesce(cf))
21712 } else if let Expression::Function(f) = e {
21713 Ok(Expression::Function(Box::new(Function::new(
21714 "COALESCE".to_string(),
21715 f.args,
21716 ))))
21717 } else {
21718 Ok(e)
21719 }
21720 }
21721
21722 Action::IsAsciiConvert => {
21723 // IS_ASCII(x) -> dialect-specific ASCII check
21724 if let Expression::Function(f) = e {
21725 let arg = f.args.into_iter().next().unwrap();
21726 match target {
21727 DialectType::MySQL | DialectType::SingleStore | DialectType::TiDB => {
21728 // REGEXP_LIKE(x, '^[[:ascii:]]*$')
21729 Ok(Expression::Function(Box::new(Function::new(
21730 "REGEXP_LIKE".to_string(),
21731 vec![
21732 arg,
21733 Expression::Literal(Literal::String(
21734 "^[[:ascii:]]*$".to_string(),
21735 )),
21736 ],
21737 ))))
21738 }
21739 DialectType::PostgreSQL
21740 | DialectType::Redshift
21741 | DialectType::Materialize
21742 | DialectType::RisingWave => {
21743 // (x ~ '^[[:ascii:]]*$')
21744 Ok(Expression::Paren(Box::new(Paren {
21745 this: Expression::RegexpLike(Box::new(
21746 crate::expressions::RegexpFunc {
21747 this: arg,
21748 pattern: Expression::Literal(Literal::String(
21749 "^[[:ascii:]]*$".to_string(),
21750 )),
21751 flags: Option::None,
21752 },
21753 )),
21754 trailing_comments: Vec::new(),
21755 })))
21756 }
21757 DialectType::SQLite => {
21758 // (NOT x GLOB CAST(x'2a5b5e012d7f5d2a' AS TEXT))
21759 let hex_lit = Expression::Literal(Literal::HexString(
21760 "2a5b5e012d7f5d2a".to_string(),
21761 ));
21762 let cast_expr = Expression::Cast(Box::new(Cast {
21763 this: hex_lit,
21764 to: DataType::Text,
21765 trailing_comments: Vec::new(),
21766 double_colon_syntax: false,
21767 format: Option::None,
21768 default: Option::None,
21769 }));
21770 let glob = Expression::Glob(Box::new(BinaryOp {
21771 left: arg,
21772 right: cast_expr,
21773 left_comments: Vec::new(),
21774 operator_comments: Vec::new(),
21775 trailing_comments: Vec::new(),
21776 }));
21777 Ok(Expression::Paren(Box::new(Paren {
21778 this: Expression::Not(Box::new(crate::expressions::UnaryOp {
21779 this: glob,
21780 })),
21781 trailing_comments: Vec::new(),
21782 })))
21783 }
21784 DialectType::TSQL | DialectType::Fabric => {
21785 // (PATINDEX(CONVERT(VARCHAR(MAX), 0x255b5e002d7f5d25) COLLATE Latin1_General_BIN, x) = 0)
21786 let hex_lit = Expression::Literal(Literal::HexNumber(
21787 "255b5e002d7f5d25".to_string(),
21788 ));
21789 let convert_expr = Expression::Convert(Box::new(
21790 crate::expressions::ConvertFunc {
21791 this: hex_lit,
21792 to: DataType::Text, // Text generates as VARCHAR(MAX) for TSQL
21793 style: None,
21794 },
21795 ));
21796 let collated = Expression::Collation(Box::new(
21797 crate::expressions::CollationExpr {
21798 this: convert_expr,
21799 collation: "Latin1_General_BIN".to_string(),
21800 quoted: false,
21801 double_quoted: false,
21802 },
21803 ));
21804 let patindex = Expression::Function(Box::new(Function::new(
21805 "PATINDEX".to_string(),
21806 vec![collated, arg],
21807 )));
21808 let zero = Expression::Literal(Literal::Number("0".to_string()));
21809 let eq_zero = Expression::Eq(Box::new(BinaryOp {
21810 left: patindex,
21811 right: zero,
21812 left_comments: Vec::new(),
21813 operator_comments: Vec::new(),
21814 trailing_comments: Vec::new(),
21815 }));
21816 Ok(Expression::Paren(Box::new(Paren {
21817 this: eq_zero,
21818 trailing_comments: Vec::new(),
21819 })))
21820 }
21821 DialectType::Oracle => {
21822 // NVL(REGEXP_LIKE(x, '^[' || CHR(1) || '-' || CHR(127) || ']*$'), TRUE)
21823 // Build the pattern: '^[' || CHR(1) || '-' || CHR(127) || ']*$'
21824 let s1 = Expression::Literal(Literal::String("^[".to_string()));
21825 let chr1 = Expression::Function(Box::new(Function::new(
21826 "CHR".to_string(),
21827 vec![Expression::Literal(Literal::Number("1".to_string()))],
21828 )));
21829 let dash = Expression::Literal(Literal::String("-".to_string()));
21830 let chr127 = Expression::Function(Box::new(Function::new(
21831 "CHR".to_string(),
21832 vec![Expression::Literal(Literal::Number("127".to_string()))],
21833 )));
21834 let s2 = Expression::Literal(Literal::String("]*$".to_string()));
21835 // Build: '^[' || CHR(1) || '-' || CHR(127) || ']*$'
21836 let concat1 =
21837 Expression::DPipe(Box::new(crate::expressions::DPipe {
21838 this: Box::new(s1),
21839 expression: Box::new(chr1),
21840 safe: None,
21841 }));
21842 let concat2 =
21843 Expression::DPipe(Box::new(crate::expressions::DPipe {
21844 this: Box::new(concat1),
21845 expression: Box::new(dash),
21846 safe: None,
21847 }));
21848 let concat3 =
21849 Expression::DPipe(Box::new(crate::expressions::DPipe {
21850 this: Box::new(concat2),
21851 expression: Box::new(chr127),
21852 safe: None,
21853 }));
21854 let concat4 =
21855 Expression::DPipe(Box::new(crate::expressions::DPipe {
21856 this: Box::new(concat3),
21857 expression: Box::new(s2),
21858 safe: None,
21859 }));
21860 let regexp_like = Expression::Function(Box::new(Function::new(
21861 "REGEXP_LIKE".to_string(),
21862 vec![arg, concat4],
21863 )));
21864 // Use Column("TRUE") to output literal TRUE keyword (not boolean 1/0)
21865 let true_expr = Expression::Column(crate::expressions::Column {
21866 name: Identifier {
21867 name: "TRUE".to_string(),
21868 quoted: false,
21869 trailing_comments: Vec::new(),
21870 },
21871 table: None,
21872 join_mark: false,
21873 trailing_comments: Vec::new(),
21874 });
21875 let nvl = Expression::Function(Box::new(Function::new(
21876 "NVL".to_string(),
21877 vec![regexp_like, true_expr],
21878 )));
21879 Ok(nvl)
21880 }
21881 _ => Ok(Expression::Function(Box::new(Function::new(
21882 "IS_ASCII".to_string(),
21883 vec![arg],
21884 )))),
21885 }
21886 } else {
21887 Ok(e)
21888 }
21889 }
21890
            Action::StrPositionConvert => {
                // STR_POSITION(haystack, needle[, position[, occurrence]]) -> dialect-specific
                if let Expression::Function(f) = e {
                    if f.args.len() < 2 {
                        return Ok(Expression::Function(f));
                    }
                    let mut args = f.args;

                    // Positional decomposition: required haystack/needle, then the two
                    // optional trailing arguments.
                    let haystack = args.remove(0);
                    let needle = args.remove(0);
                    let position = if !args.is_empty() {
                        Some(args.remove(0))
                    } else {
                        Option::None
                    };
                    let occurrence = if !args.is_empty() {
                        Some(args.remove(0))
                    } else {
                        Option::None
                    };

                    // Helper to build: STRPOS/INSTR(SUBSTRING(haystack, pos), needle) expansion
                    // Returns: CASE/IF WHEN func(SUBSTRING(haystack, pos), needle[, occ]) = 0 THEN 0 ELSE ... + pos - 1 END
                    // The `+ pos - 1` re-bases the index found in the substring back onto
                    // the original haystack; a miss (0) stays 0.
                    fn build_position_expansion(
                        haystack: Expression,
                        needle: Expression,
                        pos: Expression,
                        occurrence: Option<Expression>,
                        inner_func: &str,
                        wrapper: &str, // "CASE", "IF", "IIF"
                    ) -> Expression {
                        let substr = Expression::Function(Box::new(Function::new(
                            "SUBSTRING".to_string(),
                            vec![haystack, pos.clone()],
                        )));
                        let mut inner_args = vec![substr, needle];
                        if let Some(occ) = occurrence {
                            inner_args.push(occ);
                        }
                        let inner_call = Expression::Function(Box::new(Function::new(
                            inner_func.to_string(),
                            inner_args,
                        )));
                        let zero = Expression::Literal(Literal::Number("0".to_string()));
                        let one = Expression::Literal(Literal::Number("1".to_string()));
                        // inner_call = 0 (needle not found in the substring)
                        let eq_zero = Expression::Eq(Box::new(BinaryOp {
                            left: inner_call.clone(),
                            right: zero.clone(),
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                        }));
                        // inner_call + pos - 1 (re-based index)
                        let add_pos = Expression::Add(Box::new(BinaryOp {
                            left: inner_call,
                            right: pos,
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                        }));
                        let sub_one = Expression::Sub(Box::new(BinaryOp {
                            left: add_pos,
                            right: one,
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                        }));

                        match wrapper {
                            "CASE" => Expression::Case(Box::new(Case {
                                operand: Option::None,
                                whens: vec![(eq_zero, zero)],
                                else_: Some(sub_one),
                                comments: Vec::new(),
                            })),
                            "IIF" => Expression::Function(Box::new(Function::new(
                                "IIF".to_string(),
                                vec![eq_zero, zero, sub_one],
                            ))),
                            _ => Expression::Function(Box::new(Function::new(
                                "IF".to_string(),
                                vec![eq_zero, zero, sub_one],
                            ))),
                        }
                    }

                    match target {
                        // STRPOS group: Athena, DuckDB, Presto, Trino, Drill
                        DialectType::Athena
                        | DialectType::DuckDB
                        | DialectType::Presto
                        | DialectType::Trino
                        | DialectType::Drill => {
                            if let Some(pos) = position {
                                // DuckDB has no IF() function, so it gets CASE.
                                let wrapper = if matches!(target, DialectType::DuckDB) {
                                    "CASE"
                                } else {
                                    "IF"
                                };
                                let result = build_position_expansion(
                                    haystack, needle, pos, occurrence, "STRPOS", wrapper,
                                );
                                if matches!(target, DialectType::Drill) {
                                    // Drill uses backtick-quoted `IF`
                                    if let Expression::Function(mut f) = result {
                                        f.name = "`IF`".to_string();
                                        Ok(Expression::Function(f))
                                    } else {
                                        Ok(result)
                                    }
                                } else {
                                    Ok(result)
                                }
                            } else {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "STRPOS".to_string(),
                                    vec![haystack, needle],
                                ))))
                            }
                        }
                        // SQLite: IIF wrapper
                        DialectType::SQLite => {
                            if let Some(pos) = position {
                                Ok(build_position_expansion(
                                    haystack, needle, pos, occurrence, "INSTR", "IIF",
                                ))
                            } else {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "INSTR".to_string(),
                                    vec![haystack, needle],
                                ))))
                            }
                        }
                        // INSTR group: Teradata, BigQuery, Oracle — natively support
                        // the 3rd (position) and 4th (occurrence) arguments.
                        DialectType::Teradata | DialectType::BigQuery | DialectType::Oracle => {
                            let mut a = vec![haystack, needle];
                            if let Some(pos) = position {
                                a.push(pos);
                            }
                            if let Some(occ) = occurrence {
                                a.push(occ);
                            }
                            Ok(Expression::Function(Box::new(Function::new(
                                "INSTR".to_string(),
                                a,
                            ))))
                        }
                        // CHARINDEX group: Snowflake, TSQL, Fabric — note the swapped
                        // argument order: CHARINDEX(needle, haystack[, position]).
                        DialectType::Snowflake | DialectType::TSQL | DialectType::Fabric => {
                            let mut a = vec![needle, haystack];
                            if let Some(pos) = position {
                                a.push(pos);
                            }
                            Ok(Expression::Function(Box::new(Function::new(
                                "CHARINDEX".to_string(),
                                a,
                            ))))
                        }
                        // POSITION(needle IN haystack): PostgreSQL, Materialize, RisingWave, Redshift
                        DialectType::PostgreSQL
                        | DialectType::Materialize
                        | DialectType::RisingWave
                        | DialectType::Redshift => {
                            if let Some(pos) = position {
                                // Build: CASE WHEN POSITION(needle IN SUBSTRING(haystack FROM pos)) = 0 THEN 0
                                //        ELSE POSITION(...) + pos - 1 END
                                // (same re-basing idea as build_position_expansion, but with
                                // the SUBSTRING ... FROM and POSITION ... IN syntax nodes)
                                let substr = Expression::Substring(Box::new(
                                    crate::expressions::SubstringFunc {
                                        this: haystack,
                                        start: pos.clone(),
                                        length: Option::None,
                                        from_for_syntax: true,
                                    },
                                ));
                                let pos_in = Expression::StrPosition(Box::new(
                                    crate::expressions::StrPosition {
                                        this: Box::new(substr),
                                        substr: Some(Box::new(needle)),
                                        position: Option::None,
                                        occurrence: Option::None,
                                    },
                                ));
                                let zero =
                                    Expression::Literal(Literal::Number("0".to_string()));
                                let one = Expression::Literal(Literal::Number("1".to_string()));
                                let eq_zero = Expression::Eq(Box::new(BinaryOp {
                                    left: pos_in.clone(),
                                    right: zero.clone(),
                                    left_comments: Vec::new(),
                                    operator_comments: Vec::new(),
                                    trailing_comments: Vec::new(),
                                }));
                                let add_pos = Expression::Add(Box::new(BinaryOp {
                                    left: pos_in,
                                    right: pos,
                                    left_comments: Vec::new(),
                                    operator_comments: Vec::new(),
                                    trailing_comments: Vec::new(),
                                }));
                                let sub_one = Expression::Sub(Box::new(BinaryOp {
                                    left: add_pos,
                                    right: one,
                                    left_comments: Vec::new(),
                                    operator_comments: Vec::new(),
                                    trailing_comments: Vec::new(),
                                }));
                                Ok(Expression::Case(Box::new(Case {
                                    operand: Option::None,
                                    whens: vec![(eq_zero, zero)],
                                    else_: Some(sub_one),
                                    comments: Vec::new(),
                                })))
                            } else {
                                Ok(Expression::StrPosition(Box::new(
                                    crate::expressions::StrPosition {
                                        this: Box::new(haystack),
                                        substr: Some(Box::new(needle)),
                                        position: Option::None,
                                        occurrence: Option::None,
                                    },
                                )))
                            }
                        }
                        // LOCATE group: MySQL, SingleStore, TiDB, Hive, Spark, Databricks,
                        // Doris, StarRocks — LOCATE(needle, haystack[, position]).
                        DialectType::MySQL
                        | DialectType::SingleStore
                        | DialectType::TiDB
                        | DialectType::Hive
                        | DialectType::Spark
                        | DialectType::Databricks
                        | DialectType::Doris
                        | DialectType::StarRocks => {
                            let mut a = vec![needle, haystack];
                            if let Some(pos) = position {
                                a.push(pos);
                            }
                            Ok(Expression::Function(Box::new(Function::new(
                                "LOCATE".to_string(),
                                a,
                            ))))
                        }
                        // ClickHouse: POSITION(haystack, needle[, position])
                        DialectType::ClickHouse => {
                            let mut a = vec![haystack, needle];
                            if let Some(pos) = position {
                                a.push(pos);
                            }
                            Ok(Expression::Function(Box::new(Function::new(
                                "POSITION".to_string(),
                                a,
                            ))))
                        }
                        // Fallback: keep STR_POSITION with all supplied arguments.
                        _ => {
                            let mut a = vec![haystack, needle];
                            if let Some(pos) = position {
                                a.push(pos);
                            }
                            if let Some(occ) = occurrence {
                                a.push(occ);
                            }
                            Ok(Expression::Function(Box::new(Function::new(
                                "STR_POSITION".to_string(),
                                a,
                            ))))
                        }
                    }
                } else {
                    Ok(e)
                }
            }
22160
22161 Action::ArraySumConvert => {
22162 // ARRAY_SUM(arr) -> dialect-specific
22163 if let Expression::Function(f) = e {
22164 let args = f.args;
22165 match target {
22166 DialectType::DuckDB => Ok(Expression::Function(Box::new(
22167 Function::new("LIST_SUM".to_string(), args),
22168 ))),
22169 DialectType::Spark | DialectType::Databricks => {
22170 // AGGREGATE(arr, 0, (acc, x) -> acc + x, acc -> acc)
22171 let arr = args.into_iter().next().unwrap();
22172 let zero = Expression::Literal(Literal::Number("0".to_string()));
22173 let acc_id = Identifier::new("acc");
22174 let x_id = Identifier::new("x");
22175 let acc = Expression::Identifier(acc_id.clone());
22176 let x = Expression::Identifier(x_id.clone());
22177 let add = Expression::Add(Box::new(BinaryOp {
22178 left: acc.clone(),
22179 right: x,
22180 left_comments: Vec::new(),
22181 operator_comments: Vec::new(),
22182 trailing_comments: Vec::new(),
22183 }));
22184 let lambda1 =
22185 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
22186 parameters: vec![acc_id.clone(), x_id],
22187 body: add,
22188 colon: false,
22189 parameter_types: Vec::new(),
22190 }));
22191 let lambda2 =
22192 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
22193 parameters: vec![acc_id],
22194 body: acc,
22195 colon: false,
22196 parameter_types: Vec::new(),
22197 }));
22198 Ok(Expression::Function(Box::new(Function::new(
22199 "AGGREGATE".to_string(),
22200 vec![arr, zero, lambda1, lambda2],
22201 ))))
22202 }
22203 DialectType::Presto | DialectType::Athena => {
22204 // Presto/Athena keep ARRAY_SUM natively
22205 Ok(Expression::Function(Box::new(Function::new(
22206 "ARRAY_SUM".to_string(),
22207 args,
22208 ))))
22209 }
22210 DialectType::Trino => {
22211 // REDUCE(arr, 0, (acc, x) -> acc + x, acc -> acc)
22212 if args.len() == 1 {
22213 let arr = args.into_iter().next().unwrap();
22214 let zero =
22215 Expression::Literal(Literal::Number("0".to_string()));
22216 let acc_id = Identifier::new("acc");
22217 let x_id = Identifier::new("x");
22218 let acc = Expression::Identifier(acc_id.clone());
22219 let x = Expression::Identifier(x_id.clone());
22220 let add = Expression::Add(Box::new(BinaryOp {
22221 left: acc.clone(),
22222 right: x,
22223 left_comments: Vec::new(),
22224 operator_comments: Vec::new(),
22225 trailing_comments: Vec::new(),
22226 }));
22227 let lambda1 = Expression::Lambda(Box::new(
22228 crate::expressions::LambdaExpr {
22229 parameters: vec![acc_id.clone(), x_id],
22230 body: add,
22231 colon: false,
22232 parameter_types: Vec::new(),
22233 },
22234 ));
22235 let lambda2 = Expression::Lambda(Box::new(
22236 crate::expressions::LambdaExpr {
22237 parameters: vec![acc_id],
22238 body: acc,
22239 colon: false,
22240 parameter_types: Vec::new(),
22241 },
22242 ));
22243 Ok(Expression::Function(Box::new(Function::new(
22244 "REDUCE".to_string(),
22245 vec![arr, zero, lambda1, lambda2],
22246 ))))
22247 } else {
22248 Ok(Expression::Function(Box::new(Function::new(
22249 "ARRAY_SUM".to_string(),
22250 args,
22251 ))))
22252 }
22253 }
22254 DialectType::ClickHouse => {
22255 // arraySum(lambda, arr) or arraySum(arr)
22256 Ok(Expression::Function(Box::new(Function::new(
22257 "arraySum".to_string(),
22258 args,
22259 ))))
22260 }
22261 _ => Ok(Expression::Function(Box::new(Function::new(
22262 "ARRAY_SUM".to_string(),
22263 args,
22264 )))),
22265 }
22266 } else {
22267 Ok(e)
22268 }
22269 }
22270
22271 Action::ArraySizeConvert => {
22272 if let Expression::Function(f) = e {
22273 Ok(Expression::Function(Box::new(Function::new(
22274 "REPEATED_COUNT".to_string(),
22275 f.args,
22276 ))))
22277 } else {
22278 Ok(e)
22279 }
22280 }
22281
22282 Action::ArrayAnyConvert => {
22283 if let Expression::Function(f) = e {
22284 let mut args = f.args;
22285 if args.len() == 2 {
22286 let arr = args.remove(0);
22287 let lambda = args.remove(0);
22288
22289 // Extract lambda parameter name and body
22290 let (param_name, pred_body) =
22291 if let Expression::Lambda(ref lam) = lambda {
22292 let name = if let Some(p) = lam.parameters.first() {
22293 p.name.clone()
22294 } else {
22295 "x".to_string()
22296 };
22297 (name, lam.body.clone())
22298 } else {
22299 ("x".to_string(), lambda.clone())
22300 };
22301
22302 // Helper: build a function call Expression
22303 let make_func = |name: &str, args: Vec<Expression>| -> Expression {
22304 Expression::Function(Box::new(Function::new(
22305 name.to_string(),
22306 args,
22307 )))
22308 };
22309
22310 // Helper: build (len_func(arr) = 0 OR len_func(filter_expr) <> 0) wrapped in Paren
22311 let build_filter_pattern = |len_func: &str,
22312 len_args_extra: Vec<Expression>,
22313 filter_expr: Expression|
22314 -> Expression {
22315 // len_func(arr, ...extra) = 0
22316 let mut len_arr_args = vec![arr.clone()];
22317 len_arr_args.extend(len_args_extra.clone());
22318 let len_arr = make_func(len_func, len_arr_args);
22319 let eq_zero = Expression::Eq(Box::new(BinaryOp::new(
22320 len_arr,
22321 Expression::number(0),
22322 )));
22323
22324 // len_func(filter_expr, ...extra) <> 0
22325 let mut len_filter_args = vec![filter_expr];
22326 len_filter_args.extend(len_args_extra);
22327 let len_filter = make_func(len_func, len_filter_args);
22328 let neq_zero = Expression::Neq(Box::new(BinaryOp::new(
22329 len_filter,
22330 Expression::number(0),
22331 )));
22332
22333 // (eq_zero OR neq_zero)
22334 let or_expr =
22335 Expression::Or(Box::new(BinaryOp::new(eq_zero, neq_zero)));
22336 Expression::Paren(Box::new(Paren {
22337 this: or_expr,
22338 trailing_comments: Vec::new(),
22339 }))
22340 };
22341
22342 match target {
22343 DialectType::Trino | DialectType::Presto | DialectType::Athena => {
22344 Ok(make_func("ANY_MATCH", vec![arr, lambda]))
22345 }
22346 DialectType::ClickHouse => {
22347 // (LENGTH(arr) = 0 OR LENGTH(arrayFilter(x -> pred, arr)) <> 0)
22348 // ClickHouse arrayFilter takes lambda first, then array
22349 let filter_expr =
22350 make_func("arrayFilter", vec![lambda, arr.clone()]);
22351 Ok(build_filter_pattern("LENGTH", vec![], filter_expr))
22352 }
22353 DialectType::Databricks | DialectType::Spark => {
22354 // (SIZE(arr) = 0 OR SIZE(FILTER(arr, x -> pred)) <> 0)
22355 let filter_expr =
22356 make_func("FILTER", vec![arr.clone(), lambda]);
22357 Ok(build_filter_pattern("SIZE", vec![], filter_expr))
22358 }
22359 DialectType::DuckDB => {
22360 // (ARRAY_LENGTH(arr) = 0 OR ARRAY_LENGTH(LIST_FILTER(arr, x -> pred)) <> 0)
22361 let filter_expr =
22362 make_func("LIST_FILTER", vec![arr.clone(), lambda]);
22363 Ok(build_filter_pattern("ARRAY_LENGTH", vec![], filter_expr))
22364 }
22365 DialectType::Teradata => {
22366 // (CARDINALITY(arr) = 0 OR CARDINALITY(FILTER(arr, x -> pred)) <> 0)
22367 let filter_expr =
22368 make_func("FILTER", vec![arr.clone(), lambda]);
22369 Ok(build_filter_pattern("CARDINALITY", vec![], filter_expr))
22370 }
22371 DialectType::BigQuery => {
22372 // (ARRAY_LENGTH(arr) = 0 OR ARRAY_LENGTH(ARRAY(SELECT x FROM UNNEST(arr) AS x WHERE pred)) <> 0)
22373 // Build: SELECT x FROM UNNEST(arr) AS x WHERE pred
22374 let param_col = Expression::column(¶m_name);
22375 let unnest_expr = Expression::Unnest(Box::new(
22376 crate::expressions::UnnestFunc {
22377 this: arr.clone(),
22378 expressions: vec![],
22379 with_ordinality: false,
22380 alias: Some(Identifier::new(¶m_name)),
22381 offset_alias: None,
22382 },
22383 ));
22384 let mut sel = crate::expressions::Select::default();
22385 sel.expressions = vec![param_col];
22386 sel.from = Some(crate::expressions::From {
22387 expressions: vec![unnest_expr],
22388 });
22389 sel.where_clause =
22390 Some(crate::expressions::Where { this: pred_body });
22391 let array_subquery =
22392 make_func("ARRAY", vec![Expression::Select(Box::new(sel))]);
22393 Ok(build_filter_pattern("ARRAY_LENGTH", vec![], array_subquery))
22394 }
22395 DialectType::PostgreSQL => {
22396 // (ARRAY_LENGTH(arr, 1) = 0 OR ARRAY_LENGTH(ARRAY(SELECT x FROM UNNEST(arr) AS _t0(x) WHERE pred), 1) <> 0)
22397 // Build: SELECT x FROM UNNEST(arr) AS _t0(x) WHERE pred
22398 let param_col = Expression::column(¶m_name);
22399 // For PostgreSQL, UNNEST uses AS _t0(x) syntax - use TableAlias
22400 let unnest_with_alias =
22401 Expression::Alias(Box::new(crate::expressions::Alias {
22402 this: Expression::Unnest(Box::new(
22403 crate::expressions::UnnestFunc {
22404 this: arr.clone(),
22405 expressions: vec![],
22406 with_ordinality: false,
22407 alias: None,
22408 offset_alias: None,
22409 },
22410 )),
22411 alias: Identifier::new("_t0"),
22412 column_aliases: vec![Identifier::new(¶m_name)],
22413 pre_alias_comments: Vec::new(),
22414 trailing_comments: Vec::new(),
22415 }));
22416 let mut sel = crate::expressions::Select::default();
22417 sel.expressions = vec![param_col];
22418 sel.from = Some(crate::expressions::From {
22419 expressions: vec![unnest_with_alias],
22420 });
22421 sel.where_clause =
22422 Some(crate::expressions::Where { this: pred_body });
22423 let array_subquery =
22424 make_func("ARRAY", vec![Expression::Select(Box::new(sel))]);
22425 Ok(build_filter_pattern(
22426 "ARRAY_LENGTH",
22427 vec![Expression::number(1)],
22428 array_subquery,
22429 ))
22430 }
22431 _ => Ok(Expression::Function(Box::new(Function::new(
22432 "ARRAY_ANY".to_string(),
22433 vec![arr, lambda],
22434 )))),
22435 }
22436 } else {
22437 Ok(Expression::Function(Box::new(Function::new(
22438 "ARRAY_ANY".to_string(),
22439 args,
22440 ))))
22441 }
22442 } else {
22443 Ok(e)
22444 }
22445 }
22446
Action::DecodeSimplify => {
    // DECODE(x, search1, result1, ..., default) -> CASE WHEN ... THEN result1 ... [ELSE default] END
    // For literal search values: CASE WHEN x = search THEN result
    // For NULL search: CASE WHEN x IS NULL THEN result
    // For non-literal (column, expr): CASE WHEN x = search OR (x IS NULL AND search IS NULL) THEN result
    //
    // Handles both AST shapes a DECODE can arrive in: Expression::Decode
    // (structured operand / pairs / default) and Expression::DecodeCase
    // (flat argument list). Anything else passes through unchanged.

    // True when a search value can never be NULL (plain literal, boolean,
    // or negated value), so a bare equality is already null-safe.
    fn is_decode_literal(e: &Expression) -> bool {
        matches!(
            e,
            Expression::Literal(_) | Expression::Boolean(_) | Expression::Neg(_)
        )
    }

    // Build the CASE expression from the operand, the (search, result)
    // pairs, and an optional default (which becomes the ELSE branch).
    let build_decode_case =
        |this_expr: Expression,
         pairs: Vec<(Expression, Expression)>,
         default: Option<Expression>| {
            let whens: Vec<(Expression, Expression)> = pairs
                .into_iter()
                .map(|(search, result)| {
                    if matches!(&search, Expression::Null(_)) {
                        // NULL search -> IS NULL
                        let condition = Expression::Is(Box::new(BinaryOp {
                            left: this_expr.clone(),
                            right: Expression::Null(crate::expressions::Null),
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                        }));
                        (condition, result)
                    } else if is_decode_literal(&search)
                        || is_decode_literal(&this_expr)
                    {
                        // At least one side is a literal -> simple equality (no NULL check needed)
                        let eq = Expression::Eq(Box::new(BinaryOp {
                            left: this_expr.clone(),
                            right: search,
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                        }));
                        (eq, result)
                    } else {
                        // Non-literal -> null-safe comparison.
                        // A search that is itself a comparison must be
                        // parenthesized before embedding it under `=` / `IS`.
                        let needs_paren = matches!(
                            &search,
                            Expression::Eq(_)
                                | Expression::Neq(_)
                                | Expression::Gt(_)
                                | Expression::Gte(_)
                                | Expression::Lt(_)
                                | Expression::Lte(_)
                        );
                        let search_ref = if needs_paren {
                            Expression::Paren(Box::new(crate::expressions::Paren {
                                this: search.clone(),
                                trailing_comments: Vec::new(),
                            }))
                        } else {
                            search.clone()
                        };
                        // Build: x = search OR (x IS NULL AND search IS NULL)
                        let eq = Expression::Eq(Box::new(BinaryOp {
                            left: this_expr.clone(),
                            right: search_ref,
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                        }));
                        // Second (independently parenthesized) copy of search
                        // for the IS NULL leg.
                        let search_in_null = if needs_paren {
                            Expression::Paren(Box::new(crate::expressions::Paren {
                                this: search.clone(),
                                trailing_comments: Vec::new(),
                            }))
                        } else {
                            search.clone()
                        };
                        let x_is_null = Expression::Is(Box::new(BinaryOp {
                            left: this_expr.clone(),
                            right: Expression::Null(crate::expressions::Null),
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                        }));
                        let search_is_null = Expression::Is(Box::new(BinaryOp {
                            left: search_in_null,
                            right: Expression::Null(crate::expressions::Null),
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                        }));
                        let both_null = Expression::And(Box::new(BinaryOp {
                            left: x_is_null,
                            right: search_is_null,
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                        }));
                        let condition = Expression::Or(Box::new(BinaryOp {
                            left: eq,
                            right: Expression::Paren(Box::new(
                                crate::expressions::Paren {
                                    this: both_null,
                                    trailing_comments: Vec::new(),
                                },
                            )),
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                        }));
                        (condition, result)
                    }
                })
                .collect();
            // No CASE operand: each WHEN carries its own full condition.
            Expression::Case(Box::new(Case {
                operand: None,
                whens,
                else_: default,
                comments: Vec::new(),
            }))
        };

    if let Expression::Decode(decode) = e {
        Ok(build_decode_case(
            decode.this,
            decode.search_results,
            decode.default,
        ))
    } else if let Expression::DecodeCase(dc) = e {
        // DecodeCase has flat expressions: [x, s1, r1, s2, r2, ..., default?]
        let mut exprs = dc.expressions;
        if exprs.len() < 3 {
            // Too few arguments to form even one WHEN pair; rebuild as-is.
            return Ok(Expression::DecodeCase(Box::new(
                crate::expressions::DecodeCase { expressions: exprs },
            )));
        }
        let this_expr = exprs.remove(0);
        let mut pairs = Vec::new();
        let mut default = None;
        let mut i = 0;
        // Walk the remaining list two at a time as (search, result) pairs.
        while i + 1 < exprs.len() {
            pairs.push((exprs[i].clone(), exprs[i + 1].clone()));
            i += 2;
        }
        if i < exprs.len() {
            // Odd remaining element is the default
            default = Some(exprs[i].clone());
        }
        Ok(build_decode_case(this_expr, pairs, default))
    } else {
        Ok(e)
    }
}
22599
Action::CreateTableLikeToCtas => {
    // CREATE TABLE a LIKE b -> CREATE TABLE a AS SELECT * FROM b LIMIT 0
    // (copies the schema but no rows, for dialects without LIKE support).
    if let Expression::CreateTable(ct) = e {
        // Find the LIKE constraint, if any, and clone its source table.
        let like_source = ct.constraints.iter().find_map(|c| {
            if let crate::expressions::TableConstraint::Like { source, .. } = c {
                Some(source.clone())
            } else {
                None
            }
        });
        if let Some(source_table) = like_source {
            let mut new_ct = *ct;
            // NOTE(review): this drops ALL constraints, not just the LIKE one —
            // presumably intentional since a CTAS carries no constraints; confirm.
            new_ct.constraints.clear();
            // Build: SELECT * FROM b LIMIT 0
            let select = Expression::Select(Box::new(crate::expressions::Select {
                expressions: vec![Expression::Star(crate::expressions::Star {
                    table: None,
                    except: None,
                    replace: None,
                    rename: None,
                    trailing_comments: Vec::new(),
                })],
                from: Some(crate::expressions::From {
                    expressions: vec![Expression::Table(source_table)],
                }),
                // LIMIT 0 ensures only the schema is materialized.
                limit: Some(crate::expressions::Limit {
                    this: Expression::Literal(Literal::Number("0".to_string())),
                    percent: false,
                    comments: Vec::new(),
                }),
                ..Default::default()
            }));
            new_ct.as_select = Some(select);
            Ok(Expression::CreateTable(Box::new(new_ct)))
        } else {
            // No LIKE constraint: leave the statement untouched.
            Ok(Expression::CreateTable(ct))
        }
    } else {
        Ok(e)
    }
}
22641
Action::CreateTableLikeToSelectInto => {
    // CREATE TABLE a LIKE b -> SELECT TOP 0 * INTO a FROM b AS temp
    // (TSQL-style SELECT ... INTO form; TOP 0 copies the schema, no rows).
    if let Expression::CreateTable(ct) = e {
        // Find the LIKE constraint, if any, and clone its source table.
        let like_source = ct.constraints.iter().find_map(|c| {
            if let crate::expressions::TableConstraint::Like { source, .. } = c {
                Some(source.clone())
            } else {
                None
            }
        });
        if let Some(source_table) = like_source {
            // Alias the source so the output reads FROM b AS temp.
            let mut aliased_source = source_table;
            aliased_source.alias = Some(Identifier::new("temp"));
            // Build: SELECT TOP 0 * INTO a FROM b AS temp
            let select = Expression::Select(Box::new(crate::expressions::Select {
                expressions: vec![Expression::Star(crate::expressions::Star {
                    table: None,
                    except: None,
                    replace: None,
                    rename: None,
                    trailing_comments: Vec::new(),
                })],
                from: Some(crate::expressions::From {
                    expressions: vec![Expression::Table(aliased_source)],
                }),
                // INTO targets the table being created.
                into: Some(crate::expressions::SelectInto {
                    this: Expression::Table(ct.name.clone()),
                    temporary: false,
                    unlogged: false,
                    bulk_collect: false,
                    expressions: Vec::new(),
                }),
                top: Some(crate::expressions::Top {
                    this: Expression::Literal(Literal::Number("0".to_string())),
                    percent: false,
                    with_ties: false,
                    parenthesized: false,
                }),
                ..Default::default()
            }));
            Ok(select)
        } else {
            // No LIKE constraint: leave the statement untouched.
            Ok(Expression::CreateTable(ct))
        }
    } else {
        Ok(e)
    }
}
22690
22691 Action::CreateTableLikeToAs => {
22692 // CREATE TABLE a LIKE b -> CREATE TABLE a AS b (ClickHouse)
22693 if let Expression::CreateTable(ct) = e {
22694 let like_source = ct.constraints.iter().find_map(|c| {
22695 if let crate::expressions::TableConstraint::Like { source, .. } = c {
22696 Some(source.clone())
22697 } else {
22698 None
22699 }
22700 });
22701 if let Some(source_table) = like_source {
22702 let mut new_ct = *ct;
22703 new_ct.constraints.clear();
22704 // AS b (just a table reference, not a SELECT)
22705 new_ct.as_select = Some(Expression::Table(source_table));
22706 Ok(Expression::CreateTable(Box::new(new_ct)))
22707 } else {
22708 Ok(Expression::CreateTable(ct))
22709 }
22710 } else {
22711 Ok(e)
22712 }
22713 }
22714
22715 Action::TsOrDsToDateConvert => {
22716 // TS_OR_DS_TO_DATE(x[, fmt]) -> dialect-specific date conversion
22717 if let Expression::Function(f) = e {
22718 let mut args = f.args;
22719 let this = args.remove(0);
22720 let fmt = if !args.is_empty() {
22721 match &args[0] {
22722 Expression::Literal(Literal::String(s)) => Some(s.clone()),
22723 _ => None,
22724 }
22725 } else {
22726 None
22727 };
22728 Ok(Expression::TsOrDsToDate(Box::new(
22729 crate::expressions::TsOrDsToDate {
22730 this: Box::new(this),
22731 format: fmt,
22732 safe: None,
22733 },
22734 )))
22735 } else {
22736 Ok(e)
22737 }
22738 }
22739
22740 Action::TsOrDsToDateStrConvert => {
22741 // TS_OR_DS_TO_DATE_STR(x) -> SUBSTRING(CAST(x AS type), 1, 10)
22742 if let Expression::Function(f) = e {
22743 let arg = f.args.into_iter().next().unwrap();
22744 let str_type = match target {
22745 DialectType::DuckDB
22746 | DialectType::PostgreSQL
22747 | DialectType::Materialize => DataType::Text,
22748 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
22749 DataType::Custom {
22750 name: "STRING".to_string(),
22751 }
22752 }
22753 DialectType::Presto
22754 | DialectType::Trino
22755 | DialectType::Athena
22756 | DialectType::Drill => DataType::VarChar {
22757 length: None,
22758 parenthesized_length: false,
22759 },
22760 DialectType::MySQL | DialectType::Doris | DialectType::StarRocks => {
22761 DataType::Custom {
22762 name: "STRING".to_string(),
22763 }
22764 }
22765 _ => DataType::VarChar {
22766 length: None,
22767 parenthesized_length: false,
22768 },
22769 };
22770 let cast_expr = Expression::Cast(Box::new(Cast {
22771 this: arg,
22772 to: str_type,
22773 double_colon_syntax: false,
22774 trailing_comments: Vec::new(),
22775 format: None,
22776 default: None,
22777 }));
22778 Ok(Expression::Substring(Box::new(
22779 crate::expressions::SubstringFunc {
22780 this: cast_expr,
22781 start: Expression::number(1),
22782 length: Some(Expression::number(10)),
22783 from_for_syntax: false,
22784 },
22785 )))
22786 } else {
22787 Ok(e)
22788 }
22789 }
22790
22791 Action::DateStrToDateConvert => {
22792 // DATE_STR_TO_DATE(x) -> dialect-specific
22793 if let Expression::Function(f) = e {
22794 let arg = f.args.into_iter().next().unwrap();
22795 match target {
22796 DialectType::SQLite => {
22797 // SQLite: just the bare expression (dates are strings)
22798 Ok(arg)
22799 }
22800 _ => Ok(Expression::Cast(Box::new(Cast {
22801 this: arg,
22802 to: DataType::Date,
22803 double_colon_syntax: false,
22804 trailing_comments: Vec::new(),
22805 format: None,
22806 default: None,
22807 }))),
22808 }
22809 } else {
22810 Ok(e)
22811 }
22812 }
22813
22814 Action::TimeStrToDateConvert => {
22815 // TIME_STR_TO_DATE(x) -> dialect-specific
22816 if let Expression::Function(f) = e {
22817 let arg = f.args.into_iter().next().unwrap();
22818 match target {
22819 DialectType::Hive
22820 | DialectType::Doris
22821 | DialectType::StarRocks
22822 | DialectType::Snowflake => Ok(Expression::Function(Box::new(
22823 Function::new("TO_DATE".to_string(), vec![arg]),
22824 ))),
22825 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
22826 // Presto: CAST(x AS TIMESTAMP)
22827 Ok(Expression::Cast(Box::new(Cast {
22828 this: arg,
22829 to: DataType::Timestamp {
22830 timezone: false,
22831 precision: None,
22832 },
22833 double_colon_syntax: false,
22834 trailing_comments: Vec::new(),
22835 format: None,
22836 default: None,
22837 })))
22838 }
22839 _ => {
22840 // Default: CAST(x AS DATE)
22841 Ok(Expression::Cast(Box::new(Cast {
22842 this: arg,
22843 to: DataType::Date,
22844 double_colon_syntax: false,
22845 trailing_comments: Vec::new(),
22846 format: None,
22847 default: None,
22848 })))
22849 }
22850 }
22851 } else {
22852 Ok(e)
22853 }
22854 }
22855
22856 Action::TimeStrToTimeConvert => {
22857 // TIME_STR_TO_TIME(x[, zone]) -> dialect-specific CAST to timestamp type
22858 if let Expression::Function(f) = e {
22859 let mut args = f.args;
22860 let this = args.remove(0);
22861 let zone = if !args.is_empty() {
22862 match &args[0] {
22863 Expression::Literal(Literal::String(s)) => Some(s.clone()),
22864 _ => None,
22865 }
22866 } else {
22867 None
22868 };
22869 let has_zone = zone.is_some();
22870
22871 match target {
22872 DialectType::SQLite => {
22873 // SQLite: just the bare expression
22874 Ok(this)
22875 }
22876 DialectType::MySQL => {
22877 if has_zone {
22878 // MySQL with zone: TIMESTAMP(x)
22879 Ok(Expression::Function(Box::new(Function::new(
22880 "TIMESTAMP".to_string(),
22881 vec![this],
22882 ))))
22883 } else {
22884 // MySQL: CAST(x AS DATETIME) or with precision
22885 // Use DataType::Custom to avoid MySQL's transform_cast converting
22886 // CAST(x AS TIMESTAMP) -> TIMESTAMP(x)
22887 let precision =
22888 if let Expression::Literal(Literal::String(ref s)) = this {
22889 if let Some(dot_pos) = s.rfind('.') {
22890 let frac = &s[dot_pos + 1..];
22891 let digit_count = frac
22892 .chars()
22893 .take_while(|c| c.is_ascii_digit())
22894 .count();
22895 if digit_count > 0 {
22896 Some(digit_count)
22897 } else {
22898 None
22899 }
22900 } else {
22901 None
22902 }
22903 } else {
22904 None
22905 };
22906 let type_name = match precision {
22907 Some(p) => format!("DATETIME({})", p),
22908 None => "DATETIME".to_string(),
22909 };
22910 Ok(Expression::Cast(Box::new(Cast {
22911 this,
22912 to: DataType::Custom { name: type_name },
22913 double_colon_syntax: false,
22914 trailing_comments: Vec::new(),
22915 format: None,
22916 default: None,
22917 })))
22918 }
22919 }
22920 DialectType::ClickHouse => {
22921 if has_zone {
22922 // ClickHouse with zone: CAST(x AS DateTime64(6, 'zone'))
22923 // We need to strip the timezone offset from the literal if present
22924 let clean_this =
22925 if let Expression::Literal(Literal::String(ref s)) = this {
22926 // Strip timezone offset like "-08:00" or "+00:00"
22927 let re_offset = s.rfind(|c: char| c == '+' || c == '-');
22928 if let Some(offset_pos) = re_offset {
22929 if offset_pos > 10 {
22930 // After the date part
22931 let trimmed = s[..offset_pos].to_string();
22932 Expression::Literal(Literal::String(trimmed))
22933 } else {
22934 this.clone()
22935 }
22936 } else {
22937 this.clone()
22938 }
22939 } else {
22940 this.clone()
22941 };
22942 let zone_str = zone.unwrap();
22943 // Build: CAST(x AS DateTime64(6, 'zone'))
22944 let type_name = format!("DateTime64(6, '{}')", zone_str);
22945 Ok(Expression::Cast(Box::new(Cast {
22946 this: clean_this,
22947 to: DataType::Custom { name: type_name },
22948 double_colon_syntax: false,
22949 trailing_comments: Vec::new(),
22950 format: None,
22951 default: None,
22952 })))
22953 } else {
22954 Ok(Expression::Cast(Box::new(Cast {
22955 this,
22956 to: DataType::Custom {
22957 name: "DateTime64(6)".to_string(),
22958 },
22959 double_colon_syntax: false,
22960 trailing_comments: Vec::new(),
22961 format: None,
22962 default: None,
22963 })))
22964 }
22965 }
22966 DialectType::BigQuery => {
22967 if has_zone {
22968 // BigQuery with zone: CAST(x AS TIMESTAMP)
22969 Ok(Expression::Cast(Box::new(Cast {
22970 this,
22971 to: DataType::Timestamp {
22972 timezone: false,
22973 precision: None,
22974 },
22975 double_colon_syntax: false,
22976 trailing_comments: Vec::new(),
22977 format: None,
22978 default: None,
22979 })))
22980 } else {
22981 // BigQuery: CAST(x AS DATETIME) - Timestamp{tz:false} renders as DATETIME for BigQuery
22982 Ok(Expression::Cast(Box::new(Cast {
22983 this,
22984 to: DataType::Custom {
22985 name: "DATETIME".to_string(),
22986 },
22987 double_colon_syntax: false,
22988 trailing_comments: Vec::new(),
22989 format: None,
22990 default: None,
22991 })))
22992 }
22993 }
22994 DialectType::Doris => {
22995 // Doris: CAST(x AS DATETIME)
22996 Ok(Expression::Cast(Box::new(Cast {
22997 this,
22998 to: DataType::Custom {
22999 name: "DATETIME".to_string(),
23000 },
23001 double_colon_syntax: false,
23002 trailing_comments: Vec::new(),
23003 format: None,
23004 default: None,
23005 })))
23006 }
23007 DialectType::TSQL | DialectType::Fabric => {
23008 if has_zone {
23009 // TSQL with zone: CAST(x AS DATETIMEOFFSET) AT TIME ZONE 'UTC'
23010 let cast_expr = Expression::Cast(Box::new(Cast {
23011 this,
23012 to: DataType::Custom {
23013 name: "DATETIMEOFFSET".to_string(),
23014 },
23015 double_colon_syntax: false,
23016 trailing_comments: Vec::new(),
23017 format: None,
23018 default: None,
23019 }));
23020 Ok(Expression::AtTimeZone(Box::new(
23021 crate::expressions::AtTimeZone {
23022 this: cast_expr,
23023 zone: Expression::Literal(Literal::String(
23024 "UTC".to_string(),
23025 )),
23026 },
23027 )))
23028 } else {
23029 // TSQL: CAST(x AS DATETIME2)
23030 Ok(Expression::Cast(Box::new(Cast {
23031 this,
23032 to: DataType::Custom {
23033 name: "DATETIME2".to_string(),
23034 },
23035 double_colon_syntax: false,
23036 trailing_comments: Vec::new(),
23037 format: None,
23038 default: None,
23039 })))
23040 }
23041 }
23042 DialectType::DuckDB => {
23043 if has_zone {
23044 // DuckDB with zone: CAST(x AS TIMESTAMPTZ)
23045 Ok(Expression::Cast(Box::new(Cast {
23046 this,
23047 to: DataType::Timestamp {
23048 timezone: true,
23049 precision: None,
23050 },
23051 double_colon_syntax: false,
23052 trailing_comments: Vec::new(),
23053 format: None,
23054 default: None,
23055 })))
23056 } else {
23057 // DuckDB: CAST(x AS TIMESTAMP)
23058 Ok(Expression::Cast(Box::new(Cast {
23059 this,
23060 to: DataType::Timestamp {
23061 timezone: false,
23062 precision: None,
23063 },
23064 double_colon_syntax: false,
23065 trailing_comments: Vec::new(),
23066 format: None,
23067 default: None,
23068 })))
23069 }
23070 }
23071 DialectType::PostgreSQL
23072 | DialectType::Materialize
23073 | DialectType::RisingWave => {
23074 if has_zone {
23075 // PostgreSQL with zone: CAST(x AS TIMESTAMPTZ)
23076 Ok(Expression::Cast(Box::new(Cast {
23077 this,
23078 to: DataType::Timestamp {
23079 timezone: true,
23080 precision: None,
23081 },
23082 double_colon_syntax: false,
23083 trailing_comments: Vec::new(),
23084 format: None,
23085 default: None,
23086 })))
23087 } else {
23088 // PostgreSQL: CAST(x AS TIMESTAMP)
23089 Ok(Expression::Cast(Box::new(Cast {
23090 this,
23091 to: DataType::Timestamp {
23092 timezone: false,
23093 precision: None,
23094 },
23095 double_colon_syntax: false,
23096 trailing_comments: Vec::new(),
23097 format: None,
23098 default: None,
23099 })))
23100 }
23101 }
23102 DialectType::Snowflake => {
23103 if has_zone {
23104 // Snowflake with zone: CAST(x AS TIMESTAMPTZ)
23105 Ok(Expression::Cast(Box::new(Cast {
23106 this,
23107 to: DataType::Timestamp {
23108 timezone: true,
23109 precision: None,
23110 },
23111 double_colon_syntax: false,
23112 trailing_comments: Vec::new(),
23113 format: None,
23114 default: None,
23115 })))
23116 } else {
23117 // Snowflake: CAST(x AS TIMESTAMP)
23118 Ok(Expression::Cast(Box::new(Cast {
23119 this,
23120 to: DataType::Timestamp {
23121 timezone: false,
23122 precision: None,
23123 },
23124 double_colon_syntax: false,
23125 trailing_comments: Vec::new(),
23126 format: None,
23127 default: None,
23128 })))
23129 }
23130 }
23131 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
23132 if has_zone {
23133 // Presto/Trino with zone: CAST(x AS TIMESTAMP WITH TIME ZONE)
23134 // Check for precision from sub-second digits
23135 let precision =
23136 if let Expression::Literal(Literal::String(ref s)) = this {
23137 if let Some(dot_pos) = s.rfind('.') {
23138 let frac = &s[dot_pos + 1..];
23139 let digit_count = frac
23140 .chars()
23141 .take_while(|c| c.is_ascii_digit())
23142 .count();
23143 if digit_count > 0
23144 && matches!(target, DialectType::Trino)
23145 {
23146 Some(digit_count as u32)
23147 } else {
23148 None
23149 }
23150 } else {
23151 None
23152 }
23153 } else {
23154 None
23155 };
23156 let dt = if let Some(prec) = precision {
23157 DataType::Timestamp {
23158 timezone: true,
23159 precision: Some(prec),
23160 }
23161 } else {
23162 DataType::Timestamp {
23163 timezone: true,
23164 precision: None,
23165 }
23166 };
23167 Ok(Expression::Cast(Box::new(Cast {
23168 this,
23169 to: dt,
23170 double_colon_syntax: false,
23171 trailing_comments: Vec::new(),
23172 format: None,
23173 default: None,
23174 })))
23175 } else {
23176 // Check for sub-second precision for Trino
23177 let precision =
23178 if let Expression::Literal(Literal::String(ref s)) = this {
23179 if let Some(dot_pos) = s.rfind('.') {
23180 let frac = &s[dot_pos + 1..];
23181 let digit_count = frac
23182 .chars()
23183 .take_while(|c| c.is_ascii_digit())
23184 .count();
23185 if digit_count > 0
23186 && matches!(target, DialectType::Trino)
23187 {
23188 Some(digit_count as u32)
23189 } else {
23190 None
23191 }
23192 } else {
23193 None
23194 }
23195 } else {
23196 None
23197 };
23198 let dt = DataType::Timestamp {
23199 timezone: false,
23200 precision,
23201 };
23202 Ok(Expression::Cast(Box::new(Cast {
23203 this,
23204 to: dt,
23205 double_colon_syntax: false,
23206 trailing_comments: Vec::new(),
23207 format: None,
23208 default: None,
23209 })))
23210 }
23211 }
23212 DialectType::Redshift => {
23213 if has_zone {
23214 // Redshift with zone: CAST(x AS TIMESTAMP WITH TIME ZONE)
23215 Ok(Expression::Cast(Box::new(Cast {
23216 this,
23217 to: DataType::Timestamp {
23218 timezone: true,
23219 precision: None,
23220 },
23221 double_colon_syntax: false,
23222 trailing_comments: Vec::new(),
23223 format: None,
23224 default: None,
23225 })))
23226 } else {
23227 // Redshift: CAST(x AS TIMESTAMP)
23228 Ok(Expression::Cast(Box::new(Cast {
23229 this,
23230 to: DataType::Timestamp {
23231 timezone: false,
23232 precision: None,
23233 },
23234 double_colon_syntax: false,
23235 trailing_comments: Vec::new(),
23236 format: None,
23237 default: None,
23238 })))
23239 }
23240 }
23241 _ => {
23242 // Default: CAST(x AS TIMESTAMP)
23243 Ok(Expression::Cast(Box::new(Cast {
23244 this,
23245 to: DataType::Timestamp {
23246 timezone: false,
23247 precision: None,
23248 },
23249 double_colon_syntax: false,
23250 trailing_comments: Vec::new(),
23251 format: None,
23252 default: None,
23253 })))
23254 }
23255 }
23256 } else {
23257 Ok(e)
23258 }
23259 }
23260
23261 Action::DateToDateStrConvert => {
23262 // DATE_TO_DATE_STR(x) -> CAST(x AS text_type) per dialect
23263 if let Expression::Function(f) = e {
23264 let arg = f.args.into_iter().next().unwrap();
23265 let str_type = match target {
23266 DialectType::DuckDB => DataType::Text,
23267 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
23268 DataType::Custom {
23269 name: "STRING".to_string(),
23270 }
23271 }
23272 DialectType::Presto
23273 | DialectType::Trino
23274 | DialectType::Athena
23275 | DialectType::Drill => DataType::VarChar {
23276 length: None,
23277 parenthesized_length: false,
23278 },
23279 _ => DataType::VarChar {
23280 length: None,
23281 parenthesized_length: false,
23282 },
23283 };
23284 Ok(Expression::Cast(Box::new(Cast {
23285 this: arg,
23286 to: str_type,
23287 double_colon_syntax: false,
23288 trailing_comments: Vec::new(),
23289 format: None,
23290 default: None,
23291 })))
23292 } else {
23293 Ok(e)
23294 }
23295 }
23296
23297 Action::DateToDiConvert => {
23298 // DATE_TO_DI(x) -> CAST(format_func(x, fmt) AS INT)
23299 if let Expression::Function(f) = e {
23300 let arg = f.args.into_iter().next().unwrap();
23301 let inner = match target {
23302 DialectType::DuckDB => {
23303 // STRFTIME(x, '%Y%m%d')
23304 Expression::Function(Box::new(Function::new(
23305 "STRFTIME".to_string(),
23306 vec![arg, Expression::string("%Y%m%d")],
23307 )))
23308 }
23309 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
23310 // DATE_FORMAT(x, 'yyyyMMdd')
23311 Expression::Function(Box::new(Function::new(
23312 "DATE_FORMAT".to_string(),
23313 vec![arg, Expression::string("yyyyMMdd")],
23314 )))
23315 }
23316 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
23317 // DATE_FORMAT(x, '%Y%m%d')
23318 Expression::Function(Box::new(Function::new(
23319 "DATE_FORMAT".to_string(),
23320 vec![arg, Expression::string("%Y%m%d")],
23321 )))
23322 }
23323 DialectType::Drill => {
23324 // TO_DATE(x, 'yyyyMMdd')
23325 Expression::Function(Box::new(Function::new(
23326 "TO_DATE".to_string(),
23327 vec![arg, Expression::string("yyyyMMdd")],
23328 )))
23329 }
23330 _ => {
23331 // Default: STRFTIME(x, '%Y%m%d')
23332 Expression::Function(Box::new(Function::new(
23333 "STRFTIME".to_string(),
23334 vec![arg, Expression::string("%Y%m%d")],
23335 )))
23336 }
23337 };
23338 // Use INT (not INTEGER) for Presto/Trino
23339 let int_type = match target {
23340 DialectType::Presto
23341 | DialectType::Trino
23342 | DialectType::Athena
23343 | DialectType::TSQL
23344 | DialectType::Fabric
23345 | DialectType::SQLite
23346 | DialectType::Redshift => DataType::Custom {
23347 name: "INT".to_string(),
23348 },
23349 _ => DataType::Int {
23350 length: None,
23351 integer_spelling: false,
23352 },
23353 };
23354 Ok(Expression::Cast(Box::new(Cast {
23355 this: inner,
23356 to: int_type,
23357 double_colon_syntax: false,
23358 trailing_comments: Vec::new(),
23359 format: None,
23360 default: None,
23361 })))
23362 } else {
23363 Ok(e)
23364 }
23365 }
23366
            // DI_TO_DATE(x): parse an integer in yyyymmdd form into a DATE using the
            // target dialect's native string-parsing primitives.
            Action::DiToDateConvert => {
                // DI_TO_DATE(x) -> dialect-specific integer-to-date conversion
                if let Expression::Function(f) = e {
                    // NOTE(review): unwrap() assumes at least one argument is present;
                    // panics on an empty arg list — presumably validated upstream.
                    let arg = f.args.into_iter().next().unwrap();
                    match target {
                        DialectType::DuckDB => {
                            // CAST(STRPTIME(CAST(x AS TEXT), '%Y%m%d') AS DATE)
                            let cast_text = Expression::Cast(Box::new(Cast {
                                this: arg,
                                to: DataType::Text,
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                            }));
                            let strptime = Expression::Function(Box::new(Function::new(
                                "STRPTIME".to_string(),
                                vec![cast_text, Expression::string("%Y%m%d")],
                            )));
                            Ok(Expression::Cast(Box::new(Cast {
                                this: strptime,
                                to: DataType::Date,
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                            })))
                        }
                        DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
                            // TO_DATE(CAST(x AS STRING), 'yyyyMMdd')
                            let cast_str = Expression::Cast(Box::new(Cast {
                                this: arg,
                                // STRING has no dedicated DataType variant, so it is
                                // carried as a Custom type name.
                                to: DataType::Custom {
                                    name: "STRING".to_string(),
                                },
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                            }));
                            Ok(Expression::Function(Box::new(Function::new(
                                "TO_DATE".to_string(),
                                // Java-style format (yyyyMMdd) for the Hive family.
                                vec![cast_str, Expression::string("yyyyMMdd")],
                            ))))
                        }
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // CAST(DATE_PARSE(CAST(x AS VARCHAR), '%Y%m%d') AS DATE)
                            let cast_varchar = Expression::Cast(Box::new(Cast {
                                this: arg,
                                to: DataType::VarChar {
                                    length: None,
                                    parenthesized_length: false,
                                },
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                            }));
                            let date_parse = Expression::Function(Box::new(Function::new(
                                "DATE_PARSE".to_string(),
                                vec![cast_varchar, Expression::string("%Y%m%d")],
                            )));
                            Ok(Expression::Cast(Box::new(Cast {
                                this: date_parse,
                                to: DataType::Date,
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                            })))
                        }
                        DialectType::Drill => {
                            // TO_DATE(CAST(x AS VARCHAR), 'yyyyMMdd')
                            let cast_varchar = Expression::Cast(Box::new(Cast {
                                this: arg,
                                to: DataType::VarChar {
                                    length: None,
                                    parenthesized_length: false,
                                },
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                            }));
                            Ok(Expression::Function(Box::new(Function::new(
                                "TO_DATE".to_string(),
                                vec![cast_varchar, Expression::string("yyyyMMdd")],
                            ))))
                        }
                        // Any other target: keep the pseudo-function name untouched.
                        _ => Ok(Expression::Function(Box::new(Function::new(
                            "DI_TO_DATE".to_string(),
                            vec![arg],
                        )))),
                    }
                } else {
                    // Not a function node — nothing to rewrite.
                    Ok(e)
                }
            }
23465
23466 Action::TsOrDiToDiConvert => {
23467 // TS_OR_DI_TO_DI(x) -> CAST(SUBSTR(REPLACE(CAST(x AS type), '-', ''), 1, 8) AS INT)
23468 if let Expression::Function(f) = e {
23469 let arg = f.args.into_iter().next().unwrap();
23470 let str_type = match target {
23471 DialectType::DuckDB => DataType::Text,
23472 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
23473 DataType::Custom {
23474 name: "STRING".to_string(),
23475 }
23476 }
23477 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
23478 DataType::VarChar {
23479 length: None,
23480 parenthesized_length: false,
23481 }
23482 }
23483 _ => DataType::VarChar {
23484 length: None,
23485 parenthesized_length: false,
23486 },
23487 };
23488 let cast_str = Expression::Cast(Box::new(Cast {
23489 this: arg,
23490 to: str_type,
23491 double_colon_syntax: false,
23492 trailing_comments: Vec::new(),
23493 format: None,
23494 default: None,
23495 }));
23496 let replace_expr = Expression::Function(Box::new(Function::new(
23497 "REPLACE".to_string(),
23498 vec![cast_str, Expression::string("-"), Expression::string("")],
23499 )));
23500 let substr_name = match target {
23501 DialectType::DuckDB
23502 | DialectType::Hive
23503 | DialectType::Spark
23504 | DialectType::Databricks => "SUBSTR",
23505 _ => "SUBSTR",
23506 };
23507 let substr = Expression::Function(Box::new(Function::new(
23508 substr_name.to_string(),
23509 vec![replace_expr, Expression::number(1), Expression::number(8)],
23510 )));
23511 // Use INT (not INTEGER) for Presto/Trino etc.
23512 let int_type = match target {
23513 DialectType::Presto
23514 | DialectType::Trino
23515 | DialectType::Athena
23516 | DialectType::TSQL
23517 | DialectType::Fabric
23518 | DialectType::SQLite
23519 | DialectType::Redshift => DataType::Custom {
23520 name: "INT".to_string(),
23521 },
23522 _ => DataType::Int {
23523 length: None,
23524 integer_spelling: false,
23525 },
23526 };
23527 Ok(Expression::Cast(Box::new(Cast {
23528 this: substr,
23529 to: int_type,
23530 double_colon_syntax: false,
23531 trailing_comments: Vec::new(),
23532 format: None,
23533 default: None,
23534 })))
23535 } else {
23536 Ok(e)
23537 }
23538 }
23539
            // UNIX_TO_STR(x, fmt): epoch seconds to a formatted string. Literal
            // formats become a UnixToStr node; dynamic formats are expanded inline.
            Action::UnixToStrConvert => {
                // UNIX_TO_STR(x, fmt) -> convert to Expression::UnixToStr for generator
                if let Expression::Function(f) = e {
                    let mut args = f.args;
                    // NOTE(review): remove(0) panics if the call has no arguments —
                    // presumably arity is validated upstream.
                    let this = args.remove(0);
                    // Optional second argument: the format expression.
                    let fmt_expr = if !args.is_empty() {
                        Some(args.remove(0))
                    } else {
                        None
                    };

                    // Check if format is a string literal
                    let fmt_str = fmt_expr.as_ref().and_then(|f| {
                        if let Expression::Literal(Literal::String(s)) = f {
                            Some(s.clone())
                        } else {
                            None
                        }
                    });

                    if let Some(fmt_string) = fmt_str {
                        // String literal format -> use UnixToStr expression (generator handles it)
                        Ok(Expression::UnixToStr(Box::new(
                            crate::expressions::UnixToStr {
                                this: Box::new(this),
                                format: Some(fmt_string),
                            },
                        )))
                    } else if let Some(fmt_e) = fmt_expr {
                        // Non-literal format (e.g., identifier `y`) -> build target expression directly
                        match target {
                            DialectType::DuckDB => {
                                // STRFTIME(TO_TIMESTAMP(x), y)
                                let to_ts = Expression::Function(Box::new(Function::new(
                                    "TO_TIMESTAMP".to_string(),
                                    vec![this],
                                )));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "STRFTIME".to_string(),
                                    vec![to_ts, fmt_e],
                                ))))
                            }
                            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                                // DATE_FORMAT(FROM_UNIXTIME(x), y)
                                let from_unix = Expression::Function(Box::new(Function::new(
                                    "FROM_UNIXTIME".to_string(),
                                    vec![this],
                                )));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_FORMAT".to_string(),
                                    vec![from_unix, fmt_e],
                                ))))
                            }
                            DialectType::Hive
                            | DialectType::Spark
                            | DialectType::Databricks
                            | DialectType::Doris
                            | DialectType::StarRocks => {
                                // FROM_UNIXTIME(x, y)
                                Ok(Expression::Function(Box::new(Function::new(
                                    "FROM_UNIXTIME".to_string(),
                                    vec![this, fmt_e],
                                ))))
                            }
                            _ => {
                                // Default: keep as UNIX_TO_STR(x, y)
                                Ok(Expression::Function(Box::new(Function::new(
                                    "UNIX_TO_STR".to_string(),
                                    vec![this, fmt_e],
                                ))))
                            }
                        }
                    } else {
                        // No format argument at all -> UnixToStr with format: None.
                        Ok(Expression::UnixToStr(Box::new(
                            crate::expressions::UnixToStr {
                                this: Box::new(this),
                                format: None,
                            },
                        )))
                    }
                } else {
                    // Not a function node — nothing to rewrite.
                    Ok(e)
                }
            }
23624
23625 Action::UnixToTimeConvert => {
23626 // UNIX_TO_TIME(x) -> convert to Expression::UnixToTime for generator
23627 if let Expression::Function(f) = e {
23628 let arg = f.args.into_iter().next().unwrap();
23629 Ok(Expression::UnixToTime(Box::new(
23630 crate::expressions::UnixToTime {
23631 this: Box::new(arg),
23632 scale: None,
23633 zone: None,
23634 hours: None,
23635 minutes: None,
23636 format: None,
23637 target_type: None,
23638 },
23639 )))
23640 } else {
23641 Ok(e)
23642 }
23643 }
23644
            // UNIX_TO_TIME_STR(x): epoch seconds to a default-formatted string.
            Action::UnixToTimeStrConvert => {
                // UNIX_TO_TIME_STR(x) -> dialect-specific
                if let Expression::Function(f) = e {
                    // NOTE(review): unwrap() assumes at least one argument is present.
                    let arg = f.args.into_iter().next().unwrap();
                    match target {
                        DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
                            // FROM_UNIXTIME(x)
                            Ok(Expression::Function(Box::new(Function::new(
                                "FROM_UNIXTIME".to_string(),
                                vec![arg],
                            ))))
                        }
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // CAST(FROM_UNIXTIME(x) AS VARCHAR)
                            let from_unix = Expression::Function(Box::new(Function::new(
                                "FROM_UNIXTIME".to_string(),
                                vec![arg],
                            )));
                            Ok(Expression::Cast(Box::new(Cast {
                                this: from_unix,
                                to: DataType::VarChar {
                                    length: None,
                                    parenthesized_length: false,
                                },
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                            })))
                        }
                        DialectType::DuckDB => {
                            // CAST(TO_TIMESTAMP(x) AS TEXT)
                            let to_ts = Expression::Function(Box::new(Function::new(
                                "TO_TIMESTAMP".to_string(),
                                vec![arg],
                            )));
                            Ok(Expression::Cast(Box::new(Cast {
                                this: to_ts,
                                to: DataType::Text,
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                            })))
                        }
                        // Other targets: keep the pseudo-function for the generator.
                        _ => Ok(Expression::Function(Box::new(Function::new(
                            "UNIX_TO_TIME_STR".to_string(),
                            vec![arg],
                        )))),
                    }
                } else {
                    // Not a function node — nothing to rewrite.
                    Ok(e)
                }
            }
23699
23700 Action::TimeToUnixConvert => {
23701 // TIME_TO_UNIX(x) -> convert to Expression::TimeToUnix for generator
23702 if let Expression::Function(f) = e {
23703 let arg = f.args.into_iter().next().unwrap();
23704 Ok(Expression::TimeToUnix(Box::new(
23705 crate::expressions::UnaryFunc {
23706 this: arg,
23707 original_name: None,
23708 },
23709 )))
23710 } else {
23711 Ok(e)
23712 }
23713 }
23714
23715 Action::TimeToStrConvert => {
23716 // TIME_TO_STR(x, fmt) -> convert to Expression::TimeToStr for generator
23717 if let Expression::Function(f) = e {
23718 let mut args = f.args;
23719 let this = args.remove(0);
23720 let fmt = match args.remove(0) {
23721 Expression::Literal(Literal::String(s)) => s,
23722 other => {
23723 return Ok(Expression::Function(Box::new(Function::new(
23724 "TIME_TO_STR".to_string(),
23725 vec![this, other],
23726 ))));
23727 }
23728 };
23729 Ok(Expression::TimeToStr(Box::new(
23730 crate::expressions::TimeToStr {
23731 this: Box::new(this),
23732 format: fmt,
23733 culture: None,
23734 zone: None,
23735 },
23736 )))
23737 } else {
23738 Ok(e)
23739 }
23740 }
23741
23742 Action::StrToUnixConvert => {
23743 // STR_TO_UNIX(x, fmt) -> convert to Expression::StrToUnix for generator
23744 if let Expression::Function(f) = e {
23745 let mut args = f.args;
23746 let this = args.remove(0);
23747 let fmt = match args.remove(0) {
23748 Expression::Literal(Literal::String(s)) => s,
23749 other => {
23750 return Ok(Expression::Function(Box::new(Function::new(
23751 "STR_TO_UNIX".to_string(),
23752 vec![this, other],
23753 ))));
23754 }
23755 };
23756 Ok(Expression::StrToUnix(Box::new(
23757 crate::expressions::StrToUnix {
23758 this: Some(Box::new(this)),
23759 format: Some(fmt),
23760 },
23761 )))
23762 } else {
23763 Ok(e)
23764 }
23765 }
23766
            // TIME_STR_TO_UNIX(x): a 'YYYY-MM-DD HH:MM:SS' string to epoch seconds.
            Action::TimeStrToUnixConvert => {
                // TIME_STR_TO_UNIX(x) -> dialect-specific
                if let Expression::Function(f) = e {
                    // NOTE(review): unwrap() assumes at least one argument is present.
                    let arg = f.args.into_iter().next().unwrap();
                    match target {
                        DialectType::DuckDB => {
                            // EPOCH(CAST(x AS TIMESTAMP))
                            let cast_ts = Expression::Cast(Box::new(Cast {
                                this: arg,
                                to: DataType::Timestamp {
                                    timezone: false,
                                    precision: None,
                                },
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                            }));
                            Ok(Expression::Function(Box::new(Function::new(
                                "EPOCH".to_string(),
                                vec![cast_ts],
                            ))))
                        }
                        DialectType::Hive
                        | DialectType::Doris
                        | DialectType::StarRocks
                        | DialectType::MySQL => {
                            // UNIX_TIMESTAMP(x)
                            Ok(Expression::Function(Box::new(Function::new(
                                "UNIX_TIMESTAMP".to_string(),
                                vec![arg],
                            ))))
                        }
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // TO_UNIXTIME(DATE_PARSE(x, '%Y-%m-%d %T'))
                            let date_parse = Expression::Function(Box::new(Function::new(
                                "DATE_PARSE".to_string(),
                                vec![arg, Expression::string("%Y-%m-%d %T")],
                            )));
                            Ok(Expression::Function(Box::new(Function::new(
                                "TO_UNIXTIME".to_string(),
                                vec![date_parse],
                            ))))
                        }
                        // Other targets: keep the pseudo-function for the generator.
                        _ => Ok(Expression::Function(Box::new(Function::new(
                            "TIME_STR_TO_UNIX".to_string(),
                            vec![arg],
                        )))),
                    }
                } else {
                    // Not a function node — nothing to rewrite.
                    Ok(e)
                }
            }
23820
23821 Action::TimeToTimeStrConvert => {
23822 // TIME_TO_TIME_STR(x) -> CAST(x AS str_type) per dialect
23823 if let Expression::Function(f) = e {
23824 let arg = f.args.into_iter().next().unwrap();
23825 let str_type = match target {
23826 DialectType::DuckDB => DataType::Text,
23827 DialectType::Hive
23828 | DialectType::Spark
23829 | DialectType::Databricks
23830 | DialectType::Doris
23831 | DialectType::StarRocks => DataType::Custom {
23832 name: "STRING".to_string(),
23833 },
23834 DialectType::Redshift => DataType::Custom {
23835 name: "VARCHAR(MAX)".to_string(),
23836 },
23837 _ => DataType::VarChar {
23838 length: None,
23839 parenthesized_length: false,
23840 },
23841 };
23842 Ok(Expression::Cast(Box::new(Cast {
23843 this: arg,
23844 to: str_type,
23845 double_colon_syntax: false,
23846 trailing_comments: Vec::new(),
23847 format: None,
23848 default: None,
23849 })))
23850 } else {
23851 Ok(e)
23852 }
23853 }
23854
            // DATE_TRUNC with the generic ('unit', x) argument order -> each target's
            // preferred spelling / argument order.
            Action::DateTruncSwapArgs => {
                // DATE_TRUNC('unit', x) from Generic -> target-specific
                if let Expression::Function(f) = e {
                    if f.args.len() == 2 {
                        // Clone rather than move so `f` can still be returned whole
                        // when the unit turns out not to be a string literal.
                        let unit_arg = f.args[0].clone();
                        let expr_arg = f.args[1].clone();
                        // Extract unit string from the first arg
                        let unit_str = match &unit_arg {
                            Expression::Literal(Literal::String(s)) => s.to_uppercase(),
                            _ => return Ok(Expression::Function(f)),
                        };
                        match target {
                            DialectType::BigQuery => {
                                // BigQuery: DATE_TRUNC(x, UNIT) - unquoted unit
                                // Modeled as a bare column identifier so the generator
                                // prints the unit without quotes.
                                let unit_ident =
                                    Expression::Column(crate::expressions::Column {
                                        name: crate::expressions::Identifier::new(unit_str),
                                        table: None,
                                        join_mark: false,
                                        trailing_comments: Vec::new(),
                                    });
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![expr_arg, unit_ident],
                                ))))
                            }
                            DialectType::Doris => {
                                // Doris: DATE_TRUNC(x, 'UNIT')
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![expr_arg, Expression::string(&unit_str)],
                                ))))
                            }
                            DialectType::StarRocks => {
                                // StarRocks: DATE_TRUNC('UNIT', x) - keep standard order
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![Expression::string(&unit_str), expr_arg],
                                ))))
                            }
                            DialectType::Spark | DialectType::Databricks => {
                                // Spark: TRUNC(x, 'UNIT')
                                Ok(Expression::Function(Box::new(Function::new(
                                    "TRUNC".to_string(),
                                    vec![expr_arg, Expression::string(&unit_str)],
                                ))))
                            }
                            DialectType::MySQL => {
                                // MySQL: complex expansion based on unit
                                Self::date_trunc_to_mysql(&unit_str, &expr_arg)
                            }
                            // Other targets keep the original call untouched.
                            _ => Ok(Expression::Function(f)),
                        }
                    } else {
                        // Wrong arity: leave the call unchanged.
                        Ok(Expression::Function(f))
                    }
                } else {
                    // Not a function node — nothing to rewrite.
                    Ok(e)
                }
            }
23915
            // TIMESTAMP_TRUNC(x, UNIT[, tz]) -> each target's truncation form; the
            // optional timezone is kept, dropped, or expanded per dialect.
            Action::TimestampTruncConvert => {
                // TIMESTAMP_TRUNC(x, UNIT[, tz]) from Generic -> target-specific
                if let Expression::Function(f) = e {
                    if f.args.len() >= 2 {
                        let expr_arg = f.args[0].clone();
                        let unit_arg = f.args[1].clone();
                        // Optional third argument: a timezone expression.
                        let tz_arg = if f.args.len() >= 3 {
                            Some(f.args[2].clone())
                        } else {
                            None
                        };
                        // Extract unit string; accepts a string literal or a bare
                        // identifier (BigQuery style); otherwise bail out unchanged.
                        let unit_str = match &unit_arg {
                            Expression::Literal(Literal::String(s)) => s.to_uppercase(),
                            Expression::Column(c) => c.name.name.to_uppercase(),
                            _ => {
                                return Ok(Expression::Function(f));
                            }
                        };
                        match target {
                            DialectType::Spark | DialectType::Databricks => {
                                // Spark: DATE_TRUNC('UNIT', x)
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![Expression::string(&unit_str), expr_arg],
                                ))))
                            }
                            DialectType::Doris | DialectType::StarRocks => {
                                // Doris: DATE_TRUNC(x, 'UNIT')
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![expr_arg, Expression::string(&unit_str)],
                                ))))
                            }
                            DialectType::BigQuery => {
                                // BigQuery: TIMESTAMP_TRUNC(x, UNIT) - keep but with unquoted unit
                                let unit_ident =
                                    Expression::Column(crate::expressions::Column {
                                        name: crate::expressions::Identifier::new(unit_str),
                                        table: None,
                                        join_mark: false,
                                        trailing_comments: Vec::new(),
                                    });
                                let mut args = vec![expr_arg, unit_ident];
                                // BigQuery keeps the optional timezone argument as-is.
                                if let Some(tz) = tz_arg {
                                    args.push(tz);
                                }
                                Ok(Expression::Function(Box::new(Function::new(
                                    "TIMESTAMP_TRUNC".to_string(),
                                    args,
                                ))))
                            }
                            DialectType::DuckDB => {
                                // DuckDB with timezone: DATE_TRUNC('UNIT', x AT TIME ZONE 'tz') AT TIME ZONE 'tz'
                                if let Some(tz) = tz_arg {
                                    // Non-literal timezones fall back to 'UTC'.
                                    let tz_str = match &tz {
                                        Expression::Literal(Literal::String(s)) => s.clone(),
                                        _ => "UTC".to_string(),
                                    };
                                    // x AT TIME ZONE 'tz'
                                    let at_tz = Expression::AtTimeZone(Box::new(
                                        crate::expressions::AtTimeZone {
                                            this: expr_arg,
                                            zone: Expression::string(&tz_str),
                                        },
                                    ));
                                    // DATE_TRUNC('UNIT', x AT TIME ZONE 'tz')
                                    let trunc = Expression::Function(Box::new(Function::new(
                                        "DATE_TRUNC".to_string(),
                                        vec![Expression::string(&unit_str), at_tz],
                                    )));
                                    // DATE_TRUNC(...) AT TIME ZONE 'tz'
                                    Ok(Expression::AtTimeZone(Box::new(
                                        crate::expressions::AtTimeZone {
                                            this: trunc,
                                            zone: Expression::string(&tz_str),
                                        },
                                    )))
                                } else {
                                    // No timezone: plain DATE_TRUNC('UNIT', x).
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "DATE_TRUNC".to_string(),
                                        vec![Expression::string(&unit_str), expr_arg],
                                    ))))
                                }
                            }
                            DialectType::Presto
                            | DialectType::Trino
                            | DialectType::Athena
                            | DialectType::Snowflake => {
                                // Presto/Snowflake: DATE_TRUNC('UNIT', x) - drop timezone
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![Expression::string(&unit_str), expr_arg],
                                ))))
                            }
                            _ => {
                                // For most dialects: DATE_TRUNC('UNIT', x) + tz handling
                                let mut args = vec![Expression::string(&unit_str), expr_arg];
                                if let Some(tz) = tz_arg {
                                    args.push(tz);
                                }
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    args,
                                ))))
                            }
                        }
                    } else {
                        // Fewer than two args: leave the call unchanged.
                        Ok(Expression::Function(f))
                    }
                } else {
                    // Not a function node — nothing to rewrite.
                    Ok(e)
                }
            }
24030
            // STR_TO_DATE(x, fmt): string-to-date parsing. "Default" ISO formats get
            // simpler casts; other literal formats are translated per dialect.
            Action::StrToDateConvert => {
                // STR_TO_DATE(x, fmt) from Generic -> dialect-specific date parsing
                if let Expression::Function(f) = e {
                    if f.args.len() == 2 {
                        let mut args = f.args;
                        let this = args.remove(0);
                        let fmt_expr = args.remove(0);
                        // Only literal formats can be rewritten; others pass through.
                        let fmt_str = match &fmt_expr {
                            Expression::Literal(Literal::String(s)) => Some(s.clone()),
                            _ => None,
                        };
                        // The two canonical strftime formats that map to plain casts.
                        let default_date = "%Y-%m-%d";
                        let default_time = "%Y-%m-%d %H:%M:%S";
                        let is_default = fmt_str
                            .as_ref()
                            .map_or(false, |f| f == default_date || f == default_time);

                        if is_default {
                            // Default format: handle per-dialect
                            match target {
                                DialectType::MySQL
                                | DialectType::Doris
                                | DialectType::StarRocks => {
                                    // Keep STR_TO_DATE(x, fmt) as-is
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "STR_TO_DATE".to_string(),
                                        vec![this, fmt_expr],
                                    ))))
                                }
                                DialectType::Hive => {
                                    // Hive: CAST(x AS DATE)
                                    Ok(Expression::Cast(Box::new(Cast {
                                        this,
                                        to: DataType::Date,
                                        double_colon_syntax: false,
                                        trailing_comments: Vec::new(),
                                        format: None,
                                        default: None,
                                    })))
                                }
                                DialectType::Presto
                                | DialectType::Trino
                                | DialectType::Athena => {
                                    // Presto: CAST(DATE_PARSE(x, '%Y-%m-%d') AS DATE)
                                    let date_parse =
                                        Expression::Function(Box::new(Function::new(
                                            "DATE_PARSE".to_string(),
                                            vec![this, fmt_expr],
                                        )));
                                    Ok(Expression::Cast(Box::new(Cast {
                                        this: date_parse,
                                        to: DataType::Date,
                                        double_colon_syntax: false,
                                        trailing_comments: Vec::new(),
                                        format: None,
                                        default: None,
                                    })))
                                }
                                _ => {
                                    // Others: TsOrDsToDate (delegates to generator)
                                    Ok(Expression::TsOrDsToDate(Box::new(
                                        crate::expressions::TsOrDsToDate {
                                            this: Box::new(this),
                                            format: None,
                                            safe: None,
                                        },
                                    )))
                                }
                            }
                        } else if let Some(fmt) = fmt_str {
                            // Non-default literal format.
                            match target {
                                DialectType::Doris
                                | DialectType::StarRocks
                                | DialectType::MySQL => {
                                    // Keep STR_TO_DATE but with normalized format (%H:%M:%S -> %T, %-d -> %e)
                                    let mut normalized = fmt.clone();
                                    normalized = normalized.replace("%-d", "%e");
                                    normalized = normalized.replace("%-m", "%c");
                                    normalized = normalized.replace("%H:%M:%S", "%T");
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "STR_TO_DATE".to_string(),
                                        vec![this, Expression::string(&normalized)],
                                    ))))
                                }
                                DialectType::Hive => {
                                    // Hive: CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(x, java_fmt)) AS DATE)
                                    let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
                                    let unix_ts =
                                        Expression::Function(Box::new(Function::new(
                                            "UNIX_TIMESTAMP".to_string(),
                                            vec![this, Expression::string(&java_fmt)],
                                        )));
                                    let from_unix =
                                        Expression::Function(Box::new(Function::new(
                                            "FROM_UNIXTIME".to_string(),
                                            vec![unix_ts],
                                        )));
                                    Ok(Expression::Cast(Box::new(Cast {
                                        this: from_unix,
                                        to: DataType::Date,
                                        double_colon_syntax: false,
                                        trailing_comments: Vec::new(),
                                        format: None,
                                        default: None,
                                    })))
                                }
                                DialectType::Spark | DialectType::Databricks => {
                                    // Spark: TO_DATE(x, java_fmt)
                                    let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "TO_DATE".to_string(),
                                        vec![this, Expression::string(&java_fmt)],
                                    ))))
                                }
                                DialectType::Drill => {
                                    // Drill: TO_DATE(x, java_fmt) with T quoted as 'T' in Java format
                                    // The generator's string literal escaping will double the quotes: 'T' -> ''T''
                                    let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
                                    let java_fmt = java_fmt.replace('T', "'T'");
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "TO_DATE".to_string(),
                                        vec![this, Expression::string(&java_fmt)],
                                    ))))
                                }
                                _ => {
                                    // For other dialects: use TsOrDsToDate which delegates to generator
                                    Ok(Expression::TsOrDsToDate(Box::new(
                                        crate::expressions::TsOrDsToDate {
                                            this: Box::new(this),
                                            format: Some(fmt),
                                            safe: None,
                                        },
                                    )))
                                }
                            }
                        } else {
                            // Non-string format - keep as-is
                            let mut new_args = Vec::new();
                            new_args.push(this);
                            new_args.push(fmt_expr);
                            Ok(Expression::Function(Box::new(Function::new(
                                "STR_TO_DATE".to_string(),
                                new_args,
                            ))))
                        }
                    } else {
                        // Wrong arity: leave the call unchanged.
                        Ok(Expression::Function(f))
                    }
                } else {
                    // Not a function node — nothing to rewrite.
                    Ok(e)
                }
            }
24183
24184 Action::TsOrDsAddConvert => {
24185 // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> dialect-specific DATE_ADD
24186 if let Expression::Function(f) = e {
24187 if f.args.len() == 3 {
24188 let mut args = f.args;
24189 let x = args.remove(0);
24190 let n = args.remove(0);
24191 let unit_expr = args.remove(0);
24192 let unit_str = match &unit_expr {
24193 Expression::Literal(Literal::String(s)) => s.to_uppercase(),
24194 _ => "DAY".to_string(),
24195 };
24196
24197 match target {
24198 DialectType::Hive
24199 | DialectType::Spark
24200 | DialectType::Databricks => {
24201 // DATE_ADD(x, n) - only supports DAY unit
24202 Ok(Expression::Function(Box::new(Function::new(
24203 "DATE_ADD".to_string(),
24204 vec![x, n],
24205 ))))
24206 }
24207 DialectType::MySQL => {
24208 // DATE_ADD(x, INTERVAL n UNIT)
24209 let iu = match unit_str.to_uppercase().as_str() {
24210 "YEAR" => crate::expressions::IntervalUnit::Year,
24211 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
24212 "MONTH" => crate::expressions::IntervalUnit::Month,
24213 "WEEK" => crate::expressions::IntervalUnit::Week,
24214 "HOUR" => crate::expressions::IntervalUnit::Hour,
24215 "MINUTE" => crate::expressions::IntervalUnit::Minute,
24216 "SECOND" => crate::expressions::IntervalUnit::Second,
24217 _ => crate::expressions::IntervalUnit::Day,
24218 };
24219 let interval = Expression::Interval(Box::new(
24220 crate::expressions::Interval {
24221 this: Some(n),
24222 unit: Some(
24223 crate::expressions::IntervalUnitSpec::Simple {
24224 unit: iu,
24225 use_plural: false,
24226 },
24227 ),
24228 },
24229 ));
24230 Ok(Expression::Function(Box::new(Function::new(
24231 "DATE_ADD".to_string(),
24232 vec![x, interval],
24233 ))))
24234 }
24235 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
24236 // DATE_ADD('UNIT', n, CAST(CAST(x AS TIMESTAMP) AS DATE))
24237 let cast_ts = Expression::Cast(Box::new(Cast {
24238 this: x,
24239 to: DataType::Timestamp {
24240 precision: None,
24241 timezone: false,
24242 },
24243 double_colon_syntax: false,
24244 trailing_comments: Vec::new(),
24245 format: None,
24246 default: None,
24247 }));
24248 let cast_date = Expression::Cast(Box::new(Cast {
24249 this: cast_ts,
24250 to: DataType::Date,
24251 double_colon_syntax: false,
24252 trailing_comments: Vec::new(),
24253 format: None,
24254 default: None,
24255 }));
24256 Ok(Expression::Function(Box::new(Function::new(
24257 "DATE_ADD".to_string(),
24258 vec![Expression::string(&unit_str), n, cast_date],
24259 ))))
24260 }
24261 DialectType::DuckDB => {
24262 // CAST(x AS DATE) + INTERVAL n UNIT
24263 let cast_date = Expression::Cast(Box::new(Cast {
24264 this: x,
24265 to: DataType::Date,
24266 double_colon_syntax: false,
24267 trailing_comments: Vec::new(),
24268 format: None,
24269 default: None,
24270 }));
24271 let iu = match unit_str.to_uppercase().as_str() {
24272 "YEAR" => crate::expressions::IntervalUnit::Year,
24273 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
24274 "MONTH" => crate::expressions::IntervalUnit::Month,
24275 "WEEK" => crate::expressions::IntervalUnit::Week,
24276 "HOUR" => crate::expressions::IntervalUnit::Hour,
24277 "MINUTE" => crate::expressions::IntervalUnit::Minute,
24278 "SECOND" => crate::expressions::IntervalUnit::Second,
24279 _ => crate::expressions::IntervalUnit::Day,
24280 };
24281 let interval = Expression::Interval(Box::new(
24282 crate::expressions::Interval {
24283 this: Some(n),
24284 unit: Some(
24285 crate::expressions::IntervalUnitSpec::Simple {
24286 unit: iu,
24287 use_plural: false,
24288 },
24289 ),
24290 },
24291 ));
24292 Ok(Expression::Add(Box::new(crate::expressions::BinaryOp {
24293 left: cast_date,
24294 right: interval,
24295 left_comments: Vec::new(),
24296 operator_comments: Vec::new(),
24297 trailing_comments: Vec::new(),
24298 })))
24299 }
24300 DialectType::Drill => {
24301 // DATE_ADD(CAST(x AS DATE), INTERVAL n UNIT)
24302 let cast_date = Expression::Cast(Box::new(Cast {
24303 this: x,
24304 to: DataType::Date,
24305 double_colon_syntax: false,
24306 trailing_comments: Vec::new(),
24307 format: None,
24308 default: None,
24309 }));
24310 let iu = match unit_str.to_uppercase().as_str() {
24311 "YEAR" => crate::expressions::IntervalUnit::Year,
24312 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
24313 "MONTH" => crate::expressions::IntervalUnit::Month,
24314 "WEEK" => crate::expressions::IntervalUnit::Week,
24315 "HOUR" => crate::expressions::IntervalUnit::Hour,
24316 "MINUTE" => crate::expressions::IntervalUnit::Minute,
24317 "SECOND" => crate::expressions::IntervalUnit::Second,
24318 _ => crate::expressions::IntervalUnit::Day,
24319 };
24320 let interval = Expression::Interval(Box::new(
24321 crate::expressions::Interval {
24322 this: Some(n),
24323 unit: Some(
24324 crate::expressions::IntervalUnitSpec::Simple {
24325 unit: iu,
24326 use_plural: false,
24327 },
24328 ),
24329 },
24330 ));
24331 Ok(Expression::Function(Box::new(Function::new(
24332 "DATE_ADD".to_string(),
24333 vec![cast_date, interval],
24334 ))))
24335 }
24336 _ => {
24337 // Default: keep as TS_OR_DS_ADD
24338 Ok(Expression::Function(Box::new(Function::new(
24339 "TS_OR_DS_ADD".to_string(),
24340 vec![x, n, unit_expr],
24341 ))))
24342 }
24343 }
24344 } else {
24345 Ok(Expression::Function(f))
24346 }
24347 } else {
24348 Ok(e)
24349 }
24350 }
24351
24352 Action::DateFromUnixDateConvert => {
24353 // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, CAST('1970-01-01' AS DATE))
24354 if let Expression::Function(f) = e {
24355 // Keep as-is for dialects that support DATE_FROM_UNIX_DATE natively
24356 if matches!(
24357 target,
24358 DialectType::Spark | DialectType::Databricks | DialectType::BigQuery
24359 ) {
24360 return Ok(Expression::Function(Box::new(Function::new(
24361 "DATE_FROM_UNIX_DATE".to_string(),
24362 f.args,
24363 ))));
24364 }
24365 let n = f.args.into_iter().next().unwrap();
24366 let epoch_date = Expression::Cast(Box::new(Cast {
24367 this: Expression::string("1970-01-01"),
24368 to: DataType::Date,
24369 double_colon_syntax: false,
24370 trailing_comments: Vec::new(),
24371 format: None,
24372 default: None,
24373 }));
24374 match target {
24375 DialectType::DuckDB => {
24376 // CAST('1970-01-01' AS DATE) + INTERVAL n DAY
24377 let interval =
24378 Expression::Interval(Box::new(crate::expressions::Interval {
24379 this: Some(n),
24380 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
24381 unit: crate::expressions::IntervalUnit::Day,
24382 use_plural: false,
24383 }),
24384 }));
24385 Ok(Expression::Add(Box::new(
24386 crate::expressions::BinaryOp::new(epoch_date, interval),
24387 )))
24388 }
24389 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
24390 // DATE_ADD('DAY', n, CAST('1970-01-01' AS DATE))
24391 Ok(Expression::Function(Box::new(Function::new(
24392 "DATE_ADD".to_string(),
24393 vec![Expression::string("DAY"), n, epoch_date],
24394 ))))
24395 }
24396 DialectType::Snowflake | DialectType::Redshift | DialectType::TSQL => {
24397 // DATEADD(DAY, n, CAST('1970-01-01' AS DATE))
24398 Ok(Expression::Function(Box::new(Function::new(
24399 "DATEADD".to_string(),
24400 vec![
24401 Expression::Identifier(Identifier::new("DAY")),
24402 n,
24403 epoch_date,
24404 ],
24405 ))))
24406 }
24407 DialectType::BigQuery => {
24408 // DATE_ADD(CAST('1970-01-01' AS DATE), INTERVAL n DAY)
24409 let interval =
24410 Expression::Interval(Box::new(crate::expressions::Interval {
24411 this: Some(n),
24412 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
24413 unit: crate::expressions::IntervalUnit::Day,
24414 use_plural: false,
24415 }),
24416 }));
24417 Ok(Expression::Function(Box::new(Function::new(
24418 "DATE_ADD".to_string(),
24419 vec![epoch_date, interval],
24420 ))))
24421 }
24422 DialectType::MySQL
24423 | DialectType::Doris
24424 | DialectType::StarRocks
24425 | DialectType::Drill => {
24426 // DATE_ADD(CAST('1970-01-01' AS DATE), INTERVAL n DAY)
24427 let interval =
24428 Expression::Interval(Box::new(crate::expressions::Interval {
24429 this: Some(n),
24430 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
24431 unit: crate::expressions::IntervalUnit::Day,
24432 use_plural: false,
24433 }),
24434 }));
24435 Ok(Expression::Function(Box::new(Function::new(
24436 "DATE_ADD".to_string(),
24437 vec![epoch_date, interval],
24438 ))))
24439 }
24440 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
24441 // DATE_ADD(CAST('1970-01-01' AS DATE), n)
24442 Ok(Expression::Function(Box::new(Function::new(
24443 "DATE_ADD".to_string(),
24444 vec![epoch_date, n],
24445 ))))
24446 }
24447 DialectType::PostgreSQL
24448 | DialectType::Materialize
24449 | DialectType::RisingWave => {
24450 // CAST('1970-01-01' AS DATE) + INTERVAL 'n DAY'
24451 let n_str = match &n {
24452 Expression::Literal(Literal::Number(s)) => s.clone(),
24453 _ => Self::expr_to_string_static(&n),
24454 };
24455 let interval =
24456 Expression::Interval(Box::new(crate::expressions::Interval {
24457 this: Some(Expression::string(&format!("{} DAY", n_str))),
24458 unit: None,
24459 }));
24460 Ok(Expression::Add(Box::new(
24461 crate::expressions::BinaryOp::new(epoch_date, interval),
24462 )))
24463 }
24464 _ => {
24465 // Default: keep as-is
24466 Ok(Expression::Function(Box::new(Function::new(
24467 "DATE_FROM_UNIX_DATE".to_string(),
24468 vec![n],
24469 ))))
24470 }
24471 }
24472 } else {
24473 Ok(e)
24474 }
24475 }
24476
24477 Action::ArrayRemoveConvert => {
24478 // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter
24479 if let Expression::ArrayRemove(bf) = e {
24480 let arr = bf.this;
24481 let target_val = bf.expression;
24482 match target {
24483 DialectType::DuckDB => {
24484 let u_id = crate::expressions::Identifier::new("_u");
24485 let lambda =
24486 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
24487 parameters: vec![u_id.clone()],
24488 body: Expression::Neq(Box::new(BinaryOp {
24489 left: Expression::Identifier(u_id),
24490 right: target_val,
24491 left_comments: Vec::new(),
24492 operator_comments: Vec::new(),
24493 trailing_comments: Vec::new(),
24494 })),
24495 colon: false,
24496 parameter_types: Vec::new(),
24497 }));
24498 Ok(Expression::Function(Box::new(Function::new(
24499 "LIST_FILTER".to_string(),
24500 vec![arr, lambda],
24501 ))))
24502 }
24503 DialectType::ClickHouse => {
24504 let u_id = crate::expressions::Identifier::new("_u");
24505 let lambda =
24506 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
24507 parameters: vec![u_id.clone()],
24508 body: Expression::Neq(Box::new(BinaryOp {
24509 left: Expression::Identifier(u_id),
24510 right: target_val,
24511 left_comments: Vec::new(),
24512 operator_comments: Vec::new(),
24513 trailing_comments: Vec::new(),
24514 })),
24515 colon: false,
24516 parameter_types: Vec::new(),
24517 }));
24518 Ok(Expression::Function(Box::new(Function::new(
24519 "arrayFilter".to_string(),
24520 vec![lambda, arr],
24521 ))))
24522 }
24523 DialectType::BigQuery => {
24524 // ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)
24525 let u_id = crate::expressions::Identifier::new("_u");
24526 let u_col = Expression::Column(crate::expressions::Column {
24527 name: u_id.clone(),
24528 table: None,
24529 join_mark: false,
24530 trailing_comments: Vec::new(),
24531 });
24532 let unnest_expr =
24533 Expression::Unnest(Box::new(crate::expressions::UnnestFunc {
24534 this: arr,
24535 expressions: Vec::new(),
24536 with_ordinality: false,
24537 alias: None,
24538 offset_alias: None,
24539 }));
24540 let aliased_unnest =
24541 Expression::Alias(Box::new(crate::expressions::Alias {
24542 this: unnest_expr,
24543 alias: u_id.clone(),
24544 column_aliases: Vec::new(),
24545 pre_alias_comments: Vec::new(),
24546 trailing_comments: Vec::new(),
24547 }));
24548 let where_cond = Expression::Neq(Box::new(BinaryOp {
24549 left: u_col.clone(),
24550 right: target_val,
24551 left_comments: Vec::new(),
24552 operator_comments: Vec::new(),
24553 trailing_comments: Vec::new(),
24554 }));
24555 let subquery = Expression::Select(Box::new(
24556 crate::expressions::Select::new()
24557 .column(u_col)
24558 .from(aliased_unnest)
24559 .where_(where_cond),
24560 ));
24561 Ok(Expression::ArrayFunc(Box::new(
24562 crate::expressions::ArrayConstructor {
24563 expressions: vec![subquery],
24564 bracket_notation: false,
24565 use_list_keyword: false,
24566 },
24567 )))
24568 }
24569 _ => Ok(Expression::ArrayRemove(Box::new(
24570 crate::expressions::BinaryFunc {
24571 original_name: None,
24572 this: arr,
24573 expression: target_val,
24574 },
24575 ))),
24576 }
24577 } else {
24578 Ok(e)
24579 }
24580 }
24581
24582 Action::ArrayReverseConvert => {
24583 // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
24584 if let Expression::ArrayReverse(af) = e {
24585 Ok(Expression::Function(Box::new(Function::new(
24586 "arrayReverse".to_string(),
24587 vec![af.this],
24588 ))))
24589 } else {
24590 Ok(e)
24591 }
24592 }
24593
24594 Action::JsonKeysConvert => {
24595 // JSON_KEYS(x) -> JSON_OBJECT_KEYS/OBJECT_KEYS
24596 if let Expression::JsonKeys(uf) = e {
24597 match target {
24598 DialectType::Spark | DialectType::Databricks => {
24599 Ok(Expression::Function(Box::new(Function::new(
24600 "JSON_OBJECT_KEYS".to_string(),
24601 vec![uf.this],
24602 ))))
24603 }
24604 DialectType::Snowflake => Ok(Expression::Function(Box::new(
24605 Function::new("OBJECT_KEYS".to_string(), vec![uf.this]),
24606 ))),
24607 _ => Ok(Expression::JsonKeys(uf)),
24608 }
24609 } else {
24610 Ok(e)
24611 }
24612 }
24613
24614 Action::ParseJsonStrip => {
24615 // PARSE_JSON(x) -> x (strip wrapper for SQLite/Doris)
24616 if let Expression::ParseJson(uf) = e {
24617 Ok(uf.this)
24618 } else {
24619 Ok(e)
24620 }
24621 }
24622
24623 Action::ArraySizeDrill => {
24624 // ARRAY_SIZE(x) -> REPEATED_COUNT(x) for Drill
24625 if let Expression::ArraySize(uf) = e {
24626 Ok(Expression::Function(Box::new(Function::new(
24627 "REPEATED_COUNT".to_string(),
24628 vec![uf.this],
24629 ))))
24630 } else {
24631 Ok(e)
24632 }
24633 }
24634
24635 Action::WeekOfYearToWeekIso => {
24636 // WEEKOFYEAR(x) -> WEEKISO(x) for Snowflake (cross-dialect normalization)
24637 if let Expression::WeekOfYear(uf) = e {
24638 Ok(Expression::Function(Box::new(Function::new(
24639 "WEEKISO".to_string(),
24640 vec![uf.this],
24641 ))))
24642 } else {
24643 Ok(e)
24644 }
24645 }
24646 }
24647 })
24648 }
24649
24650 /// Convert DATE_TRUNC('unit', x) to MySQL-specific expansion
24651 fn date_trunc_to_mysql(unit: &str, expr: &Expression) -> Result<Expression> {
24652 use crate::expressions::Function;
24653 match unit {
24654 "DAY" => {
24655 // DATE(x)
24656 Ok(Expression::Function(Box::new(Function::new(
24657 "DATE".to_string(),
24658 vec![expr.clone()],
24659 ))))
24660 }
24661 "WEEK" => {
24662 // STR_TO_DATE(CONCAT(YEAR(x), ' ', WEEK(x, 1), ' 1'), '%Y %u %w')
24663 let year_x = Expression::Function(Box::new(Function::new(
24664 "YEAR".to_string(),
24665 vec![expr.clone()],
24666 )));
24667 let week_x = Expression::Function(Box::new(Function::new(
24668 "WEEK".to_string(),
24669 vec![expr.clone(), Expression::number(1)],
24670 )));
24671 let concat_args = vec![
24672 year_x,
24673 Expression::string(" "),
24674 week_x,
24675 Expression::string(" 1"),
24676 ];
24677 let concat = Expression::Function(Box::new(Function::new(
24678 "CONCAT".to_string(),
24679 concat_args,
24680 )));
24681 Ok(Expression::Function(Box::new(Function::new(
24682 "STR_TO_DATE".to_string(),
24683 vec![concat, Expression::string("%Y %u %w")],
24684 ))))
24685 }
24686 "MONTH" => {
24687 // STR_TO_DATE(CONCAT(YEAR(x), ' ', MONTH(x), ' 1'), '%Y %c %e')
24688 let year_x = Expression::Function(Box::new(Function::new(
24689 "YEAR".to_string(),
24690 vec![expr.clone()],
24691 )));
24692 let month_x = Expression::Function(Box::new(Function::new(
24693 "MONTH".to_string(),
24694 vec![expr.clone()],
24695 )));
24696 let concat_args = vec![
24697 year_x,
24698 Expression::string(" "),
24699 month_x,
24700 Expression::string(" 1"),
24701 ];
24702 let concat = Expression::Function(Box::new(Function::new(
24703 "CONCAT".to_string(),
24704 concat_args,
24705 )));
24706 Ok(Expression::Function(Box::new(Function::new(
24707 "STR_TO_DATE".to_string(),
24708 vec![concat, Expression::string("%Y %c %e")],
24709 ))))
24710 }
24711 "QUARTER" => {
24712 // STR_TO_DATE(CONCAT(YEAR(x), ' ', QUARTER(x) * 3 - 2, ' 1'), '%Y %c %e')
24713 let year_x = Expression::Function(Box::new(Function::new(
24714 "YEAR".to_string(),
24715 vec![expr.clone()],
24716 )));
24717 let quarter_x = Expression::Function(Box::new(Function::new(
24718 "QUARTER".to_string(),
24719 vec![expr.clone()],
24720 )));
24721 // QUARTER(x) * 3 - 2
24722 let mul = Expression::Mul(Box::new(crate::expressions::BinaryOp {
24723 left: quarter_x,
24724 right: Expression::number(3),
24725 left_comments: Vec::new(),
24726 operator_comments: Vec::new(),
24727 trailing_comments: Vec::new(),
24728 }));
24729 let sub = Expression::Sub(Box::new(crate::expressions::BinaryOp {
24730 left: mul,
24731 right: Expression::number(2),
24732 left_comments: Vec::new(),
24733 operator_comments: Vec::new(),
24734 trailing_comments: Vec::new(),
24735 }));
24736 let concat_args = vec![
24737 year_x,
24738 Expression::string(" "),
24739 sub,
24740 Expression::string(" 1"),
24741 ];
24742 let concat = Expression::Function(Box::new(Function::new(
24743 "CONCAT".to_string(),
24744 concat_args,
24745 )));
24746 Ok(Expression::Function(Box::new(Function::new(
24747 "STR_TO_DATE".to_string(),
24748 vec![concat, Expression::string("%Y %c %e")],
24749 ))))
24750 }
24751 "YEAR" => {
24752 // STR_TO_DATE(CONCAT(YEAR(x), ' 1 1'), '%Y %c %e')
24753 let year_x = Expression::Function(Box::new(Function::new(
24754 "YEAR".to_string(),
24755 vec![expr.clone()],
24756 )));
24757 let concat_args = vec![year_x, Expression::string(" 1 1")];
24758 let concat = Expression::Function(Box::new(Function::new(
24759 "CONCAT".to_string(),
24760 concat_args,
24761 )));
24762 Ok(Expression::Function(Box::new(Function::new(
24763 "STR_TO_DATE".to_string(),
24764 vec![concat, Expression::string("%Y %c %e")],
24765 ))))
24766 }
24767 _ => {
24768 // Unsupported unit -> keep as DATE_TRUNC
24769 Ok(Expression::Function(Box::new(Function::new(
24770 "DATE_TRUNC".to_string(),
24771 vec![Expression::string(unit), expr.clone()],
24772 ))))
24773 }
24774 }
24775 }
24776
24777 /// Check if a DataType is or contains VARCHAR/CHAR (for Spark VARCHAR->STRING normalization)
24778 fn has_varchar_char_type(dt: &crate::expressions::DataType) -> bool {
24779 use crate::expressions::DataType;
24780 match dt {
24781 DataType::VarChar { .. } | DataType::Char { .. } => true,
24782 DataType::Struct { fields, .. } => fields
24783 .iter()
24784 .any(|f| Self::has_varchar_char_type(&f.data_type)),
24785 _ => false,
24786 }
24787 }
24788
24789 /// Recursively normalize VARCHAR/CHAR to STRING in a DataType (for Spark)
24790 fn normalize_varchar_to_string(
24791 dt: crate::expressions::DataType,
24792 ) -> crate::expressions::DataType {
24793 use crate::expressions::DataType;
24794 match dt {
24795 DataType::VarChar { .. } | DataType::Char { .. } => DataType::Custom {
24796 name: "STRING".to_string(),
24797 },
24798 DataType::Struct { fields, nested } => {
24799 let fields = fields
24800 .into_iter()
24801 .map(|mut f| {
24802 f.data_type = Self::normalize_varchar_to_string(f.data_type);
24803 f
24804 })
24805 .collect();
24806 DataType::Struct { fields, nested }
24807 }
24808 other => other,
24809 }
24810 }
24811
24812 /// Normalize an interval string like '1day' or ' 2 days ' to proper INTERVAL expression
24813 fn normalize_interval_string(expr: Expression, target: DialectType) -> Expression {
24814 if let Expression::Literal(crate::expressions::Literal::String(ref s)) = expr {
24815 // Try to parse patterns like '1day', '1 day', '2 days', ' 2 days '
24816 let trimmed = s.trim();
24817
24818 // Find where digits end and unit text begins
24819 let digit_end = trimmed
24820 .find(|c: char| !c.is_ascii_digit())
24821 .unwrap_or(trimmed.len());
24822 if digit_end == 0 || digit_end == trimmed.len() {
24823 return expr;
24824 }
24825 let num = &trimmed[..digit_end];
24826 let unit_text = trimmed[digit_end..].trim().to_uppercase();
24827 if unit_text.is_empty() {
24828 return expr;
24829 }
24830
24831 let known_units = [
24832 "DAY", "DAYS", "HOUR", "HOURS", "MINUTE", "MINUTES", "SECOND", "SECONDS", "WEEK",
24833 "WEEKS", "MONTH", "MONTHS", "YEAR", "YEARS",
24834 ];
24835 if !known_units.contains(&unit_text.as_str()) {
24836 return expr;
24837 }
24838
24839 let unit_str = unit_text.clone();
24840 // Singularize
24841 let unit_singular = if unit_str.ends_with('S') && unit_str.len() > 3 {
24842 &unit_str[..unit_str.len() - 1]
24843 } else {
24844 &unit_str
24845 };
24846 let unit = unit_singular;
24847
24848 match target {
24849 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
24850 // INTERVAL '2' DAY
24851 let iu = match unit {
24852 "DAY" => crate::expressions::IntervalUnit::Day,
24853 "HOUR" => crate::expressions::IntervalUnit::Hour,
24854 "MINUTE" => crate::expressions::IntervalUnit::Minute,
24855 "SECOND" => crate::expressions::IntervalUnit::Second,
24856 "WEEK" => crate::expressions::IntervalUnit::Week,
24857 "MONTH" => crate::expressions::IntervalUnit::Month,
24858 "YEAR" => crate::expressions::IntervalUnit::Year,
24859 _ => return expr,
24860 };
24861 return Expression::Interval(Box::new(crate::expressions::Interval {
24862 this: Some(Expression::string(num)),
24863 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
24864 unit: iu,
24865 use_plural: false,
24866 }),
24867 }));
24868 }
24869 DialectType::PostgreSQL | DialectType::Redshift | DialectType::DuckDB => {
24870 // INTERVAL '2 DAYS'
24871 let plural = if num != "1" && !unit_str.ends_with('S') {
24872 format!("{} {}S", num, unit)
24873 } else if unit_str.ends_with('S') {
24874 format!("{} {}", num, unit_str)
24875 } else {
24876 format!("{} {}", num, unit)
24877 };
24878 return Expression::Interval(Box::new(crate::expressions::Interval {
24879 this: Some(Expression::string(&plural)),
24880 unit: None,
24881 }));
24882 }
24883 _ => {
24884 // Spark/Databricks/Hive: INTERVAL '1' DAY
24885 let iu = match unit {
24886 "DAY" => crate::expressions::IntervalUnit::Day,
24887 "HOUR" => crate::expressions::IntervalUnit::Hour,
24888 "MINUTE" => crate::expressions::IntervalUnit::Minute,
24889 "SECOND" => crate::expressions::IntervalUnit::Second,
24890 "WEEK" => crate::expressions::IntervalUnit::Week,
24891 "MONTH" => crate::expressions::IntervalUnit::Month,
24892 "YEAR" => crate::expressions::IntervalUnit::Year,
24893 _ => return expr,
24894 };
24895 return Expression::Interval(Box::new(crate::expressions::Interval {
24896 this: Some(Expression::string(num)),
24897 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
24898 unit: iu,
24899 use_plural: false,
24900 }),
24901 }));
24902 }
24903 }
24904 }
24905 // If it's already an INTERVAL expression, pass through
24906 expr
24907 }
24908
    /// Rewrite SELECT expressions containing UNNEST into expanded form with CROSS JOINs.
    /// DuckDB: SELECT UNNEST(arr1), UNNEST(arr2) ->
    /// BigQuery: SELECT IF(pos = pos_2, col, NULL) AS col, ... FROM UNNEST(GENERATE_ARRAY(0, ...)) AS pos CROSS JOIN ...
    /// Presto: SELECT IF(_u.pos = _u_2.pos_2, _u_2.col) AS col, ... FROM UNNEST(SEQUENCE(1, ...)) AS _u(pos) CROSS JOIN ...
    ///
    /// Returns `Some` with the rewritten SELECT when at least one projection contains
    /// an UNNEST and `target` has an expansion strategy below (BigQuery, Presto/Trino,
    /// Snowflake); returns `None` otherwise so the caller keeps the original SELECT.
    fn rewrite_unnest_expansion(
        select: &crate::expressions::Select,
        target: DialectType,
    ) -> Option<crate::expressions::Select> {
        use crate::expressions::{
            Alias, BinaryOp, Column, From, Function, Identifier, Join, JoinKind, Literal,
            UnnestFunc,
        };

        // Presto/Trino SEQUENCE positions are 1-based; the others count from 0.
        let index_offset: i64 = match target {
            DialectType::Presto | DialectType::Trino => 1,
            _ => 0, // BigQuery, Snowflake
        };

        // Snowflake spells the conditional projection IFF; the others use IF.
        let if_func_name = match target {
            DialectType::Snowflake => "IFF",
            _ => "IF",
        };

        // Per-target array length function used to size the position series.
        let array_length_func = match target {
            DialectType::BigQuery => "ARRAY_LENGTH",
            DialectType::Presto | DialectType::Trino => "CARDINALITY",
            DialectType::Snowflake => "ARRAY_SIZE",
            _ => "ARRAY_LENGTH",
        };

        // Presto/Trino/Snowflake qualify generated columns with table aliases
        // (_u, _u_2, ...); BigQuery references bare column names instead.
        let use_table_aliases = matches!(
            target,
            DialectType::Presto | DialectType::Trino | DialectType::Snowflake
        );
        // BigQuery/Snowflake get an explicit NULL else-argument in IF/IFF.
        let null_third_arg = matches!(target, DialectType::BigQuery | DialectType::Snowflake);

        // Build a column reference, optionally qualified by a table alias.
        fn make_col(name: &str, table: Option<&str>) -> Expression {
            if let Some(tbl) = table {
                Expression::Column(Column {
                    name: Identifier::new(name.to_string()),
                    table: Some(Identifier::new(tbl.to_string())),
                    join_mark: false,
                    trailing_comments: Vec::new(),
                })
            } else {
                Expression::Identifier(Identifier::new(name.to_string()))
            }
        }

        // Wrap an expression as a plain CROSS JOIN operand (no ON/USING condition).
        fn make_join(this: Expression) -> Join {
            Join {
                this,
                on: None,
                using: Vec::new(),
                kind: JoinKind::Cross,
                use_inner_keyword: false,
                use_outer_keyword: false,
                deferred_condition: false,
                join_hint: None,
                match_condition: None,
                pivots: Vec::new(),
                comments: Vec::new(),
                nesting_group: 0,
                directed: false,
            }
        }

        // Collect UNNEST info from SELECT expressions
        struct UnnestInfo {
            arr_expr: Expression,            // the array expression being unnested
            col_alias: String,               // generated output name (col, col_2, ...)
            pos_alias: String,               // per-array position alias (pos_2, pos_3, ...)
            source_alias: String,            // per-array table alias (_u_2, _u_3, ...)
            original_expr: Expression,       // the full original projection expression
            has_outer_alias: Option<String>, // explicit AS alias, if the user wrote one
        }

        let mut unnest_infos: Vec<UnnestInfo> = Vec::new();
        let mut col_counter = 0usize;
        // pos/source counters start at 1 so the first generated aliases are
        // pos_2 / _u_2, reserving the bare "pos" / "_u" for the shared series.
        let mut pos_counter = 1usize;
        let mut source_counter = 1usize;

        // Pull the array argument out of an UNNEST found directly, under an alias,
        // or nested inside simple arithmetic (+ - * /) within a projection.
        fn extract_unnest_arg(expr: &Expression) -> Option<Expression> {
            match expr {
                Expression::Unnest(u) => Some(u.this.clone()),
                Expression::Function(f)
                    if f.name.eq_ignore_ascii_case("UNNEST") && !f.args.is_empty() =>
                {
                    Some(f.args[0].clone())
                }
                Expression::Alias(a) => extract_unnest_arg(&a.this),
                Expression::Add(op)
                | Expression::Sub(op)
                | Expression::Mul(op)
                | Expression::Div(op) => {
                    extract_unnest_arg(&op.left).or_else(|| extract_unnest_arg(&op.right))
                }
                _ => None,
            }
        }

        // Top-level AS alias of a projection, if any.
        fn get_alias_name(expr: &Expression) -> Option<String> {
            if let Expression::Alias(a) = expr {
                Some(a.alias.name.clone())
            } else {
                None
            }
        }

        for sel_expr in &select.expressions {
            if let Some(arr) = extract_unnest_arg(sel_expr) {
                col_counter += 1;
                pos_counter += 1;
                source_counter += 1;

                // First output column is plain "col"; later ones are numbered.
                let col_alias = if col_counter == 1 {
                    "col".to_string()
                } else {
                    format!("col_{}", col_counter)
                };
                let pos_alias = format!("pos_{}", pos_counter);
                let source_alias = format!("_u_{}", source_counter);
                let has_outer_alias = get_alias_name(sel_expr);

                unnest_infos.push(UnnestInfo {
                    arr_expr: arr,
                    col_alias,
                    pos_alias,
                    source_alias,
                    original_expr: sel_expr.clone(),
                    has_outer_alias,
                });
            }
        }

        // No UNNEST in any projection: nothing to rewrite.
        if unnest_infos.is_empty() {
            return None;
        }

        // Shared position series aliases: FROM <series> AS _u(pos) / AS pos.
        let series_alias = "pos".to_string();
        let series_source_alias = "_u".to_string();
        let tbl_ref = if use_table_aliases {
            Some(series_source_alias.as_str())
        } else {
            None
        };

        // Build new SELECT expressions
        // Each projection becomes IF(pos = pos_n, <col>[, NULL]) AS <name>, with any
        // arithmetic from the original projection rebuilt around the IF call.
        let mut new_select_exprs = Vec::new();
        for info in &unnest_infos {
            let actual_col_name = info.has_outer_alias.as_ref().unwrap_or(&info.col_alias);
            let src_ref = if use_table_aliases {
                Some(info.source_alias.as_str())
            } else {
                None
            };

            let pos_col = make_col(&series_alias, tbl_ref);
            let unnest_pos_col = make_col(&info.pos_alias, src_ref);
            let col_ref = make_col(actual_col_name, src_ref);

            let eq_cond = Expression::Eq(Box::new(BinaryOp::new(
                pos_col.clone(),
                unnest_pos_col.clone(),
            )));
            let mut if_args = vec![eq_cond, col_ref];
            if null_third_arg {
                if_args.push(Expression::Null(crate::expressions::Null));
            }

            let if_expr =
                Expression::Function(Box::new(Function::new(if_func_name.to_string(), if_args)));
            let final_expr = Self::replace_unnest_with_if(&info.original_expr, &if_expr);

            new_select_exprs.push(Expression::Alias(Box::new(Alias::new(
                final_expr,
                Identifier::new(actual_col_name.clone()),
            ))));
        }

        // Build array size expressions for GREATEST
        let size_exprs: Vec<Expression> = unnest_infos
            .iter()
            .map(|info| {
                Expression::Function(Box::new(Function::new(
                    array_length_func.to_string(),
                    vec![info.arr_expr.clone()],
                )))
            })
            .collect();

        // The shared series must span the longest array.
        let greatest =
            Expression::Function(Box::new(Function::new("GREATEST".to_string(), size_exprs)));

        // 0-based targets end at GREATEST(...) - 1; 1-based targets end at GREATEST(...).
        let series_end = if index_offset == 0 {
            Expression::Sub(Box::new(BinaryOp::new(
                greatest,
                Expression::Literal(Literal::Number("1".to_string())),
            )))
        } else {
            greatest
        };

        // Build the position array source
        let series_unnest_expr = match target {
            DialectType::BigQuery => {
                // UNNEST(GENERATE_ARRAY(0, <end>))
                let gen_array = Expression::Function(Box::new(Function::new(
                    "GENERATE_ARRAY".to_string(),
                    vec![
                        Expression::Literal(Literal::Number("0".to_string())),
                        series_end,
                    ],
                )));
                Expression::Unnest(Box::new(UnnestFunc {
                    this: gen_array,
                    expressions: Vec::new(),
                    with_ordinality: false,
                    alias: None,
                    offset_alias: None,
                }))
            }
            DialectType::Presto | DialectType::Trino => {
                // UNNEST(SEQUENCE(1, <end>))
                let sequence = Expression::Function(Box::new(Function::new(
                    "SEQUENCE".to_string(),
                    vec![
                        Expression::Literal(Literal::Number("1".to_string())),
                        series_end,
                    ],
                )));
                Expression::Unnest(Box::new(UnnestFunc {
                    this: sequence,
                    expressions: Vec::new(),
                    with_ordinality: false,
                    alias: None,
                    offset_alias: None,
                }))
            }
            DialectType::Snowflake => {
                // TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (<end>) + 1))); the
                // + 1 compensates for the range's exclusive upper bound.
                let range_end = Expression::Add(Box::new(BinaryOp::new(
                    Expression::Paren(Box::new(crate::expressions::Paren {
                        this: series_end,
                        trailing_comments: Vec::new(),
                    })),
                    Expression::Literal(Literal::Number("1".to_string())),
                )));
                let gen_range = Expression::Function(Box::new(Function::new(
                    "ARRAY_GENERATE_RANGE".to_string(),
                    vec![
                        Expression::Literal(Literal::Number("0".to_string())),
                        range_end,
                    ],
                )));
                let flatten_arg =
                    Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
                        name: Identifier::new("INPUT".to_string()),
                        value: gen_range,
                        separator: crate::expressions::NamedArgSeparator::DArrow,
                    }));
                let flatten = Expression::Function(Box::new(Function::new(
                    "FLATTEN".to_string(),
                    vec![flatten_arg],
                )));
                Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])))
            }
            // No expansion strategy for other targets.
            _ => return None,
        };

        // Build series alias expression
        let series_alias_expr = if use_table_aliases {
            // Snowflake's FLATTEN output is covered with six positional column
            // aliases; `pos` is placed so it picks up the generated number.
            let col_aliases = if matches!(target, DialectType::Snowflake) {
                vec![
                    Identifier::new("seq".to_string()),
                    Identifier::new("key".to_string()),
                    Identifier::new("path".to_string()),
                    Identifier::new("index".to_string()),
                    Identifier::new(series_alias.clone()),
                    Identifier::new("this".to_string()),
                ]
            } else {
                vec![Identifier::new(series_alias.clone())]
            };
            Expression::Alias(Box::new(Alias {
                this: series_unnest_expr,
                alias: Identifier::new(series_source_alias.clone()),
                column_aliases: col_aliases,
                pre_alias_comments: Vec::new(),
                trailing_comments: Vec::new(),
            }))
        } else {
            Expression::Alias(Box::new(Alias::new(
                series_unnest_expr,
                Identifier::new(series_alias.clone()),
            )))
        };

        // Build CROSS JOINs for each UNNEST
        let mut joins = Vec::new();
        for info in &unnest_infos {
            let actual_col_name = info.has_outer_alias.as_ref().unwrap_or(&info.col_alias);

            let unnest_join_expr = match target {
                DialectType::BigQuery => {
                    // UNNEST([1,2,3]) AS col WITH OFFSET AS pos_2
                    let unnest = UnnestFunc {
                        this: info.arr_expr.clone(),
                        expressions: Vec::new(),
                        with_ordinality: true,
                        alias: Some(Identifier::new(actual_col_name.clone())),
                        offset_alias: Some(Identifier::new(info.pos_alias.clone())),
                    };
                    Expression::Unnest(Box::new(unnest))
                }
                DialectType::Presto | DialectType::Trino => {
                    // UNNEST(arr) WITH ORDINALITY aliased as _u_n(col, pos_n)
                    let unnest = UnnestFunc {
                        this: info.arr_expr.clone(),
                        expressions: Vec::new(),
                        with_ordinality: true,
                        alias: None,
                        offset_alias: None,
                    };
                    Expression::Alias(Box::new(Alias {
                        this: Expression::Unnest(Box::new(unnest)),
                        alias: Identifier::new(info.source_alias.clone()),
                        column_aliases: vec![
                            Identifier::new(actual_col_name.clone()),
                            Identifier::new(info.pos_alias.clone()),
                        ],
                        pre_alias_comments: Vec::new(),
                        trailing_comments: Vec::new(),
                    }))
                }
                DialectType::Snowflake => {
                    // TABLE(FLATTEN(INPUT => arr)) AS _u_n(seq, key, path, pos_n, col, this)
                    let flatten_arg =
                        Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
                            name: Identifier::new("INPUT".to_string()),
                            value: info.arr_expr.clone(),
                            separator: crate::expressions::NamedArgSeparator::DArrow,
                        }));
                    let flatten = Expression::Function(Box::new(Function::new(
                        "FLATTEN".to_string(),
                        vec![flatten_arg],
                    )));
                    let table_fn = Expression::Function(Box::new(Function::new(
                        "TABLE".to_string(),
                        vec![flatten],
                    )));
                    Expression::Alias(Box::new(Alias {
                        this: table_fn,
                        alias: Identifier::new(info.source_alias.clone()),
                        column_aliases: vec![
                            Identifier::new("seq".to_string()),
                            Identifier::new("key".to_string()),
                            Identifier::new("path".to_string()),
                            Identifier::new(info.pos_alias.clone()),
                            Identifier::new(actual_col_name.clone()),
                            Identifier::new("this".to_string()),
                        ],
                        pre_alias_comments: Vec::new(),
                        trailing_comments: Vec::new(),
                    }))
                }
                _ => return None,
            };

            joins.push(make_join(unnest_join_expr));
        }

        // Build WHERE clause
        // Per array: pos = pos_n OR (pos > <max index> AND pos_n = <max index>).
        // The second branch keeps a row alive once a shorter array is exhausted
        // (its position pinned at its maximum), so the projection's IF condition
        // is false there and the output column becomes NULL for that row.
        let mut where_conditions: Vec<Expression> = Vec::new();
        for info in &unnest_infos {
            let src_ref = if use_table_aliases {
                Some(info.source_alias.as_str())
            } else {
                None
            };
            let pos_col = make_col(&series_alias, tbl_ref);
            let unnest_pos_col = make_col(&info.pos_alias, src_ref);

            let arr_size = Expression::Function(Box::new(Function::new(
                array_length_func.to_string(),
                vec![info.arr_expr.clone()],
            )));

            // Max valid position: size - 1 for 0-based targets, size for 1-based.
            let size_ref = if index_offset == 0 {
                Expression::Paren(Box::new(crate::expressions::Paren {
                    this: Expression::Sub(Box::new(BinaryOp::new(
                        arr_size,
                        Expression::Literal(Literal::Number("1".to_string())),
                    ))),
                    trailing_comments: Vec::new(),
                }))
            } else {
                arr_size
            };

            let eq = Expression::Eq(Box::new(BinaryOp::new(
                pos_col.clone(),
                unnest_pos_col.clone(),
            )));
            let gt = Expression::Gt(Box::new(BinaryOp::new(pos_col, size_ref.clone())));
            let pos_eq_size = Expression::Eq(Box::new(BinaryOp::new(unnest_pos_col, size_ref)));
            let and_cond = Expression::And(Box::new(BinaryOp::new(gt, pos_eq_size)));
            let paren_and = Expression::Paren(Box::new(crate::expressions::Paren {
                this: and_cond,
                trailing_comments: Vec::new(),
            }));
            let or_cond = Expression::Or(Box::new(BinaryOp::new(eq, paren_and)));

            where_conditions.push(or_cond);
        }

        let where_expr = if where_conditions.len() == 1 {
            // Single condition: no parens needed
            where_conditions.into_iter().next().unwrap()
        } else {
            // Multiple conditions: wrap each OR in parens, then combine with AND
            let wrap = |e: Expression| {
                Expression::Paren(Box::new(crate::expressions::Paren {
                    this: e,
                    trailing_comments: Vec::new(),
                }))
            };
            let mut iter = where_conditions.into_iter();
            let first = wrap(iter.next().unwrap());
            let second = wrap(iter.next().unwrap());
            let mut combined = Expression::Paren(Box::new(crate::expressions::Paren {
                this: Expression::And(Box::new(BinaryOp::new(first, second))),
                trailing_comments: Vec::new(),
            }));
            for cond in iter {
                combined = Expression::And(Box::new(BinaryOp::new(combined, wrap(cond))));
            }
            combined
        };

        // Build the new SELECT
        let mut new_select = select.clone();
        new_select.expressions = new_select_exprs;

        // The position series joins after an existing FROM source; otherwise it
        // becomes the FROM source itself, with the per-array joins following.
        if new_select.from.is_some() {
            let mut all_joins = vec![make_join(series_alias_expr)];
            all_joins.extend(joins);
            new_select.joins.extend(all_joins);
        } else {
            new_select.from = Some(From {
                expressions: vec![series_alias_expr],
            });
            new_select.joins.extend(joins);
        }

        // AND the expansion filter onto any pre-existing WHERE clause.
        if let Some(ref existing_where) = new_select.where_clause {
            let combined = Expression::And(Box::new(BinaryOp::new(
                existing_where.this.clone(),
                where_expr,
            )));
            new_select.where_clause = Some(crate::expressions::Where { this: combined });
        } else {
            new_select.where_clause = Some(crate::expressions::Where { this: where_expr });
        }

        Some(new_select)
    }
25371
25372 /// Helper to replace UNNEST(...) inside an expression with a replacement expression.
25373 fn replace_unnest_with_if(original: &Expression, replacement: &Expression) -> Expression {
25374 match original {
25375 Expression::Unnest(_) => replacement.clone(),
25376 Expression::Function(f) if f.name.eq_ignore_ascii_case("UNNEST") => replacement.clone(),
25377 Expression::Alias(a) => Self::replace_unnest_with_if(&a.this, replacement),
25378 Expression::Add(op) => {
25379 let left = Self::replace_unnest_with_if(&op.left, replacement);
25380 let right = Self::replace_unnest_with_if(&op.right, replacement);
25381 Expression::Add(Box::new(crate::expressions::BinaryOp::new(left, right)))
25382 }
25383 Expression::Sub(op) => {
25384 let left = Self::replace_unnest_with_if(&op.left, replacement);
25385 let right = Self::replace_unnest_with_if(&op.right, replacement);
25386 Expression::Sub(Box::new(crate::expressions::BinaryOp::new(left, right)))
25387 }
25388 Expression::Mul(op) => {
25389 let left = Self::replace_unnest_with_if(&op.left, replacement);
25390 let right = Self::replace_unnest_with_if(&op.right, replacement);
25391 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(left, right)))
25392 }
25393 Expression::Div(op) => {
25394 let left = Self::replace_unnest_with_if(&op.left, replacement);
25395 let right = Self::replace_unnest_with_if(&op.right, replacement);
25396 Expression::Div(Box::new(crate::expressions::BinaryOp::new(left, right)))
25397 }
25398 _ => original.clone(),
25399 }
25400 }
25401
25402 /// Decompose a JSON path like `$.y[0].z` into individual parts: `["y", "0", "z"]`.
25403 /// Strips `$` prefix, handles bracket notation, quoted strings, and removes `[*]` wildcards.
25404 fn decompose_json_path(path: &str) -> Vec<String> {
25405 let mut parts = Vec::new();
25406 let path = if path.starts_with("$.") {
25407 &path[2..]
25408 } else if path.starts_with('$') {
25409 &path[1..]
25410 } else {
25411 path
25412 };
25413 if path.is_empty() {
25414 return parts;
25415 }
25416 let mut current = String::new();
25417 let chars: Vec<char> = path.chars().collect();
25418 let mut i = 0;
25419 while i < chars.len() {
25420 match chars[i] {
25421 '.' => {
25422 if !current.is_empty() {
25423 parts.push(current.clone());
25424 current.clear();
25425 }
25426 i += 1;
25427 }
25428 '[' => {
25429 if !current.is_empty() {
25430 parts.push(current.clone());
25431 current.clear();
25432 }
25433 i += 1;
25434 let mut bracket_content = String::new();
25435 while i < chars.len() && chars[i] != ']' {
25436 if chars[i] == '"' || chars[i] == '\'' {
25437 let quote = chars[i];
25438 i += 1;
25439 while i < chars.len() && chars[i] != quote {
25440 bracket_content.push(chars[i]);
25441 i += 1;
25442 }
25443 if i < chars.len() {
25444 i += 1;
25445 }
25446 } else {
25447 bracket_content.push(chars[i]);
25448 i += 1;
25449 }
25450 }
25451 if i < chars.len() {
25452 i += 1;
25453 }
25454 if bracket_content != "*" {
25455 parts.push(bracket_content);
25456 }
25457 }
25458 _ => {
25459 current.push(chars[i]);
25460 i += 1;
25461 }
25462 }
25463 }
25464 if !current.is_empty() {
25465 parts.push(current);
25466 }
25467 parts
25468 }
25469
25470 /// Strip `$` prefix from a JSON path, keeping the rest.
25471 /// `$.y[0].z` -> `y[0].z`, `$["a b"]` -> `["a b"]`
25472 fn strip_json_dollar_prefix(path: &str) -> String {
25473 if path.starts_with("$.") {
25474 path[2..].to_string()
25475 } else if path.starts_with('$') {
25476 path[1..].to_string()
25477 } else {
25478 path.to_string()
25479 }
25480 }
25481
25482 /// Strip `[*]` wildcards from a JSON path.
25483 /// `$.y[*]` -> `$.y`, `$.y[*].z` -> `$.y.z`
25484 fn strip_json_wildcards(path: &str) -> String {
25485 path.replace("[*]", "")
25486 .replace("..", ".") // Clean double dots from `$.y[*].z` -> `$.y..z`
25487 .trim_end_matches('.')
25488 .to_string()
25489 }
25490
25491 /// Convert bracket notation to dot notation for JSON paths.
25492 /// `$["a b"]` -> `$."a b"`, `$["key"]` -> `$.key`
25493 fn bracket_to_dot_notation(path: &str) -> String {
25494 let mut result = String::new();
25495 let chars: Vec<char> = path.chars().collect();
25496 let mut i = 0;
25497 while i < chars.len() {
25498 if chars[i] == '[' {
25499 // Read bracket content
25500 i += 1;
25501 let mut bracket_content = String::new();
25502 let mut is_quoted = false;
25503 let mut _quote_char = '"';
25504 while i < chars.len() && chars[i] != ']' {
25505 if chars[i] == '"' || chars[i] == '\'' {
25506 is_quoted = true;
25507 _quote_char = chars[i];
25508 i += 1;
25509 while i < chars.len() && chars[i] != _quote_char {
25510 bracket_content.push(chars[i]);
25511 i += 1;
25512 }
25513 if i < chars.len() {
25514 i += 1;
25515 }
25516 } else {
25517 bracket_content.push(chars[i]);
25518 i += 1;
25519 }
25520 }
25521 if i < chars.len() {
25522 i += 1;
25523 } // skip ]
25524 if bracket_content == "*" {
25525 // Keep wildcard as-is
25526 result.push_str("[*]");
25527 } else if is_quoted {
25528 // Quoted bracket -> dot notation with quotes
25529 result.push('.');
25530 result.push('"');
25531 result.push_str(&bracket_content);
25532 result.push('"');
25533 } else {
25534 // Numeric index -> keep as bracket
25535 result.push('[');
25536 result.push_str(&bracket_content);
25537 result.push(']');
25538 }
25539 } else {
25540 result.push(chars[i]);
25541 i += 1;
25542 }
25543 }
25544 result
25545 }
25546
25547 /// Convert JSON path bracket quoted strings to use single quotes instead of double quotes.
25548 /// `$["a b"]` -> `$['a b']`
25549 fn bracket_to_single_quotes(path: &str) -> String {
25550 let mut result = String::new();
25551 let chars: Vec<char> = path.chars().collect();
25552 let mut i = 0;
25553 while i < chars.len() {
25554 if chars[i] == '[' && i + 1 < chars.len() && chars[i + 1] == '"' {
25555 result.push('[');
25556 result.push('\'');
25557 i += 2; // skip [ and "
25558 while i < chars.len() && chars[i] != '"' {
25559 result.push(chars[i]);
25560 i += 1;
25561 }
25562 if i < chars.len() {
25563 i += 1;
25564 } // skip closing "
25565 result.push('\'');
25566 } else {
25567 result.push(chars[i]);
25568 i += 1;
25569 }
25570 }
25571 result
25572 }
25573
    /// Transform TSQL SELECT INTO -> CREATE TABLE AS for DuckDB/Snowflake
    /// or PostgreSQL #temp -> TEMPORARY.
    /// Also strips # from INSERT INTO #table for non-TSQL targets.
    ///
    /// A leading `#` on the target table name (TSQL local temp-table syntax) is
    /// treated as a temporary-table marker: it is stripped from the emitted name
    /// and, for PostgreSQL/Redshift, re-expressed as `INTO TEMPORARY`. Any
    /// expression that is not an INSERT or a SELECT-with-INTO is returned
    /// unchanged; `_source` is currently unused.
    fn transform_select_into(
        expr: Expression,
        _source: DialectType,
        target: DialectType,
    ) -> Expression {
        use crate::expressions::{CreateTable, Expression, TableRef};

        // Handle INSERT INTO #temp -> INSERT INTO temp for non-TSQL targets
        if let Expression::Insert(ref insert) = expr {
            if insert.table.name.name.starts_with('#')
                && !matches!(target, DialectType::TSQL | DialectType::Fabric)
            {
                let mut new_insert = insert.clone();
                new_insert.table.name.name =
                    insert.table.name.name.trim_start_matches('#').to_string();
                return Expression::Insert(new_insert);
            }
            // INSERT targeting a non-temp table, or a TSQL-family target: leave as-is.
            return expr;
        }

        if let Expression::Select(ref select) = expr {
            if let Some(ref into) = select.into {
                // The INTO target may be parsed as a table ref or a bare identifier.
                let table_name_raw = match &into.this {
                    Expression::Table(tr) => tr.name.name.clone(),
                    Expression::Identifier(id) => id.name.clone(),
                    _ => String::new(),
                };
                // `#name` (TSQL) or an explicit temporary flag both mean "temporary".
                let is_temp = table_name_raw.starts_with('#') || into.temporary;
                let clean_name = table_name_raw.trim_start_matches('#').to_string();

                match target {
                    DialectType::DuckDB | DialectType::Snowflake => {
                        // SELECT INTO -> CREATE TABLE AS SELECT
                        // The SELECT is reused verbatim (minus its INTO clause)
                        // as the AS SELECT body of a fresh CREATE TABLE node.
                        let mut new_select = select.clone();
                        new_select.into = None;
                        let ct = CreateTable {
                            name: TableRef::new(clean_name),
                            on_cluster: None,
                            columns: Vec::new(),
                            constraints: Vec::new(),
                            if_not_exists: false,
                            temporary: is_temp,
                            or_replace: false,
                            table_modifier: None,
                            as_select: Some(Expression::Select(new_select)),
                            as_select_parenthesized: false,
                            on_commit: None,
                            clone_source: None,
                            clone_at_clause: None,
                            shallow_clone: false,
                            is_copy: false,
                            leading_comments: Vec::new(),
                            with_properties: Vec::new(),
                            teradata_post_name_options: Vec::new(),
                            with_data: None,
                            with_statistics: None,
                            teradata_indexes: Vec::new(),
                            with_cte: None,
                            properties: Vec::new(),
                            partition_of: None,
                            post_table_properties: Vec::new(),
                            mysql_table_options: Vec::new(),
                            inherits: Vec::new(),
                            on_property: None,
                            copy_grants: false,
                            using_template: None,
                            rollup: None,
                        };
                        return Expression::CreateTable(Box::new(ct));
                    }
                    DialectType::PostgreSQL | DialectType::Redshift => {
                        // PostgreSQL: #foo -> INTO TEMPORARY foo
                        // Only rewrite when the temp-ness came from the `#` prefix;
                        // an already-explicit INTO TEMPORARY is kept untouched.
                        if is_temp && !into.temporary {
                            let mut new_select = select.clone();
                            let mut new_into = into.clone();
                            new_into.temporary = true;
                            new_into.unlogged = false;
                            new_into.this = Expression::Table(TableRef::new(clean_name));
                            new_select.into = Some(new_into);
                            Expression::Select(new_select)
                        } else {
                            expr
                        }
                    }
                    _ => expr,
                }
            } else {
                expr
            }
        } else {
            expr
        }
    }
25670
    /// Transform CREATE TABLE WITH properties for cross-dialect transpilation.
    /// Handles FORMAT, PARTITIONED_BY, and other Presto WITH properties.
    ///
    /// Works in two passes over the mutable `CreateTable`:
    /// 1. Presto-style `with_properties` key/value pairs are drained and
    ///    redistributed for the target (STORED AS for Hive, USING for Spark,
    ///    TBLPROPERTIES for both, kept/normalized for Presto-family, dropped
    ///    for DuckDB).
    /// 2. Structured `properties` entries are converted the other way
    ///    (e.g. Hive STORED AS / TBLPROPERTIES / PARTITIONED BY back into
    ///    Presto `WITH (...)` pairs), or format names are unquoted for
    ///    Hive/Spark targets. `_source` is currently unused.
    fn transform_create_table_properties(
        ct: &mut crate::expressions::CreateTable,
        _source: DialectType,
        target: DialectType,
    ) {
        use crate::expressions::{
            BinaryOp, BooleanLiteral, Expression, FileFormatProperty, Identifier, Literal,
            Properties,
        };

        // Helper to convert a raw property value string to the correct Expression
        let value_to_expr = |v: &str| -> Expression {
            let trimmed = v.trim();
            // Check if it's a quoted string (starts and ends with ')
            if trimmed.starts_with('\'') && trimmed.ends_with('\'') {
                Expression::Literal(Literal::String(trimmed[1..trimmed.len() - 1].to_string()))
            }
            // Check if it's a number
            else if trimmed.parse::<i64>().is_ok() || trimmed.parse::<f64>().is_ok() {
                Expression::Literal(Literal::Number(trimmed.to_string()))
            }
            // Check if it's ARRAY[...] or ARRAY(...)
            else if trimmed.to_uppercase().starts_with("ARRAY") {
                // Convert ARRAY['y'] to ARRAY('y') for Hive/Spark
                let inner = trimmed
                    .trim_start_matches(|c: char| c.is_alphabetic()) // Remove ARRAY
                    .trim_start_matches('[')
                    .trim_start_matches('(')
                    .trim_end_matches(']')
                    .trim_end_matches(')');
                let elements: Vec<Expression> = inner
                    .split(',')
                    .map(|e| {
                        let elem = e.trim().trim_matches('\'');
                        Expression::Literal(Literal::String(elem.to_string()))
                    })
                    .collect();
                Expression::Function(Box::new(crate::expressions::Function::new(
                    "ARRAY".to_string(),
                    elements,
                )))
            }
            // Otherwise, just output as identifier (unquoted)
            else {
                Expression::Identifier(Identifier::new(trimmed.to_string()))
            }
        };

        if ct.with_properties.is_empty() && ct.properties.is_empty() {
            return;
        }

        // Handle Presto-style WITH properties
        if !ct.with_properties.is_empty() {
            // Extract FORMAT property and remaining properties
            let mut format_value: Option<String> = None;
            let mut partitioned_by: Option<String> = None;
            let mut other_props: Vec<(String, String)> = Vec::new();

            // drain(..) empties with_properties; the match below re-populates it
            // only with entries appropriate for the target dialect.
            for (key, value) in ct.with_properties.drain(..) {
                let key_upper = key.to_uppercase();
                if key_upper == "FORMAT" {
                    // Strip surrounding quotes from value if present
                    format_value = Some(value.trim_matches('\'').to_string());
                } else if key_upper == "PARTITIONED_BY" {
                    partitioned_by = Some(value);
                } else {
                    other_props.push((key, value));
                }
            }

            match target {
                DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                    // Presto: keep WITH properties but lowercase 'format' key
                    if let Some(fmt) = format_value {
                        ct.with_properties
                            .push(("format".to_string(), format!("'{}'", fmt)));
                    }
                    if let Some(part) = partitioned_by {
                        // Convert (col1, col2) to ARRAY['col1', 'col2'] format
                        let trimmed = part.trim();
                        let inner = trimmed.trim_start_matches('(').trim_end_matches(')');
                        // Also handle ARRAY['...'] format - keep as-is
                        if trimmed.to_uppercase().starts_with("ARRAY") {
                            ct.with_properties
                                .push(("PARTITIONED_BY".to_string(), part));
                        } else {
                            // Parse column names from the parenthesized list
                            let cols: Vec<&str> = inner
                                .split(',')
                                .map(|c| c.trim().trim_matches('"').trim_matches('\''))
                                .collect();
                            let array_val = format!(
                                "ARRAY[{}]",
                                cols.iter()
                                    .map(|c| format!("'{}'", c))
                                    .collect::<Vec<_>>()
                                    .join(", ")
                            );
                            ct.with_properties
                                .push(("PARTITIONED_BY".to_string(), array_val));
                        }
                    }
                    ct.with_properties.extend(other_props);
                }
                DialectType::Hive => {
                    // Hive: FORMAT -> STORED AS, other props -> TBLPROPERTIES
                    if let Some(fmt) = format_value {
                        // hive_format = Some(true) selects STORED AS rendering.
                        ct.properties.push(Expression::FileFormatProperty(Box::new(
                            FileFormatProperty {
                                this: Some(Box::new(Expression::Identifier(Identifier::new(fmt)))),
                                expressions: vec![],
                                hive_format: Some(Box::new(Expression::Boolean(BooleanLiteral {
                                    value: true,
                                }))),
                            },
                        )));
                    }
                    if let Some(_part) = partitioned_by {
                        // PARTITIONED_BY handling is complex - move columns to partitioned by
                        // For now, the partition columns are extracted from the column list
                        Self::apply_partitioned_by(ct, &_part, target);
                    }
                    if !other_props.is_empty() {
                        let eq_exprs: Vec<Expression> = other_props
                            .into_iter()
                            .map(|(k, v)| {
                                Expression::Eq(Box::new(BinaryOp::new(
                                    Expression::Literal(Literal::String(k)),
                                    value_to_expr(&v),
                                )))
                            })
                            .collect();
                        ct.properties
                            .push(Expression::Properties(Box::new(Properties {
                                expressions: eq_exprs,
                            })));
                    }
                }
                DialectType::Spark | DialectType::Databricks => {
                    // Spark: FORMAT -> USING, other props -> TBLPROPERTIES
                    if let Some(fmt) = format_value {
                        ct.properties.push(Expression::FileFormatProperty(Box::new(
                            FileFormatProperty {
                                this: Some(Box::new(Expression::Identifier(Identifier::new(fmt)))),
                                expressions: vec![],
                                hive_format: None, // None means USING syntax
                            },
                        )));
                    }
                    if let Some(_part) = partitioned_by {
                        Self::apply_partitioned_by(ct, &_part, target);
                    }
                    if !other_props.is_empty() {
                        let eq_exprs: Vec<Expression> = other_props
                            .into_iter()
                            .map(|(k, v)| {
                                Expression::Eq(Box::new(BinaryOp::new(
                                    Expression::Literal(Literal::String(k)),
                                    value_to_expr(&v),
                                )))
                            })
                            .collect();
                        ct.properties
                            .push(Expression::Properties(Box::new(Properties {
                                expressions: eq_exprs,
                            })));
                    }
                }
                DialectType::DuckDB => {
                    // DuckDB: strip all WITH properties (FORMAT, PARTITIONED_BY, etc.)
                    // Keep nothing
                }
                _ => {
                    // For other dialects, keep WITH properties as-is
                    if let Some(fmt) = format_value {
                        ct.with_properties
                            .push(("FORMAT".to_string(), format!("'{}'", fmt)));
                    }
                    if let Some(part) = partitioned_by {
                        ct.with_properties
                            .push(("PARTITIONED_BY".to_string(), part));
                    }
                    ct.with_properties.extend(other_props);
                }
            }
        }

        // Handle STORED AS 'PARQUET' (quoted format name) -> STORED AS PARQUET (unquoted)
        // and Hive STORED AS -> Presto WITH (format=...) conversion
        if !ct.properties.is_empty() {
            let is_presto_target = matches!(
                target,
                DialectType::Presto | DialectType::Trino | DialectType::Athena
            );
            let is_duckdb_target = matches!(target, DialectType::DuckDB);

            if is_presto_target || is_duckdb_target {
                // Rebuild ct.properties from scratch: entries are either converted
                // into with_properties, dropped (DuckDB), or kept in new_properties.
                let mut new_properties = Vec::new();
                for prop in ct.properties.drain(..) {
                    match &prop {
                        Expression::FileFormatProperty(ffp) => {
                            if is_presto_target {
                                // Convert STORED AS/USING to WITH (format=...)
                                if let Some(ref fmt_expr) = ffp.this {
                                    let fmt_str = match fmt_expr.as_ref() {
                                        Expression::Identifier(id) => id.name.clone(),
                                        Expression::Literal(Literal::String(s)) => s.clone(),
                                        _ => {
                                            new_properties.push(prop);
                                            continue;
                                        }
                                    };
                                    ct.with_properties
                                        .push(("format".to_string(), format!("'{}'", fmt_str)));
                                }
                            }
                            // DuckDB: just strip file format properties
                        }
                        // Convert TBLPROPERTIES to WITH properties for Presto target
                        Expression::Properties(props) if is_presto_target => {
                            for expr in &props.expressions {
                                if let Expression::Eq(eq) = expr {
                                    // Extract key and value from the Eq expression
                                    let key = match &eq.left {
                                        Expression::Literal(Literal::String(s)) => s.clone(),
                                        Expression::Identifier(id) => id.name.clone(),
                                        _ => continue,
                                    };
                                    let value = match &eq.right {
                                        Expression::Literal(Literal::String(s)) => {
                                            format!("'{}'", s)
                                        }
                                        Expression::Literal(Literal::Number(n)) => n.clone(),
                                        Expression::Identifier(id) => id.name.clone(),
                                        _ => continue,
                                    };
                                    ct.with_properties.push((key, value));
                                }
                            }
                        }
                        // Convert PartitionedByProperty for Presto target
                        Expression::PartitionedByProperty(ref pbp) if is_presto_target => {
                            // Check if it contains ColumnDef expressions (Hive-style with types)
                            if let Expression::Tuple(ref tuple) = *pbp.this {
                                let mut col_names: Vec<String> = Vec::new();
                                let mut col_defs: Vec<crate::expressions::ColumnDef> = Vec::new();
                                let mut has_col_defs = false;
                                for expr in &tuple.expressions {
                                    if let Expression::ColumnDef(ref cd) = expr {
                                        has_col_defs = true;
                                        col_names.push(cd.name.name.clone());
                                        col_defs.push(*cd.clone());
                                    } else if let Expression::Column(ref col) = expr {
                                        col_names.push(col.name.name.clone());
                                    } else if let Expression::Identifier(ref id) = expr {
                                        col_names.push(id.name.clone());
                                    } else {
                                        // For function expressions like MONTHS(y), serialize to SQL
                                        let generic = Dialect::get(DialectType::Generic);
                                        if let Ok(sql) = generic.generate(expr) {
                                            col_names.push(sql);
                                        }
                                    }
                                }
                                if has_col_defs {
                                    // Merge partition column defs into the main column list
                                    // (Presto keeps partition columns in the column list).
                                    for cd in col_defs {
                                        ct.columns.push(cd);
                                    }
                                }
                                if !col_names.is_empty() {
                                    // Add PARTITIONED_BY property
                                    let array_val = format!(
                                        "ARRAY[{}]",
                                        col_names
                                            .iter()
                                            .map(|n| format!("'{}'", n))
                                            .collect::<Vec<_>>()
                                            .join(", ")
                                    );
                                    ct.with_properties
                                        .push(("PARTITIONED_BY".to_string(), array_val));
                                }
                            }
                            // Skip - don't keep in properties
                        }
                        _ => {
                            if !is_duckdb_target {
                                new_properties.push(prop);
                            }
                        }
                    }
                }
                ct.properties = new_properties;
            } else {
                // For Hive/Spark targets, unquote format names in STORED AS
                for prop in &mut ct.properties {
                    if let Expression::FileFormatProperty(ref mut ffp) = prop {
                        if let Some(ref mut fmt_expr) = ffp.this {
                            if let Expression::Literal(Literal::String(s)) = fmt_expr.as_ref() {
                                // Convert STORED AS 'PARQUET' to STORED AS PARQUET (unquote)
                                let unquoted = s.clone();
                                *fmt_expr =
                                    Box::new(Expression::Identifier(Identifier::new(unquoted)));
                            }
                        }
                    }
                }
            }
        }
    }
25985
25986 /// Apply PARTITIONED_BY conversion: move partition columns from column list to PARTITIONED BY
25987 fn apply_partitioned_by(
25988 ct: &mut crate::expressions::CreateTable,
25989 partitioned_by_value: &str,
25990 target: DialectType,
25991 ) {
25992 use crate::expressions::{Column, Expression, Identifier, PartitionedByProperty, Tuple};
25993
25994 // Parse the ARRAY['col1', 'col2'] value to extract column names
25995 let mut col_names: Vec<String> = Vec::new();
25996 // The value looks like ARRAY['y', 'z'] or ARRAY('y', 'z')
25997 let inner = partitioned_by_value
25998 .trim()
25999 .trim_start_matches("ARRAY")
26000 .trim_start_matches('[')
26001 .trim_start_matches('(')
26002 .trim_end_matches(']')
26003 .trim_end_matches(')');
26004 for part in inner.split(',') {
26005 let col = part.trim().trim_matches('\'').trim_matches('"');
26006 if !col.is_empty() {
26007 col_names.push(col.to_string());
26008 }
26009 }
26010
26011 if col_names.is_empty() {
26012 return;
26013 }
26014
26015 if matches!(target, DialectType::Hive) {
26016 // Hive: PARTITIONED BY (col_name type, ...) - move columns out of column list
26017 let mut partition_col_defs = Vec::new();
26018 for col_name in &col_names {
26019 // Find and remove from columns
26020 if let Some(pos) = ct
26021 .columns
26022 .iter()
26023 .position(|c| c.name.name.eq_ignore_ascii_case(col_name))
26024 {
26025 let col_def = ct.columns.remove(pos);
26026 partition_col_defs.push(Expression::ColumnDef(Box::new(col_def)));
26027 }
26028 }
26029 if !partition_col_defs.is_empty() {
26030 ct.properties
26031 .push(Expression::PartitionedByProperty(Box::new(
26032 PartitionedByProperty {
26033 this: Box::new(Expression::Tuple(Box::new(Tuple {
26034 expressions: partition_col_defs,
26035 }))),
26036 },
26037 )));
26038 }
26039 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
26040 // Spark: PARTITIONED BY (col1, col2) - just column names, keep in column list
26041 // Use quoted identifiers to match the quoting style of the original column definitions
26042 let partition_exprs: Vec<Expression> = col_names
26043 .iter()
26044 .map(|name| {
26045 // Check if the column exists in the column list and use its quoting
26046 let is_quoted = ct
26047 .columns
26048 .iter()
26049 .any(|c| c.name.name.eq_ignore_ascii_case(name) && c.name.quoted);
26050 let ident = if is_quoted {
26051 Identifier::quoted(name.clone())
26052 } else {
26053 Identifier::new(name.clone())
26054 };
26055 Expression::Column(Column {
26056 name: ident,
26057 table: None,
26058 join_mark: false,
26059 trailing_comments: Vec::new(),
26060 })
26061 })
26062 .collect();
26063 ct.properties
26064 .push(Expression::PartitionedByProperty(Box::new(
26065 PartitionedByProperty {
26066 this: Box::new(Expression::Tuple(Box::new(Tuple {
26067 expressions: partition_exprs,
26068 }))),
26069 },
26070 )));
26071 }
26072 // DuckDB: strip partitioned_by entirely (already handled)
26073 }
26074
26075 /// Convert a DataType to Spark's type string format (using angle brackets)
26076 fn data_type_to_spark_string(dt: &crate::expressions::DataType) -> String {
26077 use crate::expressions::DataType;
26078 match dt {
26079 DataType::Int { .. } => "INT".to_string(),
26080 DataType::BigInt { .. } => "BIGINT".to_string(),
26081 DataType::SmallInt { .. } => "SMALLINT".to_string(),
26082 DataType::TinyInt { .. } => "TINYINT".to_string(),
26083 DataType::Float { .. } => "FLOAT".to_string(),
26084 DataType::Double { .. } => "DOUBLE".to_string(),
26085 DataType::Decimal {
26086 precision: Some(p),
26087 scale: Some(s),
26088 } => format!("DECIMAL({}, {})", p, s),
26089 DataType::Decimal {
26090 precision: Some(p), ..
26091 } => format!("DECIMAL({})", p),
26092 DataType::Decimal { .. } => "DECIMAL".to_string(),
26093 DataType::VarChar { .. } | DataType::Text | DataType::String { .. } => {
26094 "STRING".to_string()
26095 }
26096 DataType::Char { .. } => "STRING".to_string(),
26097 DataType::Boolean => "BOOLEAN".to_string(),
26098 DataType::Date => "DATE".to_string(),
26099 DataType::Timestamp { .. } => "TIMESTAMP".to_string(),
26100 DataType::Json | DataType::JsonB => "STRING".to_string(),
26101 DataType::Binary { .. } => "BINARY".to_string(),
26102 DataType::Array { element_type, .. } => {
26103 format!("ARRAY<{}>", Self::data_type_to_spark_string(element_type))
26104 }
26105 DataType::Map {
26106 key_type,
26107 value_type,
26108 } => format!(
26109 "MAP<{}, {}>",
26110 Self::data_type_to_spark_string(key_type),
26111 Self::data_type_to_spark_string(value_type)
26112 ),
26113 DataType::Struct { fields, .. } => {
26114 let field_strs: Vec<String> = fields
26115 .iter()
26116 .map(|f| {
26117 if f.name.is_empty() {
26118 Self::data_type_to_spark_string(&f.data_type)
26119 } else {
26120 format!(
26121 "{}: {}",
26122 f.name,
26123 Self::data_type_to_spark_string(&f.data_type)
26124 )
26125 }
26126 })
26127 .collect();
26128 format!("STRUCT<{}>", field_strs.join(", "))
26129 }
26130 DataType::Custom { name } => name.clone(),
26131 _ => format!("{:?}", dt),
26132 }
26133 }
26134
    /// Extract value and unit from an Interval expression
    /// Returns (value_expression, IntervalUnit)
    ///
    /// If the interval carries no explicit unit spec, the unit may be embedded
    /// in a string value (Snowflake style, e.g. '5 DAY'); in that case the
    /// string is split and only the numeric part is returned as the value.
    /// Anything unresolvable falls back to IntervalUnit::Day, and non-interval
    /// inputs are passed through unchanged with a Day unit.
    fn extract_interval_parts(
        interval_expr: &Expression,
    ) -> (Expression, crate::expressions::IntervalUnit) {
        use crate::expressions::{IntervalUnit, IntervalUnitSpec};

        if let Expression::Interval(iv) = interval_expr {
            // A missing interval value defaults to 0.
            let val = iv.this.clone().unwrap_or(Expression::number(0));
            let unit = match &iv.unit {
                Some(IntervalUnitSpec::Simple { unit, .. }) => *unit,
                None => {
                    // Unit might be embedded in the string value (Snowflake format: '5 DAY')
                    if let Expression::Literal(crate::expressions::Literal::String(s)) = &val {
                        // Split into at most two parts: "<value> <unit>".
                        let parts: Vec<&str> = s.trim().splitn(2, ' ').collect();
                        if parts.len() == 2 {
                            let unit_str = parts[1].trim().to_uppercase();
                            // Accept both singular and plural spellings.
                            let parsed_unit = match unit_str.as_str() {
                                "YEAR" | "YEARS" => IntervalUnit::Year,
                                "QUARTER" | "QUARTERS" => IntervalUnit::Quarter,
                                "MONTH" | "MONTHS" => IntervalUnit::Month,
                                "WEEK" | "WEEKS" | "ISOWEEK" => IntervalUnit::Week,
                                "DAY" | "DAYS" => IntervalUnit::Day,
                                "HOUR" | "HOURS" => IntervalUnit::Hour,
                                "MINUTE" | "MINUTES" => IntervalUnit::Minute,
                                "SECOND" | "SECONDS" => IntervalUnit::Second,
                                "MILLISECOND" | "MILLISECONDS" => IntervalUnit::Millisecond,
                                "MICROSECOND" | "MICROSECONDS" => IntervalUnit::Microsecond,
                                _ => IntervalUnit::Day,
                            };
                            // Return just the numeric part as value and parsed unit
                            return (
                                Expression::Literal(crate::expressions::Literal::String(
                                    parts[0].to_string(),
                                )),
                                parsed_unit,
                            );
                        }
                        IntervalUnit::Day
                    } else {
                        IntervalUnit::Day
                    }
                }
                _ => IntervalUnit::Day,
            };
            (val, unit)
        } else {
            // Not an interval - pass through
            (interval_expr.clone(), crate::expressions::IntervalUnit::Day)
        }
    }
26186
26187 /// Normalize BigQuery-specific functions to standard forms that target dialects can handle
26188 fn normalize_bigquery_function(
26189 e: Expression,
26190 source: DialectType,
26191 target: DialectType,
26192 ) -> Result<Expression> {
26193 use crate::expressions::{BinaryOp, Cast, DataType, Function, Identifier, Literal, Paren};
26194
26195 let f = if let Expression::Function(f) = e {
26196 *f
26197 } else {
26198 return Ok(e);
26199 };
26200 let name = f.name.to_uppercase();
26201 let mut args = f.args;
26202
        /// Helper to extract unit string from an identifier, column, or literal expression
        ///
        /// Always returns an uppercased unit name; expression shapes it does
        /// not recognize fall back to "DAY".
        fn get_unit_str(expr: &Expression) -> String {
            match expr {
                Expression::Identifier(id) => id.name.to_uppercase(),
                Expression::Literal(Literal::String(s)) => s.to_uppercase(),
                Expression::Column(col) => col.name.name.to_uppercase(),
                // Handle WEEK(MONDAY), WEEK(SUNDAY) etc. which are parsed as Function("WEEK", [Column("MONDAY")])
                Expression::Function(f) => {
                    let base = f.name.to_uppercase();
                    if !f.args.is_empty() {
                        // e.g., WEEK(MONDAY) -> "WEEK(MONDAY)"
                        // Recurse so nested identifiers/columns are uppercased too.
                        let inner = get_unit_str(&f.args[0]);
                        format!("{}({})", base, inner)
                    } else {
                        base
                    }
                }
                // Anything else (numbers, casts, ...) defaults to DAY.
                _ => "DAY".to_string(),
            }
        }
26223
26224 /// Parse unit string to IntervalUnit
26225 fn parse_interval_unit(s: &str) -> crate::expressions::IntervalUnit {
26226 match s {
26227 "YEAR" => crate::expressions::IntervalUnit::Year,
26228 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
26229 "MONTH" => crate::expressions::IntervalUnit::Month,
26230 "WEEK" | "ISOWEEK" => crate::expressions::IntervalUnit::Week,
26231 "DAY" => crate::expressions::IntervalUnit::Day,
26232 "HOUR" => crate::expressions::IntervalUnit::Hour,
26233 "MINUTE" => crate::expressions::IntervalUnit::Minute,
26234 "SECOND" => crate::expressions::IntervalUnit::Second,
26235 "MILLISECOND" => crate::expressions::IntervalUnit::Millisecond,
26236 "MICROSECOND" => crate::expressions::IntervalUnit::Microsecond,
26237 _ if s.starts_with("WEEK(") => crate::expressions::IntervalUnit::Week,
26238 _ => crate::expressions::IntervalUnit::Day,
26239 }
26240 }
26241
26242 match name.as_str() {
26243 // TIMESTAMP_DIFF(date1, date2, unit) -> TIMESTAMPDIFF(unit, date2, date1)
26244 // (BigQuery: result = date1 - date2, Standard: result = end - start)
26245 "TIMESTAMP_DIFF" | "DATETIME_DIFF" | "TIME_DIFF" if args.len() == 3 => {
26246 let date1 = args.remove(0);
26247 let date2 = args.remove(0);
26248 let unit_expr = args.remove(0);
26249 let unit_str = get_unit_str(&unit_expr);
26250
26251 if matches!(target, DialectType::BigQuery) {
26252 // BigQuery -> BigQuery: just uppercase the unit
26253 let unit = Expression::Identifier(Identifier::new(unit_str.clone()));
26254 return Ok(Expression::Function(Box::new(Function::new(
26255 f.name,
26256 vec![date1, date2, unit],
26257 ))));
26258 }
26259
26260 // For Snowflake: use TimestampDiff expression so it generates TIMESTAMPDIFF
26261 // (Function("TIMESTAMPDIFF") would be converted to DATEDIFF by Snowflake's function normalization)
26262 if matches!(target, DialectType::Snowflake) {
26263 return Ok(Expression::TimestampDiff(Box::new(
26264 crate::expressions::TimestampDiff {
26265 this: Box::new(date2),
26266 expression: Box::new(date1),
26267 unit: Some(unit_str),
26268 },
26269 )));
26270 }
26271
26272 // For DuckDB: DATE_DIFF('UNIT', start, end) with proper CAST
26273 if matches!(target, DialectType::DuckDB) {
26274 let (cast_d1, cast_d2) = if name == "TIME_DIFF" {
26275 // CAST to TIME
26276 let cast_fn = |e: Expression| -> Expression {
26277 match e {
26278 Expression::Literal(Literal::String(s)) => {
26279 Expression::Cast(Box::new(Cast {
26280 this: Expression::Literal(Literal::String(s)),
26281 to: DataType::Custom {
26282 name: "TIME".to_string(),
26283 },
26284 trailing_comments: vec![],
26285 double_colon_syntax: false,
26286 format: None,
26287 default: None,
26288 }))
26289 }
26290 other => other,
26291 }
26292 };
26293 (cast_fn(date1), cast_fn(date2))
26294 } else if name == "DATETIME_DIFF" {
26295 // CAST to TIMESTAMP
26296 (
26297 Self::ensure_cast_timestamp(date1),
26298 Self::ensure_cast_timestamp(date2),
26299 )
26300 } else {
26301 // TIMESTAMP_DIFF: CAST to TIMESTAMPTZ
26302 (
26303 Self::ensure_cast_timestamptz(date1),
26304 Self::ensure_cast_timestamptz(date2),
26305 )
26306 };
26307 return Ok(Expression::Function(Box::new(Function::new(
26308 "DATE_DIFF".to_string(),
26309 vec![
26310 Expression::Literal(Literal::String(unit_str)),
26311 cast_d2,
26312 cast_d1,
26313 ],
26314 ))));
26315 }
26316
26317 // Convert to standard TIMESTAMPDIFF(unit, start, end)
26318 let unit = Expression::Identifier(Identifier::new(unit_str));
26319 Ok(Expression::Function(Box::new(Function::new(
26320 "TIMESTAMPDIFF".to_string(),
26321 vec![unit, date2, date1],
26322 ))))
26323 }
26324
26325 // DATEDIFF(unit, start, end) -> target-specific form
26326 // Used by: Redshift, Snowflake, TSQL, Databricks, Spark
26327 "DATEDIFF" if args.len() == 3 => {
26328 let arg0 = args.remove(0);
26329 let arg1 = args.remove(0);
26330 let arg2 = args.remove(0);
26331 let unit_str = get_unit_str(&arg0);
26332
26333 // Redshift DATEDIFF(unit, start, end) order: result = end - start
26334 // Snowflake DATEDIFF(unit, start, end) order: result = end - start
26335 // TSQL DATEDIFF(unit, start, end) order: result = end - start
26336
26337 if matches!(target, DialectType::Snowflake) {
26338 // Snowflake: DATEDIFF(UNIT, start, end) - uppercase unit
26339 let unit = Expression::Identifier(Identifier::new(unit_str));
26340 return Ok(Expression::Function(Box::new(Function::new(
26341 "DATEDIFF".to_string(),
26342 vec![unit, arg1, arg2],
26343 ))));
26344 }
26345
26346 if matches!(target, DialectType::DuckDB) {
26347 // DuckDB: DATE_DIFF('UNIT', start, end) with CAST
26348 let cast_d1 = Self::ensure_cast_timestamp(arg1);
26349 let cast_d2 = Self::ensure_cast_timestamp(arg2);
26350 return Ok(Expression::Function(Box::new(Function::new(
26351 "DATE_DIFF".to_string(),
26352 vec![
26353 Expression::Literal(Literal::String(unit_str)),
26354 cast_d1,
26355 cast_d2,
26356 ],
26357 ))));
26358 }
26359
26360 if matches!(target, DialectType::BigQuery) {
26361 // BigQuery: DATE_DIFF(end_date, start_date, UNIT) - reversed args, CAST to DATETIME
26362 let cast_d1 = Self::ensure_cast_datetime(arg1);
26363 let cast_d2 = Self::ensure_cast_datetime(arg2);
26364 let unit = Expression::Identifier(Identifier::new(unit_str));
26365 return Ok(Expression::Function(Box::new(Function::new(
26366 "DATE_DIFF".to_string(),
26367 vec![cast_d2, cast_d1, unit],
26368 ))));
26369 }
26370
26371 if matches!(target, DialectType::Spark | DialectType::Databricks) {
26372 // Spark/Databricks: DATEDIFF(UNIT, start, end) - uppercase unit
26373 let unit = Expression::Identifier(Identifier::new(unit_str));
26374 return Ok(Expression::Function(Box::new(Function::new(
26375 "DATEDIFF".to_string(),
26376 vec![unit, arg1, arg2],
26377 ))));
26378 }
26379
26380 if matches!(target, DialectType::Hive) {
26381 // Hive: DATEDIFF(end, start) for DAY only, use MONTHS_BETWEEN for MONTH
26382 match unit_str.as_str() {
26383 "MONTH" => {
26384 return Ok(Expression::Function(Box::new(Function::new(
26385 "CAST".to_string(),
26386 vec![Expression::Function(Box::new(Function::new(
26387 "MONTHS_BETWEEN".to_string(),
26388 vec![arg2, arg1],
26389 )))],
26390 ))));
26391 }
26392 "WEEK" => {
26393 return Ok(Expression::Cast(Box::new(Cast {
26394 this: Expression::Div(Box::new(crate::expressions::BinaryOp::new(
26395 Expression::Function(Box::new(Function::new(
26396 "DATEDIFF".to_string(),
26397 vec![arg2, arg1],
26398 ))),
26399 Expression::Literal(Literal::Number("7".to_string())),
26400 ))),
26401 to: DataType::Int {
26402 length: None,
26403 integer_spelling: false,
26404 },
26405 trailing_comments: vec![],
26406 double_colon_syntax: false,
26407 format: None,
26408 default: None,
26409 })));
26410 }
26411 _ => {
26412 // Default: DATEDIFF(end, start) for DAY
26413 return Ok(Expression::Function(Box::new(Function::new(
26414 "DATEDIFF".to_string(),
26415 vec![arg2, arg1],
26416 ))));
26417 }
26418 }
26419 }
26420
26421 if matches!(
26422 target,
26423 DialectType::Presto | DialectType::Trino | DialectType::Athena
26424 ) {
26425 // Presto/Trino: DATE_DIFF('UNIT', start, end)
26426 return Ok(Expression::Function(Box::new(Function::new(
26427 "DATE_DIFF".to_string(),
26428 vec![Expression::Literal(Literal::String(unit_str)), arg1, arg2],
26429 ))));
26430 }
26431
26432 if matches!(target, DialectType::TSQL) {
26433 // TSQL: DATEDIFF(UNIT, start, CAST(end AS DATETIME2))
26434 let cast_d2 = Self::ensure_cast_datetime2(arg2);
26435 let unit = Expression::Identifier(Identifier::new(unit_str));
26436 return Ok(Expression::Function(Box::new(Function::new(
26437 "DATEDIFF".to_string(),
26438 vec![unit, arg1, cast_d2],
26439 ))));
26440 }
26441
26442 if matches!(target, DialectType::PostgreSQL) {
26443 // PostgreSQL doesn't have DATEDIFF - use date subtraction or EXTRACT
26444 // For now, use DATEDIFF (passthrough) with uppercased unit
26445 let unit = Expression::Identifier(Identifier::new(unit_str));
26446 return Ok(Expression::Function(Box::new(Function::new(
26447 "DATEDIFF".to_string(),
26448 vec![unit, arg1, arg2],
26449 ))));
26450 }
26451
26452 // Default: DATEDIFF(UNIT, start, end) with uppercase unit
26453 let unit = Expression::Identifier(Identifier::new(unit_str));
26454 Ok(Expression::Function(Box::new(Function::new(
26455 "DATEDIFF".to_string(),
26456 vec![unit, arg1, arg2],
26457 ))))
26458 }
26459
// DATE_DIFF(date1, date2, unit) -> standard form
"DATE_DIFF" if args.len() == 3 => {
    // BigQuery-style call shape: DATE_DIFF(end, start, unit). Each target
    // dialect below re-spells the function name, the argument order, and/or
    // how the unit is rendered (bare identifier vs. string literal).
    let date1 = args.remove(0);
    let date2 = args.remove(0);
    let unit_expr = args.remove(0);
    let unit_str = get_unit_str(&unit_expr);

    if matches!(target, DialectType::BigQuery) {
        // BigQuery -> BigQuery: just uppercase the unit, normalize WEEK(SUNDAY) -> WEEK
        // (WEEK(SUNDAY) is BigQuery's default week start, so the two spellings
        // are equivalent — TODO confirm against the BigQuery docs).
        let norm_unit = if unit_str == "WEEK(SUNDAY)" {
            "WEEK".to_string()
        } else {
            unit_str
        };
        let norm_d1 = Self::date_literal_to_cast(date1);
        let norm_d2 = Self::date_literal_to_cast(date2);
        let unit = Expression::Identifier(Identifier::new(norm_unit));
        return Ok(Expression::Function(Box::new(Function::new(
            f.name,
            vec![norm_d1, norm_d2, unit],
        ))));
    }

    if matches!(target, DialectType::MySQL) {
        // MySQL DATEDIFF only takes 2 args (date1, date2), returns day difference
        // NOTE(review): the source unit is dropped here, so non-DAY units will
        // not round-trip to MySQL — confirm this lossy mapping is intended.
        let norm_d1 = Self::date_literal_to_cast(date1);
        let norm_d2 = Self::date_literal_to_cast(date2);
        return Ok(Expression::Function(Box::new(Function::new(
            "DATEDIFF".to_string(),
            vec![norm_d1, norm_d2],
        ))));
    }

    if matches!(target, DialectType::StarRocks) {
        // StarRocks: DATE_DIFF('UNIT', date1, date2) - unit as string, args NOT swapped
        let norm_d1 = Self::date_literal_to_cast(date1);
        let norm_d2 = Self::date_literal_to_cast(date2);
        return Ok(Expression::Function(Box::new(Function::new(
            "DATE_DIFF".to_string(),
            vec![
                Expression::Literal(Literal::String(unit_str)),
                norm_d1,
                norm_d2,
            ],
        ))));
    }

    if matches!(target, DialectType::DuckDB) {
        // DuckDB: DATE_DIFF('UNIT', date2, date1) with proper CAST for dates
        let norm_d1 = Self::ensure_cast_date(date1);
        let norm_d2 = Self::ensure_cast_date(date2);

        // Handle WEEK variants: WEEK(MONDAY)/WEEK(SUNDAY)/ISOWEEK/WEEK
        let is_week_variant = unit_str == "WEEK"
            || unit_str.starts_with("WEEK(")
            || unit_str == "ISOWEEK";
        if is_week_variant {
            // For DuckDB, WEEK-based diffs use DATE_TRUNC approach
            // WEEK(MONDAY) / ISOWEEK: DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2), DATE_TRUNC('WEEK', d1))
            // WEEK / WEEK(SUNDAY): DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2 + INTERVAL '1' DAY), DATE_TRUNC('WEEK', d1 + INTERVAL '1' DAY))
            // WEEK(SATURDAY): DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2 + INTERVAL '-5' DAY), DATE_TRUNC('WEEK', d1 + INTERVAL '-5' DAY))
            //
            // The per-variant day offset shifts each date so that the week
            // boundary produced by DATE_TRUNC('WEEK', ...) lands on the
            // requested week-start day before the diff is taken.
            let day_offset = if unit_str == "WEEK(MONDAY)" || unit_str == "ISOWEEK" {
                None // ISO weeks start on Monday, aligned with DATE_TRUNC('WEEK')
            } else if unit_str == "WEEK" || unit_str == "WEEK(SUNDAY)" {
                Some("1") // Shift Sunday to Monday alignment
            } else if unit_str == "WEEK(SATURDAY)" {
                Some("-5")
            } else if unit_str == "WEEK(TUESDAY)" {
                Some("-1")
            } else if unit_str == "WEEK(WEDNESDAY)" {
                Some("-2")
            } else if unit_str == "WEEK(THURSDAY)" {
                Some("-3")
            } else if unit_str == "WEEK(FRIDAY)" {
                Some("-4")
            } else {
                Some("1") // default to Sunday
            };

            // Builds DATE_TRUNC('WEEK', date [+ INTERVAL 'offset' DAY]).
            let make_trunc = |date: Expression, offset: Option<&str>| -> Expression {
                let shifted = if let Some(off) = offset {
                    let interval =
                        Expression::Interval(Box::new(crate::expressions::Interval {
                            this: Some(Expression::Literal(Literal::String(
                                off.to_string(),
                            ))),
                            unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                unit: crate::expressions::IntervalUnit::Day,
                                use_plural: false,
                            }),
                        }));
                    Expression::Add(Box::new(crate::expressions::BinaryOp::new(
                        date, interval,
                    )))
                } else {
                    date
                };
                Expression::Function(Box::new(Function::new(
                    "DATE_TRUNC".to_string(),
                    vec![
                        Expression::Literal(Literal::String("WEEK".to_string())),
                        shifted,
                    ],
                )))
            };

            let trunc_d2 = make_trunc(norm_d2, day_offset);
            let trunc_d1 = make_trunc(norm_d1, day_offset);
            return Ok(Expression::Function(Box::new(Function::new(
                "DATE_DIFF".to_string(),
                vec![
                    Expression::Literal(Literal::String("WEEK".to_string())),
                    trunc_d2,
                    trunc_d1,
                ],
            ))));
        }

        // Non-WEEK units: plain DATE_DIFF with swapped args (d2 before d1).
        return Ok(Expression::Function(Box::new(Function::new(
            "DATE_DIFF".to_string(),
            vec![
                Expression::Literal(Literal::String(unit_str)),
                norm_d2,
                norm_d1,
            ],
        ))));
    }

    // Default: DATEDIFF(unit, date2, date1)
    let unit = Expression::Identifier(Identifier::new(unit_str));
    Ok(Expression::Function(Box::new(Function::new(
        "DATEDIFF".to_string(),
        vec![unit, date2, date1],
    ))))
}
26595
// TIMESTAMP_ADD(ts, INTERVAL n UNIT) -> target-specific
"TIMESTAMP_ADD" | "DATETIME_ADD" | "TIME_ADD" if args.len() == 2 => {
    // BigQuery-style *_ADD(ts, INTERVAL n UNIT). The interval is decomposed
    // into its numeric value and unit, then re-assembled per target dialect.
    // `name` (which of the three source functions fired) also influences
    // casting, because TIMESTAMP_/DATETIME_/TIME_ variants carry different
    // operand types.
    let ts = args.remove(0);
    let interval_expr = args.remove(0);
    let (val, unit) = Self::extract_interval_parts(&interval_expr);

    match target {
        DialectType::Snowflake => {
            // TIMESTAMPADD(UNIT, val, CAST(ts AS TIMESTAMPTZ))
            // Use TimestampAdd expression so Snowflake generates TIMESTAMPADD
            // (Function("TIMESTAMPADD") would be converted to DATEADD by Snowflake's function normalization)
            let unit_str = Self::interval_unit_to_string(&unit);
            let cast_ts = Self::maybe_cast_ts_to_tz(ts, &name);
            Ok(Expression::TimestampAdd(Box::new(
                crate::expressions::TimestampAdd {
                    this: Box::new(val),
                    expression: Box::new(cast_ts),
                    unit: Some(unit_str),
                },
            )))
        }
        DialectType::Spark | DialectType::Databricks => {
            // Note: the inner `matches!(target, ...)` checks are redundant with
            // the arm pattern but kept for explicitness about which dialect
            // each branch serves.
            if name == "DATETIME_ADD" && matches!(target, DialectType::Spark) {
                // Spark DATETIME_ADD: ts + INTERVAL val UNIT
                let interval =
                    Expression::Interval(Box::new(crate::expressions::Interval {
                        this: Some(val),
                        unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                            unit,
                            use_plural: false,
                        }),
                    }));
                Ok(Expression::Add(Box::new(
                    crate::expressions::BinaryOp::new(ts, interval),
                )))
            } else if name == "DATETIME_ADD"
                && matches!(target, DialectType::Databricks)
            {
                // Databricks DATETIME_ADD: TIMESTAMPADD(UNIT, val, ts)
                let unit_str = Self::interval_unit_to_string(&unit);
                Ok(Expression::Function(Box::new(Function::new(
                    "TIMESTAMPADD".to_string(),
                    vec![Expression::Identifier(Identifier::new(unit_str)), val, ts],
                ))))
            } else {
                // Presto-style: DATE_ADD('unit', val, CAST(ts AS TIMESTAMP))
                // (covers TIMESTAMP_ADD and TIME_ADD on both Spark and Databricks)
                let unit_str = Self::interval_unit_to_string(&unit);
                let cast_ts =
                    if name.starts_with("TIMESTAMP") || name.starts_with("DATETIME") {
                        Self::maybe_cast_ts(ts)
                    } else {
                        ts
                    };
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![
                        Expression::Identifier(Identifier::new(unit_str)),
                        val,
                        cast_ts,
                    ],
                ))))
            }
        }
        DialectType::MySQL => {
            // DATE_ADD(TIMESTAMP(ts), INTERVAL val UNIT) for MySQL
            let mysql_ts = if name.starts_with("TIMESTAMP") {
                // Check if already wrapped in TIMESTAMP() function (from cross-dialect normalization)
                match &ts {
                    Expression::Function(ref inner_f)
                        if inner_f.name.eq_ignore_ascii_case("TIMESTAMP") =>
                    {
                        // Already wrapped, keep as-is
                        ts
                    }
                    _ => {
                        // Unwrap typed literals: TIMESTAMP '...' -> '...' for TIMESTAMP() wrapper
                        let unwrapped = match ts {
                            Expression::Literal(Literal::Timestamp(s)) => {
                                Expression::Literal(Literal::String(s))
                            }
                            other => other,
                        };
                        Expression::Function(Box::new(Function::new(
                            "TIMESTAMP".to_string(),
                            vec![unwrapped],
                        )))
                    }
                }
            } else {
                ts
            };
            Ok(Expression::DateAdd(Box::new(
                crate::expressions::DateAddFunc {
                    this: mysql_ts,
                    interval: val,
                    unit,
                },
            )))
        }
        _ => {
            // DuckDB and others use DateAdd expression (DuckDB converts to + INTERVAL)
            let cast_ts = if matches!(target, DialectType::DuckDB) {
                if name == "DATETIME_ADD" {
                    Self::ensure_cast_timestamp(ts)
                } else if name.starts_with("TIMESTAMP") {
                    Self::maybe_cast_ts_to_tz(ts, &name)
                } else {
                    ts
                }
            } else {
                ts
            };
            Ok(Expression::DateAdd(Box::new(
                crate::expressions::DateAddFunc {
                    this: cast_ts,
                    interval: val,
                    unit,
                },
            )))
        }
    }
}
26718
// TIMESTAMP_SUB(ts, INTERVAL n UNIT) -> target-specific
"TIMESTAMP_SUB" | "DATETIME_SUB" | "TIME_SUB" if args.len() == 2 => {
    // Mirror of the *_ADD arm above. Subtraction is rendered either as a
    // native SUB form (ts - INTERVAL ...) or as an ADD with the value negated
    // (val * -1), depending on what the target dialect supports.
    let ts = args.remove(0);
    let interval_expr = args.remove(0);
    let (val, unit) = Self::extract_interval_parts(&interval_expr);

    match target {
        DialectType::Snowflake => {
            // TIMESTAMPADD(UNIT, val * -1, CAST(ts AS TIMESTAMPTZ))
            let unit_str = Self::interval_unit_to_string(&unit);
            let cast_ts = Self::maybe_cast_ts_to_tz(ts, &name);
            // Build `val * -1` explicitly so non-literal values negate cleanly
            // in the generated SQL.
            let neg_val = Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
                val,
                Expression::Neg(Box::new(crate::expressions::UnaryOp {
                    this: Expression::number(1),
                })),
            )));
            Ok(Expression::TimestampAdd(Box::new(
                crate::expressions::TimestampAdd {
                    this: Box::new(neg_val),
                    expression: Box::new(cast_ts),
                    unit: Some(unit_str),
                },
            )))
        }
        DialectType::Spark | DialectType::Databricks => {
            if (name == "DATETIME_SUB" && matches!(target, DialectType::Spark))
                || (name == "TIMESTAMP_SUB" && matches!(target, DialectType::Spark))
            {
                // Spark: ts - INTERVAL val UNIT
                let cast_ts = if name.starts_with("TIMESTAMP") {
                    Self::maybe_cast_ts(ts)
                } else {
                    ts
                };
                let interval =
                    Expression::Interval(Box::new(crate::expressions::Interval {
                        this: Some(val),
                        unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                            unit,
                            use_plural: false,
                        }),
                    }));
                Ok(Expression::Sub(Box::new(
                    crate::expressions::BinaryOp::new(cast_ts, interval),
                )))
            } else {
                // Databricks: TIMESTAMPADD(UNIT, val * -1, ts)
                // NOTE(review): Spark TIME_SUB also falls through to this
                // branch — confirm that is the intended behavior.
                let unit_str = Self::interval_unit_to_string(&unit);
                let neg_val =
                    Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
                        val,
                        Expression::Neg(Box::new(crate::expressions::UnaryOp {
                            this: Expression::number(1),
                        })),
                    )));
                Ok(Expression::Function(Box::new(Function::new(
                    "TIMESTAMPADD".to_string(),
                    vec![
                        Expression::Identifier(Identifier::new(unit_str)),
                        neg_val,
                        ts,
                    ],
                ))))
            }
        }
        DialectType::MySQL => {
            // MySQL: DATE_SUB(TIMESTAMP(ts), INTERVAL val UNIT); wrap the
            // operand in TIMESTAMP() for the TIMESTAMP_* source functions.
            let mysql_ts = if name.starts_with("TIMESTAMP") {
                // Check if already wrapped in TIMESTAMP() function (from cross-dialect normalization)
                match &ts {
                    Expression::Function(ref inner_f)
                        if inner_f.name.eq_ignore_ascii_case("TIMESTAMP") =>
                    {
                        // Already wrapped, keep as-is
                        ts
                    }
                    _ => {
                        // Unwrap typed literals: TIMESTAMP '...' -> '...' for the wrapper
                        let unwrapped = match ts {
                            Expression::Literal(Literal::Timestamp(s)) => {
                                Expression::Literal(Literal::String(s))
                            }
                            other => other,
                        };
                        Expression::Function(Box::new(Function::new(
                            "TIMESTAMP".to_string(),
                            vec![unwrapped],
                        )))
                    }
                }
            } else {
                ts
            };
            Ok(Expression::DateSub(Box::new(
                crate::expressions::DateAddFunc {
                    this: mysql_ts,
                    interval: val,
                    unit,
                },
            )))
        }
        _ => {
            // DuckDB and others: generic DateSub expression (DuckDB renders it
            // as `- INTERVAL`), with DuckDB-specific casts on the operand.
            let cast_ts = if matches!(target, DialectType::DuckDB) {
                if name == "DATETIME_SUB" {
                    Self::ensure_cast_timestamp(ts)
                } else if name.starts_with("TIMESTAMP") {
                    Self::maybe_cast_ts_to_tz(ts, &name)
                } else {
                    ts
                }
            } else {
                ts
            };
            Ok(Expression::DateSub(Box::new(
                crate::expressions::DateAddFunc {
                    this: cast_ts,
                    interval: val,
                    unit,
                },
            )))
        }
    }
}
26841
// DATE_SUB(date, INTERVAL n UNIT) -> target-specific
"DATE_SUB" if args.len() == 2 => {
    // BigQuery-style DATE_SUB(date, INTERVAL n UNIT); the interval is split
    // into value + unit, then each target picks its own subtraction syntax.
    let date = args.remove(0);
    let interval_expr = args.remove(0);
    let (val, unit) = Self::extract_interval_parts(&interval_expr);

    match target {
        DialectType::Databricks | DialectType::Spark => {
            // Databricks/Spark: DATE_ADD(date, -val)
            // Use DateAdd expression with negative val so it generates correctly
            // The generator will output DATE_ADD(date, INTERVAL -val DAY)
            // Then Databricks transform converts 2-arg DATE_ADD(date, interval) to DATEADD(DAY, interval, date)
            // Instead, we directly output as a simple negated DateSub
            Ok(Expression::DateSub(Box::new(
                crate::expressions::DateAddFunc {
                    this: date,
                    interval: val,
                    unit,
                },
            )))
        }
        DialectType::DuckDB => {
            // DuckDB: CAST(date AS DATE) - INTERVAL 'val' UNIT
            let cast_date = Self::ensure_cast_date(date);
            let interval =
                Expression::Interval(Box::new(crate::expressions::Interval {
                    this: Some(val),
                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                        unit,
                        use_plural: false,
                    }),
                }));
            Ok(Expression::Sub(Box::new(
                crate::expressions::BinaryOp::new(cast_date, interval),
            )))
        }
        DialectType::Snowflake => {
            // Snowflake: Let Snowflake's own DateSub -> DATEADD(UNIT, val * -1, date) handler work
            // Just ensure the date is cast properly
            let cast_date = Self::ensure_cast_date(date);
            Ok(Expression::DateSub(Box::new(
                crate::expressions::DateAddFunc {
                    this: cast_date,
                    interval: val,
                    unit,
                },
            )))
        }
        DialectType::PostgreSQL => {
            // PostgreSQL: date - INTERVAL 'val UNIT'
            // (value and unit are folded into one quoted interval string)
            let unit_str = Self::interval_unit_to_string(&unit);
            let interval =
                Expression::Interval(Box::new(crate::expressions::Interval {
                    this: Some(Expression::Literal(Literal::String(format!(
                        "{} {}",
                        Self::expr_to_string(&val),
                        unit_str
                    )))),
                    unit: None,
                }));
            Ok(Expression::Sub(Box::new(
                crate::expressions::BinaryOp::new(date, interval),
            )))
        }
        // Everything else: generic DateSub expression; the target's generator
        // decides the final SQL spelling.
        _ => Ok(Expression::DateSub(Box::new(
            crate::expressions::DateAddFunc {
                this: date,
                interval: val,
                unit,
            },
        ))),
    }
}
26915
// DATEADD(unit, val, date) -> target-specific form
// Used by: Redshift, Snowflake, TSQL, ClickHouse
"DATEADD" if args.len() == 3 => {
    let arg0 = args.remove(0);
    let arg1 = args.remove(0);
    let arg2 = args.remove(0);
    // arg0 = unit, arg1 = amount, arg2 = date/timestamp expression.
    let unit_str = get_unit_str(&arg0);

    if matches!(target, DialectType::Snowflake | DialectType::TSQL) {
        // Keep DATEADD(UNIT, val, date) with uppercased unit
        let unit = Expression::Identifier(Identifier::new(unit_str));
        // Only CAST to DATETIME2 for TSQL target when source is NOT Spark/Databricks family
        let date = if matches!(target, DialectType::TSQL)
            && !matches!(
                source,
                DialectType::Spark | DialectType::Databricks | DialectType::Hive
            ) {
            Self::ensure_cast_datetime2(arg2)
        } else {
            arg2
        };
        return Ok(Expression::Function(Box::new(Function::new(
            "DATEADD".to_string(),
            vec![unit, arg1, date],
        ))));
    }

    if matches!(target, DialectType::DuckDB) {
        // DuckDB: date + INTERVAL 'val' UNIT
        let iu = parse_interval_unit(&unit_str);
        let interval = Expression::Interval(Box::new(crate::expressions::Interval {
            this: Some(arg1),
            unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                unit: iu,
                use_plural: false,
            }),
        }));
        let cast_date = Self::ensure_cast_timestamp(arg2);
        return Ok(Expression::Add(Box::new(
            crate::expressions::BinaryOp::new(cast_date, interval),
        )));
    }

    if matches!(target, DialectType::BigQuery) {
        // BigQuery: DATE_ADD(date, INTERVAL val UNIT) or TIMESTAMP_ADD(ts, INTERVAL val UNIT)
        // NOTE(review): only DATE_ADD is emitted here; the TIMESTAMP_ADD case
        // mentioned in the comment is not produced by this branch — confirm.
        let iu = parse_interval_unit(&unit_str);
        let interval = Expression::Interval(Box::new(crate::expressions::Interval {
            this: Some(arg1),
            unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                unit: iu,
                use_plural: false,
            }),
        }));
        return Ok(Expression::Function(Box::new(Function::new(
            "DATE_ADD".to_string(),
            vec![arg2, interval],
        ))));
    }

    if matches!(target, DialectType::Databricks) {
        // Databricks: keep DATEADD(UNIT, val, date) format
        let unit = Expression::Identifier(Identifier::new(unit_str));
        return Ok(Expression::Function(Box::new(Function::new(
            "DATEADD".to_string(),
            vec![unit, arg1, arg2],
        ))));
    }

    if matches!(target, DialectType::Spark) {
        // Spark: convert month-based units to ADD_MONTHS, rest to DATE_ADD

        // Multiplies a literal amount at transpile time when possible
        // (e.g. 2 YEAR -> 24 months); falls back to `expr * factor` for
        // non-literal amounts.
        fn multiply_expr_dateadd(expr: Expression, factor: i64) -> Expression {
            if let Expression::Literal(crate::expressions::Literal::Number(n)) = &expr {
                if let Ok(val) = n.parse::<i64>() {
                    return Expression::Literal(crate::expressions::Literal::Number(
                        (val * factor).to_string(),
                    ));
                }
            }
            Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
                expr,
                Expression::Literal(crate::expressions::Literal::Number(
                    factor.to_string(),
                )),
            )))
        }
        match unit_str.as_str() {
            "YEAR" => {
                // 1 year = 12 months
                let months = multiply_expr_dateadd(arg1, 12);
                return Ok(Expression::Function(Box::new(Function::new(
                    "ADD_MONTHS".to_string(),
                    vec![arg2, months],
                ))));
            }
            "QUARTER" => {
                // 1 quarter = 3 months
                let months = multiply_expr_dateadd(arg1, 3);
                return Ok(Expression::Function(Box::new(Function::new(
                    "ADD_MONTHS".to_string(),
                    vec![arg2, months],
                ))));
            }
            "MONTH" => {
                return Ok(Expression::Function(Box::new(Function::new(
                    "ADD_MONTHS".to_string(),
                    vec![arg2, arg1],
                ))));
            }
            "WEEK" => {
                // 1 week = 7 days; Spark DATE_ADD adds days
                let days = multiply_expr_dateadd(arg1, 7);
                return Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![arg2, days],
                ))));
            }
            "DAY" => {
                return Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![arg2, arg1],
                ))));
            }
            _ => {
                // Sub-day units: keep the 3-arg DATE_ADD(UNIT, val, date) form
                let unit = Expression::Identifier(Identifier::new(unit_str));
                return Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![unit, arg1, arg2],
                ))));
            }
        }
    }

    if matches!(target, DialectType::Hive) {
        // Hive: DATE_ADD(date, val) for DAY, or date + INTERVAL for others
        match unit_str.as_str() {
            "DAY" => {
                return Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![arg2, arg1],
                ))));
            }
            "MONTH" => {
                return Ok(Expression::Function(Box::new(Function::new(
                    "ADD_MONTHS".to_string(),
                    vec![arg2, arg1],
                ))));
            }
            _ => {
                let iu = parse_interval_unit(&unit_str);
                let interval =
                    Expression::Interval(Box::new(crate::expressions::Interval {
                        this: Some(arg1),
                        unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                            unit: iu,
                            use_plural: false,
                        }),
                    }));
                return Ok(Expression::Add(Box::new(
                    crate::expressions::BinaryOp::new(arg2, interval),
                )));
            }
        }
    }

    if matches!(target, DialectType::PostgreSQL) {
        // PostgreSQL: date + INTERVAL 'val UNIT'
        let interval = Expression::Interval(Box::new(crate::expressions::Interval {
            this: Some(Expression::Literal(Literal::String(format!(
                "{} {}",
                Self::expr_to_string(&arg1),
                unit_str
            )))),
            unit: None,
        }));
        return Ok(Expression::Add(Box::new(
            crate::expressions::BinaryOp::new(arg2, interval),
        )));
    }

    if matches!(
        target,
        DialectType::Presto | DialectType::Trino | DialectType::Athena
    ) {
        // Presto/Trino: DATE_ADD('UNIT', val, date) - unit as string literal
        return Ok(Expression::Function(Box::new(Function::new(
            "DATE_ADD".to_string(),
            vec![Expression::Literal(Literal::String(unit_str)), arg1, arg2],
        ))));
    }

    if matches!(target, DialectType::ClickHouse) {
        // ClickHouse: DATE_ADD(UNIT, val, date) - unit as bare identifier
        let unit = Expression::Identifier(Identifier::new(unit_str));
        return Ok(Expression::Function(Box::new(Function::new(
            "DATE_ADD".to_string(),
            vec![unit, arg1, arg2],
        ))));
    }

    // Default: keep DATEADD with uppercased unit
    let unit = Expression::Identifier(Identifier::new(unit_str));
    Ok(Expression::Function(Box::new(Function::new(
        "DATEADD".to_string(),
        vec![unit, arg1, arg2],
    ))))
}
27119
27120 // DATE_ADD(unit, val, date) - 3 arg form from ClickHouse/Presto
27121 "DATE_ADD" if args.len() == 3 => {
27122 let arg0 = args.remove(0);
27123 let arg1 = args.remove(0);
27124 let arg2 = args.remove(0);
27125 let unit_str = get_unit_str(&arg0);
27126
27127 if matches!(
27128 target,
27129 DialectType::Presto | DialectType::Trino | DialectType::Athena
27130 ) {
27131 // Presto/Trino: DATE_ADD('UNIT', val, date)
27132 return Ok(Expression::Function(Box::new(Function::new(
27133 "DATE_ADD".to_string(),
27134 vec![Expression::Literal(Literal::String(unit_str)), arg1, arg2],
27135 ))));
27136 }
27137
27138 if matches!(
27139 target,
27140 DialectType::Snowflake | DialectType::TSQL | DialectType::Redshift
27141 ) {
27142 // DATEADD(UNIT, val, date)
27143 let unit = Expression::Identifier(Identifier::new(unit_str));
27144 let date = if matches!(target, DialectType::TSQL) {
27145 Self::ensure_cast_datetime2(arg2)
27146 } else {
27147 arg2
27148 };
27149 return Ok(Expression::Function(Box::new(Function::new(
27150 "DATEADD".to_string(),
27151 vec![unit, arg1, date],
27152 ))));
27153 }
27154
27155 if matches!(target, DialectType::DuckDB) {
27156 // DuckDB: date + INTERVAL val UNIT
27157 let iu = parse_interval_unit(&unit_str);
27158 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
27159 this: Some(arg1),
27160 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
27161 unit: iu,
27162 use_plural: false,
27163 }),
27164 }));
27165 return Ok(Expression::Add(Box::new(
27166 crate::expressions::BinaryOp::new(arg2, interval),
27167 )));
27168 }
27169
27170 if matches!(target, DialectType::Spark | DialectType::Databricks) {
27171 // Spark: DATE_ADD(UNIT, val, date) with uppercased unit
27172 let unit = Expression::Identifier(Identifier::new(unit_str));
27173 return Ok(Expression::Function(Box::new(Function::new(
27174 "DATE_ADD".to_string(),
27175 vec![unit, arg1, arg2],
27176 ))));
27177 }
27178
27179 // Default: DATE_ADD(UNIT, val, date)
27180 let unit = Expression::Identifier(Identifier::new(unit_str));
27181 Ok(Expression::Function(Box::new(Function::new(
27182 "DATE_ADD".to_string(),
27183 vec![unit, arg1, arg2],
27184 ))))
27185 }
27186
// DATE_ADD(date, INTERVAL val UNIT) - 2 arg BigQuery form
"DATE_ADD" if args.len() == 2 => {
    // BigQuery/MySQL-style DATE_ADD(date, INTERVAL val UNIT). The interval is
    // decomposed into value + unit and re-encoded per target dialect.
    let date = args.remove(0);
    let interval_expr = args.remove(0);
    let (val, unit) = Self::extract_interval_parts(&interval_expr);
    let unit_str = Self::interval_unit_to_string(&unit);

    match target {
        DialectType::DuckDB => {
            // DuckDB: CAST(date AS DATE) + INTERVAL 'val' UNIT
            let cast_date = Self::ensure_cast_date(date);
            let quoted_val = Self::quote_interval_val(&val);
            let interval =
                Expression::Interval(Box::new(crate::expressions::Interval {
                    this: Some(quoted_val),
                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                        unit,
                        use_plural: false,
                    }),
                }));
            Ok(Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(cast_date, interval),
            )))
        }
        DialectType::PostgreSQL => {
            // PostgreSQL: date + INTERVAL 'val UNIT'
            // (value and unit folded into one quoted interval string)
            let interval =
                Expression::Interval(Box::new(crate::expressions::Interval {
                    this: Some(Expression::Literal(Literal::String(format!(
                        "{} {}",
                        Self::expr_to_string(&val),
                        unit_str
                    )))),
                    unit: None,
                }));
            Ok(Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(date, interval),
            )))
        }
        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
            // Presto: DATE_ADD('UNIT', CAST('val' AS BIGINT), date)
            let val_str = Self::expr_to_string(&val);
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![
                    Expression::Literal(Literal::String(unit_str)),
                    Expression::Cast(Box::new(Cast {
                        this: Expression::Literal(Literal::String(val_str)),
                        to: DataType::BigInt { length: None },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    })),
                    date,
                ],
            ))))
        }
        DialectType::Spark | DialectType::Hive => {
            // Spark/Hive: DATE_ADD(date, val) for DAY, ADD_MONTHS for MONTH,
            // otherwise DATE_ADD(date, INTERVAL val UNIT)
            match unit_str.as_str() {
                "DAY" => Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![date, val],
                )))),
                "MONTH" => Ok(Expression::Function(Box::new(Function::new(
                    "ADD_MONTHS".to_string(),
                    vec![date, val],
                )))),
                _ => {
                    let iu = parse_interval_unit(&unit_str);
                    let interval =
                        Expression::Interval(Box::new(crate::expressions::Interval {
                            this: Some(val),
                            unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                unit: iu,
                                use_plural: false,
                            }),
                        }));
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATE_ADD".to_string(),
                        vec![date, interval],
                    ))))
                }
            }
        }
        DialectType::Snowflake => {
            // Snowflake: DATEADD(UNIT, 'val', CAST(date AS DATE))
            // Note the amount is deliberately stringified into a literal.
            let cast_date = Self::ensure_cast_date(date);
            let val_str = Self::expr_to_string(&val);
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new(unit_str)),
                    Expression::Literal(Literal::String(val_str)),
                    cast_date,
                ],
            ))))
        }
        DialectType::TSQL | DialectType::Fabric => {
            // TSQL/Fabric: DATEADD(UNIT, val, CAST(date AS DATETIME2))
            let cast_date = Self::ensure_cast_datetime2(date);
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new(unit_str)),
                    val,
                    cast_date,
                ],
            ))))
        }
        // Redshift: DATEADD(UNIT, val, date) with no extra cast.
        DialectType::Redshift => Ok(Expression::Function(Box::new(Function::new(
            "DATEADD".to_string(),
            vec![Expression::Identifier(Identifier::new(unit_str)), val, date],
        )))),
        DialectType::MySQL => {
            // MySQL: DATE_ADD(date, INTERVAL 'val' UNIT)
            let quoted_val = Self::quote_interval_val(&val);
            let iu = parse_interval_unit(&unit_str);
            let interval =
                Expression::Interval(Box::new(crate::expressions::Interval {
                    this: Some(quoted_val),
                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                        unit: iu,
                        use_plural: false,
                    }),
                }));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![date, interval],
            ))))
        }
        DialectType::BigQuery => {
            // BigQuery: DATE_ADD(date, INTERVAL 'val' UNIT)
            let quoted_val = Self::quote_interval_val(&val);
            let iu = parse_interval_unit(&unit_str);
            let interval =
                Expression::Interval(Box::new(crate::expressions::Interval {
                    this: Some(quoted_val),
                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                        unit: iu,
                        use_plural: false,
                    }),
                }));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![date, interval],
            ))))
        }
        // Databricks: DATEADD(UNIT, val, date).
        DialectType::Databricks => Ok(Expression::Function(Box::new(Function::new(
            "DATEADD".to_string(),
            vec![Expression::Identifier(Identifier::new(unit_str)), val, date],
        )))),
        _ => {
            // Default: keep as DATE_ADD with decomposed interval
            Ok(Expression::DateAdd(Box::new(
                crate::expressions::DateAddFunc {
                    this: date,
                    interval: val,
                    unit,
                },
            )))
        }
    }
}
27351
27352 // ADD_MONTHS(date, val) -> target-specific form
27353 "ADD_MONTHS" if args.len() == 2 => {
27354 let date = args.remove(0);
27355 let val = args.remove(0);
27356
27357 if matches!(target, DialectType::TSQL) {
27358 // TSQL: DATEADD(MONTH, val, CAST(date AS DATETIME2))
27359 let cast_date = Self::ensure_cast_datetime2(date);
27360 return Ok(Expression::Function(Box::new(Function::new(
27361 "DATEADD".to_string(),
27362 vec![
27363 Expression::Identifier(Identifier::new("MONTH")),
27364 val,
27365 cast_date,
27366 ],
27367 ))));
27368 }
27369
27370 if matches!(target, DialectType::DuckDB) {
27371 // DuckDB: date + INTERVAL val MONTH
27372 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
27373 this: Some(val),
27374 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
27375 unit: crate::expressions::IntervalUnit::Month,
27376 use_plural: false,
27377 }),
27378 }));
27379 return Ok(Expression::Add(Box::new(
27380 crate::expressions::BinaryOp::new(date, interval),
27381 )));
27382 }
27383
27384 if matches!(target, DialectType::Snowflake) {
27385 // Snowflake: keep ADD_MONTHS when source is also Snowflake, else DATEADD
27386 if matches!(source, DialectType::Snowflake) {
27387 return Ok(Expression::Function(Box::new(Function::new(
27388 "ADD_MONTHS".to_string(),
27389 vec![date, val],
27390 ))));
27391 }
27392 return Ok(Expression::Function(Box::new(Function::new(
27393 "DATEADD".to_string(),
27394 vec![Expression::Identifier(Identifier::new("MONTH")), val, date],
27395 ))));
27396 }
27397
27398 if matches!(target, DialectType::Spark | DialectType::Databricks) {
27399 // Spark: ADD_MONTHS(date, val) - keep as is
27400 return Ok(Expression::Function(Box::new(Function::new(
27401 "ADD_MONTHS".to_string(),
27402 vec![date, val],
27403 ))));
27404 }
27405
27406 if matches!(target, DialectType::Hive) {
27407 return Ok(Expression::Function(Box::new(Function::new(
27408 "ADD_MONTHS".to_string(),
27409 vec![date, val],
27410 ))));
27411 }
27412
27413 if matches!(
27414 target,
27415 DialectType::Presto | DialectType::Trino | DialectType::Athena
27416 ) {
27417 // Presto: DATE_ADD('MONTH', val, date)
27418 return Ok(Expression::Function(Box::new(Function::new(
27419 "DATE_ADD".to_string(),
27420 vec![
27421 Expression::Literal(Literal::String("MONTH".to_string())),
27422 val,
27423 date,
27424 ],
27425 ))));
27426 }
27427
27428 // Default: keep ADD_MONTHS
27429 Ok(Expression::Function(Box::new(Function::new(
27430 "ADD_MONTHS".to_string(),
27431 vec![date, val],
27432 ))))
27433 }
27434
            // SAFE_DIVIDE(x, y) -> target-specific form directly
            "SAFE_DIVIDE" if args.len() == 2 => {
                let x = args.remove(0);
                let y = args.remove(0);
                // Wrap x and y in parens if they're complex expressions
                // (bare columns/literals/identifiers need no extra parentheses).
                let y_ref = match &y {
                    Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => {
                        y.clone()
                    }
                    _ => Expression::Paren(Box::new(Paren {
                        this: y.clone(),
                        trailing_comments: vec![],
                    })),
                };
                let x_ref = match &x {
                    Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => {
                        x.clone()
                    }
                    _ => Expression::Paren(Box::new(Paren {
                        this: x.clone(),
                        trailing_comments: vec![],
                    })),
                };
                // Shared building blocks reused by every target form below:
                // the `y <> 0` guard and the plain `x / y` division.
                let condition = Expression::Neq(Box::new(crate::expressions::BinaryOp::new(
                    y_ref.clone(),
                    Expression::number(0),
                )));
                let div_expr = Expression::Div(Box::new(crate::expressions::BinaryOp::new(
                    x_ref.clone(),
                    y_ref.clone(),
                )));

                match target {
                    DialectType::DuckDB | DialectType::PostgreSQL => {
                        // CASE WHEN y <> 0 THEN x / y ELSE NULL END
                        // PostgreSQL additionally casts x to DOUBLE PRECISION,
                        // presumably so integer / integer does not truncate — confirm.
                        let result_div = if matches!(target, DialectType::PostgreSQL) {
                            let cast_x = Expression::Cast(Box::new(Cast {
                                this: x_ref,
                                to: DataType::Custom {
                                    name: "DOUBLE PRECISION".to_string(),
                                },
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                            }));
                            Expression::Div(Box::new(crate::expressions::BinaryOp::new(
                                cast_x, y_ref,
                            )))
                        } else {
                            div_expr
                        };
                        Ok(Expression::Case(Box::new(crate::expressions::Case {
                            operand: None,
                            whens: vec![(condition, result_div)],
                            else_: Some(Expression::Null(crate::expressions::Null)),
                            comments: Vec::new(),
                        })))
                    }
                    DialectType::Snowflake => {
                        // IFF(y <> 0, x / y, NULL)
                        Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
                            condition,
                            true_value: div_expr,
                            false_value: Some(Expression::Null(crate::expressions::Null)),
                            original_name: Some("IFF".to_string()),
                        })))
                    }
                    DialectType::Presto | DialectType::Trino => {
                        // IF(y <> 0, CAST(x AS DOUBLE) / y, NULL)
                        let cast_x = Expression::Cast(Box::new(Cast {
                            this: x_ref,
                            to: DataType::Double {
                                precision: None,
                                scale: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        let cast_div = Expression::Div(Box::new(
                            crate::expressions::BinaryOp::new(cast_x, y_ref),
                        ));
                        Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
                            condition,
                            true_value: cast_div,
                            false_value: Some(Expression::Null(crate::expressions::Null)),
                            original_name: None,
                        })))
                    }
                    _ => {
                        // IF(y <> 0, x / y, NULL)
                        Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
                            condition,
                            true_value: div_expr,
                            false_value: Some(Expression::Null(crate::expressions::Null)),
                            original_name: None,
                        })))
                    }
                }
            }
27537
            // GENERATE_UUID() -> UUID() with CAST to string
            "GENERATE_UUID" => {
                // Bare UUID() expression node; the wrapping CAST (if any) is decided below.
                let uuid_expr = Expression::Uuid(Box::new(crate::expressions::Uuid {
                    this: None,
                    name: None,
                    is_string: None,
                }));
                // Most targets need CAST(UUID() AS TEXT/VARCHAR/STRING)
                // (presumably to match GENERATE_UUID's string result — confirm per dialect).
                let cast_type = match target {
                    DialectType::DuckDB => Some(DataType::Text),
                    DialectType::Presto | DialectType::Trino => Some(DataType::VarChar {
                        length: None,
                        parenthesized_length: false,
                    }),
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
                        Some(DataType::String { length: None })
                    }
                    // None: emit the bare UUID() without a cast.
                    _ => None,
                };
                if let Some(dt) = cast_type {
                    Ok(Expression::Cast(Box::new(Cast {
                        this: uuid_expr,
                        to: dt,
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    })))
                } else {
                    Ok(uuid_expr)
                }
            }

            // COUNTIF(x) -> CountIf expression node; rendering is left to the
            // target dialect's generator.
            "COUNTIF" if args.len() == 1 => {
                let arg = args.remove(0);
                Ok(Expression::CountIf(Box::new(crate::expressions::AggFunc {
                    this: arg,
                    distinct: false,
                    filter: None,
                    order_by: vec![],
                    name: None,
                    ignore_nulls: None,
                    having_max: None,
                    limit: None,
                })))
            }
27585
            // EDIT_DISTANCE(col1, col2, ...) -> Levenshtein expression
            "EDIT_DISTANCE" => {
                // Strip named arguments (max_distance => N) and pass as positional
                let mut positional_args: Vec<Expression> = vec![];
                for arg in args {
                    match arg {
                        Expression::NamedArgument(na) => {
                            positional_args.push(na.value);
                        }
                        other => positional_args.push(other),
                    }
                }
                if positional_args.len() >= 2 {
                    let col1 = positional_args.remove(0);
                    let col2 = positional_args.remove(0);
                    let levenshtein = crate::expressions::BinaryFunc {
                        this: col1,
                        expression: col2,
                        original_name: None,
                    };
                    // Pass extra args through a function wrapper with all args
                    if !positional_args.is_empty() {
                        let max_dist = positional_args.remove(0);
                        // DuckDB: CASE WHEN LEVENSHTEIN(a, b) IS NULL OR max IS NULL THEN NULL ELSE LEAST(LEVENSHTEIN(a, b), max) END
                        if matches!(target, DialectType::DuckDB) {
                            let lev = Expression::Function(Box::new(Function::new(
                                "LEVENSHTEIN".to_string(),
                                vec![levenshtein.this, levenshtein.expression],
                            )));
                            // NULL propagation: if either the distance or the cap is
                            // NULL, the whole expression must be NULL.
                            let lev_is_null =
                                Expression::IsNull(Box::new(crate::expressions::IsNull {
                                    this: lev.clone(),
                                    not: false,
                                    postfix_form: false,
                                }));
                            let max_is_null =
                                Expression::IsNull(Box::new(crate::expressions::IsNull {
                                    this: max_dist.clone(),
                                    not: false,
                                    postfix_form: false,
                                }));
                            let null_check =
                                Expression::Or(Box::new(crate::expressions::BinaryOp {
                                    left: lev_is_null,
                                    right: max_is_null,
                                    left_comments: Vec::new(),
                                    operator_comments: Vec::new(),
                                    trailing_comments: Vec::new(),
                                }));
                            // Otherwise cap the distance at max_dist via LEAST.
                            let least =
                                Expression::Least(Box::new(crate::expressions::VarArgFunc {
                                    expressions: vec![lev, max_dist],
                                    original_name: None,
                                }));
                            return Ok(Expression::Case(Box::new(crate::expressions::Case {
                                operand: None,
                                whens: vec![(
                                    null_check,
                                    Expression::Null(crate::expressions::Null),
                                )],
                                else_: Some(least),
                                comments: Vec::new(),
                            })));
                        }
                        let mut all_args = vec![levenshtein.this, levenshtein.expression, max_dist];
                        all_args.extend(positional_args);
                        // PostgreSQL: use LEVENSHTEIN_LESS_EQUAL when max_distance is provided
                        let func_name = if matches!(target, DialectType::PostgreSQL) {
                            "LEVENSHTEIN_LESS_EQUAL"
                        } else {
                            "LEVENSHTEIN"
                        };
                        return Ok(Expression::Function(Box::new(Function::new(
                            func_name.to_string(),
                            all_args,
                        ))));
                    }
                    // Exactly two args: dedicated Levenshtein node.
                    Ok(Expression::Levenshtein(Box::new(levenshtein)))
                } else {
                    // Fewer than two comparable args: leave the call untouched.
                    Ok(Expression::Function(Box::new(Function::new(
                        "EDIT_DISTANCE".to_string(),
                        positional_args,
                    ))))
                }
            }
27671
27672 // TIMESTAMP_SECONDS(x) -> UnixToTime with scale 0
27673 "TIMESTAMP_SECONDS" if args.len() == 1 => {
27674 let arg = args.remove(0);
27675 Ok(Expression::UnixToTime(Box::new(
27676 crate::expressions::UnixToTime {
27677 this: Box::new(arg),
27678 scale: Some(0),
27679 zone: None,
27680 hours: None,
27681 minutes: None,
27682 format: None,
27683 target_type: None,
27684 },
27685 )))
27686 }
27687
27688 // TIMESTAMP_MILLIS(x) -> UnixToTime with scale 3
27689 "TIMESTAMP_MILLIS" if args.len() == 1 => {
27690 let arg = args.remove(0);
27691 Ok(Expression::UnixToTime(Box::new(
27692 crate::expressions::UnixToTime {
27693 this: Box::new(arg),
27694 scale: Some(3),
27695 zone: None,
27696 hours: None,
27697 minutes: None,
27698 format: None,
27699 target_type: None,
27700 },
27701 )))
27702 }
27703
27704 // TIMESTAMP_MICROS(x) -> UnixToTime with scale 6
27705 "TIMESTAMP_MICROS" if args.len() == 1 => {
27706 let arg = args.remove(0);
27707 Ok(Expression::UnixToTime(Box::new(
27708 crate::expressions::UnixToTime {
27709 this: Box::new(arg),
27710 scale: Some(6),
27711 zone: None,
27712 hours: None,
27713 minutes: None,
27714 format: None,
27715 target_type: None,
27716 },
27717 )))
27718 }
27719
27720 // DIV(x, y) -> IntDiv expression
27721 "DIV" if args.len() == 2 => {
27722 let x = args.remove(0);
27723 let y = args.remove(0);
27724 Ok(Expression::IntDiv(Box::new(
27725 crate::expressions::BinaryFunc {
27726 this: x,
27727 expression: y,
27728 original_name: None,
27729 },
27730 )))
27731 }
27732
            // TO_HEX(x) -> target-specific form
            "TO_HEX" if args.len() == 1 => {
                let arg = args.remove(0);
                // Check if inner function already returns hex string in certain targets
                let inner_returns_hex = matches!(&arg, Expression::Function(f) if matches!(f.name.as_str(), "MD5" | "SHA1" | "SHA256" | "SHA512"));
                if matches!(target, DialectType::BigQuery) {
                    // BQ->BQ: keep as TO_HEX
                    Ok(Expression::Function(Box::new(Function::new(
                        "TO_HEX".to_string(),
                        vec![arg],
                    ))))
                } else if matches!(target, DialectType::DuckDB) && inner_returns_hex {
                    // DuckDB: MD5/SHA already return hex strings, so TO_HEX is redundant
                    Ok(arg)
                } else if matches!(target, DialectType::Snowflake) && inner_returns_hex {
                    // Snowflake: TO_HEX(SHA1(x)) -> TO_CHAR(SHA1_BINARY(x))
                    // TO_HEX(MD5(x)) -> TO_CHAR(MD5_BINARY(x))
                    // TO_HEX(SHA256(x)) -> TO_CHAR(SHA2_BINARY(x, 256))
                    // TO_HEX(SHA512(x)) -> TO_CHAR(SHA2_BINARY(x, 512))
                    if let Expression::Function(ref inner_f) = arg {
                        let inner_args = inner_f.args.clone();
                        let binary_func = match inner_f.name.to_uppercase().as_str() {
                            "SHA1" => Expression::Function(Box::new(Function::new(
                                "SHA1_BINARY".to_string(),
                                inner_args,
                            ))),
                            "MD5" => Expression::Function(Box::new(Function::new(
                                "MD5_BINARY".to_string(),
                                inner_args,
                            ))),
                            // SHA2_BINARY takes the digest width as a second argument.
                            "SHA256" => {
                                let mut a = inner_args;
                                a.push(Expression::number(256));
                                Expression::Function(Box::new(Function::new(
                                    "SHA2_BINARY".to_string(),
                                    a,
                                )))
                            }
                            "SHA512" => {
                                let mut a = inner_args;
                                a.push(Expression::number(512));
                                Expression::Function(Box::new(Function::new(
                                    "SHA2_BINARY".to_string(),
                                    a,
                                )))
                            }
                            _ => arg.clone(),
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "TO_CHAR".to_string(),
                            vec![binary_func],
                        ))))
                    } else {
                        // NOTE(review): this branch looks unreachable —
                        // `inner_returns_hex` already guarantees `arg` is a Function.
                        let inner = Expression::Function(Box::new(Function::new(
                            "HEX".to_string(),
                            vec![arg],
                        )));
                        Ok(Expression::Lower(Box::new(
                            crate::expressions::UnaryFunc::new(inner),
                        )))
                    }
                } else if matches!(target, DialectType::Presto | DialectType::Trino) {
                    // Presto/Trino: LOWER(TO_HEX(x)) — their TO_HEX emits uppercase.
                    let inner = Expression::Function(Box::new(Function::new(
                        "TO_HEX".to_string(),
                        vec![arg],
                    )));
                    Ok(Expression::Lower(Box::new(
                        crate::expressions::UnaryFunc::new(inner),
                    )))
                } else {
                    // Default (including DuckDB with a non-hash argument): LOWER(HEX(x)).
                    let inner =
                        Expression::Function(Box::new(Function::new("HEX".to_string(), vec![arg])));
                    Ok(Expression::Lower(Box::new(
                        crate::expressions::UnaryFunc::new(inner),
                    )))
                }
            }
27810
            // LAST_DAY(date, unit) -> LAST_DAY(date): the explicit unit argument is
            // dropped because MONTH (the only unit handled here) is the default.
            // NOTE(review): the previous comment mentioned a PostgreSQL transform,
            // but no PostgreSQL-specific handling exists in this arm.
            "LAST_DAY" if args.len() == 2 => {
                let date = args.remove(0);
                let _unit = args.remove(0); // Strip the unit (MONTH is default)
                Ok(Expression::Function(Box::new(Function::new(
                    "LAST_DAY".to_string(),
                    vec![date],
                ))))
            }
27820
27821 // GENERATE_ARRAY(start, end, step?) -> GenerateSeries expression
27822 "GENERATE_ARRAY" => {
27823 let start = args.get(0).cloned();
27824 let end = args.get(1).cloned();
27825 let step = args.get(2).cloned();
27826 Ok(Expression::GenerateSeries(Box::new(
27827 crate::expressions::GenerateSeries {
27828 start: start.map(Box::new),
27829 end: end.map(Box::new),
27830 step: step.map(Box::new),
27831 is_end_exclusive: None,
27832 },
27833 )))
27834 }
27835
27836 // GENERATE_TIMESTAMP_ARRAY(start, end, step) -> GenerateSeries expression
27837 "GENERATE_TIMESTAMP_ARRAY" => {
27838 let start = args.get(0).cloned();
27839 let end = args.get(1).cloned();
27840 let step = args.get(2).cloned();
27841
27842 if matches!(target, DialectType::DuckDB) {
27843 // DuckDB: GENERATE_SERIES(CAST(start AS TIMESTAMP), CAST(end AS TIMESTAMP), step)
27844 // Only cast string literals - leave columns/expressions as-is
27845 let maybe_cast_ts = |expr: Expression| -> Expression {
27846 if matches!(&expr, Expression::Literal(Literal::String(_))) {
27847 Expression::Cast(Box::new(Cast {
27848 this: expr,
27849 to: DataType::Timestamp {
27850 precision: None,
27851 timezone: false,
27852 },
27853 trailing_comments: vec![],
27854 double_colon_syntax: false,
27855 format: None,
27856 default: None,
27857 }))
27858 } else {
27859 expr
27860 }
27861 };
27862 let cast_start = start.map(maybe_cast_ts);
27863 let cast_end = end.map(maybe_cast_ts);
27864 Ok(Expression::GenerateSeries(Box::new(
27865 crate::expressions::GenerateSeries {
27866 start: cast_start.map(Box::new),
27867 end: cast_end.map(Box::new),
27868 step: step.map(Box::new),
27869 is_end_exclusive: None,
27870 },
27871 )))
27872 } else {
27873 Ok(Expression::GenerateSeries(Box::new(
27874 crate::expressions::GenerateSeries {
27875 start: start.map(Box::new),
27876 end: end.map(Box::new),
27877 step: step.map(Box::new),
27878 is_end_exclusive: None,
27879 },
27880 )))
27881 }
27882 }
27883
            // TO_JSON(x) -> target-specific (from Spark/Hive)
            "TO_JSON" => {
                match target {
                    DialectType::Presto | DialectType::Trino => {
                        // JSON_FORMAT(CAST(x AS JSON))
                        // Missing argument degrades to NULL rather than panicking.
                        let arg = args
                            .into_iter()
                            .next()
                            .unwrap_or(Expression::Null(crate::expressions::Null));
                        let cast_json = Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Custom {
                                name: "JSON".to_string(),
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "JSON_FORMAT".to_string(),
                            vec![cast_json],
                        ))))
                    }
                    DialectType::BigQuery => Ok(Expression::Function(Box::new(Function::new(
                        "TO_JSON_STRING".to_string(),
                        args,
                    )))),
                    DialectType::DuckDB => {
                        // CAST(TO_JSON(x) AS TEXT)
                        let arg = args
                            .into_iter()
                            .next()
                            .unwrap_or(Expression::Null(crate::expressions::Null));
                        let to_json = Expression::Function(Box::new(Function::new(
                            "TO_JSON".to_string(),
                            vec![arg],
                        )));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: to_json,
                            to: DataType::Text,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    }
                    // All other targets: keep TO_JSON(...) unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "TO_JSON".to_string(),
                        args,
                    )))),
                }
            }

            // TO_JSON_STRING(x) -> target-specific
            "TO_JSON_STRING" => {
                match target {
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive => Ok(
                        Expression::Function(Box::new(Function::new("TO_JSON".to_string(), args))),
                    ),
                    DialectType::Presto | DialectType::Trino => {
                        // JSON_FORMAT(CAST(x AS JSON))
                        let arg = args
                            .into_iter()
                            .next()
                            .unwrap_or(Expression::Null(crate::expressions::Null));
                        let cast_json = Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Custom {
                                name: "JSON".to_string(),
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "JSON_FORMAT".to_string(),
                            vec![cast_json],
                        ))))
                    }
                    DialectType::DuckDB => {
                        // CAST(TO_JSON(x) AS TEXT)
                        let arg = args
                            .into_iter()
                            .next()
                            .unwrap_or(Expression::Null(crate::expressions::Null));
                        let to_json = Expression::Function(Box::new(Function::new(
                            "TO_JSON".to_string(),
                            vec![arg],
                        )));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: to_json,
                            to: DataType::Text,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    }
                    DialectType::Snowflake => {
                        // TO_JSON(x)
                        Ok(Expression::Function(Box::new(Function::new(
                            "TO_JSON".to_string(),
                            args,
                        ))))
                    }
                    // All other targets: keep TO_JSON_STRING(...) unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "TO_JSON_STRING".to_string(),
                        args,
                    )))),
                }
            }
27997
            // SAFE_ADD(x, y) -> SafeAdd expression node; the target generator
            // decides how overflow-safe addition is rendered.
            "SAFE_ADD" if args.len() == 2 => {
                let x = args.remove(0);
                let y = args.remove(0);
                Ok(Expression::SafeAdd(Box::new(crate::expressions::SafeAdd {
                    this: Box::new(x),
                    expression: Box::new(y),
                })))
            }

            // SAFE_SUBTRACT(x, y) -> SafeSubtract expression node.
            "SAFE_SUBTRACT" if args.len() == 2 => {
                let x = args.remove(0);
                let y = args.remove(0);
                Ok(Expression::SafeSubtract(Box::new(
                    crate::expressions::SafeSubtract {
                        this: Box::new(x),
                        expression: Box::new(y),
                    },
                )))
            }

            // SAFE_MULTIPLY(x, y) -> SafeMultiply expression node.
            "SAFE_MULTIPLY" if args.len() == 2 => {
                let x = args.remove(0);
                let y = args.remove(0);
                Ok(Expression::SafeMultiply(Box::new(
                    crate::expressions::SafeMultiply {
                        this: Box::new(x),
                        expression: Box::new(y),
                    },
                )))
            }
28031
            // REGEXP_CONTAINS(str, pattern) -> RegexpLike expression node
            // (no flags argument is carried over).
            "REGEXP_CONTAINS" if args.len() == 2 => {
                let str_expr = args.remove(0);
                let pattern = args.remove(0);
                Ok(Expression::RegexpLike(Box::new(
                    crate::expressions::RegexpFunc {
                        this: str_expr,
                        pattern,
                        flags: None,
                    },
                )))
            }

            // CONTAINS_SUBSTR(a, b) -> CONTAINS(LOWER(a), LOWER(b))
            // Both sides are lower-cased to emulate case-insensitive matching.
            "CONTAINS_SUBSTR" if args.len() == 2 => {
                let a = args.remove(0);
                let b = args.remove(0);
                let lower_a = Expression::Lower(Box::new(crate::expressions::UnaryFunc::new(a)));
                let lower_b = Expression::Lower(Box::new(crate::expressions::UnaryFunc::new(b)));
                Ok(Expression::Function(Box::new(Function::new(
                    "CONTAINS".to_string(),
                    vec![lower_a, lower_b],
                ))))
            }
28056
            // INT64(x) -> CAST(x AS BIGINT)
            "INT64" if args.len() == 1 => {
                let arg = args.remove(0);
                Ok(Expression::Cast(Box::new(Cast {
                    this: arg,
                    to: DataType::BigInt { length: None },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                })))
            }
28069
28070 // INSTR(str, substr) -> target-specific
28071 "INSTR" if args.len() >= 2 => {
28072 let str_expr = args.remove(0);
28073 let substr = args.remove(0);
28074 if matches!(target, DialectType::Snowflake) {
28075 // CHARINDEX(substr, str)
28076 Ok(Expression::Function(Box::new(Function::new(
28077 "CHARINDEX".to_string(),
28078 vec![substr, str_expr],
28079 ))))
28080 } else if matches!(target, DialectType::BigQuery) {
28081 // Keep as INSTR
28082 Ok(Expression::Function(Box::new(Function::new(
28083 "INSTR".to_string(),
28084 vec![str_expr, substr],
28085 ))))
28086 } else {
28087 // Default: keep as INSTR
28088 Ok(Expression::Function(Box::new(Function::new(
28089 "INSTR".to_string(),
28090 vec![str_expr, substr],
28091 ))))
28092 }
28093 }
28094
            // BigQuery DATE_TRUNC(expr, unit) -> DATE_TRUNC('unit', expr) for standard SQL
            "DATE_TRUNC" if args.len() == 2 => {
                let expr = args.remove(0);
                let unit_expr = args.remove(0);
                // Normalized unit name derived from the unit expression.
                let unit_str = get_unit_str(&unit_expr);

                match target {
                    DialectType::DuckDB
                    | DialectType::Snowflake
                    | DialectType::PostgreSQL
                    | DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Databricks
                    | DialectType::Spark
                    | DialectType::Redshift
                    | DialectType::ClickHouse
                    | DialectType::TSQL => {
                        // Standard: DATE_TRUNC('UNIT', expr) — unit first, as a
                        // string literal, expression second.
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_TRUNC".to_string(),
                            vec![Expression::Literal(Literal::String(unit_str)), expr],
                        ))))
                    }
                    _ => {
                        // Keep BigQuery arg order: DATE_TRUNC(expr, unit) — the
                        // original unit expression is passed through unmodified.
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_TRUNC".to_string(),
                            vec![expr, unit_expr],
                        ))))
                    }
                }
            }
28127
            // TIMESTAMP_TRUNC / DATETIME_TRUNC -> target-specific
            "TIMESTAMP_TRUNC" | "DATETIME_TRUNC" if args.len() >= 2 => {
                // TIMESTAMP_TRUNC(ts, unit) or TIMESTAMP_TRUNC(ts, unit, timezone)
                let ts = args.remove(0);
                let unit_expr = args.remove(0);
                // Optional third argument is the timezone.
                let tz = if !args.is_empty() {
                    Some(args.remove(0))
                } else {
                    None
                };
                let unit_str = get_unit_str(&unit_expr);

                match target {
                    DialectType::DuckDB => {
                        // DuckDB: DATE_TRUNC('UNIT', CAST(ts AS TIMESTAMPTZ))
                        // With timezone: DATE_TRUNC('UNIT', ts AT TIME ZONE 'tz') AT TIME ZONE 'tz' (for DAY granularity)
                        // Without timezone for MINUTE+ granularity: just DATE_TRUNC
                        let is_coarse = matches!(
                            unit_str.as_str(),
                            "DAY" | "WEEK" | "MONTH" | "QUARTER" | "YEAR"
                        );
                        // For DATETIME_TRUNC, cast string args to TIMESTAMP
                        let cast_ts = if name == "DATETIME_TRUNC" {
                            match ts {
                                Expression::Literal(Literal::String(ref _s)) => {
                                    Expression::Cast(Box::new(Cast {
                                        this: ts,
                                        to: DataType::Timestamp {
                                            precision: None,
                                            timezone: false,
                                        },
                                        trailing_comments: vec![],
                                        double_colon_syntax: false,
                                        format: None,
                                        default: None,
                                    }))
                                }
                                // Non-literal arguments defer to the shared helper.
                                _ => Self::maybe_cast_ts_to_tz(ts, &name),
                            }
                        } else {
                            Self::maybe_cast_ts_to_tz(ts, &name)
                        };

                        if let Some(tz_arg) = tz {
                            if is_coarse {
                                // DATE_TRUNC('UNIT', ts AT TIME ZONE 'tz') AT TIME ZONE 'tz'
                                let at_tz = Expression::AtTimeZone(Box::new(
                                    crate::expressions::AtTimeZone {
                                        this: cast_ts,
                                        zone: tz_arg.clone(),
                                    },
                                ));
                                let date_trunc = Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![Expression::Literal(Literal::String(unit_str)), at_tz],
                                )));
                                Ok(Expression::AtTimeZone(Box::new(
                                    crate::expressions::AtTimeZone {
                                        this: date_trunc,
                                        zone: tz_arg,
                                    },
                                )))
                            } else {
                                // For MINUTE/HOUR: no AT TIME ZONE wrapper, just DATE_TRUNC('UNIT', ts)
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![Expression::Literal(Literal::String(unit_str)), cast_ts],
                                ))))
                            }
                        } else {
                            // No timezone: DATE_TRUNC('UNIT', CAST(ts AS TIMESTAMPTZ))
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_TRUNC".to_string(),
                                vec![Expression::Literal(Literal::String(unit_str)), cast_ts],
                            ))))
                        }
                    }
                    DialectType::Databricks | DialectType::Spark => {
                        // Databricks/Spark: DATE_TRUNC('UNIT', ts)
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_TRUNC".to_string(),
                            vec![Expression::Literal(Literal::String(unit_str)), ts],
                        ))))
                    }
                    _ => {
                        // Default: keep as TIMESTAMP_TRUNC('UNIT', ts, [tz])
                        // NOTE(review): this re-emits TIMESTAMP_TRUNC even when the
                        // source call was DATETIME_TRUNC — confirm that is intended.
                        let unit = Expression::Literal(Literal::String(unit_str));
                        let mut date_trunc_args = vec![unit, ts];
                        if let Some(tz_arg) = tz {
                            date_trunc_args.push(tz_arg);
                        }
                        Ok(Expression::Function(Box::new(Function::new(
                            "TIMESTAMP_TRUNC".to_string(),
                            date_trunc_args,
                        ))))
                    }
                }
            }
28226
            // TIME(h, m, s) -> target-specific, TIME('string') -> CAST('string' AS TIME)
            "TIME" => {
                if args.len() == 3 {
                    // TIME(h, m, s) constructor
                    match target {
                        DialectType::TSQL => {
                            // TIMEFROMPARTS(h, m, s, 0, 0) — the two appended zeros
                            // fill the fractional-seconds and precision arguments.
                            args.push(Expression::number(0));
                            args.push(Expression::number(0));
                            Ok(Expression::Function(Box::new(Function::new(
                                "TIMEFROMPARTS".to_string(),
                                args,
                            ))))
                        }
                        DialectType::MySQL => Ok(Expression::Function(Box::new(Function::new(
                            "MAKETIME".to_string(),
                            args,
                        )))),
                        DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
                            Function::new("MAKE_TIME".to_string(), args),
                        ))),
                        _ => Ok(Expression::Function(Box::new(Function::new(
                            "TIME".to_string(),
                            args,
                        )))),
                    }
                } else if args.len() == 1 {
                    let arg = args.remove(0);
                    if matches!(target, DialectType::Spark) {
                        // Spark: CAST(x AS TIMESTAMP) (yes, TIMESTAMP not TIME)
                        Ok(Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    } else {
                        // Most targets: CAST(x AS TIME)
                        Ok(Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Time {
                                precision: None,
                                timezone: false,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    }
                } else if args.len() == 2 {
                    // TIME(expr, timezone) -> CAST(CAST(expr AS TIMESTAMPTZ) AT TIME ZONE tz AS TIME)
                    let expr = args.remove(0);
                    let tz = args.remove(0);
                    let cast_tstz = Expression::Cast(Box::new(Cast {
                        this: expr,
                        to: DataType::Timestamp {
                            timezone: true,
                            precision: None,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    }));
                    let at_tz = Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                        this: cast_tstz,
                        zone: tz,
                    }));
                    Ok(Expression::Cast(Box::new(Cast {
                        this: at_tz,
                        to: DataType::Time {
                            precision: None,
                            timezone: false,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    })))
                } else {
                    // Any other arity: pass TIME(...) through unchanged.
                    Ok(Expression::Function(Box::new(Function::new(
                        "TIME".to_string(),
                        args,
                    ))))
                }
            }
28319
            // DATETIME('string') -> CAST('string' AS TIMESTAMP)
            // DATETIME('date', TIME 'time') -> CAST(CAST('date' AS DATE) + CAST('time' AS TIME) AS TIMESTAMP)
            // DATETIME('string', 'timezone') -> CAST(CAST('string' AS TIMESTAMPTZ) AT TIME ZONE tz AS TIMESTAMP)
            // DATETIME(y, m, d, h, min, s) -> target-specific
            "DATETIME" => {
                // For BigQuery target: keep DATETIME function but convert TIME literal to CAST
                if matches!(target, DialectType::BigQuery) {
                    if args.len() == 2 {
                        let has_time_literal =
                            matches!(&args[1], Expression::Literal(Literal::Time(_)));
                        if has_time_literal {
                            let first = args.remove(0);
                            let second = args.remove(0);
                            // Re-express TIME 'x' as CAST('x' AS TIME) inside DATETIME(...).
                            let time_as_cast = match second {
                                Expression::Literal(Literal::Time(s)) => {
                                    Expression::Cast(Box::new(Cast {
                                        this: Expression::Literal(Literal::String(s)),
                                        to: DataType::Time {
                                            precision: None,
                                            timezone: false,
                                        },
                                        trailing_comments: vec![],
                                        double_colon_syntax: false,
                                        format: None,
                                        default: None,
                                    }))
                                }
                                other => other,
                            };
                            return Ok(Expression::Function(Box::new(Function::new(
                                "DATETIME".to_string(),
                                vec![first, time_as_cast],
                            ))));
                        }
                    }
                    // Any other BigQuery shape: pass DATETIME(...) through unchanged.
                    return Ok(Expression::Function(Box::new(Function::new(
                        "DATETIME".to_string(),
                        args,
                    ))));
                }

                if args.len() == 1 {
                    // DATETIME('string') -> CAST('string' AS TIMESTAMP)
                    let arg = args.remove(0);
                    Ok(Expression::Cast(Box::new(Cast {
                        this: arg,
                        to: DataType::Timestamp {
                            timezone: false,
                            precision: None,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    })))
                } else if args.len() == 2 {
                    let first = args.remove(0);
                    let second = args.remove(0);
                    // Check if second arg is a TIME literal
                    let is_time_literal = matches!(&second, Expression::Literal(Literal::Time(_)));
                    if is_time_literal {
                        // DATETIME('date', TIME 'time') -> CAST(CAST(date AS DATE) + CAST('time' AS TIME) AS TIMESTAMP)
                        let cast_date = Expression::Cast(Box::new(Cast {
                            this: first,
                            to: DataType::Date,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        // Convert TIME 'x' literal to string 'x' so CAST produces CAST('x' AS TIME) not CAST(TIME 'x' AS TIME)
                        let time_as_string = match second {
                            Expression::Literal(Literal::Time(s)) => {
                                Expression::Literal(Literal::String(s))
                            }
                            other => other,
                        };
                        let cast_time = Expression::Cast(Box::new(Cast {
                            this: time_as_string,
                            to: DataType::Time {
                                precision: None,
                                timezone: false,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        let add_expr =
                            Expression::Add(Box::new(BinaryOp::new(cast_date, cast_time)));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: add_expr,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    } else {
                        // DATETIME('string', 'timezone')
                        let cast_tstz = Expression::Cast(Box::new(Cast {
                            this: first,
                            to: DataType::Timestamp {
                                timezone: true,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        let at_tz =
                            Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                                this: cast_tstz,
                                zone: second,
                            }));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: at_tz,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    }
                } else if args.len() >= 3 {
                    // DATETIME(y, m, d, h, min, s) -> TIMESTAMP_FROM_PARTS for Snowflake
                    // NOTE(review): the comment below mentions MAKE_TIMESTAMP for other
                    // targets, but non-Snowflake targets currently keep DATETIME(...) as-is.
                    // For other targets, use MAKE_TIMESTAMP or similar
                    if matches!(target, DialectType::Snowflake) {
                        Ok(Expression::Function(Box::new(Function::new(
                            "TIMESTAMP_FROM_PARTS".to_string(),
                            args,
                        ))))
                    } else {
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATETIME".to_string(),
                            args,
                        ))))
                    }
                } else {
                    // Zero args: pass DATETIME() through unchanged.
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATETIME".to_string(),
                        args,
                    ))))
                }
            }
28471
28472 // TIMESTAMP(x) -> CAST(x AS TIMESTAMP WITH TIME ZONE) for Presto
28473 // TIMESTAMP(x, tz) -> CAST(x AS TIMESTAMP) AT TIME ZONE tz for DuckDB
28474 "TIMESTAMP" => {
28475 if args.len() == 1 {
28476 let arg = args.remove(0);
28477 Ok(Expression::Cast(Box::new(Cast {
28478 this: arg,
28479 to: DataType::Timestamp {
28480 timezone: true,
28481 precision: None,
28482 },
28483 trailing_comments: vec![],
28484 double_colon_syntax: false,
28485 format: None,
28486 default: None,
28487 })))
28488 } else if args.len() == 2 {
28489 let arg = args.remove(0);
28490 let tz = args.remove(0);
28491 let cast_ts = Expression::Cast(Box::new(Cast {
28492 this: arg,
28493 to: DataType::Timestamp {
28494 timezone: false,
28495 precision: None,
28496 },
28497 trailing_comments: vec![],
28498 double_colon_syntax: false,
28499 format: None,
28500 default: None,
28501 }));
28502 if matches!(target, DialectType::Snowflake) {
28503 // CONVERT_TIMEZONE('tz', CAST(x AS TIMESTAMP))
28504 Ok(Expression::Function(Box::new(Function::new(
28505 "CONVERT_TIMEZONE".to_string(),
28506 vec![tz, cast_ts],
28507 ))))
28508 } else {
28509 Ok(Expression::AtTimeZone(Box::new(
28510 crate::expressions::AtTimeZone {
28511 this: cast_ts,
28512 zone: tz,
28513 },
28514 )))
28515 }
28516 } else {
28517 Ok(Expression::Function(Box::new(Function::new(
28518 "TIMESTAMP".to_string(),
28519 args,
28520 ))))
28521 }
28522 }
28523
28524 // STRING(x) -> CAST(x AS VARCHAR/TEXT)
28525 // STRING(x, tz) -> CAST(CAST(x AS TIMESTAMP) AT TIME ZONE 'UTC' AT TIME ZONE tz AS VARCHAR/TEXT)
28526 "STRING" => {
28527 if args.len() == 1 {
28528 let arg = args.remove(0);
28529 let cast_type = match target {
28530 DialectType::DuckDB => DataType::Text,
28531 _ => DataType::VarChar {
28532 length: None,
28533 parenthesized_length: false,
28534 },
28535 };
28536 Ok(Expression::Cast(Box::new(Cast {
28537 this: arg,
28538 to: cast_type,
28539 trailing_comments: vec![],
28540 double_colon_syntax: false,
28541 format: None,
28542 default: None,
28543 })))
28544 } else if args.len() == 2 {
28545 let arg = args.remove(0);
28546 let tz = args.remove(0);
28547 let cast_type = match target {
28548 DialectType::DuckDB => DataType::Text,
28549 _ => DataType::VarChar {
28550 length: None,
28551 parenthesized_length: false,
28552 },
28553 };
28554 if matches!(target, DialectType::Snowflake) {
28555 // STRING(x, tz) -> CAST(CONVERT_TIMEZONE('UTC', tz, x) AS VARCHAR)
28556 let convert_tz = Expression::Function(Box::new(Function::new(
28557 "CONVERT_TIMEZONE".to_string(),
28558 vec![
28559 Expression::Literal(Literal::String("UTC".to_string())),
28560 tz,
28561 arg,
28562 ],
28563 )));
28564 Ok(Expression::Cast(Box::new(Cast {
28565 this: convert_tz,
28566 to: cast_type,
28567 trailing_comments: vec![],
28568 double_colon_syntax: false,
28569 format: None,
28570 default: None,
28571 })))
28572 } else {
28573 // STRING(x, tz) -> CAST(CAST(x AS TIMESTAMP) AT TIME ZONE 'UTC' AT TIME ZONE tz AS TEXT/VARCHAR)
28574 let cast_ts = Expression::Cast(Box::new(Cast {
28575 this: arg,
28576 to: DataType::Timestamp {
28577 timezone: false,
28578 precision: None,
28579 },
28580 trailing_comments: vec![],
28581 double_colon_syntax: false,
28582 format: None,
28583 default: None,
28584 }));
28585 let at_utc =
28586 Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
28587 this: cast_ts,
28588 zone: Expression::Literal(Literal::String("UTC".to_string())),
28589 }));
28590 let at_tz =
28591 Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
28592 this: at_utc,
28593 zone: tz,
28594 }));
28595 Ok(Expression::Cast(Box::new(Cast {
28596 this: at_tz,
28597 to: cast_type,
28598 trailing_comments: vec![],
28599 double_colon_syntax: false,
28600 format: None,
28601 default: None,
28602 })))
28603 }
28604 } else {
28605 Ok(Expression::Function(Box::new(Function::new(
28606 "STRING".to_string(),
28607 args,
28608 ))))
28609 }
28610 }
28611
            // UNIX_SECONDS, UNIX_MILLIS, UNIX_MICROS as functions (not expressions)
            "UNIX_SECONDS" if args.len() == 1 => {
                let ts = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // CAST(EPOCH(CAST(ts AS TIMESTAMPTZ)) AS BIGINT)
                        let cast_ts = Self::ensure_cast_timestamptz(ts);
                        let epoch = Expression::Function(Box::new(Function::new(
                            "EPOCH".to_string(),
                            vec![cast_ts],
                        )));
                        // Cast EPOCH's result to BIGINT so the value is integral.
                        Ok(Expression::Cast(Box::new(Cast {
                            this: epoch,
                            to: DataType::BigInt { length: None },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    }
                    DialectType::Snowflake => {
                        // TIMESTAMPDIFF(SECONDS, CAST('1970-01-01 00:00:00+00' AS TIMESTAMPTZ), ts)
                        // i.e. whole seconds elapsed since the Unix epoch.
                        let epoch = Expression::Cast(Box::new(Cast {
                            this: Expression::Literal(Literal::String(
                                "1970-01-01 00:00:00+00".to_string(),
                            )),
                            to: DataType::Timestamp {
                                timezone: true,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        Ok(Expression::TimestampDiff(Box::new(
                            crate::expressions::TimestampDiff {
                                this: Box::new(epoch),
                                expression: Box::new(ts),
                                unit: Some("SECONDS".to_string()),
                            },
                        )))
                    }
                    // Other targets: pass through unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "UNIX_SECONDS".to_string(),
                        vec![ts],
                    )))),
                }
            }

            "UNIX_MILLIS" if args.len() == 1 => {
                let ts = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // EPOCH_MS(CAST(ts AS TIMESTAMPTZ))
                        let cast_ts = Self::ensure_cast_timestamptz(ts);
                        Ok(Expression::Function(Box::new(Function::new(
                            "EPOCH_MS".to_string(),
                            vec![cast_ts],
                        ))))
                    }
                    // Other targets: pass through unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "UNIX_MILLIS".to_string(),
                        vec![ts],
                    )))),
                }
            }

            "UNIX_MICROS" if args.len() == 1 => {
                let ts = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // EPOCH_US(CAST(ts AS TIMESTAMPTZ))
                        let cast_ts = Self::ensure_cast_timestamptz(ts);
                        Ok(Expression::Function(Box::new(Function::new(
                            "EPOCH_US".to_string(),
                            vec![cast_ts],
                        ))))
                    }
                    // Other targets: pass through unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "UNIX_MICROS".to_string(),
                        vec![ts],
                    )))),
                }
            }
28697
28698 // ARRAY_CONCAT / LIST_CONCAT -> target-specific
28699 "ARRAY_CONCAT" | "LIST_CONCAT" => {
28700 match target {
28701 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
28702 // CONCAT(arr1, arr2, ...)
28703 Ok(Expression::Function(Box::new(Function::new(
28704 "CONCAT".to_string(),
28705 args,
28706 ))))
28707 }
28708 DialectType::Presto | DialectType::Trino => {
28709 // CONCAT(arr1, arr2, ...)
28710 Ok(Expression::Function(Box::new(Function::new(
28711 "CONCAT".to_string(),
28712 args,
28713 ))))
28714 }
28715 DialectType::Snowflake => {
28716 // ARRAY_CAT(arr1, ARRAY_CAT(arr2, arr3))
28717 if args.len() == 1 {
28718 // ARRAY_CAT requires 2 args, add empty array as []
28719 let empty_arr = Expression::ArrayFunc(Box::new(
28720 crate::expressions::ArrayConstructor {
28721 expressions: vec![],
28722 bracket_notation: true,
28723 use_list_keyword: false,
28724 },
28725 ));
28726 let mut new_args = args;
28727 new_args.push(empty_arr);
28728 Ok(Expression::Function(Box::new(Function::new(
28729 "ARRAY_CAT".to_string(),
28730 new_args,
28731 ))))
28732 } else if args.is_empty() {
28733 Ok(Expression::Function(Box::new(Function::new(
28734 "ARRAY_CAT".to_string(),
28735 args,
28736 ))))
28737 } else {
28738 let mut it = args.into_iter().rev();
28739 let mut result = it.next().unwrap();
28740 for arr in it {
28741 result = Expression::Function(Box::new(Function::new(
28742 "ARRAY_CAT".to_string(),
28743 vec![arr, result],
28744 )));
28745 }
28746 Ok(result)
28747 }
28748 }
28749 DialectType::PostgreSQL => {
28750 // ARRAY_CAT(arr1, ARRAY_CAT(arr2, arr3))
28751 if args.len() <= 1 {
28752 Ok(Expression::Function(Box::new(Function::new(
28753 "ARRAY_CAT".to_string(),
28754 args,
28755 ))))
28756 } else {
28757 let mut it = args.into_iter().rev();
28758 let mut result = it.next().unwrap();
28759 for arr in it {
28760 result = Expression::Function(Box::new(Function::new(
28761 "ARRAY_CAT".to_string(),
28762 vec![arr, result],
28763 )));
28764 }
28765 Ok(result)
28766 }
28767 }
28768 DialectType::Redshift => {
28769 // ARRAY_CONCAT(arr1, ARRAY_CONCAT(arr2, arr3))
28770 if args.len() <= 2 {
28771 Ok(Expression::Function(Box::new(Function::new(
28772 "ARRAY_CONCAT".to_string(),
28773 args,
28774 ))))
28775 } else {
28776 let mut it = args.into_iter().rev();
28777 let mut result = it.next().unwrap();
28778 for arr in it {
28779 result = Expression::Function(Box::new(Function::new(
28780 "ARRAY_CONCAT".to_string(),
28781 vec![arr, result],
28782 )));
28783 }
28784 Ok(result)
28785 }
28786 }
28787 DialectType::DuckDB => {
28788 // LIST_CONCAT supports multiple args natively in DuckDB
28789 Ok(Expression::Function(Box::new(Function::new(
28790 "LIST_CONCAT".to_string(),
28791 args,
28792 ))))
28793 }
28794 _ => Ok(Expression::Function(Box::new(Function::new(
28795 "ARRAY_CONCAT".to_string(),
28796 args,
28797 )))),
28798 }
28799 }
28800
            // ARRAY_CONCAT_AGG -> Snowflake: ARRAY_FLATTEN(ARRAY_AGG(x))
            "ARRAY_CONCAT_AGG" if args.len() == 1 => {
                let arg = args.remove(0);
                match target {
                    DialectType::Snowflake => {
                        // Aggregate the rows into one array, then flatten the
                        // resulting array-of-arrays.
                        let array_agg =
                            Expression::ArrayAgg(Box::new(crate::expressions::AggFunc {
                                this: arg,
                                distinct: false,
                                filter: None,
                                order_by: vec![],
                                name: None,
                                ignore_nulls: None,
                                having_max: None,
                                limit: None,
                            }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "ARRAY_FLATTEN".to_string(),
                            vec![array_agg],
                        ))))
                    }
                    // Other targets: pass through unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "ARRAY_CONCAT_AGG".to_string(),
                        vec![arg],
                    )))),
                }
            }
28828
            // MD5/SHA1/SHA256/SHA512 -> target-specific hash functions
            // NOTE(review): the UNHEX / *_BINARY wrappers below suggest the source
            // semantics are binary (BYTES) digests rather than hex strings — confirm.
            "MD5" if args.len() == 1 => {
                let arg = args.remove(0);
                match target {
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
                        // UNHEX(MD5(x))
                        let md5 = Expression::Function(Box::new(Function::new(
                            "MD5".to_string(),
                            vec![arg],
                        )));
                        Ok(Expression::Function(Box::new(Function::new(
                            "UNHEX".to_string(),
                            vec![md5],
                        ))))
                    }
                    DialectType::Snowflake => {
                        // MD5_BINARY(x)
                        Ok(Expression::Function(Box::new(Function::new(
                            "MD5_BINARY".to_string(),
                            vec![arg],
                        ))))
                    }
                    // Other targets: pass through unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "MD5".to_string(),
                        vec![arg],
                    )))),
                }
            }

            "SHA1" if args.len() == 1 => {
                let arg = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // UNHEX(SHA1(x))
                        let sha1 = Expression::Function(Box::new(Function::new(
                            "SHA1".to_string(),
                            vec![arg],
                        )));
                        Ok(Expression::Function(Box::new(Function::new(
                            "UNHEX".to_string(),
                            vec![sha1],
                        ))))
                    }
                    // Other targets: pass through unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "SHA1".to_string(),
                        vec![arg],
                    )))),
                }
            }

            "SHA256" if args.len() == 1 => {
                let arg = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // UNHEX(SHA256(x))
                        let sha = Expression::Function(Box::new(Function::new(
                            "SHA256".to_string(),
                            vec![arg],
                        )));
                        Ok(Expression::Function(Box::new(Function::new(
                            "UNHEX".to_string(),
                            vec![sha],
                        ))))
                    }
                    DialectType::Snowflake => {
                        // SHA2_BINARY(x, 256)
                        Ok(Expression::Function(Box::new(Function::new(
                            "SHA2_BINARY".to_string(),
                            vec![arg, Expression::number(256)],
                        ))))
                    }
                    DialectType::Redshift | DialectType::Spark => {
                        // SHA2(x, 256) — digest width passed as second argument.
                        Ok(Expression::Function(Box::new(Function::new(
                            "SHA2".to_string(),
                            vec![arg, Expression::number(256)],
                        ))))
                    }
                    // Other targets: pass through unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "SHA256".to_string(),
                        vec![arg],
                    )))),
                }
            }

            "SHA512" if args.len() == 1 => {
                let arg = args.remove(0);
                match target {
                    DialectType::Snowflake => {
                        // SHA2_BINARY(x, 512)
                        Ok(Expression::Function(Box::new(Function::new(
                            "SHA2_BINARY".to_string(),
                            vec![arg, Expression::number(512)],
                        ))))
                    }
                    DialectType::Redshift | DialectType::Spark => {
                        // SHA2(x, 512)
                        Ok(Expression::Function(Box::new(Function::new(
                            "SHA2".to_string(),
                            vec![arg, Expression::number(512)],
                        ))))
                    }
                    // Other targets: pass through unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "SHA512".to_string(),
                        vec![arg],
                    )))),
                }
            }
28937
            // REGEXP_EXTRACT_ALL(str, pattern) -> add default group arg
            "REGEXP_EXTRACT_ALL" if args.len() == 2 => {
                let str_expr = args.remove(0);
                let pattern = args.remove(0);

                // Check if pattern contains capturing groups (parentheses)
                // NOTE(review): this is a heuristic — it also fires on escaped
                // parens (`\(`) and non-capturing groups (`(?:`), and never fires
                // for non-literal patterns. Confirm that is acceptable.
                let has_groups = match &pattern {
                    Expression::Literal(Literal::String(s)) => s.contains('(') && s.contains(')'),
                    _ => false,
                };

                match target {
                    DialectType::DuckDB => {
                        // Always pass an explicit group index: 1 when the pattern
                        // appears to capture, 0 (whole match) otherwise.
                        let group = if has_groups {
                            Expression::number(1)
                        } else {
                            Expression::number(0)
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "REGEXP_EXTRACT_ALL".to_string(),
                            vec![str_expr, pattern, group],
                        ))))
                    }
                    DialectType::Spark | DialectType::Databricks => {
                        // Spark's default group_index is 1 (same as BigQuery), so omit for capturing groups
                        if has_groups {
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![str_expr, pattern],
                            ))))
                        } else {
                            // No groups: force index 0 (whole match).
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![str_expr, pattern, Expression::number(0)],
                            ))))
                        }
                    }
                    DialectType::Presto | DialectType::Trino => {
                        // Opposite of Spark: explicit index 1 only when capturing.
                        if has_groups {
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![str_expr, pattern, Expression::number(1)],
                            ))))
                        } else {
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![str_expr, pattern],
                            ))))
                        }
                    }
                    DialectType::Snowflake => {
                        if has_groups {
                            // REGEXP_EXTRACT_ALL(str, pattern, 1, 1, 'c', 1)
                            // Extra args: position, occurrence, parameters, group —
                            // TODO(review): confirm against Snowflake's signature.
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![
                                    str_expr,
                                    pattern,
                                    Expression::number(1),
                                    Expression::number(1),
                                    Expression::Literal(Literal::String("c".to_string())),
                                    Expression::number(1),
                                ],
                            ))))
                        } else {
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![str_expr, pattern],
                            ))))
                        }
                    }
                    // Other targets: pass through unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "REGEXP_EXTRACT_ALL".to_string(),
                        vec![str_expr, pattern],
                    )))),
                }
            }
29015
29016 // MOD(x, y) -> x % y for PostgreSQL/DuckDB
29017 "MOD" if args.len() == 2 => {
29018 match target {
29019 DialectType::PostgreSQL
29020 | DialectType::DuckDB
29021 | DialectType::Presto
29022 | DialectType::Trino
29023 | DialectType::Athena
29024 | DialectType::Snowflake => {
29025 let x = args.remove(0);
29026 let y = args.remove(0);
29027 // Wrap complex expressions in parens to preserve precedence
29028 let needs_paren = |e: &Expression| {
29029 matches!(
29030 e,
29031 Expression::Add(_)
29032 | Expression::Sub(_)
29033 | Expression::Mul(_)
29034 | Expression::Div(_)
29035 )
29036 };
29037 let x = if needs_paren(&x) {
29038 Expression::Paren(Box::new(crate::expressions::Paren {
29039 this: x,
29040 trailing_comments: vec![],
29041 }))
29042 } else {
29043 x
29044 };
29045 let y = if needs_paren(&y) {
29046 Expression::Paren(Box::new(crate::expressions::Paren {
29047 this: y,
29048 trailing_comments: vec![],
29049 }))
29050 } else {
29051 y
29052 };
29053 Ok(Expression::Mod(Box::new(
29054 crate::expressions::BinaryOp::new(x, y),
29055 )))
29056 }
29057 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
29058 // Hive/Spark: a % b
29059 let x = args.remove(0);
29060 let y = args.remove(0);
29061 let needs_paren = |e: &Expression| {
29062 matches!(
29063 e,
29064 Expression::Add(_)
29065 | Expression::Sub(_)
29066 | Expression::Mul(_)
29067 | Expression::Div(_)
29068 )
29069 };
29070 let x = if needs_paren(&x) {
29071 Expression::Paren(Box::new(crate::expressions::Paren {
29072 this: x,
29073 trailing_comments: vec![],
29074 }))
29075 } else {
29076 x
29077 };
29078 let y = if needs_paren(&y) {
29079 Expression::Paren(Box::new(crate::expressions::Paren {
29080 this: y,
29081 trailing_comments: vec![],
29082 }))
29083 } else {
29084 y
29085 };
29086 Ok(Expression::Mod(Box::new(
29087 crate::expressions::BinaryOp::new(x, y),
29088 )))
29089 }
29090 _ => Ok(Expression::Function(Box::new(Function::new(
29091 "MOD".to_string(),
29092 args,
29093 )))),
29094 }
29095 }
29096
29097 // ARRAY_FILTER(arr, lambda) -> FILTER for Hive/Spark/Presto, ARRAY_FILTER for StarRocks
29098 "ARRAY_FILTER" if args.len() == 2 => {
29099 let name = match target {
29100 DialectType::DuckDB => "LIST_FILTER",
29101 DialectType::StarRocks => "ARRAY_FILTER",
29102 _ => "FILTER",
29103 };
29104 Ok(Expression::Function(Box::new(Function::new(
29105 name.to_string(),
29106 args,
29107 ))))
29108 }
29109 // FILTER(arr, lambda) -> ARRAY_FILTER for StarRocks, LIST_FILTER for DuckDB
29110 "FILTER" if args.len() == 2 => {
29111 let name = match target {
29112 DialectType::DuckDB => "LIST_FILTER",
29113 DialectType::StarRocks => "ARRAY_FILTER",
29114 _ => "FILTER",
29115 };
29116 Ok(Expression::Function(Box::new(Function::new(
29117 name.to_string(),
29118 args,
29119 ))))
29120 }
29121 // REDUCE(arr, init, lambda1, lambda2) -> AGGREGATE for Spark
29122 "REDUCE" if args.len() >= 3 => {
29123 let name = match target {
29124 DialectType::Spark | DialectType::Databricks => "AGGREGATE",
29125 _ => "REDUCE",
29126 };
29127 Ok(Expression::Function(Box::new(Function::new(
29128 name.to_string(),
29129 args,
29130 ))))
29131 }
29132 // ARRAY_REVERSE(x) -> arrayReverse for ClickHouse (handled by generator)
29133 "ARRAY_REVERSE" if args.len() == 1 => Ok(Expression::Function(Box::new(
29134 Function::new("ARRAY_REVERSE".to_string(), args),
29135 ))),
29136
29137 // CONCAT(a, b, ...) -> a || b || ... for DuckDB with 3+ args
29138 "CONCAT" if args.len() > 2 => match target {
29139 DialectType::DuckDB => {
29140 let mut it = args.into_iter();
29141 let mut result = it.next().unwrap();
29142 for arg in it {
29143 result = Expression::DPipe(Box::new(crate::expressions::DPipe {
29144 this: Box::new(result),
29145 expression: Box::new(arg),
29146 safe: None,
29147 }));
29148 }
29149 Ok(result)
29150 }
29151 _ => Ok(Expression::Function(Box::new(Function::new(
29152 "CONCAT".to_string(),
29153 args,
29154 )))),
29155 },
29156
29157 // GENERATE_DATE_ARRAY(start, end[, step]) -> target-specific
29158 "GENERATE_DATE_ARRAY" => {
29159 if matches!(target, DialectType::BigQuery) {
29160 // BQ->BQ: add default interval if not present
29161 if args.len() == 2 {
29162 let start = args.remove(0);
29163 let end = args.remove(0);
29164 let default_interval =
29165 Expression::Interval(Box::new(crate::expressions::Interval {
29166 this: Some(Expression::Literal(Literal::String("1".to_string()))),
29167 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
29168 unit: crate::expressions::IntervalUnit::Day,
29169 use_plural: false,
29170 }),
29171 }));
29172 Ok(Expression::Function(Box::new(Function::new(
29173 "GENERATE_DATE_ARRAY".to_string(),
29174 vec![start, end, default_interval],
29175 ))))
29176 } else {
29177 Ok(Expression::Function(Box::new(Function::new(
29178 "GENERATE_DATE_ARRAY".to_string(),
29179 args,
29180 ))))
29181 }
29182 } else if matches!(target, DialectType::DuckDB) {
29183 // DuckDB: CAST(GENERATE_SERIES(CAST(start AS DATE), CAST(end AS DATE), step) AS DATE[])
29184 let start = args.get(0).cloned();
29185 let end = args.get(1).cloned();
29186 let step = args.get(2).cloned().or_else(|| {
29187 Some(Expression::Interval(Box::new(
29188 crate::expressions::Interval {
29189 this: Some(Expression::Literal(Literal::String("1".to_string()))),
29190 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
29191 unit: crate::expressions::IntervalUnit::Day,
29192 use_plural: false,
29193 }),
29194 },
29195 )))
29196 });
29197
29198 // Wrap start/end in CAST(... AS DATE) only for string literals
29199 let maybe_cast_date = |expr: Expression| -> Expression {
29200 if matches!(&expr, Expression::Literal(Literal::String(_))) {
29201 Expression::Cast(Box::new(Cast {
29202 this: expr,
29203 to: DataType::Date,
29204 trailing_comments: vec![],
29205 double_colon_syntax: false,
29206 format: None,
29207 default: None,
29208 }))
29209 } else {
29210 expr
29211 }
29212 };
29213 let cast_start = start.map(maybe_cast_date);
29214 let cast_end = end.map(maybe_cast_date);
29215
29216 let gen_series =
29217 Expression::GenerateSeries(Box::new(crate::expressions::GenerateSeries {
29218 start: cast_start.map(Box::new),
29219 end: cast_end.map(Box::new),
29220 step: step.map(Box::new),
29221 is_end_exclusive: None,
29222 }));
29223
29224 // Wrap in CAST(... AS DATE[])
29225 Ok(Expression::Cast(Box::new(Cast {
29226 this: gen_series,
29227 to: DataType::Array {
29228 element_type: Box::new(DataType::Date),
29229 dimension: None,
29230 },
29231 trailing_comments: vec![],
29232 double_colon_syntax: false,
29233 format: None,
29234 default: None,
29235 })))
29236 } else if matches!(target, DialectType::Snowflake) {
29237 // Snowflake: keep as GENERATE_DATE_ARRAY function for later transform
29238 // (transform_generate_date_array_snowflake will convert to ARRAY_GENERATE_RANGE + DATEADD)
29239 if args.len() == 2 {
29240 let start = args.remove(0);
29241 let end = args.remove(0);
29242 let default_interval =
29243 Expression::Interval(Box::new(crate::expressions::Interval {
29244 this: Some(Expression::Literal(Literal::String("1".to_string()))),
29245 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
29246 unit: crate::expressions::IntervalUnit::Day,
29247 use_plural: false,
29248 }),
29249 }));
29250 Ok(Expression::Function(Box::new(Function::new(
29251 "GENERATE_DATE_ARRAY".to_string(),
29252 vec![start, end, default_interval],
29253 ))))
29254 } else {
29255 Ok(Expression::Function(Box::new(Function::new(
29256 "GENERATE_DATE_ARRAY".to_string(),
29257 args,
29258 ))))
29259 }
29260 } else {
29261 // Convert to GenerateSeries for other targets
29262 let start = args.get(0).cloned();
29263 let end = args.get(1).cloned();
29264 let step = args.get(2).cloned().or_else(|| {
29265 Some(Expression::Interval(Box::new(
29266 crate::expressions::Interval {
29267 this: Some(Expression::Literal(Literal::String("1".to_string()))),
29268 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
29269 unit: crate::expressions::IntervalUnit::Day,
29270 use_plural: false,
29271 }),
29272 },
29273 )))
29274 });
29275 Ok(Expression::GenerateSeries(Box::new(
29276 crate::expressions::GenerateSeries {
29277 start: start.map(Box::new),
29278 end: end.map(Box::new),
29279 step: step.map(Box::new),
29280 is_end_exclusive: None,
29281 },
29282 )))
29283 }
29284 }
29285
            // PARSE_DATE(format, str) -> target-specific
            "PARSE_DATE" if args.len() == 2 => {
                let format = args.remove(0);
                let str_expr = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // CAST(STRPTIME(str, duck_format) AS DATE)
                        let duck_format = Self::bq_format_to_duckdb(&format);
                        let strptime = Expression::Function(Box::new(Function::new(
                            "STRPTIME".to_string(),
                            vec![str_expr, duck_format],
                        )));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: strptime,
                            to: DataType::Date,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    }
                    DialectType::Snowflake => {
                        // _POLYGLOT_DATE(str, snowflake_format)
                        // Use marker so Snowflake target transform keeps it as DATE() instead of TO_DATE()
                        let sf_format = Self::bq_format_to_snowflake(&format);
                        Ok(Expression::Function(Box::new(Function::new(
                            "_POLYGLOT_DATE".to_string(),
                            vec![str_expr, sf_format],
                        ))))
                    }
                    // Other targets: pass through with the original argument order.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "PARSE_DATE".to_string(),
                        vec![format, str_expr],
                    )))),
                }
            }

            // PARSE_TIMESTAMP(format, str) -> target-specific
            "PARSE_TIMESTAMP" if args.len() >= 2 => {
                let format = args.remove(0);
                let str_expr = args.remove(0);
                // Optional third argument: a timezone.
                let tz = if !args.is_empty() {
                    Some(args.remove(0))
                } else {
                    None
                };
                match target {
                    DialectType::DuckDB => {
                        // STRPTIME(str, duck_format)
                        // NOTE(review): the optional `tz` argument is silently dropped
                        // on this path — confirm whether an AT TIME ZONE wrap is needed.
                        let duck_format = Self::bq_format_to_duckdb(&format);
                        let strptime = Expression::Function(Box::new(Function::new(
                            "STRPTIME".to_string(),
                            vec![str_expr, duck_format],
                        )));
                        Ok(strptime)
                    }
                    _ => {
                        // Reassemble the original argument list for pass-through.
                        let mut result_args = vec![format, str_expr];
                        if let Some(tz_arg) = tz {
                            result_args.push(tz_arg);
                        }
                        Ok(Expression::Function(Box::new(Function::new(
                            "PARSE_TIMESTAMP".to_string(),
                            result_args,
                        ))))
                    }
                }
            }
29353
            // FORMAT_DATE(format, date) -> target-specific
            "FORMAT_DATE" if args.len() == 2 => {
                let format = args.remove(0);
                let date_expr = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // STRFTIME(CAST(date AS DATE), format)
                        // NOTE(review): the BigQuery format string is passed through
                        // verbatim here, whereas FORMAT_DATETIME converts it via
                        // Self::bq_format_to_duckdb — confirm the omission is intended.
                        let cast_date = Expression::Cast(Box::new(Cast {
                            this: date_expr,
                            to: DataType::Date,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "STRFTIME".to_string(),
                            vec![cast_date, format],
                        ))))
                    }
                    // Other targets: pass through with the original argument order.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "FORMAT_DATE".to_string(),
                        vec![format, date_expr],
                    )))),
                }
            }
29380
            // FORMAT_DATETIME(format, datetime) -> target-specific
            "FORMAT_DATETIME" if args.len() == 2 => {
                let format = args.remove(0);
                let dt_expr = args.remove(0);

                if matches!(target, DialectType::BigQuery) {
                    // BQ->BQ: normalize %H:%M:%S to %T, %x to %D
                    let norm_format = Self::bq_format_normalize_bq(&format);
                    // Also strip DATETIME keyword from typed literals
                    // (DATETIME 'x' -> CAST('x' AS DATETIME)).
                    let norm_dt = match dt_expr {
                        Expression::Literal(Literal::Timestamp(s)) => {
                            Expression::Cast(Box::new(Cast {
                                this: Expression::Literal(Literal::String(s)),
                                to: DataType::Custom {
                                    name: "DATETIME".to_string(),
                                },
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                            }))
                        }
                        other => other,
                    };
                    return Ok(Expression::Function(Box::new(Function::new(
                        "FORMAT_DATETIME".to_string(),
                        vec![norm_format, norm_dt],
                    ))));
                }

                match target {
                    DialectType::DuckDB => {
                        // STRFTIME(CAST(dt AS TIMESTAMP), duckdb_format)
                        let cast_dt = Self::ensure_cast_timestamp(dt_expr);
                        let duck_format = Self::bq_format_to_duckdb(&format);
                        Ok(Expression::Function(Box::new(Function::new(
                            "STRFTIME".to_string(),
                            vec![cast_dt, duck_format],
                        ))))
                    }
                    // Other targets: pass through with the original argument order.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "FORMAT_DATETIME".to_string(),
                        vec![format, dt_expr],
                    )))),
                }
            }
29427
            // FORMAT_TIMESTAMP(format, ts) -> target-specific
            "FORMAT_TIMESTAMP" if args.len() == 2 => {
                let format = args.remove(0);
                let ts_expr = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // STRFTIME(CAST(CAST(ts AS TIMESTAMPTZ) AS TIMESTAMP), format)
                        // NOTE(review): unlike FORMAT_DATETIME, the format string is
                        // not converted via Self::bq_format_to_duckdb here — confirm
                        // this asymmetry is intended.
                        let cast_tstz = Self::ensure_cast_timestamptz(ts_expr);
                        let cast_ts = Expression::Cast(Box::new(Cast {
                            this: cast_tstz,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "STRFTIME".to_string(),
                            vec![cast_ts, format],
                        ))))
                    }
                    DialectType::Snowflake => {
                        // TO_CHAR(CAST(CAST(ts AS TIMESTAMPTZ) AS TIMESTAMP), snowflake_format)
                        let cast_tstz = Self::ensure_cast_timestamptz(ts_expr);
                        let cast_ts = Expression::Cast(Box::new(Cast {
                            this: cast_tstz,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        let sf_format = Self::bq_format_to_snowflake(&format);
                        Ok(Expression::Function(Box::new(Function::new(
                            "TO_CHAR".to_string(),
                            vec![cast_ts, sf_format],
                        ))))
                    }
                    // Other targets: pass through with the original argument order.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "FORMAT_TIMESTAMP".to_string(),
                        vec![format, ts_expr],
                    )))),
                }
            }
29478
            // UNIX_DATE(date) -> DATE_DIFF('DAY', '1970-01-01', date) for DuckDB
            "UNIX_DATE" if args.len() == 1 => {
                let date = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // Days elapsed since the Unix epoch (1970-01-01).
                        let epoch = Expression::Cast(Box::new(Cast {
                            this: Expression::Literal(Literal::String("1970-01-01".to_string())),
                            to: DataType::Date,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        // DATE_DIFF('DAY', epoch, date) but date might be DATE '...' literal
                        // Need to convert DATE literal to CAST
                        let norm_date = Self::date_literal_to_cast(date);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_DIFF".to_string(),
                            vec![
                                Expression::Literal(Literal::String("DAY".to_string())),
                                epoch,
                                norm_date,
                            ],
                        ))))
                    }
                    // Other targets: pass through unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "UNIX_DATE".to_string(),
                        vec![date],
                    )))),
                }
            }
29510
29511 // UNIX_SECONDS(ts) -> target-specific
29512 "UNIX_SECONDS" if args.len() == 1 => {
29513 let ts = args.remove(0);
29514 match target {
29515 DialectType::DuckDB => {
29516 // CAST(EPOCH(CAST(ts AS TIMESTAMPTZ)) AS BIGINT)
29517 let norm_ts = Self::ts_literal_to_cast_tz(ts);
29518 let epoch = Expression::Function(Box::new(Function::new(
29519 "EPOCH".to_string(),
29520 vec![norm_ts],
29521 )));
29522 Ok(Expression::Cast(Box::new(Cast {
29523 this: epoch,
29524 to: DataType::BigInt { length: None },
29525 trailing_comments: vec![],
29526 double_colon_syntax: false,
29527 format: None,
29528 default: None,
29529 })))
29530 }
29531 DialectType::Snowflake => {
29532 // TIMESTAMPDIFF(SECONDS, CAST('1970-01-01 00:00:00+00' AS TIMESTAMPTZ), ts)
29533 let epoch = Expression::Cast(Box::new(Cast {
29534 this: Expression::Literal(Literal::String(
29535 "1970-01-01 00:00:00+00".to_string(),
29536 )),
29537 to: DataType::Timestamp {
29538 timezone: true,
29539 precision: None,
29540 },
29541 trailing_comments: vec![],
29542 double_colon_syntax: false,
29543 format: None,
29544 default: None,
29545 }));
29546 Ok(Expression::Function(Box::new(Function::new(
29547 "TIMESTAMPDIFF".to_string(),
29548 vec![
29549 Expression::Identifier(Identifier::new("SECONDS".to_string())),
29550 epoch,
29551 ts,
29552 ],
29553 ))))
29554 }
29555 _ => Ok(Expression::Function(Box::new(Function::new(
29556 "UNIX_SECONDS".to_string(),
29557 vec![ts],
29558 )))),
29559 }
29560 }
29561
29562 // UNIX_MILLIS(ts) -> target-specific
29563 "UNIX_MILLIS" if args.len() == 1 => {
29564 let ts = args.remove(0);
29565 match target {
29566 DialectType::DuckDB => {
29567 let norm_ts = Self::ts_literal_to_cast_tz(ts);
29568 Ok(Expression::Function(Box::new(Function::new(
29569 "EPOCH_MS".to_string(),
29570 vec![norm_ts],
29571 ))))
29572 }
29573 _ => Ok(Expression::Function(Box::new(Function::new(
29574 "UNIX_MILLIS".to_string(),
29575 vec![ts],
29576 )))),
29577 }
29578 }
29579
29580 // UNIX_MICROS(ts) -> target-specific
29581 "UNIX_MICROS" if args.len() == 1 => {
29582 let ts = args.remove(0);
29583 match target {
29584 DialectType::DuckDB => {
29585 let norm_ts = Self::ts_literal_to_cast_tz(ts);
29586 Ok(Expression::Function(Box::new(Function::new(
29587 "EPOCH_US".to_string(),
29588 vec![norm_ts],
29589 ))))
29590 }
29591 _ => Ok(Expression::Function(Box::new(Function::new(
29592 "UNIX_MICROS".to_string(),
29593 vec![ts],
29594 )))),
29595 }
29596 }
29597
29598 // INSTR(str, substr) -> target-specific
29599 "INSTR" => {
29600 if matches!(target, DialectType::BigQuery) {
29601 // BQ->BQ: keep as INSTR
29602 Ok(Expression::Function(Box::new(Function::new(
29603 "INSTR".to_string(),
29604 args,
29605 ))))
29606 } else if matches!(target, DialectType::Snowflake) && args.len() == 2 {
29607 // Snowflake: CHARINDEX(substr, str) - swap args
29608 let str_expr = args.remove(0);
29609 let substr = args.remove(0);
29610 Ok(Expression::Function(Box::new(Function::new(
29611 "CHARINDEX".to_string(),
29612 vec![substr, str_expr],
29613 ))))
29614 } else {
29615 // Keep as INSTR for other targets
29616 Ok(Expression::Function(Box::new(Function::new(
29617 "INSTR".to_string(),
29618 args,
29619 ))))
29620 }
29621 }
29622
29623 // CURRENT_TIMESTAMP / CURRENT_DATE handling - parens normalization and timezone
29624 "CURRENT_TIMESTAMP" | "CURRENT_DATE" | "CURRENT_DATETIME" | "CURRENT_TIME" => {
29625 if matches!(target, DialectType::BigQuery) {
29626 // BQ->BQ: always output with parens (function form), keep any timezone arg
29627 Ok(Expression::Function(Box::new(Function::new(name, args))))
29628 } else if name == "CURRENT_DATE" && args.len() == 1 {
29629 // CURRENT_DATE('UTC') - has timezone arg
29630 let tz_arg = args.remove(0);
29631 match target {
29632 DialectType::DuckDB => {
29633 // CAST(CURRENT_TIMESTAMP AT TIME ZONE 'UTC' AS DATE)
29634 let ct = Expression::CurrentTimestamp(
29635 crate::expressions::CurrentTimestamp {
29636 precision: None,
29637 sysdate: false,
29638 },
29639 );
29640 let at_tz =
29641 Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
29642 this: ct,
29643 zone: tz_arg,
29644 }));
29645 Ok(Expression::Cast(Box::new(Cast {
29646 this: at_tz,
29647 to: DataType::Date,
29648 trailing_comments: vec![],
29649 double_colon_syntax: false,
29650 format: None,
29651 default: None,
29652 })))
29653 }
29654 DialectType::Snowflake => {
29655 // CAST(CONVERT_TIMEZONE('UTC', CURRENT_TIMESTAMP()) AS DATE)
29656 let ct = Expression::Function(Box::new(Function::new(
29657 "CURRENT_TIMESTAMP".to_string(),
29658 vec![],
29659 )));
29660 let convert = Expression::Function(Box::new(Function::new(
29661 "CONVERT_TIMEZONE".to_string(),
29662 vec![tz_arg, ct],
29663 )));
29664 Ok(Expression::Cast(Box::new(Cast {
29665 this: convert,
29666 to: DataType::Date,
29667 trailing_comments: vec![],
29668 double_colon_syntax: false,
29669 format: None,
29670 default: None,
29671 })))
29672 }
29673 _ => {
29674 // PostgreSQL, MySQL, etc.: CURRENT_DATE AT TIME ZONE 'UTC'
29675 let cd = Expression::CurrentDate(crate::expressions::CurrentDate);
29676 Ok(Expression::AtTimeZone(Box::new(
29677 crate::expressions::AtTimeZone {
29678 this: cd,
29679 zone: tz_arg,
29680 },
29681 )))
29682 }
29683 }
29684 } else if (name == "CURRENT_TIMESTAMP"
29685 || name == "CURRENT_TIME"
29686 || name == "CURRENT_DATE")
29687 && args.is_empty()
29688 && matches!(
29689 target,
29690 DialectType::PostgreSQL
29691 | DialectType::DuckDB
29692 | DialectType::Presto
29693 | DialectType::Trino
29694 )
29695 {
29696 // These targets want no-parens CURRENT_TIMESTAMP / CURRENT_DATE / CURRENT_TIME
29697 if name == "CURRENT_TIMESTAMP" {
29698 Ok(Expression::CurrentTimestamp(
29699 crate::expressions::CurrentTimestamp {
29700 precision: None,
29701 sysdate: false,
29702 },
29703 ))
29704 } else if name == "CURRENT_DATE" {
29705 Ok(Expression::CurrentDate(crate::expressions::CurrentDate))
29706 } else {
29707 // CURRENT_TIME
29708 Ok(Expression::CurrentTime(crate::expressions::CurrentTime {
29709 precision: None,
29710 }))
29711 }
29712 } else {
29713 // All other targets: keep as function (with parens)
29714 Ok(Expression::Function(Box::new(Function::new(name, args))))
29715 }
29716 }
29717
29718 // JSON_QUERY(json, path) -> target-specific
29719 "JSON_QUERY" if args.len() == 2 => {
29720 match target {
29721 DialectType::DuckDB | DialectType::SQLite => {
29722 // json -> path syntax
29723 let json_expr = args.remove(0);
29724 let path = args.remove(0);
29725 Ok(Expression::JsonExtract(Box::new(
29726 crate::expressions::JsonExtractFunc {
29727 this: json_expr,
29728 path,
29729 returning: None,
29730 arrow_syntax: true,
29731 hash_arrow_syntax: false,
29732 wrapper_option: None,
29733 quotes_option: None,
29734 on_scalar_string: false,
29735 on_error: None,
29736 },
29737 )))
29738 }
29739 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
29740 Ok(Expression::Function(Box::new(Function::new(
29741 "GET_JSON_OBJECT".to_string(),
29742 args,
29743 ))))
29744 }
29745 DialectType::PostgreSQL | DialectType::Redshift => Ok(Expression::Function(
29746 Box::new(Function::new("JSON_EXTRACT_PATH".to_string(), args)),
29747 )),
29748 _ => Ok(Expression::Function(Box::new(Function::new(
29749 "JSON_QUERY".to_string(),
29750 args,
29751 )))),
29752 }
29753 }
29754
29755 // JSON_VALUE_ARRAY(json, path) -> target-specific
29756 "JSON_VALUE_ARRAY" if args.len() == 2 => {
29757 match target {
29758 DialectType::DuckDB => {
29759 // CAST(json -> path AS TEXT[])
29760 let json_expr = args.remove(0);
29761 let path = args.remove(0);
29762 let arrow = Expression::JsonExtract(Box::new(
29763 crate::expressions::JsonExtractFunc {
29764 this: json_expr,
29765 path,
29766 returning: None,
29767 arrow_syntax: true,
29768 hash_arrow_syntax: false,
29769 wrapper_option: None,
29770 quotes_option: None,
29771 on_scalar_string: false,
29772 on_error: None,
29773 },
29774 ));
29775 Ok(Expression::Cast(Box::new(Cast {
29776 this: arrow,
29777 to: DataType::Array {
29778 element_type: Box::new(DataType::Text),
29779 dimension: None,
29780 },
29781 trailing_comments: vec![],
29782 double_colon_syntax: false,
29783 format: None,
29784 default: None,
29785 })))
29786 }
29787 DialectType::Snowflake => {
29788 let json_expr = args.remove(0);
29789 let path_expr = args.remove(0);
29790 // Convert JSON path from $.path to just path
29791 let sf_path = if let Expression::Literal(Literal::String(ref s)) = path_expr
29792 {
29793 let trimmed = s.trim_start_matches('$').trim_start_matches('.');
29794 Expression::Literal(Literal::String(trimmed.to_string()))
29795 } else {
29796 path_expr
29797 };
29798 let parse_json = Expression::Function(Box::new(Function::new(
29799 "PARSE_JSON".to_string(),
29800 vec![json_expr],
29801 )));
29802 let get_path = Expression::Function(Box::new(Function::new(
29803 "GET_PATH".to_string(),
29804 vec![parse_json, sf_path],
29805 )));
29806 // TRANSFORM(get_path, x -> CAST(x AS VARCHAR))
29807 let cast_expr = Expression::Cast(Box::new(Cast {
29808 this: Expression::Identifier(Identifier::new("x")),
29809 to: DataType::VarChar {
29810 length: None,
29811 parenthesized_length: false,
29812 },
29813 trailing_comments: vec![],
29814 double_colon_syntax: false,
29815 format: None,
29816 default: None,
29817 }));
29818 let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
29819 parameters: vec![Identifier::new("x")],
29820 body: cast_expr,
29821 colon: false,
29822 parameter_types: vec![],
29823 }));
29824 Ok(Expression::Function(Box::new(Function::new(
29825 "TRANSFORM".to_string(),
29826 vec![get_path, lambda],
29827 ))))
29828 }
29829 _ => Ok(Expression::Function(Box::new(Function::new(
29830 "JSON_VALUE_ARRAY".to_string(),
29831 args,
29832 )))),
29833 }
29834 }
29835
29836 // BigQuery REGEXP_EXTRACT(val, regex[, position[, occurrence]]) -> target dialects
29837 // BigQuery's 3rd arg is "position" (starting char index), 4th is "occurrence" (which match to return)
29838 // This is different from Hive/Spark where 3rd arg is "group_index"
29839 "REGEXP_EXTRACT" if matches!(source, DialectType::BigQuery) => {
29840 match target {
29841 DialectType::DuckDB
29842 | DialectType::Presto
29843 | DialectType::Trino
29844 | DialectType::Athena => {
29845 if args.len() == 2 {
29846 // REGEXP_EXTRACT(val, regex) -> REGEXP_EXTRACT(val, regex, 1)
29847 args.push(Expression::number(1));
29848 Ok(Expression::Function(Box::new(Function::new(
29849 "REGEXP_EXTRACT".to_string(),
29850 args,
29851 ))))
29852 } else if args.len() == 3 {
29853 let val = args.remove(0);
29854 let regex = args.remove(0);
29855 let position = args.remove(0);
29856 let is_pos_1 = matches!(&position, Expression::Literal(Literal::Number(n)) if n == "1");
29857 if is_pos_1 {
29858 Ok(Expression::Function(Box::new(Function::new(
29859 "REGEXP_EXTRACT".to_string(),
29860 vec![val, regex, Expression::number(1)],
29861 ))))
29862 } else {
29863 let substring_expr = Expression::Function(Box::new(Function::new(
29864 "SUBSTRING".to_string(),
29865 vec![val, position],
29866 )));
29867 let nullif_expr = Expression::Function(Box::new(Function::new(
29868 "NULLIF".to_string(),
29869 vec![
29870 substring_expr,
29871 Expression::Literal(Literal::String(String::new())),
29872 ],
29873 )));
29874 Ok(Expression::Function(Box::new(Function::new(
29875 "REGEXP_EXTRACT".to_string(),
29876 vec![nullif_expr, regex, Expression::number(1)],
29877 ))))
29878 }
29879 } else if args.len() == 4 {
29880 let val = args.remove(0);
29881 let regex = args.remove(0);
29882 let position = args.remove(0);
29883 let occurrence = args.remove(0);
29884 let is_pos_1 = matches!(&position, Expression::Literal(Literal::Number(n)) if n == "1");
29885 let is_occ_1 = matches!(&occurrence, Expression::Literal(Literal::Number(n)) if n == "1");
29886 if is_pos_1 && is_occ_1 {
29887 Ok(Expression::Function(Box::new(Function::new(
29888 "REGEXP_EXTRACT".to_string(),
29889 vec![val, regex, Expression::number(1)],
29890 ))))
29891 } else {
29892 let subject = if is_pos_1 {
29893 val
29894 } else {
29895 let substring_expr = Expression::Function(Box::new(
29896 Function::new("SUBSTRING".to_string(), vec![val, position]),
29897 ));
29898 Expression::Function(Box::new(Function::new(
29899 "NULLIF".to_string(),
29900 vec![
29901 substring_expr,
29902 Expression::Literal(Literal::String(String::new())),
29903 ],
29904 )))
29905 };
29906 let extract_all = Expression::Function(Box::new(Function::new(
29907 "REGEXP_EXTRACT_ALL".to_string(),
29908 vec![subject, regex, Expression::number(1)],
29909 )));
29910 Ok(Expression::Function(Box::new(Function::new(
29911 "ARRAY_EXTRACT".to_string(),
29912 vec![extract_all, occurrence],
29913 ))))
29914 }
29915 } else {
29916 Ok(Expression::Function(Box::new(Function {
29917 name: f.name,
29918 args,
29919 distinct: f.distinct,
29920 trailing_comments: f.trailing_comments,
29921 use_bracket_syntax: f.use_bracket_syntax,
29922 no_parens: f.no_parens,
29923 quoted: f.quoted,
29924 })))
29925 }
29926 }
29927 DialectType::Snowflake => {
29928 // BigQuery REGEXP_EXTRACT -> Snowflake REGEXP_SUBSTR
29929 Ok(Expression::Function(Box::new(Function::new(
29930 "REGEXP_SUBSTR".to_string(),
29931 args,
29932 ))))
29933 }
29934 _ => {
29935 // For other targets (Hive/Spark/BigQuery): pass through as-is
29936 // BigQuery's default group behavior matches Hive/Spark for 2-arg case
29937 Ok(Expression::Function(Box::new(Function {
29938 name: f.name,
29939 args,
29940 distinct: f.distinct,
29941 trailing_comments: f.trailing_comments,
29942 use_bracket_syntax: f.use_bracket_syntax,
29943 no_parens: f.no_parens,
29944 quoted: f.quoted,
29945 })))
29946 }
29947 }
29948 }
29949
29950 // BigQuery STRUCT(args) -> target-specific struct expression
29951 "STRUCT" => {
29952 // Convert Function args to Struct fields
29953 let mut fields: Vec<(Option<String>, Expression)> = Vec::new();
29954 for (i, arg) in args.into_iter().enumerate() {
29955 match arg {
29956 Expression::Alias(a) => {
29957 // Named field: expr AS name
29958 fields.push((Some(a.alias.name.clone()), a.this));
29959 }
29960 other => {
29961 // Unnamed field: for Spark/Hive, keep as None
29962 // For Snowflake, auto-name as _N
29963 // For DuckDB, use column name for column refs, _N for others
29964 if matches!(target, DialectType::Snowflake) {
29965 fields.push((Some(format!("_{}", i)), other));
29966 } else if matches!(target, DialectType::DuckDB) {
29967 let auto_name = match &other {
29968 Expression::Column(col) => col.name.name.clone(),
29969 _ => format!("_{}", i),
29970 };
29971 fields.push((Some(auto_name), other));
29972 } else {
29973 fields.push((None, other));
29974 }
29975 }
29976 }
29977 }
29978
29979 match target {
29980 DialectType::Snowflake => {
29981 // OBJECT_CONSTRUCT('name', value, ...)
29982 let mut oc_args = Vec::new();
29983 for (name, val) in &fields {
29984 if let Some(n) = name {
29985 oc_args.push(Expression::Literal(Literal::String(n.clone())));
29986 oc_args.push(val.clone());
29987 } else {
29988 oc_args.push(val.clone());
29989 }
29990 }
29991 Ok(Expression::Function(Box::new(Function::new(
29992 "OBJECT_CONSTRUCT".to_string(),
29993 oc_args,
29994 ))))
29995 }
29996 DialectType::DuckDB => {
29997 // {'name': value, ...}
29998 Ok(Expression::Struct(Box::new(crate::expressions::Struct {
29999 fields,
30000 })))
30001 }
30002 DialectType::Hive => {
30003 // STRUCT(val1, val2, ...) - strip aliases
30004 let hive_fields: Vec<(Option<String>, Expression)> =
30005 fields.into_iter().map(|(_, v)| (None, v)).collect();
30006 Ok(Expression::Struct(Box::new(crate::expressions::Struct {
30007 fields: hive_fields,
30008 })))
30009 }
30010 DialectType::Spark | DialectType::Databricks => {
30011 // Use Expression::Struct to bypass Spark target transform auto-naming
30012 Ok(Expression::Struct(Box::new(crate::expressions::Struct {
30013 fields,
30014 })))
30015 }
30016 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
30017 // Check if all fields are named AND all have inferable types - if so, wrap in CAST(ROW(...) AS ROW(name TYPE, ...))
30018 let all_named =
30019 !fields.is_empty() && fields.iter().all(|(name, _)| name.is_some());
30020 let all_types_inferable = all_named
30021 && fields
30022 .iter()
30023 .all(|(_, val)| Self::can_infer_presto_type(val));
30024 let row_args: Vec<Expression> =
30025 fields.iter().map(|(_, v)| v.clone()).collect();
30026 let row_expr = Expression::Function(Box::new(Function::new(
30027 "ROW".to_string(),
30028 row_args,
30029 )));
30030 if all_named && all_types_inferable {
30031 // Build ROW type with inferred types
30032 let mut row_type_fields = Vec::new();
30033 for (name, val) in &fields {
30034 if let Some(n) = name {
30035 let type_str = Self::infer_sql_type_for_presto(val);
30036 row_type_fields.push(crate::expressions::StructField::new(
30037 n.clone(),
30038 crate::expressions::DataType::Custom { name: type_str },
30039 ));
30040 }
30041 }
30042 let row_type = crate::expressions::DataType::Struct {
30043 fields: row_type_fields,
30044 nested: true,
30045 };
30046 Ok(Expression::Cast(Box::new(Cast {
30047 this: row_expr,
30048 to: row_type,
30049 trailing_comments: Vec::new(),
30050 double_colon_syntax: false,
30051 format: None,
30052 default: None,
30053 })))
30054 } else {
30055 Ok(row_expr)
30056 }
30057 }
30058 _ => {
30059 // Default: keep as STRUCT function with original args
30060 let mut new_args = Vec::new();
30061 for (name, val) in fields {
30062 if let Some(n) = name {
30063 new_args.push(Expression::Alias(Box::new(
30064 crate::expressions::Alias::new(val, Identifier::new(n)),
30065 )));
30066 } else {
30067 new_args.push(val);
30068 }
30069 }
30070 Ok(Expression::Function(Box::new(Function::new(
30071 "STRUCT".to_string(),
30072 new_args,
30073 ))))
30074 }
30075 }
30076 }
30077
30078 // ROUND(x, n, 'ROUND_HALF_EVEN') -> ROUND_EVEN(x, n) for DuckDB
30079 "ROUND" if args.len() == 3 => {
30080 let x = args.remove(0);
30081 let n = args.remove(0);
30082 let mode = args.remove(0);
30083 // Check if mode is 'ROUND_HALF_EVEN'
30084 let is_half_even = matches!(&mode, Expression::Literal(Literal::String(s)) if s.eq_ignore_ascii_case("ROUND_HALF_EVEN"));
30085 if is_half_even && matches!(target, DialectType::DuckDB) {
30086 Ok(Expression::Function(Box::new(Function::new(
30087 "ROUND_EVEN".to_string(),
30088 vec![x, n],
30089 ))))
30090 } else {
30091 // Pass through with all args
30092 Ok(Expression::Function(Box::new(Function::new(
30093 "ROUND".to_string(),
30094 vec![x, n, mode],
30095 ))))
30096 }
30097 }
30098
30099 // MAKE_INTERVAL(year, month, named_args...) -> INTERVAL string for Snowflake/DuckDB
30100 "MAKE_INTERVAL" => {
30101 // MAKE_INTERVAL(1, 2, minute => 5, day => 3)
30102 // The positional args are: year, month
30103 // Named args are: day =>, minute =>, etc.
30104 // For Snowflake: INTERVAL '1 year, 2 month, 5 minute, 3 day'
30105 // For DuckDB: INTERVAL '1 year 2 month 5 minute 3 day'
30106 // For BigQuery->BigQuery: reorder named args (day before minute)
30107 if matches!(target, DialectType::Snowflake | DialectType::DuckDB) {
30108 let mut parts: Vec<(String, String)> = Vec::new();
30109 let mut pos_idx = 0;
30110 let pos_units = ["year", "month"];
30111 for arg in &args {
30112 if let Expression::NamedArgument(na) = arg {
30113 // Named arg like minute => 5
30114 let unit = na.name.name.clone();
30115 if let Expression::Literal(Literal::Number(n)) = &na.value {
30116 parts.push((unit, n.clone()));
30117 }
30118 } else if pos_idx < pos_units.len() {
30119 if let Expression::Literal(Literal::Number(n)) = arg {
30120 parts.push((pos_units[pos_idx].to_string(), n.clone()));
30121 }
30122 pos_idx += 1;
30123 }
30124 }
30125 // Don't sort - preserve original argument order
30126 let separator = if matches!(target, DialectType::Snowflake) {
30127 ", "
30128 } else {
30129 " "
30130 };
30131 let interval_str = parts
30132 .iter()
30133 .map(|(u, v)| format!("{} {}", v, u))
30134 .collect::<Vec<_>>()
30135 .join(separator);
30136 Ok(Expression::Interval(Box::new(
30137 crate::expressions::Interval {
30138 this: Some(Expression::Literal(Literal::String(interval_str))),
30139 unit: None,
30140 },
30141 )))
30142 } else if matches!(target, DialectType::BigQuery) {
30143 // BigQuery->BigQuery: reorder named args (day, minute, etc.)
30144 let mut positional = Vec::new();
30145 let mut named: Vec<(
30146 String,
30147 Expression,
30148 crate::expressions::NamedArgSeparator,
30149 )> = Vec::new();
30150 let _pos_units = ["year", "month"];
30151 let mut _pos_idx = 0;
30152 for arg in args {
30153 if let Expression::NamedArgument(na) = arg {
30154 named.push((na.name.name.clone(), na.value, na.separator));
30155 } else {
30156 positional.push(arg);
30157 _pos_idx += 1;
30158 }
30159 }
30160 // Sort named args by: day, hour, minute, second
30161 let unit_order = |u: &str| -> usize {
30162 match u.to_lowercase().as_str() {
30163 "day" => 0,
30164 "hour" => 1,
30165 "minute" => 2,
30166 "second" => 3,
30167 _ => 4,
30168 }
30169 };
30170 named.sort_by_key(|(u, _, _)| unit_order(u));
30171 let mut result_args = positional;
30172 for (name, value, sep) in named {
30173 result_args.push(Expression::NamedArgument(Box::new(
30174 crate::expressions::NamedArgument {
30175 name: Identifier::new(&name),
30176 value,
30177 separator: sep,
30178 },
30179 )));
30180 }
30181 Ok(Expression::Function(Box::new(Function::new(
30182 "MAKE_INTERVAL".to_string(),
30183 result_args,
30184 ))))
30185 } else {
30186 Ok(Expression::Function(Box::new(Function::new(
30187 "MAKE_INTERVAL".to_string(),
30188 args,
30189 ))))
30190 }
30191 }
30192
30193 // ARRAY_TO_STRING(array, sep, null_text) -> ARRAY_TO_STRING(LIST_TRANSFORM(array, x -> COALESCE(x, null_text)), sep) for DuckDB
30194 "ARRAY_TO_STRING" if args.len() == 3 => {
30195 let arr = args.remove(0);
30196 let sep = args.remove(0);
30197 let null_text = args.remove(0);
30198 match target {
30199 DialectType::DuckDB => {
30200 // LIST_TRANSFORM(array, x -> COALESCE(x, null_text))
30201 let _lambda_param =
30202 Expression::Identifier(crate::expressions::Identifier::new("x"));
30203 let coalesce =
30204 Expression::Coalesce(Box::new(crate::expressions::VarArgFunc {
30205 original_name: None,
30206 expressions: vec![
30207 Expression::Identifier(crate::expressions::Identifier::new(
30208 "x",
30209 )),
30210 null_text,
30211 ],
30212 }));
30213 let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
30214 parameters: vec![crate::expressions::Identifier::new("x")],
30215 body: coalesce,
30216 colon: false,
30217 parameter_types: vec![],
30218 }));
30219 let list_transform = Expression::Function(Box::new(Function::new(
30220 "LIST_TRANSFORM".to_string(),
30221 vec![arr, lambda],
30222 )));
30223 Ok(Expression::Function(Box::new(Function::new(
30224 "ARRAY_TO_STRING".to_string(),
30225 vec![list_transform, sep],
30226 ))))
30227 }
30228 _ => Ok(Expression::Function(Box::new(Function::new(
30229 "ARRAY_TO_STRING".to_string(),
30230 vec![arr, sep, null_text],
30231 )))),
30232 }
30233 }
30234
30235 // LENGTH(x) -> CASE TYPEOF(x) ... for DuckDB
30236 "LENGTH" if args.len() == 1 => {
30237 let arg = args.remove(0);
30238 match target {
30239 DialectType::DuckDB => {
30240 // CASE TYPEOF(foo) WHEN 'BLOB' THEN OCTET_LENGTH(CAST(foo AS BLOB)) ELSE LENGTH(CAST(foo AS TEXT)) END
30241 let typeof_func = Expression::Function(Box::new(Function::new(
30242 "TYPEOF".to_string(),
30243 vec![arg.clone()],
30244 )));
30245 let blob_cast = Expression::Cast(Box::new(Cast {
30246 this: arg.clone(),
30247 to: DataType::VarBinary { length: None },
30248 trailing_comments: vec![],
30249 double_colon_syntax: false,
30250 format: None,
30251 default: None,
30252 }));
30253 let octet_length = Expression::Function(Box::new(Function::new(
30254 "OCTET_LENGTH".to_string(),
30255 vec![blob_cast],
30256 )));
30257 let text_cast = Expression::Cast(Box::new(Cast {
30258 this: arg,
30259 to: DataType::Text,
30260 trailing_comments: vec![],
30261 double_colon_syntax: false,
30262 format: None,
30263 default: None,
30264 }));
30265 let length_text = Expression::Function(Box::new(Function::new(
30266 "LENGTH".to_string(),
30267 vec![text_cast],
30268 )));
30269 Ok(Expression::Case(Box::new(crate::expressions::Case {
30270 operand: Some(typeof_func),
30271 whens: vec![(
30272 Expression::Literal(Literal::String("BLOB".to_string())),
30273 octet_length,
30274 )],
30275 else_: Some(length_text),
30276 comments: Vec::new(),
30277 })))
30278 }
30279 _ => Ok(Expression::Function(Box::new(Function::new(
30280 "LENGTH".to_string(),
30281 vec![arg],
30282 )))),
30283 }
30284 }
30285
30286 // PERCENTILE_CONT(x, fraction RESPECT NULLS) -> QUANTILE_CONT(x, fraction) for DuckDB
30287 "PERCENTILE_CONT" if args.len() >= 2 && matches!(source, DialectType::BigQuery) => {
30288 // BigQuery PERCENTILE_CONT(x, fraction [RESPECT|IGNORE NULLS]) OVER ()
30289 // The args should be [x, fraction] with the null handling stripped
30290 // For DuckDB: QUANTILE_CONT(x, fraction)
30291 // For Spark: PERCENTILE_CONT(x, fraction) RESPECT NULLS (handled at window level)
30292 match target {
30293 DialectType::DuckDB => {
30294 // Strip down to just 2 args, rename to QUANTILE_CONT
30295 let x = args[0].clone();
30296 let frac = args[1].clone();
30297 Ok(Expression::Function(Box::new(Function::new(
30298 "QUANTILE_CONT".to_string(),
30299 vec![x, frac],
30300 ))))
30301 }
30302 _ => Ok(Expression::Function(Box::new(Function::new(
30303 "PERCENTILE_CONT".to_string(),
30304 args,
30305 )))),
30306 }
30307 }
30308
30309 // All others: pass through
30310 _ => Ok(Expression::Function(Box::new(Function {
30311 name: f.name,
30312 args,
30313 distinct: f.distinct,
30314 trailing_comments: f.trailing_comments,
30315 use_bracket_syntax: f.use_bracket_syntax,
30316 no_parens: f.no_parens,
30317 quoted: f.quoted,
30318 }))),
30319 }
30320 }
30321
30322 /// Check if we can reliably infer the SQL type for Presto/Trino ROW CAST.
30323 /// Returns false for column references and other non-literal expressions where the type is unknown.
30324 fn can_infer_presto_type(expr: &Expression) -> bool {
30325 match expr {
30326 Expression::Literal(_) => true,
30327 Expression::Boolean(_) => true,
30328 Expression::Array(_) | Expression::ArrayFunc(_) => true,
30329 Expression::Struct(_) | Expression::StructFunc(_) => true,
30330 Expression::Function(f) => {
30331 let up = f.name.to_uppercase();
30332 up == "STRUCT"
30333 || up == "ROW"
30334 || up == "CURRENT_DATE"
30335 || up == "CURRENT_TIMESTAMP"
30336 || up == "NOW"
30337 }
30338 Expression::Cast(_) => true,
30339 Expression::Neg(inner) => Self::can_infer_presto_type(&inner.this),
30340 _ => false,
30341 }
30342 }
30343
30344 /// Infer SQL type name for a Presto/Trino ROW CAST from a literal expression
30345 fn infer_sql_type_for_presto(expr: &Expression) -> String {
30346 use crate::expressions::Literal;
30347 match expr {
30348 Expression::Literal(Literal::String(_)) => "VARCHAR".to_string(),
30349 Expression::Literal(Literal::Number(n)) => {
30350 if n.contains('.') {
30351 "DOUBLE".to_string()
30352 } else {
30353 "INTEGER".to_string()
30354 }
30355 }
30356 Expression::Boolean(_) => "BOOLEAN".to_string(),
30357 Expression::Literal(Literal::Date(_)) => "DATE".to_string(),
30358 Expression::Literal(Literal::Timestamp(_)) => "TIMESTAMP".to_string(),
30359 Expression::Literal(Literal::Datetime(_)) => "TIMESTAMP".to_string(),
30360 Expression::Array(_) | Expression::ArrayFunc(_) => "ARRAY(VARCHAR)".to_string(),
30361 Expression::Struct(_) | Expression::StructFunc(_) => "ROW".to_string(),
30362 Expression::Function(f) => {
30363 let up = f.name.to_uppercase();
30364 if up == "STRUCT" || up == "ROW" {
30365 "ROW".to_string()
30366 } else if up == "CURRENT_DATE" {
30367 "DATE".to_string()
30368 } else if up == "CURRENT_TIMESTAMP" || up == "NOW" {
30369 "TIMESTAMP".to_string()
30370 } else {
30371 "VARCHAR".to_string()
30372 }
30373 }
30374 Expression::Cast(c) => {
30375 // If already cast, use the target type
30376 Self::data_type_to_presto_string(&c.to)
30377 }
30378 _ => "VARCHAR".to_string(),
30379 }
30380 }
30381
30382 /// Convert a DataType to its Presto/Trino string representation for ROW type
30383 fn data_type_to_presto_string(dt: &crate::expressions::DataType) -> String {
30384 use crate::expressions::DataType;
30385 match dt {
30386 DataType::VarChar { .. } | DataType::Text | DataType::String { .. } => {
30387 "VARCHAR".to_string()
30388 }
30389 DataType::Int { .. }
30390 | DataType::BigInt { .. }
30391 | DataType::SmallInt { .. }
30392 | DataType::TinyInt { .. } => "INTEGER".to_string(),
30393 DataType::Float { .. } | DataType::Double { .. } => "DOUBLE".to_string(),
30394 DataType::Boolean => "BOOLEAN".to_string(),
30395 DataType::Date => "DATE".to_string(),
30396 DataType::Timestamp { .. } => "TIMESTAMP".to_string(),
30397 DataType::Struct { fields, .. } => {
30398 let field_strs: Vec<String> = fields
30399 .iter()
30400 .map(|f| {
30401 format!(
30402 "{} {}",
30403 f.name,
30404 Self::data_type_to_presto_string(&f.data_type)
30405 )
30406 })
30407 .collect();
30408 format!("ROW({})", field_strs.join(", "))
30409 }
30410 DataType::Array { element_type, .. } => {
30411 format!("ARRAY({})", Self::data_type_to_presto_string(element_type))
30412 }
30413 DataType::Custom { name } => {
30414 // Pass through custom type names (e.g., "INTEGER", "VARCHAR" from earlier inference)
30415 name.clone()
30416 }
30417 _ => "VARCHAR".to_string(),
30418 }
30419 }
30420
30421 /// Convert IntervalUnit to string
30422 fn interval_unit_to_string(unit: &crate::expressions::IntervalUnit) -> String {
30423 match unit {
30424 crate::expressions::IntervalUnit::Year => "YEAR".to_string(),
30425 crate::expressions::IntervalUnit::Quarter => "QUARTER".to_string(),
30426 crate::expressions::IntervalUnit::Month => "MONTH".to_string(),
30427 crate::expressions::IntervalUnit::Week => "WEEK".to_string(),
30428 crate::expressions::IntervalUnit::Day => "DAY".to_string(),
30429 crate::expressions::IntervalUnit::Hour => "HOUR".to_string(),
30430 crate::expressions::IntervalUnit::Minute => "MINUTE".to_string(),
30431 crate::expressions::IntervalUnit::Second => "SECOND".to_string(),
30432 crate::expressions::IntervalUnit::Millisecond => "MILLISECOND".to_string(),
30433 crate::expressions::IntervalUnit::Microsecond => "MICROSECOND".to_string(),
30434 crate::expressions::IntervalUnit::Nanosecond => "NANOSECOND".to_string(),
30435 }
30436 }
30437
30438 /// Extract unit string from an expression (uppercased)
30439 fn get_unit_str_static(expr: &Expression) -> String {
30440 use crate::expressions::Literal;
30441 match expr {
30442 Expression::Identifier(id) => id.name.to_uppercase(),
30443 Expression::Literal(Literal::String(s)) => s.to_uppercase(),
30444 Expression::Column(col) => col.name.name.to_uppercase(),
30445 Expression::Function(f) => {
30446 let base = f.name.to_uppercase();
30447 if !f.args.is_empty() {
30448 let inner = Self::get_unit_str_static(&f.args[0]);
30449 format!("{}({})", base, inner)
30450 } else {
30451 base
30452 }
30453 }
30454 _ => "DAY".to_string(),
30455 }
30456 }
30457
30458 /// Parse unit string to IntervalUnit
30459 fn parse_interval_unit_static(s: &str) -> crate::expressions::IntervalUnit {
30460 match s {
30461 "YEAR" | "YY" | "YYYY" => crate::expressions::IntervalUnit::Year,
30462 "QUARTER" | "QQ" | "Q" => crate::expressions::IntervalUnit::Quarter,
30463 "MONTH" | "MM" | "M" => crate::expressions::IntervalUnit::Month,
30464 "WEEK" | "WK" | "WW" | "ISOWEEK" => crate::expressions::IntervalUnit::Week,
30465 "DAY" | "DD" | "D" | "DY" => crate::expressions::IntervalUnit::Day,
30466 "HOUR" | "HH" => crate::expressions::IntervalUnit::Hour,
30467 "MINUTE" | "MI" | "N" => crate::expressions::IntervalUnit::Minute,
30468 "SECOND" | "SS" | "S" => crate::expressions::IntervalUnit::Second,
30469 "MILLISECOND" | "MS" => crate::expressions::IntervalUnit::Millisecond,
30470 "MICROSECOND" | "MCS" | "US" => crate::expressions::IntervalUnit::Microsecond,
30471 _ if s.starts_with("WEEK(") => crate::expressions::IntervalUnit::Week,
30472 _ => crate::expressions::IntervalUnit::Day,
30473 }
30474 }
30475
30476 /// Convert expression to simple string for interval building
30477 fn expr_to_string_static(expr: &Expression) -> String {
30478 use crate::expressions::Literal;
30479 match expr {
30480 Expression::Literal(Literal::Number(s)) => s.clone(),
30481 Expression::Literal(Literal::String(s)) => s.clone(),
30482 Expression::Identifier(id) => id.name.clone(),
30483 Expression::Neg(f) => format!("-{}", Self::expr_to_string_static(&f.this)),
30484 _ => "1".to_string(),
30485 }
30486 }
30487
30488 /// Extract a simple string representation from a literal expression
30489 fn expr_to_string(expr: &Expression) -> String {
30490 use crate::expressions::Literal;
30491 match expr {
30492 Expression::Literal(Literal::Number(s)) => s.clone(),
30493 Expression::Literal(Literal::String(s)) => s.clone(),
30494 Expression::Neg(f) => format!("-{}", Self::expr_to_string(&f.this)),
30495 Expression::Identifier(id) => id.name.clone(),
30496 _ => "1".to_string(),
30497 }
30498 }
30499
30500 /// Quote an interval value expression as a string literal if it's a number (or negated number)
30501 fn quote_interval_val(expr: &Expression) -> Expression {
30502 use crate::expressions::Literal;
30503 match expr {
30504 Expression::Literal(Literal::Number(n)) => {
30505 Expression::Literal(Literal::String(n.clone()))
30506 }
30507 Expression::Literal(Literal::String(_)) => expr.clone(),
30508 Expression::Neg(inner) => {
30509 if let Expression::Literal(Literal::Number(n)) = &inner.this {
30510 Expression::Literal(Literal::String(format!("-{}", n)))
30511 } else {
30512 expr.clone()
30513 }
30514 }
30515 _ => expr.clone(),
30516 }
30517 }
30518
30519 /// Check if a timestamp string contains timezone info (offset like +02:00, or named timezone)
30520 fn timestamp_string_has_timezone(ts: &str) -> bool {
30521 let trimmed = ts.trim();
30522 // Check for numeric timezone offsets: +N, -N, +NN:NN, -NN:NN at end
30523 if let Some(last_space) = trimmed.rfind(' ') {
30524 let suffix = &trimmed[last_space + 1..];
30525 if (suffix.starts_with('+') || suffix.starts_with('-')) && suffix.len() > 1 {
30526 let rest = &suffix[1..];
30527 if rest.chars().all(|c| c.is_ascii_digit() || c == ':') {
30528 return true;
30529 }
30530 }
30531 }
30532 // Check for named timezone abbreviations
30533 let ts_lower = trimmed.to_lowercase();
30534 let tz_abbrevs = [" utc", " gmt", " cet", " est", " pst", " cst", " mst"];
30535 for abbrev in &tz_abbrevs {
30536 if ts_lower.ends_with(abbrev) {
30537 return true;
30538 }
30539 }
30540 false
30541 }
30542
30543 /// Maybe CAST timestamp literal to TIMESTAMPTZ for Snowflake
30544 fn maybe_cast_ts_to_tz(expr: Expression, func_name: &str) -> Expression {
30545 use crate::expressions::{Cast, DataType, Literal};
30546 match expr {
30547 Expression::Literal(Literal::Timestamp(s)) => {
30548 let tz = func_name.starts_with("TIMESTAMP");
30549 Expression::Cast(Box::new(Cast {
30550 this: Expression::Literal(Literal::String(s)),
30551 to: if tz {
30552 DataType::Timestamp {
30553 timezone: true,
30554 precision: None,
30555 }
30556 } else {
30557 DataType::Timestamp {
30558 timezone: false,
30559 precision: None,
30560 }
30561 },
30562 trailing_comments: vec![],
30563 double_colon_syntax: false,
30564 format: None,
30565 default: None,
30566 }))
30567 }
30568 other => other,
30569 }
30570 }
30571
30572 /// Maybe CAST timestamp literal to TIMESTAMP (no tz)
30573 fn maybe_cast_ts(expr: Expression) -> Expression {
30574 use crate::expressions::{Cast, DataType, Literal};
30575 match expr {
30576 Expression::Literal(Literal::Timestamp(s)) => Expression::Cast(Box::new(Cast {
30577 this: Expression::Literal(Literal::String(s)),
30578 to: DataType::Timestamp {
30579 timezone: false,
30580 precision: None,
30581 },
30582 trailing_comments: vec![],
30583 double_colon_syntax: false,
30584 format: None,
30585 default: None,
30586 })),
30587 other => other,
30588 }
30589 }
30590
30591 /// Convert DATE 'x' literal to CAST('x' AS DATE)
30592 fn date_literal_to_cast(expr: Expression) -> Expression {
30593 use crate::expressions::{Cast, DataType, Literal};
30594 match expr {
30595 Expression::Literal(Literal::Date(s)) => Expression::Cast(Box::new(Cast {
30596 this: Expression::Literal(Literal::String(s)),
30597 to: DataType::Date,
30598 trailing_comments: vec![],
30599 double_colon_syntax: false,
30600 format: None,
30601 default: None,
30602 })),
30603 other => other,
30604 }
30605 }
30606
30607 /// Ensure an expression that should be a date is CAST(... AS DATE).
30608 /// Handles both DATE literals and string literals that look like dates.
30609 fn ensure_cast_date(expr: Expression) -> Expression {
30610 use crate::expressions::{Cast, DataType, Literal};
30611 match expr {
30612 Expression::Literal(Literal::Date(s)) => Expression::Cast(Box::new(Cast {
30613 this: Expression::Literal(Literal::String(s)),
30614 to: DataType::Date,
30615 trailing_comments: vec![],
30616 double_colon_syntax: false,
30617 format: None,
30618 default: None,
30619 })),
30620 Expression::Literal(Literal::String(ref _s)) => {
30621 // String literal that should be a date -> CAST('s' AS DATE)
30622 Expression::Cast(Box::new(Cast {
30623 this: expr,
30624 to: DataType::Date,
30625 trailing_comments: vec![],
30626 double_colon_syntax: false,
30627 format: None,
30628 default: None,
30629 }))
30630 }
30631 // Already a CAST or other expression -> leave as-is
30632 other => other,
30633 }
30634 }
30635
30636 /// Force CAST(expr AS DATE) for any expression (not just literals)
30637 /// Skips if the expression is already a CAST to DATE
30638 fn force_cast_date(expr: Expression) -> Expression {
30639 use crate::expressions::{Cast, DataType};
30640 // If it's already a CAST to DATE, don't double-wrap
30641 if let Expression::Cast(ref c) = expr {
30642 if matches!(c.to, DataType::Date) {
30643 return expr;
30644 }
30645 }
30646 Expression::Cast(Box::new(Cast {
30647 this: expr,
30648 to: DataType::Date,
30649 trailing_comments: vec![],
30650 double_colon_syntax: false,
30651 format: None,
30652 default: None,
30653 }))
30654 }
30655
30656 /// Internal TO_DATE function that won't be converted to CAST by the Snowflake handler.
30657 /// Uses the name `_POLYGLOT_TO_DATE` which is not recognized by the TO_DATE -> CAST logic.
30658 /// The Snowflake DATEDIFF handler converts these back to TO_DATE.
30659 const PRESERVED_TO_DATE: &'static str = "_POLYGLOT_TO_DATE";
30660
30661 fn ensure_to_date_preserved(expr: Expression) -> Expression {
30662 use crate::expressions::{Function, Literal};
30663 if matches!(expr, Expression::Literal(Literal::String(_))) {
30664 Expression::Function(Box::new(Function::new(
30665 Self::PRESERVED_TO_DATE.to_string(),
30666 vec![expr],
30667 )))
30668 } else {
30669 expr
30670 }
30671 }
30672
30673 /// TRY_CAST(expr AS DATE) - used for DuckDB when TO_DATE is unwrapped
30674 fn try_cast_date(expr: Expression) -> Expression {
30675 use crate::expressions::{Cast, DataType};
30676 Expression::TryCast(Box::new(Cast {
30677 this: expr,
30678 to: DataType::Date,
30679 trailing_comments: vec![],
30680 double_colon_syntax: false,
30681 format: None,
30682 default: None,
30683 }))
30684 }
30685
30686 /// CAST(CAST(expr AS TIMESTAMP) AS DATE) - used when Hive string dates need to be cast
30687 fn double_cast_timestamp_date(expr: Expression) -> Expression {
30688 use crate::expressions::{Cast, DataType};
30689 let inner = Expression::Cast(Box::new(Cast {
30690 this: expr,
30691 to: DataType::Timestamp {
30692 timezone: false,
30693 precision: None,
30694 },
30695 trailing_comments: vec![],
30696 double_colon_syntax: false,
30697 format: None,
30698 default: None,
30699 }));
30700 Expression::Cast(Box::new(Cast {
30701 this: inner,
30702 to: DataType::Date,
30703 trailing_comments: vec![],
30704 double_colon_syntax: false,
30705 format: None,
30706 default: None,
30707 }))
30708 }
30709
30710 /// CAST(CAST(expr AS DATETIME) AS DATE) - BigQuery variant
30711 fn double_cast_datetime_date(expr: Expression) -> Expression {
30712 use crate::expressions::{Cast, DataType};
30713 let inner = Expression::Cast(Box::new(Cast {
30714 this: expr,
30715 to: DataType::Custom {
30716 name: "DATETIME".to_string(),
30717 },
30718 trailing_comments: vec![],
30719 double_colon_syntax: false,
30720 format: None,
30721 default: None,
30722 }));
30723 Expression::Cast(Box::new(Cast {
30724 this: inner,
30725 to: DataType::Date,
30726 trailing_comments: vec![],
30727 double_colon_syntax: false,
30728 format: None,
30729 default: None,
30730 }))
30731 }
30732
30733 /// CAST(CAST(expr AS DATETIME2) AS DATE) - TSQL variant
30734 fn double_cast_datetime2_date(expr: Expression) -> Expression {
30735 use crate::expressions::{Cast, DataType};
30736 let inner = Expression::Cast(Box::new(Cast {
30737 this: expr,
30738 to: DataType::Custom {
30739 name: "DATETIME2".to_string(),
30740 },
30741 trailing_comments: vec![],
30742 double_colon_syntax: false,
30743 format: None,
30744 default: None,
30745 }));
30746 Expression::Cast(Box::new(Cast {
30747 this: inner,
30748 to: DataType::Date,
30749 trailing_comments: vec![],
30750 double_colon_syntax: false,
30751 format: None,
30752 default: None,
30753 }))
30754 }
30755
30756 /// Convert Hive/Java-style date format strings to C-style (strftime) format
30757 /// e.g., "yyyy-MM-dd'T'HH" -> "%Y-%m-%d'T'%H"
30758 fn hive_format_to_c_format(fmt: &str) -> String {
30759 let mut result = String::new();
30760 let chars: Vec<char> = fmt.chars().collect();
30761 let mut i = 0;
30762 while i < chars.len() {
30763 match chars[i] {
30764 'y' => {
30765 let mut count = 0;
30766 while i < chars.len() && chars[i] == 'y' {
30767 count += 1;
30768 i += 1;
30769 }
30770 if count >= 4 {
30771 result.push_str("%Y");
30772 } else if count == 2 {
30773 result.push_str("%y");
30774 } else {
30775 result.push_str("%Y");
30776 }
30777 }
30778 'M' => {
30779 let mut count = 0;
30780 while i < chars.len() && chars[i] == 'M' {
30781 count += 1;
30782 i += 1;
30783 }
30784 if count >= 3 {
30785 result.push_str("%b");
30786 } else if count == 2 {
30787 result.push_str("%m");
30788 } else {
30789 result.push_str("%m");
30790 }
30791 }
30792 'd' => {
30793 let mut _count = 0;
30794 while i < chars.len() && chars[i] == 'd' {
30795 _count += 1;
30796 i += 1;
30797 }
30798 result.push_str("%d");
30799 }
30800 'H' => {
30801 let mut _count = 0;
30802 while i < chars.len() && chars[i] == 'H' {
30803 _count += 1;
30804 i += 1;
30805 }
30806 result.push_str("%H");
30807 }
30808 'h' => {
30809 let mut _count = 0;
30810 while i < chars.len() && chars[i] == 'h' {
30811 _count += 1;
30812 i += 1;
30813 }
30814 result.push_str("%I");
30815 }
30816 'm' => {
30817 let mut _count = 0;
30818 while i < chars.len() && chars[i] == 'm' {
30819 _count += 1;
30820 i += 1;
30821 }
30822 result.push_str("%M");
30823 }
30824 's' => {
30825 let mut _count = 0;
30826 while i < chars.len() && chars[i] == 's' {
30827 _count += 1;
30828 i += 1;
30829 }
30830 result.push_str("%S");
30831 }
30832 'S' => {
30833 // Fractional seconds - skip
30834 while i < chars.len() && chars[i] == 'S' {
30835 i += 1;
30836 }
30837 result.push_str("%f");
30838 }
30839 'a' => {
30840 // AM/PM
30841 while i < chars.len() && chars[i] == 'a' {
30842 i += 1;
30843 }
30844 result.push_str("%p");
30845 }
30846 'E' => {
30847 let mut count = 0;
30848 while i < chars.len() && chars[i] == 'E' {
30849 count += 1;
30850 i += 1;
30851 }
30852 if count >= 4 {
30853 result.push_str("%A");
30854 } else {
30855 result.push_str("%a");
30856 }
30857 }
30858 '\'' => {
30859 // Quoted literal text - pass through the quotes and content
30860 result.push('\'');
30861 i += 1;
30862 while i < chars.len() && chars[i] != '\'' {
30863 result.push(chars[i]);
30864 i += 1;
30865 }
30866 if i < chars.len() {
30867 result.push('\'');
30868 i += 1;
30869 }
30870 }
30871 c => {
30872 result.push(c);
30873 i += 1;
30874 }
30875 }
30876 }
30877 result
30878 }
30879
30880 /// Convert Hive/Java format to Presto format (uses %T for HH:mm:ss)
30881 fn hive_format_to_presto_format(fmt: &str) -> String {
30882 let c_fmt = Self::hive_format_to_c_format(fmt);
30883 // Presto uses %T for HH:MM:SS
30884 c_fmt.replace("%H:%M:%S", "%T")
30885 }
30886
30887 /// Ensure a timestamp-like expression for DuckDB with CAST(... AS TIMESTAMP)
30888 fn ensure_cast_timestamp(expr: Expression) -> Expression {
30889 use crate::expressions::{Cast, DataType, Literal};
30890 match expr {
30891 Expression::Literal(Literal::Timestamp(s)) => Expression::Cast(Box::new(Cast {
30892 this: Expression::Literal(Literal::String(s)),
30893 to: DataType::Timestamp {
30894 timezone: false,
30895 precision: None,
30896 },
30897 trailing_comments: vec![],
30898 double_colon_syntax: false,
30899 format: None,
30900 default: None,
30901 })),
30902 Expression::Literal(Literal::String(ref _s)) => Expression::Cast(Box::new(Cast {
30903 this: expr,
30904 to: DataType::Timestamp {
30905 timezone: false,
30906 precision: None,
30907 },
30908 trailing_comments: vec![],
30909 double_colon_syntax: false,
30910 format: None,
30911 default: None,
30912 })),
30913 Expression::Literal(Literal::Datetime(s)) => Expression::Cast(Box::new(Cast {
30914 this: Expression::Literal(Literal::String(s)),
30915 to: DataType::Timestamp {
30916 timezone: false,
30917 precision: None,
30918 },
30919 trailing_comments: vec![],
30920 double_colon_syntax: false,
30921 format: None,
30922 default: None,
30923 })),
30924 other => other,
30925 }
30926 }
30927
30928 /// Force CAST to TIMESTAMP for any expression (not just literals)
30929 /// Used when transpiling from Redshift/TSQL where DATEDIFF/DATEADD args need explicit timestamp cast
30930 fn force_cast_timestamp(expr: Expression) -> Expression {
30931 use crate::expressions::{Cast, DataType};
30932 // Don't double-wrap if already a CAST to TIMESTAMP
30933 if let Expression::Cast(ref c) = expr {
30934 if matches!(c.to, DataType::Timestamp { .. }) {
30935 return expr;
30936 }
30937 }
30938 Expression::Cast(Box::new(Cast {
30939 this: expr,
30940 to: DataType::Timestamp {
30941 timezone: false,
30942 precision: None,
30943 },
30944 trailing_comments: vec![],
30945 double_colon_syntax: false,
30946 format: None,
30947 default: None,
30948 }))
30949 }
30950
30951 /// Ensure a timestamp-like expression for DuckDB with CAST(... AS TIMESTAMPTZ)
30952 fn ensure_cast_timestamptz(expr: Expression) -> Expression {
30953 use crate::expressions::{Cast, DataType, Literal};
30954 match expr {
30955 Expression::Literal(Literal::Timestamp(s)) => Expression::Cast(Box::new(Cast {
30956 this: Expression::Literal(Literal::String(s)),
30957 to: DataType::Timestamp {
30958 timezone: true,
30959 precision: None,
30960 },
30961 trailing_comments: vec![],
30962 double_colon_syntax: false,
30963 format: None,
30964 default: None,
30965 })),
30966 Expression::Literal(Literal::String(ref _s)) => Expression::Cast(Box::new(Cast {
30967 this: expr,
30968 to: DataType::Timestamp {
30969 timezone: true,
30970 precision: None,
30971 },
30972 trailing_comments: vec![],
30973 double_colon_syntax: false,
30974 format: None,
30975 default: None,
30976 })),
30977 Expression::Literal(Literal::Datetime(s)) => Expression::Cast(Box::new(Cast {
30978 this: Expression::Literal(Literal::String(s)),
30979 to: DataType::Timestamp {
30980 timezone: true,
30981 precision: None,
30982 },
30983 trailing_comments: vec![],
30984 double_colon_syntax: false,
30985 format: None,
30986 default: None,
30987 })),
30988 other => other,
30989 }
30990 }
30991
30992 /// Ensure expression is CAST to DATETIME (for BigQuery)
30993 fn ensure_cast_datetime(expr: Expression) -> Expression {
30994 use crate::expressions::{Cast, DataType, Literal};
30995 match expr {
30996 Expression::Literal(Literal::String(ref _s)) => Expression::Cast(Box::new(Cast {
30997 this: expr,
30998 to: DataType::Custom {
30999 name: "DATETIME".to_string(),
31000 },
31001 trailing_comments: vec![],
31002 double_colon_syntax: false,
31003 format: None,
31004 default: None,
31005 })),
31006 other => other,
31007 }
31008 }
31009
31010 /// Force CAST expression to DATETIME (for BigQuery) - always wraps unless already DATETIME
31011 fn force_cast_datetime(expr: Expression) -> Expression {
31012 use crate::expressions::{Cast, DataType};
31013 if let Expression::Cast(ref c) = expr {
31014 if let DataType::Custom { ref name } = c.to {
31015 if name.eq_ignore_ascii_case("DATETIME") {
31016 return expr;
31017 }
31018 }
31019 }
31020 Expression::Cast(Box::new(Cast {
31021 this: expr,
31022 to: DataType::Custom {
31023 name: "DATETIME".to_string(),
31024 },
31025 trailing_comments: vec![],
31026 double_colon_syntax: false,
31027 format: None,
31028 default: None,
31029 }))
31030 }
31031
31032 /// Ensure expression is CAST to DATETIME2 (for TSQL)
31033 fn ensure_cast_datetime2(expr: Expression) -> Expression {
31034 use crate::expressions::{Cast, DataType, Literal};
31035 match expr {
31036 Expression::Literal(Literal::String(ref _s)) => Expression::Cast(Box::new(Cast {
31037 this: expr,
31038 to: DataType::Custom {
31039 name: "DATETIME2".to_string(),
31040 },
31041 trailing_comments: vec![],
31042 double_colon_syntax: false,
31043 format: None,
31044 default: None,
31045 })),
31046 other => other,
31047 }
31048 }
31049
31050 /// Convert TIMESTAMP 'x' literal to CAST('x' AS TIMESTAMPTZ) for DuckDB
31051 fn ts_literal_to_cast_tz(expr: Expression) -> Expression {
31052 use crate::expressions::{Cast, DataType, Literal};
31053 match expr {
31054 Expression::Literal(Literal::Timestamp(s)) => Expression::Cast(Box::new(Cast {
31055 this: Expression::Literal(Literal::String(s)),
31056 to: DataType::Timestamp {
31057 timezone: true,
31058 precision: None,
31059 },
31060 trailing_comments: vec![],
31061 double_colon_syntax: false,
31062 format: None,
31063 default: None,
31064 })),
31065 other => other,
31066 }
31067 }
31068
31069 /// Convert BigQuery format string to Snowflake format string
31070 fn bq_format_to_snowflake(format_expr: &Expression) -> Expression {
31071 use crate::expressions::Literal;
31072 if let Expression::Literal(Literal::String(s)) = format_expr {
31073 let sf = s
31074 .replace("%Y", "yyyy")
31075 .replace("%m", "mm")
31076 .replace("%d", "DD")
31077 .replace("%H", "HH24")
31078 .replace("%M", "MI")
31079 .replace("%S", "SS")
31080 .replace("%b", "mon")
31081 .replace("%B", "Month")
31082 .replace("%e", "FMDD");
31083 Expression::Literal(Literal::String(sf))
31084 } else {
31085 format_expr.clone()
31086 }
31087 }
31088
31089 /// Convert BigQuery format string to DuckDB format string
31090 fn bq_format_to_duckdb(format_expr: &Expression) -> Expression {
31091 use crate::expressions::Literal;
31092 if let Expression::Literal(Literal::String(s)) = format_expr {
31093 let duck = s
31094 .replace("%T", "%H:%M:%S")
31095 .replace("%F", "%Y-%m-%d")
31096 .replace("%D", "%m/%d/%y")
31097 .replace("%x", "%m/%d/%y")
31098 .replace("%c", "%a %b %-d %H:%M:%S %Y")
31099 .replace("%e", "%-d")
31100 .replace("%E6S", "%S.%f");
31101 Expression::Literal(Literal::String(duck))
31102 } else {
31103 format_expr.clone()
31104 }
31105 }
31106
31107 /// Convert BigQuery CAST FORMAT elements (like YYYY, MM, DD) to strftime (like %Y, %m, %d)
31108 fn bq_cast_format_to_strftime(format_expr: &Expression) -> Expression {
31109 use crate::expressions::Literal;
31110 if let Expression::Literal(Literal::String(s)) = format_expr {
31111 // Replace format elements from longest to shortest to avoid partial matches
31112 let result = s
31113 .replace("YYYYMMDD", "%Y%m%d")
31114 .replace("YYYY", "%Y")
31115 .replace("YY", "%y")
31116 .replace("MONTH", "%B")
31117 .replace("MON", "%b")
31118 .replace("MM", "%m")
31119 .replace("DD", "%d")
31120 .replace("HH24", "%H")
31121 .replace("HH12", "%I")
31122 .replace("HH", "%I")
31123 .replace("MI", "%M")
31124 .replace("SSTZH", "%S%z")
31125 .replace("SS", "%S")
31126 .replace("TZH", "%z");
31127 Expression::Literal(Literal::String(result))
31128 } else {
31129 format_expr.clone()
31130 }
31131 }
31132
31133 /// Normalize BigQuery format strings for BQ->BQ output
31134 fn bq_format_normalize_bq(format_expr: &Expression) -> Expression {
31135 use crate::expressions::Literal;
31136 if let Expression::Literal(Literal::String(s)) = format_expr {
31137 let norm = s.replace("%H:%M:%S", "%T").replace("%x", "%D");
31138 Expression::Literal(Literal::String(norm))
31139 } else {
31140 format_expr.clone()
31141 }
31142 }
31143}
31144
31145#[cfg(test)]
31146mod tests {
31147 use super::*;
31148
    #[test]
    fn test_dialect_type_from_str() {
        // DialectType::from_str accepts both canonical names and aliases:
        // "postgres" and "postgresql" map to the same variant.
        assert_eq!(
            "postgres".parse::<DialectType>().unwrap(),
            DialectType::PostgreSQL
        );
        assert_eq!(
            "postgresql".parse::<DialectType>().unwrap(),
            DialectType::PostgreSQL
        );
        assert_eq!("mysql".parse::<DialectType>().unwrap(), DialectType::MySQL);
        assert_eq!(
            "bigquery".parse::<DialectType>().unwrap(),
            DialectType::BigQuery
        );
    }
31165
    #[test]
    fn test_basic_transpile() {
        // Smoke test: a trivial statement survives Generic -> PostgreSQL
        // transpilation unchanged and produces exactly one output statement.
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect
            .transpile_to("SELECT 1", DialectType::PostgreSQL)
            .unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0], "SELECT 1");
    }
31175
    #[test]
    fn test_function_transformation_mysql() {
        // NVL should be transformed to IFNULL in MySQL
        // (NVL is the Oracle spelling; MySQL only has IFNULL).
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect
            .transpile_to("SELECT NVL(a, b)", DialectType::MySQL)
            .unwrap();
        assert_eq!(result[0], "SELECT IFNULL(a, b)");
    }
31185
    #[test]
    fn test_get_path_duckdb() {
        // Test: step by step
        // NOTE(review): diagnostic smoke test — the only checks are the
        // unwrap()s (each transpile must succeed); the generated SQL is
        // printed via eprintln! but not asserted on.
        let snowflake = Dialect::get(DialectType::Snowflake);

        // Step 1: Parse and check what Snowflake produces as intermediate
        let result_sf_sf = snowflake
            .transpile_to(
                "SELECT PARSE_JSON('{\"fruit\":\"banana\"}'):fruit",
                DialectType::Snowflake,
            )
            .unwrap();
        eprintln!("Snowflake->Snowflake colon: {}", result_sf_sf[0]);

        // Step 2: DuckDB target
        let result_sf_dk = snowflake
            .transpile_to(
                "SELECT PARSE_JSON('{\"fruit\":\"banana\"}'):fruit",
                DialectType::DuckDB,
            )
            .unwrap();
        eprintln!("Snowflake->DuckDB colon: {}", result_sf_dk[0]);

        // Step 3: GET_PATH directly
        let result_gp = snowflake
            .transpile_to(
                "SELECT GET_PATH(PARSE_JSON('{\"fruit\":\"banana\"}'), 'fruit')",
                DialectType::DuckDB,
            )
            .unwrap();
        eprintln!("Snowflake->DuckDB explicit GET_PATH: {}", result_gp[0]);
    }
31218
    #[test]
    fn test_function_transformation_postgres() {
        // IFNULL should be transformed to COALESCE in PostgreSQL
        // (Postgres has no IFNULL/NVL; COALESCE is the standard form).
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect
            .transpile_to("SELECT IFNULL(a, b)", DialectType::PostgreSQL)
            .unwrap();
        assert_eq!(result[0], "SELECT COALESCE(a, b)");

        // NVL should also be transformed to COALESCE
        let result = dialect
            .transpile_to("SELECT NVL(a, b)", DialectType::PostgreSQL)
            .unwrap();
        assert_eq!(result[0], "SELECT COALESCE(a, b)");
    }
31234
    #[test]
    fn test_hive_cast_to_trycast() {
        // Hive CAST should become TRY_CAST for targets that support it
        // (Hive CAST returns NULL on failure, so TRY_CAST preserves the
        // source semantics). Note the Presto target also renames INT to
        // INTEGER.
        let hive = Dialect::get(DialectType::Hive);
        let result = hive
            .transpile_to("CAST(1 AS INT)", DialectType::DuckDB)
            .unwrap();
        assert_eq!(result[0], "TRY_CAST(1 AS INT)");

        let result = hive
            .transpile_to("CAST(1 AS INT)", DialectType::Presto)
            .unwrap();
        assert_eq!(result[0], "TRY_CAST(1 AS INTEGER)");
    }
31249
    #[test]
    fn test_hive_array_identity() {
        // Hive ARRAY<DATE> should preserve angle bracket syntax
        // Exercised through both entry points: the one-shot transpile_to and
        // the explicit parse -> transform -> generate pipeline, which must
        // agree.
        let sql = "CREATE EXTERNAL TABLE `my_table` (`a7` ARRAY<DATE>) ROW FORMAT SERDE 'a' STORED AS INPUTFORMAT 'b' OUTPUTFORMAT 'c' LOCATION 'd' TBLPROPERTIES ('e'='f')";
        let hive = Dialect::get(DialectType::Hive);

        // Test via transpile_to (this works)
        let result = hive.transpile_to(sql, DialectType::Hive).unwrap();
        eprintln!("Hive ARRAY via transpile_to: {}", result[0]);
        assert!(
            result[0].contains("ARRAY<DATE>"),
            "transpile_to: Expected ARRAY<DATE>, got: {}",
            result[0]
        );

        // Test via parse -> transform -> generate (identity test path)
        let ast = hive.parse(sql).unwrap();
        let transformed = hive.transform(ast[0].clone()).unwrap();
        let output = hive.generate(&transformed).unwrap();
        eprintln!("Hive ARRAY via identity path: {}", output);
        assert!(
            output.contains("ARRAY<DATE>"),
            "identity path: Expected ARRAY<DATE>, got: {}",
            output
        );
    }
31276
    #[test]
    fn test_starrocks_delete_between_expansion() {
        // StarRocks doesn't support BETWEEN in DELETE statements
        // The expansion must be statement-sensitive: rewritten inside DELETE,
        // left intact inside SELECT.
        let dialect = Dialect::get(DialectType::Generic);

        // BETWEEN should be expanded to >= AND <= in DELETE
        let result = dialect
            .transpile_to(
                "DELETE FROM t WHERE a BETWEEN b AND c",
                DialectType::StarRocks,
            )
            .unwrap();
        assert_eq!(result[0], "DELETE FROM t WHERE a >= b AND a <= c");

        // NOT BETWEEN should be expanded to < OR > in DELETE
        let result = dialect
            .transpile_to(
                "DELETE FROM t WHERE a NOT BETWEEN b AND c",
                DialectType::StarRocks,
            )
            .unwrap();
        assert_eq!(result[0], "DELETE FROM t WHERE a < b OR a > c");

        // BETWEEN in SELECT should NOT be expanded (StarRocks supports it there)
        let result = dialect
            .transpile_to(
                "SELECT * FROM t WHERE a BETWEEN b AND c",
                DialectType::StarRocks,
            )
            .unwrap();
        assert!(
            result[0].contains("BETWEEN"),
            "BETWEEN should be preserved in SELECT"
        );
    }
31312
    #[test]
    fn test_snowflake_ltrim_rtrim_parse() {
        // Regression guard: nested LTRIM(RTRIM(...)) must parse and
        // transpile without error; the exact output is only printed.
        let sf = Dialect::get(DialectType::Snowflake);
        let sql = "SELECT LTRIM(RTRIM(col)) FROM t1";
        let result = sf.transpile_to(sql, DialectType::DuckDB);
        match &result {
            Ok(r) => eprintln!("LTRIM/RTRIM result: {}", r[0]),
            Err(e) => eprintln!("LTRIM/RTRIM error: {}", e),
        }
        assert!(
            result.is_ok(),
            "Expected successful parse of LTRIM(RTRIM(col)), got error: {:?}",
            result.err()
        );
    }
31328
    #[test]
    fn test_duckdb_count_if_parse() {
        // Regression guard: a bare COUNT_IF(x) expression must parse and
        // round-trip through DuckDB without error.
        let duck = Dialect::get(DialectType::DuckDB);
        let sql = "COUNT_IF(x)";
        let result = duck.transpile_to(sql, DialectType::DuckDB);
        match &result {
            Ok(r) => eprintln!("COUNT_IF result: {}", r[0]),
            Err(e) => eprintln!("COUNT_IF error: {}", e),
        }
        assert!(
            result.is_ok(),
            "Expected successful parse of COUNT_IF(x), got error: {:?}",
            result.err()
        );
    }
31344
    #[test]
    fn test_tsql_cast_tinyint_parse() {
        // Regression guard: TSQL's TINYINT in a CAST must transpile to
        // DuckDB without error; only success is asserted.
        let tsql = Dialect::get(DialectType::TSQL);
        let sql = "CAST(X AS TINYINT)";
        let result = tsql.transpile_to(sql, DialectType::DuckDB);
        match &result {
            Ok(r) => eprintln!("TSQL CAST TINYINT result: {}", r[0]),
            Err(e) => eprintln!("TSQL CAST TINYINT error: {}", e),
        }
        assert!(
            result.is_ok(),
            "Expected successful transpile, got error: {:?}",
            result.err()
        );
    }
31360
    #[test]
    fn test_pg_hash_bitwise_xor() {
        // Postgres uses `#` for bitwise XOR; a PG -> PG round trip must
        // leave the operator untouched.
        let dialect = Dialect::get(DialectType::PostgreSQL);
        let result = dialect
            .transpile_to("x # y", DialectType::PostgreSQL)
            .unwrap();
        assert_eq!(result[0], "x # y");
    }
31369
    #[test]
    fn test_pg_array_to_duckdb() {
        // Postgres ARRAY[...] constructors become DuckDB's bracket literals,
        // while the containment operator @> is kept verbatim.
        let dialect = Dialect::get(DialectType::PostgreSQL);
        let result = dialect
            .transpile_to("SELECT ARRAY[1, 2, 3] @> ARRAY[1, 2]", DialectType::DuckDB)
            .unwrap();
        assert_eq!(result[0], "SELECT [1, 2, 3] @> [1, 2]");
    }
31378
    #[test]
    fn test_array_remove_bigquery() {
        // BigQuery has no ARRAY_REMOVE; it is rewritten to an
        // ARRAY(SELECT ... FROM UNNEST(...) WHERE ...) filter.
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect
            .transpile_to("ARRAY_REMOVE(the_array, target)", DialectType::BigQuery)
            .unwrap();
        assert_eq!(
            result[0],
            "ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)"
        );
    }
31390
    #[test]
    fn test_map_clickhouse_case() {
        // NOTE(review): diagnostic test — asserts only that parse and
        // transpile succeed (via unwrap); the parsed AST and the ClickHouse
        // output are printed, not checked.
        let dialect = Dialect::get(DialectType::Generic);
        let parsed = dialect
            .parse("CAST(MAP('a', '1') AS MAP(TEXT, TEXT))")
            .unwrap();
        eprintln!("MAP parsed: {:?}", parsed);
        let result = dialect
            .transpile_to(
                "CAST(MAP('a', '1') AS MAP(TEXT, TEXT))",
                DialectType::ClickHouse,
            )
            .unwrap();
        eprintln!("MAP result: {}", result[0]);
    }
31406
    #[test]
    fn test_generate_date_array_presto() {
        // GENERATE_DATE_ARRAY has no Presto equivalent: it is rewritten to
        // SEQUENCE, DATE literals are lowered to CASTs, and the week
        // interval is expanded into a multiple of days.
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile_to(
            "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
            DialectType::Presto,
        ).unwrap();
        eprintln!("GDA -> Presto: {}", result[0]);
        assert_eq!(result[0], "SELECT * FROM UNNEST(SEQUENCE(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), (1 * INTERVAL '7' DAY)))");
    }
31417
    #[test]
    fn test_generate_date_array_postgres() {
        // NOTE(review): diagnostic only — success of transpile_to (unwrap)
        // is the assertion; the PostgreSQL rendering is just printed.
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile_to(
            "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
            DialectType::PostgreSQL,
        ).unwrap();
        eprintln!("GDA -> PostgreSQL: {}", result[0]);
    }
31427
    #[test]
    fn test_generate_date_array_snowflake() {
        // NOTE(review): diagnostic only — success of transpile_to (unwrap)
        // is the assertion; the Snowflake rendering is just printed.
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile_to(
            "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
            DialectType::Snowflake,
        ).unwrap();
        eprintln!("GDA -> Snowflake: {}", result[0]);
    }
31437
    #[test]
    fn test_array_length_generate_date_array_snowflake() {
        // NOTE(review): diagnostic only — checks that GENERATE_DATE_ARRAY
        // nested inside ARRAY_LENGTH still transpiles without error.
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile_to(
            "SELECT ARRAY_LENGTH(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
            DialectType::Snowflake,
        ).unwrap();
        eprintln!("ARRAY_LENGTH(GDA) -> Snowflake: {}", result[0]);
    }
31447
    #[test]
    fn test_generate_date_array_mysql() {
        // NOTE(review): diagnostic only — success of transpile_to (unwrap)
        // is the assertion; the MySQL rendering is just printed.
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile_to(
            "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
            DialectType::MySQL,
        ).unwrap();
        eprintln!("GDA -> MySQL: {}", result[0]);
    }
31457
    #[test]
    fn test_generate_date_array_redshift() {
        // NOTE(review): diagnostic only — success of transpile_to (unwrap)
        // is the assertion; the Redshift rendering is just printed.
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile_to(
            "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
            DialectType::Redshift,
        ).unwrap();
        eprintln!("GDA -> Redshift: {}", result[0]);
    }
31467
    #[test]
    fn test_generate_date_array_tsql() {
        // NOTE(review): diagnostic only — success of transpile_to (unwrap)
        // is the assertion; the TSQL rendering is just printed.
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile_to(
            "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
            DialectType::TSQL,
        ).unwrap();
        eprintln!("GDA -> TSQL: {}", result[0]);
    }
31477
    #[test]
    fn test_struct_colon_syntax() {
        // NOTE(review): purely diagnostic — both the colon-free and colon
        // STRUCT<...> field syntaxes are fed through the ClickHouse target
        // and their results (or errors) printed; nothing is asserted, so
        // this never fails.
        let dialect = Dialect::get(DialectType::Generic);
        // Test without colon first
        let result = dialect.transpile_to(
            "CAST((1, 2, 3, 4) AS STRUCT<a TINYINT, b SMALLINT, c INT, d BIGINT>)",
            DialectType::ClickHouse,
        );
        match result {
            Ok(r) => eprintln!("STRUCT no colon -> ClickHouse: {}", r[0]),
            Err(e) => eprintln!("STRUCT no colon error: {}", e),
        }
        // Now test with colon
        let result = dialect.transpile_to(
            "CAST((1, 2, 3, 4) AS STRUCT<a: TINYINT, b: SMALLINT, c: INT, d: BIGINT>)",
            DialectType::ClickHouse,
        );
        match result {
            Ok(r) => eprintln!("STRUCT colon -> ClickHouse: {}", r[0]),
            Err(e) => eprintln!("STRUCT colon error: {}", e),
        }
    }
31500
    #[test]
    fn test_generate_date_array_cte_wrapped_mysql() {
        // NOTE(review): diagnostic only — verifies GENERATE_DATE_ARRAY
        // inside a CTE transpiles to MySQL without error; output is printed.
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile_to(
            "WITH dates AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))) SELECT * FROM dates",
            DialectType::MySQL,
        ).unwrap();
        eprintln!("GDA CTE -> MySQL: {}", result[0]);
    }
31510
31511 #[test]
31512 fn test_generate_date_array_cte_wrapped_tsql() {
31513 let dialect = Dialect::get(DialectType::Generic);
31514 let result = dialect.transpile_to(
31515 "WITH dates AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))) SELECT * FROM dates",
31516 DialectType::TSQL,
31517 ).unwrap();
31518 eprintln!("GDA CTE -> TSQL: {}", result[0]);
31519 }
31520
31521 #[test]
31522 fn test_decode_literal_no_null_check() {
31523 // Oracle DECODE with all literals should produce simple equality, no IS NULL
31524 let dialect = Dialect::get(DialectType::Oracle);
31525 let result = dialect
31526 .transpile_to("SELECT decode(1,2,3,4)", DialectType::DuckDB)
31527 .unwrap();
31528 assert_eq!(
31529 result[0], "SELECT CASE WHEN 1 = 2 THEN 3 ELSE 4 END",
31530 "Literal DECODE should not have IS NULL checks"
31531 );
31532 }
31533
31534 #[test]
31535 fn test_decode_column_vs_literal_no_null_check() {
31536 // Oracle DECODE with column vs literal should use simple equality (like sqlglot)
31537 let dialect = Dialect::get(DialectType::Oracle);
31538 let result = dialect
31539 .transpile_to("SELECT decode(col, 2, 3, 4) FROM t", DialectType::DuckDB)
31540 .unwrap();
31541 assert_eq!(
31542 result[0], "SELECT CASE WHEN col = 2 THEN 3 ELSE 4 END FROM t",
31543 "Column vs literal DECODE should not have IS NULL checks"
31544 );
31545 }
31546
31547 #[test]
31548 fn test_decode_column_vs_column_keeps_null_check() {
31549 // Oracle DECODE with column vs column should keep null-safe comparison
31550 let dialect = Dialect::get(DialectType::Oracle);
31551 let result = dialect
31552 .transpile_to("SELECT decode(col, col2, 3, 4) FROM t", DialectType::DuckDB)
31553 .unwrap();
31554 assert!(
31555 result[0].contains("IS NULL"),
31556 "Column vs column DECODE should have IS NULL checks, got: {}",
31557 result[0]
31558 );
31559 }
31560
31561 #[test]
31562 fn test_decode_null_search() {
31563 // Oracle DECODE with NULL search should use IS NULL
31564 let dialect = Dialect::get(DialectType::Oracle);
31565 let result = dialect
31566 .transpile_to("SELECT decode(col, NULL, 3, 4) FROM t", DialectType::DuckDB)
31567 .unwrap();
31568 assert_eq!(
31569 result[0],
31570 "SELECT CASE WHEN col IS NULL THEN 3 ELSE 4 END FROM t",
31571 );
31572 }
31573}