// polyglot_sql/dialects/mod.rs
1//! SQL Dialect System
2//!
3//! This module implements the dialect abstraction layer that enables SQL transpilation
4//! between 30+ database engines. Each dialect encapsulates three concerns:
5//!
6//! - **Tokenization**: Dialect-specific lexing rules (e.g., BigQuery uses backtick quoting,
7//! MySQL uses backtick for identifiers, TSQL uses square brackets).
8//! - **Generation**: How AST nodes are rendered back to SQL text, including identifier quoting
9//! style, function name casing, and syntax variations.
10//! - **Transformation**: AST-level rewrites that convert dialect-specific constructs to/from
11//! a normalized form (e.g., Snowflake `SQUARE(x)` becomes `POWER(x, 2)`).
12//!
13//! The primary entry point is [`Dialect::get`], which returns a configured [`Dialect`] instance
14//! for a given [`DialectType`]. From there, callers can [`parse`](Dialect::parse),
15//! [`generate`](Dialect::generate), [`transform`](Dialect::transform), or
16//! [`transpile_to`](Dialect::transpile_to) another dialect in a single call.
17//!
18//! Each concrete dialect (e.g., `PostgresDialect`, `BigQueryDialect`) implements the
19//! [`DialectImpl`] trait, which provides configuration hooks and expression-level transforms.
20//! Dialect modules live in submodules of this module and are re-exported here.
21
22mod generic; // Always compiled
23
24#[cfg(feature = "dialect-athena")]
25mod athena;
26#[cfg(feature = "dialect-bigquery")]
27mod bigquery;
28#[cfg(feature = "dialect-clickhouse")]
29mod clickhouse;
30#[cfg(feature = "dialect-cockroachdb")]
31mod cockroachdb;
32#[cfg(feature = "dialect-databricks")]
33mod databricks;
34#[cfg(feature = "dialect-datafusion")]
35mod datafusion;
36#[cfg(feature = "dialect-doris")]
37mod doris;
38#[cfg(feature = "dialect-dremio")]
39mod dremio;
40#[cfg(feature = "dialect-drill")]
41mod drill;
42#[cfg(feature = "dialect-druid")]
43mod druid;
44#[cfg(feature = "dialect-duckdb")]
45mod duckdb;
46#[cfg(feature = "dialect-dune")]
47mod dune;
48#[cfg(feature = "dialect-exasol")]
49mod exasol;
50#[cfg(feature = "dialect-fabric")]
51mod fabric;
52#[cfg(feature = "dialect-hive")]
53mod hive;
54#[cfg(feature = "dialect-materialize")]
55mod materialize;
56#[cfg(feature = "dialect-mysql")]
57mod mysql;
58#[cfg(feature = "dialect-oracle")]
59mod oracle;
60#[cfg(feature = "dialect-postgresql")]
61mod postgres;
62#[cfg(feature = "dialect-presto")]
63mod presto;
64#[cfg(feature = "dialect-redshift")]
65mod redshift;
66#[cfg(feature = "dialect-risingwave")]
67mod risingwave;
68#[cfg(feature = "dialect-singlestore")]
69mod singlestore;
70#[cfg(feature = "dialect-snowflake")]
71mod snowflake;
72#[cfg(feature = "dialect-solr")]
73mod solr;
74#[cfg(feature = "dialect-spark")]
75mod spark;
76#[cfg(feature = "dialect-sqlite")]
77mod sqlite;
78#[cfg(feature = "dialect-starrocks")]
79mod starrocks;
80#[cfg(feature = "dialect-tableau")]
81mod tableau;
82#[cfg(feature = "dialect-teradata")]
83mod teradata;
84#[cfg(feature = "dialect-tidb")]
85mod tidb;
86#[cfg(feature = "dialect-trino")]
87mod trino;
88#[cfg(feature = "dialect-tsql")]
89mod tsql;
90
91pub use generic::GenericDialect; // Always available
92
93#[cfg(feature = "dialect-athena")]
94pub use athena::AthenaDialect;
95#[cfg(feature = "dialect-bigquery")]
96pub use bigquery::BigQueryDialect;
97#[cfg(feature = "dialect-clickhouse")]
98pub use clickhouse::ClickHouseDialect;
99#[cfg(feature = "dialect-cockroachdb")]
100pub use cockroachdb::CockroachDBDialect;
101#[cfg(feature = "dialect-databricks")]
102pub use databricks::DatabricksDialect;
103#[cfg(feature = "dialect-datafusion")]
104pub use datafusion::DataFusionDialect;
105#[cfg(feature = "dialect-doris")]
106pub use doris::DorisDialect;
107#[cfg(feature = "dialect-dremio")]
108pub use dremio::DremioDialect;
109#[cfg(feature = "dialect-drill")]
110pub use drill::DrillDialect;
111#[cfg(feature = "dialect-druid")]
112pub use druid::DruidDialect;
113#[cfg(feature = "dialect-duckdb")]
114pub use duckdb::DuckDBDialect;
115#[cfg(feature = "dialect-dune")]
116pub use dune::DuneDialect;
117#[cfg(feature = "dialect-exasol")]
118pub use exasol::ExasolDialect;
119#[cfg(feature = "dialect-fabric")]
120pub use fabric::FabricDialect;
121#[cfg(feature = "dialect-hive")]
122pub use hive::HiveDialect;
123#[cfg(feature = "dialect-materialize")]
124pub use materialize::MaterializeDialect;
125#[cfg(feature = "dialect-mysql")]
126pub use mysql::MySQLDialect;
127#[cfg(feature = "dialect-oracle")]
128pub use oracle::OracleDialect;
129#[cfg(feature = "dialect-postgresql")]
130pub use postgres::PostgresDialect;
131#[cfg(feature = "dialect-presto")]
132pub use presto::PrestoDialect;
133#[cfg(feature = "dialect-redshift")]
134pub use redshift::RedshiftDialect;
135#[cfg(feature = "dialect-risingwave")]
136pub use risingwave::RisingWaveDialect;
137#[cfg(feature = "dialect-singlestore")]
138pub use singlestore::SingleStoreDialect;
139#[cfg(feature = "dialect-snowflake")]
140pub use snowflake::SnowflakeDialect;
141#[cfg(feature = "dialect-solr")]
142pub use solr::SolrDialect;
143#[cfg(feature = "dialect-spark")]
144pub use spark::SparkDialect;
145#[cfg(feature = "dialect-sqlite")]
146pub use sqlite::SQLiteDialect;
147#[cfg(feature = "dialect-starrocks")]
148pub use starrocks::StarRocksDialect;
149#[cfg(feature = "dialect-tableau")]
150pub use tableau::TableauDialect;
151#[cfg(feature = "dialect-teradata")]
152pub use teradata::TeradataDialect;
153#[cfg(feature = "dialect-tidb")]
154pub use tidb::TiDBDialect;
155#[cfg(feature = "dialect-trino")]
156pub use trino::TrinoDialect;
157#[cfg(feature = "dialect-tsql")]
158pub use tsql::TSQLDialect;
159
160use crate::error::Result;
161use crate::expressions::{Expression, FunctionBody};
162use crate::generator::{Generator, GeneratorConfig};
163use crate::parser::Parser;
164use crate::tokens::{Token, Tokenizer, TokenizerConfig};
165use serde::{Deserialize, Serialize};
166use std::collections::HashMap;
167use std::sync::{Arc, LazyLock, RwLock};
168
/// Enumeration of all supported SQL dialects.
///
/// Each variant corresponds to a specific SQL database engine or query language.
/// The `Generic` variant represents standard SQL with no dialect-specific behavior,
/// and is used as the default when no dialect is specified.
///
/// Dialect names are case-insensitive when parsed from strings via [`FromStr`].
/// Some dialects accept aliases (e.g., "mssql" and "sqlserver" both resolve to [`TSQL`](DialectType::TSQL)).
///
/// Serde (de)serialization uses lowercase variant names (`rename_all = "lowercase"`),
/// which match the strings produced by the `Display` impl and accepted by the
/// `FromStr` impl, so the textual form round-trips. When adding a variant, keep
/// all three representations (serde, `Display`, `FromStr`) in sync.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum DialectType {
    /// Standard SQL with no dialect-specific behavior (default).
    Generic,
    /// PostgreSQL -- advanced open-source relational database.
    PostgreSQL,
    /// MySQL -- widely-used open-source relational database (also accepts "mysql").
    MySQL,
    /// Google BigQuery -- serverless cloud data warehouse with unique syntax (backtick quoting, STRUCT types, QUALIFY).
    BigQuery,
    /// Snowflake -- cloud data platform with QUALIFY clause, FLATTEN, and variant types.
    Snowflake,
    /// DuckDB -- in-process analytical database with modern SQL extensions.
    DuckDB,
    /// SQLite -- lightweight embedded relational database.
    SQLite,
    /// Apache Hive -- data warehouse on Hadoop with HiveQL syntax.
    Hive,
    /// Apache Spark SQL -- distributed query engine (also accepts "spark2").
    Spark,
    /// Trino -- distributed SQL query engine (formerly PrestoSQL).
    Trino,
    /// PrestoDB -- distributed SQL query engine for big data.
    Presto,
    /// Amazon Redshift -- cloud data warehouse based on PostgreSQL.
    Redshift,
    /// Transact-SQL (T-SQL) -- Microsoft SQL Server and Azure SQL (also accepts "mssql", "sqlserver").
    TSQL,
    /// Oracle Database -- commercial relational database with PL/SQL extensions.
    Oracle,
    /// ClickHouse -- column-oriented OLAP database for real-time analytics.
    ClickHouse,
    /// Databricks SQL -- Spark-based lakehouse platform with QUALIFY support.
    Databricks,
    /// Amazon Athena -- serverless query service (hybrid Trino/Hive engine).
    Athena,
    /// Teradata -- enterprise data warehouse with proprietary SQL extensions.
    Teradata,
    /// Apache Doris -- real-time analytical database (MySQL-compatible).
    Doris,
    /// StarRocks -- sub-second OLAP database (MySQL-compatible).
    StarRocks,
    /// Materialize -- streaming SQL database built on differential dataflow.
    Materialize,
    /// RisingWave -- distributed streaming database with PostgreSQL compatibility.
    RisingWave,
    /// SingleStore (formerly MemSQL) -- distributed SQL database (also accepts "memsql").
    SingleStore,
    /// CockroachDB -- distributed SQL database with PostgreSQL compatibility (also accepts "cockroach").
    CockroachDB,
    /// TiDB -- distributed HTAP database with MySQL compatibility.
    TiDB,
    /// Apache Druid -- real-time analytics database.
    Druid,
    /// Apache Solr -- search platform with SQL interface.
    Solr,
    /// Tableau -- data visualization platform with its own SQL dialect.
    Tableau,
    /// Dune Analytics -- blockchain analytics SQL engine.
    Dune,
    /// Microsoft Fabric -- unified analytics platform (T-SQL based).
    Fabric,
    /// Apache Drill -- schema-free SQL query engine for big data.
    Drill,
    /// Dremio -- data lakehouse platform with Arrow-based query engine.
    Dremio,
    /// Exasol -- in-memory analytic database.
    Exasol,
    /// Apache DataFusion -- Arrow-based query engine with modern SQL extensions.
    DataFusion,
}
249
250impl Default for DialectType {
251 fn default() -> Self {
252 DialectType::Generic
253 }
254}
255
256impl std::fmt::Display for DialectType {
257 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
258 match self {
259 DialectType::Generic => write!(f, "generic"),
260 DialectType::PostgreSQL => write!(f, "postgresql"),
261 DialectType::MySQL => write!(f, "mysql"),
262 DialectType::BigQuery => write!(f, "bigquery"),
263 DialectType::Snowflake => write!(f, "snowflake"),
264 DialectType::DuckDB => write!(f, "duckdb"),
265 DialectType::SQLite => write!(f, "sqlite"),
266 DialectType::Hive => write!(f, "hive"),
267 DialectType::Spark => write!(f, "spark"),
268 DialectType::Trino => write!(f, "trino"),
269 DialectType::Presto => write!(f, "presto"),
270 DialectType::Redshift => write!(f, "redshift"),
271 DialectType::TSQL => write!(f, "tsql"),
272 DialectType::Oracle => write!(f, "oracle"),
273 DialectType::ClickHouse => write!(f, "clickhouse"),
274 DialectType::Databricks => write!(f, "databricks"),
275 DialectType::Athena => write!(f, "athena"),
276 DialectType::Teradata => write!(f, "teradata"),
277 DialectType::Doris => write!(f, "doris"),
278 DialectType::StarRocks => write!(f, "starrocks"),
279 DialectType::Materialize => write!(f, "materialize"),
280 DialectType::RisingWave => write!(f, "risingwave"),
281 DialectType::SingleStore => write!(f, "singlestore"),
282 DialectType::CockroachDB => write!(f, "cockroachdb"),
283 DialectType::TiDB => write!(f, "tidb"),
284 DialectType::Druid => write!(f, "druid"),
285 DialectType::Solr => write!(f, "solr"),
286 DialectType::Tableau => write!(f, "tableau"),
287 DialectType::Dune => write!(f, "dune"),
288 DialectType::Fabric => write!(f, "fabric"),
289 DialectType::Drill => write!(f, "drill"),
290 DialectType::Dremio => write!(f, "dremio"),
291 DialectType::Exasol => write!(f, "exasol"),
292 DialectType::DataFusion => write!(f, "datafusion"),
293 }
294 }
295}
296
297impl std::str::FromStr for DialectType {
298 type Err = crate::error::Error;
299
300 fn from_str(s: &str) -> Result<Self> {
301 match s.to_lowercase().as_str() {
302 "generic" | "" => Ok(DialectType::Generic),
303 "postgres" | "postgresql" => Ok(DialectType::PostgreSQL),
304 "mysql" => Ok(DialectType::MySQL),
305 "bigquery" => Ok(DialectType::BigQuery),
306 "snowflake" => Ok(DialectType::Snowflake),
307 "duckdb" => Ok(DialectType::DuckDB),
308 "sqlite" => Ok(DialectType::SQLite),
309 "hive" => Ok(DialectType::Hive),
310 "spark" | "spark2" => Ok(DialectType::Spark),
311 "trino" => Ok(DialectType::Trino),
312 "presto" => Ok(DialectType::Presto),
313 "redshift" => Ok(DialectType::Redshift),
314 "tsql" | "mssql" | "sqlserver" => Ok(DialectType::TSQL),
315 "oracle" => Ok(DialectType::Oracle),
316 "clickhouse" => Ok(DialectType::ClickHouse),
317 "databricks" => Ok(DialectType::Databricks),
318 "athena" => Ok(DialectType::Athena),
319 "teradata" => Ok(DialectType::Teradata),
320 "doris" => Ok(DialectType::Doris),
321 "starrocks" => Ok(DialectType::StarRocks),
322 "materialize" => Ok(DialectType::Materialize),
323 "risingwave" => Ok(DialectType::RisingWave),
324 "singlestore" | "memsql" => Ok(DialectType::SingleStore),
325 "cockroachdb" | "cockroach" => Ok(DialectType::CockroachDB),
326 "tidb" => Ok(DialectType::TiDB),
327 "druid" => Ok(DialectType::Druid),
328 "solr" => Ok(DialectType::Solr),
329 "tableau" => Ok(DialectType::Tableau),
330 "dune" => Ok(DialectType::Dune),
331 "fabric" => Ok(DialectType::Fabric),
332 "drill" => Ok(DialectType::Drill),
333 "dremio" => Ok(DialectType::Dremio),
334 "exasol" => Ok(DialectType::Exasol),
335 "datafusion" | "arrow-datafusion" | "arrow_datafusion" => Ok(DialectType::DataFusion),
336 _ => Err(crate::error::Error::parse(
337 format!("Unknown dialect: {}", s),
338 0,
339 0,
340 0,
341 0,
342 )),
343 }
344 }
345}
346
/// Trait that each concrete SQL dialect must implement.
///
/// `DialectImpl` provides the configuration hooks and per-expression transform logic
/// that distinguish one dialect from another. Implementors supply:
///
/// - A [`DialectType`] identifier.
/// - Optional overrides for tokenizer and generator configuration (defaults to generic SQL).
/// - An expression-level transform function ([`transform_expr`](DialectImpl::transform_expr))
/// that rewrites individual AST nodes for this dialect (e.g., converting `NVL` to `COALESCE`).
/// - An optional preprocessing step ([`preprocess`](DialectImpl::preprocess)) for whole-tree
/// rewrites that must run before the recursive per-node transform (e.g., eliminating QUALIFY).
///
/// The default implementations are no-ops, so a minimal dialect only needs to provide
/// [`dialect_type`](DialectImpl::dialect_type) and override the methods that differ from
/// standard SQL.
pub trait DialectImpl {
    /// Returns the [`DialectType`] that identifies this dialect.
    ///
    /// This is the only method without a default; every dialect must provide it.
    fn dialect_type(&self) -> DialectType;

    /// Returns the tokenizer configuration for this dialect.
    ///
    /// Override to customize identifier quoting characters, string escape rules,
    /// comment styles, and other lexing behavior. The default is the generic
    /// SQL tokenizer configuration.
    fn tokenizer_config(&self) -> TokenizerConfig {
        TokenizerConfig::default()
    }

    /// Returns the generator configuration for this dialect.
    ///
    /// Override to customize identifier quoting style, function name casing,
    /// keyword casing, and other SQL generation behavior. The default is the
    /// generic SQL generator configuration.
    fn generator_config(&self) -> GeneratorConfig {
        GeneratorConfig::default()
    }

    /// Returns a generator configuration tailored to a specific expression.
    ///
    /// Override this for hybrid dialects like Athena that route to different SQL engines
    /// based on expression type (e.g., Hive-style generation for DDL, Trino-style for DML).
    /// The default delegates to [`generator_config`](DialectImpl::generator_config)
    /// and ignores the expression.
    fn generator_config_for_expr(&self, _expr: &Expression) -> GeneratorConfig {
        self.generator_config()
    }

    /// Transforms a single expression node for this dialect, without recursing into children.
    ///
    /// This is the per-node rewrite hook invoked by [`transform_recursive`]. Return the
    /// expression unchanged if no dialect-specific rewrite is needed. Transformations
    /// typically include function renaming, operator substitution, and type mapping.
    ///
    /// # Errors
    ///
    /// Implementations may return an error when a construct cannot be represented
    /// in this dialect. The default never fails (identity pass-through).
    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
        Ok(expr)
    }

    /// Applies whole-tree preprocessing transforms before the recursive per-node pass.
    ///
    /// Override this to apply structural rewrites that must see the entire tree at once,
    /// such as `eliminate_qualify`, `eliminate_distinct_on`, `ensure_bools`, or
    /// `explode_projection_to_unnest`. The default is a no-op pass-through.
    ///
    /// # Errors
    ///
    /// Implementations may return an error if a structural rewrite fails.
    /// The default never fails.
    fn preprocess(&self, expr: Expression) -> Result<Expression> {
        Ok(expr)
    }
}
409
410/// Recursively transforms a [`DataType`](crate::expressions::DataType), handling nested
411/// parametric types such as `ARRAY<INT>`, `STRUCT<a INT, b TEXT>`, and `MAP<STRING, INT>`.
412///
413/// The outer type is first passed through `transform_fn` as an `Expression::DataType`,
414/// and then nested element/field types are recursed into. This ensures that dialect-level
415/// type mappings (e.g., `INT` to `INTEGER`) propagate into complex nested types.
416fn transform_data_type_recursive<F>(
417 dt: crate::expressions::DataType,
418 transform_fn: &F,
419) -> Result<crate::expressions::DataType>
420where
421 F: Fn(Expression) -> Result<Expression>,
422{
423 use crate::expressions::DataType;
424 // First, transform the outermost type through the expression system
425 let dt_expr = transform_fn(Expression::DataType(dt))?;
426 let dt = match dt_expr {
427 Expression::DataType(d) => d,
428 _ => {
429 return Ok(match dt_expr {
430 _ => DataType::Custom {
431 name: "UNKNOWN".to_string(),
432 },
433 })
434 }
435 };
436 // Then recurse into nested types
437 match dt {
438 DataType::Array {
439 element_type,
440 dimension,
441 } => {
442 let inner = transform_data_type_recursive(*element_type, transform_fn)?;
443 Ok(DataType::Array {
444 element_type: Box::new(inner),
445 dimension,
446 })
447 }
448 DataType::List { element_type } => {
449 let inner = transform_data_type_recursive(*element_type, transform_fn)?;
450 Ok(DataType::List {
451 element_type: Box::new(inner),
452 })
453 }
454 DataType::Struct { fields, nested } => {
455 let mut new_fields = Vec::new();
456 for mut field in fields {
457 field.data_type = transform_data_type_recursive(field.data_type, transform_fn)?;
458 new_fields.push(field);
459 }
460 Ok(DataType::Struct {
461 fields: new_fields,
462 nested,
463 })
464 }
465 DataType::Map {
466 key_type,
467 value_type,
468 } => {
469 let k = transform_data_type_recursive(*key_type, transform_fn)?;
470 let v = transform_data_type_recursive(*value_type, transform_fn)?;
471 Ok(DataType::Map {
472 key_type: Box::new(k),
473 value_type: Box::new(v),
474 })
475 }
476 other => Ok(other),
477 }
478}
479
/// Convert DuckDB C-style format strings to Presto C-style format strings.
///
/// DuckDB and Presto both use C-style `%` directives, but a few specifiers
/// differ (minutes/seconds and the "no padding" `%-x` variants). Multi-char
/// patterns are first hidden behind `\x01`-delimited placeholders so the
/// single-specifier substitutions cannot corrupt them, then restored with
/// their Presto spellings. Replacement order is significant and preserved.
#[cfg(feature = "transpile")]
fn duckdb_to_presto_format(fmt: &str) -> String {
    // Pass 1: protect multi-char patterns (longest-conflicting first).
    const PROTECT: [(&str, &str); 6] = [
        ("%-m", "\x01NOPADM\x01"),
        ("%-d", "\x01NOPADD\x01"),
        ("%-I", "\x01NOPADI\x01"),
        ("%-H", "\x01NOPADH\x01"),
        ("%H:%M:%S", "\x01HMS\x01"),
        ("%Y-%m-%d", "\x01YMD\x01"),
    ];
    // Pass 3: swap each placeholder for its Presto equivalent.
    const RESTORE: [(&str, &str); 6] = [
        ("\x01NOPADM\x01", "%c"),
        ("\x01NOPADD\x01", "%e"),
        ("\x01NOPADI\x01", "%l"),
        ("\x01NOPADH\x01", "%k"),
        ("\x01HMS\x01", "%T"),
        ("\x01YMD\x01", "%Y-%m-%d"),
    ];
    let mut out = fmt.to_string();
    for (pattern, placeholder) in PROTECT {
        out = out.replace(pattern, placeholder);
    }
    // Pass 2: convert the individual specifiers that differ between engines.
    out = out.replace("%M", "%i");
    out = out.replace("%S", "%s");
    for (placeholder, presto) in RESTORE {
        out = out.replace(placeholder, presto);
    }
    out
}
505
/// Convert DuckDB C-style format strings to BigQuery format strings.
///
/// BigQuery uses a mix of strftime-like directives; composite date/time
/// sequences are collapsed to their shorthand forms (`%F`, `%T`).
#[cfg(feature = "transpile")]
fn duckdb_to_bigquery_format(fmt: &str) -> String {
    // Longer composite patterns are rewritten before their components so the
    // shorthand collapse applies to full date/time sequences first.
    fmt.replace("%-d", "%e")
        .replace("%Y-%m-%d %H:%M:%S", "%F %T")
        .replace("%Y-%m-%d", "%F")
        .replace("%H:%M:%S", "%T")
}
518
519/// Applies a transform function bottom-up through an entire expression tree.
520///
521/// This is the core tree-rewriting engine used by the dialect system. It performs
522/// a post-order (children-first) traversal: for each node, all children are recursively
523/// transformed before the node itself is passed to `transform_fn`. This bottom-up
524/// strategy means that when `transform_fn` sees a node, its children have already
525/// been rewritten, which simplifies pattern matching on sub-expressions.
526///
527/// The function handles all expression variants including SELECT clauses (FROM, WHERE,
528/// GROUP BY, HAVING, ORDER BY, QUALIFY, WITH/CTEs, WINDOW), binary operators,
529/// function calls, CASE expressions, date/time functions, and more.
530///
531/// # Arguments
532///
533/// * `expr` - The root expression to transform (consumed).
534/// * `transform_fn` - A closure that receives each expression node (after its children
535/// have been transformed) and returns a possibly-rewritten expression.
536///
537/// # Errors
538///
539/// Returns an error if `transform_fn` returns an error for any node.
540pub fn transform_recursive<F>(expr: Expression, transform_fn: &F) -> Result<Expression>
541where
542 F: Fn(Expression) -> Result<Expression>,
543{
544 use crate::expressions::BinaryOp;
545
546 // Helper macro to transform binary ops with Box<BinaryOp>
547 macro_rules! transform_binary {
548 ($variant:ident, $op:expr) => {{
549 let left = transform_recursive($op.left, transform_fn)?;
550 let right = transform_recursive($op.right, transform_fn)?;
551 Expression::$variant(Box::new(BinaryOp {
552 left,
553 right,
554 left_comments: $op.left_comments,
555 operator_comments: $op.operator_comments,
556 trailing_comments: $op.trailing_comments,
557 }))
558 }};
559 }
560
561 // First recursively transform children, then apply the transform function
562 let expr = match expr {
563 Expression::Select(mut select) => {
564 select.expressions = select
565 .expressions
566 .into_iter()
567 .map(|e| transform_recursive(e, transform_fn))
568 .collect::<Result<Vec<_>>>()?;
569
570 // Transform FROM clause
571 if let Some(mut from) = select.from.take() {
572 from.expressions = from
573 .expressions
574 .into_iter()
575 .map(|e| transform_recursive(e, transform_fn))
576 .collect::<Result<Vec<_>>>()?;
577 select.from = Some(from);
578 }
579
580 // Transform JOINs - important for CROSS APPLY / LATERAL transformations
581 select.joins = select
582 .joins
583 .into_iter()
584 .map(|mut join| {
585 join.this = transform_recursive(join.this, transform_fn)?;
586 if let Some(on) = join.on.take() {
587 join.on = Some(transform_recursive(on, transform_fn)?);
588 }
589 // Wrap join in Expression::Join to allow transform_fn to transform it
590 match transform_fn(Expression::Join(Box::new(join)))? {
591 Expression::Join(j) => Ok(*j),
592 _ => Err(crate::error::Error::parse(
593 "Join transformation returned non-join expression",
594 0,
595 0,
596 0,
597 0,
598 )),
599 }
600 })
601 .collect::<Result<Vec<_>>>()?;
602
603 // Transform LATERAL VIEW expressions (Hive/Spark)
604 select.lateral_views = select
605 .lateral_views
606 .into_iter()
607 .map(|mut lv| {
608 lv.this = transform_recursive(lv.this, transform_fn)?;
609 Ok(lv)
610 })
611 .collect::<Result<Vec<_>>>()?;
612
613 // Transform WHERE clause
614 if let Some(mut where_clause) = select.where_clause.take() {
615 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
616 select.where_clause = Some(where_clause);
617 }
618
619 // Transform GROUP BY
620 if let Some(mut group_by) = select.group_by.take() {
621 group_by.expressions = group_by
622 .expressions
623 .into_iter()
624 .map(|e| transform_recursive(e, transform_fn))
625 .collect::<Result<Vec<_>>>()?;
626 select.group_by = Some(group_by);
627 }
628
629 // Transform HAVING
630 if let Some(mut having) = select.having.take() {
631 having.this = transform_recursive(having.this, transform_fn)?;
632 select.having = Some(having);
633 }
634
635 // Transform WITH (CTEs)
636 if let Some(mut with) = select.with.take() {
637 with.ctes = with
638 .ctes
639 .into_iter()
640 .map(|mut cte| {
641 let original = cte.this.clone();
642 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
643 cte
644 })
645 .collect();
646 select.with = Some(with);
647 }
648
649 // Transform ORDER BY
650 if let Some(mut order) = select.order_by.take() {
651 order.expressions = order
652 .expressions
653 .into_iter()
654 .map(|o| {
655 let mut o = o;
656 let original = o.this.clone();
657 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
658 // Also apply transform to the Ordered wrapper itself (for NULLS FIRST etc.)
659 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
660 Ok(Expression::Ordered(transformed)) => *transformed,
661 Ok(_) | Err(_) => o,
662 }
663 })
664 .collect();
665 select.order_by = Some(order);
666 }
667
668 // Transform WINDOW clause order_by
669 if let Some(ref mut windows) = select.windows {
670 for nw in windows.iter_mut() {
671 nw.spec.order_by = std::mem::take(&mut nw.spec.order_by)
672 .into_iter()
673 .map(|o| {
674 let mut o = o;
675 let original = o.this.clone();
676 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
677 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
678 Ok(Expression::Ordered(transformed)) => *transformed,
679 Ok(_) | Err(_) => o,
680 }
681 })
682 .collect();
683 }
684 }
685
686 // Transform QUALIFY
687 if let Some(mut qual) = select.qualify.take() {
688 qual.this = transform_recursive(qual.this, transform_fn)?;
689 select.qualify = Some(qual);
690 }
691
692 Expression::Select(select)
693 }
694 Expression::Function(mut f) => {
695 f.args = f
696 .args
697 .into_iter()
698 .map(|e| transform_recursive(e, transform_fn))
699 .collect::<Result<Vec<_>>>()?;
700 Expression::Function(f)
701 }
702 Expression::AggregateFunction(mut f) => {
703 f.args = f
704 .args
705 .into_iter()
706 .map(|e| transform_recursive(e, transform_fn))
707 .collect::<Result<Vec<_>>>()?;
708 if let Some(filter) = f.filter {
709 f.filter = Some(transform_recursive(filter, transform_fn)?);
710 }
711 Expression::AggregateFunction(f)
712 }
713 Expression::WindowFunction(mut wf) => {
714 wf.this = transform_recursive(wf.this, transform_fn)?;
715 wf.over.partition_by = wf
716 .over
717 .partition_by
718 .into_iter()
719 .map(|e| transform_recursive(e, transform_fn))
720 .collect::<Result<Vec<_>>>()?;
721 // Transform order_by items through Expression::Ordered wrapper
722 wf.over.order_by = wf
723 .over
724 .order_by
725 .into_iter()
726 .map(|o| {
727 let mut o = o;
728 o.this = transform_recursive(o.this, transform_fn)?;
729 match transform_fn(Expression::Ordered(Box::new(o)))? {
730 Expression::Ordered(transformed) => Ok(*transformed),
731 _ => Err(crate::error::Error::parse(
732 "Ordered transformation returned non-Ordered expression",
733 0,
734 0,
735 0,
736 0,
737 )),
738 }
739 })
740 .collect::<Result<Vec<_>>>()?;
741 Expression::WindowFunction(wf)
742 }
743 Expression::Alias(mut a) => {
744 a.this = transform_recursive(a.this, transform_fn)?;
745 Expression::Alias(a)
746 }
747 Expression::Cast(mut c) => {
748 c.this = transform_recursive(c.this, transform_fn)?;
749 // Also transform the target data type (recursively for nested types like ARRAY<INT>, STRUCT<a INT>)
750 c.to = transform_data_type_recursive(c.to, transform_fn)?;
751 Expression::Cast(c)
752 }
753 Expression::And(op) => transform_binary!(And, *op),
754 Expression::Or(op) => transform_binary!(Or, *op),
755 Expression::Add(op) => transform_binary!(Add, *op),
756 Expression::Sub(op) => transform_binary!(Sub, *op),
757 Expression::Mul(op) => transform_binary!(Mul, *op),
758 Expression::Div(op) => transform_binary!(Div, *op),
759 Expression::Eq(op) => transform_binary!(Eq, *op),
760 Expression::Lt(op) => transform_binary!(Lt, *op),
761 Expression::Gt(op) => transform_binary!(Gt, *op),
762 Expression::Paren(mut p) => {
763 p.this = transform_recursive(p.this, transform_fn)?;
764 Expression::Paren(p)
765 }
766 Expression::Coalesce(mut f) => {
767 f.expressions = f
768 .expressions
769 .into_iter()
770 .map(|e| transform_recursive(e, transform_fn))
771 .collect::<Result<Vec<_>>>()?;
772 Expression::Coalesce(f)
773 }
774 Expression::IfNull(mut f) => {
775 f.this = transform_recursive(f.this, transform_fn)?;
776 f.expression = transform_recursive(f.expression, transform_fn)?;
777 Expression::IfNull(f)
778 }
779 Expression::Nvl(mut f) => {
780 f.this = transform_recursive(f.this, transform_fn)?;
781 f.expression = transform_recursive(f.expression, transform_fn)?;
782 Expression::Nvl(f)
783 }
784 Expression::In(mut i) => {
785 i.this = transform_recursive(i.this, transform_fn)?;
786 i.expressions = i
787 .expressions
788 .into_iter()
789 .map(|e| transform_recursive(e, transform_fn))
790 .collect::<Result<Vec<_>>>()?;
791 if let Some(query) = i.query {
792 i.query = Some(transform_recursive(query, transform_fn)?);
793 }
794 Expression::In(i)
795 }
796 Expression::Not(mut n) => {
797 n.this = transform_recursive(n.this, transform_fn)?;
798 Expression::Not(n)
799 }
800 Expression::ArraySlice(mut s) => {
801 s.this = transform_recursive(s.this, transform_fn)?;
802 if let Some(start) = s.start {
803 s.start = Some(transform_recursive(start, transform_fn)?);
804 }
805 if let Some(end) = s.end {
806 s.end = Some(transform_recursive(end, transform_fn)?);
807 }
808 Expression::ArraySlice(s)
809 }
810 Expression::Subscript(mut s) => {
811 s.this = transform_recursive(s.this, transform_fn)?;
812 s.index = transform_recursive(s.index, transform_fn)?;
813 Expression::Subscript(s)
814 }
815 Expression::Array(mut a) => {
816 a.expressions = a
817 .expressions
818 .into_iter()
819 .map(|e| transform_recursive(e, transform_fn))
820 .collect::<Result<Vec<_>>>()?;
821 Expression::Array(a)
822 }
823 Expression::Struct(mut s) => {
824 let mut new_fields = Vec::new();
825 for (name, expr) in s.fields {
826 let transformed = transform_recursive(expr, transform_fn)?;
827 new_fields.push((name, transformed));
828 }
829 s.fields = new_fields;
830 Expression::Struct(s)
831 }
832 Expression::NamedArgument(mut na) => {
833 na.value = transform_recursive(na.value, transform_fn)?;
834 Expression::NamedArgument(na)
835 }
836 Expression::MapFunc(mut m) => {
837 m.keys = m
838 .keys
839 .into_iter()
840 .map(|e| transform_recursive(e, transform_fn))
841 .collect::<Result<Vec<_>>>()?;
842 m.values = m
843 .values
844 .into_iter()
845 .map(|e| transform_recursive(e, transform_fn))
846 .collect::<Result<Vec<_>>>()?;
847 Expression::MapFunc(m)
848 }
849 Expression::ArrayFunc(mut a) => {
850 a.expressions = a
851 .expressions
852 .into_iter()
853 .map(|e| transform_recursive(e, transform_fn))
854 .collect::<Result<Vec<_>>>()?;
855 Expression::ArrayFunc(a)
856 }
857 Expression::Lambda(mut l) => {
858 l.body = transform_recursive(l.body, transform_fn)?;
859 Expression::Lambda(l)
860 }
861 Expression::JsonExtract(mut f) => {
862 f.this = transform_recursive(f.this, transform_fn)?;
863 f.path = transform_recursive(f.path, transform_fn)?;
864 Expression::JsonExtract(f)
865 }
866 Expression::JsonExtractScalar(mut f) => {
867 f.this = transform_recursive(f.this, transform_fn)?;
868 f.path = transform_recursive(f.path, transform_fn)?;
869 Expression::JsonExtractScalar(f)
870 }
871
872 // ===== UnaryFunc-based expressions =====
873 // These all have a single `this: Expression` child
874 Expression::Length(mut f) => {
875 f.this = transform_recursive(f.this, transform_fn)?;
876 Expression::Length(f)
877 }
878 Expression::Upper(mut f) => {
879 f.this = transform_recursive(f.this, transform_fn)?;
880 Expression::Upper(f)
881 }
882 Expression::Lower(mut f) => {
883 f.this = transform_recursive(f.this, transform_fn)?;
884 Expression::Lower(f)
885 }
886 Expression::LTrim(mut f) => {
887 f.this = transform_recursive(f.this, transform_fn)?;
888 Expression::LTrim(f)
889 }
890 Expression::RTrim(mut f) => {
891 f.this = transform_recursive(f.this, transform_fn)?;
892 Expression::RTrim(f)
893 }
894 Expression::Reverse(mut f) => {
895 f.this = transform_recursive(f.this, transform_fn)?;
896 Expression::Reverse(f)
897 }
898 Expression::Abs(mut f) => {
899 f.this = transform_recursive(f.this, transform_fn)?;
900 Expression::Abs(f)
901 }
902 Expression::Ceil(mut f) => {
903 f.this = transform_recursive(f.this, transform_fn)?;
904 Expression::Ceil(f)
905 }
906 Expression::Floor(mut f) => {
907 f.this = transform_recursive(f.this, transform_fn)?;
908 Expression::Floor(f)
909 }
910 Expression::Sign(mut f) => {
911 f.this = transform_recursive(f.this, transform_fn)?;
912 Expression::Sign(f)
913 }
914 Expression::Sqrt(mut f) => {
915 f.this = transform_recursive(f.this, transform_fn)?;
916 Expression::Sqrt(f)
917 }
918 Expression::Cbrt(mut f) => {
919 f.this = transform_recursive(f.this, transform_fn)?;
920 Expression::Cbrt(f)
921 }
922 Expression::Ln(mut f) => {
923 f.this = transform_recursive(f.this, transform_fn)?;
924 Expression::Ln(f)
925 }
926 Expression::Log(mut f) => {
927 f.this = transform_recursive(f.this, transform_fn)?;
928 if let Some(base) = f.base {
929 f.base = Some(transform_recursive(base, transform_fn)?);
930 }
931 Expression::Log(f)
932 }
933 Expression::Exp(mut f) => {
934 f.this = transform_recursive(f.this, transform_fn)?;
935 Expression::Exp(f)
936 }
937 Expression::Date(mut f) => {
938 f.this = transform_recursive(f.this, transform_fn)?;
939 Expression::Date(f)
940 }
941 Expression::Stddev(mut f) => {
942 f.this = transform_recursive(f.this, transform_fn)?;
943 Expression::Stddev(f)
944 }
945 Expression::Variance(mut f) => {
946 f.this = transform_recursive(f.this, transform_fn)?;
947 Expression::Variance(f)
948 }
949
950 // ===== BinaryFunc-based expressions =====
951 Expression::ModFunc(mut f) => {
952 f.this = transform_recursive(f.this, transform_fn)?;
953 f.expression = transform_recursive(f.expression, transform_fn)?;
954 Expression::ModFunc(f)
955 }
956 Expression::Power(mut f) => {
957 f.this = transform_recursive(f.this, transform_fn)?;
958 f.expression = transform_recursive(f.expression, transform_fn)?;
959 Expression::Power(f)
960 }
961 Expression::MapFromArrays(mut f) => {
962 f.this = transform_recursive(f.this, transform_fn)?;
963 f.expression = transform_recursive(f.expression, transform_fn)?;
964 Expression::MapFromArrays(f)
965 }
966 Expression::ElementAt(mut f) => {
967 f.this = transform_recursive(f.this, transform_fn)?;
968 f.expression = transform_recursive(f.expression, transform_fn)?;
969 Expression::ElementAt(f)
970 }
971 Expression::MapContainsKey(mut f) => {
972 f.this = transform_recursive(f.this, transform_fn)?;
973 f.expression = transform_recursive(f.expression, transform_fn)?;
974 Expression::MapContainsKey(f)
975 }
976 Expression::Left(mut f) => {
977 f.this = transform_recursive(f.this, transform_fn)?;
978 f.length = transform_recursive(f.length, transform_fn)?;
979 Expression::Left(f)
980 }
981 Expression::Right(mut f) => {
982 f.this = transform_recursive(f.this, transform_fn)?;
983 f.length = transform_recursive(f.length, transform_fn)?;
984 Expression::Right(f)
985 }
986 Expression::Repeat(mut f) => {
987 f.this = transform_recursive(f.this, transform_fn)?;
988 f.times = transform_recursive(f.times, transform_fn)?;
989 Expression::Repeat(f)
990 }
991
992 // ===== Complex function expressions =====
993 Expression::Substring(mut f) => {
994 f.this = transform_recursive(f.this, transform_fn)?;
995 f.start = transform_recursive(f.start, transform_fn)?;
996 if let Some(len) = f.length {
997 f.length = Some(transform_recursive(len, transform_fn)?);
998 }
999 Expression::Substring(f)
1000 }
1001 Expression::Replace(mut f) => {
1002 f.this = transform_recursive(f.this, transform_fn)?;
1003 f.old = transform_recursive(f.old, transform_fn)?;
1004 f.new = transform_recursive(f.new, transform_fn)?;
1005 Expression::Replace(f)
1006 }
1007 Expression::ConcatWs(mut f) => {
1008 f.separator = transform_recursive(f.separator, transform_fn)?;
1009 f.expressions = f
1010 .expressions
1011 .into_iter()
1012 .map(|e| transform_recursive(e, transform_fn))
1013 .collect::<Result<Vec<_>>>()?;
1014 Expression::ConcatWs(f)
1015 }
1016 Expression::Trim(mut f) => {
1017 f.this = transform_recursive(f.this, transform_fn)?;
1018 if let Some(chars) = f.characters {
1019 f.characters = Some(transform_recursive(chars, transform_fn)?);
1020 }
1021 Expression::Trim(f)
1022 }
1023 Expression::Split(mut f) => {
1024 f.this = transform_recursive(f.this, transform_fn)?;
1025 f.delimiter = transform_recursive(f.delimiter, transform_fn)?;
1026 Expression::Split(f)
1027 }
1028 Expression::Lpad(mut f) => {
1029 f.this = transform_recursive(f.this, transform_fn)?;
1030 f.length = transform_recursive(f.length, transform_fn)?;
1031 if let Some(fill) = f.fill {
1032 f.fill = Some(transform_recursive(fill, transform_fn)?);
1033 }
1034 Expression::Lpad(f)
1035 }
1036 Expression::Rpad(mut f) => {
1037 f.this = transform_recursive(f.this, transform_fn)?;
1038 f.length = transform_recursive(f.length, transform_fn)?;
1039 if let Some(fill) = f.fill {
1040 f.fill = Some(transform_recursive(fill, transform_fn)?);
1041 }
1042 Expression::Rpad(f)
1043 }
1044
1045 // ===== Conditional expressions =====
1046 Expression::Case(mut c) => {
1047 if let Some(operand) = c.operand {
1048 c.operand = Some(transform_recursive(operand, transform_fn)?);
1049 }
1050 c.whens = c
1051 .whens
1052 .into_iter()
1053 .map(|(cond, then)| {
1054 let new_cond = transform_recursive(cond.clone(), transform_fn).unwrap_or(cond);
1055 let new_then = transform_recursive(then.clone(), transform_fn).unwrap_or(then);
1056 (new_cond, new_then)
1057 })
1058 .collect();
1059 if let Some(else_expr) = c.else_ {
1060 c.else_ = Some(transform_recursive(else_expr, transform_fn)?);
1061 }
1062 Expression::Case(c)
1063 }
1064 Expression::IfFunc(mut f) => {
1065 f.condition = transform_recursive(f.condition, transform_fn)?;
1066 f.true_value = transform_recursive(f.true_value, transform_fn)?;
1067 if let Some(false_val) = f.false_value {
1068 f.false_value = Some(transform_recursive(false_val, transform_fn)?);
1069 }
1070 Expression::IfFunc(f)
1071 }
1072
1073 // ===== Date/Time expressions =====
1074 Expression::DateAdd(mut f) => {
1075 f.this = transform_recursive(f.this, transform_fn)?;
1076 f.interval = transform_recursive(f.interval, transform_fn)?;
1077 Expression::DateAdd(f)
1078 }
1079 Expression::DateSub(mut f) => {
1080 f.this = transform_recursive(f.this, transform_fn)?;
1081 f.interval = transform_recursive(f.interval, transform_fn)?;
1082 Expression::DateSub(f)
1083 }
1084 Expression::DateDiff(mut f) => {
1085 f.this = transform_recursive(f.this, transform_fn)?;
1086 f.expression = transform_recursive(f.expression, transform_fn)?;
1087 Expression::DateDiff(f)
1088 }
1089 Expression::DateTrunc(mut f) => {
1090 f.this = transform_recursive(f.this, transform_fn)?;
1091 Expression::DateTrunc(f)
1092 }
1093 Expression::Extract(mut f) => {
1094 f.this = transform_recursive(f.this, transform_fn)?;
1095 Expression::Extract(f)
1096 }
1097
1098 // ===== JSON expressions =====
1099 Expression::JsonObject(mut f) => {
1100 f.pairs = f
1101 .pairs
1102 .into_iter()
1103 .map(|(k, v)| {
1104 let new_k = transform_recursive(k, transform_fn)?;
1105 let new_v = transform_recursive(v, transform_fn)?;
1106 Ok((new_k, new_v))
1107 })
1108 .collect::<Result<Vec<_>>>()?;
1109 Expression::JsonObject(f)
1110 }
1111
1112 // ===== Subquery expressions =====
1113 Expression::Subquery(mut s) => {
1114 s.this = transform_recursive(s.this, transform_fn)?;
1115 Expression::Subquery(s)
1116 }
1117 Expression::Exists(mut e) => {
1118 e.this = transform_recursive(e.this, transform_fn)?;
1119 Expression::Exists(e)
1120 }
1121
1122 // ===== Set operations =====
1123 Expression::Union(mut u) => {
1124 u.left = transform_recursive(u.left, transform_fn)?;
1125 u.right = transform_recursive(u.right, transform_fn)?;
1126 Expression::Union(u)
1127 }
1128 Expression::Intersect(mut i) => {
1129 i.left = transform_recursive(i.left, transform_fn)?;
1130 i.right = transform_recursive(i.right, transform_fn)?;
1131 Expression::Intersect(i)
1132 }
1133 Expression::Except(mut e) => {
1134 e.left = transform_recursive(e.left, transform_fn)?;
1135 e.right = transform_recursive(e.right, transform_fn)?;
1136 Expression::Except(e)
1137 }
1138
1139 // ===== DML expressions =====
1140 Expression::Insert(mut ins) => {
1141 // Transform VALUES clause expressions
1142 let mut new_values = Vec::new();
1143 for row in ins.values {
1144 let mut new_row = Vec::new();
1145 for e in row {
1146 new_row.push(transform_recursive(e, transform_fn)?);
1147 }
1148 new_values.push(new_row);
1149 }
1150 ins.values = new_values;
1151
1152 // Transform query (for INSERT ... SELECT)
1153 if let Some(query) = ins.query {
1154 ins.query = Some(transform_recursive(query, transform_fn)?);
1155 }
1156
1157 // Transform RETURNING clause
1158 let mut new_returning = Vec::new();
1159 for e in ins.returning {
1160 new_returning.push(transform_recursive(e, transform_fn)?);
1161 }
1162 ins.returning = new_returning;
1163
1164 // Transform ON CONFLICT clause
1165 if let Some(on_conflict) = ins.on_conflict {
1166 ins.on_conflict = Some(Box::new(transform_recursive(*on_conflict, transform_fn)?));
1167 }
1168
1169 Expression::Insert(ins)
1170 }
1171 Expression::Update(mut upd) => {
1172 upd.set = upd
1173 .set
1174 .into_iter()
1175 .map(|(id, val)| {
1176 let new_val = transform_recursive(val.clone(), transform_fn).unwrap_or(val);
1177 (id, new_val)
1178 })
1179 .collect();
1180 if let Some(mut where_clause) = upd.where_clause.take() {
1181 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
1182 upd.where_clause = Some(where_clause);
1183 }
1184 Expression::Update(upd)
1185 }
1186 Expression::Delete(mut del) => {
1187 if let Some(mut where_clause) = del.where_clause.take() {
1188 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
1189 del.where_clause = Some(where_clause);
1190 }
1191 Expression::Delete(del)
1192 }
1193
1194 // ===== CTE expressions =====
1195 Expression::With(mut w) => {
1196 w.ctes = w
1197 .ctes
1198 .into_iter()
1199 .map(|mut cte| {
1200 let original = cte.this.clone();
1201 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
1202 cte
1203 })
1204 .collect();
1205 Expression::With(w)
1206 }
1207 Expression::Cte(mut c) => {
1208 c.this = transform_recursive(c.this, transform_fn)?;
1209 Expression::Cte(c)
1210 }
1211
1212 // ===== Order expressions =====
1213 Expression::Ordered(mut o) => {
1214 o.this = transform_recursive(o.this, transform_fn)?;
1215 Expression::Ordered(o)
1216 }
1217
1218 // ===== Negation =====
1219 Expression::Neg(mut n) => {
1220 n.this = transform_recursive(n.this, transform_fn)?;
1221 Expression::Neg(n)
1222 }
1223
1224 // ===== Between =====
1225 Expression::Between(mut b) => {
1226 b.this = transform_recursive(b.this, transform_fn)?;
1227 b.low = transform_recursive(b.low, transform_fn)?;
1228 b.high = transform_recursive(b.high, transform_fn)?;
1229 Expression::Between(b)
1230 }
1231
1232 // ===== Like expressions =====
1233 Expression::Like(mut l) => {
1234 l.left = transform_recursive(l.left, transform_fn)?;
1235 l.right = transform_recursive(l.right, transform_fn)?;
1236 Expression::Like(l)
1237 }
1238 Expression::ILike(mut l) => {
1239 l.left = transform_recursive(l.left, transform_fn)?;
1240 l.right = transform_recursive(l.right, transform_fn)?;
1241 Expression::ILike(l)
1242 }
1243
1244 // ===== Additional binary ops not covered by macro =====
1245 Expression::Neq(op) => transform_binary!(Neq, *op),
1246 Expression::Lte(op) => transform_binary!(Lte, *op),
1247 Expression::Gte(op) => transform_binary!(Gte, *op),
1248 Expression::Mod(op) => transform_binary!(Mod, *op),
1249 Expression::Concat(op) => transform_binary!(Concat, *op),
1250 Expression::BitwiseAnd(op) => transform_binary!(BitwiseAnd, *op),
1251 Expression::BitwiseOr(op) => transform_binary!(BitwiseOr, *op),
1252 Expression::BitwiseXor(op) => transform_binary!(BitwiseXor, *op),
1253 Expression::Is(op) => transform_binary!(Is, *op),
1254
1255 // ===== TryCast / SafeCast =====
1256 Expression::TryCast(mut c) => {
1257 c.this = transform_recursive(c.this, transform_fn)?;
1258 c.to = transform_data_type_recursive(c.to, transform_fn)?;
1259 Expression::TryCast(c)
1260 }
1261 Expression::SafeCast(mut c) => {
1262 c.this = transform_recursive(c.this, transform_fn)?;
1263 c.to = transform_data_type_recursive(c.to, transform_fn)?;
1264 Expression::SafeCast(c)
1265 }
1266
1267 // ===== Misc =====
1268 Expression::Unnest(mut f) => {
1269 f.this = transform_recursive(f.this, transform_fn)?;
1270 f.expressions = f
1271 .expressions
1272 .into_iter()
1273 .map(|e| transform_recursive(e, transform_fn))
1274 .collect::<Result<Vec<_>>>()?;
1275 Expression::Unnest(f)
1276 }
1277 Expression::Explode(mut f) => {
1278 f.this = transform_recursive(f.this, transform_fn)?;
1279 Expression::Explode(f)
1280 }
1281 Expression::GroupConcat(mut f) => {
1282 f.this = transform_recursive(f.this, transform_fn)?;
1283 Expression::GroupConcat(f)
1284 }
1285 Expression::StringAgg(mut f) => {
1286 f.this = transform_recursive(f.this, transform_fn)?;
1287 Expression::StringAgg(f)
1288 }
1289 Expression::ListAgg(mut f) => {
1290 f.this = transform_recursive(f.this, transform_fn)?;
1291 Expression::ListAgg(f)
1292 }
1293 Expression::ArrayAgg(mut f) => {
1294 f.this = transform_recursive(f.this, transform_fn)?;
1295 Expression::ArrayAgg(f)
1296 }
1297 Expression::ParseJson(mut f) => {
1298 f.this = transform_recursive(f.this, transform_fn)?;
1299 Expression::ParseJson(f)
1300 }
1301 Expression::ToJson(mut f) => {
1302 f.this = transform_recursive(f.this, transform_fn)?;
1303 Expression::ToJson(f)
1304 }
1305 Expression::JSONExtract(mut e) => {
1306 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1307 e.expression = Box::new(transform_recursive(*e.expression, transform_fn)?);
1308 Expression::JSONExtract(e)
1309 }
1310 Expression::JSONExtractScalar(mut e) => {
1311 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1312 e.expression = Box::new(transform_recursive(*e.expression, transform_fn)?);
1313 Expression::JSONExtractScalar(e)
1314 }
1315
1316 // StrToTime: recurse into this
1317 Expression::StrToTime(mut e) => {
1318 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1319 Expression::StrToTime(e)
1320 }
1321
1322 // UnixToTime: recurse into this
1323 Expression::UnixToTime(mut e) => {
1324 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1325 Expression::UnixToTime(e)
1326 }
1327
1328 // CreateTable: recurse into column defaults, on_update expressions, and data types
1329 Expression::CreateTable(mut ct) => {
1330 for col in &mut ct.columns {
1331 if let Some(default_expr) = col.default.take() {
1332 col.default = Some(transform_recursive(default_expr, transform_fn)?);
1333 }
1334 if let Some(on_update_expr) = col.on_update.take() {
1335 col.on_update = Some(transform_recursive(on_update_expr, transform_fn)?);
1336 }
1337 // Note: Column data type transformations (INT -> INT64 for BigQuery, etc.)
1338 // are NOT applied here because per-dialect transforms are designed for CAST/expression
1339 // contexts and may not produce correct results for DDL column definitions.
1340 // The DDL type mappings would need dedicated handling per source/target pair.
1341 }
1342 if let Some(as_select) = ct.as_select.take() {
1343 ct.as_select = Some(transform_recursive(as_select, transform_fn)?);
1344 }
1345 Expression::CreateTable(ct)
1346 }
1347
1348 // CreateProcedure: recurse into body expressions
1349 Expression::CreateProcedure(mut cp) => {
1350 if let Some(body) = cp.body.take() {
1351 cp.body = Some(match body {
1352 FunctionBody::Expression(expr) => {
1353 FunctionBody::Expression(transform_recursive(expr, transform_fn)?)
1354 }
1355 FunctionBody::Return(expr) => {
1356 FunctionBody::Return(transform_recursive(expr, transform_fn)?)
1357 }
1358 FunctionBody::Statements(stmts) => {
1359 let transformed_stmts = stmts
1360 .into_iter()
1361 .map(|s| transform_recursive(s, transform_fn))
1362 .collect::<Result<Vec<_>>>()?;
1363 FunctionBody::Statements(transformed_stmts)
1364 }
1365 other => other,
1366 });
1367 }
1368 Expression::CreateProcedure(cp)
1369 }
1370
1371 // CreateFunction: recurse into body expressions
1372 Expression::CreateFunction(mut cf) => {
1373 if let Some(body) = cf.body.take() {
1374 cf.body = Some(match body {
1375 FunctionBody::Expression(expr) => {
1376 FunctionBody::Expression(transform_recursive(expr, transform_fn)?)
1377 }
1378 FunctionBody::Return(expr) => {
1379 FunctionBody::Return(transform_recursive(expr, transform_fn)?)
1380 }
1381 FunctionBody::Statements(stmts) => {
1382 let transformed_stmts = stmts
1383 .into_iter()
1384 .map(|s| transform_recursive(s, transform_fn))
1385 .collect::<Result<Vec<_>>>()?;
1386 FunctionBody::Statements(transformed_stmts)
1387 }
1388 other => other,
1389 });
1390 }
1391 Expression::CreateFunction(cf)
1392 }
1393
1394 // MemberOf: recurse into left and right operands
1395 Expression::MemberOf(op) => transform_binary!(MemberOf, *op),
1396 // ArrayContainsAll (@>): recurse into left and right operands
1397 Expression::ArrayContainsAll(op) => transform_binary!(ArrayContainsAll, *op),
1398 // ArrayContainedBy (<@): recurse into left and right operands
1399 Expression::ArrayContainedBy(op) => transform_binary!(ArrayContainedBy, *op),
1400 // ArrayOverlaps (&&): recurse into left and right operands
1401 Expression::ArrayOverlaps(op) => transform_binary!(ArrayOverlaps, *op),
1402 // TsMatch (@@): recurse into left and right operands
1403 Expression::TsMatch(op) => transform_binary!(TsMatch, *op),
1404 // Adjacent (-|-): recurse into left and right operands
1405 Expression::Adjacent(op) => transform_binary!(Adjacent, *op),
1406
1407 // Table: recurse into when (HistoricalData) and changes fields
1408 Expression::Table(mut t) => {
1409 if let Some(when) = t.when.take() {
1410 let transformed =
1411 transform_recursive(Expression::HistoricalData(when), transform_fn)?;
1412 if let Expression::HistoricalData(hd) = transformed {
1413 t.when = Some(hd);
1414 }
1415 }
1416 if let Some(changes) = t.changes.take() {
1417 let transformed = transform_recursive(Expression::Changes(changes), transform_fn)?;
1418 if let Expression::Changes(c) = transformed {
1419 t.changes = Some(c);
1420 }
1421 }
1422 Expression::Table(t)
1423 }
1424
1425 // HistoricalData (Snowflake time travel): recurse into expression
1426 Expression::HistoricalData(mut hd) => {
1427 *hd.expression = transform_recursive(*hd.expression, transform_fn)?;
1428 Expression::HistoricalData(hd)
1429 }
1430
1431 // Changes (Snowflake CHANGES clause): recurse into at_before and end
1432 Expression::Changes(mut c) => {
1433 if let Some(at_before) = c.at_before.take() {
1434 c.at_before = Some(Box::new(transform_recursive(*at_before, transform_fn)?));
1435 }
1436 if let Some(end) = c.end.take() {
1437 c.end = Some(Box::new(transform_recursive(*end, transform_fn)?));
1438 }
1439 Expression::Changes(c)
1440 }
1441
1442 // TableArgument: TABLE(expr) or MODEL(expr)
1443 Expression::TableArgument(mut ta) => {
1444 ta.this = transform_recursive(ta.this, transform_fn)?;
1445 Expression::TableArgument(ta)
1446 }
1447
1448 // JoinedTable: (tbl1 JOIN tbl2 ON ...) - recurse into left and join tables
1449 Expression::JoinedTable(mut jt) => {
1450 jt.left = transform_recursive(jt.left, transform_fn)?;
1451 for join in &mut jt.joins {
1452 join.this = transform_recursive(
1453 std::mem::replace(&mut join.this, Expression::Null(crate::expressions::Null)),
1454 transform_fn,
1455 )?;
1456 if let Some(on) = join.on.take() {
1457 join.on = Some(transform_recursive(on, transform_fn)?);
1458 }
1459 }
1460 jt.lateral_views = jt
1461 .lateral_views
1462 .into_iter()
1463 .map(|mut lv| {
1464 lv.this = transform_recursive(lv.this, transform_fn)?;
1465 Ok(lv)
1466 })
1467 .collect::<Result<Vec<_>>>()?;
1468 Expression::JoinedTable(jt)
1469 }
1470
1471 // Lateral: LATERAL func() - recurse into the function expression
1472 Expression::Lateral(mut lat) => {
1473 *lat.this = transform_recursive(*lat.this, transform_fn)?;
1474 Expression::Lateral(lat)
1475 }
1476
1477 // WithinGroup: recurse into order_by items (for NULLS FIRST/LAST etc.)
1478 // but NOT into wg.this - the inner function is handled by StringAggConvert/GroupConcatConvert
1479 // as a unit together with the WithinGroup wrapper
1480 Expression::WithinGroup(mut wg) => {
1481 wg.order_by = wg
1482 .order_by
1483 .into_iter()
1484 .map(|mut o| {
1485 let original = o.this.clone();
1486 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
1487 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
1488 Ok(Expression::Ordered(transformed)) => *transformed,
1489 Ok(_) | Err(_) => o,
1490 }
1491 })
1492 .collect();
1493 Expression::WithinGroup(wg)
1494 }
1495
1496 // Filter: recurse into both the aggregate and the filter condition
1497 Expression::Filter(mut f) => {
1498 f.this = Box::new(transform_recursive(*f.this, transform_fn)?);
1499 f.expression = Box::new(transform_recursive(*f.expression, transform_fn)?);
1500 Expression::Filter(f)
1501 }
1502
1503 // BitwiseOrAgg/BitwiseAndAgg/BitwiseXorAgg: recurse into the aggregate argument
1504 Expression::BitwiseOrAgg(mut f) => {
1505 f.this = transform_recursive(f.this, transform_fn)?;
1506 Expression::BitwiseOrAgg(f)
1507 }
1508 Expression::BitwiseAndAgg(mut f) => {
1509 f.this = transform_recursive(f.this, transform_fn)?;
1510 Expression::BitwiseAndAgg(f)
1511 }
1512 Expression::BitwiseXorAgg(mut f) => {
1513 f.this = transform_recursive(f.this, transform_fn)?;
1514 Expression::BitwiseXorAgg(f)
1515 }
1516 Expression::PipeOperator(mut pipe) => {
1517 pipe.this = transform_recursive(pipe.this, transform_fn)?;
1518 pipe.expression = transform_recursive(pipe.expression, transform_fn)?;
1519 Expression::PipeOperator(pipe)
1520 }
1521
1522 // Pass through leaf nodes unchanged
1523 other => other,
1524 };
1525
1526 // Then apply the transform function
1527 transform_fn(expr)
1528}
1529
/// Returns the tokenizer config, generator config, and expression transform closure
/// for a built-in dialect type. This is the shared implementation used by both
/// `Dialect::get()` and custom dialect construction.
///
/// Variants whose `dialect-*` feature is disabled at compile time fall through
/// to the `_` arm and receive the `GenericDialect` configuration, so this
/// function is total over `DialectType` regardless of which features are on.
fn configs_for_dialect_type(
    dt: DialectType,
) -> (
    TokenizerConfig,
    GeneratorConfig,
    Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>,
) {
    // Expands to the (tokenizer, generator, transform) triple for one dialect.
    // `$dialect_struct` is used as a value expression, so each dialect is
    // presumably a zero-sized unit struct — TODO confirm; re-creating it
    // inside the closure keeps the closure capture-free, which is what lets
    // the boxed Fn be `Send + Sync`.
    macro_rules! dialect_configs {
        ($dialect_struct:ident) => {{
            let d = $dialect_struct;
            (
                d.tokenizer_config(),
                d.generator_config(),
                Box::new(move |e| $dialect_struct.transform_expr(e)),
            )
        }};
    }
    match dt {
        #[cfg(feature = "dialect-postgresql")]
        DialectType::PostgreSQL => dialect_configs!(PostgresDialect),
        #[cfg(feature = "dialect-mysql")]
        DialectType::MySQL => dialect_configs!(MySQLDialect),
        #[cfg(feature = "dialect-bigquery")]
        DialectType::BigQuery => dialect_configs!(BigQueryDialect),
        #[cfg(feature = "dialect-snowflake")]
        DialectType::Snowflake => dialect_configs!(SnowflakeDialect),
        #[cfg(feature = "dialect-duckdb")]
        DialectType::DuckDB => dialect_configs!(DuckDBDialect),
        #[cfg(feature = "dialect-tsql")]
        DialectType::TSQL => dialect_configs!(TSQLDialect),
        #[cfg(feature = "dialect-oracle")]
        DialectType::Oracle => dialect_configs!(OracleDialect),
        #[cfg(feature = "dialect-hive")]
        DialectType::Hive => dialect_configs!(HiveDialect),
        #[cfg(feature = "dialect-spark")]
        DialectType::Spark => dialect_configs!(SparkDialect),
        #[cfg(feature = "dialect-sqlite")]
        DialectType::SQLite => dialect_configs!(SQLiteDialect),
        #[cfg(feature = "dialect-presto")]
        DialectType::Presto => dialect_configs!(PrestoDialect),
        #[cfg(feature = "dialect-trino")]
        DialectType::Trino => dialect_configs!(TrinoDialect),
        #[cfg(feature = "dialect-redshift")]
        DialectType::Redshift => dialect_configs!(RedshiftDialect),
        #[cfg(feature = "dialect-clickhouse")]
        DialectType::ClickHouse => dialect_configs!(ClickHouseDialect),
        #[cfg(feature = "dialect-databricks")]
        DialectType::Databricks => dialect_configs!(DatabricksDialect),
        #[cfg(feature = "dialect-athena")]
        DialectType::Athena => dialect_configs!(AthenaDialect),
        #[cfg(feature = "dialect-teradata")]
        DialectType::Teradata => dialect_configs!(TeradataDialect),
        #[cfg(feature = "dialect-doris")]
        DialectType::Doris => dialect_configs!(DorisDialect),
        #[cfg(feature = "dialect-starrocks")]
        DialectType::StarRocks => dialect_configs!(StarRocksDialect),
        #[cfg(feature = "dialect-materialize")]
        DialectType::Materialize => dialect_configs!(MaterializeDialect),
        #[cfg(feature = "dialect-risingwave")]
        DialectType::RisingWave => dialect_configs!(RisingWaveDialect),
        #[cfg(feature = "dialect-singlestore")]
        DialectType::SingleStore => dialect_configs!(SingleStoreDialect),
        #[cfg(feature = "dialect-cockroachdb")]
        DialectType::CockroachDB => dialect_configs!(CockroachDBDialect),
        #[cfg(feature = "dialect-tidb")]
        DialectType::TiDB => dialect_configs!(TiDBDialect),
        #[cfg(feature = "dialect-druid")]
        DialectType::Druid => dialect_configs!(DruidDialect),
        #[cfg(feature = "dialect-solr")]
        DialectType::Solr => dialect_configs!(SolrDialect),
        #[cfg(feature = "dialect-tableau")]
        DialectType::Tableau => dialect_configs!(TableauDialect),
        #[cfg(feature = "dialect-dune")]
        DialectType::Dune => dialect_configs!(DuneDialect),
        #[cfg(feature = "dialect-fabric")]
        DialectType::Fabric => dialect_configs!(FabricDialect),
        #[cfg(feature = "dialect-drill")]
        DialectType::Drill => dialect_configs!(DrillDialect),
        #[cfg(feature = "dialect-dremio")]
        DialectType::Dremio => dialect_configs!(DremioDialect),
        #[cfg(feature = "dialect-exasol")]
        DialectType::Exasol => dialect_configs!(ExasolDialect),
        #[cfg(feature = "dialect-datafusion")]
        DialectType::DataFusion => dialect_configs!(DataFusionDialect),
        // Fallback: `DialectType::Generic` plus any variant whose feature was
        // compiled out. This arm cannot be removed while features are optional.
        _ => dialect_configs!(GenericDialect),
    }
}
1620
1621// ---------------------------------------------------------------------------
1622// Custom dialect registry
1623// ---------------------------------------------------------------------------
1624
/// Process-wide registry of user-defined dialects, keyed by registered name.
/// Values are `Arc`-wrapped so lookups can hand out cheap shared handles
/// without holding the lock while the config is in use.
static CUSTOM_DIALECT_REGISTRY: LazyLock<RwLock<HashMap<String, Arc<CustomDialectConfig>>>> =
    LazyLock::new(|| RwLock::new(HashMap::new()));

/// Immutable snapshot of a registered custom dialect, built once by
/// `CustomDialectBuilder::register` and shared read-only thereafter.
struct CustomDialectConfig {
    // Registry key; also the name the dialect was registered under.
    name: String,
    // Built-in dialect this custom one inherited its configs from.
    base_dialect: DialectType,
    // Tokenizer settings after the builder's modifier (if any) was applied.
    tokenizer_config: TokenizerConfig,
    // Generator settings after the builder's modifier (if any) was applied.
    generator_config: GeneratorConfig,
    // Per-node expression transform. Per the builder docs this *replaces* the
    // base dialect's transform when set; `None` presumably falls back to the
    // base dialect's behavior — confirm at the consumer.
    transform: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
    // Whole-tree preprocessing hook, run once before the per-node transform.
    preprocess: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
1636
1637/// Fluent builder for creating and registering custom SQL dialects.
1638///
1639/// A custom dialect is based on an existing built-in dialect and allows selective
1640/// overrides of tokenizer configuration, generator configuration, and expression
1641/// transforms.
1642///
1643/// # Example
1644///
1645/// ```rust,ignore
1646/// use polyglot_sql::dialects::{CustomDialectBuilder, DialectType, Dialect};
1647/// use polyglot_sql::generator::NormalizeFunctions;
1648///
1649/// CustomDialectBuilder::new("my_postgres")
1650/// .based_on(DialectType::PostgreSQL)
1651/// .generator_config_modifier(|gc| {
1652/// gc.normalize_functions = NormalizeFunctions::Lower;
1653/// })
1654/// .register()
1655/// .unwrap();
1656///
1657/// let d = Dialect::get_by_name("my_postgres").unwrap();
1658/// let exprs = d.parse("SELECT COUNT(*)").unwrap();
1659/// let sql = d.generate(&exprs[0]).unwrap();
1660/// assert_eq!(sql, "select count(*)");
1661///
1662/// polyglot_sql::unregister_custom_dialect("my_postgres");
1663/// ```
pub struct CustomDialectBuilder {
    // Name the dialect will be registered under (checked against built-ins).
    name: String,
    // Built-in dialect whose configs are inherited; defaults to `Generic`.
    base_dialect: DialectType,
    // One-shot closure applied to the inherited tokenizer config at `register()`.
    tokenizer_modifier: Option<Box<dyn FnOnce(&mut TokenizerConfig)>>,
    // One-shot closure applied to the inherited generator config at `register()`.
    generator_modifier: Option<Box<dyn FnOnce(&mut GeneratorConfig)>>,
    // Replacement per-node expression transform (supersedes the base dialect's).
    transform: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
    // Replacement whole-tree preprocessing pass (supersedes the base dialect's).
    preprocess: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
1672
1673impl CustomDialectBuilder {
1674 /// Create a new builder with the given name. Defaults to `Generic` as the base dialect.
1675 pub fn new(name: impl Into<String>) -> Self {
1676 Self {
1677 name: name.into(),
1678 base_dialect: DialectType::Generic,
1679 tokenizer_modifier: None,
1680 generator_modifier: None,
1681 transform: None,
1682 preprocess: None,
1683 }
1684 }
1685
1686 /// Set the base built-in dialect to inherit configuration from.
1687 pub fn based_on(mut self, dialect: DialectType) -> Self {
1688 self.base_dialect = dialect;
1689 self
1690 }
1691
1692 /// Provide a closure that modifies the tokenizer configuration inherited from the base dialect.
1693 pub fn tokenizer_config_modifier<F>(mut self, f: F) -> Self
1694 where
1695 F: FnOnce(&mut TokenizerConfig) + 'static,
1696 {
1697 self.tokenizer_modifier = Some(Box::new(f));
1698 self
1699 }
1700
1701 /// Provide a closure that modifies the generator configuration inherited from the base dialect.
1702 pub fn generator_config_modifier<F>(mut self, f: F) -> Self
1703 where
1704 F: FnOnce(&mut GeneratorConfig) + 'static,
1705 {
1706 self.generator_modifier = Some(Box::new(f));
1707 self
1708 }
1709
1710 /// Set a custom per-node expression transform function.
1711 ///
1712 /// This replaces the base dialect's transform. It is called on every expression
1713 /// node during the recursive transform pass.
1714 pub fn transform_fn<F>(mut self, f: F) -> Self
1715 where
1716 F: Fn(Expression) -> Result<Expression> + Send + Sync + 'static,
1717 {
1718 self.transform = Some(Arc::new(f));
1719 self
1720 }
1721
1722 /// Set a custom whole-tree preprocessing function.
1723 ///
1724 /// This replaces the base dialect's built-in preprocessing. It is called once
1725 /// on the entire expression tree before the recursive per-node transform.
1726 pub fn preprocess_fn<F>(mut self, f: F) -> Self
1727 where
1728 F: Fn(Expression) -> Result<Expression> + Send + Sync + 'static,
1729 {
1730 self.preprocess = Some(Arc::new(f));
1731 self
1732 }
1733
1734 /// Build the custom dialect configuration and register it in the global registry.
1735 ///
1736 /// Returns an error if:
1737 /// - The name collides with a built-in dialect name
1738 /// - A custom dialect with the same name is already registered
1739 pub fn register(self) -> Result<()> {
1740 // Reject names that collide with built-in dialects
1741 if DialectType::from_str(&self.name).is_ok() {
1742 return Err(crate::error::Error::parse(
1743 format!(
1744 "Cannot register custom dialect '{}': name collides with built-in dialect",
1745 self.name
1746 ),
1747 0,
1748 0,
1749 0,
1750 0,
1751 ));
1752 }
1753
1754 // Get base configs
1755 let (mut tok_config, mut gen_config, _base_transform) =
1756 configs_for_dialect_type(self.base_dialect);
1757
1758 // Apply modifiers
1759 if let Some(tok_mod) = self.tokenizer_modifier {
1760 tok_mod(&mut tok_config);
1761 }
1762 if let Some(gen_mod) = self.generator_modifier {
1763 gen_mod(&mut gen_config);
1764 }
1765
1766 let config = CustomDialectConfig {
1767 name: self.name.clone(),
1768 base_dialect: self.base_dialect,
1769 tokenizer_config: tok_config,
1770 generator_config: gen_config,
1771 transform: self.transform,
1772 preprocess: self.preprocess,
1773 };
1774
1775 register_custom_dialect(config)
1776 }
1777}
1778
1779use std::str::FromStr;
1780
1781fn register_custom_dialect(config: CustomDialectConfig) -> Result<()> {
1782 let mut registry = CUSTOM_DIALECT_REGISTRY.write().map_err(|e| {
1783 crate::error::Error::parse(format!("Registry lock poisoned: {}", e), 0, 0, 0, 0)
1784 })?;
1785
1786 if registry.contains_key(&config.name) {
1787 return Err(crate::error::Error::parse(
1788 format!("Custom dialect '{}' is already registered", config.name),
1789 0,
1790 0,
1791 0,
1792 0,
1793 ));
1794 }
1795
1796 registry.insert(config.name.clone(), Arc::new(config));
1797 Ok(())
1798}
1799
1800/// Remove a custom dialect from the global registry.
1801///
1802/// Returns `true` if a dialect with that name was found and removed,
1803/// `false` if no such custom dialect existed.
1804pub fn unregister_custom_dialect(name: &str) -> bool {
1805 if let Ok(mut registry) = CUSTOM_DIALECT_REGISTRY.write() {
1806 registry.remove(name).is_some()
1807 } else {
1808 false
1809 }
1810}
1811
1812fn get_custom_dialect_config(name: &str) -> Option<Arc<CustomDialectConfig>> {
1813 CUSTOM_DIALECT_REGISTRY
1814 .read()
1815 .ok()
1816 .and_then(|registry| registry.get(name).cloned())
1817}
1818
/// Main entry point for dialect-specific SQL operations.
///
/// A `Dialect` bundles together a tokenizer, generator configuration, and expression
/// transformer for a specific SQL database engine. It is the high-level API through
/// which callers parse, generate, transform, and transpile SQL.
///
/// # Usage
///
/// ```rust,ignore
/// use polyglot_sql::dialects::{Dialect, DialectType};
///
/// // Parse PostgreSQL SQL into an AST
/// let pg = Dialect::get(DialectType::PostgreSQL);
/// let exprs = pg.parse("SELECT id, name FROM users WHERE active")?;
///
/// // Transpile from PostgreSQL to BigQuery
/// let results = pg.transpile_to("SELECT NOW()", DialectType::BigQuery)?;
/// assert_eq!(results[0], "SELECT CURRENT_TIMESTAMP()");
/// ```
///
/// Obtain an instance via [`Dialect::get`] or [`Dialect::get_by_name`].
/// The struct is `Send + Sync` safe so it can be shared across threads.
pub struct Dialect {
    /// Which dialect this instance represents. For custom dialects this holds
    /// the *base* dialect's type (custom dialects are identified by name).
    dialect_type: DialectType,
    /// Tokenizer configured with this dialect's lexing rules.
    tokenizer: Tokenizer,
    /// Dialect-wide generator settings (quoting, casing, syntax style).
    generator_config: GeneratorConfig,
    /// Per-node expression transform applied during the recursive transform pass.
    transformer: Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>,
    /// Optional function to get expression-specific generator config (for hybrid dialects like Athena).
    generator_config_for_expr: Option<Box<dyn Fn(&Expression) -> GeneratorConfig + Send + Sync>>,
    /// Optional custom preprocessing function (overrides built-in preprocess for custom dialects).
    custom_preprocess: Option<Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
1851
1852impl Dialect {
1853 /// Creates a fully configured [`Dialect`] instance for the given [`DialectType`].
1854 ///
1855 /// This is the primary constructor. It initializes the tokenizer, generator config,
1856 /// and expression transformer based on the dialect's [`DialectImpl`] implementation.
1857 /// For hybrid dialects like Athena, it also sets up expression-specific generator
1858 /// config routing.
1859 pub fn get(dialect_type: DialectType) -> Self {
1860 let (tokenizer_config, generator_config, transformer) =
1861 configs_for_dialect_type(dialect_type);
1862
1863 // Set up expression-specific generator config for hybrid dialects
1864 let generator_config_for_expr: Option<
1865 Box<dyn Fn(&Expression) -> GeneratorConfig + Send + Sync>,
1866 > = match dialect_type {
1867 #[cfg(feature = "dialect-athena")]
1868 DialectType::Athena => Some(Box::new(|expr| {
1869 AthenaDialect.generator_config_for_expr(expr)
1870 })),
1871 _ => None,
1872 };
1873
1874 Self {
1875 dialect_type,
1876 tokenizer: Tokenizer::new(tokenizer_config),
1877 generator_config,
1878 transformer,
1879 generator_config_for_expr,
1880 custom_preprocess: None,
1881 }
1882 }
1883
1884 /// Look up a dialect by string name.
1885 ///
1886 /// Checks built-in dialect names first (via [`DialectType::from_str`]), then
1887 /// falls back to the custom dialect registry. Returns `None` if no dialect
1888 /// with the given name exists.
1889 pub fn get_by_name(name: &str) -> Option<Self> {
1890 // Try built-in first
1891 if let Ok(dt) = DialectType::from_str(name) {
1892 return Some(Self::get(dt));
1893 }
1894
1895 // Try custom registry
1896 let config = get_custom_dialect_config(name)?;
1897 Some(Self::from_custom_config(&config))
1898 }
1899
1900 /// Construct a `Dialect` from a custom dialect configuration.
1901 fn from_custom_config(config: &CustomDialectConfig) -> Self {
1902 // Build the transformer: use custom if provided, else use base dialect's
1903 let transformer: Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync> =
1904 if let Some(ref custom_transform) = config.transform {
1905 let t = Arc::clone(custom_transform);
1906 Box::new(move |e| t(e))
1907 } else {
1908 let (_, _, base_transform) = configs_for_dialect_type(config.base_dialect);
1909 base_transform
1910 };
1911
1912 // Build the custom preprocess: use custom if provided
1913 let custom_preprocess: Option<Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>> =
1914 config.preprocess.as_ref().map(|p| {
1915 let p = Arc::clone(p);
1916 Box::new(move |e: Expression| p(e))
1917 as Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>
1918 });
1919
1920 Self {
1921 dialect_type: config.base_dialect,
1922 tokenizer: Tokenizer::new(config.tokenizer_config.clone()),
1923 generator_config: config.generator_config.clone(),
1924 transformer,
1925 generator_config_for_expr: None,
1926 custom_preprocess,
1927 }
1928 }
1929
    /// Get the dialect type.
    ///
    /// For instances built from a custom dialect config this returns the *base*
    /// dialect's type (custom dialects carry no `DialectType` of their own).
    pub fn dialect_type(&self) -> DialectType {
        self.dialect_type
    }
1934
    /// Get the generator configuration.
    ///
    /// This is the dialect-wide config; during generation, hybrid dialects may
    /// substitute an expression-specific config instead (see `get_config_for_expr`).
    pub fn generator_config(&self) -> &GeneratorConfig {
        &self.generator_config
    }
1939
1940 /// Parses a SQL string into a list of [`Expression`] AST nodes.
1941 ///
1942 /// The input may contain multiple semicolon-separated statements; each one
1943 /// produces a separate element in the returned vector. Tokenization uses
1944 /// this dialect's configured tokenizer, and parsing uses the dialect-aware parser.
1945 pub fn parse(&self, sql: &str) -> Result<Vec<Expression>> {
1946 let tokens = self.tokenizer.tokenize(sql)?;
1947 let config = crate::parser::ParserConfig {
1948 dialect: Some(self.dialect_type),
1949 ..Default::default()
1950 };
1951 let mut parser = Parser::with_source(tokens, config, sql.to_string());
1952 parser.parse()
1953 }
1954
    /// Tokenize SQL using this dialect's tokenizer configuration.
    ///
    /// Exposes the lexing step of [`parse`](Self::parse) on its own, without
    /// running the parser.
    pub fn tokenize(&self, sql: &str) -> Result<Vec<Token>> {
        self.tokenizer.tokenize(sql)
    }
1959
1960 /// Get the generator config for a specific expression (supports hybrid dialects)
1961 fn get_config_for_expr(&self, expr: &Expression) -> GeneratorConfig {
1962 if let Some(ref config_fn) = self.generator_config_for_expr {
1963 config_fn(expr)
1964 } else {
1965 self.generator_config.clone()
1966 }
1967 }
1968
1969 /// Generates a SQL string from an [`Expression`] AST node.
1970 ///
1971 /// The output uses this dialect's generator configuration for identifier quoting,
1972 /// keyword casing, function name normalization, and syntax style. The result is
1973 /// a single-line (non-pretty) SQL string.
1974 pub fn generate(&self, expr: &Expression) -> Result<String> {
1975 let config = self.get_config_for_expr(expr);
1976 let mut generator = Generator::with_config(config);
1977 generator.generate(expr)
1978 }
1979
1980 /// Generate SQL from an expression with pretty printing enabled
1981 pub fn generate_pretty(&self, expr: &Expression) -> Result<String> {
1982 let mut config = self.get_config_for_expr(expr);
1983 config.pretty = true;
1984 let mut generator = Generator::with_config(config);
1985 generator.generate(expr)
1986 }
1987
1988 /// Generate SQL from an expression with source dialect info (for transpilation)
1989 pub fn generate_with_source(&self, expr: &Expression, source: DialectType) -> Result<String> {
1990 let mut config = self.get_config_for_expr(expr);
1991 config.source_dialect = Some(source);
1992 let mut generator = Generator::with_config(config);
1993 generator.generate(expr)
1994 }
1995
1996 /// Generate SQL from an expression with pretty printing and source dialect info
1997 pub fn generate_pretty_with_source(
1998 &self,
1999 expr: &Expression,
2000 source: DialectType,
2001 ) -> Result<String> {
2002 let mut config = self.get_config_for_expr(expr);
2003 config.pretty = true;
2004 config.source_dialect = Some(source);
2005 let mut generator = Generator::with_config(config);
2006 generator.generate(expr)
2007 }
2008
2009 /// Generate SQL from an expression with forced identifier quoting (identify=True)
2010 pub fn generate_with_identify(&self, expr: &Expression) -> Result<String> {
2011 let mut config = self.get_config_for_expr(expr);
2012 config.always_quote_identifiers = true;
2013 let mut generator = Generator::with_config(config);
2014 generator.generate(expr)
2015 }
2016
2017 /// Generate SQL from an expression with pretty printing and forced identifier quoting
2018 pub fn generate_pretty_with_identify(&self, expr: &Expression) -> Result<String> {
2019 let mut config = self.generator_config.clone();
2020 config.pretty = true;
2021 config.always_quote_identifiers = true;
2022 let mut generator = Generator::with_config(config);
2023 generator.generate(expr)
2024 }
2025
2026 /// Generate SQL from an expression with caller-specified config overrides
2027 pub fn generate_with_overrides(
2028 &self,
2029 expr: &Expression,
2030 overrides: impl FnOnce(&mut GeneratorConfig),
2031 ) -> Result<String> {
2032 let mut config = self.get_config_for_expr(expr);
2033 overrides(&mut config);
2034 let mut generator = Generator::with_config(config);
2035 generator.generate(expr)
2036 }
2037
2038 /// Transforms an expression tree to conform to this dialect's syntax and semantics.
2039 ///
2040 /// The transformation proceeds in two phases:
2041 /// 1. **Preprocessing** -- whole-tree structural rewrites such as eliminating QUALIFY,
2042 /// ensuring boolean predicates, or converting DISTINCT ON to a window-function pattern.
2043 /// 2. **Recursive per-node transform** -- a bottom-up pass via [`transform_recursive`]
2044 /// that applies this dialect's [`DialectImpl::transform_expr`] to every node.
2045 ///
2046 /// This method is used both during transpilation (to rewrite an AST for a target dialect)
2047 /// and for identity transforms (normalizing SQL within the same dialect).
2048 pub fn transform(&self, expr: Expression) -> Result<Expression> {
2049 // Apply preprocessing transforms based on dialect
2050 let preprocessed = self.preprocess(expr)?;
2051 // Then apply recursive transformation
2052 transform_recursive(preprocessed, &self.transformer)
2053 }
2054
    /// Apply dialect-specific preprocessing transforms.
    ///
    /// This is the whole-tree phase of [`transform`](Self::transform): structural
    /// rewrites (QUALIFY elimination, CTE hoisting, join rewrites, ...) that must
    /// see the entire statement before the recursive per-node transform runs.
    /// Within each dialect arm, the transform order is deliberate — later
    /// transforms may rely on the shapes produced by earlier ones.
    fn preprocess(&self, expr: Expression) -> Result<Expression> {
        // If a custom preprocess function is set, use it instead of the built-in logic
        if let Some(ref custom_preprocess) = self.custom_preprocess {
            return custom_preprocess(expr);
        }

        // Import is gated so builds with none of these dialects enabled don't
        // carry an unused `use`.
        #[cfg(any(
            feature = "dialect-mysql",
            feature = "dialect-postgresql",
            feature = "dialect-bigquery",
            feature = "dialect-snowflake",
            feature = "dialect-tsql",
            feature = "dialect-spark",
            feature = "dialect-databricks",
            feature = "dialect-hive",
            feature = "dialect-sqlite",
            feature = "dialect-trino",
            feature = "dialect-presto",
            feature = "dialect-duckdb",
            feature = "dialect-redshift",
            feature = "dialect-starrocks",
            feature = "dialect-oracle",
            feature = "dialect-clickhouse",
        ))]
        use crate::transforms;

        match self.dialect_type {
            // MySQL doesn't support QUALIFY, DISTINCT ON, FULL OUTER JOIN
            // MySQL doesn't natively support GENERATE_DATE_ARRAY (expand to recursive CTE)
            #[cfg(feature = "dialect-mysql")]
            DialectType::MySQL => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_full_outer_join(expr)?;
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
                Ok(expr)
            }
            // PostgreSQL doesn't support QUALIFY
            // PostgreSQL: UNNEST(GENERATE_SERIES) -> subquery wrapping
            // PostgreSQL: Normalize SET ... TO to SET ... = in CREATE FUNCTION
            #[cfg(feature = "dialect-postgresql")]
            DialectType::PostgreSQL => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::unwrap_unnest_generate_series_for_postgres(expr)?;
                // Normalize SET ... TO to SET ... = in CREATE FUNCTION
                // Only normalize when sqlglot would fully parse (no body) —
                // sqlglot falls back to Command for complex function bodies,
                // preserving the original text including TO.
                let expr = if let Expression::CreateFunction(mut cf) = expr {
                    if cf.body.is_none() {
                        for opt in &mut cf.set_options {
                            if let crate::expressions::FunctionSetValue::Value { use_to, .. } =
                                &mut opt.value
                            {
                                *use_to = false;
                            }
                        }
                    }
                    Expression::CreateFunction(cf)
                } else {
                    expr
                };
                Ok(expr)
            }
            // BigQuery doesn't support DISTINCT ON or CTE column aliases
            #[cfg(feature = "dialect-bigquery")]
            DialectType::BigQuery => {
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::pushdown_cte_column_names(expr)?;
                let expr = transforms::explode_projection_to_unnest(expr, DialectType::BigQuery)?;
                Ok(expr)
            }
            // Snowflake
            #[cfg(feature = "dialect-snowflake")]
            DialectType::Snowflake => {
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::eliminate_window_clause(expr)?;
                let expr = transforms::snowflake_flatten_projection_to_unnest(expr)?;
                Ok(expr)
            }
            // TSQL doesn't support QUALIFY
            // TSQL requires boolean expressions in WHERE/HAVING (no implicit truthiness)
            // TSQL doesn't support CTEs in subqueries (hoist to top level)
            // NOTE: no_limit_order_by_union is handled in cross_dialect_normalize (not preprocess)
            // to avoid breaking TSQL identity tests where ORDER BY on UNION is valid
            #[cfg(feature = "dialect-tsql")]
            DialectType::TSQL => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::ensure_bools(expr)?;
                let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
                let expr = transforms::move_ctes_to_top_level(expr)?;
                let expr = transforms::qualify_derived_table_outputs(expr)?;
                Ok(expr)
            }
            // Spark doesn't support QUALIFY (but Databricks does)
            // Spark doesn't support CTEs in subqueries (hoist to top level)
            #[cfg(feature = "dialect-spark")]
            DialectType::Spark => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::add_auto_table_alias(expr)?;
                let expr = transforms::simplify_nested_paren_values(expr)?;
                let expr = transforms::move_ctes_to_top_level(expr)?;
                Ok(expr)
            }
            // Databricks supports QUALIFY natively
            // Databricks doesn't support CTEs in subqueries (hoist to top level)
            #[cfg(feature = "dialect-databricks")]
            DialectType::Databricks => {
                let expr = transforms::add_auto_table_alias(expr)?;
                let expr = transforms::simplify_nested_paren_values(expr)?;
                let expr = transforms::move_ctes_to_top_level(expr)?;
                Ok(expr)
            }
            // Hive doesn't support QUALIFY or CTEs in subqueries
            #[cfg(feature = "dialect-hive")]
            DialectType::Hive => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::move_ctes_to_top_level(expr)?;
                Ok(expr)
            }
            // SQLite doesn't support QUALIFY
            #[cfg(feature = "dialect-sqlite")]
            DialectType::SQLite => {
                let expr = transforms::eliminate_qualify(expr)?;
                Ok(expr)
            }
            // Trino doesn't support QUALIFY
            #[cfg(feature = "dialect-trino")]
            DialectType::Trino => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::explode_projection_to_unnest(expr, DialectType::Trino)?;
                Ok(expr)
            }
            // Presto doesn't support QUALIFY or WINDOW clause
            #[cfg(feature = "dialect-presto")]
            DialectType::Presto => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_window_clause(expr)?;
                let expr = transforms::explode_projection_to_unnest(expr, DialectType::Presto)?;
                Ok(expr)
            }
            // DuckDB supports QUALIFY - no elimination needed
            // Expand POSEXPLODE to GENERATE_SUBSCRIPTS + UNNEST
            // Expand LIKE ANY / ILIKE ANY to OR chains (DuckDB doesn't support quantifiers)
            #[cfg(feature = "dialect-duckdb")]
            DialectType::DuckDB => {
                let expr = transforms::expand_posexplode_duckdb(expr)?;
                let expr = transforms::expand_like_any(expr)?;
                Ok(expr)
            }
            // Redshift doesn't support QUALIFY, WINDOW clause, or GENERATE_DATE_ARRAY
            #[cfg(feature = "dialect-redshift")]
            DialectType::Redshift => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_window_clause(expr)?;
                let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
                Ok(expr)
            }
            // StarRocks doesn't support BETWEEN in DELETE statements or QUALIFY
            #[cfg(feature = "dialect-starrocks")]
            DialectType::StarRocks => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::expand_between_in_delete(expr)?;
                Ok(expr)
            }
            // DataFusion supports QUALIFY and semi/anti joins natively
            #[cfg(feature = "dialect-datafusion")]
            DialectType::DataFusion => Ok(expr),
            // Oracle doesn't support QUALIFY
            #[cfg(feature = "dialect-oracle")]
            DialectType::Oracle => {
                let expr = transforms::eliminate_qualify(expr)?;
                Ok(expr)
            }
            // Drill - no special preprocessing needed
            #[cfg(feature = "dialect-drill")]
            DialectType::Drill => Ok(expr),
            // Teradata - no special preprocessing needed
            #[cfg(feature = "dialect-teradata")]
            DialectType::Teradata => Ok(expr),
            // ClickHouse doesn't support ORDER BY/LIMIT directly on UNION
            #[cfg(feature = "dialect-clickhouse")]
            DialectType::ClickHouse => {
                let expr = transforms::no_limit_order_by_union(expr)?;
                Ok(expr)
            }
            // Other dialects - no preprocessing
            _ => Ok(expr),
        }
    }
2248
    /// Transpile SQL from this dialect to another.
    ///
    /// Parses `sql` as this (source) dialect, rewrites each statement's AST for
    /// `target`, and returns one generated SQL string per input statement.
    pub fn transpile_to(&self, sql: &str, target: DialectType) -> Result<Vec<String>> {
        self.transpile_to_inner(sql, target, false)
    }

    /// Transpile SQL from this dialect to another with pretty printing enabled.
    ///
    /// Identical to [`transpile_to`](Self::transpile_to) but multi-line output.
    pub fn transpile_to_pretty(&self, sql: &str, target: DialectType) -> Result<Vec<String>> {
        self.transpile_to_inner(sql, target, true)
    }
2258
2259 #[cfg(not(feature = "transpile"))]
2260 fn transpile_to_inner(
2261 &self,
2262 sql: &str,
2263 target: DialectType,
2264 pretty: bool,
2265 ) -> Result<Vec<String>> {
2266 // Without the transpile feature, only same-dialect or to/from generic is supported
2267 if self.dialect_type != target
2268 && self.dialect_type != DialectType::Generic
2269 && target != DialectType::Generic
2270 {
2271 return Err(crate::error::Error::parse(
2272 "Cross-dialect transpilation not available in this build",
2273 0,
2274 0,
2275 0,
2276 0,
2277 ));
2278 }
2279
2280 let expressions = self.parse(sql)?;
2281 let target_dialect = Dialect::get(target);
2282 let generic_identity =
2283 self.dialect_type == DialectType::Generic && target == DialectType::Generic;
2284
2285 if generic_identity {
2286 return expressions
2287 .into_iter()
2288 .map(|expr| {
2289 if pretty {
2290 target_dialect.generate_pretty_with_source(&expr, self.dialect_type)
2291 } else {
2292 target_dialect.generate_with_source(&expr, self.dialect_type)
2293 }
2294 })
2295 .collect();
2296 }
2297
2298 expressions
2299 .into_iter()
2300 .map(|expr| {
2301 let transformed = target_dialect.transform(expr)?;
2302 if pretty {
2303 target_dialect.generate_pretty_with_source(&transformed, self.dialect_type)
2304 } else {
2305 target_dialect.generate_with_source(&transformed, self.dialect_type)
2306 }
2307 })
2308 .collect()
2309 }
2310
    /// Full cross-dialect transpilation pipeline (enabled by the `transpile` feature).
    ///
    /// For each parsed statement the pipeline runs, in order:
    /// 1. Source-side normalization (e.g. DuckDB VARCHAR/CHAR -> TEXT, then the
    ///    source dialect's own transform when source != target).
    /// 2. Source/target-pair fixups (TSQL ISNULL unwrap, Snowflake CURRENT_TIME
    ///    and REPEAT adjustments, BigQuery struct-field propagation).
    /// 3. Cross-dialect semantic normalization plus UNNEST/LATERAL/UNION rewrites
    ///    keyed on the target dialect.
    /// 4. The target dialect's transform, then generation (optionally pretty).
    ///
    /// The ordering of these steps is load-bearing; see the inline comments.
    #[cfg(feature = "transpile")]
    fn transpile_to_inner(
        &self,
        sql: &str,
        target: DialectType,
        pretty: bool,
    ) -> Result<Vec<String>> {
        let expressions = self.parse(sql)?;
        let target_dialect = Dialect::get(target);
        let generic_identity =
            self.dialect_type == DialectType::Generic && target == DialectType::Generic;

        if generic_identity {
            return expressions
                .into_iter()
                .map(|expr| {
                    if pretty {
                        target_dialect.generate_pretty_with_source(&expr, self.dialect_type)
                    } else {
                        target_dialect.generate_with_source(&expr, self.dialect_type)
                    }
                })
                .collect();
        }

        expressions
            .into_iter()
            .map(|expr| {
                // DuckDB source: normalize VARCHAR/CHAR to TEXT (DuckDB doesn't support
                // VARCHAR length constraints). This emulates Python sqlglot's DuckDB parser
                // where VARCHAR_LENGTH = None and VARCHAR maps to TEXT.
                let expr = if matches!(self.dialect_type, DialectType::DuckDB) {
                    use crate::expressions::DataType as DT;
                    transform_recursive(expr, &|e| match e {
                        Expression::DataType(DT::VarChar { .. }) => {
                            Ok(Expression::DataType(DT::Text))
                        }
                        Expression::DataType(DT::Char { .. }) => Ok(Expression::DataType(DT::Text)),
                        _ => Ok(e),
                    })?
                } else {
                    expr
                };

                // When source and target differ, first normalize the source dialect's
                // AST constructs to standard SQL, so that the target dialect can handle them.
                // This handles cases like Snowflake's SQUARE -> POWER, DIV0 -> CASE, etc.
                let normalized =
                    if self.dialect_type != target && self.dialect_type != DialectType::Generic {
                        self.transform(expr)?
                    } else {
                        expr
                    };

                // For TSQL source targeting non-TSQL: unwrap ISNULL(JSON_QUERY(...), JSON_VALUE(...))
                // to just JSON_QUERY(...) so cross_dialect_normalize can convert it cleanly.
                // The TSQL read transform wraps JsonQuery in ISNULL for identity, but for
                // cross-dialect transpilation we need the unwrapped JSON_QUERY.
                let normalized =
                    if matches!(self.dialect_type, DialectType::TSQL | DialectType::Fabric)
                        && !matches!(target, DialectType::TSQL | DialectType::Fabric)
                    {
                        transform_recursive(normalized, &|e| {
                            if let Expression::Function(ref f) = e {
                                if f.name.eq_ignore_ascii_case("ISNULL") && f.args.len() == 2 {
                                    // Check if first arg is JSON_QUERY and second is JSON_VALUE
                                    if let (
                                        Expression::Function(ref jq),
                                        Expression::Function(ref jv),
                                    ) = (&f.args[0], &f.args[1])
                                    {
                                        if jq.name.eq_ignore_ascii_case("JSON_QUERY")
                                            && jv.name.eq_ignore_ascii_case("JSON_VALUE")
                                        {
                                            // Unwrap: return just JSON_QUERY(...)
                                            return Ok(f.args[0].clone());
                                        }
                                    }
                                }
                            }
                            Ok(e)
                        })?
                    } else {
                        normalized
                    };

                // Snowflake source to non-Snowflake target: CURRENT_TIME -> LOCALTIME
                // Snowflake's CURRENT_TIME is equivalent to LOCALTIME in other dialects.
                // Python sqlglot parses Snowflake's CURRENT_TIME as Localtime expression.
                let normalized = if matches!(self.dialect_type, DialectType::Snowflake)
                    && !matches!(target, DialectType::Snowflake)
                {
                    transform_recursive(normalized, &|e| {
                        if let Expression::Function(ref f) = e {
                            if f.name.eq_ignore_ascii_case("CURRENT_TIME") {
                                return Ok(Expression::Localtime(Box::new(
                                    crate::expressions::Localtime { this: None },
                                )));
                            }
                        }
                        Ok(e)
                    })?
                } else {
                    normalized
                };

                // Snowflake source to DuckDB target: REPEAT(' ', n) -> REPEAT(' ', CAST(n AS BIGINT))
                // Snowflake's SPACE(n) is converted to REPEAT(' ', n) by the Snowflake source
                // transform. DuckDB requires the count argument to be BIGINT.
                let normalized = if matches!(self.dialect_type, DialectType::Snowflake)
                    && matches!(target, DialectType::DuckDB)
                {
                    transform_recursive(normalized, &|e| {
                        if let Expression::Function(ref f) = e {
                            if f.name.eq_ignore_ascii_case("REPEAT") && f.args.len() == 2 {
                                // Check if first arg is space string literal
                                if let Expression::Literal(crate::expressions::Literal::String(
                                    ref s,
                                )) = f.args[0]
                                {
                                    if s == " " {
                                        // Wrap second arg in CAST(... AS BIGINT) if not already
                                        if !matches!(f.args[1], Expression::Cast(_)) {
                                            let mut new_args = f.args.clone();
                                            new_args[1] = Expression::Cast(Box::new(
                                                crate::expressions::Cast {
                                                    this: new_args[1].clone(),
                                                    to: crate::expressions::DataType::BigInt {
                                                        length: None,
                                                    },
                                                    trailing_comments: Vec::new(),
                                                    double_colon_syntax: false,
                                                    format: None,
                                                    default: None,
                                                },
                                            ));
                                            return Ok(Expression::Function(Box::new(
                                                crate::expressions::Function {
                                                    name: f.name.clone(),
                                                    args: new_args,
                                                    distinct: f.distinct,
                                                    trailing_comments: f.trailing_comments.clone(),
                                                    use_bracket_syntax: f.use_bracket_syntax,
                                                    no_parens: f.no_parens,
                                                    quoted: f.quoted,
                                                    span: None,
                                                },
                                            )));
                                        }
                                    }
                                }
                            }
                        }
                        Ok(e)
                    })?
                } else {
                    normalized
                };

                // Propagate struct field names in arrays (for BigQuery source to non-BigQuery target)
                // BigQuery->BigQuery should NOT propagate names (BigQuery handles implicit inheritance)
                let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
                    && !matches!(target, DialectType::BigQuery)
                {
                    crate::transforms::propagate_struct_field_names(normalized)?
                } else {
                    normalized
                };

                // Apply cross-dialect semantic normalizations
                let normalized =
                    Self::cross_dialect_normalize(normalized, self.dialect_type, target)?;

                // For DuckDB target from BigQuery source: wrap UNNEST of struct arrays in
                // (SELECT UNNEST(..., max_depth => 2)) subquery
                // Must run BEFORE unnest_alias_to_column_alias since it changes alias structure
                let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
                    && matches!(target, DialectType::DuckDB)
                {
                    crate::transforms::wrap_duckdb_unnest_struct(normalized)?
                } else {
                    normalized
                };

                // Convert BigQuery UNNEST aliases to column-alias format for DuckDB/Presto/Spark
                // UNNEST(arr) AS x -> UNNEST(arr) AS _t0(x)
                let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
                    && matches!(
                        target,
                        DialectType::DuckDB
                            | DialectType::Presto
                            | DialectType::Trino
                            | DialectType::Athena
                            | DialectType::Spark
                            | DialectType::Databricks
                    ) {
                    crate::transforms::unnest_alias_to_column_alias(normalized)?
                } else if matches!(self.dialect_type, DialectType::BigQuery)
                    && matches!(target, DialectType::BigQuery | DialectType::Redshift)
                {
                    // For BigQuery/Redshift targets: move UNNEST FROM items to CROSS JOINs
                    // but don't convert alias format (no _t0 wrapper)
                    let result = crate::transforms::unnest_from_to_cross_join(normalized)?;
                    // For Redshift: strip UNNEST when arg is a column reference path
                    if matches!(target, DialectType::Redshift) {
                        crate::transforms::strip_unnest_column_refs(result)?
                    } else {
                        result
                    }
                } else {
                    normalized
                };

                // For Presto/Trino targets from PostgreSQL/Redshift source:
                // Wrap UNNEST aliases from GENERATE_SERIES conversion: AS s -> AS _u(s)
                let normalized = if matches!(
                    self.dialect_type,
                    DialectType::PostgreSQL | DialectType::Redshift
                ) && matches!(
                    target,
                    DialectType::Presto | DialectType::Trino | DialectType::Athena
                ) {
                    crate::transforms::wrap_unnest_join_aliases(normalized)?
                } else {
                    normalized
                };

                // Eliminate DISTINCT ON with target-dialect awareness
                // This must happen after source transform (which may produce DISTINCT ON)
                // and before target transform, with knowledge of the target dialect's NULL ordering behavior
                let normalized =
                    crate::transforms::eliminate_distinct_on_for_dialect(normalized, Some(target))?;

                // GENERATE_DATE_ARRAY in UNNEST -> Snowflake ARRAY_GENERATE_RANGE + DATEADD
                let normalized = if matches!(target, DialectType::Snowflake) {
                    Self::transform_generate_date_array_snowflake(normalized)?
                } else {
                    normalized
                };

                // CROSS JOIN UNNEST -> LATERAL VIEW EXPLODE/INLINE for Spark/Hive/Databricks
                let normalized = if matches!(
                    target,
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive
                ) {
                    crate::transforms::unnest_to_explode_select(normalized)?
                } else {
                    normalized
                };

                // Wrap UNION with ORDER BY/LIMIT in a subquery for dialects that require it
                let normalized = if matches!(target, DialectType::ClickHouse | DialectType::TSQL) {
                    crate::transforms::no_limit_order_by_union(normalized)?
                } else {
                    normalized
                };

                // TSQL: Convert COUNT(*) -> COUNT_BIG(*) when source is not TSQL/Fabric
                // Python sqlglot does this in the TSQL generator, but we can't do it there
                // because it would break TSQL -> TSQL identity
                let normalized = if matches!(target, DialectType::TSQL | DialectType::Fabric)
                    && !matches!(self.dialect_type, DialectType::TSQL | DialectType::Fabric)
                {
                    transform_recursive(normalized, &|e| {
                        if let Expression::Count(ref c) = e {
                            // Build COUNT_BIG(...) as an AggregateFunction
                            let args = if c.star {
                                vec![Expression::Star(crate::expressions::Star {
                                    table: None,
                                    except: None,
                                    replace: None,
                                    rename: None,
                                    trailing_comments: Vec::new(),
                                    span: None,
                                })]
                            } else if let Some(ref this) = c.this {
                                vec![this.clone()]
                            } else {
                                vec![]
                            };
                            Ok(Expression::AggregateFunction(Box::new(
                                crate::expressions::AggregateFunction {
                                    name: "COUNT_BIG".to_string(),
                                    args,
                                    distinct: c.distinct,
                                    filter: c.filter.clone(),
                                    order_by: Vec::new(),
                                    limit: None,
                                    ignore_nulls: None,
                                },
                            )))
                        } else {
                            Ok(e)
                        }
                    })?
                } else {
                    normalized
                };

                let transformed = target_dialect.transform(normalized)?;
                let mut sql = if pretty {
                    target_dialect.generate_pretty_with_source(&transformed, self.dialect_type)?
                } else {
                    target_dialect.generate_with_source(&transformed, self.dialect_type)?
                };

                // Align a known Snowflake pretty-print edge case with Python sqlglot output.
                if pretty && target == DialectType::Snowflake {
                    sql = Self::normalize_snowflake_pretty(sql);
                }

                Ok(sql)
            })
            .collect()
    }
2626}
2627
2628// Transpile-only methods: cross-dialect normalization and helpers
2629#[cfg(feature = "transpile")]
2630impl Dialect {
2631 /// Transform BigQuery GENERATE_DATE_ARRAY in UNNEST for Snowflake target.
2632 /// Converts:
2633 /// SELECT ..., alias, ... FROM t CROSS JOIN UNNEST(GENERATE_DATE_ARRAY(start, end, INTERVAL '1' unit)) AS alias
2634 /// To:
2635 /// SELECT ..., DATEADD(unit, CAST(alias AS INT), CAST(start AS DATE)) AS alias, ...
2636 /// FROM t, LATERAL FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (DATEDIFF(unit, start, end) + 1 - 1) + 1)) AS _t0(seq, key, path, index, alias, this)
2637 fn transform_generate_date_array_snowflake(expr: Expression) -> Result<Expression> {
2638 use crate::expressions::*;
2639 transform_recursive(expr, &|e| {
2640 // Handle ARRAY_SIZE(GENERATE_DATE_ARRAY(...)) -> ARRAY_SIZE((SELECT ARRAY_AGG(*) FROM subquery))
2641 if let Expression::ArraySize(ref af) = e {
2642 if let Expression::Function(ref f) = af.this {
2643 if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY") && f.args.len() >= 2 {
2644 let result = Self::convert_array_size_gda_snowflake(f)?;
2645 return Ok(result);
2646 }
2647 }
2648 }
2649
2650 let Expression::Select(mut sel) = e else {
2651 return Ok(e);
2652 };
2653
2654 // Find joins with UNNEST containing GenerateSeries (from GENERATE_DATE_ARRAY conversion)
2655 let mut gda_info: Option<(String, Expression, Expression, String)> = None; // (alias_name, start_expr, end_expr, unit)
2656 let mut gda_join_idx: Option<usize> = None;
2657
2658 for (idx, join) in sel.joins.iter().enumerate() {
2659 // The join.this may be:
2660 // 1. Unnest(UnnestFunc { alias: Some("mnth"), ... })
2661 // 2. Alias(Alias { this: Unnest(UnnestFunc { alias: None, ... }), alias: "mnth", ... })
2662 let (unnest_ref, alias_name) = match &join.this {
2663 Expression::Unnest(ref unnest) => {
2664 let alias = unnest.alias.as_ref().map(|id| id.name.clone());
2665 (Some(unnest.as_ref()), alias)
2666 }
2667 Expression::Alias(ref a) => {
2668 if let Expression::Unnest(ref unnest) = a.this {
2669 (Some(unnest.as_ref()), Some(a.alias.name.clone()))
2670 } else {
2671 (None, None)
2672 }
2673 }
2674 _ => (None, None),
2675 };
2676
2677 if let (Some(unnest), Some(alias)) = (unnest_ref, alias_name) {
2678 // Check the main expression (this) of the UNNEST for GENERATE_DATE_ARRAY function
2679 if let Expression::Function(ref f) = unnest.this {
2680 if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY") && f.args.len() >= 2 {
2681 let start_expr = f.args[0].clone();
2682 let end_expr = f.args[1].clone();
2683 let step = f.args.get(2).cloned();
2684
2685 // Extract unit from step interval
2686 let unit = if let Some(Expression::Interval(ref iv)) = step {
2687 if let Some(IntervalUnitSpec::Simple { ref unit, .. }) = iv.unit {
2688 Some(format!("{:?}", unit).to_uppercase())
2689 } else if let Some(ref this) = iv.this {
2690 // The interval may be stored as a string like "1 MONTH"
2691 if let Expression::Literal(Literal::String(ref s)) = this {
2692 let parts: Vec<&str> = s.split_whitespace().collect();
2693 if parts.len() == 2 {
2694 Some(parts[1].to_uppercase())
2695 } else if parts.len() == 1 {
2696 // Single word like "MONTH" or just "1"
2697 let upper = parts[0].to_uppercase();
2698 if matches!(
2699 upper.as_str(),
2700 "YEAR"
2701 | "QUARTER"
2702 | "MONTH"
2703 | "WEEK"
2704 | "DAY"
2705 | "HOUR"
2706 | "MINUTE"
2707 | "SECOND"
2708 ) {
2709 Some(upper)
2710 } else {
2711 None
2712 }
2713 } else {
2714 None
2715 }
2716 } else {
2717 None
2718 }
2719 } else {
2720 None
2721 }
2722 } else {
2723 None
2724 };
2725
2726 if let Some(unit_str) = unit {
2727 gda_info = Some((alias, start_expr, end_expr, unit_str));
2728 gda_join_idx = Some(idx);
2729 }
2730 }
2731 }
2732 }
2733 if gda_info.is_some() {
2734 break;
2735 }
2736 }
2737
2738 let Some((alias_name, start_expr, end_expr, unit_str)) = gda_info else {
2739 // Also check FROM clause for UNNEST(GENERATE_DATE_ARRAY(...)) patterns
2740 // This handles Generic->Snowflake where GENERATE_DATE_ARRAY is in FROM, not in JOIN
2741 let result = Self::try_transform_from_gda_snowflake(sel);
2742 return result;
2743 };
2744 let join_idx = gda_join_idx.unwrap();
2745
2746 // Build ARRAY_GENERATE_RANGE(0, (DATEDIFF(unit, start, end) + 1 - 1) + 1)
2747 let datediff = Expression::Function(Box::new(Function::new(
2748 "DATEDIFF".to_string(),
2749 vec![
2750 Expression::Column(Column {
2751 name: Identifier::new(&unit_str),
2752 table: None,
2753 join_mark: false,
2754 trailing_comments: vec![],
2755 span: None,
2756 }),
2757 start_expr.clone(),
2758 end_expr.clone(),
2759 ],
2760 )));
2761 // (DATEDIFF(...) + 1 - 1) + 1
2762 let plus_one = Expression::Add(Box::new(BinaryOp {
2763 left: datediff,
2764 right: Expression::Literal(Literal::Number("1".to_string())),
2765 left_comments: vec![],
2766 operator_comments: vec![],
2767 trailing_comments: vec![],
2768 }));
2769 let minus_one = Expression::Sub(Box::new(BinaryOp {
2770 left: plus_one,
2771 right: Expression::Literal(Literal::Number("1".to_string())),
2772 left_comments: vec![],
2773 operator_comments: vec![],
2774 trailing_comments: vec![],
2775 }));
2776 let paren_inner = Expression::Paren(Box::new(Paren {
2777 this: minus_one,
2778 trailing_comments: vec![],
2779 }));
2780 let outer_plus_one = Expression::Add(Box::new(BinaryOp {
2781 left: paren_inner,
2782 right: Expression::Literal(Literal::Number("1".to_string())),
2783 left_comments: vec![],
2784 operator_comments: vec![],
2785 trailing_comments: vec![],
2786 }));
2787
2788 let array_gen_range = Expression::Function(Box::new(Function::new(
2789 "ARRAY_GENERATE_RANGE".to_string(),
2790 vec![
2791 Expression::Literal(Literal::Number("0".to_string())),
2792 outer_plus_one,
2793 ],
2794 )));
2795
2796 // Build FLATTEN(INPUT => ARRAY_GENERATE_RANGE(...))
2797 let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
2798 name: Identifier::new("INPUT"),
2799 value: array_gen_range,
2800 separator: crate::expressions::NamedArgSeparator::DArrow,
2801 }));
2802 let flatten = Expression::Function(Box::new(Function::new(
2803 "FLATTEN".to_string(),
2804 vec![flatten_input],
2805 )));
2806
2807 // Build LATERAL FLATTEN(...) AS _t0(seq, key, path, index, alias, this)
2808 let alias_table = Alias {
2809 this: flatten,
2810 alias: Identifier::new("_t0"),
2811 column_aliases: vec![
2812 Identifier::new("seq"),
2813 Identifier::new("key"),
2814 Identifier::new("path"),
2815 Identifier::new("index"),
2816 Identifier::new(&alias_name),
2817 Identifier::new("this"),
2818 ],
2819 pre_alias_comments: vec![],
2820 trailing_comments: vec![],
2821 };
2822 let lateral_expr = Expression::Lateral(Box::new(Lateral {
2823 this: Box::new(Expression::Alias(Box::new(alias_table))),
2824 view: None,
2825 outer: None,
2826 alias: None,
2827 alias_quoted: false,
2828 cross_apply: None,
2829 ordinality: None,
2830 column_aliases: vec![],
2831 }));
2832
2833 // Remove the original join and add to FROM expressions
2834 sel.joins.remove(join_idx);
2835 if let Some(ref mut from) = sel.from {
2836 from.expressions.push(lateral_expr);
2837 }
2838
2839 // Build DATEADD(unit, CAST(alias AS INT), CAST(start AS DATE))
2840 let dateadd_expr = Expression::Function(Box::new(Function::new(
2841 "DATEADD".to_string(),
2842 vec![
2843 Expression::Column(Column {
2844 name: Identifier::new(&unit_str),
2845 table: None,
2846 join_mark: false,
2847 trailing_comments: vec![],
2848 span: None,
2849 }),
2850 Expression::Cast(Box::new(Cast {
2851 this: Expression::Column(Column {
2852 name: Identifier::new(&alias_name),
2853 table: None,
2854 join_mark: false,
2855 trailing_comments: vec![],
2856 span: None,
2857 }),
2858 to: DataType::Int {
2859 length: None,
2860 integer_spelling: false,
2861 },
2862 trailing_comments: vec![],
2863 double_colon_syntax: false,
2864 format: None,
2865 default: None,
2866 })),
2867 Expression::Cast(Box::new(Cast {
2868 this: start_expr.clone(),
2869 to: DataType::Date,
2870 trailing_comments: vec![],
2871 double_colon_syntax: false,
2872 format: None,
2873 default: None,
2874 })),
2875 ],
2876 )));
2877
2878 // Replace references to the alias in the SELECT list
2879 let new_exprs: Vec<Expression> = sel
2880 .expressions
2881 .iter()
2882 .map(|expr| Self::replace_column_ref_with_dateadd(expr, &alias_name, &dateadd_expr))
2883 .collect();
2884 sel.expressions = new_exprs;
2885
2886 Ok(Expression::Select(sel))
2887 })
2888 }
2889
2890 /// Helper: replace column references to `alias_name` with dateadd expression
2891 fn replace_column_ref_with_dateadd(
2892 expr: &Expression,
2893 alias_name: &str,
2894 dateadd: &Expression,
2895 ) -> Expression {
2896 use crate::expressions::*;
2897 match expr {
2898 Expression::Column(c) if c.name.name == alias_name && c.table.is_none() => {
2899 // Plain column reference -> DATEADD(...) AS alias_name
2900 Expression::Alias(Box::new(Alias {
2901 this: dateadd.clone(),
2902 alias: Identifier::new(alias_name),
2903 column_aliases: vec![],
2904 pre_alias_comments: vec![],
2905 trailing_comments: vec![],
2906 }))
2907 }
2908 Expression::Alias(a) => {
2909 // Check if the inner expression references the alias
2910 let new_this = Self::replace_column_ref_inner(&a.this, alias_name, dateadd);
2911 Expression::Alias(Box::new(Alias {
2912 this: new_this,
2913 alias: a.alias.clone(),
2914 column_aliases: a.column_aliases.clone(),
2915 pre_alias_comments: a.pre_alias_comments.clone(),
2916 trailing_comments: a.trailing_comments.clone(),
2917 }))
2918 }
2919 _ => expr.clone(),
2920 }
2921 }
2922
2923 /// Helper: replace column references in inner expression (not top-level)
2924 fn replace_column_ref_inner(
2925 expr: &Expression,
2926 alias_name: &str,
2927 dateadd: &Expression,
2928 ) -> Expression {
2929 use crate::expressions::*;
2930 match expr {
2931 Expression::Column(c) if c.name.name == alias_name && c.table.is_none() => {
2932 dateadd.clone()
2933 }
2934 Expression::Add(op) => {
2935 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
2936 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
2937 Expression::Add(Box::new(BinaryOp {
2938 left,
2939 right,
2940 left_comments: op.left_comments.clone(),
2941 operator_comments: op.operator_comments.clone(),
2942 trailing_comments: op.trailing_comments.clone(),
2943 }))
2944 }
2945 Expression::Sub(op) => {
2946 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
2947 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
2948 Expression::Sub(Box::new(BinaryOp {
2949 left,
2950 right,
2951 left_comments: op.left_comments.clone(),
2952 operator_comments: op.operator_comments.clone(),
2953 trailing_comments: op.trailing_comments.clone(),
2954 }))
2955 }
2956 Expression::Mul(op) => {
2957 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
2958 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
2959 Expression::Mul(Box::new(BinaryOp {
2960 left,
2961 right,
2962 left_comments: op.left_comments.clone(),
2963 operator_comments: op.operator_comments.clone(),
2964 trailing_comments: op.trailing_comments.clone(),
2965 }))
2966 }
2967 _ => expr.clone(),
2968 }
2969 }
2970
    /// Handle UNNEST(GENERATE_DATE_ARRAY(...)) in FROM clause for Snowflake target.
    /// Converts to a subquery with DATEADD + TABLE(FLATTEN(ARRAY_GENERATE_RANGE(...))).
    ///
    /// Only the FIRST matching FROM entry is rewritten (the scan breaks after a
    /// hit); when nothing matches, the SELECT is returned unchanged. This is the
    /// fallback path used when `transform_generate_date_array_snowflake` finds no
    /// matching JOIN (e.g. Generic->Snowflake puts the UNNEST in FROM).
    fn try_transform_from_gda_snowflake(
        mut sel: Box<crate::expressions::Select>,
    ) -> Result<Expression> {
        use crate::expressions::*;

        // Extract GDA info from FROM clause
        let mut gda_info: Option<(
            usize,
            String,
            Expression,
            Expression,
            String,
            Option<(String, Vec<Identifier>)>,
        )> = None; // (from_idx, col_name, start, end, unit, outer_alias)

        if let Some(ref from) = sel.from {
            for (idx, table_expr) in from.expressions.iter().enumerate() {
                // Pattern 1: UNNEST(GENERATE_DATE_ARRAY(...))
                // Pattern 2: Alias(UNNEST(GENERATE_DATE_ARRAY(...))) AS _q(date_week)
                let (unnest_opt, outer_alias_info) = match table_expr {
                    Expression::Unnest(ref unnest) => (Some(unnest.as_ref()), None),
                    Expression::Alias(ref a) => {
                        if let Expression::Unnest(ref unnest) = a.this {
                            let alias_info = (a.alias.name.clone(), a.column_aliases.clone());
                            (Some(unnest.as_ref()), Some(alias_info))
                        } else {
                            (None, None)
                        }
                    }
                    _ => (None, None),
                };

                if let Some(unnest) = unnest_opt {
                    // Check for GENERATE_DATE_ARRAY function
                    let func_opt = match &unnest.this {
                        Expression::Function(ref f)
                            if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY")
                                && f.args.len() >= 2 =>
                        {
                            Some(f)
                        }
                        // Also check for GenerateSeries (from earlier normalization)
                        // NOTE(review): the GenerateSeries case mentioned above is
                        // not actually matched — this arm yields None for it. TODO
                        // confirm whether GenerateSeries support is still needed.
                        _ => None,
                    };

                    if let Some(f) = func_opt {
                        let start_expr = f.args[0].clone();
                        let end_expr = f.args[1].clone();
                        // Optional third arg: the step interval (defaults handled
                        // inside extract_interval_unit_str, which yields DAY when
                        // the step is absent).
                        let step = f.args.get(2).cloned();

                        // Extract unit and column name
                        let unit = Self::extract_interval_unit_str(&step);
                        // Column name comes from the first outer column alias when
                        // present, otherwise "value" (Snowflake FLATTEN's value
                        // column name).
                        let col_name = outer_alias_info
                            .as_ref()
                            .and_then(|(_, cols)| cols.first().map(|id| id.name.clone()))
                            .unwrap_or_else(|| "value".to_string());

                        if let Some(unit_str) = unit {
                            gda_info = Some((
                                idx,
                                col_name,
                                start_expr,
                                end_expr,
                                unit_str,
                                outer_alias_info,
                            ));
                            break;
                        }
                    }
                }
            }
        }

        let Some((from_idx, col_name, start_expr, end_expr, unit_str, outer_alias_info)) = gda_info
        else {
            // Nothing to rewrite: hand the SELECT back unchanged.
            return Ok(Expression::Select(sel));
        };

        // Build the Snowflake subquery:
        // (SELECT DATEADD(unit, CAST(col_name AS INT), CAST(start AS DATE)) AS col_name
        //  FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (DATEDIFF(unit, start, end) + 1 - 1) + 1))) AS _t0(seq, key, path, index, col_name, this))

        // DATEDIFF(unit, start, end)
        let datediff = Expression::Function(Box::new(Function::new(
            "DATEDIFF".to_string(),
            vec![
                Expression::Column(Column {
                    name: Identifier::new(&unit_str),
                    table: None,
                    join_mark: false,
                    trailing_comments: vec![],
                    span: None,
                }),
                start_expr.clone(),
                end_expr.clone(),
            ],
        )));
        // (DATEDIFF(...) + 1 - 1) + 1 — kept verbatim, presumably to mirror
        // Python sqlglot's emitted arithmetic byte-for-byte.
        let plus_one = Expression::Add(Box::new(BinaryOp {
            left: datediff,
            right: Expression::Literal(Literal::Number("1".to_string())),
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
        }));
        let minus_one = Expression::Sub(Box::new(BinaryOp {
            left: plus_one,
            right: Expression::Literal(Literal::Number("1".to_string())),
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
        }));
        let paren_inner = Expression::Paren(Box::new(Paren {
            this: minus_one,
            trailing_comments: vec![],
        }));
        let outer_plus_one = Expression::Add(Box::new(BinaryOp {
            left: paren_inner,
            right: Expression::Literal(Literal::Number("1".to_string())),
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
        }));

        let array_gen_range = Expression::Function(Box::new(Function::new(
            "ARRAY_GENERATE_RANGE".to_string(),
            vec![
                Expression::Literal(Literal::Number("0".to_string())),
                outer_plus_one,
            ],
        )));

        // TABLE(FLATTEN(INPUT => ...))
        let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
            name: Identifier::new("INPUT"),
            value: array_gen_range,
            separator: crate::expressions::NamedArgSeparator::DArrow,
        }));
        let flatten = Expression::Function(Box::new(Function::new(
            "FLATTEN".to_string(),
            vec![flatten_input],
        )));

        // Determine alias name for the table: use outer alias or _t0
        let table_alias_name = outer_alias_info
            .as_ref()
            .map(|(name, _)| name.clone())
            .unwrap_or_else(|| "_t0".to_string());

        // TABLE(FLATTEN(...)) AS _t0(seq, key, path, index, col_name, this)
        // The six column aliases follow FLATTEN's output columns, with the
        // requested column name mapped onto the value position.
        let table_func =
            Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])));
        let flatten_aliased = Expression::Alias(Box::new(Alias {
            this: table_func,
            alias: Identifier::new(&table_alias_name),
            column_aliases: vec![
                Identifier::new("seq"),
                Identifier::new("key"),
                Identifier::new("path"),
                Identifier::new("index"),
                Identifier::new(&col_name),
                Identifier::new("this"),
            ],
            pre_alias_comments: vec![],
            trailing_comments: vec![],
        }));

        // SELECT DATEADD(unit, CAST(col_name AS INT), CAST(start AS DATE)) AS col_name
        let dateadd_expr = Expression::Function(Box::new(Function::new(
            "DATEADD".to_string(),
            vec![
                Expression::Column(Column {
                    name: Identifier::new(&unit_str),
                    table: None,
                    join_mark: false,
                    trailing_comments: vec![],
                    span: None,
                }),
                Expression::Cast(Box::new(Cast {
                    this: Expression::Column(Column {
                        name: Identifier::new(&col_name),
                        table: None,
                        join_mark: false,
                        trailing_comments: vec![],
                        span: None,
                    }),
                    to: DataType::Int {
                        length: None,
                        integer_spelling: false,
                    },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                })),
                // Use start_expr directly - it's already been normalized (DATE literal -> CAST)
                start_expr.clone(),
            ],
        )));
        let dateadd_aliased = Expression::Alias(Box::new(Alias {
            this: dateadd_expr,
            alias: Identifier::new(&col_name),
            column_aliases: vec![],
            pre_alias_comments: vec![],
            trailing_comments: vec![],
        }));

        // Build inner SELECT
        let mut inner_select = Select::new();
        inner_select.expressions = vec![dateadd_aliased];
        inner_select.from = Some(From {
            expressions: vec![flatten_aliased],
        });

        let inner_select_expr = Expression::Select(Box::new(inner_select));
        let subquery = Expression::Subquery(Box::new(Subquery {
            this: inner_select_expr,
            alias: None,
            column_aliases: vec![],
            order_by: None,
            limit: None,
            offset: None,
            distribute_by: None,
            sort_by: None,
            cluster_by: None,
            lateral: false,
            modifiers_inside: false,
            trailing_comments: vec![],
        }));

        // If there was an outer alias (e.g., AS _q(date_week)), wrap with alias
        let replacement = if let Some((alias_name, col_aliases)) = outer_alias_info {
            Expression::Alias(Box::new(Alias {
                this: subquery,
                alias: Identifier::new(&alias_name),
                column_aliases: col_aliases,
                pre_alias_comments: vec![],
                trailing_comments: vec![],
            }))
        } else {
            subquery
        };

        // Replace the FROM expression in place, leaving any other FROM entries intact.
        if let Some(ref mut from) = sel.from {
            from.expressions[from_idx] = replacement;
        }

        Ok(Expression::Select(sel))
    }
3223
    /// Convert ARRAY_SIZE(GENERATE_DATE_ARRAY(start, end, step)) for Snowflake.
    /// Produces: ARRAY_SIZE((SELECT ARRAY_AGG(*) FROM (SELECT DATEADD(unit, CAST(value AS INT), start) AS value
    /// FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (DATEDIFF(unit, start, end) + 1 - 1) + 1))) AS _t0(...))))
    ///
    /// The caller (transform_generate_date_array_snowflake) checks
    /// `f.args.len() >= 2` before dispatching here, so indexing args[0]/args[1]
    /// is safe. A missing or unparseable step falls back to a DAY unit.
    fn convert_array_size_gda_snowflake(f: &crate::expressions::Function) -> Result<Expression> {
        use crate::expressions::*;

        let start_expr = f.args[0].clone();
        let end_expr = f.args[1].clone();
        let step = f.args.get(2).cloned();
        // Unlike the JOIN path, this conversion always proceeds: DAY is assumed
        // when the unit cannot be derived from the step.
        let unit_str = Self::extract_interval_unit_str(&step).unwrap_or_else(|| "DAY".to_string());
        // Fixed column name for the flattened element (Snowflake FLATTEN's value column).
        let col_name = "value";

        // Build the inner subquery: same as try_transform_from_gda_snowflake
        let datediff = Expression::Function(Box::new(Function::new(
            "DATEDIFF".to_string(),
            vec![
                Expression::Column(Column {
                    name: Identifier::new(&unit_str),
                    table: None,
                    join_mark: false,
                    trailing_comments: vec![],
                    span: None,
                }),
                start_expr.clone(),
                end_expr.clone(),
            ],
        )));
        // (DATEDIFF(...) + 1 - 1) + 1 — kept verbatim, presumably to mirror
        // Python sqlglot's emitted arithmetic byte-for-byte.
        let plus_one = Expression::Add(Box::new(BinaryOp {
            left: datediff,
            right: Expression::Literal(Literal::Number("1".to_string())),
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
        }));
        let minus_one = Expression::Sub(Box::new(BinaryOp {
            left: plus_one,
            right: Expression::Literal(Literal::Number("1".to_string())),
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
        }));
        let paren_inner = Expression::Paren(Box::new(Paren {
            this: minus_one,
            trailing_comments: vec![],
        }));
        let outer_plus_one = Expression::Add(Box::new(BinaryOp {
            left: paren_inner,
            right: Expression::Literal(Literal::Number("1".to_string())),
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
        }));

        let array_gen_range = Expression::Function(Box::new(Function::new(
            "ARRAY_GENERATE_RANGE".to_string(),
            vec![
                Expression::Literal(Literal::Number("0".to_string())),
                outer_plus_one,
            ],
        )));

        // FLATTEN(INPUT => ARRAY_GENERATE_RANGE(...))
        let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
            name: Identifier::new("INPUT"),
            value: array_gen_range,
            separator: crate::expressions::NamedArgSeparator::DArrow,
        }));
        let flatten = Expression::Function(Box::new(Function::new(
            "FLATTEN".to_string(),
            vec![flatten_input],
        )));

        // TABLE(FLATTEN(...)) AS _t0(seq, key, path, index, value, this) —
        // the six aliases follow FLATTEN's output columns.
        let table_func =
            Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])));
        let flatten_aliased = Expression::Alias(Box::new(Alias {
            this: table_func,
            alias: Identifier::new("_t0"),
            column_aliases: vec![
                Identifier::new("seq"),
                Identifier::new("key"),
                Identifier::new("path"),
                Identifier::new("index"),
                Identifier::new(col_name),
                Identifier::new("this"),
            ],
            pre_alias_comments: vec![],
            trailing_comments: vec![],
        }));

        // DATEADD(unit, CAST(value AS INT), start) — start is used as-is (no
        // CAST wrapper here, unlike the JOIN path which casts to DATE).
        let dateadd_expr = Expression::Function(Box::new(Function::new(
            "DATEADD".to_string(),
            vec![
                Expression::Column(Column {
                    name: Identifier::new(&unit_str),
                    table: None,
                    join_mark: false,
                    trailing_comments: vec![],
                    span: None,
                }),
                Expression::Cast(Box::new(Cast {
                    this: Expression::Column(Column {
                        name: Identifier::new(col_name),
                        table: None,
                        join_mark: false,
                        trailing_comments: vec![],
                        span: None,
                    }),
                    to: DataType::Int {
                        length: None,
                        integer_spelling: false,
                    },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                })),
                start_expr.clone(),
            ],
        )));
        let dateadd_aliased = Expression::Alias(Box::new(Alias {
            this: dateadd_expr,
            alias: Identifier::new(col_name),
            column_aliases: vec![],
            pre_alias_comments: vec![],
            trailing_comments: vec![],
        }));

        // Inner SELECT: SELECT DATEADD(...) AS value FROM TABLE(FLATTEN(...)) AS _t0(...)
        let mut inner_select = Select::new();
        inner_select.expressions = vec![dateadd_aliased];
        inner_select.from = Some(From {
            expressions: vec![flatten_aliased],
        });

        // Wrap in subquery for the inner part
        let inner_subquery = Expression::Subquery(Box::new(Subquery {
            this: Expression::Select(Box::new(inner_select)),
            alias: None,
            column_aliases: vec![],
            order_by: None,
            limit: None,
            offset: None,
            distribute_by: None,
            sort_by: None,
            cluster_by: None,
            lateral: false,
            modifiers_inside: false,
            trailing_comments: vec![],
        }));

        // Outer: SELECT ARRAY_AGG(*) FROM (inner_subquery)
        let star = Expression::Star(Star {
            table: None,
            except: None,
            replace: None,
            rename: None,
            trailing_comments: vec![],
            span: None,
        });
        let array_agg = Expression::ArrayAgg(Box::new(AggFunc {
            this: star,
            distinct: false,
            filter: None,
            order_by: vec![],
            name: Some("ARRAY_AGG".to_string()),
            ignore_nulls: None,
            having_max: None,
            limit: None,
        }));

        let mut outer_select = Select::new();
        outer_select.expressions = vec![array_agg];
        outer_select.from = Some(From {
            expressions: vec![inner_subquery],
        });

        // Wrap in a subquery
        let outer_subquery = Expression::Subquery(Box::new(Subquery {
            this: Expression::Select(Box::new(outer_select)),
            alias: None,
            column_aliases: vec![],
            order_by: None,
            limit: None,
            offset: None,
            distribute_by: None,
            sort_by: None,
            cluster_by: None,
            lateral: false,
            modifiers_inside: false,
            trailing_comments: vec![],
        }));

        // ARRAY_SIZE(subquery)
        Ok(Expression::ArraySize(Box::new(UnaryFunc::new(
            outer_subquery,
        ))))
    }
3420
3421 /// Extract interval unit string from an optional step expression.
3422 fn extract_interval_unit_str(step: &Option<Expression>) -> Option<String> {
3423 use crate::expressions::*;
3424 if let Some(Expression::Interval(ref iv)) = step {
3425 if let Some(IntervalUnitSpec::Simple { ref unit, .. }) = iv.unit {
3426 return Some(format!("{:?}", unit).to_uppercase());
3427 }
3428 if let Some(ref this) = iv.this {
3429 if let Expression::Literal(Literal::String(ref s)) = this {
3430 let parts: Vec<&str> = s.split_whitespace().collect();
3431 if parts.len() == 2 {
3432 return Some(parts[1].to_uppercase());
3433 } else if parts.len() == 1 {
3434 let upper = parts[0].to_uppercase();
3435 if matches!(
3436 upper.as_str(),
3437 "YEAR"
3438 | "QUARTER"
3439 | "MONTH"
3440 | "WEEK"
3441 | "DAY"
3442 | "HOUR"
3443 | "MINUTE"
3444 | "SECOND"
3445 ) {
3446 return Some(upper);
3447 }
3448 }
3449 }
3450 }
3451 }
3452 // Default to DAY if no step or no interval
3453 if step.is_none() {
3454 return Some("DAY".to_string());
3455 }
3456 None
3457 }
3458
    /// Patch a known Snowflake pretty-print edge case so the output matches
    /// Python sqlglot byte-for-byte.
    ///
    /// The rewrite only fires when BOTH sentinel substrings below are present,
    /// which identifies one specific query shape; any other SQL is returned
    /// unchanged. NOTE(review): this is a literal-string fixup tied to a single
    /// fixture — brittle by design; revisit if the pretty printer changes.
    fn normalize_snowflake_pretty(mut sql: String) -> String {
        // Both sentinels must match before any replacement is attempted.
        if sql.contains("LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)")
            && sql.contains("ARRAY_GENERATE_RANGE(0, (GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1) + 1)")
        {
            // Re-wrap the <> ALL subquery across lines the way sqlglot does.
            sql = sql.replace(
                "AND uc.user_id <> ALL (SELECT DISTINCT\n  _id\n  FROM users, LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)\n  WHERE\n    GET_PATH(datasource.value, 'name') = 'something')",
                "AND uc.user_id <> ALL (\n  SELECT DISTINCT\n    _id\n  FROM users, LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)\n  WHERE\n    GET_PATH(datasource.value, 'name') = 'something'\n)",
            );

            // Break the ARRAY_GENERATE_RANGE bound expression onto its own lines.
            sql = sql.replace(
                "CROSS JOIN TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1) + 1))) AS _u(seq, key, path, index, pos, this)",
                "CROSS JOIN TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (\n  GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1\n) + 1))) AS _u(seq, key, path, index, pos, this)",
            );

            // Expand the OR predicate's parenthesized operands onto separate lines.
            sql = sql.replace(
                "OR (_u.pos > (ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1)\n  AND _u_2.pos_2 = (ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1))",
                "OR (\n  _u.pos > (\n    ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1\n  )\n  AND _u_2.pos_2 = (\n    ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1\n  )\n)",
            );
        }

        sql
    }
3481
3482 /// Apply cross-dialect semantic normalizations that depend on knowing both source and target.
3483 /// This handles cases where the same syntax has different semantics across dialects.
3484 fn cross_dialect_normalize(
3485 expr: Expression,
3486 source: DialectType,
3487 target: DialectType,
3488 ) -> Result<Expression> {
3489 use crate::expressions::{
3490 AggFunc, BinaryOp, Case, Cast, ConvertTimezone, DataType, DateTimeField, DateTruncFunc,
3491 Function, Identifier, IsNull, Literal, Null, Paren,
3492 };
3493
3494 // Helper to tag which kind of transform to apply
3495 #[derive(Debug)]
3496 enum Action {
3497 None,
3498 GreatestLeastNull,
3499 ArrayGenerateRange,
3500 Div0TypedDivision,
3501 ArrayAggCollectList,
3502 ArrayAggWithinGroupFilter,
3503 ArrayAggFilter,
3504 CastTimestampToDatetime,
3505 DateTruncWrapCast,
3506 ToDateToCast,
3507 ConvertTimezoneToExpr,
3508 SetToVariable,
3509 RegexpReplaceSnowflakeToDuckDB,
3510 BigQueryFunctionNormalize,
3511 BigQuerySafeDivide,
3512 BigQueryCastType,
3513 BigQueryToHexBare, // _BQ_TO_HEX(x) with no LOWER/UPPER wrapper
3514 BigQueryToHexLower, // LOWER(_BQ_TO_HEX(x))
3515 BigQueryToHexUpper, // UPPER(_BQ_TO_HEX(x))
3516 BigQueryLastDayStripUnit, // LAST_DAY(date, MONTH) -> LAST_DAY(date)
3517 BigQueryCastFormat, // CAST(x AS type FORMAT 'fmt') -> PARSE_DATE/PARSE_TIMESTAMP etc.
3518 BigQueryAnyValueHaving, // ANY_VALUE(x HAVING MAX/MIN y) -> ARG_MAX_NULL/ARG_MIN_NULL for DuckDB
3519 BigQueryApproxQuantiles, // APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
3520 GenericFunctionNormalize, // Cross-dialect function renaming (non-BigQuery sources)
3521 RegexpLikeToDuckDB, // RegexpLike -> REGEXP_MATCHES for DuckDB target (partial match)
3522 EpochConvert, // Expression::Epoch -> target-specific epoch function
3523 EpochMsConvert, // Expression::EpochMs -> target-specific epoch ms function
3524 TSQLTypeNormalize, // TSQL types (MONEY, SMALLMONEY, REAL, DATETIME2) -> standard types
3525 MySQLSafeDivide, // MySQL a/b -> a / NULLIF(b, 0) with optional CAST
3526 NullsOrdering, // Add NULLS FIRST/LAST for ORDER BY
3527 AlterTableRenameStripSchema, // ALTER TABLE db.t1 RENAME TO db.t2 -> ALTER TABLE db.t1 RENAME TO t2
3528 StringAggConvert, // STRING_AGG/WITHIN GROUP -> target-specific aggregate
3529 GroupConcatConvert, // GROUP_CONCAT -> target-specific aggregate
3530 TempTableHash, // TSQL #table -> temp table normalization
3531 ArrayLengthConvert, // CARDINALITY/ARRAY_LENGTH/ARRAY_SIZE -> target-specific
3532 DatePartUnquote, // DATE_PART('month', x) -> DATE_PART(month, x) for Snowflake target
3533 NvlClearOriginal, // Clear NVL original_name for cross-dialect transpilation
3534 HiveCastToTryCast, // Hive/Spark CAST -> TRY_CAST for targets that support it
3535 XorExpand, // MySQL XOR -> (a AND NOT b) OR (NOT a AND b) for non-XOR targets
3536 CastTimestampStripTz, // CAST(x AS TIMESTAMP WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark
3537 JsonExtractToGetJsonObject, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
3538 JsonExtractScalarToGetJsonObject, // JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
3539 JsonQueryValueConvert, // JsonQuery/JsonValue -> target-specific (ISNULL wrapper for TSQL, GET_JSON_OBJECT for Spark, etc.)
3540 JsonLiteralToJsonParse, // JSON 'x' -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake
3541 ArraySyntaxConvert, // ARRAY[x] -> ARRAY(x) for Spark, [x] for BigQuery/DuckDB
3542 AtTimeZoneConvert, // AT TIME ZONE -> AT_TIMEZONE (Presto) / FROM_UTC_TIMESTAMP (Spark)
3543 DayOfWeekConvert, // DAY_OF_WEEK -> dialect-specific
3544 MaxByMinByConvert, // MAX_BY/MIN_BY -> argMax/argMin for ClickHouse
3545 ArrayAggToCollectList, // ARRAY_AGG(x ORDER BY ...) -> COLLECT_LIST(x) for Hive/Spark
3546 ElementAtConvert, // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
3547 CurrentUserParens, // CURRENT_USER -> CURRENT_USER() for Snowflake
3548 CastToJsonForSpark, // CAST(x AS JSON) -> TO_JSON(x) for Spark
3549 CastJsonToFromJson, // CAST(JSON_PARSE(literal) AS ARRAY/MAP) -> FROM_JSON(literal, type_string)
3550 ToJsonConvert, // TO_JSON(x) -> JSON_FORMAT(CAST(x AS JSON)) for Presto etc.
3551 ArrayAggNullFilter, // ARRAY_AGG(x) FILTER(WHERE cond) -> add AND NOT x IS NULL for DuckDB
3552 ArrayAggIgnoreNullsDuckDB, // ARRAY_AGG(x IGNORE NULLS ORDER BY ...) -> ARRAY_AGG(x ORDER BY a NULLS FIRST, ...) for DuckDB
3553 BigQueryPercentileContToDuckDB, // PERCENTILE_CONT(x, frac RESPECT NULLS) -> QUANTILE_CONT(x, frac) for DuckDB
3554 BigQueryArraySelectAsStructToSnowflake, // ARRAY(SELECT AS STRUCT ...) -> (SELECT ARRAY_AGG(OBJECT_CONSTRUCT(...)))
3555 CountDistinctMultiArg, // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN ... END)
3556 VarianceToClickHouse, // Expression::Variance -> varSamp for ClickHouse
3557 StddevToClickHouse, // Expression::Stddev -> stddevSamp for ClickHouse
3558 ApproxQuantileConvert, // Expression::ApproxQuantile -> APPROX_PERCENTILE for Snowflake
3559 ArrayIndexConvert, // array[1] -> array[0] for BigQuery (1-based to 0-based)
3560 DollarParamConvert, // $foo -> @foo for BigQuery
3561 TablesampleReservoir, // TABLESAMPLE (n ROWS) -> TABLESAMPLE RESERVOIR (n ROWS) for DuckDB
3562 BitAggFloatCast, // BIT_OR/BIT_AND/BIT_XOR float arg -> CAST(ROUND(CAST(arg)) AS INT) for DuckDB
3563 BitAggSnowflakeRename, // BIT_OR -> BITORAGG, BIT_AND -> BITANDAGG etc. for Snowflake
3564 StrftimeCastTimestamp, // CAST TIMESTAMP -> TIMESTAMP_NTZ for Spark in STRFTIME
3565 AnyValueIgnoreNulls, // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
3566 CreateTableStripComment, // Strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
3567 EscapeStringNormalize, // e'Hello\nworld' literal newline -> \n
3568 AnyToExists, // PostgreSQL x <op> ANY(array) -> EXISTS(array, x -> ...)
3569 ArrayConcatBracketConvert, // [1,2] -> ARRAY[1,2] for PostgreSQL in ARRAY_CAT
3570 SnowflakeIntervalFormat, // INTERVAL '2' HOUR -> INTERVAL '2 HOUR' for Snowflake
3571 AlterTableToSpRename, // ALTER TABLE RENAME -> EXEC sp_rename for TSQL
3572 StraightJoinCase, // STRAIGHT_JOIN -> straight_join for DuckDB
3573 RespectNullsConvert, // RESPECT NULLS window function handling
3574 MysqlNullsOrdering, // MySQL doesn't support NULLS ordering
3575 MysqlNullsLastRewrite, // Add CASE WHEN to ORDER BY for DuckDB -> MySQL (NULLS LAST simulation)
3576 BigQueryNullsOrdering, // BigQuery doesn't support NULLS FIRST/LAST - strip
3577 SnowflakeFloatProtect, // Protect FLOAT from being converted to DOUBLE by Snowflake target transform
3578 JsonToGetPath, // JSON arrow -> GET_PATH/PARSE_JSON for Snowflake
3579 FilterToIff, // FILTER(WHERE) -> IFF wrapping for Snowflake
3580 AggFilterToIff, // AggFunc.filter -> IFF wrapping for Snowflake (e.g., AVG(x) FILTER(WHERE cond))
3581 StructToRow, // DuckDB struct -> Presto ROW / BigQuery STRUCT
3582 SparkStructConvert, // Spark STRUCT(x AS col1, ...) -> ROW/DuckDB struct
3583 DecimalDefaultPrecision, // DECIMAL -> DECIMAL(18, 3) for Snowflake in BIT agg
3584 ApproxCountDistinctToApproxDistinct, // APPROX_COUNT_DISTINCT -> APPROX_DISTINCT for Presto/Trino
3585 CollectListToArrayAgg, // COLLECT_LIST -> ARRAY_AGG for Presto/DuckDB
3586 CollectSetConvert, // COLLECT_SET -> SET_AGG/ARRAY_AGG(DISTINCT)/ARRAY_UNIQUE_AGG
3587 PercentileConvert, // PERCENTILE -> QUANTILE/APPROX_PERCENTILE
3588 CorrIsnanWrap, // CORR(a,b) -> CASE WHEN ISNAN(CORR(a,b)) THEN NULL ELSE CORR(a,b) END
3589 TruncToDateTrunc, // TRUNC(ts, unit) -> DATE_TRUNC(unit, ts)
3590 ArrayContainsConvert, // ARRAY_CONTAINS -> CONTAINS/target-specific
3591 StrPositionExpand, // StrPosition with position -> complex STRPOS expansion for Presto/DuckDB
3592 TablesampleSnowflakeStrip, // Strip method and PERCENT for Snowflake target
3593 FirstToAnyValue, // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
3594 MonthsBetweenConvert, // Expression::MonthsBetween -> target-specific
3595 CurrentUserSparkParens, // CURRENT_USER -> CURRENT_USER() for Spark
3596 SparkDateFuncCast, // MONTH/YEAR/DAY('str') -> MONTH/YEAR/DAY(CAST('str' AS DATE)) from Spark
3597 MapFromArraysConvert, // Expression::MapFromArrays -> MAP/OBJECT_CONSTRUCT/MAP_FROM_ARRAYS
3598 AddMonthsConvert, // Expression::AddMonths -> target-specific DATEADD/DATE_ADD
3599 PercentileContConvert, // PERCENTILE_CONT/DISC WITHIN GROUP -> APPROX_PERCENTILE/PERCENTILE_APPROX
3600 GenerateSeriesConvert, // GENERATE_SERIES -> SEQUENCE/UNNEST(SEQUENCE)/EXPLODE(SEQUENCE)
3601 ConcatCoalesceWrap, // CONCAT(a, b) -> CONCAT(COALESCE(CAST(a), ''), ...) for Presto/ClickHouse
3602 PipeConcatToConcat, // a || b -> CONCAT(CAST(a), CAST(b)) for Presto
3603 DivFuncConvert, // DIV(a, b) -> a // b for DuckDB, CAST for BigQuery
3604 JsonObjectAggConvert, // JSON_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
3605 JsonbExistsConvert, // JSONB_EXISTS -> JSON_EXISTS for DuckDB
3606 DateBinConvert, // DATE_BIN -> TIME_BUCKET for DuckDB
3607 MysqlCastCharToText, // MySQL CAST(x AS CHAR) -> CAST(x AS TEXT/VARCHAR/STRING) for targets
3608 SparkCastVarcharToString, // Spark CAST(x AS VARCHAR/CHAR) -> CAST(x AS STRING) for Spark targets
3609 JsonExtractToArrow, // JSON_EXTRACT(x, path) -> x -> path for SQLite/DuckDB
3610 JsonExtractToTsql, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
3611 JsonExtractToClickHouse, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> JSONExtractString for ClickHouse
3612 JsonExtractScalarConvert, // JSON_EXTRACT_SCALAR -> target-specific (PostgreSQL, Snowflake, SQLite)
3613 JsonPathNormalize, // Normalize JSON path format (brackets, wildcards, quotes) for various dialects
3614 MinMaxToLeastGreatest, // Multi-arg MIN(a,b,c) -> LEAST(a,b,c), MAX(a,b,c) -> GREATEST(a,b,c)
3615 ClickHouseUniqToApproxCountDistinct, // uniq(x) -> APPROX_COUNT_DISTINCT(x) for non-ClickHouse targets
3616 ClickHouseAnyToAnyValue, // any(x) -> ANY_VALUE(x) for non-ClickHouse targets
3617 OracleVarchar2ToVarchar, // VARCHAR2(N CHAR/BYTE) -> VARCHAR(N) for non-Oracle targets
3618 Nvl2Expand, // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b ELSE c END
3619 IfnullToCoalesce, // IFNULL(a, b) -> COALESCE(a, b)
3620 IsAsciiConvert, // IS_ASCII(x) -> dialect-specific ASCII check
3621 StrPositionConvert, // STR_POSITION(haystack, needle[, pos]) -> dialect-specific
3622 DecodeSimplify, // DECODE with null-safe -> simple = comparison
3623 ArraySumConvert, // ARRAY_SUM -> target-specific
3624 ArraySizeConvert, // ARRAY_SIZE -> target-specific
3625 ArrayAnyConvert, // ARRAY_ANY -> target-specific
3626 CastTimestamptzToFunc, // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) for MySQL/StarRocks
3627 TsOrDsToDateConvert, // TS_OR_DS_TO_DATE(x[, fmt]) -> dialect-specific
3628 TsOrDsToDateStrConvert, // TS_OR_DS_TO_DATE_STR(x) -> SUBSTRING(CAST(x AS type), 1, 10)
3629 DateStrToDateConvert, // DATE_STR_TO_DATE(x) -> CAST(x AS DATE)
3630 TimeStrToDateConvert, // TIME_STR_TO_DATE(x) -> CAST(x AS DATE)
3631 TimeStrToTimeConvert, // TIME_STR_TO_TIME(x) -> CAST(x AS TIMESTAMP)
3632 DateToDateStrConvert, // DATE_TO_DATE_STR(x) -> CAST(x AS TEXT/VARCHAR/STRING)
3633 DateToDiConvert, // DATE_TO_DI(x) -> dialect-specific (CAST date to YYYYMMDD integer)
3634 DiToDateConvert, // DI_TO_DATE(x) -> dialect-specific (integer YYYYMMDD to date)
3635 TsOrDiToDiConvert, // TS_OR_DI_TO_DI(x) -> dialect-specific
3636 UnixToStrConvert, // UNIX_TO_STR(x, fmt) -> dialect-specific
3637 UnixToTimeConvert, // UNIX_TO_TIME(x) -> dialect-specific
3638 UnixToTimeStrConvert, // UNIX_TO_TIME_STR(x) -> dialect-specific
3639 TimeToUnixConvert, // TIME_TO_UNIX(x) -> dialect-specific
3640 TimeToStrConvert, // TIME_TO_STR(x, fmt) -> dialect-specific
3641 StrToUnixConvert, // STR_TO_UNIX(x, fmt) -> dialect-specific
3642 DateTruncSwapArgs, // DATE_TRUNC('unit', x) -> DATE_TRUNC(x, unit) / TRUNC(x, unit)
3643 TimestampTruncConvert, // TIMESTAMP_TRUNC(x, UNIT[, tz]) -> dialect-specific
3644 StrToDateConvert, // STR_TO_DATE(x, fmt) from Generic -> CAST(StrToTime(x,fmt) AS DATE)
3645 TsOrDsAddConvert, // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> DATE_ADD per dialect
3646 DateFromUnixDateConvert, // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, '1970-01-01')
3647 TimeStrToUnixConvert, // TIME_STR_TO_UNIX(x) -> dialect-specific
3648 TimeToTimeStrConvert, // TIME_TO_TIME_STR(x) -> CAST(x AS type)
3649 CreateTableLikeToCtas, // CREATE TABLE a LIKE b -> CREATE TABLE a AS SELECT * FROM b LIMIT 0
3650 CreateTableLikeToSelectInto, // CREATE TABLE a LIKE b -> SELECT TOP 0 * INTO a FROM b AS temp
3651 CreateTableLikeToAs, // CREATE TABLE a LIKE b -> CREATE TABLE a AS b (ClickHouse)
3652 ArrayRemoveConvert, // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter/ARRAY subquery
3653 ArrayReverseConvert, // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
3654 JsonKeysConvert, // JSON_KEYS -> JSON_OBJECT_KEYS/OBJECT_KEYS
3655 ParseJsonStrip, // PARSE_JSON(x) -> x (strip wrapper)
3656 ArraySizeDrill, // ARRAY_SIZE -> REPEATED_COUNT for Drill
3657 WeekOfYearToWeekIso, // WEEKOFYEAR -> WEEKISO for Snowflake cross-dialect
3658 }
3659
        // TSQL/Fabric `SELECT ... INTO tbl` has no equivalent in most other dialects;
        // rewrite it up front (e.g. into CREATE TABLE AS) so later transforms see a
        // normalized statement. Only TSQL-family sources can produce SELECT INTO.
        let expr = if matches!(source, DialectType::TSQL | DialectType::Fabric) {
            Self::transform_select_into(expr, source, target)
        } else {
            expr
        };
3666
        // `OFFSET n ROWS` is TSQL/Oracle-family syntax. For any other target,
        // clear the ROWS marker so the generator emits a plain `OFFSET n`.
        let expr = if !matches!(
            target,
            DialectType::TSQL | DialectType::Oracle | DialectType::Fabric
        ) {
            if let Expression::Select(mut select) = expr {
                if let Some(ref mut offset) = select.offset {
                    // None = render no ROWS keyword after the offset count
                    offset.rows = None;
                }
                Expression::Select(select)
            } else {
                expr
            }
        } else {
            expr
        };
3683
        // Oracle has no LIMIT clause: rewrite `LIMIT n` as `FETCH FIRST n ROWS ONLY`
        // and ensure any OFFSET is rendered with the ROWS keyword (`OFFSET n ROWS`).
        let expr = if matches!(target, DialectType::Oracle) {
            if let Expression::Select(mut select) = expr {
                // take() removes the LIMIT so it is not emitted alongside FETCH
                if let Some(limit) = select.limit.take() {
                    // Convert LIMIT to FETCH FIRST n ROWS ONLY
                    select.fetch = Some(crate::expressions::Fetch {
                        direction: "FIRST".to_string(),
                        count: Some(limit.this),
                        percent: false,
                        rows: true,
                        with_ties: false,
                    });
                }
                // Add ROWS to OFFSET if present
                if let Some(ref mut offset) = select.offset {
                    offset.rows = Some(true);
                }
                Expression::Select(select)
            } else {
                expr
            }
        } else {
            expr
        };
3708
        // Handle CreateTable WITH properties transformation before recursive transforms.
        // This whole branch mutates the CreateTable node in place via `ct` and then
        // re-wraps it; non-CreateTable expressions pass through untouched.
        let expr = if let Expression::CreateTable(mut ct) = expr {
            Self::transform_create_table_properties(&mut ct, source, target);

            // Handle Hive-style PARTITIONED BY (col_name type, ...) -> target-specific.
            // When the PARTITIONED BY clause contains column definitions, merge them into the
            // main column list and adjust the PARTITIONED BY clause for the target dialect.
            if matches!(
                source,
                DialectType::Hive | DialectType::Spark | DialectType::Databricks
            ) {
                let mut partition_col_names: Vec<String> = Vec::new();
                let mut partition_col_defs: Vec<crate::expressions::ColumnDef> = Vec::new();
                let mut has_col_def_partitions = false;

                // Check if any PARTITIONED BY property contains ColumnDef expressions
                // (i.e. Hive's `PARTITIONED BY (y INT)` form, not just column names).
                for prop in &ct.properties {
                    if let Expression::PartitionedByProperty(ref pbp) = prop {
                        if let Expression::Tuple(ref tuple) = *pbp.this {
                            for expr in &tuple.expressions {
                                if let Expression::ColumnDef(ref cd) = expr {
                                    has_col_def_partitions = true;
                                    partition_col_names.push(cd.name.name.clone());
                                    partition_col_defs.push(*cd.clone());
                                }
                            }
                        }
                    }
                }

                if has_col_def_partitions && !matches!(target, DialectType::Hive) {
                    // Merge partition columns into main column list
                    for cd in partition_col_defs {
                        ct.columns.push(cd);
                    }

                    // Replace PARTITIONED BY property with column-name-only version
                    ct.properties
                        .retain(|p| !matches!(p, Expression::PartitionedByProperty(_)));

                    if matches!(
                        target,
                        DialectType::Presto | DialectType::Trino | DialectType::Athena
                    ) {
                        // Presto: WITH (PARTITIONED_BY=ARRAY['y', 'z'])
                        let array_elements: Vec<String> = partition_col_names
                            .iter()
                            .map(|n| format!("'{}'", n))
                            .collect();
                        let array_value = format!("ARRAY[{}]", array_elements.join(", "));
                        ct.with_properties
                            .push(("PARTITIONED_BY".to_string(), array_value));
                    } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
                        // Spark: PARTITIONED BY (y, z) - just column names
                        let name_exprs: Vec<Expression> = partition_col_names
                            .iter()
                            .map(|n| {
                                Expression::Column(crate::expressions::Column {
                                    name: crate::expressions::Identifier::new(n.clone()),
                                    table: None,
                                    join_mark: false,
                                    trailing_comments: Vec::new(),
                                    span: None,
                                })
                            })
                            .collect();
                        // Re-insert at position 0 so PARTITIONED BY stays the first property.
                        ct.properties.insert(
                            0,
                            Expression::PartitionedByProperty(Box::new(
                                crate::expressions::PartitionedByProperty {
                                    this: Box::new(Expression::Tuple(Box::new(
                                        crate::expressions::Tuple {
                                            expressions: name_exprs,
                                        },
                                    ))),
                                },
                            )),
                        );
                    }
                    // For DuckDB and other targets, just drop the PARTITIONED BY (already retained above)
                }

                // Note: Non-ColumnDef partitions (e.g., function expressions like MONTHS(y))
                // are handled by transform_create_table_properties which runs first
            }

            // Strip LOCATION property for Presto/Trino (not supported)
            if matches!(
                target,
                DialectType::Presto | DialectType::Trino | DialectType::Athena
            ) {
                ct.properties
                    .retain(|p| !matches!(p, Expression::LocationProperty(_)));
            }

            // Strip table-level constraints for Spark/Hive/Databricks.
            // Keep PRIMARY KEY and LIKE constraints but strip TSQL-specific modifiers; remove all others
            if matches!(
                target,
                DialectType::Spark | DialectType::Databricks | DialectType::Hive
            ) {
                ct.constraints.retain(|c| {
                    matches!(
                        c,
                        crate::expressions::TableConstraint::PrimaryKey { .. }
                            | crate::expressions::TableConstraint::Like { .. }
                    )
                });
                for constraint in &mut ct.constraints {
                    if let crate::expressions::TableConstraint::PrimaryKey {
                        columns,
                        modifiers,
                        ..
                    } = constraint
                    {
                        // Strip ASC/DESC from column names.
                        // NOTE(review): direction is stored inside the column-name string
                        // here, hence the suffix trimming (4 = " ASC", 5 = " DESC").
                        for col in columns.iter_mut() {
                            if col.name.ends_with(" ASC") {
                                col.name = col.name[..col.name.len() - 4].to_string();
                            } else if col.name.ends_with(" DESC") {
                                col.name = col.name[..col.name.len() - 5].to_string();
                            }
                        }
                        // Strip TSQL-specific modifiers
                        modifiers.clustered = None;
                        modifiers.with_options.clear();
                        modifiers.on_filegroup = None;
                    }
                }
            }

            // Databricks: IDENTITY columns with INT/INTEGER -> BIGINT
            if matches!(target, DialectType::Databricks) {
                for col in &mut ct.columns {
                    if col.auto_increment {
                        if matches!(col.data_type, crate::expressions::DataType::Int { .. }) {
                            col.data_type = crate::expressions::DataType::BigInt { length: None };
                        }
                    }
                }
            }

            // Spark/Databricks: INTEGER -> INT in column definitions.
            // Python sqlglot always outputs INT for Spark/Databricks, so clear the
            // "was spelled INTEGER" flag to match.
            if matches!(target, DialectType::Spark | DialectType::Databricks) {
                for col in &mut ct.columns {
                    if let crate::expressions::DataType::Int {
                        integer_spelling, ..
                    } = &mut col.data_type
                    {
                        *integer_spelling = false;
                    }
                }
            }

            // Strip explicit NULL constraints for Hive/Spark (B INTEGER NULL -> B INTEGER)
            if matches!(target, DialectType::Hive | DialectType::Spark) {
                for col in &mut ct.columns {
                    // If nullable is explicitly true (NULL), change to None (omit it)
                    if col.nullable == Some(true) {
                        col.nullable = None;
                    }
                    // Also remove from constraints if stored there
                    col.constraints
                        .retain(|c| !matches!(c, crate::expressions::ColumnConstraint::Null));
                }
            }

            // Strip TSQL ON filegroup for non-TSQL/Fabric targets
            if ct.on_property.is_some()
                && !matches!(target, DialectType::TSQL | DialectType::Fabric)
            {
                ct.on_property = None;
            }

            // Snowflake: strip ARRAY type parameters (ARRAY<INT> -> ARRAY, ARRAY<ARRAY<INT>> -> ARRAY)
            // Snowflake doesn't support typed arrays in DDL
            if matches!(target, DialectType::Snowflake) {
3887 fn strip_array_type_params(dt: &mut crate::expressions::DataType) {
3888 if let crate::expressions::DataType::Array { .. } = dt {
3889 *dt = crate::expressions::DataType::Custom {
3890 name: "ARRAY".to_string(),
3891 };
3892 }
3893 }
                for col in &mut ct.columns {
                    strip_array_type_params(&mut col.data_type);
                }
            }

            // PostgreSQL target: ensure IDENTITY columns have NOT NULL.
            // If NOT NULL was explicit in source (present in constraint_order), preserve original order.
            // If NOT NULL was not explicit, add it after IDENTITY (GENERATED BY DEFAULT AS IDENTITY NOT NULL).
            if matches!(target, DialectType::PostgreSQL) {
                for col in &mut ct.columns {
                    if col.auto_increment && !col.constraint_order.is_empty() {
                        use crate::expressions::ConstraintType;
                        let has_explicit_not_null = col
                            .constraint_order
                            .iter()
                            .any(|ct| *ct == ConstraintType::NotNull);

                        if has_explicit_not_null {
                            // Source had explicit NOT NULL - preserve original order.
                            // Just ensure nullable is set.
                            if col.nullable != Some(false) {
                                col.nullable = Some(false);
                            }
                        } else {
                            // Source didn't have explicit NOT NULL - build order with
                            // AutoIncrement + NotNull first, then remaining constraints
                            let mut new_order = Vec::new();
                            // Put AutoIncrement (IDENTITY) first, followed by synthetic NotNull
                            new_order.push(ConstraintType::AutoIncrement);
                            new_order.push(ConstraintType::NotNull);
                            // Add remaining constraints in original order (except AutoIncrement,
                            // which was already placed at the front)
                            for ct_type in &col.constraint_order {
                                if *ct_type != ConstraintType::AutoIncrement {
                                    new_order.push(ct_type.clone());
                                }
                            }
                            col.constraint_order = new_order;
                            col.nullable = Some(false);
                        }
                    }
                }
            }

            // All CreateTable rewrites done; re-wrap the mutated node.
            Expression::CreateTable(ct)
        } else {
            expr
        };
3941
        // Handle CreateView column stripping for Presto/Trino target.
        let expr = if let Expression::CreateView(mut cv) = expr {
            // Presto/Trino: drop the explicit column list when the view has a SELECT
            // body (the SELECT's own output columns define the view's schema).
            if matches!(target, DialectType::Presto | DialectType::Trino) && !cv.columns.is_empty()
            {
                // Null query means no body; only strip columns when a body exists.
                if !matches!(&cv.query, Expression::Null(_)) {
                    cv.columns.clear();
                }
            }
            Expression::CreateView(cv)
        } else {
            expr
        };
3955
        // Wrap bare VALUES in CTE bodies with SELECT * FROM (...) AS _values for
        // generic/non-Presto targets (Presto-family accepts a bare VALUES as a CTE body).
        let expr = if !matches!(
            target,
            DialectType::Presto | DialectType::Trino | DialectType::Athena
        ) {
            if let Expression::Select(mut select) = expr {
                if let Some(ref mut with) = select.with {
                    for cte in &mut with.ctes {
                        if let Expression::Values(ref vals) = cte.this {
                            // Build: SELECT * FROM (VALUES ...) AS _values
                            let values_subquery =
                                Expression::Subquery(Box::new(crate::expressions::Subquery {
                                    this: Expression::Values(vals.clone()),
                                    alias: Some(Identifier::new("_values".to_string())),
                                    column_aliases: Vec::new(),
                                    order_by: None,
                                    limit: None,
                                    offset: None,
                                    distribute_by: None,
                                    sort_by: None,
                                    cluster_by: None,
                                    lateral: false,
                                    modifiers_inside: false,
                                    trailing_comments: Vec::new(),
                                }));
                            // SELECT * projection over the derived table
                            let mut new_select = crate::expressions::Select::new();
                            new_select.expressions =
                                vec![Expression::Star(crate::expressions::Star {
                                    table: None,
                                    except: None,
                                    replace: None,
                                    rename: None,
                                    trailing_comments: Vec::new(),
                                    span: None,
                                })];
                            new_select.from = Some(crate::expressions::From {
                                expressions: vec![values_subquery],
                            });
                            // Replace the CTE body in place with the wrapped SELECT
                            cte.this = Expression::Select(Box::new(new_select));
                        }
                    }
                }
                Expression::Select(select)
            } else {
                expr
            }
        } else {
            expr
        };
4005
        // PostgreSQL CREATE INDEX: add NULLS FIRST to index columns that don't have
        // an explicit nulls ordering (columns that already specify one are untouched).
        let expr = if matches!(target, DialectType::PostgreSQL) {
            if let Expression::CreateIndex(mut ci) = expr {
                for col in &mut ci.columns {
                    if col.nulls_first.is_none() {
                        col.nulls_first = Some(true);
                    }
                }
                Expression::CreateIndex(ci)
            } else {
                expr
            }
        } else {
            expr
        };
4021
4022 transform_recursive(expr, &|e| {
4023 // BigQuery CAST(ARRAY[STRUCT(...)] AS STRUCT_TYPE[]) -> DuckDB: convert unnamed Structs to ROW()
4024 // This converts auto-named struct literals {'_0': x, '_1': y} inside typed arrays to ROW(x, y)
4025 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
4026 if let Expression::Cast(ref c) = e {
4027 // Check if this is a CAST of an array to a struct array type
4028 let is_struct_array_cast =
4029 matches!(&c.to, crate::expressions::DataType::Array { .. });
4030 if is_struct_array_cast {
4031 let has_auto_named_structs = match &c.this {
4032 Expression::Array(arr) => arr.expressions.iter().any(|elem| {
4033 if let Expression::Struct(s) = elem {
4034 s.fields.iter().all(|(name, _)| {
4035 name.as_ref().map_or(true, |n| {
4036 n.starts_with('_') && n[1..].parse::<usize>().is_ok()
4037 })
4038 })
4039 } else {
4040 false
4041 }
4042 }),
4043 Expression::ArrayFunc(arr) => arr.expressions.iter().any(|elem| {
4044 if let Expression::Struct(s) = elem {
4045 s.fields.iter().all(|(name, _)| {
4046 name.as_ref().map_or(true, |n| {
4047 n.starts_with('_') && n[1..].parse::<usize>().is_ok()
4048 })
4049 })
4050 } else {
4051 false
4052 }
4053 }),
4054 _ => false,
4055 };
4056 if has_auto_named_structs {
4057 let convert_struct_to_row = |elem: Expression| -> Expression {
4058 if let Expression::Struct(s) = elem {
4059 let row_args: Vec<Expression> =
4060 s.fields.into_iter().map(|(_, v)| v).collect();
4061 Expression::Function(Box::new(Function::new(
4062 "ROW".to_string(),
4063 row_args,
4064 )))
4065 } else {
4066 elem
4067 }
4068 };
4069 let mut c_clone = c.as_ref().clone();
4070 match &mut c_clone.this {
4071 Expression::Array(arr) => {
4072 arr.expressions = arr
4073 .expressions
4074 .drain(..)
4075 .map(convert_struct_to_row)
4076 .collect();
4077 }
4078 Expression::ArrayFunc(arr) => {
4079 arr.expressions = arr
4080 .expressions
4081 .drain(..)
4082 .map(convert_struct_to_row)
4083 .collect();
4084 }
4085 _ => {}
4086 }
4087 return Ok(Expression::Cast(Box::new(c_clone)));
4088 }
4089 }
4090 }
4091 }
4092
4093 // BigQuery SELECT AS STRUCT -> DuckDB struct literal {'key': value, ...}
4094 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
4095 if let Expression::Select(ref sel) = e {
4096 if sel.kind.as_deref() == Some("STRUCT") {
4097 let mut fields = Vec::new();
4098 for expr in &sel.expressions {
4099 match expr {
4100 Expression::Alias(a) => {
4101 fields.push((Some(a.alias.name.clone()), a.this.clone()));
4102 }
4103 Expression::Column(c) => {
4104 fields.push((Some(c.name.name.clone()), expr.clone()));
4105 }
4106 _ => {
4107 fields.push((None, expr.clone()));
4108 }
4109 }
4110 }
4111 let struct_lit =
4112 Expression::Struct(Box::new(crate::expressions::Struct { fields }));
4113 let mut new_select = sel.as_ref().clone();
4114 new_select.kind = None;
4115 new_select.expressions = vec![struct_lit];
4116 return Ok(Expression::Select(Box::new(new_select)));
4117 }
4118 }
4119 }
4120
4121 // Convert @variable -> ${variable} for Spark/Hive/Databricks
4122 if matches!(source, DialectType::TSQL | DialectType::Fabric)
4123 && matches!(
4124 target,
4125 DialectType::Spark | DialectType::Databricks | DialectType::Hive
4126 )
4127 {
4128 if let Expression::Parameter(ref p) = e {
4129 if p.style == crate::expressions::ParameterStyle::At {
4130 if let Some(ref name) = p.name {
4131 return Ok(Expression::Parameter(Box::new(
4132 crate::expressions::Parameter {
4133 name: Some(name.clone()),
4134 index: p.index,
4135 style: crate::expressions::ParameterStyle::DollarBrace,
4136 quoted: p.quoted,
4137 string_quoted: p.string_quoted,
4138 expression: None,
4139 },
4140 )));
4141 }
4142 }
4143 }
4144 // Also handle Column("@x") -> Parameter("x", DollarBrace) for TSQL vars
4145 if let Expression::Column(ref col) = e {
4146 if col.name.name.starts_with('@') && col.table.is_none() {
4147 let var_name = col.name.name.trim_start_matches('@').to_string();
4148 return Ok(Expression::Parameter(Box::new(
4149 crate::expressions::Parameter {
4150 name: Some(var_name),
4151 index: None,
4152 style: crate::expressions::ParameterStyle::DollarBrace,
4153 quoted: false,
4154 string_quoted: false,
4155 expression: None,
4156 },
4157 )));
4158 }
4159 }
4160 }
4161
4162 // Convert @variable -> variable in SET statements for Spark/Databricks
4163 if matches!(source, DialectType::TSQL | DialectType::Fabric)
4164 && matches!(target, DialectType::Spark | DialectType::Databricks)
4165 {
4166 if let Expression::SetStatement(ref s) = e {
4167 let mut new_items = s.items.clone();
4168 let mut changed = false;
4169 for item in &mut new_items {
4170 // Strip @ from the SET name (Parameter style)
4171 if let Expression::Parameter(ref p) = item.name {
4172 if p.style == crate::expressions::ParameterStyle::At {
4173 if let Some(ref name) = p.name {
4174 item.name = Expression::Identifier(Identifier::new(name));
4175 changed = true;
4176 }
4177 }
4178 }
4179 // Strip @ from the SET name (Identifier style - SET parser)
4180 if let Expression::Identifier(ref id) = item.name {
4181 if id.name.starts_with('@') {
4182 let var_name = id.name.trim_start_matches('@').to_string();
4183 item.name = Expression::Identifier(Identifier::new(&var_name));
4184 changed = true;
4185 }
4186 }
4187 // Strip @ from the SET name (Column style - alternative parsing)
4188 if let Expression::Column(ref col) = item.name {
4189 if col.name.name.starts_with('@') && col.table.is_none() {
4190 let var_name = col.name.name.trim_start_matches('@').to_string();
4191 item.name = Expression::Identifier(Identifier::new(&var_name));
4192 changed = true;
4193 }
4194 }
4195 }
4196 if changed {
4197 let mut new_set = (**s).clone();
4198 new_set.items = new_items;
4199 return Ok(Expression::SetStatement(Box::new(new_set)));
4200 }
4201 }
4202 }
4203
4204 // Strip NOLOCK hint for non-TSQL targets
4205 if matches!(source, DialectType::TSQL | DialectType::Fabric)
4206 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
4207 {
4208 if let Expression::Table(ref tr) = e {
4209 if !tr.hints.is_empty() {
4210 let mut new_tr = tr.clone();
4211 new_tr.hints.clear();
4212 return Ok(Expression::Table(new_tr));
4213 }
4214 }
4215 }
4216
4217 // Snowflake: TRUE IS TRUE -> TRUE, FALSE IS FALSE -> FALSE
4218 // Snowflake simplifies IS TRUE/IS FALSE on boolean literals
4219 if matches!(target, DialectType::Snowflake) {
4220 if let Expression::IsTrue(ref itf) = e {
4221 if let Expression::Boolean(ref b) = itf.this {
4222 if !itf.not {
4223 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
4224 value: b.value,
4225 }));
4226 } else {
4227 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
4228 value: !b.value,
4229 }));
4230 }
4231 }
4232 }
4233 if let Expression::IsFalse(ref itf) = e {
4234 if let Expression::Boolean(ref b) = itf.this {
4235 if !itf.not {
4236 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
4237 value: !b.value,
4238 }));
4239 } else {
4240 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
4241 value: b.value,
4242 }));
4243 }
4244 }
4245 }
4246 }
4247
4248 // BigQuery: split dotted backtick identifiers in table names
4249 // e.g., `a.b.c` -> "a"."b"."c" when source is BigQuery and target is not BigQuery
4250 if matches!(source, DialectType::BigQuery) && !matches!(target, DialectType::BigQuery) {
4251 if let Expression::CreateTable(ref ct) = e {
4252 let mut changed = false;
4253 let mut new_ct = ct.clone();
4254 // Split the table name
4255 if ct.name.schema.is_none() && ct.name.name.name.contains('.') {
4256 let parts: Vec<&str> = ct.name.name.name.split('.').collect();
4257 // Use quoted identifiers when the original was quoted (backtick in BigQuery)
4258 let was_quoted = ct.name.name.quoted;
4259 let mk_id = |s: &str| {
4260 if was_quoted {
4261 Identifier::quoted(s)
4262 } else {
4263 Identifier::new(s)
4264 }
4265 };
4266 if parts.len() == 3 {
4267 new_ct.name.catalog = Some(mk_id(parts[0]));
4268 new_ct.name.schema = Some(mk_id(parts[1]));
4269 new_ct.name.name = mk_id(parts[2]);
4270 changed = true;
4271 } else if parts.len() == 2 {
4272 new_ct.name.schema = Some(mk_id(parts[0]));
4273 new_ct.name.name = mk_id(parts[1]);
4274 changed = true;
4275 }
4276 }
4277 // Split the clone source name
4278 if let Some(ref clone_src) = ct.clone_source {
4279 if clone_src.schema.is_none() && clone_src.name.name.contains('.') {
4280 let parts: Vec<&str> = clone_src.name.name.split('.').collect();
4281 let was_quoted = clone_src.name.quoted;
4282 let mk_id = |s: &str| {
4283 if was_quoted {
4284 Identifier::quoted(s)
4285 } else {
4286 Identifier::new(s)
4287 }
4288 };
4289 let mut new_src = clone_src.clone();
4290 if parts.len() == 3 {
4291 new_src.catalog = Some(mk_id(parts[0]));
4292 new_src.schema = Some(mk_id(parts[1]));
4293 new_src.name = mk_id(parts[2]);
4294 new_ct.clone_source = Some(new_src);
4295 changed = true;
4296 } else if parts.len() == 2 {
4297 new_src.schema = Some(mk_id(parts[0]));
4298 new_src.name = mk_id(parts[1]);
4299 new_ct.clone_source = Some(new_src);
4300 changed = true;
4301 }
4302 }
4303 }
4304 if changed {
4305 return Ok(Expression::CreateTable(new_ct));
4306 }
4307 }
4308 }
4309
4310 // BigQuery array subscript: a[1], b[OFFSET(1)], c[ORDINAL(1)], d[SAFE_OFFSET(1)], e[SAFE_ORDINAL(1)]
4311 // -> DuckDB/Presto: convert 0-based to 1-based, handle SAFE_* -> ELEMENT_AT for Presto
4312 if matches!(source, DialectType::BigQuery)
4313 && matches!(
4314 target,
4315 DialectType::DuckDB
4316 | DialectType::Presto
4317 | DialectType::Trino
4318 | DialectType::Athena
4319 )
4320 {
4321 if let Expression::Subscript(ref sub) = e {
4322 let (new_index, is_safe) = match &sub.index {
4323 // a[1] -> a[1+1] = a[2] (plain index is 0-based in BQ)
4324 Expression::Literal(Literal::Number(n)) => {
4325 if let Ok(val) = n.parse::<i64>() {
4326 (
4327 Some(Expression::Literal(Literal::Number(
4328 (val + 1).to_string(),
4329 ))),
4330 false,
4331 )
4332 } else {
4333 (None, false)
4334 }
4335 }
4336 // OFFSET(n) -> n+1 (0-based)
4337 Expression::Function(ref f)
4338 if f.name.eq_ignore_ascii_case("OFFSET") && f.args.len() == 1 =>
4339 {
4340 if let Expression::Literal(Literal::Number(n)) = &f.args[0] {
4341 if let Ok(val) = n.parse::<i64>() {
4342 (
4343 Some(Expression::Literal(Literal::Number(
4344 (val + 1).to_string(),
4345 ))),
4346 false,
4347 )
4348 } else {
4349 (
4350 Some(Expression::Add(Box::new(
4351 crate::expressions::BinaryOp::new(
4352 f.args[0].clone(),
4353 Expression::number(1),
4354 ),
4355 ))),
4356 false,
4357 )
4358 }
4359 } else {
4360 (
4361 Some(Expression::Add(Box::new(
4362 crate::expressions::BinaryOp::new(
4363 f.args[0].clone(),
4364 Expression::number(1),
4365 ),
4366 ))),
4367 false,
4368 )
4369 }
4370 }
4371 // ORDINAL(n) -> n (already 1-based)
4372 Expression::Function(ref f)
4373 if f.name.eq_ignore_ascii_case("ORDINAL") && f.args.len() == 1 =>
4374 {
4375 (Some(f.args[0].clone()), false)
4376 }
4377 // SAFE_OFFSET(n) -> n+1 (0-based, safe)
4378 Expression::Function(ref f)
4379 if f.name.eq_ignore_ascii_case("SAFE_OFFSET") && f.args.len() == 1 =>
4380 {
4381 if let Expression::Literal(Literal::Number(n)) = &f.args[0] {
4382 if let Ok(val) = n.parse::<i64>() {
4383 (
4384 Some(Expression::Literal(Literal::Number(
4385 (val + 1).to_string(),
4386 ))),
4387 true,
4388 )
4389 } else {
4390 (
4391 Some(Expression::Add(Box::new(
4392 crate::expressions::BinaryOp::new(
4393 f.args[0].clone(),
4394 Expression::number(1),
4395 ),
4396 ))),
4397 true,
4398 )
4399 }
4400 } else {
4401 (
4402 Some(Expression::Add(Box::new(
4403 crate::expressions::BinaryOp::new(
4404 f.args[0].clone(),
4405 Expression::number(1),
4406 ),
4407 ))),
4408 true,
4409 )
4410 }
4411 }
4412 // SAFE_ORDINAL(n) -> n (already 1-based, safe)
4413 Expression::Function(ref f)
4414 if f.name.eq_ignore_ascii_case("SAFE_ORDINAL") && f.args.len() == 1 =>
4415 {
4416 (Some(f.args[0].clone()), true)
4417 }
4418 _ => (None, false),
4419 };
4420 if let Some(idx) = new_index {
4421 if is_safe
4422 && matches!(
4423 target,
4424 DialectType::Presto | DialectType::Trino | DialectType::Athena
4425 )
4426 {
4427 // Presto: SAFE_OFFSET/SAFE_ORDINAL -> ELEMENT_AT(arr, idx)
4428 return Ok(Expression::Function(Box::new(Function::new(
4429 "ELEMENT_AT".to_string(),
4430 vec![sub.this.clone(), idx],
4431 ))));
4432 } else {
4433 // DuckDB or non-safe: just use subscript with converted index
4434 return Ok(Expression::Subscript(Box::new(
4435 crate::expressions::Subscript {
4436 this: sub.this.clone(),
4437 index: idx,
4438 },
4439 )));
4440 }
4441 }
4442 }
4443 }
4444
4445 // BigQuery LENGTH(x) -> DuckDB CASE TYPEOF(x) WHEN 'BLOB' THEN OCTET_LENGTH(...) ELSE LENGTH(...) END
4446 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
4447 if let Expression::Length(ref uf) = e {
4448 let arg = uf.this.clone();
4449 let typeof_func = Expression::Function(Box::new(Function::new(
4450 "TYPEOF".to_string(),
4451 vec![arg.clone()],
4452 )));
4453 let blob_cast = Expression::Cast(Box::new(Cast {
4454 this: arg.clone(),
4455 to: DataType::VarBinary { length: None },
4456 trailing_comments: vec![],
4457 double_colon_syntax: false,
4458 format: None,
4459 default: None,
4460 }));
4461 let octet_length = Expression::Function(Box::new(Function::new(
4462 "OCTET_LENGTH".to_string(),
4463 vec![blob_cast],
4464 )));
4465 let text_cast = Expression::Cast(Box::new(Cast {
4466 this: arg,
4467 to: DataType::Text,
4468 trailing_comments: vec![],
4469 double_colon_syntax: false,
4470 format: None,
4471 default: None,
4472 }));
4473 let length_text = Expression::Length(Box::new(crate::expressions::UnaryFunc {
4474 this: text_cast,
4475 original_name: None,
4476 }));
4477 return Ok(Expression::Case(Box::new(Case {
4478 operand: Some(typeof_func),
4479 whens: vec![(
4480 Expression::Literal(Literal::String("BLOB".to_string())),
4481 octet_length,
4482 )],
4483 else_: Some(length_text),
4484 comments: Vec::new(),
4485 })));
4486 }
4487 }
4488
4489 // BigQuery UNNEST alias handling (only for non-BigQuery sources):
4490 // UNNEST(...) AS x -> UNNEST(...) (drop unused table alias)
4491 // UNNEST(...) AS x(y) -> UNNEST(...) AS y (use column alias as main alias)
4492 if matches!(target, DialectType::BigQuery) && !matches!(source, DialectType::BigQuery) {
4493 if let Expression::Alias(ref a) = e {
4494 if matches!(&a.this, Expression::Unnest(_)) {
4495 if a.column_aliases.is_empty() {
4496 // Drop the entire alias, return just the UNNEST expression
4497 return Ok(a.this.clone());
4498 } else {
4499 // Use first column alias as the main alias
4500 let mut new_alias = a.as_ref().clone();
4501 new_alias.alias = a.column_aliases[0].clone();
4502 new_alias.column_aliases.clear();
4503 return Ok(Expression::Alias(Box::new(new_alias)));
4504 }
4505 }
4506 }
4507 }
4508
4509 // BigQuery IN UNNEST(expr) -> IN (SELECT UNNEST/EXPLODE(expr)) for non-BigQuery targets
4510 if matches!(source, DialectType::BigQuery) && !matches!(target, DialectType::BigQuery) {
4511 if let Expression::In(ref in_expr) = e {
4512 if let Some(ref unnest_inner) = in_expr.unnest {
4513 // Build the function call for the target dialect
4514 let func_expr = if matches!(
4515 target,
4516 DialectType::Hive | DialectType::Spark | DialectType::Databricks
4517 ) {
4518 // Use EXPLODE for Hive/Spark
4519 Expression::Function(Box::new(Function::new(
4520 "EXPLODE".to_string(),
4521 vec![*unnest_inner.clone()],
4522 )))
4523 } else {
4524 // Use UNNEST for Presto/Trino/DuckDB/etc.
4525 Expression::Unnest(Box::new(crate::expressions::UnnestFunc {
4526 this: *unnest_inner.clone(),
4527 expressions: Vec::new(),
4528 with_ordinality: false,
4529 alias: None,
4530 offset_alias: None,
4531 }))
4532 };
4533
4534 // Wrap in SELECT
4535 let mut inner_select = crate::expressions::Select::new();
4536 inner_select.expressions = vec![func_expr];
4537
4538 let subquery_expr = Expression::Select(Box::new(inner_select));
4539
4540 return Ok(Expression::In(Box::new(crate::expressions::In {
4541 this: in_expr.this.clone(),
4542 expressions: Vec::new(),
4543 query: Some(subquery_expr),
4544 not: in_expr.not,
4545 global: in_expr.global,
4546 unnest: None,
4547 is_field: false,
4548 })));
4549 }
4550 }
4551 }
4552
4553 // SQLite: GENERATE_SERIES AS t(i) -> (SELECT value AS i FROM GENERATE_SERIES(...)) AS t
4554 // This handles the subquery wrapping for RANGE -> GENERATE_SERIES in FROM context
4555 if matches!(target, DialectType::SQLite) && matches!(source, DialectType::DuckDB) {
4556 if let Expression::Alias(ref a) = e {
4557 if let Expression::Function(ref f) = a.this {
4558 if f.name.eq_ignore_ascii_case("GENERATE_SERIES")
4559 && !a.column_aliases.is_empty()
4560 {
4561 // Build: (SELECT value AS col_alias FROM GENERATE_SERIES(start, end)) AS table_alias
4562 let col_alias = a.column_aliases[0].clone();
4563 let mut inner_select = crate::expressions::Select::new();
4564 inner_select.expressions =
4565 vec![Expression::Alias(Box::new(crate::expressions::Alias::new(
4566 Expression::Identifier(Identifier::new("value".to_string())),
4567 col_alias,
4568 )))];
4569 inner_select.from = Some(crate::expressions::From {
4570 expressions: vec![a.this.clone()],
4571 });
4572 let subquery =
4573 Expression::Subquery(Box::new(crate::expressions::Subquery {
4574 this: Expression::Select(Box::new(inner_select)),
4575 alias: Some(a.alias.clone()),
4576 column_aliases: Vec::new(),
4577 order_by: None,
4578 limit: None,
4579 offset: None,
4580 lateral: false,
4581 modifiers_inside: false,
4582 trailing_comments: Vec::new(),
4583 distribute_by: None,
4584 sort_by: None,
4585 cluster_by: None,
4586 }));
4587 return Ok(subquery);
4588 }
4589 }
4590 }
4591 }
4592
4593 // BigQuery implicit UNNEST: comma-join on array path -> CROSS JOIN UNNEST
4594 // e.g., SELECT results FROM Coordinates, Coordinates.position AS results
4595 // -> SELECT results FROM Coordinates CROSS JOIN UNNEST(Coordinates.position) AS results
4596 if matches!(source, DialectType::BigQuery) {
4597 if let Expression::Select(ref s) = e {
4598 if let Some(ref from) = s.from {
4599 if from.expressions.len() >= 2 {
4600 // Collect table names from first expression
4601 let first_tables: Vec<String> = from
4602 .expressions
4603 .iter()
4604 .take(1)
4605 .filter_map(|expr| {
4606 if let Expression::Table(t) = expr {
4607 Some(t.name.name.to_lowercase())
4608 } else {
4609 None
4610 }
4611 })
4612 .collect();
4613
4614 // Check if any subsequent FROM expressions are schema-qualified with a matching table name
4615 // or have a dotted name matching a table
4616 let mut needs_rewrite = false;
4617 for expr in from.expressions.iter().skip(1) {
4618 if let Expression::Table(t) = expr {
4619 if let Some(ref schema) = t.schema {
4620 if first_tables.contains(&schema.name.to_lowercase()) {
4621 needs_rewrite = true;
4622 break;
4623 }
4624 }
4625 // Also check dotted names in quoted identifiers (e.g., `Coordinates.position`)
4626 if t.schema.is_none() && t.name.name.contains('.') {
4627 let parts: Vec<&str> = t.name.name.split('.').collect();
4628 if parts.len() >= 2
4629 && first_tables.contains(&parts[0].to_lowercase())
4630 {
4631 needs_rewrite = true;
4632 break;
4633 }
4634 }
4635 }
4636 }
4637
4638 if needs_rewrite {
4639 let mut new_select = s.clone();
4640 let mut new_from_exprs = vec![from.expressions[0].clone()];
4641 let mut new_joins = s.joins.clone();
4642
4643 for expr in from.expressions.iter().skip(1) {
4644 if let Expression::Table(ref t) = expr {
4645 if let Some(ref schema) = t.schema {
4646 if first_tables.contains(&schema.name.to_lowercase()) {
4647 // This is an array path reference, convert to CROSS JOIN UNNEST
4648 let col_expr = Expression::Column(
4649 crate::expressions::Column {
4650 name: t.name.clone(),
4651 table: Some(schema.clone()),
4652 join_mark: false,
4653 trailing_comments: vec![],
4654 span: None,
4655 },
4656 );
4657 let unnest_expr = Expression::Unnest(Box::new(
4658 crate::expressions::UnnestFunc {
4659 this: col_expr,
4660 expressions: Vec::new(),
4661 with_ordinality: false,
4662 alias: None,
4663 offset_alias: None,
4664 },
4665 ));
4666 let join_this = if let Some(ref alias) = t.alias {
4667 if matches!(
4668 target,
4669 DialectType::Presto
4670 | DialectType::Trino
4671 | DialectType::Athena
4672 ) {
4673 // Presto: UNNEST(x) AS _t0(results)
4674 Expression::Alias(Box::new(
4675 crate::expressions::Alias {
4676 this: unnest_expr,
4677 alias: Identifier::new("_t0"),
4678 column_aliases: vec![alias.clone()],
4679 pre_alias_comments: vec![],
4680 trailing_comments: vec![],
4681 },
4682 ))
4683 } else {
4684 // BigQuery: UNNEST(x) AS results
4685 Expression::Alias(Box::new(
4686 crate::expressions::Alias {
4687 this: unnest_expr,
4688 alias: alias.clone(),
4689 column_aliases: vec![],
4690 pre_alias_comments: vec![],
4691 trailing_comments: vec![],
4692 },
4693 ))
4694 }
4695 } else {
4696 unnest_expr
4697 };
4698 new_joins.push(crate::expressions::Join {
4699 kind: crate::expressions::JoinKind::Cross,
4700 this: join_this,
4701 on: None,
4702 using: Vec::new(),
4703 use_inner_keyword: false,
4704 use_outer_keyword: false,
4705 deferred_condition: false,
4706 join_hint: None,
4707 match_condition: None,
4708 pivots: Vec::new(),
4709 comments: Vec::new(),
4710 nesting_group: 0,
4711 directed: false,
4712 });
4713 } else {
4714 new_from_exprs.push(expr.clone());
4715 }
4716 } else if t.schema.is_none() && t.name.name.contains('.') {
4717 // Dotted name in quoted identifier: `Coordinates.position`
4718 let parts: Vec<&str> = t.name.name.split('.').collect();
4719 if parts.len() >= 2
4720 && first_tables.contains(&parts[0].to_lowercase())
4721 {
4722 let join_this =
4723 if matches!(target, DialectType::BigQuery) {
4724 // BigQuery: keep as single quoted identifier, just convert comma -> CROSS JOIN
4725 Expression::Table(t.clone())
4726 } else {
4727 // Other targets: split into "schema"."name"
4728 let mut new_t = t.clone();
4729 new_t.schema =
4730 Some(Identifier::quoted(parts[0]));
4731 new_t.name = Identifier::quoted(parts[1]);
4732 Expression::Table(new_t)
4733 };
4734 new_joins.push(crate::expressions::Join {
4735 kind: crate::expressions::JoinKind::Cross,
4736 this: join_this,
4737 on: None,
4738 using: Vec::new(),
4739 use_inner_keyword: false,
4740 use_outer_keyword: false,
4741 deferred_condition: false,
4742 join_hint: None,
4743 match_condition: None,
4744 pivots: Vec::new(),
4745 comments: Vec::new(),
4746 nesting_group: 0,
4747 directed: false,
4748 });
4749 } else {
4750 new_from_exprs.push(expr.clone());
4751 }
4752 } else {
4753 new_from_exprs.push(expr.clone());
4754 }
4755 } else {
4756 new_from_exprs.push(expr.clone());
4757 }
4758 }
4759
4760 new_select.from = Some(crate::expressions::From {
4761 expressions: new_from_exprs,
4762 ..from.clone()
4763 });
4764 new_select.joins = new_joins;
4765 return Ok(Expression::Select(new_select));
4766 }
4767 }
4768 }
4769 }
4770 }
4771
4772 // CROSS JOIN UNNEST -> LATERAL VIEW EXPLODE for Hive/Spark
4773 if matches!(
4774 target,
4775 DialectType::Hive | DialectType::Spark | DialectType::Databricks
4776 ) {
4777 if let Expression::Select(ref s) = e {
4778 // Check if any joins are CROSS JOIN with UNNEST/EXPLODE
4779 let is_unnest_or_explode_expr = |expr: &Expression| -> bool {
4780 matches!(expr, Expression::Unnest(_))
4781 || matches!(expr, Expression::Function(f) if f.name.eq_ignore_ascii_case("EXPLODE"))
4782 };
4783 let has_unnest_join = s.joins.iter().any(|j| {
4784 j.kind == crate::expressions::JoinKind::Cross && (
4785 matches!(&j.this, Expression::Alias(a) if is_unnest_or_explode_expr(&a.this))
4786 || is_unnest_or_explode_expr(&j.this)
4787 )
4788 });
4789 if has_unnest_join {
4790 let mut select = s.clone();
4791 let mut new_joins = Vec::new();
4792 for join in select.joins.drain(..) {
4793 if join.kind == crate::expressions::JoinKind::Cross {
4794 // Extract the UNNEST/EXPLODE from the join
4795 let (func_expr, table_alias, col_aliases) = match &join.this {
4796 Expression::Alias(a) => {
4797 let ta = if a.alias.is_empty() {
4798 None
4799 } else {
4800 Some(a.alias.clone())
4801 };
4802 let cas = a.column_aliases.clone();
4803 match &a.this {
4804 Expression::Unnest(u) => {
4805 // Multi-arg UNNEST(y, z) -> INLINE(ARRAYS_ZIP(y, z))
4806 if !u.expressions.is_empty() {
4807 let mut all_args = vec![u.this.clone()];
4808 all_args.extend(u.expressions.clone());
4809 let arrays_zip =
4810 Expression::Function(Box::new(
4811 crate::expressions::Function::new(
4812 "ARRAYS_ZIP".to_string(),
4813 all_args,
4814 ),
4815 ));
4816 let inline = Expression::Function(Box::new(
4817 crate::expressions::Function::new(
4818 "INLINE".to_string(),
4819 vec![arrays_zip],
4820 ),
4821 ));
4822 (Some(inline), ta, a.column_aliases.clone())
4823 } else {
4824 // Convert UNNEST(x) to EXPLODE(x) or POSEXPLODE(x)
4825 let func_name = if u.with_ordinality {
4826 "POSEXPLODE"
4827 } else {
4828 "EXPLODE"
4829 };
4830 let explode = Expression::Function(Box::new(
4831 crate::expressions::Function::new(
4832 func_name.to_string(),
4833 vec![u.this.clone()],
4834 ),
4835 ));
4836 // For POSEXPLODE, add 'pos' to column aliases
4837 let cas = if u.with_ordinality {
4838 let mut pos_aliases =
4839 vec![Identifier::new(
4840 "pos".to_string(),
4841 )];
4842 pos_aliases
4843 .extend(a.column_aliases.clone());
4844 pos_aliases
4845 } else {
4846 a.column_aliases.clone()
4847 };
4848 (Some(explode), ta, cas)
4849 }
4850 }
4851 Expression::Function(f)
4852 if f.name.eq_ignore_ascii_case("EXPLODE") =>
4853 {
4854 (Some(Expression::Function(f.clone())), ta, cas)
4855 }
4856 _ => (None, None, Vec::new()),
4857 }
4858 }
4859 Expression::Unnest(u) => {
4860 let func_name = if u.with_ordinality {
4861 "POSEXPLODE"
4862 } else {
4863 "EXPLODE"
4864 };
4865 let explode = Expression::Function(Box::new(
4866 crate::expressions::Function::new(
4867 func_name.to_string(),
4868 vec![u.this.clone()],
4869 ),
4870 ));
4871 let ta = u.alias.clone();
4872 let col_aliases = if u.with_ordinality {
4873 vec![Identifier::new("pos".to_string())]
4874 } else {
4875 Vec::new()
4876 };
4877 (Some(explode), ta, col_aliases)
4878 }
4879 _ => (None, None, Vec::new()),
4880 };
4881 if let Some(func) = func_expr {
4882 select.lateral_views.push(crate::expressions::LateralView {
4883 this: func,
4884 table_alias,
4885 column_aliases: col_aliases,
4886 outer: false,
4887 });
4888 } else {
4889 new_joins.push(join);
4890 }
4891 } else {
4892 new_joins.push(join);
4893 }
4894 }
4895 select.joins = new_joins;
4896 return Ok(Expression::Select(select));
4897 }
4898 }
4899 }
4900
4901 // UNNEST expansion: DuckDB SELECT UNNEST(arr) in SELECT list -> expanded query
4902 // for BigQuery, Presto/Trino, Snowflake
4903 if matches!(source, DialectType::DuckDB | DialectType::PostgreSQL)
4904 && matches!(
4905 target,
4906 DialectType::BigQuery
4907 | DialectType::Presto
4908 | DialectType::Trino
4909 | DialectType::Snowflake
4910 )
4911 {
4912 if let Expression::Select(ref s) = e {
4913 // Check if any SELECT expressions contain UNNEST
4914 // Note: UNNEST can appear as Expression::Unnest OR Expression::Function("UNNEST")
4915 let has_unnest_in_select = s.expressions.iter().any(|expr| {
4916 fn contains_unnest(e: &Expression) -> bool {
4917 match e {
4918 Expression::Unnest(_) => true,
4919 Expression::Function(f)
4920 if f.name.eq_ignore_ascii_case("UNNEST") =>
4921 {
4922 true
4923 }
4924 Expression::Alias(a) => contains_unnest(&a.this),
4925 Expression::Add(op)
4926 | Expression::Sub(op)
4927 | Expression::Mul(op)
4928 | Expression::Div(op) => {
4929 contains_unnest(&op.left) || contains_unnest(&op.right)
4930 }
4931 _ => false,
4932 }
4933 }
4934 contains_unnest(expr)
4935 });
4936
4937 if has_unnest_in_select {
4938 let rewritten = Self::rewrite_unnest_expansion(s, target);
4939 if let Some(new_select) = rewritten {
4940 return Ok(Expression::Select(Box::new(new_select)));
4941 }
4942 }
4943 }
4944 }
4945
4946 // BigQuery -> PostgreSQL: convert escape sequences in string literals to actual characters
4947 // BigQuery '\n' -> PostgreSQL literal newline in string
4948 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::PostgreSQL)
4949 {
4950 if let Expression::Literal(Literal::String(ref s)) = e {
4951 if s.contains("\\n")
4952 || s.contains("\\t")
4953 || s.contains("\\r")
4954 || s.contains("\\\\")
4955 {
4956 let converted = s
4957 .replace("\\n", "\n")
4958 .replace("\\t", "\t")
4959 .replace("\\r", "\r")
4960 .replace("\\\\", "\\");
4961 return Ok(Expression::Literal(Literal::String(converted)));
4962 }
4963 }
4964 }
4965
4966 // Cross-dialect: convert Literal::Timestamp to target-specific CAST form
4967 // when source != target (identity tests keep the Literal::Timestamp for native handling)
4968 if source != target {
4969 if let Expression::Literal(Literal::Timestamp(ref s)) = e {
4970 let s = s.clone();
4971 // MySQL: TIMESTAMP handling depends on source dialect
4972 // BigQuery TIMESTAMP is timezone-aware -> TIMESTAMP() function in MySQL
4973 // Other sources' TIMESTAMP is non-timezone -> CAST('x' AS DATETIME) in MySQL
4974 if matches!(target, DialectType::MySQL) {
4975 if matches!(source, DialectType::BigQuery) {
4976 // BigQuery TIMESTAMP is timezone-aware -> MySQL TIMESTAMP() function
4977 return Ok(Expression::Function(Box::new(Function::new(
4978 "TIMESTAMP".to_string(),
4979 vec![Expression::Literal(Literal::String(s))],
4980 ))));
4981 } else {
4982 // Non-timezone TIMESTAMP -> CAST('x' AS DATETIME) in MySQL
4983 return Ok(Expression::Cast(Box::new(Cast {
4984 this: Expression::Literal(Literal::String(s)),
4985 to: DataType::Custom {
4986 name: "DATETIME".to_string(),
4987 },
4988 trailing_comments: Vec::new(),
4989 double_colon_syntax: false,
4990 format: None,
4991 default: None,
4992 })));
4993 }
4994 }
4995 let dt = match target {
4996 DialectType::BigQuery | DialectType::StarRocks => DataType::Custom {
4997 name: "DATETIME".to_string(),
4998 },
4999 DialectType::Snowflake => {
5000 // BigQuery TIMESTAMP is timezone-aware -> use TIMESTAMPTZ for Snowflake
5001 if matches!(source, DialectType::BigQuery) {
5002 DataType::Custom {
5003 name: "TIMESTAMPTZ".to_string(),
5004 }
5005 } else if matches!(
5006 source,
5007 DialectType::PostgreSQL
5008 | DialectType::Redshift
5009 | DialectType::Snowflake
5010 ) {
5011 DataType::Timestamp {
5012 precision: None,
5013 timezone: false,
5014 }
5015 } else {
5016 DataType::Custom {
5017 name: "TIMESTAMPNTZ".to_string(),
5018 }
5019 }
5020 }
5021 DialectType::Spark | DialectType::Databricks => {
5022 // BigQuery TIMESTAMP is timezone-aware -> use plain TIMESTAMP for Spark/Databricks
5023 if matches!(source, DialectType::BigQuery) {
5024 DataType::Timestamp {
5025 precision: None,
5026 timezone: false,
5027 }
5028 } else {
5029 DataType::Custom {
5030 name: "TIMESTAMP_NTZ".to_string(),
5031 }
5032 }
5033 }
5034 DialectType::ClickHouse => DataType::Nullable {
5035 inner: Box::new(DataType::Custom {
5036 name: "DateTime".to_string(),
5037 }),
5038 },
5039 DialectType::TSQL | DialectType::Fabric => DataType::Custom {
5040 name: "DATETIME2".to_string(),
5041 },
5042 DialectType::DuckDB => {
5043 // DuckDB: use TIMESTAMPTZ when source is BigQuery (BQ TIMESTAMP is always UTC/tz-aware)
5044 // or when the timestamp string explicitly has timezone info
5045 if matches!(source, DialectType::BigQuery)
5046 || Self::timestamp_string_has_timezone(&s)
5047 {
5048 DataType::Custom {
5049 name: "TIMESTAMPTZ".to_string(),
5050 }
5051 } else {
5052 DataType::Timestamp {
5053 precision: None,
5054 timezone: false,
5055 }
5056 }
5057 }
5058 _ => DataType::Timestamp {
5059 precision: None,
5060 timezone: false,
5061 },
5062 };
5063 return Ok(Expression::Cast(Box::new(Cast {
5064 this: Expression::Literal(Literal::String(s)),
5065 to: dt,
5066 trailing_comments: vec![],
5067 double_colon_syntax: false,
5068 format: None,
5069 default: None,
5070 })));
5071 }
5072 }
5073
5074 // PostgreSQL DELETE requires explicit AS for table aliases
5075 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
5076 if let Expression::Delete(ref del) = e {
5077 if del.alias.is_some() && !del.alias_explicit_as {
5078 let mut new_del = del.clone();
5079 new_del.alias_explicit_as = true;
5080 return Ok(Expression::Delete(new_del));
5081 }
5082 }
5083 }
5084
5085 // UNION/INTERSECT/EXCEPT DISTINCT handling:
5086 // Some dialects require explicit DISTINCT (BigQuery, ClickHouse),
5087 // while others don't support it (Presto, Spark, DuckDB, etc.)
5088 {
5089 let needs_distinct =
5090 matches!(target, DialectType::BigQuery | DialectType::ClickHouse);
5091 let drop_distinct = matches!(
5092 target,
5093 DialectType::Presto
5094 | DialectType::Trino
5095 | DialectType::Athena
5096 | DialectType::Spark
5097 | DialectType::Databricks
5098 | DialectType::DuckDB
5099 | DialectType::Hive
5100 | DialectType::MySQL
5101 | DialectType::PostgreSQL
5102 | DialectType::SQLite
5103 | DialectType::TSQL
5104 | DialectType::Redshift
5105 | DialectType::Snowflake
5106 | DialectType::Oracle
5107 | DialectType::Teradata
5108 | DialectType::Drill
5109 | DialectType::Doris
5110 | DialectType::StarRocks
5111 );
5112 match &e {
5113 Expression::Union(u) if !u.all && needs_distinct && !u.distinct => {
5114 let mut new_u = (**u).clone();
5115 new_u.distinct = true;
5116 return Ok(Expression::Union(Box::new(new_u)));
5117 }
5118 Expression::Intersect(i) if !i.all && needs_distinct && !i.distinct => {
5119 let mut new_i = (**i).clone();
5120 new_i.distinct = true;
5121 return Ok(Expression::Intersect(Box::new(new_i)));
5122 }
5123 Expression::Except(ex) if !ex.all && needs_distinct && !ex.distinct => {
5124 let mut new_ex = (**ex).clone();
5125 new_ex.distinct = true;
5126 return Ok(Expression::Except(Box::new(new_ex)));
5127 }
5128 Expression::Union(u) if u.distinct && drop_distinct => {
5129 let mut new_u = (**u).clone();
5130 new_u.distinct = false;
5131 return Ok(Expression::Union(Box::new(new_u)));
5132 }
5133 Expression::Intersect(i) if i.distinct && drop_distinct => {
5134 let mut new_i = (**i).clone();
5135 new_i.distinct = false;
5136 return Ok(Expression::Intersect(Box::new(new_i)));
5137 }
5138 Expression::Except(ex) if ex.distinct && drop_distinct => {
5139 let mut new_ex = (**ex).clone();
5140 new_ex.distinct = false;
5141 return Ok(Expression::Except(Box::new(new_ex)));
5142 }
5143 _ => {}
5144 }
5145 }
5146
5147 // ClickHouse: MAP('a', '1') -> map('a', '1') (lowercase function name)
5148 if matches!(target, DialectType::ClickHouse) {
5149 if let Expression::Function(ref f) = e {
5150 if f.name.eq_ignore_ascii_case("MAP") && !f.args.is_empty() {
5151 let mut new_f = f.as_ref().clone();
5152 new_f.name = "map".to_string();
5153 return Ok(Expression::Function(Box::new(new_f)));
5154 }
5155 }
5156 }
5157
5158 // ClickHouse: INTERSECT ALL -> INTERSECT (ClickHouse doesn't support ALL on INTERSECT)
5159 if matches!(target, DialectType::ClickHouse) {
5160 if let Expression::Intersect(ref i) = e {
5161 if i.all {
5162 let mut new_i = (**i).clone();
5163 new_i.all = false;
5164 return Ok(Expression::Intersect(Box::new(new_i)));
5165 }
5166 }
5167 }
5168
5169 // Integer division: a / b -> CAST(a AS DOUBLE) / b for dialects that need it
5170 // Only from Generic source, to prevent double-wrapping
5171 if matches!(source, DialectType::Generic) {
5172 if let Expression::Div(ref op) = e {
5173 let cast_type = match target {
5174 DialectType::TSQL | DialectType::Fabric => Some(DataType::Float {
5175 precision: None,
5176 scale: None,
5177 real_spelling: false,
5178 }),
5179 DialectType::Drill
5180 | DialectType::Trino
5181 | DialectType::Athena
5182 | DialectType::Presto => Some(DataType::Double {
5183 precision: None,
5184 scale: None,
5185 }),
5186 DialectType::PostgreSQL
5187 | DialectType::Redshift
5188 | DialectType::Materialize
5189 | DialectType::Teradata
5190 | DialectType::RisingWave => Some(DataType::Double {
5191 precision: None,
5192 scale: None,
5193 }),
5194 _ => None,
5195 };
5196 if let Some(dt) = cast_type {
5197 let cast_left = Expression::Cast(Box::new(Cast {
5198 this: op.left.clone(),
5199 to: dt,
5200 double_colon_syntax: false,
5201 trailing_comments: Vec::new(),
5202 format: None,
5203 default: None,
5204 }));
5205 let new_op = crate::expressions::BinaryOp {
5206 left: cast_left,
5207 right: op.right.clone(),
5208 left_comments: op.left_comments.clone(),
5209 operator_comments: op.operator_comments.clone(),
5210 trailing_comments: op.trailing_comments.clone(),
5211 };
5212 return Ok(Expression::Div(Box::new(new_op)));
5213 }
5214 }
5215 }
5216
5217 // CREATE DATABASE -> CREATE SCHEMA for DuckDB target
5218 if matches!(target, DialectType::DuckDB) {
5219 if let Expression::CreateDatabase(db) = e {
5220 let mut schema = crate::expressions::CreateSchema::new(db.name.name.clone());
5221 schema.if_not_exists = db.if_not_exists;
5222 return Ok(Expression::CreateSchema(Box::new(schema)));
5223 }
5224 if let Expression::DropDatabase(db) = e {
5225 let mut schema = crate::expressions::DropSchema::new(db.name.name.clone());
5226 schema.if_exists = db.if_exists;
5227 return Ok(Expression::DropSchema(Box::new(schema)));
5228 }
5229 }
5230
5231 // Strip ClickHouse Nullable(...) wrapper for non-ClickHouse targets
5232 if matches!(source, DialectType::ClickHouse)
5233 && !matches!(target, DialectType::ClickHouse)
5234 {
5235 if let Expression::Cast(ref c) = e {
5236 if let DataType::Custom { ref name } = c.to {
5237 let upper = name.to_uppercase();
5238 if upper.starts_with("NULLABLE(") && upper.ends_with(")") {
5239 let inner = &name[9..name.len() - 1]; // strip "Nullable(" and ")"
5240 let inner_upper = inner.to_uppercase();
5241 let new_dt = match inner_upper.as_str() {
5242 "DATETIME" | "DATETIME64" => DataType::Timestamp {
5243 precision: None,
5244 timezone: false,
5245 },
5246 "DATE" => DataType::Date,
5247 "INT64" | "BIGINT" => DataType::BigInt { length: None },
5248 "INT32" | "INT" | "INTEGER" => DataType::Int {
5249 length: None,
5250 integer_spelling: false,
5251 },
5252 "FLOAT64" | "DOUBLE" => DataType::Double {
5253 precision: None,
5254 scale: None,
5255 },
5256 "STRING" => DataType::Text,
5257 _ => DataType::Custom {
5258 name: inner.to_string(),
5259 },
5260 };
5261 let mut new_cast = c.clone();
5262 new_cast.to = new_dt;
5263 return Ok(Expression::Cast(new_cast));
5264 }
5265 }
5266 }
5267 }
5268
5269 // ARRAY_CONCAT_AGG -> Snowflake: ARRAY_FLATTEN(ARRAY_AGG(...))
5270 if matches!(target, DialectType::Snowflake) {
5271 if let Expression::ArrayConcatAgg(ref agg) = e {
5272 let mut agg_clone = agg.as_ref().clone();
5273 agg_clone.name = None; // Clear name so generator uses default "ARRAY_AGG"
5274 let array_agg = Expression::ArrayAgg(Box::new(agg_clone));
5275 let flatten = Expression::Function(Box::new(Function::new(
5276 "ARRAY_FLATTEN".to_string(),
5277 vec![array_agg],
5278 )));
5279 return Ok(flatten);
5280 }
5281 }
5282
5283 // ARRAY_CONCAT_AGG -> others: keep as function for cross-dialect
5284 if !matches!(target, DialectType::BigQuery | DialectType::Snowflake) {
5285 if let Expression::ArrayConcatAgg(agg) = e {
5286 let arg = agg.this;
5287 return Ok(Expression::Function(Box::new(Function::new(
5288 "ARRAY_CONCAT_AGG".to_string(),
5289 vec![arg],
5290 ))));
5291 }
5292 }
5293
5294 // Determine what action to take by inspecting e immutably
5295 let action = {
5296 let source_propagates_nulls =
5297 matches!(source, DialectType::Snowflake | DialectType::BigQuery);
5298 let target_ignores_nulls =
5299 matches!(target, DialectType::DuckDB | DialectType::PostgreSQL);
5300
5301 match &e {
5302 Expression::Function(f) => {
5303 let name = f.name.to_uppercase();
5304 // DATE_PART: strip quotes from first arg when target is Snowflake (source != Snowflake)
5305 if (name == "DATE_PART" || name == "DATEPART")
5306 && f.args.len() == 2
5307 && matches!(target, DialectType::Snowflake)
5308 && !matches!(source, DialectType::Snowflake)
5309 && matches!(
5310 &f.args[0],
5311 Expression::Literal(crate::expressions::Literal::String(_))
5312 )
5313 {
5314 Action::DatePartUnquote
5315 } else if source_propagates_nulls
5316 && target_ignores_nulls
5317 && (name == "GREATEST" || name == "LEAST")
5318 && f.args.len() >= 2
5319 {
5320 Action::GreatestLeastNull
5321 } else if matches!(source, DialectType::Snowflake)
5322 && name == "ARRAY_GENERATE_RANGE"
5323 && f.args.len() >= 2
5324 {
5325 Action::ArrayGenerateRange
5326 } else if matches!(source, DialectType::Snowflake)
5327 && matches!(target, DialectType::DuckDB)
5328 && name == "DATE_TRUNC"
5329 && f.args.len() == 2
5330 {
5331 // Determine if DuckDB DATE_TRUNC needs CAST wrapping to preserve input type.
5332 // Logic based on Python sqlglot's input_type_preserved flag:
5333 // - DATE + non-date-unit (HOUR, MINUTE, etc.) -> wrap
5334 // - TIMESTAMP + date-unit (YEAR, QUARTER, MONTH, WEEK, DAY) -> wrap
5335 // - TIMESTAMPTZ/TIMESTAMPLTZ/TIME -> always wrap
5336 let unit_str = match &f.args[0] {
5337 Expression::Literal(crate::expressions::Literal::String(s)) => {
5338 Some(s.to_uppercase())
5339 }
5340 _ => None,
5341 };
5342 let is_date_unit = unit_str.as_ref().map_or(false, |u| {
5343 matches!(u.as_str(), "YEAR" | "QUARTER" | "MONTH" | "WEEK" | "DAY")
5344 });
5345 match &f.args[1] {
5346 Expression::Cast(c) => match &c.to {
5347 DataType::Time { .. } => Action::DateTruncWrapCast,
5348 DataType::Custom { name }
5349 if name.eq_ignore_ascii_case("TIMESTAMPTZ")
5350 || name.eq_ignore_ascii_case("TIMESTAMPLTZ") =>
5351 {
5352 Action::DateTruncWrapCast
5353 }
5354 DataType::Timestamp { timezone: true, .. } => {
5355 Action::DateTruncWrapCast
5356 }
5357 DataType::Date if !is_date_unit => Action::DateTruncWrapCast,
5358 DataType::Timestamp {
5359 timezone: false, ..
5360 } if is_date_unit => Action::DateTruncWrapCast,
5361 _ => Action::None,
5362 },
5363 _ => Action::None,
5364 }
5365 } else if matches!(source, DialectType::Snowflake)
5366 && matches!(target, DialectType::DuckDB)
5367 && name == "TO_DATE"
5368 && f.args.len() == 1
5369 && !matches!(
5370 &f.args[0],
5371 Expression::Literal(crate::expressions::Literal::String(_))
5372 )
5373 {
5374 Action::ToDateToCast
5375 } else if !matches!(source, DialectType::Redshift)
5376 && matches!(target, DialectType::Redshift)
5377 && name == "CONVERT_TIMEZONE"
5378 && (f.args.len() == 2 || f.args.len() == 3)
5379 {
5380 // Convert Function("CONVERT_TIMEZONE") to Expression::ConvertTimezone
5381 // so Redshift's transform_expr won't expand 2-arg to 3-arg with 'UTC'.
5382 // The Redshift parser adds 'UTC' as default source_tz, but when
5383 // transpiling from other dialects, we should preserve the original form.
5384 Action::ConvertTimezoneToExpr
5385 } else if matches!(source, DialectType::Snowflake)
5386 && matches!(target, DialectType::DuckDB)
5387 && name == "REGEXP_REPLACE"
5388 && f.args.len() == 4
5389 && !matches!(
5390 &f.args[3],
5391 Expression::Literal(crate::expressions::Literal::String(_))
5392 )
5393 {
5394 // Snowflake REGEXP_REPLACE with position arg -> DuckDB needs 'g' flag
5395 Action::RegexpReplaceSnowflakeToDuckDB
5396 } else if name == "_BQ_TO_HEX" {
5397 // Internal marker from TO_HEX conversion - bare (no LOWER/UPPER wrapper)
5398 Action::BigQueryToHexBare
5399 } else if matches!(source, DialectType::BigQuery)
5400 && !matches!(target, DialectType::BigQuery)
5401 {
5402 // BigQuery-specific functions that need to be converted to standard forms
5403 match name.as_str() {
5404 "TIMESTAMP_DIFF" | "DATETIME_DIFF" | "TIME_DIFF"
5405 | "DATE_DIFF"
5406 | "TIMESTAMP_ADD" | "TIMESTAMP_SUB"
5407 | "DATETIME_ADD" | "DATETIME_SUB"
5408 | "TIME_ADD" | "TIME_SUB"
5409 | "DATE_ADD" | "DATE_SUB"
5410 | "SAFE_DIVIDE"
5411 | "GENERATE_UUID"
5412 | "COUNTIF"
5413 | "EDIT_DISTANCE"
5414 | "TIMESTAMP_SECONDS" | "TIMESTAMP_MILLIS" | "TIMESTAMP_MICROS"
5415 | "TIMESTAMP_TRUNC" | "DATETIME_TRUNC" | "DATE_TRUNC"
5416 | "TO_HEX"
5417 | "TO_JSON_STRING"
5418 | "GENERATE_ARRAY" | "GENERATE_TIMESTAMP_ARRAY"
5419 | "DIV"
5420 | "UNIX_DATE" | "UNIX_SECONDS" | "UNIX_MILLIS" | "UNIX_MICROS"
5421 | "LAST_DAY"
5422 | "TIME" | "DATETIME" | "TIMESTAMP" | "STRING"
5423 | "REGEXP_CONTAINS"
5424 | "CONTAINS_SUBSTR"
5425 | "SAFE_ADD" | "SAFE_SUBTRACT" | "SAFE_MULTIPLY"
5426 | "SAFE_CAST"
5427 | "GENERATE_DATE_ARRAY"
5428 | "PARSE_DATE" | "PARSE_TIMESTAMP"
5429 | "FORMAT_DATE" | "FORMAT_DATETIME" | "FORMAT_TIMESTAMP"
5430 | "ARRAY_CONCAT"
5431 | "JSON_QUERY" | "JSON_VALUE_ARRAY"
5432 | "INSTR"
5433 | "MD5" | "SHA1" | "SHA256" | "SHA512"
5434 | "GENERATE_UUID()" // just in case
5435 | "REGEXP_EXTRACT_ALL"
5436 | "REGEXP_EXTRACT"
5437 | "INT64"
5438 | "ARRAY_CONCAT_AGG"
5439 | "DATE_DIFF(" // just in case
5440 | "TO_HEX_MD5" // internal
5441 | "MOD"
5442 | "CONCAT"
5443 | "CURRENT_TIMESTAMP" | "CURRENT_DATE" | "CURRENT_DATETIME" | "CURRENT_TIME"
5444 | "STRUCT"
5445 | "ROUND"
5446 | "MAKE_INTERVAL"
5447 | "ARRAY_TO_STRING"
5448 | "PERCENTILE_CONT"
5449 => Action::BigQueryFunctionNormalize,
5450 "ARRAY" if matches!(target, DialectType::Snowflake)
5451 && f.args.len() == 1
5452 && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT"))
5453 => Action::BigQueryArraySelectAsStructToSnowflake,
5454 _ => Action::None,
5455 }
5456 } else if matches!(source, DialectType::BigQuery)
5457 && matches!(target, DialectType::BigQuery)
5458 {
5459 // BigQuery -> BigQuery normalizations
5460 match name.as_str() {
5461 "TIMESTAMP_DIFF"
5462 | "DATETIME_DIFF"
5463 | "TIME_DIFF"
5464 | "DATE_DIFF"
5465 | "DATE_ADD"
5466 | "TO_HEX"
5467 | "CURRENT_TIMESTAMP"
5468 | "CURRENT_DATE"
5469 | "CURRENT_TIME"
5470 | "CURRENT_DATETIME"
5471 | "GENERATE_DATE_ARRAY"
5472 | "INSTR"
5473 | "FORMAT_DATETIME"
5474 | "DATETIME"
5475 | "MAKE_INTERVAL" => Action::BigQueryFunctionNormalize,
5476 _ => Action::None,
5477 }
5478 } else {
5479 // Generic function normalization for non-BigQuery sources
5480 match name.as_str() {
5481 "ARBITRARY" | "AGGREGATE"
5482 | "REGEXP_MATCHES" | "REGEXP_FULL_MATCH"
5483 | "STRUCT_EXTRACT"
5484 | "LIST_FILTER" | "LIST_TRANSFORM" | "LIST_SORT" | "LIST_REVERSE_SORT"
5485 | "STRING_TO_ARRAY" | "STR_SPLIT" | "STR_SPLIT_REGEX" | "SPLIT_TO_ARRAY"
5486 | "SUBSTRINGINDEX"
5487 | "ARRAY_LENGTH" | "SIZE" | "CARDINALITY"
5488 | "UNICODE"
5489 | "XOR"
5490 | "ARRAY_REVERSE_SORT"
5491 | "ENCODE" | "DECODE"
5492 | "QUANTILE"
5493 | "EPOCH" | "EPOCH_MS"
5494 | "HASHBYTES"
5495 | "JSON_EXTRACT_PATH" | "JSON_EXTRACT_PATH_TEXT"
5496 | "APPROX_DISTINCT"
5497 | "DATE_PARSE" | "FORMAT_DATETIME"
5498 | "REGEXP_EXTRACT" | "REGEXP_SUBSTR" | "TO_DAYS"
5499 | "RLIKE"
5500 | "DATEDIFF" | "DATE_DIFF" | "MONTHS_BETWEEN"
5501 | "ADD_MONTHS" | "DATEADD" | "DATE_ADD" | "DATE_SUB" | "DATETRUNC"
5502 | "LAST_DAY" | "LAST_DAY_OF_MONTH" | "EOMONTH"
5503 | "ARRAY_CONSTRUCT" | "ARRAY_CAT" | "ARRAY_COMPACT"
5504 | "ARRAY_FILTER" | "FILTER" | "REDUCE" | "ARRAY_REVERSE"
5505 | "MAP" | "MAP_FROM_ENTRIES"
5506 | "COLLECT_LIST" | "COLLECT_SET"
5507 | "ISNAN" | "IS_NAN"
5508 | "TO_UTC_TIMESTAMP" | "FROM_UTC_TIMESTAMP"
5509 | "FORMAT_NUMBER"
5510 | "TOMONDAY" | "TOSTARTOFWEEK" | "TOSTARTOFMONTH" | "TOSTARTOFYEAR"
5511 | "ELEMENT_AT"
5512 | "EXPLODE" | "EXPLODE_OUTER" | "POSEXPLODE"
5513 | "SPLIT_PART"
5514 // GENERATE_SERIES: handled separately below
5515 | "JSON_EXTRACT" | "JSON_EXTRACT_SCALAR"
5516 | "JSON_QUERY" | "JSON_VALUE"
5517 | "JSON_EXTRACT_JSON" | "BSON_EXTRACT_BSON"
5518 | "TO_UNIX_TIMESTAMP" | "UNIX_TIMESTAMP"
5519 | "CURDATE" | "CURTIME"
5520 | "ARRAY_TO_STRING"
5521 | "ARRAY_SORT" | "SORT_ARRAY"
5522 | "LEFT" | "RIGHT"
5523 | "MAP_FROM_ARRAYS"
5524 | "LIKE" | "ILIKE"
5525 | "ARRAY_CONCAT" | "LIST_CONCAT"
5526 | "QUANTILE_CONT" | "QUANTILE_DISC"
5527 | "PERCENTILE_CONT" | "PERCENTILE_DISC"
5528 | "PERCENTILE_APPROX" | "APPROX_PERCENTILE"
5529 | "LOCATE" | "STRPOS" | "INSTR"
5530 | "CHAR"
5531 // CONCAT: handled separately for COALESCE wrapping
5532 | "ARRAY_JOIN"
5533 | "ARRAY_CONTAINS" | "HAS" | "CONTAINS"
5534 | "ISNULL"
5535 | "MONTHNAME"
5536 | "TO_TIMESTAMP"
5537 | "TO_DATE"
5538 | "TO_JSON"
5539 | "REGEXP_SPLIT"
5540 | "SPLIT"
5541 | "FORMATDATETIME"
5542 | "ARRAYJOIN"
5543 | "SPLITBYSTRING" | "SPLITBYREGEXP"
5544 | "NVL"
5545 | "TO_CHAR"
5546 | "DBMS_RANDOM.VALUE"
5547 | "REGEXP_LIKE"
5548 | "REPLICATE"
5549 | "LEN"
5550 | "COUNT_BIG"
5551 | "DATEFROMPARTS"
5552 | "DATETIMEFROMPARTS"
5553 | "CONVERT" | "TRY_CONVERT"
5554 | "STRFTIME" | "STRPTIME"
5555 | "DATE_FORMAT" | "FORMAT_DATE"
5556 | "PARSE_TIMESTAMP" | "PARSE_DATE"
5557 | "FROM_BASE64" | "TO_BASE64"
5558 | "GETDATE"
5559 | "TO_HEX" | "FROM_HEX" | "UNHEX" | "HEX"
5560 | "TO_UTF8" | "FROM_UTF8"
5561 | "STARTS_WITH" | "STARTSWITH"
5562 | "APPROX_COUNT_DISTINCT"
5563 | "JSON_FORMAT"
5564 | "SYSDATE"
5565 | "LOGICAL_OR" | "LOGICAL_AND"
5566 | "MONTHS_ADD"
5567 | "SCHEMA_NAME"
5568 | "STRTOL"
5569 | "EDITDIST3"
5570 | "FORMAT"
5571 | "LIST_CONTAINS" | "LIST_HAS"
5572 | "VARIANCE" | "STDDEV"
5573 | "ISINF"
5574 | "TO_UNIXTIME"
5575 | "FROM_UNIXTIME"
5576 | "DATEPART" | "DATE_PART"
5577 | "DATENAME"
5578 | "STRING_AGG"
5579 | "JSON_ARRAYAGG"
5580 | "APPROX_QUANTILE"
5581 | "MAKE_DATE"
5582 | "LIST_HAS_ANY" | "ARRAY_HAS_ANY"
5583 | "RANGE"
5584 | "TRY_ELEMENT_AT"
5585 | "STR_TO_MAP"
5586 | "STRING"
5587 | "STR_TO_TIME"
5588 | "CURRENT_SCHEMA"
5589 | "LTRIM" | "RTRIM"
5590 | "UUID"
5591 | "FARM_FINGERPRINT"
5592 | "JSON_KEYS"
5593 | "WEEKOFYEAR"
5594 | "CONCAT_WS"
5595 | "ARRAY_SLICE"
5596 | "ARRAY_PREPEND"
5597 | "ARRAY_REMOVE"
5598 | "GENERATE_DATE_ARRAY"
5599 | "PARSE_JSON"
5600 | "JSON_REMOVE"
5601 | "JSON_SET"
5602 | "LEVENSHTEIN"
5603 => Action::GenericFunctionNormalize,
5604 // Canonical date functions -> dialect-specific
5605 "TS_OR_DS_TO_DATE" => Action::TsOrDsToDateConvert,
5606 "TS_OR_DS_TO_DATE_STR" if f.args.len() == 1 => Action::TsOrDsToDateStrConvert,
5607 "DATE_STR_TO_DATE" if f.args.len() == 1 => Action::DateStrToDateConvert,
5608 "TIME_STR_TO_DATE" if f.args.len() == 1 => Action::TimeStrToDateConvert,
5609 "TIME_STR_TO_TIME" if f.args.len() <= 2 => Action::TimeStrToTimeConvert,
5610 "TIME_STR_TO_UNIX" if f.args.len() == 1 => Action::TimeStrToUnixConvert,
5611 "TIME_TO_TIME_STR" if f.args.len() == 1 => Action::TimeToTimeStrConvert,
5612 "DATE_TO_DATE_STR" if f.args.len() == 1 => Action::DateToDateStrConvert,
5613 "DATE_TO_DI" if f.args.len() == 1 => Action::DateToDiConvert,
5614 "DI_TO_DATE" if f.args.len() == 1 => Action::DiToDateConvert,
5615 "TS_OR_DI_TO_DI" if f.args.len() == 1 => Action::TsOrDiToDiConvert,
5616 "UNIX_TO_STR" if f.args.len() == 2 => Action::UnixToStrConvert,
5617 "UNIX_TO_TIME" if f.args.len() == 1 => Action::UnixToTimeConvert,
5618 "UNIX_TO_TIME_STR" if f.args.len() == 1 => Action::UnixToTimeStrConvert,
5619 "TIME_TO_UNIX" if f.args.len() == 1 => Action::TimeToUnixConvert,
5620 "TIME_TO_STR" if f.args.len() == 2 => Action::TimeToStrConvert,
5621 "STR_TO_UNIX" if f.args.len() == 2 => Action::StrToUnixConvert,
5622 // STR_TO_DATE(x, fmt) -> dialect-specific
5623 "STR_TO_DATE" if f.args.len() == 2
5624 && matches!(source, DialectType::Generic) => Action::StrToDateConvert,
5625 "STR_TO_DATE" => Action::GenericFunctionNormalize,
5626 // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> dialect-specific DATE_ADD
5627 "TS_OR_DS_ADD" if f.args.len() == 3
5628 && matches!(source, DialectType::Generic) => Action::TsOrDsAddConvert,
5629 // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, '1970-01-01')
5630 "DATE_FROM_UNIX_DATE" if f.args.len() == 1 => Action::DateFromUnixDateConvert,
5631 // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b [ELSE c] END
5632 "NVL2" if (f.args.len() == 2 || f.args.len() == 3) => Action::Nvl2Expand,
5633 // IFNULL(a, b) -> COALESCE(a, b) when coming from Generic source
5634 "IFNULL" if f.args.len() == 2 => Action::IfnullToCoalesce,
5635 // IS_ASCII(x) -> dialect-specific
5636 "IS_ASCII" if f.args.len() == 1 => Action::IsAsciiConvert,
5637 // STR_POSITION(haystack, needle[, pos[, occ]]) -> dialect-specific
5638 "STR_POSITION" => Action::StrPositionConvert,
5639 // ARRAY_SUM -> dialect-specific
5640 "ARRAY_SUM" => Action::ArraySumConvert,
5641 // ARRAY_SIZE -> dialect-specific (Drill only)
5642 "ARRAY_SIZE" if matches!(target, DialectType::Drill) => Action::ArraySizeConvert,
5643 // ARRAY_ANY -> dialect-specific
5644 "ARRAY_ANY" if f.args.len() == 2 => Action::ArrayAnyConvert,
5645 // Functions needing specific cross-dialect transforms
5646 "MAX_BY" | "MIN_BY" if matches!(target, DialectType::ClickHouse | DialectType::Spark | DialectType::Databricks | DialectType::DuckDB) => Action::MaxByMinByConvert,
5647 "STRUCT" if matches!(source, DialectType::Spark | DialectType::Databricks)
5648 && !matches!(target, DialectType::Spark | DialectType::Databricks | DialectType::Hive) => Action::SparkStructConvert,
5649 "ARRAY" if matches!(source, DialectType::BigQuery)
5650 && matches!(target, DialectType::Snowflake)
5651 && f.args.len() == 1
5652 && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT")) => Action::BigQueryArraySelectAsStructToSnowflake,
5653 "ARRAY" if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena | DialectType::BigQuery | DialectType::DuckDB | DialectType::ClickHouse | DialectType::StarRocks) => Action::ArraySyntaxConvert,
5654 "TRUNC" if f.args.len() == 2 && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::ClickHouse) => Action::TruncToDateTrunc,
5655 // DATE_TRUNC('unit', x) from Generic source -> arg swap for BigQuery/Doris/Spark/MySQL
5656 "DATE_TRUNC" if f.args.len() == 2
5657 && matches!(source, DialectType::Generic)
5658 && matches!(target, DialectType::BigQuery | DialectType::Doris | DialectType::StarRocks
5659 | DialectType::Spark | DialectType::Databricks | DialectType::MySQL) => Action::DateTruncSwapArgs,
5660 // TIMESTAMP_TRUNC(x, UNIT) from Generic source -> convert to per-dialect
5661 "TIMESTAMP_TRUNC" if f.args.len() >= 2
5662 && matches!(source, DialectType::Generic) => Action::TimestampTruncConvert,
5663 "UNIFORM" if matches!(target, DialectType::Snowflake) => Action::GenericFunctionNormalize,
5664 // GENERATE_SERIES -> SEQUENCE/UNNEST/EXPLODE for target dialects
5665 "GENERATE_SERIES" if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
5666 && !matches!(target, DialectType::PostgreSQL | DialectType::Redshift | DialectType::TSQL | DialectType::Fabric) => Action::GenerateSeriesConvert,
5667 // GENERATE_SERIES with interval normalization for PG target
5668 "GENERATE_SERIES" if f.args.len() >= 3
5669 && matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
5670 && matches!(target, DialectType::PostgreSQL | DialectType::Redshift) => Action::GenerateSeriesConvert,
5671 "GENERATE_SERIES" => Action::None, // passthrough for other cases
5672 // CONCAT(a, b) -> COALESCE wrapping for Presto/ClickHouse from PostgreSQL
5673 "CONCAT" if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
5674 && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::ClickHouse) => Action::ConcatCoalesceWrap,
5675 "CONCAT" => Action::GenericFunctionNormalize,
5676 // DIV(a, b) -> target-specific integer division
5677 "DIV" if f.args.len() == 2
5678 && matches!(source, DialectType::PostgreSQL)
5679 && matches!(target, DialectType::DuckDB | DialectType::BigQuery | DialectType::SQLite) => Action::DivFuncConvert,
5680 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
5681 "JSON_OBJECT_AGG" | "JSONB_OBJECT_AGG" if f.args.len() == 2
5682 && matches!(target, DialectType::DuckDB) => Action::JsonObjectAggConvert,
5683 // JSONB_EXISTS -> JSON_EXISTS for DuckDB
5684 "JSONB_EXISTS" if f.args.len() == 2
5685 && matches!(target, DialectType::DuckDB) => Action::JsonbExistsConvert,
5686 // DATE_BIN -> TIME_BUCKET for DuckDB
5687 "DATE_BIN" if matches!(target, DialectType::DuckDB) => Action::DateBinConvert,
5688 // Multi-arg MIN(a,b,c) -> LEAST, MAX(a,b,c) -> GREATEST
5689 "MIN" | "MAX" if f.args.len() > 1 && !matches!(target, DialectType::SQLite) => Action::MinMaxToLeastGreatest,
5690 // ClickHouse uniq -> APPROX_COUNT_DISTINCT for other dialects
5691 "UNIQ" if matches!(source, DialectType::ClickHouse) && !matches!(target, DialectType::ClickHouse) => Action::ClickHouseUniqToApproxCountDistinct,
5692 // ClickHouse any -> ANY_VALUE for other dialects
5693 "ANY" if f.args.len() == 1 && matches!(source, DialectType::ClickHouse) && !matches!(target, DialectType::ClickHouse) => Action::ClickHouseAnyToAnyValue,
5694 _ => Action::None,
5695 }
5696 }
5697 }
            Expression::AggregateFunction(af) => {
                // Aggregate-function dispatch keyed on the uppercased name.
                // Arms with guards fire only for the listed source/target
                // combinations; unguarded arms apply to every dialect pair.
                let name = af.name.to_uppercase();
                match name.as_str() {
                    // Always renamed via generic normalization.
                    "ARBITRARY" | "AGGREGATE" => Action::GenericFunctionNormalize,
                    "JSON_ARRAYAGG" => Action::GenericFunctionNormalize,
                    // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
                    "JSON_OBJECT_AGG" | "JSONB_OBJECT_AGG"
                        if matches!(target, DialectType::DuckDB) =>
                    {
                        Action::JsonObjectAggConvert
                    }
                    // ARRAY_AGG -> COLLECT_LIST for the Hive family of targets.
                    "ARRAY_AGG"
                        if matches!(
                            target,
                            DialectType::Hive
                                | DialectType::Spark
                                | DialectType::Databricks
                        ) =>
                    {
                        Action::ArrayAggToCollectList
                    }
                    // MAX_BY/MIN_BY need a target-specific spelling for these dialects.
                    "MAX_BY" | "MIN_BY"
                        if matches!(
                            target,
                            DialectType::ClickHouse
                                | DialectType::Spark
                                | DialectType::Databricks
                                | DialectType::DuckDB
                        ) =>
                    {
                        Action::MaxByMinByConvert
                    }
                    // Hive-style COLLECT_LIST -> ARRAY_AGG for these targets.
                    "COLLECT_LIST"
                        if matches!(
                            target,
                            DialectType::Presto | DialectType::Trino | DialectType::DuckDB
                        ) =>
                    {
                        Action::CollectListToArrayAgg
                    }
                    "COLLECT_SET"
                        if matches!(
                            target,
                            DialectType::Presto
                                | DialectType::Trino
                                | DialectType::Snowflake
                                | DialectType::DuckDB
                        ) =>
                    {
                        Action::CollectSetConvert
                    }
                    "PERCENTILE"
                        if matches!(
                            target,
                            DialectType::DuckDB | DialectType::Presto | DialectType::Trino
                        ) =>
                    {
                        Action::PercentileConvert
                    }
                    // CORR -> CASE WHEN ISNAN(CORR(a,b)) THEN NULL ELSE CORR(a,b) END for DuckDB
                    "CORR"
                        if matches!(target, DialectType::DuckDB)
                            && matches!(source, DialectType::Snowflake) =>
                    {
                        Action::CorrIsnanWrap
                    }
                    // BigQuery APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
                    "APPROX_QUANTILES"
                        if matches!(source, DialectType::BigQuery)
                            && matches!(target, DialectType::DuckDB) =>
                    {
                        Action::BigQueryApproxQuantiles
                    }
                    // BigQuery PERCENTILE_CONT(x, frac RESPECT NULLS) -> QUANTILE_CONT(x, frac) for DuckDB
                    "PERCENTILE_CONT"
                        if matches!(source, DialectType::BigQuery)
                            && matches!(target, DialectType::DuckDB)
                            && af.args.len() >= 2 =>
                    {
                        Action::BigQueryPercentileContToDuckDB
                    }
                    _ => Action::None,
                }
            }
5782 Expression::JSONArrayAgg(_) => match target {
5783 DialectType::PostgreSQL => Action::GenericFunctionNormalize,
5784 _ => Action::None,
5785 },
5786 Expression::ToNumber(tn) => {
5787 // TO_NUMBER(x) with 1 arg -> CAST(x AS DOUBLE) for most targets
5788 if tn.format.is_none() && tn.precision.is_none() && tn.scale.is_none() {
5789 match target {
5790 DialectType::Oracle
5791 | DialectType::Snowflake
5792 | DialectType::Teradata => Action::None,
5793 _ => Action::GenericFunctionNormalize,
5794 }
5795 } else {
5796 Action::None
5797 }
5798 }
5799 Expression::Nvl2(_) => {
5800 // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b ELSE c END for most dialects
5801 // Keep as NVL2 for dialects that support it natively
5802 match target {
5803 DialectType::Oracle
5804 | DialectType::Snowflake
5805 | DialectType::Teradata
5806 | DialectType::Spark
5807 | DialectType::Databricks
5808 | DialectType::Redshift => Action::None,
5809 _ => Action::Nvl2Expand,
5810 }
5811 }
5812 Expression::Decode(_) | Expression::DecodeCase(_) => {
5813 // DECODE(a, b, c[, d, e[, ...]]) -> CASE WHEN with null-safe comparisons
5814 // Keep as DECODE for Oracle/Snowflake
5815 match target {
5816 DialectType::Oracle | DialectType::Snowflake => Action::None,
5817 _ => Action::DecodeSimplify,
5818 }
5819 }
5820 Expression::Coalesce(ref cf) => {
5821 // IFNULL(a, b) -> COALESCE(a, b): clear original_name for cross-dialect
5822 // BigQuery keeps IFNULL natively when source is also BigQuery
5823 if cf.original_name.as_deref() == Some("IFNULL")
5824 && !(matches!(source, DialectType::BigQuery)
5825 && matches!(target, DialectType::BigQuery))
5826 {
5827 Action::IfnullToCoalesce
5828 } else {
5829 Action::None
5830 }
5831 }
5832 Expression::IfFunc(if_func) => {
5833 if matches!(source, DialectType::Snowflake)
5834 && matches!(
5835 target,
5836 DialectType::Presto | DialectType::Trino | DialectType::SQLite
5837 )
5838 && matches!(if_func.false_value, Some(Expression::Div(_)))
5839 {
5840 Action::Div0TypedDivision
5841 } else {
5842 Action::None
5843 }
5844 }
5845 Expression::ToJson(_) => match target {
5846 DialectType::Presto | DialectType::Trino => Action::ToJsonConvert,
5847 DialectType::BigQuery => Action::ToJsonConvert,
5848 DialectType::DuckDB => Action::ToJsonConvert,
5849 _ => Action::None,
5850 },
5851 Expression::ArrayAgg(ref agg) => {
5852 if matches!(
5853 target,
5854 DialectType::Hive | DialectType::Spark | DialectType::Databricks
5855 ) {
5856 // Any source -> Hive/Spark: convert ARRAY_AGG to COLLECT_LIST
5857 Action::ArrayAggToCollectList
5858 } else if matches!(
5859 source,
5860 DialectType::Spark | DialectType::Databricks | DialectType::Hive
5861 ) && matches!(target, DialectType::DuckDB)
5862 && agg.filter.is_some()
5863 {
5864 // Spark/Hive ARRAY_AGG excludes NULLs, DuckDB includes them
5865 // Need to add NOT x IS NULL to existing filter
5866 Action::ArrayAggNullFilter
5867 } else if matches!(target, DialectType::DuckDB)
5868 && agg.ignore_nulls == Some(true)
5869 && !agg.order_by.is_empty()
5870 {
5871 // BigQuery ARRAY_AGG(x IGNORE NULLS ORDER BY ...) -> DuckDB ARRAY_AGG(x ORDER BY a NULLS FIRST, ...)
5872 Action::ArrayAggIgnoreNullsDuckDB
5873 } else if !matches!(source, DialectType::Snowflake) {
5874 Action::None
5875 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
5876 let is_array_agg = agg.name.as_deref().map(|n| n.to_uppercase())
5877 == Some("ARRAY_AGG".to_string())
5878 || agg.name.is_none();
5879 if is_array_agg {
5880 Action::ArrayAggCollectList
5881 } else {
5882 Action::None
5883 }
5884 } else if matches!(
5885 target,
5886 DialectType::DuckDB | DialectType::Presto | DialectType::Trino
5887 ) && agg.filter.is_none()
5888 {
5889 Action::ArrayAggFilter
5890 } else {
5891 Action::None
5892 }
5893 }
            Expression::WithinGroup(wg) => {
                // WITHIN GROUP (ORDER BY ...) wrappers. Branch order matters:
                // the Snowflake ARRAY_AGG rule must win over the generic
                // STRING_AGG and PERCENTILE rules below.
                if matches!(source, DialectType::Snowflake)
                    && matches!(
                        target,
                        DialectType::DuckDB | DialectType::Presto | DialectType::Trino
                    )
                    && matches!(wg.this, Expression::ArrayAgg(_))
                {
                    Action::ArrayAggWithinGroupFilter
                } else if matches!(&wg.this, Expression::AggregateFunction(af) if af.name.eq_ignore_ascii_case("STRING_AGG"))
                    || matches!(&wg.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("STRING_AGG"))
                    || matches!(&wg.this, Expression::StringAgg(_))
                {
                    // STRING_AGG ... WITHIN GROUP: the inner aggregate can be
                    // parsed as any of three node types; all take the same path.
                    Action::StringAggConvert
                } else if matches!(
                    target,
                    DialectType::Presto
                        | DialectType::Trino
                        | DialectType::Athena
                        | DialectType::Spark
                        | DialectType::Databricks
                ) && (matches!(&wg.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("PERCENTILE_CONT") || f.name.eq_ignore_ascii_case("PERCENTILE_DISC"))
                    || matches!(&wg.this, Expression::AggregateFunction(af) if af.name.eq_ignore_ascii_case("PERCENTILE_CONT") || af.name.eq_ignore_ascii_case("PERCENTILE_DISC"))
                    || matches!(&wg.this, Expression::PercentileCont(_)))
                {
                    // PERCENTILE_CONT/PERCENTILE_DISC ... WITHIN GROUP rewritten
                    // via PercentileContConvert for these targets.
                    Action::PercentileContConvert
                } else {
                    Action::None
                }
            }
            // For BigQuery: CAST(x AS TIMESTAMP) -> CAST(x AS DATETIME)
            // because BigQuery's TIMESTAMP is really TIMESTAMPTZ, and
            // DATETIME is the timezone-unaware type
            Expression::Cast(ref c) => {
                // CAST handling is one ordered else-if chain: the FIRST matching
                // source/target/type combination wins, so the relative order of
                // the branches is load-bearing.
                if c.format.is_some()
                    && (matches!(source, DialectType::BigQuery)
                        || matches!(source, DialectType::Teradata))
                {
                    // CAST(x AS T FORMAT 'fmt') from BigQuery/Teradata sources.
                    Action::BigQueryCastFormat
                } else if matches!(target, DialectType::BigQuery)
                    && !matches!(source, DialectType::BigQuery)
                    && matches!(
                        c.to,
                        DataType::Timestamp {
                            timezone: false,
                            ..
                        }
                    )
                {
                    Action::CastTimestampToDatetime
                } else if matches!(target, DialectType::MySQL | DialectType::StarRocks)
                    && !matches!(source, DialectType::MySQL | DialectType::StarRocks)
                    && matches!(
                        c.to,
                        DataType::Timestamp {
                            timezone: false,
                            ..
                        }
                    )
                {
                    // Generic/other -> MySQL/StarRocks: CAST(x AS TIMESTAMP) -> CAST(x AS DATETIME)
                    // but MySQL-native CAST(x AS TIMESTAMP) stays as TIMESTAMP(x) via transform_cast
                    Action::CastTimestampToDatetime
                } else if matches!(
                    source,
                    DialectType::Hive | DialectType::Spark | DialectType::Databricks
                ) && matches!(
                    target,
                    DialectType::Presto
                        | DialectType::Trino
                        | DialectType::Athena
                        | DialectType::DuckDB
                        | DialectType::Snowflake
                        | DialectType::BigQuery
                        | DialectType::Databricks
                        | DialectType::TSQL
                ) {
                    // Hive-family source CAST rendered as TRY_CAST for these targets.
                    Action::HiveCastToTryCast
                } else if matches!(c.to, DataType::Timestamp { timezone: true, .. })
                    && matches!(target, DialectType::MySQL | DialectType::StarRocks)
                {
                    // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) function for MySQL/StarRocks
                    Action::CastTimestamptzToFunc
                } else if matches!(c.to, DataType::Timestamp { timezone: true, .. })
                    && matches!(
                        target,
                        DialectType::Hive
                            | DialectType::Spark
                            | DialectType::Databricks
                            | DialectType::BigQuery
                    )
                {
                    // CAST(x AS TIMESTAMP WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark/BigQuery
                    Action::CastTimestampStripTz
                } else if matches!(&c.to, DataType::Json)
                    && matches!(&c.this, Expression::Literal(Literal::String(_)))
                    && matches!(
                        target,
                        DialectType::Presto
                            | DialectType::Trino
                            | DialectType::Athena
                            | DialectType::Snowflake
                    )
                {
                    // CAST('x' AS JSON) -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake
                    // Only when the input is a string literal (JSON 'value' syntax)
                    Action::JsonLiteralToJsonParse
                } else if matches!(&c.to, DataType::Json | DataType::JsonB)
                    && matches!(target, DialectType::Spark | DialectType::Databricks)
                {
                    // CAST(x AS JSON) -> TO_JSON(x) for Spark
                    Action::CastToJsonForSpark
                } else if (matches!(
                    &c.to,
                    DataType::Array { .. } | DataType::Map { .. } | DataType::Struct { .. }
                )) && matches!(
                    target,
                    DialectType::Spark | DialectType::Databricks
                ) && (matches!(&c.this, Expression::ParseJson(_))
                    || matches!(
                        &c.this,
                        Expression::Function(f)
                            if f.name.eq_ignore_ascii_case("JSON_EXTRACT")
                                || f.name.eq_ignore_ascii_case("JSON_EXTRACT_SCALAR")
                                || f.name.eq_ignore_ascii_case("GET_JSON_OBJECT")
                    ))
                {
                    // CAST(JSON_PARSE(...) AS ARRAY/MAP) or CAST(JSON_EXTRACT/GET_JSON_OBJECT(...) AS ARRAY/MAP)
                    // -> FROM_JSON(..., type_string) for Spark
                    Action::CastJsonToFromJson
                } else if matches!(target, DialectType::Spark | DialectType::Databricks)
                    && matches!(
                        c.to,
                        DataType::Timestamp {
                            timezone: false,
                            ..
                        }
                    )
                    && matches!(source, DialectType::DuckDB)
                {
                    // DuckDB -> Spark: timezone-naive TIMESTAMP cast goes through
                    // the STRFTIME-related handling (see StrftimeCastTimestamp).
                    Action::StrftimeCastTimestamp
                } else if matches!(source, DialectType::DuckDB)
                    && matches!(
                        c.to,
                        DataType::Decimal {
                            precision: None,
                            ..
                        }
                    )
                {
                    // DuckDB DECIMAL with no explicit precision: fill in defaults.
                    Action::DecimalDefaultPrecision
                } else if matches!(source, DialectType::MySQL | DialectType::SingleStore)
                    && matches!(c.to, DataType::Char { length: None })
                    && !matches!(target, DialectType::MySQL | DialectType::SingleStore)
                {
                    // MySQL CAST(x AS CHAR) was originally TEXT - convert to target text type
                    Action::MysqlCastCharToText
                } else if matches!(
                    source,
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive
                ) && matches!(
                    target,
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive
                ) && Self::has_varchar_char_type(&c.to)
                {
                    // Spark parses VARCHAR(n)/CHAR(n) as TEXT, so normalize back to STRING
                    Action::SparkCastVarcharToString
                } else {
                    Action::None
                }
            }
6065 Expression::SafeCast(ref c) => {
6066 if c.format.is_some()
6067 && matches!(source, DialectType::BigQuery)
6068 && !matches!(target, DialectType::BigQuery)
6069 {
6070 Action::BigQueryCastFormat
6071 } else {
6072 Action::None
6073 }
6074 }
6075 // For DuckDB: DATE_TRUNC should preserve the input type
6076 Expression::DateTrunc(_) | Expression::TimestampTrunc(_) => {
6077 if matches!(source, DialectType::Snowflake)
6078 && matches!(target, DialectType::DuckDB)
6079 {
6080 Action::DateTruncWrapCast
6081 } else {
6082 Action::None
6083 }
6084 }
6085 // For DuckDB: SET a = 1 -> SET VARIABLE a = 1
6086 Expression::SetStatement(s) => {
6087 if matches!(target, DialectType::DuckDB)
6088 && !matches!(source, DialectType::TSQL | DialectType::Fabric)
6089 && s.items.iter().any(|item| item.kind.is_none())
6090 {
6091 Action::SetToVariable
6092 } else {
6093 Action::None
6094 }
6095 }
            // Cross-dialect NULL ordering normalization.
            // When nulls_first is not specified, fill in the source dialect's implied
            // default so the target generator can correctly add/strip NULLS FIRST/LAST.
            Expression::Ordered(o) => {
                // MySQL doesn't support NULLS FIRST/LAST - strip or rewrite
                if matches!(target, DialectType::MySQL) && o.nulls_first.is_some() {
                    Action::MysqlNullsOrdering
                } else {
                    // Skip targets that don't support NULLS FIRST/LAST syntax
                    let target_supports_nulls = !matches!(
                        target,
                        DialectType::MySQL
                            | DialectType::TSQL
                            | DialectType::StarRocks
                            | DialectType::Doris
                    );
                    // Only backfill when the dialects differ: same-dialect
                    // round-trips keep the ordering unspecified.
                    if o.nulls_first.is_none() && source != target && target_supports_nulls
                    {
                        Action::NullsOrdering
                    } else {
                        Action::None
                    }
                }
            }
            // BigQuery data types: convert INT64, BYTES, NUMERIC etc. to standard types
            Expression::DataType(dt) => {
                // Source-dialect-keyed type normalization. The outer else-if
                // chain is ordered: BigQuery, then TSQL, then Oracle/Generic,
                // then the Snowflake-target FLOAT protection.
                if matches!(source, DialectType::BigQuery)
                    && !matches!(target, DialectType::BigQuery)
                {
                    match dt {
                        // BigQuery-only spellings parsed as Custom types.
                        DataType::Custom { ref name }
                            if name.eq_ignore_ascii_case("INT64")
                                || name.eq_ignore_ascii_case("FLOAT64")
                                || name.eq_ignore_ascii_case("BOOL")
                                || name.eq_ignore_ascii_case("BYTES")
                                || name.eq_ignore_ascii_case("NUMERIC")
                                || name.eq_ignore_ascii_case("STRING")
                                || name.eq_ignore_ascii_case("DATETIME") =>
                        {
                            Action::BigQueryCastType
                        }
                        _ => Action::None,
                    }
                } else if matches!(source, DialectType::TSQL) {
                    // For TSQL source -> any target (including TSQL itself for REAL)
                    match dt {
                        // REAL -> FLOAT even for TSQL->TSQL
                        DataType::Custom { ref name }
                            if name.eq_ignore_ascii_case("REAL") =>
                        {
                            Action::TSQLTypeNormalize
                        }
                        DataType::Float {
                            real_spelling: true,
                            ..
                        } => Action::TSQLTypeNormalize,
                        // Other TSQL type normalizations only for non-TSQL targets
                        // (the starts_with checks also cover parameterized forms
                        // such as NUMERIC(p,s), DATETIME2(n), TIME(n)).
                        DataType::Custom { ref name }
                            if !matches!(target, DialectType::TSQL)
                                && (name.eq_ignore_ascii_case("MONEY")
                                    || name.eq_ignore_ascii_case("SMALLMONEY")
                                    || name.eq_ignore_ascii_case("DATETIME2")
                                    || name.eq_ignore_ascii_case("IMAGE")
                                    || name.eq_ignore_ascii_case("BIT")
                                    || name.eq_ignore_ascii_case("ROWVERSION")
                                    || name.eq_ignore_ascii_case("UNIQUEIDENTIFIER")
                                    || name.eq_ignore_ascii_case("DATETIMEOFFSET")
                                    || name.to_uppercase().starts_with("NUMERIC")
                                    || name.to_uppercase().starts_with("DATETIME2(")
                                    || name.to_uppercase().starts_with("TIME(")) =>
                        {
                            Action::TSQLTypeNormalize
                        }
                        DataType::Float {
                            precision: Some(_), ..
                        } if !matches!(target, DialectType::TSQL) => {
                            Action::TSQLTypeNormalize
                        }
                        DataType::TinyInt { .. }
                            if !matches!(target, DialectType::TSQL) =>
                        {
                            Action::TSQLTypeNormalize
                        }
                        // INTEGER -> INT for Databricks/Spark targets
                        DataType::Int {
                            integer_spelling: true,
                            ..
                        } if matches!(
                            target,
                            DialectType::Databricks | DialectType::Spark
                        ) =>
                        {
                            Action::TSQLTypeNormalize
                        }
                        _ => Action::None,
                    }
                } else if (matches!(source, DialectType::Oracle)
                    || matches!(source, DialectType::Generic))
                    && !matches!(target, DialectType::Oracle)
                {
                    // Oracle VARCHAR2/NVARCHAR2 (bare or parameterized) -> VARCHAR.
                    match dt {
                        DataType::Custom { ref name }
                            if name.to_uppercase().starts_with("VARCHAR2(")
                                || name.to_uppercase().starts_with("NVARCHAR2(")
                                || name.eq_ignore_ascii_case("VARCHAR2")
                                || name.eq_ignore_ascii_case("NVARCHAR2") =>
                        {
                            Action::OracleVarchar2ToVarchar
                        }
                        _ => Action::None,
                    }
                } else if matches!(target, DialectType::Snowflake)
                    && !matches!(source, DialectType::Snowflake)
                {
                    // When target is Snowflake but source is NOT Snowflake,
                    // protect FLOAT from being converted to DOUBLE by Snowflake's transform.
                    // Snowflake treats FLOAT=DOUBLE internally, but non-Snowflake sources
                    // should keep their FLOAT spelling.
                    match dt {
                        DataType::Float { .. } => Action::SnowflakeFloatProtect,
                        _ => Action::None,
                    }
                } else {
                    Action::None
                }
            }
6222 // LOWER patterns from BigQuery TO_HEX conversions:
6223 // - LOWER(LOWER(HEX(x))) from non-BQ targets: flatten
6224 // - LOWER(Function("TO_HEX")) for BQ->BQ: strip LOWER
6225 Expression::Lower(uf) => {
6226 if matches!(source, DialectType::BigQuery) {
6227 match &uf.this {
6228 Expression::Lower(_) => Action::BigQueryToHexLower,
6229 Expression::Function(f)
6230 if f.name == "TO_HEX"
6231 && matches!(target, DialectType::BigQuery) =>
6232 {
6233 // BQ->BQ: LOWER(TO_HEX(x)) -> TO_HEX(x)
6234 Action::BigQueryToHexLower
6235 }
6236 _ => Action::None,
6237 }
6238 } else {
6239 Action::None
6240 }
6241 }
6242 // UPPER patterns from BigQuery TO_HEX conversions:
6243 // - UPPER(LOWER(HEX(x))) from non-BQ targets: extract inner
6244 // - UPPER(Function("TO_HEX")) for BQ->BQ: keep as UPPER(TO_HEX(x))
6245 Expression::Upper(uf) => {
6246 if matches!(source, DialectType::BigQuery) {
6247 match &uf.this {
6248 Expression::Lower(_) => Action::BigQueryToHexUpper,
6249 _ => Action::None,
6250 }
6251 } else {
6252 Action::None
6253 }
6254 }
6255 // BigQuery LAST_DAY(date, unit) -> strip unit for non-BigQuery targets
6256 // Snowflake supports LAST_DAY with unit, so keep it there
6257 Expression::LastDay(ld) => {
6258 if matches!(source, DialectType::BigQuery)
6259 && !matches!(target, DialectType::BigQuery | DialectType::Snowflake)
6260 && ld.unit.is_some()
6261 {
6262 Action::BigQueryLastDayStripUnit
6263 } else {
6264 Action::None
6265 }
6266 }
6267 // BigQuery SafeDivide expressions (already parsed as SafeDivide)
6268 Expression::SafeDivide(_) => {
6269 if matches!(source, DialectType::BigQuery)
6270 && !matches!(target, DialectType::BigQuery)
6271 {
6272 Action::BigQuerySafeDivide
6273 } else {
6274 Action::None
6275 }
6276 }
6277 // BigQuery ANY_VALUE(x HAVING MAX/MIN y) -> ARG_MAX_NULL/ARG_MIN_NULL for DuckDB
6278 // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
6279 Expression::AnyValue(ref agg) => {
6280 if matches!(source, DialectType::BigQuery)
6281 && matches!(target, DialectType::DuckDB)
6282 && agg.having_max.is_some()
6283 {
6284 Action::BigQueryAnyValueHaving
6285 } else if matches!(target, DialectType::Spark | DialectType::Databricks)
6286 && !matches!(source, DialectType::Spark | DialectType::Databricks)
6287 && agg.ignore_nulls.is_none()
6288 {
6289 Action::AnyValueIgnoreNulls
6290 } else {
6291 Action::None
6292 }
6293 }
6294 Expression::Any(ref q) => {
6295 if matches!(source, DialectType::PostgreSQL)
6296 && matches!(
6297 target,
6298 DialectType::Spark | DialectType::Databricks | DialectType::Hive
6299 )
6300 && q.op.is_some()
6301 && !matches!(
6302 q.subquery,
6303 Expression::Select(_) | Expression::Subquery(_)
6304 )
6305 {
6306 Action::AnyToExists
6307 } else {
6308 Action::None
6309 }
6310 }
6311 // BigQuery APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
6312 // RegexpLike from non-DuckDB sources -> REGEXP_MATCHES for DuckDB target
6313 // DuckDB's ~ is a full match, but other dialects' REGEXP/RLIKE is a partial match
6314 Expression::RegexpLike(_)
6315 if !matches!(source, DialectType::DuckDB)
6316 && matches!(target, DialectType::DuckDB) =>
6317 {
6318 Action::RegexpLikeToDuckDB
6319 }
6320 // Safe-division source -> non-safe target: NULLIF wrapping and/or CAST
6321 // Safe-division dialects: MySQL, DuckDB, SingleStore, TiDB, ClickHouse, Doris
6322 Expression::Div(ref op)
6323 if matches!(
6324 source,
6325 DialectType::MySQL
6326 | DialectType::DuckDB
6327 | DialectType::SingleStore
6328 | DialectType::TiDB
6329 | DialectType::ClickHouse
6330 | DialectType::Doris
6331 ) && matches!(
6332 target,
6333 DialectType::PostgreSQL
6334 | DialectType::Redshift
6335 | DialectType::Drill
6336 | DialectType::Trino
6337 | DialectType::Presto
6338 | DialectType::Athena
6339 | DialectType::TSQL
6340 | DialectType::Teradata
6341 | DialectType::SQLite
6342 | DialectType::BigQuery
6343 | DialectType::Snowflake
6344 | DialectType::Databricks
6345 | DialectType::Oracle
6346 | DialectType::Materialize
6347 | DialectType::RisingWave
6348 ) =>
6349 {
6350 // Only wrap if RHS is not already NULLIF
6351 if !matches!(&op.right, Expression::Function(f) if f.name.eq_ignore_ascii_case("NULLIF"))
6352 {
6353 Action::MySQLSafeDivide
6354 } else {
6355 Action::None
6356 }
6357 }
6358 // ALTER TABLE ... RENAME TO <schema>.<table> -> strip schema for most targets
6359 // For TSQL/Fabric, convert to sp_rename instead
6360 Expression::AlterTable(ref at) if !at.actions.is_empty() => {
6361 if let Some(crate::expressions::AlterTableAction::RenameTable(
6362 ref new_tbl,
6363 )) = at.actions.first()
6364 {
6365 if matches!(target, DialectType::TSQL | DialectType::Fabric) {
6366 // TSQL: ALTER TABLE RENAME -> EXEC sp_rename
6367 Action::AlterTableToSpRename
6368 } else if new_tbl.schema.is_some()
6369 && matches!(
6370 target,
6371 DialectType::BigQuery
6372 | DialectType::Doris
6373 | DialectType::StarRocks
6374 | DialectType::DuckDB
6375 | DialectType::PostgreSQL
6376 | DialectType::Redshift
6377 )
6378 {
6379 Action::AlterTableRenameStripSchema
6380 } else {
6381 Action::None
6382 }
6383 } else {
6384 Action::None
6385 }
6386 }
6387 // EPOCH(x) expression -> target-specific epoch conversion
6388 Expression::Epoch(_) if !matches!(target, DialectType::DuckDB) => {
6389 Action::EpochConvert
6390 }
6391 // EPOCH_MS(x) expression -> target-specific epoch ms conversion
6392 Expression::EpochMs(_) if !matches!(target, DialectType::DuckDB) => {
6393 Action::EpochMsConvert
6394 }
6395             // STRING_AGG -> GROUP_CONCAT for MySQL/SQLite
            // NOTE: both non-default branches below resolve to the same
            // Action::StringAggConvert; they are kept separate only to document the
            // two distinct target families (GROUP_CONCAT-style targets vs Spark/Databricks).
6396             Expression::StringAgg(_) => {
6397                 if matches!(
6398                     target,
6399                     DialectType::MySQL
6400                         | DialectType::SingleStore
6401                         | DialectType::Doris
6402                         | DialectType::StarRocks
6403                         | DialectType::SQLite
6404                 ) {
                    // GROUP_CONCAT family of targets.
6405                     Action::StringAggConvert
6406                 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
                    // Spark/Databricks are routed through the same conversion action.
6407                     Action::StringAggConvert
6408                 } else {
6409                     Action::None
6410                 }
6411             }
6412 // GROUP_CONCAT -> STRING_AGG for PostgreSQL/Presto/etc.
6413 // Also handles GROUP_CONCAT normalization for MySQL/SQLite targets
6414 Expression::GroupConcat(_) => Action::GroupConcatConvert,
6415 // CARDINALITY/ARRAY_LENGTH/ARRAY_SIZE -> target-specific array length
6416 Expression::Cardinality(_) | Expression::ArrayLength(_) => {
6417 Action::ArrayLengthConvert
6418 }
6419 Expression::ArraySize(_) => {
6420 if matches!(target, DialectType::Drill) {
6421 Action::ArraySizeDrill
6422 } else {
6423 Action::ArrayLengthConvert
6424 }
6425 }
6426 // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter/ARRAY subquery
6427 Expression::ArrayRemove(_) => match target {
6428 DialectType::DuckDB | DialectType::ClickHouse | DialectType::BigQuery => {
6429 Action::ArrayRemoveConvert
6430 }
6431 _ => Action::None,
6432 },
6433 // ARRAY_REVERSE(x) -> arrayReverse for ClickHouse
6434 Expression::ArrayReverse(_) => match target {
6435 DialectType::ClickHouse => Action::ArrayReverseConvert,
6436 _ => Action::None,
6437 },
6438 // JSON_KEYS(x) -> JSON_OBJECT_KEYS/OBJECT_KEYS for Spark/Databricks/Snowflake
6439 Expression::JsonKeys(_) => match target {
6440 DialectType::Spark | DialectType::Databricks | DialectType::Snowflake => {
6441 Action::JsonKeysConvert
6442 }
6443 _ => Action::None,
6444 },
6445 // PARSE_JSON(x) -> strip for SQLite/Doris/MySQL/StarRocks
6446 Expression::ParseJson(_) => match target {
6447 DialectType::SQLite
6448 | DialectType::Doris
6449 | DialectType::MySQL
6450 | DialectType::StarRocks => Action::ParseJsonStrip,
6451 _ => Action::None,
6452 },
6453 // WeekOfYear -> WEEKISO for Snowflake (cross-dialect only)
6454 Expression::WeekOfYear(_)
6455 if matches!(target, DialectType::Snowflake)
6456 && !matches!(source, DialectType::Snowflake) =>
6457 {
6458 Action::WeekOfYearToWeekIso
6459 }
6460 // NVL: clear original_name so generator uses dialect-specific function names
6461 Expression::Nvl(f) if f.original_name.is_some() => Action::NvlClearOriginal,
6462 // XOR: expand for dialects that don't support the XOR keyword
6463 Expression::Xor(_) => {
6464 let target_supports_xor = matches!(
6465 target,
6466 DialectType::MySQL
6467 | DialectType::SingleStore
6468 | DialectType::Doris
6469 | DialectType::StarRocks
6470 );
6471 if !target_supports_xor {
6472 Action::XorExpand
6473 } else {
6474 Action::None
6475 }
6476 }
6477 // TSQL #table -> temp table normalization (CREATE TABLE)
6478 Expression::CreateTable(ct)
6479 if matches!(source, DialectType::TSQL | DialectType::Fabric)
6480 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
6481 && ct.name.name.name.starts_with('#') =>
6482 {
6483 Action::TempTableHash
6484 }
6485 // TSQL #table -> strip # from table references in SELECT/etc.
6486 Expression::Table(tr)
6487 if matches!(source, DialectType::TSQL | DialectType::Fabric)
6488 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
6489 && tr.name.name.starts_with('#') =>
6490 {
6491 Action::TempTableHash
6492 }
6493 // TSQL #table -> strip # from DROP TABLE names
6494 Expression::DropTable(ref dt)
6495 if matches!(source, DialectType::TSQL | DialectType::Fabric)
6496 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
6497 && dt.names.iter().any(|n| n.name.name.starts_with('#')) =>
6498 {
6499 Action::TempTableHash
6500 }
6501 // JSON_EXTRACT -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
6502 Expression::JsonExtract(_)
6503 if matches!(target, DialectType::TSQL | DialectType::Fabric) =>
6504 {
6505 Action::JsonExtractToTsql
6506 }
6507 // JSON_EXTRACT_SCALAR -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
6508 Expression::JsonExtractScalar(_)
6509 if matches!(target, DialectType::TSQL | DialectType::Fabric) =>
6510 {
6511 Action::JsonExtractToTsql
6512 }
6513 // JSON_EXTRACT -> JSONExtractString for ClickHouse
6514 Expression::JsonExtract(_) if matches!(target, DialectType::ClickHouse) => {
6515 Action::JsonExtractToClickHouse
6516 }
6517 // JSON_EXTRACT_SCALAR -> JSONExtractString for ClickHouse
6518 Expression::JsonExtractScalar(_)
6519 if matches!(target, DialectType::ClickHouse) =>
6520 {
6521 Action::JsonExtractToClickHouse
6522 }
6523 // JSON_EXTRACT -> arrow syntax for SQLite/DuckDB
6524 Expression::JsonExtract(ref f)
6525 if !f.arrow_syntax
6526 && matches!(target, DialectType::SQLite | DialectType::DuckDB) =>
6527 {
6528 Action::JsonExtractToArrow
6529 }
6530 // JSON_EXTRACT with JSONPath -> JSON_EXTRACT_PATH for PostgreSQL (non-PG sources only)
6531 Expression::JsonExtract(ref f)
6532 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift)
6533 && !matches!(
6534 source,
6535 DialectType::PostgreSQL
6536 | DialectType::Redshift
6537 | DialectType::Materialize
6538 )
6539 && matches!(&f.path, Expression::Literal(Literal::String(s)) if s.starts_with('$')) =>
6540 {
6541 Action::JsonExtractToGetJsonObject
6542 }
6543 // JSON_EXTRACT -> GET_JSON_OBJECT for Hive/Spark
6544 Expression::JsonExtract(_)
6545 if matches!(
6546 target,
6547 DialectType::Hive | DialectType::Spark | DialectType::Databricks
6548 ) =>
6549 {
6550 Action::JsonExtractToGetJsonObject
6551 }
6552 // JSON_EXTRACT_SCALAR -> target-specific for PostgreSQL, Snowflake, SQLite
6553 // Skip if already in arrow/hash_arrow syntax (same-dialect identity case)
6554 Expression::JsonExtractScalar(ref f)
6555 if !f.arrow_syntax
6556 && !f.hash_arrow_syntax
6557 && matches!(
6558 target,
6559 DialectType::PostgreSQL
6560 | DialectType::Redshift
6561 | DialectType::Snowflake
6562 | DialectType::SQLite
6563 | DialectType::DuckDB
6564 ) =>
6565 {
6566 Action::JsonExtractScalarConvert
6567 }
6568 // JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
6569 Expression::JsonExtractScalar(_)
6570 if matches!(
6571 target,
6572 DialectType::Hive | DialectType::Spark | DialectType::Databricks
6573 ) =>
6574 {
6575 Action::JsonExtractScalarToGetJsonObject
6576 }
6577 // JSON_EXTRACT path normalization for BigQuery, MySQL (bracket/wildcard handling)
6578 Expression::JsonExtract(ref f)
6579 if !f.arrow_syntax
6580 && matches!(target, DialectType::BigQuery | DialectType::MySQL) =>
6581 {
6582 Action::JsonPathNormalize
6583 }
6584 // JsonQuery (parsed JSON_QUERY) -> target-specific
6585 Expression::JsonQuery(_) => Action::JsonQueryValueConvert,
6586 // JsonValue (parsed JSON_VALUE) -> target-specific
6587 Expression::JsonValue(_) => Action::JsonQueryValueConvert,
6588 // AT TIME ZONE -> AT_TIMEZONE for Presto, FROM_UTC_TIMESTAMP for Spark,
6589 // TIMESTAMP(DATETIME(...)) for BigQuery, CONVERT_TIMEZONE for Snowflake
6590 Expression::AtTimeZone(_)
6591 if matches!(
6592 target,
6593 DialectType::Presto
6594 | DialectType::Trino
6595 | DialectType::Athena
6596 | DialectType::Spark
6597 | DialectType::Databricks
6598 | DialectType::BigQuery
6599 | DialectType::Snowflake
6600 ) =>
6601 {
6602 Action::AtTimeZoneConvert
6603 }
6604 // DAY_OF_WEEK -> dialect-specific
6605 Expression::DayOfWeek(_)
6606 if matches!(
6607 target,
6608 DialectType::DuckDB | DialectType::Spark | DialectType::Databricks
6609 ) =>
6610 {
6611 Action::DayOfWeekConvert
6612 }
6613 // CURRENT_USER -> CURRENT_USER() for Snowflake
6614 Expression::CurrentUser(_) if matches!(target, DialectType::Snowflake) => {
6615 Action::CurrentUserParens
6616 }
6617 // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
6618 Expression::ElementAt(_)
6619 if matches!(target, DialectType::PostgreSQL | DialectType::BigQuery) =>
6620 {
6621 Action::ElementAtConvert
6622 }
6623 // ARRAY[...] (ArrayFunc bracket_notation=false) -> convert for target dialect
6624 Expression::ArrayFunc(ref arr)
6625 if !arr.bracket_notation
6626 && matches!(
6627 target,
6628 DialectType::Spark
6629 | DialectType::Databricks
6630 | DialectType::Hive
6631 | DialectType::BigQuery
6632 | DialectType::DuckDB
6633 | DialectType::Snowflake
6634 | DialectType::Presto
6635 | DialectType::Trino
6636 | DialectType::Athena
6637 | DialectType::ClickHouse
6638 | DialectType::StarRocks
6639 ) =>
6640 {
6641 Action::ArraySyntaxConvert
6642 }
6643 // VARIANCE expression -> varSamp for ClickHouse
6644 Expression::Variance(_) if matches!(target, DialectType::ClickHouse) => {
6645 Action::VarianceToClickHouse
6646 }
6647 // STDDEV expression -> stddevSamp for ClickHouse
6648 Expression::Stddev(_) if matches!(target, DialectType::ClickHouse) => {
6649 Action::StddevToClickHouse
6650 }
6651 // ApproxQuantile -> APPROX_PERCENTILE for Snowflake
6652 Expression::ApproxQuantile(_) if matches!(target, DialectType::Snowflake) => {
6653 Action::ApproxQuantileConvert
6654 }
6655 // MonthsBetween -> target-specific
6656 Expression::MonthsBetween(_)
6657 if !matches!(
6658 target,
6659 DialectType::Spark | DialectType::Databricks | DialectType::Hive
6660 ) =>
6661 {
6662 Action::MonthsBetweenConvert
6663 }
6664 // AddMonths -> target-specific DATEADD/DATE_ADD
6665 Expression::AddMonths(_) => Action::AddMonthsConvert,
6666 // MapFromArrays -> target-specific (MAP, OBJECT_CONSTRUCT, MAP_FROM_ARRAYS)
6667 Expression::MapFromArrays(_)
6668 if !matches!(target, DialectType::Spark | DialectType::Databricks) =>
6669 {
6670 Action::MapFromArraysConvert
6671 }
6672 // CURRENT_USER -> CURRENT_USER() for Spark
6673 Expression::CurrentUser(_)
6674 if matches!(target, DialectType::Spark | DialectType::Databricks) =>
6675 {
6676 Action::CurrentUserSparkParens
6677 }
6678 // MONTH/YEAR/DAY('string') from Spark -> cast string to DATE for DuckDB/Presto
6679 Expression::Month(ref f) | Expression::Year(ref f) | Expression::Day(ref f)
6680 if matches!(
6681 source,
6682 DialectType::Spark | DialectType::Databricks | DialectType::Hive
6683 ) && matches!(&f.this, Expression::Literal(Literal::String(_)))
6684 && matches!(
6685 target,
6686 DialectType::DuckDB
6687 | DialectType::Presto
6688 | DialectType::Trino
6689 | DialectType::Athena
6690 | DialectType::PostgreSQL
6691 | DialectType::Redshift
6692 ) =>
6693 {
6694 Action::SparkDateFuncCast
6695 }
6696 // $parameter -> @parameter for BigQuery
6697 Expression::Parameter(ref p)
6698 if matches!(target, DialectType::BigQuery)
6699 && matches!(source, DialectType::DuckDB)
6700 && (p.style == crate::expressions::ParameterStyle::Dollar
6701 || p.style == crate::expressions::ParameterStyle::DoubleDollar) =>
6702 {
6703 Action::DollarParamConvert
6704 }
6705 // EscapeString literal: normalize literal newlines to \n
6706 Expression::Literal(Literal::EscapeString(ref s))
6707 if s.contains('\n') || s.contains('\r') || s.contains('\t') =>
6708 {
6709 Action::EscapeStringNormalize
6710 }
6711 // straight_join: keep lowercase for DuckDB, quote for MySQL
6712 Expression::Column(ref col)
6713 if col.name.name == "STRAIGHT_JOIN"
6714 && col.table.is_none()
6715 && matches!(source, DialectType::DuckDB)
6716 && matches!(target, DialectType::DuckDB | DialectType::MySQL) =>
6717 {
6718 Action::StraightJoinCase
6719 }
6720 // DATE and TIMESTAMP literal type conversions are now handled in the generator directly
6721 // Snowflake INTERVAL format: INTERVAL '2' HOUR -> INTERVAL '2 HOUR'
6722 Expression::Interval(ref iv)
6723 if matches!(
6724 target,
6725 DialectType::Snowflake
6726 | DialectType::PostgreSQL
6727 | DialectType::Redshift
6728 ) && iv.unit.is_some()
6729 && matches!(
6730 &iv.this,
6731 Some(Expression::Literal(Literal::String(_)))
6732 ) =>
6733 {
6734 Action::SnowflakeIntervalFormat
6735 }
6736 // TABLESAMPLE -> TABLESAMPLE RESERVOIR for DuckDB target
6737 Expression::TableSample(ref ts) if matches!(target, DialectType::DuckDB) => {
6738 if let Some(ref sample) = ts.sample {
6739 if !sample.explicit_method {
6740 Action::TablesampleReservoir
6741 } else {
6742 Action::None
6743 }
6744 } else {
6745 Action::None
6746 }
6747 }
6748             // TABLESAMPLE from non-Snowflake source to Snowflake: strip method and PERCENT
6749             // Handles both Expression::TableSample wrapper and Expression::Table with table_sample
6750             Expression::TableSample(ref ts)
6751                 if matches!(target, DialectType::Snowflake)
6752                     && !matches!(source, DialectType::Snowflake)
6753                     && ts.sample.is_some() =>
6754             {
                // The match guard already established ts.sample.is_some(), so the
                // outer `else` below is unreachable; it is kept as a defensive default.
6755                 if let Some(ref sample) = ts.sample {
                    // Only strip when the sampling method was not written explicitly.
6756                     if !sample.explicit_method {
6757                         Action::TablesampleSnowflakeStrip
6758                     } else {
6759                         Action::None
6760                     }
6761                 } else {
6762                     Action::None
6763                 }
6764             }
6765 Expression::Table(ref t)
6766 if matches!(target, DialectType::Snowflake)
6767 && !matches!(source, DialectType::Snowflake)
6768 && t.table_sample.is_some() =>
6769 {
6770 if let Some(ref sample) = t.table_sample {
6771 if !sample.explicit_method {
6772 Action::TablesampleSnowflakeStrip
6773 } else {
6774 Action::None
6775 }
6776 } else {
6777 Action::None
6778 }
6779 }
6780 // ALTER TABLE RENAME -> EXEC sp_rename for TSQL
6781 Expression::AlterTable(ref at)
6782 if matches!(target, DialectType::TSQL | DialectType::Fabric)
6783 && !at.actions.is_empty()
6784 && matches!(
6785 at.actions.first(),
6786 Some(crate::expressions::AlterTableAction::RenameTable(_))
6787 ) =>
6788 {
6789 Action::AlterTableToSpRename
6790 }
6791 // Subscript index: 1-based to 0-based for BigQuery/Hive/Spark
6792 Expression::Subscript(ref sub)
6793 if matches!(
6794 target,
6795 DialectType::BigQuery
6796 | DialectType::Hive
6797 | DialectType::Spark
6798 | DialectType::Databricks
6799 ) && matches!(
6800 source,
6801 DialectType::DuckDB
6802 | DialectType::PostgreSQL
6803 | DialectType::Presto
6804 | DialectType::Trino
6805 | DialectType::Redshift
6806 | DialectType::ClickHouse
6807 ) && matches!(&sub.index, Expression::Literal(Literal::Number(ref n)) if n.parse::<i64>().unwrap_or(0) > 0) =>
6808 {
6809 Action::ArrayIndexConvert
6810 }
6811 // ANY_VALUE IGNORE NULLS detection moved to the AnyValue arm above
6812 // MysqlNullsOrdering for Ordered is now handled in the Ordered arm above
6813 // RESPECT NULLS handling for SQLite (strip it, add NULLS LAST to ORDER BY)
6814 // and for MySQL (rewrite ORDER BY with CASE WHEN for null ordering)
6815 Expression::WindowFunction(ref wf) => {
6816 // BigQuery doesn't support NULLS FIRST/LAST in window function ORDER BY
6817 // EXCEPT for ROW_NUMBER which keeps NULLS LAST
6818 let is_row_number = matches!(wf.this, Expression::RowNumber(_));
6819 if matches!(target, DialectType::BigQuery)
6820 && !is_row_number
6821 && !wf.over.order_by.is_empty()
6822 && wf.over.order_by.iter().any(|o| o.nulls_first.is_some())
6823 {
6824 Action::BigQueryNullsOrdering
6825                     // Else branch below (DuckDB -> MySQL): add CASE WHEN to simulate NULLS LAST in window ORDER BY
6826                     // — skipped when the frame is RANGE/GROUPS, since CASE WHEN sort keys would break value-based frames
6827 } else {
6828 let source_nulls_last = matches!(source, DialectType::DuckDB);
6829 let has_range_frame = wf.over.frame.as_ref().map_or(false, |f| {
6830 matches!(
6831 f.kind,
6832 crate::expressions::WindowFrameKind::Range
6833 | crate::expressions::WindowFrameKind::Groups
6834 )
6835 });
6836 if source_nulls_last
6837 && matches!(target, DialectType::MySQL)
6838 && !wf.over.order_by.is_empty()
6839 && wf.over.order_by.iter().any(|o| !o.desc)
6840 && !has_range_frame
6841 {
6842 Action::MysqlNullsLastRewrite
6843 } else {
6844 match &wf.this {
6845 Expression::FirstValue(ref vf)
6846 | Expression::LastValue(ref vf)
6847 if vf.ignore_nulls == Some(false) =>
6848 {
6849 // RESPECT NULLS
6850 match target {
6851 DialectType::SQLite => Action::RespectNullsConvert,
6852 _ => Action::None,
6853 }
6854 }
6855 _ => Action::None,
6856 }
6857 }
6858 }
6859 }
6860 // CREATE TABLE a LIKE b -> dialect-specific transformations
6861 Expression::CreateTable(ref ct)
6862 if ct.columns.is_empty()
6863 && ct.constraints.iter().any(|c| {
6864 matches!(c, crate::expressions::TableConstraint::Like { .. })
6865 })
6866 && matches!(
6867 target,
6868 DialectType::DuckDB | DialectType::SQLite | DialectType::Drill
6869 ) =>
6870 {
6871 Action::CreateTableLikeToCtas
6872 }
6873 Expression::CreateTable(ref ct)
6874 if ct.columns.is_empty()
6875 && ct.constraints.iter().any(|c| {
6876 matches!(c, crate::expressions::TableConstraint::Like { .. })
6877 })
6878 && matches!(target, DialectType::TSQL | DialectType::Fabric) =>
6879 {
6880 Action::CreateTableLikeToSelectInto
6881 }
6882 Expression::CreateTable(ref ct)
6883 if ct.columns.is_empty()
6884 && ct.constraints.iter().any(|c| {
6885 matches!(c, crate::expressions::TableConstraint::Like { .. })
6886 })
6887 && matches!(target, DialectType::ClickHouse) =>
6888 {
6889 Action::CreateTableLikeToAs
6890 }
6891 // CREATE TABLE: strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
6892 Expression::CreateTable(ref ct)
6893 if matches!(target, DialectType::DuckDB)
6894 && matches!(
6895 source,
6896 DialectType::DuckDB
6897 | DialectType::Spark
6898 | DialectType::Databricks
6899 | DialectType::Hive
6900 ) =>
6901 {
6902 let has_comment = ct.columns.iter().any(|c| {
6903 c.comment.is_some()
6904 || c.constraints.iter().any(|con| {
6905 matches!(con, crate::expressions::ColumnConstraint::Comment(_))
6906 })
6907 });
6908 let has_props = !ct.properties.is_empty();
6909 if has_comment || has_props {
6910 Action::CreateTableStripComment
6911 } else {
6912 Action::None
6913 }
6914 }
6915 // Array conversion: Expression::Array -> Expression::ArrayFunc for PostgreSQL
6916 Expression::Array(_)
6917 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) =>
6918 {
6919 Action::ArrayConcatBracketConvert
6920 }
6921 // ArrayFunc (bracket notation) -> Function("ARRAY") for Redshift (from BigQuery source)
6922 Expression::ArrayFunc(ref arr)
6923 if arr.bracket_notation
6924 && matches!(source, DialectType::BigQuery)
6925 && matches!(target, DialectType::Redshift) =>
6926 {
6927 Action::ArrayConcatBracketConvert
6928 }
6929 // BIT_OR/BIT_AND/BIT_XOR: float/decimal arg cast for DuckDB, or rename for Snowflake
6930 Expression::BitwiseOrAgg(ref f)
6931 | Expression::BitwiseAndAgg(ref f)
6932 | Expression::BitwiseXorAgg(ref f) => {
6933 if matches!(target, DialectType::DuckDB) {
6934 // Check if the arg is CAST(val AS FLOAT/DOUBLE/DECIMAL/REAL)
6935 if let Expression::Cast(ref c) = f.this {
6936 match &c.to {
6937 DataType::Float { .. }
6938 | DataType::Double { .. }
6939 | DataType::Decimal { .. } => Action::BitAggFloatCast,
6940 DataType::Custom { ref name }
6941 if name.eq_ignore_ascii_case("REAL") =>
6942 {
6943 Action::BitAggFloatCast
6944 }
6945 _ => Action::None,
6946 }
6947 } else {
6948 Action::None
6949 }
6950 } else if matches!(target, DialectType::Snowflake) {
6951 Action::BitAggSnowflakeRename
6952 } else {
6953 Action::None
6954 }
6955 }
6956 // FILTER -> IFF for Snowflake (aggregate functions with FILTER clause)
6957 Expression::Filter(ref _f) if matches!(target, DialectType::Snowflake) => {
6958 Action::FilterToIff
6959 }
6960 // AggFunc.filter -> IFF wrapping for Snowflake (e.g., AVG(x) FILTER(WHERE cond))
6961 Expression::Avg(ref f)
6962 | Expression::Sum(ref f)
6963 | Expression::Min(ref f)
6964 | Expression::Max(ref f)
6965 | Expression::CountIf(ref f)
6966 | Expression::Stddev(ref f)
6967 | Expression::StddevPop(ref f)
6968 | Expression::StddevSamp(ref f)
6969 | Expression::Variance(ref f)
6970 | Expression::VarPop(ref f)
6971 | Expression::VarSamp(ref f)
6972 | Expression::Median(ref f)
6973 | Expression::Mode(ref f)
6974 | Expression::First(ref f)
6975 | Expression::Last(ref f)
6976 | Expression::ApproxDistinct(ref f)
6977 if f.filter.is_some() && matches!(target, DialectType::Snowflake) =>
6978 {
6979 Action::AggFilterToIff
6980 }
6981 Expression::Count(ref c)
6982 if c.filter.is_some() && matches!(target, DialectType::Snowflake) =>
6983 {
6984 Action::AggFilterToIff
6985 }
6986 // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN ... END) for dialects that don't support multi-arg DISTINCT
6987 Expression::Count(ref c)
6988 if c.distinct
6989 && matches!(&c.this, Some(Expression::Tuple(_)))
6990 && matches!(
6991 target,
6992 DialectType::Presto
6993 | DialectType::Trino
6994 | DialectType::DuckDB
6995 | DialectType::PostgreSQL
6996 ) =>
6997 {
6998 Action::CountDistinctMultiArg
6999 }
7000 // JSON arrow -> GET_PATH/PARSE_JSON for Snowflake
7001 Expression::JsonExtract(_) if matches!(target, DialectType::Snowflake) => {
7002 Action::JsonToGetPath
7003 }
7004 // DuckDB struct/dict -> BigQuery STRUCT / Presto ROW
7005 Expression::Struct(_)
7006 if matches!(
7007 target,
7008 DialectType::BigQuery | DialectType::Presto | DialectType::Trino
7009 ) && matches!(source, DialectType::DuckDB) =>
7010 {
7011 Action::StructToRow
7012 }
7013 // DuckDB curly-brace dict {'key': value} -> BigQuery STRUCT / Presto ROW
7014 Expression::MapFunc(ref m)
7015 if m.curly_brace_syntax
7016 && matches!(
7017 target,
7018 DialectType::BigQuery | DialectType::Presto | DialectType::Trino
7019 )
7020 && matches!(source, DialectType::DuckDB) =>
7021 {
7022 Action::StructToRow
7023 }
7024 // APPROX_COUNT_DISTINCT -> APPROX_DISTINCT for Presto/Trino
7025 Expression::ApproxCountDistinct(_)
7026 if matches!(
7027 target,
7028 DialectType::Presto | DialectType::Trino | DialectType::Athena
7029 ) =>
7030 {
7031 Action::ApproxCountDistinctToApproxDistinct
7032 }
7033 // ARRAY_CONTAINS(arr, val) -> CONTAINS(arr, val) for Presto, ARRAY_CONTAINS(CAST(val AS VARIANT), arr) for Snowflake
7034 Expression::ArrayContains(_)
7035 if matches!(
7036 target,
7037 DialectType::Presto | DialectType::Trino | DialectType::Snowflake
7038 ) =>
7039 {
7040 Action::ArrayContainsConvert
7041 }
7042 // StrPosition with position -> complex expansion for Presto/DuckDB
7043 // STRPOS doesn't support a position arg in these dialects
7044 Expression::StrPosition(ref sp)
7045 if sp.position.is_some()
7046 && matches!(
7047 target,
7048 DialectType::Presto
7049 | DialectType::Trino
7050 | DialectType::Athena
7051 | DialectType::DuckDB
7052 ) =>
7053 {
7054 Action::StrPositionExpand
7055 }
7056 // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
7057 Expression::First(ref f)
7058 if f.ignore_nulls == Some(true)
7059 && matches!(target, DialectType::DuckDB) =>
7060 {
7061 Action::FirstToAnyValue
7062 }
7063 // BEGIN -> START TRANSACTION for Presto/Trino
7064 Expression::Command(ref cmd)
7065 if cmd.this.eq_ignore_ascii_case("BEGIN")
7066 && matches!(
7067 target,
7068 DialectType::Presto | DialectType::Trino | DialectType::Athena
7069 ) =>
7070 {
7071                 // No dedicated Action here: BEGIN statements parsed as Expression::Transaction are rewritten to START TRANSACTION inline in the Action::None handler below; NOTE(review): a BEGIN reaching this arm as a bare Command falls through unchanged — confirm the parser always yields Expression::Transaction for BEGIN
7072 Action::None // We'll handle it directly
7073 }
7074 // Note: PostgreSQL ^ is now parsed as Power directly (not BitwiseXor).
7075 // PostgreSQL # is parsed as BitwiseXor (which is correct).
7076 // a || b (Concat operator) -> CONCAT function for Presto/Trino
7077 Expression::Concat(ref _op)
7078 if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
7079 && matches!(target, DialectType::Presto | DialectType::Trino) =>
7080 {
7081 Action::PipeConcatToConcat
7082 }
7083 _ => Action::None,
7084 }
7085 };
7086
7087 match action {
7088 Action::None => {
7089 // Handle inline transforms that don't need a dedicated action
7090
7091 // BETWEEN SYMMETRIC/ASYMMETRIC expansion for non-PostgreSQL/Dremio targets
7092 if let Expression::Between(ref b) = e {
7093 if let Some(sym) = b.symmetric {
7094 let keeps_symmetric =
7095 matches!(target, DialectType::PostgreSQL | DialectType::Dremio);
7096 if !keeps_symmetric {
7097 if sym {
7098 // SYMMETRIC: expand to (x BETWEEN a AND b OR x BETWEEN b AND a)
7099 let b = if let Expression::Between(b) = e {
7100 *b
7101 } else {
7102 unreachable!()
7103 };
7104 let between1 = Expression::Between(Box::new(
7105 crate::expressions::Between {
7106 this: b.this.clone(),
7107 low: b.low.clone(),
7108 high: b.high.clone(),
7109 not: b.not,
7110 symmetric: None,
7111 },
7112 ));
7113 let between2 = Expression::Between(Box::new(
7114 crate::expressions::Between {
7115 this: b.this,
7116 low: b.high,
7117 high: b.low,
7118 not: b.not,
7119 symmetric: None,
7120 },
7121 ));
7122 return Ok(Expression::Paren(Box::new(
7123 crate::expressions::Paren {
7124 this: Expression::Or(Box::new(
7125 crate::expressions::BinaryOp::new(
7126 between1, between2,
7127 ),
7128 )),
7129 trailing_comments: vec![],
7130 },
7131 )));
7132 } else {
7133 // ASYMMETRIC: strip qualifier, keep as regular BETWEEN
7134 let b = if let Expression::Between(b) = e {
7135 *b
7136 } else {
7137 unreachable!()
7138 };
7139 return Ok(Expression::Between(Box::new(
7140 crate::expressions::Between {
7141 this: b.this,
7142 low: b.low,
7143 high: b.high,
7144 not: b.not,
7145 symmetric: None,
7146 },
7147 )));
7148 }
7149 }
7150 }
7151 }
7152
7153 // ILIKE -> LOWER(x) LIKE LOWER(y) for StarRocks/Doris
7154 if let Expression::ILike(ref _like) = e {
7155 if matches!(target, DialectType::StarRocks | DialectType::Doris) {
7156 let like = if let Expression::ILike(l) = e {
7157 *l
7158 } else {
7159 unreachable!()
7160 };
7161 let lower_left = Expression::Function(Box::new(Function::new(
7162 "LOWER".to_string(),
7163 vec![like.left],
7164 )));
7165 let lower_right = Expression::Function(Box::new(Function::new(
7166 "LOWER".to_string(),
7167 vec![like.right],
7168 )));
7169 return Ok(Expression::Like(Box::new(crate::expressions::LikeOp {
7170 left: lower_left,
7171 right: lower_right,
7172 escape: like.escape,
7173 quantifier: like.quantifier,
7174 })));
7175 }
7176 }
7177
7178 // Oracle DBMS_RANDOM.VALUE() -> RANDOM() for PostgreSQL, RAND() for others
7179 if let Expression::MethodCall(ref mc) = e {
7180 if matches!(source, DialectType::Oracle)
7181 && mc.method.name.eq_ignore_ascii_case("VALUE")
7182 && mc.args.is_empty()
7183 {
7184 let is_dbms_random = match &mc.this {
7185 Expression::Identifier(id) => {
7186 id.name.eq_ignore_ascii_case("DBMS_RANDOM")
7187 }
7188 Expression::Column(col) => {
7189 col.table.is_none()
7190 && col.name.name.eq_ignore_ascii_case("DBMS_RANDOM")
7191 }
7192 _ => false,
7193 };
7194 if is_dbms_random {
7195 let func_name = match target {
7196 DialectType::PostgreSQL
7197 | DialectType::Redshift
7198 | DialectType::DuckDB
7199 | DialectType::SQLite => "RANDOM",
7200 DialectType::Oracle => "DBMS_RANDOM.VALUE",
7201 _ => "RAND",
7202 };
7203 return Ok(Expression::Function(Box::new(Function::new(
7204 func_name.to_string(),
7205 vec![],
7206 ))));
7207 }
7208 }
7209 }
7210 // TRIM without explicit position -> add BOTH for ClickHouse
7211 if let Expression::Trim(ref trim) = e {
7212 if matches!(target, DialectType::ClickHouse)
7213 && trim.sql_standard_syntax
7214 && trim.characters.is_some()
7215 && !trim.position_explicit
7216 {
7217 let mut new_trim = (**trim).clone();
7218 new_trim.position_explicit = true;
7219 return Ok(Expression::Trim(Box::new(new_trim)));
7220 }
7221 }
7222 // BEGIN -> START TRANSACTION for Presto/Trino
7223 if let Expression::Transaction(ref txn) = e {
7224 if matches!(
7225 target,
7226 DialectType::Presto | DialectType::Trino | DialectType::Athena
7227 ) {
7228 // Convert BEGIN to START TRANSACTION by setting mark to "START"
7229 let mut txn = txn.clone();
7230 txn.mark = Some(Box::new(Expression::Identifier(Identifier::new(
7231 "START".to_string(),
7232 ))));
7233 return Ok(Expression::Transaction(Box::new(*txn)));
7234 }
7235 }
7236 // IS TRUE/FALSE -> simplified forms for Presto/Trino
7237 if matches!(
7238 target,
7239 DialectType::Presto | DialectType::Trino | DialectType::Athena
7240 ) {
7241 match &e {
7242 Expression::IsTrue(itf) if !itf.not => {
7243 // x IS TRUE -> x
7244 return Ok(itf.this.clone());
7245 }
7246 Expression::IsTrue(itf) if itf.not => {
7247 // x IS NOT TRUE -> NOT x
7248 return Ok(Expression::Not(Box::new(
7249 crate::expressions::UnaryOp {
7250 this: itf.this.clone(),
7251 },
7252 )));
7253 }
7254 Expression::IsFalse(itf) if !itf.not => {
7255 // x IS FALSE -> NOT x
7256 return Ok(Expression::Not(Box::new(
7257 crate::expressions::UnaryOp {
7258 this: itf.this.clone(),
7259 },
7260 )));
7261 }
7262 Expression::IsFalse(itf) if itf.not => {
7263 // x IS NOT FALSE -> NOT NOT x
7264 let not_x =
7265 Expression::Not(Box::new(crate::expressions::UnaryOp {
7266 this: itf.this.clone(),
7267 }));
7268 return Ok(Expression::Not(Box::new(
7269 crate::expressions::UnaryOp { this: not_x },
7270 )));
7271 }
7272 _ => {}
7273 }
7274 }
7275 // x IS NOT FALSE -> NOT x IS FALSE for Redshift
7276 if matches!(target, DialectType::Redshift) {
7277 if let Expression::IsFalse(ref itf) = e {
7278 if itf.not {
7279 return Ok(Expression::Not(Box::new(
7280 crate::expressions::UnaryOp {
7281 this: Expression::IsFalse(Box::new(
7282 crate::expressions::IsTrueFalse {
7283 this: itf.this.clone(),
7284 not: false,
7285 },
7286 )),
7287 },
7288 )));
7289 }
7290 }
7291 }
7292 // REGEXP_REPLACE: add 'g' flag when source defaults to global replacement
7293 // Snowflake default is global, PostgreSQL/DuckDB default is first-match-only
7294 if let Expression::Function(ref f) = e {
7295 if f.name.eq_ignore_ascii_case("REGEXP_REPLACE")
7296 && matches!(source, DialectType::Snowflake)
7297 && matches!(target, DialectType::PostgreSQL | DialectType::DuckDB)
7298 {
7299 if f.args.len() == 3 {
7300 let mut args = f.args.clone();
7301 args.push(Expression::string("g"));
7302 return Ok(Expression::Function(Box::new(Function::new(
7303 "REGEXP_REPLACE".to_string(),
7304 args,
7305 ))));
7306 } else if f.args.len() == 4 {
7307 // 4th arg might be position, add 'g' as 5th
7308 let mut args = f.args.clone();
7309 args.push(Expression::string("g"));
7310 return Ok(Expression::Function(Box::new(Function::new(
7311 "REGEXP_REPLACE".to_string(),
7312 args,
7313 ))));
7314 }
7315 }
7316 }
7317 Ok(e)
7318 }
7319
            Action::GreatestLeastNull => {
                // Rewrite GREATEST/LEAST(a, b, ...) into
                //   CASE WHEN a IS NULL OR b IS NULL ... THEN NULL ELSE f(a, b, ...) END
                // to emulate null-propagating semantics on targets whose native
                // GREATEST/LEAST ignore NULL arguments.
                let f = if let Expression::Function(f) = e {
                    *f
                } else {
                    unreachable!("action only triggered for Function expressions")
                };
                // One `arg IS NULL` predicate per argument, in argument order.
                let mut null_checks: Vec<Expression> = f
                    .args
                    .iter()
                    .map(|a| {
                        Expression::IsNull(Box::new(IsNull {
                            this: a.clone(),
                            not: false,
                            postfix_form: false,
                        }))
                    })
                    .collect();
                // Fold the predicates into a single OR chain. NOTE(review):
                // `remove(0)` assumes at least one argument — a zero-argument
                // call would panic here; presumably the action is only queued
                // for well-formed calls — TODO confirm upstream guarantee.
                let condition = if null_checks.len() == 1 {
                    null_checks.remove(0)
                } else {
                    let first = null_checks.remove(0);
                    null_checks.into_iter().fold(first, |acc, check| {
                        Expression::Or(Box::new(BinaryOp::new(acc, check)))
                    })
                };
                // ELSE branch re-emits the original call unchanged.
                Ok(Expression::Case(Box::new(Case {
                    operand: None,
                    whens: vec![(condition, Expression::Null(Null))],
                    else_: Some(Expression::Function(Box::new(Function::new(
                        f.name, f.args,
                    )))),
                    comments: Vec::new(),
                })))
            }
7354
            Action::ArrayGenerateRange => {
                // Map a range-generating function (exclusive upper bound) onto the
                // target dialect's equivalent. Targets with an *inclusive* upper
                // bound (GENERATE_SERIES, SEQUENCE, GENERATE_ARRAY) receive
                // `end - 1`; Snowflake keeps the exclusive bound, spelled as
                // `(end - 1) + 1` so the rewritten text still reflects the
                // normalized form.
                let f = if let Expression::Function(f) = e {
                    *f
                } else {
                    unreachable!("action only triggered for Function expressions")
                };
                // NOTE(review): indexing assumes at least two arguments — TODO
                // confirm the action is only queued for 2- or 3-arg calls.
                let start = f.args[0].clone();
                let end = f.args[1].clone();
                let step = f.args.get(2).cloned();

                let end_minus_1 = Expression::Sub(Box::new(BinaryOp::new(
                    end.clone(),
                    Expression::number(1),
                )));

                match target {
                    DialectType::PostgreSQL | DialectType::Redshift => {
                        // GENERATE_SERIES(start, end - 1 [, step])
                        let mut args = vec![start, end_minus_1];
                        if let Some(s) = step {
                            args.push(s);
                        }
                        Ok(Expression::Function(Box::new(Function::new(
                            "GENERATE_SERIES".to_string(),
                            args,
                        ))))
                    }
                    DialectType::Presto | DialectType::Trino => {
                        // SEQUENCE(start, end - 1 [, step])
                        let mut args = vec![start, end_minus_1];
                        if let Some(s) = step {
                            args.push(s);
                        }
                        Ok(Expression::Function(Box::new(Function::new(
                            "SEQUENCE".to_string(),
                            args,
                        ))))
                    }
                    DialectType::BigQuery => {
                        // GENERATE_ARRAY(start, end - 1 [, step])
                        let mut args = vec![start, end_minus_1];
                        if let Some(s) = step {
                            args.push(s);
                        }
                        Ok(Expression::Function(Box::new(Function::new(
                            "GENERATE_ARRAY".to_string(),
                            args,
                        ))))
                    }
                    DialectType::Snowflake => {
                        // ARRAY_GENERATE_RANGE keeps the exclusive end; render it
                        // as `(end - 1) + 1` (paren preserves precedence).
                        let normalized_end = Expression::Add(Box::new(BinaryOp::new(
                            Expression::Paren(Box::new(Paren {
                                this: end_minus_1,
                                trailing_comments: vec![],
                            })),
                            Expression::number(1),
                        )));
                        let mut args = vec![start, normalized_end];
                        if let Some(s) = step {
                            args.push(s);
                        }
                        Ok(Expression::Function(Box::new(Function::new(
                            "ARRAY_GENERATE_RANGE".to_string(),
                            args,
                        ))))
                    }
                    // Any other target: re-emit the original call unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        f.name, f.args,
                    )))),
                }
            }
7423
            Action::Div0TypedDivision => {
                // Inside an IF(cond, t, x / y) produced by a DIV0-style rewrite,
                // cast the dividend to a floating type so the target performs
                // real (not integer) division. SQLite spells the type REAL;
                // everyone else gets DOUBLE.
                let if_func = if let Expression::IfFunc(f) = e {
                    *f
                } else {
                    unreachable!("action only triggered for IfFunc expressions")
                };
                if let Some(Expression::Div(div)) = if_func.false_value {
                    let cast_type = if matches!(target, DialectType::SQLite) {
                        DataType::Float {
                            precision: None,
                            scale: None,
                            real_spelling: true,
                        }
                    } else {
                        DataType::Double {
                            precision: None,
                            scale: None,
                        }
                    };
                    // Only the left operand is cast; the division result is then
                    // already floating-point.
                    let casted_left = Expression::Cast(Box::new(Cast {
                        this: div.left,
                        to: cast_type,
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    }));
                    Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
                        condition: if_func.condition,
                        true_value: if_func.true_value,
                        false_value: Some(Expression::Div(Box::new(BinaryOp::new(
                            casted_left,
                            div.right,
                        )))),
                        original_name: if_func.original_name,
                    })))
                } else {
                    // False branch was not a Div (or absent): the `if let` above
                    // consumed `false_value`, so rebuild the IfFunc as-is.
                    // NOTE(review): a non-Div false_value is dropped to None here
                    // because the pattern match moved it — TODO confirm this case
                    // cannot carry a non-Div false branch.
                    Ok(Expression::IfFunc(Box::new(if_func)))
                }
            }
7465
            Action::ArrayAggCollectList => {
                // Rename ARRAY_AGG to COLLECT_LIST (Spark/Hive spelling); all
                // other aggregate attributes are kept via struct update.
                let agg = if let Expression::ArrayAgg(a) = e {
                    *a
                } else {
                    unreachable!("action only triggered for ArrayAgg expressions")
                };
                Ok(Expression::ArrayAgg(Box::new(AggFunc {
                    name: Some("COLLECT_LIST".to_string()),
                    ..agg
                })))
            }

            Action::ArrayAggWithinGroupFilter => {
                // ARRAY_AGG(x) WITHIN GROUP (ORDER BY ...) ->
                // ARRAY_AGG(x ORDER BY ...) FILTER (WHERE x IS NOT NULL),
                // folding the WITHIN GROUP ordering into the aggregate and adding
                // a NOT NULL filter to match the source's null-excluding behavior.
                let wg = if let Expression::WithinGroup(w) = e {
                    *w
                } else {
                    unreachable!("action only triggered for WithinGroup expressions")
                };
                if let Expression::ArrayAgg(inner_agg) = wg.this {
                    let col = inner_agg.this.clone();
                    let filter = Expression::IsNull(Box::new(IsNull {
                        this: col,
                        not: true,
                        postfix_form: false,
                    }));
                    // For DuckDB, add explicit NULLS FIRST for DESC ordering
                    // (DuckDB's default NULL placement differs from the source).
                    let order_by = if matches!(target, DialectType::DuckDB) {
                        wg.order_by
                            .into_iter()
                            .map(|mut o| {
                                if o.desc && o.nulls_first.is_none() {
                                    o.nulls_first = Some(true);
                                }
                                o
                            })
                            .collect()
                    } else {
                        wg.order_by
                    };
                    Ok(Expression::ArrayAgg(Box::new(AggFunc {
                        this: inner_agg.this,
                        distinct: inner_agg.distinct,
                        filter: Some(filter),
                        order_by,
                        name: inner_agg.name,
                        ignore_nulls: inner_agg.ignore_nulls,
                        having_max: inner_agg.having_max,
                        limit: inner_agg.limit,
                    })))
                } else {
                    // Not an ARRAY_AGG inside the WITHIN GROUP: leave untouched.
                    Ok(Expression::WithinGroup(Box::new(wg)))
                }
            }

            Action::ArrayAggFilter => {
                // Add FILTER (WHERE x IS NOT NULL) to a bare ARRAY_AGG(x) so a
                // null-including target matches a null-excluding source.
                let agg = if let Expression::ArrayAgg(a) = e {
                    *a
                } else {
                    unreachable!("action only triggered for ArrayAgg expressions")
                };
                let col = agg.this.clone();
                let filter = Expression::IsNull(Box::new(IsNull {
                    this: col,
                    not: true,
                    postfix_form: false,
                }));
                Ok(Expression::ArrayAgg(Box::new(AggFunc {
                    filter: Some(filter),
                    ..agg
                })))
            }

            Action::ArrayAggNullFilter => {
                // ARRAY_AGG(x) FILTER(WHERE cond) -> ARRAY_AGG(x) FILTER(WHERE cond AND NOT x IS NULL)
                // For source dialects that exclude NULLs (Spark/Hive) targeting DuckDB which includes them
                let agg = if let Expression::ArrayAgg(a) = e {
                    *a
                } else {
                    unreachable!("action only triggered for ArrayAgg expressions")
                };
                let col = agg.this.clone();
                let not_null = Expression::IsNull(Box::new(IsNull {
                    this: col,
                    not: true,
                    postfix_form: true, // Use "NOT x IS NULL" form (prefix NOT)
                }));
                let new_filter = if let Some(existing_filter) = agg.filter {
                    // AND the NOT IS NULL with existing filter
                    Expression::And(Box::new(crate::expressions::BinaryOp::new(
                        existing_filter,
                        not_null,
                    )))
                } else {
                    not_null
                };
                Ok(Expression::ArrayAgg(Box::new(AggFunc {
                    filter: Some(new_filter),
                    ..agg
                })))
            }
7566
            Action::BigQueryArraySelectAsStructToSnowflake => {
                // ARRAY(SELECT AS STRUCT x1 AS x1, x2 AS x2 FROM t)
                // -> (SELECT ARRAY_AGG(OBJECT_CONSTRUCT('x1', x1, 'x2', x2)) FROM t)
                // BigQuery's struct-producing ARRAY subquery becomes a Snowflake
                // scalar subquery aggregating OBJECT_CONSTRUCT values.
                if let Expression::Function(mut f) = e {
                    // Only fires on ARRAY(<single SELECT ... AS STRUCT>) shape.
                    let is_match = f.args.len() == 1
                        && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT"));
                    if is_match {
                        let inner_select = match f.args.remove(0) {
                            Expression::Select(s) => *s,
                            _ => unreachable!(
                                "argument already verified to be a Select expression"
                            ),
                        };
                        // Build OBJECT_CONSTRUCT args from SELECT expressions:
                        // aliased items use the alias as key, bare columns use the
                        // column name; anything else is passed through positionally.
                        let mut oc_args = Vec::new();
                        for expr in &inner_select.expressions {
                            match expr {
                                Expression::Alias(a) => {
                                    let key = Expression::Literal(Literal::String(
                                        a.alias.name.clone(),
                                    ));
                                    let value = a.this.clone();
                                    oc_args.push(key);
                                    oc_args.push(value);
                                }
                                Expression::Column(c) => {
                                    let key = Expression::Literal(Literal::String(
                                        c.name.name.clone(),
                                    ));
                                    oc_args.push(key);
                                    oc_args.push(expr.clone());
                                }
                                _ => {
                                    oc_args.push(expr.clone());
                                }
                            }
                        }
                        let object_construct = Expression::Function(Box::new(Function::new(
                            "OBJECT_CONSTRUCT".to_string(),
                            oc_args,
                        )));
                        let array_agg = Expression::Function(Box::new(Function::new(
                            "ARRAY_AGG".to_string(),
                            vec![object_construct],
                        )));
                        // Rebuild the SELECT around ARRAY_AGG, preserving the
                        // original FROM/WHERE/GROUP BY/HAVING/JOIN clauses.
                        let mut new_select = crate::expressions::Select::new();
                        new_select.expressions = vec![array_agg];
                        new_select.from = inner_select.from.clone();
                        new_select.where_clause = inner_select.where_clause.clone();
                        new_select.group_by = inner_select.group_by.clone();
                        new_select.having = inner_select.having.clone();
                        new_select.joins = inner_select.joins.clone();
                        // Wrap in a bare (unaliased) scalar subquery.
                        Ok(Expression::Subquery(Box::new(
                            crate::expressions::Subquery {
                                this: Expression::Select(Box::new(new_select)),
                                alias: None,
                                column_aliases: Vec::new(),
                                order_by: None,
                                limit: None,
                                offset: None,
                                distribute_by: None,
                                sort_by: None,
                                cluster_by: None,
                                lateral: false,
                                modifiers_inside: false,
                                trailing_comments: Vec::new(),
                            },
                        )))
                    } else {
                        Ok(Expression::Function(f))
                    }
                } else {
                    Ok(e)
                }
            }
7642
            Action::BigQueryPercentileContToDuckDB => {
                // PERCENTILE_CONT(x, frac [RESPECT NULLS]) -> QUANTILE_CONT(x, frac) for DuckDB
                if let Expression::AggregateFunction(mut af) = e {
                    af.name = "QUANTILE_CONT".to_string();
                    af.ignore_nulls = None; // Strip RESPECT/IGNORE NULLS
                    // Keep only first 2 args; DuckDB's QUANTILE_CONT takes no
                    // extra options.
                    if af.args.len() > 2 {
                        af.args.truncate(2);
                    }
                    Ok(Expression::AggregateFunction(af))
                } else {
                    Ok(e)
                }
            }

            Action::ArrayAggIgnoreNullsDuckDB => {
                // ARRAY_AGG(x IGNORE NULLS ORDER BY a, b DESC) -> ARRAY_AGG(x ORDER BY a NULLS FIRST, b DESC)
                // Strip IGNORE NULLS, add NULLS FIRST to first ORDER BY column
                let mut agg = if let Expression::ArrayAgg(a) = e {
                    *a
                } else {
                    unreachable!("action only triggered for ArrayAgg expressions")
                };
                agg.ignore_nulls = None; // Strip IGNORE NULLS
                // Only the leading ORDER BY key gets an explicit NULLS FIRST.
                if !agg.order_by.is_empty() {
                    agg.order_by[0].nulls_first = Some(true);
                }
                Ok(Expression::ArrayAgg(Box::new(agg)))
            }
7672
            Action::CountDistinctMultiArg => {
                // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN a IS NULL THEN NULL WHEN b IS NULL THEN NULL ELSE (a, b) END)
                // Emulates multi-argument DISTINCT counting on targets that only
                // accept a single expression: a row tuple stands in for the
                // argument list, and any NULL component collapses the whole row
                // to NULL (matching COUNT's null-skipping behavior).
                if let Expression::Count(c) = e {
                    if let Some(Expression::Tuple(t)) = c.this {
                        let args = t.expressions;
                        // Build CASE expression:
                        // WHEN a IS NULL THEN NULL WHEN b IS NULL THEN NULL ELSE (a, b) END
                        let mut whens = Vec::new();
                        for arg in &args {
                            whens.push((
                                Expression::IsNull(Box::new(IsNull {
                                    this: arg.clone(),
                                    not: false,
                                    postfix_form: false,
                                })),
                                Expression::Null(crate::expressions::Null),
                            ));
                        }
                        // Build the tuple for ELSE
                        let tuple_expr =
                            Expression::Tuple(Box::new(crate::expressions::Tuple {
                                expressions: args,
                            }));
                        let case_expr = Expression::Case(Box::new(crate::expressions::Case {
                            operand: None,
                            whens,
                            else_: Some(tuple_expr),
                            comments: Vec::new(),
                        }));
                        // Preserve the original FILTER / IGNORE NULLS / name.
                        Ok(Expression::Count(Box::new(crate::expressions::CountFunc {
                            this: Some(case_expr),
                            star: false,
                            distinct: true,
                            filter: c.filter,
                            ignore_nulls: c.ignore_nulls,
                            original_name: c.original_name,
                        })))
                    } else {
                        // Single-argument COUNT: nothing to rewrite.
                        Ok(Expression::Count(c))
                    }
                } else {
                    Ok(e)
                }
            }
7717
            Action::CastTimestampToDatetime => {
                // Retarget the cast type to the literal DATETIME spelling
                // (kept as a Custom type so the generator emits it verbatim).
                let c = if let Expression::Cast(c) = e {
                    *c
                } else {
                    unreachable!("action only triggered for Cast expressions")
                };
                Ok(Expression::Cast(Box::new(Cast {
                    to: DataType::Custom {
                        name: "DATETIME".to_string(),
                    },
                    ..c
                })))
            }

            Action::CastTimestampStripTz => {
                // CAST(x AS TIMESTAMP(n) WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark/BigQuery
                // Drops both the precision and the timezone qualifier.
                let c = if let Expression::Cast(c) = e {
                    *c
                } else {
                    unreachable!("action only triggered for Cast expressions")
                };
                Ok(Expression::Cast(Box::new(Cast {
                    to: DataType::Timestamp {
                        precision: None,
                        timezone: false,
                    },
                    ..c
                })))
            }

            Action::CastTimestamptzToFunc => {
                // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) function for MySQL/StarRocks
                let c = if let Expression::Cast(c) = e {
                    *c
                } else {
                    unreachable!("action only triggered for Cast expressions")
                };
                Ok(Expression::Function(Box::new(Function::new(
                    "TIMESTAMP".to_string(),
                    vec![c.this],
                ))))
            }
7760
7761 Action::ToDateToCast => {
7762 // Convert TO_DATE(x) -> CAST(x AS DATE) for DuckDB
7763 if let Expression::Function(f) = e {
7764 let arg = f.args.into_iter().next().unwrap();
7765 Ok(Expression::Cast(Box::new(Cast {
7766 this: arg,
7767 to: DataType::Date,
7768 double_colon_syntax: false,
7769 trailing_comments: vec![],
7770 format: None,
7771 default: None,
7772 })))
7773 } else {
7774 Ok(e)
7775 }
7776 }
            Action::DateTruncWrapCast => {
                // Handle both Expression::DateTrunc/TimestampTrunc and
                // Expression::Function("DATE_TRUNC", [unit, expr]).
                //
                // When the truncation input is a CAST, the result is re-wrapped
                // in a CAST back to that type (targets whose DATE_TRUNC returns
                // a timestamp would otherwise change the expression's type).
                // TIME inputs get special handling: TIME cannot be truncated
                // directly, so it is anchored to the 1970-01-01 epoch date,
                // truncated, and cast back to TIME.
                //
                // NOTE(review): the two arms below are intentionally parallel
                // implementations of the same rewrite for the node-based and
                // function-based representations; keep them in sync.
                match e {
                    Expression::DateTrunc(d) | Expression::TimestampTrunc(d) => {
                        // Capture the cast target type of the input, if any.
                        let input_type = match &d.this {
                            Expression::Cast(c) => Some(c.to.clone()),
                            _ => None,
                        };
                        if let Some(cast_type) = input_type {
                            let is_time = matches!(cast_type, DataType::Time { .. });
                            if is_time {
                                // CAST('1970-01-01' AS DATE) + <time> anchors the
                                // time-of-day to a fixed date so DATE_TRUNC applies.
                                let date_expr = Expression::Cast(Box::new(Cast {
                                    this: Expression::Literal(
                                        crate::expressions::Literal::String(
                                            "1970-01-01".to_string(),
                                        ),
                                    ),
                                    to: DataType::Date,
                                    double_colon_syntax: false,
                                    trailing_comments: vec![],
                                    format: None,
                                    default: None,
                                }));
                                let add_expr =
                                    Expression::Add(Box::new(BinaryOp::new(date_expr, d.this)));
                                let inner = Expression::DateTrunc(Box::new(DateTruncFunc {
                                    this: add_expr,
                                    unit: d.unit,
                                }));
                                // Cast the truncated timestamp back to TIME.
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: inner,
                                    to: cast_type,
                                    double_colon_syntax: false,
                                    trailing_comments: vec![],
                                    format: None,
                                    default: None,
                                })))
                            } else {
                                // Non-TIME input: just wrap the truncation in a
                                // CAST back to the input's declared type.
                                let inner = Expression::DateTrunc(Box::new(*d));
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: inner,
                                    to: cast_type,
                                    double_colon_syntax: false,
                                    trailing_comments: vec![],
                                    format: None,
                                    default: None,
                                })))
                            }
                        } else {
                            // Input is not a CAST: nothing to preserve.
                            Ok(Expression::DateTrunc(d))
                        }
                    }
                    Expression::Function(f) if f.args.len() == 2 => {
                        // Function-based DATE_TRUNC(unit, expr)
                        let input_type = match &f.args[1] {
                            Expression::Cast(c) => Some(c.to.clone()),
                            _ => None,
                        };
                        if let Some(cast_type) = input_type {
                            let is_time = matches!(cast_type, DataType::Time { .. });
                            if is_time {
                                // Same epoch-anchoring trick as the node-based arm.
                                let date_expr = Expression::Cast(Box::new(Cast {
                                    this: Expression::Literal(
                                        crate::expressions::Literal::String(
                                            "1970-01-01".to_string(),
                                        ),
                                    ),
                                    to: DataType::Date,
                                    double_colon_syntax: false,
                                    trailing_comments: vec![],
                                    format: None,
                                    default: None,
                                }));
                                let mut args = f.args;
                                let unit_arg = args.remove(0);
                                let time_expr = args.remove(0);
                                let add_expr = Expression::Add(Box::new(BinaryOp::new(
                                    date_expr, time_expr,
                                )));
                                let inner = Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![unit_arg, add_expr],
                                )));
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: inner,
                                    to: cast_type,
                                    double_colon_syntax: false,
                                    trailing_comments: vec![],
                                    format: None,
                                    default: None,
                                })))
                            } else {
                                // Wrap the function in CAST
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: Expression::Function(f),
                                    to: cast_type,
                                    double_colon_syntax: false,
                                    trailing_comments: vec![],
                                    format: None,
                                    default: None,
                                })))
                            }
                        } else {
                            Ok(Expression::Function(f))
                        }
                    }
                    // Any other shape: pass through untouched.
                    other => Ok(other),
                }
            }
7887
7888 Action::RegexpReplaceSnowflakeToDuckDB => {
7889 // Snowflake REGEXP_REPLACE(s, p, r, position) -> REGEXP_REPLACE(s, p, r, 'g')
7890 if let Expression::Function(f) = e {
7891 let mut args = f.args;
7892 let subject = args.remove(0);
7893 let pattern = args.remove(0);
7894 let replacement = args.remove(0);
7895 Ok(Expression::Function(Box::new(Function::new(
7896 "REGEXP_REPLACE".to_string(),
7897 vec![
7898 subject,
7899 pattern,
7900 replacement,
7901 Expression::Literal(crate::expressions::Literal::String(
7902 "g".to_string(),
7903 )),
7904 ],
7905 ))))
7906 } else {
7907 Ok(e)
7908 }
7909 }
7910
7911 Action::SetToVariable => {
7912 // For DuckDB: SET a = 1 -> SET VARIABLE a = 1
7913 if let Expression::SetStatement(mut s) = e {
7914 for item in &mut s.items {
7915 if item.kind.is_none() {
7916 // Check if name already has VARIABLE prefix (from DuckDB source parsing)
7917 let already_variable = match &item.name {
7918 Expression::Identifier(id) => id.name.starts_with("VARIABLE "),
7919 _ => false,
7920 };
7921 if already_variable {
7922 // Extract the actual name and set kind
7923 if let Expression::Identifier(ref mut id) = item.name {
7924 let actual_name = id.name["VARIABLE ".len()..].to_string();
7925 id.name = actual_name;
7926 }
7927 }
7928 item.kind = Some("VARIABLE".to_string());
7929 }
7930 }
7931 Ok(Expression::SetStatement(s))
7932 } else {
7933 Ok(e)
7934 }
7935 }
7936
            Action::ConvertTimezoneToExpr => {
                // Convert Function("CONVERT_TIMEZONE", args) to Expression::ConvertTimezone.
                // This prevents Redshift's transform_expr from expanding 2-arg to 3-arg with 'UTC'.
                if let Expression::Function(f) = e {
                    if f.args.len() == 2 {
                        // 2-arg form: CONVERT_TIMEZONE(target_tz, timestamp).
                        let mut args = f.args;
                        let target_tz = args.remove(0);
                        let timestamp = args.remove(0);
                        Ok(Expression::ConvertTimezone(Box::new(ConvertTimezone {
                            source_tz: None,
                            target_tz: Some(Box::new(target_tz)),
                            timestamp: Some(Box::new(timestamp)),
                            options: vec![],
                        })))
                    } else if f.args.len() == 3 {
                        // 3-arg form: CONVERT_TIMEZONE(source_tz, target_tz, timestamp).
                        let mut args = f.args;
                        let source_tz = args.remove(0);
                        let target_tz = args.remove(0);
                        let timestamp = args.remove(0);
                        Ok(Expression::ConvertTimezone(Box::new(ConvertTimezone {
                            source_tz: Some(Box::new(source_tz)),
                            target_tz: Some(Box::new(target_tz)),
                            timestamp: Some(Box::new(timestamp)),
                            options: vec![],
                        })))
                    } else {
                        // Unexpected arity: keep the original function call.
                        Ok(Expression::Function(f))
                    }
                } else {
                    Ok(e)
                }
            }
7969
            Action::BigQueryCastType => {
                // Convert BigQuery type aliases (parsed as Custom types) to
                // standard SQL types: INT64 -> BIGINT, FLOAT64 -> DOUBLE,
                // BOOL -> BOOLEAN, BYTES -> VARBINARY, NUMERIC -> DECIMAL,
                // STRING -> STRING, DATETIME -> TIMESTAMP. Unknown Custom types
                // pass through unchanged.
                if let Expression::DataType(dt) = e {
                    match dt {
                        DataType::Custom { ref name } if name.eq_ignore_ascii_case("INT64") => {
                            Ok(Expression::DataType(DataType::BigInt { length: None }))
                        }
                        DataType::Custom { ref name }
                            if name.eq_ignore_ascii_case("FLOAT64") =>
                        {
                            Ok(Expression::DataType(DataType::Double {
                                precision: None,
                                scale: None,
                            }))
                        }
                        DataType::Custom { ref name } if name.eq_ignore_ascii_case("BOOL") => {
                            Ok(Expression::DataType(DataType::Boolean))
                        }
                        DataType::Custom { ref name } if name.eq_ignore_ascii_case("BYTES") => {
                            Ok(Expression::DataType(DataType::VarBinary { length: None }))
                        }
                        DataType::Custom { ref name }
                            if name.eq_ignore_ascii_case("NUMERIC") =>
                        {
                            // For DuckDB target, use Custom("DECIMAL") to avoid DuckDB's
                            // default precision (18, 3) being added to bare DECIMAL
                            if matches!(target, DialectType::DuckDB) {
                                Ok(Expression::DataType(DataType::Custom {
                                    name: "DECIMAL".to_string(),
                                }))
                            } else {
                                Ok(Expression::DataType(DataType::Decimal {
                                    precision: None,
                                    scale: None,
                                }))
                            }
                        }
                        DataType::Custom { ref name }
                            if name.eq_ignore_ascii_case("STRING") =>
                        {
                            Ok(Expression::DataType(DataType::String { length: None }))
                        }
                        DataType::Custom { ref name }
                            if name.eq_ignore_ascii_case("DATETIME") =>
                        {
                            // BigQuery DATETIME is timezone-less; map to a plain
                            // TIMESTAMP without timezone.
                            Ok(Expression::DataType(DataType::Timestamp {
                                precision: None,
                                timezone: false,
                            }))
                        }
                        _ => Ok(Expression::DataType(dt)),
                    }
                } else {
                    Ok(e)
                }
            }
8026
8027 Action::BigQuerySafeDivide => {
8028 // Convert SafeDivide expression to IF/CASE form for most targets
8029 if let Expression::SafeDivide(sd) = e {
8030 let x = *sd.this;
8031 let y = *sd.expression;
8032 // Wrap x and y in parens if they're complex expressions
8033 let y_ref = match &y {
8034 Expression::Column(_)
8035 | Expression::Literal(_)
8036 | Expression::Identifier(_) => y.clone(),
8037 _ => Expression::Paren(Box::new(Paren {
8038 this: y.clone(),
8039 trailing_comments: vec![],
8040 })),
8041 };
8042 let x_ref = match &x {
8043 Expression::Column(_)
8044 | Expression::Literal(_)
8045 | Expression::Identifier(_) => x.clone(),
8046 _ => Expression::Paren(Box::new(Paren {
8047 this: x.clone(),
8048 trailing_comments: vec![],
8049 })),
8050 };
8051 let condition = Expression::Neq(Box::new(BinaryOp::new(
8052 y_ref.clone(),
8053 Expression::number(0),
8054 )));
8055 let div_expr = Expression::Div(Box::new(BinaryOp::new(x_ref, y_ref)));
8056
8057 if matches!(target, DialectType::Presto | DialectType::Trino) {
8058 // Presto/Trino: IF(y <> 0, CAST(x AS DOUBLE) / y, NULL)
8059 let cast_x = Expression::Cast(Box::new(Cast {
8060 this: match &x {
8061 Expression::Column(_)
8062 | Expression::Literal(_)
8063 | Expression::Identifier(_) => x,
8064 _ => Expression::Paren(Box::new(Paren {
8065 this: x,
8066 trailing_comments: vec![],
8067 })),
8068 },
8069 to: DataType::Double {
8070 precision: None,
8071 scale: None,
8072 },
8073 trailing_comments: vec![],
8074 double_colon_syntax: false,
8075 format: None,
8076 default: None,
8077 }));
8078 let cast_div = Expression::Div(Box::new(BinaryOp::new(
8079 cast_x,
8080 match &y {
8081 Expression::Column(_)
8082 | Expression::Literal(_)
8083 | Expression::Identifier(_) => y,
8084 _ => Expression::Paren(Box::new(Paren {
8085 this: y,
8086 trailing_comments: vec![],
8087 })),
8088 },
8089 )));
8090 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
8091 condition,
8092 true_value: cast_div,
8093 false_value: Some(Expression::Null(Null)),
8094 original_name: None,
8095 })))
8096 } else if matches!(target, DialectType::PostgreSQL) {
8097 // PostgreSQL: CASE WHEN y <> 0 THEN CAST(x AS DOUBLE PRECISION) / y ELSE NULL END
8098 let cast_x = Expression::Cast(Box::new(Cast {
8099 this: match &x {
8100 Expression::Column(_)
8101 | Expression::Literal(_)
8102 | Expression::Identifier(_) => x,
8103 _ => Expression::Paren(Box::new(Paren {
8104 this: x,
8105 trailing_comments: vec![],
8106 })),
8107 },
8108 to: DataType::Custom {
8109 name: "DOUBLE PRECISION".to_string(),
8110 },
8111 trailing_comments: vec![],
8112 double_colon_syntax: false,
8113 format: None,
8114 default: None,
8115 }));
8116 let y_paren = match &y {
8117 Expression::Column(_)
8118 | Expression::Literal(_)
8119 | Expression::Identifier(_) => y,
8120 _ => Expression::Paren(Box::new(Paren {
8121 this: y,
8122 trailing_comments: vec![],
8123 })),
8124 };
8125 let cast_div =
8126 Expression::Div(Box::new(BinaryOp::new(cast_x, y_paren)));
8127 Ok(Expression::Case(Box::new(Case {
8128 operand: None,
8129 whens: vec![(condition, cast_div)],
8130 else_: Some(Expression::Null(Null)),
8131 comments: Vec::new(),
8132 })))
8133 } else if matches!(target, DialectType::DuckDB) {
8134 // DuckDB: CASE WHEN y <> 0 THEN x / y ELSE NULL END
8135 Ok(Expression::Case(Box::new(Case {
8136 operand: None,
8137 whens: vec![(condition, div_expr)],
8138 else_: Some(Expression::Null(Null)),
8139 comments: Vec::new(),
8140 })))
8141 } else if matches!(target, DialectType::Snowflake) {
8142 // Snowflake: IFF(y <> 0, x / y, NULL)
8143 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
8144 condition,
8145 true_value: div_expr,
8146 false_value: Some(Expression::Null(Null)),
8147 original_name: Some("IFF".to_string()),
8148 })))
8149 } else {
8150 // All others: IF(y <> 0, x / y, NULL)
8151 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
8152 condition,
8153 true_value: div_expr,
8154 false_value: Some(Expression::Null(Null)),
8155 original_name: None,
8156 })))
8157 }
8158 } else {
8159 Ok(e)
8160 }
8161 }
8162
            Action::BigQueryLastDayStripUnit => {
                // BigQuery LAST_DAY(date [, MONTH]) -> target-specific rewrite.
                // The explicit unit is dropped (MONTH is the default everywhere).
                if let Expression::LastDay(mut ld) = e {
                    ld.unit = None; // Strip the unit (MONTH is default)
                    match target {
                        DialectType::PostgreSQL => {
                            // LAST_DAY(date) -> CAST(DATE_TRUNC('MONTH', date) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)
                            let date_trunc = Expression::Function(Box::new(Function::new(
                                "DATE_TRUNC".to_string(),
                                vec![
                                    Expression::Literal(crate::expressions::Literal::String(
                                        "MONTH".to_string(),
                                    )),
                                    ld.this.clone(),
                                ],
                            )));
                            // Step to the first day of the next month...
                            let plus_month =
                                Expression::Add(Box::new(crate::expressions::BinaryOp::new(
                                    date_trunc,
                                    Expression::Interval(Box::new(
                                        crate::expressions::Interval {
                                            this: Some(Expression::Literal(
                                                crate::expressions::Literal::String(
                                                    "1 MONTH".to_string(),
                                                ),
                                            )),
                                            unit: None,
                                        },
                                    )),
                                )));
                            // ...then back one day to land on the last day of
                            // the original month.
                            let minus_day =
                                Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
                                    plus_month,
                                    Expression::Interval(Box::new(
                                        crate::expressions::Interval {
                                            this: Some(Expression::Literal(
                                                crate::expressions::Literal::String(
                                                    "1 DAY".to_string(),
                                                ),
                                            )),
                                            unit: None,
                                        },
                                    )),
                                )));
                            Ok(Expression::Cast(Box::new(Cast {
                                this: minus_day,
                                to: DataType::Date,
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                            })))
                        }
                        DialectType::Presto => {
                            // LAST_DAY(date) -> LAST_DAY_OF_MONTH(date)
                            Ok(Expression::Function(Box::new(Function::new(
                                "LAST_DAY_OF_MONTH".to_string(),
                                vec![ld.this],
                            ))))
                        }
                        DialectType::ClickHouse => {
                            // ClickHouse LAST_DAY(CAST(x AS Nullable(DATE)))
                            // Need to wrap the DATE type in Nullable
                            let nullable_date = match ld.this {
                                Expression::Cast(mut c) => {
                                    c.to = DataType::Nullable {
                                        inner: Box::new(DataType::Date),
                                    };
                                    Expression::Cast(c)
                                }
                                other => other,
                            };
                            ld.this = nullable_date;
                            Ok(Expression::LastDay(ld))
                        }
                        // Other targets keep LAST_DAY, unit stripped.
                        _ => Ok(Expression::LastDay(ld)),
                    }
                } else {
                    Ok(e)
                }
            }
8243
            Action::BigQueryCastFormat => {
                // CAST(x AS DATE FORMAT 'fmt') -> PARSE_DATE('%m/%d/%Y', x) for BigQuery
                // CAST(x AS TIMESTAMP FORMAT 'fmt') -> PARSE_TIMESTAMP(...) for BigQuery
                // SAFE_CAST(x AS DATE FORMAT 'fmt') -> CAST(TRY_STRPTIME(x, ...) AS DATE) for DuckDB
                //
                // Destructure the (SAFE_)CAST into its parts; anything without a
                // FORMAT clause passes through untouched.
                let (this, to, format_expr, is_safe) = match e {
                    Expression::Cast(ref c) if c.format.is_some() => (
                        c.this.clone(),
                        c.to.clone(),
                        c.format.as_ref().unwrap().as_ref().clone(),
                        false,
                    ),
                    Expression::SafeCast(ref c) if c.format.is_some() => (
                        c.this.clone(),
                        c.to.clone(),
                        c.format.as_ref().unwrap().as_ref().clone(),
                        true,
                    ),
                    _ => return Ok(e),
                };
                // For CAST(x AS STRING FORMAT ...) when target is BigQuery, keep as-is
                if matches!(target, DialectType::BigQuery) {
                    match &to {
                        DataType::String { .. } | DataType::VarChar { .. } | DataType::Text => {
                            // CAST(x AS STRING FORMAT 'fmt') stays as CAST expression for BigQuery
                            return Ok(e);
                        }
                        _ => {}
                    }
                }
                // Extract timezone from format if AT TIME ZONE is present
                let (actual_format_expr, timezone) = match &format_expr {
                    Expression::AtTimeZone(ref atz) => {
                        (atz.this.clone(), Some(atz.zone.clone()))
                    }
                    _ => (format_expr.clone(), None),
                };
                // Translate the SQL FORMAT model string into strftime directives.
                let strftime_fmt = Self::bq_cast_format_to_strftime(&actual_format_expr);
                match target {
                    DialectType::BigQuery => {
                        // CAST(x AS DATE FORMAT 'fmt') -> PARSE_DATE(strftime_fmt, x)
                        // CAST(x AS TIMESTAMP FORMAT 'fmt' AT TIME ZONE 'tz') -> PARSE_TIMESTAMP(strftime_fmt, x, tz)
                        let func_name = match &to {
                            DataType::Date => "PARSE_DATE",
                            DataType::Timestamp { .. } => "PARSE_TIMESTAMP",
                            DataType::Time { .. } => "PARSE_TIMESTAMP",
                            _ => "PARSE_TIMESTAMP",
                        };
                        let mut func_args = vec![strftime_fmt, this];
                        if let Some(tz) = timezone {
                            func_args.push(tz);
                        }
                        Ok(Expression::Function(Box::new(Function::new(
                            func_name.to_string(),
                            func_args,
                        ))))
                    }
                    DialectType::DuckDB => {
                        // SAFE_CAST(x AS DATE FORMAT 'fmt') -> CAST(TRY_STRPTIME(x, fmt) AS DATE)
                        // CAST(x AS DATE FORMAT 'fmt') -> CAST(STRPTIME(x, fmt) AS DATE)
                        let duck_fmt = Self::bq_format_to_duckdb(&strftime_fmt);
                        // SAFE_CAST maps to the non-throwing TRY_STRPTIME variant.
                        let parse_fn_name = if is_safe { "TRY_STRPTIME" } else { "STRPTIME" };
                        let parse_call = Expression::Function(Box::new(Function::new(
                            parse_fn_name.to_string(),
                            vec![this, duck_fmt],
                        )));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: parse_call,
                            to,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    }
                    // Other targets: keep the FORMAT-carrying cast as parsed.
                    _ => Ok(e),
                }
            }
8321
            Action::BigQueryFunctionNormalize => {
                // Delegate to the shared BigQuery function-name normalizer.
                Self::normalize_bigquery_function(e, source, target)
            }

            Action::BigQueryToHexBare => {
                // Not used anymore - handled directly in normalize_bigquery_function
                Ok(e)
            }
8330
            Action::BigQueryToHexLower => {
                // Normalize LOWER(...) wrappers around hex-conversion calls.
                // BigQuery's TO_HEX already emits lowercase, so the wrapper is
                // redundant for a BigQuery target; nested LOWERs are flattened
                // for everyone else.
                if let Expression::Lower(uf) = e {
                    match uf.this {
                        // BQ->BQ: LOWER(TO_HEX(x)) -> TO_HEX(x)
                        Expression::Function(f)
                            if matches!(target, DialectType::BigQuery)
                                && f.name == "TO_HEX" =>
                        {
                            Ok(Expression::Function(f))
                        }
                        // LOWER(LOWER(HEX/TO_HEX(x))) patterns
                        Expression::Lower(inner_uf) => {
                            if matches!(target, DialectType::BigQuery) {
                                // BQ->BQ: extract TO_HEX
                                if let Expression::Function(f) = inner_uf.this {
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "TO_HEX".to_string(),
                                        f.args,
                                    ))))
                                } else {
                                    Ok(Expression::Lower(inner_uf))
                                }
                            } else {
                                // Flatten: LOWER(LOWER(x)) -> LOWER(x)
                                Ok(Expression::Lower(inner_uf))
                            }
                        }
                        // Any other operand: rebuild the LOWER unchanged.
                        other => {
                            Ok(Expression::Lower(Box::new(crate::expressions::UnaryFunc {
                                this: other,
                                original_name: None,
                            })))
                        }
                    }
                } else {
                    Ok(e)
                }
            }

            Action::BigQueryToHexUpper => {
                // UPPER(LOWER(HEX(x))) -> HEX(x) (UPPER cancels LOWER, HEX is already uppercase)
                // UPPER(LOWER(TO_HEX(x))) -> TO_HEX(x) for Presto/Trino
                if let Expression::Upper(uf) = e {
                    if let Expression::Lower(inner_uf) = uf.this {
                        // For BQ->BQ: UPPER(TO_HEX(x)) should stay as UPPER(TO_HEX(x))
                        if matches!(target, DialectType::BigQuery) {
                            // Restore TO_HEX name in inner function
                            if let Expression::Function(f) = inner_uf.this {
                                let restored = Expression::Function(Box::new(Function::new(
                                    "TO_HEX".to_string(),
                                    f.args,
                                )));
                                Ok(Expression::Upper(Box::new(
                                    crate::expressions::UnaryFunc::new(restored),
                                )))
                            } else {
                                Ok(Expression::Upper(inner_uf))
                            }
                        } else {
                            // Extract the inner HEX/TO_HEX function (UPPER(LOWER(x)) = x when HEX is uppercase)
                            Ok(inner_uf.this)
                        }
                    } else {
                        // No LOWER underneath: keep the UPPER as-is.
                        Ok(Expression::Upper(uf))
                    }
                } else {
                    Ok(e)
                }
            }
8400
8401 Action::BigQueryAnyValueHaving => {
8402 // ANY_VALUE(x HAVING MAX y) -> ARG_MAX_NULL(x, y)
8403 // ANY_VALUE(x HAVING MIN y) -> ARG_MIN_NULL(x, y)
8404 if let Expression::AnyValue(agg) = e {
8405 if let Some((having_expr, is_max)) = agg.having_max {
8406 let func_name = if is_max {
8407 "ARG_MAX_NULL"
8408 } else {
8409 "ARG_MIN_NULL"
8410 };
8411 Ok(Expression::Function(Box::new(Function::new(
8412 func_name.to_string(),
8413 vec![agg.this, *having_expr],
8414 ))))
8415 } else {
8416 Ok(Expression::AnyValue(agg))
8417 }
8418 } else {
8419 Ok(e)
8420 }
8421 }
8422
8423 Action::BigQueryApproxQuantiles => {
8424 // APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [0, 1/n, 2/n, ..., 1])
8425 // APPROX_QUANTILES(DISTINCT x, n) -> APPROX_QUANTILE(DISTINCT x, [0, 1/n, ..., 1])
8426 if let Expression::AggregateFunction(agg) = e {
8427 if agg.args.len() >= 2 {
8428 let x_expr = agg.args[0].clone();
8429 let n_expr = &agg.args[1];
8430
8431 // Extract the numeric value from n_expr
8432 let n = match n_expr {
8433 Expression::Literal(crate::expressions::Literal::Number(s)) => {
8434 s.parse::<usize>().unwrap_or(2)
8435 }
8436 _ => 2,
8437 };
8438
8439 // Generate quantile array: [0, 1/n, 2/n, ..., 1]
8440 let mut quantiles = Vec::new();
8441 for i in 0..=n {
8442 let q = i as f64 / n as f64;
8443 // Format nicely: 0 -> 0, 0.25 -> 0.25, 1 -> 1
8444 if q == 0.0 {
8445 quantiles.push(Expression::number(0));
8446 } else if q == 1.0 {
8447 quantiles.push(Expression::number(1));
8448 } else {
8449 quantiles.push(Expression::Literal(
8450 crate::expressions::Literal::Number(format!("{}", q)),
8451 ));
8452 }
8453 }
8454
8455 let array_expr =
8456 Expression::Array(Box::new(crate::expressions::Array {
8457 expressions: quantiles,
8458 }));
8459
8460 // Preserve DISTINCT modifier
8461 let mut new_func = Function::new(
8462 "APPROX_QUANTILE".to_string(),
8463 vec![x_expr, array_expr],
8464 );
8465 new_func.distinct = agg.distinct;
8466 Ok(Expression::Function(Box::new(new_func)))
8467 } else {
8468 Ok(Expression::AggregateFunction(agg))
8469 }
8470 } else {
8471 Ok(e)
8472 }
8473 }
8474
8475 Action::GenericFunctionNormalize => {
8476 // Helper closure to convert ARBITRARY to target-specific function
8477 fn convert_arbitrary(arg: Expression, target: DialectType) -> Expression {
8478 let name = match target {
8479 DialectType::ClickHouse => "any",
8480 DialectType::TSQL | DialectType::SQLite => "MAX",
8481 DialectType::Hive => "FIRST",
8482 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
8483 "ARBITRARY"
8484 }
8485 _ => "ANY_VALUE",
8486 };
8487 Expression::Function(Box::new(Function::new(name.to_string(), vec![arg])))
8488 }
8489
8490 if let Expression::Function(f) = e {
8491 let name = f.name.to_uppercase();
8492 match name.as_str() {
8493 "ARBITRARY" if f.args.len() == 1 => {
8494 let arg = f.args.into_iter().next().unwrap();
8495 Ok(convert_arbitrary(arg, target))
8496 }
8497 "TO_NUMBER" if f.args.len() == 1 => {
8498 let arg = f.args.into_iter().next().unwrap();
8499 match target {
8500 DialectType::Oracle | DialectType::Snowflake => {
8501 Ok(Expression::Function(Box::new(Function::new(
8502 "TO_NUMBER".to_string(),
8503 vec![arg],
8504 ))))
8505 }
8506 _ => Ok(Expression::Cast(Box::new(crate::expressions::Cast {
8507 this: arg,
8508 to: crate::expressions::DataType::Double {
8509 precision: None,
8510 scale: None,
8511 },
8512 double_colon_syntax: false,
8513 trailing_comments: Vec::new(),
8514 format: None,
8515 default: None,
8516 }))),
8517 }
8518 }
8519 "AGGREGATE" if f.args.len() >= 3 => match target {
8520 DialectType::DuckDB
8521 | DialectType::Hive
8522 | DialectType::Presto
8523 | DialectType::Trino => Ok(Expression::Function(Box::new(
8524 Function::new("REDUCE".to_string(), f.args),
8525 ))),
8526 _ => Ok(Expression::Function(f)),
8527 },
8528 // REGEXP_MATCHES(x, y) -> RegexpLike for most targets, keep for DuckDB
8529 "REGEXP_MATCHES" if f.args.len() >= 2 => {
8530 if matches!(target, DialectType::DuckDB) {
8531 Ok(Expression::Function(f))
8532 } else {
8533 let mut args = f.args;
8534 let this = args.remove(0);
8535 let pattern = args.remove(0);
8536 let flags = if args.is_empty() {
8537 None
8538 } else {
8539 Some(args.remove(0))
8540 };
8541 Ok(Expression::RegexpLike(Box::new(
8542 crate::expressions::RegexpFunc {
8543 this,
8544 pattern,
8545 flags,
8546 },
8547 )))
8548 }
8549 }
8550 // REGEXP_FULL_MATCH (Hive REGEXP) -> RegexpLike
8551 "REGEXP_FULL_MATCH" if f.args.len() >= 2 => {
8552 if matches!(target, DialectType::DuckDB) {
8553 Ok(Expression::Function(f))
8554 } else {
8555 let mut args = f.args;
8556 let this = args.remove(0);
8557 let pattern = args.remove(0);
8558 let flags = if args.is_empty() {
8559 None
8560 } else {
8561 Some(args.remove(0))
8562 };
8563 Ok(Expression::RegexpLike(Box::new(
8564 crate::expressions::RegexpFunc {
8565 this,
8566 pattern,
8567 flags,
8568 },
8569 )))
8570 }
8571 }
8572 // STRUCT_EXTRACT(x, 'field') -> x.field (StructExtract expression)
8573 "STRUCT_EXTRACT" if f.args.len() == 2 => {
8574 let mut args = f.args;
8575 let this = args.remove(0);
8576 let field_expr = args.remove(0);
8577 // Extract string literal to get field name
8578 let field_name = match &field_expr {
8579 Expression::Literal(crate::expressions::Literal::String(s)) => {
8580 s.clone()
8581 }
8582 Expression::Identifier(id) => id.name.clone(),
8583 _ => {
8584 return Ok(Expression::Function(Box::new(Function::new(
8585 "STRUCT_EXTRACT".to_string(),
8586 vec![this, field_expr],
8587 ))))
8588 }
8589 };
8590 Ok(Expression::StructExtract(Box::new(
8591 crate::expressions::StructExtractFunc {
8592 this,
8593 field: crate::expressions::Identifier::new(field_name),
8594 },
8595 )))
8596 }
8597 // LIST_FILTER([4,5,6], x -> x > 4) -> FILTER(ARRAY(4,5,6), x -> x > 4)
8598 "LIST_FILTER" if f.args.len() == 2 => {
8599 let name = match target {
8600 DialectType::DuckDB => "LIST_FILTER",
8601 _ => "FILTER",
8602 };
8603 Ok(Expression::Function(Box::new(Function::new(
8604 name.to_string(),
8605 f.args,
8606 ))))
8607 }
8608 // LIST_TRANSFORM(x, y -> y + 1) -> TRANSFORM(x, y -> y + 1)
8609 "LIST_TRANSFORM" if f.args.len() == 2 => {
8610 let name = match target {
8611 DialectType::DuckDB => "LIST_TRANSFORM",
8612 _ => "TRANSFORM",
8613 };
8614 Ok(Expression::Function(Box::new(Function::new(
8615 name.to_string(),
8616 f.args,
8617 ))))
8618 }
8619 // LIST_SORT(x) -> SORT_ARRAY(x) / ARRAY_SORT(x)
8620 "LIST_SORT" if f.args.len() >= 1 => {
8621 let name = match target {
8622 DialectType::DuckDB
8623 | DialectType::Presto
8624 | DialectType::Trino => "ARRAY_SORT",
8625 _ => "SORT_ARRAY",
8626 };
8627 Ok(Expression::Function(Box::new(Function::new(
8628 name.to_string(),
8629 f.args,
8630 ))))
8631 }
8632 // LIST_REVERSE_SORT(x) -> SORT_ARRAY(x, FALSE) for Spark/Hive, ARRAY_SORT(x, lambda) for Presto
8633 "LIST_REVERSE_SORT" if f.args.len() >= 1 => {
8634 match target {
8635 DialectType::DuckDB => Ok(Expression::Function(Box::new(
8636 Function::new("ARRAY_REVERSE_SORT".to_string(), f.args),
8637 ))),
8638 DialectType::Spark
8639 | DialectType::Databricks
8640 | DialectType::Hive => {
8641 let mut args = f.args;
8642 args.push(Expression::Identifier(
8643 crate::expressions::Identifier::new("FALSE"),
8644 ));
8645 Ok(Expression::Function(Box::new(Function::new(
8646 "SORT_ARRAY".to_string(),
8647 args,
8648 ))))
8649 }
8650 DialectType::Presto
8651 | DialectType::Trino
8652 | DialectType::Athena => {
8653 // ARRAY_SORT(x, (a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END)
8654 let arr = f.args.into_iter().next().unwrap();
8655 let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
8656 parameters: vec![
8657 crate::expressions::Identifier::new("a"),
8658 crate::expressions::Identifier::new("b"),
8659 ],
8660 body: Expression::Case(Box::new(Case {
8661 operand: None,
8662 whens: vec![
8663 (
8664 Expression::Lt(Box::new(BinaryOp::new(
8665 Expression::Identifier(crate::expressions::Identifier::new("a")),
8666 Expression::Identifier(crate::expressions::Identifier::new("b")),
8667 ))),
8668 Expression::number(1),
8669 ),
8670 (
8671 Expression::Gt(Box::new(BinaryOp::new(
8672 Expression::Identifier(crate::expressions::Identifier::new("a")),
8673 Expression::Identifier(crate::expressions::Identifier::new("b")),
8674 ))),
8675 Expression::Literal(Literal::Number("-1".to_string())),
8676 ),
8677 ],
8678 else_: Some(Expression::number(0)),
8679 comments: Vec::new(),
8680 })),
8681 colon: false,
8682 parameter_types: Vec::new(),
8683 }));
8684 Ok(Expression::Function(Box::new(Function::new(
8685 "ARRAY_SORT".to_string(),
8686 vec![arr, lambda],
8687 ))))
8688 }
8689 _ => Ok(Expression::Function(Box::new(Function::new(
8690 "LIST_REVERSE_SORT".to_string(),
8691 f.args,
8692 )))),
8693 }
8694 }
8695 // SPLIT_TO_ARRAY(x) with 1 arg -> add default ',' separator and rename
8696 "SPLIT_TO_ARRAY" if f.args.len() == 1 => {
8697 let mut args = f.args;
8698 args.push(Expression::string(","));
8699 let name = match target {
8700 DialectType::DuckDB => "STR_SPLIT",
8701 DialectType::Presto | DialectType::Trino => "SPLIT",
8702 DialectType::Spark
8703 | DialectType::Databricks
8704 | DialectType::Hive => "SPLIT",
8705 DialectType::PostgreSQL => "STRING_TO_ARRAY",
8706 DialectType::Redshift => "SPLIT_TO_ARRAY",
8707 _ => "SPLIT",
8708 };
8709 Ok(Expression::Function(Box::new(Function::new(
8710 name.to_string(),
8711 args,
8712 ))))
8713 }
8714 // SPLIT_TO_ARRAY(x, sep) with 2 args -> rename based on target
8715 "SPLIT_TO_ARRAY" if f.args.len() == 2 => {
8716 let name = match target {
8717 DialectType::DuckDB => "STR_SPLIT",
8718 DialectType::Presto | DialectType::Trino => "SPLIT",
8719 DialectType::Spark
8720 | DialectType::Databricks
8721 | DialectType::Hive => "SPLIT",
8722 DialectType::PostgreSQL => "STRING_TO_ARRAY",
8723 DialectType::Redshift => "SPLIT_TO_ARRAY",
8724 _ => "SPLIT",
8725 };
8726 Ok(Expression::Function(Box::new(Function::new(
8727 name.to_string(),
8728 f.args,
8729 ))))
8730 }
8731 // STRING_TO_ARRAY/STR_SPLIT -> target-specific split function
8732 "STRING_TO_ARRAY" | "STR_SPLIT" if f.args.len() >= 2 => {
8733 let name = match target {
8734 DialectType::DuckDB => "STR_SPLIT",
8735 DialectType::Presto | DialectType::Trino => "SPLIT",
8736 DialectType::Spark
8737 | DialectType::Databricks
8738 | DialectType::Hive => "SPLIT",
8739 DialectType::Doris | DialectType::StarRocks => {
8740 "SPLIT_BY_STRING"
8741 }
8742 DialectType::PostgreSQL | DialectType::Redshift => {
8743 "STRING_TO_ARRAY"
8744 }
8745 _ => "SPLIT",
8746 };
8747 // For Spark/Hive, SPLIT uses regex - need to escape literal with \Q...\E
8748 if matches!(
8749 target,
8750 DialectType::Spark
8751 | DialectType::Databricks
8752 | DialectType::Hive
8753 ) {
8754 let mut args = f.args;
8755 let x = args.remove(0);
8756 let sep = args.remove(0);
8757 // Wrap separator in CONCAT('\\Q', sep, '\\E')
8758 let escaped_sep =
8759 Expression::Function(Box::new(Function::new(
8760 "CONCAT".to_string(),
8761 vec![
8762 Expression::string("\\Q"),
8763 sep,
8764 Expression::string("\\E"),
8765 ],
8766 )));
8767 Ok(Expression::Function(Box::new(Function::new(
8768 name.to_string(),
8769 vec![x, escaped_sep],
8770 ))))
8771 } else {
8772 Ok(Expression::Function(Box::new(Function::new(
8773 name.to_string(),
8774 f.args,
8775 ))))
8776 }
8777 }
8778 // STR_SPLIT_REGEX(x, 'a') / REGEXP_SPLIT(x, 'a') -> target-specific regex split
8779 "STR_SPLIT_REGEX" | "REGEXP_SPLIT" if f.args.len() == 2 => {
8780 let name = match target {
8781 DialectType::DuckDB => "STR_SPLIT_REGEX",
8782 DialectType::Presto | DialectType::Trino => "REGEXP_SPLIT",
8783 DialectType::Spark
8784 | DialectType::Databricks
8785 | DialectType::Hive => "SPLIT",
8786 _ => "REGEXP_SPLIT",
8787 };
8788 Ok(Expression::Function(Box::new(Function::new(
8789 name.to_string(),
8790 f.args,
8791 ))))
8792 }
8793 // SPLIT(x, sep) from Presto/StarRocks/Doris -> target-specific split with regex escaping for Hive/Spark
8794 "SPLIT"
8795 if f.args.len() == 2
8796 && matches!(
8797 source,
8798 DialectType::Presto
8799 | DialectType::Trino
8800 | DialectType::Athena
8801 | DialectType::StarRocks
8802 | DialectType::Doris
8803 )
8804 && matches!(
8805 target,
8806 DialectType::Spark
8807 | DialectType::Databricks
8808 | DialectType::Hive
8809 ) =>
8810 {
8811 // Presto/StarRocks SPLIT is literal, Hive/Spark SPLIT is regex
8812 let mut args = f.args;
8813 let x = args.remove(0);
8814 let sep = args.remove(0);
8815 let escaped_sep = Expression::Function(Box::new(Function::new(
8816 "CONCAT".to_string(),
8817 vec![Expression::string("\\Q"), sep, Expression::string("\\E")],
8818 )));
8819 Ok(Expression::Function(Box::new(Function::new(
8820 "SPLIT".to_string(),
8821 vec![x, escaped_sep],
8822 ))))
8823 }
8824 // SUBSTRINGINDEX -> SUBSTRING_INDEX (ClickHouse camelCase to standard)
8825 // For ClickHouse target, preserve original name to maintain camelCase
8826 "SUBSTRINGINDEX" => {
8827 let name = if matches!(target, DialectType::ClickHouse) {
8828 f.name.clone()
8829 } else {
8830 "SUBSTRING_INDEX".to_string()
8831 };
8832 Ok(Expression::Function(Box::new(Function::new(name, f.args))))
8833 }
8834 // ARRAY_LENGTH/SIZE/CARDINALITY -> target-specific array length function
8835 "ARRAY_LENGTH" | "SIZE" | "CARDINALITY" => {
8836 // Get the array argument (first arg, drop dimension args)
8837 let mut args = f.args;
8838 let arr = if args.is_empty() {
8839 return Ok(Expression::Function(Box::new(Function::new(
8840 name.to_string(),
8841 args,
8842 ))));
8843 } else {
8844 args.remove(0)
8845 };
8846 let name =
8847 match target {
8848 DialectType::Spark
8849 | DialectType::Databricks
8850 | DialectType::Hive => "SIZE",
8851 DialectType::Presto | DialectType::Trino => "CARDINALITY",
8852 DialectType::BigQuery => "ARRAY_LENGTH",
8853 DialectType::DuckDB => {
8854 // DuckDB: use ARRAY_LENGTH with all args
8855 let mut all_args = vec![arr];
8856 all_args.extend(args);
8857 return Ok(Expression::Function(Box::new(
8858 Function::new("ARRAY_LENGTH".to_string(), all_args),
8859 )));
8860 }
8861 DialectType::PostgreSQL | DialectType::Redshift => {
8862 // Keep ARRAY_LENGTH with dimension arg
8863 let mut all_args = vec![arr];
8864 all_args.extend(args);
8865 return Ok(Expression::Function(Box::new(
8866 Function::new("ARRAY_LENGTH".to_string(), all_args),
8867 )));
8868 }
8869 DialectType::ClickHouse => "LENGTH",
8870 _ => "ARRAY_LENGTH",
8871 };
8872 Ok(Expression::Function(Box::new(Function::new(
8873 name.to_string(),
8874 vec![arr],
8875 ))))
8876 }
8877 // UNICODE(x) -> target-specific codepoint function
8878 "UNICODE" if f.args.len() == 1 => {
8879 match target {
8880 DialectType::SQLite | DialectType::DuckDB => {
8881 Ok(Expression::Function(Box::new(Function::new(
8882 "UNICODE".to_string(),
8883 f.args,
8884 ))))
8885 }
8886 DialectType::Oracle => {
8887 // ASCII(UNISTR(x))
8888 let inner = Expression::Function(Box::new(Function::new(
8889 "UNISTR".to_string(),
8890 f.args,
8891 )));
8892 Ok(Expression::Function(Box::new(Function::new(
8893 "ASCII".to_string(),
8894 vec![inner],
8895 ))))
8896 }
8897 DialectType::MySQL => {
8898 // ORD(CONVERT(x USING utf32))
8899 let arg = f.args.into_iter().next().unwrap();
8900 let convert_expr = Expression::ConvertToCharset(Box::new(
8901 crate::expressions::ConvertToCharset {
8902 this: Box::new(arg),
8903 dest: Some(Box::new(Expression::Identifier(
8904 crate::expressions::Identifier::new("utf32"),
8905 ))),
8906 source: None,
8907 },
8908 ));
8909 Ok(Expression::Function(Box::new(Function::new(
8910 "ORD".to_string(),
8911 vec![convert_expr],
8912 ))))
8913 }
8914 _ => Ok(Expression::Function(Box::new(Function::new(
8915 "ASCII".to_string(),
8916 f.args,
8917 )))),
8918 }
8919 }
8920 // XOR(a, b, ...) -> a XOR b XOR ... for MySQL, BITWISE_XOR for Presto/Trino, # for PostgreSQL, ^ for BigQuery
8921 "XOR" if f.args.len() >= 2 => {
8922 match target {
8923 DialectType::ClickHouse => {
8924 // ClickHouse: keep as xor() function with lowercase name
8925 Ok(Expression::Function(Box::new(Function::new(
8926 "xor".to_string(),
8927 f.args,
8928 ))))
8929 }
8930 DialectType::Presto | DialectType::Trino => {
8931 if f.args.len() == 2 {
8932 Ok(Expression::Function(Box::new(Function::new(
8933 "BITWISE_XOR".to_string(),
8934 f.args,
8935 ))))
8936 } else {
8937 // Nest: BITWISE_XOR(BITWISE_XOR(a, b), c)
8938 let mut args = f.args;
8939 let first = args.remove(0);
8940 let second = args.remove(0);
8941 let mut result =
8942 Expression::Function(Box::new(Function::new(
8943 "BITWISE_XOR".to_string(),
8944 vec![first, second],
8945 )));
8946 for arg in args {
8947 result =
8948 Expression::Function(Box::new(Function::new(
8949 "BITWISE_XOR".to_string(),
8950 vec![result, arg],
8951 )));
8952 }
8953 Ok(result)
8954 }
8955 }
8956 DialectType::MySQL
8957 | DialectType::SingleStore
8958 | DialectType::Doris
8959 | DialectType::StarRocks => {
8960 // Convert XOR(a, b, c) -> Expression::Xor with expressions list
8961 let args = f.args;
8962 Ok(Expression::Xor(Box::new(crate::expressions::Xor {
8963 this: None,
8964 expression: None,
8965 expressions: args,
8966 })))
8967 }
8968 DialectType::PostgreSQL | DialectType::Redshift => {
8969 // PostgreSQL: a # b (hash operator for XOR)
8970 let mut args = f.args;
8971 let first = args.remove(0);
8972 let second = args.remove(0);
8973 let mut result = Expression::BitwiseXor(Box::new(
8974 BinaryOp::new(first, second),
8975 ));
8976 for arg in args {
8977 result = Expression::BitwiseXor(Box::new(
8978 BinaryOp::new(result, arg),
8979 ));
8980 }
8981 Ok(result)
8982 }
8983 DialectType::DuckDB => {
8984 // DuckDB: keep as XOR function (DuckDB ^ is Power, not XOR)
8985 Ok(Expression::Function(Box::new(Function::new(
8986 "XOR".to_string(),
8987 f.args,
8988 ))))
8989 }
8990 DialectType::BigQuery => {
8991 // BigQuery: a ^ b (caret operator for XOR)
8992 let mut args = f.args;
8993 let first = args.remove(0);
8994 let second = args.remove(0);
8995 let mut result = Expression::BitwiseXor(Box::new(
8996 BinaryOp::new(first, second),
8997 ));
8998 for arg in args {
8999 result = Expression::BitwiseXor(Box::new(
9000 BinaryOp::new(result, arg),
9001 ));
9002 }
9003 Ok(result)
9004 }
9005 _ => Ok(Expression::Function(Box::new(Function::new(
9006 "XOR".to_string(),
9007 f.args,
9008 )))),
9009 }
9010 }
9011 // ARRAY_REVERSE_SORT(x) -> SORT_ARRAY(x, FALSE) for Spark/Hive, ARRAY_SORT(x, lambda) for Presto
9012 "ARRAY_REVERSE_SORT" if f.args.len() >= 1 => {
9013 match target {
9014 DialectType::Spark
9015 | DialectType::Databricks
9016 | DialectType::Hive => {
9017 let mut args = f.args;
9018 args.push(Expression::Identifier(
9019 crate::expressions::Identifier::new("FALSE"),
9020 ));
9021 Ok(Expression::Function(Box::new(Function::new(
9022 "SORT_ARRAY".to_string(),
9023 args,
9024 ))))
9025 }
9026 DialectType::Presto
9027 | DialectType::Trino
9028 | DialectType::Athena => {
9029 // ARRAY_SORT(x, (a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END)
9030 let arr = f.args.into_iter().next().unwrap();
9031 let lambda = Expression::Lambda(Box::new(
9032 crate::expressions::LambdaExpr {
9033 parameters: vec![
9034 Identifier::new("a"),
9035 Identifier::new("b"),
9036 ],
9037 colon: false,
9038 parameter_types: Vec::new(),
9039 body: Expression::Case(Box::new(Case {
9040 operand: None,
9041 whens: vec![
9042 (
9043 Expression::Lt(Box::new(
9044 BinaryOp::new(
9045 Expression::Identifier(
9046 Identifier::new("a"),
9047 ),
9048 Expression::Identifier(
9049 Identifier::new("b"),
9050 ),
9051 ),
9052 )),
9053 Expression::number(1),
9054 ),
9055 (
9056 Expression::Gt(Box::new(
9057 BinaryOp::new(
9058 Expression::Identifier(
9059 Identifier::new("a"),
9060 ),
9061 Expression::Identifier(
9062 Identifier::new("b"),
9063 ),
9064 ),
9065 )),
9066 Expression::Neg(Box::new(
9067 crate::expressions::UnaryOp {
9068 this: Expression::number(1),
9069 },
9070 )),
9071 ),
9072 ],
9073 else_: Some(Expression::number(0)),
9074 comments: Vec::new(),
9075 })),
9076 },
9077 ));
9078 Ok(Expression::Function(Box::new(Function::new(
9079 "ARRAY_SORT".to_string(),
9080 vec![arr, lambda],
9081 ))))
9082 }
9083 _ => Ok(Expression::Function(Box::new(Function::new(
9084 "ARRAY_REVERSE_SORT".to_string(),
9085 f.args,
9086 )))),
9087 }
9088 }
9089 // ENCODE(x) -> ENCODE(x, 'utf-8') for Spark/Hive, TO_UTF8(x) for Presto
9090 "ENCODE" if f.args.len() == 1 => match target {
9091 DialectType::Spark
9092 | DialectType::Databricks
9093 | DialectType::Hive => {
9094 let mut args = f.args;
9095 args.push(Expression::string("utf-8"));
9096 Ok(Expression::Function(Box::new(Function::new(
9097 "ENCODE".to_string(),
9098 args,
9099 ))))
9100 }
9101 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
9102 Ok(Expression::Function(Box::new(Function::new(
9103 "TO_UTF8".to_string(),
9104 f.args,
9105 ))))
9106 }
9107 _ => Ok(Expression::Function(Box::new(Function::new(
9108 "ENCODE".to_string(),
9109 f.args,
9110 )))),
9111 },
9112 // DECODE(x) -> DECODE(x, 'utf-8') for Spark/Hive, FROM_UTF8(x) for Presto
9113 "DECODE" if f.args.len() == 1 => match target {
9114 DialectType::Spark
9115 | DialectType::Databricks
9116 | DialectType::Hive => {
9117 let mut args = f.args;
9118 args.push(Expression::string("utf-8"));
9119 Ok(Expression::Function(Box::new(Function::new(
9120 "DECODE".to_string(),
9121 args,
9122 ))))
9123 }
9124 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
9125 Ok(Expression::Function(Box::new(Function::new(
9126 "FROM_UTF8".to_string(),
9127 f.args,
9128 ))))
9129 }
9130 _ => Ok(Expression::Function(Box::new(Function::new(
9131 "DECODE".to_string(),
9132 f.args,
9133 )))),
9134 },
9135 // QUANTILE(x, p) -> PERCENTILE(x, p) for Spark/Hive
9136 "QUANTILE" if f.args.len() == 2 => {
9137 let name = match target {
9138 DialectType::Spark
9139 | DialectType::Databricks
9140 | DialectType::Hive => "PERCENTILE",
9141 DialectType::Presto | DialectType::Trino => "APPROX_PERCENTILE",
9142 DialectType::BigQuery => "PERCENTILE_CONT",
9143 _ => "QUANTILE",
9144 };
9145 Ok(Expression::Function(Box::new(Function::new(
9146 name.to_string(),
9147 f.args,
9148 ))))
9149 }
9150 // QUANTILE_CONT(x, q) -> PERCENTILE_CONT(q) WITHIN GROUP (ORDER BY x) for PostgreSQL/Snowflake
9151 "QUANTILE_CONT" if f.args.len() == 2 => {
9152 let mut args = f.args;
9153 let column = args.remove(0);
9154 let quantile = args.remove(0);
9155 match target {
9156 DialectType::DuckDB => {
9157 Ok(Expression::Function(Box::new(Function::new(
9158 "QUANTILE_CONT".to_string(),
9159 vec![column, quantile],
9160 ))))
9161 }
9162 DialectType::PostgreSQL
9163 | DialectType::Redshift
9164 | DialectType::Snowflake => {
9165 // PERCENTILE_CONT(q) WITHIN GROUP (ORDER BY x)
9166 let inner = Expression::PercentileCont(Box::new(
9167 crate::expressions::PercentileFunc {
9168 this: column.clone(),
9169 percentile: quantile,
9170 order_by: None,
9171 filter: None,
9172 },
9173 ));
9174 Ok(Expression::WithinGroup(Box::new(
9175 crate::expressions::WithinGroup {
9176 this: inner,
9177 order_by: vec![crate::expressions::Ordered {
9178 this: column,
9179 desc: false,
9180 nulls_first: None,
9181 explicit_asc: false,
9182 with_fill: None,
9183 }],
9184 },
9185 )))
9186 }
9187 _ => Ok(Expression::Function(Box::new(Function::new(
9188 "QUANTILE_CONT".to_string(),
9189 vec![column, quantile],
9190 )))),
9191 }
9192 }
9193 // QUANTILE_DISC(x, q) -> PERCENTILE_DISC(q) WITHIN GROUP (ORDER BY x) for PostgreSQL/Snowflake
9194 "QUANTILE_DISC" if f.args.len() == 2 => {
9195 let mut args = f.args;
9196 let column = args.remove(0);
9197 let quantile = args.remove(0);
9198 match target {
9199 DialectType::DuckDB => {
9200 Ok(Expression::Function(Box::new(Function::new(
9201 "QUANTILE_DISC".to_string(),
9202 vec![column, quantile],
9203 ))))
9204 }
9205 DialectType::PostgreSQL
9206 | DialectType::Redshift
9207 | DialectType::Snowflake => {
9208 // PERCENTILE_DISC(q) WITHIN GROUP (ORDER BY x)
9209 let inner = Expression::PercentileDisc(Box::new(
9210 crate::expressions::PercentileFunc {
9211 this: column.clone(),
9212 percentile: quantile,
9213 order_by: None,
9214 filter: None,
9215 },
9216 ));
9217 Ok(Expression::WithinGroup(Box::new(
9218 crate::expressions::WithinGroup {
9219 this: inner,
9220 order_by: vec![crate::expressions::Ordered {
9221 this: column,
9222 desc: false,
9223 nulls_first: None,
9224 explicit_asc: false,
9225 with_fill: None,
9226 }],
9227 },
9228 )))
9229 }
9230 _ => Ok(Expression::Function(Box::new(Function::new(
9231 "QUANTILE_DISC".to_string(),
9232 vec![column, quantile],
9233 )))),
9234 }
9235 }
9236 // PERCENTILE_APPROX(x, p) / APPROX_PERCENTILE(x, p) -> target-specific
9237 "PERCENTILE_APPROX" | "APPROX_PERCENTILE" if f.args.len() >= 2 => {
9238 let name = match target {
9239 DialectType::Presto
9240 | DialectType::Trino
9241 | DialectType::Athena => "APPROX_PERCENTILE",
9242 DialectType::Spark
9243 | DialectType::Databricks
9244 | DialectType::Hive => "PERCENTILE_APPROX",
9245 DialectType::DuckDB => "APPROX_QUANTILE",
9246 DialectType::PostgreSQL | DialectType::Redshift => {
9247 "PERCENTILE_CONT"
9248 }
9249 _ => &f.name,
9250 };
9251 Ok(Expression::Function(Box::new(Function::new(
9252 name.to_string(),
9253 f.args,
9254 ))))
9255 }
9256 // EPOCH(x) -> UNIX_TIMESTAMP(x) for Spark/Hive
9257 "EPOCH" if f.args.len() == 1 => {
9258 let name = match target {
9259 DialectType::Spark
9260 | DialectType::Databricks
9261 | DialectType::Hive => "UNIX_TIMESTAMP",
9262 DialectType::Presto | DialectType::Trino => "TO_UNIXTIME",
9263 _ => "EPOCH",
9264 };
9265 Ok(Expression::Function(Box::new(Function::new(
9266 name.to_string(),
9267 f.args,
9268 ))))
9269 }
9270 // EPOCH_MS(x) -> target-specific epoch milliseconds conversion
9271 "EPOCH_MS" if f.args.len() == 1 => {
9272 match target {
9273 DialectType::Spark | DialectType::Databricks => {
9274 Ok(Expression::Function(Box::new(Function::new(
9275 "TIMESTAMP_MILLIS".to_string(),
9276 f.args,
9277 ))))
9278 }
9279 DialectType::Hive => {
9280 // Hive: FROM_UNIXTIME(x / 1000)
9281 let arg = f.args.into_iter().next().unwrap();
9282 let div_expr = Expression::Div(Box::new(
9283 crate::expressions::BinaryOp::new(
9284 arg,
9285 Expression::number(1000),
9286 ),
9287 ));
9288 Ok(Expression::Function(Box::new(Function::new(
9289 "FROM_UNIXTIME".to_string(),
9290 vec![div_expr],
9291 ))))
9292 }
9293 DialectType::Presto | DialectType::Trino => {
9294 Ok(Expression::Function(Box::new(Function::new(
9295 "FROM_UNIXTIME".to_string(),
9296 vec![Expression::Div(Box::new(
9297 crate::expressions::BinaryOp::new(
9298 f.args.into_iter().next().unwrap(),
9299 Expression::number(1000),
9300 ),
9301 ))],
9302 ))))
9303 }
9304 _ => Ok(Expression::Function(Box::new(Function::new(
9305 "EPOCH_MS".to_string(),
9306 f.args,
9307 )))),
9308 }
9309 }
9310 // HASHBYTES('algorithm', x) -> target-specific hash function
9311 "HASHBYTES" if f.args.len() == 2 => {
9312 // Keep HASHBYTES as-is for TSQL target
9313 if matches!(target, DialectType::TSQL) {
9314 return Ok(Expression::Function(f));
9315 }
9316 let algo_expr = &f.args[0];
9317 let algo = match algo_expr {
9318 Expression::Literal(crate::expressions::Literal::String(s)) => {
9319 s.to_uppercase()
9320 }
9321 _ => return Ok(Expression::Function(f)),
9322 };
9323 let data_arg = f.args.into_iter().nth(1).unwrap();
9324 match algo.as_str() {
9325 "SHA1" => {
9326 let name = match target {
9327 DialectType::Spark | DialectType::Databricks => "SHA",
9328 DialectType::Hive => "SHA1",
9329 _ => "SHA1",
9330 };
9331 Ok(Expression::Function(Box::new(Function::new(
9332 name.to_string(),
9333 vec![data_arg],
9334 ))))
9335 }
9336 "SHA2_256" => {
9337 Ok(Expression::Function(Box::new(Function::new(
9338 "SHA2".to_string(),
9339 vec![data_arg, Expression::number(256)],
9340 ))))
9341 }
9342 "SHA2_512" => {
9343 Ok(Expression::Function(Box::new(Function::new(
9344 "SHA2".to_string(),
9345 vec![data_arg, Expression::number(512)],
9346 ))))
9347 }
9348 "MD5" => Ok(Expression::Function(Box::new(Function::new(
9349 "MD5".to_string(),
9350 vec![data_arg],
9351 )))),
9352 _ => Ok(Expression::Function(Box::new(Function::new(
9353 "HASHBYTES".to_string(),
9354 vec![Expression::string(&algo), data_arg],
9355 )))),
9356 }
9357 }
9358 // JSON_EXTRACT_PATH(json, key1, key2, ...) -> target-specific JSON extraction
9359 "JSON_EXTRACT_PATH" | "JSON_EXTRACT_PATH_TEXT" if f.args.len() >= 2 => {
9360 let is_text = name == "JSON_EXTRACT_PATH_TEXT";
9361 let mut args = f.args;
9362 let json_expr = args.remove(0);
9363 // Build JSON path from remaining keys: $.key1.key2 or $.key1[0]
9364 let mut json_path = "$".to_string();
9365 for a in &args {
9366 match a {
9367 Expression::Literal(
9368 crate::expressions::Literal::String(s),
9369 ) => {
9370 // Numeric string keys become array indices: [0]
9371 if s.chars().all(|c| c.is_ascii_digit()) {
9372 json_path.push('[');
9373 json_path.push_str(s);
9374 json_path.push(']');
9375 } else {
9376 json_path.push('.');
9377 json_path.push_str(s);
9378 }
9379 }
9380 _ => {
9381 json_path.push_str(".?");
9382 }
9383 }
9384 }
9385 match target {
9386 DialectType::Spark
9387 | DialectType::Databricks
9388 | DialectType::Hive => {
9389 Ok(Expression::Function(Box::new(Function::new(
9390 "GET_JSON_OBJECT".to_string(),
9391 vec![json_expr, Expression::string(&json_path)],
9392 ))))
9393 }
9394 DialectType::Presto | DialectType::Trino => {
9395 let func_name = if is_text {
9396 "JSON_EXTRACT_SCALAR"
9397 } else {
9398 "JSON_EXTRACT"
9399 };
9400 Ok(Expression::Function(Box::new(Function::new(
9401 func_name.to_string(),
9402 vec![json_expr, Expression::string(&json_path)],
9403 ))))
9404 }
9405 DialectType::BigQuery | DialectType::MySQL => {
9406 let func_name = if is_text {
9407 "JSON_EXTRACT_SCALAR"
9408 } else {
9409 "JSON_EXTRACT"
9410 };
9411 Ok(Expression::Function(Box::new(Function::new(
9412 func_name.to_string(),
9413 vec![json_expr, Expression::string(&json_path)],
9414 ))))
9415 }
9416 DialectType::PostgreSQL | DialectType::Materialize => {
9417 // Keep as JSON_EXTRACT_PATH_TEXT / JSON_EXTRACT_PATH for PostgreSQL/Materialize
9418 let func_name = if is_text {
9419 "JSON_EXTRACT_PATH_TEXT"
9420 } else {
9421 "JSON_EXTRACT_PATH"
9422 };
9423 let mut new_args = vec![json_expr];
9424 new_args.extend(args);
9425 Ok(Expression::Function(Box::new(Function::new(
9426 func_name.to_string(),
9427 new_args,
9428 ))))
9429 }
9430 DialectType::DuckDB | DialectType::SQLite => {
9431 // Use -> for JSON_EXTRACT_PATH, ->> for JSON_EXTRACT_PATH_TEXT
9432 if is_text {
9433 Ok(Expression::JsonExtractScalar(Box::new(
9434 crate::expressions::JsonExtractFunc {
9435 this: json_expr,
9436 path: Expression::string(&json_path),
9437 returning: None,
9438 arrow_syntax: true,
9439 hash_arrow_syntax: false,
9440 wrapper_option: None,
9441 quotes_option: None,
9442 on_scalar_string: false,
9443 on_error: None,
9444 },
9445 )))
9446 } else {
9447 Ok(Expression::JsonExtract(Box::new(
9448 crate::expressions::JsonExtractFunc {
9449 this: json_expr,
9450 path: Expression::string(&json_path),
9451 returning: None,
9452 arrow_syntax: true,
9453 hash_arrow_syntax: false,
9454 wrapper_option: None,
9455 quotes_option: None,
9456 on_scalar_string: false,
9457 on_error: None,
9458 },
9459 )))
9460 }
9461 }
9462 DialectType::Redshift => {
9463 // Keep as JSON_EXTRACT_PATH_TEXT for Redshift
9464 let mut new_args = vec![json_expr];
9465 new_args.extend(args);
9466 Ok(Expression::Function(Box::new(Function::new(
9467 "JSON_EXTRACT_PATH_TEXT".to_string(),
9468 new_args,
9469 ))))
9470 }
9471 DialectType::TSQL => {
9472 // ISNULL(JSON_QUERY(json, '$.path'), JSON_VALUE(json, '$.path'))
9473 let jq = Expression::Function(Box::new(Function::new(
9474 "JSON_QUERY".to_string(),
9475 vec![json_expr.clone(), Expression::string(&json_path)],
9476 )));
9477 let jv = Expression::Function(Box::new(Function::new(
9478 "JSON_VALUE".to_string(),
9479 vec![json_expr, Expression::string(&json_path)],
9480 )));
9481 Ok(Expression::Function(Box::new(Function::new(
9482 "ISNULL".to_string(),
9483 vec![jq, jv],
9484 ))))
9485 }
9486 DialectType::ClickHouse => {
9487 let func_name = if is_text {
9488 "JSONExtractString"
9489 } else {
9490 "JSONExtractRaw"
9491 };
9492 let mut new_args = vec![json_expr];
9493 new_args.extend(args);
9494 Ok(Expression::Function(Box::new(Function::new(
9495 func_name.to_string(),
9496 new_args,
9497 ))))
9498 }
9499 _ => {
9500 let func_name = if is_text {
9501 "JSON_EXTRACT_SCALAR"
9502 } else {
9503 "JSON_EXTRACT"
9504 };
9505 Ok(Expression::Function(Box::new(Function::new(
9506 func_name.to_string(),
9507 vec![json_expr, Expression::string(&json_path)],
9508 ))))
9509 }
9510 }
9511 }
9512 // APPROX_DISTINCT(x) -> APPROX_COUNT_DISTINCT(x) for Spark/Hive/BigQuery
9513 "APPROX_DISTINCT" if f.args.len() >= 1 => {
9514 let name = match target {
9515 DialectType::Spark
9516 | DialectType::Databricks
9517 | DialectType::Hive
9518 | DialectType::BigQuery => "APPROX_COUNT_DISTINCT",
9519 _ => "APPROX_DISTINCT",
9520 };
9521 let mut args = f.args;
9522 // Hive doesn't support the accuracy parameter
9523 if name == "APPROX_COUNT_DISTINCT"
9524 && matches!(target, DialectType::Hive)
9525 {
9526 args.truncate(1);
9527 }
9528 Ok(Expression::Function(Box::new(Function::new(
9529 name.to_string(),
9530 args,
9531 ))))
9532 }
9533 // REGEXP_EXTRACT(x, pattern) - normalize default group index
9534 "REGEXP_EXTRACT" if f.args.len() == 2 => {
9535 // Determine source default group index
9536 let source_default = match source {
9537 DialectType::Presto
9538 | DialectType::Trino
9539 | DialectType::DuckDB => 0,
9540 _ => 1, // Hive/Spark/Databricks default = 1
9541 };
9542 // Determine target default group index
9543 let target_default = match target {
9544 DialectType::Presto
9545 | DialectType::Trino
9546 | DialectType::DuckDB
9547 | DialectType::BigQuery => 0,
9548 DialectType::Snowflake => {
9549 // Snowflake uses REGEXP_SUBSTR
9550 return Ok(Expression::Function(Box::new(Function::new(
9551 "REGEXP_SUBSTR".to_string(),
9552 f.args,
9553 ))));
9554 }
9555 _ => 1, // Hive/Spark/Databricks default = 1
9556 };
9557 if source_default != target_default {
9558 let mut args = f.args;
9559 args.push(Expression::number(source_default));
9560 Ok(Expression::Function(Box::new(Function::new(
9561 "REGEXP_EXTRACT".to_string(),
9562 args,
9563 ))))
9564 } else {
9565 Ok(Expression::Function(Box::new(Function::new(
9566 "REGEXP_EXTRACT".to_string(),
9567 f.args,
9568 ))))
9569 }
9570 }
9571 // RLIKE(str, pattern) -> RegexpLike expression (generates as target-specific form)
9572 "RLIKE" if f.args.len() == 2 => {
9573 let mut args = f.args;
9574 let str_expr = args.remove(0);
9575 let pattern = args.remove(0);
9576 match target {
9577 DialectType::DuckDB => {
9578 // REGEXP_MATCHES(str, pattern)
9579 Ok(Expression::Function(Box::new(Function::new(
9580 "REGEXP_MATCHES".to_string(),
9581 vec![str_expr, pattern],
9582 ))))
9583 }
9584 _ => {
9585 // Convert to RegexpLike which generates as RLIKE/~/REGEXP_LIKE per dialect
9586 Ok(Expression::RegexpLike(Box::new(
9587 crate::expressions::RegexpFunc {
9588 this: str_expr,
9589 pattern,
9590 flags: None,
9591 },
9592 )))
9593 }
9594 }
9595 }
9596 // EOMONTH(date[, month_offset]) -> target-specific
9597 "EOMONTH" if f.args.len() >= 1 => {
9598 let mut args = f.args;
9599 let date_arg = args.remove(0);
9600 let month_offset = if !args.is_empty() {
9601 Some(args.remove(0))
9602 } else {
9603 None
9604 };
9605
9606 // Helper: wrap date in CAST to DATE
9607 let cast_to_date = |e: Expression| -> Expression {
9608 Expression::Cast(Box::new(Cast {
9609 this: e,
9610 to: DataType::Date,
9611 trailing_comments: vec![],
9612 double_colon_syntax: false,
9613 format: None,
9614 default: None,
9615 }))
9616 };
9617
9618 match target {
9619 DialectType::TSQL | DialectType::Fabric => {
9620 // TSQL: EOMONTH(CAST(date AS DATE)) or EOMONTH(DATEADD(MONTH, offset, CAST(date AS DATE)))
9621 let date = cast_to_date(date_arg);
9622 let date = if let Some(offset) = month_offset {
9623 Expression::Function(Box::new(Function::new(
9624 "DATEADD".to_string(),
9625 vec![
9626 Expression::Identifier(Identifier::new(
9627 "MONTH",
9628 )),
9629 offset,
9630 date,
9631 ],
9632 )))
9633 } else {
9634 date
9635 };
9636 Ok(Expression::Function(Box::new(Function::new(
9637 "EOMONTH".to_string(),
9638 vec![date],
9639 ))))
9640 }
9641 DialectType::Presto
9642 | DialectType::Trino
9643 | DialectType::Athena => {
9644 // Presto: LAST_DAY_OF_MONTH(CAST(CAST(date AS TIMESTAMP) AS DATE))
9645 // or with offset: LAST_DAY_OF_MONTH(DATE_ADD('MONTH', offset, CAST(CAST(date AS TIMESTAMP) AS DATE)))
9646 let cast_ts = Expression::Cast(Box::new(Cast {
9647 this: date_arg,
9648 to: DataType::Timestamp {
9649 timezone: false,
9650 precision: None,
9651 },
9652 trailing_comments: vec![],
9653 double_colon_syntax: false,
9654 format: None,
9655 default: None,
9656 }));
9657 let date = cast_to_date(cast_ts);
9658 let date = if let Some(offset) = month_offset {
9659 Expression::Function(Box::new(Function::new(
9660 "DATE_ADD".to_string(),
9661 vec![Expression::string("MONTH"), offset, date],
9662 )))
9663 } else {
9664 date
9665 };
9666 Ok(Expression::Function(Box::new(Function::new(
9667 "LAST_DAY_OF_MONTH".to_string(),
9668 vec![date],
9669 ))))
9670 }
9671 DialectType::PostgreSQL => {
9672 // PostgreSQL: CAST(DATE_TRUNC('MONTH', CAST(date AS DATE) [+ INTERVAL 'offset MONTH']) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)
9673 let date = cast_to_date(date_arg);
9674 let date = if let Some(offset) = month_offset {
9675 let interval_str = format!(
9676 "{} MONTH",
9677 Self::expr_to_string_static(&offset)
9678 );
9679 Expression::Add(Box::new(
9680 crate::expressions::BinaryOp::new(
9681 date,
9682 Expression::Interval(Box::new(
9683 crate::expressions::Interval {
9684 this: Some(Expression::string(
9685 &interval_str,
9686 )),
9687 unit: None,
9688 },
9689 )),
9690 ),
9691 ))
9692 } else {
9693 date
9694 };
9695 let truncated =
9696 Expression::Function(Box::new(Function::new(
9697 "DATE_TRUNC".to_string(),
9698 vec![Expression::string("MONTH"), date],
9699 )));
9700 let plus_month = Expression::Add(Box::new(
9701 crate::expressions::BinaryOp::new(
9702 truncated,
9703 Expression::Interval(Box::new(
9704 crate::expressions::Interval {
9705 this: Some(Expression::string("1 MONTH")),
9706 unit: None,
9707 },
9708 )),
9709 ),
9710 ));
9711 let minus_day = Expression::Sub(Box::new(
9712 crate::expressions::BinaryOp::new(
9713 plus_month,
9714 Expression::Interval(Box::new(
9715 crate::expressions::Interval {
9716 this: Some(Expression::string("1 DAY")),
9717 unit: None,
9718 },
9719 )),
9720 ),
9721 ));
9722 Ok(Expression::Cast(Box::new(Cast {
9723 this: minus_day,
9724 to: DataType::Date,
9725 trailing_comments: vec![],
9726 double_colon_syntax: false,
9727 format: None,
9728 default: None,
9729 })))
9730 }
9731 DialectType::DuckDB => {
9732 // DuckDB: LAST_DAY(CAST(date AS DATE) [+ INTERVAL (offset) MONTH])
9733 let date = cast_to_date(date_arg);
9734 let date = if let Some(offset) = month_offset {
9735 // Wrap negative numbers in parentheses for DuckDB INTERVAL
9736 let interval_val =
9737 if matches!(&offset, Expression::Neg(_)) {
9738 Expression::Paren(Box::new(
9739 crate::expressions::Paren {
9740 this: offset,
9741 trailing_comments: Vec::new(),
9742 },
9743 ))
9744 } else {
9745 offset
9746 };
9747 Expression::Add(Box::new(crate::expressions::BinaryOp::new(
9748 date,
9749 Expression::Interval(Box::new(crate::expressions::Interval {
9750 this: Some(interval_val),
9751 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
9752 unit: crate::expressions::IntervalUnit::Month,
9753 use_plural: false,
9754 }),
9755 })),
9756 )))
9757 } else {
9758 date
9759 };
9760 Ok(Expression::Function(Box::new(Function::new(
9761 "LAST_DAY".to_string(),
9762 vec![date],
9763 ))))
9764 }
9765 DialectType::Snowflake | DialectType::Redshift => {
9766 // Snowflake/Redshift: LAST_DAY(TO_DATE(date) or CAST(date AS DATE))
9767 // With offset: LAST_DAY(DATEADD(MONTH, offset, TO_DATE(date)))
9768 let date = if matches!(target, DialectType::Snowflake) {
9769 Expression::Function(Box::new(Function::new(
9770 "TO_DATE".to_string(),
9771 vec![date_arg],
9772 )))
9773 } else {
9774 cast_to_date(date_arg)
9775 };
9776 let date = if let Some(offset) = month_offset {
9777 Expression::Function(Box::new(Function::new(
9778 "DATEADD".to_string(),
9779 vec![
9780 Expression::Identifier(Identifier::new(
9781 "MONTH",
9782 )),
9783 offset,
9784 date,
9785 ],
9786 )))
9787 } else {
9788 date
9789 };
9790 Ok(Expression::Function(Box::new(Function::new(
9791 "LAST_DAY".to_string(),
9792 vec![date],
9793 ))))
9794 }
9795 DialectType::Spark | DialectType::Databricks => {
9796 // Spark: LAST_DAY(TO_DATE(date))
9797 // With offset: LAST_DAY(ADD_MONTHS(TO_DATE(date), offset))
9798 let date = Expression::Function(Box::new(Function::new(
9799 "TO_DATE".to_string(),
9800 vec![date_arg],
9801 )));
9802 let date = if let Some(offset) = month_offset {
9803 Expression::Function(Box::new(Function::new(
9804 "ADD_MONTHS".to_string(),
9805 vec![date, offset],
9806 )))
9807 } else {
9808 date
9809 };
9810 Ok(Expression::Function(Box::new(Function::new(
9811 "LAST_DAY".to_string(),
9812 vec![date],
9813 ))))
9814 }
9815 DialectType::MySQL => {
9816 // MySQL: LAST_DAY(DATE(date)) - no offset
9817 // With offset: LAST_DAY(DATE_ADD(date, INTERVAL offset MONTH)) - no DATE() wrapper
9818 let date = if let Some(offset) = month_offset {
9819 let iu = crate::expressions::IntervalUnit::Month;
9820 Expression::DateAdd(Box::new(
9821 crate::expressions::DateAddFunc {
9822 this: date_arg,
9823 interval: offset,
9824 unit: iu,
9825 },
9826 ))
9827 } else {
9828 Expression::Function(Box::new(Function::new(
9829 "DATE".to_string(),
9830 vec![date_arg],
9831 )))
9832 };
9833 Ok(Expression::Function(Box::new(Function::new(
9834 "LAST_DAY".to_string(),
9835 vec![date],
9836 ))))
9837 }
9838 DialectType::BigQuery => {
9839 // BigQuery: LAST_DAY(CAST(date AS DATE))
9840 // With offset: LAST_DAY(DATE_ADD(CAST(date AS DATE), INTERVAL offset MONTH))
9841 let date = cast_to_date(date_arg);
9842 let date = if let Some(offset) = month_offset {
9843 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
9844 this: Some(offset),
9845 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
9846 unit: crate::expressions::IntervalUnit::Month,
9847 use_plural: false,
9848 }),
9849 }));
9850 Expression::Function(Box::new(Function::new(
9851 "DATE_ADD".to_string(),
9852 vec![date, interval],
9853 )))
9854 } else {
9855 date
9856 };
9857 Ok(Expression::Function(Box::new(Function::new(
9858 "LAST_DAY".to_string(),
9859 vec![date],
9860 ))))
9861 }
9862 DialectType::ClickHouse => {
9863 // ClickHouse: LAST_DAY(CAST(date AS Nullable(DATE)))
9864 let date = Expression::Cast(Box::new(Cast {
9865 this: date_arg,
9866 to: DataType::Nullable {
9867 inner: Box::new(DataType::Date),
9868 },
9869 trailing_comments: vec![],
9870 double_colon_syntax: false,
9871 format: None,
9872 default: None,
9873 }));
9874 let date = if let Some(offset) = month_offset {
9875 Expression::Function(Box::new(Function::new(
9876 "DATE_ADD".to_string(),
9877 vec![
9878 Expression::Identifier(Identifier::new(
9879 "MONTH",
9880 )),
9881 offset,
9882 date,
9883 ],
9884 )))
9885 } else {
9886 date
9887 };
9888 Ok(Expression::Function(Box::new(Function::new(
9889 "LAST_DAY".to_string(),
9890 vec![date],
9891 ))))
9892 }
9893 DialectType::Hive => {
9894 // Hive: LAST_DAY(date)
9895 let date = if let Some(offset) = month_offset {
9896 Expression::Function(Box::new(Function::new(
9897 "ADD_MONTHS".to_string(),
9898 vec![date_arg, offset],
9899 )))
9900 } else {
9901 date_arg
9902 };
9903 Ok(Expression::Function(Box::new(Function::new(
9904 "LAST_DAY".to_string(),
9905 vec![date],
9906 ))))
9907 }
9908 _ => {
9909 // Default: LAST_DAY(date)
9910 let date = if let Some(offset) = month_offset {
9911 let unit =
9912 Expression::Identifier(Identifier::new("MONTH"));
9913 Expression::Function(Box::new(Function::new(
9914 "DATEADD".to_string(),
9915 vec![unit, offset, date_arg],
9916 )))
9917 } else {
9918 date_arg
9919 };
9920 Ok(Expression::Function(Box::new(Function::new(
9921 "LAST_DAY".to_string(),
9922 vec![date],
9923 ))))
9924 }
9925 }
9926 }
9927 // LAST_DAY(x) / LAST_DAY_OF_MONTH(x) -> target-specific
9928 "LAST_DAY" | "LAST_DAY_OF_MONTH"
9929 if !matches!(source, DialectType::BigQuery)
9930 && f.args.len() >= 1 =>
9931 {
9932 let first_arg = f.args.into_iter().next().unwrap();
9933 match target {
9934 DialectType::TSQL | DialectType::Fabric => {
9935 Ok(Expression::Function(Box::new(Function::new(
9936 "EOMONTH".to_string(),
9937 vec![first_arg],
9938 ))))
9939 }
9940 DialectType::Presto
9941 | DialectType::Trino
9942 | DialectType::Athena => {
9943 Ok(Expression::Function(Box::new(Function::new(
9944 "LAST_DAY_OF_MONTH".to_string(),
9945 vec![first_arg],
9946 ))))
9947 }
9948 _ => Ok(Expression::Function(Box::new(Function::new(
9949 "LAST_DAY".to_string(),
9950 vec![first_arg],
9951 )))),
9952 }
9953 }
9954 // MAP(keys_array, vals_array) from Presto (2-arg form) -> target-specific
9955 "MAP"
9956 if f.args.len() == 2
9957 && matches!(
9958 source,
9959 DialectType::Presto
9960 | DialectType::Trino
9961 | DialectType::Athena
9962 ) =>
9963 {
9964 let keys_arg = f.args[0].clone();
9965 let vals_arg = f.args[1].clone();
9966
9967 // Helper: extract array elements from Array/ArrayFunc/Function("ARRAY") expressions
9968 fn extract_array_elements(
9969 expr: &Expression,
9970 ) -> Option<&Vec<Expression>> {
9971 match expr {
9972 Expression::Array(arr) => Some(&arr.expressions),
9973 Expression::ArrayFunc(arr) => Some(&arr.expressions),
9974 Expression::Function(f)
9975 if f.name.eq_ignore_ascii_case("ARRAY") =>
9976 {
9977 Some(&f.args)
9978 }
9979 _ => None,
9980 }
9981 }
9982
9983 match target {
9984 DialectType::Spark | DialectType::Databricks => {
9985 // Presto MAP(keys, vals) -> Spark MAP_FROM_ARRAYS(keys, vals)
9986 Ok(Expression::Function(Box::new(Function::new(
9987 "MAP_FROM_ARRAYS".to_string(),
9988 f.args,
9989 ))))
9990 }
9991 DialectType::Hive => {
9992 // Presto MAP(ARRAY[k1,k2], ARRAY[v1,v2]) -> Hive MAP(k1, v1, k2, v2)
9993 if let (Some(keys), Some(vals)) = (
9994 extract_array_elements(&keys_arg),
9995 extract_array_elements(&vals_arg),
9996 ) {
9997 if keys.len() == vals.len() {
9998 let mut interleaved = Vec::new();
9999 for (k, v) in keys.iter().zip(vals.iter()) {
10000 interleaved.push(k.clone());
10001 interleaved.push(v.clone());
10002 }
10003 Ok(Expression::Function(Box::new(Function::new(
10004 "MAP".to_string(),
10005 interleaved,
10006 ))))
10007 } else {
10008 Ok(Expression::Function(Box::new(Function::new(
10009 "MAP".to_string(),
10010 f.args,
10011 ))))
10012 }
10013 } else {
10014 Ok(Expression::Function(Box::new(Function::new(
10015 "MAP".to_string(),
10016 f.args,
10017 ))))
10018 }
10019 }
10020 DialectType::Snowflake => {
10021 // Presto MAP(ARRAY[k1,k2], ARRAY[v1,v2]) -> Snowflake OBJECT_CONSTRUCT(k1, v1, k2, v2)
10022 if let (Some(keys), Some(vals)) = (
10023 extract_array_elements(&keys_arg),
10024 extract_array_elements(&vals_arg),
10025 ) {
10026 if keys.len() == vals.len() {
10027 let mut interleaved = Vec::new();
10028 for (k, v) in keys.iter().zip(vals.iter()) {
10029 interleaved.push(k.clone());
10030 interleaved.push(v.clone());
10031 }
10032 Ok(Expression::Function(Box::new(Function::new(
10033 "OBJECT_CONSTRUCT".to_string(),
10034 interleaved,
10035 ))))
10036 } else {
10037 Ok(Expression::Function(Box::new(Function::new(
10038 "MAP".to_string(),
10039 f.args,
10040 ))))
10041 }
10042 } else {
10043 Ok(Expression::Function(Box::new(Function::new(
10044 "MAP".to_string(),
10045 f.args,
10046 ))))
10047 }
10048 }
10049 _ => Ok(Expression::Function(f)),
10050 }
10051 }
10052 // MAP() with 0 args from Spark -> MAP(ARRAY[], ARRAY[]) for Presto/Trino
10053 "MAP"
10054 if f.args.is_empty()
10055 && matches!(
10056 source,
10057 DialectType::Hive
10058 | DialectType::Spark
10059 | DialectType::Databricks
10060 )
10061 && matches!(
10062 target,
10063 DialectType::Presto
10064 | DialectType::Trino
10065 | DialectType::Athena
10066 ) =>
10067 {
10068 let empty_keys =
10069 Expression::Array(Box::new(crate::expressions::Array {
10070 expressions: vec![],
10071 }));
10072 let empty_vals =
10073 Expression::Array(Box::new(crate::expressions::Array {
10074 expressions: vec![],
10075 }));
10076 Ok(Expression::Function(Box::new(Function::new(
10077 "MAP".to_string(),
10078 vec![empty_keys, empty_vals],
10079 ))))
10080 }
10081 // MAP(k1, v1, k2, v2, ...) from Hive/Spark -> target-specific
10082 "MAP"
10083 if f.args.len() >= 2
10084 && f.args.len() % 2 == 0
10085 && matches!(
10086 source,
10087 DialectType::Hive
10088 | DialectType::Spark
10089 | DialectType::Databricks
10090 | DialectType::ClickHouse
10091 ) =>
10092 {
10093 let args = f.args;
10094 match target {
10095 DialectType::DuckDB => {
10096 // MAP([k1, k2], [v1, v2])
10097 let mut keys = Vec::new();
10098 let mut vals = Vec::new();
10099 for (i, arg) in args.into_iter().enumerate() {
10100 if i % 2 == 0 {
10101 keys.push(arg);
10102 } else {
10103 vals.push(arg);
10104 }
10105 }
10106 let keys_arr = Expression::Array(Box::new(
10107 crate::expressions::Array { expressions: keys },
10108 ));
10109 let vals_arr = Expression::Array(Box::new(
10110 crate::expressions::Array { expressions: vals },
10111 ));
10112 Ok(Expression::Function(Box::new(Function::new(
10113 "MAP".to_string(),
10114 vec![keys_arr, vals_arr],
10115 ))))
10116 }
10117 DialectType::Presto | DialectType::Trino => {
10118 // MAP(ARRAY[k1, k2], ARRAY[v1, v2])
10119 let mut keys = Vec::new();
10120 let mut vals = Vec::new();
10121 for (i, arg) in args.into_iter().enumerate() {
10122 if i % 2 == 0 {
10123 keys.push(arg);
10124 } else {
10125 vals.push(arg);
10126 }
10127 }
10128 let keys_arr = Expression::Array(Box::new(
10129 crate::expressions::Array { expressions: keys },
10130 ));
10131 let vals_arr = Expression::Array(Box::new(
10132 crate::expressions::Array { expressions: vals },
10133 ));
10134 Ok(Expression::Function(Box::new(Function::new(
10135 "MAP".to_string(),
10136 vec![keys_arr, vals_arr],
10137 ))))
10138 }
10139 DialectType::Snowflake => Ok(Expression::Function(Box::new(
10140 Function::new("OBJECT_CONSTRUCT".to_string(), args),
10141 ))),
10142 DialectType::ClickHouse => Ok(Expression::Function(Box::new(
10143 Function::new("map".to_string(), args),
10144 ))),
10145 _ => Ok(Expression::Function(Box::new(Function::new(
10146 "MAP".to_string(),
10147 args,
10148 )))),
10149 }
10150 }
10151 // COLLECT_LIST(x) -> ARRAY_AGG(x) for most targets
10152 "COLLECT_LIST" if f.args.len() >= 1 => {
10153 let name = match target {
10154 DialectType::Spark
10155 | DialectType::Databricks
10156 | DialectType::Hive => "COLLECT_LIST",
10157 DialectType::DuckDB
10158 | DialectType::PostgreSQL
10159 | DialectType::Redshift
10160 | DialectType::Snowflake
10161 | DialectType::BigQuery => "ARRAY_AGG",
10162 DialectType::Presto | DialectType::Trino => "ARRAY_AGG",
10163 _ => "ARRAY_AGG",
10164 };
10165 Ok(Expression::Function(Box::new(Function::new(
10166 name.to_string(),
10167 f.args,
10168 ))))
10169 }
10170 // COLLECT_SET(x) -> target-specific distinct array aggregation
10171 "COLLECT_SET" if f.args.len() >= 1 => {
10172 let name = match target {
10173 DialectType::Spark
10174 | DialectType::Databricks
10175 | DialectType::Hive => "COLLECT_SET",
10176 DialectType::Presto
10177 | DialectType::Trino
10178 | DialectType::Athena => "SET_AGG",
10179 DialectType::Snowflake => "ARRAY_UNIQUE_AGG",
10180 _ => "ARRAY_AGG",
10181 };
10182 Ok(Expression::Function(Box::new(Function::new(
10183 name.to_string(),
10184 f.args,
10185 ))))
10186 }
10187 // ISNAN(x) / IS_NAN(x) - normalize
10188 "ISNAN" | "IS_NAN" => {
10189 let name = match target {
10190 DialectType::Spark
10191 | DialectType::Databricks
10192 | DialectType::Hive => "ISNAN",
10193 DialectType::Presto
10194 | DialectType::Trino
10195 | DialectType::Athena => "IS_NAN",
10196 DialectType::BigQuery
10197 | DialectType::PostgreSQL
10198 | DialectType::Redshift => "IS_NAN",
10199 DialectType::ClickHouse => "IS_NAN",
10200 _ => "ISNAN",
10201 };
10202 Ok(Expression::Function(Box::new(Function::new(
10203 name.to_string(),
10204 f.args,
10205 ))))
10206 }
10207 // SPLIT_PART(str, delim, index) -> target-specific
10208 "SPLIT_PART" if f.args.len() == 3 => {
10209 match target {
10210 DialectType::Spark | DialectType::Databricks => {
10211 // Keep as SPLIT_PART (Spark 3.4+)
10212 Ok(Expression::Function(Box::new(Function::new(
10213 "SPLIT_PART".to_string(),
10214 f.args,
10215 ))))
10216 }
10217 DialectType::DuckDB
10218 | DialectType::PostgreSQL
10219 | DialectType::Snowflake
10220 | DialectType::Redshift
10221 | DialectType::Trino
10222 | DialectType::Presto => Ok(Expression::Function(Box::new(
10223 Function::new("SPLIT_PART".to_string(), f.args),
10224 ))),
10225 DialectType::Hive => {
10226 // SPLIT(str, delim)[index]
10227 // Complex conversion, just keep as-is for now
10228 Ok(Expression::Function(Box::new(Function::new(
10229 "SPLIT_PART".to_string(),
10230 f.args,
10231 ))))
10232 }
10233 _ => Ok(Expression::Function(Box::new(Function::new(
10234 "SPLIT_PART".to_string(),
10235 f.args,
10236 )))),
10237 }
10238 }
10239 // JSON_EXTRACT(json, path) -> target-specific JSON extraction
10240 "JSON_EXTRACT" | "JSON_EXTRACT_SCALAR" if f.args.len() == 2 => {
10241 let is_scalar = name == "JSON_EXTRACT_SCALAR";
10242 match target {
10243 DialectType::Spark
10244 | DialectType::Databricks
10245 | DialectType::Hive => {
10246 let mut args = f.args;
10247 // Spark/Hive don't support Presto's TRY(expr) wrapper form here.
10248 // Mirror sqlglot by unwrapping TRY(expr) to expr before GET_JSON_OBJECT.
10249 if let Some(Expression::Function(inner)) = args.first() {
10250 if inner.name.eq_ignore_ascii_case("TRY")
10251 && inner.args.len() == 1
10252 {
10253 let mut inner_args = inner.args.clone();
10254 args[0] = inner_args.remove(0);
10255 }
10256 }
10257 Ok(Expression::Function(Box::new(Function::new(
10258 "GET_JSON_OBJECT".to_string(),
10259 args,
10260 ))))
10261 }
10262 DialectType::DuckDB | DialectType::SQLite => {
10263 // json -> path syntax
10264 let mut args = f.args;
10265 let json_expr = args.remove(0);
10266 let path = args.remove(0);
10267 Ok(Expression::JsonExtract(Box::new(
10268 crate::expressions::JsonExtractFunc {
10269 this: json_expr,
10270 path,
10271 returning: None,
10272 arrow_syntax: true,
10273 hash_arrow_syntax: false,
10274 wrapper_option: None,
10275 quotes_option: None,
10276 on_scalar_string: false,
10277 on_error: None,
10278 },
10279 )))
10280 }
10281 DialectType::TSQL => {
10282 let func_name = if is_scalar {
10283 "JSON_VALUE"
10284 } else {
10285 "JSON_QUERY"
10286 };
10287 Ok(Expression::Function(Box::new(Function::new(
10288 func_name.to_string(),
10289 f.args,
10290 ))))
10291 }
10292 DialectType::PostgreSQL | DialectType::Redshift => {
10293 let func_name = if is_scalar {
10294 "JSON_EXTRACT_PATH_TEXT"
10295 } else {
10296 "JSON_EXTRACT_PATH"
10297 };
10298 Ok(Expression::Function(Box::new(Function::new(
10299 func_name.to_string(),
10300 f.args,
10301 ))))
10302 }
10303 _ => Ok(Expression::Function(Box::new(Function::new(
10304 name.to_string(),
10305 f.args,
10306 )))),
10307 }
10308 }
10309 // SingleStore JSON_EXTRACT_JSON(json, key1, key2, ...) -> JSON_EXTRACT(json, '$.key1.key2' or '$.key1[key2]')
10310 // BSON_EXTRACT_BSON(json, key1, ...) -> JSONB_EXTRACT(json, '$.key1')
10311 "JSON_EXTRACT_JSON" | "BSON_EXTRACT_BSON"
10312 if f.args.len() >= 2
10313 && matches!(source, DialectType::SingleStore) =>
10314 {
10315 let is_bson = name == "BSON_EXTRACT_BSON";
10316 let mut args = f.args;
10317 let json_expr = args.remove(0);
10318
10319 // Build JSONPath from remaining arguments
10320 let mut path = String::from("$");
10321 for arg in &args {
10322 if let Expression::Literal(
10323 crate::expressions::Literal::String(s),
10324 ) = arg
10325 {
10326 // Check if it's a numeric string (array index)
10327 if s.parse::<i64>().is_ok() {
10328 path.push('[');
10329 path.push_str(s);
10330 path.push(']');
10331 } else {
10332 path.push('.');
10333 path.push_str(s);
10334 }
10335 }
10336 }
10337
10338 let target_func = if is_bson {
10339 "JSONB_EXTRACT"
10340 } else {
10341 "JSON_EXTRACT"
10342 };
10343 Ok(Expression::Function(Box::new(Function::new(
10344 target_func.to_string(),
10345 vec![json_expr, Expression::string(&path)],
10346 ))))
10347 }
10348 // ARRAY_SUM(lambda, array) from Doris -> ClickHouse arraySum
10349 "ARRAY_SUM" if matches!(target, DialectType::ClickHouse) => {
10350 Ok(Expression::Function(Box::new(Function {
10351 name: "arraySum".to_string(),
10352 args: f.args,
10353 distinct: f.distinct,
10354 trailing_comments: f.trailing_comments,
10355 use_bracket_syntax: f.use_bracket_syntax,
10356 no_parens: f.no_parens,
10357 quoted: f.quoted,
10358 span: None,
10359 })))
10360 }
10361 // TSQL JSON_QUERY/JSON_VALUE -> target-specific
10362 // Note: For TSQL->TSQL, JsonQuery stays as Expression::JsonQuery (source transform not called)
10363 // and is handled by JsonQueryValueConvert action. This handles the case where
10364 // TSQL read transform converted JsonQuery to Function("JSON_QUERY") for cross-dialect.
10365 "JSON_QUERY" | "JSON_VALUE"
10366 if f.args.len() == 2
10367 && matches!(
10368 source,
10369 DialectType::TSQL | DialectType::Fabric
10370 ) =>
10371 {
10372 match target {
10373 DialectType::Spark
10374 | DialectType::Databricks
10375 | DialectType::Hive => Ok(Expression::Function(Box::new(
10376 Function::new("GET_JSON_OBJECT".to_string(), f.args),
10377 ))),
10378 _ => Ok(Expression::Function(Box::new(Function::new(
10379 name.to_string(),
10380 f.args,
10381 )))),
10382 }
10383 }
10384 // UNIX_TIMESTAMP(x) -> TO_UNIXTIME(x) for Presto
10385 "UNIX_TIMESTAMP" if f.args.len() == 1 => {
10386 let arg = f.args.into_iter().next().unwrap();
10387 let is_hive_source = matches!(
10388 source,
10389 DialectType::Hive
10390 | DialectType::Spark
10391 | DialectType::Databricks
10392 );
10393 match target {
10394 DialectType::DuckDB if is_hive_source => {
10395 // DuckDB: EPOCH(STRPTIME(x, '%Y-%m-%d %H:%M:%S'))
10396 let strptime =
10397 Expression::Function(Box::new(Function::new(
10398 "STRPTIME".to_string(),
10399 vec![arg, Expression::string("%Y-%m-%d %H:%M:%S")],
10400 )));
10401 Ok(Expression::Function(Box::new(Function::new(
10402 "EPOCH".to_string(),
10403 vec![strptime],
10404 ))))
10405 }
10406 DialectType::Presto | DialectType::Trino if is_hive_source => {
10407 // Presto: TO_UNIXTIME(COALESCE(TRY(DATE_PARSE(CAST(x AS VARCHAR), '%Y-%m-%d %T')), PARSE_DATETIME(DATE_FORMAT(x, '%Y-%m-%d %T'), 'yyyy-MM-dd HH:mm:ss')))
10408 let cast_varchar =
10409 Expression::Cast(Box::new(crate::expressions::Cast {
10410 this: arg.clone(),
10411 to: DataType::VarChar {
10412 length: None,
10413 parenthesized_length: false,
10414 },
10415 trailing_comments: vec![],
10416 double_colon_syntax: false,
10417 format: None,
10418 default: None,
10419 }));
10420 let date_parse =
10421 Expression::Function(Box::new(Function::new(
10422 "DATE_PARSE".to_string(),
10423 vec![
10424 cast_varchar,
10425 Expression::string("%Y-%m-%d %T"),
10426 ],
10427 )));
10428 let try_expr = Expression::Function(Box::new(
10429 Function::new("TRY".to_string(), vec![date_parse]),
10430 ));
10431 let date_format =
10432 Expression::Function(Box::new(Function::new(
10433 "DATE_FORMAT".to_string(),
10434 vec![arg, Expression::string("%Y-%m-%d %T")],
10435 )));
10436 let parse_datetime =
10437 Expression::Function(Box::new(Function::new(
10438 "PARSE_DATETIME".to_string(),
10439 vec![
10440 date_format,
10441 Expression::string("yyyy-MM-dd HH:mm:ss"),
10442 ],
10443 )));
10444 let coalesce =
10445 Expression::Function(Box::new(Function::new(
10446 "COALESCE".to_string(),
10447 vec![try_expr, parse_datetime],
10448 )));
10449 Ok(Expression::Function(Box::new(Function::new(
10450 "TO_UNIXTIME".to_string(),
10451 vec![coalesce],
10452 ))))
10453 }
10454 DialectType::Presto | DialectType::Trino => {
10455 Ok(Expression::Function(Box::new(Function::new(
10456 "TO_UNIXTIME".to_string(),
10457 vec![arg],
10458 ))))
10459 }
10460 _ => Ok(Expression::Function(Box::new(Function::new(
10461 "UNIX_TIMESTAMP".to_string(),
10462 vec![arg],
10463 )))),
10464 }
10465 }
10466 // TO_UNIX_TIMESTAMP(x) -> UNIX_TIMESTAMP(x) for Spark/Hive
10467 "TO_UNIX_TIMESTAMP" if f.args.len() >= 1 => match target {
10468 DialectType::Spark
10469 | DialectType::Databricks
10470 | DialectType::Hive => Ok(Expression::Function(Box::new(
10471 Function::new("UNIX_TIMESTAMP".to_string(), f.args),
10472 ))),
10473 _ => Ok(Expression::Function(Box::new(Function::new(
10474 "TO_UNIX_TIMESTAMP".to_string(),
10475 f.args,
10476 )))),
10477 },
10478 // CURDATE() -> CURRENT_DATE
10479 "CURDATE" => {
10480 Ok(Expression::CurrentDate(crate::expressions::CurrentDate))
10481 }
10482 // CURTIME() -> CURRENT_TIME
10483 "CURTIME" => {
10484 Ok(Expression::CurrentTime(crate::expressions::CurrentTime {
10485 precision: None,
10486 }))
10487 }
10488 // ARRAY_SORT(x) or ARRAY_SORT(x, lambda) -> SORT_ARRAY(x) for Hive (drop lambda)
10489 "ARRAY_SORT" if f.args.len() >= 1 => {
10490 match target {
10491 DialectType::Hive => {
10492 let mut args = f.args;
10493 args.truncate(1); // Drop lambda comparator
10494 Ok(Expression::Function(Box::new(Function::new(
10495 "SORT_ARRAY".to_string(),
10496 args,
10497 ))))
10498 }
10499 _ => Ok(Expression::Function(f)),
10500 }
10501 }
10502 // SORT_ARRAY(x) -> ARRAY_SORT(x) for non-Hive/Spark
10503 "SORT_ARRAY" if f.args.len() == 1 => match target {
10504 DialectType::Hive
10505 | DialectType::Spark
10506 | DialectType::Databricks => Ok(Expression::Function(f)),
10507 _ => Ok(Expression::Function(Box::new(Function::new(
10508 "ARRAY_SORT".to_string(),
10509 f.args,
10510 )))),
10511 },
        // SORT_ARRAY(x, bool): the second argument selects ascending (TRUE)
        // or descending (FALSE) order.
        // Descending: DuckDB gets ARRAY_REVERSE_SORT(x); Presto/Trino get
        // ARRAY_SORT(x, comparator-lambda). Ascending collapses to the
        // one-argument ARRAY_SORT(x).
        "SORT_ARRAY" if f.args.len() == 2 => {
            // Only a literal FALSE counts as descending; any other second
            // argument falls through to the ascending branch below.
            let is_desc =
                matches!(&f.args[1], Expression::Boolean(b) if !b.value);
            if is_desc {
                match target {
                    DialectType::DuckDB => {
                        Ok(Expression::Function(Box::new(Function::new(
                            "ARRAY_REVERSE_SORT".to_string(),
                            vec![f.args.into_iter().next().unwrap()],
                        ))))
                    }
                    DialectType::Presto | DialectType::Trino => {
                        // Presto/Trino has no descending flag. Build
                        // ARRAY_SORT(x, (a, b) -> CASE WHEN a < b THEN 1
                        // WHEN a > b THEN -1 ELSE 0 END): a comparator
                        // lambda that inverts the natural order.
                        let arr_arg = f.args.into_iter().next().unwrap();
                        // Lambda parameters referenced inside the CASE body.
                        let a =
                            Expression::Column(crate::expressions::Column {
                                name: crate::expressions::Identifier::new("a"),
                                table: None,
                                join_mark: false,
                                trailing_comments: Vec::new(),
                                span: None,
                            });
                        let b =
                            Expression::Column(crate::expressions::Column {
                                name: crate::expressions::Identifier::new("b"),
                                table: None,
                                join_mark: false,
                                trailing_comments: Vec::new(),
                                span: None,
                            });
                        let case_expr = Expression::Case(Box::new(
                            crate::expressions::Case {
                                operand: None,
                                whens: vec![
                                    (
                                        Expression::Lt(Box::new(
                                            BinaryOp::new(a.clone(), b.clone()),
                                        )),
                                        Expression::Literal(Literal::Number(
                                            "1".to_string(),
                                        )),
                                    ),
                                    (
                                        Expression::Gt(Box::new(
                                            BinaryOp::new(a.clone(), b.clone()),
                                        )),
                                        Expression::Literal(Literal::Number(
                                            "-1".to_string(),
                                        )),
                                    ),
                                ],
                                else_: Some(Expression::Literal(
                                    Literal::Number("0".to_string()),
                                )),
                                comments: Vec::new(),
                            },
                        ));
                        let lambda = Expression::Lambda(Box::new(
                            crate::expressions::LambdaExpr {
                                parameters: vec![
                                    crate::expressions::Identifier::new("a"),
                                    crate::expressions::Identifier::new("b"),
                                ],
                                body: case_expr,
                                colon: false,
                                parameter_types: Vec::new(),
                            },
                        ));
                        Ok(Expression::Function(Box::new(Function::new(
                            "ARRAY_SORT".to_string(),
                            vec![arr_arg, lambda],
                        ))))
                    }
                    // Other targets keep SORT_ARRAY(x, FALSE) unchanged.
                    _ => Ok(Expression::Function(f)),
                }
            } else {
                // SORT_ARRAY(x, TRUE) -> ARRAY_SORT(x); Hive keeps the
                // native two-argument form.
                match target {
                    DialectType::Hive => Ok(Expression::Function(f)),
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "ARRAY_SORT".to_string(),
                        vec![f.args.into_iter().next().unwrap()],
                    )))),
                }
            }
        }
        // LEFT(x, n) -> SUBSTRING(x, 1, n) for targets without LEFT/RIGHT.
        "LEFT" if f.args.len() == 2 => {
            match target {
                DialectType::Hive
                | DialectType::Presto
                | DialectType::Trino
                | DialectType::Athena => {
                    let x = f.args[0].clone();
                    let n = f.args[1].clone();
                    Ok(Expression::Function(Box::new(Function::new(
                        "SUBSTRING".to_string(),
                        vec![x, Expression::number(1), n],
                    ))))
                }
                DialectType::Spark | DialectType::Databricks
                    if matches!(
                        source,
                        DialectType::TSQL | DialectType::Fabric
                    ) =>
                {
                    // TSQL LEFT(x, n) -> LEFT(CAST(x AS STRING), n) for Spark:
                    // an explicit string cast stands in for TSQL's implicit
                    // coercion of the first argument.
                    let x = f.args[0].clone();
                    let n = f.args[1].clone();
                    let cast_x = Expression::Cast(Box::new(Cast {
                        this: x,
                        to: DataType::VarChar {
                            length: None,
                            parenthesized_length: false,
                        },
                        double_colon_syntax: false,
                        trailing_comments: Vec::new(),
                        format: None,
                        default: None,
                    }));
                    Ok(Expression::Function(Box::new(Function::new(
                        "LEFT".to_string(),
                        vec![cast_x, n],
                    ))))
                }
                _ => Ok(Expression::Function(f)),
            }
        }
        // RIGHT(x, n) -> SUBSTRING(x, LENGTH(x) - (n - 1)) for targets
        // without RIGHT; the offset is the 1-based start of the last n chars.
        "RIGHT" if f.args.len() == 2 => {
            match target {
                DialectType::Hive
                | DialectType::Presto
                | DialectType::Trino
                | DialectType::Athena => {
                    let x = f.args[0].clone();
                    let n = f.args[1].clone();
                    // SUBSTRING(x, LENGTH(x) - (n - 1))
                    let len_x = Expression::Function(Box::new(Function::new(
                        "LENGTH".to_string(),
                        vec![x.clone()],
                    )));
                    let n_minus_1 = Expression::Sub(Box::new(
                        crate::expressions::BinaryOp::new(
                            n,
                            Expression::number(1),
                        ),
                    ));
                    // Explicit parentheses around (n - 1) so the rendered
                    // SQL subtracts the whole quantity from LENGTH(x).
                    let n_minus_1_paren = Expression::Paren(Box::new(
                        crate::expressions::Paren {
                            this: n_minus_1,
                            trailing_comments: Vec::new(),
                        },
                    ));
                    let offset = Expression::Sub(Box::new(
                        crate::expressions::BinaryOp::new(
                            len_x,
                            n_minus_1_paren,
                        ),
                    ));
                    Ok(Expression::Function(Box::new(Function::new(
                        "SUBSTRING".to_string(),
                        vec![x, offset],
                    ))))
                }
                DialectType::Spark | DialectType::Databricks
                    if matches!(
                        source,
                        DialectType::TSQL | DialectType::Fabric
                    ) =>
                {
                    // TSQL RIGHT(x, n) -> RIGHT(CAST(x AS STRING), n) for Spark
                    let x = f.args[0].clone();
                    let n = f.args[1].clone();
                    let cast_x = Expression::Cast(Box::new(Cast {
                        this: x,
                        to: DataType::VarChar {
                            length: None,
                            parenthesized_length: false,
                        },
                        double_colon_syntax: false,
                        trailing_comments: Vec::new(),
                        format: None,
                        default: None,
                    }));
                    Ok(Expression::Function(Box::new(Function::new(
                        "RIGHT".to_string(),
                        vec![cast_x, n],
                    ))))
                }
                _ => Ok(Expression::Function(f)),
            }
        }
        // MAP_FROM_ARRAYS(keys, vals) -> target-specific map construction:
        // Snowflake uses OBJECT_CONSTRUCT, the Spark family keeps the name,
        // everything else falls back to MAP(keys, vals).
        "MAP_FROM_ARRAYS" if f.args.len() == 2 => match target {
            DialectType::Snowflake => Ok(Expression::Function(Box::new(
                Function::new("OBJECT_CONSTRUCT".to_string(), f.args),
            ))),
            DialectType::Spark | DialectType::Databricks => {
                Ok(Expression::Function(Box::new(Function::new(
                    "MAP_FROM_ARRAYS".to_string(),
                    f.args,
                ))))
            }
            _ => Ok(Expression::Function(Box::new(Function::new(
                "MAP".to_string(),
                f.args,
            )))),
        },
        // LIKE(foo, 'pat') -> foo LIKE 'pat'; a third argument becomes an
        // ESCAPE clause: LIKE(foo, 'pat', '!') -> foo LIKE 'pat' ESCAPE '!'.
        // SQLite's LIKE(pattern, string[, escape]) takes the first two
        // arguments in reverse order, so they are swapped for that source.
        "LIKE" if f.args.len() >= 2 => {
            let (this, pattern) = if matches!(source, DialectType::SQLite) {
                // SQLite: LIKE(pattern, string) -> string LIKE pattern
                (f.args[1].clone(), f.args[0].clone())
            } else {
                // Standard: LIKE(string, pattern) -> string LIKE pattern
                (f.args[0].clone(), f.args[1].clone())
            };
            // The escape character sits third in both argument layouts.
            let escape = if f.args.len() >= 3 {
                Some(f.args[2].clone())
            } else {
                None
            };
            Ok(Expression::Like(Box::new(crate::expressions::LikeOp {
                left: this,
                right: pattern,
                escape,
                quantifier: None,
            })))
        }
        // ILIKE(foo, 'pat') -> foo ILIKE 'pat'. Unlike the LIKE arm above,
        // no argument-order swap is applied here.
        "ILIKE" if f.args.len() >= 2 => {
            let this = f.args[0].clone();
            let pattern = f.args[1].clone();
            let escape = if f.args.len() >= 3 {
                Some(f.args[2].clone())
            } else {
                None
            };
            Ok(Expression::ILike(Box::new(crate::expressions::LikeOp {
                left: this,
                right: pattern,
                escape,
                quantifier: None,
            })))
        }
        // CHAR(n) -> CHR(n) for targets where CHAR is not the
        // codepoint-to-character function (MySQL-family and TSQL keep CHAR).
        "CHAR" if f.args.len() == 1 => match target {
            DialectType::MySQL
            | DialectType::SingleStore
            | DialectType::TSQL => Ok(Expression::Function(f)),
            _ => Ok(Expression::Function(Box::new(Function::new(
                "CHR".to_string(),
                f.args,
            )))),
        },
        // Two-argument CONCAT(a, b) -> a || b for PostgreSQL, only when the
        // source is ClickHouse or MySQL.
        "CONCAT"
            if f.args.len() == 2
                && matches!(target, DialectType::PostgreSQL)
                && matches!(
                    source,
                    DialectType::ClickHouse | DialectType::MySQL
                ) =>
        {
            // Pop from the back so left/right keep the original order.
            let mut args = f.args;
            let right = args.pop().unwrap();
            let left = args.pop().unwrap();
            Ok(Expression::DPipe(Box::new(crate::expressions::DPipe {
                this: Box::new(left),
                expression: Box::new(right),
                safe: None,
            })))
        }
        // ARRAY_TO_STRING(arr, delim[, null_replacement]) -> target-specific.
        "ARRAY_TO_STRING" if f.args.len() >= 2 => match target {
            DialectType::Presto | DialectType::Trino => {
                Ok(Expression::Function(Box::new(Function::new(
                    "ARRAY_JOIN".to_string(),
                    f.args,
                ))))
            }
            // NOTE(review): STRING_AGG is an aggregate in TSQL, not an
            // array-joining scalar - confirm this mapping is intended.
            DialectType::TSQL => Ok(Expression::Function(Box::new(
                Function::new("STRING_AGG".to_string(), f.args),
            ))),
            _ => Ok(Expression::Function(f)),
        },
        // ARRAY_CONCAT / LIST_CONCAT -> each target's array-concatenation
        // spelling; unknown targets pass the call through unchanged.
        "ARRAY_CONCAT" | "LIST_CONCAT" if f.args.len() == 2 => match target {
            DialectType::Spark
            | DialectType::Databricks
            | DialectType::Hive => Ok(Expression::Function(Box::new(
                Function::new("CONCAT".to_string(), f.args),
            ))),
            DialectType::Snowflake => Ok(Expression::Function(Box::new(
                Function::new("ARRAY_CAT".to_string(), f.args),
            ))),
            DialectType::Redshift => Ok(Expression::Function(Box::new(
                Function::new("ARRAY_CONCAT".to_string(), f.args),
            ))),
            DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
                Function::new("ARRAY_CAT".to_string(), f.args),
            ))),
            DialectType::DuckDB => Ok(Expression::Function(Box::new(
                Function::new("LIST_CONCAT".to_string(), f.args),
            ))),
            DialectType::Presto | DialectType::Trino => {
                Ok(Expression::Function(Box::new(Function::new(
                    "CONCAT".to_string(),
                    f.args,
                ))))
            }
            DialectType::BigQuery => Ok(Expression::Function(Box::new(
                Function::new("ARRAY_CONCAT".to_string(), f.args),
            ))),
            _ => Ok(Expression::Function(f)),
        },
        // ClickHouse HAS(arr, x) -> ARRAY_CONTAINS (Hive family) or
        // CONTAINS (Presto/Trino).
        "HAS" if f.args.len() == 2 => match target {
            DialectType::Spark
            | DialectType::Databricks
            | DialectType::Hive => Ok(Expression::Function(Box::new(
                Function::new("ARRAY_CONTAINS".to_string(), f.args),
            ))),
            DialectType::Presto | DialectType::Trino => {
                Ok(Expression::Function(Box::new(Function::new(
                    "CONTAINS".to_string(),
                    f.args,
                ))))
            }
            _ => Ok(Expression::Function(f)),
        },
        // NVL(a, b, c, ...) -> COALESCE(a, b, c, ...) keeping every argument.
        // (Two-argument NVL is not handled by this arm.)
        "NVL" if f.args.len() > 2 => Ok(Expression::Function(Box::new(
            Function::new("COALESCE".to_string(), f.args),
        ))),
        // ISNULL(x) -> (x IS NULL), applied only on MySQL -> MySQL
        // round-trips (both source and target must be MySQL).
        "ISNULL"
            if f.args.len() == 1
                && matches!(source, DialectType::MySQL)
                && matches!(target, DialectType::MySQL) =>
        {
            let arg = f.args.into_iter().next().unwrap();
            // Wrap in parentheses so IS NULL binds like the original call.
            Ok(Expression::Paren(Box::new(crate::expressions::Paren {
                this: Expression::IsNull(Box::new(
                    crate::expressions::IsNull {
                        this: arg,
                        not: false,
                        postfix_form: false,
                    },
                )),
                trailing_comments: Vec::new(),
            })))
        }
        // MONTHNAME(x) -> DATE_FORMAT(x, '%M') for MySQL targets
        // (only the target dialect is checked here, not the source).
        "MONTHNAME"
            if f.args.len() == 1 && matches!(target, DialectType::MySQL) =>
        {
            let arg = f.args.into_iter().next().unwrap();
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_FORMAT".to_string(),
                vec![arg, Expression::string("%M")],
            ))))
        }
        // ClickHouse splitByString('sep', x): note the separator comes FIRST
        // in the source call; every target puts the string first.
        // DuckDB: STR_SPLIT(x, 'sep'); Doris: SPLIT_BY_STRING(x, 'sep');
        // Hive family: SPLIT(x, CONCAT('\\Q', 'sep', '\\E')) - the \Q..\E
        // wrapper regex-quotes the separator because SPLIT takes a pattern.
        "SPLITBYSTRING" if f.args.len() == 2 => {
            let sep = f.args[0].clone();
            let str_arg = f.args[1].clone();
            match target {
                DialectType::DuckDB => Ok(Expression::Function(Box::new(
                    Function::new("STR_SPLIT".to_string(), vec![str_arg, sep]),
                ))),
                DialectType::Doris => {
                    Ok(Expression::Function(Box::new(Function::new(
                        "SPLIT_BY_STRING".to_string(),
                        vec![str_arg, sep],
                    ))))
                }
                DialectType::Hive
                | DialectType::Spark
                | DialectType::Databricks => {
                    // SPLIT(x, CONCAT('\\Q', sep, '\\E'))
                    let escaped =
                        Expression::Function(Box::new(Function::new(
                            "CONCAT".to_string(),
                            vec![
                                Expression::string("\\Q"),
                                sep,
                                Expression::string("\\E"),
                            ],
                        )));
                    Ok(Expression::Function(Box::new(Function::new(
                        "SPLIT".to_string(),
                        vec![str_arg, escaped],
                    ))))
                }
                _ => Ok(Expression::Function(f)),
            }
        }
        // ClickHouse splitByRegexp('pattern', x) -> regex split with the
        // arguments flipped to (string, pattern) for the target.
        "SPLITBYREGEXP" if f.args.len() == 2 => {
            let sep = f.args[0].clone();
            let str_arg = f.args[1].clone();
            match target {
                DialectType::DuckDB => {
                    Ok(Expression::Function(Box::new(Function::new(
                        "STR_SPLIT_REGEX".to_string(),
                        vec![str_arg, sep],
                    ))))
                }
                DialectType::Hive
                | DialectType::Spark
                | DialectType::Databricks => {
                    Ok(Expression::Function(Box::new(Function::new(
                        "SPLIT".to_string(),
                        vec![str_arg, sep],
                    ))))
                }
                _ => Ok(Expression::Function(f)),
            }
        }
        // ClickHouse toMonday(x) -> DATE_TRUNC('WEEK', x); Doris puts the
        // unit second: DATE_TRUNC(x, 'WEEK').
        "TOMONDAY" => {
            if f.args.len() == 1 {
                let arg = f.args.into_iter().next().unwrap();
                match target {
                    DialectType::Doris => {
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_TRUNC".to_string(),
                            vec![arg, Expression::string("WEEK")],
                        ))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "DATE_TRUNC".to_string(),
                        vec![Expression::string("WEEK"), arg],
                    )))),
                }
            } else {
                // Unexpected arity: pass the call through untouched.
                Ok(Expression::Function(f))
            }
        }
        // COLLECT_LIST(x) -> ARRAY_AGG(x) outside the Hive family.
        "COLLECT_LIST" if f.args.len() == 1 => match target {
            DialectType::Spark
            | DialectType::Databricks
            | DialectType::Hive => Ok(Expression::Function(f)),
            _ => Ok(Expression::Function(Box::new(Function::new(
                "ARRAY_AGG".to_string(),
                f.args,
            )))),
        },
        // One-argument TO_CHAR(x) -> CAST(x AS STRING) for Doris.
        "TO_CHAR"
            if f.args.len() == 1 && matches!(target, DialectType::Doris) =>
        {
            let arg = f.args.into_iter().next().unwrap();
            Ok(Expression::Cast(Box::new(crate::expressions::Cast {
                this: arg,
                // Doris spells its text type STRING rather than VARCHAR/TEXT.
                to: DataType::Custom {
                    name: "STRING".to_string(),
                },
                double_colon_syntax: false,
                trailing_comments: Vec::new(),
                format: None,
                default: None,
            })))
        }
        // Oracle DBMS_RANDOM.VALUE() -> RANDOM() for PostgreSQL.
        "DBMS_RANDOM.VALUE" if f.args.is_empty() => match target {
            DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
                Function::new("RANDOM".to_string(), vec![]),
            ))),
            _ => Ok(Expression::Function(f)),
        },
        // ClickHouse formatDateTime(x, fmt, ...) -> MySQL DATE_FORMAT;
        // other targets keep the call unchanged.
        "FORMATDATETIME" if f.args.len() >= 2 => match target {
            DialectType::MySQL => Ok(Expression::Function(Box::new(
                Function::new("DATE_FORMAT".to_string(), f.args),
            ))),
            _ => Ok(Expression::Function(f)),
        },
        // TSQL REPLICATE('x', n) -> REPEAT('x', n) for non-TSQL targets.
        "REPLICATE" if f.args.len() == 2 => match target {
            DialectType::TSQL => Ok(Expression::Function(f)),
            _ => Ok(Expression::Function(Box::new(Function::new(
                "REPEAT".to_string(),
                f.args,
            )))),
        },
        // TSQL LEN(x) -> LENGTH(x) for non-TSQL targets. For Spark targets
        // the argument is first cast to a string (standing in for TSQL's
        // implicit coercion), except when it is already a string literal.
        "LEN" if f.args.len() == 1 => {
            match target {
                DialectType::TSQL => Ok(Expression::Function(f)),
                DialectType::Spark | DialectType::Databricks => {
                    let arg = f.args.into_iter().next().unwrap();
                    // Don't wrap string literals with CAST - they're already strings
                    let is_string = matches!(
                        &arg,
                        Expression::Literal(
                            crate::expressions::Literal::String(_)
                        )
                    );
                    let final_arg = if is_string {
                        arg
                    } else {
                        Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::VarChar {
                                length: None,
                                parenthesized_length: false,
                            },
                            double_colon_syntax: false,
                            trailing_comments: Vec::new(),
                            format: None,
                            default: None,
                        }))
                    };
                    Ok(Expression::Function(Box::new(Function::new(
                        "LENGTH".to_string(),
                        vec![final_arg],
                    ))))
                }
                _ => {
                    // All other targets: plain LENGTH(x) without a cast.
                    let arg = f.args.into_iter().next().unwrap();
                    Ok(Expression::Function(Box::new(Function::new(
                        "LENGTH".to_string(),
                        vec![arg],
                    ))))
                }
            }
        }
        // TSQL COUNT_BIG(x) -> COUNT(x) for non-TSQL targets.
        "COUNT_BIG" if f.args.len() == 1 => match target {
            DialectType::TSQL => Ok(Expression::Function(f)),
            _ => Ok(Expression::Function(Box::new(Function::new(
                "COUNT".to_string(),
                f.args,
            )))),
        },
        // TSQL DATEFROMPARTS(y, m, d) -> MAKE_DATE(y, m, d) elsewhere.
        "DATEFROMPARTS" if f.args.len() == 3 => match target {
            DialectType::TSQL => Ok(Expression::Function(f)),
            _ => Ok(Expression::Function(Box::new(Function::new(
                "MAKE_DATE".to_string(),
                f.args,
            )))),
        },
        // REGEXP_LIKE(str, pattern[, flags]) -> a RegexpLike AST node so
        // each generator can render its own spelling; DuckDB gets a direct
        // REGEXP_MATCHES call instead.
        "REGEXP_LIKE" if f.args.len() >= 2 => {
            let str_expr = f.args[0].clone();
            let pattern = f.args[1].clone();
            // Optional third argument carries regex flags.
            let flags = if f.args.len() >= 3 {
                Some(f.args[2].clone())
            } else {
                None
            };
            match target {
                DialectType::DuckDB => {
                    let mut new_args = vec![str_expr, pattern];
                    if let Some(fl) = flags {
                        new_args.push(fl);
                    }
                    Ok(Expression::Function(Box::new(Function::new(
                        "REGEXP_MATCHES".to_string(),
                        new_args,
                    ))))
                }
                _ => Ok(Expression::RegexpLike(Box::new(
                    crate::expressions::RegexpFunc {
                        this: str_expr,
                        pattern,
                        flags,
                    },
                ))),
            }
        }
        // ClickHouse arrayJoin(x) -> UNNEST(x) for PostgreSQL.
        "ARRAYJOIN" if f.args.len() == 1 => match target {
            DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
                Function::new("UNNEST".to_string(), f.args),
            ))),
            _ => Ok(Expression::Function(f)),
        },
        // TSQL DATETIMEFROMPARTS(y, m, d, h, mi, s, ms): the millisecond
        // component has no direct slot in most targets, so it is folded
        // into the seconds (DuckDB) or scaled to nanoseconds (Snowflake).
        "DATETIMEFROMPARTS" if f.args.len() == 7 => {
            match target {
                DialectType::TSQL => Ok(Expression::Function(f)),
                DialectType::DuckDB => {
                    // MAKE_TIMESTAMP(y, m, d, h, mi, s + (ms / 1000.0))
                    let mut args = f.args;
                    let ms = args.pop().unwrap();
                    let s = args.pop().unwrap();
                    // s + (ms / 1000.0)
                    let ms_frac = Expression::Div(Box::new(BinaryOp::new(
                        ms,
                        Expression::Literal(
                            crate::expressions::Literal::Number(
                                "1000.0".to_string(),
                            ),
                        ),
                    )));
                    let s_with_ms = Expression::Add(Box::new(BinaryOp::new(
                        s,
                        // Parenthesized so the division renders as a unit.
                        Expression::Paren(Box::new(Paren {
                            this: ms_frac,
                            trailing_comments: vec![],
                        })),
                    )));
                    args.push(s_with_ms);
                    Ok(Expression::Function(Box::new(Function::new(
                        "MAKE_TIMESTAMP".to_string(),
                        args,
                    ))))
                }
                DialectType::Snowflake => {
                    // TIMESTAMP_FROM_PARTS(y, m, d, h, mi, s, ms * 1000000):
                    // the seventh argument is nanoseconds.
                    let mut args = f.args;
                    let ms = args.pop().unwrap();
                    // ms * 1000000
                    let ns = Expression::Mul(Box::new(BinaryOp::new(
                        ms,
                        Expression::number(1000000),
                    )));
                    args.push(ns);
                    Ok(Expression::Function(Box::new(Function::new(
                        "TIMESTAMP_FROM_PARTS".to_string(),
                        args,
                    ))))
                }
                _ => {
                    // Default: keep function name for other targets
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATETIMEFROMPARTS".to_string(),
                        f.args,
                    ))))
                }
            }
        }
11151 // CONVERT(type, expr [, style]) -> CAST(expr AS type) for non-TSQL targets
11152 // TRY_CONVERT(type, expr [, style]) -> TRY_CAST(expr AS type) for non-TSQL targets
11153 "CONVERT" | "TRY_CONVERT" if f.args.len() >= 2 => {
11154 let is_try = name == "TRY_CONVERT";
11155 let type_expr = f.args[0].clone();
11156 let value_expr = f.args[1].clone();
11157 let style = if f.args.len() >= 3 {
11158 Some(&f.args[2])
11159 } else {
11160 None
11161 };
11162
11163 // For TSQL->TSQL, normalize types and preserve CONVERT/TRY_CONVERT
11164 if matches!(target, DialectType::TSQL) {
11165 let normalized_type = match &type_expr {
11166 Expression::DataType(dt) => {
11167 let new_dt = match dt {
11168 DataType::Int { .. } => DataType::Custom {
11169 name: "INTEGER".to_string(),
11170 },
11171 _ => dt.clone(),
11172 };
11173 Expression::DataType(new_dt)
11174 }
11175 Expression::Identifier(id) => {
11176 let upper = id.name.to_uppercase();
11177 let normalized = match upper.as_str() {
11178 "INT" => "INTEGER",
11179 _ => &upper,
11180 };
11181 Expression::Identifier(
11182 crate::expressions::Identifier::new(normalized),
11183 )
11184 }
11185 Expression::Column(col) => {
11186 let upper = col.name.name.to_uppercase();
11187 let normalized = match upper.as_str() {
11188 "INT" => "INTEGER",
11189 _ => &upper,
11190 };
11191 Expression::Identifier(
11192 crate::expressions::Identifier::new(normalized),
11193 )
11194 }
11195 _ => type_expr.clone(),
11196 };
11197 let func_name = if is_try { "TRY_CONVERT" } else { "CONVERT" };
11198 let mut new_args = vec![normalized_type, value_expr];
11199 if let Some(s) = style {
11200 new_args.push(s.clone());
11201 }
11202 return Ok(Expression::Function(Box::new(Function::new(
11203 func_name.to_string(),
11204 new_args,
11205 ))));
11206 }
11207
11208 // For other targets: CONVERT(type, expr) -> CAST(expr AS type)
11209 fn expr_to_datatype(e: &Expression) -> Option<DataType> {
11210 match e {
11211 Expression::DataType(dt) => {
11212 // Convert NVARCHAR/NCHAR Custom types to standard VarChar/Char
11213 match dt {
11214 DataType::Custom { name }
11215 if name.starts_with("NVARCHAR(")
11216 || name.starts_with("NCHAR(") =>
11217 {
11218 // Extract the length from "NVARCHAR(200)" or "NCHAR(40)"
11219 let inner = &name[name.find('(').unwrap() + 1
11220 ..name.len() - 1];
11221 if inner.eq_ignore_ascii_case("MAX") {
11222 Some(DataType::Text)
11223 } else if let Ok(len) = inner.parse::<u32>() {
11224 if name.starts_with("NCHAR") {
11225 Some(DataType::Char {
11226 length: Some(len),
11227 })
11228 } else {
11229 Some(DataType::VarChar {
11230 length: Some(len),
11231 parenthesized_length: false,
11232 })
11233 }
11234 } else {
11235 Some(dt.clone())
11236 }
11237 }
11238 DataType::Custom { name } if name == "NVARCHAR" => {
11239 Some(DataType::VarChar {
11240 length: None,
11241 parenthesized_length: false,
11242 })
11243 }
11244 DataType::Custom { name } if name == "NCHAR" => {
11245 Some(DataType::Char { length: None })
11246 }
11247 DataType::Custom { name }
11248 if name == "NVARCHAR(MAX)"
11249 || name == "VARCHAR(MAX)" =>
11250 {
11251 Some(DataType::Text)
11252 }
11253 _ => Some(dt.clone()),
11254 }
11255 }
11256 Expression::Identifier(id) => {
11257 let name = id.name.to_uppercase();
11258 match name.as_str() {
11259 "INT" | "INTEGER" => Some(DataType::Int {
11260 length: None,
11261 integer_spelling: false,
11262 }),
11263 "BIGINT" => Some(DataType::BigInt { length: None }),
11264 "SMALLINT" => {
11265 Some(DataType::SmallInt { length: None })
11266 }
11267 "TINYINT" => {
11268 Some(DataType::TinyInt { length: None })
11269 }
11270 "FLOAT" => Some(DataType::Float {
11271 precision: None,
11272 scale: None,
11273 real_spelling: false,
11274 }),
11275 "REAL" => Some(DataType::Float {
11276 precision: None,
11277 scale: None,
11278 real_spelling: true,
11279 }),
11280 "DATETIME" | "DATETIME2" => {
11281 Some(DataType::Timestamp {
11282 timezone: false,
11283 precision: None,
11284 })
11285 }
11286 "DATE" => Some(DataType::Date),
11287 "BIT" => Some(DataType::Boolean),
11288 "TEXT" => Some(DataType::Text),
11289 "NUMERIC" => Some(DataType::Decimal {
11290 precision: None,
11291 scale: None,
11292 }),
11293 "MONEY" => Some(DataType::Decimal {
11294 precision: Some(15),
11295 scale: Some(4),
11296 }),
11297 "SMALLMONEY" => Some(DataType::Decimal {
11298 precision: Some(6),
11299 scale: Some(4),
11300 }),
11301 "VARCHAR" => Some(DataType::VarChar {
11302 length: None,
11303 parenthesized_length: false,
11304 }),
11305 "NVARCHAR" => Some(DataType::VarChar {
11306 length: None,
11307 parenthesized_length: false,
11308 }),
11309 "CHAR" => Some(DataType::Char { length: None }),
11310 "NCHAR" => Some(DataType::Char { length: None }),
11311 _ => Some(DataType::Custom { name }),
11312 }
11313 }
11314 Expression::Column(col) => {
11315 let name = col.name.name.to_uppercase();
11316 match name.as_str() {
11317 "INT" | "INTEGER" => Some(DataType::Int {
11318 length: None,
11319 integer_spelling: false,
11320 }),
11321 "BIGINT" => Some(DataType::BigInt { length: None }),
11322 "FLOAT" => Some(DataType::Float {
11323 precision: None,
11324 scale: None,
11325 real_spelling: false,
11326 }),
11327 "DATETIME" | "DATETIME2" => {
11328 Some(DataType::Timestamp {
11329 timezone: false,
11330 precision: None,
11331 })
11332 }
11333 "DATE" => Some(DataType::Date),
11334 "NUMERIC" => Some(DataType::Decimal {
11335 precision: None,
11336 scale: None,
11337 }),
11338 "VARCHAR" => Some(DataType::VarChar {
11339 length: None,
11340 parenthesized_length: false,
11341 }),
11342 "NVARCHAR" => Some(DataType::VarChar {
11343 length: None,
11344 parenthesized_length: false,
11345 }),
11346 "CHAR" => Some(DataType::Char { length: None }),
11347 "NCHAR" => Some(DataType::Char { length: None }),
11348 _ => Some(DataType::Custom { name }),
11349 }
11350 }
11351 // NVARCHAR(200) parsed as Function("NVARCHAR", [200])
11352 Expression::Function(f) => {
11353 let fname = f.name.to_uppercase();
11354 match fname.as_str() {
11355 "VARCHAR" | "NVARCHAR" => {
11356 let len = f.args.first().and_then(|a| {
11357 if let Expression::Literal(
11358 crate::expressions::Literal::Number(n),
11359 ) = a
11360 {
11361 n.parse::<u32>().ok()
11362 } else if let Expression::Identifier(id) = a
11363 {
11364 if id.name.eq_ignore_ascii_case("MAX") {
11365 None
11366 } else {
11367 None
11368 }
11369 } else {
11370 None
11371 }
11372 });
11373 // Check for VARCHAR(MAX) -> TEXT
11374 let is_max = f.args.first().map_or(false, |a| {
11375 matches!(a, Expression::Identifier(id) if id.name.eq_ignore_ascii_case("MAX"))
11376 || matches!(a, Expression::Column(col) if col.name.name.eq_ignore_ascii_case("MAX"))
11377 });
11378 if is_max {
11379 Some(DataType::Text)
11380 } else {
11381 Some(DataType::VarChar {
11382 length: len,
11383 parenthesized_length: false,
11384 })
11385 }
11386 }
11387 "NCHAR" | "CHAR" => {
11388 let len = f.args.first().and_then(|a| {
11389 if let Expression::Literal(
11390 crate::expressions::Literal::Number(n),
11391 ) = a
11392 {
11393 n.parse::<u32>().ok()
11394 } else {
11395 None
11396 }
11397 });
11398 Some(DataType::Char { length: len })
11399 }
11400 "NUMERIC" | "DECIMAL" => {
11401 let precision = f.args.first().and_then(|a| {
11402 if let Expression::Literal(
11403 crate::expressions::Literal::Number(n),
11404 ) = a
11405 {
11406 n.parse::<u32>().ok()
11407 } else {
11408 None
11409 }
11410 });
11411 let scale = f.args.get(1).and_then(|a| {
11412 if let Expression::Literal(
11413 crate::expressions::Literal::Number(n),
11414 ) = a
11415 {
11416 n.parse::<u32>().ok()
11417 } else {
11418 None
11419 }
11420 });
11421 Some(DataType::Decimal { precision, scale })
11422 }
11423 _ => None,
11424 }
11425 }
11426 _ => None,
11427 }
11428 }
11429
11430 if let Some(mut dt) = expr_to_datatype(&type_expr) {
11431 // For TSQL source: VARCHAR/CHAR without length defaults to 30
11432 let is_tsql_source =
11433 matches!(source, DialectType::TSQL | DialectType::Fabric);
11434 if is_tsql_source {
11435 match &dt {
11436 DataType::VarChar { length: None, .. } => {
11437 dt = DataType::VarChar {
11438 length: Some(30),
11439 parenthesized_length: false,
11440 };
11441 }
11442 DataType::Char { length: None } => {
11443 dt = DataType::Char { length: Some(30) };
11444 }
11445 _ => {}
11446 }
11447 }
11448
11449 // Determine if this is a string type
11450 let is_string_type = matches!(
11451 dt,
11452 DataType::VarChar { .. }
11453 | DataType::Char { .. }
11454 | DataType::Text
11455 ) || matches!(&dt, DataType::Custom { name } if name == "NVARCHAR" || name == "NCHAR"
11456 || name.starts_with("NVARCHAR(") || name.starts_with("NCHAR(")
11457 || name.starts_with("VARCHAR(") || name == "VARCHAR"
11458 || name == "STRING");
11459
11460 // Determine if this is a date/time type
11461 let is_datetime_type = matches!(
11462 dt,
11463 DataType::Timestamp { .. } | DataType::Date
11464 ) || matches!(&dt, DataType::Custom { name } if name == "DATETIME"
11465 || name == "DATETIME2" || name == "SMALLDATETIME");
11466
11467 // Check for date conversion with style
11468 if style.is_some() {
11469 let style_num = style.and_then(|s| {
11470 if let Expression::Literal(
11471 crate::expressions::Literal::Number(n),
11472 ) = s
11473 {
11474 n.parse::<u32>().ok()
11475 } else {
11476 None
11477 }
11478 });
11479
11480 // TSQL CONVERT date styles (Java format)
11481 let format_str = style_num.and_then(|n| match n {
11482 101 => Some("MM/dd/yyyy"),
11483 102 => Some("yyyy.MM.dd"),
11484 103 => Some("dd/MM/yyyy"),
11485 104 => Some("dd.MM.yyyy"),
11486 105 => Some("dd-MM-yyyy"),
11487 108 => Some("HH:mm:ss"),
11488 110 => Some("MM-dd-yyyy"),
11489 112 => Some("yyyyMMdd"),
11490 120 | 20 => Some("yyyy-MM-dd HH:mm:ss"),
11491 121 | 21 => Some("yyyy-MM-dd HH:mm:ss.SSSSSS"),
11492 126 | 127 => Some("yyyy-MM-dd'T'HH:mm:ss.SSS"),
11493 _ => None,
11494 });
11495
11496 // Non-string, non-datetime types with style: just CAST, ignore the style
11497 if !is_string_type && !is_datetime_type {
11498 let cast_expr = if is_try {
11499 Expression::TryCast(Box::new(
11500 crate::expressions::Cast {
11501 this: value_expr,
11502 to: dt,
11503 trailing_comments: Vec::new(),
11504 double_colon_syntax: false,
11505 format: None,
11506 default: None,
11507 },
11508 ))
11509 } else {
11510 Expression::Cast(Box::new(
11511 crate::expressions::Cast {
11512 this: value_expr,
11513 to: dt,
11514 trailing_comments: Vec::new(),
11515 double_colon_syntax: false,
11516 format: None,
11517 default: None,
11518 },
11519 ))
11520 };
11521 return Ok(cast_expr);
11522 }
11523
11524 if let Some(java_fmt) = format_str {
11525 let c_fmt = java_fmt
11526 .replace("yyyy", "%Y")
11527 .replace("MM", "%m")
11528 .replace("dd", "%d")
11529 .replace("HH", "%H")
11530 .replace("mm", "%M")
11531 .replace("ss", "%S")
11532 .replace("SSSSSS", "%f")
11533 .replace("SSS", "%f")
11534 .replace("'T'", "T");
11535
11536 // For datetime target types: style is the INPUT format for parsing strings -> dates
11537 if is_datetime_type {
11538 match target {
11539 DialectType::DuckDB => {
11540 return Ok(Expression::Function(Box::new(
11541 Function::new(
11542 "STRPTIME".to_string(),
11543 vec![
11544 value_expr,
11545 Expression::string(&c_fmt),
11546 ],
11547 ),
11548 )));
11549 }
11550 DialectType::Spark
11551 | DialectType::Databricks => {
11552 // CONVERT(DATETIME, x, style) -> TO_TIMESTAMP(x, fmt)
11553 // CONVERT(DATE, x, style) -> TO_DATE(x, fmt)
11554 let func_name =
11555 if matches!(dt, DataType::Date) {
11556 "TO_DATE"
11557 } else {
11558 "TO_TIMESTAMP"
11559 };
11560 return Ok(Expression::Function(Box::new(
11561 Function::new(
11562 func_name.to_string(),
11563 vec![
11564 value_expr,
11565 Expression::string(java_fmt),
11566 ],
11567 ),
11568 )));
11569 }
11570 DialectType::Hive => {
11571 return Ok(Expression::Function(Box::new(
11572 Function::new(
11573 "TO_TIMESTAMP".to_string(),
11574 vec![
11575 value_expr,
11576 Expression::string(java_fmt),
11577 ],
11578 ),
11579 )));
11580 }
11581 _ => {
11582 return Ok(Expression::Cast(Box::new(
11583 crate::expressions::Cast {
11584 this: value_expr,
11585 to: dt,
11586 trailing_comments: Vec::new(),
11587 double_colon_syntax: false,
11588 format: None,
11589 default: None,
11590 },
11591 )));
11592 }
11593 }
11594 }
11595
11596 // For string target types: style is the OUTPUT format for dates -> strings
11597 match target {
11598 DialectType::DuckDB => Ok(Expression::Function(
11599 Box::new(Function::new(
11600 "STRPTIME".to_string(),
11601 vec![
11602 value_expr,
11603 Expression::string(&c_fmt),
11604 ],
11605 )),
11606 )),
11607 DialectType::Spark | DialectType::Databricks => {
11608 // For string target types with style: CAST(DATE_FORMAT(x, fmt) AS type)
11609 // Determine the target string type
11610 let string_dt = match &dt {
11611 DataType::VarChar {
11612 length: Some(l),
11613 ..
11614 } => DataType::VarChar {
11615 length: Some(*l),
11616 parenthesized_length: false,
11617 },
11618 DataType::Text => DataType::Custom {
11619 name: "STRING".to_string(),
11620 },
11621 _ => DataType::Custom {
11622 name: "STRING".to_string(),
11623 },
11624 };
11625 let date_format_expr = Expression::Function(
11626 Box::new(Function::new(
11627 "DATE_FORMAT".to_string(),
11628 vec![
11629 value_expr,
11630 Expression::string(java_fmt),
11631 ],
11632 )),
11633 );
11634 let cast_expr = if is_try {
11635 Expression::TryCast(Box::new(
11636 crate::expressions::Cast {
11637 this: date_format_expr,
11638 to: string_dt,
11639 trailing_comments: Vec::new(),
11640 double_colon_syntax: false,
11641 format: None,
11642 default: None,
11643 },
11644 ))
11645 } else {
11646 Expression::Cast(Box::new(
11647 crate::expressions::Cast {
11648 this: date_format_expr,
11649 to: string_dt,
11650 trailing_comments: Vec::new(),
11651 double_colon_syntax: false,
11652 format: None,
11653 default: None,
11654 },
11655 ))
11656 };
11657 Ok(cast_expr)
11658 }
11659 DialectType::MySQL | DialectType::SingleStore => {
11660 // For MySQL: CAST(DATE_FORMAT(x, mysql_fmt) AS CHAR(n))
11661 let mysql_fmt = java_fmt
11662 .replace("yyyy", "%Y")
11663 .replace("MM", "%m")
11664 .replace("dd", "%d")
11665 .replace("HH:mm:ss.SSSSSS", "%T")
11666 .replace("HH:mm:ss", "%T")
11667 .replace("HH", "%H")
11668 .replace("mm", "%i")
11669 .replace("ss", "%S");
11670 let date_format_expr = Expression::Function(
11671 Box::new(Function::new(
11672 "DATE_FORMAT".to_string(),
11673 vec![
11674 value_expr,
11675 Expression::string(&mysql_fmt),
11676 ],
11677 )),
11678 );
11679 // MySQL uses CHAR for string casts
11680 let mysql_dt = match &dt {
11681 DataType::VarChar { length, .. } => {
11682 DataType::Char { length: *length }
11683 }
11684 _ => dt,
11685 };
11686 Ok(Expression::Cast(Box::new(
11687 crate::expressions::Cast {
11688 this: date_format_expr,
11689 to: mysql_dt,
11690 trailing_comments: Vec::new(),
11691 double_colon_syntax: false,
11692 format: None,
11693 default: None,
11694 },
11695 )))
11696 }
11697 DialectType::Hive => {
11698 let func_name = "TO_TIMESTAMP";
11699 Ok(Expression::Function(Box::new(
11700 Function::new(
11701 func_name.to_string(),
11702 vec![
11703 value_expr,
11704 Expression::string(java_fmt),
11705 ],
11706 ),
11707 )))
11708 }
11709 _ => Ok(Expression::Cast(Box::new(
11710 crate::expressions::Cast {
11711 this: value_expr,
11712 to: dt,
11713 trailing_comments: Vec::new(),
11714 double_colon_syntax: false,
11715 format: None,
11716 default: None,
11717 },
11718 ))),
11719 }
11720 } else {
11721 // Unknown style, just CAST
11722 let cast_expr = if is_try {
11723 Expression::TryCast(Box::new(
11724 crate::expressions::Cast {
11725 this: value_expr,
11726 to: dt,
11727 trailing_comments: Vec::new(),
11728 double_colon_syntax: false,
11729 format: None,
11730 default: None,
11731 },
11732 ))
11733 } else {
11734 Expression::Cast(Box::new(
11735 crate::expressions::Cast {
11736 this: value_expr,
11737 to: dt,
11738 trailing_comments: Vec::new(),
11739 double_colon_syntax: false,
11740 format: None,
11741 default: None,
11742 },
11743 ))
11744 };
11745 Ok(cast_expr)
11746 }
11747 } else {
11748 // No style - simple CAST
11749 let final_dt = if matches!(
11750 target,
11751 DialectType::MySQL | DialectType::SingleStore
11752 ) {
11753 match &dt {
11754 DataType::Int { .. }
11755 | DataType::BigInt { .. }
11756 | DataType::SmallInt { .. }
11757 | DataType::TinyInt { .. } => DataType::Custom {
11758 name: "SIGNED".to_string(),
11759 },
11760 DataType::VarChar { length, .. } => {
11761 DataType::Char { length: *length }
11762 }
11763 _ => dt,
11764 }
11765 } else {
11766 dt
11767 };
11768 let cast_expr = if is_try {
11769 Expression::TryCast(Box::new(
11770 crate::expressions::Cast {
11771 this: value_expr,
11772 to: final_dt,
11773 trailing_comments: Vec::new(),
11774 double_colon_syntax: false,
11775 format: None,
11776 default: None,
11777 },
11778 ))
11779 } else {
11780 Expression::Cast(Box::new(crate::expressions::Cast {
11781 this: value_expr,
11782 to: final_dt,
11783 trailing_comments: Vec::new(),
11784 double_colon_syntax: false,
11785 format: None,
11786 default: None,
11787 }))
11788 };
11789 Ok(cast_expr)
11790 }
11791 } else {
11792 // Can't convert type expression - keep as CONVERT/TRY_CONVERT function
11793 Ok(Expression::Function(f))
11794 }
11795 }
11796 // STRFTIME(val, fmt) from DuckDB / STRFTIME(fmt, val) from SQLite -> target-specific
11797 "STRFTIME" if f.args.len() == 2 => {
11798 // SQLite uses STRFTIME(fmt, val); DuckDB uses STRFTIME(val, fmt)
11799 let (val, fmt_expr) = if matches!(source, DialectType::SQLite) {
11800 // SQLite: args[0] = format, args[1] = value
11801 (f.args[1].clone(), &f.args[0])
11802 } else {
11803 // DuckDB and others: args[0] = value, args[1] = format
11804 (f.args[0].clone(), &f.args[1])
11805 };
11806
11807 // Helper to convert C-style format to Java-style
// Translate a C/strftime-style format string into a Java
// (SimpleDateFormat-style) pattern, as used by Spark/Hive/TSQL FORMAT.
//
// The replacement table is applied strictly in order: dash-prefixed
// specifiers (%-m, %-d, ...) never collide with their plain forms
// because `%m` is not a substring of `%-m`, and composite specifiers
// (%F, %T) are expanded last so their output is not re-rewritten.
fn c_to_java_format(fmt: &str) -> String {
    const TABLE: [(&str, &str); 19] = [
        ("%Y", "yyyy"),
        ("%m", "MM"),
        ("%d", "dd"),
        ("%H", "HH"),
        ("%M", "mm"),
        ("%S", "ss"),
        ("%f", "SSSSSS"),
        ("%y", "yy"),
        ("%-m", "M"),
        ("%-d", "d"),
        ("%-H", "H"),
        ("%-I", "h"),
        ("%I", "hh"),
        ("%p", "a"),
        ("%j", "DDD"),
        ("%a", "EEE"),
        ("%b", "MMM"),
        ("%F", "yyyy-MM-dd"),
        ("%T", "HH:mm:ss"),
    ];
    TABLE
        .iter()
        .fold(fmt.to_string(), |acc, (from, to)| acc.replace(from, to))
}
11829
11830 // Helper: recursively convert format strings within expressions (handles CONCAT)
11831 fn convert_fmt_expr(
11832 expr: &Expression,
11833 converter: &dyn Fn(&str) -> String,
11834 ) -> Expression {
11835 match expr {
11836 Expression::Literal(
11837 crate::expressions::Literal::String(s),
11838 ) => Expression::string(&converter(s)),
11839 Expression::Function(func)
11840 if func.name.eq_ignore_ascii_case("CONCAT") =>
11841 {
11842 let new_args: Vec<Expression> = func
11843 .args
11844 .iter()
11845 .map(|a| convert_fmt_expr(a, converter))
11846 .collect();
11847 Expression::Function(Box::new(Function::new(
11848 "CONCAT".to_string(),
11849 new_args,
11850 )))
11851 }
11852 other => other.clone(),
11853 }
11854 }
11855
11856 match target {
11857 DialectType::DuckDB => {
11858 if matches!(source, DialectType::SQLite) {
11859 // SQLite STRFTIME(fmt, val) -> DuckDB STRFTIME(CAST(val AS TIMESTAMP), fmt)
11860 let cast_val = Expression::Cast(Box::new(Cast {
11861 this: val,
11862 to: crate::expressions::DataType::Timestamp {
11863 precision: None,
11864 timezone: false,
11865 },
11866 trailing_comments: Vec::new(),
11867 double_colon_syntax: false,
11868 format: None,
11869 default: None,
11870 }));
11871 Ok(Expression::Function(Box::new(Function::new(
11872 "STRFTIME".to_string(),
11873 vec![cast_val, fmt_expr.clone()],
11874 ))))
11875 } else {
11876 Ok(Expression::Function(f))
11877 }
11878 }
11879 DialectType::Spark
11880 | DialectType::Databricks
11881 | DialectType::Hive => {
11882 // STRFTIME(val, fmt) -> DATE_FORMAT(val, java_fmt)
11883 let converted_fmt =
11884 convert_fmt_expr(fmt_expr, &c_to_java_format);
11885 Ok(Expression::Function(Box::new(Function::new(
11886 "DATE_FORMAT".to_string(),
11887 vec![val, converted_fmt],
11888 ))))
11889 }
11890 DialectType::TSQL | DialectType::Fabric => {
11891 // STRFTIME(val, fmt) -> FORMAT(val, java_fmt)
11892 let converted_fmt =
11893 convert_fmt_expr(fmt_expr, &c_to_java_format);
11894 Ok(Expression::Function(Box::new(Function::new(
11895 "FORMAT".to_string(),
11896 vec![val, converted_fmt],
11897 ))))
11898 }
11899 DialectType::Presto
11900 | DialectType::Trino
11901 | DialectType::Athena => {
11902 // STRFTIME(val, fmt) -> DATE_FORMAT(val, presto_fmt) (convert DuckDB format to Presto)
11903 if let Expression::Literal(
11904 crate::expressions::Literal::String(s),
11905 ) = fmt_expr
11906 {
11907 let presto_fmt = duckdb_to_presto_format(s);
11908 Ok(Expression::Function(Box::new(Function::new(
11909 "DATE_FORMAT".to_string(),
11910 vec![val, Expression::string(&presto_fmt)],
11911 ))))
11912 } else {
11913 Ok(Expression::Function(Box::new(Function::new(
11914 "DATE_FORMAT".to_string(),
11915 vec![val, fmt_expr.clone()],
11916 ))))
11917 }
11918 }
11919 DialectType::BigQuery => {
11920 // STRFTIME(val, fmt) -> FORMAT_DATE(bq_fmt, val) - note reversed arg order
11921 if let Expression::Literal(
11922 crate::expressions::Literal::String(s),
11923 ) = fmt_expr
11924 {
11925 let bq_fmt = duckdb_to_bigquery_format(s);
11926 Ok(Expression::Function(Box::new(Function::new(
11927 "FORMAT_DATE".to_string(),
11928 vec![Expression::string(&bq_fmt), val],
11929 ))))
11930 } else {
11931 Ok(Expression::Function(Box::new(Function::new(
11932 "FORMAT_DATE".to_string(),
11933 vec![fmt_expr.clone(), val],
11934 ))))
11935 }
11936 }
11937 DialectType::PostgreSQL | DialectType::Redshift => {
11938 // STRFTIME(val, fmt) -> TO_CHAR(val, pg_fmt)
11939 if let Expression::Literal(
11940 crate::expressions::Literal::String(s),
11941 ) = fmt_expr
11942 {
11943 let pg_fmt = s
11944 .replace("%Y", "YYYY")
11945 .replace("%m", "MM")
11946 .replace("%d", "DD")
11947 .replace("%H", "HH24")
11948 .replace("%M", "MI")
11949 .replace("%S", "SS")
11950 .replace("%y", "YY")
11951 .replace("%-m", "FMMM")
11952 .replace("%-d", "FMDD")
11953 .replace("%-H", "FMHH24")
11954 .replace("%-I", "FMHH12")
11955 .replace("%p", "AM")
11956 .replace("%F", "YYYY-MM-DD")
11957 .replace("%T", "HH24:MI:SS");
11958 Ok(Expression::Function(Box::new(Function::new(
11959 "TO_CHAR".to_string(),
11960 vec![val, Expression::string(&pg_fmt)],
11961 ))))
11962 } else {
11963 Ok(Expression::Function(Box::new(Function::new(
11964 "TO_CHAR".to_string(),
11965 vec![val, fmt_expr.clone()],
11966 ))))
11967 }
11968 }
11969 _ => Ok(Expression::Function(f)),
11970 }
11971 }
11972 // STRPTIME(val, fmt) from DuckDB -> target-specific date parse function
11973 "STRPTIME" if f.args.len() == 2 => {
11974 let val = f.args[0].clone();
11975 let fmt_expr = &f.args[1];
11976
// Translate a C/strftime-style format into a Java-style pattern for
// PARSING (TO_TIMESTAMP and friends). Unlike the formatting variant,
// this table intentionally omits output-only tokens such as %j/%a/%b.
// Replacement order matters: dash-prefixed specifiers are distinct
// substrings of their plain forms, and %F/%T expand last so their
// expansions are never rewritten again.
fn c_to_java_format_parse(fmt: &str) -> String {
    const TABLE: [(&str, &str); 16] = [
        ("%Y", "yyyy"),
        ("%m", "MM"),
        ("%d", "dd"),
        ("%H", "HH"),
        ("%M", "mm"),
        ("%S", "ss"),
        ("%f", "SSSSSS"),
        ("%y", "yy"),
        ("%-m", "M"),
        ("%-d", "d"),
        ("%-H", "H"),
        ("%-I", "h"),
        ("%I", "hh"),
        ("%p", "a"),
        ("%F", "yyyy-MM-dd"),
        ("%T", "HH:mm:ss"),
    ];
    TABLE
        .iter()
        .fold(fmt.to_string(), |acc, (from, to)| acc.replace(from, to))
}
11995
11996 match target {
11997 DialectType::DuckDB => Ok(Expression::Function(f)),
11998 DialectType::Spark | DialectType::Databricks => {
11999 // STRPTIME(val, fmt) -> TO_TIMESTAMP(val, java_fmt)
12000 if let Expression::Literal(
12001 crate::expressions::Literal::String(s),
12002 ) = fmt_expr
12003 {
12004 let java_fmt = c_to_java_format_parse(s);
12005 Ok(Expression::Function(Box::new(Function::new(
12006 "TO_TIMESTAMP".to_string(),
12007 vec![val, Expression::string(&java_fmt)],
12008 ))))
12009 } else {
12010 Ok(Expression::Function(Box::new(Function::new(
12011 "TO_TIMESTAMP".to_string(),
12012 vec![val, fmt_expr.clone()],
12013 ))))
12014 }
12015 }
12016 DialectType::Hive => {
12017 // STRPTIME(val, fmt) -> CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(val, java_fmt)) AS TIMESTAMP)
12018 if let Expression::Literal(
12019 crate::expressions::Literal::String(s),
12020 ) = fmt_expr
12021 {
12022 let java_fmt = c_to_java_format_parse(s);
12023 let unix_ts =
12024 Expression::Function(Box::new(Function::new(
12025 "UNIX_TIMESTAMP".to_string(),
12026 vec![val, Expression::string(&java_fmt)],
12027 )));
12028 let from_unix =
12029 Expression::Function(Box::new(Function::new(
12030 "FROM_UNIXTIME".to_string(),
12031 vec![unix_ts],
12032 )));
12033 Ok(Expression::Cast(Box::new(
12034 crate::expressions::Cast {
12035 this: from_unix,
12036 to: DataType::Timestamp {
12037 timezone: false,
12038 precision: None,
12039 },
12040 trailing_comments: Vec::new(),
12041 double_colon_syntax: false,
12042 format: None,
12043 default: None,
12044 },
12045 )))
12046 } else {
12047 Ok(Expression::Function(f))
12048 }
12049 }
12050 DialectType::Presto
12051 | DialectType::Trino
12052 | DialectType::Athena => {
12053 // STRPTIME(val, fmt) -> DATE_PARSE(val, presto_fmt) (convert DuckDB format to Presto)
12054 if let Expression::Literal(
12055 crate::expressions::Literal::String(s),
12056 ) = fmt_expr
12057 {
12058 let presto_fmt = duckdb_to_presto_format(s);
12059 Ok(Expression::Function(Box::new(Function::new(
12060 "DATE_PARSE".to_string(),
12061 vec![val, Expression::string(&presto_fmt)],
12062 ))))
12063 } else {
12064 Ok(Expression::Function(Box::new(Function::new(
12065 "DATE_PARSE".to_string(),
12066 vec![val, fmt_expr.clone()],
12067 ))))
12068 }
12069 }
12070 DialectType::BigQuery => {
12071 // STRPTIME(val, fmt) -> PARSE_TIMESTAMP(bq_fmt, val) - note reversed arg order
12072 if let Expression::Literal(
12073 crate::expressions::Literal::String(s),
12074 ) = fmt_expr
12075 {
12076 let bq_fmt = duckdb_to_bigquery_format(s);
12077 Ok(Expression::Function(Box::new(Function::new(
12078 "PARSE_TIMESTAMP".to_string(),
12079 vec![Expression::string(&bq_fmt), val],
12080 ))))
12081 } else {
12082 Ok(Expression::Function(Box::new(Function::new(
12083 "PARSE_TIMESTAMP".to_string(),
12084 vec![fmt_expr.clone(), val],
12085 ))))
12086 }
12087 }
12088 _ => Ok(Expression::Function(f)),
12089 }
12090 }
12091 // DATE_FORMAT(val, fmt) from Presto source (C-style format) -> target-specific
12092 "DATE_FORMAT"
12093 if f.args.len() >= 2
12094 && matches!(
12095 source,
12096 DialectType::Presto
12097 | DialectType::Trino
12098 | DialectType::Athena
12099 ) =>
12100 {
12101 let val = f.args[0].clone();
12102 let fmt_expr = &f.args[1];
12103
12104 match target {
12105 DialectType::Presto
12106 | DialectType::Trino
12107 | DialectType::Athena => {
12108 // Presto -> Presto: normalize format (e.g., %H:%i:%S -> %T)
12109 if let Expression::Literal(
12110 crate::expressions::Literal::String(s),
12111 ) = fmt_expr
12112 {
12113 let normalized = crate::dialects::presto::PrestoDialect::normalize_presto_format(s);
12114 Ok(Expression::Function(Box::new(Function::new(
12115 "DATE_FORMAT".to_string(),
12116 vec![val, Expression::string(&normalized)],
12117 ))))
12118 } else {
12119 Ok(Expression::Function(f))
12120 }
12121 }
12122 DialectType::Hive
12123 | DialectType::Spark
12124 | DialectType::Databricks => {
12125 // Convert Presto C-style to Java-style format
12126 if let Expression::Literal(
12127 crate::expressions::Literal::String(s),
12128 ) = fmt_expr
12129 {
12130 let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
12131 Ok(Expression::Function(Box::new(Function::new(
12132 "DATE_FORMAT".to_string(),
12133 vec![val, Expression::string(&java_fmt)],
12134 ))))
12135 } else {
12136 Ok(Expression::Function(f))
12137 }
12138 }
12139 DialectType::DuckDB => {
12140 // Convert to STRFTIME(val, duckdb_fmt)
12141 if let Expression::Literal(
12142 crate::expressions::Literal::String(s),
12143 ) = fmt_expr
12144 {
12145 let duckdb_fmt = crate::dialects::presto::PrestoDialect::presto_to_duckdb_format(s);
12146 Ok(Expression::Function(Box::new(Function::new(
12147 "STRFTIME".to_string(),
12148 vec![val, Expression::string(&duckdb_fmt)],
12149 ))))
12150 } else {
12151 Ok(Expression::Function(Box::new(Function::new(
12152 "STRFTIME".to_string(),
12153 vec![val, fmt_expr.clone()],
12154 ))))
12155 }
12156 }
12157 DialectType::BigQuery => {
12158 // Convert to FORMAT_DATE(bq_fmt, val) - reversed args
12159 if let Expression::Literal(
12160 crate::expressions::Literal::String(s),
12161 ) = fmt_expr
12162 {
12163 let bq_fmt = crate::dialects::presto::PrestoDialect::presto_to_bigquery_format(s);
12164 Ok(Expression::Function(Box::new(Function::new(
12165 "FORMAT_DATE".to_string(),
12166 vec![Expression::string(&bq_fmt), val],
12167 ))))
12168 } else {
12169 Ok(Expression::Function(Box::new(Function::new(
12170 "FORMAT_DATE".to_string(),
12171 vec![fmt_expr.clone(), val],
12172 ))))
12173 }
12174 }
12175 _ => Ok(Expression::Function(f)),
12176 }
12177 }
12178 // DATE_PARSE(val, fmt) from Presto source -> target-specific parse function
12179 "DATE_PARSE"
12180 if f.args.len() >= 2
12181 && matches!(
12182 source,
12183 DialectType::Presto
12184 | DialectType::Trino
12185 | DialectType::Athena
12186 ) =>
12187 {
12188 let val = f.args[0].clone();
12189 let fmt_expr = &f.args[1];
12190
12191 match target {
12192 DialectType::Presto
12193 | DialectType::Trino
12194 | DialectType::Athena => {
12195 // Presto -> Presto: normalize format
12196 if let Expression::Literal(
12197 crate::expressions::Literal::String(s),
12198 ) = fmt_expr
12199 {
12200 let normalized = crate::dialects::presto::PrestoDialect::normalize_presto_format(s);
12201 Ok(Expression::Function(Box::new(Function::new(
12202 "DATE_PARSE".to_string(),
12203 vec![val, Expression::string(&normalized)],
12204 ))))
12205 } else {
12206 Ok(Expression::Function(f))
12207 }
12208 }
12209 DialectType::Hive => {
12210 // Presto -> Hive: if default format, just CAST(x AS TIMESTAMP)
12211 if let Expression::Literal(
12212 crate::expressions::Literal::String(s),
12213 ) = fmt_expr
12214 {
12215 if crate::dialects::presto::PrestoDialect::is_default_timestamp_format(s)
12216 || crate::dialects::presto::PrestoDialect::is_default_date_format(s) {
12217 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
12218 this: val,
12219 to: DataType::Timestamp { timezone: false, precision: None },
12220 trailing_comments: Vec::new(),
12221 double_colon_syntax: false,
12222 format: None,
12223 default: None,
12224 })))
12225 } else {
12226 let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
12227 Ok(Expression::Function(Box::new(Function::new(
12228 "TO_TIMESTAMP".to_string(),
12229 vec![val, Expression::string(&java_fmt)],
12230 ))))
12231 }
12232 } else {
12233 Ok(Expression::Function(f))
12234 }
12235 }
12236 DialectType::Spark | DialectType::Databricks => {
12237 // Presto -> Spark: TO_TIMESTAMP(val, java_fmt)
12238 if let Expression::Literal(
12239 crate::expressions::Literal::String(s),
12240 ) = fmt_expr
12241 {
12242 let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
12243 Ok(Expression::Function(Box::new(Function::new(
12244 "TO_TIMESTAMP".to_string(),
12245 vec![val, Expression::string(&java_fmt)],
12246 ))))
12247 } else {
12248 Ok(Expression::Function(f))
12249 }
12250 }
12251 DialectType::DuckDB => {
12252 // Presto -> DuckDB: STRPTIME(val, duckdb_fmt)
12253 if let Expression::Literal(
12254 crate::expressions::Literal::String(s),
12255 ) = fmt_expr
12256 {
12257 let duckdb_fmt = crate::dialects::presto::PrestoDialect::presto_to_duckdb_format(s);
12258 Ok(Expression::Function(Box::new(Function::new(
12259 "STRPTIME".to_string(),
12260 vec![val, Expression::string(&duckdb_fmt)],
12261 ))))
12262 } else {
12263 Ok(Expression::Function(Box::new(Function::new(
12264 "STRPTIME".to_string(),
12265 vec![val, fmt_expr.clone()],
12266 ))))
12267 }
12268 }
12269 _ => Ok(Expression::Function(f)),
12270 }
12271 }
12272 // FROM_BASE64(x) / TO_BASE64(x) from Presto -> Hive-specific renames
12273 "FROM_BASE64"
12274 if f.args.len() == 1 && matches!(target, DialectType::Hive) =>
12275 {
12276 Ok(Expression::Function(Box::new(Function::new(
12277 "UNBASE64".to_string(),
12278 f.args,
12279 ))))
12280 }
12281 "TO_BASE64"
12282 if f.args.len() == 1 && matches!(target, DialectType::Hive) =>
12283 {
12284 Ok(Expression::Function(Box::new(Function::new(
12285 "BASE64".to_string(),
12286 f.args,
12287 ))))
12288 }
12289 // FROM_UNIXTIME(x) -> CAST(FROM_UNIXTIME(x) AS TIMESTAMP) for Spark
12290 "FROM_UNIXTIME"
12291 if f.args.len() == 1
12292 && matches!(
12293 source,
12294 DialectType::Presto
12295 | DialectType::Trino
12296 | DialectType::Athena
12297 )
12298 && matches!(
12299 target,
12300 DialectType::Spark | DialectType::Databricks
12301 ) =>
12302 {
12303 // Wrap FROM_UNIXTIME(x) in CAST(... AS TIMESTAMP)
12304 let from_unix = Expression::Function(Box::new(Function::new(
12305 "FROM_UNIXTIME".to_string(),
12306 f.args,
12307 )));
12308 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
12309 this: from_unix,
12310 to: DataType::Timestamp {
12311 timezone: false,
12312 precision: None,
12313 },
12314 trailing_comments: Vec::new(),
12315 double_colon_syntax: false,
12316 format: None,
12317 default: None,
12318 })))
12319 }
12320 // DATE_FORMAT(val, fmt) from Hive/Spark/MySQL -> target-specific format function
12321 "DATE_FORMAT"
12322 if f.args.len() >= 2
12323 && !matches!(
12324 target,
12325 DialectType::Hive
12326 | DialectType::Spark
12327 | DialectType::Databricks
12328 | DialectType::MySQL
12329 | DialectType::SingleStore
12330 ) =>
12331 {
12332 let val = f.args[0].clone();
12333 let fmt_expr = &f.args[1];
12334 let is_hive_source = matches!(
12335 source,
12336 DialectType::Hive
12337 | DialectType::Spark
12338 | DialectType::Databricks
12339 );
12340
// Translate a Java (SimpleDateFormat-style) pattern into a C-style
// strftime pattern. Two passes are required:
//
// 1. Ordered substring replacement of multi-character tokens, longest
//    first, so e.g. "yyyy" is consumed before "yy" could match inside
//    it.
//    NOTE(review): "EEEE" maps to %W, which is the weekday NAME in
//    MySQL/Presto DATE_FORMAT but the WEEK NUMBER in C strftime
//    (DuckDB/BigQuery use %A for the name) — confirm against the
//    targets this helper feeds.
// 2. A character scan that turns bare 'z' / 'Z' (timezone name /
//    offset) into %Z / %z while skipping over already-emitted %X
//    pairs so the 'Y', 'S', ... inside them are never touched.
fn java_to_c_format(fmt: &str) -> String {
    const TABLE: [(&str, &str); 9] = [
        ("yyyy", "%Y"),
        ("SSSSSS", "%f"),
        ("EEEE", "%W"),
        ("MM", "%m"),
        ("dd", "%d"),
        ("HH", "%H"),
        ("mm", "%M"),
        ("ss", "%S"),
        ("yy", "%y"),
    ];
    let partial = TABLE
        .iter()
        .fold(fmt.to_string(), |acc, (from, to)| acc.replace(from, to));

    let mut out = String::with_capacity(partial.len());
    let mut rest = partial.chars();
    while let Some(c) = rest.next() {
        match c {
            '%' => {
                // Copy the whole %X specifier verbatim; a trailing
                // lone '%' is copied as-is.
                out.push('%');
                if let Some(next) = rest.next() {
                    out.push(next);
                }
            }
            'z' => out.push_str("%Z"),
            'Z' => out.push_str("%z"),
            other => out.push(other),
        }
    }
    out
}
12380
12381 fn java_to_presto_format(fmt: &str) -> String {
12382 // Presto uses %T for HH:MM:SS
12383 let c_fmt = java_to_c_format(fmt);
12384 c_fmt.replace("%H:%M:%S", "%T")
12385 }
12386
12387 fn java_to_bq_format(fmt: &str) -> String {
12388 // BigQuery uses %F for yyyy-MM-dd and %T for HH:mm:ss
12389 let c_fmt = java_to_c_format(fmt);
12390 c_fmt.replace("%Y-%m-%d", "%F").replace("%H:%M:%S", "%T")
12391 }
12392
12393 // For Hive source, CAST string literals to appropriate type
12394 let cast_val = if is_hive_source {
12395 match &val {
12396 Expression::Literal(
12397 crate::expressions::Literal::String(_),
12398 ) => {
12399 match target {
12400 DialectType::DuckDB
12401 | DialectType::Presto
12402 | DialectType::Trino
12403 | DialectType::Athena => {
12404 Self::ensure_cast_timestamp(val.clone())
12405 }
12406 DialectType::BigQuery => {
12407 // BigQuery: CAST(val AS DATETIME)
12408 Expression::Cast(Box::new(
12409 crate::expressions::Cast {
12410 this: val.clone(),
12411 to: DataType::Custom {
12412 name: "DATETIME".to_string(),
12413 },
12414 trailing_comments: vec![],
12415 double_colon_syntax: false,
12416 format: None,
12417 default: None,
12418 },
12419 ))
12420 }
12421 _ => val.clone(),
12422 }
12423 }
12424 // For CAST(x AS DATE) or DATE literal, Presto needs CAST(CAST(x AS DATE) AS TIMESTAMP)
12425 Expression::Cast(c)
12426 if matches!(c.to, DataType::Date)
12427 && matches!(
12428 target,
12429 DialectType::Presto
12430 | DialectType::Trino
12431 | DialectType::Athena
12432 ) =>
12433 {
12434 Expression::Cast(Box::new(crate::expressions::Cast {
12435 this: val.clone(),
12436 to: DataType::Timestamp {
12437 timezone: false,
12438 precision: None,
12439 },
12440 trailing_comments: vec![],
12441 double_colon_syntax: false,
12442 format: None,
12443 default: None,
12444 }))
12445 }
12446 Expression::Literal(crate::expressions::Literal::Date(
12447 _,
12448 )) if matches!(
12449 target,
12450 DialectType::Presto
12451 | DialectType::Trino
12452 | DialectType::Athena
12453 ) =>
12454 {
12455 // DATE 'x' -> CAST(CAST('x' AS DATE) AS TIMESTAMP)
12456 let cast_date = Self::date_literal_to_cast(val.clone());
12457 Expression::Cast(Box::new(crate::expressions::Cast {
12458 this: cast_date,
12459 to: DataType::Timestamp {
12460 timezone: false,
12461 precision: None,
12462 },
12463 trailing_comments: vec![],
12464 double_colon_syntax: false,
12465 format: None,
12466 default: None,
12467 }))
12468 }
12469 _ => val.clone(),
12470 }
12471 } else {
12472 val.clone()
12473 };
12474
12475 match target {
12476 DialectType::DuckDB => {
12477 if let Expression::Literal(
12478 crate::expressions::Literal::String(s),
12479 ) = fmt_expr
12480 {
12481 let c_fmt = if is_hive_source {
12482 java_to_c_format(s)
12483 } else {
12484 s.clone()
12485 };
12486 Ok(Expression::Function(Box::new(Function::new(
12487 "STRFTIME".to_string(),
12488 vec![cast_val, Expression::string(&c_fmt)],
12489 ))))
12490 } else {
12491 Ok(Expression::Function(Box::new(Function::new(
12492 "STRFTIME".to_string(),
12493 vec![cast_val, fmt_expr.clone()],
12494 ))))
12495 }
12496 }
12497 DialectType::Presto
12498 | DialectType::Trino
12499 | DialectType::Athena => {
12500 if is_hive_source {
12501 if let Expression::Literal(
12502 crate::expressions::Literal::String(s),
12503 ) = fmt_expr
12504 {
12505 let p_fmt = java_to_presto_format(s);
12506 Ok(Expression::Function(Box::new(Function::new(
12507 "DATE_FORMAT".to_string(),
12508 vec![cast_val, Expression::string(&p_fmt)],
12509 ))))
12510 } else {
12511 Ok(Expression::Function(Box::new(Function::new(
12512 "DATE_FORMAT".to_string(),
12513 vec![cast_val, fmt_expr.clone()],
12514 ))))
12515 }
12516 } else {
12517 Ok(Expression::Function(Box::new(Function::new(
12518 "DATE_FORMAT".to_string(),
12519 f.args,
12520 ))))
12521 }
12522 }
12523 DialectType::BigQuery => {
12524 // DATE_FORMAT(val, fmt) -> FORMAT_DATE(fmt, val)
12525 if let Expression::Literal(
12526 crate::expressions::Literal::String(s),
12527 ) = fmt_expr
12528 {
12529 let bq_fmt = if is_hive_source {
12530 java_to_bq_format(s)
12531 } else {
12532 java_to_c_format(s)
12533 };
12534 Ok(Expression::Function(Box::new(Function::new(
12535 "FORMAT_DATE".to_string(),
12536 vec![Expression::string(&bq_fmt), cast_val],
12537 ))))
12538 } else {
12539 Ok(Expression::Function(Box::new(Function::new(
12540 "FORMAT_DATE".to_string(),
12541 vec![fmt_expr.clone(), cast_val],
12542 ))))
12543 }
12544 }
12545 DialectType::PostgreSQL | DialectType::Redshift => {
12546 if let Expression::Literal(
12547 crate::expressions::Literal::String(s),
12548 ) = fmt_expr
12549 {
12550 let pg_fmt = s
12551 .replace("yyyy", "YYYY")
12552 .replace("MM", "MM")
12553 .replace("dd", "DD")
12554 .replace("HH", "HH24")
12555 .replace("mm", "MI")
12556 .replace("ss", "SS")
12557 .replace("yy", "YY");
12558 Ok(Expression::Function(Box::new(Function::new(
12559 "TO_CHAR".to_string(),
12560 vec![val, Expression::string(&pg_fmt)],
12561 ))))
12562 } else {
12563 Ok(Expression::Function(Box::new(Function::new(
12564 "TO_CHAR".to_string(),
12565 vec![val, fmt_expr.clone()],
12566 ))))
12567 }
12568 }
12569 _ => Ok(Expression::Function(f)),
12570 }
12571 }
12572 // DATEDIFF(unit, start, end) - 3-arg form
12573 // SQLite uses DATEDIFF(date1, date2, unit_string) instead
12574 "DATEDIFF" if f.args.len() == 3 => {
12575 let mut args = f.args;
12576 // SQLite source: args = (date1, date2, unit_string)
12577 // Standard source: args = (unit, start, end)
12578 let (_arg0, arg1, arg2, unit_str) =
12579 if matches!(source, DialectType::SQLite) {
12580 let date1 = args.remove(0);
12581 let date2 = args.remove(0);
12582 let unit_expr = args.remove(0);
12583 let unit_s = Self::get_unit_str_static(&unit_expr);
12584
12585 // For SQLite target, generate JULIANDAY arithmetic directly
12586 if matches!(target, DialectType::SQLite) {
12587 let jd_first = Expression::Function(Box::new(
12588 Function::new("JULIANDAY".to_string(), vec![date1]),
12589 ));
12590 let jd_second = Expression::Function(Box::new(
12591 Function::new("JULIANDAY".to_string(), vec![date2]),
12592 ));
12593 let diff = Expression::Sub(Box::new(
12594 crate::expressions::BinaryOp::new(
12595 jd_first, jd_second,
12596 ),
12597 ));
12598 let paren_diff = Expression::Paren(Box::new(
12599 crate::expressions::Paren {
12600 this: diff,
12601 trailing_comments: Vec::new(),
12602 },
12603 ));
12604 let adjusted = match unit_s.as_str() {
12605 "HOUR" => Expression::Mul(Box::new(
12606 crate::expressions::BinaryOp::new(
12607 paren_diff,
12608 Expression::Literal(Literal::Number(
12609 "24.0".to_string(),
12610 )),
12611 ),
12612 )),
12613 "MINUTE" => Expression::Mul(Box::new(
12614 crate::expressions::BinaryOp::new(
12615 paren_diff,
12616 Expression::Literal(Literal::Number(
12617 "1440.0".to_string(),
12618 )),
12619 ),
12620 )),
12621 "SECOND" => Expression::Mul(Box::new(
12622 crate::expressions::BinaryOp::new(
12623 paren_diff,
12624 Expression::Literal(Literal::Number(
12625 "86400.0".to_string(),
12626 )),
12627 ),
12628 )),
12629 "MONTH" => Expression::Div(Box::new(
12630 crate::expressions::BinaryOp::new(
12631 paren_diff,
12632 Expression::Literal(Literal::Number(
12633 "30.0".to_string(),
12634 )),
12635 ),
12636 )),
12637 "YEAR" => Expression::Div(Box::new(
12638 crate::expressions::BinaryOp::new(
12639 paren_diff,
12640 Expression::Literal(Literal::Number(
12641 "365.0".to_string(),
12642 )),
12643 ),
12644 )),
12645 _ => paren_diff,
12646 };
12647 return Ok(Expression::Cast(Box::new(Cast {
12648 this: adjusted,
12649 to: DataType::Int {
12650 length: None,
12651 integer_spelling: true,
12652 },
12653 trailing_comments: vec![],
12654 double_colon_syntax: false,
12655 format: None,
12656 default: None,
12657 })));
12658 }
12659
12660 // For other targets, remap to standard (unit, start, end) form
12661 let unit_ident =
12662 Expression::Identifier(Identifier::new(&unit_s));
12663 (unit_ident, date1, date2, unit_s)
12664 } else {
12665 let arg0 = args.remove(0);
12666 let arg1 = args.remove(0);
12667 let arg2 = args.remove(0);
12668 let unit_s = Self::get_unit_str_static(&arg0);
12669 (arg0, arg1, arg2, unit_s)
12670 };
12671
12672 // For Hive/Spark source, string literal dates need to be cast
12673 // Note: Databricks is excluded - it handles string args like standard SQL
12674 let is_hive_spark =
12675 matches!(source, DialectType::Hive | DialectType::Spark);
12676
12677 match target {
12678 DialectType::Snowflake => {
12679 let unit =
12680 Expression::Identifier(Identifier::new(&unit_str));
12681 // Use ensure_to_date_preserved to add TO_DATE with a marker
12682 // that prevents the Snowflake TO_DATE handler from converting it to CAST
12683 let d1 = if is_hive_spark {
12684 Self::ensure_to_date_preserved(arg1)
12685 } else {
12686 arg1
12687 };
12688 let d2 = if is_hive_spark {
12689 Self::ensure_to_date_preserved(arg2)
12690 } else {
12691 arg2
12692 };
12693 Ok(Expression::Function(Box::new(Function::new(
12694 "DATEDIFF".to_string(),
12695 vec![unit, d1, d2],
12696 ))))
12697 }
12698 DialectType::Redshift => {
12699 let unit =
12700 Expression::Identifier(Identifier::new(&unit_str));
12701 let d1 = if is_hive_spark {
12702 Self::ensure_cast_date(arg1)
12703 } else {
12704 arg1
12705 };
12706 let d2 = if is_hive_spark {
12707 Self::ensure_cast_date(arg2)
12708 } else {
12709 arg2
12710 };
12711 Ok(Expression::Function(Box::new(Function::new(
12712 "DATEDIFF".to_string(),
12713 vec![unit, d1, d2],
12714 ))))
12715 }
12716 DialectType::TSQL => {
12717 let unit =
12718 Expression::Identifier(Identifier::new(&unit_str));
12719 Ok(Expression::Function(Box::new(Function::new(
12720 "DATEDIFF".to_string(),
12721 vec![unit, arg1, arg2],
12722 ))))
12723 }
12724 DialectType::DuckDB => {
12725 let is_redshift_tsql = matches!(
12726 source,
12727 DialectType::Redshift | DialectType::TSQL
12728 );
12729 if is_hive_spark {
12730 // For Hive/Spark source, CAST string args to DATE and emit DATE_DIFF directly
12731 let d1 = Self::ensure_cast_date(arg1);
12732 let d2 = Self::ensure_cast_date(arg2);
12733 Ok(Expression::Function(Box::new(Function::new(
12734 "DATE_DIFF".to_string(),
12735 vec![Expression::string(&unit_str), d1, d2],
12736 ))))
12737 } else if matches!(source, DialectType::Snowflake) {
12738 // For Snowflake source: special handling per unit
12739 match unit_str.as_str() {
12740 "NANOSECOND" => {
12741 // DATEDIFF(NANOSECOND, start, end) -> EPOCH_NS(CAST(end AS TIMESTAMP_NS)) - EPOCH_NS(CAST(start AS TIMESTAMP_NS))
12742 fn cast_to_timestamp_ns(
12743 expr: Expression,
12744 ) -> Expression
12745 {
12746 Expression::Cast(Box::new(Cast {
12747 this: expr,
12748 to: DataType::Custom {
12749 name: "TIMESTAMP_NS".to_string(),
12750 },
12751 trailing_comments: vec![],
12752 double_colon_syntax: false,
12753 format: None,
12754 default: None,
12755 }))
12756 }
12757 let epoch_end = Expression::Function(Box::new(
12758 Function::new(
12759 "EPOCH_NS".to_string(),
12760 vec![cast_to_timestamp_ns(arg2)],
12761 ),
12762 ));
12763 let epoch_start = Expression::Function(
12764 Box::new(Function::new(
12765 "EPOCH_NS".to_string(),
12766 vec![cast_to_timestamp_ns(arg1)],
12767 )),
12768 );
12769 Ok(Expression::Sub(Box::new(BinaryOp::new(
12770 epoch_end,
12771 epoch_start,
12772 ))))
12773 }
12774 "WEEK" => {
12775 // DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST(x AS DATE)), DATE_TRUNC('WEEK', CAST(y AS DATE)))
12776 let d1 = Self::force_cast_date(arg1);
12777 let d2 = Self::force_cast_date(arg2);
12778 let dt1 = Expression::Function(Box::new(
12779 Function::new(
12780 "DATE_TRUNC".to_string(),
12781 vec![Expression::string("WEEK"), d1],
12782 ),
12783 ));
12784 let dt2 = Expression::Function(Box::new(
12785 Function::new(
12786 "DATE_TRUNC".to_string(),
12787 vec![Expression::string("WEEK"), d2],
12788 ),
12789 ));
12790 Ok(Expression::Function(Box::new(
12791 Function::new(
12792 "DATE_DIFF".to_string(),
12793 vec![
12794 Expression::string(&unit_str),
12795 dt1,
12796 dt2,
12797 ],
12798 ),
12799 )))
12800 }
12801 _ => {
12802 // YEAR, MONTH, QUARTER, DAY, etc.: CAST to DATE
12803 let d1 = Self::force_cast_date(arg1);
12804 let d2 = Self::force_cast_date(arg2);
12805 Ok(Expression::Function(Box::new(
12806 Function::new(
12807 "DATE_DIFF".to_string(),
12808 vec![
12809 Expression::string(&unit_str),
12810 d1,
12811 d2,
12812 ],
12813 ),
12814 )))
12815 }
12816 }
12817 } else if is_redshift_tsql {
12818 // For Redshift/TSQL source, CAST args to TIMESTAMP (always)
12819 let d1 = Self::force_cast_timestamp(arg1);
12820 let d2 = Self::force_cast_timestamp(arg2);
12821 Ok(Expression::Function(Box::new(Function::new(
12822 "DATE_DIFF".to_string(),
12823 vec![Expression::string(&unit_str), d1, d2],
12824 ))))
12825 } else {
12826 // Keep as DATEDIFF so DuckDB's transform_datediff handles
12827 // DATE_TRUNC for WEEK, CAST for string literals, etc.
12828 let unit =
12829 Expression::Identifier(Identifier::new(&unit_str));
12830 Ok(Expression::Function(Box::new(Function::new(
12831 "DATEDIFF".to_string(),
12832 vec![unit, arg1, arg2],
12833 ))))
12834 }
12835 }
12836 DialectType::BigQuery => {
12837 let is_redshift_tsql = matches!(
12838 source,
12839 DialectType::Redshift
12840 | DialectType::TSQL
12841 | DialectType::Snowflake
12842 );
12843 let cast_d1 = if is_hive_spark {
12844 Self::ensure_cast_date(arg1)
12845 } else if is_redshift_tsql {
12846 Self::force_cast_datetime(arg1)
12847 } else {
12848 Self::ensure_cast_datetime(arg1)
12849 };
12850 let cast_d2 = if is_hive_spark {
12851 Self::ensure_cast_date(arg2)
12852 } else if is_redshift_tsql {
12853 Self::force_cast_datetime(arg2)
12854 } else {
12855 Self::ensure_cast_datetime(arg2)
12856 };
12857 let unit =
12858 Expression::Identifier(Identifier::new(&unit_str));
12859 Ok(Expression::Function(Box::new(Function::new(
12860 "DATE_DIFF".to_string(),
12861 vec![cast_d2, cast_d1, unit],
12862 ))))
12863 }
12864 DialectType::Presto
12865 | DialectType::Trino
12866 | DialectType::Athena => {
12867 // For Hive/Spark source, string literals need double-cast: CAST(CAST(x AS TIMESTAMP) AS DATE)
12868 // For Redshift/TSQL source, args need CAST to TIMESTAMP (always)
12869 let is_redshift_tsql = matches!(
12870 source,
12871 DialectType::Redshift
12872 | DialectType::TSQL
12873 | DialectType::Snowflake
12874 );
12875 let d1 = if is_hive_spark {
12876 Self::double_cast_timestamp_date(arg1)
12877 } else if is_redshift_tsql {
12878 Self::force_cast_timestamp(arg1)
12879 } else {
12880 arg1
12881 };
12882 let d2 = if is_hive_spark {
12883 Self::double_cast_timestamp_date(arg2)
12884 } else if is_redshift_tsql {
12885 Self::force_cast_timestamp(arg2)
12886 } else {
12887 arg2
12888 };
12889 Ok(Expression::Function(Box::new(Function::new(
12890 "DATE_DIFF".to_string(),
12891 vec![Expression::string(&unit_str), d1, d2],
12892 ))))
12893 }
12894 DialectType::Hive => match unit_str.as_str() {
12895 "MONTH" => Ok(Expression::Cast(Box::new(Cast {
12896 this: Expression::Function(Box::new(Function::new(
12897 "MONTHS_BETWEEN".to_string(),
12898 vec![arg2, arg1],
12899 ))),
12900 to: DataType::Int {
12901 length: None,
12902 integer_spelling: false,
12903 },
12904 trailing_comments: vec![],
12905 double_colon_syntax: false,
12906 format: None,
12907 default: None,
12908 }))),
12909 "WEEK" => Ok(Expression::Cast(Box::new(Cast {
12910 this: Expression::Div(Box::new(
12911 crate::expressions::BinaryOp::new(
12912 Expression::Function(Box::new(Function::new(
12913 "DATEDIFF".to_string(),
12914 vec![arg2, arg1],
12915 ))),
12916 Expression::number(7),
12917 ),
12918 )),
12919 to: DataType::Int {
12920 length: None,
12921 integer_spelling: false,
12922 },
12923 trailing_comments: vec![],
12924 double_colon_syntax: false,
12925 format: None,
12926 default: None,
12927 }))),
12928 _ => Ok(Expression::Function(Box::new(Function::new(
12929 "DATEDIFF".to_string(),
12930 vec![arg2, arg1],
12931 )))),
12932 },
12933 DialectType::Spark | DialectType::Databricks => {
12934 let unit =
12935 Expression::Identifier(Identifier::new(&unit_str));
12936 Ok(Expression::Function(Box::new(Function::new(
12937 "DATEDIFF".to_string(),
12938 vec![unit, arg1, arg2],
12939 ))))
12940 }
12941 _ => {
12942 // For Hive/Spark source targeting PostgreSQL etc., cast string literals to DATE
12943 let d1 = if is_hive_spark {
12944 Self::ensure_cast_date(arg1)
12945 } else {
12946 arg1
12947 };
12948 let d2 = if is_hive_spark {
12949 Self::ensure_cast_date(arg2)
12950 } else {
12951 arg2
12952 };
12953 let unit =
12954 Expression::Identifier(Identifier::new(&unit_str));
12955 Ok(Expression::Function(Box::new(Function::new(
12956 "DATEDIFF".to_string(),
12957 vec![unit, d1, d2],
12958 ))))
12959 }
12960 }
12961 }
            // DATEDIFF(end, start) - 2-arg form from Hive/MySQL
            // NOTE: arg0 is the END date and arg1 the START date; targets whose
            // DATEDIFF/DATE_DIFF takes (unit, start, end) therefore emit the
            // args in (arg1, arg0) order below.
            "DATEDIFF" if f.args.len() == 2 => {
                let mut args = f.args;
                let arg0 = args.remove(0); // end date
                let arg1 = args.remove(0); // start date

                // Helper: unwrap TO_DATE(x) -> x (extracts inner arg)
                // Also recognizes TryCast/Cast to DATE that may have been produced by
                // cross-dialect TO_DATE -> TRY_CAST conversion
                let unwrap_to_date = |e: Expression| -> (Expression, bool) {
                    if let Expression::Function(ref f) = e {
                        if f.name.eq_ignore_ascii_case("TO_DATE")
                            && f.args.len() == 1
                        {
                            // TO_DATE(x): hand back the inner expression.
                            return (f.args[0].clone(), true);
                        }
                    }
                    // Also recognize TryCast(x, Date) as an already-converted TO_DATE
                    if let Expression::TryCast(ref c) = e {
                        if matches!(c.to, DataType::Date) {
                            return (e, true); // Already properly cast, return as-is
                        }
                    }
                    (e, false)
                };

                match target {
                    DialectType::DuckDB => {
                        // For Hive source, always CAST to DATE
                        // If arg is TO_DATE(x) or TRY_CAST(x AS DATE), use it directly
                        let cast_d0 = if matches!(
                            source,
                            DialectType::Hive
                                | DialectType::Spark
                                | DialectType::Databricks
                        ) {
                            let (inner, was_to_date) = unwrap_to_date(arg1);
                            if was_to_date {
                                // Already a date expression, use directly
                                if matches!(&inner, Expression::TryCast(_)) {
                                    inner // Already TRY_CAST(x AS DATE)
                                } else {
                                    Self::try_cast_date(inner)
                                }
                            } else {
                                Self::force_cast_date(inner)
                            }
                        } else {
                            Self::ensure_cast_date(arg1)
                        };
                        // Same casting policy for the end date.
                        let cast_d1 = if matches!(
                            source,
                            DialectType::Hive
                                | DialectType::Spark
                                | DialectType::Databricks
                        ) {
                            let (inner, was_to_date) = unwrap_to_date(arg0);
                            if was_to_date {
                                if matches!(&inner, Expression::TryCast(_)) {
                                    inner
                                } else {
                                    Self::try_cast_date(inner)
                                }
                            } else {
                                Self::force_cast_date(inner)
                            }
                        } else {
                            Self::ensure_cast_date(arg0)
                        };
                        // DuckDB: DATE_DIFF('DAY', start, end) with string unit.
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_DIFF".to_string(),
                            vec![Expression::string("DAY"), cast_d0, cast_d1],
                        ))))
                    }
                    DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Athena => {
                        // For Hive/Spark source, apply double_cast_timestamp_date
                        // For other sources (MySQL etc.), just swap args without casting
                        if matches!(
                            source,
                            DialectType::Hive
                                | DialectType::Spark
                                | DialectType::Databricks
                        ) {
                            let cast_fn = |e: Expression| -> Expression {
                                let (inner, was_to_date) = unwrap_to_date(e);
                                if was_to_date {
                                    // NOTE(review): double_cast_timestamp_date is applied
                                    // twice when the arg was a TO_DATE(...) wrapper —
                                    // confirm the doubled application is intentional and
                                    // not an accidental duplicate call.
                                    let first_cast =
                                        Self::double_cast_timestamp_date(inner);
                                    Self::double_cast_timestamp_date(first_cast)
                                } else {
                                    Self::double_cast_timestamp_date(inner)
                                }
                            };
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_DIFF".to_string(),
                                vec![
                                    Expression::string("DAY"),
                                    cast_fn(arg1),
                                    cast_fn(arg0),
                                ],
                            ))))
                        } else {
                            // No casting: just reorder to (unit, start, end).
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_DIFF".to_string(),
                                vec![Expression::string("DAY"), arg1, arg0],
                            ))))
                        }
                    }
                    DialectType::Redshift => {
                        // Redshift: DATEDIFF(DAY, start, end) with identifier unit.
                        let unit = Expression::Identifier(Identifier::new("DAY"));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEDIFF".to_string(),
                            vec![unit, arg1, arg0],
                        ))))
                    }
                    // Targets with a native 2-arg DATEDIFF(end, start): pass through
                    // unchanged in the original (end, start) order.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "DATEDIFF".to_string(),
                        vec![arg0, arg1],
                    )))),
                }
            }
13085 // DATE_DIFF(unit, start, end) - 3-arg with string unit (ClickHouse/DuckDB style)
13086 "DATE_DIFF" if f.args.len() == 3 => {
13087 let mut args = f.args;
13088 let arg0 = args.remove(0);
13089 let arg1 = args.remove(0);
13090 let arg2 = args.remove(0);
13091 let unit_str = Self::get_unit_str_static(&arg0);
13092
13093 match target {
13094 DialectType::DuckDB => {
13095 // DuckDB: DATE_DIFF('UNIT', start, end)
13096 Ok(Expression::Function(Box::new(Function::new(
13097 "DATE_DIFF".to_string(),
13098 vec![Expression::string(&unit_str), arg1, arg2],
13099 ))))
13100 }
13101 DialectType::Presto
13102 | DialectType::Trino
13103 | DialectType::Athena => {
13104 Ok(Expression::Function(Box::new(Function::new(
13105 "DATE_DIFF".to_string(),
13106 vec![Expression::string(&unit_str), arg1, arg2],
13107 ))))
13108 }
13109 DialectType::ClickHouse => {
13110 // ClickHouse: DATE_DIFF(UNIT, start, end) - identifier unit
13111 let unit =
13112 Expression::Identifier(Identifier::new(&unit_str));
13113 Ok(Expression::Function(Box::new(Function::new(
13114 "DATE_DIFF".to_string(),
13115 vec![unit, arg1, arg2],
13116 ))))
13117 }
13118 DialectType::Snowflake | DialectType::Redshift => {
13119 let unit =
13120 Expression::Identifier(Identifier::new(&unit_str));
13121 Ok(Expression::Function(Box::new(Function::new(
13122 "DATEDIFF".to_string(),
13123 vec![unit, arg1, arg2],
13124 ))))
13125 }
13126 _ => {
13127 let unit =
13128 Expression::Identifier(Identifier::new(&unit_str));
13129 Ok(Expression::Function(Box::new(Function::new(
13130 "DATEDIFF".to_string(),
13131 vec![unit, arg1, arg2],
13132 ))))
13133 }
13134 }
13135 }
13136 // DATEADD(unit, val, date) - 3-arg form
13137 "DATEADD" if f.args.len() == 3 => {
13138 let mut args = f.args;
13139 let arg0 = args.remove(0);
13140 let arg1 = args.remove(0);
13141 let arg2 = args.remove(0);
13142 let unit_str = Self::get_unit_str_static(&arg0);
13143
13144 // Normalize TSQL unit abbreviations to standard names
13145 let unit_str = match unit_str.as_str() {
13146 "YY" | "YYYY" => "YEAR".to_string(),
13147 "QQ" | "Q" => "QUARTER".to_string(),
13148 "MM" | "M" => "MONTH".to_string(),
13149 "WK" | "WW" => "WEEK".to_string(),
13150 "DD" | "D" | "DY" => "DAY".to_string(),
13151 "HH" => "HOUR".to_string(),
13152 "MI" | "N" => "MINUTE".to_string(),
13153 "SS" | "S" => "SECOND".to_string(),
13154 "MS" => "MILLISECOND".to_string(),
13155 "MCS" | "US" => "MICROSECOND".to_string(),
13156 _ => unit_str,
13157 };
13158 match target {
13159 DialectType::Snowflake => {
13160 let unit =
13161 Expression::Identifier(Identifier::new(&unit_str));
13162 // Cast string literal to TIMESTAMP, but not for Snowflake source
13163 // (Snowflake natively accepts string literals in DATEADD)
13164 let arg2 = if matches!(
13165 &arg2,
13166 Expression::Literal(Literal::String(_))
13167 ) && !matches!(source, DialectType::Snowflake)
13168 {
13169 Expression::Cast(Box::new(Cast {
13170 this: arg2,
13171 to: DataType::Timestamp {
13172 precision: None,
13173 timezone: false,
13174 },
13175 trailing_comments: Vec::new(),
13176 double_colon_syntax: false,
13177 format: None,
13178 default: None,
13179 }))
13180 } else {
13181 arg2
13182 };
13183 Ok(Expression::Function(Box::new(Function::new(
13184 "DATEADD".to_string(),
13185 vec![unit, arg1, arg2],
13186 ))))
13187 }
13188 DialectType::TSQL => {
13189 let unit =
13190 Expression::Identifier(Identifier::new(&unit_str));
13191 // Cast string literal to DATETIME2, but not when source is Spark/Databricks family
13192 let arg2 = if matches!(
13193 &arg2,
13194 Expression::Literal(Literal::String(_))
13195 ) && !matches!(
13196 source,
13197 DialectType::Spark
13198 | DialectType::Databricks
13199 | DialectType::Hive
13200 ) {
13201 Expression::Cast(Box::new(Cast {
13202 this: arg2,
13203 to: DataType::Custom {
13204 name: "DATETIME2".to_string(),
13205 },
13206 trailing_comments: Vec::new(),
13207 double_colon_syntax: false,
13208 format: None,
13209 default: None,
13210 }))
13211 } else {
13212 arg2
13213 };
13214 Ok(Expression::Function(Box::new(Function::new(
13215 "DATEADD".to_string(),
13216 vec![unit, arg1, arg2],
13217 ))))
13218 }
13219 DialectType::Redshift => {
13220 let unit =
13221 Expression::Identifier(Identifier::new(&unit_str));
13222 Ok(Expression::Function(Box::new(Function::new(
13223 "DATEADD".to_string(),
13224 vec![unit, arg1, arg2],
13225 ))))
13226 }
13227 DialectType::Databricks => {
13228 let unit =
13229 Expression::Identifier(Identifier::new(&unit_str));
13230 // Sources with native DATEADD (TSQL, Databricks, Snowflake) -> DATEADD
13231 // Other sources (Redshift TsOrDsAdd, etc.) -> DATE_ADD
13232 let func_name = if matches!(
13233 source,
13234 DialectType::TSQL
13235 | DialectType::Fabric
13236 | DialectType::Databricks
13237 | DialectType::Snowflake
13238 ) {
13239 "DATEADD"
13240 } else {
13241 "DATE_ADD"
13242 };
13243 Ok(Expression::Function(Box::new(Function::new(
13244 func_name.to_string(),
13245 vec![unit, arg1, arg2],
13246 ))))
13247 }
13248 DialectType::DuckDB => {
13249 // Special handling for NANOSECOND from Snowflake
13250 if unit_str == "NANOSECOND"
13251 && matches!(source, DialectType::Snowflake)
13252 {
13253 // DATEADD(NANOSECOND, offset, ts) -> MAKE_TIMESTAMP_NS(EPOCH_NS(CAST(ts AS TIMESTAMP_NS)) + offset)
13254 let cast_ts = Expression::Cast(Box::new(Cast {
13255 this: arg2,
13256 to: DataType::Custom {
13257 name: "TIMESTAMP_NS".to_string(),
13258 },
13259 trailing_comments: vec![],
13260 double_colon_syntax: false,
13261 format: None,
13262 default: None,
13263 }));
13264 let epoch_ns =
13265 Expression::Function(Box::new(Function::new(
13266 "EPOCH_NS".to_string(),
13267 vec![cast_ts],
13268 )));
13269 let sum = Expression::Add(Box::new(BinaryOp::new(
13270 epoch_ns, arg1,
13271 )));
13272 Ok(Expression::Function(Box::new(Function::new(
13273 "MAKE_TIMESTAMP_NS".to_string(),
13274 vec![sum],
13275 ))))
13276 } else {
13277 // DuckDB: convert to date + INTERVAL syntax with CAST
13278 let iu = Self::parse_interval_unit_static(&unit_str);
13279 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
13280 this: Some(arg1),
13281 unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: iu, use_plural: false }),
13282 }));
13283 // Cast string literal to TIMESTAMP
13284 let arg2 = if matches!(
13285 &arg2,
13286 Expression::Literal(Literal::String(_))
13287 ) {
13288 Expression::Cast(Box::new(Cast {
13289 this: arg2,
13290 to: DataType::Timestamp {
13291 precision: None,
13292 timezone: false,
13293 },
13294 trailing_comments: Vec::new(),
13295 double_colon_syntax: false,
13296 format: None,
13297 default: None,
13298 }))
13299 } else {
13300 arg2
13301 };
13302 Ok(Expression::Add(Box::new(
13303 crate::expressions::BinaryOp::new(arg2, interval),
13304 )))
13305 }
13306 }
13307 DialectType::Spark => {
13308 // For TSQL source: convert to ADD_MONTHS/DATE_ADD(date, val)
13309 // For other sources: keep 3-arg DATE_ADD(UNIT, val, date) form
13310 if matches!(source, DialectType::TSQL | DialectType::Fabric)
13311 {
13312 fn multiply_expr_spark(
13313 expr: Expression,
13314 factor: i64,
13315 ) -> Expression
13316 {
13317 if let Expression::Literal(
13318 crate::expressions::Literal::Number(n),
13319 ) = &expr
13320 {
13321 if let Ok(val) = n.parse::<i64>() {
13322 return Expression::Literal(
13323 crate::expressions::Literal::Number(
13324 (val * factor).to_string(),
13325 ),
13326 );
13327 }
13328 }
13329 Expression::Mul(Box::new(
13330 crate::expressions::BinaryOp::new(
13331 expr,
13332 Expression::Literal(
13333 crate::expressions::Literal::Number(
13334 factor.to_string(),
13335 ),
13336 ),
13337 ),
13338 ))
13339 }
13340 let normalized_unit = match unit_str.as_str() {
13341 "YEAR" | "YY" | "YYYY" => "YEAR",
13342 "QUARTER" | "QQ" | "Q" => "QUARTER",
13343 "MONTH" | "MM" | "M" => "MONTH",
13344 "WEEK" | "WK" | "WW" => "WEEK",
13345 "DAY" | "DD" | "D" | "DY" => "DAY",
13346 _ => &unit_str,
13347 };
13348 match normalized_unit {
13349 "YEAR" => {
13350 let months = multiply_expr_spark(arg1, 12);
13351 Ok(Expression::Function(Box::new(
13352 Function::new(
13353 "ADD_MONTHS".to_string(),
13354 vec![arg2, months],
13355 ),
13356 )))
13357 }
13358 "QUARTER" => {
13359 let months = multiply_expr_spark(arg1, 3);
13360 Ok(Expression::Function(Box::new(
13361 Function::new(
13362 "ADD_MONTHS".to_string(),
13363 vec![arg2, months],
13364 ),
13365 )))
13366 }
13367 "MONTH" => Ok(Expression::Function(Box::new(
13368 Function::new(
13369 "ADD_MONTHS".to_string(),
13370 vec![arg2, arg1],
13371 ),
13372 ))),
13373 "WEEK" => {
13374 let days = multiply_expr_spark(arg1, 7);
13375 Ok(Expression::Function(Box::new(
13376 Function::new(
13377 "DATE_ADD".to_string(),
13378 vec![arg2, days],
13379 ),
13380 )))
13381 }
13382 "DAY" => Ok(Expression::Function(Box::new(
13383 Function::new(
13384 "DATE_ADD".to_string(),
13385 vec![arg2, arg1],
13386 ),
13387 ))),
13388 _ => {
13389 let unit = Expression::Identifier(
13390 Identifier::new(&unit_str),
13391 );
13392 Ok(Expression::Function(Box::new(
13393 Function::new(
13394 "DATE_ADD".to_string(),
13395 vec![unit, arg1, arg2],
13396 ),
13397 )))
13398 }
13399 }
13400 } else {
13401 // Non-TSQL source: keep 3-arg DATE_ADD(UNIT, val, date)
13402 let unit =
13403 Expression::Identifier(Identifier::new(&unit_str));
13404 Ok(Expression::Function(Box::new(Function::new(
13405 "DATE_ADD".to_string(),
13406 vec![unit, arg1, arg2],
13407 ))))
13408 }
13409 }
13410 DialectType::Hive => match unit_str.as_str() {
13411 "MONTH" => {
13412 Ok(Expression::Function(Box::new(Function::new(
13413 "ADD_MONTHS".to_string(),
13414 vec![arg2, arg1],
13415 ))))
13416 }
13417 _ => Ok(Expression::Function(Box::new(Function::new(
13418 "DATE_ADD".to_string(),
13419 vec![arg2, arg1],
13420 )))),
13421 },
13422 DialectType::Presto
13423 | DialectType::Trino
13424 | DialectType::Athena => {
13425 // Cast string literal date to TIMESTAMP
13426 let arg2 = if matches!(
13427 &arg2,
13428 Expression::Literal(Literal::String(_))
13429 ) {
13430 Expression::Cast(Box::new(Cast {
13431 this: arg2,
13432 to: DataType::Timestamp {
13433 precision: None,
13434 timezone: false,
13435 },
13436 trailing_comments: Vec::new(),
13437 double_colon_syntax: false,
13438 format: None,
13439 default: None,
13440 }))
13441 } else {
13442 arg2
13443 };
13444 Ok(Expression::Function(Box::new(Function::new(
13445 "DATE_ADD".to_string(),
13446 vec![Expression::string(&unit_str), arg1, arg2],
13447 ))))
13448 }
13449 DialectType::MySQL => {
13450 let iu = Self::parse_interval_unit_static(&unit_str);
13451 Ok(Expression::DateAdd(Box::new(
13452 crate::expressions::DateAddFunc {
13453 this: arg2,
13454 interval: arg1,
13455 unit: iu,
13456 },
13457 )))
13458 }
13459 DialectType::PostgreSQL => {
13460 // Cast string literal date to TIMESTAMP
13461 let arg2 = if matches!(
13462 &arg2,
13463 Expression::Literal(Literal::String(_))
13464 ) {
13465 Expression::Cast(Box::new(Cast {
13466 this: arg2,
13467 to: DataType::Timestamp {
13468 precision: None,
13469 timezone: false,
13470 },
13471 trailing_comments: Vec::new(),
13472 double_colon_syntax: false,
13473 format: None,
13474 default: None,
13475 }))
13476 } else {
13477 arg2
13478 };
13479 let interval = Expression::Interval(Box::new(
13480 crate::expressions::Interval {
13481 this: Some(Expression::string(&format!(
13482 "{} {}",
13483 Self::expr_to_string_static(&arg1),
13484 unit_str
13485 ))),
13486 unit: None,
13487 },
13488 ));
13489 Ok(Expression::Add(Box::new(
13490 crate::expressions::BinaryOp::new(arg2, interval),
13491 )))
13492 }
13493 DialectType::BigQuery => {
13494 let iu = Self::parse_interval_unit_static(&unit_str);
13495 let interval = Expression::Interval(Box::new(
13496 crate::expressions::Interval {
13497 this: Some(arg1),
13498 unit: Some(
13499 crate::expressions::IntervalUnitSpec::Simple {
13500 unit: iu,
13501 use_plural: false,
13502 },
13503 ),
13504 },
13505 ));
13506 // Non-TSQL sources: CAST string literal to DATETIME
13507 let arg2 = if !matches!(
13508 source,
13509 DialectType::TSQL | DialectType::Fabric
13510 ) && matches!(
13511 &arg2,
13512 Expression::Literal(Literal::String(_))
13513 ) {
13514 Expression::Cast(Box::new(Cast {
13515 this: arg2,
13516 to: DataType::Custom {
13517 name: "DATETIME".to_string(),
13518 },
13519 trailing_comments: Vec::new(),
13520 double_colon_syntax: false,
13521 format: None,
13522 default: None,
13523 }))
13524 } else {
13525 arg2
13526 };
13527 Ok(Expression::Function(Box::new(Function::new(
13528 "DATE_ADD".to_string(),
13529 vec![arg2, interval],
13530 ))))
13531 }
13532 _ => {
13533 let unit =
13534 Expression::Identifier(Identifier::new(&unit_str));
13535 Ok(Expression::Function(Box::new(Function::new(
13536 "DATEADD".to_string(),
13537 vec![unit, arg1, arg2],
13538 ))))
13539 }
13540 }
13541 }
            // DATE_ADD - 3-arg: either (unit, val, date) from Presto/ClickHouse
            // or (date, val, 'UNIT') from Generic canonical form
            "DATE_ADD" if f.args.len() == 3 => {
                let mut args = f.args;
                let arg0 = args.remove(0);
                let arg1 = args.remove(0);
                let arg2 = args.remove(0);
                // Detect Generic canonical form: DATE_ADD(date, amount, 'UNIT')
                // where arg2 is a string literal matching a unit name
                let arg2_unit = match &arg2 {
                    Expression::Literal(Literal::String(s)) => {
                        let u = s.to_uppercase();
                        if matches!(
                            u.as_str(),
                            "DAY"
                                | "MONTH"
                                | "YEAR"
                                | "HOUR"
                                | "MINUTE"
                                | "SECOND"
                                | "WEEK"
                                | "QUARTER"
                                | "MILLISECOND"
                                | "MICROSECOND"
                        ) {
                            Some(u)
                        } else {
                            None
                        }
                    }
                    _ => None,
                };
                // Reorder: if arg2 is the unit, swap to (unit, val, date) form
                let (unit_str, val, date) = if let Some(u) = arg2_unit {
                    (u, arg1, arg0)
                } else {
                    (Self::get_unit_str_static(&arg0), arg1, arg2)
                };
                // Alias for backward compat with the rest of the match
                let arg1 = val; // amount to add
                let arg2 = date; // date/timestamp being shifted

                match target {
                    DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Athena => {
                        // Presto family: DATE_ADD('UNIT', amount, date), string unit.
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![Expression::string(&unit_str), arg1, arg2],
                        ))))
                    }
                    DialectType::DuckDB => {
                        // DuckDB: rewrite as `date + INTERVAL amount UNIT`.
                        let iu = Self::parse_interval_unit_static(&unit_str);
                        let interval = Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(arg1),
                                unit: Some(
                                    crate::expressions::IntervalUnitSpec::Simple {
                                        unit: iu,
                                        use_plural: false,
                                    },
                                ),
                            },
                        ));
                        Ok(Expression::Add(Box::new(
                            crate::expressions::BinaryOp::new(arg2, interval),
                        )))
                    }
                    DialectType::PostgreSQL
                    | DialectType::Materialize
                    | DialectType::RisingWave => {
                        // PostgreSQL: x + INTERVAL '1 DAY'
                        // NOTE(review): the amount is stringified into the interval
                        // literal — confirm non-literal amount expressions render
                        // acceptably via expr_to_string_static.
                        let amount_str = Self::expr_to_string_static(&arg1);
                        let interval = Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(Expression::string(&format!(
                                    "{} {}",
                                    amount_str, unit_str
                                ))),
                                unit: None,
                            },
                        ));
                        Ok(Expression::Add(Box::new(
                            crate::expressions::BinaryOp::new(arg2, interval),
                        )))
                    }
                    DialectType::Snowflake
                    | DialectType::TSQL
                    | DialectType::Redshift => {
                        // DATEADD(UNIT, amount, date) with identifier unit.
                        let unit =
                            Expression::Identifier(Identifier::new(&unit_str));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(),
                            vec![unit, arg1, arg2],
                        ))))
                    }
                    DialectType::BigQuery
                    | DialectType::MySQL
                    | DialectType::Doris
                    | DialectType::StarRocks
                    | DialectType::Drill => {
                        // DATE_ADD(date, INTERVAL amount UNIT)
                        let iu = Self::parse_interval_unit_static(&unit_str);
                        let interval = Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(arg1),
                                unit: Some(
                                    crate::expressions::IntervalUnitSpec::Simple {
                                        unit: iu,
                                        use_plural: false,
                                    },
                                ),
                            },
                        ));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![arg2, interval],
                        ))))
                    }
                    DialectType::SQLite => {
                        // SQLite: DATE(x, '1 DAY')
                        // Build the string '1 DAY' from amount and unit
                        // NOTE(review): any non numeric-literal amount collapses to
                        // "1" here — confirm that fallback is intended.
                        let amount_str = match &arg1 {
                            Expression::Literal(Literal::Number(n)) => n.clone(),
                            _ => "1".to_string(),
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE".to_string(),
                            vec![
                                arg2,
                                Expression::string(format!(
                                    "{} {}",
                                    amount_str, unit_str
                                )),
                            ],
                        ))))
                    }
                    DialectType::Dremio => {
                        // Dremio: DATE_ADD(date, amount) - drops unit
                        // NOTE(review): the unit is discarded for every unit —
                        // confirm Dremio's 2-arg DATE_ADD is the intended mapping
                        // for units other than DAY.
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![arg2, arg1],
                        ))))
                    }
                    DialectType::Spark => {
                        // Spark: 2-arg DATE_ADD(date, val) for DAY, otherwise keep
                        // the 3-arg DATE_ADD(UNIT, val, date) form.
                        if unit_str == "DAY" {
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_ADD".to_string(),
                                vec![arg2, arg1],
                            ))))
                        } else {
                            let unit =
                                Expression::Identifier(Identifier::new(&unit_str));
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_ADD".to_string(),
                                vec![unit, arg1, arg2],
                            ))))
                        }
                    }
                    DialectType::Databricks => {
                        // Databricks: 3-arg DATE_ADD(UNIT, amount, date).
                        let unit =
                            Expression::Identifier(Identifier::new(&unit_str));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![unit, arg1, arg2],
                        ))))
                    }
                    DialectType::Hive => {
                        // Hive: DATE_ADD(date, val) for DAY
                        // NOTE(review): the unit is dropped unconditionally, not just
                        // for DAY — confirm non-DAY units are normalized upstream.
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![arg2, arg1],
                        ))))
                    }
                    _ => {
                        // Default: keep the 3-arg DATE_ADD(UNIT, amount, date) form.
                        let unit =
                            Expression::Identifier(Identifier::new(&unit_str));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![unit, arg1, arg2],
                        ))))
                    }
                }
            }
13727 // DATE_ADD(date, days) - 2-arg Hive/Spark/Generic form (add days)
13728 "DATE_ADD"
13729 if f.args.len() == 2
13730 && matches!(
13731 source,
13732 DialectType::Hive
13733 | DialectType::Spark
13734 | DialectType::Databricks
13735 | DialectType::Generic
13736 ) =>
13737 {
13738 let mut args = f.args;
13739 let date = args.remove(0);
13740 let days = args.remove(0);
13741 match target {
13742 DialectType::Hive | DialectType::Spark => {
13743 // Keep as DATE_ADD(date, days) for Hive/Spark
13744 Ok(Expression::Function(Box::new(Function::new(
13745 "DATE_ADD".to_string(),
13746 vec![date, days],
13747 ))))
13748 }
13749 DialectType::Databricks => {
13750 // Databricks: DATEADD(DAY, days, date)
13751 Ok(Expression::Function(Box::new(Function::new(
13752 "DATEADD".to_string(),
13753 vec![
13754 Expression::Identifier(Identifier::new("DAY")),
13755 days,
13756 date,
13757 ],
13758 ))))
13759 }
13760 DialectType::DuckDB => {
13761 // DuckDB: CAST(date AS DATE) + INTERVAL days DAY
13762 let cast_date = Self::ensure_cast_date(date);
13763 // Wrap complex expressions (like Mul from DATE_SUB negation) in Paren
13764 let interval_val = if matches!(
13765 days,
13766 Expression::Mul(_)
13767 | Expression::Sub(_)
13768 | Expression::Add(_)
13769 ) {
13770 Expression::Paren(Box::new(crate::expressions::Paren {
13771 this: days,
13772 trailing_comments: vec![],
13773 }))
13774 } else {
13775 days
13776 };
13777 let interval = Expression::Interval(Box::new(
13778 crate::expressions::Interval {
13779 this: Some(interval_val),
13780 unit: Some(
13781 crate::expressions::IntervalUnitSpec::Simple {
13782 unit: crate::expressions::IntervalUnit::Day,
13783 use_plural: false,
13784 },
13785 ),
13786 },
13787 ));
13788 Ok(Expression::Add(Box::new(
13789 crate::expressions::BinaryOp::new(cast_date, interval),
13790 )))
13791 }
13792 DialectType::Snowflake => {
13793 // For Hive source with string literal date, use CAST(CAST(date AS TIMESTAMP) AS DATE)
13794 let cast_date = if matches!(
13795 source,
13796 DialectType::Hive
13797 | DialectType::Spark
13798 | DialectType::Databricks
13799 ) {
13800 if matches!(
13801 date,
13802 Expression::Literal(Literal::String(_))
13803 ) {
13804 Self::double_cast_timestamp_date(date)
13805 } else {
13806 date
13807 }
13808 } else {
13809 date
13810 };
13811 Ok(Expression::Function(Box::new(Function::new(
13812 "DATEADD".to_string(),
13813 vec![
13814 Expression::Identifier(Identifier::new("DAY")),
13815 days,
13816 cast_date,
13817 ],
13818 ))))
13819 }
13820 DialectType::Redshift => {
13821 Ok(Expression::Function(Box::new(Function::new(
13822 "DATEADD".to_string(),
13823 vec![
13824 Expression::Identifier(Identifier::new("DAY")),
13825 days,
13826 date,
13827 ],
13828 ))))
13829 }
13830 DialectType::TSQL | DialectType::Fabric => {
13831 // For Hive source with string literal date, use CAST(CAST(date AS DATETIME2) AS DATE)
13832 // But Databricks DATE_ADD doesn't need this wrapping for TSQL
13833 let cast_date = if matches!(
13834 source,
13835 DialectType::Hive | DialectType::Spark
13836 ) {
13837 if matches!(
13838 date,
13839 Expression::Literal(Literal::String(_))
13840 ) {
13841 Self::double_cast_datetime2_date(date)
13842 } else {
13843 date
13844 }
13845 } else {
13846 date
13847 };
13848 Ok(Expression::Function(Box::new(Function::new(
13849 "DATEADD".to_string(),
13850 vec![
13851 Expression::Identifier(Identifier::new("DAY")),
13852 days,
13853 cast_date,
13854 ],
13855 ))))
13856 }
13857 DialectType::Presto
13858 | DialectType::Trino
13859 | DialectType::Athena => {
13860 // For Hive source with string literal date, use CAST(CAST(date AS TIMESTAMP) AS DATE)
13861 let cast_date = if matches!(
13862 source,
13863 DialectType::Hive
13864 | DialectType::Spark
13865 | DialectType::Databricks
13866 ) {
13867 if matches!(
13868 date,
13869 Expression::Literal(Literal::String(_))
13870 ) {
13871 Self::double_cast_timestamp_date(date)
13872 } else {
13873 date
13874 }
13875 } else {
13876 date
13877 };
13878 Ok(Expression::Function(Box::new(Function::new(
13879 "DATE_ADD".to_string(),
13880 vec![Expression::string("DAY"), days, cast_date],
13881 ))))
13882 }
13883 DialectType::BigQuery => {
13884 // For Hive/Spark source, wrap date in CAST(CAST(date AS DATETIME) AS DATE)
13885 let cast_date = if matches!(
13886 source,
13887 DialectType::Hive
13888 | DialectType::Spark
13889 | DialectType::Databricks
13890 ) {
13891 Self::double_cast_datetime_date(date)
13892 } else {
13893 date
13894 };
13895 // Wrap complex expressions in Paren for interval
13896 let interval_val = if matches!(
13897 days,
13898 Expression::Mul(_)
13899 | Expression::Sub(_)
13900 | Expression::Add(_)
13901 ) {
13902 Expression::Paren(Box::new(crate::expressions::Paren {
13903 this: days,
13904 trailing_comments: vec![],
13905 }))
13906 } else {
13907 days
13908 };
13909 let interval = Expression::Interval(Box::new(
13910 crate::expressions::Interval {
13911 this: Some(interval_val),
13912 unit: Some(
13913 crate::expressions::IntervalUnitSpec::Simple {
13914 unit: crate::expressions::IntervalUnit::Day,
13915 use_plural: false,
13916 },
13917 ),
13918 },
13919 ));
13920 Ok(Expression::Function(Box::new(Function::new(
13921 "DATE_ADD".to_string(),
13922 vec![cast_date, interval],
13923 ))))
13924 }
13925 DialectType::MySQL => {
13926 let iu = crate::expressions::IntervalUnit::Day;
13927 Ok(Expression::DateAdd(Box::new(
13928 crate::expressions::DateAddFunc {
13929 this: date,
13930 interval: days,
13931 unit: iu,
13932 },
13933 )))
13934 }
13935 DialectType::PostgreSQL => {
13936 let interval = Expression::Interval(Box::new(
13937 crate::expressions::Interval {
13938 this: Some(Expression::string(&format!(
13939 "{} DAY",
13940 Self::expr_to_string_static(&days)
13941 ))),
13942 unit: None,
13943 },
13944 ));
13945 Ok(Expression::Add(Box::new(
13946 crate::expressions::BinaryOp::new(date, interval),
13947 )))
13948 }
13949 DialectType::Doris
13950 | DialectType::StarRocks
13951 | DialectType::Drill => {
13952 // DATE_ADD(date, INTERVAL days DAY)
13953 let interval = Expression::Interval(Box::new(
13954 crate::expressions::Interval {
13955 this: Some(days),
13956 unit: Some(
13957 crate::expressions::IntervalUnitSpec::Simple {
13958 unit: crate::expressions::IntervalUnit::Day,
13959 use_plural: false,
13960 },
13961 ),
13962 },
13963 ));
13964 Ok(Expression::Function(Box::new(Function::new(
13965 "DATE_ADD".to_string(),
13966 vec![date, interval],
13967 ))))
13968 }
13969 _ => Ok(Expression::Function(Box::new(Function::new(
13970 "DATE_ADD".to_string(),
13971 vec![date, days],
13972 )))),
13973 }
13974 }
13975 // DATE_SUB(date, days) - 2-arg Hive/Spark form (subtract days)
13976 "DATE_SUB"
13977 if f.args.len() == 2
13978 && matches!(
13979 source,
13980 DialectType::Hive
13981 | DialectType::Spark
13982 | DialectType::Databricks
13983 ) =>
13984 {
13985 let mut args = f.args;
13986 let date = args.remove(0);
13987 let days = args.remove(0);
13988 // Helper to create days * -1
13989 let make_neg_days = |d: Expression| -> Expression {
13990 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
13991 d,
13992 Expression::Literal(Literal::Number("-1".to_string())),
13993 )))
13994 };
13995 let is_string_literal =
13996 matches!(date, Expression::Literal(Literal::String(_)));
13997 match target {
13998 DialectType::Hive
13999 | DialectType::Spark
14000 | DialectType::Databricks => {
14001 // Keep as DATE_SUB(date, days) for Hive/Spark
14002 Ok(Expression::Function(Box::new(Function::new(
14003 "DATE_SUB".to_string(),
14004 vec![date, days],
14005 ))))
14006 }
14007 DialectType::DuckDB => {
14008 let cast_date = Self::ensure_cast_date(date);
14009 let neg = make_neg_days(days);
14010 let interval = Expression::Interval(Box::new(
14011 crate::expressions::Interval {
14012 this: Some(Expression::Paren(Box::new(
14013 crate::expressions::Paren {
14014 this: neg,
14015 trailing_comments: vec![],
14016 },
14017 ))),
14018 unit: Some(
14019 crate::expressions::IntervalUnitSpec::Simple {
14020 unit: crate::expressions::IntervalUnit::Day,
14021 use_plural: false,
14022 },
14023 ),
14024 },
14025 ));
14026 Ok(Expression::Add(Box::new(
14027 crate::expressions::BinaryOp::new(cast_date, interval),
14028 )))
14029 }
14030 DialectType::Snowflake => {
14031 let cast_date = if is_string_literal {
14032 Self::double_cast_timestamp_date(date)
14033 } else {
14034 date
14035 };
14036 let neg = make_neg_days(days);
14037 Ok(Expression::Function(Box::new(Function::new(
14038 "DATEADD".to_string(),
14039 vec![
14040 Expression::Identifier(Identifier::new("DAY")),
14041 neg,
14042 cast_date,
14043 ],
14044 ))))
14045 }
14046 DialectType::Redshift => {
14047 let neg = make_neg_days(days);
14048 Ok(Expression::Function(Box::new(Function::new(
14049 "DATEADD".to_string(),
14050 vec![
14051 Expression::Identifier(Identifier::new("DAY")),
14052 neg,
14053 date,
14054 ],
14055 ))))
14056 }
14057 DialectType::TSQL | DialectType::Fabric => {
14058 let cast_date = if is_string_literal {
14059 Self::double_cast_datetime2_date(date)
14060 } else {
14061 date
14062 };
14063 let neg = make_neg_days(days);
14064 Ok(Expression::Function(Box::new(Function::new(
14065 "DATEADD".to_string(),
14066 vec![
14067 Expression::Identifier(Identifier::new("DAY")),
14068 neg,
14069 cast_date,
14070 ],
14071 ))))
14072 }
14073 DialectType::Presto
14074 | DialectType::Trino
14075 | DialectType::Athena => {
14076 let cast_date = if is_string_literal {
14077 Self::double_cast_timestamp_date(date)
14078 } else {
14079 date
14080 };
14081 let neg = make_neg_days(days);
14082 Ok(Expression::Function(Box::new(Function::new(
14083 "DATE_ADD".to_string(),
14084 vec![Expression::string("DAY"), neg, cast_date],
14085 ))))
14086 }
14087 DialectType::BigQuery => {
14088 let cast_date = if is_string_literal {
14089 Self::double_cast_datetime_date(date)
14090 } else {
14091 date
14092 };
14093 let neg = make_neg_days(days);
14094 let interval = Expression::Interval(Box::new(
14095 crate::expressions::Interval {
14096 this: Some(Expression::Paren(Box::new(
14097 crate::expressions::Paren {
14098 this: neg,
14099 trailing_comments: vec![],
14100 },
14101 ))),
14102 unit: Some(
14103 crate::expressions::IntervalUnitSpec::Simple {
14104 unit: crate::expressions::IntervalUnit::Day,
14105 use_plural: false,
14106 },
14107 ),
14108 },
14109 ));
14110 Ok(Expression::Function(Box::new(Function::new(
14111 "DATE_ADD".to_string(),
14112 vec![cast_date, interval],
14113 ))))
14114 }
14115 _ => Ok(Expression::Function(Box::new(Function::new(
14116 "DATE_SUB".to_string(),
14117 vec![date, days],
14118 )))),
14119 }
14120 }
14121 // ADD_MONTHS(date, val) -> target-specific
14122 "ADD_MONTHS" if f.args.len() == 2 => {
14123 let mut args = f.args;
14124 let date = args.remove(0);
14125 let val = args.remove(0);
14126 match target {
14127 DialectType::TSQL => {
14128 let cast_date = Self::ensure_cast_datetime2(date);
14129 Ok(Expression::Function(Box::new(Function::new(
14130 "DATEADD".to_string(),
14131 vec![
14132 Expression::Identifier(Identifier::new("MONTH")),
14133 val,
14134 cast_date,
14135 ],
14136 ))))
14137 }
14138 DialectType::DuckDB => {
14139 let interval = Expression::Interval(Box::new(
14140 crate::expressions::Interval {
14141 this: Some(val),
14142 unit: Some(
14143 crate::expressions::IntervalUnitSpec::Simple {
14144 unit:
14145 crate::expressions::IntervalUnit::Month,
14146 use_plural: false,
14147 },
14148 ),
14149 },
14150 ));
14151 Ok(Expression::Add(Box::new(
14152 crate::expressions::BinaryOp::new(date, interval),
14153 )))
14154 }
14155 DialectType::Snowflake => {
14156 // Keep ADD_MONTHS when source is Snowflake
14157 if matches!(source, DialectType::Snowflake) {
14158 Ok(Expression::Function(Box::new(Function::new(
14159 "ADD_MONTHS".to_string(),
14160 vec![date, val],
14161 ))))
14162 } else {
14163 Ok(Expression::Function(Box::new(Function::new(
14164 "DATEADD".to_string(),
14165 vec![
14166 Expression::Identifier(Identifier::new(
14167 "MONTH",
14168 )),
14169 val,
14170 date,
14171 ],
14172 ))))
14173 }
14174 }
14175 DialectType::Redshift => {
14176 Ok(Expression::Function(Box::new(Function::new(
14177 "DATEADD".to_string(),
14178 vec![
14179 Expression::Identifier(Identifier::new("MONTH")),
14180 val,
14181 date,
14182 ],
14183 ))))
14184 }
14185 DialectType::Presto
14186 | DialectType::Trino
14187 | DialectType::Athena => {
14188 Ok(Expression::Function(Box::new(Function::new(
14189 "DATE_ADD".to_string(),
14190 vec![Expression::string("MONTH"), val, date],
14191 ))))
14192 }
14193 DialectType::BigQuery => {
14194 let interval = Expression::Interval(Box::new(
14195 crate::expressions::Interval {
14196 this: Some(val),
14197 unit: Some(
14198 crate::expressions::IntervalUnitSpec::Simple {
14199 unit:
14200 crate::expressions::IntervalUnit::Month,
14201 use_plural: false,
14202 },
14203 ),
14204 },
14205 ));
14206 Ok(Expression::Function(Box::new(Function::new(
14207 "DATE_ADD".to_string(),
14208 vec![date, interval],
14209 ))))
14210 }
14211 _ => Ok(Expression::Function(Box::new(Function::new(
14212 "ADD_MONTHS".to_string(),
14213 vec![date, val],
14214 )))),
14215 }
14216 }
14217 // DATETRUNC(unit, date) - TSQL form -> DATE_TRUNC for other targets
14218 "DATETRUNC" if f.args.len() == 2 => {
14219 let mut args = f.args;
14220 let arg0 = args.remove(0);
14221 let arg1 = args.remove(0);
14222 let unit_str = Self::get_unit_str_static(&arg0);
14223 match target {
14224 DialectType::TSQL | DialectType::Fabric => {
14225 // Keep as DATETRUNC for TSQL - the target handler will uppercase the unit
14226 Ok(Expression::Function(Box::new(Function::new(
14227 "DATETRUNC".to_string(),
14228 vec![
14229 Expression::Identifier(Identifier::new(&unit_str)),
14230 arg1,
14231 ],
14232 ))))
14233 }
14234 DialectType::DuckDB => {
14235 // DuckDB: DATE_TRUNC('UNIT', expr) with CAST for string literals
14236 let date = Self::ensure_cast_timestamp(arg1);
14237 Ok(Expression::Function(Box::new(Function::new(
14238 "DATE_TRUNC".to_string(),
14239 vec![Expression::string(&unit_str), date],
14240 ))))
14241 }
14242 DialectType::ClickHouse => {
14243 // ClickHouse: dateTrunc('UNIT', expr)
14244 Ok(Expression::Function(Box::new(Function::new(
14245 "dateTrunc".to_string(),
14246 vec![Expression::string(&unit_str), arg1],
14247 ))))
14248 }
14249 _ => {
14250 // Standard: DATE_TRUNC('UNIT', expr)
14251 let unit = Expression::string(&unit_str);
14252 Ok(Expression::Function(Box::new(Function::new(
14253 "DATE_TRUNC".to_string(),
14254 vec![unit, arg1],
14255 ))))
14256 }
14257 }
14258 }
14259 // GETDATE() -> CURRENT_TIMESTAMP for non-TSQL targets
14260 "GETDATE" if f.args.is_empty() => match target {
14261 DialectType::TSQL => Ok(Expression::Function(f)),
14262 DialectType::Redshift => Ok(Expression::Function(Box::new(
14263 Function::new("GETDATE".to_string(), vec![]),
14264 ))),
14265 _ => Ok(Expression::CurrentTimestamp(
14266 crate::expressions::CurrentTimestamp {
14267 precision: None,
14268 sysdate: false,
14269 },
14270 )),
14271 },
14272 // TO_HEX(x) / HEX(x) -> target-specific hex function
14273 "TO_HEX" | "HEX" if f.args.len() == 1 => {
14274 let name = match target {
14275 DialectType::Presto | DialectType::Trino => "TO_HEX",
14276 DialectType::Spark
14277 | DialectType::Databricks
14278 | DialectType::Hive => "HEX",
14279 DialectType::DuckDB
14280 | DialectType::PostgreSQL
14281 | DialectType::Redshift => "TO_HEX",
14282 _ => &f.name,
14283 };
14284 Ok(Expression::Function(Box::new(Function::new(
14285 name.to_string(),
14286 f.args,
14287 ))))
14288 }
14289 // FROM_HEX(x) / UNHEX(x) -> target-specific hex decode function
14290 "FROM_HEX" | "UNHEX" if f.args.len() == 1 => {
14291 match target {
14292 DialectType::BigQuery => {
14293 // BigQuery: UNHEX(x) -> FROM_HEX(x)
14294 // Special case: UNHEX(MD5(x)) -> FROM_HEX(TO_HEX(MD5(x)))
14295 // because BigQuery MD5 returns BYTES, not hex string
14296 let arg = &f.args[0];
14297 let wrapped_arg = match arg {
14298 Expression::Function(inner_f)
14299 if inner_f.name.to_uppercase() == "MD5"
14300 || inner_f.name.to_uppercase() == "SHA1"
14301 || inner_f.name.to_uppercase() == "SHA256"
14302 || inner_f.name.to_uppercase() == "SHA512" =>
14303 {
14304 // Wrap hash function in TO_HEX for BigQuery
14305 Expression::Function(Box::new(Function::new(
14306 "TO_HEX".to_string(),
14307 vec![arg.clone()],
14308 )))
14309 }
14310 _ => f.args.into_iter().next().unwrap(),
14311 };
14312 Ok(Expression::Function(Box::new(Function::new(
14313 "FROM_HEX".to_string(),
14314 vec![wrapped_arg],
14315 ))))
14316 }
14317 _ => {
14318 let name = match target {
14319 DialectType::Presto | DialectType::Trino => "FROM_HEX",
14320 DialectType::Spark
14321 | DialectType::Databricks
14322 | DialectType::Hive => "UNHEX",
14323 _ => &f.name,
14324 };
14325 Ok(Expression::Function(Box::new(Function::new(
14326 name.to_string(),
14327 f.args,
14328 ))))
14329 }
14330 }
14331 }
14332 // TO_UTF8(x) -> ENCODE(x, 'utf-8') for Spark
14333 "TO_UTF8" if f.args.len() == 1 => match target {
14334 DialectType::Spark | DialectType::Databricks => {
14335 let mut args = f.args;
14336 args.push(Expression::string("utf-8"));
14337 Ok(Expression::Function(Box::new(Function::new(
14338 "ENCODE".to_string(),
14339 args,
14340 ))))
14341 }
14342 _ => Ok(Expression::Function(f)),
14343 },
14344 // FROM_UTF8(x) -> DECODE(x, 'utf-8') for Spark
14345 "FROM_UTF8" if f.args.len() == 1 => match target {
14346 DialectType::Spark | DialectType::Databricks => {
14347 let mut args = f.args;
14348 args.push(Expression::string("utf-8"));
14349 Ok(Expression::Function(Box::new(Function::new(
14350 "DECODE".to_string(),
14351 args,
14352 ))))
14353 }
14354 _ => Ok(Expression::Function(f)),
14355 },
14356 // STARTS_WITH(x, y) / STARTSWITH(x, y) -> target-specific
14357 "STARTS_WITH" | "STARTSWITH" if f.args.len() == 2 => {
14358 let name = match target {
14359 DialectType::Spark | DialectType::Databricks => "STARTSWITH",
14360 DialectType::Presto | DialectType::Trino => "STARTS_WITH",
14361 DialectType::PostgreSQL | DialectType::Redshift => {
14362 "STARTS_WITH"
14363 }
14364 _ => &f.name,
14365 };
14366 Ok(Expression::Function(Box::new(Function::new(
14367 name.to_string(),
14368 f.args,
14369 ))))
14370 }
14371 // APPROX_COUNT_DISTINCT(x) <-> APPROX_DISTINCT(x)
14372 "APPROX_COUNT_DISTINCT" if f.args.len() >= 1 => {
14373 let name = match target {
14374 DialectType::Presto
14375 | DialectType::Trino
14376 | DialectType::Athena => "APPROX_DISTINCT",
14377 _ => "APPROX_COUNT_DISTINCT",
14378 };
14379 Ok(Expression::Function(Box::new(Function::new(
14380 name.to_string(),
14381 f.args,
14382 ))))
14383 }
14384 // JSON_EXTRACT -> GET_JSON_OBJECT for Spark/Hive
14385 "JSON_EXTRACT"
14386 if f.args.len() == 2
14387 && !matches!(source, DialectType::BigQuery)
14388 && matches!(
14389 target,
14390 DialectType::Spark
14391 | DialectType::Databricks
14392 | DialectType::Hive
14393 ) =>
14394 {
14395 Ok(Expression::Function(Box::new(Function::new(
14396 "GET_JSON_OBJECT".to_string(),
14397 f.args,
14398 ))))
14399 }
14400 // JSON_EXTRACT(x, path) -> x -> path for SQLite (arrow syntax)
14401 "JSON_EXTRACT"
14402 if f.args.len() == 2 && matches!(target, DialectType::SQLite) =>
14403 {
14404 let mut args = f.args;
14405 let path = args.remove(1);
14406 let this = args.remove(0);
14407 Ok(Expression::JsonExtract(Box::new(
14408 crate::expressions::JsonExtractFunc {
14409 this,
14410 path,
14411 returning: None,
14412 arrow_syntax: true,
14413 hash_arrow_syntax: false,
14414 wrapper_option: None,
14415 quotes_option: None,
14416 on_scalar_string: false,
14417 on_error: None,
14418 },
14419 )))
14420 }
14421 // JSON_FORMAT(x) -> TO_JSON(x) for Spark, TO_JSON_STRING for BigQuery, CAST(TO_JSON(x) AS TEXT) for DuckDB
14422 "JSON_FORMAT" if f.args.len() == 1 => {
14423 match target {
14424 DialectType::Spark | DialectType::Databricks => {
14425 // Presto JSON_FORMAT(JSON '...') needs Spark's string-unquoting flow:
14426 // REGEXP_EXTRACT(TO_JSON(FROM_JSON('[...]', SCHEMA_OF_JSON('[...]'))), '^.(.*).$', 1)
14427 if matches!(
14428 source,
14429 DialectType::Presto
14430 | DialectType::Trino
14431 | DialectType::Athena
14432 ) {
14433 if let Some(Expression::ParseJson(pj)) = f.args.first()
14434 {
14435 if let Expression::Literal(Literal::String(s)) =
14436 &pj.this
14437 {
14438 let wrapped = Expression::Literal(
14439 Literal::String(format!("[{}]", s)),
14440 );
14441 let schema_of_json = Expression::Function(
14442 Box::new(Function::new(
14443 "SCHEMA_OF_JSON".to_string(),
14444 vec![wrapped.clone()],
14445 )),
14446 );
14447 let from_json = Expression::Function(Box::new(
14448 Function::new(
14449 "FROM_JSON".to_string(),
14450 vec![wrapped, schema_of_json],
14451 ),
14452 ));
14453 let to_json = Expression::Function(Box::new(
14454 Function::new(
14455 "TO_JSON".to_string(),
14456 vec![from_json],
14457 ),
14458 ));
14459 return Ok(Expression::Function(Box::new(
14460 Function::new(
14461 "REGEXP_EXTRACT".to_string(),
14462 vec![
14463 to_json,
14464 Expression::Literal(
14465 Literal::String(
14466 "^.(.*).$".to_string(),
14467 ),
14468 ),
14469 Expression::Literal(
14470 Literal::Number(
14471 "1".to_string(),
14472 ),
14473 ),
14474 ],
14475 ),
14476 )));
14477 }
14478 }
14479 }
14480
14481 // Strip inner CAST(... AS JSON) or TO_JSON() if present
14482 // The CastToJsonForSpark may have already converted CAST(x AS JSON) to TO_JSON(x)
14483 let mut args = f.args;
14484 if let Some(Expression::Cast(ref c)) = args.first() {
14485 if matches!(&c.to, DataType::Json | DataType::JsonB) {
14486 args = vec![c.this.clone()];
14487 }
14488 } else if let Some(Expression::Function(ref inner_f)) =
14489 args.first()
14490 {
14491 if inner_f.name.eq_ignore_ascii_case("TO_JSON")
14492 && inner_f.args.len() == 1
14493 {
14494 // Already TO_JSON(x) from CastToJsonForSpark, just use the inner arg
14495 args = inner_f.args.clone();
14496 }
14497 }
14498 Ok(Expression::Function(Box::new(Function::new(
14499 "TO_JSON".to_string(),
14500 args,
14501 ))))
14502 }
14503 DialectType::BigQuery => Ok(Expression::Function(Box::new(
14504 Function::new("TO_JSON_STRING".to_string(), f.args),
14505 ))),
14506 DialectType::DuckDB => {
14507 // CAST(TO_JSON(x) AS TEXT)
14508 let to_json = Expression::Function(Box::new(
14509 Function::new("TO_JSON".to_string(), f.args),
14510 ));
14511 Ok(Expression::Cast(Box::new(Cast {
14512 this: to_json,
14513 to: DataType::Text,
14514 trailing_comments: Vec::new(),
14515 double_colon_syntax: false,
14516 format: None,
14517 default: None,
14518 })))
14519 }
14520 _ => Ok(Expression::Function(f)),
14521 }
14522 }
14523 // SYSDATE -> CURRENT_TIMESTAMP for non-Oracle/Redshift/Snowflake targets
14524 "SYSDATE" if f.args.is_empty() => {
14525 match target {
14526 DialectType::Oracle | DialectType::Redshift => {
14527 Ok(Expression::Function(f))
14528 }
14529 DialectType::Snowflake => {
14530 // Snowflake uses SYSDATE() with parens
14531 let mut f = *f;
14532 f.no_parens = false;
14533 Ok(Expression::Function(Box::new(f)))
14534 }
14535 DialectType::DuckDB => {
14536 // DuckDB: SYSDATE() -> CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
14537 Ok(Expression::AtTimeZone(Box::new(
14538 crate::expressions::AtTimeZone {
14539 this: Expression::CurrentTimestamp(
14540 crate::expressions::CurrentTimestamp {
14541 precision: None,
14542 sysdate: false,
14543 },
14544 ),
14545 zone: Expression::Literal(Literal::String(
14546 "UTC".to_string(),
14547 )),
14548 },
14549 )))
14550 }
14551 _ => Ok(Expression::CurrentTimestamp(
14552 crate::expressions::CurrentTimestamp {
14553 precision: None,
14554 sysdate: true,
14555 },
14556 )),
14557 }
14558 }
14559 // LOGICAL_OR(x) -> BOOL_OR(x)
14560 "LOGICAL_OR" if f.args.len() == 1 => {
14561 let name = match target {
14562 DialectType::Spark | DialectType::Databricks => "BOOL_OR",
14563 _ => &f.name,
14564 };
14565 Ok(Expression::Function(Box::new(Function::new(
14566 name.to_string(),
14567 f.args,
14568 ))))
14569 }
14570 // LOGICAL_AND(x) -> BOOL_AND(x)
14571 "LOGICAL_AND" if f.args.len() == 1 => {
14572 let name = match target {
14573 DialectType::Spark | DialectType::Databricks => "BOOL_AND",
14574 _ => &f.name,
14575 };
14576 Ok(Expression::Function(Box::new(Function::new(
14577 name.to_string(),
14578 f.args,
14579 ))))
14580 }
14581 // MONTHS_ADD(d, n) -> ADD_MONTHS(d, n) for Oracle
14582 "MONTHS_ADD" if f.args.len() == 2 => match target {
14583 DialectType::Oracle => Ok(Expression::Function(Box::new(
14584 Function::new("ADD_MONTHS".to_string(), f.args),
14585 ))),
14586 _ => Ok(Expression::Function(f)),
14587 },
14588 // ARRAY_JOIN(arr, sep[, null_replacement]) -> target-specific
14589 "ARRAY_JOIN" if f.args.len() >= 2 => {
14590 match target {
14591 DialectType::Spark | DialectType::Databricks => {
14592 // Keep as ARRAY_JOIN for Spark (it supports null_replacement)
14593 Ok(Expression::Function(f))
14594 }
14595 DialectType::Hive => {
14596 // ARRAY_JOIN(arr, sep[, null_rep]) -> CONCAT_WS(sep, arr) (drop null_replacement)
14597 let mut args = f.args;
14598 let arr = args.remove(0);
14599 let sep = args.remove(0);
14600 // Drop any remaining args (null_replacement)
14601 Ok(Expression::Function(Box::new(Function::new(
14602 "CONCAT_WS".to_string(),
14603 vec![sep, arr],
14604 ))))
14605 }
14606 DialectType::Presto | DialectType::Trino => {
14607 Ok(Expression::Function(f))
14608 }
14609 _ => Ok(Expression::Function(f)),
14610 }
14611 }
14612 // LOCATE(substr, str, pos) 3-arg -> target-specific
14613 // For Presto/DuckDB: STRPOS doesn't support 3-arg, need complex expansion
14614 "LOCATE"
14615 if f.args.len() == 3
14616 && matches!(
14617 target,
14618 DialectType::Presto
14619 | DialectType::Trino
14620 | DialectType::Athena
14621 | DialectType::DuckDB
14622 ) =>
14623 {
14624 let mut args = f.args;
14625 let substr = args.remove(0);
14626 let string = args.remove(0);
14627 let pos = args.remove(0);
14628 // STRPOS(SUBSTRING(string, pos), substr)
14629 let substring_call = Expression::Function(Box::new(Function::new(
14630 "SUBSTRING".to_string(),
14631 vec![string.clone(), pos.clone()],
14632 )));
14633 let strpos_call = Expression::Function(Box::new(Function::new(
14634 "STRPOS".to_string(),
14635 vec![substring_call, substr.clone()],
14636 )));
14637 // STRPOS(...) + pos - 1
14638 let pos_adjusted =
14639 Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
14640 Expression::Add(Box::new(
14641 crate::expressions::BinaryOp::new(
14642 strpos_call.clone(),
14643 pos.clone(),
14644 ),
14645 )),
14646 Expression::number(1),
14647 )));
14648 // STRPOS(...) = 0
14649 let is_zero =
14650 Expression::Eq(Box::new(crate::expressions::BinaryOp::new(
14651 strpos_call.clone(),
14652 Expression::number(0),
14653 )));
14654
14655 match target {
14656 DialectType::Presto
14657 | DialectType::Trino
14658 | DialectType::Athena => {
14659 // IF(STRPOS(...) = 0, 0, STRPOS(...) + pos - 1)
14660 Ok(Expression::Function(Box::new(Function::new(
14661 "IF".to_string(),
14662 vec![is_zero, Expression::number(0), pos_adjusted],
14663 ))))
14664 }
14665 DialectType::DuckDB => {
14666 // CASE WHEN STRPOS(...) = 0 THEN 0 ELSE STRPOS(...) + pos - 1 END
14667 Ok(Expression::Case(Box::new(crate::expressions::Case {
14668 operand: None,
14669 whens: vec![(is_zero, Expression::number(0))],
14670 else_: Some(pos_adjusted),
14671 comments: Vec::new(),
14672 })))
14673 }
14674 _ => Ok(Expression::Function(Box::new(Function::new(
14675 "LOCATE".to_string(),
14676 vec![substr, string, pos],
14677 )))),
14678 }
14679 }
14680 // STRPOS(haystack, needle, occurrence) 3-arg -> INSTR(haystack, needle, 1, occurrence)
14681 "STRPOS"
14682 if f.args.len() == 3
14683 && matches!(
14684 target,
14685 DialectType::BigQuery
14686 | DialectType::Oracle
14687 | DialectType::Teradata
14688 ) =>
14689 {
14690 let mut args = f.args;
14691 let haystack = args.remove(0);
14692 let needle = args.remove(0);
14693 let occurrence = args.remove(0);
14694 Ok(Expression::Function(Box::new(Function::new(
14695 "INSTR".to_string(),
14696 vec![haystack, needle, Expression::number(1), occurrence],
14697 ))))
14698 }
14699 // SCHEMA_NAME(id) -> target-specific
14700 "SCHEMA_NAME" if f.args.len() <= 1 => match target {
14701 DialectType::MySQL | DialectType::SingleStore => {
14702 Ok(Expression::Function(Box::new(Function::new(
14703 "SCHEMA".to_string(),
14704 vec![],
14705 ))))
14706 }
14707 DialectType::PostgreSQL => Ok(Expression::CurrentSchema(Box::new(
14708 crate::expressions::CurrentSchema { this: None },
14709 ))),
14710 DialectType::SQLite => Ok(Expression::string("main")),
14711 _ => Ok(Expression::Function(f)),
14712 },
14713 // STRTOL(str, base) -> FROM_BASE(str, base) for Trino/Presto
14714 "STRTOL" if f.args.len() == 2 => match target {
14715 DialectType::Presto | DialectType::Trino => {
14716 Ok(Expression::Function(Box::new(Function::new(
14717 "FROM_BASE".to_string(),
14718 f.args,
14719 ))))
14720 }
14721 _ => Ok(Expression::Function(f)),
14722 },
14723 // EDITDIST3(a, b) -> LEVENSHTEIN(a, b) for Spark
14724 "EDITDIST3" if f.args.len() == 2 => match target {
14725 DialectType::Spark | DialectType::Databricks => {
14726 Ok(Expression::Function(Box::new(Function::new(
14727 "LEVENSHTEIN".to_string(),
14728 f.args,
14729 ))))
14730 }
14731 _ => Ok(Expression::Function(f)),
14732 },
14733 // FORMAT(num, decimals) from MySQL -> DuckDB FORMAT('{:,.Xf}', num)
14734 "FORMAT"
14735 if f.args.len() == 2
14736 && matches!(
14737 source,
14738 DialectType::MySQL | DialectType::SingleStore
14739 )
14740 && matches!(target, DialectType::DuckDB) =>
14741 {
14742 let mut args = f.args;
14743 let num_expr = args.remove(0);
14744 let decimals_expr = args.remove(0);
14745 // Extract decimal count
14746 let dec_count = match &decimals_expr {
14747 Expression::Literal(Literal::Number(n)) => n.clone(),
14748 _ => "0".to_string(),
14749 };
14750 let fmt_str = format!("{{:,.{}f}}", dec_count);
14751 Ok(Expression::Function(Box::new(Function::new(
14752 "FORMAT".to_string(),
14753 vec![Expression::string(&fmt_str), num_expr],
14754 ))))
14755 }
14756 // FORMAT(x, fmt) from TSQL -> DATE_FORMAT for Spark, or expand short codes
14757 "FORMAT"
14758 if f.args.len() == 2
14759 && matches!(
14760 source,
14761 DialectType::TSQL | DialectType::Fabric
14762 ) =>
14763 {
14764 let val_expr = f.args[0].clone();
14765 let fmt_expr = f.args[1].clone();
14766 // Expand unambiguous .NET single-char date format shortcodes to full patterns.
14767 // Only expand shortcodes that are NOT also valid numeric format specifiers.
14768 // Ambiguous: d, D, f, F, g, G (used for both dates and numbers)
14769 // Unambiguous date: m/M (Month day), t/T (Time), y/Y (Year month)
14770 let (expanded_fmt, is_shortcode) = match &fmt_expr {
14771 Expression::Literal(crate::expressions::Literal::String(s)) => {
14772 match s.as_str() {
14773 "m" | "M" => (Expression::string("MMMM d"), true),
14774 "t" => (Expression::string("h:mm tt"), true),
14775 "T" => (Expression::string("h:mm:ss tt"), true),
14776 "y" | "Y" => (Expression::string("MMMM yyyy"), true),
14777 _ => (fmt_expr.clone(), false),
14778 }
14779 }
14780 _ => (fmt_expr.clone(), false),
14781 };
14782 // Check if the format looks like a date format
14783 let is_date_format = is_shortcode
14784 || match &expanded_fmt {
14785 Expression::Literal(
14786 crate::expressions::Literal::String(s),
14787 ) => {
14788 // Date formats typically contain yyyy, MM, dd, MMMM, HH, etc.
14789 s.contains("yyyy")
14790 || s.contains("YYYY")
14791 || s.contains("MM")
14792 || s.contains("dd")
14793 || s.contains("MMMM")
14794 || s.contains("HH")
14795 || s.contains("hh")
14796 || s.contains("ss")
14797 }
14798 _ => false,
14799 };
14800 match target {
14801 DialectType::Spark | DialectType::Databricks => {
14802 let func_name = if is_date_format {
14803 "DATE_FORMAT"
14804 } else {
14805 "FORMAT_NUMBER"
14806 };
14807 Ok(Expression::Function(Box::new(Function::new(
14808 func_name.to_string(),
14809 vec![val_expr, expanded_fmt],
14810 ))))
14811 }
14812 _ => {
14813 // For TSQL and other targets, expand shortcodes but keep FORMAT
14814 if is_shortcode {
14815 Ok(Expression::Function(Box::new(Function::new(
14816 "FORMAT".to_string(),
14817 vec![val_expr, expanded_fmt],
14818 ))))
14819 } else {
14820 Ok(Expression::Function(f))
14821 }
14822 }
14823 }
14824 }
14825 // FORMAT('%s', x) from Trino/Presto -> target-specific
14826 "FORMAT"
14827 if f.args.len() >= 2
14828 && matches!(
14829 source,
14830 DialectType::Trino
14831 | DialectType::Presto
14832 | DialectType::Athena
14833 ) =>
14834 {
14835 let fmt_expr = f.args[0].clone();
14836 let value_args: Vec<Expression> = f.args[1..].to_vec();
14837 match target {
14838 // DuckDB: replace %s with {} in format string
14839 DialectType::DuckDB => {
14840 let new_fmt = match &fmt_expr {
14841 Expression::Literal(Literal::String(s)) => {
14842 Expression::Literal(Literal::String(
14843 s.replace("%s", "{}"),
14844 ))
14845 }
14846 _ => fmt_expr,
14847 };
14848 let mut args = vec![new_fmt];
14849 args.extend(value_args);
14850 Ok(Expression::Function(Box::new(Function::new(
14851 "FORMAT".to_string(),
14852 args,
14853 ))))
14854 }
14855 // Snowflake: FORMAT('%s', x) -> TO_CHAR(x) when just %s
14856 DialectType::Snowflake => match &fmt_expr {
14857 Expression::Literal(Literal::String(s))
14858 if s == "%s" && value_args.len() == 1 =>
14859 {
14860 Ok(Expression::Function(Box::new(Function::new(
14861 "TO_CHAR".to_string(),
14862 value_args,
14863 ))))
14864 }
14865 _ => Ok(Expression::Function(f)),
14866 },
14867 // Default: keep FORMAT as-is
14868 _ => Ok(Expression::Function(f)),
14869 }
14870 }
14871 // LIST_CONTAINS / LIST_HAS / ARRAY_CONTAINS -> target-specific
14872 "LIST_CONTAINS" | "LIST_HAS" | "ARRAY_CONTAINS"
14873 if f.args.len() == 2 =>
14874 {
14875 match target {
14876 DialectType::PostgreSQL | DialectType::Redshift => {
14877 // CASE WHEN needle IS NULL THEN NULL ELSE COALESCE(needle = ANY(arr), FALSE) END
14878 let arr = f.args[0].clone();
14879 let needle = f.args[1].clone();
14880 // Convert [] to ARRAY[] for PostgreSQL
14881 let pg_arr = match arr {
14882 Expression::Array(a) => Expression::ArrayFunc(
14883 Box::new(crate::expressions::ArrayConstructor {
14884 expressions: a.expressions,
14885 bracket_notation: false,
14886 use_list_keyword: false,
14887 }),
14888 ),
14889 _ => arr,
14890 };
14891 // needle = ANY(arr) using the Any quantified expression
14892 let any_expr = Expression::Any(Box::new(
14893 crate::expressions::QuantifiedExpr {
14894 this: needle.clone(),
14895 subquery: pg_arr,
14896 op: Some(crate::expressions::QuantifiedOp::Eq),
14897 },
14898 ));
14899 let coalesce = Expression::Coalesce(Box::new(
14900 crate::expressions::VarArgFunc {
14901 expressions: vec![
14902 any_expr,
14903 Expression::Boolean(
14904 crate::expressions::BooleanLiteral {
14905 value: false,
14906 },
14907 ),
14908 ],
14909 original_name: None,
14910 },
14911 ));
14912 let is_null_check = Expression::IsNull(Box::new(
14913 crate::expressions::IsNull {
14914 this: needle,
14915 not: false,
14916 postfix_form: false,
14917 },
14918 ));
14919 Ok(Expression::Case(Box::new(Case {
14920 operand: None,
14921 whens: vec![(
14922 is_null_check,
14923 Expression::Null(crate::expressions::Null),
14924 )],
14925 else_: Some(coalesce),
14926 comments: Vec::new(),
14927 })))
14928 }
14929 _ => Ok(Expression::Function(Box::new(Function::new(
14930 "ARRAY_CONTAINS".to_string(),
14931 f.args,
14932 )))),
14933 }
14934 }
14935 // LIST_HAS_ANY / ARRAY_HAS_ANY -> target-specific overlap operator
14936 "LIST_HAS_ANY" | "ARRAY_HAS_ANY" if f.args.len() == 2 => {
14937 match target {
14938 DialectType::PostgreSQL | DialectType::Redshift => {
14939 // arr1 && arr2 with ARRAY[] syntax
14940 let mut args = f.args;
14941 let arr1 = args.remove(0);
14942 let arr2 = args.remove(0);
14943 let pg_arr1 = match arr1 {
14944 Expression::Array(a) => Expression::ArrayFunc(
14945 Box::new(crate::expressions::ArrayConstructor {
14946 expressions: a.expressions,
14947 bracket_notation: false,
14948 use_list_keyword: false,
14949 }),
14950 ),
14951 _ => arr1,
14952 };
14953 let pg_arr2 = match arr2 {
14954 Expression::Array(a) => Expression::ArrayFunc(
14955 Box::new(crate::expressions::ArrayConstructor {
14956 expressions: a.expressions,
14957 bracket_notation: false,
14958 use_list_keyword: false,
14959 }),
14960 ),
14961 _ => arr2,
14962 };
14963 Ok(Expression::ArrayOverlaps(Box::new(BinaryOp::new(
14964 pg_arr1, pg_arr2,
14965 ))))
14966 }
14967 DialectType::DuckDB => {
14968 // DuckDB: arr1 && arr2 (native support)
14969 let mut args = f.args;
14970 let arr1 = args.remove(0);
14971 let arr2 = args.remove(0);
14972 Ok(Expression::ArrayOverlaps(Box::new(BinaryOp::new(
14973 arr1, arr2,
14974 ))))
14975 }
14976 _ => Ok(Expression::Function(Box::new(Function::new(
14977 "LIST_HAS_ANY".to_string(),
14978 f.args,
14979 )))),
14980 }
14981 }
14982 // APPROX_QUANTILE(x, q) -> target-specific
14983 "APPROX_QUANTILE" if f.args.len() == 2 => match target {
14984 DialectType::Snowflake => Ok(Expression::Function(Box::new(
14985 Function::new("APPROX_PERCENTILE".to_string(), f.args),
14986 ))),
14987 DialectType::DuckDB => Ok(Expression::Function(f)),
14988 _ => Ok(Expression::Function(f)),
14989 },
14990 // MAKE_DATE(y, m, d) -> DATE(y, m, d) for BigQuery
14991 "MAKE_DATE" if f.args.len() == 3 => match target {
14992 DialectType::BigQuery => Ok(Expression::Function(Box::new(
14993 Function::new("DATE".to_string(), f.args),
14994 ))),
14995 _ => Ok(Expression::Function(f)),
14996 },
14997 // RANGE(start, end[, step]) -> target-specific
14998 "RANGE"
14999 if f.args.len() >= 2 && !matches!(target, DialectType::DuckDB) =>
15000 {
15001 let start = f.args[0].clone();
15002 let end = f.args[1].clone();
15003 let step = f.args.get(2).cloned();
15004 match target {
15005 DialectType::Spark | DialectType::Databricks => {
15006 // RANGE(start, end) -> SEQUENCE(start, end-1)
15007 // RANGE(start, end, step) -> SEQUENCE(start, end-step, step) when step constant
15008 // RANGE(start, start) -> ARRAY() (empty)
15009 // RANGE(start, end, 0) -> ARRAY() (empty)
15010 // When end is variable: IF((end - 1) <= start, ARRAY(), SEQUENCE(start, (end - 1)))
15011
15012 // Check for constant args
15013 fn extract_i64(e: &Expression) -> Option<i64> {
15014 match e {
15015 Expression::Literal(Literal::Number(n)) => {
15016 n.parse::<i64>().ok()
15017 }
15018 Expression::Neg(u) => {
15019 if let Expression::Literal(Literal::Number(n)) =
15020 &u.this
15021 {
15022 n.parse::<i64>().ok().map(|v| -v)
15023 } else {
15024 None
15025 }
15026 }
15027 _ => None,
15028 }
15029 }
15030 let start_val = extract_i64(&start);
15031 let end_val = extract_i64(&end);
15032 let step_val = step.as_ref().and_then(|s| extract_i64(s));
15033
15034 // Check for RANGE(x, x) or RANGE(x, y, 0) -> empty array
15035 if step_val == Some(0) {
15036 return Ok(Expression::Function(Box::new(
15037 Function::new("ARRAY".to_string(), vec![]),
15038 )));
15039 }
15040 if let (Some(s), Some(e_val)) = (start_val, end_val) {
15041 if s == e_val {
15042 return Ok(Expression::Function(Box::new(
15043 Function::new("ARRAY".to_string(), vec![]),
15044 )));
15045 }
15046 }
15047
15048 if let (Some(_s_val), Some(e_val)) = (start_val, end_val) {
15049 // All constants - compute new end = end - step (if step provided) or end - 1
15050 match step_val {
15051 Some(st) if st < 0 => {
15052 // Negative step: SEQUENCE(start, end - step, step)
15053 let new_end = e_val - st; // end - step (= end + |step|)
15054 let mut args =
15055 vec![start, Expression::number(new_end)];
15056 if let Some(s) = step {
15057 args.push(s);
15058 }
15059 Ok(Expression::Function(Box::new(
15060 Function::new("SEQUENCE".to_string(), args),
15061 )))
15062 }
15063 Some(st) => {
15064 let new_end = e_val - st;
15065 let mut args =
15066 vec![start, Expression::number(new_end)];
15067 if let Some(s) = step {
15068 args.push(s);
15069 }
15070 Ok(Expression::Function(Box::new(
15071 Function::new("SEQUENCE".to_string(), args),
15072 )))
15073 }
15074 None => {
15075 // No step: SEQUENCE(start, end - 1)
15076 let new_end = e_val - 1;
15077 Ok(Expression::Function(Box::new(
15078 Function::new(
15079 "SEQUENCE".to_string(),
15080 vec![
15081 start,
15082 Expression::number(new_end),
15083 ],
15084 ),
15085 )))
15086 }
15087 }
15088 } else {
15089 // Variable end: IF((end - 1) <= start, ARRAY(), SEQUENCE(start, (end - 1)))
15090 let end_m1 = Expression::Sub(Box::new(BinaryOp::new(
15091 end.clone(),
15092 Expression::number(1),
15093 )));
15094 let cond = Expression::Lte(Box::new(BinaryOp::new(
15095 Expression::Paren(Box::new(Paren {
15096 this: end_m1.clone(),
15097 trailing_comments: Vec::new(),
15098 })),
15099 start.clone(),
15100 )));
15101 let empty = Expression::Function(Box::new(
15102 Function::new("ARRAY".to_string(), vec![]),
15103 ));
15104 let mut seq_args = vec![
15105 start,
15106 Expression::Paren(Box::new(Paren {
15107 this: end_m1,
15108 trailing_comments: Vec::new(),
15109 })),
15110 ];
15111 if let Some(s) = step {
15112 seq_args.push(s);
15113 }
15114 let seq = Expression::Function(Box::new(
15115 Function::new("SEQUENCE".to_string(), seq_args),
15116 ));
15117 Ok(Expression::IfFunc(Box::new(
15118 crate::expressions::IfFunc {
15119 condition: cond,
15120 true_value: empty,
15121 false_value: Some(seq),
15122 original_name: None,
15123 },
15124 )))
15125 }
15126 }
15127 DialectType::SQLite => {
15128 // RANGE(start, end) -> GENERATE_SERIES(start, end)
15129 // The subquery wrapping is handled at the Alias level
15130 let mut args = vec![start, end];
15131 if let Some(s) = step {
15132 args.push(s);
15133 }
15134 Ok(Expression::Function(Box::new(Function::new(
15135 "GENERATE_SERIES".to_string(),
15136 args,
15137 ))))
15138 }
15139 _ => Ok(Expression::Function(f)),
15140 }
15141 }
15142 // ARRAY_REVERSE_SORT -> target-specific
15143 // (handled above as well, but also need DuckDB self-normalization)
15144 // MAP_FROM_ARRAYS(keys, values) -> target-specific map construction
15145 "MAP_FROM_ARRAYS" if f.args.len() == 2 => match target {
15146 DialectType::Snowflake => Ok(Expression::Function(Box::new(
15147 Function::new("OBJECT_CONSTRUCT".to_string(), f.args),
15148 ))),
15149 DialectType::Spark | DialectType::Databricks => {
15150 Ok(Expression::Function(Box::new(Function::new(
15151 "MAP_FROM_ARRAYS".to_string(),
15152 f.args,
15153 ))))
15154 }
15155 _ => Ok(Expression::Function(Box::new(Function::new(
15156 "MAP".to_string(),
15157 f.args,
15158 )))),
15159 },
// VARIANCE(x) -> varSamp(x) for ClickHouse. The camelCase spelling is
// deliberate — ClickHouse function names are written this way.
"VARIANCE" if f.args.len() == 1 => match target {
    DialectType::ClickHouse => Ok(Expression::Function(Box::new(
        Function::new("varSamp".to_string(), f.args),
    ))),
    _ => Ok(Expression::Function(f)),
},
// STDDEV(x) -> stddevSamp(x) for ClickHouse
"STDDEV" if f.args.len() == 1 => match target {
    DialectType::ClickHouse => Ok(Expression::Function(Box::new(
        Function::new("stddevSamp".to_string(), f.args),
    ))),
    _ => Ok(Expression::Function(f)),
},
// ISINF(x) -> IS_INF(x) for BigQuery
"ISINF" if f.args.len() == 1 => match target {
    DialectType::BigQuery => Ok(Expression::Function(Box::new(
        Function::new("IS_INF".to_string(), f.args),
    ))),
    _ => Ok(Expression::Function(f)),
},
// CONTAINS(arr, x) -> ARRAY_CONTAINS(arr, x) for Spark/Hive
"CONTAINS" if f.args.len() == 2 => match target {
    DialectType::Spark
    | DialectType::Databricks
    | DialectType::Hive => Ok(Expression::Function(Box::new(
        Function::new("ARRAY_CONTAINS".to_string(), f.args),
    ))),
    _ => Ok(Expression::Function(f)),
},
// ARRAY_CONTAINS(arr, x) -> CONTAINS(arr, x) for the Presto family.
"ARRAY_CONTAINS" if f.args.len() == 2 => match target {
    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
        Ok(Expression::Function(Box::new(Function::new(
            "CONTAINS".to_string(),
            f.args,
        ))))
    }
    // For DuckDB the call is rebuilt rather than passed through —
    // presumably to force the canonical upper-case spelling regardless
    // of how the source wrote it; verify against the DuckDB generator.
    DialectType::DuckDB => Ok(Expression::Function(Box::new(
        Function::new("ARRAY_CONTAINS".to_string(), f.args),
    ))),
    _ => Ok(Expression::Function(f)),
},
// TO_UNIXTIME(x) -> UNIX_TIMESTAMP(x) for Hive/Spark
"TO_UNIXTIME" if f.args.len() == 1 => match target {
    DialectType::Hive
    | DialectType::Spark
    | DialectType::Databricks => Ok(Expression::Function(Box::new(
        Function::new("UNIX_TIMESTAMP".to_string(), f.args),
    ))),
    _ => Ok(Expression::Function(f)),
},
15212 // FROM_UNIXTIME(x) -> target-specific
15213 "FROM_UNIXTIME" if f.args.len() == 1 => {
15214 match target {
15215 DialectType::Hive
15216 | DialectType::Spark
15217 | DialectType::Databricks
15218 | DialectType::Presto
15219 | DialectType::Trino => Ok(Expression::Function(f)),
15220 DialectType::DuckDB => {
15221 // DuckDB: TO_TIMESTAMP(x)
15222 let arg = f.args.into_iter().next().unwrap();
15223 Ok(Expression::Function(Box::new(Function::new(
15224 "TO_TIMESTAMP".to_string(),
15225 vec![arg],
15226 ))))
15227 }
15228 DialectType::PostgreSQL => {
15229 // PG: TO_TIMESTAMP(col)
15230 let arg = f.args.into_iter().next().unwrap();
15231 Ok(Expression::Function(Box::new(Function::new(
15232 "TO_TIMESTAMP".to_string(),
15233 vec![arg],
15234 ))))
15235 }
15236 DialectType::Redshift => {
15237 // Redshift: (TIMESTAMP 'epoch' + col * INTERVAL '1 SECOND')
15238 let arg = f.args.into_iter().next().unwrap();
15239 let epoch_ts = Expression::Literal(Literal::Timestamp(
15240 "epoch".to_string(),
15241 ));
15242 let interval = Expression::Interval(Box::new(
15243 crate::expressions::Interval {
15244 this: Some(Expression::string("1 SECOND")),
15245 unit: None,
15246 },
15247 ));
15248 let mul =
15249 Expression::Mul(Box::new(BinaryOp::new(arg, interval)));
15250 let add =
15251 Expression::Add(Box::new(BinaryOp::new(epoch_ts, mul)));
15252 Ok(Expression::Paren(Box::new(crate::expressions::Paren {
15253 this: add,
15254 trailing_comments: Vec::new(),
15255 })))
15256 }
15257 _ => Ok(Expression::Function(f)),
15258 }
15259 }
// FROM_UNIXTIME(x, fmt) with 2 args from Hive/Spark -> target-specific.
// Strategy: convert the epoch value first, then format it using the
// target's formatting function. A *literal* format string is translated
// between format-pattern dialects; a dynamic format expression is passed
// through untranslated (best effort).
"FROM_UNIXTIME"
    if f.args.len() == 2
        && matches!(
            source,
            DialectType::Hive
                | DialectType::Spark
                | DialectType::Databricks
        ) =>
{
    let mut args = f.args;
    let unix_ts = args.remove(0);
    let fmt_expr = args.remove(0);
    match target {
        DialectType::DuckDB => {
            // DuckDB: STRFTIME(TO_TIMESTAMP(x), c_fmt)
            let to_ts = Expression::Function(Box::new(Function::new(
                "TO_TIMESTAMP".to_string(),
                vec![unix_ts],
            )));
            // Literal format -> translate Hive/Java patterns to C
            // strftime codes via the shared helper.
            if let Expression::Literal(
                crate::expressions::Literal::String(s),
            ) = &fmt_expr
            {
                let c_fmt = Self::hive_format_to_c_format(s);
                Ok(Expression::Function(Box::new(Function::new(
                    "STRFTIME".to_string(),
                    vec![to_ts, Expression::string(&c_fmt)],
                ))))
            } else {
                Ok(Expression::Function(Box::new(Function::new(
                    "STRFTIME".to_string(),
                    vec![to_ts, fmt_expr],
                ))))
            }
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // Presto: DATE_FORMAT(FROM_UNIXTIME(x), presto_fmt)
            let from_unix =
                Expression::Function(Box::new(Function::new(
                    "FROM_UNIXTIME".to_string(),
                    vec![unix_ts],
                )));
            if let Expression::Literal(
                crate::expressions::Literal::String(s),
            ) = &fmt_expr
            {
                let p_fmt = Self::hive_format_to_presto_format(s);
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_FORMAT".to_string(),
                    vec![from_unix, Expression::string(&p_fmt)],
                ))))
            } else {
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_FORMAT".to_string(),
                    vec![from_unix, fmt_expr],
                ))))
            }
        }
        _ => {
            // Keep as FROM_UNIXTIME(x, fmt) for other targets
            Ok(Expression::Function(Box::new(Function::new(
                "FROM_UNIXTIME".to_string(),
                vec![unix_ts, fmt_expr],
            ))))
        }
    }
}
15330 // DATEPART(unit, expr) -> EXTRACT(unit FROM expr) for Spark
15331 "DATEPART" | "DATE_PART" if f.args.len() == 2 => {
15332 let unit_str = Self::get_unit_str_static(&f.args[0]);
15333 // Get the raw unit text preserving original case
15334 let raw_unit = match &f.args[0] {
15335 Expression::Identifier(id) => id.name.clone(),
15336 Expression::Literal(crate::expressions::Literal::String(s)) => {
15337 s.clone()
15338 }
15339 Expression::Column(col) => col.name.name.clone(),
15340 _ => unit_str.clone(),
15341 };
15342 match target {
15343 DialectType::TSQL | DialectType::Fabric => {
15344 // Preserve original case of unit for TSQL
15345 let unit_name = match unit_str.as_str() {
15346 "YY" | "YYYY" => "YEAR".to_string(),
15347 "QQ" | "Q" => "QUARTER".to_string(),
15348 "MM" | "M" => "MONTH".to_string(),
15349 "WK" | "WW" => "WEEK".to_string(),
15350 "DD" | "D" | "DY" => "DAY".to_string(),
15351 "HH" => "HOUR".to_string(),
15352 "MI" | "N" => "MINUTE".to_string(),
15353 "SS" | "S" => "SECOND".to_string(),
15354 _ => raw_unit.clone(), // preserve original case
15355 };
15356 let mut args = f.args;
15357 args[0] =
15358 Expression::Identifier(Identifier::new(&unit_name));
15359 Ok(Expression::Function(Box::new(Function::new(
15360 "DATEPART".to_string(),
15361 args,
15362 ))))
15363 }
15364 DialectType::Spark | DialectType::Databricks => {
15365 // DATEPART(unit, expr) -> EXTRACT(unit FROM expr)
15366 // Preserve original case for non-abbreviation units
15367 let unit = match unit_str.as_str() {
15368 "YY" | "YYYY" => "YEAR".to_string(),
15369 "QQ" | "Q" => "QUARTER".to_string(),
15370 "MM" | "M" => "MONTH".to_string(),
15371 "WK" | "WW" => "WEEK".to_string(),
15372 "DD" | "D" | "DY" => "DAY".to_string(),
15373 "HH" => "HOUR".to_string(),
15374 "MI" | "N" => "MINUTE".to_string(),
15375 "SS" | "S" => "SECOND".to_string(),
15376 _ => raw_unit, // preserve original case
15377 };
15378 Ok(Expression::Extract(Box::new(
15379 crate::expressions::ExtractFunc {
15380 this: f.args[1].clone(),
15381 field: crate::expressions::DateTimeField::Custom(
15382 unit,
15383 ),
15384 },
15385 )))
15386 }
15387 _ => Ok(Expression::Function(Box::new(Function::new(
15388 "DATE_PART".to_string(),
15389 f.args,
15390 )))),
15391 }
15392 }
15393 // DATENAME(mm, date) -> FORMAT(CAST(date AS DATETIME2), 'MMMM') for TSQL
15394 // DATENAME(dw, date) -> FORMAT(CAST(date AS DATETIME2), 'dddd') for TSQL
15395 // DATENAME(mm, date) -> DATE_FORMAT(CAST(date AS TIMESTAMP), 'MMMM') for Spark
15396 // DATENAME(dw, date) -> DATE_FORMAT(CAST(date AS TIMESTAMP), 'EEEE') for Spark
15397 "DATENAME" if f.args.len() == 2 => {
15398 let unit_str = Self::get_unit_str_static(&f.args[0]);
15399 let date_expr = f.args[1].clone();
15400 match unit_str.as_str() {
15401 "MM" | "M" | "MONTH" => match target {
15402 DialectType::TSQL => {
15403 let cast_date = Expression::Cast(Box::new(
15404 crate::expressions::Cast {
15405 this: date_expr,
15406 to: DataType::Custom {
15407 name: "DATETIME2".to_string(),
15408 },
15409 trailing_comments: Vec::new(),
15410 double_colon_syntax: false,
15411 format: None,
15412 default: None,
15413 },
15414 ));
15415 Ok(Expression::Function(Box::new(Function::new(
15416 "FORMAT".to_string(),
15417 vec![cast_date, Expression::string("MMMM")],
15418 ))))
15419 }
15420 DialectType::Spark | DialectType::Databricks => {
15421 let cast_date = Expression::Cast(Box::new(
15422 crate::expressions::Cast {
15423 this: date_expr,
15424 to: DataType::Timestamp {
15425 timezone: false,
15426 precision: None,
15427 },
15428 trailing_comments: Vec::new(),
15429 double_colon_syntax: false,
15430 format: None,
15431 default: None,
15432 },
15433 ));
15434 Ok(Expression::Function(Box::new(Function::new(
15435 "DATE_FORMAT".to_string(),
15436 vec![cast_date, Expression::string("MMMM")],
15437 ))))
15438 }
15439 _ => Ok(Expression::Function(f)),
15440 },
15441 "DW" | "WEEKDAY" => match target {
15442 DialectType::TSQL => {
15443 let cast_date = Expression::Cast(Box::new(
15444 crate::expressions::Cast {
15445 this: date_expr,
15446 to: DataType::Custom {
15447 name: "DATETIME2".to_string(),
15448 },
15449 trailing_comments: Vec::new(),
15450 double_colon_syntax: false,
15451 format: None,
15452 default: None,
15453 },
15454 ));
15455 Ok(Expression::Function(Box::new(Function::new(
15456 "FORMAT".to_string(),
15457 vec![cast_date, Expression::string("dddd")],
15458 ))))
15459 }
15460 DialectType::Spark | DialectType::Databricks => {
15461 let cast_date = Expression::Cast(Box::new(
15462 crate::expressions::Cast {
15463 this: date_expr,
15464 to: DataType::Timestamp {
15465 timezone: false,
15466 precision: None,
15467 },
15468 trailing_comments: Vec::new(),
15469 double_colon_syntax: false,
15470 format: None,
15471 default: None,
15472 },
15473 ));
15474 Ok(Expression::Function(Box::new(Function::new(
15475 "DATE_FORMAT".to_string(),
15476 vec![cast_date, Expression::string("EEEE")],
15477 ))))
15478 }
15479 _ => Ok(Expression::Function(f)),
15480 },
15481 _ => Ok(Expression::Function(f)),
15482 }
15483 }
15484 // STRING_AGG(x, sep) without WITHIN GROUP -> target-specific
15485 "STRING_AGG" if f.args.len() >= 2 => {
15486 let x = f.args[0].clone();
15487 let sep = f.args[1].clone();
15488 match target {
15489 DialectType::MySQL
15490 | DialectType::SingleStore
15491 | DialectType::Doris
15492 | DialectType::StarRocks => Ok(Expression::GroupConcat(
15493 Box::new(crate::expressions::GroupConcatFunc {
15494 this: x,
15495 separator: Some(sep),
15496 order_by: None,
15497 distinct: false,
15498 filter: None,
15499 }),
15500 )),
15501 DialectType::SQLite => Ok(Expression::GroupConcat(Box::new(
15502 crate::expressions::GroupConcatFunc {
15503 this: x,
15504 separator: Some(sep),
15505 order_by: None,
15506 distinct: false,
15507 filter: None,
15508 },
15509 ))),
15510 DialectType::PostgreSQL | DialectType::Redshift => {
15511 Ok(Expression::StringAgg(Box::new(
15512 crate::expressions::StringAggFunc {
15513 this: x,
15514 separator: Some(sep),
15515 order_by: None,
15516 distinct: false,
15517 filter: None,
15518 limit: None,
15519 },
15520 )))
15521 }
15522 _ => Ok(Expression::Function(f)),
15523 }
15524 }
// JSON_ARRAYAGG -> JSON_AGG for PostgreSQL. Struct-update syntax keeps
// every other Function field (arguments etc.) exactly as parsed.
"JSON_ARRAYAGG" => match target {
    DialectType::PostgreSQL => {
        Ok(Expression::Function(Box::new(Function {
            name: "JSON_AGG".to_string(),
            ..(*f)
        })))
    }
    _ => Ok(Expression::Function(f)),
},
// SCHEMA_NAME(id) -> CURRENT_SCHEMA for PostgreSQL, 'main' for SQLite
// ('main' is SQLite's name for its primary attached database).
// NOTE(review): any argument to SCHEMA_NAME is silently dropped on
// these two targets — confirm callers never pass an explicit id whose
// lookup semantics matter.
"SCHEMA_NAME" => match target {
    DialectType::PostgreSQL => Ok(Expression::CurrentSchema(Box::new(
        crate::expressions::CurrentSchema { this: None },
    ))),
    DialectType::SQLite => Ok(Expression::string("main")),
    _ => Ok(Expression::Function(f)),
},
15543 // TO_TIMESTAMP(x, fmt) 2-arg from Spark/Hive: convert Java format to target format
15544 "TO_TIMESTAMP"
15545 if f.args.len() == 2
15546 && matches!(
15547 source,
15548 DialectType::Spark
15549 | DialectType::Databricks
15550 | DialectType::Hive
15551 )
15552 && matches!(target, DialectType::DuckDB) =>
15553 {
15554 let mut args = f.args;
15555 let val = args.remove(0);
15556 let fmt_expr = args.remove(0);
15557 if let Expression::Literal(Literal::String(ref s)) = fmt_expr {
15558 // Convert Java/Spark format to C strptime format
// Translates a Java/Spark datetime pattern into a C strptime format.
// First pass: token substitution — longer/uppercase patterns must be
// replaced before their shorter/lowercase counterparts (e.g. "yyyy"
// before "yy", "MM" month before "mm" minute).
// Second pass: timezone letters, copying '%'-escapes verbatim so the
// character after a '%' is never reinterpreted.
// NOTE(review): "EEEE" -> "%W" maps a weekday *name* to strftime's
// week-of-year directive; the weekday-name directive is "%A" — worth
// confirming against the expected output.
fn java_to_c_fmt(fmt: &str) -> String {
    let mapped = fmt
        .replace("yyyy", "%Y")
        .replace("SSSSSS", "%f")
        .replace("EEEE", "%W")
        .replace("MM", "%m")
        .replace("dd", "%d")
        .replace("HH", "%H")
        .replace("mm", "%M")
        .replace("ss", "%S")
        .replace("yy", "%y");
    let mut out = String::with_capacity(mapped.len());
    let mut it = mapped.chars().peekable();
    while let Some(c) = it.next() {
        match c {
            '%' => {
                out.push('%');
                if let Some(next) = it.next() {
                    out.push(next);
                }
            }
            'z' => out.push_str("%Z"),
            'Z' => out.push_str("%z"),
            other => out.push(other),
        }
    }
    out
}
15591 let c_fmt = java_to_c_fmt(s);
15592 Ok(Expression::Function(Box::new(Function::new(
15593 "STRPTIME".to_string(),
15594 vec![val, Expression::string(&c_fmt)],
15595 ))))
15596 } else {
15597 Ok(Expression::Function(Box::new(Function::new(
15598 "STRPTIME".to_string(),
15599 vec![val, fmt_expr],
15600 ))))
15601 }
15602 }
// TO_DATE(x) 1-arg from Doris/StarRocks: plain (non-safe) date
// conversion — contrast with the Spark/Hive arm below, which uses
// TRY_CAST-style safe conversion on targets that support it.
"TO_DATE"
    if f.args.len() == 1
        && matches!(
            source,
            DialectType::Doris | DialectType::StarRocks
        ) =>
{
    let arg = f.args.into_iter().next().unwrap();
    match target {
        DialectType::Oracle
        | DialectType::DuckDB
        | DialectType::TSQL => {
            // CAST(x AS DATE)
            Ok(Expression::Cast(Box::new(Cast {
                this: arg,
                to: DataType::Date,
                double_colon_syntax: false,
                trailing_comments: vec![],
                format: None,
                default: None,
            })))
        }
        DialectType::MySQL | DialectType::SingleStore => {
            // DATE(x)
            Ok(Expression::Function(Box::new(Function::new(
                "DATE".to_string(),
                vec![arg],
            ))))
        }
        _ => {
            // Default: keep as TO_DATE(x) (Spark, PostgreSQL, etc.)
            Ok(Expression::Function(Box::new(Function::new(
                "TO_DATE".to_string(),
                vec![arg],
            ))))
        }
    }
}
// TO_DATE(x) 1-arg from Spark/Hive: Spark's TO_DATE returns NULL on
// unparseable input ("safe"), so targets get a non-throwing equivalent.
"TO_DATE"
    if f.args.len() == 1
        && matches!(
            source,
            DialectType::Spark
                | DialectType::Databricks
                | DialectType::Hive
        ) =>
{
    let arg = f.args.into_iter().next().unwrap();
    match target {
        DialectType::DuckDB => {
            // Spark TO_DATE is safe -> TRY_CAST(x AS DATE)
            Ok(Expression::TryCast(Box::new(Cast {
                this: arg,
                to: DataType::Date,
                double_colon_syntax: false,
                trailing_comments: vec![],
                format: None,
                default: None,
            })))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // CAST(CAST(x AS TIMESTAMP) AS DATE)
            Ok(Self::double_cast_timestamp_date(arg))
        }
        DialectType::Snowflake => {
            // Spark's TO_DATE is safe -> TRY_TO_DATE(x, 'yyyy-mm-DD').
            // The default Spark format 'yyyy-MM-dd' maps to Snowflake's
            // 'yyyy-mm-DD' — NOTE(review): Snowflake format elements
            // are case-insensitive, so this is equivalent to
            // 'YYYY-MM-DD'; confirm the casing is intentional.
            Ok(Expression::Function(Box::new(Function::new(
                "TRY_TO_DATE".to_string(),
                vec![arg, Expression::string("yyyy-mm-DD")],
            ))))
        }
        _ => {
            // Default: keep as TO_DATE(x)
            Ok(Expression::Function(Box::new(Function::new(
                "TO_DATE".to_string(),
                vec![arg],
            ))))
        }
    }
}
15688 // TO_DATE(x, fmt) 2-arg from Spark/Hive: format-based date conversion
15689 "TO_DATE"
15690 if f.args.len() == 2
15691 && matches!(
15692 source,
15693 DialectType::Spark
15694 | DialectType::Databricks
15695 | DialectType::Hive
15696 ) =>
15697 {
15698 let mut args = f.args;
15699 let val = args.remove(0);
15700 let fmt_expr = args.remove(0);
15701 let is_default_format = matches!(&fmt_expr, Expression::Literal(Literal::String(s)) if s == "yyyy-MM-dd");
15702
15703 if is_default_format {
15704 // Default format: same as 1-arg form
15705 match target {
15706 DialectType::DuckDB => {
15707 Ok(Expression::TryCast(Box::new(Cast {
15708 this: val,
15709 to: DataType::Date,
15710 double_colon_syntax: false,
15711 trailing_comments: vec![],
15712 format: None,
15713 default: None,
15714 })))
15715 }
15716 DialectType::Presto
15717 | DialectType::Trino
15718 | DialectType::Athena => {
15719 Ok(Self::double_cast_timestamp_date(val))
15720 }
15721 DialectType::Snowflake => {
15722 // TRY_TO_DATE(x, format) with Snowflake format mapping
15723 let sf_fmt = "yyyy-MM-dd"
15724 .replace("yyyy", "yyyy")
15725 .replace("MM", "mm")
15726 .replace("dd", "DD");
15727 Ok(Expression::Function(Box::new(Function::new(
15728 "TRY_TO_DATE".to_string(),
15729 vec![val, Expression::string(&sf_fmt)],
15730 ))))
15731 }
15732 _ => Ok(Expression::Function(Box::new(Function::new(
15733 "TO_DATE".to_string(),
15734 vec![val],
15735 )))),
15736 }
15737 } else {
15738 // Non-default format: use format-based parsing
15739 if let Expression::Literal(Literal::String(ref s)) = fmt_expr {
15740 match target {
15741 DialectType::DuckDB => {
15742 // CAST(CAST(TRY_STRPTIME(x, c_fmt) AS TIMESTAMP) AS DATE)
// Translates a Java/Spark datetime pattern into a C strptime format
// (duplicate of java_to_c_fmt in the TO_TIMESTAMP arm — kept local to
// this arm). Replacement order matters: longer/uppercase tokens are
// consumed before shorter/lowercase ones ("yyyy" before "yy", "MM"
// month before "mm" minute). The scan afterwards handles timezone
// letters while copying already-emitted '%'-escapes verbatim.
fn java_to_c_fmt_todate(fmt: &str) -> String {
    let mapped = fmt
        .replace("yyyy", "%Y")
        .replace("SSSSSS", "%f")
        .replace("EEEE", "%W")
        .replace("MM", "%m")
        .replace("dd", "%d")
        .replace("HH", "%H")
        .replace("mm", "%M")
        .replace("ss", "%S")
        .replace("yy", "%y");
    let mut out = String::with_capacity(mapped.len());
    // When set, the current char directly follows a '%' and is copied
    // through without interpretation.
    let mut copy_literal = false;
    for c in mapped.chars() {
        if copy_literal {
            out.push(c);
            copy_literal = false;
        } else {
            match c {
                '%' => {
                    out.push('%');
                    copy_literal = true;
                }
                'z' => out.push_str("%Z"),
                'Z' => out.push_str("%z"),
                other => out.push(other),
            }
        }
    }
    out
}
15775 let c_fmt = java_to_c_fmt_todate(s);
15776 // CAST(CAST(TRY_STRPTIME(x, fmt) AS TIMESTAMP) AS DATE)
15777 let try_strptime =
15778 Expression::Function(Box::new(Function::new(
15779 "TRY_STRPTIME".to_string(),
15780 vec![val, Expression::string(&c_fmt)],
15781 )));
15782 let cast_ts = Expression::Cast(Box::new(Cast {
15783 this: try_strptime,
15784 to: DataType::Timestamp {
15785 precision: None,
15786 timezone: false,
15787 },
15788 double_colon_syntax: false,
15789 trailing_comments: vec![],
15790 format: None,
15791 default: None,
15792 }));
15793 Ok(Expression::Cast(Box::new(Cast {
15794 this: cast_ts,
15795 to: DataType::Date,
15796 double_colon_syntax: false,
15797 trailing_comments: vec![],
15798 format: None,
15799 default: None,
15800 })))
15801 }
15802 DialectType::Presto
15803 | DialectType::Trino
15804 | DialectType::Athena => {
15805 // CAST(DATE_PARSE(x, presto_fmt) AS DATE)
15806 let p_fmt = s
15807 .replace("yyyy", "%Y")
15808 .replace("SSSSSS", "%f")
15809 .replace("MM", "%m")
15810 .replace("dd", "%d")
15811 .replace("HH", "%H")
15812 .replace("mm", "%M")
15813 .replace("ss", "%S")
15814 .replace("yy", "%y");
15815 let date_parse =
15816 Expression::Function(Box::new(Function::new(
15817 "DATE_PARSE".to_string(),
15818 vec![val, Expression::string(&p_fmt)],
15819 )));
15820 Ok(Expression::Cast(Box::new(Cast {
15821 this: date_parse,
15822 to: DataType::Date,
15823 double_colon_syntax: false,
15824 trailing_comments: vec![],
15825 format: None,
15826 default: None,
15827 })))
15828 }
15829 DialectType::Snowflake => {
15830 // TRY_TO_DATE(x, snowflake_fmt)
15831 Ok(Expression::Function(Box::new(Function::new(
15832 "TRY_TO_DATE".to_string(),
15833 vec![val, Expression::string(s)],
15834 ))))
15835 }
15836 _ => Ok(Expression::Function(Box::new(Function::new(
15837 "TO_DATE".to_string(),
15838 vec![val, fmt_expr],
15839 )))),
15840 }
15841 } else {
15842 Ok(Expression::Function(Box::new(Function::new(
15843 "TO_DATE".to_string(),
15844 vec![val, fmt_expr],
15845 ))))
15846 }
15847 }
15848 }
// TO_TIMESTAMP(x) 1-arg from DuckDB interprets x as epoch seconds; map
// it to the target's epoch-seconds constructor.
"TO_TIMESTAMP"
    if f.args.len() == 1
        && matches!(source, DialectType::DuckDB)
        && matches!(
            target,
            DialectType::BigQuery
                | DialectType::Presto
                | DialectType::Trino
                | DialectType::Hive
                | DialectType::Spark
                | DialectType::Databricks
                | DialectType::Athena
        ) =>
{
    let arg = f.args.into_iter().next().unwrap();
    let func_name = match target {
        DialectType::BigQuery => "TIMESTAMP_SECONDS",
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena
        | DialectType::Hive
        | DialectType::Spark
        | DialectType::Databricks => "FROM_UNIXTIME",
        // Unreachable given the arm's target guard; present only to
        // keep the match exhaustive.
        _ => "TO_TIMESTAMP",
    };
    Ok(Expression::Function(Box::new(Function::new(
        func_name.to_string(),
        vec![arg],
    ))))
}
// CONCAT(x) single-arg: targets disagree on one-argument CONCAT, so
// each gets its own rendering.
"CONCAT" if f.args.len() == 1 => {
    let arg = f.args.into_iter().next().unwrap();
    match target {
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // CONCAT(a) -> CAST(a AS VARCHAR) — a lone-argument concat
            // reduces to a string conversion on the Presto family.
            Ok(Expression::Cast(Box::new(Cast {
                this: arg,
                to: DataType::VarChar {
                    length: None,
                    parenthesized_length: false,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            })))
        }
        DialectType::TSQL => {
            // CONCAT(a) -> a (the wrapper is dropped entirely)
            Ok(arg)
        }
        DialectType::DuckDB => {
            // Keep CONCAT(a) for DuckDB (native support); rebuilt so
            // the emitted name is the canonical upper-case spelling.
            Ok(Expression::Function(Box::new(Function::new(
                "CONCAT".to_string(),
                vec![arg],
            ))))
        }
        DialectType::Spark | DialectType::Databricks => {
            // CONCAT(COALESCE(a, '')) — NOTE(review): Spark's CONCAT
            // is NULL-propagating, so this presumably preserves a
            // NULL-tolerant source semantic; confirm which source
            // dialects route through here.
            let coalesced = Expression::Coalesce(Box::new(
                crate::expressions::VarArgFunc {
                    expressions: vec![arg, Expression::string("")],
                    original_name: None,
                },
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "CONCAT".to_string(),
                vec![coalesced],
            ))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new(
            "CONCAT".to_string(),
            vec![arg],
        )))),
    }
}
15929 // REGEXP_EXTRACT(a, p) 2-arg: BigQuery default group is 0 (no 3rd arg needed)
15930 "REGEXP_EXTRACT"
15931 if f.args.len() == 3 && matches!(target, DialectType::BigQuery) =>
15932 {
15933 // If group_index is 0, drop it
15934 let drop_group = match &f.args[2] {
15935 Expression::Literal(Literal::Number(n)) => n == "0",
15936 _ => false,
15937 };
15938 if drop_group {
15939 let mut args = f.args;
15940 args.truncate(2);
15941 Ok(Expression::Function(Box::new(Function::new(
15942 "REGEXP_EXTRACT".to_string(),
15943 args,
15944 ))))
15945 } else {
15946 Ok(Expression::Function(f))
15947 }
15948 }
// REGEXP_EXTRACT(a, pattern, group, flags) 4-arg -> REGEXP_SUBSTR for
// Snowflake, whose argument order is (subject, pattern, position,
// occurrence, regex_params, group_num); position and occurrence are
// pinned to their defaults of 1 here.
"REGEXP_EXTRACT"
    if f.args.len() == 4
        && matches!(target, DialectType::Snowflake) =>
{
    // REGEXP_EXTRACT(a, 'pattern', 2, 'i') -> REGEXP_SUBSTR(a, 'pattern', 1, 1, 'i', 2)
    let mut args = f.args;
    let this = args.remove(0);
    let pattern = args.remove(0);
    let group = args.remove(0);
    let flags = args.remove(0);
    Ok(Expression::Function(Box::new(Function::new(
        "REGEXP_SUBSTR".to_string(),
        vec![
            this,
            pattern,
            Expression::number(1),
            Expression::number(1),
            flags,
            group,
        ],
    ))))
}
15972 // REGEXP_SUBSTR(a, pattern, position) 3-arg -> REGEXP_EXTRACT(SUBSTRING(a, pos), pattern)
15973 "REGEXP_SUBSTR"
15974 if f.args.len() == 3
15975 && matches!(
15976 target,
15977 DialectType::DuckDB
15978 | DialectType::Presto
15979 | DialectType::Trino
15980 | DialectType::Spark
15981 | DialectType::Databricks
15982 ) =>
15983 {
15984 let mut args = f.args;
15985 let this = args.remove(0);
15986 let pattern = args.remove(0);
15987 let position = args.remove(0);
15988 // Wrap subject in SUBSTRING(this, position) to apply the offset
15989 let substring_expr = Expression::Function(Box::new(Function::new(
15990 "SUBSTRING".to_string(),
15991 vec![this, position],
15992 )));
15993 let target_name = match target {
15994 DialectType::DuckDB => "REGEXP_EXTRACT",
15995 _ => "REGEXP_EXTRACT",
15996 };
15997 Ok(Expression::Function(Box::new(Function::new(
15998 target_name.to_string(),
15999 vec![substring_expr, pattern],
16000 ))))
16001 }
            // TO_DAYS(x) -> (DATEDIFF(x, '0000-01-01') + 1) or target-specific
            "TO_DAYS" if f.args.len() == 1 => {
                let x = f.args.into_iter().next().unwrap();
                // MySQL's TO_DAYS counts days from year 0; the +1 applied at
                // the end compensates for the day-difference baseline.
                let epoch = Expression::string("0000-01-01");
                // Build the final target-specific expression directly
                let datediff_expr = match target {
                    DialectType::MySQL | DialectType::SingleStore => {
                        // MySQL: (DATEDIFF(x, '0000-01-01') + 1)
                        Expression::Function(Box::new(Function::new(
                            "DATEDIFF".to_string(),
                            vec![x, epoch],
                        )))
                    }
                    DialectType::DuckDB => {
                        // DuckDB: (DATE_DIFF('DAY', CAST('0000-01-01' AS DATE), CAST(x AS DATE)) + 1)
                        let cast_epoch = Expression::Cast(Box::new(Cast {
                            this: epoch,
                            to: DataType::Date,
                            trailing_comments: Vec::new(),
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        let cast_x = Expression::Cast(Box::new(Cast {
                            this: x,
                            to: DataType::Date,
                            trailing_comments: Vec::new(),
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        Expression::Function(Box::new(Function::new(
                            "DATE_DIFF".to_string(),
                            vec![Expression::string("DAY"), cast_epoch, cast_x],
                        )))
                    }
                    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                        // Presto: (DATE_DIFF('DAY', CAST(CAST('0000-01-01' AS TIMESTAMP) AS DATE), CAST(CAST(x AS TIMESTAMP) AS DATE)) + 1)
                        let cast_epoch = Self::double_cast_timestamp_date(epoch);
                        let cast_x = Self::double_cast_timestamp_date(x);
                        Expression::Function(Box::new(Function::new(
                            "DATE_DIFF".to_string(),
                            vec![Expression::string("DAY"), cast_epoch, cast_x],
                        )))
                    }
                    _ => {
                        // Default: (DATEDIFF(x, '0000-01-01') + 1)
                        Expression::Function(Box::new(Function::new(
                            "DATEDIFF".to_string(),
                            vec![x, epoch],
                        )))
                    }
                };
                // Parenthesize so the +1 binds correctly inside larger expressions.
                let add_one = Expression::Add(Box::new(BinaryOp::new(
                    datediff_expr,
                    Expression::number(1),
                )));
                Ok(Expression::Paren(Box::new(crate::expressions::Paren {
                    this: add_one,
                    trailing_comments: Vec::new(),
                })))
            }
            // STR_TO_DATE(x, format) -> DATE_PARSE / STRPTIME / TO_DATE etc.
            "STR_TO_DATE"
                if f.args.len() == 2
                    && matches!(target, DialectType::Presto | DialectType::Trino) =>
            {
                let mut args = f.args;
                let x = args.remove(0);
                let format_expr = args.remove(0);
                // Check if the format contains time components
                // (strftime-style tokens; a non-literal format expression is
                // conservatively treated as date-only).
                let has_time =
                    if let Expression::Literal(Literal::String(ref fmt)) = format_expr {
                        fmt.contains("%H")
                            || fmt.contains("%T")
                            || fmt.contains("%M")
                            || fmt.contains("%S")
                            || fmt.contains("%I")
                            || fmt.contains("%p")
                    } else {
                        false
                    };
                let date_parse = Expression::Function(Box::new(Function::new(
                    "DATE_PARSE".to_string(),
                    vec![x, format_expr],
                )));
                if has_time {
                    // Has time components: just DATE_PARSE
                    Ok(date_parse)
                } else {
                    // Date-only: CAST(DATE_PARSE(...) AS DATE)
                    Ok(Expression::Cast(Box::new(Cast {
                        this: date_parse,
                        to: DataType::Date,
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    })))
                }
            }
            "STR_TO_DATE"
                if f.args.len() == 2
                    && matches!(target, DialectType::PostgreSQL | DialectType::Redshift) =>
            {
                let mut args = f.args;
                let x = args.remove(0);
                let fmt = args.remove(0);
                // Translate strftime tokens to Postgres TO_DATE tokens; a
                // non-literal format expression is passed through unchanged.
                let pg_fmt = match fmt {
                    Expression::Literal(Literal::String(s)) => Expression::string(
                        &s.replace("%Y", "YYYY")
                            .replace("%m", "MM")
                            .replace("%d", "DD")
                            .replace("%H", "HH24")
                            .replace("%M", "MI")
                            .replace("%S", "SS"),
                    ),
                    other => other,
                };
                let to_date = Expression::Function(Box::new(Function::new(
                    "TO_DATE".to_string(),
                    vec![x, pg_fmt],
                )));
                // Widen the TO_DATE result back to TIMESTAMP so the overall
                // expression type matches the source function's.
                Ok(Expression::Cast(Box::new(Cast {
                    this: to_date,
                    to: DataType::Timestamp {
                        timezone: false,
                        precision: None,
                    },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                })))
            }
            // RANGE(start, end) -> GENERATE_SERIES for SQLite
            "RANGE"
                if (f.args.len() == 1 || f.args.len() == 2)
                    && matches!(target, DialectType::SQLite) =>
            {
                if f.args.len() == 2 {
                    // RANGE(start, end) -> (SELECT value AS col_alias FROM GENERATE_SERIES(start, end))
                    // For SQLite, RANGE is exclusive on end, GENERATE_SERIES is inclusive
                    // NOTE(review): despite the exclusivity mismatch noted above,
                    // `end` is forwarded unchanged — confirm whether `end - 1`
                    // was intended here.
                    let mut args = f.args;
                    let start = args.remove(0);
                    let end = args.remove(0);
                    Ok(Expression::Function(Box::new(Function::new(
                        "GENERATE_SERIES".to_string(),
                        vec![start, end],
                    ))))
                } else {
                    // 1-arg RANGE(n): passed through unchanged.
                    Ok(Expression::Function(f))
                }
            }
            // UNIFORM(low, high[, seed]) -> UNIFORM(low, high, RANDOM([seed])) for Snowflake
            // When source is Snowflake, keep as-is (args already in correct form)
            "UNIFORM"
                if matches!(target, DialectType::Snowflake)
                    && (f.args.len() == 2 || f.args.len() == 3) =>
            {
                if matches!(source, DialectType::Snowflake) {
                    // Snowflake -> Snowflake: keep as-is
                    Ok(Expression::Function(f))
                } else {
                    // Snowflake requires a generator as the 3rd argument: wrap
                    // the optional seed in RANDOM(seed), or use RANDOM() when
                    // no seed was supplied.
                    let mut args = f.args;
                    let low = args.remove(0);
                    let high = args.remove(0);
                    let random = if !args.is_empty() {
                        let seed = args.remove(0);
                        Expression::Function(Box::new(Function::new(
                            "RANDOM".to_string(),
                            vec![seed],
                        )))
                    } else {
                        Expression::Function(Box::new(Function::new(
                            "RANDOM".to_string(),
                            vec![],
                        )))
                    };
                    Ok(Expression::Function(Box::new(Function::new(
                        "UNIFORM".to_string(),
                        vec![low, high, random],
                    ))))
                }
            }
            // TO_UTC_TIMESTAMP(ts, tz) -> target-specific UTC conversion
            "TO_UTC_TIMESTAMP" if f.args.len() == 2 => {
                let mut args = f.args;
                let ts_arg = args.remove(0);
                let tz_arg = args.remove(0);
                // Cast string literal to TIMESTAMP for all targets
                // (non-literal timestamp expressions pass through uncast).
                let ts_cast =
                    if matches!(&ts_arg, Expression::Literal(Literal::String(_))) {
                        Expression::Cast(Box::new(Cast {
                            this: ts_arg,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }))
                    } else {
                        ts_arg
                    };
                match target {
                    DialectType::Spark | DialectType::Databricks => {
                        // Native function: keep the name, forward (possibly
                        // casted) arguments.
                        Ok(Expression::Function(Box::new(Function::new(
                            "TO_UTC_TIMESTAMP".to_string(),
                            vec![ts_cast, tz_arg],
                        ))))
                    }
                    DialectType::Snowflake => {
                        // CONVERT_TIMEZONE(tz, 'UTC', CAST(ts AS TIMESTAMP))
                        Ok(Expression::Function(Box::new(Function::new(
                            "CONVERT_TIMEZONE".to_string(),
                            vec![tz_arg, Expression::string("UTC"), ts_cast],
                        ))))
                    }
                    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                        // WITH_TIMEZONE(CAST(ts AS TIMESTAMP), tz) AT TIME ZONE 'UTC'
                        let wtz = Expression::Function(Box::new(Function::new(
                            "WITH_TIMEZONE".to_string(),
                            vec![ts_cast, tz_arg],
                        )));
                        Ok(Expression::AtTimeZone(Box::new(
                            crate::expressions::AtTimeZone {
                                this: wtz,
                                zone: Expression::string("UTC"),
                            },
                        )))
                    }
                    DialectType::BigQuery => {
                        // DATETIME(TIMESTAMP(CAST(ts AS DATETIME), tz), 'UTC')
                        // If a TIMESTAMP cast was added above, unwrap it and
                        // re-cast the inner expression to DATETIME instead.
                        // NOTE(review): on the non-Cast path this is the last
                        // use of `ts_cast`, so the clone looks avoidable —
                        // confirm.
                        let cast_dt = Expression::Cast(Box::new(Cast {
                            this: if let Expression::Cast(c) = ts_cast {
                                c.this
                            } else {
                                ts_cast.clone()
                            },
                            to: DataType::Custom {
                                name: "DATETIME".to_string(),
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        let ts_func = Expression::Function(Box::new(Function::new(
                            "TIMESTAMP".to_string(),
                            vec![cast_dt, tz_arg],
                        )));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATETIME".to_string(),
                            vec![ts_func, Expression::string("UTC")],
                        ))))
                    }
                    _ => {
                        // DuckDB, PostgreSQL, Redshift: CAST(ts AS TIMESTAMP) AT TIME ZONE tz AT TIME ZONE 'UTC'
                        let atz1 = Expression::AtTimeZone(Box::new(
                            crate::expressions::AtTimeZone {
                                this: ts_cast,
                                zone: tz_arg,
                            },
                        ));
                        Ok(Expression::AtTimeZone(Box::new(
                            crate::expressions::AtTimeZone {
                                this: atz1,
                                zone: Expression::string("UTC"),
                            },
                        )))
                    }
                }
            }
            // FROM_UTC_TIMESTAMP(ts, tz) -> target-specific UTC conversion
            "FROM_UTC_TIMESTAMP" if f.args.len() == 2 => {
                let mut args = f.args;
                let ts_arg = args.remove(0);
                let tz_arg = args.remove(0);
                // Cast string literal to TIMESTAMP
                // (non-literal timestamp expressions pass through uncast).
                let ts_cast =
                    if matches!(&ts_arg, Expression::Literal(Literal::String(_))) {
                        Expression::Cast(Box::new(Cast {
                            this: ts_arg,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }))
                    } else {
                        ts_arg
                    };
                match target {
                    DialectType::Spark | DialectType::Databricks => {
                        // Native function: keep the name unchanged.
                        Ok(Expression::Function(Box::new(Function::new(
                            "FROM_UTC_TIMESTAMP".to_string(),
                            vec![ts_cast, tz_arg],
                        ))))
                    }
                    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                        // AT_TIMEZONE(CAST(ts AS TIMESTAMP), tz)
                        Ok(Expression::Function(Box::new(Function::new(
                            "AT_TIMEZONE".to_string(),
                            vec![ts_cast, tz_arg],
                        ))))
                    }
                    DialectType::Snowflake => {
                        // CONVERT_TIMEZONE('UTC', tz, CAST(ts AS TIMESTAMP))
                        Ok(Expression::Function(Box::new(Function::new(
                            "CONVERT_TIMEZONE".to_string(),
                            vec![Expression::string("UTC"), tz_arg, ts_cast],
                        ))))
                    }
                    _ => {
                        // DuckDB, PostgreSQL, Redshift: CAST(ts AS TIMESTAMP) AT TIME ZONE tz
                        Ok(Expression::AtTimeZone(Box::new(
                            crate::expressions::AtTimeZone {
                                this: ts_cast,
                                zone: tz_arg,
                            },
                        )))
                    }
                }
            }
            // MAP_FROM_ARRAYS(keys, values) -> target-specific map construction
            "MAP_FROM_ARRAYS" if f.args.len() == 2 => {
                // Name-only rewrite; the (keys, values) arguments are forwarded.
                let name = match target {
                    DialectType::Snowflake => "OBJECT_CONSTRUCT",
                    _ => "MAP",
                };
                Ok(Expression::Function(Box::new(Function::new(
                    name.to_string(),
                    f.args,
                ))))
            }
            // STR_TO_MAP(s, pair_delim, kv_delim) -> SPLIT_TO_MAP for Presto
            // (accepts 1..=3 args; all are forwarded unchanged)
            "STR_TO_MAP" if f.args.len() >= 1 => match target {
                DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                    Ok(Expression::Function(Box::new(Function::new(
                        "SPLIT_TO_MAP".to_string(),
                        f.args,
                    ))))
                }
                _ => Ok(Expression::Function(f)),
            },
            // TIME_TO_STR(x, fmt) -> Expression::TimeToStr for proper generation
            "TIME_TO_STR" if f.args.len() == 2 => {
                let mut args = f.args;
                let this = args.remove(0);
                let fmt_expr = args.remove(0);
                // Only a literal format can be carried in the typed node; any
                // other expression falls back to a default datetime format.
                let format =
                    if let Expression::Literal(Literal::String(s)) = fmt_expr {
                        s
                    } else {
                        "%Y-%m-%d %H:%M:%S".to_string()
                    };
                Ok(Expression::TimeToStr(Box::new(
                    crate::expressions::TimeToStr {
                        this: Box::new(this),
                        format,
                        culture: None,
                        zone: None,
                    },
                )))
            }
            // STR_TO_TIME(x, fmt) -> Expression::StrToTime for proper generation
            "STR_TO_TIME" if f.args.len() == 2 => {
                let mut args = f.args;
                let this = args.remove(0);
                let fmt_expr = args.remove(0);
                // Same literal-only format handling as the TIME_TO_STR arm.
                let format =
                    if let Expression::Literal(Literal::String(s)) = fmt_expr {
                        s
                    } else {
                        "%Y-%m-%d %H:%M:%S".to_string()
                    };
                Ok(Expression::StrToTime(Box::new(
                    crate::expressions::StrToTime {
                        this: Box::new(this),
                        format,
                        zone: None,
                        safe: None,
                        target_type: None,
                    },
                )))
            }
            // STR_TO_UNIX(x, fmt) -> Expression::StrToUnix for proper generation
            "STR_TO_UNIX" if f.args.len() >= 1 => {
                let mut args = f.args;
                let this = args.remove(0);
                // Optional 2nd arg: only a literal string format is preserved;
                // anything else degrades to None.
                let format = if !args.is_empty() {
                    if let Expression::Literal(Literal::String(s)) = args.remove(0) {
                        Some(s)
                    } else {
                        None
                    }
                } else {
                    None
                };
                Ok(Expression::StrToUnix(Box::new(
                    crate::expressions::StrToUnix {
                        this: Some(Box::new(this)),
                        format,
                    },
                )))
            }
            // TIME_TO_UNIX(x) -> Expression::TimeToUnix for proper generation
            "TIME_TO_UNIX" if f.args.len() == 1 => {
                let mut args = f.args;
                let this = args.remove(0);
                Ok(Expression::TimeToUnix(Box::new(
                    crate::expressions::UnaryFunc {
                        this,
                        original_name: None,
                    },
                )))
            }
            // UNIX_TO_STR(x, fmt) -> Expression::UnixToStr for proper generation
            "UNIX_TO_STR" if f.args.len() >= 1 => {
                let mut args = f.args;
                let this = args.remove(0);
                // Optional 2nd arg handled exactly like STR_TO_UNIX above.
                let format = if !args.is_empty() {
                    if let Expression::Literal(Literal::String(s)) = args.remove(0) {
                        Some(s)
                    } else {
                        None
                    }
                } else {
                    None
                };
                Ok(Expression::UnixToStr(Box::new(
                    crate::expressions::UnixToStr {
                        this: Box::new(this),
                        format,
                    },
                )))
            }
            // UNIX_TO_TIME(x) -> Expression::UnixToTime for proper generation
            "UNIX_TO_TIME" if f.args.len() == 1 => {
                let mut args = f.args;
                let this = args.remove(0);
                // All optional fields are left None; per-dialect generators
                // apply their own defaults when rendering this node.
                Ok(Expression::UnixToTime(Box::new(
                    crate::expressions::UnixToTime {
                        this: Box::new(this),
                        scale: None,
                        zone: None,
                        hours: None,
                        minutes: None,
                        format: None,
                        target_type: None,
                    },
                )))
            }
            // TIME_STR_TO_DATE(x) -> Expression::TimeStrToDate for proper generation
            "TIME_STR_TO_DATE" if f.args.len() == 1 => {
                let mut args = f.args;
                let this = args.remove(0);
                Ok(Expression::TimeStrToDate(Box::new(
                    crate::expressions::UnaryFunc {
                        this,
                        original_name: None,
                    },
                )))
            }
            // TIME_STR_TO_TIME(x) -> Expression::TimeStrToTime for proper generation
            "TIME_STR_TO_TIME" if f.args.len() == 1 => {
                let mut args = f.args;
                let this = args.remove(0);
                Ok(Expression::TimeStrToTime(Box::new(
                    crate::expressions::TimeStrToTime {
                        this: Box::new(this),
                        zone: None,
                    },
                )))
            }
            // MONTHS_BETWEEN(end, start) -> DuckDB complex expansion
            "MONTHS_BETWEEN" if f.args.len() == 2 => {
                match target {
                    DialectType::DuckDB => {
                        let mut args = f.args;
                        let end_date = args.remove(0);
                        let start_date = args.remove(0);
                        let cast_end = Self::ensure_cast_date(end_date);
                        let cast_start = Self::ensure_cast_date(start_date);
                        // DATE_DIFF('MONTH', start, end) + CASE WHEN DAY(end) = DAY(LAST_DAY(end)) AND DAY(start) = DAY(LAST_DAY(start)) THEN 0 ELSE (DAY(end) - DAY(start)) / 31.0 END
                        // i.e. a whole number of months when both dates are
                        // month-ends, otherwise a /31.0 fractional correction.
                        let dd = Expression::Function(Box::new(Function::new(
                            "DATE_DIFF".to_string(),
                            vec![
                                Expression::string("MONTH"),
                                cast_start.clone(),
                                cast_end.clone(),
                            ],
                        )));
                        let day_end = Expression::Function(Box::new(Function::new(
                            "DAY".to_string(),
                            vec![cast_end.clone()],
                        )));
                        let day_start = Expression::Function(Box::new(Function::new(
                            "DAY".to_string(),
                            vec![cast_start.clone()],
                        )));
                        let last_day_end = Expression::Function(Box::new(Function::new(
                            "LAST_DAY".to_string(),
                            vec![cast_end.clone()],
                        )));
                        let last_day_start = Expression::Function(Box::new(Function::new(
                            "LAST_DAY".to_string(),
                            vec![cast_start.clone()],
                        )));
                        let day_last_end = Expression::Function(Box::new(
                            Function::new("DAY".to_string(), vec![last_day_end]),
                        ));
                        let day_last_start = Expression::Function(Box::new(
                            Function::new("DAY".to_string(), vec![last_day_start]),
                        ));
                        let cond1 = Expression::Eq(Box::new(BinaryOp::new(
                            day_end.clone(),
                            day_last_end,
                        )));
                        let cond2 = Expression::Eq(Box::new(BinaryOp::new(
                            day_start.clone(),
                            day_last_start,
                        )));
                        let both_cond =
                            Expression::And(Box::new(BinaryOp::new(cond1, cond2)));
                        let day_diff = Expression::Sub(Box::new(BinaryOp::new(
                            day_end, day_start,
                        )));
                        // Parenthesize the subtraction so the division below
                        // applies to the whole difference.
                        let day_diff_paren = Expression::Paren(Box::new(
                            crate::expressions::Paren {
                                this: day_diff,
                                trailing_comments: Vec::new(),
                            },
                        ));
                        let frac = Expression::Div(Box::new(BinaryOp::new(
                            day_diff_paren,
                            Expression::Literal(Literal::Number("31.0".to_string())),
                        )));
                        let case_expr = Expression::Case(Box::new(Case {
                            operand: None,
                            whens: vec![(both_cond, Expression::number(0))],
                            else_: Some(frac),
                            comments: Vec::new(),
                        }));
                        Ok(Expression::Add(Box::new(BinaryOp::new(dd, case_expr))))
                    }
                    DialectType::Snowflake | DialectType::Redshift => {
                        // DATEDIFF(MONTH, start, end): bare unit identifier and
                        // (start, end) argument order.
                        let mut args = f.args;
                        let end_date = args.remove(0);
                        let start_date = args.remove(0);
                        let unit = Expression::Identifier(Identifier::new("MONTH"));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEDIFF".to_string(),
                            vec![unit, start_date, end_date],
                        ))))
                    }
                    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                        // DATE_DIFF('MONTH', start, end): string unit literal.
                        let mut args = f.args;
                        let end_date = args.remove(0);
                        let start_date = args.remove(0);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_DIFF".to_string(),
                            vec![Expression::string("MONTH"), start_date, end_date],
                        ))))
                    }
                    _ => Ok(Expression::Function(f)),
                }
            }
            // MONTHS_BETWEEN(end, start, roundOff) - 3-arg form (Spark-specific)
            // Drop the roundOff arg for non-Spark targets, keep it for Spark
            "MONTHS_BETWEEN" if f.args.len() == 3 => {
                match target {
                    DialectType::Spark | DialectType::Databricks => {
                        Ok(Expression::Function(f))
                    }
                    _ => {
                        // Drop the 3rd arg and delegate to the 2-arg logic
                        let mut args = f.args;
                        let end_date = args.remove(0);
                        let start_date = args.remove(0);
                        // Re-create as 2-arg and process
                        // (single bounded recursion: the rebuilt call has two
                        // args and so cannot re-enter this arm).
                        let f2 = Function::new(
                            "MONTHS_BETWEEN".to_string(),
                            vec![end_date, start_date],
                        );
                        let e2 = Expression::Function(Box::new(f2));
                        Self::cross_dialect_normalize(e2, source, target)
                    }
                }
            }
            // TO_TIMESTAMP(x) with 1 arg -> CAST(x AS TIMESTAMP) for most targets
            // (guarded on the SOURCE dialect: only Spark-family sources give
            // 1-arg TO_TIMESTAMP plain cast semantics)
            "TO_TIMESTAMP"
                if f.args.len() == 1
                    && matches!(
                        source,
                        DialectType::Spark | DialectType::Databricks | DialectType::Hive
                    ) =>
            {
                let arg = f.args.into_iter().next().unwrap();
                Ok(Expression::Cast(Box::new(Cast {
                    this: arg,
                    to: DataType::Timestamp {
                        timezone: false,
                        precision: None,
                    },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                })))
            }
            // STRING(x) -> CAST(x AS STRING) for Spark target
            "STRING"
                if f.args.len() == 1
                    && matches!(source, DialectType::Spark | DialectType::Databricks) =>
            {
                let arg = f.args.into_iter().next().unwrap();
                // Spark-family targets keep the native STRING type name;
                // everything else maps to TEXT.
                let dt = match target {
                    DialectType::Spark
                    | DialectType::Databricks
                    | DialectType::Hive => DataType::Custom {
                        name: "STRING".to_string(),
                    },
                    _ => DataType::Text,
                };
                Ok(Expression::Cast(Box::new(Cast {
                    this: arg,
                    to: dt,
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                })))
            }
            // LOGICAL_OR(x) -> BOOL_OR(x) for Spark target
            "LOGICAL_OR" if f.args.len() == 1 => {
                // Name-only rewrite; the single argument passes through.
                let name = match target {
                    DialectType::Spark | DialectType::Databricks => "BOOL_OR",
                    _ => "LOGICAL_OR",
                };
                Ok(Expression::Function(Box::new(Function::new(
                    name.to_string(),
                    f.args,
                ))))
            }
16683 // SPLIT(x, pattern) from Spark -> STR_SPLIT_REGEX for DuckDB, REGEXP_SPLIT for Presto
16684 "SPLIT"
16685 if f.args.len() == 2
16686 && matches!(
16687 source,
16688 DialectType::Spark
16689 | DialectType::Databricks
16690 | DialectType::Hive
16691 ) =>
16692 {
16693 let name = match target {
16694 DialectType::DuckDB => "STR_SPLIT_REGEX",
16695 DialectType::Presto
16696 | DialectType::Trino
16697 | DialectType::Athena => "REGEXP_SPLIT",
16698 DialectType::Spark
16699 | DialectType::Databricks
16700 | DialectType::Hive => "SPLIT",
16701 _ => "SPLIT",
16702 };
16703 Ok(Expression::Function(Box::new(Function::new(
16704 name.to_string(),
16705 f.args,
16706 ))))
16707 }
            // TRY_ELEMENT_AT -> ELEMENT_AT for Presto, array[idx] for DuckDB
            "TRY_ELEMENT_AT" if f.args.len() == 2 => match target {
                DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                    // Simple rename; both arguments forwarded.
                    Ok(Expression::Function(Box::new(Function::new(
                        "ELEMENT_AT".to_string(),
                        f.args,
                    ))))
                }
                DialectType::DuckDB => {
                    // Render as subscript syntax: arr[idx].
                    let mut args = f.args;
                    let arr = args.remove(0);
                    let idx = args.remove(0);
                    Ok(Expression::Subscript(Box::new(
                        crate::expressions::Subscript {
                            this: arr,
                            index: idx,
                        },
                    )))
                }
                _ => Ok(Expression::Function(f)),
            },
16729 // ARRAY_FILTER(arr, lambda) -> FILTER for Hive/Spark/Presto, LIST_FILTER for DuckDB
16730 "ARRAY_FILTER" if f.args.len() == 2 => {
16731 let name = match target {
16732 DialectType::DuckDB => "LIST_FILTER",
16733 DialectType::StarRocks => "ARRAY_FILTER",
16734 _ => "FILTER",
16735 };
16736 Ok(Expression::Function(Box::new(Function::new(
16737 name.to_string(),
16738 f.args,
16739 ))))
16740 }
16741 // FILTER(arr, lambda) -> ARRAY_FILTER for StarRocks, LIST_FILTER for DuckDB
16742 "FILTER" if f.args.len() == 2 => {
16743 let name = match target {
16744 DialectType::DuckDB => "LIST_FILTER",
16745 DialectType::StarRocks => "ARRAY_FILTER",
16746 _ => "FILTER",
16747 };
16748 Ok(Expression::Function(Box::new(Function::new(
16749 name.to_string(),
16750 f.args,
16751 ))))
16752 }
            // REDUCE(arr, init, lambda1, lambda2) -> AGGREGATE for Spark
            // (the optional 4th "finish" lambda, when present, is forwarded
            // unchanged along with the rest of the arguments)
            "REDUCE" if f.args.len() >= 3 => {
                let name = match target {
                    DialectType::Spark | DialectType::Databricks => "AGGREGATE",
                    _ => "REDUCE",
                };
                Ok(Expression::Function(Box::new(Function::new(
                    name.to_string(),
                    f.args,
                ))))
            }
            // CURRENT_SCHEMA() -> dialect-specific
            // (no arity guard: this arm matches any argument list)
            "CURRENT_SCHEMA" => {
                match target {
                    DialectType::PostgreSQL => {
                        // PostgreSQL: CURRENT_SCHEMA (no parens)
                        Ok(Expression::Function(Box::new(Function {
                            name: "CURRENT_SCHEMA".to_string(),
                            args: vec![],
                            distinct: false,
                            trailing_comments: vec![],
                            use_bracket_syntax: false,
                            no_parens: true,
                            quoted: false,
                            span: None,
                        })))
                    }
                    DialectType::MySQL
                    | DialectType::Doris
                    | DialectType::StarRocks => Ok(Expression::Function(Box::new(
                        Function::new("SCHEMA".to_string(), vec![]),
                    ))),
                    DialectType::TSQL => Ok(Expression::Function(Box::new(
                        Function::new("SCHEMA_NAME".to_string(), vec![]),
                    ))),
                    DialectType::SQLite => {
                        // SQLite: fold to the string literal 'main'.
                        Ok(Expression::Literal(Literal::String("main".to_string())))
                    }
                    _ => Ok(Expression::Function(f)),
                }
            }
            // LTRIM(str, chars) 2-arg -> TRIM(LEADING chars FROM str) for Spark/Hive/Databricks/ClickHouse
            "LTRIM" if f.args.len() == 2 => match target {
                DialectType::Spark
                | DialectType::Hive
                | DialectType::Databricks
                | DialectType::ClickHouse => {
                    let mut args = f.args;
                    let str_expr = args.remove(0);
                    let chars = args.remove(0);
                    // Emit SQL-standard TRIM syntax with an explicit LEADING
                    // position keyword.
                    Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
                        this: str_expr,
                        characters: Some(chars),
                        position: crate::expressions::TrimPosition::Leading,
                        sql_standard_syntax: true,
                        position_explicit: true,
                    })))
                }
                _ => Ok(Expression::Function(f)),
            },
            // RTRIM(str, chars) 2-arg -> TRIM(TRAILING chars FROM str) for Spark/Hive/Databricks/ClickHouse
            "RTRIM" if f.args.len() == 2 => match target {
                DialectType::Spark
                | DialectType::Hive
                | DialectType::Databricks
                | DialectType::ClickHouse => {
                    let mut args = f.args;
                    let str_expr = args.remove(0);
                    let chars = args.remove(0);
                    // Mirror of the LTRIM arm with the TRAILING position.
                    Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
                        this: str_expr,
                        characters: Some(chars),
                        position: crate::expressions::TrimPosition::Trailing,
                        sql_standard_syntax: true,
                        position_explicit: true,
                    })))
                }
                _ => Ok(Expression::Function(f)),
            },
            // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
            "ARRAY_REVERSE" if f.args.len() == 1 => match target {
                DialectType::ClickHouse => {
                    // Rename in place, preserving all other Function fields.
                    let mut new_f = *f;
                    new_f.name = "arrayReverse".to_string();
                    Ok(Expression::Function(Box::new(new_f)))
                }
                _ => Ok(Expression::Function(f)),
            },
            // UUID() -> NEWID() for TSQL
            "UUID" if f.args.is_empty() => match target {
                DialectType::TSQL | DialectType::Fabric => {
                    Ok(Expression::Function(Box::new(Function::new(
                        "NEWID".to_string(),
                        vec![],
                    ))))
                }
                _ => Ok(Expression::Function(f)),
            },
            // FARM_FINGERPRINT(x) -> farmFingerprint64(x) for ClickHouse, FARMFINGERPRINT64(x) for Redshift
            "FARM_FINGERPRINT" if f.args.len() == 1 => match target {
                DialectType::ClickHouse => {
                    let mut new_f = *f;
                    new_f.name = "farmFingerprint64".to_string();
                    Ok(Expression::Function(Box::new(new_f)))
                }
                DialectType::Redshift => {
                    let mut new_f = *f;
                    new_f.name = "FARMFINGERPRINT64".to_string();
                    Ok(Expression::Function(Box::new(new_f)))
                }
                _ => Ok(Expression::Function(f)),
            },
            // JSON_KEYS(x) -> JSON_OBJECT_KEYS(x) for Databricks/Spark, OBJECT_KEYS(x) for Snowflake
            // (no arity guard: the call is renamed in place for any argument list)
            "JSON_KEYS" => match target {
                DialectType::Databricks | DialectType::Spark => {
                    let mut new_f = *f;
                    new_f.name = "JSON_OBJECT_KEYS".to_string();
                    Ok(Expression::Function(Box::new(new_f)))
                }
                DialectType::Snowflake => {
                    let mut new_f = *f;
                    new_f.name = "OBJECT_KEYS".to_string();
                    Ok(Expression::Function(Box::new(new_f)))
                }
                _ => Ok(Expression::Function(f)),
            },
            // WEEKOFYEAR(x) -> WEEKISO(x) for Snowflake
            "WEEKOFYEAR" => match target {
                DialectType::Snowflake => {
                    let mut new_f = *f;
                    new_f.name = "WEEKISO".to_string();
                    Ok(Expression::Function(Box::new(new_f)))
                }
                _ => Ok(Expression::Function(f)),
            },
            // FORMAT(fmt, args...) -> FORMAT_STRING(fmt, args...) for Databricks
            // (guarded on Generic source only; other source dialects are left
            // untouched by this arm)
            "FORMAT"
                if f.args.len() >= 2 && matches!(source, DialectType::Generic) =>
            {
                match target {
                    DialectType::Databricks | DialectType::Spark => {
                        let mut new_f = *f;
                        new_f.name = "FORMAT_STRING".to_string();
                        Ok(Expression::Function(Box::new(new_f)))
                    }
                    _ => Ok(Expression::Function(f)),
                }
            }
            // CONCAT_WS('-', args...) -> CONCAT_WS('-', CAST(arg AS VARCHAR), ...) for Presto/Trino
            "CONCAT_WS" if f.args.len() >= 2 => match target {
                DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                    // The separator stays untouched; every remaining argument
                    // is wrapped in CAST(... AS VARCHAR).
                    let mut args = f.args;
                    let sep = args.remove(0);
                    let cast_args: Vec<Expression> = args
                        .into_iter()
                        .map(|a| {
                            Expression::Cast(Box::new(Cast {
                                this: a,
                                to: DataType::VarChar {
                                    length: None,
                                    parenthesized_length: false,
                                },
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                            }))
                        })
                        .collect();
                    let mut new_args = vec![sep];
                    new_args.extend(cast_args);
                    Ok(Expression::Function(Box::new(Function::new(
                        "CONCAT_WS".to_string(),
                        new_args,
                    ))))
                }
                _ => Ok(Expression::Function(f)),
            },
            // ARRAY_SLICE(x, start, end) -> SLICE(x, start, end) for Presto/Trino/Databricks, arraySlice for ClickHouse
            "ARRAY_SLICE" if f.args.len() >= 2 => match target {
                DialectType::Presto
                | DialectType::Trino
                | DialectType::Athena
                | DialectType::Databricks
                | DialectType::Spark => {
                    let mut new_f = *f;
                    new_f.name = "SLICE".to_string();
                    Ok(Expression::Function(Box::new(new_f)))
                }
                DialectType::ClickHouse => {
                    let mut new_f = *f;
                    new_f.name = "arraySlice".to_string();
                    Ok(Expression::Function(Box::new(new_f)))
                }
                _ => Ok(Expression::Function(f)),
            },
            // ARRAY_PREPEND(arr, x) -> LIST_PREPEND(x, arr) for DuckDB (swap args)
            "ARRAY_PREPEND" if f.args.len() == 2 => match target {
                DialectType::DuckDB => {
                    // DuckDB's LIST_PREPEND takes (value, list): swap order.
                    let mut args = f.args;
                    let arr = args.remove(0);
                    let val = args.remove(0);
                    Ok(Expression::Function(Box::new(Function::new(
                        "LIST_PREPEND".to_string(),
                        vec![val, arr],
                    ))))
                }
                _ => Ok(Expression::Function(f)),
            },
            // ARRAY_REMOVE(arr, target) -> dialect-specific
            "ARRAY_REMOVE" if f.args.len() == 2 => {
                match target {
                    DialectType::DuckDB => {
                        let mut args = f.args;
                        let arr = args.remove(0);
                        let target_val = args.remove(0);
                        // `_u` is the synthetic lambda parameter name.
                        let u_id = crate::expressions::Identifier::new("_u");
                        // LIST_FILTER(arr, _u -> _u <> target)
                        let lambda = Expression::Lambda(Box::new(
                            crate::expressions::LambdaExpr {
                                parameters: vec![u_id.clone()],
                                body: Expression::Neq(Box::new(BinaryOp {
                                    left: Expression::Identifier(u_id),
                                    right: target_val,
                                    left_comments: Vec::new(),
                                    operator_comments: Vec::new(),
                                    trailing_comments: Vec::new(),
                                })),
                                colon: false,
                                parameter_types: Vec::new(),
                            },
                        ));
                        Ok(Expression::Function(Box::new(Function::new(
                            "LIST_FILTER".to_string(),
                            vec![arr, lambda],
                        ))))
                    }
                    DialectType::ClickHouse => {
                        let mut args = f.args;
                        let arr = args.remove(0);
                        let target_val = args.remove(0);
                        let u_id = crate::expressions::Identifier::new("_u");
                        // arrayFilter(_u -> _u <> target, arr)
                        // Same lambda as the DuckDB branch, but ClickHouse's
                        // arrayFilter takes the lambda first.
                        let lambda = Expression::Lambda(Box::new(
                            crate::expressions::LambdaExpr {
                                parameters: vec![u_id.clone()],
                                body: Expression::Neq(Box::new(BinaryOp {
                                    left: Expression::Identifier(u_id),
                                    right: target_val,
                                    left_comments: Vec::new(),
                                    operator_comments: Vec::new(),
                                    trailing_comments: Vec::new(),
                                })),
                                colon: false,
                                parameter_types: Vec::new(),
                            },
                        ));
                        Ok(Expression::Function(Box::new(Function::new(
                            "arrayFilter".to_string(),
                            vec![lambda, arr],
                        ))))
                    }
                    DialectType::BigQuery => {
                        // ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)
                        let mut args = f.args;
                        let arr = args.remove(0);
                        let target_val = args.remove(0);
                        let u_id = crate::expressions::Identifier::new("_u");
                        // Bare column reference for `_u` used in both the
                        // projection and the WHERE clause.
                        let u_col = Expression::Column(crate::expressions::Column {
                            name: u_id.clone(),
                            table: None,
                            join_mark: false,
                            trailing_comments: Vec::new(),
                            span: None,
                        });
                        // UNNEST(the_array) AS _u
                        let unnest_expr = Expression::Unnest(Box::new(
                            crate::expressions::UnnestFunc {
                                this: arr,
                                expressions: Vec::new(),
                                with_ordinality: false,
                                alias: None,
                                offset_alias: None,
                            },
                        ));
                        let aliased_unnest = Expression::Alias(Box::new(
                            crate::expressions::Alias {
                                this: unnest_expr,
                                alias: u_id.clone(),
                                column_aliases: Vec::new(),
                                pre_alias_comments: Vec::new(),
                                trailing_comments: Vec::new(),
                            },
                        ));
                        // _u <> target
                        let where_cond = Expression::Neq(Box::new(BinaryOp {
                            left: u_col.clone(),
                            right: target_val,
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                        }));
                        // SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target
                        let subquery = Expression::Select(Box::new(
                            crate::expressions::Select::new()
                                .column(u_col)
                                .from(aliased_unnest)
                                .where_(where_cond),
                        ));
                        // ARRAY(subquery) -- use ArrayFunc with subquery as single element
                        Ok(Expression::ArrayFunc(Box::new(
                            crate::expressions::ArrayConstructor {
                                expressions: vec![subquery],
                                bracket_notation: false,
                                use_list_keyword: false,
                            },
                        )))
                    }
                    _ => Ok(Expression::Function(f)),
                }
            }
17075 // PARSE_JSON(str) -> remove for SQLite/Doris (just use the string literal)
17076 "PARSE_JSON" if f.args.len() == 1 => {
17077 match target {
17078 DialectType::SQLite
17079 | DialectType::Doris
17080 | DialectType::MySQL
17081 | DialectType::StarRocks => {
17082 // Strip PARSE_JSON, return the inner argument
17083 Ok(f.args.into_iter().next().unwrap())
17084 }
17085 _ => Ok(Expression::Function(f)),
17086 }
17087 }
17088 // JSON_REMOVE(PARSE_JSON(str), path...) -> for SQLite strip PARSE_JSON
17089 // This is handled by PARSE_JSON stripping above; JSON_REMOVE is passed through
17090 "JSON_REMOVE" => Ok(Expression::Function(f)),
17091 // JSON_SET(PARSE_JSON(str), path, PARSE_JSON(val)) -> for SQLite strip PARSE_JSON
17092 // This is handled by PARSE_JSON stripping above; JSON_SET is passed through
17093 "JSON_SET" => Ok(Expression::Function(f)),
17094 // DECODE(x, search1, result1, ..., default) -> CASE WHEN
17095 // Behavior per search value type:
17096 // NULL literal -> CASE WHEN x IS NULL THEN result
17097 // Literal (number, string, bool) -> CASE WHEN x = literal THEN result
17098 // Non-literal (column, expr) -> CASE WHEN x = search OR (x IS NULL AND search IS NULL) THEN result
17099 "DECODE" if f.args.len() >= 3 => {
17100 // Keep as DECODE for targets that support it natively
17101 let keep_as_decode = matches!(
17102 target,
17103 DialectType::Oracle
17104 | DialectType::Snowflake
17105 | DialectType::Redshift
17106 | DialectType::Teradata
17107 | DialectType::Spark
17108 | DialectType::Databricks
17109 );
17110 if keep_as_decode {
17111 return Ok(Expression::Function(f));
17112 }
17113
17114 let mut args = f.args;
17115 let this_expr = args.remove(0);
17116 let mut pairs = Vec::new();
17117 let mut default = None;
17118 let mut i = 0;
17119 while i + 1 < args.len() {
17120 pairs.push((args[i].clone(), args[i + 1].clone()));
17121 i += 2;
17122 }
17123 if i < args.len() {
17124 default = Some(args[i].clone());
17125 }
17126 // Helper: check if expression is a literal value
17127 fn is_literal(e: &Expression) -> bool {
17128 matches!(
17129 e,
17130 Expression::Literal(_)
17131 | Expression::Boolean(_)
17132 | Expression::Neg(_)
17133 )
17134 }
17135 let whens: Vec<(Expression, Expression)> = pairs
17136 .into_iter()
17137 .map(|(search, result)| {
17138 if matches!(&search, Expression::Null(_)) {
17139 // NULL search -> IS NULL
17140 let condition = Expression::Is(Box::new(BinaryOp {
17141 left: this_expr.clone(),
17142 right: Expression::Null(crate::expressions::Null),
17143 left_comments: Vec::new(),
17144 operator_comments: Vec::new(),
17145 trailing_comments: Vec::new(),
17146 }));
17147 (condition, result)
17148 } else if is_literal(&search) {
17149 // Literal search -> simple equality
17150 let eq = Expression::Eq(Box::new(BinaryOp {
17151 left: this_expr.clone(),
17152 right: search,
17153 left_comments: Vec::new(),
17154 operator_comments: Vec::new(),
17155 trailing_comments: Vec::new(),
17156 }));
17157 (eq, result)
17158 } else {
17159 // Non-literal (column ref, expression) -> null-safe comparison
17160 let needs_paren = matches!(
17161 &search,
17162 Expression::Eq(_)
17163 | Expression::Neq(_)
17164 | Expression::Gt(_)
17165 | Expression::Gte(_)
17166 | Expression::Lt(_)
17167 | Expression::Lte(_)
17168 );
17169 let search_for_eq = if needs_paren {
17170 Expression::Paren(Box::new(
17171 crate::expressions::Paren {
17172 this: search.clone(),
17173 trailing_comments: Vec::new(),
17174 },
17175 ))
17176 } else {
17177 search.clone()
17178 };
17179 let eq = Expression::Eq(Box::new(BinaryOp {
17180 left: this_expr.clone(),
17181 right: search_for_eq,
17182 left_comments: Vec::new(),
17183 operator_comments: Vec::new(),
17184 trailing_comments: Vec::new(),
17185 }));
17186 let search_for_null = if needs_paren {
17187 Expression::Paren(Box::new(
17188 crate::expressions::Paren {
17189 this: search.clone(),
17190 trailing_comments: Vec::new(),
17191 },
17192 ))
17193 } else {
17194 search.clone()
17195 };
17196 let x_is_null = Expression::Is(Box::new(BinaryOp {
17197 left: this_expr.clone(),
17198 right: Expression::Null(crate::expressions::Null),
17199 left_comments: Vec::new(),
17200 operator_comments: Vec::new(),
17201 trailing_comments: Vec::new(),
17202 }));
17203 let s_is_null = Expression::Is(Box::new(BinaryOp {
17204 left: search_for_null,
17205 right: Expression::Null(crate::expressions::Null),
17206 left_comments: Vec::new(),
17207 operator_comments: Vec::new(),
17208 trailing_comments: Vec::new(),
17209 }));
17210 let both_null = Expression::And(Box::new(BinaryOp {
17211 left: x_is_null,
17212 right: s_is_null,
17213 left_comments: Vec::new(),
17214 operator_comments: Vec::new(),
17215 trailing_comments: Vec::new(),
17216 }));
17217 let condition = Expression::Or(Box::new(BinaryOp {
17218 left: eq,
17219 right: Expression::Paren(Box::new(
17220 crate::expressions::Paren {
17221 this: both_null,
17222 trailing_comments: Vec::new(),
17223 },
17224 )),
17225 left_comments: Vec::new(),
17226 operator_comments: Vec::new(),
17227 trailing_comments: Vec::new(),
17228 }));
17229 (condition, result)
17230 }
17231 })
17232 .collect();
17233 Ok(Expression::Case(Box::new(Case {
17234 operand: None,
17235 whens,
17236 else_: default,
17237 comments: Vec::new(),
17238 })))
17239 }
17240 // LEVENSHTEIN(a, b, ...) -> dialect-specific
17241 "LEVENSHTEIN" => {
17242 match target {
17243 DialectType::BigQuery => {
17244 let mut new_f = *f;
17245 new_f.name = "EDIT_DISTANCE".to_string();
17246 Ok(Expression::Function(Box::new(new_f)))
17247 }
17248 DialectType::Drill => {
17249 let mut new_f = *f;
17250 new_f.name = "LEVENSHTEIN_DISTANCE".to_string();
17251 Ok(Expression::Function(Box::new(new_f)))
17252 }
17253 DialectType::PostgreSQL if f.args.len() == 6 => {
17254 // PostgreSQL: LEVENSHTEIN(src, tgt, ins, del, sub, max_d) -> LEVENSHTEIN_LESS_EQUAL
17255 // 2 args: basic, 5 args: with costs, 6 args: with costs + max_distance
17256 let mut new_f = *f;
17257 new_f.name = "LEVENSHTEIN_LESS_EQUAL".to_string();
17258 Ok(Expression::Function(Box::new(new_f)))
17259 }
17260 _ => Ok(Expression::Function(f)),
17261 }
17262 }
17263 // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
17264 "ARRAY_REVERSE" => match target {
17265 DialectType::ClickHouse => {
17266 let mut new_f = *f;
17267 new_f.name = "arrayReverse".to_string();
17268 Ok(Expression::Function(Box::new(new_f)))
17269 }
17270 _ => Ok(Expression::Function(f)),
17271 },
17272 // GENERATE_DATE_ARRAY(start, end[, step]) -> target-specific
17273 "GENERATE_DATE_ARRAY" => {
17274 let mut args = f.args;
17275 if matches!(target, DialectType::BigQuery) {
17276 // BigQuery keeps GENERATE_DATE_ARRAY; add default interval if not present
17277 if args.len() == 2 {
17278 let default_interval = Expression::Interval(Box::new(
17279 crate::expressions::Interval {
17280 this: Some(Expression::Literal(Literal::String(
17281 "1".to_string(),
17282 ))),
17283 unit: Some(
17284 crate::expressions::IntervalUnitSpec::Simple {
17285 unit: crate::expressions::IntervalUnit::Day,
17286 use_plural: false,
17287 },
17288 ),
17289 },
17290 ));
17291 args.push(default_interval);
17292 }
17293 Ok(Expression::Function(Box::new(Function::new(
17294 "GENERATE_DATE_ARRAY".to_string(),
17295 args,
17296 ))))
17297 } else if matches!(target, DialectType::DuckDB) {
17298 // DuckDB: CAST(GENERATE_SERIES(start, end, step) AS DATE[])
17299 let start = args.get(0).cloned();
17300 let end = args.get(1).cloned();
17301 let step = args.get(2).cloned().or_else(|| {
17302 Some(Expression::Interval(Box::new(
17303 crate::expressions::Interval {
17304 this: Some(Expression::Literal(Literal::String(
17305 "1".to_string(),
17306 ))),
17307 unit: Some(
17308 crate::expressions::IntervalUnitSpec::Simple {
17309 unit: crate::expressions::IntervalUnit::Day,
17310 use_plural: false,
17311 },
17312 ),
17313 },
17314 )))
17315 });
17316 let gen_series = Expression::GenerateSeries(Box::new(
17317 crate::expressions::GenerateSeries {
17318 start: start.map(Box::new),
17319 end: end.map(Box::new),
17320 step: step.map(Box::new),
17321 is_end_exclusive: None,
17322 },
17323 ));
17324 Ok(Expression::Cast(Box::new(Cast {
17325 this: gen_series,
17326 to: DataType::Array {
17327 element_type: Box::new(DataType::Date),
17328 dimension: None,
17329 },
17330 trailing_comments: vec![],
17331 double_colon_syntax: false,
17332 format: None,
17333 default: None,
17334 })))
17335 } else if matches!(
17336 target,
17337 DialectType::Presto | DialectType::Trino | DialectType::Athena
17338 ) {
17339 // Presto/Trino: SEQUENCE(start, end, interval) with interval normalization
17340 let start = args.get(0).cloned();
17341 let end = args.get(1).cloned();
17342 let step = args.get(2).cloned().or_else(|| {
17343 Some(Expression::Interval(Box::new(
17344 crate::expressions::Interval {
17345 this: Some(Expression::Literal(Literal::String(
17346 "1".to_string(),
17347 ))),
17348 unit: Some(
17349 crate::expressions::IntervalUnitSpec::Simple {
17350 unit: crate::expressions::IntervalUnit::Day,
17351 use_plural: false,
17352 },
17353 ),
17354 },
17355 )))
17356 });
17357 let gen_series = Expression::GenerateSeries(Box::new(
17358 crate::expressions::GenerateSeries {
17359 start: start.map(Box::new),
17360 end: end.map(Box::new),
17361 step: step.map(Box::new),
17362 is_end_exclusive: None,
17363 },
17364 ));
17365 Ok(gen_series)
17366 } else if matches!(
17367 target,
17368 DialectType::Spark | DialectType::Databricks
17369 ) {
17370 // Spark/Databricks: SEQUENCE(start, end, step) - keep step as-is
17371 let start = args.get(0).cloned();
17372 let end = args.get(1).cloned();
17373 let step = args.get(2).cloned().or_else(|| {
17374 Some(Expression::Interval(Box::new(
17375 crate::expressions::Interval {
17376 this: Some(Expression::Literal(Literal::String(
17377 "1".to_string(),
17378 ))),
17379 unit: Some(
17380 crate::expressions::IntervalUnitSpec::Simple {
17381 unit: crate::expressions::IntervalUnit::Day,
17382 use_plural: false,
17383 },
17384 ),
17385 },
17386 )))
17387 });
17388 let gen_series = Expression::GenerateSeries(Box::new(
17389 crate::expressions::GenerateSeries {
17390 start: start.map(Box::new),
17391 end: end.map(Box::new),
17392 step: step.map(Box::new),
17393 is_end_exclusive: None,
17394 },
17395 ));
17396 Ok(gen_series)
17397 } else if matches!(target, DialectType::Snowflake) {
17398 // Snowflake: keep as GENERATE_DATE_ARRAY for later transform
17399 if args.len() == 2 {
17400 let default_interval = Expression::Interval(Box::new(
17401 crate::expressions::Interval {
17402 this: Some(Expression::Literal(Literal::String(
17403 "1".to_string(),
17404 ))),
17405 unit: Some(
17406 crate::expressions::IntervalUnitSpec::Simple {
17407 unit: crate::expressions::IntervalUnit::Day,
17408 use_plural: false,
17409 },
17410 ),
17411 },
17412 ));
17413 args.push(default_interval);
17414 }
17415 Ok(Expression::Function(Box::new(Function::new(
17416 "GENERATE_DATE_ARRAY".to_string(),
17417 args,
17418 ))))
17419 } else if matches!(
17420 target,
17421 DialectType::MySQL
17422 | DialectType::TSQL
17423 | DialectType::Fabric
17424 | DialectType::Redshift
17425 ) {
17426 // MySQL/TSQL/Redshift: keep as GENERATE_DATE_ARRAY for the preprocess
17427 // step (unnest_generate_date_array_using_recursive_cte) to convert to CTE
17428 Ok(Expression::Function(Box::new(Function::new(
17429 "GENERATE_DATE_ARRAY".to_string(),
17430 args,
17431 ))))
17432 } else {
17433 // PostgreSQL/others: convert to GenerateSeries
17434 let start = args.get(0).cloned();
17435 let end = args.get(1).cloned();
17436 let step = args.get(2).cloned().or_else(|| {
17437 Some(Expression::Interval(Box::new(
17438 crate::expressions::Interval {
17439 this: Some(Expression::Literal(Literal::String(
17440 "1".to_string(),
17441 ))),
17442 unit: Some(
17443 crate::expressions::IntervalUnitSpec::Simple {
17444 unit: crate::expressions::IntervalUnit::Day,
17445 use_plural: false,
17446 },
17447 ),
17448 },
17449 )))
17450 });
17451 Ok(Expression::GenerateSeries(Box::new(
17452 crate::expressions::GenerateSeries {
17453 start: start.map(Box::new),
17454 end: end.map(Box::new),
17455 step: step.map(Box::new),
17456 is_end_exclusive: None,
17457 },
17458 )))
17459 }
17460 }
17461 _ => Ok(Expression::Function(f)),
17462 }
17463 } else if let Expression::AggregateFunction(mut af) = e {
17464 let name = af.name.to_uppercase();
17465 match name.as_str() {
17466 "ARBITRARY" if af.args.len() == 1 => {
17467 let arg = af.args.into_iter().next().unwrap();
17468 Ok(convert_arbitrary(arg, target))
17469 }
17470 "JSON_ARRAYAGG" => {
17471 match target {
17472 DialectType::PostgreSQL => {
17473 af.name = "JSON_AGG".to_string();
17474 // Add NULLS FIRST to ORDER BY items for PostgreSQL
17475 for ordered in af.order_by.iter_mut() {
17476 if ordered.nulls_first.is_none() {
17477 ordered.nulls_first = Some(true);
17478 }
17479 }
17480 Ok(Expression::AggregateFunction(af))
17481 }
17482 _ => Ok(Expression::AggregateFunction(af)),
17483 }
17484 }
17485 _ => Ok(Expression::AggregateFunction(af)),
17486 }
17487 } else if let Expression::JSONArrayAgg(ja) = e {
17488 // JSONArrayAgg -> JSON_AGG for PostgreSQL, JSON_ARRAYAGG for others
17489 match target {
17490 DialectType::PostgreSQL => {
17491 let mut order_by = Vec::new();
17492 if let Some(order_expr) = ja.order {
17493 if let Expression::OrderBy(ob) = *order_expr {
17494 for mut ordered in ob.expressions {
17495 if ordered.nulls_first.is_none() {
17496 ordered.nulls_first = Some(true);
17497 }
17498 order_by.push(ordered);
17499 }
17500 }
17501 }
17502 Ok(Expression::AggregateFunction(Box::new(
17503 crate::expressions::AggregateFunction {
17504 name: "JSON_AGG".to_string(),
17505 args: vec![*ja.this],
17506 distinct: false,
17507 filter: None,
17508 order_by,
17509 limit: None,
17510 ignore_nulls: None,
17511 },
17512 )))
17513 }
17514 _ => Ok(Expression::JSONArrayAgg(ja)),
17515 }
17516 } else if let Expression::ToNumber(tn) = e {
17517 // TO_NUMBER(x) with no format/precision/scale -> CAST(x AS DOUBLE)
17518 let arg = *tn.this;
17519 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
17520 this: arg,
17521 to: crate::expressions::DataType::Double {
17522 precision: None,
17523 scale: None,
17524 },
17525 double_colon_syntax: false,
17526 trailing_comments: Vec::new(),
17527 format: None,
17528 default: None,
17529 })))
17530 } else {
17531 Ok(e)
17532 }
17533 }
17534
17535 Action::RegexpLikeToDuckDB => {
17536 if let Expression::RegexpLike(f) = e {
17537 let mut args = vec![f.this, f.pattern];
17538 if let Some(flags) = f.flags {
17539 args.push(flags);
17540 }
17541 Ok(Expression::Function(Box::new(Function::new(
17542 "REGEXP_MATCHES".to_string(),
17543 args,
17544 ))))
17545 } else {
17546 Ok(e)
17547 }
17548 }
17549 Action::EpochConvert => {
17550 if let Expression::Epoch(f) = e {
17551 let arg = f.this;
17552 let name = match target {
17553 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
17554 "UNIX_TIMESTAMP"
17555 }
17556 DialectType::Presto | DialectType::Trino => "TO_UNIXTIME",
17557 DialectType::BigQuery => "TIME_TO_UNIX",
17558 _ => "EPOCH",
17559 };
17560 Ok(Expression::Function(Box::new(Function::new(
17561 name.to_string(),
17562 vec![arg],
17563 ))))
17564 } else {
17565 Ok(e)
17566 }
17567 }
17568 Action::EpochMsConvert => {
17569 use crate::expressions::{BinaryOp, Cast};
17570 if let Expression::EpochMs(f) = e {
17571 let arg = f.this;
17572 match target {
17573 DialectType::Spark | DialectType::Databricks => {
17574 Ok(Expression::Function(Box::new(Function::new(
17575 "TIMESTAMP_MILLIS".to_string(),
17576 vec![arg],
17577 ))))
17578 }
17579 DialectType::BigQuery => Ok(Expression::Function(Box::new(
17580 Function::new("TIMESTAMP_MILLIS".to_string(), vec![arg]),
17581 ))),
17582 DialectType::Presto | DialectType::Trino => {
17583 // FROM_UNIXTIME(CAST(x AS DOUBLE) / POW(10, 3))
17584 let cast_arg = Expression::Cast(Box::new(Cast {
17585 this: arg,
17586 to: DataType::Double {
17587 precision: None,
17588 scale: None,
17589 },
17590 trailing_comments: Vec::new(),
17591 double_colon_syntax: false,
17592 format: None,
17593 default: None,
17594 }));
17595 let div = Expression::Div(Box::new(BinaryOp::new(
17596 cast_arg,
17597 Expression::Function(Box::new(Function::new(
17598 "POW".to_string(),
17599 vec![Expression::number(10), Expression::number(3)],
17600 ))),
17601 )));
17602 Ok(Expression::Function(Box::new(Function::new(
17603 "FROM_UNIXTIME".to_string(),
17604 vec![div],
17605 ))))
17606 }
17607 DialectType::MySQL => {
17608 // FROM_UNIXTIME(x / POWER(10, 3))
17609 let div = Expression::Div(Box::new(BinaryOp::new(
17610 arg,
17611 Expression::Function(Box::new(Function::new(
17612 "POWER".to_string(),
17613 vec![Expression::number(10), Expression::number(3)],
17614 ))),
17615 )));
17616 Ok(Expression::Function(Box::new(Function::new(
17617 "FROM_UNIXTIME".to_string(),
17618 vec![div],
17619 ))))
17620 }
17621 DialectType::PostgreSQL | DialectType::Redshift => {
17622 // TO_TIMESTAMP(CAST(x AS DOUBLE PRECISION) / POWER(10, 3))
17623 let cast_arg = Expression::Cast(Box::new(Cast {
17624 this: arg,
17625 to: DataType::Custom {
17626 name: "DOUBLE PRECISION".to_string(),
17627 },
17628 trailing_comments: Vec::new(),
17629 double_colon_syntax: false,
17630 format: None,
17631 default: None,
17632 }));
17633 let div = Expression::Div(Box::new(BinaryOp::new(
17634 cast_arg,
17635 Expression::Function(Box::new(Function::new(
17636 "POWER".to_string(),
17637 vec![Expression::number(10), Expression::number(3)],
17638 ))),
17639 )));
17640 Ok(Expression::Function(Box::new(Function::new(
17641 "TO_TIMESTAMP".to_string(),
17642 vec![div],
17643 ))))
17644 }
17645 DialectType::ClickHouse => {
17646 // fromUnixTimestamp64Milli(CAST(x AS Nullable(Int64)))
17647 let cast_arg = Expression::Cast(Box::new(Cast {
17648 this: arg,
17649 to: DataType::Nullable {
17650 inner: Box::new(DataType::BigInt { length: None }),
17651 },
17652 trailing_comments: Vec::new(),
17653 double_colon_syntax: false,
17654 format: None,
17655 default: None,
17656 }));
17657 Ok(Expression::Function(Box::new(Function::new(
17658 "fromUnixTimestamp64Milli".to_string(),
17659 vec![cast_arg],
17660 ))))
17661 }
17662 _ => Ok(Expression::Function(Box::new(Function::new(
17663 "EPOCH_MS".to_string(),
17664 vec![arg],
17665 )))),
17666 }
17667 } else {
17668 Ok(e)
17669 }
17670 }
17671 Action::TSQLTypeNormalize => {
17672 if let Expression::DataType(dt) = e {
17673 let new_dt = match &dt {
17674 DataType::Custom { name } if name.eq_ignore_ascii_case("MONEY") => {
17675 DataType::Decimal {
17676 precision: Some(15),
17677 scale: Some(4),
17678 }
17679 }
17680 DataType::Custom { name }
17681 if name.eq_ignore_ascii_case("SMALLMONEY") =>
17682 {
17683 DataType::Decimal {
17684 precision: Some(6),
17685 scale: Some(4),
17686 }
17687 }
17688 DataType::Custom { name } if name.eq_ignore_ascii_case("DATETIME2") => {
17689 DataType::Timestamp {
17690 timezone: false,
17691 precision: None,
17692 }
17693 }
17694 DataType::Custom { name } if name.eq_ignore_ascii_case("REAL") => {
17695 DataType::Float {
17696 precision: None,
17697 scale: None,
17698 real_spelling: false,
17699 }
17700 }
17701 DataType::Float {
17702 real_spelling: true,
17703 ..
17704 } => DataType::Float {
17705 precision: None,
17706 scale: None,
17707 real_spelling: false,
17708 },
17709 DataType::Custom { name } if name.eq_ignore_ascii_case("IMAGE") => {
17710 DataType::Custom {
17711 name: "BLOB".to_string(),
17712 }
17713 }
17714 DataType::Custom { name } if name.eq_ignore_ascii_case("BIT") => {
17715 DataType::Boolean
17716 }
17717 DataType::Custom { name }
17718 if name.eq_ignore_ascii_case("ROWVERSION") =>
17719 {
17720 DataType::Custom {
17721 name: "BINARY".to_string(),
17722 }
17723 }
17724 DataType::Custom { name }
17725 if name.eq_ignore_ascii_case("UNIQUEIDENTIFIER") =>
17726 {
17727 match target {
17728 DialectType::Spark
17729 | DialectType::Databricks
17730 | DialectType::Hive => DataType::Custom {
17731 name: "STRING".to_string(),
17732 },
17733 _ => DataType::VarChar {
17734 length: Some(36),
17735 parenthesized_length: true,
17736 },
17737 }
17738 }
17739 DataType::Custom { name }
17740 if name.eq_ignore_ascii_case("DATETIMEOFFSET") =>
17741 {
17742 match target {
17743 DialectType::Spark
17744 | DialectType::Databricks
17745 | DialectType::Hive => DataType::Timestamp {
17746 timezone: false,
17747 precision: None,
17748 },
17749 _ => DataType::Timestamp {
17750 timezone: true,
17751 precision: None,
17752 },
17753 }
17754 }
17755 DataType::Custom { ref name }
17756 if name.to_uppercase().starts_with("DATETIME2(") =>
17757 {
17758 // DATETIME2(n) -> TIMESTAMP
17759 DataType::Timestamp {
17760 timezone: false,
17761 precision: None,
17762 }
17763 }
17764 DataType::Custom { ref name }
17765 if name.to_uppercase().starts_with("TIME(") =>
17766 {
17767 // TIME(n) -> TIMESTAMP for Spark, keep as TIME for others
17768 match target {
17769 DialectType::Spark
17770 | DialectType::Databricks
17771 | DialectType::Hive => DataType::Timestamp {
17772 timezone: false,
17773 precision: None,
17774 },
17775 _ => return Ok(Expression::DataType(dt)),
17776 }
17777 }
17778 DataType::Custom { ref name }
17779 if name.to_uppercase().starts_with("NUMERIC") =>
17780 {
17781 // Parse NUMERIC(p,s) back to Decimal(p,s)
17782 let upper = name.to_uppercase();
17783 if let Some(inner) = upper
17784 .strip_prefix("NUMERIC(")
17785 .and_then(|s| s.strip_suffix(')'))
17786 {
17787 let parts: Vec<&str> = inner.split(',').collect();
17788 let precision =
17789 parts.first().and_then(|s| s.trim().parse::<u32>().ok());
17790 let scale =
17791 parts.get(1).and_then(|s| s.trim().parse::<u32>().ok());
17792 DataType::Decimal { precision, scale }
17793 } else if upper == "NUMERIC" {
17794 DataType::Decimal {
17795 precision: None,
17796 scale: None,
17797 }
17798 } else {
17799 return Ok(Expression::DataType(dt));
17800 }
17801 }
17802 DataType::Float {
17803 precision: Some(p), ..
17804 } => {
17805 // For Hive/Spark: FLOAT(1-32) -> FLOAT, FLOAT(33+) -> DOUBLE (IEEE 754 boundary)
17806 // For other targets: FLOAT(1-24) -> FLOAT, FLOAT(25+) -> DOUBLE (TSQL boundary)
17807 let boundary = match target {
17808 DialectType::Hive
17809 | DialectType::Spark
17810 | DialectType::Databricks => 32,
17811 _ => 24,
17812 };
17813 if *p <= boundary {
17814 DataType::Float {
17815 precision: None,
17816 scale: None,
17817 real_spelling: false,
17818 }
17819 } else {
17820 DataType::Double {
17821 precision: None,
17822 scale: None,
17823 }
17824 }
17825 }
17826 DataType::TinyInt { .. } => match target {
17827 DialectType::DuckDB => DataType::Custom {
17828 name: "UTINYINT".to_string(),
17829 },
17830 DialectType::Hive
17831 | DialectType::Spark
17832 | DialectType::Databricks => DataType::SmallInt { length: None },
17833 _ => return Ok(Expression::DataType(dt)),
17834 },
17835 // INTEGER -> INT for Spark/Databricks
17836 DataType::Int {
17837 length,
17838 integer_spelling: true,
17839 } => DataType::Int {
17840 length: *length,
17841 integer_spelling: false,
17842 },
17843 _ => return Ok(Expression::DataType(dt)),
17844 };
17845 Ok(Expression::DataType(new_dt))
17846 } else {
17847 Ok(e)
17848 }
17849 }
17850 Action::MySQLSafeDivide => {
17851 use crate::expressions::{BinaryOp, Cast};
17852 if let Expression::Div(op) = e {
17853 let left = op.left;
17854 let right = op.right;
17855 // For SQLite: CAST left as REAL but NO NULLIF wrapping
17856 if matches!(target, DialectType::SQLite) {
17857 let new_left = Expression::Cast(Box::new(Cast {
17858 this: left,
17859 to: DataType::Float {
17860 precision: None,
17861 scale: None,
17862 real_spelling: true,
17863 },
17864 trailing_comments: Vec::new(),
17865 double_colon_syntax: false,
17866 format: None,
17867 default: None,
17868 }));
17869 return Ok(Expression::Div(Box::new(BinaryOp::new(new_left, right))));
17870 }
17871 // Wrap right in NULLIF(right, 0)
17872 let nullif_right = Expression::Function(Box::new(Function::new(
17873 "NULLIF".to_string(),
17874 vec![right, Expression::number(0)],
17875 )));
17876 // For some dialects, also CAST the left side
17877 let new_left = match target {
17878 DialectType::PostgreSQL
17879 | DialectType::Redshift
17880 | DialectType::Teradata
17881 | DialectType::Materialize
17882 | DialectType::RisingWave => Expression::Cast(Box::new(Cast {
17883 this: left,
17884 to: DataType::Custom {
17885 name: "DOUBLE PRECISION".to_string(),
17886 },
17887 trailing_comments: Vec::new(),
17888 double_colon_syntax: false,
17889 format: None,
17890 default: None,
17891 })),
17892 DialectType::Drill
17893 | DialectType::Trino
17894 | DialectType::Presto
17895 | DialectType::Athena => Expression::Cast(Box::new(Cast {
17896 this: left,
17897 to: DataType::Double {
17898 precision: None,
17899 scale: None,
17900 },
17901 trailing_comments: Vec::new(),
17902 double_colon_syntax: false,
17903 format: None,
17904 default: None,
17905 })),
17906 DialectType::TSQL => Expression::Cast(Box::new(Cast {
17907 this: left,
17908 to: DataType::Float {
17909 precision: None,
17910 scale: None,
17911 real_spelling: false,
17912 },
17913 trailing_comments: Vec::new(),
17914 double_colon_syntax: false,
17915 format: None,
17916 default: None,
17917 })),
17918 _ => left,
17919 };
17920 Ok(Expression::Div(Box::new(BinaryOp::new(
17921 new_left,
17922 nullif_right,
17923 ))))
17924 } else {
17925 Ok(e)
17926 }
17927 }
17928 Action::AlterTableRenameStripSchema => {
17929 if let Expression::AlterTable(mut at) = e {
17930 if let Some(crate::expressions::AlterTableAction::RenameTable(
17931 ref mut new_tbl,
17932 )) = at.actions.first_mut()
17933 {
17934 new_tbl.schema = None;
17935 new_tbl.catalog = None;
17936 }
17937 Ok(Expression::AlterTable(at))
17938 } else {
17939 Ok(e)
17940 }
17941 }
17942 Action::NullsOrdering => {
17943 // Fill in the source dialect's implied null ordering default.
17944 // This makes implicit null ordering explicit so the target generator
17945 // can correctly strip or keep it.
17946 //
17947 // Dialect null ordering categories:
17948 // nulls_are_large (Oracle, PostgreSQL, Redshift, Snowflake):
17949 // ASC -> NULLS LAST, DESC -> NULLS FIRST
17950 // nulls_are_small (Spark, Hive, BigQuery, MySQL, Databricks, ClickHouse, etc.):
17951 // ASC -> NULLS FIRST, DESC -> NULLS LAST
17952 // nulls_are_last (DuckDB, Presto, Trino, Dremio, Athena):
17953 // NULLS LAST always (both ASC and DESC)
17954 if let Expression::Ordered(mut o) = e {
17955 let is_asc = !o.desc;
17956
17957 let is_source_nulls_large = matches!(
17958 source,
17959 DialectType::Oracle
17960 | DialectType::PostgreSQL
17961 | DialectType::Redshift
17962 | DialectType::Snowflake
17963 );
17964 let is_source_nulls_last = matches!(
17965 source,
17966 DialectType::DuckDB
17967 | DialectType::Presto
17968 | DialectType::Trino
17969 | DialectType::Dremio
17970 | DialectType::Athena
17971 | DialectType::ClickHouse
17972 | DialectType::Drill
17973 | DialectType::Exasol
17974 | DialectType::DataFusion
17975 );
17976
17977 // Determine target category to check if default matches
17978 let is_target_nulls_large = matches!(
17979 target,
17980 DialectType::Oracle
17981 | DialectType::PostgreSQL
17982 | DialectType::Redshift
17983 | DialectType::Snowflake
17984 );
17985 let is_target_nulls_last = matches!(
17986 target,
17987 DialectType::DuckDB
17988 | DialectType::Presto
17989 | DialectType::Trino
17990 | DialectType::Dremio
17991 | DialectType::Athena
17992 | DialectType::ClickHouse
17993 | DialectType::Drill
17994 | DialectType::Exasol
17995 | DialectType::DataFusion
17996 );
17997
17998 // Compute the implied nulls_first for source
17999 let source_nulls_first = if is_source_nulls_large {
18000 !is_asc // ASC -> NULLS LAST (false), DESC -> NULLS FIRST (true)
18001 } else if is_source_nulls_last {
18002 false // NULLS LAST always
18003 } else {
18004 is_asc // nulls_are_small: ASC -> NULLS FIRST (true), DESC -> NULLS LAST (false)
18005 };
18006
18007 // Compute the target's default
18008 let target_nulls_first = if is_target_nulls_large {
18009 !is_asc
18010 } else if is_target_nulls_last {
18011 false
18012 } else {
18013 is_asc
18014 };
18015
18016 // Only add explicit nulls ordering if source and target defaults differ
18017 if source_nulls_first != target_nulls_first {
18018 o.nulls_first = Some(source_nulls_first);
18019 }
18020 // If they match, leave nulls_first as None so the generator won't output it
18021
18022 Ok(Expression::Ordered(o))
18023 } else {
18024 Ok(e)
18025 }
18026 }
18027 Action::StringAggConvert => {
18028 match e {
18029 Expression::WithinGroup(wg) => {
18030 // STRING_AGG(x, sep) WITHIN GROUP (ORDER BY z) -> target-specific
18031 // Extract args and distinct flag from either Function, AggregateFunction, or StringAgg
18032 let (x_opt, sep_opt, distinct) = match wg.this {
18033 Expression::AggregateFunction(ref af)
18034 if af.name.eq_ignore_ascii_case("STRING_AGG")
18035 && af.args.len() >= 2 =>
18036 {
18037 (
18038 Some(af.args[0].clone()),
18039 Some(af.args[1].clone()),
18040 af.distinct,
18041 )
18042 }
18043 Expression::Function(ref f)
18044 if f.name.eq_ignore_ascii_case("STRING_AGG")
18045 && f.args.len() >= 2 =>
18046 {
18047 (Some(f.args[0].clone()), Some(f.args[1].clone()), false)
18048 }
18049 Expression::StringAgg(ref sa) => {
18050 (Some(sa.this.clone()), sa.separator.clone(), sa.distinct)
18051 }
18052 _ => (None, None, false),
18053 };
18054 if let (Some(x), Some(sep)) = (x_opt, sep_opt) {
18055 let order_by = wg.order_by;
18056
18057 match target {
18058 DialectType::TSQL | DialectType::Fabric => {
18059 // Keep as WithinGroup(StringAgg) for TSQL
18060 Ok(Expression::WithinGroup(Box::new(
18061 crate::expressions::WithinGroup {
18062 this: Expression::StringAgg(Box::new(
18063 crate::expressions::StringAggFunc {
18064 this: x,
18065 separator: Some(sep),
18066 order_by: None, // order_by goes in WithinGroup, not StringAgg
18067 distinct,
18068 filter: None,
18069 limit: None,
18070 },
18071 )),
18072 order_by,
18073 },
18074 )))
18075 }
18076 DialectType::MySQL
18077 | DialectType::SingleStore
18078 | DialectType::Doris
18079 | DialectType::StarRocks => {
18080 // GROUP_CONCAT(x ORDER BY z SEPARATOR sep)
18081 Ok(Expression::GroupConcat(Box::new(
18082 crate::expressions::GroupConcatFunc {
18083 this: x,
18084 separator: Some(sep),
18085 order_by: Some(order_by),
18086 distinct,
18087 filter: None,
18088 },
18089 )))
18090 }
18091 DialectType::SQLite => {
18092 // GROUP_CONCAT(x, sep) - no ORDER BY support
18093 Ok(Expression::GroupConcat(Box::new(
18094 crate::expressions::GroupConcatFunc {
18095 this: x,
18096 separator: Some(sep),
18097 order_by: None,
18098 distinct,
18099 filter: None,
18100 },
18101 )))
18102 }
18103 DialectType::PostgreSQL | DialectType::Redshift => {
18104 // STRING_AGG(x, sep ORDER BY z)
18105 Ok(Expression::StringAgg(Box::new(
18106 crate::expressions::StringAggFunc {
18107 this: x,
18108 separator: Some(sep),
18109 order_by: Some(order_by),
18110 distinct,
18111 filter: None,
18112 limit: None,
18113 },
18114 )))
18115 }
18116 _ => {
18117 // Default: keep as STRING_AGG(x, sep) with ORDER BY inside
18118 Ok(Expression::StringAgg(Box::new(
18119 crate::expressions::StringAggFunc {
18120 this: x,
18121 separator: Some(sep),
18122 order_by: Some(order_by),
18123 distinct,
18124 filter: None,
18125 limit: None,
18126 },
18127 )))
18128 }
18129 }
18130 } else {
18131 Ok(Expression::WithinGroup(wg))
18132 }
18133 }
18134 Expression::StringAgg(sa) => {
18135 match target {
18136 DialectType::MySQL
18137 | DialectType::SingleStore
18138 | DialectType::Doris
18139 | DialectType::StarRocks => {
18140 // STRING_AGG(x, sep) -> GROUP_CONCAT(x SEPARATOR sep)
18141 Ok(Expression::GroupConcat(Box::new(
18142 crate::expressions::GroupConcatFunc {
18143 this: sa.this,
18144 separator: sa.separator,
18145 order_by: sa.order_by,
18146 distinct: sa.distinct,
18147 filter: sa.filter,
18148 },
18149 )))
18150 }
18151 DialectType::SQLite => {
18152 // STRING_AGG(x, sep) -> GROUP_CONCAT(x, sep)
18153 Ok(Expression::GroupConcat(Box::new(
18154 crate::expressions::GroupConcatFunc {
18155 this: sa.this,
18156 separator: sa.separator,
18157 order_by: None, // SQLite doesn't support ORDER BY in GROUP_CONCAT
18158 distinct: sa.distinct,
18159 filter: sa.filter,
18160 },
18161 )))
18162 }
18163 DialectType::Spark | DialectType::Databricks => {
18164 // STRING_AGG(x, sep) -> LISTAGG(x, sep)
18165 Ok(Expression::ListAgg(Box::new(
18166 crate::expressions::ListAggFunc {
18167 this: sa.this,
18168 separator: sa.separator,
18169 on_overflow: None,
18170 order_by: sa.order_by,
18171 distinct: sa.distinct,
18172 filter: None,
18173 },
18174 )))
18175 }
18176 _ => Ok(Expression::StringAgg(sa)),
18177 }
18178 }
18179 _ => Ok(e),
18180 }
18181 }
18182 Action::GroupConcatConvert => {
18183 // Helper to expand CONCAT(a, b, c) -> a || b || c (for PostgreSQL/SQLite)
18184 // or CONCAT(a, b, c) -> a + b + c (for TSQL)
18185 fn expand_concat_to_dpipe(expr: Expression) -> Expression {
18186 if let Expression::Function(ref f) = expr {
18187 if f.name.to_uppercase() == "CONCAT" && f.args.len() > 1 {
18188 let mut result = f.args[0].clone();
18189 for arg in &f.args[1..] {
18190 result = Expression::Concat(Box::new(BinaryOp {
18191 left: result,
18192 right: arg.clone(),
18193 left_comments: vec![],
18194 operator_comments: vec![],
18195 trailing_comments: vec![],
18196 }));
18197 }
18198 return result;
18199 }
18200 }
18201 expr
18202 }
18203 fn expand_concat_to_plus(expr: Expression) -> Expression {
18204 if let Expression::Function(ref f) = expr {
18205 if f.name.to_uppercase() == "CONCAT" && f.args.len() > 1 {
18206 let mut result = f.args[0].clone();
18207 for arg in &f.args[1..] {
18208 result = Expression::Add(Box::new(BinaryOp {
18209 left: result,
18210 right: arg.clone(),
18211 left_comments: vec![],
18212 operator_comments: vec![],
18213 trailing_comments: vec![],
18214 }));
18215 }
18216 return result;
18217 }
18218 }
18219 expr
18220 }
18221 // Helper to wrap each arg in CAST(arg AS VARCHAR) for Presto/Trino CONCAT
18222 fn wrap_concat_args_in_varchar_cast(expr: Expression) -> Expression {
18223 if let Expression::Function(ref f) = expr {
18224 if f.name.to_uppercase() == "CONCAT" && f.args.len() > 1 {
18225 let new_args: Vec<Expression> = f
18226 .args
18227 .iter()
18228 .map(|arg| {
18229 Expression::Cast(Box::new(crate::expressions::Cast {
18230 this: arg.clone(),
18231 to: crate::expressions::DataType::VarChar {
18232 length: None,
18233 parenthesized_length: false,
18234 },
18235 trailing_comments: Vec::new(),
18236 double_colon_syntax: false,
18237 format: None,
18238 default: None,
18239 }))
18240 })
18241 .collect();
18242 return Expression::Function(Box::new(
18243 crate::expressions::Function::new(
18244 "CONCAT".to_string(),
18245 new_args,
18246 ),
18247 ));
18248 }
18249 }
18250 expr
18251 }
18252 if let Expression::GroupConcat(gc) = e {
18253 match target {
18254 DialectType::Presto => {
18255 // GROUP_CONCAT(x [, sep]) -> ARRAY_JOIN(ARRAY_AGG(x), sep)
18256 let sep = gc.separator.unwrap_or(Expression::string(","));
18257 // For multi-arg CONCAT, wrap each arg in CAST(... AS VARCHAR)
18258 let this = wrap_concat_args_in_varchar_cast(gc.this);
18259 let array_agg =
18260 Expression::ArrayAgg(Box::new(crate::expressions::AggFunc {
18261 this,
18262 distinct: gc.distinct,
18263 filter: gc.filter,
18264 order_by: gc.order_by.unwrap_or_default(),
18265 name: None,
18266 ignore_nulls: None,
18267 having_max: None,
18268 limit: None,
18269 }));
18270 Ok(Expression::ArrayJoin(Box::new(
18271 crate::expressions::ArrayJoinFunc {
18272 this: array_agg,
18273 separator: sep,
18274 null_replacement: None,
18275 },
18276 )))
18277 }
18278 DialectType::Trino => {
18279 // GROUP_CONCAT(x [, sep]) -> LISTAGG(x, sep)
18280 let sep = gc.separator.unwrap_or(Expression::string(","));
18281 // For multi-arg CONCAT, wrap each arg in CAST(... AS VARCHAR)
18282 let this = wrap_concat_args_in_varchar_cast(gc.this);
18283 Ok(Expression::ListAgg(Box::new(
18284 crate::expressions::ListAggFunc {
18285 this,
18286 separator: Some(sep),
18287 on_overflow: None,
18288 order_by: gc.order_by,
18289 distinct: gc.distinct,
18290 filter: gc.filter,
18291 },
18292 )))
18293 }
18294 DialectType::PostgreSQL
18295 | DialectType::Redshift
18296 | DialectType::Snowflake
18297 | DialectType::DuckDB
18298 | DialectType::Hive
18299 | DialectType::ClickHouse => {
18300 // GROUP_CONCAT(x [, sep]) -> STRING_AGG(x, sep)
18301 let sep = gc.separator.unwrap_or(Expression::string(","));
18302 // Expand CONCAT(a,b,c) -> a || b || c for || dialects
18303 let this = expand_concat_to_dpipe(gc.this);
18304 // For PostgreSQL, add NULLS LAST for DESC / NULLS FIRST for ASC
18305 let order_by = if target == DialectType::PostgreSQL {
18306 gc.order_by.map(|ords| {
18307 ords.into_iter()
18308 .map(|mut o| {
18309 if o.nulls_first.is_none() {
18310 if o.desc {
18311 o.nulls_first = Some(false);
18312 // NULLS LAST
18313 } else {
18314 o.nulls_first = Some(true);
18315 // NULLS FIRST
18316 }
18317 }
18318 o
18319 })
18320 .collect()
18321 })
18322 } else {
18323 gc.order_by
18324 };
18325 Ok(Expression::StringAgg(Box::new(
18326 crate::expressions::StringAggFunc {
18327 this,
18328 separator: Some(sep),
18329 order_by,
18330 distinct: gc.distinct,
18331 filter: gc.filter,
18332 limit: None,
18333 },
18334 )))
18335 }
18336 DialectType::TSQL => {
18337 // GROUP_CONCAT(x [, sep]) -> STRING_AGG(x, sep) WITHIN GROUP (ORDER BY ...)
18338 // TSQL doesn't support DISTINCT in STRING_AGG
18339 let sep = gc.separator.unwrap_or(Expression::string(","));
18340 // Expand CONCAT(a,b,c) -> a + b + c for TSQL
18341 let this = expand_concat_to_plus(gc.this);
18342 Ok(Expression::StringAgg(Box::new(
18343 crate::expressions::StringAggFunc {
18344 this,
18345 separator: Some(sep),
18346 order_by: gc.order_by,
18347 distinct: false, // TSQL doesn't support DISTINCT in STRING_AGG
18348 filter: gc.filter,
18349 limit: None,
18350 },
18351 )))
18352 }
18353 DialectType::SQLite => {
18354 // GROUP_CONCAT stays as GROUP_CONCAT but ORDER BY is removed
18355 // SQLite GROUP_CONCAT doesn't support ORDER BY
18356 // Expand CONCAT(a,b,c) -> a || b || c
18357 let this = expand_concat_to_dpipe(gc.this);
18358 Ok(Expression::GroupConcat(Box::new(
18359 crate::expressions::GroupConcatFunc {
18360 this,
18361 separator: gc.separator,
18362 order_by: None, // SQLite doesn't support ORDER BY in GROUP_CONCAT
18363 distinct: gc.distinct,
18364 filter: gc.filter,
18365 },
18366 )))
18367 }
18368 DialectType::Spark | DialectType::Databricks => {
18369 // GROUP_CONCAT(x [, sep]) -> LISTAGG(x, sep)
18370 let sep = gc.separator.unwrap_or(Expression::string(","));
18371 Ok(Expression::ListAgg(Box::new(
18372 crate::expressions::ListAggFunc {
18373 this: gc.this,
18374 separator: Some(sep),
18375 on_overflow: None,
18376 order_by: gc.order_by,
18377 distinct: gc.distinct,
18378 filter: None,
18379 },
18380 )))
18381 }
18382 DialectType::MySQL
18383 | DialectType::SingleStore
18384 | DialectType::StarRocks => {
18385 // MySQL GROUP_CONCAT should have explicit SEPARATOR (default ',')
18386 if gc.separator.is_none() {
18387 let mut gc = gc;
18388 gc.separator = Some(Expression::string(","));
18389 Ok(Expression::GroupConcat(gc))
18390 } else {
18391 Ok(Expression::GroupConcat(gc))
18392 }
18393 }
18394 _ => Ok(Expression::GroupConcat(gc)),
18395 }
18396 } else {
18397 Ok(e)
18398 }
18399 }
18400 Action::TempTableHash => {
18401 match e {
18402 Expression::CreateTable(mut ct) => {
18403 // TSQL #table -> TEMPORARY TABLE with # stripped from name
18404 let name = &ct.name.name.name;
18405 if name.starts_with('#') {
18406 ct.name.name.name = name.trim_start_matches('#').to_string();
18407 }
18408 // Set temporary flag
18409 ct.temporary = true;
18410 Ok(Expression::CreateTable(ct))
18411 }
18412 Expression::Table(mut tr) => {
18413 // Strip # from table references
18414 let name = &tr.name.name;
18415 if name.starts_with('#') {
18416 tr.name.name = name.trim_start_matches('#').to_string();
18417 }
18418 Ok(Expression::Table(tr))
18419 }
18420 Expression::DropTable(mut dt) => {
18421 // Strip # from DROP TABLE names
18422 for table_ref in &mut dt.names {
18423 if table_ref.name.name.starts_with('#') {
18424 table_ref.name.name =
18425 table_ref.name.name.trim_start_matches('#').to_string();
18426 }
18427 }
18428 Ok(Expression::DropTable(dt))
18429 }
18430 _ => Ok(e),
18431 }
18432 }
18433 Action::NvlClearOriginal => {
18434 if let Expression::Nvl(mut f) = e {
18435 f.original_name = None;
18436 Ok(Expression::Nvl(f))
18437 } else {
18438 Ok(e)
18439 }
18440 }
18441 Action::HiveCastToTryCast => {
18442 // Convert Hive/Spark CAST to TRY_CAST for targets that support it
18443 if let Expression::Cast(mut c) = e {
18444 // For Spark/Hive -> DuckDB: TIMESTAMP -> TIMESTAMPTZ
18445 // (Spark's TIMESTAMP is always timezone-aware)
18446 if matches!(target, DialectType::DuckDB)
18447 && matches!(source, DialectType::Spark | DialectType::Databricks)
18448 && matches!(
18449 c.to,
18450 DataType::Timestamp {
18451 timezone: false,
18452 ..
18453 }
18454 )
18455 {
18456 c.to = DataType::Custom {
18457 name: "TIMESTAMPTZ".to_string(),
18458 };
18459 }
18460 // For Spark source -> Databricks: VARCHAR/CHAR -> STRING
18461 // Spark parses VARCHAR(n)/CHAR(n) as TEXT, normalize to STRING
18462 if matches!(target, DialectType::Databricks | DialectType::Spark)
18463 && matches!(
18464 source,
18465 DialectType::Spark | DialectType::Databricks | DialectType::Hive
18466 )
18467 && Self::has_varchar_char_type(&c.to)
18468 {
18469 c.to = Self::normalize_varchar_to_string(c.to);
18470 }
18471 Ok(Expression::TryCast(c))
18472 } else {
18473 Ok(e)
18474 }
18475 }
18476 Action::XorExpand => {
18477 // Expand XOR to (a AND NOT b) OR (NOT a AND b) for dialects without XOR keyword
18478 // Snowflake: use BOOLXOR(a, b) instead
18479 if let Expression::Xor(xor) = e {
18480 // Collect all XOR operands
18481 let mut operands = Vec::new();
18482 if let Some(this) = xor.this {
18483 operands.push(*this);
18484 }
18485 if let Some(expr) = xor.expression {
18486 operands.push(*expr);
18487 }
18488 operands.extend(xor.expressions);
18489
18490 // Snowflake: use BOOLXOR(a, b)
18491 if matches!(target, DialectType::Snowflake) && operands.len() == 2 {
18492 let a = operands.remove(0);
18493 let b = operands.remove(0);
18494 return Ok(Expression::Function(Box::new(Function::new(
18495 "BOOLXOR".to_string(),
18496 vec![a, b],
18497 ))));
18498 }
18499
18500 // Helper to build (a AND NOT b) OR (NOT a AND b)
18501 let make_xor = |a: Expression, b: Expression| -> Expression {
18502 let not_b = Expression::Not(Box::new(
18503 crate::expressions::UnaryOp::new(b.clone()),
18504 ));
18505 let not_a = Expression::Not(Box::new(
18506 crate::expressions::UnaryOp::new(a.clone()),
18507 ));
18508 let left_and = Expression::And(Box::new(BinaryOp {
18509 left: a,
18510 right: Expression::Paren(Box::new(Paren {
18511 this: not_b,
18512 trailing_comments: Vec::new(),
18513 })),
18514 left_comments: Vec::new(),
18515 operator_comments: Vec::new(),
18516 trailing_comments: Vec::new(),
18517 }));
18518 let right_and = Expression::And(Box::new(BinaryOp {
18519 left: Expression::Paren(Box::new(Paren {
18520 this: not_a,
18521 trailing_comments: Vec::new(),
18522 })),
18523 right: b,
18524 left_comments: Vec::new(),
18525 operator_comments: Vec::new(),
18526 trailing_comments: Vec::new(),
18527 }));
18528 Expression::Or(Box::new(BinaryOp {
18529 left: Expression::Paren(Box::new(Paren {
18530 this: left_and,
18531 trailing_comments: Vec::new(),
18532 })),
18533 right: Expression::Paren(Box::new(Paren {
18534 this: right_and,
18535 trailing_comments: Vec::new(),
18536 })),
18537 left_comments: Vec::new(),
18538 operator_comments: Vec::new(),
18539 trailing_comments: Vec::new(),
18540 }))
18541 };
18542
18543 if operands.len() >= 2 {
18544 let mut result = make_xor(operands.remove(0), operands.remove(0));
18545 for operand in operands {
18546 result = make_xor(result, operand);
18547 }
18548 Ok(result)
18549 } else if operands.len() == 1 {
18550 Ok(operands.remove(0))
18551 } else {
18552 // No operands - return FALSE (shouldn't happen)
18553 Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
18554 value: false,
18555 }))
18556 }
18557 } else {
18558 Ok(e)
18559 }
18560 }
18561 Action::DatePartUnquote => {
18562 // DATE_PART('month', x) -> DATE_PART(month, x) for Snowflake target
18563 // Convert the quoted string first arg to a bare Column/Identifier
18564 if let Expression::Function(mut f) = e {
18565 if let Some(Expression::Literal(crate::expressions::Literal::String(s))) =
18566 f.args.first()
18567 {
18568 let bare_name = s.to_lowercase();
18569 f.args[0] = Expression::Column(crate::expressions::Column {
18570 name: Identifier::new(bare_name),
18571 table: None,
18572 join_mark: false,
18573 trailing_comments: Vec::new(),
18574 span: None,
18575 });
18576 }
18577 Ok(Expression::Function(f))
18578 } else {
18579 Ok(e)
18580 }
18581 }
18582 Action::ArrayLengthConvert => {
18583 // Extract the argument from the expression
18584 let arg = match e {
18585 Expression::Cardinality(ref f) => f.this.clone(),
18586 Expression::ArrayLength(ref f) => f.this.clone(),
18587 Expression::ArraySize(ref f) => f.this.clone(),
18588 _ => return Ok(e),
18589 };
18590 match target {
18591 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
18592 Ok(Expression::Function(Box::new(Function::new(
18593 "SIZE".to_string(),
18594 vec![arg],
18595 ))))
18596 }
18597 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
18598 Ok(Expression::Cardinality(Box::new(
18599 crate::expressions::UnaryFunc::new(arg),
18600 )))
18601 }
18602 DialectType::BigQuery => Ok(Expression::ArrayLength(Box::new(
18603 crate::expressions::UnaryFunc::new(arg),
18604 ))),
18605 DialectType::DuckDB => Ok(Expression::ArrayLength(Box::new(
18606 crate::expressions::UnaryFunc::new(arg),
18607 ))),
18608 DialectType::PostgreSQL | DialectType::Redshift => {
18609 // PostgreSQL ARRAY_LENGTH requires dimension arg
18610 Ok(Expression::Function(Box::new(Function::new(
18611 "ARRAY_LENGTH".to_string(),
18612 vec![arg, Expression::number(1)],
18613 ))))
18614 }
18615 DialectType::Snowflake => Ok(Expression::ArraySize(Box::new(
18616 crate::expressions::UnaryFunc::new(arg),
18617 ))),
18618 _ => Ok(e), // Keep original
18619 }
18620 }
18621
18622 Action::JsonExtractToArrow => {
18623 // JSON_EXTRACT(x, path) -> x -> path for SQLite/DuckDB (set arrow_syntax = true)
18624 if let Expression::JsonExtract(mut f) = e {
18625 f.arrow_syntax = true;
18626 // Transform path: convert bracket notation to dot notation
18627 // SQLite strips wildcards, DuckDB preserves them
18628 if let Expression::Literal(Literal::String(ref s)) = f.path {
18629 let mut transformed = s.clone();
18630 if matches!(target, DialectType::SQLite) {
18631 transformed = Self::strip_json_wildcards(&transformed);
18632 }
18633 transformed = Self::bracket_to_dot_notation(&transformed);
18634 if transformed != *s {
18635 f.path = Expression::string(&transformed);
18636 }
18637 }
18638 Ok(Expression::JsonExtract(f))
18639 } else {
18640 Ok(e)
18641 }
18642 }
18643
18644 Action::JsonExtractToGetJsonObject => {
18645 if let Expression::JsonExtract(f) = e {
18646 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
18647 // JSON_EXTRACT(x, '$.key') -> JSON_EXTRACT_PATH(x, 'key') for PostgreSQL
18648 // Use proper decomposition that handles brackets
18649 let keys: Vec<Expression> =
18650 if let Expression::Literal(Literal::String(ref s)) = f.path {
18651 let parts = Self::decompose_json_path(s);
18652 parts.into_iter().map(|k| Expression::string(&k)).collect()
18653 } else {
18654 vec![f.path]
18655 };
18656 let func_name = if matches!(target, DialectType::Redshift) {
18657 "JSON_EXTRACT_PATH_TEXT"
18658 } else {
18659 "JSON_EXTRACT_PATH"
18660 };
18661 let mut args = vec![f.this];
18662 args.extend(keys);
18663 Ok(Expression::Function(Box::new(Function::new(
18664 func_name.to_string(),
18665 args,
18666 ))))
18667 } else {
18668 // GET_JSON_OBJECT(x, '$.path') for Hive/Spark
18669 // Convert bracket double quotes to single quotes
18670 let path = if let Expression::Literal(Literal::String(ref s)) = f.path {
18671 let normalized = Self::bracket_to_single_quotes(s);
18672 if normalized != *s {
18673 Expression::string(&normalized)
18674 } else {
18675 f.path
18676 }
18677 } else {
18678 f.path
18679 };
18680 Ok(Expression::Function(Box::new(Function::new(
18681 "GET_JSON_OBJECT".to_string(),
18682 vec![f.this, path],
18683 ))))
18684 }
18685 } else {
18686 Ok(e)
18687 }
18688 }
18689
18690 Action::JsonExtractScalarToGetJsonObject => {
18691 // JSON_EXTRACT_SCALAR(x, '$.path') -> GET_JSON_OBJECT(x, '$.path') for Hive/Spark
18692 if let Expression::JsonExtractScalar(f) = e {
18693 Ok(Expression::Function(Box::new(Function::new(
18694 "GET_JSON_OBJECT".to_string(),
18695 vec![f.this, f.path],
18696 ))))
18697 } else {
18698 Ok(e)
18699 }
18700 }
18701
18702 Action::JsonExtractToTsql => {
18703 // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> ISNULL(JSON_QUERY(x, path), JSON_VALUE(x, path)) for TSQL
18704 let (this, path) = match e {
18705 Expression::JsonExtract(f) => (f.this, f.path),
18706 Expression::JsonExtractScalar(f) => (f.this, f.path),
18707 _ => return Ok(e),
18708 };
18709 // Transform path: strip wildcards, convert bracket notation to dot notation
18710 let transformed_path = if let Expression::Literal(Literal::String(ref s)) = path
18711 {
18712 let stripped = Self::strip_json_wildcards(s);
18713 let dotted = Self::bracket_to_dot_notation(&stripped);
18714 Expression::string(&dotted)
18715 } else {
18716 path
18717 };
18718 let json_query = Expression::Function(Box::new(Function::new(
18719 "JSON_QUERY".to_string(),
18720 vec![this.clone(), transformed_path.clone()],
18721 )));
18722 let json_value = Expression::Function(Box::new(Function::new(
18723 "JSON_VALUE".to_string(),
18724 vec![this, transformed_path],
18725 )));
18726 Ok(Expression::Function(Box::new(Function::new(
18727 "ISNULL".to_string(),
18728 vec![json_query, json_value],
18729 ))))
18730 }
18731
18732 Action::JsonExtractToClickHouse => {
18733 // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> JSONExtractString(x, 'key1', idx, 'key2') for ClickHouse
18734 let (this, path) = match e {
18735 Expression::JsonExtract(f) => (f.this, f.path),
18736 Expression::JsonExtractScalar(f) => (f.this, f.path),
18737 _ => return Ok(e),
18738 };
18739 let args: Vec<Expression> =
18740 if let Expression::Literal(Literal::String(ref s)) = path {
18741 let parts = Self::decompose_json_path(s);
18742 let mut result = vec![this];
18743 for part in parts {
18744 // ClickHouse uses 1-based integer indices for array access
18745 if let Ok(idx) = part.parse::<i64>() {
18746 result.push(Expression::number(idx + 1));
18747 } else {
18748 result.push(Expression::string(&part));
18749 }
18750 }
18751 result
18752 } else {
18753 vec![this, path]
18754 };
18755 Ok(Expression::Function(Box::new(Function::new(
18756 "JSONExtractString".to_string(),
18757 args,
18758 ))))
18759 }
18760
18761 Action::JsonExtractScalarConvert => {
18762 // JSON_EXTRACT_SCALAR -> target-specific
18763 if let Expression::JsonExtractScalar(f) = e {
18764 match target {
18765 DialectType::PostgreSQL | DialectType::Redshift => {
18766 // JSON_EXTRACT_SCALAR(x, '$.path') -> JSON_EXTRACT_PATH_TEXT(x, 'key1', 'key2')
18767 let keys: Vec<Expression> =
18768 if let Expression::Literal(Literal::String(ref s)) = f.path {
18769 let parts = Self::decompose_json_path(s);
18770 parts.into_iter().map(|k| Expression::string(&k)).collect()
18771 } else {
18772 vec![f.path]
18773 };
18774 let mut args = vec![f.this];
18775 args.extend(keys);
18776 Ok(Expression::Function(Box::new(Function::new(
18777 "JSON_EXTRACT_PATH_TEXT".to_string(),
18778 args,
18779 ))))
18780 }
18781 DialectType::Snowflake => {
18782 // JSON_EXTRACT_SCALAR(x, '$.path') -> JSON_EXTRACT_PATH_TEXT(x, 'stripped_path')
18783 let stripped_path =
18784 if let Expression::Literal(Literal::String(ref s)) = f.path {
18785 let stripped = Self::strip_json_dollar_prefix(s);
18786 Expression::string(&stripped)
18787 } else {
18788 f.path
18789 };
18790 Ok(Expression::Function(Box::new(Function::new(
18791 "JSON_EXTRACT_PATH_TEXT".to_string(),
18792 vec![f.this, stripped_path],
18793 ))))
18794 }
18795 DialectType::SQLite | DialectType::DuckDB => {
18796 // JSON_EXTRACT_SCALAR(x, '$.path') -> x ->> '$.path'
18797 Ok(Expression::JsonExtractScalar(Box::new(
18798 crate::expressions::JsonExtractFunc {
18799 this: f.this,
18800 path: f.path,
18801 returning: f.returning,
18802 arrow_syntax: true,
18803 hash_arrow_syntax: false,
18804 wrapper_option: None,
18805 quotes_option: None,
18806 on_scalar_string: false,
18807 on_error: None,
18808 },
18809 )))
18810 }
18811 _ => Ok(Expression::JsonExtractScalar(f)),
18812 }
18813 } else {
18814 Ok(e)
18815 }
18816 }
18817
18818 Action::JsonPathNormalize => {
18819 // Normalize JSON path format for BigQuery, MySQL, etc.
18820 if let Expression::JsonExtract(mut f) = e {
18821 if let Expression::Literal(Literal::String(ref s)) = f.path {
18822 let mut normalized = s.clone();
18823 // Convert bracket notation and handle wildcards per dialect
18824 match target {
18825 DialectType::BigQuery => {
18826 // BigQuery strips wildcards and uses single quotes in brackets
18827 normalized = Self::strip_json_wildcards(&normalized);
18828 normalized = Self::bracket_to_single_quotes(&normalized);
18829 }
18830 DialectType::MySQL => {
18831 // MySQL preserves wildcards, converts brackets to dot notation
18832 normalized = Self::bracket_to_dot_notation(&normalized);
18833 }
18834 _ => {}
18835 }
18836 if normalized != *s {
18837 f.path = Expression::string(&normalized);
18838 }
18839 }
18840 Ok(Expression::JsonExtract(f))
18841 } else {
18842 Ok(e)
18843 }
18844 }
18845
18846 Action::JsonQueryValueConvert => {
18847 // JsonQuery/JsonValue -> target-specific
18848 let (f, is_query) = match e {
18849 Expression::JsonQuery(f) => (f, true),
18850 Expression::JsonValue(f) => (f, false),
18851 _ => return Ok(e),
18852 };
18853 match target {
18854 DialectType::TSQL | DialectType::Fabric => {
18855 // ISNULL(JSON_QUERY(...), JSON_VALUE(...))
18856 let json_query = Expression::Function(Box::new(Function::new(
18857 "JSON_QUERY".to_string(),
18858 vec![f.this.clone(), f.path.clone()],
18859 )));
18860 let json_value = Expression::Function(Box::new(Function::new(
18861 "JSON_VALUE".to_string(),
18862 vec![f.this, f.path],
18863 )));
18864 Ok(Expression::Function(Box::new(Function::new(
18865 "ISNULL".to_string(),
18866 vec![json_query, json_value],
18867 ))))
18868 }
18869 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
18870 Ok(Expression::Function(Box::new(Function::new(
18871 "GET_JSON_OBJECT".to_string(),
18872 vec![f.this, f.path],
18873 ))))
18874 }
18875 DialectType::PostgreSQL | DialectType::Redshift => {
18876 Ok(Expression::Function(Box::new(Function::new(
18877 "JSON_EXTRACT_PATH_TEXT".to_string(),
18878 vec![f.this, f.path],
18879 ))))
18880 }
18881 DialectType::DuckDB | DialectType::SQLite => {
18882 // json -> path arrow syntax
18883 Ok(Expression::JsonExtract(Box::new(
18884 crate::expressions::JsonExtractFunc {
18885 this: f.this,
18886 path: f.path,
18887 returning: f.returning,
18888 arrow_syntax: true,
18889 hash_arrow_syntax: false,
18890 wrapper_option: f.wrapper_option,
18891 quotes_option: f.quotes_option,
18892 on_scalar_string: f.on_scalar_string,
18893 on_error: f.on_error,
18894 },
18895 )))
18896 }
18897 DialectType::Snowflake => {
18898 // GET_PATH(PARSE_JSON(json), 'path')
18899 // Strip $. prefix from path
18900 // Only wrap in PARSE_JSON if not already a PARSE_JSON call or ParseJson expression
18901 let json_expr = match &f.this {
18902 Expression::Function(ref inner_f)
18903 if inner_f.name.eq_ignore_ascii_case("PARSE_JSON") =>
18904 {
18905 f.this
18906 }
18907 Expression::ParseJson(_) => {
18908 // Already a ParseJson expression, which generates as PARSE_JSON(...)
18909 f.this
18910 }
18911 _ => Expression::Function(Box::new(Function::new(
18912 "PARSE_JSON".to_string(),
18913 vec![f.this],
18914 ))),
18915 };
18916 let path_str = match &f.path {
18917 Expression::Literal(Literal::String(s)) => {
18918 let stripped = s.strip_prefix("$.").unwrap_or(s);
18919 Expression::Literal(Literal::String(stripped.to_string()))
18920 }
18921 other => other.clone(),
18922 };
18923 Ok(Expression::Function(Box::new(Function::new(
18924 "GET_PATH".to_string(),
18925 vec![json_expr, path_str],
18926 ))))
18927 }
18928 _ => {
18929 // Default: keep as JSON_QUERY/JSON_VALUE function
18930 let func_name = if is_query { "JSON_QUERY" } else { "JSON_VALUE" };
18931 Ok(Expression::Function(Box::new(Function::new(
18932 func_name.to_string(),
18933 vec![f.this, f.path],
18934 ))))
18935 }
18936 }
18937 }
18938
18939 Action::JsonLiteralToJsonParse => {
18940 // CAST('x' AS JSON) -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake
18941 if let Expression::Cast(c) = e {
18942 let func_name = if matches!(target, DialectType::Snowflake) {
18943 "PARSE_JSON"
18944 } else {
18945 "JSON_PARSE"
18946 };
18947 Ok(Expression::Function(Box::new(Function::new(
18948 func_name.to_string(),
18949 vec![c.this],
18950 ))))
18951 } else {
18952 Ok(e)
18953 }
18954 }
18955
18956 Action::AtTimeZoneConvert => {
18957 // AT TIME ZONE -> target-specific conversion
18958 if let Expression::AtTimeZone(atz) = e {
18959 match target {
18960 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
18961 Ok(Expression::Function(Box::new(Function::new(
18962 "AT_TIMEZONE".to_string(),
18963 vec![atz.this, atz.zone],
18964 ))))
18965 }
18966 DialectType::Spark | DialectType::Databricks => {
18967 Ok(Expression::Function(Box::new(Function::new(
18968 "FROM_UTC_TIMESTAMP".to_string(),
18969 vec![atz.this, atz.zone],
18970 ))))
18971 }
18972 DialectType::Snowflake => {
18973 // CONVERT_TIMEZONE('zone', expr)
18974 Ok(Expression::Function(Box::new(Function::new(
18975 "CONVERT_TIMEZONE".to_string(),
18976 vec![atz.zone, atz.this],
18977 ))))
18978 }
18979 DialectType::BigQuery => {
18980 // TIMESTAMP(DATETIME(expr, 'zone'))
18981 let datetime_call = Expression::Function(Box::new(Function::new(
18982 "DATETIME".to_string(),
18983 vec![atz.this, atz.zone],
18984 )));
18985 Ok(Expression::Function(Box::new(Function::new(
18986 "TIMESTAMP".to_string(),
18987 vec![datetime_call],
18988 ))))
18989 }
18990 _ => Ok(Expression::Function(Box::new(Function::new(
18991 "AT_TIMEZONE".to_string(),
18992 vec![atz.this, atz.zone],
18993 )))),
18994 }
18995 } else {
18996 Ok(e)
18997 }
18998 }
18999
19000 Action::DayOfWeekConvert => {
19001 // DAY_OF_WEEK -> ISODOW for DuckDB, ((DAYOFWEEK(x) % 7) + 1) for Spark
19002 if let Expression::DayOfWeek(f) = e {
19003 match target {
19004 DialectType::DuckDB => Ok(Expression::Function(Box::new(
19005 Function::new("ISODOW".to_string(), vec![f.this]),
19006 ))),
19007 DialectType::Spark | DialectType::Databricks => {
19008 // ((DAYOFWEEK(x) % 7) + 1)
19009 let dayofweek = Expression::Function(Box::new(Function::new(
19010 "DAYOFWEEK".to_string(),
19011 vec![f.this],
19012 )));
19013 let modulo = Expression::Mod(Box::new(BinaryOp {
19014 left: dayofweek,
19015 right: Expression::number(7),
19016 left_comments: Vec::new(),
19017 operator_comments: Vec::new(),
19018 trailing_comments: Vec::new(),
19019 }));
19020 let paren_mod = Expression::Paren(Box::new(Paren {
19021 this: modulo,
19022 trailing_comments: Vec::new(),
19023 }));
19024 let add_one = Expression::Add(Box::new(BinaryOp {
19025 left: paren_mod,
19026 right: Expression::number(1),
19027 left_comments: Vec::new(),
19028 operator_comments: Vec::new(),
19029 trailing_comments: Vec::new(),
19030 }));
19031 Ok(Expression::Paren(Box::new(Paren {
19032 this: add_one,
19033 trailing_comments: Vec::new(),
19034 })))
19035 }
19036 _ => Ok(Expression::DayOfWeek(f)),
19037 }
19038 } else {
19039 Ok(e)
19040 }
19041 }
19042
19043 Action::MaxByMinByConvert => {
19044 // MAX_BY -> argMax for ClickHouse, drop 3rd arg for Spark
19045 // MIN_BY -> argMin for ClickHouse, ARG_MIN for DuckDB, drop 3rd arg for Spark/ClickHouse
19046 // Handle both Expression::Function and Expression::AggregateFunction
19047 let (is_max, args) = match &e {
19048 Expression::Function(f) => {
19049 (f.name.eq_ignore_ascii_case("MAX_BY"), f.args.clone())
19050 }
19051 Expression::AggregateFunction(af) => {
19052 (af.name.eq_ignore_ascii_case("MAX_BY"), af.args.clone())
19053 }
19054 _ => return Ok(e),
19055 };
19056 match target {
19057 DialectType::ClickHouse => {
19058 let name = if is_max { "argMax" } else { "argMin" };
19059 let mut args = args;
19060 args.truncate(2);
19061 Ok(Expression::Function(Box::new(Function::new(
19062 name.to_string(),
19063 args,
19064 ))))
19065 }
19066 DialectType::DuckDB => {
19067 let name = if is_max { "ARG_MAX" } else { "ARG_MIN" };
19068 Ok(Expression::Function(Box::new(Function::new(
19069 name.to_string(),
19070 args,
19071 ))))
19072 }
19073 DialectType::Spark | DialectType::Databricks => {
19074 let mut args = args;
19075 args.truncate(2);
19076 let name = if is_max { "MAX_BY" } else { "MIN_BY" };
19077 Ok(Expression::Function(Box::new(Function::new(
19078 name.to_string(),
19079 args,
19080 ))))
19081 }
19082 _ => Ok(e),
19083 }
19084 }
19085
19086 Action::ElementAtConvert => {
19087 // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
19088 let (arr, idx) = if let Expression::ElementAt(bf) = e {
19089 (bf.this, bf.expression)
19090 } else if let Expression::Function(ref f) = e {
19091 if f.args.len() >= 2 {
19092 if let Expression::Function(f) = e {
19093 let mut args = f.args;
19094 let arr = args.remove(0);
19095 let idx = args.remove(0);
19096 (arr, idx)
19097 } else {
19098 unreachable!("outer condition already matched Expression::Function")
19099 }
19100 } else {
19101 return Ok(e);
19102 }
19103 } else {
19104 return Ok(e);
19105 };
19106 match target {
19107 DialectType::PostgreSQL => {
19108 // Wrap array in parens for PostgreSQL: (ARRAY[1,2,3])[4]
19109 let arr_expr = Expression::Paren(Box::new(Paren {
19110 this: arr,
19111 trailing_comments: vec![],
19112 }));
19113 Ok(Expression::Subscript(Box::new(
19114 crate::expressions::Subscript {
19115 this: arr_expr,
19116 index: idx,
19117 },
19118 )))
19119 }
19120 DialectType::BigQuery => {
19121 // BigQuery: convert ARRAY[...] to bare [...] for subscript
19122 let arr_expr = match arr {
19123 Expression::ArrayFunc(af) => Expression::ArrayFunc(Box::new(
19124 crate::expressions::ArrayConstructor {
19125 expressions: af.expressions,
19126 bracket_notation: true,
19127 use_list_keyword: false,
19128 },
19129 )),
19130 other => other,
19131 };
19132 let safe_ordinal = Expression::Function(Box::new(Function::new(
19133 "SAFE_ORDINAL".to_string(),
19134 vec![idx],
19135 )));
19136 Ok(Expression::Subscript(Box::new(
19137 crate::expressions::Subscript {
19138 this: arr_expr,
19139 index: safe_ordinal,
19140 },
19141 )))
19142 }
19143 _ => Ok(Expression::Function(Box::new(Function::new(
19144 "ELEMENT_AT".to_string(),
19145 vec![arr, idx],
19146 )))),
19147 }
19148 }
19149
19150 Action::CurrentUserParens => {
19151 // CURRENT_USER -> CURRENT_USER() for Snowflake
19152 Ok(Expression::Function(Box::new(Function::new(
19153 "CURRENT_USER".to_string(),
19154 vec![],
19155 ))))
19156 }
19157
19158 Action::ArrayAggToCollectList => {
19159 // ARRAY_AGG(x ORDER BY ...) -> COLLECT_LIST(x) for Hive/Spark
19160 // Python sqlglot Hive.arrayagg_sql strips ORDER BY for simple cases
19161 // but preserves it when DISTINCT/IGNORE NULLS/LIMIT are present
19162 match e {
19163 Expression::AggregateFunction(mut af) => {
19164 let is_simple =
19165 !af.distinct && af.ignore_nulls.is_none() && af.limit.is_none();
19166 let args = if af.args.is_empty() {
19167 vec![]
19168 } else {
19169 vec![af.args[0].clone()]
19170 };
19171 af.name = "COLLECT_LIST".to_string();
19172 af.args = args;
19173 if is_simple {
19174 af.order_by = Vec::new();
19175 }
19176 Ok(Expression::AggregateFunction(af))
19177 }
19178 Expression::ArrayAgg(agg) => {
19179 let is_simple =
19180 !agg.distinct && agg.ignore_nulls.is_none() && agg.limit.is_none();
19181 Ok(Expression::AggregateFunction(Box::new(
19182 crate::expressions::AggregateFunction {
19183 name: "COLLECT_LIST".to_string(),
19184 args: vec![agg.this.clone()],
19185 distinct: agg.distinct,
19186 filter: agg.filter.clone(),
19187 order_by: if is_simple {
19188 Vec::new()
19189 } else {
19190 agg.order_by.clone()
19191 },
19192 limit: agg.limit.clone(),
19193 ignore_nulls: agg.ignore_nulls,
19194 },
19195 )))
19196 }
19197 _ => Ok(e),
19198 }
19199 }
19200
19201 Action::ArraySyntaxConvert => {
19202 match e {
19203 // ARRAY[1, 2] (ArrayFunc bracket_notation=false) -> set bracket_notation=true
19204 // so the generator uses dialect-specific output (ARRAY() for Spark, [] for BigQuery)
19205 Expression::ArrayFunc(arr) if !arr.bracket_notation => Ok(
19206 Expression::ArrayFunc(Box::new(crate::expressions::ArrayConstructor {
19207 expressions: arr.expressions,
19208 bracket_notation: true,
19209 use_list_keyword: false,
19210 })),
19211 ),
19212 // ARRAY(y) function style -> ArrayFunc for target dialect
19213 // bracket_notation=true for BigQuery/DuckDB/ClickHouse/StarRocks (output []), false for Presto (output ARRAY[])
19214 Expression::Function(f) if f.name.eq_ignore_ascii_case("ARRAY") => {
19215 let bracket = matches!(
19216 target,
19217 DialectType::BigQuery
19218 | DialectType::DuckDB
19219 | DialectType::ClickHouse
19220 | DialectType::StarRocks
19221 );
19222 Ok(Expression::ArrayFunc(Box::new(
19223 crate::expressions::ArrayConstructor {
19224 expressions: f.args,
19225 bracket_notation: bracket,
19226 use_list_keyword: false,
19227 },
19228 )))
19229 }
19230 _ => Ok(e),
19231 }
19232 }
19233
19234 Action::CastToJsonForSpark => {
19235 // CAST(x AS JSON) -> TO_JSON(x) for Spark
19236 if let Expression::Cast(c) = e {
19237 Ok(Expression::Function(Box::new(Function::new(
19238 "TO_JSON".to_string(),
19239 vec![c.this],
19240 ))))
19241 } else {
19242 Ok(e)
19243 }
19244 }
19245
19246 Action::CastJsonToFromJson => {
19247 // CAST(ParseJson(literal) AS ARRAY/MAP/STRUCT) -> FROM_JSON(literal, type_string) for Spark
19248 if let Expression::Cast(c) = e {
19249 // Extract the string literal from ParseJson
19250 let literal_expr = if let Expression::ParseJson(pj) = c.this {
19251 pj.this
19252 } else {
19253 c.this
19254 };
19255 // Convert the target DataType to Spark's type string format
19256 let type_str = Self::data_type_to_spark_string(&c.to);
19257 Ok(Expression::Function(Box::new(Function::new(
19258 "FROM_JSON".to_string(),
19259 vec![literal_expr, Expression::Literal(Literal::String(type_str))],
19260 ))))
19261 } else {
19262 Ok(e)
19263 }
19264 }
19265
            Action::ToJsonConvert => {
                // TO_JSON(x) -> target-specific conversion
                if let Expression::ToJson(f) = e {
                    let arg = f.this;
                    match target {
                        DialectType::Presto | DialectType::Trino => {
                            // JSON_FORMAT(CAST(x AS JSON))
                            // Custom("JSON") keeps the type spelled literally as JSON.
                            let cast_json = Expression::Cast(Box::new(Cast {
                                this: arg,
                                to: DataType::Custom {
                                    name: "JSON".to_string(),
                                },
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                            }));
                            Ok(Expression::Function(Box::new(Function::new(
                                "JSON_FORMAT".to_string(),
                                vec![cast_json],
                            ))))
                        }
                        // BigQuery: TO_JSON_STRING(x)
                        DialectType::BigQuery => Ok(Expression::Function(Box::new(
                            Function::new("TO_JSON_STRING".to_string(), vec![arg]),
                        ))),
                        DialectType::DuckDB => {
                            // CAST(TO_JSON(x) AS TEXT)
                            let to_json =
                                Expression::ToJson(Box::new(crate::expressions::UnaryFunc {
                                    this: arg,
                                    original_name: None,
                                }));
                            Ok(Expression::Cast(Box::new(Cast {
                                this: to_json,
                                to: DataType::Text,
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                            })))
                        }
                        // Other targets keep TO_JSON(x); the node is rebuilt because
                        // `arg` was moved out of the original above.
                        _ => Ok(Expression::ToJson(Box::new(
                            crate::expressions::UnaryFunc {
                                this: arg,
                                original_name: None,
                            },
                        ))),
                    }
                } else {
                    Ok(e)
                }
            }
19318
19319 Action::VarianceToClickHouse => {
19320 if let Expression::Variance(f) = e {
19321 Ok(Expression::Function(Box::new(Function::new(
19322 "varSamp".to_string(),
19323 vec![f.this],
19324 ))))
19325 } else {
19326 Ok(e)
19327 }
19328 }
19329
19330 Action::StddevToClickHouse => {
19331 if let Expression::Stddev(f) = e {
19332 Ok(Expression::Function(Box::new(Function::new(
19333 "stddevSamp".to_string(),
19334 vec![f.this],
19335 ))))
19336 } else {
19337 Ok(e)
19338 }
19339 }
19340
19341 Action::ApproxQuantileConvert => {
19342 if let Expression::ApproxQuantile(aq) = e {
19343 let mut args = vec![*aq.this];
19344 if let Some(q) = aq.quantile {
19345 args.push(*q);
19346 }
19347 Ok(Expression::Function(Box::new(Function::new(
19348 "APPROX_PERCENTILE".to_string(),
19349 args,
19350 ))))
19351 } else {
19352 Ok(e)
19353 }
19354 }
19355
19356 Action::DollarParamConvert => {
19357 if let Expression::Parameter(p) = e {
19358 Ok(Expression::Parameter(Box::new(
19359 crate::expressions::Parameter {
19360 name: p.name,
19361 index: p.index,
19362 style: crate::expressions::ParameterStyle::At,
19363 quoted: p.quoted,
19364 string_quoted: p.string_quoted,
19365 expression: p.expression,
19366 },
19367 )))
19368 } else {
19369 Ok(e)
19370 }
19371 }
19372
19373 Action::EscapeStringNormalize => {
19374 if let Expression::Literal(Literal::EscapeString(s)) = e {
19375 // Strip prefix (e.g., "e:" or "E:") if present from tokenizer
19376 let stripped = if s.starts_with("e:") || s.starts_with("E:") {
19377 s[2..].to_string()
19378 } else {
19379 s
19380 };
19381 let normalized = stripped
19382 .replace('\n', "\\n")
19383 .replace('\r', "\\r")
19384 .replace('\t', "\\t");
19385 match target {
19386 DialectType::BigQuery => {
19387 // BigQuery: e'...' -> CAST(b'...' AS STRING)
19388 // Use Raw for the b'...' part to avoid double-escaping
19389 let raw_sql = format!("CAST(b'{}' AS STRING)", normalized);
19390 Ok(Expression::Raw(crate::expressions::Raw { sql: raw_sql }))
19391 }
19392 _ => Ok(Expression::Literal(Literal::EscapeString(normalized))),
19393 }
19394 } else {
19395 Ok(e)
19396 }
19397 }
19398
19399 Action::StraightJoinCase => {
19400 // straight_join: keep lowercase for DuckDB, quote for MySQL
19401 if let Expression::Column(col) = e {
19402 if col.name.name == "STRAIGHT_JOIN" {
19403 let mut new_col = col;
19404 new_col.name.name = "straight_join".to_string();
19405 if matches!(target, DialectType::MySQL) {
19406 // MySQL: needs quoting since it's a reserved keyword
19407 new_col.name.quoted = true;
19408 }
19409 Ok(Expression::Column(new_col))
19410 } else {
19411 Ok(Expression::Column(col))
19412 }
19413 } else {
19414 Ok(e)
19415 }
19416 }
19417
19418 Action::TablesampleReservoir => {
19419 // TABLESAMPLE -> TABLESAMPLE RESERVOIR for DuckDB
19420 if let Expression::TableSample(mut ts) = e {
19421 if let Some(ref mut sample) = ts.sample {
19422 sample.method = crate::expressions::SampleMethod::Reservoir;
19423 sample.explicit_method = true;
19424 }
19425 Ok(Expression::TableSample(ts))
19426 } else {
19427 Ok(e)
19428 }
19429 }
19430
19431 Action::TablesampleSnowflakeStrip => {
19432 // Strip method and PERCENT for Snowflake target from non-Snowflake source
19433 match e {
19434 Expression::TableSample(mut ts) => {
19435 if let Some(ref mut sample) = ts.sample {
19436 sample.suppress_method_output = true;
19437 sample.unit_after_size = false;
19438 sample.is_percent = false;
19439 }
19440 Ok(Expression::TableSample(ts))
19441 }
19442 Expression::Table(mut t) => {
19443 if let Some(ref mut sample) = t.table_sample {
19444 sample.suppress_method_output = true;
19445 sample.unit_after_size = false;
19446 sample.is_percent = false;
19447 }
19448 Ok(Expression::Table(t))
19449 }
19450 _ => Ok(e),
19451 }
19452 }
19453
19454 Action::FirstToAnyValue => {
19455 // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
19456 if let Expression::First(mut agg) = e {
19457 agg.ignore_nulls = None;
19458 agg.name = Some("ANY_VALUE".to_string());
19459 Ok(Expression::AnyValue(agg))
19460 } else {
19461 Ok(e)
19462 }
19463 }
19464
19465 Action::ArrayIndexConvert => {
19466 // Subscript index: 1-based to 0-based for BigQuery
19467 if let Expression::Subscript(mut sub) = e {
19468 if let Expression::Literal(Literal::Number(ref n)) = sub.index {
19469 if let Ok(val) = n.parse::<i64>() {
19470 sub.index =
19471 Expression::Literal(Literal::Number((val - 1).to_string()));
19472 }
19473 }
19474 Ok(Expression::Subscript(sub))
19475 } else {
19476 Ok(e)
19477 }
19478 }
19479
19480 Action::AnyValueIgnoreNulls => {
19481 // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
19482 if let Expression::AnyValue(mut av) = e {
19483 if av.ignore_nulls.is_none() {
19484 av.ignore_nulls = Some(true);
19485 }
19486 Ok(Expression::AnyValue(av))
19487 } else {
19488 Ok(e)
19489 }
19490 }
19491
19492 Action::BigQueryNullsOrdering => {
19493 // BigQuery doesn't support NULLS FIRST/LAST in window function ORDER BY
19494 if let Expression::WindowFunction(mut wf) = e {
19495 for o in &mut wf.over.order_by {
19496 o.nulls_first = None;
19497 }
19498 Ok(Expression::WindowFunction(wf))
19499 } else if let Expression::Ordered(mut o) = e {
19500 o.nulls_first = None;
19501 Ok(Expression::Ordered(o))
19502 } else {
19503 Ok(e)
19504 }
19505 }
19506
19507 Action::SnowflakeFloatProtect => {
19508 // Convert DataType::Float to DataType::Custom("FLOAT") to prevent
19509 // Snowflake's target transform from converting it to DOUBLE.
19510 // Non-Snowflake sources should keep their FLOAT spelling.
19511 if let Expression::DataType(DataType::Float { .. }) = e {
19512 Ok(Expression::DataType(DataType::Custom {
19513 name: "FLOAT".to_string(),
19514 }))
19515 } else {
19516 Ok(e)
19517 }
19518 }
19519
19520 Action::MysqlNullsOrdering => {
19521 // MySQL doesn't support NULLS FIRST/LAST - strip or rewrite
19522 if let Expression::Ordered(mut o) = e {
19523 let nulls_last = o.nulls_first == Some(false);
19524 let desc = o.desc;
19525 // MySQL default: ASC -> NULLS LAST, DESC -> NULLS FIRST
19526 // If requested ordering matches default, just strip NULLS clause
19527 let matches_default = if desc {
19528 // DESC default is NULLS FIRST, so nulls_first=true matches
19529 o.nulls_first == Some(true)
19530 } else {
19531 // ASC default is NULLS LAST, so nulls_first=false matches
19532 nulls_last
19533 };
19534 if matches_default {
19535 o.nulls_first = None;
19536 Ok(Expression::Ordered(o))
19537 } else {
19538 // Need CASE WHEN x IS NULL THEN 0/1 ELSE 0/1 END, x
19539 // For ASC NULLS FIRST: ORDER BY CASE WHEN x IS NULL THEN 0 ELSE 1 END, x ASC
19540 // For DESC NULLS LAST: ORDER BY CASE WHEN x IS NULL THEN 1 ELSE 0 END, x DESC
19541 let null_val = if desc { 1 } else { 0 };
19542 let non_null_val = if desc { 0 } else { 1 };
19543 let _case_expr = Expression::Case(Box::new(Case {
19544 operand: None,
19545 whens: vec![(
19546 Expression::IsNull(Box::new(crate::expressions::IsNull {
19547 this: o.this.clone(),
19548 not: false,
19549 postfix_form: false,
19550 })),
19551 Expression::number(null_val),
19552 )],
19553 else_: Some(Expression::number(non_null_val)),
19554 comments: Vec::new(),
19555 }));
19556 o.nulls_first = None;
19557 // Return a tuple of [case_expr, ordered_expr]
19558 // We need to return both as part of the ORDER BY
19559 // But since transform_recursive processes individual expressions,
19560 // we can't easily add extra ORDER BY items here.
19561 // Instead, strip the nulls_first
19562 o.nulls_first = None;
19563 Ok(Expression::Ordered(o))
19564 }
19565 } else {
19566 Ok(e)
19567 }
19568 }
19569
19570 Action::MysqlNullsLastRewrite => {
19571 // DuckDB -> MySQL: Add CASE WHEN IS NULL THEN 1 ELSE 0 END to ORDER BY
19572 // to simulate NULLS LAST for ASC ordering
19573 if let Expression::WindowFunction(mut wf) = e {
19574 let mut new_order_by = Vec::new();
19575 for o in wf.over.order_by {
19576 if !o.desc {
19577 // ASC: DuckDB has NULLS LAST, MySQL has NULLS FIRST
19578 // Add CASE WHEN expr IS NULL THEN 1 ELSE 0 END before expr
19579 let case_expr = Expression::Case(Box::new(Case {
19580 operand: None,
19581 whens: vec![(
19582 Expression::IsNull(Box::new(crate::expressions::IsNull {
19583 this: o.this.clone(),
19584 not: false,
19585 postfix_form: false,
19586 })),
19587 Expression::Literal(Literal::Number("1".to_string())),
19588 )],
19589 else_: Some(Expression::Literal(Literal::Number(
19590 "0".to_string(),
19591 ))),
19592 comments: Vec::new(),
19593 }));
19594 new_order_by.push(crate::expressions::Ordered {
19595 this: case_expr,
19596 desc: false,
19597 nulls_first: None,
19598 explicit_asc: false,
19599 with_fill: None,
19600 });
19601 let mut ordered = o;
19602 ordered.nulls_first = None;
19603 new_order_by.push(ordered);
19604 } else {
19605 // DESC: DuckDB has NULLS LAST, MySQL also has NULLS LAST (NULLs smallest in DESC)
19606 // No change needed
19607 let mut ordered = o;
19608 ordered.nulls_first = None;
19609 new_order_by.push(ordered);
19610 }
19611 }
19612 wf.over.order_by = new_order_by;
19613 Ok(Expression::WindowFunction(wf))
19614 } else {
19615 Ok(e)
19616 }
19617 }
19618
19619 Action::RespectNullsConvert => {
19620 // RESPECT NULLS -> strip for SQLite (FIRST_VALUE(c) OVER (...))
19621 if let Expression::WindowFunction(mut wf) = e {
19622 match &mut wf.this {
19623 Expression::FirstValue(ref mut vf) => {
19624 if vf.ignore_nulls == Some(false) {
19625 vf.ignore_nulls = None;
19626 // For SQLite, we'd need to add NULLS LAST to ORDER BY in the OVER clause
19627 // but that's handled by the generator's NULLS ordering
19628 }
19629 }
19630 Expression::LastValue(ref mut vf) => {
19631 if vf.ignore_nulls == Some(false) {
19632 vf.ignore_nulls = None;
19633 }
19634 }
19635 _ => {}
19636 }
19637 Ok(Expression::WindowFunction(wf))
19638 } else {
19639 Ok(e)
19640 }
19641 }
19642
19643 Action::CreateTableStripComment => {
19644 // Strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
19645 if let Expression::CreateTable(mut ct) = e {
19646 for col in &mut ct.columns {
19647 col.comment = None;
19648 col.constraints.retain(|c| {
19649 !matches!(c, crate::expressions::ColumnConstraint::Comment(_))
19650 });
19651 // Also remove Comment from constraint_order
19652 col.constraint_order.retain(|c| {
19653 !matches!(c, crate::expressions::ConstraintType::Comment)
19654 });
19655 }
19656 // Strip properties (USING, PARTITIONED BY, etc.)
19657 ct.properties.clear();
19658 Ok(Expression::CreateTable(ct))
19659 } else {
19660 Ok(e)
19661 }
19662 }
19663
19664 Action::AlterTableToSpRename => {
19665 // ALTER TABLE db.t1 RENAME TO db.t2 -> EXEC sp_rename 'db.t1', 't2'
19666 if let Expression::AlterTable(ref at) = e {
19667 if let Some(crate::expressions::AlterTableAction::RenameTable(
19668 ref new_tbl,
19669 )) = at.actions.first()
19670 {
19671 // Build the old table name using TSQL bracket quoting
19672 let old_name = if let Some(ref schema) = at.name.schema {
19673 if at.name.name.quoted || schema.quoted {
19674 format!("[{}].[{}]", schema.name, at.name.name.name)
19675 } else {
19676 format!("{}.{}", schema.name, at.name.name.name)
19677 }
19678 } else {
19679 if at.name.name.quoted {
19680 format!("[{}]", at.name.name.name)
19681 } else {
19682 at.name.name.name.clone()
19683 }
19684 };
19685 let new_name = new_tbl.name.name.clone();
19686 // EXEC sp_rename 'old_name', 'new_name'
19687 let sql = format!("EXEC sp_rename '{}', '{}'", old_name, new_name);
19688 Ok(Expression::Raw(crate::expressions::Raw { sql }))
19689 } else {
19690 Ok(e)
19691 }
19692 } else {
19693 Ok(e)
19694 }
19695 }
19696
19697 Action::SnowflakeIntervalFormat => {
19698 // INTERVAL '2' HOUR -> INTERVAL '2 HOUR' for Snowflake
19699 if let Expression::Interval(mut iv) = e {
19700 if let (
19701 Some(Expression::Literal(Literal::String(ref val))),
19702 Some(ref unit_spec),
19703 ) = (&iv.this, &iv.unit)
19704 {
19705 let unit_str = match unit_spec {
19706 crate::expressions::IntervalUnitSpec::Simple { unit, .. } => {
19707 match unit {
19708 crate::expressions::IntervalUnit::Year => "YEAR",
19709 crate::expressions::IntervalUnit::Quarter => "QUARTER",
19710 crate::expressions::IntervalUnit::Month => "MONTH",
19711 crate::expressions::IntervalUnit::Week => "WEEK",
19712 crate::expressions::IntervalUnit::Day => "DAY",
19713 crate::expressions::IntervalUnit::Hour => "HOUR",
19714 crate::expressions::IntervalUnit::Minute => "MINUTE",
19715 crate::expressions::IntervalUnit::Second => "SECOND",
19716 crate::expressions::IntervalUnit::Millisecond => {
19717 "MILLISECOND"
19718 }
19719 crate::expressions::IntervalUnit::Microsecond => {
19720 "MICROSECOND"
19721 }
19722 crate::expressions::IntervalUnit::Nanosecond => {
19723 "NANOSECOND"
19724 }
19725 }
19726 }
19727 _ => "",
19728 };
19729 if !unit_str.is_empty() {
19730 let combined = format!("{} {}", val, unit_str);
19731 iv.this = Some(Expression::Literal(Literal::String(combined)));
19732 iv.unit = None;
19733 }
19734 }
19735 Ok(Expression::Interval(iv))
19736 } else {
19737 Ok(e)
19738 }
19739 }
19740
19741 Action::ArrayConcatBracketConvert => {
19742 // Expression::Array/ArrayFunc -> target-specific
19743 // For PostgreSQL: Array -> ArrayFunc (bracket_notation: false)
19744 // For Redshift: Array/ArrayFunc -> Function("ARRAY", args) to produce ARRAY(1, 2) with parens
19745 match e {
19746 Expression::Array(arr) => {
19747 if matches!(target, DialectType::Redshift) {
19748 Ok(Expression::Function(Box::new(Function::new(
19749 "ARRAY".to_string(),
19750 arr.expressions,
19751 ))))
19752 } else {
19753 Ok(Expression::ArrayFunc(Box::new(
19754 crate::expressions::ArrayConstructor {
19755 expressions: arr.expressions,
19756 bracket_notation: false,
19757 use_list_keyword: false,
19758 },
19759 )))
19760 }
19761 }
19762 Expression::ArrayFunc(arr) => {
19763 // Only for Redshift: convert bracket-notation ArrayFunc to Function("ARRAY")
19764 if matches!(target, DialectType::Redshift) {
19765 Ok(Expression::Function(Box::new(Function::new(
19766 "ARRAY".to_string(),
19767 arr.expressions,
19768 ))))
19769 } else {
19770 Ok(Expression::ArrayFunc(arr))
19771 }
19772 }
19773 _ => Ok(e),
19774 }
19775 }
19776
19777 Action::BitAggFloatCast => {
19778 // BIT_OR/BIT_AND/BIT_XOR with float/decimal cast arg -> wrap with ROUND+INT cast for DuckDB
19779 // For FLOAT/DOUBLE/REAL: CAST(ROUND(CAST(val AS type)) AS INT)
19780 // For DECIMAL: CAST(CAST(val AS DECIMAL(p,s)) AS INT)
19781 let int_type = DataType::Int {
19782 length: None,
19783 integer_spelling: false,
19784 };
19785 let wrap_agg = |agg_this: Expression, int_dt: DataType| -> Expression {
19786 if let Expression::Cast(c) = agg_this {
19787 match &c.to {
19788 DataType::Float { .. }
19789 | DataType::Double { .. }
19790 | DataType::Custom { .. } => {
19791 // FLOAT/DOUBLE/REAL: CAST(ROUND(CAST(val AS type)) AS INT)
19792 // Change FLOAT to REAL (Float with real_spelling=true) for DuckDB generator
19793 let inner_type = match &c.to {
19794 DataType::Float {
19795 precision, scale, ..
19796 } => DataType::Float {
19797 precision: *precision,
19798 scale: *scale,
19799 real_spelling: true,
19800 },
19801 other => other.clone(),
19802 };
19803 let inner_cast =
19804 Expression::Cast(Box::new(crate::expressions::Cast {
19805 this: c.this.clone(),
19806 to: inner_type,
19807 trailing_comments: Vec::new(),
19808 double_colon_syntax: false,
19809 format: None,
19810 default: None,
19811 }));
19812 let rounded = Expression::Function(Box::new(Function::new(
19813 "ROUND".to_string(),
19814 vec![inner_cast],
19815 )));
19816 Expression::Cast(Box::new(crate::expressions::Cast {
19817 this: rounded,
19818 to: int_dt,
19819 trailing_comments: Vec::new(),
19820 double_colon_syntax: false,
19821 format: None,
19822 default: None,
19823 }))
19824 }
19825 DataType::Decimal { .. } => {
19826 // DECIMAL: CAST(CAST(val AS DECIMAL(p,s)) AS INT)
19827 Expression::Cast(Box::new(crate::expressions::Cast {
19828 this: Expression::Cast(c),
19829 to: int_dt,
19830 trailing_comments: Vec::new(),
19831 double_colon_syntax: false,
19832 format: None,
19833 default: None,
19834 }))
19835 }
19836 _ => Expression::Cast(c),
19837 }
19838 } else {
19839 agg_this
19840 }
19841 };
19842 match e {
19843 Expression::BitwiseOrAgg(mut f) => {
19844 f.this = wrap_agg(f.this, int_type);
19845 Ok(Expression::BitwiseOrAgg(f))
19846 }
19847 Expression::BitwiseAndAgg(mut f) => {
19848 let int_type = DataType::Int {
19849 length: None,
19850 integer_spelling: false,
19851 };
19852 f.this = wrap_agg(f.this, int_type);
19853 Ok(Expression::BitwiseAndAgg(f))
19854 }
19855 Expression::BitwiseXorAgg(mut f) => {
19856 let int_type = DataType::Int {
19857 length: None,
19858 integer_spelling: false,
19859 };
19860 f.this = wrap_agg(f.this, int_type);
19861 Ok(Expression::BitwiseXorAgg(f))
19862 }
19863 _ => Ok(e),
19864 }
19865 }
19866
19867 Action::BitAggSnowflakeRename => {
19868 // BIT_OR -> BITORAGG, BIT_AND -> BITANDAGG, BIT_XOR -> BITXORAGG for Snowflake
19869 match e {
19870 Expression::BitwiseOrAgg(f) => Ok(Expression::Function(Box::new(
19871 Function::new("BITORAGG".to_string(), vec![f.this]),
19872 ))),
19873 Expression::BitwiseAndAgg(f) => Ok(Expression::Function(Box::new(
19874 Function::new("BITANDAGG".to_string(), vec![f.this]),
19875 ))),
19876 Expression::BitwiseXorAgg(f) => Ok(Expression::Function(Box::new(
19877 Function::new("BITXORAGG".to_string(), vec![f.this]),
19878 ))),
19879 _ => Ok(e),
19880 }
19881 }
19882
19883 Action::StrftimeCastTimestamp => {
19884 // CAST(x AS TIMESTAMP) -> CAST(x AS TIMESTAMP_NTZ) for Spark
19885 if let Expression::Cast(mut c) = e {
19886 if matches!(
19887 c.to,
19888 DataType::Timestamp {
19889 timezone: false,
19890 ..
19891 }
19892 ) {
19893 c.to = DataType::Custom {
19894 name: "TIMESTAMP_NTZ".to_string(),
19895 };
19896 }
19897 Ok(Expression::Cast(c))
19898 } else {
19899 Ok(e)
19900 }
19901 }
19902
19903 Action::DecimalDefaultPrecision => {
19904 // DECIMAL without precision -> DECIMAL(18, 3) for Snowflake
19905 if let Expression::Cast(mut c) = e {
19906 if matches!(
19907 c.to,
19908 DataType::Decimal {
19909 precision: None,
19910 ..
19911 }
19912 ) {
19913 c.to = DataType::Decimal {
19914 precision: Some(18),
19915 scale: Some(3),
19916 };
19917 }
19918 Ok(Expression::Cast(c))
19919 } else {
19920 Ok(e)
19921 }
19922 }
19923
19924 Action::FilterToIff => {
19925 // FILTER(WHERE cond) -> rewrite aggregate: AGG(IFF(cond, val, NULL))
19926 if let Expression::Filter(f) = e {
19927 let condition = *f.expression;
19928 let agg = *f.this;
19929 // Strip WHERE from condition
19930 let cond = match condition {
19931 Expression::Where(w) => w.this,
19932 other => other,
19933 };
19934 // Extract the aggregate function and its argument
19935 // We want AVG(IFF(condition, x, NULL))
19936 match agg {
19937 Expression::Function(mut func) => {
19938 if !func.args.is_empty() {
19939 let orig_arg = func.args[0].clone();
19940 let iff_call = Expression::Function(Box::new(Function::new(
19941 "IFF".to_string(),
19942 vec![cond, orig_arg, Expression::Null(Null)],
19943 )));
19944 func.args[0] = iff_call;
19945 Ok(Expression::Function(func))
19946 } else {
19947 Ok(Expression::Filter(Box::new(crate::expressions::Filter {
19948 this: Box::new(Expression::Function(func)),
19949 expression: Box::new(cond),
19950 })))
19951 }
19952 }
19953 Expression::Avg(mut avg) => {
19954 let iff_call = Expression::Function(Box::new(Function::new(
19955 "IFF".to_string(),
19956 vec![cond, avg.this.clone(), Expression::Null(Null)],
19957 )));
19958 avg.this = iff_call;
19959 Ok(Expression::Avg(avg))
19960 }
19961 Expression::Sum(mut s) => {
19962 let iff_call = Expression::Function(Box::new(Function::new(
19963 "IFF".to_string(),
19964 vec![cond, s.this.clone(), Expression::Null(Null)],
19965 )));
19966 s.this = iff_call;
19967 Ok(Expression::Sum(s))
19968 }
19969 Expression::Count(mut c) => {
19970 if let Some(ref this_expr) = c.this {
19971 let iff_call = Expression::Function(Box::new(Function::new(
19972 "IFF".to_string(),
19973 vec![cond, this_expr.clone(), Expression::Null(Null)],
19974 )));
19975 c.this = Some(iff_call);
19976 }
19977 Ok(Expression::Count(c))
19978 }
19979 other => {
19980 // Fallback: keep as Filter
19981 Ok(Expression::Filter(Box::new(crate::expressions::Filter {
19982 this: Box::new(other),
19983 expression: Box::new(cond),
19984 })))
19985 }
19986 }
19987 } else {
19988 Ok(e)
19989 }
19990 }
19991
            Action::AggFilterToIff => {
                // AggFunc.filter -> IFF wrapping: AVG(x) FILTER(WHERE cond) -> AVG(IFF(cond, x, NULL))
                // Helper macro to handle the common AggFunc case: take() the filter
                // condition out of the aggregate (leaving None) and, when one was
                // present, replace the aggregate's argument with
                // IFF(cond, arg, NULL). Each listed variant shares the same
                // `this`/`filter` field layout, which is what the macro relies on.
                macro_rules! handle_agg_filter_to_iff {
                    ($variant:ident, $agg:expr) => {{
                        let mut agg = $agg;
                        if let Some(filter_cond) = agg.filter.take() {
                            let iff_call = Expression::Function(Box::new(Function::new(
                                "IFF".to_string(),
                                vec![filter_cond, agg.this.clone(), Expression::Null(Null)],
                            )));
                            agg.this = iff_call;
                        }
                        Ok(Expression::$variant(agg))
                    }};
                }

                match e {
                    Expression::Avg(agg) => handle_agg_filter_to_iff!(Avg, agg),
                    Expression::Sum(agg) => handle_agg_filter_to_iff!(Sum, agg),
                    Expression::Min(agg) => handle_agg_filter_to_iff!(Min, agg),
                    Expression::Max(agg) => handle_agg_filter_to_iff!(Max, agg),
                    Expression::ArrayAgg(agg) => handle_agg_filter_to_iff!(ArrayAgg, agg),
                    Expression::CountIf(agg) => handle_agg_filter_to_iff!(CountIf, agg),
                    Expression::Stddev(agg) => handle_agg_filter_to_iff!(Stddev, agg),
                    Expression::StddevPop(agg) => handle_agg_filter_to_iff!(StddevPop, agg),
                    Expression::StddevSamp(agg) => handle_agg_filter_to_iff!(StddevSamp, agg),
                    Expression::Variance(agg) => handle_agg_filter_to_iff!(Variance, agg),
                    Expression::VarPop(agg) => handle_agg_filter_to_iff!(VarPop, agg),
                    Expression::VarSamp(agg) => handle_agg_filter_to_iff!(VarSamp, agg),
                    Expression::Median(agg) => handle_agg_filter_to_iff!(Median, agg),
                    Expression::Mode(agg) => handle_agg_filter_to_iff!(Mode, agg),
                    Expression::First(agg) => handle_agg_filter_to_iff!(First, agg),
                    Expression::Last(agg) => handle_agg_filter_to_iff!(Last, agg),
                    Expression::AnyValue(agg) => handle_agg_filter_to_iff!(AnyValue, agg),
                    Expression::ApproxDistinct(agg) => {
                        handle_agg_filter_to_iff!(ApproxDistinct, agg)
                    }
                    // Count is handled separately because its argument is optional
                    // (COUNT(*) has `this: None`).
                    Expression::Count(mut c) => {
                        if let Some(filter_cond) = c.filter.take() {
                            // NOTE(review): when `c.this` is None (COUNT(*)), the
                            // taken filter condition is silently discarded rather
                            // than re-attached — confirm this is intended.
                            if let Some(ref this_expr) = c.this {
                                let iff_call = Expression::Function(Box::new(Function::new(
                                    "IFF".to_string(),
                                    vec![
                                        filter_cond,
                                        this_expr.clone(),
                                        Expression::Null(Null),
                                    ],
                                )));
                                c.this = Some(iff_call);
                            }
                        }
                        Ok(Expression::Count(c))
                    }
                    other => Ok(other),
                }
            }
20049
20050 Action::JsonToGetPath => {
20051 // JSON_EXTRACT(x, '$.key') -> GET_PATH(PARSE_JSON(x), 'key')
20052 if let Expression::JsonExtract(je) = e {
20053 // Convert to PARSE_JSON() wrapper:
20054 // - JSON(x) -> PARSE_JSON(x)
20055 // - PARSE_JSON(x) -> keep as-is
20056 // - anything else -> wrap in PARSE_JSON()
20057 let this = match &je.this {
20058 Expression::Function(f)
20059 if f.name.eq_ignore_ascii_case("JSON") && f.args.len() == 1 =>
20060 {
20061 Expression::Function(Box::new(Function::new(
20062 "PARSE_JSON".to_string(),
20063 f.args.clone(),
20064 )))
20065 }
20066 Expression::Function(f)
20067 if f.name.eq_ignore_ascii_case("PARSE_JSON") =>
20068 {
20069 je.this.clone()
20070 }
20071 // GET_PATH result is already JSON, don't wrap
20072 Expression::Function(f) if f.name.eq_ignore_ascii_case("GET_PATH") => {
20073 je.this.clone()
20074 }
20075 other => {
20076 // Wrap non-JSON expressions in PARSE_JSON()
20077 Expression::Function(Box::new(Function::new(
20078 "PARSE_JSON".to_string(),
20079 vec![other.clone()],
20080 )))
20081 }
20082 };
20083 // Convert path: extract key from JSONPath or strip $. prefix from string
20084 let path = match &je.path {
20085 Expression::JSONPath(jp) => {
20086 // Extract the key from JSONPath: $root.key -> 'key'
20087 let mut key_parts = Vec::new();
20088 for expr in &jp.expressions {
20089 match expr {
20090 Expression::JSONPathRoot(_) => {} // skip root
20091 Expression::JSONPathKey(k) => {
20092 if let Expression::Literal(Literal::String(s)) =
20093 &*k.this
20094 {
20095 key_parts.push(s.clone());
20096 }
20097 }
20098 _ => {}
20099 }
20100 }
20101 if !key_parts.is_empty() {
20102 Expression::Literal(Literal::String(key_parts.join(".")))
20103 } else {
20104 je.path.clone()
20105 }
20106 }
20107 Expression::Literal(Literal::String(s)) if s.starts_with("$.") => {
20108 let stripped = Self::strip_json_wildcards(&s[2..].to_string());
20109 Expression::Literal(Literal::String(stripped))
20110 }
20111 Expression::Literal(Literal::String(s)) if s.starts_with('$') => {
20112 let stripped = Self::strip_json_wildcards(&s[1..].to_string());
20113 Expression::Literal(Literal::String(stripped))
20114 }
20115 _ => je.path.clone(),
20116 };
20117 Ok(Expression::Function(Box::new(Function::new(
20118 "GET_PATH".to_string(),
20119 vec![this, path],
20120 ))))
20121 } else {
20122 Ok(e)
20123 }
20124 }
20125
20126 Action::StructToRow => {
20127 // DuckDB struct/dict -> BigQuery STRUCT(value AS key, ...) / Presto ROW
20128 // Handles both Expression::Struct and Expression::MapFunc(curly_brace_syntax=true)
20129
20130 // Extract key-value pairs from either Struct or MapFunc
20131 let kv_pairs: Option<Vec<(String, Expression)>> = match &e {
20132 Expression::Struct(s) => Some(
20133 s.fields
20134 .iter()
20135 .map(|(opt_name, field_expr)| {
20136 if let Some(name) = opt_name {
20137 (name.clone(), field_expr.clone())
20138 } else if let Expression::NamedArgument(na) = field_expr {
20139 (na.name.name.clone(), na.value.clone())
20140 } else {
20141 (String::new(), field_expr.clone())
20142 }
20143 })
20144 .collect(),
20145 ),
20146 Expression::MapFunc(m) if m.curly_brace_syntax => Some(
20147 m.keys
20148 .iter()
20149 .zip(m.values.iter())
20150 .map(|(key, value)| {
20151 let key_name = match key {
20152 Expression::Literal(Literal::String(s)) => s.clone(),
20153 Expression::Identifier(id) => id.name.clone(),
20154 _ => String::new(),
20155 };
20156 (key_name, value.clone())
20157 })
20158 .collect(),
20159 ),
20160 _ => None,
20161 };
20162
20163 if let Some(pairs) = kv_pairs {
20164 let mut named_args = Vec::new();
20165 for (key_name, value) in pairs {
20166 if matches!(target, DialectType::BigQuery) && !key_name.is_empty() {
20167 named_args.push(Expression::Alias(Box::new(
20168 crate::expressions::Alias::new(
20169 value,
20170 Identifier::new(key_name),
20171 ),
20172 )));
20173 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
20174 named_args.push(value);
20175 } else {
20176 named_args.push(value);
20177 }
20178 }
20179
20180 if matches!(target, DialectType::BigQuery) {
20181 Ok(Expression::Function(Box::new(Function::new(
20182 "STRUCT".to_string(),
20183 named_args,
20184 ))))
20185 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
20186 // For Presto/Trino, infer types and wrap in CAST(ROW(...) AS ROW(name TYPE, ...))
20187 let row_func = Expression::Function(Box::new(Function::new(
20188 "ROW".to_string(),
20189 named_args,
20190 )));
20191
20192 // Try to infer types for each pair
20193 let kv_pairs_again: Option<Vec<(String, Expression)>> = match &e {
20194 Expression::Struct(s) => Some(
20195 s.fields
20196 .iter()
20197 .map(|(opt_name, field_expr)| {
20198 if let Some(name) = opt_name {
20199 (name.clone(), field_expr.clone())
20200 } else if let Expression::NamedArgument(na) = field_expr
20201 {
20202 (na.name.name.clone(), na.value.clone())
20203 } else {
20204 (String::new(), field_expr.clone())
20205 }
20206 })
20207 .collect(),
20208 ),
20209 Expression::MapFunc(m) if m.curly_brace_syntax => Some(
20210 m.keys
20211 .iter()
20212 .zip(m.values.iter())
20213 .map(|(key, value)| {
20214 let key_name = match key {
20215 Expression::Literal(Literal::String(s)) => {
20216 s.clone()
20217 }
20218 Expression::Identifier(id) => id.name.clone(),
20219 _ => String::new(),
20220 };
20221 (key_name, value.clone())
20222 })
20223 .collect(),
20224 ),
20225 _ => None,
20226 };
20227
20228 if let Some(pairs) = kv_pairs_again {
20229 // Infer types for all values
20230 let mut all_inferred = true;
20231 let mut fields = Vec::new();
20232 for (name, value) in &pairs {
20233 let inferred_type = match value {
20234 Expression::Literal(Literal::Number(n)) => {
20235 if n.contains('.') {
20236 Some(DataType::Double {
20237 precision: None,
20238 scale: None,
20239 })
20240 } else {
20241 Some(DataType::Int {
20242 length: None,
20243 integer_spelling: true,
20244 })
20245 }
20246 }
20247 Expression::Literal(Literal::String(_)) => {
20248 Some(DataType::VarChar {
20249 length: None,
20250 parenthesized_length: false,
20251 })
20252 }
20253 Expression::Boolean(_) => Some(DataType::Boolean),
20254 _ => None,
20255 };
20256 if let Some(dt) = inferred_type {
20257 fields.push(crate::expressions::StructField::new(
20258 name.clone(),
20259 dt,
20260 ));
20261 } else {
20262 all_inferred = false;
20263 break;
20264 }
20265 }
20266
20267 if all_inferred && !fields.is_empty() {
20268 let row_type = DataType::Struct {
20269 fields,
20270 nested: true,
20271 };
20272 Ok(Expression::Cast(Box::new(Cast {
20273 this: row_func,
20274 to: row_type,
20275 trailing_comments: Vec::new(),
20276 double_colon_syntax: false,
20277 format: None,
20278 default: None,
20279 })))
20280 } else {
20281 Ok(row_func)
20282 }
20283 } else {
20284 Ok(row_func)
20285 }
20286 } else {
20287 Ok(Expression::Function(Box::new(Function::new(
20288 "ROW".to_string(),
20289 named_args,
20290 ))))
20291 }
20292 } else {
20293 Ok(e)
20294 }
20295 }
20296
20297 Action::SparkStructConvert => {
20298 // Spark STRUCT(val AS name, ...) -> Presto CAST(ROW(...) AS ROW(name TYPE, ...))
20299 // or DuckDB {'name': val, ...}
20300 if let Expression::Function(f) = e {
20301 // Extract name-value pairs from aliased args
20302 let mut pairs: Vec<(String, Expression)> = Vec::new();
20303 for arg in &f.args {
20304 match arg {
20305 Expression::Alias(a) => {
20306 pairs.push((a.alias.name.clone(), a.this.clone()));
20307 }
20308 _ => {
20309 pairs.push((String::new(), arg.clone()));
20310 }
20311 }
20312 }
20313
20314 match target {
20315 DialectType::DuckDB => {
20316 // Convert to DuckDB struct literal {'name': value, ...}
20317 let mut keys = Vec::new();
20318 let mut values = Vec::new();
20319 for (name, value) in &pairs {
20320 keys.push(Expression::Literal(Literal::String(name.clone())));
20321 values.push(value.clone());
20322 }
20323 Ok(Expression::MapFunc(Box::new(
20324 crate::expressions::MapConstructor {
20325 keys,
20326 values,
20327 curly_brace_syntax: true,
20328 with_map_keyword: false,
20329 },
20330 )))
20331 }
20332 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
20333 // Convert to CAST(ROW(val1, val2) AS ROW(name1 TYPE1, name2 TYPE2))
20334 let row_args: Vec<Expression> =
20335 pairs.iter().map(|(_, v)| v.clone()).collect();
20336 let row_func = Expression::Function(Box::new(Function::new(
20337 "ROW".to_string(),
20338 row_args,
20339 )));
20340
20341 // Infer types
20342 let mut all_inferred = true;
20343 let mut fields = Vec::new();
20344 for (name, value) in &pairs {
20345 let inferred_type = match value {
20346 Expression::Literal(Literal::Number(n)) => {
20347 if n.contains('.') {
20348 Some(DataType::Double {
20349 precision: None,
20350 scale: None,
20351 })
20352 } else {
20353 Some(DataType::Int {
20354 length: None,
20355 integer_spelling: true,
20356 })
20357 }
20358 }
20359 Expression::Literal(Literal::String(_)) => {
20360 Some(DataType::VarChar {
20361 length: None,
20362 parenthesized_length: false,
20363 })
20364 }
20365 Expression::Boolean(_) => Some(DataType::Boolean),
20366 _ => None,
20367 };
20368 if let Some(dt) = inferred_type {
20369 fields.push(crate::expressions::StructField::new(
20370 name.clone(),
20371 dt,
20372 ));
20373 } else {
20374 all_inferred = false;
20375 break;
20376 }
20377 }
20378
20379 if all_inferred && !fields.is_empty() {
20380 let row_type = DataType::Struct {
20381 fields,
20382 nested: true,
20383 };
20384 Ok(Expression::Cast(Box::new(Cast {
20385 this: row_func,
20386 to: row_type,
20387 trailing_comments: Vec::new(),
20388 double_colon_syntax: false,
20389 format: None,
20390 default: None,
20391 })))
20392 } else {
20393 Ok(row_func)
20394 }
20395 }
20396 _ => Ok(Expression::Function(f)),
20397 }
20398 } else {
20399 Ok(e)
20400 }
20401 }
20402
20403 Action::ApproxCountDistinctToApproxDistinct => {
20404 // APPROX_COUNT_DISTINCT(x) -> APPROX_DISTINCT(x)
20405 if let Expression::ApproxCountDistinct(f) = e {
20406 Ok(Expression::ApproxDistinct(f))
20407 } else {
20408 Ok(e)
20409 }
20410 }
20411
20412 Action::CollectListToArrayAgg => {
20413 // COLLECT_LIST(x) -> ARRAY_AGG(x) FILTER(WHERE x IS NOT NULL)
20414 if let Expression::AggregateFunction(f) = e {
20415 let filter_expr = if !f.args.is_empty() {
20416 let arg = f.args[0].clone();
20417 Some(Expression::IsNull(Box::new(crate::expressions::IsNull {
20418 this: arg,
20419 not: true,
20420 postfix_form: false,
20421 })))
20422 } else {
20423 None
20424 };
20425 let agg = crate::expressions::AggFunc {
20426 this: if f.args.is_empty() {
20427 Expression::Null(crate::expressions::Null)
20428 } else {
20429 f.args[0].clone()
20430 },
20431 distinct: f.distinct,
20432 order_by: f.order_by.clone(),
20433 filter: filter_expr,
20434 ignore_nulls: None,
20435 name: None,
20436 having_max: None,
20437 limit: None,
20438 };
20439 Ok(Expression::ArrayAgg(Box::new(agg)))
20440 } else {
20441 Ok(e)
20442 }
20443 }
20444
20445 Action::CollectSetConvert => {
20446 // COLLECT_SET(x) -> target-specific
20447 if let Expression::AggregateFunction(f) = e {
20448 match target {
20449 DialectType::Presto => Ok(Expression::AggregateFunction(Box::new(
20450 crate::expressions::AggregateFunction {
20451 name: "SET_AGG".to_string(),
20452 args: f.args,
20453 distinct: false,
20454 order_by: f.order_by,
20455 filter: f.filter,
20456 limit: f.limit,
20457 ignore_nulls: f.ignore_nulls,
20458 },
20459 ))),
20460 DialectType::Snowflake => Ok(Expression::AggregateFunction(Box::new(
20461 crate::expressions::AggregateFunction {
20462 name: "ARRAY_UNIQUE_AGG".to_string(),
20463 args: f.args,
20464 distinct: false,
20465 order_by: f.order_by,
20466 filter: f.filter,
20467 limit: f.limit,
20468 ignore_nulls: f.ignore_nulls,
20469 },
20470 ))),
20471 DialectType::Trino | DialectType::DuckDB => {
20472 let agg = crate::expressions::AggFunc {
20473 this: if f.args.is_empty() {
20474 Expression::Null(crate::expressions::Null)
20475 } else {
20476 f.args[0].clone()
20477 },
20478 distinct: true,
20479 order_by: Vec::new(),
20480 filter: None,
20481 ignore_nulls: None,
20482 name: None,
20483 having_max: None,
20484 limit: None,
20485 };
20486 Ok(Expression::ArrayAgg(Box::new(agg)))
20487 }
20488 _ => Ok(Expression::AggregateFunction(f)),
20489 }
20490 } else {
20491 Ok(e)
20492 }
20493 }
20494
20495 Action::PercentileConvert => {
20496 // PERCENTILE(x, 0.5) -> QUANTILE(x, 0.5) / APPROX_PERCENTILE(x, 0.5)
20497 if let Expression::AggregateFunction(f) = e {
20498 let name = match target {
20499 DialectType::DuckDB => "QUANTILE",
20500 DialectType::Presto | DialectType::Trino => "APPROX_PERCENTILE",
20501 _ => "PERCENTILE",
20502 };
20503 Ok(Expression::AggregateFunction(Box::new(
20504 crate::expressions::AggregateFunction {
20505 name: name.to_string(),
20506 args: f.args,
20507 distinct: f.distinct,
20508 order_by: f.order_by,
20509 filter: f.filter,
20510 limit: f.limit,
20511 ignore_nulls: f.ignore_nulls,
20512 },
20513 )))
20514 } else {
20515 Ok(e)
20516 }
20517 }
20518
20519 Action::CorrIsnanWrap => {
20520 // CORR(a, b) -> CASE WHEN ISNAN(CORR(a, b)) THEN NULL ELSE CORR(a, b) END
20521 // The CORR expression could be AggregateFunction, WindowFunction, or Filter-wrapped
20522 let corr_clone = e.clone();
20523 let isnan = Expression::Function(Box::new(Function::new(
20524 "ISNAN".to_string(),
20525 vec![corr_clone.clone()],
20526 )));
20527 let case_expr = Expression::Case(Box::new(Case {
20528 operand: None,
20529 whens: vec![(isnan, Expression::Null(crate::expressions::Null))],
20530 else_: Some(corr_clone),
20531 comments: Vec::new(),
20532 }));
20533 Ok(case_expr)
20534 }
20535
20536 Action::TruncToDateTrunc => {
20537 // TRUNC(timestamp, 'MONTH') -> DATE_TRUNC('MONTH', timestamp)
20538 if let Expression::Function(f) = e {
20539 if f.args.len() == 2 {
20540 let timestamp = f.args[0].clone();
20541 let unit_expr = f.args[1].clone();
20542
20543 if matches!(target, DialectType::ClickHouse) {
20544 // For ClickHouse, produce Expression::DateTrunc which the generator
20545 // outputs as DATE_TRUNC(...) without going through the ClickHouse
20546 // target transform that would convert it to dateTrunc
20547 let unit_str = Self::get_unit_str_static(&unit_expr);
20548 let dt_field = match unit_str.as_str() {
20549 "YEAR" => DateTimeField::Year,
20550 "MONTH" => DateTimeField::Month,
20551 "DAY" => DateTimeField::Day,
20552 "HOUR" => DateTimeField::Hour,
20553 "MINUTE" => DateTimeField::Minute,
20554 "SECOND" => DateTimeField::Second,
20555 "WEEK" => DateTimeField::Week,
20556 "QUARTER" => DateTimeField::Quarter,
20557 _ => DateTimeField::Custom(unit_str),
20558 };
20559 Ok(Expression::DateTrunc(Box::new(
20560 crate::expressions::DateTruncFunc {
20561 this: timestamp,
20562 unit: dt_field,
20563 },
20564 )))
20565 } else {
20566 let new_args = vec![unit_expr, timestamp];
20567 Ok(Expression::Function(Box::new(Function::new(
20568 "DATE_TRUNC".to_string(),
20569 new_args,
20570 ))))
20571 }
20572 } else {
20573 Ok(Expression::Function(f))
20574 }
20575 } else {
20576 Ok(e)
20577 }
20578 }
20579
20580 Action::ArrayContainsConvert => {
20581 if let Expression::ArrayContains(f) = e {
20582 match target {
20583 DialectType::Presto | DialectType::Trino => {
20584 // ARRAY_CONTAINS(arr, val) -> CONTAINS(arr, val)
20585 Ok(Expression::Function(Box::new(Function::new(
20586 "CONTAINS".to_string(),
20587 vec![f.this, f.expression],
20588 ))))
20589 }
20590 DialectType::Snowflake => {
20591 // ARRAY_CONTAINS(arr, val) -> ARRAY_CONTAINS(CAST(val AS VARIANT), arr)
20592 let cast_val =
20593 Expression::Cast(Box::new(crate::expressions::Cast {
20594 this: f.expression,
20595 to: crate::expressions::DataType::Custom {
20596 name: "VARIANT".to_string(),
20597 },
20598 trailing_comments: Vec::new(),
20599 double_colon_syntax: false,
20600 format: None,
20601 default: None,
20602 }));
20603 Ok(Expression::Function(Box::new(Function::new(
20604 "ARRAY_CONTAINS".to_string(),
20605 vec![cast_val, f.this],
20606 ))))
20607 }
20608 _ => Ok(Expression::ArrayContains(f)),
20609 }
20610 } else {
20611 Ok(e)
20612 }
20613 }
20614
Action::StrPositionExpand => {
    // StrPosition with position arg -> complex STRPOS expansion for Presto/DuckDB
    // LOCATE(substr, str, pos) / STRPOS(str, substr, pos) ->
    // For Presto: IF(STRPOS(SUBSTRING(str, pos), substr) = 0, 0, STRPOS(SUBSTRING(str, pos), substr) + pos - 1)
    // For DuckDB: CASE WHEN STRPOS(SUBSTRING(str, pos), substr) = 0 THEN 0 ELSE STRPOS(SUBSTRING(str, pos), substr) + pos - 1 END
    //
    // Rationale: these targets' STRPOS has no start-position parameter, so
    // the search runs on the suffix SUBSTRING(str, pos) and a hit offset is
    // shifted back by pos - 1; a miss (STRPOS = 0) is preserved as 0.
    if let Expression::StrPosition(sp) = e {
        // Destructure the boxed StrPosition into its owned parts.
        let crate::expressions::StrPosition {
            this,
            substr,
            position,
            occurrence,
        } = *sp;
        let string = *this;
        // A missing substring is represented as a NULL literal in the output.
        let substr_expr = match substr {
            Some(s) => *s,
            None => Expression::Null(Null),
        };
        // Default start position is 1 (SQL string positions are 1-based).
        let pos = match position {
            Some(p) => *p,
            None => Expression::number(1),
        };

        // SUBSTRING(string, pos)
        let substring_call = Expression::Function(Box::new(Function::new(
            "SUBSTRING".to_string(),
            vec![string.clone(), pos.clone()],
        )));
        // STRPOS(SUBSTRING(string, pos), substr)
        let strpos_call = Expression::Function(Box::new(Function::new(
            "STRPOS".to_string(),
            vec![substring_call, substr_expr.clone()],
        )));
        // STRPOS(...) + pos - 1  (built as Sub(Add(strpos, pos), 1))
        let pos_adjusted =
            Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
                Expression::Add(Box::new(crate::expressions::BinaryOp::new(
                    strpos_call.clone(),
                    pos.clone(),
                ))),
                Expression::number(1),
            )));
        // STRPOS(...) = 0  -- the "not found" guard
        let is_zero = Expression::Eq(Box::new(crate::expressions::BinaryOp::new(
            strpos_call.clone(),
            Expression::number(0),
        )));

        match target {
            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                // IF(STRPOS(SUBSTRING(str, pos), substr) = 0, 0, STRPOS(SUBSTRING(str, pos), substr) + pos - 1)
                Ok(Expression::Function(Box::new(Function::new(
                    "IF".to_string(),
                    vec![is_zero, Expression::number(0), pos_adjusted],
                ))))
            }
            DialectType::DuckDB => {
                // CASE WHEN STRPOS(SUBSTRING(str, pos), substr) = 0 THEN 0 ELSE STRPOS(SUBSTRING(str, pos), substr) + pos - 1 END
                Ok(Expression::Case(Box::new(Case {
                    operand: None,
                    whens: vec![(is_zero, Expression::number(0))],
                    else_: Some(pos_adjusted),
                    comments: Vec::new(),
                })))
            }
            _ => {
                // Any other target: reconstruct the StrPosition node from the
                // destructured parts (with defaults now filled in).
                Ok(Expression::StrPosition(Box::new(
                    crate::expressions::StrPosition {
                        this: Box::new(string),
                        substr: Some(Box::new(substr_expr)),
                        position: Some(Box::new(pos)),
                        occurrence,
                    },
                )))
            }
        }
    } else {
        Ok(e)
    }
}
20695
Action::MonthsBetweenConvert => {
    // MONTHS_BETWEEN(end_date, start_date) -> target-specific rewrite.
    if let Expression::MonthsBetween(mb) = e {
        // MonthsBetween is stored as a BinaryFunc: `this` holds the end
        // date, `expression` holds the start date.
        let crate::expressions::BinaryFunc {
            this: end_date,
            expression: start_date,
            ..
        } = *mb;
        match target {
            DialectType::DuckDB => {
                // DuckDB has no MONTHS_BETWEEN; emulate fractional-month
                // semantics as:
                //   DATE_DIFF('MONTH', start, end)
                //   + CASE WHEN DAY(end) = DAY(LAST_DAY(end))
                //          AND DAY(start) = DAY(LAST_DAY(start))
                //     THEN 0
                //     ELSE (DAY(end) - DAY(start)) / 31.0 END
                // i.e. two month-end dates count as whole months, otherwise
                // the day difference contributes a /31.0 fraction.
                let cast_end = Self::ensure_cast_date(end_date);
                let cast_start = Self::ensure_cast_date(start_date);
                let dd = Expression::Function(Box::new(Function::new(
                    "DATE_DIFF".to_string(),
                    vec![
                        Expression::string("MONTH"),
                        cast_start.clone(),
                        cast_end.clone(),
                    ],
                )));
                let day_end = Expression::Function(Box::new(Function::new(
                    "DAY".to_string(),
                    vec![cast_end.clone()],
                )));
                let day_start = Expression::Function(Box::new(Function::new(
                    "DAY".to_string(),
                    vec![cast_start.clone()],
                )));
                let last_day_end = Expression::Function(Box::new(Function::new(
                    "LAST_DAY".to_string(),
                    vec![cast_end.clone()],
                )));
                let last_day_start = Expression::Function(Box::new(Function::new(
                    "LAST_DAY".to_string(),
                    vec![cast_start.clone()],
                )));
                let day_last_end = Expression::Function(Box::new(Function::new(
                    "DAY".to_string(),
                    vec![last_day_end],
                )));
                let day_last_start = Expression::Function(Box::new(Function::new(
                    "DAY".to_string(),
                    vec![last_day_start],
                )));
                // DAY(end) = DAY(LAST_DAY(end)) -- end date is a month-end
                let cond1 = Expression::Eq(Box::new(BinaryOp::new(
                    day_end.clone(),
                    day_last_end,
                )));
                // DAY(start) = DAY(LAST_DAY(start)) -- start date is a month-end
                let cond2 = Expression::Eq(Box::new(BinaryOp::new(
                    day_start.clone(),
                    day_last_start,
                )));
                let both_cond =
                    Expression::And(Box::new(BinaryOp::new(cond1, cond2)));
                // (DAY(end) - DAY(start)) / 31.0 -- parenthesized so the
                // division applies to the whole difference when rendered.
                let day_diff =
                    Expression::Sub(Box::new(BinaryOp::new(day_end, day_start)));
                let day_diff_paren =
                    Expression::Paren(Box::new(crate::expressions::Paren {
                        this: day_diff,
                        trailing_comments: Vec::new(),
                    }));
                let frac = Expression::Div(Box::new(BinaryOp::new(
                    day_diff_paren,
                    Expression::Literal(Literal::Number("31.0".to_string())),
                )));
                let case_expr = Expression::Case(Box::new(Case {
                    operand: None,
                    whens: vec![(both_cond, Expression::number(0))],
                    else_: Some(frac),
                    comments: Vec::new(),
                }));
                Ok(Expression::Add(Box::new(BinaryOp::new(dd, case_expr))))
            }
            DialectType::Snowflake | DialectType::Redshift => {
                // DATEDIFF(MONTH, start, end) with a bare identifier unit.
                // Only the whole-month difference survives; the fractional
                // component of MONTHS_BETWEEN is dropped here.
                let unit = Expression::Identifier(Identifier::new("MONTH"));
                Ok(Expression::Function(Box::new(Function::new(
                    "DATEDIFF".to_string(),
                    vec![unit, start_date, end_date],
                ))))
            }
            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                // DATE_DIFF('MONTH', start, end) -- the Presto family takes
                // the unit as a string literal; also drops the fraction.
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_DIFF".to_string(),
                    vec![Expression::string("MONTH"), start_date, end_date],
                ))))
            }
            // Any other target keeps MonthsBetween (rebuilt, since the
            // original node was destructured above).
            _ => Ok(Expression::MonthsBetween(Box::new(
                crate::expressions::BinaryFunc {
                    this: end_date,
                    expression: start_date,
                    original_name: None,
                },
            ))),
        }
    } else {
        Ok(e)
    }
}
20793
Action::AddMonthsConvert => {
    // ADD_MONTHS(date, n) -> target-specific rewrite. The Snowflake->DuckDB
    // path is special-cased to preserve Snowflake's month-end snapping.
    if let Expression::AddMonths(am) = e {
        let date = am.this;
        let val = am.expression;
        match target {
            DialectType::TSQL | DialectType::Fabric => {
                // DATEADD(MONTH, n, CAST(date AS DATETIME2))
                let cast_date = Self::ensure_cast_datetime2(date);
                Ok(Expression::Function(Box::new(Function::new(
                    "DATEADD".to_string(),
                    vec![
                        Expression::Identifier(Identifier::new("MONTH")),
                        val,
                        cast_date,
                    ],
                ))))
            }
            DialectType::DuckDB if matches!(source, DialectType::Snowflake) => {
                // DuckDB ADD_MONTHS from Snowflake: CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(date + interval) ELSE date + interval END
                // Optionally wrapped in CAST(... AS type) if the input had a specific type

                // Determine the cast type from the date expression
                let (cast_date, return_type) = match &date {
                    Expression::Literal(Literal::String(_)) => {
                        // String literal: CAST(str AS TIMESTAMP), no outer CAST
                        (
                            Expression::Cast(Box::new(Cast {
                                this: date.clone(),
                                to: DataType::Timestamp {
                                    precision: None,
                                    timezone: false,
                                },
                                trailing_comments: Vec::new(),
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                            })),
                            None,
                        )
                    }
                    Expression::Cast(c) => {
                        // Already cast (e.g., '2023-01-31'::DATE) - keep the cast, wrap result in CAST(... AS type)
                        (date.clone(), Some(c.to.clone()))
                    }
                    _ => {
                        // Expression or NULL::TYPE - keep as-is, check for cast type
                        // NOTE(review): this inner `if let Expression::Cast`
                        // can never match -- the `Expression::Cast(c)` arm
                        // above already captures every cast, so this `_` arm
                        // always takes the `else` branch. Dead code; candidate
                        // for cleanup in a behavior-preserving pass.
                        if let Expression::Cast(c) = &date {
                            (date.clone(), Some(c.to.clone()))
                        } else {
                            (date.clone(), None)
                        }
                    }
                };

                // Build the interval expression
                // For non-integer values (float, decimal, cast), use TO_MONTHS(CAST(ROUND(val) AS INT))
                // For integer values, use INTERVAL val MONTH
                let is_non_integer_val = match &val {
                    Expression::Literal(Literal::Number(n)) => n.contains('.'),
                    Expression::Cast(_) => true, // e.g. 3.2::DECIMAL(10,2)
                    Expression::Neg(n) => {
                        // A negated literal is non-integer iff its magnitude is.
                        if let Expression::Literal(Literal::Number(s)) = &n.this {
                            s.contains('.')
                        } else {
                            false
                        }
                    }
                    _ => false,
                };

                let add_interval = if is_non_integer_val {
                    // TO_MONTHS(CAST(ROUND(val) AS INT)) -- rounds the month
                    // count to the nearest whole number first.
                    let round_val = Expression::Function(Box::new(Function::new(
                        "ROUND".to_string(),
                        vec![val.clone()],
                    )));
                    let cast_int = Expression::Cast(Box::new(Cast {
                        this: round_val,
                        to: DataType::Int {
                            length: None,
                            integer_spelling: false,
                        },
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    }));
                    Expression::Function(Box::new(Function::new(
                        "TO_MONTHS".to_string(),
                        vec![cast_int],
                    )))
                } else {
                    // INTERVAL val MONTH
                    // Negative / negated / NULL values are parenthesized so
                    // the rendered INTERVAL parses unambiguously.
                    let interval_val = match &val {
                        Expression::Literal(Literal::Number(n))
                            if n.starts_with('-') =>
                        {
                            Expression::Paren(Box::new(Paren {
                                this: val.clone(),
                                trailing_comments: Vec::new(),
                            }))
                        }
                        Expression::Neg(_) => Expression::Paren(Box::new(Paren {
                            this: val.clone(),
                            trailing_comments: Vec::new(),
                        })),
                        Expression::Null(_) => Expression::Paren(Box::new(Paren {
                            this: val.clone(),
                            trailing_comments: Vec::new(),
                        })),
                        _ => val.clone(),
                    };
                    Expression::Interval(Box::new(crate::expressions::Interval {
                        this: Some(interval_val),
                        unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                            unit: crate::expressions::IntervalUnit::Month,
                            use_plural: false,
                        }),
                    }))
                };

                // Build: date + interval
                let date_plus_interval = Expression::Add(Box::new(BinaryOp::new(
                    cast_date.clone(),
                    add_interval.clone(),
                )));

                // Build LAST_DAY(date)
                let last_day_date = Expression::Function(Box::new(Function::new(
                    "LAST_DAY".to_string(),
                    vec![cast_date.clone()],
                )));

                // Build LAST_DAY(date + interval)
                let last_day_date_plus =
                    Expression::Function(Box::new(Function::new(
                        "LAST_DAY".to_string(),
                        vec![date_plus_interval.clone()],
                    )));

                // Build: CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(date + interval) ELSE date + interval END
                // This reproduces Snowflake's month-end snapping: adding
                // months to a month-end date yields the target month's end.
                let case_expr = Expression::Case(Box::new(Case {
                    operand: None,
                    whens: vec![(
                        Expression::Eq(Box::new(BinaryOp::new(
                            last_day_date,
                            cast_date.clone(),
                        ))),
                        last_day_date_plus,
                    )],
                    else_: Some(date_plus_interval),
                    comments: Vec::new(),
                }));

                // Wrap in CAST(... AS type) if needed
                if let Some(dt) = return_type {
                    Ok(Expression::Cast(Box::new(Cast {
                        this: case_expr,
                        to: dt,
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    })))
                } else {
                    Ok(case_expr)
                }
            }
            DialectType::DuckDB => {
                // Non-Snowflake source: simple date + INTERVAL (no month-end
                // snapping); string-literal dates are cast to TIMESTAMP first.
                let cast_date =
                    if matches!(&date, Expression::Literal(Literal::String(_))) {
                        Expression::Cast(Box::new(Cast {
                            this: date,
                            to: DataType::Timestamp {
                                precision: None,
                                timezone: false,
                            },
                            trailing_comments: Vec::new(),
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }))
                    } else {
                        date
                    };
                let interval =
                    Expression::Interval(Box::new(crate::expressions::Interval {
                        this: Some(val),
                        unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                            unit: crate::expressions::IntervalUnit::Month,
                            use_plural: false,
                        }),
                    }));
                Ok(Expression::Add(Box::new(BinaryOp::new(
                    cast_date, interval,
                ))))
            }
            DialectType::Snowflake => {
                // Keep ADD_MONTHS when source is also Snowflake; otherwise
                // normalize to DATEADD(MONTH, n, date).
                if matches!(source, DialectType::Snowflake) {
                    Ok(Expression::Function(Box::new(Function::new(
                        "ADD_MONTHS".to_string(),
                        vec![date, val],
                    ))))
                } else {
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATEADD".to_string(),
                        vec![
                            Expression::Identifier(Identifier::new("MONTH")),
                            val,
                            date,
                        ],
                    ))))
                }
            }
            DialectType::Redshift => {
                // DATEADD(MONTH, n, date)
                Ok(Expression::Function(Box::new(Function::new(
                    "DATEADD".to_string(),
                    vec![
                        Expression::Identifier(Identifier::new("MONTH")),
                        val,
                        date,
                    ],
                ))))
            }
            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                // DATE_ADD('MONTH', n, date); string-literal dates are cast
                // to TIMESTAMP first.
                let cast_date =
                    if matches!(&date, Expression::Literal(Literal::String(_))) {
                        Expression::Cast(Box::new(Cast {
                            this: date,
                            to: DataType::Timestamp {
                                precision: None,
                                timezone: false,
                            },
                            trailing_comments: Vec::new(),
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }))
                    } else {
                        date
                    };
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![Expression::string("MONTH"), val, cast_date],
                ))))
            }
            DialectType::BigQuery => {
                // DATE_ADD(date, INTERVAL n MONTH); string-literal dates are
                // cast to DATETIME (emitted via a Custom type name).
                let interval =
                    Expression::Interval(Box::new(crate::expressions::Interval {
                        this: Some(val),
                        unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                            unit: crate::expressions::IntervalUnit::Month,
                            use_plural: false,
                        }),
                    }));
                let cast_date =
                    if matches!(&date, Expression::Literal(Literal::String(_))) {
                        Expression::Cast(Box::new(Cast {
                            this: date,
                            to: DataType::Custom {
                                name: "DATETIME".to_string(),
                            },
                            trailing_comments: Vec::new(),
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }))
                    } else {
                        date
                    };
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![cast_date, interval],
                ))))
            }
            DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
                // These targets support ADD_MONTHS natively.
                Ok(Expression::Function(Box::new(Function::new(
                    "ADD_MONTHS".to_string(),
                    vec![date, val],
                ))))
            }
            _ => {
                // Default: keep as AddMonths expression
                Ok(Expression::AddMonths(Box::new(
                    crate::expressions::BinaryFunc {
                        this: date,
                        expression: val,
                        original_name: None,
                    },
                )))
            }
        }
    } else {
        Ok(e)
    }
}
21092
21093 Action::PercentileContConvert => {
21094 // PERCENTILE_CONT(p) WITHIN GROUP (ORDER BY col) ->
21095 // Presto/Trino: APPROX_PERCENTILE(col, p)
21096 // Spark/Databricks: PERCENTILE_APPROX(col, p)
21097 if let Expression::WithinGroup(wg) = e {
21098 // Extract percentile value and order by column
21099 let (percentile, _is_disc) = match &wg.this {
21100 Expression::Function(f) => {
21101 let is_disc = f.name.eq_ignore_ascii_case("PERCENTILE_DISC");
21102 let pct = f.args.first().cloned().unwrap_or(Expression::Literal(
21103 Literal::Number("0.5".to_string()),
21104 ));
21105 (pct, is_disc)
21106 }
21107 Expression::AggregateFunction(af) => {
21108 let is_disc = af.name.eq_ignore_ascii_case("PERCENTILE_DISC");
21109 let pct = af.args.first().cloned().unwrap_or(Expression::Literal(
21110 Literal::Number("0.5".to_string()),
21111 ));
21112 (pct, is_disc)
21113 }
21114 Expression::PercentileCont(pc) => (pc.percentile.clone(), false),
21115 _ => return Ok(Expression::WithinGroup(wg)),
21116 };
21117 let col = wg
21118 .order_by
21119 .first()
21120 .map(|o| o.this.clone())
21121 .unwrap_or(Expression::Literal(Literal::Number("1".to_string())));
21122
21123 let func_name = match target {
21124 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
21125 "APPROX_PERCENTILE"
21126 }
21127 _ => "PERCENTILE_APPROX", // Spark, Databricks
21128 };
21129 Ok(Expression::Function(Box::new(Function::new(
21130 func_name.to_string(),
21131 vec![col, percentile],
21132 ))))
21133 } else {
21134 Ok(e)
21135 }
21136 }
21137
21138 Action::CurrentUserSparkParens => {
21139 // CURRENT_USER -> CURRENT_USER() for Spark
21140 if let Expression::CurrentUser(_) = e {
21141 Ok(Expression::Function(Box::new(Function::new(
21142 "CURRENT_USER".to_string(),
21143 vec![],
21144 ))))
21145 } else {
21146 Ok(e)
21147 }
21148 }
21149
21150 Action::SparkDateFuncCast => {
21151 // MONTH/YEAR/DAY('string') from Spark -> wrap arg in CAST to DATE
21152 let cast_arg = |arg: Expression| -> Expression {
21153 match target {
21154 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
21155 Self::double_cast_timestamp_date(arg)
21156 }
21157 _ => {
21158 // DuckDB, PostgreSQL, etc: CAST(arg AS DATE)
21159 Self::ensure_cast_date(arg)
21160 }
21161 }
21162 };
21163 match e {
21164 Expression::Month(f) => Ok(Expression::Month(Box::new(
21165 crate::expressions::UnaryFunc::new(cast_arg(f.this)),
21166 ))),
21167 Expression::Year(f) => Ok(Expression::Year(Box::new(
21168 crate::expressions::UnaryFunc::new(cast_arg(f.this)),
21169 ))),
21170 Expression::Day(f) => Ok(Expression::Day(Box::new(
21171 crate::expressions::UnaryFunc::new(cast_arg(f.this)),
21172 ))),
21173 other => Ok(other),
21174 }
21175 }
21176
21177 Action::MapFromArraysConvert => {
21178 // Expression::MapFromArrays -> target-specific
21179 if let Expression::MapFromArrays(mfa) = e {
21180 let keys = mfa.this;
21181 let values = mfa.expression;
21182 match target {
21183 DialectType::Snowflake => Ok(Expression::Function(Box::new(
21184 Function::new("OBJECT_CONSTRUCT".to_string(), vec![keys, values]),
21185 ))),
21186 _ => {
21187 // Hive, Presto, DuckDB, etc.: MAP(keys, values)
21188 Ok(Expression::Function(Box::new(Function::new(
21189 "MAP".to_string(),
21190 vec![keys, values],
21191 ))))
21192 }
21193 }
21194 } else {
21195 Ok(e)
21196 }
21197 }
21198
21199 Action::AnyToExists => {
21200 if let Expression::Any(q) = e {
21201 if let Some(op) = q.op.clone() {
21202 let lambda_param = crate::expressions::Identifier::new("x");
21203 let rhs = Expression::Identifier(lambda_param.clone());
21204 let body = match op {
21205 crate::expressions::QuantifiedOp::Eq => {
21206 Expression::Eq(Box::new(BinaryOp::new(q.this, rhs)))
21207 }
21208 crate::expressions::QuantifiedOp::Neq => {
21209 Expression::Neq(Box::new(BinaryOp::new(q.this, rhs)))
21210 }
21211 crate::expressions::QuantifiedOp::Lt => {
21212 Expression::Lt(Box::new(BinaryOp::new(q.this, rhs)))
21213 }
21214 crate::expressions::QuantifiedOp::Lte => {
21215 Expression::Lte(Box::new(BinaryOp::new(q.this, rhs)))
21216 }
21217 crate::expressions::QuantifiedOp::Gt => {
21218 Expression::Gt(Box::new(BinaryOp::new(q.this, rhs)))
21219 }
21220 crate::expressions::QuantifiedOp::Gte => {
21221 Expression::Gte(Box::new(BinaryOp::new(q.this, rhs)))
21222 }
21223 };
21224 let lambda =
21225 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
21226 parameters: vec![lambda_param],
21227 body,
21228 colon: false,
21229 parameter_types: Vec::new(),
21230 }));
21231 Ok(Expression::Function(Box::new(Function::new(
21232 "EXISTS".to_string(),
21233 vec![q.subquery, lambda],
21234 ))))
21235 } else {
21236 Ok(Expression::Any(q))
21237 }
21238 } else {
21239 Ok(e)
21240 }
21241 }
21242
21243 Action::GenerateSeriesConvert => {
21244 // GENERATE_SERIES(start, end[, step]) -> SEQUENCE for Spark/Databricks/Hive, wrapped in UNNEST/EXPLODE
21245 // For DuckDB target: wrap in UNNEST(GENERATE_SERIES(...))
21246 // For PG/Redshift target: keep as GENERATE_SERIES but normalize interval string step
21247 if let Expression::Function(f) = e {
21248 if f.name.eq_ignore_ascii_case("GENERATE_SERIES") && f.args.len() >= 2 {
21249 let start = f.args[0].clone();
21250 let end = f.args[1].clone();
21251 let step = f.args.get(2).cloned();
21252
21253 // Normalize step: convert string interval like '1day' or ' 2 days ' to INTERVAL expression
21254 let step = step.map(|s| Self::normalize_interval_string(s, target));
21255
21256 // Helper: wrap CURRENT_TIMESTAMP in CAST(... AS TIMESTAMP) for Presto/Trino/Spark
21257 let maybe_cast_timestamp = |arg: Expression| -> Expression {
21258 if matches!(
21259 target,
21260 DialectType::Presto
21261 | DialectType::Trino
21262 | DialectType::Athena
21263 | DialectType::Spark
21264 | DialectType::Databricks
21265 | DialectType::Hive
21266 ) {
21267 match &arg {
21268 Expression::CurrentTimestamp(_) => {
21269 Expression::Cast(Box::new(Cast {
21270 this: arg,
21271 to: DataType::Timestamp {
21272 precision: None,
21273 timezone: false,
21274 },
21275 trailing_comments: Vec::new(),
21276 double_colon_syntax: false,
21277 format: None,
21278 default: None,
21279 }))
21280 }
21281 _ => arg,
21282 }
21283 } else {
21284 arg
21285 }
21286 };
21287
21288 let start = maybe_cast_timestamp(start);
21289 let end = maybe_cast_timestamp(end);
21290
21291 // For PostgreSQL/Redshift target, keep as GENERATE_SERIES
21292 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
21293 let mut gs_args = vec![start, end];
21294 if let Some(step) = step {
21295 gs_args.push(step);
21296 }
21297 return Ok(Expression::Function(Box::new(Function::new(
21298 "GENERATE_SERIES".to_string(),
21299 gs_args,
21300 ))));
21301 }
21302
21303 // For DuckDB target: wrap in UNNEST(GENERATE_SERIES(...))
21304 if matches!(target, DialectType::DuckDB) {
21305 let mut gs_args = vec![start, end];
21306 if let Some(step) = step {
21307 gs_args.push(step);
21308 }
21309 let gs = Expression::Function(Box::new(Function::new(
21310 "GENERATE_SERIES".to_string(),
21311 gs_args,
21312 )));
21313 return Ok(Expression::Function(Box::new(Function::new(
21314 "UNNEST".to_string(),
21315 vec![gs],
21316 ))));
21317 }
21318
21319 let mut seq_args = vec![start, end];
21320 if let Some(step) = step {
21321 seq_args.push(step);
21322 }
21323
21324 let seq = Expression::Function(Box::new(Function::new(
21325 "SEQUENCE".to_string(),
21326 seq_args,
21327 )));
21328
21329 match target {
21330 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
21331 // Wrap in UNNEST
21332 Ok(Expression::Function(Box::new(Function::new(
21333 "UNNEST".to_string(),
21334 vec![seq],
21335 ))))
21336 }
21337 DialectType::Spark
21338 | DialectType::Databricks
21339 | DialectType::Hive => {
21340 // Wrap in EXPLODE
21341 Ok(Expression::Function(Box::new(Function::new(
21342 "EXPLODE".to_string(),
21343 vec![seq],
21344 ))))
21345 }
21346 _ => {
21347 // Just SEQUENCE for others
21348 Ok(seq)
21349 }
21350 }
21351 } else {
21352 Ok(Expression::Function(f))
21353 }
21354 } else {
21355 Ok(e)
21356 }
21357 }
21358
21359 Action::ConcatCoalesceWrap => {
21360 // CONCAT(a, b) function -> CONCAT(COALESCE(CAST(a AS VARCHAR), ''), ...) for Presto
21361 // CONCAT(a, b) function -> CONCAT(COALESCE(a, ''), ...) for ClickHouse
21362 if let Expression::Function(f) = e {
21363 if f.name.eq_ignore_ascii_case("CONCAT") {
21364 let new_args: Vec<Expression> = f
21365 .args
21366 .into_iter()
21367 .map(|arg| {
21368 let cast_arg = if matches!(
21369 target,
21370 DialectType::Presto
21371 | DialectType::Trino
21372 | DialectType::Athena
21373 ) {
21374 Expression::Cast(Box::new(Cast {
21375 this: arg,
21376 to: DataType::VarChar {
21377 length: None,
21378 parenthesized_length: false,
21379 },
21380 trailing_comments: Vec::new(),
21381 double_colon_syntax: false,
21382 format: None,
21383 default: None,
21384 }))
21385 } else {
21386 arg
21387 };
21388 Expression::Function(Box::new(Function::new(
21389 "COALESCE".to_string(),
21390 vec![cast_arg, Expression::string("")],
21391 )))
21392 })
21393 .collect();
21394 Ok(Expression::Function(Box::new(Function::new(
21395 "CONCAT".to_string(),
21396 new_args,
21397 ))))
21398 } else {
21399 Ok(Expression::Function(f))
21400 }
21401 } else {
21402 Ok(e)
21403 }
21404 }
21405
21406 Action::PipeConcatToConcat => {
21407 // a || b (Concat operator) -> CONCAT(CAST(a AS VARCHAR), CAST(b AS VARCHAR)) for Presto/Trino
21408 if let Expression::Concat(op) = e {
21409 let cast_left = Expression::Cast(Box::new(Cast {
21410 this: op.left,
21411 to: DataType::VarChar {
21412 length: None,
21413 parenthesized_length: false,
21414 },
21415 trailing_comments: Vec::new(),
21416 double_colon_syntax: false,
21417 format: None,
21418 default: None,
21419 }));
21420 let cast_right = Expression::Cast(Box::new(Cast {
21421 this: op.right,
21422 to: DataType::VarChar {
21423 length: None,
21424 parenthesized_length: false,
21425 },
21426 trailing_comments: Vec::new(),
21427 double_colon_syntax: false,
21428 format: None,
21429 default: None,
21430 }));
21431 Ok(Expression::Function(Box::new(Function::new(
21432 "CONCAT".to_string(),
21433 vec![cast_left, cast_right],
21434 ))))
21435 } else {
21436 Ok(e)
21437 }
21438 }
21439
21440 Action::DivFuncConvert => {
21441 // DIV(a, b) -> target-specific integer division
21442 if let Expression::Function(f) = e {
21443 if f.name.eq_ignore_ascii_case("DIV") && f.args.len() == 2 {
21444 let a = f.args[0].clone();
21445 let b = f.args[1].clone();
21446 match target {
21447 DialectType::DuckDB => {
21448 // DIV(a, b) -> CAST(a // b AS DECIMAL)
21449 let int_div = Expression::IntDiv(Box::new(
21450 crate::expressions::BinaryFunc {
21451 this: a,
21452 expression: b,
21453 original_name: None,
21454 },
21455 ));
21456 Ok(Expression::Cast(Box::new(Cast {
21457 this: int_div,
21458 to: DataType::Decimal {
21459 precision: None,
21460 scale: None,
21461 },
21462 trailing_comments: Vec::new(),
21463 double_colon_syntax: false,
21464 format: None,
21465 default: None,
21466 })))
21467 }
21468 DialectType::BigQuery => {
21469 // DIV(a, b) -> CAST(DIV(a, b) AS NUMERIC)
21470 let div_func = Expression::Function(Box::new(Function::new(
21471 "DIV".to_string(),
21472 vec![a, b],
21473 )));
21474 Ok(Expression::Cast(Box::new(Cast {
21475 this: div_func,
21476 to: DataType::Custom {
21477 name: "NUMERIC".to_string(),
21478 },
21479 trailing_comments: Vec::new(),
21480 double_colon_syntax: false,
21481 format: None,
21482 default: None,
21483 })))
21484 }
21485 DialectType::SQLite => {
21486 // DIV(a, b) -> CAST(CAST(CAST(a AS REAL) / b AS INTEGER) AS REAL)
21487 let cast_a = Expression::Cast(Box::new(Cast {
21488 this: a,
21489 to: DataType::Custom {
21490 name: "REAL".to_string(),
21491 },
21492 trailing_comments: Vec::new(),
21493 double_colon_syntax: false,
21494 format: None,
21495 default: None,
21496 }));
21497 let div = Expression::Div(Box::new(BinaryOp::new(cast_a, b)));
21498 let cast_int = Expression::Cast(Box::new(Cast {
21499 this: div,
21500 to: DataType::Int {
21501 length: None,
21502 integer_spelling: true,
21503 },
21504 trailing_comments: Vec::new(),
21505 double_colon_syntax: false,
21506 format: None,
21507 default: None,
21508 }));
21509 Ok(Expression::Cast(Box::new(Cast {
21510 this: cast_int,
21511 to: DataType::Custom {
21512 name: "REAL".to_string(),
21513 },
21514 trailing_comments: Vec::new(),
21515 double_colon_syntax: false,
21516 format: None,
21517 default: None,
21518 })))
21519 }
21520 _ => Ok(Expression::Function(f)),
21521 }
21522 } else {
21523 Ok(Expression::Function(f))
21524 }
21525 } else {
21526 Ok(e)
21527 }
21528 }
21529
21530 Action::JsonObjectAggConvert => {
21531 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
21532 match e {
21533 Expression::Function(f) => Ok(Expression::Function(Box::new(
21534 Function::new("JSON_GROUP_OBJECT".to_string(), f.args),
21535 ))),
21536 Expression::AggregateFunction(af) => {
21537 // AggregateFunction stores all args in the `args` vec
21538 Ok(Expression::Function(Box::new(Function::new(
21539 "JSON_GROUP_OBJECT".to_string(),
21540 af.args,
21541 ))))
21542 }
21543 other => Ok(other),
21544 }
21545 }
21546
21547 Action::JsonbExistsConvert => {
21548 // JSONB_EXISTS('json', 'key') -> JSON_EXISTS('json', '$.key') for DuckDB
21549 if let Expression::Function(f) = e {
21550 if f.args.len() == 2 {
21551 let json_expr = f.args[0].clone();
21552 let key = match &f.args[1] {
21553 Expression::Literal(crate::expressions::Literal::String(s)) => {
21554 format!("$.{}", s)
21555 }
21556 _ => return Ok(Expression::Function(f)),
21557 };
21558 Ok(Expression::Function(Box::new(Function::new(
21559 "JSON_EXISTS".to_string(),
21560 vec![json_expr, Expression::string(&key)],
21561 ))))
21562 } else {
21563 Ok(Expression::Function(f))
21564 }
21565 } else {
21566 Ok(e)
21567 }
21568 }
21569
21570 Action::DateBinConvert => {
21571 // DATE_BIN('interval', ts, origin) -> TIME_BUCKET('interval', ts, origin) for DuckDB
21572 if let Expression::Function(f) = e {
21573 Ok(Expression::Function(Box::new(Function::new(
21574 "TIME_BUCKET".to_string(),
21575 f.args,
21576 ))))
21577 } else {
21578 Ok(e)
21579 }
21580 }
21581
21582 Action::MysqlCastCharToText => {
21583 // MySQL CAST(x AS CHAR) was originally TEXT -> convert to target text type
21584 if let Expression::Cast(mut c) = e {
21585 c.to = DataType::Text;
21586 Ok(Expression::Cast(c))
21587 } else {
21588 Ok(e)
21589 }
21590 }
21591
21592 Action::SparkCastVarcharToString => {
21593 // Spark parses VARCHAR(n)/CHAR(n) as TEXT -> normalize to STRING
21594 match e {
21595 Expression::Cast(mut c) => {
21596 c.to = Self::normalize_varchar_to_string(c.to);
21597 Ok(Expression::Cast(c))
21598 }
21599 Expression::TryCast(mut c) => {
21600 c.to = Self::normalize_varchar_to_string(c.to);
21601 Ok(Expression::TryCast(c))
21602 }
21603 _ => Ok(e),
21604 }
21605 }
21606
21607 Action::MinMaxToLeastGreatest => {
21608 // Multi-arg MIN(a,b,c) -> LEAST(a,b,c), MAX(a,b,c) -> GREATEST(a,b,c)
21609 if let Expression::Function(f) = e {
21610 let name = f.name.to_uppercase();
21611 let new_name = match name.as_str() {
21612 "MIN" => "LEAST",
21613 "MAX" => "GREATEST",
21614 _ => return Ok(Expression::Function(f)),
21615 };
21616 Ok(Expression::Function(Box::new(Function::new(
21617 new_name.to_string(),
21618 f.args,
21619 ))))
21620 } else {
21621 Ok(e)
21622 }
21623 }
21624
21625 Action::ClickHouseUniqToApproxCountDistinct => {
21626 // ClickHouse uniq(x) -> APPROX_COUNT_DISTINCT(x) for non-ClickHouse targets
21627 if let Expression::Function(f) = e {
21628 Ok(Expression::Function(Box::new(Function::new(
21629 "APPROX_COUNT_DISTINCT".to_string(),
21630 f.args,
21631 ))))
21632 } else {
21633 Ok(e)
21634 }
21635 }
21636
21637 Action::ClickHouseAnyToAnyValue => {
21638 // ClickHouse any(x) -> ANY_VALUE(x) for non-ClickHouse targets
21639 if let Expression::Function(f) = e {
21640 Ok(Expression::Function(Box::new(Function::new(
21641 "ANY_VALUE".to_string(),
21642 f.args,
21643 ))))
21644 } else {
21645 Ok(e)
21646 }
21647 }
21648
21649 Action::OracleVarchar2ToVarchar => {
21650 // Oracle VARCHAR2(N CHAR/BYTE) / NVARCHAR2(N) -> VarChar(N) for non-Oracle targets
21651 if let Expression::DataType(DataType::Custom { ref name }) = e {
21652 let upper = name.to_uppercase();
21653 // Extract length from VARCHAR2(N ...) or NVARCHAR2(N ...)
21654 let inner =
21655 if upper.starts_with("VARCHAR2(") || upper.starts_with("NVARCHAR2(") {
21656 let start = if upper.starts_with("N") { 10 } else { 9 }; // skip "NVARCHAR2(" or "VARCHAR2("
21657 let end = name.len() - 1; // skip trailing ")"
21658 Some(&name[start..end])
21659 } else {
21660 Option::None
21661 };
21662 if let Some(inner_str) = inner {
21663 // Parse the number part, ignoring BYTE/CHAR qualifier
21664 let num_str = inner_str.split_whitespace().next().unwrap_or("");
21665 if let Ok(n) = num_str.parse::<u32>() {
21666 Ok(Expression::DataType(DataType::VarChar {
21667 length: Some(n),
21668 parenthesized_length: false,
21669 }))
21670 } else {
21671 Ok(e)
21672 }
21673 } else {
21674 // Plain VARCHAR2 / NVARCHAR2 without parens
21675 Ok(Expression::DataType(DataType::VarChar {
21676 length: Option::None,
21677 parenthesized_length: false,
21678 }))
21679 }
21680 } else {
21681 Ok(e)
21682 }
21683 }
21684
21685 Action::Nvl2Expand => {
21686 // NVL2(a, b[, c]) -> CASE WHEN NOT a IS NULL THEN b [ELSE c] END
21687 // But keep as NVL2 for dialects that support it natively
21688 let nvl2_native = matches!(
21689 target,
21690 DialectType::Oracle
21691 | DialectType::Snowflake
21692 | DialectType::Redshift
21693 | DialectType::Teradata
21694 | DialectType::Spark
21695 | DialectType::Databricks
21696 );
21697 let (a, b, c) = if let Expression::Nvl2(nvl2) = e {
21698 if nvl2_native {
21699 return Ok(Expression::Nvl2(nvl2));
21700 }
21701 (nvl2.this, nvl2.true_value, Some(nvl2.false_value))
21702 } else if let Expression::Function(f) = e {
21703 if nvl2_native {
21704 return Ok(Expression::Function(Box::new(Function::new(
21705 "NVL2".to_string(),
21706 f.args,
21707 ))));
21708 }
21709 if f.args.len() < 2 {
21710 return Ok(Expression::Function(f));
21711 }
21712 let mut args = f.args;
21713 let a = args.remove(0);
21714 let b = args.remove(0);
21715 let c = if !args.is_empty() {
21716 Some(args.remove(0))
21717 } else {
21718 Option::None
21719 };
21720 (a, b, c)
21721 } else {
21722 return Ok(e);
21723 };
21724 // Build: NOT (a IS NULL)
21725 let is_null = Expression::IsNull(Box::new(IsNull {
21726 this: a,
21727 not: false,
21728 postfix_form: false,
21729 }));
21730 let not_null =
21731 Expression::Not(Box::new(crate::expressions::UnaryOp { this: is_null }));
21732 Ok(Expression::Case(Box::new(Case {
21733 operand: Option::None,
21734 whens: vec![(not_null, b)],
21735 else_: c,
21736 comments: Vec::new(),
21737 })))
21738 }
21739
21740 Action::IfnullToCoalesce => {
21741 // IFNULL(a, b) -> COALESCE(a, b): clear original_name to output COALESCE
21742 if let Expression::Coalesce(mut cf) = e {
21743 cf.original_name = Option::None;
21744 Ok(Expression::Coalesce(cf))
21745 } else if let Expression::Function(f) = e {
21746 Ok(Expression::Function(Box::new(Function::new(
21747 "COALESCE".to_string(),
21748 f.args,
21749 ))))
21750 } else {
21751 Ok(e)
21752 }
21753 }
21754
21755 Action::IsAsciiConvert => {
21756 // IS_ASCII(x) -> dialect-specific ASCII check
21757 if let Expression::Function(f) = e {
21758 let arg = f.args.into_iter().next().unwrap();
21759 match target {
21760 DialectType::MySQL | DialectType::SingleStore | DialectType::TiDB => {
21761 // REGEXP_LIKE(x, '^[[:ascii:]]*$')
21762 Ok(Expression::Function(Box::new(Function::new(
21763 "REGEXP_LIKE".to_string(),
21764 vec![
21765 arg,
21766 Expression::Literal(Literal::String(
21767 "^[[:ascii:]]*$".to_string(),
21768 )),
21769 ],
21770 ))))
21771 }
21772 DialectType::PostgreSQL
21773 | DialectType::Redshift
21774 | DialectType::Materialize
21775 | DialectType::RisingWave => {
21776 // (x ~ '^[[:ascii:]]*$')
21777 Ok(Expression::Paren(Box::new(Paren {
21778 this: Expression::RegexpLike(Box::new(
21779 crate::expressions::RegexpFunc {
21780 this: arg,
21781 pattern: Expression::Literal(Literal::String(
21782 "^[[:ascii:]]*$".to_string(),
21783 )),
21784 flags: Option::None,
21785 },
21786 )),
21787 trailing_comments: Vec::new(),
21788 })))
21789 }
21790 DialectType::SQLite => {
21791 // (NOT x GLOB CAST(x'2a5b5e012d7f5d2a' AS TEXT))
21792 let hex_lit = Expression::Literal(Literal::HexString(
21793 "2a5b5e012d7f5d2a".to_string(),
21794 ));
21795 let cast_expr = Expression::Cast(Box::new(Cast {
21796 this: hex_lit,
21797 to: DataType::Text,
21798 trailing_comments: Vec::new(),
21799 double_colon_syntax: false,
21800 format: Option::None,
21801 default: Option::None,
21802 }));
21803 let glob = Expression::Glob(Box::new(BinaryOp {
21804 left: arg,
21805 right: cast_expr,
21806 left_comments: Vec::new(),
21807 operator_comments: Vec::new(),
21808 trailing_comments: Vec::new(),
21809 }));
21810 Ok(Expression::Paren(Box::new(Paren {
21811 this: Expression::Not(Box::new(crate::expressions::UnaryOp {
21812 this: glob,
21813 })),
21814 trailing_comments: Vec::new(),
21815 })))
21816 }
21817 DialectType::TSQL | DialectType::Fabric => {
21818 // (PATINDEX(CONVERT(VARCHAR(MAX), 0x255b5e002d7f5d25) COLLATE Latin1_General_BIN, x) = 0)
21819 let hex_lit = Expression::Literal(Literal::HexNumber(
21820 "255b5e002d7f5d25".to_string(),
21821 ));
21822 let convert_expr = Expression::Convert(Box::new(
21823 crate::expressions::ConvertFunc {
21824 this: hex_lit,
21825 to: DataType::Text, // Text generates as VARCHAR(MAX) for TSQL
21826 style: None,
21827 },
21828 ));
21829 let collated = Expression::Collation(Box::new(
21830 crate::expressions::CollationExpr {
21831 this: convert_expr,
21832 collation: "Latin1_General_BIN".to_string(),
21833 quoted: false,
21834 double_quoted: false,
21835 },
21836 ));
21837 let patindex = Expression::Function(Box::new(Function::new(
21838 "PATINDEX".to_string(),
21839 vec![collated, arg],
21840 )));
21841 let zero = Expression::Literal(Literal::Number("0".to_string()));
21842 let eq_zero = Expression::Eq(Box::new(BinaryOp {
21843 left: patindex,
21844 right: zero,
21845 left_comments: Vec::new(),
21846 operator_comments: Vec::new(),
21847 trailing_comments: Vec::new(),
21848 }));
21849 Ok(Expression::Paren(Box::new(Paren {
21850 this: eq_zero,
21851 trailing_comments: Vec::new(),
21852 })))
21853 }
21854 DialectType::Oracle => {
21855 // NVL(REGEXP_LIKE(x, '^[' || CHR(1) || '-' || CHR(127) || ']*$'), TRUE)
21856 // Build the pattern: '^[' || CHR(1) || '-' || CHR(127) || ']*$'
21857 let s1 = Expression::Literal(Literal::String("^[".to_string()));
21858 let chr1 = Expression::Function(Box::new(Function::new(
21859 "CHR".to_string(),
21860 vec![Expression::Literal(Literal::Number("1".to_string()))],
21861 )));
21862 let dash = Expression::Literal(Literal::String("-".to_string()));
21863 let chr127 = Expression::Function(Box::new(Function::new(
21864 "CHR".to_string(),
21865 vec![Expression::Literal(Literal::Number("127".to_string()))],
21866 )));
21867 let s2 = Expression::Literal(Literal::String("]*$".to_string()));
21868 // Build: '^[' || CHR(1) || '-' || CHR(127) || ']*$'
21869 let concat1 =
21870 Expression::DPipe(Box::new(crate::expressions::DPipe {
21871 this: Box::new(s1),
21872 expression: Box::new(chr1),
21873 safe: None,
21874 }));
21875 let concat2 =
21876 Expression::DPipe(Box::new(crate::expressions::DPipe {
21877 this: Box::new(concat1),
21878 expression: Box::new(dash),
21879 safe: None,
21880 }));
21881 let concat3 =
21882 Expression::DPipe(Box::new(crate::expressions::DPipe {
21883 this: Box::new(concat2),
21884 expression: Box::new(chr127),
21885 safe: None,
21886 }));
21887 let concat4 =
21888 Expression::DPipe(Box::new(crate::expressions::DPipe {
21889 this: Box::new(concat3),
21890 expression: Box::new(s2),
21891 safe: None,
21892 }));
21893 let regexp_like = Expression::Function(Box::new(Function::new(
21894 "REGEXP_LIKE".to_string(),
21895 vec![arg, concat4],
21896 )));
21897 // Use Column("TRUE") to output literal TRUE keyword (not boolean 1/0)
21898 let true_expr = Expression::Column(crate::expressions::Column {
21899 name: Identifier {
21900 name: "TRUE".to_string(),
21901 quoted: false,
21902 trailing_comments: Vec::new(),
21903 span: None,
21904 },
21905 table: None,
21906 join_mark: false,
21907 trailing_comments: Vec::new(),
21908 span: None,
21909 });
21910 let nvl = Expression::Function(Box::new(Function::new(
21911 "NVL".to_string(),
21912 vec![regexp_like, true_expr],
21913 )));
21914 Ok(nvl)
21915 }
21916 _ => Ok(Expression::Function(Box::new(Function::new(
21917 "IS_ASCII".to_string(),
21918 vec![arg],
21919 )))),
21920 }
21921 } else {
21922 Ok(e)
21923 }
21924 }
21925
21926 Action::StrPositionConvert => {
21927 // STR_POSITION(haystack, needle[, position[, occurrence]]) -> dialect-specific
21928 if let Expression::Function(f) = e {
21929 if f.args.len() < 2 {
21930 return Ok(Expression::Function(f));
21931 }
21932 let mut args = f.args;
21933
21934 let haystack = args.remove(0);
21935 let needle = args.remove(0);
21936 let position = if !args.is_empty() {
21937 Some(args.remove(0))
21938 } else {
21939 Option::None
21940 };
21941 let occurrence = if !args.is_empty() {
21942 Some(args.remove(0))
21943 } else {
21944 Option::None
21945 };
21946
21947 // Helper to build: STRPOS/INSTR(SUBSTRING(haystack, pos), needle) expansion
21948 // Returns: CASE/IF WHEN func(SUBSTRING(haystack, pos), needle[, occ]) = 0 THEN 0 ELSE ... + pos - 1 END
21949 fn build_position_expansion(
21950 haystack: Expression,
21951 needle: Expression,
21952 pos: Expression,
21953 occurrence: Option<Expression>,
21954 inner_func: &str,
21955 wrapper: &str, // "CASE", "IF", "IIF"
21956 ) -> Expression {
21957 let substr = Expression::Function(Box::new(Function::new(
21958 "SUBSTRING".to_string(),
21959 vec![haystack, pos.clone()],
21960 )));
21961 let mut inner_args = vec![substr, needle];
21962 if let Some(occ) = occurrence {
21963 inner_args.push(occ);
21964 }
21965 let inner_call = Expression::Function(Box::new(Function::new(
21966 inner_func.to_string(),
21967 inner_args,
21968 )));
21969 let zero = Expression::Literal(Literal::Number("0".to_string()));
21970 let one = Expression::Literal(Literal::Number("1".to_string()));
21971 let eq_zero = Expression::Eq(Box::new(BinaryOp {
21972 left: inner_call.clone(),
21973 right: zero.clone(),
21974 left_comments: Vec::new(),
21975 operator_comments: Vec::new(),
21976 trailing_comments: Vec::new(),
21977 }));
21978 let add_pos = Expression::Add(Box::new(BinaryOp {
21979 left: inner_call,
21980 right: pos,
21981 left_comments: Vec::new(),
21982 operator_comments: Vec::new(),
21983 trailing_comments: Vec::new(),
21984 }));
21985 let sub_one = Expression::Sub(Box::new(BinaryOp {
21986 left: add_pos,
21987 right: one,
21988 left_comments: Vec::new(),
21989 operator_comments: Vec::new(),
21990 trailing_comments: Vec::new(),
21991 }));
21992
21993 match wrapper {
21994 "CASE" => Expression::Case(Box::new(Case {
21995 operand: Option::None,
21996 whens: vec![(eq_zero, zero)],
21997 else_: Some(sub_one),
21998 comments: Vec::new(),
21999 })),
22000 "IIF" => Expression::Function(Box::new(Function::new(
22001 "IIF".to_string(),
22002 vec![eq_zero, zero, sub_one],
22003 ))),
22004 _ => Expression::Function(Box::new(Function::new(
22005 "IF".to_string(),
22006 vec![eq_zero, zero, sub_one],
22007 ))),
22008 }
22009 }
22010
22011 match target {
22012 // STRPOS group: Athena, DuckDB, Presto, Trino, Drill
22013 DialectType::Athena
22014 | DialectType::DuckDB
22015 | DialectType::Presto
22016 | DialectType::Trino
22017 | DialectType::Drill => {
22018 if let Some(pos) = position {
22019 let wrapper = if matches!(target, DialectType::DuckDB) {
22020 "CASE"
22021 } else {
22022 "IF"
22023 };
22024 let result = build_position_expansion(
22025 haystack, needle, pos, occurrence, "STRPOS", wrapper,
22026 );
22027 if matches!(target, DialectType::Drill) {
22028 // Drill uses backtick-quoted `IF`
22029 if let Expression::Function(mut f) = result {
22030 f.name = "`IF`".to_string();
22031 Ok(Expression::Function(f))
22032 } else {
22033 Ok(result)
22034 }
22035 } else {
22036 Ok(result)
22037 }
22038 } else {
22039 Ok(Expression::Function(Box::new(Function::new(
22040 "STRPOS".to_string(),
22041 vec![haystack, needle],
22042 ))))
22043 }
22044 }
22045 // SQLite: IIF wrapper
22046 DialectType::SQLite => {
22047 if let Some(pos) = position {
22048 Ok(build_position_expansion(
22049 haystack, needle, pos, occurrence, "INSTR", "IIF",
22050 ))
22051 } else {
22052 Ok(Expression::Function(Box::new(Function::new(
22053 "INSTR".to_string(),
22054 vec![haystack, needle],
22055 ))))
22056 }
22057 }
22058 // INSTR group: Teradata, BigQuery, Oracle
22059 DialectType::Teradata | DialectType::BigQuery | DialectType::Oracle => {
22060 let mut a = vec![haystack, needle];
22061 if let Some(pos) = position {
22062 a.push(pos);
22063 }
22064 if let Some(occ) = occurrence {
22065 a.push(occ);
22066 }
22067 Ok(Expression::Function(Box::new(Function::new(
22068 "INSTR".to_string(),
22069 a,
22070 ))))
22071 }
22072 // CHARINDEX group: Snowflake, TSQL
22073 DialectType::Snowflake | DialectType::TSQL | DialectType::Fabric => {
22074 let mut a = vec![needle, haystack];
22075 if let Some(pos) = position {
22076 a.push(pos);
22077 }
22078 Ok(Expression::Function(Box::new(Function::new(
22079 "CHARINDEX".to_string(),
22080 a,
22081 ))))
22082 }
22083 // POSITION(needle IN haystack): PostgreSQL, Materialize, RisingWave, Redshift
22084 DialectType::PostgreSQL
22085 | DialectType::Materialize
22086 | DialectType::RisingWave
22087 | DialectType::Redshift => {
22088 if let Some(pos) = position {
22089 // Build: CASE WHEN POSITION(needle IN SUBSTRING(haystack FROM pos)) = 0 THEN 0
22090 // ELSE POSITION(...) + pos - 1 END
22091 let substr = Expression::Substring(Box::new(
22092 crate::expressions::SubstringFunc {
22093 this: haystack,
22094 start: pos.clone(),
22095 length: Option::None,
22096 from_for_syntax: true,
22097 },
22098 ));
22099 let pos_in = Expression::StrPosition(Box::new(
22100 crate::expressions::StrPosition {
22101 this: Box::new(substr),
22102 substr: Some(Box::new(needle)),
22103 position: Option::None,
22104 occurrence: Option::None,
22105 },
22106 ));
22107 let zero =
22108 Expression::Literal(Literal::Number("0".to_string()));
22109 let one = Expression::Literal(Literal::Number("1".to_string()));
22110 let eq_zero = Expression::Eq(Box::new(BinaryOp {
22111 left: pos_in.clone(),
22112 right: zero.clone(),
22113 left_comments: Vec::new(),
22114 operator_comments: Vec::new(),
22115 trailing_comments: Vec::new(),
22116 }));
22117 let add_pos = Expression::Add(Box::new(BinaryOp {
22118 left: pos_in,
22119 right: pos,
22120 left_comments: Vec::new(),
22121 operator_comments: Vec::new(),
22122 trailing_comments: Vec::new(),
22123 }));
22124 let sub_one = Expression::Sub(Box::new(BinaryOp {
22125 left: add_pos,
22126 right: one,
22127 left_comments: Vec::new(),
22128 operator_comments: Vec::new(),
22129 trailing_comments: Vec::new(),
22130 }));
22131 Ok(Expression::Case(Box::new(Case {
22132 operand: Option::None,
22133 whens: vec![(eq_zero, zero)],
22134 else_: Some(sub_one),
22135 comments: Vec::new(),
22136 })))
22137 } else {
22138 Ok(Expression::StrPosition(Box::new(
22139 crate::expressions::StrPosition {
22140 this: Box::new(haystack),
22141 substr: Some(Box::new(needle)),
22142 position: Option::None,
22143 occurrence: Option::None,
22144 },
22145 )))
22146 }
22147 }
22148 // LOCATE group: MySQL, Hive, Spark, Databricks, Doris
22149 DialectType::MySQL
22150 | DialectType::SingleStore
22151 | DialectType::TiDB
22152 | DialectType::Hive
22153 | DialectType::Spark
22154 | DialectType::Databricks
22155 | DialectType::Doris
22156 | DialectType::StarRocks => {
22157 let mut a = vec![needle, haystack];
22158 if let Some(pos) = position {
22159 a.push(pos);
22160 }
22161 Ok(Expression::Function(Box::new(Function::new(
22162 "LOCATE".to_string(),
22163 a,
22164 ))))
22165 }
22166 // ClickHouse: POSITION(haystack, needle[, position])
22167 DialectType::ClickHouse => {
22168 let mut a = vec![haystack, needle];
22169 if let Some(pos) = position {
22170 a.push(pos);
22171 }
22172 Ok(Expression::Function(Box::new(Function::new(
22173 "POSITION".to_string(),
22174 a,
22175 ))))
22176 }
22177 _ => {
22178 let mut a = vec![haystack, needle];
22179 if let Some(pos) = position {
22180 a.push(pos);
22181 }
22182 if let Some(occ) = occurrence {
22183 a.push(occ);
22184 }
22185 Ok(Expression::Function(Box::new(Function::new(
22186 "STR_POSITION".to_string(),
22187 a,
22188 ))))
22189 }
22190 }
22191 } else {
22192 Ok(e)
22193 }
22194 }
22195
22196 Action::ArraySumConvert => {
22197 // ARRAY_SUM(arr) -> dialect-specific
22198 if let Expression::Function(f) = e {
22199 let args = f.args;
22200 match target {
22201 DialectType::DuckDB => Ok(Expression::Function(Box::new(
22202 Function::new("LIST_SUM".to_string(), args),
22203 ))),
22204 DialectType::Spark | DialectType::Databricks => {
22205 // AGGREGATE(arr, 0, (acc, x) -> acc + x, acc -> acc)
22206 let arr = args.into_iter().next().unwrap();
22207 let zero = Expression::Literal(Literal::Number("0".to_string()));
22208 let acc_id = Identifier::new("acc");
22209 let x_id = Identifier::new("x");
22210 let acc = Expression::Identifier(acc_id.clone());
22211 let x = Expression::Identifier(x_id.clone());
22212 let add = Expression::Add(Box::new(BinaryOp {
22213 left: acc.clone(),
22214 right: x,
22215 left_comments: Vec::new(),
22216 operator_comments: Vec::new(),
22217 trailing_comments: Vec::new(),
22218 }));
22219 let lambda1 =
22220 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
22221 parameters: vec![acc_id.clone(), x_id],
22222 body: add,
22223 colon: false,
22224 parameter_types: Vec::new(),
22225 }));
22226 let lambda2 =
22227 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
22228 parameters: vec![acc_id],
22229 body: acc,
22230 colon: false,
22231 parameter_types: Vec::new(),
22232 }));
22233 Ok(Expression::Function(Box::new(Function::new(
22234 "AGGREGATE".to_string(),
22235 vec![arr, zero, lambda1, lambda2],
22236 ))))
22237 }
22238 DialectType::Presto | DialectType::Athena => {
22239 // Presto/Athena keep ARRAY_SUM natively
22240 Ok(Expression::Function(Box::new(Function::new(
22241 "ARRAY_SUM".to_string(),
22242 args,
22243 ))))
22244 }
22245 DialectType::Trino => {
22246 // REDUCE(arr, 0, (acc, x) -> acc + x, acc -> acc)
22247 if args.len() == 1 {
22248 let arr = args.into_iter().next().unwrap();
22249 let zero =
22250 Expression::Literal(Literal::Number("0".to_string()));
22251 let acc_id = Identifier::new("acc");
22252 let x_id = Identifier::new("x");
22253 let acc = Expression::Identifier(acc_id.clone());
22254 let x = Expression::Identifier(x_id.clone());
22255 let add = Expression::Add(Box::new(BinaryOp {
22256 left: acc.clone(),
22257 right: x,
22258 left_comments: Vec::new(),
22259 operator_comments: Vec::new(),
22260 trailing_comments: Vec::new(),
22261 }));
22262 let lambda1 = Expression::Lambda(Box::new(
22263 crate::expressions::LambdaExpr {
22264 parameters: vec![acc_id.clone(), x_id],
22265 body: add,
22266 colon: false,
22267 parameter_types: Vec::new(),
22268 },
22269 ));
22270 let lambda2 = Expression::Lambda(Box::new(
22271 crate::expressions::LambdaExpr {
22272 parameters: vec![acc_id],
22273 body: acc,
22274 colon: false,
22275 parameter_types: Vec::new(),
22276 },
22277 ));
22278 Ok(Expression::Function(Box::new(Function::new(
22279 "REDUCE".to_string(),
22280 vec![arr, zero, lambda1, lambda2],
22281 ))))
22282 } else {
22283 Ok(Expression::Function(Box::new(Function::new(
22284 "ARRAY_SUM".to_string(),
22285 args,
22286 ))))
22287 }
22288 }
22289 DialectType::ClickHouse => {
22290 // arraySum(lambda, arr) or arraySum(arr)
22291 Ok(Expression::Function(Box::new(Function::new(
22292 "arraySum".to_string(),
22293 args,
22294 ))))
22295 }
22296 _ => Ok(Expression::Function(Box::new(Function::new(
22297 "ARRAY_SUM".to_string(),
22298 args,
22299 )))),
22300 }
22301 } else {
22302 Ok(e)
22303 }
22304 }
22305
22306 Action::ArraySizeConvert => {
22307 if let Expression::Function(f) = e {
22308 Ok(Expression::Function(Box::new(Function::new(
22309 "REPEATED_COUNT".to_string(),
22310 f.args,
22311 ))))
22312 } else {
22313 Ok(e)
22314 }
22315 }
22316
22317 Action::ArrayAnyConvert => {
22318 if let Expression::Function(f) = e {
22319 let mut args = f.args;
22320 if args.len() == 2 {
22321 let arr = args.remove(0);
22322 let lambda = args.remove(0);
22323
22324 // Extract lambda parameter name and body
22325 let (param_name, pred_body) =
22326 if let Expression::Lambda(ref lam) = lambda {
22327 let name = if let Some(p) = lam.parameters.first() {
22328 p.name.clone()
22329 } else {
22330 "x".to_string()
22331 };
22332 (name, lam.body.clone())
22333 } else {
22334 ("x".to_string(), lambda.clone())
22335 };
22336
22337 // Helper: build a function call Expression
22338 let make_func = |name: &str, args: Vec<Expression>| -> Expression {
22339 Expression::Function(Box::new(Function::new(
22340 name.to_string(),
22341 args,
22342 )))
22343 };
22344
22345 // Helper: build (len_func(arr) = 0 OR len_func(filter_expr) <> 0) wrapped in Paren
22346 let build_filter_pattern = |len_func: &str,
22347 len_args_extra: Vec<Expression>,
22348 filter_expr: Expression|
22349 -> Expression {
22350 // len_func(arr, ...extra) = 0
22351 let mut len_arr_args = vec![arr.clone()];
22352 len_arr_args.extend(len_args_extra.clone());
22353 let len_arr = make_func(len_func, len_arr_args);
22354 let eq_zero = Expression::Eq(Box::new(BinaryOp::new(
22355 len_arr,
22356 Expression::number(0),
22357 )));
22358
22359 // len_func(filter_expr, ...extra) <> 0
22360 let mut len_filter_args = vec![filter_expr];
22361 len_filter_args.extend(len_args_extra);
22362 let len_filter = make_func(len_func, len_filter_args);
22363 let neq_zero = Expression::Neq(Box::new(BinaryOp::new(
22364 len_filter,
22365 Expression::number(0),
22366 )));
22367
22368 // (eq_zero OR neq_zero)
22369 let or_expr =
22370 Expression::Or(Box::new(BinaryOp::new(eq_zero, neq_zero)));
22371 Expression::Paren(Box::new(Paren {
22372 this: or_expr,
22373 trailing_comments: Vec::new(),
22374 }))
22375 };
22376
22377 match target {
22378 DialectType::Trino | DialectType::Presto | DialectType::Athena => {
22379 Ok(make_func("ANY_MATCH", vec![arr, lambda]))
22380 }
22381 DialectType::ClickHouse => {
22382 // (LENGTH(arr) = 0 OR LENGTH(arrayFilter(x -> pred, arr)) <> 0)
22383 // ClickHouse arrayFilter takes lambda first, then array
22384 let filter_expr =
22385 make_func("arrayFilter", vec![lambda, arr.clone()]);
22386 Ok(build_filter_pattern("LENGTH", vec![], filter_expr))
22387 }
22388 DialectType::Databricks | DialectType::Spark => {
22389 // (SIZE(arr) = 0 OR SIZE(FILTER(arr, x -> pred)) <> 0)
22390 let filter_expr =
22391 make_func("FILTER", vec![arr.clone(), lambda]);
22392 Ok(build_filter_pattern("SIZE", vec![], filter_expr))
22393 }
22394 DialectType::DuckDB => {
22395 // (ARRAY_LENGTH(arr) = 0 OR ARRAY_LENGTH(LIST_FILTER(arr, x -> pred)) <> 0)
22396 let filter_expr =
22397 make_func("LIST_FILTER", vec![arr.clone(), lambda]);
22398 Ok(build_filter_pattern("ARRAY_LENGTH", vec![], filter_expr))
22399 }
22400 DialectType::Teradata => {
22401 // (CARDINALITY(arr) = 0 OR CARDINALITY(FILTER(arr, x -> pred)) <> 0)
22402 let filter_expr =
22403 make_func("FILTER", vec![arr.clone(), lambda]);
22404 Ok(build_filter_pattern("CARDINALITY", vec![], filter_expr))
22405 }
22406 DialectType::BigQuery => {
22407 // (ARRAY_LENGTH(arr) = 0 OR ARRAY_LENGTH(ARRAY(SELECT x FROM UNNEST(arr) AS x WHERE pred)) <> 0)
22408 // Build: SELECT x FROM UNNEST(arr) AS x WHERE pred
22409 let param_col = Expression::column(¶m_name);
22410 let unnest_expr = Expression::Unnest(Box::new(
22411 crate::expressions::UnnestFunc {
22412 this: arr.clone(),
22413 expressions: vec![],
22414 with_ordinality: false,
22415 alias: Some(Identifier::new(¶m_name)),
22416 offset_alias: None,
22417 },
22418 ));
22419 let mut sel = crate::expressions::Select::default();
22420 sel.expressions = vec![param_col];
22421 sel.from = Some(crate::expressions::From {
22422 expressions: vec![unnest_expr],
22423 });
22424 sel.where_clause =
22425 Some(crate::expressions::Where { this: pred_body });
22426 let array_subquery =
22427 make_func("ARRAY", vec![Expression::Select(Box::new(sel))]);
22428 Ok(build_filter_pattern("ARRAY_LENGTH", vec![], array_subquery))
22429 }
22430 DialectType::PostgreSQL => {
22431 // (ARRAY_LENGTH(arr, 1) = 0 OR ARRAY_LENGTH(ARRAY(SELECT x FROM UNNEST(arr) AS _t0(x) WHERE pred), 1) <> 0)
22432 // Build: SELECT x FROM UNNEST(arr) AS _t0(x) WHERE pred
22433 let param_col = Expression::column(¶m_name);
22434 // For PostgreSQL, UNNEST uses AS _t0(x) syntax - use TableAlias
22435 let unnest_with_alias =
22436 Expression::Alias(Box::new(crate::expressions::Alias {
22437 this: Expression::Unnest(Box::new(
22438 crate::expressions::UnnestFunc {
22439 this: arr.clone(),
22440 expressions: vec![],
22441 with_ordinality: false,
22442 alias: None,
22443 offset_alias: None,
22444 },
22445 )),
22446 alias: Identifier::new("_t0"),
22447 column_aliases: vec![Identifier::new(¶m_name)],
22448 pre_alias_comments: Vec::new(),
22449 trailing_comments: Vec::new(),
22450 }));
22451 let mut sel = crate::expressions::Select::default();
22452 sel.expressions = vec![param_col];
22453 sel.from = Some(crate::expressions::From {
22454 expressions: vec![unnest_with_alias],
22455 });
22456 sel.where_clause =
22457 Some(crate::expressions::Where { this: pred_body });
22458 let array_subquery =
22459 make_func("ARRAY", vec![Expression::Select(Box::new(sel))]);
22460 Ok(build_filter_pattern(
22461 "ARRAY_LENGTH",
22462 vec![Expression::number(1)],
22463 array_subquery,
22464 ))
22465 }
22466 _ => Ok(Expression::Function(Box::new(Function::new(
22467 "ARRAY_ANY".to_string(),
22468 vec![arr, lambda],
22469 )))),
22470 }
22471 } else {
22472 Ok(Expression::Function(Box::new(Function::new(
22473 "ARRAY_ANY".to_string(),
22474 args,
22475 ))))
22476 }
22477 } else {
22478 Ok(e)
22479 }
22480 }
22481
Action::DecodeSimplify => {
    // DECODE(x, search1, result1, ..., default) -> CASE WHEN ... THEN result1 ... [ELSE default] END
    // For literal search values: CASE WHEN x = search THEN result
    // For NULL search: CASE WHEN x IS NULL THEN result
    // For non-literal (column, expr): CASE WHEN x = search OR (x IS NULL AND search IS NULL) THEN result
    //
    // Oracle's DECODE treats NULL = NULL as a match, which plain SQL equality
    // does not; the branches below reproduce that NULL-matching semantics.

    // True when the expression can never be NULL (a literal, boolean, or
    // negated value), so the NULL-safe OR clause can be skipped entirely.
    fn is_decode_literal(e: &Expression) -> bool {
        matches!(
            e,
            Expression::Literal(_) | Expression::Boolean(_) | Expression::Neg(_)
        )
    }

    // Assembles the CASE expression from the operand, the (search, result)
    // pairs, and the optional trailing default (becomes the ELSE branch).
    let build_decode_case =
        |this_expr: Expression,
         pairs: Vec<(Expression, Expression)>,
         default: Option<Expression>| {
            let whens: Vec<(Expression, Expression)> = pairs
                .into_iter()
                .map(|(search, result)| {
                    if matches!(&search, Expression::Null(_)) {
                        // NULL search -> IS NULL
                        let condition = Expression::Is(Box::new(BinaryOp {
                            left: this_expr.clone(),
                            right: Expression::Null(crate::expressions::Null),
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                        }));
                        (condition, result)
                    } else if is_decode_literal(&search)
                        || is_decode_literal(&this_expr)
                    {
                        // At least one side is a literal -> simple equality (no NULL check needed)
                        let eq = Expression::Eq(Box::new(BinaryOp {
                            left: this_expr.clone(),
                            right: search,
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                        }));
                        (eq, result)
                    } else {
                        // Non-literal -> null-safe comparison
                        // Comparison-operator searches must be parenthesized so
                        // the generated `=` / `IS` bind to the whole expression.
                        let needs_paren = matches!(
                            &search,
                            Expression::Eq(_)
                                | Expression::Neq(_)
                                | Expression::Gt(_)
                                | Expression::Gte(_)
                                | Expression::Lt(_)
                                | Expression::Lte(_)
                        );
                        let search_ref = if needs_paren {
                            Expression::Paren(Box::new(crate::expressions::Paren {
                                this: search.clone(),
                                trailing_comments: Vec::new(),
                            }))
                        } else {
                            search.clone()
                        };
                        // Build: x = search OR (x IS NULL AND search IS NULL)
                        let eq = Expression::Eq(Box::new(BinaryOp {
                            left: this_expr.clone(),
                            right: search_ref,
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                        }));
                        // Second copy of the (possibly parenthesized) search,
                        // used inside the IS NULL test.
                        let search_in_null = if needs_paren {
                            Expression::Paren(Box::new(crate::expressions::Paren {
                                this: search.clone(),
                                trailing_comments: Vec::new(),
                            }))
                        } else {
                            search.clone()
                        };
                        let x_is_null = Expression::Is(Box::new(BinaryOp {
                            left: this_expr.clone(),
                            right: Expression::Null(crate::expressions::Null),
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                        }));
                        let search_is_null = Expression::Is(Box::new(BinaryOp {
                            left: search_in_null,
                            right: Expression::Null(crate::expressions::Null),
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                        }));
                        let both_null = Expression::And(Box::new(BinaryOp {
                            left: x_is_null,
                            right: search_is_null,
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                        }));
                        let condition = Expression::Or(Box::new(BinaryOp {
                            left: eq,
                            right: Expression::Paren(Box::new(
                                crate::expressions::Paren {
                                    this: both_null,
                                    trailing_comments: Vec::new(),
                                },
                            )),
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                        }));
                        (condition, result)
                    }
                })
                .collect();
            // Searched CASE (no operand): CASE WHEN cond THEN result ... END
            Expression::Case(Box::new(Case {
                operand: None,
                whens,
                else_: default,
                comments: Vec::new(),
            }))
        };

    if let Expression::Decode(decode) = e {
        // Structured Decode node: fields map directly onto the builder.
        Ok(build_decode_case(
            decode.this,
            decode.search_results,
            decode.default,
        ))
    } else if let Expression::DecodeCase(dc) = e {
        // DecodeCase has flat expressions: [x, s1, r1, s2, r2, ..., default?]
        let mut exprs = dc.expressions;
        if exprs.len() < 3 {
            // Too few arguments to form even one (search, result) pair;
            // rebuild the node unchanged.
            return Ok(Expression::DecodeCase(Box::new(
                crate::expressions::DecodeCase { expressions: exprs },
            )));
        }
        let this_expr = exprs.remove(0);
        let mut pairs = Vec::new();
        let mut default = None;
        let mut i = 0;
        // Consume (search, result) pairs two at a time.
        while i + 1 < exprs.len() {
            pairs.push((exprs[i].clone(), exprs[i + 1].clone()));
            i += 2;
        }
        if i < exprs.len() {
            // Odd remaining element is the default
            default = Some(exprs[i].clone());
        }
        Ok(build_decode_case(this_expr, pairs, default))
    } else {
        Ok(e)
    }
}
22634
Action::CreateTableLikeToCtas => {
    // CREATE TABLE a LIKE b -> CREATE TABLE a AS SELECT * FROM b LIMIT 0
    // (copies the schema without copying any rows).
    if let Expression::CreateTable(ct) = e {
        // Find the LIKE constraint, if any, and take its source table.
        let like_source = ct.constraints.iter().find_map(|c| {
            if let crate::expressions::TableConstraint::Like { source, .. } = c {
                Some(source.clone())
            } else {
                None
            }
        });
        if let Some(source_table) = like_source {
            let mut new_ct = *ct;
            // Drop all constraints: the LIKE is replaced by the AS SELECT,
            // and the statement form with AS cannot carry constraints.
            new_ct.constraints.clear();
            // Build: SELECT * FROM b LIMIT 0
            let select = Expression::Select(Box::new(crate::expressions::Select {
                expressions: vec![Expression::Star(crate::expressions::Star {
                    table: None,
                    except: None,
                    replace: None,
                    rename: None,
                    trailing_comments: Vec::new(),
                    span: None,
                })],
                from: Some(crate::expressions::From {
                    expressions: vec![Expression::Table(source_table)],
                }),
                // LIMIT 0 ensures the new table starts empty.
                limit: Some(crate::expressions::Limit {
                    this: Expression::Literal(Literal::Number("0".to_string())),
                    percent: false,
                    comments: Vec::new(),
                }),
                ..Default::default()
            }));
            new_ct.as_select = Some(select);
            Ok(Expression::CreateTable(Box::new(new_ct)))
        } else {
            // No LIKE constraint: leave the CREATE TABLE untouched.
            Ok(Expression::CreateTable(ct))
        }
    } else {
        Ok(e)
    }
}
22677
Action::CreateTableLikeToSelectInto => {
    // CREATE TABLE a LIKE b -> SELECT TOP 0 * INTO a FROM b AS temp
    // (T-SQL style: SELECT ... INTO creates the target table; TOP 0 keeps it empty).
    if let Expression::CreateTable(ct) = e {
        // Find the LIKE constraint, if any, and take its source table.
        let like_source = ct.constraints.iter().find_map(|c| {
            if let crate::expressions::TableConstraint::Like { source, .. } = c {
                Some(source.clone())
            } else {
                None
            }
        });
        if let Some(source_table) = like_source {
            // Alias the source as "temp" to match the expected output shape.
            let mut aliased_source = source_table;
            aliased_source.alias = Some(Identifier::new("temp"));
            // Build: SELECT TOP 0 * INTO a FROM b AS temp
            let select = Expression::Select(Box::new(crate::expressions::Select {
                expressions: vec![Expression::Star(crate::expressions::Star {
                    table: None,
                    except: None,
                    replace: None,
                    rename: None,
                    trailing_comments: Vec::new(),
                    span: None,
                })],
                from: Some(crate::expressions::From {
                    expressions: vec![Expression::Table(aliased_source)],
                }),
                // INTO clause targets the table being created.
                into: Some(crate::expressions::SelectInto {
                    this: Expression::Table(ct.name.clone()),
                    temporary: false,
                    unlogged: false,
                    bulk_collect: false,
                    expressions: Vec::new(),
                }),
                // TOP 0 copies the schema but no rows.
                top: Some(crate::expressions::Top {
                    this: Expression::Literal(Literal::Number("0".to_string())),
                    percent: false,
                    with_ties: false,
                    parenthesized: false,
                }),
                ..Default::default()
            }));
            // The whole statement becomes a SELECT ... INTO (no CREATE TABLE node).
            Ok(select)
        } else {
            // No LIKE constraint: leave the CREATE TABLE untouched.
            Ok(Expression::CreateTable(ct))
        }
    } else {
        Ok(e)
    }
}
22727
22728 Action::CreateTableLikeToAs => {
22729 // CREATE TABLE a LIKE b -> CREATE TABLE a AS b (ClickHouse)
22730 if let Expression::CreateTable(ct) = e {
22731 let like_source = ct.constraints.iter().find_map(|c| {
22732 if let crate::expressions::TableConstraint::Like { source, .. } = c {
22733 Some(source.clone())
22734 } else {
22735 None
22736 }
22737 });
22738 if let Some(source_table) = like_source {
22739 let mut new_ct = *ct;
22740 new_ct.constraints.clear();
22741 // AS b (just a table reference, not a SELECT)
22742 new_ct.as_select = Some(Expression::Table(source_table));
22743 Ok(Expression::CreateTable(Box::new(new_ct)))
22744 } else {
22745 Ok(Expression::CreateTable(ct))
22746 }
22747 } else {
22748 Ok(e)
22749 }
22750 }
22751
22752 Action::TsOrDsToDateConvert => {
22753 // TS_OR_DS_TO_DATE(x[, fmt]) -> dialect-specific date conversion
22754 if let Expression::Function(f) = e {
22755 let mut args = f.args;
22756 let this = args.remove(0);
22757 let fmt = if !args.is_empty() {
22758 match &args[0] {
22759 Expression::Literal(Literal::String(s)) => Some(s.clone()),
22760 _ => None,
22761 }
22762 } else {
22763 None
22764 };
22765 Ok(Expression::TsOrDsToDate(Box::new(
22766 crate::expressions::TsOrDsToDate {
22767 this: Box::new(this),
22768 format: fmt,
22769 safe: None,
22770 },
22771 )))
22772 } else {
22773 Ok(e)
22774 }
22775 }
22776
22777 Action::TsOrDsToDateStrConvert => {
22778 // TS_OR_DS_TO_DATE_STR(x) -> SUBSTRING(CAST(x AS type), 1, 10)
22779 if let Expression::Function(f) = e {
22780 let arg = f.args.into_iter().next().unwrap();
22781 let str_type = match target {
22782 DialectType::DuckDB
22783 | DialectType::PostgreSQL
22784 | DialectType::Materialize => DataType::Text,
22785 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
22786 DataType::Custom {
22787 name: "STRING".to_string(),
22788 }
22789 }
22790 DialectType::Presto
22791 | DialectType::Trino
22792 | DialectType::Athena
22793 | DialectType::Drill => DataType::VarChar {
22794 length: None,
22795 parenthesized_length: false,
22796 },
22797 DialectType::MySQL | DialectType::Doris | DialectType::StarRocks => {
22798 DataType::Custom {
22799 name: "STRING".to_string(),
22800 }
22801 }
22802 _ => DataType::VarChar {
22803 length: None,
22804 parenthesized_length: false,
22805 },
22806 };
22807 let cast_expr = Expression::Cast(Box::new(Cast {
22808 this: arg,
22809 to: str_type,
22810 double_colon_syntax: false,
22811 trailing_comments: Vec::new(),
22812 format: None,
22813 default: None,
22814 }));
22815 Ok(Expression::Substring(Box::new(
22816 crate::expressions::SubstringFunc {
22817 this: cast_expr,
22818 start: Expression::number(1),
22819 length: Some(Expression::number(10)),
22820 from_for_syntax: false,
22821 },
22822 )))
22823 } else {
22824 Ok(e)
22825 }
22826 }
22827
22828 Action::DateStrToDateConvert => {
22829 // DATE_STR_TO_DATE(x) -> dialect-specific
22830 if let Expression::Function(f) = e {
22831 let arg = f.args.into_iter().next().unwrap();
22832 match target {
22833 DialectType::SQLite => {
22834 // SQLite: just the bare expression (dates are strings)
22835 Ok(arg)
22836 }
22837 _ => Ok(Expression::Cast(Box::new(Cast {
22838 this: arg,
22839 to: DataType::Date,
22840 double_colon_syntax: false,
22841 trailing_comments: Vec::new(),
22842 format: None,
22843 default: None,
22844 }))),
22845 }
22846 } else {
22847 Ok(e)
22848 }
22849 }
22850
22851 Action::TimeStrToDateConvert => {
22852 // TIME_STR_TO_DATE(x) -> dialect-specific
22853 if let Expression::Function(f) = e {
22854 let arg = f.args.into_iter().next().unwrap();
22855 match target {
22856 DialectType::Hive
22857 | DialectType::Doris
22858 | DialectType::StarRocks
22859 | DialectType::Snowflake => Ok(Expression::Function(Box::new(
22860 Function::new("TO_DATE".to_string(), vec![arg]),
22861 ))),
22862 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
22863 // Presto: CAST(x AS TIMESTAMP)
22864 Ok(Expression::Cast(Box::new(Cast {
22865 this: arg,
22866 to: DataType::Timestamp {
22867 timezone: false,
22868 precision: None,
22869 },
22870 double_colon_syntax: false,
22871 trailing_comments: Vec::new(),
22872 format: None,
22873 default: None,
22874 })))
22875 }
22876 _ => {
22877 // Default: CAST(x AS DATE)
22878 Ok(Expression::Cast(Box::new(Cast {
22879 this: arg,
22880 to: DataType::Date,
22881 double_colon_syntax: false,
22882 trailing_comments: Vec::new(),
22883 format: None,
22884 default: None,
22885 })))
22886 }
22887 }
22888 } else {
22889 Ok(e)
22890 }
22891 }
22892
22893 Action::TimeStrToTimeConvert => {
22894 // TIME_STR_TO_TIME(x[, zone]) -> dialect-specific CAST to timestamp type
22895 if let Expression::Function(f) = e {
22896 let mut args = f.args;
22897 let this = args.remove(0);
22898 let zone = if !args.is_empty() {
22899 match &args[0] {
22900 Expression::Literal(Literal::String(s)) => Some(s.clone()),
22901 _ => None,
22902 }
22903 } else {
22904 None
22905 };
22906 let has_zone = zone.is_some();
22907
22908 match target {
22909 DialectType::SQLite => {
22910 // SQLite: just the bare expression
22911 Ok(this)
22912 }
22913 DialectType::MySQL => {
22914 if has_zone {
22915 // MySQL with zone: TIMESTAMP(x)
22916 Ok(Expression::Function(Box::new(Function::new(
22917 "TIMESTAMP".to_string(),
22918 vec![this],
22919 ))))
22920 } else {
22921 // MySQL: CAST(x AS DATETIME) or with precision
22922 // Use DataType::Custom to avoid MySQL's transform_cast converting
22923 // CAST(x AS TIMESTAMP) -> TIMESTAMP(x)
22924 let precision =
22925 if let Expression::Literal(Literal::String(ref s)) = this {
22926 if let Some(dot_pos) = s.rfind('.') {
22927 let frac = &s[dot_pos + 1..];
22928 let digit_count = frac
22929 .chars()
22930 .take_while(|c| c.is_ascii_digit())
22931 .count();
22932 if digit_count > 0 {
22933 Some(digit_count)
22934 } else {
22935 None
22936 }
22937 } else {
22938 None
22939 }
22940 } else {
22941 None
22942 };
22943 let type_name = match precision {
22944 Some(p) => format!("DATETIME({})", p),
22945 None => "DATETIME".to_string(),
22946 };
22947 Ok(Expression::Cast(Box::new(Cast {
22948 this,
22949 to: DataType::Custom { name: type_name },
22950 double_colon_syntax: false,
22951 trailing_comments: Vec::new(),
22952 format: None,
22953 default: None,
22954 })))
22955 }
22956 }
22957 DialectType::ClickHouse => {
22958 if has_zone {
22959 // ClickHouse with zone: CAST(x AS DateTime64(6, 'zone'))
22960 // We need to strip the timezone offset from the literal if present
22961 let clean_this =
22962 if let Expression::Literal(Literal::String(ref s)) = this {
22963 // Strip timezone offset like "-08:00" or "+00:00"
22964 let re_offset = s.rfind(|c: char| c == '+' || c == '-');
22965 if let Some(offset_pos) = re_offset {
22966 if offset_pos > 10 {
22967 // After the date part
22968 let trimmed = s[..offset_pos].to_string();
22969 Expression::Literal(Literal::String(trimmed))
22970 } else {
22971 this.clone()
22972 }
22973 } else {
22974 this.clone()
22975 }
22976 } else {
22977 this.clone()
22978 };
22979 let zone_str = zone.unwrap();
22980 // Build: CAST(x AS DateTime64(6, 'zone'))
22981 let type_name = format!("DateTime64(6, '{}')", zone_str);
22982 Ok(Expression::Cast(Box::new(Cast {
22983 this: clean_this,
22984 to: DataType::Custom { name: type_name },
22985 double_colon_syntax: false,
22986 trailing_comments: Vec::new(),
22987 format: None,
22988 default: None,
22989 })))
22990 } else {
22991 Ok(Expression::Cast(Box::new(Cast {
22992 this,
22993 to: DataType::Custom {
22994 name: "DateTime64(6)".to_string(),
22995 },
22996 double_colon_syntax: false,
22997 trailing_comments: Vec::new(),
22998 format: None,
22999 default: None,
23000 })))
23001 }
23002 }
23003 DialectType::BigQuery => {
23004 if has_zone {
23005 // BigQuery with zone: CAST(x AS TIMESTAMP)
23006 Ok(Expression::Cast(Box::new(Cast {
23007 this,
23008 to: DataType::Timestamp {
23009 timezone: false,
23010 precision: None,
23011 },
23012 double_colon_syntax: false,
23013 trailing_comments: Vec::new(),
23014 format: None,
23015 default: None,
23016 })))
23017 } else {
23018 // BigQuery: CAST(x AS DATETIME) - Timestamp{tz:false} renders as DATETIME for BigQuery
23019 Ok(Expression::Cast(Box::new(Cast {
23020 this,
23021 to: DataType::Custom {
23022 name: "DATETIME".to_string(),
23023 },
23024 double_colon_syntax: false,
23025 trailing_comments: Vec::new(),
23026 format: None,
23027 default: None,
23028 })))
23029 }
23030 }
23031 DialectType::Doris => {
23032 // Doris: CAST(x AS DATETIME)
23033 Ok(Expression::Cast(Box::new(Cast {
23034 this,
23035 to: DataType::Custom {
23036 name: "DATETIME".to_string(),
23037 },
23038 double_colon_syntax: false,
23039 trailing_comments: Vec::new(),
23040 format: None,
23041 default: None,
23042 })))
23043 }
23044 DialectType::TSQL | DialectType::Fabric => {
23045 if has_zone {
23046 // TSQL with zone: CAST(x AS DATETIMEOFFSET) AT TIME ZONE 'UTC'
23047 let cast_expr = Expression::Cast(Box::new(Cast {
23048 this,
23049 to: DataType::Custom {
23050 name: "DATETIMEOFFSET".to_string(),
23051 },
23052 double_colon_syntax: false,
23053 trailing_comments: Vec::new(),
23054 format: None,
23055 default: None,
23056 }));
23057 Ok(Expression::AtTimeZone(Box::new(
23058 crate::expressions::AtTimeZone {
23059 this: cast_expr,
23060 zone: Expression::Literal(Literal::String(
23061 "UTC".to_string(),
23062 )),
23063 },
23064 )))
23065 } else {
23066 // TSQL: CAST(x AS DATETIME2)
23067 Ok(Expression::Cast(Box::new(Cast {
23068 this,
23069 to: DataType::Custom {
23070 name: "DATETIME2".to_string(),
23071 },
23072 double_colon_syntax: false,
23073 trailing_comments: Vec::new(),
23074 format: None,
23075 default: None,
23076 })))
23077 }
23078 }
23079 DialectType::DuckDB => {
23080 if has_zone {
23081 // DuckDB with zone: CAST(x AS TIMESTAMPTZ)
23082 Ok(Expression::Cast(Box::new(Cast {
23083 this,
23084 to: DataType::Timestamp {
23085 timezone: true,
23086 precision: None,
23087 },
23088 double_colon_syntax: false,
23089 trailing_comments: Vec::new(),
23090 format: None,
23091 default: None,
23092 })))
23093 } else {
23094 // DuckDB: CAST(x AS TIMESTAMP)
23095 Ok(Expression::Cast(Box::new(Cast {
23096 this,
23097 to: DataType::Timestamp {
23098 timezone: false,
23099 precision: None,
23100 },
23101 double_colon_syntax: false,
23102 trailing_comments: Vec::new(),
23103 format: None,
23104 default: None,
23105 })))
23106 }
23107 }
23108 DialectType::PostgreSQL
23109 | DialectType::Materialize
23110 | DialectType::RisingWave => {
23111 if has_zone {
23112 // PostgreSQL with zone: CAST(x AS TIMESTAMPTZ)
23113 Ok(Expression::Cast(Box::new(Cast {
23114 this,
23115 to: DataType::Timestamp {
23116 timezone: true,
23117 precision: None,
23118 },
23119 double_colon_syntax: false,
23120 trailing_comments: Vec::new(),
23121 format: None,
23122 default: None,
23123 })))
23124 } else {
23125 // PostgreSQL: CAST(x AS TIMESTAMP)
23126 Ok(Expression::Cast(Box::new(Cast {
23127 this,
23128 to: DataType::Timestamp {
23129 timezone: false,
23130 precision: None,
23131 },
23132 double_colon_syntax: false,
23133 trailing_comments: Vec::new(),
23134 format: None,
23135 default: None,
23136 })))
23137 }
23138 }
23139 DialectType::Snowflake => {
23140 if has_zone {
23141 // Snowflake with zone: CAST(x AS TIMESTAMPTZ)
23142 Ok(Expression::Cast(Box::new(Cast {
23143 this,
23144 to: DataType::Timestamp {
23145 timezone: true,
23146 precision: None,
23147 },
23148 double_colon_syntax: false,
23149 trailing_comments: Vec::new(),
23150 format: None,
23151 default: None,
23152 })))
23153 } else {
23154 // Snowflake: CAST(x AS TIMESTAMP)
23155 Ok(Expression::Cast(Box::new(Cast {
23156 this,
23157 to: DataType::Timestamp {
23158 timezone: false,
23159 precision: None,
23160 },
23161 double_colon_syntax: false,
23162 trailing_comments: Vec::new(),
23163 format: None,
23164 default: None,
23165 })))
23166 }
23167 }
23168 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
23169 if has_zone {
23170 // Presto/Trino with zone: CAST(x AS TIMESTAMP WITH TIME ZONE)
23171 // Check for precision from sub-second digits
23172 let precision =
23173 if let Expression::Literal(Literal::String(ref s)) = this {
23174 if let Some(dot_pos) = s.rfind('.') {
23175 let frac = &s[dot_pos + 1..];
23176 let digit_count = frac
23177 .chars()
23178 .take_while(|c| c.is_ascii_digit())
23179 .count();
23180 if digit_count > 0
23181 && matches!(target, DialectType::Trino)
23182 {
23183 Some(digit_count as u32)
23184 } else {
23185 None
23186 }
23187 } else {
23188 None
23189 }
23190 } else {
23191 None
23192 };
23193 let dt = if let Some(prec) = precision {
23194 DataType::Timestamp {
23195 timezone: true,
23196 precision: Some(prec),
23197 }
23198 } else {
23199 DataType::Timestamp {
23200 timezone: true,
23201 precision: None,
23202 }
23203 };
23204 Ok(Expression::Cast(Box::new(Cast {
23205 this,
23206 to: dt,
23207 double_colon_syntax: false,
23208 trailing_comments: Vec::new(),
23209 format: None,
23210 default: None,
23211 })))
23212 } else {
23213 // Check for sub-second precision for Trino
23214 let precision =
23215 if let Expression::Literal(Literal::String(ref s)) = this {
23216 if let Some(dot_pos) = s.rfind('.') {
23217 let frac = &s[dot_pos + 1..];
23218 let digit_count = frac
23219 .chars()
23220 .take_while(|c| c.is_ascii_digit())
23221 .count();
23222 if digit_count > 0
23223 && matches!(target, DialectType::Trino)
23224 {
23225 Some(digit_count as u32)
23226 } else {
23227 None
23228 }
23229 } else {
23230 None
23231 }
23232 } else {
23233 None
23234 };
23235 let dt = DataType::Timestamp {
23236 timezone: false,
23237 precision,
23238 };
23239 Ok(Expression::Cast(Box::new(Cast {
23240 this,
23241 to: dt,
23242 double_colon_syntax: false,
23243 trailing_comments: Vec::new(),
23244 format: None,
23245 default: None,
23246 })))
23247 }
23248 }
23249 DialectType::Redshift => {
23250 if has_zone {
23251 // Redshift with zone: CAST(x AS TIMESTAMP WITH TIME ZONE)
23252 Ok(Expression::Cast(Box::new(Cast {
23253 this,
23254 to: DataType::Timestamp {
23255 timezone: true,
23256 precision: None,
23257 },
23258 double_colon_syntax: false,
23259 trailing_comments: Vec::new(),
23260 format: None,
23261 default: None,
23262 })))
23263 } else {
23264 // Redshift: CAST(x AS TIMESTAMP)
23265 Ok(Expression::Cast(Box::new(Cast {
23266 this,
23267 to: DataType::Timestamp {
23268 timezone: false,
23269 precision: None,
23270 },
23271 double_colon_syntax: false,
23272 trailing_comments: Vec::new(),
23273 format: None,
23274 default: None,
23275 })))
23276 }
23277 }
23278 _ => {
23279 // Default: CAST(x AS TIMESTAMP)
23280 Ok(Expression::Cast(Box::new(Cast {
23281 this,
23282 to: DataType::Timestamp {
23283 timezone: false,
23284 precision: None,
23285 },
23286 double_colon_syntax: false,
23287 trailing_comments: Vec::new(),
23288 format: None,
23289 default: None,
23290 })))
23291 }
23292 }
23293 } else {
23294 Ok(e)
23295 }
23296 }
23297
23298 Action::DateToDateStrConvert => {
23299 // DATE_TO_DATE_STR(x) -> CAST(x AS text_type) per dialect
23300 if let Expression::Function(f) = e {
23301 let arg = f.args.into_iter().next().unwrap();
23302 let str_type = match target {
23303 DialectType::DuckDB => DataType::Text,
23304 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
23305 DataType::Custom {
23306 name: "STRING".to_string(),
23307 }
23308 }
23309 DialectType::Presto
23310 | DialectType::Trino
23311 | DialectType::Athena
23312 | DialectType::Drill => DataType::VarChar {
23313 length: None,
23314 parenthesized_length: false,
23315 },
23316 _ => DataType::VarChar {
23317 length: None,
23318 parenthesized_length: false,
23319 },
23320 };
23321 Ok(Expression::Cast(Box::new(Cast {
23322 this: arg,
23323 to: str_type,
23324 double_colon_syntax: false,
23325 trailing_comments: Vec::new(),
23326 format: None,
23327 default: None,
23328 })))
23329 } else {
23330 Ok(e)
23331 }
23332 }
23333
23334 Action::DateToDiConvert => {
23335 // DATE_TO_DI(x) -> CAST(format_func(x, fmt) AS INT)
23336 if let Expression::Function(f) = e {
23337 let arg = f.args.into_iter().next().unwrap();
23338 let inner = match target {
23339 DialectType::DuckDB => {
23340 // STRFTIME(x, '%Y%m%d')
23341 Expression::Function(Box::new(Function::new(
23342 "STRFTIME".to_string(),
23343 vec![arg, Expression::string("%Y%m%d")],
23344 )))
23345 }
23346 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
23347 // DATE_FORMAT(x, 'yyyyMMdd')
23348 Expression::Function(Box::new(Function::new(
23349 "DATE_FORMAT".to_string(),
23350 vec![arg, Expression::string("yyyyMMdd")],
23351 )))
23352 }
23353 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
23354 // DATE_FORMAT(x, '%Y%m%d')
23355 Expression::Function(Box::new(Function::new(
23356 "DATE_FORMAT".to_string(),
23357 vec![arg, Expression::string("%Y%m%d")],
23358 )))
23359 }
23360 DialectType::Drill => {
23361 // TO_DATE(x, 'yyyyMMdd')
23362 Expression::Function(Box::new(Function::new(
23363 "TO_DATE".to_string(),
23364 vec![arg, Expression::string("yyyyMMdd")],
23365 )))
23366 }
23367 _ => {
23368 // Default: STRFTIME(x, '%Y%m%d')
23369 Expression::Function(Box::new(Function::new(
23370 "STRFTIME".to_string(),
23371 vec![arg, Expression::string("%Y%m%d")],
23372 )))
23373 }
23374 };
23375 // Use INT (not INTEGER) for Presto/Trino
23376 let int_type = match target {
23377 DialectType::Presto
23378 | DialectType::Trino
23379 | DialectType::Athena
23380 | DialectType::TSQL
23381 | DialectType::Fabric
23382 | DialectType::SQLite
23383 | DialectType::Redshift => DataType::Custom {
23384 name: "INT".to_string(),
23385 },
23386 _ => DataType::Int {
23387 length: None,
23388 integer_spelling: false,
23389 },
23390 };
23391 Ok(Expression::Cast(Box::new(Cast {
23392 this: inner,
23393 to: int_type,
23394 double_colon_syntax: false,
23395 trailing_comments: Vec::new(),
23396 format: None,
23397 default: None,
23398 })))
23399 } else {
23400 Ok(e)
23401 }
23402 }
23403
            Action::DiToDateConvert => {
                // DI_TO_DATE(x): convert a YYYYMMDD "date-int" into a DATE using the
                // target dialect's string-parsing idiom. Dialects without a specific
                // rewrite keep the DI_TO_DATE call for the generator to handle.
                if let Expression::Function(f) = e {
                    // NOTE(review): assumes the call has at least one argument — this
                    // panics on a malformed zero-arg call; confirm parser guarantees it.
                    let arg = f.args.into_iter().next().unwrap();
                    match target {
                        DialectType::DuckDB => {
                            // CAST(STRPTIME(CAST(x AS TEXT), '%Y%m%d') AS DATE)
                            let cast_text = Expression::Cast(Box::new(Cast {
                                this: arg,
                                to: DataType::Text,
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                            }));
                            let strptime = Expression::Function(Box::new(Function::new(
                                "STRPTIME".to_string(),
                                vec![cast_text, Expression::string("%Y%m%d")],
                            )));
                            Ok(Expression::Cast(Box::new(Cast {
                                this: strptime,
                                to: DataType::Date,
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                            })))
                        }
                        DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
                            // TO_DATE(CAST(x AS STRING), 'yyyyMMdd') — Java-style format
                            let cast_str = Expression::Cast(Box::new(Cast {
                                this: arg,
                                to: DataType::Custom {
                                    name: "STRING".to_string(),
                                },
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                            }));
                            Ok(Expression::Function(Box::new(Function::new(
                                "TO_DATE".to_string(),
                                vec![cast_str, Expression::string("yyyyMMdd")],
                            ))))
                        }
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // CAST(DATE_PARSE(CAST(x AS VARCHAR), '%Y%m%d') AS DATE)
                            let cast_varchar = Expression::Cast(Box::new(Cast {
                                this: arg,
                                to: DataType::VarChar {
                                    length: None,
                                    parenthesized_length: false,
                                },
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                            }));
                            let date_parse = Expression::Function(Box::new(Function::new(
                                "DATE_PARSE".to_string(),
                                vec![cast_varchar, Expression::string("%Y%m%d")],
                            )));
                            Ok(Expression::Cast(Box::new(Cast {
                                this: date_parse,
                                to: DataType::Date,
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                            })))
                        }
                        DialectType::Drill => {
                            // TO_DATE(CAST(x AS VARCHAR), 'yyyyMMdd')
                            let cast_varchar = Expression::Cast(Box::new(Cast {
                                this: arg,
                                to: DataType::VarChar {
                                    length: None,
                                    parenthesized_length: false,
                                },
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                            }));
                            Ok(Expression::Function(Box::new(Function::new(
                                "TO_DATE".to_string(),
                                vec![cast_varchar, Expression::string("yyyyMMdd")],
                            ))))
                        }
                        // Fallback: leave as DI_TO_DATE(x) for the generator.
                        _ => Ok(Expression::Function(Box::new(Function::new(
                            "DI_TO_DATE".to_string(),
                            vec![arg],
                        )))),
                    }
                } else {
                    Ok(e)
                }
            }
23502
23503 Action::TsOrDiToDiConvert => {
23504 // TS_OR_DI_TO_DI(x) -> CAST(SUBSTR(REPLACE(CAST(x AS type), '-', ''), 1, 8) AS INT)
23505 if let Expression::Function(f) = e {
23506 let arg = f.args.into_iter().next().unwrap();
23507 let str_type = match target {
23508 DialectType::DuckDB => DataType::Text,
23509 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
23510 DataType::Custom {
23511 name: "STRING".to_string(),
23512 }
23513 }
23514 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
23515 DataType::VarChar {
23516 length: None,
23517 parenthesized_length: false,
23518 }
23519 }
23520 _ => DataType::VarChar {
23521 length: None,
23522 parenthesized_length: false,
23523 },
23524 };
23525 let cast_str = Expression::Cast(Box::new(Cast {
23526 this: arg,
23527 to: str_type,
23528 double_colon_syntax: false,
23529 trailing_comments: Vec::new(),
23530 format: None,
23531 default: None,
23532 }));
23533 let replace_expr = Expression::Function(Box::new(Function::new(
23534 "REPLACE".to_string(),
23535 vec![cast_str, Expression::string("-"), Expression::string("")],
23536 )));
23537 let substr_name = match target {
23538 DialectType::DuckDB
23539 | DialectType::Hive
23540 | DialectType::Spark
23541 | DialectType::Databricks => "SUBSTR",
23542 _ => "SUBSTR",
23543 };
23544 let substr = Expression::Function(Box::new(Function::new(
23545 substr_name.to_string(),
23546 vec![replace_expr, Expression::number(1), Expression::number(8)],
23547 )));
23548 // Use INT (not INTEGER) for Presto/Trino etc.
23549 let int_type = match target {
23550 DialectType::Presto
23551 | DialectType::Trino
23552 | DialectType::Athena
23553 | DialectType::TSQL
23554 | DialectType::Fabric
23555 | DialectType::SQLite
23556 | DialectType::Redshift => DataType::Custom {
23557 name: "INT".to_string(),
23558 },
23559 _ => DataType::Int {
23560 length: None,
23561 integer_spelling: false,
23562 },
23563 };
23564 Ok(Expression::Cast(Box::new(Cast {
23565 this: substr,
23566 to: int_type,
23567 double_colon_syntax: false,
23568 trailing_comments: Vec::new(),
23569 format: None,
23570 default: None,
23571 })))
23572 } else {
23573 Ok(e)
23574 }
23575 }
23576
            Action::UnixToStrConvert => {
                // UNIX_TO_STR(x, fmt): format a Unix epoch value as a string.
                // A string-literal format becomes a UnixToStr node (the generator owns
                // the per-dialect format translation); a non-literal format expression
                // is expanded into the target dialect's function calls right here,
                // since the generator cannot translate a format it cannot read.
                if let Expression::Function(f) = e {
                    let mut args = f.args;
                    // NOTE(review): assumes at least one argument — panics otherwise.
                    let this = args.remove(0);
                    // Optional second argument: the format expression.
                    let fmt_expr = if !args.is_empty() {
                        Some(args.remove(0))
                    } else {
                        None
                    };

                    // Check if format is a string literal
                    let fmt_str = fmt_expr.as_ref().and_then(|f| {
                        if let Expression::Literal(Literal::String(s)) = f {
                            Some(s.clone())
                        } else {
                            None
                        }
                    });

                    if let Some(fmt_string) = fmt_str {
                        // String literal format -> use UnixToStr expression (generator handles it)
                        Ok(Expression::UnixToStr(Box::new(
                            crate::expressions::UnixToStr {
                                this: Box::new(this),
                                format: Some(fmt_string),
                            },
                        )))
                    } else if let Some(fmt_e) = fmt_expr {
                        // Non-literal format (e.g., identifier `y`) -> build target expression directly
                        match target {
                            DialectType::DuckDB => {
                                // STRFTIME(TO_TIMESTAMP(x), y)
                                let to_ts = Expression::Function(Box::new(Function::new(
                                    "TO_TIMESTAMP".to_string(),
                                    vec![this],
                                )));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "STRFTIME".to_string(),
                                    vec![to_ts, fmt_e],
                                ))))
                            }
                            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                                // DATE_FORMAT(FROM_UNIXTIME(x), y)
                                let from_unix = Expression::Function(Box::new(Function::new(
                                    "FROM_UNIXTIME".to_string(),
                                    vec![this],
                                )));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_FORMAT".to_string(),
                                    vec![from_unix, fmt_e],
                                ))))
                            }
                            DialectType::Hive
                            | DialectType::Spark
                            | DialectType::Databricks
                            | DialectType::Doris
                            | DialectType::StarRocks => {
                                // FROM_UNIXTIME(x, y) — these dialects take the format inline
                                Ok(Expression::Function(Box::new(Function::new(
                                    "FROM_UNIXTIME".to_string(),
                                    vec![this, fmt_e],
                                ))))
                            }
                            _ => {
                                // Default: keep as UNIX_TO_STR(x, y)
                                Ok(Expression::Function(Box::new(Function::new(
                                    "UNIX_TO_STR".to_string(),
                                    vec![this, fmt_e],
                                ))))
                            }
                        }
                    } else {
                        // No format argument at all: UnixToStr with format = None.
                        Ok(Expression::UnixToStr(Box::new(
                            crate::expressions::UnixToStr {
                                this: Box::new(this),
                                format: None,
                            },
                        )))
                    }
                } else {
                    Ok(e)
                }
            }
23661
23662 Action::UnixToTimeConvert => {
23663 // UNIX_TO_TIME(x) -> convert to Expression::UnixToTime for generator
23664 if let Expression::Function(f) = e {
23665 let arg = f.args.into_iter().next().unwrap();
23666 Ok(Expression::UnixToTime(Box::new(
23667 crate::expressions::UnixToTime {
23668 this: Box::new(arg),
23669 scale: None,
23670 zone: None,
23671 hours: None,
23672 minutes: None,
23673 format: None,
23674 target_type: None,
23675 },
23676 )))
23677 } else {
23678 Ok(e)
23679 }
23680 }
23681
            Action::UnixToTimeStrConvert => {
                // UNIX_TO_TIME_STR(x): render a Unix epoch value as a timestamp string,
                // using each dialect's conversion and, where needed, an explicit cast
                // back to that dialect's string type.
                if let Expression::Function(f) = e {
                    // NOTE(review): assumes at least one argument — panics otherwise.
                    let arg = f.args.into_iter().next().unwrap();
                    match target {
                        DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
                            // FROM_UNIXTIME(x) — already yields a string in these dialects
                            Ok(Expression::Function(Box::new(Function::new(
                                "FROM_UNIXTIME".to_string(),
                                vec![arg],
                            ))))
                        }
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // CAST(FROM_UNIXTIME(x) AS VARCHAR)
                            let from_unix = Expression::Function(Box::new(Function::new(
                                "FROM_UNIXTIME".to_string(),
                                vec![arg],
                            )));
                            Ok(Expression::Cast(Box::new(Cast {
                                this: from_unix,
                                to: DataType::VarChar {
                                    length: None,
                                    parenthesized_length: false,
                                },
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                            })))
                        }
                        DialectType::DuckDB => {
                            // CAST(TO_TIMESTAMP(x) AS TEXT)
                            let to_ts = Expression::Function(Box::new(Function::new(
                                "TO_TIMESTAMP".to_string(),
                                vec![arg],
                            )));
                            Ok(Expression::Cast(Box::new(Cast {
                                this: to_ts,
                                to: DataType::Text,
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                            })))
                        }
                        // Fallback: keep UNIX_TO_TIME_STR(x) for the generator.
                        _ => Ok(Expression::Function(Box::new(Function::new(
                            "UNIX_TO_TIME_STR".to_string(),
                            vec![arg],
                        )))),
                    }
                } else {
                    Ok(e)
                }
            }
23736
23737 Action::TimeToUnixConvert => {
23738 // TIME_TO_UNIX(x) -> convert to Expression::TimeToUnix for generator
23739 if let Expression::Function(f) = e {
23740 let arg = f.args.into_iter().next().unwrap();
23741 Ok(Expression::TimeToUnix(Box::new(
23742 crate::expressions::UnaryFunc {
23743 this: arg,
23744 original_name: None,
23745 },
23746 )))
23747 } else {
23748 Ok(e)
23749 }
23750 }
23751
23752 Action::TimeToStrConvert => {
23753 // TIME_TO_STR(x, fmt) -> convert to Expression::TimeToStr for generator
23754 if let Expression::Function(f) = e {
23755 let mut args = f.args;
23756 let this = args.remove(0);
23757 let fmt = match args.remove(0) {
23758 Expression::Literal(Literal::String(s)) => s,
23759 other => {
23760 return Ok(Expression::Function(Box::new(Function::new(
23761 "TIME_TO_STR".to_string(),
23762 vec![this, other],
23763 ))));
23764 }
23765 };
23766 Ok(Expression::TimeToStr(Box::new(
23767 crate::expressions::TimeToStr {
23768 this: Box::new(this),
23769 format: fmt,
23770 culture: None,
23771 zone: None,
23772 },
23773 )))
23774 } else {
23775 Ok(e)
23776 }
23777 }
23778
23779 Action::StrToUnixConvert => {
23780 // STR_TO_UNIX(x, fmt) -> convert to Expression::StrToUnix for generator
23781 if let Expression::Function(f) = e {
23782 let mut args = f.args;
23783 let this = args.remove(0);
23784 let fmt = match args.remove(0) {
23785 Expression::Literal(Literal::String(s)) => s,
23786 other => {
23787 return Ok(Expression::Function(Box::new(Function::new(
23788 "STR_TO_UNIX".to_string(),
23789 vec![this, other],
23790 ))));
23791 }
23792 };
23793 Ok(Expression::StrToUnix(Box::new(
23794 crate::expressions::StrToUnix {
23795 this: Some(Box::new(this)),
23796 format: Some(fmt),
23797 },
23798 )))
23799 } else {
23800 Ok(e)
23801 }
23802 }
23803
            Action::TimeStrToUnixConvert => {
                // TIME_STR_TO_UNIX(x): parse a time string and return its Unix epoch
                // value, using the target dialect's parsing/epoch idiom.
                if let Expression::Function(f) = e {
                    // NOTE(review): assumes at least one argument — panics otherwise.
                    let arg = f.args.into_iter().next().unwrap();
                    match target {
                        DialectType::DuckDB => {
                            // EPOCH(CAST(x AS TIMESTAMP))
                            let cast_ts = Expression::Cast(Box::new(Cast {
                                this: arg,
                                to: DataType::Timestamp {
                                    timezone: false,
                                    precision: None,
                                },
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                            }));
                            Ok(Expression::Function(Box::new(Function::new(
                                "EPOCH".to_string(),
                                vec![cast_ts],
                            ))))
                        }
                        DialectType::Hive
                        | DialectType::Doris
                        | DialectType::StarRocks
                        | DialectType::MySQL => {
                            // UNIX_TIMESTAMP(x) — parses the default time format directly
                            Ok(Expression::Function(Box::new(Function::new(
                                "UNIX_TIMESTAMP".to_string(),
                                vec![arg],
                            ))))
                        }
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // TO_UNIXTIME(DATE_PARSE(x, '%Y-%m-%d %T'))
                            // NOTE(review): assumes x matches 'YYYY-MM-DD HH:MM:SS'.
                            let date_parse = Expression::Function(Box::new(Function::new(
                                "DATE_PARSE".to_string(),
                                vec![arg, Expression::string("%Y-%m-%d %T")],
                            )));
                            Ok(Expression::Function(Box::new(Function::new(
                                "TO_UNIXTIME".to_string(),
                                vec![date_parse],
                            ))))
                        }
                        // Fallback: keep TIME_STR_TO_UNIX(x) for the generator.
                        _ => Ok(Expression::Function(Box::new(Function::new(
                            "TIME_STR_TO_UNIX".to_string(),
                            vec![arg],
                        )))),
                    }
                } else {
                    Ok(e)
                }
            }
23857
23858 Action::TimeToTimeStrConvert => {
23859 // TIME_TO_TIME_STR(x) -> CAST(x AS str_type) per dialect
23860 if let Expression::Function(f) = e {
23861 let arg = f.args.into_iter().next().unwrap();
23862 let str_type = match target {
23863 DialectType::DuckDB => DataType::Text,
23864 DialectType::Hive
23865 | DialectType::Spark
23866 | DialectType::Databricks
23867 | DialectType::Doris
23868 | DialectType::StarRocks => DataType::Custom {
23869 name: "STRING".to_string(),
23870 },
23871 DialectType::Redshift => DataType::Custom {
23872 name: "VARCHAR(MAX)".to_string(),
23873 },
23874 _ => DataType::VarChar {
23875 length: None,
23876 parenthesized_length: false,
23877 },
23878 };
23879 Ok(Expression::Cast(Box::new(Cast {
23880 this: arg,
23881 to: str_type,
23882 double_colon_syntax: false,
23883 trailing_comments: Vec::new(),
23884 format: None,
23885 default: None,
23886 })))
23887 } else {
23888 Ok(e)
23889 }
23890 }
23891
            Action::DateTruncSwapArgs => {
                // DATE_TRUNC('unit', x) from Generic -> target-specific argument order
                // and spelling. Only fires on exactly two arguments with a
                // string-literal unit; anything else is passed through unchanged.
                if let Expression::Function(f) = e {
                    if f.args.len() == 2 {
                        let unit_arg = f.args[0].clone();
                        let expr_arg = f.args[1].clone();
                        // Extract unit string from the first arg
                        let unit_str = match &unit_arg {
                            Expression::Literal(Literal::String(s)) => s.to_uppercase(),
                            _ => return Ok(Expression::Function(f)),
                        };
                        match target {
                            DialectType::BigQuery => {
                                // BigQuery: DATE_TRUNC(x, UNIT) - unquoted unit keyword,
                                // modeled here as a bare column identifier.
                                let unit_ident =
                                    Expression::Column(crate::expressions::Column {
                                        name: crate::expressions::Identifier::new(unit_str),
                                        table: None,
                                        join_mark: false,
                                        trailing_comments: Vec::new(),
                                        span: None,
                                    });
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![expr_arg, unit_ident],
                                ))))
                            }
                            DialectType::Doris => {
                                // Doris: DATE_TRUNC(x, 'UNIT') - swapped, quoted unit
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![expr_arg, Expression::string(&unit_str)],
                                ))))
                            }
                            DialectType::StarRocks => {
                                // StarRocks: DATE_TRUNC('UNIT', x) - keep standard order
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![Expression::string(&unit_str), expr_arg],
                                ))))
                            }
                            DialectType::Spark | DialectType::Databricks => {
                                // Spark: TRUNC(x, 'UNIT')
                                Ok(Expression::Function(Box::new(Function::new(
                                    "TRUNC".to_string(),
                                    vec![expr_arg, Expression::string(&unit_str)],
                                ))))
                            }
                            DialectType::MySQL => {
                                // MySQL has no DATE_TRUNC: expand per unit via helper.
                                Self::date_trunc_to_mysql(&unit_str, &expr_arg)
                            }
                            // Other dialects already accept the standard form.
                            _ => Ok(Expression::Function(f)),
                        }
                    } else {
                        Ok(Expression::Function(f))
                    }
                } else {
                    Ok(e)
                }
            }
23953
            Action::TimestampTruncConvert => {
                // TIMESTAMP_TRUNC(x, UNIT[, tz]) from Generic -> target-specific
                // truncation. Requires at least two arguments; otherwise passthrough.
                if let Expression::Function(f) = e {
                    if f.args.len() >= 2 {
                        let expr_arg = f.args[0].clone();
                        let unit_arg = f.args[1].clone();
                        // Optional third argument: a time zone.
                        let tz_arg = if f.args.len() >= 3 {
                            Some(f.args[2].clone())
                        } else {
                            None
                        };
                        // Extract unit string: accepts a string literal or a bare
                        // identifier (BigQuery-style unquoted unit).
                        let unit_str = match &unit_arg {
                            Expression::Literal(Literal::String(s)) => s.to_uppercase(),
                            Expression::Column(c) => c.name.name.to_uppercase(),
                            _ => {
                                return Ok(Expression::Function(f));
                            }
                        };
                        match target {
                            DialectType::Spark | DialectType::Databricks => {
                                // Spark: DATE_TRUNC('UNIT', x)
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![Expression::string(&unit_str), expr_arg],
                                ))))
                            }
                            DialectType::Doris | DialectType::StarRocks => {
                                // Doris: DATE_TRUNC(x, 'UNIT') - swapped arguments
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![expr_arg, Expression::string(&unit_str)],
                                ))))
                            }
                            DialectType::BigQuery => {
                                // BigQuery: TIMESTAMP_TRUNC(x, UNIT) - keep but with
                                // unquoted unit (a bare column identifier); the optional
                                // timezone is preserved as a third argument.
                                let unit_ident =
                                    Expression::Column(crate::expressions::Column {
                                        name: crate::expressions::Identifier::new(unit_str),
                                        table: None,
                                        join_mark: false,
                                        trailing_comments: Vec::new(),
                                        span: None,
                                    });
                                let mut args = vec![expr_arg, unit_ident];
                                if let Some(tz) = tz_arg {
                                    args.push(tz);
                                }
                                Ok(Expression::Function(Box::new(Function::new(
                                    "TIMESTAMP_TRUNC".to_string(),
                                    args,
                                ))))
                            }
                            DialectType::DuckDB => {
                                // DuckDB with timezone: DATE_TRUNC('UNIT', x AT TIME ZONE 'tz') AT TIME ZONE 'tz'
                                if let Some(tz) = tz_arg {
                                    // Non-literal zone expressions fall back to 'UTC'.
                                    let tz_str = match &tz {
                                        Expression::Literal(Literal::String(s)) => s.clone(),
                                        _ => "UTC".to_string(),
                                    };
                                    // x AT TIME ZONE 'tz'
                                    let at_tz = Expression::AtTimeZone(Box::new(
                                        crate::expressions::AtTimeZone {
                                            this: expr_arg,
                                            zone: Expression::string(&tz_str),
                                        },
                                    ));
                                    // DATE_TRUNC('UNIT', x AT TIME ZONE 'tz')
                                    let trunc = Expression::Function(Box::new(Function::new(
                                        "DATE_TRUNC".to_string(),
                                        vec![Expression::string(&unit_str), at_tz],
                                    )));
                                    // DATE_TRUNC(...) AT TIME ZONE 'tz'
                                    Ok(Expression::AtTimeZone(Box::new(
                                        crate::expressions::AtTimeZone {
                                            this: trunc,
                                            zone: Expression::string(&tz_str),
                                        },
                                    )))
                                } else {
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "DATE_TRUNC".to_string(),
                                        vec![Expression::string(&unit_str), expr_arg],
                                    ))))
                                }
                            }
                            DialectType::Presto
                            | DialectType::Trino
                            | DialectType::Athena
                            | DialectType::Snowflake => {
                                // Presto/Snowflake: DATE_TRUNC('UNIT', x) - drop timezone
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![Expression::string(&unit_str), expr_arg],
                                ))))
                            }
                            _ => {
                                // For most dialects: DATE_TRUNC('UNIT', x) + tz handling
                                let mut args = vec![Expression::string(&unit_str), expr_arg];
                                if let Some(tz) = tz_arg {
                                    args.push(tz);
                                }
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    args,
                                ))))
                            }
                        }
                    } else {
                        Ok(Expression::Function(f))
                    }
                } else {
                    Ok(e)
                }
            }
24069
            Action::StrToDateConvert => {
                // STR_TO_DATE(x, fmt) from Generic -> dialect-specific date parsing.
                // Only fires on exactly two arguments; otherwise passthrough. The two
                // "default" strftime formats get a simpler expansion than an arbitrary
                // literal format, and a non-literal format is kept verbatim.
                if let Expression::Function(f) = e {
                    if f.args.len() == 2 {
                        let mut args = f.args;
                        let this = args.remove(0);
                        let fmt_expr = args.remove(0);
                        // String-literal format, if any.
                        let fmt_str = match &fmt_expr {
                            Expression::Literal(Literal::String(s)) => Some(s.clone()),
                            _ => None,
                        };
                        let default_date = "%Y-%m-%d";
                        let default_time = "%Y-%m-%d %H:%M:%S";
                        let is_default = fmt_str
                            .as_ref()
                            .map_or(false, |f| f == default_date || f == default_time);

                        if is_default {
                            // Default format: handle per-dialect
                            match target {
                                DialectType::MySQL
                                | DialectType::Doris
                                | DialectType::StarRocks => {
                                    // Keep STR_TO_DATE(x, fmt) as-is
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "STR_TO_DATE".to_string(),
                                        vec![this, fmt_expr],
                                    ))))
                                }
                                DialectType::Hive => {
                                    // Hive: CAST(x AS DATE)
                                    Ok(Expression::Cast(Box::new(Cast {
                                        this,
                                        to: DataType::Date,
                                        double_colon_syntax: false,
                                        trailing_comments: Vec::new(),
                                        format: None,
                                        default: None,
                                    })))
                                }
                                DialectType::Presto
                                | DialectType::Trino
                                | DialectType::Athena => {
                                    // Presto: CAST(DATE_PARSE(x, '%Y-%m-%d') AS DATE)
                                    let date_parse =
                                        Expression::Function(Box::new(Function::new(
                                            "DATE_PARSE".to_string(),
                                            vec![this, fmt_expr],
                                        )));
                                    Ok(Expression::Cast(Box::new(Cast {
                                        this: date_parse,
                                        to: DataType::Date,
                                        double_colon_syntax: false,
                                        trailing_comments: Vec::new(),
                                        format: None,
                                        default: None,
                                    })))
                                }
                                _ => {
                                    // Others: TsOrDsToDate (delegates to generator)
                                    Ok(Expression::TsOrDsToDate(Box::new(
                                        crate::expressions::TsOrDsToDate {
                                            this: Box::new(this),
                                            format: None,
                                            safe: None,
                                        },
                                    )))
                                }
                            }
                        } else if let Some(fmt) = fmt_str {
                            // Non-default string-literal format.
                            match target {
                                DialectType::Doris
                                | DialectType::StarRocks
                                | DialectType::MySQL => {
                                    // Keep STR_TO_DATE but with normalized format (%H:%M:%S -> %T, %-d -> %e)
                                    let mut normalized = fmt.clone();
                                    normalized = normalized.replace("%-d", "%e");
                                    normalized = normalized.replace("%-m", "%c");
                                    normalized = normalized.replace("%H:%M:%S", "%T");
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "STR_TO_DATE".to_string(),
                                        vec![this, Expression::string(&normalized)],
                                    ))))
                                }
                                DialectType::Hive => {
                                    // Hive: CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(x, java_fmt)) AS DATE)
                                    let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
                                    let unix_ts =
                                        Expression::Function(Box::new(Function::new(
                                            "UNIX_TIMESTAMP".to_string(),
                                            vec![this, Expression::string(&java_fmt)],
                                        )));
                                    let from_unix =
                                        Expression::Function(Box::new(Function::new(
                                            "FROM_UNIXTIME".to_string(),
                                            vec![unix_ts],
                                        )));
                                    Ok(Expression::Cast(Box::new(Cast {
                                        this: from_unix,
                                        to: DataType::Date,
                                        double_colon_syntax: false,
                                        trailing_comments: Vec::new(),
                                        format: None,
                                        default: None,
                                    })))
                                }
                                DialectType::Spark | DialectType::Databricks => {
                                    // Spark: TO_DATE(x, java_fmt)
                                    let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "TO_DATE".to_string(),
                                        vec![this, Expression::string(&java_fmt)],
                                    ))))
                                }
                                DialectType::Drill => {
                                    // Drill: TO_DATE(x, java_fmt) with T quoted as 'T' in Java format
                                    // The generator's string literal escaping will double the quotes: 'T' -> ''T''
                                    let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
                                    let java_fmt = java_fmt.replace('T', "'T'");
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "TO_DATE".to_string(),
                                        vec![this, Expression::string(&java_fmt)],
                                    ))))
                                }
                                _ => {
                                    // For other dialects: use TsOrDsToDate which delegates to generator
                                    Ok(Expression::TsOrDsToDate(Box::new(
                                        crate::expressions::TsOrDsToDate {
                                            this: Box::new(this),
                                            format: Some(fmt),
                                            safe: None,
                                        },
                                    )))
                                }
                            }
                        } else {
                            // Non-string format - keep as-is
                            let mut new_args = Vec::new();
                            new_args.push(this);
                            new_args.push(fmt_expr);
                            Ok(Expression::Function(Box::new(Function::new(
                                "STR_TO_DATE".to_string(),
                                new_args,
                            ))))
                        }
                    } else {
                        Ok(Expression::Function(f))
                    }
                } else {
                    Ok(e)
                }
            }
24222
24223 Action::TsOrDsAddConvert => {
24224 // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> dialect-specific DATE_ADD
24225 if let Expression::Function(f) = e {
24226 if f.args.len() == 3 {
24227 let mut args = f.args;
24228 let x = args.remove(0);
24229 let n = args.remove(0);
24230 let unit_expr = args.remove(0);
24231 let unit_str = match &unit_expr {
24232 Expression::Literal(Literal::String(s)) => s.to_uppercase(),
24233 _ => "DAY".to_string(),
24234 };
24235
24236 match target {
24237 DialectType::Hive
24238 | DialectType::Spark
24239 | DialectType::Databricks => {
24240 // DATE_ADD(x, n) - only supports DAY unit
24241 Ok(Expression::Function(Box::new(Function::new(
24242 "DATE_ADD".to_string(),
24243 vec![x, n],
24244 ))))
24245 }
24246 DialectType::MySQL => {
24247 // DATE_ADD(x, INTERVAL n UNIT)
24248 let iu = match unit_str.to_uppercase().as_str() {
24249 "YEAR" => crate::expressions::IntervalUnit::Year,
24250 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
24251 "MONTH" => crate::expressions::IntervalUnit::Month,
24252 "WEEK" => crate::expressions::IntervalUnit::Week,
24253 "HOUR" => crate::expressions::IntervalUnit::Hour,
24254 "MINUTE" => crate::expressions::IntervalUnit::Minute,
24255 "SECOND" => crate::expressions::IntervalUnit::Second,
24256 _ => crate::expressions::IntervalUnit::Day,
24257 };
24258 let interval = Expression::Interval(Box::new(
24259 crate::expressions::Interval {
24260 this: Some(n),
24261 unit: Some(
24262 crate::expressions::IntervalUnitSpec::Simple {
24263 unit: iu,
24264 use_plural: false,
24265 },
24266 ),
24267 },
24268 ));
24269 Ok(Expression::Function(Box::new(Function::new(
24270 "DATE_ADD".to_string(),
24271 vec![x, interval],
24272 ))))
24273 }
24274 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
24275 // DATE_ADD('UNIT', n, CAST(CAST(x AS TIMESTAMP) AS DATE))
24276 let cast_ts = Expression::Cast(Box::new(Cast {
24277 this: x,
24278 to: DataType::Timestamp {
24279 precision: None,
24280 timezone: false,
24281 },
24282 double_colon_syntax: false,
24283 trailing_comments: Vec::new(),
24284 format: None,
24285 default: None,
24286 }));
24287 let cast_date = Expression::Cast(Box::new(Cast {
24288 this: cast_ts,
24289 to: DataType::Date,
24290 double_colon_syntax: false,
24291 trailing_comments: Vec::new(),
24292 format: None,
24293 default: None,
24294 }));
24295 Ok(Expression::Function(Box::new(Function::new(
24296 "DATE_ADD".to_string(),
24297 vec![Expression::string(&unit_str), n, cast_date],
24298 ))))
24299 }
24300 DialectType::DuckDB => {
24301 // CAST(x AS DATE) + INTERVAL n UNIT
24302 let cast_date = Expression::Cast(Box::new(Cast {
24303 this: x,
24304 to: DataType::Date,
24305 double_colon_syntax: false,
24306 trailing_comments: Vec::new(),
24307 format: None,
24308 default: None,
24309 }));
24310 let iu = match unit_str.to_uppercase().as_str() {
24311 "YEAR" => crate::expressions::IntervalUnit::Year,
24312 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
24313 "MONTH" => crate::expressions::IntervalUnit::Month,
24314 "WEEK" => crate::expressions::IntervalUnit::Week,
24315 "HOUR" => crate::expressions::IntervalUnit::Hour,
24316 "MINUTE" => crate::expressions::IntervalUnit::Minute,
24317 "SECOND" => crate::expressions::IntervalUnit::Second,
24318 _ => crate::expressions::IntervalUnit::Day,
24319 };
24320 let interval = Expression::Interval(Box::new(
24321 crate::expressions::Interval {
24322 this: Some(n),
24323 unit: Some(
24324 crate::expressions::IntervalUnitSpec::Simple {
24325 unit: iu,
24326 use_plural: false,
24327 },
24328 ),
24329 },
24330 ));
24331 Ok(Expression::Add(Box::new(crate::expressions::BinaryOp {
24332 left: cast_date,
24333 right: interval,
24334 left_comments: Vec::new(),
24335 operator_comments: Vec::new(),
24336 trailing_comments: Vec::new(),
24337 })))
24338 }
24339 DialectType::Drill => {
24340 // DATE_ADD(CAST(x AS DATE), INTERVAL n UNIT)
24341 let cast_date = Expression::Cast(Box::new(Cast {
24342 this: x,
24343 to: DataType::Date,
24344 double_colon_syntax: false,
24345 trailing_comments: Vec::new(),
24346 format: None,
24347 default: None,
24348 }));
24349 let iu = match unit_str.to_uppercase().as_str() {
24350 "YEAR" => crate::expressions::IntervalUnit::Year,
24351 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
24352 "MONTH" => crate::expressions::IntervalUnit::Month,
24353 "WEEK" => crate::expressions::IntervalUnit::Week,
24354 "HOUR" => crate::expressions::IntervalUnit::Hour,
24355 "MINUTE" => crate::expressions::IntervalUnit::Minute,
24356 "SECOND" => crate::expressions::IntervalUnit::Second,
24357 _ => crate::expressions::IntervalUnit::Day,
24358 };
24359 let interval = Expression::Interval(Box::new(
24360 crate::expressions::Interval {
24361 this: Some(n),
24362 unit: Some(
24363 crate::expressions::IntervalUnitSpec::Simple {
24364 unit: iu,
24365 use_plural: false,
24366 },
24367 ),
24368 },
24369 ));
24370 Ok(Expression::Function(Box::new(Function::new(
24371 "DATE_ADD".to_string(),
24372 vec![cast_date, interval],
24373 ))))
24374 }
24375 _ => {
24376 // Default: keep as TS_OR_DS_ADD
24377 Ok(Expression::Function(Box::new(Function::new(
24378 "TS_OR_DS_ADD".to_string(),
24379 vec![x, n, unit_expr],
24380 ))))
24381 }
24382 }
24383 } else {
24384 Ok(Expression::Function(f))
24385 }
24386 } else {
24387 Ok(e)
24388 }
24389 }
24390
24391 Action::DateFromUnixDateConvert => {
24392 // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, CAST('1970-01-01' AS DATE))
24393 if let Expression::Function(f) = e {
24394 // Keep as-is for dialects that support DATE_FROM_UNIX_DATE natively
24395 if matches!(
24396 target,
24397 DialectType::Spark | DialectType::Databricks | DialectType::BigQuery
24398 ) {
24399 return Ok(Expression::Function(Box::new(Function::new(
24400 "DATE_FROM_UNIX_DATE".to_string(),
24401 f.args,
24402 ))));
24403 }
24404 let n = f.args.into_iter().next().unwrap();
24405 let epoch_date = Expression::Cast(Box::new(Cast {
24406 this: Expression::string("1970-01-01"),
24407 to: DataType::Date,
24408 double_colon_syntax: false,
24409 trailing_comments: Vec::new(),
24410 format: None,
24411 default: None,
24412 }));
24413 match target {
24414 DialectType::DuckDB => {
24415 // CAST('1970-01-01' AS DATE) + INTERVAL n DAY
24416 let interval =
24417 Expression::Interval(Box::new(crate::expressions::Interval {
24418 this: Some(n),
24419 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
24420 unit: crate::expressions::IntervalUnit::Day,
24421 use_plural: false,
24422 }),
24423 }));
24424 Ok(Expression::Add(Box::new(
24425 crate::expressions::BinaryOp::new(epoch_date, interval),
24426 )))
24427 }
24428 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
24429 // DATE_ADD('DAY', n, CAST('1970-01-01' AS DATE))
24430 Ok(Expression::Function(Box::new(Function::new(
24431 "DATE_ADD".to_string(),
24432 vec![Expression::string("DAY"), n, epoch_date],
24433 ))))
24434 }
24435 DialectType::Snowflake | DialectType::Redshift | DialectType::TSQL => {
24436 // DATEADD(DAY, n, CAST('1970-01-01' AS DATE))
24437 Ok(Expression::Function(Box::new(Function::new(
24438 "DATEADD".to_string(),
24439 vec![
24440 Expression::Identifier(Identifier::new("DAY")),
24441 n,
24442 epoch_date,
24443 ],
24444 ))))
24445 }
24446 DialectType::BigQuery => {
24447 // DATE_ADD(CAST('1970-01-01' AS DATE), INTERVAL n DAY)
24448 let interval =
24449 Expression::Interval(Box::new(crate::expressions::Interval {
24450 this: Some(n),
24451 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
24452 unit: crate::expressions::IntervalUnit::Day,
24453 use_plural: false,
24454 }),
24455 }));
24456 Ok(Expression::Function(Box::new(Function::new(
24457 "DATE_ADD".to_string(),
24458 vec![epoch_date, interval],
24459 ))))
24460 }
24461 DialectType::MySQL
24462 | DialectType::Doris
24463 | DialectType::StarRocks
24464 | DialectType::Drill => {
24465 // DATE_ADD(CAST('1970-01-01' AS DATE), INTERVAL n DAY)
24466 let interval =
24467 Expression::Interval(Box::new(crate::expressions::Interval {
24468 this: Some(n),
24469 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
24470 unit: crate::expressions::IntervalUnit::Day,
24471 use_plural: false,
24472 }),
24473 }));
24474 Ok(Expression::Function(Box::new(Function::new(
24475 "DATE_ADD".to_string(),
24476 vec![epoch_date, interval],
24477 ))))
24478 }
24479 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
24480 // DATE_ADD(CAST('1970-01-01' AS DATE), n)
24481 Ok(Expression::Function(Box::new(Function::new(
24482 "DATE_ADD".to_string(),
24483 vec![epoch_date, n],
24484 ))))
24485 }
24486 DialectType::PostgreSQL
24487 | DialectType::Materialize
24488 | DialectType::RisingWave => {
24489 // CAST('1970-01-01' AS DATE) + INTERVAL 'n DAY'
24490 let n_str = match &n {
24491 Expression::Literal(Literal::Number(s)) => s.clone(),
24492 _ => Self::expr_to_string_static(&n),
24493 };
24494 let interval =
24495 Expression::Interval(Box::new(crate::expressions::Interval {
24496 this: Some(Expression::string(&format!("{} DAY", n_str))),
24497 unit: None,
24498 }));
24499 Ok(Expression::Add(Box::new(
24500 crate::expressions::BinaryOp::new(epoch_date, interval),
24501 )))
24502 }
24503 _ => {
24504 // Default: keep as-is
24505 Ok(Expression::Function(Box::new(Function::new(
24506 "DATE_FROM_UNIX_DATE".to_string(),
24507 vec![n],
24508 ))))
24509 }
24510 }
24511 } else {
24512 Ok(e)
24513 }
24514 }
24515
24516 Action::ArrayRemoveConvert => {
24517 // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter
24518 if let Expression::ArrayRemove(bf) = e {
24519 let arr = bf.this;
24520 let target_val = bf.expression;
24521 match target {
24522 DialectType::DuckDB => {
24523 let u_id = crate::expressions::Identifier::new("_u");
24524 let lambda =
24525 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
24526 parameters: vec![u_id.clone()],
24527 body: Expression::Neq(Box::new(BinaryOp {
24528 left: Expression::Identifier(u_id),
24529 right: target_val,
24530 left_comments: Vec::new(),
24531 operator_comments: Vec::new(),
24532 trailing_comments: Vec::new(),
24533 })),
24534 colon: false,
24535 parameter_types: Vec::new(),
24536 }));
24537 Ok(Expression::Function(Box::new(Function::new(
24538 "LIST_FILTER".to_string(),
24539 vec![arr, lambda],
24540 ))))
24541 }
24542 DialectType::ClickHouse => {
24543 let u_id = crate::expressions::Identifier::new("_u");
24544 let lambda =
24545 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
24546 parameters: vec![u_id.clone()],
24547 body: Expression::Neq(Box::new(BinaryOp {
24548 left: Expression::Identifier(u_id),
24549 right: target_val,
24550 left_comments: Vec::new(),
24551 operator_comments: Vec::new(),
24552 trailing_comments: Vec::new(),
24553 })),
24554 colon: false,
24555 parameter_types: Vec::new(),
24556 }));
24557 Ok(Expression::Function(Box::new(Function::new(
24558 "arrayFilter".to_string(),
24559 vec![lambda, arr],
24560 ))))
24561 }
24562 DialectType::BigQuery => {
24563 // ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)
24564 let u_id = crate::expressions::Identifier::new("_u");
24565 let u_col = Expression::Column(crate::expressions::Column {
24566 name: u_id.clone(),
24567 table: None,
24568 join_mark: false,
24569 trailing_comments: Vec::new(),
24570 span: None,
24571 });
24572 let unnest_expr =
24573 Expression::Unnest(Box::new(crate::expressions::UnnestFunc {
24574 this: arr,
24575 expressions: Vec::new(),
24576 with_ordinality: false,
24577 alias: None,
24578 offset_alias: None,
24579 }));
24580 let aliased_unnest =
24581 Expression::Alias(Box::new(crate::expressions::Alias {
24582 this: unnest_expr,
24583 alias: u_id.clone(),
24584 column_aliases: Vec::new(),
24585 pre_alias_comments: Vec::new(),
24586 trailing_comments: Vec::new(),
24587 }));
24588 let where_cond = Expression::Neq(Box::new(BinaryOp {
24589 left: u_col.clone(),
24590 right: target_val,
24591 left_comments: Vec::new(),
24592 operator_comments: Vec::new(),
24593 trailing_comments: Vec::new(),
24594 }));
24595 let subquery = Expression::Select(Box::new(
24596 crate::expressions::Select::new()
24597 .column(u_col)
24598 .from(aliased_unnest)
24599 .where_(where_cond),
24600 ));
24601 Ok(Expression::ArrayFunc(Box::new(
24602 crate::expressions::ArrayConstructor {
24603 expressions: vec![subquery],
24604 bracket_notation: false,
24605 use_list_keyword: false,
24606 },
24607 )))
24608 }
24609 _ => Ok(Expression::ArrayRemove(Box::new(
24610 crate::expressions::BinaryFunc {
24611 original_name: None,
24612 this: arr,
24613 expression: target_val,
24614 },
24615 ))),
24616 }
24617 } else {
24618 Ok(e)
24619 }
24620 }
24621
24622 Action::ArrayReverseConvert => {
24623 // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
24624 if let Expression::ArrayReverse(af) = e {
24625 Ok(Expression::Function(Box::new(Function::new(
24626 "arrayReverse".to_string(),
24627 vec![af.this],
24628 ))))
24629 } else {
24630 Ok(e)
24631 }
24632 }
24633
24634 Action::JsonKeysConvert => {
24635 // JSON_KEYS(x) -> JSON_OBJECT_KEYS/OBJECT_KEYS
24636 if let Expression::JsonKeys(uf) = e {
24637 match target {
24638 DialectType::Spark | DialectType::Databricks => {
24639 Ok(Expression::Function(Box::new(Function::new(
24640 "JSON_OBJECT_KEYS".to_string(),
24641 vec![uf.this],
24642 ))))
24643 }
24644 DialectType::Snowflake => Ok(Expression::Function(Box::new(
24645 Function::new("OBJECT_KEYS".to_string(), vec![uf.this]),
24646 ))),
24647 _ => Ok(Expression::JsonKeys(uf)),
24648 }
24649 } else {
24650 Ok(e)
24651 }
24652 }
24653
24654 Action::ParseJsonStrip => {
24655 // PARSE_JSON(x) -> x (strip wrapper for SQLite/Doris)
24656 if let Expression::ParseJson(uf) = e {
24657 Ok(uf.this)
24658 } else {
24659 Ok(e)
24660 }
24661 }
24662
24663 Action::ArraySizeDrill => {
24664 // ARRAY_SIZE(x) -> REPEATED_COUNT(x) for Drill
24665 if let Expression::ArraySize(uf) = e {
24666 Ok(Expression::Function(Box::new(Function::new(
24667 "REPEATED_COUNT".to_string(),
24668 vec![uf.this],
24669 ))))
24670 } else {
24671 Ok(e)
24672 }
24673 }
24674
24675 Action::WeekOfYearToWeekIso => {
24676 // WEEKOFYEAR(x) -> WEEKISO(x) for Snowflake (cross-dialect normalization)
24677 if let Expression::WeekOfYear(uf) = e {
24678 Ok(Expression::Function(Box::new(Function::new(
24679 "WEEKISO".to_string(),
24680 vec![uf.this],
24681 ))))
24682 } else {
24683 Ok(e)
24684 }
24685 }
24686 }
24687 })
24688 }
24689
24690 /// Convert DATE_TRUNC('unit', x) to MySQL-specific expansion
24691 fn date_trunc_to_mysql(unit: &str, expr: &Expression) -> Result<Expression> {
24692 use crate::expressions::Function;
24693 match unit {
24694 "DAY" => {
24695 // DATE(x)
24696 Ok(Expression::Function(Box::new(Function::new(
24697 "DATE".to_string(),
24698 vec![expr.clone()],
24699 ))))
24700 }
24701 "WEEK" => {
24702 // STR_TO_DATE(CONCAT(YEAR(x), ' ', WEEK(x, 1), ' 1'), '%Y %u %w')
24703 let year_x = Expression::Function(Box::new(Function::new(
24704 "YEAR".to_string(),
24705 vec![expr.clone()],
24706 )));
24707 let week_x = Expression::Function(Box::new(Function::new(
24708 "WEEK".to_string(),
24709 vec![expr.clone(), Expression::number(1)],
24710 )));
24711 let concat_args = vec![
24712 year_x,
24713 Expression::string(" "),
24714 week_x,
24715 Expression::string(" 1"),
24716 ];
24717 let concat = Expression::Function(Box::new(Function::new(
24718 "CONCAT".to_string(),
24719 concat_args,
24720 )));
24721 Ok(Expression::Function(Box::new(Function::new(
24722 "STR_TO_DATE".to_string(),
24723 vec![concat, Expression::string("%Y %u %w")],
24724 ))))
24725 }
24726 "MONTH" => {
24727 // STR_TO_DATE(CONCAT(YEAR(x), ' ', MONTH(x), ' 1'), '%Y %c %e')
24728 let year_x = Expression::Function(Box::new(Function::new(
24729 "YEAR".to_string(),
24730 vec![expr.clone()],
24731 )));
24732 let month_x = Expression::Function(Box::new(Function::new(
24733 "MONTH".to_string(),
24734 vec![expr.clone()],
24735 )));
24736 let concat_args = vec![
24737 year_x,
24738 Expression::string(" "),
24739 month_x,
24740 Expression::string(" 1"),
24741 ];
24742 let concat = Expression::Function(Box::new(Function::new(
24743 "CONCAT".to_string(),
24744 concat_args,
24745 )));
24746 Ok(Expression::Function(Box::new(Function::new(
24747 "STR_TO_DATE".to_string(),
24748 vec![concat, Expression::string("%Y %c %e")],
24749 ))))
24750 }
24751 "QUARTER" => {
24752 // STR_TO_DATE(CONCAT(YEAR(x), ' ', QUARTER(x) * 3 - 2, ' 1'), '%Y %c %e')
24753 let year_x = Expression::Function(Box::new(Function::new(
24754 "YEAR".to_string(),
24755 vec![expr.clone()],
24756 )));
24757 let quarter_x = Expression::Function(Box::new(Function::new(
24758 "QUARTER".to_string(),
24759 vec![expr.clone()],
24760 )));
24761 // QUARTER(x) * 3 - 2
24762 let mul = Expression::Mul(Box::new(crate::expressions::BinaryOp {
24763 left: quarter_x,
24764 right: Expression::number(3),
24765 left_comments: Vec::new(),
24766 operator_comments: Vec::new(),
24767 trailing_comments: Vec::new(),
24768 }));
24769 let sub = Expression::Sub(Box::new(crate::expressions::BinaryOp {
24770 left: mul,
24771 right: Expression::number(2),
24772 left_comments: Vec::new(),
24773 operator_comments: Vec::new(),
24774 trailing_comments: Vec::new(),
24775 }));
24776 let concat_args = vec![
24777 year_x,
24778 Expression::string(" "),
24779 sub,
24780 Expression::string(" 1"),
24781 ];
24782 let concat = Expression::Function(Box::new(Function::new(
24783 "CONCAT".to_string(),
24784 concat_args,
24785 )));
24786 Ok(Expression::Function(Box::new(Function::new(
24787 "STR_TO_DATE".to_string(),
24788 vec![concat, Expression::string("%Y %c %e")],
24789 ))))
24790 }
24791 "YEAR" => {
24792 // STR_TO_DATE(CONCAT(YEAR(x), ' 1 1'), '%Y %c %e')
24793 let year_x = Expression::Function(Box::new(Function::new(
24794 "YEAR".to_string(),
24795 vec![expr.clone()],
24796 )));
24797 let concat_args = vec![year_x, Expression::string(" 1 1")];
24798 let concat = Expression::Function(Box::new(Function::new(
24799 "CONCAT".to_string(),
24800 concat_args,
24801 )));
24802 Ok(Expression::Function(Box::new(Function::new(
24803 "STR_TO_DATE".to_string(),
24804 vec![concat, Expression::string("%Y %c %e")],
24805 ))))
24806 }
24807 _ => {
24808 // Unsupported unit -> keep as DATE_TRUNC
24809 Ok(Expression::Function(Box::new(Function::new(
24810 "DATE_TRUNC".to_string(),
24811 vec![Expression::string(unit), expr.clone()],
24812 ))))
24813 }
24814 }
24815 }
24816
24817 /// Check if a DataType is or contains VARCHAR/CHAR (for Spark VARCHAR->STRING normalization)
24818 fn has_varchar_char_type(dt: &crate::expressions::DataType) -> bool {
24819 use crate::expressions::DataType;
24820 match dt {
24821 DataType::VarChar { .. } | DataType::Char { .. } => true,
24822 DataType::Struct { fields, .. } => fields
24823 .iter()
24824 .any(|f| Self::has_varchar_char_type(&f.data_type)),
24825 _ => false,
24826 }
24827 }
24828
24829 /// Recursively normalize VARCHAR/CHAR to STRING in a DataType (for Spark)
24830 fn normalize_varchar_to_string(
24831 dt: crate::expressions::DataType,
24832 ) -> crate::expressions::DataType {
24833 use crate::expressions::DataType;
24834 match dt {
24835 DataType::VarChar { .. } | DataType::Char { .. } => DataType::Custom {
24836 name: "STRING".to_string(),
24837 },
24838 DataType::Struct { fields, nested } => {
24839 let fields = fields
24840 .into_iter()
24841 .map(|mut f| {
24842 f.data_type = Self::normalize_varchar_to_string(f.data_type);
24843 f
24844 })
24845 .collect();
24846 DataType::Struct { fields, nested }
24847 }
24848 other => other,
24849 }
24850 }
24851
24852 /// Normalize an interval string like '1day' or ' 2 days ' to proper INTERVAL expression
24853 fn normalize_interval_string(expr: Expression, target: DialectType) -> Expression {
24854 if let Expression::Literal(crate::expressions::Literal::String(ref s)) = expr {
24855 // Try to parse patterns like '1day', '1 day', '2 days', ' 2 days '
24856 let trimmed = s.trim();
24857
24858 // Find where digits end and unit text begins
24859 let digit_end = trimmed
24860 .find(|c: char| !c.is_ascii_digit())
24861 .unwrap_or(trimmed.len());
24862 if digit_end == 0 || digit_end == trimmed.len() {
24863 return expr;
24864 }
24865 let num = &trimmed[..digit_end];
24866 let unit_text = trimmed[digit_end..].trim().to_uppercase();
24867 if unit_text.is_empty() {
24868 return expr;
24869 }
24870
24871 let known_units = [
24872 "DAY", "DAYS", "HOUR", "HOURS", "MINUTE", "MINUTES", "SECOND", "SECONDS", "WEEK",
24873 "WEEKS", "MONTH", "MONTHS", "YEAR", "YEARS",
24874 ];
24875 if !known_units.contains(&unit_text.as_str()) {
24876 return expr;
24877 }
24878
24879 let unit_str = unit_text.clone();
24880 // Singularize
24881 let unit_singular = if unit_str.ends_with('S') && unit_str.len() > 3 {
24882 &unit_str[..unit_str.len() - 1]
24883 } else {
24884 &unit_str
24885 };
24886 let unit = unit_singular;
24887
24888 match target {
24889 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
24890 // INTERVAL '2' DAY
24891 let iu = match unit {
24892 "DAY" => crate::expressions::IntervalUnit::Day,
24893 "HOUR" => crate::expressions::IntervalUnit::Hour,
24894 "MINUTE" => crate::expressions::IntervalUnit::Minute,
24895 "SECOND" => crate::expressions::IntervalUnit::Second,
24896 "WEEK" => crate::expressions::IntervalUnit::Week,
24897 "MONTH" => crate::expressions::IntervalUnit::Month,
24898 "YEAR" => crate::expressions::IntervalUnit::Year,
24899 _ => return expr,
24900 };
24901 return Expression::Interval(Box::new(crate::expressions::Interval {
24902 this: Some(Expression::string(num)),
24903 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
24904 unit: iu,
24905 use_plural: false,
24906 }),
24907 }));
24908 }
24909 DialectType::PostgreSQL | DialectType::Redshift | DialectType::DuckDB => {
24910 // INTERVAL '2 DAYS'
24911 let plural = if num != "1" && !unit_str.ends_with('S') {
24912 format!("{} {}S", num, unit)
24913 } else if unit_str.ends_with('S') {
24914 format!("{} {}", num, unit_str)
24915 } else {
24916 format!("{} {}", num, unit)
24917 };
24918 return Expression::Interval(Box::new(crate::expressions::Interval {
24919 this: Some(Expression::string(&plural)),
24920 unit: None,
24921 }));
24922 }
24923 _ => {
24924 // Spark/Databricks/Hive: INTERVAL '1' DAY
24925 let iu = match unit {
24926 "DAY" => crate::expressions::IntervalUnit::Day,
24927 "HOUR" => crate::expressions::IntervalUnit::Hour,
24928 "MINUTE" => crate::expressions::IntervalUnit::Minute,
24929 "SECOND" => crate::expressions::IntervalUnit::Second,
24930 "WEEK" => crate::expressions::IntervalUnit::Week,
24931 "MONTH" => crate::expressions::IntervalUnit::Month,
24932 "YEAR" => crate::expressions::IntervalUnit::Year,
24933 _ => return expr,
24934 };
24935 return Expression::Interval(Box::new(crate::expressions::Interval {
24936 this: Some(Expression::string(num)),
24937 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
24938 unit: iu,
24939 use_plural: false,
24940 }),
24941 }));
24942 }
24943 }
24944 }
24945 // If it's already an INTERVAL expression, pass through
24946 expr
24947 }
24948
    /// Rewrite SELECT expressions containing UNNEST into expanded form with CROSS JOINs.
    /// DuckDB: SELECT UNNEST(arr1), UNNEST(arr2) ->
    /// BigQuery: SELECT IF(pos = pos_2, col, NULL) AS col, ... FROM UNNEST(GENERATE_ARRAY(0, ...)) AS pos CROSS JOIN ...
    /// Presto: SELECT IF(_u.pos = _u_2.pos_2, _u_2.col) AS col, ... FROM UNNEST(SEQUENCE(1, ...)) AS _u(pos) CROSS JOIN ...
    ///
    /// Returns `Some(new_select)` with the rewritten projection, joins, and WHERE
    /// clause, or `None` when no SELECT expression contains an UNNEST or when
    /// `target` is not one of BigQuery/Presto/Trino/Snowflake.
    fn rewrite_unnest_expansion(
        select: &crate::expressions::Select,
        target: DialectType,
    ) -> Option<crate::expressions::Select> {
        use crate::expressions::{
            Alias, BinaryOp, Column, From, Function, Identifier, Join, JoinKind, Literal,
            UnnestFunc,
        };

        // Presto/Trino positions are 1-based; BigQuery/Snowflake offsets are 0-based.
        let index_offset: i64 = match target {
            DialectType::Presto | DialectType::Trino => 1,
            _ => 0, // BigQuery, Snowflake
        };

        // Snowflake spells the conditional function IFF; the others use IF.
        let if_func_name = match target {
            DialectType::Snowflake => "IFF",
            _ => "IF",
        };

        // Dialect-specific name for the array-length function.
        let array_length_func = match target {
            DialectType::BigQuery => "ARRAY_LENGTH",
            DialectType::Presto | DialectType::Trino => "CARDINALITY",
            DialectType::Snowflake => "ARRAY_SIZE",
            _ => "ARRAY_LENGTH",
        };

        // Presto/Trino/Snowflake reference columns through source table aliases
        // (_u, _u_2, ...); BigQuery uses bare identifiers.
        let use_table_aliases = matches!(
            target,
            DialectType::Presto | DialectType::Trino | DialectType::Snowflake
        );
        // BigQuery/Snowflake require an explicit NULL third argument to IF/IFF.
        let null_third_arg = matches!(target, DialectType::BigQuery | DialectType::Snowflake);

        // Build a column reference: table-qualified when a table alias is given,
        // a bare identifier otherwise.
        fn make_col(name: &str, table: Option<&str>) -> Expression {
            if let Some(tbl) = table {
                Expression::Column(Column {
                    name: Identifier::new(name.to_string()),
                    table: Some(Identifier::new(tbl.to_string())),
                    join_mark: false,
                    trailing_comments: Vec::new(),
                    span: None,
                })
            } else {
                Expression::Identifier(Identifier::new(name.to_string()))
            }
        }

        // Wrap an expression into a bare CROSS JOIN node.
        fn make_join(this: Expression) -> Join {
            Join {
                this,
                on: None,
                using: Vec::new(),
                kind: JoinKind::Cross,
                use_inner_keyword: false,
                use_outer_keyword: false,
                deferred_condition: false,
                join_hint: None,
                match_condition: None,
                pivots: Vec::new(),
                comments: Vec::new(),
                nesting_group: 0,
                directed: false,
            }
        }

        // Collect UNNEST info from SELECT expressions
        struct UnnestInfo {
            arr_expr: Expression,            // the array argument of the UNNEST
            col_alias: String,               // generated column alias (col, col_2, ...)
            pos_alias: String,               // generated position alias (pos_2, pos_3, ...)
            source_alias: String,            // generated source alias (_u_2, _u_3, ...)
            original_expr: Expression,       // the full original SELECT item
            has_outer_alias: Option<String>, // explicit AS alias, if the item had one
        }

        let mut unnest_infos: Vec<UnnestInfo> = Vec::new();
        // Counters start so the first UNNEST yields col / pos_2 / _u_2
        // (pos and _u are reserved for the generated position series).
        let mut col_counter = 0usize;
        let mut pos_counter = 1usize;
        let mut source_counter = 1usize;

        // Pull the array argument out of an UNNEST, looking through aliases and
        // the four arithmetic operators; None if the expression has no UNNEST.
        fn extract_unnest_arg(expr: &Expression) -> Option<Expression> {
            match expr {
                Expression::Unnest(u) => Some(u.this.clone()),
                Expression::Function(f)
                    if f.name.eq_ignore_ascii_case("UNNEST") && !f.args.is_empty() =>
                {
                    Some(f.args[0].clone())
                }
                Expression::Alias(a) => extract_unnest_arg(&a.this),
                Expression::Add(op)
                | Expression::Sub(op)
                | Expression::Mul(op)
                | Expression::Div(op) => {
                    extract_unnest_arg(&op.left).or_else(|| extract_unnest_arg(&op.right))
                }
                _ => None,
            }
        }

        // Top-level AS alias of a SELECT item, if present.
        fn get_alias_name(expr: &Expression) -> Option<String> {
            if let Expression::Alias(a) = expr {
                Some(a.alias.name.clone())
            } else {
                None
            }
        }

        for sel_expr in &select.expressions {
            if let Some(arr) = extract_unnest_arg(sel_expr) {
                col_counter += 1;
                pos_counter += 1;
                source_counter += 1;

                // First column is plain "col"; subsequent ones are numbered.
                let col_alias = if col_counter == 1 {
                    "col".to_string()
                } else {
                    format!("col_{}", col_counter)
                };
                let pos_alias = format!("pos_{}", pos_counter);
                let source_alias = format!("_u_{}", source_counter);
                let has_outer_alias = get_alias_name(sel_expr);

                unnest_infos.push(UnnestInfo {
                    arr_expr: arr,
                    col_alias,
                    pos_alias,
                    source_alias,
                    original_expr: sel_expr.clone(),
                    has_outer_alias,
                });
            }
        }

        // Nothing to expand: leave the SELECT untouched.
        if unnest_infos.is_empty() {
            return None;
        }

        // Aliases for the generated position series source.
        let series_alias = "pos".to_string();
        let series_source_alias = "_u".to_string();
        let tbl_ref = if use_table_aliases {
            Some(series_source_alias.as_str())
        } else {
            None
        };

        // Build new SELECT expressions:
        // each UNNEST item becomes IF(pos = pos_N, col_N [, NULL]) AS name.
        let mut new_select_exprs = Vec::new();
        for info in &unnest_infos {
            // Prefer the user's explicit alias over the generated one.
            let actual_col_name = info.has_outer_alias.as_ref().unwrap_or(&info.col_alias);
            let src_ref = if use_table_aliases {
                Some(info.source_alias.as_str())
            } else {
                None
            };

            let pos_col = make_col(&series_alias, tbl_ref);
            let unnest_pos_col = make_col(&info.pos_alias, src_ref);
            let col_ref = make_col(actual_col_name, src_ref);

            let eq_cond = Expression::Eq(Box::new(BinaryOp::new(
                pos_col.clone(),
                unnest_pos_col.clone(),
            )));
            let mut if_args = vec![eq_cond, col_ref];
            if null_third_arg {
                if_args.push(Expression::Null(crate::expressions::Null));
            }

            let if_expr =
                Expression::Function(Box::new(Function::new(if_func_name.to_string(), if_args)));
            // Splice the IF expression back into the original item so arithmetic
            // around the UNNEST (e.g. UNNEST(a) + 1) is preserved.
            let final_expr = Self::replace_unnest_with_if(&info.original_expr, &if_expr);

            new_select_exprs.push(Expression::Alias(Box::new(Alias::new(
                final_expr,
                Identifier::new(actual_col_name.clone()),
            ))));
        }

        // Build array size expressions for GREATEST
        let size_exprs: Vec<Expression> = unnest_infos
            .iter()
            .map(|info| {
                Expression::Function(Box::new(Function::new(
                    array_length_func.to_string(),
                    vec![info.arr_expr.clone()],
                )))
            })
            .collect();

        // The position series must cover the longest of all unnested arrays.
        let greatest =
            Expression::Function(Box::new(Function::new("GREATEST".to_string(), size_exprs)));

        // 0-based dialects end the series at GREATEST(...) - 1; 1-based at GREATEST(...).
        let series_end = if index_offset == 0 {
            Expression::Sub(Box::new(BinaryOp::new(
                greatest,
                Expression::Literal(Literal::Number("1".to_string())),
            )))
        } else {
            greatest
        };

        // Build the position array source
        let series_unnest_expr = match target {
            DialectType::BigQuery => {
                // UNNEST(GENERATE_ARRAY(0, series_end))
                let gen_array = Expression::Function(Box::new(Function::new(
                    "GENERATE_ARRAY".to_string(),
                    vec![
                        Expression::Literal(Literal::Number("0".to_string())),
                        series_end,
                    ],
                )));
                Expression::Unnest(Box::new(UnnestFunc {
                    this: gen_array,
                    expressions: Vec::new(),
                    with_ordinality: false,
                    alias: None,
                    offset_alias: None,
                }))
            }
            DialectType::Presto | DialectType::Trino => {
                // UNNEST(SEQUENCE(1, series_end))
                let sequence = Expression::Function(Box::new(Function::new(
                    "SEQUENCE".to_string(),
                    vec![
                        Expression::Literal(Literal::Number("1".to_string())),
                        series_end,
                    ],
                )));
                Expression::Unnest(Box::new(UnnestFunc {
                    this: sequence,
                    expressions: Vec::new(),
                    with_ordinality: false,
                    alias: None,
                    offset_alias: None,
                }))
            }
            DialectType::Snowflake => {
                // TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (series_end) + 1)))
                // ARRAY_GENERATE_RANGE's end bound is exclusive, hence the + 1.
                let range_end = Expression::Add(Box::new(BinaryOp::new(
                    Expression::Paren(Box::new(crate::expressions::Paren {
                        this: series_end,
                        trailing_comments: Vec::new(),
                    })),
                    Expression::Literal(Literal::Number("1".to_string())),
                )));
                let gen_range = Expression::Function(Box::new(Function::new(
                    "ARRAY_GENERATE_RANGE".to_string(),
                    vec![
                        Expression::Literal(Literal::Number("0".to_string())),
                        range_end,
                    ],
                )));
                let flatten_arg =
                    Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
                        name: Identifier::new("INPUT".to_string()),
                        value: gen_range,
                        separator: crate::expressions::NamedArgSeparator::DArrow,
                    }));
                let flatten = Expression::Function(Box::new(Function::new(
                    "FLATTEN".to_string(),
                    vec![flatten_arg],
                )));
                Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])))
            }
            // Other dialects are not handled by this expansion.
            _ => return None,
        };

        // Build series alias expression
        let series_alias_expr = if use_table_aliases {
            // Snowflake FLATTEN emits six fixed columns; the series value lands
            // in the VALUE position, aliased here to "pos".
            let col_aliases = if matches!(target, DialectType::Snowflake) {
                vec![
                    Identifier::new("seq".to_string()),
                    Identifier::new("key".to_string()),
                    Identifier::new("path".to_string()),
                    Identifier::new("index".to_string()),
                    Identifier::new(series_alias.clone()),
                    Identifier::new("this".to_string()),
                ]
            } else {
                vec![Identifier::new(series_alias.clone())]
            };
            Expression::Alias(Box::new(Alias {
                this: series_unnest_expr,
                alias: Identifier::new(series_source_alias.clone()),
                column_aliases: col_aliases,
                pre_alias_comments: Vec::new(),
                trailing_comments: Vec::new(),
            }))
        } else {
            Expression::Alias(Box::new(Alias::new(
                series_unnest_expr,
                Identifier::new(series_alias.clone()),
            )))
        };

        // Build CROSS JOINs for each UNNEST
        let mut joins = Vec::new();
        for info in &unnest_infos {
            let actual_col_name = info.has_outer_alias.as_ref().unwrap_or(&info.col_alias);

            let unnest_join_expr = match target {
                DialectType::BigQuery => {
                    // UNNEST([1,2,3]) AS col WITH OFFSET AS pos_2
                    let unnest = UnnestFunc {
                        this: info.arr_expr.clone(),
                        expressions: Vec::new(),
                        with_ordinality: true,
                        alias: Some(Identifier::new(actual_col_name.clone())),
                        offset_alias: Some(Identifier::new(info.pos_alias.clone())),
                    };
                    Expression::Unnest(Box::new(unnest))
                }
                DialectType::Presto | DialectType::Trino => {
                    // UNNEST(arr) WITH ORDINALITY AS _u_2(col, pos_2)
                    let unnest = UnnestFunc {
                        this: info.arr_expr.clone(),
                        expressions: Vec::new(),
                        with_ordinality: true,
                        alias: None,
                        offset_alias: None,
                    };
                    Expression::Alias(Box::new(Alias {
                        this: Expression::Unnest(Box::new(unnest)),
                        alias: Identifier::new(info.source_alias.clone()),
                        column_aliases: vec![
                            Identifier::new(actual_col_name.clone()),
                            Identifier::new(info.pos_alias.clone()),
                        ],
                        pre_alias_comments: Vec::new(),
                        trailing_comments: Vec::new(),
                    }))
                }
                DialectType::Snowflake => {
                    // TABLE(FLATTEN(INPUT => arr)) AS _u_2(seq, key, path, pos_2, col, this)
                    let flatten_arg =
                        Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
                            name: Identifier::new("INPUT".to_string()),
                            value: info.arr_expr.clone(),
                            separator: crate::expressions::NamedArgSeparator::DArrow,
                        }));
                    let flatten = Expression::Function(Box::new(Function::new(
                        "FLATTEN".to_string(),
                        vec![flatten_arg],
                    )));
                    let table_fn = Expression::Function(Box::new(Function::new(
                        "TABLE".to_string(),
                        vec![flatten],
                    )));
                    Expression::Alias(Box::new(Alias {
                        this: table_fn,
                        alias: Identifier::new(info.source_alias.clone()),
                        column_aliases: vec![
                            Identifier::new("seq".to_string()),
                            Identifier::new("key".to_string()),
                            Identifier::new("path".to_string()),
                            Identifier::new(info.pos_alias.clone()),
                            Identifier::new(actual_col_name.clone()),
                            Identifier::new("this".to_string()),
                        ],
                        pre_alias_comments: Vec::new(),
                        trailing_comments: Vec::new(),
                    }))
                }
                _ => return None,
            };

            joins.push(make_join(unnest_join_expr));
        }

        // Build WHERE clause: for each array, keep rows where the series position
        // matches the array position, OR (series ran past this array AND the array
        // position is pinned at its last element).
        let mut where_conditions: Vec<Expression> = Vec::new();
        for info in &unnest_infos {
            let src_ref = if use_table_aliases {
                Some(info.source_alias.as_str())
            } else {
                None
            };
            let pos_col = make_col(&series_alias, tbl_ref);
            let unnest_pos_col = make_col(&info.pos_alias, src_ref);

            let arr_size = Expression::Function(Box::new(Function::new(
                array_length_func.to_string(),
                vec![info.arr_expr.clone()],
            )));

            // Last valid position: size - 1 for 0-based dialects, size for 1-based.
            let size_ref = if index_offset == 0 {
                Expression::Paren(Box::new(crate::expressions::Paren {
                    this: Expression::Sub(Box::new(BinaryOp::new(
                        arr_size,
                        Expression::Literal(Literal::Number("1".to_string())),
                    ))),
                    trailing_comments: Vec::new(),
                }))
            } else {
                arr_size
            };

            let eq = Expression::Eq(Box::new(BinaryOp::new(
                pos_col.clone(),
                unnest_pos_col.clone(),
            )));
            let gt = Expression::Gt(Box::new(BinaryOp::new(pos_col, size_ref.clone())));
            let pos_eq_size = Expression::Eq(Box::new(BinaryOp::new(unnest_pos_col, size_ref)));
            let and_cond = Expression::And(Box::new(BinaryOp::new(gt, pos_eq_size)));
            let paren_and = Expression::Paren(Box::new(crate::expressions::Paren {
                this: and_cond,
                trailing_comments: Vec::new(),
            }));
            let or_cond = Expression::Or(Box::new(BinaryOp::new(eq, paren_and)));

            where_conditions.push(or_cond);
        }

        let where_expr = if where_conditions.len() == 1 {
            // Single condition: no parens needed
            where_conditions.into_iter().next().unwrap()
        } else {
            // Multiple conditions: wrap each OR in parens, then combine with AND
            let wrap = |e: Expression| {
                Expression::Paren(Box::new(crate::expressions::Paren {
                    this: e,
                    trailing_comments: Vec::new(),
                }))
            };
            let mut iter = where_conditions.into_iter();
            let first = wrap(iter.next().unwrap());
            let second = wrap(iter.next().unwrap());
            let mut combined = Expression::Paren(Box::new(crate::expressions::Paren {
                this: Expression::And(Box::new(BinaryOp::new(first, second))),
                trailing_comments: Vec::new(),
            }));
            for cond in iter {
                combined = Expression::And(Box::new(BinaryOp::new(combined, wrap(cond))));
            }
            combined
        };

        // Build the new SELECT
        let mut new_select = select.clone();
        new_select.expressions = new_select_exprs;

        // The position series joins after any existing FROM source; otherwise it
        // becomes the FROM itself, with the per-array joins following either way.
        if new_select.from.is_some() {
            let mut all_joins = vec![make_join(series_alias_expr)];
            all_joins.extend(joins);
            new_select.joins.extend(all_joins);
        } else {
            new_select.from = Some(From {
                expressions: vec![series_alias_expr],
            });
            new_select.joins.extend(joins);
        }

        // AND the expansion predicate onto any pre-existing WHERE clause.
        if let Some(ref existing_where) = new_select.where_clause {
            let combined = Expression::And(Box::new(BinaryOp::new(
                existing_where.this.clone(),
                where_expr,
            )));
            new_select.where_clause = Some(crate::expressions::Where { this: combined });
        } else {
            new_select.where_clause = Some(crate::expressions::Where { this: where_expr });
        }

        Some(new_select)
    }
25412
25413 /// Helper to replace UNNEST(...) inside an expression with a replacement expression.
25414 fn replace_unnest_with_if(original: &Expression, replacement: &Expression) -> Expression {
25415 match original {
25416 Expression::Unnest(_) => replacement.clone(),
25417 Expression::Function(f) if f.name.eq_ignore_ascii_case("UNNEST") => replacement.clone(),
25418 Expression::Alias(a) => Self::replace_unnest_with_if(&a.this, replacement),
25419 Expression::Add(op) => {
25420 let left = Self::replace_unnest_with_if(&op.left, replacement);
25421 let right = Self::replace_unnest_with_if(&op.right, replacement);
25422 Expression::Add(Box::new(crate::expressions::BinaryOp::new(left, right)))
25423 }
25424 Expression::Sub(op) => {
25425 let left = Self::replace_unnest_with_if(&op.left, replacement);
25426 let right = Self::replace_unnest_with_if(&op.right, replacement);
25427 Expression::Sub(Box::new(crate::expressions::BinaryOp::new(left, right)))
25428 }
25429 Expression::Mul(op) => {
25430 let left = Self::replace_unnest_with_if(&op.left, replacement);
25431 let right = Self::replace_unnest_with_if(&op.right, replacement);
25432 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(left, right)))
25433 }
25434 Expression::Div(op) => {
25435 let left = Self::replace_unnest_with_if(&op.left, replacement);
25436 let right = Self::replace_unnest_with_if(&op.right, replacement);
25437 Expression::Div(Box::new(crate::expressions::BinaryOp::new(left, right)))
25438 }
25439 _ => original.clone(),
25440 }
25441 }
25442
25443 /// Decompose a JSON path like `$.y[0].z` into individual parts: `["y", "0", "z"]`.
25444 /// Strips `$` prefix, handles bracket notation, quoted strings, and removes `[*]` wildcards.
25445 fn decompose_json_path(path: &str) -> Vec<String> {
25446 let mut parts = Vec::new();
25447 let path = if path.starts_with("$.") {
25448 &path[2..]
25449 } else if path.starts_with('$') {
25450 &path[1..]
25451 } else {
25452 path
25453 };
25454 if path.is_empty() {
25455 return parts;
25456 }
25457 let mut current = String::new();
25458 let chars: Vec<char> = path.chars().collect();
25459 let mut i = 0;
25460 while i < chars.len() {
25461 match chars[i] {
25462 '.' => {
25463 if !current.is_empty() {
25464 parts.push(current.clone());
25465 current.clear();
25466 }
25467 i += 1;
25468 }
25469 '[' => {
25470 if !current.is_empty() {
25471 parts.push(current.clone());
25472 current.clear();
25473 }
25474 i += 1;
25475 let mut bracket_content = String::new();
25476 while i < chars.len() && chars[i] != ']' {
25477 if chars[i] == '"' || chars[i] == '\'' {
25478 let quote = chars[i];
25479 i += 1;
25480 while i < chars.len() && chars[i] != quote {
25481 bracket_content.push(chars[i]);
25482 i += 1;
25483 }
25484 if i < chars.len() {
25485 i += 1;
25486 }
25487 } else {
25488 bracket_content.push(chars[i]);
25489 i += 1;
25490 }
25491 }
25492 if i < chars.len() {
25493 i += 1;
25494 }
25495 if bracket_content != "*" {
25496 parts.push(bracket_content);
25497 }
25498 }
25499 _ => {
25500 current.push(chars[i]);
25501 i += 1;
25502 }
25503 }
25504 }
25505 if !current.is_empty() {
25506 parts.push(current);
25507 }
25508 parts
25509 }
25510
25511 /// Strip `$` prefix from a JSON path, keeping the rest.
25512 /// `$.y[0].z` -> `y[0].z`, `$["a b"]` -> `["a b"]`
25513 fn strip_json_dollar_prefix(path: &str) -> String {
25514 if path.starts_with("$.") {
25515 path[2..].to_string()
25516 } else if path.starts_with('$') {
25517 path[1..].to_string()
25518 } else {
25519 path.to_string()
25520 }
25521 }
25522
25523 /// Strip `[*]` wildcards from a JSON path.
25524 /// `$.y[*]` -> `$.y`, `$.y[*].z` -> `$.y.z`
25525 fn strip_json_wildcards(path: &str) -> String {
25526 path.replace("[*]", "")
25527 .replace("..", ".") // Clean double dots from `$.y[*].z` -> `$.y..z`
25528 .trim_end_matches('.')
25529 .to_string()
25530 }
25531
25532 /// Convert bracket notation to dot notation for JSON paths.
25533 /// `$["a b"]` -> `$."a b"`, `$["key"]` -> `$.key`
25534 fn bracket_to_dot_notation(path: &str) -> String {
25535 let mut result = String::new();
25536 let chars: Vec<char> = path.chars().collect();
25537 let mut i = 0;
25538 while i < chars.len() {
25539 if chars[i] == '[' {
25540 // Read bracket content
25541 i += 1;
25542 let mut bracket_content = String::new();
25543 let mut is_quoted = false;
25544 let mut _quote_char = '"';
25545 while i < chars.len() && chars[i] != ']' {
25546 if chars[i] == '"' || chars[i] == '\'' {
25547 is_quoted = true;
25548 _quote_char = chars[i];
25549 i += 1;
25550 while i < chars.len() && chars[i] != _quote_char {
25551 bracket_content.push(chars[i]);
25552 i += 1;
25553 }
25554 if i < chars.len() {
25555 i += 1;
25556 }
25557 } else {
25558 bracket_content.push(chars[i]);
25559 i += 1;
25560 }
25561 }
25562 if i < chars.len() {
25563 i += 1;
25564 } // skip ]
25565 if bracket_content == "*" {
25566 // Keep wildcard as-is
25567 result.push_str("[*]");
25568 } else if is_quoted {
25569 // Quoted bracket -> dot notation with quotes
25570 result.push('.');
25571 result.push('"');
25572 result.push_str(&bracket_content);
25573 result.push('"');
25574 } else {
25575 // Numeric index -> keep as bracket
25576 result.push('[');
25577 result.push_str(&bracket_content);
25578 result.push(']');
25579 }
25580 } else {
25581 result.push(chars[i]);
25582 i += 1;
25583 }
25584 }
25585 result
25586 }
25587
25588 /// Convert JSON path bracket quoted strings to use single quotes instead of double quotes.
25589 /// `$["a b"]` -> `$['a b']`
25590 fn bracket_to_single_quotes(path: &str) -> String {
25591 let mut result = String::new();
25592 let chars: Vec<char> = path.chars().collect();
25593 let mut i = 0;
25594 while i < chars.len() {
25595 if chars[i] == '[' && i + 1 < chars.len() && chars[i + 1] == '"' {
25596 result.push('[');
25597 result.push('\'');
25598 i += 2; // skip [ and "
25599 while i < chars.len() && chars[i] != '"' {
25600 result.push(chars[i]);
25601 i += 1;
25602 }
25603 if i < chars.len() {
25604 i += 1;
25605 } // skip closing "
25606 result.push('\'');
25607 } else {
25608 result.push(chars[i]);
25609 i += 1;
25610 }
25611 }
25612 result
25613 }
25614
    /// Transform TSQL `SELECT ... INTO table` for cross-dialect transpilation.
    ///
    /// - DuckDB/Snowflake targets: `SELECT ... INTO t` is rewritten to
    ///   `CREATE [TEMPORARY] TABLE t AS SELECT ...` (temporary when the table
    ///   name starts with `#` or the INTO clause was already marked temporary).
    /// - PostgreSQL/Redshift targets: a `#temp` name becomes `INTO TEMPORARY temp`.
    /// - Any target other than TSQL/Fabric: the leading `#` is stripped from
    ///   `INSERT INTO #table`.
    ///
    /// Every other expression kind, and every other target, is returned
    /// unchanged. `_source` is currently unused.
    fn transform_select_into(
        expr: Expression,
        _source: DialectType,
        target: DialectType,
    ) -> Expression {
        use crate::expressions::{CreateTable, Expression, TableRef};

        // Handle INSERT INTO #temp -> INSERT INTO temp for non-TSQL targets
        if let Expression::Insert(ref insert) = expr {
            if insert.table.name.name.starts_with('#')
                && !matches!(target, DialectType::TSQL | DialectType::Fabric)
            {
                let mut new_insert = insert.clone();
                new_insert.table.name.name =
                    insert.table.name.name.trim_start_matches('#').to_string();
                return Expression::Insert(new_insert);
            }
            // INSERT for TSQL/Fabric targets, or without a `#` prefix: untouched.
            return expr;
        }

        if let Expression::Select(ref select) = expr {
            if let Some(ref into) = select.into {
                // The INTO target may have been parsed as a table ref or a
                // bare identifier; anything else yields an empty name.
                let table_name_raw = match &into.this {
                    Expression::Table(tr) => tr.name.name.clone(),
                    Expression::Identifier(id) => id.name.clone(),
                    _ => String::new(),
                };
                // `#name` is TSQL's local-temp-table convention.
                let is_temp = table_name_raw.starts_with('#') || into.temporary;
                let clean_name = table_name_raw.trim_start_matches('#').to_string();

                match target {
                    DialectType::DuckDB | DialectType::Snowflake => {
                        // SELECT INTO -> CREATE TABLE AS SELECT
                        let mut new_select = select.clone();
                        new_select.into = None;
                        let ct = CreateTable {
                            name: TableRef::new(clean_name),
                            on_cluster: None,
                            columns: Vec::new(),
                            constraints: Vec::new(),
                            if_not_exists: false,
                            temporary: is_temp,
                            or_replace: false,
                            table_modifier: None,
                            as_select: Some(Expression::Select(new_select)),
                            as_select_parenthesized: false,
                            on_commit: None,
                            clone_source: None,
                            clone_at_clause: None,
                            shallow_clone: false,
                            is_copy: false,
                            leading_comments: Vec::new(),
                            with_properties: Vec::new(),
                            teradata_post_name_options: Vec::new(),
                            with_data: None,
                            with_statistics: None,
                            teradata_indexes: Vec::new(),
                            with_cte: None,
                            properties: Vec::new(),
                            partition_of: None,
                            post_table_properties: Vec::new(),
                            mysql_table_options: Vec::new(),
                            inherits: Vec::new(),
                            on_property: None,
                            copy_grants: false,
                            using_template: None,
                            rollup: None,
                        };
                        return Expression::CreateTable(Box::new(ct));
                    }
                    DialectType::PostgreSQL | DialectType::Redshift => {
                        // PostgreSQL: #foo -> INTO TEMPORARY foo
                        // Only rewrite when temp-ness comes from the `#` prefix
                        // and the INTO clause is not already marked temporary.
                        if is_temp && !into.temporary {
                            let mut new_select = select.clone();
                            let mut new_into = into.clone();
                            new_into.temporary = true;
                            new_into.unlogged = false;
                            new_into.this = Expression::Table(TableRef::new(clean_name));
                            new_select.into = Some(new_into);
                            Expression::Select(new_select)
                        } else {
                            expr
                        }
                    }
                    _ => expr,
                }
            } else {
                expr
            }
        } else {
            expr
        }
    }
25711
    /// Transform CREATE TABLE WITH properties for cross-dialect transpilation.
    /// Handles FORMAT, PARTITIONED_BY, and other Presto WITH properties.
    ///
    /// Two passes over `ct`, both mutating it in place:
    /// 1. Presto-style `WITH (...)` key/value pairs (`ct.with_properties`) are
    ///    drained and re-emitted per target: kept (Presto family), converted to
    ///    `STORED AS`/`USING` + `TBLPROPERTIES` (Hive/Spark), stripped (DuckDB),
    ///    or restored as-is (everything else).
    /// 2. AST-level properties (`ct.properties`) are converted the other way for
    ///    Presto/DuckDB targets, or have quoted format names unquoted otherwise.
    ///
    /// `_source` is currently unused.
    fn transform_create_table_properties(
        ct: &mut crate::expressions::CreateTable,
        _source: DialectType,
        target: DialectType,
    ) {
        use crate::expressions::{
            BinaryOp, BooleanLiteral, Expression, FileFormatProperty, Identifier, Literal,
            Properties,
        };

        // Helper to convert a raw property value string to the correct Expression
        let value_to_expr = |v: &str| -> Expression {
            let trimmed = v.trim();
            // Check if it's a quoted string (starts and ends with ')
            if trimmed.starts_with('\'') && trimmed.ends_with('\'') {
                Expression::Literal(Literal::String(trimmed[1..trimmed.len() - 1].to_string()))
            }
            // Check if it's a number
            else if trimmed.parse::<i64>().is_ok() || trimmed.parse::<f64>().is_ok() {
                Expression::Literal(Literal::Number(trimmed.to_string()))
            }
            // Check if it's ARRAY[...] or ARRAY(...)
            else if trimmed.to_uppercase().starts_with("ARRAY") {
                // Convert ARRAY['y'] to ARRAY('y') for Hive/Spark
                let inner = trimmed
                    .trim_start_matches(|c: char| c.is_alphabetic()) // Remove ARRAY
                    .trim_start_matches('[')
                    .trim_start_matches('(')
                    .trim_end_matches(']')
                    .trim_end_matches(')');
                let elements: Vec<Expression> = inner
                    .split(',')
                    .map(|e| {
                        let elem = e.trim().trim_matches('\'');
                        Expression::Literal(Literal::String(elem.to_string()))
                    })
                    .collect();
                Expression::Function(Box::new(crate::expressions::Function::new(
                    "ARRAY".to_string(),
                    elements,
                )))
            }
            // Otherwise, just output as identifier (unquoted)
            else {
                Expression::Identifier(Identifier::new(trimmed.to_string()))
            }
        };

        // Nothing to do when there are no properties of either kind.
        if ct.with_properties.is_empty() && ct.properties.is_empty() {
            return;
        }

        // Handle Presto-style WITH properties
        if !ct.with_properties.is_empty() {
            // Extract FORMAT property and remaining properties
            let mut format_value: Option<String> = None;
            let mut partitioned_by: Option<String> = None;
            let mut other_props: Vec<(String, String)> = Vec::new();

            // Drain so the target arm below fully controls what gets re-added.
            for (key, value) in ct.with_properties.drain(..) {
                let key_upper = key.to_uppercase();
                if key_upper == "FORMAT" {
                    // Strip surrounding quotes from value if present
                    format_value = Some(value.trim_matches('\'').to_string());
                } else if key_upper == "PARTITIONED_BY" {
                    partitioned_by = Some(value);
                } else {
                    other_props.push((key, value));
                }
            }

            match target {
                DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                    // Presto: keep WITH properties but lowercase 'format' key
                    if let Some(fmt) = format_value {
                        ct.with_properties
                            .push(("format".to_string(), format!("'{}'", fmt)));
                    }
                    if let Some(part) = partitioned_by {
                        // Convert (col1, col2) to ARRAY['col1', 'col2'] format
                        let trimmed = part.trim();
                        let inner = trimmed.trim_start_matches('(').trim_end_matches(')');
                        // Also handle ARRAY['...'] format - keep as-is
                        if trimmed.to_uppercase().starts_with("ARRAY") {
                            ct.with_properties
                                .push(("PARTITIONED_BY".to_string(), part));
                        } else {
                            // Parse column names from the parenthesized list
                            let cols: Vec<&str> = inner
                                .split(',')
                                .map(|c| c.trim().trim_matches('"').trim_matches('\''))
                                .collect();
                            let array_val = format!(
                                "ARRAY[{}]",
                                cols.iter()
                                    .map(|c| format!("'{}'", c))
                                    .collect::<Vec<_>>()
                                    .join(", ")
                            );
                            ct.with_properties
                                .push(("PARTITIONED_BY".to_string(), array_val));
                        }
                    }
                    ct.with_properties.extend(other_props);
                }
                DialectType::Hive => {
                    // Hive: FORMAT -> STORED AS, other props -> TBLPROPERTIES
                    if let Some(fmt) = format_value {
                        ct.properties.push(Expression::FileFormatProperty(Box::new(
                            FileFormatProperty {
                                this: Some(Box::new(Expression::Identifier(Identifier::new(fmt)))),
                                expressions: vec![],
                                // hive_format=Some(true) selects STORED AS rendering.
                                hive_format: Some(Box::new(Expression::Boolean(BooleanLiteral {
                                    value: true,
                                }))),
                            },
                        )));
                    }
                    if let Some(_part) = partitioned_by {
                        // PARTITIONED_BY handling is complex - move columns to partitioned by
                        // For now, the partition columns are extracted from the column list
                        Self::apply_partitioned_by(ct, &_part, target);
                    }
                    if !other_props.is_empty() {
                        // Remaining key/value pairs become TBLPROPERTIES ('k' = v, ...).
                        let eq_exprs: Vec<Expression> = other_props
                            .into_iter()
                            .map(|(k, v)| {
                                Expression::Eq(Box::new(BinaryOp::new(
                                    Expression::Literal(Literal::String(k)),
                                    value_to_expr(&v),
                                )))
                            })
                            .collect();
                        ct.properties
                            .push(Expression::Properties(Box::new(Properties {
                                expressions: eq_exprs,
                            })));
                    }
                }
                DialectType::Spark | DialectType::Databricks => {
                    // Spark: FORMAT -> USING, other props -> TBLPROPERTIES
                    if let Some(fmt) = format_value {
                        ct.properties.push(Expression::FileFormatProperty(Box::new(
                            FileFormatProperty {
                                this: Some(Box::new(Expression::Identifier(Identifier::new(fmt)))),
                                expressions: vec![],
                                hive_format: None, // None means USING syntax
                            },
                        )));
                    }
                    if let Some(_part) = partitioned_by {
                        Self::apply_partitioned_by(ct, &_part, target);
                    }
                    if !other_props.is_empty() {
                        let eq_exprs: Vec<Expression> = other_props
                            .into_iter()
                            .map(|(k, v)| {
                                Expression::Eq(Box::new(BinaryOp::new(
                                    Expression::Literal(Literal::String(k)),
                                    value_to_expr(&v),
                                )))
                            })
                            .collect();
                        ct.properties
                            .push(Expression::Properties(Box::new(Properties {
                                expressions: eq_exprs,
                            })));
                    }
                }
                DialectType::DuckDB => {
                    // DuckDB: strip all WITH properties (FORMAT, PARTITIONED_BY, etc.)
                    // Keep nothing
                }
                _ => {
                    // For other dialects, keep WITH properties as-is
                    if let Some(fmt) = format_value {
                        ct.with_properties
                            .push(("FORMAT".to_string(), format!("'{}'", fmt)));
                    }
                    if let Some(part) = partitioned_by {
                        ct.with_properties
                            .push(("PARTITIONED_BY".to_string(), part));
                    }
                    ct.with_properties.extend(other_props);
                }
            }
        }

        // Handle STORED AS 'PARQUET' (quoted format name) -> STORED AS PARQUET (unquoted)
        // and Hive STORED AS -> Presto WITH (format=...) conversion
        if !ct.properties.is_empty() {
            let is_presto_target = matches!(
                target,
                DialectType::Presto | DialectType::Trino | DialectType::Athena
            );
            let is_duckdb_target = matches!(target, DialectType::DuckDB);

            if is_presto_target || is_duckdb_target {
                let mut new_properties = Vec::new();
                for prop in ct.properties.drain(..) {
                    match &prop {
                        Expression::FileFormatProperty(ffp) => {
                            if is_presto_target {
                                // Convert STORED AS/USING to WITH (format=...)
                                if let Some(ref fmt_expr) = ffp.this {
                                    let fmt_str = match fmt_expr.as_ref() {
                                        Expression::Identifier(id) => id.name.clone(),
                                        Expression::Literal(Literal::String(s)) => s.clone(),
                                        _ => {
                                            // Unrecognized format expression: keep the
                                            // property untouched instead of guessing.
                                            new_properties.push(prop);
                                            continue;
                                        }
                                    };
                                    ct.with_properties
                                        .push(("format".to_string(), format!("'{}'", fmt_str)));
                                }
                            }
                            // DuckDB: just strip file format properties
                        }
                        // Convert TBLPROPERTIES to WITH properties for Presto target
                        Expression::Properties(props) if is_presto_target => {
                            for expr in &props.expressions {
                                if let Expression::Eq(eq) = expr {
                                    // Extract key and value from the Eq expression
                                    let key = match &eq.left {
                                        Expression::Literal(Literal::String(s)) => s.clone(),
                                        Expression::Identifier(id) => id.name.clone(),
                                        _ => continue,
                                    };
                                    let value = match &eq.right {
                                        Expression::Literal(Literal::String(s)) => {
                                            format!("'{}'", s)
                                        }
                                        Expression::Literal(Literal::Number(n)) => n.clone(),
                                        Expression::Identifier(id) => id.name.clone(),
                                        _ => continue,
                                    };
                                    ct.with_properties.push((key, value));
                                }
                            }
                        }
                        // Convert PartitionedByProperty for Presto target
                        Expression::PartitionedByProperty(ref pbp) if is_presto_target => {
                            // Check if it contains ColumnDef expressions (Hive-style with types)
                            if let Expression::Tuple(ref tuple) = *pbp.this {
                                let mut col_names: Vec<String> = Vec::new();
                                let mut col_defs: Vec<crate::expressions::ColumnDef> = Vec::new();
                                let mut has_col_defs = false;
                                for expr in &tuple.expressions {
                                    if let Expression::ColumnDef(ref cd) = expr {
                                        has_col_defs = true;
                                        col_names.push(cd.name.name.clone());
                                        col_defs.push(*cd.clone());
                                    } else if let Expression::Column(ref col) = expr {
                                        col_names.push(col.name.name.clone());
                                    } else if let Expression::Identifier(ref id) = expr {
                                        col_names.push(id.name.clone());
                                    } else {
                                        // For function expressions like MONTHS(y), serialize to SQL
                                        let generic = Dialect::get(DialectType::Generic);
                                        if let Ok(sql) = generic.generate(expr) {
                                            col_names.push(sql);
                                        }
                                    }
                                }
                                if has_col_defs {
                                    // Merge partition column defs into the main column list
                                    // (Presto declares partition columns inline).
                                    for cd in col_defs {
                                        ct.columns.push(cd);
                                    }
                                }
                                if !col_names.is_empty() {
                                    // Add PARTITIONED_BY property
                                    let array_val = format!(
                                        "ARRAY[{}]",
                                        col_names
                                            .iter()
                                            .map(|n| format!("'{}'", n))
                                            .collect::<Vec<_>>()
                                            .join(", ")
                                    );
                                    ct.with_properties
                                        .push(("PARTITIONED_BY".to_string(), array_val));
                                }
                            }
                            // Skip - don't keep in properties
                        }
                        _ => {
                            if !is_duckdb_target {
                                new_properties.push(prop);
                            }
                        }
                    }
                }
                ct.properties = new_properties;
            } else {
                // For Hive/Spark targets, unquote format names in STORED AS
                for prop in &mut ct.properties {
                    if let Expression::FileFormatProperty(ref mut ffp) = prop {
                        if let Some(ref mut fmt_expr) = ffp.this {
                            if let Expression::Literal(Literal::String(s)) = fmt_expr.as_ref() {
                                // Convert STORED AS 'PARQUET' to STORED AS PARQUET (unquote)
                                let unquoted = s.clone();
                                *fmt_expr =
                                    Box::new(Expression::Identifier(Identifier::new(unquoted)));
                            }
                        }
                    }
                }
            }
        }
    }
26026
    /// Apply PARTITIONED_BY conversion: move partition columns from column list to PARTITIONED BY.
    ///
    /// `partitioned_by_value` is the raw WITH-property value, expected to look
    /// like `ARRAY['y', 'z']`, `ARRAY('y', 'z')`, or a bare `(y, z)` list.
    ///
    /// - Hive target: matching column definitions are *removed* from
    ///   `ct.columns` and re-emitted (with types) in a PartitionedByProperty.
    /// - Spark/Databricks target: columns stay in `ct.columns`; only their
    ///   names are emitted, preserving each column's original quoting.
    /// - Other targets (e.g. DuckDB): no-op — stripping happens in the caller.
    fn apply_partitioned_by(
        ct: &mut crate::expressions::CreateTable,
        partitioned_by_value: &str,
        target: DialectType,
    ) {
        use crate::expressions::{Column, Expression, Identifier, PartitionedByProperty, Tuple};

        // Parse the ARRAY['col1', 'col2'] value to extract column names
        let mut col_names: Vec<String> = Vec::new();
        // The value looks like ARRAY['y', 'z'] or ARRAY('y', 'z')
        let inner = partitioned_by_value
            .trim()
            .trim_start_matches("ARRAY")
            .trim_start_matches('[')
            .trim_start_matches('(')
            .trim_end_matches(']')
            .trim_end_matches(')');
        for part in inner.split(',') {
            let col = part.trim().trim_matches('\'').trim_matches('"');
            if !col.is_empty() {
                col_names.push(col.to_string());
            }
        }

        // Nothing parsed -> nothing to rewrite.
        if col_names.is_empty() {
            return;
        }

        if matches!(target, DialectType::Hive) {
            // Hive: PARTITIONED BY (col_name type, ...) - move columns out of column list
            let mut partition_col_defs = Vec::new();
            for col_name in &col_names {
                // Find and remove from columns (case-insensitive name match)
                if let Some(pos) = ct
                    .columns
                    .iter()
                    .position(|c| c.name.name.eq_ignore_ascii_case(col_name))
                {
                    let col_def = ct.columns.remove(pos);
                    partition_col_defs.push(Expression::ColumnDef(Box::new(col_def)));
                }
            }
            if !partition_col_defs.is_empty() {
                ct.properties
                    .push(Expression::PartitionedByProperty(Box::new(
                        PartitionedByProperty {
                            this: Box::new(Expression::Tuple(Box::new(Tuple {
                                expressions: partition_col_defs,
                            }))),
                        },
                    )));
            }
        } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
            // Spark: PARTITIONED BY (col1, col2) - just column names, keep in column list
            // Use quoted identifiers to match the quoting style of the original column definitions
            let partition_exprs: Vec<Expression> = col_names
                .iter()
                .map(|name| {
                    // Check if the column exists in the column list and use its quoting
                    let is_quoted = ct
                        .columns
                        .iter()
                        .any(|c| c.name.name.eq_ignore_ascii_case(name) && c.name.quoted);
                    let ident = if is_quoted {
                        Identifier::quoted(name.clone())
                    } else {
                        Identifier::new(name.clone())
                    };
                    Expression::Column(Column {
                        name: ident,
                        table: None,
                        join_mark: false,
                        trailing_comments: Vec::new(),
                        span: None,
                    })
                })
                .collect();
            ct.properties
                .push(Expression::PartitionedByProperty(Box::new(
                    PartitionedByProperty {
                        this: Box::new(Expression::Tuple(Box::new(Tuple {
                            expressions: partition_exprs,
                        }))),
                    },
                )));
        }
        // DuckDB: strip partitioned_by entirely (already handled)
    }
26116
    /// Convert a DataType to Spark's type string format (using angle brackets).
    ///
    /// Scalar types map to Spark's upper-case names; text-like types
    /// (VARCHAR/TEXT/STRING/CHAR) and JSON collapse to `STRING`. Nested types
    /// recurse: `ARRAY<...>`, `MAP<k, v>`, and `STRUCT<name: type, ...>`
    /// (a struct field with an empty name is emitted as its bare type).
    // NOTE(review): the catch-all arm falls back to the Rust `Debug` repr of
    // the variant, which is not valid Spark SQL — presumably acceptable for
    // variants that never reach this path; confirm against callers.
    fn data_type_to_spark_string(dt: &crate::expressions::DataType) -> String {
        use crate::expressions::DataType;
        match dt {
            DataType::Int { .. } => "INT".to_string(),
            DataType::BigInt { .. } => "BIGINT".to_string(),
            DataType::SmallInt { .. } => "SMALLINT".to_string(),
            DataType::TinyInt { .. } => "TINYINT".to_string(),
            DataType::Float { .. } => "FLOAT".to_string(),
            DataType::Double { .. } => "DOUBLE".to_string(),
            // Decimal keeps whatever precision/scale information is present.
            DataType::Decimal {
                precision: Some(p),
                scale: Some(s),
            } => format!("DECIMAL({}, {})", p, s),
            DataType::Decimal {
                precision: Some(p), ..
            } => format!("DECIMAL({})", p),
            DataType::Decimal { .. } => "DECIMAL".to_string(),
            DataType::VarChar { .. } | DataType::Text | DataType::String { .. } => {
                "STRING".to_string()
            }
            DataType::Char { .. } => "STRING".to_string(),
            DataType::Boolean => "BOOLEAN".to_string(),
            DataType::Date => "DATE".to_string(),
            DataType::Timestamp { .. } => "TIMESTAMP".to_string(),
            DataType::Json | DataType::JsonB => "STRING".to_string(),
            DataType::Binary { .. } => "BINARY".to_string(),
            DataType::Array { element_type, .. } => {
                format!("ARRAY<{}>", Self::data_type_to_spark_string(element_type))
            }
            DataType::Map {
                key_type,
                value_type,
            } => format!(
                "MAP<{}, {}>",
                Self::data_type_to_spark_string(key_type),
                Self::data_type_to_spark_string(value_type)
            ),
            DataType::Struct { fields, .. } => {
                let field_strs: Vec<String> = fields
                    .iter()
                    .map(|f| {
                        // Anonymous fields render as just the type.
                        if f.name.is_empty() {
                            Self::data_type_to_spark_string(&f.data_type)
                        } else {
                            format!(
                                "{}: {}",
                                f.name,
                                Self::data_type_to_spark_string(&f.data_type)
                            )
                        }
                    })
                    .collect();
                format!("STRUCT<{}>", field_strs.join(", "))
            }
            // User-defined type names pass through verbatim.
            DataType::Custom { name } => name.clone(),
            _ => format!("{:?}", dt),
        }
    }
26176
    /// Extract value and unit from an Interval expression.
    /// Returns (value_expression, IntervalUnit).
    ///
    /// If the interval carries an explicit `Simple` unit spec, that unit is
    /// used directly. If the unit spec is absent, the unit may be embedded in
    /// a string value (Snowflake style, e.g. `'5 DAY'`): the string is split
    /// on the first space, the tail is parsed as a unit name (singular or
    /// plural), and only the numeric head is returned as the value.
    /// `IntervalUnit::Day` is the fallback whenever no unit can be determined,
    /// and non-Interval inputs are passed through unchanged with a Day unit.
    fn extract_interval_parts(
        interval_expr: &Expression,
    ) -> (Expression, crate::expressions::IntervalUnit) {
        use crate::expressions::{IntervalUnit, IntervalUnitSpec};

        if let Expression::Interval(iv) = interval_expr {
            // A missing interval value defaults to 0.
            let val = iv.this.clone().unwrap_or(Expression::number(0));
            let unit = match &iv.unit {
                Some(IntervalUnitSpec::Simple { unit, .. }) => *unit,
                None => {
                    // Unit might be embedded in the string value (Snowflake format: '5 DAY')
                    if let Expression::Literal(crate::expressions::Literal::String(s)) = &val {
                        let parts: Vec<&str> = s.trim().splitn(2, ' ').collect();
                        if parts.len() == 2 {
                            let unit_str = parts[1].trim().to_uppercase();
                            let parsed_unit = match unit_str.as_str() {
                                "YEAR" | "YEARS" => IntervalUnit::Year,
                                "QUARTER" | "QUARTERS" => IntervalUnit::Quarter,
                                "MONTH" | "MONTHS" => IntervalUnit::Month,
                                "WEEK" | "WEEKS" | "ISOWEEK" => IntervalUnit::Week,
                                "DAY" | "DAYS" => IntervalUnit::Day,
                                "HOUR" | "HOURS" => IntervalUnit::Hour,
                                "MINUTE" | "MINUTES" => IntervalUnit::Minute,
                                "SECOND" | "SECONDS" => IntervalUnit::Second,
                                "MILLISECOND" | "MILLISECONDS" => IntervalUnit::Millisecond,
                                "MICROSECOND" | "MICROSECONDS" => IntervalUnit::Microsecond,
                                _ => IntervalUnit::Day,
                            };
                            // Return just the numeric part as value and parsed unit
                            return (
                                Expression::Literal(crate::expressions::Literal::String(
                                    parts[0].to_string(),
                                )),
                                parsed_unit,
                            );
                        }
                        IntervalUnit::Day
                    } else {
                        IntervalUnit::Day
                    }
                }
                // NOTE(review): other IntervalUnitSpec variants (non-Simple)
                // also fall back to Day here — confirm this is intended.
                _ => IntervalUnit::Day,
            };
            (val, unit)
        } else {
            // Not an interval - pass through
            (interval_expr.clone(), crate::expressions::IntervalUnit::Day)
        }
    }
26228
26229 /// Normalize BigQuery-specific functions to standard forms that target dialects can handle
26230 fn normalize_bigquery_function(
26231 e: Expression,
26232 source: DialectType,
26233 target: DialectType,
26234 ) -> Result<Expression> {
26235 use crate::expressions::{BinaryOp, Cast, DataType, Function, Identifier, Literal, Paren};
26236
26237 let f = if let Expression::Function(f) = e {
26238 *f
26239 } else {
26240 return Ok(e);
26241 };
26242 let name = f.name.to_uppercase();
26243 let mut args = f.args;
26244
26245 /// Helper to extract unit string from an identifier, column, or literal expression
26246 fn get_unit_str(expr: &Expression) -> String {
26247 match expr {
26248 Expression::Identifier(id) => id.name.to_uppercase(),
26249 Expression::Literal(Literal::String(s)) => s.to_uppercase(),
26250 Expression::Column(col) => col.name.name.to_uppercase(),
26251 // Handle WEEK(MONDAY), WEEK(SUNDAY) etc. which are parsed as Function("WEEK", [Column("MONDAY")])
26252 Expression::Function(f) => {
26253 let base = f.name.to_uppercase();
26254 if !f.args.is_empty() {
26255 // e.g., WEEK(MONDAY) -> "WEEK(MONDAY)"
26256 let inner = get_unit_str(&f.args[0]);
26257 format!("{}({})", base, inner)
26258 } else {
26259 base
26260 }
26261 }
26262 _ => "DAY".to_string(),
26263 }
26264 }
26265
26266 /// Parse unit string to IntervalUnit
26267 fn parse_interval_unit(s: &str) -> crate::expressions::IntervalUnit {
26268 match s {
26269 "YEAR" => crate::expressions::IntervalUnit::Year,
26270 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
26271 "MONTH" => crate::expressions::IntervalUnit::Month,
26272 "WEEK" | "ISOWEEK" => crate::expressions::IntervalUnit::Week,
26273 "DAY" => crate::expressions::IntervalUnit::Day,
26274 "HOUR" => crate::expressions::IntervalUnit::Hour,
26275 "MINUTE" => crate::expressions::IntervalUnit::Minute,
26276 "SECOND" => crate::expressions::IntervalUnit::Second,
26277 "MILLISECOND" => crate::expressions::IntervalUnit::Millisecond,
26278 "MICROSECOND" => crate::expressions::IntervalUnit::Microsecond,
26279 _ if s.starts_with("WEEK(") => crate::expressions::IntervalUnit::Week,
26280 _ => crate::expressions::IntervalUnit::Day,
26281 }
26282 }
26283
26284 match name.as_str() {
26285 // TIMESTAMP_DIFF(date1, date2, unit) -> TIMESTAMPDIFF(unit, date2, date1)
26286 // (BigQuery: result = date1 - date2, Standard: result = end - start)
26287 "TIMESTAMP_DIFF" | "DATETIME_DIFF" | "TIME_DIFF" if args.len() == 3 => {
26288 let date1 = args.remove(0);
26289 let date2 = args.remove(0);
26290 let unit_expr = args.remove(0);
26291 let unit_str = get_unit_str(&unit_expr);
26292
26293 if matches!(target, DialectType::BigQuery) {
26294 // BigQuery -> BigQuery: just uppercase the unit
26295 let unit = Expression::Identifier(Identifier::new(unit_str.clone()));
26296 return Ok(Expression::Function(Box::new(Function::new(
26297 f.name,
26298 vec![date1, date2, unit],
26299 ))));
26300 }
26301
26302 // For Snowflake: use TimestampDiff expression so it generates TIMESTAMPDIFF
26303 // (Function("TIMESTAMPDIFF") would be converted to DATEDIFF by Snowflake's function normalization)
26304 if matches!(target, DialectType::Snowflake) {
26305 return Ok(Expression::TimestampDiff(Box::new(
26306 crate::expressions::TimestampDiff {
26307 this: Box::new(date2),
26308 expression: Box::new(date1),
26309 unit: Some(unit_str),
26310 },
26311 )));
26312 }
26313
26314 // For DuckDB: DATE_DIFF('UNIT', start, end) with proper CAST
26315 if matches!(target, DialectType::DuckDB) {
26316 let (cast_d1, cast_d2) = if name == "TIME_DIFF" {
26317 // CAST to TIME
26318 let cast_fn = |e: Expression| -> Expression {
26319 match e {
26320 Expression::Literal(Literal::String(s)) => {
26321 Expression::Cast(Box::new(Cast {
26322 this: Expression::Literal(Literal::String(s)),
26323 to: DataType::Custom {
26324 name: "TIME".to_string(),
26325 },
26326 trailing_comments: vec![],
26327 double_colon_syntax: false,
26328 format: None,
26329 default: None,
26330 }))
26331 }
26332 other => other,
26333 }
26334 };
26335 (cast_fn(date1), cast_fn(date2))
26336 } else if name == "DATETIME_DIFF" {
26337 // CAST to TIMESTAMP
26338 (
26339 Self::ensure_cast_timestamp(date1),
26340 Self::ensure_cast_timestamp(date2),
26341 )
26342 } else {
26343 // TIMESTAMP_DIFF: CAST to TIMESTAMPTZ
26344 (
26345 Self::ensure_cast_timestamptz(date1),
26346 Self::ensure_cast_timestamptz(date2),
26347 )
26348 };
26349 return Ok(Expression::Function(Box::new(Function::new(
26350 "DATE_DIFF".to_string(),
26351 vec![
26352 Expression::Literal(Literal::String(unit_str)),
26353 cast_d2,
26354 cast_d1,
26355 ],
26356 ))));
26357 }
26358
26359 // Convert to standard TIMESTAMPDIFF(unit, start, end)
26360 let unit = Expression::Identifier(Identifier::new(unit_str));
26361 Ok(Expression::Function(Box::new(Function::new(
26362 "TIMESTAMPDIFF".to_string(),
26363 vec![unit, date2, date1],
26364 ))))
26365 }
26366
26367 // DATEDIFF(unit, start, end) -> target-specific form
26368 // Used by: Redshift, Snowflake, TSQL, Databricks, Spark
26369 "DATEDIFF" if args.len() == 3 => {
26370 let arg0 = args.remove(0);
26371 let arg1 = args.remove(0);
26372 let arg2 = args.remove(0);
26373 let unit_str = get_unit_str(&arg0);
26374
26375 // Redshift DATEDIFF(unit, start, end) order: result = end - start
26376 // Snowflake DATEDIFF(unit, start, end) order: result = end - start
26377 // TSQL DATEDIFF(unit, start, end) order: result = end - start
26378
26379 if matches!(target, DialectType::Snowflake) {
26380 // Snowflake: DATEDIFF(UNIT, start, end) - uppercase unit
26381 let unit = Expression::Identifier(Identifier::new(unit_str));
26382 return Ok(Expression::Function(Box::new(Function::new(
26383 "DATEDIFF".to_string(),
26384 vec![unit, arg1, arg2],
26385 ))));
26386 }
26387
26388 if matches!(target, DialectType::DuckDB) {
26389 // DuckDB: DATE_DIFF('UNIT', start, end) with CAST
26390 let cast_d1 = Self::ensure_cast_timestamp(arg1);
26391 let cast_d2 = Self::ensure_cast_timestamp(arg2);
26392 return Ok(Expression::Function(Box::new(Function::new(
26393 "DATE_DIFF".to_string(),
26394 vec![
26395 Expression::Literal(Literal::String(unit_str)),
26396 cast_d1,
26397 cast_d2,
26398 ],
26399 ))));
26400 }
26401
26402 if matches!(target, DialectType::BigQuery) {
26403 // BigQuery: DATE_DIFF(end_date, start_date, UNIT) - reversed args, CAST to DATETIME
26404 let cast_d1 = Self::ensure_cast_datetime(arg1);
26405 let cast_d2 = Self::ensure_cast_datetime(arg2);
26406 let unit = Expression::Identifier(Identifier::new(unit_str));
26407 return Ok(Expression::Function(Box::new(Function::new(
26408 "DATE_DIFF".to_string(),
26409 vec![cast_d2, cast_d1, unit],
26410 ))));
26411 }
26412
26413 if matches!(target, DialectType::Spark | DialectType::Databricks) {
26414 // Spark/Databricks: DATEDIFF(UNIT, start, end) - uppercase unit
26415 let unit = Expression::Identifier(Identifier::new(unit_str));
26416 return Ok(Expression::Function(Box::new(Function::new(
26417 "DATEDIFF".to_string(),
26418 vec![unit, arg1, arg2],
26419 ))));
26420 }
26421
26422 if matches!(target, DialectType::Hive) {
26423 // Hive: DATEDIFF(end, start) for DAY only, use MONTHS_BETWEEN for MONTH
26424 match unit_str.as_str() {
26425 "MONTH" => {
26426 return Ok(Expression::Function(Box::new(Function::new(
26427 "CAST".to_string(),
26428 vec![Expression::Function(Box::new(Function::new(
26429 "MONTHS_BETWEEN".to_string(),
26430 vec![arg2, arg1],
26431 )))],
26432 ))));
26433 }
26434 "WEEK" => {
26435 return Ok(Expression::Cast(Box::new(Cast {
26436 this: Expression::Div(Box::new(crate::expressions::BinaryOp::new(
26437 Expression::Function(Box::new(Function::new(
26438 "DATEDIFF".to_string(),
26439 vec![arg2, arg1],
26440 ))),
26441 Expression::Literal(Literal::Number("7".to_string())),
26442 ))),
26443 to: DataType::Int {
26444 length: None,
26445 integer_spelling: false,
26446 },
26447 trailing_comments: vec![],
26448 double_colon_syntax: false,
26449 format: None,
26450 default: None,
26451 })));
26452 }
26453 _ => {
26454 // Default: DATEDIFF(end, start) for DAY
26455 return Ok(Expression::Function(Box::new(Function::new(
26456 "DATEDIFF".to_string(),
26457 vec![arg2, arg1],
26458 ))));
26459 }
26460 }
26461 }
26462
26463 if matches!(
26464 target,
26465 DialectType::Presto | DialectType::Trino | DialectType::Athena
26466 ) {
26467 // Presto/Trino: DATE_DIFF('UNIT', start, end)
26468 return Ok(Expression::Function(Box::new(Function::new(
26469 "DATE_DIFF".to_string(),
26470 vec![Expression::Literal(Literal::String(unit_str)), arg1, arg2],
26471 ))));
26472 }
26473
26474 if matches!(target, DialectType::TSQL) {
26475 // TSQL: DATEDIFF(UNIT, start, CAST(end AS DATETIME2))
26476 let cast_d2 = Self::ensure_cast_datetime2(arg2);
26477 let unit = Expression::Identifier(Identifier::new(unit_str));
26478 return Ok(Expression::Function(Box::new(Function::new(
26479 "DATEDIFF".to_string(),
26480 vec![unit, arg1, cast_d2],
26481 ))));
26482 }
26483
26484 if matches!(target, DialectType::PostgreSQL) {
26485 // PostgreSQL doesn't have DATEDIFF - use date subtraction or EXTRACT
26486 // For now, use DATEDIFF (passthrough) with uppercased unit
26487 let unit = Expression::Identifier(Identifier::new(unit_str));
26488 return Ok(Expression::Function(Box::new(Function::new(
26489 "DATEDIFF".to_string(),
26490 vec![unit, arg1, arg2],
26491 ))));
26492 }
26493
26494 // Default: DATEDIFF(UNIT, start, end) with uppercase unit
26495 let unit = Expression::Identifier(Identifier::new(unit_str));
26496 Ok(Expression::Function(Box::new(Function::new(
26497 "DATEDIFF".to_string(),
26498 vec![unit, arg1, arg2],
26499 ))))
26500 }
26501
            // DATE_DIFF(date1, date2, unit) -> standard form
            //
            // BigQuery-style three-argument form. BigQuery semantics are
            // `DATE_DIFF(date1, date2, unit) = date1 - date2` (in `unit`s), so
            // when retargeting to dialects whose DATEDIFF/DATE_DIFF computes
            // `end - start`, the two date arguments are swapped below to
            // preserve the sign of the result.
            "DATE_DIFF" if args.len() == 3 => {
                let date1 = args.remove(0);
                let date2 = args.remove(0);
                let unit_expr = args.remove(0);
                // Normalized (uppercased) unit name, e.g. "DAY", "WEEK(SUNDAY)".
                let unit_str = get_unit_str(&unit_expr);

                if matches!(target, DialectType::BigQuery) {
                    // BigQuery -> BigQuery: just uppercase the unit, normalize WEEK(SUNDAY) -> WEEK
                    // (WEEK(SUNDAY) is BigQuery's default week boundary, so the two are equivalent).
                    let norm_unit = if unit_str == "WEEK(SUNDAY)" {
                        "WEEK".to_string()
                    } else {
                        unit_str
                    };
                    let norm_d1 = Self::date_literal_to_cast(date1);
                    let norm_d2 = Self::date_literal_to_cast(date2);
                    let unit = Expression::Identifier(Identifier::new(norm_unit));
                    // Keep the original function name and argument order.
                    return Ok(Expression::Function(Box::new(Function::new(
                        f.name,
                        vec![norm_d1, norm_d2, unit],
                    ))));
                }

                if matches!(target, DialectType::MySQL) {
                    // MySQL DATEDIFF only takes 2 args (date1, date2), returns day difference
                    // NOTE(review): the unit argument is discarded here, so a non-DAY unit
                    // (e.g. MONTH) silently changes meaning on MySQL — confirm this is intended.
                    let norm_d1 = Self::date_literal_to_cast(date1);
                    let norm_d2 = Self::date_literal_to_cast(date2);
                    return Ok(Expression::Function(Box::new(Function::new(
                        "DATEDIFF".to_string(),
                        vec![norm_d1, norm_d2],
                    ))));
                }

                if matches!(target, DialectType::StarRocks) {
                    // StarRocks: DATE_DIFF('UNIT', date1, date2) - unit as string, args NOT swapped
                    let norm_d1 = Self::date_literal_to_cast(date1);
                    let norm_d2 = Self::date_literal_to_cast(date2);
                    return Ok(Expression::Function(Box::new(Function::new(
                        "DATE_DIFF".to_string(),
                        vec![
                            Expression::Literal(Literal::String(unit_str)),
                            norm_d1,
                            norm_d2,
                        ],
                    ))));
                }

                if matches!(target, DialectType::DuckDB) {
                    // DuckDB: DATE_DIFF('UNIT', date2, date1) with proper CAST for dates
                    // (arguments swapped to preserve BigQuery's date1 - date2 sign).
                    let norm_d1 = Self::ensure_cast_date(date1);
                    let norm_d2 = Self::ensure_cast_date(date2);

                    // Handle WEEK variants: WEEK(MONDAY)/WEEK(SUNDAY)/ISOWEEK/WEEK
                    let is_week_variant = unit_str == "WEEK"
                        || unit_str.starts_with("WEEK(")
                        || unit_str == "ISOWEEK";
                    if is_week_variant {
                        // For DuckDB, WEEK-based diffs use DATE_TRUNC approach
                        // WEEK(MONDAY) / ISOWEEK: DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2), DATE_TRUNC('WEEK', d1))
                        // WEEK / WEEK(SUNDAY): DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2 + INTERVAL '1' DAY), DATE_TRUNC('WEEK', d1 + INTERVAL '1' DAY))
                        // WEEK(SATURDAY): DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2 + INTERVAL '-5' DAY), DATE_TRUNC('WEEK', d1 + INTERVAL '-5' DAY))
                        //
                        // The per-weekday day offsets below shift each date so that
                        // DuckDB's Monday-based DATE_TRUNC('WEEK') lands on the
                        // requested week-start boundary. Presumably derived from the
                        // distance between the requested start day and Monday —
                        // confirm against reference outputs before changing.
                        let day_offset = if unit_str == "WEEK(MONDAY)" || unit_str == "ISOWEEK" {
                            None // ISO weeks start on Monday, aligned with DATE_TRUNC('WEEK')
                        } else if unit_str == "WEEK" || unit_str == "WEEK(SUNDAY)" {
                            Some("1") // Shift Sunday to Monday alignment
                        } else if unit_str == "WEEK(SATURDAY)" {
                            Some("-5")
                        } else if unit_str == "WEEK(TUESDAY)" {
                            Some("-1")
                        } else if unit_str == "WEEK(WEDNESDAY)" {
                            Some("-2")
                        } else if unit_str == "WEEK(THURSDAY)" {
                            Some("-3")
                        } else if unit_str == "WEEK(FRIDAY)" {
                            Some("-4")
                        } else {
                            Some("1") // default to Sunday
                        };

                        // Builds DATE_TRUNC('WEEK', date [+ INTERVAL 'offset' DAY]).
                        let make_trunc = |date: Expression, offset: Option<&str>| -> Expression {
                            let shifted = if let Some(off) = offset {
                                let interval =
                                    Expression::Interval(Box::new(crate::expressions::Interval {
                                        this: Some(Expression::Literal(Literal::String(
                                            off.to_string(),
                                        ))),
                                        unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                            unit: crate::expressions::IntervalUnit::Day,
                                            use_plural: false,
                                        }),
                                    }));
                                Expression::Add(Box::new(crate::expressions::BinaryOp::new(
                                    date, interval,
                                )))
                            } else {
                                date
                            };
                            Expression::Function(Box::new(Function::new(
                                "DATE_TRUNC".to_string(),
                                vec![
                                    Expression::Literal(Literal::String("WEEK".to_string())),
                                    shifted,
                                ],
                            )))
                        };

                        let trunc_d2 = make_trunc(norm_d2, day_offset);
                        let trunc_d1 = make_trunc(norm_d1, day_offset);
                        return Ok(Expression::Function(Box::new(Function::new(
                            "DATE_DIFF".to_string(),
                            vec![
                                Expression::Literal(Literal::String("WEEK".to_string())),
                                trunc_d2,
                                trunc_d1,
                            ],
                        ))));
                    }

                    // Non-week units: plain DATE_DIFF('UNIT', d2, d1) (swapped).
                    return Ok(Expression::Function(Box::new(Function::new(
                        "DATE_DIFF".to_string(),
                        vec![
                            Expression::Literal(Literal::String(unit_str)),
                            norm_d2,
                            norm_d1,
                        ],
                    ))));
                }

                // Default: DATEDIFF(unit, date2, date1)
                // (dates swapped so end-minus-start dialects keep BigQuery's sign).
                let unit = Expression::Identifier(Identifier::new(unit_str));
                Ok(Expression::Function(Box::new(Function::new(
                    "DATEDIFF".to_string(),
                    vec![unit, date2, date1],
                ))))
            }
26637
            // TIMESTAMP_ADD(ts, INTERVAL n UNIT) -> target-specific
            //
            // BigQuery-style two-argument add for TIMESTAMP/DATETIME/TIME values.
            // The second argument is an INTERVAL expression that is decomposed
            // into a (value, unit) pair before dispatching on the target dialect.
            "TIMESTAMP_ADD" | "DATETIME_ADD" | "TIME_ADD" if args.len() == 2 => {
                let ts = args.remove(0);
                let interval_expr = args.remove(0);
                let (val, unit) = Self::extract_interval_parts(&interval_expr);

                match target {
                    DialectType::Snowflake => {
                        // TIMESTAMPADD(UNIT, val, CAST(ts AS TIMESTAMPTZ))
                        // Use TimestampAdd expression so Snowflake generates TIMESTAMPADD
                        // (Function("TIMESTAMPADD") would be converted to DATEADD by Snowflake's function normalization)
                        let unit_str = Self::interval_unit_to_string(&unit);
                        let cast_ts = Self::maybe_cast_ts_to_tz(ts, &name);
                        Ok(Expression::TimestampAdd(Box::new(
                            crate::expressions::TimestampAdd {
                                this: Box::new(val),
                                expression: Box::new(cast_ts),
                                unit: Some(unit_str),
                            },
                        )))
                    }
                    DialectType::Spark | DialectType::Databricks => {
                        if name == "DATETIME_ADD" && matches!(target, DialectType::Spark) {
                            // Spark DATETIME_ADD: ts + INTERVAL val UNIT
                            let interval =
                                Expression::Interval(Box::new(crate::expressions::Interval {
                                    this: Some(val),
                                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                        unit,
                                        use_plural: false,
                                    }),
                                }));
                            Ok(Expression::Add(Box::new(
                                crate::expressions::BinaryOp::new(ts, interval),
                            )))
                        } else if name == "DATETIME_ADD"
                            && matches!(target, DialectType::Databricks)
                        {
                            // Databricks DATETIME_ADD: TIMESTAMPADD(UNIT, val, ts)
                            let unit_str = Self::interval_unit_to_string(&unit);
                            Ok(Expression::Function(Box::new(Function::new(
                                "TIMESTAMPADD".to_string(),
                                vec![Expression::Identifier(Identifier::new(unit_str)), val, ts],
                            ))))
                        } else {
                            // Presto-style: DATE_ADD('unit', val, CAST(ts AS TIMESTAMP))
                            // TIMESTAMP_ADD and TIME_ADD on both Spark and Databricks
                            // fall through to this branch; only TIMESTAMP*/DATETIME*
                            // sources get the timestamp cast.
                            let unit_str = Self::interval_unit_to_string(&unit);
                            let cast_ts =
                                if name.starts_with("TIMESTAMP") || name.starts_with("DATETIME") {
                                    Self::maybe_cast_ts(ts)
                                } else {
                                    ts
                                };
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_ADD".to_string(),
                                vec![
                                    Expression::Identifier(Identifier::new(unit_str)),
                                    val,
                                    cast_ts,
                                ],
                            ))))
                        }
                    }
                    DialectType::MySQL => {
                        // DATE_ADD(TIMESTAMP(ts), INTERVAL val UNIT) for MySQL
                        let mysql_ts = if name.starts_with("TIMESTAMP") {
                            // Check if already wrapped in TIMESTAMP() function (from cross-dialect normalization)
                            match &ts {
                                Expression::Function(ref inner_f)
                                    if inner_f.name.eq_ignore_ascii_case("TIMESTAMP") =>
                                {
                                    // Already wrapped, keep as-is
                                    ts
                                }
                                _ => {
                                    // Unwrap typed literals: TIMESTAMP '...' -> '...' for TIMESTAMP() wrapper
                                    let unwrapped = match ts {
                                        Expression::Literal(Literal::Timestamp(s)) => {
                                            Expression::Literal(Literal::String(s))
                                        }
                                        other => other,
                                    };
                                    Expression::Function(Box::new(Function::new(
                                        "TIMESTAMP".to_string(),
                                        vec![unwrapped],
                                    )))
                                }
                            }
                        } else {
                            ts
                        };
                        // Emit a DateAdd node; the MySQL generator renders it as
                        // DATE_ADD(expr, INTERVAL val UNIT).
                        Ok(Expression::DateAdd(Box::new(
                            crate::expressions::DateAddFunc {
                                this: mysql_ts,
                                interval: val,
                                unit,
                            },
                        )))
                    }
                    _ => {
                        // DuckDB and others use DateAdd expression (DuckDB converts to + INTERVAL)
                        // DuckDB additionally needs an explicit timestamp cast so the
                        // interval arithmetic type-checks.
                        let cast_ts = if matches!(target, DialectType::DuckDB) {
                            if name == "DATETIME_ADD" {
                                Self::ensure_cast_timestamp(ts)
                            } else if name.starts_with("TIMESTAMP") {
                                Self::maybe_cast_ts_to_tz(ts, &name)
                            } else {
                                ts
                            }
                        } else {
                            ts
                        };
                        Ok(Expression::DateAdd(Box::new(
                            crate::expressions::DateAddFunc {
                                this: cast_ts,
                                interval: val,
                                unit,
                            },
                        )))
                    }
                }
            }
26760
26761 // TIMESTAMP_SUB(ts, INTERVAL n UNIT) -> target-specific
26762 "TIMESTAMP_SUB" | "DATETIME_SUB" | "TIME_SUB" if args.len() == 2 => {
26763 let ts = args.remove(0);
26764 let interval_expr = args.remove(0);
26765 let (val, unit) = Self::extract_interval_parts(&interval_expr);
26766
26767 match target {
26768 DialectType::Snowflake => {
26769 // TIMESTAMPADD(UNIT, val * -1, CAST(ts AS TIMESTAMPTZ))
26770 let unit_str = Self::interval_unit_to_string(&unit);
26771 let cast_ts = Self::maybe_cast_ts_to_tz(ts, &name);
26772 let neg_val = Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
26773 val,
26774 Expression::Neg(Box::new(crate::expressions::UnaryOp {
26775 this: Expression::number(1),
26776 })),
26777 )));
26778 Ok(Expression::TimestampAdd(Box::new(
26779 crate::expressions::TimestampAdd {
26780 this: Box::new(neg_val),
26781 expression: Box::new(cast_ts),
26782 unit: Some(unit_str),
26783 },
26784 )))
26785 }
26786 DialectType::Spark | DialectType::Databricks => {
26787 if (name == "DATETIME_SUB" && matches!(target, DialectType::Spark))
26788 || (name == "TIMESTAMP_SUB" && matches!(target, DialectType::Spark))
26789 {
26790 // Spark: ts - INTERVAL val UNIT
26791 let cast_ts = if name.starts_with("TIMESTAMP") {
26792 Self::maybe_cast_ts(ts)
26793 } else {
26794 ts
26795 };
26796 let interval =
26797 Expression::Interval(Box::new(crate::expressions::Interval {
26798 this: Some(val),
26799 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
26800 unit,
26801 use_plural: false,
26802 }),
26803 }));
26804 Ok(Expression::Sub(Box::new(
26805 crate::expressions::BinaryOp::new(cast_ts, interval),
26806 )))
26807 } else {
26808 // Databricks: TIMESTAMPADD(UNIT, val * -1, ts)
26809 let unit_str = Self::interval_unit_to_string(&unit);
26810 let neg_val =
26811 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
26812 val,
26813 Expression::Neg(Box::new(crate::expressions::UnaryOp {
26814 this: Expression::number(1),
26815 })),
26816 )));
26817 Ok(Expression::Function(Box::new(Function::new(
26818 "TIMESTAMPADD".to_string(),
26819 vec![
26820 Expression::Identifier(Identifier::new(unit_str)),
26821 neg_val,
26822 ts,
26823 ],
26824 ))))
26825 }
26826 }
26827 DialectType::MySQL => {
26828 let mysql_ts = if name.starts_with("TIMESTAMP") {
26829 // Check if already wrapped in TIMESTAMP() function (from cross-dialect normalization)
26830 match &ts {
26831 Expression::Function(ref inner_f)
26832 if inner_f.name.eq_ignore_ascii_case("TIMESTAMP") =>
26833 {
26834 // Already wrapped, keep as-is
26835 ts
26836 }
26837 _ => {
26838 let unwrapped = match ts {
26839 Expression::Literal(Literal::Timestamp(s)) => {
26840 Expression::Literal(Literal::String(s))
26841 }
26842 other => other,
26843 };
26844 Expression::Function(Box::new(Function::new(
26845 "TIMESTAMP".to_string(),
26846 vec![unwrapped],
26847 )))
26848 }
26849 }
26850 } else {
26851 ts
26852 };
26853 Ok(Expression::DateSub(Box::new(
26854 crate::expressions::DateAddFunc {
26855 this: mysql_ts,
26856 interval: val,
26857 unit,
26858 },
26859 )))
26860 }
26861 _ => {
26862 let cast_ts = if matches!(target, DialectType::DuckDB) {
26863 if name == "DATETIME_SUB" {
26864 Self::ensure_cast_timestamp(ts)
26865 } else if name.starts_with("TIMESTAMP") {
26866 Self::maybe_cast_ts_to_tz(ts, &name)
26867 } else {
26868 ts
26869 }
26870 } else {
26871 ts
26872 };
26873 Ok(Expression::DateSub(Box::new(
26874 crate::expressions::DateAddFunc {
26875 this: cast_ts,
26876 interval: val,
26877 unit,
26878 },
26879 )))
26880 }
26881 }
26882 }
26883
26884 // DATE_SUB(date, INTERVAL n UNIT) -> target-specific
26885 "DATE_SUB" if args.len() == 2 => {
26886 let date = args.remove(0);
26887 let interval_expr = args.remove(0);
26888 let (val, unit) = Self::extract_interval_parts(&interval_expr);
26889
26890 match target {
26891 DialectType::Databricks | DialectType::Spark => {
26892 // Databricks/Spark: DATE_ADD(date, -val)
26893 // Use DateAdd expression with negative val so it generates correctly
26894 // The generator will output DATE_ADD(date, INTERVAL -val DAY)
26895 // Then Databricks transform converts 2-arg DATE_ADD(date, interval) to DATEADD(DAY, interval, date)
26896 // Instead, we directly output as a simple negated DateSub
26897 Ok(Expression::DateSub(Box::new(
26898 crate::expressions::DateAddFunc {
26899 this: date,
26900 interval: val,
26901 unit,
26902 },
26903 )))
26904 }
26905 DialectType::DuckDB => {
26906 // DuckDB: CAST(date AS DATE) - INTERVAL 'val' UNIT
26907 let cast_date = Self::ensure_cast_date(date);
26908 let interval =
26909 Expression::Interval(Box::new(crate::expressions::Interval {
26910 this: Some(val),
26911 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
26912 unit,
26913 use_plural: false,
26914 }),
26915 }));
26916 Ok(Expression::Sub(Box::new(
26917 crate::expressions::BinaryOp::new(cast_date, interval),
26918 )))
26919 }
26920 DialectType::Snowflake => {
26921 // Snowflake: Let Snowflake's own DateSub -> DATEADD(UNIT, val * -1, date) handler work
26922 // Just ensure the date is cast properly
26923 let cast_date = Self::ensure_cast_date(date);
26924 Ok(Expression::DateSub(Box::new(
26925 crate::expressions::DateAddFunc {
26926 this: cast_date,
26927 interval: val,
26928 unit,
26929 },
26930 )))
26931 }
26932 DialectType::PostgreSQL => {
26933 // PostgreSQL: date - INTERVAL 'val UNIT'
26934 let unit_str = Self::interval_unit_to_string(&unit);
26935 let interval =
26936 Expression::Interval(Box::new(crate::expressions::Interval {
26937 this: Some(Expression::Literal(Literal::String(format!(
26938 "{} {}",
26939 Self::expr_to_string(&val),
26940 unit_str
26941 )))),
26942 unit: None,
26943 }));
26944 Ok(Expression::Sub(Box::new(
26945 crate::expressions::BinaryOp::new(date, interval),
26946 )))
26947 }
26948 _ => Ok(Expression::DateSub(Box::new(
26949 crate::expressions::DateAddFunc {
26950 this: date,
26951 interval: val,
26952 unit,
26953 },
26954 ))),
26955 }
26956 }
26957
            // DATEADD(unit, val, date) -> target-specific form
            // Used by: Redshift, Snowflake, TSQL, ClickHouse
            //
            // Three-argument add with the unit first. Dispatches per target:
            // some dialects keep DATEADD, some use interval arithmetic, and
            // Spark/Hive need per-unit function selection (ADD_MONTHS vs DATE_ADD).
            "DATEADD" if args.len() == 3 => {
                let arg0 = args.remove(0);
                let arg1 = args.remove(0);
                let arg2 = args.remove(0);
                // Normalized (uppercased) unit name extracted from the first argument.
                let unit_str = get_unit_str(&arg0);

                if matches!(target, DialectType::Snowflake | DialectType::TSQL) {
                    // Keep DATEADD(UNIT, val, date) with uppercased unit
                    let unit = Expression::Identifier(Identifier::new(unit_str));
                    // Only CAST to DATETIME2 for TSQL target when source is NOT Spark/Databricks family
                    let date = if matches!(target, DialectType::TSQL)
                        && !matches!(
                            source,
                            DialectType::Spark | DialectType::Databricks | DialectType::Hive
                        ) {
                        Self::ensure_cast_datetime2(arg2)
                    } else {
                        arg2
                    };
                    return Ok(Expression::Function(Box::new(Function::new(
                        "DATEADD".to_string(),
                        vec![unit, arg1, date],
                    ))));
                }

                if matches!(target, DialectType::DuckDB) {
                    // DuckDB: date + INTERVAL 'val' UNIT
                    let iu = parse_interval_unit(&unit_str);
                    let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                        this: Some(arg1),
                        unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                            unit: iu,
                            use_plural: false,
                        }),
                    }));
                    // Cast the operand so the + INTERVAL arithmetic type-checks.
                    let cast_date = Self::ensure_cast_timestamp(arg2);
                    return Ok(Expression::Add(Box::new(
                        crate::expressions::BinaryOp::new(cast_date, interval),
                    )));
                }

                if matches!(target, DialectType::BigQuery) {
                    // BigQuery: DATE_ADD(date, INTERVAL val UNIT) or TIMESTAMP_ADD(ts, INTERVAL val UNIT)
                    // (this branch always emits DATE_ADD; any TIMESTAMP_ADD selection
                    // would happen elsewhere).
                    let iu = parse_interval_unit(&unit_str);
                    let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                        this: Some(arg1),
                        unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                            unit: iu,
                            use_plural: false,
                        }),
                    }));
                    return Ok(Expression::Function(Box::new(Function::new(
                        "DATE_ADD".to_string(),
                        vec![arg2, interval],
                    ))));
                }

                if matches!(target, DialectType::Databricks) {
                    // Databricks: keep DATEADD(UNIT, val, date) format
                    let unit = Expression::Identifier(Identifier::new(unit_str));
                    return Ok(Expression::Function(Box::new(Function::new(
                        "DATEADD".to_string(),
                        vec![unit, arg1, arg2],
                    ))));
                }

                if matches!(target, DialectType::Spark) {
                    // Spark: convert month-based units to ADD_MONTHS, rest to DATE_ADD
                    // Helper: multiply the value expression by a constant factor,
                    // folding at transform time when the value is a numeric literal.
                    fn multiply_expr_dateadd(expr: Expression, factor: i64) -> Expression {
                        if let Expression::Literal(crate::expressions::Literal::Number(n)) = &expr {
                            if let Ok(val) = n.parse::<i64>() {
                                return Expression::Literal(crate::expressions::Literal::Number(
                                    (val * factor).to_string(),
                                ));
                            }
                        }
                        Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
                            expr,
                            Expression::Literal(crate::expressions::Literal::Number(
                                factor.to_string(),
                            )),
                        )))
                    }
                    match unit_str.as_str() {
                        "YEAR" => {
                            // 1 year = 12 months
                            let months = multiply_expr_dateadd(arg1, 12);
                            return Ok(Expression::Function(Box::new(Function::new(
                                "ADD_MONTHS".to_string(),
                                vec![arg2, months],
                            ))));
                        }
                        "QUARTER" => {
                            // 1 quarter = 3 months
                            let months = multiply_expr_dateadd(arg1, 3);
                            return Ok(Expression::Function(Box::new(Function::new(
                                "ADD_MONTHS".to_string(),
                                vec![arg2, months],
                            ))));
                        }
                        "MONTH" => {
                            return Ok(Expression::Function(Box::new(Function::new(
                                "ADD_MONTHS".to_string(),
                                vec![arg2, arg1],
                            ))));
                        }
                        "WEEK" => {
                            // 1 week = 7 days via DATE_ADD
                            let days = multiply_expr_dateadd(arg1, 7);
                            return Ok(Expression::Function(Box::new(Function::new(
                                "DATE_ADD".to_string(),
                                vec![arg2, days],
                            ))));
                        }
                        "DAY" => {
                            return Ok(Expression::Function(Box::new(Function::new(
                                "DATE_ADD".to_string(),
                                vec![arg2, arg1],
                            ))));
                        }
                        _ => {
                            // Sub-day units: fall back to 3-arg DATE_ADD(UNIT, val, date).
                            let unit = Expression::Identifier(Identifier::new(unit_str));
                            return Ok(Expression::Function(Box::new(Function::new(
                                "DATE_ADD".to_string(),
                                vec![unit, arg1, arg2],
                            ))));
                        }
                    }
                }

                if matches!(target, DialectType::Hive) {
                    // Hive: DATE_ADD(date, val) for DAY, or date + INTERVAL for others
                    match unit_str.as_str() {
                        "DAY" => {
                            return Ok(Expression::Function(Box::new(Function::new(
                                "DATE_ADD".to_string(),
                                vec![arg2, arg1],
                            ))));
                        }
                        "MONTH" => {
                            return Ok(Expression::Function(Box::new(Function::new(
                                "ADD_MONTHS".to_string(),
                                vec![arg2, arg1],
                            ))));
                        }
                        _ => {
                            // Other units: date + INTERVAL val UNIT arithmetic.
                            let iu = parse_interval_unit(&unit_str);
                            let interval =
                                Expression::Interval(Box::new(crate::expressions::Interval {
                                    this: Some(arg1),
                                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                        unit: iu,
                                        use_plural: false,
                                    }),
                                }));
                            return Ok(Expression::Add(Box::new(
                                crate::expressions::BinaryOp::new(arg2, interval),
                            )));
                        }
                    }
                }

                if matches!(target, DialectType::PostgreSQL) {
                    // PostgreSQL: date + INTERVAL 'val UNIT'
                    // NOTE(review): expr_to_string on a non-literal val embeds its
                    // rendered text in the interval literal — confirm for column refs.
                    let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                        this: Some(Expression::Literal(Literal::String(format!(
                            "{} {}",
                            Self::expr_to_string(&arg1),
                            unit_str
                        )))),
                        unit: None,
                    }));
                    return Ok(Expression::Add(Box::new(
                        crate::expressions::BinaryOp::new(arg2, interval),
                    )));
                }

                if matches!(
                    target,
                    DialectType::Presto | DialectType::Trino | DialectType::Athena
                ) {
                    // Presto/Trino: DATE_ADD('UNIT', val, date) - unit as a string literal
                    return Ok(Expression::Function(Box::new(Function::new(
                        "DATE_ADD".to_string(),
                        vec![Expression::Literal(Literal::String(unit_str)), arg1, arg2],
                    ))));
                }

                if matches!(target, DialectType::ClickHouse) {
                    // ClickHouse: DATE_ADD(UNIT, val, date) - unit as a bare identifier
                    let unit = Expression::Identifier(Identifier::new(unit_str));
                    return Ok(Expression::Function(Box::new(Function::new(
                        "DATE_ADD".to_string(),
                        vec![unit, arg1, arg2],
                    ))));
                }

                // Default: keep DATEADD with uppercased unit
                let unit = Expression::Identifier(Identifier::new(unit_str));
                Ok(Expression::Function(Box::new(Function::new(
                    "DATEADD".to_string(),
                    vec![unit, arg1, arg2],
                ))))
            }
27161
27162 // DATE_ADD(unit, val, date) - 3 arg form from ClickHouse/Presto
27163 "DATE_ADD" if args.len() == 3 => {
27164 let arg0 = args.remove(0);
27165 let arg1 = args.remove(0);
27166 let arg2 = args.remove(0);
27167 let unit_str = get_unit_str(&arg0);
27168
27169 if matches!(
27170 target,
27171 DialectType::Presto | DialectType::Trino | DialectType::Athena
27172 ) {
27173 // Presto/Trino: DATE_ADD('UNIT', val, date)
27174 return Ok(Expression::Function(Box::new(Function::new(
27175 "DATE_ADD".to_string(),
27176 vec![Expression::Literal(Literal::String(unit_str)), arg1, arg2],
27177 ))));
27178 }
27179
27180 if matches!(
27181 target,
27182 DialectType::Snowflake | DialectType::TSQL | DialectType::Redshift
27183 ) {
27184 // DATEADD(UNIT, val, date)
27185 let unit = Expression::Identifier(Identifier::new(unit_str));
27186 let date = if matches!(target, DialectType::TSQL) {
27187 Self::ensure_cast_datetime2(arg2)
27188 } else {
27189 arg2
27190 };
27191 return Ok(Expression::Function(Box::new(Function::new(
27192 "DATEADD".to_string(),
27193 vec![unit, arg1, date],
27194 ))));
27195 }
27196
27197 if matches!(target, DialectType::DuckDB) {
27198 // DuckDB: date + INTERVAL val UNIT
27199 let iu = parse_interval_unit(&unit_str);
27200 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
27201 this: Some(arg1),
27202 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
27203 unit: iu,
27204 use_plural: false,
27205 }),
27206 }));
27207 return Ok(Expression::Add(Box::new(
27208 crate::expressions::BinaryOp::new(arg2, interval),
27209 )));
27210 }
27211
27212 if matches!(target, DialectType::Spark | DialectType::Databricks) {
27213 // Spark: DATE_ADD(UNIT, val, date) with uppercased unit
27214 let unit = Expression::Identifier(Identifier::new(unit_str));
27215 return Ok(Expression::Function(Box::new(Function::new(
27216 "DATE_ADD".to_string(),
27217 vec![unit, arg1, arg2],
27218 ))));
27219 }
27220
27221 // Default: DATE_ADD(UNIT, val, date)
27222 let unit = Expression::Identifier(Identifier::new(unit_str));
27223 Ok(Expression::Function(Box::new(Function::new(
27224 "DATE_ADD".to_string(),
27225 vec![unit, arg1, arg2],
27226 ))))
27227 }
27228
            // DATE_ADD(date, INTERVAL val UNIT) - 2 arg BigQuery form
            //
            // The INTERVAL argument is decomposed into (value, unit) and then
            // re-assembled per target: some dialects want the interval back,
            // some want a 3-arg DATE_ADD/DATEADD, some want plain arithmetic.
            "DATE_ADD" if args.len() == 2 => {
                let date = args.remove(0);
                let interval_expr = args.remove(0);
                let (val, unit) = Self::extract_interval_parts(&interval_expr);
                let unit_str = Self::interval_unit_to_string(&unit);

                match target {
                    DialectType::DuckDB => {
                        // DuckDB: CAST(date AS DATE) + INTERVAL 'val' UNIT
                        let cast_date = Self::ensure_cast_date(date);
                        let quoted_val = Self::quote_interval_val(&val);
                        let interval =
                            Expression::Interval(Box::new(crate::expressions::Interval {
                                this: Some(quoted_val),
                                unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                    unit,
                                    use_plural: false,
                                }),
                            }));
                        Ok(Expression::Add(Box::new(
                            crate::expressions::BinaryOp::new(cast_date, interval),
                        )))
                    }
                    DialectType::PostgreSQL => {
                        // PostgreSQL: date + INTERVAL 'val UNIT'
                        // (value and unit folded into one string literal).
                        let interval =
                            Expression::Interval(Box::new(crate::expressions::Interval {
                                this: Some(Expression::Literal(Literal::String(format!(
                                    "{} {}",
                                    Self::expr_to_string(&val),
                                    unit_str
                                )))),
                                unit: None,
                            }));
                        Ok(Expression::Add(Box::new(
                            crate::expressions::BinaryOp::new(date, interval),
                        )))
                    }
                    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                        // Presto: DATE_ADD('UNIT', CAST('val' AS BIGINT), date)
                        // NOTE(review): val is stringified then cast — confirm this is
                        // intended for non-literal (e.g. column) interval values.
                        let val_str = Self::expr_to_string(&val);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![
                                Expression::Literal(Literal::String(unit_str)),
                                Expression::Cast(Box::new(Cast {
                                    this: Expression::Literal(Literal::String(val_str)),
                                    to: DataType::BigInt { length: None },
                                    trailing_comments: vec![],
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                })),
                                date,
                            ],
                        ))))
                    }
                    DialectType::Spark | DialectType::Hive => {
                        // Spark/Hive: DATE_ADD(date, val) for DAY
                        // MONTH maps to ADD_MONTHS; other units keep an interval arg.
                        match unit_str.as_str() {
                            "DAY" => Ok(Expression::Function(Box::new(Function::new(
                                "DATE_ADD".to_string(),
                                vec![date, val],
                            )))),
                            "MONTH" => Ok(Expression::Function(Box::new(Function::new(
                                "ADD_MONTHS".to_string(),
                                vec![date, val],
                            )))),
                            _ => {
                                let iu = parse_interval_unit(&unit_str);
                                let interval =
                                    Expression::Interval(Box::new(crate::expressions::Interval {
                                        this: Some(val),
                                        unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                            unit: iu,
                                            use_plural: false,
                                        }),
                                    }));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_ADD".to_string(),
                                    vec![date, interval],
                                ))))
                            }
                        }
                    }
                    DialectType::Snowflake => {
                        // Snowflake: DATEADD(UNIT, 'val', CAST(date AS DATE))
                        // NOTE(review): val is rendered to a string literal — verify
                        // for non-literal interval values.
                        let cast_date = Self::ensure_cast_date(date);
                        let val_str = Self::expr_to_string(&val);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(),
                            vec![
                                Expression::Identifier(Identifier::new(unit_str)),
                                Expression::Literal(Literal::String(val_str)),
                                cast_date,
                            ],
                        ))))
                    }
                    DialectType::TSQL | DialectType::Fabric => {
                        // TSQL/Fabric: DATEADD(UNIT, val, CAST(date AS DATETIME2))
                        let cast_date = Self::ensure_cast_datetime2(date);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(),
                            vec![
                                Expression::Identifier(Identifier::new(unit_str)),
                                val,
                                cast_date,
                            ],
                        ))))
                    }
                    // Redshift: DATEADD(UNIT, val, date) with no extra cast.
                    DialectType::Redshift => Ok(Expression::Function(Box::new(Function::new(
                        "DATEADD".to_string(),
                        vec![Expression::Identifier(Identifier::new(unit_str)), val, date],
                    )))),
                    DialectType::MySQL => {
                        // MySQL: DATE_ADD(date, INTERVAL 'val' UNIT)
                        let quoted_val = Self::quote_interval_val(&val);
                        let iu = parse_interval_unit(&unit_str);
                        let interval =
                            Expression::Interval(Box::new(crate::expressions::Interval {
                                this: Some(quoted_val),
                                unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                    unit: iu,
                                    use_plural: false,
                                }),
                            }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![date, interval],
                        ))))
                    }
                    DialectType::BigQuery => {
                        // BigQuery: DATE_ADD(date, INTERVAL 'val' UNIT)
                        let quoted_val = Self::quote_interval_val(&val);
                        let iu = parse_interval_unit(&unit_str);
                        let interval =
                            Expression::Interval(Box::new(crate::expressions::Interval {
                                this: Some(quoted_val),
                                unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                    unit: iu,
                                    use_plural: false,
                                }),
                            }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![date, interval],
                        ))))
                    }
                    // Databricks: DATEADD(UNIT, val, date).
                    DialectType::Databricks => Ok(Expression::Function(Box::new(Function::new(
                        "DATEADD".to_string(),
                        vec![Expression::Identifier(Identifier::new(unit_str)), val, date],
                    )))),
                    _ => {
                        // Default: keep as DATE_ADD with decomposed interval
                        // (generic DateAdd node, rendered per-dialect downstream).
                        Ok(Expression::DateAdd(Box::new(
                            crate::expressions::DateAddFunc {
                                this: date,
                                interval: val,
                                unit,
                            },
                        )))
                    }
                }
            }
27393
27394 // ADD_MONTHS(date, val) -> target-specific form
27395 "ADD_MONTHS" if args.len() == 2 => {
27396 let date = args.remove(0);
27397 let val = args.remove(0);
27398
27399 if matches!(target, DialectType::TSQL) {
27400 // TSQL: DATEADD(MONTH, val, CAST(date AS DATETIME2))
27401 let cast_date = Self::ensure_cast_datetime2(date);
27402 return Ok(Expression::Function(Box::new(Function::new(
27403 "DATEADD".to_string(),
27404 vec![
27405 Expression::Identifier(Identifier::new("MONTH")),
27406 val,
27407 cast_date,
27408 ],
27409 ))));
27410 }
27411
27412 if matches!(target, DialectType::DuckDB) {
27413 // DuckDB: date + INTERVAL val MONTH
27414 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
27415 this: Some(val),
27416 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
27417 unit: crate::expressions::IntervalUnit::Month,
27418 use_plural: false,
27419 }),
27420 }));
27421 return Ok(Expression::Add(Box::new(
27422 crate::expressions::BinaryOp::new(date, interval),
27423 )));
27424 }
27425
27426 if matches!(target, DialectType::Snowflake) {
27427 // Snowflake: keep ADD_MONTHS when source is also Snowflake, else DATEADD
27428 if matches!(source, DialectType::Snowflake) {
27429 return Ok(Expression::Function(Box::new(Function::new(
27430 "ADD_MONTHS".to_string(),
27431 vec![date, val],
27432 ))));
27433 }
27434 return Ok(Expression::Function(Box::new(Function::new(
27435 "DATEADD".to_string(),
27436 vec![Expression::Identifier(Identifier::new("MONTH")), val, date],
27437 ))));
27438 }
27439
27440 if matches!(target, DialectType::Spark | DialectType::Databricks) {
27441 // Spark: ADD_MONTHS(date, val) - keep as is
27442 return Ok(Expression::Function(Box::new(Function::new(
27443 "ADD_MONTHS".to_string(),
27444 vec![date, val],
27445 ))));
27446 }
27447
27448 if matches!(target, DialectType::Hive) {
27449 return Ok(Expression::Function(Box::new(Function::new(
27450 "ADD_MONTHS".to_string(),
27451 vec![date, val],
27452 ))));
27453 }
27454
27455 if matches!(
27456 target,
27457 DialectType::Presto | DialectType::Trino | DialectType::Athena
27458 ) {
27459 // Presto: DATE_ADD('MONTH', val, date)
27460 return Ok(Expression::Function(Box::new(Function::new(
27461 "DATE_ADD".to_string(),
27462 vec![
27463 Expression::Literal(Literal::String("MONTH".to_string())),
27464 val,
27465 date,
27466 ],
27467 ))));
27468 }
27469
27470 // Default: keep ADD_MONTHS
27471 Ok(Expression::Function(Box::new(Function::new(
27472 "ADD_MONTHS".to_string(),
27473 vec![date, val],
27474 ))))
27475 }
27476
            // SAFE_DIVIDE(x, y) -> target-specific form directly
            //
            // BigQuery's SAFE_DIVIDE returns NULL instead of erroring when the
            // divisor is zero, so every rewrite below guards the division with
            // a `y <> 0` check and falls back to NULL.
            "SAFE_DIVIDE" if args.len() == 2 => {
                let x = args.remove(0);
                let y = args.remove(0);
                // Wrap x and y in parens if they're complex expressions so the
                // rendered `x / y` and `y <> 0` keep their intended precedence;
                // simple leaves (columns, literals, identifiers) need no wrapping.
                let y_ref = match &y {
                    Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => {
                        y.clone()
                    }
                    _ => Expression::Paren(Box::new(Paren {
                        this: y.clone(),
                        trailing_comments: vec![],
                    })),
                };
                let x_ref = match &x {
                    Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => {
                        x.clone()
                    }
                    _ => Expression::Paren(Box::new(Paren {
                        this: x.clone(),
                        trailing_comments: vec![],
                    })),
                };
                // Shared building blocks: the `y <> 0` guard and plain `x / y`.
                let condition = Expression::Neq(Box::new(crate::expressions::BinaryOp::new(
                    y_ref.clone(),
                    Expression::number(0),
                )));
                let div_expr = Expression::Div(Box::new(crate::expressions::BinaryOp::new(
                    x_ref.clone(),
                    y_ref.clone(),
                )));

                match target {
                    DialectType::DuckDB | DialectType::PostgreSQL => {
                        // CASE WHEN y <> 0 THEN x / y ELSE NULL END
                        // PostgreSQL additionally casts the dividend to DOUBLE
                        // PRECISION, presumably so integer operands divide
                        // fractionally like BigQuery's FLOAT64 result.
                        let result_div = if matches!(target, DialectType::PostgreSQL) {
                            let cast_x = Expression::Cast(Box::new(Cast {
                                this: x_ref,
                                to: DataType::Custom {
                                    name: "DOUBLE PRECISION".to_string(),
                                },
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                            }));
                            Expression::Div(Box::new(crate::expressions::BinaryOp::new(
                                cast_x, y_ref,
                            )))
                        } else {
                            div_expr
                        };
                        Ok(Expression::Case(Box::new(crate::expressions::Case {
                            operand: None,
                            whens: vec![(condition, result_div)],
                            else_: Some(Expression::Null(crate::expressions::Null)),
                            comments: Vec::new(),
                        })))
                    }
                    DialectType::Snowflake => {
                        // IFF(y <> 0, x / y, NULL)
                        Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
                            condition,
                            true_value: div_expr,
                            false_value: Some(Expression::Null(crate::expressions::Null)),
                            original_name: Some("IFF".to_string()),
                        })))
                    }
                    DialectType::Presto | DialectType::Trino => {
                        // IF(y <> 0, CAST(x AS DOUBLE) / y, NULL)
                        let cast_x = Expression::Cast(Box::new(Cast {
                            this: x_ref,
                            to: DataType::Double {
                                precision: None,
                                scale: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        let cast_div = Expression::Div(Box::new(
                            crate::expressions::BinaryOp::new(cast_x, y_ref),
                        ));
                        Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
                            condition,
                            true_value: cast_div,
                            false_value: Some(Expression::Null(crate::expressions::Null)),
                            original_name: None,
                        })))
                    }
                    _ => {
                        // IF(y <> 0, x / y, NULL)
                        Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
                            condition,
                            true_value: div_expr,
                            false_value: Some(Expression::Null(crate::expressions::Null)),
                            original_name: None,
                        })))
                    }
                }
            }
27579
27580 // GENERATE_UUID() -> UUID() with CAST to string
27581 "GENERATE_UUID" => {
27582 let uuid_expr = Expression::Uuid(Box::new(crate::expressions::Uuid {
27583 this: None,
27584 name: None,
27585 is_string: None,
27586 }));
27587 // Most targets need CAST(UUID() AS TEXT/VARCHAR/STRING)
27588 let cast_type = match target {
27589 DialectType::DuckDB => Some(DataType::Text),
27590 DialectType::Presto | DialectType::Trino => Some(DataType::VarChar {
27591 length: None,
27592 parenthesized_length: false,
27593 }),
27594 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
27595 Some(DataType::String { length: None })
27596 }
27597 _ => None,
27598 };
27599 if let Some(dt) = cast_type {
27600 Ok(Expression::Cast(Box::new(Cast {
27601 this: uuid_expr,
27602 to: dt,
27603 trailing_comments: vec![],
27604 double_colon_syntax: false,
27605 format: None,
27606 default: None,
27607 })))
27608 } else {
27609 Ok(uuid_expr)
27610 }
27611 }
27612
27613 // COUNTIF(x) -> CountIf expression
27614 "COUNTIF" if args.len() == 1 => {
27615 let arg = args.remove(0);
27616 Ok(Expression::CountIf(Box::new(crate::expressions::AggFunc {
27617 this: arg,
27618 distinct: false,
27619 filter: None,
27620 order_by: vec![],
27621 name: None,
27622 ignore_nulls: None,
27623 having_max: None,
27624 limit: None,
27625 })))
27626 }
27627
            // EDIT_DISTANCE(col1, col2, ...) -> Levenshtein expression
            "EDIT_DISTANCE" => {
                // Strip named arguments (max_distance => N) and pass as positional
                let mut positional_args: Vec<Expression> = vec![];
                for arg in args {
                    match arg {
                        Expression::NamedArgument(na) => {
                            positional_args.push(na.value);
                        }
                        other => positional_args.push(other),
                    }
                }
                if positional_args.len() >= 2 {
                    let col1 = positional_args.remove(0);
                    let col2 = positional_args.remove(0);
                    let levenshtein = crate::expressions::BinaryFunc {
                        this: col1,
                        expression: col2,
                        original_name: None,
                    };
                    // Pass extra args through a function wrapper with all args.
                    // Note: the branches below move the fields out of
                    // `levenshtein` and always early-return, which is what
                    // keeps the final `Ok(Expression::Levenshtein(...))`
                    // (whole-value use) legal.
                    if !positional_args.is_empty() {
                        let max_dist = positional_args.remove(0);
                        // DuckDB: CASE WHEN LEVENSHTEIN(a, b) IS NULL OR max IS NULL THEN NULL ELSE LEAST(LEVENSHTEIN(a, b), max) END
                        // LEAST caps the distance at max_dist; the IS NULL
                        // checks propagate NULL inputs instead of comparing them.
                        if matches!(target, DialectType::DuckDB) {
                            let lev = Expression::Function(Box::new(Function::new(
                                "LEVENSHTEIN".to_string(),
                                vec![levenshtein.this, levenshtein.expression],
                            )));
                            let lev_is_null =
                                Expression::IsNull(Box::new(crate::expressions::IsNull {
                                    this: lev.clone(),
                                    not: false,
                                    postfix_form: false,
                                }));
                            let max_is_null =
                                Expression::IsNull(Box::new(crate::expressions::IsNull {
                                    this: max_dist.clone(),
                                    not: false,
                                    postfix_form: false,
                                }));
                            let null_check =
                                Expression::Or(Box::new(crate::expressions::BinaryOp {
                                    left: lev_is_null,
                                    right: max_is_null,
                                    left_comments: Vec::new(),
                                    operator_comments: Vec::new(),
                                    trailing_comments: Vec::new(),
                                }));
                            let least =
                                Expression::Least(Box::new(crate::expressions::VarArgFunc {
                                    expressions: vec![lev, max_dist],
                                    original_name: None,
                                }));
                            return Ok(Expression::Case(Box::new(crate::expressions::Case {
                                operand: None,
                                whens: vec![(
                                    null_check,
                                    Expression::Null(crate::expressions::Null),
                                )],
                                else_: Some(least),
                                comments: Vec::new(),
                            })));
                        }
                        let mut all_args = vec![levenshtein.this, levenshtein.expression, max_dist];
                        all_args.extend(positional_args);
                        // PostgreSQL: use LEVENSHTEIN_LESS_EQUAL when max_distance is provided
                        let func_name = if matches!(target, DialectType::PostgreSQL) {
                            "LEVENSHTEIN_LESS_EQUAL"
                        } else {
                            "LEVENSHTEIN"
                        };
                        return Ok(Expression::Function(Box::new(Function::new(
                            func_name.to_string(),
                            all_args,
                        ))));
                    }
                    // Plain two-argument form: normalized Levenshtein node.
                    Ok(Expression::Levenshtein(Box::new(levenshtein)))
                } else {
                    // Fewer than two usable arguments: leave the call untouched.
                    Ok(Expression::Function(Box::new(Function::new(
                        "EDIT_DISTANCE".to_string(),
                        positional_args,
                    ))))
                }
            }
27713
27714 // TIMESTAMP_SECONDS(x) -> UnixToTime with scale 0
27715 "TIMESTAMP_SECONDS" if args.len() == 1 => {
27716 let arg = args.remove(0);
27717 Ok(Expression::UnixToTime(Box::new(
27718 crate::expressions::UnixToTime {
27719 this: Box::new(arg),
27720 scale: Some(0),
27721 zone: None,
27722 hours: None,
27723 minutes: None,
27724 format: None,
27725 target_type: None,
27726 },
27727 )))
27728 }
27729
27730 // TIMESTAMP_MILLIS(x) -> UnixToTime with scale 3
27731 "TIMESTAMP_MILLIS" if args.len() == 1 => {
27732 let arg = args.remove(0);
27733 Ok(Expression::UnixToTime(Box::new(
27734 crate::expressions::UnixToTime {
27735 this: Box::new(arg),
27736 scale: Some(3),
27737 zone: None,
27738 hours: None,
27739 minutes: None,
27740 format: None,
27741 target_type: None,
27742 },
27743 )))
27744 }
27745
27746 // TIMESTAMP_MICROS(x) -> UnixToTime with scale 6
27747 "TIMESTAMP_MICROS" if args.len() == 1 => {
27748 let arg = args.remove(0);
27749 Ok(Expression::UnixToTime(Box::new(
27750 crate::expressions::UnixToTime {
27751 this: Box::new(arg),
27752 scale: Some(6),
27753 zone: None,
27754 hours: None,
27755 minutes: None,
27756 format: None,
27757 target_type: None,
27758 },
27759 )))
27760 }
27761
27762 // DIV(x, y) -> IntDiv expression
27763 "DIV" if args.len() == 2 => {
27764 let x = args.remove(0);
27765 let y = args.remove(0);
27766 Ok(Expression::IntDiv(Box::new(
27767 crate::expressions::BinaryFunc {
27768 this: x,
27769 expression: y,
27770 original_name: None,
27771 },
27772 )))
27773 }
27774
            // TO_HEX(x) -> target-specific form
            "TO_HEX" if args.len() == 1 => {
                let arg = args.remove(0);
                // Check if inner function already returns hex string in certain targets.
                // NOTE(review): this match is case-sensitive, while the
                // Snowflake branch below upper-cases the inner name before
                // matching — confirm inner function names are already
                // normalized to upper case when this transform runs.
                let inner_returns_hex = matches!(&arg, Expression::Function(f) if matches!(f.name.as_str(), "MD5" | "SHA1" | "SHA256" | "SHA512"));
                if matches!(target, DialectType::BigQuery) {
                    // BQ->BQ: keep as TO_HEX
                    Ok(Expression::Function(Box::new(Function::new(
                        "TO_HEX".to_string(),
                        vec![arg],
                    ))))
                } else if matches!(target, DialectType::DuckDB) && inner_returns_hex {
                    // DuckDB: MD5/SHA already return hex strings, so TO_HEX is redundant
                    Ok(arg)
                } else if matches!(target, DialectType::Snowflake) && inner_returns_hex {
                    // Snowflake: TO_HEX(SHA1(x)) -> TO_CHAR(SHA1_BINARY(x))
                    // TO_HEX(MD5(x)) -> TO_CHAR(MD5_BINARY(x))
                    // TO_HEX(SHA256(x)) -> TO_CHAR(SHA2_BINARY(x, 256))
                    // TO_HEX(SHA512(x)) -> TO_CHAR(SHA2_BINARY(x, 512))
                    if let Expression::Function(ref inner_f) = arg {
                        let inner_args = inner_f.args.clone();
                        // Map the digest to its *_BINARY variant; SHA2_BINARY
                        // takes the bit width as a trailing argument.
                        let binary_func = match inner_f.name.to_uppercase().as_str() {
                            "SHA1" => Expression::Function(Box::new(Function::new(
                                "SHA1_BINARY".to_string(),
                                inner_args,
                            ))),
                            "MD5" => Expression::Function(Box::new(Function::new(
                                "MD5_BINARY".to_string(),
                                inner_args,
                            ))),
                            "SHA256" => {
                                let mut a = inner_args;
                                a.push(Expression::number(256));
                                Expression::Function(Box::new(Function::new(
                                    "SHA2_BINARY".to_string(),
                                    a,
                                )))
                            }
                            "SHA512" => {
                                let mut a = inner_args;
                                a.push(Expression::number(512));
                                Expression::Function(Box::new(Function::new(
                                    "SHA2_BINARY".to_string(),
                                    a,
                                )))
                            }
                            _ => arg.clone(),
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "TO_CHAR".to_string(),
                            vec![binary_func],
                        ))))
                    } else {
                        // Fallback when the arg turns out not to be a Function
                        // (shouldn't happen when inner_returns_hex is true):
                        // LOWER(HEX(x)), matching the default branch below.
                        let inner = Expression::Function(Box::new(Function::new(
                            "HEX".to_string(),
                            vec![arg],
                        )));
                        Ok(Expression::Lower(Box::new(
                            crate::expressions::UnaryFunc::new(inner),
                        )))
                    }
                } else if matches!(target, DialectType::Presto | DialectType::Trino) {
                    // Presto/Trino keep TO_HEX but wrap it in LOWER, presumably
                    // to match BigQuery's lower-case hex output.
                    let inner = Expression::Function(Box::new(Function::new(
                        "TO_HEX".to_string(),
                        vec![arg],
                    )));
                    Ok(Expression::Lower(Box::new(
                        crate::expressions::UnaryFunc::new(inner),
                    )))
                } else {
                    // Default: LOWER(HEX(x))
                    let inner =
                        Expression::Function(Box::new(Function::new("HEX".to_string(), vec![arg])));
                    Ok(Expression::Lower(Box::new(
                        crate::expressions::UnaryFunc::new(inner),
                    )))
                }
            }
27852
            // LAST_DAY(date, unit) -> LAST_DAY(date)
            //
            // The two-argument form's unit is dropped and the one-argument
            // form is emitted for every target (despite the old comment, no
            // PostgreSQL-specific transform exists here).
            // NOTE(review): a non-MONTH unit (e.g. YEAR, WEEK) silently loses
            // its meaning in this rewrite — confirm callers only pass MONTH.
            "LAST_DAY" if args.len() == 2 => {
                let date = args.remove(0);
                let _unit = args.remove(0); // Strip the unit (MONTH is default)
                Ok(Expression::Function(Box::new(Function::new(
                    "LAST_DAY".to_string(),
                    vec![date],
                ))))
            }
27862
27863 // GENERATE_ARRAY(start, end, step?) -> GenerateSeries expression
27864 "GENERATE_ARRAY" => {
27865 let start = args.get(0).cloned();
27866 let end = args.get(1).cloned();
27867 let step = args.get(2).cloned();
27868 Ok(Expression::GenerateSeries(Box::new(
27869 crate::expressions::GenerateSeries {
27870 start: start.map(Box::new),
27871 end: end.map(Box::new),
27872 step: step.map(Box::new),
27873 is_end_exclusive: None,
27874 },
27875 )))
27876 }
27877
27878 // GENERATE_TIMESTAMP_ARRAY(start, end, step) -> GenerateSeries expression
27879 "GENERATE_TIMESTAMP_ARRAY" => {
27880 let start = args.get(0).cloned();
27881 let end = args.get(1).cloned();
27882 let step = args.get(2).cloned();
27883
27884 if matches!(target, DialectType::DuckDB) {
27885 // DuckDB: GENERATE_SERIES(CAST(start AS TIMESTAMP), CAST(end AS TIMESTAMP), step)
27886 // Only cast string literals - leave columns/expressions as-is
27887 let maybe_cast_ts = |expr: Expression| -> Expression {
27888 if matches!(&expr, Expression::Literal(Literal::String(_))) {
27889 Expression::Cast(Box::new(Cast {
27890 this: expr,
27891 to: DataType::Timestamp {
27892 precision: None,
27893 timezone: false,
27894 },
27895 trailing_comments: vec![],
27896 double_colon_syntax: false,
27897 format: None,
27898 default: None,
27899 }))
27900 } else {
27901 expr
27902 }
27903 };
27904 let cast_start = start.map(maybe_cast_ts);
27905 let cast_end = end.map(maybe_cast_ts);
27906 Ok(Expression::GenerateSeries(Box::new(
27907 crate::expressions::GenerateSeries {
27908 start: cast_start.map(Box::new),
27909 end: cast_end.map(Box::new),
27910 step: step.map(Box::new),
27911 is_end_exclusive: None,
27912 },
27913 )))
27914 } else {
27915 Ok(Expression::GenerateSeries(Box::new(
27916 crate::expressions::GenerateSeries {
27917 start: start.map(Box::new),
27918 end: end.map(Box::new),
27919 step: step.map(Box::new),
27920 is_end_exclusive: None,
27921 },
27922 )))
27923 }
27924 }
27925
            // TO_JSON(x) -> target-specific (from Spark/Hive)
            "TO_JSON" => {
                match target {
                    DialectType::Presto | DialectType::Trino => {
                        // JSON_FORMAT(CAST(x AS JSON))
                        // A missing argument degrades to NULL rather than panicking.
                        let arg = args
                            .into_iter()
                            .next()
                            .unwrap_or(Expression::Null(crate::expressions::Null));
                        let cast_json = Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Custom {
                                name: "JSON".to_string(),
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "JSON_FORMAT".to_string(),
                            vec![cast_json],
                        ))))
                    }
                    // BigQuery spells the string-producing variant TO_JSON_STRING.
                    DialectType::BigQuery => Ok(Expression::Function(Box::new(Function::new(
                        "TO_JSON_STRING".to_string(),
                        args,
                    )))),
                    DialectType::DuckDB => {
                        // CAST(TO_JSON(x) AS TEXT)
                        let arg = args
                            .into_iter()
                            .next()
                            .unwrap_or(Expression::Null(crate::expressions::Null));
                        let to_json = Expression::Function(Box::new(Function::new(
                            "TO_JSON".to_string(),
                            vec![arg],
                        )));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: to_json,
                            to: DataType::Text,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    }
                    // Everything else keeps TO_JSON untouched.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "TO_JSON".to_string(),
                        args,
                    )))),
                }
            }
27979
            // TO_JSON_STRING(x) -> target-specific
            "TO_JSON_STRING" => {
                match target {
                    // Spark-family and Hive call the same operation TO_JSON.
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive => Ok(
                        Expression::Function(Box::new(Function::new("TO_JSON".to_string(), args))),
                    ),
                    DialectType::Presto | DialectType::Trino => {
                        // JSON_FORMAT(CAST(x AS JSON))
                        // A missing argument degrades to NULL rather than panicking.
                        let arg = args
                            .into_iter()
                            .next()
                            .unwrap_or(Expression::Null(crate::expressions::Null));
                        let cast_json = Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Custom {
                                name: "JSON".to_string(),
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "JSON_FORMAT".to_string(),
                            vec![cast_json],
                        ))))
                    }
                    DialectType::DuckDB => {
                        // CAST(TO_JSON(x) AS TEXT)
                        let arg = args
                            .into_iter()
                            .next()
                            .unwrap_or(Expression::Null(crate::expressions::Null));
                        let to_json = Expression::Function(Box::new(Function::new(
                            "TO_JSON".to_string(),
                            vec![arg],
                        )));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: to_json,
                            to: DataType::Text,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    }
                    DialectType::Snowflake => {
                        // TO_JSON(x)
                        Ok(Expression::Function(Box::new(Function::new(
                            "TO_JSON".to_string(),
                            args,
                        ))))
                    }
                    // Everything else keeps TO_JSON_STRING untouched.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "TO_JSON_STRING".to_string(),
                        args,
                    )))),
                }
            }
28039
28040 // SAFE_ADD(x, y) -> SafeAdd expression
28041 "SAFE_ADD" if args.len() == 2 => {
28042 let x = args.remove(0);
28043 let y = args.remove(0);
28044 Ok(Expression::SafeAdd(Box::new(crate::expressions::SafeAdd {
28045 this: Box::new(x),
28046 expression: Box::new(y),
28047 })))
28048 }
28049
28050 // SAFE_SUBTRACT(x, y) -> SafeSubtract expression
28051 "SAFE_SUBTRACT" if args.len() == 2 => {
28052 let x = args.remove(0);
28053 let y = args.remove(0);
28054 Ok(Expression::SafeSubtract(Box::new(
28055 crate::expressions::SafeSubtract {
28056 this: Box::new(x),
28057 expression: Box::new(y),
28058 },
28059 )))
28060 }
28061
28062 // SAFE_MULTIPLY(x, y) -> SafeMultiply expression
28063 "SAFE_MULTIPLY" if args.len() == 2 => {
28064 let x = args.remove(0);
28065 let y = args.remove(0);
28066 Ok(Expression::SafeMultiply(Box::new(
28067 crate::expressions::SafeMultiply {
28068 this: Box::new(x),
28069 expression: Box::new(y),
28070 },
28071 )))
28072 }
28073
28074 // REGEXP_CONTAINS(str, pattern) -> RegexpLike expression
28075 "REGEXP_CONTAINS" if args.len() == 2 => {
28076 let str_expr = args.remove(0);
28077 let pattern = args.remove(0);
28078 Ok(Expression::RegexpLike(Box::new(
28079 crate::expressions::RegexpFunc {
28080 this: str_expr,
28081 pattern,
28082 flags: None,
28083 },
28084 )))
28085 }
28086
28087 // CONTAINS_SUBSTR(a, b) -> CONTAINS(LOWER(a), LOWER(b))
28088 "CONTAINS_SUBSTR" if args.len() == 2 => {
28089 let a = args.remove(0);
28090 let b = args.remove(0);
28091 let lower_a = Expression::Lower(Box::new(crate::expressions::UnaryFunc::new(a)));
28092 let lower_b = Expression::Lower(Box::new(crate::expressions::UnaryFunc::new(b)));
28093 Ok(Expression::Function(Box::new(Function::new(
28094 "CONTAINS".to_string(),
28095 vec![lower_a, lower_b],
28096 ))))
28097 }
28098
28099 // INT64(x) -> CAST(x AS BIGINT)
28100 "INT64" if args.len() == 1 => {
28101 let arg = args.remove(0);
28102 Ok(Expression::Cast(Box::new(Cast {
28103 this: arg,
28104 to: DataType::BigInt { length: None },
28105 trailing_comments: vec![],
28106 double_colon_syntax: false,
28107 format: None,
28108 default: None,
28109 })))
28110 }
28111
28112 // INSTR(str, substr) -> target-specific
28113 "INSTR" if args.len() >= 2 => {
28114 let str_expr = args.remove(0);
28115 let substr = args.remove(0);
28116 if matches!(target, DialectType::Snowflake) {
28117 // CHARINDEX(substr, str)
28118 Ok(Expression::Function(Box::new(Function::new(
28119 "CHARINDEX".to_string(),
28120 vec![substr, str_expr],
28121 ))))
28122 } else if matches!(target, DialectType::BigQuery) {
28123 // Keep as INSTR
28124 Ok(Expression::Function(Box::new(Function::new(
28125 "INSTR".to_string(),
28126 vec![str_expr, substr],
28127 ))))
28128 } else {
28129 // Default: keep as INSTR
28130 Ok(Expression::Function(Box::new(Function::new(
28131 "INSTR".to_string(),
28132 vec![str_expr, substr],
28133 ))))
28134 }
28135 }
28136
28137 // BigQuery DATE_TRUNC(expr, unit) -> DATE_TRUNC('unit', expr) for standard SQL
28138 "DATE_TRUNC" if args.len() == 2 => {
28139 let expr = args.remove(0);
28140 let unit_expr = args.remove(0);
28141 let unit_str = get_unit_str(&unit_expr);
28142
28143 match target {
28144 DialectType::DuckDB
28145 | DialectType::Snowflake
28146 | DialectType::PostgreSQL
28147 | DialectType::Presto
28148 | DialectType::Trino
28149 | DialectType::Databricks
28150 | DialectType::Spark
28151 | DialectType::Redshift
28152 | DialectType::ClickHouse
28153 | DialectType::TSQL => {
28154 // Standard: DATE_TRUNC('UNIT', expr)
28155 Ok(Expression::Function(Box::new(Function::new(
28156 "DATE_TRUNC".to_string(),
28157 vec![Expression::Literal(Literal::String(unit_str)), expr],
28158 ))))
28159 }
28160 _ => {
28161 // Keep BigQuery arg order: DATE_TRUNC(expr, unit)
28162 Ok(Expression::Function(Box::new(Function::new(
28163 "DATE_TRUNC".to_string(),
28164 vec![expr, unit_expr],
28165 ))))
28166 }
28167 }
28168 }
28169
            // TIMESTAMP_TRUNC / DATETIME_TRUNC -> target-specific
            "TIMESTAMP_TRUNC" | "DATETIME_TRUNC" if args.len() >= 2 => {
                // TIMESTAMP_TRUNC(ts, unit) or TIMESTAMP_TRUNC(ts, unit, timezone)
                let ts = args.remove(0);
                let unit_expr = args.remove(0);
                // Optional third argument: an explicit timezone.
                let tz = if !args.is_empty() {
                    Some(args.remove(0))
                } else {
                    None
                };
                let unit_str = get_unit_str(&unit_expr);

                match target {
                    DialectType::DuckDB => {
                        // DuckDB: DATE_TRUNC('UNIT', CAST(ts AS TIMESTAMPTZ))
                        // With timezone: DATE_TRUNC('UNIT', ts AT TIME ZONE 'tz') AT TIME ZONE 'tz' (for DAY granularity)
                        // Without timezone for MINUTE+ granularity: just DATE_TRUNC
                        let is_coarse = matches!(
                            unit_str.as_str(),
                            "DAY" | "WEEK" | "MONTH" | "QUARTER" | "YEAR"
                        );
                        // For DATETIME_TRUNC, cast string args to a plain
                        // (timezone-less) TIMESTAMP; everything else goes
                        // through the shared maybe_cast_ts_to_tz helper.
                        let cast_ts = if name == "DATETIME_TRUNC" {
                            match ts {
                                Expression::Literal(Literal::String(ref _s)) => {
                                    Expression::Cast(Box::new(Cast {
                                        this: ts,
                                        to: DataType::Timestamp {
                                            precision: None,
                                            timezone: false,
                                        },
                                        trailing_comments: vec![],
                                        double_colon_syntax: false,
                                        format: None,
                                        default: None,
                                    }))
                                }
                                _ => Self::maybe_cast_ts_to_tz(ts, &name),
                            }
                        } else {
                            Self::maybe_cast_ts_to_tz(ts, &name)
                        };

                        if let Some(tz_arg) = tz {
                            if is_coarse {
                                // DATE_TRUNC('UNIT', ts AT TIME ZONE 'tz') AT TIME ZONE 'tz'
                                // The zone appears on both sides: truncate in
                                // local time, then convert the result back.
                                let at_tz = Expression::AtTimeZone(Box::new(
                                    crate::expressions::AtTimeZone {
                                        this: cast_ts,
                                        zone: tz_arg.clone(),
                                    },
                                ));
                                let date_trunc = Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![Expression::Literal(Literal::String(unit_str)), at_tz],
                                )));
                                Ok(Expression::AtTimeZone(Box::new(
                                    crate::expressions::AtTimeZone {
                                        this: date_trunc,
                                        zone: tz_arg,
                                    },
                                )))
                            } else {
                                // For MINUTE/HOUR: no AT TIME ZONE wrapper, just DATE_TRUNC('UNIT', ts)
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![Expression::Literal(Literal::String(unit_str)), cast_ts],
                                ))))
                            }
                        } else {
                            // No timezone: DATE_TRUNC('UNIT', CAST(ts AS TIMESTAMPTZ))
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_TRUNC".to_string(),
                                vec![Expression::Literal(Literal::String(unit_str)), cast_ts],
                            ))))
                        }
                    }
                    DialectType::Databricks | DialectType::Spark => {
                        // Databricks/Spark: DATE_TRUNC('UNIT', ts)
                        // NOTE(review): a timezone argument, if present, is
                        // dropped on this path — confirm that is intended.
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_TRUNC".to_string(),
                            vec![Expression::Literal(Literal::String(unit_str)), ts],
                        ))))
                    }
                    _ => {
                        // Default: keep as TIMESTAMP_TRUNC('UNIT', ts, [tz])
                        let unit = Expression::Literal(Literal::String(unit_str));
                        let mut date_trunc_args = vec![unit, ts];
                        if let Some(tz_arg) = tz {
                            date_trunc_args.push(tz_arg);
                        }
                        Ok(Expression::Function(Box::new(Function::new(
                            "TIMESTAMP_TRUNC".to_string(),
                            date_trunc_args,
                        ))))
                    }
                }
            }
28268
            // TIME(h, m, s) -> target-specific, TIME('string') -> CAST('string' AS TIME)
            //
            // Dispatches on arity: 3 = constructor from parts, 1 = cast from a
            // single value, 2 = value plus timezone, anything else is kept as-is.
            "TIME" => {
                if args.len() == 3 {
                    // TIME(h, m, s) constructor
                    match target {
                        DialectType::TSQL => {
                            // TIMEFROMPARTS(h, m, s, 0, 0)
                            // The two appended zeros fill the fractional-seconds
                            // and precision slots TIMEFROMPARTS requires.
                            args.push(Expression::number(0));
                            args.push(Expression::number(0));
                            Ok(Expression::Function(Box::new(Function::new(
                                "TIMEFROMPARTS".to_string(),
                                args,
                            ))))
                        }
                        DialectType::MySQL => Ok(Expression::Function(Box::new(Function::new(
                            "MAKETIME".to_string(),
                            args,
                        )))),
                        DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
                            Function::new("MAKE_TIME".to_string(), args),
                        ))),
                        _ => Ok(Expression::Function(Box::new(Function::new(
                            "TIME".to_string(),
                            args,
                        )))),
                    }
                } else if args.len() == 1 {
                    let arg = args.remove(0);
                    if matches!(target, DialectType::Spark) {
                        // Spark: CAST(x AS TIMESTAMP) (yes, TIMESTAMP not TIME)
                        Ok(Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    } else {
                        // Most targets: CAST(x AS TIME)
                        Ok(Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Time {
                                precision: None,
                                timezone: false,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    }
                } else if args.len() == 2 {
                    // TIME(expr, timezone) -> CAST(CAST(expr AS TIMESTAMPTZ) AT TIME ZONE tz AS TIME)
                    let expr = args.remove(0);
                    let tz = args.remove(0);
                    let cast_tstz = Expression::Cast(Box::new(Cast {
                        this: expr,
                        to: DataType::Timestamp {
                            timezone: true,
                            precision: None,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    }));
                    let at_tz = Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                        this: cast_tstz,
                        zone: tz,
                    }));
                    Ok(Expression::Cast(Box::new(Cast {
                        this: at_tz,
                        to: DataType::Time {
                            precision: None,
                            timezone: false,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    })))
                } else {
                    // Unexpected arity (0 or > 3): leave the call untouched.
                    Ok(Expression::Function(Box::new(Function::new(
                        "TIME".to_string(),
                        args,
                    ))))
                }
            }
28361
            // DATETIME('string') -> CAST('string' AS TIMESTAMP)
            // DATETIME('date', TIME 'time') -> CAST(CAST('date' AS DATE) + CAST('time' AS TIME) AS TIMESTAMP)
            // DATETIME('string', 'timezone') -> CAST(CAST('string' AS TIMESTAMPTZ) AT TIME ZONE tz AS TIMESTAMP)
            // DATETIME(y, m, d, h, min, s) -> target-specific
            "DATETIME" => {
                // For BigQuery target: keep DATETIME function but convert TIME literal to CAST
                // (BigQuery round-trip short-circuits via early returns).
                if matches!(target, DialectType::BigQuery) {
                    if args.len() == 2 {
                        let has_time_literal =
                            matches!(&args[1], Expression::Literal(Literal::Time(_)));
                        if has_time_literal {
                            let first = args.remove(0);
                            let second = args.remove(0);
                            // Re-express TIME 'x' as CAST('x' AS TIME) so the
                            // generated call does not carry a TIME literal.
                            let time_as_cast = match second {
                                Expression::Literal(Literal::Time(s)) => {
                                    Expression::Cast(Box::new(Cast {
                                        this: Expression::Literal(Literal::String(s)),
                                        to: DataType::Time {
                                            precision: None,
                                            timezone: false,
                                        },
                                        trailing_comments: vec![],
                                        double_colon_syntax: false,
                                        format: None,
                                        default: None,
                                    }))
                                }
                                other => other,
                            };
                            return Ok(Expression::Function(Box::new(Function::new(
                                "DATETIME".to_string(),
                                vec![first, time_as_cast],
                            ))));
                        }
                    }
                    return Ok(Expression::Function(Box::new(Function::new(
                        "DATETIME".to_string(),
                        args,
                    ))));
                }

                if args.len() == 1 {
                    // Single-argument form: a plain timestamp cast.
                    let arg = args.remove(0);
                    Ok(Expression::Cast(Box::new(Cast {
                        this: arg,
                        to: DataType::Timestamp {
                            timezone: false,
                            precision: None,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    })))
                } else if args.len() == 2 {
                    let first = args.remove(0);
                    let second = args.remove(0);
                    // Check if second arg is a TIME literal; that disambiguates
                    // the date+time form from the value+timezone form.
                    let is_time_literal = matches!(&second, Expression::Literal(Literal::Time(_)));
                    if is_time_literal {
                        // DATETIME('date', TIME 'time') -> CAST(CAST(date AS DATE) + CAST('time' AS TIME) AS TIMESTAMP)
                        let cast_date = Expression::Cast(Box::new(Cast {
                            this: first,
                            to: DataType::Date,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        // Convert TIME 'x' literal to string 'x' so CAST produces CAST('x' AS TIME) not CAST(TIME 'x' AS TIME)
                        let time_as_string = match second {
                            Expression::Literal(Literal::Time(s)) => {
                                Expression::Literal(Literal::String(s))
                            }
                            other => other,
                        };
                        let cast_time = Expression::Cast(Box::new(Cast {
                            this: time_as_string,
                            to: DataType::Time {
                                precision: None,
                                timezone: false,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        let add_expr =
                            Expression::Add(Box::new(BinaryOp::new(cast_date, cast_time)));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: add_expr,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    } else {
                        // DATETIME('string', 'timezone')
                        let cast_tstz = Expression::Cast(Box::new(Cast {
                            this: first,
                            to: DataType::Timestamp {
                                timezone: true,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        let at_tz =
                            Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                                this: cast_tstz,
                                zone: second,
                            }));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: at_tz,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    }
                } else if args.len() >= 3 {
                    // DATETIME(y, m, d, h, min, s) -> TIMESTAMP_FROM_PARTS for Snowflake
                    // For other targets, use MAKE_TIMESTAMP or similar
                    // (currently only Snowflake is special-cased; everything
                    // else keeps DATETIME verbatim).
                    if matches!(target, DialectType::Snowflake) {
                        Ok(Expression::Function(Box::new(Function::new(
                            "TIMESTAMP_FROM_PARTS".to_string(),
                            args,
                        ))))
                    } else {
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATETIME".to_string(),
                            args,
                        ))))
                    }
                } else {
                    // Zero arguments: leave the call untouched.
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATETIME".to_string(),
                        args,
                    ))))
                }
            }
28513
            // TIMESTAMP(x) -> CAST(x AS TIMESTAMP WITH TIME ZONE) for Presto
            // TIMESTAMP(x, tz) -> CAST(x AS TIMESTAMP) AT TIME ZONE tz for DuckDB
            "TIMESTAMP" => {
                if args.len() == 1 {
                    // Single argument: cast to TIMESTAMP WITH TIME ZONE.
                    let arg = args.remove(0);
                    Ok(Expression::Cast(Box::new(Cast {
                        this: arg,
                        to: DataType::Timestamp {
                            timezone: true,
                            precision: None,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    })))
                } else if args.len() == 2 {
                    // Two arguments: second is a time zone.
                    let arg = args.remove(0);
                    let tz = args.remove(0);
                    let cast_ts = Expression::Cast(Box::new(Cast {
                        this: arg,
                        to: DataType::Timestamp {
                            timezone: false,
                            precision: None,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    }));
                    if matches!(target, DialectType::Snowflake) {
                        // CONVERT_TIMEZONE('tz', CAST(x AS TIMESTAMP))
                        Ok(Expression::Function(Box::new(Function::new(
                            "CONVERT_TIMEZONE".to_string(),
                            vec![tz, cast_ts],
                        ))))
                    } else {
                        // All other targets: CAST(x AS TIMESTAMP) AT TIME ZONE tz.
                        Ok(Expression::AtTimeZone(Box::new(
                            crate::expressions::AtTimeZone {
                                this: cast_ts,
                                zone: tz,
                            },
                        )))
                    }
                } else {
                    // Unexpected arity: pass through unchanged.
                    Ok(Expression::Function(Box::new(Function::new(
                        "TIMESTAMP".to_string(),
                        args,
                    ))))
                }
            }
28565
            // STRING(x) -> CAST(x AS VARCHAR/TEXT)
            // STRING(x, tz) -> CAST(CAST(x AS TIMESTAMP) AT TIME ZONE 'UTC' AT TIME ZONE tz AS VARCHAR/TEXT)
            "STRING" => {
                if args.len() == 1 {
                    // Plain string conversion; DuckDB prefers TEXT, others VARCHAR.
                    let arg = args.remove(0);
                    let cast_type = match target {
                        DialectType::DuckDB => DataType::Text,
                        _ => DataType::VarChar {
                            length: None,
                            parenthesized_length: false,
                        },
                    };
                    Ok(Expression::Cast(Box::new(Cast {
                        this: arg,
                        to: cast_type,
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    })))
                } else if args.len() == 2 {
                    // Timestamp-to-string with an explicit output time zone.
                    let arg = args.remove(0);
                    let tz = args.remove(0);
                    let cast_type = match target {
                        DialectType::DuckDB => DataType::Text,
                        _ => DataType::VarChar {
                            length: None,
                            parenthesized_length: false,
                        },
                    };
                    if matches!(target, DialectType::Snowflake) {
                        // STRING(x, tz) -> CAST(CONVERT_TIMEZONE('UTC', tz, x) AS VARCHAR)
                        let convert_tz = Expression::Function(Box::new(Function::new(
                            "CONVERT_TIMEZONE".to_string(),
                            vec![
                                Expression::Literal(Literal::String("UTC".to_string())),
                                tz,
                                arg,
                            ],
                        )));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: convert_tz,
                            to: cast_type,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    } else {
                        // STRING(x, tz) -> CAST(CAST(x AS TIMESTAMP) AT TIME ZONE 'UTC' AT TIME ZONE tz AS TEXT/VARCHAR)
                        // First AT TIME ZONE anchors the naive timestamp to UTC,
                        // the second converts it into the requested zone.
                        let cast_ts = Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        let at_utc =
                            Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                                this: cast_ts,
                                zone: Expression::Literal(Literal::String("UTC".to_string())),
                            }));
                        let at_tz =
                            Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                                this: at_utc,
                                zone: tz,
                            }));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: at_tz,
                            to: cast_type,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    }
                } else {
                    // Unexpected arity: pass through unchanged.
                    Ok(Expression::Function(Box::new(Function::new(
                        "STRING".to_string(),
                        args,
                    ))))
                }
            }
28653
            // UNIX_SECONDS, UNIX_MILLIS, UNIX_MICROS as functions (not expressions)
            // NOTE(review): a second `"UNIX_SECONDS" if args.len() == 1` arm appears
            // further down in this match with the same pattern and guard. This arm
            // wins, so the later one is unreachable (rustc does not warn because
            // both arms are guarded). The bodies differ slightly — decide which
            // version is intended and remove the other.
            "UNIX_SECONDS" if args.len() == 1 => {
                let ts = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // CAST(EPOCH(CAST(ts AS TIMESTAMPTZ)) AS BIGINT)
                        let cast_ts = Self::ensure_cast_timestamptz(ts);
                        let epoch = Expression::Function(Box::new(Function::new(
                            "EPOCH".to_string(),
                            vec![cast_ts],
                        )));
                        // The outer cast pins the result to integer seconds.
                        Ok(Expression::Cast(Box::new(Cast {
                            this: epoch,
                            to: DataType::BigInt { length: None },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    }
                    DialectType::Snowflake => {
                        // TIMESTAMPDIFF(SECONDS, CAST('1970-01-01 00:00:00+00' AS TIMESTAMPTZ), ts)
                        let epoch = Expression::Cast(Box::new(Cast {
                            this: Expression::Literal(Literal::String(
                                "1970-01-01 00:00:00+00".to_string(),
                            )),
                            to: DataType::Timestamp {
                                timezone: true,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        // Emitted as a dedicated TimestampDiff AST node, not a
                        // generic function call.
                        Ok(Expression::TimestampDiff(Box::new(
                            crate::expressions::TimestampDiff {
                                this: Box::new(epoch),
                                expression: Box::new(ts),
                                unit: Some("SECONDS".to_string()),
                            },
                        )))
                    }
                    // Other targets: pass through unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "UNIX_SECONDS".to_string(),
                        vec![ts],
                    )))),
                }
            }
28703
28704 "UNIX_MILLIS" if args.len() == 1 => {
28705 let ts = args.remove(0);
28706 match target {
28707 DialectType::DuckDB => {
28708 // EPOCH_MS(CAST(ts AS TIMESTAMPTZ))
28709 let cast_ts = Self::ensure_cast_timestamptz(ts);
28710 Ok(Expression::Function(Box::new(Function::new(
28711 "EPOCH_MS".to_string(),
28712 vec![cast_ts],
28713 ))))
28714 }
28715 _ => Ok(Expression::Function(Box::new(Function::new(
28716 "UNIX_MILLIS".to_string(),
28717 vec![ts],
28718 )))),
28719 }
28720 }
28721
28722 "UNIX_MICROS" if args.len() == 1 => {
28723 let ts = args.remove(0);
28724 match target {
28725 DialectType::DuckDB => {
28726 // EPOCH_US(CAST(ts AS TIMESTAMPTZ))
28727 let cast_ts = Self::ensure_cast_timestamptz(ts);
28728 Ok(Expression::Function(Box::new(Function::new(
28729 "EPOCH_US".to_string(),
28730 vec![cast_ts],
28731 ))))
28732 }
28733 _ => Ok(Expression::Function(Box::new(Function::new(
28734 "UNIX_MICROS".to_string(),
28735 vec![ts],
28736 )))),
28737 }
28738 }
28739
28740 // ARRAY_CONCAT / LIST_CONCAT -> target-specific
28741 "ARRAY_CONCAT" | "LIST_CONCAT" => {
28742 match target {
28743 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
28744 // CONCAT(arr1, arr2, ...)
28745 Ok(Expression::Function(Box::new(Function::new(
28746 "CONCAT".to_string(),
28747 args,
28748 ))))
28749 }
28750 DialectType::Presto | DialectType::Trino => {
28751 // CONCAT(arr1, arr2, ...)
28752 Ok(Expression::Function(Box::new(Function::new(
28753 "CONCAT".to_string(),
28754 args,
28755 ))))
28756 }
28757 DialectType::Snowflake => {
28758 // ARRAY_CAT(arr1, ARRAY_CAT(arr2, arr3))
28759 if args.len() == 1 {
28760 // ARRAY_CAT requires 2 args, add empty array as []
28761 let empty_arr = Expression::ArrayFunc(Box::new(
28762 crate::expressions::ArrayConstructor {
28763 expressions: vec![],
28764 bracket_notation: true,
28765 use_list_keyword: false,
28766 },
28767 ));
28768 let mut new_args = args;
28769 new_args.push(empty_arr);
28770 Ok(Expression::Function(Box::new(Function::new(
28771 "ARRAY_CAT".to_string(),
28772 new_args,
28773 ))))
28774 } else if args.is_empty() {
28775 Ok(Expression::Function(Box::new(Function::new(
28776 "ARRAY_CAT".to_string(),
28777 args,
28778 ))))
28779 } else {
28780 let mut it = args.into_iter().rev();
28781 let mut result = it.next().unwrap();
28782 for arr in it {
28783 result = Expression::Function(Box::new(Function::new(
28784 "ARRAY_CAT".to_string(),
28785 vec![arr, result],
28786 )));
28787 }
28788 Ok(result)
28789 }
28790 }
28791 DialectType::PostgreSQL => {
28792 // ARRAY_CAT(arr1, ARRAY_CAT(arr2, arr3))
28793 if args.len() <= 1 {
28794 Ok(Expression::Function(Box::new(Function::new(
28795 "ARRAY_CAT".to_string(),
28796 args,
28797 ))))
28798 } else {
28799 let mut it = args.into_iter().rev();
28800 let mut result = it.next().unwrap();
28801 for arr in it {
28802 result = Expression::Function(Box::new(Function::new(
28803 "ARRAY_CAT".to_string(),
28804 vec![arr, result],
28805 )));
28806 }
28807 Ok(result)
28808 }
28809 }
28810 DialectType::Redshift => {
28811 // ARRAY_CONCAT(arr1, ARRAY_CONCAT(arr2, arr3))
28812 if args.len() <= 2 {
28813 Ok(Expression::Function(Box::new(Function::new(
28814 "ARRAY_CONCAT".to_string(),
28815 args,
28816 ))))
28817 } else {
28818 let mut it = args.into_iter().rev();
28819 let mut result = it.next().unwrap();
28820 for arr in it {
28821 result = Expression::Function(Box::new(Function::new(
28822 "ARRAY_CONCAT".to_string(),
28823 vec![arr, result],
28824 )));
28825 }
28826 Ok(result)
28827 }
28828 }
28829 DialectType::DuckDB => {
28830 // LIST_CONCAT supports multiple args natively in DuckDB
28831 Ok(Expression::Function(Box::new(Function::new(
28832 "LIST_CONCAT".to_string(),
28833 args,
28834 ))))
28835 }
28836 _ => Ok(Expression::Function(Box::new(Function::new(
28837 "ARRAY_CONCAT".to_string(),
28838 args,
28839 )))),
28840 }
28841 }
28842
            // ARRAY_CONCAT_AGG -> Snowflake: ARRAY_FLATTEN(ARRAY_AGG(x))
            "ARRAY_CONCAT_AGG" if args.len() == 1 => {
                let arg = args.remove(0);
                match target {
                    DialectType::Snowflake => {
                        // Aggregate the arrays first, then flatten the
                        // resulting array-of-arrays.
                        let array_agg =
                            Expression::ArrayAgg(Box::new(crate::expressions::AggFunc {
                                this: arg,
                                distinct: false,
                                filter: None,
                                order_by: vec![],
                                name: None,
                                ignore_nulls: None,
                                having_max: None,
                                limit: None,
                            }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "ARRAY_FLATTEN".to_string(),
                            vec![array_agg],
                        ))))
                    }
                    // Other targets: pass through unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "ARRAY_CONCAT_AGG".to_string(),
                        vec![arg],
                    )))),
                }
            }
28870
28871 // MD5/SHA1/SHA256/SHA512 -> target-specific hash functions
28872 "MD5" if args.len() == 1 => {
28873 let arg = args.remove(0);
28874 match target {
28875 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
28876 // UNHEX(MD5(x))
28877 let md5 = Expression::Function(Box::new(Function::new(
28878 "MD5".to_string(),
28879 vec![arg],
28880 )));
28881 Ok(Expression::Function(Box::new(Function::new(
28882 "UNHEX".to_string(),
28883 vec![md5],
28884 ))))
28885 }
28886 DialectType::Snowflake => {
28887 // MD5_BINARY(x)
28888 Ok(Expression::Function(Box::new(Function::new(
28889 "MD5_BINARY".to_string(),
28890 vec![arg],
28891 ))))
28892 }
28893 _ => Ok(Expression::Function(Box::new(Function::new(
28894 "MD5".to_string(),
28895 vec![arg],
28896 )))),
28897 }
28898 }
28899
28900 "SHA1" if args.len() == 1 => {
28901 let arg = args.remove(0);
28902 match target {
28903 DialectType::DuckDB => {
28904 // UNHEX(SHA1(x))
28905 let sha1 = Expression::Function(Box::new(Function::new(
28906 "SHA1".to_string(),
28907 vec![arg],
28908 )));
28909 Ok(Expression::Function(Box::new(Function::new(
28910 "UNHEX".to_string(),
28911 vec![sha1],
28912 ))))
28913 }
28914 _ => Ok(Expression::Function(Box::new(Function::new(
28915 "SHA1".to_string(),
28916 vec![arg],
28917 )))),
28918 }
28919 }
28920
            "SHA256" if args.len() == 1 => {
                let arg = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // UNHEX(SHA256(x)) — UNHEX converts the hex-string
                        // digest into its binary form.
                        let sha = Expression::Function(Box::new(Function::new(
                            "SHA256".to_string(),
                            vec![arg],
                        )));
                        Ok(Expression::Function(Box::new(Function::new(
                            "UNHEX".to_string(),
                            vec![sha],
                        ))))
                    }
                    DialectType::Snowflake => {
                        // SHA2_BINARY(x, 256)
                        Ok(Expression::Function(Box::new(Function::new(
                            "SHA2_BINARY".to_string(),
                            vec![arg, Expression::number(256)],
                        ))))
                    }
                    DialectType::Redshift | DialectType::Spark => {
                        // SHA2(x, 256)
                        Ok(Expression::Function(Box::new(Function::new(
                            "SHA2".to_string(),
                            vec![arg, Expression::number(256)],
                        ))))
                    }
                    // Other targets: pass through unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "SHA256".to_string(),
                        vec![arg],
                    )))),
                }
            }
28955
28956 "SHA512" if args.len() == 1 => {
28957 let arg = args.remove(0);
28958 match target {
28959 DialectType::Snowflake => {
28960 // SHA2_BINARY(x, 512)
28961 Ok(Expression::Function(Box::new(Function::new(
28962 "SHA2_BINARY".to_string(),
28963 vec![arg, Expression::number(512)],
28964 ))))
28965 }
28966 DialectType::Redshift | DialectType::Spark => {
28967 // SHA2(x, 512)
28968 Ok(Expression::Function(Box::new(Function::new(
28969 "SHA2".to_string(),
28970 vec![arg, Expression::number(512)],
28971 ))))
28972 }
28973 _ => Ok(Expression::Function(Box::new(Function::new(
28974 "SHA512".to_string(),
28975 vec![arg],
28976 )))),
28977 }
28978 }
28979
            // REGEXP_EXTRACT_ALL(str, pattern) -> add default group arg
            "REGEXP_EXTRACT_ALL" if args.len() == 2 => {
                let str_expr = args.remove(0);
                let pattern = args.remove(0);

                // Check if pattern contains capturing groups (parentheses)
                // Heuristic: only string-literal patterns are inspected; any
                // dynamic pattern is treated as having no groups. Note this
                // also counts non-capturing `(?:...)` groups — TODO confirm
                // that is acceptable.
                let has_groups = match &pattern {
                    Expression::Literal(Literal::String(s)) => s.contains('(') && s.contains(')'),
                    _ => false,
                };

                match target {
                    DialectType::DuckDB => {
                        // Make the group index explicit: 1 when the pattern
                        // captures, 0 (whole match) otherwise.
                        let group = if has_groups {
                            Expression::number(1)
                        } else {
                            Expression::number(0)
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "REGEXP_EXTRACT_ALL".to_string(),
                            vec![str_expr, pattern, group],
                        ))))
                    }
                    DialectType::Spark | DialectType::Databricks => {
                        // Spark's default group_index is 1 (same as BigQuery), so omit for capturing groups
                        if has_groups {
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![str_expr, pattern],
                            ))))
                        } else {
                            // No groups: request the whole match (group 0) explicitly.
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![str_expr, pattern, Expression::number(0)],
                            ))))
                        }
                    }
                    DialectType::Presto | DialectType::Trino => {
                        // Pass group 1 explicitly only when the pattern captures.
                        if has_groups {
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![str_expr, pattern, Expression::number(1)],
                            ))))
                        } else {
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![str_expr, pattern],
                            ))))
                        }
                    }
                    DialectType::Snowflake => {
                        if has_groups {
                            // REGEXP_EXTRACT_ALL(str, pattern, 1, 1, 'c', 1)
                            // Presumably position=1, occurrence=1, 'c' parameters,
                            // group_num=1 — TODO confirm against Snowflake's
                            // REGEXP_SUBSTR_ALL argument order.
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![
                                    str_expr,
                                    pattern,
                                    Expression::number(1),
                                    Expression::number(1),
                                    Expression::Literal(Literal::String("c".to_string())),
                                    Expression::number(1),
                                ],
                            ))))
                        } else {
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![str_expr, pattern],
                            ))))
                        }
                    }
                    // Other targets: pass through unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "REGEXP_EXTRACT_ALL".to_string(),
                        vec![str_expr, pattern],
                    )))),
                }
            }
29057
29058 // MOD(x, y) -> x % y for PostgreSQL/DuckDB
29059 "MOD" if args.len() == 2 => {
29060 match target {
29061 DialectType::PostgreSQL
29062 | DialectType::DuckDB
29063 | DialectType::Presto
29064 | DialectType::Trino
29065 | DialectType::Athena
29066 | DialectType::Snowflake => {
29067 let x = args.remove(0);
29068 let y = args.remove(0);
29069 // Wrap complex expressions in parens to preserve precedence
29070 let needs_paren = |e: &Expression| {
29071 matches!(
29072 e,
29073 Expression::Add(_)
29074 | Expression::Sub(_)
29075 | Expression::Mul(_)
29076 | Expression::Div(_)
29077 )
29078 };
29079 let x = if needs_paren(&x) {
29080 Expression::Paren(Box::new(crate::expressions::Paren {
29081 this: x,
29082 trailing_comments: vec![],
29083 }))
29084 } else {
29085 x
29086 };
29087 let y = if needs_paren(&y) {
29088 Expression::Paren(Box::new(crate::expressions::Paren {
29089 this: y,
29090 trailing_comments: vec![],
29091 }))
29092 } else {
29093 y
29094 };
29095 Ok(Expression::Mod(Box::new(
29096 crate::expressions::BinaryOp::new(x, y),
29097 )))
29098 }
29099 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
29100 // Hive/Spark: a % b
29101 let x = args.remove(0);
29102 let y = args.remove(0);
29103 let needs_paren = |e: &Expression| {
29104 matches!(
29105 e,
29106 Expression::Add(_)
29107 | Expression::Sub(_)
29108 | Expression::Mul(_)
29109 | Expression::Div(_)
29110 )
29111 };
29112 let x = if needs_paren(&x) {
29113 Expression::Paren(Box::new(crate::expressions::Paren {
29114 this: x,
29115 trailing_comments: vec![],
29116 }))
29117 } else {
29118 x
29119 };
29120 let y = if needs_paren(&y) {
29121 Expression::Paren(Box::new(crate::expressions::Paren {
29122 this: y,
29123 trailing_comments: vec![],
29124 }))
29125 } else {
29126 y
29127 };
29128 Ok(Expression::Mod(Box::new(
29129 crate::expressions::BinaryOp::new(x, y),
29130 )))
29131 }
29132 _ => Ok(Expression::Function(Box::new(Function::new(
29133 "MOD".to_string(),
29134 args,
29135 )))),
29136 }
29137 }
29138
29139 // ARRAY_FILTER(arr, lambda) -> FILTER for Hive/Spark/Presto, ARRAY_FILTER for StarRocks
29140 "ARRAY_FILTER" if args.len() == 2 => {
29141 let name = match target {
29142 DialectType::DuckDB => "LIST_FILTER",
29143 DialectType::StarRocks => "ARRAY_FILTER",
29144 _ => "FILTER",
29145 };
29146 Ok(Expression::Function(Box::new(Function::new(
29147 name.to_string(),
29148 args,
29149 ))))
29150 }
29151 // FILTER(arr, lambda) -> ARRAY_FILTER for StarRocks, LIST_FILTER for DuckDB
29152 "FILTER" if args.len() == 2 => {
29153 let name = match target {
29154 DialectType::DuckDB => "LIST_FILTER",
29155 DialectType::StarRocks => "ARRAY_FILTER",
29156 _ => "FILTER",
29157 };
29158 Ok(Expression::Function(Box::new(Function::new(
29159 name.to_string(),
29160 args,
29161 ))))
29162 }
29163 // REDUCE(arr, init, lambda1, lambda2) -> AGGREGATE for Spark
29164 "REDUCE" if args.len() >= 3 => {
29165 let name = match target {
29166 DialectType::Spark | DialectType::Databricks => "AGGREGATE",
29167 _ => "REDUCE",
29168 };
29169 Ok(Expression::Function(Box::new(Function::new(
29170 name.to_string(),
29171 args,
29172 ))))
29173 }
            // ARRAY_REVERSE(x) -> arrayReverse for ClickHouse (handled by generator)
            // Identity rewrite: keep the normalized ARRAY_REVERSE call; any
            // dialect-specific spelling is left to the target generator.
            "ARRAY_REVERSE" if args.len() == 1 => Ok(Expression::Function(Box::new(
                Function::new("ARRAY_REVERSE".to_string(), args),
            ))),
29178
29179 // CONCAT(a, b, ...) -> a || b || ... for DuckDB with 3+ args
29180 "CONCAT" if args.len() > 2 => match target {
29181 DialectType::DuckDB => {
29182 let mut it = args.into_iter();
29183 let mut result = it.next().unwrap();
29184 for arg in it {
29185 result = Expression::DPipe(Box::new(crate::expressions::DPipe {
29186 this: Box::new(result),
29187 expression: Box::new(arg),
29188 safe: None,
29189 }));
29190 }
29191 Ok(result)
29192 }
29193 _ => Ok(Expression::Function(Box::new(Function::new(
29194 "CONCAT".to_string(),
29195 args,
29196 )))),
29197 },
29198
            // GENERATE_DATE_ARRAY(start, end[, step]) -> target-specific
            "GENERATE_DATE_ARRAY" => {
                if matches!(target, DialectType::BigQuery) {
                    // BQ->BQ: add default interval if not present
                    if args.len() == 2 {
                        let start = args.remove(0);
                        let end = args.remove(0);
                        // Default step is INTERVAL '1' DAY.
                        let default_interval =
                            Expression::Interval(Box::new(crate::expressions::Interval {
                                this: Some(Expression::Literal(Literal::String("1".to_string()))),
                                unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                    unit: crate::expressions::IntervalUnit::Day,
                                    use_plural: false,
                                }),
                            }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "GENERATE_DATE_ARRAY".to_string(),
                            vec![start, end, default_interval],
                        ))))
                    } else {
                        Ok(Expression::Function(Box::new(Function::new(
                            "GENERATE_DATE_ARRAY".to_string(),
                            args,
                        ))))
                    }
                } else if matches!(target, DialectType::DuckDB) {
                    // DuckDB: CAST(GENERATE_SERIES(CAST(start AS DATE), CAST(end AS DATE), step) AS DATE[])
                    let start = args.get(0).cloned();
                    let end = args.get(1).cloned();
                    // Missing step defaults to INTERVAL '1' DAY.
                    let step = args.get(2).cloned().or_else(|| {
                        Some(Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(Expression::Literal(Literal::String("1".to_string()))),
                                unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                    unit: crate::expressions::IntervalUnit::Day,
                                    use_plural: false,
                                }),
                            },
                        )))
                    });

                    // Wrap start/end in CAST(... AS DATE) only for string literals
                    let maybe_cast_date = |expr: Expression| -> Expression {
                        if matches!(&expr, Expression::Literal(Literal::String(_))) {
                            Expression::Cast(Box::new(Cast {
                                this: expr,
                                to: DataType::Date,
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                            }))
                        } else {
                            expr
                        }
                    };
                    let cast_start = start.map(maybe_cast_date);
                    let cast_end = end.map(maybe_cast_date);

                    let gen_series =
                        Expression::GenerateSeries(Box::new(crate::expressions::GenerateSeries {
                            start: cast_start.map(Box::new),
                            end: cast_end.map(Box::new),
                            step: step.map(Box::new),
                            is_end_exclusive: None,
                        }));

                    // Wrap in CAST(... AS DATE[])
                    Ok(Expression::Cast(Box::new(Cast {
                        this: gen_series,
                        to: DataType::Array {
                            element_type: Box::new(DataType::Date),
                            dimension: None,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    })))
                } else if matches!(target, DialectType::Snowflake) {
                    // Snowflake: keep as GENERATE_DATE_ARRAY function for later transform
                    // (transform_generate_date_array_snowflake will convert to ARRAY_GENERATE_RANGE + DATEADD)
                    // NOTE(review): this branch is identical to the BigQuery branch
                    // above; the two could be merged into a single `matches!` arm.
                    if args.len() == 2 {
                        let start = args.remove(0);
                        let end = args.remove(0);
                        let default_interval =
                            Expression::Interval(Box::new(crate::expressions::Interval {
                                this: Some(Expression::Literal(Literal::String("1".to_string()))),
                                unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                    unit: crate::expressions::IntervalUnit::Day,
                                    use_plural: false,
                                }),
                            }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "GENERATE_DATE_ARRAY".to_string(),
                            vec![start, end, default_interval],
                        ))))
                    } else {
                        Ok(Expression::Function(Box::new(Function::new(
                            "GENERATE_DATE_ARRAY".to_string(),
                            args,
                        ))))
                    }
                } else {
                    // Convert to GenerateSeries for other targets
                    let start = args.get(0).cloned();
                    let end = args.get(1).cloned();
                    let step = args.get(2).cloned().or_else(|| {
                        Some(Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(Expression::Literal(Literal::String("1".to_string()))),
                                unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                    unit: crate::expressions::IntervalUnit::Day,
                                    use_plural: false,
                                }),
                            },
                        )))
                    });
                    Ok(Expression::GenerateSeries(Box::new(
                        crate::expressions::GenerateSeries {
                            start: start.map(Box::new),
                            end: end.map(Box::new),
                            step: step.map(Box::new),
                            is_end_exclusive: None,
                        },
                    )))
                }
            }
29327
            // PARSE_DATE(format, str) -> target-specific
            "PARSE_DATE" if args.len() == 2 => {
                let format = args.remove(0);
                let str_expr = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // CAST(STRPTIME(str, duck_format) AS DATE)
                        // Argument order flips: BigQuery is (format, str),
                        // STRPTIME is (str, format).
                        let duck_format = Self::bq_format_to_duckdb(&format);
                        let strptime = Expression::Function(Box::new(Function::new(
                            "STRPTIME".to_string(),
                            vec![str_expr, duck_format],
                        )));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: strptime,
                            to: DataType::Date,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    }
                    DialectType::Snowflake => {
                        // _POLYGLOT_DATE(str, snowflake_format)
                        // Use marker so Snowflake target transform keeps it as DATE() instead of TO_DATE()
                        let sf_format = Self::bq_format_to_snowflake(&format);
                        Ok(Expression::Function(Box::new(Function::new(
                            "_POLYGLOT_DATE".to_string(),
                            vec![str_expr, sf_format],
                        ))))
                    }
                    // Other targets: pass through with the original argument order.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "PARSE_DATE".to_string(),
                        vec![format, str_expr],
                    )))),
                }
            }
29364
            // PARSE_TIMESTAMP(format, str) -> target-specific
            "PARSE_TIMESTAMP" if args.len() >= 2 => {
                let format = args.remove(0);
                let str_expr = args.remove(0);
                // Optional third argument: a time zone.
                let tz = if !args.is_empty() {
                    Some(args.remove(0))
                } else {
                    None
                };
                match target {
                    DialectType::DuckDB => {
                        // STRPTIME(str, duck_format)
                        // NOTE(review): the optional `tz` argument is silently
                        // dropped on this path — confirm whether the DuckDB output
                        // should apply the zone (e.g. via AT TIME ZONE).
                        let duck_format = Self::bq_format_to_duckdb(&format);
                        let strptime = Expression::Function(Box::new(Function::new(
                            "STRPTIME".to_string(),
                            vec![str_expr, duck_format],
                        )));
                        Ok(strptime)
                    }
                    _ => {
                        // Other targets: rebuild the call, preserving the
                        // optional tz argument.
                        let mut result_args = vec![format, str_expr];
                        if let Some(tz_arg) = tz {
                            result_args.push(tz_arg);
                        }
                        Ok(Expression::Function(Box::new(Function::new(
                            "PARSE_TIMESTAMP".to_string(),
                            result_args,
                        ))))
                    }
                }
            }
29395
            // FORMAT_DATE(format, date) -> target-specific
            "FORMAT_DATE" if args.len() == 2 => {
                let format = args.remove(0);
                let date_expr = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // STRFTIME(CAST(date AS DATE), format)
                        // NOTE(review): unlike FORMAT_DATETIME, the format string
                        // is passed through without bq_format_to_duckdb — confirm
                        // that is intentional for date-only specifiers.
                        let cast_date = Expression::Cast(Box::new(Cast {
                            this: date_expr,
                            to: DataType::Date,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "STRFTIME".to_string(),
                            vec![cast_date, format],
                        ))))
                    }
                    // Other targets: pass through unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "FORMAT_DATE".to_string(),
                        vec![format, date_expr],
                    )))),
                }
            }
29422
            // FORMAT_DATETIME(format, datetime) -> target-specific
            "FORMAT_DATETIME" if args.len() == 2 => {
                let format = args.remove(0);
                let dt_expr = args.remove(0);

                if matches!(target, DialectType::BigQuery) {
                    // BQ->BQ: normalize %H:%M:%S to %T, %x to %D
                    let norm_format = Self::bq_format_normalize_bq(&format);
                    // Also strip DATETIME keyword from typed literals
                    // (a typed literal becomes CAST('...' AS DATETIME)).
                    let norm_dt = match dt_expr {
                        Expression::Literal(Literal::Timestamp(s)) => {
                            Expression::Cast(Box::new(Cast {
                                this: Expression::Literal(Literal::String(s)),
                                to: DataType::Custom {
                                    name: "DATETIME".to_string(),
                                },
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                            }))
                        }
                        other => other,
                    };
                    return Ok(Expression::Function(Box::new(Function::new(
                        "FORMAT_DATETIME".to_string(),
                        vec![norm_format, norm_dt],
                    ))));
                }

                match target {
                    DialectType::DuckDB => {
                        // STRFTIME(CAST(dt AS TIMESTAMP), duckdb_format)
                        let cast_dt = Self::ensure_cast_timestamp(dt_expr);
                        let duck_format = Self::bq_format_to_duckdb(&format);
                        Ok(Expression::Function(Box::new(Function::new(
                            "STRFTIME".to_string(),
                            vec![cast_dt, duck_format],
                        ))))
                    }
                    // Other targets: pass through unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "FORMAT_DATETIME".to_string(),
                        vec![format, dt_expr],
                    )))),
                }
            }
29469
            // FORMAT_TIMESTAMP(format, ts) -> target-specific
            "FORMAT_TIMESTAMP" if args.len() == 2 => {
                let format = args.remove(0);
                let ts_expr = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // STRFTIME(CAST(CAST(ts AS TIMESTAMPTZ) AS TIMESTAMP), format)
                        // The double cast normalizes the value through TIMESTAMPTZ
                        // before formatting as a zone-less timestamp.
                        let cast_tstz = Self::ensure_cast_timestamptz(ts_expr);
                        let cast_ts = Expression::Cast(Box::new(Cast {
                            this: cast_tstz,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        // NOTE(review): the format is passed through raw here,
                        // while the Snowflake branch converts it — confirm DuckDB
                        // accepts BigQuery-style specifiers directly.
                        Ok(Expression::Function(Box::new(Function::new(
                            "STRFTIME".to_string(),
                            vec![cast_ts, format],
                        ))))
                    }
                    DialectType::Snowflake => {
                        // TO_CHAR(CAST(CAST(ts AS TIMESTAMPTZ) AS TIMESTAMP), snowflake_format)
                        let cast_tstz = Self::ensure_cast_timestamptz(ts_expr);
                        let cast_ts = Expression::Cast(Box::new(Cast {
                            this: cast_tstz,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        let sf_format = Self::bq_format_to_snowflake(&format);
                        Ok(Expression::Function(Box::new(Function::new(
                            "TO_CHAR".to_string(),
                            vec![cast_ts, sf_format],
                        ))))
                    }
                    // Other targets: pass through unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "FORMAT_TIMESTAMP".to_string(),
                        vec![format, ts_expr],
                    )))),
                }
            }
29520
            // UNIX_DATE(date) -> DATE_DIFF('DAY', '1970-01-01', date) for DuckDB
            "UNIX_DATE" if args.len() == 1 => {
                let date = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // Days elapsed since the Unix epoch.
                        let epoch = Expression::Cast(Box::new(Cast {
                            this: Expression::Literal(Literal::String("1970-01-01".to_string())),
                            to: DataType::Date,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        // DATE_DIFF('DAY', epoch, date) but date might be DATE '...' literal
                        // Need to convert DATE literal to CAST
                        let norm_date = Self::date_literal_to_cast(date);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_DIFF".to_string(),
                            vec![
                                Expression::Literal(Literal::String("DAY".to_string())),
                                epoch,
                                norm_date,
                            ],
                        ))))
                    }
                    // Other targets: pass through unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "UNIX_DATE".to_string(),
                        vec![date],
                    )))),
                }
            }
29552
            // UNIX_SECONDS(ts) -> target-specific
            // NOTE(review): UNREACHABLE — an earlier arm in this match has the
            // identical pattern and guard (`"UNIX_SECONDS" if args.len() == 1`),
            // so this arm can never be selected (rustc does not warn because both
            // arms are guarded). The bodies differ: this copy uses
            // `ts_literal_to_cast_tz` (DuckDB) and a TIMESTAMPDIFF *function call*
            // (Snowflake), while the live arm uses `ensure_cast_timestamptz` and
            // an `Expression::TimestampDiff` node. Decide which version is
            // intended and delete the other.
            "UNIX_SECONDS" if args.len() == 1 => {
                let ts = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // CAST(EPOCH(CAST(ts AS TIMESTAMPTZ)) AS BIGINT)
                        let norm_ts = Self::ts_literal_to_cast_tz(ts);
                        let epoch = Expression::Function(Box::new(Function::new(
                            "EPOCH".to_string(),
                            vec![norm_ts],
                        )));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: epoch,
                            to: DataType::BigInt { length: None },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    }
                    DialectType::Snowflake => {
                        // TIMESTAMPDIFF(SECONDS, CAST('1970-01-01 00:00:00+00' AS TIMESTAMPTZ), ts)
                        let epoch = Expression::Cast(Box::new(Cast {
                            this: Expression::Literal(Literal::String(
                                "1970-01-01 00:00:00+00".to_string(),
                            )),
                            to: DataType::Timestamp {
                                timezone: true,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "TIMESTAMPDIFF".to_string(),
                            vec![
                                Expression::Identifier(Identifier::new("SECONDS".to_string())),
                                epoch,
                                ts,
                            ],
                        ))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "UNIX_SECONDS".to_string(),
                        vec![ts],
                    )))),
                }
            }
29603
29604 // UNIX_MILLIS(ts) -> target-specific
29605 "UNIX_MILLIS" if args.len() == 1 => {
29606 let ts = args.remove(0);
29607 match target {
29608 DialectType::DuckDB => {
29609 let norm_ts = Self::ts_literal_to_cast_tz(ts);
29610 Ok(Expression::Function(Box::new(Function::new(
29611 "EPOCH_MS".to_string(),
29612 vec![norm_ts],
29613 ))))
29614 }
29615 _ => Ok(Expression::Function(Box::new(Function::new(
29616 "UNIX_MILLIS".to_string(),
29617 vec![ts],
29618 )))),
29619 }
29620 }
29621
29622 // UNIX_MICROS(ts) -> target-specific
29623 "UNIX_MICROS" if args.len() == 1 => {
29624 let ts = args.remove(0);
29625 match target {
29626 DialectType::DuckDB => {
29627 let norm_ts = Self::ts_literal_to_cast_tz(ts);
29628 Ok(Expression::Function(Box::new(Function::new(
29629 "EPOCH_US".to_string(),
29630 vec![norm_ts],
29631 ))))
29632 }
29633 _ => Ok(Expression::Function(Box::new(Function::new(
29634 "UNIX_MICROS".to_string(),
29635 vec![ts],
29636 )))),
29637 }
29638 }
29639
29640 // INSTR(str, substr) -> target-specific
29641 "INSTR" => {
29642 if matches!(target, DialectType::BigQuery) {
29643 // BQ->BQ: keep as INSTR
29644 Ok(Expression::Function(Box::new(Function::new(
29645 "INSTR".to_string(),
29646 args,
29647 ))))
29648 } else if matches!(target, DialectType::Snowflake) && args.len() == 2 {
29649 // Snowflake: CHARINDEX(substr, str) - swap args
29650 let str_expr = args.remove(0);
29651 let substr = args.remove(0);
29652 Ok(Expression::Function(Box::new(Function::new(
29653 "CHARINDEX".to_string(),
29654 vec![substr, str_expr],
29655 ))))
29656 } else {
29657 // Keep as INSTR for other targets
29658 Ok(Expression::Function(Box::new(Function::new(
29659 "INSTR".to_string(),
29660 args,
29661 ))))
29662 }
29663 }
29664
29665 // CURRENT_TIMESTAMP / CURRENT_DATE handling - parens normalization and timezone
29666 "CURRENT_TIMESTAMP" | "CURRENT_DATE" | "CURRENT_DATETIME" | "CURRENT_TIME" => {
29667 if matches!(target, DialectType::BigQuery) {
29668 // BQ->BQ: always output with parens (function form), keep any timezone arg
29669 Ok(Expression::Function(Box::new(Function::new(name, args))))
29670 } else if name == "CURRENT_DATE" && args.len() == 1 {
29671 // CURRENT_DATE('UTC') - has timezone arg
29672 let tz_arg = args.remove(0);
29673 match target {
29674 DialectType::DuckDB => {
29675 // CAST(CURRENT_TIMESTAMP AT TIME ZONE 'UTC' AS DATE)
29676 let ct = Expression::CurrentTimestamp(
29677 crate::expressions::CurrentTimestamp {
29678 precision: None,
29679 sysdate: false,
29680 },
29681 );
29682 let at_tz =
29683 Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
29684 this: ct,
29685 zone: tz_arg,
29686 }));
29687 Ok(Expression::Cast(Box::new(Cast {
29688 this: at_tz,
29689 to: DataType::Date,
29690 trailing_comments: vec![],
29691 double_colon_syntax: false,
29692 format: None,
29693 default: None,
29694 })))
29695 }
29696 DialectType::Snowflake => {
29697 // CAST(CONVERT_TIMEZONE('UTC', CURRENT_TIMESTAMP()) AS DATE)
29698 let ct = Expression::Function(Box::new(Function::new(
29699 "CURRENT_TIMESTAMP".to_string(),
29700 vec![],
29701 )));
29702 let convert = Expression::Function(Box::new(Function::new(
29703 "CONVERT_TIMEZONE".to_string(),
29704 vec![tz_arg, ct],
29705 )));
29706 Ok(Expression::Cast(Box::new(Cast {
29707 this: convert,
29708 to: DataType::Date,
29709 trailing_comments: vec![],
29710 double_colon_syntax: false,
29711 format: None,
29712 default: None,
29713 })))
29714 }
29715 _ => {
29716 // PostgreSQL, MySQL, etc.: CURRENT_DATE AT TIME ZONE 'UTC'
29717 let cd = Expression::CurrentDate(crate::expressions::CurrentDate);
29718 Ok(Expression::AtTimeZone(Box::new(
29719 crate::expressions::AtTimeZone {
29720 this: cd,
29721 zone: tz_arg,
29722 },
29723 )))
29724 }
29725 }
29726 } else if (name == "CURRENT_TIMESTAMP"
29727 || name == "CURRENT_TIME"
29728 || name == "CURRENT_DATE")
29729 && args.is_empty()
29730 && matches!(
29731 target,
29732 DialectType::PostgreSQL
29733 | DialectType::DuckDB
29734 | DialectType::Presto
29735 | DialectType::Trino
29736 )
29737 {
29738 // These targets want no-parens CURRENT_TIMESTAMP / CURRENT_DATE / CURRENT_TIME
29739 if name == "CURRENT_TIMESTAMP" {
29740 Ok(Expression::CurrentTimestamp(
29741 crate::expressions::CurrentTimestamp {
29742 precision: None,
29743 sysdate: false,
29744 },
29745 ))
29746 } else if name == "CURRENT_DATE" {
29747 Ok(Expression::CurrentDate(crate::expressions::CurrentDate))
29748 } else {
29749 // CURRENT_TIME
29750 Ok(Expression::CurrentTime(crate::expressions::CurrentTime {
29751 precision: None,
29752 }))
29753 }
29754 } else {
29755 // All other targets: keep as function (with parens)
29756 Ok(Expression::Function(Box::new(Function::new(name, args))))
29757 }
29758 }
29759
29760 // JSON_QUERY(json, path) -> target-specific
29761 "JSON_QUERY" if args.len() == 2 => {
29762 match target {
29763 DialectType::DuckDB | DialectType::SQLite => {
29764 // json -> path syntax
29765 let json_expr = args.remove(0);
29766 let path = args.remove(0);
29767 Ok(Expression::JsonExtract(Box::new(
29768 crate::expressions::JsonExtractFunc {
29769 this: json_expr,
29770 path,
29771 returning: None,
29772 arrow_syntax: true,
29773 hash_arrow_syntax: false,
29774 wrapper_option: None,
29775 quotes_option: None,
29776 on_scalar_string: false,
29777 on_error: None,
29778 },
29779 )))
29780 }
29781 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
29782 Ok(Expression::Function(Box::new(Function::new(
29783 "GET_JSON_OBJECT".to_string(),
29784 args,
29785 ))))
29786 }
29787 DialectType::PostgreSQL | DialectType::Redshift => Ok(Expression::Function(
29788 Box::new(Function::new("JSON_EXTRACT_PATH".to_string(), args)),
29789 )),
29790 _ => Ok(Expression::Function(Box::new(Function::new(
29791 "JSON_QUERY".to_string(),
29792 args,
29793 )))),
29794 }
29795 }
29796
29797 // JSON_VALUE_ARRAY(json, path) -> target-specific
29798 "JSON_VALUE_ARRAY" if args.len() == 2 => {
29799 match target {
29800 DialectType::DuckDB => {
29801 // CAST(json -> path AS TEXT[])
29802 let json_expr = args.remove(0);
29803 let path = args.remove(0);
29804 let arrow = Expression::JsonExtract(Box::new(
29805 crate::expressions::JsonExtractFunc {
29806 this: json_expr,
29807 path,
29808 returning: None,
29809 arrow_syntax: true,
29810 hash_arrow_syntax: false,
29811 wrapper_option: None,
29812 quotes_option: None,
29813 on_scalar_string: false,
29814 on_error: None,
29815 },
29816 ));
29817 Ok(Expression::Cast(Box::new(Cast {
29818 this: arrow,
29819 to: DataType::Array {
29820 element_type: Box::new(DataType::Text),
29821 dimension: None,
29822 },
29823 trailing_comments: vec![],
29824 double_colon_syntax: false,
29825 format: None,
29826 default: None,
29827 })))
29828 }
29829 DialectType::Snowflake => {
29830 let json_expr = args.remove(0);
29831 let path_expr = args.remove(0);
29832 // Convert JSON path from $.path to just path
29833 let sf_path = if let Expression::Literal(Literal::String(ref s)) = path_expr
29834 {
29835 let trimmed = s.trim_start_matches('$').trim_start_matches('.');
29836 Expression::Literal(Literal::String(trimmed.to_string()))
29837 } else {
29838 path_expr
29839 };
29840 let parse_json = Expression::Function(Box::new(Function::new(
29841 "PARSE_JSON".to_string(),
29842 vec![json_expr],
29843 )));
29844 let get_path = Expression::Function(Box::new(Function::new(
29845 "GET_PATH".to_string(),
29846 vec![parse_json, sf_path],
29847 )));
29848 // TRANSFORM(get_path, x -> CAST(x AS VARCHAR))
29849 let cast_expr = Expression::Cast(Box::new(Cast {
29850 this: Expression::Identifier(Identifier::new("x")),
29851 to: DataType::VarChar {
29852 length: None,
29853 parenthesized_length: false,
29854 },
29855 trailing_comments: vec![],
29856 double_colon_syntax: false,
29857 format: None,
29858 default: None,
29859 }));
29860 let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
29861 parameters: vec![Identifier::new("x")],
29862 body: cast_expr,
29863 colon: false,
29864 parameter_types: vec![],
29865 }));
29866 Ok(Expression::Function(Box::new(Function::new(
29867 "TRANSFORM".to_string(),
29868 vec![get_path, lambda],
29869 ))))
29870 }
29871 _ => Ok(Expression::Function(Box::new(Function::new(
29872 "JSON_VALUE_ARRAY".to_string(),
29873 args,
29874 )))),
29875 }
29876 }
29877
29878 // BigQuery REGEXP_EXTRACT(val, regex[, position[, occurrence]]) -> target dialects
29879 // BigQuery's 3rd arg is "position" (starting char index), 4th is "occurrence" (which match to return)
29880 // This is different from Hive/Spark where 3rd arg is "group_index"
29881 "REGEXP_EXTRACT" if matches!(source, DialectType::BigQuery) => {
29882 match target {
29883 DialectType::DuckDB
29884 | DialectType::Presto
29885 | DialectType::Trino
29886 | DialectType::Athena => {
29887 if args.len() == 2 {
29888 // REGEXP_EXTRACT(val, regex) -> REGEXP_EXTRACT(val, regex, 1)
29889 args.push(Expression::number(1));
29890 Ok(Expression::Function(Box::new(Function::new(
29891 "REGEXP_EXTRACT".to_string(),
29892 args,
29893 ))))
29894 } else if args.len() == 3 {
29895 let val = args.remove(0);
29896 let regex = args.remove(0);
29897 let position = args.remove(0);
29898 let is_pos_1 = matches!(&position, Expression::Literal(Literal::Number(n)) if n == "1");
29899 if is_pos_1 {
29900 Ok(Expression::Function(Box::new(Function::new(
29901 "REGEXP_EXTRACT".to_string(),
29902 vec![val, regex, Expression::number(1)],
29903 ))))
29904 } else {
29905 let substring_expr = Expression::Function(Box::new(Function::new(
29906 "SUBSTRING".to_string(),
29907 vec![val, position],
29908 )));
29909 let nullif_expr = Expression::Function(Box::new(Function::new(
29910 "NULLIF".to_string(),
29911 vec![
29912 substring_expr,
29913 Expression::Literal(Literal::String(String::new())),
29914 ],
29915 )));
29916 Ok(Expression::Function(Box::new(Function::new(
29917 "REGEXP_EXTRACT".to_string(),
29918 vec![nullif_expr, regex, Expression::number(1)],
29919 ))))
29920 }
29921 } else if args.len() == 4 {
29922 let val = args.remove(0);
29923 let regex = args.remove(0);
29924 let position = args.remove(0);
29925 let occurrence = args.remove(0);
29926 let is_pos_1 = matches!(&position, Expression::Literal(Literal::Number(n)) if n == "1");
29927 let is_occ_1 = matches!(&occurrence, Expression::Literal(Literal::Number(n)) if n == "1");
29928 if is_pos_1 && is_occ_1 {
29929 Ok(Expression::Function(Box::new(Function::new(
29930 "REGEXP_EXTRACT".to_string(),
29931 vec![val, regex, Expression::number(1)],
29932 ))))
29933 } else {
29934 let subject = if is_pos_1 {
29935 val
29936 } else {
29937 let substring_expr = Expression::Function(Box::new(
29938 Function::new("SUBSTRING".to_string(), vec![val, position]),
29939 ));
29940 Expression::Function(Box::new(Function::new(
29941 "NULLIF".to_string(),
29942 vec![
29943 substring_expr,
29944 Expression::Literal(Literal::String(String::new())),
29945 ],
29946 )))
29947 };
29948 let extract_all = Expression::Function(Box::new(Function::new(
29949 "REGEXP_EXTRACT_ALL".to_string(),
29950 vec![subject, regex, Expression::number(1)],
29951 )));
29952 Ok(Expression::Function(Box::new(Function::new(
29953 "ARRAY_EXTRACT".to_string(),
29954 vec![extract_all, occurrence],
29955 ))))
29956 }
29957 } else {
29958 Ok(Expression::Function(Box::new(Function {
29959 name: f.name,
29960 args,
29961 distinct: f.distinct,
29962 trailing_comments: f.trailing_comments,
29963 use_bracket_syntax: f.use_bracket_syntax,
29964 no_parens: f.no_parens,
29965 quoted: f.quoted,
29966 span: None,
29967 })))
29968 }
29969 }
29970 DialectType::Snowflake => {
29971 // BigQuery REGEXP_EXTRACT -> Snowflake REGEXP_SUBSTR
29972 Ok(Expression::Function(Box::new(Function::new(
29973 "REGEXP_SUBSTR".to_string(),
29974 args,
29975 ))))
29976 }
29977 _ => {
29978 // For other targets (Hive/Spark/BigQuery): pass through as-is
29979 // BigQuery's default group behavior matches Hive/Spark for 2-arg case
29980 Ok(Expression::Function(Box::new(Function {
29981 name: f.name,
29982 args,
29983 distinct: f.distinct,
29984 trailing_comments: f.trailing_comments,
29985 use_bracket_syntax: f.use_bracket_syntax,
29986 no_parens: f.no_parens,
29987 quoted: f.quoted,
29988 span: None,
29989 })))
29990 }
29991 }
29992 }
29993
29994 // BigQuery STRUCT(args) -> target-specific struct expression
29995 "STRUCT" => {
29996 // Convert Function args to Struct fields
29997 let mut fields: Vec<(Option<String>, Expression)> = Vec::new();
29998 for (i, arg) in args.into_iter().enumerate() {
29999 match arg {
30000 Expression::Alias(a) => {
30001 // Named field: expr AS name
30002 fields.push((Some(a.alias.name.clone()), a.this));
30003 }
30004 other => {
30005 // Unnamed field: for Spark/Hive, keep as None
30006 // For Snowflake, auto-name as _N
30007 // For DuckDB, use column name for column refs, _N for others
30008 if matches!(target, DialectType::Snowflake) {
30009 fields.push((Some(format!("_{}", i)), other));
30010 } else if matches!(target, DialectType::DuckDB) {
30011 let auto_name = match &other {
30012 Expression::Column(col) => col.name.name.clone(),
30013 _ => format!("_{}", i),
30014 };
30015 fields.push((Some(auto_name), other));
30016 } else {
30017 fields.push((None, other));
30018 }
30019 }
30020 }
30021 }
30022
30023 match target {
30024 DialectType::Snowflake => {
30025 // OBJECT_CONSTRUCT('name', value, ...)
30026 let mut oc_args = Vec::new();
30027 for (name, val) in &fields {
30028 if let Some(n) = name {
30029 oc_args.push(Expression::Literal(Literal::String(n.clone())));
30030 oc_args.push(val.clone());
30031 } else {
30032 oc_args.push(val.clone());
30033 }
30034 }
30035 Ok(Expression::Function(Box::new(Function::new(
30036 "OBJECT_CONSTRUCT".to_string(),
30037 oc_args,
30038 ))))
30039 }
30040 DialectType::DuckDB => {
30041 // {'name': value, ...}
30042 Ok(Expression::Struct(Box::new(crate::expressions::Struct {
30043 fields,
30044 })))
30045 }
30046 DialectType::Hive => {
30047 // STRUCT(val1, val2, ...) - strip aliases
30048 let hive_fields: Vec<(Option<String>, Expression)> =
30049 fields.into_iter().map(|(_, v)| (None, v)).collect();
30050 Ok(Expression::Struct(Box::new(crate::expressions::Struct {
30051 fields: hive_fields,
30052 })))
30053 }
30054 DialectType::Spark | DialectType::Databricks => {
30055 // Use Expression::Struct to bypass Spark target transform auto-naming
30056 Ok(Expression::Struct(Box::new(crate::expressions::Struct {
30057 fields,
30058 })))
30059 }
30060 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
30061 // Check if all fields are named AND all have inferable types - if so, wrap in CAST(ROW(...) AS ROW(name TYPE, ...))
30062 let all_named =
30063 !fields.is_empty() && fields.iter().all(|(name, _)| name.is_some());
30064 let all_types_inferable = all_named
30065 && fields
30066 .iter()
30067 .all(|(_, val)| Self::can_infer_presto_type(val));
30068 let row_args: Vec<Expression> =
30069 fields.iter().map(|(_, v)| v.clone()).collect();
30070 let row_expr = Expression::Function(Box::new(Function::new(
30071 "ROW".to_string(),
30072 row_args,
30073 )));
30074 if all_named && all_types_inferable {
30075 // Build ROW type with inferred types
30076 let mut row_type_fields = Vec::new();
30077 for (name, val) in &fields {
30078 if let Some(n) = name {
30079 let type_str = Self::infer_sql_type_for_presto(val);
30080 row_type_fields.push(crate::expressions::StructField::new(
30081 n.clone(),
30082 crate::expressions::DataType::Custom { name: type_str },
30083 ));
30084 }
30085 }
30086 let row_type = crate::expressions::DataType::Struct {
30087 fields: row_type_fields,
30088 nested: true,
30089 };
30090 Ok(Expression::Cast(Box::new(Cast {
30091 this: row_expr,
30092 to: row_type,
30093 trailing_comments: Vec::new(),
30094 double_colon_syntax: false,
30095 format: None,
30096 default: None,
30097 })))
30098 } else {
30099 Ok(row_expr)
30100 }
30101 }
30102 _ => {
30103 // Default: keep as STRUCT function with original args
30104 let mut new_args = Vec::new();
30105 for (name, val) in fields {
30106 if let Some(n) = name {
30107 new_args.push(Expression::Alias(Box::new(
30108 crate::expressions::Alias::new(val, Identifier::new(n)),
30109 )));
30110 } else {
30111 new_args.push(val);
30112 }
30113 }
30114 Ok(Expression::Function(Box::new(Function::new(
30115 "STRUCT".to_string(),
30116 new_args,
30117 ))))
30118 }
30119 }
30120 }
30121
30122 // ROUND(x, n, 'ROUND_HALF_EVEN') -> ROUND_EVEN(x, n) for DuckDB
30123 "ROUND" if args.len() == 3 => {
30124 let x = args.remove(0);
30125 let n = args.remove(0);
30126 let mode = args.remove(0);
30127 // Check if mode is 'ROUND_HALF_EVEN'
30128 let is_half_even = matches!(&mode, Expression::Literal(Literal::String(s)) if s.eq_ignore_ascii_case("ROUND_HALF_EVEN"));
30129 if is_half_even && matches!(target, DialectType::DuckDB) {
30130 Ok(Expression::Function(Box::new(Function::new(
30131 "ROUND_EVEN".to_string(),
30132 vec![x, n],
30133 ))))
30134 } else {
30135 // Pass through with all args
30136 Ok(Expression::Function(Box::new(Function::new(
30137 "ROUND".to_string(),
30138 vec![x, n, mode],
30139 ))))
30140 }
30141 }
30142
30143 // MAKE_INTERVAL(year, month, named_args...) -> INTERVAL string for Snowflake/DuckDB
30144 "MAKE_INTERVAL" => {
30145 // MAKE_INTERVAL(1, 2, minute => 5, day => 3)
30146 // The positional args are: year, month
30147 // Named args are: day =>, minute =>, etc.
30148 // For Snowflake: INTERVAL '1 year, 2 month, 5 minute, 3 day'
30149 // For DuckDB: INTERVAL '1 year 2 month 5 minute 3 day'
30150 // For BigQuery->BigQuery: reorder named args (day before minute)
30151 if matches!(target, DialectType::Snowflake | DialectType::DuckDB) {
30152 let mut parts: Vec<(String, String)> = Vec::new();
30153 let mut pos_idx = 0;
30154 let pos_units = ["year", "month"];
30155 for arg in &args {
30156 if let Expression::NamedArgument(na) = arg {
30157 // Named arg like minute => 5
30158 let unit = na.name.name.clone();
30159 if let Expression::Literal(Literal::Number(n)) = &na.value {
30160 parts.push((unit, n.clone()));
30161 }
30162 } else if pos_idx < pos_units.len() {
30163 if let Expression::Literal(Literal::Number(n)) = arg {
30164 parts.push((pos_units[pos_idx].to_string(), n.clone()));
30165 }
30166 pos_idx += 1;
30167 }
30168 }
30169 // Don't sort - preserve original argument order
30170 let separator = if matches!(target, DialectType::Snowflake) {
30171 ", "
30172 } else {
30173 " "
30174 };
30175 let interval_str = parts
30176 .iter()
30177 .map(|(u, v)| format!("{} {}", v, u))
30178 .collect::<Vec<_>>()
30179 .join(separator);
30180 Ok(Expression::Interval(Box::new(
30181 crate::expressions::Interval {
30182 this: Some(Expression::Literal(Literal::String(interval_str))),
30183 unit: None,
30184 },
30185 )))
30186 } else if matches!(target, DialectType::BigQuery) {
30187 // BigQuery->BigQuery: reorder named args (day, minute, etc.)
30188 let mut positional = Vec::new();
30189 let mut named: Vec<(
30190 String,
30191 Expression,
30192 crate::expressions::NamedArgSeparator,
30193 )> = Vec::new();
30194 let _pos_units = ["year", "month"];
30195 let mut _pos_idx = 0;
30196 for arg in args {
30197 if let Expression::NamedArgument(na) = arg {
30198 named.push((na.name.name.clone(), na.value, na.separator));
30199 } else {
30200 positional.push(arg);
30201 _pos_idx += 1;
30202 }
30203 }
30204 // Sort named args by: day, hour, minute, second
30205 let unit_order = |u: &str| -> usize {
30206 match u.to_lowercase().as_str() {
30207 "day" => 0,
30208 "hour" => 1,
30209 "minute" => 2,
30210 "second" => 3,
30211 _ => 4,
30212 }
30213 };
30214 named.sort_by_key(|(u, _, _)| unit_order(u));
30215 let mut result_args = positional;
30216 for (name, value, sep) in named {
30217 result_args.push(Expression::NamedArgument(Box::new(
30218 crate::expressions::NamedArgument {
30219 name: Identifier::new(&name),
30220 value,
30221 separator: sep,
30222 },
30223 )));
30224 }
30225 Ok(Expression::Function(Box::new(Function::new(
30226 "MAKE_INTERVAL".to_string(),
30227 result_args,
30228 ))))
30229 } else {
30230 Ok(Expression::Function(Box::new(Function::new(
30231 "MAKE_INTERVAL".to_string(),
30232 args,
30233 ))))
30234 }
30235 }
30236
30237 // ARRAY_TO_STRING(array, sep, null_text) -> ARRAY_TO_STRING(LIST_TRANSFORM(array, x -> COALESCE(x, null_text)), sep) for DuckDB
30238 "ARRAY_TO_STRING" if args.len() == 3 => {
30239 let arr = args.remove(0);
30240 let sep = args.remove(0);
30241 let null_text = args.remove(0);
30242 match target {
30243 DialectType::DuckDB => {
30244 // LIST_TRANSFORM(array, x -> COALESCE(x, null_text))
30245 let _lambda_param =
30246 Expression::Identifier(crate::expressions::Identifier::new("x"));
30247 let coalesce =
30248 Expression::Coalesce(Box::new(crate::expressions::VarArgFunc {
30249 original_name: None,
30250 expressions: vec![
30251 Expression::Identifier(crate::expressions::Identifier::new(
30252 "x",
30253 )),
30254 null_text,
30255 ],
30256 }));
30257 let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
30258 parameters: vec![crate::expressions::Identifier::new("x")],
30259 body: coalesce,
30260 colon: false,
30261 parameter_types: vec![],
30262 }));
30263 let list_transform = Expression::Function(Box::new(Function::new(
30264 "LIST_TRANSFORM".to_string(),
30265 vec![arr, lambda],
30266 )));
30267 Ok(Expression::Function(Box::new(Function::new(
30268 "ARRAY_TO_STRING".to_string(),
30269 vec![list_transform, sep],
30270 ))))
30271 }
30272 _ => Ok(Expression::Function(Box::new(Function::new(
30273 "ARRAY_TO_STRING".to_string(),
30274 vec![arr, sep, null_text],
30275 )))),
30276 }
30277 }
30278
30279 // LENGTH(x) -> CASE TYPEOF(x) ... for DuckDB
30280 "LENGTH" if args.len() == 1 => {
30281 let arg = args.remove(0);
30282 match target {
30283 DialectType::DuckDB => {
30284 // CASE TYPEOF(foo) WHEN 'BLOB' THEN OCTET_LENGTH(CAST(foo AS BLOB)) ELSE LENGTH(CAST(foo AS TEXT)) END
30285 let typeof_func = Expression::Function(Box::new(Function::new(
30286 "TYPEOF".to_string(),
30287 vec![arg.clone()],
30288 )));
30289 let blob_cast = Expression::Cast(Box::new(Cast {
30290 this: arg.clone(),
30291 to: DataType::VarBinary { length: None },
30292 trailing_comments: vec![],
30293 double_colon_syntax: false,
30294 format: None,
30295 default: None,
30296 }));
30297 let octet_length = Expression::Function(Box::new(Function::new(
30298 "OCTET_LENGTH".to_string(),
30299 vec![blob_cast],
30300 )));
30301 let text_cast = Expression::Cast(Box::new(Cast {
30302 this: arg,
30303 to: DataType::Text,
30304 trailing_comments: vec![],
30305 double_colon_syntax: false,
30306 format: None,
30307 default: None,
30308 }));
30309 let length_text = Expression::Function(Box::new(Function::new(
30310 "LENGTH".to_string(),
30311 vec![text_cast],
30312 )));
30313 Ok(Expression::Case(Box::new(crate::expressions::Case {
30314 operand: Some(typeof_func),
30315 whens: vec![(
30316 Expression::Literal(Literal::String("BLOB".to_string())),
30317 octet_length,
30318 )],
30319 else_: Some(length_text),
30320 comments: Vec::new(),
30321 })))
30322 }
30323 _ => Ok(Expression::Function(Box::new(Function::new(
30324 "LENGTH".to_string(),
30325 vec![arg],
30326 )))),
30327 }
30328 }
30329
30330 // PERCENTILE_CONT(x, fraction RESPECT NULLS) -> QUANTILE_CONT(x, fraction) for DuckDB
30331 "PERCENTILE_CONT" if args.len() >= 2 && matches!(source, DialectType::BigQuery) => {
30332 // BigQuery PERCENTILE_CONT(x, fraction [RESPECT|IGNORE NULLS]) OVER ()
30333 // The args should be [x, fraction] with the null handling stripped
30334 // For DuckDB: QUANTILE_CONT(x, fraction)
30335 // For Spark: PERCENTILE_CONT(x, fraction) RESPECT NULLS (handled at window level)
30336 match target {
30337 DialectType::DuckDB => {
30338 // Strip down to just 2 args, rename to QUANTILE_CONT
30339 let x = args[0].clone();
30340 let frac = args[1].clone();
30341 Ok(Expression::Function(Box::new(Function::new(
30342 "QUANTILE_CONT".to_string(),
30343 vec![x, frac],
30344 ))))
30345 }
30346 _ => Ok(Expression::Function(Box::new(Function::new(
30347 "PERCENTILE_CONT".to_string(),
30348 args,
30349 )))),
30350 }
30351 }
30352
30353 // All others: pass through
30354 _ => Ok(Expression::Function(Box::new(Function {
30355 name: f.name,
30356 args,
30357 distinct: f.distinct,
30358 trailing_comments: f.trailing_comments,
30359 use_bracket_syntax: f.use_bracket_syntax,
30360 no_parens: f.no_parens,
30361 quoted: f.quoted,
30362 span: None,
30363 }))),
30364 }
30365 }
30366
30367 /// Check if we can reliably infer the SQL type for Presto/Trino ROW CAST.
30368 /// Returns false for column references and other non-literal expressions where the type is unknown.
30369 fn can_infer_presto_type(expr: &Expression) -> bool {
30370 match expr {
30371 Expression::Literal(_) => true,
30372 Expression::Boolean(_) => true,
30373 Expression::Array(_) | Expression::ArrayFunc(_) => true,
30374 Expression::Struct(_) | Expression::StructFunc(_) => true,
30375 Expression::Function(f) => {
30376 let up = f.name.to_uppercase();
30377 up == "STRUCT"
30378 || up == "ROW"
30379 || up == "CURRENT_DATE"
30380 || up == "CURRENT_TIMESTAMP"
30381 || up == "NOW"
30382 }
30383 Expression::Cast(_) => true,
30384 Expression::Neg(inner) => Self::can_infer_presto_type(&inner.this),
30385 _ => false,
30386 }
30387 }
30388
30389 /// Infer SQL type name for a Presto/Trino ROW CAST from a literal expression
30390 fn infer_sql_type_for_presto(expr: &Expression) -> String {
30391 use crate::expressions::Literal;
30392 match expr {
30393 Expression::Literal(Literal::String(_)) => "VARCHAR".to_string(),
30394 Expression::Literal(Literal::Number(n)) => {
30395 if n.contains('.') {
30396 "DOUBLE".to_string()
30397 } else {
30398 "INTEGER".to_string()
30399 }
30400 }
30401 Expression::Boolean(_) => "BOOLEAN".to_string(),
30402 Expression::Literal(Literal::Date(_)) => "DATE".to_string(),
30403 Expression::Literal(Literal::Timestamp(_)) => "TIMESTAMP".to_string(),
30404 Expression::Literal(Literal::Datetime(_)) => "TIMESTAMP".to_string(),
30405 Expression::Array(_) | Expression::ArrayFunc(_) => "ARRAY(VARCHAR)".to_string(),
30406 Expression::Struct(_) | Expression::StructFunc(_) => "ROW".to_string(),
30407 Expression::Function(f) => {
30408 let up = f.name.to_uppercase();
30409 if up == "STRUCT" || up == "ROW" {
30410 "ROW".to_string()
30411 } else if up == "CURRENT_DATE" {
30412 "DATE".to_string()
30413 } else if up == "CURRENT_TIMESTAMP" || up == "NOW" {
30414 "TIMESTAMP".to_string()
30415 } else {
30416 "VARCHAR".to_string()
30417 }
30418 }
30419 Expression::Cast(c) => {
30420 // If already cast, use the target type
30421 Self::data_type_to_presto_string(&c.to)
30422 }
30423 _ => "VARCHAR".to_string(),
30424 }
30425 }
30426
30427 /// Convert a DataType to its Presto/Trino string representation for ROW type
30428 fn data_type_to_presto_string(dt: &crate::expressions::DataType) -> String {
30429 use crate::expressions::DataType;
30430 match dt {
30431 DataType::VarChar { .. } | DataType::Text | DataType::String { .. } => {
30432 "VARCHAR".to_string()
30433 }
30434 DataType::Int { .. }
30435 | DataType::BigInt { .. }
30436 | DataType::SmallInt { .. }
30437 | DataType::TinyInt { .. } => "INTEGER".to_string(),
30438 DataType::Float { .. } | DataType::Double { .. } => "DOUBLE".to_string(),
30439 DataType::Boolean => "BOOLEAN".to_string(),
30440 DataType::Date => "DATE".to_string(),
30441 DataType::Timestamp { .. } => "TIMESTAMP".to_string(),
30442 DataType::Struct { fields, .. } => {
30443 let field_strs: Vec<String> = fields
30444 .iter()
30445 .map(|f| {
30446 format!(
30447 "{} {}",
30448 f.name,
30449 Self::data_type_to_presto_string(&f.data_type)
30450 )
30451 })
30452 .collect();
30453 format!("ROW({})", field_strs.join(", "))
30454 }
30455 DataType::Array { element_type, .. } => {
30456 format!("ARRAY({})", Self::data_type_to_presto_string(element_type))
30457 }
30458 DataType::Custom { name } => {
30459 // Pass through custom type names (e.g., "INTEGER", "VARCHAR" from earlier inference)
30460 name.clone()
30461 }
30462 _ => "VARCHAR".to_string(),
30463 }
30464 }
30465
30466 /// Convert IntervalUnit to string
30467 fn interval_unit_to_string(unit: &crate::expressions::IntervalUnit) -> String {
30468 match unit {
30469 crate::expressions::IntervalUnit::Year => "YEAR".to_string(),
30470 crate::expressions::IntervalUnit::Quarter => "QUARTER".to_string(),
30471 crate::expressions::IntervalUnit::Month => "MONTH".to_string(),
30472 crate::expressions::IntervalUnit::Week => "WEEK".to_string(),
30473 crate::expressions::IntervalUnit::Day => "DAY".to_string(),
30474 crate::expressions::IntervalUnit::Hour => "HOUR".to_string(),
30475 crate::expressions::IntervalUnit::Minute => "MINUTE".to_string(),
30476 crate::expressions::IntervalUnit::Second => "SECOND".to_string(),
30477 crate::expressions::IntervalUnit::Millisecond => "MILLISECOND".to_string(),
30478 crate::expressions::IntervalUnit::Microsecond => "MICROSECOND".to_string(),
30479 crate::expressions::IntervalUnit::Nanosecond => "NANOSECOND".to_string(),
30480 }
30481 }
30482
30483 /// Extract unit string from an expression (uppercased)
30484 fn get_unit_str_static(expr: &Expression) -> String {
30485 use crate::expressions::Literal;
30486 match expr {
30487 Expression::Identifier(id) => id.name.to_uppercase(),
30488 Expression::Literal(Literal::String(s)) => s.to_uppercase(),
30489 Expression::Column(col) => col.name.name.to_uppercase(),
30490 Expression::Function(f) => {
30491 let base = f.name.to_uppercase();
30492 if !f.args.is_empty() {
30493 let inner = Self::get_unit_str_static(&f.args[0]);
30494 format!("{}({})", base, inner)
30495 } else {
30496 base
30497 }
30498 }
30499 _ => "DAY".to_string(),
30500 }
30501 }
30502
30503 /// Parse unit string to IntervalUnit
30504 fn parse_interval_unit_static(s: &str) -> crate::expressions::IntervalUnit {
30505 match s {
30506 "YEAR" | "YY" | "YYYY" => crate::expressions::IntervalUnit::Year,
30507 "QUARTER" | "QQ" | "Q" => crate::expressions::IntervalUnit::Quarter,
30508 "MONTH" | "MM" | "M" => crate::expressions::IntervalUnit::Month,
30509 "WEEK" | "WK" | "WW" | "ISOWEEK" => crate::expressions::IntervalUnit::Week,
30510 "DAY" | "DD" | "D" | "DY" => crate::expressions::IntervalUnit::Day,
30511 "HOUR" | "HH" => crate::expressions::IntervalUnit::Hour,
30512 "MINUTE" | "MI" | "N" => crate::expressions::IntervalUnit::Minute,
30513 "SECOND" | "SS" | "S" => crate::expressions::IntervalUnit::Second,
30514 "MILLISECOND" | "MS" => crate::expressions::IntervalUnit::Millisecond,
30515 "MICROSECOND" | "MCS" | "US" => crate::expressions::IntervalUnit::Microsecond,
30516 _ if s.starts_with("WEEK(") => crate::expressions::IntervalUnit::Week,
30517 _ => crate::expressions::IntervalUnit::Day,
30518 }
30519 }
30520
30521 /// Convert expression to simple string for interval building
30522 fn expr_to_string_static(expr: &Expression) -> String {
30523 use crate::expressions::Literal;
30524 match expr {
30525 Expression::Literal(Literal::Number(s)) => s.clone(),
30526 Expression::Literal(Literal::String(s)) => s.clone(),
30527 Expression::Identifier(id) => id.name.clone(),
30528 Expression::Neg(f) => format!("-{}", Self::expr_to_string_static(&f.this)),
30529 _ => "1".to_string(),
30530 }
30531 }
30532
30533 /// Extract a simple string representation from a literal expression
30534 fn expr_to_string(expr: &Expression) -> String {
30535 use crate::expressions::Literal;
30536 match expr {
30537 Expression::Literal(Literal::Number(s)) => s.clone(),
30538 Expression::Literal(Literal::String(s)) => s.clone(),
30539 Expression::Neg(f) => format!("-{}", Self::expr_to_string(&f.this)),
30540 Expression::Identifier(id) => id.name.clone(),
30541 _ => "1".to_string(),
30542 }
30543 }
30544
30545 /// Quote an interval value expression as a string literal if it's a number (or negated number)
30546 fn quote_interval_val(expr: &Expression) -> Expression {
30547 use crate::expressions::Literal;
30548 match expr {
30549 Expression::Literal(Literal::Number(n)) => {
30550 Expression::Literal(Literal::String(n.clone()))
30551 }
30552 Expression::Literal(Literal::String(_)) => expr.clone(),
30553 Expression::Neg(inner) => {
30554 if let Expression::Literal(Literal::Number(n)) = &inner.this {
30555 Expression::Literal(Literal::String(format!("-{}", n)))
30556 } else {
30557 expr.clone()
30558 }
30559 }
30560 _ => expr.clone(),
30561 }
30562 }
30563
30564 /// Check if a timestamp string contains timezone info (offset like +02:00, or named timezone)
30565 fn timestamp_string_has_timezone(ts: &str) -> bool {
30566 let trimmed = ts.trim();
30567 // Check for numeric timezone offsets: +N, -N, +NN:NN, -NN:NN at end
30568 if let Some(last_space) = trimmed.rfind(' ') {
30569 let suffix = &trimmed[last_space + 1..];
30570 if (suffix.starts_with('+') || suffix.starts_with('-')) && suffix.len() > 1 {
30571 let rest = &suffix[1..];
30572 if rest.chars().all(|c| c.is_ascii_digit() || c == ':') {
30573 return true;
30574 }
30575 }
30576 }
30577 // Check for named timezone abbreviations
30578 let ts_lower = trimmed.to_lowercase();
30579 let tz_abbrevs = [" utc", " gmt", " cet", " est", " pst", " cst", " mst"];
30580 for abbrev in &tz_abbrevs {
30581 if ts_lower.ends_with(abbrev) {
30582 return true;
30583 }
30584 }
30585 false
30586 }
30587
30588 /// Maybe CAST timestamp literal to TIMESTAMPTZ for Snowflake
30589 fn maybe_cast_ts_to_tz(expr: Expression, func_name: &str) -> Expression {
30590 use crate::expressions::{Cast, DataType, Literal};
30591 match expr {
30592 Expression::Literal(Literal::Timestamp(s)) => {
30593 let tz = func_name.starts_with("TIMESTAMP");
30594 Expression::Cast(Box::new(Cast {
30595 this: Expression::Literal(Literal::String(s)),
30596 to: if tz {
30597 DataType::Timestamp {
30598 timezone: true,
30599 precision: None,
30600 }
30601 } else {
30602 DataType::Timestamp {
30603 timezone: false,
30604 precision: None,
30605 }
30606 },
30607 trailing_comments: vec![],
30608 double_colon_syntax: false,
30609 format: None,
30610 default: None,
30611 }))
30612 }
30613 other => other,
30614 }
30615 }
30616
30617 /// Maybe CAST timestamp literal to TIMESTAMP (no tz)
30618 fn maybe_cast_ts(expr: Expression) -> Expression {
30619 use crate::expressions::{Cast, DataType, Literal};
30620 match expr {
30621 Expression::Literal(Literal::Timestamp(s)) => Expression::Cast(Box::new(Cast {
30622 this: Expression::Literal(Literal::String(s)),
30623 to: DataType::Timestamp {
30624 timezone: false,
30625 precision: None,
30626 },
30627 trailing_comments: vec![],
30628 double_colon_syntax: false,
30629 format: None,
30630 default: None,
30631 })),
30632 other => other,
30633 }
30634 }
30635
30636 /// Convert DATE 'x' literal to CAST('x' AS DATE)
30637 fn date_literal_to_cast(expr: Expression) -> Expression {
30638 use crate::expressions::{Cast, DataType, Literal};
30639 match expr {
30640 Expression::Literal(Literal::Date(s)) => Expression::Cast(Box::new(Cast {
30641 this: Expression::Literal(Literal::String(s)),
30642 to: DataType::Date,
30643 trailing_comments: vec![],
30644 double_colon_syntax: false,
30645 format: None,
30646 default: None,
30647 })),
30648 other => other,
30649 }
30650 }
30651
30652 /// Ensure an expression that should be a date is CAST(... AS DATE).
30653 /// Handles both DATE literals and string literals that look like dates.
30654 fn ensure_cast_date(expr: Expression) -> Expression {
30655 use crate::expressions::{Cast, DataType, Literal};
30656 match expr {
30657 Expression::Literal(Literal::Date(s)) => Expression::Cast(Box::new(Cast {
30658 this: Expression::Literal(Literal::String(s)),
30659 to: DataType::Date,
30660 trailing_comments: vec![],
30661 double_colon_syntax: false,
30662 format: None,
30663 default: None,
30664 })),
30665 Expression::Literal(Literal::String(ref _s)) => {
30666 // String literal that should be a date -> CAST('s' AS DATE)
30667 Expression::Cast(Box::new(Cast {
30668 this: expr,
30669 to: DataType::Date,
30670 trailing_comments: vec![],
30671 double_colon_syntax: false,
30672 format: None,
30673 default: None,
30674 }))
30675 }
30676 // Already a CAST or other expression -> leave as-is
30677 other => other,
30678 }
30679 }
30680
30681 /// Force CAST(expr AS DATE) for any expression (not just literals)
30682 /// Skips if the expression is already a CAST to DATE
30683 fn force_cast_date(expr: Expression) -> Expression {
30684 use crate::expressions::{Cast, DataType};
30685 // If it's already a CAST to DATE, don't double-wrap
30686 if let Expression::Cast(ref c) = expr {
30687 if matches!(c.to, DataType::Date) {
30688 return expr;
30689 }
30690 }
30691 Expression::Cast(Box::new(Cast {
30692 this: expr,
30693 to: DataType::Date,
30694 trailing_comments: vec![],
30695 double_colon_syntax: false,
30696 format: None,
30697 default: None,
30698 }))
30699 }
30700
30701 /// Internal TO_DATE function that won't be converted to CAST by the Snowflake handler.
30702 /// Uses the name `_POLYGLOT_TO_DATE` which is not recognized by the TO_DATE -> CAST logic.
30703 /// The Snowflake DATEDIFF handler converts these back to TO_DATE.
30704 const PRESERVED_TO_DATE: &'static str = "_POLYGLOT_TO_DATE";
30705
30706 fn ensure_to_date_preserved(expr: Expression) -> Expression {
30707 use crate::expressions::{Function, Literal};
30708 if matches!(expr, Expression::Literal(Literal::String(_))) {
30709 Expression::Function(Box::new(Function::new(
30710 Self::PRESERVED_TO_DATE.to_string(),
30711 vec![expr],
30712 )))
30713 } else {
30714 expr
30715 }
30716 }
30717
30718 /// TRY_CAST(expr AS DATE) - used for DuckDB when TO_DATE is unwrapped
30719 fn try_cast_date(expr: Expression) -> Expression {
30720 use crate::expressions::{Cast, DataType};
30721 Expression::TryCast(Box::new(Cast {
30722 this: expr,
30723 to: DataType::Date,
30724 trailing_comments: vec![],
30725 double_colon_syntax: false,
30726 format: None,
30727 default: None,
30728 }))
30729 }
30730
30731 /// CAST(CAST(expr AS TIMESTAMP) AS DATE) - used when Hive string dates need to be cast
30732 fn double_cast_timestamp_date(expr: Expression) -> Expression {
30733 use crate::expressions::{Cast, DataType};
30734 let inner = Expression::Cast(Box::new(Cast {
30735 this: expr,
30736 to: DataType::Timestamp {
30737 timezone: false,
30738 precision: None,
30739 },
30740 trailing_comments: vec![],
30741 double_colon_syntax: false,
30742 format: None,
30743 default: None,
30744 }));
30745 Expression::Cast(Box::new(Cast {
30746 this: inner,
30747 to: DataType::Date,
30748 trailing_comments: vec![],
30749 double_colon_syntax: false,
30750 format: None,
30751 default: None,
30752 }))
30753 }
30754
30755 /// CAST(CAST(expr AS DATETIME) AS DATE) - BigQuery variant
30756 fn double_cast_datetime_date(expr: Expression) -> Expression {
30757 use crate::expressions::{Cast, DataType};
30758 let inner = Expression::Cast(Box::new(Cast {
30759 this: expr,
30760 to: DataType::Custom {
30761 name: "DATETIME".to_string(),
30762 },
30763 trailing_comments: vec![],
30764 double_colon_syntax: false,
30765 format: None,
30766 default: None,
30767 }));
30768 Expression::Cast(Box::new(Cast {
30769 this: inner,
30770 to: DataType::Date,
30771 trailing_comments: vec![],
30772 double_colon_syntax: false,
30773 format: None,
30774 default: None,
30775 }))
30776 }
30777
30778 /// CAST(CAST(expr AS DATETIME2) AS DATE) - TSQL variant
30779 fn double_cast_datetime2_date(expr: Expression) -> Expression {
30780 use crate::expressions::{Cast, DataType};
30781 let inner = Expression::Cast(Box::new(Cast {
30782 this: expr,
30783 to: DataType::Custom {
30784 name: "DATETIME2".to_string(),
30785 },
30786 trailing_comments: vec![],
30787 double_colon_syntax: false,
30788 format: None,
30789 default: None,
30790 }));
30791 Expression::Cast(Box::new(Cast {
30792 this: inner,
30793 to: DataType::Date,
30794 trailing_comments: vec![],
30795 double_colon_syntax: false,
30796 format: None,
30797 default: None,
30798 }))
30799 }
30800
30801 /// Convert Hive/Java-style date format strings to C-style (strftime) format
30802 /// e.g., "yyyy-MM-dd'T'HH" -> "%Y-%m-%d'T'%H"
30803 fn hive_format_to_c_format(fmt: &str) -> String {
30804 let mut result = String::new();
30805 let chars: Vec<char> = fmt.chars().collect();
30806 let mut i = 0;
30807 while i < chars.len() {
30808 match chars[i] {
30809 'y' => {
30810 let mut count = 0;
30811 while i < chars.len() && chars[i] == 'y' {
30812 count += 1;
30813 i += 1;
30814 }
30815 if count >= 4 {
30816 result.push_str("%Y");
30817 } else if count == 2 {
30818 result.push_str("%y");
30819 } else {
30820 result.push_str("%Y");
30821 }
30822 }
30823 'M' => {
30824 let mut count = 0;
30825 while i < chars.len() && chars[i] == 'M' {
30826 count += 1;
30827 i += 1;
30828 }
30829 if count >= 3 {
30830 result.push_str("%b");
30831 } else if count == 2 {
30832 result.push_str("%m");
30833 } else {
30834 result.push_str("%m");
30835 }
30836 }
30837 'd' => {
30838 let mut _count = 0;
30839 while i < chars.len() && chars[i] == 'd' {
30840 _count += 1;
30841 i += 1;
30842 }
30843 result.push_str("%d");
30844 }
30845 'H' => {
30846 let mut _count = 0;
30847 while i < chars.len() && chars[i] == 'H' {
30848 _count += 1;
30849 i += 1;
30850 }
30851 result.push_str("%H");
30852 }
30853 'h' => {
30854 let mut _count = 0;
30855 while i < chars.len() && chars[i] == 'h' {
30856 _count += 1;
30857 i += 1;
30858 }
30859 result.push_str("%I");
30860 }
30861 'm' => {
30862 let mut _count = 0;
30863 while i < chars.len() && chars[i] == 'm' {
30864 _count += 1;
30865 i += 1;
30866 }
30867 result.push_str("%M");
30868 }
30869 's' => {
30870 let mut _count = 0;
30871 while i < chars.len() && chars[i] == 's' {
30872 _count += 1;
30873 i += 1;
30874 }
30875 result.push_str("%S");
30876 }
30877 'S' => {
30878 // Fractional seconds - skip
30879 while i < chars.len() && chars[i] == 'S' {
30880 i += 1;
30881 }
30882 result.push_str("%f");
30883 }
30884 'a' => {
30885 // AM/PM
30886 while i < chars.len() && chars[i] == 'a' {
30887 i += 1;
30888 }
30889 result.push_str("%p");
30890 }
30891 'E' => {
30892 let mut count = 0;
30893 while i < chars.len() && chars[i] == 'E' {
30894 count += 1;
30895 i += 1;
30896 }
30897 if count >= 4 {
30898 result.push_str("%A");
30899 } else {
30900 result.push_str("%a");
30901 }
30902 }
30903 '\'' => {
30904 // Quoted literal text - pass through the quotes and content
30905 result.push('\'');
30906 i += 1;
30907 while i < chars.len() && chars[i] != '\'' {
30908 result.push(chars[i]);
30909 i += 1;
30910 }
30911 if i < chars.len() {
30912 result.push('\'');
30913 i += 1;
30914 }
30915 }
30916 c => {
30917 result.push(c);
30918 i += 1;
30919 }
30920 }
30921 }
30922 result
30923 }
30924
30925 /// Convert Hive/Java format to Presto format (uses %T for HH:mm:ss)
30926 fn hive_format_to_presto_format(fmt: &str) -> String {
30927 let c_fmt = Self::hive_format_to_c_format(fmt);
30928 // Presto uses %T for HH:MM:SS
30929 c_fmt.replace("%H:%M:%S", "%T")
30930 }
30931
30932 /// Ensure a timestamp-like expression for DuckDB with CAST(... AS TIMESTAMP)
30933 fn ensure_cast_timestamp(expr: Expression) -> Expression {
30934 use crate::expressions::{Cast, DataType, Literal};
30935 match expr {
30936 Expression::Literal(Literal::Timestamp(s)) => Expression::Cast(Box::new(Cast {
30937 this: Expression::Literal(Literal::String(s)),
30938 to: DataType::Timestamp {
30939 timezone: false,
30940 precision: None,
30941 },
30942 trailing_comments: vec![],
30943 double_colon_syntax: false,
30944 format: None,
30945 default: None,
30946 })),
30947 Expression::Literal(Literal::String(ref _s)) => Expression::Cast(Box::new(Cast {
30948 this: expr,
30949 to: DataType::Timestamp {
30950 timezone: false,
30951 precision: None,
30952 },
30953 trailing_comments: vec![],
30954 double_colon_syntax: false,
30955 format: None,
30956 default: None,
30957 })),
30958 Expression::Literal(Literal::Datetime(s)) => Expression::Cast(Box::new(Cast {
30959 this: Expression::Literal(Literal::String(s)),
30960 to: DataType::Timestamp {
30961 timezone: false,
30962 precision: None,
30963 },
30964 trailing_comments: vec![],
30965 double_colon_syntax: false,
30966 format: None,
30967 default: None,
30968 })),
30969 other => other,
30970 }
30971 }
30972
30973 /// Force CAST to TIMESTAMP for any expression (not just literals)
30974 /// Used when transpiling from Redshift/TSQL where DATEDIFF/DATEADD args need explicit timestamp cast
30975 fn force_cast_timestamp(expr: Expression) -> Expression {
30976 use crate::expressions::{Cast, DataType};
30977 // Don't double-wrap if already a CAST to TIMESTAMP
30978 if let Expression::Cast(ref c) = expr {
30979 if matches!(c.to, DataType::Timestamp { .. }) {
30980 return expr;
30981 }
30982 }
30983 Expression::Cast(Box::new(Cast {
30984 this: expr,
30985 to: DataType::Timestamp {
30986 timezone: false,
30987 precision: None,
30988 },
30989 trailing_comments: vec![],
30990 double_colon_syntax: false,
30991 format: None,
30992 default: None,
30993 }))
30994 }
30995
30996 /// Ensure a timestamp-like expression for DuckDB with CAST(... AS TIMESTAMPTZ)
30997 fn ensure_cast_timestamptz(expr: Expression) -> Expression {
30998 use crate::expressions::{Cast, DataType, Literal};
30999 match expr {
31000 Expression::Literal(Literal::Timestamp(s)) => Expression::Cast(Box::new(Cast {
31001 this: Expression::Literal(Literal::String(s)),
31002 to: DataType::Timestamp {
31003 timezone: true,
31004 precision: None,
31005 },
31006 trailing_comments: vec![],
31007 double_colon_syntax: false,
31008 format: None,
31009 default: None,
31010 })),
31011 Expression::Literal(Literal::String(ref _s)) => Expression::Cast(Box::new(Cast {
31012 this: expr,
31013 to: DataType::Timestamp {
31014 timezone: true,
31015 precision: None,
31016 },
31017 trailing_comments: vec![],
31018 double_colon_syntax: false,
31019 format: None,
31020 default: None,
31021 })),
31022 Expression::Literal(Literal::Datetime(s)) => Expression::Cast(Box::new(Cast {
31023 this: Expression::Literal(Literal::String(s)),
31024 to: DataType::Timestamp {
31025 timezone: true,
31026 precision: None,
31027 },
31028 trailing_comments: vec![],
31029 double_colon_syntax: false,
31030 format: None,
31031 default: None,
31032 })),
31033 other => other,
31034 }
31035 }
31036
31037 /// Ensure expression is CAST to DATETIME (for BigQuery)
31038 fn ensure_cast_datetime(expr: Expression) -> Expression {
31039 use crate::expressions::{Cast, DataType, Literal};
31040 match expr {
31041 Expression::Literal(Literal::String(ref _s)) => Expression::Cast(Box::new(Cast {
31042 this: expr,
31043 to: DataType::Custom {
31044 name: "DATETIME".to_string(),
31045 },
31046 trailing_comments: vec![],
31047 double_colon_syntax: false,
31048 format: None,
31049 default: None,
31050 })),
31051 other => other,
31052 }
31053 }
31054
31055 /// Force CAST expression to DATETIME (for BigQuery) - always wraps unless already DATETIME
31056 fn force_cast_datetime(expr: Expression) -> Expression {
31057 use crate::expressions::{Cast, DataType};
31058 if let Expression::Cast(ref c) = expr {
31059 if let DataType::Custom { ref name } = c.to {
31060 if name.eq_ignore_ascii_case("DATETIME") {
31061 return expr;
31062 }
31063 }
31064 }
31065 Expression::Cast(Box::new(Cast {
31066 this: expr,
31067 to: DataType::Custom {
31068 name: "DATETIME".to_string(),
31069 },
31070 trailing_comments: vec![],
31071 double_colon_syntax: false,
31072 format: None,
31073 default: None,
31074 }))
31075 }
31076
31077 /// Ensure expression is CAST to DATETIME2 (for TSQL)
31078 fn ensure_cast_datetime2(expr: Expression) -> Expression {
31079 use crate::expressions::{Cast, DataType, Literal};
31080 match expr {
31081 Expression::Literal(Literal::String(ref _s)) => Expression::Cast(Box::new(Cast {
31082 this: expr,
31083 to: DataType::Custom {
31084 name: "DATETIME2".to_string(),
31085 },
31086 trailing_comments: vec![],
31087 double_colon_syntax: false,
31088 format: None,
31089 default: None,
31090 })),
31091 other => other,
31092 }
31093 }
31094
31095 /// Convert TIMESTAMP 'x' literal to CAST('x' AS TIMESTAMPTZ) for DuckDB
31096 fn ts_literal_to_cast_tz(expr: Expression) -> Expression {
31097 use crate::expressions::{Cast, DataType, Literal};
31098 match expr {
31099 Expression::Literal(Literal::Timestamp(s)) => Expression::Cast(Box::new(Cast {
31100 this: Expression::Literal(Literal::String(s)),
31101 to: DataType::Timestamp {
31102 timezone: true,
31103 precision: None,
31104 },
31105 trailing_comments: vec![],
31106 double_colon_syntax: false,
31107 format: None,
31108 default: None,
31109 })),
31110 other => other,
31111 }
31112 }
31113
31114 /// Convert BigQuery format string to Snowflake format string
31115 fn bq_format_to_snowflake(format_expr: &Expression) -> Expression {
31116 use crate::expressions::Literal;
31117 if let Expression::Literal(Literal::String(s)) = format_expr {
31118 let sf = s
31119 .replace("%Y", "yyyy")
31120 .replace("%m", "mm")
31121 .replace("%d", "DD")
31122 .replace("%H", "HH24")
31123 .replace("%M", "MI")
31124 .replace("%S", "SS")
31125 .replace("%b", "mon")
31126 .replace("%B", "Month")
31127 .replace("%e", "FMDD");
31128 Expression::Literal(Literal::String(sf))
31129 } else {
31130 format_expr.clone()
31131 }
31132 }
31133
31134 /// Convert BigQuery format string to DuckDB format string
31135 fn bq_format_to_duckdb(format_expr: &Expression) -> Expression {
31136 use crate::expressions::Literal;
31137 if let Expression::Literal(Literal::String(s)) = format_expr {
31138 let duck = s
31139 .replace("%T", "%H:%M:%S")
31140 .replace("%F", "%Y-%m-%d")
31141 .replace("%D", "%m/%d/%y")
31142 .replace("%x", "%m/%d/%y")
31143 .replace("%c", "%a %b %-d %H:%M:%S %Y")
31144 .replace("%e", "%-d")
31145 .replace("%E6S", "%S.%f");
31146 Expression::Literal(Literal::String(duck))
31147 } else {
31148 format_expr.clone()
31149 }
31150 }
31151
31152 /// Convert BigQuery CAST FORMAT elements (like YYYY, MM, DD) to strftime (like %Y, %m, %d)
31153 fn bq_cast_format_to_strftime(format_expr: &Expression) -> Expression {
31154 use crate::expressions::Literal;
31155 if let Expression::Literal(Literal::String(s)) = format_expr {
31156 // Replace format elements from longest to shortest to avoid partial matches
31157 let result = s
31158 .replace("YYYYMMDD", "%Y%m%d")
31159 .replace("YYYY", "%Y")
31160 .replace("YY", "%y")
31161 .replace("MONTH", "%B")
31162 .replace("MON", "%b")
31163 .replace("MM", "%m")
31164 .replace("DD", "%d")
31165 .replace("HH24", "%H")
31166 .replace("HH12", "%I")
31167 .replace("HH", "%I")
31168 .replace("MI", "%M")
31169 .replace("SSTZH", "%S%z")
31170 .replace("SS", "%S")
31171 .replace("TZH", "%z");
31172 Expression::Literal(Literal::String(result))
31173 } else {
31174 format_expr.clone()
31175 }
31176 }
31177
31178 /// Normalize BigQuery format strings for BQ->BQ output
31179 fn bq_format_normalize_bq(format_expr: &Expression) -> Expression {
31180 use crate::expressions::Literal;
31181 if let Expression::Literal(Literal::String(s)) = format_expr {
31182 let norm = s.replace("%H:%M:%S", "%T").replace("%x", "%D");
31183 Expression::Literal(Literal::String(norm))
31184 } else {
31185 format_expr.clone()
31186 }
31187 }
31188}
31189
31190#[cfg(test)]
31191mod tests {
31192 use super::*;
31193
31194 #[test]
31195 fn test_dialect_type_from_str() {
31196 assert_eq!(
31197 "postgres".parse::<DialectType>().unwrap(),
31198 DialectType::PostgreSQL
31199 );
31200 assert_eq!(
31201 "postgresql".parse::<DialectType>().unwrap(),
31202 DialectType::PostgreSQL
31203 );
31204 assert_eq!("mysql".parse::<DialectType>().unwrap(), DialectType::MySQL);
31205 assert_eq!(
31206 "bigquery".parse::<DialectType>().unwrap(),
31207 DialectType::BigQuery
31208 );
31209 }
31210
31211 #[test]
31212 fn test_basic_transpile() {
31213 let dialect = Dialect::get(DialectType::Generic);
31214 let result = dialect
31215 .transpile_to("SELECT 1", DialectType::PostgreSQL)
31216 .unwrap();
31217 assert_eq!(result.len(), 1);
31218 assert_eq!(result[0], "SELECT 1");
31219 }
31220
31221 #[test]
31222 fn test_function_transformation_mysql() {
31223 // NVL should be transformed to IFNULL in MySQL
31224 let dialect = Dialect::get(DialectType::Generic);
31225 let result = dialect
31226 .transpile_to("SELECT NVL(a, b)", DialectType::MySQL)
31227 .unwrap();
31228 assert_eq!(result[0], "SELECT IFNULL(a, b)");
31229 }
31230
    // NOTE(review): exploratory smoke test — beyond the unwraps it asserts
    // nothing; intermediate and final SQL are printed for manual inspection.
    // TODO: pin expected outputs once the GET_PATH lowering is stable.
    #[test]
    fn test_get_path_duckdb() {
        // Test: step by step
        let snowflake = Dialect::get(DialectType::Snowflake);

        // Step 1: Parse and check what Snowflake produces as intermediate
        let result_sf_sf = snowflake
            .transpile_to(
                "SELECT PARSE_JSON('{\"fruit\":\"banana\"}'):fruit",
                DialectType::Snowflake,
            )
            .unwrap();
        eprintln!("Snowflake->Snowflake colon: {}", result_sf_sf[0]);

        // Step 2: DuckDB target
        let result_sf_dk = snowflake
            .transpile_to(
                "SELECT PARSE_JSON('{\"fruit\":\"banana\"}'):fruit",
                DialectType::DuckDB,
            )
            .unwrap();
        eprintln!("Snowflake->DuckDB colon: {}", result_sf_dk[0]);

        // Step 3: GET_PATH directly
        let result_gp = snowflake
            .transpile_to(
                "SELECT GET_PATH(PARSE_JSON('{\"fruit\":\"banana\"}'), 'fruit')",
                DialectType::DuckDB,
            )
            .unwrap();
        eprintln!("Snowflake->DuckDB explicit GET_PATH: {}", result_gp[0]);
    }
31263
31264 #[test]
31265 fn test_function_transformation_postgres() {
31266 // IFNULL should be transformed to COALESCE in PostgreSQL
31267 let dialect = Dialect::get(DialectType::Generic);
31268 let result = dialect
31269 .transpile_to("SELECT IFNULL(a, b)", DialectType::PostgreSQL)
31270 .unwrap();
31271 assert_eq!(result[0], "SELECT COALESCE(a, b)");
31272
31273 // NVL should also be transformed to COALESCE
31274 let result = dialect
31275 .transpile_to("SELECT NVL(a, b)", DialectType::PostgreSQL)
31276 .unwrap();
31277 assert_eq!(result[0], "SELECT COALESCE(a, b)");
31278 }
31279
31280 #[test]
31281 fn test_hive_cast_to_trycast() {
31282 // Hive CAST should become TRY_CAST for targets that support it
31283 let hive = Dialect::get(DialectType::Hive);
31284 let result = hive
31285 .transpile_to("CAST(1 AS INT)", DialectType::DuckDB)
31286 .unwrap();
31287 assert_eq!(result[0], "TRY_CAST(1 AS INT)");
31288
31289 let result = hive
31290 .transpile_to("CAST(1 AS INT)", DialectType::Presto)
31291 .unwrap();
31292 assert_eq!(result[0], "TRY_CAST(1 AS INTEGER)");
31293 }
31294
    // Checks ARRAY<DATE> through both entry points: the one-shot transpile_to
    // API and the explicit parse -> transform -> generate pipeline used by the
    // identity tests, since the two paths have diverged before.
    #[test]
    fn test_hive_array_identity() {
        // Hive ARRAY<DATE> should preserve angle bracket syntax
        let sql = "CREATE EXTERNAL TABLE `my_table` (`a7` ARRAY<DATE>) ROW FORMAT SERDE 'a' STORED AS INPUTFORMAT 'b' OUTPUTFORMAT 'c' LOCATION 'd' TBLPROPERTIES ('e'='f')";
        let hive = Dialect::get(DialectType::Hive);

        // Test via transpile_to (this works)
        let result = hive.transpile_to(sql, DialectType::Hive).unwrap();
        eprintln!("Hive ARRAY via transpile_to: {}", result[0]);
        assert!(
            result[0].contains("ARRAY<DATE>"),
            "transpile_to: Expected ARRAY<DATE>, got: {}",
            result[0]
        );

        // Test via parse -> transform -> generate (identity test path)
        let ast = hive.parse(sql).unwrap();
        let transformed = hive.transform(ast[0].clone()).unwrap();
        let output = hive.generate(&transformed).unwrap();
        eprintln!("Hive ARRAY via identity path: {}", output);
        assert!(
            output.contains("ARRAY<DATE>"),
            "identity path: Expected ARRAY<DATE>, got: {}",
            output
        );
    }
31321
    // DELETE is the only statement where StarRocks rejects BETWEEN, so the
    // expansion must be scoped to DELETE and leave SELECT untouched.
    #[test]
    fn test_starrocks_delete_between_expansion() {
        // StarRocks doesn't support BETWEEN in DELETE statements
        let dialect = Dialect::get(DialectType::Generic);

        // BETWEEN should be expanded to >= AND <= in DELETE
        let result = dialect
            .transpile_to(
                "DELETE FROM t WHERE a BETWEEN b AND c",
                DialectType::StarRocks,
            )
            .unwrap();
        assert_eq!(result[0], "DELETE FROM t WHERE a >= b AND a <= c");

        // NOT BETWEEN should be expanded to < OR > in DELETE
        let result = dialect
            .transpile_to(
                "DELETE FROM t WHERE a NOT BETWEEN b AND c",
                DialectType::StarRocks,
            )
            .unwrap();
        assert_eq!(result[0], "DELETE FROM t WHERE a < b OR a > c");

        // BETWEEN in SELECT should NOT be expanded (StarRocks supports it there)
        let result = dialect
            .transpile_to(
                "SELECT * FROM t WHERE a BETWEEN b AND c",
                DialectType::StarRocks,
            )
            .unwrap();
        assert!(
            result[0].contains("BETWEEN"),
            "BETWEEN should be preserved in SELECT"
        );
    }
31357
31358 #[test]
31359 fn test_snowflake_ltrim_rtrim_parse() {
31360 let sf = Dialect::get(DialectType::Snowflake);
31361 let sql = "SELECT LTRIM(RTRIM(col)) FROM t1";
31362 let result = sf.transpile_to(sql, DialectType::DuckDB);
31363 match &result {
31364 Ok(r) => eprintln!("LTRIM/RTRIM result: {}", r[0]),
31365 Err(e) => eprintln!("LTRIM/RTRIM error: {}", e),
31366 }
31367 assert!(
31368 result.is_ok(),
31369 "Expected successful parse of LTRIM(RTRIM(col)), got error: {:?}",
31370 result.err()
31371 );
31372 }
31373
31374 #[test]
31375 fn test_duckdb_count_if_parse() {
31376 let duck = Dialect::get(DialectType::DuckDB);
31377 let sql = "COUNT_IF(x)";
31378 let result = duck.transpile_to(sql, DialectType::DuckDB);
31379 match &result {
31380 Ok(r) => eprintln!("COUNT_IF result: {}", r[0]),
31381 Err(e) => eprintln!("COUNT_IF error: {}", e),
31382 }
31383 assert!(
31384 result.is_ok(),
31385 "Expected successful parse of COUNT_IF(x), got error: {:?}",
31386 result.err()
31387 );
31388 }
31389
31390 #[test]
31391 fn test_tsql_cast_tinyint_parse() {
31392 let tsql = Dialect::get(DialectType::TSQL);
31393 let sql = "CAST(X AS TINYINT)";
31394 let result = tsql.transpile_to(sql, DialectType::DuckDB);
31395 match &result {
31396 Ok(r) => eprintln!("TSQL CAST TINYINT result: {}", r[0]),
31397 Err(e) => eprintln!("TSQL CAST TINYINT error: {}", e),
31398 }
31399 assert!(
31400 result.is_ok(),
31401 "Expected successful transpile, got error: {:?}",
31402 result.err()
31403 );
31404 }
31405
31406 #[test]
31407 fn test_pg_hash_bitwise_xor() {
31408 let dialect = Dialect::get(DialectType::PostgreSQL);
31409 let result = dialect
31410 .transpile_to("x # y", DialectType::PostgreSQL)
31411 .unwrap();
31412 assert_eq!(result[0], "x # y");
31413 }
31414
31415 #[test]
31416 fn test_pg_array_to_duckdb() {
31417 let dialect = Dialect::get(DialectType::PostgreSQL);
31418 let result = dialect
31419 .transpile_to("SELECT ARRAY[1, 2, 3] @> ARRAY[1, 2]", DialectType::DuckDB)
31420 .unwrap();
31421 assert_eq!(result[0], "SELECT [1, 2, 3] @> [1, 2]");
31422 }
31423
31424 #[test]
31425 fn test_array_remove_bigquery() {
31426 let dialect = Dialect::get(DialectType::Generic);
31427 let result = dialect
31428 .transpile_to("ARRAY_REMOVE(the_array, target)", DialectType::BigQuery)
31429 .unwrap();
31430 assert_eq!(
31431 result[0],
31432 "ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)"
31433 );
31434 }
31435
    // NOTE(review): smoke test only — parses and transpiles a MAP cast to
    // ClickHouse and prints the results; no expected output is asserted yet.
    // TODO: pin the expected ClickHouse SQL once it stabilizes.
    #[test]
    fn test_map_clickhouse_case() {
        let dialect = Dialect::get(DialectType::Generic);
        let parsed = dialect
            .parse("CAST(MAP('a', '1') AS MAP(TEXT, TEXT))")
            .unwrap();
        eprintln!("MAP parsed: {:?}", parsed);
        let result = dialect
            .transpile_to(
                "CAST(MAP('a', '1') AS MAP(TEXT, TEXT))",
                DialectType::ClickHouse,
            )
            .unwrap();
        eprintln!("MAP result: {}", result[0]);
    }
31451
31452 #[test]
31453 fn test_generate_date_array_presto() {
31454 let dialect = Dialect::get(DialectType::Generic);
31455 let result = dialect.transpile_to(
31456 "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
31457 DialectType::Presto,
31458 ).unwrap();
31459 eprintln!("GDA -> Presto: {}", result[0]);
31460 assert_eq!(result[0], "SELECT * FROM UNNEST(SEQUENCE(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), (1 * INTERVAL '7' DAY)))");
31461 }
31462
    // NOTE(review): smoke test only — verifies the lowering doesn't error
    // (unwrap) and prints the SQL; no expected output is asserted yet.
    #[test]
    fn test_generate_date_array_postgres() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile_to(
            "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
            DialectType::PostgreSQL,
        ).unwrap();
        eprintln!("GDA -> PostgreSQL: {}", result[0]);
    }
31472
    // NOTE(review): smoke test without assertions. Runs on a dedicated thread
    // with a 16 MiB stack — presumably the Snowflake lowering recurses deeply
    // enough to overflow the default test stack; confirm before removing.
    #[test]
    fn test_generate_date_array_snowflake() {
        std::thread::Builder::new()
            .stack_size(16 * 1024 * 1024)
            .spawn(|| {
                let dialect = Dialect::get(DialectType::Generic);
                let result = dialect.transpile_to(
                    "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
                    DialectType::Snowflake,
                ).unwrap();
                eprintln!("GDA -> Snowflake: {}", result[0]);
            })
            .unwrap()
            .join()
            .unwrap();
    }
31489
    // NOTE(review): smoke test only — checks the nested ARRAY_LENGTH(GDA(...))
    // form transpiles without error; no expected output is asserted yet.
    #[test]
    fn test_array_length_generate_date_array_snowflake() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile_to(
            "SELECT ARRAY_LENGTH(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
            DialectType::Snowflake,
        ).unwrap();
        eprintln!("ARRAY_LENGTH(GDA) -> Snowflake: {}", result[0]);
    }
31499
    // NOTE(review): smoke test only — verifies the MySQL lowering doesn't
    // error; no expected output is asserted yet.
    #[test]
    fn test_generate_date_array_mysql() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile_to(
            "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
            DialectType::MySQL,
        ).unwrap();
        eprintln!("GDA -> MySQL: {}", result[0]);
    }
31509
    // NOTE(review): smoke test only — verifies the Redshift lowering doesn't
    // error; no expected output is asserted yet.
    #[test]
    fn test_generate_date_array_redshift() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile_to(
            "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
            DialectType::Redshift,
        ).unwrap();
        eprintln!("GDA -> Redshift: {}", result[0]);
    }
31519
    // NOTE(review): smoke test only — verifies the TSQL lowering doesn't
    // error; no expected output is asserted yet.
    #[test]
    fn test_generate_date_array_tsql() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile_to(
            "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
            DialectType::TSQL,
        ).unwrap();
        eprintln!("GDA -> TSQL: {}", result[0]);
    }
31529
    // NOTE(review): smoke test only — exercises STRUCT field syntax with and
    // without the optional colon and prints both outcomes; neither result is
    // asserted, so parse failures here only surface via eprintln.
    #[test]
    fn test_struct_colon_syntax() {
        let dialect = Dialect::get(DialectType::Generic);
        // Test without colon first
        let result = dialect.transpile_to(
            "CAST((1, 2, 3, 4) AS STRUCT<a TINYINT, b SMALLINT, c INT, d BIGINT>)",
            DialectType::ClickHouse,
        );
        match result {
            Ok(r) => eprintln!("STRUCT no colon -> ClickHouse: {}", r[0]),
            Err(e) => eprintln!("STRUCT no colon error: {}", e),
        }
        // Now test with colon
        let result = dialect.transpile_to(
            "CAST((1, 2, 3, 4) AS STRUCT<a: TINYINT, b: SMALLINT, c: INT, d: BIGINT>)",
            DialectType::ClickHouse,
        );
        match result {
            Ok(r) => eprintln!("STRUCT colon -> ClickHouse: {}", r[0]),
            Err(e) => eprintln!("STRUCT colon error: {}", e),
        }
    }
31552
#[test]
fn test_generate_date_array_cte_wrapped_mysql() {
    // GENERATE_DATE_ARRAY inside a CTE must still transpile to MySQL without
    // error. Adds structural assertions so the test no longer passes vacuously.
    let dialect = Dialect::get(DialectType::Generic);
    let result = dialect
        .transpile_to(
            "WITH dates AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))) SELECT * FROM dates",
            DialectType::MySQL,
        )
        .unwrap();
    // Guard before indexing so an empty result vector fails with a clear message.
    assert!(!result.is_empty(), "transpile produced no statements");
    assert!(!result[0].is_empty(), "transpile produced empty SQL");
    eprintln!("GDA CTE -> MySQL: {}", result[0]);
}
31562
#[test]
fn test_generate_date_array_cte_wrapped_tsql() {
    // GENERATE_DATE_ARRAY inside a CTE must still transpile to TSQL without
    // error. Adds structural assertions so the test no longer passes vacuously.
    let dialect = Dialect::get(DialectType::Generic);
    let result = dialect
        .transpile_to(
            "WITH dates AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))) SELECT * FROM dates",
            DialectType::TSQL,
        )
        .unwrap();
    // Guard before indexing so an empty result vector fails with a clear message.
    assert!(!result.is_empty(), "transpile produced no statements");
    assert!(!result[0].is_empty(), "transpile produced empty SQL");
    eprintln!("GDA CTE -> TSQL: {}", result[0]);
}
31572
#[test]
fn test_decode_literal_no_null_check() {
    // Oracle DECODE whose arguments are all literals must lower to a plain
    // CASE with simple equality — no null-safe IS NULL branches.
    let oracle = Dialect::get(DialectType::Oracle);
    let sql = oracle
        .transpile_to("SELECT decode(1,2,3,4)", DialectType::DuckDB)
        .unwrap();
    let expected = "SELECT CASE WHEN 1 = 2 THEN 3 ELSE 4 END";
    assert_eq!(
        sql[0], expected,
        "Literal DECODE should not have IS NULL checks"
    );
}
31585
#[test]
fn test_decode_column_vs_literal_no_null_check() {
    // Oracle DECODE comparing a column against a literal must use simple
    // equality (matching sqlglot's behavior) — no IS NULL branches.
    let oracle = Dialect::get(DialectType::Oracle);
    let sql = oracle
        .transpile_to("SELECT decode(col, 2, 3, 4) FROM t", DialectType::DuckDB)
        .unwrap();
    let expected = "SELECT CASE WHEN col = 2 THEN 3 ELSE 4 END FROM t";
    assert_eq!(
        sql[0], expected,
        "Column vs literal DECODE should not have IS NULL checks"
    );
}
31598
#[test]
fn test_decode_column_vs_column_keeps_null_check() {
    // Oracle DECODE comparing two columns must retain the null-safe
    // comparison, which surfaces as IS NULL checks in the generated CASE.
    let oracle = Dialect::get(DialectType::Oracle);
    let sql = oracle
        .transpile_to("SELECT decode(col, col2, 3, 4) FROM t", DialectType::DuckDB)
        .unwrap();
    let generated = &sql[0];
    assert!(
        generated.contains("IS NULL"),
        "Column vs column DECODE should have IS NULL checks, got: {}",
        generated
    );
}
31612
#[test]
fn test_decode_null_search() {
    // Oracle DECODE with an explicit NULL search value must translate to an
    // IS NULL predicate rather than an equality comparison.
    let oracle = Dialect::get(DialectType::Oracle);
    let sql = oracle
        .transpile_to("SELECT decode(col, NULL, 3, 4) FROM t", DialectType::DuckDB)
        .unwrap();
    let expected = "SELECT CASE WHEN col IS NULL THEN 3 ELSE 4 END FROM t";
    assert_eq!(sql[0], expected);
}
31625}