// polyglot_sql/dialects/mod.rs
1//! SQL Dialect System
2//!
3//! This module implements the dialect abstraction layer that enables SQL transpilation
4//! between 30+ database engines. Each dialect encapsulates three concerns:
5//!
6//! - **Tokenization**: Dialect-specific lexing rules (e.g., BigQuery uses backtick quoting,
7//! MySQL uses backtick for identifiers, TSQL uses square brackets).
8//! - **Generation**: How AST nodes are rendered back to SQL text, including identifier quoting
9//! style, function name casing, and syntax variations.
10//! - **Transformation**: AST-level rewrites that convert dialect-specific constructs to/from
11//! a normalized form (e.g., Snowflake `SQUARE(x)` becomes `POWER(x, 2)`).
12//!
13//! The primary entry point is [`Dialect::get`], which returns a configured [`Dialect`] instance
14//! for a given [`DialectType`]. From there, callers can [`parse`](Dialect::parse),
15//! [`generate`](Dialect::generate), [`transform`](Dialect::transform), or
16//! [`transpile_to`](Dialect::transpile_to) another dialect in a single call.
17//!
18//! Each concrete dialect (e.g., `PostgresDialect`, `BigQueryDialect`) implements the
19//! [`DialectImpl`] trait, which provides configuration hooks and expression-level transforms.
20//! Dialect modules live in submodules of this module and are re-exported here.
21
22mod generic; // Always compiled
23
24#[cfg(feature = "dialect-athena")]
25mod athena;
26#[cfg(feature = "dialect-bigquery")]
27mod bigquery;
28#[cfg(feature = "dialect-clickhouse")]
29mod clickhouse;
30#[cfg(feature = "dialect-cockroachdb")]
31mod cockroachdb;
32#[cfg(feature = "dialect-databricks")]
33mod databricks;
34#[cfg(feature = "dialect-datafusion")]
35mod datafusion;
36#[cfg(feature = "dialect-doris")]
37mod doris;
38#[cfg(feature = "dialect-dremio")]
39mod dremio;
40#[cfg(feature = "dialect-drill")]
41mod drill;
42#[cfg(feature = "dialect-druid")]
43mod druid;
44#[cfg(feature = "dialect-duckdb")]
45mod duckdb;
46#[cfg(feature = "dialect-dune")]
47mod dune;
48#[cfg(feature = "dialect-exasol")]
49mod exasol;
50#[cfg(feature = "dialect-fabric")]
51mod fabric;
52#[cfg(feature = "dialect-hive")]
53mod hive;
54#[cfg(feature = "dialect-materialize")]
55mod materialize;
56#[cfg(feature = "dialect-mysql")]
57mod mysql;
58#[cfg(feature = "dialect-oracle")]
59mod oracle;
60#[cfg(feature = "dialect-postgresql")]
61mod postgres;
62#[cfg(feature = "dialect-presto")]
63mod presto;
64#[cfg(feature = "dialect-redshift")]
65mod redshift;
66#[cfg(feature = "dialect-risingwave")]
67mod risingwave;
68#[cfg(feature = "dialect-singlestore")]
69mod singlestore;
70#[cfg(feature = "dialect-snowflake")]
71mod snowflake;
72#[cfg(feature = "dialect-solr")]
73mod solr;
74#[cfg(feature = "dialect-spark")]
75mod spark;
76#[cfg(feature = "dialect-sqlite")]
77mod sqlite;
78#[cfg(feature = "dialect-starrocks")]
79mod starrocks;
80#[cfg(feature = "dialect-tableau")]
81mod tableau;
82#[cfg(feature = "dialect-teradata")]
83mod teradata;
84#[cfg(feature = "dialect-tidb")]
85mod tidb;
86#[cfg(feature = "dialect-trino")]
87mod trino;
88#[cfg(feature = "dialect-tsql")]
89mod tsql;
90
91pub use generic::GenericDialect; // Always available
92
93#[cfg(feature = "dialect-athena")]
94pub use athena::AthenaDialect;
95#[cfg(feature = "dialect-bigquery")]
96pub use bigquery::BigQueryDialect;
97#[cfg(feature = "dialect-clickhouse")]
98pub use clickhouse::ClickHouseDialect;
99#[cfg(feature = "dialect-cockroachdb")]
100pub use cockroachdb::CockroachDBDialect;
101#[cfg(feature = "dialect-databricks")]
102pub use databricks::DatabricksDialect;
103#[cfg(feature = "dialect-datafusion")]
104pub use datafusion::DataFusionDialect;
105#[cfg(feature = "dialect-doris")]
106pub use doris::DorisDialect;
107#[cfg(feature = "dialect-dremio")]
108pub use dremio::DremioDialect;
109#[cfg(feature = "dialect-drill")]
110pub use drill::DrillDialect;
111#[cfg(feature = "dialect-druid")]
112pub use druid::DruidDialect;
113#[cfg(feature = "dialect-duckdb")]
114pub use duckdb::DuckDBDialect;
115#[cfg(feature = "dialect-dune")]
116pub use dune::DuneDialect;
117#[cfg(feature = "dialect-exasol")]
118pub use exasol::ExasolDialect;
119#[cfg(feature = "dialect-fabric")]
120pub use fabric::FabricDialect;
121#[cfg(feature = "dialect-hive")]
122pub use hive::HiveDialect;
123#[cfg(feature = "dialect-materialize")]
124pub use materialize::MaterializeDialect;
125#[cfg(feature = "dialect-mysql")]
126pub use mysql::MySQLDialect;
127#[cfg(feature = "dialect-oracle")]
128pub use oracle::OracleDialect;
129#[cfg(feature = "dialect-postgresql")]
130pub use postgres::PostgresDialect;
131#[cfg(feature = "dialect-presto")]
132pub use presto::PrestoDialect;
133#[cfg(feature = "dialect-redshift")]
134pub use redshift::RedshiftDialect;
135#[cfg(feature = "dialect-risingwave")]
136pub use risingwave::RisingWaveDialect;
137#[cfg(feature = "dialect-singlestore")]
138pub use singlestore::SingleStoreDialect;
139#[cfg(feature = "dialect-snowflake")]
140pub use snowflake::SnowflakeDialect;
141#[cfg(feature = "dialect-solr")]
142pub use solr::SolrDialect;
143#[cfg(feature = "dialect-spark")]
144pub use spark::SparkDialect;
145#[cfg(feature = "dialect-sqlite")]
146pub use sqlite::SQLiteDialect;
147#[cfg(feature = "dialect-starrocks")]
148pub use starrocks::StarRocksDialect;
149#[cfg(feature = "dialect-tableau")]
150pub use tableau::TableauDialect;
151#[cfg(feature = "dialect-teradata")]
152pub use teradata::TeradataDialect;
153#[cfg(feature = "dialect-tidb")]
154pub use tidb::TiDBDialect;
155#[cfg(feature = "dialect-trino")]
156pub use trino::TrinoDialect;
157#[cfg(feature = "dialect-tsql")]
158pub use tsql::TSQLDialect;
159
160use crate::error::Result;
161use crate::expressions::{Expression, FunctionBody};
162use crate::generator::{Generator, GeneratorConfig};
163use crate::parser::Parser;
164use crate::tokens::{Token, Tokenizer, TokenizerConfig};
165use serde::{Deserialize, Serialize};
166use std::collections::HashMap;
167use std::sync::{Arc, LazyLock, RwLock};
168
/// Enumeration of all supported SQL dialects.
///
/// Each variant corresponds to a specific SQL database engine or query language.
/// The `Generic` variant represents standard SQL with no dialect-specific behavior,
/// and is used as the default when no dialect is specified.
///
/// Dialect names are case-insensitive when parsed from strings via [`FromStr`].
/// Some dialects accept aliases (e.g., "mssql" and "sqlserver" both resolve to [`TSQL`](DialectType::TSQL)).
///
/// Serde serialization uses the lowercase variant name (`rename_all = "lowercase"`),
/// which matches the canonical name produced by the `Display` implementation.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum DialectType {
    /// Standard SQL with no dialect-specific behavior (default; also parsed from the empty string).
    Generic,
    /// PostgreSQL -- advanced open-source relational database (also accepts "postgres").
    PostgreSQL,
    /// MySQL -- widely-used open-source relational database.
    MySQL,
    /// Google BigQuery -- serverless cloud data warehouse with unique syntax (backtick quoting, STRUCT types, QUALIFY).
    BigQuery,
    /// Snowflake -- cloud data platform with QUALIFY clause, FLATTEN, and variant types.
    Snowflake,
    /// DuckDB -- in-process analytical database with modern SQL extensions.
    DuckDB,
    /// SQLite -- lightweight embedded relational database.
    SQLite,
    /// Apache Hive -- data warehouse on Hadoop with HiveQL syntax.
    Hive,
    /// Apache Spark SQL -- distributed query engine (also accepts "spark2").
    Spark,
    /// Trino -- distributed SQL query engine (formerly PrestoSQL).
    Trino,
    /// PrestoDB -- distributed SQL query engine for big data.
    Presto,
    /// Amazon Redshift -- cloud data warehouse based on PostgreSQL.
    Redshift,
    /// Transact-SQL (T-SQL) -- Microsoft SQL Server and Azure SQL (also accepts "mssql", "sqlserver").
    TSQL,
    /// Oracle Database -- commercial relational database with PL/SQL extensions.
    Oracle,
    /// ClickHouse -- column-oriented OLAP database for real-time analytics.
    ClickHouse,
    /// Databricks SQL -- Spark-based lakehouse platform with QUALIFY support.
    Databricks,
    /// Amazon Athena -- serverless query service (hybrid Trino/Hive engine).
    Athena,
    /// Teradata -- enterprise data warehouse with proprietary SQL extensions.
    Teradata,
    /// Apache Doris -- real-time analytical database (MySQL-compatible).
    Doris,
    /// StarRocks -- sub-second OLAP database (MySQL-compatible).
    StarRocks,
    /// Materialize -- streaming SQL database built on differential dataflow.
    Materialize,
    /// RisingWave -- distributed streaming database with PostgreSQL compatibility.
    RisingWave,
    /// SingleStore (formerly MemSQL) -- distributed SQL database (also accepts "memsql").
    SingleStore,
    /// CockroachDB -- distributed SQL database with PostgreSQL compatibility (also accepts "cockroach").
    CockroachDB,
    /// TiDB -- distributed HTAP database with MySQL compatibility.
    TiDB,
    /// Apache Druid -- real-time analytics database.
    Druid,
    /// Apache Solr -- search platform with SQL interface.
    Solr,
    /// Tableau -- data visualization platform with its own SQL dialect.
    Tableau,
    /// Dune Analytics -- blockchain analytics SQL engine.
    Dune,
    /// Microsoft Fabric -- unified analytics platform (T-SQL based).
    Fabric,
    /// Apache Drill -- schema-free SQL query engine for big data.
    Drill,
    /// Dremio -- data lakehouse platform with Arrow-based query engine.
    Dremio,
    /// Exasol -- in-memory analytic database.
    Exasol,
    /// Apache DataFusion -- Arrow-based query engine with modern SQL extensions
    /// (also accepts "arrow-datafusion" and "arrow_datafusion").
    DataFusion,
}
249
250impl Default for DialectType {
251 fn default() -> Self {
252 DialectType::Generic
253 }
254}
255
256impl std::fmt::Display for DialectType {
257 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
258 match self {
259 DialectType::Generic => write!(f, "generic"),
260 DialectType::PostgreSQL => write!(f, "postgresql"),
261 DialectType::MySQL => write!(f, "mysql"),
262 DialectType::BigQuery => write!(f, "bigquery"),
263 DialectType::Snowflake => write!(f, "snowflake"),
264 DialectType::DuckDB => write!(f, "duckdb"),
265 DialectType::SQLite => write!(f, "sqlite"),
266 DialectType::Hive => write!(f, "hive"),
267 DialectType::Spark => write!(f, "spark"),
268 DialectType::Trino => write!(f, "trino"),
269 DialectType::Presto => write!(f, "presto"),
270 DialectType::Redshift => write!(f, "redshift"),
271 DialectType::TSQL => write!(f, "tsql"),
272 DialectType::Oracle => write!(f, "oracle"),
273 DialectType::ClickHouse => write!(f, "clickhouse"),
274 DialectType::Databricks => write!(f, "databricks"),
275 DialectType::Athena => write!(f, "athena"),
276 DialectType::Teradata => write!(f, "teradata"),
277 DialectType::Doris => write!(f, "doris"),
278 DialectType::StarRocks => write!(f, "starrocks"),
279 DialectType::Materialize => write!(f, "materialize"),
280 DialectType::RisingWave => write!(f, "risingwave"),
281 DialectType::SingleStore => write!(f, "singlestore"),
282 DialectType::CockroachDB => write!(f, "cockroachdb"),
283 DialectType::TiDB => write!(f, "tidb"),
284 DialectType::Druid => write!(f, "druid"),
285 DialectType::Solr => write!(f, "solr"),
286 DialectType::Tableau => write!(f, "tableau"),
287 DialectType::Dune => write!(f, "dune"),
288 DialectType::Fabric => write!(f, "fabric"),
289 DialectType::Drill => write!(f, "drill"),
290 DialectType::Dremio => write!(f, "dremio"),
291 DialectType::Exasol => write!(f, "exasol"),
292 DialectType::DataFusion => write!(f, "datafusion"),
293 }
294 }
295}
296
297impl std::str::FromStr for DialectType {
298 type Err = crate::error::Error;
299
300 fn from_str(s: &str) -> Result<Self> {
301 match s.to_lowercase().as_str() {
302 "generic" | "" => Ok(DialectType::Generic),
303 "postgres" | "postgresql" => Ok(DialectType::PostgreSQL),
304 "mysql" => Ok(DialectType::MySQL),
305 "bigquery" => Ok(DialectType::BigQuery),
306 "snowflake" => Ok(DialectType::Snowflake),
307 "duckdb" => Ok(DialectType::DuckDB),
308 "sqlite" => Ok(DialectType::SQLite),
309 "hive" => Ok(DialectType::Hive),
310 "spark" | "spark2" => Ok(DialectType::Spark),
311 "trino" => Ok(DialectType::Trino),
312 "presto" => Ok(DialectType::Presto),
313 "redshift" => Ok(DialectType::Redshift),
314 "tsql" | "mssql" | "sqlserver" => Ok(DialectType::TSQL),
315 "oracle" => Ok(DialectType::Oracle),
316 "clickhouse" => Ok(DialectType::ClickHouse),
317 "databricks" => Ok(DialectType::Databricks),
318 "athena" => Ok(DialectType::Athena),
319 "teradata" => Ok(DialectType::Teradata),
320 "doris" => Ok(DialectType::Doris),
321 "starrocks" => Ok(DialectType::StarRocks),
322 "materialize" => Ok(DialectType::Materialize),
323 "risingwave" => Ok(DialectType::RisingWave),
324 "singlestore" | "memsql" => Ok(DialectType::SingleStore),
325 "cockroachdb" | "cockroach" => Ok(DialectType::CockroachDB),
326 "tidb" => Ok(DialectType::TiDB),
327 "druid" => Ok(DialectType::Druid),
328 "solr" => Ok(DialectType::Solr),
329 "tableau" => Ok(DialectType::Tableau),
330 "dune" => Ok(DialectType::Dune),
331 "fabric" => Ok(DialectType::Fabric),
332 "drill" => Ok(DialectType::Drill),
333 "dremio" => Ok(DialectType::Dremio),
334 "exasol" => Ok(DialectType::Exasol),
335 "datafusion" | "arrow-datafusion" | "arrow_datafusion" => Ok(DialectType::DataFusion),
336 _ => Err(crate::error::Error::parse(
337 format!("Unknown dialect: {}", s),
338 0,
339 0,
340 0,
341 0,
342 )),
343 }
344 }
345}
346
/// Trait that each concrete SQL dialect must implement.
///
/// `DialectImpl` provides the configuration hooks and per-expression transform logic
/// that distinguish one dialect from another. Implementors supply:
///
/// - A [`DialectType`] identifier.
/// - Optional overrides for tokenizer and generator configuration (defaults to generic SQL).
/// - An expression-level transform function ([`transform_expr`](DialectImpl::transform_expr))
///   that rewrites individual AST nodes for this dialect (e.g., converting `NVL` to `COALESCE`).
/// - An optional preprocessing step ([`preprocess`](DialectImpl::preprocess)) for whole-tree
///   rewrites that must run before the recursive per-node transform (e.g., eliminating QUALIFY).
///
/// The default implementations are no-ops, so a minimal dialect only needs to provide
/// [`dialect_type`](DialectImpl::dialect_type) and override the methods that differ from
/// standard SQL.
pub trait DialectImpl {
    /// Returns the [`DialectType`] that identifies this dialect.
    fn dialect_type(&self) -> DialectType;

    /// Returns the tokenizer configuration for this dialect.
    ///
    /// Override to customize identifier quoting characters, string escape rules,
    /// comment styles, and other lexing behavior.
    fn tokenizer_config(&self) -> TokenizerConfig {
        // Default: generic SQL lexing rules.
        TokenizerConfig::default()
    }

    /// Returns the generator configuration for this dialect.
    ///
    /// Override to customize identifier quoting style, function name casing,
    /// keyword casing, and other SQL generation behavior.
    fn generator_config(&self) -> GeneratorConfig {
        // Default: generic SQL rendering rules.
        GeneratorConfig::default()
    }

    /// Returns a generator configuration tailored to a specific expression.
    ///
    /// Override this for hybrid dialects like Athena that route to different SQL engines
    /// based on expression type (e.g., Hive-style generation for DDL, Trino-style for DML).
    /// The default delegates to [`generator_config`](DialectImpl::generator_config),
    /// ignoring the expression entirely.
    fn generator_config_for_expr(&self, _expr: &Expression) -> GeneratorConfig {
        self.generator_config()
    }

    /// Transforms a single expression node for this dialect, without recursing into children.
    ///
    /// This is the per-node rewrite hook invoked by [`transform_recursive`]. Return the
    /// expression unchanged if no dialect-specific rewrite is needed. Transformations
    /// typically include function renaming, operator substitution, and type mapping.
    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
        // No-op by default: the node passes through unmodified.
        Ok(expr)
    }

    /// Applies whole-tree preprocessing transforms before the recursive per-node pass.
    ///
    /// Override this to apply structural rewrites that must see the entire tree at once,
    /// such as `eliminate_qualify`, `eliminate_distinct_on`, `ensure_bools`, or
    /// `explode_projection_to_unnest`. The default is a no-op pass-through.
    fn preprocess(&self, expr: Expression) -> Result<Expression> {
        Ok(expr)
    }
}
409
410/// Recursively transforms a [`DataType`](crate::expressions::DataType), handling nested
411/// parametric types such as `ARRAY<INT>`, `STRUCT<a INT, b TEXT>`, and `MAP<STRING, INT>`.
412///
413/// The outer type is first passed through `transform_fn` as an `Expression::DataType`,
414/// and then nested element/field types are recursed into. This ensures that dialect-level
415/// type mappings (e.g., `INT` to `INTEGER`) propagate into complex nested types.
416fn transform_data_type_recursive<F>(
417 dt: crate::expressions::DataType,
418 transform_fn: &F,
419) -> Result<crate::expressions::DataType>
420where
421 F: Fn(Expression) -> Result<Expression>,
422{
423 use crate::expressions::DataType;
424 // First, transform the outermost type through the expression system
425 let dt_expr = transform_fn(Expression::DataType(dt))?;
426 let dt = match dt_expr {
427 Expression::DataType(d) => d,
428 _ => {
429 return Ok(match dt_expr {
430 _ => DataType::Custom {
431 name: "UNKNOWN".to_string(),
432 },
433 })
434 }
435 };
436 // Then recurse into nested types
437 match dt {
438 DataType::Array {
439 element_type,
440 dimension,
441 } => {
442 let inner = transform_data_type_recursive(*element_type, transform_fn)?;
443 Ok(DataType::Array {
444 element_type: Box::new(inner),
445 dimension,
446 })
447 }
448 DataType::List { element_type } => {
449 let inner = transform_data_type_recursive(*element_type, transform_fn)?;
450 Ok(DataType::List {
451 element_type: Box::new(inner),
452 })
453 }
454 DataType::Struct { fields, nested } => {
455 let mut new_fields = Vec::new();
456 for mut field in fields {
457 field.data_type = transform_data_type_recursive(field.data_type, transform_fn)?;
458 new_fields.push(field);
459 }
460 Ok(DataType::Struct {
461 fields: new_fields,
462 nested,
463 })
464 }
465 DataType::Map {
466 key_type,
467 value_type,
468 } => {
469 let k = transform_data_type_recursive(*key_type, transform_fn)?;
470 let v = transform_data_type_recursive(*value_type, transform_fn)?;
471 Ok(DataType::Map {
472 key_type: Box::new(k),
473 value_type: Box::new(v),
474 })
475 }
476 other => Ok(other),
477 }
478}
479
480/// Convert DuckDB C-style format strings to Presto C-style format strings.
481/// DuckDB and Presto both use C-style % directives but with different specifiers for some cases.
482#[cfg(feature = "transpile")]
483fn duckdb_to_presto_format(fmt: &str) -> String {
484 // Order matters: handle longer patterns first to avoid partial replacements
485 let mut result = fmt.to_string();
486 // First pass: mark multi-char patterns with placeholders
487 result = result.replace("%-m", "\x01NOPADM\x01");
488 result = result.replace("%-d", "\x01NOPADD\x01");
489 result = result.replace("%-I", "\x01NOPADI\x01");
490 result = result.replace("%-H", "\x01NOPADH\x01");
491 result = result.replace("%H:%M:%S", "\x01HMS\x01");
492 result = result.replace("%Y-%m-%d", "\x01YMD\x01");
493 // Now convert individual specifiers
494 result = result.replace("%M", "%i");
495 result = result.replace("%S", "%s");
496 // Restore multi-char patterns with Presto equivalents
497 result = result.replace("\x01NOPADM\x01", "%c");
498 result = result.replace("\x01NOPADD\x01", "%e");
499 result = result.replace("\x01NOPADI\x01", "%l");
500 result = result.replace("\x01NOPADH\x01", "%k");
501 result = result.replace("\x01HMS\x01", "%T");
502 result = result.replace("\x01YMD\x01", "%Y-%m-%d");
503 result
504}
505
506/// Convert DuckDB C-style format strings to BigQuery format strings.
507/// BigQuery uses a mix of strftime-like directives.
508#[cfg(feature = "transpile")]
509fn duckdb_to_bigquery_format(fmt: &str) -> String {
510 let mut result = fmt.to_string();
511 // Handle longer patterns first
512 result = result.replace("%-d", "%e");
513 result = result.replace("%Y-%m-%d %H:%M:%S", "%F %T");
514 result = result.replace("%Y-%m-%d", "%F");
515 result = result.replace("%H:%M:%S", "%T");
516 result
517}
518
519/// Applies a transform function bottom-up through an entire expression tree.
520///
521/// This is the core tree-rewriting engine used by the dialect system. It performs
522/// a post-order (children-first) traversal: for each node, all children are recursively
523/// transformed before the node itself is passed to `transform_fn`. This bottom-up
524/// strategy means that when `transform_fn` sees a node, its children have already
525/// been rewritten, which simplifies pattern matching on sub-expressions.
526///
527/// The function handles all expression variants including SELECT clauses (FROM, WHERE,
528/// GROUP BY, HAVING, ORDER BY, QUALIFY, WITH/CTEs, WINDOW), binary operators,
529/// function calls, CASE expressions, date/time functions, and more.
530///
531/// # Arguments
532///
533/// * `expr` - The root expression to transform (consumed).
534/// * `transform_fn` - A closure that receives each expression node (after its children
535/// have been transformed) and returns a possibly-rewritten expression.
536///
537/// # Errors
538///
539/// Returns an error if `transform_fn` returns an error for any node.
540pub fn transform_recursive<F>(expr: Expression, transform_fn: &F) -> Result<Expression>
541where
542 F: Fn(Expression) -> Result<Expression>,
543{
544 use crate::expressions::BinaryOp;
545
546 // Helper macro to transform binary ops with Box<BinaryOp>
547 macro_rules! transform_binary {
548 ($variant:ident, $op:expr) => {{
549 let left = transform_recursive($op.left, transform_fn)?;
550 let right = transform_recursive($op.right, transform_fn)?;
551 Expression::$variant(Box::new(BinaryOp {
552 left,
553 right,
554 left_comments: $op.left_comments,
555 operator_comments: $op.operator_comments,
556 trailing_comments: $op.trailing_comments,
557 }))
558 }};
559 }
560
561 // First recursively transform children, then apply the transform function
562 let expr = match expr {
563 Expression::Select(mut select) => {
564 select.expressions = select
565 .expressions
566 .into_iter()
567 .map(|e| transform_recursive(e, transform_fn))
568 .collect::<Result<Vec<_>>>()?;
569
570 // Transform FROM clause
571 if let Some(mut from) = select.from.take() {
572 from.expressions = from
573 .expressions
574 .into_iter()
575 .map(|e| transform_recursive(e, transform_fn))
576 .collect::<Result<Vec<_>>>()?;
577 select.from = Some(from);
578 }
579
580 // Transform JOINs - important for CROSS APPLY / LATERAL transformations
581 select.joins = select
582 .joins
583 .into_iter()
584 .map(|mut join| {
585 join.this = transform_recursive(join.this, transform_fn)?;
586 if let Some(on) = join.on.take() {
587 join.on = Some(transform_recursive(on, transform_fn)?);
588 }
589 // Wrap join in Expression::Join to allow transform_fn to transform it
590 match transform_fn(Expression::Join(Box::new(join)))? {
591 Expression::Join(j) => Ok(*j),
592 _ => Err(crate::error::Error::parse(
593 "Join transformation returned non-join expression",
594 0,
595 0,
596 0,
597 0,
598 )),
599 }
600 })
601 .collect::<Result<Vec<_>>>()?;
602
603 // Transform LATERAL VIEW expressions (Hive/Spark)
604 select.lateral_views = select
605 .lateral_views
606 .into_iter()
607 .map(|mut lv| {
608 lv.this = transform_recursive(lv.this, transform_fn)?;
609 Ok(lv)
610 })
611 .collect::<Result<Vec<_>>>()?;
612
613 // Transform WHERE clause
614 if let Some(mut where_clause) = select.where_clause.take() {
615 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
616 select.where_clause = Some(where_clause);
617 }
618
619 // Transform GROUP BY
620 if let Some(mut group_by) = select.group_by.take() {
621 group_by.expressions = group_by
622 .expressions
623 .into_iter()
624 .map(|e| transform_recursive(e, transform_fn))
625 .collect::<Result<Vec<_>>>()?;
626 select.group_by = Some(group_by);
627 }
628
629 // Transform HAVING
630 if let Some(mut having) = select.having.take() {
631 having.this = transform_recursive(having.this, transform_fn)?;
632 select.having = Some(having);
633 }
634
635 // Transform WITH (CTEs)
636 if let Some(mut with) = select.with.take() {
637 with.ctes = with
638 .ctes
639 .into_iter()
640 .map(|mut cte| {
641 let original = cte.this.clone();
642 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
643 cte
644 })
645 .collect();
646 select.with = Some(with);
647 }
648
649 // Transform ORDER BY
650 if let Some(mut order) = select.order_by.take() {
651 order.expressions = order
652 .expressions
653 .into_iter()
654 .map(|o| {
655 let mut o = o;
656 let original = o.this.clone();
657 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
658 // Also apply transform to the Ordered wrapper itself (for NULLS FIRST etc.)
659 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
660 Ok(Expression::Ordered(transformed)) => *transformed,
661 Ok(_) | Err(_) => o,
662 }
663 })
664 .collect();
665 select.order_by = Some(order);
666 }
667
668 // Transform WINDOW clause order_by
669 if let Some(ref mut windows) = select.windows {
670 for nw in windows.iter_mut() {
671 nw.spec.order_by = std::mem::take(&mut nw.spec.order_by)
672 .into_iter()
673 .map(|o| {
674 let mut o = o;
675 let original = o.this.clone();
676 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
677 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
678 Ok(Expression::Ordered(transformed)) => *transformed,
679 Ok(_) | Err(_) => o,
680 }
681 })
682 .collect();
683 }
684 }
685
686 // Transform QUALIFY
687 if let Some(mut qual) = select.qualify.take() {
688 qual.this = transform_recursive(qual.this, transform_fn)?;
689 select.qualify = Some(qual);
690 }
691
692 Expression::Select(select)
693 }
694 Expression::Function(mut f) => {
695 f.args = f
696 .args
697 .into_iter()
698 .map(|e| transform_recursive(e, transform_fn))
699 .collect::<Result<Vec<_>>>()?;
700 Expression::Function(f)
701 }
702 Expression::AggregateFunction(mut f) => {
703 f.args = f
704 .args
705 .into_iter()
706 .map(|e| transform_recursive(e, transform_fn))
707 .collect::<Result<Vec<_>>>()?;
708 if let Some(filter) = f.filter {
709 f.filter = Some(transform_recursive(filter, transform_fn)?);
710 }
711 Expression::AggregateFunction(f)
712 }
713 Expression::WindowFunction(mut wf) => {
714 wf.this = transform_recursive(wf.this, transform_fn)?;
715 wf.over.partition_by = wf
716 .over
717 .partition_by
718 .into_iter()
719 .map(|e| transform_recursive(e, transform_fn))
720 .collect::<Result<Vec<_>>>()?;
721 // Transform order_by items through Expression::Ordered wrapper
722 wf.over.order_by = wf
723 .over
724 .order_by
725 .into_iter()
726 .map(|o| {
727 let mut o = o;
728 o.this = transform_recursive(o.this, transform_fn)?;
729 match transform_fn(Expression::Ordered(Box::new(o)))? {
730 Expression::Ordered(transformed) => Ok(*transformed),
731 _ => Err(crate::error::Error::parse(
732 "Ordered transformation returned non-Ordered expression",
733 0,
734 0,
735 0,
736 0,
737 )),
738 }
739 })
740 .collect::<Result<Vec<_>>>()?;
741 Expression::WindowFunction(wf)
742 }
743 Expression::Alias(mut a) => {
744 a.this = transform_recursive(a.this, transform_fn)?;
745 Expression::Alias(a)
746 }
747 Expression::Cast(mut c) => {
748 c.this = transform_recursive(c.this, transform_fn)?;
749 // Also transform the target data type (recursively for nested types like ARRAY<INT>, STRUCT<a INT>)
750 c.to = transform_data_type_recursive(c.to, transform_fn)?;
751 Expression::Cast(c)
752 }
753 Expression::And(op) => transform_binary!(And, *op),
754 Expression::Or(op) => transform_binary!(Or, *op),
755 Expression::Add(op) => transform_binary!(Add, *op),
756 Expression::Sub(op) => transform_binary!(Sub, *op),
757 Expression::Mul(op) => transform_binary!(Mul, *op),
758 Expression::Div(op) => transform_binary!(Div, *op),
759 Expression::Eq(op) => transform_binary!(Eq, *op),
760 Expression::Lt(op) => transform_binary!(Lt, *op),
761 Expression::Gt(op) => transform_binary!(Gt, *op),
762 Expression::Paren(mut p) => {
763 p.this = transform_recursive(p.this, transform_fn)?;
764 Expression::Paren(p)
765 }
766 Expression::Coalesce(mut f) => {
767 f.expressions = f
768 .expressions
769 .into_iter()
770 .map(|e| transform_recursive(e, transform_fn))
771 .collect::<Result<Vec<_>>>()?;
772 Expression::Coalesce(f)
773 }
774 Expression::IfNull(mut f) => {
775 f.this = transform_recursive(f.this, transform_fn)?;
776 f.expression = transform_recursive(f.expression, transform_fn)?;
777 Expression::IfNull(f)
778 }
779 Expression::Nvl(mut f) => {
780 f.this = transform_recursive(f.this, transform_fn)?;
781 f.expression = transform_recursive(f.expression, transform_fn)?;
782 Expression::Nvl(f)
783 }
784 Expression::In(mut i) => {
785 i.this = transform_recursive(i.this, transform_fn)?;
786 i.expressions = i
787 .expressions
788 .into_iter()
789 .map(|e| transform_recursive(e, transform_fn))
790 .collect::<Result<Vec<_>>>()?;
791 if let Some(query) = i.query {
792 i.query = Some(transform_recursive(query, transform_fn)?);
793 }
794 Expression::In(i)
795 }
796 Expression::Not(mut n) => {
797 n.this = transform_recursive(n.this, transform_fn)?;
798 Expression::Not(n)
799 }
800 Expression::ArraySlice(mut s) => {
801 s.this = transform_recursive(s.this, transform_fn)?;
802 if let Some(start) = s.start {
803 s.start = Some(transform_recursive(start, transform_fn)?);
804 }
805 if let Some(end) = s.end {
806 s.end = Some(transform_recursive(end, transform_fn)?);
807 }
808 Expression::ArraySlice(s)
809 }
810 Expression::Subscript(mut s) => {
811 s.this = transform_recursive(s.this, transform_fn)?;
812 s.index = transform_recursive(s.index, transform_fn)?;
813 Expression::Subscript(s)
814 }
815 Expression::Array(mut a) => {
816 a.expressions = a
817 .expressions
818 .into_iter()
819 .map(|e| transform_recursive(e, transform_fn))
820 .collect::<Result<Vec<_>>>()?;
821 Expression::Array(a)
822 }
823 Expression::Struct(mut s) => {
824 let mut new_fields = Vec::new();
825 for (name, expr) in s.fields {
826 let transformed = transform_recursive(expr, transform_fn)?;
827 new_fields.push((name, transformed));
828 }
829 s.fields = new_fields;
830 Expression::Struct(s)
831 }
832 Expression::NamedArgument(mut na) => {
833 na.value = transform_recursive(na.value, transform_fn)?;
834 Expression::NamedArgument(na)
835 }
836 Expression::MapFunc(mut m) => {
837 m.keys = m
838 .keys
839 .into_iter()
840 .map(|e| transform_recursive(e, transform_fn))
841 .collect::<Result<Vec<_>>>()?;
842 m.values = m
843 .values
844 .into_iter()
845 .map(|e| transform_recursive(e, transform_fn))
846 .collect::<Result<Vec<_>>>()?;
847 Expression::MapFunc(m)
848 }
849 Expression::ArrayFunc(mut a) => {
850 a.expressions = a
851 .expressions
852 .into_iter()
853 .map(|e| transform_recursive(e, transform_fn))
854 .collect::<Result<Vec<_>>>()?;
855 Expression::ArrayFunc(a)
856 }
857 Expression::Lambda(mut l) => {
858 l.body = transform_recursive(l.body, transform_fn)?;
859 Expression::Lambda(l)
860 }
861 Expression::JsonExtract(mut f) => {
862 f.this = transform_recursive(f.this, transform_fn)?;
863 f.path = transform_recursive(f.path, transform_fn)?;
864 Expression::JsonExtract(f)
865 }
866 Expression::JsonExtractScalar(mut f) => {
867 f.this = transform_recursive(f.this, transform_fn)?;
868 f.path = transform_recursive(f.path, transform_fn)?;
869 Expression::JsonExtractScalar(f)
870 }
871
872 // ===== UnaryFunc-based expressions =====
873 // These all have a single `this: Expression` child
874 Expression::Length(mut f) => {
875 f.this = transform_recursive(f.this, transform_fn)?;
876 Expression::Length(f)
877 }
878 Expression::Upper(mut f) => {
879 f.this = transform_recursive(f.this, transform_fn)?;
880 Expression::Upper(f)
881 }
882 Expression::Lower(mut f) => {
883 f.this = transform_recursive(f.this, transform_fn)?;
884 Expression::Lower(f)
885 }
886 Expression::LTrim(mut f) => {
887 f.this = transform_recursive(f.this, transform_fn)?;
888 Expression::LTrim(f)
889 }
890 Expression::RTrim(mut f) => {
891 f.this = transform_recursive(f.this, transform_fn)?;
892 Expression::RTrim(f)
893 }
894 Expression::Reverse(mut f) => {
895 f.this = transform_recursive(f.this, transform_fn)?;
896 Expression::Reverse(f)
897 }
898 Expression::Abs(mut f) => {
899 f.this = transform_recursive(f.this, transform_fn)?;
900 Expression::Abs(f)
901 }
902 Expression::Ceil(mut f) => {
903 f.this = transform_recursive(f.this, transform_fn)?;
904 Expression::Ceil(f)
905 }
906 Expression::Floor(mut f) => {
907 f.this = transform_recursive(f.this, transform_fn)?;
908 Expression::Floor(f)
909 }
910 Expression::Sign(mut f) => {
911 f.this = transform_recursive(f.this, transform_fn)?;
912 Expression::Sign(f)
913 }
914 Expression::Sqrt(mut f) => {
915 f.this = transform_recursive(f.this, transform_fn)?;
916 Expression::Sqrt(f)
917 }
918 Expression::Cbrt(mut f) => {
919 f.this = transform_recursive(f.this, transform_fn)?;
920 Expression::Cbrt(f)
921 }
922 Expression::Ln(mut f) => {
923 f.this = transform_recursive(f.this, transform_fn)?;
924 Expression::Ln(f)
925 }
926 Expression::Log(mut f) => {
927 f.this = transform_recursive(f.this, transform_fn)?;
928 if let Some(base) = f.base {
929 f.base = Some(transform_recursive(base, transform_fn)?);
930 }
931 Expression::Log(f)
932 }
933 Expression::Exp(mut f) => {
934 f.this = transform_recursive(f.this, transform_fn)?;
935 Expression::Exp(f)
936 }
937 Expression::Date(mut f) => {
938 f.this = transform_recursive(f.this, transform_fn)?;
939 Expression::Date(f)
940 }
941 Expression::Stddev(mut f) => {
942 f.this = transform_recursive(f.this, transform_fn)?;
943 Expression::Stddev(f)
944 }
945 Expression::Variance(mut f) => {
946 f.this = transform_recursive(f.this, transform_fn)?;
947 Expression::Variance(f)
948 }
949
950 // ===== BinaryFunc-based expressions =====
951 Expression::ModFunc(mut f) => {
952 f.this = transform_recursive(f.this, transform_fn)?;
953 f.expression = transform_recursive(f.expression, transform_fn)?;
954 Expression::ModFunc(f)
955 }
956 Expression::Power(mut f) => {
957 f.this = transform_recursive(f.this, transform_fn)?;
958 f.expression = transform_recursive(f.expression, transform_fn)?;
959 Expression::Power(f)
960 }
961 Expression::MapFromArrays(mut f) => {
962 f.this = transform_recursive(f.this, transform_fn)?;
963 f.expression = transform_recursive(f.expression, transform_fn)?;
964 Expression::MapFromArrays(f)
965 }
966 Expression::ElementAt(mut f) => {
967 f.this = transform_recursive(f.this, transform_fn)?;
968 f.expression = transform_recursive(f.expression, transform_fn)?;
969 Expression::ElementAt(f)
970 }
971 Expression::MapContainsKey(mut f) => {
972 f.this = transform_recursive(f.this, transform_fn)?;
973 f.expression = transform_recursive(f.expression, transform_fn)?;
974 Expression::MapContainsKey(f)
975 }
976 Expression::Left(mut f) => {
977 f.this = transform_recursive(f.this, transform_fn)?;
978 f.length = transform_recursive(f.length, transform_fn)?;
979 Expression::Left(f)
980 }
981 Expression::Right(mut f) => {
982 f.this = transform_recursive(f.this, transform_fn)?;
983 f.length = transform_recursive(f.length, transform_fn)?;
984 Expression::Right(f)
985 }
986 Expression::Repeat(mut f) => {
987 f.this = transform_recursive(f.this, transform_fn)?;
988 f.times = transform_recursive(f.times, transform_fn)?;
989 Expression::Repeat(f)
990 }
991
992 // ===== Complex function expressions =====
993 Expression::Substring(mut f) => {
994 f.this = transform_recursive(f.this, transform_fn)?;
995 f.start = transform_recursive(f.start, transform_fn)?;
996 if let Some(len) = f.length {
997 f.length = Some(transform_recursive(len, transform_fn)?);
998 }
999 Expression::Substring(f)
1000 }
1001 Expression::Replace(mut f) => {
1002 f.this = transform_recursive(f.this, transform_fn)?;
1003 f.old = transform_recursive(f.old, transform_fn)?;
1004 f.new = transform_recursive(f.new, transform_fn)?;
1005 Expression::Replace(f)
1006 }
1007 Expression::ConcatWs(mut f) => {
1008 f.separator = transform_recursive(f.separator, transform_fn)?;
1009 f.expressions = f
1010 .expressions
1011 .into_iter()
1012 .map(|e| transform_recursive(e, transform_fn))
1013 .collect::<Result<Vec<_>>>()?;
1014 Expression::ConcatWs(f)
1015 }
1016 Expression::Trim(mut f) => {
1017 f.this = transform_recursive(f.this, transform_fn)?;
1018 if let Some(chars) = f.characters {
1019 f.characters = Some(transform_recursive(chars, transform_fn)?);
1020 }
1021 Expression::Trim(f)
1022 }
1023 Expression::Split(mut f) => {
1024 f.this = transform_recursive(f.this, transform_fn)?;
1025 f.delimiter = transform_recursive(f.delimiter, transform_fn)?;
1026 Expression::Split(f)
1027 }
1028 Expression::Lpad(mut f) => {
1029 f.this = transform_recursive(f.this, transform_fn)?;
1030 f.length = transform_recursive(f.length, transform_fn)?;
1031 if let Some(fill) = f.fill {
1032 f.fill = Some(transform_recursive(fill, transform_fn)?);
1033 }
1034 Expression::Lpad(f)
1035 }
1036 Expression::Rpad(mut f) => {
1037 f.this = transform_recursive(f.this, transform_fn)?;
1038 f.length = transform_recursive(f.length, transform_fn)?;
1039 if let Some(fill) = f.fill {
1040 f.fill = Some(transform_recursive(fill, transform_fn)?);
1041 }
1042 Expression::Rpad(f)
1043 }
1044
1045 // ===== Conditional expressions =====
1046 Expression::Case(mut c) => {
1047 if let Some(operand) = c.operand {
1048 c.operand = Some(transform_recursive(operand, transform_fn)?);
1049 }
1050 c.whens = c
1051 .whens
1052 .into_iter()
1053 .map(|(cond, then)| {
1054 let new_cond = transform_recursive(cond.clone(), transform_fn).unwrap_or(cond);
1055 let new_then = transform_recursive(then.clone(), transform_fn).unwrap_or(then);
1056 (new_cond, new_then)
1057 })
1058 .collect();
1059 if let Some(else_expr) = c.else_ {
1060 c.else_ = Some(transform_recursive(else_expr, transform_fn)?);
1061 }
1062 Expression::Case(c)
1063 }
1064 Expression::IfFunc(mut f) => {
1065 f.condition = transform_recursive(f.condition, transform_fn)?;
1066 f.true_value = transform_recursive(f.true_value, transform_fn)?;
1067 if let Some(false_val) = f.false_value {
1068 f.false_value = Some(transform_recursive(false_val, transform_fn)?);
1069 }
1070 Expression::IfFunc(f)
1071 }
1072
1073 // ===== Date/Time expressions =====
1074 Expression::DateAdd(mut f) => {
1075 f.this = transform_recursive(f.this, transform_fn)?;
1076 f.interval = transform_recursive(f.interval, transform_fn)?;
1077 Expression::DateAdd(f)
1078 }
1079 Expression::DateSub(mut f) => {
1080 f.this = transform_recursive(f.this, transform_fn)?;
1081 f.interval = transform_recursive(f.interval, transform_fn)?;
1082 Expression::DateSub(f)
1083 }
1084 Expression::DateDiff(mut f) => {
1085 f.this = transform_recursive(f.this, transform_fn)?;
1086 f.expression = transform_recursive(f.expression, transform_fn)?;
1087 Expression::DateDiff(f)
1088 }
1089 Expression::DateTrunc(mut f) => {
1090 f.this = transform_recursive(f.this, transform_fn)?;
1091 Expression::DateTrunc(f)
1092 }
1093 Expression::Extract(mut f) => {
1094 f.this = transform_recursive(f.this, transform_fn)?;
1095 Expression::Extract(f)
1096 }
1097
1098 // ===== JSON expressions =====
1099 Expression::JsonObject(mut f) => {
1100 f.pairs = f
1101 .pairs
1102 .into_iter()
1103 .map(|(k, v)| {
1104 let new_k = transform_recursive(k, transform_fn)?;
1105 let new_v = transform_recursive(v, transform_fn)?;
1106 Ok((new_k, new_v))
1107 })
1108 .collect::<Result<Vec<_>>>()?;
1109 Expression::JsonObject(f)
1110 }
1111
1112 // ===== Subquery expressions =====
1113 Expression::Subquery(mut s) => {
1114 s.this = transform_recursive(s.this, transform_fn)?;
1115 Expression::Subquery(s)
1116 }
1117 Expression::Exists(mut e) => {
1118 e.this = transform_recursive(e.this, transform_fn)?;
1119 Expression::Exists(e)
1120 }
1121
1122 // ===== Set operations =====
1123 Expression::Union(mut u) => {
1124 u.left = transform_recursive(u.left, transform_fn)?;
1125 u.right = transform_recursive(u.right, transform_fn)?;
1126 Expression::Union(u)
1127 }
1128 Expression::Intersect(mut i) => {
1129 i.left = transform_recursive(i.left, transform_fn)?;
1130 i.right = transform_recursive(i.right, transform_fn)?;
1131 Expression::Intersect(i)
1132 }
1133 Expression::Except(mut e) => {
1134 e.left = transform_recursive(e.left, transform_fn)?;
1135 e.right = transform_recursive(e.right, transform_fn)?;
1136 Expression::Except(e)
1137 }
1138
1139 // ===== DML expressions =====
1140 Expression::Insert(mut ins) => {
1141 // Transform VALUES clause expressions
1142 let mut new_values = Vec::new();
1143 for row in ins.values {
1144 let mut new_row = Vec::new();
1145 for e in row {
1146 new_row.push(transform_recursive(e, transform_fn)?);
1147 }
1148 new_values.push(new_row);
1149 }
1150 ins.values = new_values;
1151
1152 // Transform query (for INSERT ... SELECT)
1153 if let Some(query) = ins.query {
1154 ins.query = Some(transform_recursive(query, transform_fn)?);
1155 }
1156
1157 // Transform RETURNING clause
1158 let mut new_returning = Vec::new();
1159 for e in ins.returning {
1160 new_returning.push(transform_recursive(e, transform_fn)?);
1161 }
1162 ins.returning = new_returning;
1163
1164 // Transform ON CONFLICT clause
1165 if let Some(on_conflict) = ins.on_conflict {
1166 ins.on_conflict = Some(Box::new(transform_recursive(*on_conflict, transform_fn)?));
1167 }
1168
1169 Expression::Insert(ins)
1170 }
1171 Expression::Update(mut upd) => {
1172 upd.set = upd
1173 .set
1174 .into_iter()
1175 .map(|(id, val)| {
1176 let new_val = transform_recursive(val.clone(), transform_fn).unwrap_or(val);
1177 (id, new_val)
1178 })
1179 .collect();
1180 if let Some(mut where_clause) = upd.where_clause.take() {
1181 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
1182 upd.where_clause = Some(where_clause);
1183 }
1184 Expression::Update(upd)
1185 }
1186 Expression::Delete(mut del) => {
1187 if let Some(mut where_clause) = del.where_clause.take() {
1188 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
1189 del.where_clause = Some(where_clause);
1190 }
1191 Expression::Delete(del)
1192 }
1193
1194 // ===== CTE expressions =====
1195 Expression::With(mut w) => {
1196 w.ctes = w
1197 .ctes
1198 .into_iter()
1199 .map(|mut cte| {
1200 let original = cte.this.clone();
1201 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
1202 cte
1203 })
1204 .collect();
1205 Expression::With(w)
1206 }
1207 Expression::Cte(mut c) => {
1208 c.this = transform_recursive(c.this, transform_fn)?;
1209 Expression::Cte(c)
1210 }
1211
1212 // ===== Order expressions =====
1213 Expression::Ordered(mut o) => {
1214 o.this = transform_recursive(o.this, transform_fn)?;
1215 Expression::Ordered(o)
1216 }
1217
1218 // ===== Negation =====
1219 Expression::Neg(mut n) => {
1220 n.this = transform_recursive(n.this, transform_fn)?;
1221 Expression::Neg(n)
1222 }
1223
1224 // ===== Between =====
1225 Expression::Between(mut b) => {
1226 b.this = transform_recursive(b.this, transform_fn)?;
1227 b.low = transform_recursive(b.low, transform_fn)?;
1228 b.high = transform_recursive(b.high, transform_fn)?;
1229 Expression::Between(b)
1230 }
1231 Expression::IsNull(mut i) => {
1232 i.this = transform_recursive(i.this, transform_fn)?;
1233 Expression::IsNull(i)
1234 }
1235 Expression::IsTrue(mut i) => {
1236 i.this = transform_recursive(i.this, transform_fn)?;
1237 Expression::IsTrue(i)
1238 }
1239 Expression::IsFalse(mut i) => {
1240 i.this = transform_recursive(i.this, transform_fn)?;
1241 Expression::IsFalse(i)
1242 }
1243
1244 // ===== Like expressions =====
1245 Expression::Like(mut l) => {
1246 l.left = transform_recursive(l.left, transform_fn)?;
1247 l.right = transform_recursive(l.right, transform_fn)?;
1248 Expression::Like(l)
1249 }
1250 Expression::ILike(mut l) => {
1251 l.left = transform_recursive(l.left, transform_fn)?;
1252 l.right = transform_recursive(l.right, transform_fn)?;
1253 Expression::ILike(l)
1254 }
1255
1256 // ===== Additional binary ops not covered by macro =====
1257 Expression::Neq(op) => transform_binary!(Neq, *op),
1258 Expression::Lte(op) => transform_binary!(Lte, *op),
1259 Expression::Gte(op) => transform_binary!(Gte, *op),
1260 Expression::Mod(op) => transform_binary!(Mod, *op),
1261 Expression::Concat(op) => transform_binary!(Concat, *op),
1262 Expression::BitwiseAnd(op) => transform_binary!(BitwiseAnd, *op),
1263 Expression::BitwiseOr(op) => transform_binary!(BitwiseOr, *op),
1264 Expression::BitwiseXor(op) => transform_binary!(BitwiseXor, *op),
1265 Expression::Is(op) => transform_binary!(Is, *op),
1266
1267 // ===== TryCast / SafeCast =====
1268 Expression::TryCast(mut c) => {
1269 c.this = transform_recursive(c.this, transform_fn)?;
1270 c.to = transform_data_type_recursive(c.to, transform_fn)?;
1271 Expression::TryCast(c)
1272 }
1273 Expression::SafeCast(mut c) => {
1274 c.this = transform_recursive(c.this, transform_fn)?;
1275 c.to = transform_data_type_recursive(c.to, transform_fn)?;
1276 Expression::SafeCast(c)
1277 }
1278
1279 // ===== Misc =====
1280 Expression::Unnest(mut f) => {
1281 f.this = transform_recursive(f.this, transform_fn)?;
1282 f.expressions = f
1283 .expressions
1284 .into_iter()
1285 .map(|e| transform_recursive(e, transform_fn))
1286 .collect::<Result<Vec<_>>>()?;
1287 Expression::Unnest(f)
1288 }
1289 Expression::Explode(mut f) => {
1290 f.this = transform_recursive(f.this, transform_fn)?;
1291 Expression::Explode(f)
1292 }
1293 Expression::GroupConcat(mut f) => {
1294 f.this = transform_recursive(f.this, transform_fn)?;
1295 Expression::GroupConcat(f)
1296 }
1297 Expression::StringAgg(mut f) => {
1298 f.this = transform_recursive(f.this, transform_fn)?;
1299 Expression::StringAgg(f)
1300 }
1301 Expression::ListAgg(mut f) => {
1302 f.this = transform_recursive(f.this, transform_fn)?;
1303 Expression::ListAgg(f)
1304 }
1305 Expression::ArrayAgg(mut f) => {
1306 f.this = transform_recursive(f.this, transform_fn)?;
1307 Expression::ArrayAgg(f)
1308 }
1309 Expression::ParseJson(mut f) => {
1310 f.this = transform_recursive(f.this, transform_fn)?;
1311 Expression::ParseJson(f)
1312 }
1313 Expression::ToJson(mut f) => {
1314 f.this = transform_recursive(f.this, transform_fn)?;
1315 Expression::ToJson(f)
1316 }
1317 Expression::JSONExtract(mut e) => {
1318 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1319 e.expression = Box::new(transform_recursive(*e.expression, transform_fn)?);
1320 Expression::JSONExtract(e)
1321 }
1322 Expression::JSONExtractScalar(mut e) => {
1323 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1324 e.expression = Box::new(transform_recursive(*e.expression, transform_fn)?);
1325 Expression::JSONExtractScalar(e)
1326 }
1327
1328 // StrToTime: recurse into this
1329 Expression::StrToTime(mut e) => {
1330 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1331 Expression::StrToTime(e)
1332 }
1333
1334 // UnixToTime: recurse into this
1335 Expression::UnixToTime(mut e) => {
1336 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1337 Expression::UnixToTime(e)
1338 }
1339
1340 // CreateTable: recurse into column defaults, on_update expressions, and data types
1341 Expression::CreateTable(mut ct) => {
1342 for col in &mut ct.columns {
1343 if let Some(default_expr) = col.default.take() {
1344 col.default = Some(transform_recursive(default_expr, transform_fn)?);
1345 }
1346 if let Some(on_update_expr) = col.on_update.take() {
1347 col.on_update = Some(transform_recursive(on_update_expr, transform_fn)?);
1348 }
1349 // Note: Column data type transformations (INT -> INT64 for BigQuery, etc.)
1350 // are NOT applied here because per-dialect transforms are designed for CAST/expression
1351 // contexts and may not produce correct results for DDL column definitions.
1352 // The DDL type mappings would need dedicated handling per source/target pair.
1353 }
1354 if let Some(as_select) = ct.as_select.take() {
1355 ct.as_select = Some(transform_recursive(as_select, transform_fn)?);
1356 }
1357 Expression::CreateTable(ct)
1358 }
1359
1360 // CreateProcedure: recurse into body expressions
1361 Expression::CreateProcedure(mut cp) => {
1362 if let Some(body) = cp.body.take() {
1363 cp.body = Some(match body {
1364 FunctionBody::Expression(expr) => {
1365 FunctionBody::Expression(transform_recursive(expr, transform_fn)?)
1366 }
1367 FunctionBody::Return(expr) => {
1368 FunctionBody::Return(transform_recursive(expr, transform_fn)?)
1369 }
1370 FunctionBody::Statements(stmts) => {
1371 let transformed_stmts = stmts
1372 .into_iter()
1373 .map(|s| transform_recursive(s, transform_fn))
1374 .collect::<Result<Vec<_>>>()?;
1375 FunctionBody::Statements(transformed_stmts)
1376 }
1377 other => other,
1378 });
1379 }
1380 Expression::CreateProcedure(cp)
1381 }
1382
1383 // CreateFunction: recurse into body expressions
1384 Expression::CreateFunction(mut cf) => {
1385 if let Some(body) = cf.body.take() {
1386 cf.body = Some(match body {
1387 FunctionBody::Expression(expr) => {
1388 FunctionBody::Expression(transform_recursive(expr, transform_fn)?)
1389 }
1390 FunctionBody::Return(expr) => {
1391 FunctionBody::Return(transform_recursive(expr, transform_fn)?)
1392 }
1393 FunctionBody::Statements(stmts) => {
1394 let transformed_stmts = stmts
1395 .into_iter()
1396 .map(|s| transform_recursive(s, transform_fn))
1397 .collect::<Result<Vec<_>>>()?;
1398 FunctionBody::Statements(transformed_stmts)
1399 }
1400 other => other,
1401 });
1402 }
1403 Expression::CreateFunction(cf)
1404 }
1405
1406 // MemberOf: recurse into left and right operands
1407 Expression::MemberOf(op) => transform_binary!(MemberOf, *op),
1408 // ArrayContainsAll (@>): recurse into left and right operands
1409 Expression::ArrayContainsAll(op) => transform_binary!(ArrayContainsAll, *op),
1410 // ArrayContainedBy (<@): recurse into left and right operands
1411 Expression::ArrayContainedBy(op) => transform_binary!(ArrayContainedBy, *op),
1412 // ArrayOverlaps (&&): recurse into left and right operands
1413 Expression::ArrayOverlaps(op) => transform_binary!(ArrayOverlaps, *op),
1414 // TsMatch (@@): recurse into left and right operands
1415 Expression::TsMatch(op) => transform_binary!(TsMatch, *op),
1416 // Adjacent (-|-): recurse into left and right operands
1417 Expression::Adjacent(op) => transform_binary!(Adjacent, *op),
1418
1419 // Table: recurse into when (HistoricalData) and changes fields
1420 Expression::Table(mut t) => {
1421 if let Some(when) = t.when.take() {
1422 let transformed =
1423 transform_recursive(Expression::HistoricalData(when), transform_fn)?;
1424 if let Expression::HistoricalData(hd) = transformed {
1425 t.when = Some(hd);
1426 }
1427 }
1428 if let Some(changes) = t.changes.take() {
1429 let transformed = transform_recursive(Expression::Changes(changes), transform_fn)?;
1430 if let Expression::Changes(c) = transformed {
1431 t.changes = Some(c);
1432 }
1433 }
1434 Expression::Table(t)
1435 }
1436
1437 // HistoricalData (Snowflake time travel): recurse into expression
1438 Expression::HistoricalData(mut hd) => {
1439 *hd.expression = transform_recursive(*hd.expression, transform_fn)?;
1440 Expression::HistoricalData(hd)
1441 }
1442
1443 // Changes (Snowflake CHANGES clause): recurse into at_before and end
1444 Expression::Changes(mut c) => {
1445 if let Some(at_before) = c.at_before.take() {
1446 c.at_before = Some(Box::new(transform_recursive(*at_before, transform_fn)?));
1447 }
1448 if let Some(end) = c.end.take() {
1449 c.end = Some(Box::new(transform_recursive(*end, transform_fn)?));
1450 }
1451 Expression::Changes(c)
1452 }
1453
1454 // TableArgument: TABLE(expr) or MODEL(expr)
1455 Expression::TableArgument(mut ta) => {
1456 ta.this = transform_recursive(ta.this, transform_fn)?;
1457 Expression::TableArgument(ta)
1458 }
1459
1460 // JoinedTable: (tbl1 JOIN tbl2 ON ...) - recurse into left and join tables
1461 Expression::JoinedTable(mut jt) => {
1462 jt.left = transform_recursive(jt.left, transform_fn)?;
1463 for join in &mut jt.joins {
1464 join.this = transform_recursive(
1465 std::mem::replace(&mut join.this, Expression::Null(crate::expressions::Null)),
1466 transform_fn,
1467 )?;
1468 if let Some(on) = join.on.take() {
1469 join.on = Some(transform_recursive(on, transform_fn)?);
1470 }
1471 }
1472 jt.lateral_views = jt
1473 .lateral_views
1474 .into_iter()
1475 .map(|mut lv| {
1476 lv.this = transform_recursive(lv.this, transform_fn)?;
1477 Ok(lv)
1478 })
1479 .collect::<Result<Vec<_>>>()?;
1480 Expression::JoinedTable(jt)
1481 }
1482
1483 // Lateral: LATERAL func() - recurse into the function expression
1484 Expression::Lateral(mut lat) => {
1485 *lat.this = transform_recursive(*lat.this, transform_fn)?;
1486 Expression::Lateral(lat)
1487 }
1488
1489 // WithinGroup: recurse into order_by items (for NULLS FIRST/LAST etc.)
1490 // but NOT into wg.this - the inner function is handled by StringAggConvert/GroupConcatConvert
1491 // as a unit together with the WithinGroup wrapper
1492 Expression::WithinGroup(mut wg) => {
1493 wg.order_by = wg
1494 .order_by
1495 .into_iter()
1496 .map(|mut o| {
1497 let original = o.this.clone();
1498 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
1499 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
1500 Ok(Expression::Ordered(transformed)) => *transformed,
1501 Ok(_) | Err(_) => o,
1502 }
1503 })
1504 .collect();
1505 Expression::WithinGroup(wg)
1506 }
1507
1508 // Filter: recurse into both the aggregate and the filter condition
1509 Expression::Filter(mut f) => {
1510 f.this = Box::new(transform_recursive(*f.this, transform_fn)?);
1511 f.expression = Box::new(transform_recursive(*f.expression, transform_fn)?);
1512 Expression::Filter(f)
1513 }
1514
1515 // BitwiseOrAgg/BitwiseAndAgg/BitwiseXorAgg: recurse into the aggregate argument
1516 Expression::BitwiseOrAgg(mut f) => {
1517 f.this = transform_recursive(f.this, transform_fn)?;
1518 Expression::BitwiseOrAgg(f)
1519 }
1520 Expression::BitwiseAndAgg(mut f) => {
1521 f.this = transform_recursive(f.this, transform_fn)?;
1522 Expression::BitwiseAndAgg(f)
1523 }
1524 Expression::BitwiseXorAgg(mut f) => {
1525 f.this = transform_recursive(f.this, transform_fn)?;
1526 Expression::BitwiseXorAgg(f)
1527 }
1528 Expression::PipeOperator(mut pipe) => {
1529 pipe.this = transform_recursive(pipe.this, transform_fn)?;
1530 pipe.expression = transform_recursive(pipe.expression, transform_fn)?;
1531 Expression::PipeOperator(pipe)
1532 }
1533
1534 // Pass through leaf nodes unchanged
1535 other => other,
1536 };
1537
1538 // Then apply the transform function
1539 transform_fn(expr)
1540}
1541
/// Returns the tokenizer config, generator config, and expression transform closure
/// for a built-in dialect type. This is the shared implementation used by both
/// `Dialect::get()` and custom dialect construction.
///
/// Every dialect arm is feature-gated; a `DialectType` whose feature flag is
/// disabled at compile time falls through to the `GenericDialect` catch-all.
fn configs_for_dialect_type(
    dt: DialectType,
) -> (
    TokenizerConfig,
    GeneratorConfig,
    Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>,
) {
    // Expands to the (tokenizer, generator, transform) triple for a unit dialect
    // struct. Note the transform closure re-constructs the unit struct on each
    // call instead of capturing `d`, so the boxed closure owns no state.
    macro_rules! dialect_configs {
        ($dialect_struct:ident) => {{
            let d = $dialect_struct;
            (
                d.tokenizer_config(),
                d.generator_config(),
                Box::new(move |e| $dialect_struct.transform_expr(e)),
            )
        }};
    }
    match dt {
        #[cfg(feature = "dialect-postgresql")]
        DialectType::PostgreSQL => dialect_configs!(PostgresDialect),
        #[cfg(feature = "dialect-mysql")]
        DialectType::MySQL => dialect_configs!(MySQLDialect),
        #[cfg(feature = "dialect-bigquery")]
        DialectType::BigQuery => dialect_configs!(BigQueryDialect),
        #[cfg(feature = "dialect-snowflake")]
        DialectType::Snowflake => dialect_configs!(SnowflakeDialect),
        #[cfg(feature = "dialect-duckdb")]
        DialectType::DuckDB => dialect_configs!(DuckDBDialect),
        #[cfg(feature = "dialect-tsql")]
        DialectType::TSQL => dialect_configs!(TSQLDialect),
        #[cfg(feature = "dialect-oracle")]
        DialectType::Oracle => dialect_configs!(OracleDialect),
        #[cfg(feature = "dialect-hive")]
        DialectType::Hive => dialect_configs!(HiveDialect),
        #[cfg(feature = "dialect-spark")]
        DialectType::Spark => dialect_configs!(SparkDialect),
        #[cfg(feature = "dialect-sqlite")]
        DialectType::SQLite => dialect_configs!(SQLiteDialect),
        #[cfg(feature = "dialect-presto")]
        DialectType::Presto => dialect_configs!(PrestoDialect),
        #[cfg(feature = "dialect-trino")]
        DialectType::Trino => dialect_configs!(TrinoDialect),
        #[cfg(feature = "dialect-redshift")]
        DialectType::Redshift => dialect_configs!(RedshiftDialect),
        #[cfg(feature = "dialect-clickhouse")]
        DialectType::ClickHouse => dialect_configs!(ClickHouseDialect),
        #[cfg(feature = "dialect-databricks")]
        DialectType::Databricks => dialect_configs!(DatabricksDialect),
        #[cfg(feature = "dialect-athena")]
        DialectType::Athena => dialect_configs!(AthenaDialect),
        #[cfg(feature = "dialect-teradata")]
        DialectType::Teradata => dialect_configs!(TeradataDialect),
        #[cfg(feature = "dialect-doris")]
        DialectType::Doris => dialect_configs!(DorisDialect),
        #[cfg(feature = "dialect-starrocks")]
        DialectType::StarRocks => dialect_configs!(StarRocksDialect),
        #[cfg(feature = "dialect-materialize")]
        DialectType::Materialize => dialect_configs!(MaterializeDialect),
        #[cfg(feature = "dialect-risingwave")]
        DialectType::RisingWave => dialect_configs!(RisingWaveDialect),
        #[cfg(feature = "dialect-singlestore")]
        DialectType::SingleStore => dialect_configs!(SingleStoreDialect),
        #[cfg(feature = "dialect-cockroachdb")]
        DialectType::CockroachDB => dialect_configs!(CockroachDBDialect),
        #[cfg(feature = "dialect-tidb")]
        DialectType::TiDB => dialect_configs!(TiDBDialect),
        #[cfg(feature = "dialect-druid")]
        DialectType::Druid => dialect_configs!(DruidDialect),
        #[cfg(feature = "dialect-solr")]
        DialectType::Solr => dialect_configs!(SolrDialect),
        #[cfg(feature = "dialect-tableau")]
        DialectType::Tableau => dialect_configs!(TableauDialect),
        #[cfg(feature = "dialect-dune")]
        DialectType::Dune => dialect_configs!(DuneDialect),
        #[cfg(feature = "dialect-fabric")]
        DialectType::Fabric => dialect_configs!(FabricDialect),
        #[cfg(feature = "dialect-drill")]
        DialectType::Drill => dialect_configs!(DrillDialect),
        #[cfg(feature = "dialect-dremio")]
        DialectType::Dremio => dialect_configs!(DremioDialect),
        #[cfg(feature = "dialect-exasol")]
        DialectType::Exasol => dialect_configs!(ExasolDialect),
        #[cfg(feature = "dialect-datafusion")]
        DialectType::DataFusion => dialect_configs!(DataFusionDialect),
        // Any remaining variant — including those whose feature is disabled —
        // gets the generic dialect's behavior.
        _ => dialect_configs!(GenericDialect),
    }
}
1632
1633// ---------------------------------------------------------------------------
1634// Custom dialect registry
1635// ---------------------------------------------------------------------------
1636
/// Process-wide registry of user-registered custom dialects, keyed by name.
/// Guarded by an `RwLock` so concurrent lookups (reads) don't block each other.
static CUSTOM_DIALECT_REGISTRY: LazyLock<RwLock<HashMap<String, Arc<CustomDialectConfig>>>> =
    LazyLock::new(|| RwLock::new(HashMap::new()));

/// Immutable configuration snapshot for one registered custom dialect,
/// produced by [`CustomDialectBuilder::register`].
struct CustomDialectConfig {
    /// Registry key; guaranteed not to collide with a built-in dialect name.
    name: String,
    /// The built-in dialect the tokenizer/generator configs were inherited from.
    base_dialect: DialectType,
    /// Tokenizer settings: base dialect's config plus any builder modifications.
    tokenizer_config: TokenizerConfig,
    /// Generator settings: base dialect's config plus any builder modifications.
    generator_config: GeneratorConfig,
    /// Optional per-node expression transform; when set, it replaces the base
    /// dialect's transform.
    transform: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
    /// Optional whole-tree preprocessing pass run before the per-node transform.
    preprocess: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
1648
/// Fluent builder for creating and registering custom SQL dialects.
///
/// A custom dialect is based on an existing built-in dialect and allows selective
/// overrides of tokenizer configuration, generator configuration, and expression
/// transforms.
///
/// # Example
///
/// ```rust,ignore
/// use polyglot_sql::dialects::{CustomDialectBuilder, DialectType, Dialect};
/// use polyglot_sql::generator::NormalizeFunctions;
///
/// CustomDialectBuilder::new("my_postgres")
///     .based_on(DialectType::PostgreSQL)
///     .generator_config_modifier(|gc| {
///         gc.normalize_functions = NormalizeFunctions::Lower;
///     })
///     .register()
///     .unwrap();
///
/// let d = Dialect::get_by_name("my_postgres").unwrap();
/// let exprs = d.parse("SELECT COUNT(*)").unwrap();
/// let sql = d.generate(&exprs[0]).unwrap();
/// assert_eq!(sql, "select count(*)");
///
/// polyglot_sql::unregister_custom_dialect("my_postgres");
/// ```
pub struct CustomDialectBuilder {
    /// Name under which the dialect will be registered.
    name: String,
    /// Built-in dialect whose configs are inherited as the starting point.
    base_dialect: DialectType,
    /// One-shot closure that mutates the inherited tokenizer config.
    tokenizer_modifier: Option<Box<dyn FnOnce(&mut TokenizerConfig)>>,
    /// One-shot closure that mutates the inherited generator config.
    generator_modifier: Option<Box<dyn FnOnce(&mut GeneratorConfig)>>,
    /// Optional per-node expression transform (replaces the base dialect's).
    transform: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
    /// Optional whole-tree preprocessing pass (replaces the base dialect's).
    preprocess: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
1684
1685impl CustomDialectBuilder {
1686 /// Create a new builder with the given name. Defaults to `Generic` as the base dialect.
1687 pub fn new(name: impl Into<String>) -> Self {
1688 Self {
1689 name: name.into(),
1690 base_dialect: DialectType::Generic,
1691 tokenizer_modifier: None,
1692 generator_modifier: None,
1693 transform: None,
1694 preprocess: None,
1695 }
1696 }
1697
1698 /// Set the base built-in dialect to inherit configuration from.
1699 pub fn based_on(mut self, dialect: DialectType) -> Self {
1700 self.base_dialect = dialect;
1701 self
1702 }
1703
1704 /// Provide a closure that modifies the tokenizer configuration inherited from the base dialect.
1705 pub fn tokenizer_config_modifier<F>(mut self, f: F) -> Self
1706 where
1707 F: FnOnce(&mut TokenizerConfig) + 'static,
1708 {
1709 self.tokenizer_modifier = Some(Box::new(f));
1710 self
1711 }
1712
1713 /// Provide a closure that modifies the generator configuration inherited from the base dialect.
1714 pub fn generator_config_modifier<F>(mut self, f: F) -> Self
1715 where
1716 F: FnOnce(&mut GeneratorConfig) + 'static,
1717 {
1718 self.generator_modifier = Some(Box::new(f));
1719 self
1720 }
1721
1722 /// Set a custom per-node expression transform function.
1723 ///
1724 /// This replaces the base dialect's transform. It is called on every expression
1725 /// node during the recursive transform pass.
1726 pub fn transform_fn<F>(mut self, f: F) -> Self
1727 where
1728 F: Fn(Expression) -> Result<Expression> + Send + Sync + 'static,
1729 {
1730 self.transform = Some(Arc::new(f));
1731 self
1732 }
1733
1734 /// Set a custom whole-tree preprocessing function.
1735 ///
1736 /// This replaces the base dialect's built-in preprocessing. It is called once
1737 /// on the entire expression tree before the recursive per-node transform.
1738 pub fn preprocess_fn<F>(mut self, f: F) -> Self
1739 where
1740 F: Fn(Expression) -> Result<Expression> + Send + Sync + 'static,
1741 {
1742 self.preprocess = Some(Arc::new(f));
1743 self
1744 }
1745
1746 /// Build the custom dialect configuration and register it in the global registry.
1747 ///
1748 /// Returns an error if:
1749 /// - The name collides with a built-in dialect name
1750 /// - A custom dialect with the same name is already registered
1751 pub fn register(self) -> Result<()> {
1752 // Reject names that collide with built-in dialects
1753 if DialectType::from_str(&self.name).is_ok() {
1754 return Err(crate::error::Error::parse(
1755 format!(
1756 "Cannot register custom dialect '{}': name collides with built-in dialect",
1757 self.name
1758 ),
1759 0,
1760 0,
1761 0,
1762 0,
1763 ));
1764 }
1765
1766 // Get base configs
1767 let (mut tok_config, mut gen_config, _base_transform) =
1768 configs_for_dialect_type(self.base_dialect);
1769
1770 // Apply modifiers
1771 if let Some(tok_mod) = self.tokenizer_modifier {
1772 tok_mod(&mut tok_config);
1773 }
1774 if let Some(gen_mod) = self.generator_modifier {
1775 gen_mod(&mut gen_config);
1776 }
1777
1778 let config = CustomDialectConfig {
1779 name: self.name.clone(),
1780 base_dialect: self.base_dialect,
1781 tokenizer_config: tok_config,
1782 generator_config: gen_config,
1783 transform: self.transform,
1784 preprocess: self.preprocess,
1785 };
1786
1787 register_custom_dialect(config)
1788 }
1789}
1790
1791use std::str::FromStr;
1792
1793fn register_custom_dialect(config: CustomDialectConfig) -> Result<()> {
1794 let mut registry = CUSTOM_DIALECT_REGISTRY.write().map_err(|e| {
1795 crate::error::Error::parse(format!("Registry lock poisoned: {}", e), 0, 0, 0, 0)
1796 })?;
1797
1798 if registry.contains_key(&config.name) {
1799 return Err(crate::error::Error::parse(
1800 format!("Custom dialect '{}' is already registered", config.name),
1801 0,
1802 0,
1803 0,
1804 0,
1805 ));
1806 }
1807
1808 registry.insert(config.name.clone(), Arc::new(config));
1809 Ok(())
1810}
1811
1812/// Remove a custom dialect from the global registry.
1813///
1814/// Returns `true` if a dialect with that name was found and removed,
1815/// `false` if no such custom dialect existed.
1816pub fn unregister_custom_dialect(name: &str) -> bool {
1817 if let Ok(mut registry) = CUSTOM_DIALECT_REGISTRY.write() {
1818 registry.remove(name).is_some()
1819 } else {
1820 false
1821 }
1822}
1823
1824fn get_custom_dialect_config(name: &str) -> Option<Arc<CustomDialectConfig>> {
1825 CUSTOM_DIALECT_REGISTRY
1826 .read()
1827 .ok()
1828 .and_then(|registry| registry.get(name).cloned())
1829}
1830
/// Main entry point for dialect-specific SQL operations.
///
/// A `Dialect` bundles together a tokenizer, generator configuration, and expression
/// transformer for a specific SQL database engine. It is the high-level API through
/// which callers parse, generate, transform, and transpile SQL.
///
/// # Usage
///
/// ```rust,ignore
/// use polyglot_sql::dialects::{Dialect, DialectType};
///
/// // Parse PostgreSQL SQL into an AST
/// let pg = Dialect::get(DialectType::PostgreSQL);
/// let exprs = pg.parse("SELECT id, name FROM users WHERE active")?;
///
/// // Transpile from PostgreSQL to BigQuery
/// let results = pg.transpile_to("SELECT NOW()", DialectType::BigQuery)?;
/// assert_eq!(results[0], "SELECT CURRENT_TIMESTAMP()");
/// ```
///
/// Obtain an instance via [`Dialect::get`] or [`Dialect::get_by_name`].
/// The struct is `Send + Sync` safe so it can be shared across threads.
pub struct Dialect {
    /// The dialect this instance is configured for. For dialects built from a
    /// custom configuration this holds the *base* dialect type.
    dialect_type: DialectType,
    /// Tokenizer built from this dialect's tokenizer configuration.
    tokenizer: Tokenizer,
    /// Dialect-wide default generator configuration used when rendering SQL.
    generator_config: GeneratorConfig,
    /// Per-node expression transform applied during [`Dialect::transform`].
    transformer: Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>,
    /// Optional function to get expression-specific generator config (for hybrid dialects like Athena).
    generator_config_for_expr: Option<Box<dyn Fn(&Expression) -> GeneratorConfig + Send + Sync>>,
    /// Optional custom preprocessing function (overrides built-in preprocess for custom dialects).
    custom_preprocess: Option<Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
1863
1864impl Dialect {
1865 /// Creates a fully configured [`Dialect`] instance for the given [`DialectType`].
1866 ///
1867 /// This is the primary constructor. It initializes the tokenizer, generator config,
1868 /// and expression transformer based on the dialect's [`DialectImpl`] implementation.
1869 /// For hybrid dialects like Athena, it also sets up expression-specific generator
1870 /// config routing.
1871 pub fn get(dialect_type: DialectType) -> Self {
1872 let (tokenizer_config, generator_config, transformer) =
1873 configs_for_dialect_type(dialect_type);
1874
1875 // Set up expression-specific generator config for hybrid dialects
1876 let generator_config_for_expr: Option<
1877 Box<dyn Fn(&Expression) -> GeneratorConfig + Send + Sync>,
1878 > = match dialect_type {
1879 #[cfg(feature = "dialect-athena")]
1880 DialectType::Athena => Some(Box::new(|expr| {
1881 AthenaDialect.generator_config_for_expr(expr)
1882 })),
1883 _ => None,
1884 };
1885
1886 Self {
1887 dialect_type,
1888 tokenizer: Tokenizer::new(tokenizer_config),
1889 generator_config,
1890 transformer,
1891 generator_config_for_expr,
1892 custom_preprocess: None,
1893 }
1894 }
1895
1896 /// Look up a dialect by string name.
1897 ///
1898 /// Checks built-in dialect names first (via [`DialectType::from_str`]), then
1899 /// falls back to the custom dialect registry. Returns `None` if no dialect
1900 /// with the given name exists.
1901 pub fn get_by_name(name: &str) -> Option<Self> {
1902 // Try built-in first
1903 if let Ok(dt) = DialectType::from_str(name) {
1904 return Some(Self::get(dt));
1905 }
1906
1907 // Try custom registry
1908 let config = get_custom_dialect_config(name)?;
1909 Some(Self::from_custom_config(&config))
1910 }
1911
1912 /// Construct a `Dialect` from a custom dialect configuration.
1913 fn from_custom_config(config: &CustomDialectConfig) -> Self {
1914 // Build the transformer: use custom if provided, else use base dialect's
1915 let transformer: Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync> =
1916 if let Some(ref custom_transform) = config.transform {
1917 let t = Arc::clone(custom_transform);
1918 Box::new(move |e| t(e))
1919 } else {
1920 let (_, _, base_transform) = configs_for_dialect_type(config.base_dialect);
1921 base_transform
1922 };
1923
1924 // Build the custom preprocess: use custom if provided
1925 let custom_preprocess: Option<Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>> =
1926 config.preprocess.as_ref().map(|p| {
1927 let p = Arc::clone(p);
1928 Box::new(move |e: Expression| p(e))
1929 as Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>
1930 });
1931
1932 Self {
1933 dialect_type: config.base_dialect,
1934 tokenizer: Tokenizer::new(config.tokenizer_config.clone()),
1935 generator_config: config.generator_config.clone(),
1936 transformer,
1937 generator_config_for_expr: None,
1938 custom_preprocess,
1939 }
1940 }
1941
    /// Get the dialect type.
    ///
    /// For dialects built from a custom configuration this is the *base*
    /// dialect type the custom dialect was derived from.
    pub fn dialect_type(&self) -> DialectType {
        self.dialect_type
    }
1946
    /// Get the generator configuration.
    ///
    /// This is the dialect-wide default; hybrid dialects may still substitute
    /// an expression-specific config during generation.
    pub fn generator_config(&self) -> &GeneratorConfig {
        &self.generator_config
    }
1951
1952 /// Parses a SQL string into a list of [`Expression`] AST nodes.
1953 ///
1954 /// The input may contain multiple semicolon-separated statements; each one
1955 /// produces a separate element in the returned vector. Tokenization uses
1956 /// this dialect's configured tokenizer, and parsing uses the dialect-aware parser.
1957 pub fn parse(&self, sql: &str) -> Result<Vec<Expression>> {
1958 let tokens = self.tokenizer.tokenize(sql)?;
1959 let config = crate::parser::ParserConfig {
1960 dialect: Some(self.dialect_type),
1961 ..Default::default()
1962 };
1963 let mut parser = Parser::with_source(tokens, config, sql.to_string());
1964 parser.parse()
1965 }
1966
    /// Tokenize SQL using this dialect's tokenizer configuration.
    ///
    /// Exposes the raw token stream without parsing it.
    pub fn tokenize(&self, sql: &str) -> Result<Vec<Token>> {
        self.tokenizer.tokenize(sql)
    }
1971
1972 /// Get the generator config for a specific expression (supports hybrid dialects)
1973 fn get_config_for_expr(&self, expr: &Expression) -> GeneratorConfig {
1974 if let Some(ref config_fn) = self.generator_config_for_expr {
1975 config_fn(expr)
1976 } else {
1977 self.generator_config.clone()
1978 }
1979 }
1980
1981 /// Generates a SQL string from an [`Expression`] AST node.
1982 ///
1983 /// The output uses this dialect's generator configuration for identifier quoting,
1984 /// keyword casing, function name normalization, and syntax style. The result is
1985 /// a single-line (non-pretty) SQL string.
1986 pub fn generate(&self, expr: &Expression) -> Result<String> {
1987 let config = self.get_config_for_expr(expr);
1988 let mut generator = Generator::with_config(config);
1989 generator.generate(expr)
1990 }
1991
1992 /// Generate SQL from an expression with pretty printing enabled
1993 pub fn generate_pretty(&self, expr: &Expression) -> Result<String> {
1994 let mut config = self.get_config_for_expr(expr);
1995 config.pretty = true;
1996 let mut generator = Generator::with_config(config);
1997 generator.generate(expr)
1998 }
1999
2000 /// Generate SQL from an expression with source dialect info (for transpilation)
2001 pub fn generate_with_source(&self, expr: &Expression, source: DialectType) -> Result<String> {
2002 let mut config = self.get_config_for_expr(expr);
2003 config.source_dialect = Some(source);
2004 let mut generator = Generator::with_config(config);
2005 generator.generate(expr)
2006 }
2007
2008 /// Generate SQL from an expression with pretty printing and source dialect info
2009 pub fn generate_pretty_with_source(
2010 &self,
2011 expr: &Expression,
2012 source: DialectType,
2013 ) -> Result<String> {
2014 let mut config = self.get_config_for_expr(expr);
2015 config.pretty = true;
2016 config.source_dialect = Some(source);
2017 let mut generator = Generator::with_config(config);
2018 generator.generate(expr)
2019 }
2020
2021 /// Generate SQL from an expression with forced identifier quoting (identify=True)
2022 pub fn generate_with_identify(&self, expr: &Expression) -> Result<String> {
2023 let mut config = self.get_config_for_expr(expr);
2024 config.always_quote_identifiers = true;
2025 let mut generator = Generator::with_config(config);
2026 generator.generate(expr)
2027 }
2028
2029 /// Generate SQL from an expression with pretty printing and forced identifier quoting
2030 pub fn generate_pretty_with_identify(&self, expr: &Expression) -> Result<String> {
2031 let mut config = self.generator_config.clone();
2032 config.pretty = true;
2033 config.always_quote_identifiers = true;
2034 let mut generator = Generator::with_config(config);
2035 generator.generate(expr)
2036 }
2037
2038 /// Generate SQL from an expression with caller-specified config overrides
2039 pub fn generate_with_overrides(
2040 &self,
2041 expr: &Expression,
2042 overrides: impl FnOnce(&mut GeneratorConfig),
2043 ) -> Result<String> {
2044 let mut config = self.get_config_for_expr(expr);
2045 overrides(&mut config);
2046 let mut generator = Generator::with_config(config);
2047 generator.generate(expr)
2048 }
2049
2050 /// Transforms an expression tree to conform to this dialect's syntax and semantics.
2051 ///
2052 /// The transformation proceeds in two phases:
2053 /// 1. **Preprocessing** -- whole-tree structural rewrites such as eliminating QUALIFY,
2054 /// ensuring boolean predicates, or converting DISTINCT ON to a window-function pattern.
2055 /// 2. **Recursive per-node transform** -- a bottom-up pass via [`transform_recursive`]
2056 /// that applies this dialect's [`DialectImpl::transform_expr`] to every node.
2057 ///
2058 /// This method is used both during transpilation (to rewrite an AST for a target dialect)
2059 /// and for identity transforms (normalizing SQL within the same dialect).
2060 pub fn transform(&self, expr: Expression) -> Result<Expression> {
2061 // Apply preprocessing transforms based on dialect
2062 let preprocessed = self.preprocess(expr)?;
2063 // Then apply recursive transformation
2064 transform_recursive(preprocessed, &self.transformer)
2065 }
2066
    /// Apply dialect-specific preprocessing transforms.
    ///
    /// Runs once over the whole expression tree before the recursive per-node
    /// transform pass in [`Dialect::transform`]. For custom dialects a
    /// user-supplied `custom_preprocess` replaces all of the built-in logic.
    /// Each built-in arm below is feature-gated so disabled dialects compile out.
    fn preprocess(&self, expr: Expression) -> Result<Expression> {
        // If a custom preprocess function is set, use it instead of the built-in logic
        if let Some(ref custom_preprocess) = self.custom_preprocess {
            return custom_preprocess(expr);
        }

        // Only import the transforms module when at least one dialect that
        // needs it is compiled in, to avoid an unused-import warning otherwise.
        #[cfg(any(
            feature = "dialect-mysql",
            feature = "dialect-postgresql",
            feature = "dialect-bigquery",
            feature = "dialect-snowflake",
            feature = "dialect-tsql",
            feature = "dialect-spark",
            feature = "dialect-databricks",
            feature = "dialect-hive",
            feature = "dialect-sqlite",
            feature = "dialect-trino",
            feature = "dialect-presto",
            feature = "dialect-duckdb",
            feature = "dialect-redshift",
            feature = "dialect-starrocks",
            feature = "dialect-oracle",
            feature = "dialect-clickhouse",
        ))]
        use crate::transforms;

        match self.dialect_type {
            // MySQL doesn't support QUALIFY, DISTINCT ON, FULL OUTER JOIN
            // MySQL doesn't natively support GENERATE_DATE_ARRAY (expand to recursive CTE)
            #[cfg(feature = "dialect-mysql")]
            DialectType::MySQL => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_full_outer_join(expr)?;
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
                Ok(expr)
            }
            // PostgreSQL doesn't support QUALIFY
            // PostgreSQL: UNNEST(GENERATE_SERIES) -> subquery wrapping
            // PostgreSQL: Normalize SET ... TO to SET ... = in CREATE FUNCTION
            #[cfg(feature = "dialect-postgresql")]
            DialectType::PostgreSQL => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::unwrap_unnest_generate_series_for_postgres(expr)?;
                // Normalize SET ... TO to SET ... = in CREATE FUNCTION
                // Only normalize when sqlglot would fully parse (no body) —
                // sqlglot falls back to Command for complex function bodies,
                // preserving the original text including TO.
                let expr = if let Expression::CreateFunction(mut cf) = expr {
                    if cf.body.is_none() {
                        for opt in &mut cf.set_options {
                            if let crate::expressions::FunctionSetValue::Value { use_to, .. } =
                                &mut opt.value
                            {
                                *use_to = false;
                            }
                        }
                    }
                    Expression::CreateFunction(cf)
                } else {
                    expr
                };
                Ok(expr)
            }
            // BigQuery doesn't support DISTINCT ON or CTE column aliases
            #[cfg(feature = "dialect-bigquery")]
            DialectType::BigQuery => {
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::pushdown_cte_column_names(expr)?;
                let expr = transforms::explode_projection_to_unnest(expr, DialectType::BigQuery)?;
                Ok(expr)
            }
            // Snowflake
            #[cfg(feature = "dialect-snowflake")]
            DialectType::Snowflake => {
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::eliminate_window_clause(expr)?;
                let expr = transforms::snowflake_flatten_projection_to_unnest(expr)?;
                Ok(expr)
            }
            // TSQL doesn't support QUALIFY
            // TSQL requires boolean expressions in WHERE/HAVING (no implicit truthiness)
            // TSQL doesn't support CTEs in subqueries (hoist to top level)
            // NOTE: no_limit_order_by_union is handled in cross_dialect_normalize (not preprocess)
            // to avoid breaking TSQL identity tests where ORDER BY on UNION is valid
            #[cfg(feature = "dialect-tsql")]
            DialectType::TSQL => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::ensure_bools(expr)?;
                let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
                let expr = transforms::move_ctes_to_top_level(expr)?;
                let expr = transforms::qualify_derived_table_outputs(expr)?;
                Ok(expr)
            }
            // Spark doesn't support QUALIFY (but Databricks does)
            // Spark doesn't support CTEs in subqueries (hoist to top level)
            #[cfg(feature = "dialect-spark")]
            DialectType::Spark => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::add_auto_table_alias(expr)?;
                let expr = transforms::simplify_nested_paren_values(expr)?;
                let expr = transforms::move_ctes_to_top_level(expr)?;
                Ok(expr)
            }
            // Databricks supports QUALIFY natively
            // Databricks doesn't support CTEs in subqueries (hoist to top level)
            #[cfg(feature = "dialect-databricks")]
            DialectType::Databricks => {
                let expr = transforms::add_auto_table_alias(expr)?;
                let expr = transforms::simplify_nested_paren_values(expr)?;
                let expr = transforms::move_ctes_to_top_level(expr)?;
                Ok(expr)
            }
            // Hive doesn't support QUALIFY or CTEs in subqueries
            #[cfg(feature = "dialect-hive")]
            DialectType::Hive => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::move_ctes_to_top_level(expr)?;
                Ok(expr)
            }
            // SQLite doesn't support QUALIFY
            #[cfg(feature = "dialect-sqlite")]
            DialectType::SQLite => {
                let expr = transforms::eliminate_qualify(expr)?;
                Ok(expr)
            }
            // Trino doesn't support QUALIFY
            #[cfg(feature = "dialect-trino")]
            DialectType::Trino => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::explode_projection_to_unnest(expr, DialectType::Trino)?;
                Ok(expr)
            }
            // Presto doesn't support QUALIFY or WINDOW clause
            #[cfg(feature = "dialect-presto")]
            DialectType::Presto => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_window_clause(expr)?;
                let expr = transforms::explode_projection_to_unnest(expr, DialectType::Presto)?;
                Ok(expr)
            }
            // DuckDB supports QUALIFY - no elimination needed
            // Expand POSEXPLODE to GENERATE_SUBSCRIPTS + UNNEST
            // Expand LIKE ANY / ILIKE ANY to OR chains (DuckDB doesn't support quantifiers)
            #[cfg(feature = "dialect-duckdb")]
            DialectType::DuckDB => {
                let expr = transforms::expand_posexplode_duckdb(expr)?;
                let expr = transforms::expand_like_any(expr)?;
                Ok(expr)
            }
            // Redshift doesn't support QUALIFY, WINDOW clause, or GENERATE_DATE_ARRAY
            #[cfg(feature = "dialect-redshift")]
            DialectType::Redshift => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_window_clause(expr)?;
                let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
                Ok(expr)
            }
            // StarRocks doesn't support BETWEEN in DELETE statements or QUALIFY
            #[cfg(feature = "dialect-starrocks")]
            DialectType::StarRocks => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::expand_between_in_delete(expr)?;
                Ok(expr)
            }
            // DataFusion supports QUALIFY and semi/anti joins natively
            #[cfg(feature = "dialect-datafusion")]
            DialectType::DataFusion => Ok(expr),
            // Oracle doesn't support QUALIFY
            #[cfg(feature = "dialect-oracle")]
            DialectType::Oracle => {
                let expr = transforms::eliminate_qualify(expr)?;
                Ok(expr)
            }
            // Drill - no special preprocessing needed
            #[cfg(feature = "dialect-drill")]
            DialectType::Drill => Ok(expr),
            // Teradata - no special preprocessing needed
            #[cfg(feature = "dialect-teradata")]
            DialectType::Teradata => Ok(expr),
            // ClickHouse doesn't support ORDER BY/LIMIT directly on UNION
            #[cfg(feature = "dialect-clickhouse")]
            DialectType::ClickHouse => {
                let expr = transforms::no_limit_order_by_union(expr)?;
                Ok(expr)
            }
            // Other dialects - no preprocessing
            _ => Ok(expr),
        }
    }
2260
    /// Transpile SQL from this dialect to another.
    ///
    /// Convenience wrapper over the internal transpilation pipeline with
    /// pretty-printing disabled; returns one SQL string per input statement.
    pub fn transpile_to(&self, sql: &str, target: DialectType) -> Result<Vec<String>> {
        self.transpile_to_inner(sql, target, false)
    }
2265
    /// Transpile SQL from this dialect to another with pretty printing enabled.
    ///
    /// Same pipeline as [`Dialect::transpile_to`], but the generated SQL is
    /// pretty-printed.
    pub fn transpile_to_pretty(&self, sql: &str, target: DialectType) -> Result<Vec<String>> {
        self.transpile_to_inner(sql, target, true)
    }
2270
2271 #[cfg(not(feature = "transpile"))]
2272 fn transpile_to_inner(
2273 &self,
2274 sql: &str,
2275 target: DialectType,
2276 pretty: bool,
2277 ) -> Result<Vec<String>> {
2278 // Without the transpile feature, only same-dialect or to/from generic is supported
2279 if self.dialect_type != target
2280 && self.dialect_type != DialectType::Generic
2281 && target != DialectType::Generic
2282 {
2283 return Err(crate::error::Error::parse(
2284 "Cross-dialect transpilation not available in this build",
2285 0,
2286 0,
2287 0,
2288 0,
2289 ));
2290 }
2291
2292 let expressions = self.parse(sql)?;
2293 let target_dialect = Dialect::get(target);
2294 let generic_identity =
2295 self.dialect_type == DialectType::Generic && target == DialectType::Generic;
2296
2297 if generic_identity {
2298 return expressions
2299 .into_iter()
2300 .map(|expr| {
2301 if pretty {
2302 target_dialect.generate_pretty_with_source(&expr, self.dialect_type)
2303 } else {
2304 target_dialect.generate_with_source(&expr, self.dialect_type)
2305 }
2306 })
2307 .collect();
2308 }
2309
2310 expressions
2311 .into_iter()
2312 .map(|expr| {
2313 let transformed = target_dialect.transform(expr)?;
2314 if pretty {
2315 target_dialect.generate_pretty_with_source(&transformed, self.dialect_type)
2316 } else {
2317 target_dialect.generate_with_source(&transformed, self.dialect_type)
2318 }
2319 })
2320 .collect()
2321 }
2322
    /// Full cross-dialect transpilation pipeline (enabled by the `transpile` feature).
    ///
    /// Pipeline per statement: parse with the source dialect, apply a series of
    /// order-sensitive source/target-specific AST normalizations (documented
    /// inline below), run the target dialect's [`Dialect::transform`], then
    /// generate SQL with the source dialect recorded in the generator config.
    /// `pretty` toggles pretty-printed output.
    #[cfg(feature = "transpile")]
    fn transpile_to_inner(
        &self,
        sql: &str,
        target: DialectType,
        pretty: bool,
    ) -> Result<Vec<String>> {
        let expressions = self.parse(sql)?;
        let target_dialect = Dialect::get(target);
        let generic_identity =
            self.dialect_type == DialectType::Generic && target == DialectType::Generic;

        // Generic -> generic skips every normalization and transform step.
        if generic_identity {
            return expressions
                .into_iter()
                .map(|expr| {
                    if pretty {
                        target_dialect.generate_pretty_with_source(&expr, self.dialect_type)
                    } else {
                        target_dialect.generate_with_source(&expr, self.dialect_type)
                    }
                })
                .collect();
        }

        expressions
            .into_iter()
            .map(|expr| {
                // DuckDB source: normalize VARCHAR/CHAR to TEXT (DuckDB doesn't support
                // VARCHAR length constraints). This emulates Python sqlglot's DuckDB parser
                // where VARCHAR_LENGTH = None and VARCHAR maps to TEXT.
                let expr = if matches!(self.dialect_type, DialectType::DuckDB) {
                    use crate::expressions::DataType as DT;
                    transform_recursive(expr, &|e| match e {
                        Expression::DataType(DT::VarChar { .. }) => {
                            Ok(Expression::DataType(DT::Text))
                        }
                        Expression::DataType(DT::Char { .. }) => Ok(Expression::DataType(DT::Text)),
                        _ => Ok(e),
                    })?
                } else {
                    expr
                };

                // When source and target differ, first normalize the source dialect's
                // AST constructs to standard SQL, so that the target dialect can handle them.
                // This handles cases like Snowflake's SQUARE -> POWER, DIV0 -> CASE, etc.
                let normalized =
                    if self.dialect_type != target && self.dialect_type != DialectType::Generic {
                        self.transform(expr)?
                    } else {
                        expr
                    };

                // For TSQL source targeting non-TSQL: unwrap ISNULL(JSON_QUERY(...), JSON_VALUE(...))
                // to just JSON_QUERY(...) so cross_dialect_normalize can convert it cleanly.
                // The TSQL read transform wraps JsonQuery in ISNULL for identity, but for
                // cross-dialect transpilation we need the unwrapped JSON_QUERY.
                let normalized =
                    if matches!(self.dialect_type, DialectType::TSQL | DialectType::Fabric)
                        && !matches!(target, DialectType::TSQL | DialectType::Fabric)
                    {
                        transform_recursive(normalized, &|e| {
                            if let Expression::Function(ref f) = e {
                                if f.name.eq_ignore_ascii_case("ISNULL") && f.args.len() == 2 {
                                    // Check if first arg is JSON_QUERY and second is JSON_VALUE
                                    if let (
                                        Expression::Function(ref jq),
                                        Expression::Function(ref jv),
                                    ) = (&f.args[0], &f.args[1])
                                    {
                                        if jq.name.eq_ignore_ascii_case("JSON_QUERY")
                                            && jv.name.eq_ignore_ascii_case("JSON_VALUE")
                                        {
                                            // Unwrap: return just JSON_QUERY(...)
                                            return Ok(f.args[0].clone());
                                        }
                                    }
                                }
                            }
                            Ok(e)
                        })?
                    } else {
                        normalized
                    };

                // Snowflake source to non-Snowflake target: CURRENT_TIME -> LOCALTIME
                // Snowflake's CURRENT_TIME is equivalent to LOCALTIME in other dialects.
                // Python sqlglot parses Snowflake's CURRENT_TIME as Localtime expression.
                let normalized = if matches!(self.dialect_type, DialectType::Snowflake)
                    && !matches!(target, DialectType::Snowflake)
                {
                    transform_recursive(normalized, &|e| {
                        if let Expression::Function(ref f) = e {
                            if f.name.eq_ignore_ascii_case("CURRENT_TIME") {
                                return Ok(Expression::Localtime(Box::new(
                                    crate::expressions::Localtime { this: None },
                                )));
                            }
                        }
                        Ok(e)
                    })?
                } else {
                    normalized
                };

                // Snowflake source to DuckDB target: REPEAT(' ', n) -> REPEAT(' ', CAST(n AS BIGINT))
                // Snowflake's SPACE(n) is converted to REPEAT(' ', n) by the Snowflake source
                // transform. DuckDB requires the count argument to be BIGINT.
                let normalized = if matches!(self.dialect_type, DialectType::Snowflake)
                    && matches!(target, DialectType::DuckDB)
                {
                    transform_recursive(normalized, &|e| {
                        if let Expression::Function(ref f) = e {
                            if f.name.eq_ignore_ascii_case("REPEAT") && f.args.len() == 2 {
                                // Check if first arg is space string literal
                                if let Expression::Literal(crate::expressions::Literal::String(
                                    ref s,
                                )) = f.args[0]
                                {
                                    if s == " " {
                                        // Wrap second arg in CAST(... AS BIGINT) if not already
                                        if !matches!(f.args[1], Expression::Cast(_)) {
                                            let mut new_args = f.args.clone();
                                            new_args[1] = Expression::Cast(Box::new(
                                                crate::expressions::Cast {
                                                    this: new_args[1].clone(),
                                                    to: crate::expressions::DataType::BigInt {
                                                        length: None,
                                                    },
                                                    trailing_comments: Vec::new(),
                                                    double_colon_syntax: false,
                                                    format: None,
                                                    default: None,
                                                },
                                            ));
                                            return Ok(Expression::Function(Box::new(
                                                crate::expressions::Function {
                                                    name: f.name.clone(),
                                                    args: new_args,
                                                    distinct: f.distinct,
                                                    trailing_comments: f.trailing_comments.clone(),
                                                    use_bracket_syntax: f.use_bracket_syntax,
                                                    no_parens: f.no_parens,
                                                    quoted: f.quoted,
                                                    span: None,
                                                },
                                            )));
                                        }
                                    }
                                }
                            }
                        }
                        Ok(e)
                    })?
                } else {
                    normalized
                };

                // Propagate struct field names in arrays (for BigQuery source to non-BigQuery target)
                // BigQuery->BigQuery should NOT propagate names (BigQuery handles implicit inheritance)
                let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
                    && !matches!(target, DialectType::BigQuery)
                {
                    crate::transforms::propagate_struct_field_names(normalized)?
                } else {
                    normalized
                };

                // Apply cross-dialect semantic normalizations
                let normalized =
                    Self::cross_dialect_normalize(normalized, self.dialect_type, target)?;

                // For DuckDB target from BigQuery source: wrap UNNEST of struct arrays in
                // (SELECT UNNEST(..., max_depth => 2)) subquery
                // Must run BEFORE unnest_alias_to_column_alias since it changes alias structure
                let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
                    && matches!(target, DialectType::DuckDB)
                {
                    crate::transforms::wrap_duckdb_unnest_struct(normalized)?
                } else {
                    normalized
                };

                // Convert BigQuery UNNEST aliases to column-alias format for DuckDB/Presto/Spark
                // UNNEST(arr) AS x -> UNNEST(arr) AS _t0(x)
                let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
                    && matches!(
                        target,
                        DialectType::DuckDB
                            | DialectType::Presto
                            | DialectType::Trino
                            | DialectType::Athena
                            | DialectType::Spark
                            | DialectType::Databricks
                    ) {
                    crate::transforms::unnest_alias_to_column_alias(normalized)?
                } else if matches!(self.dialect_type, DialectType::BigQuery)
                    && matches!(target, DialectType::BigQuery | DialectType::Redshift)
                {
                    // For BigQuery/Redshift targets: move UNNEST FROM items to CROSS JOINs
                    // but don't convert alias format (no _t0 wrapper)
                    let result = crate::transforms::unnest_from_to_cross_join(normalized)?;
                    // For Redshift: strip UNNEST when arg is a column reference path
                    if matches!(target, DialectType::Redshift) {
                        crate::transforms::strip_unnest_column_refs(result)?
                    } else {
                        result
                    }
                } else {
                    normalized
                };

                // For Presto/Trino targets from PostgreSQL/Redshift source:
                // Wrap UNNEST aliases from GENERATE_SERIES conversion: AS s -> AS _u(s)
                let normalized = if matches!(
                    self.dialect_type,
                    DialectType::PostgreSQL | DialectType::Redshift
                ) && matches!(
                    target,
                    DialectType::Presto | DialectType::Trino | DialectType::Athena
                ) {
                    crate::transforms::wrap_unnest_join_aliases(normalized)?
                } else {
                    normalized
                };

                // Eliminate DISTINCT ON with target-dialect awareness
                // This must happen after source transform (which may produce DISTINCT ON)
                // and before target transform, with knowledge of the target dialect's NULL ordering behavior
                let normalized =
                    crate::transforms::eliminate_distinct_on_for_dialect(normalized, Some(target))?;

                // GENERATE_DATE_ARRAY in UNNEST -> Snowflake ARRAY_GENERATE_RANGE + DATEADD
                let normalized = if matches!(target, DialectType::Snowflake) {
                    Self::transform_generate_date_array_snowflake(normalized)?
                } else {
                    normalized
                };

                // CROSS JOIN UNNEST -> LATERAL VIEW EXPLODE/INLINE for Spark/Hive/Databricks
                let normalized = if matches!(
                    target,
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive
                ) {
                    crate::transforms::unnest_to_explode_select(normalized)?
                } else {
                    normalized
                };

                // Wrap UNION with ORDER BY/LIMIT in a subquery for dialects that require it
                let normalized = if matches!(target, DialectType::ClickHouse | DialectType::TSQL) {
                    crate::transforms::no_limit_order_by_union(normalized)?
                } else {
                    normalized
                };

                // TSQL: Convert COUNT(*) -> COUNT_BIG(*) when source is not TSQL/Fabric
                // Python sqlglot does this in the TSQL generator, but we can't do it there
                // because it would break TSQL -> TSQL identity
                let normalized = if matches!(target, DialectType::TSQL | DialectType::Fabric)
                    && !matches!(self.dialect_type, DialectType::TSQL | DialectType::Fabric)
                {
                    transform_recursive(normalized, &|e| {
                        if let Expression::Count(ref c) = e {
                            // Build COUNT_BIG(...) as an AggregateFunction
                            let args = if c.star {
                                vec![Expression::Star(crate::expressions::Star {
                                    table: None,
                                    except: None,
                                    replace: None,
                                    rename: None,
                                    trailing_comments: Vec::new(),
                                    span: None,
                                })]
                            } else if let Some(ref this) = c.this {
                                vec![this.clone()]
                            } else {
                                vec![]
                            };
                            Ok(Expression::AggregateFunction(Box::new(
                                crate::expressions::AggregateFunction {
                                    name: "COUNT_BIG".to_string(),
                                    args,
                                    distinct: c.distinct,
                                    filter: c.filter.clone(),
                                    order_by: Vec::new(),
                                    limit: None,
                                    ignore_nulls: None,
                                },
                            )))
                        } else {
                            Ok(e)
                        }
                    })?
                } else {
                    normalized
                };

                let transformed = target_dialect.transform(normalized)?;
                let mut sql = if pretty {
                    target_dialect.generate_pretty_with_source(&transformed, self.dialect_type)?
                } else {
                    target_dialect.generate_with_source(&transformed, self.dialect_type)?
                };

                // Align a known Snowflake pretty-print edge case with Python sqlglot output.
                if pretty && target == DialectType::Snowflake {
                    sql = Self::normalize_snowflake_pretty(sql);
                }

                Ok(sql)
            })
            .collect()
    }
2638}
2639
2640// Transpile-only methods: cross-dialect normalization and helpers
2641#[cfg(feature = "transpile")]
2642impl Dialect {
2643 /// Transform BigQuery GENERATE_DATE_ARRAY in UNNEST for Snowflake target.
2644 /// Converts:
2645 /// SELECT ..., alias, ... FROM t CROSS JOIN UNNEST(GENERATE_DATE_ARRAY(start, end, INTERVAL '1' unit)) AS alias
2646 /// To:
2647 /// SELECT ..., DATEADD(unit, CAST(alias AS INT), CAST(start AS DATE)) AS alias, ...
2648 /// FROM t, LATERAL FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (DATEDIFF(unit, start, end) + 1 - 1) + 1)) AS _t0(seq, key, path, index, alias, this)
2649 fn transform_generate_date_array_snowflake(expr: Expression) -> Result<Expression> {
2650 use crate::expressions::*;
2651 transform_recursive(expr, &|e| {
2652 // Handle ARRAY_SIZE(GENERATE_DATE_ARRAY(...)) -> ARRAY_SIZE((SELECT ARRAY_AGG(*) FROM subquery))
2653 if let Expression::ArraySize(ref af) = e {
2654 if let Expression::Function(ref f) = af.this {
2655 if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY") && f.args.len() >= 2 {
2656 let result = Self::convert_array_size_gda_snowflake(f)?;
2657 return Ok(result);
2658 }
2659 }
2660 }
2661
2662 let Expression::Select(mut sel) = e else {
2663 return Ok(e);
2664 };
2665
2666 // Find joins with UNNEST containing GenerateSeries (from GENERATE_DATE_ARRAY conversion)
2667 let mut gda_info: Option<(String, Expression, Expression, String)> = None; // (alias_name, start_expr, end_expr, unit)
2668 let mut gda_join_idx: Option<usize> = None;
2669
2670 for (idx, join) in sel.joins.iter().enumerate() {
2671 // The join.this may be:
2672 // 1. Unnest(UnnestFunc { alias: Some("mnth"), ... })
2673 // 2. Alias(Alias { this: Unnest(UnnestFunc { alias: None, ... }), alias: "mnth", ... })
2674 let (unnest_ref, alias_name) = match &join.this {
2675 Expression::Unnest(ref unnest) => {
2676 let alias = unnest.alias.as_ref().map(|id| id.name.clone());
2677 (Some(unnest.as_ref()), alias)
2678 }
2679 Expression::Alias(ref a) => {
2680 if let Expression::Unnest(ref unnest) = a.this {
2681 (Some(unnest.as_ref()), Some(a.alias.name.clone()))
2682 } else {
2683 (None, None)
2684 }
2685 }
2686 _ => (None, None),
2687 };
2688
2689 if let (Some(unnest), Some(alias)) = (unnest_ref, alias_name) {
2690 // Check the main expression (this) of the UNNEST for GENERATE_DATE_ARRAY function
2691 if let Expression::Function(ref f) = unnest.this {
2692 if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY") && f.args.len() >= 2 {
2693 let start_expr = f.args[0].clone();
2694 let end_expr = f.args[1].clone();
2695 let step = f.args.get(2).cloned();
2696
2697 // Extract unit from step interval
2698 let unit = if let Some(Expression::Interval(ref iv)) = step {
2699 if let Some(IntervalUnitSpec::Simple { ref unit, .. }) = iv.unit {
2700 Some(format!("{:?}", unit).to_uppercase())
2701 } else if let Some(ref this) = iv.this {
2702 // The interval may be stored as a string like "1 MONTH"
2703 if let Expression::Literal(Literal::String(ref s)) = this {
2704 let parts: Vec<&str> = s.split_whitespace().collect();
2705 if parts.len() == 2 {
2706 Some(parts[1].to_uppercase())
2707 } else if parts.len() == 1 {
2708 // Single word like "MONTH" or just "1"
2709 let upper = parts[0].to_uppercase();
2710 if matches!(
2711 upper.as_str(),
2712 "YEAR"
2713 | "QUARTER"
2714 | "MONTH"
2715 | "WEEK"
2716 | "DAY"
2717 | "HOUR"
2718 | "MINUTE"
2719 | "SECOND"
2720 ) {
2721 Some(upper)
2722 } else {
2723 None
2724 }
2725 } else {
2726 None
2727 }
2728 } else {
2729 None
2730 }
2731 } else {
2732 None
2733 }
2734 } else {
2735 None
2736 };
2737
2738 if let Some(unit_str) = unit {
2739 gda_info = Some((alias, start_expr, end_expr, unit_str));
2740 gda_join_idx = Some(idx);
2741 }
2742 }
2743 }
2744 }
2745 if gda_info.is_some() {
2746 break;
2747 }
2748 }
2749
2750 let Some((alias_name, start_expr, end_expr, unit_str)) = gda_info else {
2751 // Also check FROM clause for UNNEST(GENERATE_DATE_ARRAY(...)) patterns
2752 // This handles Generic->Snowflake where GENERATE_DATE_ARRAY is in FROM, not in JOIN
2753 let result = Self::try_transform_from_gda_snowflake(sel);
2754 return result;
2755 };
2756 let join_idx = gda_join_idx.unwrap();
2757
2758 // Build ARRAY_GENERATE_RANGE(0, (DATEDIFF(unit, start, end) + 1 - 1) + 1)
2759 let datediff = Expression::Function(Box::new(Function::new(
2760 "DATEDIFF".to_string(),
2761 vec![
2762 Expression::Column(Column {
2763 name: Identifier::new(&unit_str),
2764 table: None,
2765 join_mark: false,
2766 trailing_comments: vec![],
2767 span: None,
2768 }),
2769 start_expr.clone(),
2770 end_expr.clone(),
2771 ],
2772 )));
2773 // (DATEDIFF(...) + 1 - 1) + 1
2774 let plus_one = Expression::Add(Box::new(BinaryOp {
2775 left: datediff,
2776 right: Expression::Literal(Literal::Number("1".to_string())),
2777 left_comments: vec![],
2778 operator_comments: vec![],
2779 trailing_comments: vec![],
2780 }));
2781 let minus_one = Expression::Sub(Box::new(BinaryOp {
2782 left: plus_one,
2783 right: Expression::Literal(Literal::Number("1".to_string())),
2784 left_comments: vec![],
2785 operator_comments: vec![],
2786 trailing_comments: vec![],
2787 }));
2788 let paren_inner = Expression::Paren(Box::new(Paren {
2789 this: minus_one,
2790 trailing_comments: vec![],
2791 }));
2792 let outer_plus_one = Expression::Add(Box::new(BinaryOp {
2793 left: paren_inner,
2794 right: Expression::Literal(Literal::Number("1".to_string())),
2795 left_comments: vec![],
2796 operator_comments: vec![],
2797 trailing_comments: vec![],
2798 }));
2799
2800 let array_gen_range = Expression::Function(Box::new(Function::new(
2801 "ARRAY_GENERATE_RANGE".to_string(),
2802 vec![
2803 Expression::Literal(Literal::Number("0".to_string())),
2804 outer_plus_one,
2805 ],
2806 )));
2807
2808 // Build FLATTEN(INPUT => ARRAY_GENERATE_RANGE(...))
2809 let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
2810 name: Identifier::new("INPUT"),
2811 value: array_gen_range,
2812 separator: crate::expressions::NamedArgSeparator::DArrow,
2813 }));
2814 let flatten = Expression::Function(Box::new(Function::new(
2815 "FLATTEN".to_string(),
2816 vec![flatten_input],
2817 )));
2818
2819 // Build LATERAL FLATTEN(...) AS _t0(seq, key, path, index, alias, this)
2820 let alias_table = Alias {
2821 this: flatten,
2822 alias: Identifier::new("_t0"),
2823 column_aliases: vec![
2824 Identifier::new("seq"),
2825 Identifier::new("key"),
2826 Identifier::new("path"),
2827 Identifier::new("index"),
2828 Identifier::new(&alias_name),
2829 Identifier::new("this"),
2830 ],
2831 pre_alias_comments: vec![],
2832 trailing_comments: vec![],
2833 };
2834 let lateral_expr = Expression::Lateral(Box::new(Lateral {
2835 this: Box::new(Expression::Alias(Box::new(alias_table))),
2836 view: None,
2837 outer: None,
2838 alias: None,
2839 alias_quoted: false,
2840 cross_apply: None,
2841 ordinality: None,
2842 column_aliases: vec![],
2843 }));
2844
2845 // Remove the original join and add to FROM expressions
2846 sel.joins.remove(join_idx);
2847 if let Some(ref mut from) = sel.from {
2848 from.expressions.push(lateral_expr);
2849 }
2850
2851 // Build DATEADD(unit, CAST(alias AS INT), CAST(start AS DATE))
2852 let dateadd_expr = Expression::Function(Box::new(Function::new(
2853 "DATEADD".to_string(),
2854 vec![
2855 Expression::Column(Column {
2856 name: Identifier::new(&unit_str),
2857 table: None,
2858 join_mark: false,
2859 trailing_comments: vec![],
2860 span: None,
2861 }),
2862 Expression::Cast(Box::new(Cast {
2863 this: Expression::Column(Column {
2864 name: Identifier::new(&alias_name),
2865 table: None,
2866 join_mark: false,
2867 trailing_comments: vec![],
2868 span: None,
2869 }),
2870 to: DataType::Int {
2871 length: None,
2872 integer_spelling: false,
2873 },
2874 trailing_comments: vec![],
2875 double_colon_syntax: false,
2876 format: None,
2877 default: None,
2878 })),
2879 Expression::Cast(Box::new(Cast {
2880 this: start_expr.clone(),
2881 to: DataType::Date,
2882 trailing_comments: vec![],
2883 double_colon_syntax: false,
2884 format: None,
2885 default: None,
2886 })),
2887 ],
2888 )));
2889
2890 // Replace references to the alias in the SELECT list
2891 let new_exprs: Vec<Expression> = sel
2892 .expressions
2893 .iter()
2894 .map(|expr| Self::replace_column_ref_with_dateadd(expr, &alias_name, &dateadd_expr))
2895 .collect();
2896 sel.expressions = new_exprs;
2897
2898 Ok(Expression::Select(sel))
2899 })
2900 }
2901
2902 /// Helper: replace column references to `alias_name` with dateadd expression
2903 fn replace_column_ref_with_dateadd(
2904 expr: &Expression,
2905 alias_name: &str,
2906 dateadd: &Expression,
2907 ) -> Expression {
2908 use crate::expressions::*;
2909 match expr {
2910 Expression::Column(c) if c.name.name == alias_name && c.table.is_none() => {
2911 // Plain column reference -> DATEADD(...) AS alias_name
2912 Expression::Alias(Box::new(Alias {
2913 this: dateadd.clone(),
2914 alias: Identifier::new(alias_name),
2915 column_aliases: vec![],
2916 pre_alias_comments: vec![],
2917 trailing_comments: vec![],
2918 }))
2919 }
2920 Expression::Alias(a) => {
2921 // Check if the inner expression references the alias
2922 let new_this = Self::replace_column_ref_inner(&a.this, alias_name, dateadd);
2923 Expression::Alias(Box::new(Alias {
2924 this: new_this,
2925 alias: a.alias.clone(),
2926 column_aliases: a.column_aliases.clone(),
2927 pre_alias_comments: a.pre_alias_comments.clone(),
2928 trailing_comments: a.trailing_comments.clone(),
2929 }))
2930 }
2931 _ => expr.clone(),
2932 }
2933 }
2934
2935 /// Helper: replace column references in inner expression (not top-level)
2936 fn replace_column_ref_inner(
2937 expr: &Expression,
2938 alias_name: &str,
2939 dateadd: &Expression,
2940 ) -> Expression {
2941 use crate::expressions::*;
2942 match expr {
2943 Expression::Column(c) if c.name.name == alias_name && c.table.is_none() => {
2944 dateadd.clone()
2945 }
2946 Expression::Add(op) => {
2947 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
2948 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
2949 Expression::Add(Box::new(BinaryOp {
2950 left,
2951 right,
2952 left_comments: op.left_comments.clone(),
2953 operator_comments: op.operator_comments.clone(),
2954 trailing_comments: op.trailing_comments.clone(),
2955 }))
2956 }
2957 Expression::Sub(op) => {
2958 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
2959 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
2960 Expression::Sub(Box::new(BinaryOp {
2961 left,
2962 right,
2963 left_comments: op.left_comments.clone(),
2964 operator_comments: op.operator_comments.clone(),
2965 trailing_comments: op.trailing_comments.clone(),
2966 }))
2967 }
2968 Expression::Mul(op) => {
2969 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
2970 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
2971 Expression::Mul(Box::new(BinaryOp {
2972 left,
2973 right,
2974 left_comments: op.left_comments.clone(),
2975 operator_comments: op.operator_comments.clone(),
2976 trailing_comments: op.trailing_comments.clone(),
2977 }))
2978 }
2979 _ => expr.clone(),
2980 }
2981 }
2982
    /// Handle UNNEST(GENERATE_DATE_ARRAY(...)) in FROM clause for Snowflake target.
    /// Converts to a subquery with DATEADD + TABLE(FLATTEN(ARRAY_GENERATE_RANGE(...))).
    ///
    /// Invoked as the fallback when `transform_generate_date_array_snowflake`
    /// finds no matching JOIN. If no GENERATE_DATE_ARRAY is found in the FROM
    /// clause either, the SELECT is returned unchanged.
    fn try_transform_from_gda_snowflake(
        mut sel: Box<crate::expressions::Select>,
    ) -> Result<Expression> {
        use crate::expressions::*;

        // Extract GDA info from FROM clause
        let mut gda_info: Option<(
            usize,
            String,
            Expression,
            Expression,
            String,
            Option<(String, Vec<Identifier>)>,
        )> = None; // (from_idx, col_name, start, end, unit, outer_alias)

        if let Some(ref from) = sel.from {
            for (idx, table_expr) in from.expressions.iter().enumerate() {
                // Pattern 1: UNNEST(GENERATE_DATE_ARRAY(...))
                // Pattern 2: Alias(UNNEST(GENERATE_DATE_ARRAY(...))) AS _q(date_week)
                let (unnest_opt, outer_alias_info) = match table_expr {
                    Expression::Unnest(ref unnest) => (Some(unnest.as_ref()), None),
                    Expression::Alias(ref a) => {
                        if let Expression::Unnest(ref unnest) = a.this {
                            let alias_info = (a.alias.name.clone(), a.column_aliases.clone());
                            (Some(unnest.as_ref()), Some(alias_info))
                        } else {
                            (None, None)
                        }
                    }
                    _ => (None, None),
                };

                if let Some(unnest) = unnest_opt {
                    // Check for GENERATE_DATE_ARRAY function
                    let func_opt = match &unnest.this {
                        Expression::Function(ref f)
                            if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY")
                                && f.args.len() >= 2 =>
                        {
                            Some(f)
                        }
                        // Also check for GenerateSeries (from earlier normalization)
                        _ => None,
                    };

                    if let Some(f) = func_opt {
                        let start_expr = f.args[0].clone();
                        let end_expr = f.args[1].clone();
                        let step = f.args.get(2).cloned();

                        // Extract unit and column name
                        let unit = Self::extract_interval_unit_str(&step);
                        // Column name comes from the outer alias's first column alias
                        // (e.g. AS _q(date_week)); default to Snowflake's "value".
                        let col_name = outer_alias_info
                            .as_ref()
                            .and_then(|(_, cols)| cols.first().map(|id| id.name.clone()))
                            .unwrap_or_else(|| "value".to_string());

                        if let Some(unit_str) = unit {
                            gda_info = Some((
                                idx,
                                col_name,
                                start_expr,
                                end_expr,
                                unit_str,
                                outer_alias_info,
                            ));
                            break;
                        }
                    }
                }
            }
        }

        // No rewritable pattern found: leave the SELECT as-is.
        let Some((from_idx, col_name, start_expr, end_expr, unit_str, outer_alias_info)) = gda_info
        else {
            return Ok(Expression::Select(sel));
        };

        // Build the Snowflake subquery:
        // (SELECT DATEADD(unit, CAST(col_name AS INT), CAST(start AS DATE)) AS col_name
        //  FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (DATEDIFF(unit, start, end) + 1 - 1) + 1))) AS _t0(seq, key, path, index, col_name, this))

        // DATEDIFF(unit, start, end)
        let datediff = Expression::Function(Box::new(Function::new(
            "DATEDIFF".to_string(),
            vec![
                Expression::Column(Column {
                    name: Identifier::new(&unit_str),
                    table: None,
                    join_mark: false,
                    trailing_comments: vec![],
                    span: None,
                }),
                start_expr.clone(),
                end_expr.clone(),
            ],
        )));
        // (DATEDIFF(...) + 1 - 1) + 1 -- mirrors Python sqlglot's generated shape
        let plus_one = Expression::Add(Box::new(BinaryOp {
            left: datediff,
            right: Expression::Literal(Literal::Number("1".to_string())),
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
        }));
        let minus_one = Expression::Sub(Box::new(BinaryOp {
            left: plus_one,
            right: Expression::Literal(Literal::Number("1".to_string())),
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
        }));
        let paren_inner = Expression::Paren(Box::new(Paren {
            this: minus_one,
            trailing_comments: vec![],
        }));
        let outer_plus_one = Expression::Add(Box::new(BinaryOp {
            left: paren_inner,
            right: Expression::Literal(Literal::Number("1".to_string())),
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
        }));

        let array_gen_range = Expression::Function(Box::new(Function::new(
            "ARRAY_GENERATE_RANGE".to_string(),
            vec![
                Expression::Literal(Literal::Number("0".to_string())),
                outer_plus_one,
            ],
        )));

        // TABLE(FLATTEN(INPUT => ...))
        let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
            name: Identifier::new("INPUT"),
            value: array_gen_range,
            separator: crate::expressions::NamedArgSeparator::DArrow,
        }));
        let flatten = Expression::Function(Box::new(Function::new(
            "FLATTEN".to_string(),
            vec![flatten_input],
        )));

        // Determine alias name for the table: use outer alias or _t0
        let table_alias_name = outer_alias_info
            .as_ref()
            .map(|(name, _)| name.clone())
            .unwrap_or_else(|| "_t0".to_string());

        // TABLE(FLATTEN(...)) AS _t0(seq, key, path, index, col_name, this)
        // The six column aliases bind the desired output column to the VALUE slot.
        let table_func =
            Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])));
        let flatten_aliased = Expression::Alias(Box::new(Alias {
            this: table_func,
            alias: Identifier::new(&table_alias_name),
            column_aliases: vec![
                Identifier::new("seq"),
                Identifier::new("key"),
                Identifier::new("path"),
                Identifier::new("index"),
                Identifier::new(&col_name),
                Identifier::new("this"),
            ],
            pre_alias_comments: vec![],
            trailing_comments: vec![],
        }));

        // SELECT DATEADD(unit, CAST(col_name AS INT), CAST(start AS DATE)) AS col_name
        let dateadd_expr = Expression::Function(Box::new(Function::new(
            "DATEADD".to_string(),
            vec![
                Expression::Column(Column {
                    name: Identifier::new(&unit_str),
                    table: None,
                    join_mark: false,
                    trailing_comments: vec![],
                    span: None,
                }),
                Expression::Cast(Box::new(Cast {
                    this: Expression::Column(Column {
                        name: Identifier::new(&col_name),
                        table: None,
                        join_mark: false,
                        trailing_comments: vec![],
                        span: None,
                    }),
                    to: DataType::Int {
                        length: None,
                        integer_spelling: false,
                    },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                })),
                // Use start_expr directly - it's already been normalized (DATE literal -> CAST)
                start_expr.clone(),
            ],
        )));
        let dateadd_aliased = Expression::Alias(Box::new(Alias {
            this: dateadd_expr,
            alias: Identifier::new(&col_name),
            column_aliases: vec![],
            pre_alias_comments: vec![],
            trailing_comments: vec![],
        }));

        // Build inner SELECT
        let mut inner_select = Select::new();
        inner_select.expressions = vec![dateadd_aliased];
        inner_select.from = Some(From {
            expressions: vec![flatten_aliased],
        });

        let inner_select_expr = Expression::Select(Box::new(inner_select));
        let subquery = Expression::Subquery(Box::new(Subquery {
            this: inner_select_expr,
            alias: None,
            column_aliases: vec![],
            order_by: None,
            limit: None,
            offset: None,
            distribute_by: None,
            sort_by: None,
            cluster_by: None,
            lateral: false,
            modifiers_inside: false,
            trailing_comments: vec![],
        }));

        // If there was an outer alias (e.g., AS _q(date_week)), wrap with alias
        let replacement = if let Some((alias_name, col_aliases)) = outer_alias_info {
            Expression::Alias(Box::new(Alias {
                this: subquery,
                alias: Identifier::new(&alias_name),
                column_aliases: col_aliases,
                pre_alias_comments: vec![],
                trailing_comments: vec![],
            }))
        } else {
            subquery
        };

        // Replace the FROM expression
        if let Some(ref mut from) = sel.from {
            from.expressions[from_idx] = replacement;
        }

        Ok(Expression::Select(sel))
    }
3235
    /// Convert ARRAY_SIZE(GENERATE_DATE_ARRAY(start, end, step)) for Snowflake.
    /// Produces: ARRAY_SIZE((SELECT ARRAY_AGG(*) FROM (SELECT DATEADD(unit, CAST(value AS INT), start) AS value
    /// FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (DATEDIFF(unit, start, end) + 1 - 1) + 1))) AS _t0(...))))
    ///
    /// Callers (see `transform_generate_date_array_snowflake`) guarantee
    /// `f.args.len() >= 2`, so indexing args[0]/args[1] here is safe.
    fn convert_array_size_gda_snowflake(f: &crate::expressions::Function) -> Result<Expression> {
        use crate::expressions::*;

        let start_expr = f.args[0].clone();
        let end_expr = f.args[1].clone();
        let step = f.args.get(2).cloned();
        // Unlike the UNNEST paths (which skip the rewrite when no unit is found),
        // this path always rewrites and falls back to DAY.
        let unit_str = Self::extract_interval_unit_str(&step).unwrap_or_else(|| "DAY".to_string());
        let col_name = "value";

        // Build the inner subquery: same as try_transform_from_gda_snowflake
        let datediff = Expression::Function(Box::new(Function::new(
            "DATEDIFF".to_string(),
            vec![
                Expression::Column(Column {
                    name: Identifier::new(&unit_str),
                    table: None,
                    join_mark: false,
                    trailing_comments: vec![],
                    span: None,
                }),
                start_expr.clone(),
                end_expr.clone(),
            ],
        )));
        // (DATEDIFF(...) + 1 - 1) + 1 -- mirrors Python sqlglot's generated shape
        let plus_one = Expression::Add(Box::new(BinaryOp {
            left: datediff,
            right: Expression::Literal(Literal::Number("1".to_string())),
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
        }));
        let minus_one = Expression::Sub(Box::new(BinaryOp {
            left: plus_one,
            right: Expression::Literal(Literal::Number("1".to_string())),
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
        }));
        let paren_inner = Expression::Paren(Box::new(Paren {
            this: minus_one,
            trailing_comments: vec![],
        }));
        let outer_plus_one = Expression::Add(Box::new(BinaryOp {
            left: paren_inner,
            right: Expression::Literal(Literal::Number("1".to_string())),
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
        }));

        let array_gen_range = Expression::Function(Box::new(Function::new(
            "ARRAY_GENERATE_RANGE".to_string(),
            vec![
                Expression::Literal(Literal::Number("0".to_string())),
                outer_plus_one,
            ],
        )));

        // FLATTEN(INPUT => ARRAY_GENERATE_RANGE(...))
        let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
            name: Identifier::new("INPUT"),
            value: array_gen_range,
            separator: crate::expressions::NamedArgSeparator::DArrow,
        }));
        let flatten = Expression::Function(Box::new(Function::new(
            "FLATTEN".to_string(),
            vec![flatten_input],
        )));

        // TABLE(FLATTEN(...)) AS _t0(seq, key, path, index, value, this)
        let table_func =
            Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])));
        let flatten_aliased = Expression::Alias(Box::new(Alias {
            this: table_func,
            alias: Identifier::new("_t0"),
            column_aliases: vec![
                Identifier::new("seq"),
                Identifier::new("key"),
                Identifier::new("path"),
                Identifier::new("index"),
                Identifier::new(col_name),
                Identifier::new("this"),
            ],
            pre_alias_comments: vec![],
            trailing_comments: vec![],
        }));

        // DATEADD(unit, CAST(value AS INT), start)
        let dateadd_expr = Expression::Function(Box::new(Function::new(
            "DATEADD".to_string(),
            vec![
                Expression::Column(Column {
                    name: Identifier::new(&unit_str),
                    table: None,
                    join_mark: false,
                    trailing_comments: vec![],
                    span: None,
                }),
                Expression::Cast(Box::new(Cast {
                    this: Expression::Column(Column {
                        name: Identifier::new(col_name),
                        table: None,
                        join_mark: false,
                        trailing_comments: vec![],
                        span: None,
                    }),
                    to: DataType::Int {
                        length: None,
                        integer_spelling: false,
                    },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                })),
                start_expr.clone(),
            ],
        )));
        let dateadd_aliased = Expression::Alias(Box::new(Alias {
            this: dateadd_expr,
            alias: Identifier::new(col_name),
            column_aliases: vec![],
            pre_alias_comments: vec![],
            trailing_comments: vec![],
        }));

        // Inner SELECT: SELECT DATEADD(...) AS value FROM TABLE(FLATTEN(...)) AS _t0(...)
        let mut inner_select = Select::new();
        inner_select.expressions = vec![dateadd_aliased];
        inner_select.from = Some(From {
            expressions: vec![flatten_aliased],
        });

        // Wrap in subquery for the inner part
        let inner_subquery = Expression::Subquery(Box::new(Subquery {
            this: Expression::Select(Box::new(inner_select)),
            alias: None,
            column_aliases: vec![],
            order_by: None,
            limit: None,
            offset: None,
            distribute_by: None,
            sort_by: None,
            cluster_by: None,
            lateral: false,
            modifiers_inside: false,
            trailing_comments: vec![],
        }));

        // Outer: SELECT ARRAY_AGG(*) FROM (inner_subquery)
        let star = Expression::Star(Star {
            table: None,
            except: None,
            replace: None,
            rename: None,
            trailing_comments: vec![],
            span: None,
        });
        let array_agg = Expression::ArrayAgg(Box::new(AggFunc {
            this: star,
            distinct: false,
            filter: None,
            order_by: vec![],
            name: Some("ARRAY_AGG".to_string()),
            ignore_nulls: None,
            having_max: None,
            limit: None,
        }));

        let mut outer_select = Select::new();
        outer_select.expressions = vec![array_agg];
        outer_select.from = Some(From {
            expressions: vec![inner_subquery],
        });

        // Wrap in a subquery
        let outer_subquery = Expression::Subquery(Box::new(Subquery {
            this: Expression::Select(Box::new(outer_select)),
            alias: None,
            column_aliases: vec![],
            order_by: None,
            limit: None,
            offset: None,
            distribute_by: None,
            sort_by: None,
            cluster_by: None,
            lateral: false,
            modifiers_inside: false,
            trailing_comments: vec![],
        }));

        // ARRAY_SIZE(subquery)
        Ok(Expression::ArraySize(Box::new(UnaryFunc::new(
            outer_subquery,
        ))))
    }
3432
3433 /// Extract interval unit string from an optional step expression.
3434 fn extract_interval_unit_str(step: &Option<Expression>) -> Option<String> {
3435 use crate::expressions::*;
3436 if let Some(Expression::Interval(ref iv)) = step {
3437 if let Some(IntervalUnitSpec::Simple { ref unit, .. }) = iv.unit {
3438 return Some(format!("{:?}", unit).to_uppercase());
3439 }
3440 if let Some(ref this) = iv.this {
3441 if let Expression::Literal(Literal::String(ref s)) = this {
3442 let parts: Vec<&str> = s.split_whitespace().collect();
3443 if parts.len() == 2 {
3444 return Some(parts[1].to_uppercase());
3445 } else if parts.len() == 1 {
3446 let upper = parts[0].to_uppercase();
3447 if matches!(
3448 upper.as_str(),
3449 "YEAR"
3450 | "QUARTER"
3451 | "MONTH"
3452 | "WEEK"
3453 | "DAY"
3454 | "HOUR"
3455 | "MINUTE"
3456 | "SECOND"
3457 ) {
3458 return Some(upper);
3459 }
3460 }
3461 }
3462 }
3463 }
3464 // Default to DAY if no step or no interval
3465 if step.is_none() {
3466 return Some("DAY".to_string());
3467 }
3468 None
3469 }
3470
3471 fn normalize_snowflake_pretty(mut sql: String) -> String {
3472 if sql.contains("LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)")
3473 && sql.contains("ARRAY_GENERATE_RANGE(0, (GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1) + 1)")
3474 {
3475 sql = sql.replace(
3476 "AND uc.user_id <> ALL (SELECT DISTINCT\n _id\n FROM users, LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)\n WHERE\n GET_PATH(datasource.value, 'name') = 'something')",
3477 "AND uc.user_id <> ALL (\n SELECT DISTINCT\n _id\n FROM users, LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)\n WHERE\n GET_PATH(datasource.value, 'name') = 'something'\n )",
3478 );
3479
3480 sql = sql.replace(
3481 "CROSS JOIN TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1) + 1))) AS _u(seq, key, path, index, pos, this)",
3482 "CROSS JOIN TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (\n GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1\n) + 1))) AS _u(seq, key, path, index, pos, this)",
3483 );
3484
3485 sql = sql.replace(
3486 "OR (_u.pos > (ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1)\n AND _u_2.pos_2 = (ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1))",
3487 "OR (\n _u.pos > (\n ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1\n )\n AND _u_2.pos_2 = (\n ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1\n )\n )",
3488 );
3489 }
3490
3491 sql
3492 }
3493
3494 /// Apply cross-dialect semantic normalizations that depend on knowing both source and target.
3495 /// This handles cases where the same syntax has different semantics across dialects.
3496 fn cross_dialect_normalize(
3497 expr: Expression,
3498 source: DialectType,
3499 target: DialectType,
3500 ) -> Result<Expression> {
3501 use crate::expressions::{
3502 AggFunc, BinaryOp, Case, Cast, ConvertTimezone, DataType, DateTimeField, DateTruncFunc,
3503 Function, Identifier, IsNull, Literal, Null, Paren,
3504 };
3505
3506 // Helper to tag which kind of transform to apply
3507 #[derive(Debug)]
3508 enum Action {
3509 None,
3510 GreatestLeastNull,
3511 ArrayGenerateRange,
3512 Div0TypedDivision,
3513 ArrayAggCollectList,
3514 ArrayAggWithinGroupFilter,
3515 ArrayAggFilter,
3516 CastTimestampToDatetime,
3517 DateTruncWrapCast,
3518 ToDateToCast,
3519 ConvertTimezoneToExpr,
3520 SetToVariable,
3521 RegexpReplaceSnowflakeToDuckDB,
3522 BigQueryFunctionNormalize,
3523 BigQuerySafeDivide,
3524 BigQueryCastType,
3525 BigQueryToHexBare, // _BQ_TO_HEX(x) with no LOWER/UPPER wrapper
3526 BigQueryToHexLower, // LOWER(_BQ_TO_HEX(x))
3527 BigQueryToHexUpper, // UPPER(_BQ_TO_HEX(x))
3528 BigQueryLastDayStripUnit, // LAST_DAY(date, MONTH) -> LAST_DAY(date)
3529 BigQueryCastFormat, // CAST(x AS type FORMAT 'fmt') -> PARSE_DATE/PARSE_TIMESTAMP etc.
3530 BigQueryAnyValueHaving, // ANY_VALUE(x HAVING MAX/MIN y) -> ARG_MAX_NULL/ARG_MIN_NULL for DuckDB
3531 BigQueryApproxQuantiles, // APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
3532 GenericFunctionNormalize, // Cross-dialect function renaming (non-BigQuery sources)
3533 RegexpLikeToDuckDB, // RegexpLike -> REGEXP_MATCHES for DuckDB target (partial match)
3534 EpochConvert, // Expression::Epoch -> target-specific epoch function
3535 EpochMsConvert, // Expression::EpochMs -> target-specific epoch ms function
3536 TSQLTypeNormalize, // TSQL types (MONEY, SMALLMONEY, REAL, DATETIME2) -> standard types
3537 MySQLSafeDivide, // MySQL a/b -> a / NULLIF(b, 0) with optional CAST
3538 NullsOrdering, // Add NULLS FIRST/LAST for ORDER BY
3539 AlterTableRenameStripSchema, // ALTER TABLE db.t1 RENAME TO db.t2 -> ALTER TABLE db.t1 RENAME TO t2
3540 StringAggConvert, // STRING_AGG/WITHIN GROUP -> target-specific aggregate
3541 GroupConcatConvert, // GROUP_CONCAT -> target-specific aggregate
3542 TempTableHash, // TSQL #table -> temp table normalization
3543 ArrayLengthConvert, // CARDINALITY/ARRAY_LENGTH/ARRAY_SIZE -> target-specific
3544 DatePartUnquote, // DATE_PART('month', x) -> DATE_PART(month, x) for Snowflake target
3545 NvlClearOriginal, // Clear NVL original_name for cross-dialect transpilation
3546 HiveCastToTryCast, // Hive/Spark CAST -> TRY_CAST for targets that support it
3547 XorExpand, // MySQL XOR -> (a AND NOT b) OR (NOT a AND b) for non-XOR targets
3548 CastTimestampStripTz, // CAST(x AS TIMESTAMP WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark
3549 JsonExtractToGetJsonObject, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
3550 JsonExtractScalarToGetJsonObject, // JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
3551 JsonQueryValueConvert, // JsonQuery/JsonValue -> target-specific (ISNULL wrapper for TSQL, GET_JSON_OBJECT for Spark, etc.)
3552 JsonLiteralToJsonParse, // JSON 'x' -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake
3553 ArraySyntaxConvert, // ARRAY[x] -> ARRAY(x) for Spark, [x] for BigQuery/DuckDB
3554 AtTimeZoneConvert, // AT TIME ZONE -> AT_TIMEZONE (Presto) / FROM_UTC_TIMESTAMP (Spark)
3555 DayOfWeekConvert, // DAY_OF_WEEK -> dialect-specific
3556 MaxByMinByConvert, // MAX_BY/MIN_BY -> argMax/argMin for ClickHouse
3557 ArrayAggToCollectList, // ARRAY_AGG(x ORDER BY ...) -> COLLECT_LIST(x) for Hive/Spark
3558 ElementAtConvert, // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
3559 CurrentUserParens, // CURRENT_USER -> CURRENT_USER() for Snowflake
3560 CastToJsonForSpark, // CAST(x AS JSON) -> TO_JSON(x) for Spark
3561 CastJsonToFromJson, // CAST(JSON_PARSE(literal) AS ARRAY/MAP) -> FROM_JSON(literal, type_string)
3562 ToJsonConvert, // TO_JSON(x) -> JSON_FORMAT(CAST(x AS JSON)) for Presto etc.
3563 ArrayAggNullFilter, // ARRAY_AGG(x) FILTER(WHERE cond) -> add AND NOT x IS NULL for DuckDB
3564 ArrayAggIgnoreNullsDuckDB, // ARRAY_AGG(x IGNORE NULLS ORDER BY ...) -> ARRAY_AGG(x ORDER BY a NULLS FIRST, ...) for DuckDB
3565 BigQueryPercentileContToDuckDB, // PERCENTILE_CONT(x, frac RESPECT NULLS) -> QUANTILE_CONT(x, frac) for DuckDB
3566 BigQueryArraySelectAsStructToSnowflake, // ARRAY(SELECT AS STRUCT ...) -> (SELECT ARRAY_AGG(OBJECT_CONSTRUCT(...)))
3567 CountDistinctMultiArg, // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN ... END)
3568 VarianceToClickHouse, // Expression::Variance -> varSamp for ClickHouse
3569 StddevToClickHouse, // Expression::Stddev -> stddevSamp for ClickHouse
3570 ApproxQuantileConvert, // Expression::ApproxQuantile -> APPROX_PERCENTILE for Snowflake
3571 ArrayIndexConvert, // array[1] -> array[0] for BigQuery (1-based to 0-based)
3572 DollarParamConvert, // $foo -> @foo for BigQuery
3573 TablesampleReservoir, // TABLESAMPLE (n ROWS) -> TABLESAMPLE RESERVOIR (n ROWS) for DuckDB
3574 BitAggFloatCast, // BIT_OR/BIT_AND/BIT_XOR float arg -> CAST(ROUND(CAST(arg)) AS INT) for DuckDB
3575 BitAggSnowflakeRename, // BIT_OR -> BITORAGG, BIT_AND -> BITANDAGG etc. for Snowflake
3576 StrftimeCastTimestamp, // CAST TIMESTAMP -> TIMESTAMP_NTZ for Spark in STRFTIME
3577 AnyValueIgnoreNulls, // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
3578 CreateTableStripComment, // Strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
3579 EscapeStringNormalize, // e'Hello\nworld' literal newline -> \n
3580 AnyToExists, // PostgreSQL x <op> ANY(array) -> EXISTS(array, x -> ...)
3581 ArrayConcatBracketConvert, // [1,2] -> ARRAY[1,2] for PostgreSQL in ARRAY_CAT
3582 SnowflakeIntervalFormat, // INTERVAL '2' HOUR -> INTERVAL '2 HOUR' for Snowflake
3583 AlterTableToSpRename, // ALTER TABLE RENAME -> EXEC sp_rename for TSQL
3584 StraightJoinCase, // STRAIGHT_JOIN -> straight_join for DuckDB
3585 RespectNullsConvert, // RESPECT NULLS window function handling
3586 MysqlNullsOrdering, // MySQL doesn't support NULLS ordering
3587 MysqlNullsLastRewrite, // Add CASE WHEN to ORDER BY for DuckDB -> MySQL (NULLS LAST simulation)
3588 BigQueryNullsOrdering, // BigQuery doesn't support NULLS FIRST/LAST - strip
3589 SnowflakeFloatProtect, // Protect FLOAT from being converted to DOUBLE by Snowflake target transform
3590 JsonToGetPath, // JSON arrow -> GET_PATH/PARSE_JSON for Snowflake
3591 FilterToIff, // FILTER(WHERE) -> IFF wrapping for Snowflake
3592 AggFilterToIff, // AggFunc.filter -> IFF wrapping for Snowflake (e.g., AVG(x) FILTER(WHERE cond))
3593 StructToRow, // DuckDB struct -> Presto ROW / BigQuery STRUCT
3594 SparkStructConvert, // Spark STRUCT(x AS col1, ...) -> ROW/DuckDB struct
3595 DecimalDefaultPrecision, // DECIMAL -> DECIMAL(18, 3) for Snowflake in BIT agg
3596 ApproxCountDistinctToApproxDistinct, // APPROX_COUNT_DISTINCT -> APPROX_DISTINCT for Presto/Trino
3597 CollectListToArrayAgg, // COLLECT_LIST -> ARRAY_AGG for Presto/DuckDB
3598 CollectSetConvert, // COLLECT_SET -> SET_AGG/ARRAY_AGG(DISTINCT)/ARRAY_UNIQUE_AGG
3599 PercentileConvert, // PERCENTILE -> QUANTILE/APPROX_PERCENTILE
3600 CorrIsnanWrap, // CORR(a,b) -> CASE WHEN ISNAN(CORR(a,b)) THEN NULL ELSE CORR(a,b) END
3601 TruncToDateTrunc, // TRUNC(ts, unit) -> DATE_TRUNC(unit, ts)
3602 ArrayContainsConvert, // ARRAY_CONTAINS -> CONTAINS/target-specific
3603 StrPositionExpand, // StrPosition with position -> complex STRPOS expansion for Presto/DuckDB
3604 TablesampleSnowflakeStrip, // Strip method and PERCENT for Snowflake target
3605 FirstToAnyValue, // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
3606 MonthsBetweenConvert, // Expression::MonthsBetween -> target-specific
3607 CurrentUserSparkParens, // CURRENT_USER -> CURRENT_USER() for Spark
3608 SparkDateFuncCast, // MONTH/YEAR/DAY('str') -> MONTH/YEAR/DAY(CAST('str' AS DATE)) from Spark
3609 MapFromArraysConvert, // Expression::MapFromArrays -> MAP/OBJECT_CONSTRUCT/MAP_FROM_ARRAYS
3610 AddMonthsConvert, // Expression::AddMonths -> target-specific DATEADD/DATE_ADD
3611 PercentileContConvert, // PERCENTILE_CONT/DISC WITHIN GROUP -> APPROX_PERCENTILE/PERCENTILE_APPROX
3612 GenerateSeriesConvert, // GENERATE_SERIES -> SEQUENCE/UNNEST(SEQUENCE)/EXPLODE(SEQUENCE)
3613 ConcatCoalesceWrap, // CONCAT(a, b) -> CONCAT(COALESCE(CAST(a), ''), ...) for Presto/ClickHouse
3614 PipeConcatToConcat, // a || b -> CONCAT(CAST(a), CAST(b)) for Presto
3615 DivFuncConvert, // DIV(a, b) -> a // b for DuckDB, CAST for BigQuery
3616 JsonObjectAggConvert, // JSON_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
3617 JsonbExistsConvert, // JSONB_EXISTS -> JSON_EXISTS for DuckDB
3618 DateBinConvert, // DATE_BIN -> TIME_BUCKET for DuckDB
3619 MysqlCastCharToText, // MySQL CAST(x AS CHAR) -> CAST(x AS TEXT/VARCHAR/STRING) for targets
3620 SparkCastVarcharToString, // Spark CAST(x AS VARCHAR/CHAR) -> CAST(x AS STRING) for Spark targets
3621 JsonExtractToArrow, // JSON_EXTRACT(x, path) -> x -> path for SQLite/DuckDB
3622 JsonExtractToTsql, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
3623 JsonExtractToClickHouse, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> JSONExtractString for ClickHouse
3624 JsonExtractScalarConvert, // JSON_EXTRACT_SCALAR -> target-specific (PostgreSQL, Snowflake, SQLite)
3625 JsonPathNormalize, // Normalize JSON path format (brackets, wildcards, quotes) for various dialects
3626 MinMaxToLeastGreatest, // Multi-arg MIN(a,b,c) -> LEAST(a,b,c), MAX(a,b,c) -> GREATEST(a,b,c)
3627 ClickHouseUniqToApproxCountDistinct, // uniq(x) -> APPROX_COUNT_DISTINCT(x) for non-ClickHouse targets
3628 ClickHouseAnyToAnyValue, // any(x) -> ANY_VALUE(x) for non-ClickHouse targets
3629 OracleVarchar2ToVarchar, // VARCHAR2(N CHAR/BYTE) -> VARCHAR(N) for non-Oracle targets
3630 Nvl2Expand, // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b ELSE c END
3631 IfnullToCoalesce, // IFNULL(a, b) -> COALESCE(a, b)
3632 IsAsciiConvert, // IS_ASCII(x) -> dialect-specific ASCII check
3633 StrPositionConvert, // STR_POSITION(haystack, needle[, pos]) -> dialect-specific
3634 DecodeSimplify, // DECODE with null-safe -> simple = comparison
3635 ArraySumConvert, // ARRAY_SUM -> target-specific
3636 ArraySizeConvert, // ARRAY_SIZE -> target-specific
3637 ArrayAnyConvert, // ARRAY_ANY -> target-specific
3638 CastTimestamptzToFunc, // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) for MySQL/StarRocks
3639 TsOrDsToDateConvert, // TS_OR_DS_TO_DATE(x[, fmt]) -> dialect-specific
3640 TsOrDsToDateStrConvert, // TS_OR_DS_TO_DATE_STR(x) -> SUBSTRING(CAST(x AS type), 1, 10)
3641 DateStrToDateConvert, // DATE_STR_TO_DATE(x) -> CAST(x AS DATE)
3642 TimeStrToDateConvert, // TIME_STR_TO_DATE(x) -> CAST(x AS DATE)
3643 TimeStrToTimeConvert, // TIME_STR_TO_TIME(x) -> CAST(x AS TIMESTAMP)
3644 DateToDateStrConvert, // DATE_TO_DATE_STR(x) -> CAST(x AS TEXT/VARCHAR/STRING)
3645 DateToDiConvert, // DATE_TO_DI(x) -> dialect-specific (CAST date to YYYYMMDD integer)
3646 DiToDateConvert, // DI_TO_DATE(x) -> dialect-specific (integer YYYYMMDD to date)
3647 TsOrDiToDiConvert, // TS_OR_DI_TO_DI(x) -> dialect-specific
3648 UnixToStrConvert, // UNIX_TO_STR(x, fmt) -> dialect-specific
3649 UnixToTimeConvert, // UNIX_TO_TIME(x) -> dialect-specific
3650 UnixToTimeStrConvert, // UNIX_TO_TIME_STR(x) -> dialect-specific
3651 TimeToUnixConvert, // TIME_TO_UNIX(x) -> dialect-specific
3652 TimeToStrConvert, // TIME_TO_STR(x, fmt) -> dialect-specific
3653 StrToUnixConvert, // STR_TO_UNIX(x, fmt) -> dialect-specific
3654 DateTruncSwapArgs, // DATE_TRUNC('unit', x) -> DATE_TRUNC(x, unit) / TRUNC(x, unit)
3655 TimestampTruncConvert, // TIMESTAMP_TRUNC(x, UNIT[, tz]) -> dialect-specific
3656 StrToDateConvert, // STR_TO_DATE(x, fmt) from Generic -> CAST(StrToTime(x,fmt) AS DATE)
3657 TsOrDsAddConvert, // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> DATE_ADD per dialect
3658 DateFromUnixDateConvert, // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, '1970-01-01')
3659 TimeStrToUnixConvert, // TIME_STR_TO_UNIX(x) -> dialect-specific
3660 TimeToTimeStrConvert, // TIME_TO_TIME_STR(x) -> CAST(x AS type)
3661 CreateTableLikeToCtas, // CREATE TABLE a LIKE b -> CREATE TABLE a AS SELECT * FROM b LIMIT 0
3662 CreateTableLikeToSelectInto, // CREATE TABLE a LIKE b -> SELECT TOP 0 * INTO a FROM b AS temp
3663 CreateTableLikeToAs, // CREATE TABLE a LIKE b -> CREATE TABLE a AS b (ClickHouse)
3664 ArrayRemoveConvert, // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter/ARRAY subquery
3665 ArrayReverseConvert, // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
3666 JsonKeysConvert, // JSON_KEYS -> JSON_OBJECT_KEYS/OBJECT_KEYS
3667 ParseJsonStrip, // PARSE_JSON(x) -> x (strip wrapper)
3668 ArraySizeDrill, // ARRAY_SIZE -> REPEATED_COUNT for Drill
3669 WeekOfYearToWeekIso, // WEEKOFYEAR -> WEEKISO for Snowflake cross-dialect
3670 }
3671
3672 // Handle SELECT INTO -> CREATE TABLE AS for DuckDB/Snowflake/etc.
3673 let expr = if matches!(source, DialectType::TSQL | DialectType::Fabric) {
3674 Self::transform_select_into(expr, source, target)
3675 } else {
3676 expr
3677 };
3678
3679 // Strip OFFSET ROWS for non-TSQL/Oracle targets
3680 let expr = if !matches!(
3681 target,
3682 DialectType::TSQL | DialectType::Oracle | DialectType::Fabric
3683 ) {
3684 if let Expression::Select(mut select) = expr {
3685 if let Some(ref mut offset) = select.offset {
3686 offset.rows = None;
3687 }
3688 Expression::Select(select)
3689 } else {
3690 expr
3691 }
3692 } else {
3693 expr
3694 };
3695
3696 // Oracle: LIMIT -> FETCH FIRST, OFFSET -> OFFSET ROWS
3697 let expr = if matches!(target, DialectType::Oracle) {
3698 if let Expression::Select(mut select) = expr {
3699 if let Some(limit) = select.limit.take() {
3700 // Convert LIMIT to FETCH FIRST n ROWS ONLY
3701 select.fetch = Some(crate::expressions::Fetch {
3702 direction: "FIRST".to_string(),
3703 count: Some(limit.this),
3704 percent: false,
3705 rows: true,
3706 with_ties: false,
3707 });
3708 }
3709 // Add ROWS to OFFSET if present
3710 if let Some(ref mut offset) = select.offset {
3711 offset.rows = Some(true);
3712 }
3713 Expression::Select(select)
3714 } else {
3715 expr
3716 }
3717 } else {
3718 expr
3719 };
3720
3721 // Handle CreateTable WITH properties transformation before recursive transforms
3722 let expr = if let Expression::CreateTable(mut ct) = expr {
3723 Self::transform_create_table_properties(&mut ct, source, target);
3724
3725 // Handle Hive-style PARTITIONED BY (col_name type, ...) -> target-specific
3726 // When the PARTITIONED BY clause contains column definitions, merge them into the
3727 // main column list and adjust the PARTITIONED BY clause for the target dialect.
3728 if matches!(
3729 source,
3730 DialectType::Hive | DialectType::Spark | DialectType::Databricks
3731 ) {
3732 let mut partition_col_names: Vec<String> = Vec::new();
3733 let mut partition_col_defs: Vec<crate::expressions::ColumnDef> = Vec::new();
3734 let mut has_col_def_partitions = false;
3735
3736 // Check if any PARTITIONED BY property contains ColumnDef expressions
3737 for prop in &ct.properties {
3738 if let Expression::PartitionedByProperty(ref pbp) = prop {
3739 if let Expression::Tuple(ref tuple) = *pbp.this {
3740 for expr in &tuple.expressions {
3741 if let Expression::ColumnDef(ref cd) = expr {
3742 has_col_def_partitions = true;
3743 partition_col_names.push(cd.name.name.clone());
3744 partition_col_defs.push(*cd.clone());
3745 }
3746 }
3747 }
3748 }
3749 }
3750
3751 if has_col_def_partitions && !matches!(target, DialectType::Hive) {
3752 // Merge partition columns into main column list
3753 for cd in partition_col_defs {
3754 ct.columns.push(cd);
3755 }
3756
3757 // Replace PARTITIONED BY property with column-name-only version
3758 ct.properties
3759 .retain(|p| !matches!(p, Expression::PartitionedByProperty(_)));
3760
3761 if matches!(
3762 target,
3763 DialectType::Presto | DialectType::Trino | DialectType::Athena
3764 ) {
3765 // Presto: WITH (PARTITIONED_BY=ARRAY['y', 'z'])
3766 let array_elements: Vec<String> = partition_col_names
3767 .iter()
3768 .map(|n| format!("'{}'", n))
3769 .collect();
3770 let array_value = format!("ARRAY[{}]", array_elements.join(", "));
3771 ct.with_properties
3772 .push(("PARTITIONED_BY".to_string(), array_value));
3773 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
3774 // Spark: PARTITIONED BY (y, z) - just column names
3775 let name_exprs: Vec<Expression> = partition_col_names
3776 .iter()
3777 .map(|n| {
3778 Expression::Column(crate::expressions::Column {
3779 name: crate::expressions::Identifier::new(n.clone()),
3780 table: None,
3781 join_mark: false,
3782 trailing_comments: Vec::new(),
3783 span: None,
3784 })
3785 })
3786 .collect();
3787 ct.properties.insert(
3788 0,
3789 Expression::PartitionedByProperty(Box::new(
3790 crate::expressions::PartitionedByProperty {
3791 this: Box::new(Expression::Tuple(Box::new(
3792 crate::expressions::Tuple {
3793 expressions: name_exprs,
3794 },
3795 ))),
3796 },
3797 )),
3798 );
3799 }
3800 // For DuckDB and other targets, just drop the PARTITIONED BY (already retained above)
3801 }
3802
3803 // Note: Non-ColumnDef partitions (e.g., function expressions like MONTHS(y))
3804 // are handled by transform_create_table_properties which runs first
3805 }
3806
3807 // Strip LOCATION property for Presto/Trino (not supported)
3808 if matches!(
3809 target,
3810 DialectType::Presto | DialectType::Trino | DialectType::Athena
3811 ) {
3812 ct.properties
3813 .retain(|p| !matches!(p, Expression::LocationProperty(_)));
3814 }
3815
3816 // Strip table-level constraints for Spark/Hive/Databricks
3817 // Keep PRIMARY KEY and LIKE constraints but strip TSQL-specific modifiers; remove all others
3818 if matches!(
3819 target,
3820 DialectType::Spark | DialectType::Databricks | DialectType::Hive
3821 ) {
3822 ct.constraints.retain(|c| {
3823 matches!(
3824 c,
3825 crate::expressions::TableConstraint::PrimaryKey { .. }
3826 | crate::expressions::TableConstraint::Like { .. }
3827 )
3828 });
3829 for constraint in &mut ct.constraints {
3830 if let crate::expressions::TableConstraint::PrimaryKey {
3831 columns,
3832 modifiers,
3833 ..
3834 } = constraint
3835 {
3836 // Strip ASC/DESC from column names
3837 for col in columns.iter_mut() {
3838 if col.name.ends_with(" ASC") {
3839 col.name = col.name[..col.name.len() - 4].to_string();
3840 } else if col.name.ends_with(" DESC") {
3841 col.name = col.name[..col.name.len() - 5].to_string();
3842 }
3843 }
3844 // Strip TSQL-specific modifiers
3845 modifiers.clustered = None;
3846 modifiers.with_options.clear();
3847 modifiers.on_filegroup = None;
3848 }
3849 }
3850 }
3851
3852 // Databricks: IDENTITY columns with INT/INTEGER -> BIGINT
3853 if matches!(target, DialectType::Databricks) {
3854 for col in &mut ct.columns {
3855 if col.auto_increment {
3856 if matches!(col.data_type, crate::expressions::DataType::Int { .. }) {
3857 col.data_type = crate::expressions::DataType::BigInt { length: None };
3858 }
3859 }
3860 }
3861 }
3862
3863 // Spark/Databricks: INTEGER -> INT in column definitions
3864 // Python sqlglot always outputs INT for Spark/Databricks
3865 if matches!(target, DialectType::Spark | DialectType::Databricks) {
3866 for col in &mut ct.columns {
3867 if let crate::expressions::DataType::Int {
3868 integer_spelling, ..
3869 } = &mut col.data_type
3870 {
3871 *integer_spelling = false;
3872 }
3873 }
3874 }
3875
3876 // Strip explicit NULL constraints for Hive/Spark (B INTEGER NULL -> B INTEGER)
3877 if matches!(target, DialectType::Hive | DialectType::Spark) {
3878 for col in &mut ct.columns {
3879 // If nullable is explicitly true (NULL), change to None (omit it)
3880 if col.nullable == Some(true) {
3881 col.nullable = None;
3882 }
3883 // Also remove from constraints if stored there
3884 col.constraints
3885 .retain(|c| !matches!(c, crate::expressions::ColumnConstraint::Null));
3886 }
3887 }
3888
3889 // Strip TSQL ON filegroup for non-TSQL/Fabric targets
3890 if ct.on_property.is_some()
3891 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
3892 {
3893 ct.on_property = None;
3894 }
3895
3896 // Snowflake: strip ARRAY type parameters (ARRAY<INT> -> ARRAY, ARRAY<ARRAY<INT>> -> ARRAY)
3897 // Snowflake doesn't support typed arrays in DDL
3898 if matches!(target, DialectType::Snowflake) {
3899 fn strip_array_type_params(dt: &mut crate::expressions::DataType) {
3900 if let crate::expressions::DataType::Array { .. } = dt {
3901 *dt = crate::expressions::DataType::Custom {
3902 name: "ARRAY".to_string(),
3903 };
3904 }
3905 }
3906 for col in &mut ct.columns {
3907 strip_array_type_params(&mut col.data_type);
3908 }
3909 }
3910
3911 // PostgreSQL target: ensure IDENTITY columns have NOT NULL
3912 // If NOT NULL was explicit in source (present in constraint_order), preserve original order.
3913 // If NOT NULL was not explicit, add it after IDENTITY (GENERATED BY DEFAULT AS IDENTITY NOT NULL).
3914 if matches!(target, DialectType::PostgreSQL) {
3915 for col in &mut ct.columns {
3916 if col.auto_increment && !col.constraint_order.is_empty() {
3917 use crate::expressions::ConstraintType;
3918 let has_explicit_not_null = col
3919 .constraint_order
3920 .iter()
3921 .any(|ct| *ct == ConstraintType::NotNull);
3922
3923 if has_explicit_not_null {
3924 // Source had explicit NOT NULL - preserve original order
3925 // Just ensure nullable is set
3926 if col.nullable != Some(false) {
3927 col.nullable = Some(false);
3928 }
3929 } else {
3930 // Source didn't have explicit NOT NULL - build order with
3931 // AutoIncrement + NotNull first, then remaining constraints
3932 let mut new_order = Vec::new();
3933 // Put AutoIncrement (IDENTITY) first, followed by synthetic NotNull
3934 new_order.push(ConstraintType::AutoIncrement);
3935 new_order.push(ConstraintType::NotNull);
3936 // Add remaining constraints in original order (except AutoIncrement)
3937 for ct_type in &col.constraint_order {
3938 if *ct_type != ConstraintType::AutoIncrement {
3939 new_order.push(ct_type.clone());
3940 }
3941 }
3942 col.constraint_order = new_order;
3943 col.nullable = Some(false);
3944 }
3945 }
3946 }
3947 }
3948
3949 Expression::CreateTable(ct)
3950 } else {
3951 expr
3952 };
3953
3954 // Handle CreateView column stripping for Presto/Trino target
3955 let expr = if let Expression::CreateView(mut cv) = expr {
3956 // Presto/Trino: drop column list when view has a SELECT body
3957 if matches!(target, DialectType::Presto | DialectType::Trino) && !cv.columns.is_empty()
3958 {
3959 if !matches!(&cv.query, Expression::Null(_)) {
3960 cv.columns.clear();
3961 }
3962 }
3963 Expression::CreateView(cv)
3964 } else {
3965 expr
3966 };
3967
3968 // Wrap bare VALUES in CTE bodies with SELECT * FROM (...) AS _values for generic/non-Presto targets
3969 let expr = if !matches!(
3970 target,
3971 DialectType::Presto | DialectType::Trino | DialectType::Athena
3972 ) {
3973 if let Expression::Select(mut select) = expr {
3974 if let Some(ref mut with) = select.with {
3975 for cte in &mut with.ctes {
3976 if let Expression::Values(ref vals) = cte.this {
3977 // Build: SELECT * FROM (VALUES ...) AS _values
3978 let values_subquery =
3979 Expression::Subquery(Box::new(crate::expressions::Subquery {
3980 this: Expression::Values(vals.clone()),
3981 alias: Some(Identifier::new("_values".to_string())),
3982 column_aliases: Vec::new(),
3983 order_by: None,
3984 limit: None,
3985 offset: None,
3986 distribute_by: None,
3987 sort_by: None,
3988 cluster_by: None,
3989 lateral: false,
3990 modifiers_inside: false,
3991 trailing_comments: Vec::new(),
3992 }));
3993 let mut new_select = crate::expressions::Select::new();
3994 new_select.expressions =
3995 vec![Expression::Star(crate::expressions::Star {
3996 table: None,
3997 except: None,
3998 replace: None,
3999 rename: None,
4000 trailing_comments: Vec::new(),
4001 span: None,
4002 })];
4003 new_select.from = Some(crate::expressions::From {
4004 expressions: vec![values_subquery],
4005 });
4006 cte.this = Expression::Select(Box::new(new_select));
4007 }
4008 }
4009 }
4010 Expression::Select(select)
4011 } else {
4012 expr
4013 }
4014 } else {
4015 expr
4016 };
4017
4018 // PostgreSQL CREATE INDEX: add NULLS FIRST to index columns that don't have nulls ordering
4019 let expr = if matches!(target, DialectType::PostgreSQL) {
4020 if let Expression::CreateIndex(mut ci) = expr {
4021 for col in &mut ci.columns {
4022 if col.nulls_first.is_none() {
4023 col.nulls_first = Some(true);
4024 }
4025 }
4026 Expression::CreateIndex(ci)
4027 } else {
4028 expr
4029 }
4030 } else {
4031 expr
4032 };
4033
4034 transform_recursive(expr, &|e| {
4035 // BigQuery CAST(ARRAY[STRUCT(...)] AS STRUCT_TYPE[]) -> DuckDB: convert unnamed Structs to ROW()
4036 // This converts auto-named struct literals {'_0': x, '_1': y} inside typed arrays to ROW(x, y)
4037 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
4038 if let Expression::Cast(ref c) = e {
4039 // Check if this is a CAST of an array to a struct array type
4040 let is_struct_array_cast =
4041 matches!(&c.to, crate::expressions::DataType::Array { .. });
4042 if is_struct_array_cast {
4043 let has_auto_named_structs = match &c.this {
4044 Expression::Array(arr) => arr.expressions.iter().any(|elem| {
4045 if let Expression::Struct(s) = elem {
4046 s.fields.iter().all(|(name, _)| {
4047 name.as_ref().map_or(true, |n| {
4048 n.starts_with('_') && n[1..].parse::<usize>().is_ok()
4049 })
4050 })
4051 } else {
4052 false
4053 }
4054 }),
4055 Expression::ArrayFunc(arr) => arr.expressions.iter().any(|elem| {
4056 if let Expression::Struct(s) = elem {
4057 s.fields.iter().all(|(name, _)| {
4058 name.as_ref().map_or(true, |n| {
4059 n.starts_with('_') && n[1..].parse::<usize>().is_ok()
4060 })
4061 })
4062 } else {
4063 false
4064 }
4065 }),
4066 _ => false,
4067 };
4068 if has_auto_named_structs {
4069 let convert_struct_to_row = |elem: Expression| -> Expression {
4070 if let Expression::Struct(s) = elem {
4071 let row_args: Vec<Expression> =
4072 s.fields.into_iter().map(|(_, v)| v).collect();
4073 Expression::Function(Box::new(Function::new(
4074 "ROW".to_string(),
4075 row_args,
4076 )))
4077 } else {
4078 elem
4079 }
4080 };
4081 let mut c_clone = c.as_ref().clone();
4082 match &mut c_clone.this {
4083 Expression::Array(arr) => {
4084 arr.expressions = arr
4085 .expressions
4086 .drain(..)
4087 .map(convert_struct_to_row)
4088 .collect();
4089 }
4090 Expression::ArrayFunc(arr) => {
4091 arr.expressions = arr
4092 .expressions
4093 .drain(..)
4094 .map(convert_struct_to_row)
4095 .collect();
4096 }
4097 _ => {}
4098 }
4099 return Ok(Expression::Cast(Box::new(c_clone)));
4100 }
4101 }
4102 }
4103 }
4104
4105 // BigQuery SELECT AS STRUCT -> DuckDB struct literal {'key': value, ...}
4106 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
4107 if let Expression::Select(ref sel) = e {
4108 if sel.kind.as_deref() == Some("STRUCT") {
4109 let mut fields = Vec::new();
4110 for expr in &sel.expressions {
4111 match expr {
4112 Expression::Alias(a) => {
4113 fields.push((Some(a.alias.name.clone()), a.this.clone()));
4114 }
4115 Expression::Column(c) => {
4116 fields.push((Some(c.name.name.clone()), expr.clone()));
4117 }
4118 _ => {
4119 fields.push((None, expr.clone()));
4120 }
4121 }
4122 }
4123 let struct_lit =
4124 Expression::Struct(Box::new(crate::expressions::Struct { fields }));
4125 let mut new_select = sel.as_ref().clone();
4126 new_select.kind = None;
4127 new_select.expressions = vec![struct_lit];
4128 return Ok(Expression::Select(Box::new(new_select)));
4129 }
4130 }
4131 }
4132
4133 // Convert @variable -> ${variable} for Spark/Hive/Databricks
4134 if matches!(source, DialectType::TSQL | DialectType::Fabric)
4135 && matches!(
4136 target,
4137 DialectType::Spark | DialectType::Databricks | DialectType::Hive
4138 )
4139 {
4140 if let Expression::Parameter(ref p) = e {
4141 if p.style == crate::expressions::ParameterStyle::At {
4142 if let Some(ref name) = p.name {
4143 return Ok(Expression::Parameter(Box::new(
4144 crate::expressions::Parameter {
4145 name: Some(name.clone()),
4146 index: p.index,
4147 style: crate::expressions::ParameterStyle::DollarBrace,
4148 quoted: p.quoted,
4149 string_quoted: p.string_quoted,
4150 expression: None,
4151 },
4152 )));
4153 }
4154 }
4155 }
4156 // Also handle Column("@x") -> Parameter("x", DollarBrace) for TSQL vars
4157 if let Expression::Column(ref col) = e {
4158 if col.name.name.starts_with('@') && col.table.is_none() {
4159 let var_name = col.name.name.trim_start_matches('@').to_string();
4160 return Ok(Expression::Parameter(Box::new(
4161 crate::expressions::Parameter {
4162 name: Some(var_name),
4163 index: None,
4164 style: crate::expressions::ParameterStyle::DollarBrace,
4165 quoted: false,
4166 string_quoted: false,
4167 expression: None,
4168 },
4169 )));
4170 }
4171 }
4172 }
4173
4174 // Convert @variable -> variable in SET statements for Spark/Databricks
4175 if matches!(source, DialectType::TSQL | DialectType::Fabric)
4176 && matches!(target, DialectType::Spark | DialectType::Databricks)
4177 {
4178 if let Expression::SetStatement(ref s) = e {
4179 let mut new_items = s.items.clone();
4180 let mut changed = false;
4181 for item in &mut new_items {
4182 // Strip @ from the SET name (Parameter style)
4183 if let Expression::Parameter(ref p) = item.name {
4184 if p.style == crate::expressions::ParameterStyle::At {
4185 if let Some(ref name) = p.name {
4186 item.name = Expression::Identifier(Identifier::new(name));
4187 changed = true;
4188 }
4189 }
4190 }
4191 // Strip @ from the SET name (Identifier style - SET parser)
4192 if let Expression::Identifier(ref id) = item.name {
4193 if id.name.starts_with('@') {
4194 let var_name = id.name.trim_start_matches('@').to_string();
4195 item.name = Expression::Identifier(Identifier::new(&var_name));
4196 changed = true;
4197 }
4198 }
4199 // Strip @ from the SET name (Column style - alternative parsing)
4200 if let Expression::Column(ref col) = item.name {
4201 if col.name.name.starts_with('@') && col.table.is_none() {
4202 let var_name = col.name.name.trim_start_matches('@').to_string();
4203 item.name = Expression::Identifier(Identifier::new(&var_name));
4204 changed = true;
4205 }
4206 }
4207 }
4208 if changed {
4209 let mut new_set = (**s).clone();
4210 new_set.items = new_items;
4211 return Ok(Expression::SetStatement(Box::new(new_set)));
4212 }
4213 }
4214 }
4215
4216 // Strip NOLOCK hint for non-TSQL targets
4217 if matches!(source, DialectType::TSQL | DialectType::Fabric)
4218 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
4219 {
4220 if let Expression::Table(ref tr) = e {
4221 if !tr.hints.is_empty() {
4222 let mut new_tr = tr.clone();
4223 new_tr.hints.clear();
4224 return Ok(Expression::Table(new_tr));
4225 }
4226 }
4227 }
4228
4229 // Snowflake: TRUE IS TRUE -> TRUE, FALSE IS FALSE -> FALSE
4230 // Snowflake simplifies IS TRUE/IS FALSE on boolean literals
4231 if matches!(target, DialectType::Snowflake) {
4232 if let Expression::IsTrue(ref itf) = e {
4233 if let Expression::Boolean(ref b) = itf.this {
4234 if !itf.not {
4235 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
4236 value: b.value,
4237 }));
4238 } else {
4239 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
4240 value: !b.value,
4241 }));
4242 }
4243 }
4244 }
4245 if let Expression::IsFalse(ref itf) = e {
4246 if let Expression::Boolean(ref b) = itf.this {
4247 if !itf.not {
4248 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
4249 value: !b.value,
4250 }));
4251 } else {
4252 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
4253 value: b.value,
4254 }));
4255 }
4256 }
4257 }
4258 }
4259
4260 // BigQuery: split dotted backtick identifiers in table names
4261 // e.g., `a.b.c` -> "a"."b"."c" when source is BigQuery and target is not BigQuery
4262 if matches!(source, DialectType::BigQuery) && !matches!(target, DialectType::BigQuery) {
4263 if let Expression::CreateTable(ref ct) = e {
4264 let mut changed = false;
4265 let mut new_ct = ct.clone();
4266 // Split the table name
4267 if ct.name.schema.is_none() && ct.name.name.name.contains('.') {
4268 let parts: Vec<&str> = ct.name.name.name.split('.').collect();
4269 // Use quoted identifiers when the original was quoted (backtick in BigQuery)
4270 let was_quoted = ct.name.name.quoted;
4271 let mk_id = |s: &str| {
4272 if was_quoted {
4273 Identifier::quoted(s)
4274 } else {
4275 Identifier::new(s)
4276 }
4277 };
4278 if parts.len() == 3 {
4279 new_ct.name.catalog = Some(mk_id(parts[0]));
4280 new_ct.name.schema = Some(mk_id(parts[1]));
4281 new_ct.name.name = mk_id(parts[2]);
4282 changed = true;
4283 } else if parts.len() == 2 {
4284 new_ct.name.schema = Some(mk_id(parts[0]));
4285 new_ct.name.name = mk_id(parts[1]);
4286 changed = true;
4287 }
4288 }
4289 // Split the clone source name
4290 if let Some(ref clone_src) = ct.clone_source {
4291 if clone_src.schema.is_none() && clone_src.name.name.contains('.') {
4292 let parts: Vec<&str> = clone_src.name.name.split('.').collect();
4293 let was_quoted = clone_src.name.quoted;
4294 let mk_id = |s: &str| {
4295 if was_quoted {
4296 Identifier::quoted(s)
4297 } else {
4298 Identifier::new(s)
4299 }
4300 };
4301 let mut new_src = clone_src.clone();
4302 if parts.len() == 3 {
4303 new_src.catalog = Some(mk_id(parts[0]));
4304 new_src.schema = Some(mk_id(parts[1]));
4305 new_src.name = mk_id(parts[2]);
4306 new_ct.clone_source = Some(new_src);
4307 changed = true;
4308 } else if parts.len() == 2 {
4309 new_src.schema = Some(mk_id(parts[0]));
4310 new_src.name = mk_id(parts[1]);
4311 new_ct.clone_source = Some(new_src);
4312 changed = true;
4313 }
4314 }
4315 }
4316 if changed {
4317 return Ok(Expression::CreateTable(new_ct));
4318 }
4319 }
4320 }
4321
4322 // BigQuery array subscript: a[1], b[OFFSET(1)], c[ORDINAL(1)], d[SAFE_OFFSET(1)], e[SAFE_ORDINAL(1)]
4323 // -> DuckDB/Presto: convert 0-based to 1-based, handle SAFE_* -> ELEMENT_AT for Presto
4324 if matches!(source, DialectType::BigQuery)
4325 && matches!(
4326 target,
4327 DialectType::DuckDB
4328 | DialectType::Presto
4329 | DialectType::Trino
4330 | DialectType::Athena
4331 )
4332 {
4333 if let Expression::Subscript(ref sub) = e {
4334 let (new_index, is_safe) = match &sub.index {
4335 // a[1] -> a[1+1] = a[2] (plain index is 0-based in BQ)
4336 Expression::Literal(Literal::Number(n)) => {
4337 if let Ok(val) = n.parse::<i64>() {
4338 (
4339 Some(Expression::Literal(Literal::Number(
4340 (val + 1).to_string(),
4341 ))),
4342 false,
4343 )
4344 } else {
4345 (None, false)
4346 }
4347 }
4348 // OFFSET(n) -> n+1 (0-based)
4349 Expression::Function(ref f)
4350 if f.name.eq_ignore_ascii_case("OFFSET") && f.args.len() == 1 =>
4351 {
4352 if let Expression::Literal(Literal::Number(n)) = &f.args[0] {
4353 if let Ok(val) = n.parse::<i64>() {
4354 (
4355 Some(Expression::Literal(Literal::Number(
4356 (val + 1).to_string(),
4357 ))),
4358 false,
4359 )
4360 } else {
4361 (
4362 Some(Expression::Add(Box::new(
4363 crate::expressions::BinaryOp::new(
4364 f.args[0].clone(),
4365 Expression::number(1),
4366 ),
4367 ))),
4368 false,
4369 )
4370 }
4371 } else {
4372 (
4373 Some(Expression::Add(Box::new(
4374 crate::expressions::BinaryOp::new(
4375 f.args[0].clone(),
4376 Expression::number(1),
4377 ),
4378 ))),
4379 false,
4380 )
4381 }
4382 }
4383 // ORDINAL(n) -> n (already 1-based)
4384 Expression::Function(ref f)
4385 if f.name.eq_ignore_ascii_case("ORDINAL") && f.args.len() == 1 =>
4386 {
4387 (Some(f.args[0].clone()), false)
4388 }
4389 // SAFE_OFFSET(n) -> n+1 (0-based, safe)
4390 Expression::Function(ref f)
4391 if f.name.eq_ignore_ascii_case("SAFE_OFFSET") && f.args.len() == 1 =>
4392 {
4393 if let Expression::Literal(Literal::Number(n)) = &f.args[0] {
4394 if let Ok(val) = n.parse::<i64>() {
4395 (
4396 Some(Expression::Literal(Literal::Number(
4397 (val + 1).to_string(),
4398 ))),
4399 true,
4400 )
4401 } else {
4402 (
4403 Some(Expression::Add(Box::new(
4404 crate::expressions::BinaryOp::new(
4405 f.args[0].clone(),
4406 Expression::number(1),
4407 ),
4408 ))),
4409 true,
4410 )
4411 }
4412 } else {
4413 (
4414 Some(Expression::Add(Box::new(
4415 crate::expressions::BinaryOp::new(
4416 f.args[0].clone(),
4417 Expression::number(1),
4418 ),
4419 ))),
4420 true,
4421 )
4422 }
4423 }
4424 // SAFE_ORDINAL(n) -> n (already 1-based, safe)
4425 Expression::Function(ref f)
4426 if f.name.eq_ignore_ascii_case("SAFE_ORDINAL") && f.args.len() == 1 =>
4427 {
4428 (Some(f.args[0].clone()), true)
4429 }
4430 _ => (None, false),
4431 };
4432 if let Some(idx) = new_index {
4433 if is_safe
4434 && matches!(
4435 target,
4436 DialectType::Presto | DialectType::Trino | DialectType::Athena
4437 )
4438 {
4439 // Presto: SAFE_OFFSET/SAFE_ORDINAL -> ELEMENT_AT(arr, idx)
4440 return Ok(Expression::Function(Box::new(Function::new(
4441 "ELEMENT_AT".to_string(),
4442 vec![sub.this.clone(), idx],
4443 ))));
4444 } else {
4445 // DuckDB or non-safe: just use subscript with converted index
4446 return Ok(Expression::Subscript(Box::new(
4447 crate::expressions::Subscript {
4448 this: sub.this.clone(),
4449 index: idx,
4450 },
4451 )));
4452 }
4453 }
4454 }
4455 }
4456
4457 // BigQuery LENGTH(x) -> DuckDB CASE TYPEOF(x) WHEN 'BLOB' THEN OCTET_LENGTH(...) ELSE LENGTH(...) END
4458 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
4459 if let Expression::Length(ref uf) = e {
4460 let arg = uf.this.clone();
4461 let typeof_func = Expression::Function(Box::new(Function::new(
4462 "TYPEOF".to_string(),
4463 vec![arg.clone()],
4464 )));
4465 let blob_cast = Expression::Cast(Box::new(Cast {
4466 this: arg.clone(),
4467 to: DataType::VarBinary { length: None },
4468 trailing_comments: vec![],
4469 double_colon_syntax: false,
4470 format: None,
4471 default: None,
4472 }));
4473 let octet_length = Expression::Function(Box::new(Function::new(
4474 "OCTET_LENGTH".to_string(),
4475 vec![blob_cast],
4476 )));
4477 let text_cast = Expression::Cast(Box::new(Cast {
4478 this: arg,
4479 to: DataType::Text,
4480 trailing_comments: vec![],
4481 double_colon_syntax: false,
4482 format: None,
4483 default: None,
4484 }));
4485 let length_text = Expression::Length(Box::new(crate::expressions::UnaryFunc {
4486 this: text_cast,
4487 original_name: None,
4488 }));
4489 return Ok(Expression::Case(Box::new(Case {
4490 operand: Some(typeof_func),
4491 whens: vec![(
4492 Expression::Literal(Literal::String("BLOB".to_string())),
4493 octet_length,
4494 )],
4495 else_: Some(length_text),
4496 comments: Vec::new(),
4497 })));
4498 }
4499 }
4500
4501 // BigQuery UNNEST alias handling (only for non-BigQuery sources):
4502 // UNNEST(...) AS x -> UNNEST(...) (drop unused table alias)
4503 // UNNEST(...) AS x(y) -> UNNEST(...) AS y (use column alias as main alias)
4504 if matches!(target, DialectType::BigQuery) && !matches!(source, DialectType::BigQuery) {
4505 if let Expression::Alias(ref a) = e {
4506 if matches!(&a.this, Expression::Unnest(_)) {
4507 if a.column_aliases.is_empty() {
4508 // Drop the entire alias, return just the UNNEST expression
4509 return Ok(a.this.clone());
4510 } else {
4511 // Use first column alias as the main alias
4512 let mut new_alias = a.as_ref().clone();
4513 new_alias.alias = a.column_aliases[0].clone();
4514 new_alias.column_aliases.clear();
4515 return Ok(Expression::Alias(Box::new(new_alias)));
4516 }
4517 }
4518 }
4519 }
4520
4521 // BigQuery IN UNNEST(expr) -> IN (SELECT UNNEST/EXPLODE(expr)) for non-BigQuery targets
4522 if matches!(source, DialectType::BigQuery) && !matches!(target, DialectType::BigQuery) {
4523 if let Expression::In(ref in_expr) = e {
4524 if let Some(ref unnest_inner) = in_expr.unnest {
4525 // Build the function call for the target dialect
4526 let func_expr = if matches!(
4527 target,
4528 DialectType::Hive | DialectType::Spark | DialectType::Databricks
4529 ) {
4530 // Use EXPLODE for Hive/Spark
4531 Expression::Function(Box::new(Function::new(
4532 "EXPLODE".to_string(),
4533 vec![*unnest_inner.clone()],
4534 )))
4535 } else {
4536 // Use UNNEST for Presto/Trino/DuckDB/etc.
4537 Expression::Unnest(Box::new(crate::expressions::UnnestFunc {
4538 this: *unnest_inner.clone(),
4539 expressions: Vec::new(),
4540 with_ordinality: false,
4541 alias: None,
4542 offset_alias: None,
4543 }))
4544 };
4545
4546 // Wrap in SELECT
4547 let mut inner_select = crate::expressions::Select::new();
4548 inner_select.expressions = vec![func_expr];
4549
4550 let subquery_expr = Expression::Select(Box::new(inner_select));
4551
4552 return Ok(Expression::In(Box::new(crate::expressions::In {
4553 this: in_expr.this.clone(),
4554 expressions: Vec::new(),
4555 query: Some(subquery_expr),
4556 not: in_expr.not,
4557 global: in_expr.global,
4558 unnest: None,
4559 is_field: false,
4560 })));
4561 }
4562 }
4563 }
4564
4565 // SQLite: GENERATE_SERIES AS t(i) -> (SELECT value AS i FROM GENERATE_SERIES(...)) AS t
4566 // This handles the subquery wrapping for RANGE -> GENERATE_SERIES in FROM context
4567 if matches!(target, DialectType::SQLite) && matches!(source, DialectType::DuckDB) {
4568 if let Expression::Alias(ref a) = e {
4569 if let Expression::Function(ref f) = a.this {
4570 if f.name.eq_ignore_ascii_case("GENERATE_SERIES")
4571 && !a.column_aliases.is_empty()
4572 {
4573 // Build: (SELECT value AS col_alias FROM GENERATE_SERIES(start, end)) AS table_alias
4574 let col_alias = a.column_aliases[0].clone();
4575 let mut inner_select = crate::expressions::Select::new();
4576 inner_select.expressions =
4577 vec![Expression::Alias(Box::new(crate::expressions::Alias::new(
4578 Expression::Identifier(Identifier::new("value".to_string())),
4579 col_alias,
4580 )))];
4581 inner_select.from = Some(crate::expressions::From {
4582 expressions: vec![a.this.clone()],
4583 });
4584 let subquery =
4585 Expression::Subquery(Box::new(crate::expressions::Subquery {
4586 this: Expression::Select(Box::new(inner_select)),
4587 alias: Some(a.alias.clone()),
4588 column_aliases: Vec::new(),
4589 order_by: None,
4590 limit: None,
4591 offset: None,
4592 lateral: false,
4593 modifiers_inside: false,
4594 trailing_comments: Vec::new(),
4595 distribute_by: None,
4596 sort_by: None,
4597 cluster_by: None,
4598 }));
4599 return Ok(subquery);
4600 }
4601 }
4602 }
4603 }
4604
4605 // BigQuery implicit UNNEST: comma-join on array path -> CROSS JOIN UNNEST
4606 // e.g., SELECT results FROM Coordinates, Coordinates.position AS results
4607 // -> SELECT results FROM Coordinates CROSS JOIN UNNEST(Coordinates.position) AS results
4608 if matches!(source, DialectType::BigQuery) {
4609 if let Expression::Select(ref s) = e {
4610 if let Some(ref from) = s.from {
4611 if from.expressions.len() >= 2 {
4612 // Collect table names from first expression
4613 let first_tables: Vec<String> = from
4614 .expressions
4615 .iter()
4616 .take(1)
4617 .filter_map(|expr| {
4618 if let Expression::Table(t) = expr {
4619 Some(t.name.name.to_lowercase())
4620 } else {
4621 None
4622 }
4623 })
4624 .collect();
4625
4626 // Check if any subsequent FROM expressions are schema-qualified with a matching table name
4627 // or have a dotted name matching a table
4628 let mut needs_rewrite = false;
4629 for expr in from.expressions.iter().skip(1) {
4630 if let Expression::Table(t) = expr {
4631 if let Some(ref schema) = t.schema {
4632 if first_tables.contains(&schema.name.to_lowercase()) {
4633 needs_rewrite = true;
4634 break;
4635 }
4636 }
4637 // Also check dotted names in quoted identifiers (e.g., `Coordinates.position`)
4638 if t.schema.is_none() && t.name.name.contains('.') {
4639 let parts: Vec<&str> = t.name.name.split('.').collect();
4640 if parts.len() >= 2
4641 && first_tables.contains(&parts[0].to_lowercase())
4642 {
4643 needs_rewrite = true;
4644 break;
4645 }
4646 }
4647 }
4648 }
4649
4650 if needs_rewrite {
4651 let mut new_select = s.clone();
4652 let mut new_from_exprs = vec![from.expressions[0].clone()];
4653 let mut new_joins = s.joins.clone();
4654
4655 for expr in from.expressions.iter().skip(1) {
4656 if let Expression::Table(ref t) = expr {
4657 if let Some(ref schema) = t.schema {
4658 if first_tables.contains(&schema.name.to_lowercase()) {
4659 // This is an array path reference, convert to CROSS JOIN UNNEST
4660 let col_expr = Expression::Column(
4661 crate::expressions::Column {
4662 name: t.name.clone(),
4663 table: Some(schema.clone()),
4664 join_mark: false,
4665 trailing_comments: vec![],
4666 span: None,
4667 },
4668 );
4669 let unnest_expr = Expression::Unnest(Box::new(
4670 crate::expressions::UnnestFunc {
4671 this: col_expr,
4672 expressions: Vec::new(),
4673 with_ordinality: false,
4674 alias: None,
4675 offset_alias: None,
4676 },
4677 ));
4678 let join_this = if let Some(ref alias) = t.alias {
4679 if matches!(
4680 target,
4681 DialectType::Presto
4682 | DialectType::Trino
4683 | DialectType::Athena
4684 ) {
4685 // Presto: UNNEST(x) AS _t0(results)
4686 Expression::Alias(Box::new(
4687 crate::expressions::Alias {
4688 this: unnest_expr,
4689 alias: Identifier::new("_t0"),
4690 column_aliases: vec![alias.clone()],
4691 pre_alias_comments: vec![],
4692 trailing_comments: vec![],
4693 },
4694 ))
4695 } else {
4696 // BigQuery: UNNEST(x) AS results
4697 Expression::Alias(Box::new(
4698 crate::expressions::Alias {
4699 this: unnest_expr,
4700 alias: alias.clone(),
4701 column_aliases: vec![],
4702 pre_alias_comments: vec![],
4703 trailing_comments: vec![],
4704 },
4705 ))
4706 }
4707 } else {
4708 unnest_expr
4709 };
4710 new_joins.push(crate::expressions::Join {
4711 kind: crate::expressions::JoinKind::Cross,
4712 this: join_this,
4713 on: None,
4714 using: Vec::new(),
4715 use_inner_keyword: false,
4716 use_outer_keyword: false,
4717 deferred_condition: false,
4718 join_hint: None,
4719 match_condition: None,
4720 pivots: Vec::new(),
4721 comments: Vec::new(),
4722 nesting_group: 0,
4723 directed: false,
4724 });
4725 } else {
4726 new_from_exprs.push(expr.clone());
4727 }
4728 } else if t.schema.is_none() && t.name.name.contains('.') {
4729 // Dotted name in quoted identifier: `Coordinates.position`
4730 let parts: Vec<&str> = t.name.name.split('.').collect();
4731 if parts.len() >= 2
4732 && first_tables.contains(&parts[0].to_lowercase())
4733 {
4734 let join_this =
4735 if matches!(target, DialectType::BigQuery) {
4736 // BigQuery: keep as single quoted identifier, just convert comma -> CROSS JOIN
4737 Expression::Table(t.clone())
4738 } else {
4739 // Other targets: split into "schema"."name"
4740 let mut new_t = t.clone();
4741 new_t.schema =
4742 Some(Identifier::quoted(parts[0]));
4743 new_t.name = Identifier::quoted(parts[1]);
4744 Expression::Table(new_t)
4745 };
4746 new_joins.push(crate::expressions::Join {
4747 kind: crate::expressions::JoinKind::Cross,
4748 this: join_this,
4749 on: None,
4750 using: Vec::new(),
4751 use_inner_keyword: false,
4752 use_outer_keyword: false,
4753 deferred_condition: false,
4754 join_hint: None,
4755 match_condition: None,
4756 pivots: Vec::new(),
4757 comments: Vec::new(),
4758 nesting_group: 0,
4759 directed: false,
4760 });
4761 } else {
4762 new_from_exprs.push(expr.clone());
4763 }
4764 } else {
4765 new_from_exprs.push(expr.clone());
4766 }
4767 } else {
4768 new_from_exprs.push(expr.clone());
4769 }
4770 }
4771
4772 new_select.from = Some(crate::expressions::From {
4773 expressions: new_from_exprs,
4774 ..from.clone()
4775 });
4776 new_select.joins = new_joins;
4777 return Ok(Expression::Select(new_select));
4778 }
4779 }
4780 }
4781 }
4782 }
4783
4784 // CROSS JOIN UNNEST -> LATERAL VIEW EXPLODE for Hive/Spark
4785 if matches!(
4786 target,
4787 DialectType::Hive | DialectType::Spark | DialectType::Databricks
4788 ) {
4789 if let Expression::Select(ref s) = e {
4790 // Check if any joins are CROSS JOIN with UNNEST/EXPLODE
4791 let is_unnest_or_explode_expr = |expr: &Expression| -> bool {
4792 matches!(expr, Expression::Unnest(_))
4793 || matches!(expr, Expression::Function(f) if f.name.eq_ignore_ascii_case("EXPLODE"))
4794 };
4795 let has_unnest_join = s.joins.iter().any(|j| {
4796 j.kind == crate::expressions::JoinKind::Cross && (
4797 matches!(&j.this, Expression::Alias(a) if is_unnest_or_explode_expr(&a.this))
4798 || is_unnest_or_explode_expr(&j.this)
4799 )
4800 });
4801 if has_unnest_join {
4802 let mut select = s.clone();
4803 let mut new_joins = Vec::new();
4804 for join in select.joins.drain(..) {
4805 if join.kind == crate::expressions::JoinKind::Cross {
4806 // Extract the UNNEST/EXPLODE from the join
4807 let (func_expr, table_alias, col_aliases) = match &join.this {
4808 Expression::Alias(a) => {
4809 let ta = if a.alias.is_empty() {
4810 None
4811 } else {
4812 Some(a.alias.clone())
4813 };
4814 let cas = a.column_aliases.clone();
4815 match &a.this {
4816 Expression::Unnest(u) => {
4817 // Multi-arg UNNEST(y, z) -> INLINE(ARRAYS_ZIP(y, z))
4818 if !u.expressions.is_empty() {
4819 let mut all_args = vec![u.this.clone()];
4820 all_args.extend(u.expressions.clone());
4821 let arrays_zip =
4822 Expression::Function(Box::new(
4823 crate::expressions::Function::new(
4824 "ARRAYS_ZIP".to_string(),
4825 all_args,
4826 ),
4827 ));
4828 let inline = Expression::Function(Box::new(
4829 crate::expressions::Function::new(
4830 "INLINE".to_string(),
4831 vec![arrays_zip],
4832 ),
4833 ));
4834 (Some(inline), ta, a.column_aliases.clone())
4835 } else {
4836 // Convert UNNEST(x) to EXPLODE(x) or POSEXPLODE(x)
4837 let func_name = if u.with_ordinality {
4838 "POSEXPLODE"
4839 } else {
4840 "EXPLODE"
4841 };
4842 let explode = Expression::Function(Box::new(
4843 crate::expressions::Function::new(
4844 func_name.to_string(),
4845 vec![u.this.clone()],
4846 ),
4847 ));
4848 // For POSEXPLODE, add 'pos' to column aliases
4849 let cas = if u.with_ordinality {
4850 let mut pos_aliases =
4851 vec![Identifier::new(
4852 "pos".to_string(),
4853 )];
4854 pos_aliases
4855 .extend(a.column_aliases.clone());
4856 pos_aliases
4857 } else {
4858 a.column_aliases.clone()
4859 };
4860 (Some(explode), ta, cas)
4861 }
4862 }
4863 Expression::Function(f)
4864 if f.name.eq_ignore_ascii_case("EXPLODE") =>
4865 {
4866 (Some(Expression::Function(f.clone())), ta, cas)
4867 }
4868 _ => (None, None, Vec::new()),
4869 }
4870 }
4871 Expression::Unnest(u) => {
4872 let func_name = if u.with_ordinality {
4873 "POSEXPLODE"
4874 } else {
4875 "EXPLODE"
4876 };
4877 let explode = Expression::Function(Box::new(
4878 crate::expressions::Function::new(
4879 func_name.to_string(),
4880 vec![u.this.clone()],
4881 ),
4882 ));
4883 let ta = u.alias.clone();
4884 let col_aliases = if u.with_ordinality {
4885 vec![Identifier::new("pos".to_string())]
4886 } else {
4887 Vec::new()
4888 };
4889 (Some(explode), ta, col_aliases)
4890 }
4891 _ => (None, None, Vec::new()),
4892 };
4893 if let Some(func) = func_expr {
4894 select.lateral_views.push(crate::expressions::LateralView {
4895 this: func,
4896 table_alias,
4897 column_aliases: col_aliases,
4898 outer: false,
4899 });
4900 } else {
4901 new_joins.push(join);
4902 }
4903 } else {
4904 new_joins.push(join);
4905 }
4906 }
4907 select.joins = new_joins;
4908 return Ok(Expression::Select(select));
4909 }
4910 }
4911 }
4912
4913 // UNNEST expansion: DuckDB SELECT UNNEST(arr) in SELECT list -> expanded query
4914 // for BigQuery, Presto/Trino, Snowflake
4915 if matches!(source, DialectType::DuckDB | DialectType::PostgreSQL)
4916 && matches!(
4917 target,
4918 DialectType::BigQuery
4919 | DialectType::Presto
4920 | DialectType::Trino
4921 | DialectType::Snowflake
4922 )
4923 {
4924 if let Expression::Select(ref s) = e {
4925 // Check if any SELECT expressions contain UNNEST
4926 // Note: UNNEST can appear as Expression::Unnest OR Expression::Function("UNNEST")
4927 let has_unnest_in_select = s.expressions.iter().any(|expr| {
4928 fn contains_unnest(e: &Expression) -> bool {
4929 match e {
4930 Expression::Unnest(_) => true,
4931 Expression::Function(f)
4932 if f.name.eq_ignore_ascii_case("UNNEST") =>
4933 {
4934 true
4935 }
4936 Expression::Alias(a) => contains_unnest(&a.this),
4937 Expression::Add(op)
4938 | Expression::Sub(op)
4939 | Expression::Mul(op)
4940 | Expression::Div(op) => {
4941 contains_unnest(&op.left) || contains_unnest(&op.right)
4942 }
4943 _ => false,
4944 }
4945 }
4946 contains_unnest(expr)
4947 });
4948
4949 if has_unnest_in_select {
4950 let rewritten = Self::rewrite_unnest_expansion(s, target);
4951 if let Some(new_select) = rewritten {
4952 return Ok(Expression::Select(Box::new(new_select)));
4953 }
4954 }
4955 }
4956 }
4957
4958 // BigQuery -> PostgreSQL: convert escape sequences in string literals to actual characters
4959 // BigQuery '\n' -> PostgreSQL literal newline in string
4960 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::PostgreSQL)
4961 {
4962 if let Expression::Literal(Literal::String(ref s)) = e {
4963 if s.contains("\\n")
4964 || s.contains("\\t")
4965 || s.contains("\\r")
4966 || s.contains("\\\\")
4967 {
4968 let converted = s
4969 .replace("\\n", "\n")
4970 .replace("\\t", "\t")
4971 .replace("\\r", "\r")
4972 .replace("\\\\", "\\");
4973 return Ok(Expression::Literal(Literal::String(converted)));
4974 }
4975 }
4976 }
4977
4978 // Cross-dialect: convert Literal::Timestamp to target-specific CAST form
4979 // when source != target (identity tests keep the Literal::Timestamp for native handling)
4980 if source != target {
4981 if let Expression::Literal(Literal::Timestamp(ref s)) = e {
4982 let s = s.clone();
4983 // MySQL: TIMESTAMP handling depends on source dialect
4984 // BigQuery TIMESTAMP is timezone-aware -> TIMESTAMP() function in MySQL
4985 // Other sources' TIMESTAMP is non-timezone -> CAST('x' AS DATETIME) in MySQL
4986 if matches!(target, DialectType::MySQL) {
4987 if matches!(source, DialectType::BigQuery) {
4988 // BigQuery TIMESTAMP is timezone-aware -> MySQL TIMESTAMP() function
4989 return Ok(Expression::Function(Box::new(Function::new(
4990 "TIMESTAMP".to_string(),
4991 vec![Expression::Literal(Literal::String(s))],
4992 ))));
4993 } else {
4994 // Non-timezone TIMESTAMP -> CAST('x' AS DATETIME) in MySQL
4995 return Ok(Expression::Cast(Box::new(Cast {
4996 this: Expression::Literal(Literal::String(s)),
4997 to: DataType::Custom {
4998 name: "DATETIME".to_string(),
4999 },
5000 trailing_comments: Vec::new(),
5001 double_colon_syntax: false,
5002 format: None,
5003 default: None,
5004 })));
5005 }
5006 }
5007 let dt = match target {
5008 DialectType::BigQuery | DialectType::StarRocks => DataType::Custom {
5009 name: "DATETIME".to_string(),
5010 },
5011 DialectType::Snowflake => {
5012 // BigQuery TIMESTAMP is timezone-aware -> use TIMESTAMPTZ for Snowflake
5013 if matches!(source, DialectType::BigQuery) {
5014 DataType::Custom {
5015 name: "TIMESTAMPTZ".to_string(),
5016 }
5017 } else if matches!(
5018 source,
5019 DialectType::PostgreSQL
5020 | DialectType::Redshift
5021 | DialectType::Snowflake
5022 ) {
5023 DataType::Timestamp {
5024 precision: None,
5025 timezone: false,
5026 }
5027 } else {
5028 DataType::Custom {
5029 name: "TIMESTAMPNTZ".to_string(),
5030 }
5031 }
5032 }
5033 DialectType::Spark | DialectType::Databricks => {
5034 // BigQuery TIMESTAMP is timezone-aware -> use plain TIMESTAMP for Spark/Databricks
5035 if matches!(source, DialectType::BigQuery) {
5036 DataType::Timestamp {
5037 precision: None,
5038 timezone: false,
5039 }
5040 } else {
5041 DataType::Custom {
5042 name: "TIMESTAMP_NTZ".to_string(),
5043 }
5044 }
5045 }
5046 DialectType::ClickHouse => DataType::Nullable {
5047 inner: Box::new(DataType::Custom {
5048 name: "DateTime".to_string(),
5049 }),
5050 },
5051 DialectType::TSQL | DialectType::Fabric => DataType::Custom {
5052 name: "DATETIME2".to_string(),
5053 },
5054 DialectType::DuckDB => {
5055 // DuckDB: use TIMESTAMPTZ when source is BigQuery (BQ TIMESTAMP is always UTC/tz-aware)
5056 // or when the timestamp string explicitly has timezone info
5057 if matches!(source, DialectType::BigQuery)
5058 || Self::timestamp_string_has_timezone(&s)
5059 {
5060 DataType::Custom {
5061 name: "TIMESTAMPTZ".to_string(),
5062 }
5063 } else {
5064 DataType::Timestamp {
5065 precision: None,
5066 timezone: false,
5067 }
5068 }
5069 }
5070 _ => DataType::Timestamp {
5071 precision: None,
5072 timezone: false,
5073 },
5074 };
5075 return Ok(Expression::Cast(Box::new(Cast {
5076 this: Expression::Literal(Literal::String(s)),
5077 to: dt,
5078 trailing_comments: vec![],
5079 double_colon_syntax: false,
5080 format: None,
5081 default: None,
5082 })));
5083 }
5084 }
5085
5086 // PostgreSQL DELETE requires explicit AS for table aliases
5087 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
5088 if let Expression::Delete(ref del) = e {
5089 if del.alias.is_some() && !del.alias_explicit_as {
5090 let mut new_del = del.clone();
5091 new_del.alias_explicit_as = true;
5092 return Ok(Expression::Delete(new_del));
5093 }
5094 }
5095 }
5096
5097 // UNION/INTERSECT/EXCEPT DISTINCT handling:
5098 // Some dialects require explicit DISTINCT (BigQuery, ClickHouse),
5099 // while others don't support it (Presto, Spark, DuckDB, etc.)
5100 {
5101 let needs_distinct =
5102 matches!(target, DialectType::BigQuery | DialectType::ClickHouse);
5103 let drop_distinct = matches!(
5104 target,
5105 DialectType::Presto
5106 | DialectType::Trino
5107 | DialectType::Athena
5108 | DialectType::Spark
5109 | DialectType::Databricks
5110 | DialectType::DuckDB
5111 | DialectType::Hive
5112 | DialectType::MySQL
5113 | DialectType::PostgreSQL
5114 | DialectType::SQLite
5115 | DialectType::TSQL
5116 | DialectType::Redshift
5117 | DialectType::Snowflake
5118 | DialectType::Oracle
5119 | DialectType::Teradata
5120 | DialectType::Drill
5121 | DialectType::Doris
5122 | DialectType::StarRocks
5123 );
5124 match &e {
5125 Expression::Union(u) if !u.all && needs_distinct && !u.distinct => {
5126 let mut new_u = (**u).clone();
5127 new_u.distinct = true;
5128 return Ok(Expression::Union(Box::new(new_u)));
5129 }
5130 Expression::Intersect(i) if !i.all && needs_distinct && !i.distinct => {
5131 let mut new_i = (**i).clone();
5132 new_i.distinct = true;
5133 return Ok(Expression::Intersect(Box::new(new_i)));
5134 }
5135 Expression::Except(ex) if !ex.all && needs_distinct && !ex.distinct => {
5136 let mut new_ex = (**ex).clone();
5137 new_ex.distinct = true;
5138 return Ok(Expression::Except(Box::new(new_ex)));
5139 }
5140 Expression::Union(u) if u.distinct && drop_distinct => {
5141 let mut new_u = (**u).clone();
5142 new_u.distinct = false;
5143 return Ok(Expression::Union(Box::new(new_u)));
5144 }
5145 Expression::Intersect(i) if i.distinct && drop_distinct => {
5146 let mut new_i = (**i).clone();
5147 new_i.distinct = false;
5148 return Ok(Expression::Intersect(Box::new(new_i)));
5149 }
5150 Expression::Except(ex) if ex.distinct && drop_distinct => {
5151 let mut new_ex = (**ex).clone();
5152 new_ex.distinct = false;
5153 return Ok(Expression::Except(Box::new(new_ex)));
5154 }
5155 _ => {}
5156 }
5157 }
5158
5159 // ClickHouse: MAP('a', '1') -> map('a', '1') (lowercase function name)
5160 if matches!(target, DialectType::ClickHouse) {
5161 if let Expression::Function(ref f) = e {
5162 if f.name.eq_ignore_ascii_case("MAP") && !f.args.is_empty() {
5163 let mut new_f = f.as_ref().clone();
5164 new_f.name = "map".to_string();
5165 return Ok(Expression::Function(Box::new(new_f)));
5166 }
5167 }
5168 }
5169
5170 // ClickHouse: INTERSECT ALL -> INTERSECT (ClickHouse doesn't support ALL on INTERSECT)
5171 if matches!(target, DialectType::ClickHouse) {
5172 if let Expression::Intersect(ref i) = e {
5173 if i.all {
5174 let mut new_i = (**i).clone();
5175 new_i.all = false;
5176 return Ok(Expression::Intersect(Box::new(new_i)));
5177 }
5178 }
5179 }
5180
5181 // Integer division: a / b -> CAST(a AS DOUBLE) / b for dialects that need it
5182 // Only from Generic source, to prevent double-wrapping
5183 if matches!(source, DialectType::Generic) {
5184 if let Expression::Div(ref op) = e {
5185 let cast_type = match target {
5186 DialectType::TSQL | DialectType::Fabric => Some(DataType::Float {
5187 precision: None,
5188 scale: None,
5189 real_spelling: false,
5190 }),
5191 DialectType::Drill
5192 | DialectType::Trino
5193 | DialectType::Athena
5194 | DialectType::Presto => Some(DataType::Double {
5195 precision: None,
5196 scale: None,
5197 }),
5198 DialectType::PostgreSQL
5199 | DialectType::Redshift
5200 | DialectType::Materialize
5201 | DialectType::Teradata
5202 | DialectType::RisingWave => Some(DataType::Double {
5203 precision: None,
5204 scale: None,
5205 }),
5206 _ => None,
5207 };
5208 if let Some(dt) = cast_type {
5209 let cast_left = Expression::Cast(Box::new(Cast {
5210 this: op.left.clone(),
5211 to: dt,
5212 double_colon_syntax: false,
5213 trailing_comments: Vec::new(),
5214 format: None,
5215 default: None,
5216 }));
5217 let new_op = crate::expressions::BinaryOp {
5218 left: cast_left,
5219 right: op.right.clone(),
5220 left_comments: op.left_comments.clone(),
5221 operator_comments: op.operator_comments.clone(),
5222 trailing_comments: op.trailing_comments.clone(),
5223 };
5224 return Ok(Expression::Div(Box::new(new_op)));
5225 }
5226 }
5227 }
5228
5229 // CREATE DATABASE -> CREATE SCHEMA for DuckDB target
5230 if matches!(target, DialectType::DuckDB) {
5231 if let Expression::CreateDatabase(db) = e {
5232 let mut schema = crate::expressions::CreateSchema::new(db.name.name.clone());
5233 schema.if_not_exists = db.if_not_exists;
5234 return Ok(Expression::CreateSchema(Box::new(schema)));
5235 }
5236 if let Expression::DropDatabase(db) = e {
5237 let mut schema = crate::expressions::DropSchema::new(db.name.name.clone());
5238 schema.if_exists = db.if_exists;
5239 return Ok(Expression::DropSchema(Box::new(schema)));
5240 }
5241 }
5242
5243 // Strip ClickHouse Nullable(...) wrapper for non-ClickHouse targets
5244 if matches!(source, DialectType::ClickHouse)
5245 && !matches!(target, DialectType::ClickHouse)
5246 {
5247 if let Expression::Cast(ref c) = e {
5248 if let DataType::Custom { ref name } = c.to {
5249 let upper = name.to_uppercase();
5250 if upper.starts_with("NULLABLE(") && upper.ends_with(")") {
5251 let inner = &name[9..name.len() - 1]; // strip "Nullable(" and ")"
5252 let inner_upper = inner.to_uppercase();
5253 let new_dt = match inner_upper.as_str() {
5254 "DATETIME" | "DATETIME64" => DataType::Timestamp {
5255 precision: None,
5256 timezone: false,
5257 },
5258 "DATE" => DataType::Date,
5259 "INT64" | "BIGINT" => DataType::BigInt { length: None },
5260 "INT32" | "INT" | "INTEGER" => DataType::Int {
5261 length: None,
5262 integer_spelling: false,
5263 },
5264 "FLOAT64" | "DOUBLE" => DataType::Double {
5265 precision: None,
5266 scale: None,
5267 },
5268 "STRING" => DataType::Text,
5269 _ => DataType::Custom {
5270 name: inner.to_string(),
5271 },
5272 };
5273 let mut new_cast = c.clone();
5274 new_cast.to = new_dt;
5275 return Ok(Expression::Cast(new_cast));
5276 }
5277 }
5278 }
5279 }
5280
5281 // ARRAY_CONCAT_AGG -> Snowflake: ARRAY_FLATTEN(ARRAY_AGG(...))
5282 if matches!(target, DialectType::Snowflake) {
5283 if let Expression::ArrayConcatAgg(ref agg) = e {
5284 let mut agg_clone = agg.as_ref().clone();
5285 agg_clone.name = None; // Clear name so generator uses default "ARRAY_AGG"
5286 let array_agg = Expression::ArrayAgg(Box::new(agg_clone));
5287 let flatten = Expression::Function(Box::new(Function::new(
5288 "ARRAY_FLATTEN".to_string(),
5289 vec![array_agg],
5290 )));
5291 return Ok(flatten);
5292 }
5293 }
5294
5295 // ARRAY_CONCAT_AGG -> others: keep as function for cross-dialect
5296 if !matches!(target, DialectType::BigQuery | DialectType::Snowflake) {
5297 if let Expression::ArrayConcatAgg(agg) = e {
5298 let arg = agg.this;
5299 return Ok(Expression::Function(Box::new(Function::new(
5300 "ARRAY_CONCAT_AGG".to_string(),
5301 vec![arg],
5302 ))));
5303 }
5304 }
5305
5306 // Determine what action to take by inspecting e immutably
5307 let action = {
5308 let source_propagates_nulls =
5309 matches!(source, DialectType::Snowflake | DialectType::BigQuery);
5310 let target_ignores_nulls =
5311 matches!(target, DialectType::DuckDB | DialectType::PostgreSQL);
5312
5313 match &e {
5314 Expression::Function(f) => {
5315 let name = f.name.to_uppercase();
5316 // DATE_PART: strip quotes from first arg when target is Snowflake (source != Snowflake)
5317 if (name == "DATE_PART" || name == "DATEPART")
5318 && f.args.len() == 2
5319 && matches!(target, DialectType::Snowflake)
5320 && !matches!(source, DialectType::Snowflake)
5321 && matches!(
5322 &f.args[0],
5323 Expression::Literal(crate::expressions::Literal::String(_))
5324 )
5325 {
5326 Action::DatePartUnquote
5327 } else if source_propagates_nulls
5328 && target_ignores_nulls
5329 && (name == "GREATEST" || name == "LEAST")
5330 && f.args.len() >= 2
5331 {
5332 Action::GreatestLeastNull
5333 } else if matches!(source, DialectType::Snowflake)
5334 && name == "ARRAY_GENERATE_RANGE"
5335 && f.args.len() >= 2
5336 {
5337 Action::ArrayGenerateRange
5338 } else if matches!(source, DialectType::Snowflake)
5339 && matches!(target, DialectType::DuckDB)
5340 && name == "DATE_TRUNC"
5341 && f.args.len() == 2
5342 {
5343 // Determine if DuckDB DATE_TRUNC needs CAST wrapping to preserve input type.
5344 // Logic based on Python sqlglot's input_type_preserved flag:
5345 // - DATE + non-date-unit (HOUR, MINUTE, etc.) -> wrap
5346 // - TIMESTAMP + date-unit (YEAR, QUARTER, MONTH, WEEK, DAY) -> wrap
5347 // - TIMESTAMPTZ/TIMESTAMPLTZ/TIME -> always wrap
5348 let unit_str = match &f.args[0] {
5349 Expression::Literal(crate::expressions::Literal::String(s)) => {
5350 Some(s.to_uppercase())
5351 }
5352 _ => None,
5353 };
5354 let is_date_unit = unit_str.as_ref().map_or(false, |u| {
5355 matches!(u.as_str(), "YEAR" | "QUARTER" | "MONTH" | "WEEK" | "DAY")
5356 });
5357 match &f.args[1] {
5358 Expression::Cast(c) => match &c.to {
5359 DataType::Time { .. } => Action::DateTruncWrapCast,
5360 DataType::Custom { name }
5361 if name.eq_ignore_ascii_case("TIMESTAMPTZ")
5362 || name.eq_ignore_ascii_case("TIMESTAMPLTZ") =>
5363 {
5364 Action::DateTruncWrapCast
5365 }
5366 DataType::Timestamp { timezone: true, .. } => {
5367 Action::DateTruncWrapCast
5368 }
5369 DataType::Date if !is_date_unit => Action::DateTruncWrapCast,
5370 DataType::Timestamp {
5371 timezone: false, ..
5372 } if is_date_unit => Action::DateTruncWrapCast,
5373 _ => Action::None,
5374 },
5375 _ => Action::None,
5376 }
5377 } else if matches!(source, DialectType::Snowflake)
5378 && matches!(target, DialectType::DuckDB)
5379 && name == "TO_DATE"
5380 && f.args.len() == 1
5381 && !matches!(
5382 &f.args[0],
5383 Expression::Literal(crate::expressions::Literal::String(_))
5384 )
5385 {
5386 Action::ToDateToCast
5387 } else if !matches!(source, DialectType::Redshift)
5388 && matches!(target, DialectType::Redshift)
5389 && name == "CONVERT_TIMEZONE"
5390 && (f.args.len() == 2 || f.args.len() == 3)
5391 {
5392 // Convert Function("CONVERT_TIMEZONE") to Expression::ConvertTimezone
5393 // so Redshift's transform_expr won't expand 2-arg to 3-arg with 'UTC'.
5394 // The Redshift parser adds 'UTC' as default source_tz, but when
5395 // transpiling from other dialects, we should preserve the original form.
5396 Action::ConvertTimezoneToExpr
5397 } else if matches!(source, DialectType::Snowflake)
5398 && matches!(target, DialectType::DuckDB)
5399 && name == "REGEXP_REPLACE"
5400 && f.args.len() == 4
5401 && !matches!(
5402 &f.args[3],
5403 Expression::Literal(crate::expressions::Literal::String(_))
5404 )
5405 {
5406 // Snowflake REGEXP_REPLACE with position arg -> DuckDB needs 'g' flag
5407 Action::RegexpReplaceSnowflakeToDuckDB
5408 } else if name == "_BQ_TO_HEX" {
5409 // Internal marker from TO_HEX conversion - bare (no LOWER/UPPER wrapper)
5410 Action::BigQueryToHexBare
5411 } else if matches!(source, DialectType::BigQuery)
5412 && !matches!(target, DialectType::BigQuery)
5413 {
5414 // BigQuery-specific functions that need to be converted to standard forms
5415 match name.as_str() {
5416 "TIMESTAMP_DIFF" | "DATETIME_DIFF" | "TIME_DIFF"
5417 | "DATE_DIFF"
5418 | "TIMESTAMP_ADD" | "TIMESTAMP_SUB"
5419 | "DATETIME_ADD" | "DATETIME_SUB"
5420 | "TIME_ADD" | "TIME_SUB"
5421 | "DATE_ADD" | "DATE_SUB"
5422 | "SAFE_DIVIDE"
5423 | "GENERATE_UUID"
5424 | "COUNTIF"
5425 | "EDIT_DISTANCE"
5426 | "TIMESTAMP_SECONDS" | "TIMESTAMP_MILLIS" | "TIMESTAMP_MICROS"
5427 | "TIMESTAMP_TRUNC" | "DATETIME_TRUNC" | "DATE_TRUNC"
5428 | "TO_HEX"
5429 | "TO_JSON_STRING"
5430 | "GENERATE_ARRAY" | "GENERATE_TIMESTAMP_ARRAY"
5431 | "DIV"
5432 | "UNIX_DATE" | "UNIX_SECONDS" | "UNIX_MILLIS" | "UNIX_MICROS"
5433 | "LAST_DAY"
5434 | "TIME" | "DATETIME" | "TIMESTAMP" | "STRING"
5435 | "REGEXP_CONTAINS"
5436 | "CONTAINS_SUBSTR"
5437 | "SAFE_ADD" | "SAFE_SUBTRACT" | "SAFE_MULTIPLY"
5438 | "SAFE_CAST"
5439 | "GENERATE_DATE_ARRAY"
5440 | "PARSE_DATE" | "PARSE_TIMESTAMP"
5441 | "FORMAT_DATE" | "FORMAT_DATETIME" | "FORMAT_TIMESTAMP"
5442 | "ARRAY_CONCAT"
5443 | "JSON_QUERY" | "JSON_VALUE_ARRAY"
5444 | "INSTR"
5445 | "MD5" | "SHA1" | "SHA256" | "SHA512"
5446 | "GENERATE_UUID()" // just in case
5447 | "REGEXP_EXTRACT_ALL"
5448 | "REGEXP_EXTRACT"
5449 | "INT64"
5450 | "ARRAY_CONCAT_AGG"
5451 | "DATE_DIFF(" // just in case
5452 | "TO_HEX_MD5" // internal
5453 | "MOD"
5454 | "CONCAT"
5455 | "CURRENT_TIMESTAMP" | "CURRENT_DATE" | "CURRENT_DATETIME" | "CURRENT_TIME"
5456 | "STRUCT"
5457 | "ROUND"
5458 | "MAKE_INTERVAL"
5459 | "ARRAY_TO_STRING"
5460 | "PERCENTILE_CONT"
5461 => Action::BigQueryFunctionNormalize,
5462 "ARRAY" if matches!(target, DialectType::Snowflake)
5463 && f.args.len() == 1
5464 && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT"))
5465 => Action::BigQueryArraySelectAsStructToSnowflake,
5466 _ => Action::None,
5467 }
5468 } else if matches!(source, DialectType::BigQuery)
5469 && matches!(target, DialectType::BigQuery)
5470 {
5471 // BigQuery -> BigQuery normalizations
5472 match name.as_str() {
5473 "TIMESTAMP_DIFF"
5474 | "DATETIME_DIFF"
5475 | "TIME_DIFF"
5476 | "DATE_DIFF"
5477 | "DATE_ADD"
5478 | "TO_HEX"
5479 | "CURRENT_TIMESTAMP"
5480 | "CURRENT_DATE"
5481 | "CURRENT_TIME"
5482 | "CURRENT_DATETIME"
5483 | "GENERATE_DATE_ARRAY"
5484 | "INSTR"
5485 | "FORMAT_DATETIME"
5486 | "DATETIME"
5487 | "MAKE_INTERVAL" => Action::BigQueryFunctionNormalize,
5488 _ => Action::None,
5489 }
5490 } else {
5491 // Generic function normalization for non-BigQuery sources
5492 match name.as_str() {
5493 "ARBITRARY" | "AGGREGATE"
5494 | "REGEXP_MATCHES" | "REGEXP_FULL_MATCH"
5495 | "STRUCT_EXTRACT"
5496 | "LIST_FILTER" | "LIST_TRANSFORM" | "LIST_SORT" | "LIST_REVERSE_SORT"
5497 | "STRING_TO_ARRAY" | "STR_SPLIT" | "STR_SPLIT_REGEX" | "SPLIT_TO_ARRAY"
5498 | "SUBSTRINGINDEX"
5499 | "ARRAY_LENGTH" | "SIZE" | "CARDINALITY"
5500 | "UNICODE"
5501 | "XOR"
5502 | "ARRAY_REVERSE_SORT"
5503 | "ENCODE" | "DECODE"
5504 | "QUANTILE"
5505 | "EPOCH" | "EPOCH_MS"
5506 | "HASHBYTES"
5507 | "JSON_EXTRACT_PATH" | "JSON_EXTRACT_PATH_TEXT"
5508 | "APPROX_DISTINCT"
5509 | "DATE_PARSE" | "FORMAT_DATETIME"
5510 | "REGEXP_EXTRACT" | "REGEXP_SUBSTR" | "TO_DAYS"
5511 | "RLIKE"
5512 | "DATEDIFF" | "DATE_DIFF" | "MONTHS_BETWEEN"
5513 | "ADD_MONTHS" | "DATEADD" | "DATE_ADD" | "DATE_SUB" | "DATETRUNC"
5514 | "LAST_DAY" | "LAST_DAY_OF_MONTH" | "EOMONTH"
5515 | "ARRAY_CONSTRUCT" | "ARRAY_CAT" | "ARRAY_COMPACT"
5516 | "ARRAY_FILTER" | "FILTER" | "REDUCE" | "ARRAY_REVERSE"
5517 | "MAP" | "MAP_FROM_ENTRIES"
5518 | "COLLECT_LIST" | "COLLECT_SET"
5519 | "ISNAN" | "IS_NAN"
5520 | "TO_UTC_TIMESTAMP" | "FROM_UTC_TIMESTAMP"
5521 | "FORMAT_NUMBER"
5522 | "TOMONDAY" | "TOSTARTOFWEEK" | "TOSTARTOFMONTH" | "TOSTARTOFYEAR"
5523 | "ELEMENT_AT"
5524 | "EXPLODE" | "EXPLODE_OUTER" | "POSEXPLODE"
5525 | "SPLIT_PART"
5526 // GENERATE_SERIES: handled separately below
5527 | "JSON_EXTRACT" | "JSON_EXTRACT_SCALAR"
5528 | "JSON_QUERY" | "JSON_VALUE"
5529 | "JSON_SEARCH"
5530 | "JSON_EXTRACT_JSON" | "BSON_EXTRACT_BSON"
5531 | "TO_UNIX_TIMESTAMP" | "UNIX_TIMESTAMP"
5532 | "CURDATE" | "CURTIME"
5533 | "ARRAY_TO_STRING"
5534 | "ARRAY_SORT" | "SORT_ARRAY"
5535 | "LEFT" | "RIGHT"
5536 | "MAP_FROM_ARRAYS"
5537 | "LIKE" | "ILIKE"
5538 | "ARRAY_CONCAT" | "LIST_CONCAT"
5539 | "QUANTILE_CONT" | "QUANTILE_DISC"
5540 | "PERCENTILE_CONT" | "PERCENTILE_DISC"
5541 | "PERCENTILE_APPROX" | "APPROX_PERCENTILE"
5542 | "LOCATE" | "STRPOS" | "INSTR"
5543 | "CHAR"
5544 // CONCAT: handled separately for COALESCE wrapping
5545 | "ARRAY_JOIN"
5546 | "ARRAY_CONTAINS" | "HAS" | "CONTAINS"
5547 | "ISNULL"
5548 | "MONTHNAME"
5549 | "TO_TIMESTAMP"
5550 | "TO_DATE"
5551 | "TO_JSON"
5552 | "REGEXP_SPLIT"
5553 | "SPLIT"
5554 | "FORMATDATETIME"
5555 | "ARRAYJOIN"
5556 | "SPLITBYSTRING" | "SPLITBYREGEXP"
5557 | "NVL"
5558 | "TO_CHAR"
5559 | "DBMS_RANDOM.VALUE"
5560 | "REGEXP_LIKE"
5561 | "REPLICATE"
5562 | "LEN"
5563 | "COUNT_BIG"
5564 | "DATEFROMPARTS"
5565 | "DATETIMEFROMPARTS"
5566 | "CONVERT" | "TRY_CONVERT"
5567 | "STRFTIME" | "STRPTIME"
5568 | "DATE_FORMAT" | "FORMAT_DATE"
5569 | "PARSE_TIMESTAMP" | "PARSE_DATE"
5570 | "FROM_BASE64" | "TO_BASE64"
5571 | "GETDATE"
5572 | "TO_HEX" | "FROM_HEX" | "UNHEX" | "HEX"
5573 | "TO_UTF8" | "FROM_UTF8"
5574 | "STARTS_WITH" | "STARTSWITH"
5575 | "APPROX_COUNT_DISTINCT"
5576 | "JSON_FORMAT"
5577 | "SYSDATE"
5578 | "LOGICAL_OR" | "LOGICAL_AND"
5579 | "MONTHS_ADD"
5580 | "SCHEMA_NAME"
5581 | "STRTOL"
5582 | "EDITDIST3"
5583 | "FORMAT"
5584 | "LIST_CONTAINS" | "LIST_HAS"
5585 | "VARIANCE" | "STDDEV"
5586 | "ISINF"
5587 | "TO_UNIXTIME"
5588 | "FROM_UNIXTIME"
5589 | "DATEPART" | "DATE_PART"
5590 | "DATENAME"
5591 | "STRING_AGG"
5592 | "JSON_ARRAYAGG"
5593 | "APPROX_QUANTILE"
5594 | "MAKE_DATE"
5595 | "LIST_HAS_ANY" | "ARRAY_HAS_ANY"
5596 | "RANGE"
5597 | "TRY_ELEMENT_AT"
5598 | "STR_TO_MAP"
5599 | "STRING"
5600 | "STR_TO_TIME"
5601 | "CURRENT_SCHEMA"
5602 | "LTRIM" | "RTRIM"
5603 | "UUID"
5604 | "FARM_FINGERPRINT"
5605 | "JSON_KEYS"
5606 | "WEEKOFYEAR"
5607 | "CONCAT_WS"
5608 | "ARRAY_SLICE"
5609 | "ARRAY_PREPEND"
5610 | "ARRAY_REMOVE"
5611 | "GENERATE_DATE_ARRAY"
5612 | "PARSE_JSON"
5613 | "JSON_REMOVE"
5614 | "JSON_SET"
5615 | "LEVENSHTEIN"
5616 => Action::GenericFunctionNormalize,
5617 // Canonical date functions -> dialect-specific
5618 "TS_OR_DS_TO_DATE" => Action::TsOrDsToDateConvert,
5619 "TS_OR_DS_TO_DATE_STR" if f.args.len() == 1 => Action::TsOrDsToDateStrConvert,
5620 "DATE_STR_TO_DATE" if f.args.len() == 1 => Action::DateStrToDateConvert,
5621 "TIME_STR_TO_DATE" if f.args.len() == 1 => Action::TimeStrToDateConvert,
5622 "TIME_STR_TO_TIME" if f.args.len() <= 2 => Action::TimeStrToTimeConvert,
5623 "TIME_STR_TO_UNIX" if f.args.len() == 1 => Action::TimeStrToUnixConvert,
5624 "TIME_TO_TIME_STR" if f.args.len() == 1 => Action::TimeToTimeStrConvert,
5625 "DATE_TO_DATE_STR" if f.args.len() == 1 => Action::DateToDateStrConvert,
5626 "DATE_TO_DI" if f.args.len() == 1 => Action::DateToDiConvert,
5627 "DI_TO_DATE" if f.args.len() == 1 => Action::DiToDateConvert,
5628 "TS_OR_DI_TO_DI" if f.args.len() == 1 => Action::TsOrDiToDiConvert,
5629 "UNIX_TO_STR" if f.args.len() == 2 => Action::UnixToStrConvert,
5630 "UNIX_TO_TIME" if f.args.len() == 1 => Action::UnixToTimeConvert,
5631 "UNIX_TO_TIME_STR" if f.args.len() == 1 => Action::UnixToTimeStrConvert,
5632 "TIME_TO_UNIX" if f.args.len() == 1 => Action::TimeToUnixConvert,
5633 "TIME_TO_STR" if f.args.len() == 2 => Action::TimeToStrConvert,
5634 "STR_TO_UNIX" if f.args.len() == 2 => Action::StrToUnixConvert,
5635 // STR_TO_DATE(x, fmt) -> dialect-specific
5636 "STR_TO_DATE" if f.args.len() == 2
5637 && matches!(source, DialectType::Generic) => Action::StrToDateConvert,
5638 "STR_TO_DATE" => Action::GenericFunctionNormalize,
5639 // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> dialect-specific DATE_ADD
5640 "TS_OR_DS_ADD" if f.args.len() == 3
5641 && matches!(source, DialectType::Generic) => Action::TsOrDsAddConvert,
5642 // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, '1970-01-01')
5643 "DATE_FROM_UNIX_DATE" if f.args.len() == 1 => Action::DateFromUnixDateConvert,
5644 // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b [ELSE c] END
5645 "NVL2" if (f.args.len() == 2 || f.args.len() == 3) => Action::Nvl2Expand,
5646 // IFNULL(a, b) -> COALESCE(a, b) when coming from Generic source
5647 "IFNULL" if f.args.len() == 2 => Action::IfnullToCoalesce,
5648 // IS_ASCII(x) -> dialect-specific
5649 "IS_ASCII" if f.args.len() == 1 => Action::IsAsciiConvert,
5650 // STR_POSITION(haystack, needle[, pos[, occ]]) -> dialect-specific
5651 "STR_POSITION" => Action::StrPositionConvert,
5652 // ARRAY_SUM -> dialect-specific
5653 "ARRAY_SUM" => Action::ArraySumConvert,
5654 // ARRAY_SIZE -> dialect-specific (Drill only)
5655 "ARRAY_SIZE" if matches!(target, DialectType::Drill) => Action::ArraySizeConvert,
5656 // ARRAY_ANY -> dialect-specific
5657 "ARRAY_ANY" if f.args.len() == 2 => Action::ArrayAnyConvert,
5658 // Functions needing specific cross-dialect transforms
5659 "MAX_BY" | "MIN_BY" if matches!(target, DialectType::ClickHouse | DialectType::Spark | DialectType::Databricks | DialectType::DuckDB) => Action::MaxByMinByConvert,
5660 "STRUCT" if matches!(source, DialectType::Spark | DialectType::Databricks)
5661 && !matches!(target, DialectType::Spark | DialectType::Databricks | DialectType::Hive) => Action::SparkStructConvert,
5662 "ARRAY" if matches!(source, DialectType::BigQuery)
5663 && matches!(target, DialectType::Snowflake)
5664 && f.args.len() == 1
5665 && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT")) => Action::BigQueryArraySelectAsStructToSnowflake,
5666 "ARRAY" if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena | DialectType::BigQuery | DialectType::DuckDB | DialectType::ClickHouse | DialectType::StarRocks) => Action::ArraySyntaxConvert,
5667 "TRUNC" if f.args.len() == 2 && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::ClickHouse) => Action::TruncToDateTrunc,
5668 // DATE_TRUNC('unit', x) from Generic source -> arg swap for BigQuery/Doris/Spark/MySQL
5669 "DATE_TRUNC" if f.args.len() == 2
5670 && matches!(source, DialectType::Generic)
5671 && matches!(target, DialectType::BigQuery | DialectType::Doris | DialectType::StarRocks
5672 | DialectType::Spark | DialectType::Databricks | DialectType::MySQL) => Action::DateTruncSwapArgs,
5673 // TIMESTAMP_TRUNC(x, UNIT) from Generic source -> convert to per-dialect
5674 "TIMESTAMP_TRUNC" if f.args.len() >= 2
5675 && matches!(source, DialectType::Generic) => Action::TimestampTruncConvert,
5676 "UNIFORM" if matches!(target, DialectType::Snowflake) => Action::GenericFunctionNormalize,
5677 // GENERATE_SERIES -> SEQUENCE/UNNEST/EXPLODE for target dialects
5678 "GENERATE_SERIES" if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
5679 && !matches!(target, DialectType::PostgreSQL | DialectType::Redshift | DialectType::TSQL | DialectType::Fabric) => Action::GenerateSeriesConvert,
5680 // GENERATE_SERIES with interval normalization for PG target
5681 "GENERATE_SERIES" if f.args.len() >= 3
5682 && matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
5683 && matches!(target, DialectType::PostgreSQL | DialectType::Redshift) => Action::GenerateSeriesConvert,
5684 "GENERATE_SERIES" => Action::None, // passthrough for other cases
5685 // CONCAT(a, b) -> COALESCE wrapping for Presto/ClickHouse from PostgreSQL
5686 "CONCAT" if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
5687 && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::ClickHouse) => Action::ConcatCoalesceWrap,
5688 "CONCAT" => Action::GenericFunctionNormalize,
5689 // DIV(a, b) -> target-specific integer division
5690 "DIV" if f.args.len() == 2
5691 && matches!(source, DialectType::PostgreSQL)
5692 && matches!(target, DialectType::DuckDB | DialectType::BigQuery | DialectType::SQLite) => Action::DivFuncConvert,
5693 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
5694 "JSON_OBJECT_AGG" | "JSONB_OBJECT_AGG" if f.args.len() == 2
5695 && matches!(target, DialectType::DuckDB) => Action::JsonObjectAggConvert,
5696 // JSONB_EXISTS -> JSON_EXISTS for DuckDB
5697 "JSONB_EXISTS" if f.args.len() == 2
5698 && matches!(target, DialectType::DuckDB) => Action::JsonbExistsConvert,
5699 // DATE_BIN -> TIME_BUCKET for DuckDB
5700 "DATE_BIN" if matches!(target, DialectType::DuckDB) => Action::DateBinConvert,
5701 // Multi-arg MIN(a,b,c) -> LEAST, MAX(a,b,c) -> GREATEST
5702 "MIN" | "MAX" if f.args.len() > 1 && !matches!(target, DialectType::SQLite) => Action::MinMaxToLeastGreatest,
5703 // ClickHouse uniq -> APPROX_COUNT_DISTINCT for other dialects
5704 "UNIQ" if matches!(source, DialectType::ClickHouse) && !matches!(target, DialectType::ClickHouse) => Action::ClickHouseUniqToApproxCountDistinct,
5705 // ClickHouse any -> ANY_VALUE for other dialects
5706 "ANY" if f.args.len() == 1 && matches!(source, DialectType::ClickHouse) && !matches!(target, DialectType::ClickHouse) => Action::ClickHouseAnyToAnyValue,
5707 _ => Action::None,
5708 }
5709 }
5710 }
5711 Expression::AggregateFunction(af) => {
5712 let name = af.name.to_uppercase();
5713 match name.as_str() {
5714 "ARBITRARY" | "AGGREGATE" => Action::GenericFunctionNormalize,
5715 "JSON_ARRAYAGG" => Action::GenericFunctionNormalize,
5716 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
5717 "JSON_OBJECT_AGG" | "JSONB_OBJECT_AGG"
5718 if matches!(target, DialectType::DuckDB) =>
5719 {
5720 Action::JsonObjectAggConvert
5721 }
5722 "ARRAY_AGG"
5723 if matches!(
5724 target,
5725 DialectType::Hive
5726 | DialectType::Spark
5727 | DialectType::Databricks
5728 ) =>
5729 {
5730 Action::ArrayAggToCollectList
5731 }
5732 "MAX_BY" | "MIN_BY"
5733 if matches!(
5734 target,
5735 DialectType::ClickHouse
5736 | DialectType::Spark
5737 | DialectType::Databricks
5738 | DialectType::DuckDB
5739 ) =>
5740 {
5741 Action::MaxByMinByConvert
5742 }
5743 "COLLECT_LIST"
5744 if matches!(
5745 target,
5746 DialectType::Presto | DialectType::Trino | DialectType::DuckDB
5747 ) =>
5748 {
5749 Action::CollectListToArrayAgg
5750 }
5751 "COLLECT_SET"
5752 if matches!(
5753 target,
5754 DialectType::Presto
5755 | DialectType::Trino
5756 | DialectType::Snowflake
5757 | DialectType::DuckDB
5758 ) =>
5759 {
5760 Action::CollectSetConvert
5761 }
5762 "PERCENTILE"
5763 if matches!(
5764 target,
5765 DialectType::DuckDB | DialectType::Presto | DialectType::Trino
5766 ) =>
5767 {
5768 Action::PercentileConvert
5769 }
5770 // CORR -> CASE WHEN ISNAN(CORR(a,b)) THEN NULL ELSE CORR(a,b) END for DuckDB
5771 "CORR"
5772 if matches!(target, DialectType::DuckDB)
5773 && matches!(source, DialectType::Snowflake) =>
5774 {
5775 Action::CorrIsnanWrap
5776 }
5777 // BigQuery APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
5778 "APPROX_QUANTILES"
5779 if matches!(source, DialectType::BigQuery)
5780 && matches!(target, DialectType::DuckDB) =>
5781 {
5782 Action::BigQueryApproxQuantiles
5783 }
5784 // BigQuery PERCENTILE_CONT(x, frac RESPECT NULLS) -> QUANTILE_CONT(x, frac) for DuckDB
5785 "PERCENTILE_CONT"
5786 if matches!(source, DialectType::BigQuery)
5787 && matches!(target, DialectType::DuckDB)
5788 && af.args.len() >= 2 =>
5789 {
5790 Action::BigQueryPercentileContToDuckDB
5791 }
5792 _ => Action::None,
5793 }
5794 }
5795 Expression::JSONArrayAgg(_) => match target {
5796 DialectType::PostgreSQL => Action::GenericFunctionNormalize,
5797 _ => Action::None,
5798 },
5799 Expression::ToNumber(tn) => {
5800 // TO_NUMBER(x) with 1 arg -> CAST(x AS DOUBLE) for most targets
5801 if tn.format.is_none() && tn.precision.is_none() && tn.scale.is_none() {
5802 match target {
5803 DialectType::Oracle
5804 | DialectType::Snowflake
5805 | DialectType::Teradata => Action::None,
5806 _ => Action::GenericFunctionNormalize,
5807 }
5808 } else {
5809 Action::None
5810 }
5811 }
5812 Expression::Nvl2(_) => {
5813 // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b ELSE c END for most dialects
5814 // Keep as NVL2 for dialects that support it natively
5815 match target {
5816 DialectType::Oracle
5817 | DialectType::Snowflake
5818 | DialectType::Teradata
5819 | DialectType::Spark
5820 | DialectType::Databricks
5821 | DialectType::Redshift => Action::None,
5822 _ => Action::Nvl2Expand,
5823 }
5824 }
5825 Expression::Decode(_) | Expression::DecodeCase(_) => {
5826 // DECODE(a, b, c[, d, e[, ...]]) -> CASE WHEN with null-safe comparisons
5827 // Keep as DECODE for Oracle/Snowflake
5828 match target {
5829 DialectType::Oracle | DialectType::Snowflake => Action::None,
5830 _ => Action::DecodeSimplify,
5831 }
5832 }
5833 Expression::Coalesce(ref cf) => {
5834 // IFNULL(a, b) -> COALESCE(a, b): clear original_name for cross-dialect
5835 // BigQuery keeps IFNULL natively when source is also BigQuery
5836 if cf.original_name.as_deref() == Some("IFNULL")
5837 && !(matches!(source, DialectType::BigQuery)
5838 && matches!(target, DialectType::BigQuery))
5839 {
5840 Action::IfnullToCoalesce
5841 } else {
5842 Action::None
5843 }
5844 }
5845 Expression::IfFunc(if_func) => {
5846 if matches!(source, DialectType::Snowflake)
5847 && matches!(
5848 target,
5849 DialectType::Presto | DialectType::Trino | DialectType::SQLite
5850 )
5851 && matches!(if_func.false_value, Some(Expression::Div(_)))
5852 {
5853 Action::Div0TypedDivision
5854 } else {
5855 Action::None
5856 }
5857 }
5858 Expression::ToJson(_) => match target {
5859 DialectType::Presto | DialectType::Trino => Action::ToJsonConvert,
5860 DialectType::BigQuery => Action::ToJsonConvert,
5861 DialectType::DuckDB => Action::ToJsonConvert,
5862 _ => Action::None,
5863 },
5864 Expression::ArrayAgg(ref agg) => {
5865 if matches!(
5866 target,
5867 DialectType::Hive | DialectType::Spark | DialectType::Databricks
5868 ) {
5869 // Any source -> Hive/Spark: convert ARRAY_AGG to COLLECT_LIST
5870 Action::ArrayAggToCollectList
5871 } else if matches!(
5872 source,
5873 DialectType::Spark | DialectType::Databricks | DialectType::Hive
5874 ) && matches!(target, DialectType::DuckDB)
5875 && agg.filter.is_some()
5876 {
5877 // Spark/Hive ARRAY_AGG excludes NULLs, DuckDB includes them
5878 // Need to add NOT x IS NULL to existing filter
5879 Action::ArrayAggNullFilter
5880 } else if matches!(target, DialectType::DuckDB)
5881 && agg.ignore_nulls == Some(true)
5882 && !agg.order_by.is_empty()
5883 {
5884 // BigQuery ARRAY_AGG(x IGNORE NULLS ORDER BY ...) -> DuckDB ARRAY_AGG(x ORDER BY a NULLS FIRST, ...)
5885 Action::ArrayAggIgnoreNullsDuckDB
5886 } else if !matches!(source, DialectType::Snowflake) {
5887 Action::None
5888 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
5889 let is_array_agg = agg.name.as_deref().map(|n| n.to_uppercase())
5890 == Some("ARRAY_AGG".to_string())
5891 || agg.name.is_none();
5892 if is_array_agg {
5893 Action::ArrayAggCollectList
5894 } else {
5895 Action::None
5896 }
5897 } else if matches!(
5898 target,
5899 DialectType::DuckDB | DialectType::Presto | DialectType::Trino
5900 ) && agg.filter.is_none()
5901 {
5902 Action::ArrayAggFilter
5903 } else {
5904 Action::None
5905 }
5906 }
5907 Expression::WithinGroup(wg) => {
5908 if matches!(source, DialectType::Snowflake)
5909 && matches!(
5910 target,
5911 DialectType::DuckDB | DialectType::Presto | DialectType::Trino
5912 )
5913 && matches!(wg.this, Expression::ArrayAgg(_))
5914 {
5915 Action::ArrayAggWithinGroupFilter
5916 } else if matches!(&wg.this, Expression::AggregateFunction(af) if af.name.eq_ignore_ascii_case("STRING_AGG"))
5917 || matches!(&wg.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("STRING_AGG"))
5918 || matches!(&wg.this, Expression::StringAgg(_))
5919 {
5920 Action::StringAggConvert
5921 } else if matches!(
5922 target,
5923 DialectType::Presto
5924 | DialectType::Trino
5925 | DialectType::Athena
5926 | DialectType::Spark
5927 | DialectType::Databricks
5928 ) && (matches!(&wg.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("PERCENTILE_CONT") || f.name.eq_ignore_ascii_case("PERCENTILE_DISC"))
5929 || matches!(&wg.this, Expression::AggregateFunction(af) if af.name.eq_ignore_ascii_case("PERCENTILE_CONT") || af.name.eq_ignore_ascii_case("PERCENTILE_DISC"))
5930 || matches!(&wg.this, Expression::PercentileCont(_)))
5931 {
5932 Action::PercentileContConvert
5933 } else {
5934 Action::None
5935 }
5936 }
5937 // For BigQuery: CAST(x AS TIMESTAMP) -> CAST(x AS DATETIME)
5938 // because BigQuery's TIMESTAMP is really TIMESTAMPTZ, and
5939 // DATETIME is the timezone-unaware type
5940 Expression::Cast(ref c) => {
5941 if c.format.is_some()
5942 && (matches!(source, DialectType::BigQuery)
5943 || matches!(source, DialectType::Teradata))
5944 {
5945 Action::BigQueryCastFormat
5946 } else if matches!(target, DialectType::BigQuery)
5947 && !matches!(source, DialectType::BigQuery)
5948 && matches!(
5949 c.to,
5950 DataType::Timestamp {
5951 timezone: false,
5952 ..
5953 }
5954 )
5955 {
5956 Action::CastTimestampToDatetime
5957 } else if matches!(target, DialectType::MySQL | DialectType::StarRocks)
5958 && !matches!(source, DialectType::MySQL | DialectType::StarRocks)
5959 && matches!(
5960 c.to,
5961 DataType::Timestamp {
5962 timezone: false,
5963 ..
5964 }
5965 )
5966 {
5967 // Generic/other -> MySQL/StarRocks: CAST(x AS TIMESTAMP) -> CAST(x AS DATETIME)
5968 // but MySQL-native CAST(x AS TIMESTAMP) stays as TIMESTAMP(x) via transform_cast
5969 Action::CastTimestampToDatetime
5970 } else if matches!(
5971 source,
5972 DialectType::Hive | DialectType::Spark | DialectType::Databricks
5973 ) && matches!(
5974 target,
5975 DialectType::Presto
5976 | DialectType::Trino
5977 | DialectType::Athena
5978 | DialectType::DuckDB
5979 | DialectType::Snowflake
5980 | DialectType::BigQuery
5981 | DialectType::Databricks
5982 | DialectType::TSQL
5983 ) {
5984 Action::HiveCastToTryCast
5985 } else if matches!(c.to, DataType::Timestamp { timezone: true, .. })
5986 && matches!(target, DialectType::MySQL | DialectType::StarRocks)
5987 {
5988 // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) function for MySQL/StarRocks
5989 Action::CastTimestamptzToFunc
5990 } else if matches!(c.to, DataType::Timestamp { timezone: true, .. })
5991 && matches!(
5992 target,
5993 DialectType::Hive
5994 | DialectType::Spark
5995 | DialectType::Databricks
5996 | DialectType::BigQuery
5997 )
5998 {
5999 // CAST(x AS TIMESTAMP WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark/BigQuery
6000 Action::CastTimestampStripTz
6001 } else if matches!(&c.to, DataType::Json)
6002 && matches!(&c.this, Expression::Literal(Literal::String(_)))
6003 && matches!(
6004 target,
6005 DialectType::Presto
6006 | DialectType::Trino
6007 | DialectType::Athena
6008 | DialectType::Snowflake
6009 )
6010 {
6011 // CAST('x' AS JSON) -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake
6012 // Only when the input is a string literal (JSON 'value' syntax)
6013 Action::JsonLiteralToJsonParse
6014 } else if matches!(&c.to, DataType::Json | DataType::JsonB)
6015 && matches!(target, DialectType::Spark | DialectType::Databricks)
6016 {
6017 // CAST(x AS JSON) -> TO_JSON(x) for Spark
6018 Action::CastToJsonForSpark
6019 } else if (matches!(
6020 &c.to,
6021 DataType::Array { .. } | DataType::Map { .. } | DataType::Struct { .. }
6022 )) && matches!(
6023 target,
6024 DialectType::Spark | DialectType::Databricks
6025 ) && (matches!(&c.this, Expression::ParseJson(_))
6026 || matches!(
6027 &c.this,
6028 Expression::Function(f)
6029 if f.name.eq_ignore_ascii_case("JSON_EXTRACT")
6030 || f.name.eq_ignore_ascii_case("JSON_EXTRACT_SCALAR")
6031 || f.name.eq_ignore_ascii_case("GET_JSON_OBJECT")
6032 ))
6033 {
6034 // CAST(JSON_PARSE(...) AS ARRAY/MAP) or CAST(JSON_EXTRACT/GET_JSON_OBJECT(...) AS ARRAY/MAP)
6035 // -> FROM_JSON(..., type_string) for Spark
6036 Action::CastJsonToFromJson
6037 } else if matches!(target, DialectType::Spark | DialectType::Databricks)
6038 && matches!(
6039 c.to,
6040 DataType::Timestamp {
6041 timezone: false,
6042 ..
6043 }
6044 )
6045 && matches!(source, DialectType::DuckDB)
6046 {
6047 Action::StrftimeCastTimestamp
6048 } else if matches!(source, DialectType::DuckDB)
6049 && matches!(
6050 c.to,
6051 DataType::Decimal {
6052 precision: None,
6053 ..
6054 }
6055 )
6056 {
6057 Action::DecimalDefaultPrecision
6058 } else if matches!(source, DialectType::MySQL | DialectType::SingleStore)
6059 && matches!(c.to, DataType::Char { length: None })
6060 && !matches!(target, DialectType::MySQL | DialectType::SingleStore)
6061 {
6062 // MySQL CAST(x AS CHAR) was originally TEXT - convert to target text type
6063 Action::MysqlCastCharToText
6064 } else if matches!(
6065 source,
6066 DialectType::Spark | DialectType::Databricks | DialectType::Hive
6067 ) && matches!(
6068 target,
6069 DialectType::Spark | DialectType::Databricks | DialectType::Hive
6070 ) && Self::has_varchar_char_type(&c.to)
6071 {
6072 // Spark parses VARCHAR(n)/CHAR(n) as TEXT, so normalize back to STRING
6073 Action::SparkCastVarcharToString
6074 } else {
6075 Action::None
6076 }
6077 }
6078 Expression::SafeCast(ref c) => {
6079 if c.format.is_some()
6080 && matches!(source, DialectType::BigQuery)
6081 && !matches!(target, DialectType::BigQuery)
6082 {
6083 Action::BigQueryCastFormat
6084 } else {
6085 Action::None
6086 }
6087 }
6088 // For DuckDB: DATE_TRUNC should preserve the input type
6089 Expression::DateTrunc(_) | Expression::TimestampTrunc(_) => {
6090 if matches!(source, DialectType::Snowflake)
6091 && matches!(target, DialectType::DuckDB)
6092 {
6093 Action::DateTruncWrapCast
6094 } else {
6095 Action::None
6096 }
6097 }
6098 // For DuckDB: SET a = 1 -> SET VARIABLE a = 1
6099 Expression::SetStatement(s) => {
6100 if matches!(target, DialectType::DuckDB)
6101 && !matches!(source, DialectType::TSQL | DialectType::Fabric)
6102 && s.items.iter().any(|item| item.kind.is_none())
6103 {
6104 Action::SetToVariable
6105 } else {
6106 Action::None
6107 }
6108 }
6109 // Cross-dialect NULL ordering normalization.
6110 // When nulls_first is not specified, fill in the source dialect's implied
6111 // default so the target generator can correctly add/strip NULLS FIRST/LAST.
6112 Expression::Ordered(o) => {
6113 // MySQL doesn't support NULLS FIRST/LAST - strip or rewrite
6114 if matches!(target, DialectType::MySQL) && o.nulls_first.is_some() {
6115 Action::MysqlNullsOrdering
6116 } else {
6117 // Skip targets that don't support NULLS FIRST/LAST syntax
6118 let target_supports_nulls = !matches!(
6119 target,
6120 DialectType::MySQL
6121 | DialectType::TSQL
6122 | DialectType::StarRocks
6123 | DialectType::Doris
6124 );
6125 if o.nulls_first.is_none() && source != target && target_supports_nulls
6126 {
6127 Action::NullsOrdering
6128 } else {
6129 Action::None
6130 }
6131 }
6132 }
6133 // BigQuery data types: convert INT64, BYTES, NUMERIC etc. to standard types
6134 Expression::DataType(dt) => {
6135 if matches!(source, DialectType::BigQuery)
6136 && !matches!(target, DialectType::BigQuery)
6137 {
6138 match dt {
6139 DataType::Custom { ref name }
6140 if name.eq_ignore_ascii_case("INT64")
6141 || name.eq_ignore_ascii_case("FLOAT64")
6142 || name.eq_ignore_ascii_case("BOOL")
6143 || name.eq_ignore_ascii_case("BYTES")
6144 || name.eq_ignore_ascii_case("NUMERIC")
6145 || name.eq_ignore_ascii_case("STRING")
6146 || name.eq_ignore_ascii_case("DATETIME") =>
6147 {
6148 Action::BigQueryCastType
6149 }
6150 _ => Action::None,
6151 }
6152 } else if matches!(source, DialectType::TSQL) {
6153 // For TSQL source -> any target (including TSQL itself for REAL)
6154 match dt {
6155 // REAL -> FLOAT even for TSQL->TSQL
6156 DataType::Custom { ref name }
6157 if name.eq_ignore_ascii_case("REAL") =>
6158 {
6159 Action::TSQLTypeNormalize
6160 }
6161 DataType::Float {
6162 real_spelling: true,
6163 ..
6164 } => Action::TSQLTypeNormalize,
6165 // Other TSQL type normalizations only for non-TSQL targets
6166 DataType::Custom { ref name }
6167 if !matches!(target, DialectType::TSQL)
6168 && (name.eq_ignore_ascii_case("MONEY")
6169 || name.eq_ignore_ascii_case("SMALLMONEY")
6170 || name.eq_ignore_ascii_case("DATETIME2")
6171 || name.eq_ignore_ascii_case("IMAGE")
6172 || name.eq_ignore_ascii_case("BIT")
6173 || name.eq_ignore_ascii_case("ROWVERSION")
6174 || name.eq_ignore_ascii_case("UNIQUEIDENTIFIER")
6175 || name.eq_ignore_ascii_case("DATETIMEOFFSET")
6176 || name.to_uppercase().starts_with("NUMERIC")
6177 || name.to_uppercase().starts_with("DATETIME2(")
6178 || name.to_uppercase().starts_with("TIME(")) =>
6179 {
6180 Action::TSQLTypeNormalize
6181 }
6182 DataType::Float {
6183 precision: Some(_), ..
6184 } if !matches!(target, DialectType::TSQL) => {
6185 Action::TSQLTypeNormalize
6186 }
6187 DataType::TinyInt { .. }
6188 if !matches!(target, DialectType::TSQL) =>
6189 {
6190 Action::TSQLTypeNormalize
6191 }
6192 // INTEGER -> INT for Databricks/Spark targets
6193 DataType::Int {
6194 integer_spelling: true,
6195 ..
6196 } if matches!(
6197 target,
6198 DialectType::Databricks | DialectType::Spark
6199 ) =>
6200 {
6201 Action::TSQLTypeNormalize
6202 }
6203 _ => Action::None,
6204 }
6205 } else if (matches!(source, DialectType::Oracle)
6206 || matches!(source, DialectType::Generic))
6207 && !matches!(target, DialectType::Oracle)
6208 {
6209 match dt {
6210 DataType::Custom { ref name }
6211 if name.to_uppercase().starts_with("VARCHAR2(")
6212 || name.to_uppercase().starts_with("NVARCHAR2(")
6213 || name.eq_ignore_ascii_case("VARCHAR2")
6214 || name.eq_ignore_ascii_case("NVARCHAR2") =>
6215 {
6216 Action::OracleVarchar2ToVarchar
6217 }
6218 _ => Action::None,
6219 }
6220 } else if matches!(target, DialectType::Snowflake)
6221 && !matches!(source, DialectType::Snowflake)
6222 {
6223 // When target is Snowflake but source is NOT Snowflake,
6224 // protect FLOAT from being converted to DOUBLE by Snowflake's transform.
6225 // Snowflake treats FLOAT=DOUBLE internally, but non-Snowflake sources
6226 // should keep their FLOAT spelling.
6227 match dt {
6228 DataType::Float { .. } => Action::SnowflakeFloatProtect,
6229 _ => Action::None,
6230 }
6231 } else {
6232 Action::None
6233 }
6234 }
6235 // LOWER patterns from BigQuery TO_HEX conversions:
6236 // - LOWER(LOWER(HEX(x))) from non-BQ targets: flatten
6237 // - LOWER(Function("TO_HEX")) for BQ->BQ: strip LOWER
6238 Expression::Lower(uf) => {
6239 if matches!(source, DialectType::BigQuery) {
6240 match &uf.this {
6241 Expression::Lower(_) => Action::BigQueryToHexLower,
6242 Expression::Function(f)
6243 if f.name == "TO_HEX"
6244 && matches!(target, DialectType::BigQuery) =>
6245 {
6246 // BQ->BQ: LOWER(TO_HEX(x)) -> TO_HEX(x)
6247 Action::BigQueryToHexLower
6248 }
6249 _ => Action::None,
6250 }
6251 } else {
6252 Action::None
6253 }
6254 }
6255 // UPPER patterns from BigQuery TO_HEX conversions:
6256 // - UPPER(LOWER(HEX(x))) from non-BQ targets: extract inner
6257 // - UPPER(Function("TO_HEX")) for BQ->BQ: keep as UPPER(TO_HEX(x))
6258 Expression::Upper(uf) => {
6259 if matches!(source, DialectType::BigQuery) {
6260 match &uf.this {
6261 Expression::Lower(_) => Action::BigQueryToHexUpper,
6262 _ => Action::None,
6263 }
6264 } else {
6265 Action::None
6266 }
6267 }
6268 // BigQuery LAST_DAY(date, unit) -> strip unit for non-BigQuery targets
6269 // Snowflake supports LAST_DAY with unit, so keep it there
6270 Expression::LastDay(ld) => {
6271 if matches!(source, DialectType::BigQuery)
6272 && !matches!(target, DialectType::BigQuery | DialectType::Snowflake)
6273 && ld.unit.is_some()
6274 {
6275 Action::BigQueryLastDayStripUnit
6276 } else {
6277 Action::None
6278 }
6279 }
6280 // BigQuery SafeDivide expressions (already parsed as SafeDivide)
6281 Expression::SafeDivide(_) => {
6282 if matches!(source, DialectType::BigQuery)
6283 && !matches!(target, DialectType::BigQuery)
6284 {
6285 Action::BigQuerySafeDivide
6286 } else {
6287 Action::None
6288 }
6289 }
6290 // BigQuery ANY_VALUE(x HAVING MAX/MIN y) -> ARG_MAX_NULL/ARG_MIN_NULL for DuckDB
6291 // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
6292 Expression::AnyValue(ref agg) => {
6293 if matches!(source, DialectType::BigQuery)
6294 && matches!(target, DialectType::DuckDB)
6295 && agg.having_max.is_some()
6296 {
6297 Action::BigQueryAnyValueHaving
6298 } else if matches!(target, DialectType::Spark | DialectType::Databricks)
6299 && !matches!(source, DialectType::Spark | DialectType::Databricks)
6300 && agg.ignore_nulls.is_none()
6301 {
6302 Action::AnyValueIgnoreNulls
6303 } else {
6304 Action::None
6305 }
6306 }
6307 Expression::Any(ref q) => {
6308 if matches!(source, DialectType::PostgreSQL)
6309 && matches!(
6310 target,
6311 DialectType::Spark | DialectType::Databricks | DialectType::Hive
6312 )
6313 && q.op.is_some()
6314 && !matches!(
6315 q.subquery,
6316 Expression::Select(_) | Expression::Subquery(_)
6317 )
6318 {
6319 Action::AnyToExists
6320 } else {
6321 Action::None
6322 }
6323 }
// NOTE(review): the APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) rewrite
// described here has no matching arm below (the next arm handles RegexpLike) — this
// comment appears stale; confirm whether the arm was moved or removed.
6325 // RegexpLike from non-DuckDB sources -> REGEXP_MATCHES for DuckDB target
6326 // DuckDB's ~ is a full match, but other dialects' REGEXP/RLIKE is a partial match
6327 Expression::RegexpLike(_)
6328 if !matches!(source, DialectType::DuckDB)
6329 && matches!(target, DialectType::DuckDB) =>
6330 {
6331 Action::RegexpLikeToDuckDB
6332 }
6333 // Safe-division source -> non-safe target: NULLIF wrapping and/or CAST
6334 // Safe-division dialects: MySQL, DuckDB, SingleStore, TiDB, ClickHouse, Doris
6335 Expression::Div(ref op)
6336 if matches!(
6337 source,
6338 DialectType::MySQL
6339 | DialectType::DuckDB
6340 | DialectType::SingleStore
6341 | DialectType::TiDB
6342 | DialectType::ClickHouse
6343 | DialectType::Doris
6344 ) && matches!(
6345 target,
6346 DialectType::PostgreSQL
6347 | DialectType::Redshift
6348 | DialectType::Drill
6349 | DialectType::Trino
6350 | DialectType::Presto
6351 | DialectType::Athena
6352 | DialectType::TSQL
6353 | DialectType::Teradata
6354 | DialectType::SQLite
6355 | DialectType::BigQuery
6356 | DialectType::Snowflake
6357 | DialectType::Databricks
6358 | DialectType::Oracle
6359 | DialectType::Materialize
6360 | DialectType::RisingWave
6361 ) =>
6362 {
6363 // Only wrap if RHS is not already NULLIF
6364 if !matches!(&op.right, Expression::Function(f) if f.name.eq_ignore_ascii_case("NULLIF"))
6365 {
6366 Action::MySQLSafeDivide
6367 } else {
6368 Action::None
6369 }
6370 }
6371 // ALTER TABLE ... RENAME TO <schema>.<table> -> strip schema for most targets
6372 // For TSQL/Fabric, convert to sp_rename instead
6373 Expression::AlterTable(ref at) if !at.actions.is_empty() => {
6374 if let Some(crate::expressions::AlterTableAction::RenameTable(
6375 ref new_tbl,
6376 )) = at.actions.first()
6377 {
6378 if matches!(target, DialectType::TSQL | DialectType::Fabric) {
6379 // TSQL: ALTER TABLE RENAME -> EXEC sp_rename
6380 Action::AlterTableToSpRename
6381 } else if new_tbl.schema.is_some()
6382 && matches!(
6383 target,
6384 DialectType::BigQuery
6385 | DialectType::Doris
6386 | DialectType::StarRocks
6387 | DialectType::DuckDB
6388 | DialectType::PostgreSQL
6389 | DialectType::Redshift
6390 )
6391 {
6392 Action::AlterTableRenameStripSchema
6393 } else {
6394 Action::None
6395 }
6396 } else {
6397 Action::None
6398 }
6399 }
6400 // EPOCH(x) expression -> target-specific epoch conversion
6401 Expression::Epoch(_) if !matches!(target, DialectType::DuckDB) => {
6402 Action::EpochConvert
6403 }
6404 // EPOCH_MS(x) expression -> target-specific epoch ms conversion
6405 Expression::EpochMs(_) if !matches!(target, DialectType::DuckDB) => {
6406 Action::EpochMsConvert
6407 }
// STRING_AGG -> GROUP_CONCAT for MySQL-family targets (MySQL/SingleStore/Doris/StarRocks)
// and SQLite; also converted for Spark/Databricks targets
6409 Expression::StringAgg(_) => {
6410 if matches!(
6411 target,
6412 DialectType::MySQL
6413 | DialectType::SingleStore
6414 | DialectType::Doris
6415 | DialectType::StarRocks
6416 | DialectType::SQLite
6417 ) {
6418 Action::StringAggConvert
6419 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
6420 Action::StringAggConvert
6421 } else {
6422 Action::None
6423 }
6424 }
6425 // GROUP_CONCAT -> STRING_AGG for PostgreSQL/Presto/etc.
6426 // Also handles GROUP_CONCAT normalization for MySQL/SQLite targets
6427 Expression::GroupConcat(_) => Action::GroupConcatConvert,
6428 // CARDINALITY/ARRAY_LENGTH/ARRAY_SIZE -> target-specific array length
6429 Expression::Cardinality(_) | Expression::ArrayLength(_) => {
6430 Action::ArrayLengthConvert
6431 }
6432 Expression::ArraySize(_) => {
6433 if matches!(target, DialectType::Drill) {
6434 Action::ArraySizeDrill
6435 } else {
6436 Action::ArrayLengthConvert
6437 }
6438 }
6439 // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter/ARRAY subquery
6440 Expression::ArrayRemove(_) => match target {
6441 DialectType::DuckDB | DialectType::ClickHouse | DialectType::BigQuery => {
6442 Action::ArrayRemoveConvert
6443 }
6444 _ => Action::None,
6445 },
6446 // ARRAY_REVERSE(x) -> arrayReverse for ClickHouse
6447 Expression::ArrayReverse(_) => match target {
6448 DialectType::ClickHouse => Action::ArrayReverseConvert,
6449 _ => Action::None,
6450 },
6451 // JSON_KEYS(x) -> JSON_OBJECT_KEYS/OBJECT_KEYS for Spark/Databricks/Snowflake
6452 Expression::JsonKeys(_) => match target {
6453 DialectType::Spark | DialectType::Databricks | DialectType::Snowflake => {
6454 Action::JsonKeysConvert
6455 }
6456 _ => Action::None,
6457 },
6458 // PARSE_JSON(x) -> strip for SQLite/Doris/MySQL/StarRocks
6459 Expression::ParseJson(_) => match target {
6460 DialectType::SQLite
6461 | DialectType::Doris
6462 | DialectType::MySQL
6463 | DialectType::StarRocks => Action::ParseJsonStrip,
6464 _ => Action::None,
6465 },
6466 // WeekOfYear -> WEEKISO for Snowflake (cross-dialect only)
6467 Expression::WeekOfYear(_)
6468 if matches!(target, DialectType::Snowflake)
6469 && !matches!(source, DialectType::Snowflake) =>
6470 {
6471 Action::WeekOfYearToWeekIso
6472 }
6473 // NVL: clear original_name so generator uses dialect-specific function names
6474 Expression::Nvl(f) if f.original_name.is_some() => Action::NvlClearOriginal,
6475 // XOR: expand for dialects that don't support the XOR keyword
6476 Expression::Xor(_) => {
6477 let target_supports_xor = matches!(
6478 target,
6479 DialectType::MySQL
6480 | DialectType::SingleStore
6481 | DialectType::Doris
6482 | DialectType::StarRocks
6483 );
6484 if !target_supports_xor {
6485 Action::XorExpand
6486 } else {
6487 Action::None
6488 }
6489 }
6490 // TSQL #table -> temp table normalization (CREATE TABLE)
6491 Expression::CreateTable(ct)
6492 if matches!(source, DialectType::TSQL | DialectType::Fabric)
6493 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
6494 && ct.name.name.name.starts_with('#') =>
6495 {
6496 Action::TempTableHash
6497 }
6498 // TSQL #table -> strip # from table references in SELECT/etc.
6499 Expression::Table(tr)
6500 if matches!(source, DialectType::TSQL | DialectType::Fabric)
6501 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
6502 && tr.name.name.starts_with('#') =>
6503 {
6504 Action::TempTableHash
6505 }
6506 // TSQL #table -> strip # from DROP TABLE names
6507 Expression::DropTable(ref dt)
6508 if matches!(source, DialectType::TSQL | DialectType::Fabric)
6509 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
6510 && dt.names.iter().any(|n| n.name.name.starts_with('#')) =>
6511 {
6512 Action::TempTableHash
6513 }
6514 // JSON_EXTRACT -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
6515 Expression::JsonExtract(_)
6516 if matches!(target, DialectType::TSQL | DialectType::Fabric) =>
6517 {
6518 Action::JsonExtractToTsql
6519 }
6520 // JSON_EXTRACT_SCALAR -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
6521 Expression::JsonExtractScalar(_)
6522 if matches!(target, DialectType::TSQL | DialectType::Fabric) =>
6523 {
6524 Action::JsonExtractToTsql
6525 }
6526 // JSON_EXTRACT -> JSONExtractString for ClickHouse
6527 Expression::JsonExtract(_) if matches!(target, DialectType::ClickHouse) => {
6528 Action::JsonExtractToClickHouse
6529 }
6530 // JSON_EXTRACT_SCALAR -> JSONExtractString for ClickHouse
6531 Expression::JsonExtractScalar(_)
6532 if matches!(target, DialectType::ClickHouse) =>
6533 {
6534 Action::JsonExtractToClickHouse
6535 }
6536 // JSON_EXTRACT -> arrow syntax for SQLite/DuckDB
6537 Expression::JsonExtract(ref f)
6538 if !f.arrow_syntax
6539 && matches!(target, DialectType::SQLite | DialectType::DuckDB) =>
6540 {
6541 Action::JsonExtractToArrow
6542 }
6543 // JSON_EXTRACT with JSONPath -> JSON_EXTRACT_PATH for PostgreSQL (non-PG sources only)
6544 Expression::JsonExtract(ref f)
6545 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift)
6546 && !matches!(
6547 source,
6548 DialectType::PostgreSQL
6549 | DialectType::Redshift
6550 | DialectType::Materialize
6551 )
6552 && matches!(&f.path, Expression::Literal(Literal::String(s)) if s.starts_with('$')) =>
6553 {
6554 Action::JsonExtractToGetJsonObject
6555 }
6556 // JSON_EXTRACT -> GET_JSON_OBJECT for Hive/Spark
6557 Expression::JsonExtract(_)
6558 if matches!(
6559 target,
6560 DialectType::Hive | DialectType::Spark | DialectType::Databricks
6561 ) =>
6562 {
6563 Action::JsonExtractToGetJsonObject
6564 }
6565 // JSON_EXTRACT_SCALAR -> target-specific for PostgreSQL, Snowflake, SQLite
6566 // Skip if already in arrow/hash_arrow syntax (same-dialect identity case)
6567 Expression::JsonExtractScalar(ref f)
6568 if !f.arrow_syntax
6569 && !f.hash_arrow_syntax
6570 && matches!(
6571 target,
6572 DialectType::PostgreSQL
6573 | DialectType::Redshift
6574 | DialectType::Snowflake
6575 | DialectType::SQLite
6576 | DialectType::DuckDB
6577 ) =>
6578 {
6579 Action::JsonExtractScalarConvert
6580 }
6581 // JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
6582 Expression::JsonExtractScalar(_)
6583 if matches!(
6584 target,
6585 DialectType::Hive | DialectType::Spark | DialectType::Databricks
6586 ) =>
6587 {
6588 Action::JsonExtractScalarToGetJsonObject
6589 }
6590 // JSON_EXTRACT path normalization for BigQuery, MySQL (bracket/wildcard handling)
6591 Expression::JsonExtract(ref f)
6592 if !f.arrow_syntax
6593 && matches!(target, DialectType::BigQuery | DialectType::MySQL) =>
6594 {
6595 Action::JsonPathNormalize
6596 }
6597 // JsonQuery (parsed JSON_QUERY) -> target-specific
6598 Expression::JsonQuery(_) => Action::JsonQueryValueConvert,
6599 // JsonValue (parsed JSON_VALUE) -> target-specific
6600 Expression::JsonValue(_) => Action::JsonQueryValueConvert,
6601 // AT TIME ZONE -> AT_TIMEZONE for Presto, FROM_UTC_TIMESTAMP for Spark,
6602 // TIMESTAMP(DATETIME(...)) for BigQuery, CONVERT_TIMEZONE for Snowflake
6603 Expression::AtTimeZone(_)
6604 if matches!(
6605 target,
6606 DialectType::Presto
6607 | DialectType::Trino
6608 | DialectType::Athena
6609 | DialectType::Spark
6610 | DialectType::Databricks
6611 | DialectType::BigQuery
6612 | DialectType::Snowflake
6613 ) =>
6614 {
6615 Action::AtTimeZoneConvert
6616 }
6617 // DAY_OF_WEEK -> dialect-specific
6618 Expression::DayOfWeek(_)
6619 if matches!(
6620 target,
6621 DialectType::DuckDB | DialectType::Spark | DialectType::Databricks
6622 ) =>
6623 {
6624 Action::DayOfWeekConvert
6625 }
6626 // CURRENT_USER -> CURRENT_USER() for Snowflake
6627 Expression::CurrentUser(_) if matches!(target, DialectType::Snowflake) => {
6628 Action::CurrentUserParens
6629 }
6630 // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
6631 Expression::ElementAt(_)
6632 if matches!(target, DialectType::PostgreSQL | DialectType::BigQuery) =>
6633 {
6634 Action::ElementAtConvert
6635 }
6636 // ARRAY[...] (ArrayFunc bracket_notation=false) -> convert for target dialect
6637 Expression::ArrayFunc(ref arr)
6638 if !arr.bracket_notation
6639 && matches!(
6640 target,
6641 DialectType::Spark
6642 | DialectType::Databricks
6643 | DialectType::Hive
6644 | DialectType::BigQuery
6645 | DialectType::DuckDB
6646 | DialectType::Snowflake
6647 | DialectType::Presto
6648 | DialectType::Trino
6649 | DialectType::Athena
6650 | DialectType::ClickHouse
6651 | DialectType::StarRocks
6652 ) =>
6653 {
6654 Action::ArraySyntaxConvert
6655 }
6656 // VARIANCE expression -> varSamp for ClickHouse
6657 Expression::Variance(_) if matches!(target, DialectType::ClickHouse) => {
6658 Action::VarianceToClickHouse
6659 }
6660 // STDDEV expression -> stddevSamp for ClickHouse
6661 Expression::Stddev(_) if matches!(target, DialectType::ClickHouse) => {
6662 Action::StddevToClickHouse
6663 }
6664 // ApproxQuantile -> APPROX_PERCENTILE for Snowflake
6665 Expression::ApproxQuantile(_) if matches!(target, DialectType::Snowflake) => {
6666 Action::ApproxQuantileConvert
6667 }
6668 // MonthsBetween -> target-specific
6669 Expression::MonthsBetween(_)
6670 if !matches!(
6671 target,
6672 DialectType::Spark | DialectType::Databricks | DialectType::Hive
6673 ) =>
6674 {
6675 Action::MonthsBetweenConvert
6676 }
6677 // AddMonths -> target-specific DATEADD/DATE_ADD
6678 Expression::AddMonths(_) => Action::AddMonthsConvert,
6679 // MapFromArrays -> target-specific (MAP, OBJECT_CONSTRUCT, MAP_FROM_ARRAYS)
6680 Expression::MapFromArrays(_)
6681 if !matches!(target, DialectType::Spark | DialectType::Databricks) =>
6682 {
6683 Action::MapFromArraysConvert
6684 }
6685 // CURRENT_USER -> CURRENT_USER() for Spark
6686 Expression::CurrentUser(_)
6687 if matches!(target, DialectType::Spark | DialectType::Databricks) =>
6688 {
6689 Action::CurrentUserSparkParens
6690 }
6691 // MONTH/YEAR/DAY('string') from Spark -> cast string to DATE for DuckDB/Presto
6692 Expression::Month(ref f) | Expression::Year(ref f) | Expression::Day(ref f)
6693 if matches!(
6694 source,
6695 DialectType::Spark | DialectType::Databricks | DialectType::Hive
6696 ) && matches!(&f.this, Expression::Literal(Literal::String(_)))
6697 && matches!(
6698 target,
6699 DialectType::DuckDB
6700 | DialectType::Presto
6701 | DialectType::Trino
6702 | DialectType::Athena
6703 | DialectType::PostgreSQL
6704 | DialectType::Redshift
6705 ) =>
6706 {
6707 Action::SparkDateFuncCast
6708 }
6709 // $parameter -> @parameter for BigQuery
6710 Expression::Parameter(ref p)
6711 if matches!(target, DialectType::BigQuery)
6712 && matches!(source, DialectType::DuckDB)
6713 && (p.style == crate::expressions::ParameterStyle::Dollar
6714 || p.style == crate::expressions::ParameterStyle::DoubleDollar) =>
6715 {
6716 Action::DollarParamConvert
6717 }
6718 // EscapeString literal: normalize literal newlines to \n
6719 Expression::Literal(Literal::EscapeString(ref s))
6720 if s.contains('\n') || s.contains('\r') || s.contains('\t') =>
6721 {
6722 Action::EscapeStringNormalize
6723 }
6724 // straight_join: keep lowercase for DuckDB, quote for MySQL
6725 Expression::Column(ref col)
6726 if col.name.name == "STRAIGHT_JOIN"
6727 && col.table.is_none()
6728 && matches!(source, DialectType::DuckDB)
6729 && matches!(target, DialectType::DuckDB | DialectType::MySQL) =>
6730 {
6731 Action::StraightJoinCase
6732 }
6733 // DATE and TIMESTAMP literal type conversions are now handled in the generator directly
6734 // Snowflake INTERVAL format: INTERVAL '2' HOUR -> INTERVAL '2 HOUR'
6735 Expression::Interval(ref iv)
6736 if matches!(
6737 target,
6738 DialectType::Snowflake
6739 | DialectType::PostgreSQL
6740 | DialectType::Redshift
6741 ) && iv.unit.is_some()
6742 && matches!(
6743 &iv.this,
6744 Some(Expression::Literal(Literal::String(_)))
6745 ) =>
6746 {
6747 Action::SnowflakeIntervalFormat
6748 }
6749 // TABLESAMPLE -> TABLESAMPLE RESERVOIR for DuckDB target
6750 Expression::TableSample(ref ts) if matches!(target, DialectType::DuckDB) => {
6751 if let Some(ref sample) = ts.sample {
6752 if !sample.explicit_method {
6753 Action::TablesampleReservoir
6754 } else {
6755 Action::None
6756 }
6757 } else {
6758 Action::None
6759 }
6760 }
6761 // TABLESAMPLE from non-Snowflake source to Snowflake: strip method and PERCENT
6762 // Handles both Expression::TableSample wrapper and Expression::Table with table_sample
6763 Expression::TableSample(ref ts)
6764 if matches!(target, DialectType::Snowflake)
6765 && !matches!(source, DialectType::Snowflake)
6766 && ts.sample.is_some() =>
6767 {
6768 if let Some(ref sample) = ts.sample {
6769 if !sample.explicit_method {
6770 Action::TablesampleSnowflakeStrip
6771 } else {
6772 Action::None
6773 }
6774 } else {
6775 Action::None
6776 }
6777 }
6778 Expression::Table(ref t)
6779 if matches!(target, DialectType::Snowflake)
6780 && !matches!(source, DialectType::Snowflake)
6781 && t.table_sample.is_some() =>
6782 {
6783 if let Some(ref sample) = t.table_sample {
6784 if !sample.explicit_method {
6785 Action::TablesampleSnowflakeStrip
6786 } else {
6787 Action::None
6788 }
6789 } else {
6790 Action::None
6791 }
6792 }
// ALTER TABLE RENAME -> EXEC sp_rename for TSQL
// NOTE(review): this arm looks unreachable — the earlier AlterTable arm (guarded only by
// !at.actions.is_empty()) matches every value this arm could match, and it already returns
// AlterTableToSpRename for TSQL/Fabric RenameTable actions. Consider removing this arm.
6794 Expression::AlterTable(ref at)
6795 if matches!(target, DialectType::TSQL | DialectType::Fabric)
6796 && !at.actions.is_empty()
6797 && matches!(
6798 at.actions.first(),
6799 Some(crate::expressions::AlterTableAction::RenameTable(_))
6800 ) =>
6801 {
6802 Action::AlterTableToSpRename
6803 }
6804 // Subscript index: 1-based to 0-based for BigQuery/Hive/Spark
6805 Expression::Subscript(ref sub)
6806 if matches!(
6807 target,
6808 DialectType::BigQuery
6809 | DialectType::Hive
6810 | DialectType::Spark
6811 | DialectType::Databricks
6812 ) && matches!(
6813 source,
6814 DialectType::DuckDB
6815 | DialectType::PostgreSQL
6816 | DialectType::Presto
6817 | DialectType::Trino
6818 | DialectType::Redshift
6819 | DialectType::ClickHouse
6820 ) && matches!(&sub.index, Expression::Literal(Literal::Number(ref n)) if n.parse::<i64>().unwrap_or(0) > 0) =>
6821 {
6822 Action::ArrayIndexConvert
6823 }
6824 // ANY_VALUE IGNORE NULLS detection moved to the AnyValue arm above
6825 // MysqlNullsOrdering for Ordered is now handled in the Ordered arm above
6826 // RESPECT NULLS handling for SQLite (strip it, add NULLS LAST to ORDER BY)
6827 // and for MySQL (rewrite ORDER BY with CASE WHEN for null ordering)
6828 Expression::WindowFunction(ref wf) => {
6829 // BigQuery doesn't support NULLS FIRST/LAST in window function ORDER BY
6830 // EXCEPT for ROW_NUMBER which keeps NULLS LAST
6831 let is_row_number = matches!(wf.this, Expression::RowNumber(_));
6832 if matches!(target, DialectType::BigQuery)
6833 && !is_row_number
6834 && !wf.over.order_by.is_empty()
6835 && wf.over.order_by.iter().any(|o| o.nulls_first.is_some())
6836 {
6837 Action::BigQueryNullsOrdering
6838 // DuckDB -> MySQL: Add CASE WHEN for NULLS LAST simulation in window ORDER BY
6839 // But NOT when frame is RANGE/GROUPS, since adding CASE WHEN would break value-based frames
6840 } else {
6841 let source_nulls_last = matches!(source, DialectType::DuckDB);
6842 let has_range_frame = wf.over.frame.as_ref().map_or(false, |f| {
6843 matches!(
6844 f.kind,
6845 crate::expressions::WindowFrameKind::Range
6846 | crate::expressions::WindowFrameKind::Groups
6847 )
6848 });
6849 if source_nulls_last
6850 && matches!(target, DialectType::MySQL)
6851 && !wf.over.order_by.is_empty()
6852 && wf.over.order_by.iter().any(|o| !o.desc)
6853 && !has_range_frame
6854 {
6855 Action::MysqlNullsLastRewrite
6856 } else {
6857 match &wf.this {
6858 Expression::FirstValue(ref vf)
6859 | Expression::LastValue(ref vf)
6860 if vf.ignore_nulls == Some(false) =>
6861 {
6862 // RESPECT NULLS
6863 match target {
6864 DialectType::SQLite => Action::RespectNullsConvert,
6865 _ => Action::None,
6866 }
6867 }
6868 _ => Action::None,
6869 }
6870 }
6871 }
6872 }
6873 // CREATE TABLE a LIKE b -> dialect-specific transformations
6874 Expression::CreateTable(ref ct)
6875 if ct.columns.is_empty()
6876 && ct.constraints.iter().any(|c| {
6877 matches!(c, crate::expressions::TableConstraint::Like { .. })
6878 })
6879 && matches!(
6880 target,
6881 DialectType::DuckDB | DialectType::SQLite | DialectType::Drill
6882 ) =>
6883 {
6884 Action::CreateTableLikeToCtas
6885 }
6886 Expression::CreateTable(ref ct)
6887 if ct.columns.is_empty()
6888 && ct.constraints.iter().any(|c| {
6889 matches!(c, crate::expressions::TableConstraint::Like { .. })
6890 })
6891 && matches!(target, DialectType::TSQL | DialectType::Fabric) =>
6892 {
6893 Action::CreateTableLikeToSelectInto
6894 }
6895 Expression::CreateTable(ref ct)
6896 if ct.columns.is_empty()
6897 && ct.constraints.iter().any(|c| {
6898 matches!(c, crate::expressions::TableConstraint::Like { .. })
6899 })
6900 && matches!(target, DialectType::ClickHouse) =>
6901 {
6902 Action::CreateTableLikeToAs
6903 }
6904 // CREATE TABLE: strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
6905 Expression::CreateTable(ref ct)
6906 if matches!(target, DialectType::DuckDB)
6907 && matches!(
6908 source,
6909 DialectType::DuckDB
6910 | DialectType::Spark
6911 | DialectType::Databricks
6912 | DialectType::Hive
6913 ) =>
6914 {
6915 let has_comment = ct.columns.iter().any(|c| {
6916 c.comment.is_some()
6917 || c.constraints.iter().any(|con| {
6918 matches!(con, crate::expressions::ColumnConstraint::Comment(_))
6919 })
6920 });
6921 let has_props = !ct.properties.is_empty();
6922 if has_comment || has_props {
6923 Action::CreateTableStripComment
6924 } else {
6925 Action::None
6926 }
6927 }
6928 // Array conversion: Expression::Array -> Expression::ArrayFunc for PostgreSQL
6929 Expression::Array(_)
6930 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) =>
6931 {
6932 Action::ArrayConcatBracketConvert
6933 }
6934 // ArrayFunc (bracket notation) -> Function("ARRAY") for Redshift (from BigQuery source)
6935 Expression::ArrayFunc(ref arr)
6936 if arr.bracket_notation
6937 && matches!(source, DialectType::BigQuery)
6938 && matches!(target, DialectType::Redshift) =>
6939 {
6940 Action::ArrayConcatBracketConvert
6941 }
6942 // BIT_OR/BIT_AND/BIT_XOR: float/decimal arg cast for DuckDB, or rename for Snowflake
6943 Expression::BitwiseOrAgg(ref f)
6944 | Expression::BitwiseAndAgg(ref f)
6945 | Expression::BitwiseXorAgg(ref f) => {
6946 if matches!(target, DialectType::DuckDB) {
6947 // Check if the arg is CAST(val AS FLOAT/DOUBLE/DECIMAL/REAL)
6948 if let Expression::Cast(ref c) = f.this {
6949 match &c.to {
6950 DataType::Float { .. }
6951 | DataType::Double { .. }
6952 | DataType::Decimal { .. } => Action::BitAggFloatCast,
6953 DataType::Custom { ref name }
6954 if name.eq_ignore_ascii_case("REAL") =>
6955 {
6956 Action::BitAggFloatCast
6957 }
6958 _ => Action::None,
6959 }
6960 } else {
6961 Action::None
6962 }
6963 } else if matches!(target, DialectType::Snowflake) {
6964 Action::BitAggSnowflakeRename
6965 } else {
6966 Action::None
6967 }
6968 }
6969 // FILTER -> IFF for Snowflake (aggregate functions with FILTER clause)
6970 Expression::Filter(ref _f) if matches!(target, DialectType::Snowflake) => {
6971 Action::FilterToIff
6972 }
6973 // AggFunc.filter -> IFF wrapping for Snowflake (e.g., AVG(x) FILTER(WHERE cond))
6974 Expression::Avg(ref f)
6975 | Expression::Sum(ref f)
6976 | Expression::Min(ref f)
6977 | Expression::Max(ref f)
6978 | Expression::CountIf(ref f)
6979 | Expression::Stddev(ref f)
6980 | Expression::StddevPop(ref f)
6981 | Expression::StddevSamp(ref f)
6982 | Expression::Variance(ref f)
6983 | Expression::VarPop(ref f)
6984 | Expression::VarSamp(ref f)
6985 | Expression::Median(ref f)
6986 | Expression::Mode(ref f)
6987 | Expression::First(ref f)
6988 | Expression::Last(ref f)
6989 | Expression::ApproxDistinct(ref f)
6990 if f.filter.is_some() && matches!(target, DialectType::Snowflake) =>
6991 {
6992 Action::AggFilterToIff
6993 }
6994 Expression::Count(ref c)
6995 if c.filter.is_some() && matches!(target, DialectType::Snowflake) =>
6996 {
6997 Action::AggFilterToIff
6998 }
6999 // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN ... END) for dialects that don't support multi-arg DISTINCT
7000 Expression::Count(ref c)
7001 if c.distinct
7002 && matches!(&c.this, Some(Expression::Tuple(_)))
7003 && matches!(
7004 target,
7005 DialectType::Presto
7006 | DialectType::Trino
7007 | DialectType::DuckDB
7008 | DialectType::PostgreSQL
7009 ) =>
7010 {
7011 Action::CountDistinctMultiArg
7012 }
7013 // JSON arrow -> GET_PATH/PARSE_JSON for Snowflake
7014 Expression::JsonExtract(_) if matches!(target, DialectType::Snowflake) => {
7015 Action::JsonToGetPath
7016 }
7017 // DuckDB struct/dict -> BigQuery STRUCT / Presto ROW
7018 Expression::Struct(_)
7019 if matches!(
7020 target,
7021 DialectType::BigQuery | DialectType::Presto | DialectType::Trino
7022 ) && matches!(source, DialectType::DuckDB) =>
7023 {
7024 Action::StructToRow
7025 }
7026 // DuckDB curly-brace dict {'key': value} -> BigQuery STRUCT / Presto ROW
7027 Expression::MapFunc(ref m)
7028 if m.curly_brace_syntax
7029 && matches!(
7030 target,
7031 DialectType::BigQuery | DialectType::Presto | DialectType::Trino
7032 )
7033 && matches!(source, DialectType::DuckDB) =>
7034 {
7035 Action::StructToRow
7036 }
7037 // APPROX_COUNT_DISTINCT -> APPROX_DISTINCT for Presto/Trino
7038 Expression::ApproxCountDistinct(_)
7039 if matches!(
7040 target,
7041 DialectType::Presto | DialectType::Trino | DialectType::Athena
7042 ) =>
7043 {
7044 Action::ApproxCountDistinctToApproxDistinct
7045 }
7046 // ARRAY_CONTAINS(arr, val) -> CONTAINS(arr, val) for Presto, ARRAY_CONTAINS(CAST(val AS VARIANT), arr) for Snowflake
7047 Expression::ArrayContains(_)
7048 if matches!(
7049 target,
7050 DialectType::Presto | DialectType::Trino | DialectType::Snowflake
7051 ) =>
7052 {
7053 Action::ArrayContainsConvert
7054 }
7055 // StrPosition with position -> complex expansion for Presto/DuckDB
7056 // STRPOS doesn't support a position arg in these dialects
7057 Expression::StrPosition(ref sp)
7058 if sp.position.is_some()
7059 && matches!(
7060 target,
7061 DialectType::Presto
7062 | DialectType::Trino
7063 | DialectType::Athena
7064 | DialectType::DuckDB
7065 ) =>
7066 {
7067 Action::StrPositionExpand
7068 }
7069 // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
7070 Expression::First(ref f)
7071 if f.ignore_nulls == Some(true)
7072 && matches!(target, DialectType::DuckDB) =>
7073 {
7074 Action::FirstToAnyValue
7075 }
7076 // BEGIN -> START TRANSACTION for Presto/Trino
7077 Expression::Command(ref cmd)
7078 if cmd.this.eq_ignore_ascii_case("BEGIN")
7079 && matches!(
7080 target,
7081 DialectType::Presto | DialectType::Trino | DialectType::Athena
7082 ) =>
7083 {
// NOTE(review): "handled inline below" appears inaccurate — the inline Action::None path
// rewrites Expression::Transaction, not Expression::Command, so a Command("BEGIN") is not
// converted anywhere visible here. Confirm whether BEGIN ever parses as Command.
7085 Action::None // We'll handle it directly
7086 }
7087 // Note: PostgreSQL ^ is now parsed as Power directly (not BitwiseXor).
7088 // PostgreSQL # is parsed as BitwiseXor (which is correct).
7089 // a || b (Concat operator) -> CONCAT function for Presto/Trino
7090 Expression::Concat(ref _op)
7091 if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
7092 && matches!(target, DialectType::Presto | DialectType::Trino) =>
7093 {
7094 Action::PipeConcatToConcat
7095 }
7096 _ => Action::None,
7097 }
7098 };
7099
7100 match action {
7101 Action::None => {
7102 // Handle inline transforms that don't need a dedicated action
7103
7104 // BETWEEN SYMMETRIC/ASYMMETRIC expansion for non-PostgreSQL/Dremio targets
7105 if let Expression::Between(ref b) = e {
7106 if let Some(sym) = b.symmetric {
7107 let keeps_symmetric =
7108 matches!(target, DialectType::PostgreSQL | DialectType::Dremio);
7109 if !keeps_symmetric {
7110 if sym {
7111 // SYMMETRIC: expand to (x BETWEEN a AND b OR x BETWEEN b AND a)
7112 let b = if let Expression::Between(b) = e {
7113 *b
7114 } else {
7115 unreachable!()
7116 };
7117 let between1 = Expression::Between(Box::new(
7118 crate::expressions::Between {
7119 this: b.this.clone(),
7120 low: b.low.clone(),
7121 high: b.high.clone(),
7122 not: b.not,
7123 symmetric: None,
7124 },
7125 ));
7126 let between2 = Expression::Between(Box::new(
7127 crate::expressions::Between {
7128 this: b.this,
7129 low: b.high,
7130 high: b.low,
7131 not: b.not,
7132 symmetric: None,
7133 },
7134 ));
7135 return Ok(Expression::Paren(Box::new(
7136 crate::expressions::Paren {
7137 this: Expression::Or(Box::new(
7138 crate::expressions::BinaryOp::new(
7139 between1, between2,
7140 ),
7141 )),
7142 trailing_comments: vec![],
7143 },
7144 )));
7145 } else {
7146 // ASYMMETRIC: strip qualifier, keep as regular BETWEEN
7147 let b = if let Expression::Between(b) = e {
7148 *b
7149 } else {
7150 unreachable!()
7151 };
7152 return Ok(Expression::Between(Box::new(
7153 crate::expressions::Between {
7154 this: b.this,
7155 low: b.low,
7156 high: b.high,
7157 not: b.not,
7158 symmetric: None,
7159 },
7160 )));
7161 }
7162 }
7163 }
7164 }
7165
7166 // ILIKE -> LOWER(x) LIKE LOWER(y) for StarRocks/Doris
7167 if let Expression::ILike(ref _like) = e {
7168 if matches!(target, DialectType::StarRocks | DialectType::Doris) {
7169 let like = if let Expression::ILike(l) = e {
7170 *l
7171 } else {
7172 unreachable!()
7173 };
7174 let lower_left = Expression::Function(Box::new(Function::new(
7175 "LOWER".to_string(),
7176 vec![like.left],
7177 )));
7178 let lower_right = Expression::Function(Box::new(Function::new(
7179 "LOWER".to_string(),
7180 vec![like.right],
7181 )));
7182 return Ok(Expression::Like(Box::new(crate::expressions::LikeOp {
7183 left: lower_left,
7184 right: lower_right,
7185 escape: like.escape,
7186 quantifier: like.quantifier,
7187 })));
7188 }
7189 }
7190
7191 // Oracle DBMS_RANDOM.VALUE() -> RANDOM() for PostgreSQL, RAND() for others
7192 if let Expression::MethodCall(ref mc) = e {
7193 if matches!(source, DialectType::Oracle)
7194 && mc.method.name.eq_ignore_ascii_case("VALUE")
7195 && mc.args.is_empty()
7196 {
7197 let is_dbms_random = match &mc.this {
7198 Expression::Identifier(id) => {
7199 id.name.eq_ignore_ascii_case("DBMS_RANDOM")
7200 }
7201 Expression::Column(col) => {
7202 col.table.is_none()
7203 && col.name.name.eq_ignore_ascii_case("DBMS_RANDOM")
7204 }
7205 _ => false,
7206 };
7207 if is_dbms_random {
7208 let func_name = match target {
7209 DialectType::PostgreSQL
7210 | DialectType::Redshift
7211 | DialectType::DuckDB
7212 | DialectType::SQLite => "RANDOM",
7213 DialectType::Oracle => "DBMS_RANDOM.VALUE",
7214 _ => "RAND",
7215 };
7216 return Ok(Expression::Function(Box::new(Function::new(
7217 func_name.to_string(),
7218 vec![],
7219 ))));
7220 }
7221 }
7222 }
7223 // TRIM without explicit position -> add BOTH for ClickHouse
7224 if let Expression::Trim(ref trim) = e {
7225 if matches!(target, DialectType::ClickHouse)
7226 && trim.sql_standard_syntax
7227 && trim.characters.is_some()
7228 && !trim.position_explicit
7229 {
7230 let mut new_trim = (**trim).clone();
7231 new_trim.position_explicit = true;
7232 return Ok(Expression::Trim(Box::new(new_trim)));
7233 }
7234 }
7235 // BEGIN -> START TRANSACTION for Presto/Trino
7236 if let Expression::Transaction(ref txn) = e {
7237 if matches!(
7238 target,
7239 DialectType::Presto | DialectType::Trino | DialectType::Athena
7240 ) {
7241 // Convert BEGIN to START TRANSACTION by setting mark to "START"
7242 let mut txn = txn.clone();
7243 txn.mark = Some(Box::new(Expression::Identifier(Identifier::new(
7244 "START".to_string(),
7245 ))));
7246 return Ok(Expression::Transaction(Box::new(*txn)));
7247 }
7248 }
7249 // IS TRUE/FALSE -> simplified forms for Presto/Trino
7250 if matches!(
7251 target,
7252 DialectType::Presto | DialectType::Trino | DialectType::Athena
7253 ) {
7254 match &e {
7255 Expression::IsTrue(itf) if !itf.not => {
7256 // x IS TRUE -> x
7257 return Ok(itf.this.clone());
7258 }
7259 Expression::IsTrue(itf) if itf.not => {
7260 // x IS NOT TRUE -> NOT x
7261 return Ok(Expression::Not(Box::new(
7262 crate::expressions::UnaryOp {
7263 this: itf.this.clone(),
7264 },
7265 )));
7266 }
7267 Expression::IsFalse(itf) if !itf.not => {
7268 // x IS FALSE -> NOT x
7269 return Ok(Expression::Not(Box::new(
7270 crate::expressions::UnaryOp {
7271 this: itf.this.clone(),
7272 },
7273 )));
7274 }
7275 Expression::IsFalse(itf) if itf.not => {
7276 // x IS NOT FALSE -> NOT NOT x
7277 let not_x =
7278 Expression::Not(Box::new(crate::expressions::UnaryOp {
7279 this: itf.this.clone(),
7280 }));
7281 return Ok(Expression::Not(Box::new(
7282 crate::expressions::UnaryOp { this: not_x },
7283 )));
7284 }
7285 _ => {}
7286 }
7287 }
7288 // x IS NOT FALSE -> NOT x IS FALSE for Redshift
7289 if matches!(target, DialectType::Redshift) {
7290 if let Expression::IsFalse(ref itf) = e {
7291 if itf.not {
7292 return Ok(Expression::Not(Box::new(
7293 crate::expressions::UnaryOp {
7294 this: Expression::IsFalse(Box::new(
7295 crate::expressions::IsTrueFalse {
7296 this: itf.this.clone(),
7297 not: false,
7298 },
7299 )),
7300 },
7301 )));
7302 }
7303 }
7304 }
7305 // REGEXP_REPLACE: add 'g' flag when source defaults to global replacement
7306 // Snowflake default is global, PostgreSQL/DuckDB default is first-match-only
7307 if let Expression::Function(ref f) = e {
7308 if f.name.eq_ignore_ascii_case("REGEXP_REPLACE")
7309 && matches!(source, DialectType::Snowflake)
7310 && matches!(target, DialectType::PostgreSQL | DialectType::DuckDB)
7311 {
7312 if f.args.len() == 3 {
7313 let mut args = f.args.clone();
7314 args.push(Expression::string("g"));
7315 return Ok(Expression::Function(Box::new(Function::new(
7316 "REGEXP_REPLACE".to_string(),
7317 args,
7318 ))));
7319 } else if f.args.len() == 4 {
7320 // 4th arg might be position, add 'g' as 5th
7321 let mut args = f.args.clone();
7322 args.push(Expression::string("g"));
7323 return Ok(Expression::Function(Box::new(Function::new(
7324 "REGEXP_REPLACE".to_string(),
7325 args,
7326 ))));
7327 }
7328 }
7329 }
7330 Ok(e)
7331 }
7332
7333 Action::GreatestLeastNull => {
7334 let f = if let Expression::Function(f) = e {
7335 *f
7336 } else {
7337 unreachable!("action only triggered for Function expressions")
7338 };
7339 let mut null_checks: Vec<Expression> = f
7340 .args
7341 .iter()
7342 .map(|a| {
7343 Expression::IsNull(Box::new(IsNull {
7344 this: a.clone(),
7345 not: false,
7346 postfix_form: false,
7347 }))
7348 })
7349 .collect();
7350 let condition = if null_checks.len() == 1 {
7351 null_checks.remove(0)
7352 } else {
7353 let first = null_checks.remove(0);
7354 null_checks.into_iter().fold(first, |acc, check| {
7355 Expression::Or(Box::new(BinaryOp::new(acc, check)))
7356 })
7357 };
7358 Ok(Expression::Case(Box::new(Case {
7359 operand: None,
7360 whens: vec![(condition, Expression::Null(Null))],
7361 else_: Some(Expression::Function(Box::new(Function::new(
7362 f.name, f.args,
7363 )))),
7364 comments: Vec::new(),
7365 })))
7366 }
7367
7368 Action::ArrayGenerateRange => {
7369 let f = if let Expression::Function(f) = e {
7370 *f
7371 } else {
7372 unreachable!("action only triggered for Function expressions")
7373 };
7374 let start = f.args[0].clone();
7375 let end = f.args[1].clone();
7376 let step = f.args.get(2).cloned();
7377
7378 let end_minus_1 = Expression::Sub(Box::new(BinaryOp::new(
7379 end.clone(),
7380 Expression::number(1),
7381 )));
7382
7383 match target {
7384 DialectType::PostgreSQL | DialectType::Redshift => {
7385 let mut args = vec![start, end_minus_1];
7386 if let Some(s) = step {
7387 args.push(s);
7388 }
7389 Ok(Expression::Function(Box::new(Function::new(
7390 "GENERATE_SERIES".to_string(),
7391 args,
7392 ))))
7393 }
7394 DialectType::Presto | DialectType::Trino => {
7395 let mut args = vec![start, end_minus_1];
7396 if let Some(s) = step {
7397 args.push(s);
7398 }
7399 Ok(Expression::Function(Box::new(Function::new(
7400 "SEQUENCE".to_string(),
7401 args,
7402 ))))
7403 }
7404 DialectType::BigQuery => {
7405 let mut args = vec![start, end_minus_1];
7406 if let Some(s) = step {
7407 args.push(s);
7408 }
7409 Ok(Expression::Function(Box::new(Function::new(
7410 "GENERATE_ARRAY".to_string(),
7411 args,
7412 ))))
7413 }
7414 DialectType::Snowflake => {
7415 let normalized_end = Expression::Add(Box::new(BinaryOp::new(
7416 Expression::Paren(Box::new(Paren {
7417 this: end_minus_1,
7418 trailing_comments: vec![],
7419 })),
7420 Expression::number(1),
7421 )));
7422 let mut args = vec![start, normalized_end];
7423 if let Some(s) = step {
7424 args.push(s);
7425 }
7426 Ok(Expression::Function(Box::new(Function::new(
7427 "ARRAY_GENERATE_RANGE".to_string(),
7428 args,
7429 ))))
7430 }
7431 _ => Ok(Expression::Function(Box::new(Function::new(
7432 f.name, f.args,
7433 )))),
7434 }
7435 }
7436
7437 Action::Div0TypedDivision => {
7438 let if_func = if let Expression::IfFunc(f) = e {
7439 *f
7440 } else {
7441 unreachable!("action only triggered for IfFunc expressions")
7442 };
7443 if let Some(Expression::Div(div)) = if_func.false_value {
7444 let cast_type = if matches!(target, DialectType::SQLite) {
7445 DataType::Float {
7446 precision: None,
7447 scale: None,
7448 real_spelling: true,
7449 }
7450 } else {
7451 DataType::Double {
7452 precision: None,
7453 scale: None,
7454 }
7455 };
7456 let casted_left = Expression::Cast(Box::new(Cast {
7457 this: div.left,
7458 to: cast_type,
7459 trailing_comments: vec![],
7460 double_colon_syntax: false,
7461 format: None,
7462 default: None,
7463 }));
7464 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
7465 condition: if_func.condition,
7466 true_value: if_func.true_value,
7467 false_value: Some(Expression::Div(Box::new(BinaryOp::new(
7468 casted_left,
7469 div.right,
7470 )))),
7471 original_name: if_func.original_name,
7472 })))
7473 } else {
7474 // Not actually a Div, reconstruct
7475 Ok(Expression::IfFunc(Box::new(if_func)))
7476 }
7477 }
7478
7479 Action::ArrayAggCollectList => {
7480 let agg = if let Expression::ArrayAgg(a) = e {
7481 *a
7482 } else {
7483 unreachable!("action only triggered for ArrayAgg expressions")
7484 };
7485 Ok(Expression::ArrayAgg(Box::new(AggFunc {
7486 name: Some("COLLECT_LIST".to_string()),
7487 ..agg
7488 })))
7489 }
7490
7491 Action::ArrayAggWithinGroupFilter => {
7492 let wg = if let Expression::WithinGroup(w) = e {
7493 *w
7494 } else {
7495 unreachable!("action only triggered for WithinGroup expressions")
7496 };
7497 if let Expression::ArrayAgg(inner_agg) = wg.this {
7498 let col = inner_agg.this.clone();
7499 let filter = Expression::IsNull(Box::new(IsNull {
7500 this: col,
7501 not: true,
7502 postfix_form: false,
7503 }));
7504 // For DuckDB, add explicit NULLS FIRST for DESC ordering
7505 let order_by = if matches!(target, DialectType::DuckDB) {
7506 wg.order_by
7507 .into_iter()
7508 .map(|mut o| {
7509 if o.desc && o.nulls_first.is_none() {
7510 o.nulls_first = Some(true);
7511 }
7512 o
7513 })
7514 .collect()
7515 } else {
7516 wg.order_by
7517 };
7518 Ok(Expression::ArrayAgg(Box::new(AggFunc {
7519 this: inner_agg.this,
7520 distinct: inner_agg.distinct,
7521 filter: Some(filter),
7522 order_by,
7523 name: inner_agg.name,
7524 ignore_nulls: inner_agg.ignore_nulls,
7525 having_max: inner_agg.having_max,
7526 limit: inner_agg.limit,
7527 })))
7528 } else {
7529 Ok(Expression::WithinGroup(Box::new(wg)))
7530 }
7531 }
7532
7533 Action::ArrayAggFilter => {
7534 let agg = if let Expression::ArrayAgg(a) = e {
7535 *a
7536 } else {
7537 unreachable!("action only triggered for ArrayAgg expressions")
7538 };
7539 let col = agg.this.clone();
7540 let filter = Expression::IsNull(Box::new(IsNull {
7541 this: col,
7542 not: true,
7543 postfix_form: false,
7544 }));
7545 Ok(Expression::ArrayAgg(Box::new(AggFunc {
7546 filter: Some(filter),
7547 ..agg
7548 })))
7549 }
7550
7551 Action::ArrayAggNullFilter => {
7552 // ARRAY_AGG(x) FILTER(WHERE cond) -> ARRAY_AGG(x) FILTER(WHERE cond AND NOT x IS NULL)
7553 // For source dialects that exclude NULLs (Spark/Hive) targeting DuckDB which includes them
7554 let agg = if let Expression::ArrayAgg(a) = e {
7555 *a
7556 } else {
7557 unreachable!("action only triggered for ArrayAgg expressions")
7558 };
7559 let col = agg.this.clone();
7560 let not_null = Expression::IsNull(Box::new(IsNull {
7561 this: col,
7562 not: true,
7563 postfix_form: true, // Use "NOT x IS NULL" form (prefix NOT)
7564 }));
7565 let new_filter = if let Some(existing_filter) = agg.filter {
7566 // AND the NOT IS NULL with existing filter
7567 Expression::And(Box::new(crate::expressions::BinaryOp::new(
7568 existing_filter,
7569 not_null,
7570 )))
7571 } else {
7572 not_null
7573 };
7574 Ok(Expression::ArrayAgg(Box::new(AggFunc {
7575 filter: Some(new_filter),
7576 ..agg
7577 })))
7578 }
7579
                Action::BigQueryArraySelectAsStructToSnowflake => {
                    // ARRAY(SELECT AS STRUCT x1 AS x1, x2 AS x2 FROM t)
                    // -> (SELECT ARRAY_AGG(OBJECT_CONSTRUCT('x1', x1, 'x2', x2)) FROM t)
                    //
                    // Each projected column becomes a key/value pair in an
                    // OBJECT_CONSTRUCT call, aggregated with ARRAY_AGG inside a
                    // scalar subquery that carries over the row-selecting
                    // clauses of the original SELECT.
                    if let Expression::Function(mut f) = e {
                        // Only fire on ARRAY(<select>) whose single argument is a
                        // SELECT parsed with the STRUCT kind marker.
                        let is_match = f.args.len() == 1
                            && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT"));
                        if is_match {
                            let inner_select = match f.args.remove(0) {
                                Expression::Select(s) => *s,
                                _ => unreachable!(
                                    "argument already verified to be a Select expression"
                                ),
                            };
                            // Build OBJECT_CONSTRUCT args from SELECT expressions
                            let mut oc_args = Vec::new();
                            for expr in &inner_select.expressions {
                                match expr {
                                    // `x AS alias` -> key 'alias', value x
                                    Expression::Alias(a) => {
                                        let key = Expression::Literal(Literal::String(
                                            a.alias.name.clone(),
                                        ));
                                        let value = a.this.clone();
                                        oc_args.push(key);
                                        oc_args.push(value);
                                    }
                                    // Bare column `x` -> key 'x', value x
                                    Expression::Column(c) => {
                                        let key = Expression::Literal(Literal::String(
                                            c.name.name.clone(),
                                        ));
                                        oc_args.push(key);
                                        oc_args.push(expr.clone());
                                    }
                                    // No derivable key; pass the expression through
                                    // as a positional argument.
                                    _ => {
                                        oc_args.push(expr.clone());
                                    }
                                }
                            }
                            let object_construct = Expression::Function(Box::new(Function::new(
                                "OBJECT_CONSTRUCT".to_string(),
                                oc_args,
                            )));
                            let array_agg = Expression::Function(Box::new(Function::new(
                                "ARRAY_AGG".to_string(),
                                vec![object_construct],
                            )));
                            // Rebuild the SELECT around the aggregate, keeping the
                            // clauses that determine which rows are aggregated.
                            let mut new_select = crate::expressions::Select::new();
                            new_select.expressions = vec![array_agg];
                            new_select.from = inner_select.from.clone();
                            new_select.where_clause = inner_select.where_clause.clone();
                            new_select.group_by = inner_select.group_by.clone();
                            new_select.having = inner_select.having.clone();
                            new_select.joins = inner_select.joins.clone();
                            Ok(Expression::Subquery(Box::new(
                                crate::expressions::Subquery {
                                    this: Expression::Select(Box::new(new_select)),
                                    alias: None,
                                    column_aliases: Vec::new(),
                                    order_by: None,
                                    limit: None,
                                    offset: None,
                                    distribute_by: None,
                                    sort_by: None,
                                    cluster_by: None,
                                    lateral: false,
                                    modifiers_inside: false,
                                    trailing_comments: Vec::new(),
                                },
                            )))
                        } else {
                            Ok(Expression::Function(f))
                        }
                    } else {
                        Ok(e)
                    }
                }
7655
7656 Action::BigQueryPercentileContToDuckDB => {
7657 // PERCENTILE_CONT(x, frac [RESPECT NULLS]) -> QUANTILE_CONT(x, frac) for DuckDB
7658 if let Expression::AggregateFunction(mut af) = e {
7659 af.name = "QUANTILE_CONT".to_string();
7660 af.ignore_nulls = None; // Strip RESPECT/IGNORE NULLS
7661 // Keep only first 2 args
7662 if af.args.len() > 2 {
7663 af.args.truncate(2);
7664 }
7665 Ok(Expression::AggregateFunction(af))
7666 } else {
7667 Ok(e)
7668 }
7669 }
7670
7671 Action::ArrayAggIgnoreNullsDuckDB => {
7672 // ARRAY_AGG(x IGNORE NULLS ORDER BY a, b DESC) -> ARRAY_AGG(x ORDER BY a NULLS FIRST, b DESC)
7673 // Strip IGNORE NULLS, add NULLS FIRST to first ORDER BY column
7674 let mut agg = if let Expression::ArrayAgg(a) = e {
7675 *a
7676 } else {
7677 unreachable!("action only triggered for ArrayAgg expressions")
7678 };
7679 agg.ignore_nulls = None; // Strip IGNORE NULLS
7680 if !agg.order_by.is_empty() {
7681 agg.order_by[0].nulls_first = Some(true);
7682 }
7683 Ok(Expression::ArrayAgg(Box::new(agg)))
7684 }
7685
7686 Action::CountDistinctMultiArg => {
7687 // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN a IS NULL THEN NULL WHEN b IS NULL THEN NULL ELSE (a, b) END)
7688 if let Expression::Count(c) = e {
7689 if let Some(Expression::Tuple(t)) = c.this {
7690 let args = t.expressions;
7691 // Build CASE expression:
7692 // WHEN a IS NULL THEN NULL WHEN b IS NULL THEN NULL ELSE (a, b) END
7693 let mut whens = Vec::new();
7694 for arg in &args {
7695 whens.push((
7696 Expression::IsNull(Box::new(IsNull {
7697 this: arg.clone(),
7698 not: false,
7699 postfix_form: false,
7700 })),
7701 Expression::Null(crate::expressions::Null),
7702 ));
7703 }
7704 // Build the tuple for ELSE
7705 let tuple_expr =
7706 Expression::Tuple(Box::new(crate::expressions::Tuple {
7707 expressions: args,
7708 }));
7709 let case_expr = Expression::Case(Box::new(crate::expressions::Case {
7710 operand: None,
7711 whens,
7712 else_: Some(tuple_expr),
7713 comments: Vec::new(),
7714 }));
7715 Ok(Expression::Count(Box::new(crate::expressions::CountFunc {
7716 this: Some(case_expr),
7717 star: false,
7718 distinct: true,
7719 filter: c.filter,
7720 ignore_nulls: c.ignore_nulls,
7721 original_name: c.original_name,
7722 })))
7723 } else {
7724 Ok(Expression::Count(c))
7725 }
7726 } else {
7727 Ok(e)
7728 }
7729 }
7730
7731 Action::CastTimestampToDatetime => {
7732 let c = if let Expression::Cast(c) = e {
7733 *c
7734 } else {
7735 unreachable!("action only triggered for Cast expressions")
7736 };
7737 Ok(Expression::Cast(Box::new(Cast {
7738 to: DataType::Custom {
7739 name: "DATETIME".to_string(),
7740 },
7741 ..c
7742 })))
7743 }
7744
7745 Action::CastTimestampStripTz => {
7746 // CAST(x AS TIMESTAMP(n) WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark/BigQuery
7747 let c = if let Expression::Cast(c) = e {
7748 *c
7749 } else {
7750 unreachable!("action only triggered for Cast expressions")
7751 };
7752 Ok(Expression::Cast(Box::new(Cast {
7753 to: DataType::Timestamp {
7754 precision: None,
7755 timezone: false,
7756 },
7757 ..c
7758 })))
7759 }
7760
7761 Action::CastTimestamptzToFunc => {
7762 // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) function for MySQL/StarRocks
7763 let c = if let Expression::Cast(c) = e {
7764 *c
7765 } else {
7766 unreachable!("action only triggered for Cast expressions")
7767 };
7768 Ok(Expression::Function(Box::new(Function::new(
7769 "TIMESTAMP".to_string(),
7770 vec![c.this],
7771 ))))
7772 }
7773
7774 Action::ToDateToCast => {
7775 // Convert TO_DATE(x) -> CAST(x AS DATE) for DuckDB
7776 if let Expression::Function(f) = e {
7777 let arg = f.args.into_iter().next().unwrap();
7778 Ok(Expression::Cast(Box::new(Cast {
7779 this: arg,
7780 to: DataType::Date,
7781 double_colon_syntax: false,
7782 trailing_comments: vec![],
7783 format: None,
7784 default: None,
7785 })))
7786 } else {
7787 Ok(e)
7788 }
7789 }
                Action::DateTruncWrapCast => {
                    // Handle both Expression::DateTrunc/TimestampTrunc and
                    // Expression::Function("DATE_TRUNC", [unit, expr])
                    //
                    // When the truncation operand is itself a CAST, re-apply
                    // that cast around the DATE_TRUNC result so the output type
                    // matches the operand's declared type. TIME operands get
                    // extra handling: the value is anchored onto the 1970-01-01
                    // date (DATE + TIME addition) before truncating, then cast
                    // back to the TIME type.
                    match e {
                        Expression::DateTrunc(d) | Expression::TimestampTrunc(d) => {
                            // Capture the operand's cast target type, if any.
                            let input_type = match &d.this {
                                Expression::Cast(c) => Some(c.to.clone()),
                                _ => None,
                            };
                            if let Some(cast_type) = input_type {
                                let is_time = matches!(cast_type, DataType::Time { .. });
                                if is_time {
                                    // Anchor the TIME value on a fixed date so the
                                    // truncation operates on a full timestamp.
                                    let date_expr = Expression::Cast(Box::new(Cast {
                                        this: Expression::Literal(
                                            crate::expressions::Literal::String(
                                                "1970-01-01".to_string(),
                                            ),
                                        ),
                                        to: DataType::Date,
                                        double_colon_syntax: false,
                                        trailing_comments: vec![],
                                        format: None,
                                        default: None,
                                    }));
                                    let add_expr =
                                        Expression::Add(Box::new(BinaryOp::new(date_expr, d.this)));
                                    let inner = Expression::DateTrunc(Box::new(DateTruncFunc {
                                        this: add_expr,
                                        unit: d.unit,
                                    }));
                                    // Cast the truncated result back to the
                                    // operand's original TIME type.
                                    Ok(Expression::Cast(Box::new(Cast {
                                        this: inner,
                                        to: cast_type,
                                        double_colon_syntax: false,
                                        trailing_comments: vec![],
                                        format: None,
                                        default: None,
                                    })))
                                } else {
                                    // Non-TIME cast: simply wrap the truncation in
                                    // a cast back to the operand's declared type.
                                    let inner = Expression::DateTrunc(Box::new(*d));
                                    Ok(Expression::Cast(Box::new(Cast {
                                        this: inner,
                                        to: cast_type,
                                        double_colon_syntax: false,
                                        trailing_comments: vec![],
                                        format: None,
                                        default: None,
                                    })))
                                }
                            } else {
                                // Operand is not a cast; nothing to wrap.
                                Ok(Expression::DateTrunc(d))
                            }
                        }
                        Expression::Function(f) if f.args.len() == 2 => {
                            // Function-based DATE_TRUNC(unit, expr)
                            let input_type = match &f.args[1] {
                                Expression::Cast(c) => Some(c.to.clone()),
                                _ => None,
                            };
                            if let Some(cast_type) = input_type {
                                let is_time = matches!(cast_type, DataType::Time { .. });
                                if is_time {
                                    // Same epoch-anchoring trick as the node-based
                                    // branch above, rebuilt in function-call form.
                                    let date_expr = Expression::Cast(Box::new(Cast {
                                        this: Expression::Literal(
                                            crate::expressions::Literal::String(
                                                "1970-01-01".to_string(),
                                            ),
                                        ),
                                        to: DataType::Date,
                                        double_colon_syntax: false,
                                        trailing_comments: vec![],
                                        format: None,
                                        default: None,
                                    }));
                                    let mut args = f.args;
                                    let unit_arg = args.remove(0);
                                    let time_expr = args.remove(0);
                                    let add_expr = Expression::Add(Box::new(BinaryOp::new(
                                        date_expr, time_expr,
                                    )));
                                    let inner = Expression::Function(Box::new(Function::new(
                                        "DATE_TRUNC".to_string(),
                                        vec![unit_arg, add_expr],
                                    )));
                                    Ok(Expression::Cast(Box::new(Cast {
                                        this: inner,
                                        to: cast_type,
                                        double_colon_syntax: false,
                                        trailing_comments: vec![],
                                        format: None,
                                        default: None,
                                    })))
                                } else {
                                    // Wrap the function in CAST
                                    Ok(Expression::Cast(Box::new(Cast {
                                        this: Expression::Function(f),
                                        to: cast_type,
                                        double_colon_syntax: false,
                                        trailing_comments: vec![],
                                        format: None,
                                        default: None,
                                    })))
                                }
                            } else {
                                Ok(Expression::Function(f))
                            }
                        }
                        // Not a truncation shape this action understands.
                        other => Ok(other),
                    }
                }
7900
7901 Action::RegexpReplaceSnowflakeToDuckDB => {
7902 // Snowflake REGEXP_REPLACE(s, p, r, position) -> REGEXP_REPLACE(s, p, r, 'g')
7903 if let Expression::Function(f) = e {
7904 let mut args = f.args;
7905 let subject = args.remove(0);
7906 let pattern = args.remove(0);
7907 let replacement = args.remove(0);
7908 Ok(Expression::Function(Box::new(Function::new(
7909 "REGEXP_REPLACE".to_string(),
7910 vec![
7911 subject,
7912 pattern,
7913 replacement,
7914 Expression::Literal(crate::expressions::Literal::String(
7915 "g".to_string(),
7916 )),
7917 ],
7918 ))))
7919 } else {
7920 Ok(e)
7921 }
7922 }
7923
7924 Action::SetToVariable => {
7925 // For DuckDB: SET a = 1 -> SET VARIABLE a = 1
7926 if let Expression::SetStatement(mut s) = e {
7927 for item in &mut s.items {
7928 if item.kind.is_none() {
7929 // Check if name already has VARIABLE prefix (from DuckDB source parsing)
7930 let already_variable = match &item.name {
7931 Expression::Identifier(id) => id.name.starts_with("VARIABLE "),
7932 _ => false,
7933 };
7934 if already_variable {
7935 // Extract the actual name and set kind
7936 if let Expression::Identifier(ref mut id) = item.name {
7937 let actual_name = id.name["VARIABLE ".len()..].to_string();
7938 id.name = actual_name;
7939 }
7940 }
7941 item.kind = Some("VARIABLE".to_string());
7942 }
7943 }
7944 Ok(Expression::SetStatement(s))
7945 } else {
7946 Ok(e)
7947 }
7948 }
7949
7950 Action::ConvertTimezoneToExpr => {
7951 // Convert Function("CONVERT_TIMEZONE", args) to Expression::ConvertTimezone
7952 // This prevents Redshift's transform_expr from expanding 2-arg to 3-arg with 'UTC'
7953 if let Expression::Function(f) = e {
7954 if f.args.len() == 2 {
7955 let mut args = f.args;
7956 let target_tz = args.remove(0);
7957 let timestamp = args.remove(0);
7958 Ok(Expression::ConvertTimezone(Box::new(ConvertTimezone {
7959 source_tz: None,
7960 target_tz: Some(Box::new(target_tz)),
7961 timestamp: Some(Box::new(timestamp)),
7962 options: vec![],
7963 })))
7964 } else if f.args.len() == 3 {
7965 let mut args = f.args;
7966 let source_tz = args.remove(0);
7967 let target_tz = args.remove(0);
7968 let timestamp = args.remove(0);
7969 Ok(Expression::ConvertTimezone(Box::new(ConvertTimezone {
7970 source_tz: Some(Box::new(source_tz)),
7971 target_tz: Some(Box::new(target_tz)),
7972 timestamp: Some(Box::new(timestamp)),
7973 options: vec![],
7974 })))
7975 } else {
7976 Ok(Expression::Function(f))
7977 }
7978 } else {
7979 Ok(e)
7980 }
7981 }
7982
7983 Action::BigQueryCastType => {
7984 // Convert BigQuery types to standard SQL types
7985 if let Expression::DataType(dt) = e {
7986 match dt {
7987 DataType::Custom { ref name } if name.eq_ignore_ascii_case("INT64") => {
7988 Ok(Expression::DataType(DataType::BigInt { length: None }))
7989 }
7990 DataType::Custom { ref name }
7991 if name.eq_ignore_ascii_case("FLOAT64") =>
7992 {
7993 Ok(Expression::DataType(DataType::Double {
7994 precision: None,
7995 scale: None,
7996 }))
7997 }
7998 DataType::Custom { ref name } if name.eq_ignore_ascii_case("BOOL") => {
7999 Ok(Expression::DataType(DataType::Boolean))
8000 }
8001 DataType::Custom { ref name } if name.eq_ignore_ascii_case("BYTES") => {
8002 Ok(Expression::DataType(DataType::VarBinary { length: None }))
8003 }
8004 DataType::Custom { ref name }
8005 if name.eq_ignore_ascii_case("NUMERIC") =>
8006 {
8007 // For DuckDB target, use Custom("DECIMAL") to avoid DuckDB's
8008 // default precision (18, 3) being added to bare DECIMAL
8009 if matches!(target, DialectType::DuckDB) {
8010 Ok(Expression::DataType(DataType::Custom {
8011 name: "DECIMAL".to_string(),
8012 }))
8013 } else {
8014 Ok(Expression::DataType(DataType::Decimal {
8015 precision: None,
8016 scale: None,
8017 }))
8018 }
8019 }
8020 DataType::Custom { ref name }
8021 if name.eq_ignore_ascii_case("STRING") =>
8022 {
8023 Ok(Expression::DataType(DataType::String { length: None }))
8024 }
8025 DataType::Custom { ref name }
8026 if name.eq_ignore_ascii_case("DATETIME") =>
8027 {
8028 Ok(Expression::DataType(DataType::Timestamp {
8029 precision: None,
8030 timezone: false,
8031 }))
8032 }
8033 _ => Ok(Expression::DataType(dt)),
8034 }
8035 } else {
8036 Ok(e)
8037 }
8038 }
8039
8040 Action::BigQuerySafeDivide => {
8041 // Convert SafeDivide expression to IF/CASE form for most targets
8042 if let Expression::SafeDivide(sd) = e {
8043 let x = *sd.this;
8044 let y = *sd.expression;
8045 // Wrap x and y in parens if they're complex expressions
8046 let y_ref = match &y {
8047 Expression::Column(_)
8048 | Expression::Literal(_)
8049 | Expression::Identifier(_) => y.clone(),
8050 _ => Expression::Paren(Box::new(Paren {
8051 this: y.clone(),
8052 trailing_comments: vec![],
8053 })),
8054 };
8055 let x_ref = match &x {
8056 Expression::Column(_)
8057 | Expression::Literal(_)
8058 | Expression::Identifier(_) => x.clone(),
8059 _ => Expression::Paren(Box::new(Paren {
8060 this: x.clone(),
8061 trailing_comments: vec![],
8062 })),
8063 };
8064 let condition = Expression::Neq(Box::new(BinaryOp::new(
8065 y_ref.clone(),
8066 Expression::number(0),
8067 )));
8068 let div_expr = Expression::Div(Box::new(BinaryOp::new(x_ref, y_ref)));
8069
8070 if matches!(target, DialectType::Presto | DialectType::Trino) {
8071 // Presto/Trino: IF(y <> 0, CAST(x AS DOUBLE) / y, NULL)
8072 let cast_x = Expression::Cast(Box::new(Cast {
8073 this: match &x {
8074 Expression::Column(_)
8075 | Expression::Literal(_)
8076 | Expression::Identifier(_) => x,
8077 _ => Expression::Paren(Box::new(Paren {
8078 this: x,
8079 trailing_comments: vec![],
8080 })),
8081 },
8082 to: DataType::Double {
8083 precision: None,
8084 scale: None,
8085 },
8086 trailing_comments: vec![],
8087 double_colon_syntax: false,
8088 format: None,
8089 default: None,
8090 }));
8091 let cast_div = Expression::Div(Box::new(BinaryOp::new(
8092 cast_x,
8093 match &y {
8094 Expression::Column(_)
8095 | Expression::Literal(_)
8096 | Expression::Identifier(_) => y,
8097 _ => Expression::Paren(Box::new(Paren {
8098 this: y,
8099 trailing_comments: vec![],
8100 })),
8101 },
8102 )));
8103 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
8104 condition,
8105 true_value: cast_div,
8106 false_value: Some(Expression::Null(Null)),
8107 original_name: None,
8108 })))
8109 } else if matches!(target, DialectType::PostgreSQL) {
8110 // PostgreSQL: CASE WHEN y <> 0 THEN CAST(x AS DOUBLE PRECISION) / y ELSE NULL END
8111 let cast_x = Expression::Cast(Box::new(Cast {
8112 this: match &x {
8113 Expression::Column(_)
8114 | Expression::Literal(_)
8115 | Expression::Identifier(_) => x,
8116 _ => Expression::Paren(Box::new(Paren {
8117 this: x,
8118 trailing_comments: vec![],
8119 })),
8120 },
8121 to: DataType::Custom {
8122 name: "DOUBLE PRECISION".to_string(),
8123 },
8124 trailing_comments: vec![],
8125 double_colon_syntax: false,
8126 format: None,
8127 default: None,
8128 }));
8129 let y_paren = match &y {
8130 Expression::Column(_)
8131 | Expression::Literal(_)
8132 | Expression::Identifier(_) => y,
8133 _ => Expression::Paren(Box::new(Paren {
8134 this: y,
8135 trailing_comments: vec![],
8136 })),
8137 };
8138 let cast_div =
8139 Expression::Div(Box::new(BinaryOp::new(cast_x, y_paren)));
8140 Ok(Expression::Case(Box::new(Case {
8141 operand: None,
8142 whens: vec![(condition, cast_div)],
8143 else_: Some(Expression::Null(Null)),
8144 comments: Vec::new(),
8145 })))
8146 } else if matches!(target, DialectType::DuckDB) {
8147 // DuckDB: CASE WHEN y <> 0 THEN x / y ELSE NULL END
8148 Ok(Expression::Case(Box::new(Case {
8149 operand: None,
8150 whens: vec![(condition, div_expr)],
8151 else_: Some(Expression::Null(Null)),
8152 comments: Vec::new(),
8153 })))
8154 } else if matches!(target, DialectType::Snowflake) {
8155 // Snowflake: IFF(y <> 0, x / y, NULL)
8156 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
8157 condition,
8158 true_value: div_expr,
8159 false_value: Some(Expression::Null(Null)),
8160 original_name: Some("IFF".to_string()),
8161 })))
8162 } else {
8163 // All others: IF(y <> 0, x / y, NULL)
8164 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
8165 condition,
8166 true_value: div_expr,
8167 false_value: Some(Expression::Null(Null)),
8168 original_name: None,
8169 })))
8170 }
8171 } else {
8172 Ok(e)
8173 }
8174 }
8175
                Action::BigQueryLastDayStripUnit => {
                    // BigQuery LAST_DAY(date [, unit]) -> target-specific
                    // last-day-of-month expression; the unit is dropped (MONTH
                    // is the default).
                    if let Expression::LastDay(mut ld) = e {
                        ld.unit = None; // Strip the unit (MONTH is default)
                        match target {
                            DialectType::PostgreSQL => {
                                // LAST_DAY(date) -> CAST(DATE_TRUNC('MONTH', date) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)
                                let date_trunc = Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![
                                        Expression::Literal(crate::expressions::Literal::String(
                                            "MONTH".to_string(),
                                        )),
                                        ld.this.clone(),
                                    ],
                                )));
                                // Step to the first day of the next month...
                                let plus_month =
                                    Expression::Add(Box::new(crate::expressions::BinaryOp::new(
                                        date_trunc,
                                        Expression::Interval(Box::new(
                                            crate::expressions::Interval {
                                                this: Some(Expression::Literal(
                                                    crate::expressions::Literal::String(
                                                        "1 MONTH".to_string(),
                                                    ),
                                                )),
                                                unit: None,
                                            },
                                        )),
                                    )));
                                // ...then back one day to land on the last day
                                // of the original month.
                                let minus_day =
                                    Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
                                        plus_month,
                                        Expression::Interval(Box::new(
                                            crate::expressions::Interval {
                                                this: Some(Expression::Literal(
                                                    crate::expressions::Literal::String(
                                                        "1 DAY".to_string(),
                                                    ),
                                                )),
                                                unit: None,
                                            },
                                        )),
                                    )));
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: minus_day,
                                    to: DataType::Date,
                                    trailing_comments: vec![],
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                })))
                            }
                            DialectType::Presto => {
                                // LAST_DAY(date) -> LAST_DAY_OF_MONTH(date)
                                Ok(Expression::Function(Box::new(Function::new(
                                    "LAST_DAY_OF_MONTH".to_string(),
                                    vec![ld.this],
                                ))))
                            }
                            DialectType::ClickHouse => {
                                // ClickHouse LAST_DAY(CAST(x AS Nullable(DATE)))
                                // Need to wrap the DATE type in Nullable
                                let nullable_date = match ld.this {
                                    Expression::Cast(mut c) => {
                                        c.to = DataType::Nullable {
                                            inner: Box::new(DataType::Date),
                                        };
                                        Expression::Cast(c)
                                    }
                                    other => other,
                                };
                                ld.this = nullable_date;
                                Ok(Expression::LastDay(ld))
                            }
                            // Other targets keep LAST_DAY, just without the unit.
                            _ => Ok(Expression::LastDay(ld)),
                        }
                    } else {
                        Ok(e)
                    }
                }
8256
                Action::BigQueryCastFormat => {
                    // CAST(x AS DATE FORMAT 'fmt') -> PARSE_DATE('%m/%d/%Y', x) for BigQuery
                    // CAST(x AS TIMESTAMP FORMAT 'fmt') -> PARSE_TIMESTAMP(...) for BigQuery
                    // SAFE_CAST(x AS DATE FORMAT 'fmt') -> CAST(TRY_STRPTIME(x, ...) AS DATE) for DuckDB
                    //
                    // Both Cast and SafeCast are decomposed the same way; the
                    // is_safe flag only selects TRY_STRPTIME on DuckDB.
                    let (this, to, format_expr, is_safe) = match e {
                        Expression::Cast(ref c) if c.format.is_some() => (
                            c.this.clone(),
                            c.to.clone(),
                            c.format.as_ref().unwrap().as_ref().clone(),
                            false,
                        ),
                        Expression::SafeCast(ref c) if c.format.is_some() => (
                            c.this.clone(),
                            c.to.clone(),
                            c.format.as_ref().unwrap().as_ref().clone(),
                            true,
                        ),
                        // No FORMAT clause: nothing to rewrite.
                        _ => return Ok(e),
                    };
                    // For CAST(x AS STRING FORMAT ...) when target is BigQuery, keep as-is
                    if matches!(target, DialectType::BigQuery) {
                        match &to {
                            DataType::String { .. } | DataType::VarChar { .. } | DataType::Text => {
                                // CAST(x AS STRING FORMAT 'fmt') stays as CAST expression for BigQuery
                                return Ok(e);
                            }
                            _ => {}
                        }
                    }
                    // Extract timezone from format if AT TIME ZONE is present
                    let (actual_format_expr, timezone) = match &format_expr {
                        Expression::AtTimeZone(ref atz) => {
                            (atz.this.clone(), Some(atz.zone.clone()))
                        }
                        _ => (format_expr.clone(), None),
                    };
                    // Translate the SQL FORMAT model string into strftime-style.
                    let strftime_fmt = Self::bq_cast_format_to_strftime(&actual_format_expr);
                    match target {
                        DialectType::BigQuery => {
                            // CAST(x AS DATE FORMAT 'fmt') -> PARSE_DATE(strftime_fmt, x)
                            // CAST(x AS TIMESTAMP FORMAT 'fmt' AT TIME ZONE 'tz') -> PARSE_TIMESTAMP(strftime_fmt, x, tz)
                            let func_name = match &to {
                                DataType::Date => "PARSE_DATE",
                                DataType::Timestamp { .. } => "PARSE_TIMESTAMP",
                                DataType::Time { .. } => "PARSE_TIMESTAMP",
                                _ => "PARSE_TIMESTAMP",
                            };
                            let mut func_args = vec![strftime_fmt, this];
                            // Timezone becomes a trailing optional argument.
                            if let Some(tz) = timezone {
                                func_args.push(tz);
                            }
                            Ok(Expression::Function(Box::new(Function::new(
                                func_name.to_string(),
                                func_args,
                            ))))
                        }
                        DialectType::DuckDB => {
                            // SAFE_CAST(x AS DATE FORMAT 'fmt') -> CAST(TRY_STRPTIME(x, fmt) AS DATE)
                            // CAST(x AS DATE FORMAT 'fmt') -> CAST(STRPTIME(x, fmt) AS DATE)
                            let duck_fmt = Self::bq_format_to_duckdb(&strftime_fmt);
                            let parse_fn_name = if is_safe { "TRY_STRPTIME" } else { "STRPTIME" };
                            let parse_call = Expression::Function(Box::new(Function::new(
                                parse_fn_name.to_string(),
                                vec![this, duck_fmt],
                            )));
                            // Wrap the parse result in a cast to the original
                            // target type (format clause dropped).
                            Ok(Expression::Cast(Box::new(Cast {
                                this: parse_call,
                                to,
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                            })))
                        }
                        // Other targets keep the original cast untouched.
                        _ => Ok(e),
                    }
                }
8334
                Action::BigQueryFunctionNormalize => {
                    // Pure delegate: all BigQuery function-name normalization
                    // logic lives in `normalize_bigquery_function`.
                    Self::normalize_bigquery_function(e, source, target)
                }
8338
8339 Action::BigQueryToHexBare => {
8340 // Not used anymore - handled directly in normalize_bigquery_function
8341 Ok(e)
8342 }
8343
                Action::BigQueryToHexLower => {
                    // Clean up LOWER wrappers introduced around HEX/TO_HEX
                    // during normalization. For a BigQuery target the wrapper is
                    // redundant and TO_HEX is restored bare; for other targets
                    // doubled LOWERs are flattened.
                    if let Expression::Lower(uf) = e {
                        match uf.this {
                            // BQ->BQ: LOWER(TO_HEX(x)) -> TO_HEX(x)
                            Expression::Function(f)
                                if matches!(target, DialectType::BigQuery)
                                    && f.name == "TO_HEX" =>
                            {
                                Ok(Expression::Function(f))
                            }
                            // LOWER(LOWER(HEX/TO_HEX(x))) patterns
                            Expression::Lower(inner_uf) => {
                                if matches!(target, DialectType::BigQuery) {
                                    // BQ->BQ: extract TO_HEX
                                    if let Expression::Function(f) = inner_uf.this {
                                        Ok(Expression::Function(Box::new(Function::new(
                                            "TO_HEX".to_string(),
                                            f.args,
                                        ))))
                                    } else {
                                        Ok(Expression::Lower(inner_uf))
                                    }
                                } else {
                                    // Flatten: LOWER(LOWER(x)) -> LOWER(x)
                                    Ok(Expression::Lower(inner_uf))
                                }
                            }
                            // Any other operand: rebuild the LOWER unchanged.
                            other => {
                                Ok(Expression::Lower(Box::new(crate::expressions::UnaryFunc {
                                    this: other,
                                    original_name: None,
                                })))
                            }
                        }
                    } else {
                        Ok(e)
                    }
                }
8382
                Action::BigQueryToHexUpper => {
                    // UPPER(LOWER(HEX(x))) -> HEX(x) (UPPER cancels LOWER, HEX is already uppercase)
                    // UPPER(LOWER(TO_HEX(x))) -> TO_HEX(x) for Presto/Trino
                    if let Expression::Upper(uf) = e {
                        if let Expression::Lower(inner_uf) = uf.this {
                            // For BQ->BQ: UPPER(TO_HEX(x)) should stay as UPPER(TO_HEX(x))
                            if matches!(target, DialectType::BigQuery) {
                                // Restore TO_HEX name in inner function
                                if let Expression::Function(f) = inner_uf.this {
                                    let restored = Expression::Function(Box::new(Function::new(
                                        "TO_HEX".to_string(),
                                        f.args,
                                    )));
                                    Ok(Expression::Upper(Box::new(
                                        crate::expressions::UnaryFunc::new(restored),
                                    )))
                                } else {
                                    // Inner operand is not a function; keep the
                                    // (now single) UPPER wrapper.
                                    Ok(Expression::Upper(inner_uf))
                                }
                            } else {
                                // Extract the inner HEX/TO_HEX function (UPPER(LOWER(x)) = x when HEX is uppercase)
                                Ok(inner_uf.this)
                            }
                        } else {
                            // Not the UPPER(LOWER(...)) shape; leave untouched.
                            Ok(Expression::Upper(uf))
                        }
                    } else {
                        Ok(e)
                    }
                }
8413
8414 Action::BigQueryAnyValueHaving => {
8415 // ANY_VALUE(x HAVING MAX y) -> ARG_MAX_NULL(x, y)
8416 // ANY_VALUE(x HAVING MIN y) -> ARG_MIN_NULL(x, y)
8417 if let Expression::AnyValue(agg) = e {
8418 if let Some((having_expr, is_max)) = agg.having_max {
8419 let func_name = if is_max {
8420 "ARG_MAX_NULL"
8421 } else {
8422 "ARG_MIN_NULL"
8423 };
8424 Ok(Expression::Function(Box::new(Function::new(
8425 func_name.to_string(),
8426 vec![agg.this, *having_expr],
8427 ))))
8428 } else {
8429 Ok(Expression::AnyValue(agg))
8430 }
8431 } else {
8432 Ok(e)
8433 }
8434 }
8435
8436 Action::BigQueryApproxQuantiles => {
8437 // APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [0, 1/n, 2/n, ..., 1])
8438 // APPROX_QUANTILES(DISTINCT x, n) -> APPROX_QUANTILE(DISTINCT x, [0, 1/n, ..., 1])
8439 if let Expression::AggregateFunction(agg) = e {
8440 if agg.args.len() >= 2 {
8441 let x_expr = agg.args[0].clone();
8442 let n_expr = &agg.args[1];
8443
8444 // Extract the numeric value from n_expr
8445 let n = match n_expr {
8446 Expression::Literal(crate::expressions::Literal::Number(s)) => {
8447 s.parse::<usize>().unwrap_or(2)
8448 }
8449 _ => 2,
8450 };
8451
8452 // Generate quantile array: [0, 1/n, 2/n, ..., 1]
8453 let mut quantiles = Vec::new();
8454 for i in 0..=n {
8455 let q = i as f64 / n as f64;
8456 // Format nicely: 0 -> 0, 0.25 -> 0.25, 1 -> 1
8457 if q == 0.0 {
8458 quantiles.push(Expression::number(0));
8459 } else if q == 1.0 {
8460 quantiles.push(Expression::number(1));
8461 } else {
8462 quantiles.push(Expression::Literal(
8463 crate::expressions::Literal::Number(format!("{}", q)),
8464 ));
8465 }
8466 }
8467
8468 let array_expr =
8469 Expression::Array(Box::new(crate::expressions::Array {
8470 expressions: quantiles,
8471 }));
8472
8473 // Preserve DISTINCT modifier
8474 let mut new_func = Function::new(
8475 "APPROX_QUANTILE".to_string(),
8476 vec![x_expr, array_expr],
8477 );
8478 new_func.distinct = agg.distinct;
8479 Ok(Expression::Function(Box::new(new_func)))
8480 } else {
8481 Ok(Expression::AggregateFunction(agg))
8482 }
8483 } else {
8484 Ok(e)
8485 }
8486 }
8487
8488 Action::GenericFunctionNormalize => {
8489 // Helper closure to convert ARBITRARY to target-specific function
8490 fn convert_arbitrary(arg: Expression, target: DialectType) -> Expression {
8491 let name = match target {
8492 DialectType::ClickHouse => "any",
8493 DialectType::TSQL | DialectType::SQLite => "MAX",
8494 DialectType::Hive => "FIRST",
8495 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
8496 "ARBITRARY"
8497 }
8498 _ => "ANY_VALUE",
8499 };
8500 Expression::Function(Box::new(Function::new(name.to_string(), vec![arg])))
8501 }
8502
8503 if let Expression::Function(f) = e {
8504 let name = f.name.to_uppercase();
8505 match name.as_str() {
                        // ARBITRARY(x): rename to the target's single-value aggregate.
                        "ARBITRARY" if f.args.len() == 1 => {
                            let arg = f.args.into_iter().next().unwrap();
                            Ok(convert_arbitrary(arg, target))
                        }
                        // TO_NUMBER(x): native on Oracle/Snowflake; elsewhere lowered to
                        // CAST(x AS DOUBLE).
                        "TO_NUMBER" if f.args.len() == 1 => {
                            let arg = f.args.into_iter().next().unwrap();
                            match target {
                                DialectType::Oracle | DialectType::Snowflake => {
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "TO_NUMBER".to_string(),
                                        vec![arg],
                                    ))))
                                }
                                _ => Ok(Expression::Cast(Box::new(crate::expressions::Cast {
                                    this: arg,
                                    to: crate::expressions::DataType::Double {
                                        precision: None,
                                        scale: None,
                                    },
                                    double_colon_syntax: false,
                                    trailing_comments: Vec::new(),
                                    format: None,
                                    default: None,
                                }))),
                            }
                        }
                        // AGGREGATE(...) with 3+ args is spelled REDUCE on
                        // DuckDB/Hive/Presto/Trino; other targets keep the name.
                        "AGGREGATE" if f.args.len() >= 3 => match target {
                            DialectType::DuckDB
                            | DialectType::Hive
                            | DialectType::Presto
                            | DialectType::Trino => Ok(Expression::Function(Box::new(
                                Function::new("REDUCE".to_string(), f.args),
                            ))),
                            _ => Ok(Expression::Function(f)),
                        },
8541 // REGEXP_MATCHES(x, y) -> RegexpLike for most targets, keep for DuckDB
8542 "REGEXP_MATCHES" if f.args.len() >= 2 => {
8543 if matches!(target, DialectType::DuckDB) {
8544 Ok(Expression::Function(f))
8545 } else {
8546 let mut args = f.args;
8547 let this = args.remove(0);
8548 let pattern = args.remove(0);
8549 let flags = if args.is_empty() {
8550 None
8551 } else {
8552 Some(args.remove(0))
8553 };
8554 Ok(Expression::RegexpLike(Box::new(
8555 crate::expressions::RegexpFunc {
8556 this,
8557 pattern,
8558 flags,
8559 },
8560 )))
8561 }
8562 }
8563 // REGEXP_FULL_MATCH (Hive REGEXP) -> RegexpLike
8564 "REGEXP_FULL_MATCH" if f.args.len() >= 2 => {
8565 if matches!(target, DialectType::DuckDB) {
8566 Ok(Expression::Function(f))
8567 } else {
8568 let mut args = f.args;
8569 let this = args.remove(0);
8570 let pattern = args.remove(0);
8571 let flags = if args.is_empty() {
8572 None
8573 } else {
8574 Some(args.remove(0))
8575 };
8576 Ok(Expression::RegexpLike(Box::new(
8577 crate::expressions::RegexpFunc {
8578 this,
8579 pattern,
8580 flags,
8581 },
8582 )))
8583 }
8584 }
                        // STRUCT_EXTRACT(x, 'field') -> x.field (StructExtract expression)
                        "STRUCT_EXTRACT" if f.args.len() == 2 => {
                            let mut args = f.args;
                            let this = args.remove(0);
                            let field_expr = args.remove(0);
                            // Extract string literal to get field name; bare identifiers
                            // are accepted too. Anything else cannot become dot syntax,
                            // so the call is rebuilt unchanged.
                            let field_name = match &field_expr {
                                Expression::Literal(crate::expressions::Literal::String(s)) => {
                                    s.clone()
                                }
                                Expression::Identifier(id) => id.name.clone(),
                                _ => {
                                    return Ok(Expression::Function(Box::new(Function::new(
                                        "STRUCT_EXTRACT".to_string(),
                                        vec![this, field_expr],
                                    ))))
                                }
                            };
                            Ok(Expression::StructExtract(Box::new(
                                crate::expressions::StructExtractFunc {
                                    this,
                                    field: crate::expressions::Identifier::new(field_name),
                                },
                            )))
                        }
                        // LIST_FILTER([4,5,6], x -> x > 4) -> FILTER(ARRAY(4,5,6), x -> x > 4)
                        "LIST_FILTER" if f.args.len() == 2 => {
                            // DuckDB keeps its native name; everyone else uses FILTER.
                            let name = match target {
                                DialectType::DuckDB => "LIST_FILTER",
                                _ => "FILTER",
                            };
                            Ok(Expression::Function(Box::new(Function::new(
                                name.to_string(),
                                f.args,
                            ))))
                        }
                        // LIST_TRANSFORM(x, y -> y + 1) -> TRANSFORM(x, y -> y + 1)
                        "LIST_TRANSFORM" if f.args.len() == 2 => {
                            // DuckDB keeps its native name; everyone else uses TRANSFORM.
                            let name = match target {
                                DialectType::DuckDB => "LIST_TRANSFORM",
                                _ => "TRANSFORM",
                            };
                            Ok(Expression::Function(Box::new(Function::new(
                                name.to_string(),
                                f.args,
                            ))))
                        }
                        // LIST_SORT(x) -> SORT_ARRAY(x) / ARRAY_SORT(x)
                        "LIST_SORT" if f.args.len() >= 1 => {
                            let name = match target {
                                DialectType::DuckDB
                                | DialectType::Presto
                                | DialectType::Trino => "ARRAY_SORT",
                                _ => "SORT_ARRAY",
                            };
                            Ok(Expression::Function(Box::new(Function::new(
                                name.to_string(),
                                f.args,
                            ))))
                        }
                        // LIST_REVERSE_SORT(x) -> SORT_ARRAY(x, FALSE) for Spark/Hive, ARRAY_SORT(x, lambda) for Presto
                        "LIST_REVERSE_SORT" if f.args.len() >= 1 => {
                            match target {
                                // DuckDB spells descending sort ARRAY_REVERSE_SORT.
                                DialectType::DuckDB => Ok(Expression::Function(Box::new(
                                    Function::new("ARRAY_REVERSE_SORT".to_string(), f.args),
                                ))),
                                DialectType::Spark
                                | DialectType::Databricks
                                | DialectType::Hive => {
                                    // SORT_ARRAY's trailing FALSE selects descending order.
                                    let mut args = f.args;
                                    args.push(Expression::Identifier(
                                        crate::expressions::Identifier::new("FALSE"),
                                    ));
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "SORT_ARRAY".to_string(),
                                        args,
                                    ))))
                                }
                                DialectType::Presto
                                | DialectType::Trino
                                | DialectType::Athena => {
                                    // ARRAY_SORT(x, (a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END)
                                    // NOTE: any arguments beyond the array are dropped here.
                                    let arr = f.args.into_iter().next().unwrap();
                                    let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                                        parameters: vec![
                                            crate::expressions::Identifier::new("a"),
                                            crate::expressions::Identifier::new("b"),
                                        ],
                                        body: Expression::Case(Box::new(Case {
                                            operand: None,
                                            whens: vec![
                                                (
                                                    Expression::Lt(Box::new(BinaryOp::new(
                                                        Expression::Identifier(crate::expressions::Identifier::new("a")),
                                                        Expression::Identifier(crate::expressions::Identifier::new("b")),
                                                    ))),
                                                    Expression::number(1),
                                                ),
                                                (
                                                    Expression::Gt(Box::new(BinaryOp::new(
                                                        Expression::Identifier(crate::expressions::Identifier::new("a")),
                                                        Expression::Identifier(crate::expressions::Identifier::new("b")),
                                                    ))),
                                                    // -1 emitted as a plain number literal here;
                                                    // the ARRAY_REVERSE_SORT arm below uses Neg(1).
                                                    Expression::Literal(Literal::Number("-1".to_string())),
                                                ),
                                            ],
                                            else_: Some(Expression::number(0)),
                                            comments: Vec::new(),
                                        })),
                                        colon: false,
                                        parameter_types: Vec::new(),
                                    }));
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "ARRAY_SORT".to_string(),
                                        vec![arr, lambda],
                                    ))))
                                }
                                // Other targets keep the DuckDB-style name unchanged.
                                _ => Ok(Expression::Function(Box::new(Function::new(
                                    "LIST_REVERSE_SORT".to_string(),
                                    f.args,
                                )))),
                            }
                        }
8708 // SPLIT_TO_ARRAY(x) with 1 arg -> add default ',' separator and rename
8709 "SPLIT_TO_ARRAY" if f.args.len() == 1 => {
8710 let mut args = f.args;
8711 args.push(Expression::string(","));
8712 let name = match target {
8713 DialectType::DuckDB => "STR_SPLIT",
8714 DialectType::Presto | DialectType::Trino => "SPLIT",
8715 DialectType::Spark
8716 | DialectType::Databricks
8717 | DialectType::Hive => "SPLIT",
8718 DialectType::PostgreSQL => "STRING_TO_ARRAY",
8719 DialectType::Redshift => "SPLIT_TO_ARRAY",
8720 _ => "SPLIT",
8721 };
8722 Ok(Expression::Function(Box::new(Function::new(
8723 name.to_string(),
8724 args,
8725 ))))
8726 }
8727 // SPLIT_TO_ARRAY(x, sep) with 2 args -> rename based on target
8728 "SPLIT_TO_ARRAY" if f.args.len() == 2 => {
8729 let name = match target {
8730 DialectType::DuckDB => "STR_SPLIT",
8731 DialectType::Presto | DialectType::Trino => "SPLIT",
8732 DialectType::Spark
8733 | DialectType::Databricks
8734 | DialectType::Hive => "SPLIT",
8735 DialectType::PostgreSQL => "STRING_TO_ARRAY",
8736 DialectType::Redshift => "SPLIT_TO_ARRAY",
8737 _ => "SPLIT",
8738 };
8739 Ok(Expression::Function(Box::new(Function::new(
8740 name.to_string(),
8741 f.args,
8742 ))))
8743 }
                        // STRING_TO_ARRAY/STR_SPLIT -> target-specific split function
                        "STRING_TO_ARRAY" | "STR_SPLIT" if f.args.len() >= 2 => {
                            let name = match target {
                                DialectType::DuckDB => "STR_SPLIT",
                                DialectType::Presto | DialectType::Trino => "SPLIT",
                                DialectType::Spark
                                | DialectType::Databricks
                                | DialectType::Hive => "SPLIT",
                                DialectType::Doris | DialectType::StarRocks => {
                                    "SPLIT_BY_STRING"
                                }
                                DialectType::PostgreSQL | DialectType::Redshift => {
                                    "STRING_TO_ARRAY"
                                }
                                _ => "SPLIT",
                            };
                            // For Spark/Hive, SPLIT uses regex - need to escape literal with \Q...\E
                            if matches!(
                                target,
                                DialectType::Spark
                                    | DialectType::Databricks
                                    | DialectType::Hive
                            ) {
                                let mut args = f.args;
                                let x = args.remove(0);
                                let sep = args.remove(0);
                                // Wrap separator in CONCAT('\\Q', sep, '\\E') so the
                                // separator is treated as a literal, not a pattern.
                                let escaped_sep =
                                    Expression::Function(Box::new(Function::new(
                                        "CONCAT".to_string(),
                                        vec![
                                            Expression::string("\\Q"),
                                            sep,
                                            Expression::string("\\E"),
                                        ],
                                    )));
                                Ok(Expression::Function(Box::new(Function::new(
                                    name.to_string(),
                                    vec![x, escaped_sep],
                                ))))
                            } else {
                                Ok(Expression::Function(Box::new(Function::new(
                                    name.to_string(),
                                    f.args,
                                ))))
                            }
                        }
                        // STR_SPLIT_REGEX(x, 'a') / REGEXP_SPLIT(x, 'a') -> target-specific regex split
                        "STR_SPLIT_REGEX" | "REGEXP_SPLIT" if f.args.len() == 2 => {
                            let name = match target {
                                DialectType::DuckDB => "STR_SPLIT_REGEX",
                                DialectType::Presto | DialectType::Trino => "REGEXP_SPLIT",
                                DialectType::Spark
                                | DialectType::Databricks
                                | DialectType::Hive => "SPLIT",
                                _ => "REGEXP_SPLIT",
                            };
                            Ok(Expression::Function(Box::new(Function::new(
                                name.to_string(),
                                f.args,
                            ))))
                        }
                        // SPLIT(x, sep) from Presto/StarRocks/Doris -> target-specific split with regex escaping for Hive/Spark
                        "SPLIT"
                            if f.args.len() == 2
                                && matches!(
                                    source,
                                    DialectType::Presto
                                        | DialectType::Trino
                                        | DialectType::Athena
                                        | DialectType::StarRocks
                                        | DialectType::Doris
                                )
                                && matches!(
                                    target,
                                    DialectType::Spark
                                        | DialectType::Databricks
                                        | DialectType::Hive
                                ) =>
                        {
                            // Presto/StarRocks SPLIT is literal, Hive/Spark SPLIT is regex,
                            // so the separator must be quoted with \Q...\E.
                            let mut args = f.args;
                            let x = args.remove(0);
                            let sep = args.remove(0);
                            let escaped_sep = Expression::Function(Box::new(Function::new(
                                "CONCAT".to_string(),
                                vec![Expression::string("\\Q"), sep, Expression::string("\\E")],
                            )));
                            Ok(Expression::Function(Box::new(Function::new(
                                "SPLIT".to_string(),
                                vec![x, escaped_sep],
                            ))))
                        }
8837 // SUBSTRINGINDEX -> SUBSTRING_INDEX (ClickHouse camelCase to standard)
8838 // For ClickHouse target, preserve original name to maintain camelCase
8839 "SUBSTRINGINDEX" => {
8840 let name = if matches!(target, DialectType::ClickHouse) {
8841 f.name.clone()
8842 } else {
8843 "SUBSTRING_INDEX".to_string()
8844 };
8845 Ok(Expression::Function(Box::new(Function::new(name, f.args))))
8846 }
                        // ARRAY_LENGTH/SIZE/CARDINALITY -> target-specific array length function
                        "ARRAY_LENGTH" | "SIZE" | "CARDINALITY" => {
                            // Get the array argument (first arg, drop dimension args)
                            let mut args = f.args;
                            let arr = if args.is_empty() {
                                // No arguments at all: rebuild the call unchanged.
                                return Ok(Expression::Function(Box::new(Function::new(
                                    name.to_string(),
                                    args,
                                ))));
                            } else {
                                args.remove(0)
                            };
                            // DuckDB and PostgreSQL/Redshift return early keeping ALL
                            // arguments (including the dimension); other targets keep
                            // only the array argument.
                            let name =
                                match target {
                                    DialectType::Spark
                                    | DialectType::Databricks
                                    | DialectType::Hive => "SIZE",
                                    DialectType::Presto | DialectType::Trino => "CARDINALITY",
                                    DialectType::BigQuery => "ARRAY_LENGTH",
                                    DialectType::DuckDB => {
                                        // DuckDB: use ARRAY_LENGTH with all args
                                        let mut all_args = vec![arr];
                                        all_args.extend(args);
                                        return Ok(Expression::Function(Box::new(
                                            Function::new("ARRAY_LENGTH".to_string(), all_args),
                                        )));
                                    }
                                    DialectType::PostgreSQL | DialectType::Redshift => {
                                        // Keep ARRAY_LENGTH with dimension arg
                                        let mut all_args = vec![arr];
                                        all_args.extend(args);
                                        return Ok(Expression::Function(Box::new(
                                            Function::new("ARRAY_LENGTH".to_string(), all_args),
                                        )));
                                    }
                                    DialectType::ClickHouse => "LENGTH",
                                    _ => "ARRAY_LENGTH",
                                };
                            Ok(Expression::Function(Box::new(Function::new(
                                name.to_string(),
                                vec![arr],
                            ))))
                        }
                        // UNICODE(x) -> target-specific codepoint function
                        "UNICODE" if f.args.len() == 1 => {
                            match target {
                                // SQLite and DuckDB have UNICODE natively.
                                DialectType::SQLite | DialectType::DuckDB => {
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "UNICODE".to_string(),
                                        f.args,
                                    ))))
                                }
                                DialectType::Oracle => {
                                    // ASCII(UNISTR(x))
                                    let inner = Expression::Function(Box::new(Function::new(
                                        "UNISTR".to_string(),
                                        f.args,
                                    )));
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "ASCII".to_string(),
                                        vec![inner],
                                    ))))
                                }
                                DialectType::MySQL => {
                                    // ORD(CONVERT(x USING utf32))
                                    let arg = f.args.into_iter().next().unwrap();
                                    let convert_expr = Expression::ConvertToCharset(Box::new(
                                        crate::expressions::ConvertToCharset {
                                            this: Box::new(arg),
                                            dest: Some(Box::new(Expression::Identifier(
                                                crate::expressions::Identifier::new("utf32"),
                                            ))),
                                            source: None,
                                        },
                                    ));
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "ORD".to_string(),
                                        vec![convert_expr],
                                    ))))
                                }
                                // Fallback: ASCII(x).
                                _ => Ok(Expression::Function(Box::new(Function::new(
                                    "ASCII".to_string(),
                                    f.args,
                                )))),
                            }
                        }
8933 // XOR(a, b, ...) -> a XOR b XOR ... for MySQL, BITWISE_XOR for Presto/Trino, # for PostgreSQL, ^ for BigQuery
8934 "XOR" if f.args.len() >= 2 => {
8935 match target {
8936 DialectType::ClickHouse => {
8937 // ClickHouse: keep as xor() function with lowercase name
8938 Ok(Expression::Function(Box::new(Function::new(
8939 "xor".to_string(),
8940 f.args,
8941 ))))
8942 }
8943 DialectType::Presto | DialectType::Trino => {
8944 if f.args.len() == 2 {
8945 Ok(Expression::Function(Box::new(Function::new(
8946 "BITWISE_XOR".to_string(),
8947 f.args,
8948 ))))
8949 } else {
8950 // Nest: BITWISE_XOR(BITWISE_XOR(a, b), c)
8951 let mut args = f.args;
8952 let first = args.remove(0);
8953 let second = args.remove(0);
8954 let mut result =
8955 Expression::Function(Box::new(Function::new(
8956 "BITWISE_XOR".to_string(),
8957 vec![first, second],
8958 )));
8959 for arg in args {
8960 result =
8961 Expression::Function(Box::new(Function::new(
8962 "BITWISE_XOR".to_string(),
8963 vec![result, arg],
8964 )));
8965 }
8966 Ok(result)
8967 }
8968 }
8969 DialectType::MySQL
8970 | DialectType::SingleStore
8971 | DialectType::Doris
8972 | DialectType::StarRocks => {
8973 // Convert XOR(a, b, c) -> Expression::Xor with expressions list
8974 let args = f.args;
8975 Ok(Expression::Xor(Box::new(crate::expressions::Xor {
8976 this: None,
8977 expression: None,
8978 expressions: args,
8979 })))
8980 }
8981 DialectType::PostgreSQL | DialectType::Redshift => {
8982 // PostgreSQL: a # b (hash operator for XOR)
8983 let mut args = f.args;
8984 let first = args.remove(0);
8985 let second = args.remove(0);
8986 let mut result = Expression::BitwiseXor(Box::new(
8987 BinaryOp::new(first, second),
8988 ));
8989 for arg in args {
8990 result = Expression::BitwiseXor(Box::new(
8991 BinaryOp::new(result, arg),
8992 ));
8993 }
8994 Ok(result)
8995 }
8996 DialectType::DuckDB => {
8997 // DuckDB: keep as XOR function (DuckDB ^ is Power, not XOR)
8998 Ok(Expression::Function(Box::new(Function::new(
8999 "XOR".to_string(),
9000 f.args,
9001 ))))
9002 }
9003 DialectType::BigQuery => {
9004 // BigQuery: a ^ b (caret operator for XOR)
9005 let mut args = f.args;
9006 let first = args.remove(0);
9007 let second = args.remove(0);
9008 let mut result = Expression::BitwiseXor(Box::new(
9009 BinaryOp::new(first, second),
9010 ));
9011 for arg in args {
9012 result = Expression::BitwiseXor(Box::new(
9013 BinaryOp::new(result, arg),
9014 ));
9015 }
9016 Ok(result)
9017 }
9018 _ => Ok(Expression::Function(Box::new(Function::new(
9019 "XOR".to_string(),
9020 f.args,
9021 )))),
9022 }
9023 }
                        // ARRAY_REVERSE_SORT(x) -> SORT_ARRAY(x, FALSE) for Spark/Hive, ARRAY_SORT(x, lambda) for Presto
                        "ARRAY_REVERSE_SORT" if f.args.len() >= 1 => {
                            match target {
                                DialectType::Spark
                                | DialectType::Databricks
                                | DialectType::Hive => {
                                    // SORT_ARRAY's trailing FALSE selects descending order.
                                    let mut args = f.args;
                                    args.push(Expression::Identifier(
                                        crate::expressions::Identifier::new("FALSE"),
                                    ));
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "SORT_ARRAY".to_string(),
                                        args,
                                    ))))
                                }
                                DialectType::Presto
                                | DialectType::Trino
                                | DialectType::Athena => {
                                    // ARRAY_SORT(x, (a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END)
                                    // NOTE: any arguments beyond the array are dropped here.
                                    let arr = f.args.into_iter().next().unwrap();
                                    let lambda = Expression::Lambda(Box::new(
                                        crate::expressions::LambdaExpr {
                                            parameters: vec![
                                                Identifier::new("a"),
                                                Identifier::new("b"),
                                            ],
                                            colon: false,
                                            parameter_types: Vec::new(),
                                            body: Expression::Case(Box::new(Case {
                                                operand: None,
                                                whens: vec![
                                                    (
                                                        Expression::Lt(Box::new(
                                                            BinaryOp::new(
                                                                Expression::Identifier(
                                                                    Identifier::new("a"),
                                                                ),
                                                                Expression::Identifier(
                                                                    Identifier::new("b"),
                                                                ),
                                                            ),
                                                        )),
                                                        Expression::number(1),
                                                    ),
                                                    (
                                                        Expression::Gt(Box::new(
                                                            BinaryOp::new(
                                                                Expression::Identifier(
                                                                    Identifier::new("a"),
                                                                ),
                                                                Expression::Identifier(
                                                                    Identifier::new("b"),
                                                                ),
                                                            ),
                                                        )),
                                                        // -1 built as Neg(1) here; the
                                                        // LIST_REVERSE_SORT arm above uses a
                                                        // "-1" number literal instead.
                                                        Expression::Neg(Box::new(
                                                            crate::expressions::UnaryOp {
                                                                this: Expression::number(1),
                                                            },
                                                        )),
                                                    ),
                                                ],
                                                else_: Some(Expression::number(0)),
                                                comments: Vec::new(),
                                            })),
                                        },
                                    ));
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "ARRAY_SORT".to_string(),
                                        vec![arr, lambda],
                                    ))))
                                }
                                // Other targets keep the name unchanged.
                                _ => Ok(Expression::Function(Box::new(Function::new(
                                    "ARRAY_REVERSE_SORT".to_string(),
                                    f.args,
                                )))),
                            }
                        }
9102 // ENCODE(x) -> ENCODE(x, 'utf-8') for Spark/Hive, TO_UTF8(x) for Presto
9103 "ENCODE" if f.args.len() == 1 => match target {
9104 DialectType::Spark
9105 | DialectType::Databricks
9106 | DialectType::Hive => {
9107 let mut args = f.args;
9108 args.push(Expression::string("utf-8"));
9109 Ok(Expression::Function(Box::new(Function::new(
9110 "ENCODE".to_string(),
9111 args,
9112 ))))
9113 }
9114 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
9115 Ok(Expression::Function(Box::new(Function::new(
9116 "TO_UTF8".to_string(),
9117 f.args,
9118 ))))
9119 }
9120 _ => Ok(Expression::Function(Box::new(Function::new(
9121 "ENCODE".to_string(),
9122 f.args,
9123 )))),
9124 },
9125 // DECODE(x) -> DECODE(x, 'utf-8') for Spark/Hive, FROM_UTF8(x) for Presto
9126 "DECODE" if f.args.len() == 1 => match target {
9127 DialectType::Spark
9128 | DialectType::Databricks
9129 | DialectType::Hive => {
9130 let mut args = f.args;
9131 args.push(Expression::string("utf-8"));
9132 Ok(Expression::Function(Box::new(Function::new(
9133 "DECODE".to_string(),
9134 args,
9135 ))))
9136 }
9137 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
9138 Ok(Expression::Function(Box::new(Function::new(
9139 "FROM_UTF8".to_string(),
9140 f.args,
9141 ))))
9142 }
9143 _ => Ok(Expression::Function(Box::new(Function::new(
9144 "DECODE".to_string(),
9145 f.args,
9146 )))),
9147 },
9148 // QUANTILE(x, p) -> PERCENTILE(x, p) for Spark/Hive
9149 "QUANTILE" if f.args.len() == 2 => {
9150 let name = match target {
9151 DialectType::Spark
9152 | DialectType::Databricks
9153 | DialectType::Hive => "PERCENTILE",
9154 DialectType::Presto | DialectType::Trino => "APPROX_PERCENTILE",
9155 DialectType::BigQuery => "PERCENTILE_CONT",
9156 _ => "QUANTILE",
9157 };
9158 Ok(Expression::Function(Box::new(Function::new(
9159 name.to_string(),
9160 f.args,
9161 ))))
9162 }
                        // QUANTILE_CONT(x, q) -> PERCENTILE_CONT(q) WITHIN GROUP (ORDER BY x) for PostgreSQL/Snowflake
                        "QUANTILE_CONT" if f.args.len() == 2 => {
                            let mut args = f.args;
                            let column = args.remove(0);
                            let quantile = args.remove(0);
                            match target {
                                // DuckDB keeps its native two-argument form.
                                DialectType::DuckDB => {
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "QUANTILE_CONT".to_string(),
                                        vec![column, quantile],
                                    ))))
                                }
                                DialectType::PostgreSQL
                                | DialectType::Redshift
                                | DialectType::Snowflake => {
                                    // PERCENTILE_CONT(q) WITHIN GROUP (ORDER BY x)
                                    // The column is cloned: once as the percentile operand,
                                    // once as the ORDER BY key.
                                    let inner = Expression::PercentileCont(Box::new(
                                        crate::expressions::PercentileFunc {
                                            this: column.clone(),
                                            percentile: quantile,
                                            order_by: None,
                                            filter: None,
                                        },
                                    ));
                                    Ok(Expression::WithinGroup(Box::new(
                                        crate::expressions::WithinGroup {
                                            this: inner,
                                            order_by: vec![crate::expressions::Ordered {
                                                this: column,
                                                desc: false,
                                                nulls_first: None,
                                                explicit_asc: false,
                                                with_fill: None,
                                            }],
                                        },
                                    )))
                                }
                                _ => Ok(Expression::Function(Box::new(Function::new(
                                    "QUANTILE_CONT".to_string(),
                                    vec![column, quantile],
                                )))),
                            }
                        }
                        // QUANTILE_DISC(x, q) -> PERCENTILE_DISC(q) WITHIN GROUP (ORDER BY x) for PostgreSQL/Snowflake
                        "QUANTILE_DISC" if f.args.len() == 2 => {
                            let mut args = f.args;
                            let column = args.remove(0);
                            let quantile = args.remove(0);
                            match target {
                                // DuckDB keeps its native two-argument form.
                                DialectType::DuckDB => {
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "QUANTILE_DISC".to_string(),
                                        vec![column, quantile],
                                    ))))
                                }
                                DialectType::PostgreSQL
                                | DialectType::Redshift
                                | DialectType::Snowflake => {
                                    // PERCENTILE_DISC(q) WITHIN GROUP (ORDER BY x)
                                    let inner = Expression::PercentileDisc(Box::new(
                                        crate::expressions::PercentileFunc {
                                            this: column.clone(),
                                            percentile: quantile,
                                            order_by: None,
                                            filter: None,
                                        },
                                    ));
                                    Ok(Expression::WithinGroup(Box::new(
                                        crate::expressions::WithinGroup {
                                            this: inner,
                                            order_by: vec![crate::expressions::Ordered {
                                                this: column,
                                                desc: false,
                                                nulls_first: None,
                                                explicit_asc: false,
                                                with_fill: None,
                                            }],
                                        },
                                    )))
                                }
                                _ => Ok(Expression::Function(Box::new(Function::new(
                                    "QUANTILE_DISC".to_string(),
                                    vec![column, quantile],
                                )))),
                            }
                        }
9249 // PERCENTILE_APPROX(x, p) / APPROX_PERCENTILE(x, p) -> target-specific
9250 "PERCENTILE_APPROX" | "APPROX_PERCENTILE" if f.args.len() >= 2 => {
9251 let name = match target {
9252 DialectType::Presto
9253 | DialectType::Trino
9254 | DialectType::Athena => "APPROX_PERCENTILE",
9255 DialectType::Spark
9256 | DialectType::Databricks
9257 | DialectType::Hive => "PERCENTILE_APPROX",
9258 DialectType::DuckDB => "APPROX_QUANTILE",
9259 DialectType::PostgreSQL | DialectType::Redshift => {
9260 "PERCENTILE_CONT"
9261 }
9262 _ => &f.name,
9263 };
9264 Ok(Expression::Function(Box::new(Function::new(
9265 name.to_string(),
9266 f.args,
9267 ))))
9268 }
9269 // EPOCH(x) -> UNIX_TIMESTAMP(x) for Spark/Hive
9270 "EPOCH" if f.args.len() == 1 => {
9271 let name = match target {
9272 DialectType::Spark
9273 | DialectType::Databricks
9274 | DialectType::Hive => "UNIX_TIMESTAMP",
9275 DialectType::Presto | DialectType::Trino => "TO_UNIXTIME",
9276 _ => "EPOCH",
9277 };
9278 Ok(Expression::Function(Box::new(Function::new(
9279 name.to_string(),
9280 f.args,
9281 ))))
9282 }
9283 // EPOCH_MS(x) -> target-specific epoch milliseconds conversion
9284 "EPOCH_MS" if f.args.len() == 1 => {
9285 match target {
9286 DialectType::Spark | DialectType::Databricks => {
9287 Ok(Expression::Function(Box::new(Function::new(
9288 "TIMESTAMP_MILLIS".to_string(),
9289 f.args,
9290 ))))
9291 }
9292 DialectType::Hive => {
9293 // Hive: FROM_UNIXTIME(x / 1000)
9294 let arg = f.args.into_iter().next().unwrap();
9295 let div_expr = Expression::Div(Box::new(
9296 crate::expressions::BinaryOp::new(
9297 arg,
9298 Expression::number(1000),
9299 ),
9300 ));
9301 Ok(Expression::Function(Box::new(Function::new(
9302 "FROM_UNIXTIME".to_string(),
9303 vec![div_expr],
9304 ))))
9305 }
9306 DialectType::Presto | DialectType::Trino => {
9307 Ok(Expression::Function(Box::new(Function::new(
9308 "FROM_UNIXTIME".to_string(),
9309 vec![Expression::Div(Box::new(
9310 crate::expressions::BinaryOp::new(
9311 f.args.into_iter().next().unwrap(),
9312 Expression::number(1000),
9313 ),
9314 ))],
9315 ))))
9316 }
9317 _ => Ok(Expression::Function(Box::new(Function::new(
9318 "EPOCH_MS".to_string(),
9319 f.args,
9320 )))),
9321 }
9322 }
                        // HASHBYTES('algorithm', x) -> target-specific hash function
                        "HASHBYTES" if f.args.len() == 2 => {
                            // Keep HASHBYTES as-is for TSQL target
                            if matches!(target, DialectType::TSQL) {
                                return Ok(Expression::Function(f));
                            }
                            // The algorithm must be a string literal; otherwise the call
                            // is left untouched.
                            let algo_expr = &f.args[0];
                            let algo = match algo_expr {
                                Expression::Literal(crate::expressions::Literal::String(s)) => {
                                    s.to_uppercase()
                                }
                                _ => return Ok(Expression::Function(f)),
                            };
                            let data_arg = f.args.into_iter().nth(1).unwrap();
                            match algo.as_str() {
                                "SHA1" => {
                                    // Spark/Databricks call it SHA; everyone else SHA1.
                                    let name = match target {
                                        DialectType::Spark | DialectType::Databricks => "SHA",
                                        DialectType::Hive => "SHA1",
                                        _ => "SHA1",
                                    };
                                    Ok(Expression::Function(Box::new(Function::new(
                                        name.to_string(),
                                        vec![data_arg],
                                    ))))
                                }
                                // SHA2 variants carry the bit width as a second argument.
                                "SHA2_256" => {
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "SHA2".to_string(),
                                        vec![data_arg, Expression::number(256)],
                                    ))))
                                }
                                "SHA2_512" => {
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "SHA2".to_string(),
                                        vec![data_arg, Expression::number(512)],
                                    ))))
                                }
                                "MD5" => Ok(Expression::Function(Box::new(Function::new(
                                    "MD5".to_string(),
                                    vec![data_arg],
                                )))),
                                // Unknown algorithm: rebuild HASHBYTES with the
                                // uppercased algorithm name.
                                _ => Ok(Expression::Function(Box::new(Function::new(
                                    "HASHBYTES".to_string(),
                                    vec![Expression::string(&algo), data_arg],
                                )))),
                            }
                        }
9371 // JSON_EXTRACT_PATH(json, key1, key2, ...) -> target-specific JSON extraction
9372 "JSON_EXTRACT_PATH" | "JSON_EXTRACT_PATH_TEXT" if f.args.len() >= 2 => {
9373 let is_text = name == "JSON_EXTRACT_PATH_TEXT";
9374 let mut args = f.args;
9375 let json_expr = args.remove(0);
9376 // Build JSON path from remaining keys: $.key1.key2 or $.key1[0]
9377 let mut json_path = "$".to_string();
9378 for a in &args {
9379 match a {
9380 Expression::Literal(
9381 crate::expressions::Literal::String(s),
9382 ) => {
9383 // Numeric string keys become array indices: [0]
9384 if s.chars().all(|c| c.is_ascii_digit()) {
9385 json_path.push('[');
9386 json_path.push_str(s);
9387 json_path.push(']');
9388 } else {
9389 json_path.push('.');
9390 json_path.push_str(s);
9391 }
9392 }
9393 _ => {
9394 json_path.push_str(".?");
9395 }
9396 }
9397 }
9398 match target {
9399 DialectType::Spark
9400 | DialectType::Databricks
9401 | DialectType::Hive => {
9402 Ok(Expression::Function(Box::new(Function::new(
9403 "GET_JSON_OBJECT".to_string(),
9404 vec![json_expr, Expression::string(&json_path)],
9405 ))))
9406 }
9407 DialectType::Presto | DialectType::Trino => {
9408 let func_name = if is_text {
9409 "JSON_EXTRACT_SCALAR"
9410 } else {
9411 "JSON_EXTRACT"
9412 };
9413 Ok(Expression::Function(Box::new(Function::new(
9414 func_name.to_string(),
9415 vec![json_expr, Expression::string(&json_path)],
9416 ))))
9417 }
9418 DialectType::BigQuery | DialectType::MySQL => {
9419 let func_name = if is_text {
9420 "JSON_EXTRACT_SCALAR"
9421 } else {
9422 "JSON_EXTRACT"
9423 };
9424 Ok(Expression::Function(Box::new(Function::new(
9425 func_name.to_string(),
9426 vec![json_expr, Expression::string(&json_path)],
9427 ))))
9428 }
9429 DialectType::PostgreSQL | DialectType::Materialize => {
9430 // Keep as JSON_EXTRACT_PATH_TEXT / JSON_EXTRACT_PATH for PostgreSQL/Materialize
9431 let func_name = if is_text {
9432 "JSON_EXTRACT_PATH_TEXT"
9433 } else {
9434 "JSON_EXTRACT_PATH"
9435 };
9436 let mut new_args = vec![json_expr];
9437 new_args.extend(args);
9438 Ok(Expression::Function(Box::new(Function::new(
9439 func_name.to_string(),
9440 new_args,
9441 ))))
9442 }
9443 DialectType::DuckDB | DialectType::SQLite => {
9444 // Use -> for JSON_EXTRACT_PATH, ->> for JSON_EXTRACT_PATH_TEXT
9445 if is_text {
9446 Ok(Expression::JsonExtractScalar(Box::new(
9447 crate::expressions::JsonExtractFunc {
9448 this: json_expr,
9449 path: Expression::string(&json_path),
9450 returning: None,
9451 arrow_syntax: true,
9452 hash_arrow_syntax: false,
9453 wrapper_option: None,
9454 quotes_option: None,
9455 on_scalar_string: false,
9456 on_error: None,
9457 },
9458 )))
9459 } else {
9460 Ok(Expression::JsonExtract(Box::new(
9461 crate::expressions::JsonExtractFunc {
9462 this: json_expr,
9463 path: Expression::string(&json_path),
9464 returning: None,
9465 arrow_syntax: true,
9466 hash_arrow_syntax: false,
9467 wrapper_option: None,
9468 quotes_option: None,
9469 on_scalar_string: false,
9470 on_error: None,
9471 },
9472 )))
9473 }
9474 }
9475 DialectType::Redshift => {
9476 // Keep as JSON_EXTRACT_PATH_TEXT for Redshift
9477 let mut new_args = vec![json_expr];
9478 new_args.extend(args);
9479 Ok(Expression::Function(Box::new(Function::new(
9480 "JSON_EXTRACT_PATH_TEXT".to_string(),
9481 new_args,
9482 ))))
9483 }
9484 DialectType::TSQL => {
9485 // ISNULL(JSON_QUERY(json, '$.path'), JSON_VALUE(json, '$.path'))
9486 let jq = Expression::Function(Box::new(Function::new(
9487 "JSON_QUERY".to_string(),
9488 vec![json_expr.clone(), Expression::string(&json_path)],
9489 )));
9490 let jv = Expression::Function(Box::new(Function::new(
9491 "JSON_VALUE".to_string(),
9492 vec![json_expr, Expression::string(&json_path)],
9493 )));
9494 Ok(Expression::Function(Box::new(Function::new(
9495 "ISNULL".to_string(),
9496 vec![jq, jv],
9497 ))))
9498 }
9499 DialectType::ClickHouse => {
9500 let func_name = if is_text {
9501 "JSONExtractString"
9502 } else {
9503 "JSONExtractRaw"
9504 };
9505 let mut new_args = vec![json_expr];
9506 new_args.extend(args);
9507 Ok(Expression::Function(Box::new(Function::new(
9508 func_name.to_string(),
9509 new_args,
9510 ))))
9511 }
9512 _ => {
9513 let func_name = if is_text {
9514 "JSON_EXTRACT_SCALAR"
9515 } else {
9516 "JSON_EXTRACT"
9517 };
9518 Ok(Expression::Function(Box::new(Function::new(
9519 func_name.to_string(),
9520 vec![json_expr, Expression::string(&json_path)],
9521 ))))
9522 }
9523 }
9524 }
9525 // APPROX_DISTINCT(x) -> APPROX_COUNT_DISTINCT(x) for Spark/Hive/BigQuery
9526 "APPROX_DISTINCT" if f.args.len() >= 1 => {
9527 let name = match target {
9528 DialectType::Spark
9529 | DialectType::Databricks
9530 | DialectType::Hive
9531 | DialectType::BigQuery => "APPROX_COUNT_DISTINCT",
9532 _ => "APPROX_DISTINCT",
9533 };
9534 let mut args = f.args;
9535 // Hive doesn't support the accuracy parameter
9536 if name == "APPROX_COUNT_DISTINCT"
9537 && matches!(target, DialectType::Hive)
9538 {
9539 args.truncate(1);
9540 }
9541 Ok(Expression::Function(Box::new(Function::new(
9542 name.to_string(),
9543 args,
9544 ))))
9545 }
9546 // REGEXP_EXTRACT(x, pattern) - normalize default group index
9547 "REGEXP_EXTRACT" if f.args.len() == 2 => {
9548 // Determine source default group index
9549 let source_default = match source {
9550 DialectType::Presto
9551 | DialectType::Trino
9552 | DialectType::DuckDB => 0,
9553 _ => 1, // Hive/Spark/Databricks default = 1
9554 };
9555 // Determine target default group index
9556 let target_default = match target {
9557 DialectType::Presto
9558 | DialectType::Trino
9559 | DialectType::DuckDB
9560 | DialectType::BigQuery => 0,
9561 DialectType::Snowflake => {
9562 // Snowflake uses REGEXP_SUBSTR
9563 return Ok(Expression::Function(Box::new(Function::new(
9564 "REGEXP_SUBSTR".to_string(),
9565 f.args,
9566 ))));
9567 }
9568 _ => 1, // Hive/Spark/Databricks default = 1
9569 };
9570 if source_default != target_default {
9571 let mut args = f.args;
9572 args.push(Expression::number(source_default));
9573 Ok(Expression::Function(Box::new(Function::new(
9574 "REGEXP_EXTRACT".to_string(),
9575 args,
9576 ))))
9577 } else {
9578 Ok(Expression::Function(Box::new(Function::new(
9579 "REGEXP_EXTRACT".to_string(),
9580 f.args,
9581 ))))
9582 }
9583 }
9584 // RLIKE(str, pattern) -> RegexpLike expression (generates as target-specific form)
9585 "RLIKE" if f.args.len() == 2 => {
9586 let mut args = f.args;
9587 let str_expr = args.remove(0);
9588 let pattern = args.remove(0);
9589 match target {
9590 DialectType::DuckDB => {
9591 // REGEXP_MATCHES(str, pattern)
9592 Ok(Expression::Function(Box::new(Function::new(
9593 "REGEXP_MATCHES".to_string(),
9594 vec![str_expr, pattern],
9595 ))))
9596 }
9597 _ => {
9598 // Convert to RegexpLike which generates as RLIKE/~/REGEXP_LIKE per dialect
9599 Ok(Expression::RegexpLike(Box::new(
9600 crate::expressions::RegexpFunc {
9601 this: str_expr,
9602 pattern,
9603 flags: None,
9604 },
9605 )))
9606 }
9607 }
9608 }
// EOMONTH(date[, month_offset]) -> target-specific
// TSQL's EOMONTH returns the last day of the month of `date`, optionally
// after shifting by `month_offset` months. Each target reconstructs that
// semantics from its own date primitives; branches below document the
// exact SQL shape they produce.
"EOMONTH" if f.args.len() >= 1 => {
    let mut args = f.args;
    let date_arg = args.remove(0);
    // Optional second argument: months to add before taking end-of-month.
    let month_offset = if !args.is_empty() {
        Some(args.remove(0))
    } else {
        None
    };

    // Helper: wrap date in CAST to DATE
    let cast_to_date = |e: Expression| -> Expression {
        Expression::Cast(Box::new(Cast {
            this: e,
            to: DataType::Date,
            trailing_comments: vec![],
            double_colon_syntax: false,
            format: None,
            default: None,
        }))
    };

    match target {
        DialectType::TSQL | DialectType::Fabric => {
            // TSQL: EOMONTH(CAST(date AS DATE)) or EOMONTH(DATEADD(MONTH, offset, CAST(date AS DATE)))
            let date = cast_to_date(date_arg);
            let date = if let Some(offset) = month_offset {
                Expression::Function(Box::new(Function::new(
                    "DATEADD".to_string(),
                    vec![
                        Expression::Identifier(Identifier::new("MONTH")),
                        offset,
                        date,
                    ],
                )))
            } else {
                date
            };
            Ok(Expression::Function(Box::new(Function::new(
                "EOMONTH".to_string(),
                vec![date],
            ))))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // Presto: LAST_DAY_OF_MONTH(CAST(CAST(date AS TIMESTAMP) AS DATE))
            // or with offset: LAST_DAY_OF_MONTH(DATE_ADD('MONTH', offset, CAST(CAST(date AS TIMESTAMP) AS DATE)))
            // NOTE(review): the intermediate TIMESTAMP cast appears to mirror
            // sqlglot's Presto output — confirm it is still required.
            let cast_ts = Expression::Cast(Box::new(Cast {
                this: date_arg,
                to: DataType::Timestamp {
                    timezone: false,
                    precision: None,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            }));
            let date = cast_to_date(cast_ts);
            let date = if let Some(offset) = month_offset {
                Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![Expression::string("MONTH"), offset, date],
                )))
            } else {
                date
            };
            Ok(Expression::Function(Box::new(Function::new(
                "LAST_DAY_OF_MONTH".to_string(),
                vec![date],
            ))))
        }
        DialectType::PostgreSQL => {
            // PostgreSQL: CAST(DATE_TRUNC('MONTH', CAST(date AS DATE) [+ INTERVAL 'offset MONTH']) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)
            let date = cast_to_date(date_arg);
            let date = if let Some(offset) = month_offset {
                // The offset is rendered into the interval string itself,
                // e.g. INTERVAL '3 MONTH'.
                let interval_str = format!(
                    "{} MONTH",
                    Self::expr_to_string_static(&offset)
                );
                Expression::Add(Box::new(
                    crate::expressions::BinaryOp::new(
                        date,
                        Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(Expression::string(
                                    &interval_str,
                                )),
                                unit: None,
                            },
                        )),
                    ),
                ))
            } else {
                date
            };
            // First day of the (possibly shifted) month...
            let truncated =
                Expression::Function(Box::new(Function::new(
                    "DATE_TRUNC".to_string(),
                    vec![Expression::string("MONTH"), date],
                )));
            // ...advance one month, then step back one day to land on the
            // last day of the original month.
            let plus_month = Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(
                    truncated,
                    Expression::Interval(Box::new(
                        crate::expressions::Interval {
                            this: Some(Expression::string("1 MONTH")),
                            unit: None,
                        },
                    )),
                ),
            ));
            let minus_day = Expression::Sub(Box::new(
                crate::expressions::BinaryOp::new(
                    plus_month,
                    Expression::Interval(Box::new(
                        crate::expressions::Interval {
                            this: Some(Expression::string("1 DAY")),
                            unit: None,
                        },
                    )),
                ),
            ));
            Ok(Expression::Cast(Box::new(Cast {
                this: minus_day,
                to: DataType::Date,
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            })))
        }
        DialectType::DuckDB => {
            // DuckDB: LAST_DAY(CAST(date AS DATE) [+ INTERVAL (offset) MONTH])
            let date = cast_to_date(date_arg);
            let date = if let Some(offset) = month_offset {
                // Wrap negative numbers in parentheses for DuckDB INTERVAL
                let interval_val =
                    if matches!(&offset, Expression::Neg(_)) {
                        Expression::Paren(Box::new(
                            crate::expressions::Paren {
                                this: offset,
                                trailing_comments: Vec::new(),
                            },
                        ))
                    } else {
                        offset
                    };
                Expression::Add(Box::new(crate::expressions::BinaryOp::new(
                    date,
                    Expression::Interval(Box::new(crate::expressions::Interval {
                        this: Some(interval_val),
                        unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                            unit: crate::expressions::IntervalUnit::Month,
                            use_plural: false,
                        }),
                    })),
                )))
            } else {
                date
            };
            Ok(Expression::Function(Box::new(Function::new(
                "LAST_DAY".to_string(),
                vec![date],
            ))))
        }
        DialectType::Snowflake | DialectType::Redshift => {
            // Snowflake/Redshift: LAST_DAY(TO_DATE(date) or CAST(date AS DATE))
            // With offset: LAST_DAY(DATEADD(MONTH, offset, TO_DATE(date)))
            let date = if matches!(target, DialectType::Snowflake) {
                Expression::Function(Box::new(Function::new(
                    "TO_DATE".to_string(),
                    vec![date_arg],
                )))
            } else {
                cast_to_date(date_arg)
            };
            let date = if let Some(offset) = month_offset {
                Expression::Function(Box::new(Function::new(
                    "DATEADD".to_string(),
                    vec![
                        Expression::Identifier(Identifier::new("MONTH")),
                        offset,
                        date,
                    ],
                )))
            } else {
                date
            };
            Ok(Expression::Function(Box::new(Function::new(
                "LAST_DAY".to_string(),
                vec![date],
            ))))
        }
        DialectType::Spark | DialectType::Databricks => {
            // Spark: LAST_DAY(TO_DATE(date))
            // With offset: LAST_DAY(ADD_MONTHS(TO_DATE(date), offset))
            let date = Expression::Function(Box::new(Function::new(
                "TO_DATE".to_string(),
                vec![date_arg],
            )));
            let date = if let Some(offset) = month_offset {
                Expression::Function(Box::new(Function::new(
                    "ADD_MONTHS".to_string(),
                    vec![date, offset],
                )))
            } else {
                date
            };
            Ok(Expression::Function(Box::new(Function::new(
                "LAST_DAY".to_string(),
                vec![date],
            ))))
        }
        DialectType::MySQL => {
            // MySQL: LAST_DAY(DATE(date)) - no offset
            // With offset: LAST_DAY(DATE_ADD(date, INTERVAL offset MONTH)) - no DATE() wrapper
            let date = if let Some(offset) = month_offset {
                let iu = crate::expressions::IntervalUnit::Month;
                Expression::DateAdd(Box::new(
                    crate::expressions::DateAddFunc {
                        this: date_arg,
                        interval: offset,
                        unit: iu,
                    },
                ))
            } else {
                Expression::Function(Box::new(Function::new(
                    "DATE".to_string(),
                    vec![date_arg],
                )))
            };
            Ok(Expression::Function(Box::new(Function::new(
                "LAST_DAY".to_string(),
                vec![date],
            ))))
        }
        DialectType::BigQuery => {
            // BigQuery: LAST_DAY(CAST(date AS DATE))
            // With offset: LAST_DAY(DATE_ADD(CAST(date AS DATE), INTERVAL offset MONTH))
            let date = cast_to_date(date_arg);
            let date = if let Some(offset) = month_offset {
                let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                    this: Some(offset),
                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                        unit: crate::expressions::IntervalUnit::Month,
                        use_plural: false,
                    }),
                }));
                Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![date, interval],
                )))
            } else {
                date
            };
            Ok(Expression::Function(Box::new(Function::new(
                "LAST_DAY".to_string(),
                vec![date],
            ))))
        }
        DialectType::ClickHouse => {
            // ClickHouse: LAST_DAY(CAST(date AS Nullable(DATE)))
            let date = Expression::Cast(Box::new(Cast {
                this: date_arg,
                to: DataType::Nullable {
                    inner: Box::new(DataType::Date),
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            }));
            let date = if let Some(offset) = month_offset {
                Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![
                        Expression::Identifier(Identifier::new("MONTH")),
                        offset,
                        date,
                    ],
                )))
            } else {
                date
            };
            Ok(Expression::Function(Box::new(Function::new(
                "LAST_DAY".to_string(),
                vec![date],
            ))))
        }
        DialectType::Hive => {
            // Hive: LAST_DAY(date); offset handled via ADD_MONTHS.
            let date = if let Some(offset) = month_offset {
                Expression::Function(Box::new(Function::new(
                    "ADD_MONTHS".to_string(),
                    vec![date_arg, offset],
                )))
            } else {
                date_arg
            };
            Ok(Expression::Function(Box::new(Function::new(
                "LAST_DAY".to_string(),
                vec![date],
            ))))
        }
        _ => {
            // Default: LAST_DAY(date), with a generic DATEADD for the offset.
            let date = if let Some(offset) = month_offset {
                let unit =
                    Expression::Identifier(Identifier::new("MONTH"));
                Expression::Function(Box::new(Function::new(
                    "DATEADD".to_string(),
                    vec![unit, offset, date_arg],
                )))
            } else {
                date_arg
            };
            Ok(Expression::Function(Box::new(Function::new(
                "LAST_DAY".to_string(),
                vec![date],
            ))))
        }
    }
}
9940 // LAST_DAY(x) / LAST_DAY_OF_MONTH(x) -> target-specific
9941 "LAST_DAY" | "LAST_DAY_OF_MONTH"
9942 if !matches!(source, DialectType::BigQuery)
9943 && f.args.len() >= 1 =>
9944 {
9945 let first_arg = f.args.into_iter().next().unwrap();
9946 match target {
9947 DialectType::TSQL | DialectType::Fabric => {
9948 Ok(Expression::Function(Box::new(Function::new(
9949 "EOMONTH".to_string(),
9950 vec![first_arg],
9951 ))))
9952 }
9953 DialectType::Presto
9954 | DialectType::Trino
9955 | DialectType::Athena => {
9956 Ok(Expression::Function(Box::new(Function::new(
9957 "LAST_DAY_OF_MONTH".to_string(),
9958 vec![first_arg],
9959 ))))
9960 }
9961 _ => Ok(Expression::Function(Box::new(Function::new(
9962 "LAST_DAY".to_string(),
9963 vec![first_arg],
9964 )))),
9965 }
9966 }
9967 // MAP(keys_array, vals_array) from Presto (2-arg form) -> target-specific
9968 "MAP"
9969 if f.args.len() == 2
9970 && matches!(
9971 source,
9972 DialectType::Presto
9973 | DialectType::Trino
9974 | DialectType::Athena
9975 ) =>
9976 {
9977 let keys_arg = f.args[0].clone();
9978 let vals_arg = f.args[1].clone();
9979
9980 // Helper: extract array elements from Array/ArrayFunc/Function("ARRAY") expressions
9981 fn extract_array_elements(
9982 expr: &Expression,
9983 ) -> Option<&Vec<Expression>> {
9984 match expr {
9985 Expression::Array(arr) => Some(&arr.expressions),
9986 Expression::ArrayFunc(arr) => Some(&arr.expressions),
9987 Expression::Function(f)
9988 if f.name.eq_ignore_ascii_case("ARRAY") =>
9989 {
9990 Some(&f.args)
9991 }
9992 _ => None,
9993 }
9994 }
9995
9996 match target {
9997 DialectType::Spark | DialectType::Databricks => {
9998 // Presto MAP(keys, vals) -> Spark MAP_FROM_ARRAYS(keys, vals)
9999 Ok(Expression::Function(Box::new(Function::new(
10000 "MAP_FROM_ARRAYS".to_string(),
10001 f.args,
10002 ))))
10003 }
10004 DialectType::Hive => {
10005 // Presto MAP(ARRAY[k1,k2], ARRAY[v1,v2]) -> Hive MAP(k1, v1, k2, v2)
10006 if let (Some(keys), Some(vals)) = (
10007 extract_array_elements(&keys_arg),
10008 extract_array_elements(&vals_arg),
10009 ) {
10010 if keys.len() == vals.len() {
10011 let mut interleaved = Vec::new();
10012 for (k, v) in keys.iter().zip(vals.iter()) {
10013 interleaved.push(k.clone());
10014 interleaved.push(v.clone());
10015 }
10016 Ok(Expression::Function(Box::new(Function::new(
10017 "MAP".to_string(),
10018 interleaved,
10019 ))))
10020 } else {
10021 Ok(Expression::Function(Box::new(Function::new(
10022 "MAP".to_string(),
10023 f.args,
10024 ))))
10025 }
10026 } else {
10027 Ok(Expression::Function(Box::new(Function::new(
10028 "MAP".to_string(),
10029 f.args,
10030 ))))
10031 }
10032 }
10033 DialectType::Snowflake => {
10034 // Presto MAP(ARRAY[k1,k2], ARRAY[v1,v2]) -> Snowflake OBJECT_CONSTRUCT(k1, v1, k2, v2)
10035 if let (Some(keys), Some(vals)) = (
10036 extract_array_elements(&keys_arg),
10037 extract_array_elements(&vals_arg),
10038 ) {
10039 if keys.len() == vals.len() {
10040 let mut interleaved = Vec::new();
10041 for (k, v) in keys.iter().zip(vals.iter()) {
10042 interleaved.push(k.clone());
10043 interleaved.push(v.clone());
10044 }
10045 Ok(Expression::Function(Box::new(Function::new(
10046 "OBJECT_CONSTRUCT".to_string(),
10047 interleaved,
10048 ))))
10049 } else {
10050 Ok(Expression::Function(Box::new(Function::new(
10051 "MAP".to_string(),
10052 f.args,
10053 ))))
10054 }
10055 } else {
10056 Ok(Expression::Function(Box::new(Function::new(
10057 "MAP".to_string(),
10058 f.args,
10059 ))))
10060 }
10061 }
10062 _ => Ok(Expression::Function(f)),
10063 }
10064 }
10065 // MAP() with 0 args from Spark -> MAP(ARRAY[], ARRAY[]) for Presto/Trino
10066 "MAP"
10067 if f.args.is_empty()
10068 && matches!(
10069 source,
10070 DialectType::Hive
10071 | DialectType::Spark
10072 | DialectType::Databricks
10073 )
10074 && matches!(
10075 target,
10076 DialectType::Presto
10077 | DialectType::Trino
10078 | DialectType::Athena
10079 ) =>
10080 {
10081 let empty_keys =
10082 Expression::Array(Box::new(crate::expressions::Array {
10083 expressions: vec![],
10084 }));
10085 let empty_vals =
10086 Expression::Array(Box::new(crate::expressions::Array {
10087 expressions: vec![],
10088 }));
10089 Ok(Expression::Function(Box::new(Function::new(
10090 "MAP".to_string(),
10091 vec![empty_keys, empty_vals],
10092 ))))
10093 }
10094 // MAP(k1, v1, k2, v2, ...) from Hive/Spark -> target-specific
10095 "MAP"
10096 if f.args.len() >= 2
10097 && f.args.len() % 2 == 0
10098 && matches!(
10099 source,
10100 DialectType::Hive
10101 | DialectType::Spark
10102 | DialectType::Databricks
10103 | DialectType::ClickHouse
10104 ) =>
10105 {
10106 let args = f.args;
10107 match target {
10108 DialectType::DuckDB => {
10109 // MAP([k1, k2], [v1, v2])
10110 let mut keys = Vec::new();
10111 let mut vals = Vec::new();
10112 for (i, arg) in args.into_iter().enumerate() {
10113 if i % 2 == 0 {
10114 keys.push(arg);
10115 } else {
10116 vals.push(arg);
10117 }
10118 }
10119 let keys_arr = Expression::Array(Box::new(
10120 crate::expressions::Array { expressions: keys },
10121 ));
10122 let vals_arr = Expression::Array(Box::new(
10123 crate::expressions::Array { expressions: vals },
10124 ));
10125 Ok(Expression::Function(Box::new(Function::new(
10126 "MAP".to_string(),
10127 vec![keys_arr, vals_arr],
10128 ))))
10129 }
10130 DialectType::Presto | DialectType::Trino => {
10131 // MAP(ARRAY[k1, k2], ARRAY[v1, v2])
10132 let mut keys = Vec::new();
10133 let mut vals = Vec::new();
10134 for (i, arg) in args.into_iter().enumerate() {
10135 if i % 2 == 0 {
10136 keys.push(arg);
10137 } else {
10138 vals.push(arg);
10139 }
10140 }
10141 let keys_arr = Expression::Array(Box::new(
10142 crate::expressions::Array { expressions: keys },
10143 ));
10144 let vals_arr = Expression::Array(Box::new(
10145 crate::expressions::Array { expressions: vals },
10146 ));
10147 Ok(Expression::Function(Box::new(Function::new(
10148 "MAP".to_string(),
10149 vec![keys_arr, vals_arr],
10150 ))))
10151 }
10152 DialectType::Snowflake => Ok(Expression::Function(Box::new(
10153 Function::new("OBJECT_CONSTRUCT".to_string(), args),
10154 ))),
10155 DialectType::ClickHouse => Ok(Expression::Function(Box::new(
10156 Function::new("map".to_string(), args),
10157 ))),
10158 _ => Ok(Expression::Function(Box::new(Function::new(
10159 "MAP".to_string(),
10160 args,
10161 )))),
10162 }
10163 }
10164 // COLLECT_LIST(x) -> ARRAY_AGG(x) for most targets
10165 "COLLECT_LIST" if f.args.len() >= 1 => {
10166 let name = match target {
10167 DialectType::Spark
10168 | DialectType::Databricks
10169 | DialectType::Hive => "COLLECT_LIST",
10170 DialectType::DuckDB
10171 | DialectType::PostgreSQL
10172 | DialectType::Redshift
10173 | DialectType::Snowflake
10174 | DialectType::BigQuery => "ARRAY_AGG",
10175 DialectType::Presto | DialectType::Trino => "ARRAY_AGG",
10176 _ => "ARRAY_AGG",
10177 };
10178 Ok(Expression::Function(Box::new(Function::new(
10179 name.to_string(),
10180 f.args,
10181 ))))
10182 }
10183 // COLLECT_SET(x) -> target-specific distinct array aggregation
10184 "COLLECT_SET" if f.args.len() >= 1 => {
10185 let name = match target {
10186 DialectType::Spark
10187 | DialectType::Databricks
10188 | DialectType::Hive => "COLLECT_SET",
10189 DialectType::Presto
10190 | DialectType::Trino
10191 | DialectType::Athena => "SET_AGG",
10192 DialectType::Snowflake => "ARRAY_UNIQUE_AGG",
10193 _ => "ARRAY_AGG",
10194 };
10195 Ok(Expression::Function(Box::new(Function::new(
10196 name.to_string(),
10197 f.args,
10198 ))))
10199 }
10200 // ISNAN(x) / IS_NAN(x) - normalize
10201 "ISNAN" | "IS_NAN" => {
10202 let name = match target {
10203 DialectType::Spark
10204 | DialectType::Databricks
10205 | DialectType::Hive => "ISNAN",
10206 DialectType::Presto
10207 | DialectType::Trino
10208 | DialectType::Athena => "IS_NAN",
10209 DialectType::BigQuery
10210 | DialectType::PostgreSQL
10211 | DialectType::Redshift => "IS_NAN",
10212 DialectType::ClickHouse => "IS_NAN",
10213 _ => "ISNAN",
10214 };
10215 Ok(Expression::Function(Box::new(Function::new(
10216 name.to_string(),
10217 f.args,
10218 ))))
10219 }
10220 // SPLIT_PART(str, delim, index) -> target-specific
10221 "SPLIT_PART" if f.args.len() == 3 => {
10222 match target {
10223 DialectType::Spark | DialectType::Databricks => {
10224 // Keep as SPLIT_PART (Spark 3.4+)
10225 Ok(Expression::Function(Box::new(Function::new(
10226 "SPLIT_PART".to_string(),
10227 f.args,
10228 ))))
10229 }
10230 DialectType::DuckDB
10231 | DialectType::PostgreSQL
10232 | DialectType::Snowflake
10233 | DialectType::Redshift
10234 | DialectType::Trino
10235 | DialectType::Presto => Ok(Expression::Function(Box::new(
10236 Function::new("SPLIT_PART".to_string(), f.args),
10237 ))),
10238 DialectType::Hive => {
10239 // SPLIT(str, delim)[index]
10240 // Complex conversion, just keep as-is for now
10241 Ok(Expression::Function(Box::new(Function::new(
10242 "SPLIT_PART".to_string(),
10243 f.args,
10244 ))))
10245 }
10246 _ => Ok(Expression::Function(Box::new(Function::new(
10247 "SPLIT_PART".to_string(),
10248 f.args,
10249 )))),
10250 }
10251 }
// JSON_EXTRACT(json, path) -> target-specific JSON extraction
// JSON_EXTRACT_SCALAR extracts a scalar as text; JSON_EXTRACT returns the
// (possibly structured) JSON value.
"JSON_EXTRACT" | "JSON_EXTRACT_SCALAR" if f.args.len() == 2 => {
    let is_scalar = name == "JSON_EXTRACT_SCALAR";
    match target {
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => {
            let mut args = f.args;
            // Spark/Hive don't support Presto's TRY(expr) wrapper form here.
            // Mirror sqlglot by unwrapping TRY(expr) to expr before GET_JSON_OBJECT.
            if let Some(Expression::Function(inner)) = args.first() {
                if inner.name.eq_ignore_ascii_case("TRY")
                    && inner.args.len() == 1
                {
                    let mut inner_args = inner.args.clone();
                    args[0] = inner_args.remove(0);
                }
            }
            Ok(Expression::Function(Box::new(Function::new(
                "GET_JSON_OBJECT".to_string(),
                args,
            ))))
        }
        DialectType::DuckDB | DialectType::SQLite => {
            // json -> path syntax
            // NOTE(review): both variants map to JsonExtract (-> syntax)
            // here, while the earlier JSON_EXTRACT_PATH arm distinguishes
            // JsonExtractScalar for the text form — confirm whether the
            // scalar variant should use ->> instead.
            let mut args = f.args;
            let json_expr = args.remove(0);
            let path = args.remove(0);
            Ok(Expression::JsonExtract(Box::new(
                crate::expressions::JsonExtractFunc {
                    this: json_expr,
                    path,
                    returning: None,
                    arrow_syntax: true,
                    hash_arrow_syntax: false,
                    wrapper_option: None,
                    quotes_option: None,
                    on_scalar_string: false,
                    on_error: None,
                },
            )))
        }
        DialectType::TSQL => {
            // TSQL: JSON_VALUE for scalars, JSON_QUERY for objects/arrays.
            let func_name = if is_scalar {
                "JSON_VALUE"
            } else {
                "JSON_QUERY"
            };
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                f.args,
            ))))
        }
        DialectType::PostgreSQL | DialectType::Redshift => {
            // NOTE(review): the path argument is passed through verbatim;
            // PostgreSQL's JSON_EXTRACT_PATH takes key components rather
            // than a '$.a.b' JSONPath string — confirm the path is
            // normalized upstream.
            let func_name = if is_scalar {
                "JSON_EXTRACT_PATH_TEXT"
            } else {
                "JSON_EXTRACT_PATH"
            };
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                f.args,
            ))))
        }
        // Default: keep the original function name unchanged.
        _ => Ok(Expression::Function(Box::new(Function::new(
            name.to_string(),
            f.args,
        )))),
    }
}
// MySQL JSON_SEARCH(json_doc, mode, search[, escape_char[, path]]) -> DuckDB json_tree-based lookup
"JSON_SEARCH"
    if matches!(target, DialectType::DuckDB)
        && (3..=5).contains(&f.args.len()) =>
{
    let args = &f.args;

    // Only rewrite deterministic modes and NULL/no escape-char variant.
    let mode = match &args[1] {
        Expression::Literal(crate::expressions::Literal::String(s)) => {
            s.to_ascii_lowercase()
        }
        // Non-literal mode argument: leave the call untouched.
        _ => return Ok(Expression::Function(f)),
    };
    if mode != "one" && mode != "all" {
        return Ok(Expression::Function(f));
    }
    // A non-NULL escape character would alter pattern semantics; bail out.
    if args.len() >= 4 && !matches!(&args[3], Expression::Null(_)) {
        return Ok(Expression::Function(f));
    }

    // Render the sub-expressions to SQL text so they can be spliced into
    // the json_tree() scalar subquery; any render failure aborts the
    // rewrite and keeps the original call.
    let json_doc_sql = match Generator::sql(&args[0]) {
        Ok(sql) => sql,
        Err(_) => return Ok(Expression::Function(f)),
    };
    let search_sql = match Generator::sql(&args[2]) {
        Ok(sql) => sql,
        Err(_) => return Ok(Expression::Function(f)),
    };
    let path_sql = if args.len() == 5 {
        match Generator::sql(&args[4]) {
            Ok(sql) => sql,
            Err(_) => return Ok(Expression::Function(f)),
        }
    } else {
        // Default starting path: the document root.
        "'$'".to_string()
    };

    // 'all' collects every matching path as a JSON array; 'one' takes the
    // first match in document order (ORDER BY __jt.id LIMIT 1).
    // NOTE(review): the WHERE clause compares atoms by equality; MySQL's
    // JSON_SEARCH treats the search string as a LIKE pattern, so wildcard
    // searches are not covered — confirm this restriction is intended.
    let rewrite_sql = if mode == "all" {
        format!(
            "(SELECT TO_JSON(LIST(__jt.fullkey)) FROM json_tree({}, {}) AS __jt WHERE __jt.atom = TO_JSON({}))",
            json_doc_sql, path_sql, search_sql
        )
    } else {
        format!(
            "(SELECT TO_JSON(__jt.fullkey) FROM json_tree({}, {}) AS __jt WHERE __jt.atom = TO_JSON({}) ORDER BY __jt.id LIMIT 1)",
            json_doc_sql, path_sql, search_sql
        )
    };

    // Emitted as raw SQL: there is no structured AST for this subquery.
    Ok(Expression::Raw(crate::expressions::Raw {
        sql: rewrite_sql,
    }))
}
// SingleStore JSON_EXTRACT_JSON(json, key1, key2, ...) -> JSON_EXTRACT(json, '$.key1.key2' or '$.key1[key2]')
// BSON_EXTRACT_BSON(json, key1, ...) -> JSONB_EXTRACT(json, '$.key1')
"JSON_EXTRACT_JSON" | "BSON_EXTRACT_BSON"
    if f.args.len() >= 2
        && matches!(source, DialectType::SingleStore) =>
{
    let is_bson = name == "BSON_EXTRACT_BSON";
    let mut args = f.args;
    let json_expr = args.remove(0);

    // Build JSONPath from remaining arguments
    let mut path = String::from("$");
    for arg in &args {
        // Only string-literal keys contribute to the path.
        // NOTE(review): non-literal key arguments (columns, expressions)
        // are silently skipped here — confirm this is intended rather
        // than bailing out and keeping the original call.
        if let Expression::Literal(
            crate::expressions::Literal::String(s),
        ) = arg
        {
            // Check if it's a numeric string (array index)
            if s.parse::<i64>().is_ok() {
                path.push('[');
                path.push_str(s);
                path.push(']');
            } else {
                path.push('.');
                path.push_str(s);
            }
        }
    }

    let target_func = if is_bson {
        "JSONB_EXTRACT"
    } else {
        "JSON_EXTRACT"
    };
    Ok(Expression::Function(Box::new(Function::new(
        target_func.to_string(),
        vec![json_expr, Expression::string(&path)],
    ))))
}
10415 // ARRAY_SUM(lambda, array) from Doris -> ClickHouse arraySum
10416 "ARRAY_SUM" if matches!(target, DialectType::ClickHouse) => {
10417 Ok(Expression::Function(Box::new(Function {
10418 name: "arraySum".to_string(),
10419 args: f.args,
10420 distinct: f.distinct,
10421 trailing_comments: f.trailing_comments,
10422 use_bracket_syntax: f.use_bracket_syntax,
10423 no_parens: f.no_parens,
10424 quoted: f.quoted,
10425 span: None,
10426 })))
10427 }
10428 // TSQL JSON_QUERY/JSON_VALUE -> target-specific
10429 // Note: For TSQL->TSQL, JsonQuery stays as Expression::JsonQuery (source transform not called)
10430 // and is handled by JsonQueryValueConvert action. This handles the case where
10431 // TSQL read transform converted JsonQuery to Function("JSON_QUERY") for cross-dialect.
10432 "JSON_QUERY" | "JSON_VALUE"
10433 if f.args.len() == 2
10434 && matches!(
10435 source,
10436 DialectType::TSQL | DialectType::Fabric
10437 ) =>
10438 {
10439 match target {
10440 DialectType::Spark
10441 | DialectType::Databricks
10442 | DialectType::Hive => Ok(Expression::Function(Box::new(
10443 Function::new("GET_JSON_OBJECT".to_string(), f.args),
10444 ))),
10445 _ => Ok(Expression::Function(Box::new(Function::new(
10446 name.to_string(),
10447 f.args,
10448 )))),
10449 }
10450 }
// UNIX_TIMESTAMP(x) -> TO_UNIXTIME(x) for Presto
"UNIX_TIMESTAMP" if f.args.len() == 1 => {
    let arg = f.args.into_iter().next().unwrap();
    // Hive-family sources get special treatment: the rewrites below bake
    // in the 'yyyy-MM-dd HH:mm:ss' format used for string arguments.
    let is_hive_source = matches!(
        source,
        DialectType::Hive
            | DialectType::Spark
            | DialectType::Databricks
    );
    match target {
        DialectType::DuckDB if is_hive_source => {
            // DuckDB: EPOCH(STRPTIME(x, '%Y-%m-%d %H:%M:%S'))
            let strptime =
                Expression::Function(Box::new(Function::new(
                    "STRPTIME".to_string(),
                    vec![arg, Expression::string("%Y-%m-%d %H:%M:%S")],
                )));
            Ok(Expression::Function(Box::new(Function::new(
                "EPOCH".to_string(),
                vec![strptime],
            ))))
        }
        DialectType::Presto | DialectType::Trino if is_hive_source => {
            // Presto: TO_UNIXTIME(COALESCE(TRY(DATE_PARSE(CAST(x AS VARCHAR), '%Y-%m-%d %T')), PARSE_DATETIME(DATE_FORMAT(x, '%Y-%m-%d %T'), 'yyyy-MM-dd HH:mm:ss')))
            // Two-step fallback: first try parsing x as a string in the
            // Hive default format; if that fails, re-format the value
            // with DATE_FORMAT and parse the result via the Joda-style
            // PARSE_DATETIME.
            let cast_varchar =
                Expression::Cast(Box::new(crate::expressions::Cast {
                    this: arg.clone(),
                    to: DataType::VarChar {
                        length: None,
                        parenthesized_length: false,
                    },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                }));
            let date_parse =
                Expression::Function(Box::new(Function::new(
                    "DATE_PARSE".to_string(),
                    vec![
                        cast_varchar,
                        Expression::string("%Y-%m-%d %T"),
                    ],
                )));
            let try_expr = Expression::Function(Box::new(
                Function::new("TRY".to_string(), vec![date_parse]),
            ));
            let date_format =
                Expression::Function(Box::new(Function::new(
                    "DATE_FORMAT".to_string(),
                    vec![arg, Expression::string("%Y-%m-%d %T")],
                )));
            let parse_datetime =
                Expression::Function(Box::new(Function::new(
                    "PARSE_DATETIME".to_string(),
                    vec![
                        date_format,
                        Expression::string("yyyy-MM-dd HH:mm:ss"),
                    ],
                )));
            let coalesce =
                Expression::Function(Box::new(Function::new(
                    "COALESCE".to_string(),
                    vec![try_expr, parse_datetime],
                )));
            Ok(Expression::Function(Box::new(Function::new(
                "TO_UNIXTIME".to_string(),
                vec![coalesce],
            ))))
        }
        DialectType::Presto | DialectType::Trino => {
            // Non-Hive sources: a plain rename is sufficient.
            Ok(Expression::Function(Box::new(Function::new(
                "TO_UNIXTIME".to_string(),
                vec![arg],
            ))))
        }
        // Default: keep UNIX_TIMESTAMP as-is.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "UNIX_TIMESTAMP".to_string(),
            vec![arg],
        )))),
    }
}
10533 // TO_UNIX_TIMESTAMP(x) -> UNIX_TIMESTAMP(x) for Spark/Hive
10534 "TO_UNIX_TIMESTAMP" if f.args.len() >= 1 => match target {
10535 DialectType::Spark
10536 | DialectType::Databricks
10537 | DialectType::Hive => Ok(Expression::Function(Box::new(
10538 Function::new("UNIX_TIMESTAMP".to_string(), f.args),
10539 ))),
10540 _ => Ok(Expression::Function(Box::new(Function::new(
10541 "TO_UNIX_TIMESTAMP".to_string(),
10542 f.args,
10543 )))),
10544 },
// CURDATE() -> CURRENT_DATE
// Normalized to the dedicated CurrentDate AST node, which each generator
// renders in its own syntax; any arguments on the call are discarded.
"CURDATE" => {
    Ok(Expression::CurrentDate(crate::expressions::CurrentDate))
}
// CURTIME() -> CURRENT_TIME
// Normalized to the CurrentTime node with no precision. NOTE(review): a
// fractional-seconds precision argument, if present on the original call,
// is dropped here — confirm intended.
"CURTIME" => {
    Ok(Expression::CurrentTime(crate::expressions::CurrentTime {
        precision: None,
    }))
}
10555 // ARRAY_SORT(x) or ARRAY_SORT(x, lambda) -> SORT_ARRAY(x) for Hive (drop lambda)
10556 "ARRAY_SORT" if f.args.len() >= 1 => {
10557 match target {
10558 DialectType::Hive => {
10559 let mut args = f.args;
10560 args.truncate(1); // Drop lambda comparator
10561 Ok(Expression::Function(Box::new(Function::new(
10562 "SORT_ARRAY".to_string(),
10563 args,
10564 ))))
10565 }
10566 _ => Ok(Expression::Function(f)),
10567 }
10568 }
10569 // SORT_ARRAY(x) -> ARRAY_SORT(x) for non-Hive/Spark
10570 "SORT_ARRAY" if f.args.len() == 1 => match target {
10571 DialectType::Hive
10572 | DialectType::Spark
10573 | DialectType::Databricks => Ok(Expression::Function(f)),
10574 _ => Ok(Expression::Function(Box::new(Function::new(
10575 "ARRAY_SORT".to_string(),
10576 f.args,
10577 )))),
10578 },
10579 // SORT_ARRAY(x, FALSE) -> ARRAY_REVERSE_SORT(x) for DuckDB, ARRAY_SORT(x, lambda) for Presto
10580 "SORT_ARRAY" if f.args.len() == 2 => {
10581 let is_desc =
10582 matches!(&f.args[1], Expression::Boolean(b) if !b.value);
10583 if is_desc {
10584 match target {
10585 DialectType::DuckDB => {
10586 Ok(Expression::Function(Box::new(Function::new(
10587 "ARRAY_REVERSE_SORT".to_string(),
10588 vec![f.args.into_iter().next().unwrap()],
10589 ))))
10590 }
10591 DialectType::Presto | DialectType::Trino => {
10592 let arr_arg = f.args.into_iter().next().unwrap();
10593 let a =
10594 Expression::Column(crate::expressions::Column {
10595 name: crate::expressions::Identifier::new("a"),
10596 table: None,
10597 join_mark: false,
10598 trailing_comments: Vec::new(),
10599 span: None,
10600 });
10601 let b =
10602 Expression::Column(crate::expressions::Column {
10603 name: crate::expressions::Identifier::new("b"),
10604 table: None,
10605 join_mark: false,
10606 trailing_comments: Vec::new(),
10607 span: None,
10608 });
10609 let case_expr = Expression::Case(Box::new(
10610 crate::expressions::Case {
10611 operand: None,
10612 whens: vec![
10613 (
10614 Expression::Lt(Box::new(
10615 BinaryOp::new(a.clone(), b.clone()),
10616 )),
10617 Expression::Literal(Literal::Number(
10618 "1".to_string(),
10619 )),
10620 ),
10621 (
10622 Expression::Gt(Box::new(
10623 BinaryOp::new(a.clone(), b.clone()),
10624 )),
10625 Expression::Literal(Literal::Number(
10626 "-1".to_string(),
10627 )),
10628 ),
10629 ],
10630 else_: Some(Expression::Literal(
10631 Literal::Number("0".to_string()),
10632 )),
10633 comments: Vec::new(),
10634 },
10635 ));
10636 let lambda = Expression::Lambda(Box::new(
10637 crate::expressions::LambdaExpr {
10638 parameters: vec![
10639 crate::expressions::Identifier::new("a"),
10640 crate::expressions::Identifier::new("b"),
10641 ],
10642 body: case_expr,
10643 colon: false,
10644 parameter_types: Vec::new(),
10645 },
10646 ));
10647 Ok(Expression::Function(Box::new(Function::new(
10648 "ARRAY_SORT".to_string(),
10649 vec![arr_arg, lambda],
10650 ))))
10651 }
10652 _ => Ok(Expression::Function(f)),
10653 }
10654 } else {
10655 // SORT_ARRAY(x, TRUE) -> ARRAY_SORT(x)
10656 match target {
10657 DialectType::Hive => Ok(Expression::Function(f)),
10658 _ => Ok(Expression::Function(Box::new(Function::new(
10659 "ARRAY_SORT".to_string(),
10660 vec![f.args.into_iter().next().unwrap()],
10661 )))),
10662 }
10663 }
10664 }
10665 // LEFT(x, n), RIGHT(x, n) -> SUBSTRING for targets without LEFT/RIGHT
10666 "LEFT" if f.args.len() == 2 => {
10667 match target {
10668 DialectType::Hive
10669 | DialectType::Presto
10670 | DialectType::Trino
10671 | DialectType::Athena => {
10672 let x = f.args[0].clone();
10673 let n = f.args[1].clone();
10674 Ok(Expression::Function(Box::new(Function::new(
10675 "SUBSTRING".to_string(),
10676 vec![x, Expression::number(1), n],
10677 ))))
10678 }
10679 DialectType::Spark | DialectType::Databricks
10680 if matches!(
10681 source,
10682 DialectType::TSQL | DialectType::Fabric
10683 ) =>
10684 {
10685 // TSQL LEFT(x, n) -> LEFT(CAST(x AS STRING), n) for Spark
10686 let x = f.args[0].clone();
10687 let n = f.args[1].clone();
10688 let cast_x = Expression::Cast(Box::new(Cast {
10689 this: x,
10690 to: DataType::VarChar {
10691 length: None,
10692 parenthesized_length: false,
10693 },
10694 double_colon_syntax: false,
10695 trailing_comments: Vec::new(),
10696 format: None,
10697 default: None,
10698 }));
10699 Ok(Expression::Function(Box::new(Function::new(
10700 "LEFT".to_string(),
10701 vec![cast_x, n],
10702 ))))
10703 }
10704 _ => Ok(Expression::Function(f)),
10705 }
10706 }
10707 "RIGHT" if f.args.len() == 2 => {
10708 match target {
10709 DialectType::Hive
10710 | DialectType::Presto
10711 | DialectType::Trino
10712 | DialectType::Athena => {
10713 let x = f.args[0].clone();
10714 let n = f.args[1].clone();
10715 // SUBSTRING(x, LENGTH(x) - (n - 1))
10716 let len_x = Expression::Function(Box::new(Function::new(
10717 "LENGTH".to_string(),
10718 vec![x.clone()],
10719 )));
10720 let n_minus_1 = Expression::Sub(Box::new(
10721 crate::expressions::BinaryOp::new(
10722 n,
10723 Expression::number(1),
10724 ),
10725 ));
10726 let n_minus_1_paren = Expression::Paren(Box::new(
10727 crate::expressions::Paren {
10728 this: n_minus_1,
10729 trailing_comments: Vec::new(),
10730 },
10731 ));
10732 let offset = Expression::Sub(Box::new(
10733 crate::expressions::BinaryOp::new(
10734 len_x,
10735 n_minus_1_paren,
10736 ),
10737 ));
10738 Ok(Expression::Function(Box::new(Function::new(
10739 "SUBSTRING".to_string(),
10740 vec![x, offset],
10741 ))))
10742 }
10743 DialectType::Spark | DialectType::Databricks
10744 if matches!(
10745 source,
10746 DialectType::TSQL | DialectType::Fabric
10747 ) =>
10748 {
10749 // TSQL RIGHT(x, n) -> RIGHT(CAST(x AS STRING), n) for Spark
10750 let x = f.args[0].clone();
10751 let n = f.args[1].clone();
10752 let cast_x = Expression::Cast(Box::new(Cast {
10753 this: x,
10754 to: DataType::VarChar {
10755 length: None,
10756 parenthesized_length: false,
10757 },
10758 double_colon_syntax: false,
10759 trailing_comments: Vec::new(),
10760 format: None,
10761 default: None,
10762 }));
10763 Ok(Expression::Function(Box::new(Function::new(
10764 "RIGHT".to_string(),
10765 vec![cast_x, n],
10766 ))))
10767 }
10768 _ => Ok(Expression::Function(f)),
10769 }
10770 }
10771 // MAP_FROM_ARRAYS(keys, vals) -> target-specific map construction
10772 "MAP_FROM_ARRAYS" if f.args.len() == 2 => match target {
10773 DialectType::Snowflake => Ok(Expression::Function(Box::new(
10774 Function::new("OBJECT_CONSTRUCT".to_string(), f.args),
10775 ))),
10776 DialectType::Spark | DialectType::Databricks => {
10777 Ok(Expression::Function(Box::new(Function::new(
10778 "MAP_FROM_ARRAYS".to_string(),
10779 f.args,
10780 ))))
10781 }
10782 _ => Ok(Expression::Function(Box::new(Function::new(
10783 "MAP".to_string(),
10784 f.args,
10785 )))),
10786 },
10787 // LIKE(foo, 'pat') -> foo LIKE 'pat'; LIKE(foo, 'pat', '!') -> foo LIKE 'pat' ESCAPE '!'
10788 // SQLite uses LIKE(pattern, string[, escape]) with args in reverse order
10789 "LIKE" if f.args.len() >= 2 => {
10790 let (this, pattern) = if matches!(source, DialectType::SQLite) {
10791 // SQLite: LIKE(pattern, string) -> string LIKE pattern
10792 (f.args[1].clone(), f.args[0].clone())
10793 } else {
10794 // Standard: LIKE(string, pattern) -> string LIKE pattern
10795 (f.args[0].clone(), f.args[1].clone())
10796 };
10797 let escape = if f.args.len() >= 3 {
10798 Some(f.args[2].clone())
10799 } else {
10800 None
10801 };
10802 Ok(Expression::Like(Box::new(crate::expressions::LikeOp {
10803 left: this,
10804 right: pattern,
10805 escape,
10806 quantifier: None,
10807 })))
10808 }
10809 // ILIKE(foo, 'pat') -> foo ILIKE 'pat'
10810 "ILIKE" if f.args.len() >= 2 => {
10811 let this = f.args[0].clone();
10812 let pattern = f.args[1].clone();
10813 let escape = if f.args.len() >= 3 {
10814 Some(f.args[2].clone())
10815 } else {
10816 None
10817 };
10818 Ok(Expression::ILike(Box::new(crate::expressions::LikeOp {
10819 left: this,
10820 right: pattern,
10821 escape,
10822 quantifier: None,
10823 })))
10824 }
10825 // CHAR(n) -> CHR(n) for non-MySQL/non-TSQL targets
10826 "CHAR" if f.args.len() == 1 => match target {
10827 DialectType::MySQL
10828 | DialectType::SingleStore
10829 | DialectType::TSQL => Ok(Expression::Function(f)),
10830 _ => Ok(Expression::Function(Box::new(Function::new(
10831 "CHR".to_string(),
10832 f.args,
10833 )))),
10834 },
10835 // CONCAT(a, b) -> a || b for PostgreSQL
10836 "CONCAT"
10837 if f.args.len() == 2
10838 && matches!(target, DialectType::PostgreSQL)
10839 && matches!(
10840 source,
10841 DialectType::ClickHouse | DialectType::MySQL
10842 ) =>
10843 {
10844 let mut args = f.args;
10845 let right = args.pop().unwrap();
10846 let left = args.pop().unwrap();
10847 Ok(Expression::DPipe(Box::new(crate::expressions::DPipe {
10848 this: Box::new(left),
10849 expression: Box::new(right),
10850 safe: None,
10851 })))
10852 }
10853 // ARRAY_TO_STRING(arr, delim) -> target-specific
10854 "ARRAY_TO_STRING" if f.args.len() >= 2 => match target {
10855 DialectType::Presto | DialectType::Trino => {
10856 Ok(Expression::Function(Box::new(Function::new(
10857 "ARRAY_JOIN".to_string(),
10858 f.args,
10859 ))))
10860 }
10861 DialectType::TSQL => Ok(Expression::Function(Box::new(
10862 Function::new("STRING_AGG".to_string(), f.args),
10863 ))),
10864 _ => Ok(Expression::Function(f)),
10865 },
10866 // ARRAY_CONCAT / LIST_CONCAT -> target-specific
10867 "ARRAY_CONCAT" | "LIST_CONCAT" if f.args.len() == 2 => match target {
10868 DialectType::Spark
10869 | DialectType::Databricks
10870 | DialectType::Hive => Ok(Expression::Function(Box::new(
10871 Function::new("CONCAT".to_string(), f.args),
10872 ))),
10873 DialectType::Snowflake => Ok(Expression::Function(Box::new(
10874 Function::new("ARRAY_CAT".to_string(), f.args),
10875 ))),
10876 DialectType::Redshift => Ok(Expression::Function(Box::new(
10877 Function::new("ARRAY_CONCAT".to_string(), f.args),
10878 ))),
10879 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
10880 Function::new("ARRAY_CAT".to_string(), f.args),
10881 ))),
10882 DialectType::DuckDB => Ok(Expression::Function(Box::new(
10883 Function::new("LIST_CONCAT".to_string(), f.args),
10884 ))),
10885 DialectType::Presto | DialectType::Trino => {
10886 Ok(Expression::Function(Box::new(Function::new(
10887 "CONCAT".to_string(),
10888 f.args,
10889 ))))
10890 }
10891 DialectType::BigQuery => Ok(Expression::Function(Box::new(
10892 Function::new("ARRAY_CONCAT".to_string(), f.args),
10893 ))),
10894 _ => Ok(Expression::Function(f)),
10895 },
10896 // ARRAY_CONTAINS(arr, x) / HAS(arr, x) / CONTAINS(arr, x) normalization
10897 "HAS" if f.args.len() == 2 => match target {
10898 DialectType::Spark
10899 | DialectType::Databricks
10900 | DialectType::Hive => Ok(Expression::Function(Box::new(
10901 Function::new("ARRAY_CONTAINS".to_string(), f.args),
10902 ))),
10903 DialectType::Presto | DialectType::Trino => {
10904 Ok(Expression::Function(Box::new(Function::new(
10905 "CONTAINS".to_string(),
10906 f.args,
10907 ))))
10908 }
10909 _ => Ok(Expression::Function(f)),
10910 },
10911 // NVL(a, b, c, d) -> COALESCE(a, b, c, d) - NVL should keep all args
10912 "NVL" if f.args.len() > 2 => Ok(Expression::Function(Box::new(
10913 Function::new("COALESCE".to_string(), f.args),
10914 ))),
10915 // ISNULL(x) in MySQL -> (x IS NULL)
10916 "ISNULL"
10917 if f.args.len() == 1
10918 && matches!(source, DialectType::MySQL)
10919 && matches!(target, DialectType::MySQL) =>
10920 {
10921 let arg = f.args.into_iter().next().unwrap();
10922 Ok(Expression::Paren(Box::new(crate::expressions::Paren {
10923 this: Expression::IsNull(Box::new(
10924 crate::expressions::IsNull {
10925 this: arg,
10926 not: false,
10927 postfix_form: false,
10928 },
10929 )),
10930 trailing_comments: Vec::new(),
10931 })))
10932 }
10933 // MONTHNAME(x) -> DATE_FORMAT(x, '%M') for MySQL -> MySQL
10934 "MONTHNAME"
10935 if f.args.len() == 1 && matches!(target, DialectType::MySQL) =>
10936 {
10937 let arg = f.args.into_iter().next().unwrap();
10938 Ok(Expression::Function(Box::new(Function::new(
10939 "DATE_FORMAT".to_string(),
10940 vec![arg, Expression::string("%M")],
10941 ))))
10942 }
10943 // ClickHouse splitByString('s', x) -> DuckDB STR_SPLIT(x, 's') / Hive SPLIT(x, CONCAT('\\Q', 's', '\\E'))
10944 "SPLITBYSTRING" if f.args.len() == 2 => {
10945 let sep = f.args[0].clone();
10946 let str_arg = f.args[1].clone();
10947 match target {
10948 DialectType::DuckDB => Ok(Expression::Function(Box::new(
10949 Function::new("STR_SPLIT".to_string(), vec![str_arg, sep]),
10950 ))),
10951 DialectType::Doris => {
10952 Ok(Expression::Function(Box::new(Function::new(
10953 "SPLIT_BY_STRING".to_string(),
10954 vec![str_arg, sep],
10955 ))))
10956 }
10957 DialectType::Hive
10958 | DialectType::Spark
10959 | DialectType::Databricks => {
10960 // SPLIT(x, CONCAT('\\Q', sep, '\\E'))
10961 let escaped =
10962 Expression::Function(Box::new(Function::new(
10963 "CONCAT".to_string(),
10964 vec![
10965 Expression::string("\\Q"),
10966 sep,
10967 Expression::string("\\E"),
10968 ],
10969 )));
10970 Ok(Expression::Function(Box::new(Function::new(
10971 "SPLIT".to_string(),
10972 vec![str_arg, escaped],
10973 ))))
10974 }
10975 _ => Ok(Expression::Function(f)),
10976 }
10977 }
10978 // ClickHouse splitByRegexp('pattern', x) -> DuckDB STR_SPLIT_REGEX(x, 'pattern')
10979 "SPLITBYREGEXP" if f.args.len() == 2 => {
10980 let sep = f.args[0].clone();
10981 let str_arg = f.args[1].clone();
10982 match target {
10983 DialectType::DuckDB => {
10984 Ok(Expression::Function(Box::new(Function::new(
10985 "STR_SPLIT_REGEX".to_string(),
10986 vec![str_arg, sep],
10987 ))))
10988 }
10989 DialectType::Hive
10990 | DialectType::Spark
10991 | DialectType::Databricks => {
10992 Ok(Expression::Function(Box::new(Function::new(
10993 "SPLIT".to_string(),
10994 vec![str_arg, sep],
10995 ))))
10996 }
10997 _ => Ok(Expression::Function(f)),
10998 }
10999 }
11000 // ClickHouse toMonday(x) -> DATE_TRUNC('WEEK', x) / DATE_TRUNC(x, 'WEEK') for Doris
11001 "TOMONDAY" => {
11002 if f.args.len() == 1 {
11003 let arg = f.args.into_iter().next().unwrap();
11004 match target {
11005 DialectType::Doris => {
11006 Ok(Expression::Function(Box::new(Function::new(
11007 "DATE_TRUNC".to_string(),
11008 vec![arg, Expression::string("WEEK")],
11009 ))))
11010 }
11011 _ => Ok(Expression::Function(Box::new(Function::new(
11012 "DATE_TRUNC".to_string(),
11013 vec![Expression::string("WEEK"), arg],
11014 )))),
11015 }
11016 } else {
11017 Ok(Expression::Function(f))
11018 }
11019 }
11020 // COLLECT_LIST with FILTER(WHERE x IS NOT NULL) for targets that need it
11021 "COLLECT_LIST" if f.args.len() == 1 => match target {
11022 DialectType::Spark
11023 | DialectType::Databricks
11024 | DialectType::Hive => Ok(Expression::Function(f)),
11025 _ => Ok(Expression::Function(Box::new(Function::new(
11026 "ARRAY_AGG".to_string(),
11027 f.args,
11028 )))),
11029 },
11030 // TO_CHAR(x) with 1 arg -> CAST(x AS STRING) for Doris
11031 "TO_CHAR"
11032 if f.args.len() == 1 && matches!(target, DialectType::Doris) =>
11033 {
11034 let arg = f.args.into_iter().next().unwrap();
11035 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
11036 this: arg,
11037 to: DataType::Custom {
11038 name: "STRING".to_string(),
11039 },
11040 double_colon_syntax: false,
11041 trailing_comments: Vec::new(),
11042 format: None,
11043 default: None,
11044 })))
11045 }
11046 // DBMS_RANDOM.VALUE() -> RANDOM() for PostgreSQL
11047 "DBMS_RANDOM.VALUE" if f.args.is_empty() => match target {
11048 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
11049 Function::new("RANDOM".to_string(), vec![]),
11050 ))),
11051 _ => Ok(Expression::Function(f)),
11052 },
11053 // ClickHouse formatDateTime -> target-specific
11054 "FORMATDATETIME" if f.args.len() >= 2 => match target {
11055 DialectType::MySQL => Ok(Expression::Function(Box::new(
11056 Function::new("DATE_FORMAT".to_string(), f.args),
11057 ))),
11058 _ => Ok(Expression::Function(f)),
11059 },
11060 // REPLICATE('x', n) -> REPEAT('x', n) for non-TSQL targets
11061 "REPLICATE" if f.args.len() == 2 => match target {
11062 DialectType::TSQL => Ok(Expression::Function(f)),
11063 _ => Ok(Expression::Function(Box::new(Function::new(
11064 "REPEAT".to_string(),
11065 f.args,
11066 )))),
11067 },
11068 // LEN(x) -> LENGTH(x) for non-TSQL targets
11069 // No CAST needed when arg is already a string literal
11070 "LEN" if f.args.len() == 1 => {
11071 match target {
11072 DialectType::TSQL => Ok(Expression::Function(f)),
11073 DialectType::Spark | DialectType::Databricks => {
11074 let arg = f.args.into_iter().next().unwrap();
11075 // Don't wrap string literals with CAST - they're already strings
11076 let is_string = matches!(
11077 &arg,
11078 Expression::Literal(
11079 crate::expressions::Literal::String(_)
11080 )
11081 );
11082 let final_arg = if is_string {
11083 arg
11084 } else {
11085 Expression::Cast(Box::new(Cast {
11086 this: arg,
11087 to: DataType::VarChar {
11088 length: None,
11089 parenthesized_length: false,
11090 },
11091 double_colon_syntax: false,
11092 trailing_comments: Vec::new(),
11093 format: None,
11094 default: None,
11095 }))
11096 };
11097 Ok(Expression::Function(Box::new(Function::new(
11098 "LENGTH".to_string(),
11099 vec![final_arg],
11100 ))))
11101 }
11102 _ => {
11103 let arg = f.args.into_iter().next().unwrap();
11104 Ok(Expression::Function(Box::new(Function::new(
11105 "LENGTH".to_string(),
11106 vec![arg],
11107 ))))
11108 }
11109 }
11110 }
11111 // COUNT_BIG(x) -> COUNT(x) for non-TSQL targets
11112 "COUNT_BIG" if f.args.len() == 1 => match target {
11113 DialectType::TSQL => Ok(Expression::Function(f)),
11114 _ => Ok(Expression::Function(Box::new(Function::new(
11115 "COUNT".to_string(),
11116 f.args,
11117 )))),
11118 },
11119 // DATEFROMPARTS(y, m, d) -> MAKE_DATE(y, m, d) for non-TSQL targets
11120 "DATEFROMPARTS" if f.args.len() == 3 => match target {
11121 DialectType::TSQL => Ok(Expression::Function(f)),
11122 _ => Ok(Expression::Function(Box::new(Function::new(
11123 "MAKE_DATE".to_string(),
11124 f.args,
11125 )))),
11126 },
11127 // REGEXP_LIKE(str, pattern) -> RegexpLike expression (target-specific output)
11128 "REGEXP_LIKE" if f.args.len() >= 2 => {
11129 let str_expr = f.args[0].clone();
11130 let pattern = f.args[1].clone();
11131 let flags = if f.args.len() >= 3 {
11132 Some(f.args[2].clone())
11133 } else {
11134 None
11135 };
11136 match target {
11137 DialectType::DuckDB => {
11138 let mut new_args = vec![str_expr, pattern];
11139 if let Some(fl) = flags {
11140 new_args.push(fl);
11141 }
11142 Ok(Expression::Function(Box::new(Function::new(
11143 "REGEXP_MATCHES".to_string(),
11144 new_args,
11145 ))))
11146 }
11147 _ => Ok(Expression::RegexpLike(Box::new(
11148 crate::expressions::RegexpFunc {
11149 this: str_expr,
11150 pattern,
11151 flags,
11152 },
11153 ))),
11154 }
11155 }
11156 // ClickHouse arrayJoin -> UNNEST for PostgreSQL
11157 "ARRAYJOIN" if f.args.len() == 1 => match target {
11158 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
11159 Function::new("UNNEST".to_string(), f.args),
11160 ))),
11161 _ => Ok(Expression::Function(f)),
11162 },
11163 // DATETIMEFROMPARTS(y, m, d, h, mi, s, ms) -> MAKE_TIMESTAMP / TIMESTAMP_FROM_PARTS
11164 "DATETIMEFROMPARTS" if f.args.len() == 7 => {
11165 match target {
11166 DialectType::TSQL => Ok(Expression::Function(f)),
11167 DialectType::DuckDB => {
11168 // MAKE_TIMESTAMP(y, m, d, h, mi, s + (ms / 1000.0))
11169 let mut args = f.args;
11170 let ms = args.pop().unwrap();
11171 let s = args.pop().unwrap();
11172 // s + (ms / 1000.0)
11173 let ms_frac = Expression::Div(Box::new(BinaryOp::new(
11174 ms,
11175 Expression::Literal(
11176 crate::expressions::Literal::Number(
11177 "1000.0".to_string(),
11178 ),
11179 ),
11180 )));
11181 let s_with_ms = Expression::Add(Box::new(BinaryOp::new(
11182 s,
11183 Expression::Paren(Box::new(Paren {
11184 this: ms_frac,
11185 trailing_comments: vec![],
11186 })),
11187 )));
11188 args.push(s_with_ms);
11189 Ok(Expression::Function(Box::new(Function::new(
11190 "MAKE_TIMESTAMP".to_string(),
11191 args,
11192 ))))
11193 }
11194 DialectType::Snowflake => {
11195 // TIMESTAMP_FROM_PARTS(y, m, d, h, mi, s, ms * 1000000)
11196 let mut args = f.args;
11197 let ms = args.pop().unwrap();
11198 // ms * 1000000
11199 let ns = Expression::Mul(Box::new(BinaryOp::new(
11200 ms,
11201 Expression::number(1000000),
11202 )));
11203 args.push(ns);
11204 Ok(Expression::Function(Box::new(Function::new(
11205 "TIMESTAMP_FROM_PARTS".to_string(),
11206 args,
11207 ))))
11208 }
11209 _ => {
11210 // Default: keep function name for other targets
11211 Ok(Expression::Function(Box::new(Function::new(
11212 "DATETIMEFROMPARTS".to_string(),
11213 f.args,
11214 ))))
11215 }
11216 }
11217 }
11218 // CONVERT(type, expr [, style]) -> CAST(expr AS type) for non-TSQL targets
11219 // TRY_CONVERT(type, expr [, style]) -> TRY_CAST(expr AS type) for non-TSQL targets
11220 "CONVERT" | "TRY_CONVERT" if f.args.len() >= 2 => {
11221 let is_try = name == "TRY_CONVERT";
11222 let type_expr = f.args[0].clone();
11223 let value_expr = f.args[1].clone();
11224 let style = if f.args.len() >= 3 {
11225 Some(&f.args[2])
11226 } else {
11227 None
11228 };
11229
11230 // For TSQL->TSQL, normalize types and preserve CONVERT/TRY_CONVERT
11231 if matches!(target, DialectType::TSQL) {
11232 let normalized_type = match &type_expr {
11233 Expression::DataType(dt) => {
11234 let new_dt = match dt {
11235 DataType::Int { .. } => DataType::Custom {
11236 name: "INTEGER".to_string(),
11237 },
11238 _ => dt.clone(),
11239 };
11240 Expression::DataType(new_dt)
11241 }
11242 Expression::Identifier(id) => {
11243 let upper = id.name.to_uppercase();
11244 let normalized = match upper.as_str() {
11245 "INT" => "INTEGER",
11246 _ => &upper,
11247 };
11248 Expression::Identifier(
11249 crate::expressions::Identifier::new(normalized),
11250 )
11251 }
11252 Expression::Column(col) => {
11253 let upper = col.name.name.to_uppercase();
11254 let normalized = match upper.as_str() {
11255 "INT" => "INTEGER",
11256 _ => &upper,
11257 };
11258 Expression::Identifier(
11259 crate::expressions::Identifier::new(normalized),
11260 )
11261 }
11262 _ => type_expr.clone(),
11263 };
11264 let func_name = if is_try { "TRY_CONVERT" } else { "CONVERT" };
11265 let mut new_args = vec![normalized_type, value_expr];
11266 if let Some(s) = style {
11267 new_args.push(s.clone());
11268 }
11269 return Ok(Expression::Function(Box::new(Function::new(
11270 func_name.to_string(),
11271 new_args,
11272 ))));
11273 }
11274
11275 // For other targets: CONVERT(type, expr) -> CAST(expr AS type)
11276 fn expr_to_datatype(e: &Expression) -> Option<DataType> {
11277 match e {
11278 Expression::DataType(dt) => {
11279 // Convert NVARCHAR/NCHAR Custom types to standard VarChar/Char
11280 match dt {
11281 DataType::Custom { name }
11282 if name.starts_with("NVARCHAR(")
11283 || name.starts_with("NCHAR(") =>
11284 {
11285 // Extract the length from "NVARCHAR(200)" or "NCHAR(40)"
11286 let inner = &name[name.find('(').unwrap() + 1
11287 ..name.len() - 1];
11288 if inner.eq_ignore_ascii_case("MAX") {
11289 Some(DataType::Text)
11290 } else if let Ok(len) = inner.parse::<u32>() {
11291 if name.starts_with("NCHAR") {
11292 Some(DataType::Char {
11293 length: Some(len),
11294 })
11295 } else {
11296 Some(DataType::VarChar {
11297 length: Some(len),
11298 parenthesized_length: false,
11299 })
11300 }
11301 } else {
11302 Some(dt.clone())
11303 }
11304 }
11305 DataType::Custom { name } if name == "NVARCHAR" => {
11306 Some(DataType::VarChar {
11307 length: None,
11308 parenthesized_length: false,
11309 })
11310 }
11311 DataType::Custom { name } if name == "NCHAR" => {
11312 Some(DataType::Char { length: None })
11313 }
11314 DataType::Custom { name }
11315 if name == "NVARCHAR(MAX)"
11316 || name == "VARCHAR(MAX)" =>
11317 {
11318 Some(DataType::Text)
11319 }
11320 _ => Some(dt.clone()),
11321 }
11322 }
11323 Expression::Identifier(id) => {
11324 let name = id.name.to_uppercase();
11325 match name.as_str() {
11326 "INT" | "INTEGER" => Some(DataType::Int {
11327 length: None,
11328 integer_spelling: false,
11329 }),
11330 "BIGINT" => Some(DataType::BigInt { length: None }),
11331 "SMALLINT" => {
11332 Some(DataType::SmallInt { length: None })
11333 }
11334 "TINYINT" => {
11335 Some(DataType::TinyInt { length: None })
11336 }
11337 "FLOAT" => Some(DataType::Float {
11338 precision: None,
11339 scale: None,
11340 real_spelling: false,
11341 }),
11342 "REAL" => Some(DataType::Float {
11343 precision: None,
11344 scale: None,
11345 real_spelling: true,
11346 }),
11347 "DATETIME" | "DATETIME2" => {
11348 Some(DataType::Timestamp {
11349 timezone: false,
11350 precision: None,
11351 })
11352 }
11353 "DATE" => Some(DataType::Date),
11354 "BIT" => Some(DataType::Boolean),
11355 "TEXT" => Some(DataType::Text),
11356 "NUMERIC" => Some(DataType::Decimal {
11357 precision: None,
11358 scale: None,
11359 }),
11360 "MONEY" => Some(DataType::Decimal {
11361 precision: Some(15),
11362 scale: Some(4),
11363 }),
11364 "SMALLMONEY" => Some(DataType::Decimal {
11365 precision: Some(6),
11366 scale: Some(4),
11367 }),
11368 "VARCHAR" => Some(DataType::VarChar {
11369 length: None,
11370 parenthesized_length: false,
11371 }),
11372 "NVARCHAR" => Some(DataType::VarChar {
11373 length: None,
11374 parenthesized_length: false,
11375 }),
11376 "CHAR" => Some(DataType::Char { length: None }),
11377 "NCHAR" => Some(DataType::Char { length: None }),
11378 _ => Some(DataType::Custom { name }),
11379 }
11380 }
11381 Expression::Column(col) => {
11382 let name = col.name.name.to_uppercase();
11383 match name.as_str() {
11384 "INT" | "INTEGER" => Some(DataType::Int {
11385 length: None,
11386 integer_spelling: false,
11387 }),
11388 "BIGINT" => Some(DataType::BigInt { length: None }),
11389 "FLOAT" => Some(DataType::Float {
11390 precision: None,
11391 scale: None,
11392 real_spelling: false,
11393 }),
11394 "DATETIME" | "DATETIME2" => {
11395 Some(DataType::Timestamp {
11396 timezone: false,
11397 precision: None,
11398 })
11399 }
11400 "DATE" => Some(DataType::Date),
11401 "NUMERIC" => Some(DataType::Decimal {
11402 precision: None,
11403 scale: None,
11404 }),
11405 "VARCHAR" => Some(DataType::VarChar {
11406 length: None,
11407 parenthesized_length: false,
11408 }),
11409 "NVARCHAR" => Some(DataType::VarChar {
11410 length: None,
11411 parenthesized_length: false,
11412 }),
11413 "CHAR" => Some(DataType::Char { length: None }),
11414 "NCHAR" => Some(DataType::Char { length: None }),
11415 _ => Some(DataType::Custom { name }),
11416 }
11417 }
11418 // NVARCHAR(200) parsed as Function("NVARCHAR", [200])
11419 Expression::Function(f) => {
11420 let fname = f.name.to_uppercase();
11421 match fname.as_str() {
11422 "VARCHAR" | "NVARCHAR" => {
11423 let len = f.args.first().and_then(|a| {
11424 if let Expression::Literal(
11425 crate::expressions::Literal::Number(n),
11426 ) = a
11427 {
11428 n.parse::<u32>().ok()
11429 } else if let Expression::Identifier(id) = a
11430 {
11431 if id.name.eq_ignore_ascii_case("MAX") {
11432 None
11433 } else {
11434 None
11435 }
11436 } else {
11437 None
11438 }
11439 });
11440 // Check for VARCHAR(MAX) -> TEXT
11441 let is_max = f.args.first().map_or(false, |a| {
11442 matches!(a, Expression::Identifier(id) if id.name.eq_ignore_ascii_case("MAX"))
11443 || matches!(a, Expression::Column(col) if col.name.name.eq_ignore_ascii_case("MAX"))
11444 });
11445 if is_max {
11446 Some(DataType::Text)
11447 } else {
11448 Some(DataType::VarChar {
11449 length: len,
11450 parenthesized_length: false,
11451 })
11452 }
11453 }
11454 "NCHAR" | "CHAR" => {
11455 let len = f.args.first().and_then(|a| {
11456 if let Expression::Literal(
11457 crate::expressions::Literal::Number(n),
11458 ) = a
11459 {
11460 n.parse::<u32>().ok()
11461 } else {
11462 None
11463 }
11464 });
11465 Some(DataType::Char { length: len })
11466 }
11467 "NUMERIC" | "DECIMAL" => {
11468 let precision = f.args.first().and_then(|a| {
11469 if let Expression::Literal(
11470 crate::expressions::Literal::Number(n),
11471 ) = a
11472 {
11473 n.parse::<u32>().ok()
11474 } else {
11475 None
11476 }
11477 });
11478 let scale = f.args.get(1).and_then(|a| {
11479 if let Expression::Literal(
11480 crate::expressions::Literal::Number(n),
11481 ) = a
11482 {
11483 n.parse::<u32>().ok()
11484 } else {
11485 None
11486 }
11487 });
11488 Some(DataType::Decimal { precision, scale })
11489 }
11490 _ => None,
11491 }
11492 }
11493 _ => None,
11494 }
11495 }
11496
11497 if let Some(mut dt) = expr_to_datatype(&type_expr) {
11498 // For TSQL source: VARCHAR/CHAR without length defaults to 30
11499 let is_tsql_source =
11500 matches!(source, DialectType::TSQL | DialectType::Fabric);
11501 if is_tsql_source {
11502 match &dt {
11503 DataType::VarChar { length: None, .. } => {
11504 dt = DataType::VarChar {
11505 length: Some(30),
11506 parenthesized_length: false,
11507 };
11508 }
11509 DataType::Char { length: None } => {
11510 dt = DataType::Char { length: Some(30) };
11511 }
11512 _ => {}
11513 }
11514 }
11515
11516 // Determine if this is a string type
11517 let is_string_type = matches!(
11518 dt,
11519 DataType::VarChar { .. }
11520 | DataType::Char { .. }
11521 | DataType::Text
11522 ) || matches!(&dt, DataType::Custom { name } if name == "NVARCHAR" || name == "NCHAR"
11523 || name.starts_with("NVARCHAR(") || name.starts_with("NCHAR(")
11524 || name.starts_with("VARCHAR(") || name == "VARCHAR"
11525 || name == "STRING");
11526
11527 // Determine if this is a date/time type
11528 let is_datetime_type = matches!(
11529 dt,
11530 DataType::Timestamp { .. } | DataType::Date
11531 ) || matches!(&dt, DataType::Custom { name } if name == "DATETIME"
11532 || name == "DATETIME2" || name == "SMALLDATETIME");
11533
11534 // Check for date conversion with style
11535 if style.is_some() {
11536 let style_num = style.and_then(|s| {
11537 if let Expression::Literal(
11538 crate::expressions::Literal::Number(n),
11539 ) = s
11540 {
11541 n.parse::<u32>().ok()
11542 } else {
11543 None
11544 }
11545 });
11546
11547 // TSQL CONVERT date styles (Java format)
11548 let format_str = style_num.and_then(|n| match n {
11549 101 => Some("MM/dd/yyyy"),
11550 102 => Some("yyyy.MM.dd"),
11551 103 => Some("dd/MM/yyyy"),
11552 104 => Some("dd.MM.yyyy"),
11553 105 => Some("dd-MM-yyyy"),
11554 108 => Some("HH:mm:ss"),
11555 110 => Some("MM-dd-yyyy"),
11556 112 => Some("yyyyMMdd"),
11557 120 | 20 => Some("yyyy-MM-dd HH:mm:ss"),
11558 121 | 21 => Some("yyyy-MM-dd HH:mm:ss.SSSSSS"),
11559 126 | 127 => Some("yyyy-MM-dd'T'HH:mm:ss.SSS"),
11560 _ => None,
11561 });
11562
11563 // Non-string, non-datetime types with style: just CAST, ignore the style
11564 if !is_string_type && !is_datetime_type {
11565 let cast_expr = if is_try {
11566 Expression::TryCast(Box::new(
11567 crate::expressions::Cast {
11568 this: value_expr,
11569 to: dt,
11570 trailing_comments: Vec::new(),
11571 double_colon_syntax: false,
11572 format: None,
11573 default: None,
11574 },
11575 ))
11576 } else {
11577 Expression::Cast(Box::new(
11578 crate::expressions::Cast {
11579 this: value_expr,
11580 to: dt,
11581 trailing_comments: Vec::new(),
11582 double_colon_syntax: false,
11583 format: None,
11584 default: None,
11585 },
11586 ))
11587 };
11588 return Ok(cast_expr);
11589 }
11590
11591 if let Some(java_fmt) = format_str {
11592 let c_fmt = java_fmt
11593 .replace("yyyy", "%Y")
11594 .replace("MM", "%m")
11595 .replace("dd", "%d")
11596 .replace("HH", "%H")
11597 .replace("mm", "%M")
11598 .replace("ss", "%S")
11599 .replace("SSSSSS", "%f")
11600 .replace("SSS", "%f")
11601 .replace("'T'", "T");
11602
11603 // For datetime target types: style is the INPUT format for parsing strings -> dates
11604 if is_datetime_type {
11605 match target {
11606 DialectType::DuckDB => {
11607 return Ok(Expression::Function(Box::new(
11608 Function::new(
11609 "STRPTIME".to_string(),
11610 vec![
11611 value_expr,
11612 Expression::string(&c_fmt),
11613 ],
11614 ),
11615 )));
11616 }
11617 DialectType::Spark
11618 | DialectType::Databricks => {
11619 // CONVERT(DATETIME, x, style) -> TO_TIMESTAMP(x, fmt)
11620 // CONVERT(DATE, x, style) -> TO_DATE(x, fmt)
11621 let func_name =
11622 if matches!(dt, DataType::Date) {
11623 "TO_DATE"
11624 } else {
11625 "TO_TIMESTAMP"
11626 };
11627 return Ok(Expression::Function(Box::new(
11628 Function::new(
11629 func_name.to_string(),
11630 vec![
11631 value_expr,
11632 Expression::string(java_fmt),
11633 ],
11634 ),
11635 )));
11636 }
11637 DialectType::Hive => {
11638 return Ok(Expression::Function(Box::new(
11639 Function::new(
11640 "TO_TIMESTAMP".to_string(),
11641 vec![
11642 value_expr,
11643 Expression::string(java_fmt),
11644 ],
11645 ),
11646 )));
11647 }
11648 _ => {
11649 return Ok(Expression::Cast(Box::new(
11650 crate::expressions::Cast {
11651 this: value_expr,
11652 to: dt,
11653 trailing_comments: Vec::new(),
11654 double_colon_syntax: false,
11655 format: None,
11656 default: None,
11657 },
11658 )));
11659 }
11660 }
11661 }
11662
11663 // For string target types: style is the OUTPUT format for dates -> strings
11664 match target {
11665 DialectType::DuckDB => Ok(Expression::Function(
11666 Box::new(Function::new(
11667 "STRPTIME".to_string(),
11668 vec![
11669 value_expr,
11670 Expression::string(&c_fmt),
11671 ],
11672 )),
11673 )),
11674 DialectType::Spark | DialectType::Databricks => {
11675 // For string target types with style: CAST(DATE_FORMAT(x, fmt) AS type)
11676 // Determine the target string type
11677 let string_dt = match &dt {
11678 DataType::VarChar {
11679 length: Some(l),
11680 ..
11681 } => DataType::VarChar {
11682 length: Some(*l),
11683 parenthesized_length: false,
11684 },
11685 DataType::Text => DataType::Custom {
11686 name: "STRING".to_string(),
11687 },
11688 _ => DataType::Custom {
11689 name: "STRING".to_string(),
11690 },
11691 };
11692 let date_format_expr = Expression::Function(
11693 Box::new(Function::new(
11694 "DATE_FORMAT".to_string(),
11695 vec![
11696 value_expr,
11697 Expression::string(java_fmt),
11698 ],
11699 )),
11700 );
11701 let cast_expr = if is_try {
11702 Expression::TryCast(Box::new(
11703 crate::expressions::Cast {
11704 this: date_format_expr,
11705 to: string_dt,
11706 trailing_comments: Vec::new(),
11707 double_colon_syntax: false,
11708 format: None,
11709 default: None,
11710 },
11711 ))
11712 } else {
11713 Expression::Cast(Box::new(
11714 crate::expressions::Cast {
11715 this: date_format_expr,
11716 to: string_dt,
11717 trailing_comments: Vec::new(),
11718 double_colon_syntax: false,
11719 format: None,
11720 default: None,
11721 },
11722 ))
11723 };
11724 Ok(cast_expr)
11725 }
11726 DialectType::MySQL | DialectType::SingleStore => {
11727 // For MySQL: CAST(DATE_FORMAT(x, mysql_fmt) AS CHAR(n))
11728 let mysql_fmt = java_fmt
11729 .replace("yyyy", "%Y")
11730 .replace("MM", "%m")
11731 .replace("dd", "%d")
11732 .replace("HH:mm:ss.SSSSSS", "%T")
11733 .replace("HH:mm:ss", "%T")
11734 .replace("HH", "%H")
11735 .replace("mm", "%i")
11736 .replace("ss", "%S");
11737 let date_format_expr = Expression::Function(
11738 Box::new(Function::new(
11739 "DATE_FORMAT".to_string(),
11740 vec![
11741 value_expr,
11742 Expression::string(&mysql_fmt),
11743 ],
11744 )),
11745 );
11746 // MySQL uses CHAR for string casts
11747 let mysql_dt = match &dt {
11748 DataType::VarChar { length, .. } => {
11749 DataType::Char { length: *length }
11750 }
11751 _ => dt,
11752 };
11753 Ok(Expression::Cast(Box::new(
11754 crate::expressions::Cast {
11755 this: date_format_expr,
11756 to: mysql_dt,
11757 trailing_comments: Vec::new(),
11758 double_colon_syntax: false,
11759 format: None,
11760 default: None,
11761 },
11762 )))
11763 }
11764 DialectType::Hive => {
11765 let func_name = "TO_TIMESTAMP";
11766 Ok(Expression::Function(Box::new(
11767 Function::new(
11768 func_name.to_string(),
11769 vec![
11770 value_expr,
11771 Expression::string(java_fmt),
11772 ],
11773 ),
11774 )))
11775 }
11776 _ => Ok(Expression::Cast(Box::new(
11777 crate::expressions::Cast {
11778 this: value_expr,
11779 to: dt,
11780 trailing_comments: Vec::new(),
11781 double_colon_syntax: false,
11782 format: None,
11783 default: None,
11784 },
11785 ))),
11786 }
11787 } else {
11788 // Unknown style, just CAST
11789 let cast_expr = if is_try {
11790 Expression::TryCast(Box::new(
11791 crate::expressions::Cast {
11792 this: value_expr,
11793 to: dt,
11794 trailing_comments: Vec::new(),
11795 double_colon_syntax: false,
11796 format: None,
11797 default: None,
11798 },
11799 ))
11800 } else {
11801 Expression::Cast(Box::new(
11802 crate::expressions::Cast {
11803 this: value_expr,
11804 to: dt,
11805 trailing_comments: Vec::new(),
11806 double_colon_syntax: false,
11807 format: None,
11808 default: None,
11809 },
11810 ))
11811 };
11812 Ok(cast_expr)
11813 }
11814 } else {
11815 // No style - simple CAST
11816 let final_dt = if matches!(
11817 target,
11818 DialectType::MySQL | DialectType::SingleStore
11819 ) {
11820 match &dt {
11821 DataType::Int { .. }
11822 | DataType::BigInt { .. }
11823 | DataType::SmallInt { .. }
11824 | DataType::TinyInt { .. } => DataType::Custom {
11825 name: "SIGNED".to_string(),
11826 },
11827 DataType::VarChar { length, .. } => {
11828 DataType::Char { length: *length }
11829 }
11830 _ => dt,
11831 }
11832 } else {
11833 dt
11834 };
11835 let cast_expr = if is_try {
11836 Expression::TryCast(Box::new(
11837 crate::expressions::Cast {
11838 this: value_expr,
11839 to: final_dt,
11840 trailing_comments: Vec::new(),
11841 double_colon_syntax: false,
11842 format: None,
11843 default: None,
11844 },
11845 ))
11846 } else {
11847 Expression::Cast(Box::new(crate::expressions::Cast {
11848 this: value_expr,
11849 to: final_dt,
11850 trailing_comments: Vec::new(),
11851 double_colon_syntax: false,
11852 format: None,
11853 default: None,
11854 }))
11855 };
11856 Ok(cast_expr)
11857 }
11858 } else {
11859 // Can't convert type expression - keep as CONVERT/TRY_CONVERT function
11860 Ok(Expression::Function(f))
11861 }
11862 }
11863 // STRFTIME(val, fmt) from DuckDB / STRFTIME(fmt, val) from SQLite -> target-specific
11864 "STRFTIME" if f.args.len() == 2 => {
11865 // SQLite uses STRFTIME(fmt, val); DuckDB uses STRFTIME(val, fmt)
11866 let (val, fmt_expr) = if matches!(source, DialectType::SQLite) {
11867 // SQLite: args[0] = format, args[1] = value
11868 (f.args[1].clone(), &f.args[0])
11869 } else {
11870 // DuckDB and others: args[0] = value, args[1] = format
11871 (f.args[0].clone(), &f.args[1])
11872 };
11873
11874 // Helper to convert C-style format to Java-style
11875 fn c_to_java_format(fmt: &str) -> String {
11876 fmt.replace("%Y", "yyyy")
11877 .replace("%m", "MM")
11878 .replace("%d", "dd")
11879 .replace("%H", "HH")
11880 .replace("%M", "mm")
11881 .replace("%S", "ss")
11882 .replace("%f", "SSSSSS")
11883 .replace("%y", "yy")
11884 .replace("%-m", "M")
11885 .replace("%-d", "d")
11886 .replace("%-H", "H")
11887 .replace("%-I", "h")
11888 .replace("%I", "hh")
11889 .replace("%p", "a")
11890 .replace("%j", "DDD")
11891 .replace("%a", "EEE")
11892 .replace("%b", "MMM")
11893 .replace("%F", "yyyy-MM-dd")
11894 .replace("%T", "HH:mm:ss")
11895 }
11896
11897 // Helper: recursively convert format strings within expressions (handles CONCAT)
11898 fn convert_fmt_expr(
11899 expr: &Expression,
11900 converter: &dyn Fn(&str) -> String,
11901 ) -> Expression {
11902 match expr {
11903 Expression::Literal(
11904 crate::expressions::Literal::String(s),
11905 ) => Expression::string(&converter(s)),
11906 Expression::Function(func)
11907 if func.name.eq_ignore_ascii_case("CONCAT") =>
11908 {
11909 let new_args: Vec<Expression> = func
11910 .args
11911 .iter()
11912 .map(|a| convert_fmt_expr(a, converter))
11913 .collect();
11914 Expression::Function(Box::new(Function::new(
11915 "CONCAT".to_string(),
11916 new_args,
11917 )))
11918 }
11919 other => other.clone(),
11920 }
11921 }
11922
11923 match target {
11924 DialectType::DuckDB => {
11925 if matches!(source, DialectType::SQLite) {
11926 // SQLite STRFTIME(fmt, val) -> DuckDB STRFTIME(CAST(val AS TIMESTAMP), fmt)
11927 let cast_val = Expression::Cast(Box::new(Cast {
11928 this: val,
11929 to: crate::expressions::DataType::Timestamp {
11930 precision: None,
11931 timezone: false,
11932 },
11933 trailing_comments: Vec::new(),
11934 double_colon_syntax: false,
11935 format: None,
11936 default: None,
11937 }));
11938 Ok(Expression::Function(Box::new(Function::new(
11939 "STRFTIME".to_string(),
11940 vec![cast_val, fmt_expr.clone()],
11941 ))))
11942 } else {
11943 Ok(Expression::Function(f))
11944 }
11945 }
11946 DialectType::Spark
11947 | DialectType::Databricks
11948 | DialectType::Hive => {
11949 // STRFTIME(val, fmt) -> DATE_FORMAT(val, java_fmt)
11950 let converted_fmt =
11951 convert_fmt_expr(fmt_expr, &c_to_java_format);
11952 Ok(Expression::Function(Box::new(Function::new(
11953 "DATE_FORMAT".to_string(),
11954 vec![val, converted_fmt],
11955 ))))
11956 }
11957 DialectType::TSQL | DialectType::Fabric => {
11958 // STRFTIME(val, fmt) -> FORMAT(val, java_fmt)
11959 let converted_fmt =
11960 convert_fmt_expr(fmt_expr, &c_to_java_format);
11961 Ok(Expression::Function(Box::new(Function::new(
11962 "FORMAT".to_string(),
11963 vec![val, converted_fmt],
11964 ))))
11965 }
11966 DialectType::Presto
11967 | DialectType::Trino
11968 | DialectType::Athena => {
11969 // STRFTIME(val, fmt) -> DATE_FORMAT(val, presto_fmt) (convert DuckDB format to Presto)
11970 if let Expression::Literal(
11971 crate::expressions::Literal::String(s),
11972 ) = fmt_expr
11973 {
11974 let presto_fmt = duckdb_to_presto_format(s);
11975 Ok(Expression::Function(Box::new(Function::new(
11976 "DATE_FORMAT".to_string(),
11977 vec![val, Expression::string(&presto_fmt)],
11978 ))))
11979 } else {
11980 Ok(Expression::Function(Box::new(Function::new(
11981 "DATE_FORMAT".to_string(),
11982 vec![val, fmt_expr.clone()],
11983 ))))
11984 }
11985 }
11986 DialectType::BigQuery => {
11987 // STRFTIME(val, fmt) -> FORMAT_DATE(bq_fmt, val) - note reversed arg order
11988 if let Expression::Literal(
11989 crate::expressions::Literal::String(s),
11990 ) = fmt_expr
11991 {
11992 let bq_fmt = duckdb_to_bigquery_format(s);
11993 Ok(Expression::Function(Box::new(Function::new(
11994 "FORMAT_DATE".to_string(),
11995 vec![Expression::string(&bq_fmt), val],
11996 ))))
11997 } else {
11998 Ok(Expression::Function(Box::new(Function::new(
11999 "FORMAT_DATE".to_string(),
12000 vec![fmt_expr.clone(), val],
12001 ))))
12002 }
12003 }
12004 DialectType::PostgreSQL | DialectType::Redshift => {
12005 // STRFTIME(val, fmt) -> TO_CHAR(val, pg_fmt)
12006 if let Expression::Literal(
12007 crate::expressions::Literal::String(s),
12008 ) = fmt_expr
12009 {
12010 let pg_fmt = s
12011 .replace("%Y", "YYYY")
12012 .replace("%m", "MM")
12013 .replace("%d", "DD")
12014 .replace("%H", "HH24")
12015 .replace("%M", "MI")
12016 .replace("%S", "SS")
12017 .replace("%y", "YY")
12018 .replace("%-m", "FMMM")
12019 .replace("%-d", "FMDD")
12020 .replace("%-H", "FMHH24")
12021 .replace("%-I", "FMHH12")
12022 .replace("%p", "AM")
12023 .replace("%F", "YYYY-MM-DD")
12024 .replace("%T", "HH24:MI:SS");
12025 Ok(Expression::Function(Box::new(Function::new(
12026 "TO_CHAR".to_string(),
12027 vec![val, Expression::string(&pg_fmt)],
12028 ))))
12029 } else {
12030 Ok(Expression::Function(Box::new(Function::new(
12031 "TO_CHAR".to_string(),
12032 vec![val, fmt_expr.clone()],
12033 ))))
12034 }
12035 }
12036 _ => Ok(Expression::Function(f)),
12037 }
12038 }
12039 // STRPTIME(val, fmt) from DuckDB -> target-specific date parse function
12040 "STRPTIME" if f.args.len() == 2 => {
12041 let val = f.args[0].clone();
12042 let fmt_expr = &f.args[1];
12043
12044 fn c_to_java_format_parse(fmt: &str) -> String {
12045 fmt.replace("%Y", "yyyy")
12046 .replace("%m", "MM")
12047 .replace("%d", "dd")
12048 .replace("%H", "HH")
12049 .replace("%M", "mm")
12050 .replace("%S", "ss")
12051 .replace("%f", "SSSSSS")
12052 .replace("%y", "yy")
12053 .replace("%-m", "M")
12054 .replace("%-d", "d")
12055 .replace("%-H", "H")
12056 .replace("%-I", "h")
12057 .replace("%I", "hh")
12058 .replace("%p", "a")
12059 .replace("%F", "yyyy-MM-dd")
12060 .replace("%T", "HH:mm:ss")
12061 }
12062
12063 match target {
12064 DialectType::DuckDB => Ok(Expression::Function(f)),
12065 DialectType::Spark | DialectType::Databricks => {
12066 // STRPTIME(val, fmt) -> TO_TIMESTAMP(val, java_fmt)
12067 if let Expression::Literal(
12068 crate::expressions::Literal::String(s),
12069 ) = fmt_expr
12070 {
12071 let java_fmt = c_to_java_format_parse(s);
12072 Ok(Expression::Function(Box::new(Function::new(
12073 "TO_TIMESTAMP".to_string(),
12074 vec![val, Expression::string(&java_fmt)],
12075 ))))
12076 } else {
12077 Ok(Expression::Function(Box::new(Function::new(
12078 "TO_TIMESTAMP".to_string(),
12079 vec![val, fmt_expr.clone()],
12080 ))))
12081 }
12082 }
12083 DialectType::Hive => {
12084 // STRPTIME(val, fmt) -> CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(val, java_fmt)) AS TIMESTAMP)
12085 if let Expression::Literal(
12086 crate::expressions::Literal::String(s),
12087 ) = fmt_expr
12088 {
12089 let java_fmt = c_to_java_format_parse(s);
12090 let unix_ts =
12091 Expression::Function(Box::new(Function::new(
12092 "UNIX_TIMESTAMP".to_string(),
12093 vec![val, Expression::string(&java_fmt)],
12094 )));
12095 let from_unix =
12096 Expression::Function(Box::new(Function::new(
12097 "FROM_UNIXTIME".to_string(),
12098 vec![unix_ts],
12099 )));
12100 Ok(Expression::Cast(Box::new(
12101 crate::expressions::Cast {
12102 this: from_unix,
12103 to: DataType::Timestamp {
12104 timezone: false,
12105 precision: None,
12106 },
12107 trailing_comments: Vec::new(),
12108 double_colon_syntax: false,
12109 format: None,
12110 default: None,
12111 },
12112 )))
12113 } else {
12114 Ok(Expression::Function(f))
12115 }
12116 }
12117 DialectType::Presto
12118 | DialectType::Trino
12119 | DialectType::Athena => {
12120 // STRPTIME(val, fmt) -> DATE_PARSE(val, presto_fmt) (convert DuckDB format to Presto)
12121 if let Expression::Literal(
12122 crate::expressions::Literal::String(s),
12123 ) = fmt_expr
12124 {
12125 let presto_fmt = duckdb_to_presto_format(s);
12126 Ok(Expression::Function(Box::new(Function::new(
12127 "DATE_PARSE".to_string(),
12128 vec![val, Expression::string(&presto_fmt)],
12129 ))))
12130 } else {
12131 Ok(Expression::Function(Box::new(Function::new(
12132 "DATE_PARSE".to_string(),
12133 vec![val, fmt_expr.clone()],
12134 ))))
12135 }
12136 }
12137 DialectType::BigQuery => {
12138 // STRPTIME(val, fmt) -> PARSE_TIMESTAMP(bq_fmt, val) - note reversed arg order
12139 if let Expression::Literal(
12140 crate::expressions::Literal::String(s),
12141 ) = fmt_expr
12142 {
12143 let bq_fmt = duckdb_to_bigquery_format(s);
12144 Ok(Expression::Function(Box::new(Function::new(
12145 "PARSE_TIMESTAMP".to_string(),
12146 vec![Expression::string(&bq_fmt), val],
12147 ))))
12148 } else {
12149 Ok(Expression::Function(Box::new(Function::new(
12150 "PARSE_TIMESTAMP".to_string(),
12151 vec![fmt_expr.clone(), val],
12152 ))))
12153 }
12154 }
12155 _ => Ok(Expression::Function(f)),
12156 }
12157 }
12158 // DATE_FORMAT(val, fmt) from Presto source (C-style format) -> target-specific
12159 "DATE_FORMAT"
12160 if f.args.len() >= 2
12161 && matches!(
12162 source,
12163 DialectType::Presto
12164 | DialectType::Trino
12165 | DialectType::Athena
12166 ) =>
12167 {
12168 let val = f.args[0].clone();
12169 let fmt_expr = &f.args[1];
12170
12171 match target {
12172 DialectType::Presto
12173 | DialectType::Trino
12174 | DialectType::Athena => {
12175 // Presto -> Presto: normalize format (e.g., %H:%i:%S -> %T)
12176 if let Expression::Literal(
12177 crate::expressions::Literal::String(s),
12178 ) = fmt_expr
12179 {
12180 let normalized = crate::dialects::presto::PrestoDialect::normalize_presto_format(s);
12181 Ok(Expression::Function(Box::new(Function::new(
12182 "DATE_FORMAT".to_string(),
12183 vec![val, Expression::string(&normalized)],
12184 ))))
12185 } else {
12186 Ok(Expression::Function(f))
12187 }
12188 }
12189 DialectType::Hive
12190 | DialectType::Spark
12191 | DialectType::Databricks => {
12192 // Convert Presto C-style to Java-style format
12193 if let Expression::Literal(
12194 crate::expressions::Literal::String(s),
12195 ) = fmt_expr
12196 {
12197 let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
12198 Ok(Expression::Function(Box::new(Function::new(
12199 "DATE_FORMAT".to_string(),
12200 vec![val, Expression::string(&java_fmt)],
12201 ))))
12202 } else {
12203 Ok(Expression::Function(f))
12204 }
12205 }
12206 DialectType::DuckDB => {
12207 // Convert to STRFTIME(val, duckdb_fmt)
12208 if let Expression::Literal(
12209 crate::expressions::Literal::String(s),
12210 ) = fmt_expr
12211 {
12212 let duckdb_fmt = crate::dialects::presto::PrestoDialect::presto_to_duckdb_format(s);
12213 Ok(Expression::Function(Box::new(Function::new(
12214 "STRFTIME".to_string(),
12215 vec![val, Expression::string(&duckdb_fmt)],
12216 ))))
12217 } else {
12218 Ok(Expression::Function(Box::new(Function::new(
12219 "STRFTIME".to_string(),
12220 vec![val, fmt_expr.clone()],
12221 ))))
12222 }
12223 }
12224 DialectType::BigQuery => {
12225 // Convert to FORMAT_DATE(bq_fmt, val) - reversed args
12226 if let Expression::Literal(
12227 crate::expressions::Literal::String(s),
12228 ) = fmt_expr
12229 {
12230 let bq_fmt = crate::dialects::presto::PrestoDialect::presto_to_bigquery_format(s);
12231 Ok(Expression::Function(Box::new(Function::new(
12232 "FORMAT_DATE".to_string(),
12233 vec![Expression::string(&bq_fmt), val],
12234 ))))
12235 } else {
12236 Ok(Expression::Function(Box::new(Function::new(
12237 "FORMAT_DATE".to_string(),
12238 vec![fmt_expr.clone(), val],
12239 ))))
12240 }
12241 }
12242 _ => Ok(Expression::Function(f)),
12243 }
12244 }
12245 // DATE_PARSE(val, fmt) from Presto source -> target-specific parse function
12246 "DATE_PARSE"
12247 if f.args.len() >= 2
12248 && matches!(
12249 source,
12250 DialectType::Presto
12251 | DialectType::Trino
12252 | DialectType::Athena
12253 ) =>
12254 {
12255 let val = f.args[0].clone();
12256 let fmt_expr = &f.args[1];
12257
12258 match target {
12259 DialectType::Presto
12260 | DialectType::Trino
12261 | DialectType::Athena => {
12262 // Presto -> Presto: normalize format
12263 if let Expression::Literal(
12264 crate::expressions::Literal::String(s),
12265 ) = fmt_expr
12266 {
12267 let normalized = crate::dialects::presto::PrestoDialect::normalize_presto_format(s);
12268 Ok(Expression::Function(Box::new(Function::new(
12269 "DATE_PARSE".to_string(),
12270 vec![val, Expression::string(&normalized)],
12271 ))))
12272 } else {
12273 Ok(Expression::Function(f))
12274 }
12275 }
12276 DialectType::Hive => {
12277 // Presto -> Hive: if default format, just CAST(x AS TIMESTAMP)
12278 if let Expression::Literal(
12279 crate::expressions::Literal::String(s),
12280 ) = fmt_expr
12281 {
12282 if crate::dialects::presto::PrestoDialect::is_default_timestamp_format(s)
12283 || crate::dialects::presto::PrestoDialect::is_default_date_format(s) {
12284 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
12285 this: val,
12286 to: DataType::Timestamp { timezone: false, precision: None },
12287 trailing_comments: Vec::new(),
12288 double_colon_syntax: false,
12289 format: None,
12290 default: None,
12291 })))
12292 } else {
12293 let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
12294 Ok(Expression::Function(Box::new(Function::new(
12295 "TO_TIMESTAMP".to_string(),
12296 vec![val, Expression::string(&java_fmt)],
12297 ))))
12298 }
12299 } else {
12300 Ok(Expression::Function(f))
12301 }
12302 }
12303 DialectType::Spark | DialectType::Databricks => {
12304 // Presto -> Spark: TO_TIMESTAMP(val, java_fmt)
12305 if let Expression::Literal(
12306 crate::expressions::Literal::String(s),
12307 ) = fmt_expr
12308 {
12309 let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
12310 Ok(Expression::Function(Box::new(Function::new(
12311 "TO_TIMESTAMP".to_string(),
12312 vec![val, Expression::string(&java_fmt)],
12313 ))))
12314 } else {
12315 Ok(Expression::Function(f))
12316 }
12317 }
12318 DialectType::DuckDB => {
12319 // Presto -> DuckDB: STRPTIME(val, duckdb_fmt)
12320 if let Expression::Literal(
12321 crate::expressions::Literal::String(s),
12322 ) = fmt_expr
12323 {
12324 let duckdb_fmt = crate::dialects::presto::PrestoDialect::presto_to_duckdb_format(s);
12325 Ok(Expression::Function(Box::new(Function::new(
12326 "STRPTIME".to_string(),
12327 vec![val, Expression::string(&duckdb_fmt)],
12328 ))))
12329 } else {
12330 Ok(Expression::Function(Box::new(Function::new(
12331 "STRPTIME".to_string(),
12332 vec![val, fmt_expr.clone()],
12333 ))))
12334 }
12335 }
12336 _ => Ok(Expression::Function(f)),
12337 }
12338 }
12339 // FROM_BASE64(x) / TO_BASE64(x) from Presto -> Hive-specific renames
12340 "FROM_BASE64"
12341 if f.args.len() == 1 && matches!(target, DialectType::Hive) =>
12342 {
12343 Ok(Expression::Function(Box::new(Function::new(
12344 "UNBASE64".to_string(),
12345 f.args,
12346 ))))
12347 }
12348 "TO_BASE64"
12349 if f.args.len() == 1 && matches!(target, DialectType::Hive) =>
12350 {
12351 Ok(Expression::Function(Box::new(Function::new(
12352 "BASE64".to_string(),
12353 f.args,
12354 ))))
12355 }
12356 // FROM_UNIXTIME(x) -> CAST(FROM_UNIXTIME(x) AS TIMESTAMP) for Spark
12357 "FROM_UNIXTIME"
12358 if f.args.len() == 1
12359 && matches!(
12360 source,
12361 DialectType::Presto
12362 | DialectType::Trino
12363 | DialectType::Athena
12364 )
12365 && matches!(
12366 target,
12367 DialectType::Spark | DialectType::Databricks
12368 ) =>
12369 {
12370 // Wrap FROM_UNIXTIME(x) in CAST(... AS TIMESTAMP)
12371 let from_unix = Expression::Function(Box::new(Function::new(
12372 "FROM_UNIXTIME".to_string(),
12373 f.args,
12374 )));
12375 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
12376 this: from_unix,
12377 to: DataType::Timestamp {
12378 timezone: false,
12379 precision: None,
12380 },
12381 trailing_comments: Vec::new(),
12382 double_colon_syntax: false,
12383 format: None,
12384 default: None,
12385 })))
12386 }
12387 // DATE_FORMAT(val, fmt) from Hive/Spark/MySQL -> target-specific format function
12388 "DATE_FORMAT"
12389 if f.args.len() >= 2
12390 && !matches!(
12391 target,
12392 DialectType::Hive
12393 | DialectType::Spark
12394 | DialectType::Databricks
12395 | DialectType::MySQL
12396 | DialectType::SingleStore
12397 ) =>
12398 {
12399 let val = f.args[0].clone();
12400 let fmt_expr = &f.args[1];
12401 let is_hive_source = matches!(
12402 source,
12403 DialectType::Hive
12404 | DialectType::Spark
12405 | DialectType::Databricks
12406 );
12407
12408 fn java_to_c_format(fmt: &str) -> String {
12409 // Replace Java patterns with C strftime patterns.
12410 // Uses multi-pass to handle patterns that conflict.
12411 // First pass: replace multi-char patterns (longer first)
12412 let result = fmt
12413 .replace("yyyy", "%Y")
12414 .replace("SSSSSS", "%f")
12415 .replace("EEEE", "%W")
12416 .replace("MM", "%m")
12417 .replace("dd", "%d")
12418 .replace("HH", "%H")
12419 .replace("mm", "%M")
12420 .replace("ss", "%S")
12421 .replace("yy", "%y");
12422 // Second pass: handle single-char timezone patterns
12423 // z -> %Z (timezone name), Z -> %z (timezone offset)
12424 // Must be careful not to replace 'z'/'Z' inside already-replaced %Y, %M etc.
12425 let mut out = String::new();
12426 let chars: Vec<char> = result.chars().collect();
12427 let mut i = 0;
12428 while i < chars.len() {
12429 if chars[i] == '%' && i + 1 < chars.len() {
12430 // Already a format specifier, skip both chars
12431 out.push(chars[i]);
12432 out.push(chars[i + 1]);
12433 i += 2;
12434 } else if chars[i] == 'z' {
12435 out.push_str("%Z");
12436 i += 1;
12437 } else if chars[i] == 'Z' {
12438 out.push_str("%z");
12439 i += 1;
12440 } else {
12441 out.push(chars[i]);
12442 i += 1;
12443 }
12444 }
12445 out
12446 }
12447
12448 fn java_to_presto_format(fmt: &str) -> String {
12449 // Presto uses %T for HH:MM:SS
12450 let c_fmt = java_to_c_format(fmt);
12451 c_fmt.replace("%H:%M:%S", "%T")
12452 }
12453
12454 fn java_to_bq_format(fmt: &str) -> String {
12455 // BigQuery uses %F for yyyy-MM-dd and %T for HH:mm:ss
12456 let c_fmt = java_to_c_format(fmt);
12457 c_fmt.replace("%Y-%m-%d", "%F").replace("%H:%M:%S", "%T")
12458 }
12459
12460 // For Hive source, CAST string literals to appropriate type
12461 let cast_val = if is_hive_source {
12462 match &val {
12463 Expression::Literal(
12464 crate::expressions::Literal::String(_),
12465 ) => {
12466 match target {
12467 DialectType::DuckDB
12468 | DialectType::Presto
12469 | DialectType::Trino
12470 | DialectType::Athena => {
12471 Self::ensure_cast_timestamp(val.clone())
12472 }
12473 DialectType::BigQuery => {
12474 // BigQuery: CAST(val AS DATETIME)
12475 Expression::Cast(Box::new(
12476 crate::expressions::Cast {
12477 this: val.clone(),
12478 to: DataType::Custom {
12479 name: "DATETIME".to_string(),
12480 },
12481 trailing_comments: vec![],
12482 double_colon_syntax: false,
12483 format: None,
12484 default: None,
12485 },
12486 ))
12487 }
12488 _ => val.clone(),
12489 }
12490 }
12491 // For CAST(x AS DATE) or DATE literal, Presto needs CAST(CAST(x AS DATE) AS TIMESTAMP)
12492 Expression::Cast(c)
12493 if matches!(c.to, DataType::Date)
12494 && matches!(
12495 target,
12496 DialectType::Presto
12497 | DialectType::Trino
12498 | DialectType::Athena
12499 ) =>
12500 {
12501 Expression::Cast(Box::new(crate::expressions::Cast {
12502 this: val.clone(),
12503 to: DataType::Timestamp {
12504 timezone: false,
12505 precision: None,
12506 },
12507 trailing_comments: vec![],
12508 double_colon_syntax: false,
12509 format: None,
12510 default: None,
12511 }))
12512 }
12513 Expression::Literal(crate::expressions::Literal::Date(
12514 _,
12515 )) if matches!(
12516 target,
12517 DialectType::Presto
12518 | DialectType::Trino
12519 | DialectType::Athena
12520 ) =>
12521 {
12522 // DATE 'x' -> CAST(CAST('x' AS DATE) AS TIMESTAMP)
12523 let cast_date = Self::date_literal_to_cast(val.clone());
12524 Expression::Cast(Box::new(crate::expressions::Cast {
12525 this: cast_date,
12526 to: DataType::Timestamp {
12527 timezone: false,
12528 precision: None,
12529 },
12530 trailing_comments: vec![],
12531 double_colon_syntax: false,
12532 format: None,
12533 default: None,
12534 }))
12535 }
12536 _ => val.clone(),
12537 }
12538 } else {
12539 val.clone()
12540 };
12541
12542 match target {
12543 DialectType::DuckDB => {
12544 if let Expression::Literal(
12545 crate::expressions::Literal::String(s),
12546 ) = fmt_expr
12547 {
12548 let c_fmt = if is_hive_source {
12549 java_to_c_format(s)
12550 } else {
12551 s.clone()
12552 };
12553 Ok(Expression::Function(Box::new(Function::new(
12554 "STRFTIME".to_string(),
12555 vec![cast_val, Expression::string(&c_fmt)],
12556 ))))
12557 } else {
12558 Ok(Expression::Function(Box::new(Function::new(
12559 "STRFTIME".to_string(),
12560 vec![cast_val, fmt_expr.clone()],
12561 ))))
12562 }
12563 }
12564 DialectType::Presto
12565 | DialectType::Trino
12566 | DialectType::Athena => {
12567 if is_hive_source {
12568 if let Expression::Literal(
12569 crate::expressions::Literal::String(s),
12570 ) = fmt_expr
12571 {
12572 let p_fmt = java_to_presto_format(s);
12573 Ok(Expression::Function(Box::new(Function::new(
12574 "DATE_FORMAT".to_string(),
12575 vec![cast_val, Expression::string(&p_fmt)],
12576 ))))
12577 } else {
12578 Ok(Expression::Function(Box::new(Function::new(
12579 "DATE_FORMAT".to_string(),
12580 vec![cast_val, fmt_expr.clone()],
12581 ))))
12582 }
12583 } else {
12584 Ok(Expression::Function(Box::new(Function::new(
12585 "DATE_FORMAT".to_string(),
12586 f.args,
12587 ))))
12588 }
12589 }
12590 DialectType::BigQuery => {
12591 // DATE_FORMAT(val, fmt) -> FORMAT_DATE(fmt, val)
12592 if let Expression::Literal(
12593 crate::expressions::Literal::String(s),
12594 ) = fmt_expr
12595 {
12596 let bq_fmt = if is_hive_source {
12597 java_to_bq_format(s)
12598 } else {
12599 java_to_c_format(s)
12600 };
12601 Ok(Expression::Function(Box::new(Function::new(
12602 "FORMAT_DATE".to_string(),
12603 vec![Expression::string(&bq_fmt), cast_val],
12604 ))))
12605 } else {
12606 Ok(Expression::Function(Box::new(Function::new(
12607 "FORMAT_DATE".to_string(),
12608 vec![fmt_expr.clone(), cast_val],
12609 ))))
12610 }
12611 }
12612 DialectType::PostgreSQL | DialectType::Redshift => {
12613 if let Expression::Literal(
12614 crate::expressions::Literal::String(s),
12615 ) = fmt_expr
12616 {
12617 let pg_fmt = s
12618 .replace("yyyy", "YYYY")
12619 .replace("MM", "MM")
12620 .replace("dd", "DD")
12621 .replace("HH", "HH24")
12622 .replace("mm", "MI")
12623 .replace("ss", "SS")
12624 .replace("yy", "YY");
12625 Ok(Expression::Function(Box::new(Function::new(
12626 "TO_CHAR".to_string(),
12627 vec![val, Expression::string(&pg_fmt)],
12628 ))))
12629 } else {
12630 Ok(Expression::Function(Box::new(Function::new(
12631 "TO_CHAR".to_string(),
12632 vec![val, fmt_expr.clone()],
12633 ))))
12634 }
12635 }
12636 _ => Ok(Expression::Function(f)),
12637 }
12638 }
12639 // DATEDIFF(unit, start, end) - 3-arg form
12640 // SQLite uses DATEDIFF(date1, date2, unit_string) instead
12641 "DATEDIFF" if f.args.len() == 3 => {
12642 let mut args = f.args;
12643 // SQLite source: args = (date1, date2, unit_string)
12644 // Standard source: args = (unit, start, end)
12645 let (_arg0, arg1, arg2, unit_str) =
12646 if matches!(source, DialectType::SQLite) {
12647 let date1 = args.remove(0);
12648 let date2 = args.remove(0);
12649 let unit_expr = args.remove(0);
12650 let unit_s = Self::get_unit_str_static(&unit_expr);
12651
12652 // For SQLite target, generate JULIANDAY arithmetic directly
12653 if matches!(target, DialectType::SQLite) {
12654 let jd_first = Expression::Function(Box::new(
12655 Function::new("JULIANDAY".to_string(), vec![date1]),
12656 ));
12657 let jd_second = Expression::Function(Box::new(
12658 Function::new("JULIANDAY".to_string(), vec![date2]),
12659 ));
12660 let diff = Expression::Sub(Box::new(
12661 crate::expressions::BinaryOp::new(
12662 jd_first, jd_second,
12663 ),
12664 ));
12665 let paren_diff = Expression::Paren(Box::new(
12666 crate::expressions::Paren {
12667 this: diff,
12668 trailing_comments: Vec::new(),
12669 },
12670 ));
12671 let adjusted = match unit_s.as_str() {
12672 "HOUR" => Expression::Mul(Box::new(
12673 crate::expressions::BinaryOp::new(
12674 paren_diff,
12675 Expression::Literal(Literal::Number(
12676 "24.0".to_string(),
12677 )),
12678 ),
12679 )),
12680 "MINUTE" => Expression::Mul(Box::new(
12681 crate::expressions::BinaryOp::new(
12682 paren_diff,
12683 Expression::Literal(Literal::Number(
12684 "1440.0".to_string(),
12685 )),
12686 ),
12687 )),
12688 "SECOND" => Expression::Mul(Box::new(
12689 crate::expressions::BinaryOp::new(
12690 paren_diff,
12691 Expression::Literal(Literal::Number(
12692 "86400.0".to_string(),
12693 )),
12694 ),
12695 )),
12696 "MONTH" => Expression::Div(Box::new(
12697 crate::expressions::BinaryOp::new(
12698 paren_diff,
12699 Expression::Literal(Literal::Number(
12700 "30.0".to_string(),
12701 )),
12702 ),
12703 )),
12704 "YEAR" => Expression::Div(Box::new(
12705 crate::expressions::BinaryOp::new(
12706 paren_diff,
12707 Expression::Literal(Literal::Number(
12708 "365.0".to_string(),
12709 )),
12710 ),
12711 )),
12712 _ => paren_diff,
12713 };
12714 return Ok(Expression::Cast(Box::new(Cast {
12715 this: adjusted,
12716 to: DataType::Int {
12717 length: None,
12718 integer_spelling: true,
12719 },
12720 trailing_comments: vec![],
12721 double_colon_syntax: false,
12722 format: None,
12723 default: None,
12724 })));
12725 }
12726
12727 // For other targets, remap to standard (unit, start, end) form
12728 let unit_ident =
12729 Expression::Identifier(Identifier::new(&unit_s));
12730 (unit_ident, date1, date2, unit_s)
12731 } else {
12732 let arg0 = args.remove(0);
12733 let arg1 = args.remove(0);
12734 let arg2 = args.remove(0);
12735 let unit_s = Self::get_unit_str_static(&arg0);
12736 (arg0, arg1, arg2, unit_s)
12737 };
12738
12739 // For Hive/Spark source, string literal dates need to be cast
12740 // Note: Databricks is excluded - it handles string args like standard SQL
12741 let is_hive_spark =
12742 matches!(source, DialectType::Hive | DialectType::Spark);
12743
12744 match target {
12745 DialectType::Snowflake => {
12746 let unit =
12747 Expression::Identifier(Identifier::new(&unit_str));
12748 // Use ensure_to_date_preserved to add TO_DATE with a marker
12749 // that prevents the Snowflake TO_DATE handler from converting it to CAST
12750 let d1 = if is_hive_spark {
12751 Self::ensure_to_date_preserved(arg1)
12752 } else {
12753 arg1
12754 };
12755 let d2 = if is_hive_spark {
12756 Self::ensure_to_date_preserved(arg2)
12757 } else {
12758 arg2
12759 };
12760 Ok(Expression::Function(Box::new(Function::new(
12761 "DATEDIFF".to_string(),
12762 vec![unit, d1, d2],
12763 ))))
12764 }
12765 DialectType::Redshift => {
12766 let unit =
12767 Expression::Identifier(Identifier::new(&unit_str));
12768 let d1 = if is_hive_spark {
12769 Self::ensure_cast_date(arg1)
12770 } else {
12771 arg1
12772 };
12773 let d2 = if is_hive_spark {
12774 Self::ensure_cast_date(arg2)
12775 } else {
12776 arg2
12777 };
12778 Ok(Expression::Function(Box::new(Function::new(
12779 "DATEDIFF".to_string(),
12780 vec![unit, d1, d2],
12781 ))))
12782 }
12783 DialectType::TSQL => {
12784 let unit =
12785 Expression::Identifier(Identifier::new(&unit_str));
12786 Ok(Expression::Function(Box::new(Function::new(
12787 "DATEDIFF".to_string(),
12788 vec![unit, arg1, arg2],
12789 ))))
12790 }
12791 DialectType::DuckDB => {
12792 let is_redshift_tsql = matches!(
12793 source,
12794 DialectType::Redshift | DialectType::TSQL
12795 );
12796 if is_hive_spark {
12797 // For Hive/Spark source, CAST string args to DATE and emit DATE_DIFF directly
12798 let d1 = Self::ensure_cast_date(arg1);
12799 let d2 = Self::ensure_cast_date(arg2);
12800 Ok(Expression::Function(Box::new(Function::new(
12801 "DATE_DIFF".to_string(),
12802 vec![Expression::string(&unit_str), d1, d2],
12803 ))))
12804 } else if matches!(source, DialectType::Snowflake) {
12805 // For Snowflake source: special handling per unit
12806 match unit_str.as_str() {
12807 "NANOSECOND" => {
12808 // DATEDIFF(NANOSECOND, start, end) -> EPOCH_NS(CAST(end AS TIMESTAMP_NS)) - EPOCH_NS(CAST(start AS TIMESTAMP_NS))
12809 fn cast_to_timestamp_ns(
12810 expr: Expression,
12811 ) -> Expression
12812 {
12813 Expression::Cast(Box::new(Cast {
12814 this: expr,
12815 to: DataType::Custom {
12816 name: "TIMESTAMP_NS".to_string(),
12817 },
12818 trailing_comments: vec![],
12819 double_colon_syntax: false,
12820 format: None,
12821 default: None,
12822 }))
12823 }
12824 let epoch_end = Expression::Function(Box::new(
12825 Function::new(
12826 "EPOCH_NS".to_string(),
12827 vec![cast_to_timestamp_ns(arg2)],
12828 ),
12829 ));
12830 let epoch_start = Expression::Function(
12831 Box::new(Function::new(
12832 "EPOCH_NS".to_string(),
12833 vec![cast_to_timestamp_ns(arg1)],
12834 )),
12835 );
12836 Ok(Expression::Sub(Box::new(BinaryOp::new(
12837 epoch_end,
12838 epoch_start,
12839 ))))
12840 }
12841 "WEEK" => {
12842 // DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST(x AS DATE)), DATE_TRUNC('WEEK', CAST(y AS DATE)))
12843 let d1 = Self::force_cast_date(arg1);
12844 let d2 = Self::force_cast_date(arg2);
12845 let dt1 = Expression::Function(Box::new(
12846 Function::new(
12847 "DATE_TRUNC".to_string(),
12848 vec![Expression::string("WEEK"), d1],
12849 ),
12850 ));
12851 let dt2 = Expression::Function(Box::new(
12852 Function::new(
12853 "DATE_TRUNC".to_string(),
12854 vec![Expression::string("WEEK"), d2],
12855 ),
12856 ));
12857 Ok(Expression::Function(Box::new(
12858 Function::new(
12859 "DATE_DIFF".to_string(),
12860 vec![
12861 Expression::string(&unit_str),
12862 dt1,
12863 dt2,
12864 ],
12865 ),
12866 )))
12867 }
12868 _ => {
12869 // YEAR, MONTH, QUARTER, DAY, etc.: CAST to DATE
12870 let d1 = Self::force_cast_date(arg1);
12871 let d2 = Self::force_cast_date(arg2);
12872 Ok(Expression::Function(Box::new(
12873 Function::new(
12874 "DATE_DIFF".to_string(),
12875 vec![
12876 Expression::string(&unit_str),
12877 d1,
12878 d2,
12879 ],
12880 ),
12881 )))
12882 }
12883 }
12884 } else if is_redshift_tsql {
12885 // For Redshift/TSQL source, CAST args to TIMESTAMP (always)
12886 let d1 = Self::force_cast_timestamp(arg1);
12887 let d2 = Self::force_cast_timestamp(arg2);
12888 Ok(Expression::Function(Box::new(Function::new(
12889 "DATE_DIFF".to_string(),
12890 vec![Expression::string(&unit_str), d1, d2],
12891 ))))
12892 } else {
12893 // Keep as DATEDIFF so DuckDB's transform_datediff handles
12894 // DATE_TRUNC for WEEK, CAST for string literals, etc.
12895 let unit =
12896 Expression::Identifier(Identifier::new(&unit_str));
12897 Ok(Expression::Function(Box::new(Function::new(
12898 "DATEDIFF".to_string(),
12899 vec![unit, arg1, arg2],
12900 ))))
12901 }
12902 }
12903 DialectType::BigQuery => {
12904 let is_redshift_tsql = matches!(
12905 source,
12906 DialectType::Redshift
12907 | DialectType::TSQL
12908 | DialectType::Snowflake
12909 );
12910 let cast_d1 = if is_hive_spark {
12911 Self::ensure_cast_date(arg1)
12912 } else if is_redshift_tsql {
12913 Self::force_cast_datetime(arg1)
12914 } else {
12915 Self::ensure_cast_datetime(arg1)
12916 };
12917 let cast_d2 = if is_hive_spark {
12918 Self::ensure_cast_date(arg2)
12919 } else if is_redshift_tsql {
12920 Self::force_cast_datetime(arg2)
12921 } else {
12922 Self::ensure_cast_datetime(arg2)
12923 };
12924 let unit =
12925 Expression::Identifier(Identifier::new(&unit_str));
12926 Ok(Expression::Function(Box::new(Function::new(
12927 "DATE_DIFF".to_string(),
12928 vec![cast_d2, cast_d1, unit],
12929 ))))
12930 }
12931 DialectType::Presto
12932 | DialectType::Trino
12933 | DialectType::Athena => {
12934 // For Hive/Spark source, string literals need double-cast: CAST(CAST(x AS TIMESTAMP) AS DATE)
12935 // For Redshift/TSQL source, args need CAST to TIMESTAMP (always)
12936 let is_redshift_tsql = matches!(
12937 source,
12938 DialectType::Redshift
12939 | DialectType::TSQL
12940 | DialectType::Snowflake
12941 );
12942 let d1 = if is_hive_spark {
12943 Self::double_cast_timestamp_date(arg1)
12944 } else if is_redshift_tsql {
12945 Self::force_cast_timestamp(arg1)
12946 } else {
12947 arg1
12948 };
12949 let d2 = if is_hive_spark {
12950 Self::double_cast_timestamp_date(arg2)
12951 } else if is_redshift_tsql {
12952 Self::force_cast_timestamp(arg2)
12953 } else {
12954 arg2
12955 };
12956 Ok(Expression::Function(Box::new(Function::new(
12957 "DATE_DIFF".to_string(),
12958 vec![Expression::string(&unit_str), d1, d2],
12959 ))))
12960 }
12961 DialectType::Hive => match unit_str.as_str() {
12962 "MONTH" => Ok(Expression::Cast(Box::new(Cast {
12963 this: Expression::Function(Box::new(Function::new(
12964 "MONTHS_BETWEEN".to_string(),
12965 vec![arg2, arg1],
12966 ))),
12967 to: DataType::Int {
12968 length: None,
12969 integer_spelling: false,
12970 },
12971 trailing_comments: vec![],
12972 double_colon_syntax: false,
12973 format: None,
12974 default: None,
12975 }))),
12976 "WEEK" => Ok(Expression::Cast(Box::new(Cast {
12977 this: Expression::Div(Box::new(
12978 crate::expressions::BinaryOp::new(
12979 Expression::Function(Box::new(Function::new(
12980 "DATEDIFF".to_string(),
12981 vec![arg2, arg1],
12982 ))),
12983 Expression::number(7),
12984 ),
12985 )),
12986 to: DataType::Int {
12987 length: None,
12988 integer_spelling: false,
12989 },
12990 trailing_comments: vec![],
12991 double_colon_syntax: false,
12992 format: None,
12993 default: None,
12994 }))),
12995 _ => Ok(Expression::Function(Box::new(Function::new(
12996 "DATEDIFF".to_string(),
12997 vec![arg2, arg1],
12998 )))),
12999 },
13000 DialectType::Spark | DialectType::Databricks => {
13001 let unit =
13002 Expression::Identifier(Identifier::new(&unit_str));
13003 Ok(Expression::Function(Box::new(Function::new(
13004 "DATEDIFF".to_string(),
13005 vec![unit, arg1, arg2],
13006 ))))
13007 }
13008 _ => {
13009 // For Hive/Spark source targeting PostgreSQL etc., cast string literals to DATE
13010 let d1 = if is_hive_spark {
13011 Self::ensure_cast_date(arg1)
13012 } else {
13013 arg1
13014 };
13015 let d2 = if is_hive_spark {
13016 Self::ensure_cast_date(arg2)
13017 } else {
13018 arg2
13019 };
13020 let unit =
13021 Expression::Identifier(Identifier::new(&unit_str));
13022 Ok(Expression::Function(Box::new(Function::new(
13023 "DATEDIFF".to_string(),
13024 vec![unit, d1, d2],
13025 ))))
13026 }
13027 }
13028 }
13029 // DATEDIFF(end, start) - 2-arg form from Hive/MySQL
"DATEDIFF" if f.args.len() == 2 => {
    // 2-arg DATEDIFF(end, start): day-count form used by Hive/MySQL.
    // Note the argument order is (end, start) - the reverse of the
    // 3-arg (unit, start, end) form handled above.
    let mut args = f.args;
    let arg0 = args.remove(0); // end date
    let arg1 = args.remove(0); // start date

    // Helper: unwrap TO_DATE(x) -> x (extracts inner arg)
    // Also recognizes TryCast/Cast to DATE that may have been produced by
    // cross-dialect TO_DATE -> TRY_CAST conversion
    let unwrap_to_date = |e: Expression| -> (Expression, bool) {
        if let Expression::Function(ref f) = e {
            if f.name.eq_ignore_ascii_case("TO_DATE")
                && f.args.len() == 1
            {
                return (f.args[0].clone(), true);
            }
        }
        // Also recognize TryCast(x, Date) as an already-converted TO_DATE
        if let Expression::TryCast(ref c) = e {
            if matches!(c.to, DataType::Date) {
                return (e, true); // Already properly cast, return as-is
            }
        }
        (e, false)
    };

    match target {
        DialectType::DuckDB => {
            // For Hive source, always CAST to DATE
            // If arg is TO_DATE(x) or TRY_CAST(x AS DATE), use it directly
            let cast_d0 = if matches!(
                source,
                DialectType::Hive
                    | DialectType::Spark
                    | DialectType::Databricks
            ) {
                let (inner, was_to_date) = unwrap_to_date(arg1);
                if was_to_date {
                    // Already a date expression, use directly
                    if matches!(&inner, Expression::TryCast(_)) {
                        inner // Already TRY_CAST(x AS DATE)
                    } else {
                        Self::try_cast_date(inner)
                    }
                } else {
                    Self::force_cast_date(inner)
                }
            } else {
                Self::ensure_cast_date(arg1)
            };
            let cast_d1 = if matches!(
                source,
                DialectType::Hive
                    | DialectType::Spark
                    | DialectType::Databricks
            ) {
                let (inner, was_to_date) = unwrap_to_date(arg0);
                if was_to_date {
                    if matches!(&inner, Expression::TryCast(_)) {
                        inner
                    } else {
                        Self::try_cast_date(inner)
                    }
                } else {
                    Self::force_cast_date(inner)
                }
            } else {
                Self::ensure_cast_date(arg0)
            };
            // DATE_DIFF('DAY', start, end): cast_d0 came from arg1
            // (start) and cast_d1 from arg0 (end), deliberately
            // reversing the source's (end, start) order.
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_DIFF".to_string(),
                vec![Expression::string("DAY"), cast_d0, cast_d1],
            ))))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // For Hive/Spark source, apply double_cast_timestamp_date
            // For other sources (MySQL etc.), just swap args without casting
            if matches!(
                source,
                DialectType::Hive
                    | DialectType::Spark
                    | DialectType::Databricks
            ) {
                let cast_fn = |e: Expression| -> Expression {
                    let (inner, was_to_date) = unwrap_to_date(e);
                    if was_to_date {
                        // NOTE(review): when a TO_DATE wrapper was
                        // unwrapped, double_cast_timestamp_date is
                        // applied twice, yielding four nested CASTs.
                        // Looks deliberate (parity with the reference
                        // transpiler's TO_DATE expansion) - confirm.
                        let first_cast =
                            Self::double_cast_timestamp_date(inner);
                        Self::double_cast_timestamp_date(first_cast)
                    } else {
                        Self::double_cast_timestamp_date(inner)
                    }
                };
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_DIFF".to_string(),
                    vec![
                        Expression::string("DAY"),
                        cast_fn(arg1),
                        cast_fn(arg0),
                    ],
                ))))
            } else {
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_DIFF".to_string(),
                    vec![Expression::string("DAY"), arg1, arg0],
                ))))
            }
        }
        DialectType::Redshift => {
            // Redshift uses explicit 3-arg DATEDIFF(DAY, start, end).
            let unit = Expression::Identifier(Identifier::new("DAY"));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![unit, arg1, arg0],
            ))))
        }
        // All other targets keep the original 2-arg (end, start) form.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "DATEDIFF".to_string(),
            vec![arg0, arg1],
        )))),
    }
}
13152 // DATE_DIFF(unit, start, end) - 3-arg with string unit (ClickHouse/DuckDB style)
13153 "DATE_DIFF" if f.args.len() == 3 => {
13154 let mut args = f.args;
13155 let arg0 = args.remove(0);
13156 let arg1 = args.remove(0);
13157 let arg2 = args.remove(0);
13158 let unit_str = Self::get_unit_str_static(&arg0);
13159
13160 match target {
13161 DialectType::DuckDB => {
13162 // DuckDB: DATE_DIFF('UNIT', start, end)
13163 Ok(Expression::Function(Box::new(Function::new(
13164 "DATE_DIFF".to_string(),
13165 vec![Expression::string(&unit_str), arg1, arg2],
13166 ))))
13167 }
13168 DialectType::Presto
13169 | DialectType::Trino
13170 | DialectType::Athena => {
13171 Ok(Expression::Function(Box::new(Function::new(
13172 "DATE_DIFF".to_string(),
13173 vec![Expression::string(&unit_str), arg1, arg2],
13174 ))))
13175 }
13176 DialectType::ClickHouse => {
13177 // ClickHouse: DATE_DIFF(UNIT, start, end) - identifier unit
13178 let unit =
13179 Expression::Identifier(Identifier::new(&unit_str));
13180 Ok(Expression::Function(Box::new(Function::new(
13181 "DATE_DIFF".to_string(),
13182 vec![unit, arg1, arg2],
13183 ))))
13184 }
13185 DialectType::Snowflake | DialectType::Redshift => {
13186 let unit =
13187 Expression::Identifier(Identifier::new(&unit_str));
13188 Ok(Expression::Function(Box::new(Function::new(
13189 "DATEDIFF".to_string(),
13190 vec![unit, arg1, arg2],
13191 ))))
13192 }
13193 _ => {
13194 let unit =
13195 Expression::Identifier(Identifier::new(&unit_str));
13196 Ok(Expression::Function(Box::new(Function::new(
13197 "DATEDIFF".to_string(),
13198 vec![unit, arg1, arg2],
13199 ))))
13200 }
13201 }
13202 }
13203 // DATEADD(unit, val, date) - 3-arg form
"DATEADD" if f.args.len() == 3 => {
    // DATEADD(unit, amount, date) - 3-arg form (TSQL/Snowflake style).
    // Rewrites the call into whatever the target dialect uses to add a
    // unit-scaled amount to a date/timestamp expression.
    let mut args = f.args;
    let arg0 = args.remove(0); // unit
    let arg1 = args.remove(0); // amount to add
    let arg2 = args.remove(0); // base date/timestamp
    let unit_str = Self::get_unit_str_static(&arg0);

    // Normalize TSQL unit abbreviations to standard names
    let unit_str = match unit_str.as_str() {
        "YY" | "YYYY" => "YEAR".to_string(),
        "QQ" | "Q" => "QUARTER".to_string(),
        "MM" | "M" => "MONTH".to_string(),
        "WK" | "WW" => "WEEK".to_string(),
        "DD" | "D" | "DY" => "DAY".to_string(),
        "HH" => "HOUR".to_string(),
        "MI" | "N" => "MINUTE".to_string(),
        "SS" | "S" => "SECOND".to_string(),
        "MS" => "MILLISECOND".to_string(),
        "MCS" | "US" => "MICROSECOND".to_string(),
        _ => unit_str,
    };
    match target {
        DialectType::Snowflake => {
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            // Cast string literal to TIMESTAMP, but not for Snowflake source
            // (Snowflake natively accepts string literals in DATEADD)
            let arg2 = if matches!(
                &arg2,
                Expression::Literal(Literal::String(_))
            ) && !matches!(source, DialectType::Snowflake)
            {
                Expression::Cast(Box::new(Cast {
                    this: arg2,
                    to: DataType::Timestamp {
                        precision: None,
                        timezone: false,
                    },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                }))
            } else {
                arg2
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
        DialectType::TSQL => {
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            // Cast string literal to DATETIME2, but not when source is Spark/Databricks family
            let arg2 = if matches!(
                &arg2,
                Expression::Literal(Literal::String(_))
            ) && !matches!(
                source,
                DialectType::Spark
                    | DialectType::Databricks
                    | DialectType::Hive
            ) {
                Expression::Cast(Box::new(Cast {
                    this: arg2,
                    to: DataType::Custom {
                        name: "DATETIME2".to_string(),
                    },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                }))
            } else {
                arg2
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
        DialectType::Redshift => {
            // Redshift accepts DATEADD(UNIT, amount, date) as-is.
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
        DialectType::Databricks => {
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            // Sources with native DATEADD (TSQL, Databricks, Snowflake) -> DATEADD
            // Other sources (Redshift TsOrDsAdd, etc.) -> DATE_ADD
            let func_name = if matches!(
                source,
                DialectType::TSQL
                    | DialectType::Fabric
                    | DialectType::Databricks
                    | DialectType::Snowflake
            ) {
                "DATEADD"
            } else {
                "DATE_ADD"
            };
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
        DialectType::DuckDB => {
            // Special handling for NANOSECOND from Snowflake
            if unit_str == "NANOSECOND"
                && matches!(source, DialectType::Snowflake)
            {
                // DATEADD(NANOSECOND, offset, ts) -> MAKE_TIMESTAMP_NS(EPOCH_NS(CAST(ts AS TIMESTAMP_NS)) + offset)
                let cast_ts = Expression::Cast(Box::new(Cast {
                    this: arg2,
                    to: DataType::Custom {
                        name: "TIMESTAMP_NS".to_string(),
                    },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                }));
                let epoch_ns =
                    Expression::Function(Box::new(Function::new(
                        "EPOCH_NS".to_string(),
                        vec![cast_ts],
                    )));
                let sum = Expression::Add(Box::new(BinaryOp::new(
                    epoch_ns, arg1,
                )));
                Ok(Expression::Function(Box::new(Function::new(
                    "MAKE_TIMESTAMP_NS".to_string(),
                    vec![sum],
                ))))
            } else {
                // DuckDB: convert to date + INTERVAL syntax with CAST
                let iu = Self::parse_interval_unit_static(&unit_str);
                let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                    this: Some(arg1),
                    unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: iu, use_plural: false }),
                }));
                // Cast string literal to TIMESTAMP
                let arg2 = if matches!(
                    &arg2,
                    Expression::Literal(Literal::String(_))
                ) {
                    Expression::Cast(Box::new(Cast {
                        this: arg2,
                        to: DataType::Timestamp {
                            precision: None,
                            timezone: false,
                        },
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    }))
                } else {
                    arg2
                };
                Ok(Expression::Add(Box::new(
                    crate::expressions::BinaryOp::new(arg2, interval),
                )))
            }
        }
        DialectType::Spark => {
            // For TSQL source: convert to ADD_MONTHS/DATE_ADD(date, val)
            // For other sources: keep 3-arg DATE_ADD(UNIT, val, date) form
            if matches!(source, DialectType::TSQL | DialectType::Fabric)
            {
                // Multiplies a literal amount at transpile time when
                // possible; otherwise emits a runtime `expr * factor`.
                fn multiply_expr_spark(
                    expr: Expression,
                    factor: i64,
                ) -> Expression
                {
                    if let Expression::Literal(
                        crate::expressions::Literal::Number(n),
                    ) = &expr
                    {
                        if let Ok(val) = n.parse::<i64>() {
                            return Expression::Literal(
                                crate::expressions::Literal::Number(
                                    (val * factor).to_string(),
                                ),
                            );
                        }
                    }
                    Expression::Mul(Box::new(
                        crate::expressions::BinaryOp::new(
                            expr,
                            Expression::Literal(
                                crate::expressions::Literal::Number(
                                    factor.to_string(),
                                ),
                            ),
                        ),
                    ))
                }
                let normalized_unit = match unit_str.as_str() {
                    "YEAR" | "YY" | "YYYY" => "YEAR",
                    "QUARTER" | "QQ" | "Q" => "QUARTER",
                    "MONTH" | "MM" | "M" => "MONTH",
                    "WEEK" | "WK" | "WW" => "WEEK",
                    "DAY" | "DD" | "D" | "DY" => "DAY",
                    _ => &unit_str,
                };
                match normalized_unit {
                    // YEAR/QUARTER scale to months for ADD_MONTHS.
                    "YEAR" => {
                        let months = multiply_expr_spark(arg1, 12);
                        Ok(Expression::Function(Box::new(
                            Function::new(
                                "ADD_MONTHS".to_string(),
                                vec![arg2, months],
                            ),
                        )))
                    }
                    "QUARTER" => {
                        let months = multiply_expr_spark(arg1, 3);
                        Ok(Expression::Function(Box::new(
                            Function::new(
                                "ADD_MONTHS".to_string(),
                                vec![arg2, months],
                            ),
                        )))
                    }
                    "MONTH" => Ok(Expression::Function(Box::new(
                        Function::new(
                            "ADD_MONTHS".to_string(),
                            vec![arg2, arg1],
                        ),
                    ))),
                    // WEEK scales to days for 2-arg DATE_ADD.
                    "WEEK" => {
                        let days = multiply_expr_spark(arg1, 7);
                        Ok(Expression::Function(Box::new(
                            Function::new(
                                "DATE_ADD".to_string(),
                                vec![arg2, days],
                            ),
                        )))
                    }
                    "DAY" => Ok(Expression::Function(Box::new(
                        Function::new(
                            "DATE_ADD".to_string(),
                            vec![arg2, arg1],
                        ),
                    ))),
                    // Sub-day units fall back to the 3-arg form.
                    _ => {
                        let unit = Expression::Identifier(
                            Identifier::new(&unit_str),
                        );
                        Ok(Expression::Function(Box::new(
                            Function::new(
                                "DATE_ADD".to_string(),
                                vec![unit, arg1, arg2],
                            ),
                        )))
                    }
                }
            } else {
                // Non-TSQL source: keep 3-arg DATE_ADD(UNIT, val, date)
                let unit =
                    Expression::Identifier(Identifier::new(&unit_str));
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![unit, arg1, arg2],
                ))))
            }
        }
        DialectType::Hive => match unit_str.as_str() {
            // Hive has no unit argument: MONTH maps to ADD_MONTHS,
            // everything else to day-based 2-arg DATE_ADD.
            "MONTH" => {
                Ok(Expression::Function(Box::new(Function::new(
                    "ADD_MONTHS".to_string(),
                    vec![arg2, arg1],
                ))))
            }
            _ => Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![arg2, arg1],
            )))),
        },
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // Cast string literal date to TIMESTAMP
            let arg2 = if matches!(
                &arg2,
                Expression::Literal(Literal::String(_))
            ) {
                Expression::Cast(Box::new(Cast {
                    this: arg2,
                    to: DataType::Timestamp {
                        precision: None,
                        timezone: false,
                    },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                }))
            } else {
                arg2
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![Expression::string(&unit_str), arg1, arg2],
            ))))
        }
        DialectType::MySQL => {
            // MySQL gets a dedicated AST node rendered later as
            // DATE_ADD(date, INTERVAL amount UNIT).
            let iu = Self::parse_interval_unit_static(&unit_str);
            Ok(Expression::DateAdd(Box::new(
                crate::expressions::DateAddFunc {
                    this: arg2,
                    interval: arg1,
                    unit: iu,
                },
            )))
        }
        DialectType::PostgreSQL => {
            // Cast string literal date to TIMESTAMP
            let arg2 = if matches!(
                &arg2,
                Expression::Literal(Literal::String(_))
            ) {
                Expression::Cast(Box::new(Cast {
                    this: arg2,
                    to: DataType::Timestamp {
                        precision: None,
                        timezone: false,
                    },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                }))
            } else {
                arg2
            };
            // PostgreSQL: date + INTERVAL '<amount> <UNIT>'; the
            // amount is stringified into the interval literal.
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(Expression::string(&format!(
                        "{} {}",
                        Self::expr_to_string_static(&arg1),
                        unit_str
                    ))),
                    unit: None,
                },
            ));
            Ok(Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(arg2, interval),
            )))
        }
        DialectType::BigQuery => {
            let iu = Self::parse_interval_unit_static(&unit_str);
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(arg1),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit: iu,
                            use_plural: false,
                        },
                    ),
                },
            ));
            // Non-TSQL sources: CAST string literal to DATETIME
            let arg2 = if !matches!(
                source,
                DialectType::TSQL | DialectType::Fabric
            ) && matches!(
                &arg2,
                Expression::Literal(Literal::String(_))
            ) {
                Expression::Cast(Box::new(Cast {
                    this: arg2,
                    to: DataType::Custom {
                        name: "DATETIME".to_string(),
                    },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                }))
            } else {
                arg2
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![arg2, interval],
            ))))
        }
        // Remaining targets: pass DATEADD through unchanged.
        _ => {
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
    }
}
13609 // DATE_ADD - 3-arg: either (unit, val, date) from Presto/ClickHouse
13610 // or (date, val, 'UNIT') from Generic canonical form
13611 "DATE_ADD" if f.args.len() == 3 => {
13612 let mut args = f.args;
13613 let arg0 = args.remove(0);
13614 let arg1 = args.remove(0);
13615 let arg2 = args.remove(0);
13616 // Detect Generic canonical form: DATE_ADD(date, amount, 'UNIT')
13617 // where arg2 is a string literal matching a unit name
13618 let arg2_unit = match &arg2 {
13619 Expression::Literal(Literal::String(s)) => {
13620 let u = s.to_uppercase();
13621 if matches!(
13622 u.as_str(),
13623 "DAY"
13624 | "MONTH"
13625 | "YEAR"
13626 | "HOUR"
13627 | "MINUTE"
13628 | "SECOND"
13629 | "WEEK"
13630 | "QUARTER"
13631 | "MILLISECOND"
13632 | "MICROSECOND"
13633 ) {
13634 Some(u)
13635 } else {
13636 None
13637 }
13638 }
13639 _ => None,
13640 };
13641 // Reorder: if arg2 is the unit, swap to (unit, val, date) form
13642 let (unit_str, val, date) = if let Some(u) = arg2_unit {
13643 (u, arg1, arg0)
13644 } else {
13645 (Self::get_unit_str_static(&arg0), arg1, arg2)
13646 };
13647 // Alias for backward compat with the rest of the match
13648 let arg1 = val;
13649 let arg2 = date;
13650
13651 match target {
13652 DialectType::Presto
13653 | DialectType::Trino
13654 | DialectType::Athena => {
13655 Ok(Expression::Function(Box::new(Function::new(
13656 "DATE_ADD".to_string(),
13657 vec![Expression::string(&unit_str), arg1, arg2],
13658 ))))
13659 }
13660 DialectType::DuckDB => {
13661 let iu = Self::parse_interval_unit_static(&unit_str);
13662 let interval = Expression::Interval(Box::new(
13663 crate::expressions::Interval {
13664 this: Some(arg1),
13665 unit: Some(
13666 crate::expressions::IntervalUnitSpec::Simple {
13667 unit: iu,
13668 use_plural: false,
13669 },
13670 ),
13671 },
13672 ));
13673 Ok(Expression::Add(Box::new(
13674 crate::expressions::BinaryOp::new(arg2, interval),
13675 )))
13676 }
13677 DialectType::PostgreSQL
13678 | DialectType::Materialize
13679 | DialectType::RisingWave => {
13680 // PostgreSQL: x + INTERVAL '1 DAY'
13681 let amount_str = Self::expr_to_string_static(&arg1);
13682 let interval = Expression::Interval(Box::new(
13683 crate::expressions::Interval {
13684 this: Some(Expression::string(&format!(
13685 "{} {}",
13686 amount_str, unit_str
13687 ))),
13688 unit: None,
13689 },
13690 ));
13691 Ok(Expression::Add(Box::new(
13692 crate::expressions::BinaryOp::new(arg2, interval),
13693 )))
13694 }
13695 DialectType::Snowflake
13696 | DialectType::TSQL
13697 | DialectType::Redshift => {
13698 let unit =
13699 Expression::Identifier(Identifier::new(&unit_str));
13700 Ok(Expression::Function(Box::new(Function::new(
13701 "DATEADD".to_string(),
13702 vec![unit, arg1, arg2],
13703 ))))
13704 }
13705 DialectType::BigQuery
13706 | DialectType::MySQL
13707 | DialectType::Doris
13708 | DialectType::StarRocks
13709 | DialectType::Drill => {
13710 // DATE_ADD(date, INTERVAL amount UNIT)
13711 let iu = Self::parse_interval_unit_static(&unit_str);
13712 let interval = Expression::Interval(Box::new(
13713 crate::expressions::Interval {
13714 this: Some(arg1),
13715 unit: Some(
13716 crate::expressions::IntervalUnitSpec::Simple {
13717 unit: iu,
13718 use_plural: false,
13719 },
13720 ),
13721 },
13722 ));
13723 Ok(Expression::Function(Box::new(Function::new(
13724 "DATE_ADD".to_string(),
13725 vec![arg2, interval],
13726 ))))
13727 }
13728 DialectType::SQLite => {
13729 // SQLite: DATE(x, '1 DAY')
13730 // Build the string '1 DAY' from amount and unit
13731 let amount_str = match &arg1 {
13732 Expression::Literal(Literal::Number(n)) => n.clone(),
13733 _ => "1".to_string(),
13734 };
13735 Ok(Expression::Function(Box::new(Function::new(
13736 "DATE".to_string(),
13737 vec![
13738 arg2,
13739 Expression::string(format!(
13740 "{} {}",
13741 amount_str, unit_str
13742 )),
13743 ],
13744 ))))
13745 }
13746 DialectType::Dremio => {
13747 // Dremio: DATE_ADD(date, amount) - drops unit
13748 Ok(Expression::Function(Box::new(Function::new(
13749 "DATE_ADD".to_string(),
13750 vec![arg2, arg1],
13751 ))))
13752 }
13753 DialectType::Spark => {
13754 // Spark: DATE_ADD(date, val) for DAY, or DATEADD(UNIT, val, date)
13755 if unit_str == "DAY" {
13756 Ok(Expression::Function(Box::new(Function::new(
13757 "DATE_ADD".to_string(),
13758 vec![arg2, arg1],
13759 ))))
13760 } else {
13761 let unit =
13762 Expression::Identifier(Identifier::new(&unit_str));
13763 Ok(Expression::Function(Box::new(Function::new(
13764 "DATE_ADD".to_string(),
13765 vec![unit, arg1, arg2],
13766 ))))
13767 }
13768 }
13769 DialectType::Databricks => {
13770 let unit =
13771 Expression::Identifier(Identifier::new(&unit_str));
13772 Ok(Expression::Function(Box::new(Function::new(
13773 "DATE_ADD".to_string(),
13774 vec![unit, arg1, arg2],
13775 ))))
13776 }
13777 DialectType::Hive => {
13778 // Hive: DATE_ADD(date, val) for DAY
13779 Ok(Expression::Function(Box::new(Function::new(
13780 "DATE_ADD".to_string(),
13781 vec![arg2, arg1],
13782 ))))
13783 }
13784 _ => {
13785 let unit =
13786 Expression::Identifier(Identifier::new(&unit_str));
13787 Ok(Expression::Function(Box::new(Function::new(
13788 "DATE_ADD".to_string(),
13789 vec![unit, arg1, arg2],
13790 ))))
13791 }
13792 }
13793 }
            // DATE_ADD(date, days) - 2-arg Hive/Spark/Generic form (add days).
            // Rewrites the two-argument day-addition form into each target's
            // native date-arithmetic syntax. `days` may be an arbitrary
            // expression; arithmetic expressions are parenthesized where the
            // target's INTERVAL syntax would otherwise parse ambiguously.
            "DATE_ADD"
                if f.args.len() == 2
                    && matches!(
                        source,
                        DialectType::Hive
                            | DialectType::Spark
                            | DialectType::Databricks
                            | DialectType::Generic
                    ) =>
            {
                let mut args = f.args;
                let date = args.remove(0);
                let days = args.remove(0);
                match target {
                    DialectType::Hive | DialectType::Spark => {
                        // Keep as DATE_ADD(date, days) for Hive/Spark
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![date, days],
                        ))))
                    }
                    DialectType::Databricks => {
                        // Databricks: DATEADD(DAY, days, date)
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(),
                            vec![
                                Expression::Identifier(Identifier::new("DAY")),
                                days,
                                date,
                            ],
                        ))))
                    }
                    DialectType::DuckDB => {
                        // DuckDB: CAST(date AS DATE) + INTERVAL days DAY
                        let cast_date = Self::ensure_cast_date(date);
                        // Wrap complex expressions (like Mul from DATE_SUB negation) in Paren
                        // so `INTERVAL (expr) DAY` renders unambiguously.
                        let interval_val = if matches!(
                            days,
                            Expression::Mul(_)
                                | Expression::Sub(_)
                                | Expression::Add(_)
                        ) {
                            Expression::Paren(Box::new(crate::expressions::Paren {
                                this: days,
                                trailing_comments: vec![],
                            }))
                        } else {
                            days
                        };
                        let interval = Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(interval_val),
                                unit: Some(
                                    crate::expressions::IntervalUnitSpec::Simple {
                                        unit: crate::expressions::IntervalUnit::Day,
                                        use_plural: false,
                                    },
                                ),
                            },
                        ));
                        Ok(Expression::Add(Box::new(
                            crate::expressions::BinaryOp::new(cast_date, interval),
                        )))
                    }
                    DialectType::Snowflake => {
                        // For Hive source with string literal date, use CAST(CAST(date AS TIMESTAMP) AS DATE)
                        let cast_date = if matches!(
                            source,
                            DialectType::Hive
                                | DialectType::Spark
                                | DialectType::Databricks
                        ) {
                            if matches!(
                                date,
                                Expression::Literal(Literal::String(_))
                            ) {
                                Self::double_cast_timestamp_date(date)
                            } else {
                                date
                            }
                        } else {
                            date
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(),
                            vec![
                                Expression::Identifier(Identifier::new("DAY")),
                                days,
                                cast_date,
                            ],
                        ))))
                    }
                    DialectType::Redshift => {
                        // Redshift: DATEADD(DAY, days, date) with a bare unit identifier.
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(),
                            vec![
                                Expression::Identifier(Identifier::new("DAY")),
                                days,
                                date,
                            ],
                        ))))
                    }
                    DialectType::TSQL | DialectType::Fabric => {
                        // For Hive source with string literal date, use CAST(CAST(date AS DATETIME2) AS DATE)
                        // But Databricks DATE_ADD doesn't need this wrapping for TSQL
                        let cast_date = if matches!(
                            source,
                            DialectType::Hive | DialectType::Spark
                        ) {
                            if matches!(
                                date,
                                Expression::Literal(Literal::String(_))
                            ) {
                                Self::double_cast_datetime2_date(date)
                            } else {
                                date
                            }
                        } else {
                            date
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(),
                            vec![
                                Expression::Identifier(Identifier::new("DAY")),
                                days,
                                cast_date,
                            ],
                        ))))
                    }
                    DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Athena => {
                        // For Hive source with string literal date, use CAST(CAST(date AS TIMESTAMP) AS DATE)
                        let cast_date = if matches!(
                            source,
                            DialectType::Hive
                                | DialectType::Spark
                                | DialectType::Databricks
                        ) {
                            if matches!(
                                date,
                                Expression::Literal(Literal::String(_))
                            ) {
                                Self::double_cast_timestamp_date(date)
                            } else {
                                date
                            }
                        } else {
                            date
                        };
                        // Presto-family DATE_ADD takes the unit as a string literal.
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![Expression::string("DAY"), days, cast_date],
                        ))))
                    }
                    DialectType::BigQuery => {
                        // For Hive/Spark source, wrap date in CAST(CAST(date AS DATETIME) AS DATE).
                        // Unlike the Snowflake/Presto branches, this cast is applied
                        // unconditionally, not only to string literals.
                        let cast_date = if matches!(
                            source,
                            DialectType::Hive
                                | DialectType::Spark
                                | DialectType::Databricks
                        ) {
                            Self::double_cast_datetime_date(date)
                        } else {
                            date
                        };
                        // Wrap complex expressions in Paren for interval
                        let interval_val = if matches!(
                            days,
                            Expression::Mul(_)
                                | Expression::Sub(_)
                                | Expression::Add(_)
                        ) {
                            Expression::Paren(Box::new(crate::expressions::Paren {
                                this: days,
                                trailing_comments: vec![],
                            }))
                        } else {
                            days
                        };
                        let interval = Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(interval_val),
                                unit: Some(
                                    crate::expressions::IntervalUnitSpec::Simple {
                                        unit: crate::expressions::IntervalUnit::Day,
                                        use_plural: false,
                                    },
                                ),
                            },
                        ));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![cast_date, interval],
                        ))))
                    }
                    DialectType::MySQL => {
                        // MySQL renders through the dedicated DateAdd AST node
                        // (DATE_ADD(date, INTERVAL days DAY)).
                        let iu = crate::expressions::IntervalUnit::Day;
                        Ok(Expression::DateAdd(Box::new(
                            crate::expressions::DateAddFunc {
                                this: date,
                                interval: days,
                                unit: iu,
                            },
                        )))
                    }
                    DialectType::PostgreSQL => {
                        // date + INTERVAL 'N DAY'. NOTE(review): the day count is
                        // stringified via expr_to_string_static, so a non-literal
                        // `days` expression ends up inside the interval string —
                        // verify callers only reach this with simple expressions.
                        let interval = Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(Expression::string(&format!(
                                    "{} DAY",
                                    Self::expr_to_string_static(&days)
                                ))),
                                unit: None,
                            },
                        ));
                        Ok(Expression::Add(Box::new(
                            crate::expressions::BinaryOp::new(date, interval),
                        )))
                    }
                    DialectType::Doris
                    | DialectType::StarRocks
                    | DialectType::Drill => {
                        // DATE_ADD(date, INTERVAL days DAY)
                        let interval = Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(days),
                                unit: Some(
                                    crate::expressions::IntervalUnitSpec::Simple {
                                        unit: crate::expressions::IntervalUnit::Day,
                                        use_plural: false,
                                    },
                                ),
                            },
                        ));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![date, interval],
                        ))))
                    }
                    // Any other target keeps the original 2-arg spelling.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "DATE_ADD".to_string(),
                        vec![date, days],
                    )))),
                }
            }
            // DATE_SUB(date, days) - 2-arg Hive/Spark form (subtract days).
            // Targets without a native subtract form reuse their DATEADD /
            // interval-addition paths with the day count negated (days * -1).
            "DATE_SUB"
                if f.args.len() == 2
                    && matches!(
                        source,
                        DialectType::Hive
                            | DialectType::Spark
                            | DialectType::Databricks
                    ) =>
            {
                let mut args = f.args;
                let date = args.remove(0);
                let days = args.remove(0);
                // Helper to create days * -1
                let make_neg_days = |d: Expression| -> Expression {
                    Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
                        d,
                        Expression::Literal(Literal::Number("-1".to_string())),
                    )))
                };
                // String-literal dates get a double cast on several targets so the
                // literal is parsed as a timestamp/datetime before arithmetic.
                let is_string_literal =
                    matches!(date, Expression::Literal(Literal::String(_)));
                match target {
                    DialectType::Hive
                    | DialectType::Spark
                    | DialectType::Databricks => {
                        // Keep as DATE_SUB(date, days) for Hive/Spark
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_SUB".to_string(),
                            vec![date, days],
                        ))))
                    }
                    DialectType::DuckDB => {
                        // CAST(date AS DATE) + INTERVAL (days * -1) DAY
                        let cast_date = Self::ensure_cast_date(date);
                        let neg = make_neg_days(days);
                        let interval = Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(Expression::Paren(Box::new(
                                    crate::expressions::Paren {
                                        this: neg,
                                        trailing_comments: vec![],
                                    },
                                ))),
                                unit: Some(
                                    crate::expressions::IntervalUnitSpec::Simple {
                                        unit: crate::expressions::IntervalUnit::Day,
                                        use_plural: false,
                                    },
                                ),
                            },
                        ));
                        Ok(Expression::Add(Box::new(
                            crate::expressions::BinaryOp::new(cast_date, interval),
                        )))
                    }
                    DialectType::Snowflake => {
                        // DATEADD(DAY, days * -1, date), double-casting literal dates.
                        let cast_date = if is_string_literal {
                            Self::double_cast_timestamp_date(date)
                        } else {
                            date
                        };
                        let neg = make_neg_days(days);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(),
                            vec![
                                Expression::Identifier(Identifier::new("DAY")),
                                neg,
                                cast_date,
                            ],
                        ))))
                    }
                    DialectType::Redshift => {
                        let neg = make_neg_days(days);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(),
                            vec![
                                Expression::Identifier(Identifier::new("DAY")),
                                neg,
                                date,
                            ],
                        ))))
                    }
                    DialectType::TSQL | DialectType::Fabric => {
                        let cast_date = if is_string_literal {
                            Self::double_cast_datetime2_date(date)
                        } else {
                            date
                        };
                        let neg = make_neg_days(days);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(),
                            vec![
                                Expression::Identifier(Identifier::new("DAY")),
                                neg,
                                cast_date,
                            ],
                        ))))
                    }
                    DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Athena => {
                        let cast_date = if is_string_literal {
                            Self::double_cast_timestamp_date(date)
                        } else {
                            date
                        };
                        let neg = make_neg_days(days);
                        // Presto-family DATE_ADD takes the unit as a string literal.
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![Expression::string("DAY"), neg, cast_date],
                        ))))
                    }
                    DialectType::BigQuery => {
                        let cast_date = if is_string_literal {
                            Self::double_cast_datetime_date(date)
                        } else {
                            date
                        };
                        let neg = make_neg_days(days);
                        // The negated count is parenthesized inside the interval.
                        let interval = Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(Expression::Paren(Box::new(
                                    crate::expressions::Paren {
                                        this: neg,
                                        trailing_comments: vec![],
                                    },
                                ))),
                                unit: Some(
                                    crate::expressions::IntervalUnitSpec::Simple {
                                        unit: crate::expressions::IntervalUnit::Day,
                                        use_plural: false,
                                    },
                                ),
                            },
                        ));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![cast_date, interval],
                        ))))
                    }
                    // Any other target keeps the original spelling.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "DATE_SUB".to_string(),
                        vec![date, days],
                    )))),
                }
            }
            // ADD_MONTHS(date, val) -> target-specific month addition.
            "ADD_MONTHS" if f.args.len() == 2 => {
                let mut args = f.args;
                let date = args.remove(0);
                let val = args.remove(0);
                match target {
                    DialectType::TSQL => {
                        // DATEADD(MONTH, val, CAST(date AS DATETIME2)).
                        // NOTE(review): Fabric is not grouped with TSQL here,
                        // unlike the DATE_ADD/DATE_SUB arms — confirm intentional.
                        let cast_date = Self::ensure_cast_datetime2(date);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(),
                            vec![
                                Expression::Identifier(Identifier::new("MONTH")),
                                val,
                                cast_date,
                            ],
                        ))))
                    }
                    DialectType::DuckDB => {
                        // date + INTERVAL val MONTH
                        let interval = Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(val),
                                unit: Some(
                                    crate::expressions::IntervalUnitSpec::Simple {
                                        unit:
                                            crate::expressions::IntervalUnit::Month,
                                        use_plural: false,
                                    },
                                ),
                            },
                        ));
                        Ok(Expression::Add(Box::new(
                            crate::expressions::BinaryOp::new(date, interval),
                        )))
                    }
                    DialectType::Snowflake => {
                        // Keep ADD_MONTHS when source is Snowflake
                        if matches!(source, DialectType::Snowflake) {
                            Ok(Expression::Function(Box::new(Function::new(
                                "ADD_MONTHS".to_string(),
                                vec![date, val],
                            ))))
                        } else {
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATEADD".to_string(),
                                vec![
                                    Expression::Identifier(Identifier::new(
                                        "MONTH",
                                    )),
                                    val,
                                    date,
                                ],
                            ))))
                        }
                    }
                    DialectType::Redshift => {
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(),
                            vec![
                                Expression::Identifier(Identifier::new("MONTH")),
                                val,
                                date,
                            ],
                        ))))
                    }
                    DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Athena => {
                        // Presto-family DATE_ADD takes the unit as a string literal.
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![Expression::string("MONTH"), val, date],
                        ))))
                    }
                    DialectType::BigQuery => {
                        // DATE_ADD(date, INTERVAL val MONTH)
                        let interval = Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(val),
                                unit: Some(
                                    crate::expressions::IntervalUnitSpec::Simple {
                                        unit:
                                            crate::expressions::IntervalUnit::Month,
                                        use_plural: false,
                                    },
                                ),
                            },
                        ));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![date, interval],
                        ))))
                    }
                    // Any other target keeps ADD_MONTHS unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "ADD_MONTHS".to_string(),
                        vec![date, val],
                    )))),
                }
            }
            // DATETRUNC(unit, date) - TSQL form -> DATE_TRUNC for other targets.
            "DATETRUNC" if f.args.len() == 2 => {
                let mut args = f.args;
                let arg0 = args.remove(0);
                let arg1 = args.remove(0);
                // Normalize the unit argument (identifier or string) to plain text.
                let unit_str = Self::get_unit_str_static(&arg0);
                match target {
                    DialectType::TSQL | DialectType::Fabric => {
                        // Keep as DATETRUNC for TSQL - the target handler will uppercase the unit
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATETRUNC".to_string(),
                            vec![
                                Expression::Identifier(Identifier::new(&unit_str)),
                                arg1,
                            ],
                        ))))
                    }
                    DialectType::DuckDB => {
                        // DuckDB: DATE_TRUNC('UNIT', expr) with CAST for string literals
                        let date = Self::ensure_cast_timestamp(arg1);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_TRUNC".to_string(),
                            vec![Expression::string(&unit_str), date],
                        ))))
                    }
                    DialectType::ClickHouse => {
                        // ClickHouse: dateTrunc('UNIT', expr) — camelCase function name.
                        Ok(Expression::Function(Box::new(Function::new(
                            "dateTrunc".to_string(),
                            vec![Expression::string(&unit_str), arg1],
                        ))))
                    }
                    _ => {
                        // Standard: DATE_TRUNC('UNIT', expr)
                        let unit = Expression::string(&unit_str);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_TRUNC".to_string(),
                            vec![unit, arg1],
                        ))))
                    }
                }
            }
14326 // GETDATE() -> CURRENT_TIMESTAMP for non-TSQL targets
14327 "GETDATE" if f.args.is_empty() => match target {
14328 DialectType::TSQL => Ok(Expression::Function(f)),
14329 DialectType::Redshift => Ok(Expression::Function(Box::new(
14330 Function::new("GETDATE".to_string(), vec![]),
14331 ))),
14332 _ => Ok(Expression::CurrentTimestamp(
14333 crate::expressions::CurrentTimestamp {
14334 precision: None,
14335 sysdate: false,
14336 },
14337 )),
14338 },
14339 // TO_HEX(x) / HEX(x) -> target-specific hex function
14340 "TO_HEX" | "HEX" if f.args.len() == 1 => {
14341 let name = match target {
14342 DialectType::Presto | DialectType::Trino => "TO_HEX",
14343 DialectType::Spark
14344 | DialectType::Databricks
14345 | DialectType::Hive => "HEX",
14346 DialectType::DuckDB
14347 | DialectType::PostgreSQL
14348 | DialectType::Redshift => "TO_HEX",
14349 _ => &f.name,
14350 };
14351 Ok(Expression::Function(Box::new(Function::new(
14352 name.to_string(),
14353 f.args,
14354 ))))
14355 }
14356 // FROM_HEX(x) / UNHEX(x) -> target-specific hex decode function
14357 "FROM_HEX" | "UNHEX" if f.args.len() == 1 => {
14358 match target {
14359 DialectType::BigQuery => {
14360 // BigQuery: UNHEX(x) -> FROM_HEX(x)
14361 // Special case: UNHEX(MD5(x)) -> FROM_HEX(TO_HEX(MD5(x)))
14362 // because BigQuery MD5 returns BYTES, not hex string
14363 let arg = &f.args[0];
14364 let wrapped_arg = match arg {
14365 Expression::Function(inner_f)
14366 if inner_f.name.to_uppercase() == "MD5"
14367 || inner_f.name.to_uppercase() == "SHA1"
14368 || inner_f.name.to_uppercase() == "SHA256"
14369 || inner_f.name.to_uppercase() == "SHA512" =>
14370 {
14371 // Wrap hash function in TO_HEX for BigQuery
14372 Expression::Function(Box::new(Function::new(
14373 "TO_HEX".to_string(),
14374 vec![arg.clone()],
14375 )))
14376 }
14377 _ => f.args.into_iter().next().unwrap(),
14378 };
14379 Ok(Expression::Function(Box::new(Function::new(
14380 "FROM_HEX".to_string(),
14381 vec![wrapped_arg],
14382 ))))
14383 }
14384 _ => {
14385 let name = match target {
14386 DialectType::Presto | DialectType::Trino => "FROM_HEX",
14387 DialectType::Spark
14388 | DialectType::Databricks
14389 | DialectType::Hive => "UNHEX",
14390 _ => &f.name,
14391 };
14392 Ok(Expression::Function(Box::new(Function::new(
14393 name.to_string(),
14394 f.args,
14395 ))))
14396 }
14397 }
14398 }
14399 // TO_UTF8(x) -> ENCODE(x, 'utf-8') for Spark
14400 "TO_UTF8" if f.args.len() == 1 => match target {
14401 DialectType::Spark | DialectType::Databricks => {
14402 let mut args = f.args;
14403 args.push(Expression::string("utf-8"));
14404 Ok(Expression::Function(Box::new(Function::new(
14405 "ENCODE".to_string(),
14406 args,
14407 ))))
14408 }
14409 _ => Ok(Expression::Function(f)),
14410 },
14411 // FROM_UTF8(x) -> DECODE(x, 'utf-8') for Spark
14412 "FROM_UTF8" if f.args.len() == 1 => match target {
14413 DialectType::Spark | DialectType::Databricks => {
14414 let mut args = f.args;
14415 args.push(Expression::string("utf-8"));
14416 Ok(Expression::Function(Box::new(Function::new(
14417 "DECODE".to_string(),
14418 args,
14419 ))))
14420 }
14421 _ => Ok(Expression::Function(f)),
14422 },
14423 // STARTS_WITH(x, y) / STARTSWITH(x, y) -> target-specific
14424 "STARTS_WITH" | "STARTSWITH" if f.args.len() == 2 => {
14425 let name = match target {
14426 DialectType::Spark | DialectType::Databricks => "STARTSWITH",
14427 DialectType::Presto | DialectType::Trino => "STARTS_WITH",
14428 DialectType::PostgreSQL | DialectType::Redshift => {
14429 "STARTS_WITH"
14430 }
14431 _ => &f.name,
14432 };
14433 Ok(Expression::Function(Box::new(Function::new(
14434 name.to_string(),
14435 f.args,
14436 ))))
14437 }
14438 // APPROX_COUNT_DISTINCT(x) <-> APPROX_DISTINCT(x)
14439 "APPROX_COUNT_DISTINCT" if f.args.len() >= 1 => {
14440 let name = match target {
14441 DialectType::Presto
14442 | DialectType::Trino
14443 | DialectType::Athena => "APPROX_DISTINCT",
14444 _ => "APPROX_COUNT_DISTINCT",
14445 };
14446 Ok(Expression::Function(Box::new(Function::new(
14447 name.to_string(),
14448 f.args,
14449 ))))
14450 }
            // JSON_EXTRACT -> GET_JSON_OBJECT for Spark/Hive targets.
            // NOTE(review): BigQuery sources are explicitly excluded here —
            // presumably their JSON_EXTRACT path semantics differ and are
            // handled by another rule; confirm against the BigQuery handling.
            "JSON_EXTRACT"
                if f.args.len() == 2
                    && !matches!(source, DialectType::BigQuery)
                    && matches!(
                        target,
                        DialectType::Spark
                            | DialectType::Databricks
                            | DialectType::Hive
                    ) =>
            {
                Ok(Expression::Function(Box::new(Function::new(
                    "GET_JSON_OBJECT".to_string(),
                    f.args,
                ))))
            }
14467 // JSON_EXTRACT(x, path) -> x -> path for SQLite (arrow syntax)
14468 "JSON_EXTRACT"
14469 if f.args.len() == 2 && matches!(target, DialectType::SQLite) =>
14470 {
14471 let mut args = f.args;
14472 let path = args.remove(1);
14473 let this = args.remove(0);
14474 Ok(Expression::JsonExtract(Box::new(
14475 crate::expressions::JsonExtractFunc {
14476 this,
14477 path,
14478 returning: None,
14479 arrow_syntax: true,
14480 hash_arrow_syntax: false,
14481 wrapper_option: None,
14482 quotes_option: None,
14483 on_scalar_string: false,
14484 on_error: None,
14485 },
14486 )))
14487 }
            // JSON_FORMAT(x) -> TO_JSON(x) for Spark, TO_JSON_STRING for BigQuery, CAST(TO_JSON(x) AS TEXT) for DuckDB
            "JSON_FORMAT" if f.args.len() == 1 => {
                match target {
                    DialectType::Spark | DialectType::Databricks => {
                        // Presto JSON_FORMAT(JSON '...') needs Spark's string-unquoting flow:
                        // REGEXP_EXTRACT(TO_JSON(FROM_JSON('[...]', SCHEMA_OF_JSON('[...]'))), '^.(.*).$', 1)
                        // The value is wrapped in a one-element JSON array so
                        // FROM_JSON gets a valid document, then the surrounding
                        // brackets are stripped again by REGEXP_EXTRACT.
                        if matches!(
                            source,
                            DialectType::Presto
                                | DialectType::Trino
                                | DialectType::Athena
                        ) {
                            if let Some(Expression::ParseJson(pj)) = f.args.first()
                            {
                                if let Expression::Literal(Literal::String(s)) =
                                    &pj.this
                                {
                                    let wrapped = Expression::Literal(
                                        Literal::String(format!("[{}]", s)),
                                    );
                                    let schema_of_json = Expression::Function(
                                        Box::new(Function::new(
                                            "SCHEMA_OF_JSON".to_string(),
                                            vec![wrapped.clone()],
                                        )),
                                    );
                                    let from_json = Expression::Function(Box::new(
                                        Function::new(
                                            "FROM_JSON".to_string(),
                                            vec![wrapped, schema_of_json],
                                        ),
                                    ));
                                    let to_json = Expression::Function(Box::new(
                                        Function::new(
                                            "TO_JSON".to_string(),
                                            vec![from_json],
                                        ),
                                    ));
                                    // Early return: this special case bypasses the
                                    // generic TO_JSON rewrite below.
                                    return Ok(Expression::Function(Box::new(
                                        Function::new(
                                            "REGEXP_EXTRACT".to_string(),
                                            vec![
                                                to_json,
                                                Expression::Literal(
                                                    Literal::String(
                                                        "^.(.*).$".to_string(),
                                                    ),
                                                ),
                                                Expression::Literal(
                                                    Literal::Number(
                                                        "1".to_string(),
                                                    ),
                                                ),
                                            ],
                                        ),
                                    )));
                                }
                            }
                        }

                        // Strip inner CAST(... AS JSON) or TO_JSON() if present
                        // The CastToJsonForSpark may have already converted CAST(x AS JSON) to TO_JSON(x)
                        let mut args = f.args;
                        if let Some(Expression::Cast(ref c)) = args.first() {
                            if matches!(&c.to, DataType::Json | DataType::JsonB) {
                                args = vec![c.this.clone()];
                            }
                        } else if let Some(Expression::Function(ref inner_f)) =
                            args.first()
                        {
                            if inner_f.name.eq_ignore_ascii_case("TO_JSON")
                                && inner_f.args.len() == 1
                            {
                                // Already TO_JSON(x) from CastToJsonForSpark, just use the inner arg
                                args = inner_f.args.clone();
                            }
                        }
                        Ok(Expression::Function(Box::new(Function::new(
                            "TO_JSON".to_string(),
                            args,
                        ))))
                    }
                    DialectType::BigQuery => Ok(Expression::Function(Box::new(
                        Function::new("TO_JSON_STRING".to_string(), f.args),
                    ))),
                    DialectType::DuckDB => {
                        // CAST(TO_JSON(x) AS TEXT)
                        let to_json = Expression::Function(Box::new(
                            Function::new("TO_JSON".to_string(), f.args),
                        ));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: to_json,
                            to: DataType::Text,
                            trailing_comments: Vec::new(),
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    }
                    // Any other target keeps JSON_FORMAT unchanged.
                    _ => Ok(Expression::Function(f)),
                }
            }
            // SYSDATE -> CURRENT_TIMESTAMP for non-Oracle/Redshift/Snowflake targets.
            "SYSDATE" if f.args.is_empty() => {
                match target {
                    // Oracle and Redshift keep bare SYSDATE as parsed.
                    DialectType::Oracle | DialectType::Redshift => {
                        Ok(Expression::Function(f))
                    }
                    DialectType::Snowflake => {
                        // Snowflake uses SYSDATE() with parens
                        let mut f = *f;
                        f.no_parens = false;
                        Ok(Expression::Function(Box::new(f)))
                    }
                    DialectType::DuckDB => {
                        // DuckDB: SYSDATE() -> CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
                        Ok(Expression::AtTimeZone(Box::new(
                            crate::expressions::AtTimeZone {
                                this: Expression::CurrentTimestamp(
                                    crate::expressions::CurrentTimestamp {
                                        precision: None,
                                        sysdate: false,
                                    },
                                ),
                                zone: Expression::Literal(Literal::String(
                                    "UTC".to_string(),
                                )),
                            },
                        )))
                    }
                    _ => Ok(Expression::CurrentTimestamp(
                        crate::expressions::CurrentTimestamp {
                            precision: None,
                            // Mark the node as SYSDATE-derived so generators that
                            // distinguish the two can render accordingly.
                            sysdate: true,
                        },
                    )),
                }
            }
14626 // LOGICAL_OR(x) -> BOOL_OR(x)
14627 "LOGICAL_OR" if f.args.len() == 1 => {
14628 let name = match target {
14629 DialectType::Spark | DialectType::Databricks => "BOOL_OR",
14630 _ => &f.name,
14631 };
14632 Ok(Expression::Function(Box::new(Function::new(
14633 name.to_string(),
14634 f.args,
14635 ))))
14636 }
14637 // LOGICAL_AND(x) -> BOOL_AND(x)
14638 "LOGICAL_AND" if f.args.len() == 1 => {
14639 let name = match target {
14640 DialectType::Spark | DialectType::Databricks => "BOOL_AND",
14641 _ => &f.name,
14642 };
14643 Ok(Expression::Function(Box::new(Function::new(
14644 name.to_string(),
14645 f.args,
14646 ))))
14647 }
14648 // MONTHS_ADD(d, n) -> ADD_MONTHS(d, n) for Oracle
14649 "MONTHS_ADD" if f.args.len() == 2 => match target {
14650 DialectType::Oracle => Ok(Expression::Function(Box::new(
14651 Function::new("ADD_MONTHS".to_string(), f.args),
14652 ))),
14653 _ => Ok(Expression::Function(f)),
14654 },
14655 // ARRAY_JOIN(arr, sep[, null_replacement]) -> target-specific
14656 "ARRAY_JOIN" if f.args.len() >= 2 => {
14657 match target {
14658 DialectType::Spark | DialectType::Databricks => {
14659 // Keep as ARRAY_JOIN for Spark (it supports null_replacement)
14660 Ok(Expression::Function(f))
14661 }
14662 DialectType::Hive => {
14663 // ARRAY_JOIN(arr, sep[, null_rep]) -> CONCAT_WS(sep, arr) (drop null_replacement)
14664 let mut args = f.args;
14665 let arr = args.remove(0);
14666 let sep = args.remove(0);
14667 // Drop any remaining args (null_replacement)
14668 Ok(Expression::Function(Box::new(Function::new(
14669 "CONCAT_WS".to_string(),
14670 vec![sep, arr],
14671 ))))
14672 }
14673 DialectType::Presto | DialectType::Trino => {
14674 Ok(Expression::Function(f))
14675 }
14676 _ => Ok(Expression::Function(f)),
14677 }
14678 }
            // LOCATE(substr, str, pos) 3-arg -> target-specific
            // For Presto/DuckDB: STRPOS doesn't support 3-arg, need complex expansion.
            // The search is emulated by searching the suffix starting at `pos`:
            // an index k found there maps back to k + pos - 1 in the full string,
            // and 0 (not found) must be preserved as 0.
            "LOCATE"
                if f.args.len() == 3
                    && matches!(
                        target,
                        DialectType::Presto
                            | DialectType::Trino
                            | DialectType::Athena
                            | DialectType::DuckDB
                    ) =>
            {
                let mut args = f.args;
                let substr = args.remove(0);
                let string = args.remove(0);
                let pos = args.remove(0);
                // STRPOS(SUBSTRING(string, pos), substr)
                let substring_call = Expression::Function(Box::new(Function::new(
                    "SUBSTRING".to_string(),
                    vec![string.clone(), pos.clone()],
                )));
                let strpos_call = Expression::Function(Box::new(Function::new(
                    "STRPOS".to_string(),
                    vec![substring_call, substr.clone()],
                )));
                // STRPOS(...) + pos - 1
                let pos_adjusted =
                    Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
                        Expression::Add(Box::new(
                            crate::expressions::BinaryOp::new(
                                strpos_call.clone(),
                                pos.clone(),
                            ),
                        )),
                        Expression::number(1),
                    )));
                // STRPOS(...) = 0  (the not-found sentinel)
                let is_zero =
                    Expression::Eq(Box::new(crate::expressions::BinaryOp::new(
                        strpos_call.clone(),
                        Expression::number(0),
                    )));

                match target {
                    DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Athena => {
                        // IF(STRPOS(...) = 0, 0, STRPOS(...) + pos - 1)
                        Ok(Expression::Function(Box::new(Function::new(
                            "IF".to_string(),
                            vec![is_zero, Expression::number(0), pos_adjusted],
                        ))))
                    }
                    DialectType::DuckDB => {
                        // CASE WHEN STRPOS(...) = 0 THEN 0 ELSE STRPOS(...) + pos - 1 END
                        Ok(Expression::Case(Box::new(crate::expressions::Case {
                            operand: None,
                            whens: vec![(is_zero, Expression::number(0))],
                            else_: Some(pos_adjusted),
                            comments: Vec::new(),
                        })))
                    }
                    // Unreachable given the arm guard, but kept for exhaustiveness.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "LOCATE".to_string(),
                        vec![substr, string, pos],
                    )))),
                }
            }
14747 // STRPOS(haystack, needle, occurrence) 3-arg -> INSTR(haystack, needle, 1, occurrence)
14748 "STRPOS"
14749 if f.args.len() == 3
14750 && matches!(
14751 target,
14752 DialectType::BigQuery
14753 | DialectType::Oracle
14754 | DialectType::Teradata
14755 ) =>
14756 {
14757 let mut args = f.args;
14758 let haystack = args.remove(0);
14759 let needle = args.remove(0);
14760 let occurrence = args.remove(0);
14761 Ok(Expression::Function(Box::new(Function::new(
14762 "INSTR".to_string(),
14763 vec![haystack, needle, Expression::number(1), occurrence],
14764 ))))
14765 }
14766 // SCHEMA_NAME(id) -> target-specific
14767 "SCHEMA_NAME" if f.args.len() <= 1 => match target {
14768 DialectType::MySQL | DialectType::SingleStore => {
14769 Ok(Expression::Function(Box::new(Function::new(
14770 "SCHEMA".to_string(),
14771 vec![],
14772 ))))
14773 }
14774 DialectType::PostgreSQL => Ok(Expression::CurrentSchema(Box::new(
14775 crate::expressions::CurrentSchema { this: None },
14776 ))),
14777 DialectType::SQLite => Ok(Expression::string("main")),
14778 _ => Ok(Expression::Function(f)),
14779 },
14780 // STRTOL(str, base) -> FROM_BASE(str, base) for Trino/Presto
14781 "STRTOL" if f.args.len() == 2 => match target {
14782 DialectType::Presto | DialectType::Trino => {
14783 Ok(Expression::Function(Box::new(Function::new(
14784 "FROM_BASE".to_string(),
14785 f.args,
14786 ))))
14787 }
14788 _ => Ok(Expression::Function(f)),
14789 },
14790 // EDITDIST3(a, b) -> LEVENSHTEIN(a, b) for Spark
14791 "EDITDIST3" if f.args.len() == 2 => match target {
14792 DialectType::Spark | DialectType::Databricks => {
14793 Ok(Expression::Function(Box::new(Function::new(
14794 "LEVENSHTEIN".to_string(),
14795 f.args,
14796 ))))
14797 }
14798 _ => Ok(Expression::Function(f)),
14799 },
            // FORMAT(num, decimals) from MySQL -> DuckDB FORMAT('{:,.Xf}', num)
            "FORMAT"
                if f.args.len() == 2
                    && matches!(
                        source,
                        DialectType::MySQL | DialectType::SingleStore
                    )
                    && matches!(target, DialectType::DuckDB) =>
            {
                let mut args = f.args;
                let num_expr = args.remove(0);
                let decimals_expr = args.remove(0);
                // Extract decimal count. A non-literal decimal count silently
                // falls back to 0 — DuckDB's fmt spec needs a fixed precision.
                let dec_count = match &decimals_expr {
                    Expression::Literal(Literal::Number(n)) => n.clone(),
                    _ => "0".to_string(),
                };
                // "{:,.Nf}" = thousands separator with N decimal places
                // (the doubled braces escape literal { } in format!).
                let fmt_str = format!("{{:,.{}f}}", dec_count);
                Ok(Expression::Function(Box::new(Function::new(
                    "FORMAT".to_string(),
                    vec![Expression::string(&fmt_str), num_expr],
                ))))
            }
            // FORMAT(x, fmt) from TSQL -> DATE_FORMAT for Spark, or expand short codes.
            "FORMAT"
                if f.args.len() == 2
                    && matches!(
                        source,
                        DialectType::TSQL | DialectType::Fabric
                    ) =>
            {
                // Clone both args up front so the fallback branch can still
                // return `f` unchanged.
                let val_expr = f.args[0].clone();
                let fmt_expr = f.args[1].clone();
                // Expand unambiguous .NET single-char date format shortcodes to full patterns.
                // Only expand shortcodes that are NOT also valid numeric format specifiers.
                // Ambiguous: d, D, f, F, g, G (used for both dates and numbers)
                // Unambiguous date: m/M (Month day), t/T (Time), y/Y (Year month)
                let (expanded_fmt, is_shortcode) = match &fmt_expr {
                    Expression::Literal(crate::expressions::Literal::String(s)) => {
                        match s.as_str() {
                            "m" | "M" => (Expression::string("MMMM d"), true),
                            "t" => (Expression::string("h:mm tt"), true),
                            "T" => (Expression::string("h:mm:ss tt"), true),
                            "y" | "Y" => (Expression::string("MMMM yyyy"), true),
                            _ => (fmt_expr.clone(), false),
                        }
                    }
                    _ => (fmt_expr.clone(), false),
                };
                // Check if the format looks like a date format (heuristic:
                // presence of common date/time pattern tokens).
                let is_date_format = is_shortcode
                    || match &expanded_fmt {
                        Expression::Literal(
                            crate::expressions::Literal::String(s),
                        ) => {
                            // Date formats typically contain yyyy, MM, dd, MMMM, HH, etc.
                            s.contains("yyyy")
                                || s.contains("YYYY")
                                || s.contains("MM")
                                || s.contains("dd")
                                || s.contains("MMMM")
                                || s.contains("HH")
                                || s.contains("hh")
                                || s.contains("ss")
                        }
                        _ => false,
                    };
                match target {
                    DialectType::Spark | DialectType::Databricks => {
                        // Dates go to DATE_FORMAT, numbers to FORMAT_NUMBER.
                        let func_name = if is_date_format {
                            "DATE_FORMAT"
                        } else {
                            "FORMAT_NUMBER"
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            func_name.to_string(),
                            vec![val_expr, expanded_fmt],
                        ))))
                    }
                    _ => {
                        // For TSQL and other targets, expand shortcodes but keep FORMAT
                        if is_shortcode {
                            Ok(Expression::Function(Box::new(Function::new(
                                "FORMAT".to_string(),
                                vec![val_expr, expanded_fmt],
                            ))))
                        } else {
                            Ok(Expression::Function(f))
                        }
                    }
                }
            }
            // FORMAT('%s', x) from Trino/Presto -> target-specific printf-style rewrite.
            "FORMAT"
                if f.args.len() >= 2
                    && matches!(
                        source,
                        DialectType::Trino
                            | DialectType::Presto
                            | DialectType::Athena
                    ) =>
            {
                // First arg is the format string; the rest are substitution values.
                let fmt_expr = f.args[0].clone();
                let value_args: Vec<Expression> = f.args[1..].to_vec();
                match target {
                    // DuckDB: replace %s with {} in format string
                    DialectType::DuckDB => {
                        let new_fmt = match &fmt_expr {
                            Expression::Literal(Literal::String(s)) => {
                                Expression::Literal(Literal::String(
                                    s.replace("%s", "{}"),
                                ))
                            }
                            // Non-literal format strings pass through untouched.
                            _ => fmt_expr,
                        };
                        let mut args = vec![new_fmt];
                        args.extend(value_args);
                        Ok(Expression::Function(Box::new(Function::new(
                            "FORMAT".to_string(),
                            args,
                        ))))
                    }
                    // Snowflake: FORMAT('%s', x) -> TO_CHAR(x) when just %s
                    DialectType::Snowflake => match &fmt_expr {
                        Expression::Literal(Literal::String(s))
                            if s == "%s" && value_args.len() == 1 =>
                        {
                            Ok(Expression::Function(Box::new(Function::new(
                                "TO_CHAR".to_string(),
                                value_args,
                            ))))
                        }
                        _ => Ok(Expression::Function(f)),
                    },
                    // Default: keep FORMAT as-is
                    _ => Ok(Expression::Function(f)),
                }
            }
            // LIST_CONTAINS / LIST_HAS / ARRAY_CONTAINS -> target-specific.
            "LIST_CONTAINS" | "LIST_HAS" | "ARRAY_CONTAINS"
                if f.args.len() == 2 =>
            {
                match target {
                    DialectType::PostgreSQL | DialectType::Redshift => {
                        // CASE WHEN needle IS NULL THEN NULL ELSE COALESCE(needle = ANY(arr), FALSE) END
                        // A NULL needle yields NULL; a needle absent from the
                        // array yields FALSE even when the ANY comparison is NULL.
                        let arr = f.args[0].clone();
                        let needle = f.args[1].clone();
                        // Convert [] to ARRAY[] for PostgreSQL
                        let pg_arr = match arr {
                            Expression::Array(a) => Expression::ArrayFunc(
                                Box::new(crate::expressions::ArrayConstructor {
                                    expressions: a.expressions,
                                    bracket_notation: false,
                                    use_list_keyword: false,
                                }),
                            ),
                            _ => arr,
                        };
                        // needle = ANY(arr) using the Any quantified expression
                        let any_expr = Expression::Any(Box::new(
                            crate::expressions::QuantifiedExpr {
                                this: needle.clone(),
                                subquery: pg_arr,
                                op: Some(crate::expressions::QuantifiedOp::Eq),
                            },
                        ));
                        let coalesce = Expression::Coalesce(Box::new(
                            crate::expressions::VarArgFunc {
                                expressions: vec![
                                    any_expr,
                                    Expression::Boolean(
                                        crate::expressions::BooleanLiteral {
                                            value: false,
                                        },
                                    ),
                                ],
                                original_name: None,
                            },
                        ));
                        let is_null_check = Expression::IsNull(Box::new(
                            crate::expressions::IsNull {
                                this: needle,
                                not: false,
                                postfix_form: false,
                            },
                        ));
                        Ok(Expression::Case(Box::new(Case {
                            operand: None,
                            whens: vec![(
                                is_null_check,
                                Expression::Null(crate::expressions::Null),
                            )],
                            else_: Some(coalesce),
                            comments: Vec::new(),
                        })))
                    }
                    // All other targets normalize to the ARRAY_CONTAINS spelling.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "ARRAY_CONTAINS".to_string(),
                        f.args,
                    )))),
                }
            }
15002 // LIST_HAS_ANY / ARRAY_HAS_ANY -> target-specific overlap operator
15003 "LIST_HAS_ANY" | "ARRAY_HAS_ANY" if f.args.len() == 2 => {
15004 match target {
15005 DialectType::PostgreSQL | DialectType::Redshift => {
15006 // arr1 && arr2 with ARRAY[] syntax
15007 let mut args = f.args;
15008 let arr1 = args.remove(0);
15009 let arr2 = args.remove(0);
15010 let pg_arr1 = match arr1 {
15011 Expression::Array(a) => Expression::ArrayFunc(
15012 Box::new(crate::expressions::ArrayConstructor {
15013 expressions: a.expressions,
15014 bracket_notation: false,
15015 use_list_keyword: false,
15016 }),
15017 ),
15018 _ => arr1,
15019 };
15020 let pg_arr2 = match arr2 {
15021 Expression::Array(a) => Expression::ArrayFunc(
15022 Box::new(crate::expressions::ArrayConstructor {
15023 expressions: a.expressions,
15024 bracket_notation: false,
15025 use_list_keyword: false,
15026 }),
15027 ),
15028 _ => arr2,
15029 };
15030 Ok(Expression::ArrayOverlaps(Box::new(BinaryOp::new(
15031 pg_arr1, pg_arr2,
15032 ))))
15033 }
15034 DialectType::DuckDB => {
15035 // DuckDB: arr1 && arr2 (native support)
15036 let mut args = f.args;
15037 let arr1 = args.remove(0);
15038 let arr2 = args.remove(0);
15039 Ok(Expression::ArrayOverlaps(Box::new(BinaryOp::new(
15040 arr1, arr2,
15041 ))))
15042 }
15043 _ => Ok(Expression::Function(Box::new(Function::new(
15044 "LIST_HAS_ANY".to_string(),
15045 f.args,
15046 )))),
15047 }
15048 }
15049 // APPROX_QUANTILE(x, q) -> target-specific
15050 "APPROX_QUANTILE" if f.args.len() == 2 => match target {
15051 DialectType::Snowflake => Ok(Expression::Function(Box::new(
15052 Function::new("APPROX_PERCENTILE".to_string(), f.args),
15053 ))),
15054 DialectType::DuckDB => Ok(Expression::Function(f)),
15055 _ => Ok(Expression::Function(f)),
15056 },
15057 // MAKE_DATE(y, m, d) -> DATE(y, m, d) for BigQuery
15058 "MAKE_DATE" if f.args.len() == 3 => match target {
15059 DialectType::BigQuery => Ok(Expression::Function(Box::new(
15060 Function::new("DATE".to_string(), f.args),
15061 ))),
15062 _ => Ok(Expression::Function(f)),
15063 },
// RANGE(start, end[, step]) -> target-specific
// DuckDB is the source spelling, so it is excluded by the guard.
"RANGE"
    if f.args.len() >= 2 && !matches!(target, DialectType::DuckDB) =>
{
    let start = f.args[0].clone();
    let end = f.args[1].clone();
    let step = f.args.get(2).cloned();
    match target {
        DialectType::Spark | DialectType::Databricks => {
            // RANGE(start, end) -> SEQUENCE(start, end-1)
            // RANGE(start, end, step) -> SEQUENCE(start, end-step, step) when step constant
            // RANGE(start, start) -> ARRAY() (empty)
            // RANGE(start, end, 0) -> ARRAY() (empty)
            // When end is variable: IF((end - 1) <= start, ARRAY(), SEQUENCE(start, (end - 1)))

            // Check for constant args
            // Extracts a compile-time integer from a numeric literal,
            // including a unary-negated one; None for anything else.
            fn extract_i64(e: &Expression) -> Option<i64> {
                match e {
                    Expression::Literal(Literal::Number(n)) => {
                        n.parse::<i64>().ok()
                    }
                    Expression::Neg(u) => {
                        if let Expression::Literal(Literal::Number(n)) =
                            &u.this
                        {
                            n.parse::<i64>().ok().map(|v| -v)
                        } else {
                            None
                        }
                    }
                    _ => None,
                }
            }
            let start_val = extract_i64(&start);
            let end_val = extract_i64(&end);
            let step_val = step.as_ref().and_then(|s| extract_i64(s));

            // Check for RANGE(x, x) or RANGE(x, y, 0) -> empty array
            if step_val == Some(0) {
                return Ok(Expression::Function(Box::new(
                    Function::new("ARRAY".to_string(), vec![]),
                )));
            }
            if let (Some(s), Some(e_val)) = (start_val, end_val) {
                if s == e_val {
                    return Ok(Expression::Function(Box::new(
                        Function::new("ARRAY".to_string(), vec![]),
                    )));
                }
            }

            if let (Some(_s_val), Some(e_val)) = (start_val, end_val) {
                // All constants - compute new end = end - step (if step provided) or end - 1
                // NOTE(review): the negative-step arm below has the
                // same body as the general Some(st) arm; the two could
                // be merged without changing behavior.
                match step_val {
                    Some(st) if st < 0 => {
                        // Negative step: SEQUENCE(start, end - step, step)
                        let new_end = e_val - st; // end - step (= end + |step|)
                        let mut args =
                            vec![start, Expression::number(new_end)];
                        if let Some(s) = step {
                            args.push(s);
                        }
                        Ok(Expression::Function(Box::new(
                            Function::new("SEQUENCE".to_string(), args),
                        )))
                    }
                    Some(st) => {
                        // Positive step: SEQUENCE(start, end - step, step)
                        let new_end = e_val - st;
                        let mut args =
                            vec![start, Expression::number(new_end)];
                        if let Some(s) = step {
                            args.push(s);
                        }
                        Ok(Expression::Function(Box::new(
                            Function::new("SEQUENCE".to_string(), args),
                        )))
                    }
                    None => {
                        // No step: SEQUENCE(start, end - 1)
                        let new_end = e_val - 1;
                        Ok(Expression::Function(Box::new(
                            Function::new(
                                "SEQUENCE".to_string(),
                                vec![
                                    start,
                                    Expression::number(new_end),
                                ],
                            ),
                        )))
                    }
                }
            } else {
                // Variable end: IF((end - 1) <= start, ARRAY(), SEQUENCE(start, (end - 1)))
                let end_m1 = Expression::Sub(Box::new(BinaryOp::new(
                    end.clone(),
                    Expression::number(1),
                )));
                // Parenthesize (end - 1) so the comparison renders with
                // the intended precedence.
                let cond = Expression::Lte(Box::new(BinaryOp::new(
                    Expression::Paren(Box::new(Paren {
                        this: end_m1.clone(),
                        trailing_comments: Vec::new(),
                    })),
                    start.clone(),
                )));
                let empty = Expression::Function(Box::new(
                    Function::new("ARRAY".to_string(), vec![]),
                ));
                let mut seq_args = vec![
                    start,
                    Expression::Paren(Box::new(Paren {
                        this: end_m1,
                        trailing_comments: Vec::new(),
                    })),
                ];
                if let Some(s) = step {
                    seq_args.push(s);
                }
                let seq = Expression::Function(Box::new(
                    Function::new("SEQUENCE".to_string(), seq_args),
                ));
                Ok(Expression::IfFunc(Box::new(
                    crate::expressions::IfFunc {
                        condition: cond,
                        true_value: empty,
                        false_value: Some(seq),
                        original_name: None,
                    },
                )))
            }
        }
        DialectType::SQLite => {
            // RANGE(start, end) -> GENERATE_SERIES(start, end)
            // The subquery wrapping is handled at the Alias level
            let mut args = vec![start, end];
            if let Some(s) = step {
                args.push(s);
            }
            Ok(Expression::Function(Box::new(Function::new(
                "GENERATE_SERIES".to_string(),
                args,
            ))))
        }
        // Other targets keep RANGE unchanged.
        _ => Ok(Expression::Function(f)),
    }
}
// ARRAY_REVERSE_SORT -> target-specific
// (handled above as well, but also need DuckDB self-normalization)
// MAP_FROM_ARRAYS(keys, values) -> target-specific map construction
"MAP_FROM_ARRAYS" if f.args.len() == 2 => match target {
    // Snowflake builds objects rather than maps.
    DialectType::Snowflake => Ok(Expression::Function(Box::new(
        Function::new("OBJECT_CONSTRUCT".to_string(), f.args),
    ))),
    // Spark/Databricks keep MAP_FROM_ARRAYS as-is.
    DialectType::Spark | DialectType::Databricks => {
        Ok(Expression::Function(Box::new(Function::new(
            "MAP_FROM_ARRAYS".to_string(),
            f.args,
        ))))
    }
    // Everyone else gets the generic MAP(keys, values) constructor.
    _ => Ok(Expression::Function(Box::new(Function::new(
        "MAP".to_string(),
        f.args,
    )))),
},
15227 // VARIANCE(x) -> varSamp(x) for ClickHouse
15228 "VARIANCE" if f.args.len() == 1 => match target {
15229 DialectType::ClickHouse => Ok(Expression::Function(Box::new(
15230 Function::new("varSamp".to_string(), f.args),
15231 ))),
15232 _ => Ok(Expression::Function(f)),
15233 },
15234 // STDDEV(x) -> stddevSamp(x) for ClickHouse
15235 "STDDEV" if f.args.len() == 1 => match target {
15236 DialectType::ClickHouse => Ok(Expression::Function(Box::new(
15237 Function::new("stddevSamp".to_string(), f.args),
15238 ))),
15239 _ => Ok(Expression::Function(f)),
15240 },
15241 // ISINF(x) -> IS_INF(x) for BigQuery
15242 "ISINF" if f.args.len() == 1 => match target {
15243 DialectType::BigQuery => Ok(Expression::Function(Box::new(
15244 Function::new("IS_INF".to_string(), f.args),
15245 ))),
15246 _ => Ok(Expression::Function(f)),
15247 },
// CONTAINS(arr, x) -> ARRAY_CONTAINS(arr, x) for Spark/Hive
"CONTAINS" if f.args.len() == 2 => match target {
    DialectType::Spark
    | DialectType::Databricks
    | DialectType::Hive => Ok(Expression::Function(Box::new(
        Function::new("ARRAY_CONTAINS".to_string(), f.args),
    ))),
    // Other targets keep CONTAINS unchanged.
    _ => Ok(Expression::Function(f)),
},
// ARRAY_CONTAINS(arr, x) -> CONTAINS(arr, x) for Presto
// NOTE(review): the LIST_CONTAINS/LIST_HAS/ARRAY_CONTAINS arm earlier
// in this match also guards on exactly two args, so this arm appears
// unreachable for "ARRAY_CONTAINS" — verify which arm should win for
// Presto/Trino/Athena targets.
"ARRAY_CONTAINS" if f.args.len() == 2 => match target {
    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
        Ok(Expression::Function(Box::new(Function::new(
            "CONTAINS".to_string(),
            f.args,
        ))))
    }
    DialectType::DuckDB => Ok(Expression::Function(Box::new(
        Function::new("ARRAY_CONTAINS".to_string(), f.args),
    ))),
    _ => Ok(Expression::Function(f)),
},
// TO_UNIXTIME(x) -> UNIX_TIMESTAMP(x) for Hive/Spark
"TO_UNIXTIME" if f.args.len() == 1 => match target {
    DialectType::Hive
    | DialectType::Spark
    | DialectType::Databricks => Ok(Expression::Function(Box::new(
        Function::new("UNIX_TIMESTAMP".to_string(), f.args),
    ))),
    // Other targets keep TO_UNIXTIME unchanged.
    _ => Ok(Expression::Function(f)),
},
15279 // FROM_UNIXTIME(x) -> target-specific
15280 "FROM_UNIXTIME" if f.args.len() == 1 => {
15281 match target {
15282 DialectType::Hive
15283 | DialectType::Spark
15284 | DialectType::Databricks
15285 | DialectType::Presto
15286 | DialectType::Trino => Ok(Expression::Function(f)),
15287 DialectType::DuckDB => {
15288 // DuckDB: TO_TIMESTAMP(x)
15289 let arg = f.args.into_iter().next().unwrap();
15290 Ok(Expression::Function(Box::new(Function::new(
15291 "TO_TIMESTAMP".to_string(),
15292 vec![arg],
15293 ))))
15294 }
15295 DialectType::PostgreSQL => {
15296 // PG: TO_TIMESTAMP(col)
15297 let arg = f.args.into_iter().next().unwrap();
15298 Ok(Expression::Function(Box::new(Function::new(
15299 "TO_TIMESTAMP".to_string(),
15300 vec![arg],
15301 ))))
15302 }
15303 DialectType::Redshift => {
15304 // Redshift: (TIMESTAMP 'epoch' + col * INTERVAL '1 SECOND')
15305 let arg = f.args.into_iter().next().unwrap();
15306 let epoch_ts = Expression::Literal(Literal::Timestamp(
15307 "epoch".to_string(),
15308 ));
15309 let interval = Expression::Interval(Box::new(
15310 crate::expressions::Interval {
15311 this: Some(Expression::string("1 SECOND")),
15312 unit: None,
15313 },
15314 ));
15315 let mul =
15316 Expression::Mul(Box::new(BinaryOp::new(arg, interval)));
15317 let add =
15318 Expression::Add(Box::new(BinaryOp::new(epoch_ts, mul)));
15319 Ok(Expression::Paren(Box::new(crate::expressions::Paren {
15320 this: add,
15321 trailing_comments: Vec::new(),
15322 })))
15323 }
15324 _ => Ok(Expression::Function(f)),
15325 }
15326 }
// FROM_UNIXTIME(x, fmt) with 2 args from Hive/Spark -> target-specific
"FROM_UNIXTIME"
    if f.args.len() == 2
        && matches!(
            source,
            DialectType::Hive
                | DialectType::Spark
                | DialectType::Databricks
        ) =>
{
    let mut args = f.args;
    let unix_ts = args.remove(0);
    let fmt_expr = args.remove(0);
    match target {
        DialectType::DuckDB => {
            // DuckDB: STRFTIME(TO_TIMESTAMP(x), c_fmt)
            let to_ts = Expression::Function(Box::new(Function::new(
                "TO_TIMESTAMP".to_string(),
                vec![unix_ts],
            )));
            // Only a literal format string can be translated from the
            // Hive/Java pattern language; other expressions pass
            // through untranslated.
            if let Expression::Literal(
                crate::expressions::Literal::String(s),
            ) = &fmt_expr
            {
                let c_fmt = Self::hive_format_to_c_format(s);
                Ok(Expression::Function(Box::new(Function::new(
                    "STRFTIME".to_string(),
                    vec![to_ts, Expression::string(&c_fmt)],
                ))))
            } else {
                Ok(Expression::Function(Box::new(Function::new(
                    "STRFTIME".to_string(),
                    vec![to_ts, fmt_expr],
                ))))
            }
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // Presto: DATE_FORMAT(FROM_UNIXTIME(x), presto_fmt)
            let from_unix =
                Expression::Function(Box::new(Function::new(
                    "FROM_UNIXTIME".to_string(),
                    vec![unix_ts],
                )));
            // As above: translate only literal format strings.
            if let Expression::Literal(
                crate::expressions::Literal::String(s),
            ) = &fmt_expr
            {
                let p_fmt = Self::hive_format_to_presto_format(s);
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_FORMAT".to_string(),
                    vec![from_unix, Expression::string(&p_fmt)],
                ))))
            } else {
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_FORMAT".to_string(),
                    vec![from_unix, fmt_expr],
                ))))
            }
        }
        _ => {
            // Keep as FROM_UNIXTIME(x, fmt) for other targets
            Ok(Expression::Function(Box::new(Function::new(
                "FROM_UNIXTIME".to_string(),
                vec![unix_ts, fmt_expr],
            ))))
        }
    }
}
15397 // DATEPART(unit, expr) -> EXTRACT(unit FROM expr) for Spark
15398 "DATEPART" | "DATE_PART" if f.args.len() == 2 => {
15399 let unit_str = Self::get_unit_str_static(&f.args[0]);
15400 // Get the raw unit text preserving original case
15401 let raw_unit = match &f.args[0] {
15402 Expression::Identifier(id) => id.name.clone(),
15403 Expression::Literal(crate::expressions::Literal::String(s)) => {
15404 s.clone()
15405 }
15406 Expression::Column(col) => col.name.name.clone(),
15407 _ => unit_str.clone(),
15408 };
15409 match target {
15410 DialectType::TSQL | DialectType::Fabric => {
15411 // Preserve original case of unit for TSQL
15412 let unit_name = match unit_str.as_str() {
15413 "YY" | "YYYY" => "YEAR".to_string(),
15414 "QQ" | "Q" => "QUARTER".to_string(),
15415 "MM" | "M" => "MONTH".to_string(),
15416 "WK" | "WW" => "WEEK".to_string(),
15417 "DD" | "D" | "DY" => "DAY".to_string(),
15418 "HH" => "HOUR".to_string(),
15419 "MI" | "N" => "MINUTE".to_string(),
15420 "SS" | "S" => "SECOND".to_string(),
15421 _ => raw_unit.clone(), // preserve original case
15422 };
15423 let mut args = f.args;
15424 args[0] =
15425 Expression::Identifier(Identifier::new(&unit_name));
15426 Ok(Expression::Function(Box::new(Function::new(
15427 "DATEPART".to_string(),
15428 args,
15429 ))))
15430 }
15431 DialectType::Spark | DialectType::Databricks => {
15432 // DATEPART(unit, expr) -> EXTRACT(unit FROM expr)
15433 // Preserve original case for non-abbreviation units
15434 let unit = match unit_str.as_str() {
15435 "YY" | "YYYY" => "YEAR".to_string(),
15436 "QQ" | "Q" => "QUARTER".to_string(),
15437 "MM" | "M" => "MONTH".to_string(),
15438 "WK" | "WW" => "WEEK".to_string(),
15439 "DD" | "D" | "DY" => "DAY".to_string(),
15440 "HH" => "HOUR".to_string(),
15441 "MI" | "N" => "MINUTE".to_string(),
15442 "SS" | "S" => "SECOND".to_string(),
15443 _ => raw_unit, // preserve original case
15444 };
15445 Ok(Expression::Extract(Box::new(
15446 crate::expressions::ExtractFunc {
15447 this: f.args[1].clone(),
15448 field: crate::expressions::DateTimeField::Custom(
15449 unit,
15450 ),
15451 },
15452 )))
15453 }
15454 _ => Ok(Expression::Function(Box::new(Function::new(
15455 "DATE_PART".to_string(),
15456 f.args,
15457 )))),
15458 }
15459 }
// DATENAME(mm, date) -> FORMAT(CAST(date AS DATETIME2), 'MMMM') for TSQL
// DATENAME(dw, date) -> FORMAT(CAST(date AS DATETIME2), 'dddd') for TSQL
// DATENAME(mm, date) -> DATE_FORMAT(CAST(date AS TIMESTAMP), 'MMMM') for Spark
// DATENAME(dw, date) -> DATE_FORMAT(CAST(date AS TIMESTAMP), 'EEEE') for Spark
// NOTE(review): the four branches below build nearly identical Cast
// nodes (differing only in target type and format string); a small
// helper would remove the duplication.
"DATENAME" if f.args.len() == 2 => {
    let unit_str = Self::get_unit_str_static(&f.args[0]);
    let date_expr = f.args[1].clone();
    match unit_str.as_str() {
        // Month-name variants.
        "MM" | "M" | "MONTH" => match target {
            DialectType::TSQL => {
                let cast_date = Expression::Cast(Box::new(
                    crate::expressions::Cast {
                        this: date_expr,
                        to: DataType::Custom {
                            name: "DATETIME2".to_string(),
                        },
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    },
                ));
                Ok(Expression::Function(Box::new(Function::new(
                    "FORMAT".to_string(),
                    vec![cast_date, Expression::string("MMMM")],
                ))))
            }
            DialectType::Spark | DialectType::Databricks => {
                let cast_date = Expression::Cast(Box::new(
                    crate::expressions::Cast {
                        this: date_expr,
                        to: DataType::Timestamp {
                            timezone: false,
                            precision: None,
                        },
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    },
                ));
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_FORMAT".to_string(),
                    vec![cast_date, Expression::string("MMMM")],
                ))))
            }
            _ => Ok(Expression::Function(f)),
        },
        // Day-of-week-name variants.
        "DW" | "WEEKDAY" => match target {
            DialectType::TSQL => {
                let cast_date = Expression::Cast(Box::new(
                    crate::expressions::Cast {
                        this: date_expr,
                        to: DataType::Custom {
                            name: "DATETIME2".to_string(),
                        },
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    },
                ));
                Ok(Expression::Function(Box::new(Function::new(
                    "FORMAT".to_string(),
                    vec![cast_date, Expression::string("dddd")],
                ))))
            }
            DialectType::Spark | DialectType::Databricks => {
                let cast_date = Expression::Cast(Box::new(
                    crate::expressions::Cast {
                        this: date_expr,
                        to: DataType::Timestamp {
                            timezone: false,
                            precision: None,
                        },
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    },
                ));
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_FORMAT".to_string(),
                    vec![cast_date, Expression::string("EEEE")],
                ))))
            }
            _ => Ok(Expression::Function(f)),
        },
        // Other units are left untouched.
        _ => Ok(Expression::Function(f)),
    }
}
15551 // STRING_AGG(x, sep) without WITHIN GROUP -> target-specific
15552 "STRING_AGG" if f.args.len() >= 2 => {
15553 let x = f.args[0].clone();
15554 let sep = f.args[1].clone();
15555 match target {
15556 DialectType::MySQL
15557 | DialectType::SingleStore
15558 | DialectType::Doris
15559 | DialectType::StarRocks => Ok(Expression::GroupConcat(
15560 Box::new(crate::expressions::GroupConcatFunc {
15561 this: x,
15562 separator: Some(sep),
15563 order_by: None,
15564 distinct: false,
15565 filter: None,
15566 }),
15567 )),
15568 DialectType::SQLite => Ok(Expression::GroupConcat(Box::new(
15569 crate::expressions::GroupConcatFunc {
15570 this: x,
15571 separator: Some(sep),
15572 order_by: None,
15573 distinct: false,
15574 filter: None,
15575 },
15576 ))),
15577 DialectType::PostgreSQL | DialectType::Redshift => {
15578 Ok(Expression::StringAgg(Box::new(
15579 crate::expressions::StringAggFunc {
15580 this: x,
15581 separator: Some(sep),
15582 order_by: None,
15583 distinct: false,
15584 filter: None,
15585 limit: None,
15586 },
15587 )))
15588 }
15589 _ => Ok(Expression::Function(f)),
15590 }
15591 }
15592 // JSON_ARRAYAGG -> JSON_AGG for PostgreSQL
15593 "JSON_ARRAYAGG" => match target {
15594 DialectType::PostgreSQL => {
15595 Ok(Expression::Function(Box::new(Function {
15596 name: "JSON_AGG".to_string(),
15597 ..(*f)
15598 })))
15599 }
15600 _ => Ok(Expression::Function(f)),
15601 },
15602 // SCHEMA_NAME(id) -> CURRENT_SCHEMA for PostgreSQL, 'main' for SQLite
15603 "SCHEMA_NAME" => match target {
15604 DialectType::PostgreSQL => Ok(Expression::CurrentSchema(Box::new(
15605 crate::expressions::CurrentSchema { this: None },
15606 ))),
15607 DialectType::SQLite => Ok(Expression::string("main")),
15608 _ => Ok(Expression::Function(f)),
15609 },
// TO_TIMESTAMP(x, fmt) 2-arg from Spark/Hive: convert Java format to target format
"TO_TIMESTAMP"
    if f.args.len() == 2
        && matches!(
            source,
            DialectType::Spark
                | DialectType::Databricks
                | DialectType::Hive
        )
        && matches!(target, DialectType::DuckDB) =>
{
    let mut args = f.args;
    let val = args.remove(0);
    let fmt_expr = args.remove(0);
    // Only a literal format string can be translated; otherwise the
    // format expression is passed to STRPTIME verbatim.
    if let Expression::Literal(Literal::String(ref s)) = fmt_expr {
        // Convert Java/Spark format to C strptime format
        // NOTE(review): duplicated as java_to_c_fmt_todate in the
        // TO_DATE 2-arg arm; consider hoisting one shared converter.
        fn java_to_c_fmt(fmt: &str) -> String {
            // Longest tokens first so e.g. "yyyy" is not half-consumed
            // by the trailing "yy" rule.
            let result = fmt
                .replace("yyyy", "%Y")
                .replace("SSSSSS", "%f")
                .replace("EEEE", "%W")
                .replace("MM", "%m")
                .replace("dd", "%d")
                .replace("HH", "%H")
                .replace("mm", "%M")
                .replace("ss", "%S")
                .replace("yy", "%y");
            // Second pass: map timezone letters, skipping over the
            // "%x" escapes already produced above.
            let mut out = String::new();
            let chars: Vec<char> = result.chars().collect();
            let mut i = 0;
            while i < chars.len() {
                if chars[i] == '%' && i + 1 < chars.len() {
                    out.push(chars[i]);
                    out.push(chars[i + 1]);
                    i += 2;
                } else if chars[i] == 'z' {
                    out.push_str("%Z");
                    i += 1;
                } else if chars[i] == 'Z' {
                    out.push_str("%z");
                    i += 1;
                } else {
                    out.push(chars[i]);
                    i += 1;
                }
            }
            out
        }
        let c_fmt = java_to_c_fmt(s);
        Ok(Expression::Function(Box::new(Function::new(
            "STRPTIME".to_string(),
            vec![val, Expression::string(&c_fmt)],
        ))))
    } else {
        Ok(Expression::Function(Box::new(Function::new(
            "STRPTIME".to_string(),
            vec![val, fmt_expr],
        ))))
    }
}
// TO_DATE(x) 1-arg from Doris: date conversion
"TO_DATE"
    if f.args.len() == 1
        && matches!(
            source,
            DialectType::Doris | DialectType::StarRocks
        ) =>
{
    let arg = f.args.into_iter().next().unwrap();
    match target {
        DialectType::Oracle
        | DialectType::DuckDB
        | DialectType::TSQL => {
            // CAST(x AS DATE)
            Ok(Expression::Cast(Box::new(Cast {
                this: arg,
                to: DataType::Date,
                double_colon_syntax: false,
                trailing_comments: vec![],
                format: None,
                default: None,
            })))
        }
        DialectType::MySQL | DialectType::SingleStore => {
            // DATE(x)
            Ok(Expression::Function(Box::new(Function::new(
                "DATE".to_string(),
                vec![arg],
            ))))
        }
        _ => {
            // Default: keep as TO_DATE(x) (Spark, PostgreSQL, etc.)
            Ok(Expression::Function(Box::new(Function::new(
                "TO_DATE".to_string(),
                vec![arg],
            ))))
        }
    }
}
// TO_DATE(x) 1-arg from Spark/Hive: safe date conversion
// (Spark's TO_DATE returns NULL on parse failure rather than erroring,
// per the "safe" comments below, so targets use try-variants.)
"TO_DATE"
    if f.args.len() == 1
        && matches!(
            source,
            DialectType::Spark
                | DialectType::Databricks
                | DialectType::Hive
        ) =>
{
    let arg = f.args.into_iter().next().unwrap();
    match target {
        DialectType::DuckDB => {
            // Spark TO_DATE is safe -> TRY_CAST(x AS DATE)
            Ok(Expression::TryCast(Box::new(Cast {
                this: arg,
                to: DataType::Date,
                double_colon_syntax: false,
                trailing_comments: vec![],
                format: None,
                default: None,
            })))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // CAST(CAST(x AS TIMESTAMP) AS DATE)
            Ok(Self::double_cast_timestamp_date(arg))
        }
        DialectType::Snowflake => {
            // Spark's TO_DATE is safe -> TRY_TO_DATE(x, 'yyyy-mm-DD')
            // The default Spark format 'yyyy-MM-dd' maps to Snowflake 'yyyy-mm-DD'
            Ok(Expression::Function(Box::new(Function::new(
                "TRY_TO_DATE".to_string(),
                vec![arg, Expression::string("yyyy-mm-DD")],
            ))))
        }
        _ => {
            // Default: keep as TO_DATE(x)
            Ok(Expression::Function(Box::new(Function::new(
                "TO_DATE".to_string(),
                vec![arg],
            ))))
        }
    }
}
// TO_DATE(x, fmt) 2-arg from Spark/Hive: format-based date conversion
"TO_DATE"
    if f.args.len() == 2
        && matches!(
            source,
            DialectType::Spark
                | DialectType::Databricks
                | DialectType::Hive
        ) =>
{
    let mut args = f.args;
    let val = args.remove(0);
    let fmt_expr = args.remove(0);
    // Spark's default pattern is treated exactly like the 1-arg form.
    let is_default_format = matches!(&fmt_expr, Expression::Literal(Literal::String(s)) if s == "yyyy-MM-dd");

    if is_default_format {
        // Default format: same as 1-arg form
        match target {
            DialectType::DuckDB => {
                // Safe conversion -> TRY_CAST(x AS DATE)
                Ok(Expression::TryCast(Box::new(Cast {
                    this: val,
                    to: DataType::Date,
                    double_colon_syntax: false,
                    trailing_comments: vec![],
                    format: None,
                    default: None,
                })))
            }
            DialectType::Presto
            | DialectType::Trino
            | DialectType::Athena => {
                // CAST(CAST(x AS TIMESTAMP) AS DATE)
                Ok(Self::double_cast_timestamp_date(val))
            }
15788 DialectType::Snowflake => {
15789 // TRY_TO_DATE(x, format) with Snowflake format mapping
15790 let sf_fmt = "yyyy-MM-dd"
15791 .replace("yyyy", "yyyy")
15792 .replace("MM", "mm")
15793 .replace("dd", "DD");
15794 Ok(Expression::Function(Box::new(Function::new(
15795 "TRY_TO_DATE".to_string(),
15796 vec![val, Expression::string(&sf_fmt)],
15797 ))))
15798 }
            // Other targets keep TO_DATE(x); the default-format second
            // argument is dropped.
            _ => Ok(Expression::Function(Box::new(Function::new(
                "TO_DATE".to_string(),
                vec![val],
            )))),
        }
    } else {
        // Non-default format: use format-based parsing
        // Translation only applies to literal format strings.
        if let Expression::Literal(Literal::String(ref s)) = fmt_expr {
            match target {
                DialectType::DuckDB => {
                    // CAST(CAST(TRY_STRPTIME(x, c_fmt) AS TIMESTAMP) AS DATE)
                    // NOTE(review): duplicate of java_to_c_fmt in the
                    // TO_TIMESTAMP 2-arg arm; consider one shared helper.
                    fn java_to_c_fmt_todate(fmt: &str) -> String {
                        // Longest tokens first so e.g. "yyyy" is not
                        // half-consumed by the trailing "yy" rule.
                        let result = fmt
                            .replace("yyyy", "%Y")
                            .replace("SSSSSS", "%f")
                            .replace("EEEE", "%W")
                            .replace("MM", "%m")
                            .replace("dd", "%d")
                            .replace("HH", "%H")
                            .replace("mm", "%M")
                            .replace("ss", "%S")
                            .replace("yy", "%y");
                        // Second pass maps timezone letters while
                        // skipping the "%x" escapes produced above.
                        let mut out = String::new();
                        let chars: Vec<char> = result.chars().collect();
                        let mut i = 0;
                        while i < chars.len() {
                            if chars[i] == '%' && i + 1 < chars.len() {
                                out.push(chars[i]);
                                out.push(chars[i + 1]);
                                i += 2;
                            } else if chars[i] == 'z' {
                                out.push_str("%Z");
                                i += 1;
                            } else if chars[i] == 'Z' {
                                out.push_str("%z");
                                i += 1;
                            } else {
                                out.push(chars[i]);
                                i += 1;
                            }
                        }
                        out
                    }
                    let c_fmt = java_to_c_fmt_todate(s);
                    // CAST(CAST(TRY_STRPTIME(x, fmt) AS TIMESTAMP) AS DATE)
                    let try_strptime =
                        Expression::Function(Box::new(Function::new(
                            "TRY_STRPTIME".to_string(),
                            vec![val, Expression::string(&c_fmt)],
                        )));
                    let cast_ts = Expression::Cast(Box::new(Cast {
                        this: try_strptime,
                        to: DataType::Timestamp {
                            precision: None,
                            timezone: false,
                        },
                        double_colon_syntax: false,
                        trailing_comments: vec![],
                        format: None,
                        default: None,
                    }));
                    Ok(Expression::Cast(Box::new(Cast {
                        this: cast_ts,
                        to: DataType::Date,
                        double_colon_syntax: false,
                        trailing_comments: vec![],
                        format: None,
                        default: None,
                    })))
                }
                DialectType::Presto
                | DialectType::Trino
                | DialectType::Athena => {
                    // CAST(DATE_PARSE(x, presto_fmt) AS DATE)
                    // Inline Java -> MySQL-style pattern translation
                    // (DATE_PARSE uses %-codes).
                    let p_fmt = s
                        .replace("yyyy", "%Y")
                        .replace("SSSSSS", "%f")
                        .replace("MM", "%m")
                        .replace("dd", "%d")
                        .replace("HH", "%H")
                        .replace("mm", "%M")
                        .replace("ss", "%S")
                        .replace("yy", "%y");
                    let date_parse =
                        Expression::Function(Box::new(Function::new(
                            "DATE_PARSE".to_string(),
                            vec![val, Expression::string(&p_fmt)],
                        )));
                    Ok(Expression::Cast(Box::new(Cast {
                        this: date_parse,
                        to: DataType::Date,
                        double_colon_syntax: false,
                        trailing_comments: vec![],
                        format: None,
                        default: None,
                    })))
                }
                DialectType::Snowflake => {
                    // TRY_TO_DATE(x, snowflake_fmt)
                    // NOTE(review): the Java format string is passed to
                    // Snowflake unchanged here — confirm no element
                    // translation is required for non-default formats.
                    Ok(Expression::Function(Box::new(Function::new(
                        "TRY_TO_DATE".to_string(),
                        vec![val, Expression::string(s)],
                    ))))
                }
                _ => Ok(Expression::Function(Box::new(Function::new(
                    "TO_DATE".to_string(),
                    vec![val, fmt_expr],
                )))),
            }
        } else {
            // Non-literal format: keep TO_DATE(x, fmt) untouched.
            Ok(Expression::Function(Box::new(Function::new(
                "TO_DATE".to_string(),
                vec![val, fmt_expr],
            ))))
        }
    }
}
// TO_TIMESTAMP(x) 1-arg: epoch conversion
"TO_TIMESTAMP"
    if f.args.len() == 1
        && matches!(source, DialectType::DuckDB)
        && matches!(
            target,
            DialectType::BigQuery
                | DialectType::Presto
                | DialectType::Trino
                | DialectType::Hive
                | DialectType::Spark
                | DialectType::Databricks
                | DialectType::Athena
        ) =>
{
    let arg = f.args.into_iter().next().unwrap();
    // Pick the target's epoch-seconds constructor. The final `_` arm
    // is unreachable given the guard above but keeps the match
    // exhaustive over DialectType.
    let func_name = match target {
        DialectType::BigQuery => "TIMESTAMP_SECONDS",
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena
        | DialectType::Hive
        | DialectType::Spark
        | DialectType::Databricks => "FROM_UNIXTIME",
        _ => "TO_TIMESTAMP",
    };
    Ok(Expression::Function(Box::new(Function::new(
        func_name.to_string(),
        vec![arg],
    ))))
}
// CONCAT(x) single-arg: -> CONCAT(COALESCE(x, '')) for Spark
"CONCAT" if f.args.len() == 1 => {
    let arg = f.args.into_iter().next().unwrap();
    match target {
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // CONCAT(a) -> CAST(a AS VARCHAR)
            Ok(Expression::Cast(Box::new(Cast {
                this: arg,
                to: DataType::VarChar {
                    length: None,
                    parenthesized_length: false,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            })))
        }
        DialectType::TSQL => {
            // CONCAT(a) -> a  (the wrapper is dropped entirely)
            Ok(arg)
        }
        DialectType::DuckDB => {
            // Keep CONCAT(a) for DuckDB (native support)
            Ok(Expression::Function(Box::new(Function::new(
                "CONCAT".to_string(),
                vec![arg],
            ))))
        }
        DialectType::Spark | DialectType::Databricks => {
            // CONCAT(a) -> CONCAT(COALESCE(a, '')) so a NULL argument
            // yields '' rather than NULL.
            let coalesced = Expression::Coalesce(Box::new(
                crate::expressions::VarArgFunc {
                    expressions: vec![arg, Expression::string("")],
                    original_name: None,
                },
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "CONCAT".to_string(),
                vec![coalesced],
            ))))
        }
        // Other targets keep CONCAT(a) as written.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "CONCAT".to_string(),
            vec![arg],
        )))),
    }
}
15996 // REGEXP_EXTRACT(a, p) 2-arg: BigQuery default group is 0 (no 3rd arg needed)
15997 "REGEXP_EXTRACT"
15998 if f.args.len() == 3 && matches!(target, DialectType::BigQuery) =>
15999 {
16000 // If group_index is 0, drop it
16001 let drop_group = match &f.args[2] {
16002 Expression::Literal(Literal::Number(n)) => n == "0",
16003 _ => false,
16004 };
16005 if drop_group {
16006 let mut args = f.args;
16007 args.truncate(2);
16008 Ok(Expression::Function(Box::new(Function::new(
16009 "REGEXP_EXTRACT".to_string(),
16010 args,
16011 ))))
16012 } else {
16013 Ok(Expression::Function(f))
16014 }
16015 }
16016 // REGEXP_EXTRACT(a, pattern, group, flags) 4-arg -> REGEXP_SUBSTR for Snowflake
16017 "REGEXP_EXTRACT"
16018 if f.args.len() == 4
16019 && matches!(target, DialectType::Snowflake) =>
16020 {
16021 // REGEXP_EXTRACT(a, 'pattern', 2, 'i') -> REGEXP_SUBSTR(a, 'pattern', 1, 1, 'i', 2)
16022 let mut args = f.args;
16023 let this = args.remove(0);
16024 let pattern = args.remove(0);
16025 let group = args.remove(0);
16026 let flags = args.remove(0);
16027 Ok(Expression::Function(Box::new(Function::new(
16028 "REGEXP_SUBSTR".to_string(),
16029 vec![
16030 this,
16031 pattern,
16032 Expression::number(1),
16033 Expression::number(1),
16034 flags,
16035 group,
16036 ],
16037 ))))
16038 }
16039 // REGEXP_SUBSTR(a, pattern, position) 3-arg -> REGEXP_EXTRACT(SUBSTRING(a, pos), pattern)
16040 "REGEXP_SUBSTR"
16041 if f.args.len() == 3
16042 && matches!(
16043 target,
16044 DialectType::DuckDB
16045 | DialectType::Presto
16046 | DialectType::Trino
16047 | DialectType::Spark
16048 | DialectType::Databricks
16049 ) =>
16050 {
16051 let mut args = f.args;
16052 let this = args.remove(0);
16053 let pattern = args.remove(0);
16054 let position = args.remove(0);
16055 // Wrap subject in SUBSTRING(this, position) to apply the offset
16056 let substring_expr = Expression::Function(Box::new(Function::new(
16057 "SUBSTRING".to_string(),
16058 vec![this, position],
16059 )));
16060 let target_name = match target {
16061 DialectType::DuckDB => "REGEXP_EXTRACT",
16062 _ => "REGEXP_EXTRACT",
16063 };
16064 Ok(Expression::Function(Box::new(Function::new(
16065 target_name.to_string(),
16066 vec![substring_expr, pattern],
16067 ))))
16068 }
            // TO_DAYS(x) -> (DATEDIFF(x, '0000-01-01') + 1) or target-specific
            //
            // The day-count is computed as "days since 0000-01-01, plus one", built
            // with whichever date-diff spelling the target uses, then wrapped in
            // parentheses so the `+ 1` binds correctly when embedded in a larger
            // expression.
            "TO_DAYS" if f.args.len() == 1 => {
                // Guard guarantees exactly one argument, so unwrap() cannot panic.
                let x = f.args.into_iter().next().unwrap();
                let epoch = Expression::string("0000-01-01");
                // Build the final target-specific expression directly
                let datediff_expr = match target {
                    DialectType::MySQL | DialectType::SingleStore => {
                        // MySQL: (DATEDIFF(x, '0000-01-01') + 1)
                        Expression::Function(Box::new(Function::new(
                            "DATEDIFF".to_string(),
                            vec![x, epoch],
                        )))
                    }
                    DialectType::DuckDB => {
                        // DuckDB: (DATE_DIFF('DAY', CAST('0000-01-01' AS DATE), CAST(x AS DATE)) + 1)
                        // Both operands are cast to DATE explicitly.
                        let cast_epoch = Expression::Cast(Box::new(Cast {
                            this: epoch,
                            to: DataType::Date,
                            trailing_comments: Vec::new(),
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        let cast_x = Expression::Cast(Box::new(Cast {
                            this: x,
                            to: DataType::Date,
                            trailing_comments: Vec::new(),
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        Expression::Function(Box::new(Function::new(
                            "DATE_DIFF".to_string(),
                            vec![Expression::string("DAY"), cast_epoch, cast_x],
                        )))
                    }
                    DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Athena => {
                        // Presto: (DATE_DIFF('DAY', CAST(CAST('0000-01-01' AS TIMESTAMP) AS DATE), CAST(CAST(x AS TIMESTAMP) AS DATE)) + 1)
                        // The double cast (TIMESTAMP then DATE) is produced by a shared helper.
                        let cast_epoch = Self::double_cast_timestamp_date(epoch);
                        let cast_x = Self::double_cast_timestamp_date(x);
                        Expression::Function(Box::new(Function::new(
                            "DATE_DIFF".to_string(),
                            vec![Expression::string("DAY"), cast_epoch, cast_x],
                        )))
                    }
                    _ => {
                        // Default: (DATEDIFF(x, '0000-01-01') + 1)
                        Expression::Function(Box::new(Function::new(
                            "DATEDIFF".to_string(),
                            vec![x, epoch],
                        )))
                    }
                };
                // Add 1 and parenthesize the whole thing.
                let add_one = Expression::Add(Box::new(BinaryOp::new(
                    datediff_expr,
                    Expression::number(1),
                )));
                Ok(Expression::Paren(Box::new(crate::expressions::Paren {
                    this: add_one,
                    trailing_comments: Vec::new(),
                })))
            }
            // STR_TO_DATE(x, format) -> DATE_PARSE / STRPTIME / TO_DATE etc.
            //
            // Presto/Trino: DATE_PARSE returns a timestamp; if the format string has no
            // time components the result is cast back down to DATE to preserve the
            // date-only semantics of STR_TO_DATE.
            "STR_TO_DATE"
                if f.args.len() == 2
                    && matches!(
                        target,
                        DialectType::Presto | DialectType::Trino
                    ) =>
            {
                let mut args = f.args;
                let x = args.remove(0);
                let format_expr = args.remove(0);
                // Check if the format contains time components.
                // Only string literals are inspected; a dynamic format expression is
                // conservatively treated as date-only (has_time = false).
                let has_time =
                    if let Expression::Literal(Literal::String(ref fmt)) =
                        format_expr
                    {
                        fmt.contains("%H")
                            || fmt.contains("%T")
                            || fmt.contains("%M")
                            || fmt.contains("%S")
                            || fmt.contains("%I")
                            || fmt.contains("%p")
                    } else {
                        false
                    };
                let date_parse = Expression::Function(Box::new(Function::new(
                    "DATE_PARSE".to_string(),
                    vec![x, format_expr],
                )));
                if has_time {
                    // Has time components: just DATE_PARSE
                    Ok(date_parse)
                } else {
                    // Date-only: CAST(DATE_PARSE(...) AS DATE)
                    Ok(Expression::Cast(Box::new(Cast {
                        this: date_parse,
                        to: DataType::Date,
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    })))
                }
            }
            // PostgreSQL/Redshift: translate strftime-style tokens to the TO_DATE
            // template language, then CAST the TO_DATE result to TIMESTAMP.
            // NOTE(review): only %Y %m %d %H %M %S are translated — other strftime
            // tokens pass through untouched; confirm that is the intended coverage.
            "STR_TO_DATE"
                if f.args.len() == 2
                    && matches!(
                        target,
                        DialectType::PostgreSQL | DialectType::Redshift
                    ) =>
            {
                let mut args = f.args;
                let x = args.remove(0);
                let fmt = args.remove(0);
                // Literal formats are rewritten token-by-token; non-literal format
                // expressions are passed through unchanged.
                let pg_fmt = match fmt {
                    Expression::Literal(Literal::String(s)) => Expression::string(
                        &s.replace("%Y", "YYYY")
                            .replace("%m", "MM")
                            .replace("%d", "DD")
                            .replace("%H", "HH24")
                            .replace("%M", "MI")
                            .replace("%S", "SS"),
                    ),
                    other => other,
                };
                let to_date = Expression::Function(Box::new(Function::new(
                    "TO_DATE".to_string(),
                    vec![x, pg_fmt],
                )));
                Ok(Expression::Cast(Box::new(Cast {
                    this: to_date,
                    to: DataType::Timestamp {
                        timezone: false,
                        precision: None,
                    },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                })))
            }
            // RANGE(start, end) -> GENERATE_SERIES for SQLite
            "RANGE"
                if (f.args.len() == 1 || f.args.len() == 2)
                    && matches!(target, DialectType::SQLite) =>
            {
                if f.args.len() == 2 {
                    // RANGE(start, end) -> (SELECT value AS col_alias FROM GENERATE_SERIES(start, end))
                    // For SQLite, RANGE is exclusive on end, GENERATE_SERIES is inclusive
                    // NOTE(review): despite the comment above, the end bound is passed
                    // through unchanged (no `end - 1` adjustment) — confirm whether the
                    // inclusive/exclusive mismatch is compensated elsewhere or is a gap.
                    let mut args = f.args;
                    let start = args.remove(0);
                    let end = args.remove(0);
                    Ok(Expression::Function(Box::new(Function::new(
                        "GENERATE_SERIES".to_string(),
                        vec![start, end],
                    ))))
                } else {
                    // 1-arg RANGE: left untouched.
                    Ok(Expression::Function(f))
                }
            }
            // UNIFORM(low, high[, seed]) -> UNIFORM(low, high, RANDOM([seed])) for Snowflake
            // When source is Snowflake, keep as-is (args already in correct form)
            "UNIFORM"
                if matches!(target, DialectType::Snowflake)
                    && (f.args.len() == 2 || f.args.len() == 3) =>
            {
                if matches!(source, DialectType::Snowflake) {
                    // Snowflake -> Snowflake: keep as-is
                    Ok(Expression::Function(f))
                } else {
                    let mut args = f.args;
                    let low = args.remove(0);
                    let high = args.remove(0);
                    // Third argument becomes a RANDOM(...) generator: reuse the
                    // optional seed as RANDOM(seed), otherwise RANDOM() with no args.
                    let random = if !args.is_empty() {
                        let seed = args.remove(0);
                        Expression::Function(Box::new(Function::new(
                            "RANDOM".to_string(),
                            vec![seed],
                        )))
                    } else {
                        Expression::Function(Box::new(Function::new(
                            "RANDOM".to_string(),
                            vec![],
                        )))
                    };
                    Ok(Expression::Function(Box::new(Function::new(
                        "UNIFORM".to_string(),
                        vec![low, high, random],
                    ))))
                }
            }
            // TO_UTC_TIMESTAMP(ts, tz) -> target-specific UTC conversion
            //
            // Converts "local timestamp in zone tz" to UTC using whatever construct
            // the target dialect offers. A string-literal first argument is wrapped in
            // CAST(... AS TIMESTAMP) up front so every branch works on a timestamp.
            "TO_UTC_TIMESTAMP" if f.args.len() == 2 => {
                let mut args = f.args;
                let ts_arg = args.remove(0);
                let tz_arg = args.remove(0);
                // Cast string literal to TIMESTAMP for all targets
                let ts_cast =
                    if matches!(&ts_arg, Expression::Literal(Literal::String(_))) {
                        Expression::Cast(Box::new(Cast {
                            this: ts_arg,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }))
                    } else {
                        ts_arg
                    };
                match target {
                    DialectType::Spark | DialectType::Databricks => {
                        // Native function: keep TO_UTC_TIMESTAMP, only the cast changes.
                        Ok(Expression::Function(Box::new(Function::new(
                            "TO_UTC_TIMESTAMP".to_string(),
                            vec![ts_cast, tz_arg],
                        ))))
                    }
                    DialectType::Snowflake => {
                        // CONVERT_TIMEZONE(tz, 'UTC', CAST(ts AS TIMESTAMP))
                        Ok(Expression::Function(Box::new(Function::new(
                            "CONVERT_TIMEZONE".to_string(),
                            vec![tz_arg, Expression::string("UTC"), ts_cast],
                        ))))
                    }
                    DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Athena => {
                        // WITH_TIMEZONE(CAST(ts AS TIMESTAMP), tz) AT TIME ZONE 'UTC'
                        let wtz = Expression::Function(Box::new(Function::new(
                            "WITH_TIMEZONE".to_string(),
                            vec![ts_cast, tz_arg],
                        )));
                        Ok(Expression::AtTimeZone(Box::new(
                            crate::expressions::AtTimeZone {
                                this: wtz,
                                zone: Expression::string("UTC"),
                            },
                        )))
                    }
                    DialectType::BigQuery => {
                        // DATETIME(TIMESTAMP(CAST(ts AS DATETIME), tz), 'UTC')
                        // If ts_cast is the CAST built above, unwrap it so the inner
                        // value is re-cast to DATETIME instead of double-casting.
                        let cast_dt = Expression::Cast(Box::new(Cast {
                            this: if let Expression::Cast(c) = ts_cast {
                                c.this
                            } else {
                                ts_cast.clone()
                            },
                            to: DataType::Custom {
                                name: "DATETIME".to_string(),
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        let ts_func =
                            Expression::Function(Box::new(Function::new(
                                "TIMESTAMP".to_string(),
                                vec![cast_dt, tz_arg],
                            )));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATETIME".to_string(),
                            vec![ts_func, Expression::string("UTC")],
                        ))))
                    }
                    _ => {
                        // DuckDB, PostgreSQL, Redshift: CAST(ts AS TIMESTAMP) AT TIME ZONE tz AT TIME ZONE 'UTC'
                        let atz1 = Expression::AtTimeZone(Box::new(
                            crate::expressions::AtTimeZone {
                                this: ts_cast,
                                zone: tz_arg,
                            },
                        ));
                        Ok(Expression::AtTimeZone(Box::new(
                            crate::expressions::AtTimeZone {
                                this: atz1,
                                zone: Expression::string("UTC"),
                            },
                        )))
                    }
                }
            }
            // FROM_UTC_TIMESTAMP(ts, tz) -> target-specific UTC conversion
            //
            // Inverse of the TO_UTC_TIMESTAMP rewrite: converts a UTC timestamp into
            // zone tz. Same string-literal-to-TIMESTAMP cast pre-step as above.
            "FROM_UTC_TIMESTAMP" if f.args.len() == 2 => {
                let mut args = f.args;
                let ts_arg = args.remove(0);
                let tz_arg = args.remove(0);
                // Cast string literal to TIMESTAMP
                let ts_cast =
                    if matches!(&ts_arg, Expression::Literal(Literal::String(_))) {
                        Expression::Cast(Box::new(Cast {
                            this: ts_arg,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }))
                    } else {
                        ts_arg
                    };
                match target {
                    DialectType::Spark | DialectType::Databricks => {
                        // Native function: keep FROM_UTC_TIMESTAMP.
                        Ok(Expression::Function(Box::new(Function::new(
                            "FROM_UTC_TIMESTAMP".to_string(),
                            vec![ts_cast, tz_arg],
                        ))))
                    }
                    DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Athena => {
                        // AT_TIMEZONE(CAST(ts AS TIMESTAMP), tz)
                        Ok(Expression::Function(Box::new(Function::new(
                            "AT_TIMEZONE".to_string(),
                            vec![ts_cast, tz_arg],
                        ))))
                    }
                    DialectType::Snowflake => {
                        // CONVERT_TIMEZONE('UTC', tz, CAST(ts AS TIMESTAMP))
                        Ok(Expression::Function(Box::new(Function::new(
                            "CONVERT_TIMEZONE".to_string(),
                            vec![Expression::string("UTC"), tz_arg, ts_cast],
                        ))))
                    }
                    _ => {
                        // DuckDB, PostgreSQL, Redshift: CAST(ts AS TIMESTAMP) AT TIME ZONE tz
                        Ok(Expression::AtTimeZone(Box::new(
                            crate::expressions::AtTimeZone {
                                this: ts_cast,
                                zone: tz_arg,
                            },
                        )))
                    }
                }
            }
            // MAP_FROM_ARRAYS(keys, values) -> target-specific map construction
            // Pure rename: OBJECT_CONSTRUCT for Snowflake, MAP for everything else;
            // arguments pass through unchanged.
            "MAP_FROM_ARRAYS" if f.args.len() == 2 => {
                let name = match target {
                    DialectType::Snowflake => "OBJECT_CONSTRUCT",
                    _ => "MAP",
                };
                Ok(Expression::Function(Box::new(Function::new(
                    name.to_string(),
                    f.args,
                ))))
            }
            // STR_TO_MAP(s, pair_delim, kv_delim) -> SPLIT_TO_MAP for Presto
            // Rename only for Presto-family targets; all other targets keep STR_TO_MAP.
            "STR_TO_MAP" if f.args.len() >= 1 => match target {
                DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                    Ok(Expression::Function(Box::new(Function::new(
                        "SPLIT_TO_MAP".to_string(),
                        f.args,
                    ))))
                }
                _ => Ok(Expression::Function(f)),
            },
            // TIME_TO_STR(x, fmt) -> Expression::TimeToStr for proper generation
            // Lifts the generic function call into a dedicated AST node so the
            // generator can emit dialect-correct formatting syntax.
            "TIME_TO_STR" if f.args.len() == 2 => {
                let mut args = f.args;
                let this = args.remove(0);
                let fmt_expr = args.remove(0);
                // Non-literal formats fall back to a default strftime-style pattern.
                let format =
                    if let Expression::Literal(Literal::String(s)) = fmt_expr {
                        s
                    } else {
                        "%Y-%m-%d %H:%M:%S".to_string()
                    };
                Ok(Expression::TimeToStr(Box::new(
                    crate::expressions::TimeToStr {
                        this: Box::new(this),
                        format,
                        culture: None,
                        zone: None,
                    },
                )))
            }
            // STR_TO_TIME(x, fmt) -> Expression::StrToTime for proper generation
            // These arms lift generic time-conversion function calls into dedicated
            // AST nodes; the dialect generators are responsible for rendering them.
            "STR_TO_TIME" if f.args.len() == 2 => {
                let mut args = f.args;
                let this = args.remove(0);
                let fmt_expr = args.remove(0);
                // Non-literal formats fall back to a default strftime-style pattern.
                let format =
                    if let Expression::Literal(Literal::String(s)) = fmt_expr {
                        s
                    } else {
                        "%Y-%m-%d %H:%M:%S".to_string()
                    };
                Ok(Expression::StrToTime(Box::new(
                    crate::expressions::StrToTime {
                        this: Box::new(this),
                        format,
                        zone: None,
                        safe: None,
                        target_type: None,
                    },
                )))
            }
            // STR_TO_UNIX(x, fmt) -> Expression::StrToUnix for proper generation
            "STR_TO_UNIX" if f.args.len() >= 1 => {
                let mut args = f.args;
                let this = args.remove(0);
                // Optional second argument: only string literals are captured as the
                // format; any other expression is dropped (format = None).
                let format = if !args.is_empty() {
                    if let Expression::Literal(Literal::String(s)) = args.remove(0)
                    {
                        Some(s)
                    } else {
                        None
                    }
                } else {
                    None
                };
                Ok(Expression::StrToUnix(Box::new(
                    crate::expressions::StrToUnix {
                        this: Some(Box::new(this)),
                        format,
                    },
                )))
            }
            // TIME_TO_UNIX(x) -> Expression::TimeToUnix for proper generation
            "TIME_TO_UNIX" if f.args.len() == 1 => {
                let mut args = f.args;
                let this = args.remove(0);
                Ok(Expression::TimeToUnix(Box::new(
                    crate::expressions::UnaryFunc {
                        this,
                        original_name: None,
                    },
                )))
            }
            // UNIX_TO_STR(x, fmt) -> Expression::UnixToStr for proper generation
            "UNIX_TO_STR" if f.args.len() >= 1 => {
                let mut args = f.args;
                let this = args.remove(0);
                // Optional format handled like STR_TO_UNIX above.
                let format = if !args.is_empty() {
                    if let Expression::Literal(Literal::String(s)) = args.remove(0)
                    {
                        Some(s)
                    } else {
                        None
                    }
                } else {
                    None
                };
                Ok(Expression::UnixToStr(Box::new(
                    crate::expressions::UnixToStr {
                        this: Box::new(this),
                        format,
                    },
                )))
            }
            // UNIX_TO_TIME(x) -> Expression::UnixToTime for proper generation
            "UNIX_TO_TIME" if f.args.len() == 1 => {
                let mut args = f.args;
                let this = args.remove(0);
                Ok(Expression::UnixToTime(Box::new(
                    crate::expressions::UnixToTime {
                        this: Box::new(this),
                        scale: None,
                        zone: None,
                        hours: None,
                        minutes: None,
                        format: None,
                        target_type: None,
                    },
                )))
            }
            // TIME_STR_TO_DATE(x) -> Expression::TimeStrToDate for proper generation
            "TIME_STR_TO_DATE" if f.args.len() == 1 => {
                let mut args = f.args;
                let this = args.remove(0);
                Ok(Expression::TimeStrToDate(Box::new(
                    crate::expressions::UnaryFunc {
                        this,
                        original_name: None,
                    },
                )))
            }
            // TIME_STR_TO_TIME(x) -> Expression::TimeStrToTime for proper generation
            "TIME_STR_TO_TIME" if f.args.len() == 1 => {
                let mut args = f.args;
                let this = args.remove(0);
                Ok(Expression::TimeStrToTime(Box::new(
                    crate::expressions::TimeStrToTime {
                        this: Box::new(this),
                        zone: None,
                    },
                )))
            }
            // MONTHS_BETWEEN(end, start) -> DuckDB complex expansion
            //
            // DuckDB has no MONTHS_BETWEEN, so it is expanded into an arithmetic
            // expression; Snowflake/Redshift and Presto-family targets map to their
            // respective DATEDIFF/DATE_DIFF spellings with swapped argument order.
            "MONTHS_BETWEEN" if f.args.len() == 2 => {
                match target {
                    DialectType::DuckDB => {
                        let mut args = f.args;
                        let end_date = args.remove(0);
                        let start_date = args.remove(0);
                        let cast_end = Self::ensure_cast_date(end_date);
                        let cast_start = Self::ensure_cast_date(start_date);
                        // DATE_DIFF('MONTH', start, end) + CASE WHEN DAY(end) = DAY(LAST_DAY(end)) AND DAY(start) = DAY(LAST_DAY(start)) THEN 0 ELSE (DAY(end) - DAY(start)) / 31.0 END
                        let dd = Expression::Function(Box::new(Function::new(
                            "DATE_DIFF".to_string(),
                            vec![
                                Expression::string("MONTH"),
                                cast_start.clone(),
                                cast_end.clone(),
                            ],
                        )));
                        // Day-of-month of each operand.
                        let day_end =
                            Expression::Function(Box::new(Function::new(
                                "DAY".to_string(),
                                vec![cast_end.clone()],
                            )));
                        let day_start =
                            Expression::Function(Box::new(Function::new(
                                "DAY".to_string(),
                                vec![cast_start.clone()],
                            )));
                        // Day-of-month of each operand's month-end, used to detect
                        // the "both dates are last day of month" case.
                        let last_day_end =
                            Expression::Function(Box::new(Function::new(
                                "LAST_DAY".to_string(),
                                vec![cast_end.clone()],
                            )));
                        let last_day_start =
                            Expression::Function(Box::new(Function::new(
                                "LAST_DAY".to_string(),
                                vec![cast_start.clone()],
                            )));
                        let day_last_end = Expression::Function(Box::new(
                            Function::new("DAY".to_string(), vec![last_day_end]),
                        ));
                        let day_last_start = Expression::Function(Box::new(
                            Function::new("DAY".to_string(), vec![last_day_start]),
                        ));
                        let cond1 = Expression::Eq(Box::new(BinaryOp::new(
                            day_end.clone(),
                            day_last_end,
                        )));
                        let cond2 = Expression::Eq(Box::new(BinaryOp::new(
                            day_start.clone(),
                            day_last_start,
                        )));
                        let both_cond =
                            Expression::And(Box::new(BinaryOp::new(cond1, cond2)));
                        // Fractional part: (DAY(end) - DAY(start)) / 31.0,
                        // parenthesized so the division applies to the difference.
                        let day_diff = Expression::Sub(Box::new(BinaryOp::new(
                            day_end, day_start,
                        )));
                        let day_diff_paren = Expression::Paren(Box::new(
                            crate::expressions::Paren {
                                this: day_diff,
                                trailing_comments: Vec::new(),
                            },
                        ));
                        let frac = Expression::Div(Box::new(BinaryOp::new(
                            day_diff_paren,
                            Expression::Literal(Literal::Number(
                                "31.0".to_string(),
                            )),
                        )));
                        let case_expr = Expression::Case(Box::new(Case {
                            operand: None,
                            whens: vec![(both_cond, Expression::number(0))],
                            else_: Some(frac),
                            comments: Vec::new(),
                        }));
                        Ok(Expression::Add(Box::new(BinaryOp::new(dd, case_expr))))
                    }
                    DialectType::Snowflake | DialectType::Redshift => {
                        // DATEDIFF(MONTH, start, end) — note the arg order swap.
                        let mut args = f.args;
                        let end_date = args.remove(0);
                        let start_date = args.remove(0);
                        let unit = Expression::Identifier(Identifier::new("MONTH"));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEDIFF".to_string(),
                            vec![unit, start_date, end_date],
                        ))))
                    }
                    DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Athena => {
                        // DATE_DIFF('MONTH', start, end) — unit is a string literal here.
                        let mut args = f.args;
                        let end_date = args.remove(0);
                        let start_date = args.remove(0);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_DIFF".to_string(),
                            vec![Expression::string("MONTH"), start_date, end_date],
                        ))))
                    }
                    _ => Ok(Expression::Function(f)),
                }
            }
            // MONTHS_BETWEEN(end, start, roundOff) - 3-arg form (Spark-specific)
            // Drop the roundOff arg for non-Spark targets, keep it for Spark
            "MONTHS_BETWEEN" if f.args.len() == 3 => {
                match target {
                    DialectType::Spark | DialectType::Databricks => {
                        Ok(Expression::Function(f))
                    }
                    _ => {
                        // Drop the 3rd arg and delegate to the 2-arg logic
                        let mut args = f.args;
                        let end_date = args.remove(0);
                        let start_date = args.remove(0);
                        // Re-create as 2-arg and process (recurses into this match
                        // so the 2-arg arm above handles the target mapping).
                        let f2 = Function::new(
                            "MONTHS_BETWEEN".to_string(),
                            vec![end_date, start_date],
                        );
                        let e2 = Expression::Function(Box::new(f2));
                        Self::cross_dialect_normalize(e2, source, target)
                    }
                }
            }
            // TO_TIMESTAMP(x) with 1 arg -> CAST(x AS TIMESTAMP) for most targets
            // Applies only when the SOURCE is Spark-family (their 1-arg TO_TIMESTAMP
            // is a plain cast); target dialect does not affect this rewrite.
            "TO_TIMESTAMP"
                if f.args.len() == 1
                    && matches!(
                        source,
                        DialectType::Spark
                            | DialectType::Databricks
                            | DialectType::Hive
                    ) =>
            {
                // Guard guarantees exactly one argument, so unwrap() cannot panic.
                let arg = f.args.into_iter().next().unwrap();
                Ok(Expression::Cast(Box::new(Cast {
                    this: arg,
                    to: DataType::Timestamp {
                        timezone: false,
                        precision: None,
                    },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                })))
            }
            // STRING(x) from Spark-family SOURCE -> CAST(x AS STRING/TEXT)
            // Spark-family targets keep the STRING type name; others get TEXT.
            "STRING"
                if f.args.len() == 1
                    && matches!(
                        source,
                        DialectType::Spark | DialectType::Databricks
                    ) =>
            {
                let arg = f.args.into_iter().next().unwrap();
                let dt = match target {
                    DialectType::Spark
                    | DialectType::Databricks
                    | DialectType::Hive => DataType::Custom {
                        name: "STRING".to_string(),
                    },
                    _ => DataType::Text,
                };
                Ok(Expression::Cast(Box::new(Cast {
                    this: arg,
                    to: dt,
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                })))
            }
            // LOGICAL_OR(x) -> BOOL_OR(x) for Spark target
            // Pure rename; all other targets keep LOGICAL_OR.
            "LOGICAL_OR" if f.args.len() == 1 => {
                let name = match target {
                    DialectType::Spark | DialectType::Databricks => "BOOL_OR",
                    _ => "LOGICAL_OR",
                };
                Ok(Expression::Function(Box::new(Function::new(
                    name.to_string(),
                    f.args,
                ))))
            }
16750 // SPLIT(x, pattern) from Spark -> STR_SPLIT_REGEX for DuckDB, REGEXP_SPLIT for Presto
16751 "SPLIT"
16752 if f.args.len() == 2
16753 && matches!(
16754 source,
16755 DialectType::Spark
16756 | DialectType::Databricks
16757 | DialectType::Hive
16758 ) =>
16759 {
16760 let name = match target {
16761 DialectType::DuckDB => "STR_SPLIT_REGEX",
16762 DialectType::Presto
16763 | DialectType::Trino
16764 | DialectType::Athena => "REGEXP_SPLIT",
16765 DialectType::Spark
16766 | DialectType::Databricks
16767 | DialectType::Hive => "SPLIT",
16768 _ => "SPLIT",
16769 };
16770 Ok(Expression::Function(Box::new(Function::new(
16771 name.to_string(),
16772 f.args,
16773 ))))
16774 }
            // TRY_ELEMENT_AT -> ELEMENT_AT for Presto, array[idx] for DuckDB
            "TRY_ELEMENT_AT" if f.args.len() == 2 => match target {
                DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                    // Rename only; arguments unchanged.
                    Ok(Expression::Function(Box::new(Function::new(
                        "ELEMENT_AT".to_string(),
                        f.args,
                    ))))
                }
                DialectType::DuckDB => {
                    // Rewritten to subscript syntax: arr[idx].
                    let mut args = f.args;
                    let arr = args.remove(0);
                    let idx = args.remove(0);
                    Ok(Expression::Subscript(Box::new(
                        crate::expressions::Subscript {
                            this: arr,
                            index: idx,
                        },
                    )))
                }
                _ => Ok(Expression::Function(f)),
            },
            // ARRAY_FILTER(arr, lambda) -> FILTER for Hive/Spark/Presto, LIST_FILTER for DuckDB
            // This arm and the FILTER arm below deliberately share the same mapping
            // table so the two spellings normalize identically.
            "ARRAY_FILTER" if f.args.len() == 2 => {
                let name = match target {
                    DialectType::DuckDB => "LIST_FILTER",
                    DialectType::StarRocks => "ARRAY_FILTER",
                    _ => "FILTER",
                };
                Ok(Expression::Function(Box::new(Function::new(
                    name.to_string(),
                    f.args,
                ))))
            }
            // FILTER(arr, lambda) -> ARRAY_FILTER for StarRocks, LIST_FILTER for DuckDB
            "FILTER" if f.args.len() == 2 => {
                let name = match target {
                    DialectType::DuckDB => "LIST_FILTER",
                    DialectType::StarRocks => "ARRAY_FILTER",
                    _ => "FILTER",
                };
                Ok(Expression::Function(Box::new(Function::new(
                    name.to_string(),
                    f.args,
                ))))
            }
            // REDUCE(arr, init, lambda1, lambda2) -> AGGREGATE for Spark
            // Rename only; the lambda arguments pass through unchanged.
            "REDUCE" if f.args.len() >= 3 => {
                let name = match target {
                    DialectType::Spark | DialectType::Databricks => "AGGREGATE",
                    _ => "REDUCE",
                };
                Ok(Expression::Function(Box::new(Function::new(
                    name.to_string(),
                    f.args,
                ))))
            }
            // CURRENT_SCHEMA() -> dialect-specific
            // Matches any argument count (no args-length guard on this arm).
            "CURRENT_SCHEMA" => {
                match target {
                    DialectType::PostgreSQL => {
                        // PostgreSQL: CURRENT_SCHEMA (no parens)
                        // Built as a struct literal so `no_parens: true` can be set;
                        // Function::new presumably defaults it to false.
                        Ok(Expression::Function(Box::new(Function {
                            name: "CURRENT_SCHEMA".to_string(),
                            args: vec![],
                            distinct: false,
                            trailing_comments: vec![],
                            use_bracket_syntax: false,
                            no_parens: true,
                            quoted: false,
                            span: None,
                        })))
                    }
                    DialectType::MySQL
                    | DialectType::Doris
                    | DialectType::StarRocks => Ok(Expression::Function(Box::new(
                        Function::new("SCHEMA".to_string(), vec![]),
                    ))),
                    DialectType::TSQL => Ok(Expression::Function(Box::new(
                        Function::new("SCHEMA_NAME".to_string(), vec![]),
                    ))),
                    DialectType::SQLite => {
                        // SQLite has no schema function; the literal 'main' is emitted.
                        Ok(Expression::Literal(Literal::String("main".to_string())))
                    }
                    _ => Ok(Expression::Function(f)),
                }
            }
            // LTRIM(str, chars) 2-arg -> TRIM(LEADING chars FROM str) for Spark/Hive/Databricks/ClickHouse
            // Rewritten to the SQL-standard TRIM syntax with an explicit LEADING
            // position; all other targets keep the 2-arg LTRIM call.
            "LTRIM" if f.args.len() == 2 => match target {
                DialectType::Spark
                | DialectType::Hive
                | DialectType::Databricks
                | DialectType::ClickHouse => {
                    let mut args = f.args;
                    let str_expr = args.remove(0);
                    let chars = args.remove(0);
                    Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
                        this: str_expr,
                        characters: Some(chars),
                        position: crate::expressions::TrimPosition::Leading,
                        sql_standard_syntax: true,
                        position_explicit: true,
                    })))
                }
                _ => Ok(Expression::Function(f)),
            },
            // RTRIM(str, chars) 2-arg -> TRIM(TRAILING chars FROM str) for Spark/Hive/Databricks/ClickHouse
            // Mirror of the LTRIM arm above with TrimPosition::Trailing.
            "RTRIM" if f.args.len() == 2 => match target {
                DialectType::Spark
                | DialectType::Hive
                | DialectType::Databricks
                | DialectType::ClickHouse => {
                    let mut args = f.args;
                    let str_expr = args.remove(0);
                    let chars = args.remove(0);
                    Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
                        this: str_expr,
                        characters: Some(chars),
                        position: crate::expressions::TrimPosition::Trailing,
                        sql_standard_syntax: true,
                        position_explicit: true,
                    })))
                }
                _ => Ok(Expression::Function(f)),
            },
            // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
            // These arms are in-place renames: the Function node is unboxed, its name
            // replaced, and it is re-boxed with arguments and flags untouched.
            "ARRAY_REVERSE" if f.args.len() == 1 => match target {
                DialectType::ClickHouse => {
                    let mut new_f = *f;
                    new_f.name = "arrayReverse".to_string();
                    Ok(Expression::Function(Box::new(new_f)))
                }
                _ => Ok(Expression::Function(f)),
            },
            // UUID() -> NEWID() for TSQL
            "UUID" if f.args.is_empty() => match target {
                DialectType::TSQL | DialectType::Fabric => {
                    Ok(Expression::Function(Box::new(Function::new(
                        "NEWID".to_string(),
                        vec![],
                    ))))
                }
                _ => Ok(Expression::Function(f)),
            },
            // FARM_FINGERPRINT(x) -> farmFingerprint64(x) for ClickHouse, FARMFINGERPRINT64(x) for Redshift
            "FARM_FINGERPRINT" if f.args.len() == 1 => match target {
                DialectType::ClickHouse => {
                    let mut new_f = *f;
                    new_f.name = "farmFingerprint64".to_string();
                    Ok(Expression::Function(Box::new(new_f)))
                }
                DialectType::Redshift => {
                    let mut new_f = *f;
                    new_f.name = "FARMFINGERPRINT64".to_string();
                    Ok(Expression::Function(Box::new(new_f)))
                }
                _ => Ok(Expression::Function(f)),
            },
            // JSON_KEYS(x) -> JSON_OBJECT_KEYS(x) for Databricks/Spark, OBJECT_KEYS(x) for Snowflake
            // No args-length guard: matches any JSON_KEYS arity.
            "JSON_KEYS" => match target {
                DialectType::Databricks | DialectType::Spark => {
                    let mut new_f = *f;
                    new_f.name = "JSON_OBJECT_KEYS".to_string();
                    Ok(Expression::Function(Box::new(new_f)))
                }
                DialectType::Snowflake => {
                    let mut new_f = *f;
                    new_f.name = "OBJECT_KEYS".to_string();
                    Ok(Expression::Function(Box::new(new_f)))
                }
                _ => Ok(Expression::Function(f)),
            },
            // WEEKOFYEAR(x) -> WEEKISO(x) for Snowflake
            "WEEKOFYEAR" => match target {
                DialectType::Snowflake => {
                    let mut new_f = *f;
                    new_f.name = "WEEKISO".to_string();
                    Ok(Expression::Function(Box::new(new_f)))
                }
                _ => Ok(Expression::Function(f)),
            },
            // FORMAT(fmt, args...) -> FORMAT_STRING(fmt, args...) for Databricks
            // Only applies when the SOURCE dialect is Generic — presumably to avoid
            // clobbering dialect-specific FORMAT semantics; TODO confirm.
            "FORMAT"
                if f.args.len() >= 2 && matches!(source, DialectType::Generic) =>
            {
                match target {
                    DialectType::Databricks | DialectType::Spark => {
                        let mut new_f = *f;
                        new_f.name = "FORMAT_STRING".to_string();
                        Ok(Expression::Function(Box::new(new_f)))
                    }
                    _ => Ok(Expression::Function(f)),
                }
            }
            // CONCAT_WS('-', args...) -> CONCAT_WS('-', CAST(arg AS VARCHAR), ...) for Presto/Trino
            // Each argument after the separator is wrapped in CAST(... AS VARCHAR);
            // the separator itself is left uncast.
            "CONCAT_WS" if f.args.len() >= 2 => match target {
                DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                    let mut args = f.args;
                    let sep = args.remove(0);
                    let cast_args: Vec<Expression> = args
                        .into_iter()
                        .map(|a| {
                            Expression::Cast(Box::new(Cast {
                                this: a,
                                to: DataType::VarChar {
                                    length: None,
                                    parenthesized_length: false,
                                },
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                            }))
                        })
                        .collect();
                    let mut new_args = vec![sep];
                    new_args.extend(cast_args);
                    Ok(Expression::Function(Box::new(Function::new(
                        "CONCAT_WS".to_string(),
                        new_args,
                    ))))
                }
                _ => Ok(Expression::Function(f)),
            },
            // ARRAY_SLICE(x, start, end) -> SLICE(x, start, end) for Presto/Trino/Databricks, arraySlice for ClickHouse
            // Rename only; argument list passes through unchanged.
            "ARRAY_SLICE" if f.args.len() >= 2 => match target {
                DialectType::Presto
                | DialectType::Trino
                | DialectType::Athena
                | DialectType::Databricks
                | DialectType::Spark => {
                    let mut new_f = *f;
                    new_f.name = "SLICE".to_string();
                    Ok(Expression::Function(Box::new(new_f)))
                }
                DialectType::ClickHouse => {
                    let mut new_f = *f;
                    new_f.name = "arraySlice".to_string();
                    Ok(Expression::Function(Box::new(new_f)))
                }
                _ => Ok(Expression::Function(f)),
            },
            // ARRAY_PREPEND(arr, x) -> LIST_PREPEND(x, arr) for DuckDB (swap args)
            "ARRAY_PREPEND" if f.args.len() == 2 => match target {
                DialectType::DuckDB => {
                    // Note the argument order swap: LIST_PREPEND takes (value, list).
                    let mut args = f.args;
                    let arr = args.remove(0);
                    let val = args.remove(0);
                    Ok(Expression::Function(Box::new(Function::new(
                        "LIST_PREPEND".to_string(),
                        vec![val, arr],
                    ))))
                }
                _ => Ok(Expression::Function(f)),
            },
17029 // ARRAY_REMOVE(arr, target) -> dialect-specific
17030 "ARRAY_REMOVE" if f.args.len() == 2 => {
17031 match target {
17032 DialectType::DuckDB => {
17033 let mut args = f.args;
17034 let arr = args.remove(0);
17035 let target_val = args.remove(0);
17036 let u_id = crate::expressions::Identifier::new("_u");
17037 // LIST_FILTER(arr, _u -> _u <> target)
17038 let lambda = Expression::Lambda(Box::new(
17039 crate::expressions::LambdaExpr {
17040 parameters: vec![u_id.clone()],
17041 body: Expression::Neq(Box::new(BinaryOp {
17042 left: Expression::Identifier(u_id),
17043 right: target_val,
17044 left_comments: Vec::new(),
17045 operator_comments: Vec::new(),
17046 trailing_comments: Vec::new(),
17047 })),
17048 colon: false,
17049 parameter_types: Vec::new(),
17050 },
17051 ));
17052 Ok(Expression::Function(Box::new(Function::new(
17053 "LIST_FILTER".to_string(),
17054 vec![arr, lambda],
17055 ))))
17056 }
17057 DialectType::ClickHouse => {
17058 let mut args = f.args;
17059 let arr = args.remove(0);
17060 let target_val = args.remove(0);
17061 let u_id = crate::expressions::Identifier::new("_u");
17062 // arrayFilter(_u -> _u <> target, arr)
17063 let lambda = Expression::Lambda(Box::new(
17064 crate::expressions::LambdaExpr {
17065 parameters: vec![u_id.clone()],
17066 body: Expression::Neq(Box::new(BinaryOp {
17067 left: Expression::Identifier(u_id),
17068 right: target_val,
17069 left_comments: Vec::new(),
17070 operator_comments: Vec::new(),
17071 trailing_comments: Vec::new(),
17072 })),
17073 colon: false,
17074 parameter_types: Vec::new(),
17075 },
17076 ));
17077 Ok(Expression::Function(Box::new(Function::new(
17078 "arrayFilter".to_string(),
17079 vec![lambda, arr],
17080 ))))
17081 }
17082 DialectType::BigQuery => {
17083 // ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)
17084 let mut args = f.args;
17085 let arr = args.remove(0);
17086 let target_val = args.remove(0);
17087 let u_id = crate::expressions::Identifier::new("_u");
17088 let u_col =
17089 Expression::Column(crate::expressions::Column {
17090 name: u_id.clone(),
17091 table: None,
17092 join_mark: false,
17093 trailing_comments: Vec::new(),
17094 span: None,
17095 });
17096 // UNNEST(the_array) AS _u
17097 let unnest_expr = Expression::Unnest(Box::new(
17098 crate::expressions::UnnestFunc {
17099 this: arr,
17100 expressions: Vec::new(),
17101 with_ordinality: false,
17102 alias: None,
17103 offset_alias: None,
17104 },
17105 ));
17106 let aliased_unnest = Expression::Alias(Box::new(
17107 crate::expressions::Alias {
17108 this: unnest_expr,
17109 alias: u_id.clone(),
17110 column_aliases: Vec::new(),
17111 pre_alias_comments: Vec::new(),
17112 trailing_comments: Vec::new(),
17113 },
17114 ));
17115 // _u <> target
17116 let where_cond = Expression::Neq(Box::new(BinaryOp {
17117 left: u_col.clone(),
17118 right: target_val,
17119 left_comments: Vec::new(),
17120 operator_comments: Vec::new(),
17121 trailing_comments: Vec::new(),
17122 }));
17123 // SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target
17124 let subquery = Expression::Select(Box::new(
17125 crate::expressions::Select::new()
17126 .column(u_col)
17127 .from(aliased_unnest)
17128 .where_(where_cond),
17129 ));
17130 // ARRAY(subquery) -- use ArrayFunc with subquery as single element
17131 Ok(Expression::ArrayFunc(Box::new(
17132 crate::expressions::ArrayConstructor {
17133 expressions: vec![subquery],
17134 bracket_notation: false,
17135 use_list_keyword: false,
17136 },
17137 )))
17138 }
17139 _ => Ok(Expression::Function(f)),
17140 }
17141 }
17142 // PARSE_JSON(str) -> remove for SQLite/Doris (just use the string literal)
17143 "PARSE_JSON" if f.args.len() == 1 => {
17144 match target {
17145 DialectType::SQLite
17146 | DialectType::Doris
17147 | DialectType::MySQL
17148 | DialectType::StarRocks => {
17149 // Strip PARSE_JSON, return the inner argument
17150 Ok(f.args.into_iter().next().unwrap())
17151 }
17152 _ => Ok(Expression::Function(f)),
17153 }
17154 }
17155 // JSON_REMOVE(PARSE_JSON(str), path...) -> for SQLite strip PARSE_JSON
17156 // This is handled by PARSE_JSON stripping above; JSON_REMOVE is passed through
17157 "JSON_REMOVE" => Ok(Expression::Function(f)),
17158 // JSON_SET(PARSE_JSON(str), path, PARSE_JSON(val)) -> for SQLite strip PARSE_JSON
17159 // This is handled by PARSE_JSON stripping above; JSON_SET is passed through
17160 "JSON_SET" => Ok(Expression::Function(f)),
17161 // DECODE(x, search1, result1, ..., default) -> CASE WHEN
17162 // Behavior per search value type:
17163 // NULL literal -> CASE WHEN x IS NULL THEN result
17164 // Literal (number, string, bool) -> CASE WHEN x = literal THEN result
17165 // Non-literal (column, expr) -> CASE WHEN x = search OR (x IS NULL AND search IS NULL) THEN result
17166 "DECODE" if f.args.len() >= 3 => {
17167 // Keep as DECODE for targets that support it natively
17168 let keep_as_decode = matches!(
17169 target,
17170 DialectType::Oracle
17171 | DialectType::Snowflake
17172 | DialectType::Redshift
17173 | DialectType::Teradata
17174 | DialectType::Spark
17175 | DialectType::Databricks
17176 );
17177 if keep_as_decode {
17178 return Ok(Expression::Function(f));
17179 }
17180
17181 let mut args = f.args;
17182 let this_expr = args.remove(0);
17183 let mut pairs = Vec::new();
17184 let mut default = None;
17185 let mut i = 0;
17186 while i + 1 < args.len() {
17187 pairs.push((args[i].clone(), args[i + 1].clone()));
17188 i += 2;
17189 }
17190 if i < args.len() {
17191 default = Some(args[i].clone());
17192 }
17193 // Helper: check if expression is a literal value
// Returns true for self-evaluating "literal" DECODE search values:
// plain literals (numbers/strings), booleans, and unary negation
// (so `-1` counts as a literal and gets the simple `x = lit` form
// rather than the null-safe `x = s OR (x IS NULL AND s IS NULL)` form).
// NOTE(review): any `Neg` matches, even a negated column like `-c` —
// presumably acceptable since this only picks the cheaper equality
// rendering, but verify if exact NULL semantics matter for `-col`.
fn is_literal(e: &Expression) -> bool {
    matches!(
        e,
        Expression::Literal(_)
            | Expression::Boolean(_)
            | Expression::Neg(_)
    )
}
17202 let whens: Vec<(Expression, Expression)> = pairs
17203 .into_iter()
17204 .map(|(search, result)| {
17205 if matches!(&search, Expression::Null(_)) {
17206 // NULL search -> IS NULL
17207 let condition = Expression::Is(Box::new(BinaryOp {
17208 left: this_expr.clone(),
17209 right: Expression::Null(crate::expressions::Null),
17210 left_comments: Vec::new(),
17211 operator_comments: Vec::new(),
17212 trailing_comments: Vec::new(),
17213 }));
17214 (condition, result)
17215 } else if is_literal(&search) {
17216 // Literal search -> simple equality
17217 let eq = Expression::Eq(Box::new(BinaryOp {
17218 left: this_expr.clone(),
17219 right: search,
17220 left_comments: Vec::new(),
17221 operator_comments: Vec::new(),
17222 trailing_comments: Vec::new(),
17223 }));
17224 (eq, result)
17225 } else {
17226 // Non-literal (column ref, expression) -> null-safe comparison
17227 let needs_paren = matches!(
17228 &search,
17229 Expression::Eq(_)
17230 | Expression::Neq(_)
17231 | Expression::Gt(_)
17232 | Expression::Gte(_)
17233 | Expression::Lt(_)
17234 | Expression::Lte(_)
17235 );
17236 let search_for_eq = if needs_paren {
17237 Expression::Paren(Box::new(
17238 crate::expressions::Paren {
17239 this: search.clone(),
17240 trailing_comments: Vec::new(),
17241 },
17242 ))
17243 } else {
17244 search.clone()
17245 };
17246 let eq = Expression::Eq(Box::new(BinaryOp {
17247 left: this_expr.clone(),
17248 right: search_for_eq,
17249 left_comments: Vec::new(),
17250 operator_comments: Vec::new(),
17251 trailing_comments: Vec::new(),
17252 }));
17253 let search_for_null = if needs_paren {
17254 Expression::Paren(Box::new(
17255 crate::expressions::Paren {
17256 this: search.clone(),
17257 trailing_comments: Vec::new(),
17258 },
17259 ))
17260 } else {
17261 search.clone()
17262 };
17263 let x_is_null = Expression::Is(Box::new(BinaryOp {
17264 left: this_expr.clone(),
17265 right: Expression::Null(crate::expressions::Null),
17266 left_comments: Vec::new(),
17267 operator_comments: Vec::new(),
17268 trailing_comments: Vec::new(),
17269 }));
17270 let s_is_null = Expression::Is(Box::new(BinaryOp {
17271 left: search_for_null,
17272 right: Expression::Null(crate::expressions::Null),
17273 left_comments: Vec::new(),
17274 operator_comments: Vec::new(),
17275 trailing_comments: Vec::new(),
17276 }));
17277 let both_null = Expression::And(Box::new(BinaryOp {
17278 left: x_is_null,
17279 right: s_is_null,
17280 left_comments: Vec::new(),
17281 operator_comments: Vec::new(),
17282 trailing_comments: Vec::new(),
17283 }));
17284 let condition = Expression::Or(Box::new(BinaryOp {
17285 left: eq,
17286 right: Expression::Paren(Box::new(
17287 crate::expressions::Paren {
17288 this: both_null,
17289 trailing_comments: Vec::new(),
17290 },
17291 )),
17292 left_comments: Vec::new(),
17293 operator_comments: Vec::new(),
17294 trailing_comments: Vec::new(),
17295 }));
17296 (condition, result)
17297 }
17298 })
17299 .collect();
17300 Ok(Expression::Case(Box::new(Case {
17301 operand: None,
17302 whens,
17303 else_: default,
17304 comments: Vec::new(),
17305 })))
17306 }
17307 // LEVENSHTEIN(a, b, ...) -> dialect-specific
17308 "LEVENSHTEIN" => {
17309 match target {
17310 DialectType::BigQuery => {
17311 let mut new_f = *f;
17312 new_f.name = "EDIT_DISTANCE".to_string();
17313 Ok(Expression::Function(Box::new(new_f)))
17314 }
17315 DialectType::Drill => {
17316 let mut new_f = *f;
17317 new_f.name = "LEVENSHTEIN_DISTANCE".to_string();
17318 Ok(Expression::Function(Box::new(new_f)))
17319 }
17320 DialectType::PostgreSQL if f.args.len() == 6 => {
17321 // PostgreSQL: LEVENSHTEIN(src, tgt, ins, del, sub, max_d) -> LEVENSHTEIN_LESS_EQUAL
17322 // 2 args: basic, 5 args: with costs, 6 args: with costs + max_distance
17323 let mut new_f = *f;
17324 new_f.name = "LEVENSHTEIN_LESS_EQUAL".to_string();
17325 Ok(Expression::Function(Box::new(new_f)))
17326 }
17327 _ => Ok(Expression::Function(f)),
17328 }
17329 }
17330 // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
17331 "ARRAY_REVERSE" => match target {
17332 DialectType::ClickHouse => {
17333 let mut new_f = *f;
17334 new_f.name = "arrayReverse".to_string();
17335 Ok(Expression::Function(Box::new(new_f)))
17336 }
17337 _ => Ok(Expression::Function(f)),
17338 },
17339 // GENERATE_DATE_ARRAY(start, end[, step]) -> target-specific
17340 "GENERATE_DATE_ARRAY" => {
17341 let mut args = f.args;
17342 if matches!(target, DialectType::BigQuery) {
17343 // BigQuery keeps GENERATE_DATE_ARRAY; add default interval if not present
17344 if args.len() == 2 {
17345 let default_interval = Expression::Interval(Box::new(
17346 crate::expressions::Interval {
17347 this: Some(Expression::Literal(Literal::String(
17348 "1".to_string(),
17349 ))),
17350 unit: Some(
17351 crate::expressions::IntervalUnitSpec::Simple {
17352 unit: crate::expressions::IntervalUnit::Day,
17353 use_plural: false,
17354 },
17355 ),
17356 },
17357 ));
17358 args.push(default_interval);
17359 }
17360 Ok(Expression::Function(Box::new(Function::new(
17361 "GENERATE_DATE_ARRAY".to_string(),
17362 args,
17363 ))))
17364 } else if matches!(target, DialectType::DuckDB) {
17365 // DuckDB: CAST(GENERATE_SERIES(start, end, step) AS DATE[])
17366 let start = args.get(0).cloned();
17367 let end = args.get(1).cloned();
17368 let step = args.get(2).cloned().or_else(|| {
17369 Some(Expression::Interval(Box::new(
17370 crate::expressions::Interval {
17371 this: Some(Expression::Literal(Literal::String(
17372 "1".to_string(),
17373 ))),
17374 unit: Some(
17375 crate::expressions::IntervalUnitSpec::Simple {
17376 unit: crate::expressions::IntervalUnit::Day,
17377 use_plural: false,
17378 },
17379 ),
17380 },
17381 )))
17382 });
17383 let gen_series = Expression::GenerateSeries(Box::new(
17384 crate::expressions::GenerateSeries {
17385 start: start.map(Box::new),
17386 end: end.map(Box::new),
17387 step: step.map(Box::new),
17388 is_end_exclusive: None,
17389 },
17390 ));
17391 Ok(Expression::Cast(Box::new(Cast {
17392 this: gen_series,
17393 to: DataType::Array {
17394 element_type: Box::new(DataType::Date),
17395 dimension: None,
17396 },
17397 trailing_comments: vec![],
17398 double_colon_syntax: false,
17399 format: None,
17400 default: None,
17401 })))
17402 } else if matches!(
17403 target,
17404 DialectType::Presto | DialectType::Trino | DialectType::Athena
17405 ) {
17406 // Presto/Trino: SEQUENCE(start, end, interval) with interval normalization
17407 let start = args.get(0).cloned();
17408 let end = args.get(1).cloned();
17409 let step = args.get(2).cloned().or_else(|| {
17410 Some(Expression::Interval(Box::new(
17411 crate::expressions::Interval {
17412 this: Some(Expression::Literal(Literal::String(
17413 "1".to_string(),
17414 ))),
17415 unit: Some(
17416 crate::expressions::IntervalUnitSpec::Simple {
17417 unit: crate::expressions::IntervalUnit::Day,
17418 use_plural: false,
17419 },
17420 ),
17421 },
17422 )))
17423 });
17424 let gen_series = Expression::GenerateSeries(Box::new(
17425 crate::expressions::GenerateSeries {
17426 start: start.map(Box::new),
17427 end: end.map(Box::new),
17428 step: step.map(Box::new),
17429 is_end_exclusive: None,
17430 },
17431 ));
17432 Ok(gen_series)
17433 } else if matches!(
17434 target,
17435 DialectType::Spark | DialectType::Databricks
17436 ) {
17437 // Spark/Databricks: SEQUENCE(start, end, step) - keep step as-is
17438 let start = args.get(0).cloned();
17439 let end = args.get(1).cloned();
17440 let step = args.get(2).cloned().or_else(|| {
17441 Some(Expression::Interval(Box::new(
17442 crate::expressions::Interval {
17443 this: Some(Expression::Literal(Literal::String(
17444 "1".to_string(),
17445 ))),
17446 unit: Some(
17447 crate::expressions::IntervalUnitSpec::Simple {
17448 unit: crate::expressions::IntervalUnit::Day,
17449 use_plural: false,
17450 },
17451 ),
17452 },
17453 )))
17454 });
17455 let gen_series = Expression::GenerateSeries(Box::new(
17456 crate::expressions::GenerateSeries {
17457 start: start.map(Box::new),
17458 end: end.map(Box::new),
17459 step: step.map(Box::new),
17460 is_end_exclusive: None,
17461 },
17462 ));
17463 Ok(gen_series)
17464 } else if matches!(target, DialectType::Snowflake) {
17465 // Snowflake: keep as GENERATE_DATE_ARRAY for later transform
17466 if args.len() == 2 {
17467 let default_interval = Expression::Interval(Box::new(
17468 crate::expressions::Interval {
17469 this: Some(Expression::Literal(Literal::String(
17470 "1".to_string(),
17471 ))),
17472 unit: Some(
17473 crate::expressions::IntervalUnitSpec::Simple {
17474 unit: crate::expressions::IntervalUnit::Day,
17475 use_plural: false,
17476 },
17477 ),
17478 },
17479 ));
17480 args.push(default_interval);
17481 }
17482 Ok(Expression::Function(Box::new(Function::new(
17483 "GENERATE_DATE_ARRAY".to_string(),
17484 args,
17485 ))))
17486 } else if matches!(
17487 target,
17488 DialectType::MySQL
17489 | DialectType::TSQL
17490 | DialectType::Fabric
17491 | DialectType::Redshift
17492 ) {
17493 // MySQL/TSQL/Redshift: keep as GENERATE_DATE_ARRAY for the preprocess
17494 // step (unnest_generate_date_array_using_recursive_cte) to convert to CTE
17495 Ok(Expression::Function(Box::new(Function::new(
17496 "GENERATE_DATE_ARRAY".to_string(),
17497 args,
17498 ))))
17499 } else {
17500 // PostgreSQL/others: convert to GenerateSeries
17501 let start = args.get(0).cloned();
17502 let end = args.get(1).cloned();
17503 let step = args.get(2).cloned().or_else(|| {
17504 Some(Expression::Interval(Box::new(
17505 crate::expressions::Interval {
17506 this: Some(Expression::Literal(Literal::String(
17507 "1".to_string(),
17508 ))),
17509 unit: Some(
17510 crate::expressions::IntervalUnitSpec::Simple {
17511 unit: crate::expressions::IntervalUnit::Day,
17512 use_plural: false,
17513 },
17514 ),
17515 },
17516 )))
17517 });
17518 Ok(Expression::GenerateSeries(Box::new(
17519 crate::expressions::GenerateSeries {
17520 start: start.map(Box::new),
17521 end: end.map(Box::new),
17522 step: step.map(Box::new),
17523 is_end_exclusive: None,
17524 },
17525 )))
17526 }
17527 }
17528 _ => Ok(Expression::Function(f)),
17529 }
17530 } else if let Expression::AggregateFunction(mut af) = e {
17531 let name = af.name.to_uppercase();
17532 match name.as_str() {
17533 "ARBITRARY" if af.args.len() == 1 => {
17534 let arg = af.args.into_iter().next().unwrap();
17535 Ok(convert_arbitrary(arg, target))
17536 }
17537 "JSON_ARRAYAGG" => {
17538 match target {
17539 DialectType::PostgreSQL => {
17540 af.name = "JSON_AGG".to_string();
17541 // Add NULLS FIRST to ORDER BY items for PostgreSQL
17542 for ordered in af.order_by.iter_mut() {
17543 if ordered.nulls_first.is_none() {
17544 ordered.nulls_first = Some(true);
17545 }
17546 }
17547 Ok(Expression::AggregateFunction(af))
17548 }
17549 _ => Ok(Expression::AggregateFunction(af)),
17550 }
17551 }
17552 _ => Ok(Expression::AggregateFunction(af)),
17553 }
17554 } else if let Expression::JSONArrayAgg(ja) = e {
17555 // JSONArrayAgg -> JSON_AGG for PostgreSQL, JSON_ARRAYAGG for others
17556 match target {
17557 DialectType::PostgreSQL => {
17558 let mut order_by = Vec::new();
17559 if let Some(order_expr) = ja.order {
17560 if let Expression::OrderBy(ob) = *order_expr {
17561 for mut ordered in ob.expressions {
17562 if ordered.nulls_first.is_none() {
17563 ordered.nulls_first = Some(true);
17564 }
17565 order_by.push(ordered);
17566 }
17567 }
17568 }
17569 Ok(Expression::AggregateFunction(Box::new(
17570 crate::expressions::AggregateFunction {
17571 name: "JSON_AGG".to_string(),
17572 args: vec![*ja.this],
17573 distinct: false,
17574 filter: None,
17575 order_by,
17576 limit: None,
17577 ignore_nulls: None,
17578 },
17579 )))
17580 }
17581 _ => Ok(Expression::JSONArrayAgg(ja)),
17582 }
17583 } else if let Expression::ToNumber(tn) = e {
17584 // TO_NUMBER(x) with no format/precision/scale -> CAST(x AS DOUBLE)
17585 let arg = *tn.this;
17586 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
17587 this: arg,
17588 to: crate::expressions::DataType::Double {
17589 precision: None,
17590 scale: None,
17591 },
17592 double_colon_syntax: false,
17593 trailing_comments: Vec::new(),
17594 format: None,
17595 default: None,
17596 })))
17597 } else {
17598 Ok(e)
17599 }
17600 }
17601
17602 Action::RegexpLikeToDuckDB => {
17603 if let Expression::RegexpLike(f) = e {
17604 let mut args = vec![f.this, f.pattern];
17605 if let Some(flags) = f.flags {
17606 args.push(flags);
17607 }
17608 Ok(Expression::Function(Box::new(Function::new(
17609 "REGEXP_MATCHES".to_string(),
17610 args,
17611 ))))
17612 } else {
17613 Ok(e)
17614 }
17615 }
17616 Action::EpochConvert => {
17617 if let Expression::Epoch(f) = e {
17618 let arg = f.this;
17619 let name = match target {
17620 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
17621 "UNIX_TIMESTAMP"
17622 }
17623 DialectType::Presto | DialectType::Trino => "TO_UNIXTIME",
17624 DialectType::BigQuery => "TIME_TO_UNIX",
17625 _ => "EPOCH",
17626 };
17627 Ok(Expression::Function(Box::new(Function::new(
17628 name.to_string(),
17629 vec![arg],
17630 ))))
17631 } else {
17632 Ok(e)
17633 }
17634 }
17635 Action::EpochMsConvert => {
17636 use crate::expressions::{BinaryOp, Cast};
17637 if let Expression::EpochMs(f) = e {
17638 let arg = f.this;
17639 match target {
17640 DialectType::Spark | DialectType::Databricks => {
17641 Ok(Expression::Function(Box::new(Function::new(
17642 "TIMESTAMP_MILLIS".to_string(),
17643 vec![arg],
17644 ))))
17645 }
17646 DialectType::BigQuery => Ok(Expression::Function(Box::new(
17647 Function::new("TIMESTAMP_MILLIS".to_string(), vec![arg]),
17648 ))),
17649 DialectType::Presto | DialectType::Trino => {
17650 // FROM_UNIXTIME(CAST(x AS DOUBLE) / POW(10, 3))
17651 let cast_arg = Expression::Cast(Box::new(Cast {
17652 this: arg,
17653 to: DataType::Double {
17654 precision: None,
17655 scale: None,
17656 },
17657 trailing_comments: Vec::new(),
17658 double_colon_syntax: false,
17659 format: None,
17660 default: None,
17661 }));
17662 let div = Expression::Div(Box::new(BinaryOp::new(
17663 cast_arg,
17664 Expression::Function(Box::new(Function::new(
17665 "POW".to_string(),
17666 vec![Expression::number(10), Expression::number(3)],
17667 ))),
17668 )));
17669 Ok(Expression::Function(Box::new(Function::new(
17670 "FROM_UNIXTIME".to_string(),
17671 vec![div],
17672 ))))
17673 }
17674 DialectType::MySQL => {
17675 // FROM_UNIXTIME(x / POWER(10, 3))
17676 let div = Expression::Div(Box::new(BinaryOp::new(
17677 arg,
17678 Expression::Function(Box::new(Function::new(
17679 "POWER".to_string(),
17680 vec![Expression::number(10), Expression::number(3)],
17681 ))),
17682 )));
17683 Ok(Expression::Function(Box::new(Function::new(
17684 "FROM_UNIXTIME".to_string(),
17685 vec![div],
17686 ))))
17687 }
17688 DialectType::PostgreSQL | DialectType::Redshift => {
17689 // TO_TIMESTAMP(CAST(x AS DOUBLE PRECISION) / POWER(10, 3))
17690 let cast_arg = Expression::Cast(Box::new(Cast {
17691 this: arg,
17692 to: DataType::Custom {
17693 name: "DOUBLE PRECISION".to_string(),
17694 },
17695 trailing_comments: Vec::new(),
17696 double_colon_syntax: false,
17697 format: None,
17698 default: None,
17699 }));
17700 let div = Expression::Div(Box::new(BinaryOp::new(
17701 cast_arg,
17702 Expression::Function(Box::new(Function::new(
17703 "POWER".to_string(),
17704 vec![Expression::number(10), Expression::number(3)],
17705 ))),
17706 )));
17707 Ok(Expression::Function(Box::new(Function::new(
17708 "TO_TIMESTAMP".to_string(),
17709 vec![div],
17710 ))))
17711 }
17712 DialectType::ClickHouse => {
17713 // fromUnixTimestamp64Milli(CAST(x AS Nullable(Int64)))
17714 let cast_arg = Expression::Cast(Box::new(Cast {
17715 this: arg,
17716 to: DataType::Nullable {
17717 inner: Box::new(DataType::BigInt { length: None }),
17718 },
17719 trailing_comments: Vec::new(),
17720 double_colon_syntax: false,
17721 format: None,
17722 default: None,
17723 }));
17724 Ok(Expression::Function(Box::new(Function::new(
17725 "fromUnixTimestamp64Milli".to_string(),
17726 vec![cast_arg],
17727 ))))
17728 }
17729 _ => Ok(Expression::Function(Box::new(Function::new(
17730 "EPOCH_MS".to_string(),
17731 vec![arg],
17732 )))),
17733 }
17734 } else {
17735 Ok(e)
17736 }
17737 }
17738 Action::TSQLTypeNormalize => {
17739 if let Expression::DataType(dt) = e {
17740 let new_dt = match &dt {
17741 DataType::Custom { name } if name.eq_ignore_ascii_case("MONEY") => {
17742 DataType::Decimal {
17743 precision: Some(15),
17744 scale: Some(4),
17745 }
17746 }
17747 DataType::Custom { name }
17748 if name.eq_ignore_ascii_case("SMALLMONEY") =>
17749 {
17750 DataType::Decimal {
17751 precision: Some(6),
17752 scale: Some(4),
17753 }
17754 }
17755 DataType::Custom { name } if name.eq_ignore_ascii_case("DATETIME2") => {
17756 DataType::Timestamp {
17757 timezone: false,
17758 precision: None,
17759 }
17760 }
17761 DataType::Custom { name } if name.eq_ignore_ascii_case("REAL") => {
17762 DataType::Float {
17763 precision: None,
17764 scale: None,
17765 real_spelling: false,
17766 }
17767 }
17768 DataType::Float {
17769 real_spelling: true,
17770 ..
17771 } => DataType::Float {
17772 precision: None,
17773 scale: None,
17774 real_spelling: false,
17775 },
17776 DataType::Custom { name } if name.eq_ignore_ascii_case("IMAGE") => {
17777 DataType::Custom {
17778 name: "BLOB".to_string(),
17779 }
17780 }
17781 DataType::Custom { name } if name.eq_ignore_ascii_case("BIT") => {
17782 DataType::Boolean
17783 }
17784 DataType::Custom { name }
17785 if name.eq_ignore_ascii_case("ROWVERSION") =>
17786 {
17787 DataType::Custom {
17788 name: "BINARY".to_string(),
17789 }
17790 }
17791 DataType::Custom { name }
17792 if name.eq_ignore_ascii_case("UNIQUEIDENTIFIER") =>
17793 {
17794 match target {
17795 DialectType::Spark
17796 | DialectType::Databricks
17797 | DialectType::Hive => DataType::Custom {
17798 name: "STRING".to_string(),
17799 },
17800 _ => DataType::VarChar {
17801 length: Some(36),
17802 parenthesized_length: true,
17803 },
17804 }
17805 }
17806 DataType::Custom { name }
17807 if name.eq_ignore_ascii_case("DATETIMEOFFSET") =>
17808 {
17809 match target {
17810 DialectType::Spark
17811 | DialectType::Databricks
17812 | DialectType::Hive => DataType::Timestamp {
17813 timezone: false,
17814 precision: None,
17815 },
17816 _ => DataType::Timestamp {
17817 timezone: true,
17818 precision: None,
17819 },
17820 }
17821 }
17822 DataType::Custom { ref name }
17823 if name.to_uppercase().starts_with("DATETIME2(") =>
17824 {
17825 // DATETIME2(n) -> TIMESTAMP
17826 DataType::Timestamp {
17827 timezone: false,
17828 precision: None,
17829 }
17830 }
17831 DataType::Custom { ref name }
17832 if name.to_uppercase().starts_with("TIME(") =>
17833 {
17834 // TIME(n) -> TIMESTAMP for Spark, keep as TIME for others
17835 match target {
17836 DialectType::Spark
17837 | DialectType::Databricks
17838 | DialectType::Hive => DataType::Timestamp {
17839 timezone: false,
17840 precision: None,
17841 },
17842 _ => return Ok(Expression::DataType(dt)),
17843 }
17844 }
17845 DataType::Custom { ref name }
17846 if name.to_uppercase().starts_with("NUMERIC") =>
17847 {
17848 // Parse NUMERIC(p,s) back to Decimal(p,s)
17849 let upper = name.to_uppercase();
17850 if let Some(inner) = upper
17851 .strip_prefix("NUMERIC(")
17852 .and_then(|s| s.strip_suffix(')'))
17853 {
17854 let parts: Vec<&str> = inner.split(',').collect();
17855 let precision =
17856 parts.first().and_then(|s| s.trim().parse::<u32>().ok());
17857 let scale =
17858 parts.get(1).and_then(|s| s.trim().parse::<u32>().ok());
17859 DataType::Decimal { precision, scale }
17860 } else if upper == "NUMERIC" {
17861 DataType::Decimal {
17862 precision: None,
17863 scale: None,
17864 }
17865 } else {
17866 return Ok(Expression::DataType(dt));
17867 }
17868 }
17869 DataType::Float {
17870 precision: Some(p), ..
17871 } => {
17872 // For Hive/Spark: FLOAT(1-32) -> FLOAT, FLOAT(33+) -> DOUBLE (IEEE 754 boundary)
17873 // For other targets: FLOAT(1-24) -> FLOAT, FLOAT(25+) -> DOUBLE (TSQL boundary)
17874 let boundary = match target {
17875 DialectType::Hive
17876 | DialectType::Spark
17877 | DialectType::Databricks => 32,
17878 _ => 24,
17879 };
17880 if *p <= boundary {
17881 DataType::Float {
17882 precision: None,
17883 scale: None,
17884 real_spelling: false,
17885 }
17886 } else {
17887 DataType::Double {
17888 precision: None,
17889 scale: None,
17890 }
17891 }
17892 }
17893 DataType::TinyInt { .. } => match target {
17894 DialectType::DuckDB => DataType::Custom {
17895 name: "UTINYINT".to_string(),
17896 },
17897 DialectType::Hive
17898 | DialectType::Spark
17899 | DialectType::Databricks => DataType::SmallInt { length: None },
17900 _ => return Ok(Expression::DataType(dt)),
17901 },
17902 // INTEGER -> INT for Spark/Databricks
17903 DataType::Int {
17904 length,
17905 integer_spelling: true,
17906 } => DataType::Int {
17907 length: *length,
17908 integer_spelling: false,
17909 },
17910 _ => return Ok(Expression::DataType(dt)),
17911 };
17912 Ok(Expression::DataType(new_dt))
17913 } else {
17914 Ok(e)
17915 }
17916 }
17917 Action::MySQLSafeDivide => {
17918 use crate::expressions::{BinaryOp, Cast};
17919 if let Expression::Div(op) = e {
17920 let left = op.left;
17921 let right = op.right;
17922 // For SQLite: CAST left as REAL but NO NULLIF wrapping
17923 if matches!(target, DialectType::SQLite) {
17924 let new_left = Expression::Cast(Box::new(Cast {
17925 this: left,
17926 to: DataType::Float {
17927 precision: None,
17928 scale: None,
17929 real_spelling: true,
17930 },
17931 trailing_comments: Vec::new(),
17932 double_colon_syntax: false,
17933 format: None,
17934 default: None,
17935 }));
17936 return Ok(Expression::Div(Box::new(BinaryOp::new(new_left, right))));
17937 }
17938 // Wrap right in NULLIF(right, 0)
17939 let nullif_right = Expression::Function(Box::new(Function::new(
17940 "NULLIF".to_string(),
17941 vec![right, Expression::number(0)],
17942 )));
17943 // For some dialects, also CAST the left side
17944 let new_left = match target {
17945 DialectType::PostgreSQL
17946 | DialectType::Redshift
17947 | DialectType::Teradata
17948 | DialectType::Materialize
17949 | DialectType::RisingWave => Expression::Cast(Box::new(Cast {
17950 this: left,
17951 to: DataType::Custom {
17952 name: "DOUBLE PRECISION".to_string(),
17953 },
17954 trailing_comments: Vec::new(),
17955 double_colon_syntax: false,
17956 format: None,
17957 default: None,
17958 })),
17959 DialectType::Drill
17960 | DialectType::Trino
17961 | DialectType::Presto
17962 | DialectType::Athena => Expression::Cast(Box::new(Cast {
17963 this: left,
17964 to: DataType::Double {
17965 precision: None,
17966 scale: None,
17967 },
17968 trailing_comments: Vec::new(),
17969 double_colon_syntax: false,
17970 format: None,
17971 default: None,
17972 })),
17973 DialectType::TSQL => Expression::Cast(Box::new(Cast {
17974 this: left,
17975 to: DataType::Float {
17976 precision: None,
17977 scale: None,
17978 real_spelling: false,
17979 },
17980 trailing_comments: Vec::new(),
17981 double_colon_syntax: false,
17982 format: None,
17983 default: None,
17984 })),
17985 _ => left,
17986 };
17987 Ok(Expression::Div(Box::new(BinaryOp::new(
17988 new_left,
17989 nullif_right,
17990 ))))
17991 } else {
17992 Ok(e)
17993 }
17994 }
17995 Action::AlterTableRenameStripSchema => {
17996 if let Expression::AlterTable(mut at) = e {
17997 if let Some(crate::expressions::AlterTableAction::RenameTable(
17998 ref mut new_tbl,
17999 )) = at.actions.first_mut()
18000 {
18001 new_tbl.schema = None;
18002 new_tbl.catalog = None;
18003 }
18004 Ok(Expression::AlterTable(at))
18005 } else {
18006 Ok(e)
18007 }
18008 }
18009 Action::NullsOrdering => {
18010 // Fill in the source dialect's implied null ordering default.
18011 // This makes implicit null ordering explicit so the target generator
18012 // can correctly strip or keep it.
18013 //
18014 // Dialect null ordering categories:
18015 // nulls_are_large (Oracle, PostgreSQL, Redshift, Snowflake):
18016 // ASC -> NULLS LAST, DESC -> NULLS FIRST
18017 // nulls_are_small (Spark, Hive, BigQuery, MySQL, Databricks, ClickHouse, etc.):
18018 // ASC -> NULLS FIRST, DESC -> NULLS LAST
18019 // nulls_are_last (DuckDB, Presto, Trino, Dremio, Athena):
18020 // NULLS LAST always (both ASC and DESC)
18021 if let Expression::Ordered(mut o) = e {
18022 let is_asc = !o.desc;
18023
18024 let is_source_nulls_large = matches!(
18025 source,
18026 DialectType::Oracle
18027 | DialectType::PostgreSQL
18028 | DialectType::Redshift
18029 | DialectType::Snowflake
18030 );
18031 let is_source_nulls_last = matches!(
18032 source,
18033 DialectType::DuckDB
18034 | DialectType::Presto
18035 | DialectType::Trino
18036 | DialectType::Dremio
18037 | DialectType::Athena
18038 | DialectType::ClickHouse
18039 | DialectType::Drill
18040 | DialectType::Exasol
18041 | DialectType::DataFusion
18042 );
18043
18044 // Determine target category to check if default matches
18045 let is_target_nulls_large = matches!(
18046 target,
18047 DialectType::Oracle
18048 | DialectType::PostgreSQL
18049 | DialectType::Redshift
18050 | DialectType::Snowflake
18051 );
18052 let is_target_nulls_last = matches!(
18053 target,
18054 DialectType::DuckDB
18055 | DialectType::Presto
18056 | DialectType::Trino
18057 | DialectType::Dremio
18058 | DialectType::Athena
18059 | DialectType::ClickHouse
18060 | DialectType::Drill
18061 | DialectType::Exasol
18062 | DialectType::DataFusion
18063 );
18064
18065 // Compute the implied nulls_first for source
18066 let source_nulls_first = if is_source_nulls_large {
18067 !is_asc // ASC -> NULLS LAST (false), DESC -> NULLS FIRST (true)
18068 } else if is_source_nulls_last {
18069 false // NULLS LAST always
18070 } else {
18071 is_asc // nulls_are_small: ASC -> NULLS FIRST (true), DESC -> NULLS LAST (false)
18072 };
18073
18074 // Compute the target's default
18075 let target_nulls_first = if is_target_nulls_large {
18076 !is_asc
18077 } else if is_target_nulls_last {
18078 false
18079 } else {
18080 is_asc
18081 };
18082
18083 // Only add explicit nulls ordering if source and target defaults differ
18084 if source_nulls_first != target_nulls_first {
18085 o.nulls_first = Some(source_nulls_first);
18086 }
18087 // If they match, leave nulls_first as None so the generator won't output it
18088
18089 Ok(Expression::Ordered(o))
18090 } else {
18091 Ok(e)
18092 }
18093 }
                Action::StringAggConvert => {
                    // Normalize STRING_AGG to the target dialect's string-aggregation
                    // construct. Two carrier shapes are handled: a WITHIN GROUP wrapper
                    // (TSQL-style ordered aggregate) and a bare StringAgg node.
                    match e {
                        Expression::WithinGroup(wg) => {
                            // STRING_AGG(x, sep) WITHIN GROUP (ORDER BY z) -> target-specific
                            // Extract args and distinct flag from either Function, AggregateFunction, or StringAgg
                            let (x_opt, sep_opt, distinct) = match wg.this {
                                Expression::AggregateFunction(ref af)
                                    if af.name.eq_ignore_ascii_case("STRING_AGG")
                                        && af.args.len() >= 2 =>
                                {
                                    (
                                        Some(af.args[0].clone()),
                                        Some(af.args[1].clone()),
                                        af.distinct,
                                    )
                                }
                                Expression::Function(ref f)
                                    if f.name.eq_ignore_ascii_case("STRING_AGG")
                                        && f.args.len() >= 2 =>
                                {
                                    // A plain Function node carries no DISTINCT flag.
                                    (Some(f.args[0].clone()), Some(f.args[1].clone()), false)
                                }
                                Expression::StringAgg(ref sa) => {
                                    (Some(sa.this.clone()), sa.separator.clone(), sa.distinct)
                                }
                                // Unrecognized inner expression: fall through to the
                                // pass-through branch below.
                                _ => (None, None, false),
                            };
                            if let (Some(x), Some(sep)) = (x_opt, sep_opt) {
                                let order_by = wg.order_by;

                                match target {
                                    DialectType::TSQL | DialectType::Fabric => {
                                        // Keep as WithinGroup(StringAgg) for TSQL
                                        Ok(Expression::WithinGroup(Box::new(
                                            crate::expressions::WithinGroup {
                                                this: Expression::StringAgg(Box::new(
                                                    crate::expressions::StringAggFunc {
                                                        this: x,
                                                        separator: Some(sep),
                                                        order_by: None, // order_by goes in WithinGroup, not StringAgg
                                                        distinct,
                                                        filter: None,
                                                        limit: None,
                                                    },
                                                )),
                                                order_by,
                                            },
                                        )))
                                    }
                                    DialectType::MySQL
                                    | DialectType::SingleStore
                                    | DialectType::Doris
                                    | DialectType::StarRocks => {
                                        // GROUP_CONCAT(x ORDER BY z SEPARATOR sep)
                                        Ok(Expression::GroupConcat(Box::new(
                                            crate::expressions::GroupConcatFunc {
                                                this: x,
                                                separator: Some(sep),
                                                order_by: Some(order_by),
                                                distinct,
                                                filter: None,
                                            },
                                        )))
                                    }
                                    DialectType::SQLite => {
                                        // GROUP_CONCAT(x, sep) - no ORDER BY support
                                        Ok(Expression::GroupConcat(Box::new(
                                            crate::expressions::GroupConcatFunc {
                                                this: x,
                                                separator: Some(sep),
                                                order_by: None,
                                                distinct,
                                                filter: None,
                                            },
                                        )))
                                    }
                                    DialectType::PostgreSQL | DialectType::Redshift => {
                                        // STRING_AGG(x, sep ORDER BY z)
                                        Ok(Expression::StringAgg(Box::new(
                                            crate::expressions::StringAggFunc {
                                                this: x,
                                                separator: Some(sep),
                                                order_by: Some(order_by),
                                                distinct,
                                                filter: None,
                                                limit: None,
                                            },
                                        )))
                                    }
                                    _ => {
                                        // Default: keep as STRING_AGG(x, sep) with ORDER BY inside
                                        Ok(Expression::StringAgg(Box::new(
                                            crate::expressions::StringAggFunc {
                                                this: x,
                                                separator: Some(sep),
                                                order_by: Some(order_by),
                                                distinct,
                                                filter: None,
                                                limit: None,
                                            },
                                        )))
                                    }
                                }
                            } else {
                                // Wrapped aggregate not recognized: leave the WITHIN GROUP
                                // expression untouched.
                                Ok(Expression::WithinGroup(wg))
                            }
                        }
                        Expression::StringAgg(sa) => {
                            // Bare STRING_AGG without a WITHIN GROUP wrapper.
                            match target {
                                DialectType::MySQL
                                | DialectType::SingleStore
                                | DialectType::Doris
                                | DialectType::StarRocks => {
                                    // STRING_AGG(x, sep) -> GROUP_CONCAT(x SEPARATOR sep)
                                    Ok(Expression::GroupConcat(Box::new(
                                        crate::expressions::GroupConcatFunc {
                                            this: sa.this,
                                            separator: sa.separator,
                                            order_by: sa.order_by,
                                            distinct: sa.distinct,
                                            filter: sa.filter,
                                        },
                                    )))
                                }
                                DialectType::SQLite => {
                                    // STRING_AGG(x, sep) -> GROUP_CONCAT(x, sep)
                                    Ok(Expression::GroupConcat(Box::new(
                                        crate::expressions::GroupConcatFunc {
                                            this: sa.this,
                                            separator: sa.separator,
                                            order_by: None, // SQLite doesn't support ORDER BY in GROUP_CONCAT
                                            distinct: sa.distinct,
                                            filter: sa.filter,
                                        },
                                    )))
                                }
                                DialectType::Spark | DialectType::Databricks => {
                                    // STRING_AGG(x, sep) -> LISTAGG(x, sep)
                                    Ok(Expression::ListAgg(Box::new(
                                        crate::expressions::ListAggFunc {
                                            this: sa.this,
                                            separator: sa.separator,
                                            on_overflow: None,
                                            order_by: sa.order_by,
                                            distinct: sa.distinct,
                                            filter: None,
                                        },
                                    )))
                                }
                                _ => Ok(Expression::StringAgg(sa)),
                            }
                        }
                        _ => Ok(e),
                    }
                }
18249 Action::GroupConcatConvert => {
18250 // Helper to expand CONCAT(a, b, c) -> a || b || c (for PostgreSQL/SQLite)
18251 // or CONCAT(a, b, c) -> a + b + c (for TSQL)
18252 fn expand_concat_to_dpipe(expr: Expression) -> Expression {
18253 if let Expression::Function(ref f) = expr {
18254 if f.name.to_uppercase() == "CONCAT" && f.args.len() > 1 {
18255 let mut result = f.args[0].clone();
18256 for arg in &f.args[1..] {
18257 result = Expression::Concat(Box::new(BinaryOp {
18258 left: result,
18259 right: arg.clone(),
18260 left_comments: vec![],
18261 operator_comments: vec![],
18262 trailing_comments: vec![],
18263 }));
18264 }
18265 return result;
18266 }
18267 }
18268 expr
18269 }
18270 fn expand_concat_to_plus(expr: Expression) -> Expression {
18271 if let Expression::Function(ref f) = expr {
18272 if f.name.to_uppercase() == "CONCAT" && f.args.len() > 1 {
18273 let mut result = f.args[0].clone();
18274 for arg in &f.args[1..] {
18275 result = Expression::Add(Box::new(BinaryOp {
18276 left: result,
18277 right: arg.clone(),
18278 left_comments: vec![],
18279 operator_comments: vec![],
18280 trailing_comments: vec![],
18281 }));
18282 }
18283 return result;
18284 }
18285 }
18286 expr
18287 }
18288 // Helper to wrap each arg in CAST(arg AS VARCHAR) for Presto/Trino CONCAT
18289 fn wrap_concat_args_in_varchar_cast(expr: Expression) -> Expression {
18290 if let Expression::Function(ref f) = expr {
18291 if f.name.to_uppercase() == "CONCAT" && f.args.len() > 1 {
18292 let new_args: Vec<Expression> = f
18293 .args
18294 .iter()
18295 .map(|arg| {
18296 Expression::Cast(Box::new(crate::expressions::Cast {
18297 this: arg.clone(),
18298 to: crate::expressions::DataType::VarChar {
18299 length: None,
18300 parenthesized_length: false,
18301 },
18302 trailing_comments: Vec::new(),
18303 double_colon_syntax: false,
18304 format: None,
18305 default: None,
18306 }))
18307 })
18308 .collect();
18309 return Expression::Function(Box::new(
18310 crate::expressions::Function::new(
18311 "CONCAT".to_string(),
18312 new_args,
18313 ),
18314 ));
18315 }
18316 }
18317 expr
18318 }
18319 if let Expression::GroupConcat(gc) = e {
18320 match target {
18321 DialectType::Presto => {
18322 // GROUP_CONCAT(x [, sep]) -> ARRAY_JOIN(ARRAY_AGG(x), sep)
18323 let sep = gc.separator.unwrap_or(Expression::string(","));
18324 // For multi-arg CONCAT, wrap each arg in CAST(... AS VARCHAR)
18325 let this = wrap_concat_args_in_varchar_cast(gc.this);
18326 let array_agg =
18327 Expression::ArrayAgg(Box::new(crate::expressions::AggFunc {
18328 this,
18329 distinct: gc.distinct,
18330 filter: gc.filter,
18331 order_by: gc.order_by.unwrap_or_default(),
18332 name: None,
18333 ignore_nulls: None,
18334 having_max: None,
18335 limit: None,
18336 }));
18337 Ok(Expression::ArrayJoin(Box::new(
18338 crate::expressions::ArrayJoinFunc {
18339 this: array_agg,
18340 separator: sep,
18341 null_replacement: None,
18342 },
18343 )))
18344 }
18345 DialectType::Trino => {
18346 // GROUP_CONCAT(x [, sep]) -> LISTAGG(x, sep)
18347 let sep = gc.separator.unwrap_or(Expression::string(","));
18348 // For multi-arg CONCAT, wrap each arg in CAST(... AS VARCHAR)
18349 let this = wrap_concat_args_in_varchar_cast(gc.this);
18350 Ok(Expression::ListAgg(Box::new(
18351 crate::expressions::ListAggFunc {
18352 this,
18353 separator: Some(sep),
18354 on_overflow: None,
18355 order_by: gc.order_by,
18356 distinct: gc.distinct,
18357 filter: gc.filter,
18358 },
18359 )))
18360 }
18361 DialectType::PostgreSQL
18362 | DialectType::Redshift
18363 | DialectType::Snowflake
18364 | DialectType::DuckDB
18365 | DialectType::Hive
18366 | DialectType::ClickHouse => {
18367 // GROUP_CONCAT(x [, sep]) -> STRING_AGG(x, sep)
18368 let sep = gc.separator.unwrap_or(Expression::string(","));
18369 // Expand CONCAT(a,b,c) -> a || b || c for || dialects
18370 let this = expand_concat_to_dpipe(gc.this);
18371 // For PostgreSQL, add NULLS LAST for DESC / NULLS FIRST for ASC
18372 let order_by = if target == DialectType::PostgreSQL {
18373 gc.order_by.map(|ords| {
18374 ords.into_iter()
18375 .map(|mut o| {
18376 if o.nulls_first.is_none() {
18377 if o.desc {
18378 o.nulls_first = Some(false);
18379 // NULLS LAST
18380 } else {
18381 o.nulls_first = Some(true);
18382 // NULLS FIRST
18383 }
18384 }
18385 o
18386 })
18387 .collect()
18388 })
18389 } else {
18390 gc.order_by
18391 };
18392 Ok(Expression::StringAgg(Box::new(
18393 crate::expressions::StringAggFunc {
18394 this,
18395 separator: Some(sep),
18396 order_by,
18397 distinct: gc.distinct,
18398 filter: gc.filter,
18399 limit: None,
18400 },
18401 )))
18402 }
18403 DialectType::TSQL => {
18404 // GROUP_CONCAT(x [, sep]) -> STRING_AGG(x, sep) WITHIN GROUP (ORDER BY ...)
18405 // TSQL doesn't support DISTINCT in STRING_AGG
18406 let sep = gc.separator.unwrap_or(Expression::string(","));
18407 // Expand CONCAT(a,b,c) -> a + b + c for TSQL
18408 let this = expand_concat_to_plus(gc.this);
18409 Ok(Expression::StringAgg(Box::new(
18410 crate::expressions::StringAggFunc {
18411 this,
18412 separator: Some(sep),
18413 order_by: gc.order_by,
18414 distinct: false, // TSQL doesn't support DISTINCT in STRING_AGG
18415 filter: gc.filter,
18416 limit: None,
18417 },
18418 )))
18419 }
18420 DialectType::SQLite => {
18421 // GROUP_CONCAT stays as GROUP_CONCAT but ORDER BY is removed
18422 // SQLite GROUP_CONCAT doesn't support ORDER BY
18423 // Expand CONCAT(a,b,c) -> a || b || c
18424 let this = expand_concat_to_dpipe(gc.this);
18425 Ok(Expression::GroupConcat(Box::new(
18426 crate::expressions::GroupConcatFunc {
18427 this,
18428 separator: gc.separator,
18429 order_by: None, // SQLite doesn't support ORDER BY in GROUP_CONCAT
18430 distinct: gc.distinct,
18431 filter: gc.filter,
18432 },
18433 )))
18434 }
18435 DialectType::Spark | DialectType::Databricks => {
18436 // GROUP_CONCAT(x [, sep]) -> LISTAGG(x, sep)
18437 let sep = gc.separator.unwrap_or(Expression::string(","));
18438 Ok(Expression::ListAgg(Box::new(
18439 crate::expressions::ListAggFunc {
18440 this: gc.this,
18441 separator: Some(sep),
18442 on_overflow: None,
18443 order_by: gc.order_by,
18444 distinct: gc.distinct,
18445 filter: None,
18446 },
18447 )))
18448 }
18449 DialectType::MySQL
18450 | DialectType::SingleStore
18451 | DialectType::StarRocks => {
18452 // MySQL GROUP_CONCAT should have explicit SEPARATOR (default ',')
18453 if gc.separator.is_none() {
18454 let mut gc = gc;
18455 gc.separator = Some(Expression::string(","));
18456 Ok(Expression::GroupConcat(gc))
18457 } else {
18458 Ok(Expression::GroupConcat(gc))
18459 }
18460 }
18461 _ => Ok(Expression::GroupConcat(gc)),
18462 }
18463 } else {
18464 Ok(e)
18465 }
18466 }
18467 Action::TempTableHash => {
18468 match e {
18469 Expression::CreateTable(mut ct) => {
18470 // TSQL #table -> TEMPORARY TABLE with # stripped from name
18471 let name = &ct.name.name.name;
18472 if name.starts_with('#') {
18473 ct.name.name.name = name.trim_start_matches('#').to_string();
18474 }
18475 // Set temporary flag
18476 ct.temporary = true;
18477 Ok(Expression::CreateTable(ct))
18478 }
18479 Expression::Table(mut tr) => {
18480 // Strip # from table references
18481 let name = &tr.name.name;
18482 if name.starts_with('#') {
18483 tr.name.name = name.trim_start_matches('#').to_string();
18484 }
18485 Ok(Expression::Table(tr))
18486 }
18487 Expression::DropTable(mut dt) => {
18488 // Strip # from DROP TABLE names
18489 for table_ref in &mut dt.names {
18490 if table_ref.name.name.starts_with('#') {
18491 table_ref.name.name =
18492 table_ref.name.name.trim_start_matches('#').to_string();
18493 }
18494 }
18495 Ok(Expression::DropTable(dt))
18496 }
18497 _ => Ok(e),
18498 }
18499 }
18500 Action::NvlClearOriginal => {
18501 if let Expression::Nvl(mut f) = e {
18502 f.original_name = None;
18503 Ok(Expression::Nvl(f))
18504 } else {
18505 Ok(e)
18506 }
18507 }
18508 Action::HiveCastToTryCast => {
18509 // Convert Hive/Spark CAST to TRY_CAST for targets that support it
18510 if let Expression::Cast(mut c) = e {
18511 // For Spark/Hive -> DuckDB: TIMESTAMP -> TIMESTAMPTZ
18512 // (Spark's TIMESTAMP is always timezone-aware)
18513 if matches!(target, DialectType::DuckDB)
18514 && matches!(source, DialectType::Spark | DialectType::Databricks)
18515 && matches!(
18516 c.to,
18517 DataType::Timestamp {
18518 timezone: false,
18519 ..
18520 }
18521 )
18522 {
18523 c.to = DataType::Custom {
18524 name: "TIMESTAMPTZ".to_string(),
18525 };
18526 }
18527 // For Spark source -> Databricks: VARCHAR/CHAR -> STRING
18528 // Spark parses VARCHAR(n)/CHAR(n) as TEXT, normalize to STRING
18529 if matches!(target, DialectType::Databricks | DialectType::Spark)
18530 && matches!(
18531 source,
18532 DialectType::Spark | DialectType::Databricks | DialectType::Hive
18533 )
18534 && Self::has_varchar_char_type(&c.to)
18535 {
18536 c.to = Self::normalize_varchar_to_string(c.to);
18537 }
18538 Ok(Expression::TryCast(c))
18539 } else {
18540 Ok(e)
18541 }
18542 }
                Action::XorExpand => {
                    // Expand XOR to (a AND NOT b) OR (NOT a AND b) for dialects without XOR keyword
                    // Snowflake: use BOOLXOR(a, b) instead
                    if let Expression::Xor(xor) = e {
                        // Collect all XOR operands (the node stores the first two
                        // separately plus a list of trailing operands).
                        let mut operands = Vec::new();
                        if let Some(this) = xor.this {
                            operands.push(*this);
                        }
                        if let Some(expr) = xor.expression {
                            operands.push(*expr);
                        }
                        operands.extend(xor.expressions);

                        // Snowflake: use BOOLXOR(a, b)
                        // (binary case only; chained XOR falls through to the generic
                        // AND/OR expansion below)
                        if matches!(target, DialectType::Snowflake) && operands.len() == 2 {
                            let a = operands.remove(0);
                            let b = operands.remove(0);
                            return Ok(Expression::Function(Box::new(Function::new(
                                "BOOLXOR".to_string(),
                                vec![a, b],
                            ))));
                        }

                        // Helper to build (a AND NOT b) OR (NOT a AND b).
                        // Every sub-term is wrapped in Paren so the generated SQL's
                        // precedence matches the logical structure.
                        let make_xor = |a: Expression, b: Expression| -> Expression {
                            let not_b = Expression::Not(Box::new(
                                crate::expressions::UnaryOp::new(b.clone()),
                            ));
                            let not_a = Expression::Not(Box::new(
                                crate::expressions::UnaryOp::new(a.clone()),
                            ));
                            let left_and = Expression::And(Box::new(BinaryOp {
                                left: a,
                                right: Expression::Paren(Box::new(Paren {
                                    this: not_b,
                                    trailing_comments: Vec::new(),
                                })),
                                left_comments: Vec::new(),
                                operator_comments: Vec::new(),
                                trailing_comments: Vec::new(),
                            }));
                            let right_and = Expression::And(Box::new(BinaryOp {
                                left: Expression::Paren(Box::new(Paren {
                                    this: not_a,
                                    trailing_comments: Vec::new(),
                                })),
                                right: b,
                                left_comments: Vec::new(),
                                operator_comments: Vec::new(),
                                trailing_comments: Vec::new(),
                            }));
                            Expression::Or(Box::new(BinaryOp {
                                left: Expression::Paren(Box::new(Paren {
                                    this: left_and,
                                    trailing_comments: Vec::new(),
                                })),
                                right: Expression::Paren(Box::new(Paren {
                                    this: right_and,
                                    trailing_comments: Vec::new(),
                                })),
                                left_comments: Vec::new(),
                                operator_comments: Vec::new(),
                                trailing_comments: Vec::new(),
                            }))
                        };

                        // Fold left-to-right: a XOR b XOR c == (a XOR b) XOR c.
                        if operands.len() >= 2 {
                            let mut result = make_xor(operands.remove(0), operands.remove(0));
                            for operand in operands {
                                result = make_xor(result, operand);
                            }
                            Ok(result)
                        } else if operands.len() == 1 {
                            // A single-operand XOR degenerates to the operand itself.
                            Ok(operands.remove(0))
                        } else {
                            // No operands - return FALSE (shouldn't happen)
                            Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
                                value: false,
                            }))
                        }
                    } else {
                        Ok(e)
                    }
                }
18628 Action::DatePartUnquote => {
18629 // DATE_PART('month', x) -> DATE_PART(month, x) for Snowflake target
18630 // Convert the quoted string first arg to a bare Column/Identifier
18631 if let Expression::Function(mut f) = e {
18632 if let Some(Expression::Literal(crate::expressions::Literal::String(s))) =
18633 f.args.first()
18634 {
18635 let bare_name = s.to_lowercase();
18636 f.args[0] = Expression::Column(crate::expressions::Column {
18637 name: Identifier::new(bare_name),
18638 table: None,
18639 join_mark: false,
18640 trailing_comments: Vec::new(),
18641 span: None,
18642 });
18643 }
18644 Ok(Expression::Function(f))
18645 } else {
18646 Ok(e)
18647 }
18648 }
18649 Action::ArrayLengthConvert => {
18650 // Extract the argument from the expression
18651 let arg = match e {
18652 Expression::Cardinality(ref f) => f.this.clone(),
18653 Expression::ArrayLength(ref f) => f.this.clone(),
18654 Expression::ArraySize(ref f) => f.this.clone(),
18655 _ => return Ok(e),
18656 };
18657 match target {
18658 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
18659 Ok(Expression::Function(Box::new(Function::new(
18660 "SIZE".to_string(),
18661 vec![arg],
18662 ))))
18663 }
18664 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
18665 Ok(Expression::Cardinality(Box::new(
18666 crate::expressions::UnaryFunc::new(arg),
18667 )))
18668 }
18669 DialectType::BigQuery => Ok(Expression::ArrayLength(Box::new(
18670 crate::expressions::UnaryFunc::new(arg),
18671 ))),
18672 DialectType::DuckDB => Ok(Expression::ArrayLength(Box::new(
18673 crate::expressions::UnaryFunc::new(arg),
18674 ))),
18675 DialectType::PostgreSQL | DialectType::Redshift => {
18676 // PostgreSQL ARRAY_LENGTH requires dimension arg
18677 Ok(Expression::Function(Box::new(Function::new(
18678 "ARRAY_LENGTH".to_string(),
18679 vec![arg, Expression::number(1)],
18680 ))))
18681 }
18682 DialectType::Snowflake => Ok(Expression::ArraySize(Box::new(
18683 crate::expressions::UnaryFunc::new(arg),
18684 ))),
18685 _ => Ok(e), // Keep original
18686 }
18687 }
18688
18689 Action::JsonExtractToArrow => {
18690 // JSON_EXTRACT(x, path) -> x -> path for SQLite/DuckDB (set arrow_syntax = true)
18691 if let Expression::JsonExtract(mut f) = e {
18692 f.arrow_syntax = true;
18693 // Transform path: convert bracket notation to dot notation
18694 // SQLite strips wildcards, DuckDB preserves them
18695 if let Expression::Literal(Literal::String(ref s)) = f.path {
18696 let mut transformed = s.clone();
18697 if matches!(target, DialectType::SQLite) {
18698 transformed = Self::strip_json_wildcards(&transformed);
18699 }
18700 transformed = Self::bracket_to_dot_notation(&transformed);
18701 if transformed != *s {
18702 f.path = Expression::string(&transformed);
18703 }
18704 }
18705 Ok(Expression::JsonExtract(f))
18706 } else {
18707 Ok(e)
18708 }
18709 }
18710
                Action::JsonExtractToGetJsonObject => {
                    // Rewrite JSON_EXTRACT for targets that spell the operation
                    // differently: JSON_EXTRACT_PATH(_TEXT) for PostgreSQL/Redshift,
                    // GET_JSON_OBJECT otherwise (Hive/Spark-style).
                    if let Expression::JsonExtract(f) = e {
                        if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
                            // JSON_EXTRACT(x, '$.key') -> JSON_EXTRACT_PATH(x, 'key') for PostgreSQL
                            // Use proper decomposition that handles brackets
                            let keys: Vec<Expression> =
                                if let Expression::Literal(Literal::String(ref s)) = f.path {
                                    let parts = Self::decompose_json_path(s);
                                    parts.into_iter().map(|k| Expression::string(&k)).collect()
                                } else {
                                    // Non-literal path: forward it as a single argument.
                                    vec![f.path]
                                };
                            // Redshift uses the _TEXT variant of the function.
                            let func_name = if matches!(target, DialectType::Redshift) {
                                "JSON_EXTRACT_PATH_TEXT"
                            } else {
                                "JSON_EXTRACT_PATH"
                            };
                            let mut args = vec![f.this];
                            args.extend(keys);
                            Ok(Expression::Function(Box::new(Function::new(
                                func_name.to_string(),
                                args,
                            ))))
                        } else {
                            // GET_JSON_OBJECT(x, '$.path') for Hive/Spark
                            // Convert bracket double quotes to single quotes
                            let path = if let Expression::Literal(Literal::String(ref s)) = f.path {
                                let normalized = Self::bracket_to_single_quotes(s);
                                if normalized != *s {
                                    Expression::string(&normalized)
                                } else {
                                    // Unchanged: reuse the original literal node.
                                    f.path
                                }
                            } else {
                                f.path
                            };
                            Ok(Expression::Function(Box::new(Function::new(
                                "GET_JSON_OBJECT".to_string(),
                                vec![f.this, path],
                            ))))
                        }
                    } else {
                        Ok(e)
                    }
                }
18756
18757 Action::JsonExtractScalarToGetJsonObject => {
18758 // JSON_EXTRACT_SCALAR(x, '$.path') -> GET_JSON_OBJECT(x, '$.path') for Hive/Spark
18759 if let Expression::JsonExtractScalar(f) = e {
18760 Ok(Expression::Function(Box::new(Function::new(
18761 "GET_JSON_OBJECT".to_string(),
18762 vec![f.this, f.path],
18763 ))))
18764 } else {
18765 Ok(e)
18766 }
18767 }
18768
18769 Action::JsonExtractToTsql => {
18770 // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> ISNULL(JSON_QUERY(x, path), JSON_VALUE(x, path)) for TSQL
18771 let (this, path) = match e {
18772 Expression::JsonExtract(f) => (f.this, f.path),
18773 Expression::JsonExtractScalar(f) => (f.this, f.path),
18774 _ => return Ok(e),
18775 };
18776 // Transform path: strip wildcards, convert bracket notation to dot notation
18777 let transformed_path = if let Expression::Literal(Literal::String(ref s)) = path
18778 {
18779 let stripped = Self::strip_json_wildcards(s);
18780 let dotted = Self::bracket_to_dot_notation(&stripped);
18781 Expression::string(&dotted)
18782 } else {
18783 path
18784 };
18785 let json_query = Expression::Function(Box::new(Function::new(
18786 "JSON_QUERY".to_string(),
18787 vec![this.clone(), transformed_path.clone()],
18788 )));
18789 let json_value = Expression::Function(Box::new(Function::new(
18790 "JSON_VALUE".to_string(),
18791 vec![this, transformed_path],
18792 )));
18793 Ok(Expression::Function(Box::new(Function::new(
18794 "ISNULL".to_string(),
18795 vec![json_query, json_value],
18796 ))))
18797 }
18798
                Action::JsonExtractToClickHouse => {
                    // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> JSONExtractString(x, 'key1', idx, 'key2') for ClickHouse
                    let (this, path) = match e {
                        Expression::JsonExtract(f) => (f.this, f.path),
                        Expression::JsonExtractScalar(f) => (f.this, f.path),
                        _ => return Ok(e),
                    };
                    // A literal path such as '$.a[0].b' is decomposed into the
                    // stepwise key/index arguments ClickHouse expects; a non-literal
                    // path is passed through as a single argument.
                    let args: Vec<Expression> =
                        if let Expression::Literal(Literal::String(ref s)) = path {
                            let parts = Self::decompose_json_path(s);
                            let mut result = vec![this];
                            for part in parts {
                                // ClickHouse uses 1-based integer indices for array access
                                if let Ok(idx) = part.parse::<i64>() {
                                    result.push(Expression::number(idx + 1));
                                } else {
                                    result.push(Expression::string(&part));
                                }
                            }
                            result
                        } else {
                            vec![this, path]
                        };
                    Ok(Expression::Function(Box::new(Function::new(
                        "JSONExtractString".to_string(),
                        args,
                    ))))
                }
18827
                Action::JsonExtractScalarConvert => {
                    // JSON_EXTRACT_SCALAR -> target-specific
                    if let Expression::JsonExtractScalar(f) = e {
                        match target {
                            DialectType::PostgreSQL | DialectType::Redshift => {
                                // JSON_EXTRACT_SCALAR(x, '$.path') -> JSON_EXTRACT_PATH_TEXT(x, 'key1', 'key2')
                                let keys: Vec<Expression> =
                                    if let Expression::Literal(Literal::String(ref s)) = f.path {
                                        let parts = Self::decompose_json_path(s);
                                        parts.into_iter().map(|k| Expression::string(&k)).collect()
                                    } else {
                                        // Non-literal path: forward it as-is.
                                        vec![f.path]
                                    };
                                let mut args = vec![f.this];
                                args.extend(keys);
                                Ok(Expression::Function(Box::new(Function::new(
                                    "JSON_EXTRACT_PATH_TEXT".to_string(),
                                    args,
                                ))))
                            }
                            DialectType::Snowflake => {
                                // JSON_EXTRACT_SCALAR(x, '$.path') -> JSON_EXTRACT_PATH_TEXT(x, 'stripped_path')
                                // Snowflake paths omit the leading '$.' prefix.
                                let stripped_path =
                                    if let Expression::Literal(Literal::String(ref s)) = f.path {
                                        let stripped = Self::strip_json_dollar_prefix(s);
                                        Expression::string(&stripped)
                                    } else {
                                        f.path
                                    };
                                Ok(Expression::Function(Box::new(Function::new(
                                    "JSON_EXTRACT_PATH_TEXT".to_string(),
                                    vec![f.this, stripped_path],
                                ))))
                            }
                            DialectType::SQLite | DialectType::DuckDB => {
                                // JSON_EXTRACT_SCALAR(x, '$.path') -> x ->> '$.path'
                                // Same node kind, flagged for arrow-operator rendering.
                                Ok(Expression::JsonExtractScalar(Box::new(
                                    crate::expressions::JsonExtractFunc {
                                        this: f.this,
                                        path: f.path,
                                        returning: f.returning,
                                        arrow_syntax: true,
                                        hash_arrow_syntax: false,
                                        wrapper_option: None,
                                        quotes_option: None,
                                        on_scalar_string: false,
                                        on_error: None,
                                    },
                                )))
                            }
                            // Other targets keep JSON_EXTRACT_SCALAR unchanged.
                            _ => Ok(Expression::JsonExtractScalar(f)),
                        }
                    } else {
                        Ok(e)
                    }
                }
18884
18885 Action::JsonPathNormalize => {
18886 // Normalize JSON path format for BigQuery, MySQL, etc.
18887 if let Expression::JsonExtract(mut f) = e {
18888 if let Expression::Literal(Literal::String(ref s)) = f.path {
18889 let mut normalized = s.clone();
18890 // Convert bracket notation and handle wildcards per dialect
18891 match target {
18892 DialectType::BigQuery => {
18893 // BigQuery strips wildcards and uses single quotes in brackets
18894 normalized = Self::strip_json_wildcards(&normalized);
18895 normalized = Self::bracket_to_single_quotes(&normalized);
18896 }
18897 DialectType::MySQL => {
18898 // MySQL preserves wildcards, converts brackets to dot notation
18899 normalized = Self::bracket_to_dot_notation(&normalized);
18900 }
18901 _ => {}
18902 }
18903 if normalized != *s {
18904 f.path = Expression::string(&normalized);
18905 }
18906 }
18907 Ok(Expression::JsonExtract(f))
18908 } else {
18909 Ok(e)
18910 }
18911 }
18912
                Action::JsonQueryValueConvert => {
                    // JsonQuery/JsonValue -> target-specific
                    let (f, is_query) = match e {
                        Expression::JsonQuery(f) => (f, true),
                        Expression::JsonValue(f) => (f, false),
                        _ => return Ok(e),
                    };
                    match target {
                        DialectType::TSQL | DialectType::Fabric => {
                            // ISNULL(JSON_QUERY(...), JSON_VALUE(...))
                            // JSON_QUERY handles objects/arrays, JSON_VALUE scalars;
                            // coalescing the two covers both result shapes.
                            let json_query = Expression::Function(Box::new(Function::new(
                                "JSON_QUERY".to_string(),
                                vec![f.this.clone(), f.path.clone()],
                            )));
                            let json_value = Expression::Function(Box::new(Function::new(
                                "JSON_VALUE".to_string(),
                                vec![f.this, f.path],
                            )));
                            Ok(Expression::Function(Box::new(Function::new(
                                "ISNULL".to_string(),
                                vec![json_query, json_value],
                            ))))
                        }
                        DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
                            // GET_JSON_OBJECT(json, path)
                            Ok(Expression::Function(Box::new(Function::new(
                                "GET_JSON_OBJECT".to_string(),
                                vec![f.this, f.path],
                            ))))
                        }
                        DialectType::PostgreSQL | DialectType::Redshift => {
                            // JSON_EXTRACT_PATH_TEXT(json, path)
                            Ok(Expression::Function(Box::new(Function::new(
                                "JSON_EXTRACT_PATH_TEXT".to_string(),
                                vec![f.this, f.path],
                            ))))
                        }
                        DialectType::DuckDB | DialectType::SQLite => {
                            // json -> path arrow syntax
                            Ok(Expression::JsonExtract(Box::new(
                                crate::expressions::JsonExtractFunc {
                                    this: f.this,
                                    path: f.path,
                                    returning: f.returning,
                                    arrow_syntax: true,
                                    hash_arrow_syntax: false,
                                    wrapper_option: f.wrapper_option,
                                    quotes_option: f.quotes_option,
                                    on_scalar_string: f.on_scalar_string,
                                    on_error: f.on_error,
                                },
                            )))
                        }
                        DialectType::Snowflake => {
                            // GET_PATH(PARSE_JSON(json), 'path')
                            // Strip $. prefix from path
                            // Only wrap in PARSE_JSON if not already a PARSE_JSON call or ParseJson expression
                            let json_expr = match &f.this {
                                Expression::Function(ref inner_f)
                                    if inner_f.name.eq_ignore_ascii_case("PARSE_JSON") =>
                                {
                                    f.this
                                }
                                Expression::ParseJson(_) => {
                                    // Already a ParseJson expression, which generates as PARSE_JSON(...)
                                    f.this
                                }
                                _ => Expression::Function(Box::new(Function::new(
                                    "PARSE_JSON".to_string(),
                                    vec![f.this],
                                ))),
                            };
                            let path_str = match &f.path {
                                Expression::Literal(Literal::String(s)) => {
                                    let stripped = s.strip_prefix("$.").unwrap_or(s);
                                    Expression::Literal(Literal::String(stripped.to_string()))
                                }
                                // Non-literal path: pass through unchanged.
                                other => other.clone(),
                            };
                            Ok(Expression::Function(Box::new(Function::new(
                                "GET_PATH".to_string(),
                                vec![json_expr, path_str],
                            ))))
                        }
                        _ => {
                            // Default: keep as JSON_QUERY/JSON_VALUE function
                            let func_name = if is_query { "JSON_QUERY" } else { "JSON_VALUE" };
                            Ok(Expression::Function(Box::new(Function::new(
                                func_name.to_string(),
                                vec![f.this, f.path],
                            ))))
                        }
                    }
                }
19005
19006 Action::JsonLiteralToJsonParse => {
19007 // CAST('x' AS JSON) -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake
19008 if let Expression::Cast(c) = e {
19009 let func_name = if matches!(target, DialectType::Snowflake) {
19010 "PARSE_JSON"
19011 } else {
19012 "JSON_PARSE"
19013 };
19014 Ok(Expression::Function(Box::new(Function::new(
19015 func_name.to_string(),
19016 vec![c.this],
19017 ))))
19018 } else {
19019 Ok(e)
19020 }
19021 }
19022
                Action::AtTimeZoneConvert => {
                    // AT TIME ZONE -> target-specific conversion
                    if let Expression::AtTimeZone(atz) = e {
                        match target {
                            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                                // expr AT TIME ZONE 'zone' -> AT_TIMEZONE(expr, 'zone')
                                Ok(Expression::Function(Box::new(Function::new(
                                    "AT_TIMEZONE".to_string(),
                                    vec![atz.this, atz.zone],
                                ))))
                            }
                            DialectType::Spark | DialectType::Databricks => {
                                // FROM_UTC_TIMESTAMP(expr, 'zone')
                                Ok(Expression::Function(Box::new(Function::new(
                                    "FROM_UTC_TIMESTAMP".to_string(),
                                    vec![atz.this, atz.zone],
                                ))))
                            }
                            DialectType::Snowflake => {
                                // CONVERT_TIMEZONE('zone', expr)
                                // NOTE: argument order here is zone first, then expr.
                                Ok(Expression::Function(Box::new(Function::new(
                                    "CONVERT_TIMEZONE".to_string(),
                                    vec![atz.zone, atz.this],
                                ))))
                            }
                            DialectType::BigQuery => {
                                // TIMESTAMP(DATETIME(expr, 'zone'))
                                let datetime_call = Expression::Function(Box::new(Function::new(
                                    "DATETIME".to_string(),
                                    vec![atz.this, atz.zone],
                                )));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "TIMESTAMP".to_string(),
                                    vec![datetime_call],
                                ))))
                            }
                            // Fallback: generic AT_TIMEZONE(expr, zone) call.
                            _ => Ok(Expression::Function(Box::new(Function::new(
                                "AT_TIMEZONE".to_string(),
                                vec![atz.this, atz.zone],
                            )))),
                        }
                    } else {
                        Ok(e)
                    }
                }
19066
                Action::DayOfWeekConvert => {
                    // DAY_OF_WEEK -> ISODOW for DuckDB, ((DAYOFWEEK(x) % 7) + 1) for Spark
                    if let Expression::DayOfWeek(f) = e {
                        match target {
                            DialectType::DuckDB => Ok(Expression::Function(Box::new(
                                Function::new("ISODOW".to_string(), vec![f.this]),
                            ))),
                            DialectType::Spark | DialectType::Databricks => {
                                // ((DAYOFWEEK(x) % 7) + 1)
                                // NOTE(review): the modular shift presumably remaps
                                // Spark's day numbering onto the source dialect's —
                                // confirm against the dialect's DAY_OF_WEEK semantics.
                                let dayofweek = Expression::Function(Box::new(Function::new(
                                    "DAYOFWEEK".to_string(),
                                    vec![f.this],
                                )));
                                let modulo = Expression::Mod(Box::new(BinaryOp {
                                    left: dayofweek,
                                    right: Expression::number(7),
                                    left_comments: Vec::new(),
                                    operator_comments: Vec::new(),
                                    trailing_comments: Vec::new(),
                                }));
                                // Explicit Paren nodes keep operator precedence visible
                                // in the generated SQL.
                                let paren_mod = Expression::Paren(Box::new(Paren {
                                    this: modulo,
                                    trailing_comments: Vec::new(),
                                }));
                                let add_one = Expression::Add(Box::new(BinaryOp {
                                    left: paren_mod,
                                    right: Expression::number(1),
                                    left_comments: Vec::new(),
                                    operator_comments: Vec::new(),
                                    trailing_comments: Vec::new(),
                                }));
                                Ok(Expression::Paren(Box::new(Paren {
                                    this: add_one,
                                    trailing_comments: Vec::new(),
                                })))
                            }
                            _ => Ok(Expression::DayOfWeek(f)),
                        }
                    } else {
                        Ok(e)
                    }
                }
19109
19110 Action::MaxByMinByConvert => {
19111 // MAX_BY -> argMax for ClickHouse, drop 3rd arg for Spark
19112 // MIN_BY -> argMin for ClickHouse, ARG_MIN for DuckDB, drop 3rd arg for Spark/ClickHouse
19113 // Handle both Expression::Function and Expression::AggregateFunction
19114 let (is_max, args) = match &e {
19115 Expression::Function(f) => {
19116 (f.name.eq_ignore_ascii_case("MAX_BY"), f.args.clone())
19117 }
19118 Expression::AggregateFunction(af) => {
19119 (af.name.eq_ignore_ascii_case("MAX_BY"), af.args.clone())
19120 }
19121 _ => return Ok(e),
19122 };
19123 match target {
19124 DialectType::ClickHouse => {
19125 let name = if is_max { "argMax" } else { "argMin" };
19126 let mut args = args;
19127 args.truncate(2);
19128 Ok(Expression::Function(Box::new(Function::new(
19129 name.to_string(),
19130 args,
19131 ))))
19132 }
19133 DialectType::DuckDB => {
19134 let name = if is_max { "ARG_MAX" } else { "ARG_MIN" };
19135 Ok(Expression::Function(Box::new(Function::new(
19136 name.to_string(),
19137 args,
19138 ))))
19139 }
19140 DialectType::Spark | DialectType::Databricks => {
19141 let mut args = args;
19142 args.truncate(2);
19143 let name = if is_max { "MAX_BY" } else { "MIN_BY" };
19144 Ok(Expression::Function(Box::new(Function::new(
19145 name.to_string(),
19146 args,
19147 ))))
19148 }
19149 _ => Ok(e),
19150 }
19151 }
19152
19153 Action::ElementAtConvert => {
19154 // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
19155 let (arr, idx) = if let Expression::ElementAt(bf) = e {
19156 (bf.this, bf.expression)
19157 } else if let Expression::Function(ref f) = e {
19158 if f.args.len() >= 2 {
19159 if let Expression::Function(f) = e {
19160 let mut args = f.args;
19161 let arr = args.remove(0);
19162 let idx = args.remove(0);
19163 (arr, idx)
19164 } else {
19165 unreachable!("outer condition already matched Expression::Function")
19166 }
19167 } else {
19168 return Ok(e);
19169 }
19170 } else {
19171 return Ok(e);
19172 };
19173 match target {
19174 DialectType::PostgreSQL => {
19175 // Wrap array in parens for PostgreSQL: (ARRAY[1,2,3])[4]
19176 let arr_expr = Expression::Paren(Box::new(Paren {
19177 this: arr,
19178 trailing_comments: vec![],
19179 }));
19180 Ok(Expression::Subscript(Box::new(
19181 crate::expressions::Subscript {
19182 this: arr_expr,
19183 index: idx,
19184 },
19185 )))
19186 }
19187 DialectType::BigQuery => {
19188 // BigQuery: convert ARRAY[...] to bare [...] for subscript
19189 let arr_expr = match arr {
19190 Expression::ArrayFunc(af) => Expression::ArrayFunc(Box::new(
19191 crate::expressions::ArrayConstructor {
19192 expressions: af.expressions,
19193 bracket_notation: true,
19194 use_list_keyword: false,
19195 },
19196 )),
19197 other => other,
19198 };
19199 let safe_ordinal = Expression::Function(Box::new(Function::new(
19200 "SAFE_ORDINAL".to_string(),
19201 vec![idx],
19202 )));
19203 Ok(Expression::Subscript(Box::new(
19204 crate::expressions::Subscript {
19205 this: arr_expr,
19206 index: safe_ordinal,
19207 },
19208 )))
19209 }
19210 _ => Ok(Expression::Function(Box::new(Function::new(
19211 "ELEMENT_AT".to_string(),
19212 vec![arr, idx],
19213 )))),
19214 }
19215 }
19216
            Action::CurrentUserParens => {
                // Snowflake requires call parentheses: CURRENT_USER -> CURRENT_USER().
                // The incoming node is replaced wholesale; only the spelling matters,
                // so `e` is intentionally discarded.
                Ok(Expression::Function(Box::new(Function::new(
                    "CURRENT_USER".to_string(),
                    vec![],
                ))))
            }
19224
19225 Action::ArrayAggToCollectList => {
19226 // ARRAY_AGG(x ORDER BY ...) -> COLLECT_LIST(x) for Hive/Spark
19227 // Python sqlglot Hive.arrayagg_sql strips ORDER BY for simple cases
19228 // but preserves it when DISTINCT/IGNORE NULLS/LIMIT are present
19229 match e {
19230 Expression::AggregateFunction(mut af) => {
19231 let is_simple =
19232 !af.distinct && af.ignore_nulls.is_none() && af.limit.is_none();
19233 let args = if af.args.is_empty() {
19234 vec![]
19235 } else {
19236 vec![af.args[0].clone()]
19237 };
19238 af.name = "COLLECT_LIST".to_string();
19239 af.args = args;
19240 if is_simple {
19241 af.order_by = Vec::new();
19242 }
19243 Ok(Expression::AggregateFunction(af))
19244 }
19245 Expression::ArrayAgg(agg) => {
19246 let is_simple =
19247 !agg.distinct && agg.ignore_nulls.is_none() && agg.limit.is_none();
19248 Ok(Expression::AggregateFunction(Box::new(
19249 crate::expressions::AggregateFunction {
19250 name: "COLLECT_LIST".to_string(),
19251 args: vec![agg.this.clone()],
19252 distinct: agg.distinct,
19253 filter: agg.filter.clone(),
19254 order_by: if is_simple {
19255 Vec::new()
19256 } else {
19257 agg.order_by.clone()
19258 },
19259 limit: agg.limit.clone(),
19260 ignore_nulls: agg.ignore_nulls,
19261 },
19262 )))
19263 }
19264 _ => Ok(e),
19265 }
19266 }
19267
19268 Action::ArraySyntaxConvert => {
19269 match e {
19270 // ARRAY[1, 2] (ArrayFunc bracket_notation=false) -> set bracket_notation=true
19271 // so the generator uses dialect-specific output (ARRAY() for Spark, [] for BigQuery)
19272 Expression::ArrayFunc(arr) if !arr.bracket_notation => Ok(
19273 Expression::ArrayFunc(Box::new(crate::expressions::ArrayConstructor {
19274 expressions: arr.expressions,
19275 bracket_notation: true,
19276 use_list_keyword: false,
19277 })),
19278 ),
19279 // ARRAY(y) function style -> ArrayFunc for target dialect
19280 // bracket_notation=true for BigQuery/DuckDB/ClickHouse/StarRocks (output []), false for Presto (output ARRAY[])
19281 Expression::Function(f) if f.name.eq_ignore_ascii_case("ARRAY") => {
19282 let bracket = matches!(
19283 target,
19284 DialectType::BigQuery
19285 | DialectType::DuckDB
19286 | DialectType::ClickHouse
19287 | DialectType::StarRocks
19288 );
19289 Ok(Expression::ArrayFunc(Box::new(
19290 crate::expressions::ArrayConstructor {
19291 expressions: f.args,
19292 bracket_notation: bracket,
19293 use_list_keyword: false,
19294 },
19295 )))
19296 }
19297 _ => Ok(e),
19298 }
19299 }
19300
19301 Action::CastToJsonForSpark => {
19302 // CAST(x AS JSON) -> TO_JSON(x) for Spark
19303 if let Expression::Cast(c) = e {
19304 Ok(Expression::Function(Box::new(Function::new(
19305 "TO_JSON".to_string(),
19306 vec![c.this],
19307 ))))
19308 } else {
19309 Ok(e)
19310 }
19311 }
19312
19313 Action::CastJsonToFromJson => {
19314 // CAST(ParseJson(literal) AS ARRAY/MAP/STRUCT) -> FROM_JSON(literal, type_string) for Spark
19315 if let Expression::Cast(c) = e {
19316 // Extract the string literal from ParseJson
19317 let literal_expr = if let Expression::ParseJson(pj) = c.this {
19318 pj.this
19319 } else {
19320 c.this
19321 };
19322 // Convert the target DataType to Spark's type string format
19323 let type_str = Self::data_type_to_spark_string(&c.to);
19324 Ok(Expression::Function(Box::new(Function::new(
19325 "FROM_JSON".to_string(),
19326 vec![literal_expr, Expression::Literal(Literal::String(type_str))],
19327 ))))
19328 } else {
19329 Ok(e)
19330 }
19331 }
19332
            Action::ToJsonConvert => {
                // TO_JSON(x) -> target-specific conversion:
                //   Presto/Trino -> JSON_FORMAT(CAST(x AS JSON))
                //   BigQuery     -> TO_JSON_STRING(x)
                //   DuckDB       -> CAST(TO_JSON(x) AS TEXT)
                //   otherwise    -> the ToJson node is kept as-is
                if let Expression::ToJson(f) = e {
                    let arg = f.this;
                    match target {
                        DialectType::Presto | DialectType::Trino => {
                            // JSON_FORMAT(CAST(x AS JSON))
                            // Custom type keeps the literal "JSON" spelling.
                            let cast_json = Expression::Cast(Box::new(Cast {
                                this: arg,
                                to: DataType::Custom {
                                    name: "JSON".to_string(),
                                },
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                            }));
                            Ok(Expression::Function(Box::new(Function::new(
                                "JSON_FORMAT".to_string(),
                                vec![cast_json],
                            ))))
                        }
                        DialectType::BigQuery => Ok(Expression::Function(Box::new(
                            Function::new("TO_JSON_STRING".to_string(), vec![arg]),
                        ))),
                        DialectType::DuckDB => {
                            // CAST(TO_JSON(x) AS TEXT)
                            let to_json =
                                Expression::ToJson(Box::new(crate::expressions::UnaryFunc {
                                    this: arg,
                                    original_name: None,
                                }));
                            Ok(Expression::Cast(Box::new(Cast {
                                this: to_json,
                                to: DataType::Text,
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                            })))
                        }
                        // Other targets: rebuild the ToJson node, since `arg`
                        // was already moved out of `f` above.
                        _ => Ok(Expression::ToJson(Box::new(
                            crate::expressions::UnaryFunc {
                                this: arg,
                                original_name: None,
                            },
                        ))),
                    }
                } else {
                    Ok(e)
                }
            }
19385
19386 Action::VarianceToClickHouse => {
19387 if let Expression::Variance(f) = e {
19388 Ok(Expression::Function(Box::new(Function::new(
19389 "varSamp".to_string(),
19390 vec![f.this],
19391 ))))
19392 } else {
19393 Ok(e)
19394 }
19395 }
19396
19397 Action::StddevToClickHouse => {
19398 if let Expression::Stddev(f) = e {
19399 Ok(Expression::Function(Box::new(Function::new(
19400 "stddevSamp".to_string(),
19401 vec![f.this],
19402 ))))
19403 } else {
19404 Ok(e)
19405 }
19406 }
19407
19408 Action::ApproxQuantileConvert => {
19409 if let Expression::ApproxQuantile(aq) = e {
19410 let mut args = vec![*aq.this];
19411 if let Some(q) = aq.quantile {
19412 args.push(*q);
19413 }
19414 Ok(Expression::Function(Box::new(Function::new(
19415 "APPROX_PERCENTILE".to_string(),
19416 args,
19417 ))))
19418 } else {
19419 Ok(e)
19420 }
19421 }
19422
19423 Action::DollarParamConvert => {
19424 if let Expression::Parameter(p) = e {
19425 Ok(Expression::Parameter(Box::new(
19426 crate::expressions::Parameter {
19427 name: p.name,
19428 index: p.index,
19429 style: crate::expressions::ParameterStyle::At,
19430 quoted: p.quoted,
19431 string_quoted: p.string_quoted,
19432 expression: p.expression,
19433 },
19434 )))
19435 } else {
19436 Ok(e)
19437 }
19438 }
19439
19440 Action::EscapeStringNormalize => {
19441 if let Expression::Literal(Literal::EscapeString(s)) = e {
19442 // Strip prefix (e.g., "e:" or "E:") if present from tokenizer
19443 let stripped = if s.starts_with("e:") || s.starts_with("E:") {
19444 s[2..].to_string()
19445 } else {
19446 s
19447 };
19448 let normalized = stripped
19449 .replace('\n', "\\n")
19450 .replace('\r', "\\r")
19451 .replace('\t', "\\t");
19452 match target {
19453 DialectType::BigQuery => {
19454 // BigQuery: e'...' -> CAST(b'...' AS STRING)
19455 // Use Raw for the b'...' part to avoid double-escaping
19456 let raw_sql = format!("CAST(b'{}' AS STRING)", normalized);
19457 Ok(Expression::Raw(crate::expressions::Raw { sql: raw_sql }))
19458 }
19459 _ => Ok(Expression::Literal(Literal::EscapeString(normalized))),
19460 }
19461 } else {
19462 Ok(e)
19463 }
19464 }
19465
19466 Action::StraightJoinCase => {
19467 // straight_join: keep lowercase for DuckDB, quote for MySQL
19468 if let Expression::Column(col) = e {
19469 if col.name.name == "STRAIGHT_JOIN" {
19470 let mut new_col = col;
19471 new_col.name.name = "straight_join".to_string();
19472 if matches!(target, DialectType::MySQL) {
19473 // MySQL: needs quoting since it's a reserved keyword
19474 new_col.name.quoted = true;
19475 }
19476 Ok(Expression::Column(new_col))
19477 } else {
19478 Ok(Expression::Column(col))
19479 }
19480 } else {
19481 Ok(e)
19482 }
19483 }
19484
19485 Action::TablesampleReservoir => {
19486 // TABLESAMPLE -> TABLESAMPLE RESERVOIR for DuckDB
19487 if let Expression::TableSample(mut ts) = e {
19488 if let Some(ref mut sample) = ts.sample {
19489 sample.method = crate::expressions::SampleMethod::Reservoir;
19490 sample.explicit_method = true;
19491 }
19492 Ok(Expression::TableSample(ts))
19493 } else {
19494 Ok(e)
19495 }
19496 }
19497
19498 Action::TablesampleSnowflakeStrip => {
19499 // Strip method and PERCENT for Snowflake target from non-Snowflake source
19500 match e {
19501 Expression::TableSample(mut ts) => {
19502 if let Some(ref mut sample) = ts.sample {
19503 sample.suppress_method_output = true;
19504 sample.unit_after_size = false;
19505 sample.is_percent = false;
19506 }
19507 Ok(Expression::TableSample(ts))
19508 }
19509 Expression::Table(mut t) => {
19510 if let Some(ref mut sample) = t.table_sample {
19511 sample.suppress_method_output = true;
19512 sample.unit_after_size = false;
19513 sample.is_percent = false;
19514 }
19515 Ok(Expression::Table(t))
19516 }
19517 _ => Ok(e),
19518 }
19519 }
19520
19521 Action::FirstToAnyValue => {
19522 // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
19523 if let Expression::First(mut agg) = e {
19524 agg.ignore_nulls = None;
19525 agg.name = Some("ANY_VALUE".to_string());
19526 Ok(Expression::AnyValue(agg))
19527 } else {
19528 Ok(e)
19529 }
19530 }
19531
19532 Action::ArrayIndexConvert => {
19533 // Subscript index: 1-based to 0-based for BigQuery
19534 if let Expression::Subscript(mut sub) = e {
19535 if let Expression::Literal(Literal::Number(ref n)) = sub.index {
19536 if let Ok(val) = n.parse::<i64>() {
19537 sub.index =
19538 Expression::Literal(Literal::Number((val - 1).to_string()));
19539 }
19540 }
19541 Ok(Expression::Subscript(sub))
19542 } else {
19543 Ok(e)
19544 }
19545 }
19546
19547 Action::AnyValueIgnoreNulls => {
19548 // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
19549 if let Expression::AnyValue(mut av) = e {
19550 if av.ignore_nulls.is_none() {
19551 av.ignore_nulls = Some(true);
19552 }
19553 Ok(Expression::AnyValue(av))
19554 } else {
19555 Ok(e)
19556 }
19557 }
19558
19559 Action::BigQueryNullsOrdering => {
19560 // BigQuery doesn't support NULLS FIRST/LAST in window function ORDER BY
19561 if let Expression::WindowFunction(mut wf) = e {
19562 for o in &mut wf.over.order_by {
19563 o.nulls_first = None;
19564 }
19565 Ok(Expression::WindowFunction(wf))
19566 } else if let Expression::Ordered(mut o) = e {
19567 o.nulls_first = None;
19568 Ok(Expression::Ordered(o))
19569 } else {
19570 Ok(e)
19571 }
19572 }
19573
19574 Action::SnowflakeFloatProtect => {
19575 // Convert DataType::Float to DataType::Custom("FLOAT") to prevent
19576 // Snowflake's target transform from converting it to DOUBLE.
19577 // Non-Snowflake sources should keep their FLOAT spelling.
19578 if let Expression::DataType(DataType::Float { .. }) = e {
19579 Ok(Expression::DataType(DataType::Custom {
19580 name: "FLOAT".to_string(),
19581 }))
19582 } else {
19583 Ok(e)
19584 }
19585 }
19586
19587 Action::MysqlNullsOrdering => {
19588 // MySQL doesn't support NULLS FIRST/LAST - strip or rewrite
19589 if let Expression::Ordered(mut o) = e {
19590 let nulls_last = o.nulls_first == Some(false);
19591 let desc = o.desc;
19592 // MySQL default: ASC -> NULLS LAST, DESC -> NULLS FIRST
19593 // If requested ordering matches default, just strip NULLS clause
19594 let matches_default = if desc {
19595 // DESC default is NULLS FIRST, so nulls_first=true matches
19596 o.nulls_first == Some(true)
19597 } else {
19598 // ASC default is NULLS LAST, so nulls_first=false matches
19599 nulls_last
19600 };
19601 if matches_default {
19602 o.nulls_first = None;
19603 Ok(Expression::Ordered(o))
19604 } else {
19605 // Need CASE WHEN x IS NULL THEN 0/1 ELSE 0/1 END, x
19606 // For ASC NULLS FIRST: ORDER BY CASE WHEN x IS NULL THEN 0 ELSE 1 END, x ASC
19607 // For DESC NULLS LAST: ORDER BY CASE WHEN x IS NULL THEN 1 ELSE 0 END, x DESC
19608 let null_val = if desc { 1 } else { 0 };
19609 let non_null_val = if desc { 0 } else { 1 };
19610 let _case_expr = Expression::Case(Box::new(Case {
19611 operand: None,
19612 whens: vec![(
19613 Expression::IsNull(Box::new(crate::expressions::IsNull {
19614 this: o.this.clone(),
19615 not: false,
19616 postfix_form: false,
19617 })),
19618 Expression::number(null_val),
19619 )],
19620 else_: Some(Expression::number(non_null_val)),
19621 comments: Vec::new(),
19622 }));
19623 o.nulls_first = None;
19624 // Return a tuple of [case_expr, ordered_expr]
19625 // We need to return both as part of the ORDER BY
19626 // But since transform_recursive processes individual expressions,
19627 // we can't easily add extra ORDER BY items here.
19628 // Instead, strip the nulls_first
19629 o.nulls_first = None;
19630 Ok(Expression::Ordered(o))
19631 }
19632 } else {
19633 Ok(e)
19634 }
19635 }
19636
            Action::MysqlNullsLastRewrite => {
                // DuckDB -> MySQL: Add CASE WHEN IS NULL THEN 1 ELSE 0 END to ORDER BY
                // to simulate NULLS LAST for ASC ordering.
                // Only window-function ORDER BY lists are rewritten here; other
                // Ordered nodes are returned untouched.
                if let Expression::WindowFunction(mut wf) = e {
                    let mut new_order_by = Vec::new();
                    for o in wf.over.order_by {
                        if !o.desc {
                            // ASC: DuckDB has NULLS LAST, MySQL has NULLS FIRST
                            // Add CASE WHEN expr IS NULL THEN 1 ELSE 0 END before expr
                            let case_expr = Expression::Case(Box::new(Case {
                                operand: None,
                                whens: vec![(
                                    Expression::IsNull(Box::new(crate::expressions::IsNull {
                                        this: o.this.clone(),
                                        not: false,
                                        postfix_form: false,
                                    })),
                                    Expression::Literal(Literal::Number("1".to_string())),
                                )],
                                else_: Some(Expression::Literal(Literal::Number(
                                    "0".to_string(),
                                ))),
                                comments: Vec::new(),
                            }));
                            // The synthetic key sorts non-NULL (0) before NULL (1)
                            // and precedes the original key in the ORDER BY list.
                            new_order_by.push(crate::expressions::Ordered {
                                this: case_expr,
                                desc: false,
                                nulls_first: None,
                                explicit_asc: false,
                                with_fill: None,
                            });
                            let mut ordered = o;
                            ordered.nulls_first = None;
                            new_order_by.push(ordered);
                        } else {
                            // DESC: DuckDB has NULLS LAST, MySQL also has NULLS LAST (NULLs smallest in DESC)
                            // No change needed; still strip the unsupported modifier.
                            let mut ordered = o;
                            ordered.nulls_first = None;
                            new_order_by.push(ordered);
                        }
                    }
                    wf.over.order_by = new_order_by;
                    Ok(Expression::WindowFunction(wf))
                } else {
                    Ok(e)
                }
            }
19685
19686 Action::RespectNullsConvert => {
19687 // RESPECT NULLS -> strip for SQLite (FIRST_VALUE(c) OVER (...))
19688 if let Expression::WindowFunction(mut wf) = e {
19689 match &mut wf.this {
19690 Expression::FirstValue(ref mut vf) => {
19691 if vf.ignore_nulls == Some(false) {
19692 vf.ignore_nulls = None;
19693 // For SQLite, we'd need to add NULLS LAST to ORDER BY in the OVER clause
19694 // but that's handled by the generator's NULLS ordering
19695 }
19696 }
19697 Expression::LastValue(ref mut vf) => {
19698 if vf.ignore_nulls == Some(false) {
19699 vf.ignore_nulls = None;
19700 }
19701 }
19702 _ => {}
19703 }
19704 Ok(Expression::WindowFunction(wf))
19705 } else {
19706 Ok(e)
19707 }
19708 }
19709
19710 Action::CreateTableStripComment => {
19711 // Strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
19712 if let Expression::CreateTable(mut ct) = e {
19713 for col in &mut ct.columns {
19714 col.comment = None;
19715 col.constraints.retain(|c| {
19716 !matches!(c, crate::expressions::ColumnConstraint::Comment(_))
19717 });
19718 // Also remove Comment from constraint_order
19719 col.constraint_order.retain(|c| {
19720 !matches!(c, crate::expressions::ConstraintType::Comment)
19721 });
19722 }
19723 // Strip properties (USING, PARTITIONED BY, etc.)
19724 ct.properties.clear();
19725 Ok(Expression::CreateTable(ct))
19726 } else {
19727 Ok(e)
19728 }
19729 }
19730
19731 Action::AlterTableToSpRename => {
19732 // ALTER TABLE db.t1 RENAME TO db.t2 -> EXEC sp_rename 'db.t1', 't2'
19733 if let Expression::AlterTable(ref at) = e {
19734 if let Some(crate::expressions::AlterTableAction::RenameTable(
19735 ref new_tbl,
19736 )) = at.actions.first()
19737 {
19738 // Build the old table name using TSQL bracket quoting
19739 let old_name = if let Some(ref schema) = at.name.schema {
19740 if at.name.name.quoted || schema.quoted {
19741 format!("[{}].[{}]", schema.name, at.name.name.name)
19742 } else {
19743 format!("{}.{}", schema.name, at.name.name.name)
19744 }
19745 } else {
19746 if at.name.name.quoted {
19747 format!("[{}]", at.name.name.name)
19748 } else {
19749 at.name.name.name.clone()
19750 }
19751 };
19752 let new_name = new_tbl.name.name.clone();
19753 // EXEC sp_rename 'old_name', 'new_name'
19754 let sql = format!("EXEC sp_rename '{}', '{}'", old_name, new_name);
19755 Ok(Expression::Raw(crate::expressions::Raw { sql }))
19756 } else {
19757 Ok(e)
19758 }
19759 } else {
19760 Ok(e)
19761 }
19762 }
19763
19764 Action::SnowflakeIntervalFormat => {
19765 // INTERVAL '2' HOUR -> INTERVAL '2 HOUR' for Snowflake
19766 if let Expression::Interval(mut iv) = e {
19767 if let (
19768 Some(Expression::Literal(Literal::String(ref val))),
19769 Some(ref unit_spec),
19770 ) = (&iv.this, &iv.unit)
19771 {
19772 let unit_str = match unit_spec {
19773 crate::expressions::IntervalUnitSpec::Simple { unit, .. } => {
19774 match unit {
19775 crate::expressions::IntervalUnit::Year => "YEAR",
19776 crate::expressions::IntervalUnit::Quarter => "QUARTER",
19777 crate::expressions::IntervalUnit::Month => "MONTH",
19778 crate::expressions::IntervalUnit::Week => "WEEK",
19779 crate::expressions::IntervalUnit::Day => "DAY",
19780 crate::expressions::IntervalUnit::Hour => "HOUR",
19781 crate::expressions::IntervalUnit::Minute => "MINUTE",
19782 crate::expressions::IntervalUnit::Second => "SECOND",
19783 crate::expressions::IntervalUnit::Millisecond => {
19784 "MILLISECOND"
19785 }
19786 crate::expressions::IntervalUnit::Microsecond => {
19787 "MICROSECOND"
19788 }
19789 crate::expressions::IntervalUnit::Nanosecond => {
19790 "NANOSECOND"
19791 }
19792 }
19793 }
19794 _ => "",
19795 };
19796 if !unit_str.is_empty() {
19797 let combined = format!("{} {}", val, unit_str);
19798 iv.this = Some(Expression::Literal(Literal::String(combined)));
19799 iv.unit = None;
19800 }
19801 }
19802 Ok(Expression::Interval(iv))
19803 } else {
19804 Ok(e)
19805 }
19806 }
19807
19808 Action::ArrayConcatBracketConvert => {
19809 // Expression::Array/ArrayFunc -> target-specific
19810 // For PostgreSQL: Array -> ArrayFunc (bracket_notation: false)
19811 // For Redshift: Array/ArrayFunc -> Function("ARRAY", args) to produce ARRAY(1, 2) with parens
19812 match e {
19813 Expression::Array(arr) => {
19814 if matches!(target, DialectType::Redshift) {
19815 Ok(Expression::Function(Box::new(Function::new(
19816 "ARRAY".to_string(),
19817 arr.expressions,
19818 ))))
19819 } else {
19820 Ok(Expression::ArrayFunc(Box::new(
19821 crate::expressions::ArrayConstructor {
19822 expressions: arr.expressions,
19823 bracket_notation: false,
19824 use_list_keyword: false,
19825 },
19826 )))
19827 }
19828 }
19829 Expression::ArrayFunc(arr) => {
19830 // Only for Redshift: convert bracket-notation ArrayFunc to Function("ARRAY")
19831 if matches!(target, DialectType::Redshift) {
19832 Ok(Expression::Function(Box::new(Function::new(
19833 "ARRAY".to_string(),
19834 arr.expressions,
19835 ))))
19836 } else {
19837 Ok(Expression::ArrayFunc(arr))
19838 }
19839 }
19840 _ => Ok(e),
19841 }
19842 }
19843
19844 Action::BitAggFloatCast => {
19845 // BIT_OR/BIT_AND/BIT_XOR with float/decimal cast arg -> wrap with ROUND+INT cast for DuckDB
19846 // For FLOAT/DOUBLE/REAL: CAST(ROUND(CAST(val AS type)) AS INT)
19847 // For DECIMAL: CAST(CAST(val AS DECIMAL(p,s)) AS INT)
19848 let int_type = DataType::Int {
19849 length: None,
19850 integer_spelling: false,
19851 };
19852 let wrap_agg = |agg_this: Expression, int_dt: DataType| -> Expression {
19853 if let Expression::Cast(c) = agg_this {
19854 match &c.to {
19855 DataType::Float { .. }
19856 | DataType::Double { .. }
19857 | DataType::Custom { .. } => {
19858 // FLOAT/DOUBLE/REAL: CAST(ROUND(CAST(val AS type)) AS INT)
19859 // Change FLOAT to REAL (Float with real_spelling=true) for DuckDB generator
19860 let inner_type = match &c.to {
19861 DataType::Float {
19862 precision, scale, ..
19863 } => DataType::Float {
19864 precision: *precision,
19865 scale: *scale,
19866 real_spelling: true,
19867 },
19868 other => other.clone(),
19869 };
19870 let inner_cast =
19871 Expression::Cast(Box::new(crate::expressions::Cast {
19872 this: c.this.clone(),
19873 to: inner_type,
19874 trailing_comments: Vec::new(),
19875 double_colon_syntax: false,
19876 format: None,
19877 default: None,
19878 }));
19879 let rounded = Expression::Function(Box::new(Function::new(
19880 "ROUND".to_string(),
19881 vec![inner_cast],
19882 )));
19883 Expression::Cast(Box::new(crate::expressions::Cast {
19884 this: rounded,
19885 to: int_dt,
19886 trailing_comments: Vec::new(),
19887 double_colon_syntax: false,
19888 format: None,
19889 default: None,
19890 }))
19891 }
19892 DataType::Decimal { .. } => {
19893 // DECIMAL: CAST(CAST(val AS DECIMAL(p,s)) AS INT)
19894 Expression::Cast(Box::new(crate::expressions::Cast {
19895 this: Expression::Cast(c),
19896 to: int_dt,
19897 trailing_comments: Vec::new(),
19898 double_colon_syntax: false,
19899 format: None,
19900 default: None,
19901 }))
19902 }
19903 _ => Expression::Cast(c),
19904 }
19905 } else {
19906 agg_this
19907 }
19908 };
19909 match e {
19910 Expression::BitwiseOrAgg(mut f) => {
19911 f.this = wrap_agg(f.this, int_type);
19912 Ok(Expression::BitwiseOrAgg(f))
19913 }
19914 Expression::BitwiseAndAgg(mut f) => {
19915 let int_type = DataType::Int {
19916 length: None,
19917 integer_spelling: false,
19918 };
19919 f.this = wrap_agg(f.this, int_type);
19920 Ok(Expression::BitwiseAndAgg(f))
19921 }
19922 Expression::BitwiseXorAgg(mut f) => {
19923 let int_type = DataType::Int {
19924 length: None,
19925 integer_spelling: false,
19926 };
19927 f.this = wrap_agg(f.this, int_type);
19928 Ok(Expression::BitwiseXorAgg(f))
19929 }
19930 _ => Ok(e),
19931 }
19932 }
19933
19934 Action::BitAggSnowflakeRename => {
19935 // BIT_OR -> BITORAGG, BIT_AND -> BITANDAGG, BIT_XOR -> BITXORAGG for Snowflake
19936 match e {
19937 Expression::BitwiseOrAgg(f) => Ok(Expression::Function(Box::new(
19938 Function::new("BITORAGG".to_string(), vec![f.this]),
19939 ))),
19940 Expression::BitwiseAndAgg(f) => Ok(Expression::Function(Box::new(
19941 Function::new("BITANDAGG".to_string(), vec![f.this]),
19942 ))),
19943 Expression::BitwiseXorAgg(f) => Ok(Expression::Function(Box::new(
19944 Function::new("BITXORAGG".to_string(), vec![f.this]),
19945 ))),
19946 _ => Ok(e),
19947 }
19948 }
19949
19950 Action::StrftimeCastTimestamp => {
19951 // CAST(x AS TIMESTAMP) -> CAST(x AS TIMESTAMP_NTZ) for Spark
19952 if let Expression::Cast(mut c) = e {
19953 if matches!(
19954 c.to,
19955 DataType::Timestamp {
19956 timezone: false,
19957 ..
19958 }
19959 ) {
19960 c.to = DataType::Custom {
19961 name: "TIMESTAMP_NTZ".to_string(),
19962 };
19963 }
19964 Ok(Expression::Cast(c))
19965 } else {
19966 Ok(e)
19967 }
19968 }
19969
19970 Action::DecimalDefaultPrecision => {
19971 // DECIMAL without precision -> DECIMAL(18, 3) for Snowflake
19972 if let Expression::Cast(mut c) = e {
19973 if matches!(
19974 c.to,
19975 DataType::Decimal {
19976 precision: None,
19977 ..
19978 }
19979 ) {
19980 c.to = DataType::Decimal {
19981 precision: Some(18),
19982 scale: Some(3),
19983 };
19984 }
19985 Ok(Expression::Cast(c))
19986 } else {
19987 Ok(e)
19988 }
19989 }
19990
19991 Action::FilterToIff => {
19992 // FILTER(WHERE cond) -> rewrite aggregate: AGG(IFF(cond, val, NULL))
19993 if let Expression::Filter(f) = e {
19994 let condition = *f.expression;
19995 let agg = *f.this;
19996 // Strip WHERE from condition
19997 let cond = match condition {
19998 Expression::Where(w) => w.this,
19999 other => other,
20000 };
20001 // Extract the aggregate function and its argument
20002 // We want AVG(IFF(condition, x, NULL))
20003 match agg {
20004 Expression::Function(mut func) => {
20005 if !func.args.is_empty() {
20006 let orig_arg = func.args[0].clone();
20007 let iff_call = Expression::Function(Box::new(Function::new(
20008 "IFF".to_string(),
20009 vec![cond, orig_arg, Expression::Null(Null)],
20010 )));
20011 func.args[0] = iff_call;
20012 Ok(Expression::Function(func))
20013 } else {
20014 Ok(Expression::Filter(Box::new(crate::expressions::Filter {
20015 this: Box::new(Expression::Function(func)),
20016 expression: Box::new(cond),
20017 })))
20018 }
20019 }
20020 Expression::Avg(mut avg) => {
20021 let iff_call = Expression::Function(Box::new(Function::new(
20022 "IFF".to_string(),
20023 vec![cond, avg.this.clone(), Expression::Null(Null)],
20024 )));
20025 avg.this = iff_call;
20026 Ok(Expression::Avg(avg))
20027 }
20028 Expression::Sum(mut s) => {
20029 let iff_call = Expression::Function(Box::new(Function::new(
20030 "IFF".to_string(),
20031 vec![cond, s.this.clone(), Expression::Null(Null)],
20032 )));
20033 s.this = iff_call;
20034 Ok(Expression::Sum(s))
20035 }
20036 Expression::Count(mut c) => {
20037 if let Some(ref this_expr) = c.this {
20038 let iff_call = Expression::Function(Box::new(Function::new(
20039 "IFF".to_string(),
20040 vec![cond, this_expr.clone(), Expression::Null(Null)],
20041 )));
20042 c.this = Some(iff_call);
20043 }
20044 Ok(Expression::Count(c))
20045 }
20046 other => {
20047 // Fallback: keep as Filter
20048 Ok(Expression::Filter(Box::new(crate::expressions::Filter {
20049 this: Box::new(other),
20050 expression: Box::new(cond),
20051 })))
20052 }
20053 }
20054 } else {
20055 Ok(e)
20056 }
20057 }
20058
            Action::AggFilterToIff => {
                // AggFunc.filter -> IFF wrapping: AVG(x) FILTER(WHERE cond) -> AVG(IFF(cond, x, NULL))
                // Helper macro to handle the common AggFunc case. A macro is used
                // because each Expression variant carries its own payload type,
                // but all of them expose the same `filter`/`this` fields.
                macro_rules! handle_agg_filter_to_iff {
                    ($variant:ident, $agg:expr) => {{
                        let mut agg = $agg;
                        // take() both clears the filter and hands us the predicate.
                        if let Some(filter_cond) = agg.filter.take() {
                            let iff_call = Expression::Function(Box::new(Function::new(
                                "IFF".to_string(),
                                vec![filter_cond, agg.this.clone(), Expression::Null(Null)],
                            )));
                            agg.this = iff_call;
                        }
                        Ok(Expression::$variant(agg))
                    }};
                }

                match e {
                    Expression::Avg(agg) => handle_agg_filter_to_iff!(Avg, agg),
                    Expression::Sum(agg) => handle_agg_filter_to_iff!(Sum, agg),
                    Expression::Min(agg) => handle_agg_filter_to_iff!(Min, agg),
                    Expression::Max(agg) => handle_agg_filter_to_iff!(Max, agg),
                    Expression::ArrayAgg(agg) => handle_agg_filter_to_iff!(ArrayAgg, agg),
                    Expression::CountIf(agg) => handle_agg_filter_to_iff!(CountIf, agg),
                    Expression::Stddev(agg) => handle_agg_filter_to_iff!(Stddev, agg),
                    Expression::StddevPop(agg) => handle_agg_filter_to_iff!(StddevPop, agg),
                    Expression::StddevSamp(agg) => handle_agg_filter_to_iff!(StddevSamp, agg),
                    Expression::Variance(agg) => handle_agg_filter_to_iff!(Variance, agg),
                    Expression::VarPop(agg) => handle_agg_filter_to_iff!(VarPop, agg),
                    Expression::VarSamp(agg) => handle_agg_filter_to_iff!(VarSamp, agg),
                    Expression::Median(agg) => handle_agg_filter_to_iff!(Median, agg),
                    Expression::Mode(agg) => handle_agg_filter_to_iff!(Mode, agg),
                    Expression::First(agg) => handle_agg_filter_to_iff!(First, agg),
                    Expression::Last(agg) => handle_agg_filter_to_iff!(Last, agg),
                    Expression::AnyValue(agg) => handle_agg_filter_to_iff!(AnyValue, agg),
                    Expression::ApproxDistinct(agg) => {
                        handle_agg_filter_to_iff!(ApproxDistinct, agg)
                    }
                    // COUNT has an optional argument (COUNT(*)): only wrap when
                    // there is an expression to wrap; the filter is cleared
                    // either way.
                    Expression::Count(mut c) => {
                        if let Some(filter_cond) = c.filter.take() {
                            if let Some(ref this_expr) = c.this {
                                let iff_call = Expression::Function(Box::new(Function::new(
                                    "IFF".to_string(),
                                    vec![
                                        filter_cond,
                                        this_expr.clone(),
                                        Expression::Null(Null),
                                    ],
                                )));
                                c.this = Some(iff_call);
                            }
                        }
                        Ok(Expression::Count(c))
                    }
                    other => Ok(other),
                }
            }
20116
            Action::JsonToGetPath => {
                // JSON_EXTRACT(x, '$.key') -> GET_PATH(PARSE_JSON(x), 'key')
                if let Expression::JsonExtract(je) = e {
                    // Convert to PARSE_JSON() wrapper:
                    // - JSON(x) -> PARSE_JSON(x)
                    // - PARSE_JSON(x) -> keep as-is
                    // - anything else -> wrap in PARSE_JSON()
                    let this = match &je.this {
                        Expression::Function(f)
                            if f.name.eq_ignore_ascii_case("JSON") && f.args.len() == 1 =>
                        {
                            Expression::Function(Box::new(Function::new(
                                "PARSE_JSON".to_string(),
                                f.args.clone(),
                            )))
                        }
                        Expression::Function(f)
                            if f.name.eq_ignore_ascii_case("PARSE_JSON") =>
                        {
                            je.this.clone()
                        }
                        // GET_PATH result is already JSON, don't wrap
                        Expression::Function(f) if f.name.eq_ignore_ascii_case("GET_PATH") => {
                            je.this.clone()
                        }
                        other => {
                            // Wrap non-JSON expressions in PARSE_JSON()
                            Expression::Function(Box::new(Function::new(
                                "PARSE_JSON".to_string(),
                                vec![other.clone()],
                            )))
                        }
                    };
                    // Convert path: extract key from JSONPath or strip $. prefix from string
                    let path = match &je.path {
                        Expression::JSONPath(jp) => {
                            // Extract the key from JSONPath: $root.key -> 'key'.
                            // Nested keys are joined with '.'; root and non-key
                            // segments (e.g. indexes) are skipped.
                            let mut key_parts = Vec::new();
                            for expr in &jp.expressions {
                                match expr {
                                    Expression::JSONPathRoot(_) => {} // skip root
                                    Expression::JSONPathKey(k) => {
                                        if let Expression::Literal(Literal::String(s)) =
                                            &*k.this
                                        {
                                            key_parts.push(s.clone());
                                        }
                                    }
                                    _ => {}
                                }
                            }
                            if !key_parts.is_empty() {
                                Expression::Literal(Literal::String(key_parts.join(".")))
                            } else {
                                // No extractable keys: fall back to the raw path.
                                je.path.clone()
                            }
                        }
                        Expression::Literal(Literal::String(s)) if s.starts_with("$.") => {
                            let stripped = Self::strip_json_wildcards(&s[2..].to_string());
                            Expression::Literal(Literal::String(stripped))
                        }
                        Expression::Literal(Literal::String(s)) if s.starts_with('$') => {
                            let stripped = Self::strip_json_wildcards(&s[1..].to_string());
                            Expression::Literal(Literal::String(stripped))
                        }
                        _ => je.path.clone(),
                    };
                    Ok(Expression::Function(Box::new(Function::new(
                        "GET_PATH".to_string(),
                        vec![this, path],
                    ))))
                } else {
                    Ok(e)
                }
            }
20192
20193 Action::StructToRow => {
20194 // DuckDB struct/dict -> BigQuery STRUCT(value AS key, ...) / Presto ROW
20195 // Handles both Expression::Struct and Expression::MapFunc(curly_brace_syntax=true)
20196
20197 // Extract key-value pairs from either Struct or MapFunc
20198 let kv_pairs: Option<Vec<(String, Expression)>> = match &e {
20199 Expression::Struct(s) => Some(
20200 s.fields
20201 .iter()
20202 .map(|(opt_name, field_expr)| {
20203 if let Some(name) = opt_name {
20204 (name.clone(), field_expr.clone())
20205 } else if let Expression::NamedArgument(na) = field_expr {
20206 (na.name.name.clone(), na.value.clone())
20207 } else {
20208 (String::new(), field_expr.clone())
20209 }
20210 })
20211 .collect(),
20212 ),
20213 Expression::MapFunc(m) if m.curly_brace_syntax => Some(
20214 m.keys
20215 .iter()
20216 .zip(m.values.iter())
20217 .map(|(key, value)| {
20218 let key_name = match key {
20219 Expression::Literal(Literal::String(s)) => s.clone(),
20220 Expression::Identifier(id) => id.name.clone(),
20221 _ => String::new(),
20222 };
20223 (key_name, value.clone())
20224 })
20225 .collect(),
20226 ),
20227 _ => None,
20228 };
20229
20230 if let Some(pairs) = kv_pairs {
20231 let mut named_args = Vec::new();
20232 for (key_name, value) in pairs {
20233 if matches!(target, DialectType::BigQuery) && !key_name.is_empty() {
20234 named_args.push(Expression::Alias(Box::new(
20235 crate::expressions::Alias::new(
20236 value,
20237 Identifier::new(key_name),
20238 ),
20239 )));
20240 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
20241 named_args.push(value);
20242 } else {
20243 named_args.push(value);
20244 }
20245 }
20246
20247 if matches!(target, DialectType::BigQuery) {
20248 Ok(Expression::Function(Box::new(Function::new(
20249 "STRUCT".to_string(),
20250 named_args,
20251 ))))
20252 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
20253 // For Presto/Trino, infer types and wrap in CAST(ROW(...) AS ROW(name TYPE, ...))
20254 let row_func = Expression::Function(Box::new(Function::new(
20255 "ROW".to_string(),
20256 named_args,
20257 )));
20258
20259 // Try to infer types for each pair
20260 let kv_pairs_again: Option<Vec<(String, Expression)>> = match &e {
20261 Expression::Struct(s) => Some(
20262 s.fields
20263 .iter()
20264 .map(|(opt_name, field_expr)| {
20265 if let Some(name) = opt_name {
20266 (name.clone(), field_expr.clone())
20267 } else if let Expression::NamedArgument(na) = field_expr
20268 {
20269 (na.name.name.clone(), na.value.clone())
20270 } else {
20271 (String::new(), field_expr.clone())
20272 }
20273 })
20274 .collect(),
20275 ),
20276 Expression::MapFunc(m) if m.curly_brace_syntax => Some(
20277 m.keys
20278 .iter()
20279 .zip(m.values.iter())
20280 .map(|(key, value)| {
20281 let key_name = match key {
20282 Expression::Literal(Literal::String(s)) => {
20283 s.clone()
20284 }
20285 Expression::Identifier(id) => id.name.clone(),
20286 _ => String::new(),
20287 };
20288 (key_name, value.clone())
20289 })
20290 .collect(),
20291 ),
20292 _ => None,
20293 };
20294
20295 if let Some(pairs) = kv_pairs_again {
20296 // Infer types for all values
20297 let mut all_inferred = true;
20298 let mut fields = Vec::new();
20299 for (name, value) in &pairs {
20300 let inferred_type = match value {
20301 Expression::Literal(Literal::Number(n)) => {
20302 if n.contains('.') {
20303 Some(DataType::Double {
20304 precision: None,
20305 scale: None,
20306 })
20307 } else {
20308 Some(DataType::Int {
20309 length: None,
20310 integer_spelling: true,
20311 })
20312 }
20313 }
20314 Expression::Literal(Literal::String(_)) => {
20315 Some(DataType::VarChar {
20316 length: None,
20317 parenthesized_length: false,
20318 })
20319 }
20320 Expression::Boolean(_) => Some(DataType::Boolean),
20321 _ => None,
20322 };
20323 if let Some(dt) = inferred_type {
20324 fields.push(crate::expressions::StructField::new(
20325 name.clone(),
20326 dt,
20327 ));
20328 } else {
20329 all_inferred = false;
20330 break;
20331 }
20332 }
20333
20334 if all_inferred && !fields.is_empty() {
20335 let row_type = DataType::Struct {
20336 fields,
20337 nested: true,
20338 };
20339 Ok(Expression::Cast(Box::new(Cast {
20340 this: row_func,
20341 to: row_type,
20342 trailing_comments: Vec::new(),
20343 double_colon_syntax: false,
20344 format: None,
20345 default: None,
20346 })))
20347 } else {
20348 Ok(row_func)
20349 }
20350 } else {
20351 Ok(row_func)
20352 }
20353 } else {
20354 Ok(Expression::Function(Box::new(Function::new(
20355 "ROW".to_string(),
20356 named_args,
20357 ))))
20358 }
20359 } else {
20360 Ok(e)
20361 }
20362 }
20363
            Action::SparkStructConvert => {
                // Spark STRUCT(val AS name, ...) -> Presto CAST(ROW(...) AS ROW(name TYPE, ...))
                // or DuckDB {'name': val, ...}
                if let Expression::Function(f) = e {
                    // Extract name-value pairs from aliased args
                    // (an unaliased argument yields an empty-string field name).
                    let mut pairs: Vec<(String, Expression)> = Vec::new();
                    for arg in &f.args {
                        match arg {
                            Expression::Alias(a) => {
                                pairs.push((a.alias.name.clone(), a.this.clone()));
                            }
                            _ => {
                                pairs.push((String::new(), arg.clone()));
                            }
                        }
                    }

                    match target {
                        DialectType::DuckDB => {
                            // Convert to DuckDB struct literal {'name': value, ...}
                            let mut keys = Vec::new();
                            let mut values = Vec::new();
                            for (name, value) in &pairs {
                                keys.push(Expression::Literal(Literal::String(name.clone())));
                                values.push(value.clone());
                            }
                            Ok(Expression::MapFunc(Box::new(
                                crate::expressions::MapConstructor {
                                    keys,
                                    values,
                                    curly_brace_syntax: true,
                                    with_map_keyword: false,
                                },
                            )))
                        }
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // Convert to CAST(ROW(val1, val2) AS ROW(name1 TYPE1, name2 TYPE2))
                            let row_args: Vec<Expression> =
                                pairs.iter().map(|(_, v)| v.clone()).collect();
                            let row_func = Expression::Function(Box::new(Function::new(
                                "ROW".to_string(),
                                row_args,
                            )));

                            // Infer types
                            // Only number/string/boolean literals are recognized; any
                            // other value aborts inference and we emit a bare ROW(...).
                            let mut all_inferred = true;
                            let mut fields = Vec::new();
                            for (name, value) in &pairs {
                                let inferred_type = match value {
                                    Expression::Literal(Literal::Number(n)) => {
                                        if n.contains('.') {
                                            Some(DataType::Double {
                                                precision: None,
                                                scale: None,
                                            })
                                        } else {
                                            Some(DataType::Int {
                                                length: None,
                                                integer_spelling: true,
                                            })
                                        }
                                    }
                                    Expression::Literal(Literal::String(_)) => {
                                        Some(DataType::VarChar {
                                            length: None,
                                            parenthesized_length: false,
                                        })
                                    }
                                    Expression::Boolean(_) => Some(DataType::Boolean),
                                    _ => None,
                                };
                                if let Some(dt) = inferred_type {
                                    fields.push(crate::expressions::StructField::new(
                                        name.clone(),
                                        dt,
                                    ));
                                } else {
                                    all_inferred = false;
                                    break;
                                }
                            }

                            if all_inferred && !fields.is_empty() {
                                let row_type = DataType::Struct {
                                    fields,
                                    nested: true,
                                };
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: row_func,
                                    to: row_type,
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                })))
                            } else {
                                Ok(row_func)
                            }
                        }
                        // Other targets keep the original STRUCT(...) call untouched.
                        _ => Ok(Expression::Function(f)),
                    }
                } else {
                    Ok(e)
                }
            }
20469
20470 Action::ApproxCountDistinctToApproxDistinct => {
20471 // APPROX_COUNT_DISTINCT(x) -> APPROX_DISTINCT(x)
20472 if let Expression::ApproxCountDistinct(f) = e {
20473 Ok(Expression::ApproxDistinct(f))
20474 } else {
20475 Ok(e)
20476 }
20477 }
20478
            Action::CollectListToArrayAgg => {
                // COLLECT_LIST(x) -> ARRAY_AGG(x) FILTER(WHERE x IS NOT NULL)
                if let Expression::AggregateFunction(f) = e {
                    // The FILTER clause mirrors COLLECT_LIST's null-skipping semantics.
                    let filter_expr = if !f.args.is_empty() {
                        let arg = f.args[0].clone();
                        Some(Expression::IsNull(Box::new(crate::expressions::IsNull {
                            this: arg,
                            not: true,
                            postfix_form: false,
                        })))
                    } else {
                        None
                    };
                    let agg = crate::expressions::AggFunc {
                        // A zero-argument call degrades to ARRAY_AGG(NULL).
                        this: if f.args.is_empty() {
                            Expression::Null(crate::expressions::Null)
                        } else {
                            f.args[0].clone()
                        },
                        distinct: f.distinct,
                        order_by: f.order_by.clone(),
                        filter: filter_expr,
                        // NOTE(review): the source call's ignore_nulls/limit/filter are
                        // not carried over here — confirm that is intentional.
                        ignore_nulls: None,
                        name: None,
                        having_max: None,
                        limit: None,
                    };
                    Ok(Expression::ArrayAgg(Box::new(agg)))
                } else {
                    Ok(e)
                }
            }
20511
            Action::CollectSetConvert => {
                // COLLECT_SET(x) -> target-specific
                if let Expression::AggregateFunction(f) = e {
                    match target {
                        // Presto spells it SET_AGG; clauses are forwarded.
                        DialectType::Presto => Ok(Expression::AggregateFunction(Box::new(
                            crate::expressions::AggregateFunction {
                                name: "SET_AGG".to_string(),
                                args: f.args,
                                distinct: false,
                                order_by: f.order_by,
                                filter: f.filter,
                                limit: f.limit,
                                ignore_nulls: f.ignore_nulls,
                            },
                        ))),
                        // Snowflake spells it ARRAY_UNIQUE_AGG; clauses are forwarded.
                        DialectType::Snowflake => Ok(Expression::AggregateFunction(Box::new(
                            crate::expressions::AggregateFunction {
                                name: "ARRAY_UNIQUE_AGG".to_string(),
                                args: f.args,
                                distinct: false,
                                order_by: f.order_by,
                                filter: f.filter,
                                limit: f.limit,
                                ignore_nulls: f.ignore_nulls,
                            },
                        ))),
                        DialectType::Trino | DialectType::DuckDB => {
                            // Trino/DuckDB: ARRAY_AGG(DISTINCT x).
                            // NOTE(review): order_by/filter/limit from the source call
                            // are dropped on this path — confirm that is intended.
                            let agg = crate::expressions::AggFunc {
                                this: if f.args.is_empty() {
                                    Expression::Null(crate::expressions::Null)
                                } else {
                                    f.args[0].clone()
                                },
                                distinct: true,
                                order_by: Vec::new(),
                                filter: None,
                                ignore_nulls: None,
                                name: None,
                                having_max: None,
                                limit: None,
                            };
                            Ok(Expression::ArrayAgg(Box::new(agg)))
                        }
                        // Any other target keeps COLLECT_SET unchanged.
                        _ => Ok(Expression::AggregateFunction(f)),
                    }
                } else {
                    Ok(e)
                }
            }
20561
20562 Action::PercentileConvert => {
20563 // PERCENTILE(x, 0.5) -> QUANTILE(x, 0.5) / APPROX_PERCENTILE(x, 0.5)
20564 if let Expression::AggregateFunction(f) = e {
20565 let name = match target {
20566 DialectType::DuckDB => "QUANTILE",
20567 DialectType::Presto | DialectType::Trino => "APPROX_PERCENTILE",
20568 _ => "PERCENTILE",
20569 };
20570 Ok(Expression::AggregateFunction(Box::new(
20571 crate::expressions::AggregateFunction {
20572 name: name.to_string(),
20573 args: f.args,
20574 distinct: f.distinct,
20575 order_by: f.order_by,
20576 filter: f.filter,
20577 limit: f.limit,
20578 ignore_nulls: f.ignore_nulls,
20579 },
20580 )))
20581 } else {
20582 Ok(e)
20583 }
20584 }
20585
20586 Action::CorrIsnanWrap => {
20587 // CORR(a, b) -> CASE WHEN ISNAN(CORR(a, b)) THEN NULL ELSE CORR(a, b) END
20588 // The CORR expression could be AggregateFunction, WindowFunction, or Filter-wrapped
20589 let corr_clone = e.clone();
20590 let isnan = Expression::Function(Box::new(Function::new(
20591 "ISNAN".to_string(),
20592 vec![corr_clone.clone()],
20593 )));
20594 let case_expr = Expression::Case(Box::new(Case {
20595 operand: None,
20596 whens: vec![(isnan, Expression::Null(crate::expressions::Null))],
20597 else_: Some(corr_clone),
20598 comments: Vec::new(),
20599 }));
20600 Ok(case_expr)
20601 }
20602
            Action::TruncToDateTrunc => {
                // TRUNC(timestamp, 'MONTH') -> DATE_TRUNC('MONTH', timestamp)
                if let Expression::Function(f) = e {
                    // Only the two-argument form is rewritten; other arities (e.g.
                    // numeric TRUNC(n)) are left untouched.
                    if f.args.len() == 2 {
                        let timestamp = f.args[0].clone();
                        let unit_expr = f.args[1].clone();

                        if matches!(target, DialectType::ClickHouse) {
                            // For ClickHouse, produce Expression::DateTrunc which the generator
                            // outputs as DATE_TRUNC(...) without going through the ClickHouse
                            // target transform that would convert it to dateTrunc
                            let unit_str = Self::get_unit_str_static(&unit_expr);
                            // Map known unit spellings onto the enum; anything else is
                            // carried through verbatim as a custom unit.
                            let dt_field = match unit_str.as_str() {
                                "YEAR" => DateTimeField::Year,
                                "MONTH" => DateTimeField::Month,
                                "DAY" => DateTimeField::Day,
                                "HOUR" => DateTimeField::Hour,
                                "MINUTE" => DateTimeField::Minute,
                                "SECOND" => DateTimeField::Second,
                                "WEEK" => DateTimeField::Week,
                                "QUARTER" => DateTimeField::Quarter,
                                _ => DateTimeField::Custom(unit_str),
                            };
                            Ok(Expression::DateTrunc(Box::new(
                                crate::expressions::DateTruncFunc {
                                    this: timestamp,
                                    unit: dt_field,
                                },
                            )))
                        } else {
                            // Everyone else: swap argument order to DATE_TRUNC(unit, ts).
                            let new_args = vec![unit_expr, timestamp];
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_TRUNC".to_string(),
                                new_args,
                            ))))
                        }
                    } else {
                        Ok(Expression::Function(f))
                    }
                } else {
                    Ok(e)
                }
            }
20646
            Action::ArrayContainsConvert => {
                // ARRAY_CONTAINS(arr, val) -> target-specific membership test.
                if let Expression::ArrayContains(f) = e {
                    match target {
                        DialectType::Presto | DialectType::Trino => {
                            // ARRAY_CONTAINS(arr, val) -> CONTAINS(arr, val)
                            Ok(Expression::Function(Box::new(Function::new(
                                "CONTAINS".to_string(),
                                vec![f.this, f.expression],
                            ))))
                        }
                        DialectType::Snowflake => {
                            // ARRAY_CONTAINS(arr, val) -> ARRAY_CONTAINS(CAST(val AS VARIANT), arr)
                            // Snowflake takes the value first and compares it as VARIANT.
                            let cast_val =
                                Expression::Cast(Box::new(crate::expressions::Cast {
                                    this: f.expression,
                                    to: crate::expressions::DataType::Custom {
                                        name: "VARIANT".to_string(),
                                    },
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                }));
                            Ok(Expression::Function(Box::new(Function::new(
                                "ARRAY_CONTAINS".to_string(),
                                vec![cast_val, f.this],
                            ))))
                        }
                        // Other targets keep the ArrayContains node unchanged.
                        _ => Ok(Expression::ArrayContains(f)),
                    }
                } else {
                    Ok(e)
                }
            }
20681
            Action::StrPositionExpand => {
                // StrPosition with position arg -> complex STRPOS expansion for Presto/DuckDB
                // LOCATE(substr, str, pos) / STRPOS(str, substr, pos) ->
                // For Presto: IF(STRPOS(SUBSTRING(str, pos), substr) = 0, 0, STRPOS(SUBSTRING(str, pos), substr) + pos - 1)
                // For DuckDB: CASE WHEN STRPOS(SUBSTRING(str, pos), substr) = 0 THEN 0 ELSE STRPOS(SUBSTRING(str, pos), substr) + pos - 1 END
                if let Expression::StrPosition(sp) = e {
                    let crate::expressions::StrPosition {
                        this,
                        substr,
                        position,
                        occurrence,
                    } = *sp;
                    let string = *this;
                    // A missing substring degrades to NULL; a missing position
                    // defaults to 1 (SQL string indexing is 1-based).
                    let substr_expr = match substr {
                        Some(s) => *s,
                        None => Expression::Null(Null),
                    };
                    let pos = match position {
                        Some(p) => *p,
                        None => Expression::number(1),
                    };

                    // SUBSTRING(string, pos)
                    let substring_call = Expression::Function(Box::new(Function::new(
                        "SUBSTRING".to_string(),
                        vec![string.clone(), pos.clone()],
                    )));
                    // STRPOS(SUBSTRING(string, pos), substr)
                    let strpos_call = Expression::Function(Box::new(Function::new(
                        "STRPOS".to_string(),
                        vec![substring_call, substr_expr.clone()],
                    )));
                    // STRPOS(...) + pos - 1
                    let pos_adjusted =
                        Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
                            Expression::Add(Box::new(crate::expressions::BinaryOp::new(
                                strpos_call.clone(),
                                pos.clone(),
                            ))),
                            Expression::number(1),
                        )));
                    // STRPOS(...) = 0
                    let is_zero = Expression::Eq(Box::new(crate::expressions::BinaryOp::new(
                        strpos_call.clone(),
                        Expression::number(0),
                    )));

                    // NOTE(review): `occurrence` survives only on the fall-through
                    // path below; the Presto/DuckDB expansions ignore it — confirm this
                    // action is only scheduled when no occurrence argument is present.
                    match target {
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // IF(STRPOS(SUBSTRING(str, pos), substr) = 0, 0, STRPOS(SUBSTRING(str, pos), substr) + pos - 1)
                            Ok(Expression::Function(Box::new(Function::new(
                                "IF".to_string(),
                                vec![is_zero, Expression::number(0), pos_adjusted],
                            ))))
                        }
                        DialectType::DuckDB => {
                            // CASE WHEN STRPOS(SUBSTRING(str, pos), substr) = 0 THEN 0 ELSE STRPOS(SUBSTRING(str, pos), substr) + pos - 1 END
                            Ok(Expression::Case(Box::new(Case {
                                operand: None,
                                whens: vec![(is_zero, Expression::number(0))],
                                else_: Some(pos_adjusted),
                                comments: Vec::new(),
                            })))
                        }
                        _ => {
                            // Reconstruct StrPosition
                            // NOTE(review): a None substring is rebuilt as Some(NULL)
                            // here, so the node is not fully round-tripped — verify.
                            Ok(Expression::StrPosition(Box::new(
                                crate::expressions::StrPosition {
                                    this: Box::new(string),
                                    substr: Some(Box::new(substr_expr)),
                                    position: Some(Box::new(pos)),
                                    occurrence,
                                },
                            )))
                        }
                    }
                } else {
                    Ok(e)
                }
            }
20762
            Action::MonthsBetweenConvert => {
                // MONTHS_BETWEEN(end, start) -> target-specific month difference.
                if let Expression::MonthsBetween(mb) = e {
                    let crate::expressions::BinaryFunc {
                        this: end_date,
                        expression: start_date,
                        ..
                    } = *mb;
                    match target {
                        DialectType::DuckDB => {
                            // Emulate the fractional-month result:
                            //   DATE_DIFF('MONTH', start, end)
                            //   + CASE WHEN DAY(end) = DAY(LAST_DAY(end))
                            //          AND DAY(start) = DAY(LAST_DAY(start))
                            //     THEN 0 ELSE (DAY(end) - DAY(start)) / 31.0 END
                            // i.e. two month-end dates count as whole months; otherwise
                            // the day delta contributes a /31.0 fraction.
                            let cast_end = Self::ensure_cast_date(end_date);
                            let cast_start = Self::ensure_cast_date(start_date);
                            let dd = Expression::Function(Box::new(Function::new(
                                "DATE_DIFF".to_string(),
                                vec![
                                    Expression::string("MONTH"),
                                    cast_start.clone(),
                                    cast_end.clone(),
                                ],
                            )));
                            let day_end = Expression::Function(Box::new(Function::new(
                                "DAY".to_string(),
                                vec![cast_end.clone()],
                            )));
                            let day_start = Expression::Function(Box::new(Function::new(
                                "DAY".to_string(),
                                vec![cast_start.clone()],
                            )));
                            let last_day_end = Expression::Function(Box::new(Function::new(
                                "LAST_DAY".to_string(),
                                vec![cast_end.clone()],
                            )));
                            let last_day_start = Expression::Function(Box::new(Function::new(
                                "LAST_DAY".to_string(),
                                vec![cast_start.clone()],
                            )));
                            let day_last_end = Expression::Function(Box::new(Function::new(
                                "DAY".to_string(),
                                vec![last_day_end],
                            )));
                            let day_last_start = Expression::Function(Box::new(Function::new(
                                "DAY".to_string(),
                                vec![last_day_start],
                            )));
                            let cond1 = Expression::Eq(Box::new(BinaryOp::new(
                                day_end.clone(),
                                day_last_end,
                            )));
                            let cond2 = Expression::Eq(Box::new(BinaryOp::new(
                                day_start.clone(),
                                day_last_start,
                            )));
                            let both_cond =
                                Expression::And(Box::new(BinaryOp::new(cond1, cond2)));
                            let day_diff =
                                Expression::Sub(Box::new(BinaryOp::new(day_end, day_start)));
                            // Parenthesize the subtraction so the division binds correctly.
                            let day_diff_paren =
                                Expression::Paren(Box::new(crate::expressions::Paren {
                                    this: day_diff,
                                    trailing_comments: Vec::new(),
                                }));
                            let frac = Expression::Div(Box::new(BinaryOp::new(
                                day_diff_paren,
                                Expression::Literal(Literal::Number("31.0".to_string())),
                            )));
                            let case_expr = Expression::Case(Box::new(Case {
                                operand: None,
                                whens: vec![(both_cond, Expression::number(0))],
                                else_: Some(frac),
                                comments: Vec::new(),
                            }));
                            Ok(Expression::Add(Box::new(BinaryOp::new(dd, case_expr))))
                        }
                        DialectType::Snowflake | DialectType::Redshift => {
                            // DATEDIFF(MONTH, start, end) — whole months only.
                            let unit = Expression::Identifier(Identifier::new("MONTH"));
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATEDIFF".to_string(),
                                vec![unit, start_date, end_date],
                            ))))
                        }
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // DATE_DIFF('MONTH', start, end) — unit is a string literal here.
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_DIFF".to_string(),
                                vec![Expression::string("MONTH"), start_date, end_date],
                            ))))
                        }
                        // Default: rebuild the MonthsBetween node.
                        // NOTE(review): original_name is reset to None here — verify.
                        _ => Ok(Expression::MonthsBetween(Box::new(
                            crate::expressions::BinaryFunc {
                                this: end_date,
                                expression: start_date,
                                original_name: None,
                            },
                        ))),
                    }
                } else {
                    Ok(e)
                }
            }
20860
20861 Action::AddMonthsConvert => {
20862 if let Expression::AddMonths(am) = e {
20863 let date = am.this;
20864 let val = am.expression;
20865 match target {
20866 DialectType::TSQL | DialectType::Fabric => {
20867 let cast_date = Self::ensure_cast_datetime2(date);
20868 Ok(Expression::Function(Box::new(Function::new(
20869 "DATEADD".to_string(),
20870 vec![
20871 Expression::Identifier(Identifier::new("MONTH")),
20872 val,
20873 cast_date,
20874 ],
20875 ))))
20876 }
20877 DialectType::DuckDB if matches!(source, DialectType::Snowflake) => {
20878 // DuckDB ADD_MONTHS from Snowflake: CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(date + interval) ELSE date + interval END
20879 // Optionally wrapped in CAST(... AS type) if the input had a specific type
20880
20881 // Determine the cast type from the date expression
20882 let (cast_date, return_type) = match &date {
20883 Expression::Literal(Literal::String(_)) => {
20884 // String literal: CAST(str AS TIMESTAMP), no outer CAST
20885 (
20886 Expression::Cast(Box::new(Cast {
20887 this: date.clone(),
20888 to: DataType::Timestamp {
20889 precision: None,
20890 timezone: false,
20891 },
20892 trailing_comments: Vec::new(),
20893 double_colon_syntax: false,
20894 format: None,
20895 default: None,
20896 })),
20897 None,
20898 )
20899 }
20900 Expression::Cast(c) => {
20901 // Already cast (e.g., '2023-01-31'::DATE) - keep the cast, wrap result in CAST(... AS type)
20902 (date.clone(), Some(c.to.clone()))
20903 }
20904 _ => {
20905 // Expression or NULL::TYPE - keep as-is, check for cast type
20906 if let Expression::Cast(c) = &date {
20907 (date.clone(), Some(c.to.clone()))
20908 } else {
20909 (date.clone(), None)
20910 }
20911 }
20912 };
20913
20914 // Build the interval expression
20915 // For non-integer values (float, decimal, cast), use TO_MONTHS(CAST(ROUND(val) AS INT))
20916 // For integer values, use INTERVAL val MONTH
20917 let is_non_integer_val = match &val {
20918 Expression::Literal(Literal::Number(n)) => n.contains('.'),
20919 Expression::Cast(_) => true, // e.g., 3.2::DECIMAL(10,2)
20920 Expression::Neg(n) => {
20921 if let Expression::Literal(Literal::Number(s)) = &n.this {
20922 s.contains('.')
20923 } else {
20924 false
20925 }
20926 }
20927 _ => false,
20928 };
20929
20930 let add_interval = if is_non_integer_val {
20931 // TO_MONTHS(CAST(ROUND(val) AS INT))
20932 let round_val = Expression::Function(Box::new(Function::new(
20933 "ROUND".to_string(),
20934 vec![val.clone()],
20935 )));
20936 let cast_int = Expression::Cast(Box::new(Cast {
20937 this: round_val,
20938 to: DataType::Int {
20939 length: None,
20940 integer_spelling: false,
20941 },
20942 trailing_comments: Vec::new(),
20943 double_colon_syntax: false,
20944 format: None,
20945 default: None,
20946 }));
20947 Expression::Function(Box::new(Function::new(
20948 "TO_MONTHS".to_string(),
20949 vec![cast_int],
20950 )))
20951 } else {
20952 // INTERVAL val MONTH
20953 // For negative numbers, wrap in parens
20954 let interval_val = match &val {
20955 Expression::Literal(Literal::Number(n))
20956 if n.starts_with('-') =>
20957 {
20958 Expression::Paren(Box::new(Paren {
20959 this: val.clone(),
20960 trailing_comments: Vec::new(),
20961 }))
20962 }
20963 Expression::Neg(_) => Expression::Paren(Box::new(Paren {
20964 this: val.clone(),
20965 trailing_comments: Vec::new(),
20966 })),
20967 Expression::Null(_) => Expression::Paren(Box::new(Paren {
20968 this: val.clone(),
20969 trailing_comments: Vec::new(),
20970 })),
20971 _ => val.clone(),
20972 };
20973 Expression::Interval(Box::new(crate::expressions::Interval {
20974 this: Some(interval_val),
20975 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
20976 unit: crate::expressions::IntervalUnit::Month,
20977 use_plural: false,
20978 }),
20979 }))
20980 };
20981
20982 // Build: date + interval
20983 let date_plus_interval = Expression::Add(Box::new(BinaryOp::new(
20984 cast_date.clone(),
20985 add_interval.clone(),
20986 )));
20987
20988 // Build LAST_DAY(date)
20989 let last_day_date = Expression::Function(Box::new(Function::new(
20990 "LAST_DAY".to_string(),
20991 vec![cast_date.clone()],
20992 )));
20993
20994 // Build LAST_DAY(date + interval)
20995 let last_day_date_plus =
20996 Expression::Function(Box::new(Function::new(
20997 "LAST_DAY".to_string(),
20998 vec![date_plus_interval.clone()],
20999 )));
21000
21001 // Build: CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(date + interval) ELSE date + interval END
21002 let case_expr = Expression::Case(Box::new(Case {
21003 operand: None,
21004 whens: vec![(
21005 Expression::Eq(Box::new(BinaryOp::new(
21006 last_day_date,
21007 cast_date.clone(),
21008 ))),
21009 last_day_date_plus,
21010 )],
21011 else_: Some(date_plus_interval),
21012 comments: Vec::new(),
21013 }));
21014
21015 // Wrap in CAST(... AS type) if needed
21016 if let Some(dt) = return_type {
21017 Ok(Expression::Cast(Box::new(Cast {
21018 this: case_expr,
21019 to: dt,
21020 trailing_comments: Vec::new(),
21021 double_colon_syntax: false,
21022 format: None,
21023 default: None,
21024 })))
21025 } else {
21026 Ok(case_expr)
21027 }
21028 }
21029 DialectType::DuckDB => {
21030 // Non-Snowflake source: simple date + INTERVAL
21031 let cast_date =
21032 if matches!(&date, Expression::Literal(Literal::String(_))) {
21033 Expression::Cast(Box::new(Cast {
21034 this: date,
21035 to: DataType::Timestamp {
21036 precision: None,
21037 timezone: false,
21038 },
21039 trailing_comments: Vec::new(),
21040 double_colon_syntax: false,
21041 format: None,
21042 default: None,
21043 }))
21044 } else {
21045 date
21046 };
21047 let interval =
21048 Expression::Interval(Box::new(crate::expressions::Interval {
21049 this: Some(val),
21050 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
21051 unit: crate::expressions::IntervalUnit::Month,
21052 use_plural: false,
21053 }),
21054 }));
21055 Ok(Expression::Add(Box::new(BinaryOp::new(
21056 cast_date, interval,
21057 ))))
21058 }
21059 DialectType::Snowflake => {
21060 // Keep ADD_MONTHS when source is also Snowflake
21061 if matches!(source, DialectType::Snowflake) {
21062 Ok(Expression::Function(Box::new(Function::new(
21063 "ADD_MONTHS".to_string(),
21064 vec![date, val],
21065 ))))
21066 } else {
21067 Ok(Expression::Function(Box::new(Function::new(
21068 "DATEADD".to_string(),
21069 vec![
21070 Expression::Identifier(Identifier::new("MONTH")),
21071 val,
21072 date,
21073 ],
21074 ))))
21075 }
21076 }
21077 DialectType::Redshift => {
21078 Ok(Expression::Function(Box::new(Function::new(
21079 "DATEADD".to_string(),
21080 vec![
21081 Expression::Identifier(Identifier::new("MONTH")),
21082 val,
21083 date,
21084 ],
21085 ))))
21086 }
21087 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
21088 let cast_date =
21089 if matches!(&date, Expression::Literal(Literal::String(_))) {
21090 Expression::Cast(Box::new(Cast {
21091 this: date,
21092 to: DataType::Timestamp {
21093 precision: None,
21094 timezone: false,
21095 },
21096 trailing_comments: Vec::new(),
21097 double_colon_syntax: false,
21098 format: None,
21099 default: None,
21100 }))
21101 } else {
21102 date
21103 };
21104 Ok(Expression::Function(Box::new(Function::new(
21105 "DATE_ADD".to_string(),
21106 vec![Expression::string("MONTH"), val, cast_date],
21107 ))))
21108 }
21109 DialectType::BigQuery => {
21110 let interval =
21111 Expression::Interval(Box::new(crate::expressions::Interval {
21112 this: Some(val),
21113 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
21114 unit: crate::expressions::IntervalUnit::Month,
21115 use_plural: false,
21116 }),
21117 }));
21118 let cast_date =
21119 if matches!(&date, Expression::Literal(Literal::String(_))) {
21120 Expression::Cast(Box::new(Cast {
21121 this: date,
21122 to: DataType::Custom {
21123 name: "DATETIME".to_string(),
21124 },
21125 trailing_comments: Vec::new(),
21126 double_colon_syntax: false,
21127 format: None,
21128 default: None,
21129 }))
21130 } else {
21131 date
21132 };
21133 Ok(Expression::Function(Box::new(Function::new(
21134 "DATE_ADD".to_string(),
21135 vec![cast_date, interval],
21136 ))))
21137 }
21138 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
21139 Ok(Expression::Function(Box::new(Function::new(
21140 "ADD_MONTHS".to_string(),
21141 vec![date, val],
21142 ))))
21143 }
21144 _ => {
21145 // Default: keep as AddMonths expression
21146 Ok(Expression::AddMonths(Box::new(
21147 crate::expressions::BinaryFunc {
21148 this: date,
21149 expression: val,
21150 original_name: None,
21151 },
21152 )))
21153 }
21154 }
21155 } else {
21156 Ok(e)
21157 }
21158 }
21159
            Action::PercentileContConvert => {
                // PERCENTILE_CONT(p) WITHIN GROUP (ORDER BY col) ->
                // Presto/Trino: APPROX_PERCENTILE(col, p)
                // Spark/Databricks: PERCENTILE_APPROX(col, p)
                if let Expression::WithinGroup(wg) = e {
                    // Extract percentile value and order by column
                    // NOTE(review): `_is_disc` (PERCENTILE_DISC detection) is computed
                    // but never used — both CONT and DISC currently map to the same
                    // approximate function. Confirm that is intended.
                    let (percentile, _is_disc) = match &wg.this {
                        Expression::Function(f) => {
                            let is_disc = f.name.eq_ignore_ascii_case("PERCENTILE_DISC");
                            // Default to the median (0.5) when no argument was given.
                            let pct = f.args.first().cloned().unwrap_or(Expression::Literal(
                                Literal::Number("0.5".to_string()),
                            ));
                            (pct, is_disc)
                        }
                        Expression::AggregateFunction(af) => {
                            let is_disc = af.name.eq_ignore_ascii_case("PERCENTILE_DISC");
                            let pct = af.args.first().cloned().unwrap_or(Expression::Literal(
                                Literal::Number("0.5".to_string()),
                            ));
                            (pct, is_disc)
                        }
                        Expression::PercentileCont(pc) => (pc.percentile.clone(), false),
                        // Anything else under WITHIN GROUP is left untouched.
                        _ => return Ok(Expression::WithinGroup(wg)),
                    };
                    // The ORDER BY key becomes the first argument; default to 1 when
                    // no ORDER BY key is present.
                    let col = wg
                        .order_by
                        .first()
                        .map(|o| o.this.clone())
                        .unwrap_or(Expression::Literal(Literal::Number("1".to_string())));

                    let func_name = match target {
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            "APPROX_PERCENTILE"
                        }
                        _ => "PERCENTILE_APPROX", // Spark, Databricks
                    };
                    Ok(Expression::Function(Box::new(Function::new(
                        func_name.to_string(),
                        vec![col, percentile],
                    ))))
                } else {
                    Ok(e)
                }
            }
21204
21205 Action::CurrentUserSparkParens => {
21206 // CURRENT_USER -> CURRENT_USER() for Spark
21207 if let Expression::CurrentUser(_) = e {
21208 Ok(Expression::Function(Box::new(Function::new(
21209 "CURRENT_USER".to_string(),
21210 vec![],
21211 ))))
21212 } else {
21213 Ok(e)
21214 }
21215 }
21216
            Action::SparkDateFuncCast => {
                // MONTH/YEAR/DAY('string') from Spark -> wrap arg in CAST to DATE
                // so string-typed arguments keep working on stricter targets.
                let cast_arg = |arg: Expression| -> Expression {
                    match target {
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // Presto-family: double cast via TIMESTAMP then DATE
                            // (see double_cast_timestamp_date).
                            Self::double_cast_timestamp_date(arg)
                        }
                        _ => {
                            // DuckDB, PostgreSQL, etc: CAST(arg AS DATE)
                            Self::ensure_cast_date(arg)
                        }
                    }
                };
                match e {
                    Expression::Month(f) => Ok(Expression::Month(Box::new(
                        crate::expressions::UnaryFunc::new(cast_arg(f.this)),
                    ))),
                    Expression::Year(f) => Ok(Expression::Year(Box::new(
                        crate::expressions::UnaryFunc::new(cast_arg(f.this)),
                    ))),
                    Expression::Day(f) => Ok(Expression::Day(Box::new(
                        crate::expressions::UnaryFunc::new(cast_arg(f.this)),
                    ))),
                    // Any other expression passes through unchanged.
                    other => Ok(other),
                }
            }
21243
21244 Action::MapFromArraysConvert => {
21245 // Expression::MapFromArrays -> target-specific
21246 if let Expression::MapFromArrays(mfa) = e {
21247 let keys = mfa.this;
21248 let values = mfa.expression;
21249 match target {
21250 DialectType::Snowflake => Ok(Expression::Function(Box::new(
21251 Function::new("OBJECT_CONSTRUCT".to_string(), vec![keys, values]),
21252 ))),
21253 _ => {
21254 // Hive, Presto, DuckDB, etc.: MAP(keys, values)
21255 Ok(Expression::Function(Box::new(Function::new(
21256 "MAP".to_string(),
21257 vec![keys, values],
21258 ))))
21259 }
21260 }
21261 } else {
21262 Ok(e)
21263 }
21264 }
21265
            Action::AnyToExists => {
                // expr <op> ANY(array) -> EXISTS(array, x -> expr <op> x) for
                // engines with higher-order array functions. Without a comparison
                // operator the quantified expression is left unchanged.
                if let Expression::Any(q) = e {
                    if let Some(op) = q.op.clone() {
                        // Lambda parameter standing for each array element.
                        let lambda_param = crate::expressions::Identifier::new("x");
                        let rhs = Expression::Identifier(lambda_param.clone());
                        // Rebuild the comparison with the lambda variable on the
                        // right-hand side; one arm per supported operator.
                        let body = match op {
                            crate::expressions::QuantifiedOp::Eq => {
                                Expression::Eq(Box::new(BinaryOp::new(q.this, rhs)))
                            }
                            crate::expressions::QuantifiedOp::Neq => {
                                Expression::Neq(Box::new(BinaryOp::new(q.this, rhs)))
                            }
                            crate::expressions::QuantifiedOp::Lt => {
                                Expression::Lt(Box::new(BinaryOp::new(q.this, rhs)))
                            }
                            crate::expressions::QuantifiedOp::Lte => {
                                Expression::Lte(Box::new(BinaryOp::new(q.this, rhs)))
                            }
                            crate::expressions::QuantifiedOp::Gt => {
                                Expression::Gt(Box::new(BinaryOp::new(q.this, rhs)))
                            }
                            crate::expressions::QuantifiedOp::Gte => {
                                Expression::Gte(Box::new(BinaryOp::new(q.this, rhs)))
                            }
                        };
                        let lambda =
                            Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                                parameters: vec![lambda_param],
                                body,
                                colon: false,
                                parameter_types: Vec::new(),
                            }));
                        // EXISTS(<array expr>, x -> <comparison>)
                        Ok(Expression::Function(Box::new(Function::new(
                            "EXISTS".to_string(),
                            vec![q.subquery, lambda],
                        ))))
                    } else {
                        // No operator recorded: keep the ANY node as-is.
                        Ok(Expression::Any(q))
                    }
                } else {
                    Ok(e)
                }
            }
21309
            Action::GenerateSeriesConvert => {
                // GENERATE_SERIES(start, end[, step]) -> SEQUENCE for Spark/Databricks/Hive, wrapped in UNNEST/EXPLODE
                // For DuckDB target: wrap in UNNEST(GENERATE_SERIES(...))
                // For PG/Redshift target: keep as GENERATE_SERIES but normalize interval string step
                // NOTE(review): any arguments beyond the third are silently
                // dropped here — confirm the parser never produces more.
                if let Expression::Function(f) = e {
                    if f.name.eq_ignore_ascii_case("GENERATE_SERIES") && f.args.len() >= 2 {
                        let start = f.args[0].clone();
                        let end = f.args[1].clone();
                        let step = f.args.get(2).cloned();

                        // Normalize step: convert string interval like '1day' or ' 2 days ' to INTERVAL expression
                        let step = step.map(|s| Self::normalize_interval_string(s, target));

                        // Helper: wrap CURRENT_TIMESTAMP in CAST(... AS TIMESTAMP) for Presto/Trino/Spark
                        // so the series bounds have an explicit timestamp type.
                        let maybe_cast_timestamp = |arg: Expression| -> Expression {
                            if matches!(
                                target,
                                DialectType::Presto
                                    | DialectType::Trino
                                    | DialectType::Athena
                                    | DialectType::Spark
                                    | DialectType::Databricks
                                    | DialectType::Hive
                            ) {
                                match &arg {
                                    Expression::CurrentTimestamp(_) => {
                                        Expression::Cast(Box::new(Cast {
                                            this: arg,
                                            to: DataType::Timestamp {
                                                precision: None,
                                                timezone: false,
                                            },
                                            trailing_comments: Vec::new(),
                                            double_colon_syntax: false,
                                            format: None,
                                            default: None,
                                        }))
                                    }
                                    _ => arg,
                                }
                            } else {
                                arg
                            }
                        };

                        let start = maybe_cast_timestamp(start);
                        let end = maybe_cast_timestamp(end);

                        // For PostgreSQL/Redshift target, keep as GENERATE_SERIES
                        // (only the step normalization above applies).
                        if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
                            let mut gs_args = vec![start, end];
                            if let Some(step) = step {
                                gs_args.push(step);
                            }
                            return Ok(Expression::Function(Box::new(Function::new(
                                "GENERATE_SERIES".to_string(),
                                gs_args,
                            ))));
                        }

                        // For DuckDB target: wrap in UNNEST(GENERATE_SERIES(...))
                        // because DuckDB's GENERATE_SERIES yields a list value.
                        if matches!(target, DialectType::DuckDB) {
                            let mut gs_args = vec![start, end];
                            if let Some(step) = step {
                                gs_args.push(step);
                            }
                            let gs = Expression::Function(Box::new(Function::new(
                                "GENERATE_SERIES".to_string(),
                                gs_args,
                            )));
                            return Ok(Expression::Function(Box::new(Function::new(
                                "UNNEST".to_string(),
                                vec![gs],
                            ))));
                        }

                        // Remaining targets go through SEQUENCE(start, end[, step]).
                        let mut seq_args = vec![start, end];
                        if let Some(step) = step {
                            seq_args.push(step);
                        }

                        let seq = Expression::Function(Box::new(Function::new(
                            "SEQUENCE".to_string(),
                            seq_args,
                        )));

                        match target {
                            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                                // Wrap in UNNEST
                                Ok(Expression::Function(Box::new(Function::new(
                                    "UNNEST".to_string(),
                                    vec![seq],
                                ))))
                            }
                            DialectType::Spark
                            | DialectType::Databricks
                            | DialectType::Hive => {
                                // Wrap in EXPLODE
                                Ok(Expression::Function(Box::new(Function::new(
                                    "EXPLODE".to_string(),
                                    vec![seq],
                                ))))
                            }
                            _ => {
                                // Just SEQUENCE for others
                                Ok(seq)
                            }
                        }
                    } else {
                        // Not a GENERATE_SERIES call (or too few args): untouched.
                        Ok(Expression::Function(f))
                    }
                } else {
                    Ok(e)
                }
            }
21425
21426 Action::ConcatCoalesceWrap => {
21427 // CONCAT(a, b) function -> CONCAT(COALESCE(CAST(a AS VARCHAR), ''), ...) for Presto
21428 // CONCAT(a, b) function -> CONCAT(COALESCE(a, ''), ...) for ClickHouse
21429 if let Expression::Function(f) = e {
21430 if f.name.eq_ignore_ascii_case("CONCAT") {
21431 let new_args: Vec<Expression> = f
21432 .args
21433 .into_iter()
21434 .map(|arg| {
21435 let cast_arg = if matches!(
21436 target,
21437 DialectType::Presto
21438 | DialectType::Trino
21439 | DialectType::Athena
21440 ) {
21441 Expression::Cast(Box::new(Cast {
21442 this: arg,
21443 to: DataType::VarChar {
21444 length: None,
21445 parenthesized_length: false,
21446 },
21447 trailing_comments: Vec::new(),
21448 double_colon_syntax: false,
21449 format: None,
21450 default: None,
21451 }))
21452 } else {
21453 arg
21454 };
21455 Expression::Function(Box::new(Function::new(
21456 "COALESCE".to_string(),
21457 vec![cast_arg, Expression::string("")],
21458 )))
21459 })
21460 .collect();
21461 Ok(Expression::Function(Box::new(Function::new(
21462 "CONCAT".to_string(),
21463 new_args,
21464 ))))
21465 } else {
21466 Ok(Expression::Function(f))
21467 }
21468 } else {
21469 Ok(e)
21470 }
21471 }
21472
21473 Action::PipeConcatToConcat => {
21474 // a || b (Concat operator) -> CONCAT(CAST(a AS VARCHAR), CAST(b AS VARCHAR)) for Presto/Trino
21475 if let Expression::Concat(op) = e {
21476 let cast_left = Expression::Cast(Box::new(Cast {
21477 this: op.left,
21478 to: DataType::VarChar {
21479 length: None,
21480 parenthesized_length: false,
21481 },
21482 trailing_comments: Vec::new(),
21483 double_colon_syntax: false,
21484 format: None,
21485 default: None,
21486 }));
21487 let cast_right = Expression::Cast(Box::new(Cast {
21488 this: op.right,
21489 to: DataType::VarChar {
21490 length: None,
21491 parenthesized_length: false,
21492 },
21493 trailing_comments: Vec::new(),
21494 double_colon_syntax: false,
21495 format: None,
21496 default: None,
21497 }));
21498 Ok(Expression::Function(Box::new(Function::new(
21499 "CONCAT".to_string(),
21500 vec![cast_left, cast_right],
21501 ))))
21502 } else {
21503 Ok(e)
21504 }
21505 }
21506
            Action::DivFuncConvert => {
                // DIV(a, b) -> target-specific integer division
                if let Expression::Function(f) = e {
                    if f.name.eq_ignore_ascii_case("DIV") && f.args.len() == 2 {
                        // Clone the operands so `f` can still be returned intact
                        // by the fallback arm below.
                        let a = f.args[0].clone();
                        let b = f.args[1].clone();
                        match target {
                            DialectType::DuckDB => {
                                // DIV(a, b) -> CAST(a // b AS DECIMAL)
                                let int_div = Expression::IntDiv(Box::new(
                                    crate::expressions::BinaryFunc {
                                        this: a,
                                        expression: b,
                                        original_name: None,
                                    },
                                ));
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: int_div,
                                    to: DataType::Decimal {
                                        precision: None,
                                        scale: None,
                                    },
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                })))
                            }
                            DialectType::BigQuery => {
                                // DIV(a, b) -> CAST(DIV(a, b) AS NUMERIC)
                                let div_func = Expression::Function(Box::new(Function::new(
                                    "DIV".to_string(),
                                    vec![a, b],
                                )));
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: div_func,
                                    to: DataType::Custom {
                                        name: "NUMERIC".to_string(),
                                    },
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                })))
                            }
                            DialectType::SQLite => {
                                // DIV(a, b) -> CAST(CAST(CAST(a AS REAL) / b AS INTEGER) AS REAL)
                                // SQLite has no integer-division operator, so this
                                // forces float division, truncates via the INTEGER
                                // cast, and restores a REAL result.
                                let cast_a = Expression::Cast(Box::new(Cast {
                                    this: a,
                                    to: DataType::Custom {
                                        name: "REAL".to_string(),
                                    },
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                }));
                                let div = Expression::Div(Box::new(BinaryOp::new(cast_a, b)));
                                let cast_int = Expression::Cast(Box::new(Cast {
                                    this: div,
                                    // integer_spelling renders as INTEGER, not INT.
                                    to: DataType::Int {
                                        length: None,
                                        integer_spelling: true,
                                    },
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                }));
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: cast_int,
                                    to: DataType::Custom {
                                        name: "REAL".to_string(),
                                    },
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                })))
                            }
                            // Other targets keep DIV(a, b) verbatim.
                            _ => Ok(Expression::Function(f)),
                        }
                    } else {
                        Ok(Expression::Function(f))
                    }
                } else {
                    Ok(e)
                }
            }
21596
21597 Action::JsonObjectAggConvert => {
21598 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
21599 match e {
21600 Expression::Function(f) => Ok(Expression::Function(Box::new(
21601 Function::new("JSON_GROUP_OBJECT".to_string(), f.args),
21602 ))),
21603 Expression::AggregateFunction(af) => {
21604 // AggregateFunction stores all args in the `args` vec
21605 Ok(Expression::Function(Box::new(Function::new(
21606 "JSON_GROUP_OBJECT".to_string(),
21607 af.args,
21608 ))))
21609 }
21610 other => Ok(other),
21611 }
21612 }
21613
            Action::JsonbExistsConvert => {
                // JSONB_EXISTS('json', 'key') -> JSON_EXISTS('json', '$.key') for DuckDB
                // NOTE(review): the function name itself is not checked here —
                // presumably the dispatcher only routes JSONB_EXISTS calls to
                // this action; confirm upstream.
                if let Expression::Function(f) = e {
                    if f.args.len() == 2 {
                        let json_expr = f.args[0].clone();
                        // Only string-literal keys are rewritten; dynamic key
                        // expressions fall through unchanged below.
                        // NOTE(review): the key is interpolated into the JSONPath
                        // without escaping — a key containing '.' or quotes would
                        // yield a wrong path; verify inputs are plain identifiers.
                        let key = match &f.args[1] {
                            Expression::Literal(crate::expressions::Literal::String(s)) => {
                                format!("$.{}", s)
                            }
                            _ => return Ok(Expression::Function(f)),
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "JSON_EXISTS".to_string(),
                            vec![json_expr, Expression::string(&key)],
                        ))))
                    } else {
                        Ok(Expression::Function(f))
                    }
                } else {
                    Ok(e)
                }
            }
21636
21637 Action::DateBinConvert => {
21638 // DATE_BIN('interval', ts, origin) -> TIME_BUCKET('interval', ts, origin) for DuckDB
21639 if let Expression::Function(f) = e {
21640 Ok(Expression::Function(Box::new(Function::new(
21641 "TIME_BUCKET".to_string(),
21642 f.args,
21643 ))))
21644 } else {
21645 Ok(e)
21646 }
21647 }
21648
21649 Action::MysqlCastCharToText => {
21650 // MySQL CAST(x AS CHAR) was originally TEXT -> convert to target text type
21651 if let Expression::Cast(mut c) = e {
21652 c.to = DataType::Text;
21653 Ok(Expression::Cast(c))
21654 } else {
21655 Ok(e)
21656 }
21657 }
21658
21659 Action::SparkCastVarcharToString => {
21660 // Spark parses VARCHAR(n)/CHAR(n) as TEXT -> normalize to STRING
21661 match e {
21662 Expression::Cast(mut c) => {
21663 c.to = Self::normalize_varchar_to_string(c.to);
21664 Ok(Expression::Cast(c))
21665 }
21666 Expression::TryCast(mut c) => {
21667 c.to = Self::normalize_varchar_to_string(c.to);
21668 Ok(Expression::TryCast(c))
21669 }
21670 _ => Ok(e),
21671 }
21672 }
21673
            Action::MinMaxToLeastGreatest => {
                // Multi-arg MIN(a,b,c) -> LEAST(a,b,c), MAX(a,b,c) -> GREATEST(a,b,c)
                // NOTE(review): no arity check is performed here, so a
                // single-argument MIN(x) reaching this action would also be
                // rewritten to LEAST(x) — presumably the dispatcher only routes
                // multi-argument (scalar, non-aggregate) calls; confirm upstream.
                if let Expression::Function(f) = e {
                    let name = f.name.to_uppercase();
                    let new_name = match name.as_str() {
                        "MIN" => "LEAST",
                        "MAX" => "GREATEST",
                        // Any other function passes through untouched.
                        _ => return Ok(Expression::Function(f)),
                    };
                    Ok(Expression::Function(Box::new(Function::new(
                        new_name.to_string(),
                        f.args,
                    ))))
                } else {
                    Ok(e)
                }
            }
21691
21692 Action::ClickHouseUniqToApproxCountDistinct => {
21693 // ClickHouse uniq(x) -> APPROX_COUNT_DISTINCT(x) for non-ClickHouse targets
21694 if let Expression::Function(f) = e {
21695 Ok(Expression::Function(Box::new(Function::new(
21696 "APPROX_COUNT_DISTINCT".to_string(),
21697 f.args,
21698 ))))
21699 } else {
21700 Ok(e)
21701 }
21702 }
21703
21704 Action::ClickHouseAnyToAnyValue => {
21705 // ClickHouse any(x) -> ANY_VALUE(x) for non-ClickHouse targets
21706 if let Expression::Function(f) = e {
21707 Ok(Expression::Function(Box::new(Function::new(
21708 "ANY_VALUE".to_string(),
21709 f.args,
21710 ))))
21711 } else {
21712 Ok(e)
21713 }
21714 }
21715
21716 Action::OracleVarchar2ToVarchar => {
21717 // Oracle VARCHAR2(N CHAR/BYTE) / NVARCHAR2(N) -> VarChar(N) for non-Oracle targets
21718 if let Expression::DataType(DataType::Custom { ref name }) = e {
21719 let upper = name.to_uppercase();
21720 // Extract length from VARCHAR2(N ...) or NVARCHAR2(N ...)
21721 let inner =
21722 if upper.starts_with("VARCHAR2(") || upper.starts_with("NVARCHAR2(") {
21723 let start = if upper.starts_with("N") { 10 } else { 9 }; // skip "NVARCHAR2(" or "VARCHAR2("
21724 let end = name.len() - 1; // skip trailing ")"
21725 Some(&name[start..end])
21726 } else {
21727 Option::None
21728 };
21729 if let Some(inner_str) = inner {
21730 // Parse the number part, ignoring BYTE/CHAR qualifier
21731 let num_str = inner_str.split_whitespace().next().unwrap_or("");
21732 if let Ok(n) = num_str.parse::<u32>() {
21733 Ok(Expression::DataType(DataType::VarChar {
21734 length: Some(n),
21735 parenthesized_length: false,
21736 }))
21737 } else {
21738 Ok(e)
21739 }
21740 } else {
21741 // Plain VARCHAR2 / NVARCHAR2 without parens
21742 Ok(Expression::DataType(DataType::VarChar {
21743 length: Option::None,
21744 parenthesized_length: false,
21745 }))
21746 }
21747 } else {
21748 Ok(e)
21749 }
21750 }
21751
            Action::Nvl2Expand => {
                // NVL2(a, b[, c]) -> CASE WHEN NOT a IS NULL THEN b [ELSE c] END
                // But keep as NVL2 for dialects that support it natively
                let nvl2_native = matches!(
                    target,
                    DialectType::Oracle
                        | DialectType::Snowflake
                        | DialectType::Redshift
                        | DialectType::Teradata
                        | DialectType::Spark
                        | DialectType::Databricks
                );
                // Pull (a, b, optional c) out of either a dedicated Nvl2 node or
                // a raw NVL2(...) function call; native targets return early with
                // the NVL2 spelling preserved.
                let (a, b, c) = if let Expression::Nvl2(nvl2) = e {
                    if nvl2_native {
                        return Ok(Expression::Nvl2(nvl2));
                    }
                    (nvl2.this, nvl2.true_value, Some(nvl2.false_value))
                } else if let Expression::Function(f) = e {
                    if nvl2_native {
                        // Re-emit under the canonical NVL2 name.
                        return Ok(Expression::Function(Box::new(Function::new(
                            "NVL2".to_string(),
                            f.args,
                        ))));
                    }
                    if f.args.len() < 2 {
                        // Too few arguments to expand; pass through unchanged.
                        return Ok(Expression::Function(f));
                    }
                    let mut args = f.args;
                    let a = args.remove(0);
                    let b = args.remove(0);
                    let c = if !args.is_empty() {
                        Some(args.remove(0))
                    } else {
                        Option::None
                    };
                    (a, b, c)
                } else {
                    return Ok(e);
                };
                // Build: NOT (a IS NULL)
                let is_null = Expression::IsNull(Box::new(IsNull {
                    this: a,
                    not: false,
                    postfix_form: false,
                }));
                let not_null =
                    Expression::Not(Box::new(crate::expressions::UnaryOp { this: is_null }));
                // CASE WHEN NOT a IS NULL THEN b [ELSE c] END — a missing third
                // argument simply yields a CASE without an ELSE branch.
                Ok(Expression::Case(Box::new(Case {
                    operand: Option::None,
                    whens: vec![(not_null, b)],
                    else_: c,
                    comments: Vec::new(),
                })))
            }
21806
21807 Action::IfnullToCoalesce => {
21808 // IFNULL(a, b) -> COALESCE(a, b): clear original_name to output COALESCE
21809 if let Expression::Coalesce(mut cf) = e {
21810 cf.original_name = Option::None;
21811 Ok(Expression::Coalesce(cf))
21812 } else if let Expression::Function(f) = e {
21813 Ok(Expression::Function(Box::new(Function::new(
21814 "COALESCE".to_string(),
21815 f.args,
21816 ))))
21817 } else {
21818 Ok(e)
21819 }
21820 }
21821
21822 Action::IsAsciiConvert => {
21823 // IS_ASCII(x) -> dialect-specific ASCII check
21824 if let Expression::Function(f) = e {
21825 let arg = f.args.into_iter().next().unwrap();
21826 match target {
21827 DialectType::MySQL | DialectType::SingleStore | DialectType::TiDB => {
21828 // REGEXP_LIKE(x, '^[[:ascii:]]*$')
21829 Ok(Expression::Function(Box::new(Function::new(
21830 "REGEXP_LIKE".to_string(),
21831 vec![
21832 arg,
21833 Expression::Literal(Literal::String(
21834 "^[[:ascii:]]*$".to_string(),
21835 )),
21836 ],
21837 ))))
21838 }
21839 DialectType::PostgreSQL
21840 | DialectType::Redshift
21841 | DialectType::Materialize
21842 | DialectType::RisingWave => {
21843 // (x ~ '^[[:ascii:]]*$')
21844 Ok(Expression::Paren(Box::new(Paren {
21845 this: Expression::RegexpLike(Box::new(
21846 crate::expressions::RegexpFunc {
21847 this: arg,
21848 pattern: Expression::Literal(Literal::String(
21849 "^[[:ascii:]]*$".to_string(),
21850 )),
21851 flags: Option::None,
21852 },
21853 )),
21854 trailing_comments: Vec::new(),
21855 })))
21856 }
21857 DialectType::SQLite => {
21858 // (NOT x GLOB CAST(x'2a5b5e012d7f5d2a' AS TEXT))
21859 let hex_lit = Expression::Literal(Literal::HexString(
21860 "2a5b5e012d7f5d2a".to_string(),
21861 ));
21862 let cast_expr = Expression::Cast(Box::new(Cast {
21863 this: hex_lit,
21864 to: DataType::Text,
21865 trailing_comments: Vec::new(),
21866 double_colon_syntax: false,
21867 format: Option::None,
21868 default: Option::None,
21869 }));
21870 let glob = Expression::Glob(Box::new(BinaryOp {
21871 left: arg,
21872 right: cast_expr,
21873 left_comments: Vec::new(),
21874 operator_comments: Vec::new(),
21875 trailing_comments: Vec::new(),
21876 }));
21877 Ok(Expression::Paren(Box::new(Paren {
21878 this: Expression::Not(Box::new(crate::expressions::UnaryOp {
21879 this: glob,
21880 })),
21881 trailing_comments: Vec::new(),
21882 })))
21883 }
21884 DialectType::TSQL | DialectType::Fabric => {
21885 // (PATINDEX(CONVERT(VARCHAR(MAX), 0x255b5e002d7f5d25) COLLATE Latin1_General_BIN, x) = 0)
21886 let hex_lit = Expression::Literal(Literal::HexNumber(
21887 "255b5e002d7f5d25".to_string(),
21888 ));
21889 let convert_expr = Expression::Convert(Box::new(
21890 crate::expressions::ConvertFunc {
21891 this: hex_lit,
21892 to: DataType::Text, // Text generates as VARCHAR(MAX) for TSQL
21893 style: None,
21894 },
21895 ));
21896 let collated = Expression::Collation(Box::new(
21897 crate::expressions::CollationExpr {
21898 this: convert_expr,
21899 collation: "Latin1_General_BIN".to_string(),
21900 quoted: false,
21901 double_quoted: false,
21902 },
21903 ));
21904 let patindex = Expression::Function(Box::new(Function::new(
21905 "PATINDEX".to_string(),
21906 vec![collated, arg],
21907 )));
21908 let zero = Expression::Literal(Literal::Number("0".to_string()));
21909 let eq_zero = Expression::Eq(Box::new(BinaryOp {
21910 left: patindex,
21911 right: zero,
21912 left_comments: Vec::new(),
21913 operator_comments: Vec::new(),
21914 trailing_comments: Vec::new(),
21915 }));
21916 Ok(Expression::Paren(Box::new(Paren {
21917 this: eq_zero,
21918 trailing_comments: Vec::new(),
21919 })))
21920 }
21921 DialectType::Oracle => {
21922 // NVL(REGEXP_LIKE(x, '^[' || CHR(1) || '-' || CHR(127) || ']*$'), TRUE)
21923 // Build the pattern: '^[' || CHR(1) || '-' || CHR(127) || ']*$'
21924 let s1 = Expression::Literal(Literal::String("^[".to_string()));
21925 let chr1 = Expression::Function(Box::new(Function::new(
21926 "CHR".to_string(),
21927 vec![Expression::Literal(Literal::Number("1".to_string()))],
21928 )));
21929 let dash = Expression::Literal(Literal::String("-".to_string()));
21930 let chr127 = Expression::Function(Box::new(Function::new(
21931 "CHR".to_string(),
21932 vec![Expression::Literal(Literal::Number("127".to_string()))],
21933 )));
21934 let s2 = Expression::Literal(Literal::String("]*$".to_string()));
21935 // Build: '^[' || CHR(1) || '-' || CHR(127) || ']*$'
21936 let concat1 =
21937 Expression::DPipe(Box::new(crate::expressions::DPipe {
21938 this: Box::new(s1),
21939 expression: Box::new(chr1),
21940 safe: None,
21941 }));
21942 let concat2 =
21943 Expression::DPipe(Box::new(crate::expressions::DPipe {
21944 this: Box::new(concat1),
21945 expression: Box::new(dash),
21946 safe: None,
21947 }));
21948 let concat3 =
21949 Expression::DPipe(Box::new(crate::expressions::DPipe {
21950 this: Box::new(concat2),
21951 expression: Box::new(chr127),
21952 safe: None,
21953 }));
21954 let concat4 =
21955 Expression::DPipe(Box::new(crate::expressions::DPipe {
21956 this: Box::new(concat3),
21957 expression: Box::new(s2),
21958 safe: None,
21959 }));
21960 let regexp_like = Expression::Function(Box::new(Function::new(
21961 "REGEXP_LIKE".to_string(),
21962 vec![arg, concat4],
21963 )));
21964 // Use Column("TRUE") to output literal TRUE keyword (not boolean 1/0)
21965 let true_expr = Expression::Column(crate::expressions::Column {
21966 name: Identifier {
21967 name: "TRUE".to_string(),
21968 quoted: false,
21969 trailing_comments: Vec::new(),
21970 span: None,
21971 },
21972 table: None,
21973 join_mark: false,
21974 trailing_comments: Vec::new(),
21975 span: None,
21976 });
21977 let nvl = Expression::Function(Box::new(Function::new(
21978 "NVL".to_string(),
21979 vec![regexp_like, true_expr],
21980 )));
21981 Ok(nvl)
21982 }
21983 _ => Ok(Expression::Function(Box::new(Function::new(
21984 "IS_ASCII".to_string(),
21985 vec![arg],
21986 )))),
21987 }
21988 } else {
21989 Ok(e)
21990 }
21991 }
21992
            Action::StrPositionConvert => {
                // STR_POSITION(haystack, needle[, position[, occurrence]]) -> dialect-specific
                if let Expression::Function(f) = e {
                    if f.args.len() < 2 {
                        // Not enough arguments to be a position call; untouched.
                        return Ok(Expression::Function(f));
                    }
                    let mut args = f.args;

                    // Positional unpack: the two optional trailing arguments
                    // become None when absent.
                    let haystack = args.remove(0);
                    let needle = args.remove(0);
                    let position = if !args.is_empty() {
                        Some(args.remove(0))
                    } else {
                        Option::None
                    };
                    let occurrence = if !args.is_empty() {
                        Some(args.remove(0))
                    } else {
                        Option::None
                    };

                    // Helper to build: STRPOS/INSTR(SUBSTRING(haystack, pos), needle) expansion
                    // Returns: CASE/IF WHEN func(SUBSTRING(haystack, pos), needle[, occ]) = 0 THEN 0 ELSE ... + pos - 1 END
                    // i.e. emulate a start-position argument for engines whose
                    // native function lacks one: search the suffix, then shift
                    // any hit back into whole-string coordinates (+ pos - 1).
                    // NOTE(review): when `occurrence` is Some it is appended to
                    // the inner call — verify the chosen inner_func actually
                    // accepts a third argument on each target.
                    fn build_position_expansion(
                        haystack: Expression,
                        needle: Expression,
                        pos: Expression,
                        occurrence: Option<Expression>,
                        inner_func: &str,
                        wrapper: &str, // "CASE", "IF", "IIF"
                    ) -> Expression {
                        let substr = Expression::Function(Box::new(Function::new(
                            "SUBSTRING".to_string(),
                            vec![haystack, pos.clone()],
                        )));
                        let mut inner_args = vec![substr, needle];
                        if let Some(occ) = occurrence {
                            inner_args.push(occ);
                        }
                        let inner_call = Expression::Function(Box::new(Function::new(
                            inner_func.to_string(),
                            inner_args,
                        )));
                        let zero = Expression::Literal(Literal::Number("0".to_string()));
                        let one = Expression::Literal(Literal::Number("1".to_string()));
                        // inner_call = 0 -> not found -> overall result 0.
                        let eq_zero = Expression::Eq(Box::new(BinaryOp {
                            left: inner_call.clone(),
                            right: zero.clone(),
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                        }));
                        // Otherwise: inner_call + pos - 1.
                        let add_pos = Expression::Add(Box::new(BinaryOp {
                            left: inner_call,
                            right: pos,
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                        }));
                        let sub_one = Expression::Sub(Box::new(BinaryOp {
                            left: add_pos,
                            right: one,
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                        }));

                        match wrapper {
                            "CASE" => Expression::Case(Box::new(Case {
                                operand: Option::None,
                                whens: vec![(eq_zero, zero)],
                                else_: Some(sub_one),
                                comments: Vec::new(),
                            })),
                            "IIF" => Expression::Function(Box::new(Function::new(
                                "IIF".to_string(),
                                vec![eq_zero, zero, sub_one],
                            ))),
                            _ => Expression::Function(Box::new(Function::new(
                                "IF".to_string(),
                                vec![eq_zero, zero, sub_one],
                            ))),
                        }
                    }

                    match target {
                        // STRPOS group: Athena, DuckDB, Presto, Trino, Drill
                        DialectType::Athena
                        | DialectType::DuckDB
                        | DialectType::Presto
                        | DialectType::Trino
                        | DialectType::Drill => {
                            if let Some(pos) = position {
                                let wrapper = if matches!(target, DialectType::DuckDB) {
                                    "CASE"
                                } else {
                                    "IF"
                                };
                                let result = build_position_expansion(
                                    haystack, needle, pos, occurrence, "STRPOS", wrapper,
                                );
                                if matches!(target, DialectType::Drill) {
                                    // Drill uses backtick-quoted `IF`
                                    if let Expression::Function(mut f) = result {
                                        f.name = "`IF`".to_string();
                                        Ok(Expression::Function(f))
                                    } else {
                                        Ok(result)
                                    }
                                } else {
                                    Ok(result)
                                }
                            } else {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "STRPOS".to_string(),
                                    vec![haystack, needle],
                                ))))
                            }
                        }
                        // SQLite: IIF wrapper
                        DialectType::SQLite => {
                            if let Some(pos) = position {
                                Ok(build_position_expansion(
                                    haystack, needle, pos, occurrence, "INSTR", "IIF",
                                ))
                            } else {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "INSTR".to_string(),
                                    vec![haystack, needle],
                                ))))
                            }
                        }
                        // INSTR group: Teradata, BigQuery, Oracle
                        // (native 4-arg INSTR: haystack, needle, position, occurrence)
                        DialectType::Teradata | DialectType::BigQuery | DialectType::Oracle => {
                            let mut a = vec![haystack, needle];
                            if let Some(pos) = position {
                                a.push(pos);
                            }
                            if let Some(occ) = occurrence {
                                a.push(occ);
                            }
                            Ok(Expression::Function(Box::new(Function::new(
                                "INSTR".to_string(),
                                a,
                            ))))
                        }
                        // CHARINDEX group: Snowflake, TSQL
                        // NOTE(review): `occurrence` is silently dropped here —
                        // confirm that is the intended (lossy) behavior.
                        DialectType::Snowflake | DialectType::TSQL | DialectType::Fabric => {
                            let mut a = vec![needle, haystack];
                            if let Some(pos) = position {
                                a.push(pos);
                            }
                            Ok(Expression::Function(Box::new(Function::new(
                                "CHARINDEX".to_string(),
                                a,
                            ))))
                        }
                        // POSITION(needle IN haystack): PostgreSQL, Materialize, RisingWave, Redshift
                        DialectType::PostgreSQL
                        | DialectType::Materialize
                        | DialectType::RisingWave
                        | DialectType::Redshift => {
                            if let Some(pos) = position {
                                // Build: CASE WHEN POSITION(needle IN SUBSTRING(haystack FROM pos)) = 0 THEN 0
                                //        ELSE POSITION(...) + pos - 1 END
                                // Same suffix-search-then-shift scheme as the
                                // helper above, but with POSITION ... IN syntax.
                                let substr = Expression::Substring(Box::new(
                                    crate::expressions::SubstringFunc {
                                        this: haystack,
                                        start: pos.clone(),
                                        length: Option::None,
                                        from_for_syntax: true,
                                    },
                                ));
                                let pos_in = Expression::StrPosition(Box::new(
                                    crate::expressions::StrPosition {
                                        this: Box::new(substr),
                                        substr: Some(Box::new(needle)),
                                        position: Option::None,
                                        occurrence: Option::None,
                                    },
                                ));
                                let zero =
                                    Expression::Literal(Literal::Number("0".to_string()));
                                let one = Expression::Literal(Literal::Number("1".to_string()));
                                let eq_zero = Expression::Eq(Box::new(BinaryOp {
                                    left: pos_in.clone(),
                                    right: zero.clone(),
                                    left_comments: Vec::new(),
                                    operator_comments: Vec::new(),
                                    trailing_comments: Vec::new(),
                                }));
                                let add_pos = Expression::Add(Box::new(BinaryOp {
                                    left: pos_in,
                                    right: pos,
                                    left_comments: Vec::new(),
                                    operator_comments: Vec::new(),
                                    trailing_comments: Vec::new(),
                                }));
                                let sub_one = Expression::Sub(Box::new(BinaryOp {
                                    left: add_pos,
                                    right: one,
                                    left_comments: Vec::new(),
                                    operator_comments: Vec::new(),
                                    trailing_comments: Vec::new(),
                                }));
                                Ok(Expression::Case(Box::new(Case {
                                    operand: Option::None,
                                    whens: vec![(eq_zero, zero)],
                                    else_: Some(sub_one),
                                    comments: Vec::new(),
                                })))
                            } else {
                                Ok(Expression::StrPosition(Box::new(
                                    crate::expressions::StrPosition {
                                        this: Box::new(haystack),
                                        substr: Some(Box::new(needle)),
                                        position: Option::None,
                                        occurrence: Option::None,
                                    },
                                )))
                            }
                        }
                        // LOCATE group: MySQL, Hive, Spark, Databricks, Doris
                        // (needle first; `occurrence` is dropped — see note above)
                        DialectType::MySQL
                        | DialectType::SingleStore
                        | DialectType::TiDB
                        | DialectType::Hive
                        | DialectType::Spark
                        | DialectType::Databricks
                        | DialectType::Doris
                        | DialectType::StarRocks => {
                            let mut a = vec![needle, haystack];
                            if let Some(pos) = position {
                                a.push(pos);
                            }
                            Ok(Expression::Function(Box::new(Function::new(
                                "LOCATE".to_string(),
                                a,
                            ))))
                        }
                        // ClickHouse: POSITION(haystack, needle[, position])
                        DialectType::ClickHouse => {
                            let mut a = vec![haystack, needle];
                            if let Some(pos) = position {
                                a.push(pos);
                            }
                            Ok(Expression::Function(Box::new(Function::new(
                                "POSITION".to_string(),
                                a,
                            ))))
                        }
                        // Fallback: keep the canonical STR_POSITION spelling
                        // with every argument that was supplied.
                        _ => {
                            let mut a = vec![haystack, needle];
                            if let Some(pos) = position {
                                a.push(pos);
                            }
                            if let Some(occ) = occurrence {
                                a.push(occ);
                            }
                            Ok(Expression::Function(Box::new(Function::new(
                                "STR_POSITION".to_string(),
                                a,
                            ))))
                        }
                    }
                } else {
                    Ok(e)
                }
            }
22262
22263 Action::ArraySumConvert => {
22264 // ARRAY_SUM(arr) -> dialect-specific
22265 if let Expression::Function(f) = e {
22266 let args = f.args;
22267 match target {
22268 DialectType::DuckDB => Ok(Expression::Function(Box::new(
22269 Function::new("LIST_SUM".to_string(), args),
22270 ))),
22271 DialectType::Spark | DialectType::Databricks => {
22272 // AGGREGATE(arr, 0, (acc, x) -> acc + x, acc -> acc)
22273 let arr = args.into_iter().next().unwrap();
22274 let zero = Expression::Literal(Literal::Number("0".to_string()));
22275 let acc_id = Identifier::new("acc");
22276 let x_id = Identifier::new("x");
22277 let acc = Expression::Identifier(acc_id.clone());
22278 let x = Expression::Identifier(x_id.clone());
22279 let add = Expression::Add(Box::new(BinaryOp {
22280 left: acc.clone(),
22281 right: x,
22282 left_comments: Vec::new(),
22283 operator_comments: Vec::new(),
22284 trailing_comments: Vec::new(),
22285 }));
22286 let lambda1 =
22287 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
22288 parameters: vec![acc_id.clone(), x_id],
22289 body: add,
22290 colon: false,
22291 parameter_types: Vec::new(),
22292 }));
22293 let lambda2 =
22294 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
22295 parameters: vec![acc_id],
22296 body: acc,
22297 colon: false,
22298 parameter_types: Vec::new(),
22299 }));
22300 Ok(Expression::Function(Box::new(Function::new(
22301 "AGGREGATE".to_string(),
22302 vec![arr, zero, lambda1, lambda2],
22303 ))))
22304 }
22305 DialectType::Presto | DialectType::Athena => {
22306 // Presto/Athena keep ARRAY_SUM natively
22307 Ok(Expression::Function(Box::new(Function::new(
22308 "ARRAY_SUM".to_string(),
22309 args,
22310 ))))
22311 }
22312 DialectType::Trino => {
22313 // REDUCE(arr, 0, (acc, x) -> acc + x, acc -> acc)
22314 if args.len() == 1 {
22315 let arr = args.into_iter().next().unwrap();
22316 let zero =
22317 Expression::Literal(Literal::Number("0".to_string()));
22318 let acc_id = Identifier::new("acc");
22319 let x_id = Identifier::new("x");
22320 let acc = Expression::Identifier(acc_id.clone());
22321 let x = Expression::Identifier(x_id.clone());
22322 let add = Expression::Add(Box::new(BinaryOp {
22323 left: acc.clone(),
22324 right: x,
22325 left_comments: Vec::new(),
22326 operator_comments: Vec::new(),
22327 trailing_comments: Vec::new(),
22328 }));
22329 let lambda1 = Expression::Lambda(Box::new(
22330 crate::expressions::LambdaExpr {
22331 parameters: vec![acc_id.clone(), x_id],
22332 body: add,
22333 colon: false,
22334 parameter_types: Vec::new(),
22335 },
22336 ));
22337 let lambda2 = Expression::Lambda(Box::new(
22338 crate::expressions::LambdaExpr {
22339 parameters: vec![acc_id],
22340 body: acc,
22341 colon: false,
22342 parameter_types: Vec::new(),
22343 },
22344 ));
22345 Ok(Expression::Function(Box::new(Function::new(
22346 "REDUCE".to_string(),
22347 vec![arr, zero, lambda1, lambda2],
22348 ))))
22349 } else {
22350 Ok(Expression::Function(Box::new(Function::new(
22351 "ARRAY_SUM".to_string(),
22352 args,
22353 ))))
22354 }
22355 }
22356 DialectType::ClickHouse => {
22357 // arraySum(lambda, arr) or arraySum(arr)
22358 Ok(Expression::Function(Box::new(Function::new(
22359 "arraySum".to_string(),
22360 args,
22361 ))))
22362 }
22363 _ => Ok(Expression::Function(Box::new(Function::new(
22364 "ARRAY_SUM".to_string(),
22365 args,
22366 )))),
22367 }
22368 } else {
22369 Ok(e)
22370 }
22371 }
22372
22373 Action::ArraySizeConvert => {
22374 if let Expression::Function(f) = e {
22375 Ok(Expression::Function(Box::new(Function::new(
22376 "REPEATED_COUNT".to_string(),
22377 f.args,
22378 ))))
22379 } else {
22380 Ok(e)
22381 }
22382 }
22383
            Action::ArrayAnyConvert => {
                // ARRAY_ANY(arr, lambda): "does any array element satisfy the
                // predicate?" Converted per dialect, either to a native predicate
                // function (ANY_MATCH) or to the filter-based pattern
                //   (len(arr) = 0 OR len(filter(arr, pred)) <> 0)
                // which evaluates TRUE for empty arrays and whenever at least one
                // element matches.
                if let Expression::Function(f) = e {
                    let mut args = f.args;
                    if args.len() == 2 {
                        let arr = args.remove(0);
                        let lambda = args.remove(0);

                        // Extract lambda parameter name and body
                        let (param_name, pred_body) =
                            if let Expression::Lambda(ref lam) = lambda {
                                let name = if let Some(p) = lam.parameters.first() {
                                    p.name.clone()
                                } else {
                                    "x".to_string()
                                };
                                (name, lam.body.clone())
                            } else {
                                // Not a lambda: treat the expression itself as the
                                // predicate body, with a default parameter name.
                                ("x".to_string(), lambda.clone())
                            };

                        // Helper: build a function call Expression
                        let make_func = |name: &str, args: Vec<Expression>| -> Expression {
                            Expression::Function(Box::new(Function::new(
                                name.to_string(),
                                args,
                            )))
                        };

                        // Helper: build (len_func(arr) = 0 OR len_func(filter_expr) <> 0) wrapped in Paren
                        let build_filter_pattern = |len_func: &str,
                                                    len_args_extra: Vec<Expression>,
                                                    filter_expr: Expression|
                         -> Expression {
                            // len_func(arr, ...extra) = 0
                            let mut len_arr_args = vec![arr.clone()];
                            len_arr_args.extend(len_args_extra.clone());
                            let len_arr = make_func(len_func, len_arr_args);
                            let eq_zero = Expression::Eq(Box::new(BinaryOp::new(
                                len_arr,
                                Expression::number(0),
                            )));

                            // len_func(filter_expr, ...extra) <> 0
                            let mut len_filter_args = vec![filter_expr];
                            len_filter_args.extend(len_args_extra);
                            let len_filter = make_func(len_func, len_filter_args);
                            let neq_zero = Expression::Neq(Box::new(BinaryOp::new(
                                len_filter,
                                Expression::number(0),
                            )));

                            // (eq_zero OR neq_zero)
                            let or_expr =
                                Expression::Or(Box::new(BinaryOp::new(eq_zero, neq_zero)));
                            Expression::Paren(Box::new(Paren {
                                this: or_expr,
                                trailing_comments: Vec::new(),
                            }))
                        };

                        match target {
                            DialectType::Trino | DialectType::Presto | DialectType::Athena => {
                                // Native predicate: ANY_MATCH(arr, lambda)
                                Ok(make_func("ANY_MATCH", vec![arr, lambda]))
                            }
                            DialectType::ClickHouse => {
                                // (LENGTH(arr) = 0 OR LENGTH(arrayFilter(x -> pred, arr)) <> 0)
                                // ClickHouse arrayFilter takes lambda first, then array
                                let filter_expr =
                                    make_func("arrayFilter", vec![lambda, arr.clone()]);
                                Ok(build_filter_pattern("LENGTH", vec![], filter_expr))
                            }
                            DialectType::Databricks | DialectType::Spark => {
                                // (SIZE(arr) = 0 OR SIZE(FILTER(arr, x -> pred)) <> 0)
                                let filter_expr =
                                    make_func("FILTER", vec![arr.clone(), lambda]);
                                Ok(build_filter_pattern("SIZE", vec![], filter_expr))
                            }
                            DialectType::DuckDB => {
                                // (ARRAY_LENGTH(arr) = 0 OR ARRAY_LENGTH(LIST_FILTER(arr, x -> pred)) <> 0)
                                let filter_expr =
                                    make_func("LIST_FILTER", vec![arr.clone(), lambda]);
                                Ok(build_filter_pattern("ARRAY_LENGTH", vec![], filter_expr))
                            }
                            DialectType::Teradata => {
                                // (CARDINALITY(arr) = 0 OR CARDINALITY(FILTER(arr, x -> pred)) <> 0)
                                let filter_expr =
                                    make_func("FILTER", vec![arr.clone(), lambda]);
                                Ok(build_filter_pattern("CARDINALITY", vec![], filter_expr))
                            }
                            DialectType::BigQuery => {
                                // (ARRAY_LENGTH(arr) = 0 OR ARRAY_LENGTH(ARRAY(SELECT x FROM UNNEST(arr) AS x WHERE pred)) <> 0)
                                // Build: SELECT x FROM UNNEST(arr) AS x WHERE pred
                                let param_col = Expression::column(&param_name);
                                let unnest_expr = Expression::Unnest(Box::new(
                                    crate::expressions::UnnestFunc {
                                        this: arr.clone(),
                                        expressions: vec![],
                                        with_ordinality: false,
                                        // The UNNEST alias doubles as the lambda
                                        // parameter name so `pred` resolves to it.
                                        alias: Some(Identifier::new(&param_name)),
                                        offset_alias: None,
                                    },
                                ));
                                let mut sel = crate::expressions::Select::default();
                                sel.expressions = vec![param_col];
                                sel.from = Some(crate::expressions::From {
                                    expressions: vec![unnest_expr],
                                });
                                sel.where_clause =
                                    Some(crate::expressions::Where { this: pred_body });
                                let array_subquery =
                                    make_func("ARRAY", vec![Expression::Select(Box::new(sel))]);
                                Ok(build_filter_pattern("ARRAY_LENGTH", vec![], array_subquery))
                            }
                            DialectType::PostgreSQL => {
                                // (ARRAY_LENGTH(arr, 1) = 0 OR ARRAY_LENGTH(ARRAY(SELECT x FROM UNNEST(arr) AS _t0(x) WHERE pred), 1) <> 0)
                                // Build: SELECT x FROM UNNEST(arr) AS _t0(x) WHERE pred
                                let param_col = Expression::column(&param_name);
                                // For PostgreSQL, UNNEST uses AS _t0(x) syntax - use TableAlias
                                let unnest_with_alias =
                                    Expression::Alias(Box::new(crate::expressions::Alias {
                                        this: Expression::Unnest(Box::new(
                                            crate::expressions::UnnestFunc {
                                                this: arr.clone(),
                                                expressions: vec![],
                                                with_ordinality: false,
                                                alias: None,
                                                offset_alias: None,
                                            },
                                        )),
                                        alias: Identifier::new("_t0"),
                                        // Column alias carries the lambda parameter
                                        // name so `pred` resolves to it.
                                        column_aliases: vec![Identifier::new(&param_name)],
                                        pre_alias_comments: Vec::new(),
                                        trailing_comments: Vec::new(),
                                    }));
                                let mut sel = crate::expressions::Select::default();
                                sel.expressions = vec![param_col];
                                sel.from = Some(crate::expressions::From {
                                    expressions: vec![unnest_with_alias],
                                });
                                sel.where_clause =
                                    Some(crate::expressions::Where { this: pred_body });
                                let array_subquery =
                                    make_func("ARRAY", vec![Expression::Select(Box::new(sel))]);
                                // ARRAY_LENGTH in PostgreSQL needs the dimension
                                // argument (1).
                                Ok(build_filter_pattern(
                                    "ARRAY_LENGTH",
                                    vec![Expression::number(1)],
                                    array_subquery,
                                ))
                            }
                            // No dialect-specific form: keep the normalized name.
                            _ => Ok(Expression::Function(Box::new(Function::new(
                                "ARRAY_ANY".to_string(),
                                vec![arr, lambda],
                            )))),
                        }
                    } else {
                        // Unexpected arity: keep the normalized call untouched.
                        Ok(Expression::Function(Box::new(Function::new(
                            "ARRAY_ANY".to_string(),
                            args,
                        ))))
                    }
                } else {
                    Ok(e)
                }
            }
22548
            Action::DecodeSimplify => {
                // DECODE(x, search1, result1, ..., default) -> CASE WHEN ... THEN result1 ... [ELSE default] END
                // For literal search values: CASE WHEN x = search THEN result
                // For NULL search: CASE WHEN x IS NULL THEN result
                // For non-literal (column, expr): CASE WHEN x = search OR (x IS NULL AND search IS NULL) THEN result
                //
                // The null-safe form exists because DECODE treats NULL as equal to
                // NULL; a literal on either side can never be NULL, so plain
                // equality suffices there.

                // True for expressions that can never be NULL (plain literals,
                // booleans, and negated literals).
                fn is_decode_literal(e: &Expression) -> bool {
                    matches!(
                        e,
                        Expression::Literal(_) | Expression::Boolean(_) | Expression::Neg(_)
                    )
                }

                // Assembles the CASE expression from the operand, the
                // (search, result) pairs, and the optional default.
                let build_decode_case =
                    |this_expr: Expression,
                     pairs: Vec<(Expression, Expression)>,
                     default: Option<Expression>| {
                        let whens: Vec<(Expression, Expression)> = pairs
                            .into_iter()
                            .map(|(search, result)| {
                                if matches!(&search, Expression::Null(_)) {
                                    // NULL search -> IS NULL
                                    let condition = Expression::Is(Box::new(BinaryOp {
                                        left: this_expr.clone(),
                                        right: Expression::Null(crate::expressions::Null),
                                        left_comments: Vec::new(),
                                        operator_comments: Vec::new(),
                                        trailing_comments: Vec::new(),
                                    }));
                                    (condition, result)
                                } else if is_decode_literal(&search)
                                    || is_decode_literal(&this_expr)
                                {
                                    // At least one side is a literal -> simple equality (no NULL check needed)
                                    let eq = Expression::Eq(Box::new(BinaryOp {
                                        left: this_expr.clone(),
                                        right: search,
                                        left_comments: Vec::new(),
                                        operator_comments: Vec::new(),
                                        trailing_comments: Vec::new(),
                                    }));
                                    (eq, result)
                                } else {
                                    // Non-literal -> null-safe comparison
                                    // A search expression that is itself a comparison
                                    // is parenthesized wherever it is embedded, to
                                    // preserve grouping in the generated SQL.
                                    let needs_paren = matches!(
                                        &search,
                                        Expression::Eq(_)
                                            | Expression::Neq(_)
                                            | Expression::Gt(_)
                                            | Expression::Gte(_)
                                            | Expression::Lt(_)
                                            | Expression::Lte(_)
                                    );
                                    let search_ref = if needs_paren {
                                        Expression::Paren(Box::new(crate::expressions::Paren {
                                            this: search.clone(),
                                            trailing_comments: Vec::new(),
                                        }))
                                    } else {
                                        search.clone()
                                    };
                                    // Build: x = search OR (x IS NULL AND search IS NULL)
                                    let eq = Expression::Eq(Box::new(BinaryOp {
                                        left: this_expr.clone(),
                                        right: search_ref,
                                        left_comments: Vec::new(),
                                        operator_comments: Vec::new(),
                                        trailing_comments: Vec::new(),
                                    }));
                                    let search_in_null = if needs_paren {
                                        Expression::Paren(Box::new(crate::expressions::Paren {
                                            this: search.clone(),
                                            trailing_comments: Vec::new(),
                                        }))
                                    } else {
                                        search.clone()
                                    };
                                    let x_is_null = Expression::Is(Box::new(BinaryOp {
                                        left: this_expr.clone(),
                                        right: Expression::Null(crate::expressions::Null),
                                        left_comments: Vec::new(),
                                        operator_comments: Vec::new(),
                                        trailing_comments: Vec::new(),
                                    }));
                                    let search_is_null = Expression::Is(Box::new(BinaryOp {
                                        left: search_in_null,
                                        right: Expression::Null(crate::expressions::Null),
                                        left_comments: Vec::new(),
                                        operator_comments: Vec::new(),
                                        trailing_comments: Vec::new(),
                                    }));
                                    let both_null = Expression::And(Box::new(BinaryOp {
                                        left: x_is_null,
                                        right: search_is_null,
                                        left_comments: Vec::new(),
                                        operator_comments: Vec::new(),
                                        trailing_comments: Vec::new(),
                                    }));
                                    let condition = Expression::Or(Box::new(BinaryOp {
                                        left: eq,
                                        right: Expression::Paren(Box::new(
                                            crate::expressions::Paren {
                                                this: both_null,
                                                trailing_comments: Vec::new(),
                                            },
                                        )),
                                        left_comments: Vec::new(),
                                        operator_comments: Vec::new(),
                                        trailing_comments: Vec::new(),
                                    }));
                                    (condition, result)
                                }
                            })
                            .collect();
                        // Searched CASE (no operand), so the null-safe conditions
                        // above can be expressed directly.
                        Expression::Case(Box::new(Case {
                            operand: None,
                            whens,
                            else_: default,
                            comments: Vec::new(),
                        }))
                    };

                if let Expression::Decode(decode) = e {
                    // Structured Decode node: fields map straight onto the builder.
                    Ok(build_decode_case(
                        decode.this,
                        decode.search_results,
                        decode.default,
                    ))
                } else if let Expression::DecodeCase(dc) = e {
                    // DecodeCase has flat expressions: [x, s1, r1, s2, r2, ..., default?]
                    let mut exprs = dc.expressions;
                    if exprs.len() < 3 {
                        // Fewer than (x, search, result): cannot form a WHEN pair,
                        // so rebuild the node unchanged.
                        return Ok(Expression::DecodeCase(Box::new(
                            crate::expressions::DecodeCase { expressions: exprs },
                        )));
                    }
                    let this_expr = exprs.remove(0);
                    let mut pairs = Vec::new();
                    let mut default = None;
                    let mut i = 0;
                    while i + 1 < exprs.len() {
                        pairs.push((exprs[i].clone(), exprs[i + 1].clone()));
                        i += 2;
                    }
                    if i < exprs.len() {
                        // Odd remaining element is the default
                        default = Some(exprs[i].clone());
                    }
                    Ok(build_decode_case(this_expr, pairs, default))
                } else {
                    Ok(e)
                }
            }
22701
            Action::CreateTableLikeToCtas => {
                // CREATE TABLE a LIKE b -> CREATE TABLE a AS SELECT * FROM b LIMIT 0
                // (copies the column layout by selecting zero rows from the source).
                if let Expression::CreateTable(ct) = e {
                    // Find the first LIKE constraint (if any) and clone its source table.
                    let like_source = ct.constraints.iter().find_map(|c| {
                        if let crate::expressions::TableConstraint::Like { source, .. } = c {
                            Some(source.clone())
                        } else {
                            None
                        }
                    });
                    if let Some(source_table) = like_source {
                        let mut new_ct = *ct;
                        // The LIKE constraint is superseded by the AS SELECT below.
                        new_ct.constraints.clear();
                        // Build: SELECT * FROM b LIMIT 0
                        let select = Expression::Select(Box::new(crate::expressions::Select {
                            expressions: vec![Expression::Star(crate::expressions::Star {
                                table: None,
                                except: None,
                                replace: None,
                                rename: None,
                                trailing_comments: Vec::new(),
                                span: None,
                            })],
                            from: Some(crate::expressions::From {
                                expressions: vec![Expression::Table(source_table)],
                            }),
                            // LIMIT 0: schema only, no data copied.
                            limit: Some(crate::expressions::Limit {
                                this: Expression::Literal(Literal::Number("0".to_string())),
                                percent: false,
                                comments: Vec::new(),
                            }),
                            ..Default::default()
                        }));
                        new_ct.as_select = Some(select);
                        Ok(Expression::CreateTable(Box::new(new_ct)))
                    } else {
                        // No LIKE constraint: leave the statement untouched.
                        Ok(Expression::CreateTable(ct))
                    }
                } else {
                    Ok(e)
                }
            }
22744
            Action::CreateTableLikeToSelectInto => {
                // CREATE TABLE a LIKE b -> SELECT TOP 0 * INTO a FROM b AS temp
                // (T-SQL style: SELECT ... INTO creates the target table, and TOP 0
                // copies only the column layout).
                if let Expression::CreateTable(ct) = e {
                    // Find the first LIKE constraint (if any) and clone its source table.
                    let like_source = ct.constraints.iter().find_map(|c| {
                        if let crate::expressions::TableConstraint::Like { source, .. } = c {
                            Some(source.clone())
                        } else {
                            None
                        }
                    });
                    if let Some(source_table) = like_source {
                        let mut aliased_source = source_table;
                        // The source is aliased as "temp" in the generated statement.
                        aliased_source.alias = Some(Identifier::new("temp"));
                        // Build: SELECT TOP 0 * INTO a FROM b AS temp
                        let select = Expression::Select(Box::new(crate::expressions::Select {
                            expressions: vec![Expression::Star(crate::expressions::Star {
                                table: None,
                                except: None,
                                replace: None,
                                rename: None,
                                trailing_comments: Vec::new(),
                                span: None,
                            })],
                            from: Some(crate::expressions::From {
                                expressions: vec![Expression::Table(aliased_source)],
                            }),
                            // INTO <target>: the table being created.
                            into: Some(crate::expressions::SelectInto {
                                this: Expression::Table(ct.name.clone()),
                                temporary: false,
                                unlogged: false,
                                bulk_collect: false,
                                expressions: Vec::new(),
                            }),
                            // TOP 0: schema only, no rows copied.
                            top: Some(crate::expressions::Top {
                                this: Expression::Literal(Literal::Number("0".to_string())),
                                percent: false,
                                with_ties: false,
                                parenthesized: false,
                            }),
                            ..Default::default()
                        }));
                        Ok(select)
                    } else {
                        // No LIKE constraint: leave the statement untouched.
                        Ok(Expression::CreateTable(ct))
                    }
                } else {
                    Ok(e)
                }
            }
22794
22795 Action::CreateTableLikeToAs => {
22796 // CREATE TABLE a LIKE b -> CREATE TABLE a AS b (ClickHouse)
22797 if let Expression::CreateTable(ct) = e {
22798 let like_source = ct.constraints.iter().find_map(|c| {
22799 if let crate::expressions::TableConstraint::Like { source, .. } = c {
22800 Some(source.clone())
22801 } else {
22802 None
22803 }
22804 });
22805 if let Some(source_table) = like_source {
22806 let mut new_ct = *ct;
22807 new_ct.constraints.clear();
22808 // AS b (just a table reference, not a SELECT)
22809 new_ct.as_select = Some(Expression::Table(source_table));
22810 Ok(Expression::CreateTable(Box::new(new_ct)))
22811 } else {
22812 Ok(Expression::CreateTable(ct))
22813 }
22814 } else {
22815 Ok(e)
22816 }
22817 }
22818
22819 Action::TsOrDsToDateConvert => {
22820 // TS_OR_DS_TO_DATE(x[, fmt]) -> dialect-specific date conversion
22821 if let Expression::Function(f) = e {
22822 let mut args = f.args;
22823 let this = args.remove(0);
22824 let fmt = if !args.is_empty() {
22825 match &args[0] {
22826 Expression::Literal(Literal::String(s)) => Some(s.clone()),
22827 _ => None,
22828 }
22829 } else {
22830 None
22831 };
22832 Ok(Expression::TsOrDsToDate(Box::new(
22833 crate::expressions::TsOrDsToDate {
22834 this: Box::new(this),
22835 format: fmt,
22836 safe: None,
22837 },
22838 )))
22839 } else {
22840 Ok(e)
22841 }
22842 }
22843
22844 Action::TsOrDsToDateStrConvert => {
22845 // TS_OR_DS_TO_DATE_STR(x) -> SUBSTRING(CAST(x AS type), 1, 10)
22846 if let Expression::Function(f) = e {
22847 let arg = f.args.into_iter().next().unwrap();
22848 let str_type = match target {
22849 DialectType::DuckDB
22850 | DialectType::PostgreSQL
22851 | DialectType::Materialize => DataType::Text,
22852 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
22853 DataType::Custom {
22854 name: "STRING".to_string(),
22855 }
22856 }
22857 DialectType::Presto
22858 | DialectType::Trino
22859 | DialectType::Athena
22860 | DialectType::Drill => DataType::VarChar {
22861 length: None,
22862 parenthesized_length: false,
22863 },
22864 DialectType::MySQL | DialectType::Doris | DialectType::StarRocks => {
22865 DataType::Custom {
22866 name: "STRING".to_string(),
22867 }
22868 }
22869 _ => DataType::VarChar {
22870 length: None,
22871 parenthesized_length: false,
22872 },
22873 };
22874 let cast_expr = Expression::Cast(Box::new(Cast {
22875 this: arg,
22876 to: str_type,
22877 double_colon_syntax: false,
22878 trailing_comments: Vec::new(),
22879 format: None,
22880 default: None,
22881 }));
22882 Ok(Expression::Substring(Box::new(
22883 crate::expressions::SubstringFunc {
22884 this: cast_expr,
22885 start: Expression::number(1),
22886 length: Some(Expression::number(10)),
22887 from_for_syntax: false,
22888 },
22889 )))
22890 } else {
22891 Ok(e)
22892 }
22893 }
22894
22895 Action::DateStrToDateConvert => {
22896 // DATE_STR_TO_DATE(x) -> dialect-specific
22897 if let Expression::Function(f) = e {
22898 let arg = f.args.into_iter().next().unwrap();
22899 match target {
22900 DialectType::SQLite => {
22901 // SQLite: just the bare expression (dates are strings)
22902 Ok(arg)
22903 }
22904 _ => Ok(Expression::Cast(Box::new(Cast {
22905 this: arg,
22906 to: DataType::Date,
22907 double_colon_syntax: false,
22908 trailing_comments: Vec::new(),
22909 format: None,
22910 default: None,
22911 }))),
22912 }
22913 } else {
22914 Ok(e)
22915 }
22916 }
22917
22918 Action::TimeStrToDateConvert => {
22919 // TIME_STR_TO_DATE(x) -> dialect-specific
22920 if let Expression::Function(f) = e {
22921 let arg = f.args.into_iter().next().unwrap();
22922 match target {
22923 DialectType::Hive
22924 | DialectType::Doris
22925 | DialectType::StarRocks
22926 | DialectType::Snowflake => Ok(Expression::Function(Box::new(
22927 Function::new("TO_DATE".to_string(), vec![arg]),
22928 ))),
22929 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
22930 // Presto: CAST(x AS TIMESTAMP)
22931 Ok(Expression::Cast(Box::new(Cast {
22932 this: arg,
22933 to: DataType::Timestamp {
22934 timezone: false,
22935 precision: None,
22936 },
22937 double_colon_syntax: false,
22938 trailing_comments: Vec::new(),
22939 format: None,
22940 default: None,
22941 })))
22942 }
22943 _ => {
22944 // Default: CAST(x AS DATE)
22945 Ok(Expression::Cast(Box::new(Cast {
22946 this: arg,
22947 to: DataType::Date,
22948 double_colon_syntax: false,
22949 trailing_comments: Vec::new(),
22950 format: None,
22951 default: None,
22952 })))
22953 }
22954 }
22955 } else {
22956 Ok(e)
22957 }
22958 }
22959
22960 Action::TimeStrToTimeConvert => {
22961 // TIME_STR_TO_TIME(x[, zone]) -> dialect-specific CAST to timestamp type
22962 if let Expression::Function(f) = e {
22963 let mut args = f.args;
22964 let this = args.remove(0);
22965 let zone = if !args.is_empty() {
22966 match &args[0] {
22967 Expression::Literal(Literal::String(s)) => Some(s.clone()),
22968 _ => None,
22969 }
22970 } else {
22971 None
22972 };
22973 let has_zone = zone.is_some();
22974
22975 match target {
22976 DialectType::SQLite => {
22977 // SQLite: just the bare expression
22978 Ok(this)
22979 }
22980 DialectType::MySQL => {
22981 if has_zone {
22982 // MySQL with zone: TIMESTAMP(x)
22983 Ok(Expression::Function(Box::new(Function::new(
22984 "TIMESTAMP".to_string(),
22985 vec![this],
22986 ))))
22987 } else {
22988 // MySQL: CAST(x AS DATETIME) or with precision
22989 // Use DataType::Custom to avoid MySQL's transform_cast converting
22990 // CAST(x AS TIMESTAMP) -> TIMESTAMP(x)
22991 let precision =
22992 if let Expression::Literal(Literal::String(ref s)) = this {
22993 if let Some(dot_pos) = s.rfind('.') {
22994 let frac = &s[dot_pos + 1..];
22995 let digit_count = frac
22996 .chars()
22997 .take_while(|c| c.is_ascii_digit())
22998 .count();
22999 if digit_count > 0 {
23000 Some(digit_count)
23001 } else {
23002 None
23003 }
23004 } else {
23005 None
23006 }
23007 } else {
23008 None
23009 };
23010 let type_name = match precision {
23011 Some(p) => format!("DATETIME({})", p),
23012 None => "DATETIME".to_string(),
23013 };
23014 Ok(Expression::Cast(Box::new(Cast {
23015 this,
23016 to: DataType::Custom { name: type_name },
23017 double_colon_syntax: false,
23018 trailing_comments: Vec::new(),
23019 format: None,
23020 default: None,
23021 })))
23022 }
23023 }
23024 DialectType::ClickHouse => {
23025 if has_zone {
23026 // ClickHouse with zone: CAST(x AS DateTime64(6, 'zone'))
23027 // We need to strip the timezone offset from the literal if present
23028 let clean_this =
23029 if let Expression::Literal(Literal::String(ref s)) = this {
23030 // Strip timezone offset like "-08:00" or "+00:00"
23031 let re_offset = s.rfind(|c: char| c == '+' || c == '-');
23032 if let Some(offset_pos) = re_offset {
23033 if offset_pos > 10 {
23034 // After the date part
23035 let trimmed = s[..offset_pos].to_string();
23036 Expression::Literal(Literal::String(trimmed))
23037 } else {
23038 this.clone()
23039 }
23040 } else {
23041 this.clone()
23042 }
23043 } else {
23044 this.clone()
23045 };
23046 let zone_str = zone.unwrap();
23047 // Build: CAST(x AS DateTime64(6, 'zone'))
23048 let type_name = format!("DateTime64(6, '{}')", zone_str);
23049 Ok(Expression::Cast(Box::new(Cast {
23050 this: clean_this,
23051 to: DataType::Custom { name: type_name },
23052 double_colon_syntax: false,
23053 trailing_comments: Vec::new(),
23054 format: None,
23055 default: None,
23056 })))
23057 } else {
23058 Ok(Expression::Cast(Box::new(Cast {
23059 this,
23060 to: DataType::Custom {
23061 name: "DateTime64(6)".to_string(),
23062 },
23063 double_colon_syntax: false,
23064 trailing_comments: Vec::new(),
23065 format: None,
23066 default: None,
23067 })))
23068 }
23069 }
23070 DialectType::BigQuery => {
23071 if has_zone {
23072 // BigQuery with zone: CAST(x AS TIMESTAMP)
23073 Ok(Expression::Cast(Box::new(Cast {
23074 this,
23075 to: DataType::Timestamp {
23076 timezone: false,
23077 precision: None,
23078 },
23079 double_colon_syntax: false,
23080 trailing_comments: Vec::new(),
23081 format: None,
23082 default: None,
23083 })))
23084 } else {
23085 // BigQuery: CAST(x AS DATETIME) - Timestamp{tz:false} renders as DATETIME for BigQuery
23086 Ok(Expression::Cast(Box::new(Cast {
23087 this,
23088 to: DataType::Custom {
23089 name: "DATETIME".to_string(),
23090 },
23091 double_colon_syntax: false,
23092 trailing_comments: Vec::new(),
23093 format: None,
23094 default: None,
23095 })))
23096 }
23097 }
23098 DialectType::Doris => {
23099 // Doris: CAST(x AS DATETIME)
23100 Ok(Expression::Cast(Box::new(Cast {
23101 this,
23102 to: DataType::Custom {
23103 name: "DATETIME".to_string(),
23104 },
23105 double_colon_syntax: false,
23106 trailing_comments: Vec::new(),
23107 format: None,
23108 default: None,
23109 })))
23110 }
23111 DialectType::TSQL | DialectType::Fabric => {
23112 if has_zone {
23113 // TSQL with zone: CAST(x AS DATETIMEOFFSET) AT TIME ZONE 'UTC'
23114 let cast_expr = Expression::Cast(Box::new(Cast {
23115 this,
23116 to: DataType::Custom {
23117 name: "DATETIMEOFFSET".to_string(),
23118 },
23119 double_colon_syntax: false,
23120 trailing_comments: Vec::new(),
23121 format: None,
23122 default: None,
23123 }));
23124 Ok(Expression::AtTimeZone(Box::new(
23125 crate::expressions::AtTimeZone {
23126 this: cast_expr,
23127 zone: Expression::Literal(Literal::String(
23128 "UTC".to_string(),
23129 )),
23130 },
23131 )))
23132 } else {
23133 // TSQL: CAST(x AS DATETIME2)
23134 Ok(Expression::Cast(Box::new(Cast {
23135 this,
23136 to: DataType::Custom {
23137 name: "DATETIME2".to_string(),
23138 },
23139 double_colon_syntax: false,
23140 trailing_comments: Vec::new(),
23141 format: None,
23142 default: None,
23143 })))
23144 }
23145 }
23146 DialectType::DuckDB => {
23147 if has_zone {
23148 // DuckDB with zone: CAST(x AS TIMESTAMPTZ)
23149 Ok(Expression::Cast(Box::new(Cast {
23150 this,
23151 to: DataType::Timestamp {
23152 timezone: true,
23153 precision: None,
23154 },
23155 double_colon_syntax: false,
23156 trailing_comments: Vec::new(),
23157 format: None,
23158 default: None,
23159 })))
23160 } else {
23161 // DuckDB: CAST(x AS TIMESTAMP)
23162 Ok(Expression::Cast(Box::new(Cast {
23163 this,
23164 to: DataType::Timestamp {
23165 timezone: false,
23166 precision: None,
23167 },
23168 double_colon_syntax: false,
23169 trailing_comments: Vec::new(),
23170 format: None,
23171 default: None,
23172 })))
23173 }
23174 }
23175 DialectType::PostgreSQL
23176 | DialectType::Materialize
23177 | DialectType::RisingWave => {
23178 if has_zone {
23179 // PostgreSQL with zone: CAST(x AS TIMESTAMPTZ)
23180 Ok(Expression::Cast(Box::new(Cast {
23181 this,
23182 to: DataType::Timestamp {
23183 timezone: true,
23184 precision: None,
23185 },
23186 double_colon_syntax: false,
23187 trailing_comments: Vec::new(),
23188 format: None,
23189 default: None,
23190 })))
23191 } else {
23192 // PostgreSQL: CAST(x AS TIMESTAMP)
23193 Ok(Expression::Cast(Box::new(Cast {
23194 this,
23195 to: DataType::Timestamp {
23196 timezone: false,
23197 precision: None,
23198 },
23199 double_colon_syntax: false,
23200 trailing_comments: Vec::new(),
23201 format: None,
23202 default: None,
23203 })))
23204 }
23205 }
23206 DialectType::Snowflake => {
23207 if has_zone {
23208 // Snowflake with zone: CAST(x AS TIMESTAMPTZ)
23209 Ok(Expression::Cast(Box::new(Cast {
23210 this,
23211 to: DataType::Timestamp {
23212 timezone: true,
23213 precision: None,
23214 },
23215 double_colon_syntax: false,
23216 trailing_comments: Vec::new(),
23217 format: None,
23218 default: None,
23219 })))
23220 } else {
23221 // Snowflake: CAST(x AS TIMESTAMP)
23222 Ok(Expression::Cast(Box::new(Cast {
23223 this,
23224 to: DataType::Timestamp {
23225 timezone: false,
23226 precision: None,
23227 },
23228 double_colon_syntax: false,
23229 trailing_comments: Vec::new(),
23230 format: None,
23231 default: None,
23232 })))
23233 }
23234 }
23235 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
23236 if has_zone {
23237 // Presto/Trino with zone: CAST(x AS TIMESTAMP WITH TIME ZONE)
23238 // Check for precision from sub-second digits
23239 let precision =
23240 if let Expression::Literal(Literal::String(ref s)) = this {
23241 if let Some(dot_pos) = s.rfind('.') {
23242 let frac = &s[dot_pos + 1..];
23243 let digit_count = frac
23244 .chars()
23245 .take_while(|c| c.is_ascii_digit())
23246 .count();
23247 if digit_count > 0
23248 && matches!(target, DialectType::Trino)
23249 {
23250 Some(digit_count as u32)
23251 } else {
23252 None
23253 }
23254 } else {
23255 None
23256 }
23257 } else {
23258 None
23259 };
23260 let dt = if let Some(prec) = precision {
23261 DataType::Timestamp {
23262 timezone: true,
23263 precision: Some(prec),
23264 }
23265 } else {
23266 DataType::Timestamp {
23267 timezone: true,
23268 precision: None,
23269 }
23270 };
23271 Ok(Expression::Cast(Box::new(Cast {
23272 this,
23273 to: dt,
23274 double_colon_syntax: false,
23275 trailing_comments: Vec::new(),
23276 format: None,
23277 default: None,
23278 })))
23279 } else {
23280 // Check for sub-second precision for Trino
23281 let precision =
23282 if let Expression::Literal(Literal::String(ref s)) = this {
23283 if let Some(dot_pos) = s.rfind('.') {
23284 let frac = &s[dot_pos + 1..];
23285 let digit_count = frac
23286 .chars()
23287 .take_while(|c| c.is_ascii_digit())
23288 .count();
23289 if digit_count > 0
23290 && matches!(target, DialectType::Trino)
23291 {
23292 Some(digit_count as u32)
23293 } else {
23294 None
23295 }
23296 } else {
23297 None
23298 }
23299 } else {
23300 None
23301 };
23302 let dt = DataType::Timestamp {
23303 timezone: false,
23304 precision,
23305 };
23306 Ok(Expression::Cast(Box::new(Cast {
23307 this,
23308 to: dt,
23309 double_colon_syntax: false,
23310 trailing_comments: Vec::new(),
23311 format: None,
23312 default: None,
23313 })))
23314 }
23315 }
23316 DialectType::Redshift => {
23317 if has_zone {
23318 // Redshift with zone: CAST(x AS TIMESTAMP WITH TIME ZONE)
23319 Ok(Expression::Cast(Box::new(Cast {
23320 this,
23321 to: DataType::Timestamp {
23322 timezone: true,
23323 precision: None,
23324 },
23325 double_colon_syntax: false,
23326 trailing_comments: Vec::new(),
23327 format: None,
23328 default: None,
23329 })))
23330 } else {
23331 // Redshift: CAST(x AS TIMESTAMP)
23332 Ok(Expression::Cast(Box::new(Cast {
23333 this,
23334 to: DataType::Timestamp {
23335 timezone: false,
23336 precision: None,
23337 },
23338 double_colon_syntax: false,
23339 trailing_comments: Vec::new(),
23340 format: None,
23341 default: None,
23342 })))
23343 }
23344 }
23345 _ => {
23346 // Default: CAST(x AS TIMESTAMP)
23347 Ok(Expression::Cast(Box::new(Cast {
23348 this,
23349 to: DataType::Timestamp {
23350 timezone: false,
23351 precision: None,
23352 },
23353 double_colon_syntax: false,
23354 trailing_comments: Vec::new(),
23355 format: None,
23356 default: None,
23357 })))
23358 }
23359 }
23360 } else {
23361 Ok(e)
23362 }
23363 }
23364
23365 Action::DateToDateStrConvert => {
23366 // DATE_TO_DATE_STR(x) -> CAST(x AS text_type) per dialect
23367 if let Expression::Function(f) = e {
23368 let arg = f.args.into_iter().next().unwrap();
23369 let str_type = match target {
23370 DialectType::DuckDB => DataType::Text,
23371 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
23372 DataType::Custom {
23373 name: "STRING".to_string(),
23374 }
23375 }
23376 DialectType::Presto
23377 | DialectType::Trino
23378 | DialectType::Athena
23379 | DialectType::Drill => DataType::VarChar {
23380 length: None,
23381 parenthesized_length: false,
23382 },
23383 _ => DataType::VarChar {
23384 length: None,
23385 parenthesized_length: false,
23386 },
23387 };
23388 Ok(Expression::Cast(Box::new(Cast {
23389 this: arg,
23390 to: str_type,
23391 double_colon_syntax: false,
23392 trailing_comments: Vec::new(),
23393 format: None,
23394 default: None,
23395 })))
23396 } else {
23397 Ok(e)
23398 }
23399 }
23400
            Action::DateToDiConvert => {
                // DATE_TO_DI(x) -> CAST(format_func(x, fmt) AS INT)
                //
                // "DI" is a date encoded as a YYYYMMDD integer: the date is first
                // formatted with the target dialect's formatting function, then the
                // result is cast to the dialect's preferred integer spelling.
                if let Expression::Function(f) = e {
                    // Canonical form carries exactly one argument: the date expression.
                    let arg = f.args.into_iter().next().unwrap();
                    let inner = match target {
                        DialectType::DuckDB => {
                            // STRFTIME(x, '%Y%m%d')
                            Expression::Function(Box::new(Function::new(
                                "STRFTIME".to_string(),
                                vec![arg, Expression::string("%Y%m%d")],
                            )))
                        }
                        DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
                            // DATE_FORMAT(x, 'yyyyMMdd') -- Java-style pattern
                            Expression::Function(Box::new(Function::new(
                                "DATE_FORMAT".to_string(),
                                vec![arg, Expression::string("yyyyMMdd")],
                            )))
                        }
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // DATE_FORMAT(x, '%Y%m%d') -- MySQL-style pattern
                            Expression::Function(Box::new(Function::new(
                                "DATE_FORMAT".to_string(),
                                vec![arg, Expression::string("%Y%m%d")],
                            )))
                        }
                        DialectType::Drill => {
                            // TO_DATE(x, 'yyyyMMdd')
                            // NOTE(review): TO_DATE parses a string rather than formats a
                            // date in most dialects -- confirm this is intended for Drill.
                            Expression::Function(Box::new(Function::new(
                                "TO_DATE".to_string(),
                                vec![arg, Expression::string("yyyyMMdd")],
                            )))
                        }
                        _ => {
                            // Default: STRFTIME(x, '%Y%m%d')
                            Expression::Function(Box::new(Function::new(
                                "STRFTIME".to_string(),
                                vec![arg, Expression::string("%Y%m%d")],
                            )))
                        }
                    };
                    // Use INT (not INTEGER) for Presto/Trino and the other dialects
                    // listed below; everything else gets the regular Int data type.
                    let int_type = match target {
                        DialectType::Presto
                        | DialectType::Trino
                        | DialectType::Athena
                        | DialectType::TSQL
                        | DialectType::Fabric
                        | DialectType::SQLite
                        | DialectType::Redshift => DataType::Custom {
                            name: "INT".to_string(),
                        },
                        _ => DataType::Int {
                            length: None,
                            integer_spelling: false,
                        },
                    };
                    Ok(Expression::Cast(Box::new(Cast {
                        this: inner,
                        to: int_type,
                        double_colon_syntax: false,
                        trailing_comments: Vec::new(),
                        format: None,
                        default: None,
                    })))
                } else {
                    // Not the canonical function call; leave the expression untouched.
                    Ok(e)
                }
            }
23470
            Action::DiToDateConvert => {
                // DI_TO_DATE(x) -> dialect-specific integer-to-date conversion
                //
                // "DI" is a date encoded as a YYYYMMDD integer. Every branch first
                // casts the integer to the dialect's string type, then parses that
                // string back into a DATE.
                if let Expression::Function(f) = e {
                    // Canonical form carries exactly one argument: the integer expression.
                    let arg = f.args.into_iter().next().unwrap();
                    match target {
                        DialectType::DuckDB => {
                            // CAST(STRPTIME(CAST(x AS TEXT), '%Y%m%d') AS DATE)
                            let cast_text = Expression::Cast(Box::new(Cast {
                                this: arg,
                                to: DataType::Text,
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                            }));
                            let strptime = Expression::Function(Box::new(Function::new(
                                "STRPTIME".to_string(),
                                vec![cast_text, Expression::string("%Y%m%d")],
                            )));
                            // STRPTIME yields a timestamp; truncate it to a DATE.
                            Ok(Expression::Cast(Box::new(Cast {
                                this: strptime,
                                to: DataType::Date,
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                            })))
                        }
                        DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
                            // TO_DATE(CAST(x AS STRING), 'yyyyMMdd')
                            let cast_str = Expression::Cast(Box::new(Cast {
                                this: arg,
                                to: DataType::Custom {
                                    name: "STRING".to_string(),
                                },
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                            }));
                            Ok(Expression::Function(Box::new(Function::new(
                                "TO_DATE".to_string(),
                                vec![cast_str, Expression::string("yyyyMMdd")],
                            ))))
                        }
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // CAST(DATE_PARSE(CAST(x AS VARCHAR), '%Y%m%d') AS DATE)
                            let cast_varchar = Expression::Cast(Box::new(Cast {
                                this: arg,
                                to: DataType::VarChar {
                                    length: None,
                                    parenthesized_length: false,
                                },
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                            }));
                            let date_parse = Expression::Function(Box::new(Function::new(
                                "DATE_PARSE".to_string(),
                                vec![cast_varchar, Expression::string("%Y%m%d")],
                            )));
                            // DATE_PARSE yields a timestamp; truncate it to a DATE.
                            Ok(Expression::Cast(Box::new(Cast {
                                this: date_parse,
                                to: DataType::Date,
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                            })))
                        }
                        DialectType::Drill => {
                            // TO_DATE(CAST(x AS VARCHAR), 'yyyyMMdd')
                            let cast_varchar = Expression::Cast(Box::new(Cast {
                                this: arg,
                                to: DataType::VarChar {
                                    length: None,
                                    parenthesized_length: false,
                                },
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                            }));
                            Ok(Expression::Function(Box::new(Function::new(
                                "TO_DATE".to_string(),
                                vec![cast_varchar, Expression::string("yyyyMMdd")],
                            ))))
                        }
                        // No dedicated lowering for the remaining dialects: keep the
                        // canonical DI_TO_DATE call for the generator to handle.
                        _ => Ok(Expression::Function(Box::new(Function::new(
                            "DI_TO_DATE".to_string(),
                            vec![arg],
                        )))),
                    }
                } else {
                    // Not the canonical function call; leave the expression untouched.
                    Ok(e)
                }
            }
23569
23570 Action::TsOrDiToDiConvert => {
23571 // TS_OR_DI_TO_DI(x) -> CAST(SUBSTR(REPLACE(CAST(x AS type), '-', ''), 1, 8) AS INT)
23572 if let Expression::Function(f) = e {
23573 let arg = f.args.into_iter().next().unwrap();
23574 let str_type = match target {
23575 DialectType::DuckDB => DataType::Text,
23576 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
23577 DataType::Custom {
23578 name: "STRING".to_string(),
23579 }
23580 }
23581 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
23582 DataType::VarChar {
23583 length: None,
23584 parenthesized_length: false,
23585 }
23586 }
23587 _ => DataType::VarChar {
23588 length: None,
23589 parenthesized_length: false,
23590 },
23591 };
23592 let cast_str = Expression::Cast(Box::new(Cast {
23593 this: arg,
23594 to: str_type,
23595 double_colon_syntax: false,
23596 trailing_comments: Vec::new(),
23597 format: None,
23598 default: None,
23599 }));
23600 let replace_expr = Expression::Function(Box::new(Function::new(
23601 "REPLACE".to_string(),
23602 vec![cast_str, Expression::string("-"), Expression::string("")],
23603 )));
23604 let substr_name = match target {
23605 DialectType::DuckDB
23606 | DialectType::Hive
23607 | DialectType::Spark
23608 | DialectType::Databricks => "SUBSTR",
23609 _ => "SUBSTR",
23610 };
23611 let substr = Expression::Function(Box::new(Function::new(
23612 substr_name.to_string(),
23613 vec![replace_expr, Expression::number(1), Expression::number(8)],
23614 )));
23615 // Use INT (not INTEGER) for Presto/Trino etc.
23616 let int_type = match target {
23617 DialectType::Presto
23618 | DialectType::Trino
23619 | DialectType::Athena
23620 | DialectType::TSQL
23621 | DialectType::Fabric
23622 | DialectType::SQLite
23623 | DialectType::Redshift => DataType::Custom {
23624 name: "INT".to_string(),
23625 },
23626 _ => DataType::Int {
23627 length: None,
23628 integer_spelling: false,
23629 },
23630 };
23631 Ok(Expression::Cast(Box::new(Cast {
23632 this: substr,
23633 to: int_type,
23634 double_colon_syntax: false,
23635 trailing_comments: Vec::new(),
23636 format: None,
23637 default: None,
23638 })))
23639 } else {
23640 Ok(e)
23641 }
23642 }
23643
            Action::UnixToStrConvert => {
                // UNIX_TO_STR(x, fmt) -> convert to Expression::UnixToStr for generator
                //
                // Three cases: literal format -> dedicated UnixToStr node; non-literal
                // format -> expand to the target dialect's call tree directly; no
                // format -> UnixToStr node with no format.
                if let Expression::Function(f) = e {
                    let mut args = f.args;
                    // First argument is the epoch expression (assumed present; an
                    // empty argument list would panic here), second is the optional
                    // output format.
                    let this = args.remove(0);
                    let fmt_expr = if !args.is_empty() {
                        Some(args.remove(0))
                    } else {
                        None
                    };

                    // Check if format is a string literal
                    let fmt_str = fmt_expr.as_ref().and_then(|f| {
                        if let Expression::Literal(Literal::String(s)) = f {
                            Some(s.clone())
                        } else {
                            None
                        }
                    });

                    if let Some(fmt_string) = fmt_str {
                        // String literal format -> use UnixToStr expression (generator handles it)
                        Ok(Expression::UnixToStr(Box::new(
                            crate::expressions::UnixToStr {
                                this: Box::new(this),
                                format: Some(fmt_string),
                            },
                        )))
                    } else if let Some(fmt_e) = fmt_expr {
                        // Non-literal format (e.g., identifier `y`) -> build target expression directly
                        match target {
                            DialectType::DuckDB => {
                                // STRFTIME(TO_TIMESTAMP(x), y)
                                let to_ts = Expression::Function(Box::new(Function::new(
                                    "TO_TIMESTAMP".to_string(),
                                    vec![this],
                                )));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "STRFTIME".to_string(),
                                    vec![to_ts, fmt_e],
                                ))))
                            }
                            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                                // DATE_FORMAT(FROM_UNIXTIME(x), y)
                                let from_unix = Expression::Function(Box::new(Function::new(
                                    "FROM_UNIXTIME".to_string(),
                                    vec![this],
                                )));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_FORMAT".to_string(),
                                    vec![from_unix, fmt_e],
                                ))))
                            }
                            DialectType::Hive
                            | DialectType::Spark
                            | DialectType::Databricks
                            | DialectType::Doris
                            | DialectType::StarRocks => {
                                // FROM_UNIXTIME(x, y) takes the format directly.
                                Ok(Expression::Function(Box::new(Function::new(
                                    "FROM_UNIXTIME".to_string(),
                                    vec![this, fmt_e],
                                ))))
                            }
                            _ => {
                                // Default: keep as UNIX_TO_STR(x, y)
                                Ok(Expression::Function(Box::new(Function::new(
                                    "UNIX_TO_STR".to_string(),
                                    vec![this, fmt_e],
                                ))))
                            }
                        }
                    } else {
                        // No format argument at all -> UnixToStr node with no format.
                        Ok(Expression::UnixToStr(Box::new(
                            crate::expressions::UnixToStr {
                                this: Box::new(this),
                                format: None,
                            },
                        )))
                    }
                } else {
                    // Not the canonical function call; leave the expression untouched.
                    Ok(e)
                }
            }
23728
23729 Action::UnixToTimeConvert => {
23730 // UNIX_TO_TIME(x) -> convert to Expression::UnixToTime for generator
23731 if let Expression::Function(f) = e {
23732 let arg = f.args.into_iter().next().unwrap();
23733 Ok(Expression::UnixToTime(Box::new(
23734 crate::expressions::UnixToTime {
23735 this: Box::new(arg),
23736 scale: None,
23737 zone: None,
23738 hours: None,
23739 minutes: None,
23740 format: None,
23741 target_type: None,
23742 },
23743 )))
23744 } else {
23745 Ok(e)
23746 }
23747 }
23748
            Action::UnixToTimeStrConvert => {
                // UNIX_TO_TIME_STR(x) -> dialect-specific epoch-to-string conversion
                if let Expression::Function(f) = e {
                    // Canonical form carries exactly one argument: the epoch expression.
                    let arg = f.args.into_iter().next().unwrap();
                    match target {
                        DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
                            // FROM_UNIXTIME(x) -- already yields a string in these dialects,
                            // so no outer cast is needed.
                            Ok(Expression::Function(Box::new(Function::new(
                                "FROM_UNIXTIME".to_string(),
                                vec![arg],
                            ))))
                        }
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // CAST(FROM_UNIXTIME(x) AS VARCHAR)
                            let from_unix = Expression::Function(Box::new(Function::new(
                                "FROM_UNIXTIME".to_string(),
                                vec![arg],
                            )));
                            Ok(Expression::Cast(Box::new(Cast {
                                this: from_unix,
                                to: DataType::VarChar {
                                    length: None,
                                    parenthesized_length: false,
                                },
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                            })))
                        }
                        DialectType::DuckDB => {
                            // CAST(TO_TIMESTAMP(x) AS TEXT)
                            let to_ts = Expression::Function(Box::new(Function::new(
                                "TO_TIMESTAMP".to_string(),
                                vec![arg],
                            )));
                            Ok(Expression::Cast(Box::new(Cast {
                                this: to_ts,
                                to: DataType::Text,
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                            })))
                        }
                        // No dedicated lowering: keep the canonical call for the generator.
                        _ => Ok(Expression::Function(Box::new(Function::new(
                            "UNIX_TO_TIME_STR".to_string(),
                            vec![arg],
                        )))),
                    }
                } else {
                    // Not the canonical function call; leave the expression untouched.
                    Ok(e)
                }
            }
23803
23804 Action::TimeToUnixConvert => {
23805 // TIME_TO_UNIX(x) -> convert to Expression::TimeToUnix for generator
23806 if let Expression::Function(f) = e {
23807 let arg = f.args.into_iter().next().unwrap();
23808 Ok(Expression::TimeToUnix(Box::new(
23809 crate::expressions::UnaryFunc {
23810 this: arg,
23811 original_name: None,
23812 },
23813 )))
23814 } else {
23815 Ok(e)
23816 }
23817 }
23818
23819 Action::TimeToStrConvert => {
23820 // TIME_TO_STR(x, fmt) -> convert to Expression::TimeToStr for generator
23821 if let Expression::Function(f) = e {
23822 let mut args = f.args;
23823 let this = args.remove(0);
23824 let fmt = match args.remove(0) {
23825 Expression::Literal(Literal::String(s)) => s,
23826 other => {
23827 return Ok(Expression::Function(Box::new(Function::new(
23828 "TIME_TO_STR".to_string(),
23829 vec![this, other],
23830 ))));
23831 }
23832 };
23833 Ok(Expression::TimeToStr(Box::new(
23834 crate::expressions::TimeToStr {
23835 this: Box::new(this),
23836 format: fmt,
23837 culture: None,
23838 zone: None,
23839 },
23840 )))
23841 } else {
23842 Ok(e)
23843 }
23844 }
23845
23846 Action::StrToUnixConvert => {
23847 // STR_TO_UNIX(x, fmt) -> convert to Expression::StrToUnix for generator
23848 if let Expression::Function(f) = e {
23849 let mut args = f.args;
23850 let this = args.remove(0);
23851 let fmt = match args.remove(0) {
23852 Expression::Literal(Literal::String(s)) => s,
23853 other => {
23854 return Ok(Expression::Function(Box::new(Function::new(
23855 "STR_TO_UNIX".to_string(),
23856 vec![this, other],
23857 ))));
23858 }
23859 };
23860 Ok(Expression::StrToUnix(Box::new(
23861 crate::expressions::StrToUnix {
23862 this: Some(Box::new(this)),
23863 format: Some(fmt),
23864 },
23865 )))
23866 } else {
23867 Ok(e)
23868 }
23869 }
23870
            Action::TimeStrToUnixConvert => {
                // TIME_STR_TO_UNIX(x) -> dialect-specific string-to-epoch conversion
                if let Expression::Function(f) = e {
                    // Canonical form carries exactly one argument: the time string.
                    let arg = f.args.into_iter().next().unwrap();
                    match target {
                        DialectType::DuckDB => {
                            // EPOCH(CAST(x AS TIMESTAMP))
                            let cast_ts = Expression::Cast(Box::new(Cast {
                                this: arg,
                                to: DataType::Timestamp {
                                    timezone: false,
                                    precision: None,
                                },
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                            }));
                            Ok(Expression::Function(Box::new(Function::new(
                                "EPOCH".to_string(),
                                vec![cast_ts],
                            ))))
                        }
                        // NOTE(review): Spark/Databricks are not grouped with Hive here
                        // (unlike most sibling actions) and fall through to the default
                        // passthrough branch -- confirm that is intentional.
                        DialectType::Hive
                        | DialectType::Doris
                        | DialectType::StarRocks
                        | DialectType::MySQL => {
                            // UNIX_TIMESTAMP(x)
                            Ok(Expression::Function(Box::new(Function::new(
                                "UNIX_TIMESTAMP".to_string(),
                                vec![arg],
                            ))))
                        }
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // TO_UNIXTIME(DATE_PARSE(x, '%Y-%m-%d %T'))
                            let date_parse = Expression::Function(Box::new(Function::new(
                                "DATE_PARSE".to_string(),
                                vec![arg, Expression::string("%Y-%m-%d %T")],
                            )));
                            Ok(Expression::Function(Box::new(Function::new(
                                "TO_UNIXTIME".to_string(),
                                vec![date_parse],
                            ))))
                        }
                        // No dedicated lowering: keep the canonical call for the generator.
                        _ => Ok(Expression::Function(Box::new(Function::new(
                            "TIME_STR_TO_UNIX".to_string(),
                            vec![arg],
                        )))),
                    }
                } else {
                    // Not the canonical function call; leave the expression untouched.
                    Ok(e)
                }
            }
23924
23925 Action::TimeToTimeStrConvert => {
23926 // TIME_TO_TIME_STR(x) -> CAST(x AS str_type) per dialect
23927 if let Expression::Function(f) = e {
23928 let arg = f.args.into_iter().next().unwrap();
23929 let str_type = match target {
23930 DialectType::DuckDB => DataType::Text,
23931 DialectType::Hive
23932 | DialectType::Spark
23933 | DialectType::Databricks
23934 | DialectType::Doris
23935 | DialectType::StarRocks => DataType::Custom {
23936 name: "STRING".to_string(),
23937 },
23938 DialectType::Redshift => DataType::Custom {
23939 name: "VARCHAR(MAX)".to_string(),
23940 },
23941 _ => DataType::VarChar {
23942 length: None,
23943 parenthesized_length: false,
23944 },
23945 };
23946 Ok(Expression::Cast(Box::new(Cast {
23947 this: arg,
23948 to: str_type,
23949 double_colon_syntax: false,
23950 trailing_comments: Vec::new(),
23951 format: None,
23952 default: None,
23953 })))
23954 } else {
23955 Ok(e)
23956 }
23957 }
23958
            Action::DateTruncSwapArgs => {
                // DATE_TRUNC('unit', x) from Generic -> target-specific argument
                // order / function name.
                if let Expression::Function(f) = e {
                    if f.args.len() == 2 {
                        // Clone so the fallback arms below can still return the
                        // original function `f` untouched.
                        let unit_arg = f.args[0].clone();
                        let expr_arg = f.args[1].clone();
                        // Extract unit string from the first arg
                        let unit_str = match &unit_arg {
                            Expression::Literal(Literal::String(s)) => s.to_uppercase(),
                            // Non-literal unit: leave the call unchanged.
                            _ => return Ok(Expression::Function(f)),
                        };
                        match target {
                            DialectType::BigQuery => {
                                // BigQuery: DATE_TRUNC(x, UNIT) - unquoted unit, modeled
                                // as a bare column reference so it renders unquoted.
                                let unit_ident =
                                    Expression::Column(crate::expressions::Column {
                                        name: crate::expressions::Identifier::new(unit_str),
                                        table: None,
                                        join_mark: false,
                                        trailing_comments: Vec::new(),
                                        span: None,
                                    });
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![expr_arg, unit_ident],
                                ))))
                            }
                            DialectType::Doris => {
                                // Doris: DATE_TRUNC(x, 'UNIT')
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![expr_arg, Expression::string(&unit_str)],
                                ))))
                            }
                            DialectType::StarRocks => {
                                // StarRocks: DATE_TRUNC('UNIT', x) - keep standard order
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![Expression::string(&unit_str), expr_arg],
                                ))))
                            }
                            DialectType::Spark | DialectType::Databricks => {
                                // Spark: TRUNC(x, 'UNIT')
                                Ok(Expression::Function(Box::new(Function::new(
                                    "TRUNC".to_string(),
                                    vec![expr_arg, Expression::string(&unit_str)],
                                ))))
                            }
                            DialectType::MySQL => {
                                // MySQL has no DATE_TRUNC; expand to an equivalent
                                // expression via the dedicated helper.
                                Self::date_trunc_to_mysql(&unit_str, &expr_arg)
                            }
                            // Target already accepts the generic argument order.
                            _ => Ok(Expression::Function(f)),
                        }
                    } else {
                        // Unexpected arity: pass through unchanged.
                        Ok(Expression::Function(f))
                    }
                } else {
                    // Not the canonical function call; leave the expression untouched.
                    Ok(e)
                }
            }
24020
            Action::TimestampTruncConvert => {
                // TIMESTAMP_TRUNC(x, UNIT[, tz]) from Generic -> target-specific
                if let Expression::Function(f) = e {
                    if f.args.len() >= 2 {
                        // Clone the pieces so the fallback paths can still return the
                        // original function `f` untouched.
                        let expr_arg = f.args[0].clone();
                        let unit_arg = f.args[1].clone();
                        // Optional third argument is a timezone.
                        let tz_arg = if f.args.len() >= 3 {
                            Some(f.args[2].clone())
                        } else {
                            None
                        };
                        // Extract unit string; accepts a string literal or a bare
                        // identifier (parsed as a column reference).
                        let unit_str = match &unit_arg {
                            Expression::Literal(Literal::String(s)) => s.to_uppercase(),
                            Expression::Column(c) => c.name.name.to_uppercase(),
                            _ => {
                                return Ok(Expression::Function(f));
                            }
                        };
                        match target {
                            DialectType::Spark | DialectType::Databricks => {
                                // Spark: DATE_TRUNC('UNIT', x)
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![Expression::string(&unit_str), expr_arg],
                                ))))
                            }
                            DialectType::Doris | DialectType::StarRocks => {
                                // Doris: DATE_TRUNC(x, 'UNIT')
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![expr_arg, Expression::string(&unit_str)],
                                ))))
                            }
                            DialectType::BigQuery => {
                                // BigQuery: TIMESTAMP_TRUNC(x, UNIT) - keep but with unquoted unit
                                let unit_ident =
                                    Expression::Column(crate::expressions::Column {
                                        name: crate::expressions::Identifier::new(unit_str),
                                        table: None,
                                        join_mark: false,
                                        trailing_comments: Vec::new(),
                                        span: None,
                                    });
                                let mut args = vec![expr_arg, unit_ident];
                                // BigQuery keeps the optional timezone argument.
                                if let Some(tz) = tz_arg {
                                    args.push(tz);
                                }
                                Ok(Expression::Function(Box::new(Function::new(
                                    "TIMESTAMP_TRUNC".to_string(),
                                    args,
                                ))))
                            }
                            DialectType::DuckDB => {
                                // DuckDB with timezone: DATE_TRUNC('UNIT', x AT TIME ZONE 'tz') AT TIME ZONE 'tz'
                                if let Some(tz) = tz_arg {
                                    // NOTE(review): a non-literal timezone expression
                                    // silently falls back to 'UTC' -- confirm intended.
                                    let tz_str = match &tz {
                                        Expression::Literal(Literal::String(s)) => s.clone(),
                                        _ => "UTC".to_string(),
                                    };
                                    // x AT TIME ZONE 'tz'
                                    let at_tz = Expression::AtTimeZone(Box::new(
                                        crate::expressions::AtTimeZone {
                                            this: expr_arg,
                                            zone: Expression::string(&tz_str),
                                        },
                                    ));
                                    // DATE_TRUNC('UNIT', x AT TIME ZONE 'tz')
                                    let trunc = Expression::Function(Box::new(Function::new(
                                        "DATE_TRUNC".to_string(),
                                        vec![Expression::string(&unit_str), at_tz],
                                    )));
                                    // DATE_TRUNC(...) AT TIME ZONE 'tz'
                                    Ok(Expression::AtTimeZone(Box::new(
                                        crate::expressions::AtTimeZone {
                                            this: trunc,
                                            zone: Expression::string(&tz_str),
                                        },
                                    )))
                                } else {
                                    // No timezone: plain DATE_TRUNC('UNIT', x).
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "DATE_TRUNC".to_string(),
                                        vec![Expression::string(&unit_str), expr_arg],
                                    ))))
                                }
                            }
                            DialectType::Presto
                            | DialectType::Trino
                            | DialectType::Athena
                            | DialectType::Snowflake => {
                                // Presto/Snowflake: DATE_TRUNC('UNIT', x) - drop timezone
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![Expression::string(&unit_str), expr_arg],
                                ))))
                            }
                            _ => {
                                // For most dialects: DATE_TRUNC('UNIT', x) + tz handling
                                let mut args = vec![Expression::string(&unit_str), expr_arg];
                                if let Some(tz) = tz_arg {
                                    args.push(tz);
                                }
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    args,
                                ))))
                            }
                        }
                    } else {
                        // Unexpected arity: pass through unchanged.
                        Ok(Expression::Function(f))
                    }
                } else {
                    // Not the canonical function call; leave the expression untouched.
                    Ok(e)
                }
            }
24136
24137 Action::StrToDateConvert => {
24138 // STR_TO_DATE(x, fmt) from Generic -> dialect-specific date parsing
24139 if let Expression::Function(f) = e {
24140 if f.args.len() == 2 {
24141 let mut args = f.args;
24142 let this = args.remove(0);
24143 let fmt_expr = args.remove(0);
24144 let fmt_str = match &fmt_expr {
24145 Expression::Literal(Literal::String(s)) => Some(s.clone()),
24146 _ => None,
24147 };
24148 let default_date = "%Y-%m-%d";
24149 let default_time = "%Y-%m-%d %H:%M:%S";
24150 let is_default = fmt_str
24151 .as_ref()
24152 .map_or(false, |f| f == default_date || f == default_time);
24153
24154 if is_default {
24155 // Default format: handle per-dialect
24156 match target {
24157 DialectType::MySQL
24158 | DialectType::Doris
24159 | DialectType::StarRocks => {
24160 // Keep STR_TO_DATE(x, fmt) as-is
24161 Ok(Expression::Function(Box::new(Function::new(
24162 "STR_TO_DATE".to_string(),
24163 vec![this, fmt_expr],
24164 ))))
24165 }
24166 DialectType::Hive => {
24167 // Hive: CAST(x AS DATE)
24168 Ok(Expression::Cast(Box::new(Cast {
24169 this,
24170 to: DataType::Date,
24171 double_colon_syntax: false,
24172 trailing_comments: Vec::new(),
24173 format: None,
24174 default: None,
24175 })))
24176 }
24177 DialectType::Presto
24178 | DialectType::Trino
24179 | DialectType::Athena => {
24180 // Presto: CAST(DATE_PARSE(x, '%Y-%m-%d') AS DATE)
24181 let date_parse =
24182 Expression::Function(Box::new(Function::new(
24183 "DATE_PARSE".to_string(),
24184 vec![this, fmt_expr],
24185 )));
24186 Ok(Expression::Cast(Box::new(Cast {
24187 this: date_parse,
24188 to: DataType::Date,
24189 double_colon_syntax: false,
24190 trailing_comments: Vec::new(),
24191 format: None,
24192 default: None,
24193 })))
24194 }
24195 _ => {
24196 // Others: TsOrDsToDate (delegates to generator)
24197 Ok(Expression::TsOrDsToDate(Box::new(
24198 crate::expressions::TsOrDsToDate {
24199 this: Box::new(this),
24200 format: None,
24201 safe: None,
24202 },
24203 )))
24204 }
24205 }
24206 } else if let Some(fmt) = fmt_str {
24207 match target {
24208 DialectType::Doris
24209 | DialectType::StarRocks
24210 | DialectType::MySQL => {
24211 // Keep STR_TO_DATE but with normalized format (%H:%M:%S -> %T, %-d -> %e)
24212 let mut normalized = fmt.clone();
24213 normalized = normalized.replace("%-d", "%e");
24214 normalized = normalized.replace("%-m", "%c");
24215 normalized = normalized.replace("%H:%M:%S", "%T");
24216 Ok(Expression::Function(Box::new(Function::new(
24217 "STR_TO_DATE".to_string(),
24218 vec![this, Expression::string(&normalized)],
24219 ))))
24220 }
24221 DialectType::Hive => {
24222 // Hive: CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(x, java_fmt)) AS DATE)
24223 let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
24224 let unix_ts =
24225 Expression::Function(Box::new(Function::new(
24226 "UNIX_TIMESTAMP".to_string(),
24227 vec![this, Expression::string(&java_fmt)],
24228 )));
24229 let from_unix =
24230 Expression::Function(Box::new(Function::new(
24231 "FROM_UNIXTIME".to_string(),
24232 vec![unix_ts],
24233 )));
24234 Ok(Expression::Cast(Box::new(Cast {
24235 this: from_unix,
24236 to: DataType::Date,
24237 double_colon_syntax: false,
24238 trailing_comments: Vec::new(),
24239 format: None,
24240 default: None,
24241 })))
24242 }
24243 DialectType::Spark | DialectType::Databricks => {
24244 // Spark: TO_DATE(x, java_fmt)
24245 let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
24246 Ok(Expression::Function(Box::new(Function::new(
24247 "TO_DATE".to_string(),
24248 vec![this, Expression::string(&java_fmt)],
24249 ))))
24250 }
24251 DialectType::Drill => {
24252 // Drill: TO_DATE(x, java_fmt) with T quoted as 'T' in Java format
24253 // The generator's string literal escaping will double the quotes: 'T' -> ''T''
24254 let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
24255 let java_fmt = java_fmt.replace('T', "'T'");
24256 Ok(Expression::Function(Box::new(Function::new(
24257 "TO_DATE".to_string(),
24258 vec![this, Expression::string(&java_fmt)],
24259 ))))
24260 }
24261 _ => {
24262 // For other dialects: use TsOrDsToDate which delegates to generator
24263 Ok(Expression::TsOrDsToDate(Box::new(
24264 crate::expressions::TsOrDsToDate {
24265 this: Box::new(this),
24266 format: Some(fmt),
24267 safe: None,
24268 },
24269 )))
24270 }
24271 }
24272 } else {
24273 // Non-string format - keep as-is
24274 let mut new_args = Vec::new();
24275 new_args.push(this);
24276 new_args.push(fmt_expr);
24277 Ok(Expression::Function(Box::new(Function::new(
24278 "STR_TO_DATE".to_string(),
24279 new_args,
24280 ))))
24281 }
24282 } else {
24283 Ok(Expression::Function(f))
24284 }
24285 } else {
24286 Ok(e)
24287 }
24288 }
24289
24290 Action::TsOrDsAddConvert => {
24291 // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> dialect-specific DATE_ADD
24292 if let Expression::Function(f) = e {
24293 if f.args.len() == 3 {
24294 let mut args = f.args;
24295 let x = args.remove(0);
24296 let n = args.remove(0);
24297 let unit_expr = args.remove(0);
24298 let unit_str = match &unit_expr {
24299 Expression::Literal(Literal::String(s)) => s.to_uppercase(),
24300 _ => "DAY".to_string(),
24301 };
24302
24303 match target {
24304 DialectType::Hive
24305 | DialectType::Spark
24306 | DialectType::Databricks => {
24307 // DATE_ADD(x, n) - only supports DAY unit
24308 Ok(Expression::Function(Box::new(Function::new(
24309 "DATE_ADD".to_string(),
24310 vec![x, n],
24311 ))))
24312 }
24313 DialectType::MySQL => {
24314 // DATE_ADD(x, INTERVAL n UNIT)
24315 let iu = match unit_str.to_uppercase().as_str() {
24316 "YEAR" => crate::expressions::IntervalUnit::Year,
24317 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
24318 "MONTH" => crate::expressions::IntervalUnit::Month,
24319 "WEEK" => crate::expressions::IntervalUnit::Week,
24320 "HOUR" => crate::expressions::IntervalUnit::Hour,
24321 "MINUTE" => crate::expressions::IntervalUnit::Minute,
24322 "SECOND" => crate::expressions::IntervalUnit::Second,
24323 _ => crate::expressions::IntervalUnit::Day,
24324 };
24325 let interval = Expression::Interval(Box::new(
24326 crate::expressions::Interval {
24327 this: Some(n),
24328 unit: Some(
24329 crate::expressions::IntervalUnitSpec::Simple {
24330 unit: iu,
24331 use_plural: false,
24332 },
24333 ),
24334 },
24335 ));
24336 Ok(Expression::Function(Box::new(Function::new(
24337 "DATE_ADD".to_string(),
24338 vec![x, interval],
24339 ))))
24340 }
24341 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
24342 // DATE_ADD('UNIT', n, CAST(CAST(x AS TIMESTAMP) AS DATE))
24343 let cast_ts = Expression::Cast(Box::new(Cast {
24344 this: x,
24345 to: DataType::Timestamp {
24346 precision: None,
24347 timezone: false,
24348 },
24349 double_colon_syntax: false,
24350 trailing_comments: Vec::new(),
24351 format: None,
24352 default: None,
24353 }));
24354 let cast_date = Expression::Cast(Box::new(Cast {
24355 this: cast_ts,
24356 to: DataType::Date,
24357 double_colon_syntax: false,
24358 trailing_comments: Vec::new(),
24359 format: None,
24360 default: None,
24361 }));
24362 Ok(Expression::Function(Box::new(Function::new(
24363 "DATE_ADD".to_string(),
24364 vec![Expression::string(&unit_str), n, cast_date],
24365 ))))
24366 }
24367 DialectType::DuckDB => {
24368 // CAST(x AS DATE) + INTERVAL n UNIT
24369 let cast_date = Expression::Cast(Box::new(Cast {
24370 this: x,
24371 to: DataType::Date,
24372 double_colon_syntax: false,
24373 trailing_comments: Vec::new(),
24374 format: None,
24375 default: None,
24376 }));
24377 let iu = match unit_str.to_uppercase().as_str() {
24378 "YEAR" => crate::expressions::IntervalUnit::Year,
24379 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
24380 "MONTH" => crate::expressions::IntervalUnit::Month,
24381 "WEEK" => crate::expressions::IntervalUnit::Week,
24382 "HOUR" => crate::expressions::IntervalUnit::Hour,
24383 "MINUTE" => crate::expressions::IntervalUnit::Minute,
24384 "SECOND" => crate::expressions::IntervalUnit::Second,
24385 _ => crate::expressions::IntervalUnit::Day,
24386 };
24387 let interval = Expression::Interval(Box::new(
24388 crate::expressions::Interval {
24389 this: Some(n),
24390 unit: Some(
24391 crate::expressions::IntervalUnitSpec::Simple {
24392 unit: iu,
24393 use_plural: false,
24394 },
24395 ),
24396 },
24397 ));
24398 Ok(Expression::Add(Box::new(crate::expressions::BinaryOp {
24399 left: cast_date,
24400 right: interval,
24401 left_comments: Vec::new(),
24402 operator_comments: Vec::new(),
24403 trailing_comments: Vec::new(),
24404 })))
24405 }
24406 DialectType::Drill => {
24407 // DATE_ADD(CAST(x AS DATE), INTERVAL n UNIT)
24408 let cast_date = Expression::Cast(Box::new(Cast {
24409 this: x,
24410 to: DataType::Date,
24411 double_colon_syntax: false,
24412 trailing_comments: Vec::new(),
24413 format: None,
24414 default: None,
24415 }));
24416 let iu = match unit_str.to_uppercase().as_str() {
24417 "YEAR" => crate::expressions::IntervalUnit::Year,
24418 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
24419 "MONTH" => crate::expressions::IntervalUnit::Month,
24420 "WEEK" => crate::expressions::IntervalUnit::Week,
24421 "HOUR" => crate::expressions::IntervalUnit::Hour,
24422 "MINUTE" => crate::expressions::IntervalUnit::Minute,
24423 "SECOND" => crate::expressions::IntervalUnit::Second,
24424 _ => crate::expressions::IntervalUnit::Day,
24425 };
24426 let interval = Expression::Interval(Box::new(
24427 crate::expressions::Interval {
24428 this: Some(n),
24429 unit: Some(
24430 crate::expressions::IntervalUnitSpec::Simple {
24431 unit: iu,
24432 use_plural: false,
24433 },
24434 ),
24435 },
24436 ));
24437 Ok(Expression::Function(Box::new(Function::new(
24438 "DATE_ADD".to_string(),
24439 vec![cast_date, interval],
24440 ))))
24441 }
24442 _ => {
24443 // Default: keep as TS_OR_DS_ADD
24444 Ok(Expression::Function(Box::new(Function::new(
24445 "TS_OR_DS_ADD".to_string(),
24446 vec![x, n, unit_expr],
24447 ))))
24448 }
24449 }
24450 } else {
24451 Ok(Expression::Function(f))
24452 }
24453 } else {
24454 Ok(e)
24455 }
24456 }
24457
24458 Action::DateFromUnixDateConvert => {
24459 // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, CAST('1970-01-01' AS DATE))
24460 if let Expression::Function(f) = e {
24461 // Keep as-is for dialects that support DATE_FROM_UNIX_DATE natively
24462 if matches!(
24463 target,
24464 DialectType::Spark | DialectType::Databricks | DialectType::BigQuery
24465 ) {
24466 return Ok(Expression::Function(Box::new(Function::new(
24467 "DATE_FROM_UNIX_DATE".to_string(),
24468 f.args,
24469 ))));
24470 }
24471 let n = f.args.into_iter().next().unwrap();
24472 let epoch_date = Expression::Cast(Box::new(Cast {
24473 this: Expression::string("1970-01-01"),
24474 to: DataType::Date,
24475 double_colon_syntax: false,
24476 trailing_comments: Vec::new(),
24477 format: None,
24478 default: None,
24479 }));
24480 match target {
24481 DialectType::DuckDB => {
24482 // CAST('1970-01-01' AS DATE) + INTERVAL n DAY
24483 let interval =
24484 Expression::Interval(Box::new(crate::expressions::Interval {
24485 this: Some(n),
24486 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
24487 unit: crate::expressions::IntervalUnit::Day,
24488 use_plural: false,
24489 }),
24490 }));
24491 Ok(Expression::Add(Box::new(
24492 crate::expressions::BinaryOp::new(epoch_date, interval),
24493 )))
24494 }
24495 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
24496 // DATE_ADD('DAY', n, CAST('1970-01-01' AS DATE))
24497 Ok(Expression::Function(Box::new(Function::new(
24498 "DATE_ADD".to_string(),
24499 vec![Expression::string("DAY"), n, epoch_date],
24500 ))))
24501 }
24502 DialectType::Snowflake | DialectType::Redshift | DialectType::TSQL => {
24503 // DATEADD(DAY, n, CAST('1970-01-01' AS DATE))
24504 Ok(Expression::Function(Box::new(Function::new(
24505 "DATEADD".to_string(),
24506 vec![
24507 Expression::Identifier(Identifier::new("DAY")),
24508 n,
24509 epoch_date,
24510 ],
24511 ))))
24512 }
24513 DialectType::BigQuery => {
24514 // DATE_ADD(CAST('1970-01-01' AS DATE), INTERVAL n DAY)
24515 let interval =
24516 Expression::Interval(Box::new(crate::expressions::Interval {
24517 this: Some(n),
24518 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
24519 unit: crate::expressions::IntervalUnit::Day,
24520 use_plural: false,
24521 }),
24522 }));
24523 Ok(Expression::Function(Box::new(Function::new(
24524 "DATE_ADD".to_string(),
24525 vec![epoch_date, interval],
24526 ))))
24527 }
24528 DialectType::MySQL
24529 | DialectType::Doris
24530 | DialectType::StarRocks
24531 | DialectType::Drill => {
24532 // DATE_ADD(CAST('1970-01-01' AS DATE), INTERVAL n DAY)
24533 let interval =
24534 Expression::Interval(Box::new(crate::expressions::Interval {
24535 this: Some(n),
24536 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
24537 unit: crate::expressions::IntervalUnit::Day,
24538 use_plural: false,
24539 }),
24540 }));
24541 Ok(Expression::Function(Box::new(Function::new(
24542 "DATE_ADD".to_string(),
24543 vec![epoch_date, interval],
24544 ))))
24545 }
24546 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
24547 // DATE_ADD(CAST('1970-01-01' AS DATE), n)
24548 Ok(Expression::Function(Box::new(Function::new(
24549 "DATE_ADD".to_string(),
24550 vec![epoch_date, n],
24551 ))))
24552 }
24553 DialectType::PostgreSQL
24554 | DialectType::Materialize
24555 | DialectType::RisingWave => {
24556 // CAST('1970-01-01' AS DATE) + INTERVAL 'n DAY'
24557 let n_str = match &n {
24558 Expression::Literal(Literal::Number(s)) => s.clone(),
24559 _ => Self::expr_to_string_static(&n),
24560 };
24561 let interval =
24562 Expression::Interval(Box::new(crate::expressions::Interval {
24563 this: Some(Expression::string(&format!("{} DAY", n_str))),
24564 unit: None,
24565 }));
24566 Ok(Expression::Add(Box::new(
24567 crate::expressions::BinaryOp::new(epoch_date, interval),
24568 )))
24569 }
24570 _ => {
24571 // Default: keep as-is
24572 Ok(Expression::Function(Box::new(Function::new(
24573 "DATE_FROM_UNIX_DATE".to_string(),
24574 vec![n],
24575 ))))
24576 }
24577 }
24578 } else {
24579 Ok(e)
24580 }
24581 }
24582
24583 Action::ArrayRemoveConvert => {
24584 // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter
24585 if let Expression::ArrayRemove(bf) = e {
24586 let arr = bf.this;
24587 let target_val = bf.expression;
24588 match target {
24589 DialectType::DuckDB => {
24590 let u_id = crate::expressions::Identifier::new("_u");
24591 let lambda =
24592 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
24593 parameters: vec![u_id.clone()],
24594 body: Expression::Neq(Box::new(BinaryOp {
24595 left: Expression::Identifier(u_id),
24596 right: target_val,
24597 left_comments: Vec::new(),
24598 operator_comments: Vec::new(),
24599 trailing_comments: Vec::new(),
24600 })),
24601 colon: false,
24602 parameter_types: Vec::new(),
24603 }));
24604 Ok(Expression::Function(Box::new(Function::new(
24605 "LIST_FILTER".to_string(),
24606 vec![arr, lambda],
24607 ))))
24608 }
24609 DialectType::ClickHouse => {
24610 let u_id = crate::expressions::Identifier::new("_u");
24611 let lambda =
24612 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
24613 parameters: vec![u_id.clone()],
24614 body: Expression::Neq(Box::new(BinaryOp {
24615 left: Expression::Identifier(u_id),
24616 right: target_val,
24617 left_comments: Vec::new(),
24618 operator_comments: Vec::new(),
24619 trailing_comments: Vec::new(),
24620 })),
24621 colon: false,
24622 parameter_types: Vec::new(),
24623 }));
24624 Ok(Expression::Function(Box::new(Function::new(
24625 "arrayFilter".to_string(),
24626 vec![lambda, arr],
24627 ))))
24628 }
24629 DialectType::BigQuery => {
24630 // ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)
24631 let u_id = crate::expressions::Identifier::new("_u");
24632 let u_col = Expression::Column(crate::expressions::Column {
24633 name: u_id.clone(),
24634 table: None,
24635 join_mark: false,
24636 trailing_comments: Vec::new(),
24637 span: None,
24638 });
24639 let unnest_expr =
24640 Expression::Unnest(Box::new(crate::expressions::UnnestFunc {
24641 this: arr,
24642 expressions: Vec::new(),
24643 with_ordinality: false,
24644 alias: None,
24645 offset_alias: None,
24646 }));
24647 let aliased_unnest =
24648 Expression::Alias(Box::new(crate::expressions::Alias {
24649 this: unnest_expr,
24650 alias: u_id.clone(),
24651 column_aliases: Vec::new(),
24652 pre_alias_comments: Vec::new(),
24653 trailing_comments: Vec::new(),
24654 }));
24655 let where_cond = Expression::Neq(Box::new(BinaryOp {
24656 left: u_col.clone(),
24657 right: target_val,
24658 left_comments: Vec::new(),
24659 operator_comments: Vec::new(),
24660 trailing_comments: Vec::new(),
24661 }));
24662 let subquery = Expression::Select(Box::new(
24663 crate::expressions::Select::new()
24664 .column(u_col)
24665 .from(aliased_unnest)
24666 .where_(where_cond),
24667 ));
24668 Ok(Expression::ArrayFunc(Box::new(
24669 crate::expressions::ArrayConstructor {
24670 expressions: vec![subquery],
24671 bracket_notation: false,
24672 use_list_keyword: false,
24673 },
24674 )))
24675 }
24676 _ => Ok(Expression::ArrayRemove(Box::new(
24677 crate::expressions::BinaryFunc {
24678 original_name: None,
24679 this: arr,
24680 expression: target_val,
24681 },
24682 ))),
24683 }
24684 } else {
24685 Ok(e)
24686 }
24687 }
24688
24689 Action::ArrayReverseConvert => {
24690 // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
24691 if let Expression::ArrayReverse(af) = e {
24692 Ok(Expression::Function(Box::new(Function::new(
24693 "arrayReverse".to_string(),
24694 vec![af.this],
24695 ))))
24696 } else {
24697 Ok(e)
24698 }
24699 }
24700
24701 Action::JsonKeysConvert => {
24702 // JSON_KEYS(x) -> JSON_OBJECT_KEYS/OBJECT_KEYS
24703 if let Expression::JsonKeys(uf) = e {
24704 match target {
24705 DialectType::Spark | DialectType::Databricks => {
24706 Ok(Expression::Function(Box::new(Function::new(
24707 "JSON_OBJECT_KEYS".to_string(),
24708 vec![uf.this],
24709 ))))
24710 }
24711 DialectType::Snowflake => Ok(Expression::Function(Box::new(
24712 Function::new("OBJECT_KEYS".to_string(), vec![uf.this]),
24713 ))),
24714 _ => Ok(Expression::JsonKeys(uf)),
24715 }
24716 } else {
24717 Ok(e)
24718 }
24719 }
24720
24721 Action::ParseJsonStrip => {
24722 // PARSE_JSON(x) -> x (strip wrapper for SQLite/Doris)
24723 if let Expression::ParseJson(uf) = e {
24724 Ok(uf.this)
24725 } else {
24726 Ok(e)
24727 }
24728 }
24729
24730 Action::ArraySizeDrill => {
24731 // ARRAY_SIZE(x) -> REPEATED_COUNT(x) for Drill
24732 if let Expression::ArraySize(uf) = e {
24733 Ok(Expression::Function(Box::new(Function::new(
24734 "REPEATED_COUNT".to_string(),
24735 vec![uf.this],
24736 ))))
24737 } else {
24738 Ok(e)
24739 }
24740 }
24741
24742 Action::WeekOfYearToWeekIso => {
24743 // WEEKOFYEAR(x) -> WEEKISO(x) for Snowflake (cross-dialect normalization)
24744 if let Expression::WeekOfYear(uf) = e {
24745 Ok(Expression::Function(Box::new(Function::new(
24746 "WEEKISO".to_string(),
24747 vec![uf.this],
24748 ))))
24749 } else {
24750 Ok(e)
24751 }
24752 }
24753 }
24754 })
24755 }
24756
24757 /// Convert DATE_TRUNC('unit', x) to MySQL-specific expansion
24758 fn date_trunc_to_mysql(unit: &str, expr: &Expression) -> Result<Expression> {
24759 use crate::expressions::Function;
24760 match unit {
24761 "DAY" => {
24762 // DATE(x)
24763 Ok(Expression::Function(Box::new(Function::new(
24764 "DATE".to_string(),
24765 vec![expr.clone()],
24766 ))))
24767 }
24768 "WEEK" => {
24769 // STR_TO_DATE(CONCAT(YEAR(x), ' ', WEEK(x, 1), ' 1'), '%Y %u %w')
24770 let year_x = Expression::Function(Box::new(Function::new(
24771 "YEAR".to_string(),
24772 vec![expr.clone()],
24773 )));
24774 let week_x = Expression::Function(Box::new(Function::new(
24775 "WEEK".to_string(),
24776 vec![expr.clone(), Expression::number(1)],
24777 )));
24778 let concat_args = vec![
24779 year_x,
24780 Expression::string(" "),
24781 week_x,
24782 Expression::string(" 1"),
24783 ];
24784 let concat = Expression::Function(Box::new(Function::new(
24785 "CONCAT".to_string(),
24786 concat_args,
24787 )));
24788 Ok(Expression::Function(Box::new(Function::new(
24789 "STR_TO_DATE".to_string(),
24790 vec![concat, Expression::string("%Y %u %w")],
24791 ))))
24792 }
24793 "MONTH" => {
24794 // STR_TO_DATE(CONCAT(YEAR(x), ' ', MONTH(x), ' 1'), '%Y %c %e')
24795 let year_x = Expression::Function(Box::new(Function::new(
24796 "YEAR".to_string(),
24797 vec![expr.clone()],
24798 )));
24799 let month_x = Expression::Function(Box::new(Function::new(
24800 "MONTH".to_string(),
24801 vec![expr.clone()],
24802 )));
24803 let concat_args = vec![
24804 year_x,
24805 Expression::string(" "),
24806 month_x,
24807 Expression::string(" 1"),
24808 ];
24809 let concat = Expression::Function(Box::new(Function::new(
24810 "CONCAT".to_string(),
24811 concat_args,
24812 )));
24813 Ok(Expression::Function(Box::new(Function::new(
24814 "STR_TO_DATE".to_string(),
24815 vec![concat, Expression::string("%Y %c %e")],
24816 ))))
24817 }
24818 "QUARTER" => {
24819 // STR_TO_DATE(CONCAT(YEAR(x), ' ', QUARTER(x) * 3 - 2, ' 1'), '%Y %c %e')
24820 let year_x = Expression::Function(Box::new(Function::new(
24821 "YEAR".to_string(),
24822 vec![expr.clone()],
24823 )));
24824 let quarter_x = Expression::Function(Box::new(Function::new(
24825 "QUARTER".to_string(),
24826 vec![expr.clone()],
24827 )));
24828 // QUARTER(x) * 3 - 2
24829 let mul = Expression::Mul(Box::new(crate::expressions::BinaryOp {
24830 left: quarter_x,
24831 right: Expression::number(3),
24832 left_comments: Vec::new(),
24833 operator_comments: Vec::new(),
24834 trailing_comments: Vec::new(),
24835 }));
24836 let sub = Expression::Sub(Box::new(crate::expressions::BinaryOp {
24837 left: mul,
24838 right: Expression::number(2),
24839 left_comments: Vec::new(),
24840 operator_comments: Vec::new(),
24841 trailing_comments: Vec::new(),
24842 }));
24843 let concat_args = vec![
24844 year_x,
24845 Expression::string(" "),
24846 sub,
24847 Expression::string(" 1"),
24848 ];
24849 let concat = Expression::Function(Box::new(Function::new(
24850 "CONCAT".to_string(),
24851 concat_args,
24852 )));
24853 Ok(Expression::Function(Box::new(Function::new(
24854 "STR_TO_DATE".to_string(),
24855 vec![concat, Expression::string("%Y %c %e")],
24856 ))))
24857 }
24858 "YEAR" => {
24859 // STR_TO_DATE(CONCAT(YEAR(x), ' 1 1'), '%Y %c %e')
24860 let year_x = Expression::Function(Box::new(Function::new(
24861 "YEAR".to_string(),
24862 vec![expr.clone()],
24863 )));
24864 let concat_args = vec![year_x, Expression::string(" 1 1")];
24865 let concat = Expression::Function(Box::new(Function::new(
24866 "CONCAT".to_string(),
24867 concat_args,
24868 )));
24869 Ok(Expression::Function(Box::new(Function::new(
24870 "STR_TO_DATE".to_string(),
24871 vec![concat, Expression::string("%Y %c %e")],
24872 ))))
24873 }
24874 _ => {
24875 // Unsupported unit -> keep as DATE_TRUNC
24876 Ok(Expression::Function(Box::new(Function::new(
24877 "DATE_TRUNC".to_string(),
24878 vec![Expression::string(unit), expr.clone()],
24879 ))))
24880 }
24881 }
24882 }
24883
24884 /// Check if a DataType is or contains VARCHAR/CHAR (for Spark VARCHAR->STRING normalization)
24885 fn has_varchar_char_type(dt: &crate::expressions::DataType) -> bool {
24886 use crate::expressions::DataType;
24887 match dt {
24888 DataType::VarChar { .. } | DataType::Char { .. } => true,
24889 DataType::Struct { fields, .. } => fields
24890 .iter()
24891 .any(|f| Self::has_varchar_char_type(&f.data_type)),
24892 _ => false,
24893 }
24894 }
24895
24896 /// Recursively normalize VARCHAR/CHAR to STRING in a DataType (for Spark)
24897 fn normalize_varchar_to_string(
24898 dt: crate::expressions::DataType,
24899 ) -> crate::expressions::DataType {
24900 use crate::expressions::DataType;
24901 match dt {
24902 DataType::VarChar { .. } | DataType::Char { .. } => DataType::Custom {
24903 name: "STRING".to_string(),
24904 },
24905 DataType::Struct { fields, nested } => {
24906 let fields = fields
24907 .into_iter()
24908 .map(|mut f| {
24909 f.data_type = Self::normalize_varchar_to_string(f.data_type);
24910 f
24911 })
24912 .collect();
24913 DataType::Struct { fields, nested }
24914 }
24915 other => other,
24916 }
24917 }
24918
24919 /// Normalize an interval string like '1day' or ' 2 days ' to proper INTERVAL expression
24920 fn normalize_interval_string(expr: Expression, target: DialectType) -> Expression {
24921 if let Expression::Literal(crate::expressions::Literal::String(ref s)) = expr {
24922 // Try to parse patterns like '1day', '1 day', '2 days', ' 2 days '
24923 let trimmed = s.trim();
24924
24925 // Find where digits end and unit text begins
24926 let digit_end = trimmed
24927 .find(|c: char| !c.is_ascii_digit())
24928 .unwrap_or(trimmed.len());
24929 if digit_end == 0 || digit_end == trimmed.len() {
24930 return expr;
24931 }
24932 let num = &trimmed[..digit_end];
24933 let unit_text = trimmed[digit_end..].trim().to_uppercase();
24934 if unit_text.is_empty() {
24935 return expr;
24936 }
24937
24938 let known_units = [
24939 "DAY", "DAYS", "HOUR", "HOURS", "MINUTE", "MINUTES", "SECOND", "SECONDS", "WEEK",
24940 "WEEKS", "MONTH", "MONTHS", "YEAR", "YEARS",
24941 ];
24942 if !known_units.contains(&unit_text.as_str()) {
24943 return expr;
24944 }
24945
24946 let unit_str = unit_text.clone();
24947 // Singularize
24948 let unit_singular = if unit_str.ends_with('S') && unit_str.len() > 3 {
24949 &unit_str[..unit_str.len() - 1]
24950 } else {
24951 &unit_str
24952 };
24953 let unit = unit_singular;
24954
24955 match target {
24956 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
24957 // INTERVAL '2' DAY
24958 let iu = match unit {
24959 "DAY" => crate::expressions::IntervalUnit::Day,
24960 "HOUR" => crate::expressions::IntervalUnit::Hour,
24961 "MINUTE" => crate::expressions::IntervalUnit::Minute,
24962 "SECOND" => crate::expressions::IntervalUnit::Second,
24963 "WEEK" => crate::expressions::IntervalUnit::Week,
24964 "MONTH" => crate::expressions::IntervalUnit::Month,
24965 "YEAR" => crate::expressions::IntervalUnit::Year,
24966 _ => return expr,
24967 };
24968 return Expression::Interval(Box::new(crate::expressions::Interval {
24969 this: Some(Expression::string(num)),
24970 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
24971 unit: iu,
24972 use_plural: false,
24973 }),
24974 }));
24975 }
24976 DialectType::PostgreSQL | DialectType::Redshift | DialectType::DuckDB => {
24977 // INTERVAL '2 DAYS'
24978 let plural = if num != "1" && !unit_str.ends_with('S') {
24979 format!("{} {}S", num, unit)
24980 } else if unit_str.ends_with('S') {
24981 format!("{} {}", num, unit_str)
24982 } else {
24983 format!("{} {}", num, unit)
24984 };
24985 return Expression::Interval(Box::new(crate::expressions::Interval {
24986 this: Some(Expression::string(&plural)),
24987 unit: None,
24988 }));
24989 }
24990 _ => {
24991 // Spark/Databricks/Hive: INTERVAL '1' DAY
24992 let iu = match unit {
24993 "DAY" => crate::expressions::IntervalUnit::Day,
24994 "HOUR" => crate::expressions::IntervalUnit::Hour,
24995 "MINUTE" => crate::expressions::IntervalUnit::Minute,
24996 "SECOND" => crate::expressions::IntervalUnit::Second,
24997 "WEEK" => crate::expressions::IntervalUnit::Week,
24998 "MONTH" => crate::expressions::IntervalUnit::Month,
24999 "YEAR" => crate::expressions::IntervalUnit::Year,
25000 _ => return expr,
25001 };
25002 return Expression::Interval(Box::new(crate::expressions::Interval {
25003 this: Some(Expression::string(num)),
25004 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
25005 unit: iu,
25006 use_plural: false,
25007 }),
25008 }));
25009 }
25010 }
25011 }
25012 // If it's already an INTERVAL expression, pass through
25013 expr
25014 }
25015
    /// Rewrite SELECT expressions containing UNNEST into expanded form with CROSS JOINs.
    /// DuckDB: SELECT UNNEST(arr1), UNNEST(arr2) ->
    /// BigQuery: SELECT IF(pos = pos_2, col, NULL) AS col, ... FROM UNNEST(GENERATE_ARRAY(0, ...)) AS pos CROSS JOIN ...
    /// Presto: SELECT IF(_u.pos = _u_2.pos_2, _u_2.col) AS col, ... FROM UNNEST(SEQUENCE(1, ...)) AS _u(pos) CROSS JOIN ...
    ///
    /// Returns `Some(rewritten_select)` when at least one projected expression
    /// contains an UNNEST and the target is one of BigQuery/Presto/Trino/Snowflake;
    /// returns `None` when there is nothing to rewrite or the target dialect is
    /// not handled here (callers keep the original SELECT in that case).
    fn rewrite_unnest_expansion(
        select: &crate::expressions::Select,
        target: DialectType,
    ) -> Option<crate::expressions::Select> {
        use crate::expressions::{
            Alias, BinaryOp, Column, From, Function, Identifier, Join, JoinKind, Literal,
            UnnestFunc,
        };

        // Position series is 1-based on Presto/Trino, 0-based elsewhere.
        let index_offset: i64 = match target {
            DialectType::Presto | DialectType::Trino => 1,
            _ => 0, // BigQuery, Snowflake
        };

        let if_func_name = match target {
            DialectType::Snowflake => "IFF",
            _ => "IF",
        };

        let array_length_func = match target {
            DialectType::BigQuery => "ARRAY_LENGTH",
            DialectType::Presto | DialectType::Trino => "CARDINALITY",
            DialectType::Snowflake => "ARRAY_SIZE",
            _ => "ARRAY_LENGTH",
        };

        // Presto/Trino/Snowflake qualify columns with table aliases (_u, _u_2, ...);
        // BigQuery references the bare column/offset names.
        let use_table_aliases = matches!(
            target,
            DialectType::Presto | DialectType::Trino | DialectType::Snowflake
        );
        // These dialects get an explicit NULL else-branch in the IF/IFF call.
        let null_third_arg = matches!(target, DialectType::BigQuery | DialectType::Snowflake);

        // Build a column reference, qualified by `table` when provided.
        fn make_col(name: &str, table: Option<&str>) -> Expression {
            if let Some(tbl) = table {
                Expression::Column(Column {
                    name: Identifier::new(name.to_string()),
                    table: Some(Identifier::new(tbl.to_string())),
                    join_mark: false,
                    trailing_comments: Vec::new(),
                    span: None,
                })
            } else {
                Expression::Identifier(Identifier::new(name.to_string()))
            }
        }

        // Wrap an expression as a bare CROSS JOIN (no ON/USING condition).
        fn make_join(this: Expression) -> Join {
            Join {
                this,
                on: None,
                using: Vec::new(),
                kind: JoinKind::Cross,
                use_inner_keyword: false,
                use_outer_keyword: false,
                deferred_condition: false,
                join_hint: None,
                match_condition: None,
                pivots: Vec::new(),
                comments: Vec::new(),
                nesting_group: 0,
                directed: false,
            }
        }

        // Collect UNNEST info from SELECT expressions
        // Per-UNNEST bookkeeping: the array argument, the generated aliases,
        // the original projection (so arithmetic around the UNNEST survives),
        // and any alias the user already wrote.
        struct UnnestInfo {
            arr_expr: Expression,
            col_alias: String,
            pos_alias: String,
            source_alias: String,
            original_expr: Expression,
            has_outer_alias: Option<String>,
        }

        let mut unnest_infos: Vec<UnnestInfo> = Vec::new();
        // Counters start so the first UNNEST gets "col" / "pos_2" / "_u_2"
        // (the shared series source itself takes "pos" / "_u").
        let mut col_counter = 0usize;
        let mut pos_counter = 1usize;
        let mut source_counter = 1usize;

        // Pull the UNNEST argument out of a projection, looking through aliases
        // and simple +,-,*,/ arithmetic around the UNNEST call.
        fn extract_unnest_arg(expr: &Expression) -> Option<Expression> {
            match expr {
                Expression::Unnest(u) => Some(u.this.clone()),
                Expression::Function(f)
                    if f.name.eq_ignore_ascii_case("UNNEST") && !f.args.is_empty() =>
                {
                    Some(f.args[0].clone())
                }
                Expression::Alias(a) => extract_unnest_arg(&a.this),
                Expression::Add(op)
                | Expression::Sub(op)
                | Expression::Mul(op)
                | Expression::Div(op) => {
                    extract_unnest_arg(&op.left).or_else(|| extract_unnest_arg(&op.right))
                }
                _ => None,
            }
        }

        // Top-level alias name of a projection, if the user wrote one.
        fn get_alias_name(expr: &Expression) -> Option<String> {
            if let Expression::Alias(a) = expr {
                Some(a.alias.name.clone())
            } else {
                None
            }
        }

        for sel_expr in &select.expressions {
            if let Some(arr) = extract_unnest_arg(sel_expr) {
                col_counter += 1;
                pos_counter += 1;
                source_counter += 1;

                // First column is plain "col"; later ones are "col_2", "col_3", ...
                let col_alias = if col_counter == 1 {
                    "col".to_string()
                } else {
                    format!("col_{}", col_counter)
                };
                let pos_alias = format!("pos_{}", pos_counter);
                let source_alias = format!("_u_{}", source_counter);
                let has_outer_alias = get_alias_name(sel_expr);

                unnest_infos.push(UnnestInfo {
                    arr_expr: arr,
                    col_alias,
                    pos_alias,
                    source_alias,
                    original_expr: sel_expr.clone(),
                    has_outer_alias,
                });
            }
        }

        // No UNNEST anywhere in the projection: nothing to rewrite.
        if unnest_infos.is_empty() {
            return None;
        }

        let series_alias = "pos".to_string();
        let series_source_alias = "_u".to_string();
        let tbl_ref = if use_table_aliases {
            Some(series_source_alias.as_str())
        } else {
            None
        };

        // Build new SELECT expressions
        for_each_projection: in each, the UNNEST is replaced by
        let mut new_select_exprs = Vec::new();
        for info in &unnest_infos {
            let actual_col_name = info.has_outer_alias.as_ref().unwrap_or(&info.col_alias);
            let src_ref = if use_table_aliases {
                Some(info.source_alias.as_str())
            } else {
                None
            };

            let pos_col = make_col(&series_alias, tbl_ref);
            let unnest_pos_col = make_col(&info.pos_alias, src_ref);
            let col_ref = make_col(actual_col_name, src_ref);

            // IF(pos = pos_N, col_N [, NULL]) masks values when positions differ.
            let eq_cond = Expression::Eq(Box::new(BinaryOp::new(
                pos_col.clone(),
                unnest_pos_col.clone(),
            )));
            let mut if_args = vec![eq_cond, col_ref];
            if null_third_arg {
                if_args.push(Expression::Null(crate::expressions::Null));
            }

            let if_expr =
                Expression::Function(Box::new(Function::new(if_func_name.to_string(), if_args)));
            let final_expr = Self::replace_unnest_with_if(&info.original_expr, &if_expr);

            new_select_exprs.push(Expression::Alias(Box::new(Alias::new(
                final_expr,
                Identifier::new(actual_col_name.clone()),
            ))));
        }

        // Build array size expressions for GREATEST
        let size_exprs: Vec<Expression> = unnest_infos
            .iter()
            .map(|info| {
                Expression::Function(Box::new(Function::new(
                    array_length_func.to_string(),
                    vec![info.arr_expr.clone()],
                )))
            })
            .collect();

        // The position series runs to the longest array's length.
        let greatest =
            Expression::Function(Box::new(Function::new("GREATEST".to_string(), size_exprs)));

        // 0-based dialects end the series at length - 1.
        let series_end = if index_offset == 0 {
            Expression::Sub(Box::new(BinaryOp::new(
                greatest,
                Expression::Literal(Literal::Number("1".to_string())),
            )))
        } else {
            greatest
        };

        // Build the position array source
        let series_unnest_expr = match target {
            DialectType::BigQuery => {
                // UNNEST(GENERATE_ARRAY(0, <end>))
                let gen_array = Expression::Function(Box::new(Function::new(
                    "GENERATE_ARRAY".to_string(),
                    vec![
                        Expression::Literal(Literal::Number("0".to_string())),
                        series_end,
                    ],
                )));
                Expression::Unnest(Box::new(UnnestFunc {
                    this: gen_array,
                    expressions: Vec::new(),
                    with_ordinality: false,
                    alias: None,
                    offset_alias: None,
                }))
            }
            DialectType::Presto | DialectType::Trino => {
                // UNNEST(SEQUENCE(1, <end>))
                let sequence = Expression::Function(Box::new(Function::new(
                    "SEQUENCE".to_string(),
                    vec![
                        Expression::Literal(Literal::Number("1".to_string())),
                        series_end,
                    ],
                )));
                Expression::Unnest(Box::new(UnnestFunc {
                    this: sequence,
                    expressions: Vec::new(),
                    with_ordinality: false,
                    alias: None,
                    offset_alias: None,
                }))
            }
            DialectType::Snowflake => {
                // TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (<end>) + 1)))
                // ARRAY_GENERATE_RANGE's upper bound is exclusive, hence the + 1.
                let range_end = Expression::Add(Box::new(BinaryOp::new(
                    Expression::Paren(Box::new(crate::expressions::Paren {
                        this: series_end,
                        trailing_comments: Vec::new(),
                    })),
                    Expression::Literal(Literal::Number("1".to_string())),
                )));
                let gen_range = Expression::Function(Box::new(Function::new(
                    "ARRAY_GENERATE_RANGE".to_string(),
                    vec![
                        Expression::Literal(Literal::Number("0".to_string())),
                        range_end,
                    ],
                )));
                let flatten_arg =
                    Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
                        name: Identifier::new("INPUT".to_string()),
                        value: gen_range,
                        separator: crate::expressions::NamedArgSeparator::DArrow,
                    }));
                let flatten = Expression::Function(Box::new(Function::new(
                    "FLATTEN".to_string(),
                    vec![flatten_arg],
                )));
                Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])))
            }
            // Any other dialect: this rewrite does not apply.
            _ => return None,
        };

        // Build series alias expression
        let series_alias_expr = if use_table_aliases {
            // Snowflake's FLATTEN emits six fixed columns; "index" here holds
            // the position, and the 5th slot is renamed to our series alias.
            let col_aliases = if matches!(target, DialectType::Snowflake) {
                vec![
                    Identifier::new("seq".to_string()),
                    Identifier::new("key".to_string()),
                    Identifier::new("path".to_string()),
                    Identifier::new("index".to_string()),
                    Identifier::new(series_alias.clone()),
                    Identifier::new("this".to_string()),
                ]
            } else {
                vec![Identifier::new(series_alias.clone())]
            };
            Expression::Alias(Box::new(Alias {
                this: series_unnest_expr,
                alias: Identifier::new(series_source_alias.clone()),
                column_aliases: col_aliases,
                pre_alias_comments: Vec::new(),
                trailing_comments: Vec::new(),
            }))
        } else {
            Expression::Alias(Box::new(Alias::new(
                series_unnest_expr,
                Identifier::new(series_alias.clone()),
            )))
        };

        // Build CROSS JOINs for each UNNEST
        let mut joins = Vec::new();
        for info in &unnest_infos {
            let actual_col_name = info.has_outer_alias.as_ref().unwrap_or(&info.col_alias);

            let unnest_join_expr = match target {
                DialectType::BigQuery => {
                    // UNNEST([1,2,3]) AS col WITH OFFSET AS pos_2
                    let unnest = UnnestFunc {
                        this: info.arr_expr.clone(),
                        expressions: Vec::new(),
                        with_ordinality: true,
                        alias: Some(Identifier::new(actual_col_name.clone())),
                        offset_alias: Some(Identifier::new(info.pos_alias.clone())),
                    };
                    Expression::Unnest(Box::new(unnest))
                }
                DialectType::Presto | DialectType::Trino => {
                    // UNNEST(arr) WITH ORDINALITY AS _u_N(col, pos_N)
                    let unnest = UnnestFunc {
                        this: info.arr_expr.clone(),
                        expressions: Vec::new(),
                        with_ordinality: true,
                        alias: None,
                        offset_alias: None,
                    };
                    Expression::Alias(Box::new(Alias {
                        this: Expression::Unnest(Box::new(unnest)),
                        alias: Identifier::new(info.source_alias.clone()),
                        column_aliases: vec![
                            Identifier::new(actual_col_name.clone()),
                            Identifier::new(info.pos_alias.clone()),
                        ],
                        pre_alias_comments: Vec::new(),
                        trailing_comments: Vec::new(),
                    }))
                }
                DialectType::Snowflake => {
                    // TABLE(FLATTEN(INPUT => arr)) AS _u_N(seq, key, path, pos_N, col, this)
                    let flatten_arg =
                        Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
                            name: Identifier::new("INPUT".to_string()),
                            value: info.arr_expr.clone(),
                            separator: crate::expressions::NamedArgSeparator::DArrow,
                        }));
                    let flatten = Expression::Function(Box::new(Function::new(
                        "FLATTEN".to_string(),
                        vec![flatten_arg],
                    )));
                    let table_fn = Expression::Function(Box::new(Function::new(
                        "TABLE".to_string(),
                        vec![flatten],
                    )));
                    Expression::Alias(Box::new(Alias {
                        this: table_fn,
                        alias: Identifier::new(info.source_alias.clone()),
                        column_aliases: vec![
                            Identifier::new("seq".to_string()),
                            Identifier::new("key".to_string()),
                            Identifier::new("path".to_string()),
                            Identifier::new(info.pos_alias.clone()),
                            Identifier::new(actual_col_name.clone()),
                            Identifier::new("this".to_string()),
                        ],
                        pre_alias_comments: Vec::new(),
                        trailing_comments: Vec::new(),
                    }))
                }
                _ => return None,
            };

            joins.push(make_join(unnest_join_expr));
        }

        // Build WHERE clause
        // Each UNNEST contributes: pos = pos_N OR (pos > size AND pos_N = size),
        // i.e. rows align by position, and shorter arrays keep emitting their
        // final slot while longer arrays continue.
        let mut where_conditions: Vec<Expression> = Vec::new();
        for info in &unnest_infos {
            let src_ref = if use_table_aliases {
                Some(info.source_alias.as_str())
            } else {
                None
            };
            let pos_col = make_col(&series_alias, tbl_ref);
            let unnest_pos_col = make_col(&info.pos_alias, src_ref);

            let arr_size = Expression::Function(Box::new(Function::new(
                array_length_func.to_string(),
                vec![info.arr_expr.clone()],
            )));

            // 0-based dialects compare against (size - 1).
            let size_ref = if index_offset == 0 {
                Expression::Paren(Box::new(crate::expressions::Paren {
                    this: Expression::Sub(Box::new(BinaryOp::new(
                        arr_size,
                        Expression::Literal(Literal::Number("1".to_string())),
                    ))),
                    trailing_comments: Vec::new(),
                }))
            } else {
                arr_size
            };

            let eq = Expression::Eq(Box::new(BinaryOp::new(
                pos_col.clone(),
                unnest_pos_col.clone(),
            )));
            let gt = Expression::Gt(Box::new(BinaryOp::new(pos_col, size_ref.clone())));
            let pos_eq_size = Expression::Eq(Box::new(BinaryOp::new(unnest_pos_col, size_ref)));
            let and_cond = Expression::And(Box::new(BinaryOp::new(gt, pos_eq_size)));
            let paren_and = Expression::Paren(Box::new(crate::expressions::Paren {
                this: and_cond,
                trailing_comments: Vec::new(),
            }));
            let or_cond = Expression::Or(Box::new(BinaryOp::new(eq, paren_and)));

            where_conditions.push(or_cond);
        }

        let where_expr = if where_conditions.len() == 1 {
            // Single condition: no parens needed
            where_conditions.into_iter().next().unwrap()
        } else {
            // Multiple conditions: wrap each OR in parens, then combine with AND
            let wrap = |e: Expression| {
                Expression::Paren(Box::new(crate::expressions::Paren {
                    this: e,
                    trailing_comments: Vec::new(),
                }))
            };
            let mut iter = where_conditions.into_iter();
            // len() >= 2 here, so the two unwraps below cannot panic.
            let first = wrap(iter.next().unwrap());
            let second = wrap(iter.next().unwrap());
            let mut combined = Expression::Paren(Box::new(crate::expressions::Paren {
                this: Expression::And(Box::new(BinaryOp::new(first, second))),
                trailing_comments: Vec::new(),
            }));
            for cond in iter {
                combined = Expression::And(Box::new(BinaryOp::new(combined, wrap(cond))));
            }
            combined
        };

        // Build the new SELECT
        let mut new_select = select.clone();
        new_select.expressions = new_select_exprs;

        if new_select.from.is_some() {
            // Existing FROM: the series source becomes the first CROSS JOIN.
            let mut all_joins = vec![make_join(series_alias_expr)];
            all_joins.extend(joins);
            new_select.joins.extend(all_joins);
        } else {
            // No FROM: the series source is the FROM itself.
            new_select.from = Some(From {
                expressions: vec![series_alias_expr],
            });
            new_select.joins.extend(joins);
        }

        // AND the alignment predicate into any pre-existing WHERE.
        if let Some(ref existing_where) = new_select.where_clause {
            let combined = Expression::And(Box::new(BinaryOp::new(
                existing_where.this.clone(),
                where_expr,
            )));
            new_select.where_clause = Some(crate::expressions::Where { this: combined });
        } else {
            new_select.where_clause = Some(crate::expressions::Where { this: where_expr });
        }

        Some(new_select)
    }
25479
25480 /// Helper to replace UNNEST(...) inside an expression with a replacement expression.
25481 fn replace_unnest_with_if(original: &Expression, replacement: &Expression) -> Expression {
25482 match original {
25483 Expression::Unnest(_) => replacement.clone(),
25484 Expression::Function(f) if f.name.eq_ignore_ascii_case("UNNEST") => replacement.clone(),
25485 Expression::Alias(a) => Self::replace_unnest_with_if(&a.this, replacement),
25486 Expression::Add(op) => {
25487 let left = Self::replace_unnest_with_if(&op.left, replacement);
25488 let right = Self::replace_unnest_with_if(&op.right, replacement);
25489 Expression::Add(Box::new(crate::expressions::BinaryOp::new(left, right)))
25490 }
25491 Expression::Sub(op) => {
25492 let left = Self::replace_unnest_with_if(&op.left, replacement);
25493 let right = Self::replace_unnest_with_if(&op.right, replacement);
25494 Expression::Sub(Box::new(crate::expressions::BinaryOp::new(left, right)))
25495 }
25496 Expression::Mul(op) => {
25497 let left = Self::replace_unnest_with_if(&op.left, replacement);
25498 let right = Self::replace_unnest_with_if(&op.right, replacement);
25499 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(left, right)))
25500 }
25501 Expression::Div(op) => {
25502 let left = Self::replace_unnest_with_if(&op.left, replacement);
25503 let right = Self::replace_unnest_with_if(&op.right, replacement);
25504 Expression::Div(Box::new(crate::expressions::BinaryOp::new(left, right)))
25505 }
25506 _ => original.clone(),
25507 }
25508 }
25509
/// Split a JSON path such as `$.y[0].z` into its parts: `["y", "0", "z"]`.
///
/// The `$` root marker is dropped, dot-separated segments are split, bracket
/// contents (numeric indices and quoted keys) are unwrapped, and `[*]`
/// wildcards are discarded entirely.
fn decompose_json_path(path: &str) -> Vec<String> {
    // Peel the root marker: `$.foo` and `$foo` both reduce to `foo`.
    let body = path
        .strip_prefix("$.")
        .or_else(|| path.strip_prefix('$'))
        .unwrap_or(path);
    let mut segments: Vec<String> = Vec::new();
    if body.is_empty() {
        return segments;
    }
    let cs: Vec<char> = body.chars().collect();
    let mut buf = String::new();
    let mut idx = 0;
    while idx < cs.len() {
        match cs[idx] {
            '.' => {
                // Segment boundary: flush whatever has accumulated.
                if !buf.is_empty() {
                    segments.push(std::mem::take(&mut buf));
                }
                idx += 1;
            }
            '[' => {
                if !buf.is_empty() {
                    segments.push(std::mem::take(&mut buf));
                }
                idx += 1; // step past '['
                let mut inner = String::new();
                while idx < cs.len() && cs[idx] != ']' {
                    if cs[idx] == '"' || cs[idx] == '\'' {
                        // Quoted key: copy verbatim up to the matching quote.
                        let quote = cs[idx];
                        idx += 1;
                        while idx < cs.len() && cs[idx] != quote {
                            inner.push(cs[idx]);
                            idx += 1;
                        }
                        if idx < cs.len() {
                            idx += 1; // step past the closing quote
                        }
                    } else {
                        inner.push(cs[idx]);
                        idx += 1;
                    }
                }
                if idx < cs.len() {
                    idx += 1; // step past ']'
                }
                // `[*]` wildcards contribute no path segment.
                if inner != "*" {
                    segments.push(inner);
                }
            }
            c => {
                buf.push(c);
                idx += 1;
            }
        }
    }
    if !buf.is_empty() {
        segments.push(buf);
    }
    segments
}
25577
/// Strip the `$` root marker from a JSON path, keeping the rest.
/// `$.y[0].z` -> `y[0].z`, `$["a b"]` -> `["a b"]`, paths without a `$`
/// prefix are returned unchanged.
fn strip_json_dollar_prefix(path: &str) -> String {
    // `$.` (root plus dot access) is tried first so the dot is consumed
    // along with the marker; `strip_prefix` replaces the previous
    // starts_with + manual-slice pattern (clippy: manual_strip).
    path.strip_prefix("$.")
        .or_else(|| path.strip_prefix('$'))
        .unwrap_or(path)
        .to_string()
}
25589
/// Remove `[*]` array wildcards from a JSON path.
/// `$.y[*]` -> `$.y`, `$.y[*].z` -> `$.y.z`
fn strip_json_wildcards(path: &str) -> String {
    // Dropping a `[*]` between two dotted segments leaves `..` behind
    // (`$.y[*].z` -> `$.y..z`), so collapse those, then trim any dot left
    // dangling at the end of the path.
    // NOTE(review): a literal `[*]` or `..` inside a quoted bracket key would
    // also be rewritten here — assumed not to occur in practice.
    let without_wildcards = path.replace("[*]", "");
    let collapsed = without_wildcards.replace("..", ".");
    collapsed.trim_end_matches('.').to_string()
}
25598
/// Convert bracket notation to dot notation for JSON paths.
///
/// Quoted bracket keys become double-quoted dot segments, whatever the
/// original quote style: `$["a b"]` -> `$."a b"`, `$['key']` -> `$."key"`.
/// Unquoted bracket contents (numeric indices) stay in brackets
/// (`$[0]` -> `$[0]`), and `[*]` wildcards are preserved as-is.
fn bracket_to_dot_notation(path: &str) -> String {
    let mut result = String::new();
    let chars: Vec<char> = path.chars().collect();
    let mut i = 0;
    while i < chars.len() {
        if chars[i] == '[' {
            // Read the bracket content, unwrapping any quoted key.
            i += 1;
            let mut bracket_content = String::new();
            let mut is_quoted = false;
            while i < chars.len() && chars[i] != ']' {
                if chars[i] == '"' || chars[i] == '\'' {
                    is_quoted = true;
                    // Remember which quote opened the key so the same one
                    // closes it (a ' inside "..." is plain content).
                    let quote_char = chars[i];
                    i += 1;
                    while i < chars.len() && chars[i] != quote_char {
                        bracket_content.push(chars[i]);
                        i += 1;
                    }
                    if i < chars.len() {
                        i += 1; // skip the closing quote
                    }
                } else {
                    bracket_content.push(chars[i]);
                    i += 1;
                }
            }
            if i < chars.len() {
                i += 1; // skip ']'
            }
            if bracket_content == "*" {
                // Keep wildcard as-is
                result.push_str("[*]");
            } else if is_quoted {
                // Quoted bracket -> dot notation, always double-quoted
                result.push('.');
                result.push('"');
                result.push_str(&bracket_content);
                result.push('"');
            } else {
                // Unquoted (numeric index) -> keep as bracket
                result.push('[');
                result.push_str(&bracket_content);
                result.push(']');
            }
        } else {
            result.push(chars[i]);
            i += 1;
        }
    }
    result
}
25654
/// Rewrite double-quoted bracket keys in a JSON path to single quotes.
/// `$["a b"]` -> `$['a b']`; everything else is copied through unchanged.
fn bracket_to_single_quotes(path: &str) -> String {
    let cs: Vec<char> = path.chars().collect();
    let mut out = String::new();
    let mut pos = 0;
    while pos < cs.len() {
        // Only the exact `["` opener triggers a rewrite; single-quoted or
        // unquoted bracket contents pass through untouched.
        let opens_quoted_key = cs[pos] == '[' && cs.get(pos + 1) == Some(&'"');
        if opens_quoted_key {
            out.push_str("['");
            pos += 2; // consume `["`
            while pos < cs.len() && cs[pos] != '"' {
                out.push(cs[pos]);
                pos += 1;
            }
            if pos < cs.len() {
                pos += 1; // consume the closing `"`
            }
            out.push('\'');
        } else {
            out.push(cs[pos]);
            pos += 1;
        }
    }
    out
}
25681
/// Transform TSQL SELECT INTO -> CREATE TABLE AS for DuckDB/Snowflake
/// or PostgreSQL #temp -> TEMPORARY.
/// Also strips # from INSERT INTO #table for non-TSQL targets.
///
/// Consumes `expr` and returns the rewritten (or unchanged) expression.
/// A leading `#` on the table name is TSQL's temp-table marker; targets
/// other than TSQL/Fabric get it stripped and, where the dialect supports
/// it, translated into an explicit TEMPORARY flag.
/// `_source` is currently unused; only the target dialect drives the rewrite.
fn transform_select_into(
    expr: Expression,
    _source: DialectType,
    target: DialectType,
) -> Expression {
    use crate::expressions::{CreateTable, Expression, TableRef};

    // Handle INSERT INTO #temp -> INSERT INTO temp for non-TSQL targets
    if let Expression::Insert(ref insert) = expr {
        if insert.table.name.name.starts_with('#')
            && !matches!(target, DialectType::TSQL | DialectType::Fabric)
        {
            let mut new_insert = insert.clone();
            new_insert.table.name.name =
                insert.table.name.name.trim_start_matches('#').to_string();
            return Expression::Insert(new_insert);
        }
        // INSERT without a # table (or a TSQL/Fabric target): leave as-is.
        return expr;
    }

    if let Expression::Select(ref select) = expr {
        if let Some(ref into) = select.into {
            // INTO target may be parsed as a table ref or a bare identifier;
            // anything else yields an empty name (treated as non-temp below).
            let table_name_raw = match &into.this {
                Expression::Table(tr) => tr.name.name.clone(),
                Expression::Identifier(id) => id.name.clone(),
                _ => String::new(),
            };
            // Temp-ness comes from either the TSQL # prefix or an explicit
            // TEMPORARY keyword already present on the INTO clause.
            let is_temp = table_name_raw.starts_with('#') || into.temporary;
            let clean_name = table_name_raw.trim_start_matches('#').to_string();

            match target {
                DialectType::DuckDB | DialectType::Snowflake => {
                    // SELECT INTO -> CREATE TABLE AS SELECT
                    let mut new_select = select.clone();
                    new_select.into = None;
                    // Only name / temporary / as_select carry information
                    // here; every other CreateTable field is defaulted.
                    let ct = CreateTable {
                        name: TableRef::new(clean_name),
                        on_cluster: None,
                        columns: Vec::new(),
                        constraints: Vec::new(),
                        if_not_exists: false,
                        temporary: is_temp,
                        or_replace: false,
                        table_modifier: None,
                        as_select: Some(Expression::Select(new_select)),
                        as_select_parenthesized: false,
                        on_commit: None,
                        clone_source: None,
                        clone_at_clause: None,
                        shallow_clone: false,
                        is_copy: false,
                        leading_comments: Vec::new(),
                        with_properties: Vec::new(),
                        teradata_post_name_options: Vec::new(),
                        with_data: None,
                        with_statistics: None,
                        teradata_indexes: Vec::new(),
                        with_cte: None,
                        properties: Vec::new(),
                        partition_of: None,
                        post_table_properties: Vec::new(),
                        mysql_table_options: Vec::new(),
                        inherits: Vec::new(),
                        on_property: None,
                        copy_grants: false,
                        using_template: None,
                        rollup: None,
                    };
                    return Expression::CreateTable(Box::new(ct));
                }
                DialectType::PostgreSQL | DialectType::Redshift => {
                    // PostgreSQL: #foo -> INTO TEMPORARY foo
                    // NOTE(review): if into.temporary is already set, a #-prefixed
                    // name keeps its # here (condition is false) — confirm that
                    // combination cannot occur upstream.
                    if is_temp && !into.temporary {
                        let mut new_select = select.clone();
                        let mut new_into = into.clone();
                        new_into.temporary = true;
                        // TEMPORARY and UNLOGGED are mutually exclusive in PG.
                        new_into.unlogged = false;
                        new_into.this = Expression::Table(TableRef::new(clean_name));
                        new_select.into = Some(new_into);
                        Expression::Select(new_select)
                    } else {
                        expr
                    }
                }
                _ => expr,
            }
        } else {
            expr
        }
    } else {
        expr
    }
}
25778
/// Transform CREATE TABLE WITH properties for cross-dialect transpilation.
/// Handles FORMAT, PARTITIONED_BY, and other Presto WITH properties.
///
/// Mutates `ct` in place, in two passes:
/// 1. Presto-style `with_properties` key/value strings are redistributed:
///    kept (re-cased) for Presto-family targets, converted to STORED AS /
///    USING / TBLPROPERTIES entries in `properties` for Hive and Spark,
///    dropped entirely for DuckDB, and passed through for everything else.
/// 2. Structured `properties` (STORED AS, TBLPROPERTIES, PARTITIONED BY)
///    are converted back into `with_properties` for Presto-family targets,
///    stripped for DuckDB, or format-unquoted for other targets.
/// `_source` is currently unused; only the target dialect drives the rewrite.
fn transform_create_table_properties(
    ct: &mut crate::expressions::CreateTable,
    _source: DialectType,
    target: DialectType,
) {
    use crate::expressions::{
        BinaryOp, BooleanLiteral, Expression, FileFormatProperty, Identifier, Literal,
        Properties,
    };

    // Helper to convert a raw property value string to the correct Expression
    let value_to_expr = |v: &str| -> Expression {
        let trimmed = v.trim();
        // Check if it's a quoted string (starts and ends with ')
        if trimmed.starts_with('\'') && trimmed.ends_with('\'') {
            Expression::Literal(Literal::String(trimmed[1..trimmed.len() - 1].to_string()))
        }
        // Check if it's a number (integer or float)
        else if trimmed.parse::<i64>().is_ok() || trimmed.parse::<f64>().is_ok() {
            Expression::Literal(Literal::Number(trimmed.to_string()))
        }
        // Check if it's ARRAY[...] or ARRAY(...)
        else if trimmed.to_uppercase().starts_with("ARRAY") {
            // Convert ARRAY['y'] to ARRAY('y') for Hive/Spark: peel the
            // keyword and delimiters, then re-emit as a function call.
            let inner = trimmed
                .trim_start_matches(|c: char| c.is_alphabetic()) // Remove ARRAY
                .trim_start_matches('[')
                .trim_start_matches('(')
                .trim_end_matches(']')
                .trim_end_matches(')');
            let elements: Vec<Expression> = inner
                .split(',')
                .map(|e| {
                    let elem = e.trim().trim_matches('\'');
                    Expression::Literal(Literal::String(elem.to_string()))
                })
                .collect();
            Expression::Function(Box::new(crate::expressions::Function::new(
                "ARRAY".to_string(),
                elements,
            )))
        }
        // Otherwise, just output as identifier (unquoted)
        else {
            Expression::Identifier(Identifier::new(trimmed.to_string()))
        }
    };

    // Nothing to do if the statement carries no properties of either kind.
    if ct.with_properties.is_empty() && ct.properties.is_empty() {
        return;
    }

    // ---- Pass 1: Presto-style WITH (key = value, ...) properties ----
    if !ct.with_properties.is_empty() {
        // Extract FORMAT property and PARTITIONED_BY; everything else is
        // carried along untouched in `other_props`.
        let mut format_value: Option<String> = None;
        let mut partitioned_by: Option<String> = None;
        let mut other_props: Vec<(String, String)> = Vec::new();

        for (key, value) in ct.with_properties.drain(..) {
            let key_upper = key.to_uppercase();
            if key_upper == "FORMAT" {
                // Strip surrounding quotes from value if present
                format_value = Some(value.trim_matches('\'').to_string());
            } else if key_upper == "PARTITIONED_BY" {
                partitioned_by = Some(value);
            } else {
                other_props.push((key, value));
            }
        }

        match target {
            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                // Presto: keep WITH properties but lowercase 'format' key
                if let Some(fmt) = format_value {
                    ct.with_properties
                        .push(("format".to_string(), format!("'{}'", fmt)));
                }
                if let Some(part) = partitioned_by {
                    // Convert (col1, col2) to ARRAY['col1', 'col2'] format
                    let trimmed = part.trim();
                    let inner = trimmed.trim_start_matches('(').trim_end_matches(')');
                    // Also handle ARRAY['...'] format - keep as-is
                    if trimmed.to_uppercase().starts_with("ARRAY") {
                        ct.with_properties
                            .push(("PARTITIONED_BY".to_string(), part));
                    } else {
                        // Parse column names from the parenthesized list,
                        // tolerating either quote style on each name.
                        let cols: Vec<&str> = inner
                            .split(',')
                            .map(|c| c.trim().trim_matches('"').trim_matches('\''))
                            .collect();
                        let array_val = format!(
                            "ARRAY[{}]",
                            cols.iter()
                                .map(|c| format!("'{}'", c))
                                .collect::<Vec<_>>()
                                .join(", ")
                        );
                        ct.with_properties
                            .push(("PARTITIONED_BY".to_string(), array_val));
                    }
                }
                ct.with_properties.extend(other_props);
            }
            DialectType::Hive => {
                // Hive: FORMAT -> STORED AS, other props -> TBLPROPERTIES
                if let Some(fmt) = format_value {
                    ct.properties.push(Expression::FileFormatProperty(Box::new(
                        FileFormatProperty {
                            this: Some(Box::new(Expression::Identifier(Identifier::new(fmt)))),
                            expressions: vec![],
                            // Some(true) selects the STORED AS rendering.
                            hive_format: Some(Box::new(Expression::Boolean(BooleanLiteral {
                                value: true,
                            }))),
                        },
                    )));
                }
                if let Some(_part) = partitioned_by {
                    // PARTITIONED_BY handling is complex - move columns to partitioned by
                    // For now, the partition columns are extracted from the column list
                    Self::apply_partitioned_by(ct, &_part, target);
                }
                if !other_props.is_empty() {
                    // Remaining key/values become TBLPROPERTIES ('k' = v, ...).
                    let eq_exprs: Vec<Expression> = other_props
                        .into_iter()
                        .map(|(k, v)| {
                            Expression::Eq(Box::new(BinaryOp::new(
                                Expression::Literal(Literal::String(k)),
                                value_to_expr(&v),
                            )))
                        })
                        .collect();
                    ct.properties
                        .push(Expression::Properties(Box::new(Properties {
                            expressions: eq_exprs,
                        })));
                }
            }
            DialectType::Spark | DialectType::Databricks => {
                // Spark: FORMAT -> USING, other props -> TBLPROPERTIES
                if let Some(fmt) = format_value {
                    ct.properties.push(Expression::FileFormatProperty(Box::new(
                        FileFormatProperty {
                            this: Some(Box::new(Expression::Identifier(Identifier::new(fmt)))),
                            expressions: vec![],
                            hive_format: None, // None means USING syntax
                        },
                    )));
                }
                if let Some(_part) = partitioned_by {
                    Self::apply_partitioned_by(ct, &_part, target);
                }
                if !other_props.is_empty() {
                    let eq_exprs: Vec<Expression> = other_props
                        .into_iter()
                        .map(|(k, v)| {
                            Expression::Eq(Box::new(BinaryOp::new(
                                Expression::Literal(Literal::String(k)),
                                value_to_expr(&v),
                            )))
                        })
                        .collect();
                    ct.properties
                        .push(Expression::Properties(Box::new(Properties {
                            expressions: eq_exprs,
                        })));
                }
            }
            DialectType::DuckDB => {
                // DuckDB: strip all WITH properties (FORMAT, PARTITIONED_BY, etc.)
                // Keep nothing
            }
            _ => {
                // For other dialects, keep WITH properties as-is
                if let Some(fmt) = format_value {
                    ct.with_properties
                        .push(("FORMAT".to_string(), format!("'{}'", fmt)));
                }
                if let Some(part) = partitioned_by {
                    ct.with_properties
                        .push(("PARTITIONED_BY".to_string(), part));
                }
                ct.with_properties.extend(other_props);
            }
        }
    }

    // ---- Pass 2: structured properties (STORED AS / TBLPROPERTIES / ...) ----
    // Handle STORED AS 'PARQUET' (quoted format name) -> STORED AS PARQUET (unquoted)
    // and Hive STORED AS -> Presto WITH (format=...) conversion
    if !ct.properties.is_empty() {
        let is_presto_target = matches!(
            target,
            DialectType::Presto | DialectType::Trino | DialectType::Athena
        );
        let is_duckdb_target = matches!(target, DialectType::DuckDB);

        if is_presto_target || is_duckdb_target {
            let mut new_properties = Vec::new();
            for prop in ct.properties.drain(..) {
                match &prop {
                    Expression::FileFormatProperty(ffp) => {
                        if is_presto_target {
                            // Convert STORED AS/USING to WITH (format=...)
                            if let Some(ref fmt_expr) = ffp.this {
                                let fmt_str = match fmt_expr.as_ref() {
                                    Expression::Identifier(id) => id.name.clone(),
                                    Expression::Literal(Literal::String(s)) => s.clone(),
                                    _ => {
                                        // Unrecognized format expression: keep
                                        // the property untouched.
                                        new_properties.push(prop);
                                        continue;
                                    }
                                };
                                ct.with_properties
                                    .push(("format".to_string(), format!("'{}'", fmt_str)));
                            }
                        }
                        // DuckDB: just strip file format properties
                    }
                    // Convert TBLPROPERTIES to WITH properties for Presto target
                    Expression::Properties(props) if is_presto_target => {
                        for expr in &props.expressions {
                            if let Expression::Eq(eq) = expr {
                                // Extract key and value from the Eq expression;
                                // unsupported shapes are silently skipped.
                                let key = match &eq.left {
                                    Expression::Literal(Literal::String(s)) => s.clone(),
                                    Expression::Identifier(id) => id.name.clone(),
                                    _ => continue,
                                };
                                let value = match &eq.right {
                                    Expression::Literal(Literal::String(s)) => {
                                        format!("'{}'", s)
                                    }
                                    Expression::Literal(Literal::Number(n)) => n.clone(),
                                    Expression::Identifier(id) => id.name.clone(),
                                    _ => continue,
                                };
                                ct.with_properties.push((key, value));
                            }
                        }
                    }
                    // Convert PartitionedByProperty for Presto target
                    Expression::PartitionedByProperty(ref pbp) if is_presto_target => {
                        // Check if it contains ColumnDef expressions (Hive-style with types)
                        if let Expression::Tuple(ref tuple) = *pbp.this {
                            let mut col_names: Vec<String> = Vec::new();
                            let mut col_defs: Vec<crate::expressions::ColumnDef> = Vec::new();
                            let mut has_col_defs = false;
                            for expr in &tuple.expressions {
                                if let Expression::ColumnDef(ref cd) = expr {
                                    has_col_defs = true;
                                    col_names.push(cd.name.name.clone());
                                    col_defs.push(*cd.clone());
                                } else if let Expression::Column(ref col) = expr {
                                    col_names.push(col.name.name.clone());
                                } else if let Expression::Identifier(ref id) = expr {
                                    col_names.push(id.name.clone());
                                } else {
                                    // For function expressions like MONTHS(y), serialize to SQL
                                    let generic = Dialect::get(DialectType::Generic);
                                    if let Ok(sql) = generic.generate(expr) {
                                        col_names.push(sql);
                                    }
                                }
                            }
                            if has_col_defs {
                                // Merge partition column defs into the main column list
                                // (Presto declares partition columns inline).
                                for cd in col_defs {
                                    ct.columns.push(cd);
                                }
                            }
                            if !col_names.is_empty() {
                                // Add PARTITIONED_BY property
                                let array_val = format!(
                                    "ARRAY[{}]",
                                    col_names
                                        .iter()
                                        .map(|n| format!("'{}'", n))
                                        .collect::<Vec<_>>()
                                        .join(", ")
                                );
                                ct.with_properties
                                    .push(("PARTITIONED_BY".to_string(), array_val));
                            }
                        }
                        // Skip - don't keep in properties
                    }
                    _ => {
                        // DuckDB drops everything; Presto keeps unrecognized
                        // properties unchanged.
                        if !is_duckdb_target {
                            new_properties.push(prop);
                        }
                    }
                }
            }
            ct.properties = new_properties;
        } else {
            // For Hive/Spark targets, unquote format names in STORED AS
            for prop in &mut ct.properties {
                if let Expression::FileFormatProperty(ref mut ffp) = prop {
                    if let Some(ref mut fmt_expr) = ffp.this {
                        if let Expression::Literal(Literal::String(s)) = fmt_expr.as_ref() {
                            // Convert STORED AS 'PARQUET' to STORED AS PARQUET (unquote)
                            let unquoted = s.clone();
                            *fmt_expr =
                                Box::new(Expression::Identifier(Identifier::new(unquoted)));
                        }
                    }
                }
            }
        }
    }
}
26093
26094 /// Apply PARTITIONED_BY conversion: move partition columns from column list to PARTITIONED BY
26095 fn apply_partitioned_by(
26096 ct: &mut crate::expressions::CreateTable,
26097 partitioned_by_value: &str,
26098 target: DialectType,
26099 ) {
26100 use crate::expressions::{Column, Expression, Identifier, PartitionedByProperty, Tuple};
26101
26102 // Parse the ARRAY['col1', 'col2'] value to extract column names
26103 let mut col_names: Vec<String> = Vec::new();
26104 // The value looks like ARRAY['y', 'z'] or ARRAY('y', 'z')
26105 let inner = partitioned_by_value
26106 .trim()
26107 .trim_start_matches("ARRAY")
26108 .trim_start_matches('[')
26109 .trim_start_matches('(')
26110 .trim_end_matches(']')
26111 .trim_end_matches(')');
26112 for part in inner.split(',') {
26113 let col = part.trim().trim_matches('\'').trim_matches('"');
26114 if !col.is_empty() {
26115 col_names.push(col.to_string());
26116 }
26117 }
26118
26119 if col_names.is_empty() {
26120 return;
26121 }
26122
26123 if matches!(target, DialectType::Hive) {
26124 // Hive: PARTITIONED BY (col_name type, ...) - move columns out of column list
26125 let mut partition_col_defs = Vec::new();
26126 for col_name in &col_names {
26127 // Find and remove from columns
26128 if let Some(pos) = ct
26129 .columns
26130 .iter()
26131 .position(|c| c.name.name.eq_ignore_ascii_case(col_name))
26132 {
26133 let col_def = ct.columns.remove(pos);
26134 partition_col_defs.push(Expression::ColumnDef(Box::new(col_def)));
26135 }
26136 }
26137 if !partition_col_defs.is_empty() {
26138 ct.properties
26139 .push(Expression::PartitionedByProperty(Box::new(
26140 PartitionedByProperty {
26141 this: Box::new(Expression::Tuple(Box::new(Tuple {
26142 expressions: partition_col_defs,
26143 }))),
26144 },
26145 )));
26146 }
26147 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
26148 // Spark: PARTITIONED BY (col1, col2) - just column names, keep in column list
26149 // Use quoted identifiers to match the quoting style of the original column definitions
26150 let partition_exprs: Vec<Expression> = col_names
26151 .iter()
26152 .map(|name| {
26153 // Check if the column exists in the column list and use its quoting
26154 let is_quoted = ct
26155 .columns
26156 .iter()
26157 .any(|c| c.name.name.eq_ignore_ascii_case(name) && c.name.quoted);
26158 let ident = if is_quoted {
26159 Identifier::quoted(name.clone())
26160 } else {
26161 Identifier::new(name.clone())
26162 };
26163 Expression::Column(Column {
26164 name: ident,
26165 table: None,
26166 join_mark: false,
26167 trailing_comments: Vec::new(),
26168 span: None,
26169 })
26170 })
26171 .collect();
26172 ct.properties
26173 .push(Expression::PartitionedByProperty(Box::new(
26174 PartitionedByProperty {
26175 this: Box::new(Expression::Tuple(Box::new(Tuple {
26176 expressions: partition_exprs,
26177 }))),
26178 },
26179 )));
26180 }
26181 // DuckDB: strip partitioned_by entirely (already handled)
26182 }
26183
26184 /// Convert a DataType to Spark's type string format (using angle brackets)
26185 fn data_type_to_spark_string(dt: &crate::expressions::DataType) -> String {
26186 use crate::expressions::DataType;
26187 match dt {
26188 DataType::Int { .. } => "INT".to_string(),
26189 DataType::BigInt { .. } => "BIGINT".to_string(),
26190 DataType::SmallInt { .. } => "SMALLINT".to_string(),
26191 DataType::TinyInt { .. } => "TINYINT".to_string(),
26192 DataType::Float { .. } => "FLOAT".to_string(),
26193 DataType::Double { .. } => "DOUBLE".to_string(),
26194 DataType::Decimal {
26195 precision: Some(p),
26196 scale: Some(s),
26197 } => format!("DECIMAL({}, {})", p, s),
26198 DataType::Decimal {
26199 precision: Some(p), ..
26200 } => format!("DECIMAL({})", p),
26201 DataType::Decimal { .. } => "DECIMAL".to_string(),
26202 DataType::VarChar { .. } | DataType::Text | DataType::String { .. } => {
26203 "STRING".to_string()
26204 }
26205 DataType::Char { .. } => "STRING".to_string(),
26206 DataType::Boolean => "BOOLEAN".to_string(),
26207 DataType::Date => "DATE".to_string(),
26208 DataType::Timestamp { .. } => "TIMESTAMP".to_string(),
26209 DataType::Json | DataType::JsonB => "STRING".to_string(),
26210 DataType::Binary { .. } => "BINARY".to_string(),
26211 DataType::Array { element_type, .. } => {
26212 format!("ARRAY<{}>", Self::data_type_to_spark_string(element_type))
26213 }
26214 DataType::Map {
26215 key_type,
26216 value_type,
26217 } => format!(
26218 "MAP<{}, {}>",
26219 Self::data_type_to_spark_string(key_type),
26220 Self::data_type_to_spark_string(value_type)
26221 ),
26222 DataType::Struct { fields, .. } => {
26223 let field_strs: Vec<String> = fields
26224 .iter()
26225 .map(|f| {
26226 if f.name.is_empty() {
26227 Self::data_type_to_spark_string(&f.data_type)
26228 } else {
26229 format!(
26230 "{}: {}",
26231 f.name,
26232 Self::data_type_to_spark_string(&f.data_type)
26233 )
26234 }
26235 })
26236 .collect();
26237 format!("STRUCT<{}>", field_strs.join(", "))
26238 }
26239 DataType::Custom { name } => name.clone(),
26240 _ => format!("{:?}", dt),
26241 }
26242 }
26243
/// Extract value and unit from an Interval expression
/// Returns (value_expression, IntervalUnit)
///
/// Handles three shapes:
/// - an `Interval` with an explicit `Simple` unit spec: the unit is taken
///   directly and the value passed through (0 if missing);
/// - an `Interval` with no unit spec whose value is a string literal like
///   `'5 DAY'` (Snowflake style): the string is split into a numeric part
///   and a parsed unit, with DAY as the fallback for unknown unit names;
/// - any non-Interval expression: returned unchanged with a DAY unit.
fn extract_interval_parts(
    interval_expr: &Expression,
) -> (Expression, crate::expressions::IntervalUnit) {
    use crate::expressions::{IntervalUnit, IntervalUnitSpec};

    if let Expression::Interval(iv) = interval_expr {
        // A missing interval value defaults to the number 0.
        let val = iv.this.clone().unwrap_or(Expression::number(0));
        let unit = match &iv.unit {
            Some(IntervalUnitSpec::Simple { unit, .. }) => *unit,
            None => {
                // Unit might be embedded in the string value (Snowflake format: '5 DAY')
                if let Expression::Literal(crate::expressions::Literal::String(s)) = &val {
                    // Split once on the first space: "5 DAY" -> ["5", "DAY"].
                    let parts: Vec<&str> = s.trim().splitn(2, ' ').collect();
                    if parts.len() == 2 {
                        let unit_str = parts[1].trim().to_uppercase();
                        // Accept singular and plural spellings; anything
                        // unrecognized falls back to DAY.
                        let parsed_unit = match unit_str.as_str() {
                            "YEAR" | "YEARS" => IntervalUnit::Year,
                            "QUARTER" | "QUARTERS" => IntervalUnit::Quarter,
                            "MONTH" | "MONTHS" => IntervalUnit::Month,
                            "WEEK" | "WEEKS" | "ISOWEEK" => IntervalUnit::Week,
                            "DAY" | "DAYS" => IntervalUnit::Day,
                            "HOUR" | "HOURS" => IntervalUnit::Hour,
                            "MINUTE" | "MINUTES" => IntervalUnit::Minute,
                            "SECOND" | "SECONDS" => IntervalUnit::Second,
                            "MILLISECOND" | "MILLISECONDS" => IntervalUnit::Millisecond,
                            "MICROSECOND" | "MICROSECONDS" => IntervalUnit::Microsecond,
                            _ => IntervalUnit::Day,
                        };
                        // Return just the numeric part as value and parsed unit
                        return (
                            Expression::Literal(crate::expressions::Literal::String(
                                parts[0].to_string(),
                            )),
                            parsed_unit,
                        );
                    }
                    IntervalUnit::Day
                } else {
                    IntervalUnit::Day
                }
            }
            // Any other (non-Simple) unit spec also defaults to DAY.
            _ => IntervalUnit::Day,
        };
        (val, unit)
    } else {
        // Not an interval - pass through
        (interval_expr.clone(), crate::expressions::IntervalUnit::Day)
    }
}
26295
26296 /// Normalize BigQuery-specific functions to standard forms that target dialects can handle
26297 fn normalize_bigquery_function(
26298 e: Expression,
26299 source: DialectType,
26300 target: DialectType,
26301 ) -> Result<Expression> {
26302 use crate::expressions::{BinaryOp, Cast, DataType, Function, Identifier, Literal, Paren};
26303
26304 let f = if let Expression::Function(f) = e {
26305 *f
26306 } else {
26307 return Ok(e);
26308 };
26309 let name = f.name.to_uppercase();
26310 let mut args = f.args;
26311
26312 /// Helper to extract unit string from an identifier, column, or literal expression
26313 fn get_unit_str(expr: &Expression) -> String {
26314 match expr {
26315 Expression::Identifier(id) => id.name.to_uppercase(),
26316 Expression::Literal(Literal::String(s)) => s.to_uppercase(),
26317 Expression::Column(col) => col.name.name.to_uppercase(),
26318 // Handle WEEK(MONDAY), WEEK(SUNDAY) etc. which are parsed as Function("WEEK", [Column("MONDAY")])
26319 Expression::Function(f) => {
26320 let base = f.name.to_uppercase();
26321 if !f.args.is_empty() {
26322 // e.g., WEEK(MONDAY) -> "WEEK(MONDAY)"
26323 let inner = get_unit_str(&f.args[0]);
26324 format!("{}({})", base, inner)
26325 } else {
26326 base
26327 }
26328 }
26329 _ => "DAY".to_string(),
26330 }
26331 }
26332
26333 /// Parse unit string to IntervalUnit
26334 fn parse_interval_unit(s: &str) -> crate::expressions::IntervalUnit {
26335 match s {
26336 "YEAR" => crate::expressions::IntervalUnit::Year,
26337 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
26338 "MONTH" => crate::expressions::IntervalUnit::Month,
26339 "WEEK" | "ISOWEEK" => crate::expressions::IntervalUnit::Week,
26340 "DAY" => crate::expressions::IntervalUnit::Day,
26341 "HOUR" => crate::expressions::IntervalUnit::Hour,
26342 "MINUTE" => crate::expressions::IntervalUnit::Minute,
26343 "SECOND" => crate::expressions::IntervalUnit::Second,
26344 "MILLISECOND" => crate::expressions::IntervalUnit::Millisecond,
26345 "MICROSECOND" => crate::expressions::IntervalUnit::Microsecond,
26346 _ if s.starts_with("WEEK(") => crate::expressions::IntervalUnit::Week,
26347 _ => crate::expressions::IntervalUnit::Day,
26348 }
26349 }
26350
26351 match name.as_str() {
26352 // TIMESTAMP_DIFF(date1, date2, unit) -> TIMESTAMPDIFF(unit, date2, date1)
26353 // (BigQuery: result = date1 - date2, Standard: result = end - start)
26354 "TIMESTAMP_DIFF" | "DATETIME_DIFF" | "TIME_DIFF" if args.len() == 3 => {
26355 let date1 = args.remove(0);
26356 let date2 = args.remove(0);
26357 let unit_expr = args.remove(0);
26358 let unit_str = get_unit_str(&unit_expr);
26359
26360 if matches!(target, DialectType::BigQuery) {
26361 // BigQuery -> BigQuery: just uppercase the unit
26362 let unit = Expression::Identifier(Identifier::new(unit_str.clone()));
26363 return Ok(Expression::Function(Box::new(Function::new(
26364 f.name,
26365 vec![date1, date2, unit],
26366 ))));
26367 }
26368
26369 // For Snowflake: use TimestampDiff expression so it generates TIMESTAMPDIFF
26370 // (Function("TIMESTAMPDIFF") would be converted to DATEDIFF by Snowflake's function normalization)
26371 if matches!(target, DialectType::Snowflake) {
26372 return Ok(Expression::TimestampDiff(Box::new(
26373 crate::expressions::TimestampDiff {
26374 this: Box::new(date2),
26375 expression: Box::new(date1),
26376 unit: Some(unit_str),
26377 },
26378 )));
26379 }
26380
26381 // For DuckDB: DATE_DIFF('UNIT', start, end) with proper CAST
26382 if matches!(target, DialectType::DuckDB) {
26383 let (cast_d1, cast_d2) = if name == "TIME_DIFF" {
26384 // CAST to TIME
26385 let cast_fn = |e: Expression| -> Expression {
26386 match e {
26387 Expression::Literal(Literal::String(s)) => {
26388 Expression::Cast(Box::new(Cast {
26389 this: Expression::Literal(Literal::String(s)),
26390 to: DataType::Custom {
26391 name: "TIME".to_string(),
26392 },
26393 trailing_comments: vec![],
26394 double_colon_syntax: false,
26395 format: None,
26396 default: None,
26397 }))
26398 }
26399 other => other,
26400 }
26401 };
26402 (cast_fn(date1), cast_fn(date2))
26403 } else if name == "DATETIME_DIFF" {
26404 // CAST to TIMESTAMP
26405 (
26406 Self::ensure_cast_timestamp(date1),
26407 Self::ensure_cast_timestamp(date2),
26408 )
26409 } else {
26410 // TIMESTAMP_DIFF: CAST to TIMESTAMPTZ
26411 (
26412 Self::ensure_cast_timestamptz(date1),
26413 Self::ensure_cast_timestamptz(date2),
26414 )
26415 };
26416 return Ok(Expression::Function(Box::new(Function::new(
26417 "DATE_DIFF".to_string(),
26418 vec![
26419 Expression::Literal(Literal::String(unit_str)),
26420 cast_d2,
26421 cast_d1,
26422 ],
26423 ))));
26424 }
26425
26426 // Convert to standard TIMESTAMPDIFF(unit, start, end)
26427 let unit = Expression::Identifier(Identifier::new(unit_str));
26428 Ok(Expression::Function(Box::new(Function::new(
26429 "TIMESTAMPDIFF".to_string(),
26430 vec![unit, date2, date1],
26431 ))))
26432 }
26433
26434 // DATEDIFF(unit, start, end) -> target-specific form
26435 // Used by: Redshift, Snowflake, TSQL, Databricks, Spark
26436 "DATEDIFF" if args.len() == 3 => {
26437 let arg0 = args.remove(0);
26438 let arg1 = args.remove(0);
26439 let arg2 = args.remove(0);
26440 let unit_str = get_unit_str(&arg0);
26441
26442 // Redshift DATEDIFF(unit, start, end) order: result = end - start
26443 // Snowflake DATEDIFF(unit, start, end) order: result = end - start
26444 // TSQL DATEDIFF(unit, start, end) order: result = end - start
26445
26446 if matches!(target, DialectType::Snowflake) {
26447 // Snowflake: DATEDIFF(UNIT, start, end) - uppercase unit
26448 let unit = Expression::Identifier(Identifier::new(unit_str));
26449 return Ok(Expression::Function(Box::new(Function::new(
26450 "DATEDIFF".to_string(),
26451 vec![unit, arg1, arg2],
26452 ))));
26453 }
26454
26455 if matches!(target, DialectType::DuckDB) {
26456 // DuckDB: DATE_DIFF('UNIT', start, end) with CAST
26457 let cast_d1 = Self::ensure_cast_timestamp(arg1);
26458 let cast_d2 = Self::ensure_cast_timestamp(arg2);
26459 return Ok(Expression::Function(Box::new(Function::new(
26460 "DATE_DIFF".to_string(),
26461 vec![
26462 Expression::Literal(Literal::String(unit_str)),
26463 cast_d1,
26464 cast_d2,
26465 ],
26466 ))));
26467 }
26468
26469 if matches!(target, DialectType::BigQuery) {
26470 // BigQuery: DATE_DIFF(end_date, start_date, UNIT) - reversed args, CAST to DATETIME
26471 let cast_d1 = Self::ensure_cast_datetime(arg1);
26472 let cast_d2 = Self::ensure_cast_datetime(arg2);
26473 let unit = Expression::Identifier(Identifier::new(unit_str));
26474 return Ok(Expression::Function(Box::new(Function::new(
26475 "DATE_DIFF".to_string(),
26476 vec![cast_d2, cast_d1, unit],
26477 ))));
26478 }
26479
26480 if matches!(target, DialectType::Spark | DialectType::Databricks) {
26481 // Spark/Databricks: DATEDIFF(UNIT, start, end) - uppercase unit
26482 let unit = Expression::Identifier(Identifier::new(unit_str));
26483 return Ok(Expression::Function(Box::new(Function::new(
26484 "DATEDIFF".to_string(),
26485 vec![unit, arg1, arg2],
26486 ))));
26487 }
26488
26489 if matches!(target, DialectType::Hive) {
26490 // Hive: DATEDIFF(end, start) for DAY only, use MONTHS_BETWEEN for MONTH
26491 match unit_str.as_str() {
26492 "MONTH" => {
26493 return Ok(Expression::Function(Box::new(Function::new(
26494 "CAST".to_string(),
26495 vec![Expression::Function(Box::new(Function::new(
26496 "MONTHS_BETWEEN".to_string(),
26497 vec![arg2, arg1],
26498 )))],
26499 ))));
26500 }
26501 "WEEK" => {
26502 return Ok(Expression::Cast(Box::new(Cast {
26503 this: Expression::Div(Box::new(crate::expressions::BinaryOp::new(
26504 Expression::Function(Box::new(Function::new(
26505 "DATEDIFF".to_string(),
26506 vec![arg2, arg1],
26507 ))),
26508 Expression::Literal(Literal::Number("7".to_string())),
26509 ))),
26510 to: DataType::Int {
26511 length: None,
26512 integer_spelling: false,
26513 },
26514 trailing_comments: vec![],
26515 double_colon_syntax: false,
26516 format: None,
26517 default: None,
26518 })));
26519 }
26520 _ => {
26521 // Default: DATEDIFF(end, start) for DAY
26522 return Ok(Expression::Function(Box::new(Function::new(
26523 "DATEDIFF".to_string(),
26524 vec![arg2, arg1],
26525 ))));
26526 }
26527 }
26528 }
26529
26530 if matches!(
26531 target,
26532 DialectType::Presto | DialectType::Trino | DialectType::Athena
26533 ) {
26534 // Presto/Trino: DATE_DIFF('UNIT', start, end)
26535 return Ok(Expression::Function(Box::new(Function::new(
26536 "DATE_DIFF".to_string(),
26537 vec![Expression::Literal(Literal::String(unit_str)), arg1, arg2],
26538 ))));
26539 }
26540
26541 if matches!(target, DialectType::TSQL) {
26542 // TSQL: DATEDIFF(UNIT, start, CAST(end AS DATETIME2))
26543 let cast_d2 = Self::ensure_cast_datetime2(arg2);
26544 let unit = Expression::Identifier(Identifier::new(unit_str));
26545 return Ok(Expression::Function(Box::new(Function::new(
26546 "DATEDIFF".to_string(),
26547 vec![unit, arg1, cast_d2],
26548 ))));
26549 }
26550
26551 if matches!(target, DialectType::PostgreSQL) {
26552 // PostgreSQL doesn't have DATEDIFF - use date subtraction or EXTRACT
26553 // For now, use DATEDIFF (passthrough) with uppercased unit
26554 let unit = Expression::Identifier(Identifier::new(unit_str));
26555 return Ok(Expression::Function(Box::new(Function::new(
26556 "DATEDIFF".to_string(),
26557 vec![unit, arg1, arg2],
26558 ))));
26559 }
26560
26561 // Default: DATEDIFF(UNIT, start, end) with uppercase unit
26562 let unit = Expression::Identifier(Identifier::new(unit_str));
26563 Ok(Expression::Function(Box::new(Function::new(
26564 "DATEDIFF".to_string(),
26565 vec![unit, arg1, arg2],
26566 ))))
26567 }
26568
// DATE_DIFF(date1, date2, unit) -> standard form
// BigQuery-style argument order: DATE_DIFF(later, earlier, unit).
"DATE_DIFF" if args.len() == 3 => {
    let date1 = args.remove(0);
    let date2 = args.remove(0);
    let unit_expr = args.remove(0);
    let unit_str = get_unit_str(&unit_expr);

    if matches!(target, DialectType::BigQuery) {
        // BigQuery -> BigQuery: just uppercase the unit, normalize WEEK(SUNDAY) -> WEEK
        // (WEEK(SUNDAY) is spelled plain WEEK since Sunday is the default week start).
        let norm_unit = if unit_str == "WEEK(SUNDAY)" {
            "WEEK".to_string()
        } else {
            unit_str
        };
        // Rewrite bare DATE literals into explicit CASTs so generation is stable.
        let norm_d1 = Self::date_literal_to_cast(date1);
        let norm_d2 = Self::date_literal_to_cast(date2);
        let unit = Expression::Identifier(Identifier::new(norm_unit));
        // Keep the original function name (f.name) rather than hard-coding it.
        return Ok(Expression::Function(Box::new(Function::new(
            f.name,
            vec![norm_d1, norm_d2, unit],
        ))));
    }

    if matches!(target, DialectType::MySQL) {
        // MySQL DATEDIFF only takes 2 args (date1, date2), returns day difference.
        // NOTE(review): the unit is dropped entirely here, so non-DAY units are
        // silently converted to a day difference for MySQL - confirm intended.
        let norm_d1 = Self::date_literal_to_cast(date1);
        let norm_d2 = Self::date_literal_to_cast(date2);
        return Ok(Expression::Function(Box::new(Function::new(
            "DATEDIFF".to_string(),
            vec![norm_d1, norm_d2],
        ))));
    }

    if matches!(target, DialectType::StarRocks) {
        // StarRocks: DATE_DIFF('UNIT', date1, date2) - unit as string, args NOT swapped
        let norm_d1 = Self::date_literal_to_cast(date1);
        let norm_d2 = Self::date_literal_to_cast(date2);
        return Ok(Expression::Function(Box::new(Function::new(
            "DATE_DIFF".to_string(),
            vec![
                Expression::Literal(Literal::String(unit_str)),
                norm_d1,
                norm_d2,
            ],
        ))));
    }

    if matches!(target, DialectType::DuckDB) {
        // DuckDB: DATE_DIFF('UNIT', date2, date1) - DuckDB takes (unit, start, end),
        // so BigQuery's (later, earlier) pair is swapped. Dates are CAST explicitly.
        let norm_d1 = Self::ensure_cast_date(date1);
        let norm_d2 = Self::ensure_cast_date(date2);

        // Handle WEEK variants: WEEK(MONDAY)/WEEK(SUNDAY)/ISOWEEK/WEEK
        let is_week_variant = unit_str == "WEEK"
            || unit_str.starts_with("WEEK(")
            || unit_str == "ISOWEEK";
        if is_week_variant {
            // For DuckDB, WEEK-based diffs use a DATE_TRUNC approach: shift each
            // operand by a per-weekday day offset so DATE_TRUNC('WEEK', ...) lands
            // on the requested week-start day, then diff whole weeks.
            // WEEK(MONDAY) / ISOWEEK: DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2), DATE_TRUNC('WEEK', d1))
            // WEEK / WEEK(SUNDAY): DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2 + INTERVAL '1' DAY), DATE_TRUNC('WEEK', d1 + INTERVAL '1' DAY))
            // WEEK(SATURDAY): DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2 + INTERVAL '-5' DAY), DATE_TRUNC('WEEK', d1 + INTERVAL '-5' DAY))
            let day_offset = if unit_str == "WEEK(MONDAY)" || unit_str == "ISOWEEK" {
                None // ISO weeks start on Monday, aligned with DATE_TRUNC('WEEK')
            } else if unit_str == "WEEK" || unit_str == "WEEK(SUNDAY)" {
                Some("1") // Shift Sunday to Monday alignment
            } else if unit_str == "WEEK(SATURDAY)" {
                Some("-5")
            } else if unit_str == "WEEK(TUESDAY)" {
                Some("-1")
            } else if unit_str == "WEEK(WEDNESDAY)" {
                Some("-2")
            } else if unit_str == "WEEK(THURSDAY)" {
                Some("-3")
            } else if unit_str == "WEEK(FRIDAY)" {
                Some("-4")
            } else {
                Some("1") // default to Sunday
            };

            // Build DATE_TRUNC('WEEK', date [+ INTERVAL 'offset' DAY]).
            let make_trunc = |date: Expression, offset: Option<&str>| -> Expression {
                let shifted = if let Some(off) = offset {
                    let interval =
                        Expression::Interval(Box::new(crate::expressions::Interval {
                            this: Some(Expression::Literal(Literal::String(
                                off.to_string(),
                            ))),
                            unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                unit: crate::expressions::IntervalUnit::Day,
                                use_plural: false,
                            }),
                        }));
                    Expression::Add(Box::new(crate::expressions::BinaryOp::new(
                        date, interval,
                    )))
                } else {
                    date
                };
                Expression::Function(Box::new(Function::new(
                    "DATE_TRUNC".to_string(),
                    vec![
                        Expression::Literal(Literal::String("WEEK".to_string())),
                        shifted,
                    ],
                )))
            };

            let trunc_d2 = make_trunc(norm_d2, day_offset);
            let trunc_d1 = make_trunc(norm_d1, day_offset);
            return Ok(Expression::Function(Box::new(Function::new(
                "DATE_DIFF".to_string(),
                vec![
                    Expression::Literal(Literal::String("WEEK".to_string())),
                    trunc_d2,
                    trunc_d1,
                ],
            ))));
        }

        // Non-WEEK units: plain DATE_DIFF('UNIT', start, end) with swapped operands.
        return Ok(Expression::Function(Box::new(Function::new(
            "DATE_DIFF".to_string(),
            vec![
                Expression::Literal(Literal::String(unit_str)),
                norm_d2,
                norm_d1,
            ],
        ))));
    }

    // Default: DATEDIFF(unit, date2, date1) - operands swapped into (start, end) order.
    let unit = Expression::Identifier(Identifier::new(unit_str));
    Ok(Expression::Function(Box::new(Function::new(
        "DATEDIFF".to_string(),
        vec![unit, date2, date1],
    ))))
}
26704
// TIMESTAMP_ADD(ts, INTERVAL n UNIT) -> target-specific
// Also handles DATETIME_ADD and TIME_ADD (the function name selects the cast type).
"TIMESTAMP_ADD" | "DATETIME_ADD" | "TIME_ADD" if args.len() == 2 => {
    let ts = args.remove(0);
    let interval_expr = args.remove(0);
    // Split INTERVAL n UNIT into its amount expression and unit.
    let (val, unit) = Self::extract_interval_parts(&interval_expr);

    match target {
        DialectType::Snowflake => {
            // TIMESTAMPADD(UNIT, val, CAST(ts AS TIMESTAMPTZ))
            // Use TimestampAdd expression so Snowflake generates TIMESTAMPADD
            // (Function("TIMESTAMPADD") would be converted to DATEADD by Snowflake's function normalization)
            let unit_str = Self::interval_unit_to_string(&unit);
            let cast_ts = Self::maybe_cast_ts_to_tz(ts, &name);
            Ok(Expression::TimestampAdd(Box::new(
                crate::expressions::TimestampAdd {
                    this: Box::new(val),
                    expression: Box::new(cast_ts),
                    unit: Some(unit_str),
                },
            )))
        }
        DialectType::Spark | DialectType::Databricks => {
            if name == "DATETIME_ADD" && matches!(target, DialectType::Spark) {
                // Spark DATETIME_ADD: ts + INTERVAL val UNIT
                let interval =
                    Expression::Interval(Box::new(crate::expressions::Interval {
                        this: Some(val),
                        unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                            unit,
                            use_plural: false,
                        }),
                    }));
                Ok(Expression::Add(Box::new(
                    crate::expressions::BinaryOp::new(ts, interval),
                )))
            } else if name == "DATETIME_ADD"
                && matches!(target, DialectType::Databricks)
            {
                // Databricks DATETIME_ADD: TIMESTAMPADD(UNIT, val, ts)
                let unit_str = Self::interval_unit_to_string(&unit);
                Ok(Expression::Function(Box::new(Function::new(
                    "TIMESTAMPADD".to_string(),
                    vec![Expression::Identifier(Identifier::new(unit_str)), val, ts],
                ))))
            } else {
                // Presto-style: DATE_ADD('unit', val, CAST(ts AS TIMESTAMP))
                // Only TIMESTAMP_*/DATETIME_* get the cast; TIME_ADD passes ts through.
                let unit_str = Self::interval_unit_to_string(&unit);
                let cast_ts =
                    if name.starts_with("TIMESTAMP") || name.starts_with("DATETIME") {
                        Self::maybe_cast_ts(ts)
                    } else {
                        ts
                    };
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![
                        Expression::Identifier(Identifier::new(unit_str)),
                        val,
                        cast_ts,
                    ],
                ))))
            }
        }
        DialectType::MySQL => {
            // DATE_ADD(TIMESTAMP(ts), INTERVAL val UNIT) for MySQL
            let mysql_ts = if name.starts_with("TIMESTAMP") {
                // Check if already wrapped in TIMESTAMP() function (from cross-dialect normalization)
                match &ts {
                    Expression::Function(ref inner_f)
                        if inner_f.name.eq_ignore_ascii_case("TIMESTAMP") =>
                    {
                        // Already wrapped, keep as-is
                        ts
                    }
                    _ => {
                        // Unwrap typed literals: TIMESTAMP '...' -> '...' for TIMESTAMP() wrapper
                        let unwrapped = match ts {
                            Expression::Literal(Literal::Timestamp(s)) => {
                                Expression::Literal(Literal::String(s))
                            }
                            other => other,
                        };
                        Expression::Function(Box::new(Function::new(
                            "TIMESTAMP".to_string(),
                            vec![unwrapped],
                        )))
                    }
                }
            } else {
                ts
            };
            Ok(Expression::DateAdd(Box::new(
                crate::expressions::DateAddFunc {
                    this: mysql_ts,
                    interval: val,
                    unit,
                },
            )))
        }
        _ => {
            // DuckDB and others use DateAdd expression (DuckDB converts to + INTERVAL).
            // DuckDB chooses the cast by function name: DATETIME_ADD -> TIMESTAMP,
            // TIMESTAMP_ADD -> TIMESTAMPTZ, TIME_ADD -> no cast.
            let cast_ts = if matches!(target, DialectType::DuckDB) {
                if name == "DATETIME_ADD" {
                    Self::ensure_cast_timestamp(ts)
                } else if name.starts_with("TIMESTAMP") {
                    Self::maybe_cast_ts_to_tz(ts, &name)
                } else {
                    ts
                }
            } else {
                ts
            };
            Ok(Expression::DateAdd(Box::new(
                crate::expressions::DateAddFunc {
                    this: cast_ts,
                    interval: val,
                    unit,
                },
            )))
        }
    }
}
26827
// TIMESTAMP_SUB(ts, INTERVAL n UNIT) -> target-specific
// Mirror of the *_ADD arm above: subtraction is modeled either as a negated
// TIMESTAMPADD, as `ts - INTERVAL`, or as a DateSub expression.
"TIMESTAMP_SUB" | "DATETIME_SUB" | "TIME_SUB" if args.len() == 2 => {
    let ts = args.remove(0);
    let interval_expr = args.remove(0);
    // Split INTERVAL n UNIT into its amount expression and unit.
    let (val, unit) = Self::extract_interval_parts(&interval_expr);

    match target {
        DialectType::Snowflake => {
            // TIMESTAMPADD(UNIT, val * -1, CAST(ts AS TIMESTAMPTZ))
            let unit_str = Self::interval_unit_to_string(&unit);
            let cast_ts = Self::maybe_cast_ts_to_tz(ts, &name);
            // Negate the amount as `val * -1` rather than rewriting the literal.
            let neg_val = Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
                val,
                Expression::Neg(Box::new(crate::expressions::UnaryOp {
                    this: Expression::number(1),
                })),
            )));
            Ok(Expression::TimestampAdd(Box::new(
                crate::expressions::TimestampAdd {
                    this: Box::new(neg_val),
                    expression: Box::new(cast_ts),
                    unit: Some(unit_str),
                },
            )))
        }
        DialectType::Spark | DialectType::Databricks => {
            // NOTE(review): TIME_SUB targeting Spark falls into the else-branch
            // (Databricks-style TIMESTAMPADD) because only DATETIME_SUB and
            // TIMESTAMP_SUB are listed here - confirm that is intended.
            if (name == "DATETIME_SUB" && matches!(target, DialectType::Spark))
                || (name == "TIMESTAMP_SUB" && matches!(target, DialectType::Spark))
            {
                // Spark: ts - INTERVAL val UNIT
                let cast_ts = if name.starts_with("TIMESTAMP") {
                    Self::maybe_cast_ts(ts)
                } else {
                    ts
                };
                let interval =
                    Expression::Interval(Box::new(crate::expressions::Interval {
                        this: Some(val),
                        unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                            unit,
                            use_plural: false,
                        }),
                    }));
                Ok(Expression::Sub(Box::new(
                    crate::expressions::BinaryOp::new(cast_ts, interval),
                )))
            } else {
                // Databricks: TIMESTAMPADD(UNIT, val * -1, ts)
                let unit_str = Self::interval_unit_to_string(&unit);
                let neg_val =
                    Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
                        val,
                        Expression::Neg(Box::new(crate::expressions::UnaryOp {
                            this: Expression::number(1),
                        })),
                    )));
                Ok(Expression::Function(Box::new(Function::new(
                    "TIMESTAMPADD".to_string(),
                    vec![
                        Expression::Identifier(Identifier::new(unit_str)),
                        neg_val,
                        ts,
                    ],
                ))))
            }
        }
        DialectType::MySQL => {
            // Wrap the operand in TIMESTAMP() for TIMESTAMP_* variants (same
            // normalization as the *_ADD arm above).
            let mysql_ts = if name.starts_with("TIMESTAMP") {
                // Check if already wrapped in TIMESTAMP() function (from cross-dialect normalization)
                match &ts {
                    Expression::Function(ref inner_f)
                        if inner_f.name.eq_ignore_ascii_case("TIMESTAMP") =>
                    {
                        // Already wrapped, keep as-is
                        ts
                    }
                    _ => {
                        // Unwrap typed literals: TIMESTAMP '...' -> '...' for TIMESTAMP() wrapper
                        let unwrapped = match ts {
                            Expression::Literal(Literal::Timestamp(s)) => {
                                Expression::Literal(Literal::String(s))
                            }
                            other => other,
                        };
                        Expression::Function(Box::new(Function::new(
                            "TIMESTAMP".to_string(),
                            vec![unwrapped],
                        )))
                    }
                }
            } else {
                ts
            };
            Ok(Expression::DateSub(Box::new(
                crate::expressions::DateAddFunc {
                    this: mysql_ts,
                    interval: val,
                    unit,
                },
            )))
        }
        _ => {
            // DuckDB gets a cast chosen by function name; everyone else passes
            // through. The generic DateSub expression handles rendering.
            let cast_ts = if matches!(target, DialectType::DuckDB) {
                if name == "DATETIME_SUB" {
                    Self::ensure_cast_timestamp(ts)
                } else if name.starts_with("TIMESTAMP") {
                    Self::maybe_cast_ts_to_tz(ts, &name)
                } else {
                    ts
                }
            } else {
                ts
            };
            Ok(Expression::DateSub(Box::new(
                crate::expressions::DateAddFunc {
                    this: cast_ts,
                    interval: val,
                    unit,
                },
            )))
        }
    }
}
26950
// DATE_SUB(date, INTERVAL n UNIT) -> target-specific
"DATE_SUB" if args.len() == 2 => {
    let date = args.remove(0);
    let interval_expr = args.remove(0);
    // Split INTERVAL n UNIT into its amount expression and unit.
    let (val, unit) = Self::extract_interval_parts(&interval_expr);

    match target {
        DialectType::Databricks | DialectType::Spark => {
            // Databricks/Spark: DATE_ADD(date, -val)
            // Use DateAdd expression with negative val so it generates correctly
            // The generator will output DATE_ADD(date, INTERVAL -val DAY)
            // Then Databricks transform converts 2-arg DATE_ADD(date, interval) to DATEADD(DAY, interval, date)
            // Instead, we directly output as a simple negated DateSub
            Ok(Expression::DateSub(Box::new(
                crate::expressions::DateAddFunc {
                    this: date,
                    interval: val,
                    unit,
                },
            )))
        }
        DialectType::DuckDB => {
            // DuckDB: CAST(date AS DATE) - INTERVAL 'val' UNIT
            let cast_date = Self::ensure_cast_date(date);
            let interval =
                Expression::Interval(Box::new(crate::expressions::Interval {
                    this: Some(val),
                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                        unit,
                        use_plural: false,
                    }),
                }));
            Ok(Expression::Sub(Box::new(
                crate::expressions::BinaryOp::new(cast_date, interval),
            )))
        }
        DialectType::Snowflake => {
            // Snowflake: Let Snowflake's own DateSub -> DATEADD(UNIT, val * -1, date) handler work
            // Just ensure the date is cast properly
            let cast_date = Self::ensure_cast_date(date);
            Ok(Expression::DateSub(Box::new(
                crate::expressions::DateAddFunc {
                    this: cast_date,
                    interval: val,
                    unit,
                },
            )))
        }
        DialectType::PostgreSQL => {
            // PostgreSQL: date - INTERVAL 'val UNIT'
            // Amount and unit are folded into a single string literal interval.
            let unit_str = Self::interval_unit_to_string(&unit);
            let interval =
                Expression::Interval(Box::new(crate::expressions::Interval {
                    this: Some(Expression::Literal(Literal::String(format!(
                        "{} {}",
                        Self::expr_to_string(&val),
                        unit_str
                    )))),
                    unit: None,
                }));
            Ok(Expression::Sub(Box::new(
                crate::expressions::BinaryOp::new(date, interval),
            )))
        }
        // Everyone else: generic DateSub expression, rendered per-dialect later.
        _ => Ok(Expression::DateSub(Box::new(
            crate::expressions::DateAddFunc {
                this: date,
                interval: val,
                unit,
            },
        ))),
    }
}
27024
// DATEADD(unit, val, date) -> target-specific form
// Used by: Redshift, Snowflake, TSQL, ClickHouse
"DATEADD" if args.len() == 3 => {
    let arg0 = args.remove(0);
    let arg1 = args.remove(0);
    let arg2 = args.remove(0);
    let unit_str = get_unit_str(&arg0);

    if matches!(target, DialectType::Snowflake | DialectType::TSQL) {
        // Keep DATEADD(UNIT, val, date) with uppercased unit
        let unit = Expression::Identifier(Identifier::new(unit_str));
        // Only CAST to DATETIME2 for TSQL target when source is NOT Spark/Databricks family
        let date = if matches!(target, DialectType::TSQL)
            && !matches!(
                source,
                DialectType::Spark | DialectType::Databricks | DialectType::Hive
            ) {
            Self::ensure_cast_datetime2(arg2)
        } else {
            arg2
        };
        return Ok(Expression::Function(Box::new(Function::new(
            "DATEADD".to_string(),
            vec![unit, arg1, date],
        ))));
    }

    if matches!(target, DialectType::DuckDB) {
        // DuckDB: date + INTERVAL 'val' UNIT, with date cast to TIMESTAMP
        let iu = parse_interval_unit(&unit_str);
        let interval = Expression::Interval(Box::new(crate::expressions::Interval {
            this: Some(arg1),
            unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                unit: iu,
                use_plural: false,
            }),
        }));
        let cast_date = Self::ensure_cast_timestamp(arg2);
        return Ok(Expression::Add(Box::new(
            crate::expressions::BinaryOp::new(cast_date, interval),
        )));
    }

    if matches!(target, DialectType::BigQuery) {
        // BigQuery: DATE_ADD(date, INTERVAL val UNIT) or TIMESTAMP_ADD(ts, INTERVAL val UNIT)
        // NOTE(review): only DATE_ADD is emitted here; TIMESTAMP_ADD is never
        // produced despite the comment - confirm the intended behavior.
        let iu = parse_interval_unit(&unit_str);
        let interval = Expression::Interval(Box::new(crate::expressions::Interval {
            this: Some(arg1),
            unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                unit: iu,
                use_plural: false,
            }),
        }));
        return Ok(Expression::Function(Box::new(Function::new(
            "DATE_ADD".to_string(),
            vec![arg2, interval],
        ))));
    }

    if matches!(target, DialectType::Databricks) {
        // Databricks: keep DATEADD(UNIT, val, date) format
        let unit = Expression::Identifier(Identifier::new(unit_str));
        return Ok(Expression::Function(Box::new(Function::new(
            "DATEADD".to_string(),
            vec![unit, arg1, arg2],
        ))));
    }

    if matches!(target, DialectType::Spark) {
        // Spark: convert month-based units to ADD_MONTHS, rest to DATE_ADD

        // Multiply an amount expression by a constant factor. Literal integer
        // amounts are constant-folded; anything else becomes `expr * factor`.
        fn multiply_expr_dateadd(expr: Expression, factor: i64) -> Expression {
            if let Expression::Literal(crate::expressions::Literal::Number(n)) = &expr {
                if let Ok(val) = n.parse::<i64>() {
                    return Expression::Literal(crate::expressions::Literal::Number(
                        (val * factor).to_string(),
                    ));
                }
            }
            Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
                expr,
                Expression::Literal(crate::expressions::Literal::Number(
                    factor.to_string(),
                )),
            )))
        }
        match unit_str.as_str() {
            "YEAR" => {
                // 1 year = 12 months
                let months = multiply_expr_dateadd(arg1, 12);
                return Ok(Expression::Function(Box::new(Function::new(
                    "ADD_MONTHS".to_string(),
                    vec![arg2, months],
                ))));
            }
            "QUARTER" => {
                // 1 quarter = 3 months
                let months = multiply_expr_dateadd(arg1, 3);
                return Ok(Expression::Function(Box::new(Function::new(
                    "ADD_MONTHS".to_string(),
                    vec![arg2, months],
                ))));
            }
            "MONTH" => {
                return Ok(Expression::Function(Box::new(Function::new(
                    "ADD_MONTHS".to_string(),
                    vec![arg2, arg1],
                ))));
            }
            "WEEK" => {
                // 1 week = 7 days
                let days = multiply_expr_dateadd(arg1, 7);
                return Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![arg2, days],
                ))));
            }
            "DAY" => {
                return Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![arg2, arg1],
                ))));
            }
            _ => {
                // Sub-day units: fall back to 3-arg DATE_ADD(UNIT, val, date).
                let unit = Expression::Identifier(Identifier::new(unit_str));
                return Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![unit, arg1, arg2],
                ))));
            }
        }
    }

    if matches!(target, DialectType::Hive) {
        // Hive: DATE_ADD(date, val) for DAY, or date + INTERVAL for others
        match unit_str.as_str() {
            "DAY" => {
                return Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![arg2, arg1],
                ))));
            }
            "MONTH" => {
                return Ok(Expression::Function(Box::new(Function::new(
                    "ADD_MONTHS".to_string(),
                    vec![arg2, arg1],
                ))));
            }
            _ => {
                // Other units: date + INTERVAL val UNIT
                let iu = parse_interval_unit(&unit_str);
                let interval =
                    Expression::Interval(Box::new(crate::expressions::Interval {
                        this: Some(arg1),
                        unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                            unit: iu,
                            use_plural: false,
                        }),
                    }));
                return Ok(Expression::Add(Box::new(
                    crate::expressions::BinaryOp::new(arg2, interval),
                )));
            }
        }
    }

    if matches!(target, DialectType::PostgreSQL) {
        // PostgreSQL: date + INTERVAL 'val UNIT' (amount and unit folded into
        // a single string-literal interval)
        let interval = Expression::Interval(Box::new(crate::expressions::Interval {
            this: Some(Expression::Literal(Literal::String(format!(
                "{} {}",
                Self::expr_to_string(&arg1),
                unit_str
            )))),
            unit: None,
        }));
        return Ok(Expression::Add(Box::new(
            crate::expressions::BinaryOp::new(arg2, interval),
        )));
    }

    if matches!(
        target,
        DialectType::Presto | DialectType::Trino | DialectType::Athena
    ) {
        // Presto/Trino: DATE_ADD('UNIT', val, date) - unit as string literal
        return Ok(Expression::Function(Box::new(Function::new(
            "DATE_ADD".to_string(),
            vec![Expression::Literal(Literal::String(unit_str)), arg1, arg2],
        ))));
    }

    if matches!(target, DialectType::ClickHouse) {
        // ClickHouse: DATE_ADD(UNIT, val, date) - bare identifier unit
        let unit = Expression::Identifier(Identifier::new(unit_str));
        return Ok(Expression::Function(Box::new(Function::new(
            "DATE_ADD".to_string(),
            vec![unit, arg1, arg2],
        ))));
    }

    // Default: keep DATEADD with uppercased unit
    let unit = Expression::Identifier(Identifier::new(unit_str));
    Ok(Expression::Function(Box::new(Function::new(
        "DATEADD".to_string(),
        vec![unit, arg1, arg2],
    ))))
}
27228
27229 // DATE_ADD(unit, val, date) - 3 arg form from ClickHouse/Presto
27230 "DATE_ADD" if args.len() == 3 => {
27231 let arg0 = args.remove(0);
27232 let arg1 = args.remove(0);
27233 let arg2 = args.remove(0);
27234 let unit_str = get_unit_str(&arg0);
27235
27236 if matches!(
27237 target,
27238 DialectType::Presto | DialectType::Trino | DialectType::Athena
27239 ) {
27240 // Presto/Trino: DATE_ADD('UNIT', val, date)
27241 return Ok(Expression::Function(Box::new(Function::new(
27242 "DATE_ADD".to_string(),
27243 vec![Expression::Literal(Literal::String(unit_str)), arg1, arg2],
27244 ))));
27245 }
27246
27247 if matches!(
27248 target,
27249 DialectType::Snowflake | DialectType::TSQL | DialectType::Redshift
27250 ) {
27251 // DATEADD(UNIT, val, date)
27252 let unit = Expression::Identifier(Identifier::new(unit_str));
27253 let date = if matches!(target, DialectType::TSQL) {
27254 Self::ensure_cast_datetime2(arg2)
27255 } else {
27256 arg2
27257 };
27258 return Ok(Expression::Function(Box::new(Function::new(
27259 "DATEADD".to_string(),
27260 vec![unit, arg1, date],
27261 ))));
27262 }
27263
27264 if matches!(target, DialectType::DuckDB) {
27265 // DuckDB: date + INTERVAL val UNIT
27266 let iu = parse_interval_unit(&unit_str);
27267 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
27268 this: Some(arg1),
27269 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
27270 unit: iu,
27271 use_plural: false,
27272 }),
27273 }));
27274 return Ok(Expression::Add(Box::new(
27275 crate::expressions::BinaryOp::new(arg2, interval),
27276 )));
27277 }
27278
27279 if matches!(target, DialectType::Spark | DialectType::Databricks) {
27280 // Spark: DATE_ADD(UNIT, val, date) with uppercased unit
27281 let unit = Expression::Identifier(Identifier::new(unit_str));
27282 return Ok(Expression::Function(Box::new(Function::new(
27283 "DATE_ADD".to_string(),
27284 vec![unit, arg1, arg2],
27285 ))));
27286 }
27287
27288 // Default: DATE_ADD(UNIT, val, date)
27289 let unit = Expression::Identifier(Identifier::new(unit_str));
27290 Ok(Expression::Function(Box::new(Function::new(
27291 "DATE_ADD".to_string(),
27292 vec![unit, arg1, arg2],
27293 ))))
27294 }
27295
// DATE_ADD(date, INTERVAL val UNIT) - 2 arg BigQuery form
"DATE_ADD" if args.len() == 2 => {
    let date = args.remove(0);
    let interval_expr = args.remove(0);
    // Split INTERVAL val UNIT into its amount expression and unit.
    let (val, unit) = Self::extract_interval_parts(&interval_expr);
    let unit_str = Self::interval_unit_to_string(&unit);

    match target {
        DialectType::DuckDB => {
            // DuckDB: CAST(date AS DATE) + INTERVAL 'val' UNIT
            let cast_date = Self::ensure_cast_date(date);
            let quoted_val = Self::quote_interval_val(&val);
            let interval =
                Expression::Interval(Box::new(crate::expressions::Interval {
                    this: Some(quoted_val),
                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                        unit,
                        use_plural: false,
                    }),
                }));
            Ok(Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(cast_date, interval),
            )))
        }
        DialectType::PostgreSQL => {
            // PostgreSQL: date + INTERVAL 'val UNIT' (amount and unit folded
            // into a single string-literal interval)
            let interval =
                Expression::Interval(Box::new(crate::expressions::Interval {
                    this: Some(Expression::Literal(Literal::String(format!(
                        "{} {}",
                        Self::expr_to_string(&val),
                        unit_str
                    )))),
                    unit: None,
                }));
            Ok(Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(date, interval),
            )))
        }
        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
            // Presto: DATE_ADD('UNIT', CAST('val' AS BIGINT), date)
            // The amount is stringified and re-cast so non-literal expressions
            // still render.
            let val_str = Self::expr_to_string(&val);
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![
                    Expression::Literal(Literal::String(unit_str)),
                    Expression::Cast(Box::new(Cast {
                        this: Expression::Literal(Literal::String(val_str)),
                        to: DataType::BigInt { length: None },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    })),
                    date,
                ],
            ))))
        }
        DialectType::Spark | DialectType::Hive => {
            // Spark/Hive: DATE_ADD(date, val) for DAY, ADD_MONTHS for MONTH,
            // otherwise keep the interval form.
            match unit_str.as_str() {
                "DAY" => Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![date, val],
                )))),
                "MONTH" => Ok(Expression::Function(Box::new(Function::new(
                    "ADD_MONTHS".to_string(),
                    vec![date, val],
                )))),
                _ => {
                    let iu = parse_interval_unit(&unit_str);
                    let interval =
                        Expression::Interval(Box::new(crate::expressions::Interval {
                            this: Some(val),
                            unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                unit: iu,
                                use_plural: false,
                            }),
                        }));
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATE_ADD".to_string(),
                        vec![date, interval],
                    ))))
                }
            }
        }
        DialectType::Snowflake => {
            // Snowflake: DATEADD(UNIT, 'val', CAST(date AS DATE))
            let cast_date = Self::ensure_cast_date(date);
            let val_str = Self::expr_to_string(&val);
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new(unit_str)),
                    Expression::Literal(Literal::String(val_str)),
                    cast_date,
                ],
            ))))
        }
        DialectType::TSQL | DialectType::Fabric => {
            // TSQL/Fabric: DATEADD(UNIT, val, CAST(date AS DATETIME2))
            let cast_date = Self::ensure_cast_datetime2(date);
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new(unit_str)),
                    val,
                    cast_date,
                ],
            ))))
        }
        // Redshift: DATEADD(UNIT, val, date) - no cast
        DialectType::Redshift => Ok(Expression::Function(Box::new(Function::new(
            "DATEADD".to_string(),
            vec![Expression::Identifier(Identifier::new(unit_str)), val, date],
        )))),
        DialectType::MySQL => {
            // MySQL: DATE_ADD(date, INTERVAL 'val' UNIT)
            let quoted_val = Self::quote_interval_val(&val);
            let iu = parse_interval_unit(&unit_str);
            let interval =
                Expression::Interval(Box::new(crate::expressions::Interval {
                    this: Some(quoted_val),
                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                        unit: iu,
                        use_plural: false,
                    }),
                }));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![date, interval],
            ))))
        }
        DialectType::BigQuery => {
            // BigQuery: DATE_ADD(date, INTERVAL 'val' UNIT) - round-trip form
            let quoted_val = Self::quote_interval_val(&val);
            let iu = parse_interval_unit(&unit_str);
            let interval =
                Expression::Interval(Box::new(crate::expressions::Interval {
                    this: Some(quoted_val),
                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                        unit: iu,
                        use_plural: false,
                    }),
                }));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![date, interval],
            ))))
        }
        // Databricks: DATEADD(UNIT, val, date)
        DialectType::Databricks => Ok(Expression::Function(Box::new(Function::new(
            "DATEADD".to_string(),
            vec![Expression::Identifier(Identifier::new(unit_str)), val, date],
        )))),
        _ => {
            // Default: keep as DATE_ADD with decomposed interval
            Ok(Expression::DateAdd(Box::new(
                crate::expressions::DateAddFunc {
                    this: date,
                    interval: val,
                    unit,
                },
            )))
        }
    }
}
27460
27461 // ADD_MONTHS(date, val) -> target-specific form
27462 "ADD_MONTHS" if args.len() == 2 => {
27463 let date = args.remove(0);
27464 let val = args.remove(0);
27465
27466 if matches!(target, DialectType::TSQL) {
27467 // TSQL: DATEADD(MONTH, val, CAST(date AS DATETIME2))
27468 let cast_date = Self::ensure_cast_datetime2(date);
27469 return Ok(Expression::Function(Box::new(Function::new(
27470 "DATEADD".to_string(),
27471 vec![
27472 Expression::Identifier(Identifier::new("MONTH")),
27473 val,
27474 cast_date,
27475 ],
27476 ))));
27477 }
27478
27479 if matches!(target, DialectType::DuckDB) {
27480 // DuckDB: date + INTERVAL val MONTH
27481 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
27482 this: Some(val),
27483 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
27484 unit: crate::expressions::IntervalUnit::Month,
27485 use_plural: false,
27486 }),
27487 }));
27488 return Ok(Expression::Add(Box::new(
27489 crate::expressions::BinaryOp::new(date, interval),
27490 )));
27491 }
27492
27493 if matches!(target, DialectType::Snowflake) {
27494 // Snowflake: keep ADD_MONTHS when source is also Snowflake, else DATEADD
27495 if matches!(source, DialectType::Snowflake) {
27496 return Ok(Expression::Function(Box::new(Function::new(
27497 "ADD_MONTHS".to_string(),
27498 vec![date, val],
27499 ))));
27500 }
27501 return Ok(Expression::Function(Box::new(Function::new(
27502 "DATEADD".to_string(),
27503 vec![Expression::Identifier(Identifier::new("MONTH")), val, date],
27504 ))));
27505 }
27506
27507 if matches!(target, DialectType::Spark | DialectType::Databricks) {
27508 // Spark: ADD_MONTHS(date, val) - keep as is
27509 return Ok(Expression::Function(Box::new(Function::new(
27510 "ADD_MONTHS".to_string(),
27511 vec![date, val],
27512 ))));
27513 }
27514
27515 if matches!(target, DialectType::Hive) {
27516 return Ok(Expression::Function(Box::new(Function::new(
27517 "ADD_MONTHS".to_string(),
27518 vec![date, val],
27519 ))));
27520 }
27521
27522 if matches!(
27523 target,
27524 DialectType::Presto | DialectType::Trino | DialectType::Athena
27525 ) {
27526 // Presto: DATE_ADD('MONTH', val, date)
27527 return Ok(Expression::Function(Box::new(Function::new(
27528 "DATE_ADD".to_string(),
27529 vec![
27530 Expression::Literal(Literal::String("MONTH".to_string())),
27531 val,
27532 date,
27533 ],
27534 ))));
27535 }
27536
27537 // Default: keep ADD_MONTHS
27538 Ok(Expression::Function(Box::new(Function::new(
27539 "ADD_MONTHS".to_string(),
27540 vec![date, val],
27541 ))))
27542 }
27543
            // SAFE_DIVIDE(x, y): division that yields NULL instead of failing when the
            // divisor is zero. Rewritten per target into a CASE/IFF/IF guard.
            "SAFE_DIVIDE" if args.len() == 2 => {
                let x = args.remove(0);
                let y = args.remove(0);
                // Wrap x and y in parens if they're complex expressions so the
                // generated `x / y` and `y <> 0` keep their intended precedence.
                let y_ref = match &y {
                    Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => {
                        y.clone()
                    }
                    _ => Expression::Paren(Box::new(Paren {
                        this: y.clone(),
                        trailing_comments: vec![],
                    })),
                };
                let x_ref = match &x {
                    Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => {
                        x.clone()
                    }
                    _ => Expression::Paren(Box::new(Paren {
                        this: x.clone(),
                        trailing_comments: vec![],
                    })),
                };
                // Shared building blocks: the guard `y <> 0` and the quotient `x / y`.
                let condition = Expression::Neq(Box::new(crate::expressions::BinaryOp::new(
                    y_ref.clone(),
                    Expression::number(0),
                )));
                let div_expr = Expression::Div(Box::new(crate::expressions::BinaryOp::new(
                    x_ref.clone(),
                    y_ref.clone(),
                )));

                match target {
                    DialectType::DuckDB | DialectType::PostgreSQL => {
                        // CASE WHEN y <> 0 THEN x / y ELSE NULL END
                        // PostgreSQL additionally casts x to DOUBLE PRECISION so that
                        // integer / integer does not truncate.
                        let result_div = if matches!(target, DialectType::PostgreSQL) {
                            let cast_x = Expression::Cast(Box::new(Cast {
                                this: x_ref,
                                to: DataType::Custom {
                                    name: "DOUBLE PRECISION".to_string(),
                                },
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                            }));
                            Expression::Div(Box::new(crate::expressions::BinaryOp::new(
                                cast_x, y_ref,
                            )))
                        } else {
                            div_expr
                        };
                        Ok(Expression::Case(Box::new(crate::expressions::Case {
                            operand: None,
                            whens: vec![(condition, result_div)],
                            else_: Some(Expression::Null(crate::expressions::Null)),
                            comments: Vec::new(),
                        })))
                    }
                    DialectType::Snowflake => {
                        // IFF(y <> 0, x / y, NULL)
                        Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
                            condition,
                            true_value: div_expr,
                            false_value: Some(Expression::Null(crate::expressions::Null)),
                            original_name: Some("IFF".to_string()),
                        })))
                    }
                    DialectType::Presto | DialectType::Trino => {
                        // IF(y <> 0, CAST(x AS DOUBLE) / y, NULL) — the cast avoids
                        // integer-division semantics.
                        let cast_x = Expression::Cast(Box::new(Cast {
                            this: x_ref,
                            to: DataType::Double {
                                precision: None,
                                scale: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        let cast_div = Expression::Div(Box::new(
                            crate::expressions::BinaryOp::new(cast_x, y_ref),
                        ));
                        Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
                            condition,
                            true_value: cast_div,
                            false_value: Some(Expression::Null(crate::expressions::Null)),
                            original_name: None,
                        })))
                    }
                    _ => {
                        // IF(y <> 0, x / y, NULL)
                        Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
                            condition,
                            true_value: div_expr,
                            false_value: Some(Expression::Null(crate::expressions::Null)),
                            original_name: None,
                        })))
                    }
                }
            }
27646
27647 // GENERATE_UUID() -> UUID() with CAST to string
27648 "GENERATE_UUID" => {
27649 let uuid_expr = Expression::Uuid(Box::new(crate::expressions::Uuid {
27650 this: None,
27651 name: None,
27652 is_string: None,
27653 }));
27654 // Most targets need CAST(UUID() AS TEXT/VARCHAR/STRING)
27655 let cast_type = match target {
27656 DialectType::DuckDB => Some(DataType::Text),
27657 DialectType::Presto | DialectType::Trino => Some(DataType::VarChar {
27658 length: None,
27659 parenthesized_length: false,
27660 }),
27661 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
27662 Some(DataType::String { length: None })
27663 }
27664 _ => None,
27665 };
27666 if let Some(dt) = cast_type {
27667 Ok(Expression::Cast(Box::new(Cast {
27668 this: uuid_expr,
27669 to: dt,
27670 trailing_comments: vec![],
27671 double_colon_syntax: false,
27672 format: None,
27673 default: None,
27674 })))
27675 } else {
27676 Ok(uuid_expr)
27677 }
27678 }
27679
27680 // COUNTIF(x) -> CountIf expression
27681 "COUNTIF" if args.len() == 1 => {
27682 let arg = args.remove(0);
27683 Ok(Expression::CountIf(Box::new(crate::expressions::AggFunc {
27684 this: arg,
27685 distinct: false,
27686 filter: None,
27687 order_by: vec![],
27688 name: None,
27689 ignore_nulls: None,
27690 having_max: None,
27691 limit: None,
27692 })))
27693 }
27694
            // EDIT_DISTANCE(col1, col2, [max_distance]) -> Levenshtein expression.
            "EDIT_DISTANCE" => {
                // Strip named arguments (max_distance => N) and pass as positional
                let mut positional_args: Vec<Expression> = vec![];
                for arg in args {
                    match arg {
                        Expression::NamedArgument(na) => {
                            positional_args.push(na.value);
                        }
                        other => positional_args.push(other),
                    }
                }
                if positional_args.len() >= 2 {
                    let col1 = positional_args.remove(0);
                    let col2 = positional_args.remove(0);
                    let levenshtein = crate::expressions::BinaryFunc {
                        this: col1,
                        expression: col2,
                        original_name: None,
                    };
                    // Pass extra args through a function wrapper with all args
                    if !positional_args.is_empty() {
                        let max_dist = positional_args.remove(0);
                        // DuckDB: CASE WHEN LEVENSHTEIN(a, b) IS NULL OR max IS NULL THEN NULL ELSE LEAST(LEVENSHTEIN(a, b), max) END
                        if matches!(target, DialectType::DuckDB) {
                            let lev = Expression::Function(Box::new(Function::new(
                                "LEVENSHTEIN".to_string(),
                                vec![levenshtein.this, levenshtein.expression],
                            )));
                            // Explicit NULL guards: either side being NULL yields NULL
                            // instead of whatever LEAST would produce.
                            let lev_is_null =
                                Expression::IsNull(Box::new(crate::expressions::IsNull {
                                    this: lev.clone(),
                                    not: false,
                                    postfix_form: false,
                                }));
                            let max_is_null =
                                Expression::IsNull(Box::new(crate::expressions::IsNull {
                                    this: max_dist.clone(),
                                    not: false,
                                    postfix_form: false,
                                }));
                            let null_check =
                                Expression::Or(Box::new(crate::expressions::BinaryOp {
                                    left: lev_is_null,
                                    right: max_is_null,
                                    left_comments: Vec::new(),
                                    operator_comments: Vec::new(),
                                    trailing_comments: Vec::new(),
                                }));
                            // LEAST clamps the distance to max_distance.
                            let least =
                                Expression::Least(Box::new(crate::expressions::VarArgFunc {
                                    expressions: vec![lev, max_dist],
                                    original_name: None,
                                }));
                            return Ok(Expression::Case(Box::new(crate::expressions::Case {
                                operand: None,
                                whens: vec![(
                                    null_check,
                                    Expression::Null(crate::expressions::Null),
                                )],
                                else_: Some(least),
                                comments: Vec::new(),
                            })));
                        }
                        let mut all_args = vec![levenshtein.this, levenshtein.expression, max_dist];
                        all_args.extend(positional_args);
                        // PostgreSQL: use LEVENSHTEIN_LESS_EQUAL when max_distance is provided
                        // NOTE(review): other targets receive LEVENSHTEIN with 3+ args
                        // here — confirm each target accepts that arity.
                        let func_name = if matches!(target, DialectType::PostgreSQL) {
                            "LEVENSHTEIN_LESS_EQUAL"
                        } else {
                            "LEVENSHTEIN"
                        };
                        return Ok(Expression::Function(Box::new(Function::new(
                            func_name.to_string(),
                            all_args,
                        ))));
                    }
                    // Two-argument form: normalized Levenshtein node.
                    Ok(Expression::Levenshtein(Box::new(levenshtein)))
                } else {
                    // Fewer than two usable args: pass through unchanged.
                    Ok(Expression::Function(Box::new(Function::new(
                        "EDIT_DISTANCE".to_string(),
                        positional_args,
                    ))))
                }
            }
27780
27781 // TIMESTAMP_SECONDS(x) -> UnixToTime with scale 0
27782 "TIMESTAMP_SECONDS" if args.len() == 1 => {
27783 let arg = args.remove(0);
27784 Ok(Expression::UnixToTime(Box::new(
27785 crate::expressions::UnixToTime {
27786 this: Box::new(arg),
27787 scale: Some(0),
27788 zone: None,
27789 hours: None,
27790 minutes: None,
27791 format: None,
27792 target_type: None,
27793 },
27794 )))
27795 }
27796
27797 // TIMESTAMP_MILLIS(x) -> UnixToTime with scale 3
27798 "TIMESTAMP_MILLIS" if args.len() == 1 => {
27799 let arg = args.remove(0);
27800 Ok(Expression::UnixToTime(Box::new(
27801 crate::expressions::UnixToTime {
27802 this: Box::new(arg),
27803 scale: Some(3),
27804 zone: None,
27805 hours: None,
27806 minutes: None,
27807 format: None,
27808 target_type: None,
27809 },
27810 )))
27811 }
27812
27813 // TIMESTAMP_MICROS(x) -> UnixToTime with scale 6
27814 "TIMESTAMP_MICROS" if args.len() == 1 => {
27815 let arg = args.remove(0);
27816 Ok(Expression::UnixToTime(Box::new(
27817 crate::expressions::UnixToTime {
27818 this: Box::new(arg),
27819 scale: Some(6),
27820 zone: None,
27821 hours: None,
27822 minutes: None,
27823 format: None,
27824 target_type: None,
27825 },
27826 )))
27827 }
27828
27829 // DIV(x, y) -> IntDiv expression
27830 "DIV" if args.len() == 2 => {
27831 let x = args.remove(0);
27832 let y = args.remove(0);
27833 Ok(Expression::IntDiv(Box::new(
27834 crate::expressions::BinaryFunc {
27835 this: x,
27836 expression: y,
27837 original_name: None,
27838 },
27839 )))
27840 }
27841
            // TO_HEX(x) -> target-specific hex-encoding form.
            "TO_HEX" if args.len() == 1 => {
                let arg = args.remove(0);
                // Check if inner function already returns hex string in certain targets
                let inner_returns_hex = matches!(&arg, Expression::Function(f) if matches!(f.name.as_str(), "MD5" | "SHA1" | "SHA256" | "SHA512"));
                if matches!(target, DialectType::BigQuery) {
                    // BQ->BQ: keep as TO_HEX
                    Ok(Expression::Function(Box::new(Function::new(
                        "TO_HEX".to_string(),
                        vec![arg],
                    ))))
                } else if matches!(target, DialectType::DuckDB) && inner_returns_hex {
                    // DuckDB: MD5/SHA already return hex strings, so TO_HEX is redundant
                    Ok(arg)
                } else if matches!(target, DialectType::Snowflake) && inner_returns_hex {
                    // Snowflake pairs each digest with its *_BINARY variant, then
                    // TO_CHAR renders the binary as hex:
                    //   TO_HEX(SHA1(x))   -> TO_CHAR(SHA1_BINARY(x))
                    //   TO_HEX(MD5(x))    -> TO_CHAR(MD5_BINARY(x))
                    //   TO_HEX(SHA256(x)) -> TO_CHAR(SHA2_BINARY(x, 256))
                    //   TO_HEX(SHA512(x)) -> TO_CHAR(SHA2_BINARY(x, 512))
                    if let Expression::Function(ref inner_f) = arg {
                        let inner_args = inner_f.args.clone();
                        let binary_func = match inner_f.name.to_uppercase().as_str() {
                            "SHA1" => Expression::Function(Box::new(Function::new(
                                "SHA1_BINARY".to_string(),
                                inner_args,
                            ))),
                            "MD5" => Expression::Function(Box::new(Function::new(
                                "MD5_BINARY".to_string(),
                                inner_args,
                            ))),
                            "SHA256" => {
                                // SHA2_BINARY takes the digest width as a second arg.
                                let mut a = inner_args;
                                a.push(Expression::number(256));
                                Expression::Function(Box::new(Function::new(
                                    "SHA2_BINARY".to_string(),
                                    a,
                                )))
                            }
                            "SHA512" => {
                                let mut a = inner_args;
                                a.push(Expression::number(512));
                                Expression::Function(Box::new(Function::new(
                                    "SHA2_BINARY".to_string(),
                                    a,
                                )))
                            }
                            _ => arg.clone(),
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "TO_CHAR".to_string(),
                            vec![binary_func],
                        ))))
                    } else {
                        // Defensive fallback — `inner_returns_hex` implies `arg` is a
                        // Function, so this branch should be unreachable in practice.
                        let inner = Expression::Function(Box::new(Function::new(
                            "HEX".to_string(),
                            vec![arg],
                        )));
                        Ok(Expression::Lower(Box::new(
                            crate::expressions::UnaryFunc::new(inner),
                        )))
                    }
                } else if matches!(target, DialectType::Presto | DialectType::Trino) {
                    // Presto/Trino keep the TO_HEX name; LOWER makes the output casing
                    // match BigQuery's lower-case hex.
                    let inner = Expression::Function(Box::new(Function::new(
                        "TO_HEX".to_string(),
                        vec![arg],
                    )));
                    Ok(Expression::Lower(Box::new(
                        crate::expressions::UnaryFunc::new(inner),
                    )))
                } else {
                    // Everything else: LOWER(HEX(x))
                    let inner =
                        Expression::Function(Box::new(Function::new("HEX".to_string(), vec![arg])));
                    Ok(Expression::Lower(Box::new(
                        crate::expressions::UnaryFunc::new(inner),
                    )))
                }
            }
27919
            // LAST_DAY(date, unit): drop the unit argument and emit the one-argument
            // form. NOTE(review): this assumes the unit is MONTH (BigQuery's default);
            // an explicit WEEK/QUARTER/YEAR unit is silently discarded here — confirm
            // whether non-MONTH units ever reach this path.
            "LAST_DAY" if args.len() == 2 => {
                let date = args.remove(0);
                let _unit = args.remove(0); // Strip the unit (MONTH is default)
                Ok(Expression::Function(Box::new(Function::new(
                    "LAST_DAY".to_string(),
                    vec![date],
                ))))
            }
27929
27930 // GENERATE_ARRAY(start, end, step?) -> GenerateSeries expression
27931 "GENERATE_ARRAY" => {
27932 let start = args.get(0).cloned();
27933 let end = args.get(1).cloned();
27934 let step = args.get(2).cloned();
27935 Ok(Expression::GenerateSeries(Box::new(
27936 crate::expressions::GenerateSeries {
27937 start: start.map(Box::new),
27938 end: end.map(Box::new),
27939 step: step.map(Box::new),
27940 is_end_exclusive: None,
27941 },
27942 )))
27943 }
27944
27945 // GENERATE_TIMESTAMP_ARRAY(start, end, step) -> GenerateSeries expression
27946 "GENERATE_TIMESTAMP_ARRAY" => {
27947 let start = args.get(0).cloned();
27948 let end = args.get(1).cloned();
27949 let step = args.get(2).cloned();
27950
27951 if matches!(target, DialectType::DuckDB) {
27952 // DuckDB: GENERATE_SERIES(CAST(start AS TIMESTAMP), CAST(end AS TIMESTAMP), step)
27953 // Only cast string literals - leave columns/expressions as-is
27954 let maybe_cast_ts = |expr: Expression| -> Expression {
27955 if matches!(&expr, Expression::Literal(Literal::String(_))) {
27956 Expression::Cast(Box::new(Cast {
27957 this: expr,
27958 to: DataType::Timestamp {
27959 precision: None,
27960 timezone: false,
27961 },
27962 trailing_comments: vec![],
27963 double_colon_syntax: false,
27964 format: None,
27965 default: None,
27966 }))
27967 } else {
27968 expr
27969 }
27970 };
27971 let cast_start = start.map(maybe_cast_ts);
27972 let cast_end = end.map(maybe_cast_ts);
27973 Ok(Expression::GenerateSeries(Box::new(
27974 crate::expressions::GenerateSeries {
27975 start: cast_start.map(Box::new),
27976 end: cast_end.map(Box::new),
27977 step: step.map(Box::new),
27978 is_end_exclusive: None,
27979 },
27980 )))
27981 } else {
27982 Ok(Expression::GenerateSeries(Box::new(
27983 crate::expressions::GenerateSeries {
27984 start: start.map(Box::new),
27985 end: end.map(Box::new),
27986 step: step.map(Box::new),
27987 is_end_exclusive: None,
27988 },
27989 )))
27990 }
27991 }
27992
27993 // TO_JSON(x) -> target-specific (from Spark/Hive)
27994 "TO_JSON" => {
27995 match target {
27996 DialectType::Presto | DialectType::Trino => {
27997 // JSON_FORMAT(CAST(x AS JSON))
27998 let arg = args
27999 .into_iter()
28000 .next()
28001 .unwrap_or(Expression::Null(crate::expressions::Null));
28002 let cast_json = Expression::Cast(Box::new(Cast {
28003 this: arg,
28004 to: DataType::Custom {
28005 name: "JSON".to_string(),
28006 },
28007 trailing_comments: vec![],
28008 double_colon_syntax: false,
28009 format: None,
28010 default: None,
28011 }));
28012 Ok(Expression::Function(Box::new(Function::new(
28013 "JSON_FORMAT".to_string(),
28014 vec![cast_json],
28015 ))))
28016 }
28017 DialectType::BigQuery => Ok(Expression::Function(Box::new(Function::new(
28018 "TO_JSON_STRING".to_string(),
28019 args,
28020 )))),
28021 DialectType::DuckDB => {
28022 // CAST(TO_JSON(x) AS TEXT)
28023 let arg = args
28024 .into_iter()
28025 .next()
28026 .unwrap_or(Expression::Null(crate::expressions::Null));
28027 let to_json = Expression::Function(Box::new(Function::new(
28028 "TO_JSON".to_string(),
28029 vec![arg],
28030 )));
28031 Ok(Expression::Cast(Box::new(Cast {
28032 this: to_json,
28033 to: DataType::Text,
28034 trailing_comments: vec![],
28035 double_colon_syntax: false,
28036 format: None,
28037 default: None,
28038 })))
28039 }
28040 _ => Ok(Expression::Function(Box::new(Function::new(
28041 "TO_JSON".to_string(),
28042 args,
28043 )))),
28044 }
28045 }
28046
            // TO_JSON_STRING(x) -> target-specific JSON-as-text serialization
            // (the BigQuery-flavored counterpart of the TO_JSON arm above).
            "TO_JSON_STRING" => {
                match target {
                    // Spark family spells it TO_JSON.
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive => Ok(
                        Expression::Function(Box::new(Function::new("TO_JSON".to_string(), args))),
                    ),
                    DialectType::Presto | DialectType::Trino => {
                        // JSON_FORMAT(CAST(x AS JSON))
                        let arg = args
                            .into_iter()
                            .next()
                            .unwrap_or(Expression::Null(crate::expressions::Null));
                        let cast_json = Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Custom {
                                name: "JSON".to_string(),
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "JSON_FORMAT".to_string(),
                            vec![cast_json],
                        ))))
                    }
                    DialectType::DuckDB => {
                        // CAST(TO_JSON(x) AS TEXT) — cast the JSON value back to text.
                        let arg = args
                            .into_iter()
                            .next()
                            .unwrap_or(Expression::Null(crate::expressions::Null));
                        let to_json = Expression::Function(Box::new(Function::new(
                            "TO_JSON".to_string(),
                            vec![arg],
                        )));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: to_json,
                            to: DataType::Text,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    }
                    DialectType::Snowflake => {
                        // TO_JSON(x)
                        Ok(Expression::Function(Box::new(Function::new(
                            "TO_JSON".to_string(),
                            args,
                        ))))
                    }
                    // Anything else keeps TO_JSON_STRING unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "TO_JSON_STRING".to_string(),
                        args,
                    )))),
                }
            }
28106
28107 // SAFE_ADD(x, y) -> SafeAdd expression
28108 "SAFE_ADD" if args.len() == 2 => {
28109 let x = args.remove(0);
28110 let y = args.remove(0);
28111 Ok(Expression::SafeAdd(Box::new(crate::expressions::SafeAdd {
28112 this: Box::new(x),
28113 expression: Box::new(y),
28114 })))
28115 }
28116
28117 // SAFE_SUBTRACT(x, y) -> SafeSubtract expression
28118 "SAFE_SUBTRACT" if args.len() == 2 => {
28119 let x = args.remove(0);
28120 let y = args.remove(0);
28121 Ok(Expression::SafeSubtract(Box::new(
28122 crate::expressions::SafeSubtract {
28123 this: Box::new(x),
28124 expression: Box::new(y),
28125 },
28126 )))
28127 }
28128
28129 // SAFE_MULTIPLY(x, y) -> SafeMultiply expression
28130 "SAFE_MULTIPLY" if args.len() == 2 => {
28131 let x = args.remove(0);
28132 let y = args.remove(0);
28133 Ok(Expression::SafeMultiply(Box::new(
28134 crate::expressions::SafeMultiply {
28135 this: Box::new(x),
28136 expression: Box::new(y),
28137 },
28138 )))
28139 }
28140
28141 // REGEXP_CONTAINS(str, pattern) -> RegexpLike expression
28142 "REGEXP_CONTAINS" if args.len() == 2 => {
28143 let str_expr = args.remove(0);
28144 let pattern = args.remove(0);
28145 Ok(Expression::RegexpLike(Box::new(
28146 crate::expressions::RegexpFunc {
28147 this: str_expr,
28148 pattern,
28149 flags: None,
28150 },
28151 )))
28152 }
28153
28154 // CONTAINS_SUBSTR(a, b) -> CONTAINS(LOWER(a), LOWER(b))
28155 "CONTAINS_SUBSTR" if args.len() == 2 => {
28156 let a = args.remove(0);
28157 let b = args.remove(0);
28158 let lower_a = Expression::Lower(Box::new(crate::expressions::UnaryFunc::new(a)));
28159 let lower_b = Expression::Lower(Box::new(crate::expressions::UnaryFunc::new(b)));
28160 Ok(Expression::Function(Box::new(Function::new(
28161 "CONTAINS".to_string(),
28162 vec![lower_a, lower_b],
28163 ))))
28164 }
28165
28166 // INT64(x) -> CAST(x AS BIGINT)
28167 "INT64" if args.len() == 1 => {
28168 let arg = args.remove(0);
28169 Ok(Expression::Cast(Box::new(Cast {
28170 this: arg,
28171 to: DataType::BigInt { length: None },
28172 trailing_comments: vec![],
28173 double_colon_syntax: false,
28174 format: None,
28175 default: None,
28176 })))
28177 }
28178
28179 // INSTR(str, substr) -> target-specific
28180 "INSTR" if args.len() >= 2 => {
28181 let str_expr = args.remove(0);
28182 let substr = args.remove(0);
28183 if matches!(target, DialectType::Snowflake) {
28184 // CHARINDEX(substr, str)
28185 Ok(Expression::Function(Box::new(Function::new(
28186 "CHARINDEX".to_string(),
28187 vec![substr, str_expr],
28188 ))))
28189 } else if matches!(target, DialectType::BigQuery) {
28190 // Keep as INSTR
28191 Ok(Expression::Function(Box::new(Function::new(
28192 "INSTR".to_string(),
28193 vec![str_expr, substr],
28194 ))))
28195 } else {
28196 // Default: keep as INSTR
28197 Ok(Expression::Function(Box::new(Function::new(
28198 "INSTR".to_string(),
28199 vec![str_expr, substr],
28200 ))))
28201 }
28202 }
28203
28204 // BigQuery DATE_TRUNC(expr, unit) -> DATE_TRUNC('unit', expr) for standard SQL
28205 "DATE_TRUNC" if args.len() == 2 => {
28206 let expr = args.remove(0);
28207 let unit_expr = args.remove(0);
28208 let unit_str = get_unit_str(&unit_expr);
28209
28210 match target {
28211 DialectType::DuckDB
28212 | DialectType::Snowflake
28213 | DialectType::PostgreSQL
28214 | DialectType::Presto
28215 | DialectType::Trino
28216 | DialectType::Databricks
28217 | DialectType::Spark
28218 | DialectType::Redshift
28219 | DialectType::ClickHouse
28220 | DialectType::TSQL => {
28221 // Standard: DATE_TRUNC('UNIT', expr)
28222 Ok(Expression::Function(Box::new(Function::new(
28223 "DATE_TRUNC".to_string(),
28224 vec![Expression::Literal(Literal::String(unit_str)), expr],
28225 ))))
28226 }
28227 _ => {
28228 // Keep BigQuery arg order: DATE_TRUNC(expr, unit)
28229 Ok(Expression::Function(Box::new(Function::new(
28230 "DATE_TRUNC".to_string(),
28231 vec![expr, unit_expr],
28232 ))))
28233 }
28234 }
28235 }
28236
            // TIMESTAMP_TRUNC(ts, unit[, timezone]) / DATETIME_TRUNC(ts, unit)
            // -> target-specific truncation.
            "TIMESTAMP_TRUNC" | "DATETIME_TRUNC" if args.len() >= 2 => {
                // TIMESTAMP_TRUNC(ts, unit) or TIMESTAMP_TRUNC(ts, unit, timezone)
                let ts = args.remove(0);
                let unit_expr = args.remove(0);
                let tz = if !args.is_empty() {
                    Some(args.remove(0))
                } else {
                    None
                };
                let unit_str = get_unit_str(&unit_expr);

                match target {
                    DialectType::DuckDB => {
                        // DuckDB: DATE_TRUNC('UNIT', CAST(ts AS TIMESTAMPTZ))
                        // With timezone: DATE_TRUNC('UNIT', ts AT TIME ZONE 'tz') AT TIME ZONE 'tz' (for DAY granularity)
                        // Without timezone for MINUTE+ granularity: just DATE_TRUNC
                        let is_coarse = matches!(
                            unit_str.as_str(),
                            "DAY" | "WEEK" | "MONTH" | "QUARTER" | "YEAR"
                        );
                        // For DATETIME_TRUNC, cast string args to TIMESTAMP; other
                        // argument shapes go through maybe_cast_ts_to_tz.
                        let cast_ts = if name == "DATETIME_TRUNC" {
                            match ts {
                                Expression::Literal(Literal::String(ref _s)) => {
                                    Expression::Cast(Box::new(Cast {
                                        this: ts,
                                        to: DataType::Timestamp {
                                            precision: None,
                                            timezone: false,
                                        },
                                        trailing_comments: vec![],
                                        double_colon_syntax: false,
                                        format: None,
                                        default: None,
                                    }))
                                }
                                _ => Self::maybe_cast_ts_to_tz(ts, &name),
                            }
                        } else {
                            Self::maybe_cast_ts_to_tz(ts, &name)
                        };

                        if let Some(tz_arg) = tz {
                            if is_coarse {
                                // DATE_TRUNC('UNIT', ts AT TIME ZONE 'tz') AT TIME ZONE 'tz'
                                // Coarse units truncate in local time: convert in,
                                // truncate, then convert back out.
                                let at_tz = Expression::AtTimeZone(Box::new(
                                    crate::expressions::AtTimeZone {
                                        this: cast_ts,
                                        zone: tz_arg.clone(),
                                    },
                                ));
                                let date_trunc = Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![Expression::Literal(Literal::String(unit_str)), at_tz],
                                )));
                                Ok(Expression::AtTimeZone(Box::new(
                                    crate::expressions::AtTimeZone {
                                        this: date_trunc,
                                        zone: tz_arg,
                                    },
                                )))
                            } else {
                                // For MINUTE/HOUR: no AT TIME ZONE wrapper, just DATE_TRUNC('UNIT', ts)
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![Expression::Literal(Literal::String(unit_str)), cast_ts],
                                ))))
                            }
                        } else {
                            // No timezone: DATE_TRUNC('UNIT', CAST(ts AS TIMESTAMPTZ))
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_TRUNC".to_string(),
                                vec![Expression::Literal(Literal::String(unit_str)), cast_ts],
                            ))))
                        }
                    }
                    DialectType::Databricks | DialectType::Spark => {
                        // Databricks/Spark: DATE_TRUNC('UNIT', ts); any timezone
                        // argument is dropped on this path.
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_TRUNC".to_string(),
                            vec![Expression::Literal(Literal::String(unit_str)), ts],
                        ))))
                    }
                    _ => {
                        // Default: keep as TIMESTAMP_TRUNC('UNIT', ts, [tz])
                        let unit = Expression::Literal(Literal::String(unit_str));
                        let mut date_trunc_args = vec![unit, ts];
                        if let Some(tz_arg) = tz {
                            date_trunc_args.push(tz_arg);
                        }
                        Ok(Expression::Function(Box::new(Function::new(
                            "TIMESTAMP_TRUNC".to_string(),
                            date_trunc_args,
                        ))))
                    }
                }
            }
28335
            // TIME(...): overloaded constructor, dispatched on arity:
            //   TIME(h, m, s)        -> target time-constructor function
            //   TIME('string')       -> CAST('string' AS TIME)
            //   TIME(expr, timezone) -> CAST(CAST(expr AS TIMESTAMPTZ) AT TIME ZONE tz AS TIME)
            "TIME" => {
                if args.len() == 3 {
                    // TIME(h, m, s) constructor
                    match target {
                        DialectType::TSQL => {
                            // TIMEFROMPARTS(h, m, s, 0, 0) — the appended zeros are
                            // the fractional-seconds value and its precision.
                            args.push(Expression::number(0));
                            args.push(Expression::number(0));
                            Ok(Expression::Function(Box::new(Function::new(
                                "TIMEFROMPARTS".to_string(),
                                args,
                            ))))
                        }
                        DialectType::MySQL => Ok(Expression::Function(Box::new(Function::new(
                            "MAKETIME".to_string(),
                            args,
                        )))),
                        DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
                            Function::new("MAKE_TIME".to_string(), args),
                        ))),
                        _ => Ok(Expression::Function(Box::new(Function::new(
                            "TIME".to_string(),
                            args,
                        )))),
                    }
                } else if args.len() == 1 {
                    let arg = args.remove(0);
                    if matches!(target, DialectType::Spark) {
                        // Spark: CAST(x AS TIMESTAMP) (yes, TIMESTAMP not TIME)
                        Ok(Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    } else {
                        // Most targets: CAST(x AS TIME)
                        Ok(Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Time {
                                precision: None,
                                timezone: false,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    }
                } else if args.len() == 2 {
                    // TIME(expr, timezone) -> CAST(CAST(expr AS TIMESTAMPTZ) AT TIME ZONE tz AS TIME)
                    let expr = args.remove(0);
                    let tz = args.remove(0);
                    let cast_tstz = Expression::Cast(Box::new(Cast {
                        this: expr,
                        to: DataType::Timestamp {
                            timezone: true,
                            precision: None,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    }));
                    let at_tz = Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                        this: cast_tstz,
                        zone: tz,
                    }));
                    Ok(Expression::Cast(Box::new(Cast {
                        this: at_tz,
                        to: DataType::Time {
                            precision: None,
                            timezone: false,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    })))
                } else {
                    // Zero or 4+ arguments: pass through unchanged.
                    Ok(Expression::Function(Box::new(Function::new(
                        "TIME".to_string(),
                        args,
                    ))))
                }
            }
28428
            // DATETIME('string') -> CAST('string' AS TIMESTAMP)
            // DATETIME('date', TIME 'time') -> CAST(CAST('date' AS DATE) + CAST('time' AS TIME) AS TIMESTAMP)
            // DATETIME('string', 'timezone') -> CAST(CAST('string' AS TIMESTAMPTZ) AT TIME ZONE tz AS TIMESTAMP)
            // DATETIME(y, m, d, h, min, s) -> target-specific
            "DATETIME" => {
                // For BigQuery target: keep DATETIME function but convert TIME literal to CAST
                // so the argument renders as CAST('x' AS TIME) rather than a typed TIME literal.
                if matches!(target, DialectType::BigQuery) {
                    if args.len() == 2 {
                        let has_time_literal =
                            matches!(&args[1], Expression::Literal(Literal::Time(_)));
                        if has_time_literal {
                            let first = args.remove(0);
                            let second = args.remove(0);
                            // TIME 'x' -> CAST('x' AS TIME): reuse the literal's payload string.
                            let time_as_cast = match second {
                                Expression::Literal(Literal::Time(s)) => {
                                    Expression::Cast(Box::new(Cast {
                                        this: Expression::Literal(Literal::String(s)),
                                        to: DataType::Time {
                                            precision: None,
                                            timezone: false,
                                        },
                                        trailing_comments: vec![],
                                        double_colon_syntax: false,
                                        format: None,
                                        default: None,
                                    }))
                                }
                                // Unreachable given the has_time_literal check above,
                                // but kept total for safety.
                                other => other,
                            };
                            return Ok(Expression::Function(Box::new(Function::new(
                                "DATETIME".to_string(),
                                vec![first, time_as_cast],
                            ))));
                        }
                    }
                    // Any other shape: pass DATETIME through unchanged for BigQuery.
                    return Ok(Expression::Function(Box::new(Function::new(
                        "DATETIME".to_string(),
                        args,
                    ))));
                }

                if args.len() == 1 {
                    // DATETIME('string') -> CAST('string' AS TIMESTAMP) (no time zone).
                    let arg = args.remove(0);
                    Ok(Expression::Cast(Box::new(Cast {
                        this: arg,
                        to: DataType::Timestamp {
                            timezone: false,
                            precision: None,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    })))
                } else if args.len() == 2 {
                    let first = args.remove(0);
                    let second = args.remove(0);
                    // Check if second arg is a TIME literal
                    let is_time_literal = matches!(&second, Expression::Literal(Literal::Time(_)));
                    if is_time_literal {
                        // DATETIME('date', TIME 'time') -> CAST(CAST(date AS DATE) + CAST('time' AS TIME) AS TIMESTAMP)
                        let cast_date = Expression::Cast(Box::new(Cast {
                            this: first,
                            to: DataType::Date,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        // Convert TIME 'x' literal to string 'x' so CAST produces CAST('x' AS TIME) not CAST(TIME 'x' AS TIME)
                        let time_as_string = match second {
                            Expression::Literal(Literal::Time(s)) => {
                                Expression::Literal(Literal::String(s))
                            }
                            other => other,
                        };
                        let cast_time = Expression::Cast(Box::new(Cast {
                            this: time_as_string,
                            to: DataType::Time {
                                precision: None,
                                timezone: false,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        // DATE + TIME addition; the outer CAST pins the result type to TIMESTAMP.
                        let add_expr =
                            Expression::Add(Box::new(BinaryOp::new(cast_date, cast_time)));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: add_expr,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    } else {
                        // DATETIME('string', 'timezone')
                        let cast_tstz = Expression::Cast(Box::new(Cast {
                            this: first,
                            to: DataType::Timestamp {
                                timezone: true,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        let at_tz =
                            Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                                this: cast_tstz,
                                zone: second,
                            }));
                        // Final CAST drops the zone: the result is a plain TIMESTAMP.
                        Ok(Expression::Cast(Box::new(Cast {
                            this: at_tz,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    }
                } else if args.len() >= 3 {
                    // DATETIME(y, m, d, h, min, s) -> TIMESTAMP_FROM_PARTS for Snowflake
                    // For other targets, use MAKE_TIMESTAMP or similar
                    if matches!(target, DialectType::Snowflake) {
                        Ok(Expression::Function(Box::new(Function::new(
                            "TIMESTAMP_FROM_PARTS".to_string(),
                            args,
                        ))))
                    } else {
                        // NOTE(review): this actually passes DATETIME through unchanged,
                        // not MAKE_TIMESTAMP as the comment above suggests — confirm.
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATETIME".to_string(),
                            args,
                        ))))
                    }
                } else {
                    // Zero args: leave the call untouched.
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATETIME".to_string(),
                        args,
                    ))))
                }
            }
28580
            // TIMESTAMP(x) -> CAST(x AS TIMESTAMP WITH TIME ZONE) for Presto
            // TIMESTAMP(x, tz) -> CAST(x AS TIMESTAMP) AT TIME ZONE tz for DuckDB
            "TIMESTAMP" => {
                if args.len() == 1 {
                    // One arg: a plain cast to TIMESTAMP WITH TIME ZONE.
                    let arg = args.remove(0);
                    Ok(Expression::Cast(Box::new(Cast {
                        this: arg,
                        to: DataType::Timestamp {
                            timezone: true,
                            precision: None,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    })))
                } else if args.len() == 2 {
                    let arg = args.remove(0);
                    let tz = args.remove(0);
                    // Inner cast is timezone-less; the zone is applied around it below.
                    let cast_ts = Expression::Cast(Box::new(Cast {
                        this: arg,
                        to: DataType::Timestamp {
                            timezone: false,
                            precision: None,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    }));
                    if matches!(target, DialectType::Snowflake) {
                        // CONVERT_TIMEZONE('tz', CAST(x AS TIMESTAMP))
                        Ok(Expression::Function(Box::new(Function::new(
                            "CONVERT_TIMEZONE".to_string(),
                            vec![tz, cast_ts],
                        ))))
                    } else {
                        // Everything else renders AT TIME ZONE.
                        Ok(Expression::AtTimeZone(Box::new(
                            crate::expressions::AtTimeZone {
                                this: cast_ts,
                                zone: tz,
                            },
                        )))
                    }
                } else {
                    // Unexpected arity: leave the call untouched.
                    Ok(Expression::Function(Box::new(Function::new(
                        "TIMESTAMP".to_string(),
                        args,
                    ))))
                }
            }

            // STRING(x) -> CAST(x AS VARCHAR/TEXT)
            // STRING(x, tz) -> CAST(CAST(x AS TIMESTAMP) AT TIME ZONE 'UTC' AT TIME ZONE tz AS VARCHAR/TEXT)
            "STRING" => {
                if args.len() == 1 {
                    let arg = args.remove(0);
                    // DuckDB prefers TEXT; other targets get an unsized VARCHAR.
                    let cast_type = match target {
                        DialectType::DuckDB => DataType::Text,
                        _ => DataType::VarChar {
                            length: None,
                            parenthesized_length: false,
                        },
                    };
                    Ok(Expression::Cast(Box::new(Cast {
                        this: arg,
                        to: cast_type,
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    })))
                } else if args.len() == 2 {
                    let arg = args.remove(0);
                    let tz = args.remove(0);
                    let cast_type = match target {
                        DialectType::DuckDB => DataType::Text,
                        _ => DataType::VarChar {
                            length: None,
                            parenthesized_length: false,
                        },
                    };
                    if matches!(target, DialectType::Snowflake) {
                        // STRING(x, tz) -> CAST(CONVERT_TIMEZONE('UTC', tz, x) AS VARCHAR)
                        let convert_tz = Expression::Function(Box::new(Function::new(
                            "CONVERT_TIMEZONE".to_string(),
                            vec![
                                Expression::Literal(Literal::String("UTC".to_string())),
                                tz,
                                arg,
                            ],
                        )));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: convert_tz,
                            to: cast_type,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    } else {
                        // STRING(x, tz) -> CAST(CAST(x AS TIMESTAMP) AT TIME ZONE 'UTC' AT TIME ZONE tz AS TEXT/VARCHAR)
                        // Double AT TIME ZONE: anchor the naive timestamp in UTC first,
                        // then convert into the requested zone.
                        let cast_ts = Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        let at_utc =
                            Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                                this: cast_ts,
                                zone: Expression::Literal(Literal::String("UTC".to_string())),
                            }));
                        let at_tz =
                            Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                                this: at_utc,
                                zone: tz,
                            }));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: at_tz,
                            to: cast_type,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    }
                } else {
                    // Unexpected arity: leave the call untouched.
                    Ok(Expression::Function(Box::new(Function::new(
                        "STRING".to_string(),
                        args,
                    ))))
                }
            }
28720
            // UNIX_SECONDS, UNIX_MILLIS, UNIX_MICROS as functions (not expressions)
            "UNIX_SECONDS" if args.len() == 1 => {
                let ts = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // CAST(EPOCH(CAST(ts AS TIMESTAMPTZ)) AS BIGINT)
                        // The outer CAST pins the result to BIGINT regardless of
                        // EPOCH's native return type.
                        let cast_ts = Self::ensure_cast_timestamptz(ts);
                        let epoch = Expression::Function(Box::new(Function::new(
                            "EPOCH".to_string(),
                            vec![cast_ts],
                        )));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: epoch,
                            to: DataType::BigInt { length: None },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    }
                    DialectType::Snowflake => {
                        // TIMESTAMPDIFF(SECONDS, CAST('1970-01-01 00:00:00+00' AS TIMESTAMPTZ), ts)
                        // i.e. whole seconds elapsed since the Unix epoch.
                        let epoch = Expression::Cast(Box::new(Cast {
                            this: Expression::Literal(Literal::String(
                                "1970-01-01 00:00:00+00".to_string(),
                            )),
                            to: DataType::Timestamp {
                                timezone: true,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        Ok(Expression::TimestampDiff(Box::new(
                            crate::expressions::TimestampDiff {
                                this: Box::new(epoch),
                                expression: Box::new(ts),
                                unit: Some("SECONDS".to_string()),
                            },
                        )))
                    }
                    // Other targets: passthrough unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "UNIX_SECONDS".to_string(),
                        vec![ts],
                    )))),
                }
            }

            "UNIX_MILLIS" if args.len() == 1 => {
                let ts = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // EPOCH_MS(CAST(ts AS TIMESTAMPTZ))
                        let cast_ts = Self::ensure_cast_timestamptz(ts);
                        Ok(Expression::Function(Box::new(Function::new(
                            "EPOCH_MS".to_string(),
                            vec![cast_ts],
                        ))))
                    }
                    // Other targets: passthrough unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "UNIX_MILLIS".to_string(),
                        vec![ts],
                    )))),
                }
            }

            "UNIX_MICROS" if args.len() == 1 => {
                let ts = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // EPOCH_US(CAST(ts AS TIMESTAMPTZ))
                        let cast_ts = Self::ensure_cast_timestamptz(ts);
                        Ok(Expression::Function(Box::new(Function::new(
                            "EPOCH_US".to_string(),
                            vec![cast_ts],
                        ))))
                    }
                    // Other targets: passthrough unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "UNIX_MICROS".to_string(),
                        vec![ts],
                    )))),
                }
            }
28806
28807 // ARRAY_CONCAT / LIST_CONCAT -> target-specific
28808 "ARRAY_CONCAT" | "LIST_CONCAT" => {
28809 match target {
28810 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
28811 // CONCAT(arr1, arr2, ...)
28812 Ok(Expression::Function(Box::new(Function::new(
28813 "CONCAT".to_string(),
28814 args,
28815 ))))
28816 }
28817 DialectType::Presto | DialectType::Trino => {
28818 // CONCAT(arr1, arr2, ...)
28819 Ok(Expression::Function(Box::new(Function::new(
28820 "CONCAT".to_string(),
28821 args,
28822 ))))
28823 }
28824 DialectType::Snowflake => {
28825 // ARRAY_CAT(arr1, ARRAY_CAT(arr2, arr3))
28826 if args.len() == 1 {
28827 // ARRAY_CAT requires 2 args, add empty array as []
28828 let empty_arr = Expression::ArrayFunc(Box::new(
28829 crate::expressions::ArrayConstructor {
28830 expressions: vec![],
28831 bracket_notation: true,
28832 use_list_keyword: false,
28833 },
28834 ));
28835 let mut new_args = args;
28836 new_args.push(empty_arr);
28837 Ok(Expression::Function(Box::new(Function::new(
28838 "ARRAY_CAT".to_string(),
28839 new_args,
28840 ))))
28841 } else if args.is_empty() {
28842 Ok(Expression::Function(Box::new(Function::new(
28843 "ARRAY_CAT".to_string(),
28844 args,
28845 ))))
28846 } else {
28847 let mut it = args.into_iter().rev();
28848 let mut result = it.next().unwrap();
28849 for arr in it {
28850 result = Expression::Function(Box::new(Function::new(
28851 "ARRAY_CAT".to_string(),
28852 vec![arr, result],
28853 )));
28854 }
28855 Ok(result)
28856 }
28857 }
28858 DialectType::PostgreSQL => {
28859 // ARRAY_CAT(arr1, ARRAY_CAT(arr2, arr3))
28860 if args.len() <= 1 {
28861 Ok(Expression::Function(Box::new(Function::new(
28862 "ARRAY_CAT".to_string(),
28863 args,
28864 ))))
28865 } else {
28866 let mut it = args.into_iter().rev();
28867 let mut result = it.next().unwrap();
28868 for arr in it {
28869 result = Expression::Function(Box::new(Function::new(
28870 "ARRAY_CAT".to_string(),
28871 vec![arr, result],
28872 )));
28873 }
28874 Ok(result)
28875 }
28876 }
28877 DialectType::Redshift => {
28878 // ARRAY_CONCAT(arr1, ARRAY_CONCAT(arr2, arr3))
28879 if args.len() <= 2 {
28880 Ok(Expression::Function(Box::new(Function::new(
28881 "ARRAY_CONCAT".to_string(),
28882 args,
28883 ))))
28884 } else {
28885 let mut it = args.into_iter().rev();
28886 let mut result = it.next().unwrap();
28887 for arr in it {
28888 result = Expression::Function(Box::new(Function::new(
28889 "ARRAY_CONCAT".to_string(),
28890 vec![arr, result],
28891 )));
28892 }
28893 Ok(result)
28894 }
28895 }
28896 DialectType::DuckDB => {
28897 // LIST_CONCAT supports multiple args natively in DuckDB
28898 Ok(Expression::Function(Box::new(Function::new(
28899 "LIST_CONCAT".to_string(),
28900 args,
28901 ))))
28902 }
28903 _ => Ok(Expression::Function(Box::new(Function::new(
28904 "ARRAY_CONCAT".to_string(),
28905 args,
28906 )))),
28907 }
28908 }
28909
28910 // ARRAY_CONCAT_AGG -> Snowflake: ARRAY_FLATTEN(ARRAY_AGG(x))
28911 "ARRAY_CONCAT_AGG" if args.len() == 1 => {
28912 let arg = args.remove(0);
28913 match target {
28914 DialectType::Snowflake => {
28915 let array_agg =
28916 Expression::ArrayAgg(Box::new(crate::expressions::AggFunc {
28917 this: arg,
28918 distinct: false,
28919 filter: None,
28920 order_by: vec![],
28921 name: None,
28922 ignore_nulls: None,
28923 having_max: None,
28924 limit: None,
28925 }));
28926 Ok(Expression::Function(Box::new(Function::new(
28927 "ARRAY_FLATTEN".to_string(),
28928 vec![array_agg],
28929 ))))
28930 }
28931 _ => Ok(Expression::Function(Box::new(Function::new(
28932 "ARRAY_CONCAT_AGG".to_string(),
28933 vec![arg],
28934 )))),
28935 }
28936 }
28937
            // MD5/SHA1/SHA256/SHA512 -> target-specific hash functions
            // (UNHEX/_BINARY wrappers presumably keep the result binary rather
            // than hex text, matching the source dialect — TODO confirm.)
            "MD5" if args.len() == 1 => {
                let arg = args.remove(0);
                match target {
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
                        // UNHEX(MD5(x))
                        let md5 = Expression::Function(Box::new(Function::new(
                            "MD5".to_string(),
                            vec![arg],
                        )));
                        Ok(Expression::Function(Box::new(Function::new(
                            "UNHEX".to_string(),
                            vec![md5],
                        ))))
                    }
                    DialectType::Snowflake => {
                        // MD5_BINARY(x)
                        Ok(Expression::Function(Box::new(Function::new(
                            "MD5_BINARY".to_string(),
                            vec![arg],
                        ))))
                    }
                    // Other targets: passthrough unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "MD5".to_string(),
                        vec![arg],
                    )))),
                }
            }

            "SHA1" if args.len() == 1 => {
                let arg = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // UNHEX(SHA1(x))
                        let sha1 = Expression::Function(Box::new(Function::new(
                            "SHA1".to_string(),
                            vec![arg],
                        )));
                        Ok(Expression::Function(Box::new(Function::new(
                            "UNHEX".to_string(),
                            vec![sha1],
                        ))))
                    }
                    // Other targets: passthrough unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "SHA1".to_string(),
                        vec![arg],
                    )))),
                }
            }

            "SHA256" if args.len() == 1 => {
                let arg = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // UNHEX(SHA256(x))
                        let sha = Expression::Function(Box::new(Function::new(
                            "SHA256".to_string(),
                            vec![arg],
                        )));
                        Ok(Expression::Function(Box::new(Function::new(
                            "UNHEX".to_string(),
                            vec![sha],
                        ))))
                    }
                    DialectType::Snowflake => {
                        // SHA2_BINARY(x, 256)
                        Ok(Expression::Function(Box::new(Function::new(
                            "SHA2_BINARY".to_string(),
                            vec![arg, Expression::number(256)],
                        ))))
                    }
                    DialectType::Redshift | DialectType::Spark => {
                        // SHA2(x, 256) — digest width selected via second argument.
                        Ok(Expression::Function(Box::new(Function::new(
                            "SHA2".to_string(),
                            vec![arg, Expression::number(256)],
                        ))))
                    }
                    // Other targets: passthrough unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "SHA256".to_string(),
                        vec![arg],
                    )))),
                }
            }

            "SHA512" if args.len() == 1 => {
                let arg = args.remove(0);
                match target {
                    DialectType::Snowflake => {
                        // SHA2_BINARY(x, 512)
                        Ok(Expression::Function(Box::new(Function::new(
                            "SHA2_BINARY".to_string(),
                            vec![arg, Expression::number(512)],
                        ))))
                    }
                    DialectType::Redshift | DialectType::Spark => {
                        // SHA2(x, 512)
                        Ok(Expression::Function(Box::new(Function::new(
                            "SHA2".to_string(),
                            vec![arg, Expression::number(512)],
                        ))))
                    }
                    // Other targets: passthrough unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "SHA512".to_string(),
                        vec![arg],
                    )))),
                }
            }
29046
            // REGEXP_EXTRACT_ALL(str, pattern) -> add default group arg
            "REGEXP_EXTRACT_ALL" if args.len() == 2 => {
                let str_expr = args.remove(0);
                let pattern = args.remove(0);

                // Check if pattern contains capturing groups (parentheses)
                // NOTE(review): this heuristic also counts non-capturing `(?:...)`
                // and escaped `\(` `\)` pairs as groups — confirm whether that
                // over-approximation matters for the affected targets.
                let has_groups = match &pattern {
                    Expression::Literal(Literal::String(s)) => s.contains('(') && s.contains(')'),
                    _ => false,
                };

                match target {
                    DialectType::DuckDB => {
                        // DuckDB gets an explicit group index: 1 when the pattern
                        // captures, 0 (whole match) otherwise.
                        let group = if has_groups {
                            Expression::number(1)
                        } else {
                            Expression::number(0)
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "REGEXP_EXTRACT_ALL".to_string(),
                            vec![str_expr, pattern, group],
                        ))))
                    }
                    DialectType::Spark | DialectType::Databricks => {
                        // Spark's default group_index is 1 (same as BigQuery), so omit for capturing groups
                        if has_groups {
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![str_expr, pattern],
                            ))))
                        } else {
                            // No groups: request the whole match explicitly (index 0).
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![str_expr, pattern, Expression::number(0)],
                            ))))
                        }
                    }
                    DialectType::Presto | DialectType::Trino => {
                        // Presto/Trino: explicit group index only when capturing.
                        if has_groups {
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![str_expr, pattern, Expression::number(1)],
                            ))))
                        } else {
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![str_expr, pattern],
                            ))))
                        }
                    }
                    DialectType::Snowflake => {
                        if has_groups {
                            // REGEXP_EXTRACT_ALL(str, pattern, 1, 1, 'c', 1)
                            // (trailing args presumably position, occurrence,
                            // parameters, group_num — verify against Snowflake docs)
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![
                                    str_expr,
                                    pattern,
                                    Expression::number(1),
                                    Expression::number(1),
                                    Expression::Literal(Literal::String("c".to_string())),
                                    Expression::number(1),
                                ],
                            ))))
                        } else {
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![str_expr, pattern],
                            ))))
                        }
                    }
                    // Other targets: passthrough unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "REGEXP_EXTRACT_ALL".to_string(),
                        vec![str_expr, pattern],
                    )))),
                }
            }
29124
29125 // MOD(x, y) -> x % y for PostgreSQL/DuckDB
29126 "MOD" if args.len() == 2 => {
29127 match target {
29128 DialectType::PostgreSQL
29129 | DialectType::DuckDB
29130 | DialectType::Presto
29131 | DialectType::Trino
29132 | DialectType::Athena
29133 | DialectType::Snowflake => {
29134 let x = args.remove(0);
29135 let y = args.remove(0);
29136 // Wrap complex expressions in parens to preserve precedence
29137 let needs_paren = |e: &Expression| {
29138 matches!(
29139 e,
29140 Expression::Add(_)
29141 | Expression::Sub(_)
29142 | Expression::Mul(_)
29143 | Expression::Div(_)
29144 )
29145 };
29146 let x = if needs_paren(&x) {
29147 Expression::Paren(Box::new(crate::expressions::Paren {
29148 this: x,
29149 trailing_comments: vec![],
29150 }))
29151 } else {
29152 x
29153 };
29154 let y = if needs_paren(&y) {
29155 Expression::Paren(Box::new(crate::expressions::Paren {
29156 this: y,
29157 trailing_comments: vec![],
29158 }))
29159 } else {
29160 y
29161 };
29162 Ok(Expression::Mod(Box::new(
29163 crate::expressions::BinaryOp::new(x, y),
29164 )))
29165 }
29166 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
29167 // Hive/Spark: a % b
29168 let x = args.remove(0);
29169 let y = args.remove(0);
29170 let needs_paren = |e: &Expression| {
29171 matches!(
29172 e,
29173 Expression::Add(_)
29174 | Expression::Sub(_)
29175 | Expression::Mul(_)
29176 | Expression::Div(_)
29177 )
29178 };
29179 let x = if needs_paren(&x) {
29180 Expression::Paren(Box::new(crate::expressions::Paren {
29181 this: x,
29182 trailing_comments: vec![],
29183 }))
29184 } else {
29185 x
29186 };
29187 let y = if needs_paren(&y) {
29188 Expression::Paren(Box::new(crate::expressions::Paren {
29189 this: y,
29190 trailing_comments: vec![],
29191 }))
29192 } else {
29193 y
29194 };
29195 Ok(Expression::Mod(Box::new(
29196 crate::expressions::BinaryOp::new(x, y),
29197 )))
29198 }
29199 _ => Ok(Expression::Function(Box::new(Function::new(
29200 "MOD".to_string(),
29201 args,
29202 )))),
29203 }
29204 }
29205
29206 // ARRAY_FILTER(arr, lambda) -> FILTER for Hive/Spark/Presto, ARRAY_FILTER for StarRocks
29207 "ARRAY_FILTER" if args.len() == 2 => {
29208 let name = match target {
29209 DialectType::DuckDB => "LIST_FILTER",
29210 DialectType::StarRocks => "ARRAY_FILTER",
29211 _ => "FILTER",
29212 };
29213 Ok(Expression::Function(Box::new(Function::new(
29214 name.to_string(),
29215 args,
29216 ))))
29217 }
29218 // FILTER(arr, lambda) -> ARRAY_FILTER for StarRocks, LIST_FILTER for DuckDB
29219 "FILTER" if args.len() == 2 => {
29220 let name = match target {
29221 DialectType::DuckDB => "LIST_FILTER",
29222 DialectType::StarRocks => "ARRAY_FILTER",
29223 _ => "FILTER",
29224 };
29225 Ok(Expression::Function(Box::new(Function::new(
29226 name.to_string(),
29227 args,
29228 ))))
29229 }
            // REDUCE(arr, init, lambda1, lambda2) -> AGGREGATE for Spark
            "REDUCE" if args.len() >= 3 => {
                // Spark/Databricks spell this AGGREGATE; everyone else keeps REDUCE.
                let name = match target {
                    DialectType::Spark | DialectType::Databricks => "AGGREGATE",
                    _ => "REDUCE",
                };
                Ok(Expression::Function(Box::new(Function::new(
                    name.to_string(),
                    args,
                ))))
            }
            // ARRAY_REVERSE(x) -> arrayReverse for ClickHouse (handled by generator)
            // Identity rewrite here: the name-casing is the generator's job.
            "ARRAY_REVERSE" if args.len() == 1 => Ok(Expression::Function(Box::new(
                Function::new("ARRAY_REVERSE".to_string(), args),
            ))),

            // CONCAT(a, b, ...) -> a || b || ... for DuckDB with 3+ args
            "CONCAT" if args.len() > 2 => match target {
                DialectType::DuckDB => {
                    // Left-fold the arguments into a chain of || operators.
                    // NOTE(review): `||` propagates NULL whereas CONCAT ignores
                    // NULLs in some dialects — confirm this matches the intent.
                    let mut it = args.into_iter();
                    let mut result = it.next().unwrap();
                    for arg in it {
                        result = Expression::DPipe(Box::new(crate::expressions::DPipe {
                            this: Box::new(result),
                            expression: Box::new(arg),
                            safe: None,
                        }));
                    }
                    Ok(result)
                }
                // Other targets: passthrough unchanged.
                _ => Ok(Expression::Function(Box::new(Function::new(
                    "CONCAT".to_string(),
                    args,
                )))),
            },
29265
29266 // GENERATE_DATE_ARRAY(start, end[, step]) -> target-specific
29267 "GENERATE_DATE_ARRAY" => {
29268 if matches!(target, DialectType::BigQuery) {
29269 // BQ->BQ: add default interval if not present
29270 if args.len() == 2 {
29271 let start = args.remove(0);
29272 let end = args.remove(0);
29273 let default_interval =
29274 Expression::Interval(Box::new(crate::expressions::Interval {
29275 this: Some(Expression::Literal(Literal::String("1".to_string()))),
29276 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
29277 unit: crate::expressions::IntervalUnit::Day,
29278 use_plural: false,
29279 }),
29280 }));
29281 Ok(Expression::Function(Box::new(Function::new(
29282 "GENERATE_DATE_ARRAY".to_string(),
29283 vec![start, end, default_interval],
29284 ))))
29285 } else {
29286 Ok(Expression::Function(Box::new(Function::new(
29287 "GENERATE_DATE_ARRAY".to_string(),
29288 args,
29289 ))))
29290 }
29291 } else if matches!(target, DialectType::DuckDB) {
29292 // DuckDB: CAST(GENERATE_SERIES(CAST(start AS DATE), CAST(end AS DATE), step) AS DATE[])
29293 let start = args.get(0).cloned();
29294 let end = args.get(1).cloned();
29295 let step = args.get(2).cloned().or_else(|| {
29296 Some(Expression::Interval(Box::new(
29297 crate::expressions::Interval {
29298 this: Some(Expression::Literal(Literal::String("1".to_string()))),
29299 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
29300 unit: crate::expressions::IntervalUnit::Day,
29301 use_plural: false,
29302 }),
29303 },
29304 )))
29305 });
29306
29307 // Wrap start/end in CAST(... AS DATE) only for string literals
29308 let maybe_cast_date = |expr: Expression| -> Expression {
29309 if matches!(&expr, Expression::Literal(Literal::String(_))) {
29310 Expression::Cast(Box::new(Cast {
29311 this: expr,
29312 to: DataType::Date,
29313 trailing_comments: vec![],
29314 double_colon_syntax: false,
29315 format: None,
29316 default: None,
29317 }))
29318 } else {
29319 expr
29320 }
29321 };
29322 let cast_start = start.map(maybe_cast_date);
29323 let cast_end = end.map(maybe_cast_date);
29324
29325 let gen_series =
29326 Expression::GenerateSeries(Box::new(crate::expressions::GenerateSeries {
29327 start: cast_start.map(Box::new),
29328 end: cast_end.map(Box::new),
29329 step: step.map(Box::new),
29330 is_end_exclusive: None,
29331 }));
29332
29333 // Wrap in CAST(... AS DATE[])
29334 Ok(Expression::Cast(Box::new(Cast {
29335 this: gen_series,
29336 to: DataType::Array {
29337 element_type: Box::new(DataType::Date),
29338 dimension: None,
29339 },
29340 trailing_comments: vec![],
29341 double_colon_syntax: false,
29342 format: None,
29343 default: None,
29344 })))
29345 } else if matches!(target, DialectType::Snowflake) {
29346 // Snowflake: keep as GENERATE_DATE_ARRAY function for later transform
29347 // (transform_generate_date_array_snowflake will convert to ARRAY_GENERATE_RANGE + DATEADD)
29348 if args.len() == 2 {
29349 let start = args.remove(0);
29350 let end = args.remove(0);
29351 let default_interval =
29352 Expression::Interval(Box::new(crate::expressions::Interval {
29353 this: Some(Expression::Literal(Literal::String("1".to_string()))),
29354 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
29355 unit: crate::expressions::IntervalUnit::Day,
29356 use_plural: false,
29357 }),
29358 }));
29359 Ok(Expression::Function(Box::new(Function::new(
29360 "GENERATE_DATE_ARRAY".to_string(),
29361 vec![start, end, default_interval],
29362 ))))
29363 } else {
29364 Ok(Expression::Function(Box::new(Function::new(
29365 "GENERATE_DATE_ARRAY".to_string(),
29366 args,
29367 ))))
29368 }
29369 } else {
29370 // Convert to GenerateSeries for other targets
29371 let start = args.get(0).cloned();
29372 let end = args.get(1).cloned();
29373 let step = args.get(2).cloned().or_else(|| {
29374 Some(Expression::Interval(Box::new(
29375 crate::expressions::Interval {
29376 this: Some(Expression::Literal(Literal::String("1".to_string()))),
29377 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
29378 unit: crate::expressions::IntervalUnit::Day,
29379 use_plural: false,
29380 }),
29381 },
29382 )))
29383 });
29384 Ok(Expression::GenerateSeries(Box::new(
29385 crate::expressions::GenerateSeries {
29386 start: start.map(Box::new),
29387 end: end.map(Box::new),
29388 step: step.map(Box::new),
29389 is_end_exclusive: None,
29390 },
29391 )))
29392 }
29393 }
29394
            // PARSE_DATE(format, str) -> target-specific
            // (The source dialect puts the format string first; targets below
            // reorder to their own (value, format) convention.)
            "PARSE_DATE" if args.len() == 2 => {
                let format = args.remove(0);
                let str_expr = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // CAST(STRPTIME(str, duck_format) AS DATE)
                        let duck_format = Self::bq_format_to_duckdb(&format);
                        let strptime = Expression::Function(Box::new(Function::new(
                            "STRPTIME".to_string(),
                            vec![str_expr, duck_format],
                        )));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: strptime,
                            to: DataType::Date,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    }
                    DialectType::Snowflake => {
                        // _POLYGLOT_DATE(str, snowflake_format)
                        // Use marker so Snowflake target transform keeps it as DATE() instead of TO_DATE()
                        let sf_format = Self::bq_format_to_snowflake(&format);
                        Ok(Expression::Function(Box::new(Function::new(
                            "_POLYGLOT_DATE".to_string(),
                            vec![str_expr, sf_format],
                        ))))
                    }
                    // Other targets: passthrough in the original argument order.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "PARSE_DATE".to_string(),
                        vec![format, str_expr],
                    )))),
                }
            }

            // PARSE_TIMESTAMP(format, str) -> target-specific
            "PARSE_TIMESTAMP" if args.len() >= 2 => {
                let format = args.remove(0);
                let str_expr = args.remove(0);
                // Optional third argument: a time zone.
                let tz = if !args.is_empty() {
                    Some(args.remove(0))
                } else {
                    None
                };
                match target {
                    DialectType::DuckDB => {
                        // STRPTIME(str, duck_format)
                        // NOTE(review): the optional tz argument is silently dropped
                        // on this path — confirm that is intentional.
                        let duck_format = Self::bq_format_to_duckdb(&format);
                        let strptime = Expression::Function(Box::new(Function::new(
                            "STRPTIME".to_string(),
                            vec![str_expr, duck_format],
                        )));
                        Ok(strptime)
                    }
                    _ => {
                        // Passthrough, re-appending the tz argument if present.
                        let mut result_args = vec![format, str_expr];
                        if let Some(tz_arg) = tz {
                            result_args.push(tz_arg);
                        }
                        Ok(Expression::Function(Box::new(Function::new(
                            "PARSE_TIMESTAMP".to_string(),
                            result_args,
                        ))))
                    }
                }
            }
29462
            // FORMAT_DATE(format, date) -> target-specific
            "FORMAT_DATE" if args.len() == 2 => {
                let format = args.remove(0);
                let date_expr = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // STRFTIME(CAST(date AS DATE), format)
                        let cast_date = Expression::Cast(Box::new(Cast {
                            this: date_expr,
                            to: DataType::Date,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "STRFTIME".to_string(),
                            vec![cast_date, format],
                        ))))
                    }
                    // Other targets: passthrough unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "FORMAT_DATE".to_string(),
                        vec![format, date_expr],
                    )))),
                }
            }

            // FORMAT_DATETIME(format, datetime) -> target-specific
            "FORMAT_DATETIME" if args.len() == 2 => {
                let format = args.remove(0);
                let dt_expr = args.remove(0);

                if matches!(target, DialectType::BigQuery) {
                    // BQ->BQ: normalize %H:%M:%S to %T, %x to %D
                    let norm_format = Self::bq_format_normalize_bq(&format);
                    // Also strip DATETIME keyword from typed literals
                    // (rebuild the literal as CAST('...' AS DATETIME)).
                    let norm_dt = match dt_expr {
                        Expression::Literal(Literal::Timestamp(s)) => {
                            Expression::Cast(Box::new(Cast {
                                this: Expression::Literal(Literal::String(s)),
                                to: DataType::Custom {
                                    name: "DATETIME".to_string(),
                                },
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                            }))
                        }
                        other => other,
                    };
                    return Ok(Expression::Function(Box::new(Function::new(
                        "FORMAT_DATETIME".to_string(),
                        vec![norm_format, norm_dt],
                    ))));
                }

                match target {
                    DialectType::DuckDB => {
                        // STRFTIME(CAST(dt AS TIMESTAMP), duckdb_format)
                        let cast_dt = Self::ensure_cast_timestamp(dt_expr);
                        let duck_format = Self::bq_format_to_duckdb(&format);
                        Ok(Expression::Function(Box::new(Function::new(
                            "STRFTIME".to_string(),
                            vec![cast_dt, duck_format],
                        ))))
                    }
                    // Other targets: passthrough unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "FORMAT_DATETIME".to_string(),
                        vec![format, dt_expr],
                    )))),
                }
            }

            // FORMAT_TIMESTAMP(format, ts) -> target-specific
            "FORMAT_TIMESTAMP" if args.len() == 2 => {
                let format = args.remove(0);
                let ts_expr = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // STRFTIME(CAST(CAST(ts AS TIMESTAMPTZ) AS TIMESTAMP), format)
                        // Double cast: go through TIMESTAMPTZ first, then drop the
                        // zone before formatting.
                        let cast_tstz = Self::ensure_cast_timestamptz(ts_expr);
                        let cast_ts = Expression::Cast(Box::new(Cast {
                            this: cast_tstz,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "STRFTIME".to_string(),
                            vec![cast_ts, format],
                        ))))
                    }
                    DialectType::Snowflake => {
                        // TO_CHAR(CAST(CAST(ts AS TIMESTAMPTZ) AS TIMESTAMP), snowflake_format)
                        let cast_tstz = Self::ensure_cast_timestamptz(ts_expr);
                        let cast_ts = Expression::Cast(Box::new(Cast {
                            this: cast_tstz,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        let sf_format = Self::bq_format_to_snowflake(&format);
                        Ok(Expression::Function(Box::new(Function::new(
                            "TO_CHAR".to_string(),
                            vec![cast_ts, sf_format],
                        ))))
                    }
                    // Other targets: passthrough unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "FORMAT_TIMESTAMP".to_string(),
                        vec![format, ts_expr],
                    )))),
                }
            }
29587
29588 // UNIX_DATE(date) -> DATE_DIFF('DAY', '1970-01-01', date) for DuckDB
29589 "UNIX_DATE" if args.len() == 1 => {
29590 let date = args.remove(0);
29591 match target {
29592 DialectType::DuckDB => {
29593 let epoch = Expression::Cast(Box::new(Cast {
29594 this: Expression::Literal(Literal::String("1970-01-01".to_string())),
29595 to: DataType::Date,
29596 trailing_comments: vec![],
29597 double_colon_syntax: false,
29598 format: None,
29599 default: None,
29600 }));
29601 // DATE_DIFF('DAY', epoch, date) but date might be DATE '...' literal
29602 // Need to convert DATE literal to CAST
29603 let norm_date = Self::date_literal_to_cast(date);
29604 Ok(Expression::Function(Box::new(Function::new(
29605 "DATE_DIFF".to_string(),
29606 vec![
29607 Expression::Literal(Literal::String("DAY".to_string())),
29608 epoch,
29609 norm_date,
29610 ],
29611 ))))
29612 }
29613 _ => Ok(Expression::Function(Box::new(Function::new(
29614 "UNIX_DATE".to_string(),
29615 vec![date],
29616 )))),
29617 }
29618 }
29619
29620 // UNIX_SECONDS(ts) -> target-specific
29621 "UNIX_SECONDS" if args.len() == 1 => {
29622 let ts = args.remove(0);
29623 match target {
29624 DialectType::DuckDB => {
29625 // CAST(EPOCH(CAST(ts AS TIMESTAMPTZ)) AS BIGINT)
29626 let norm_ts = Self::ts_literal_to_cast_tz(ts);
29627 let epoch = Expression::Function(Box::new(Function::new(
29628 "EPOCH".to_string(),
29629 vec![norm_ts],
29630 )));
29631 Ok(Expression::Cast(Box::new(Cast {
29632 this: epoch,
29633 to: DataType::BigInt { length: None },
29634 trailing_comments: vec![],
29635 double_colon_syntax: false,
29636 format: None,
29637 default: None,
29638 })))
29639 }
29640 DialectType::Snowflake => {
29641 // TIMESTAMPDIFF(SECONDS, CAST('1970-01-01 00:00:00+00' AS TIMESTAMPTZ), ts)
29642 let epoch = Expression::Cast(Box::new(Cast {
29643 this: Expression::Literal(Literal::String(
29644 "1970-01-01 00:00:00+00".to_string(),
29645 )),
29646 to: DataType::Timestamp {
29647 timezone: true,
29648 precision: None,
29649 },
29650 trailing_comments: vec![],
29651 double_colon_syntax: false,
29652 format: None,
29653 default: None,
29654 }));
29655 Ok(Expression::Function(Box::new(Function::new(
29656 "TIMESTAMPDIFF".to_string(),
29657 vec![
29658 Expression::Identifier(Identifier::new("SECONDS".to_string())),
29659 epoch,
29660 ts,
29661 ],
29662 ))))
29663 }
29664 _ => Ok(Expression::Function(Box::new(Function::new(
29665 "UNIX_SECONDS".to_string(),
29666 vec![ts],
29667 )))),
29668 }
29669 }
29670
29671 // UNIX_MILLIS(ts) -> target-specific
29672 "UNIX_MILLIS" if args.len() == 1 => {
29673 let ts = args.remove(0);
29674 match target {
29675 DialectType::DuckDB => {
29676 let norm_ts = Self::ts_literal_to_cast_tz(ts);
29677 Ok(Expression::Function(Box::new(Function::new(
29678 "EPOCH_MS".to_string(),
29679 vec![norm_ts],
29680 ))))
29681 }
29682 _ => Ok(Expression::Function(Box::new(Function::new(
29683 "UNIX_MILLIS".to_string(),
29684 vec![ts],
29685 )))),
29686 }
29687 }
29688
29689 // UNIX_MICROS(ts) -> target-specific
29690 "UNIX_MICROS" if args.len() == 1 => {
29691 let ts = args.remove(0);
29692 match target {
29693 DialectType::DuckDB => {
29694 let norm_ts = Self::ts_literal_to_cast_tz(ts);
29695 Ok(Expression::Function(Box::new(Function::new(
29696 "EPOCH_US".to_string(),
29697 vec![norm_ts],
29698 ))))
29699 }
29700 _ => Ok(Expression::Function(Box::new(Function::new(
29701 "UNIX_MICROS".to_string(),
29702 vec![ts],
29703 )))),
29704 }
29705 }
29706
29707 // INSTR(str, substr) -> target-specific
29708 "INSTR" => {
29709 if matches!(target, DialectType::BigQuery) {
29710 // BQ->BQ: keep as INSTR
29711 Ok(Expression::Function(Box::new(Function::new(
29712 "INSTR".to_string(),
29713 args,
29714 ))))
29715 } else if matches!(target, DialectType::Snowflake) && args.len() == 2 {
29716 // Snowflake: CHARINDEX(substr, str) - swap args
29717 let str_expr = args.remove(0);
29718 let substr = args.remove(0);
29719 Ok(Expression::Function(Box::new(Function::new(
29720 "CHARINDEX".to_string(),
29721 vec![substr, str_expr],
29722 ))))
29723 } else {
29724 // Keep as INSTR for other targets
29725 Ok(Expression::Function(Box::new(Function::new(
29726 "INSTR".to_string(),
29727 args,
29728 ))))
29729 }
29730 }
29731
29732 // CURRENT_TIMESTAMP / CURRENT_DATE handling - parens normalization and timezone
29733 "CURRENT_TIMESTAMP" | "CURRENT_DATE" | "CURRENT_DATETIME" | "CURRENT_TIME" => {
29734 if matches!(target, DialectType::BigQuery) {
29735 // BQ->BQ: always output with parens (function form), keep any timezone arg
29736 Ok(Expression::Function(Box::new(Function::new(name, args))))
29737 } else if name == "CURRENT_DATE" && args.len() == 1 {
29738 // CURRENT_DATE('UTC') - has timezone arg
29739 let tz_arg = args.remove(0);
29740 match target {
29741 DialectType::DuckDB => {
29742 // CAST(CURRENT_TIMESTAMP AT TIME ZONE 'UTC' AS DATE)
29743 let ct = Expression::CurrentTimestamp(
29744 crate::expressions::CurrentTimestamp {
29745 precision: None,
29746 sysdate: false,
29747 },
29748 );
29749 let at_tz =
29750 Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
29751 this: ct,
29752 zone: tz_arg,
29753 }));
29754 Ok(Expression::Cast(Box::new(Cast {
29755 this: at_tz,
29756 to: DataType::Date,
29757 trailing_comments: vec![],
29758 double_colon_syntax: false,
29759 format: None,
29760 default: None,
29761 })))
29762 }
29763 DialectType::Snowflake => {
29764 // CAST(CONVERT_TIMEZONE('UTC', CURRENT_TIMESTAMP()) AS DATE)
29765 let ct = Expression::Function(Box::new(Function::new(
29766 "CURRENT_TIMESTAMP".to_string(),
29767 vec![],
29768 )));
29769 let convert = Expression::Function(Box::new(Function::new(
29770 "CONVERT_TIMEZONE".to_string(),
29771 vec![tz_arg, ct],
29772 )));
29773 Ok(Expression::Cast(Box::new(Cast {
29774 this: convert,
29775 to: DataType::Date,
29776 trailing_comments: vec![],
29777 double_colon_syntax: false,
29778 format: None,
29779 default: None,
29780 })))
29781 }
29782 _ => {
29783 // PostgreSQL, MySQL, etc.: CURRENT_DATE AT TIME ZONE 'UTC'
29784 let cd = Expression::CurrentDate(crate::expressions::CurrentDate);
29785 Ok(Expression::AtTimeZone(Box::new(
29786 crate::expressions::AtTimeZone {
29787 this: cd,
29788 zone: tz_arg,
29789 },
29790 )))
29791 }
29792 }
29793 } else if (name == "CURRENT_TIMESTAMP"
29794 || name == "CURRENT_TIME"
29795 || name == "CURRENT_DATE")
29796 && args.is_empty()
29797 && matches!(
29798 target,
29799 DialectType::PostgreSQL
29800 | DialectType::DuckDB
29801 | DialectType::Presto
29802 | DialectType::Trino
29803 )
29804 {
29805 // These targets want no-parens CURRENT_TIMESTAMP / CURRENT_DATE / CURRENT_TIME
29806 if name == "CURRENT_TIMESTAMP" {
29807 Ok(Expression::CurrentTimestamp(
29808 crate::expressions::CurrentTimestamp {
29809 precision: None,
29810 sysdate: false,
29811 },
29812 ))
29813 } else if name == "CURRENT_DATE" {
29814 Ok(Expression::CurrentDate(crate::expressions::CurrentDate))
29815 } else {
29816 // CURRENT_TIME
29817 Ok(Expression::CurrentTime(crate::expressions::CurrentTime {
29818 precision: None,
29819 }))
29820 }
29821 } else {
29822 // All other targets: keep as function (with parens)
29823 Ok(Expression::Function(Box::new(Function::new(name, args))))
29824 }
29825 }
29826
29827 // JSON_QUERY(json, path) -> target-specific
29828 "JSON_QUERY" if args.len() == 2 => {
29829 match target {
29830 DialectType::DuckDB | DialectType::SQLite => {
29831 // json -> path syntax
29832 let json_expr = args.remove(0);
29833 let path = args.remove(0);
29834 Ok(Expression::JsonExtract(Box::new(
29835 crate::expressions::JsonExtractFunc {
29836 this: json_expr,
29837 path,
29838 returning: None,
29839 arrow_syntax: true,
29840 hash_arrow_syntax: false,
29841 wrapper_option: None,
29842 quotes_option: None,
29843 on_scalar_string: false,
29844 on_error: None,
29845 },
29846 )))
29847 }
29848 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
29849 Ok(Expression::Function(Box::new(Function::new(
29850 "GET_JSON_OBJECT".to_string(),
29851 args,
29852 ))))
29853 }
29854 DialectType::PostgreSQL | DialectType::Redshift => Ok(Expression::Function(
29855 Box::new(Function::new("JSON_EXTRACT_PATH".to_string(), args)),
29856 )),
29857 _ => Ok(Expression::Function(Box::new(Function::new(
29858 "JSON_QUERY".to_string(),
29859 args,
29860 )))),
29861 }
29862 }
29863
29864 // JSON_VALUE_ARRAY(json, path) -> target-specific
29865 "JSON_VALUE_ARRAY" if args.len() == 2 => {
29866 match target {
29867 DialectType::DuckDB => {
29868 // CAST(json -> path AS TEXT[])
29869 let json_expr = args.remove(0);
29870 let path = args.remove(0);
29871 let arrow = Expression::JsonExtract(Box::new(
29872 crate::expressions::JsonExtractFunc {
29873 this: json_expr,
29874 path,
29875 returning: None,
29876 arrow_syntax: true,
29877 hash_arrow_syntax: false,
29878 wrapper_option: None,
29879 quotes_option: None,
29880 on_scalar_string: false,
29881 on_error: None,
29882 },
29883 ));
29884 Ok(Expression::Cast(Box::new(Cast {
29885 this: arrow,
29886 to: DataType::Array {
29887 element_type: Box::new(DataType::Text),
29888 dimension: None,
29889 },
29890 trailing_comments: vec![],
29891 double_colon_syntax: false,
29892 format: None,
29893 default: None,
29894 })))
29895 }
29896 DialectType::Snowflake => {
29897 let json_expr = args.remove(0);
29898 let path_expr = args.remove(0);
29899 // Convert JSON path from $.path to just path
29900 let sf_path = if let Expression::Literal(Literal::String(ref s)) = path_expr
29901 {
29902 let trimmed = s.trim_start_matches('$').trim_start_matches('.');
29903 Expression::Literal(Literal::String(trimmed.to_string()))
29904 } else {
29905 path_expr
29906 };
29907 let parse_json = Expression::Function(Box::new(Function::new(
29908 "PARSE_JSON".to_string(),
29909 vec![json_expr],
29910 )));
29911 let get_path = Expression::Function(Box::new(Function::new(
29912 "GET_PATH".to_string(),
29913 vec![parse_json, sf_path],
29914 )));
29915 // TRANSFORM(get_path, x -> CAST(x AS VARCHAR))
29916 let cast_expr = Expression::Cast(Box::new(Cast {
29917 this: Expression::Identifier(Identifier::new("x")),
29918 to: DataType::VarChar {
29919 length: None,
29920 parenthesized_length: false,
29921 },
29922 trailing_comments: vec![],
29923 double_colon_syntax: false,
29924 format: None,
29925 default: None,
29926 }));
29927 let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
29928 parameters: vec![Identifier::new("x")],
29929 body: cast_expr,
29930 colon: false,
29931 parameter_types: vec![],
29932 }));
29933 Ok(Expression::Function(Box::new(Function::new(
29934 "TRANSFORM".to_string(),
29935 vec![get_path, lambda],
29936 ))))
29937 }
29938 _ => Ok(Expression::Function(Box::new(Function::new(
29939 "JSON_VALUE_ARRAY".to_string(),
29940 args,
29941 )))),
29942 }
29943 }
29944
29945 // BigQuery REGEXP_EXTRACT(val, regex[, position[, occurrence]]) -> target dialects
29946 // BigQuery's 3rd arg is "position" (starting char index), 4th is "occurrence" (which match to return)
29947 // This is different from Hive/Spark where 3rd arg is "group_index"
29948 "REGEXP_EXTRACT" if matches!(source, DialectType::BigQuery) => {
29949 match target {
29950 DialectType::DuckDB
29951 | DialectType::Presto
29952 | DialectType::Trino
29953 | DialectType::Athena => {
29954 if args.len() == 2 {
29955 // REGEXP_EXTRACT(val, regex) -> REGEXP_EXTRACT(val, regex, 1)
29956 args.push(Expression::number(1));
29957 Ok(Expression::Function(Box::new(Function::new(
29958 "REGEXP_EXTRACT".to_string(),
29959 args,
29960 ))))
29961 } else if args.len() == 3 {
29962 let val = args.remove(0);
29963 let regex = args.remove(0);
29964 let position = args.remove(0);
29965 let is_pos_1 = matches!(&position, Expression::Literal(Literal::Number(n)) if n == "1");
29966 if is_pos_1 {
29967 Ok(Expression::Function(Box::new(Function::new(
29968 "REGEXP_EXTRACT".to_string(),
29969 vec![val, regex, Expression::number(1)],
29970 ))))
29971 } else {
29972 let substring_expr = Expression::Function(Box::new(Function::new(
29973 "SUBSTRING".to_string(),
29974 vec![val, position],
29975 )));
29976 let nullif_expr = Expression::Function(Box::new(Function::new(
29977 "NULLIF".to_string(),
29978 vec![
29979 substring_expr,
29980 Expression::Literal(Literal::String(String::new())),
29981 ],
29982 )));
29983 Ok(Expression::Function(Box::new(Function::new(
29984 "REGEXP_EXTRACT".to_string(),
29985 vec![nullif_expr, regex, Expression::number(1)],
29986 ))))
29987 }
29988 } else if args.len() == 4 {
29989 let val = args.remove(0);
29990 let regex = args.remove(0);
29991 let position = args.remove(0);
29992 let occurrence = args.remove(0);
29993 let is_pos_1 = matches!(&position, Expression::Literal(Literal::Number(n)) if n == "1");
29994 let is_occ_1 = matches!(&occurrence, Expression::Literal(Literal::Number(n)) if n == "1");
29995 if is_pos_1 && is_occ_1 {
29996 Ok(Expression::Function(Box::new(Function::new(
29997 "REGEXP_EXTRACT".to_string(),
29998 vec![val, regex, Expression::number(1)],
29999 ))))
30000 } else {
30001 let subject = if is_pos_1 {
30002 val
30003 } else {
30004 let substring_expr = Expression::Function(Box::new(
30005 Function::new("SUBSTRING".to_string(), vec![val, position]),
30006 ));
30007 Expression::Function(Box::new(Function::new(
30008 "NULLIF".to_string(),
30009 vec![
30010 substring_expr,
30011 Expression::Literal(Literal::String(String::new())),
30012 ],
30013 )))
30014 };
30015 let extract_all = Expression::Function(Box::new(Function::new(
30016 "REGEXP_EXTRACT_ALL".to_string(),
30017 vec![subject, regex, Expression::number(1)],
30018 )));
30019 Ok(Expression::Function(Box::new(Function::new(
30020 "ARRAY_EXTRACT".to_string(),
30021 vec![extract_all, occurrence],
30022 ))))
30023 }
30024 } else {
30025 Ok(Expression::Function(Box::new(Function {
30026 name: f.name,
30027 args,
30028 distinct: f.distinct,
30029 trailing_comments: f.trailing_comments,
30030 use_bracket_syntax: f.use_bracket_syntax,
30031 no_parens: f.no_parens,
30032 quoted: f.quoted,
30033 span: None,
30034 })))
30035 }
30036 }
30037 DialectType::Snowflake => {
30038 // BigQuery REGEXP_EXTRACT -> Snowflake REGEXP_SUBSTR
30039 Ok(Expression::Function(Box::new(Function::new(
30040 "REGEXP_SUBSTR".to_string(),
30041 args,
30042 ))))
30043 }
30044 _ => {
30045 // For other targets (Hive/Spark/BigQuery): pass through as-is
30046 // BigQuery's default group behavior matches Hive/Spark for 2-arg case
30047 Ok(Expression::Function(Box::new(Function {
30048 name: f.name,
30049 args,
30050 distinct: f.distinct,
30051 trailing_comments: f.trailing_comments,
30052 use_bracket_syntax: f.use_bracket_syntax,
30053 no_parens: f.no_parens,
30054 quoted: f.quoted,
30055 span: None,
30056 })))
30057 }
30058 }
30059 }
30060
30061 // BigQuery STRUCT(args) -> target-specific struct expression
30062 "STRUCT" => {
30063 // Convert Function args to Struct fields
30064 let mut fields: Vec<(Option<String>, Expression)> = Vec::new();
30065 for (i, arg) in args.into_iter().enumerate() {
30066 match arg {
30067 Expression::Alias(a) => {
30068 // Named field: expr AS name
30069 fields.push((Some(a.alias.name.clone()), a.this));
30070 }
30071 other => {
30072 // Unnamed field: for Spark/Hive, keep as None
30073 // For Snowflake, auto-name as _N
30074 // For DuckDB, use column name for column refs, _N for others
30075 if matches!(target, DialectType::Snowflake) {
30076 fields.push((Some(format!("_{}", i)), other));
30077 } else if matches!(target, DialectType::DuckDB) {
30078 let auto_name = match &other {
30079 Expression::Column(col) => col.name.name.clone(),
30080 _ => format!("_{}", i),
30081 };
30082 fields.push((Some(auto_name), other));
30083 } else {
30084 fields.push((None, other));
30085 }
30086 }
30087 }
30088 }
30089
30090 match target {
30091 DialectType::Snowflake => {
30092 // OBJECT_CONSTRUCT('name', value, ...)
30093 let mut oc_args = Vec::new();
30094 for (name, val) in &fields {
30095 if let Some(n) = name {
30096 oc_args.push(Expression::Literal(Literal::String(n.clone())));
30097 oc_args.push(val.clone());
30098 } else {
30099 oc_args.push(val.clone());
30100 }
30101 }
30102 Ok(Expression::Function(Box::new(Function::new(
30103 "OBJECT_CONSTRUCT".to_string(),
30104 oc_args,
30105 ))))
30106 }
30107 DialectType::DuckDB => {
30108 // {'name': value, ...}
30109 Ok(Expression::Struct(Box::new(crate::expressions::Struct {
30110 fields,
30111 })))
30112 }
30113 DialectType::Hive => {
30114 // STRUCT(val1, val2, ...) - strip aliases
30115 let hive_fields: Vec<(Option<String>, Expression)> =
30116 fields.into_iter().map(|(_, v)| (None, v)).collect();
30117 Ok(Expression::Struct(Box::new(crate::expressions::Struct {
30118 fields: hive_fields,
30119 })))
30120 }
30121 DialectType::Spark | DialectType::Databricks => {
30122 // Use Expression::Struct to bypass Spark target transform auto-naming
30123 Ok(Expression::Struct(Box::new(crate::expressions::Struct {
30124 fields,
30125 })))
30126 }
30127 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
30128 // Check if all fields are named AND all have inferable types - if so, wrap in CAST(ROW(...) AS ROW(name TYPE, ...))
30129 let all_named =
30130 !fields.is_empty() && fields.iter().all(|(name, _)| name.is_some());
30131 let all_types_inferable = all_named
30132 && fields
30133 .iter()
30134 .all(|(_, val)| Self::can_infer_presto_type(val));
30135 let row_args: Vec<Expression> =
30136 fields.iter().map(|(_, v)| v.clone()).collect();
30137 let row_expr = Expression::Function(Box::new(Function::new(
30138 "ROW".to_string(),
30139 row_args,
30140 )));
30141 if all_named && all_types_inferable {
30142 // Build ROW type with inferred types
30143 let mut row_type_fields = Vec::new();
30144 for (name, val) in &fields {
30145 if let Some(n) = name {
30146 let type_str = Self::infer_sql_type_for_presto(val);
30147 row_type_fields.push(crate::expressions::StructField::new(
30148 n.clone(),
30149 crate::expressions::DataType::Custom { name: type_str },
30150 ));
30151 }
30152 }
30153 let row_type = crate::expressions::DataType::Struct {
30154 fields: row_type_fields,
30155 nested: true,
30156 };
30157 Ok(Expression::Cast(Box::new(Cast {
30158 this: row_expr,
30159 to: row_type,
30160 trailing_comments: Vec::new(),
30161 double_colon_syntax: false,
30162 format: None,
30163 default: None,
30164 })))
30165 } else {
30166 Ok(row_expr)
30167 }
30168 }
30169 _ => {
30170 // Default: keep as STRUCT function with original args
30171 let mut new_args = Vec::new();
30172 for (name, val) in fields {
30173 if let Some(n) = name {
30174 new_args.push(Expression::Alias(Box::new(
30175 crate::expressions::Alias::new(val, Identifier::new(n)),
30176 )));
30177 } else {
30178 new_args.push(val);
30179 }
30180 }
30181 Ok(Expression::Function(Box::new(Function::new(
30182 "STRUCT".to_string(),
30183 new_args,
30184 ))))
30185 }
30186 }
30187 }
30188
30189 // ROUND(x, n, 'ROUND_HALF_EVEN') -> ROUND_EVEN(x, n) for DuckDB
30190 "ROUND" if args.len() == 3 => {
30191 let x = args.remove(0);
30192 let n = args.remove(0);
30193 let mode = args.remove(0);
30194 // Check if mode is 'ROUND_HALF_EVEN'
30195 let is_half_even = matches!(&mode, Expression::Literal(Literal::String(s)) if s.eq_ignore_ascii_case("ROUND_HALF_EVEN"));
30196 if is_half_even && matches!(target, DialectType::DuckDB) {
30197 Ok(Expression::Function(Box::new(Function::new(
30198 "ROUND_EVEN".to_string(),
30199 vec![x, n],
30200 ))))
30201 } else {
30202 // Pass through with all args
30203 Ok(Expression::Function(Box::new(Function::new(
30204 "ROUND".to_string(),
30205 vec![x, n, mode],
30206 ))))
30207 }
30208 }
30209
30210 // MAKE_INTERVAL(year, month, named_args...) -> INTERVAL string for Snowflake/DuckDB
30211 "MAKE_INTERVAL" => {
30212 // MAKE_INTERVAL(1, 2, minute => 5, day => 3)
30213 // The positional args are: year, month
30214 // Named args are: day =>, minute =>, etc.
30215 // For Snowflake: INTERVAL '1 year, 2 month, 5 minute, 3 day'
30216 // For DuckDB: INTERVAL '1 year 2 month 5 minute 3 day'
30217 // For BigQuery->BigQuery: reorder named args (day before minute)
30218 if matches!(target, DialectType::Snowflake | DialectType::DuckDB) {
30219 let mut parts: Vec<(String, String)> = Vec::new();
30220 let mut pos_idx = 0;
30221 let pos_units = ["year", "month"];
30222 for arg in &args {
30223 if let Expression::NamedArgument(na) = arg {
30224 // Named arg like minute => 5
30225 let unit = na.name.name.clone();
30226 if let Expression::Literal(Literal::Number(n)) = &na.value {
30227 parts.push((unit, n.clone()));
30228 }
30229 } else if pos_idx < pos_units.len() {
30230 if let Expression::Literal(Literal::Number(n)) = arg {
30231 parts.push((pos_units[pos_idx].to_string(), n.clone()));
30232 }
30233 pos_idx += 1;
30234 }
30235 }
30236 // Don't sort - preserve original argument order
30237 let separator = if matches!(target, DialectType::Snowflake) {
30238 ", "
30239 } else {
30240 " "
30241 };
30242 let interval_str = parts
30243 .iter()
30244 .map(|(u, v)| format!("{} {}", v, u))
30245 .collect::<Vec<_>>()
30246 .join(separator);
30247 Ok(Expression::Interval(Box::new(
30248 crate::expressions::Interval {
30249 this: Some(Expression::Literal(Literal::String(interval_str))),
30250 unit: None,
30251 },
30252 )))
30253 } else if matches!(target, DialectType::BigQuery) {
30254 // BigQuery->BigQuery: reorder named args (day, minute, etc.)
30255 let mut positional = Vec::new();
30256 let mut named: Vec<(
30257 String,
30258 Expression,
30259 crate::expressions::NamedArgSeparator,
30260 )> = Vec::new();
30261 let _pos_units = ["year", "month"];
30262 let mut _pos_idx = 0;
30263 for arg in args {
30264 if let Expression::NamedArgument(na) = arg {
30265 named.push((na.name.name.clone(), na.value, na.separator));
30266 } else {
30267 positional.push(arg);
30268 _pos_idx += 1;
30269 }
30270 }
30271 // Sort named args by: day, hour, minute, second
30272 let unit_order = |u: &str| -> usize {
30273 match u.to_lowercase().as_str() {
30274 "day" => 0,
30275 "hour" => 1,
30276 "minute" => 2,
30277 "second" => 3,
30278 _ => 4,
30279 }
30280 };
30281 named.sort_by_key(|(u, _, _)| unit_order(u));
30282 let mut result_args = positional;
30283 for (name, value, sep) in named {
30284 result_args.push(Expression::NamedArgument(Box::new(
30285 crate::expressions::NamedArgument {
30286 name: Identifier::new(&name),
30287 value,
30288 separator: sep,
30289 },
30290 )));
30291 }
30292 Ok(Expression::Function(Box::new(Function::new(
30293 "MAKE_INTERVAL".to_string(),
30294 result_args,
30295 ))))
30296 } else {
30297 Ok(Expression::Function(Box::new(Function::new(
30298 "MAKE_INTERVAL".to_string(),
30299 args,
30300 ))))
30301 }
30302 }
30303
30304 // ARRAY_TO_STRING(array, sep, null_text) -> ARRAY_TO_STRING(LIST_TRANSFORM(array, x -> COALESCE(x, null_text)), sep) for DuckDB
30305 "ARRAY_TO_STRING" if args.len() == 3 => {
30306 let arr = args.remove(0);
30307 let sep = args.remove(0);
30308 let null_text = args.remove(0);
30309 match target {
30310 DialectType::DuckDB => {
30311 // LIST_TRANSFORM(array, x -> COALESCE(x, null_text))
30312 let _lambda_param =
30313 Expression::Identifier(crate::expressions::Identifier::new("x"));
30314 let coalesce =
30315 Expression::Coalesce(Box::new(crate::expressions::VarArgFunc {
30316 original_name: None,
30317 expressions: vec![
30318 Expression::Identifier(crate::expressions::Identifier::new(
30319 "x",
30320 )),
30321 null_text,
30322 ],
30323 }));
30324 let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
30325 parameters: vec![crate::expressions::Identifier::new("x")],
30326 body: coalesce,
30327 colon: false,
30328 parameter_types: vec![],
30329 }));
30330 let list_transform = Expression::Function(Box::new(Function::new(
30331 "LIST_TRANSFORM".to_string(),
30332 vec![arr, lambda],
30333 )));
30334 Ok(Expression::Function(Box::new(Function::new(
30335 "ARRAY_TO_STRING".to_string(),
30336 vec![list_transform, sep],
30337 ))))
30338 }
30339 _ => Ok(Expression::Function(Box::new(Function::new(
30340 "ARRAY_TO_STRING".to_string(),
30341 vec![arr, sep, null_text],
30342 )))),
30343 }
30344 }
30345
30346 // LENGTH(x) -> CASE TYPEOF(x) ... for DuckDB
30347 "LENGTH" if args.len() == 1 => {
30348 let arg = args.remove(0);
30349 match target {
30350 DialectType::DuckDB => {
30351 // CASE TYPEOF(foo) WHEN 'BLOB' THEN OCTET_LENGTH(CAST(foo AS BLOB)) ELSE LENGTH(CAST(foo AS TEXT)) END
30352 let typeof_func = Expression::Function(Box::new(Function::new(
30353 "TYPEOF".to_string(),
30354 vec![arg.clone()],
30355 )));
30356 let blob_cast = Expression::Cast(Box::new(Cast {
30357 this: arg.clone(),
30358 to: DataType::VarBinary { length: None },
30359 trailing_comments: vec![],
30360 double_colon_syntax: false,
30361 format: None,
30362 default: None,
30363 }));
30364 let octet_length = Expression::Function(Box::new(Function::new(
30365 "OCTET_LENGTH".to_string(),
30366 vec![blob_cast],
30367 )));
30368 let text_cast = Expression::Cast(Box::new(Cast {
30369 this: arg,
30370 to: DataType::Text,
30371 trailing_comments: vec![],
30372 double_colon_syntax: false,
30373 format: None,
30374 default: None,
30375 }));
30376 let length_text = Expression::Function(Box::new(Function::new(
30377 "LENGTH".to_string(),
30378 vec![text_cast],
30379 )));
30380 Ok(Expression::Case(Box::new(crate::expressions::Case {
30381 operand: Some(typeof_func),
30382 whens: vec![(
30383 Expression::Literal(Literal::String("BLOB".to_string())),
30384 octet_length,
30385 )],
30386 else_: Some(length_text),
30387 comments: Vec::new(),
30388 })))
30389 }
30390 _ => Ok(Expression::Function(Box::new(Function::new(
30391 "LENGTH".to_string(),
30392 vec![arg],
30393 )))),
30394 }
30395 }
30396
30397 // PERCENTILE_CONT(x, fraction RESPECT NULLS) -> QUANTILE_CONT(x, fraction) for DuckDB
30398 "PERCENTILE_CONT" if args.len() >= 2 && matches!(source, DialectType::BigQuery) => {
30399 // BigQuery PERCENTILE_CONT(x, fraction [RESPECT|IGNORE NULLS]) OVER ()
30400 // The args should be [x, fraction] with the null handling stripped
30401 // For DuckDB: QUANTILE_CONT(x, fraction)
30402 // For Spark: PERCENTILE_CONT(x, fraction) RESPECT NULLS (handled at window level)
30403 match target {
30404 DialectType::DuckDB => {
30405 // Strip down to just 2 args, rename to QUANTILE_CONT
30406 let x = args[0].clone();
30407 let frac = args[1].clone();
30408 Ok(Expression::Function(Box::new(Function::new(
30409 "QUANTILE_CONT".to_string(),
30410 vec![x, frac],
30411 ))))
30412 }
30413 _ => Ok(Expression::Function(Box::new(Function::new(
30414 "PERCENTILE_CONT".to_string(),
30415 args,
30416 )))),
30417 }
30418 }
30419
30420 // All others: pass through
30421 _ => Ok(Expression::Function(Box::new(Function {
30422 name: f.name,
30423 args,
30424 distinct: f.distinct,
30425 trailing_comments: f.trailing_comments,
30426 use_bracket_syntax: f.use_bracket_syntax,
30427 no_parens: f.no_parens,
30428 quoted: f.quoted,
30429 span: None,
30430 }))),
30431 }
30432 }
30433
30434 /// Check if we can reliably infer the SQL type for Presto/Trino ROW CAST.
30435 /// Returns false for column references and other non-literal expressions where the type is unknown.
30436 fn can_infer_presto_type(expr: &Expression) -> bool {
30437 match expr {
30438 Expression::Literal(_) => true,
30439 Expression::Boolean(_) => true,
30440 Expression::Array(_) | Expression::ArrayFunc(_) => true,
30441 Expression::Struct(_) | Expression::StructFunc(_) => true,
30442 Expression::Function(f) => {
30443 let up = f.name.to_uppercase();
30444 up == "STRUCT"
30445 || up == "ROW"
30446 || up == "CURRENT_DATE"
30447 || up == "CURRENT_TIMESTAMP"
30448 || up == "NOW"
30449 }
30450 Expression::Cast(_) => true,
30451 Expression::Neg(inner) => Self::can_infer_presto_type(&inner.this),
30452 _ => false,
30453 }
30454 }
30455
30456 /// Infer SQL type name for a Presto/Trino ROW CAST from a literal expression
30457 fn infer_sql_type_for_presto(expr: &Expression) -> String {
30458 use crate::expressions::Literal;
30459 match expr {
30460 Expression::Literal(Literal::String(_)) => "VARCHAR".to_string(),
30461 Expression::Literal(Literal::Number(n)) => {
30462 if n.contains('.') {
30463 "DOUBLE".to_string()
30464 } else {
30465 "INTEGER".to_string()
30466 }
30467 }
30468 Expression::Boolean(_) => "BOOLEAN".to_string(),
30469 Expression::Literal(Literal::Date(_)) => "DATE".to_string(),
30470 Expression::Literal(Literal::Timestamp(_)) => "TIMESTAMP".to_string(),
30471 Expression::Literal(Literal::Datetime(_)) => "TIMESTAMP".to_string(),
30472 Expression::Array(_) | Expression::ArrayFunc(_) => "ARRAY(VARCHAR)".to_string(),
30473 Expression::Struct(_) | Expression::StructFunc(_) => "ROW".to_string(),
30474 Expression::Function(f) => {
30475 let up = f.name.to_uppercase();
30476 if up == "STRUCT" || up == "ROW" {
30477 "ROW".to_string()
30478 } else if up == "CURRENT_DATE" {
30479 "DATE".to_string()
30480 } else if up == "CURRENT_TIMESTAMP" || up == "NOW" {
30481 "TIMESTAMP".to_string()
30482 } else {
30483 "VARCHAR".to_string()
30484 }
30485 }
30486 Expression::Cast(c) => {
30487 // If already cast, use the target type
30488 Self::data_type_to_presto_string(&c.to)
30489 }
30490 _ => "VARCHAR".to_string(),
30491 }
30492 }
30493
30494 /// Convert a DataType to its Presto/Trino string representation for ROW type
30495 fn data_type_to_presto_string(dt: &crate::expressions::DataType) -> String {
30496 use crate::expressions::DataType;
30497 match dt {
30498 DataType::VarChar { .. } | DataType::Text | DataType::String { .. } => {
30499 "VARCHAR".to_string()
30500 }
30501 DataType::Int { .. }
30502 | DataType::BigInt { .. }
30503 | DataType::SmallInt { .. }
30504 | DataType::TinyInt { .. } => "INTEGER".to_string(),
30505 DataType::Float { .. } | DataType::Double { .. } => "DOUBLE".to_string(),
30506 DataType::Boolean => "BOOLEAN".to_string(),
30507 DataType::Date => "DATE".to_string(),
30508 DataType::Timestamp { .. } => "TIMESTAMP".to_string(),
30509 DataType::Struct { fields, .. } => {
30510 let field_strs: Vec<String> = fields
30511 .iter()
30512 .map(|f| {
30513 format!(
30514 "{} {}",
30515 f.name,
30516 Self::data_type_to_presto_string(&f.data_type)
30517 )
30518 })
30519 .collect();
30520 format!("ROW({})", field_strs.join(", "))
30521 }
30522 DataType::Array { element_type, .. } => {
30523 format!("ARRAY({})", Self::data_type_to_presto_string(element_type))
30524 }
30525 DataType::Custom { name } => {
30526 // Pass through custom type names (e.g., "INTEGER", "VARCHAR" from earlier inference)
30527 name.clone()
30528 }
30529 _ => "VARCHAR".to_string(),
30530 }
30531 }
30532
30533 /// Convert IntervalUnit to string
30534 fn interval_unit_to_string(unit: &crate::expressions::IntervalUnit) -> String {
30535 match unit {
30536 crate::expressions::IntervalUnit::Year => "YEAR".to_string(),
30537 crate::expressions::IntervalUnit::Quarter => "QUARTER".to_string(),
30538 crate::expressions::IntervalUnit::Month => "MONTH".to_string(),
30539 crate::expressions::IntervalUnit::Week => "WEEK".to_string(),
30540 crate::expressions::IntervalUnit::Day => "DAY".to_string(),
30541 crate::expressions::IntervalUnit::Hour => "HOUR".to_string(),
30542 crate::expressions::IntervalUnit::Minute => "MINUTE".to_string(),
30543 crate::expressions::IntervalUnit::Second => "SECOND".to_string(),
30544 crate::expressions::IntervalUnit::Millisecond => "MILLISECOND".to_string(),
30545 crate::expressions::IntervalUnit::Microsecond => "MICROSECOND".to_string(),
30546 crate::expressions::IntervalUnit::Nanosecond => "NANOSECOND".to_string(),
30547 }
30548 }
30549
30550 /// Extract unit string from an expression (uppercased)
30551 fn get_unit_str_static(expr: &Expression) -> String {
30552 use crate::expressions::Literal;
30553 match expr {
30554 Expression::Identifier(id) => id.name.to_uppercase(),
30555 Expression::Literal(Literal::String(s)) => s.to_uppercase(),
30556 Expression::Column(col) => col.name.name.to_uppercase(),
30557 Expression::Function(f) => {
30558 let base = f.name.to_uppercase();
30559 if !f.args.is_empty() {
30560 let inner = Self::get_unit_str_static(&f.args[0]);
30561 format!("{}({})", base, inner)
30562 } else {
30563 base
30564 }
30565 }
30566 _ => "DAY".to_string(),
30567 }
30568 }
30569
30570 /// Parse unit string to IntervalUnit
30571 fn parse_interval_unit_static(s: &str) -> crate::expressions::IntervalUnit {
30572 match s {
30573 "YEAR" | "YY" | "YYYY" => crate::expressions::IntervalUnit::Year,
30574 "QUARTER" | "QQ" | "Q" => crate::expressions::IntervalUnit::Quarter,
30575 "MONTH" | "MM" | "M" => crate::expressions::IntervalUnit::Month,
30576 "WEEK" | "WK" | "WW" | "ISOWEEK" => crate::expressions::IntervalUnit::Week,
30577 "DAY" | "DD" | "D" | "DY" => crate::expressions::IntervalUnit::Day,
30578 "HOUR" | "HH" => crate::expressions::IntervalUnit::Hour,
30579 "MINUTE" | "MI" | "N" => crate::expressions::IntervalUnit::Minute,
30580 "SECOND" | "SS" | "S" => crate::expressions::IntervalUnit::Second,
30581 "MILLISECOND" | "MS" => crate::expressions::IntervalUnit::Millisecond,
30582 "MICROSECOND" | "MCS" | "US" => crate::expressions::IntervalUnit::Microsecond,
30583 _ if s.starts_with("WEEK(") => crate::expressions::IntervalUnit::Week,
30584 _ => crate::expressions::IntervalUnit::Day,
30585 }
30586 }
30587
30588 /// Convert expression to simple string for interval building
30589 fn expr_to_string_static(expr: &Expression) -> String {
30590 use crate::expressions::Literal;
30591 match expr {
30592 Expression::Literal(Literal::Number(s)) => s.clone(),
30593 Expression::Literal(Literal::String(s)) => s.clone(),
30594 Expression::Identifier(id) => id.name.clone(),
30595 Expression::Neg(f) => format!("-{}", Self::expr_to_string_static(&f.this)),
30596 _ => "1".to_string(),
30597 }
30598 }
30599
30600 /// Extract a simple string representation from a literal expression
30601 fn expr_to_string(expr: &Expression) -> String {
30602 use crate::expressions::Literal;
30603 match expr {
30604 Expression::Literal(Literal::Number(s)) => s.clone(),
30605 Expression::Literal(Literal::String(s)) => s.clone(),
30606 Expression::Neg(f) => format!("-{}", Self::expr_to_string(&f.this)),
30607 Expression::Identifier(id) => id.name.clone(),
30608 _ => "1".to_string(),
30609 }
30610 }
30611
30612 /// Quote an interval value expression as a string literal if it's a number (or negated number)
30613 fn quote_interval_val(expr: &Expression) -> Expression {
30614 use crate::expressions::Literal;
30615 match expr {
30616 Expression::Literal(Literal::Number(n)) => {
30617 Expression::Literal(Literal::String(n.clone()))
30618 }
30619 Expression::Literal(Literal::String(_)) => expr.clone(),
30620 Expression::Neg(inner) => {
30621 if let Expression::Literal(Literal::Number(n)) = &inner.this {
30622 Expression::Literal(Literal::String(format!("-{}", n)))
30623 } else {
30624 expr.clone()
30625 }
30626 }
30627 _ => expr.clone(),
30628 }
30629 }
30630
30631 /// Check if a timestamp string contains timezone info (offset like +02:00, or named timezone)
30632 fn timestamp_string_has_timezone(ts: &str) -> bool {
30633 let trimmed = ts.trim();
30634 // Check for numeric timezone offsets: +N, -N, +NN:NN, -NN:NN at end
30635 if let Some(last_space) = trimmed.rfind(' ') {
30636 let suffix = &trimmed[last_space + 1..];
30637 if (suffix.starts_with('+') || suffix.starts_with('-')) && suffix.len() > 1 {
30638 let rest = &suffix[1..];
30639 if rest.chars().all(|c| c.is_ascii_digit() || c == ':') {
30640 return true;
30641 }
30642 }
30643 }
30644 // Check for named timezone abbreviations
30645 let ts_lower = trimmed.to_lowercase();
30646 let tz_abbrevs = [" utc", " gmt", " cet", " est", " pst", " cst", " mst"];
30647 for abbrev in &tz_abbrevs {
30648 if ts_lower.ends_with(abbrev) {
30649 return true;
30650 }
30651 }
30652 false
30653 }
30654
30655 /// Maybe CAST timestamp literal to TIMESTAMPTZ for Snowflake
30656 fn maybe_cast_ts_to_tz(expr: Expression, func_name: &str) -> Expression {
30657 use crate::expressions::{Cast, DataType, Literal};
30658 match expr {
30659 Expression::Literal(Literal::Timestamp(s)) => {
30660 let tz = func_name.starts_with("TIMESTAMP");
30661 Expression::Cast(Box::new(Cast {
30662 this: Expression::Literal(Literal::String(s)),
30663 to: if tz {
30664 DataType::Timestamp {
30665 timezone: true,
30666 precision: None,
30667 }
30668 } else {
30669 DataType::Timestamp {
30670 timezone: false,
30671 precision: None,
30672 }
30673 },
30674 trailing_comments: vec![],
30675 double_colon_syntax: false,
30676 format: None,
30677 default: None,
30678 }))
30679 }
30680 other => other,
30681 }
30682 }
30683
30684 /// Maybe CAST timestamp literal to TIMESTAMP (no tz)
30685 fn maybe_cast_ts(expr: Expression) -> Expression {
30686 use crate::expressions::{Cast, DataType, Literal};
30687 match expr {
30688 Expression::Literal(Literal::Timestamp(s)) => Expression::Cast(Box::new(Cast {
30689 this: Expression::Literal(Literal::String(s)),
30690 to: DataType::Timestamp {
30691 timezone: false,
30692 precision: None,
30693 },
30694 trailing_comments: vec![],
30695 double_colon_syntax: false,
30696 format: None,
30697 default: None,
30698 })),
30699 other => other,
30700 }
30701 }
30702
30703 /// Convert DATE 'x' literal to CAST('x' AS DATE)
30704 fn date_literal_to_cast(expr: Expression) -> Expression {
30705 use crate::expressions::{Cast, DataType, Literal};
30706 match expr {
30707 Expression::Literal(Literal::Date(s)) => Expression::Cast(Box::new(Cast {
30708 this: Expression::Literal(Literal::String(s)),
30709 to: DataType::Date,
30710 trailing_comments: vec![],
30711 double_colon_syntax: false,
30712 format: None,
30713 default: None,
30714 })),
30715 other => other,
30716 }
30717 }
30718
30719 /// Ensure an expression that should be a date is CAST(... AS DATE).
30720 /// Handles both DATE literals and string literals that look like dates.
30721 fn ensure_cast_date(expr: Expression) -> Expression {
30722 use crate::expressions::{Cast, DataType, Literal};
30723 match expr {
30724 Expression::Literal(Literal::Date(s)) => Expression::Cast(Box::new(Cast {
30725 this: Expression::Literal(Literal::String(s)),
30726 to: DataType::Date,
30727 trailing_comments: vec![],
30728 double_colon_syntax: false,
30729 format: None,
30730 default: None,
30731 })),
30732 Expression::Literal(Literal::String(ref _s)) => {
30733 // String literal that should be a date -> CAST('s' AS DATE)
30734 Expression::Cast(Box::new(Cast {
30735 this: expr,
30736 to: DataType::Date,
30737 trailing_comments: vec![],
30738 double_colon_syntax: false,
30739 format: None,
30740 default: None,
30741 }))
30742 }
30743 // Already a CAST or other expression -> leave as-is
30744 other => other,
30745 }
30746 }
30747
30748 /// Force CAST(expr AS DATE) for any expression (not just literals)
30749 /// Skips if the expression is already a CAST to DATE
30750 fn force_cast_date(expr: Expression) -> Expression {
30751 use crate::expressions::{Cast, DataType};
30752 // If it's already a CAST to DATE, don't double-wrap
30753 if let Expression::Cast(ref c) = expr {
30754 if matches!(c.to, DataType::Date) {
30755 return expr;
30756 }
30757 }
30758 Expression::Cast(Box::new(Cast {
30759 this: expr,
30760 to: DataType::Date,
30761 trailing_comments: vec![],
30762 double_colon_syntax: false,
30763 format: None,
30764 default: None,
30765 }))
30766 }
30767
    /// Internal TO_DATE function that won't be converted to CAST by the Snowflake handler.
    /// Uses the name `_POLYGLOT_TO_DATE` which is not recognized by the TO_DATE -> CAST logic.
    /// The Snowflake DATEDIFF handler converts these back to TO_DATE.
    const PRESERVED_TO_DATE: &'static str = "_POLYGLOT_TO_DATE";

    /// Wrap a string literal in the internal `_POLYGLOT_TO_DATE` marker
    /// function so later passes keep it as a TO_DATE call instead of folding
    /// it into a CAST; non-string expressions pass through unchanged.
    fn ensure_to_date_preserved(expr: Expression) -> Expression {
        use crate::expressions::{Function, Literal};
        if matches!(expr, Expression::Literal(Literal::String(_))) {
            Expression::Function(Box::new(Function::new(
                Self::PRESERVED_TO_DATE.to_string(),
                vec![expr],
            )))
        } else {
            expr
        }
    }
30784
30785 /// TRY_CAST(expr AS DATE) - used for DuckDB when TO_DATE is unwrapped
30786 fn try_cast_date(expr: Expression) -> Expression {
30787 use crate::expressions::{Cast, DataType};
30788 Expression::TryCast(Box::new(Cast {
30789 this: expr,
30790 to: DataType::Date,
30791 trailing_comments: vec![],
30792 double_colon_syntax: false,
30793 format: None,
30794 default: None,
30795 }))
30796 }
30797
30798 /// CAST(CAST(expr AS TIMESTAMP) AS DATE) - used when Hive string dates need to be cast
30799 fn double_cast_timestamp_date(expr: Expression) -> Expression {
30800 use crate::expressions::{Cast, DataType};
30801 let inner = Expression::Cast(Box::new(Cast {
30802 this: expr,
30803 to: DataType::Timestamp {
30804 timezone: false,
30805 precision: None,
30806 },
30807 trailing_comments: vec![],
30808 double_colon_syntax: false,
30809 format: None,
30810 default: None,
30811 }));
30812 Expression::Cast(Box::new(Cast {
30813 this: inner,
30814 to: DataType::Date,
30815 trailing_comments: vec![],
30816 double_colon_syntax: false,
30817 format: None,
30818 default: None,
30819 }))
30820 }
30821
30822 /// CAST(CAST(expr AS DATETIME) AS DATE) - BigQuery variant
30823 fn double_cast_datetime_date(expr: Expression) -> Expression {
30824 use crate::expressions::{Cast, DataType};
30825 let inner = Expression::Cast(Box::new(Cast {
30826 this: expr,
30827 to: DataType::Custom {
30828 name: "DATETIME".to_string(),
30829 },
30830 trailing_comments: vec![],
30831 double_colon_syntax: false,
30832 format: None,
30833 default: None,
30834 }));
30835 Expression::Cast(Box::new(Cast {
30836 this: inner,
30837 to: DataType::Date,
30838 trailing_comments: vec![],
30839 double_colon_syntax: false,
30840 format: None,
30841 default: None,
30842 }))
30843 }
30844
30845 /// CAST(CAST(expr AS DATETIME2) AS DATE) - TSQL variant
30846 fn double_cast_datetime2_date(expr: Expression) -> Expression {
30847 use crate::expressions::{Cast, DataType};
30848 let inner = Expression::Cast(Box::new(Cast {
30849 this: expr,
30850 to: DataType::Custom {
30851 name: "DATETIME2".to_string(),
30852 },
30853 trailing_comments: vec![],
30854 double_colon_syntax: false,
30855 format: None,
30856 default: None,
30857 }));
30858 Expression::Cast(Box::new(Cast {
30859 this: inner,
30860 to: DataType::Date,
30861 trailing_comments: vec![],
30862 double_colon_syntax: false,
30863 format: None,
30864 default: None,
30865 }))
30866 }
30867
30868 /// Convert Hive/Java-style date format strings to C-style (strftime) format
30869 /// e.g., "yyyy-MM-dd'T'HH" -> "%Y-%m-%d'T'%H"
30870 fn hive_format_to_c_format(fmt: &str) -> String {
30871 let mut result = String::new();
30872 let chars: Vec<char> = fmt.chars().collect();
30873 let mut i = 0;
30874 while i < chars.len() {
30875 match chars[i] {
30876 'y' => {
30877 let mut count = 0;
30878 while i < chars.len() && chars[i] == 'y' {
30879 count += 1;
30880 i += 1;
30881 }
30882 if count >= 4 {
30883 result.push_str("%Y");
30884 } else if count == 2 {
30885 result.push_str("%y");
30886 } else {
30887 result.push_str("%Y");
30888 }
30889 }
30890 'M' => {
30891 let mut count = 0;
30892 while i < chars.len() && chars[i] == 'M' {
30893 count += 1;
30894 i += 1;
30895 }
30896 if count >= 3 {
30897 result.push_str("%b");
30898 } else if count == 2 {
30899 result.push_str("%m");
30900 } else {
30901 result.push_str("%m");
30902 }
30903 }
30904 'd' => {
30905 let mut _count = 0;
30906 while i < chars.len() && chars[i] == 'd' {
30907 _count += 1;
30908 i += 1;
30909 }
30910 result.push_str("%d");
30911 }
30912 'H' => {
30913 let mut _count = 0;
30914 while i < chars.len() && chars[i] == 'H' {
30915 _count += 1;
30916 i += 1;
30917 }
30918 result.push_str("%H");
30919 }
30920 'h' => {
30921 let mut _count = 0;
30922 while i < chars.len() && chars[i] == 'h' {
30923 _count += 1;
30924 i += 1;
30925 }
30926 result.push_str("%I");
30927 }
30928 'm' => {
30929 let mut _count = 0;
30930 while i < chars.len() && chars[i] == 'm' {
30931 _count += 1;
30932 i += 1;
30933 }
30934 result.push_str("%M");
30935 }
30936 's' => {
30937 let mut _count = 0;
30938 while i < chars.len() && chars[i] == 's' {
30939 _count += 1;
30940 i += 1;
30941 }
30942 result.push_str("%S");
30943 }
30944 'S' => {
30945 // Fractional seconds - skip
30946 while i < chars.len() && chars[i] == 'S' {
30947 i += 1;
30948 }
30949 result.push_str("%f");
30950 }
30951 'a' => {
30952 // AM/PM
30953 while i < chars.len() && chars[i] == 'a' {
30954 i += 1;
30955 }
30956 result.push_str("%p");
30957 }
30958 'E' => {
30959 let mut count = 0;
30960 while i < chars.len() && chars[i] == 'E' {
30961 count += 1;
30962 i += 1;
30963 }
30964 if count >= 4 {
30965 result.push_str("%A");
30966 } else {
30967 result.push_str("%a");
30968 }
30969 }
30970 '\'' => {
30971 // Quoted literal text - pass through the quotes and content
30972 result.push('\'');
30973 i += 1;
30974 while i < chars.len() && chars[i] != '\'' {
30975 result.push(chars[i]);
30976 i += 1;
30977 }
30978 if i < chars.len() {
30979 result.push('\'');
30980 i += 1;
30981 }
30982 }
30983 c => {
30984 result.push(c);
30985 i += 1;
30986 }
30987 }
30988 }
30989 result
30990 }
30991
30992 /// Convert Hive/Java format to Presto format (uses %T for HH:mm:ss)
30993 fn hive_format_to_presto_format(fmt: &str) -> String {
30994 let c_fmt = Self::hive_format_to_c_format(fmt);
30995 // Presto uses %T for HH:MM:SS
30996 c_fmt.replace("%H:%M:%S", "%T")
30997 }
30998
30999 /// Ensure a timestamp-like expression for DuckDB with CAST(... AS TIMESTAMP)
31000 fn ensure_cast_timestamp(expr: Expression) -> Expression {
31001 use crate::expressions::{Cast, DataType, Literal};
31002 match expr {
31003 Expression::Literal(Literal::Timestamp(s)) => Expression::Cast(Box::new(Cast {
31004 this: Expression::Literal(Literal::String(s)),
31005 to: DataType::Timestamp {
31006 timezone: false,
31007 precision: None,
31008 },
31009 trailing_comments: vec![],
31010 double_colon_syntax: false,
31011 format: None,
31012 default: None,
31013 })),
31014 Expression::Literal(Literal::String(ref _s)) => Expression::Cast(Box::new(Cast {
31015 this: expr,
31016 to: DataType::Timestamp {
31017 timezone: false,
31018 precision: None,
31019 },
31020 trailing_comments: vec![],
31021 double_colon_syntax: false,
31022 format: None,
31023 default: None,
31024 })),
31025 Expression::Literal(Literal::Datetime(s)) => Expression::Cast(Box::new(Cast {
31026 this: Expression::Literal(Literal::String(s)),
31027 to: DataType::Timestamp {
31028 timezone: false,
31029 precision: None,
31030 },
31031 trailing_comments: vec![],
31032 double_colon_syntax: false,
31033 format: None,
31034 default: None,
31035 })),
31036 other => other,
31037 }
31038 }
31039
31040 /// Force CAST to TIMESTAMP for any expression (not just literals)
31041 /// Used when transpiling from Redshift/TSQL where DATEDIFF/DATEADD args need explicit timestamp cast
31042 fn force_cast_timestamp(expr: Expression) -> Expression {
31043 use crate::expressions::{Cast, DataType};
31044 // Don't double-wrap if already a CAST to TIMESTAMP
31045 if let Expression::Cast(ref c) = expr {
31046 if matches!(c.to, DataType::Timestamp { .. }) {
31047 return expr;
31048 }
31049 }
31050 Expression::Cast(Box::new(Cast {
31051 this: expr,
31052 to: DataType::Timestamp {
31053 timezone: false,
31054 precision: None,
31055 },
31056 trailing_comments: vec![],
31057 double_colon_syntax: false,
31058 format: None,
31059 default: None,
31060 }))
31061 }
31062
31063 /// Ensure a timestamp-like expression for DuckDB with CAST(... AS TIMESTAMPTZ)
31064 fn ensure_cast_timestamptz(expr: Expression) -> Expression {
31065 use crate::expressions::{Cast, DataType, Literal};
31066 match expr {
31067 Expression::Literal(Literal::Timestamp(s)) => Expression::Cast(Box::new(Cast {
31068 this: Expression::Literal(Literal::String(s)),
31069 to: DataType::Timestamp {
31070 timezone: true,
31071 precision: None,
31072 },
31073 trailing_comments: vec![],
31074 double_colon_syntax: false,
31075 format: None,
31076 default: None,
31077 })),
31078 Expression::Literal(Literal::String(ref _s)) => Expression::Cast(Box::new(Cast {
31079 this: expr,
31080 to: DataType::Timestamp {
31081 timezone: true,
31082 precision: None,
31083 },
31084 trailing_comments: vec![],
31085 double_colon_syntax: false,
31086 format: None,
31087 default: None,
31088 })),
31089 Expression::Literal(Literal::Datetime(s)) => Expression::Cast(Box::new(Cast {
31090 this: Expression::Literal(Literal::String(s)),
31091 to: DataType::Timestamp {
31092 timezone: true,
31093 precision: None,
31094 },
31095 trailing_comments: vec![],
31096 double_colon_syntax: false,
31097 format: None,
31098 default: None,
31099 })),
31100 other => other,
31101 }
31102 }
31103
31104 /// Ensure expression is CAST to DATETIME (for BigQuery)
31105 fn ensure_cast_datetime(expr: Expression) -> Expression {
31106 use crate::expressions::{Cast, DataType, Literal};
31107 match expr {
31108 Expression::Literal(Literal::String(ref _s)) => Expression::Cast(Box::new(Cast {
31109 this: expr,
31110 to: DataType::Custom {
31111 name: "DATETIME".to_string(),
31112 },
31113 trailing_comments: vec![],
31114 double_colon_syntax: false,
31115 format: None,
31116 default: None,
31117 })),
31118 other => other,
31119 }
31120 }
31121
31122 /// Force CAST expression to DATETIME (for BigQuery) - always wraps unless already DATETIME
31123 fn force_cast_datetime(expr: Expression) -> Expression {
31124 use crate::expressions::{Cast, DataType};
31125 if let Expression::Cast(ref c) = expr {
31126 if let DataType::Custom { ref name } = c.to {
31127 if name.eq_ignore_ascii_case("DATETIME") {
31128 return expr;
31129 }
31130 }
31131 }
31132 Expression::Cast(Box::new(Cast {
31133 this: expr,
31134 to: DataType::Custom {
31135 name: "DATETIME".to_string(),
31136 },
31137 trailing_comments: vec![],
31138 double_colon_syntax: false,
31139 format: None,
31140 default: None,
31141 }))
31142 }
31143
31144 /// Ensure expression is CAST to DATETIME2 (for TSQL)
31145 fn ensure_cast_datetime2(expr: Expression) -> Expression {
31146 use crate::expressions::{Cast, DataType, Literal};
31147 match expr {
31148 Expression::Literal(Literal::String(ref _s)) => Expression::Cast(Box::new(Cast {
31149 this: expr,
31150 to: DataType::Custom {
31151 name: "DATETIME2".to_string(),
31152 },
31153 trailing_comments: vec![],
31154 double_colon_syntax: false,
31155 format: None,
31156 default: None,
31157 })),
31158 other => other,
31159 }
31160 }
31161
31162 /// Convert TIMESTAMP 'x' literal to CAST('x' AS TIMESTAMPTZ) for DuckDB
31163 fn ts_literal_to_cast_tz(expr: Expression) -> Expression {
31164 use crate::expressions::{Cast, DataType, Literal};
31165 match expr {
31166 Expression::Literal(Literal::Timestamp(s)) => Expression::Cast(Box::new(Cast {
31167 this: Expression::Literal(Literal::String(s)),
31168 to: DataType::Timestamp {
31169 timezone: true,
31170 precision: None,
31171 },
31172 trailing_comments: vec![],
31173 double_colon_syntax: false,
31174 format: None,
31175 default: None,
31176 })),
31177 other => other,
31178 }
31179 }
31180
31181 /// Convert BigQuery format string to Snowflake format string
31182 fn bq_format_to_snowflake(format_expr: &Expression) -> Expression {
31183 use crate::expressions::Literal;
31184 if let Expression::Literal(Literal::String(s)) = format_expr {
31185 let sf = s
31186 .replace("%Y", "yyyy")
31187 .replace("%m", "mm")
31188 .replace("%d", "DD")
31189 .replace("%H", "HH24")
31190 .replace("%M", "MI")
31191 .replace("%S", "SS")
31192 .replace("%b", "mon")
31193 .replace("%B", "Month")
31194 .replace("%e", "FMDD");
31195 Expression::Literal(Literal::String(sf))
31196 } else {
31197 format_expr.clone()
31198 }
31199 }
31200
31201 /// Convert BigQuery format string to DuckDB format string
31202 fn bq_format_to_duckdb(format_expr: &Expression) -> Expression {
31203 use crate::expressions::Literal;
31204 if let Expression::Literal(Literal::String(s)) = format_expr {
31205 let duck = s
31206 .replace("%T", "%H:%M:%S")
31207 .replace("%F", "%Y-%m-%d")
31208 .replace("%D", "%m/%d/%y")
31209 .replace("%x", "%m/%d/%y")
31210 .replace("%c", "%a %b %-d %H:%M:%S %Y")
31211 .replace("%e", "%-d")
31212 .replace("%E6S", "%S.%f");
31213 Expression::Literal(Literal::String(duck))
31214 } else {
31215 format_expr.clone()
31216 }
31217 }
31218
    /// Convert BigQuery CAST FORMAT elements (like YYYY, MM, DD) to strftime (like %Y, %m, %d)
    ///
    /// Non-string expressions are returned unchanged.
    fn bq_cast_format_to_strftime(format_expr: &Expression) -> Expression {
        use crate::expressions::Literal;
        if let Expression::Literal(Literal::String(s)) = format_expr {
            // Replace format elements from longest to shortest to avoid partial matches
            // (e.g. "HH24"/"HH12" before "HH", "SSTZH" before "SS" and "TZH");
            // this chain's ordering is load-bearing — do not reorder.
            let result = s
                .replace("YYYYMMDD", "%Y%m%d")
                .replace("YYYY", "%Y")
                .replace("YY", "%y")
                .replace("MONTH", "%B")
                .replace("MON", "%b")
                .replace("MM", "%m")
                .replace("DD", "%d")
                .replace("HH24", "%H")
                .replace("HH12", "%I")
                .replace("HH", "%I")
                .replace("MI", "%M")
                .replace("SSTZH", "%S%z")
                .replace("SS", "%S")
                .replace("TZH", "%z");
            Expression::Literal(Literal::String(result))
        } else {
            format_expr.clone()
        }
    }
31244
31245 /// Normalize BigQuery format strings for BQ->BQ output
31246 fn bq_format_normalize_bq(format_expr: &Expression) -> Expression {
31247 use crate::expressions::Literal;
31248 if let Expression::Literal(Literal::String(s)) = format_expr {
31249 let norm = s.replace("%H:%M:%S", "%T").replace("%x", "%D");
31250 Expression::Literal(Literal::String(norm))
31251 } else {
31252 format_expr.clone()
31253 }
31254 }
31255}
31256
31257#[cfg(test)]
31258mod tests {
31259 use super::*;
31260
    #[test]
    fn test_dialect_type_from_str() {
        // Both the short and long spellings resolve to PostgreSQL.
        assert_eq!(
            "postgres".parse::<DialectType>().unwrap(),
            DialectType::PostgreSQL
        );
        assert_eq!(
            "postgresql".parse::<DialectType>().unwrap(),
            DialectType::PostgreSQL
        );
        assert_eq!("mysql".parse::<DialectType>().unwrap(), DialectType::MySQL);
        assert_eq!(
            "bigquery".parse::<DialectType>().unwrap(),
            DialectType::BigQuery
        );
    }

    #[test]
    fn test_basic_transpile() {
        // A trivial statement round-trips unchanged and yields exactly one
        // output statement.
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect
            .transpile_to("SELECT 1", DialectType::PostgreSQL)
            .unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0], "SELECT 1");
    }

    #[test]
    fn test_function_transformation_mysql() {
        // NVL should be transformed to IFNULL in MySQL
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect
            .transpile_to("SELECT NVL(a, b)", DialectType::MySQL)
            .unwrap();
        assert_eq!(result[0], "SELECT IFNULL(a, b)");
    }
31297
    #[test]
    fn test_get_path_duckdb() {
        // NOTE(review): diagnostic smoke test — it prints intermediate results
        // but makes no assertions, so it only verifies these transpiles do not
        // panic or return Err. Consider pinning expected outputs.
        // Test: step by step
        let snowflake = Dialect::get(DialectType::Snowflake);

        // Step 1: Parse and check what Snowflake produces as intermediate
        let result_sf_sf = snowflake
            .transpile_to(
                "SELECT PARSE_JSON('{\"fruit\":\"banana\"}'):fruit",
                DialectType::Snowflake,
            )
            .unwrap();
        eprintln!("Snowflake->Snowflake colon: {}", result_sf_sf[0]);

        // Step 2: DuckDB target
        let result_sf_dk = snowflake
            .transpile_to(
                "SELECT PARSE_JSON('{\"fruit\":\"banana\"}'):fruit",
                DialectType::DuckDB,
            )
            .unwrap();
        eprintln!("Snowflake->DuckDB colon: {}", result_sf_dk[0]);

        // Step 3: GET_PATH directly
        let result_gp = snowflake
            .transpile_to(
                "SELECT GET_PATH(PARSE_JSON('{\"fruit\":\"banana\"}'), 'fruit')",
                DialectType::DuckDB,
            )
            .unwrap();
        eprintln!("Snowflake->DuckDB explicit GET_PATH: {}", result_gp[0]);
    }
31330
    #[test]
    fn test_function_transformation_postgres() {
        // IFNULL should be transformed to COALESCE in PostgreSQL
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect
            .transpile_to("SELECT IFNULL(a, b)", DialectType::PostgreSQL)
            .unwrap();
        assert_eq!(result[0], "SELECT COALESCE(a, b)");

        // NVL should also be transformed to COALESCE
        let result = dialect
            .transpile_to("SELECT NVL(a, b)", DialectType::PostgreSQL)
            .unwrap();
        assert_eq!(result[0], "SELECT COALESCE(a, b)");
    }

    #[test]
    fn test_hive_cast_to_trycast() {
        // Hive CAST should become TRY_CAST for targets that support it
        let hive = Dialect::get(DialectType::Hive);
        let result = hive
            .transpile_to("CAST(1 AS INT)", DialectType::DuckDB)
            .unwrap();
        assert_eq!(result[0], "TRY_CAST(1 AS INT)");

        // Presto additionally normalizes INT to its canonical INTEGER name.
        let result = hive
            .transpile_to("CAST(1 AS INT)", DialectType::Presto)
            .unwrap();
        assert_eq!(result[0], "TRY_CAST(1 AS INTEGER)");
    }
31361
    #[test]
    fn test_hive_array_identity() {
        // Hive ARRAY<DATE> should preserve angle bracket syntax
        // Checked through both entry points, since transpile_to and the manual
        // parse -> transform -> generate pipeline can diverge.
        let sql = "CREATE EXTERNAL TABLE `my_table` (`a7` ARRAY<DATE>) ROW FORMAT SERDE 'a' STORED AS INPUTFORMAT 'b' OUTPUTFORMAT 'c' LOCATION 'd' TBLPROPERTIES ('e'='f')";
        let hive = Dialect::get(DialectType::Hive);

        // Test via transpile_to (this works)
        let result = hive.transpile_to(sql, DialectType::Hive).unwrap();
        eprintln!("Hive ARRAY via transpile_to: {}", result[0]);
        assert!(
            result[0].contains("ARRAY<DATE>"),
            "transpile_to: Expected ARRAY<DATE>, got: {}",
            result[0]
        );

        // Test via parse -> transform -> generate (identity test path)
        let ast = hive.parse(sql).unwrap();
        let transformed = hive.transform(ast[0].clone()).unwrap();
        let output = hive.generate(&transformed).unwrap();
        eprintln!("Hive ARRAY via identity path: {}", output);
        assert!(
            output.contains("ARRAY<DATE>"),
            "identity path: Expected ARRAY<DATE>, got: {}",
            output
        );
    }
31388
    #[test]
    fn test_starrocks_delete_between_expansion() {
        // StarRocks doesn't support BETWEEN in DELETE statements
        // The rewrite must therefore apply only inside DELETE, not in SELECT.
        let dialect = Dialect::get(DialectType::Generic);

        // BETWEEN should be expanded to >= AND <= in DELETE
        let result = dialect
            .transpile_to(
                "DELETE FROM t WHERE a BETWEEN b AND c",
                DialectType::StarRocks,
            )
            .unwrap();
        assert_eq!(result[0], "DELETE FROM t WHERE a >= b AND a <= c");

        // NOT BETWEEN should be expanded to < OR > in DELETE
        let result = dialect
            .transpile_to(
                "DELETE FROM t WHERE a NOT BETWEEN b AND c",
                DialectType::StarRocks,
            )
            .unwrap();
        assert_eq!(result[0], "DELETE FROM t WHERE a < b OR a > c");

        // BETWEEN in SELECT should NOT be expanded (StarRocks supports it there)
        let result = dialect
            .transpile_to(
                "SELECT * FROM t WHERE a BETWEEN b AND c",
                DialectType::StarRocks,
            )
            .unwrap();
        assert!(
            result[0].contains("BETWEEN"),
            "BETWEEN should be preserved in SELECT"
        );
    }
31424
    // The next three tests are parse-regression smoke tests: they assert only
    // that transpilation succeeds (is_ok), not the exact output text.
    #[test]
    fn test_snowflake_ltrim_rtrim_parse() {
        let sf = Dialect::get(DialectType::Snowflake);
        let sql = "SELECT LTRIM(RTRIM(col)) FROM t1";
        let result = sf.transpile_to(sql, DialectType::DuckDB);
        match &result {
            Ok(r) => eprintln!("LTRIM/RTRIM result: {}", r[0]),
            Err(e) => eprintln!("LTRIM/RTRIM error: {}", e),
        }
        assert!(
            result.is_ok(),
            "Expected successful parse of LTRIM(RTRIM(col)), got error: {:?}",
            result.err()
        );
    }

    #[test]
    fn test_duckdb_count_if_parse() {
        let duck = Dialect::get(DialectType::DuckDB);
        let sql = "COUNT_IF(x)";
        let result = duck.transpile_to(sql, DialectType::DuckDB);
        match &result {
            Ok(r) => eprintln!("COUNT_IF result: {}", r[0]),
            Err(e) => eprintln!("COUNT_IF error: {}", e),
        }
        assert!(
            result.is_ok(),
            "Expected successful parse of COUNT_IF(x), got error: {:?}",
            result.err()
        );
    }

    #[test]
    fn test_tsql_cast_tinyint_parse() {
        let tsql = Dialect::get(DialectType::TSQL);
        let sql = "CAST(X AS TINYINT)";
        let result = tsql.transpile_to(sql, DialectType::DuckDB);
        match &result {
            Ok(r) => eprintln!("TSQL CAST TINYINT result: {}", r[0]),
            Err(e) => eprintln!("TSQL CAST TINYINT error: {}", e),
        }
        assert!(
            result.is_ok(),
            "Expected successful transpile, got error: {:?}",
            result.err()
        );
    }
31472
    #[test]
    fn test_pg_hash_bitwise_xor() {
        // PostgreSQL's '#' (bitwise XOR) operator must survive a PG->PG
        // round-trip unchanged.
        let dialect = Dialect::get(DialectType::PostgreSQL);
        let result = dialect
            .transpile_to("x # y", DialectType::PostgreSQL)
            .unwrap();
        assert_eq!(result[0], "x # y");
    }

    #[test]
    fn test_pg_array_to_duckdb() {
        // PG ARRAY[...] constructors become DuckDB bracket literals, while the
        // '@>' containment operator is kept as-is.
        let dialect = Dialect::get(DialectType::PostgreSQL);
        let result = dialect
            .transpile_to("SELECT ARRAY[1, 2, 3] @> ARRAY[1, 2]", DialectType::DuckDB)
            .unwrap();
        assert_eq!(result[0], "SELECT [1, 2, 3] @> [1, 2]");
    }

    #[test]
    fn test_array_remove_bigquery() {
        // BigQuery has no ARRAY_REMOVE; it is rewritten as an ARRAY(SELECT ...)
        // filter over UNNEST.
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect
            .transpile_to("ARRAY_REMOVE(the_array, target)", DialectType::BigQuery)
            .unwrap();
        assert_eq!(
            result[0],
            "ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)"
        );
    }

    #[test]
    fn test_map_clickhouse_case() {
        // NOTE(review): print-only smoke test — no assertions; it only checks
        // that parsing and transpiling MAP casts to ClickHouse don't panic.
        let dialect = Dialect::get(DialectType::Generic);
        let parsed = dialect
            .parse("CAST(MAP('a', '1') AS MAP(TEXT, TEXT))")
            .unwrap();
        eprintln!("MAP parsed: {:?}", parsed);
        let result = dialect
            .transpile_to(
                "CAST(MAP('a', '1') AS MAP(TEXT, TEXT))",
                DialectType::ClickHouse,
            )
            .unwrap();
        eprintln!("MAP result: {}", result[0]);
    }
31518
31519 #[test]
31520 fn test_generate_date_array_presto() {
31521 let dialect = Dialect::get(DialectType::Generic);
31522 let result = dialect.transpile_to(
31523 "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
31524 DialectType::Presto,
31525 ).unwrap();
31526 eprintln!("GDA -> Presto: {}", result[0]);
31527 assert_eq!(result[0], "SELECT * FROM UNNEST(SEQUENCE(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), (1 * INTERVAL '7' DAY)))");
31528 }
31529
#[test]
fn test_generate_date_array_postgres() {
    // Smoke test only: verify the GENERATE_DATE_ARRAY rewrite to PostgreSQL
    // succeeds; the produced SQL is printed, not asserted.
    let generic = Dialect::get(DialectType::Generic);
    let out = generic
        .transpile_to(
            "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
            DialectType::PostgreSQL,
        )
        .unwrap();
    eprintln!("GDA -> PostgreSQL: {}", out[0]);
}
31539
#[test]
fn test_generate_date_array_snowflake() {
    // Run on a worker thread with a 16 MiB stack; the Snowflake rewrite
    // presumably recurses deeply enough to blow the default test-thread
    // stack (NOTE(review): confirm the recursion-depth assumption).
    let worker = std::thread::Builder::new()
        .stack_size(16 * 1024 * 1024)
        .spawn(|| {
            let generic = Dialect::get(DialectType::Generic);
            let out = generic
                .transpile_to(
                    "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
                    DialectType::Snowflake,
                )
                .unwrap();
            eprintln!("GDA -> Snowflake: {}", out[0]);
        })
        .unwrap();
    worker.join().unwrap();
}
31556
#[test]
fn test_array_length_generate_date_array_snowflake() {
    // Smoke test: ARRAY_LENGTH wrapping a GENERATE_DATE_ARRAY must transpile
    // to Snowflake without erroring; output is printed, not asserted.
    let generic = Dialect::get(DialectType::Generic);
    let out = generic
        .transpile_to(
            "SELECT ARRAY_LENGTH(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
            DialectType::Snowflake,
        )
        .unwrap();
    eprintln!("ARRAY_LENGTH(GDA) -> Snowflake: {}", out[0]);
}
31566
#[test]
fn test_generate_date_array_mysql() {
    // Smoke test only: the GENERATE_DATE_ARRAY rewrite targeting MySQL must
    // succeed; the result is printed for inspection.
    let generic = Dialect::get(DialectType::Generic);
    let out = generic
        .transpile_to(
            "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
            DialectType::MySQL,
        )
        .unwrap();
    eprintln!("GDA -> MySQL: {}", out[0]);
}
31576
#[test]
fn test_generate_date_array_redshift() {
    // Smoke test only: the GENERATE_DATE_ARRAY rewrite targeting Redshift
    // must succeed; the result is printed for inspection.
    let generic = Dialect::get(DialectType::Generic);
    let out = generic
        .transpile_to(
            "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
            DialectType::Redshift,
        )
        .unwrap();
    eprintln!("GDA -> Redshift: {}", out[0]);
}
31586
#[test]
fn test_generate_date_array_tsql() {
    // Smoke test only: the GENERATE_DATE_ARRAY rewrite targeting TSQL must
    // succeed; the result is printed for inspection.
    let generic = Dialect::get(DialectType::Generic);
    let out = generic
        .transpile_to(
            "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
            DialectType::TSQL,
        )
        .unwrap();
    eprintln!("GDA -> TSQL: {}", out[0]);
}
31596
#[test]
fn test_struct_colon_syntax() {
    // Exploratory test: exercise STRUCT<...> casts both without and with the
    // `name: TYPE` colon syntax. Results (or errors) are printed, never
    // asserted, so this only guards against panics in either path.
    let generic = Dialect::get(DialectType::Generic);

    let plain = generic.transpile_to(
        "CAST((1, 2, 3, 4) AS STRUCT<a TINYINT, b SMALLINT, c INT, d BIGINT>)",
        DialectType::ClickHouse,
    );
    match plain {
        Ok(r) => eprintln!("STRUCT no colon -> ClickHouse: {}", r[0]),
        Err(e) => eprintln!("STRUCT no colon error: {}", e),
    }

    let with_colon = generic.transpile_to(
        "CAST((1, 2, 3, 4) AS STRUCT<a: TINYINT, b: SMALLINT, c: INT, d: BIGINT>)",
        DialectType::ClickHouse,
    );
    match with_colon {
        Ok(r) => eprintln!("STRUCT colon -> ClickHouse: {}", r[0]),
        Err(e) => eprintln!("STRUCT colon error: {}", e),
    }
}
31619
#[test]
fn test_generate_date_array_cte_wrapped_mysql() {
    // Smoke test: GENERATE_DATE_ARRAY nested inside a CTE must still rewrite
    // for MySQL without erroring; output is printed, not asserted.
    let generic = Dialect::get(DialectType::Generic);
    let out = generic
        .transpile_to(
            "WITH dates AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))) SELECT * FROM dates",
            DialectType::MySQL,
        )
        .unwrap();
    eprintln!("GDA CTE -> MySQL: {}", out[0]);
}
31629
#[test]
fn test_generate_date_array_cte_wrapped_tsql() {
    // Smoke test: GENERATE_DATE_ARRAY nested inside a CTE must still rewrite
    // for TSQL without erroring; output is printed, not asserted.
    let generic = Dialect::get(DialectType::Generic);
    let out = generic
        .transpile_to(
            "WITH dates AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))) SELECT * FROM dates",
            DialectType::TSQL,
        )
        .unwrap();
    eprintln!("GDA CTE -> TSQL: {}", out[0]);
}
31639
#[test]
fn test_decode_literal_no_null_check() {
    // When every DECODE argument is a literal, the CASE expansion should be
    // a plain equality test with no null-safety clutter.
    let oracle = Dialect::get(DialectType::Oracle);
    let out = oracle
        .transpile_to("SELECT decode(1,2,3,4)", DialectType::DuckDB)
        .unwrap();
    assert_eq!(
        out[0], "SELECT CASE WHEN 1 = 2 THEN 3 ELSE 4 END",
        "Literal DECODE should not have IS NULL checks"
    );
}
31652
#[test]
fn test_decode_column_vs_literal_no_null_check() {
    // A column compared against a literal search value also expands to plain
    // equality (matching sqlglot's behavior) — no null-safe comparison.
    let oracle = Dialect::get(DialectType::Oracle);
    let out = oracle
        .transpile_to("SELECT decode(col, 2, 3, 4) FROM t", DialectType::DuckDB)
        .unwrap();
    assert_eq!(
        out[0], "SELECT CASE WHEN col = 2 THEN 3 ELSE 4 END FROM t",
        "Column vs literal DECODE should not have IS NULL checks"
    );
}
31665
#[test]
fn test_decode_column_vs_column_keeps_null_check() {
    // Comparing two columns can hit NULL = NULL, so the expansion must keep
    // Oracle's null-safe DECODE semantics via explicit IS NULL checks.
    let oracle = Dialect::get(DialectType::Oracle);
    let out = oracle
        .transpile_to("SELECT decode(col, col2, 3, 4) FROM t", DialectType::DuckDB)
        .unwrap();
    assert!(
        out[0].contains("IS NULL"),
        "Column vs column DECODE should have IS NULL checks, got: {}",
        out[0]
    );
}
31679
#[test]
fn test_decode_null_search() {
    // A literal NULL search value must compile to an IS NULL predicate rather
    // than an equality test (which would never match in SQL semantics).
    let oracle = Dialect::get(DialectType::Oracle);
    let out = oracle
        .transpile_to("SELECT decode(col, NULL, 3, 4) FROM t", DialectType::DuckDB)
        .unwrap();
    assert_eq!(
        out[0],
        "SELECT CASE WHEN col IS NULL THEN 3 ELSE 4 END FROM t"
    );
}
31692}