polyglot_sql/dialects/mod.rs
1//! SQL Dialect System
2//!
3//! This module implements the dialect abstraction layer that enables SQL transpilation
4//! between 30+ database engines. Each dialect encapsulates three concerns:
5//!
6//! - **Tokenization**: Dialect-specific lexing rules (e.g., BigQuery uses backtick quoting,
7//! MySQL uses backtick for identifiers, TSQL uses square brackets).
8//! - **Generation**: How AST nodes are rendered back to SQL text, including identifier quoting
9//! style, function name casing, and syntax variations.
10//! - **Transformation**: AST-level rewrites that convert dialect-specific constructs to/from
11//! a normalized form (e.g., Snowflake `SQUARE(x)` becomes `POWER(x, 2)`).
12//!
13//! The primary entry point is [`Dialect::get`], which returns a configured [`Dialect`] instance
14//! for a given [`DialectType`]. From there, callers can [`parse`](Dialect::parse),
15//! [`generate`](Dialect::generate), [`transform`](Dialect::transform), or
16//! [`transpile_to`](Dialect::transpile_to) another dialect in a single call.
17//!
18//! Each concrete dialect (e.g., `PostgresDialect`, `BigQueryDialect`) implements the
19//! [`DialectImpl`] trait, which provides configuration hooks and expression-level transforms.
20//! Dialect modules live in submodules of this module and are re-exported here.
21
22mod generic; // Always compiled
23
24#[cfg(feature = "dialect-athena")]
25mod athena;
26#[cfg(feature = "dialect-bigquery")]
27mod bigquery;
28#[cfg(feature = "dialect-clickhouse")]
29mod clickhouse;
30#[cfg(feature = "dialect-cockroachdb")]
31mod cockroachdb;
32#[cfg(feature = "dialect-databricks")]
33mod databricks;
34#[cfg(feature = "dialect-datafusion")]
35mod datafusion;
36#[cfg(feature = "dialect-doris")]
37mod doris;
38#[cfg(feature = "dialect-dremio")]
39mod dremio;
40#[cfg(feature = "dialect-drill")]
41mod drill;
42#[cfg(feature = "dialect-druid")]
43mod druid;
44#[cfg(feature = "dialect-duckdb")]
45mod duckdb;
46#[cfg(feature = "dialect-dune")]
47mod dune;
48#[cfg(feature = "dialect-exasol")]
49mod exasol;
50#[cfg(feature = "dialect-fabric")]
51mod fabric;
52#[cfg(feature = "dialect-hive")]
53mod hive;
54#[cfg(feature = "dialect-materialize")]
55mod materialize;
56#[cfg(feature = "dialect-mysql")]
57mod mysql;
58#[cfg(feature = "dialect-oracle")]
59mod oracle;
60#[cfg(feature = "dialect-postgresql")]
61mod postgres;
62#[cfg(feature = "dialect-presto")]
63mod presto;
64#[cfg(feature = "dialect-redshift")]
65mod redshift;
66#[cfg(feature = "dialect-risingwave")]
67mod risingwave;
68#[cfg(feature = "dialect-singlestore")]
69mod singlestore;
70#[cfg(feature = "dialect-snowflake")]
71mod snowflake;
72#[cfg(feature = "dialect-solr")]
73mod solr;
74#[cfg(feature = "dialect-spark")]
75mod spark;
76#[cfg(feature = "dialect-sqlite")]
77mod sqlite;
78#[cfg(feature = "dialect-starrocks")]
79mod starrocks;
80#[cfg(feature = "dialect-tableau")]
81mod tableau;
82#[cfg(feature = "dialect-teradata")]
83mod teradata;
84#[cfg(feature = "dialect-tidb")]
85mod tidb;
86#[cfg(feature = "dialect-trino")]
87mod trino;
88#[cfg(feature = "dialect-tsql")]
89mod tsql;
90
91pub use generic::GenericDialect; // Always available
92
93#[cfg(feature = "dialect-athena")]
94pub use athena::AthenaDialect;
95#[cfg(feature = "dialect-bigquery")]
96pub use bigquery::BigQueryDialect;
97#[cfg(feature = "dialect-clickhouse")]
98pub use clickhouse::ClickHouseDialect;
99#[cfg(feature = "dialect-cockroachdb")]
100pub use cockroachdb::CockroachDBDialect;
101#[cfg(feature = "dialect-databricks")]
102pub use databricks::DatabricksDialect;
103#[cfg(feature = "dialect-datafusion")]
104pub use datafusion::DataFusionDialect;
105#[cfg(feature = "dialect-doris")]
106pub use doris::DorisDialect;
107#[cfg(feature = "dialect-dremio")]
108pub use dremio::DremioDialect;
109#[cfg(feature = "dialect-drill")]
110pub use drill::DrillDialect;
111#[cfg(feature = "dialect-druid")]
112pub use druid::DruidDialect;
113#[cfg(feature = "dialect-duckdb")]
114pub use duckdb::DuckDBDialect;
115#[cfg(feature = "dialect-dune")]
116pub use dune::DuneDialect;
117#[cfg(feature = "dialect-exasol")]
118pub use exasol::ExasolDialect;
119#[cfg(feature = "dialect-fabric")]
120pub use fabric::FabricDialect;
121#[cfg(feature = "dialect-hive")]
122pub use hive::HiveDialect;
123#[cfg(feature = "dialect-materialize")]
124pub use materialize::MaterializeDialect;
125#[cfg(feature = "dialect-mysql")]
126pub use mysql::MySQLDialect;
127#[cfg(feature = "dialect-oracle")]
128pub use oracle::OracleDialect;
129#[cfg(feature = "dialect-postgresql")]
130pub use postgres::PostgresDialect;
131#[cfg(feature = "dialect-presto")]
132pub use presto::PrestoDialect;
133#[cfg(feature = "dialect-redshift")]
134pub use redshift::RedshiftDialect;
135#[cfg(feature = "dialect-risingwave")]
136pub use risingwave::RisingWaveDialect;
137#[cfg(feature = "dialect-singlestore")]
138pub use singlestore::SingleStoreDialect;
139#[cfg(feature = "dialect-snowflake")]
140pub use snowflake::SnowflakeDialect;
141#[cfg(feature = "dialect-solr")]
142pub use solr::SolrDialect;
143#[cfg(feature = "dialect-spark")]
144pub use spark::SparkDialect;
145#[cfg(feature = "dialect-sqlite")]
146pub use sqlite::SQLiteDialect;
147#[cfg(feature = "dialect-starrocks")]
148pub use starrocks::StarRocksDialect;
149#[cfg(feature = "dialect-tableau")]
150pub use tableau::TableauDialect;
151#[cfg(feature = "dialect-teradata")]
152pub use teradata::TeradataDialect;
153#[cfg(feature = "dialect-tidb")]
154pub use tidb::TiDBDialect;
155#[cfg(feature = "dialect-trino")]
156pub use trino::TrinoDialect;
157#[cfg(feature = "dialect-tsql")]
158pub use tsql::TSQLDialect;
159
160use crate::error::Result;
161use crate::expressions::{Expression, FunctionBody};
162use crate::generator::{Generator, GeneratorConfig};
163use crate::parser::Parser;
164use crate::tokens::{Tokenizer, TokenizerConfig};
165use serde::{Deserialize, Serialize};
166use std::collections::HashMap;
167use std::sync::{Arc, LazyLock, RwLock};
168
/// Enumeration of all supported SQL dialects.
///
/// Each variant corresponds to a specific SQL database engine or query language.
/// The `Generic` variant represents standard SQL with no dialect-specific behavior,
/// and is used as the default when no dialect is specified.
///
/// Dialect names are case-insensitive when parsed from strings via [`FromStr`].
/// Some dialects accept aliases (e.g., "mssql" and "sqlserver" both resolve to [`TSQL`](DialectType::TSQL)).
///
/// Serde serialization uses the lowercase variant name (via `rename_all = "lowercase"`,
/// e.g., `PostgreSQL` serializes as "postgresql"), which matches the
/// [`Display`](std::fmt::Display) output for every variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum DialectType {
    /// Standard SQL with no dialect-specific behavior (default).
    Generic,
    /// PostgreSQL -- advanced open-source relational database (also accepts "postgres").
    PostgreSQL,
    /// MySQL -- widely-used open-source relational database.
    MySQL,
    /// Google BigQuery -- serverless cloud data warehouse with unique syntax (backtick quoting, STRUCT types, QUALIFY).
    BigQuery,
    /// Snowflake -- cloud data platform with QUALIFY clause, FLATTEN, and variant types.
    Snowflake,
    /// DuckDB -- in-process analytical database with modern SQL extensions.
    DuckDB,
    /// SQLite -- lightweight embedded relational database.
    SQLite,
    /// Apache Hive -- data warehouse on Hadoop with HiveQL syntax.
    Hive,
    /// Apache Spark SQL -- distributed query engine (also accepts "spark2").
    Spark,
    /// Trino -- distributed SQL query engine (formerly PrestoSQL).
    Trino,
    /// PrestoDB -- distributed SQL query engine for big data.
    Presto,
    /// Amazon Redshift -- cloud data warehouse based on PostgreSQL.
    Redshift,
    /// Transact-SQL (T-SQL) -- Microsoft SQL Server and Azure SQL (also accepts "mssql", "sqlserver").
    TSQL,
    /// Oracle Database -- commercial relational database with PL/SQL extensions.
    Oracle,
    /// ClickHouse -- column-oriented OLAP database for real-time analytics.
    ClickHouse,
    /// Databricks SQL -- Spark-based lakehouse platform with QUALIFY support.
    Databricks,
    /// Amazon Athena -- serverless query service (hybrid Trino/Hive engine).
    Athena,
    /// Teradata -- enterprise data warehouse with proprietary SQL extensions.
    Teradata,
    /// Apache Doris -- real-time analytical database (MySQL-compatible).
    Doris,
    /// StarRocks -- sub-second OLAP database (MySQL-compatible).
    StarRocks,
    /// Materialize -- streaming SQL database built on differential dataflow.
    Materialize,
    /// RisingWave -- distributed streaming database with PostgreSQL compatibility.
    RisingWave,
    /// SingleStore (formerly MemSQL) -- distributed SQL database (also accepts "memsql").
    SingleStore,
    /// CockroachDB -- distributed SQL database with PostgreSQL compatibility (also accepts "cockroach").
    CockroachDB,
    /// TiDB -- distributed HTAP database with MySQL compatibility.
    TiDB,
    /// Apache Druid -- real-time analytics database.
    Druid,
    /// Apache Solr -- search platform with SQL interface.
    Solr,
    /// Tableau -- data visualization platform with its own SQL dialect.
    Tableau,
    /// Dune Analytics -- blockchain analytics SQL engine.
    Dune,
    /// Microsoft Fabric -- unified analytics platform (T-SQL based).
    Fabric,
    /// Apache Drill -- schema-free SQL query engine for big data.
    Drill,
    /// Dremio -- data lakehouse platform with Arrow-based query engine.
    Dremio,
    /// Exasol -- in-memory analytic database.
    Exasol,
    /// Apache DataFusion -- Arrow-based query engine (also accepts "arrow-datafusion", "arrow_datafusion").
    DataFusion,
}
249
250impl Default for DialectType {
251 fn default() -> Self {
252 DialectType::Generic
253 }
254}
255
256impl std::fmt::Display for DialectType {
257 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
258 match self {
259 DialectType::Generic => write!(f, "generic"),
260 DialectType::PostgreSQL => write!(f, "postgresql"),
261 DialectType::MySQL => write!(f, "mysql"),
262 DialectType::BigQuery => write!(f, "bigquery"),
263 DialectType::Snowflake => write!(f, "snowflake"),
264 DialectType::DuckDB => write!(f, "duckdb"),
265 DialectType::SQLite => write!(f, "sqlite"),
266 DialectType::Hive => write!(f, "hive"),
267 DialectType::Spark => write!(f, "spark"),
268 DialectType::Trino => write!(f, "trino"),
269 DialectType::Presto => write!(f, "presto"),
270 DialectType::Redshift => write!(f, "redshift"),
271 DialectType::TSQL => write!(f, "tsql"),
272 DialectType::Oracle => write!(f, "oracle"),
273 DialectType::ClickHouse => write!(f, "clickhouse"),
274 DialectType::Databricks => write!(f, "databricks"),
275 DialectType::Athena => write!(f, "athena"),
276 DialectType::Teradata => write!(f, "teradata"),
277 DialectType::Doris => write!(f, "doris"),
278 DialectType::StarRocks => write!(f, "starrocks"),
279 DialectType::Materialize => write!(f, "materialize"),
280 DialectType::RisingWave => write!(f, "risingwave"),
281 DialectType::SingleStore => write!(f, "singlestore"),
282 DialectType::CockroachDB => write!(f, "cockroachdb"),
283 DialectType::TiDB => write!(f, "tidb"),
284 DialectType::Druid => write!(f, "druid"),
285 DialectType::Solr => write!(f, "solr"),
286 DialectType::Tableau => write!(f, "tableau"),
287 DialectType::Dune => write!(f, "dune"),
288 DialectType::Fabric => write!(f, "fabric"),
289 DialectType::Drill => write!(f, "drill"),
290 DialectType::Dremio => write!(f, "dremio"),
291 DialectType::Exasol => write!(f, "exasol"),
292 DialectType::DataFusion => write!(f, "datafusion"),
293 }
294 }
295}
296
297impl std::str::FromStr for DialectType {
298 type Err = crate::error::Error;
299
300 fn from_str(s: &str) -> Result<Self> {
301 match s.to_lowercase().as_str() {
302 "generic" | "" => Ok(DialectType::Generic),
303 "postgres" | "postgresql" => Ok(DialectType::PostgreSQL),
304 "mysql" => Ok(DialectType::MySQL),
305 "bigquery" => Ok(DialectType::BigQuery),
306 "snowflake" => Ok(DialectType::Snowflake),
307 "duckdb" => Ok(DialectType::DuckDB),
308 "sqlite" => Ok(DialectType::SQLite),
309 "hive" => Ok(DialectType::Hive),
310 "spark" | "spark2" => Ok(DialectType::Spark),
311 "trino" => Ok(DialectType::Trino),
312 "presto" => Ok(DialectType::Presto),
313 "redshift" => Ok(DialectType::Redshift),
314 "tsql" | "mssql" | "sqlserver" => Ok(DialectType::TSQL),
315 "oracle" => Ok(DialectType::Oracle),
316 "clickhouse" => Ok(DialectType::ClickHouse),
317 "databricks" => Ok(DialectType::Databricks),
318 "athena" => Ok(DialectType::Athena),
319 "teradata" => Ok(DialectType::Teradata),
320 "doris" => Ok(DialectType::Doris),
321 "starrocks" => Ok(DialectType::StarRocks),
322 "materialize" => Ok(DialectType::Materialize),
323 "risingwave" => Ok(DialectType::RisingWave),
324 "singlestore" | "memsql" => Ok(DialectType::SingleStore),
325 "cockroachdb" | "cockroach" => Ok(DialectType::CockroachDB),
326 "tidb" => Ok(DialectType::TiDB),
327 "druid" => Ok(DialectType::Druid),
328 "solr" => Ok(DialectType::Solr),
329 "tableau" => Ok(DialectType::Tableau),
330 "dune" => Ok(DialectType::Dune),
331 "fabric" => Ok(DialectType::Fabric),
332 "drill" => Ok(DialectType::Drill),
333 "dremio" => Ok(DialectType::Dremio),
334 "exasol" => Ok(DialectType::Exasol),
335 "datafusion" | "arrow-datafusion" | "arrow_datafusion" => Ok(DialectType::DataFusion),
336 _ => Err(crate::error::Error::parse(
337 format!("Unknown dialect: {}", s),
338 0,
339 0,
340 )),
341 }
342 }
343}
344
/// Trait that each concrete SQL dialect must implement.
///
/// `DialectImpl` provides the configuration hooks and per-expression transform logic
/// that distinguish one dialect from another. Implementors supply:
///
/// - A [`DialectType`] identifier.
/// - Optional overrides for tokenizer and generator configuration (defaults to generic SQL).
/// - An expression-level transform function ([`transform_expr`](DialectImpl::transform_expr))
///   that rewrites individual AST nodes for this dialect (e.g., converting `NVL` to `COALESCE`).
/// - An optional preprocessing step ([`preprocess`](DialectImpl::preprocess)) for whole-tree
///   rewrites that must run before the recursive per-node transform (e.g., eliminating QUALIFY).
///
/// The default implementations are no-ops, so a minimal dialect only needs to provide
/// [`dialect_type`](DialectImpl::dialect_type) and override the methods that differ from
/// standard SQL.
pub trait DialectImpl {
    /// Returns the [`DialectType`] that identifies this dialect.
    fn dialect_type(&self) -> DialectType;

    /// Returns the tokenizer configuration for this dialect.
    ///
    /// Override to customize identifier quoting characters, string escape rules,
    /// comment styles, and other lexing behavior. The default is generic SQL lexing.
    fn tokenizer_config(&self) -> TokenizerConfig {
        TokenizerConfig::default()
    }

    /// Returns the generator configuration for this dialect.
    ///
    /// Override to customize identifier quoting style, function name casing,
    /// keyword casing, and other SQL generation behavior. The default is generic
    /// SQL output.
    fn generator_config(&self) -> GeneratorConfig {
        GeneratorConfig::default()
    }

    /// Returns a generator configuration tailored to a specific expression.
    ///
    /// Override this for hybrid dialects like Athena that route to different SQL engines
    /// based on expression type (e.g., Hive-style generation for DDL, Trino-style for DML).
    /// The default ignores the expression and delegates to
    /// [`generator_config`](DialectImpl::generator_config).
    fn generator_config_for_expr(&self, _expr: &Expression) -> GeneratorConfig {
        self.generator_config()
    }

    /// Transforms a single expression node for this dialect, without recursing into children.
    ///
    /// This is the per-node rewrite hook invoked by [`transform_recursive`]. Return the
    /// expression unchanged if no dialect-specific rewrite is needed. Transformations
    /// typically include function renaming, operator substitution, and type mapping.
    /// The default returns the expression untouched.
    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
        Ok(expr)
    }

    /// Applies whole-tree preprocessing transforms before the recursive per-node pass.
    ///
    /// Override this to apply structural rewrites that must see the entire tree at once,
    /// such as `eliminate_qualify`, `eliminate_distinct_on`, `ensure_bools`, or
    /// `explode_projection_to_unnest`. The default is a no-op pass-through.
    fn preprocess(&self, expr: Expression) -> Result<Expression> {
        Ok(expr)
    }
}
407
408/// Recursively transforms a [`DataType`](crate::expressions::DataType), handling nested
409/// parametric types such as `ARRAY<INT>`, `STRUCT<a INT, b TEXT>`, and `MAP<STRING, INT>`.
410///
411/// The outer type is first passed through `transform_fn` as an `Expression::DataType`,
412/// and then nested element/field types are recursed into. This ensures that dialect-level
413/// type mappings (e.g., `INT` to `INTEGER`) propagate into complex nested types.
414fn transform_data_type_recursive<F>(
415 dt: crate::expressions::DataType,
416 transform_fn: &F,
417) -> Result<crate::expressions::DataType>
418where
419 F: Fn(Expression) -> Result<Expression>,
420{
421 use crate::expressions::DataType;
422 // First, transform the outermost type through the expression system
423 let dt_expr = transform_fn(Expression::DataType(dt))?;
424 let dt = match dt_expr {
425 Expression::DataType(d) => d,
426 _ => {
427 return Ok(match dt_expr {
428 _ => DataType::Custom {
429 name: "UNKNOWN".to_string(),
430 },
431 })
432 }
433 };
434 // Then recurse into nested types
435 match dt {
436 DataType::Array {
437 element_type,
438 dimension,
439 } => {
440 let inner = transform_data_type_recursive(*element_type, transform_fn)?;
441 Ok(DataType::Array {
442 element_type: Box::new(inner),
443 dimension,
444 })
445 }
446 DataType::List { element_type } => {
447 let inner = transform_data_type_recursive(*element_type, transform_fn)?;
448 Ok(DataType::List {
449 element_type: Box::new(inner),
450 })
451 }
452 DataType::Struct { fields, nested } => {
453 let mut new_fields = Vec::new();
454 for mut field in fields {
455 field.data_type = transform_data_type_recursive(field.data_type, transform_fn)?;
456 new_fields.push(field);
457 }
458 Ok(DataType::Struct {
459 fields: new_fields,
460 nested,
461 })
462 }
463 DataType::Map {
464 key_type,
465 value_type,
466 } => {
467 let k = transform_data_type_recursive(*key_type, transform_fn)?;
468 let v = transform_data_type_recursive(*value_type, transform_fn)?;
469 Ok(DataType::Map {
470 key_type: Box::new(k),
471 value_type: Box::new(v),
472 })
473 }
474 other => Ok(other),
475 }
476}
477
/// Convert DuckDB C-style format strings to Presto C-style format strings.
/// DuckDB and Presto both use C-style % directives but with different specifiers for some cases.
///
/// Rewrites applied (longest match first at each `%`):
/// - `%-m` -> `%c`, `%-d` -> `%e`, `%-I` -> `%l`, `%-H` -> `%k` (no-pad forms)
/// - `%H:%M:%S` as a unit -> `%T`
/// - standalone `%M` -> `%i`, `%S` -> `%s`
///
/// This is a single left-to-right scan, so each input character is rewritten at
/// most once. (The previous implementation used `\x01` sentinel placeholders,
/// which silently corrupted any input that itself contained the sentinel byte,
/// and performed a no-op protect/restore of `%Y-%m-%d`.)
fn duckdb_to_presto_format(fmt: &str) -> String {
    // Ordered longest-match-first so "%H:%M:%S" wins over its "%M"/"%S" parts.
    const RULES: [(&str, &str); 7] = [
        ("%-m", "%c"),
        ("%-d", "%e"),
        ("%-I", "%l"),
        ("%-H", "%k"),
        ("%H:%M:%S", "%T"),
        ("%M", "%i"),
        ("%S", "%s"),
    ];
    let mut out = String::with_capacity(fmt.len());
    let mut rest = fmt;
    'scan: while !rest.is_empty() {
        if rest.starts_with('%') {
            for (from, to) in RULES {
                if let Some(tail) = rest.strip_prefix(from) {
                    out.push_str(to);
                    rest = tail;
                    continue 'scan;
                }
            }
        }
        // No directive matched here: copy one character verbatim.
        let mut chars = rest.chars();
        // `rest` is non-empty per the loop condition, so `next()` cannot fail.
        out.push(chars.next().expect("non-empty remainder"));
        rest = chars.as_str();
    }
    out
}
502
/// Convert DuckDB C-style format strings to BigQuery format strings.
/// BigQuery uses a mix of strftime-like directives.
fn duckdb_to_bigquery_format(fmt: &str) -> String {
    // Table-driven rewrite; order matters, so the composite datetime pattern
    // precedes its date-only and time-only components.
    let rules = [
        ("%-d", "%e"),
        ("%Y-%m-%d %H:%M:%S", "%F %T"),
        ("%Y-%m-%d", "%F"),
        ("%H:%M:%S", "%T"),
    ];
    let mut out = fmt.to_string();
    for (from, to) in rules {
        out = out.replace(from, to);
    }
    out
}
514
515/// Applies a transform function bottom-up through an entire expression tree.
516///
517/// This is the core tree-rewriting engine used by the dialect system. It performs
518/// a post-order (children-first) traversal: for each node, all children are recursively
519/// transformed before the node itself is passed to `transform_fn`. This bottom-up
520/// strategy means that when `transform_fn` sees a node, its children have already
521/// been rewritten, which simplifies pattern matching on sub-expressions.
522///
523/// The function handles all expression variants including SELECT clauses (FROM, WHERE,
524/// GROUP BY, HAVING, ORDER BY, QUALIFY, WITH/CTEs, WINDOW), binary operators,
525/// function calls, CASE expressions, date/time functions, and more.
526///
527/// # Arguments
528///
529/// * `expr` - The root expression to transform (consumed).
530/// * `transform_fn` - A closure that receives each expression node (after its children
531/// have been transformed) and returns a possibly-rewritten expression.
532///
533/// # Errors
534///
535/// Returns an error if `transform_fn` returns an error for any node.
536pub fn transform_recursive<F>(expr: Expression, transform_fn: &F) -> Result<Expression>
537where
538 F: Fn(Expression) -> Result<Expression>,
539{
540 use crate::expressions::BinaryOp;
541
542 // Helper macro to transform binary ops with Box<BinaryOp>
543 macro_rules! transform_binary {
544 ($variant:ident, $op:expr) => {{
545 let left = transform_recursive($op.left, transform_fn)?;
546 let right = transform_recursive($op.right, transform_fn)?;
547 Expression::$variant(Box::new(BinaryOp {
548 left,
549 right,
550 left_comments: $op.left_comments,
551 operator_comments: $op.operator_comments,
552 trailing_comments: $op.trailing_comments,
553 }))
554 }};
555 }
556
557 // First recursively transform children, then apply the transform function
558 let expr = match expr {
559 Expression::Select(mut select) => {
560 select.expressions = select
561 .expressions
562 .into_iter()
563 .map(|e| transform_recursive(e, transform_fn))
564 .collect::<Result<Vec<_>>>()?;
565
566 // Transform FROM clause
567 if let Some(mut from) = select.from.take() {
568 from.expressions = from
569 .expressions
570 .into_iter()
571 .map(|e| transform_recursive(e, transform_fn))
572 .collect::<Result<Vec<_>>>()?;
573 select.from = Some(from);
574 }
575
576 // Transform JOINs - important for CROSS APPLY / LATERAL transformations
577 select.joins = select
578 .joins
579 .into_iter()
580 .map(|mut join| {
581 join.this = transform_recursive(join.this, transform_fn)?;
582 if let Some(on) = join.on.take() {
583 join.on = Some(transform_recursive(on, transform_fn)?);
584 }
585 // Wrap join in Expression::Join to allow transform_fn to transform it
586 match transform_fn(Expression::Join(Box::new(join)))? {
587 Expression::Join(j) => Ok(*j),
588 _ => Err(crate::error::Error::parse(
589 "Join transformation returned non-join expression",
590 0,
591 0,
592 )),
593 }
594 })
595 .collect::<Result<Vec<_>>>()?;
596
597 // Transform LATERAL VIEW expressions (Hive/Spark)
598 select.lateral_views = select
599 .lateral_views
600 .into_iter()
601 .map(|mut lv| {
602 lv.this = transform_recursive(lv.this, transform_fn)?;
603 Ok(lv)
604 })
605 .collect::<Result<Vec<_>>>()?;
606
607 // Transform WHERE clause
608 if let Some(mut where_clause) = select.where_clause.take() {
609 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
610 select.where_clause = Some(where_clause);
611 }
612
613 // Transform GROUP BY
614 if let Some(mut group_by) = select.group_by.take() {
615 group_by.expressions = group_by
616 .expressions
617 .into_iter()
618 .map(|e| transform_recursive(e, transform_fn))
619 .collect::<Result<Vec<_>>>()?;
620 select.group_by = Some(group_by);
621 }
622
623 // Transform HAVING
624 if let Some(mut having) = select.having.take() {
625 having.this = transform_recursive(having.this, transform_fn)?;
626 select.having = Some(having);
627 }
628
629 // Transform WITH (CTEs)
630 if let Some(mut with) = select.with.take() {
631 with.ctes = with
632 .ctes
633 .into_iter()
634 .map(|mut cte| {
635 let original = cte.this.clone();
636 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
637 cte
638 })
639 .collect();
640 select.with = Some(with);
641 }
642
643 // Transform ORDER BY
644 if let Some(mut order) = select.order_by.take() {
645 order.expressions = order
646 .expressions
647 .into_iter()
648 .map(|o| {
649 let mut o = o;
650 let original = o.this.clone();
651 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
652 // Also apply transform to the Ordered wrapper itself (for NULLS FIRST etc.)
653 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
654 Ok(Expression::Ordered(transformed)) => *transformed,
655 Ok(_) | Err(_) => o,
656 }
657 })
658 .collect();
659 select.order_by = Some(order);
660 }
661
662 // Transform WINDOW clause order_by
663 if let Some(ref mut windows) = select.windows {
664 for nw in windows.iter_mut() {
665 nw.spec.order_by = std::mem::take(&mut nw.spec.order_by)
666 .into_iter()
667 .map(|o| {
668 let mut o = o;
669 let original = o.this.clone();
670 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
671 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
672 Ok(Expression::Ordered(transformed)) => *transformed,
673 Ok(_) | Err(_) => o,
674 }
675 })
676 .collect();
677 }
678 }
679
680 // Transform QUALIFY
681 if let Some(mut qual) = select.qualify.take() {
682 qual.this = transform_recursive(qual.this, transform_fn)?;
683 select.qualify = Some(qual);
684 }
685
686 Expression::Select(select)
687 }
688 Expression::Function(mut f) => {
689 f.args = f
690 .args
691 .into_iter()
692 .map(|e| transform_recursive(e, transform_fn))
693 .collect::<Result<Vec<_>>>()?;
694 Expression::Function(f)
695 }
696 Expression::AggregateFunction(mut f) => {
697 f.args = f
698 .args
699 .into_iter()
700 .map(|e| transform_recursive(e, transform_fn))
701 .collect::<Result<Vec<_>>>()?;
702 if let Some(filter) = f.filter {
703 f.filter = Some(transform_recursive(filter, transform_fn)?);
704 }
705 Expression::AggregateFunction(f)
706 }
707 Expression::WindowFunction(mut wf) => {
708 wf.this = transform_recursive(wf.this, transform_fn)?;
709 wf.over.partition_by = wf
710 .over
711 .partition_by
712 .into_iter()
713 .map(|e| transform_recursive(e, transform_fn))
714 .collect::<Result<Vec<_>>>()?;
715 // Transform order_by items through Expression::Ordered wrapper
716 wf.over.order_by = wf
717 .over
718 .order_by
719 .into_iter()
720 .map(|o| {
721 let mut o = o;
722 o.this = transform_recursive(o.this, transform_fn)?;
723 match transform_fn(Expression::Ordered(Box::new(o)))? {
724 Expression::Ordered(transformed) => Ok(*transformed),
725 _ => Err(crate::error::Error::parse(
726 "Ordered transformation returned non-Ordered expression",
727 0,
728 0,
729 )),
730 }
731 })
732 .collect::<Result<Vec<_>>>()?;
733 Expression::WindowFunction(wf)
734 }
735 Expression::Alias(mut a) => {
736 a.this = transform_recursive(a.this, transform_fn)?;
737 Expression::Alias(a)
738 }
739 Expression::Cast(mut c) => {
740 c.this = transform_recursive(c.this, transform_fn)?;
741 // Also transform the target data type (recursively for nested types like ARRAY<INT>, STRUCT<a INT>)
742 c.to = transform_data_type_recursive(c.to, transform_fn)?;
743 Expression::Cast(c)
744 }
745 Expression::And(op) => transform_binary!(And, *op),
746 Expression::Or(op) => transform_binary!(Or, *op),
747 Expression::Add(op) => transform_binary!(Add, *op),
748 Expression::Sub(op) => transform_binary!(Sub, *op),
749 Expression::Mul(op) => transform_binary!(Mul, *op),
750 Expression::Div(op) => transform_binary!(Div, *op),
751 Expression::Eq(op) => transform_binary!(Eq, *op),
752 Expression::Lt(op) => transform_binary!(Lt, *op),
753 Expression::Gt(op) => transform_binary!(Gt, *op),
754 Expression::Paren(mut p) => {
755 p.this = transform_recursive(p.this, transform_fn)?;
756 Expression::Paren(p)
757 }
758 Expression::Coalesce(mut f) => {
759 f.expressions = f
760 .expressions
761 .into_iter()
762 .map(|e| transform_recursive(e, transform_fn))
763 .collect::<Result<Vec<_>>>()?;
764 Expression::Coalesce(f)
765 }
766 Expression::IfNull(mut f) => {
767 f.this = transform_recursive(f.this, transform_fn)?;
768 f.expression = transform_recursive(f.expression, transform_fn)?;
769 Expression::IfNull(f)
770 }
771 Expression::Nvl(mut f) => {
772 f.this = transform_recursive(f.this, transform_fn)?;
773 f.expression = transform_recursive(f.expression, transform_fn)?;
774 Expression::Nvl(f)
775 }
776 Expression::In(mut i) => {
777 i.this = transform_recursive(i.this, transform_fn)?;
778 i.expressions = i
779 .expressions
780 .into_iter()
781 .map(|e| transform_recursive(e, transform_fn))
782 .collect::<Result<Vec<_>>>()?;
783 if let Some(query) = i.query {
784 i.query = Some(transform_recursive(query, transform_fn)?);
785 }
786 Expression::In(i)
787 }
788 Expression::Not(mut n) => {
789 n.this = transform_recursive(n.this, transform_fn)?;
790 Expression::Not(n)
791 }
792 Expression::ArraySlice(mut s) => {
793 s.this = transform_recursive(s.this, transform_fn)?;
794 if let Some(start) = s.start {
795 s.start = Some(transform_recursive(start, transform_fn)?);
796 }
797 if let Some(end) = s.end {
798 s.end = Some(transform_recursive(end, transform_fn)?);
799 }
800 Expression::ArraySlice(s)
801 }
802 Expression::Subscript(mut s) => {
803 s.this = transform_recursive(s.this, transform_fn)?;
804 s.index = transform_recursive(s.index, transform_fn)?;
805 Expression::Subscript(s)
806 }
807 Expression::Array(mut a) => {
808 a.expressions = a
809 .expressions
810 .into_iter()
811 .map(|e| transform_recursive(e, transform_fn))
812 .collect::<Result<Vec<_>>>()?;
813 Expression::Array(a)
814 }
815 Expression::Struct(mut s) => {
816 let mut new_fields = Vec::new();
817 for (name, expr) in s.fields {
818 let transformed = transform_recursive(expr, transform_fn)?;
819 new_fields.push((name, transformed));
820 }
821 s.fields = new_fields;
822 Expression::Struct(s)
823 }
824 Expression::NamedArgument(mut na) => {
825 na.value = transform_recursive(na.value, transform_fn)?;
826 Expression::NamedArgument(na)
827 }
828 Expression::MapFunc(mut m) => {
829 m.keys = m
830 .keys
831 .into_iter()
832 .map(|e| transform_recursive(e, transform_fn))
833 .collect::<Result<Vec<_>>>()?;
834 m.values = m
835 .values
836 .into_iter()
837 .map(|e| transform_recursive(e, transform_fn))
838 .collect::<Result<Vec<_>>>()?;
839 Expression::MapFunc(m)
840 }
841 Expression::ArrayFunc(mut a) => {
842 a.expressions = a
843 .expressions
844 .into_iter()
845 .map(|e| transform_recursive(e, transform_fn))
846 .collect::<Result<Vec<_>>>()?;
847 Expression::ArrayFunc(a)
848 }
849 Expression::Lambda(mut l) => {
850 l.body = transform_recursive(l.body, transform_fn)?;
851 Expression::Lambda(l)
852 }
853 Expression::JsonExtract(mut f) => {
854 f.this = transform_recursive(f.this, transform_fn)?;
855 f.path = transform_recursive(f.path, transform_fn)?;
856 Expression::JsonExtract(f)
857 }
858 Expression::JsonExtractScalar(mut f) => {
859 f.this = transform_recursive(f.this, transform_fn)?;
860 f.path = transform_recursive(f.path, transform_fn)?;
861 Expression::JsonExtractScalar(f)
862 }
863
864 // ===== UnaryFunc-based expressions =====
865 // These all have a single `this: Expression` child
866 Expression::Length(mut f) => {
867 f.this = transform_recursive(f.this, transform_fn)?;
868 Expression::Length(f)
869 }
870 Expression::Upper(mut f) => {
871 f.this = transform_recursive(f.this, transform_fn)?;
872 Expression::Upper(f)
873 }
874 Expression::Lower(mut f) => {
875 f.this = transform_recursive(f.this, transform_fn)?;
876 Expression::Lower(f)
877 }
878 Expression::LTrim(mut f) => {
879 f.this = transform_recursive(f.this, transform_fn)?;
880 Expression::LTrim(f)
881 }
882 Expression::RTrim(mut f) => {
883 f.this = transform_recursive(f.this, transform_fn)?;
884 Expression::RTrim(f)
885 }
886 Expression::Reverse(mut f) => {
887 f.this = transform_recursive(f.this, transform_fn)?;
888 Expression::Reverse(f)
889 }
890 Expression::Abs(mut f) => {
891 f.this = transform_recursive(f.this, transform_fn)?;
892 Expression::Abs(f)
893 }
894 Expression::Ceil(mut f) => {
895 f.this = transform_recursive(f.this, transform_fn)?;
896 Expression::Ceil(f)
897 }
898 Expression::Floor(mut f) => {
899 f.this = transform_recursive(f.this, transform_fn)?;
900 Expression::Floor(f)
901 }
902 Expression::Sign(mut f) => {
903 f.this = transform_recursive(f.this, transform_fn)?;
904 Expression::Sign(f)
905 }
906 Expression::Sqrt(mut f) => {
907 f.this = transform_recursive(f.this, transform_fn)?;
908 Expression::Sqrt(f)
909 }
910 Expression::Cbrt(mut f) => {
911 f.this = transform_recursive(f.this, transform_fn)?;
912 Expression::Cbrt(f)
913 }
914 Expression::Ln(mut f) => {
915 f.this = transform_recursive(f.this, transform_fn)?;
916 Expression::Ln(f)
917 }
918 Expression::Log(mut f) => {
919 f.this = transform_recursive(f.this, transform_fn)?;
920 if let Some(base) = f.base {
921 f.base = Some(transform_recursive(base, transform_fn)?);
922 }
923 Expression::Log(f)
924 }
925 Expression::Exp(mut f) => {
926 f.this = transform_recursive(f.this, transform_fn)?;
927 Expression::Exp(f)
928 }
929 Expression::Date(mut f) => {
930 f.this = transform_recursive(f.this, transform_fn)?;
931 Expression::Date(f)
932 }
933 Expression::Stddev(mut f) => {
934 f.this = transform_recursive(f.this, transform_fn)?;
935 Expression::Stddev(f)
936 }
937 Expression::Variance(mut f) => {
938 f.this = transform_recursive(f.this, transform_fn)?;
939 Expression::Variance(f)
940 }
941
942 // ===== BinaryFunc-based expressions =====
943 Expression::ModFunc(mut f) => {
944 f.this = transform_recursive(f.this, transform_fn)?;
945 f.expression = transform_recursive(f.expression, transform_fn)?;
946 Expression::ModFunc(f)
947 }
948 Expression::Power(mut f) => {
949 f.this = transform_recursive(f.this, transform_fn)?;
950 f.expression = transform_recursive(f.expression, transform_fn)?;
951 Expression::Power(f)
952 }
953 Expression::MapFromArrays(mut f) => {
954 f.this = transform_recursive(f.this, transform_fn)?;
955 f.expression = transform_recursive(f.expression, transform_fn)?;
956 Expression::MapFromArrays(f)
957 }
958 Expression::ElementAt(mut f) => {
959 f.this = transform_recursive(f.this, transform_fn)?;
960 f.expression = transform_recursive(f.expression, transform_fn)?;
961 Expression::ElementAt(f)
962 }
963 Expression::MapContainsKey(mut f) => {
964 f.this = transform_recursive(f.this, transform_fn)?;
965 f.expression = transform_recursive(f.expression, transform_fn)?;
966 Expression::MapContainsKey(f)
967 }
968 Expression::Left(mut f) => {
969 f.this = transform_recursive(f.this, transform_fn)?;
970 f.length = transform_recursive(f.length, transform_fn)?;
971 Expression::Left(f)
972 }
973 Expression::Right(mut f) => {
974 f.this = transform_recursive(f.this, transform_fn)?;
975 f.length = transform_recursive(f.length, transform_fn)?;
976 Expression::Right(f)
977 }
978 Expression::Repeat(mut f) => {
979 f.this = transform_recursive(f.this, transform_fn)?;
980 f.times = transform_recursive(f.times, transform_fn)?;
981 Expression::Repeat(f)
982 }
983
984 // ===== Complex function expressions =====
985 Expression::Substring(mut f) => {
986 f.this = transform_recursive(f.this, transform_fn)?;
987 f.start = transform_recursive(f.start, transform_fn)?;
988 if let Some(len) = f.length {
989 f.length = Some(transform_recursive(len, transform_fn)?);
990 }
991 Expression::Substring(f)
992 }
993 Expression::Replace(mut f) => {
994 f.this = transform_recursive(f.this, transform_fn)?;
995 f.old = transform_recursive(f.old, transform_fn)?;
996 f.new = transform_recursive(f.new, transform_fn)?;
997 Expression::Replace(f)
998 }
999 Expression::ConcatWs(mut f) => {
1000 f.separator = transform_recursive(f.separator, transform_fn)?;
1001 f.expressions = f
1002 .expressions
1003 .into_iter()
1004 .map(|e| transform_recursive(e, transform_fn))
1005 .collect::<Result<Vec<_>>>()?;
1006 Expression::ConcatWs(f)
1007 }
1008 Expression::Trim(mut f) => {
1009 f.this = transform_recursive(f.this, transform_fn)?;
1010 if let Some(chars) = f.characters {
1011 f.characters = Some(transform_recursive(chars, transform_fn)?);
1012 }
1013 Expression::Trim(f)
1014 }
1015 Expression::Split(mut f) => {
1016 f.this = transform_recursive(f.this, transform_fn)?;
1017 f.delimiter = transform_recursive(f.delimiter, transform_fn)?;
1018 Expression::Split(f)
1019 }
1020 Expression::Lpad(mut f) => {
1021 f.this = transform_recursive(f.this, transform_fn)?;
1022 f.length = transform_recursive(f.length, transform_fn)?;
1023 if let Some(fill) = f.fill {
1024 f.fill = Some(transform_recursive(fill, transform_fn)?);
1025 }
1026 Expression::Lpad(f)
1027 }
1028 Expression::Rpad(mut f) => {
1029 f.this = transform_recursive(f.this, transform_fn)?;
1030 f.length = transform_recursive(f.length, transform_fn)?;
1031 if let Some(fill) = f.fill {
1032 f.fill = Some(transform_recursive(fill, transform_fn)?);
1033 }
1034 Expression::Rpad(f)
1035 }
1036
1037 // ===== Conditional expressions =====
1038 Expression::Case(mut c) => {
1039 if let Some(operand) = c.operand {
1040 c.operand = Some(transform_recursive(operand, transform_fn)?);
1041 }
1042 c.whens = c
1043 .whens
1044 .into_iter()
1045 .map(|(cond, then)| {
1046 let new_cond = transform_recursive(cond.clone(), transform_fn).unwrap_or(cond);
1047 let new_then = transform_recursive(then.clone(), transform_fn).unwrap_or(then);
1048 (new_cond, new_then)
1049 })
1050 .collect();
1051 if let Some(else_expr) = c.else_ {
1052 c.else_ = Some(transform_recursive(else_expr, transform_fn)?);
1053 }
1054 Expression::Case(c)
1055 }
1056 Expression::IfFunc(mut f) => {
1057 f.condition = transform_recursive(f.condition, transform_fn)?;
1058 f.true_value = transform_recursive(f.true_value, transform_fn)?;
1059 if let Some(false_val) = f.false_value {
1060 f.false_value = Some(transform_recursive(false_val, transform_fn)?);
1061 }
1062 Expression::IfFunc(f)
1063 }
1064
1065 // ===== Date/Time expressions =====
1066 Expression::DateAdd(mut f) => {
1067 f.this = transform_recursive(f.this, transform_fn)?;
1068 f.interval = transform_recursive(f.interval, transform_fn)?;
1069 Expression::DateAdd(f)
1070 }
1071 Expression::DateSub(mut f) => {
1072 f.this = transform_recursive(f.this, transform_fn)?;
1073 f.interval = transform_recursive(f.interval, transform_fn)?;
1074 Expression::DateSub(f)
1075 }
1076 Expression::DateDiff(mut f) => {
1077 f.this = transform_recursive(f.this, transform_fn)?;
1078 f.expression = transform_recursive(f.expression, transform_fn)?;
1079 Expression::DateDiff(f)
1080 }
1081 Expression::DateTrunc(mut f) => {
1082 f.this = transform_recursive(f.this, transform_fn)?;
1083 Expression::DateTrunc(f)
1084 }
1085 Expression::Extract(mut f) => {
1086 f.this = transform_recursive(f.this, transform_fn)?;
1087 Expression::Extract(f)
1088 }
1089
1090 // ===== JSON expressions =====
1091 Expression::JsonObject(mut f) => {
1092 f.pairs = f
1093 .pairs
1094 .into_iter()
1095 .map(|(k, v)| {
1096 let new_k = transform_recursive(k, transform_fn)?;
1097 let new_v = transform_recursive(v, transform_fn)?;
1098 Ok((new_k, new_v))
1099 })
1100 .collect::<Result<Vec<_>>>()?;
1101 Expression::JsonObject(f)
1102 }
1103
1104 // ===== Subquery expressions =====
1105 Expression::Subquery(mut s) => {
1106 s.this = transform_recursive(s.this, transform_fn)?;
1107 Expression::Subquery(s)
1108 }
1109 Expression::Exists(mut e) => {
1110 e.this = transform_recursive(e.this, transform_fn)?;
1111 Expression::Exists(e)
1112 }
1113
1114 // ===== Set operations =====
1115 Expression::Union(mut u) => {
1116 u.left = transform_recursive(u.left, transform_fn)?;
1117 u.right = transform_recursive(u.right, transform_fn)?;
1118 Expression::Union(u)
1119 }
1120 Expression::Intersect(mut i) => {
1121 i.left = transform_recursive(i.left, transform_fn)?;
1122 i.right = transform_recursive(i.right, transform_fn)?;
1123 Expression::Intersect(i)
1124 }
1125 Expression::Except(mut e) => {
1126 e.left = transform_recursive(e.left, transform_fn)?;
1127 e.right = transform_recursive(e.right, transform_fn)?;
1128 Expression::Except(e)
1129 }
1130
1131 // ===== DML expressions =====
1132 Expression::Insert(mut ins) => {
1133 // Transform VALUES clause expressions
1134 let mut new_values = Vec::new();
1135 for row in ins.values {
1136 let mut new_row = Vec::new();
1137 for e in row {
1138 new_row.push(transform_recursive(e, transform_fn)?);
1139 }
1140 new_values.push(new_row);
1141 }
1142 ins.values = new_values;
1143
1144 // Transform query (for INSERT ... SELECT)
1145 if let Some(query) = ins.query {
1146 ins.query = Some(transform_recursive(query, transform_fn)?);
1147 }
1148
1149 // Transform RETURNING clause
1150 let mut new_returning = Vec::new();
1151 for e in ins.returning {
1152 new_returning.push(transform_recursive(e, transform_fn)?);
1153 }
1154 ins.returning = new_returning;
1155
1156 // Transform ON CONFLICT clause
1157 if let Some(on_conflict) = ins.on_conflict {
1158 ins.on_conflict = Some(Box::new(transform_recursive(*on_conflict, transform_fn)?));
1159 }
1160
1161 Expression::Insert(ins)
1162 }
1163 Expression::Update(mut upd) => {
1164 upd.set = upd
1165 .set
1166 .into_iter()
1167 .map(|(id, val)| {
1168 let new_val = transform_recursive(val.clone(), transform_fn).unwrap_or(val);
1169 (id, new_val)
1170 })
1171 .collect();
1172 if let Some(mut where_clause) = upd.where_clause.take() {
1173 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
1174 upd.where_clause = Some(where_clause);
1175 }
1176 Expression::Update(upd)
1177 }
1178 Expression::Delete(mut del) => {
1179 if let Some(mut where_clause) = del.where_clause.take() {
1180 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
1181 del.where_clause = Some(where_clause);
1182 }
1183 Expression::Delete(del)
1184 }
1185
1186 // ===== CTE expressions =====
1187 Expression::With(mut w) => {
1188 w.ctes = w
1189 .ctes
1190 .into_iter()
1191 .map(|mut cte| {
1192 let original = cte.this.clone();
1193 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
1194 cte
1195 })
1196 .collect();
1197 Expression::With(w)
1198 }
1199 Expression::Cte(mut c) => {
1200 c.this = transform_recursive(c.this, transform_fn)?;
1201 Expression::Cte(c)
1202 }
1203
1204 // ===== Order expressions =====
1205 Expression::Ordered(mut o) => {
1206 o.this = transform_recursive(o.this, transform_fn)?;
1207 Expression::Ordered(o)
1208 }
1209
1210 // ===== Negation =====
1211 Expression::Neg(mut n) => {
1212 n.this = transform_recursive(n.this, transform_fn)?;
1213 Expression::Neg(n)
1214 }
1215
1216 // ===== Between =====
1217 Expression::Between(mut b) => {
1218 b.this = transform_recursive(b.this, transform_fn)?;
1219 b.low = transform_recursive(b.low, transform_fn)?;
1220 b.high = transform_recursive(b.high, transform_fn)?;
1221 Expression::Between(b)
1222 }
1223
1224 // ===== Like expressions =====
1225 Expression::Like(mut l) => {
1226 l.left = transform_recursive(l.left, transform_fn)?;
1227 l.right = transform_recursive(l.right, transform_fn)?;
1228 Expression::Like(l)
1229 }
1230 Expression::ILike(mut l) => {
1231 l.left = transform_recursive(l.left, transform_fn)?;
1232 l.right = transform_recursive(l.right, transform_fn)?;
1233 Expression::ILike(l)
1234 }
1235
1236 // ===== Additional binary ops not covered by macro =====
1237 Expression::Neq(op) => transform_binary!(Neq, *op),
1238 Expression::Lte(op) => transform_binary!(Lte, *op),
1239 Expression::Gte(op) => transform_binary!(Gte, *op),
1240 Expression::Mod(op) => transform_binary!(Mod, *op),
1241 Expression::Concat(op) => transform_binary!(Concat, *op),
1242 Expression::BitwiseAnd(op) => transform_binary!(BitwiseAnd, *op),
1243 Expression::BitwiseOr(op) => transform_binary!(BitwiseOr, *op),
1244 Expression::BitwiseXor(op) => transform_binary!(BitwiseXor, *op),
1245 Expression::Is(op) => transform_binary!(Is, *op),
1246
1247 // ===== TryCast / SafeCast =====
1248 Expression::TryCast(mut c) => {
1249 c.this = transform_recursive(c.this, transform_fn)?;
1250 c.to = transform_data_type_recursive(c.to, transform_fn)?;
1251 Expression::TryCast(c)
1252 }
1253 Expression::SafeCast(mut c) => {
1254 c.this = transform_recursive(c.this, transform_fn)?;
1255 c.to = transform_data_type_recursive(c.to, transform_fn)?;
1256 Expression::SafeCast(c)
1257 }
1258
1259 // ===== Misc =====
1260 Expression::Unnest(mut f) => {
1261 f.this = transform_recursive(f.this, transform_fn)?;
1262 f.expressions = f
1263 .expressions
1264 .into_iter()
1265 .map(|e| transform_recursive(e, transform_fn))
1266 .collect::<Result<Vec<_>>>()?;
1267 Expression::Unnest(f)
1268 }
1269 Expression::Explode(mut f) => {
1270 f.this = transform_recursive(f.this, transform_fn)?;
1271 Expression::Explode(f)
1272 }
1273 Expression::GroupConcat(mut f) => {
1274 f.this = transform_recursive(f.this, transform_fn)?;
1275 Expression::GroupConcat(f)
1276 }
1277 Expression::StringAgg(mut f) => {
1278 f.this = transform_recursive(f.this, transform_fn)?;
1279 Expression::StringAgg(f)
1280 }
1281 Expression::ListAgg(mut f) => {
1282 f.this = transform_recursive(f.this, transform_fn)?;
1283 Expression::ListAgg(f)
1284 }
1285 Expression::ArrayAgg(mut f) => {
1286 f.this = transform_recursive(f.this, transform_fn)?;
1287 Expression::ArrayAgg(f)
1288 }
1289 Expression::ParseJson(mut f) => {
1290 f.this = transform_recursive(f.this, transform_fn)?;
1291 Expression::ParseJson(f)
1292 }
1293 Expression::ToJson(mut f) => {
1294 f.this = transform_recursive(f.this, transform_fn)?;
1295 Expression::ToJson(f)
1296 }
1297 Expression::JSONExtract(mut e) => {
1298 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1299 e.expression = Box::new(transform_recursive(*e.expression, transform_fn)?);
1300 Expression::JSONExtract(e)
1301 }
1302 Expression::JSONExtractScalar(mut e) => {
1303 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1304 e.expression = Box::new(transform_recursive(*e.expression, transform_fn)?);
1305 Expression::JSONExtractScalar(e)
1306 }
1307
1308 // StrToTime: recurse into this
1309 Expression::StrToTime(mut e) => {
1310 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1311 Expression::StrToTime(e)
1312 }
1313
1314 // UnixToTime: recurse into this
1315 Expression::UnixToTime(mut e) => {
1316 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1317 Expression::UnixToTime(e)
1318 }
1319
1320 // CreateTable: recurse into column defaults, on_update expressions, and data types
1321 Expression::CreateTable(mut ct) => {
1322 for col in &mut ct.columns {
1323 if let Some(default_expr) = col.default.take() {
1324 col.default = Some(transform_recursive(default_expr, transform_fn)?);
1325 }
1326 if let Some(on_update_expr) = col.on_update.take() {
1327 col.on_update = Some(transform_recursive(on_update_expr, transform_fn)?);
1328 }
1329 // Note: Column data type transformations (INT -> INT64 for BigQuery, etc.)
1330 // are NOT applied here because per-dialect transforms are designed for CAST/expression
1331 // contexts and may not produce correct results for DDL column definitions.
1332 // The DDL type mappings would need dedicated handling per source/target pair.
1333 }
1334 if let Some(as_select) = ct.as_select.take() {
1335 ct.as_select = Some(transform_recursive(as_select, transform_fn)?);
1336 }
1337 Expression::CreateTable(ct)
1338 }
1339
1340 // CreateProcedure: recurse into body expressions
1341 Expression::CreateProcedure(mut cp) => {
1342 if let Some(body) = cp.body.take() {
1343 cp.body = Some(match body {
1344 FunctionBody::Expression(expr) => {
1345 FunctionBody::Expression(transform_recursive(expr, transform_fn)?)
1346 }
1347 FunctionBody::Return(expr) => {
1348 FunctionBody::Return(transform_recursive(expr, transform_fn)?)
1349 }
1350 FunctionBody::Statements(stmts) => {
1351 let transformed_stmts = stmts
1352 .into_iter()
1353 .map(|s| transform_recursive(s, transform_fn))
1354 .collect::<Result<Vec<_>>>()?;
1355 FunctionBody::Statements(transformed_stmts)
1356 }
1357 other => other,
1358 });
1359 }
1360 Expression::CreateProcedure(cp)
1361 }
1362
1363 // CreateFunction: recurse into body expressions
1364 Expression::CreateFunction(mut cf) => {
1365 if let Some(body) = cf.body.take() {
1366 cf.body = Some(match body {
1367 FunctionBody::Expression(expr) => {
1368 FunctionBody::Expression(transform_recursive(expr, transform_fn)?)
1369 }
1370 FunctionBody::Return(expr) => {
1371 FunctionBody::Return(transform_recursive(expr, transform_fn)?)
1372 }
1373 FunctionBody::Statements(stmts) => {
1374 let transformed_stmts = stmts
1375 .into_iter()
1376 .map(|s| transform_recursive(s, transform_fn))
1377 .collect::<Result<Vec<_>>>()?;
1378 FunctionBody::Statements(transformed_stmts)
1379 }
1380 other => other,
1381 });
1382 }
1383 Expression::CreateFunction(cf)
1384 }
1385
1386 // MemberOf: recurse into left and right operands
1387 Expression::MemberOf(op) => transform_binary!(MemberOf, *op),
1388 // ArrayContainsAll (@>): recurse into left and right operands
1389 Expression::ArrayContainsAll(op) => transform_binary!(ArrayContainsAll, *op),
1390 // ArrayContainedBy (<@): recurse into left and right operands
1391 Expression::ArrayContainedBy(op) => transform_binary!(ArrayContainedBy, *op),
1392 // ArrayOverlaps (&&): recurse into left and right operands
1393 Expression::ArrayOverlaps(op) => transform_binary!(ArrayOverlaps, *op),
1394 // TsMatch (@@): recurse into left and right operands
1395 Expression::TsMatch(op) => transform_binary!(TsMatch, *op),
1396 // Adjacent (-|-): recurse into left and right operands
1397 Expression::Adjacent(op) => transform_binary!(Adjacent, *op),
1398
1399 // Table: recurse into when (HistoricalData) and changes fields
1400 Expression::Table(mut t) => {
1401 if let Some(when) = t.when.take() {
1402 let transformed =
1403 transform_recursive(Expression::HistoricalData(when), transform_fn)?;
1404 if let Expression::HistoricalData(hd) = transformed {
1405 t.when = Some(hd);
1406 }
1407 }
1408 if let Some(changes) = t.changes.take() {
1409 let transformed = transform_recursive(Expression::Changes(changes), transform_fn)?;
1410 if let Expression::Changes(c) = transformed {
1411 t.changes = Some(c);
1412 }
1413 }
1414 Expression::Table(t)
1415 }
1416
1417 // HistoricalData (Snowflake time travel): recurse into expression
1418 Expression::HistoricalData(mut hd) => {
1419 *hd.expression = transform_recursive(*hd.expression, transform_fn)?;
1420 Expression::HistoricalData(hd)
1421 }
1422
1423 // Changes (Snowflake CHANGES clause): recurse into at_before and end
1424 Expression::Changes(mut c) => {
1425 if let Some(at_before) = c.at_before.take() {
1426 c.at_before = Some(Box::new(transform_recursive(*at_before, transform_fn)?));
1427 }
1428 if let Some(end) = c.end.take() {
1429 c.end = Some(Box::new(transform_recursive(*end, transform_fn)?));
1430 }
1431 Expression::Changes(c)
1432 }
1433
1434 // TableArgument: TABLE(expr) or MODEL(expr)
1435 Expression::TableArgument(mut ta) => {
1436 ta.this = transform_recursive(ta.this, transform_fn)?;
1437 Expression::TableArgument(ta)
1438 }
1439
1440 // JoinedTable: (tbl1 JOIN tbl2 ON ...) - recurse into left and join tables
1441 Expression::JoinedTable(mut jt) => {
1442 jt.left = transform_recursive(jt.left, transform_fn)?;
1443 for join in &mut jt.joins {
1444 join.this = transform_recursive(
1445 std::mem::replace(&mut join.this, Expression::Null(crate::expressions::Null)),
1446 transform_fn,
1447 )?;
1448 if let Some(on) = join.on.take() {
1449 join.on = Some(transform_recursive(on, transform_fn)?);
1450 }
1451 }
1452 jt.lateral_views = jt
1453 .lateral_views
1454 .into_iter()
1455 .map(|mut lv| {
1456 lv.this = transform_recursive(lv.this, transform_fn)?;
1457 Ok(lv)
1458 })
1459 .collect::<Result<Vec<_>>>()?;
1460 Expression::JoinedTable(jt)
1461 }
1462
1463 // Lateral: LATERAL func() - recurse into the function expression
1464 Expression::Lateral(mut lat) => {
1465 *lat.this = transform_recursive(*lat.this, transform_fn)?;
1466 Expression::Lateral(lat)
1467 }
1468
1469 // WithinGroup: recurse into order_by items (for NULLS FIRST/LAST etc.)
1470 // but NOT into wg.this - the inner function is handled by StringAggConvert/GroupConcatConvert
1471 // as a unit together with the WithinGroup wrapper
1472 Expression::WithinGroup(mut wg) => {
1473 wg.order_by = wg
1474 .order_by
1475 .into_iter()
1476 .map(|mut o| {
1477 let original = o.this.clone();
1478 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
1479 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
1480 Ok(Expression::Ordered(transformed)) => *transformed,
1481 Ok(_) | Err(_) => o,
1482 }
1483 })
1484 .collect();
1485 Expression::WithinGroup(wg)
1486 }
1487
1488 // Filter: recurse into both the aggregate and the filter condition
1489 Expression::Filter(mut f) => {
1490 f.this = Box::new(transform_recursive(*f.this, transform_fn)?);
1491 f.expression = Box::new(transform_recursive(*f.expression, transform_fn)?);
1492 Expression::Filter(f)
1493 }
1494
1495 // BitwiseOrAgg/BitwiseAndAgg/BitwiseXorAgg: recurse into the aggregate argument
1496 Expression::BitwiseOrAgg(mut f) => {
1497 f.this = transform_recursive(f.this, transform_fn)?;
1498 Expression::BitwiseOrAgg(f)
1499 }
1500 Expression::BitwiseAndAgg(mut f) => {
1501 f.this = transform_recursive(f.this, transform_fn)?;
1502 Expression::BitwiseAndAgg(f)
1503 }
1504 Expression::BitwiseXorAgg(mut f) => {
1505 f.this = transform_recursive(f.this, transform_fn)?;
1506 Expression::BitwiseXorAgg(f)
1507 }
1508 Expression::PipeOperator(mut pipe) => {
1509 pipe.this = transform_recursive(pipe.this, transform_fn)?;
1510 pipe.expression = transform_recursive(pipe.expression, transform_fn)?;
1511 Expression::PipeOperator(pipe)
1512 }
1513
1514 // Pass through leaf nodes unchanged
1515 other => other,
1516 };
1517
1518 // Then apply the transform function
1519 transform_fn(expr)
1520}
1521
/// Returns the tokenizer config, generator config, and expression transform closure
/// for a built-in dialect type. This is the shared implementation used by both
/// `Dialect::get()` and custom dialect construction.
///
/// Every arm below is gated on its corresponding `dialect-*` cargo feature; a
/// `DialectType` whose feature is disabled at compile time (as well as the
/// generic type itself) falls through to the `GenericDialect` arm at the bottom.
fn configs_for_dialect_type(
    dt: DialectType,
) -> (
    TokenizerConfig,
    GeneratorConfig,
    Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>,
) {
    // Expands to the (tokenizer, generator, transform) triple for a unit dialect
    // struct. The boxed closure names the struct again (rather than capturing `d`
    // by reference) so it is `'static + Send + Sync` without borrowing anything.
    macro_rules! dialect_configs {
        ($dialect_struct:ident) => {{
            let d = $dialect_struct;
            (
                d.tokenizer_config(),
                d.generator_config(),
                Box::new(move |e| $dialect_struct.transform_expr(e)),
            )
        }};
    }
    match dt {
        #[cfg(feature = "dialect-postgresql")]
        DialectType::PostgreSQL => dialect_configs!(PostgresDialect),
        #[cfg(feature = "dialect-mysql")]
        DialectType::MySQL => dialect_configs!(MySQLDialect),
        #[cfg(feature = "dialect-bigquery")]
        DialectType::BigQuery => dialect_configs!(BigQueryDialect),
        #[cfg(feature = "dialect-snowflake")]
        DialectType::Snowflake => dialect_configs!(SnowflakeDialect),
        #[cfg(feature = "dialect-duckdb")]
        DialectType::DuckDB => dialect_configs!(DuckDBDialect),
        #[cfg(feature = "dialect-tsql")]
        DialectType::TSQL => dialect_configs!(TSQLDialect),
        #[cfg(feature = "dialect-oracle")]
        DialectType::Oracle => dialect_configs!(OracleDialect),
        #[cfg(feature = "dialect-hive")]
        DialectType::Hive => dialect_configs!(HiveDialect),
        #[cfg(feature = "dialect-spark")]
        DialectType::Spark => dialect_configs!(SparkDialect),
        #[cfg(feature = "dialect-sqlite")]
        DialectType::SQLite => dialect_configs!(SQLiteDialect),
        #[cfg(feature = "dialect-presto")]
        DialectType::Presto => dialect_configs!(PrestoDialect),
        #[cfg(feature = "dialect-trino")]
        DialectType::Trino => dialect_configs!(TrinoDialect),
        #[cfg(feature = "dialect-redshift")]
        DialectType::Redshift => dialect_configs!(RedshiftDialect),
        #[cfg(feature = "dialect-clickhouse")]
        DialectType::ClickHouse => dialect_configs!(ClickHouseDialect),
        #[cfg(feature = "dialect-databricks")]
        DialectType::Databricks => dialect_configs!(DatabricksDialect),
        #[cfg(feature = "dialect-athena")]
        DialectType::Athena => dialect_configs!(AthenaDialect),
        #[cfg(feature = "dialect-teradata")]
        DialectType::Teradata => dialect_configs!(TeradataDialect),
        #[cfg(feature = "dialect-doris")]
        DialectType::Doris => dialect_configs!(DorisDialect),
        #[cfg(feature = "dialect-starrocks")]
        DialectType::StarRocks => dialect_configs!(StarRocksDialect),
        #[cfg(feature = "dialect-materialize")]
        DialectType::Materialize => dialect_configs!(MaterializeDialect),
        #[cfg(feature = "dialect-risingwave")]
        DialectType::RisingWave => dialect_configs!(RisingWaveDialect),
        #[cfg(feature = "dialect-singlestore")]
        DialectType::SingleStore => dialect_configs!(SingleStoreDialect),
        #[cfg(feature = "dialect-cockroachdb")]
        DialectType::CockroachDB => dialect_configs!(CockroachDBDialect),
        #[cfg(feature = "dialect-tidb")]
        DialectType::TiDB => dialect_configs!(TiDBDialect),
        #[cfg(feature = "dialect-druid")]
        DialectType::Druid => dialect_configs!(DruidDialect),
        #[cfg(feature = "dialect-solr")]
        DialectType::Solr => dialect_configs!(SolrDialect),
        #[cfg(feature = "dialect-tableau")]
        DialectType::Tableau => dialect_configs!(TableauDialect),
        #[cfg(feature = "dialect-dune")]
        DialectType::Dune => dialect_configs!(DuneDialect),
        #[cfg(feature = "dialect-fabric")]
        DialectType::Fabric => dialect_configs!(FabricDialect),
        #[cfg(feature = "dialect-drill")]
        DialectType::Drill => dialect_configs!(DrillDialect),
        #[cfg(feature = "dialect-dremio")]
        DialectType::Dremio => dialect_configs!(DremioDialect),
        #[cfg(feature = "dialect-exasol")]
        DialectType::Exasol => dialect_configs!(ExasolDialect),
        #[cfg(feature = "dialect-datafusion")]
        DialectType::DataFusion => dialect_configs!(DataFusionDialect),
        // Fallback: DialectType::Generic, plus any variant whose feature flag
        // was not enabled in this build.
        _ => dialect_configs!(GenericDialect),
    }
}
1612
1613// ---------------------------------------------------------------------------
1614// Custom dialect registry
1615// ---------------------------------------------------------------------------
1616
/// Global registry mapping custom dialect names to their configurations.
/// Wrapped in an `RwLock` so lookups take a shared read lock while
/// registration/unregistration take the exclusive write lock.
static CUSTOM_DIALECT_REGISTRY: LazyLock<RwLock<HashMap<String, Arc<CustomDialectConfig>>>> =
    LazyLock::new(|| RwLock::new(HashMap::new()));
1619
/// Immutable configuration for a user-registered dialect, stored in the
/// global registry and shared out via `Arc`.
struct CustomDialectConfig {
    // Registry key; rejected at registration time if it collides with a
    // built-in dialect name.
    name: String,
    // Built-in dialect the configs below were derived from.
    base_dialect: DialectType,
    // Tokenizer settings inherited from `base_dialect`, possibly modified
    // by the builder's tokenizer closure.
    tokenizer_config: TokenizerConfig,
    // Generator settings inherited from `base_dialect`, possibly modified
    // by the builder's generator closure.
    generator_config: GeneratorConfig,
    // Optional caller-supplied per-node expression transform (replaces the
    // base dialect's transform when present).
    transform: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
    // Optional caller-supplied whole-tree preprocessing pass (replaces the
    // base dialect's preprocessing when present).
    preprocess: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
1628
1629/// Fluent builder for creating and registering custom SQL dialects.
1630///
1631/// A custom dialect is based on an existing built-in dialect and allows selective
1632/// overrides of tokenizer configuration, generator configuration, and expression
1633/// transforms.
1634///
1635/// # Example
1636///
1637/// ```rust,ignore
1638/// use polyglot_sql::dialects::{CustomDialectBuilder, DialectType, Dialect};
1639/// use polyglot_sql::generator::NormalizeFunctions;
1640///
1641/// CustomDialectBuilder::new("my_postgres")
1642/// .based_on(DialectType::PostgreSQL)
1643/// .generator_config_modifier(|gc| {
1644/// gc.normalize_functions = NormalizeFunctions::Lower;
1645/// })
1646/// .register()
1647/// .unwrap();
1648///
1649/// let d = Dialect::get_by_name("my_postgres").unwrap();
1650/// let exprs = d.parse("SELECT COUNT(*)").unwrap();
1651/// let sql = d.generate(&exprs[0]).unwrap();
1652/// assert_eq!(sql, "select count(*)");
1653///
1654/// polyglot_sql::unregister_custom_dialect("my_postgres");
1655/// ```
pub struct CustomDialectBuilder {
    // Name under which the dialect will be registered.
    name: String,
    // Built-in dialect whose configuration is inherited (defaults to Generic).
    base_dialect: DialectType,
    // One-shot closure applied to the inherited tokenizer config at register time.
    tokenizer_modifier: Option<Box<dyn FnOnce(&mut TokenizerConfig)>>,
    // One-shot closure applied to the inherited generator config at register time.
    generator_modifier: Option<Box<dyn FnOnce(&mut GeneratorConfig)>>,
    // Optional per-node expression transform; replaces the base dialect's.
    transform: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
    // Optional whole-tree preprocessing pass; replaces the base dialect's.
    preprocess: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
1664
1665impl CustomDialectBuilder {
1666 /// Create a new builder with the given name. Defaults to `Generic` as the base dialect.
1667 pub fn new(name: impl Into<String>) -> Self {
1668 Self {
1669 name: name.into(),
1670 base_dialect: DialectType::Generic,
1671 tokenizer_modifier: None,
1672 generator_modifier: None,
1673 transform: None,
1674 preprocess: None,
1675 }
1676 }
1677
1678 /// Set the base built-in dialect to inherit configuration from.
1679 pub fn based_on(mut self, dialect: DialectType) -> Self {
1680 self.base_dialect = dialect;
1681 self
1682 }
1683
1684 /// Provide a closure that modifies the tokenizer configuration inherited from the base dialect.
1685 pub fn tokenizer_config_modifier<F>(mut self, f: F) -> Self
1686 where
1687 F: FnOnce(&mut TokenizerConfig) + 'static,
1688 {
1689 self.tokenizer_modifier = Some(Box::new(f));
1690 self
1691 }
1692
1693 /// Provide a closure that modifies the generator configuration inherited from the base dialect.
1694 pub fn generator_config_modifier<F>(mut self, f: F) -> Self
1695 where
1696 F: FnOnce(&mut GeneratorConfig) + 'static,
1697 {
1698 self.generator_modifier = Some(Box::new(f));
1699 self
1700 }
1701
1702 /// Set a custom per-node expression transform function.
1703 ///
1704 /// This replaces the base dialect's transform. It is called on every expression
1705 /// node during the recursive transform pass.
1706 pub fn transform_fn<F>(mut self, f: F) -> Self
1707 where
1708 F: Fn(Expression) -> Result<Expression> + Send + Sync + 'static,
1709 {
1710 self.transform = Some(Arc::new(f));
1711 self
1712 }
1713
1714 /// Set a custom whole-tree preprocessing function.
1715 ///
1716 /// This replaces the base dialect's built-in preprocessing. It is called once
1717 /// on the entire expression tree before the recursive per-node transform.
1718 pub fn preprocess_fn<F>(mut self, f: F) -> Self
1719 where
1720 F: Fn(Expression) -> Result<Expression> + Send + Sync + 'static,
1721 {
1722 self.preprocess = Some(Arc::new(f));
1723 self
1724 }
1725
1726 /// Build the custom dialect configuration and register it in the global registry.
1727 ///
1728 /// Returns an error if:
1729 /// - The name collides with a built-in dialect name
1730 /// - A custom dialect with the same name is already registered
1731 pub fn register(self) -> Result<()> {
1732 // Reject names that collide with built-in dialects
1733 if DialectType::from_str(&self.name).is_ok() {
1734 return Err(crate::error::Error::parse(
1735 format!(
1736 "Cannot register custom dialect '{}': name collides with built-in dialect",
1737 self.name
1738 ),
1739 0,
1740 0,
1741 ));
1742 }
1743
1744 // Get base configs
1745 let (mut tok_config, mut gen_config, _base_transform) =
1746 configs_for_dialect_type(self.base_dialect);
1747
1748 // Apply modifiers
1749 if let Some(tok_mod) = self.tokenizer_modifier {
1750 tok_mod(&mut tok_config);
1751 }
1752 if let Some(gen_mod) = self.generator_modifier {
1753 gen_mod(&mut gen_config);
1754 }
1755
1756 let config = CustomDialectConfig {
1757 name: self.name.clone(),
1758 base_dialect: self.base_dialect,
1759 tokenizer_config: tok_config,
1760 generator_config: gen_config,
1761 transform: self.transform,
1762 preprocess: self.preprocess,
1763 };
1764
1765 register_custom_dialect(config)
1766 }
1767}
1768
1769use std::str::FromStr;
1770
1771fn register_custom_dialect(config: CustomDialectConfig) -> Result<()> {
1772 let mut registry = CUSTOM_DIALECT_REGISTRY
1773 .write()
1774 .map_err(|e| crate::error::Error::parse(format!("Registry lock poisoned: {}", e), 0, 0))?;
1775
1776 if registry.contains_key(&config.name) {
1777 return Err(crate::error::Error::parse(
1778 format!("Custom dialect '{}' is already registered", config.name),
1779 0,
1780 0,
1781 ));
1782 }
1783
1784 registry.insert(config.name.clone(), Arc::new(config));
1785 Ok(())
1786}
1787
1788/// Remove a custom dialect from the global registry.
1789///
1790/// Returns `true` if a dialect with that name was found and removed,
1791/// `false` if no such custom dialect existed.
1792pub fn unregister_custom_dialect(name: &str) -> bool {
1793 if let Ok(mut registry) = CUSTOM_DIALECT_REGISTRY.write() {
1794 registry.remove(name).is_some()
1795 } else {
1796 false
1797 }
1798}
1799
1800fn get_custom_dialect_config(name: &str) -> Option<Arc<CustomDialectConfig>> {
1801 CUSTOM_DIALECT_REGISTRY
1802 .read()
1803 .ok()
1804 .and_then(|registry| registry.get(name).cloned())
1805}
1806
/// Main entry point for dialect-specific SQL operations.
///
/// A `Dialect` bundles together a tokenizer, generator configuration, and expression
/// transformer for a specific SQL database engine. It is the high-level API through
/// which callers parse, generate, transform, and transpile SQL.
///
/// # Usage
///
/// ```rust,ignore
/// use polyglot_sql::dialects::{Dialect, DialectType};
///
/// // Parse PostgreSQL SQL into an AST
/// let pg = Dialect::get(DialectType::PostgreSQL);
/// let exprs = pg.parse("SELECT id, name FROM users WHERE active")?;
///
/// // Transpile from PostgreSQL to BigQuery
/// let results = pg.transpile_to("SELECT NOW()", DialectType::BigQuery)?;
/// assert_eq!(results[0], "SELECT CURRENT_TIMESTAMP()");
/// ```
///
/// Obtain an instance via [`Dialect::get`] or [`Dialect::get_by_name`].
/// The struct is `Send + Sync` safe so it can be shared across threads.
pub struct Dialect {
    /// Which dialect this instance represents; for custom dialects this is the
    /// base dialect the custom configuration was derived from.
    dialect_type: DialectType,
    /// Tokenizer configured with this dialect's lexing rules.
    tokenizer: Tokenizer,
    /// Default generator configuration used when rendering SQL.
    generator_config: GeneratorConfig,
    /// Per-node expression transform applied during [`Dialect::transform`].
    transformer: Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>,
    /// Optional function to get expression-specific generator config (for hybrid dialects like Athena).
    generator_config_for_expr: Option<Box<dyn Fn(&Expression) -> GeneratorConfig + Send + Sync>>,
    /// Optional custom preprocessing function (overrides built-in preprocess for custom dialects).
    custom_preprocess: Option<Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
1839
1840impl Dialect {
1841 /// Creates a fully configured [`Dialect`] instance for the given [`DialectType`].
1842 ///
1843 /// This is the primary constructor. It initializes the tokenizer, generator config,
1844 /// and expression transformer based on the dialect's [`DialectImpl`] implementation.
1845 /// For hybrid dialects like Athena, it also sets up expression-specific generator
1846 /// config routing.
1847 pub fn get(dialect_type: DialectType) -> Self {
1848 let (tokenizer_config, generator_config, transformer) =
1849 configs_for_dialect_type(dialect_type);
1850
1851 // Set up expression-specific generator config for hybrid dialects
1852 let generator_config_for_expr: Option<
1853 Box<dyn Fn(&Expression) -> GeneratorConfig + Send + Sync>,
1854 > = match dialect_type {
1855 #[cfg(feature = "dialect-athena")]
1856 DialectType::Athena => Some(Box::new(|expr| {
1857 AthenaDialect.generator_config_for_expr(expr)
1858 })),
1859 _ => None,
1860 };
1861
1862 Self {
1863 dialect_type,
1864 tokenizer: Tokenizer::new(tokenizer_config),
1865 generator_config,
1866 transformer,
1867 generator_config_for_expr,
1868 custom_preprocess: None,
1869 }
1870 }
1871
1872 /// Look up a dialect by string name.
1873 ///
1874 /// Checks built-in dialect names first (via [`DialectType::from_str`]), then
1875 /// falls back to the custom dialect registry. Returns `None` if no dialect
1876 /// with the given name exists.
1877 pub fn get_by_name(name: &str) -> Option<Self> {
1878 // Try built-in first
1879 if let Ok(dt) = DialectType::from_str(name) {
1880 return Some(Self::get(dt));
1881 }
1882
1883 // Try custom registry
1884 let config = get_custom_dialect_config(name)?;
1885 Some(Self::from_custom_config(&config))
1886 }
1887
1888 /// Construct a `Dialect` from a custom dialect configuration.
1889 fn from_custom_config(config: &CustomDialectConfig) -> Self {
1890 // Build the transformer: use custom if provided, else use base dialect's
1891 let transformer: Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync> =
1892 if let Some(ref custom_transform) = config.transform {
1893 let t = Arc::clone(custom_transform);
1894 Box::new(move |e| t(e))
1895 } else {
1896 let (_, _, base_transform) = configs_for_dialect_type(config.base_dialect);
1897 base_transform
1898 };
1899
1900 // Build the custom preprocess: use custom if provided
1901 let custom_preprocess: Option<Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>> =
1902 config.preprocess.as_ref().map(|p| {
1903 let p = Arc::clone(p);
1904 Box::new(move |e: Expression| p(e))
1905 as Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>
1906 });
1907
1908 Self {
1909 dialect_type: config.base_dialect,
1910 tokenizer: Tokenizer::new(config.tokenizer_config.clone()),
1911 generator_config: config.generator_config.clone(),
1912 transformer,
1913 generator_config_for_expr: None,
1914 custom_preprocess,
1915 }
1916 }
1917
    /// Get the dialect type.
    ///
    /// For custom dialects this is the base dialect they were derived from.
    pub fn dialect_type(&self) -> DialectType {
        self.dialect_type
    }
1922
    /// Get the default generator configuration for this dialect.
    ///
    /// Hybrid dialects may still use a different, expression-specific config
    /// during generation.
    pub fn generator_config(&self) -> &GeneratorConfig {
        &self.generator_config
    }
1927
1928 /// Parses a SQL string into a list of [`Expression`] AST nodes.
1929 ///
1930 /// The input may contain multiple semicolon-separated statements; each one
1931 /// produces a separate element in the returned vector. Tokenization uses
1932 /// this dialect's configured tokenizer, and parsing uses the dialect-aware parser.
1933 pub fn parse(&self, sql: &str) -> Result<Vec<Expression>> {
1934 let tokens = self.tokenizer.tokenize(sql)?;
1935 let config = crate::parser::ParserConfig {
1936 dialect: Some(self.dialect_type),
1937 ..Default::default()
1938 };
1939 let mut parser = Parser::with_source(tokens, config, sql.to_string());
1940 parser.parse()
1941 }
1942
1943 /// Get the generator config for a specific expression (supports hybrid dialects)
1944 fn get_config_for_expr(&self, expr: &Expression) -> GeneratorConfig {
1945 if let Some(ref config_fn) = self.generator_config_for_expr {
1946 config_fn(expr)
1947 } else {
1948 self.generator_config.clone()
1949 }
1950 }
1951
1952 /// Generates a SQL string from an [`Expression`] AST node.
1953 ///
1954 /// The output uses this dialect's generator configuration for identifier quoting,
1955 /// keyword casing, function name normalization, and syntax style. The result is
1956 /// a single-line (non-pretty) SQL string.
1957 pub fn generate(&self, expr: &Expression) -> Result<String> {
1958 let config = self.get_config_for_expr(expr);
1959 let mut generator = Generator::with_config(config);
1960 generator.generate(expr)
1961 }
1962
1963 /// Generate SQL from an expression with pretty printing enabled
1964 pub fn generate_pretty(&self, expr: &Expression) -> Result<String> {
1965 let mut config = self.get_config_for_expr(expr);
1966 config.pretty = true;
1967 let mut generator = Generator::with_config(config);
1968 generator.generate(expr)
1969 }
1970
1971 /// Generate SQL from an expression with source dialect info (for transpilation)
1972 pub fn generate_with_source(&self, expr: &Expression, source: DialectType) -> Result<String> {
1973 let mut config = self.get_config_for_expr(expr);
1974 config.source_dialect = Some(source);
1975 let mut generator = Generator::with_config(config);
1976 generator.generate(expr)
1977 }
1978
1979 /// Generate SQL from an expression with pretty printing and source dialect info
1980 pub fn generate_pretty_with_source(
1981 &self,
1982 expr: &Expression,
1983 source: DialectType,
1984 ) -> Result<String> {
1985 let mut config = self.get_config_for_expr(expr);
1986 config.pretty = true;
1987 config.source_dialect = Some(source);
1988 let mut generator = Generator::with_config(config);
1989 generator.generate(expr)
1990 }
1991
1992 /// Generate SQL from an expression with forced identifier quoting (identify=True)
1993 pub fn generate_with_identify(&self, expr: &Expression) -> Result<String> {
1994 let mut config = self.get_config_for_expr(expr);
1995 config.always_quote_identifiers = true;
1996 let mut generator = Generator::with_config(config);
1997 generator.generate(expr)
1998 }
1999
2000 /// Generate SQL from an expression with pretty printing and forced identifier quoting
2001 pub fn generate_pretty_with_identify(&self, expr: &Expression) -> Result<String> {
2002 let mut config = self.generator_config.clone();
2003 config.pretty = true;
2004 config.always_quote_identifiers = true;
2005 let mut generator = Generator::with_config(config);
2006 generator.generate(expr)
2007 }
2008
2009 /// Generate SQL from an expression with caller-specified config overrides
2010 pub fn generate_with_overrides(
2011 &self,
2012 expr: &Expression,
2013 overrides: impl FnOnce(&mut GeneratorConfig),
2014 ) -> Result<String> {
2015 let mut config = self.get_config_for_expr(expr);
2016 overrides(&mut config);
2017 let mut generator = Generator::with_config(config);
2018 generator.generate(expr)
2019 }
2020
2021 /// Transforms an expression tree to conform to this dialect's syntax and semantics.
2022 ///
2023 /// The transformation proceeds in two phases:
2024 /// 1. **Preprocessing** -- whole-tree structural rewrites such as eliminating QUALIFY,
2025 /// ensuring boolean predicates, or converting DISTINCT ON to a window-function pattern.
2026 /// 2. **Recursive per-node transform** -- a bottom-up pass via [`transform_recursive`]
2027 /// that applies this dialect's [`DialectImpl::transform_expr`] to every node.
2028 ///
2029 /// This method is used both during transpilation (to rewrite an AST for a target dialect)
2030 /// and for identity transforms (normalizing SQL within the same dialect).
2031 pub fn transform(&self, expr: Expression) -> Result<Expression> {
2032 // Apply preprocessing transforms based on dialect
2033 let preprocessed = self.preprocess(expr)?;
2034 // Then apply recursive transformation
2035 transform_recursive(preprocessed, &self.transformer)
2036 }
2037
    /// Apply dialect-specific preprocessing transforms.
    ///
    /// Whole-tree structural rewrites (QUALIFY elimination, CTE hoisting, join
    /// rewrites, etc.) selected per `self.dialect_type`. When a custom dialect
    /// set a `preprocess_fn`, that function fully replaces the built-in logic.
    fn preprocess(&self, expr: Expression) -> Result<Expression> {
        // If a custom preprocess function is set, use it instead of the built-in logic
        if let Some(ref custom_preprocess) = self.custom_preprocess {
            return custom_preprocess(expr);
        }

        // The `transforms` module is only referenced by feature-gated arms below,
        // so the import itself is gated to avoid unused-import warnings.
        #[cfg(any(
            feature = "dialect-mysql",
            feature = "dialect-postgresql",
            feature = "dialect-bigquery",
            feature = "dialect-snowflake",
            feature = "dialect-tsql",
            feature = "dialect-spark",
            feature = "dialect-databricks",
            feature = "dialect-hive",
            feature = "dialect-sqlite",
            feature = "dialect-trino",
            feature = "dialect-presto",
            feature = "dialect-duckdb",
            feature = "dialect-redshift",
            feature = "dialect-starrocks",
            feature = "dialect-oracle",
            feature = "dialect-clickhouse",
        ))]
        use crate::transforms;

        match self.dialect_type {
            // MySQL doesn't support QUALIFY, DISTINCT ON, FULL OUTER JOIN
            // MySQL doesn't natively support GENERATE_DATE_ARRAY (expand to recursive CTE)
            #[cfg(feature = "dialect-mysql")]
            DialectType::MySQL => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_full_outer_join(expr)?;
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
                Ok(expr)
            }
            // PostgreSQL doesn't support QUALIFY
            // PostgreSQL: UNNEST(GENERATE_SERIES) -> subquery wrapping
            // PostgreSQL: Normalize SET ... TO to SET ... = in CREATE FUNCTION
            #[cfg(feature = "dialect-postgresql")]
            DialectType::PostgreSQL => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::unwrap_unnest_generate_series_for_postgres(expr)?;
                // Normalize SET ... TO to SET ... = in CREATE FUNCTION
                // Only normalize when sqlglot would fully parse (no body) —
                // sqlglot falls back to Command for complex function bodies,
                // preserving the original text including TO.
                let expr = if let Expression::CreateFunction(mut cf) = expr {
                    if cf.body.is_none() {
                        for opt in &mut cf.set_options {
                            if let crate::expressions::FunctionSetValue::Value { use_to, .. } =
                                &mut opt.value
                            {
                                *use_to = false;
                            }
                        }
                    }
                    Expression::CreateFunction(cf)
                } else {
                    expr
                };
                Ok(expr)
            }
            // BigQuery doesn't support DISTINCT ON or CTE column aliases
            #[cfg(feature = "dialect-bigquery")]
            DialectType::BigQuery => {
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::pushdown_cte_column_names(expr)?;
                let expr = transforms::explode_projection_to_unnest(expr, DialectType::BigQuery)?;
                Ok(expr)
            }
            // Snowflake
            #[cfg(feature = "dialect-snowflake")]
            DialectType::Snowflake => {
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::eliminate_window_clause(expr)?;
                let expr = transforms::snowflake_flatten_projection_to_unnest(expr)?;
                Ok(expr)
            }
            // TSQL doesn't support QUALIFY
            // TSQL requires boolean expressions in WHERE/HAVING (no implicit truthiness)
            // TSQL doesn't support CTEs in subqueries (hoist to top level)
            // NOTE: no_limit_order_by_union is handled in cross_dialect_normalize (not preprocess)
            // to avoid breaking TSQL identity tests where ORDER BY on UNION is valid
            #[cfg(feature = "dialect-tsql")]
            DialectType::TSQL => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::ensure_bools(expr)?;
                let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
                let expr = transforms::move_ctes_to_top_level(expr)?;
                let expr = transforms::qualify_derived_table_outputs(expr)?;
                Ok(expr)
            }
            // Spark doesn't support QUALIFY (but Databricks does)
            // Spark doesn't support CTEs in subqueries (hoist to top level)
            #[cfg(feature = "dialect-spark")]
            DialectType::Spark => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::add_auto_table_alias(expr)?;
                let expr = transforms::simplify_nested_paren_values(expr)?;
                let expr = transforms::move_ctes_to_top_level(expr)?;
                Ok(expr)
            }
            // Databricks supports QUALIFY natively
            // Databricks doesn't support CTEs in subqueries (hoist to top level)
            #[cfg(feature = "dialect-databricks")]
            DialectType::Databricks => {
                let expr = transforms::add_auto_table_alias(expr)?;
                let expr = transforms::simplify_nested_paren_values(expr)?;
                let expr = transforms::move_ctes_to_top_level(expr)?;
                Ok(expr)
            }
            // Hive doesn't support QUALIFY or CTEs in subqueries
            #[cfg(feature = "dialect-hive")]
            DialectType::Hive => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::move_ctes_to_top_level(expr)?;
                Ok(expr)
            }
            // SQLite doesn't support QUALIFY
            #[cfg(feature = "dialect-sqlite")]
            DialectType::SQLite => {
                let expr = transforms::eliminate_qualify(expr)?;
                Ok(expr)
            }
            // Trino doesn't support QUALIFY
            #[cfg(feature = "dialect-trino")]
            DialectType::Trino => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::explode_projection_to_unnest(expr, DialectType::Trino)?;
                Ok(expr)
            }
            // Presto doesn't support QUALIFY or WINDOW clause
            #[cfg(feature = "dialect-presto")]
            DialectType::Presto => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_window_clause(expr)?;
                let expr = transforms::explode_projection_to_unnest(expr, DialectType::Presto)?;
                Ok(expr)
            }
            // DuckDB supports QUALIFY - no elimination needed
            // Expand POSEXPLODE to GENERATE_SUBSCRIPTS + UNNEST
            // Expand LIKE ANY / ILIKE ANY to OR chains (DuckDB doesn't support quantifiers)
            #[cfg(feature = "dialect-duckdb")]
            DialectType::DuckDB => {
                let expr = transforms::expand_posexplode_duckdb(expr)?;
                let expr = transforms::expand_like_any(expr)?;
                Ok(expr)
            }
            // Redshift doesn't support QUALIFY, WINDOW clause, or GENERATE_DATE_ARRAY
            #[cfg(feature = "dialect-redshift")]
            DialectType::Redshift => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_window_clause(expr)?;
                let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
                Ok(expr)
            }
            // StarRocks doesn't support BETWEEN in DELETE statements or QUALIFY
            #[cfg(feature = "dialect-starrocks")]
            DialectType::StarRocks => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::expand_between_in_delete(expr)?;
                Ok(expr)
            }
            // DataFusion supports QUALIFY and semi/anti joins natively
            #[cfg(feature = "dialect-datafusion")]
            DialectType::DataFusion => Ok(expr),
            // Oracle doesn't support QUALIFY
            #[cfg(feature = "dialect-oracle")]
            DialectType::Oracle => {
                let expr = transforms::eliminate_qualify(expr)?;
                Ok(expr)
            }
            // Drill - no special preprocessing needed
            #[cfg(feature = "dialect-drill")]
            DialectType::Drill => Ok(expr),
            // Teradata - no special preprocessing needed
            #[cfg(feature = "dialect-teradata")]
            DialectType::Teradata => Ok(expr),
            // ClickHouse doesn't support ORDER BY/LIMIT directly on UNION
            #[cfg(feature = "dialect-clickhouse")]
            DialectType::ClickHouse => {
                let expr = transforms::no_limit_order_by_union(expr)?;
                Ok(expr)
            }
            // Other dialects - no preprocessing
            _ => Ok(expr),
        }
    }
2231
    /// Transpile SQL from this dialect to another.
    ///
    /// Parses `sql` with this (source) dialect and renders one output string per
    /// input statement in the `target` dialect's syntax.
    pub fn transpile_to(&self, sql: &str, target: DialectType) -> Result<Vec<String>> {
        self.transpile_to_inner(sql, target, false)
    }
2236
    /// Transpile SQL from this dialect to another with pretty printing enabled.
    ///
    /// Same pipeline as [`Dialect::transpile_to`], but output is multi-line formatted.
    pub fn transpile_to_pretty(&self, sql: &str, target: DialectType) -> Result<Vec<String>> {
        self.transpile_to_inner(sql, target, true)
    }
2241
2242 #[cfg(not(feature = "transpile"))]
2243 fn transpile_to_inner(
2244 &self,
2245 sql: &str,
2246 target: DialectType,
2247 pretty: bool,
2248 ) -> Result<Vec<String>> {
2249 // Without the transpile feature, only same-dialect or to/from generic is supported
2250 if self.dialect_type != target
2251 && self.dialect_type != DialectType::Generic
2252 && target != DialectType::Generic
2253 {
2254 return Err(crate::error::Error::parse(
2255 "Cross-dialect transpilation not available in this build",
2256 0,
2257 0,
2258 ));
2259 }
2260
2261 let expressions = self.parse(sql)?;
2262 let target_dialect = Dialect::get(target);
2263
2264 expressions
2265 .into_iter()
2266 .map(|expr| {
2267 let transformed = target_dialect.transform(expr)?;
2268 if pretty {
2269 target_dialect.generate_pretty_with_source(&transformed, self.dialect_type)
2270 } else {
2271 target_dialect.generate_with_source(&transformed, self.dialect_type)
2272 }
2273 })
2274 .collect()
2275 }
2276
    /// Full cross-dialect transpilation pipeline (enabled by the `transpile` feature).
    ///
    /// Per statement: parse with the source dialect, apply a sequence of
    /// source/target-specific AST rewrites and cross-dialect normalizations,
    /// then run the target dialect's transform and generate the output SQL.
    /// The rewrite steps below are order-sensitive; see the inline comments.
    #[cfg(feature = "transpile")]
    fn transpile_to_inner(
        &self,
        sql: &str,
        target: DialectType,
        pretty: bool,
    ) -> Result<Vec<String>> {
        let expressions = self.parse(sql)?;
        let target_dialect = Dialect::get(target);

        expressions
            .into_iter()
            .map(|expr| {
                // DuckDB source: normalize VARCHAR/CHAR to TEXT (DuckDB doesn't support
                // VARCHAR length constraints). This emulates Python sqlglot's DuckDB parser
                // where VARCHAR_LENGTH = None and VARCHAR maps to TEXT.
                let expr = if matches!(self.dialect_type, DialectType::DuckDB) {
                    use crate::expressions::DataType as DT;
                    transform_recursive(expr, &|e| match e {
                        Expression::DataType(DT::VarChar { .. }) => {
                            Ok(Expression::DataType(DT::Text))
                        }
                        Expression::DataType(DT::Char { .. }) => Ok(Expression::DataType(DT::Text)),
                        _ => Ok(e),
                    })?
                } else {
                    expr
                };

                // When source and target differ, first normalize the source dialect's
                // AST constructs to standard SQL, so that the target dialect can handle them.
                // This handles cases like Snowflake's SQUARE -> POWER, DIV0 -> CASE, etc.
                let normalized =
                    if self.dialect_type != target && self.dialect_type != DialectType::Generic {
                        self.transform(expr)?
                    } else {
                        expr
                    };

                // For TSQL source targeting non-TSQL: unwrap ISNULL(JSON_QUERY(...), JSON_VALUE(...))
                // to just JSON_QUERY(...) so cross_dialect_normalize can convert it cleanly.
                // The TSQL read transform wraps JsonQuery in ISNULL for identity, but for
                // cross-dialect transpilation we need the unwrapped JSON_QUERY.
                let normalized =
                    if matches!(self.dialect_type, DialectType::TSQL | DialectType::Fabric)
                        && !matches!(target, DialectType::TSQL | DialectType::Fabric)
                    {
                        transform_recursive(normalized, &|e| {
                            if let Expression::Function(ref f) = e {
                                if f.name.eq_ignore_ascii_case("ISNULL") && f.args.len() == 2 {
                                    // Check if first arg is JSON_QUERY and second is JSON_VALUE
                                    if let (
                                        Expression::Function(ref jq),
                                        Expression::Function(ref jv),
                                    ) = (&f.args[0], &f.args[1])
                                    {
                                        if jq.name.eq_ignore_ascii_case("JSON_QUERY")
                                            && jv.name.eq_ignore_ascii_case("JSON_VALUE")
                                        {
                                            // Unwrap: return just JSON_QUERY(...)
                                            return Ok(f.args[0].clone());
                                        }
                                    }
                                }
                            }
                            Ok(e)
                        })?
                    } else {
                        normalized
                    };

                // Snowflake source to non-Snowflake target: CURRENT_TIME -> LOCALTIME
                // Snowflake's CURRENT_TIME is equivalent to LOCALTIME in other dialects.
                // Python sqlglot parses Snowflake's CURRENT_TIME as Localtime expression.
                let normalized = if matches!(self.dialect_type, DialectType::Snowflake)
                    && !matches!(target, DialectType::Snowflake)
                {
                    transform_recursive(normalized, &|e| {
                        if let Expression::Function(ref f) = e {
                            if f.name.eq_ignore_ascii_case("CURRENT_TIME") {
                                return Ok(Expression::Localtime(Box::new(
                                    crate::expressions::Localtime { this: None },
                                )));
                            }
                        }
                        Ok(e)
                    })?
                } else {
                    normalized
                };

                // Snowflake source to DuckDB target: REPEAT(' ', n) -> REPEAT(' ', CAST(n AS BIGINT))
                // Snowflake's SPACE(n) is converted to REPEAT(' ', n) by the Snowflake source
                // transform. DuckDB requires the count argument to be BIGINT.
                let normalized = if matches!(self.dialect_type, DialectType::Snowflake)
                    && matches!(target, DialectType::DuckDB)
                {
                    transform_recursive(normalized, &|e| {
                        if let Expression::Function(ref f) = e {
                            if f.name.eq_ignore_ascii_case("REPEAT") && f.args.len() == 2 {
                                // Check if first arg is space string literal
                                if let Expression::Literal(crate::expressions::Literal::String(
                                    ref s,
                                )) = f.args[0]
                                {
                                    if s == " " {
                                        // Wrap second arg in CAST(... AS BIGINT) if not already
                                        if !matches!(f.args[1], Expression::Cast(_)) {
                                            let mut new_args = f.args.clone();
                                            new_args[1] = Expression::Cast(Box::new(
                                                crate::expressions::Cast {
                                                    this: new_args[1].clone(),
                                                    to: crate::expressions::DataType::BigInt {
                                                        length: None,
                                                    },
                                                    trailing_comments: Vec::new(),
                                                    double_colon_syntax: false,
                                                    format: None,
                                                    default: None,
                                                },
                                            ));
                                            return Ok(Expression::Function(Box::new(
                                                crate::expressions::Function {
                                                    name: f.name.clone(),
                                                    args: new_args,
                                                    distinct: f.distinct,
                                                    trailing_comments: f.trailing_comments.clone(),
                                                    use_bracket_syntax: f.use_bracket_syntax,
                                                    no_parens: f.no_parens,
                                                    quoted: f.quoted,
                                                },
                                            )));
                                        }
                                    }
                                }
                            }
                        }
                        Ok(e)
                    })?
                } else {
                    normalized
                };

                // Propagate struct field names in arrays (for BigQuery source to non-BigQuery target)
                // BigQuery->BigQuery should NOT propagate names (BigQuery handles implicit inheritance)
                let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
                    && !matches!(target, DialectType::BigQuery)
                {
                    crate::transforms::propagate_struct_field_names(normalized)?
                } else {
                    normalized
                };

                // Apply cross-dialect semantic normalizations
                let normalized =
                    Self::cross_dialect_normalize(normalized, self.dialect_type, target)?;

                // For DuckDB target from BigQuery source: wrap UNNEST of struct arrays in
                // (SELECT UNNEST(..., max_depth => 2)) subquery
                // Must run BEFORE unnest_alias_to_column_alias since it changes alias structure
                let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
                    && matches!(target, DialectType::DuckDB)
                {
                    crate::transforms::wrap_duckdb_unnest_struct(normalized)?
                } else {
                    normalized
                };

                // Convert BigQuery UNNEST aliases to column-alias format for DuckDB/Presto/Spark
                // UNNEST(arr) AS x -> UNNEST(arr) AS _t0(x)
                let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
                    && matches!(
                        target,
                        DialectType::DuckDB
                            | DialectType::Presto
                            | DialectType::Trino
                            | DialectType::Athena
                            | DialectType::Spark
                            | DialectType::Databricks
                    ) {
                    crate::transforms::unnest_alias_to_column_alias(normalized)?
                } else if matches!(self.dialect_type, DialectType::BigQuery)
                    && matches!(target, DialectType::BigQuery | DialectType::Redshift)
                {
                    // For BigQuery/Redshift targets: move UNNEST FROM items to CROSS JOINs
                    // but don't convert alias format (no _t0 wrapper)
                    let result = crate::transforms::unnest_from_to_cross_join(normalized)?;
                    // For Redshift: strip UNNEST when arg is a column reference path
                    if matches!(target, DialectType::Redshift) {
                        crate::transforms::strip_unnest_column_refs(result)?
                    } else {
                        result
                    }
                } else {
                    normalized
                };

                // For Presto/Trino targets from PostgreSQL/Redshift source:
                // Wrap UNNEST aliases from GENERATE_SERIES conversion: AS s -> AS _u(s)
                let normalized = if matches!(
                    self.dialect_type,
                    DialectType::PostgreSQL | DialectType::Redshift
                ) && matches!(
                    target,
                    DialectType::Presto | DialectType::Trino | DialectType::Athena
                ) {
                    crate::transforms::wrap_unnest_join_aliases(normalized)?
                } else {
                    normalized
                };

                // Eliminate DISTINCT ON with target-dialect awareness
                // This must happen after source transform (which may produce DISTINCT ON)
                // and before target transform, with knowledge of the target dialect's NULL ordering behavior
                let normalized =
                    crate::transforms::eliminate_distinct_on_for_dialect(normalized, Some(target))?;

                // GENERATE_DATE_ARRAY in UNNEST -> Snowflake ARRAY_GENERATE_RANGE + DATEADD
                let normalized = if matches!(target, DialectType::Snowflake) {
                    Self::transform_generate_date_array_snowflake(normalized)?
                } else {
                    normalized
                };

                // CROSS JOIN UNNEST -> LATERAL VIEW EXPLODE/INLINE for Spark/Hive/Databricks
                let normalized = if matches!(
                    target,
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive
                ) {
                    crate::transforms::unnest_to_explode_select(normalized)?
                } else {
                    normalized
                };

                // Wrap UNION with ORDER BY/LIMIT in a subquery for dialects that require it
                let normalized = if matches!(target, DialectType::ClickHouse | DialectType::TSQL) {
                    crate::transforms::no_limit_order_by_union(normalized)?
                } else {
                    normalized
                };

                // TSQL: Convert COUNT(*) -> COUNT_BIG(*) when source is not TSQL/Fabric
                // Python sqlglot does this in the TSQL generator, but we can't do it there
                // because it would break TSQL -> TSQL identity
                let normalized = if matches!(target, DialectType::TSQL | DialectType::Fabric)
                    && !matches!(self.dialect_type, DialectType::TSQL | DialectType::Fabric)
                {
                    transform_recursive(normalized, &|e| {
                        if let Expression::Count(ref c) = e {
                            // Build COUNT_BIG(...) as an AggregateFunction
                            let args = if c.star {
                                vec![Expression::Star(crate::expressions::Star {
                                    table: None,
                                    except: None,
                                    replace: None,
                                    rename: None,
                                    trailing_comments: Vec::new(),
                                })]
                            } else if let Some(ref this) = c.this {
                                vec![this.clone()]
                            } else {
                                vec![]
                            };
                            Ok(Expression::AggregateFunction(Box::new(
                                crate::expressions::AggregateFunction {
                                    name: "COUNT_BIG".to_string(),
                                    args,
                                    distinct: c.distinct,
                                    filter: c.filter.clone(),
                                    order_by: Vec::new(),
                                    limit: None,
                                    ignore_nulls: None,
                                },
                            )))
                        } else {
                            Ok(e)
                        }
                    })?
                } else {
                    normalized
                };

                let transformed = target_dialect.transform(normalized)?;
                let mut sql = if pretty {
                    target_dialect.generate_pretty_with_source(&transformed, self.dialect_type)?
                } else {
                    target_dialect.generate_with_source(&transformed, self.dialect_type)?
                };

                // Align a known Snowflake pretty-print edge case with Python sqlglot output.
                if pretty && target == DialectType::Snowflake {
                    sql = Self::normalize_snowflake_pretty(sql);
                }

                Ok(sql)
            })
            .collect()
    }
2575}
2576
2577// Transpile-only methods: cross-dialect normalization and helpers
2578#[cfg(feature = "transpile")]
2579impl Dialect {
2580 /// Transform BigQuery GENERATE_DATE_ARRAY in UNNEST for Snowflake target.
2581 /// Converts:
2582 /// SELECT ..., alias, ... FROM t CROSS JOIN UNNEST(GENERATE_DATE_ARRAY(start, end, INTERVAL '1' unit)) AS alias
2583 /// To:
2584 /// SELECT ..., DATEADD(unit, CAST(alias AS INT), CAST(start AS DATE)) AS alias, ...
2585 /// FROM t, LATERAL FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (DATEDIFF(unit, start, end) + 1 - 1) + 1)) AS _t0(seq, key, path, index, alias, this)
2586 fn transform_generate_date_array_snowflake(expr: Expression) -> Result<Expression> {
2587 use crate::expressions::*;
2588 transform_recursive(expr, &|e| {
2589 // Handle ARRAY_SIZE(GENERATE_DATE_ARRAY(...)) -> ARRAY_SIZE((SELECT ARRAY_AGG(*) FROM subquery))
2590 if let Expression::ArraySize(ref af) = e {
2591 if let Expression::Function(ref f) = af.this {
2592 if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY") && f.args.len() >= 2 {
2593 let result = Self::convert_array_size_gda_snowflake(f)?;
2594 return Ok(result);
2595 }
2596 }
2597 }
2598
2599 let Expression::Select(mut sel) = e else {
2600 return Ok(e);
2601 };
2602
2603 // Find joins with UNNEST containing GenerateSeries (from GENERATE_DATE_ARRAY conversion)
2604 let mut gda_info: Option<(String, Expression, Expression, String)> = None; // (alias_name, start_expr, end_expr, unit)
2605 let mut gda_join_idx: Option<usize> = None;
2606
2607 for (idx, join) in sel.joins.iter().enumerate() {
2608 // The join.this may be:
2609 // 1. Unnest(UnnestFunc { alias: Some("mnth"), ... })
2610 // 2. Alias(Alias { this: Unnest(UnnestFunc { alias: None, ... }), alias: "mnth", ... })
2611 let (unnest_ref, alias_name) = match &join.this {
2612 Expression::Unnest(ref unnest) => {
2613 let alias = unnest.alias.as_ref().map(|id| id.name.clone());
2614 (Some(unnest.as_ref()), alias)
2615 }
2616 Expression::Alias(ref a) => {
2617 if let Expression::Unnest(ref unnest) = a.this {
2618 (Some(unnest.as_ref()), Some(a.alias.name.clone()))
2619 } else {
2620 (None, None)
2621 }
2622 }
2623 _ => (None, None),
2624 };
2625
2626 if let (Some(unnest), Some(alias)) = (unnest_ref, alias_name) {
2627 // Check the main expression (this) of the UNNEST for GENERATE_DATE_ARRAY function
2628 if let Expression::Function(ref f) = unnest.this {
2629 if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY") && f.args.len() >= 2 {
2630 let start_expr = f.args[0].clone();
2631 let end_expr = f.args[1].clone();
2632 let step = f.args.get(2).cloned();
2633
2634 // Extract unit from step interval
2635 let unit = if let Some(Expression::Interval(ref iv)) = step {
2636 if let Some(IntervalUnitSpec::Simple { ref unit, .. }) = iv.unit {
2637 Some(format!("{:?}", unit).to_uppercase())
2638 } else if let Some(ref this) = iv.this {
2639 // The interval may be stored as a string like "1 MONTH"
2640 if let Expression::Literal(Literal::String(ref s)) = this {
2641 let parts: Vec<&str> = s.split_whitespace().collect();
2642 if parts.len() == 2 {
2643 Some(parts[1].to_uppercase())
2644 } else if parts.len() == 1 {
2645 // Single word like "MONTH" or just "1"
2646 let upper = parts[0].to_uppercase();
2647 if matches!(
2648 upper.as_str(),
2649 "YEAR"
2650 | "QUARTER"
2651 | "MONTH"
2652 | "WEEK"
2653 | "DAY"
2654 | "HOUR"
2655 | "MINUTE"
2656 | "SECOND"
2657 ) {
2658 Some(upper)
2659 } else {
2660 None
2661 }
2662 } else {
2663 None
2664 }
2665 } else {
2666 None
2667 }
2668 } else {
2669 None
2670 }
2671 } else {
2672 None
2673 };
2674
2675 if let Some(unit_str) = unit {
2676 gda_info = Some((alias, start_expr, end_expr, unit_str));
2677 gda_join_idx = Some(idx);
2678 }
2679 }
2680 }
2681 }
2682 if gda_info.is_some() {
2683 break;
2684 }
2685 }
2686
2687 let Some((alias_name, start_expr, end_expr, unit_str)) = gda_info else {
2688 // Also check FROM clause for UNNEST(GENERATE_DATE_ARRAY(...)) patterns
2689 // This handles Generic->Snowflake where GENERATE_DATE_ARRAY is in FROM, not in JOIN
2690 let result = Self::try_transform_from_gda_snowflake(sel);
2691 return result;
2692 };
2693 let join_idx = gda_join_idx.unwrap();
2694
2695 // Build ARRAY_GENERATE_RANGE(0, (DATEDIFF(unit, start, end) + 1 - 1) + 1)
2696 let datediff = Expression::Function(Box::new(Function::new(
2697 "DATEDIFF".to_string(),
2698 vec![
2699 Expression::Column(Column {
2700 name: Identifier::new(&unit_str),
2701 table: None,
2702 join_mark: false,
2703 trailing_comments: vec![],
2704 }),
2705 start_expr.clone(),
2706 end_expr.clone(),
2707 ],
2708 )));
2709 // (DATEDIFF(...) + 1 - 1) + 1
2710 let plus_one = Expression::Add(Box::new(BinaryOp {
2711 left: datediff,
2712 right: Expression::Literal(Literal::Number("1".to_string())),
2713 left_comments: vec![],
2714 operator_comments: vec![],
2715 trailing_comments: vec![],
2716 }));
2717 let minus_one = Expression::Sub(Box::new(BinaryOp {
2718 left: plus_one,
2719 right: Expression::Literal(Literal::Number("1".to_string())),
2720 left_comments: vec![],
2721 operator_comments: vec![],
2722 trailing_comments: vec![],
2723 }));
2724 let paren_inner = Expression::Paren(Box::new(Paren {
2725 this: minus_one,
2726 trailing_comments: vec![],
2727 }));
2728 let outer_plus_one = Expression::Add(Box::new(BinaryOp {
2729 left: paren_inner,
2730 right: Expression::Literal(Literal::Number("1".to_string())),
2731 left_comments: vec![],
2732 operator_comments: vec![],
2733 trailing_comments: vec![],
2734 }));
2735
2736 let array_gen_range = Expression::Function(Box::new(Function::new(
2737 "ARRAY_GENERATE_RANGE".to_string(),
2738 vec![
2739 Expression::Literal(Literal::Number("0".to_string())),
2740 outer_plus_one,
2741 ],
2742 )));
2743
2744 // Build FLATTEN(INPUT => ARRAY_GENERATE_RANGE(...))
2745 let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
2746 name: Identifier::new("INPUT"),
2747 value: array_gen_range,
2748 separator: crate::expressions::NamedArgSeparator::DArrow,
2749 }));
2750 let flatten = Expression::Function(Box::new(Function::new(
2751 "FLATTEN".to_string(),
2752 vec![flatten_input],
2753 )));
2754
2755 // Build LATERAL FLATTEN(...) AS _t0(seq, key, path, index, alias, this)
2756 let alias_table = Alias {
2757 this: flatten,
2758 alias: Identifier::new("_t0"),
2759 column_aliases: vec![
2760 Identifier::new("seq"),
2761 Identifier::new("key"),
2762 Identifier::new("path"),
2763 Identifier::new("index"),
2764 Identifier::new(&alias_name),
2765 Identifier::new("this"),
2766 ],
2767 pre_alias_comments: vec![],
2768 trailing_comments: vec![],
2769 };
2770 let lateral_expr = Expression::Lateral(Box::new(Lateral {
2771 this: Box::new(Expression::Alias(Box::new(alias_table))),
2772 view: None,
2773 outer: None,
2774 alias: None,
2775 alias_quoted: false,
2776 cross_apply: None,
2777 ordinality: None,
2778 column_aliases: vec![],
2779 }));
2780
2781 // Remove the original join and add to FROM expressions
2782 sel.joins.remove(join_idx);
2783 if let Some(ref mut from) = sel.from {
2784 from.expressions.push(lateral_expr);
2785 }
2786
2787 // Build DATEADD(unit, CAST(alias AS INT), CAST(start AS DATE))
2788 let dateadd_expr = Expression::Function(Box::new(Function::new(
2789 "DATEADD".to_string(),
2790 vec![
2791 Expression::Column(Column {
2792 name: Identifier::new(&unit_str),
2793 table: None,
2794 join_mark: false,
2795 trailing_comments: vec![],
2796 }),
2797 Expression::Cast(Box::new(Cast {
2798 this: Expression::Column(Column {
2799 name: Identifier::new(&alias_name),
2800 table: None,
2801 join_mark: false,
2802 trailing_comments: vec![],
2803 }),
2804 to: DataType::Int {
2805 length: None,
2806 integer_spelling: false,
2807 },
2808 trailing_comments: vec![],
2809 double_colon_syntax: false,
2810 format: None,
2811 default: None,
2812 })),
2813 Expression::Cast(Box::new(Cast {
2814 this: start_expr.clone(),
2815 to: DataType::Date,
2816 trailing_comments: vec![],
2817 double_colon_syntax: false,
2818 format: None,
2819 default: None,
2820 })),
2821 ],
2822 )));
2823
2824 // Replace references to the alias in the SELECT list
2825 let new_exprs: Vec<Expression> = sel
2826 .expressions
2827 .iter()
2828 .map(|expr| Self::replace_column_ref_with_dateadd(expr, &alias_name, &dateadd_expr))
2829 .collect();
2830 sel.expressions = new_exprs;
2831
2832 Ok(Expression::Select(sel))
2833 })
2834 }
2835
2836 /// Helper: replace column references to `alias_name` with dateadd expression
2837 fn replace_column_ref_with_dateadd(
2838 expr: &Expression,
2839 alias_name: &str,
2840 dateadd: &Expression,
2841 ) -> Expression {
2842 use crate::expressions::*;
2843 match expr {
2844 Expression::Column(c) if c.name.name == alias_name && c.table.is_none() => {
2845 // Plain column reference -> DATEADD(...) AS alias_name
2846 Expression::Alias(Box::new(Alias {
2847 this: dateadd.clone(),
2848 alias: Identifier::new(alias_name),
2849 column_aliases: vec![],
2850 pre_alias_comments: vec![],
2851 trailing_comments: vec![],
2852 }))
2853 }
2854 Expression::Alias(a) => {
2855 // Check if the inner expression references the alias
2856 let new_this = Self::replace_column_ref_inner(&a.this, alias_name, dateadd);
2857 Expression::Alias(Box::new(Alias {
2858 this: new_this,
2859 alias: a.alias.clone(),
2860 column_aliases: a.column_aliases.clone(),
2861 pre_alias_comments: a.pre_alias_comments.clone(),
2862 trailing_comments: a.trailing_comments.clone(),
2863 }))
2864 }
2865 _ => expr.clone(),
2866 }
2867 }
2868
2869 /// Helper: replace column references in inner expression (not top-level)
2870 fn replace_column_ref_inner(
2871 expr: &Expression,
2872 alias_name: &str,
2873 dateadd: &Expression,
2874 ) -> Expression {
2875 use crate::expressions::*;
2876 match expr {
2877 Expression::Column(c) if c.name.name == alias_name && c.table.is_none() => {
2878 dateadd.clone()
2879 }
2880 Expression::Add(op) => {
2881 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
2882 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
2883 Expression::Add(Box::new(BinaryOp {
2884 left,
2885 right,
2886 left_comments: op.left_comments.clone(),
2887 operator_comments: op.operator_comments.clone(),
2888 trailing_comments: op.trailing_comments.clone(),
2889 }))
2890 }
2891 Expression::Sub(op) => {
2892 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
2893 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
2894 Expression::Sub(Box::new(BinaryOp {
2895 left,
2896 right,
2897 left_comments: op.left_comments.clone(),
2898 operator_comments: op.operator_comments.clone(),
2899 trailing_comments: op.trailing_comments.clone(),
2900 }))
2901 }
2902 Expression::Mul(op) => {
2903 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
2904 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
2905 Expression::Mul(Box::new(BinaryOp {
2906 left,
2907 right,
2908 left_comments: op.left_comments.clone(),
2909 operator_comments: op.operator_comments.clone(),
2910 trailing_comments: op.trailing_comments.clone(),
2911 }))
2912 }
2913 _ => expr.clone(),
2914 }
2915 }
2916
    /// Handle UNNEST(GENERATE_DATE_ARRAY(...)) in FROM clause for Snowflake target.
    /// Converts to a subquery with DATEADD + TABLE(FLATTEN(ARRAY_GENERATE_RANGE(...))).
    ///
    /// FROM-clause counterpart of the join-based rewrite: scans `sel.from` for
    /// either a bare `UNNEST(GENERATE_DATE_ARRAY(...))` or the aliased form
    /// `UNNEST(...) AS _q(col)`, and replaces the matching FROM entry with an
    /// equivalent Snowflake subquery. If no such pattern is found (or the step
    /// interval's unit cannot be determined), the SELECT is returned unchanged.
    fn try_transform_from_gda_snowflake(
        mut sel: Box<crate::expressions::Select>,
    ) -> Result<Expression> {
        use crate::expressions::*;

        // Extract GDA info from FROM clause
        let mut gda_info: Option<(
            usize,
            String,
            Expression,
            Expression,
            String,
            Option<(String, Vec<Identifier>)>,
        )> = None; // (from_idx, col_name, start, end, unit, outer_alias)

        if let Some(ref from) = sel.from {
            for (idx, table_expr) in from.expressions.iter().enumerate() {
                // Pattern 1: UNNEST(GENERATE_DATE_ARRAY(...))
                // Pattern 2: Alias(UNNEST(GENERATE_DATE_ARRAY(...))) AS _q(date_week)
                let (unnest_opt, outer_alias_info) = match table_expr {
                    Expression::Unnest(ref unnest) => (Some(unnest.as_ref()), None),
                    Expression::Alias(ref a) => {
                        if let Expression::Unnest(ref unnest) = a.this {
                            let alias_info = (a.alias.name.clone(), a.column_aliases.clone());
                            (Some(unnest.as_ref()), Some(alias_info))
                        } else {
                            (None, None)
                        }
                    }
                    _ => (None, None),
                };

                if let Some(unnest) = unnest_opt {
                    // Check for GENERATE_DATE_ARRAY function
                    let func_opt = match &unnest.this {
                        Expression::Function(ref f)
                            if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY")
                                && f.args.len() >= 2 =>
                        {
                            Some(f)
                        }
                        // Also check for GenerateSeries (from earlier normalization)
                        _ => None,
                    };

                    if let Some(f) = func_opt {
                        let start_expr = f.args[0].clone();
                        let end_expr = f.args[1].clone();
                        // Optional third argument: the step interval.
                        let step = f.args.get(2).cloned();

                        // Extract unit and column name. The column name comes from
                        // the outer alias's first column alias (e.g. `_q(date_week)`)
                        // or defaults to "value" (FLATTEN's VALUE column).
                        let unit = Self::extract_interval_unit_str(&step);
                        let col_name = outer_alias_info
                            .as_ref()
                            .and_then(|(_, cols)| cols.first().map(|id| id.name.clone()))
                            .unwrap_or_else(|| "value".to_string());

                        if let Some(unit_str) = unit {
                            gda_info = Some((
                                idx,
                                col_name,
                                start_expr,
                                end_expr,
                                unit_str,
                                outer_alias_info,
                            ));
                            break;
                        }
                    }
                }
            }
        }

        // No matching FROM entry: hand the SELECT back untouched.
        let Some((from_idx, col_name, start_expr, end_expr, unit_str, outer_alias_info)) = gda_info
        else {
            return Ok(Expression::Select(sel));
        };

        // Build the Snowflake subquery:
        // (SELECT DATEADD(unit, CAST(col_name AS INT), CAST(start AS DATE)) AS col_name
        //  FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (DATEDIFF(unit, start, end) + 1 - 1) + 1))) AS _t0(seq, key, path, index, col_name, this))

        // DATEDIFF(unit, start, end)
        let datediff = Expression::Function(Box::new(Function::new(
            "DATEDIFF".to_string(),
            vec![
                Expression::Column(Column {
                    name: Identifier::new(&unit_str),
                    table: None,
                    join_mark: false,
                    trailing_comments: vec![],
                }),
                start_expr.clone(),
                end_expr.clone(),
            ],
        )));
        // (DATEDIFF(...) + 1 - 1) + 1 — kept un-simplified so the generated SQL
        // matches Python sqlglot's output token-for-token.
        let plus_one = Expression::Add(Box::new(BinaryOp {
            left: datediff,
            right: Expression::Literal(Literal::Number("1".to_string())),
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
        }));
        let minus_one = Expression::Sub(Box::new(BinaryOp {
            left: plus_one,
            right: Expression::Literal(Literal::Number("1".to_string())),
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
        }));
        let paren_inner = Expression::Paren(Box::new(Paren {
            this: minus_one,
            trailing_comments: vec![],
        }));
        let outer_plus_one = Expression::Add(Box::new(BinaryOp {
            left: paren_inner,
            right: Expression::Literal(Literal::Number("1".to_string())),
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
        }));

        let array_gen_range = Expression::Function(Box::new(Function::new(
            "ARRAY_GENERATE_RANGE".to_string(),
            vec![
                Expression::Literal(Literal::Number("0".to_string())),
                outer_plus_one,
            ],
        )));

        // TABLE(FLATTEN(INPUT => ...))
        let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
            name: Identifier::new("INPUT"),
            value: array_gen_range,
            separator: crate::expressions::NamedArgSeparator::DArrow,
        }));
        let flatten = Expression::Function(Box::new(Function::new(
            "FLATTEN".to_string(),
            vec![flatten_input],
        )));

        // Determine alias name for the table: use outer alias or _t0
        let table_alias_name = outer_alias_info
            .as_ref()
            .map(|(name, _)| name.clone())
            .unwrap_or_else(|| "_t0".to_string());

        // TABLE(FLATTEN(...)) AS _t0(seq, key, path, index, col_name, this)
        // The column-alias list mirrors FLATTEN's fixed output columns, with the
        // requested column name bound to FLATTEN's VALUE position.
        let table_func =
            Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])));
        let flatten_aliased = Expression::Alias(Box::new(Alias {
            this: table_func,
            alias: Identifier::new(&table_alias_name),
            column_aliases: vec![
                Identifier::new("seq"),
                Identifier::new("key"),
                Identifier::new("path"),
                Identifier::new("index"),
                Identifier::new(&col_name),
                Identifier::new("this"),
            ],
            pre_alias_comments: vec![],
            trailing_comments: vec![],
        }));

        // SELECT DATEADD(unit, CAST(col_name AS INT), CAST(start AS DATE)) AS col_name
        let dateadd_expr = Expression::Function(Box::new(Function::new(
            "DATEADD".to_string(),
            vec![
                Expression::Column(Column {
                    name: Identifier::new(&unit_str),
                    table: None,
                    join_mark: false,
                    trailing_comments: vec![],
                }),
                Expression::Cast(Box::new(Cast {
                    this: Expression::Column(Column {
                        name: Identifier::new(&col_name),
                        table: None,
                        join_mark: false,
                        trailing_comments: vec![],
                    }),
                    to: DataType::Int {
                        length: None,
                        integer_spelling: false,
                    },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                })),
                // Use start_expr directly - it's already been normalized (DATE literal -> CAST)
                start_expr.clone(),
            ],
        )));
        let dateadd_aliased = Expression::Alias(Box::new(Alias {
            this: dateadd_expr,
            alias: Identifier::new(&col_name),
            column_aliases: vec![],
            pre_alias_comments: vec![],
            trailing_comments: vec![],
        }));

        // Build inner SELECT
        let mut inner_select = Select::new();
        inner_select.expressions = vec![dateadd_aliased];
        inner_select.from = Some(From {
            expressions: vec![flatten_aliased],
        });

        let inner_select_expr = Expression::Select(Box::new(inner_select));
        let subquery = Expression::Subquery(Box::new(Subquery {
            this: inner_select_expr,
            alias: None,
            column_aliases: vec![],
            order_by: None,
            limit: None,
            offset: None,
            distribute_by: None,
            sort_by: None,
            cluster_by: None,
            lateral: false,
            modifiers_inside: false,
            trailing_comments: vec![],
        }));

        // If there was an outer alias (e.g., AS _q(date_week)), wrap with alias
        // so outer references to _q.date_week keep resolving.
        let replacement = if let Some((alias_name, col_aliases)) = outer_alias_info {
            Expression::Alias(Box::new(Alias {
                this: subquery,
                alias: Identifier::new(&alias_name),
                column_aliases: col_aliases,
                pre_alias_comments: vec![],
                trailing_comments: vec![],
            }))
        } else {
            subquery
        };

        // Replace the FROM expression in place at the index found earlier.
        if let Some(ref mut from) = sel.from {
            from.expressions[from_idx] = replacement;
        }

        Ok(Expression::Select(sel))
    }
3166
    /// Convert ARRAY_SIZE(GENERATE_DATE_ARRAY(start, end, step)) for Snowflake.
    /// Produces: ARRAY_SIZE((SELECT ARRAY_AGG(*) FROM (SELECT DATEADD(unit, CAST(value AS INT), start) AS value
    /// FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (DATEDIFF(unit, start, end) + 1 - 1) + 1))) AS _t0(...))))
    ///
    /// The caller guarantees `f` has at least two arguments (start, end); the
    /// third (step) is optional and defaults to DAY when absent or unparseable.
    fn convert_array_size_gda_snowflake(f: &crate::expressions::Function) -> Result<Expression> {
        use crate::expressions::*;

        let start_expr = f.args[0].clone();
        let end_expr = f.args[1].clone();
        let step = f.args.get(2).cloned();
        // Fall back to DAY even when the helper cannot determine a unit.
        let unit_str = Self::extract_interval_unit_str(&step).unwrap_or_else(|| "DAY".to_string());
        // Fixed inner column name: FLATTEN's VALUE position.
        let col_name = "value";

        // Build the inner subquery: same as try_transform_from_gda_snowflake
        let datediff = Expression::Function(Box::new(Function::new(
            "DATEDIFF".to_string(),
            vec![
                Expression::Column(Column {
                    name: Identifier::new(&unit_str),
                    table: None,
                    join_mark: false,
                    trailing_comments: vec![],
                }),
                start_expr.clone(),
                end_expr.clone(),
            ],
        )));
        // (DATEDIFF(...) + 1 - 1) + 1 — kept un-simplified so the generated SQL
        // matches Python sqlglot's output token-for-token.
        let plus_one = Expression::Add(Box::new(BinaryOp {
            left: datediff,
            right: Expression::Literal(Literal::Number("1".to_string())),
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
        }));
        let minus_one = Expression::Sub(Box::new(BinaryOp {
            left: plus_one,
            right: Expression::Literal(Literal::Number("1".to_string())),
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
        }));
        let paren_inner = Expression::Paren(Box::new(Paren {
            this: minus_one,
            trailing_comments: vec![],
        }));
        let outer_plus_one = Expression::Add(Box::new(BinaryOp {
            left: paren_inner,
            right: Expression::Literal(Literal::Number("1".to_string())),
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
        }));

        let array_gen_range = Expression::Function(Box::new(Function::new(
            "ARRAY_GENERATE_RANGE".to_string(),
            vec![
                Expression::Literal(Literal::Number("0".to_string())),
                outer_plus_one,
            ],
        )));

        // FLATTEN(INPUT => ARRAY_GENERATE_RANGE(...))
        let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
            name: Identifier::new("INPUT"),
            value: array_gen_range,
            separator: crate::expressions::NamedArgSeparator::DArrow,
        }));
        let flatten = Expression::Function(Box::new(Function::new(
            "FLATTEN".to_string(),
            vec![flatten_input],
        )));

        // TABLE(FLATTEN(...)) AS _t0(seq, key, path, index, value, this)
        // The column-alias list mirrors FLATTEN's fixed output columns.
        let table_func =
            Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])));
        let flatten_aliased = Expression::Alias(Box::new(Alias {
            this: table_func,
            alias: Identifier::new("_t0"),
            column_aliases: vec![
                Identifier::new("seq"),
                Identifier::new("key"),
                Identifier::new("path"),
                Identifier::new("index"),
                Identifier::new(col_name),
                Identifier::new("this"),
            ],
            pre_alias_comments: vec![],
            trailing_comments: vec![],
        }));

        // DATEADD(unit, CAST(value AS INT), start)
        let dateadd_expr = Expression::Function(Box::new(Function::new(
            "DATEADD".to_string(),
            vec![
                Expression::Column(Column {
                    name: Identifier::new(&unit_str),
                    table: None,
                    join_mark: false,
                    trailing_comments: vec![],
                }),
                Expression::Cast(Box::new(Cast {
                    this: Expression::Column(Column {
                        name: Identifier::new(col_name),
                        table: None,
                        join_mark: false,
                        trailing_comments: vec![],
                    }),
                    to: DataType::Int {
                        length: None,
                        integer_spelling: false,
                    },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                })),
                start_expr.clone(),
            ],
        )));
        let dateadd_aliased = Expression::Alias(Box::new(Alias {
            this: dateadd_expr,
            alias: Identifier::new(col_name),
            column_aliases: vec![],
            pre_alias_comments: vec![],
            trailing_comments: vec![],
        }));

        // Inner SELECT: SELECT DATEADD(...) AS value FROM TABLE(FLATTEN(...)) AS _t0(...)
        let mut inner_select = Select::new();
        inner_select.expressions = vec![dateadd_aliased];
        inner_select.from = Some(From {
            expressions: vec![flatten_aliased],
        });

        // Wrap in subquery for the inner part
        let inner_subquery = Expression::Subquery(Box::new(Subquery {
            this: Expression::Select(Box::new(inner_select)),
            alias: None,
            column_aliases: vec![],
            order_by: None,
            limit: None,
            offset: None,
            distribute_by: None,
            sort_by: None,
            cluster_by: None,
            lateral: false,
            modifiers_inside: false,
            trailing_comments: vec![],
        }));

        // Outer: SELECT ARRAY_AGG(*) FROM (inner_subquery)
        let star = Expression::Star(Star {
            table: None,
            except: None,
            replace: None,
            rename: None,
            trailing_comments: vec![],
        });
        let array_agg = Expression::ArrayAgg(Box::new(AggFunc {
            this: star,
            distinct: false,
            filter: None,
            order_by: vec![],
            name: Some("ARRAY_AGG".to_string()),
            ignore_nulls: None,
            having_max: None,
            limit: None,
        }));

        let mut outer_select = Select::new();
        outer_select.expressions = vec![array_agg];
        outer_select.from = Some(From {
            expressions: vec![inner_subquery],
        });

        // Wrap in a subquery
        let outer_subquery = Expression::Subquery(Box::new(Subquery {
            this: Expression::Select(Box::new(outer_select)),
            alias: None,
            column_aliases: vec![],
            order_by: None,
            limit: None,
            offset: None,
            distribute_by: None,
            sort_by: None,
            cluster_by: None,
            lateral: false,
            modifiers_inside: false,
            trailing_comments: vec![],
        }));

        // ARRAY_SIZE(subquery)
        Ok(Expression::ArraySize(Box::new(UnaryFunc::new(
            outer_subquery,
        ))))
    }
3359
3360 /// Extract interval unit string from an optional step expression.
3361 fn extract_interval_unit_str(step: &Option<Expression>) -> Option<String> {
3362 use crate::expressions::*;
3363 if let Some(Expression::Interval(ref iv)) = step {
3364 if let Some(IntervalUnitSpec::Simple { ref unit, .. }) = iv.unit {
3365 return Some(format!("{:?}", unit).to_uppercase());
3366 }
3367 if let Some(ref this) = iv.this {
3368 if let Expression::Literal(Literal::String(ref s)) = this {
3369 let parts: Vec<&str> = s.split_whitespace().collect();
3370 if parts.len() == 2 {
3371 return Some(parts[1].to_uppercase());
3372 } else if parts.len() == 1 {
3373 let upper = parts[0].to_uppercase();
3374 if matches!(
3375 upper.as_str(),
3376 "YEAR"
3377 | "QUARTER"
3378 | "MONTH"
3379 | "WEEK"
3380 | "DAY"
3381 | "HOUR"
3382 | "MINUTE"
3383 | "SECOND"
3384 ) {
3385 return Some(upper);
3386 }
3387 }
3388 }
3389 }
3390 }
3391 // Default to DAY if no step or no interval
3392 if step.is_none() {
3393 return Some("DAY".to_string());
3394 }
3395 None
3396 }
3397
3398 fn normalize_snowflake_pretty(mut sql: String) -> String {
3399 if sql.contains("LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)")
3400 && sql.contains("ARRAY_GENERATE_RANGE(0, (GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1) + 1)")
3401 {
3402 sql = sql.replace(
3403 "AND uc.user_id <> ALL (SELECT DISTINCT\n _id\n FROM users, LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)\n WHERE\n GET_PATH(datasource.value, 'name') = 'something')",
3404 "AND uc.user_id <> ALL (\n SELECT DISTINCT\n _id\n FROM users, LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)\n WHERE\n GET_PATH(datasource.value, 'name') = 'something'\n )",
3405 );
3406
3407 sql = sql.replace(
3408 "CROSS JOIN TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1) + 1))) AS _u(seq, key, path, index, pos, this)",
3409 "CROSS JOIN TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (\n GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1\n) + 1))) AS _u(seq, key, path, index, pos, this)",
3410 );
3411
3412 sql = sql.replace(
3413 "OR (_u.pos > (ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1)\n AND _u_2.pos_2 = (ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1))",
3414 "OR (\n _u.pos > (\n ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1\n )\n AND _u_2.pos_2 = (\n ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1\n )\n )",
3415 );
3416 }
3417
3418 sql
3419 }
3420
3421 /// Apply cross-dialect semantic normalizations that depend on knowing both source and target.
3422 /// This handles cases where the same syntax has different semantics across dialects.
3423 fn cross_dialect_normalize(
3424 expr: Expression,
3425 source: DialectType,
3426 target: DialectType,
3427 ) -> Result<Expression> {
3428 use crate::expressions::{
3429 AggFunc, BinaryOp, Case, Cast, ConvertTimezone, DataType, DateTimeField, DateTruncFunc,
3430 Function, Identifier, IsNull, Literal, Null, Paren,
3431 };
3432
3433 // Helper to tag which kind of transform to apply
3434 #[derive(Debug)]
3435 enum Action {
3436 None,
3437 GreatestLeastNull,
3438 ArrayGenerateRange,
3439 Div0TypedDivision,
3440 ArrayAggCollectList,
3441 ArrayAggWithinGroupFilter,
3442 ArrayAggFilter,
3443 CastTimestampToDatetime,
3444 DateTruncWrapCast,
3445 ToDateToCast,
3446 ConvertTimezoneToExpr,
3447 SetToVariable,
3448 RegexpReplaceSnowflakeToDuckDB,
3449 BigQueryFunctionNormalize,
3450 BigQuerySafeDivide,
3451 BigQueryCastType,
3452 BigQueryToHexBare, // _BQ_TO_HEX(x) with no LOWER/UPPER wrapper
3453 BigQueryToHexLower, // LOWER(_BQ_TO_HEX(x))
3454 BigQueryToHexUpper, // UPPER(_BQ_TO_HEX(x))
3455 BigQueryLastDayStripUnit, // LAST_DAY(date, MONTH) -> LAST_DAY(date)
3456 BigQueryCastFormat, // CAST(x AS type FORMAT 'fmt') -> PARSE_DATE/PARSE_TIMESTAMP etc.
3457 BigQueryAnyValueHaving, // ANY_VALUE(x HAVING MAX/MIN y) -> ARG_MAX_NULL/ARG_MIN_NULL for DuckDB
3458 BigQueryApproxQuantiles, // APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
3459 GenericFunctionNormalize, // Cross-dialect function renaming (non-BigQuery sources)
3460 RegexpLikeToDuckDB, // RegexpLike -> REGEXP_MATCHES for DuckDB target (partial match)
3461 EpochConvert, // Expression::Epoch -> target-specific epoch function
3462 EpochMsConvert, // Expression::EpochMs -> target-specific epoch ms function
3463 TSQLTypeNormalize, // TSQL types (MONEY, SMALLMONEY, REAL, DATETIME2) -> standard types
3464 MySQLSafeDivide, // MySQL a/b -> a / NULLIF(b, 0) with optional CAST
3465 NullsOrdering, // Add NULLS FIRST/LAST for ORDER BY
3466 AlterTableRenameStripSchema, // ALTER TABLE db.t1 RENAME TO db.t2 -> ALTER TABLE db.t1 RENAME TO t2
3467 StringAggConvert, // STRING_AGG/WITHIN GROUP -> target-specific aggregate
3468 GroupConcatConvert, // GROUP_CONCAT -> target-specific aggregate
3469 TempTableHash, // TSQL #table -> temp table normalization
3470 ArrayLengthConvert, // CARDINALITY/ARRAY_LENGTH/ARRAY_SIZE -> target-specific
3471 DatePartUnquote, // DATE_PART('month', x) -> DATE_PART(month, x) for Snowflake target
3472 NvlClearOriginal, // Clear NVL original_name for cross-dialect transpilation
3473 HiveCastToTryCast, // Hive/Spark CAST -> TRY_CAST for targets that support it
3474 XorExpand, // MySQL XOR -> (a AND NOT b) OR (NOT a AND b) for non-XOR targets
3475 CastTimestampStripTz, // CAST(x AS TIMESTAMP WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark
3476 JsonExtractToGetJsonObject, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
3477 JsonExtractScalarToGetJsonObject, // JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
3478 JsonQueryValueConvert, // JsonQuery/JsonValue -> target-specific (ISNULL wrapper for TSQL, GET_JSON_OBJECT for Spark, etc.)
3479 JsonLiteralToJsonParse, // JSON 'x' -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake
3480 ArraySyntaxConvert, // ARRAY[x] -> ARRAY(x) for Spark, [x] for BigQuery/DuckDB
3481 AtTimeZoneConvert, // AT TIME ZONE -> AT_TIMEZONE (Presto) / FROM_UTC_TIMESTAMP (Spark)
3482 DayOfWeekConvert, // DAY_OF_WEEK -> dialect-specific
3483 MaxByMinByConvert, // MAX_BY/MIN_BY -> argMax/argMin for ClickHouse
3484 ArrayAggToCollectList, // ARRAY_AGG(x ORDER BY ...) -> COLLECT_LIST(x) for Hive/Spark
3485 ElementAtConvert, // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
3486 CurrentUserParens, // CURRENT_USER -> CURRENT_USER() for Snowflake
3487 CastToJsonForSpark, // CAST(x AS JSON) -> TO_JSON(x) for Spark
3488 CastJsonToFromJson, // CAST(JSON_PARSE(literal) AS ARRAY/MAP) -> FROM_JSON(literal, type_string)
3489 ToJsonConvert, // TO_JSON(x) -> JSON_FORMAT(CAST(x AS JSON)) for Presto etc.
3490 ArrayAggNullFilter, // ARRAY_AGG(x) FILTER(WHERE cond) -> add AND NOT x IS NULL for DuckDB
3491 ArrayAggIgnoreNullsDuckDB, // ARRAY_AGG(x IGNORE NULLS ORDER BY ...) -> ARRAY_AGG(x ORDER BY a NULLS FIRST, ...) for DuckDB
3492 BigQueryPercentileContToDuckDB, // PERCENTILE_CONT(x, frac RESPECT NULLS) -> QUANTILE_CONT(x, frac) for DuckDB
3493 BigQueryArraySelectAsStructToSnowflake, // ARRAY(SELECT AS STRUCT ...) -> (SELECT ARRAY_AGG(OBJECT_CONSTRUCT(...)))
3494 CountDistinctMultiArg, // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN ... END)
3495 VarianceToClickHouse, // Expression::Variance -> varSamp for ClickHouse
3496 StddevToClickHouse, // Expression::Stddev -> stddevSamp for ClickHouse
3497 ApproxQuantileConvert, // Expression::ApproxQuantile -> APPROX_PERCENTILE for Snowflake
3498 ArrayIndexConvert, // array[1] -> array[0] for BigQuery (1-based to 0-based)
3499 DollarParamConvert, // $foo -> @foo for BigQuery
3500 TablesampleReservoir, // TABLESAMPLE (n ROWS) -> TABLESAMPLE RESERVOIR (n ROWS) for DuckDB
3501 BitAggFloatCast, // BIT_OR/BIT_AND/BIT_XOR float arg -> CAST(ROUND(CAST(arg)) AS INT) for DuckDB
3502 BitAggSnowflakeRename, // BIT_OR -> BITORAGG, BIT_AND -> BITANDAGG etc. for Snowflake
3503 StrftimeCastTimestamp, // CAST TIMESTAMP -> TIMESTAMP_NTZ for Spark in STRFTIME
3504 AnyValueIgnoreNulls, // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
3505 CreateTableStripComment, // Strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
3506 EscapeStringNormalize, // e'Hello\nworld' literal newline -> \n
3507 AnyToExists, // PostgreSQL x <op> ANY(array) -> EXISTS(array, x -> ...)
3508 ArrayConcatBracketConvert, // [1,2] -> ARRAY[1,2] for PostgreSQL in ARRAY_CAT
3509 SnowflakeIntervalFormat, // INTERVAL '2' HOUR -> INTERVAL '2 HOUR' for Snowflake
3510 AlterTableToSpRename, // ALTER TABLE RENAME -> EXEC sp_rename for TSQL
3511 StraightJoinCase, // STRAIGHT_JOIN -> straight_join for DuckDB
3512 RespectNullsConvert, // RESPECT NULLS window function handling
3513 MysqlNullsOrdering, // MySQL doesn't support NULLS ordering
3514 MysqlNullsLastRewrite, // Add CASE WHEN to ORDER BY for DuckDB -> MySQL (NULLS LAST simulation)
3515 BigQueryNullsOrdering, // BigQuery doesn't support NULLS FIRST/LAST - strip
3516 SnowflakeFloatProtect, // Protect FLOAT from being converted to DOUBLE by Snowflake target transform
3517 JsonToGetPath, // JSON arrow -> GET_PATH/PARSE_JSON for Snowflake
3518 FilterToIff, // FILTER(WHERE) -> IFF wrapping for Snowflake
3519 AggFilterToIff, // AggFunc.filter -> IFF wrapping for Snowflake (e.g., AVG(x) FILTER(WHERE cond))
3520 StructToRow, // DuckDB struct -> Presto ROW / BigQuery STRUCT
3521 SparkStructConvert, // Spark STRUCT(x AS col1, ...) -> ROW/DuckDB struct
3522 DecimalDefaultPrecision, // DECIMAL -> DECIMAL(18, 3) for Snowflake in BIT agg
3523 ApproxCountDistinctToApproxDistinct, // APPROX_COUNT_DISTINCT -> APPROX_DISTINCT for Presto/Trino
3524 CollectListToArrayAgg, // COLLECT_LIST -> ARRAY_AGG for Presto/DuckDB
3525 CollectSetConvert, // COLLECT_SET -> SET_AGG/ARRAY_AGG(DISTINCT)/ARRAY_UNIQUE_AGG
3526 PercentileConvert, // PERCENTILE -> QUANTILE/APPROX_PERCENTILE
3527 CorrIsnanWrap, // CORR(a,b) -> CASE WHEN ISNAN(CORR(a,b)) THEN NULL ELSE CORR(a,b) END
3528 TruncToDateTrunc, // TRUNC(ts, unit) -> DATE_TRUNC(unit, ts)
3529 ArrayContainsConvert, // ARRAY_CONTAINS -> CONTAINS/target-specific
3530 StrPositionExpand, // StrPosition with position -> complex STRPOS expansion for Presto/DuckDB
3531 TablesampleSnowflakeStrip, // Strip method and PERCENT for Snowflake target
3532 FirstToAnyValue, // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
3533 MonthsBetweenConvert, // Expression::MonthsBetween -> target-specific
3534 CurrentUserSparkParens, // CURRENT_USER -> CURRENT_USER() for Spark
3535 SparkDateFuncCast, // MONTH/YEAR/DAY('str') -> MONTH/YEAR/DAY(CAST('str' AS DATE)) from Spark
3536 MapFromArraysConvert, // Expression::MapFromArrays -> MAP/OBJECT_CONSTRUCT/MAP_FROM_ARRAYS
3537 AddMonthsConvert, // Expression::AddMonths -> target-specific DATEADD/DATE_ADD
3538 PercentileContConvert, // PERCENTILE_CONT/DISC WITHIN GROUP -> APPROX_PERCENTILE/PERCENTILE_APPROX
3539 GenerateSeriesConvert, // GENERATE_SERIES -> SEQUENCE/UNNEST(SEQUENCE)/EXPLODE(SEQUENCE)
3540 ConcatCoalesceWrap, // CONCAT(a, b) -> CONCAT(COALESCE(CAST(a), ''), ...) for Presto/ClickHouse
3541 PipeConcatToConcat, // a || b -> CONCAT(CAST(a), CAST(b)) for Presto
3542 DivFuncConvert, // DIV(a, b) -> a // b for DuckDB, CAST for BigQuery
3543 JsonObjectAggConvert, // JSON_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
3544 JsonbExistsConvert, // JSONB_EXISTS -> JSON_EXISTS for DuckDB
3545 DateBinConvert, // DATE_BIN -> TIME_BUCKET for DuckDB
3546 MysqlCastCharToText, // MySQL CAST(x AS CHAR) -> CAST(x AS TEXT/VARCHAR/STRING) for targets
3547 SparkCastVarcharToString, // Spark CAST(x AS VARCHAR/CHAR) -> CAST(x AS STRING) for Spark targets
3548 JsonExtractToArrow, // JSON_EXTRACT(x, path) -> x -> path for SQLite/DuckDB
3549 JsonExtractToTsql, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
3550 JsonExtractToClickHouse, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> JSONExtractString for ClickHouse
3551 JsonExtractScalarConvert, // JSON_EXTRACT_SCALAR -> target-specific (PostgreSQL, Snowflake, SQLite)
3552 JsonPathNormalize, // Normalize JSON path format (brackets, wildcards, quotes) for various dialects
3553 MinMaxToLeastGreatest, // Multi-arg MIN(a,b,c) -> LEAST(a,b,c), MAX(a,b,c) -> GREATEST(a,b,c)
3554 ClickHouseUniqToApproxCountDistinct, // uniq(x) -> APPROX_COUNT_DISTINCT(x) for non-ClickHouse targets
3555 ClickHouseAnyToAnyValue, // any(x) -> ANY_VALUE(x) for non-ClickHouse targets
3556 OracleVarchar2ToVarchar, // VARCHAR2(N CHAR/BYTE) -> VARCHAR(N) for non-Oracle targets
3557 Nvl2Expand, // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b ELSE c END
3558 IfnullToCoalesce, // IFNULL(a, b) -> COALESCE(a, b)
3559 IsAsciiConvert, // IS_ASCII(x) -> dialect-specific ASCII check
3560 StrPositionConvert, // STR_POSITION(haystack, needle[, pos]) -> dialect-specific
3561 DecodeSimplify, // DECODE with null-safe -> simple = comparison
3562 ArraySumConvert, // ARRAY_SUM -> target-specific
3563 ArraySizeConvert, // ARRAY_SIZE -> target-specific
3564 ArrayAnyConvert, // ARRAY_ANY -> target-specific
3565 CastTimestamptzToFunc, // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) for MySQL/StarRocks
3566 TsOrDsToDateConvert, // TS_OR_DS_TO_DATE(x[, fmt]) -> dialect-specific
3567 TsOrDsToDateStrConvert, // TS_OR_DS_TO_DATE_STR(x) -> SUBSTRING(CAST(x AS type), 1, 10)
3568 DateStrToDateConvert, // DATE_STR_TO_DATE(x) -> CAST(x AS DATE)
3569 TimeStrToDateConvert, // TIME_STR_TO_DATE(x) -> CAST(x AS DATE)
3570 TimeStrToTimeConvert, // TIME_STR_TO_TIME(x) -> CAST(x AS TIMESTAMP)
3571 DateToDateStrConvert, // DATE_TO_DATE_STR(x) -> CAST(x AS TEXT/VARCHAR/STRING)
3572 DateToDiConvert, // DATE_TO_DI(x) -> dialect-specific (CAST date to YYYYMMDD integer)
3573 DiToDateConvert, // DI_TO_DATE(x) -> dialect-specific (integer YYYYMMDD to date)
3574 TsOrDiToDiConvert, // TS_OR_DI_TO_DI(x) -> dialect-specific
3575 UnixToStrConvert, // UNIX_TO_STR(x, fmt) -> dialect-specific
3576 UnixToTimeConvert, // UNIX_TO_TIME(x) -> dialect-specific
3577 UnixToTimeStrConvert, // UNIX_TO_TIME_STR(x) -> dialect-specific
3578 TimeToUnixConvert, // TIME_TO_UNIX(x) -> dialect-specific
3579 TimeToStrConvert, // TIME_TO_STR(x, fmt) -> dialect-specific
3580 StrToUnixConvert, // STR_TO_UNIX(x, fmt) -> dialect-specific
3581 DateTruncSwapArgs, // DATE_TRUNC('unit', x) -> DATE_TRUNC(x, unit) / TRUNC(x, unit)
3582 TimestampTruncConvert, // TIMESTAMP_TRUNC(x, UNIT[, tz]) -> dialect-specific
3583 StrToDateConvert, // STR_TO_DATE(x, fmt) from Generic -> CAST(StrToTime(x,fmt) AS DATE)
3584 TsOrDsAddConvert, // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> DATE_ADD per dialect
3585 DateFromUnixDateConvert, // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, '1970-01-01')
3586 TimeStrToUnixConvert, // TIME_STR_TO_UNIX(x) -> dialect-specific
3587 TimeToTimeStrConvert, // TIME_TO_TIME_STR(x) -> CAST(x AS type)
3588 CreateTableLikeToCtas, // CREATE TABLE a LIKE b -> CREATE TABLE a AS SELECT * FROM b LIMIT 0
3589 CreateTableLikeToSelectInto, // CREATE TABLE a LIKE b -> SELECT TOP 0 * INTO a FROM b AS temp
3590 CreateTableLikeToAs, // CREATE TABLE a LIKE b -> CREATE TABLE a AS b (ClickHouse)
3591 ArrayRemoveConvert, // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter/ARRAY subquery
3592 ArrayReverseConvert, // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
3593 JsonKeysConvert, // JSON_KEYS -> JSON_OBJECT_KEYS/OBJECT_KEYS
3594 ParseJsonStrip, // PARSE_JSON(x) -> x (strip wrapper)
3595 ArraySizeDrill, // ARRAY_SIZE -> REPEATED_COUNT for Drill
3596 WeekOfYearToWeekIso, // WEEKOFYEAR -> WEEKISO for Snowflake cross-dialect
3597 }
3598
// --- Statement-level pre-transforms: SELECT INTO, OFFSET ROWS, Oracle LIMIT/FETCH ---
// These run before the recursive per-expression transforms below; each step
// rebinds `expr`, threading the AST through the pipeline.

// Handle SELECT INTO -> CREATE TABLE AS for DuckDB/Snowflake/etc.
// Only TSQL-family sources can produce SELECT INTO, so gate on source dialect.
let expr = if matches!(source, DialectType::TSQL | DialectType::Fabric) {
    Self::transform_select_into(expr, source, target)
} else {
    expr
};

// Strip OFFSET ROWS for non-TSQL/Oracle targets.
// Setting `offset.rows = None` drops the trailing ROWS keyword; the offset
// value itself is preserved. Only applies to a top-level Select node.
let expr = if !matches!(
    target,
    DialectType::TSQL | DialectType::Oracle | DialectType::Fabric
) {
    if let Expression::Select(mut select) = expr {
        if let Some(ref mut offset) = select.offset {
            offset.rows = None;
        }
        Expression::Select(select)
    } else {
        expr
    }
} else {
    expr
};

// Oracle: LIMIT -> FETCH FIRST, OFFSET -> OFFSET ROWS
let expr = if matches!(target, DialectType::Oracle) {
    if let Expression::Select(mut select) = expr {
        if let Some(limit) = select.limit.take() {
            // Convert LIMIT n to FETCH FIRST n ROWS ONLY.
            // `take()` removes the LIMIT so it is not emitted twice.
            select.fetch = Some(crate::expressions::Fetch {
                direction: "FIRST".to_string(),
                count: Some(limit.this),
                percent: false,
                rows: true,
                with_ties: false,
            });
        }
        // Add the ROWS keyword to OFFSET if present (Oracle requires OFFSET n ROWS).
        if let Some(ref mut offset) = select.offset {
            offset.rows = Some(true);
        }
        Expression::Select(select)
    } else {
        expr
    }
} else {
    expr
};
3647
// Handle CreateTable WITH properties transformation before recursive transforms.
// All DDL-level rewrites for CREATE TABLE are applied here in one pass over the
// CreateTable node; each section below is gated on source and/or target dialect.
let expr = if let Expression::CreateTable(mut ct) = expr {
    Self::transform_create_table_properties(&mut ct, source, target);

    // Handle Hive-style PARTITIONED BY (col_name type, ...) -> target-specific
    // When the PARTITIONED BY clause contains column definitions, merge them into the
    // main column list and adjust the PARTITIONED BY clause for the target dialect.
    if matches!(
        source,
        DialectType::Hive | DialectType::Spark | DialectType::Databricks
    ) {
        let mut partition_col_names: Vec<String> = Vec::new();
        let mut partition_col_defs: Vec<crate::expressions::ColumnDef> = Vec::new();
        let mut has_col_def_partitions = false;

        // Check if any PARTITIONED BY property contains ColumnDef expressions
        // (i.e., the Hive form "PARTITIONED BY (y INT)" rather than bare names).
        for prop in &ct.properties {
            if let Expression::PartitionedByProperty(ref pbp) = prop {
                if let Expression::Tuple(ref tuple) = *pbp.this {
                    for expr in &tuple.expressions {
                        if let Expression::ColumnDef(ref cd) = expr {
                            has_col_def_partitions = true;
                            partition_col_names.push(cd.name.name.clone());
                            partition_col_defs.push(*cd.clone());
                        }
                    }
                }
            }
        }

        // Hive keeps partition columns out of the main column list, so only
        // rewrite when targeting a non-Hive dialect.
        if has_col_def_partitions && !matches!(target, DialectType::Hive) {
            // Merge partition columns into main column list
            for cd in partition_col_defs {
                ct.columns.push(cd);
            }

            // Replace PARTITIONED BY property with column-name-only version
            ct.properties
                .retain(|p| !matches!(p, Expression::PartitionedByProperty(_)));

            if matches!(
                target,
                DialectType::Presto | DialectType::Trino | DialectType::Athena
            ) {
                // Presto family: WITH (PARTITIONED_BY=ARRAY['y', 'z'])
                // Column names are rendered as single-quoted string literals.
                let array_elements: Vec<String> = partition_col_names
                    .iter()
                    .map(|n| format!("'{}'", n))
                    .collect();
                let array_value = format!("ARRAY[{}]", array_elements.join(", "));
                ct.with_properties
                    .push(("PARTITIONED_BY".to_string(), array_value));
            } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
                // Spark: PARTITIONED BY (y, z) - just column names, no types.
                let name_exprs: Vec<Expression> = partition_col_names
                    .iter()
                    .map(|n| {
                        Expression::Column(crate::expressions::Column {
                            name: crate::expressions::Identifier::new(n.clone()),
                            table: None,
                            join_mark: false,
                            trailing_comments: Vec::new(),
                        })
                    })
                    .collect();
                // Insert at index 0 — presumably so PARTITIONED BY renders before
                // other table properties; confirm against the generator ordering.
                ct.properties.insert(
                    0,
                    Expression::PartitionedByProperty(Box::new(
                        crate::expressions::PartitionedByProperty {
                            this: Box::new(Expression::Tuple(Box::new(
                                crate::expressions::Tuple {
                                    expressions: name_exprs,
                                },
                            ))),
                        },
                    )),
                );
            }
            // For DuckDB and other targets, just drop the PARTITIONED BY (already retained above)
        }

        // Note: Non-ColumnDef partitions (e.g., function expressions like MONTHS(y))
        // are handled by transform_create_table_properties which runs first
    }

    // Strip LOCATION property for Presto/Trino (not supported)
    if matches!(
        target,
        DialectType::Presto | DialectType::Trino | DialectType::Athena
    ) {
        ct.properties
            .retain(|p| !matches!(p, Expression::LocationProperty(_)));
    }

    // Strip table-level constraints for Spark/Hive/Databricks
    // Keep PRIMARY KEY and LIKE constraints but strip TSQL-specific modifiers; remove all others
    if matches!(
        target,
        DialectType::Spark | DialectType::Databricks | DialectType::Hive
    ) {
        ct.constraints.retain(|c| {
            matches!(
                c,
                crate::expressions::TableConstraint::PrimaryKey { .. }
                    | crate::expressions::TableConstraint::Like { .. }
            )
        });
        for constraint in &mut ct.constraints {
            if let crate::expressions::TableConstraint::PrimaryKey {
                columns,
                modifiers,
                ..
            } = constraint
            {
                // Strip ASC/DESC from column names.
                // NOTE(review): the sort direction appears to be stored as a text
                // suffix on the column name here (" ASC" = 4 chars, " DESC" = 5),
                // hence the fixed-length truncation.
                for col in columns.iter_mut() {
                    if col.name.ends_with(" ASC") {
                        col.name = col.name[..col.name.len() - 4].to_string();
                    } else if col.name.ends_with(" DESC") {
                        col.name = col.name[..col.name.len() - 5].to_string();
                    }
                }
                // Strip TSQL-specific modifiers (CLUSTERED, WITH (...), ON filegroup)
                modifiers.clustered = None;
                modifiers.with_options.clear();
                modifiers.on_filegroup = None;
            }
        }
    }

    // Databricks: IDENTITY columns with INT/INTEGER -> BIGINT
    if matches!(target, DialectType::Databricks) {
        for col in &mut ct.columns {
            if col.auto_increment {
                if matches!(col.data_type, crate::expressions::DataType::Int { .. }) {
                    col.data_type = crate::expressions::DataType::BigInt { length: None };
                }
            }
        }
    }

    // Spark/Databricks: INTEGER -> INT in column definitions
    // Python sqlglot always outputs INT for Spark/Databricks
    if matches!(target, DialectType::Spark | DialectType::Databricks) {
        for col in &mut ct.columns {
            if let crate::expressions::DataType::Int {
                integer_spelling, ..
            } = &mut col.data_type
            {
                // `integer_spelling = false` makes the generator emit INT instead
                // of INTEGER (flag preserved from the parsed source otherwise).
                *integer_spelling = false;
            }
        }
    }

    // Strip explicit NULL constraints for Hive/Spark (B INTEGER NULL -> B INTEGER)
    if matches!(target, DialectType::Hive | DialectType::Spark) {
        for col in &mut ct.columns {
            // If nullable is explicitly true (NULL), change to None (omit it)
            if col.nullable == Some(true) {
                col.nullable = None;
            }
            // Also remove from constraints if stored there
            col.constraints
                .retain(|c| !matches!(c, crate::expressions::ColumnConstraint::Null));
        }
    }

    // Strip TSQL ON filegroup for non-TSQL/Fabric targets
    if ct.on_property.is_some()
        && !matches!(target, DialectType::TSQL | DialectType::Fabric)
    {
        ct.on_property = None;
    }

    // Snowflake: strip ARRAY type parameters (ARRAY<INT> -> ARRAY, ARRAY<ARRAY<INT>> -> ARRAY)
    // Snowflake doesn't support typed arrays in DDL
    if matches!(target, DialectType::Snowflake) {
        // Replaces any Array type with the bare Custom("ARRAY") type so no
        // element type is emitted. Not recursive: nested arrays collapse in one
        // step because the whole outer type is replaced.
        fn strip_array_type_params(dt: &mut crate::expressions::DataType) {
            if let crate::expressions::DataType::Array { .. } = dt {
                *dt = crate::expressions::DataType::Custom {
                    name: "ARRAY".to_string(),
                };
            }
        }
        for col in &mut ct.columns {
            strip_array_type_params(&mut col.data_type);
        }
    }

    // PostgreSQL target: ensure IDENTITY columns have NOT NULL
    // If NOT NULL was explicit in source (present in constraint_order), preserve original order.
    // If NOT NULL was not explicit, add it after IDENTITY (GENERATED BY DEFAULT AS IDENTITY NOT NULL).
    if matches!(target, DialectType::PostgreSQL) {
        for col in &mut ct.columns {
            // Only rewrite columns that actually carry ordered constraints;
            // an empty constraint_order is left untouched.
            if col.auto_increment && !col.constraint_order.is_empty() {
                use crate::expressions::ConstraintType;
                let has_explicit_not_null = col
                    .constraint_order
                    .iter()
                    .any(|ct| *ct == ConstraintType::NotNull);

                if has_explicit_not_null {
                    // Source had explicit NOT NULL - preserve original order
                    // Just ensure nullable is set
                    if col.nullable != Some(false) {
                        col.nullable = Some(false);
                    }
                } else {
                    // Source didn't have explicit NOT NULL - build order with
                    // AutoIncrement + NotNull first, then remaining constraints
                    let mut new_order = Vec::new();
                    // Put AutoIncrement (IDENTITY) first, followed by synthetic NotNull
                    new_order.push(ConstraintType::AutoIncrement);
                    new_order.push(ConstraintType::NotNull);
                    // Add remaining constraints in original order (except AutoIncrement,
                    // which was already placed at the front)
                    for ct_type in &col.constraint_order {
                        if *ct_type != ConstraintType::AutoIncrement {
                            new_order.push(ct_type.clone());
                        }
                    }
                    col.constraint_order = new_order;
                    col.nullable = Some(false);
                }
            }
        }
    }

    Expression::CreateTable(ct)
} else {
    expr
};
3879
// Handle CreateView column stripping for Presto/Trino target
let expr = if let Expression::CreateView(mut cv) = expr {
    // Presto/Trino: drop the explicit column list when the view has a SELECT
    // body (column names then come from the query itself). A Null query node
    // is treated as "no body", in which case the columns are kept.
    if matches!(target, DialectType::Presto | DialectType::Trino) && !cv.columns.is_empty()
    {
        if !matches!(&cv.query, Expression::Null(_)) {
            cv.columns.clear();
        }
    }
    Expression::CreateView(cv)
} else {
    expr
};

// Wrap bare VALUES in CTE bodies with SELECT * FROM (...) AS _values for generic/non-Presto targets.
// Presto-family dialects accept WITH t AS (VALUES ...) directly; most others need
// the VALUES clause wrapped in a SELECT to be a valid CTE body.
let expr = if !matches!(
    target,
    DialectType::Presto | DialectType::Trino | DialectType::Athena
) {
    if let Expression::Select(mut select) = expr {
        if let Some(ref mut with) = select.with {
            for cte in &mut with.ctes {
                if let Expression::Values(ref vals) = cte.this {
                    // Build: SELECT * FROM (VALUES ...) AS _values
                    let values_subquery =
                        Expression::Subquery(Box::new(crate::expressions::Subquery {
                            this: Expression::Values(vals.clone()),
                            // "_values" is a synthetic alias; derived tables need
                            // an alias in most dialects.
                            alias: Some(Identifier::new("_values".to_string())),
                            column_aliases: Vec::new(),
                            order_by: None,
                            limit: None,
                            offset: None,
                            distribute_by: None,
                            sort_by: None,
                            cluster_by: None,
                            lateral: false,
                            modifiers_inside: false,
                            trailing_comments: Vec::new(),
                        }));
                    let mut new_select = crate::expressions::Select::new();
                    // Projection is a bare `*` so the wrapper is transparent.
                    new_select.expressions =
                        vec![Expression::Star(crate::expressions::Star {
                            table: None,
                            except: None,
                            replace: None,
                            rename: None,
                            trailing_comments: Vec::new(),
                        })];
                    new_select.from = Some(crate::expressions::From {
                        expressions: vec![values_subquery],
                    });
                    cte.this = Expression::Select(Box::new(new_select));
                }
            }
        }
        Expression::Select(select)
    } else {
        expr
    }
} else {
    expr
};

// PostgreSQL CREATE INDEX: add NULLS FIRST to index columns that don't have
// an explicit nulls ordering (only columns with `nulls_first == None` are touched).
let expr = if matches!(target, DialectType::PostgreSQL) {
    if let Expression::CreateIndex(mut ci) = expr {
        for col in &mut ci.columns {
            if col.nulls_first.is_none() {
                col.nulls_first = Some(true);
            }
        }
        Expression::CreateIndex(ci)
    } else {
        expr
    }
} else {
    expr
};
3958
3959 transform_recursive(expr, &|e| {
// BigQuery CAST(ARRAY[STRUCT(...)] AS STRUCT_TYPE[]) -> DuckDB: convert unnamed Structs to ROW()
// This converts auto-named struct literals {'_0': x, '_1': y} inside typed arrays to ROW(x, y)
if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
    if let Expression::Cast(ref c) = e {
        // Check if this is a CAST of an array to a struct array type.
        // NOTE(review): only the Array-ness of the target type is checked here;
        // the element type is not inspected.
        let is_struct_array_cast =
            matches!(&c.to, crate::expressions::DataType::Array { .. });
        if is_struct_array_cast {
            // "Auto-named" means every field name is either absent or of the
            // form `_<number>` (the parser's positional placeholder names).
            // Array and ArrayFunc are the two array-literal representations.
            let has_auto_named_structs = match &c.this {
                Expression::Array(arr) => arr.expressions.iter().any(|elem| {
                    if let Expression::Struct(s) = elem {
                        s.fields.iter().all(|(name, _)| {
                            name.as_ref().map_or(true, |n| {
                                n.starts_with('_') && n[1..].parse::<usize>().is_ok()
                            })
                        })
                    } else {
                        false
                    }
                }),
                Expression::ArrayFunc(arr) => arr.expressions.iter().any(|elem| {
                    if let Expression::Struct(s) = elem {
                        s.fields.iter().all(|(name, _)| {
                            name.as_ref().map_or(true, |n| {
                                n.starts_with('_') && n[1..].parse::<usize>().is_ok()
                            })
                        })
                    } else {
                        false
                    }
                }),
                _ => false,
            };
            if has_auto_named_structs {
                // Drop the placeholder names and keep the values, in order,
                // as arguments to a ROW(...) function call.
                let convert_struct_to_row = |elem: Expression| -> Expression {
                    if let Expression::Struct(s) = elem {
                        let row_args: Vec<Expression> =
                            s.fields.into_iter().map(|(_, v)| v).collect();
                        Expression::Function(Box::new(Function::new(
                            "ROW".to_string(),
                            row_args,
                        )))
                    } else {
                        elem
                    }
                };
                // Clone the Cast so the rewrite produces a fresh node.
                let mut c_clone = c.as_ref().clone();
                match &mut c_clone.this {
                    Expression::Array(arr) => {
                        arr.expressions = arr
                            .expressions
                            .drain(..)
                            .map(convert_struct_to_row)
                            .collect();
                    }
                    Expression::ArrayFunc(arr) => {
                        arr.expressions = arr
                            .expressions
                            .drain(..)
                            .map(convert_struct_to_row)
                            .collect();
                    }
                    _ => {}
                }
                return Ok(Expression::Cast(Box::new(c_clone)));
            }
        }
    }
}

// BigQuery SELECT AS STRUCT -> DuckDB struct literal {'key': value, ...}
if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
    if let Expression::Select(ref sel) = e {
        if sel.kind.as_deref() == Some("STRUCT") {
            // Turn the projection list into struct fields:
            //   aliased expr  -> field named by the alias
            //   bare column   -> field named by the column
            //   anything else -> unnamed field (None)
            let mut fields = Vec::new();
            for expr in &sel.expressions {
                match expr {
                    Expression::Alias(a) => {
                        fields.push((Some(a.alias.name.clone()), a.this.clone()));
                    }
                    Expression::Column(c) => {
                        fields.push((Some(c.name.name.clone()), expr.clone()));
                    }
                    _ => {
                        fields.push((None, expr.clone()));
                    }
                }
            }
            let struct_lit =
                Expression::Struct(Box::new(crate::expressions::Struct { fields }));
            // Keep the rest of the SELECT intact; only the kind marker is
            // cleared and the projection replaced by the single struct literal.
            let mut new_select = sel.as_ref().clone();
            new_select.kind = None;
            new_select.expressions = vec![struct_lit];
            return Ok(Expression::Select(Box::new(new_select)));
        }
    }
}
4057
// Convert @variable -> ${variable} for Spark/Hive/Databricks
if matches!(source, DialectType::TSQL | DialectType::Fabric)
    && matches!(
        target,
        DialectType::Spark | DialectType::Databricks | DialectType::Hive
    )
{
    // Case 1: the parser produced a Parameter node with At style.
    if let Expression::Parameter(ref p) = e {
        if p.style == crate::expressions::ParameterStyle::At {
            if let Some(ref name) = p.name {
                return Ok(Expression::Parameter(Box::new(
                    crate::expressions::Parameter {
                        name: Some(name.clone()),
                        index: p.index,
                        style: crate::expressions::ParameterStyle::DollarBrace,
                        quoted: p.quoted,
                        string_quoted: p.string_quoted,
                        expression: None,
                    },
                )));
            }
        }
    }
    // Case 2: the parser kept the variable as a bare Column named "@x"
    // (no table qualifier) — also rewrite to a DollarBrace parameter.
    if let Expression::Column(ref col) = e {
        if col.name.name.starts_with('@') && col.table.is_none() {
            let var_name = col.name.name.trim_start_matches('@').to_string();
            return Ok(Expression::Parameter(Box::new(
                crate::expressions::Parameter {
                    name: Some(var_name),
                    index: None,
                    style: crate::expressions::ParameterStyle::DollarBrace,
                    quoted: false,
                    string_quoted: false,
                    expression: None,
                },
            )));
        }
    }
}

// Convert @variable -> variable in SET statements for Spark/Databricks.
// The SET name can arrive as a Parameter, Identifier, or Column depending on
// which parsing path produced it; all three are normalized to a plain Identifier.
if matches!(source, DialectType::TSQL | DialectType::Fabric)
    && matches!(target, DialectType::Spark | DialectType::Databricks)
{
    if let Expression::SetStatement(ref s) = e {
        // Work on a clone of the items; only emit a new node if something changed.
        let mut new_items = s.items.clone();
        let mut changed = false;
        for item in &mut new_items {
            // Strip @ from the SET name (Parameter style)
            if let Expression::Parameter(ref p) = item.name {
                if p.style == crate::expressions::ParameterStyle::At {
                    if let Some(ref name) = p.name {
                        item.name = Expression::Identifier(Identifier::new(name));
                        changed = true;
                    }
                }
            }
            // Strip @ from the SET name (Identifier style - SET parser)
            if let Expression::Identifier(ref id) = item.name {
                if id.name.starts_with('@') {
                    let var_name = id.name.trim_start_matches('@').to_string();
                    item.name = Expression::Identifier(Identifier::new(&var_name));
                    changed = true;
                }
            }
            // Strip @ from the SET name (Column style - alternative parsing)
            if let Expression::Column(ref col) = item.name {
                if col.name.name.starts_with('@') && col.table.is_none() {
                    let var_name = col.name.name.trim_start_matches('@').to_string();
                    item.name = Expression::Identifier(Identifier::new(&var_name));
                    changed = true;
                }
            }
        }
        if changed {
            let mut new_set = (**s).clone();
            new_set.items = new_items;
            return Ok(Expression::SetStatement(Box::new(new_set)));
        }
    }
}

// Strip NOLOCK hint for non-TSQL targets.
// NOTE(review): this clears ALL table hints, not just NOLOCK — presumably
// intentional since no other hints survive outside the TSQL family.
if matches!(source, DialectType::TSQL | DialectType::Fabric)
    && !matches!(target, DialectType::TSQL | DialectType::Fabric)
{
    if let Expression::Table(ref tr) = e {
        if !tr.hints.is_empty() {
            let mut new_tr = tr.clone();
            new_tr.hints.clear();
            return Ok(Expression::Table(new_tr));
        }
    }
}
4153
// Snowflake: TRUE IS TRUE -> TRUE, FALSE IS FALSE -> FALSE
// Snowflake simplifies IS TRUE/IS FALSE on boolean literals.
// Truth table implemented below:
//   b IS TRUE      -> b        b IS NOT TRUE  -> !b
//   b IS FALSE     -> !b       b IS NOT FALSE -> b
if matches!(target, DialectType::Snowflake) {
    if let Expression::IsTrue(ref itf) = e {
        if let Expression::Boolean(ref b) = itf.this {
            if !itf.not {
                return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
                    value: b.value,
                }));
            } else {
                return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
                    value: !b.value,
                }));
            }
        }
    }
    if let Expression::IsFalse(ref itf) = e {
        if let Expression::Boolean(ref b) = itf.this {
            if !itf.not {
                return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
                    value: !b.value,
                }));
            } else {
                return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
                    value: b.value,
                }));
            }
        }
    }
}

// BigQuery: split dotted backtick identifiers in table names
// e.g., `a.b.c` -> "a"."b"."c" when source is BigQuery and target is not BigQuery.
// Only fires when the name was parsed as a single identifier containing dots
// (schema is None) — i.e., the BigQuery `project.dataset.table` backtick form.
if matches!(source, DialectType::BigQuery) && !matches!(target, DialectType::BigQuery) {
    if let Expression::CreateTable(ref ct) = e {
        let mut changed = false;
        let mut new_ct = ct.clone();
        // Split the table name
        if ct.name.schema.is_none() && ct.name.name.name.contains('.') {
            let parts: Vec<&str> = ct.name.name.name.split('.').collect();
            // Use quoted identifiers when the original was quoted (backtick in BigQuery)
            let was_quoted = ct.name.name.quoted;
            let mk_id = |s: &str| {
                if was_quoted {
                    Identifier::quoted(s)
                } else {
                    Identifier::new(s)
                }
            };
            // 3 parts -> catalog.schema.name; 2 parts -> schema.name;
            // any other count is left untouched.
            if parts.len() == 3 {
                new_ct.name.catalog = Some(mk_id(parts[0]));
                new_ct.name.schema = Some(mk_id(parts[1]));
                new_ct.name.name = mk_id(parts[2]);
                changed = true;
            } else if parts.len() == 2 {
                new_ct.name.schema = Some(mk_id(parts[0]));
                new_ct.name.name = mk_id(parts[1]);
                changed = true;
            }
        }
        // Split the clone source name (CREATE TABLE ... CLONE `a.b.c`) the same way.
        if let Some(ref clone_src) = ct.clone_source {
            if clone_src.schema.is_none() && clone_src.name.name.contains('.') {
                let parts: Vec<&str> = clone_src.name.name.split('.').collect();
                let was_quoted = clone_src.name.quoted;
                let mk_id = |s: &str| {
                    if was_quoted {
                        Identifier::quoted(s)
                    } else {
                        Identifier::new(s)
                    }
                };
                let mut new_src = clone_src.clone();
                if parts.len() == 3 {
                    new_src.catalog = Some(mk_id(parts[0]));
                    new_src.schema = Some(mk_id(parts[1]));
                    new_src.name = mk_id(parts[2]);
                    new_ct.clone_source = Some(new_src);
                    changed = true;
                } else if parts.len() == 2 {
                    new_src.schema = Some(mk_id(parts[0]));
                    new_src.name = mk_id(parts[1]);
                    new_ct.clone_source = Some(new_src);
                    changed = true;
                }
            }
        }
        if changed {
            return Ok(Expression::CreateTable(new_ct));
        }
    }
}
4246
// BigQuery array subscript: a[1], b[OFFSET(1)], c[ORDINAL(1)], d[SAFE_OFFSET(1)], e[SAFE_ORDINAL(1)]
// -> DuckDB/Presto: convert 0-based to 1-based, handle SAFE_* -> ELEMENT_AT for Presto.
// BigQuery plain/OFFSET subscripts are 0-based; ORDINAL is 1-based; the SAFE_*
// variants return NULL instead of erroring on out-of-range access.
if matches!(source, DialectType::BigQuery)
    && matches!(
        target,
        DialectType::DuckDB
            | DialectType::Presto
            | DialectType::Trino
            | DialectType::Athena
    )
{
    if let Expression::Subscript(ref sub) = e {
        // Compute (replacement index, is_safe). `None` index means the form
        // was not recognized and the subscript is left unchanged.
        let (new_index, is_safe) = match &sub.index {
            // a[1] -> a[1+1] = a[2] (plain index is 0-based in BQ)
            Expression::Literal(Literal::Number(n)) => {
                if let Ok(val) = n.parse::<i64>() {
                    (
                        Some(Expression::Literal(Literal::Number(
                            (val + 1).to_string(),
                        ))),
                        false,
                    )
                } else {
                    // Non-integer literal (e.g., float-looking) — leave as-is.
                    (None, false)
                }
            }
            // OFFSET(n) -> n+1 (0-based). For a constant argument the +1 is
            // folded; otherwise an explicit `arg + 1` Add node is emitted.
            Expression::Function(ref f)
                if f.name.eq_ignore_ascii_case("OFFSET") && f.args.len() == 1 =>
            {
                if let Expression::Literal(Literal::Number(n)) = &f.args[0] {
                    if let Ok(val) = n.parse::<i64>() {
                        (
                            Some(Expression::Literal(Literal::Number(
                                (val + 1).to_string(),
                            ))),
                            false,
                        )
                    } else {
                        (
                            Some(Expression::Add(Box::new(
                                crate::expressions::BinaryOp::new(
                                    f.args[0].clone(),
                                    Expression::number(1),
                                ),
                            ))),
                            false,
                        )
                    }
                } else {
                    (
                        Some(Expression::Add(Box::new(
                            crate::expressions::BinaryOp::new(
                                f.args[0].clone(),
                                Expression::number(1),
                            ),
                        ))),
                        false,
                    )
                }
            }
            // ORDINAL(n) -> n (already 1-based)
            Expression::Function(ref f)
                if f.name.eq_ignore_ascii_case("ORDINAL") && f.args.len() == 1 =>
            {
                (Some(f.args[0].clone()), false)
            }
            // SAFE_OFFSET(n) -> n+1 (0-based, safe); same constant-folding
            // strategy as OFFSET but flags the access as safe.
            Expression::Function(ref f)
                if f.name.eq_ignore_ascii_case("SAFE_OFFSET") && f.args.len() == 1 =>
            {
                if let Expression::Literal(Literal::Number(n)) = &f.args[0] {
                    if let Ok(val) = n.parse::<i64>() {
                        (
                            Some(Expression::Literal(Literal::Number(
                                (val + 1).to_string(),
                            ))),
                            true,
                        )
                    } else {
                        (
                            Some(Expression::Add(Box::new(
                                crate::expressions::BinaryOp::new(
                                    f.args[0].clone(),
                                    Expression::number(1),
                                ),
                            ))),
                            true,
                        )
                    }
                } else {
                    (
                        Some(Expression::Add(Box::new(
                            crate::expressions::BinaryOp::new(
                                f.args[0].clone(),
                                Expression::number(1),
                            ),
                        ))),
                        true,
                    )
                }
            }
            // SAFE_ORDINAL(n) -> n (already 1-based, safe)
            Expression::Function(ref f)
                if f.name.eq_ignore_ascii_case("SAFE_ORDINAL") && f.args.len() == 1 =>
            {
                (Some(f.args[0].clone()), true)
            }
            _ => (None, false),
        };
        if let Some(idx) = new_index {
            if is_safe
                && matches!(
                    target,
                    DialectType::Presto | DialectType::Trino | DialectType::Athena
                )
            {
                // Presto: SAFE_OFFSET/SAFE_ORDINAL -> ELEMENT_AT(arr, idx),
                // which returns NULL out-of-range like the BigQuery SAFE_* forms.
                return Ok(Expression::Function(Box::new(Function::new(
                    "ELEMENT_AT".to_string(),
                    vec![sub.this.clone(), idx],
                ))));
            } else {
                // DuckDB or non-safe: just use subscript with converted index.
                // NOTE(review): for DuckDB the SAFE_* null-on-out-of-range
                // semantics are not specially emulated here — confirm DuckDB
                // subscripts behave acceptably for that case.
                return Ok(Expression::Subscript(Box::new(
                    crate::expressions::Subscript {
                        this: sub.this.clone(),
                        index: idx,
                    },
                )));
            }
        }
    }
}
4381
// BigQuery LENGTH(x) -> DuckDB CASE TYPEOF(x) WHEN 'BLOB' THEN OCTET_LENGTH(...) ELSE LENGTH(...) END
// BigQuery's LENGTH is polymorphic over STRING and BYTES; DuckDB needs a
// runtime type dispatch to reproduce that: byte count for blobs, character
// count for text.
if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
    if let Expression::Length(ref uf) = e {
        let arg = uf.this.clone();
        // TYPEOF(x) — the CASE operand used for the dispatch.
        let typeof_func = Expression::Function(Box::new(Function::new(
            "TYPEOF".to_string(),
            vec![arg.clone()],
        )));
        // OCTET_LENGTH(CAST(x AS VARBINARY)) — byte length for the BLOB branch.
        let blob_cast = Expression::Cast(Box::new(Cast {
            this: arg.clone(),
            to: DataType::VarBinary { length: None },
            trailing_comments: vec![],
            double_colon_syntax: false,
            format: None,
            default: None,
        }));
        let octet_length = Expression::Function(Box::new(Function::new(
            "OCTET_LENGTH".to_string(),
            vec![blob_cast],
        )));
        // LENGTH(CAST(x AS TEXT)) — character length for the ELSE branch.
        let text_cast = Expression::Cast(Box::new(Cast {
            this: arg,
            to: DataType::Text,
            trailing_comments: vec![],
            double_colon_syntax: false,
            format: None,
            default: None,
        }));
        let length_text = Expression::Length(Box::new(crate::expressions::UnaryFunc {
            this: text_cast,
            original_name: None,
        }));
        return Ok(Expression::Case(Box::new(Case {
            operand: Some(typeof_func),
            whens: vec![(
                Expression::Literal(Literal::String("BLOB".to_string())),
                octet_length,
            )],
            else_: Some(length_text),
            comments: Vec::new(),
        })));
    }
}

// BigQuery UNNEST alias handling (only for non-BigQuery sources):
// UNNEST(...) AS x -> UNNEST(...) (drop unused table alias)
// UNNEST(...) AS x(y) -> UNNEST(...) AS y (use column alias as main alias)
if matches!(target, DialectType::BigQuery) && !matches!(source, DialectType::BigQuery) {
    if let Expression::Alias(ref a) = e {
        if matches!(&a.this, Expression::Unnest(_)) {
            if a.column_aliases.is_empty() {
                // Drop the entire alias, return just the UNNEST expression
                return Ok(a.this.clone());
            } else {
                // Use first column alias as the main alias; any additional
                // column aliases are discarded along with the table alias.
                let mut new_alias = a.as_ref().clone();
                new_alias.alias = a.column_aliases[0].clone();
                new_alias.column_aliases.clear();
                return Ok(Expression::Alias(Box::new(new_alias)));
            }
        }
    }
}
4445
4446 // BigQuery IN UNNEST(expr) -> IN (SELECT UNNEST/EXPLODE(expr)) for non-BigQuery targets
4447 if matches!(source, DialectType::BigQuery) && !matches!(target, DialectType::BigQuery) {
4448 if let Expression::In(ref in_expr) = e {
4449 if let Some(ref unnest_inner) = in_expr.unnest {
4450 // Build the function call for the target dialect
4451 let func_expr = if matches!(
4452 target,
4453 DialectType::Hive | DialectType::Spark | DialectType::Databricks
4454 ) {
4455 // Use EXPLODE for Hive/Spark
4456 Expression::Function(Box::new(Function::new(
4457 "EXPLODE".to_string(),
4458 vec![*unnest_inner.clone()],
4459 )))
4460 } else {
4461 // Use UNNEST for Presto/Trino/DuckDB/etc.
4462 Expression::Unnest(Box::new(crate::expressions::UnnestFunc {
4463 this: *unnest_inner.clone(),
4464 expressions: Vec::new(),
4465 with_ordinality: false,
4466 alias: None,
4467 offset_alias: None,
4468 }))
4469 };
4470
4471 // Wrap in SELECT
4472 let mut inner_select = crate::expressions::Select::new();
4473 inner_select.expressions = vec![func_expr];
4474
4475 let subquery_expr = Expression::Select(Box::new(inner_select));
4476
4477 return Ok(Expression::In(Box::new(crate::expressions::In {
4478 this: in_expr.this.clone(),
4479 expressions: Vec::new(),
4480 query: Some(subquery_expr),
4481 not: in_expr.not,
4482 global: in_expr.global,
4483 unnest: None,
4484 is_field: false,
4485 })));
4486 }
4487 }
4488 }
4489
4490 // SQLite: GENERATE_SERIES AS t(i) -> (SELECT value AS i FROM GENERATE_SERIES(...)) AS t
4491 // This handles the subquery wrapping for RANGE -> GENERATE_SERIES in FROM context
4492 if matches!(target, DialectType::SQLite) && matches!(source, DialectType::DuckDB) {
4493 if let Expression::Alias(ref a) = e {
4494 if let Expression::Function(ref f) = a.this {
4495 if f.name.eq_ignore_ascii_case("GENERATE_SERIES")
4496 && !a.column_aliases.is_empty()
4497 {
4498 // Build: (SELECT value AS col_alias FROM GENERATE_SERIES(start, end)) AS table_alias
4499 let col_alias = a.column_aliases[0].clone();
4500 let mut inner_select = crate::expressions::Select::new();
4501 inner_select.expressions =
4502 vec![Expression::Alias(Box::new(crate::expressions::Alias::new(
4503 Expression::Identifier(Identifier::new("value".to_string())),
4504 col_alias,
4505 )))];
4506 inner_select.from = Some(crate::expressions::From {
4507 expressions: vec![a.this.clone()],
4508 });
4509 let subquery =
4510 Expression::Subquery(Box::new(crate::expressions::Subquery {
4511 this: Expression::Select(Box::new(inner_select)),
4512 alias: Some(a.alias.clone()),
4513 column_aliases: Vec::new(),
4514 order_by: None,
4515 limit: None,
4516 offset: None,
4517 lateral: false,
4518 modifiers_inside: false,
4519 trailing_comments: Vec::new(),
4520 distribute_by: None,
4521 sort_by: None,
4522 cluster_by: None,
4523 }));
4524 return Ok(subquery);
4525 }
4526 }
4527 }
4528 }
4529
4530 // BigQuery implicit UNNEST: comma-join on array path -> CROSS JOIN UNNEST
4531 // e.g., SELECT results FROM Coordinates, Coordinates.position AS results
4532 // -> SELECT results FROM Coordinates CROSS JOIN UNNEST(Coordinates.position) AS results
4533 if matches!(source, DialectType::BigQuery) {
4534 if let Expression::Select(ref s) = e {
4535 if let Some(ref from) = s.from {
4536 if from.expressions.len() >= 2 {
4537 // Collect table names from first expression
4538 let first_tables: Vec<String> = from
4539 .expressions
4540 .iter()
4541 .take(1)
4542 .filter_map(|expr| {
4543 if let Expression::Table(t) = expr {
4544 Some(t.name.name.to_lowercase())
4545 } else {
4546 None
4547 }
4548 })
4549 .collect();
4550
4551 // Check if any subsequent FROM expressions are schema-qualified with a matching table name
4552 // or have a dotted name matching a table
4553 let mut needs_rewrite = false;
4554 for expr in from.expressions.iter().skip(1) {
4555 if let Expression::Table(t) = expr {
4556 if let Some(ref schema) = t.schema {
4557 if first_tables.contains(&schema.name.to_lowercase()) {
4558 needs_rewrite = true;
4559 break;
4560 }
4561 }
4562 // Also check dotted names in quoted identifiers (e.g., `Coordinates.position`)
4563 if t.schema.is_none() && t.name.name.contains('.') {
4564 let parts: Vec<&str> = t.name.name.split('.').collect();
4565 if parts.len() >= 2
4566 && first_tables.contains(&parts[0].to_lowercase())
4567 {
4568 needs_rewrite = true;
4569 break;
4570 }
4571 }
4572 }
4573 }
4574
4575 if needs_rewrite {
4576 let mut new_select = s.clone();
4577 let mut new_from_exprs = vec![from.expressions[0].clone()];
4578 let mut new_joins = s.joins.clone();
4579
4580 for expr in from.expressions.iter().skip(1) {
4581 if let Expression::Table(ref t) = expr {
4582 if let Some(ref schema) = t.schema {
4583 if first_tables.contains(&schema.name.to_lowercase()) {
4584 // This is an array path reference, convert to CROSS JOIN UNNEST
4585 let col_expr = Expression::Column(
4586 crate::expressions::Column {
4587 name: t.name.clone(),
4588 table: Some(schema.clone()),
4589 join_mark: false,
4590 trailing_comments: vec![],
4591 },
4592 );
4593 let unnest_expr = Expression::Unnest(Box::new(
4594 crate::expressions::UnnestFunc {
4595 this: col_expr,
4596 expressions: Vec::new(),
4597 with_ordinality: false,
4598 alias: None,
4599 offset_alias: None,
4600 },
4601 ));
4602 let join_this = if let Some(ref alias) = t.alias {
4603 if matches!(
4604 target,
4605 DialectType::Presto
4606 | DialectType::Trino
4607 | DialectType::Athena
4608 ) {
4609 // Presto: UNNEST(x) AS _t0(results)
4610 Expression::Alias(Box::new(
4611 crate::expressions::Alias {
4612 this: unnest_expr,
4613 alias: Identifier::new("_t0"),
4614 column_aliases: vec![alias.clone()],
4615 pre_alias_comments: vec![],
4616 trailing_comments: vec![],
4617 },
4618 ))
4619 } else {
4620 // BigQuery: UNNEST(x) AS results
4621 Expression::Alias(Box::new(
4622 crate::expressions::Alias {
4623 this: unnest_expr,
4624 alias: alias.clone(),
4625 column_aliases: vec![],
4626 pre_alias_comments: vec![],
4627 trailing_comments: vec![],
4628 },
4629 ))
4630 }
4631 } else {
4632 unnest_expr
4633 };
4634 new_joins.push(crate::expressions::Join {
4635 kind: crate::expressions::JoinKind::Cross,
4636 this: join_this,
4637 on: None,
4638 using: Vec::new(),
4639 use_inner_keyword: false,
4640 use_outer_keyword: false,
4641 deferred_condition: false,
4642 join_hint: None,
4643 match_condition: None,
4644 pivots: Vec::new(),
4645 comments: Vec::new(),
4646 nesting_group: 0,
4647 directed: false,
4648 });
4649 } else {
4650 new_from_exprs.push(expr.clone());
4651 }
4652 } else if t.schema.is_none() && t.name.name.contains('.') {
4653 // Dotted name in quoted identifier: `Coordinates.position`
4654 let parts: Vec<&str> = t.name.name.split('.').collect();
4655 if parts.len() >= 2
4656 && first_tables.contains(&parts[0].to_lowercase())
4657 {
4658 let join_this =
4659 if matches!(target, DialectType::BigQuery) {
4660 // BigQuery: keep as single quoted identifier, just convert comma -> CROSS JOIN
4661 Expression::Table(t.clone())
4662 } else {
4663 // Other targets: split into "schema"."name"
4664 let mut new_t = t.clone();
4665 new_t.schema =
4666 Some(Identifier::quoted(parts[0]));
4667 new_t.name = Identifier::quoted(parts[1]);
4668 Expression::Table(new_t)
4669 };
4670 new_joins.push(crate::expressions::Join {
4671 kind: crate::expressions::JoinKind::Cross,
4672 this: join_this,
4673 on: None,
4674 using: Vec::new(),
4675 use_inner_keyword: false,
4676 use_outer_keyword: false,
4677 deferred_condition: false,
4678 join_hint: None,
4679 match_condition: None,
4680 pivots: Vec::new(),
4681 comments: Vec::new(),
4682 nesting_group: 0,
4683 directed: false,
4684 });
4685 } else {
4686 new_from_exprs.push(expr.clone());
4687 }
4688 } else {
4689 new_from_exprs.push(expr.clone());
4690 }
4691 } else {
4692 new_from_exprs.push(expr.clone());
4693 }
4694 }
4695
4696 new_select.from = Some(crate::expressions::From {
4697 expressions: new_from_exprs,
4698 ..from.clone()
4699 });
4700 new_select.joins = new_joins;
4701 return Ok(Expression::Select(new_select));
4702 }
4703 }
4704 }
4705 }
4706 }
4707
4708 // CROSS JOIN UNNEST -> LATERAL VIEW EXPLODE for Hive/Spark
4709 if matches!(
4710 target,
4711 DialectType::Hive | DialectType::Spark | DialectType::Databricks
4712 ) {
4713 if let Expression::Select(ref s) = e {
4714 // Check if any joins are CROSS JOIN with UNNEST/EXPLODE
4715 let is_unnest_or_explode_expr = |expr: &Expression| -> bool {
4716 matches!(expr, Expression::Unnest(_))
4717 || matches!(expr, Expression::Function(f) if f.name.eq_ignore_ascii_case("EXPLODE"))
4718 };
4719 let has_unnest_join = s.joins.iter().any(|j| {
4720 j.kind == crate::expressions::JoinKind::Cross && (
4721 matches!(&j.this, Expression::Alias(a) if is_unnest_or_explode_expr(&a.this))
4722 || is_unnest_or_explode_expr(&j.this)
4723 )
4724 });
4725 if has_unnest_join {
4726 let mut select = s.clone();
4727 let mut new_joins = Vec::new();
4728 for join in select.joins.drain(..) {
4729 if join.kind == crate::expressions::JoinKind::Cross {
4730 // Extract the UNNEST/EXPLODE from the join
4731 let (func_expr, table_alias, col_aliases) = match &join.this {
4732 Expression::Alias(a) => {
4733 let ta = if a.alias.is_empty() {
4734 None
4735 } else {
4736 Some(a.alias.clone())
4737 };
4738 let cas = a.column_aliases.clone();
4739 match &a.this {
4740 Expression::Unnest(u) => {
4741 // Multi-arg UNNEST(y, z) -> INLINE(ARRAYS_ZIP(y, z))
4742 if !u.expressions.is_empty() {
4743 let mut all_args = vec![u.this.clone()];
4744 all_args.extend(u.expressions.clone());
4745 let arrays_zip =
4746 Expression::Function(Box::new(
4747 crate::expressions::Function::new(
4748 "ARRAYS_ZIP".to_string(),
4749 all_args,
4750 ),
4751 ));
4752 let inline = Expression::Function(Box::new(
4753 crate::expressions::Function::new(
4754 "INLINE".to_string(),
4755 vec![arrays_zip],
4756 ),
4757 ));
4758 (Some(inline), ta, a.column_aliases.clone())
4759 } else {
4760 // Convert UNNEST(x) to EXPLODE(x) or POSEXPLODE(x)
4761 let func_name = if u.with_ordinality {
4762 "POSEXPLODE"
4763 } else {
4764 "EXPLODE"
4765 };
4766 let explode = Expression::Function(Box::new(
4767 crate::expressions::Function::new(
4768 func_name.to_string(),
4769 vec![u.this.clone()],
4770 ),
4771 ));
4772 // For POSEXPLODE, add 'pos' to column aliases
4773 let cas = if u.with_ordinality {
4774 let mut pos_aliases =
4775 vec![Identifier::new(
4776 "pos".to_string(),
4777 )];
4778 pos_aliases
4779 .extend(a.column_aliases.clone());
4780 pos_aliases
4781 } else {
4782 a.column_aliases.clone()
4783 };
4784 (Some(explode), ta, cas)
4785 }
4786 }
4787 Expression::Function(f)
4788 if f.name.eq_ignore_ascii_case("EXPLODE") =>
4789 {
4790 (Some(Expression::Function(f.clone())), ta, cas)
4791 }
4792 _ => (None, None, Vec::new()),
4793 }
4794 }
4795 Expression::Unnest(u) => {
4796 let func_name = if u.with_ordinality {
4797 "POSEXPLODE"
4798 } else {
4799 "EXPLODE"
4800 };
4801 let explode = Expression::Function(Box::new(
4802 crate::expressions::Function::new(
4803 func_name.to_string(),
4804 vec![u.this.clone()],
4805 ),
4806 ));
4807 let ta = u.alias.clone();
4808 let col_aliases = if u.with_ordinality {
4809 vec![Identifier::new("pos".to_string())]
4810 } else {
4811 Vec::new()
4812 };
4813 (Some(explode), ta, col_aliases)
4814 }
4815 _ => (None, None, Vec::new()),
4816 };
4817 if let Some(func) = func_expr {
4818 select.lateral_views.push(crate::expressions::LateralView {
4819 this: func,
4820 table_alias,
4821 column_aliases: col_aliases,
4822 outer: false,
4823 });
4824 } else {
4825 new_joins.push(join);
4826 }
4827 } else {
4828 new_joins.push(join);
4829 }
4830 }
4831 select.joins = new_joins;
4832 return Ok(Expression::Select(select));
4833 }
4834 }
4835 }
4836
4837 // UNNEST expansion: DuckDB SELECT UNNEST(arr) in SELECT list -> expanded query
4838 // for BigQuery, Presto/Trino, Snowflake
4839 if matches!(source, DialectType::DuckDB | DialectType::PostgreSQL)
4840 && matches!(
4841 target,
4842 DialectType::BigQuery
4843 | DialectType::Presto
4844 | DialectType::Trino
4845 | DialectType::Snowflake
4846 )
4847 {
4848 if let Expression::Select(ref s) = e {
4849 // Check if any SELECT expressions contain UNNEST
4850 // Note: UNNEST can appear as Expression::Unnest OR Expression::Function("UNNEST")
4851 let has_unnest_in_select = s.expressions.iter().any(|expr| {
4852 fn contains_unnest(e: &Expression) -> bool {
4853 match e {
4854 Expression::Unnest(_) => true,
4855 Expression::Function(f)
4856 if f.name.eq_ignore_ascii_case("UNNEST") =>
4857 {
4858 true
4859 }
4860 Expression::Alias(a) => contains_unnest(&a.this),
4861 Expression::Add(op)
4862 | Expression::Sub(op)
4863 | Expression::Mul(op)
4864 | Expression::Div(op) => {
4865 contains_unnest(&op.left) || contains_unnest(&op.right)
4866 }
4867 _ => false,
4868 }
4869 }
4870 contains_unnest(expr)
4871 });
4872
4873 if has_unnest_in_select {
4874 let rewritten = Self::rewrite_unnest_expansion(s, target);
4875 if let Some(new_select) = rewritten {
4876 return Ok(Expression::Select(Box::new(new_select)));
4877 }
4878 }
4879 }
4880 }
4881
4882 // BigQuery -> PostgreSQL: convert escape sequences in string literals to actual characters
4883 // BigQuery '\n' -> PostgreSQL literal newline in string
4884 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::PostgreSQL)
4885 {
4886 if let Expression::Literal(Literal::String(ref s)) = e {
4887 if s.contains("\\n")
4888 || s.contains("\\t")
4889 || s.contains("\\r")
4890 || s.contains("\\\\")
4891 {
4892 let converted = s
4893 .replace("\\n", "\n")
4894 .replace("\\t", "\t")
4895 .replace("\\r", "\r")
4896 .replace("\\\\", "\\");
4897 return Ok(Expression::Literal(Literal::String(converted)));
4898 }
4899 }
4900 }
4901
4902 // Cross-dialect: convert Literal::Timestamp to target-specific CAST form
4903 // when source != target (identity tests keep the Literal::Timestamp for native handling)
4904 if source != target {
4905 if let Expression::Literal(Literal::Timestamp(ref s)) = e {
4906 let s = s.clone();
4907 // MySQL: TIMESTAMP handling depends on source dialect
4908 // BigQuery TIMESTAMP is timezone-aware -> TIMESTAMP() function in MySQL
4909 // Other sources' TIMESTAMP is non-timezone -> CAST('x' AS DATETIME) in MySQL
4910 if matches!(target, DialectType::MySQL) {
4911 if matches!(source, DialectType::BigQuery) {
4912 // BigQuery TIMESTAMP is timezone-aware -> MySQL TIMESTAMP() function
4913 return Ok(Expression::Function(Box::new(Function::new(
4914 "TIMESTAMP".to_string(),
4915 vec![Expression::Literal(Literal::String(s))],
4916 ))));
4917 } else {
4918 // Non-timezone TIMESTAMP -> CAST('x' AS DATETIME) in MySQL
4919 return Ok(Expression::Cast(Box::new(Cast {
4920 this: Expression::Literal(Literal::String(s)),
4921 to: DataType::Custom {
4922 name: "DATETIME".to_string(),
4923 },
4924 trailing_comments: Vec::new(),
4925 double_colon_syntax: false,
4926 format: None,
4927 default: None,
4928 })));
4929 }
4930 }
4931 let dt = match target {
4932 DialectType::BigQuery | DialectType::StarRocks => DataType::Custom {
4933 name: "DATETIME".to_string(),
4934 },
4935 DialectType::Snowflake => {
4936 // BigQuery TIMESTAMP is timezone-aware -> use TIMESTAMPTZ for Snowflake
4937 if matches!(source, DialectType::BigQuery) {
4938 DataType::Custom {
4939 name: "TIMESTAMPTZ".to_string(),
4940 }
4941 } else if matches!(
4942 source,
4943 DialectType::PostgreSQL
4944 | DialectType::Redshift
4945 | DialectType::Snowflake
4946 ) {
4947 DataType::Timestamp {
4948 precision: None,
4949 timezone: false,
4950 }
4951 } else {
4952 DataType::Custom {
4953 name: "TIMESTAMPNTZ".to_string(),
4954 }
4955 }
4956 }
4957 DialectType::Spark | DialectType::Databricks => {
4958 // BigQuery TIMESTAMP is timezone-aware -> use plain TIMESTAMP for Spark/Databricks
4959 if matches!(source, DialectType::BigQuery) {
4960 DataType::Timestamp {
4961 precision: None,
4962 timezone: false,
4963 }
4964 } else {
4965 DataType::Custom {
4966 name: "TIMESTAMP_NTZ".to_string(),
4967 }
4968 }
4969 }
4970 DialectType::ClickHouse => DataType::Nullable {
4971 inner: Box::new(DataType::Custom {
4972 name: "DateTime".to_string(),
4973 }),
4974 },
4975 DialectType::TSQL | DialectType::Fabric => DataType::Custom {
4976 name: "DATETIME2".to_string(),
4977 },
4978 DialectType::DuckDB => {
4979 // DuckDB: use TIMESTAMPTZ when source is BigQuery (BQ TIMESTAMP is always UTC/tz-aware)
4980 // or when the timestamp string explicitly has timezone info
4981 if matches!(source, DialectType::BigQuery)
4982 || Self::timestamp_string_has_timezone(&s)
4983 {
4984 DataType::Custom {
4985 name: "TIMESTAMPTZ".to_string(),
4986 }
4987 } else {
4988 DataType::Timestamp {
4989 precision: None,
4990 timezone: false,
4991 }
4992 }
4993 }
4994 _ => DataType::Timestamp {
4995 precision: None,
4996 timezone: false,
4997 },
4998 };
4999 return Ok(Expression::Cast(Box::new(Cast {
5000 this: Expression::Literal(Literal::String(s)),
5001 to: dt,
5002 trailing_comments: vec![],
5003 double_colon_syntax: false,
5004 format: None,
5005 default: None,
5006 })));
5007 }
5008 }
5009
5010 // PostgreSQL DELETE requires explicit AS for table aliases
5011 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
5012 if let Expression::Delete(ref del) = e {
5013 if del.alias.is_some() && !del.alias_explicit_as {
5014 let mut new_del = del.clone();
5015 new_del.alias_explicit_as = true;
5016 return Ok(Expression::Delete(new_del));
5017 }
5018 }
5019 }
5020
5021 // UNION/INTERSECT/EXCEPT DISTINCT handling:
5022 // Some dialects require explicit DISTINCT (BigQuery, ClickHouse),
5023 // while others don't support it (Presto, Spark, DuckDB, etc.)
5024 {
5025 let needs_distinct =
5026 matches!(target, DialectType::BigQuery | DialectType::ClickHouse);
5027 let drop_distinct = matches!(
5028 target,
5029 DialectType::Presto
5030 | DialectType::Trino
5031 | DialectType::Athena
5032 | DialectType::Spark
5033 | DialectType::Databricks
5034 | DialectType::DuckDB
5035 | DialectType::Hive
5036 | DialectType::MySQL
5037 | DialectType::PostgreSQL
5038 | DialectType::SQLite
5039 | DialectType::TSQL
5040 | DialectType::Redshift
5041 | DialectType::Snowflake
5042 | DialectType::Oracle
5043 | DialectType::Teradata
5044 | DialectType::Drill
5045 | DialectType::Doris
5046 | DialectType::StarRocks
5047 );
5048 match &e {
5049 Expression::Union(u) if !u.all && needs_distinct && !u.distinct => {
5050 let mut new_u = (**u).clone();
5051 new_u.distinct = true;
5052 return Ok(Expression::Union(Box::new(new_u)));
5053 }
5054 Expression::Intersect(i) if !i.all && needs_distinct && !i.distinct => {
5055 let mut new_i = (**i).clone();
5056 new_i.distinct = true;
5057 return Ok(Expression::Intersect(Box::new(new_i)));
5058 }
5059 Expression::Except(ex) if !ex.all && needs_distinct && !ex.distinct => {
5060 let mut new_ex = (**ex).clone();
5061 new_ex.distinct = true;
5062 return Ok(Expression::Except(Box::new(new_ex)));
5063 }
5064 Expression::Union(u) if u.distinct && drop_distinct => {
5065 let mut new_u = (**u).clone();
5066 new_u.distinct = false;
5067 return Ok(Expression::Union(Box::new(new_u)));
5068 }
5069 Expression::Intersect(i) if i.distinct && drop_distinct => {
5070 let mut new_i = (**i).clone();
5071 new_i.distinct = false;
5072 return Ok(Expression::Intersect(Box::new(new_i)));
5073 }
5074 Expression::Except(ex) if ex.distinct && drop_distinct => {
5075 let mut new_ex = (**ex).clone();
5076 new_ex.distinct = false;
5077 return Ok(Expression::Except(Box::new(new_ex)));
5078 }
5079 _ => {}
5080 }
5081 }
5082
5083 // ClickHouse: MAP('a', '1') -> map('a', '1') (lowercase function name)
5084 if matches!(target, DialectType::ClickHouse) {
5085 if let Expression::Function(ref f) = e {
5086 if f.name.eq_ignore_ascii_case("MAP") && !f.args.is_empty() {
5087 let mut new_f = f.as_ref().clone();
5088 new_f.name = "map".to_string();
5089 return Ok(Expression::Function(Box::new(new_f)));
5090 }
5091 }
5092 }
5093
5094 // ClickHouse: INTERSECT ALL -> INTERSECT (ClickHouse doesn't support ALL on INTERSECT)
5095 if matches!(target, DialectType::ClickHouse) {
5096 if let Expression::Intersect(ref i) = e {
5097 if i.all {
5098 let mut new_i = (**i).clone();
5099 new_i.all = false;
5100 return Ok(Expression::Intersect(Box::new(new_i)));
5101 }
5102 }
5103 }
5104
5105 // Integer division: a / b -> CAST(a AS DOUBLE) / b for dialects that need it
5106 // Only from Generic source, to prevent double-wrapping
5107 if matches!(source, DialectType::Generic) {
5108 if let Expression::Div(ref op) = e {
5109 let cast_type = match target {
5110 DialectType::TSQL | DialectType::Fabric => Some(DataType::Float {
5111 precision: None,
5112 scale: None,
5113 real_spelling: false,
5114 }),
5115 DialectType::Drill
5116 | DialectType::Trino
5117 | DialectType::Athena
5118 | DialectType::Presto => Some(DataType::Double {
5119 precision: None,
5120 scale: None,
5121 }),
5122 DialectType::PostgreSQL
5123 | DialectType::Redshift
5124 | DialectType::Materialize
5125 | DialectType::Teradata
5126 | DialectType::RisingWave => Some(DataType::Double {
5127 precision: None,
5128 scale: None,
5129 }),
5130 _ => None,
5131 };
5132 if let Some(dt) = cast_type {
5133 let cast_left = Expression::Cast(Box::new(Cast {
5134 this: op.left.clone(),
5135 to: dt,
5136 double_colon_syntax: false,
5137 trailing_comments: Vec::new(),
5138 format: None,
5139 default: None,
5140 }));
5141 let new_op = crate::expressions::BinaryOp {
5142 left: cast_left,
5143 right: op.right.clone(),
5144 left_comments: op.left_comments.clone(),
5145 operator_comments: op.operator_comments.clone(),
5146 trailing_comments: op.trailing_comments.clone(),
5147 };
5148 return Ok(Expression::Div(Box::new(new_op)));
5149 }
5150 }
5151 }
5152
5153 // CREATE DATABASE -> CREATE SCHEMA for DuckDB target
5154 if matches!(target, DialectType::DuckDB) {
5155 if let Expression::CreateDatabase(db) = e {
5156 let mut schema = crate::expressions::CreateSchema::new(db.name.name.clone());
5157 schema.if_not_exists = db.if_not_exists;
5158 return Ok(Expression::CreateSchema(Box::new(schema)));
5159 }
5160 if let Expression::DropDatabase(db) = e {
5161 let mut schema = crate::expressions::DropSchema::new(db.name.name.clone());
5162 schema.if_exists = db.if_exists;
5163 return Ok(Expression::DropSchema(Box::new(schema)));
5164 }
5165 }
5166
5167 // Strip ClickHouse Nullable(...) wrapper for non-ClickHouse targets
5168 if matches!(source, DialectType::ClickHouse)
5169 && !matches!(target, DialectType::ClickHouse)
5170 {
5171 if let Expression::Cast(ref c) = e {
5172 if let DataType::Custom { ref name } = c.to {
5173 let upper = name.to_uppercase();
5174 if upper.starts_with("NULLABLE(") && upper.ends_with(")") {
5175 let inner = &name[9..name.len() - 1]; // strip "Nullable(" and ")"
5176 let inner_upper = inner.to_uppercase();
5177 let new_dt = match inner_upper.as_str() {
5178 "DATETIME" | "DATETIME64" => DataType::Timestamp {
5179 precision: None,
5180 timezone: false,
5181 },
5182 "DATE" => DataType::Date,
5183 "INT64" | "BIGINT" => DataType::BigInt { length: None },
5184 "INT32" | "INT" | "INTEGER" => DataType::Int {
5185 length: None,
5186 integer_spelling: false,
5187 },
5188 "FLOAT64" | "DOUBLE" => DataType::Double {
5189 precision: None,
5190 scale: None,
5191 },
5192 "STRING" => DataType::Text,
5193 _ => DataType::Custom {
5194 name: inner.to_string(),
5195 },
5196 };
5197 let mut new_cast = c.clone();
5198 new_cast.to = new_dt;
5199 return Ok(Expression::Cast(new_cast));
5200 }
5201 }
5202 }
5203 }
5204
5205 // ARRAY_CONCAT_AGG -> Snowflake: ARRAY_FLATTEN(ARRAY_AGG(...))
5206 if matches!(target, DialectType::Snowflake) {
5207 if let Expression::ArrayConcatAgg(ref agg) = e {
5208 let mut agg_clone = agg.as_ref().clone();
5209 agg_clone.name = None; // Clear name so generator uses default "ARRAY_AGG"
5210 let array_agg = Expression::ArrayAgg(Box::new(agg_clone));
5211 let flatten = Expression::Function(Box::new(Function::new(
5212 "ARRAY_FLATTEN".to_string(),
5213 vec![array_agg],
5214 )));
5215 return Ok(flatten);
5216 }
5217 }
5218
5219 // ARRAY_CONCAT_AGG -> others: keep as function for cross-dialect
5220 if !matches!(target, DialectType::BigQuery | DialectType::Snowflake) {
5221 if let Expression::ArrayConcatAgg(agg) = e {
5222 let arg = agg.this;
5223 return Ok(Expression::Function(Box::new(Function::new(
5224 "ARRAY_CONCAT_AGG".to_string(),
5225 vec![arg],
5226 ))));
5227 }
5228 }
5229
5230 // Determine what action to take by inspecting e immutably
5231 let action = {
5232 let source_propagates_nulls =
5233 matches!(source, DialectType::Snowflake | DialectType::BigQuery);
5234 let target_ignores_nulls =
5235 matches!(target, DialectType::DuckDB | DialectType::PostgreSQL);
5236
5237 match &e {
5238 Expression::Function(f) => {
5239 let name = f.name.to_uppercase();
5240 // DATE_PART: strip quotes from first arg when target is Snowflake (source != Snowflake)
5241 if (name == "DATE_PART" || name == "DATEPART")
5242 && f.args.len() == 2
5243 && matches!(target, DialectType::Snowflake)
5244 && !matches!(source, DialectType::Snowflake)
5245 && matches!(
5246 &f.args[0],
5247 Expression::Literal(crate::expressions::Literal::String(_))
5248 )
5249 {
5250 Action::DatePartUnquote
5251 } else if source_propagates_nulls
5252 && target_ignores_nulls
5253 && (name == "GREATEST" || name == "LEAST")
5254 && f.args.len() >= 2
5255 {
5256 Action::GreatestLeastNull
5257 } else if matches!(source, DialectType::Snowflake)
5258 && name == "ARRAY_GENERATE_RANGE"
5259 && f.args.len() >= 2
5260 {
5261 Action::ArrayGenerateRange
5262 } else if matches!(source, DialectType::Snowflake)
5263 && matches!(target, DialectType::DuckDB)
5264 && name == "DATE_TRUNC"
5265 && f.args.len() == 2
5266 {
5267 // Determine if DuckDB DATE_TRUNC needs CAST wrapping to preserve input type.
5268 // Logic based on Python sqlglot's input_type_preserved flag:
5269 // - DATE + non-date-unit (HOUR, MINUTE, etc.) -> wrap
5270 // - TIMESTAMP + date-unit (YEAR, QUARTER, MONTH, WEEK, DAY) -> wrap
5271 // - TIMESTAMPTZ/TIMESTAMPLTZ/TIME -> always wrap
5272 let unit_str = match &f.args[0] {
5273 Expression::Literal(crate::expressions::Literal::String(s)) => {
5274 Some(s.to_uppercase())
5275 }
5276 _ => None,
5277 };
5278 let is_date_unit = unit_str.as_ref().map_or(false, |u| {
5279 matches!(u.as_str(), "YEAR" | "QUARTER" | "MONTH" | "WEEK" | "DAY")
5280 });
5281 match &f.args[1] {
5282 Expression::Cast(c) => match &c.to {
5283 DataType::Time { .. } => Action::DateTruncWrapCast,
5284 DataType::Custom { name }
5285 if name.eq_ignore_ascii_case("TIMESTAMPTZ")
5286 || name.eq_ignore_ascii_case("TIMESTAMPLTZ") =>
5287 {
5288 Action::DateTruncWrapCast
5289 }
5290 DataType::Timestamp { timezone: true, .. } => {
5291 Action::DateTruncWrapCast
5292 }
5293 DataType::Date if !is_date_unit => Action::DateTruncWrapCast,
5294 DataType::Timestamp {
5295 timezone: false, ..
5296 } if is_date_unit => Action::DateTruncWrapCast,
5297 _ => Action::None,
5298 },
5299 _ => Action::None,
5300 }
5301 } else if matches!(source, DialectType::Snowflake)
5302 && matches!(target, DialectType::DuckDB)
5303 && name == "TO_DATE"
5304 && f.args.len() == 1
5305 && !matches!(
5306 &f.args[0],
5307 Expression::Literal(crate::expressions::Literal::String(_))
5308 )
5309 {
5310 Action::ToDateToCast
5311 } else if !matches!(source, DialectType::Redshift)
5312 && matches!(target, DialectType::Redshift)
5313 && name == "CONVERT_TIMEZONE"
5314 && (f.args.len() == 2 || f.args.len() == 3)
5315 {
5316 // Convert Function("CONVERT_TIMEZONE") to Expression::ConvertTimezone
5317 // so Redshift's transform_expr won't expand 2-arg to 3-arg with 'UTC'.
5318 // The Redshift parser adds 'UTC' as default source_tz, but when
5319 // transpiling from other dialects, we should preserve the original form.
5320 Action::ConvertTimezoneToExpr
5321 } else if matches!(source, DialectType::Snowflake)
5322 && matches!(target, DialectType::DuckDB)
5323 && name == "REGEXP_REPLACE"
5324 && f.args.len() == 4
5325 && !matches!(
5326 &f.args[3],
5327 Expression::Literal(crate::expressions::Literal::String(_))
5328 )
5329 {
5330 // Snowflake REGEXP_REPLACE with position arg -> DuckDB needs 'g' flag
5331 Action::RegexpReplaceSnowflakeToDuckDB
5332 } else if name == "_BQ_TO_HEX" {
5333 // Internal marker from TO_HEX conversion - bare (no LOWER/UPPER wrapper)
5334 Action::BigQueryToHexBare
5335 } else if matches!(source, DialectType::BigQuery)
5336 && !matches!(target, DialectType::BigQuery)
5337 {
5338 // BigQuery-specific functions that need to be converted to standard forms
5339 match name.as_str() {
5340 "TIMESTAMP_DIFF" | "DATETIME_DIFF" | "TIME_DIFF"
5341 | "DATE_DIFF"
5342 | "TIMESTAMP_ADD" | "TIMESTAMP_SUB"
5343 | "DATETIME_ADD" | "DATETIME_SUB"
5344 | "TIME_ADD" | "TIME_SUB"
5345 | "DATE_ADD" | "DATE_SUB"
5346 | "SAFE_DIVIDE"
5347 | "GENERATE_UUID"
5348 | "COUNTIF"
5349 | "EDIT_DISTANCE"
5350 | "TIMESTAMP_SECONDS" | "TIMESTAMP_MILLIS" | "TIMESTAMP_MICROS"
5351 | "TIMESTAMP_TRUNC" | "DATETIME_TRUNC" | "DATE_TRUNC"
5352 | "TO_HEX"
5353 | "TO_JSON_STRING"
5354 | "GENERATE_ARRAY" | "GENERATE_TIMESTAMP_ARRAY"
5355 | "DIV"
5356 | "UNIX_DATE" | "UNIX_SECONDS" | "UNIX_MILLIS" | "UNIX_MICROS"
5357 | "LAST_DAY"
5358 | "TIME" | "DATETIME" | "TIMESTAMP" | "STRING"
5359 | "REGEXP_CONTAINS"
5360 | "CONTAINS_SUBSTR"
5361 | "SAFE_ADD" | "SAFE_SUBTRACT" | "SAFE_MULTIPLY"
5362 | "SAFE_CAST"
5363 | "GENERATE_DATE_ARRAY"
5364 | "PARSE_DATE" | "PARSE_TIMESTAMP"
5365 | "FORMAT_DATE" | "FORMAT_DATETIME" | "FORMAT_TIMESTAMP"
5366 | "ARRAY_CONCAT"
5367 | "JSON_QUERY" | "JSON_VALUE_ARRAY"
5368 | "INSTR"
5369 | "MD5" | "SHA1" | "SHA256" | "SHA512"
5370 | "GENERATE_UUID()" // just in case
5371 | "REGEXP_EXTRACT_ALL"
5372 | "REGEXP_EXTRACT"
5373 | "INT64"
5374 | "ARRAY_CONCAT_AGG"
5375 | "DATE_DIFF(" // just in case
5376 | "TO_HEX_MD5" // internal
5377 | "MOD"
5378 | "CONCAT"
5379 | "CURRENT_TIMESTAMP" | "CURRENT_DATE" | "CURRENT_DATETIME" | "CURRENT_TIME"
5380 | "STRUCT"
5381 | "ROUND"
5382 | "MAKE_INTERVAL"
5383 | "ARRAY_TO_STRING"
5384 | "PERCENTILE_CONT"
5385 => Action::BigQueryFunctionNormalize,
5386 "ARRAY" if matches!(target, DialectType::Snowflake)
5387 && f.args.len() == 1
5388 && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT"))
5389 => Action::BigQueryArraySelectAsStructToSnowflake,
5390 _ => Action::None,
5391 }
5392 } else if matches!(source, DialectType::BigQuery)
5393 && matches!(target, DialectType::BigQuery)
5394 {
5395 // BigQuery -> BigQuery normalizations
5396 match name.as_str() {
5397 "TIMESTAMP_DIFF"
5398 | "DATETIME_DIFF"
5399 | "TIME_DIFF"
5400 | "DATE_DIFF"
5401 | "DATE_ADD"
5402 | "TO_HEX"
5403 | "CURRENT_TIMESTAMP"
5404 | "CURRENT_DATE"
5405 | "CURRENT_TIME"
5406 | "CURRENT_DATETIME"
5407 | "GENERATE_DATE_ARRAY"
5408 | "INSTR"
5409 | "FORMAT_DATETIME"
5410 | "DATETIME"
5411 | "MAKE_INTERVAL" => Action::BigQueryFunctionNormalize,
5412 _ => Action::None,
5413 }
5414 } else {
5415 // Generic function normalization for non-BigQuery sources
5416 match name.as_str() {
5417 "ARBITRARY" | "AGGREGATE"
5418 | "REGEXP_MATCHES" | "REGEXP_FULL_MATCH"
5419 | "STRUCT_EXTRACT"
5420 | "LIST_FILTER" | "LIST_TRANSFORM" | "LIST_SORT" | "LIST_REVERSE_SORT"
5421 | "STRING_TO_ARRAY" | "STR_SPLIT" | "STR_SPLIT_REGEX" | "SPLIT_TO_ARRAY"
5422 | "SUBSTRINGINDEX"
5423 | "ARRAY_LENGTH" | "SIZE" | "CARDINALITY"
5424 | "UNICODE"
5425 | "XOR"
5426 | "ARRAY_REVERSE_SORT"
5427 | "ENCODE" | "DECODE"
5428 | "QUANTILE"
5429 | "EPOCH" | "EPOCH_MS"
5430 | "HASHBYTES"
5431 | "JSON_EXTRACT_PATH" | "JSON_EXTRACT_PATH_TEXT"
5432 | "APPROX_DISTINCT"
5433 | "DATE_PARSE" | "FORMAT_DATETIME"
5434 | "REGEXP_EXTRACT" | "REGEXP_SUBSTR" | "TO_DAYS"
5435 | "RLIKE"
5436 | "DATEDIFF" | "DATE_DIFF" | "MONTHS_BETWEEN"
5437 | "ADD_MONTHS" | "DATEADD" | "DATE_ADD" | "DATE_SUB" | "DATETRUNC"
5438 | "LAST_DAY" | "LAST_DAY_OF_MONTH" | "EOMONTH"
5439 | "ARRAY_CONSTRUCT" | "ARRAY_CAT" | "ARRAY_COMPACT"
5440 | "ARRAY_FILTER" | "FILTER" | "REDUCE" | "ARRAY_REVERSE"
5441 | "MAP" | "MAP_FROM_ENTRIES"
5442 | "COLLECT_LIST" | "COLLECT_SET"
5443 | "ISNAN" | "IS_NAN"
5444 | "TO_UTC_TIMESTAMP" | "FROM_UTC_TIMESTAMP"
5445 | "FORMAT_NUMBER"
5446 | "TOMONDAY" | "TOSTARTOFWEEK" | "TOSTARTOFMONTH" | "TOSTARTOFYEAR"
5447 | "ELEMENT_AT"
5448 | "EXPLODE" | "EXPLODE_OUTER" | "POSEXPLODE"
5449 | "SPLIT_PART"
5450 // GENERATE_SERIES: handled separately below
5451 | "JSON_EXTRACT" | "JSON_EXTRACT_SCALAR"
5452 | "JSON_QUERY" | "JSON_VALUE"
5453 | "JSON_EXTRACT_JSON" | "BSON_EXTRACT_BSON"
5454 | "TO_UNIX_TIMESTAMP" | "UNIX_TIMESTAMP"
5455 | "CURDATE" | "CURTIME"
5456 | "ARRAY_TO_STRING"
5457 | "ARRAY_SORT" | "SORT_ARRAY"
5458 | "LEFT" | "RIGHT"
5459 | "MAP_FROM_ARRAYS"
5460 | "LIKE" | "ILIKE"
5461 | "ARRAY_CONCAT" | "LIST_CONCAT"
5462 | "QUANTILE_CONT" | "QUANTILE_DISC"
5463 | "PERCENTILE_CONT" | "PERCENTILE_DISC"
5464 | "PERCENTILE_APPROX" | "APPROX_PERCENTILE"
5465 | "LOCATE" | "STRPOS" | "INSTR"
5466 | "CHAR"
5467 // CONCAT: handled separately for COALESCE wrapping
5468 | "ARRAY_JOIN"
5469 | "ARRAY_CONTAINS" | "HAS" | "CONTAINS"
5470 | "ISNULL"
5471 | "MONTHNAME"
5472 | "TO_TIMESTAMP"
5473 | "TO_DATE"
5474 | "TO_JSON"
5475 | "REGEXP_SPLIT"
5476 | "SPLIT"
5477 | "FORMATDATETIME"
5478 | "ARRAYJOIN"
5479 | "SPLITBYSTRING" | "SPLITBYREGEXP"
5480 | "NVL"
5481 | "TO_CHAR"
5482 | "DBMS_RANDOM.VALUE"
5483 | "REGEXP_LIKE"
5484 | "REPLICATE"
5485 | "LEN"
5486 | "COUNT_BIG"
5487 | "DATEFROMPARTS"
5488 | "DATETIMEFROMPARTS"
5489 | "CONVERT" | "TRY_CONVERT"
5490 | "STRFTIME" | "STRPTIME"
5491 | "DATE_FORMAT" | "FORMAT_DATE"
5492 | "PARSE_TIMESTAMP" | "PARSE_DATE"
5493 | "FROM_BASE64" | "TO_BASE64"
5494 | "GETDATE"
5495 | "TO_HEX" | "FROM_HEX" | "UNHEX" | "HEX"
5496 | "TO_UTF8" | "FROM_UTF8"
5497 | "STARTS_WITH" | "STARTSWITH"
5498 | "APPROX_COUNT_DISTINCT"
5499 | "JSON_FORMAT"
5500 | "SYSDATE"
5501 | "LOGICAL_OR" | "LOGICAL_AND"
5502 | "MONTHS_ADD"
5503 | "SCHEMA_NAME"
5504 | "STRTOL"
5505 | "EDITDIST3"
5506 | "FORMAT"
5507 | "LIST_CONTAINS" | "LIST_HAS"
5508 | "VARIANCE" | "STDDEV"
5509 | "ISINF"
5510 | "TO_UNIXTIME"
5511 | "FROM_UNIXTIME"
5512 | "DATEPART" | "DATE_PART"
5513 | "DATENAME"
5514 | "STRING_AGG"
5515 | "JSON_ARRAYAGG"
5516 | "APPROX_QUANTILE"
5517 | "MAKE_DATE"
5518 | "LIST_HAS_ANY" | "ARRAY_HAS_ANY"
5519 | "RANGE"
5520 | "TRY_ELEMENT_AT"
5521 | "STR_TO_MAP"
5522 | "STRING"
5523 | "STR_TO_TIME"
5524 | "CURRENT_SCHEMA"
5525 | "LTRIM" | "RTRIM"
5526 | "UUID"
5527 | "FARM_FINGERPRINT"
5528 | "JSON_KEYS"
5529 | "WEEKOFYEAR"
5530 | "CONCAT_WS"
5531 | "ARRAY_SLICE"
5532 | "ARRAY_PREPEND"
5533 | "ARRAY_REMOVE"
5534 | "GENERATE_DATE_ARRAY"
5535 | "PARSE_JSON"
5536 | "JSON_REMOVE"
5537 | "JSON_SET"
5538 | "LEVENSHTEIN"
5539 => Action::GenericFunctionNormalize,
5540 // Canonical date functions -> dialect-specific
5541 "TS_OR_DS_TO_DATE" => Action::TsOrDsToDateConvert,
5542 "TS_OR_DS_TO_DATE_STR" if f.args.len() == 1 => Action::TsOrDsToDateStrConvert,
5543 "DATE_STR_TO_DATE" if f.args.len() == 1 => Action::DateStrToDateConvert,
5544 "TIME_STR_TO_DATE" if f.args.len() == 1 => Action::TimeStrToDateConvert,
5545 "TIME_STR_TO_TIME" if f.args.len() <= 2 => Action::TimeStrToTimeConvert,
5546 "TIME_STR_TO_UNIX" if f.args.len() == 1 => Action::TimeStrToUnixConvert,
5547 "TIME_TO_TIME_STR" if f.args.len() == 1 => Action::TimeToTimeStrConvert,
5548 "DATE_TO_DATE_STR" if f.args.len() == 1 => Action::DateToDateStrConvert,
5549 "DATE_TO_DI" if f.args.len() == 1 => Action::DateToDiConvert,
5550 "DI_TO_DATE" if f.args.len() == 1 => Action::DiToDateConvert,
5551 "TS_OR_DI_TO_DI" if f.args.len() == 1 => Action::TsOrDiToDiConvert,
5552 "UNIX_TO_STR" if f.args.len() == 2 => Action::UnixToStrConvert,
5553 "UNIX_TO_TIME" if f.args.len() == 1 => Action::UnixToTimeConvert,
5554 "UNIX_TO_TIME_STR" if f.args.len() == 1 => Action::UnixToTimeStrConvert,
5555 "TIME_TO_UNIX" if f.args.len() == 1 => Action::TimeToUnixConvert,
5556 "TIME_TO_STR" if f.args.len() == 2 => Action::TimeToStrConvert,
5557 "STR_TO_UNIX" if f.args.len() == 2 => Action::StrToUnixConvert,
5558 // STR_TO_DATE(x, fmt) -> dialect-specific
5559 "STR_TO_DATE" if f.args.len() == 2
5560 && matches!(source, DialectType::Generic) => Action::StrToDateConvert,
5561 "STR_TO_DATE" => Action::GenericFunctionNormalize,
5562 // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> dialect-specific DATE_ADD
5563 "TS_OR_DS_ADD" if f.args.len() == 3
5564 && matches!(source, DialectType::Generic) => Action::TsOrDsAddConvert,
5565 // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, '1970-01-01')
5566 "DATE_FROM_UNIX_DATE" if f.args.len() == 1 => Action::DateFromUnixDateConvert,
5567 // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b [ELSE c] END
5568 "NVL2" if (f.args.len() == 2 || f.args.len() == 3) => Action::Nvl2Expand,
5569 // IFNULL(a, b) -> COALESCE(a, b) when coming from Generic source
5570 "IFNULL" if f.args.len() == 2 => Action::IfnullToCoalesce,
5571 // IS_ASCII(x) -> dialect-specific
5572 "IS_ASCII" if f.args.len() == 1 => Action::IsAsciiConvert,
5573 // STR_POSITION(haystack, needle[, pos[, occ]]) -> dialect-specific
5574 "STR_POSITION" => Action::StrPositionConvert,
5575 // ARRAY_SUM -> dialect-specific
5576 "ARRAY_SUM" => Action::ArraySumConvert,
5577 // ARRAY_SIZE -> dialect-specific (Drill only)
5578 "ARRAY_SIZE" if matches!(target, DialectType::Drill) => Action::ArraySizeConvert,
5579 // ARRAY_ANY -> dialect-specific
5580 "ARRAY_ANY" if f.args.len() == 2 => Action::ArrayAnyConvert,
5581 // Functions needing specific cross-dialect transforms
5582 "MAX_BY" | "MIN_BY" if matches!(target, DialectType::ClickHouse | DialectType::Spark | DialectType::Databricks | DialectType::DuckDB) => Action::MaxByMinByConvert,
5583 "STRUCT" if matches!(source, DialectType::Spark | DialectType::Databricks)
5584 && !matches!(target, DialectType::Spark | DialectType::Databricks | DialectType::Hive) => Action::SparkStructConvert,
5585 "ARRAY" if matches!(source, DialectType::BigQuery)
5586 && matches!(target, DialectType::Snowflake)
5587 && f.args.len() == 1
5588 && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT")) => Action::BigQueryArraySelectAsStructToSnowflake,
5589 "ARRAY" if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena | DialectType::BigQuery | DialectType::DuckDB | DialectType::ClickHouse | DialectType::StarRocks) => Action::ArraySyntaxConvert,
5590 "TRUNC" if f.args.len() == 2 && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::ClickHouse) => Action::TruncToDateTrunc,
5591 // DATE_TRUNC('unit', x) from Generic source -> arg swap for BigQuery/Doris/Spark/MySQL
5592 "DATE_TRUNC" if f.args.len() == 2
5593 && matches!(source, DialectType::Generic)
5594 && matches!(target, DialectType::BigQuery | DialectType::Doris | DialectType::StarRocks
5595 | DialectType::Spark | DialectType::Databricks | DialectType::MySQL) => Action::DateTruncSwapArgs,
5596 // TIMESTAMP_TRUNC(x, UNIT) from Generic source -> convert to per-dialect
5597 "TIMESTAMP_TRUNC" if f.args.len() >= 2
5598 && matches!(source, DialectType::Generic) => Action::TimestampTruncConvert,
5599 "UNIFORM" if matches!(target, DialectType::Snowflake) => Action::GenericFunctionNormalize,
5600 // GENERATE_SERIES -> SEQUENCE/UNNEST/EXPLODE for target dialects
5601 "GENERATE_SERIES" if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
5602 && !matches!(target, DialectType::PostgreSQL | DialectType::Redshift | DialectType::TSQL | DialectType::Fabric) => Action::GenerateSeriesConvert,
5603 // GENERATE_SERIES with interval normalization for PG target
5604 "GENERATE_SERIES" if f.args.len() >= 3
5605 && matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
5606 && matches!(target, DialectType::PostgreSQL | DialectType::Redshift) => Action::GenerateSeriesConvert,
5607 "GENERATE_SERIES" => Action::None, // passthrough for other cases
5608 // CONCAT(a, b) -> COALESCE wrapping for Presto/ClickHouse from PostgreSQL
5609 "CONCAT" if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
5610 && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::ClickHouse) => Action::ConcatCoalesceWrap,
5611 "CONCAT" => Action::GenericFunctionNormalize,
5612 // DIV(a, b) -> target-specific integer division
5613 "DIV" if f.args.len() == 2
5614 && matches!(source, DialectType::PostgreSQL)
5615 && matches!(target, DialectType::DuckDB | DialectType::BigQuery | DialectType::SQLite) => Action::DivFuncConvert,
5616 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
5617 "JSON_OBJECT_AGG" | "JSONB_OBJECT_AGG" if f.args.len() == 2
5618 && matches!(target, DialectType::DuckDB) => Action::JsonObjectAggConvert,
5619 // JSONB_EXISTS -> JSON_EXISTS for DuckDB
5620 "JSONB_EXISTS" if f.args.len() == 2
5621 && matches!(target, DialectType::DuckDB) => Action::JsonbExistsConvert,
5622 // DATE_BIN -> TIME_BUCKET for DuckDB
5623 "DATE_BIN" if matches!(target, DialectType::DuckDB) => Action::DateBinConvert,
5624 // Multi-arg MIN(a,b,c) -> LEAST, MAX(a,b,c) -> GREATEST
5625 "MIN" | "MAX" if f.args.len() > 1 && !matches!(target, DialectType::SQLite) => Action::MinMaxToLeastGreatest,
5626 // ClickHouse uniq -> APPROX_COUNT_DISTINCT for other dialects
5627 "UNIQ" if matches!(source, DialectType::ClickHouse) && !matches!(target, DialectType::ClickHouse) => Action::ClickHouseUniqToApproxCountDistinct,
5628 // ClickHouse any -> ANY_VALUE for other dialects
5629 "ANY" if f.args.len() == 1 && matches!(source, DialectType::ClickHouse) && !matches!(target, DialectType::ClickHouse) => Action::ClickHouseAnyToAnyValue,
5630 _ => Action::None,
5631 }
5632 }
5633 }
5634 Expression::AggregateFunction(af) => {
5635 let name = af.name.to_uppercase();
5636 match name.as_str() {
5637 "ARBITRARY" | "AGGREGATE" => Action::GenericFunctionNormalize,
5638 "JSON_ARRAYAGG" => Action::GenericFunctionNormalize,
5639 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
5640 "JSON_OBJECT_AGG" | "JSONB_OBJECT_AGG"
5641 if matches!(target, DialectType::DuckDB) =>
5642 {
5643 Action::JsonObjectAggConvert
5644 }
5645 "ARRAY_AGG"
5646 if matches!(
5647 target,
5648 DialectType::Hive
5649 | DialectType::Spark
5650 | DialectType::Databricks
5651 ) =>
5652 {
5653 Action::ArrayAggToCollectList
5654 }
5655 "MAX_BY" | "MIN_BY"
5656 if matches!(
5657 target,
5658 DialectType::ClickHouse
5659 | DialectType::Spark
5660 | DialectType::Databricks
5661 | DialectType::DuckDB
5662 ) =>
5663 {
5664 Action::MaxByMinByConvert
5665 }
5666 "COLLECT_LIST"
5667 if matches!(
5668 target,
5669 DialectType::Presto | DialectType::Trino | DialectType::DuckDB
5670 ) =>
5671 {
5672 Action::CollectListToArrayAgg
5673 }
5674 "COLLECT_SET"
5675 if matches!(
5676 target,
5677 DialectType::Presto
5678 | DialectType::Trino
5679 | DialectType::Snowflake
5680 | DialectType::DuckDB
5681 ) =>
5682 {
5683 Action::CollectSetConvert
5684 }
5685 "PERCENTILE"
5686 if matches!(
5687 target,
5688 DialectType::DuckDB | DialectType::Presto | DialectType::Trino
5689 ) =>
5690 {
5691 Action::PercentileConvert
5692 }
5693 // CORR -> CASE WHEN ISNAN(CORR(a,b)) THEN NULL ELSE CORR(a,b) END for DuckDB
5694 "CORR"
5695 if matches!(target, DialectType::DuckDB)
5696 && matches!(source, DialectType::Snowflake) =>
5697 {
5698 Action::CorrIsnanWrap
5699 }
5700 // BigQuery APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
5701 "APPROX_QUANTILES"
5702 if matches!(source, DialectType::BigQuery)
5703 && matches!(target, DialectType::DuckDB) =>
5704 {
5705 Action::BigQueryApproxQuantiles
5706 }
5707 // BigQuery PERCENTILE_CONT(x, frac RESPECT NULLS) -> QUANTILE_CONT(x, frac) for DuckDB
5708 "PERCENTILE_CONT"
5709 if matches!(source, DialectType::BigQuery)
5710 && matches!(target, DialectType::DuckDB)
5711 && af.args.len() >= 2 =>
5712 {
5713 Action::BigQueryPercentileContToDuckDB
5714 }
5715 _ => Action::None,
5716 }
5717 }
5718 Expression::JSONArrayAgg(_) => match target {
5719 DialectType::PostgreSQL => Action::GenericFunctionNormalize,
5720 _ => Action::None,
5721 },
5722 Expression::ToNumber(tn) => {
5723 // TO_NUMBER(x) with 1 arg -> CAST(x AS DOUBLE) for most targets
5724 if tn.format.is_none() && tn.precision.is_none() && tn.scale.is_none() {
5725 match target {
5726 DialectType::Oracle
5727 | DialectType::Snowflake
5728 | DialectType::Teradata => Action::None,
5729 _ => Action::GenericFunctionNormalize,
5730 }
5731 } else {
5732 Action::None
5733 }
5734 }
5735 Expression::Nvl2(_) => {
5736 // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b ELSE c END for most dialects
5737 // Keep as NVL2 for dialects that support it natively
5738 match target {
5739 DialectType::Oracle
5740 | DialectType::Snowflake
5741 | DialectType::Teradata
5742 | DialectType::Spark
5743 | DialectType::Databricks
5744 | DialectType::Redshift => Action::None,
5745 _ => Action::Nvl2Expand,
5746 }
5747 }
5748 Expression::Decode(_) | Expression::DecodeCase(_) => {
5749 // DECODE(a, b, c[, d, e[, ...]]) -> CASE WHEN with null-safe comparisons
5750 // Keep as DECODE for Oracle/Snowflake
5751 match target {
5752 DialectType::Oracle | DialectType::Snowflake => Action::None,
5753 _ => Action::DecodeSimplify,
5754 }
5755 }
5756 Expression::Coalesce(ref cf) => {
5757 // IFNULL(a, b) -> COALESCE(a, b): clear original_name for cross-dialect
5758 // BigQuery keeps IFNULL natively when source is also BigQuery
5759 if cf.original_name.as_deref() == Some("IFNULL")
5760 && !(matches!(source, DialectType::BigQuery)
5761 && matches!(target, DialectType::BigQuery))
5762 {
5763 Action::IfnullToCoalesce
5764 } else {
5765 Action::None
5766 }
5767 }
5768 Expression::IfFunc(if_func) => {
5769 if matches!(source, DialectType::Snowflake)
5770 && matches!(
5771 target,
5772 DialectType::Presto | DialectType::Trino | DialectType::SQLite
5773 )
5774 && matches!(if_func.false_value, Some(Expression::Div(_)))
5775 {
5776 Action::Div0TypedDivision
5777 } else {
5778 Action::None
5779 }
5780 }
5781 Expression::ToJson(_) => match target {
5782 DialectType::Presto | DialectType::Trino => Action::ToJsonConvert,
5783 DialectType::BigQuery => Action::ToJsonConvert,
5784 DialectType::DuckDB => Action::ToJsonConvert,
5785 _ => Action::None,
5786 },
5787 Expression::ArrayAgg(ref agg) => {
5788 if matches!(
5789 target,
5790 DialectType::Hive | DialectType::Spark | DialectType::Databricks
5791 ) {
5792 // Any source -> Hive/Spark: convert ARRAY_AGG to COLLECT_LIST
5793 Action::ArrayAggToCollectList
5794 } else if matches!(
5795 source,
5796 DialectType::Spark | DialectType::Databricks | DialectType::Hive
5797 ) && matches!(target, DialectType::DuckDB)
5798 && agg.filter.is_some()
5799 {
5800 // Spark/Hive ARRAY_AGG excludes NULLs, DuckDB includes them
5801 // Need to add NOT x IS NULL to existing filter
5802 Action::ArrayAggNullFilter
5803 } else if matches!(target, DialectType::DuckDB)
5804 && agg.ignore_nulls == Some(true)
5805 && !agg.order_by.is_empty()
5806 {
5807 // BigQuery ARRAY_AGG(x IGNORE NULLS ORDER BY ...) -> DuckDB ARRAY_AGG(x ORDER BY a NULLS FIRST, ...)
5808 Action::ArrayAggIgnoreNullsDuckDB
5809 } else if !matches!(source, DialectType::Snowflake) {
5810 Action::None
5811 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
5812 let is_array_agg = agg.name.as_deref().map(|n| n.to_uppercase())
5813 == Some("ARRAY_AGG".to_string())
5814 || agg.name.is_none();
5815 if is_array_agg {
5816 Action::ArrayAggCollectList
5817 } else {
5818 Action::None
5819 }
5820 } else if matches!(
5821 target,
5822 DialectType::DuckDB | DialectType::Presto | DialectType::Trino
5823 ) && agg.filter.is_none()
5824 {
5825 Action::ArrayAggFilter
5826 } else {
5827 Action::None
5828 }
5829 }
5830 Expression::WithinGroup(wg) => {
5831 if matches!(source, DialectType::Snowflake)
5832 && matches!(
5833 target,
5834 DialectType::DuckDB | DialectType::Presto | DialectType::Trino
5835 )
5836 && matches!(wg.this, Expression::ArrayAgg(_))
5837 {
5838 Action::ArrayAggWithinGroupFilter
5839 } else if matches!(&wg.this, Expression::AggregateFunction(af) if af.name.eq_ignore_ascii_case("STRING_AGG"))
5840 || matches!(&wg.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("STRING_AGG"))
5841 || matches!(&wg.this, Expression::StringAgg(_))
5842 {
5843 Action::StringAggConvert
5844 } else if matches!(
5845 target,
5846 DialectType::Presto
5847 | DialectType::Trino
5848 | DialectType::Athena
5849 | DialectType::Spark
5850 | DialectType::Databricks
5851 ) && (matches!(&wg.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("PERCENTILE_CONT") || f.name.eq_ignore_ascii_case("PERCENTILE_DISC"))
5852 || matches!(&wg.this, Expression::AggregateFunction(af) if af.name.eq_ignore_ascii_case("PERCENTILE_CONT") || af.name.eq_ignore_ascii_case("PERCENTILE_DISC"))
5853 || matches!(&wg.this, Expression::PercentileCont(_)))
5854 {
5855 Action::PercentileContConvert
5856 } else {
5857 Action::None
5858 }
5859 }
5860 // For BigQuery: CAST(x AS TIMESTAMP) -> CAST(x AS DATETIME)
5861 // because BigQuery's TIMESTAMP is really TIMESTAMPTZ, and
5862 // DATETIME is the timezone-unaware type
5863 Expression::Cast(ref c) => {
5864 if c.format.is_some()
5865 && (matches!(source, DialectType::BigQuery)
5866 || matches!(source, DialectType::Teradata))
5867 {
5868 Action::BigQueryCastFormat
5869 } else if matches!(target, DialectType::BigQuery)
5870 && !matches!(source, DialectType::BigQuery)
5871 && matches!(
5872 c.to,
5873 DataType::Timestamp {
5874 timezone: false,
5875 ..
5876 }
5877 )
5878 {
5879 Action::CastTimestampToDatetime
5880 } else if matches!(target, DialectType::MySQL | DialectType::StarRocks)
5881 && !matches!(source, DialectType::MySQL | DialectType::StarRocks)
5882 && matches!(
5883 c.to,
5884 DataType::Timestamp {
5885 timezone: false,
5886 ..
5887 }
5888 )
5889 {
5890 // Generic/other -> MySQL/StarRocks: CAST(x AS TIMESTAMP) -> CAST(x AS DATETIME)
5891 // but MySQL-native CAST(x AS TIMESTAMP) stays as TIMESTAMP(x) via transform_cast
5892 Action::CastTimestampToDatetime
5893 } else if matches!(
5894 source,
5895 DialectType::Hive | DialectType::Spark | DialectType::Databricks
5896 ) && matches!(
5897 target,
5898 DialectType::Presto
5899 | DialectType::Trino
5900 | DialectType::Athena
5901 | DialectType::DuckDB
5902 | DialectType::Snowflake
5903 | DialectType::BigQuery
5904 | DialectType::Databricks
5905 | DialectType::TSQL
5906 ) {
5907 Action::HiveCastToTryCast
5908 } else if matches!(c.to, DataType::Timestamp { timezone: true, .. })
5909 && matches!(target, DialectType::MySQL | DialectType::StarRocks)
5910 {
5911 // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) function for MySQL/StarRocks
5912 Action::CastTimestamptzToFunc
5913 } else if matches!(c.to, DataType::Timestamp { timezone: true, .. })
5914 && matches!(
5915 target,
5916 DialectType::Hive
5917 | DialectType::Spark
5918 | DialectType::Databricks
5919 | DialectType::BigQuery
5920 )
5921 {
5922 // CAST(x AS TIMESTAMP WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark/BigQuery
5923 Action::CastTimestampStripTz
5924 } else if matches!(&c.to, DataType::Json)
5925 && matches!(&c.this, Expression::Literal(Literal::String(_)))
5926 && matches!(
5927 target,
5928 DialectType::Presto
5929 | DialectType::Trino
5930 | DialectType::Athena
5931 | DialectType::Snowflake
5932 )
5933 {
5934 // CAST('x' AS JSON) -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake
5935 // Only when the input is a string literal (JSON 'value' syntax)
5936 Action::JsonLiteralToJsonParse
5937 } else if matches!(&c.to, DataType::Json | DataType::JsonB)
5938 && matches!(target, DialectType::Spark | DialectType::Databricks)
5939 {
5940 // CAST(x AS JSON) -> TO_JSON(x) for Spark
5941 Action::CastToJsonForSpark
5942 } else if (matches!(
5943 &c.to,
5944 DataType::Array { .. } | DataType::Map { .. } | DataType::Struct { .. }
5945 )) && matches!(
5946 target,
5947 DialectType::Spark | DialectType::Databricks
5948 ) && (matches!(&c.this, Expression::ParseJson(_))
5949 || matches!(
5950 &c.this,
5951 Expression::Function(f)
5952 if f.name.eq_ignore_ascii_case("JSON_EXTRACT")
5953 || f.name.eq_ignore_ascii_case("JSON_EXTRACT_SCALAR")
5954 || f.name.eq_ignore_ascii_case("GET_JSON_OBJECT")
5955 ))
5956 {
5957 // CAST(JSON_PARSE(...) AS ARRAY/MAP) or CAST(JSON_EXTRACT/GET_JSON_OBJECT(...) AS ARRAY/MAP)
5958 // -> FROM_JSON(..., type_string) for Spark
5959 Action::CastJsonToFromJson
5960 } else if matches!(target, DialectType::Spark | DialectType::Databricks)
5961 && matches!(
5962 c.to,
5963 DataType::Timestamp {
5964 timezone: false,
5965 ..
5966 }
5967 )
5968 && matches!(source, DialectType::DuckDB)
5969 {
5970 Action::StrftimeCastTimestamp
5971 } else if matches!(source, DialectType::DuckDB)
5972 && matches!(
5973 c.to,
5974 DataType::Decimal {
5975 precision: None,
5976 ..
5977 }
5978 )
5979 {
5980 Action::DecimalDefaultPrecision
5981 } else if matches!(source, DialectType::MySQL | DialectType::SingleStore)
5982 && matches!(c.to, DataType::Char { length: None })
5983 && !matches!(target, DialectType::MySQL | DialectType::SingleStore)
5984 {
5985 // MySQL CAST(x AS CHAR) was originally TEXT - convert to target text type
5986 Action::MysqlCastCharToText
5987 } else if matches!(
5988 source,
5989 DialectType::Spark | DialectType::Databricks | DialectType::Hive
5990 ) && matches!(
5991 target,
5992 DialectType::Spark | DialectType::Databricks | DialectType::Hive
5993 ) && Self::has_varchar_char_type(&c.to)
5994 {
5995 // Spark parses VARCHAR(n)/CHAR(n) as TEXT, so normalize back to STRING
5996 Action::SparkCastVarcharToString
5997 } else {
5998 Action::None
5999 }
6000 }
6001 Expression::SafeCast(ref c) => {
6002 if c.format.is_some()
6003 && matches!(source, DialectType::BigQuery)
6004 && !matches!(target, DialectType::BigQuery)
6005 {
6006 Action::BigQueryCastFormat
6007 } else {
6008 Action::None
6009 }
6010 }
6011 // For DuckDB: DATE_TRUNC should preserve the input type
6012 Expression::DateTrunc(_) | Expression::TimestampTrunc(_) => {
6013 if matches!(source, DialectType::Snowflake)
6014 && matches!(target, DialectType::DuckDB)
6015 {
6016 Action::DateTruncWrapCast
6017 } else {
6018 Action::None
6019 }
6020 }
6021 // For DuckDB: SET a = 1 -> SET VARIABLE a = 1
6022 Expression::SetStatement(s) => {
6023 if matches!(target, DialectType::DuckDB)
6024 && !matches!(source, DialectType::TSQL | DialectType::Fabric)
6025 && s.items.iter().any(|item| item.kind.is_none())
6026 {
6027 Action::SetToVariable
6028 } else {
6029 Action::None
6030 }
6031 }
6032 // Cross-dialect NULL ordering normalization.
6033 // When nulls_first is not specified, fill in the source dialect's implied
6034 // default so the target generator can correctly add/strip NULLS FIRST/LAST.
6035 Expression::Ordered(o) => {
6036 // MySQL doesn't support NULLS FIRST/LAST - strip or rewrite
6037 if matches!(target, DialectType::MySQL) && o.nulls_first.is_some() {
6038 Action::MysqlNullsOrdering
6039 } else {
6040 // Skip targets that don't support NULLS FIRST/LAST syntax
6041 let target_supports_nulls = !matches!(
6042 target,
6043 DialectType::MySQL
6044 | DialectType::TSQL
6045 | DialectType::StarRocks
6046 | DialectType::Doris
6047 );
6048 if o.nulls_first.is_none() && source != target && target_supports_nulls
6049 {
6050 Action::NullsOrdering
6051 } else {
6052 Action::None
6053 }
6054 }
6055 }
6056 // BigQuery data types: convert INT64, BYTES, NUMERIC etc. to standard types
6057 Expression::DataType(dt) => {
6058 if matches!(source, DialectType::BigQuery)
6059 && !matches!(target, DialectType::BigQuery)
6060 {
6061 match dt {
6062 DataType::Custom { ref name }
6063 if name.eq_ignore_ascii_case("INT64")
6064 || name.eq_ignore_ascii_case("FLOAT64")
6065 || name.eq_ignore_ascii_case("BOOL")
6066 || name.eq_ignore_ascii_case("BYTES")
6067 || name.eq_ignore_ascii_case("NUMERIC")
6068 || name.eq_ignore_ascii_case("STRING")
6069 || name.eq_ignore_ascii_case("DATETIME") =>
6070 {
6071 Action::BigQueryCastType
6072 }
6073 _ => Action::None,
6074 }
6075 } else if matches!(source, DialectType::TSQL) {
6076 // For TSQL source -> any target (including TSQL itself for REAL)
6077 match dt {
6078 // REAL -> FLOAT even for TSQL->TSQL
6079 DataType::Custom { ref name }
6080 if name.eq_ignore_ascii_case("REAL") =>
6081 {
6082 Action::TSQLTypeNormalize
6083 }
6084 DataType::Float {
6085 real_spelling: true,
6086 ..
6087 } => Action::TSQLTypeNormalize,
6088 // Other TSQL type normalizations only for non-TSQL targets
6089 DataType::Custom { ref name }
6090 if !matches!(target, DialectType::TSQL)
6091 && (name.eq_ignore_ascii_case("MONEY")
6092 || name.eq_ignore_ascii_case("SMALLMONEY")
6093 || name.eq_ignore_ascii_case("DATETIME2")
6094 || name.eq_ignore_ascii_case("IMAGE")
6095 || name.eq_ignore_ascii_case("BIT")
6096 || name.eq_ignore_ascii_case("ROWVERSION")
6097 || name.eq_ignore_ascii_case("UNIQUEIDENTIFIER")
6098 || name.eq_ignore_ascii_case("DATETIMEOFFSET")
6099 || name.to_uppercase().starts_with("NUMERIC")
6100 || name.to_uppercase().starts_with("DATETIME2(")
6101 || name.to_uppercase().starts_with("TIME(")) =>
6102 {
6103 Action::TSQLTypeNormalize
6104 }
6105 DataType::Float {
6106 precision: Some(_), ..
6107 } if !matches!(target, DialectType::TSQL) => {
6108 Action::TSQLTypeNormalize
6109 }
6110 DataType::TinyInt { .. }
6111 if !matches!(target, DialectType::TSQL) =>
6112 {
6113 Action::TSQLTypeNormalize
6114 }
6115 // INTEGER -> INT for Databricks/Spark targets
6116 DataType::Int {
6117 integer_spelling: true,
6118 ..
6119 } if matches!(
6120 target,
6121 DialectType::Databricks | DialectType::Spark
6122 ) =>
6123 {
6124 Action::TSQLTypeNormalize
6125 }
6126 _ => Action::None,
6127 }
6128 } else if (matches!(source, DialectType::Oracle)
6129 || matches!(source, DialectType::Generic))
6130 && !matches!(target, DialectType::Oracle)
6131 {
6132 match dt {
6133 DataType::Custom { ref name }
6134 if name.to_uppercase().starts_with("VARCHAR2(")
6135 || name.to_uppercase().starts_with("NVARCHAR2(")
6136 || name.eq_ignore_ascii_case("VARCHAR2")
6137 || name.eq_ignore_ascii_case("NVARCHAR2") =>
6138 {
6139 Action::OracleVarchar2ToVarchar
6140 }
6141 _ => Action::None,
6142 }
6143 } else if matches!(target, DialectType::Snowflake)
6144 && !matches!(source, DialectType::Snowflake)
6145 {
6146 // When target is Snowflake but source is NOT Snowflake,
6147 // protect FLOAT from being converted to DOUBLE by Snowflake's transform.
6148 // Snowflake treats FLOAT=DOUBLE internally, but non-Snowflake sources
6149 // should keep their FLOAT spelling.
6150 match dt {
6151 DataType::Float { .. } => Action::SnowflakeFloatProtect,
6152 _ => Action::None,
6153 }
6154 } else {
6155 Action::None
6156 }
6157 }
6158 // LOWER patterns from BigQuery TO_HEX conversions:
6159 // - LOWER(LOWER(HEX(x))) from non-BQ targets: flatten
6160 // - LOWER(Function("TO_HEX")) for BQ->BQ: strip LOWER
6161 Expression::Lower(uf) => {
6162 if matches!(source, DialectType::BigQuery) {
6163 match &uf.this {
6164 Expression::Lower(_) => Action::BigQueryToHexLower,
6165 Expression::Function(f)
6166 if f.name == "TO_HEX"
6167 && matches!(target, DialectType::BigQuery) =>
6168 {
6169 // BQ->BQ: LOWER(TO_HEX(x)) -> TO_HEX(x)
6170 Action::BigQueryToHexLower
6171 }
6172 _ => Action::None,
6173 }
6174 } else {
6175 Action::None
6176 }
6177 }
6178 // UPPER patterns from BigQuery TO_HEX conversions:
6179 // - UPPER(LOWER(HEX(x))) from non-BQ targets: extract inner
6180 // - UPPER(Function("TO_HEX")) for BQ->BQ: keep as UPPER(TO_HEX(x))
6181 Expression::Upper(uf) => {
6182 if matches!(source, DialectType::BigQuery) {
6183 match &uf.this {
6184 Expression::Lower(_) => Action::BigQueryToHexUpper,
6185 _ => Action::None,
6186 }
6187 } else {
6188 Action::None
6189 }
6190 }
6191 // BigQuery LAST_DAY(date, unit) -> strip unit for non-BigQuery targets
6192 // Snowflake supports LAST_DAY with unit, so keep it there
6193 Expression::LastDay(ld) => {
6194 if matches!(source, DialectType::BigQuery)
6195 && !matches!(target, DialectType::BigQuery | DialectType::Snowflake)
6196 && ld.unit.is_some()
6197 {
6198 Action::BigQueryLastDayStripUnit
6199 } else {
6200 Action::None
6201 }
6202 }
6203 // BigQuery SafeDivide expressions (already parsed as SafeDivide)
6204 Expression::SafeDivide(_) => {
6205 if matches!(source, DialectType::BigQuery)
6206 && !matches!(target, DialectType::BigQuery)
6207 {
6208 Action::BigQuerySafeDivide
6209 } else {
6210 Action::None
6211 }
6212 }
6213 // BigQuery ANY_VALUE(x HAVING MAX/MIN y) -> ARG_MAX_NULL/ARG_MIN_NULL for DuckDB
6214 // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
6215 Expression::AnyValue(ref agg) => {
6216 if matches!(source, DialectType::BigQuery)
6217 && matches!(target, DialectType::DuckDB)
6218 && agg.having_max.is_some()
6219 {
6220 Action::BigQueryAnyValueHaving
6221 } else if matches!(target, DialectType::Spark | DialectType::Databricks)
6222 && !matches!(source, DialectType::Spark | DialectType::Databricks)
6223 && agg.ignore_nulls.is_none()
6224 {
6225 Action::AnyValueIgnoreNulls
6226 } else {
6227 Action::None
6228 }
6229 }
6230 Expression::Any(ref q) => {
6231 if matches!(source, DialectType::PostgreSQL)
6232 && matches!(
6233 target,
6234 DialectType::Spark | DialectType::Databricks | DialectType::Hive
6235 )
6236 && q.op.is_some()
6237 && !matches!(
6238 q.subquery,
6239 Expression::Select(_) | Expression::Subquery(_)
6240 )
6241 {
6242 Action::AnyToExists
6243 } else {
6244 Action::None
6245 }
6246 }
6247 // BigQuery APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
6248 // RegexpLike from non-DuckDB sources -> REGEXP_MATCHES for DuckDB target
6249 // DuckDB's ~ is a full match, but other dialects' REGEXP/RLIKE is a partial match
6250 Expression::RegexpLike(_)
6251 if !matches!(source, DialectType::DuckDB)
6252 && matches!(target, DialectType::DuckDB) =>
6253 {
6254 Action::RegexpLikeToDuckDB
6255 }
6256 // Safe-division source -> non-safe target: NULLIF wrapping and/or CAST
6257 // Safe-division dialects: MySQL, DuckDB, SingleStore, TiDB, ClickHouse, Doris
6258 Expression::Div(ref op)
6259 if matches!(
6260 source,
6261 DialectType::MySQL
6262 | DialectType::DuckDB
6263 | DialectType::SingleStore
6264 | DialectType::TiDB
6265 | DialectType::ClickHouse
6266 | DialectType::Doris
6267 ) && matches!(
6268 target,
6269 DialectType::PostgreSQL
6270 | DialectType::Redshift
6271 | DialectType::Drill
6272 | DialectType::Trino
6273 | DialectType::Presto
6274 | DialectType::Athena
6275 | DialectType::TSQL
6276 | DialectType::Teradata
6277 | DialectType::SQLite
6278 | DialectType::BigQuery
6279 | DialectType::Snowflake
6280 | DialectType::Databricks
6281 | DialectType::Oracle
6282 | DialectType::Materialize
6283 | DialectType::RisingWave
6284 ) =>
6285 {
6286 // Only wrap if RHS is not already NULLIF
6287 if !matches!(&op.right, Expression::Function(f) if f.name.eq_ignore_ascii_case("NULLIF"))
6288 {
6289 Action::MySQLSafeDivide
6290 } else {
6291 Action::None
6292 }
6293 }
6294 // ALTER TABLE ... RENAME TO <schema>.<table> -> strip schema for most targets
6295 // For TSQL/Fabric, convert to sp_rename instead
6296 Expression::AlterTable(ref at) if !at.actions.is_empty() => {
6297 if let Some(crate::expressions::AlterTableAction::RenameTable(
6298 ref new_tbl,
6299 )) = at.actions.first()
6300 {
6301 if matches!(target, DialectType::TSQL | DialectType::Fabric) {
6302 // TSQL: ALTER TABLE RENAME -> EXEC sp_rename
6303 Action::AlterTableToSpRename
6304 } else if new_tbl.schema.is_some()
6305 && matches!(
6306 target,
6307 DialectType::BigQuery
6308 | DialectType::Doris
6309 | DialectType::StarRocks
6310 | DialectType::DuckDB
6311 | DialectType::PostgreSQL
6312 | DialectType::Redshift
6313 )
6314 {
6315 Action::AlterTableRenameStripSchema
6316 } else {
6317 Action::None
6318 }
6319 } else {
6320 Action::None
6321 }
6322 }
6323 // EPOCH(x) expression -> target-specific epoch conversion
6324 Expression::Epoch(_) if !matches!(target, DialectType::DuckDB) => {
6325 Action::EpochConvert
6326 }
6327 // EPOCH_MS(x) expression -> target-specific epoch ms conversion
6328 Expression::EpochMs(_) if !matches!(target, DialectType::DuckDB) => {
6329 Action::EpochMsConvert
6330 }
6331                 // STRING_AGG conversion for MySQL-family targets (MySQL, SingleStore, Doris, StarRocks), SQLite, and Spark/Databricks
6332 Expression::StringAgg(_) => {
6333 if matches!(
6334 target,
6335 DialectType::MySQL
6336 | DialectType::SingleStore
6337 | DialectType::Doris
6338 | DialectType::StarRocks
6339 | DialectType::SQLite
6340 ) {
6341 Action::StringAggConvert
6342 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
6343 Action::StringAggConvert
6344 } else {
6345 Action::None
6346 }
6347 }
6348 // GROUP_CONCAT -> STRING_AGG for PostgreSQL/Presto/etc.
6349 // Also handles GROUP_CONCAT normalization for MySQL/SQLite targets
6350 Expression::GroupConcat(_) => Action::GroupConcatConvert,
6351 // CARDINALITY/ARRAY_LENGTH/ARRAY_SIZE -> target-specific array length
6352 Expression::Cardinality(_) | Expression::ArrayLength(_) => {
6353 Action::ArrayLengthConvert
6354 }
6355 Expression::ArraySize(_) => {
6356 if matches!(target, DialectType::Drill) {
6357 Action::ArraySizeDrill
6358 } else {
6359 Action::ArrayLengthConvert
6360 }
6361 }
6362 // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter/ARRAY subquery
6363 Expression::ArrayRemove(_) => match target {
6364 DialectType::DuckDB | DialectType::ClickHouse | DialectType::BigQuery => {
6365 Action::ArrayRemoveConvert
6366 }
6367 _ => Action::None,
6368 },
6369 // ARRAY_REVERSE(x) -> arrayReverse for ClickHouse
6370 Expression::ArrayReverse(_) => match target {
6371 DialectType::ClickHouse => Action::ArrayReverseConvert,
6372 _ => Action::None,
6373 },
6374 // JSON_KEYS(x) -> JSON_OBJECT_KEYS/OBJECT_KEYS for Spark/Databricks/Snowflake
6375 Expression::JsonKeys(_) => match target {
6376 DialectType::Spark | DialectType::Databricks | DialectType::Snowflake => {
6377 Action::JsonKeysConvert
6378 }
6379 _ => Action::None,
6380 },
6381 // PARSE_JSON(x) -> strip for SQLite/Doris/MySQL/StarRocks
6382 Expression::ParseJson(_) => match target {
6383 DialectType::SQLite
6384 | DialectType::Doris
6385 | DialectType::MySQL
6386 | DialectType::StarRocks => Action::ParseJsonStrip,
6387 _ => Action::None,
6388 },
6389 // WeekOfYear -> WEEKISO for Snowflake (cross-dialect only)
6390 Expression::WeekOfYear(_)
6391 if matches!(target, DialectType::Snowflake)
6392 && !matches!(source, DialectType::Snowflake) =>
6393 {
6394 Action::WeekOfYearToWeekIso
6395 }
6396 // NVL: clear original_name so generator uses dialect-specific function names
6397 Expression::Nvl(f) if f.original_name.is_some() => Action::NvlClearOriginal,
6398 // XOR: expand for dialects that don't support the XOR keyword
6399 Expression::Xor(_) => {
6400 let target_supports_xor = matches!(
6401 target,
6402 DialectType::MySQL
6403 | DialectType::SingleStore
6404 | DialectType::Doris
6405 | DialectType::StarRocks
6406 );
6407 if !target_supports_xor {
6408 Action::XorExpand
6409 } else {
6410 Action::None
6411 }
6412 }
6413 // TSQL #table -> temp table normalization (CREATE TABLE)
6414 Expression::CreateTable(ct)
6415 if matches!(source, DialectType::TSQL | DialectType::Fabric)
6416 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
6417 && ct.name.name.name.starts_with('#') =>
6418 {
6419 Action::TempTableHash
6420 }
6421 // TSQL #table -> strip # from table references in SELECT/etc.
6422 Expression::Table(tr)
6423 if matches!(source, DialectType::TSQL | DialectType::Fabric)
6424 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
6425 && tr.name.name.starts_with('#') =>
6426 {
6427 Action::TempTableHash
6428 }
6429 // TSQL #table -> strip # from DROP TABLE names
6430 Expression::DropTable(ref dt)
6431 if matches!(source, DialectType::TSQL | DialectType::Fabric)
6432 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
6433 && dt.names.iter().any(|n| n.name.name.starts_with('#')) =>
6434 {
6435 Action::TempTableHash
6436 }
6437 // JSON_EXTRACT -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
6438 Expression::JsonExtract(_)
6439 if matches!(target, DialectType::TSQL | DialectType::Fabric) =>
6440 {
6441 Action::JsonExtractToTsql
6442 }
6443 // JSON_EXTRACT_SCALAR -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
6444 Expression::JsonExtractScalar(_)
6445 if matches!(target, DialectType::TSQL | DialectType::Fabric) =>
6446 {
6447 Action::JsonExtractToTsql
6448 }
6449 // JSON_EXTRACT -> JSONExtractString for ClickHouse
6450 Expression::JsonExtract(_) if matches!(target, DialectType::ClickHouse) => {
6451 Action::JsonExtractToClickHouse
6452 }
6453 // JSON_EXTRACT_SCALAR -> JSONExtractString for ClickHouse
6454 Expression::JsonExtractScalar(_)
6455 if matches!(target, DialectType::ClickHouse) =>
6456 {
6457 Action::JsonExtractToClickHouse
6458 }
6459 // JSON_EXTRACT -> arrow syntax for SQLite/DuckDB
6460 Expression::JsonExtract(ref f)
6461 if !f.arrow_syntax
6462 && matches!(target, DialectType::SQLite | DialectType::DuckDB) =>
6463 {
6464 Action::JsonExtractToArrow
6465 }
6466 // JSON_EXTRACT with JSONPath -> JSON_EXTRACT_PATH for PostgreSQL (non-PG sources only)
6467 Expression::JsonExtract(ref f)
6468 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift)
6469 && !matches!(
6470 source,
6471 DialectType::PostgreSQL
6472 | DialectType::Redshift
6473 | DialectType::Materialize
6474 )
6475 && matches!(&f.path, Expression::Literal(Literal::String(s)) if s.starts_with('$')) =>
6476 {
6477 Action::JsonExtractToGetJsonObject
6478 }
6479 // JSON_EXTRACT -> GET_JSON_OBJECT for Hive/Spark
6480 Expression::JsonExtract(_)
6481 if matches!(
6482 target,
6483 DialectType::Hive | DialectType::Spark | DialectType::Databricks
6484 ) =>
6485 {
6486 Action::JsonExtractToGetJsonObject
6487 }
6488 // JSON_EXTRACT_SCALAR -> target-specific for PostgreSQL, Snowflake, SQLite
6489 // Skip if already in arrow/hash_arrow syntax (same-dialect identity case)
6490 Expression::JsonExtractScalar(ref f)
6491 if !f.arrow_syntax
6492 && !f.hash_arrow_syntax
6493 && matches!(
6494 target,
6495 DialectType::PostgreSQL
6496 | DialectType::Redshift
6497 | DialectType::Snowflake
6498 | DialectType::SQLite
6499 | DialectType::DuckDB
6500 ) =>
6501 {
6502 Action::JsonExtractScalarConvert
6503 }
6504 // JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
6505 Expression::JsonExtractScalar(_)
6506 if matches!(
6507 target,
6508 DialectType::Hive | DialectType::Spark | DialectType::Databricks
6509 ) =>
6510 {
6511 Action::JsonExtractScalarToGetJsonObject
6512 }
6513 // JSON_EXTRACT path normalization for BigQuery, MySQL (bracket/wildcard handling)
6514 Expression::JsonExtract(ref f)
6515 if !f.arrow_syntax
6516 && matches!(target, DialectType::BigQuery | DialectType::MySQL) =>
6517 {
6518 Action::JsonPathNormalize
6519 }
6520 // JsonQuery (parsed JSON_QUERY) -> target-specific
6521 Expression::JsonQuery(_) => Action::JsonQueryValueConvert,
6522 // JsonValue (parsed JSON_VALUE) -> target-specific
6523 Expression::JsonValue(_) => Action::JsonQueryValueConvert,
6524 // AT TIME ZONE -> AT_TIMEZONE for Presto, FROM_UTC_TIMESTAMP for Spark,
6525 // TIMESTAMP(DATETIME(...)) for BigQuery, CONVERT_TIMEZONE for Snowflake
6526 Expression::AtTimeZone(_)
6527 if matches!(
6528 target,
6529 DialectType::Presto
6530 | DialectType::Trino
6531 | DialectType::Athena
6532 | DialectType::Spark
6533 | DialectType::Databricks
6534 | DialectType::BigQuery
6535 | DialectType::Snowflake
6536 ) =>
6537 {
6538 Action::AtTimeZoneConvert
6539 }
6540 // DAY_OF_WEEK -> dialect-specific
6541 Expression::DayOfWeek(_)
6542 if matches!(
6543 target,
6544 DialectType::DuckDB | DialectType::Spark | DialectType::Databricks
6545 ) =>
6546 {
6547 Action::DayOfWeekConvert
6548 }
6549 // CURRENT_USER -> CURRENT_USER() for Snowflake
6550 Expression::CurrentUser(_) if matches!(target, DialectType::Snowflake) => {
6551 Action::CurrentUserParens
6552 }
6553 // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
6554 Expression::ElementAt(_)
6555 if matches!(target, DialectType::PostgreSQL | DialectType::BigQuery) =>
6556 {
6557 Action::ElementAtConvert
6558 }
6559 // ARRAY[...] (ArrayFunc bracket_notation=false) -> convert for target dialect
6560 Expression::ArrayFunc(ref arr)
6561 if !arr.bracket_notation
6562 && matches!(
6563 target,
6564 DialectType::Spark
6565 | DialectType::Databricks
6566 | DialectType::Hive
6567 | DialectType::BigQuery
6568 | DialectType::DuckDB
6569 | DialectType::Snowflake
6570 | DialectType::Presto
6571 | DialectType::Trino
6572 | DialectType::Athena
6573 | DialectType::ClickHouse
6574 | DialectType::StarRocks
6575 ) =>
6576 {
6577 Action::ArraySyntaxConvert
6578 }
6579 // VARIANCE expression -> varSamp for ClickHouse
6580 Expression::Variance(_) if matches!(target, DialectType::ClickHouse) => {
6581 Action::VarianceToClickHouse
6582 }
6583 // STDDEV expression -> stddevSamp for ClickHouse
6584 Expression::Stddev(_) if matches!(target, DialectType::ClickHouse) => {
6585 Action::StddevToClickHouse
6586 }
6587 // ApproxQuantile -> APPROX_PERCENTILE for Snowflake
6588 Expression::ApproxQuantile(_) if matches!(target, DialectType::Snowflake) => {
6589 Action::ApproxQuantileConvert
6590 }
6591 // MonthsBetween -> target-specific
6592 Expression::MonthsBetween(_)
6593 if !matches!(
6594 target,
6595 DialectType::Spark | DialectType::Databricks | DialectType::Hive
6596 ) =>
6597 {
6598 Action::MonthsBetweenConvert
6599 }
6600 // AddMonths -> target-specific DATEADD/DATE_ADD
6601 Expression::AddMonths(_) => Action::AddMonthsConvert,
6602 // MapFromArrays -> target-specific (MAP, OBJECT_CONSTRUCT, MAP_FROM_ARRAYS)
6603 Expression::MapFromArrays(_)
6604 if !matches!(target, DialectType::Spark | DialectType::Databricks) =>
6605 {
6606 Action::MapFromArraysConvert
6607 }
6608 // CURRENT_USER -> CURRENT_USER() for Spark
6609 Expression::CurrentUser(_)
6610 if matches!(target, DialectType::Spark | DialectType::Databricks) =>
6611 {
6612 Action::CurrentUserSparkParens
6613 }
6614 // MONTH/YEAR/DAY('string') from Spark -> cast string to DATE for DuckDB/Presto
6615 Expression::Month(ref f) | Expression::Year(ref f) | Expression::Day(ref f)
6616 if matches!(
6617 source,
6618 DialectType::Spark | DialectType::Databricks | DialectType::Hive
6619 ) && matches!(&f.this, Expression::Literal(Literal::String(_)))
6620 && matches!(
6621 target,
6622 DialectType::DuckDB
6623 | DialectType::Presto
6624 | DialectType::Trino
6625 | DialectType::Athena
6626 | DialectType::PostgreSQL
6627 | DialectType::Redshift
6628 ) =>
6629 {
6630 Action::SparkDateFuncCast
6631 }
6632 // $parameter -> @parameter for BigQuery
6633 Expression::Parameter(ref p)
6634 if matches!(target, DialectType::BigQuery)
6635 && matches!(source, DialectType::DuckDB)
6636 && (p.style == crate::expressions::ParameterStyle::Dollar
6637 || p.style == crate::expressions::ParameterStyle::DoubleDollar) =>
6638 {
6639 Action::DollarParamConvert
6640 }
6641 // EscapeString literal: normalize literal newlines to \n
6642 Expression::Literal(Literal::EscapeString(ref s))
6643 if s.contains('\n') || s.contains('\r') || s.contains('\t') =>
6644 {
6645 Action::EscapeStringNormalize
6646 }
6647 // straight_join: keep lowercase for DuckDB, quote for MySQL
6648 Expression::Column(ref col)
6649 if col.name.name == "STRAIGHT_JOIN"
6650 && col.table.is_none()
6651 && matches!(source, DialectType::DuckDB)
6652 && matches!(target, DialectType::DuckDB | DialectType::MySQL) =>
6653 {
6654 Action::StraightJoinCase
6655 }
6656 // DATE and TIMESTAMP literal type conversions are now handled in the generator directly
6657 // Snowflake INTERVAL format: INTERVAL '2' HOUR -> INTERVAL '2 HOUR'
6658 Expression::Interval(ref iv)
6659 if matches!(
6660 target,
6661 DialectType::Snowflake
6662 | DialectType::PostgreSQL
6663 | DialectType::Redshift
6664 ) && iv.unit.is_some()
6665 && matches!(
6666 &iv.this,
6667 Some(Expression::Literal(Literal::String(_)))
6668 ) =>
6669 {
6670 Action::SnowflakeIntervalFormat
6671 }
6672 // TABLESAMPLE -> TABLESAMPLE RESERVOIR for DuckDB target
6673 Expression::TableSample(ref ts) if matches!(target, DialectType::DuckDB) => {
6674 if let Some(ref sample) = ts.sample {
6675 if !sample.explicit_method {
6676 Action::TablesampleReservoir
6677 } else {
6678 Action::None
6679 }
6680 } else {
6681 Action::None
6682 }
6683 }
6684 // TABLESAMPLE from non-Snowflake source to Snowflake: strip method and PERCENT
6685 // Handles both Expression::TableSample wrapper and Expression::Table with table_sample
6686 Expression::TableSample(ref ts)
6687 if matches!(target, DialectType::Snowflake)
6688 && !matches!(source, DialectType::Snowflake)
6689 && ts.sample.is_some() =>
6690 {
6691 if let Some(ref sample) = ts.sample {
6692 if !sample.explicit_method {
6693 Action::TablesampleSnowflakeStrip
6694 } else {
6695 Action::None
6696 }
6697 } else {
6698 Action::None
6699 }
6700 }
6701 Expression::Table(ref t)
6702 if matches!(target, DialectType::Snowflake)
6703 && !matches!(source, DialectType::Snowflake)
6704 && t.table_sample.is_some() =>
6705 {
6706 if let Some(ref sample) = t.table_sample {
6707 if !sample.explicit_method {
6708 Action::TablesampleSnowflakeStrip
6709 } else {
6710 Action::None
6711 }
6712 } else {
6713 Action::None
6714 }
6715 }
6716 // ALTER TABLE RENAME -> EXEC sp_rename for TSQL
6717 Expression::AlterTable(ref at)
6718 if matches!(target, DialectType::TSQL | DialectType::Fabric)
6719 && !at.actions.is_empty()
6720 && matches!(
6721 at.actions.first(),
6722 Some(crate::expressions::AlterTableAction::RenameTable(_))
6723 ) =>
6724 {
6725 Action::AlterTableToSpRename
6726 }
6727 // Subscript index: 1-based to 0-based for BigQuery/Hive/Spark
6728 Expression::Subscript(ref sub)
6729 if matches!(
6730 target,
6731 DialectType::BigQuery
6732 | DialectType::Hive
6733 | DialectType::Spark
6734 | DialectType::Databricks
6735 ) && matches!(
6736 source,
6737 DialectType::DuckDB
6738 | DialectType::PostgreSQL
6739 | DialectType::Presto
6740 | DialectType::Trino
6741 | DialectType::Redshift
6742 | DialectType::ClickHouse
6743 ) && matches!(&sub.index, Expression::Literal(Literal::Number(ref n)) if n.parse::<i64>().unwrap_or(0) > 0) =>
6744 {
6745 Action::ArrayIndexConvert
6746 }
6747 // ANY_VALUE IGNORE NULLS detection moved to the AnyValue arm above
6748 // MysqlNullsOrdering for Ordered is now handled in the Ordered arm above
6749 // RESPECT NULLS handling for SQLite (strip it, add NULLS LAST to ORDER BY)
6750 // and for MySQL (rewrite ORDER BY with CASE WHEN for null ordering)
6751 Expression::WindowFunction(ref wf) => {
6752 // BigQuery doesn't support NULLS FIRST/LAST in window function ORDER BY
6753 // EXCEPT for ROW_NUMBER which keeps NULLS LAST
6754 let is_row_number = matches!(wf.this, Expression::RowNumber(_));
6755 if matches!(target, DialectType::BigQuery)
6756 && !is_row_number
6757 && !wf.over.order_by.is_empty()
6758 && wf.over.order_by.iter().any(|o| o.nulls_first.is_some())
6759 {
6760 Action::BigQueryNullsOrdering
6761 // DuckDB -> MySQL: Add CASE WHEN for NULLS LAST simulation in window ORDER BY
6762 // But NOT when frame is RANGE/GROUPS, since adding CASE WHEN would break value-based frames
6763 } else {
6764 let source_nulls_last = matches!(source, DialectType::DuckDB);
6765 let has_range_frame = wf.over.frame.as_ref().map_or(false, |f| {
6766 matches!(
6767 f.kind,
6768 crate::expressions::WindowFrameKind::Range
6769 | crate::expressions::WindowFrameKind::Groups
6770 )
6771 });
6772 if source_nulls_last
6773 && matches!(target, DialectType::MySQL)
6774 && !wf.over.order_by.is_empty()
6775 && wf.over.order_by.iter().any(|o| !o.desc)
6776 && !has_range_frame
6777 {
6778 Action::MysqlNullsLastRewrite
6779 } else {
6780 match &wf.this {
6781 Expression::FirstValue(ref vf)
6782 | Expression::LastValue(ref vf)
6783 if vf.ignore_nulls == Some(false) =>
6784 {
6785 // RESPECT NULLS
6786 match target {
6787 DialectType::SQLite => Action::RespectNullsConvert,
6788 _ => Action::None,
6789 }
6790 }
6791 _ => Action::None,
6792 }
6793 }
6794 }
6795 }
6796 // CREATE TABLE a LIKE b -> dialect-specific transformations
6797 Expression::CreateTable(ref ct)
6798 if ct.columns.is_empty()
6799 && ct.constraints.iter().any(|c| {
6800 matches!(c, crate::expressions::TableConstraint::Like { .. })
6801 })
6802 && matches!(
6803 target,
6804 DialectType::DuckDB | DialectType::SQLite | DialectType::Drill
6805 ) =>
6806 {
6807 Action::CreateTableLikeToCtas
6808 }
6809 Expression::CreateTable(ref ct)
6810 if ct.columns.is_empty()
6811 && ct.constraints.iter().any(|c| {
6812 matches!(c, crate::expressions::TableConstraint::Like { .. })
6813 })
6814 && matches!(target, DialectType::TSQL | DialectType::Fabric) =>
6815 {
6816 Action::CreateTableLikeToSelectInto
6817 }
6818 Expression::CreateTable(ref ct)
6819 if ct.columns.is_empty()
6820 && ct.constraints.iter().any(|c| {
6821 matches!(c, crate::expressions::TableConstraint::Like { .. })
6822 })
6823 && matches!(target, DialectType::ClickHouse) =>
6824 {
6825 Action::CreateTableLikeToAs
6826 }
6827 // CREATE TABLE: strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
6828 Expression::CreateTable(ref ct)
6829 if matches!(target, DialectType::DuckDB)
6830 && matches!(
6831 source,
6832 DialectType::DuckDB
6833 | DialectType::Spark
6834 | DialectType::Databricks
6835 | DialectType::Hive
6836 ) =>
6837 {
6838 let has_comment = ct.columns.iter().any(|c| {
6839 c.comment.is_some()
6840 || c.constraints.iter().any(|con| {
6841 matches!(con, crate::expressions::ColumnConstraint::Comment(_))
6842 })
6843 });
6844 let has_props = !ct.properties.is_empty();
6845 if has_comment || has_props {
6846 Action::CreateTableStripComment
6847 } else {
6848 Action::None
6849 }
6850 }
6851 // Array conversion: Expression::Array -> Expression::ArrayFunc for PostgreSQL
6852 Expression::Array(_)
6853 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) =>
6854 {
6855 Action::ArrayConcatBracketConvert
6856 }
6857 // ArrayFunc (bracket notation) -> Function("ARRAY") for Redshift (from BigQuery source)
6858 Expression::ArrayFunc(ref arr)
6859 if arr.bracket_notation
6860 && matches!(source, DialectType::BigQuery)
6861 && matches!(target, DialectType::Redshift) =>
6862 {
6863 Action::ArrayConcatBracketConvert
6864 }
6865 // BIT_OR/BIT_AND/BIT_XOR: float/decimal arg cast for DuckDB, or rename for Snowflake
6866 Expression::BitwiseOrAgg(ref f)
6867 | Expression::BitwiseAndAgg(ref f)
6868 | Expression::BitwiseXorAgg(ref f) => {
6869 if matches!(target, DialectType::DuckDB) {
6870 // Check if the arg is CAST(val AS FLOAT/DOUBLE/DECIMAL/REAL)
6871 if let Expression::Cast(ref c) = f.this {
6872 match &c.to {
6873 DataType::Float { .. }
6874 | DataType::Double { .. }
6875 | DataType::Decimal { .. } => Action::BitAggFloatCast,
6876 DataType::Custom { ref name }
6877 if name.eq_ignore_ascii_case("REAL") =>
6878 {
6879 Action::BitAggFloatCast
6880 }
6881 _ => Action::None,
6882 }
6883 } else {
6884 Action::None
6885 }
6886 } else if matches!(target, DialectType::Snowflake) {
6887 Action::BitAggSnowflakeRename
6888 } else {
6889 Action::None
6890 }
6891 }
6892 // FILTER -> IFF for Snowflake (aggregate functions with FILTER clause)
6893 Expression::Filter(ref _f) if matches!(target, DialectType::Snowflake) => {
6894 Action::FilterToIff
6895 }
6896 // AggFunc.filter -> IFF wrapping for Snowflake (e.g., AVG(x) FILTER(WHERE cond))
6897 Expression::Avg(ref f)
6898 | Expression::Sum(ref f)
6899 | Expression::Min(ref f)
6900 | Expression::Max(ref f)
6901 | Expression::CountIf(ref f)
6902 | Expression::Stddev(ref f)
6903 | Expression::StddevPop(ref f)
6904 | Expression::StddevSamp(ref f)
6905 | Expression::Variance(ref f)
6906 | Expression::VarPop(ref f)
6907 | Expression::VarSamp(ref f)
6908 | Expression::Median(ref f)
6909 | Expression::Mode(ref f)
6910 | Expression::First(ref f)
6911 | Expression::Last(ref f)
6912 | Expression::ApproxDistinct(ref f)
6913 if f.filter.is_some() && matches!(target, DialectType::Snowflake) =>
6914 {
6915 Action::AggFilterToIff
6916 }
6917 Expression::Count(ref c)
6918 if c.filter.is_some() && matches!(target, DialectType::Snowflake) =>
6919 {
6920 Action::AggFilterToIff
6921 }
6922 // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN ... END) for dialects that don't support multi-arg DISTINCT
6923 Expression::Count(ref c)
6924 if c.distinct
6925 && matches!(&c.this, Some(Expression::Tuple(_)))
6926 && matches!(
6927 target,
6928 DialectType::Presto
6929 | DialectType::Trino
6930 | DialectType::DuckDB
6931 | DialectType::PostgreSQL
6932 ) =>
6933 {
6934 Action::CountDistinctMultiArg
6935 }
6936 // JSON arrow -> GET_PATH/PARSE_JSON for Snowflake
6937 Expression::JsonExtract(_) if matches!(target, DialectType::Snowflake) => {
6938 Action::JsonToGetPath
6939 }
6940 // DuckDB struct/dict -> BigQuery STRUCT / Presto ROW
6941 Expression::Struct(_)
6942 if matches!(
6943 target,
6944 DialectType::BigQuery | DialectType::Presto | DialectType::Trino
6945 ) && matches!(source, DialectType::DuckDB) =>
6946 {
6947 Action::StructToRow
6948 }
6949 // DuckDB curly-brace dict {'key': value} -> BigQuery STRUCT / Presto ROW
6950 Expression::MapFunc(ref m)
6951 if m.curly_brace_syntax
6952 && matches!(
6953 target,
6954 DialectType::BigQuery | DialectType::Presto | DialectType::Trino
6955 )
6956 && matches!(source, DialectType::DuckDB) =>
6957 {
6958 Action::StructToRow
6959 }
6960 // APPROX_COUNT_DISTINCT -> APPROX_DISTINCT for Presto/Trino
6961 Expression::ApproxCountDistinct(_)
6962 if matches!(
6963 target,
6964 DialectType::Presto | DialectType::Trino | DialectType::Athena
6965 ) =>
6966 {
6967 Action::ApproxCountDistinctToApproxDistinct
6968 }
6969 // ARRAY_CONTAINS(arr, val) -> CONTAINS(arr, val) for Presto, ARRAY_CONTAINS(CAST(val AS VARIANT), arr) for Snowflake
6970 Expression::ArrayContains(_)
6971 if matches!(
6972 target,
6973 DialectType::Presto | DialectType::Trino | DialectType::Snowflake
6974 ) =>
6975 {
6976 Action::ArrayContainsConvert
6977 }
6978 // StrPosition with position -> complex expansion for Presto/DuckDB
6979 // STRPOS doesn't support a position arg in these dialects
6980 Expression::StrPosition(ref sp)
6981 if sp.position.is_some()
6982 && matches!(
6983 target,
6984 DialectType::Presto
6985 | DialectType::Trino
6986 | DialectType::Athena
6987 | DialectType::DuckDB
6988 ) =>
6989 {
6990 Action::StrPositionExpand
6991 }
6992 // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
6993 Expression::First(ref f)
6994 if f.ignore_nulls == Some(true)
6995 && matches!(target, DialectType::DuckDB) =>
6996 {
6997 Action::FirstToAnyValue
6998 }
6999 // BEGIN -> START TRANSACTION for Presto/Trino
7000 Expression::Command(ref cmd)
7001 if cmd.this.eq_ignore_ascii_case("BEGIN")
7002 && matches!(
7003 target,
7004 DialectType::Presto | DialectType::Trino | DialectType::Athena
7005 ) =>
7006 {
7007 // Handled inline below
7008 Action::None // We'll handle it directly
7009 }
7010 // Note: PostgreSQL ^ is now parsed as Power directly (not BitwiseXor).
7011 // PostgreSQL # is parsed as BitwiseXor (which is correct).
7012 // a || b (Concat operator) -> CONCAT function for Presto/Trino
7013 Expression::Concat(ref _op)
7014 if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
7015 && matches!(target, DialectType::Presto | DialectType::Trino) =>
7016 {
7017 Action::PipeConcatToConcat
7018 }
7019 _ => Action::None,
7020 }
7021 };
7022
7023 match action {
7024 Action::None => {
7025 // Handle inline transforms that don't need a dedicated action
7026
7027 // BETWEEN SYMMETRIC/ASYMMETRIC expansion for non-PostgreSQL/Dremio targets
7028 if let Expression::Between(ref b) = e {
7029 if let Some(sym) = b.symmetric {
7030 let keeps_symmetric =
7031 matches!(target, DialectType::PostgreSQL | DialectType::Dremio);
7032 if !keeps_symmetric {
7033 if sym {
7034 // SYMMETRIC: expand to (x BETWEEN a AND b OR x BETWEEN b AND a)
7035 let b = if let Expression::Between(b) = e {
7036 *b
7037 } else {
7038 unreachable!()
7039 };
7040 let between1 = Expression::Between(Box::new(
7041 crate::expressions::Between {
7042 this: b.this.clone(),
7043 low: b.low.clone(),
7044 high: b.high.clone(),
7045 not: b.not,
7046 symmetric: None,
7047 },
7048 ));
7049 let between2 = Expression::Between(Box::new(
7050 crate::expressions::Between {
7051 this: b.this,
7052 low: b.high,
7053 high: b.low,
7054 not: b.not,
7055 symmetric: None,
7056 },
7057 ));
7058 return Ok(Expression::Paren(Box::new(
7059 crate::expressions::Paren {
7060 this: Expression::Or(Box::new(
7061 crate::expressions::BinaryOp::new(
7062 between1, between2,
7063 ),
7064 )),
7065 trailing_comments: vec![],
7066 },
7067 )));
7068 } else {
7069 // ASYMMETRIC: strip qualifier, keep as regular BETWEEN
7070 let b = if let Expression::Between(b) = e {
7071 *b
7072 } else {
7073 unreachable!()
7074 };
7075 return Ok(Expression::Between(Box::new(
7076 crate::expressions::Between {
7077 this: b.this,
7078 low: b.low,
7079 high: b.high,
7080 not: b.not,
7081 symmetric: None,
7082 },
7083 )));
7084 }
7085 }
7086 }
7087 }
7088
7089 // ILIKE -> LOWER(x) LIKE LOWER(y) for StarRocks/Doris
7090 if let Expression::ILike(ref _like) = e {
7091 if matches!(target, DialectType::StarRocks | DialectType::Doris) {
7092 let like = if let Expression::ILike(l) = e {
7093 *l
7094 } else {
7095 unreachable!()
7096 };
7097 let lower_left = Expression::Function(Box::new(Function::new(
7098 "LOWER".to_string(),
7099 vec![like.left],
7100 )));
7101 let lower_right = Expression::Function(Box::new(Function::new(
7102 "LOWER".to_string(),
7103 vec![like.right],
7104 )));
7105 return Ok(Expression::Like(Box::new(crate::expressions::LikeOp {
7106 left: lower_left,
7107 right: lower_right,
7108 escape: like.escape,
7109 quantifier: like.quantifier,
7110 })));
7111 }
7112 }
7113
7114 // Oracle DBMS_RANDOM.VALUE() -> RANDOM() for PostgreSQL, RAND() for others
7115 if let Expression::MethodCall(ref mc) = e {
7116 if matches!(source, DialectType::Oracle)
7117 && mc.method.name.eq_ignore_ascii_case("VALUE")
7118 && mc.args.is_empty()
7119 {
7120 let is_dbms_random = match &mc.this {
7121 Expression::Identifier(id) => {
7122 id.name.eq_ignore_ascii_case("DBMS_RANDOM")
7123 }
7124 Expression::Column(col) => {
7125 col.table.is_none()
7126 && col.name.name.eq_ignore_ascii_case("DBMS_RANDOM")
7127 }
7128 _ => false,
7129 };
7130 if is_dbms_random {
7131 let func_name = match target {
7132 DialectType::PostgreSQL
7133 | DialectType::Redshift
7134 | DialectType::DuckDB
7135 | DialectType::SQLite => "RANDOM",
7136 DialectType::Oracle => "DBMS_RANDOM.VALUE",
7137 _ => "RAND",
7138 };
7139 return Ok(Expression::Function(Box::new(Function::new(
7140 func_name.to_string(),
7141 vec![],
7142 ))));
7143 }
7144 }
7145 }
7146 // TRIM without explicit position -> add BOTH for ClickHouse
7147 if let Expression::Trim(ref trim) = e {
7148 if matches!(target, DialectType::ClickHouse)
7149 && trim.sql_standard_syntax
7150 && trim.characters.is_some()
7151 && !trim.position_explicit
7152 {
7153 let mut new_trim = (**trim).clone();
7154 new_trim.position_explicit = true;
7155 return Ok(Expression::Trim(Box::new(new_trim)));
7156 }
7157 }
7158 // BEGIN -> START TRANSACTION for Presto/Trino
7159 if let Expression::Transaction(ref txn) = e {
7160 if matches!(
7161 target,
7162 DialectType::Presto | DialectType::Trino | DialectType::Athena
7163 ) {
7164 // Convert BEGIN to START TRANSACTION by setting mark to "START"
7165 let mut txn = txn.clone();
7166 txn.mark = Some(Box::new(Expression::Identifier(Identifier::new(
7167 "START".to_string(),
7168 ))));
7169 return Ok(Expression::Transaction(Box::new(*txn)));
7170 }
7171 }
7172 // IS TRUE/FALSE -> simplified forms for Presto/Trino
7173 if matches!(
7174 target,
7175 DialectType::Presto | DialectType::Trino | DialectType::Athena
7176 ) {
7177 match &e {
7178 Expression::IsTrue(itf) if !itf.not => {
7179 // x IS TRUE -> x
7180 return Ok(itf.this.clone());
7181 }
7182 Expression::IsTrue(itf) if itf.not => {
7183 // x IS NOT TRUE -> NOT x
7184 return Ok(Expression::Not(Box::new(
7185 crate::expressions::UnaryOp {
7186 this: itf.this.clone(),
7187 },
7188 )));
7189 }
7190 Expression::IsFalse(itf) if !itf.not => {
7191 // x IS FALSE -> NOT x
7192 return Ok(Expression::Not(Box::new(
7193 crate::expressions::UnaryOp {
7194 this: itf.this.clone(),
7195 },
7196 )));
7197 }
7198 Expression::IsFalse(itf) if itf.not => {
7199 // x IS NOT FALSE -> NOT NOT x
7200 let not_x =
7201 Expression::Not(Box::new(crate::expressions::UnaryOp {
7202 this: itf.this.clone(),
7203 }));
7204 return Ok(Expression::Not(Box::new(
7205 crate::expressions::UnaryOp { this: not_x },
7206 )));
7207 }
7208 _ => {}
7209 }
7210 }
7211 // x IS NOT FALSE -> NOT x IS FALSE for Redshift
7212 if matches!(target, DialectType::Redshift) {
7213 if let Expression::IsFalse(ref itf) = e {
7214 if itf.not {
7215 return Ok(Expression::Not(Box::new(
7216 crate::expressions::UnaryOp {
7217 this: Expression::IsFalse(Box::new(
7218 crate::expressions::IsTrueFalse {
7219 this: itf.this.clone(),
7220 not: false,
7221 },
7222 )),
7223 },
7224 )));
7225 }
7226 }
7227 }
7228 // REGEXP_REPLACE: add 'g' flag when source defaults to global replacement
7229 // Snowflake default is global, PostgreSQL/DuckDB default is first-match-only
7230 if let Expression::Function(ref f) = e {
7231 if f.name.eq_ignore_ascii_case("REGEXP_REPLACE")
7232 && matches!(source, DialectType::Snowflake)
7233 && matches!(target, DialectType::PostgreSQL | DialectType::DuckDB)
7234 {
7235 if f.args.len() == 3 {
7236 let mut args = f.args.clone();
7237 args.push(Expression::string("g"));
7238 return Ok(Expression::Function(Box::new(Function::new(
7239 "REGEXP_REPLACE".to_string(),
7240 args,
7241 ))));
7242 } else if f.args.len() == 4 {
7243 // 4th arg might be position, add 'g' as 5th
7244 let mut args = f.args.clone();
7245 args.push(Expression::string("g"));
7246 return Ok(Expression::Function(Box::new(Function::new(
7247 "REGEXP_REPLACE".to_string(),
7248 args,
7249 ))));
7250 }
7251 }
7252 }
7253 Ok(e)
7254 }
7255
7256 Action::GreatestLeastNull => {
7257 let f = if let Expression::Function(f) = e {
7258 *f
7259 } else {
7260 unreachable!("action only triggered for Function expressions")
7261 };
7262 let mut null_checks: Vec<Expression> = f
7263 .args
7264 .iter()
7265 .map(|a| {
7266 Expression::IsNull(Box::new(IsNull {
7267 this: a.clone(),
7268 not: false,
7269 postfix_form: false,
7270 }))
7271 })
7272 .collect();
7273 let condition = if null_checks.len() == 1 {
7274 null_checks.remove(0)
7275 } else {
7276 let first = null_checks.remove(0);
7277 null_checks.into_iter().fold(first, |acc, check| {
7278 Expression::Or(Box::new(BinaryOp::new(acc, check)))
7279 })
7280 };
7281 Ok(Expression::Case(Box::new(Case {
7282 operand: None,
7283 whens: vec![(condition, Expression::Null(Null))],
7284 else_: Some(Expression::Function(Box::new(Function::new(
7285 f.name, f.args,
7286 )))),
7287 comments: Vec::new(),
7288 })))
7289 }
7290
7291 Action::ArrayGenerateRange => {
7292 let f = if let Expression::Function(f) = e {
7293 *f
7294 } else {
7295 unreachable!("action only triggered for Function expressions")
7296 };
7297 let start = f.args[0].clone();
7298 let end = f.args[1].clone();
7299 let step = f.args.get(2).cloned();
7300
7301 let end_minus_1 = Expression::Sub(Box::new(BinaryOp::new(
7302 end.clone(),
7303 Expression::number(1),
7304 )));
7305
7306 match target {
7307 DialectType::PostgreSQL | DialectType::Redshift => {
7308 let mut args = vec![start, end_minus_1];
7309 if let Some(s) = step {
7310 args.push(s);
7311 }
7312 Ok(Expression::Function(Box::new(Function::new(
7313 "GENERATE_SERIES".to_string(),
7314 args,
7315 ))))
7316 }
7317 DialectType::Presto | DialectType::Trino => {
7318 let mut args = vec![start, end_minus_1];
7319 if let Some(s) = step {
7320 args.push(s);
7321 }
7322 Ok(Expression::Function(Box::new(Function::new(
7323 "SEQUENCE".to_string(),
7324 args,
7325 ))))
7326 }
7327 DialectType::BigQuery => {
7328 let mut args = vec![start, end_minus_1];
7329 if let Some(s) = step {
7330 args.push(s);
7331 }
7332 Ok(Expression::Function(Box::new(Function::new(
7333 "GENERATE_ARRAY".to_string(),
7334 args,
7335 ))))
7336 }
7337 DialectType::Snowflake => {
7338 let normalized_end = Expression::Add(Box::new(BinaryOp::new(
7339 Expression::Paren(Box::new(Paren {
7340 this: end_minus_1,
7341 trailing_comments: vec![],
7342 })),
7343 Expression::number(1),
7344 )));
7345 let mut args = vec![start, normalized_end];
7346 if let Some(s) = step {
7347 args.push(s);
7348 }
7349 Ok(Expression::Function(Box::new(Function::new(
7350 "ARRAY_GENERATE_RANGE".to_string(),
7351 args,
7352 ))))
7353 }
7354 _ => Ok(Expression::Function(Box::new(Function::new(
7355 f.name, f.args,
7356 )))),
7357 }
7358 }
7359
            Action::Div0TypedDivision => {
                // The expression arrives here as IF(cond, x, a / b) (presumably
                // lowered from a DIV0-style construct — confirm against the action's
                // trigger site); force the ELSE-branch division to float semantics
                // by casting its left operand.
                let if_func = if let Expression::IfFunc(f) = e {
                    *f
                } else {
                    unreachable!("action only triggered for IfFunc expressions")
                };
                if let Some(Expression::Div(div)) = if_func.false_value {
                    // SQLite spells its float type REAL (real_spelling flag);
                    // everything else gets DOUBLE.
                    let cast_type = if matches!(target, DialectType::SQLite) {
                        DataType::Float {
                            precision: None,
                            scale: None,
                            real_spelling: true,
                        }
                    } else {
                        DataType::Double {
                            precision: None,
                            scale: None,
                        }
                    };
                    // CAST(<left> AS DOUBLE) / <right>
                    let casted_left = Expression::Cast(Box::new(Cast {
                        this: div.left,
                        to: cast_type,
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    }));
                    // Rebuild the IF with only the false branch changed.
                    Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
                        condition: if_func.condition,
                        true_value: if_func.true_value,
                        false_value: Some(Expression::Div(Box::new(BinaryOp::new(
                            casted_left,
                            div.right,
                        )))),
                        original_name: if_func.original_name,
                    })))
                } else {
                    // Not actually a Div, reconstruct
                    Ok(Expression::IfFunc(Box::new(if_func)))
                }
            }
7401
            Action::ArrayAggCollectList => {
                // Rename ARRAY_AGG to COLLECT_LIST (Spark/Hive spelling); every
                // other aggregate attribute is carried over via struct update.
                let agg = if let Expression::ArrayAgg(a) = e {
                    *a
                } else {
                    unreachable!("action only triggered for ArrayAgg expressions")
                };
                Ok(Expression::ArrayAgg(Box::new(AggFunc {
                    name: Some("COLLECT_LIST".to_string()),
                    ..agg
                })))
            }

            Action::ArrayAggWithinGroupFilter => {
                // ARRAY_AGG(x) WITHIN GROUP (ORDER BY ...) -> ARRAY_AGG with an
                // inline FILTER (WHERE x IS NOT NULL) and the ORDER BY folded into
                // the aggregate itself.
                let wg = if let Expression::WithinGroup(w) = e {
                    *w
                } else {
                    unreachable!("action only triggered for WithinGroup expressions")
                };
                if let Expression::ArrayAgg(inner_agg) = wg.this {
                    let col = inner_agg.this.clone();
                    // FILTER (WHERE <col> IS NOT NULL)
                    let filter = Expression::IsNull(Box::new(IsNull {
                        this: col,
                        not: true,
                        postfix_form: false,
                    }));
                    // For DuckDB, add explicit NULLS FIRST for DESC ordering
                    let order_by = if matches!(target, DialectType::DuckDB) {
                        wg.order_by
                            .into_iter()
                            .map(|mut o| {
                                // Only fill in nulls_first when the user left it
                                // unspecified; an explicit choice is kept.
                                if o.desc && o.nulls_first.is_none() {
                                    o.nulls_first = Some(true);
                                }
                                o
                            })
                            .collect()
                    } else {
                        wg.order_by
                    };
                    Ok(Expression::ArrayAgg(Box::new(AggFunc {
                        this: inner_agg.this,
                        distinct: inner_agg.distinct,
                        filter: Some(filter),
                        order_by,
                        name: inner_agg.name,
                        ignore_nulls: inner_agg.ignore_nulls,
                        having_max: inner_agg.having_max,
                        limit: inner_agg.limit,
                    })))
                } else {
                    // WITHIN GROUP over something other than ARRAY_AGG: untouched.
                    Ok(Expression::WithinGroup(Box::new(wg)))
                }
            }

            Action::ArrayAggFilter => {
                // ARRAY_AGG(x) -> ARRAY_AGG(x) FILTER (WHERE x IS NOT NULL).
                // Note: this overwrites any pre-existing filter on the aggregate.
                let agg = if let Expression::ArrayAgg(a) = e {
                    *a
                } else {
                    unreachable!("action only triggered for ArrayAgg expressions")
                };
                let col = agg.this.clone();
                let filter = Expression::IsNull(Box::new(IsNull {
                    this: col,
                    not: true,
                    postfix_form: false,
                }));
                Ok(Expression::ArrayAgg(Box::new(AggFunc {
                    filter: Some(filter),
                    ..agg
                })))
            }

            Action::ArrayAggNullFilter => {
                // ARRAY_AGG(x) FILTER(WHERE cond) -> ARRAY_AGG(x) FILTER(WHERE cond AND NOT x IS NULL)
                // For source dialects that exclude NULLs (Spark/Hive) targeting DuckDB which includes them
                let agg = if let Expression::ArrayAgg(a) = e {
                    *a
                } else {
                    unreachable!("action only triggered for ArrayAgg expressions")
                };
                let col = agg.this.clone();
                let not_null = Expression::IsNull(Box::new(IsNull {
                    this: col,
                    not: true,
                    postfix_form: true, // Use "NOT x IS NULL" form (prefix NOT)
                }));
                // Unlike ArrayAggFilter above, an existing filter is preserved and
                // AND-ed with the new NULL check.
                let new_filter = if let Some(existing_filter) = agg.filter {
                    // AND the NOT IS NULL with existing filter
                    Expression::And(Box::new(crate::expressions::BinaryOp::new(
                        existing_filter,
                        not_null,
                    )))
                } else {
                    not_null
                };
                Ok(Expression::ArrayAgg(Box::new(AggFunc {
                    filter: Some(new_filter),
                    ..agg
                })))
            }
7502
            Action::BigQueryArraySelectAsStructToSnowflake => {
                // ARRAY(SELECT AS STRUCT x1 AS x1, x2 AS x2 FROM t)
                // -> (SELECT ARRAY_AGG(OBJECT_CONSTRUCT('x1', x1, 'x2', x2)) FROM t)
                if let Expression::Function(mut f) = e {
                    // Only fires on ARRAY(<select>) where the single argument is a
                    // SELECT whose kind is "STRUCT" (BigQuery's SELECT AS STRUCT).
                    let is_match = f.args.len() == 1
                        && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT"));
                    if is_match {
                        let inner_select = match f.args.remove(0) {
                            Expression::Select(s) => *s,
                            _ => unreachable!(
                                "argument already verified to be a Select expression"
                            ),
                        };
                        // Build OBJECT_CONSTRUCT args from SELECT expressions
                        let mut oc_args = Vec::new();
                        for expr in &inner_select.expressions {
                            match expr {
                                // `x AS alias` contributes the pair 'alias', x.
                                Expression::Alias(a) => {
                                    let key = Expression::Literal(Literal::String(
                                        a.alias.name.clone(),
                                    ));
                                    let value = a.this.clone();
                                    oc_args.push(key);
                                    oc_args.push(value);
                                }
                                // A bare column contributes 'name', name.
                                Expression::Column(c) => {
                                    let key = Expression::Literal(Literal::String(
                                        c.name.name.clone(),
                                    ));
                                    oc_args.push(key);
                                    oc_args.push(expr.clone());
                                }
                                // Anything else has no derivable key and is passed
                                // through positionally.
                                _ => {
                                    oc_args.push(expr.clone());
                                }
                            }
                        }
                        let object_construct = Expression::Function(Box::new(Function::new(
                            "OBJECT_CONSTRUCT".to_string(),
                            oc_args,
                        )));
                        let array_agg = Expression::Function(Box::new(Function::new(
                            "ARRAY_AGG".to_string(),
                            vec![object_construct],
                        )));
                        // Rebuild the SELECT around ARRAY_AGG(...), carrying over
                        // the clauses that determine which rows get aggregated.
                        let mut new_select = crate::expressions::Select::new();
                        new_select.expressions = vec![array_agg];
                        new_select.from = inner_select.from.clone();
                        new_select.where_clause = inner_select.where_clause.clone();
                        new_select.group_by = inner_select.group_by.clone();
                        new_select.having = inner_select.having.clone();
                        new_select.joins = inner_select.joins.clone();
                        // Wrap in a bare (non-lateral, unaliased) subquery.
                        Ok(Expression::Subquery(Box::new(
                            crate::expressions::Subquery {
                                this: Expression::Select(Box::new(new_select)),
                                alias: None,
                                column_aliases: Vec::new(),
                                order_by: None,
                                limit: None,
                                offset: None,
                                distribute_by: None,
                                sort_by: None,
                                cluster_by: None,
                                lateral: false,
                                modifiers_inside: false,
                                trailing_comments: Vec::new(),
                            },
                        )))
                    } else {
                        Ok(Expression::Function(f))
                    }
                } else {
                    Ok(e)
                }
            }

            Action::BigQueryPercentileContToDuckDB => {
                // PERCENTILE_CONT(x, frac [RESPECT NULLS]) -> QUANTILE_CONT(x, frac) for DuckDB
                if let Expression::AggregateFunction(mut af) = e {
                    af.name = "QUANTILE_CONT".to_string();
                    af.ignore_nulls = None; // Strip RESPECT/IGNORE NULLS
                    // Keep only first 2 args
                    if af.args.len() > 2 {
                        af.args.truncate(2);
                    }
                    Ok(Expression::AggregateFunction(af))
                } else {
                    Ok(e)
                }
            }

            Action::ArrayAggIgnoreNullsDuckDB => {
                // ARRAY_AGG(x IGNORE NULLS ORDER BY a, b DESC) -> ARRAY_AGG(x ORDER BY a NULLS FIRST, b DESC)
                // Strip IGNORE NULLS, add NULLS FIRST to first ORDER BY column
                let mut agg = if let Expression::ArrayAgg(a) = e {
                    *a
                } else {
                    unreachable!("action only triggered for ArrayAgg expressions")
                };
                agg.ignore_nulls = None; // Strip IGNORE NULLS
                // Only the first ORDER BY key gets the explicit NULLS FIRST.
                if !agg.order_by.is_empty() {
                    agg.order_by[0].nulls_first = Some(true);
                }
                Ok(Expression::ArrayAgg(Box::new(agg)))
            }
7608
            Action::CountDistinctMultiArg => {
                // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN a IS NULL THEN NULL WHEN b IS NULL THEN NULL ELSE (a, b) END)
                if let Expression::Count(c) = e {
                    // Multi-argument COUNT DISTINCT is represented as a Tuple.
                    if let Some(Expression::Tuple(t)) = c.this {
                        let args = t.expressions;
                        // Build CASE expression:
                        // WHEN a IS NULL THEN NULL WHEN b IS NULL THEN NULL ELSE (a, b) END
                        let mut whens = Vec::new();
                        for arg in &args {
                            whens.push((
                                Expression::IsNull(Box::new(IsNull {
                                    this: arg.clone(),
                                    not: false,
                                    postfix_form: false,
                                })),
                                Expression::Null(crate::expressions::Null),
                            ));
                        }
                        // Build the tuple for ELSE
                        let tuple_expr =
                            Expression::Tuple(Box::new(crate::expressions::Tuple {
                                expressions: args,
                            }));
                        let case_expr = Expression::Case(Box::new(crate::expressions::Case {
                            operand: None,
                            whens,
                            else_: Some(tuple_expr),
                            comments: Vec::new(),
                        }));
                        // COUNT DISTINCT over the single CASE value; filter and
                        // NULL-handling flags are carried over from the original.
                        Ok(Expression::Count(Box::new(crate::expressions::CountFunc {
                            this: Some(case_expr),
                            star: false,
                            distinct: true,
                            filter: c.filter,
                            ignore_nulls: c.ignore_nulls,
                            original_name: c.original_name,
                        })))
                    } else {
                        // Single-argument (or star) COUNT: nothing to rewrite.
                        Ok(Expression::Count(c))
                    }
                } else {
                    Ok(e)
                }
            }

            Action::CastTimestampToDatetime => {
                // CAST(x AS TIMESTAMP) -> CAST(x AS DATETIME); DataType::Custom
                // emits the type name verbatim.
                let c = if let Expression::Cast(c) = e {
                    *c
                } else {
                    unreachable!("action only triggered for Cast expressions")
                };
                Ok(Expression::Cast(Box::new(Cast {
                    to: DataType::Custom {
                        name: "DATETIME".to_string(),
                    },
                    ..c
                })))
            }

            Action::CastTimestampStripTz => {
                // CAST(x AS TIMESTAMP(n) WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark/BigQuery
                let c = if let Expression::Cast(c) = e {
                    *c
                } else {
                    unreachable!("action only triggered for Cast expressions")
                };
                // Note: the precision is dropped along with the time zone.
                Ok(Expression::Cast(Box::new(Cast {
                    to: DataType::Timestamp {
                        precision: None,
                        timezone: false,
                    },
                    ..c
                })))
            }

            Action::CastTimestamptzToFunc => {
                // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) function for MySQL/StarRocks
                let c = if let Expression::Cast(c) = e {
                    *c
                } else {
                    unreachable!("action only triggered for Cast expressions")
                };
                Ok(Expression::Function(Box::new(Function::new(
                    "TIMESTAMP".to_string(),
                    vec![c.this],
                ))))
            }
7696
7697 Action::ToDateToCast => {
7698 // Convert TO_DATE(x) -> CAST(x AS DATE) for DuckDB
7699 if let Expression::Function(f) = e {
7700 let arg = f.args.into_iter().next().unwrap();
7701 Ok(Expression::Cast(Box::new(Cast {
7702 this: arg,
7703 to: DataType::Date,
7704 double_colon_syntax: false,
7705 trailing_comments: vec![],
7706 format: None,
7707 default: None,
7708 })))
7709 } else {
7710 Ok(e)
7711 }
7712 }
7713 Action::DateTruncWrapCast => {
7714 // Handle both Expression::DateTrunc/TimestampTrunc and
7715 // Expression::Function("DATE_TRUNC", [unit, expr])
7716 match e {
7717 Expression::DateTrunc(d) | Expression::TimestampTrunc(d) => {
7718 let input_type = match &d.this {
7719 Expression::Cast(c) => Some(c.to.clone()),
7720 _ => None,
7721 };
7722 if let Some(cast_type) = input_type {
7723 let is_time = matches!(cast_type, DataType::Time { .. });
7724 if is_time {
7725 let date_expr = Expression::Cast(Box::new(Cast {
7726 this: Expression::Literal(
7727 crate::expressions::Literal::String(
7728 "1970-01-01".to_string(),
7729 ),
7730 ),
7731 to: DataType::Date,
7732 double_colon_syntax: false,
7733 trailing_comments: vec![],
7734 format: None,
7735 default: None,
7736 }));
7737 let add_expr =
7738 Expression::Add(Box::new(BinaryOp::new(date_expr, d.this)));
7739 let inner = Expression::DateTrunc(Box::new(DateTruncFunc {
7740 this: add_expr,
7741 unit: d.unit,
7742 }));
7743 Ok(Expression::Cast(Box::new(Cast {
7744 this: inner,
7745 to: cast_type,
7746 double_colon_syntax: false,
7747 trailing_comments: vec![],
7748 format: None,
7749 default: None,
7750 })))
7751 } else {
7752 let inner = Expression::DateTrunc(Box::new(*d));
7753 Ok(Expression::Cast(Box::new(Cast {
7754 this: inner,
7755 to: cast_type,
7756 double_colon_syntax: false,
7757 trailing_comments: vec![],
7758 format: None,
7759 default: None,
7760 })))
7761 }
7762 } else {
7763 Ok(Expression::DateTrunc(d))
7764 }
7765 }
7766 Expression::Function(f) if f.args.len() == 2 => {
7767 // Function-based DATE_TRUNC(unit, expr)
7768 let input_type = match &f.args[1] {
7769 Expression::Cast(c) => Some(c.to.clone()),
7770 _ => None,
7771 };
7772 if let Some(cast_type) = input_type {
7773 let is_time = matches!(cast_type, DataType::Time { .. });
7774 if is_time {
7775 let date_expr = Expression::Cast(Box::new(Cast {
7776 this: Expression::Literal(
7777 crate::expressions::Literal::String(
7778 "1970-01-01".to_string(),
7779 ),
7780 ),
7781 to: DataType::Date,
7782 double_colon_syntax: false,
7783 trailing_comments: vec![],
7784 format: None,
7785 default: None,
7786 }));
7787 let mut args = f.args;
7788 let unit_arg = args.remove(0);
7789 let time_expr = args.remove(0);
7790 let add_expr = Expression::Add(Box::new(BinaryOp::new(
7791 date_expr, time_expr,
7792 )));
7793 let inner = Expression::Function(Box::new(Function::new(
7794 "DATE_TRUNC".to_string(),
7795 vec![unit_arg, add_expr],
7796 )));
7797 Ok(Expression::Cast(Box::new(Cast {
7798 this: inner,
7799 to: cast_type,
7800 double_colon_syntax: false,
7801 trailing_comments: vec![],
7802 format: None,
7803 default: None,
7804 })))
7805 } else {
7806 // Wrap the function in CAST
7807 Ok(Expression::Cast(Box::new(Cast {
7808 this: Expression::Function(f),
7809 to: cast_type,
7810 double_colon_syntax: false,
7811 trailing_comments: vec![],
7812 format: None,
7813 default: None,
7814 })))
7815 }
7816 } else {
7817 Ok(Expression::Function(f))
7818 }
7819 }
7820 other => Ok(other),
7821 }
7822 }
7823
7824 Action::RegexpReplaceSnowflakeToDuckDB => {
7825 // Snowflake REGEXP_REPLACE(s, p, r, position) -> REGEXP_REPLACE(s, p, r, 'g')
7826 if let Expression::Function(f) = e {
7827 let mut args = f.args;
7828 let subject = args.remove(0);
7829 let pattern = args.remove(0);
7830 let replacement = args.remove(0);
7831 Ok(Expression::Function(Box::new(Function::new(
7832 "REGEXP_REPLACE".to_string(),
7833 vec![
7834 subject,
7835 pattern,
7836 replacement,
7837 Expression::Literal(crate::expressions::Literal::String(
7838 "g".to_string(),
7839 )),
7840 ],
7841 ))))
7842 } else {
7843 Ok(e)
7844 }
7845 }
7846
            Action::SetToVariable => {
                // For DuckDB: SET a = 1 -> SET VARIABLE a = 1
                if let Expression::SetStatement(mut s) = e {
                    for item in &mut s.items {
                        // Only untyped items are rewritten; items that already carry
                        // a kind are left alone.
                        if item.kind.is_none() {
                            // Check if name already has VARIABLE prefix (from DuckDB source parsing)
                            let already_variable = match &item.name {
                                Expression::Identifier(id) => id.name.starts_with("VARIABLE "),
                                _ => false,
                            };
                            if already_variable {
                                // Extract the actual name and set kind
                                if let Expression::Identifier(ref mut id) = item.name {
                                    let actual_name = id.name["VARIABLE ".len()..].to_string();
                                    id.name = actual_name;
                                }
                            }
                            item.kind = Some("VARIABLE".to_string());
                        }
                    }
                    Ok(Expression::SetStatement(s))
                } else {
                    Ok(e)
                }
            }

            Action::ConvertTimezoneToExpr => {
                // Convert Function("CONVERT_TIMEZONE", args) to Expression::ConvertTimezone
                // This prevents Redshift's transform_expr from expanding 2-arg to 3-arg with 'UTC'
                if let Expression::Function(f) = e {
                    if f.args.len() == 2 {
                        // 2-arg form: CONVERT_TIMEZONE(target_tz, timestamp)
                        let mut args = f.args;
                        let target_tz = args.remove(0);
                        let timestamp = args.remove(0);
                        Ok(Expression::ConvertTimezone(Box::new(ConvertTimezone {
                            source_tz: None,
                            target_tz: Some(Box::new(target_tz)),
                            timestamp: Some(Box::new(timestamp)),
                            options: vec![],
                        })))
                    } else if f.args.len() == 3 {
                        // 3-arg form: CONVERT_TIMEZONE(source_tz, target_tz, timestamp)
                        let mut args = f.args;
                        let source_tz = args.remove(0);
                        let target_tz = args.remove(0);
                        let timestamp = args.remove(0);
                        Ok(Expression::ConvertTimezone(Box::new(ConvertTimezone {
                            source_tz: Some(Box::new(source_tz)),
                            target_tz: Some(Box::new(target_tz)),
                            timestamp: Some(Box::new(timestamp)),
                            options: vec![],
                        })))
                    } else {
                        // Unexpected arity: keep the raw function call.
                        Ok(Expression::Function(f))
                    }
                } else {
                    Ok(e)
                }
            }
7905
            Action::BigQueryCastType => {
                // Convert BigQuery types to standard SQL types
                if let Expression::DataType(dt) = e {
                    match dt {
                        // INT64 -> BIGINT
                        DataType::Custom { ref name } if name.eq_ignore_ascii_case("INT64") => {
                            Ok(Expression::DataType(DataType::BigInt { length: None }))
                        }
                        // FLOAT64 -> DOUBLE
                        DataType::Custom { ref name }
                            if name.eq_ignore_ascii_case("FLOAT64") =>
                        {
                            Ok(Expression::DataType(DataType::Double {
                                precision: None,
                                scale: None,
                            }))
                        }
                        // BOOL -> BOOLEAN
                        DataType::Custom { ref name } if name.eq_ignore_ascii_case("BOOL") => {
                            Ok(Expression::DataType(DataType::Boolean))
                        }
                        // BYTES -> VARBINARY
                        DataType::Custom { ref name } if name.eq_ignore_ascii_case("BYTES") => {
                            Ok(Expression::DataType(DataType::VarBinary { length: None }))
                        }
                        DataType::Custom { ref name }
                            if name.eq_ignore_ascii_case("NUMERIC") =>
                        {
                            // For DuckDB target, use Custom("DECIMAL") to avoid DuckDB's
                            // default precision (18, 3) being added to bare DECIMAL
                            if matches!(target, DialectType::DuckDB) {
                                Ok(Expression::DataType(DataType::Custom {
                                    name: "DECIMAL".to_string(),
                                }))
                            } else {
                                Ok(Expression::DataType(DataType::Decimal {
                                    precision: None,
                                    scale: None,
                                }))
                            }
                        }
                        // STRING -> the generic string type (no length)
                        DataType::Custom { ref name }
                            if name.eq_ignore_ascii_case("STRING") =>
                        {
                            Ok(Expression::DataType(DataType::String { length: None }))
                        }
                        // DATETIME -> TIMESTAMP without time zone
                        DataType::Custom { ref name }
                            if name.eq_ignore_ascii_case("DATETIME") =>
                        {
                            Ok(Expression::DataType(DataType::Timestamp {
                                precision: None,
                                timezone: false,
                            }))
                        }
                        // Everything else passes through unchanged.
                        _ => Ok(Expression::DataType(dt)),
                    }
                } else {
                    Ok(e)
                }
            }
7962
7963 Action::BigQuerySafeDivide => {
7964 // Convert SafeDivide expression to IF/CASE form for most targets
7965 if let Expression::SafeDivide(sd) = e {
7966 let x = *sd.this;
7967 let y = *sd.expression;
7968 // Wrap x and y in parens if they're complex expressions
7969 let y_ref = match &y {
7970 Expression::Column(_)
7971 | Expression::Literal(_)
7972 | Expression::Identifier(_) => y.clone(),
7973 _ => Expression::Paren(Box::new(Paren {
7974 this: y.clone(),
7975 trailing_comments: vec![],
7976 })),
7977 };
7978 let x_ref = match &x {
7979 Expression::Column(_)
7980 | Expression::Literal(_)
7981 | Expression::Identifier(_) => x.clone(),
7982 _ => Expression::Paren(Box::new(Paren {
7983 this: x.clone(),
7984 trailing_comments: vec![],
7985 })),
7986 };
7987 let condition = Expression::Neq(Box::new(BinaryOp::new(
7988 y_ref.clone(),
7989 Expression::number(0),
7990 )));
7991 let div_expr = Expression::Div(Box::new(BinaryOp::new(x_ref, y_ref)));
7992
7993 if matches!(target, DialectType::Presto | DialectType::Trino) {
7994 // Presto/Trino: IF(y <> 0, CAST(x AS DOUBLE) / y, NULL)
7995 let cast_x = Expression::Cast(Box::new(Cast {
7996 this: match &x {
7997 Expression::Column(_)
7998 | Expression::Literal(_)
7999 | Expression::Identifier(_) => x,
8000 _ => Expression::Paren(Box::new(Paren {
8001 this: x,
8002 trailing_comments: vec![],
8003 })),
8004 },
8005 to: DataType::Double {
8006 precision: None,
8007 scale: None,
8008 },
8009 trailing_comments: vec![],
8010 double_colon_syntax: false,
8011 format: None,
8012 default: None,
8013 }));
8014 let cast_div = Expression::Div(Box::new(BinaryOp::new(
8015 cast_x,
8016 match &y {
8017 Expression::Column(_)
8018 | Expression::Literal(_)
8019 | Expression::Identifier(_) => y,
8020 _ => Expression::Paren(Box::new(Paren {
8021 this: y,
8022 trailing_comments: vec![],
8023 })),
8024 },
8025 )));
8026 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
8027 condition,
8028 true_value: cast_div,
8029 false_value: Some(Expression::Null(Null)),
8030 original_name: None,
8031 })))
8032 } else if matches!(target, DialectType::PostgreSQL) {
8033 // PostgreSQL: CASE WHEN y <> 0 THEN CAST(x AS DOUBLE PRECISION) / y ELSE NULL END
8034 let cast_x = Expression::Cast(Box::new(Cast {
8035 this: match &x {
8036 Expression::Column(_)
8037 | Expression::Literal(_)
8038 | Expression::Identifier(_) => x,
8039 _ => Expression::Paren(Box::new(Paren {
8040 this: x,
8041 trailing_comments: vec![],
8042 })),
8043 },
8044 to: DataType::Custom {
8045 name: "DOUBLE PRECISION".to_string(),
8046 },
8047 trailing_comments: vec![],
8048 double_colon_syntax: false,
8049 format: None,
8050 default: None,
8051 }));
8052 let y_paren = match &y {
8053 Expression::Column(_)
8054 | Expression::Literal(_)
8055 | Expression::Identifier(_) => y,
8056 _ => Expression::Paren(Box::new(Paren {
8057 this: y,
8058 trailing_comments: vec![],
8059 })),
8060 };
8061 let cast_div =
8062 Expression::Div(Box::new(BinaryOp::new(cast_x, y_paren)));
8063 Ok(Expression::Case(Box::new(Case {
8064 operand: None,
8065 whens: vec![(condition, cast_div)],
8066 else_: Some(Expression::Null(Null)),
8067 comments: Vec::new(),
8068 })))
8069 } else if matches!(target, DialectType::DuckDB) {
8070 // DuckDB: CASE WHEN y <> 0 THEN x / y ELSE NULL END
8071 Ok(Expression::Case(Box::new(Case {
8072 operand: None,
8073 whens: vec![(condition, div_expr)],
8074 else_: Some(Expression::Null(Null)),
8075 comments: Vec::new(),
8076 })))
8077 } else if matches!(target, DialectType::Snowflake) {
8078 // Snowflake: IFF(y <> 0, x / y, NULL)
8079 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
8080 condition,
8081 true_value: div_expr,
8082 false_value: Some(Expression::Null(Null)),
8083 original_name: Some("IFF".to_string()),
8084 })))
8085 } else {
8086 // All others: IF(y <> 0, x / y, NULL)
8087 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
8088 condition,
8089 true_value: div_expr,
8090 false_value: Some(Expression::Null(Null)),
8091 original_name: None,
8092 })))
8093 }
8094 } else {
8095 Ok(e)
8096 }
8097 }
8098
            Action::BigQueryLastDayStripUnit => {
                // LAST_DAY(date [, unit]): drop the explicit unit, then rewrite for
                // targets that lack a native LAST_DAY.
                if let Expression::LastDay(mut ld) = e {
                    ld.unit = None; // Strip the unit (MONTH is default)
                    match target {
                        DialectType::PostgreSQL => {
                            // LAST_DAY(date) -> CAST(DATE_TRUNC('MONTH', date) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)
                            let date_trunc = Expression::Function(Box::new(Function::new(
                                "DATE_TRUNC".to_string(),
                                vec![
                                    Expression::Literal(crate::expressions::Literal::String(
                                        "MONTH".to_string(),
                                    )),
                                    ld.this.clone(),
                                ],
                            )));
                            // first day of the FOLLOWING month ...
                            let plus_month =
                                Expression::Add(Box::new(crate::expressions::BinaryOp::new(
                                    date_trunc,
                                    Expression::Interval(Box::new(
                                        crate::expressions::Interval {
                                            this: Some(Expression::Literal(
                                                crate::expressions::Literal::String(
                                                    "1 MONTH".to_string(),
                                                ),
                                            )),
                                            unit: None,
                                        },
                                    )),
                                )));
                            // ... minus one day = last day of the current month.
                            let minus_day =
                                Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
                                    plus_month,
                                    Expression::Interval(Box::new(
                                        crate::expressions::Interval {
                                            this: Some(Expression::Literal(
                                                crate::expressions::Literal::String(
                                                    "1 DAY".to_string(),
                                                ),
                                            )),
                                            unit: None,
                                        },
                                    )),
                                )));
                            Ok(Expression::Cast(Box::new(Cast {
                                this: minus_day,
                                to: DataType::Date,
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                            })))
                        }
                        DialectType::Presto => {
                            // LAST_DAY(date) -> LAST_DAY_OF_MONTH(date)
                            Ok(Expression::Function(Box::new(Function::new(
                                "LAST_DAY_OF_MONTH".to_string(),
                                vec![ld.this],
                            ))))
                        }
                        DialectType::ClickHouse => {
                            // ClickHouse LAST_DAY(CAST(x AS Nullable(DATE)))
                            // Need to wrap the DATE type in Nullable
                            let nullable_date = match ld.this {
                                Expression::Cast(mut c) => {
                                    c.to = DataType::Nullable {
                                        inner: Box::new(DataType::Date),
                                    };
                                    Expression::Cast(c)
                                }
                                // Non-cast operand: left as-is.
                                other => other,
                            };
                            ld.this = nullable_date;
                            Ok(Expression::LastDay(ld))
                        }
                        _ => Ok(Expression::LastDay(ld)),
                    }
                } else {
                    Ok(e)
                }
            }
8179
            Action::BigQueryCastFormat => {
                // CAST(x AS DATE FORMAT 'fmt') -> PARSE_DATE('%m/%d/%Y', x) for BigQuery
                // CAST(x AS TIMESTAMP FORMAT 'fmt') -> PARSE_TIMESTAMP(...) for BigQuery
                // SAFE_CAST(x AS DATE FORMAT 'fmt') -> CAST(TRY_STRPTIME(x, ...) AS DATE) for DuckDB
                // Pull apart either CAST or SAFE_CAST carrying a FORMAT clause;
                // anything else is returned untouched.
                let (this, to, format_expr, is_safe) = match e {
                    Expression::Cast(ref c) if c.format.is_some() => (
                        c.this.clone(),
                        c.to.clone(),
                        c.format.as_ref().unwrap().as_ref().clone(),
                        false,
                    ),
                    Expression::SafeCast(ref c) if c.format.is_some() => (
                        c.this.clone(),
                        c.to.clone(),
                        c.format.as_ref().unwrap().as_ref().clone(),
                        true,
                    ),
                    _ => return Ok(e),
                };
                // For CAST(x AS STRING FORMAT ...) when target is BigQuery, keep as-is
                if matches!(target, DialectType::BigQuery) {
                    match &to {
                        DataType::String { .. } | DataType::VarChar { .. } | DataType::Text => {
                            // CAST(x AS STRING FORMAT 'fmt') stays as CAST expression for BigQuery
                            return Ok(e);
                        }
                        _ => {}
                    }
                }
                // Extract timezone from format if AT TIME ZONE is present
                let (actual_format_expr, timezone) = match &format_expr {
                    Expression::AtTimeZone(ref atz) => {
                        (atz.this.clone(), Some(atz.zone.clone()))
                    }
                    _ => (format_expr.clone(), None),
                };
                // Translate the FORMAT expression to a strftime-style format
                // (see Self::bq_cast_format_to_strftime).
                let strftime_fmt = Self::bq_cast_format_to_strftime(&actual_format_expr);
                match target {
                    DialectType::BigQuery => {
                        // CAST(x AS DATE FORMAT 'fmt') -> PARSE_DATE(strftime_fmt, x)
                        // CAST(x AS TIMESTAMP FORMAT 'fmt' AT TIME ZONE 'tz') -> PARSE_TIMESTAMP(strftime_fmt, x, tz)
                        let func_name = match &to {
                            DataType::Date => "PARSE_DATE",
                            DataType::Timestamp { .. } => "PARSE_TIMESTAMP",
                            DataType::Time { .. } => "PARSE_TIMESTAMP",
                            _ => "PARSE_TIMESTAMP",
                        };
                        let mut func_args = vec![strftime_fmt, this];
                        if let Some(tz) = timezone {
                            func_args.push(tz);
                        }
                        Ok(Expression::Function(Box::new(Function::new(
                            func_name.to_string(),
                            func_args,
                        ))))
                    }
                    DialectType::DuckDB => {
                        // SAFE_CAST(x AS DATE FORMAT 'fmt') -> CAST(TRY_STRPTIME(x, fmt) AS DATE)
                        // CAST(x AS DATE FORMAT 'fmt') -> CAST(STRPTIME(x, fmt) AS DATE)
                        let duck_fmt = Self::bq_format_to_duckdb(&strftime_fmt);
                        let parse_fn_name = if is_safe { "TRY_STRPTIME" } else { "STRPTIME" };
                        let parse_call = Expression::Function(Box::new(Function::new(
                            parse_fn_name.to_string(),
                            vec![this, duck_fmt],
                        )));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: parse_call,
                            to,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    }
                    // Other targets keep the original CAST ... FORMAT expression.
                    _ => Ok(e),
                }
            }
8257
            Action::BigQueryFunctionNormalize => {
                // Delegate to the shared BigQuery function-normalization routine.
                Self::normalize_bigquery_function(e, source, target)
            }

            Action::BigQueryToHexBare => {
                // Not used anymore - handled directly in normalize_bigquery_function
                Ok(e)
            }

            Action::BigQueryToHexLower => {
                // Collapse LOWER(...) wrappers around hex-conversion calls.
                if let Expression::Lower(uf) = e {
                    match uf.this {
                        // BQ->BQ: LOWER(TO_HEX(x)) -> TO_HEX(x)
                        Expression::Function(f)
                            if matches!(target, DialectType::BigQuery)
                                && f.name == "TO_HEX" =>
                        {
                            Ok(Expression::Function(f))
                        }
                        // LOWER(LOWER(HEX/TO_HEX(x))) patterns
                        Expression::Lower(inner_uf) => {
                            if matches!(target, DialectType::BigQuery) {
                                // BQ->BQ: extract TO_HEX
                                if let Expression::Function(f) = inner_uf.this {
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "TO_HEX".to_string(),
                                        f.args,
                                    ))))
                                } else {
                                    Ok(Expression::Lower(inner_uf))
                                }
                            } else {
                                // Flatten: LOWER(LOWER(x)) -> LOWER(x)
                                Ok(Expression::Lower(inner_uf))
                            }
                        }
                        // Anything else: re-wrap in LOWER unchanged.
                        other => {
                            Ok(Expression::Lower(Box::new(crate::expressions::UnaryFunc {
                                this: other,
                                original_name: None,
                            })))
                        }
                    }
                } else {
                    Ok(e)
                }
            }

            Action::BigQueryToHexUpper => {
                // UPPER(LOWER(HEX(x))) -> HEX(x) (UPPER cancels LOWER, HEX is already uppercase)
                // UPPER(LOWER(TO_HEX(x))) -> TO_HEX(x) for Presto/Trino
                if let Expression::Upper(uf) = e {
                    if let Expression::Lower(inner_uf) = uf.this {
                        // For BQ->BQ: UPPER(TO_HEX(x)) should stay as UPPER(TO_HEX(x))
                        if matches!(target, DialectType::BigQuery) {
                            // Restore TO_HEX name in inner function
                            if let Expression::Function(f) = inner_uf.this {
                                let restored = Expression::Function(Box::new(Function::new(
                                    "TO_HEX".to_string(),
                                    f.args,
                                )));
                                Ok(Expression::Upper(Box::new(
                                    crate::expressions::UnaryFunc::new(restored),
                                )))
                            } else {
                                Ok(Expression::Upper(inner_uf))
                            }
                        } else {
                            // Extract the inner HEX/TO_HEX function (UPPER(LOWER(x)) = x when HEX is uppercase)
                            Ok(inner_uf.this)
                        }
                    } else {
                        // No LOWER underneath: leave the UPPER alone.
                        Ok(Expression::Upper(uf))
                    }
                } else {
                    Ok(e)
                }
            }
8336
            Action::BigQueryAnyValueHaving => {
                // ANY_VALUE(x HAVING MAX y) -> ARG_MAX_NULL(x, y)
                // ANY_VALUE(x HAVING MIN y) -> ARG_MIN_NULL(x, y)
                if let Expression::AnyValue(agg) = e {
                    // having_max carries (expression, is_max) when the clause exists.
                    if let Some((having_expr, is_max)) = agg.having_max {
                        let func_name = if is_max {
                            "ARG_MAX_NULL"
                        } else {
                            "ARG_MIN_NULL"
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            func_name.to_string(),
                            vec![agg.this, *having_expr],
                        ))))
                    } else {
                        // No HAVING MAX/MIN clause: nothing to rewrite.
                        Ok(Expression::AnyValue(agg))
                    }
                } else {
                    Ok(e)
                }
            }

            Action::BigQueryApproxQuantiles => {
                // APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [0, 1/n, 2/n, ..., 1])
                // APPROX_QUANTILES(DISTINCT x, n) -> APPROX_QUANTILE(DISTINCT x, [0, 1/n, ..., 1])
                if let Expression::AggregateFunction(agg) = e {
                    if agg.args.len() >= 2 {
                        let x_expr = agg.args[0].clone();
                        let n_expr = &agg.args[1];

                        // Extract the numeric value from n_expr
                        // NOTE(review): a non-literal or unparseable n falls back to
                        // 2; a literal 0 would yield NaN quantiles below — confirm
                        // upstream validation rules that input out.
                        let n = match n_expr {
                            Expression::Literal(crate::expressions::Literal::Number(s)) => {
                                s.parse::<usize>().unwrap_or(2)
                            }
                            _ => 2,
                        };

                        // Generate quantile array: [0, 1/n, 2/n, ..., 1]
                        let mut quantiles = Vec::new();
                        for i in 0..=n {
                            let q = i as f64 / n as f64;
                            // Format nicely: 0 -> 0, 0.25 -> 0.25, 1 -> 1
                            if q == 0.0 {
                                quantiles.push(Expression::number(0));
                            } else if q == 1.0 {
                                quantiles.push(Expression::number(1));
                            } else {
                                quantiles.push(Expression::Literal(
                                    crate::expressions::Literal::Number(format!("{}", q)),
                                ));
                            }
                        }

                        let array_expr =
                            Expression::Array(Box::new(crate::expressions::Array {
                                expressions: quantiles,
                            }));

                        // Preserve DISTINCT modifier
                        let mut new_func = Function::new(
                            "APPROX_QUANTILE".to_string(),
                            vec![x_expr, array_expr],
                        );
                        new_func.distinct = agg.distinct;
                        Ok(Expression::Function(Box::new(new_func)))
                    } else {
                        // Fewer than two args: leave the aggregate unchanged.
                        Ok(Expression::AggregateFunction(agg))
                    }
                } else {
                    Ok(e)
                }
            }
8410
8411 Action::GenericFunctionNormalize => {
8412 // Helper closure to convert ARBITRARY to target-specific function
8413 fn convert_arbitrary(arg: Expression, target: DialectType) -> Expression {
8414 let name = match target {
8415 DialectType::ClickHouse => "any",
8416 DialectType::TSQL | DialectType::SQLite => "MAX",
8417 DialectType::Hive => "FIRST",
8418 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
8419 "ARBITRARY"
8420 }
8421 _ => "ANY_VALUE",
8422 };
8423 Expression::Function(Box::new(Function::new(name.to_string(), vec![arg])))
8424 }
8425
8426 if let Expression::Function(f) = e {
8427 let name = f.name.to_uppercase();
8428 match name.as_str() {
8429 "ARBITRARY" if f.args.len() == 1 => {
8430 let arg = f.args.into_iter().next().unwrap();
8431 Ok(convert_arbitrary(arg, target))
8432 }
8433 "TO_NUMBER" if f.args.len() == 1 => {
8434 let arg = f.args.into_iter().next().unwrap();
8435 match target {
8436 DialectType::Oracle | DialectType::Snowflake => {
8437 Ok(Expression::Function(Box::new(Function::new(
8438 "TO_NUMBER".to_string(),
8439 vec![arg],
8440 ))))
8441 }
8442 _ => Ok(Expression::Cast(Box::new(crate::expressions::Cast {
8443 this: arg,
8444 to: crate::expressions::DataType::Double {
8445 precision: None,
8446 scale: None,
8447 },
8448 double_colon_syntax: false,
8449 trailing_comments: Vec::new(),
8450 format: None,
8451 default: None,
8452 }))),
8453 }
8454 }
8455 "AGGREGATE" if f.args.len() >= 3 => match target {
8456 DialectType::DuckDB
8457 | DialectType::Hive
8458 | DialectType::Presto
8459 | DialectType::Trino => Ok(Expression::Function(Box::new(
8460 Function::new("REDUCE".to_string(), f.args),
8461 ))),
8462 _ => Ok(Expression::Function(f)),
8463 },
8464 // REGEXP_MATCHES(x, y) -> RegexpLike for most targets, keep for DuckDB
8465 "REGEXP_MATCHES" if f.args.len() >= 2 => {
8466 if matches!(target, DialectType::DuckDB) {
8467 Ok(Expression::Function(f))
8468 } else {
8469 let mut args = f.args;
8470 let this = args.remove(0);
8471 let pattern = args.remove(0);
8472 let flags = if args.is_empty() {
8473 None
8474 } else {
8475 Some(args.remove(0))
8476 };
8477 Ok(Expression::RegexpLike(Box::new(
8478 crate::expressions::RegexpFunc {
8479 this,
8480 pattern,
8481 flags,
8482 },
8483 )))
8484 }
8485 }
8486 // REGEXP_FULL_MATCH (Hive REGEXP) -> RegexpLike
8487 "REGEXP_FULL_MATCH" if f.args.len() >= 2 => {
8488 if matches!(target, DialectType::DuckDB) {
8489 Ok(Expression::Function(f))
8490 } else {
8491 let mut args = f.args;
8492 let this = args.remove(0);
8493 let pattern = args.remove(0);
8494 let flags = if args.is_empty() {
8495 None
8496 } else {
8497 Some(args.remove(0))
8498 };
8499 Ok(Expression::RegexpLike(Box::new(
8500 crate::expressions::RegexpFunc {
8501 this,
8502 pattern,
8503 flags,
8504 },
8505 )))
8506 }
8507 }
8508 // STRUCT_EXTRACT(x, 'field') -> x.field (StructExtract expression)
8509 "STRUCT_EXTRACT" if f.args.len() == 2 => {
8510 let mut args = f.args;
8511 let this = args.remove(0);
8512 let field_expr = args.remove(0);
8513 // Extract string literal to get field name
8514 let field_name = match &field_expr {
8515 Expression::Literal(crate::expressions::Literal::String(s)) => {
8516 s.clone()
8517 }
8518 Expression::Identifier(id) => id.name.clone(),
8519 _ => {
8520 return Ok(Expression::Function(Box::new(Function::new(
8521 "STRUCT_EXTRACT".to_string(),
8522 vec![this, field_expr],
8523 ))))
8524 }
8525 };
8526 Ok(Expression::StructExtract(Box::new(
8527 crate::expressions::StructExtractFunc {
8528 this,
8529 field: crate::expressions::Identifier::new(field_name),
8530 },
8531 )))
8532 }
8533 // LIST_FILTER([4,5,6], x -> x > 4) -> FILTER(ARRAY(4,5,6), x -> x > 4)
8534 "LIST_FILTER" if f.args.len() == 2 => {
8535 let name = match target {
8536 DialectType::DuckDB => "LIST_FILTER",
8537 _ => "FILTER",
8538 };
8539 Ok(Expression::Function(Box::new(Function::new(
8540 name.to_string(),
8541 f.args,
8542 ))))
8543 }
8544 // LIST_TRANSFORM(x, y -> y + 1) -> TRANSFORM(x, y -> y + 1)
8545 "LIST_TRANSFORM" if f.args.len() == 2 => {
8546 let name = match target {
8547 DialectType::DuckDB => "LIST_TRANSFORM",
8548 _ => "TRANSFORM",
8549 };
8550 Ok(Expression::Function(Box::new(Function::new(
8551 name.to_string(),
8552 f.args,
8553 ))))
8554 }
8555 // LIST_SORT(x) -> SORT_ARRAY(x) / ARRAY_SORT(x)
8556 "LIST_SORT" if f.args.len() >= 1 => {
8557 let name = match target {
8558 DialectType::DuckDB
8559 | DialectType::Presto
8560 | DialectType::Trino => "ARRAY_SORT",
8561 _ => "SORT_ARRAY",
8562 };
8563 Ok(Expression::Function(Box::new(Function::new(
8564 name.to_string(),
8565 f.args,
8566 ))))
8567 }
8568 // LIST_REVERSE_SORT(x) -> SORT_ARRAY(x, FALSE) for Spark/Hive, ARRAY_SORT(x, lambda) for Presto
8569 "LIST_REVERSE_SORT" if f.args.len() >= 1 => {
8570 match target {
8571 DialectType::DuckDB => Ok(Expression::Function(Box::new(
8572 Function::new("ARRAY_REVERSE_SORT".to_string(), f.args),
8573 ))),
8574 DialectType::Spark
8575 | DialectType::Databricks
8576 | DialectType::Hive => {
8577 let mut args = f.args;
8578 args.push(Expression::Identifier(
8579 crate::expressions::Identifier::new("FALSE"),
8580 ));
8581 Ok(Expression::Function(Box::new(Function::new(
8582 "SORT_ARRAY".to_string(),
8583 args,
8584 ))))
8585 }
8586 DialectType::Presto
8587 | DialectType::Trino
8588 | DialectType::Athena => {
8589 // ARRAY_SORT(x, (a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END)
8590 let arr = f.args.into_iter().next().unwrap();
8591 let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
8592 parameters: vec![
8593 crate::expressions::Identifier::new("a"),
8594 crate::expressions::Identifier::new("b"),
8595 ],
8596 body: Expression::Case(Box::new(Case {
8597 operand: None,
8598 whens: vec![
8599 (
8600 Expression::Lt(Box::new(BinaryOp::new(
8601 Expression::Identifier(crate::expressions::Identifier::new("a")),
8602 Expression::Identifier(crate::expressions::Identifier::new("b")),
8603 ))),
8604 Expression::number(1),
8605 ),
8606 (
8607 Expression::Gt(Box::new(BinaryOp::new(
8608 Expression::Identifier(crate::expressions::Identifier::new("a")),
8609 Expression::Identifier(crate::expressions::Identifier::new("b")),
8610 ))),
8611 Expression::Literal(Literal::Number("-1".to_string())),
8612 ),
8613 ],
8614 else_: Some(Expression::number(0)),
8615 comments: Vec::new(),
8616 })),
8617 colon: false,
8618 parameter_types: Vec::new(),
8619 }));
8620 Ok(Expression::Function(Box::new(Function::new(
8621 "ARRAY_SORT".to_string(),
8622 vec![arr, lambda],
8623 ))))
8624 }
8625 _ => Ok(Expression::Function(Box::new(Function::new(
8626 "LIST_REVERSE_SORT".to_string(),
8627 f.args,
8628 )))),
8629 }
8630 }
8631 // SPLIT_TO_ARRAY(x) with 1 arg -> add default ',' separator and rename
8632 "SPLIT_TO_ARRAY" if f.args.len() == 1 => {
8633 let mut args = f.args;
8634 args.push(Expression::string(","));
8635 let name = match target {
8636 DialectType::DuckDB => "STR_SPLIT",
8637 DialectType::Presto | DialectType::Trino => "SPLIT",
8638 DialectType::Spark
8639 | DialectType::Databricks
8640 | DialectType::Hive => "SPLIT",
8641 DialectType::PostgreSQL => "STRING_TO_ARRAY",
8642 DialectType::Redshift => "SPLIT_TO_ARRAY",
8643 _ => "SPLIT",
8644 };
8645 Ok(Expression::Function(Box::new(Function::new(
8646 name.to_string(),
8647 args,
8648 ))))
8649 }
8650 // SPLIT_TO_ARRAY(x, sep) with 2 args -> rename based on target
8651 "SPLIT_TO_ARRAY" if f.args.len() == 2 => {
8652 let name = match target {
8653 DialectType::DuckDB => "STR_SPLIT",
8654 DialectType::Presto | DialectType::Trino => "SPLIT",
8655 DialectType::Spark
8656 | DialectType::Databricks
8657 | DialectType::Hive => "SPLIT",
8658 DialectType::PostgreSQL => "STRING_TO_ARRAY",
8659 DialectType::Redshift => "SPLIT_TO_ARRAY",
8660 _ => "SPLIT",
8661 };
8662 Ok(Expression::Function(Box::new(Function::new(
8663 name.to_string(),
8664 f.args,
8665 ))))
8666 }
8667 // STRING_TO_ARRAY/STR_SPLIT -> target-specific split function
8668 "STRING_TO_ARRAY" | "STR_SPLIT" if f.args.len() >= 2 => {
8669 let name = match target {
8670 DialectType::DuckDB => "STR_SPLIT",
8671 DialectType::Presto | DialectType::Trino => "SPLIT",
8672 DialectType::Spark
8673 | DialectType::Databricks
8674 | DialectType::Hive => "SPLIT",
8675 DialectType::Doris | DialectType::StarRocks => {
8676 "SPLIT_BY_STRING"
8677 }
8678 DialectType::PostgreSQL | DialectType::Redshift => {
8679 "STRING_TO_ARRAY"
8680 }
8681 _ => "SPLIT",
8682 };
8683 // For Spark/Hive, SPLIT uses regex - need to escape literal with \Q...\E
8684 if matches!(
8685 target,
8686 DialectType::Spark
8687 | DialectType::Databricks
8688 | DialectType::Hive
8689 ) {
8690 let mut args = f.args;
8691 let x = args.remove(0);
8692 let sep = args.remove(0);
8693 // Wrap separator in CONCAT('\\Q', sep, '\\E')
8694 let escaped_sep =
8695 Expression::Function(Box::new(Function::new(
8696 "CONCAT".to_string(),
8697 vec![
8698 Expression::string("\\Q"),
8699 sep,
8700 Expression::string("\\E"),
8701 ],
8702 )));
8703 Ok(Expression::Function(Box::new(Function::new(
8704 name.to_string(),
8705 vec![x, escaped_sep],
8706 ))))
8707 } else {
8708 Ok(Expression::Function(Box::new(Function::new(
8709 name.to_string(),
8710 f.args,
8711 ))))
8712 }
8713 }
8714 // STR_SPLIT_REGEX(x, 'a') / REGEXP_SPLIT(x, 'a') -> target-specific regex split
8715 "STR_SPLIT_REGEX" | "REGEXP_SPLIT" if f.args.len() == 2 => {
8716 let name = match target {
8717 DialectType::DuckDB => "STR_SPLIT_REGEX",
8718 DialectType::Presto | DialectType::Trino => "REGEXP_SPLIT",
8719 DialectType::Spark
8720 | DialectType::Databricks
8721 | DialectType::Hive => "SPLIT",
8722 _ => "REGEXP_SPLIT",
8723 };
8724 Ok(Expression::Function(Box::new(Function::new(
8725 name.to_string(),
8726 f.args,
8727 ))))
8728 }
8729 // SPLIT(x, sep) from Presto/StarRocks/Doris -> target-specific split with regex escaping for Hive/Spark
8730 "SPLIT"
8731 if f.args.len() == 2
8732 && matches!(
8733 source,
8734 DialectType::Presto
8735 | DialectType::Trino
8736 | DialectType::Athena
8737 | DialectType::StarRocks
8738 | DialectType::Doris
8739 )
8740 && matches!(
8741 target,
8742 DialectType::Spark
8743 | DialectType::Databricks
8744 | DialectType::Hive
8745 ) =>
8746 {
8747 // Presto/StarRocks SPLIT is literal, Hive/Spark SPLIT is regex
8748 let mut args = f.args;
8749 let x = args.remove(0);
8750 let sep = args.remove(0);
8751 let escaped_sep = Expression::Function(Box::new(Function::new(
8752 "CONCAT".to_string(),
8753 vec![Expression::string("\\Q"), sep, Expression::string("\\E")],
8754 )));
8755 Ok(Expression::Function(Box::new(Function::new(
8756 "SPLIT".to_string(),
8757 vec![x, escaped_sep],
8758 ))))
8759 }
8760 // SUBSTRINGINDEX -> SUBSTRING_INDEX (ClickHouse camelCase to standard)
8761 // For ClickHouse target, preserve original name to maintain camelCase
8762 "SUBSTRINGINDEX" => {
8763 let name = if matches!(target, DialectType::ClickHouse) {
8764 f.name.clone()
8765 } else {
8766 "SUBSTRING_INDEX".to_string()
8767 };
8768 Ok(Expression::Function(Box::new(Function::new(name, f.args))))
8769 }
8770 // ARRAY_LENGTH/SIZE/CARDINALITY -> target-specific array length function
8771 "ARRAY_LENGTH" | "SIZE" | "CARDINALITY" => {
8772 // Get the array argument (first arg, drop dimension args)
8773 let mut args = f.args;
8774 let arr = if args.is_empty() {
8775 return Ok(Expression::Function(Box::new(Function::new(
8776 name.to_string(),
8777 args,
8778 ))));
8779 } else {
8780 args.remove(0)
8781 };
8782 let name =
8783 match target {
8784 DialectType::Spark
8785 | DialectType::Databricks
8786 | DialectType::Hive => "SIZE",
8787 DialectType::Presto | DialectType::Trino => "CARDINALITY",
8788 DialectType::BigQuery => "ARRAY_LENGTH",
8789 DialectType::DuckDB => {
8790 // DuckDB: use ARRAY_LENGTH with all args
8791 let mut all_args = vec![arr];
8792 all_args.extend(args);
8793 return Ok(Expression::Function(Box::new(
8794 Function::new("ARRAY_LENGTH".to_string(), all_args),
8795 )));
8796 }
8797 DialectType::PostgreSQL | DialectType::Redshift => {
8798 // Keep ARRAY_LENGTH with dimension arg
8799 let mut all_args = vec![arr];
8800 all_args.extend(args);
8801 return Ok(Expression::Function(Box::new(
8802 Function::new("ARRAY_LENGTH".to_string(), all_args),
8803 )));
8804 }
8805 DialectType::ClickHouse => "LENGTH",
8806 _ => "ARRAY_LENGTH",
8807 };
8808 Ok(Expression::Function(Box::new(Function::new(
8809 name.to_string(),
8810 vec![arr],
8811 ))))
8812 }
8813 // UNICODE(x) -> target-specific codepoint function
8814 "UNICODE" if f.args.len() == 1 => {
8815 match target {
8816 DialectType::SQLite | DialectType::DuckDB => {
8817 Ok(Expression::Function(Box::new(Function::new(
8818 "UNICODE".to_string(),
8819 f.args,
8820 ))))
8821 }
8822 DialectType::Oracle => {
8823 // ASCII(UNISTR(x))
8824 let inner = Expression::Function(Box::new(Function::new(
8825 "UNISTR".to_string(),
8826 f.args,
8827 )));
8828 Ok(Expression::Function(Box::new(Function::new(
8829 "ASCII".to_string(),
8830 vec![inner],
8831 ))))
8832 }
8833 DialectType::MySQL => {
8834 // ORD(CONVERT(x USING utf32))
8835 let arg = f.args.into_iter().next().unwrap();
8836 let convert_expr = Expression::ConvertToCharset(Box::new(
8837 crate::expressions::ConvertToCharset {
8838 this: Box::new(arg),
8839 dest: Some(Box::new(Expression::Identifier(
8840 crate::expressions::Identifier::new("utf32"),
8841 ))),
8842 source: None,
8843 },
8844 ));
8845 Ok(Expression::Function(Box::new(Function::new(
8846 "ORD".to_string(),
8847 vec![convert_expr],
8848 ))))
8849 }
8850 _ => Ok(Expression::Function(Box::new(Function::new(
8851 "ASCII".to_string(),
8852 f.args,
8853 )))),
8854 }
8855 }
8856 // XOR(a, b, ...) -> a XOR b XOR ... for MySQL, BITWISE_XOR for Presto/Trino, # for PostgreSQL, ^ for BigQuery
8857 "XOR" if f.args.len() >= 2 => {
8858 match target {
8859 DialectType::ClickHouse => {
8860 // ClickHouse: keep as xor() function with lowercase name
8861 Ok(Expression::Function(Box::new(Function::new(
8862 "xor".to_string(),
8863 f.args,
8864 ))))
8865 }
8866 DialectType::Presto | DialectType::Trino => {
8867 if f.args.len() == 2 {
8868 Ok(Expression::Function(Box::new(Function::new(
8869 "BITWISE_XOR".to_string(),
8870 f.args,
8871 ))))
8872 } else {
8873 // Nest: BITWISE_XOR(BITWISE_XOR(a, b), c)
8874 let mut args = f.args;
8875 let first = args.remove(0);
8876 let second = args.remove(0);
8877 let mut result =
8878 Expression::Function(Box::new(Function::new(
8879 "BITWISE_XOR".to_string(),
8880 vec![first, second],
8881 )));
8882 for arg in args {
8883 result =
8884 Expression::Function(Box::new(Function::new(
8885 "BITWISE_XOR".to_string(),
8886 vec![result, arg],
8887 )));
8888 }
8889 Ok(result)
8890 }
8891 }
8892 DialectType::MySQL
8893 | DialectType::SingleStore
8894 | DialectType::Doris
8895 | DialectType::StarRocks => {
8896 // Convert XOR(a, b, c) -> Expression::Xor with expressions list
8897 let args = f.args;
8898 Ok(Expression::Xor(Box::new(crate::expressions::Xor {
8899 this: None,
8900 expression: None,
8901 expressions: args,
8902 })))
8903 }
8904 DialectType::PostgreSQL | DialectType::Redshift => {
8905 // PostgreSQL: a # b (hash operator for XOR)
8906 let mut args = f.args;
8907 let first = args.remove(0);
8908 let second = args.remove(0);
8909 let mut result = Expression::BitwiseXor(Box::new(
8910 BinaryOp::new(first, second),
8911 ));
8912 for arg in args {
8913 result = Expression::BitwiseXor(Box::new(
8914 BinaryOp::new(result, arg),
8915 ));
8916 }
8917 Ok(result)
8918 }
8919 DialectType::DuckDB => {
8920 // DuckDB: keep as XOR function (DuckDB ^ is Power, not XOR)
8921 Ok(Expression::Function(Box::new(Function::new(
8922 "XOR".to_string(),
8923 f.args,
8924 ))))
8925 }
8926 DialectType::BigQuery => {
8927 // BigQuery: a ^ b (caret operator for XOR)
8928 let mut args = f.args;
8929 let first = args.remove(0);
8930 let second = args.remove(0);
8931 let mut result = Expression::BitwiseXor(Box::new(
8932 BinaryOp::new(first, second),
8933 ));
8934 for arg in args {
8935 result = Expression::BitwiseXor(Box::new(
8936 BinaryOp::new(result, arg),
8937 ));
8938 }
8939 Ok(result)
8940 }
8941 _ => Ok(Expression::Function(Box::new(Function::new(
8942 "XOR".to_string(),
8943 f.args,
8944 )))),
8945 }
8946 }
8947 // ARRAY_REVERSE_SORT(x) -> SORT_ARRAY(x, FALSE) for Spark/Hive, ARRAY_SORT(x, lambda) for Presto
8948 "ARRAY_REVERSE_SORT" if f.args.len() >= 1 => {
8949 match target {
8950 DialectType::Spark
8951 | DialectType::Databricks
8952 | DialectType::Hive => {
8953 let mut args = f.args;
8954 args.push(Expression::Identifier(
8955 crate::expressions::Identifier::new("FALSE"),
8956 ));
8957 Ok(Expression::Function(Box::new(Function::new(
8958 "SORT_ARRAY".to_string(),
8959 args,
8960 ))))
8961 }
8962 DialectType::Presto
8963 | DialectType::Trino
8964 | DialectType::Athena => {
8965 // ARRAY_SORT(x, (a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END)
8966 let arr = f.args.into_iter().next().unwrap();
8967 let lambda = Expression::Lambda(Box::new(
8968 crate::expressions::LambdaExpr {
8969 parameters: vec![
8970 Identifier::new("a"),
8971 Identifier::new("b"),
8972 ],
8973 colon: false,
8974 parameter_types: Vec::new(),
8975 body: Expression::Case(Box::new(Case {
8976 operand: None,
8977 whens: vec![
8978 (
8979 Expression::Lt(Box::new(
8980 BinaryOp::new(
8981 Expression::Identifier(
8982 Identifier::new("a"),
8983 ),
8984 Expression::Identifier(
8985 Identifier::new("b"),
8986 ),
8987 ),
8988 )),
8989 Expression::number(1),
8990 ),
8991 (
8992 Expression::Gt(Box::new(
8993 BinaryOp::new(
8994 Expression::Identifier(
8995 Identifier::new("a"),
8996 ),
8997 Expression::Identifier(
8998 Identifier::new("b"),
8999 ),
9000 ),
9001 )),
9002 Expression::Neg(Box::new(
9003 crate::expressions::UnaryOp {
9004 this: Expression::number(1),
9005 },
9006 )),
9007 ),
9008 ],
9009 else_: Some(Expression::number(0)),
9010 comments: Vec::new(),
9011 })),
9012 },
9013 ));
9014 Ok(Expression::Function(Box::new(Function::new(
9015 "ARRAY_SORT".to_string(),
9016 vec![arr, lambda],
9017 ))))
9018 }
9019 _ => Ok(Expression::Function(Box::new(Function::new(
9020 "ARRAY_REVERSE_SORT".to_string(),
9021 f.args,
9022 )))),
9023 }
9024 }
9025 // ENCODE(x) -> ENCODE(x, 'utf-8') for Spark/Hive, TO_UTF8(x) for Presto
9026 "ENCODE" if f.args.len() == 1 => match target {
9027 DialectType::Spark
9028 | DialectType::Databricks
9029 | DialectType::Hive => {
9030 let mut args = f.args;
9031 args.push(Expression::string("utf-8"));
9032 Ok(Expression::Function(Box::new(Function::new(
9033 "ENCODE".to_string(),
9034 args,
9035 ))))
9036 }
9037 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
9038 Ok(Expression::Function(Box::new(Function::new(
9039 "TO_UTF8".to_string(),
9040 f.args,
9041 ))))
9042 }
9043 _ => Ok(Expression::Function(Box::new(Function::new(
9044 "ENCODE".to_string(),
9045 f.args,
9046 )))),
9047 },
9048 // DECODE(x) -> DECODE(x, 'utf-8') for Spark/Hive, FROM_UTF8(x) for Presto
9049 "DECODE" if f.args.len() == 1 => match target {
9050 DialectType::Spark
9051 | DialectType::Databricks
9052 | DialectType::Hive => {
9053 let mut args = f.args;
9054 args.push(Expression::string("utf-8"));
9055 Ok(Expression::Function(Box::new(Function::new(
9056 "DECODE".to_string(),
9057 args,
9058 ))))
9059 }
9060 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
9061 Ok(Expression::Function(Box::new(Function::new(
9062 "FROM_UTF8".to_string(),
9063 f.args,
9064 ))))
9065 }
9066 _ => Ok(Expression::Function(Box::new(Function::new(
9067 "DECODE".to_string(),
9068 f.args,
9069 )))),
9070 },
9071 // QUANTILE(x, p) -> PERCENTILE(x, p) for Spark/Hive
9072 "QUANTILE" if f.args.len() == 2 => {
9073 let name = match target {
9074 DialectType::Spark
9075 | DialectType::Databricks
9076 | DialectType::Hive => "PERCENTILE",
9077 DialectType::Presto | DialectType::Trino => "APPROX_PERCENTILE",
9078 DialectType::BigQuery => "PERCENTILE_CONT",
9079 _ => "QUANTILE",
9080 };
9081 Ok(Expression::Function(Box::new(Function::new(
9082 name.to_string(),
9083 f.args,
9084 ))))
9085 }
9086 // QUANTILE_CONT(x, q) -> PERCENTILE_CONT(q) WITHIN GROUP (ORDER BY x) for PostgreSQL/Snowflake
9087 "QUANTILE_CONT" if f.args.len() == 2 => {
9088 let mut args = f.args;
9089 let column = args.remove(0);
9090 let quantile = args.remove(0);
9091 match target {
9092 DialectType::DuckDB => {
9093 Ok(Expression::Function(Box::new(Function::new(
9094 "QUANTILE_CONT".to_string(),
9095 vec![column, quantile],
9096 ))))
9097 }
9098 DialectType::PostgreSQL
9099 | DialectType::Redshift
9100 | DialectType::Snowflake => {
9101 // PERCENTILE_CONT(q) WITHIN GROUP (ORDER BY x)
9102 let inner = Expression::PercentileCont(Box::new(
9103 crate::expressions::PercentileFunc {
9104 this: column.clone(),
9105 percentile: quantile,
9106 order_by: None,
9107 filter: None,
9108 },
9109 ));
9110 Ok(Expression::WithinGroup(Box::new(
9111 crate::expressions::WithinGroup {
9112 this: inner,
9113 order_by: vec![crate::expressions::Ordered {
9114 this: column,
9115 desc: false,
9116 nulls_first: None,
9117 explicit_asc: false,
9118 with_fill: None,
9119 }],
9120 },
9121 )))
9122 }
9123 _ => Ok(Expression::Function(Box::new(Function::new(
9124 "QUANTILE_CONT".to_string(),
9125 vec![column, quantile],
9126 )))),
9127 }
9128 }
9129 // QUANTILE_DISC(x, q) -> PERCENTILE_DISC(q) WITHIN GROUP (ORDER BY x) for PostgreSQL/Snowflake
9130 "QUANTILE_DISC" if f.args.len() == 2 => {
9131 let mut args = f.args;
9132 let column = args.remove(0);
9133 let quantile = args.remove(0);
9134 match target {
9135 DialectType::DuckDB => {
9136 Ok(Expression::Function(Box::new(Function::new(
9137 "QUANTILE_DISC".to_string(),
9138 vec![column, quantile],
9139 ))))
9140 }
9141 DialectType::PostgreSQL
9142 | DialectType::Redshift
9143 | DialectType::Snowflake => {
9144 // PERCENTILE_DISC(q) WITHIN GROUP (ORDER BY x)
9145 let inner = Expression::PercentileDisc(Box::new(
9146 crate::expressions::PercentileFunc {
9147 this: column.clone(),
9148 percentile: quantile,
9149 order_by: None,
9150 filter: None,
9151 },
9152 ));
9153 Ok(Expression::WithinGroup(Box::new(
9154 crate::expressions::WithinGroup {
9155 this: inner,
9156 order_by: vec![crate::expressions::Ordered {
9157 this: column,
9158 desc: false,
9159 nulls_first: None,
9160 explicit_asc: false,
9161 with_fill: None,
9162 }],
9163 },
9164 )))
9165 }
9166 _ => Ok(Expression::Function(Box::new(Function::new(
9167 "QUANTILE_DISC".to_string(),
9168 vec![column, quantile],
9169 )))),
9170 }
9171 }
9172 // PERCENTILE_APPROX(x, p) / APPROX_PERCENTILE(x, p) -> target-specific
9173 "PERCENTILE_APPROX" | "APPROX_PERCENTILE" if f.args.len() >= 2 => {
9174 let name = match target {
9175 DialectType::Presto
9176 | DialectType::Trino
9177 | DialectType::Athena => "APPROX_PERCENTILE",
9178 DialectType::Spark
9179 | DialectType::Databricks
9180 | DialectType::Hive => "PERCENTILE_APPROX",
9181 DialectType::DuckDB => "APPROX_QUANTILE",
9182 DialectType::PostgreSQL | DialectType::Redshift => {
9183 "PERCENTILE_CONT"
9184 }
9185 _ => &f.name,
9186 };
9187 Ok(Expression::Function(Box::new(Function::new(
9188 name.to_string(),
9189 f.args,
9190 ))))
9191 }
9192 // EPOCH(x) -> UNIX_TIMESTAMP(x) for Spark/Hive
9193 "EPOCH" if f.args.len() == 1 => {
9194 let name = match target {
9195 DialectType::Spark
9196 | DialectType::Databricks
9197 | DialectType::Hive => "UNIX_TIMESTAMP",
9198 DialectType::Presto | DialectType::Trino => "TO_UNIXTIME",
9199 _ => "EPOCH",
9200 };
9201 Ok(Expression::Function(Box::new(Function::new(
9202 name.to_string(),
9203 f.args,
9204 ))))
9205 }
9206 // EPOCH_MS(x) -> target-specific epoch milliseconds conversion
9207 "EPOCH_MS" if f.args.len() == 1 => {
9208 match target {
9209 DialectType::Spark | DialectType::Databricks => {
9210 Ok(Expression::Function(Box::new(Function::new(
9211 "TIMESTAMP_MILLIS".to_string(),
9212 f.args,
9213 ))))
9214 }
9215 DialectType::Hive => {
9216 // Hive: FROM_UNIXTIME(x / 1000)
9217 let arg = f.args.into_iter().next().unwrap();
9218 let div_expr = Expression::Div(Box::new(
9219 crate::expressions::BinaryOp::new(
9220 arg,
9221 Expression::number(1000),
9222 ),
9223 ));
9224 Ok(Expression::Function(Box::new(Function::new(
9225 "FROM_UNIXTIME".to_string(),
9226 vec![div_expr],
9227 ))))
9228 }
9229 DialectType::Presto | DialectType::Trino => {
9230 Ok(Expression::Function(Box::new(Function::new(
9231 "FROM_UNIXTIME".to_string(),
9232 vec![Expression::Div(Box::new(
9233 crate::expressions::BinaryOp::new(
9234 f.args.into_iter().next().unwrap(),
9235 Expression::number(1000),
9236 ),
9237 ))],
9238 ))))
9239 }
9240 _ => Ok(Expression::Function(Box::new(Function::new(
9241 "EPOCH_MS".to_string(),
9242 f.args,
9243 )))),
9244 }
9245 }
9246 // HASHBYTES('algorithm', x) -> target-specific hash function
9247 "HASHBYTES" if f.args.len() == 2 => {
9248 // Keep HASHBYTES as-is for TSQL target
9249 if matches!(target, DialectType::TSQL) {
9250 return Ok(Expression::Function(f));
9251 }
9252 let algo_expr = &f.args[0];
9253 let algo = match algo_expr {
9254 Expression::Literal(crate::expressions::Literal::String(s)) => {
9255 s.to_uppercase()
9256 }
9257 _ => return Ok(Expression::Function(f)),
9258 };
9259 let data_arg = f.args.into_iter().nth(1).unwrap();
9260 match algo.as_str() {
9261 "SHA1" => {
9262 let name = match target {
9263 DialectType::Spark | DialectType::Databricks => "SHA",
9264 DialectType::Hive => "SHA1",
9265 _ => "SHA1",
9266 };
9267 Ok(Expression::Function(Box::new(Function::new(
9268 name.to_string(),
9269 vec![data_arg],
9270 ))))
9271 }
9272 "SHA2_256" => {
9273 Ok(Expression::Function(Box::new(Function::new(
9274 "SHA2".to_string(),
9275 vec![data_arg, Expression::number(256)],
9276 ))))
9277 }
9278 "SHA2_512" => {
9279 Ok(Expression::Function(Box::new(Function::new(
9280 "SHA2".to_string(),
9281 vec![data_arg, Expression::number(512)],
9282 ))))
9283 }
9284 "MD5" => Ok(Expression::Function(Box::new(Function::new(
9285 "MD5".to_string(),
9286 vec![data_arg],
9287 )))),
9288 _ => Ok(Expression::Function(Box::new(Function::new(
9289 "HASHBYTES".to_string(),
9290 vec![Expression::string(&algo), data_arg],
9291 )))),
9292 }
9293 }
9294 // JSON_EXTRACT_PATH(json, key1, key2, ...) -> target-specific JSON extraction
9295 "JSON_EXTRACT_PATH" | "JSON_EXTRACT_PATH_TEXT" if f.args.len() >= 2 => {
9296 let is_text = name == "JSON_EXTRACT_PATH_TEXT";
9297 let mut args = f.args;
9298 let json_expr = args.remove(0);
9299 // Build JSON path from remaining keys: $.key1.key2 or $.key1[0]
9300 let mut json_path = "$".to_string();
9301 for a in &args {
9302 match a {
9303 Expression::Literal(
9304 crate::expressions::Literal::String(s),
9305 ) => {
9306 // Numeric string keys become array indices: [0]
9307 if s.chars().all(|c| c.is_ascii_digit()) {
9308 json_path.push('[');
9309 json_path.push_str(s);
9310 json_path.push(']');
9311 } else {
9312 json_path.push('.');
9313 json_path.push_str(s);
9314 }
9315 }
9316 _ => {
9317 json_path.push_str(".?");
9318 }
9319 }
9320 }
9321 match target {
9322 DialectType::Spark
9323 | DialectType::Databricks
9324 | DialectType::Hive => {
9325 Ok(Expression::Function(Box::new(Function::new(
9326 "GET_JSON_OBJECT".to_string(),
9327 vec![json_expr, Expression::string(&json_path)],
9328 ))))
9329 }
9330 DialectType::Presto | DialectType::Trino => {
9331 let func_name = if is_text {
9332 "JSON_EXTRACT_SCALAR"
9333 } else {
9334 "JSON_EXTRACT"
9335 };
9336 Ok(Expression::Function(Box::new(Function::new(
9337 func_name.to_string(),
9338 vec![json_expr, Expression::string(&json_path)],
9339 ))))
9340 }
9341 DialectType::BigQuery | DialectType::MySQL => {
9342 let func_name = if is_text {
9343 "JSON_EXTRACT_SCALAR"
9344 } else {
9345 "JSON_EXTRACT"
9346 };
9347 Ok(Expression::Function(Box::new(Function::new(
9348 func_name.to_string(),
9349 vec![json_expr, Expression::string(&json_path)],
9350 ))))
9351 }
9352 DialectType::PostgreSQL | DialectType::Materialize => {
9353 // Keep as JSON_EXTRACT_PATH_TEXT / JSON_EXTRACT_PATH for PostgreSQL/Materialize
9354 let func_name = if is_text {
9355 "JSON_EXTRACT_PATH_TEXT"
9356 } else {
9357 "JSON_EXTRACT_PATH"
9358 };
9359 let mut new_args = vec![json_expr];
9360 new_args.extend(args);
9361 Ok(Expression::Function(Box::new(Function::new(
9362 func_name.to_string(),
9363 new_args,
9364 ))))
9365 }
9366 DialectType::DuckDB | DialectType::SQLite => {
9367 // Use -> for JSON_EXTRACT_PATH, ->> for JSON_EXTRACT_PATH_TEXT
9368 if is_text {
9369 Ok(Expression::JsonExtractScalar(Box::new(
9370 crate::expressions::JsonExtractFunc {
9371 this: json_expr,
9372 path: Expression::string(&json_path),
9373 returning: None,
9374 arrow_syntax: true,
9375 hash_arrow_syntax: false,
9376 wrapper_option: None,
9377 quotes_option: None,
9378 on_scalar_string: false,
9379 on_error: None,
9380 },
9381 )))
9382 } else {
9383 Ok(Expression::JsonExtract(Box::new(
9384 crate::expressions::JsonExtractFunc {
9385 this: json_expr,
9386 path: Expression::string(&json_path),
9387 returning: None,
9388 arrow_syntax: true,
9389 hash_arrow_syntax: false,
9390 wrapper_option: None,
9391 quotes_option: None,
9392 on_scalar_string: false,
9393 on_error: None,
9394 },
9395 )))
9396 }
9397 }
9398 DialectType::Redshift => {
9399 // Keep as JSON_EXTRACT_PATH_TEXT for Redshift
9400 let mut new_args = vec![json_expr];
9401 new_args.extend(args);
9402 Ok(Expression::Function(Box::new(Function::new(
9403 "JSON_EXTRACT_PATH_TEXT".to_string(),
9404 new_args,
9405 ))))
9406 }
9407 DialectType::TSQL => {
9408 // ISNULL(JSON_QUERY(json, '$.path'), JSON_VALUE(json, '$.path'))
9409 let jq = Expression::Function(Box::new(Function::new(
9410 "JSON_QUERY".to_string(),
9411 vec![json_expr.clone(), Expression::string(&json_path)],
9412 )));
9413 let jv = Expression::Function(Box::new(Function::new(
9414 "JSON_VALUE".to_string(),
9415 vec![json_expr, Expression::string(&json_path)],
9416 )));
9417 Ok(Expression::Function(Box::new(Function::new(
9418 "ISNULL".to_string(),
9419 vec![jq, jv],
9420 ))))
9421 }
9422 DialectType::ClickHouse => {
9423 let func_name = if is_text {
9424 "JSONExtractString"
9425 } else {
9426 "JSONExtractRaw"
9427 };
9428 let mut new_args = vec![json_expr];
9429 new_args.extend(args);
9430 Ok(Expression::Function(Box::new(Function::new(
9431 func_name.to_string(),
9432 new_args,
9433 ))))
9434 }
9435 _ => {
9436 let func_name = if is_text {
9437 "JSON_EXTRACT_SCALAR"
9438 } else {
9439 "JSON_EXTRACT"
9440 };
9441 Ok(Expression::Function(Box::new(Function::new(
9442 func_name.to_string(),
9443 vec![json_expr, Expression::string(&json_path)],
9444 ))))
9445 }
9446 }
9447 }
9448 // APPROX_DISTINCT(x) -> APPROX_COUNT_DISTINCT(x) for Spark/Hive/BigQuery
9449 "APPROX_DISTINCT" if f.args.len() >= 1 => {
9450 let name = match target {
9451 DialectType::Spark
9452 | DialectType::Databricks
9453 | DialectType::Hive
9454 | DialectType::BigQuery => "APPROX_COUNT_DISTINCT",
9455 _ => "APPROX_DISTINCT",
9456 };
9457 let mut args = f.args;
9458 // Hive doesn't support the accuracy parameter
9459 if name == "APPROX_COUNT_DISTINCT"
9460 && matches!(target, DialectType::Hive)
9461 {
9462 args.truncate(1);
9463 }
9464 Ok(Expression::Function(Box::new(Function::new(
9465 name.to_string(),
9466 args,
9467 ))))
9468 }
9469 // REGEXP_EXTRACT(x, pattern) - normalize default group index
9470 "REGEXP_EXTRACT" if f.args.len() == 2 => {
9471 // Determine source default group index
9472 let source_default = match source {
9473 DialectType::Presto
9474 | DialectType::Trino
9475 | DialectType::DuckDB => 0,
9476 _ => 1, // Hive/Spark/Databricks default = 1
9477 };
9478 // Determine target default group index
9479 let target_default = match target {
9480 DialectType::Presto
9481 | DialectType::Trino
9482 | DialectType::DuckDB
9483 | DialectType::BigQuery => 0,
9484 DialectType::Snowflake => {
9485 // Snowflake uses REGEXP_SUBSTR
9486 return Ok(Expression::Function(Box::new(Function::new(
9487 "REGEXP_SUBSTR".to_string(),
9488 f.args,
9489 ))));
9490 }
9491 _ => 1, // Hive/Spark/Databricks default = 1
9492 };
9493 if source_default != target_default {
9494 let mut args = f.args;
9495 args.push(Expression::number(source_default));
9496 Ok(Expression::Function(Box::new(Function::new(
9497 "REGEXP_EXTRACT".to_string(),
9498 args,
9499 ))))
9500 } else {
9501 Ok(Expression::Function(Box::new(Function::new(
9502 "REGEXP_EXTRACT".to_string(),
9503 f.args,
9504 ))))
9505 }
9506 }
9507 // RLIKE(str, pattern) -> RegexpLike expression (generates as target-specific form)
9508 "RLIKE" if f.args.len() == 2 => {
9509 let mut args = f.args;
9510 let str_expr = args.remove(0);
9511 let pattern = args.remove(0);
9512 match target {
9513 DialectType::DuckDB => {
9514 // REGEXP_MATCHES(str, pattern)
9515 Ok(Expression::Function(Box::new(Function::new(
9516 "REGEXP_MATCHES".to_string(),
9517 vec![str_expr, pattern],
9518 ))))
9519 }
9520 _ => {
9521 // Convert to RegexpLike which generates as RLIKE/~/REGEXP_LIKE per dialect
9522 Ok(Expression::RegexpLike(Box::new(
9523 crate::expressions::RegexpFunc {
9524 this: str_expr,
9525 pattern,
9526 flags: None,
9527 },
9528 )))
9529 }
9530 }
9531 }
// EOMONTH(date[, month_offset]) -> target-specific
// Rewrites TSQL-style EOMONTH (last day of the month, with an optional
// month offset) into each target dialect's native end-of-month construct.
"EOMONTH" if f.args.len() >= 1 => {
    let mut args = f.args;
    // First argument is the date expression; optional second is the offset
    // in months. Any further arguments are ignored.
    let date_arg = args.remove(0);
    let month_offset = if !args.is_empty() {
        Some(args.remove(0))
    } else {
        None
    };

    // Helper: wrap date in CAST to DATE
    let cast_to_date = |e: Expression| -> Expression {
        Expression::Cast(Box::new(Cast {
            this: e,
            to: DataType::Date,
            trailing_comments: vec![],
            double_colon_syntax: false,
            format: None,
            default: None,
        }))
    };

    match target {
        DialectType::TSQL | DialectType::Fabric => {
            // TSQL: EOMONTH(CAST(date AS DATE)) or EOMONTH(DATEADD(MONTH, offset, CAST(date AS DATE)))
            let date = cast_to_date(date_arg);
            let date = if let Some(offset) = month_offset {
                Expression::Function(Box::new(Function::new(
                    "DATEADD".to_string(),
                    vec![
                        // DATEADD's unit is a bare keyword, hence an identifier
                        // rather than a string literal.
                        Expression::Identifier(Identifier::new("MONTH")),
                        offset,
                        date,
                    ],
                )))
            } else {
                date
            };
            Ok(Expression::Function(Box::new(Function::new(
                "EOMONTH".to_string(),
                vec![date],
            ))))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // Presto: LAST_DAY_OF_MONTH(CAST(CAST(date AS TIMESTAMP) AS DATE))
            // or with offset: LAST_DAY_OF_MONTH(DATE_ADD('MONTH', offset, CAST(CAST(date AS TIMESTAMP) AS DATE)))
            let cast_ts = Expression::Cast(Box::new(Cast {
                this: date_arg,
                to: DataType::Timestamp {
                    timezone: false,
                    precision: None,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            }));
            let date = cast_to_date(cast_ts);
            let date = if let Some(offset) = month_offset {
                Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    // Presto's DATE_ADD takes the unit as a string literal.
                    vec![Expression::string("MONTH"), offset, date],
                )))
            } else {
                date
            };
            Ok(Expression::Function(Box::new(Function::new(
                "LAST_DAY_OF_MONTH".to_string(),
                vec![date],
            ))))
        }
        DialectType::PostgreSQL => {
            // PostgreSQL: CAST(DATE_TRUNC('MONTH', CAST(date AS DATE) [+ INTERVAL 'offset MONTH']) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)
            let date = cast_to_date(date_arg);
            let date = if let Some(offset) = month_offset {
                // The offset is rendered into the interval literal text itself,
                // e.g. INTERVAL '3 MONTH'.
                let interval_str = format!(
                    "{} MONTH",
                    Self::expr_to_string_static(&offset)
                );
                Expression::Add(Box::new(
                    crate::expressions::BinaryOp::new(
                        date,
                        Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(Expression::string(
                                    &interval_str,
                                )),
                                unit: None,
                            },
                        )),
                    ),
                ))
            } else {
                date
            };
            // First day of the (possibly offset) month ...
            let truncated =
                Expression::Function(Box::new(Function::new(
                    "DATE_TRUNC".to_string(),
                    vec![Expression::string("MONTH"), date],
                )));
            // ... advance one month ...
            let plus_month = Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(
                    truncated,
                    Expression::Interval(Box::new(
                        crate::expressions::Interval {
                            this: Some(Expression::string("1 MONTH")),
                            unit: None,
                        },
                    )),
                ),
            ));
            // ... then step back one day to land on the last day of the month.
            let minus_day = Expression::Sub(Box::new(
                crate::expressions::BinaryOp::new(
                    plus_month,
                    Expression::Interval(Box::new(
                        crate::expressions::Interval {
                            this: Some(Expression::string("1 DAY")),
                            unit: None,
                        },
                    )),
                ),
            ));
            Ok(Expression::Cast(Box::new(Cast {
                this: minus_day,
                to: DataType::Date,
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            })))
        }
        DialectType::DuckDB => {
            // DuckDB: LAST_DAY(CAST(date AS DATE) [+ INTERVAL (offset) MONTH])
            let date = cast_to_date(date_arg);
            let date = if let Some(offset) = month_offset {
                // Wrap negative numbers in parentheses for DuckDB INTERVAL
                let interval_val =
                    if matches!(&offset, Expression::Neg(_)) {
                        Expression::Paren(Box::new(
                            crate::expressions::Paren {
                                this: offset,
                                trailing_comments: Vec::new(),
                            },
                        ))
                    } else {
                        offset
                    };
                Expression::Add(Box::new(crate::expressions::BinaryOp::new(
                    date,
                    Expression::Interval(Box::new(crate::expressions::Interval {
                        this: Some(interval_val),
                        unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                            unit: crate::expressions::IntervalUnit::Month,
                            use_plural: false,
                        }),
                    })),
                )))
            } else {
                date
            };
            Ok(Expression::Function(Box::new(Function::new(
                "LAST_DAY".to_string(),
                vec![date],
            ))))
        }
        DialectType::Snowflake | DialectType::Redshift => {
            // Snowflake/Redshift: LAST_DAY(TO_DATE(date) or CAST(date AS DATE))
            // With offset: LAST_DAY(DATEADD(MONTH, offset, TO_DATE(date)))
            let date = if matches!(target, DialectType::Snowflake) {
                Expression::Function(Box::new(Function::new(
                    "TO_DATE".to_string(),
                    vec![date_arg],
                )))
            } else {
                cast_to_date(date_arg)
            };
            let date = if let Some(offset) = month_offset {
                Expression::Function(Box::new(Function::new(
                    "DATEADD".to_string(),
                    vec![
                        Expression::Identifier(Identifier::new(
                            "MONTH",
                        )),
                        offset,
                        date,
                    ],
                )))
            } else {
                date
            };
            Ok(Expression::Function(Box::new(Function::new(
                "LAST_DAY".to_string(),
                vec![date],
            ))))
        }
        DialectType::Spark | DialectType::Databricks => {
            // Spark: LAST_DAY(TO_DATE(date))
            // With offset: LAST_DAY(ADD_MONTHS(TO_DATE(date), offset))
            let date = Expression::Function(Box::new(Function::new(
                "TO_DATE".to_string(),
                vec![date_arg],
            )));
            let date = if let Some(offset) = month_offset {
                Expression::Function(Box::new(Function::new(
                    "ADD_MONTHS".to_string(),
                    vec![date, offset],
                )))
            } else {
                date
            };
            Ok(Expression::Function(Box::new(Function::new(
                "LAST_DAY".to_string(),
                vec![date],
            ))))
        }
        DialectType::MySQL => {
            // MySQL: LAST_DAY(DATE(date)) - no offset
            // With offset: LAST_DAY(DATE_ADD(date, INTERVAL offset MONTH)) - no DATE() wrapper
            let date = if let Some(offset) = month_offset {
                let iu = crate::expressions::IntervalUnit::Month;
                Expression::DateAdd(Box::new(
                    crate::expressions::DateAddFunc {
                        this: date_arg,
                        interval: offset,
                        unit: iu,
                    },
                ))
            } else {
                Expression::Function(Box::new(Function::new(
                    "DATE".to_string(),
                    vec![date_arg],
                )))
            };
            Ok(Expression::Function(Box::new(Function::new(
                "LAST_DAY".to_string(),
                vec![date],
            ))))
        }
        DialectType::BigQuery => {
            // BigQuery: LAST_DAY(CAST(date AS DATE))
            // With offset: LAST_DAY(DATE_ADD(CAST(date AS DATE), INTERVAL offset MONTH))
            let date = cast_to_date(date_arg);
            let date = if let Some(offset) = month_offset {
                let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                    this: Some(offset),
                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                        unit: crate::expressions::IntervalUnit::Month,
                        use_plural: false,
                    }),
                }));
                Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![date, interval],
                )))
            } else {
                date
            };
            Ok(Expression::Function(Box::new(Function::new(
                "LAST_DAY".to_string(),
                vec![date],
            ))))
        }
        DialectType::ClickHouse => {
            // ClickHouse: LAST_DAY(CAST(date AS Nullable(DATE)))
            let date = Expression::Cast(Box::new(Cast {
                this: date_arg,
                to: DataType::Nullable {
                    inner: Box::new(DataType::Date),
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
            }));
            let date = if let Some(offset) = month_offset {
                Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![
                        Expression::Identifier(Identifier::new(
                            "MONTH",
                        )),
                        offset,
                        date,
                    ],
                )))
            } else {
                date
            };
            Ok(Expression::Function(Box::new(Function::new(
                "LAST_DAY".to_string(),
                vec![date],
            ))))
        }
        DialectType::Hive => {
            // Hive: LAST_DAY(date)
            // With offset: LAST_DAY(ADD_MONTHS(date, offset)); no cast wrapper.
            let date = if let Some(offset) = month_offset {
                Expression::Function(Box::new(Function::new(
                    "ADD_MONTHS".to_string(),
                    vec![date_arg, offset],
                )))
            } else {
                date_arg
            };
            Ok(Expression::Function(Box::new(Function::new(
                "LAST_DAY".to_string(),
                vec![date],
            ))))
        }
        _ => {
            // Default: LAST_DAY(date)
            // With offset: LAST_DAY(DATEADD(MONTH, offset, date)).
            let date = if let Some(offset) = month_offset {
                let unit =
                    Expression::Identifier(Identifier::new("MONTH"));
                Expression::Function(Box::new(Function::new(
                    "DATEADD".to_string(),
                    vec![unit, offset, date_arg],
                )))
            } else {
                date_arg
            };
            Ok(Expression::Function(Box::new(Function::new(
                "LAST_DAY".to_string(),
                vec![date],
            ))))
        }
    }
}
9863 // LAST_DAY(x) / LAST_DAY_OF_MONTH(x) -> target-specific
9864 "LAST_DAY" | "LAST_DAY_OF_MONTH"
9865 if !matches!(source, DialectType::BigQuery)
9866 && f.args.len() >= 1 =>
9867 {
9868 let first_arg = f.args.into_iter().next().unwrap();
9869 match target {
9870 DialectType::TSQL | DialectType::Fabric => {
9871 Ok(Expression::Function(Box::new(Function::new(
9872 "EOMONTH".to_string(),
9873 vec![first_arg],
9874 ))))
9875 }
9876 DialectType::Presto
9877 | DialectType::Trino
9878 | DialectType::Athena => {
9879 Ok(Expression::Function(Box::new(Function::new(
9880 "LAST_DAY_OF_MONTH".to_string(),
9881 vec![first_arg],
9882 ))))
9883 }
9884 _ => Ok(Expression::Function(Box::new(Function::new(
9885 "LAST_DAY".to_string(),
9886 vec![first_arg],
9887 )))),
9888 }
9889 }
9890 // MAP(keys_array, vals_array) from Presto (2-arg form) -> target-specific
9891 "MAP"
9892 if f.args.len() == 2
9893 && matches!(
9894 source,
9895 DialectType::Presto
9896 | DialectType::Trino
9897 | DialectType::Athena
9898 ) =>
9899 {
9900 let keys_arg = f.args[0].clone();
9901 let vals_arg = f.args[1].clone();
9902
9903 // Helper: extract array elements from Array/ArrayFunc/Function("ARRAY") expressions
9904 fn extract_array_elements(
9905 expr: &Expression,
9906 ) -> Option<&Vec<Expression>> {
9907 match expr {
9908 Expression::Array(arr) => Some(&arr.expressions),
9909 Expression::ArrayFunc(arr) => Some(&arr.expressions),
9910 Expression::Function(f)
9911 if f.name.eq_ignore_ascii_case("ARRAY") =>
9912 {
9913 Some(&f.args)
9914 }
9915 _ => None,
9916 }
9917 }
9918
9919 match target {
9920 DialectType::Spark | DialectType::Databricks => {
9921 // Presto MAP(keys, vals) -> Spark MAP_FROM_ARRAYS(keys, vals)
9922 Ok(Expression::Function(Box::new(Function::new(
9923 "MAP_FROM_ARRAYS".to_string(),
9924 f.args,
9925 ))))
9926 }
9927 DialectType::Hive => {
9928 // Presto MAP(ARRAY[k1,k2], ARRAY[v1,v2]) -> Hive MAP(k1, v1, k2, v2)
9929 if let (Some(keys), Some(vals)) = (
9930 extract_array_elements(&keys_arg),
9931 extract_array_elements(&vals_arg),
9932 ) {
9933 if keys.len() == vals.len() {
9934 let mut interleaved = Vec::new();
9935 for (k, v) in keys.iter().zip(vals.iter()) {
9936 interleaved.push(k.clone());
9937 interleaved.push(v.clone());
9938 }
9939 Ok(Expression::Function(Box::new(Function::new(
9940 "MAP".to_string(),
9941 interleaved,
9942 ))))
9943 } else {
9944 Ok(Expression::Function(Box::new(Function::new(
9945 "MAP".to_string(),
9946 f.args,
9947 ))))
9948 }
9949 } else {
9950 Ok(Expression::Function(Box::new(Function::new(
9951 "MAP".to_string(),
9952 f.args,
9953 ))))
9954 }
9955 }
9956 DialectType::Snowflake => {
9957 // Presto MAP(ARRAY[k1,k2], ARRAY[v1,v2]) -> Snowflake OBJECT_CONSTRUCT(k1, v1, k2, v2)
9958 if let (Some(keys), Some(vals)) = (
9959 extract_array_elements(&keys_arg),
9960 extract_array_elements(&vals_arg),
9961 ) {
9962 if keys.len() == vals.len() {
9963 let mut interleaved = Vec::new();
9964 for (k, v) in keys.iter().zip(vals.iter()) {
9965 interleaved.push(k.clone());
9966 interleaved.push(v.clone());
9967 }
9968 Ok(Expression::Function(Box::new(Function::new(
9969 "OBJECT_CONSTRUCT".to_string(),
9970 interleaved,
9971 ))))
9972 } else {
9973 Ok(Expression::Function(Box::new(Function::new(
9974 "MAP".to_string(),
9975 f.args,
9976 ))))
9977 }
9978 } else {
9979 Ok(Expression::Function(Box::new(Function::new(
9980 "MAP".to_string(),
9981 f.args,
9982 ))))
9983 }
9984 }
9985 _ => Ok(Expression::Function(f)),
9986 }
9987 }
9988 // MAP() with 0 args from Spark -> MAP(ARRAY[], ARRAY[]) for Presto/Trino
9989 "MAP"
9990 if f.args.is_empty()
9991 && matches!(
9992 source,
9993 DialectType::Hive
9994 | DialectType::Spark
9995 | DialectType::Databricks
9996 )
9997 && matches!(
9998 target,
9999 DialectType::Presto
10000 | DialectType::Trino
10001 | DialectType::Athena
10002 ) =>
10003 {
10004 let empty_keys =
10005 Expression::Array(Box::new(crate::expressions::Array {
10006 expressions: vec![],
10007 }));
10008 let empty_vals =
10009 Expression::Array(Box::new(crate::expressions::Array {
10010 expressions: vec![],
10011 }));
10012 Ok(Expression::Function(Box::new(Function::new(
10013 "MAP".to_string(),
10014 vec![empty_keys, empty_vals],
10015 ))))
10016 }
10017 // MAP(k1, v1, k2, v2, ...) from Hive/Spark -> target-specific
10018 "MAP"
10019 if f.args.len() >= 2
10020 && f.args.len() % 2 == 0
10021 && matches!(
10022 source,
10023 DialectType::Hive
10024 | DialectType::Spark
10025 | DialectType::Databricks
10026 | DialectType::ClickHouse
10027 ) =>
10028 {
10029 let args = f.args;
10030 match target {
10031 DialectType::DuckDB => {
10032 // MAP([k1, k2], [v1, v2])
10033 let mut keys = Vec::new();
10034 let mut vals = Vec::new();
10035 for (i, arg) in args.into_iter().enumerate() {
10036 if i % 2 == 0 {
10037 keys.push(arg);
10038 } else {
10039 vals.push(arg);
10040 }
10041 }
10042 let keys_arr = Expression::Array(Box::new(
10043 crate::expressions::Array { expressions: keys },
10044 ));
10045 let vals_arr = Expression::Array(Box::new(
10046 crate::expressions::Array { expressions: vals },
10047 ));
10048 Ok(Expression::Function(Box::new(Function::new(
10049 "MAP".to_string(),
10050 vec![keys_arr, vals_arr],
10051 ))))
10052 }
10053 DialectType::Presto | DialectType::Trino => {
10054 // MAP(ARRAY[k1, k2], ARRAY[v1, v2])
10055 let mut keys = Vec::new();
10056 let mut vals = Vec::new();
10057 for (i, arg) in args.into_iter().enumerate() {
10058 if i % 2 == 0 {
10059 keys.push(arg);
10060 } else {
10061 vals.push(arg);
10062 }
10063 }
10064 let keys_arr = Expression::Array(Box::new(
10065 crate::expressions::Array { expressions: keys },
10066 ));
10067 let vals_arr = Expression::Array(Box::new(
10068 crate::expressions::Array { expressions: vals },
10069 ));
10070 Ok(Expression::Function(Box::new(Function::new(
10071 "MAP".to_string(),
10072 vec![keys_arr, vals_arr],
10073 ))))
10074 }
10075 DialectType::Snowflake => Ok(Expression::Function(Box::new(
10076 Function::new("OBJECT_CONSTRUCT".to_string(), args),
10077 ))),
10078 DialectType::ClickHouse => Ok(Expression::Function(Box::new(
10079 Function::new("map".to_string(), args),
10080 ))),
10081 _ => Ok(Expression::Function(Box::new(Function::new(
10082 "MAP".to_string(),
10083 args,
10084 )))),
10085 }
10086 }
10087 // COLLECT_LIST(x) -> ARRAY_AGG(x) for most targets
10088 "COLLECT_LIST" if f.args.len() >= 1 => {
10089 let name = match target {
10090 DialectType::Spark
10091 | DialectType::Databricks
10092 | DialectType::Hive => "COLLECT_LIST",
10093 DialectType::DuckDB
10094 | DialectType::PostgreSQL
10095 | DialectType::Redshift
10096 | DialectType::Snowflake
10097 | DialectType::BigQuery => "ARRAY_AGG",
10098 DialectType::Presto | DialectType::Trino => "ARRAY_AGG",
10099 _ => "ARRAY_AGG",
10100 };
10101 Ok(Expression::Function(Box::new(Function::new(
10102 name.to_string(),
10103 f.args,
10104 ))))
10105 }
10106 // COLLECT_SET(x) -> target-specific distinct array aggregation
10107 "COLLECT_SET" if f.args.len() >= 1 => {
10108 let name = match target {
10109 DialectType::Spark
10110 | DialectType::Databricks
10111 | DialectType::Hive => "COLLECT_SET",
10112 DialectType::Presto
10113 | DialectType::Trino
10114 | DialectType::Athena => "SET_AGG",
10115 DialectType::Snowflake => "ARRAY_UNIQUE_AGG",
10116 _ => "ARRAY_AGG",
10117 };
10118 Ok(Expression::Function(Box::new(Function::new(
10119 name.to_string(),
10120 f.args,
10121 ))))
10122 }
10123 // ISNAN(x) / IS_NAN(x) - normalize
10124 "ISNAN" | "IS_NAN" => {
10125 let name = match target {
10126 DialectType::Spark
10127 | DialectType::Databricks
10128 | DialectType::Hive => "ISNAN",
10129 DialectType::Presto
10130 | DialectType::Trino
10131 | DialectType::Athena => "IS_NAN",
10132 DialectType::BigQuery
10133 | DialectType::PostgreSQL
10134 | DialectType::Redshift => "IS_NAN",
10135 DialectType::ClickHouse => "IS_NAN",
10136 _ => "ISNAN",
10137 };
10138 Ok(Expression::Function(Box::new(Function::new(
10139 name.to_string(),
10140 f.args,
10141 ))))
10142 }
10143 // SPLIT_PART(str, delim, index) -> target-specific
10144 "SPLIT_PART" if f.args.len() == 3 => {
10145 match target {
10146 DialectType::Spark | DialectType::Databricks => {
10147 // Keep as SPLIT_PART (Spark 3.4+)
10148 Ok(Expression::Function(Box::new(Function::new(
10149 "SPLIT_PART".to_string(),
10150 f.args,
10151 ))))
10152 }
10153 DialectType::DuckDB
10154 | DialectType::PostgreSQL
10155 | DialectType::Snowflake
10156 | DialectType::Redshift
10157 | DialectType::Trino
10158 | DialectType::Presto => Ok(Expression::Function(Box::new(
10159 Function::new("SPLIT_PART".to_string(), f.args),
10160 ))),
10161 DialectType::Hive => {
10162 // SPLIT(str, delim)[index]
10163 // Complex conversion, just keep as-is for now
10164 Ok(Expression::Function(Box::new(Function::new(
10165 "SPLIT_PART".to_string(),
10166 f.args,
10167 ))))
10168 }
10169 _ => Ok(Expression::Function(Box::new(Function::new(
10170 "SPLIT_PART".to_string(),
10171 f.args,
10172 )))),
10173 }
10174 }
// JSON_EXTRACT(json, path) -> target-specific JSON extraction
"JSON_EXTRACT" | "JSON_EXTRACT_SCALAR" if f.args.len() == 2 => {
    // JSON_EXTRACT_SCALAR yields a scalar/text value while JSON_EXTRACT
    // yields a JSON fragment; several targets encode that distinction in
    // the function name (e.g. TSQL JSON_VALUE vs JSON_QUERY).
    let is_scalar = name == "JSON_EXTRACT_SCALAR";
    match target {
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => {
            let mut args = f.args;
            // Spark/Hive don't support Presto's TRY(expr) wrapper form here.
            // Mirror sqlglot by unwrapping TRY(expr) to expr before GET_JSON_OBJECT.
            if let Some(Expression::Function(inner)) = args.first() {
                if inner.name.eq_ignore_ascii_case("TRY")
                    && inner.args.len() == 1
                {
                    // Clone before assigning: `inner` immutably borrows
                    // `args`, so the inner argument cannot be moved out
                    // directly while that borrow is live.
                    let mut inner_args = inner.args.clone();
                    args[0] = inner_args.remove(0);
                }
            }
            Ok(Expression::Function(Box::new(Function::new(
                "GET_JSON_OBJECT".to_string(),
                args,
            ))))
        }
        DialectType::DuckDB | DialectType::SQLite => {
            // json -> path syntax
            let mut args = f.args;
            let json_expr = args.remove(0);
            let path = args.remove(0);
            Ok(Expression::JsonExtract(Box::new(
                crate::expressions::JsonExtractFunc {
                    this: json_expr,
                    path,
                    returning: None,
                    // Render using the arrow operator (json -> path)
                    // rather than a function call.
                    arrow_syntax: true,
                    hash_arrow_syntax: false,
                    wrapper_option: None,
                    quotes_option: None,
                    on_scalar_string: false,
                    on_error: None,
                },
            )))
        }
        DialectType::TSQL => {
            // TSQL: JSON_VALUE returns scalars, JSON_QUERY returns fragments.
            let func_name = if is_scalar {
                "JSON_VALUE"
            } else {
                "JSON_QUERY"
            };
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                f.args,
            ))))
        }
        DialectType::PostgreSQL | DialectType::Redshift => {
            // PostgreSQL: ..._PATH_TEXT returns text, ..._PATH returns JSON.
            let func_name = if is_scalar {
                "JSON_EXTRACT_PATH_TEXT"
            } else {
                "JSON_EXTRACT_PATH"
            };
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                f.args,
            ))))
        }
        // All other targets keep the original spelling unchanged.
        _ => Ok(Expression::Function(Box::new(Function::new(
            name.to_string(),
            f.args,
        )))),
    }
}
// SingleStore JSON_EXTRACT_JSON(json, key1, key2, ...) -> JSON_EXTRACT(json, '$.key1.key2' or '$.key1[key2]')
// BSON_EXTRACT_BSON(json, key1, ...) -> JSONB_EXTRACT(json, '$.key1')
"JSON_EXTRACT_JSON" | "BSON_EXTRACT_BSON"
    if f.args.len() >= 2
        && matches!(source, DialectType::SingleStore) =>
{
    let is_bson = name == "BSON_EXTRACT_BSON";
    let mut args = f.args;
    let json_expr = args.remove(0);

    // Build JSONPath from remaining arguments
    let mut path = String::from("$");
    for arg in &args {
        if let Expression::Literal(
            crate::expressions::Literal::String(s),
        ) = arg
        {
            // Check if it's a numeric string (array index)
            if s.parse::<i64>().is_ok() {
                path.push('[');
                path.push_str(s);
                path.push(']');
            } else {
                path.push('.');
                path.push_str(s);
            }
        }
        // NOTE(review): non-string-literal key arguments are silently
        // skipped here, and string keys are appended without escaping, so
        // keys containing '.' or quotes would yield an ambiguous path —
        // confirm whether dynamic/special keys can reach this point.
    }

    let target_func = if is_bson {
        "JSONB_EXTRACT"
    } else {
        "JSON_EXTRACT"
    };
    Ok(Expression::Function(Box::new(Function::new(
        target_func.to_string(),
        vec![json_expr, Expression::string(&path)],
    ))))
}
10284 // ARRAY_SUM(lambda, array) from Doris -> ClickHouse arraySum
10285 "ARRAY_SUM" if matches!(target, DialectType::ClickHouse) => {
10286 Ok(Expression::Function(Box::new(Function {
10287 name: "arraySum".to_string(),
10288 args: f.args,
10289 distinct: f.distinct,
10290 trailing_comments: f.trailing_comments,
10291 use_bracket_syntax: f.use_bracket_syntax,
10292 no_parens: f.no_parens,
10293 quoted: f.quoted,
10294 })))
10295 }
10296 // TSQL JSON_QUERY/JSON_VALUE -> target-specific
10297 // Note: For TSQL->TSQL, JsonQuery stays as Expression::JsonQuery (source transform not called)
10298 // and is handled by JsonQueryValueConvert action. This handles the case where
10299 // TSQL read transform converted JsonQuery to Function("JSON_QUERY") for cross-dialect.
10300 "JSON_QUERY" | "JSON_VALUE"
10301 if f.args.len() == 2
10302 && matches!(
10303 source,
10304 DialectType::TSQL | DialectType::Fabric
10305 ) =>
10306 {
10307 match target {
10308 DialectType::Spark
10309 | DialectType::Databricks
10310 | DialectType::Hive => Ok(Expression::Function(Box::new(
10311 Function::new("GET_JSON_OBJECT".to_string(), f.args),
10312 ))),
10313 _ => Ok(Expression::Function(Box::new(Function::new(
10314 name.to_string(),
10315 f.args,
10316 )))),
10317 }
10318 }
// UNIX_TIMESTAMP(x) -> TO_UNIXTIME(x) for Presto
"UNIX_TIMESTAMP" if f.args.len() == 1 => {
    let arg = f.args.into_iter().next().unwrap();
    // Hive-family sources parse a bare string argument with the implicit
    // 'yyyy-MM-dd HH:mm:ss' format, which other engines must emulate.
    let is_hive_source = matches!(
        source,
        DialectType::Hive
            | DialectType::Spark
            | DialectType::Databricks
    );
    match target {
        DialectType::DuckDB if is_hive_source => {
            // DuckDB: EPOCH(STRPTIME(x, '%Y-%m-%d %H:%M:%S'))
            let strptime =
                Expression::Function(Box::new(Function::new(
                    "STRPTIME".to_string(),
                    vec![arg, Expression::string("%Y-%m-%d %H:%M:%S")],
                )));
            Ok(Expression::Function(Box::new(Function::new(
                "EPOCH".to_string(),
                vec![strptime],
            ))))
        }
        DialectType::Presto | DialectType::Trino if is_hive_source => {
            // Presto: TO_UNIXTIME(COALESCE(TRY(DATE_PARSE(CAST(x AS VARCHAR), '%Y-%m-%d %T')), PARSE_DATETIME(DATE_FORMAT(x, '%Y-%m-%d %T'), 'yyyy-MM-dd HH:mm:ss')))
            // TRY(DATE_PARSE(...)) covers string inputs; if it fails
            // (e.g. the input was already a timestamp), COALESCE falls
            // back to re-formatting via DATE_FORMAT + PARSE_DATETIME.
            let cast_varchar =
                Expression::Cast(Box::new(crate::expressions::Cast {
                    this: arg.clone(),
                    to: DataType::VarChar {
                        length: None,
                        parenthesized_length: false,
                    },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                }));
            let date_parse =
                Expression::Function(Box::new(Function::new(
                    "DATE_PARSE".to_string(),
                    vec![
                        cast_varchar,
                        Expression::string("%Y-%m-%d %T"),
                    ],
                )));
            let try_expr = Expression::Function(Box::new(
                Function::new("TRY".to_string(), vec![date_parse]),
            ));
            let date_format =
                Expression::Function(Box::new(Function::new(
                    "DATE_FORMAT".to_string(),
                    vec![arg, Expression::string("%Y-%m-%d %T")],
                )));
            let parse_datetime =
                Expression::Function(Box::new(Function::new(
                    "PARSE_DATETIME".to_string(),
                    vec![
                        date_format,
                        Expression::string("yyyy-MM-dd HH:mm:ss"),
                    ],
                )));
            let coalesce =
                Expression::Function(Box::new(Function::new(
                    "COALESCE".to_string(),
                    vec![try_expr, parse_datetime],
                )));
            Ok(Expression::Function(Box::new(Function::new(
                "TO_UNIXTIME".to_string(),
                vec![coalesce],
            ))))
        }
        DialectType::Presto | DialectType::Trino => {
            // Non-Hive source: a plain rename suffices.
            Ok(Expression::Function(Box::new(Function::new(
                "TO_UNIXTIME".to_string(),
                vec![arg],
            ))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new(
            "UNIX_TIMESTAMP".to_string(),
            vec![arg],
        )))),
    }
}
10401 // TO_UNIX_TIMESTAMP(x) -> UNIX_TIMESTAMP(x) for Spark/Hive
10402 "TO_UNIX_TIMESTAMP" if f.args.len() >= 1 => match target {
10403 DialectType::Spark
10404 | DialectType::Databricks
10405 | DialectType::Hive => Ok(Expression::Function(Box::new(
10406 Function::new("UNIX_TIMESTAMP".to_string(), f.args),
10407 ))),
10408 _ => Ok(Expression::Function(Box::new(Function::new(
10409 "TO_UNIX_TIMESTAMP".to_string(),
10410 f.args,
10411 )))),
10412 },
10413 // CURDATE() -> CURRENT_DATE
10414 "CURDATE" => {
10415 Ok(Expression::CurrentDate(crate::expressions::CurrentDate))
10416 }
10417 // CURTIME() -> CURRENT_TIME
10418 "CURTIME" => {
10419 Ok(Expression::CurrentTime(crate::expressions::CurrentTime {
10420 precision: None,
10421 }))
10422 }
10423 // ARRAY_SORT(x) or ARRAY_SORT(x, lambda) -> SORT_ARRAY(x) for Hive (drop lambda)
10424 "ARRAY_SORT" if f.args.len() >= 1 => {
10425 match target {
10426 DialectType::Hive => {
10427 let mut args = f.args;
10428 args.truncate(1); // Drop lambda comparator
10429 Ok(Expression::Function(Box::new(Function::new(
10430 "SORT_ARRAY".to_string(),
10431 args,
10432 ))))
10433 }
10434 _ => Ok(Expression::Function(f)),
10435 }
10436 }
10437 // SORT_ARRAY(x) -> ARRAY_SORT(x) for non-Hive/Spark
10438 "SORT_ARRAY" if f.args.len() == 1 => match target {
10439 DialectType::Hive
10440 | DialectType::Spark
10441 | DialectType::Databricks => Ok(Expression::Function(f)),
10442 _ => Ok(Expression::Function(Box::new(Function::new(
10443 "ARRAY_SORT".to_string(),
10444 f.args,
10445 )))),
10446 },
// SORT_ARRAY(x, FALSE) -> ARRAY_REVERSE_SORT(x) for DuckDB, ARRAY_SORT(x, lambda) for Presto
"SORT_ARRAY" if f.args.len() == 2 => {
    // Only a literal FALSE second argument counts as descending; any other
    // expression falls into the ascending branch below.
    // NOTE(review): a non-literal second argument is therefore treated as
    // TRUE (ascending) for non-Hive targets — confirm this is intended.
    let is_desc =
        matches!(&f.args[1], Expression::Boolean(b) if !b.value);
    if is_desc {
        match target {
            DialectType::DuckDB => {
                Ok(Expression::Function(Box::new(Function::new(
                    "ARRAY_REVERSE_SORT".to_string(),
                    vec![f.args.into_iter().next().unwrap()],
                ))))
            }
            DialectType::Presto | DialectType::Trino => {
                // Presto has no descending flag; emit an explicit comparator:
                // ARRAY_SORT(x, (a, b) ->
                //   CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END)
                let arr_arg = f.args.into_iter().next().unwrap();
                let a =
                    Expression::Column(crate::expressions::Column {
                        name: crate::expressions::Identifier::new("a"),
                        table: None,
                        join_mark: false,
                        trailing_comments: Vec::new(),
                    });
                let b =
                    Expression::Column(crate::expressions::Column {
                        name: crate::expressions::Identifier::new("b"),
                        table: None,
                        join_mark: false,
                        trailing_comments: Vec::new(),
                    });
                // Comparator body: inverted comparison yields a
                // descending order.
                let case_expr = Expression::Case(Box::new(
                    crate::expressions::Case {
                        operand: None,
                        whens: vec![
                            (
                                Expression::Lt(Box::new(
                                    BinaryOp::new(a.clone(), b.clone()),
                                )),
                                Expression::Literal(Literal::Number(
                                    "1".to_string(),
                                )),
                            ),
                            (
                                Expression::Gt(Box::new(
                                    BinaryOp::new(a.clone(), b.clone()),
                                )),
                                Expression::Literal(Literal::Number(
                                    "-1".to_string(),
                                )),
                            ),
                        ],
                        else_: Some(Expression::Literal(
                            Literal::Number("0".to_string()),
                        )),
                        comments: Vec::new(),
                    },
                ));
                let lambda = Expression::Lambda(Box::new(
                    crate::expressions::LambdaExpr {
                        parameters: vec![
                            crate::expressions::Identifier::new("a"),
                            crate::expressions::Identifier::new("b"),
                        ],
                        body: case_expr,
                        colon: false,
                        parameter_types: Vec::new(),
                    },
                ));
                Ok(Expression::Function(Box::new(Function::new(
                    "ARRAY_SORT".to_string(),
                    vec![arr_arg, lambda],
                ))))
            }
            _ => Ok(Expression::Function(f)),
        }
    } else {
        // SORT_ARRAY(x, TRUE) -> ARRAY_SORT(x)
        // NOTE(review): only Hive keeps SORT_ARRAY here, while the 1-arg
        // arm above also keeps it for Spark/Databricks — confirm whether
        // the asymmetry is deliberate (Spark does support ARRAY_SORT).
        match target {
            DialectType::Hive => Ok(Expression::Function(f)),
            _ => Ok(Expression::Function(Box::new(Function::new(
                "ARRAY_SORT".to_string(),
                vec![f.args.into_iter().next().unwrap()],
            )))),
        }
    }
}
// LEFT(x, n), RIGHT(x, n) -> SUBSTRING for targets without LEFT/RIGHT.
// Both arms follow the same shape: rewrite for Hive/Presto/Trino/Athena,
// add a string CAST for TSQL/Fabric -> Spark/Databricks, else pass through.
"LEFT" if f.args.len() == 2 => {
    match target {
        // These targets have no LEFT(): LEFT(x, n) -> SUBSTRING(x, 1, n).
        DialectType::Hive
        | DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            let x = f.args[0].clone();
            let n = f.args[1].clone();
            Ok(Expression::Function(Box::new(Function::new(
                "SUBSTRING".to_string(),
                vec![x, Expression::number(1), n],
            ))))
        }
        // Guard on `source`: only TSQL/Fabric inputs get the CAST wrapper.
        DialectType::Spark | DialectType::Databricks
            if matches!(
                source,
                DialectType::TSQL | DialectType::Fabric
            ) =>
        {
            // TSQL LEFT(x, n) -> LEFT(CAST(x AS STRING), n) for Spark.
            // VarChar with no length renders as the target's string type.
            let x = f.args[0].clone();
            let n = f.args[1].clone();
            let cast_x = Expression::Cast(Box::new(Cast {
                this: x,
                to: DataType::VarChar {
                    length: None,
                    parenthesized_length: false,
                },
                double_colon_syntax: false,
                trailing_comments: Vec::new(),
                format: None,
                default: None,
            }));
            Ok(Expression::Function(Box::new(Function::new(
                "LEFT".to_string(),
                vec![cast_x, n],
            ))))
        }
        // All other targets keep LEFT as-is.
        _ => Ok(Expression::Function(f)),
    }
}
"RIGHT" if f.args.len() == 2 => {
    match target {
        DialectType::Hive
        | DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            let x = f.args[0].clone();
            let n = f.args[1].clone();
            // RIGHT(x, n) -> SUBSTRING(x, LENGTH(x) - (n - 1)).
            // The explicit Paren node preserves the (n - 1) grouping in
            // the generated SQL.
            let len_x = Expression::Function(Box::new(Function::new(
                "LENGTH".to_string(),
                vec![x.clone()],
            )));
            let n_minus_1 = Expression::Sub(Box::new(
                crate::expressions::BinaryOp::new(
                    n,
                    Expression::number(1),
                ),
            ));
            let n_minus_1_paren = Expression::Paren(Box::new(
                crate::expressions::Paren {
                    this: n_minus_1,
                    trailing_comments: Vec::new(),
                },
            ));
            let offset = Expression::Sub(Box::new(
                crate::expressions::BinaryOp::new(
                    len_x,
                    n_minus_1_paren,
                ),
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "SUBSTRING".to_string(),
                vec![x, offset],
            ))))
        }
        DialectType::Spark | DialectType::Databricks
            if matches!(
                source,
                DialectType::TSQL | DialectType::Fabric
            ) =>
        {
            // TSQL RIGHT(x, n) -> RIGHT(CAST(x AS STRING), n) for Spark.
            let x = f.args[0].clone();
            let n = f.args[1].clone();
            let cast_x = Expression::Cast(Box::new(Cast {
                this: x,
                to: DataType::VarChar {
                    length: None,
                    parenthesized_length: false,
                },
                double_colon_syntax: false,
                trailing_comments: Vec::new(),
                format: None,
                default: None,
            }));
            Ok(Expression::Function(Box::new(Function::new(
                "RIGHT".to_string(),
                vec![cast_x, n],
            ))))
        }
        _ => Ok(Expression::Function(f)),
    }
}
// MAP_FROM_ARRAYS(keys, vals) -> target-specific map construction.
"MAP_FROM_ARRAYS" if f.args.len() == 2 => match target {
    DialectType::Snowflake => Ok(Expression::Function(Box::new(
        Function::new("OBJECT_CONSTRUCT".to_string(), f.args),
    ))),
    // Spark/Databricks support MAP_FROM_ARRAYS natively; rebuilt with the
    // same name so args are consumed uniformly across arms.
    DialectType::Spark | DialectType::Databricks => {
        Ok(Expression::Function(Box::new(Function::new(
            "MAP_FROM_ARRAYS".to_string(),
            f.args,
        ))))
    }
    // Default: MAP(keys, vals).
    _ => Ok(Expression::Function(Box::new(Function::new(
        "MAP".to_string(),
        f.args,
    )))),
},
// LIKE(foo, 'pat') -> foo LIKE 'pat'; LIKE(foo, 'pat', '!') -> foo LIKE 'pat' ESCAPE '!'
// SQLite uses LIKE(pattern, string[, escape]) with the first two args in
// reverse order; the optional escape stays in third position either way.
"LIKE" if f.args.len() >= 2 => {
    let (this, pattern) = if matches!(source, DialectType::SQLite) {
        // SQLite: LIKE(pattern, string) -> string LIKE pattern
        (f.args[1].clone(), f.args[0].clone())
    } else {
        // Standard: LIKE(string, pattern) -> string LIKE pattern
        (f.args[0].clone(), f.args[1].clone())
    };
    let escape = if f.args.len() >= 3 {
        Some(f.args[2].clone())
    } else {
        None
    };
    Ok(Expression::Like(Box::new(crate::expressions::LikeOp {
        left: this,
        right: pattern,
        escape,
        quantifier: None,
    })))
}
// ILIKE(foo, 'pat') -> foo ILIKE 'pat' (no SQLite-style arg swap here).
"ILIKE" if f.args.len() >= 2 => {
    let this = f.args[0].clone();
    let pattern = f.args[1].clone();
    let escape = if f.args.len() >= 3 {
        Some(f.args[2].clone())
    } else {
        None
    };
    Ok(Expression::ILike(Box::new(crate::expressions::LikeOp {
        left: this,
        right: pattern,
        escape,
        quantifier: None,
    })))
}
// CHAR(n) -> CHR(n) for non-MySQL/non-TSQL targets.
"CHAR" if f.args.len() == 1 => match target {
    DialectType::MySQL
    | DialectType::SingleStore
    | DialectType::TSQL => Ok(Expression::Function(f)),
    _ => Ok(Expression::Function(Box::new(Function::new(
        "CHR".to_string(),
        f.args,
    )))),
},
// CONCAT(a, b) -> a || b, only for ClickHouse/MySQL -> PostgreSQL.
"CONCAT"
    if f.args.len() == 2
        && matches!(target, DialectType::PostgreSQL)
        && matches!(
            source,
            DialectType::ClickHouse | DialectType::MySQL
        ) =>
{
    // pop() twice yields (right, left) since Vec pops from the back.
    let mut args = f.args;
    let right = args.pop().unwrap();
    let left = args.pop().unwrap();
    Ok(Expression::DPipe(Box::new(crate::expressions::DPipe {
        this: Box::new(left),
        expression: Box::new(right),
        safe: None,
    })))
}
// ARRAY_TO_STRING(arr, delim) -> target-specific.
"ARRAY_TO_STRING" if f.args.len() >= 2 => match target {
    DialectType::Presto | DialectType::Trino => {
        Ok(Expression::Function(Box::new(Function::new(
            "ARRAY_JOIN".to_string(),
            f.args,
        ))))
    }
    // NOTE(review): STRING_AGG in TSQL is an aggregate over rows, not an
    // array joiner — confirm this mapping is intended for TSQL targets.
    DialectType::TSQL => Ok(Expression::Function(Box::new(
        Function::new("STRING_AGG".to_string(), f.args),
    ))),
    _ => Ok(Expression::Function(f)),
},
// ARRAY_CONCAT / LIST_CONCAT -> target-specific spelling of 2-array concat.
"ARRAY_CONCAT" | "LIST_CONCAT" if f.args.len() == 2 => match target {
    DialectType::Spark
    | DialectType::Databricks
    | DialectType::Hive => Ok(Expression::Function(Box::new(
        Function::new("CONCAT".to_string(), f.args),
    ))),
    DialectType::Snowflake => Ok(Expression::Function(Box::new(
        Function::new("ARRAY_CAT".to_string(), f.args),
    ))),
    DialectType::Redshift => Ok(Expression::Function(Box::new(
        Function::new("ARRAY_CONCAT".to_string(), f.args),
    ))),
    DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
        Function::new("ARRAY_CAT".to_string(), f.args),
    ))),
    DialectType::DuckDB => Ok(Expression::Function(Box::new(
        Function::new("LIST_CONCAT".to_string(), f.args),
    ))),
    DialectType::Presto | DialectType::Trino => {
        Ok(Expression::Function(Box::new(Function::new(
            "CONCAT".to_string(),
            f.args,
        ))))
    }
    DialectType::BigQuery => Ok(Expression::Function(Box::new(
        Function::new("ARRAY_CONCAT".to_string(), f.args),
    ))),
    _ => Ok(Expression::Function(f)),
},
// ARRAY_CONTAINS(arr, x) / HAS(arr, x) / CONTAINS(arr, x) normalization.
"HAS" if f.args.len() == 2 => match target {
    DialectType::Spark
    | DialectType::Databricks
    | DialectType::Hive => Ok(Expression::Function(Box::new(
        Function::new("ARRAY_CONTAINS".to_string(), f.args),
    ))),
    DialectType::Presto | DialectType::Trino => {
        Ok(Expression::Function(Box::new(Function::new(
            "CONTAINS".to_string(),
            f.args,
        ))))
    }
    _ => Ok(Expression::Function(f)),
},
// NVL(a, b, c, d) -> COALESCE(a, b, c, d); only fires for >2 args so the
// common 2-arg NVL keeps whatever handling it has elsewhere.
"NVL" if f.args.len() > 2 => Ok(Expression::Function(Box::new(
    Function::new("COALESCE".to_string(), f.args),
))),
// ISNULL(x) in MySQL -> (x IS NULL); fires only for MySQL -> MySQL.
"ISNULL"
    if f.args.len() == 1
        && matches!(source, DialectType::MySQL)
        && matches!(target, DialectType::MySQL) =>
{
    let arg = f.args.into_iter().next().unwrap();
    // Wrapped in Paren so the output renders as "(x IS NULL)".
    Ok(Expression::Paren(Box::new(crate::expressions::Paren {
        this: Expression::IsNull(Box::new(
            crate::expressions::IsNull {
                this: arg,
                not: false,
                postfix_form: false,
            },
        )),
        trailing_comments: Vec::new(),
    })))
}
// MONTHNAME(x) -> DATE_FORMAT(x, '%M') for MySQL targets.
// NOTE(review): the comment elsewhere says "MySQL -> MySQL", but the guard
// only checks `target`, so any source dialect is rewritten — confirm intent.
"MONTHNAME"
    if f.args.len() == 1 && matches!(target, DialectType::MySQL) =>
{
    let arg = f.args.into_iter().next().unwrap();
    Ok(Expression::Function(Box::new(Function::new(
        "DATE_FORMAT".to_string(),
        vec![arg, Expression::string("%M")],
    ))))
}
// ClickHouse splitByString('s', x) -> DuckDB STR_SPLIT(x, 's') / Hive
// SPLIT(x, CONCAT('\\Q', 's', '\\E')).
// ClickHouse puts the separator FIRST; most targets want (string, sep).
"SPLITBYSTRING" if f.args.len() == 2 => {
    let sep = f.args[0].clone();
    let str_arg = f.args[1].clone();
    match target {
        DialectType::DuckDB => Ok(Expression::Function(Box::new(
            Function::new("STR_SPLIT".to_string(), vec![str_arg, sep]),
        ))),
        DialectType::Doris => {
            Ok(Expression::Function(Box::new(Function::new(
                "SPLIT_BY_STRING".to_string(),
                vec![str_arg, sep],
            ))))
        }
        DialectType::Hive
        | DialectType::Spark
        | DialectType::Databricks => {
            // Hive's SPLIT takes a regex; \Q...\E quotes the separator
            // so it is matched literally: SPLIT(x, CONCAT('\\Q', sep, '\\E')).
            let escaped =
                Expression::Function(Box::new(Function::new(
                    "CONCAT".to_string(),
                    vec![
                        Expression::string("\\Q"),
                        sep,
                        Expression::string("\\E"),
                    ],
                )));
            Ok(Expression::Function(Box::new(Function::new(
                "SPLIT".to_string(),
                vec![str_arg, escaped],
            ))))
        }
        _ => Ok(Expression::Function(f)),
    }
}
// ClickHouse splitByRegexp('pattern', x) -> DuckDB STR_SPLIT_REGEX(x, 'pattern').
// Same arg-order flip as splitByString; pattern passes through unquoted.
"SPLITBYREGEXP" if f.args.len() == 2 => {
    let sep = f.args[0].clone();
    let str_arg = f.args[1].clone();
    match target {
        DialectType::DuckDB => {
            Ok(Expression::Function(Box::new(Function::new(
                "STR_SPLIT_REGEX".to_string(),
                vec![str_arg, sep],
            ))))
        }
        DialectType::Hive
        | DialectType::Spark
        | DialectType::Databricks => {
            Ok(Expression::Function(Box::new(Function::new(
                "SPLIT".to_string(),
                vec![str_arg, sep],
            ))))
        }
        _ => Ok(Expression::Function(f)),
    }
}
// ClickHouse toMonday(x) -> DATE_TRUNC('WEEK', x); Doris flips the args
// to DATE_TRUNC(x, 'WEEK').
"TOMONDAY" => {
    if f.args.len() == 1 {
        let arg = f.args.into_iter().next().unwrap();
        match target {
            DialectType::Doris => {
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_TRUNC".to_string(),
                    vec![arg, Expression::string("WEEK")],
                ))))
            }
            _ => Ok(Expression::Function(Box::new(Function::new(
                "DATE_TRUNC".to_string(),
                vec![Expression::string("WEEK"), arg],
            )))),
        }
    } else {
        // Unexpected arity: leave untouched.
        Ok(Expression::Function(f))
    }
}
// COLLECT_LIST(x): native on Spark/Databricks/Hive, ARRAY_AGG elsewhere.
"COLLECT_LIST" if f.args.len() == 1 => match target {
    DialectType::Spark
    | DialectType::Databricks
    | DialectType::Hive => Ok(Expression::Function(f)),
    _ => Ok(Expression::Function(Box::new(Function::new(
        "ARRAY_AGG".to_string(),
        f.args,
    )))),
},
// TO_CHAR(x) with 1 arg (no format) -> CAST(x AS STRING) for Doris.
"TO_CHAR"
    if f.args.len() == 1 && matches!(target, DialectType::Doris) =>
{
    let arg = f.args.into_iter().next().unwrap();
    Ok(Expression::Cast(Box::new(crate::expressions::Cast {
        this: arg,
        // Doris spells the string type "STRING"; modeled as Custom.
        to: DataType::Custom {
            name: "STRING".to_string(),
        },
        double_colon_syntax: false,
        trailing_comments: Vec::new(),
        format: None,
        default: None,
    })))
}
// Oracle DBMS_RANDOM.VALUE() -> RANDOM() for PostgreSQL.
"DBMS_RANDOM.VALUE" if f.args.is_empty() => match target {
    DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
        Function::new("RANDOM".to_string(), vec![]),
    ))),
    _ => Ok(Expression::Function(f)),
},
// ClickHouse formatDateTime -> DATE_FORMAT for MySQL; args pass through
// unchanged (format-string tokens are assumed compatible — TODO confirm).
"FORMATDATETIME" if f.args.len() >= 2 => match target {
    DialectType::MySQL => Ok(Expression::Function(Box::new(
        Function::new("DATE_FORMAT".to_string(), f.args),
    ))),
    _ => Ok(Expression::Function(f)),
},
// TSQL REPLICATE('x', n) -> REPEAT('x', n) for non-TSQL targets.
"REPLICATE" if f.args.len() == 2 => match target {
    DialectType::TSQL => Ok(Expression::Function(f)),
    _ => Ok(Expression::Function(Box::new(Function::new(
        "REPEAT".to_string(),
        f.args,
    )))),
},
// LEN(x) -> LENGTH(x) for non-TSQL targets.
// No CAST needed when arg is already a string literal.
"LEN" if f.args.len() == 1 => {
    match target {
        DialectType::TSQL => Ok(Expression::Function(f)),
        DialectType::Spark | DialectType::Databricks => {
            let arg = f.args.into_iter().next().unwrap();
            // Don't wrap string literals with CAST - they're already strings.
            let is_string = matches!(
                &arg,
                Expression::Literal(
                    crate::expressions::Literal::String(_)
                )
            );
            let final_arg = if is_string {
                arg
            } else {
                // Non-literal: CAST to string (VarChar w/o length) so
                // LENGTH behaves like TSQL LEN on non-string inputs.
                Expression::Cast(Box::new(Cast {
                    this: arg,
                    to: DataType::VarChar {
                        length: None,
                        parenthesized_length: false,
                    },
                    double_colon_syntax: false,
                    trailing_comments: Vec::new(),
                    format: None,
                    default: None,
                }))
            };
            Ok(Expression::Function(Box::new(Function::new(
                "LENGTH".to_string(),
                vec![final_arg],
            ))))
        }
        // Other targets: plain rename, no CAST.
        _ => {
            let arg = f.args.into_iter().next().unwrap();
            Ok(Expression::Function(Box::new(Function::new(
                "LENGTH".to_string(),
                vec![arg],
            ))))
        }
    }
}
// TSQL COUNT_BIG(x) -> COUNT(x) for non-TSQL targets.
"COUNT_BIG" if f.args.len() == 1 => match target {
    DialectType::TSQL => Ok(Expression::Function(f)),
    _ => Ok(Expression::Function(Box::new(Function::new(
        "COUNT".to_string(),
        f.args,
    )))),
},
// TSQL DATEFROMPARTS(y, m, d) -> MAKE_DATE(y, m, d) for non-TSQL targets.
"DATEFROMPARTS" if f.args.len() == 3 => match target {
    DialectType::TSQL => Ok(Expression::Function(f)),
    _ => Ok(Expression::Function(Box::new(Function::new(
        "MAKE_DATE".to_string(),
        f.args,
    )))),
},
// REGEXP_LIKE(str, pattern[, flags]) -> RegexpLike AST node so each
// generator can render its own spelling; DuckDB gets REGEXP_MATCHES directly.
"REGEXP_LIKE" if f.args.len() >= 2 => {
    let str_expr = f.args[0].clone();
    let pattern = f.args[1].clone();
    let flags = if f.args.len() >= 3 {
        Some(f.args[2].clone())
    } else {
        None
    };
    match target {
        DialectType::DuckDB => {
            let mut new_args = vec![str_expr, pattern];
            if let Some(fl) = flags {
                new_args.push(fl);
            }
            Ok(Expression::Function(Box::new(Function::new(
                "REGEXP_MATCHES".to_string(),
                new_args,
            ))))
        }
        _ => Ok(Expression::RegexpLike(Box::new(
            crate::expressions::RegexpFunc {
                this: str_expr,
                pattern,
                flags,
            },
        ))),
    }
}
// ClickHouse arrayJoin -> UNNEST for PostgreSQL; otherwise untouched.
"ARRAYJOIN" if f.args.len() == 1 => match target {
    DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
        Function::new("UNNEST".to_string(), f.args),
    ))),
    _ => Ok(Expression::Function(f)),
},
// TSQL DATETIMEFROMPARTS(y, m, d, h, mi, s, ms) -> MAKE_TIMESTAMP (DuckDB)
// or TIMESTAMP_FROM_PARTS (Snowflake); the last arg is milliseconds and
// each target needs it folded into its own seconds/nanoseconds convention.
"DATETIMEFROMPARTS" if f.args.len() == 7 => {
    match target {
        DialectType::TSQL => Ok(Expression::Function(f)),
        DialectType::DuckDB => {
            // MAKE_TIMESTAMP(y, m, d, h, mi, s + (ms / 1000.0))
            let mut args = f.args;
            let ms = args.pop().unwrap();
            let s = args.pop().unwrap();
            // s + (ms / 1000.0) — "1000.0" keeps the division fractional.
            let ms_frac = Expression::Div(Box::new(BinaryOp::new(
                ms,
                Expression::Literal(
                    crate::expressions::Literal::Number(
                        "1000.0".to_string(),
                    ),
                ),
            )));
            let s_with_ms = Expression::Add(Box::new(BinaryOp::new(
                s,
                Expression::Paren(Box::new(Paren {
                    this: ms_frac,
                    trailing_comments: vec![],
                })),
            )));
            args.push(s_with_ms);
            Ok(Expression::Function(Box::new(Function::new(
                "MAKE_TIMESTAMP".to_string(),
                args,
            ))))
        }
        DialectType::Snowflake => {
            // TIMESTAMP_FROM_PARTS(y, m, d, h, mi, s, ms * 1000000):
            // Snowflake's 7th argument is nanoseconds.
            let mut args = f.args;
            let ms = args.pop().unwrap();
            let ns = Expression::Mul(Box::new(BinaryOp::new(
                ms,
                Expression::number(1000000),
            )));
            args.push(ns);
            Ok(Expression::Function(Box::new(Function::new(
                "TIMESTAMP_FROM_PARTS".to_string(),
                args,
            ))))
        }
        _ => {
            // Default: keep function name for other targets.
            Ok(Expression::Function(Box::new(Function::new(
                "DATETIMEFROMPARTS".to_string(),
                f.args,
            ))))
        }
    }
}
11084 // CONVERT(type, expr [, style]) -> CAST(expr AS type) for non-TSQL targets
11085 // TRY_CONVERT(type, expr [, style]) -> TRY_CAST(expr AS type) for non-TSQL targets
11086 "CONVERT" | "TRY_CONVERT" if f.args.len() >= 2 => {
11087 let is_try = name == "TRY_CONVERT";
11088 let type_expr = f.args[0].clone();
11089 let value_expr = f.args[1].clone();
11090 let style = if f.args.len() >= 3 {
11091 Some(&f.args[2])
11092 } else {
11093 None
11094 };
11095
11096 // For TSQL->TSQL, normalize types and preserve CONVERT/TRY_CONVERT
11097 if matches!(target, DialectType::TSQL) {
11098 let normalized_type = match &type_expr {
11099 Expression::DataType(dt) => {
11100 let new_dt = match dt {
11101 DataType::Int { .. } => DataType::Custom {
11102 name: "INTEGER".to_string(),
11103 },
11104 _ => dt.clone(),
11105 };
11106 Expression::DataType(new_dt)
11107 }
11108 Expression::Identifier(id) => {
11109 let upper = id.name.to_uppercase();
11110 let normalized = match upper.as_str() {
11111 "INT" => "INTEGER",
11112 _ => &upper,
11113 };
11114 Expression::Identifier(
11115 crate::expressions::Identifier::new(normalized),
11116 )
11117 }
11118 Expression::Column(col) => {
11119 let upper = col.name.name.to_uppercase();
11120 let normalized = match upper.as_str() {
11121 "INT" => "INTEGER",
11122 _ => &upper,
11123 };
11124 Expression::Identifier(
11125 crate::expressions::Identifier::new(normalized),
11126 )
11127 }
11128 _ => type_expr.clone(),
11129 };
11130 let func_name = if is_try { "TRY_CONVERT" } else { "CONVERT" };
11131 let mut new_args = vec![normalized_type, value_expr];
11132 if let Some(s) = style {
11133 new_args.push(s.clone());
11134 }
11135 return Ok(Expression::Function(Box::new(Function::new(
11136 func_name.to_string(),
11137 new_args,
11138 ))));
11139 }
11140
11141 // For other targets: CONVERT(type, expr) -> CAST(expr AS type)
11142 fn expr_to_datatype(e: &Expression) -> Option<DataType> {
11143 match e {
11144 Expression::DataType(dt) => {
11145 // Convert NVARCHAR/NCHAR Custom types to standard VarChar/Char
11146 match dt {
11147 DataType::Custom { name }
11148 if name.starts_with("NVARCHAR(")
11149 || name.starts_with("NCHAR(") =>
11150 {
11151 // Extract the length from "NVARCHAR(200)" or "NCHAR(40)"
11152 let inner = &name[name.find('(').unwrap() + 1
11153 ..name.len() - 1];
11154 if inner.eq_ignore_ascii_case("MAX") {
11155 Some(DataType::Text)
11156 } else if let Ok(len) = inner.parse::<u32>() {
11157 if name.starts_with("NCHAR") {
11158 Some(DataType::Char {
11159 length: Some(len),
11160 })
11161 } else {
11162 Some(DataType::VarChar {
11163 length: Some(len),
11164 parenthesized_length: false,
11165 })
11166 }
11167 } else {
11168 Some(dt.clone())
11169 }
11170 }
11171 DataType::Custom { name } if name == "NVARCHAR" => {
11172 Some(DataType::VarChar {
11173 length: None,
11174 parenthesized_length: false,
11175 })
11176 }
11177 DataType::Custom { name } if name == "NCHAR" => {
11178 Some(DataType::Char { length: None })
11179 }
11180 DataType::Custom { name }
11181 if name == "NVARCHAR(MAX)"
11182 || name == "VARCHAR(MAX)" =>
11183 {
11184 Some(DataType::Text)
11185 }
11186 _ => Some(dt.clone()),
11187 }
11188 }
11189 Expression::Identifier(id) => {
11190 let name = id.name.to_uppercase();
11191 match name.as_str() {
11192 "INT" | "INTEGER" => Some(DataType::Int {
11193 length: None,
11194 integer_spelling: false,
11195 }),
11196 "BIGINT" => Some(DataType::BigInt { length: None }),
11197 "SMALLINT" => {
11198 Some(DataType::SmallInt { length: None })
11199 }
11200 "TINYINT" => {
11201 Some(DataType::TinyInt { length: None })
11202 }
11203 "FLOAT" => Some(DataType::Float {
11204 precision: None,
11205 scale: None,
11206 real_spelling: false,
11207 }),
11208 "REAL" => Some(DataType::Float {
11209 precision: None,
11210 scale: None,
11211 real_spelling: true,
11212 }),
11213 "DATETIME" | "DATETIME2" => {
11214 Some(DataType::Timestamp {
11215 timezone: false,
11216 precision: None,
11217 })
11218 }
11219 "DATE" => Some(DataType::Date),
11220 "BIT" => Some(DataType::Boolean),
11221 "TEXT" => Some(DataType::Text),
11222 "NUMERIC" => Some(DataType::Decimal {
11223 precision: None,
11224 scale: None,
11225 }),
11226 "MONEY" => Some(DataType::Decimal {
11227 precision: Some(15),
11228 scale: Some(4),
11229 }),
11230 "SMALLMONEY" => Some(DataType::Decimal {
11231 precision: Some(6),
11232 scale: Some(4),
11233 }),
11234 "VARCHAR" => Some(DataType::VarChar {
11235 length: None,
11236 parenthesized_length: false,
11237 }),
11238 "NVARCHAR" => Some(DataType::VarChar {
11239 length: None,
11240 parenthesized_length: false,
11241 }),
11242 "CHAR" => Some(DataType::Char { length: None }),
11243 "NCHAR" => Some(DataType::Char { length: None }),
11244 _ => Some(DataType::Custom { name }),
11245 }
11246 }
11247 Expression::Column(col) => {
11248 let name = col.name.name.to_uppercase();
11249 match name.as_str() {
11250 "INT" | "INTEGER" => Some(DataType::Int {
11251 length: None,
11252 integer_spelling: false,
11253 }),
11254 "BIGINT" => Some(DataType::BigInt { length: None }),
11255 "FLOAT" => Some(DataType::Float {
11256 precision: None,
11257 scale: None,
11258 real_spelling: false,
11259 }),
11260 "DATETIME" | "DATETIME2" => {
11261 Some(DataType::Timestamp {
11262 timezone: false,
11263 precision: None,
11264 })
11265 }
11266 "DATE" => Some(DataType::Date),
11267 "NUMERIC" => Some(DataType::Decimal {
11268 precision: None,
11269 scale: None,
11270 }),
11271 "VARCHAR" => Some(DataType::VarChar {
11272 length: None,
11273 parenthesized_length: false,
11274 }),
11275 "NVARCHAR" => Some(DataType::VarChar {
11276 length: None,
11277 parenthesized_length: false,
11278 }),
11279 "CHAR" => Some(DataType::Char { length: None }),
11280 "NCHAR" => Some(DataType::Char { length: None }),
11281 _ => Some(DataType::Custom { name }),
11282 }
11283 }
11284 // NVARCHAR(200) parsed as Function("NVARCHAR", [200])
11285 Expression::Function(f) => {
11286 let fname = f.name.to_uppercase();
11287 match fname.as_str() {
11288 "VARCHAR" | "NVARCHAR" => {
11289 let len = f.args.first().and_then(|a| {
11290 if let Expression::Literal(
11291 crate::expressions::Literal::Number(n),
11292 ) = a
11293 {
11294 n.parse::<u32>().ok()
11295 } else if let Expression::Identifier(id) = a
11296 {
11297 if id.name.eq_ignore_ascii_case("MAX") {
11298 None
11299 } else {
11300 None
11301 }
11302 } else {
11303 None
11304 }
11305 });
11306 // Check for VARCHAR(MAX) -> TEXT
11307 let is_max = f.args.first().map_or(false, |a| {
11308 matches!(a, Expression::Identifier(id) if id.name.eq_ignore_ascii_case("MAX"))
11309 || matches!(a, Expression::Column(col) if col.name.name.eq_ignore_ascii_case("MAX"))
11310 });
11311 if is_max {
11312 Some(DataType::Text)
11313 } else {
11314 Some(DataType::VarChar {
11315 length: len,
11316 parenthesized_length: false,
11317 })
11318 }
11319 }
11320 "NCHAR" | "CHAR" => {
11321 let len = f.args.first().and_then(|a| {
11322 if let Expression::Literal(
11323 crate::expressions::Literal::Number(n),
11324 ) = a
11325 {
11326 n.parse::<u32>().ok()
11327 } else {
11328 None
11329 }
11330 });
11331 Some(DataType::Char { length: len })
11332 }
11333 "NUMERIC" | "DECIMAL" => {
11334 let precision = f.args.first().and_then(|a| {
11335 if let Expression::Literal(
11336 crate::expressions::Literal::Number(n),
11337 ) = a
11338 {
11339 n.parse::<u32>().ok()
11340 } else {
11341 None
11342 }
11343 });
11344 let scale = f.args.get(1).and_then(|a| {
11345 if let Expression::Literal(
11346 crate::expressions::Literal::Number(n),
11347 ) = a
11348 {
11349 n.parse::<u32>().ok()
11350 } else {
11351 None
11352 }
11353 });
11354 Some(DataType::Decimal { precision, scale })
11355 }
11356 _ => None,
11357 }
11358 }
11359 _ => None,
11360 }
11361 }
11362
11363 if let Some(mut dt) = expr_to_datatype(&type_expr) {
11364 // For TSQL source: VARCHAR/CHAR without length defaults to 30
11365 let is_tsql_source =
11366 matches!(source, DialectType::TSQL | DialectType::Fabric);
11367 if is_tsql_source {
11368 match &dt {
11369 DataType::VarChar { length: None, .. } => {
11370 dt = DataType::VarChar {
11371 length: Some(30),
11372 parenthesized_length: false,
11373 };
11374 }
11375 DataType::Char { length: None } => {
11376 dt = DataType::Char { length: Some(30) };
11377 }
11378 _ => {}
11379 }
11380 }
11381
11382 // Determine if this is a string type
11383 let is_string_type = matches!(
11384 dt,
11385 DataType::VarChar { .. }
11386 | DataType::Char { .. }
11387 | DataType::Text
11388 ) || matches!(&dt, DataType::Custom { name } if name == "NVARCHAR" || name == "NCHAR"
11389 || name.starts_with("NVARCHAR(") || name.starts_with("NCHAR(")
11390 || name.starts_with("VARCHAR(") || name == "VARCHAR"
11391 || name == "STRING");
11392
11393 // Determine if this is a date/time type
11394 let is_datetime_type = matches!(
11395 dt,
11396 DataType::Timestamp { .. } | DataType::Date
11397 ) || matches!(&dt, DataType::Custom { name } if name == "DATETIME"
11398 || name == "DATETIME2" || name == "SMALLDATETIME");
11399
11400 // Check for date conversion with style
11401 if style.is_some() {
11402 let style_num = style.and_then(|s| {
11403 if let Expression::Literal(
11404 crate::expressions::Literal::Number(n),
11405 ) = s
11406 {
11407 n.parse::<u32>().ok()
11408 } else {
11409 None
11410 }
11411 });
11412
11413 // TSQL CONVERT date styles (Java format)
11414 let format_str = style_num.and_then(|n| match n {
11415 101 => Some("MM/dd/yyyy"),
11416 102 => Some("yyyy.MM.dd"),
11417 103 => Some("dd/MM/yyyy"),
11418 104 => Some("dd.MM.yyyy"),
11419 105 => Some("dd-MM-yyyy"),
11420 108 => Some("HH:mm:ss"),
11421 110 => Some("MM-dd-yyyy"),
11422 112 => Some("yyyyMMdd"),
11423 120 | 20 => Some("yyyy-MM-dd HH:mm:ss"),
11424 121 | 21 => Some("yyyy-MM-dd HH:mm:ss.SSSSSS"),
11425 126 | 127 => Some("yyyy-MM-dd'T'HH:mm:ss.SSS"),
11426 _ => None,
11427 });
11428
11429 // Non-string, non-datetime types with style: just CAST, ignore the style
11430 if !is_string_type && !is_datetime_type {
11431 let cast_expr = if is_try {
11432 Expression::TryCast(Box::new(
11433 crate::expressions::Cast {
11434 this: value_expr,
11435 to: dt,
11436 trailing_comments: Vec::new(),
11437 double_colon_syntax: false,
11438 format: None,
11439 default: None,
11440 },
11441 ))
11442 } else {
11443 Expression::Cast(Box::new(
11444 crate::expressions::Cast {
11445 this: value_expr,
11446 to: dt,
11447 trailing_comments: Vec::new(),
11448 double_colon_syntax: false,
11449 format: None,
11450 default: None,
11451 },
11452 ))
11453 };
11454 return Ok(cast_expr);
11455 }
11456
11457 if let Some(java_fmt) = format_str {
11458 let c_fmt = java_fmt
11459 .replace("yyyy", "%Y")
11460 .replace("MM", "%m")
11461 .replace("dd", "%d")
11462 .replace("HH", "%H")
11463 .replace("mm", "%M")
11464 .replace("ss", "%S")
11465 .replace("SSSSSS", "%f")
11466 .replace("SSS", "%f")
11467 .replace("'T'", "T");
11468
11469 // For datetime target types: style is the INPUT format for parsing strings -> dates
11470 if is_datetime_type {
11471 match target {
11472 DialectType::DuckDB => {
11473 return Ok(Expression::Function(Box::new(
11474 Function::new(
11475 "STRPTIME".to_string(),
11476 vec![
11477 value_expr,
11478 Expression::string(&c_fmt),
11479 ],
11480 ),
11481 )));
11482 }
11483 DialectType::Spark
11484 | DialectType::Databricks => {
11485 // CONVERT(DATETIME, x, style) -> TO_TIMESTAMP(x, fmt)
11486 // CONVERT(DATE, x, style) -> TO_DATE(x, fmt)
11487 let func_name =
11488 if matches!(dt, DataType::Date) {
11489 "TO_DATE"
11490 } else {
11491 "TO_TIMESTAMP"
11492 };
11493 return Ok(Expression::Function(Box::new(
11494 Function::new(
11495 func_name.to_string(),
11496 vec![
11497 value_expr,
11498 Expression::string(java_fmt),
11499 ],
11500 ),
11501 )));
11502 }
11503 DialectType::Hive => {
11504 return Ok(Expression::Function(Box::new(
11505 Function::new(
11506 "TO_TIMESTAMP".to_string(),
11507 vec![
11508 value_expr,
11509 Expression::string(java_fmt),
11510 ],
11511 ),
11512 )));
11513 }
11514 _ => {
11515 return Ok(Expression::Cast(Box::new(
11516 crate::expressions::Cast {
11517 this: value_expr,
11518 to: dt,
11519 trailing_comments: Vec::new(),
11520 double_colon_syntax: false,
11521 format: None,
11522 default: None,
11523 },
11524 )));
11525 }
11526 }
11527 }
11528
11529 // For string target types: style is the OUTPUT format for dates -> strings
11530 match target {
11531 DialectType::DuckDB => Ok(Expression::Function(
11532 Box::new(Function::new(
11533 "STRPTIME".to_string(),
11534 vec![
11535 value_expr,
11536 Expression::string(&c_fmt),
11537 ],
11538 )),
11539 )),
11540 DialectType::Spark | DialectType::Databricks => {
11541 // For string target types with style: CAST(DATE_FORMAT(x, fmt) AS type)
11542 // Determine the target string type
11543 let string_dt = match &dt {
11544 DataType::VarChar {
11545 length: Some(l),
11546 ..
11547 } => DataType::VarChar {
11548 length: Some(*l),
11549 parenthesized_length: false,
11550 },
11551 DataType::Text => DataType::Custom {
11552 name: "STRING".to_string(),
11553 },
11554 _ => DataType::Custom {
11555 name: "STRING".to_string(),
11556 },
11557 };
11558 let date_format_expr = Expression::Function(
11559 Box::new(Function::new(
11560 "DATE_FORMAT".to_string(),
11561 vec![
11562 value_expr,
11563 Expression::string(java_fmt),
11564 ],
11565 )),
11566 );
11567 let cast_expr = if is_try {
11568 Expression::TryCast(Box::new(
11569 crate::expressions::Cast {
11570 this: date_format_expr,
11571 to: string_dt,
11572 trailing_comments: Vec::new(),
11573 double_colon_syntax: false,
11574 format: None,
11575 default: None,
11576 },
11577 ))
11578 } else {
11579 Expression::Cast(Box::new(
11580 crate::expressions::Cast {
11581 this: date_format_expr,
11582 to: string_dt,
11583 trailing_comments: Vec::new(),
11584 double_colon_syntax: false,
11585 format: None,
11586 default: None,
11587 },
11588 ))
11589 };
11590 Ok(cast_expr)
11591 }
11592 DialectType::MySQL | DialectType::SingleStore => {
11593 // For MySQL: CAST(DATE_FORMAT(x, mysql_fmt) AS CHAR(n))
11594 let mysql_fmt = java_fmt
11595 .replace("yyyy", "%Y")
11596 .replace("MM", "%m")
11597 .replace("dd", "%d")
11598 .replace("HH:mm:ss.SSSSSS", "%T")
11599 .replace("HH:mm:ss", "%T")
11600 .replace("HH", "%H")
11601 .replace("mm", "%i")
11602 .replace("ss", "%S");
11603 let date_format_expr = Expression::Function(
11604 Box::new(Function::new(
11605 "DATE_FORMAT".to_string(),
11606 vec![
11607 value_expr,
11608 Expression::string(&mysql_fmt),
11609 ],
11610 )),
11611 );
11612 // MySQL uses CHAR for string casts
11613 let mysql_dt = match &dt {
11614 DataType::VarChar { length, .. } => {
11615 DataType::Char { length: *length }
11616 }
11617 _ => dt,
11618 };
11619 Ok(Expression::Cast(Box::new(
11620 crate::expressions::Cast {
11621 this: date_format_expr,
11622 to: mysql_dt,
11623 trailing_comments: Vec::new(),
11624 double_colon_syntax: false,
11625 format: None,
11626 default: None,
11627 },
11628 )))
11629 }
11630 DialectType::Hive => {
11631 let func_name = "TO_TIMESTAMP";
11632 Ok(Expression::Function(Box::new(
11633 Function::new(
11634 func_name.to_string(),
11635 vec![
11636 value_expr,
11637 Expression::string(java_fmt),
11638 ],
11639 ),
11640 )))
11641 }
11642 _ => Ok(Expression::Cast(Box::new(
11643 crate::expressions::Cast {
11644 this: value_expr,
11645 to: dt,
11646 trailing_comments: Vec::new(),
11647 double_colon_syntax: false,
11648 format: None,
11649 default: None,
11650 },
11651 ))),
11652 }
11653 } else {
11654 // Unknown style, just CAST
11655 let cast_expr = if is_try {
11656 Expression::TryCast(Box::new(
11657 crate::expressions::Cast {
11658 this: value_expr,
11659 to: dt,
11660 trailing_comments: Vec::new(),
11661 double_colon_syntax: false,
11662 format: None,
11663 default: None,
11664 },
11665 ))
11666 } else {
11667 Expression::Cast(Box::new(
11668 crate::expressions::Cast {
11669 this: value_expr,
11670 to: dt,
11671 trailing_comments: Vec::new(),
11672 double_colon_syntax: false,
11673 format: None,
11674 default: None,
11675 },
11676 ))
11677 };
11678 Ok(cast_expr)
11679 }
11680 } else {
11681 // No style - simple CAST
11682 let final_dt = if matches!(
11683 target,
11684 DialectType::MySQL | DialectType::SingleStore
11685 ) {
11686 match &dt {
11687 DataType::Int { .. }
11688 | DataType::BigInt { .. }
11689 | DataType::SmallInt { .. }
11690 | DataType::TinyInt { .. } => DataType::Custom {
11691 name: "SIGNED".to_string(),
11692 },
11693 DataType::VarChar { length, .. } => {
11694 DataType::Char { length: *length }
11695 }
11696 _ => dt,
11697 }
11698 } else {
11699 dt
11700 };
11701 let cast_expr = if is_try {
11702 Expression::TryCast(Box::new(
11703 crate::expressions::Cast {
11704 this: value_expr,
11705 to: final_dt,
11706 trailing_comments: Vec::new(),
11707 double_colon_syntax: false,
11708 format: None,
11709 default: None,
11710 },
11711 ))
11712 } else {
11713 Expression::Cast(Box::new(crate::expressions::Cast {
11714 this: value_expr,
11715 to: final_dt,
11716 trailing_comments: Vec::new(),
11717 double_colon_syntax: false,
11718 format: None,
11719 default: None,
11720 }))
11721 };
11722 Ok(cast_expr)
11723 }
11724 } else {
11725 // Can't convert type expression - keep as CONVERT/TRY_CONVERT function
11726 Ok(Expression::Function(f))
11727 }
11728 }
11729 // STRFTIME(val, fmt) from DuckDB / STRFTIME(fmt, val) from SQLite -> target-specific
11730 "STRFTIME" if f.args.len() == 2 => {
11731 // SQLite uses STRFTIME(fmt, val); DuckDB uses STRFTIME(val, fmt)
11732 let (val, fmt_expr) = if matches!(source, DialectType::SQLite) {
11733 // SQLite: args[0] = format, args[1] = value
11734 (f.args[1].clone(), &f.args[0])
11735 } else {
11736 // DuckDB and others: args[0] = value, args[1] = format
11737 (f.args[0].clone(), &f.args[1])
11738 };
11739
11740 // Helper to convert C-style format to Java-style
11741 fn c_to_java_format(fmt: &str) -> String {
11742 fmt.replace("%Y", "yyyy")
11743 .replace("%m", "MM")
11744 .replace("%d", "dd")
11745 .replace("%H", "HH")
11746 .replace("%M", "mm")
11747 .replace("%S", "ss")
11748 .replace("%f", "SSSSSS")
11749 .replace("%y", "yy")
11750 .replace("%-m", "M")
11751 .replace("%-d", "d")
11752 .replace("%-H", "H")
11753 .replace("%-I", "h")
11754 .replace("%I", "hh")
11755 .replace("%p", "a")
11756 .replace("%j", "DDD")
11757 .replace("%a", "EEE")
11758 .replace("%b", "MMM")
11759 .replace("%F", "yyyy-MM-dd")
11760 .replace("%T", "HH:mm:ss")
11761 }
11762
11763 // Helper: recursively convert format strings within expressions (handles CONCAT)
11764 fn convert_fmt_expr(
11765 expr: &Expression,
11766 converter: &dyn Fn(&str) -> String,
11767 ) -> Expression {
11768 match expr {
11769 Expression::Literal(
11770 crate::expressions::Literal::String(s),
11771 ) => Expression::string(&converter(s)),
11772 Expression::Function(func)
11773 if func.name.eq_ignore_ascii_case("CONCAT") =>
11774 {
11775 let new_args: Vec<Expression> = func
11776 .args
11777 .iter()
11778 .map(|a| convert_fmt_expr(a, converter))
11779 .collect();
11780 Expression::Function(Box::new(Function::new(
11781 "CONCAT".to_string(),
11782 new_args,
11783 )))
11784 }
11785 other => other.clone(),
11786 }
11787 }
11788
11789 match target {
11790 DialectType::DuckDB => {
11791 if matches!(source, DialectType::SQLite) {
11792 // SQLite STRFTIME(fmt, val) -> DuckDB STRFTIME(CAST(val AS TIMESTAMP), fmt)
11793 let cast_val = Expression::Cast(Box::new(Cast {
11794 this: val,
11795 to: crate::expressions::DataType::Timestamp {
11796 precision: None,
11797 timezone: false,
11798 },
11799 trailing_comments: Vec::new(),
11800 double_colon_syntax: false,
11801 format: None,
11802 default: None,
11803 }));
11804 Ok(Expression::Function(Box::new(Function::new(
11805 "STRFTIME".to_string(),
11806 vec![cast_val, fmt_expr.clone()],
11807 ))))
11808 } else {
11809 Ok(Expression::Function(f))
11810 }
11811 }
11812 DialectType::Spark
11813 | DialectType::Databricks
11814 | DialectType::Hive => {
11815 // STRFTIME(val, fmt) -> DATE_FORMAT(val, java_fmt)
11816 let converted_fmt =
11817 convert_fmt_expr(fmt_expr, &c_to_java_format);
11818 Ok(Expression::Function(Box::new(Function::new(
11819 "DATE_FORMAT".to_string(),
11820 vec![val, converted_fmt],
11821 ))))
11822 }
11823 DialectType::TSQL | DialectType::Fabric => {
11824 // STRFTIME(val, fmt) -> FORMAT(val, java_fmt)
11825 let converted_fmt =
11826 convert_fmt_expr(fmt_expr, &c_to_java_format);
11827 Ok(Expression::Function(Box::new(Function::new(
11828 "FORMAT".to_string(),
11829 vec![val, converted_fmt],
11830 ))))
11831 }
11832 DialectType::Presto
11833 | DialectType::Trino
11834 | DialectType::Athena => {
11835 // STRFTIME(val, fmt) -> DATE_FORMAT(val, presto_fmt) (convert DuckDB format to Presto)
11836 if let Expression::Literal(
11837 crate::expressions::Literal::String(s),
11838 ) = fmt_expr
11839 {
11840 let presto_fmt = duckdb_to_presto_format(s);
11841 Ok(Expression::Function(Box::new(Function::new(
11842 "DATE_FORMAT".to_string(),
11843 vec![val, Expression::string(&presto_fmt)],
11844 ))))
11845 } else {
11846 Ok(Expression::Function(Box::new(Function::new(
11847 "DATE_FORMAT".to_string(),
11848 vec![val, fmt_expr.clone()],
11849 ))))
11850 }
11851 }
11852 DialectType::BigQuery => {
11853 // STRFTIME(val, fmt) -> FORMAT_DATE(bq_fmt, val) - note reversed arg order
11854 if let Expression::Literal(
11855 crate::expressions::Literal::String(s),
11856 ) = fmt_expr
11857 {
11858 let bq_fmt = duckdb_to_bigquery_format(s);
11859 Ok(Expression::Function(Box::new(Function::new(
11860 "FORMAT_DATE".to_string(),
11861 vec![Expression::string(&bq_fmt), val],
11862 ))))
11863 } else {
11864 Ok(Expression::Function(Box::new(Function::new(
11865 "FORMAT_DATE".to_string(),
11866 vec![fmt_expr.clone(), val],
11867 ))))
11868 }
11869 }
11870 DialectType::PostgreSQL | DialectType::Redshift => {
11871 // STRFTIME(val, fmt) -> TO_CHAR(val, pg_fmt)
11872 if let Expression::Literal(
11873 crate::expressions::Literal::String(s),
11874 ) = fmt_expr
11875 {
11876 let pg_fmt = s
11877 .replace("%Y", "YYYY")
11878 .replace("%m", "MM")
11879 .replace("%d", "DD")
11880 .replace("%H", "HH24")
11881 .replace("%M", "MI")
11882 .replace("%S", "SS")
11883 .replace("%y", "YY")
11884 .replace("%-m", "FMMM")
11885 .replace("%-d", "FMDD")
11886 .replace("%-H", "FMHH24")
11887 .replace("%-I", "FMHH12")
11888 .replace("%p", "AM")
11889 .replace("%F", "YYYY-MM-DD")
11890 .replace("%T", "HH24:MI:SS");
11891 Ok(Expression::Function(Box::new(Function::new(
11892 "TO_CHAR".to_string(),
11893 vec![val, Expression::string(&pg_fmt)],
11894 ))))
11895 } else {
11896 Ok(Expression::Function(Box::new(Function::new(
11897 "TO_CHAR".to_string(),
11898 vec![val, fmt_expr.clone()],
11899 ))))
11900 }
11901 }
11902 _ => Ok(Expression::Function(f)),
11903 }
11904 }
11905 // STRPTIME(val, fmt) from DuckDB -> target-specific date parse function
11906 "STRPTIME" if f.args.len() == 2 => {
11907 let val = f.args[0].clone();
11908 let fmt_expr = &f.args[1];
11909
11910 fn c_to_java_format_parse(fmt: &str) -> String {
11911 fmt.replace("%Y", "yyyy")
11912 .replace("%m", "MM")
11913 .replace("%d", "dd")
11914 .replace("%H", "HH")
11915 .replace("%M", "mm")
11916 .replace("%S", "ss")
11917 .replace("%f", "SSSSSS")
11918 .replace("%y", "yy")
11919 .replace("%-m", "M")
11920 .replace("%-d", "d")
11921 .replace("%-H", "H")
11922 .replace("%-I", "h")
11923 .replace("%I", "hh")
11924 .replace("%p", "a")
11925 .replace("%F", "yyyy-MM-dd")
11926 .replace("%T", "HH:mm:ss")
11927 }
11928
11929 match target {
11930 DialectType::DuckDB => Ok(Expression::Function(f)),
11931 DialectType::Spark | DialectType::Databricks => {
11932 // STRPTIME(val, fmt) -> TO_TIMESTAMP(val, java_fmt)
11933 if let Expression::Literal(
11934 crate::expressions::Literal::String(s),
11935 ) = fmt_expr
11936 {
11937 let java_fmt = c_to_java_format_parse(s);
11938 Ok(Expression::Function(Box::new(Function::new(
11939 "TO_TIMESTAMP".to_string(),
11940 vec![val, Expression::string(&java_fmt)],
11941 ))))
11942 } else {
11943 Ok(Expression::Function(Box::new(Function::new(
11944 "TO_TIMESTAMP".to_string(),
11945 vec![val, fmt_expr.clone()],
11946 ))))
11947 }
11948 }
11949 DialectType::Hive => {
11950 // STRPTIME(val, fmt) -> CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(val, java_fmt)) AS TIMESTAMP)
11951 if let Expression::Literal(
11952 crate::expressions::Literal::String(s),
11953 ) = fmt_expr
11954 {
11955 let java_fmt = c_to_java_format_parse(s);
11956 let unix_ts =
11957 Expression::Function(Box::new(Function::new(
11958 "UNIX_TIMESTAMP".to_string(),
11959 vec![val, Expression::string(&java_fmt)],
11960 )));
11961 let from_unix =
11962 Expression::Function(Box::new(Function::new(
11963 "FROM_UNIXTIME".to_string(),
11964 vec![unix_ts],
11965 )));
11966 Ok(Expression::Cast(Box::new(
11967 crate::expressions::Cast {
11968 this: from_unix,
11969 to: DataType::Timestamp {
11970 timezone: false,
11971 precision: None,
11972 },
11973 trailing_comments: Vec::new(),
11974 double_colon_syntax: false,
11975 format: None,
11976 default: None,
11977 },
11978 )))
11979 } else {
11980 Ok(Expression::Function(f))
11981 }
11982 }
11983 DialectType::Presto
11984 | DialectType::Trino
11985 | DialectType::Athena => {
11986 // STRPTIME(val, fmt) -> DATE_PARSE(val, presto_fmt) (convert DuckDB format to Presto)
11987 if let Expression::Literal(
11988 crate::expressions::Literal::String(s),
11989 ) = fmt_expr
11990 {
11991 let presto_fmt = duckdb_to_presto_format(s);
11992 Ok(Expression::Function(Box::new(Function::new(
11993 "DATE_PARSE".to_string(),
11994 vec![val, Expression::string(&presto_fmt)],
11995 ))))
11996 } else {
11997 Ok(Expression::Function(Box::new(Function::new(
11998 "DATE_PARSE".to_string(),
11999 vec![val, fmt_expr.clone()],
12000 ))))
12001 }
12002 }
12003 DialectType::BigQuery => {
12004 // STRPTIME(val, fmt) -> PARSE_TIMESTAMP(bq_fmt, val) - note reversed arg order
12005 if let Expression::Literal(
12006 crate::expressions::Literal::String(s),
12007 ) = fmt_expr
12008 {
12009 let bq_fmt = duckdb_to_bigquery_format(s);
12010 Ok(Expression::Function(Box::new(Function::new(
12011 "PARSE_TIMESTAMP".to_string(),
12012 vec![Expression::string(&bq_fmt), val],
12013 ))))
12014 } else {
12015 Ok(Expression::Function(Box::new(Function::new(
12016 "PARSE_TIMESTAMP".to_string(),
12017 vec![fmt_expr.clone(), val],
12018 ))))
12019 }
12020 }
12021 _ => Ok(Expression::Function(f)),
12022 }
12023 }
12024 // DATE_FORMAT(val, fmt) from Presto source (C-style format) -> target-specific
12025 "DATE_FORMAT"
12026 if f.args.len() >= 2
12027 && matches!(
12028 source,
12029 DialectType::Presto
12030 | DialectType::Trino
12031 | DialectType::Athena
12032 ) =>
12033 {
12034 let val = f.args[0].clone();
12035 let fmt_expr = &f.args[1];
12036
12037 match target {
12038 DialectType::Presto
12039 | DialectType::Trino
12040 | DialectType::Athena => {
12041 // Presto -> Presto: normalize format (e.g., %H:%i:%S -> %T)
12042 if let Expression::Literal(
12043 crate::expressions::Literal::String(s),
12044 ) = fmt_expr
12045 {
12046 let normalized = crate::dialects::presto::PrestoDialect::normalize_presto_format(s);
12047 Ok(Expression::Function(Box::new(Function::new(
12048 "DATE_FORMAT".to_string(),
12049 vec![val, Expression::string(&normalized)],
12050 ))))
12051 } else {
12052 Ok(Expression::Function(f))
12053 }
12054 }
12055 DialectType::Hive
12056 | DialectType::Spark
12057 | DialectType::Databricks => {
12058 // Convert Presto C-style to Java-style format
12059 if let Expression::Literal(
12060 crate::expressions::Literal::String(s),
12061 ) = fmt_expr
12062 {
12063 let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
12064 Ok(Expression::Function(Box::new(Function::new(
12065 "DATE_FORMAT".to_string(),
12066 vec![val, Expression::string(&java_fmt)],
12067 ))))
12068 } else {
12069 Ok(Expression::Function(f))
12070 }
12071 }
12072 DialectType::DuckDB => {
12073 // Convert to STRFTIME(val, duckdb_fmt)
12074 if let Expression::Literal(
12075 crate::expressions::Literal::String(s),
12076 ) = fmt_expr
12077 {
12078 let duckdb_fmt = crate::dialects::presto::PrestoDialect::presto_to_duckdb_format(s);
12079 Ok(Expression::Function(Box::new(Function::new(
12080 "STRFTIME".to_string(),
12081 vec![val, Expression::string(&duckdb_fmt)],
12082 ))))
12083 } else {
12084 Ok(Expression::Function(Box::new(Function::new(
12085 "STRFTIME".to_string(),
12086 vec![val, fmt_expr.clone()],
12087 ))))
12088 }
12089 }
12090 DialectType::BigQuery => {
12091 // Convert to FORMAT_DATE(bq_fmt, val) - reversed args
12092 if let Expression::Literal(
12093 crate::expressions::Literal::String(s),
12094 ) = fmt_expr
12095 {
12096 let bq_fmt = crate::dialects::presto::PrestoDialect::presto_to_bigquery_format(s);
12097 Ok(Expression::Function(Box::new(Function::new(
12098 "FORMAT_DATE".to_string(),
12099 vec![Expression::string(&bq_fmt), val],
12100 ))))
12101 } else {
12102 Ok(Expression::Function(Box::new(Function::new(
12103 "FORMAT_DATE".to_string(),
12104 vec![fmt_expr.clone(), val],
12105 ))))
12106 }
12107 }
12108 _ => Ok(Expression::Function(f)),
12109 }
12110 }
12111 // DATE_PARSE(val, fmt) from Presto source -> target-specific parse function
12112 "DATE_PARSE"
12113 if f.args.len() >= 2
12114 && matches!(
12115 source,
12116 DialectType::Presto
12117 | DialectType::Trino
12118 | DialectType::Athena
12119 ) =>
12120 {
12121 let val = f.args[0].clone();
12122 let fmt_expr = &f.args[1];
12123
12124 match target {
12125 DialectType::Presto
12126 | DialectType::Trino
12127 | DialectType::Athena => {
12128 // Presto -> Presto: normalize format
12129 if let Expression::Literal(
12130 crate::expressions::Literal::String(s),
12131 ) = fmt_expr
12132 {
12133 let normalized = crate::dialects::presto::PrestoDialect::normalize_presto_format(s);
12134 Ok(Expression::Function(Box::new(Function::new(
12135 "DATE_PARSE".to_string(),
12136 vec![val, Expression::string(&normalized)],
12137 ))))
12138 } else {
12139 Ok(Expression::Function(f))
12140 }
12141 }
12142 DialectType::Hive => {
12143 // Presto -> Hive: if default format, just CAST(x AS TIMESTAMP)
12144 if let Expression::Literal(
12145 crate::expressions::Literal::String(s),
12146 ) = fmt_expr
12147 {
12148 if crate::dialects::presto::PrestoDialect::is_default_timestamp_format(s)
12149 || crate::dialects::presto::PrestoDialect::is_default_date_format(s) {
12150 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
12151 this: val,
12152 to: DataType::Timestamp { timezone: false, precision: None },
12153 trailing_comments: Vec::new(),
12154 double_colon_syntax: false,
12155 format: None,
12156 default: None,
12157 })))
12158 } else {
12159 let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
12160 Ok(Expression::Function(Box::new(Function::new(
12161 "TO_TIMESTAMP".to_string(),
12162 vec![val, Expression::string(&java_fmt)],
12163 ))))
12164 }
12165 } else {
12166 Ok(Expression::Function(f))
12167 }
12168 }
12169 DialectType::Spark | DialectType::Databricks => {
12170 // Presto -> Spark: TO_TIMESTAMP(val, java_fmt)
12171 if let Expression::Literal(
12172 crate::expressions::Literal::String(s),
12173 ) = fmt_expr
12174 {
12175 let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
12176 Ok(Expression::Function(Box::new(Function::new(
12177 "TO_TIMESTAMP".to_string(),
12178 vec![val, Expression::string(&java_fmt)],
12179 ))))
12180 } else {
12181 Ok(Expression::Function(f))
12182 }
12183 }
12184 DialectType::DuckDB => {
12185 // Presto -> DuckDB: STRPTIME(val, duckdb_fmt)
12186 if let Expression::Literal(
12187 crate::expressions::Literal::String(s),
12188 ) = fmt_expr
12189 {
12190 let duckdb_fmt = crate::dialects::presto::PrestoDialect::presto_to_duckdb_format(s);
12191 Ok(Expression::Function(Box::new(Function::new(
12192 "STRPTIME".to_string(),
12193 vec![val, Expression::string(&duckdb_fmt)],
12194 ))))
12195 } else {
12196 Ok(Expression::Function(Box::new(Function::new(
12197 "STRPTIME".to_string(),
12198 vec![val, fmt_expr.clone()],
12199 ))))
12200 }
12201 }
12202 _ => Ok(Expression::Function(f)),
12203 }
12204 }
12205 // FROM_BASE64(x) / TO_BASE64(x) from Presto -> Hive-specific renames
12206 "FROM_BASE64"
12207 if f.args.len() == 1 && matches!(target, DialectType::Hive) =>
12208 {
12209 Ok(Expression::Function(Box::new(Function::new(
12210 "UNBASE64".to_string(),
12211 f.args,
12212 ))))
12213 }
12214 "TO_BASE64"
12215 if f.args.len() == 1 && matches!(target, DialectType::Hive) =>
12216 {
12217 Ok(Expression::Function(Box::new(Function::new(
12218 "BASE64".to_string(),
12219 f.args,
12220 ))))
12221 }
12222 // FROM_UNIXTIME(x) -> CAST(FROM_UNIXTIME(x) AS TIMESTAMP) for Spark
12223 "FROM_UNIXTIME"
12224 if f.args.len() == 1
12225 && matches!(
12226 source,
12227 DialectType::Presto
12228 | DialectType::Trino
12229 | DialectType::Athena
12230 )
12231 && matches!(
12232 target,
12233 DialectType::Spark | DialectType::Databricks
12234 ) =>
12235 {
12236 // Wrap FROM_UNIXTIME(x) in CAST(... AS TIMESTAMP)
12237 let from_unix = Expression::Function(Box::new(Function::new(
12238 "FROM_UNIXTIME".to_string(),
12239 f.args,
12240 )));
12241 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
12242 this: from_unix,
12243 to: DataType::Timestamp {
12244 timezone: false,
12245 precision: None,
12246 },
12247 trailing_comments: Vec::new(),
12248 double_colon_syntax: false,
12249 format: None,
12250 default: None,
12251 })))
12252 }
12253 // DATE_FORMAT(val, fmt) from Hive/Spark/MySQL -> target-specific format function
12254 "DATE_FORMAT"
12255 if f.args.len() >= 2
12256 && !matches!(
12257 target,
12258 DialectType::Hive
12259 | DialectType::Spark
12260 | DialectType::Databricks
12261 | DialectType::MySQL
12262 | DialectType::SingleStore
12263 ) =>
12264 {
12265 let val = f.args[0].clone();
12266 let fmt_expr = &f.args[1];
12267 let is_hive_source = matches!(
12268 source,
12269 DialectType::Hive
12270 | DialectType::Spark
12271 | DialectType::Databricks
12272 );
12273
12274 fn java_to_c_format(fmt: &str) -> String {
12275 // Replace Java patterns with C strftime patterns.
12276 // Uses multi-pass to handle patterns that conflict.
12277 // First pass: replace multi-char patterns (longer first)
12278 let result = fmt
12279 .replace("yyyy", "%Y")
12280 .replace("SSSSSS", "%f")
12281 .replace("EEEE", "%W")
12282 .replace("MM", "%m")
12283 .replace("dd", "%d")
12284 .replace("HH", "%H")
12285 .replace("mm", "%M")
12286 .replace("ss", "%S")
12287 .replace("yy", "%y");
12288 // Second pass: handle single-char timezone patterns
12289 // z -> %Z (timezone name), Z -> %z (timezone offset)
12290 // Must be careful not to replace 'z'/'Z' inside already-replaced %Y, %M etc.
12291 let mut out = String::new();
12292 let chars: Vec<char> = result.chars().collect();
12293 let mut i = 0;
12294 while i < chars.len() {
12295 if chars[i] == '%' && i + 1 < chars.len() {
12296 // Already a format specifier, skip both chars
12297 out.push(chars[i]);
12298 out.push(chars[i + 1]);
12299 i += 2;
12300 } else if chars[i] == 'z' {
12301 out.push_str("%Z");
12302 i += 1;
12303 } else if chars[i] == 'Z' {
12304 out.push_str("%z");
12305 i += 1;
12306 } else {
12307 out.push(chars[i]);
12308 i += 1;
12309 }
12310 }
12311 out
12312 }
12313
12314 fn java_to_presto_format(fmt: &str) -> String {
12315 // Presto uses %T for HH:MM:SS
12316 let c_fmt = java_to_c_format(fmt);
12317 c_fmt.replace("%H:%M:%S", "%T")
12318 }
12319
12320 fn java_to_bq_format(fmt: &str) -> String {
12321 // BigQuery uses %F for yyyy-MM-dd and %T for HH:mm:ss
12322 let c_fmt = java_to_c_format(fmt);
12323 c_fmt.replace("%Y-%m-%d", "%F").replace("%H:%M:%S", "%T")
12324 }
12325
12326 // For Hive source, CAST string literals to appropriate type
12327 let cast_val = if is_hive_source {
12328 match &val {
12329 Expression::Literal(
12330 crate::expressions::Literal::String(_),
12331 ) => {
12332 match target {
12333 DialectType::DuckDB
12334 | DialectType::Presto
12335 | DialectType::Trino
12336 | DialectType::Athena => {
12337 Self::ensure_cast_timestamp(val.clone())
12338 }
12339 DialectType::BigQuery => {
12340 // BigQuery: CAST(val AS DATETIME)
12341 Expression::Cast(Box::new(
12342 crate::expressions::Cast {
12343 this: val.clone(),
12344 to: DataType::Custom {
12345 name: "DATETIME".to_string(),
12346 },
12347 trailing_comments: vec![],
12348 double_colon_syntax: false,
12349 format: None,
12350 default: None,
12351 },
12352 ))
12353 }
12354 _ => val.clone(),
12355 }
12356 }
12357 // For CAST(x AS DATE) or DATE literal, Presto needs CAST(CAST(x AS DATE) AS TIMESTAMP)
12358 Expression::Cast(c)
12359 if matches!(c.to, DataType::Date)
12360 && matches!(
12361 target,
12362 DialectType::Presto
12363 | DialectType::Trino
12364 | DialectType::Athena
12365 ) =>
12366 {
12367 Expression::Cast(Box::new(crate::expressions::Cast {
12368 this: val.clone(),
12369 to: DataType::Timestamp {
12370 timezone: false,
12371 precision: None,
12372 },
12373 trailing_comments: vec![],
12374 double_colon_syntax: false,
12375 format: None,
12376 default: None,
12377 }))
12378 }
12379 Expression::Literal(crate::expressions::Literal::Date(
12380 _,
12381 )) if matches!(
12382 target,
12383 DialectType::Presto
12384 | DialectType::Trino
12385 | DialectType::Athena
12386 ) =>
12387 {
12388 // DATE 'x' -> CAST(CAST('x' AS DATE) AS TIMESTAMP)
12389 let cast_date = Self::date_literal_to_cast(val.clone());
12390 Expression::Cast(Box::new(crate::expressions::Cast {
12391 this: cast_date,
12392 to: DataType::Timestamp {
12393 timezone: false,
12394 precision: None,
12395 },
12396 trailing_comments: vec![],
12397 double_colon_syntax: false,
12398 format: None,
12399 default: None,
12400 }))
12401 }
12402 _ => val.clone(),
12403 }
12404 } else {
12405 val.clone()
12406 };
12407
12408 match target {
12409 DialectType::DuckDB => {
12410 if let Expression::Literal(
12411 crate::expressions::Literal::String(s),
12412 ) = fmt_expr
12413 {
12414 let c_fmt = if is_hive_source {
12415 java_to_c_format(s)
12416 } else {
12417 s.clone()
12418 };
12419 Ok(Expression::Function(Box::new(Function::new(
12420 "STRFTIME".to_string(),
12421 vec![cast_val, Expression::string(&c_fmt)],
12422 ))))
12423 } else {
12424 Ok(Expression::Function(Box::new(Function::new(
12425 "STRFTIME".to_string(),
12426 vec![cast_val, fmt_expr.clone()],
12427 ))))
12428 }
12429 }
12430 DialectType::Presto
12431 | DialectType::Trino
12432 | DialectType::Athena => {
12433 if is_hive_source {
12434 if let Expression::Literal(
12435 crate::expressions::Literal::String(s),
12436 ) = fmt_expr
12437 {
12438 let p_fmt = java_to_presto_format(s);
12439 Ok(Expression::Function(Box::new(Function::new(
12440 "DATE_FORMAT".to_string(),
12441 vec![cast_val, Expression::string(&p_fmt)],
12442 ))))
12443 } else {
12444 Ok(Expression::Function(Box::new(Function::new(
12445 "DATE_FORMAT".to_string(),
12446 vec![cast_val, fmt_expr.clone()],
12447 ))))
12448 }
12449 } else {
12450 Ok(Expression::Function(Box::new(Function::new(
12451 "DATE_FORMAT".to_string(),
12452 f.args,
12453 ))))
12454 }
12455 }
12456 DialectType::BigQuery => {
12457 // DATE_FORMAT(val, fmt) -> FORMAT_DATE(fmt, val)
12458 if let Expression::Literal(
12459 crate::expressions::Literal::String(s),
12460 ) = fmt_expr
12461 {
12462 let bq_fmt = if is_hive_source {
12463 java_to_bq_format(s)
12464 } else {
12465 java_to_c_format(s)
12466 };
12467 Ok(Expression::Function(Box::new(Function::new(
12468 "FORMAT_DATE".to_string(),
12469 vec![Expression::string(&bq_fmt), cast_val],
12470 ))))
12471 } else {
12472 Ok(Expression::Function(Box::new(Function::new(
12473 "FORMAT_DATE".to_string(),
12474 vec![fmt_expr.clone(), cast_val],
12475 ))))
12476 }
12477 }
12478 DialectType::PostgreSQL | DialectType::Redshift => {
12479 if let Expression::Literal(
12480 crate::expressions::Literal::String(s),
12481 ) = fmt_expr
12482 {
12483 let pg_fmt = s
12484 .replace("yyyy", "YYYY")
12485 .replace("MM", "MM")
12486 .replace("dd", "DD")
12487 .replace("HH", "HH24")
12488 .replace("mm", "MI")
12489 .replace("ss", "SS")
12490 .replace("yy", "YY");
12491 Ok(Expression::Function(Box::new(Function::new(
12492 "TO_CHAR".to_string(),
12493 vec![val, Expression::string(&pg_fmt)],
12494 ))))
12495 } else {
12496 Ok(Expression::Function(Box::new(Function::new(
12497 "TO_CHAR".to_string(),
12498 vec![val, fmt_expr.clone()],
12499 ))))
12500 }
12501 }
12502 _ => Ok(Expression::Function(f)),
12503 }
12504 }
12505 // DATEDIFF(unit, start, end) - 3-arg form
12506 // SQLite uses DATEDIFF(date1, date2, unit_string) instead
12507 "DATEDIFF" if f.args.len() == 3 => {
12508 let mut args = f.args;
12509 // SQLite source: args = (date1, date2, unit_string)
12510 // Standard source: args = (unit, start, end)
12511 let (_arg0, arg1, arg2, unit_str) =
12512 if matches!(source, DialectType::SQLite) {
12513 let date1 = args.remove(0);
12514 let date2 = args.remove(0);
12515 let unit_expr = args.remove(0);
12516 let unit_s = Self::get_unit_str_static(&unit_expr);
12517
12518 // For SQLite target, generate JULIANDAY arithmetic directly
12519 if matches!(target, DialectType::SQLite) {
12520 let jd_first = Expression::Function(Box::new(
12521 Function::new("JULIANDAY".to_string(), vec![date1]),
12522 ));
12523 let jd_second = Expression::Function(Box::new(
12524 Function::new("JULIANDAY".to_string(), vec![date2]),
12525 ));
12526 let diff = Expression::Sub(Box::new(
12527 crate::expressions::BinaryOp::new(
12528 jd_first, jd_second,
12529 ),
12530 ));
12531 let paren_diff = Expression::Paren(Box::new(
12532 crate::expressions::Paren {
12533 this: diff,
12534 trailing_comments: Vec::new(),
12535 },
12536 ));
12537 let adjusted = match unit_s.as_str() {
12538 "HOUR" => Expression::Mul(Box::new(
12539 crate::expressions::BinaryOp::new(
12540 paren_diff,
12541 Expression::Literal(Literal::Number(
12542 "24.0".to_string(),
12543 )),
12544 ),
12545 )),
12546 "MINUTE" => Expression::Mul(Box::new(
12547 crate::expressions::BinaryOp::new(
12548 paren_diff,
12549 Expression::Literal(Literal::Number(
12550 "1440.0".to_string(),
12551 )),
12552 ),
12553 )),
12554 "SECOND" => Expression::Mul(Box::new(
12555 crate::expressions::BinaryOp::new(
12556 paren_diff,
12557 Expression::Literal(Literal::Number(
12558 "86400.0".to_string(),
12559 )),
12560 ),
12561 )),
12562 "MONTH" => Expression::Div(Box::new(
12563 crate::expressions::BinaryOp::new(
12564 paren_diff,
12565 Expression::Literal(Literal::Number(
12566 "30.0".to_string(),
12567 )),
12568 ),
12569 )),
12570 "YEAR" => Expression::Div(Box::new(
12571 crate::expressions::BinaryOp::new(
12572 paren_diff,
12573 Expression::Literal(Literal::Number(
12574 "365.0".to_string(),
12575 )),
12576 ),
12577 )),
12578 _ => paren_diff,
12579 };
12580 return Ok(Expression::Cast(Box::new(Cast {
12581 this: adjusted,
12582 to: DataType::Int {
12583 length: None,
12584 integer_spelling: true,
12585 },
12586 trailing_comments: vec![],
12587 double_colon_syntax: false,
12588 format: None,
12589 default: None,
12590 })));
12591 }
12592
12593 // For other targets, remap to standard (unit, start, end) form
12594 let unit_ident =
12595 Expression::Identifier(Identifier::new(&unit_s));
12596 (unit_ident, date1, date2, unit_s)
12597 } else {
12598 let arg0 = args.remove(0);
12599 let arg1 = args.remove(0);
12600 let arg2 = args.remove(0);
12601 let unit_s = Self::get_unit_str_static(&arg0);
12602 (arg0, arg1, arg2, unit_s)
12603 };
12604
12605 // For Hive/Spark source, string literal dates need to be cast
12606 // Note: Databricks is excluded - it handles string args like standard SQL
12607 let is_hive_spark =
12608 matches!(source, DialectType::Hive | DialectType::Spark);
12609
12610 match target {
12611 DialectType::Snowflake => {
12612 let unit =
12613 Expression::Identifier(Identifier::new(&unit_str));
12614 // Use ensure_to_date_preserved to add TO_DATE with a marker
12615 // that prevents the Snowflake TO_DATE handler from converting it to CAST
12616 let d1 = if is_hive_spark {
12617 Self::ensure_to_date_preserved(arg1)
12618 } else {
12619 arg1
12620 };
12621 let d2 = if is_hive_spark {
12622 Self::ensure_to_date_preserved(arg2)
12623 } else {
12624 arg2
12625 };
12626 Ok(Expression::Function(Box::new(Function::new(
12627 "DATEDIFF".to_string(),
12628 vec![unit, d1, d2],
12629 ))))
12630 }
12631 DialectType::Redshift => {
12632 let unit =
12633 Expression::Identifier(Identifier::new(&unit_str));
12634 let d1 = if is_hive_spark {
12635 Self::ensure_cast_date(arg1)
12636 } else {
12637 arg1
12638 };
12639 let d2 = if is_hive_spark {
12640 Self::ensure_cast_date(arg2)
12641 } else {
12642 arg2
12643 };
12644 Ok(Expression::Function(Box::new(Function::new(
12645 "DATEDIFF".to_string(),
12646 vec![unit, d1, d2],
12647 ))))
12648 }
12649 DialectType::TSQL => {
12650 let unit =
12651 Expression::Identifier(Identifier::new(&unit_str));
12652 Ok(Expression::Function(Box::new(Function::new(
12653 "DATEDIFF".to_string(),
12654 vec![unit, arg1, arg2],
12655 ))))
12656 }
12657 DialectType::DuckDB => {
12658 let is_redshift_tsql = matches!(
12659 source,
12660 DialectType::Redshift | DialectType::TSQL
12661 );
12662 if is_hive_spark {
12663 // For Hive/Spark source, CAST string args to DATE and emit DATE_DIFF directly
12664 let d1 = Self::ensure_cast_date(arg1);
12665 let d2 = Self::ensure_cast_date(arg2);
12666 Ok(Expression::Function(Box::new(Function::new(
12667 "DATE_DIFF".to_string(),
12668 vec![Expression::string(&unit_str), d1, d2],
12669 ))))
12670 } else if matches!(source, DialectType::Snowflake) {
12671 // For Snowflake source: special handling per unit
12672 match unit_str.as_str() {
12673 "NANOSECOND" => {
12674 // DATEDIFF(NANOSECOND, start, end) -> EPOCH_NS(CAST(end AS TIMESTAMP_NS)) - EPOCH_NS(CAST(start AS TIMESTAMP_NS))
12675 fn cast_to_timestamp_ns(
12676 expr: Expression,
12677 ) -> Expression
12678 {
12679 Expression::Cast(Box::new(Cast {
12680 this: expr,
12681 to: DataType::Custom {
12682 name: "TIMESTAMP_NS".to_string(),
12683 },
12684 trailing_comments: vec![],
12685 double_colon_syntax: false,
12686 format: None,
12687 default: None,
12688 }))
12689 }
12690 let epoch_end = Expression::Function(Box::new(
12691 Function::new(
12692 "EPOCH_NS".to_string(),
12693 vec![cast_to_timestamp_ns(arg2)],
12694 ),
12695 ));
12696 let epoch_start = Expression::Function(
12697 Box::new(Function::new(
12698 "EPOCH_NS".to_string(),
12699 vec![cast_to_timestamp_ns(arg1)],
12700 )),
12701 );
12702 Ok(Expression::Sub(Box::new(BinaryOp::new(
12703 epoch_end,
12704 epoch_start,
12705 ))))
12706 }
12707 "WEEK" => {
12708 // DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST(x AS DATE)), DATE_TRUNC('WEEK', CAST(y AS DATE)))
12709 let d1 = Self::force_cast_date(arg1);
12710 let d2 = Self::force_cast_date(arg2);
12711 let dt1 = Expression::Function(Box::new(
12712 Function::new(
12713 "DATE_TRUNC".to_string(),
12714 vec![Expression::string("WEEK"), d1],
12715 ),
12716 ));
12717 let dt2 = Expression::Function(Box::new(
12718 Function::new(
12719 "DATE_TRUNC".to_string(),
12720 vec![Expression::string("WEEK"), d2],
12721 ),
12722 ));
12723 Ok(Expression::Function(Box::new(
12724 Function::new(
12725 "DATE_DIFF".to_string(),
12726 vec![
12727 Expression::string(&unit_str),
12728 dt1,
12729 dt2,
12730 ],
12731 ),
12732 )))
12733 }
12734 _ => {
12735 // YEAR, MONTH, QUARTER, DAY, etc.: CAST to DATE
12736 let d1 = Self::force_cast_date(arg1);
12737 let d2 = Self::force_cast_date(arg2);
12738 Ok(Expression::Function(Box::new(
12739 Function::new(
12740 "DATE_DIFF".to_string(),
12741 vec![
12742 Expression::string(&unit_str),
12743 d1,
12744 d2,
12745 ],
12746 ),
12747 )))
12748 }
12749 }
12750 } else if is_redshift_tsql {
12751 // For Redshift/TSQL source, CAST args to TIMESTAMP (always)
12752 let d1 = Self::force_cast_timestamp(arg1);
12753 let d2 = Self::force_cast_timestamp(arg2);
12754 Ok(Expression::Function(Box::new(Function::new(
12755 "DATE_DIFF".to_string(),
12756 vec![Expression::string(&unit_str), d1, d2],
12757 ))))
12758 } else {
12759 // Keep as DATEDIFF so DuckDB's transform_datediff handles
12760 // DATE_TRUNC for WEEK, CAST for string literals, etc.
12761 let unit =
12762 Expression::Identifier(Identifier::new(&unit_str));
12763 Ok(Expression::Function(Box::new(Function::new(
12764 "DATEDIFF".to_string(),
12765 vec![unit, arg1, arg2],
12766 ))))
12767 }
12768 }
12769 DialectType::BigQuery => {
12770 let is_redshift_tsql = matches!(
12771 source,
12772 DialectType::Redshift
12773 | DialectType::TSQL
12774 | DialectType::Snowflake
12775 );
12776 let cast_d1 = if is_hive_spark {
12777 Self::ensure_cast_date(arg1)
12778 } else if is_redshift_tsql {
12779 Self::force_cast_datetime(arg1)
12780 } else {
12781 Self::ensure_cast_datetime(arg1)
12782 };
12783 let cast_d2 = if is_hive_spark {
12784 Self::ensure_cast_date(arg2)
12785 } else if is_redshift_tsql {
12786 Self::force_cast_datetime(arg2)
12787 } else {
12788 Self::ensure_cast_datetime(arg2)
12789 };
12790 let unit =
12791 Expression::Identifier(Identifier::new(&unit_str));
12792 Ok(Expression::Function(Box::new(Function::new(
12793 "DATE_DIFF".to_string(),
12794 vec![cast_d2, cast_d1, unit],
12795 ))))
12796 }
12797 DialectType::Presto
12798 | DialectType::Trino
12799 | DialectType::Athena => {
12800 // For Hive/Spark source, string literals need double-cast: CAST(CAST(x AS TIMESTAMP) AS DATE)
12801 // For Redshift/TSQL source, args need CAST to TIMESTAMP (always)
12802 let is_redshift_tsql = matches!(
12803 source,
12804 DialectType::Redshift
12805 | DialectType::TSQL
12806 | DialectType::Snowflake
12807 );
12808 let d1 = if is_hive_spark {
12809 Self::double_cast_timestamp_date(arg1)
12810 } else if is_redshift_tsql {
12811 Self::force_cast_timestamp(arg1)
12812 } else {
12813 arg1
12814 };
12815 let d2 = if is_hive_spark {
12816 Self::double_cast_timestamp_date(arg2)
12817 } else if is_redshift_tsql {
12818 Self::force_cast_timestamp(arg2)
12819 } else {
12820 arg2
12821 };
12822 Ok(Expression::Function(Box::new(Function::new(
12823 "DATE_DIFF".to_string(),
12824 vec![Expression::string(&unit_str), d1, d2],
12825 ))))
12826 }
12827 DialectType::Hive => match unit_str.as_str() {
12828 "MONTH" => Ok(Expression::Cast(Box::new(Cast {
12829 this: Expression::Function(Box::new(Function::new(
12830 "MONTHS_BETWEEN".to_string(),
12831 vec![arg2, arg1],
12832 ))),
12833 to: DataType::Int {
12834 length: None,
12835 integer_spelling: false,
12836 },
12837 trailing_comments: vec![],
12838 double_colon_syntax: false,
12839 format: None,
12840 default: None,
12841 }))),
12842 "WEEK" => Ok(Expression::Cast(Box::new(Cast {
12843 this: Expression::Div(Box::new(
12844 crate::expressions::BinaryOp::new(
12845 Expression::Function(Box::new(Function::new(
12846 "DATEDIFF".to_string(),
12847 vec![arg2, arg1],
12848 ))),
12849 Expression::number(7),
12850 ),
12851 )),
12852 to: DataType::Int {
12853 length: None,
12854 integer_spelling: false,
12855 },
12856 trailing_comments: vec![],
12857 double_colon_syntax: false,
12858 format: None,
12859 default: None,
12860 }))),
12861 _ => Ok(Expression::Function(Box::new(Function::new(
12862 "DATEDIFF".to_string(),
12863 vec![arg2, arg1],
12864 )))),
12865 },
12866 DialectType::Spark | DialectType::Databricks => {
12867 let unit =
12868 Expression::Identifier(Identifier::new(&unit_str));
12869 Ok(Expression::Function(Box::new(Function::new(
12870 "DATEDIFF".to_string(),
12871 vec![unit, arg1, arg2],
12872 ))))
12873 }
12874 _ => {
12875 // For Hive/Spark source targeting PostgreSQL etc., cast string literals to DATE
12876 let d1 = if is_hive_spark {
12877 Self::ensure_cast_date(arg1)
12878 } else {
12879 arg1
12880 };
12881 let d2 = if is_hive_spark {
12882 Self::ensure_cast_date(arg2)
12883 } else {
12884 arg2
12885 };
12886 let unit =
12887 Expression::Identifier(Identifier::new(&unit_str));
12888 Ok(Expression::Function(Box::new(Function::new(
12889 "DATEDIFF".to_string(),
12890 vec![unit, d1, d2],
12891 ))))
12892 }
12893 }
12894 }
12895 // DATEDIFF(end, start) - 2-arg form from Hive/MySQL
"DATEDIFF" if f.args.len() == 2 => {
    // 2-arg DATEDIFF(end, start) from Hive/MySQL: arg0 is the END date,
    // arg1 is the START date (note the reversed order vs. 3-arg forms).
    let mut args = f.args;
    let arg0 = args.remove(0);
    let arg1 = args.remove(0);

    // Helper: unwrap TO_DATE(x) -> x (extracts inner arg)
    // Also recognizes TryCast/Cast to DATE that may have been produced by
    // cross-dialect TO_DATE -> TRY_CAST conversion. Returns (expr, flag)
    // where the flag means "this is already a date-valued expression".
    let unwrap_to_date = |e: Expression| -> (Expression, bool) {
        if let Expression::Function(ref f) = e {
            if f.name.eq_ignore_ascii_case("TO_DATE")
                && f.args.len() == 1
            {
                return (f.args[0].clone(), true);
            }
        }
        // Also recognize TryCast(x, Date) as an already-converted TO_DATE
        if let Expression::TryCast(ref c) = e {
            if matches!(c.to, DataType::Date) {
                return (e, true); // Already properly cast, return as-is
            }
        }
        (e, false)
    };

    match target {
        DialectType::DuckDB => {
            // DuckDB takes DATE_DIFF('DAY', start, end).
            // For Hive source, always CAST to DATE.
            // If arg is TO_DATE(x) or TRY_CAST(x AS DATE), use it directly.
            let cast_d0 = if matches!(
                source,
                DialectType::Hive
                    | DialectType::Spark
                    | DialectType::Databricks
            ) {
                let (inner, was_to_date) = unwrap_to_date(arg1);
                if was_to_date {
                    // Already a date expression, use directly
                    if matches!(&inner, Expression::TryCast(_)) {
                        inner // Already TRY_CAST(x AS DATE)
                    } else {
                        Self::try_cast_date(inner)
                    }
                } else {
                    Self::force_cast_date(inner)
                }
            } else {
                Self::ensure_cast_date(arg1)
            };
            let cast_d1 = if matches!(
                source,
                DialectType::Hive
                    | DialectType::Spark
                    | DialectType::Databricks
            ) {
                let (inner, was_to_date) = unwrap_to_date(arg0);
                if was_to_date {
                    if matches!(&inner, Expression::TryCast(_)) {
                        inner
                    } else {
                        Self::try_cast_date(inner)
                    }
                } else {
                    Self::force_cast_date(inner)
                }
            } else {
                Self::ensure_cast_date(arg0)
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_DIFF".to_string(),
                vec![Expression::string("DAY"), cast_d0, cast_d1],
            ))))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // For Hive/Spark source, apply double_cast_timestamp_date
            // For other sources (MySQL etc.), just swap args without casting
            if matches!(
                source,
                DialectType::Hive
                    | DialectType::Spark
                    | DialectType::Databricks
            ) {
                // NOTE(review): when the arg was TO_DATE(x), the double-cast
                // is applied twice, yielding a doubly-nested
                // CAST(CAST(... AS TIMESTAMP) AS DATE) — presumably this
                // mirrors the reference transpiler's output for
                // TO_DATE-wrapped args; confirm before simplifying.
                let cast_fn = |e: Expression| -> Expression {
                    let (inner, was_to_date) = unwrap_to_date(e);
                    if was_to_date {
                        let first_cast =
                            Self::double_cast_timestamp_date(inner);
                        Self::double_cast_timestamp_date(first_cast)
                    } else {
                        Self::double_cast_timestamp_date(inner)
                    }
                };
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_DIFF".to_string(),
                    vec![
                        Expression::string("DAY"),
                        cast_fn(arg1),
                        cast_fn(arg0),
                    ],
                ))))
            } else {
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_DIFF".to_string(),
                    vec![Expression::string("DAY"), arg1, arg0],
                ))))
            }
        }
        DialectType::Redshift => {
            // Redshift: DATEDIFF(DAY, start, end) with a bare identifier unit.
            let unit = Expression::Identifier(Identifier::new("DAY"));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![unit, arg1, arg0],
            ))))
        }
        // All other targets keep the original 2-arg (end, start) form.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "DATEDIFF".to_string(),
            vec![arg0, arg1],
        )))),
    }
}
13018 // DATE_DIFF(unit, start, end) - 3-arg with string unit (ClickHouse/DuckDB style)
13019 "DATE_DIFF" if f.args.len() == 3 => {
13020 let mut args = f.args;
13021 let arg0 = args.remove(0);
13022 let arg1 = args.remove(0);
13023 let arg2 = args.remove(0);
13024 let unit_str = Self::get_unit_str_static(&arg0);
13025
13026 match target {
13027 DialectType::DuckDB => {
13028 // DuckDB: DATE_DIFF('UNIT', start, end)
13029 Ok(Expression::Function(Box::new(Function::new(
13030 "DATE_DIFF".to_string(),
13031 vec![Expression::string(&unit_str), arg1, arg2],
13032 ))))
13033 }
13034 DialectType::Presto
13035 | DialectType::Trino
13036 | DialectType::Athena => {
13037 Ok(Expression::Function(Box::new(Function::new(
13038 "DATE_DIFF".to_string(),
13039 vec![Expression::string(&unit_str), arg1, arg2],
13040 ))))
13041 }
13042 DialectType::ClickHouse => {
13043 // ClickHouse: DATE_DIFF(UNIT, start, end) - identifier unit
13044 let unit =
13045 Expression::Identifier(Identifier::new(&unit_str));
13046 Ok(Expression::Function(Box::new(Function::new(
13047 "DATE_DIFF".to_string(),
13048 vec![unit, arg1, arg2],
13049 ))))
13050 }
13051 DialectType::Snowflake | DialectType::Redshift => {
13052 let unit =
13053 Expression::Identifier(Identifier::new(&unit_str));
13054 Ok(Expression::Function(Box::new(Function::new(
13055 "DATEDIFF".to_string(),
13056 vec![unit, arg1, arg2],
13057 ))))
13058 }
13059 _ => {
13060 let unit =
13061 Expression::Identifier(Identifier::new(&unit_str));
13062 Ok(Expression::Function(Box::new(Function::new(
13063 "DATEDIFF".to_string(),
13064 vec![unit, arg1, arg2],
13065 ))))
13066 }
13067 }
13068 }
13069 // DATEADD(unit, val, date) - 3-arg form
"DATEADD" if f.args.len() == 3 => {
    // 3-arg DATEADD(unit, amount, date): arg0 = unit, arg1 = amount,
    // arg2 = date expression.
    let mut args = f.args;
    let arg0 = args.remove(0);
    let arg1 = args.remove(0);
    let arg2 = args.remove(0);
    let unit_str = Self::get_unit_str_static(&arg0);

    // Normalize TSQL unit abbreviations to standard names
    let unit_str = match unit_str.as_str() {
        "YY" | "YYYY" => "YEAR".to_string(),
        "QQ" | "Q" => "QUARTER".to_string(),
        "MM" | "M" => "MONTH".to_string(),
        "WK" | "WW" => "WEEK".to_string(),
        "DD" | "D" | "DY" => "DAY".to_string(),
        "HH" => "HOUR".to_string(),
        "MI" | "N" => "MINUTE".to_string(),
        "SS" | "S" => "SECOND".to_string(),
        "MS" => "MILLISECOND".to_string(),
        "MCS" | "US" => "MICROSECOND".to_string(),
        _ => unit_str,
    };
    match target {
        DialectType::Snowflake => {
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            // Cast string literal to TIMESTAMP, but not for Snowflake source
            // (Snowflake natively accepts string literals in DATEADD)
            let arg2 = if matches!(
                &arg2,
                Expression::Literal(Literal::String(_))
            ) && !matches!(source, DialectType::Snowflake)
            {
                Expression::Cast(Box::new(Cast {
                    this: arg2,
                    to: DataType::Timestamp {
                        precision: None,
                        timezone: false,
                    },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                }))
            } else {
                arg2
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
        DialectType::TSQL => {
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            // Cast string literal to DATETIME2, but not when source is
            // Spark/Databricks family
            let arg2 = if matches!(
                &arg2,
                Expression::Literal(Literal::String(_))
            ) && !matches!(
                source,
                DialectType::Spark
                    | DialectType::Databricks
                    | DialectType::Hive
            ) {
                Expression::Cast(Box::new(Cast {
                    this: arg2,
                    to: DataType::Custom {
                        name: "DATETIME2".to_string(),
                    },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                }))
            } else {
                arg2
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
        DialectType::Redshift => {
            // Redshift keeps the native DATEADD(unit, amount, date) shape.
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
        DialectType::Databricks => {
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            // Sources with native DATEADD (TSQL, Databricks, Snowflake) -> DATEADD
            // Other sources (Redshift TsOrDsAdd, etc.) -> DATE_ADD
            let func_name = if matches!(
                source,
                DialectType::TSQL
                    | DialectType::Fabric
                    | DialectType::Databricks
                    | DialectType::Snowflake
            ) {
                "DATEADD"
            } else {
                "DATE_ADD"
            };
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
        DialectType::DuckDB => {
            // Special handling for NANOSECOND from Snowflake
            if unit_str == "NANOSECOND"
                && matches!(source, DialectType::Snowflake)
            {
                // DATEADD(NANOSECOND, offset, ts) ->
                // MAKE_TIMESTAMP_NS(EPOCH_NS(CAST(ts AS TIMESTAMP_NS)) + offset)
                let cast_ts = Expression::Cast(Box::new(Cast {
                    this: arg2,
                    to: DataType::Custom {
                        name: "TIMESTAMP_NS".to_string(),
                    },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                }));
                let epoch_ns =
                    Expression::Function(Box::new(Function::new(
                        "EPOCH_NS".to_string(),
                        vec![cast_ts],
                    )));
                let sum = Expression::Add(Box::new(BinaryOp::new(
                    epoch_ns, arg1,
                )));
                Ok(Expression::Function(Box::new(Function::new(
                    "MAKE_TIMESTAMP_NS".to_string(),
                    vec![sum],
                ))))
            } else {
                // DuckDB: convert to date + INTERVAL syntax with CAST
                let iu = Self::parse_interval_unit_static(&unit_str);
                let interval = Expression::Interval(Box::new(
                    crate::expressions::Interval {
                        this: Some(arg1),
                        unit: Some(
                            crate::expressions::IntervalUnitSpec::Simple {
                                unit: iu,
                                use_plural: false,
                            },
                        ),
                    },
                ));
                // Cast string literal to TIMESTAMP
                let arg2 = if matches!(
                    &arg2,
                    Expression::Literal(Literal::String(_))
                ) {
                    Expression::Cast(Box::new(Cast {
                        this: arg2,
                        to: DataType::Timestamp {
                            precision: None,
                            timezone: false,
                        },
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    }))
                } else {
                    arg2
                };
                Ok(Expression::Add(Box::new(
                    crate::expressions::BinaryOp::new(arg2, interval),
                )))
            }
        }
        DialectType::Spark => {
            // For TSQL source: convert to ADD_MONTHS/DATE_ADD(date, val)
            // For other sources: keep 3-arg DATE_ADD(UNIT, val, date) form
            if matches!(source, DialectType::TSQL | DialectType::Fabric)
            {
                // Multiply the amount by a constant factor; folds literal
                // integer amounts at transpile time, otherwise emits
                // `expr * factor`.
                fn multiply_expr_spark(
                    expr: Expression,
                    factor: i64,
                ) -> Expression
                {
                    if let Expression::Literal(
                        crate::expressions::Literal::Number(n),
                    ) = &expr
                    {
                        if let Ok(val) = n.parse::<i64>() {
                            return Expression::Literal(
                                crate::expressions::Literal::Number(
                                    (val * factor).to_string(),
                                ),
                            );
                        }
                    }
                    Expression::Mul(Box::new(
                        crate::expressions::BinaryOp::new(
                            expr,
                            Expression::Literal(
                                crate::expressions::Literal::Number(
                                    factor.to_string(),
                                ),
                            ),
                        ),
                    ))
                }
                // Re-normalize abbreviations defensively (the earlier
                // normalization already maps these, so the abbreviation
                // patterns here are redundant but harmless).
                let normalized_unit = match unit_str.as_str() {
                    "YEAR" | "YY" | "YYYY" => "YEAR",
                    "QUARTER" | "QQ" | "Q" => "QUARTER",
                    "MONTH" | "MM" | "M" => "MONTH",
                    "WEEK" | "WK" | "WW" => "WEEK",
                    "DAY" | "DD" | "D" | "DY" => "DAY",
                    _ => &unit_str,
                };
                match normalized_unit {
                    // YEAR/QUARTER become month counts for ADD_MONTHS.
                    "YEAR" => {
                        let months = multiply_expr_spark(arg1, 12);
                        Ok(Expression::Function(Box::new(
                            Function::new(
                                "ADD_MONTHS".to_string(),
                                vec![arg2, months],
                            ),
                        )))
                    }
                    "QUARTER" => {
                        let months = multiply_expr_spark(arg1, 3);
                        Ok(Expression::Function(Box::new(
                            Function::new(
                                "ADD_MONTHS".to_string(),
                                vec![arg2, months],
                            ),
                        )))
                    }
                    "MONTH" => Ok(Expression::Function(Box::new(
                        Function::new(
                            "ADD_MONTHS".to_string(),
                            vec![arg2, arg1],
                        ),
                    ))),
                    // WEEK becomes a day count for 2-arg DATE_ADD.
                    "WEEK" => {
                        let days = multiply_expr_spark(arg1, 7);
                        Ok(Expression::Function(Box::new(
                            Function::new(
                                "DATE_ADD".to_string(),
                                vec![arg2, days],
                            ),
                        )))
                    }
                    "DAY" => Ok(Expression::Function(Box::new(
                        Function::new(
                            "DATE_ADD".to_string(),
                            vec![arg2, arg1],
                        ),
                    ))),
                    // Sub-day and unknown units keep the 3-arg form.
                    _ => {
                        let unit = Expression::Identifier(
                            Identifier::new(&unit_str),
                        );
                        Ok(Expression::Function(Box::new(
                            Function::new(
                                "DATE_ADD".to_string(),
                                vec![unit, arg1, arg2],
                            ),
                        )))
                    }
                }
            } else {
                // Non-TSQL source: keep 3-arg DATE_ADD(UNIT, val, date)
                let unit =
                    Expression::Identifier(Identifier::new(&unit_str));
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![unit, arg1, arg2],
                ))))
            }
        }
        DialectType::Hive => match unit_str.as_str() {
            "MONTH" => {
                Ok(Expression::Function(Box::new(Function::new(
                    "ADD_MONTHS".to_string(),
                    vec![arg2, arg1],
                ))))
            }
            // NOTE(review): non-MONTH units fall through to 2-arg
            // DATE_ADD(date, n), which drops the unit — verify callers
            // only reach this path with DAY-like units.
            _ => Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![arg2, arg1],
            )))),
        },
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // Cast string literal date to TIMESTAMP
            let arg2 = if matches!(
                &arg2,
                Expression::Literal(Literal::String(_))
            ) {
                Expression::Cast(Box::new(Cast {
                    this: arg2,
                    to: DataType::Timestamp {
                        precision: None,
                        timezone: false,
                    },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                }))
            } else {
                arg2
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![Expression::string(&unit_str), arg1, arg2],
            ))))
        }
        DialectType::MySQL => {
            // MySQL uses the dedicated DateAdd AST node
            // (renders as INTERVAL syntax).
            let iu = Self::parse_interval_unit_static(&unit_str);
            Ok(Expression::DateAdd(Box::new(
                crate::expressions::DateAddFunc {
                    this: arg2,
                    interval: arg1,
                    unit: iu,
                },
            )))
        }
        DialectType::PostgreSQL => {
            // Cast string literal date to TIMESTAMP
            let arg2 = if matches!(
                &arg2,
                Expression::Literal(Literal::String(_))
            ) {
                Expression::Cast(Box::new(Cast {
                    this: arg2,
                    to: DataType::Timestamp {
                        precision: None,
                        timezone: false,
                    },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                }))
            } else {
                arg2
            };
            // PostgreSQL: date + INTERVAL 'amount unit' (amount folded
            // into the interval string).
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(Expression::string(&format!(
                        "{} {}",
                        Self::expr_to_string_static(&arg1),
                        unit_str
                    ))),
                    unit: None,
                },
            ));
            Ok(Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(arg2, interval),
            )))
        }
        DialectType::BigQuery => {
            let iu = Self::parse_interval_unit_static(&unit_str);
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(arg1),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit: iu,
                            use_plural: false,
                        },
                    ),
                },
            ));
            // Non-TSQL sources: CAST string literal to DATETIME
            let arg2 = if !matches!(
                source,
                DialectType::TSQL | DialectType::Fabric
            ) && matches!(
                &arg2,
                Expression::Literal(Literal::String(_))
            ) {
                Expression::Cast(Box::new(Cast {
                    this: arg2,
                    to: DataType::Custom {
                        name: "DATETIME".to_string(),
                    },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                }))
            } else {
                arg2
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![arg2, interval],
            ))))
        }
        // Default: keep DATEADD(unit, amount, date) unchanged.
        _ => {
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
    }
}
13475 // DATE_ADD - 3-arg: either (unit, val, date) from Presto/ClickHouse
13476 // or (date, val, 'UNIT') from Generic canonical form
"DATE_ADD" if f.args.len() == 3 => {
    // 3-arg DATE_ADD: either (unit, amount, date) from Presto/ClickHouse,
    // or the Generic canonical form (date, amount, 'UNIT').
    let mut args = f.args;
    let arg0 = args.remove(0);
    let arg1 = args.remove(0);
    let arg2 = args.remove(0);
    // Detect Generic canonical form: DATE_ADD(date, amount, 'UNIT')
    // where arg2 is a string literal matching a unit name
    let arg2_unit = match &arg2 {
        Expression::Literal(Literal::String(s)) => {
            let u = s.to_uppercase();
            if matches!(
                u.as_str(),
                "DAY"
                    | "MONTH"
                    | "YEAR"
                    | "HOUR"
                    | "MINUTE"
                    | "SECOND"
                    | "WEEK"
                    | "QUARTER"
                    | "MILLISECOND"
                    | "MICROSECOND"
            ) {
                Some(u)
            } else {
                None
            }
        }
        _ => None,
    };
    // Reorder: if arg2 is the unit, swap to (unit, val, date) form
    let (unit_str, val, date) = if let Some(u) = arg2_unit {
        (u, arg1, arg0)
    } else {
        (Self::get_unit_str_static(&arg0), arg1, arg2)
    };
    // Alias for backward compat with the rest of the match
    let arg1 = val;
    let arg2 = date;

    match target {
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // Presto family: DATE_ADD('unit', amount, date), string unit.
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![Expression::string(&unit_str), arg1, arg2],
            ))))
        }
        DialectType::DuckDB => {
            // DuckDB: date + INTERVAL amount UNIT.
            let iu = Self::parse_interval_unit_static(&unit_str);
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(arg1),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit: iu,
                            use_plural: false,
                        },
                    ),
                },
            ));
            Ok(Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(arg2, interval),
            )))
        }
        DialectType::PostgreSQL
        | DialectType::Materialize
        | DialectType::RisingWave => {
            // PostgreSQL: x + INTERVAL '1 DAY'
            let amount_str = Self::expr_to_string_static(&arg1);
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(Expression::string(&format!(
                        "{} {}",
                        amount_str, unit_str
                    ))),
                    unit: None,
                },
            ));
            Ok(Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(arg2, interval),
            )))
        }
        DialectType::Snowflake
        | DialectType::TSQL
        | DialectType::Redshift => {
            // DATEADD(unit, amount, date) with identifier unit.
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
        DialectType::BigQuery
        | DialectType::MySQL
        | DialectType::Doris
        | DialectType::StarRocks
        | DialectType::Drill => {
            // DATE_ADD(date, INTERVAL amount UNIT)
            let iu = Self::parse_interval_unit_static(&unit_str);
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(arg1),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit: iu,
                            use_plural: false,
                        },
                    ),
                },
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![arg2, interval],
            ))))
        }
        DialectType::SQLite => {
            // SQLite: DATE(x, '1 DAY')
            // Build the string '1 DAY' from amount and unit
            // NOTE(review): non-literal amounts fall back to "1", silently
            // dropping the expression — confirm this best-effort behavior
            // is intended.
            let amount_str = match &arg1 {
                Expression::Literal(Literal::Number(n)) => n.clone(),
                _ => "1".to_string(),
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATE".to_string(),
                vec![
                    arg2,
                    Expression::string(format!(
                        "{} {}",
                        amount_str, unit_str
                    )),
                ],
            ))))
        }
        DialectType::Dremio => {
            // Dremio: DATE_ADD(date, amount) - drops unit
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![arg2, arg1],
            ))))
        }
        DialectType::Spark => {
            // Spark: DATE_ADD(date, val) for DAY, or DATEADD(UNIT, val, date)
            if unit_str == "DAY" {
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![arg2, arg1],
                ))))
            } else {
                let unit =
                    Expression::Identifier(Identifier::new(&unit_str));
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![unit, arg1, arg2],
                ))))
            }
        }
        DialectType::Databricks => {
            // Databricks keeps the 3-arg DATE_ADD form for all units.
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
        DialectType::Hive => {
            // Hive: DATE_ADD(date, val) for DAY
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![arg2, arg1],
            ))))
        }
        // Default: 3-arg DATE_ADD with identifier unit.
        _ => {
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
    }
}
13660 // DATE_ADD(date, days) - 2-arg Hive/Spark/Generic form (add days)
"DATE_ADD"
    if f.args.len() == 2
        && matches!(
            source,
            DialectType::Hive
                | DialectType::Spark
                | DialectType::Databricks
                | DialectType::Generic
        ) =>
{
    // 2-arg DATE_ADD(date, days) from Hive/Spark/Generic: adds whole days.
    let mut args = f.args;
    let date = args.remove(0);
    let days = args.remove(0);
    match target {
        DialectType::Hive | DialectType::Spark => {
            // Keep as DATE_ADD(date, days) for Hive/Spark
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![date, days],
            ))))
        }
        DialectType::Databricks => {
            // Databricks: DATEADD(DAY, days, date)
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new("DAY")),
                    days,
                    date,
                ],
            ))))
        }
        DialectType::DuckDB => {
            // DuckDB: CAST(date AS DATE) + INTERVAL days DAY
            let cast_date = Self::ensure_cast_date(date);
            // Wrap complex expressions (like Mul from DATE_SUB negation)
            // in Paren so the interval amount prints unambiguously
            let interval_val = if matches!(
                days,
                Expression::Mul(_)
                    | Expression::Sub(_)
                    | Expression::Add(_)
            ) {
                Expression::Paren(Box::new(crate::expressions::Paren {
                    this: days,
                    trailing_comments: vec![],
                }))
            } else {
                days
            };
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(interval_val),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit: crate::expressions::IntervalUnit::Day,
                            use_plural: false,
                        },
                    ),
                },
            ));
            Ok(Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(cast_date, interval),
            )))
        }
        DialectType::Snowflake => {
            // For Hive source with string literal date, use
            // CAST(CAST(date AS TIMESTAMP) AS DATE)
            let cast_date = if matches!(
                source,
                DialectType::Hive
                    | DialectType::Spark
                    | DialectType::Databricks
            ) {
                if matches!(
                    date,
                    Expression::Literal(Literal::String(_))
                ) {
                    Self::double_cast_timestamp_date(date)
                } else {
                    date
                }
            } else {
                date
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new("DAY")),
                    days,
                    cast_date,
                ],
            ))))
        }
        DialectType::Redshift => {
            // Redshift: DATEADD(DAY, days, date), no casting needed.
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new("DAY")),
                    days,
                    date,
                ],
            ))))
        }
        DialectType::TSQL | DialectType::Fabric => {
            // For Hive source with string literal date, use
            // CAST(CAST(date AS DATETIME2) AS DATE)
            // But Databricks DATE_ADD doesn't need this wrapping for TSQL
            let cast_date = if matches!(
                source,
                DialectType::Hive | DialectType::Spark
            ) {
                if matches!(
                    date,
                    Expression::Literal(Literal::String(_))
                ) {
                    Self::double_cast_datetime2_date(date)
                } else {
                    date
                }
            } else {
                date
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new("DAY")),
                    days,
                    cast_date,
                ],
            ))))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // For Hive source with string literal date, use
            // CAST(CAST(date AS TIMESTAMP) AS DATE)
            let cast_date = if matches!(
                source,
                DialectType::Hive
                    | DialectType::Spark
                    | DialectType::Databricks
            ) {
                if matches!(
                    date,
                    Expression::Literal(Literal::String(_))
                ) {
                    Self::double_cast_timestamp_date(date)
                } else {
                    date
                }
            } else {
                date
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![Expression::string("DAY"), days, cast_date],
            ))))
        }
        DialectType::BigQuery => {
            // For Hive/Spark source, wrap date in
            // CAST(CAST(date AS DATETIME) AS DATE).
            // NOTE(review): unlike the Snowflake/TSQL/Presto arms above,
            // this cast is applied unconditionally for Hive-family sources
            // (not only to string literals) — confirm the asymmetry is
            // intended.
            let cast_date = if matches!(
                source,
                DialectType::Hive
                    | DialectType::Spark
                    | DialectType::Databricks
            ) {
                Self::double_cast_datetime_date(date)
            } else {
                date
            };
            // Wrap complex expressions in Paren for interval
            let interval_val = if matches!(
                days,
                Expression::Mul(_)
                    | Expression::Sub(_)
                    | Expression::Add(_)
            ) {
                Expression::Paren(Box::new(crate::expressions::Paren {
                    this: days,
                    trailing_comments: vec![],
                }))
            } else {
                days
            };
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(interval_val),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit: crate::expressions::IntervalUnit::Day,
                            use_plural: false,
                        },
                    ),
                },
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![cast_date, interval],
            ))))
        }
        DialectType::MySQL => {
            // MySQL uses the dedicated DateAdd AST node (INTERVAL syntax).
            let iu = crate::expressions::IntervalUnit::Day;
            Ok(Expression::DateAdd(Box::new(
                crate::expressions::DateAddFunc {
                    this: date,
                    interval: days,
                    unit: iu,
                },
            )))
        }
        DialectType::PostgreSQL => {
            // PostgreSQL: date + INTERVAL 'days DAY' (amount folded into
            // the interval string).
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(Expression::string(&format!(
                        "{} DAY",
                        Self::expr_to_string_static(&days)
                    ))),
                    unit: None,
                },
            ));
            Ok(Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(date, interval),
            )))
        }
        DialectType::Doris
        | DialectType::StarRocks
        | DialectType::Drill => {
            // DATE_ADD(date, INTERVAL days DAY)
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(days),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit: crate::expressions::IntervalUnit::Day,
                            use_plural: false,
                        },
                    ),
                },
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![date, interval],
            ))))
        }
        // Default: keep the 2-arg DATE_ADD(date, days) form.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "DATE_ADD".to_string(),
            vec![date, days],
        )))),
    }
}
13908 // DATE_SUB(date, days) - 2-arg Hive/Spark form (subtract days)
13909 "DATE_SUB"
13910 if f.args.len() == 2
13911 && matches!(
13912 source,
13913 DialectType::Hive
13914 | DialectType::Spark
13915 | DialectType::Databricks
13916 ) =>
13917 {
13918 let mut args = f.args;
13919 let date = args.remove(0);
13920 let days = args.remove(0);
13921 // Helper to create days * -1
13922 let make_neg_days = |d: Expression| -> Expression {
13923 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
13924 d,
13925 Expression::Literal(Literal::Number("-1".to_string())),
13926 )))
13927 };
13928 let is_string_literal =
13929 matches!(date, Expression::Literal(Literal::String(_)));
13930 match target {
13931 DialectType::Hive
13932 | DialectType::Spark
13933 | DialectType::Databricks => {
13934 // Keep as DATE_SUB(date, days) for Hive/Spark
13935 Ok(Expression::Function(Box::new(Function::new(
13936 "DATE_SUB".to_string(),
13937 vec![date, days],
13938 ))))
13939 }
13940 DialectType::DuckDB => {
13941 let cast_date = Self::ensure_cast_date(date);
13942 let neg = make_neg_days(days);
13943 let interval = Expression::Interval(Box::new(
13944 crate::expressions::Interval {
13945 this: Some(Expression::Paren(Box::new(
13946 crate::expressions::Paren {
13947 this: neg,
13948 trailing_comments: vec![],
13949 },
13950 ))),
13951 unit: Some(
13952 crate::expressions::IntervalUnitSpec::Simple {
13953 unit: crate::expressions::IntervalUnit::Day,
13954 use_plural: false,
13955 },
13956 ),
13957 },
13958 ));
13959 Ok(Expression::Add(Box::new(
13960 crate::expressions::BinaryOp::new(cast_date, interval),
13961 )))
13962 }
13963 DialectType::Snowflake => {
13964 let cast_date = if is_string_literal {
13965 Self::double_cast_timestamp_date(date)
13966 } else {
13967 date
13968 };
13969 let neg = make_neg_days(days);
13970 Ok(Expression::Function(Box::new(Function::new(
13971 "DATEADD".to_string(),
13972 vec![
13973 Expression::Identifier(Identifier::new("DAY")),
13974 neg,
13975 cast_date,
13976 ],
13977 ))))
13978 }
13979 DialectType::Redshift => {
13980 let neg = make_neg_days(days);
13981 Ok(Expression::Function(Box::new(Function::new(
13982 "DATEADD".to_string(),
13983 vec![
13984 Expression::Identifier(Identifier::new("DAY")),
13985 neg,
13986 date,
13987 ],
13988 ))))
13989 }
13990 DialectType::TSQL | DialectType::Fabric => {
13991 let cast_date = if is_string_literal {
13992 Self::double_cast_datetime2_date(date)
13993 } else {
13994 date
13995 };
13996 let neg = make_neg_days(days);
13997 Ok(Expression::Function(Box::new(Function::new(
13998 "DATEADD".to_string(),
13999 vec![
14000 Expression::Identifier(Identifier::new("DAY")),
14001 neg,
14002 cast_date,
14003 ],
14004 ))))
14005 }
14006 DialectType::Presto
14007 | DialectType::Trino
14008 | DialectType::Athena => {
14009 let cast_date = if is_string_literal {
14010 Self::double_cast_timestamp_date(date)
14011 } else {
14012 date
14013 };
14014 let neg = make_neg_days(days);
14015 Ok(Expression::Function(Box::new(Function::new(
14016 "DATE_ADD".to_string(),
14017 vec![Expression::string("DAY"), neg, cast_date],
14018 ))))
14019 }
14020 DialectType::BigQuery => {
14021 let cast_date = if is_string_literal {
14022 Self::double_cast_datetime_date(date)
14023 } else {
14024 date
14025 };
14026 let neg = make_neg_days(days);
14027 let interval = Expression::Interval(Box::new(
14028 crate::expressions::Interval {
14029 this: Some(Expression::Paren(Box::new(
14030 crate::expressions::Paren {
14031 this: neg,
14032 trailing_comments: vec![],
14033 },
14034 ))),
14035 unit: Some(
14036 crate::expressions::IntervalUnitSpec::Simple {
14037 unit: crate::expressions::IntervalUnit::Day,
14038 use_plural: false,
14039 },
14040 ),
14041 },
14042 ));
14043 Ok(Expression::Function(Box::new(Function::new(
14044 "DATE_ADD".to_string(),
14045 vec![cast_date, interval],
14046 ))))
14047 }
14048 _ => Ok(Expression::Function(Box::new(Function::new(
14049 "DATE_SUB".to_string(),
14050 vec![date, days],
14051 )))),
14052 }
14053 }
14054 // ADD_MONTHS(date, val) -> target-specific
14055 "ADD_MONTHS" if f.args.len() == 2 => {
14056 let mut args = f.args;
14057 let date = args.remove(0);
14058 let val = args.remove(0);
14059 match target {
14060 DialectType::TSQL => {
14061 let cast_date = Self::ensure_cast_datetime2(date);
14062 Ok(Expression::Function(Box::new(Function::new(
14063 "DATEADD".to_string(),
14064 vec![
14065 Expression::Identifier(Identifier::new("MONTH")),
14066 val,
14067 cast_date,
14068 ],
14069 ))))
14070 }
14071 DialectType::DuckDB => {
14072 let interval = Expression::Interval(Box::new(
14073 crate::expressions::Interval {
14074 this: Some(val),
14075 unit: Some(
14076 crate::expressions::IntervalUnitSpec::Simple {
14077 unit:
14078 crate::expressions::IntervalUnit::Month,
14079 use_plural: false,
14080 },
14081 ),
14082 },
14083 ));
14084 Ok(Expression::Add(Box::new(
14085 crate::expressions::BinaryOp::new(date, interval),
14086 )))
14087 }
14088 DialectType::Snowflake => {
14089 // Keep ADD_MONTHS when source is Snowflake
14090 if matches!(source, DialectType::Snowflake) {
14091 Ok(Expression::Function(Box::new(Function::new(
14092 "ADD_MONTHS".to_string(),
14093 vec![date, val],
14094 ))))
14095 } else {
14096 Ok(Expression::Function(Box::new(Function::new(
14097 "DATEADD".to_string(),
14098 vec![
14099 Expression::Identifier(Identifier::new(
14100 "MONTH",
14101 )),
14102 val,
14103 date,
14104 ],
14105 ))))
14106 }
14107 }
14108 DialectType::Redshift => {
14109 Ok(Expression::Function(Box::new(Function::new(
14110 "DATEADD".to_string(),
14111 vec![
14112 Expression::Identifier(Identifier::new("MONTH")),
14113 val,
14114 date,
14115 ],
14116 ))))
14117 }
14118 DialectType::Presto
14119 | DialectType::Trino
14120 | DialectType::Athena => {
14121 Ok(Expression::Function(Box::new(Function::new(
14122 "DATE_ADD".to_string(),
14123 vec![Expression::string("MONTH"), val, date],
14124 ))))
14125 }
14126 DialectType::BigQuery => {
14127 let interval = Expression::Interval(Box::new(
14128 crate::expressions::Interval {
14129 this: Some(val),
14130 unit: Some(
14131 crate::expressions::IntervalUnitSpec::Simple {
14132 unit:
14133 crate::expressions::IntervalUnit::Month,
14134 use_plural: false,
14135 },
14136 ),
14137 },
14138 ));
14139 Ok(Expression::Function(Box::new(Function::new(
14140 "DATE_ADD".to_string(),
14141 vec![date, interval],
14142 ))))
14143 }
14144 _ => Ok(Expression::Function(Box::new(Function::new(
14145 "ADD_MONTHS".to_string(),
14146 vec![date, val],
14147 )))),
14148 }
14149 }
14150 // DATETRUNC(unit, date) - TSQL form -> DATE_TRUNC for other targets
14151 "DATETRUNC" if f.args.len() == 2 => {
14152 let mut args = f.args;
14153 let arg0 = args.remove(0);
14154 let arg1 = args.remove(0);
14155 let unit_str = Self::get_unit_str_static(&arg0);
14156 match target {
14157 DialectType::TSQL | DialectType::Fabric => {
14158 // Keep as DATETRUNC for TSQL - the target handler will uppercase the unit
14159 Ok(Expression::Function(Box::new(Function::new(
14160 "DATETRUNC".to_string(),
14161 vec![
14162 Expression::Identifier(Identifier::new(&unit_str)),
14163 arg1,
14164 ],
14165 ))))
14166 }
14167 DialectType::DuckDB => {
14168 // DuckDB: DATE_TRUNC('UNIT', expr) with CAST for string literals
14169 let date = Self::ensure_cast_timestamp(arg1);
14170 Ok(Expression::Function(Box::new(Function::new(
14171 "DATE_TRUNC".to_string(),
14172 vec![Expression::string(&unit_str), date],
14173 ))))
14174 }
14175 DialectType::ClickHouse => {
14176 // ClickHouse: dateTrunc('UNIT', expr)
14177 Ok(Expression::Function(Box::new(Function::new(
14178 "dateTrunc".to_string(),
14179 vec![Expression::string(&unit_str), arg1],
14180 ))))
14181 }
14182 _ => {
14183 // Standard: DATE_TRUNC('UNIT', expr)
14184 let unit = Expression::string(&unit_str);
14185 Ok(Expression::Function(Box::new(Function::new(
14186 "DATE_TRUNC".to_string(),
14187 vec![unit, arg1],
14188 ))))
14189 }
14190 }
14191 }
14192 // GETDATE() -> CURRENT_TIMESTAMP for non-TSQL targets
14193 "GETDATE" if f.args.is_empty() => match target {
14194 DialectType::TSQL => Ok(Expression::Function(f)),
14195 DialectType::Redshift => Ok(Expression::Function(Box::new(
14196 Function::new("GETDATE".to_string(), vec![]),
14197 ))),
14198 _ => Ok(Expression::CurrentTimestamp(
14199 crate::expressions::CurrentTimestamp {
14200 precision: None,
14201 sysdate: false,
14202 },
14203 )),
14204 },
14205 // TO_HEX(x) / HEX(x) -> target-specific hex function
14206 "TO_HEX" | "HEX" if f.args.len() == 1 => {
14207 let name = match target {
14208 DialectType::Presto | DialectType::Trino => "TO_HEX",
14209 DialectType::Spark
14210 | DialectType::Databricks
14211 | DialectType::Hive => "HEX",
14212 DialectType::DuckDB
14213 | DialectType::PostgreSQL
14214 | DialectType::Redshift => "TO_HEX",
14215 _ => &f.name,
14216 };
14217 Ok(Expression::Function(Box::new(Function::new(
14218 name.to_string(),
14219 f.args,
14220 ))))
14221 }
14222 // FROM_HEX(x) / UNHEX(x) -> target-specific hex decode function
14223 "FROM_HEX" | "UNHEX" if f.args.len() == 1 => {
14224 match target {
14225 DialectType::BigQuery => {
14226 // BigQuery: UNHEX(x) -> FROM_HEX(x)
14227 // Special case: UNHEX(MD5(x)) -> FROM_HEX(TO_HEX(MD5(x)))
14228 // because BigQuery MD5 returns BYTES, not hex string
14229 let arg = &f.args[0];
14230 let wrapped_arg = match arg {
14231 Expression::Function(inner_f)
14232 if inner_f.name.to_uppercase() == "MD5"
14233 || inner_f.name.to_uppercase() == "SHA1"
14234 || inner_f.name.to_uppercase() == "SHA256"
14235 || inner_f.name.to_uppercase() == "SHA512" =>
14236 {
14237 // Wrap hash function in TO_HEX for BigQuery
14238 Expression::Function(Box::new(Function::new(
14239 "TO_HEX".to_string(),
14240 vec![arg.clone()],
14241 )))
14242 }
14243 _ => f.args.into_iter().next().unwrap(),
14244 };
14245 Ok(Expression::Function(Box::new(Function::new(
14246 "FROM_HEX".to_string(),
14247 vec![wrapped_arg],
14248 ))))
14249 }
14250 _ => {
14251 let name = match target {
14252 DialectType::Presto | DialectType::Trino => "FROM_HEX",
14253 DialectType::Spark
14254 | DialectType::Databricks
14255 | DialectType::Hive => "UNHEX",
14256 _ => &f.name,
14257 };
14258 Ok(Expression::Function(Box::new(Function::new(
14259 name.to_string(),
14260 f.args,
14261 ))))
14262 }
14263 }
14264 }
14265 // TO_UTF8(x) -> ENCODE(x, 'utf-8') for Spark
14266 "TO_UTF8" if f.args.len() == 1 => match target {
14267 DialectType::Spark | DialectType::Databricks => {
14268 let mut args = f.args;
14269 args.push(Expression::string("utf-8"));
14270 Ok(Expression::Function(Box::new(Function::new(
14271 "ENCODE".to_string(),
14272 args,
14273 ))))
14274 }
14275 _ => Ok(Expression::Function(f)),
14276 },
14277 // FROM_UTF8(x) -> DECODE(x, 'utf-8') for Spark
14278 "FROM_UTF8" if f.args.len() == 1 => match target {
14279 DialectType::Spark | DialectType::Databricks => {
14280 let mut args = f.args;
14281 args.push(Expression::string("utf-8"));
14282 Ok(Expression::Function(Box::new(Function::new(
14283 "DECODE".to_string(),
14284 args,
14285 ))))
14286 }
14287 _ => Ok(Expression::Function(f)),
14288 },
14289 // STARTS_WITH(x, y) / STARTSWITH(x, y) -> target-specific
14290 "STARTS_WITH" | "STARTSWITH" if f.args.len() == 2 => {
14291 let name = match target {
14292 DialectType::Spark | DialectType::Databricks => "STARTSWITH",
14293 DialectType::Presto | DialectType::Trino => "STARTS_WITH",
14294 DialectType::PostgreSQL | DialectType::Redshift => {
14295 "STARTS_WITH"
14296 }
14297 _ => &f.name,
14298 };
14299 Ok(Expression::Function(Box::new(Function::new(
14300 name.to_string(),
14301 f.args,
14302 ))))
14303 }
14304 // APPROX_COUNT_DISTINCT(x) <-> APPROX_DISTINCT(x)
14305 "APPROX_COUNT_DISTINCT" if f.args.len() >= 1 => {
14306 let name = match target {
14307 DialectType::Presto
14308 | DialectType::Trino
14309 | DialectType::Athena => "APPROX_DISTINCT",
14310 _ => "APPROX_COUNT_DISTINCT",
14311 };
14312 Ok(Expression::Function(Box::new(Function::new(
14313 name.to_string(),
14314 f.args,
14315 ))))
14316 }
14317 // JSON_EXTRACT -> GET_JSON_OBJECT for Spark/Hive
14318 "JSON_EXTRACT"
14319 if f.args.len() == 2
14320 && !matches!(source, DialectType::BigQuery)
14321 && matches!(
14322 target,
14323 DialectType::Spark
14324 | DialectType::Databricks
14325 | DialectType::Hive
14326 ) =>
14327 {
14328 Ok(Expression::Function(Box::new(Function::new(
14329 "GET_JSON_OBJECT".to_string(),
14330 f.args,
14331 ))))
14332 }
14333 // JSON_EXTRACT(x, path) -> x -> path for SQLite (arrow syntax)
14334 "JSON_EXTRACT"
14335 if f.args.len() == 2 && matches!(target, DialectType::SQLite) =>
14336 {
14337 let mut args = f.args;
14338 let path = args.remove(1);
14339 let this = args.remove(0);
14340 Ok(Expression::JsonExtract(Box::new(
14341 crate::expressions::JsonExtractFunc {
14342 this,
14343 path,
14344 returning: None,
14345 arrow_syntax: true,
14346 hash_arrow_syntax: false,
14347 wrapper_option: None,
14348 quotes_option: None,
14349 on_scalar_string: false,
14350 on_error: None,
14351 },
14352 )))
14353 }
14354 // JSON_FORMAT(x) -> TO_JSON(x) for Spark, TO_JSON_STRING for BigQuery, CAST(TO_JSON(x) AS TEXT) for DuckDB
14355 "JSON_FORMAT" if f.args.len() == 1 => {
14356 match target {
14357 DialectType::Spark | DialectType::Databricks => {
14358 // Presto JSON_FORMAT(JSON '...') needs Spark's string-unquoting flow:
14359 // REGEXP_EXTRACT(TO_JSON(FROM_JSON('[...]', SCHEMA_OF_JSON('[...]'))), '^.(.*).$', 1)
14360 if matches!(
14361 source,
14362 DialectType::Presto
14363 | DialectType::Trino
14364 | DialectType::Athena
14365 ) {
14366 if let Some(Expression::ParseJson(pj)) = f.args.first()
14367 {
14368 if let Expression::Literal(Literal::String(s)) =
14369 &pj.this
14370 {
14371 let wrapped = Expression::Literal(
14372 Literal::String(format!("[{}]", s)),
14373 );
14374 let schema_of_json = Expression::Function(
14375 Box::new(Function::new(
14376 "SCHEMA_OF_JSON".to_string(),
14377 vec![wrapped.clone()],
14378 )),
14379 );
14380 let from_json = Expression::Function(Box::new(
14381 Function::new(
14382 "FROM_JSON".to_string(),
14383 vec![wrapped, schema_of_json],
14384 ),
14385 ));
14386 let to_json = Expression::Function(Box::new(
14387 Function::new(
14388 "TO_JSON".to_string(),
14389 vec![from_json],
14390 ),
14391 ));
14392 return Ok(Expression::Function(Box::new(
14393 Function::new(
14394 "REGEXP_EXTRACT".to_string(),
14395 vec![
14396 to_json,
14397 Expression::Literal(
14398 Literal::String(
14399 "^.(.*).$".to_string(),
14400 ),
14401 ),
14402 Expression::Literal(
14403 Literal::Number(
14404 "1".to_string(),
14405 ),
14406 ),
14407 ],
14408 ),
14409 )));
14410 }
14411 }
14412 }
14413
14414 // Strip inner CAST(... AS JSON) or TO_JSON() if present
14415 // The CastToJsonForSpark may have already converted CAST(x AS JSON) to TO_JSON(x)
14416 let mut args = f.args;
14417 if let Some(Expression::Cast(ref c)) = args.first() {
14418 if matches!(&c.to, DataType::Json | DataType::JsonB) {
14419 args = vec![c.this.clone()];
14420 }
14421 } else if let Some(Expression::Function(ref inner_f)) =
14422 args.first()
14423 {
14424 if inner_f.name.eq_ignore_ascii_case("TO_JSON")
14425 && inner_f.args.len() == 1
14426 {
14427 // Already TO_JSON(x) from CastToJsonForSpark, just use the inner arg
14428 args = inner_f.args.clone();
14429 }
14430 }
14431 Ok(Expression::Function(Box::new(Function::new(
14432 "TO_JSON".to_string(),
14433 args,
14434 ))))
14435 }
14436 DialectType::BigQuery => Ok(Expression::Function(Box::new(
14437 Function::new("TO_JSON_STRING".to_string(), f.args),
14438 ))),
14439 DialectType::DuckDB => {
14440 // CAST(TO_JSON(x) AS TEXT)
14441 let to_json = Expression::Function(Box::new(
14442 Function::new("TO_JSON".to_string(), f.args),
14443 ));
14444 Ok(Expression::Cast(Box::new(Cast {
14445 this: to_json,
14446 to: DataType::Text,
14447 trailing_comments: Vec::new(),
14448 double_colon_syntax: false,
14449 format: None,
14450 default: None,
14451 })))
14452 }
14453 _ => Ok(Expression::Function(f)),
14454 }
14455 }
14456 // SYSDATE -> CURRENT_TIMESTAMP for non-Oracle/Redshift/Snowflake targets
14457 "SYSDATE" if f.args.is_empty() => {
14458 match target {
14459 DialectType::Oracle | DialectType::Redshift => {
14460 Ok(Expression::Function(f))
14461 }
14462 DialectType::Snowflake => {
14463 // Snowflake uses SYSDATE() with parens
14464 let mut f = *f;
14465 f.no_parens = false;
14466 Ok(Expression::Function(Box::new(f)))
14467 }
14468 DialectType::DuckDB => {
14469 // DuckDB: SYSDATE() -> CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
14470 Ok(Expression::AtTimeZone(Box::new(
14471 crate::expressions::AtTimeZone {
14472 this: Expression::CurrentTimestamp(
14473 crate::expressions::CurrentTimestamp {
14474 precision: None,
14475 sysdate: false,
14476 },
14477 ),
14478 zone: Expression::Literal(Literal::String(
14479 "UTC".to_string(),
14480 )),
14481 },
14482 )))
14483 }
14484 _ => Ok(Expression::CurrentTimestamp(
14485 crate::expressions::CurrentTimestamp {
14486 precision: None,
14487 sysdate: true,
14488 },
14489 )),
14490 }
14491 }
14492 // LOGICAL_OR(x) -> BOOL_OR(x)
14493 "LOGICAL_OR" if f.args.len() == 1 => {
14494 let name = match target {
14495 DialectType::Spark | DialectType::Databricks => "BOOL_OR",
14496 _ => &f.name,
14497 };
14498 Ok(Expression::Function(Box::new(Function::new(
14499 name.to_string(),
14500 f.args,
14501 ))))
14502 }
14503 // LOGICAL_AND(x) -> BOOL_AND(x)
14504 "LOGICAL_AND" if f.args.len() == 1 => {
14505 let name = match target {
14506 DialectType::Spark | DialectType::Databricks => "BOOL_AND",
14507 _ => &f.name,
14508 };
14509 Ok(Expression::Function(Box::new(Function::new(
14510 name.to_string(),
14511 f.args,
14512 ))))
14513 }
14514 // MONTHS_ADD(d, n) -> ADD_MONTHS(d, n) for Oracle
14515 "MONTHS_ADD" if f.args.len() == 2 => match target {
14516 DialectType::Oracle => Ok(Expression::Function(Box::new(
14517 Function::new("ADD_MONTHS".to_string(), f.args),
14518 ))),
14519 _ => Ok(Expression::Function(f)),
14520 },
14521 // ARRAY_JOIN(arr, sep[, null_replacement]) -> target-specific
14522 "ARRAY_JOIN" if f.args.len() >= 2 => {
14523 match target {
14524 DialectType::Spark | DialectType::Databricks => {
14525 // Keep as ARRAY_JOIN for Spark (it supports null_replacement)
14526 Ok(Expression::Function(f))
14527 }
14528 DialectType::Hive => {
14529 // ARRAY_JOIN(arr, sep[, null_rep]) -> CONCAT_WS(sep, arr) (drop null_replacement)
14530 let mut args = f.args;
14531 let arr = args.remove(0);
14532 let sep = args.remove(0);
14533 // Drop any remaining args (null_replacement)
14534 Ok(Expression::Function(Box::new(Function::new(
14535 "CONCAT_WS".to_string(),
14536 vec![sep, arr],
14537 ))))
14538 }
14539 DialectType::Presto | DialectType::Trino => {
14540 Ok(Expression::Function(f))
14541 }
14542 _ => Ok(Expression::Function(f)),
14543 }
14544 }
14545 // LOCATE(substr, str, pos) 3-arg -> target-specific
14546 // For Presto/DuckDB: STRPOS doesn't support 3-arg, need complex expansion
14547 "LOCATE"
14548 if f.args.len() == 3
14549 && matches!(
14550 target,
14551 DialectType::Presto
14552 | DialectType::Trino
14553 | DialectType::Athena
14554 | DialectType::DuckDB
14555 ) =>
14556 {
14557 let mut args = f.args;
14558 let substr = args.remove(0);
14559 let string = args.remove(0);
14560 let pos = args.remove(0);
14561 // STRPOS(SUBSTRING(string, pos), substr)
14562 let substring_call = Expression::Function(Box::new(Function::new(
14563 "SUBSTRING".to_string(),
14564 vec![string.clone(), pos.clone()],
14565 )));
14566 let strpos_call = Expression::Function(Box::new(Function::new(
14567 "STRPOS".to_string(),
14568 vec![substring_call, substr.clone()],
14569 )));
14570 // STRPOS(...) + pos - 1
14571 let pos_adjusted =
14572 Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
14573 Expression::Add(Box::new(
14574 crate::expressions::BinaryOp::new(
14575 strpos_call.clone(),
14576 pos.clone(),
14577 ),
14578 )),
14579 Expression::number(1),
14580 )));
14581 // STRPOS(...) = 0
14582 let is_zero =
14583 Expression::Eq(Box::new(crate::expressions::BinaryOp::new(
14584 strpos_call.clone(),
14585 Expression::number(0),
14586 )));
14587
14588 match target {
14589 DialectType::Presto
14590 | DialectType::Trino
14591 | DialectType::Athena => {
14592 // IF(STRPOS(...) = 0, 0, STRPOS(...) + pos - 1)
14593 Ok(Expression::Function(Box::new(Function::new(
14594 "IF".to_string(),
14595 vec![is_zero, Expression::number(0), pos_adjusted],
14596 ))))
14597 }
14598 DialectType::DuckDB => {
14599 // CASE WHEN STRPOS(...) = 0 THEN 0 ELSE STRPOS(...) + pos - 1 END
14600 Ok(Expression::Case(Box::new(crate::expressions::Case {
14601 operand: None,
14602 whens: vec![(is_zero, Expression::number(0))],
14603 else_: Some(pos_adjusted),
14604 comments: Vec::new(),
14605 })))
14606 }
14607 _ => Ok(Expression::Function(Box::new(Function::new(
14608 "LOCATE".to_string(),
14609 vec![substr, string, pos],
14610 )))),
14611 }
14612 }
14613 // STRPOS(haystack, needle, occurrence) 3-arg -> INSTR(haystack, needle, 1, occurrence)
14614 "STRPOS"
14615 if f.args.len() == 3
14616 && matches!(
14617 target,
14618 DialectType::BigQuery
14619 | DialectType::Oracle
14620 | DialectType::Teradata
14621 ) =>
14622 {
14623 let mut args = f.args;
14624 let haystack = args.remove(0);
14625 let needle = args.remove(0);
14626 let occurrence = args.remove(0);
14627 Ok(Expression::Function(Box::new(Function::new(
14628 "INSTR".to_string(),
14629 vec![haystack, needle, Expression::number(1), occurrence],
14630 ))))
14631 }
14632 // SCHEMA_NAME(id) -> target-specific
14633 "SCHEMA_NAME" if f.args.len() <= 1 => match target {
14634 DialectType::MySQL | DialectType::SingleStore => {
14635 Ok(Expression::Function(Box::new(Function::new(
14636 "SCHEMA".to_string(),
14637 vec![],
14638 ))))
14639 }
14640 DialectType::PostgreSQL => Ok(Expression::CurrentSchema(Box::new(
14641 crate::expressions::CurrentSchema { this: None },
14642 ))),
14643 DialectType::SQLite => Ok(Expression::string("main")),
14644 _ => Ok(Expression::Function(f)),
14645 },
14646 // STRTOL(str, base) -> FROM_BASE(str, base) for Trino/Presto
14647 "STRTOL" if f.args.len() == 2 => match target {
14648 DialectType::Presto | DialectType::Trino => {
14649 Ok(Expression::Function(Box::new(Function::new(
14650 "FROM_BASE".to_string(),
14651 f.args,
14652 ))))
14653 }
14654 _ => Ok(Expression::Function(f)),
14655 },
14656 // EDITDIST3(a, b) -> LEVENSHTEIN(a, b) for Spark
14657 "EDITDIST3" if f.args.len() == 2 => match target {
14658 DialectType::Spark | DialectType::Databricks => {
14659 Ok(Expression::Function(Box::new(Function::new(
14660 "LEVENSHTEIN".to_string(),
14661 f.args,
14662 ))))
14663 }
14664 _ => Ok(Expression::Function(f)),
14665 },
14666 // FORMAT(num, decimals) from MySQL -> DuckDB FORMAT('{:,.Xf}', num)
14667 "FORMAT"
14668 if f.args.len() == 2
14669 && matches!(
14670 source,
14671 DialectType::MySQL | DialectType::SingleStore
14672 )
14673 && matches!(target, DialectType::DuckDB) =>
14674 {
14675 let mut args = f.args;
14676 let num_expr = args.remove(0);
14677 let decimals_expr = args.remove(0);
14678 // Extract decimal count
14679 let dec_count = match &decimals_expr {
14680 Expression::Literal(Literal::Number(n)) => n.clone(),
14681 _ => "0".to_string(),
14682 };
14683 let fmt_str = format!("{{:,.{}f}}", dec_count);
14684 Ok(Expression::Function(Box::new(Function::new(
14685 "FORMAT".to_string(),
14686 vec![Expression::string(&fmt_str), num_expr],
14687 ))))
14688 }
14689 // FORMAT(x, fmt) from TSQL -> DATE_FORMAT for Spark, or expand short codes
14690 "FORMAT"
14691 if f.args.len() == 2
14692 && matches!(
14693 source,
14694 DialectType::TSQL | DialectType::Fabric
14695 ) =>
14696 {
14697 let val_expr = f.args[0].clone();
14698 let fmt_expr = f.args[1].clone();
14699 // Expand unambiguous .NET single-char date format shortcodes to full patterns.
14700 // Only expand shortcodes that are NOT also valid numeric format specifiers.
14701 // Ambiguous: d, D, f, F, g, G (used for both dates and numbers)
14702 // Unambiguous date: m/M (Month day), t/T (Time), y/Y (Year month)
14703 let (expanded_fmt, is_shortcode) = match &fmt_expr {
14704 Expression::Literal(crate::expressions::Literal::String(s)) => {
14705 match s.as_str() {
14706 "m" | "M" => (Expression::string("MMMM d"), true),
14707 "t" => (Expression::string("h:mm tt"), true),
14708 "T" => (Expression::string("h:mm:ss tt"), true),
14709 "y" | "Y" => (Expression::string("MMMM yyyy"), true),
14710 _ => (fmt_expr.clone(), false),
14711 }
14712 }
14713 _ => (fmt_expr.clone(), false),
14714 };
14715 // Check if the format looks like a date format
14716 let is_date_format = is_shortcode
14717 || match &expanded_fmt {
14718 Expression::Literal(
14719 crate::expressions::Literal::String(s),
14720 ) => {
14721 // Date formats typically contain yyyy, MM, dd, MMMM, HH, etc.
14722 s.contains("yyyy")
14723 || s.contains("YYYY")
14724 || s.contains("MM")
14725 || s.contains("dd")
14726 || s.contains("MMMM")
14727 || s.contains("HH")
14728 || s.contains("hh")
14729 || s.contains("ss")
14730 }
14731 _ => false,
14732 };
14733 match target {
14734 DialectType::Spark | DialectType::Databricks => {
14735 let func_name = if is_date_format {
14736 "DATE_FORMAT"
14737 } else {
14738 "FORMAT_NUMBER"
14739 };
14740 Ok(Expression::Function(Box::new(Function::new(
14741 func_name.to_string(),
14742 vec![val_expr, expanded_fmt],
14743 ))))
14744 }
14745 _ => {
14746 // For TSQL and other targets, expand shortcodes but keep FORMAT
14747 if is_shortcode {
14748 Ok(Expression::Function(Box::new(Function::new(
14749 "FORMAT".to_string(),
14750 vec![val_expr, expanded_fmt],
14751 ))))
14752 } else {
14753 Ok(Expression::Function(f))
14754 }
14755 }
14756 }
14757 }
14758 // FORMAT('%s', x) from Trino/Presto -> target-specific
14759 "FORMAT"
14760 if f.args.len() >= 2
14761 && matches!(
14762 source,
14763 DialectType::Trino
14764 | DialectType::Presto
14765 | DialectType::Athena
14766 ) =>
14767 {
14768 let fmt_expr = f.args[0].clone();
14769 let value_args: Vec<Expression> = f.args[1..].to_vec();
14770 match target {
14771 // DuckDB: replace %s with {} in format string
14772 DialectType::DuckDB => {
14773 let new_fmt = match &fmt_expr {
14774 Expression::Literal(Literal::String(s)) => {
14775 Expression::Literal(Literal::String(
14776 s.replace("%s", "{}"),
14777 ))
14778 }
14779 _ => fmt_expr,
14780 };
14781 let mut args = vec![new_fmt];
14782 args.extend(value_args);
14783 Ok(Expression::Function(Box::new(Function::new(
14784 "FORMAT".to_string(),
14785 args,
14786 ))))
14787 }
14788 // Snowflake: FORMAT('%s', x) -> TO_CHAR(x) when just %s
14789 DialectType::Snowflake => match &fmt_expr {
14790 Expression::Literal(Literal::String(s))
14791 if s == "%s" && value_args.len() == 1 =>
14792 {
14793 Ok(Expression::Function(Box::new(Function::new(
14794 "TO_CHAR".to_string(),
14795 value_args,
14796 ))))
14797 }
14798 _ => Ok(Expression::Function(f)),
14799 },
14800 // Default: keep FORMAT as-is
14801 _ => Ok(Expression::Function(f)),
14802 }
14803 }
14804 // LIST_CONTAINS / LIST_HAS / ARRAY_CONTAINS -> target-specific
14805 "LIST_CONTAINS" | "LIST_HAS" | "ARRAY_CONTAINS"
14806 if f.args.len() == 2 =>
14807 {
14808 match target {
14809 DialectType::PostgreSQL | DialectType::Redshift => {
14810 // CASE WHEN needle IS NULL THEN NULL ELSE COALESCE(needle = ANY(arr), FALSE) END
14811 let arr = f.args[0].clone();
14812 let needle = f.args[1].clone();
14813 // Convert [] to ARRAY[] for PostgreSQL
14814 let pg_arr = match arr {
14815 Expression::Array(a) => Expression::ArrayFunc(
14816 Box::new(crate::expressions::ArrayConstructor {
14817 expressions: a.expressions,
14818 bracket_notation: false,
14819 use_list_keyword: false,
14820 }),
14821 ),
14822 _ => arr,
14823 };
14824 // needle = ANY(arr) using the Any quantified expression
14825 let any_expr = Expression::Any(Box::new(
14826 crate::expressions::QuantifiedExpr {
14827 this: needle.clone(),
14828 subquery: pg_arr,
14829 op: Some(crate::expressions::QuantifiedOp::Eq),
14830 },
14831 ));
14832 let coalesce = Expression::Coalesce(Box::new(
14833 crate::expressions::VarArgFunc {
14834 expressions: vec![
14835 any_expr,
14836 Expression::Boolean(
14837 crate::expressions::BooleanLiteral {
14838 value: false,
14839 },
14840 ),
14841 ],
14842 original_name: None,
14843 },
14844 ));
14845 let is_null_check = Expression::IsNull(Box::new(
14846 crate::expressions::IsNull {
14847 this: needle,
14848 not: false,
14849 postfix_form: false,
14850 },
14851 ));
14852 Ok(Expression::Case(Box::new(Case {
14853 operand: None,
14854 whens: vec![(
14855 is_null_check,
14856 Expression::Null(crate::expressions::Null),
14857 )],
14858 else_: Some(coalesce),
14859 comments: Vec::new(),
14860 })))
14861 }
14862 _ => Ok(Expression::Function(Box::new(Function::new(
14863 "ARRAY_CONTAINS".to_string(),
14864 f.args,
14865 )))),
14866 }
14867 }
14868 // LIST_HAS_ANY / ARRAY_HAS_ANY -> target-specific overlap operator
14869 "LIST_HAS_ANY" | "ARRAY_HAS_ANY" if f.args.len() == 2 => {
14870 match target {
14871 DialectType::PostgreSQL | DialectType::Redshift => {
14872 // arr1 && arr2 with ARRAY[] syntax
14873 let mut args = f.args;
14874 let arr1 = args.remove(0);
14875 let arr2 = args.remove(0);
14876 let pg_arr1 = match arr1 {
14877 Expression::Array(a) => Expression::ArrayFunc(
14878 Box::new(crate::expressions::ArrayConstructor {
14879 expressions: a.expressions,
14880 bracket_notation: false,
14881 use_list_keyword: false,
14882 }),
14883 ),
14884 _ => arr1,
14885 };
14886 let pg_arr2 = match arr2 {
14887 Expression::Array(a) => Expression::ArrayFunc(
14888 Box::new(crate::expressions::ArrayConstructor {
14889 expressions: a.expressions,
14890 bracket_notation: false,
14891 use_list_keyword: false,
14892 }),
14893 ),
14894 _ => arr2,
14895 };
14896 Ok(Expression::ArrayOverlaps(Box::new(BinaryOp::new(
14897 pg_arr1, pg_arr2,
14898 ))))
14899 }
14900 DialectType::DuckDB => {
14901 // DuckDB: arr1 && arr2 (native support)
14902 let mut args = f.args;
14903 let arr1 = args.remove(0);
14904 let arr2 = args.remove(0);
14905 Ok(Expression::ArrayOverlaps(Box::new(BinaryOp::new(
14906 arr1, arr2,
14907 ))))
14908 }
14909 _ => Ok(Expression::Function(Box::new(Function::new(
14910 "LIST_HAS_ANY".to_string(),
14911 f.args,
14912 )))),
14913 }
14914 }
14915 // APPROX_QUANTILE(x, q) -> target-specific
14916 "APPROX_QUANTILE" if f.args.len() == 2 => match target {
14917 DialectType::Snowflake => Ok(Expression::Function(Box::new(
14918 Function::new("APPROX_PERCENTILE".to_string(), f.args),
14919 ))),
14920 DialectType::DuckDB => Ok(Expression::Function(f)),
14921 _ => Ok(Expression::Function(f)),
14922 },
14923 // MAKE_DATE(y, m, d) -> DATE(y, m, d) for BigQuery
14924 "MAKE_DATE" if f.args.len() == 3 => match target {
14925 DialectType::BigQuery => Ok(Expression::Function(Box::new(
14926 Function::new("DATE".to_string(), f.args),
14927 ))),
14928 _ => Ok(Expression::Function(f)),
14929 },
14930 // RANGE(start, end[, step]) -> target-specific
14931 "RANGE"
14932 if f.args.len() >= 2 && !matches!(target, DialectType::DuckDB) =>
14933 {
14934 let start = f.args[0].clone();
14935 let end = f.args[1].clone();
14936 let step = f.args.get(2).cloned();
14937 match target {
14938 DialectType::Spark | DialectType::Databricks => {
14939 // RANGE(start, end) -> SEQUENCE(start, end-1)
14940 // RANGE(start, end, step) -> SEQUENCE(start, end-step, step) when step constant
14941 // RANGE(start, start) -> ARRAY() (empty)
14942 // RANGE(start, end, 0) -> ARRAY() (empty)
14943 // When end is variable: IF((end - 1) <= start, ARRAY(), SEQUENCE(start, (end - 1)))
14944
14945 // Check for constant args
14946 fn extract_i64(e: &Expression) -> Option<i64> {
14947 match e {
14948 Expression::Literal(Literal::Number(n)) => {
14949 n.parse::<i64>().ok()
14950 }
14951 Expression::Neg(u) => {
14952 if let Expression::Literal(Literal::Number(n)) =
14953 &u.this
14954 {
14955 n.parse::<i64>().ok().map(|v| -v)
14956 } else {
14957 None
14958 }
14959 }
14960 _ => None,
14961 }
14962 }
14963 let start_val = extract_i64(&start);
14964 let end_val = extract_i64(&end);
14965 let step_val = step.as_ref().and_then(|s| extract_i64(s));
14966
14967 // Check for RANGE(x, x) or RANGE(x, y, 0) -> empty array
14968 if step_val == Some(0) {
14969 return Ok(Expression::Function(Box::new(
14970 Function::new("ARRAY".to_string(), vec![]),
14971 )));
14972 }
14973 if let (Some(s), Some(e_val)) = (start_val, end_val) {
14974 if s == e_val {
14975 return Ok(Expression::Function(Box::new(
14976 Function::new("ARRAY".to_string(), vec![]),
14977 )));
14978 }
14979 }
14980
14981 if let (Some(_s_val), Some(e_val)) = (start_val, end_val) {
14982 // All constants - compute new end = end - step (if step provided) or end - 1
14983 match step_val {
14984 Some(st) if st < 0 => {
14985 // Negative step: SEQUENCE(start, end - step, step)
14986 let new_end = e_val - st; // end - step (= end + |step|)
14987 let mut args =
14988 vec![start, Expression::number(new_end)];
14989 if let Some(s) = step {
14990 args.push(s);
14991 }
14992 Ok(Expression::Function(Box::new(
14993 Function::new("SEQUENCE".to_string(), args),
14994 )))
14995 }
14996 Some(st) => {
14997 let new_end = e_val - st;
14998 let mut args =
14999 vec![start, Expression::number(new_end)];
15000 if let Some(s) = step {
15001 args.push(s);
15002 }
15003 Ok(Expression::Function(Box::new(
15004 Function::new("SEQUENCE".to_string(), args),
15005 )))
15006 }
15007 None => {
15008 // No step: SEQUENCE(start, end - 1)
15009 let new_end = e_val - 1;
15010 Ok(Expression::Function(Box::new(
15011 Function::new(
15012 "SEQUENCE".to_string(),
15013 vec![
15014 start,
15015 Expression::number(new_end),
15016 ],
15017 ),
15018 )))
15019 }
15020 }
15021 } else {
15022 // Variable end: IF((end - 1) <= start, ARRAY(), SEQUENCE(start, (end - 1)))
15023 let end_m1 = Expression::Sub(Box::new(BinaryOp::new(
15024 end.clone(),
15025 Expression::number(1),
15026 )));
15027 let cond = Expression::Lte(Box::new(BinaryOp::new(
15028 Expression::Paren(Box::new(Paren {
15029 this: end_m1.clone(),
15030 trailing_comments: Vec::new(),
15031 })),
15032 start.clone(),
15033 )));
15034 let empty = Expression::Function(Box::new(
15035 Function::new("ARRAY".to_string(), vec![]),
15036 ));
15037 let mut seq_args = vec![
15038 start,
15039 Expression::Paren(Box::new(Paren {
15040 this: end_m1,
15041 trailing_comments: Vec::new(),
15042 })),
15043 ];
15044 if let Some(s) = step {
15045 seq_args.push(s);
15046 }
15047 let seq = Expression::Function(Box::new(
15048 Function::new("SEQUENCE".to_string(), seq_args),
15049 ));
15050 Ok(Expression::IfFunc(Box::new(
15051 crate::expressions::IfFunc {
15052 condition: cond,
15053 true_value: empty,
15054 false_value: Some(seq),
15055 original_name: None,
15056 },
15057 )))
15058 }
15059 }
15060 DialectType::SQLite => {
15061 // RANGE(start, end) -> GENERATE_SERIES(start, end)
15062 // The subquery wrapping is handled at the Alias level
15063 let mut args = vec![start, end];
15064 if let Some(s) = step {
15065 args.push(s);
15066 }
15067 Ok(Expression::Function(Box::new(Function::new(
15068 "GENERATE_SERIES".to_string(),
15069 args,
15070 ))))
15071 }
15072 _ => Ok(Expression::Function(f)),
15073 }
15074 }
15075 // ARRAY_REVERSE_SORT -> target-specific
15076 // (handled above as well, but also need DuckDB self-normalization)
15077 // MAP_FROM_ARRAYS(keys, values) -> target-specific map construction
15078 "MAP_FROM_ARRAYS" if f.args.len() == 2 => match target {
15079 DialectType::Snowflake => Ok(Expression::Function(Box::new(
15080 Function::new("OBJECT_CONSTRUCT".to_string(), f.args),
15081 ))),
15082 DialectType::Spark | DialectType::Databricks => {
15083 Ok(Expression::Function(Box::new(Function::new(
15084 "MAP_FROM_ARRAYS".to_string(),
15085 f.args,
15086 ))))
15087 }
15088 _ => Ok(Expression::Function(Box::new(Function::new(
15089 "MAP".to_string(),
15090 f.args,
15091 )))),
15092 },
15093 // VARIANCE(x) -> varSamp(x) for ClickHouse
15094 "VARIANCE" if f.args.len() == 1 => match target {
15095 DialectType::ClickHouse => Ok(Expression::Function(Box::new(
15096 Function::new("varSamp".to_string(), f.args),
15097 ))),
15098 _ => Ok(Expression::Function(f)),
15099 },
15100 // STDDEV(x) -> stddevSamp(x) for ClickHouse
15101 "STDDEV" if f.args.len() == 1 => match target {
15102 DialectType::ClickHouse => Ok(Expression::Function(Box::new(
15103 Function::new("stddevSamp".to_string(), f.args),
15104 ))),
15105 _ => Ok(Expression::Function(f)),
15106 },
15107 // ISINF(x) -> IS_INF(x) for BigQuery
15108 "ISINF" if f.args.len() == 1 => match target {
15109 DialectType::BigQuery => Ok(Expression::Function(Box::new(
15110 Function::new("IS_INF".to_string(), f.args),
15111 ))),
15112 _ => Ok(Expression::Function(f)),
15113 },
15114 // CONTAINS(arr, x) -> ARRAY_CONTAINS(arr, x) for Spark/Hive
15115 "CONTAINS" if f.args.len() == 2 => match target {
15116 DialectType::Spark
15117 | DialectType::Databricks
15118 | DialectType::Hive => Ok(Expression::Function(Box::new(
15119 Function::new("ARRAY_CONTAINS".to_string(), f.args),
15120 ))),
15121 _ => Ok(Expression::Function(f)),
15122 },
15123 // ARRAY_CONTAINS(arr, x) -> CONTAINS(arr, x) for Presto
15124 "ARRAY_CONTAINS" if f.args.len() == 2 => match target {
15125 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
15126 Ok(Expression::Function(Box::new(Function::new(
15127 "CONTAINS".to_string(),
15128 f.args,
15129 ))))
15130 }
15131 DialectType::DuckDB => Ok(Expression::Function(Box::new(
15132 Function::new("ARRAY_CONTAINS".to_string(), f.args),
15133 ))),
15134 _ => Ok(Expression::Function(f)),
15135 },
15136 // TO_UNIXTIME(x) -> UNIX_TIMESTAMP(x) for Hive/Spark
15137 "TO_UNIXTIME" if f.args.len() == 1 => match target {
15138 DialectType::Hive
15139 | DialectType::Spark
15140 | DialectType::Databricks => Ok(Expression::Function(Box::new(
15141 Function::new("UNIX_TIMESTAMP".to_string(), f.args),
15142 ))),
15143 _ => Ok(Expression::Function(f)),
15144 },
15145 // FROM_UNIXTIME(x) -> target-specific
15146 "FROM_UNIXTIME" if f.args.len() == 1 => {
15147 match target {
15148 DialectType::Hive
15149 | DialectType::Spark
15150 | DialectType::Databricks
15151 | DialectType::Presto
15152 | DialectType::Trino => Ok(Expression::Function(f)),
15153 DialectType::DuckDB => {
15154 // DuckDB: TO_TIMESTAMP(x)
15155 let arg = f.args.into_iter().next().unwrap();
15156 Ok(Expression::Function(Box::new(Function::new(
15157 "TO_TIMESTAMP".to_string(),
15158 vec![arg],
15159 ))))
15160 }
15161 DialectType::PostgreSQL => {
15162 // PG: TO_TIMESTAMP(col)
15163 let arg = f.args.into_iter().next().unwrap();
15164 Ok(Expression::Function(Box::new(Function::new(
15165 "TO_TIMESTAMP".to_string(),
15166 vec![arg],
15167 ))))
15168 }
15169 DialectType::Redshift => {
15170 // Redshift: (TIMESTAMP 'epoch' + col * INTERVAL '1 SECOND')
15171 let arg = f.args.into_iter().next().unwrap();
15172 let epoch_ts = Expression::Literal(Literal::Timestamp(
15173 "epoch".to_string(),
15174 ));
15175 let interval = Expression::Interval(Box::new(
15176 crate::expressions::Interval {
15177 this: Some(Expression::string("1 SECOND")),
15178 unit: None,
15179 },
15180 ));
15181 let mul =
15182 Expression::Mul(Box::new(BinaryOp::new(arg, interval)));
15183 let add =
15184 Expression::Add(Box::new(BinaryOp::new(epoch_ts, mul)));
15185 Ok(Expression::Paren(Box::new(crate::expressions::Paren {
15186 this: add,
15187 trailing_comments: Vec::new(),
15188 })))
15189 }
15190 _ => Ok(Expression::Function(f)),
15191 }
15192 }
15193 // FROM_UNIXTIME(x, fmt) with 2 args from Hive/Spark -> target-specific
15194 "FROM_UNIXTIME"
15195 if f.args.len() == 2
15196 && matches!(
15197 source,
15198 DialectType::Hive
15199 | DialectType::Spark
15200 | DialectType::Databricks
15201 ) =>
15202 {
15203 let mut args = f.args;
15204 let unix_ts = args.remove(0);
15205 let fmt_expr = args.remove(0);
15206 match target {
15207 DialectType::DuckDB => {
15208 // DuckDB: STRFTIME(TO_TIMESTAMP(x), c_fmt)
15209 let to_ts = Expression::Function(Box::new(Function::new(
15210 "TO_TIMESTAMP".to_string(),
15211 vec![unix_ts],
15212 )));
15213 if let Expression::Literal(
15214 crate::expressions::Literal::String(s),
15215 ) = &fmt_expr
15216 {
15217 let c_fmt = Self::hive_format_to_c_format(s);
15218 Ok(Expression::Function(Box::new(Function::new(
15219 "STRFTIME".to_string(),
15220 vec![to_ts, Expression::string(&c_fmt)],
15221 ))))
15222 } else {
15223 Ok(Expression::Function(Box::new(Function::new(
15224 "STRFTIME".to_string(),
15225 vec![to_ts, fmt_expr],
15226 ))))
15227 }
15228 }
15229 DialectType::Presto
15230 | DialectType::Trino
15231 | DialectType::Athena => {
15232 // Presto: DATE_FORMAT(FROM_UNIXTIME(x), presto_fmt)
15233 let from_unix =
15234 Expression::Function(Box::new(Function::new(
15235 "FROM_UNIXTIME".to_string(),
15236 vec![unix_ts],
15237 )));
15238 if let Expression::Literal(
15239 crate::expressions::Literal::String(s),
15240 ) = &fmt_expr
15241 {
15242 let p_fmt = Self::hive_format_to_presto_format(s);
15243 Ok(Expression::Function(Box::new(Function::new(
15244 "DATE_FORMAT".to_string(),
15245 vec![from_unix, Expression::string(&p_fmt)],
15246 ))))
15247 } else {
15248 Ok(Expression::Function(Box::new(Function::new(
15249 "DATE_FORMAT".to_string(),
15250 vec![from_unix, fmt_expr],
15251 ))))
15252 }
15253 }
15254 _ => {
15255 // Keep as FROM_UNIXTIME(x, fmt) for other targets
15256 Ok(Expression::Function(Box::new(Function::new(
15257 "FROM_UNIXTIME".to_string(),
15258 vec![unix_ts, fmt_expr],
15259 ))))
15260 }
15261 }
15262 }
15263 // DATEPART(unit, expr) -> EXTRACT(unit FROM expr) for Spark
15264 "DATEPART" | "DATE_PART" if f.args.len() == 2 => {
15265 let unit_str = Self::get_unit_str_static(&f.args[0]);
15266 // Get the raw unit text preserving original case
15267 let raw_unit = match &f.args[0] {
15268 Expression::Identifier(id) => id.name.clone(),
15269 Expression::Literal(crate::expressions::Literal::String(s)) => {
15270 s.clone()
15271 }
15272 Expression::Column(col) => col.name.name.clone(),
15273 _ => unit_str.clone(),
15274 };
15275 match target {
15276 DialectType::TSQL | DialectType::Fabric => {
15277 // Preserve original case of unit for TSQL
15278 let unit_name = match unit_str.as_str() {
15279 "YY" | "YYYY" => "YEAR".to_string(),
15280 "QQ" | "Q" => "QUARTER".to_string(),
15281 "MM" | "M" => "MONTH".to_string(),
15282 "WK" | "WW" => "WEEK".to_string(),
15283 "DD" | "D" | "DY" => "DAY".to_string(),
15284 "HH" => "HOUR".to_string(),
15285 "MI" | "N" => "MINUTE".to_string(),
15286 "SS" | "S" => "SECOND".to_string(),
15287 _ => raw_unit.clone(), // preserve original case
15288 };
15289 let mut args = f.args;
15290 args[0] =
15291 Expression::Identifier(Identifier::new(&unit_name));
15292 Ok(Expression::Function(Box::new(Function::new(
15293 "DATEPART".to_string(),
15294 args,
15295 ))))
15296 }
15297 DialectType::Spark | DialectType::Databricks => {
15298 // DATEPART(unit, expr) -> EXTRACT(unit FROM expr)
15299 // Preserve original case for non-abbreviation units
15300 let unit = match unit_str.as_str() {
15301 "YY" | "YYYY" => "YEAR".to_string(),
15302 "QQ" | "Q" => "QUARTER".to_string(),
15303 "MM" | "M" => "MONTH".to_string(),
15304 "WK" | "WW" => "WEEK".to_string(),
15305 "DD" | "D" | "DY" => "DAY".to_string(),
15306 "HH" => "HOUR".to_string(),
15307 "MI" | "N" => "MINUTE".to_string(),
15308 "SS" | "S" => "SECOND".to_string(),
15309 _ => raw_unit, // preserve original case
15310 };
15311 Ok(Expression::Extract(Box::new(
15312 crate::expressions::ExtractFunc {
15313 this: f.args[1].clone(),
15314 field: crate::expressions::DateTimeField::Custom(
15315 unit,
15316 ),
15317 },
15318 )))
15319 }
15320 _ => Ok(Expression::Function(Box::new(Function::new(
15321 "DATE_PART".to_string(),
15322 f.args,
15323 )))),
15324 }
15325 }
15326 // DATENAME(mm, date) -> FORMAT(CAST(date AS DATETIME2), 'MMMM') for TSQL
15327 // DATENAME(dw, date) -> FORMAT(CAST(date AS DATETIME2), 'dddd') for TSQL
15328 // DATENAME(mm, date) -> DATE_FORMAT(CAST(date AS TIMESTAMP), 'MMMM') for Spark
15329 // DATENAME(dw, date) -> DATE_FORMAT(CAST(date AS TIMESTAMP), 'EEEE') for Spark
15330 "DATENAME" if f.args.len() == 2 => {
15331 let unit_str = Self::get_unit_str_static(&f.args[0]);
15332 let date_expr = f.args[1].clone();
15333 match unit_str.as_str() {
15334 "MM" | "M" | "MONTH" => match target {
15335 DialectType::TSQL => {
15336 let cast_date = Expression::Cast(Box::new(
15337 crate::expressions::Cast {
15338 this: date_expr,
15339 to: DataType::Custom {
15340 name: "DATETIME2".to_string(),
15341 },
15342 trailing_comments: Vec::new(),
15343 double_colon_syntax: false,
15344 format: None,
15345 default: None,
15346 },
15347 ));
15348 Ok(Expression::Function(Box::new(Function::new(
15349 "FORMAT".to_string(),
15350 vec![cast_date, Expression::string("MMMM")],
15351 ))))
15352 }
15353 DialectType::Spark | DialectType::Databricks => {
15354 let cast_date = Expression::Cast(Box::new(
15355 crate::expressions::Cast {
15356 this: date_expr,
15357 to: DataType::Timestamp {
15358 timezone: false,
15359 precision: None,
15360 },
15361 trailing_comments: Vec::new(),
15362 double_colon_syntax: false,
15363 format: None,
15364 default: None,
15365 },
15366 ));
15367 Ok(Expression::Function(Box::new(Function::new(
15368 "DATE_FORMAT".to_string(),
15369 vec![cast_date, Expression::string("MMMM")],
15370 ))))
15371 }
15372 _ => Ok(Expression::Function(f)),
15373 },
15374 "DW" | "WEEKDAY" => match target {
15375 DialectType::TSQL => {
15376 let cast_date = Expression::Cast(Box::new(
15377 crate::expressions::Cast {
15378 this: date_expr,
15379 to: DataType::Custom {
15380 name: "DATETIME2".to_string(),
15381 },
15382 trailing_comments: Vec::new(),
15383 double_colon_syntax: false,
15384 format: None,
15385 default: None,
15386 },
15387 ));
15388 Ok(Expression::Function(Box::new(Function::new(
15389 "FORMAT".to_string(),
15390 vec![cast_date, Expression::string("dddd")],
15391 ))))
15392 }
15393 DialectType::Spark | DialectType::Databricks => {
15394 let cast_date = Expression::Cast(Box::new(
15395 crate::expressions::Cast {
15396 this: date_expr,
15397 to: DataType::Timestamp {
15398 timezone: false,
15399 precision: None,
15400 },
15401 trailing_comments: Vec::new(),
15402 double_colon_syntax: false,
15403 format: None,
15404 default: None,
15405 },
15406 ));
15407 Ok(Expression::Function(Box::new(Function::new(
15408 "DATE_FORMAT".to_string(),
15409 vec![cast_date, Expression::string("EEEE")],
15410 ))))
15411 }
15412 _ => Ok(Expression::Function(f)),
15413 },
15414 _ => Ok(Expression::Function(f)),
15415 }
15416 }
15417 // STRING_AGG(x, sep) without WITHIN GROUP -> target-specific
15418 "STRING_AGG" if f.args.len() >= 2 => {
15419 let x = f.args[0].clone();
15420 let sep = f.args[1].clone();
15421 match target {
15422 DialectType::MySQL
15423 | DialectType::SingleStore
15424 | DialectType::Doris
15425 | DialectType::StarRocks => Ok(Expression::GroupConcat(
15426 Box::new(crate::expressions::GroupConcatFunc {
15427 this: x,
15428 separator: Some(sep),
15429 order_by: None,
15430 distinct: false,
15431 filter: None,
15432 }),
15433 )),
15434 DialectType::SQLite => Ok(Expression::GroupConcat(Box::new(
15435 crate::expressions::GroupConcatFunc {
15436 this: x,
15437 separator: Some(sep),
15438 order_by: None,
15439 distinct: false,
15440 filter: None,
15441 },
15442 ))),
15443 DialectType::PostgreSQL | DialectType::Redshift => {
15444 Ok(Expression::StringAgg(Box::new(
15445 crate::expressions::StringAggFunc {
15446 this: x,
15447 separator: Some(sep),
15448 order_by: None,
15449 distinct: false,
15450 filter: None,
15451 limit: None,
15452 },
15453 )))
15454 }
15455 _ => Ok(Expression::Function(f)),
15456 }
15457 }
15458 // JSON_ARRAYAGG -> JSON_AGG for PostgreSQL
15459 "JSON_ARRAYAGG" => match target {
15460 DialectType::PostgreSQL => {
15461 Ok(Expression::Function(Box::new(Function {
15462 name: "JSON_AGG".to_string(),
15463 ..(*f)
15464 })))
15465 }
15466 _ => Ok(Expression::Function(f)),
15467 },
15468 // SCHEMA_NAME(id) -> CURRENT_SCHEMA for PostgreSQL, 'main' for SQLite
15469 "SCHEMA_NAME" => match target {
15470 DialectType::PostgreSQL => Ok(Expression::CurrentSchema(Box::new(
15471 crate::expressions::CurrentSchema { this: None },
15472 ))),
15473 DialectType::SQLite => Ok(Expression::string("main")),
15474 _ => Ok(Expression::Function(f)),
15475 },
15476 // TO_TIMESTAMP(x, fmt) 2-arg from Spark/Hive: convert Java format to target format
15477 "TO_TIMESTAMP"
15478 if f.args.len() == 2
15479 && matches!(
15480 source,
15481 DialectType::Spark
15482 | DialectType::Databricks
15483 | DialectType::Hive
15484 )
15485 && matches!(target, DialectType::DuckDB) =>
15486 {
15487 let mut args = f.args;
15488 let val = args.remove(0);
15489 let fmt_expr = args.remove(0);
15490 if let Expression::Literal(Literal::String(ref s)) = fmt_expr {
15491 // Convert Java/Spark format to C strptime format
/// Translate a Java/Spark datetime pattern into a C `strptime`-style
/// format string for DuckDB.
///
/// Pass 1 rewrites the multi-character tokens (`yyyy`, `MM`, `dd`, …)
/// via ordered string replacement; pass 2 walks the result once to map
/// the timezone letters `z`/`Z`, taking care not to re-interpret the
/// `%x` specifiers already emitted by pass 1.
///
/// NOTE(review): `EEEE` is mapped to `%W` (week number in C formats),
/// not `%A` (full weekday name) — looks intentional to match existing
/// output, but worth confirming against DuckDB's STRFTIME docs.
fn java_to_c_fmt(fmt: &str) -> String {
    // Longest tokens first so e.g. "yyyy" is consumed before "yy".
    let pass1 = fmt
        .replace("yyyy", "%Y")
        .replace("SSSSSS", "%f")
        .replace("EEEE", "%W")
        .replace("MM", "%m")
        .replace("dd", "%d")
        .replace("HH", "%H")
        .replace("mm", "%M")
        .replace("ss", "%S")
        .replace("yy", "%y");
    let mut out = String::with_capacity(pass1.len());
    let mut it = pass1.chars();
    while let Some(c) = it.next() {
        match c {
            // Copy an already-emitted "%x" pair through untouched.
            '%' => {
                out.push('%');
                if let Some(spec) = it.next() {
                    out.push(spec);
                }
            }
            'z' => out.push_str("%Z"),
            'Z' => out.push_str("%z"),
            other => out.push(other),
        }
    }
    out
}
15524 let c_fmt = java_to_c_fmt(s);
15525 Ok(Expression::Function(Box::new(Function::new(
15526 "STRPTIME".to_string(),
15527 vec![val, Expression::string(&c_fmt)],
15528 ))))
15529 } else {
15530 Ok(Expression::Function(Box::new(Function::new(
15531 "STRPTIME".to_string(),
15532 vec![val, fmt_expr],
15533 ))))
15534 }
15535 }
15536 // TO_DATE(x) 1-arg from Doris: date conversion
15537 "TO_DATE"
15538 if f.args.len() == 1
15539 && matches!(
15540 source,
15541 DialectType::Doris | DialectType::StarRocks
15542 ) =>
15543 {
15544 let arg = f.args.into_iter().next().unwrap();
15545 match target {
15546 DialectType::Oracle
15547 | DialectType::DuckDB
15548 | DialectType::TSQL => {
15549 // CAST(x AS DATE)
15550 Ok(Expression::Cast(Box::new(Cast {
15551 this: arg,
15552 to: DataType::Date,
15553 double_colon_syntax: false,
15554 trailing_comments: vec![],
15555 format: None,
15556 default: None,
15557 })))
15558 }
15559 DialectType::MySQL | DialectType::SingleStore => {
15560 // DATE(x)
15561 Ok(Expression::Function(Box::new(Function::new(
15562 "DATE".to_string(),
15563 vec![arg],
15564 ))))
15565 }
15566 _ => {
15567 // Default: keep as TO_DATE(x) (Spark, PostgreSQL, etc.)
15568 Ok(Expression::Function(Box::new(Function::new(
15569 "TO_DATE".to_string(),
15570 vec![arg],
15571 ))))
15572 }
15573 }
15574 }
15575 // TO_DATE(x) 1-arg from Spark/Hive: safe date conversion
15576 "TO_DATE"
15577 if f.args.len() == 1
15578 && matches!(
15579 source,
15580 DialectType::Spark
15581 | DialectType::Databricks
15582 | DialectType::Hive
15583 ) =>
15584 {
15585 let arg = f.args.into_iter().next().unwrap();
15586 match target {
15587 DialectType::DuckDB => {
15588 // Spark TO_DATE is safe -> TRY_CAST(x AS DATE)
15589 Ok(Expression::TryCast(Box::new(Cast {
15590 this: arg,
15591 to: DataType::Date,
15592 double_colon_syntax: false,
15593 trailing_comments: vec![],
15594 format: None,
15595 default: None,
15596 })))
15597 }
15598 DialectType::Presto
15599 | DialectType::Trino
15600 | DialectType::Athena => {
15601 // CAST(CAST(x AS TIMESTAMP) AS DATE)
15602 Ok(Self::double_cast_timestamp_date(arg))
15603 }
15604 DialectType::Snowflake => {
15605 // Spark's TO_DATE is safe -> TRY_TO_DATE(x, 'yyyy-mm-DD')
15606 // The default Spark format 'yyyy-MM-dd' maps to Snowflake 'yyyy-mm-DD'
15607 Ok(Expression::Function(Box::new(Function::new(
15608 "TRY_TO_DATE".to_string(),
15609 vec![arg, Expression::string("yyyy-mm-DD")],
15610 ))))
15611 }
15612 _ => {
15613 // Default: keep as TO_DATE(x)
15614 Ok(Expression::Function(Box::new(Function::new(
15615 "TO_DATE".to_string(),
15616 vec![arg],
15617 ))))
15618 }
15619 }
15620 }
15621 // TO_DATE(x, fmt) 2-arg from Spark/Hive: format-based date conversion
15622 "TO_DATE"
15623 if f.args.len() == 2
15624 && matches!(
15625 source,
15626 DialectType::Spark
15627 | DialectType::Databricks
15628 | DialectType::Hive
15629 ) =>
15630 {
15631 let mut args = f.args;
15632 let val = args.remove(0);
15633 let fmt_expr = args.remove(0);
15634 let is_default_format = matches!(&fmt_expr, Expression::Literal(Literal::String(s)) if s == "yyyy-MM-dd");
15635
15636 if is_default_format {
15637 // Default format: same as 1-arg form
15638 match target {
15639 DialectType::DuckDB => {
15640 Ok(Expression::TryCast(Box::new(Cast {
15641 this: val,
15642 to: DataType::Date,
15643 double_colon_syntax: false,
15644 trailing_comments: vec![],
15645 format: None,
15646 default: None,
15647 })))
15648 }
15649 DialectType::Presto
15650 | DialectType::Trino
15651 | DialectType::Athena => {
15652 Ok(Self::double_cast_timestamp_date(val))
15653 }
15654 DialectType::Snowflake => {
15655 // TRY_TO_DATE(x, format) with Snowflake format mapping
15656 let sf_fmt = "yyyy-MM-dd"
15657 .replace("yyyy", "yyyy")
15658 .replace("MM", "mm")
15659 .replace("dd", "DD");
15660 Ok(Expression::Function(Box::new(Function::new(
15661 "TRY_TO_DATE".to_string(),
15662 vec![val, Expression::string(&sf_fmt)],
15663 ))))
15664 }
15665 _ => Ok(Expression::Function(Box::new(Function::new(
15666 "TO_DATE".to_string(),
15667 vec![val],
15668 )))),
15669 }
15670 } else {
15671 // Non-default format: use format-based parsing
15672 if let Expression::Literal(Literal::String(ref s)) = fmt_expr {
15673 match target {
15674 DialectType::DuckDB => {
15675 // CAST(CAST(TRY_STRPTIME(x, c_fmt) AS TIMESTAMP) AS DATE)
/// Convert a Java/Spark datetime pattern to a C `strptime` format for
/// DuckDB's TRY_STRPTIME.
///
/// Stage one substitutes the multi-letter tokens (ordered so "yyyy"
/// wins over "yy"); stage two scans the intermediate string a single
/// time to handle the timezone letters `z` / `Z`, skipping over the
/// `%x` pairs that stage one already produced so they are never
/// double-translated.
///
/// NOTE(review): this duplicates `java_to_c_fmt` used by the
/// TO_TIMESTAMP arm; keep the two mappings in sync if either changes.
fn java_to_c_fmt_todate(fmt: &str) -> String {
    let stage1 = fmt
        .replace("yyyy", "%Y")
        .replace("SSSSSS", "%f")
        .replace("EEEE", "%W")
        .replace("MM", "%m")
        .replace("dd", "%d")
        .replace("HH", "%H")
        .replace("mm", "%M")
        .replace("ss", "%S")
        .replace("yy", "%y");
    let mut out = String::with_capacity(stage1.len());
    let mut rest = stage1.chars();
    while let Some(c) = rest.next() {
        if c == '%' {
            // Preserve the whole "%x" specifier verbatim.
            out.push('%');
            if let Some(spec) = rest.next() {
                out.push(spec);
            }
        } else if c == 'z' {
            out.push_str("%Z");
        } else if c == 'Z' {
            out.push_str("%z");
        } else {
            out.push(c);
        }
    }
    out
}
15708 let c_fmt = java_to_c_fmt_todate(s);
15709 // CAST(CAST(TRY_STRPTIME(x, fmt) AS TIMESTAMP) AS DATE)
15710 let try_strptime =
15711 Expression::Function(Box::new(Function::new(
15712 "TRY_STRPTIME".to_string(),
15713 vec![val, Expression::string(&c_fmt)],
15714 )));
15715 let cast_ts = Expression::Cast(Box::new(Cast {
15716 this: try_strptime,
15717 to: DataType::Timestamp {
15718 precision: None,
15719 timezone: false,
15720 },
15721 double_colon_syntax: false,
15722 trailing_comments: vec![],
15723 format: None,
15724 default: None,
15725 }));
15726 Ok(Expression::Cast(Box::new(Cast {
15727 this: cast_ts,
15728 to: DataType::Date,
15729 double_colon_syntax: false,
15730 trailing_comments: vec![],
15731 format: None,
15732 default: None,
15733 })))
15734 }
15735 DialectType::Presto
15736 | DialectType::Trino
15737 | DialectType::Athena => {
15738 // CAST(DATE_PARSE(x, presto_fmt) AS DATE)
15739 let p_fmt = s
15740 .replace("yyyy", "%Y")
15741 .replace("SSSSSS", "%f")
15742 .replace("MM", "%m")
15743 .replace("dd", "%d")
15744 .replace("HH", "%H")
15745 .replace("mm", "%M")
15746 .replace("ss", "%S")
15747 .replace("yy", "%y");
15748 let date_parse =
15749 Expression::Function(Box::new(Function::new(
15750 "DATE_PARSE".to_string(),
15751 vec![val, Expression::string(&p_fmt)],
15752 )));
15753 Ok(Expression::Cast(Box::new(Cast {
15754 this: date_parse,
15755 to: DataType::Date,
15756 double_colon_syntax: false,
15757 trailing_comments: vec![],
15758 format: None,
15759 default: None,
15760 })))
15761 }
15762 DialectType::Snowflake => {
15763 // TRY_TO_DATE(x, snowflake_fmt)
15764 Ok(Expression::Function(Box::new(Function::new(
15765 "TRY_TO_DATE".to_string(),
15766 vec![val, Expression::string(s)],
15767 ))))
15768 }
15769 _ => Ok(Expression::Function(Box::new(Function::new(
15770 "TO_DATE".to_string(),
15771 vec![val, fmt_expr],
15772 )))),
15773 }
15774 } else {
15775 Ok(Expression::Function(Box::new(Function::new(
15776 "TO_DATE".to_string(),
15777 vec![val, fmt_expr],
15778 ))))
15779 }
15780 }
15781 }
15782 // TO_TIMESTAMP(x) 1-arg: epoch conversion
15783 "TO_TIMESTAMP"
15784 if f.args.len() == 1
15785 && matches!(source, DialectType::DuckDB)
15786 && matches!(
15787 target,
15788 DialectType::BigQuery
15789 | DialectType::Presto
15790 | DialectType::Trino
15791 | DialectType::Hive
15792 | DialectType::Spark
15793 | DialectType::Databricks
15794 | DialectType::Athena
15795 ) =>
15796 {
15797 let arg = f.args.into_iter().next().unwrap();
15798 let func_name = match target {
15799 DialectType::BigQuery => "TIMESTAMP_SECONDS",
15800 DialectType::Presto
15801 | DialectType::Trino
15802 | DialectType::Athena
15803 | DialectType::Hive
15804 | DialectType::Spark
15805 | DialectType::Databricks => "FROM_UNIXTIME",
15806 _ => "TO_TIMESTAMP",
15807 };
15808 Ok(Expression::Function(Box::new(Function::new(
15809 func_name.to_string(),
15810 vec![arg],
15811 ))))
15812 }
15813 // CONCAT(x) single-arg: -> CONCAT(COALESCE(x, '')) for Spark
15814 "CONCAT" if f.args.len() == 1 => {
15815 let arg = f.args.into_iter().next().unwrap();
15816 match target {
15817 DialectType::Presto
15818 | DialectType::Trino
15819 | DialectType::Athena => {
15820 // CONCAT(a) -> CAST(a AS VARCHAR)
15821 Ok(Expression::Cast(Box::new(Cast {
15822 this: arg,
15823 to: DataType::VarChar {
15824 length: None,
15825 parenthesized_length: false,
15826 },
15827 trailing_comments: vec![],
15828 double_colon_syntax: false,
15829 format: None,
15830 default: None,
15831 })))
15832 }
15833 DialectType::TSQL => {
15834 // CONCAT(a) -> a
15835 Ok(arg)
15836 }
15837 DialectType::DuckDB => {
15838 // Keep CONCAT(a) for DuckDB (native support)
15839 Ok(Expression::Function(Box::new(Function::new(
15840 "CONCAT".to_string(),
15841 vec![arg],
15842 ))))
15843 }
15844 DialectType::Spark | DialectType::Databricks => {
15845 let coalesced = Expression::Coalesce(Box::new(
15846 crate::expressions::VarArgFunc {
15847 expressions: vec![arg, Expression::string("")],
15848 original_name: None,
15849 },
15850 ));
15851 Ok(Expression::Function(Box::new(Function::new(
15852 "CONCAT".to_string(),
15853 vec![coalesced],
15854 ))))
15855 }
15856 _ => Ok(Expression::Function(Box::new(Function::new(
15857 "CONCAT".to_string(),
15858 vec![arg],
15859 )))),
15860 }
15861 }
15862 // REGEXP_EXTRACT(a, p) 2-arg: BigQuery default group is 0 (no 3rd arg needed)
15863 "REGEXP_EXTRACT"
15864 if f.args.len() == 3 && matches!(target, DialectType::BigQuery) =>
15865 {
15866 // If group_index is 0, drop it
15867 let drop_group = match &f.args[2] {
15868 Expression::Literal(Literal::Number(n)) => n == "0",
15869 _ => false,
15870 };
15871 if drop_group {
15872 let mut args = f.args;
15873 args.truncate(2);
15874 Ok(Expression::Function(Box::new(Function::new(
15875 "REGEXP_EXTRACT".to_string(),
15876 args,
15877 ))))
15878 } else {
15879 Ok(Expression::Function(f))
15880 }
15881 }
15882 // REGEXP_EXTRACT(a, pattern, group, flags) 4-arg -> REGEXP_SUBSTR for Snowflake
15883 "REGEXP_EXTRACT"
15884 if f.args.len() == 4
15885 && matches!(target, DialectType::Snowflake) =>
15886 {
15887 // REGEXP_EXTRACT(a, 'pattern', 2, 'i') -> REGEXP_SUBSTR(a, 'pattern', 1, 1, 'i', 2)
15888 let mut args = f.args;
15889 let this = args.remove(0);
15890 let pattern = args.remove(0);
15891 let group = args.remove(0);
15892 let flags = args.remove(0);
15893 Ok(Expression::Function(Box::new(Function::new(
15894 "REGEXP_SUBSTR".to_string(),
15895 vec![
15896 this,
15897 pattern,
15898 Expression::number(1),
15899 Expression::number(1),
15900 flags,
15901 group,
15902 ],
15903 ))))
15904 }
15905 // REGEXP_SUBSTR(a, pattern, position) 3-arg -> REGEXP_EXTRACT(SUBSTRING(a, pos), pattern)
15906 "REGEXP_SUBSTR"
15907 if f.args.len() == 3
15908 && matches!(
15909 target,
15910 DialectType::DuckDB
15911 | DialectType::Presto
15912 | DialectType::Trino
15913 | DialectType::Spark
15914 | DialectType::Databricks
15915 ) =>
15916 {
15917 let mut args = f.args;
15918 let this = args.remove(0);
15919 let pattern = args.remove(0);
15920 let position = args.remove(0);
15921 // Wrap subject in SUBSTRING(this, position) to apply the offset
15922 let substring_expr = Expression::Function(Box::new(Function::new(
15923 "SUBSTRING".to_string(),
15924 vec![this, position],
15925 )));
15926 let target_name = match target {
15927 DialectType::DuckDB => "REGEXP_EXTRACT",
15928 _ => "REGEXP_EXTRACT",
15929 };
15930 Ok(Expression::Function(Box::new(Function::new(
15931 target_name.to_string(),
15932 vec![substring_expr, pattern],
15933 ))))
15934 }
// TO_DAYS(x) -> (DATEDIFF(x, '0000-01-01') + 1) or target-specific
// MySQL's TO_DAYS counts days since year 0; here it is emulated as a
// day-difference from the '0000-01-01' epoch, plus one so the epoch itself
// is day 1.
"TO_DAYS" if f.args.len() == 1 => {
    let x = f.args.into_iter().next().unwrap();
    let epoch = Expression::string("0000-01-01");
    // Build the final target-specific expression directly
    let datediff_expr = match target {
        DialectType::MySQL | DialectType::SingleStore => {
            // MySQL: (DATEDIFF(x, '0000-01-01') + 1)
            Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![x, epoch],
            )))
        }
        DialectType::DuckDB => {
            // DuckDB: (DATE_DIFF('DAY', CAST('0000-01-01' AS DATE), CAST(x AS DATE)) + 1)
            let cast_epoch = Expression::Cast(Box::new(Cast {
                this: epoch,
                to: DataType::Date,
                trailing_comments: Vec::new(),
                double_colon_syntax: false,
                format: None,
                default: None,
            }));
            let cast_x = Expression::Cast(Box::new(Cast {
                this: x,
                to: DataType::Date,
                trailing_comments: Vec::new(),
                double_colon_syntax: false,
                format: None,
                default: None,
            }));
            Expression::Function(Box::new(Function::new(
                "DATE_DIFF".to_string(),
                vec![Expression::string("DAY"), cast_epoch, cast_x],
            )))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // Presto: (DATE_DIFF('DAY', CAST(CAST('0000-01-01' AS TIMESTAMP) AS DATE), CAST(CAST(x AS TIMESTAMP) AS DATE)) + 1)
            // The double CAST (-> TIMESTAMP -> DATE) is produced by a helper.
            let cast_epoch = Self::double_cast_timestamp_date(epoch);
            let cast_x = Self::double_cast_timestamp_date(x);
            Expression::Function(Box::new(Function::new(
                "DATE_DIFF".to_string(),
                vec![Expression::string("DAY"), cast_epoch, cast_x],
            )))
        }
        _ => {
            // Default: (DATEDIFF(x, '0000-01-01') + 1)
            Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![x, epoch],
            )))
        }
    };
    // Wrap the `+ 1` in parentheses so the result composes safely inside
    // larger expressions.
    let add_one = Expression::Add(Box::new(BinaryOp::new(
        datediff_expr,
        Expression::number(1),
    )));
    Ok(Expression::Paren(Box::new(crate::expressions::Paren {
        this: add_one,
        trailing_comments: Vec::new(),
    })))
}
// STR_TO_DATE(x, format) -> DATE_PARSE / STRPTIME / TO_DATE etc.
"STR_TO_DATE"
    if f.args.len() == 2
        && matches!(
            target,
            DialectType::Presto | DialectType::Trino
        ) =>
{
    let mut args = f.args;
    let x = args.remove(0);
    let format_expr = args.remove(0);
    // Check if the format contains time components.
    // Only decidable for string-literal formats; non-literal formats are
    // conservatively treated as date-only.
    let has_time =
        if let Expression::Literal(Literal::String(ref fmt)) =
            format_expr
        {
            fmt.contains("%H")
                || fmt.contains("%T")
                || fmt.contains("%M")
                || fmt.contains("%S")
                || fmt.contains("%I")
                || fmt.contains("%p")
        } else {
            false
        };
    let date_parse = Expression::Function(Box::new(Function::new(
        "DATE_PARSE".to_string(),
        vec![x, format_expr],
    )));
    if has_time {
        // Has time components: just DATE_PARSE
        Ok(date_parse)
    } else {
        // Date-only: CAST(DATE_PARSE(...) AS DATE)
        Ok(Expression::Cast(Box::new(Cast {
            this: date_parse,
            to: DataType::Date,
            trailing_comments: Vec::new(),
            double_colon_syntax: false,
            format: None,
            default: None,
        })))
    }
}
"STR_TO_DATE"
    if f.args.len() == 2
        && matches!(
            target,
            DialectType::PostgreSQL | DialectType::Redshift
        ) =>
{
    let mut args = f.args;
    let x = args.remove(0);
    let fmt = args.remove(0);
    // Translate strftime-style tokens into Postgres TO_DATE tokens.
    // Note %M maps to MI (minutes), strftime semantics — not MySQL's
    // month-name %M. Non-literal format expressions pass through untouched.
    let pg_fmt = match fmt {
        Expression::Literal(Literal::String(s)) => Expression::string(
            &s.replace("%Y", "YYYY")
                .replace("%m", "MM")
                .replace("%d", "DD")
                .replace("%H", "HH24")
                .replace("%M", "MI")
                .replace("%S", "SS"),
        ),
        other => other,
    };
    let to_date = Expression::Function(Box::new(Function::new(
        "TO_DATE".to_string(),
        vec![x, pg_fmt],
    )));
    // NOTE(review): the TO_DATE result is always cast to TIMESTAMP, even for
    // date-only formats — presumably to match STR_TO_DATE's return type;
    // confirm against the generator's expectations.
    Ok(Expression::Cast(Box::new(Cast {
        this: to_date,
        to: DataType::Timestamp {
            timezone: false,
            precision: None,
        },
        trailing_comments: Vec::new(),
        double_colon_syntax: false,
        format: None,
        default: None,
    })))
}
// RANGE(start, end) -> GENERATE_SERIES for SQLite
"RANGE"
    if (f.args.len() == 1 || f.args.len() == 2)
        && matches!(target, DialectType::SQLite) =>
{
    if f.args.len() == 2 {
        // RANGE(start, end) -> (SELECT value AS col_alias FROM GENERATE_SERIES(start, end))
        // For SQLite, RANGE is exclusive on end, GENERATE_SERIES is inclusive
        // NOTE(review): the arguments are forwarded unchanged, so the
        // exclusive/inclusive mismatch described above is NOT compensated
        // here (no `end - 1`) — confirm whether a later pass adjusts the
        // upper bound, otherwise this is off by one.
        let mut args = f.args;
        let start = args.remove(0);
        let end = args.remove(0);
        Ok(Expression::Function(Box::new(Function::new(
            "GENERATE_SERIES".to_string(),
            vec![start, end],
        ))))
    } else {
        // Single-argument RANGE is passed through unchanged.
        Ok(Expression::Function(f))
    }
}
// UNIFORM(low, high[, seed]) -> UNIFORM(low, high, RANDOM([seed])) for Snowflake
// When source is Snowflake, keep as-is (args already in correct form)
"UNIFORM"
    if matches!(target, DialectType::Snowflake)
        && (f.args.len() == 2 || f.args.len() == 3) =>
{
    if matches!(source, DialectType::Snowflake) {
        // Snowflake -> Snowflake: keep as-is
        Ok(Expression::Function(f))
    } else {
        let mut args = f.args;
        let low = args.remove(0);
        let high = args.remove(0);
        // Snowflake's UNIFORM takes a generator expression as its third
        // argument; wrap the optional seed in RANDOM(seed), or use a
        // zero-argument RANDOM() when no seed was given.
        let random = if !args.is_empty() {
            let seed = args.remove(0);
            Expression::Function(Box::new(Function::new(
                "RANDOM".to_string(),
                vec![seed],
            )))
        } else {
            Expression::Function(Box::new(Function::new(
                "RANDOM".to_string(),
                vec![],
            )))
        };
        Ok(Expression::Function(Box::new(Function::new(
            "UNIFORM".to_string(),
            vec![low, high, random],
        ))))
    }
}
16130 // TO_UTC_TIMESTAMP(ts, tz) -> target-specific UTC conversion
16131 "TO_UTC_TIMESTAMP" if f.args.len() == 2 => {
16132 let mut args = f.args;
16133 let ts_arg = args.remove(0);
16134 let tz_arg = args.remove(0);
16135 // Cast string literal to TIMESTAMP for all targets
16136 let ts_cast =
16137 if matches!(&ts_arg, Expression::Literal(Literal::String(_))) {
16138 Expression::Cast(Box::new(Cast {
16139 this: ts_arg,
16140 to: DataType::Timestamp {
16141 timezone: false,
16142 precision: None,
16143 },
16144 trailing_comments: vec![],
16145 double_colon_syntax: false,
16146 format: None,
16147 default: None,
16148 }))
16149 } else {
16150 ts_arg
16151 };
16152 match target {
16153 DialectType::Spark | DialectType::Databricks => {
16154 Ok(Expression::Function(Box::new(Function::new(
16155 "TO_UTC_TIMESTAMP".to_string(),
16156 vec![ts_cast, tz_arg],
16157 ))))
16158 }
16159 DialectType::Snowflake => {
16160 // CONVERT_TIMEZONE(tz, 'UTC', CAST(ts AS TIMESTAMP))
16161 Ok(Expression::Function(Box::new(Function::new(
16162 "CONVERT_TIMEZONE".to_string(),
16163 vec![tz_arg, Expression::string("UTC"), ts_cast],
16164 ))))
16165 }
16166 DialectType::Presto
16167 | DialectType::Trino
16168 | DialectType::Athena => {
16169 // WITH_TIMEZONE(CAST(ts AS TIMESTAMP), tz) AT TIME ZONE 'UTC'
16170 let wtz = Expression::Function(Box::new(Function::new(
16171 "WITH_TIMEZONE".to_string(),
16172 vec![ts_cast, tz_arg],
16173 )));
16174 Ok(Expression::AtTimeZone(Box::new(
16175 crate::expressions::AtTimeZone {
16176 this: wtz,
16177 zone: Expression::string("UTC"),
16178 },
16179 )))
16180 }
16181 DialectType::BigQuery => {
16182 // DATETIME(TIMESTAMP(CAST(ts AS DATETIME), tz), 'UTC')
16183 let cast_dt = Expression::Cast(Box::new(Cast {
16184 this: if let Expression::Cast(c) = ts_cast {
16185 c.this
16186 } else {
16187 ts_cast.clone()
16188 },
16189 to: DataType::Custom {
16190 name: "DATETIME".to_string(),
16191 },
16192 trailing_comments: vec![],
16193 double_colon_syntax: false,
16194 format: None,
16195 default: None,
16196 }));
16197 let ts_func =
16198 Expression::Function(Box::new(Function::new(
16199 "TIMESTAMP".to_string(),
16200 vec![cast_dt, tz_arg],
16201 )));
16202 Ok(Expression::Function(Box::new(Function::new(
16203 "DATETIME".to_string(),
16204 vec![ts_func, Expression::string("UTC")],
16205 ))))
16206 }
16207 _ => {
16208 // DuckDB, PostgreSQL, Redshift: CAST(ts AS TIMESTAMP) AT TIME ZONE tz AT TIME ZONE 'UTC'
16209 let atz1 = Expression::AtTimeZone(Box::new(
16210 crate::expressions::AtTimeZone {
16211 this: ts_cast,
16212 zone: tz_arg,
16213 },
16214 ));
16215 Ok(Expression::AtTimeZone(Box::new(
16216 crate::expressions::AtTimeZone {
16217 this: atz1,
16218 zone: Expression::string("UTC"),
16219 },
16220 )))
16221 }
16222 }
16223 }
16224 // FROM_UTC_TIMESTAMP(ts, tz) -> target-specific UTC conversion
16225 "FROM_UTC_TIMESTAMP" if f.args.len() == 2 => {
16226 let mut args = f.args;
16227 let ts_arg = args.remove(0);
16228 let tz_arg = args.remove(0);
16229 // Cast string literal to TIMESTAMP
16230 let ts_cast =
16231 if matches!(&ts_arg, Expression::Literal(Literal::String(_))) {
16232 Expression::Cast(Box::new(Cast {
16233 this: ts_arg,
16234 to: DataType::Timestamp {
16235 timezone: false,
16236 precision: None,
16237 },
16238 trailing_comments: vec![],
16239 double_colon_syntax: false,
16240 format: None,
16241 default: None,
16242 }))
16243 } else {
16244 ts_arg
16245 };
16246 match target {
16247 DialectType::Spark | DialectType::Databricks => {
16248 Ok(Expression::Function(Box::new(Function::new(
16249 "FROM_UTC_TIMESTAMP".to_string(),
16250 vec![ts_cast, tz_arg],
16251 ))))
16252 }
16253 DialectType::Presto
16254 | DialectType::Trino
16255 | DialectType::Athena => {
16256 // AT_TIMEZONE(CAST(ts AS TIMESTAMP), tz)
16257 Ok(Expression::Function(Box::new(Function::new(
16258 "AT_TIMEZONE".to_string(),
16259 vec![ts_cast, tz_arg],
16260 ))))
16261 }
16262 DialectType::Snowflake => {
16263 // CONVERT_TIMEZONE('UTC', tz, CAST(ts AS TIMESTAMP))
16264 Ok(Expression::Function(Box::new(Function::new(
16265 "CONVERT_TIMEZONE".to_string(),
16266 vec![Expression::string("UTC"), tz_arg, ts_cast],
16267 ))))
16268 }
16269 _ => {
16270 // DuckDB, PostgreSQL, Redshift: CAST(ts AS TIMESTAMP) AT TIME ZONE tz
16271 Ok(Expression::AtTimeZone(Box::new(
16272 crate::expressions::AtTimeZone {
16273 this: ts_cast,
16274 zone: tz_arg,
16275 },
16276 )))
16277 }
16278 }
16279 }
16280 // MAP_FROM_ARRAYS(keys, values) -> target-specific map construction
16281 "MAP_FROM_ARRAYS" if f.args.len() == 2 => {
16282 let name = match target {
16283 DialectType::Snowflake => "OBJECT_CONSTRUCT",
16284 _ => "MAP",
16285 };
16286 Ok(Expression::Function(Box::new(Function::new(
16287 name.to_string(),
16288 f.args,
16289 ))))
16290 }
16291 // STR_TO_MAP(s, pair_delim, kv_delim) -> SPLIT_TO_MAP for Presto
16292 "STR_TO_MAP" if f.args.len() >= 1 => match target {
16293 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
16294 Ok(Expression::Function(Box::new(Function::new(
16295 "SPLIT_TO_MAP".to_string(),
16296 f.args,
16297 ))))
16298 }
16299 _ => Ok(Expression::Function(f)),
16300 },
// TIME_TO_STR(x, fmt) -> Expression::TimeToStr for proper generation
"TIME_TO_STR" if f.args.len() == 2 => {
    let mut args = f.args;
    let this = args.remove(0);
    let fmt_expr = args.remove(0);
    // TimeToStr stores a plain String, so only a string-literal format can
    // be carried over.
    // NOTE(review): a non-literal format expression is silently discarded
    // and replaced with the "%Y-%m-%d %H:%M:%S" default — confirm this
    // fallback is intended.
    let format =
        if let Expression::Literal(Literal::String(s)) = fmt_expr {
            s
        } else {
            "%Y-%m-%d %H:%M:%S".to_string()
        };
    Ok(Expression::TimeToStr(Box::new(
        crate::expressions::TimeToStr {
            this: Box::new(this),
            format,
            culture: None,
            zone: None,
        },
    )))
}
// STR_TO_TIME(x, fmt) -> Expression::StrToTime for proper generation
"STR_TO_TIME" if f.args.len() == 2 => {
    let mut args = f.args;
    let this = args.remove(0);
    let fmt_expr = args.remove(0);
    // Same literal-only format handling (and default fallback) as
    // TIME_TO_STR above.
    let format =
        if let Expression::Literal(Literal::String(s)) = fmt_expr {
            s
        } else {
            "%Y-%m-%d %H:%M:%S".to_string()
        };
    Ok(Expression::StrToTime(Box::new(
        crate::expressions::StrToTime {
            this: Box::new(this),
            format,
            zone: None,
            safe: None,
            target_type: None,
        },
    )))
}
16342 // STR_TO_UNIX(x, fmt) -> Expression::StrToUnix for proper generation
16343 "STR_TO_UNIX" if f.args.len() >= 1 => {
16344 let mut args = f.args;
16345 let this = args.remove(0);
16346 let format = if !args.is_empty() {
16347 if let Expression::Literal(Literal::String(s)) = args.remove(0)
16348 {
16349 Some(s)
16350 } else {
16351 None
16352 }
16353 } else {
16354 None
16355 };
16356 Ok(Expression::StrToUnix(Box::new(
16357 crate::expressions::StrToUnix {
16358 this: Some(Box::new(this)),
16359 format,
16360 },
16361 )))
16362 }
16363 // TIME_TO_UNIX(x) -> Expression::TimeToUnix for proper generation
16364 "TIME_TO_UNIX" if f.args.len() == 1 => {
16365 let mut args = f.args;
16366 let this = args.remove(0);
16367 Ok(Expression::TimeToUnix(Box::new(
16368 crate::expressions::UnaryFunc {
16369 this,
16370 original_name: None,
16371 },
16372 )))
16373 }
16374 // UNIX_TO_STR(x, fmt) -> Expression::UnixToStr for proper generation
16375 "UNIX_TO_STR" if f.args.len() >= 1 => {
16376 let mut args = f.args;
16377 let this = args.remove(0);
16378 let format = if !args.is_empty() {
16379 if let Expression::Literal(Literal::String(s)) = args.remove(0)
16380 {
16381 Some(s)
16382 } else {
16383 None
16384 }
16385 } else {
16386 None
16387 };
16388 Ok(Expression::UnixToStr(Box::new(
16389 crate::expressions::UnixToStr {
16390 this: Box::new(this),
16391 format,
16392 },
16393 )))
16394 }
// UNIX_TO_TIME(x) -> Expression::UnixToTime for proper generation
"UNIX_TO_TIME" if f.args.len() == 1 => {
    let mut args = f.args;
    let this = args.remove(0);
    // All optional UnixToTime knobs (scale, zone, offsets, format) are left
    // unset; the generator applies target defaults.
    Ok(Expression::UnixToTime(Box::new(
        crate::expressions::UnixToTime {
            this: Box::new(this),
            scale: None,
            zone: None,
            hours: None,
            minutes: None,
            format: None,
            target_type: None,
        },
    )))
}
// TIME_STR_TO_DATE(x) -> Expression::TimeStrToDate for proper generation
"TIME_STR_TO_DATE" if f.args.len() == 1 => {
    let mut args = f.args;
    let this = args.remove(0);
    Ok(Expression::TimeStrToDate(Box::new(
        crate::expressions::UnaryFunc {
            this,
            original_name: None,
        },
    )))
}
// TIME_STR_TO_TIME(x) -> Expression::TimeStrToTime for proper generation
"TIME_STR_TO_TIME" if f.args.len() == 1 => {
    let mut args = f.args;
    let this = args.remove(0);
    Ok(Expression::TimeStrToTime(Box::new(
        crate::expressions::TimeStrToTime {
            this: Box::new(this),
            zone: None,
        },
    )))
}
// MONTHS_BETWEEN(end, start) -> DuckDB complex expansion
"MONTHS_BETWEEN" if f.args.len() == 2 => {
    match target {
        DialectType::DuckDB => {
            let mut args = f.args;
            let end_date = args.remove(0);
            let start_date = args.remove(0);
            let cast_end = Self::ensure_cast_date(end_date);
            let cast_start = Self::ensure_cast_date(start_date);
            // DATE_DIFF('MONTH', start, end) + CASE WHEN DAY(end) = DAY(LAST_DAY(end)) AND DAY(start) = DAY(LAST_DAY(start)) THEN 0 ELSE (DAY(end) - DAY(start)) / 31.0 END
            // i.e. the whole-month difference plus a fractional part, except
            // when both dates are the last day of their month (then the
            // fraction is exactly 0, matching MONTHS_BETWEEN semantics).
            let dd = Expression::Function(Box::new(Function::new(
                "DATE_DIFF".to_string(),
                vec![
                    Expression::string("MONTH"),
                    cast_start.clone(),
                    cast_end.clone(),
                ],
            )));
            let day_end =
                Expression::Function(Box::new(Function::new(
                    "DAY".to_string(),
                    vec![cast_end.clone()],
                )));
            let day_start =
                Expression::Function(Box::new(Function::new(
                    "DAY".to_string(),
                    vec![cast_start.clone()],
                )));
            let last_day_end =
                Expression::Function(Box::new(Function::new(
                    "LAST_DAY".to_string(),
                    vec![cast_end.clone()],
                )));
            let last_day_start =
                Expression::Function(Box::new(Function::new(
                    "LAST_DAY".to_string(),
                    vec![cast_start.clone()],
                )));
            let day_last_end = Expression::Function(Box::new(
                Function::new("DAY".to_string(), vec![last_day_end]),
            ));
            let day_last_start = Expression::Function(Box::new(
                Function::new("DAY".to_string(), vec![last_day_start]),
            ));
            // cond1/cond2: each date is the last day of its month.
            let cond1 = Expression::Eq(Box::new(BinaryOp::new(
                day_end.clone(),
                day_last_end,
            )));
            let cond2 = Expression::Eq(Box::new(BinaryOp::new(
                day_start.clone(),
                day_last_start,
            )));
            let both_cond =
                Expression::And(Box::new(BinaryOp::new(cond1, cond2)));
            // Fractional part: (DAY(end) - DAY(start)) / 31.0, parenthesized
            // so the division applies to the whole difference.
            let day_diff = Expression::Sub(Box::new(BinaryOp::new(
                day_end, day_start,
            )));
            let day_diff_paren = Expression::Paren(Box::new(
                crate::expressions::Paren {
                    this: day_diff,
                    trailing_comments: Vec::new(),
                },
            ));
            let frac = Expression::Div(Box::new(BinaryOp::new(
                day_diff_paren,
                Expression::Literal(Literal::Number(
                    "31.0".to_string(),
                )),
            )));
            let case_expr = Expression::Case(Box::new(Case {
                operand: None,
                whens: vec![(both_cond, Expression::number(0))],
                else_: Some(frac),
                comments: Vec::new(),
            }));
            Ok(Expression::Add(Box::new(BinaryOp::new(dd, case_expr))))
        }
        DialectType::Snowflake | DialectType::Redshift => {
            // DATEDIFF(MONTH, start, end) — note the unit is an identifier
            // here, not a string literal.
            let mut args = f.args;
            let end_date = args.remove(0);
            let start_date = args.remove(0);
            let unit = Expression::Identifier(Identifier::new("MONTH"));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![unit, start_date, end_date],
            ))))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // DATE_DIFF('MONTH', start, end) with a string-literal unit.
            let mut args = f.args;
            let end_date = args.remove(0);
            let start_date = args.remove(0);
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_DIFF".to_string(),
                vec![Expression::string("MONTH"), start_date, end_date],
            ))))
        }
        _ => Ok(Expression::Function(f)),
    }
}
// MONTHS_BETWEEN(end, start, roundOff) - 3-arg form (Spark-specific)
// Drop the roundOff arg for non-Spark targets, keep it for Spark
"MONTHS_BETWEEN" if f.args.len() == 3 => {
    match target {
        DialectType::Spark | DialectType::Databricks => {
            Ok(Expression::Function(f))
        }
        _ => {
            // Drop the 3rd arg and delegate to the 2-arg logic
            let mut args = f.args;
            let end_date = args.remove(0);
            let start_date = args.remove(0);
            // Re-create as 2-arg and process
            // (recurses into this same normalizer so the 2-arg arm above
            // handles the target-specific rewrite)
            let f2 = Function::new(
                "MONTHS_BETWEEN".to_string(),
                vec![end_date, start_date],
            );
            let e2 = Expression::Function(Box::new(f2));
            Self::cross_dialect_normalize(e2, source, target)
        }
    }
}
// TO_TIMESTAMP(x) with 1 arg -> CAST(x AS TIMESTAMP) for most targets
// Note: this guard matches on `source` — it only fires for queries coming
// from the Spark family, regardless of target.
"TO_TIMESTAMP"
    if f.args.len() == 1
        && matches!(
            source,
            DialectType::Spark
                | DialectType::Databricks
                | DialectType::Hive
        ) =>
{
    let arg = f.args.into_iter().next().unwrap();
    Ok(Expression::Cast(Box::new(Cast {
        this: arg,
        to: DataType::Timestamp {
            timezone: false,
            precision: None,
        },
        trailing_comments: vec![],
        double_colon_syntax: false,
        format: None,
        default: None,
    })))
}
// STRING(x) -> CAST(x AS STRING) for Spark target
// Also guarded on `source`: only applies when coming from Spark/Databricks.
"STRING"
    if f.args.len() == 1
        && matches!(
            source,
            DialectType::Spark | DialectType::Databricks
        ) =>
{
    let arg = f.args.into_iter().next().unwrap();
    // Spark-family targets keep the STRING type verbatim (as a custom
    // type name); everything else uses the portable TEXT type.
    let dt = match target {
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => DataType::Custom {
            name: "STRING".to_string(),
        },
        _ => DataType::Text,
    };
    Ok(Expression::Cast(Box::new(Cast {
        this: arg,
        to: dt,
        trailing_comments: vec![],
        double_colon_syntax: false,
        format: None,
        default: None,
    })))
}
16605 // LOGICAL_OR(x) -> BOOL_OR(x) for Spark target
16606 "LOGICAL_OR" if f.args.len() == 1 => {
16607 let name = match target {
16608 DialectType::Spark | DialectType::Databricks => "BOOL_OR",
16609 _ => "LOGICAL_OR",
16610 };
16611 Ok(Expression::Function(Box::new(Function::new(
16612 name.to_string(),
16613 f.args,
16614 ))))
16615 }
16616 // SPLIT(x, pattern) from Spark -> STR_SPLIT_REGEX for DuckDB, REGEXP_SPLIT for Presto
16617 "SPLIT"
16618 if f.args.len() == 2
16619 && matches!(
16620 source,
16621 DialectType::Spark
16622 | DialectType::Databricks
16623 | DialectType::Hive
16624 ) =>
16625 {
16626 let name = match target {
16627 DialectType::DuckDB => "STR_SPLIT_REGEX",
16628 DialectType::Presto
16629 | DialectType::Trino
16630 | DialectType::Athena => "REGEXP_SPLIT",
16631 DialectType::Spark
16632 | DialectType::Databricks
16633 | DialectType::Hive => "SPLIT",
16634 _ => "SPLIT",
16635 };
16636 Ok(Expression::Function(Box::new(Function::new(
16637 name.to_string(),
16638 f.args,
16639 ))))
16640 }
// TRY_ELEMENT_AT -> ELEMENT_AT for Presto, array[idx] for DuckDB
"TRY_ELEMENT_AT" if f.args.len() == 2 => match target {
    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
        Ok(Expression::Function(Box::new(Function::new(
            "ELEMENT_AT".to_string(),
            f.args,
        ))))
    }
    DialectType::DuckDB => {
        // Rewrite as subscript syntax: arr[idx].
        // NOTE(review): presumably DuckDB subscripting yields NULL for an
        // out-of-range index, matching TRY_ELEMENT_AT's no-error behavior
        // — confirm.
        let mut args = f.args;
        let arr = args.remove(0);
        let idx = args.remove(0);
        Ok(Expression::Subscript(Box::new(
            crate::expressions::Subscript {
                this: arr,
                index: idx,
            },
        )))
    }
    _ => Ok(Expression::Function(f)),
},
// ARRAY_FILTER(arr, lambda) -> FILTER for Hive/Spark/Presto, LIST_FILTER for DuckDB
"ARRAY_FILTER" if f.args.len() == 2 => {
    let name = match target {
        DialectType::DuckDB => "LIST_FILTER",
        DialectType::StarRocks => "ARRAY_FILTER",
        _ => "FILTER",
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        f.args,
    ))))
}
// FILTER(arr, lambda) -> ARRAY_FILTER for StarRocks, LIST_FILTER for DuckDB
// Intentionally the same mapping as ARRAY_FILTER above, so both spellings
// normalize identically regardless of which one the source dialect used.
"FILTER" if f.args.len() == 2 => {
    let name = match target {
        DialectType::DuckDB => "LIST_FILTER",
        DialectType::StarRocks => "ARRAY_FILTER",
        _ => "FILTER",
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        f.args,
    ))))
}
// REDUCE(arr, init, lambda1, lambda2) -> AGGREGATE for Spark
"REDUCE" if f.args.len() >= 3 => {
    let name = match target {
        DialectType::Spark | DialectType::Databricks => "AGGREGATE",
        _ => "REDUCE",
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        f.args,
    ))))
}
// CURRENT_SCHEMA() -> dialect-specific
"CURRENT_SCHEMA" => {
    match target {
        DialectType::PostgreSQL => {
            // PostgreSQL: CURRENT_SCHEMA (no parens)
            // Built as a struct literal (not Function::new) so that
            // `no_parens: true` can be set explicitly.
            Ok(Expression::Function(Box::new(Function {
                name: "CURRENT_SCHEMA".to_string(),
                args: vec![],
                distinct: false,
                trailing_comments: vec![],
                use_bracket_syntax: false,
                no_parens: true,
                quoted: false,
            })))
        }
        DialectType::MySQL
        | DialectType::Doris
        | DialectType::StarRocks => Ok(Expression::Function(Box::new(
            Function::new("SCHEMA".to_string(), vec![]),
        ))),
        DialectType::TSQL => Ok(Expression::Function(Box::new(
            Function::new("SCHEMA_NAME".to_string(), vec![]),
        ))),
        DialectType::SQLite => {
            // SQLite has no schemas; the primary database is named 'main',
            // so the call collapses to that string literal.
            Ok(Expression::Literal(Literal::String("main".to_string())))
        }
        _ => Ok(Expression::Function(f)),
    }
}
// LTRIM(str, chars) 2-arg -> TRIM(LEADING chars FROM str) for Spark/Hive/Databricks/ClickHouse
"LTRIM" if f.args.len() == 2 => match target {
    DialectType::Spark
    | DialectType::Hive
    | DialectType::Databricks
    | DialectType::ClickHouse => {
        let mut args = f.args;
        let str_expr = args.remove(0);
        let chars = args.remove(0);
        // Emit the SQL-standard TRIM form with an explicit LEADING keyword.
        Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
            this: str_expr,
            characters: Some(chars),
            position: crate::expressions::TrimPosition::Leading,
            sql_standard_syntax: true,
            position_explicit: true,
        })))
    }
    _ => Ok(Expression::Function(f)),
},
// RTRIM(str, chars) 2-arg -> TRIM(TRAILING chars FROM str) for Spark/Hive/Databricks/ClickHouse
// Mirror image of the LTRIM arm above, using TRAILING instead of LEADING.
"RTRIM" if f.args.len() == 2 => match target {
    DialectType::Spark
    | DialectType::Hive
    | DialectType::Databricks
    | DialectType::ClickHouse => {
        let mut args = f.args;
        let str_expr = args.remove(0);
        let chars = args.remove(0);
        Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
            this: str_expr,
            characters: Some(chars),
            position: crate::expressions::TrimPosition::Trailing,
            sql_standard_syntax: true,
            position_explicit: true,
        })))
    }
    _ => Ok(Expression::Function(f)),
},
// ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
"ARRAY_REVERSE" if f.args.len() == 1 => match target {
    DialectType::ClickHouse => {
        // Rename in place so all other Function fields (distinct, quoting,
        // comments, etc.) are preserved.
        let mut new_f = *f;
        new_f.name = "arrayReverse".to_string();
        Ok(Expression::Function(Box::new(new_f)))
    }
    _ => Ok(Expression::Function(f)),
},
// UUID() -> NEWID() for TSQL
"UUID" if f.args.is_empty() => match target {
    DialectType::TSQL | DialectType::Fabric => {
        Ok(Expression::Function(Box::new(Function::new(
            "NEWID".to_string(),
            vec![],
        ))))
    }
    _ => Ok(Expression::Function(f)),
},
16783 // FARM_FINGERPRINT(x) -> farmFingerprint64(x) for ClickHouse, FARMFINGERPRINT64(x) for Redshift
16784 "FARM_FINGERPRINT" if f.args.len() == 1 => match target {
16785 DialectType::ClickHouse => {
16786 let mut new_f = *f;
16787 new_f.name = "farmFingerprint64".to_string();
16788 Ok(Expression::Function(Box::new(new_f)))
16789 }
16790 DialectType::Redshift => {
16791 let mut new_f = *f;
16792 new_f.name = "FARMFINGERPRINT64".to_string();
16793 Ok(Expression::Function(Box::new(new_f)))
16794 }
16795 _ => Ok(Expression::Function(f)),
16796 },
// JSON_KEYS(x) -> JSON_OBJECT_KEYS(x) for Databricks/Spark, OBJECT_KEYS(x) for Snowflake
"JSON_KEYS" => match target {
    DialectType::Databricks | DialectType::Spark => {
        // Rename in place; all other Function fields are preserved.
        let mut new_f = *f;
        new_f.name = "JSON_OBJECT_KEYS".to_string();
        Ok(Expression::Function(Box::new(new_f)))
    }
    DialectType::Snowflake => {
        let mut new_f = *f;
        new_f.name = "OBJECT_KEYS".to_string();
        Ok(Expression::Function(Box::new(new_f)))
    }
    _ => Ok(Expression::Function(f)),
},
// WEEKOFYEAR(x) -> WEEKISO(x) for Snowflake
"WEEKOFYEAR" => match target {
    DialectType::Snowflake => {
        let mut new_f = *f;
        new_f.name = "WEEKISO".to_string();
        Ok(Expression::Function(Box::new(new_f)))
    }
    _ => Ok(Expression::Function(f)),
},
// FORMAT(fmt, args...) -> FORMAT_STRING(fmt, args...) for Databricks
// Only applies when the source dialect is Generic (other sources define
// their own FORMAT semantics).
"FORMAT"
    if f.args.len() >= 2 && matches!(source, DialectType::Generic) =>
{
    match target {
        DialectType::Databricks | DialectType::Spark => {
            let mut new_f = *f;
            new_f.name = "FORMAT_STRING".to_string();
            Ok(Expression::Function(Box::new(new_f)))
        }
        _ => Ok(Expression::Function(f)),
    }
}
// CONCAT_WS('-', args...) -> CONCAT_WS('-', CAST(arg AS VARCHAR), ...) for Presto/Trino
"CONCAT_WS" if f.args.len() >= 2 => match target {
    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
        let mut args = f.args;
        // Keep the separator untouched; cast every remaining argument to
        // VARCHAR so the call is well-typed on these targets.
        let sep = args.remove(0);
        let cast_args: Vec<Expression> = args
            .into_iter()
            .map(|a| {
                Expression::Cast(Box::new(Cast {
                    this: a,
                    to: DataType::VarChar {
                        length: None,
                        parenthesized_length: false,
                    },
                    double_colon_syntax: false,
                    trailing_comments: Vec::new(),
                    format: None,
                    default: None,
                }))
            })
            .collect();
        let mut new_args = vec![sep];
        new_args.extend(cast_args);
        Ok(Expression::Function(Box::new(Function::new(
            "CONCAT_WS".to_string(),
            new_args,
        ))))
    }
    _ => Ok(Expression::Function(f)),
},
16863 // ARRAY_SLICE(x, start, end) -> SLICE(x, start, end) for Presto/Trino/Databricks, arraySlice for ClickHouse
16864 "ARRAY_SLICE" if f.args.len() >= 2 => match target {
16865 DialectType::Presto
16866 | DialectType::Trino
16867 | DialectType::Athena
16868 | DialectType::Databricks
16869 | DialectType::Spark => {
16870 let mut new_f = *f;
16871 new_f.name = "SLICE".to_string();
16872 Ok(Expression::Function(Box::new(new_f)))
16873 }
16874 DialectType::ClickHouse => {
16875 let mut new_f = *f;
16876 new_f.name = "arraySlice".to_string();
16877 Ok(Expression::Function(Box::new(new_f)))
16878 }
16879 _ => Ok(Expression::Function(f)),
16880 },
16881 // ARRAY_PREPEND(arr, x) -> LIST_PREPEND(x, arr) for DuckDB (swap args)
16882 "ARRAY_PREPEND" if f.args.len() == 2 => match target {
16883 DialectType::DuckDB => {
16884 let mut args = f.args;
16885 let arr = args.remove(0);
16886 let val = args.remove(0);
16887 Ok(Expression::Function(Box::new(Function::new(
16888 "LIST_PREPEND".to_string(),
16889 vec![val, arr],
16890 ))))
16891 }
16892 _ => Ok(Expression::Function(f)),
16893 },
// ARRAY_REMOVE(arr, target) -> dialect-specific
"ARRAY_REMOVE" if f.args.len() == 2 => {
    match target {
        DialectType::DuckDB => {
            let mut args = f.args;
            let arr = args.remove(0);
            let target_val = args.remove(0);
            let u_id = crate::expressions::Identifier::new("_u");
            // LIST_FILTER(arr, _u -> _u <> target)
            let lambda = Expression::Lambda(Box::new(
                crate::expressions::LambdaExpr {
                    parameters: vec![u_id.clone()],
                    body: Expression::Neq(Box::new(BinaryOp {
                        left: Expression::Identifier(u_id),
                        right: target_val,
                        left_comments: Vec::new(),
                        operator_comments: Vec::new(),
                        trailing_comments: Vec::new(),
                    })),
                    colon: false,
                    parameter_types: Vec::new(),
                },
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "LIST_FILTER".to_string(),
                vec![arr, lambda],
            ))))
        }
        DialectType::ClickHouse => {
            let mut args = f.args;
            let arr = args.remove(0);
            let target_val = args.remove(0);
            let u_id = crate::expressions::Identifier::new("_u");
            // arrayFilter(_u -> _u <> target, arr)
            // Same lambda as the DuckDB branch, but ClickHouse takes the
            // lambda first and the array second.
            let lambda = Expression::Lambda(Box::new(
                crate::expressions::LambdaExpr {
                    parameters: vec![u_id.clone()],
                    body: Expression::Neq(Box::new(BinaryOp {
                        left: Expression::Identifier(u_id),
                        right: target_val,
                        left_comments: Vec::new(),
                        operator_comments: Vec::new(),
                        trailing_comments: Vec::new(),
                    })),
                    colon: false,
                    parameter_types: Vec::new(),
                },
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "arrayFilter".to_string(),
                vec![lambda, arr],
            ))))
        }
        DialectType::BigQuery => {
            // ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)
            // BigQuery has no array-filter lambda, so the removal is
            // expressed as a correlated UNNEST subquery wrapped in ARRAY().
            let mut args = f.args;
            let arr = args.remove(0);
            let target_val = args.remove(0);
            let u_id = crate::expressions::Identifier::new("_u");
            let u_col =
                Expression::Column(crate::expressions::Column {
                    name: u_id.clone(),
                    table: None,
                    join_mark: false,
                    trailing_comments: Vec::new(),
                });
            // UNNEST(the_array) AS _u
            let unnest_expr = Expression::Unnest(Box::new(
                crate::expressions::UnnestFunc {
                    this: arr,
                    expressions: Vec::new(),
                    with_ordinality: false,
                    alias: None,
                    offset_alias: None,
                },
            ));
            let aliased_unnest = Expression::Alias(Box::new(
                crate::expressions::Alias {
                    this: unnest_expr,
                    alias: u_id.clone(),
                    column_aliases: Vec::new(),
                    pre_alias_comments: Vec::new(),
                    trailing_comments: Vec::new(),
                },
            ));
            // _u <> target
            let where_cond = Expression::Neq(Box::new(BinaryOp {
                left: u_col.clone(),
                right: target_val,
                left_comments: Vec::new(),
                operator_comments: Vec::new(),
                trailing_comments: Vec::new(),
            }));
            // SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target
            let subquery = Expression::Select(Box::new(
                crate::expressions::Select::new()
                    .column(u_col)
                    .from(aliased_unnest)
                    .where_(where_cond),
            ));
            // ARRAY(subquery) -- use ArrayFunc with subquery as single element
            Ok(Expression::ArrayFunc(Box::new(
                crate::expressions::ArrayConstructor {
                    expressions: vec![subquery],
                    bracket_notation: false,
                    use_list_keyword: false,
                },
            )))
        }
        _ => Ok(Expression::Function(f)),
    }
}
17006 // PARSE_JSON(str) -> remove for SQLite/Doris (just use the string literal)
17007 "PARSE_JSON" if f.args.len() == 1 => {
17008 match target {
17009 DialectType::SQLite
17010 | DialectType::Doris
17011 | DialectType::MySQL
17012 | DialectType::StarRocks => {
17013 // Strip PARSE_JSON, return the inner argument
17014 Ok(f.args.into_iter().next().unwrap())
17015 }
17016 _ => Ok(Expression::Function(f)),
17017 }
17018 }
// JSON_REMOVE(PARSE_JSON(str), path...) -> for SQLite strip PARSE_JSON
// This is handled by PARSE_JSON stripping above; JSON_REMOVE is passed through
// unchanged here (the transform presumably recurses into arguments — confirm).
"JSON_REMOVE" => Ok(Expression::Function(f)),
// JSON_SET(PARSE_JSON(str), path, PARSE_JSON(val)) -> for SQLite strip PARSE_JSON
// This is handled by PARSE_JSON stripping above; JSON_SET is passed through
"JSON_SET" => Ok(Expression::Function(f)),
17025 // DECODE(x, search1, result1, ..., default) -> CASE WHEN
17026 // Behavior per search value type:
17027 // NULL literal -> CASE WHEN x IS NULL THEN result
17028 // Literal (number, string, bool) -> CASE WHEN x = literal THEN result
17029 // Non-literal (column, expr) -> CASE WHEN x = search OR (x IS NULL AND search IS NULL) THEN result
17030 "DECODE" if f.args.len() >= 3 => {
17031 // Keep as DECODE for targets that support it natively
17032 let keep_as_decode = matches!(
17033 target,
17034 DialectType::Oracle
17035 | DialectType::Snowflake
17036 | DialectType::Redshift
17037 | DialectType::Teradata
17038 | DialectType::Spark
17039 | DialectType::Databricks
17040 );
17041 if keep_as_decode {
17042 return Ok(Expression::Function(f));
17043 }
17044
17045 let mut args = f.args;
17046 let this_expr = args.remove(0);
17047 let mut pairs = Vec::new();
17048 let mut default = None;
17049 let mut i = 0;
17050 while i + 1 < args.len() {
17051 pairs.push((args[i].clone(), args[i + 1].clone()));
17052 i += 2;
17053 }
17054 if i < args.len() {
17055 default = Some(args[i].clone());
17056 }
17057 // Helper: check if expression is a literal value
17058 fn is_literal(e: &Expression) -> bool {
17059 matches!(
17060 e,
17061 Expression::Literal(_)
17062 | Expression::Boolean(_)
17063 | Expression::Neg(_)
17064 )
17065 }
17066 let whens: Vec<(Expression, Expression)> = pairs
17067 .into_iter()
17068 .map(|(search, result)| {
17069 if matches!(&search, Expression::Null(_)) {
17070 // NULL search -> IS NULL
17071 let condition = Expression::Is(Box::new(BinaryOp {
17072 left: this_expr.clone(),
17073 right: Expression::Null(crate::expressions::Null),
17074 left_comments: Vec::new(),
17075 operator_comments: Vec::new(),
17076 trailing_comments: Vec::new(),
17077 }));
17078 (condition, result)
17079 } else if is_literal(&search) {
17080 // Literal search -> simple equality
17081 let eq = Expression::Eq(Box::new(BinaryOp {
17082 left: this_expr.clone(),
17083 right: search,
17084 left_comments: Vec::new(),
17085 operator_comments: Vec::new(),
17086 trailing_comments: Vec::new(),
17087 }));
17088 (eq, result)
17089 } else {
17090 // Non-literal (column ref, expression) -> null-safe comparison
17091 let needs_paren = matches!(
17092 &search,
17093 Expression::Eq(_)
17094 | Expression::Neq(_)
17095 | Expression::Gt(_)
17096 | Expression::Gte(_)
17097 | Expression::Lt(_)
17098 | Expression::Lte(_)
17099 );
17100 let search_for_eq = if needs_paren {
17101 Expression::Paren(Box::new(
17102 crate::expressions::Paren {
17103 this: search.clone(),
17104 trailing_comments: Vec::new(),
17105 },
17106 ))
17107 } else {
17108 search.clone()
17109 };
17110 let eq = Expression::Eq(Box::new(BinaryOp {
17111 left: this_expr.clone(),
17112 right: search_for_eq,
17113 left_comments: Vec::new(),
17114 operator_comments: Vec::new(),
17115 trailing_comments: Vec::new(),
17116 }));
17117 let search_for_null = if needs_paren {
17118 Expression::Paren(Box::new(
17119 crate::expressions::Paren {
17120 this: search.clone(),
17121 trailing_comments: Vec::new(),
17122 },
17123 ))
17124 } else {
17125 search.clone()
17126 };
17127 let x_is_null = Expression::Is(Box::new(BinaryOp {
17128 left: this_expr.clone(),
17129 right: Expression::Null(crate::expressions::Null),
17130 left_comments: Vec::new(),
17131 operator_comments: Vec::new(),
17132 trailing_comments: Vec::new(),
17133 }));
17134 let s_is_null = Expression::Is(Box::new(BinaryOp {
17135 left: search_for_null,
17136 right: Expression::Null(crate::expressions::Null),
17137 left_comments: Vec::new(),
17138 operator_comments: Vec::new(),
17139 trailing_comments: Vec::new(),
17140 }));
17141 let both_null = Expression::And(Box::new(BinaryOp {
17142 left: x_is_null,
17143 right: s_is_null,
17144 left_comments: Vec::new(),
17145 operator_comments: Vec::new(),
17146 trailing_comments: Vec::new(),
17147 }));
17148 let condition = Expression::Or(Box::new(BinaryOp {
17149 left: eq,
17150 right: Expression::Paren(Box::new(
17151 crate::expressions::Paren {
17152 this: both_null,
17153 trailing_comments: Vec::new(),
17154 },
17155 )),
17156 left_comments: Vec::new(),
17157 operator_comments: Vec::new(),
17158 trailing_comments: Vec::new(),
17159 }));
17160 (condition, result)
17161 }
17162 })
17163 .collect();
17164 Ok(Expression::Case(Box::new(Case {
17165 operand: None,
17166 whens,
17167 else_: default,
17168 comments: Vec::new(),
17169 })))
17170 }
17171 // LEVENSHTEIN(a, b, ...) -> dialect-specific
17172 "LEVENSHTEIN" => {
17173 match target {
17174 DialectType::BigQuery => {
17175 let mut new_f = *f;
17176 new_f.name = "EDIT_DISTANCE".to_string();
17177 Ok(Expression::Function(Box::new(new_f)))
17178 }
17179 DialectType::Drill => {
17180 let mut new_f = *f;
17181 new_f.name = "LEVENSHTEIN_DISTANCE".to_string();
17182 Ok(Expression::Function(Box::new(new_f)))
17183 }
17184 DialectType::PostgreSQL if f.args.len() == 6 => {
17185 // PostgreSQL: LEVENSHTEIN(src, tgt, ins, del, sub, max_d) -> LEVENSHTEIN_LESS_EQUAL
17186 // 2 args: basic, 5 args: with costs, 6 args: with costs + max_distance
17187 let mut new_f = *f;
17188 new_f.name = "LEVENSHTEIN_LESS_EQUAL".to_string();
17189 Ok(Expression::Function(Box::new(new_f)))
17190 }
17191 _ => Ok(Expression::Function(f)),
17192 }
17193 }
17194 // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
17195 "ARRAY_REVERSE" => match target {
17196 DialectType::ClickHouse => {
17197 let mut new_f = *f;
17198 new_f.name = "arrayReverse".to_string();
17199 Ok(Expression::Function(Box::new(new_f)))
17200 }
17201 _ => Ok(Expression::Function(f)),
17202 },
17203 // GENERATE_DATE_ARRAY(start, end[, step]) -> target-specific
17204 "GENERATE_DATE_ARRAY" => {
17205 let mut args = f.args;
17206 if matches!(target, DialectType::BigQuery) {
17207 // BigQuery keeps GENERATE_DATE_ARRAY; add default interval if not present
17208 if args.len() == 2 {
17209 let default_interval = Expression::Interval(Box::new(
17210 crate::expressions::Interval {
17211 this: Some(Expression::Literal(Literal::String(
17212 "1".to_string(),
17213 ))),
17214 unit: Some(
17215 crate::expressions::IntervalUnitSpec::Simple {
17216 unit: crate::expressions::IntervalUnit::Day,
17217 use_plural: false,
17218 },
17219 ),
17220 },
17221 ));
17222 args.push(default_interval);
17223 }
17224 Ok(Expression::Function(Box::new(Function::new(
17225 "GENERATE_DATE_ARRAY".to_string(),
17226 args,
17227 ))))
17228 } else if matches!(target, DialectType::DuckDB) {
17229 // DuckDB: CAST(GENERATE_SERIES(start, end, step) AS DATE[])
17230 let start = args.get(0).cloned();
17231 let end = args.get(1).cloned();
17232 let step = args.get(2).cloned().or_else(|| {
17233 Some(Expression::Interval(Box::new(
17234 crate::expressions::Interval {
17235 this: Some(Expression::Literal(Literal::String(
17236 "1".to_string(),
17237 ))),
17238 unit: Some(
17239 crate::expressions::IntervalUnitSpec::Simple {
17240 unit: crate::expressions::IntervalUnit::Day,
17241 use_plural: false,
17242 },
17243 ),
17244 },
17245 )))
17246 });
17247 let gen_series = Expression::GenerateSeries(Box::new(
17248 crate::expressions::GenerateSeries {
17249 start: start.map(Box::new),
17250 end: end.map(Box::new),
17251 step: step.map(Box::new),
17252 is_end_exclusive: None,
17253 },
17254 ));
17255 Ok(Expression::Cast(Box::new(Cast {
17256 this: gen_series,
17257 to: DataType::Array {
17258 element_type: Box::new(DataType::Date),
17259 dimension: None,
17260 },
17261 trailing_comments: vec![],
17262 double_colon_syntax: false,
17263 format: None,
17264 default: None,
17265 })))
17266 } else if matches!(
17267 target,
17268 DialectType::Presto | DialectType::Trino | DialectType::Athena
17269 ) {
17270 // Presto/Trino: SEQUENCE(start, end, interval) with interval normalization
17271 let start = args.get(0).cloned();
17272 let end = args.get(1).cloned();
17273 let step = args.get(2).cloned().or_else(|| {
17274 Some(Expression::Interval(Box::new(
17275 crate::expressions::Interval {
17276 this: Some(Expression::Literal(Literal::String(
17277 "1".to_string(),
17278 ))),
17279 unit: Some(
17280 crate::expressions::IntervalUnitSpec::Simple {
17281 unit: crate::expressions::IntervalUnit::Day,
17282 use_plural: false,
17283 },
17284 ),
17285 },
17286 )))
17287 });
17288 let gen_series = Expression::GenerateSeries(Box::new(
17289 crate::expressions::GenerateSeries {
17290 start: start.map(Box::new),
17291 end: end.map(Box::new),
17292 step: step.map(Box::new),
17293 is_end_exclusive: None,
17294 },
17295 ));
17296 Ok(gen_series)
17297 } else if matches!(
17298 target,
17299 DialectType::Spark | DialectType::Databricks
17300 ) {
17301 // Spark/Databricks: SEQUENCE(start, end, step) - keep step as-is
17302 let start = args.get(0).cloned();
17303 let end = args.get(1).cloned();
17304 let step = args.get(2).cloned().or_else(|| {
17305 Some(Expression::Interval(Box::new(
17306 crate::expressions::Interval {
17307 this: Some(Expression::Literal(Literal::String(
17308 "1".to_string(),
17309 ))),
17310 unit: Some(
17311 crate::expressions::IntervalUnitSpec::Simple {
17312 unit: crate::expressions::IntervalUnit::Day,
17313 use_plural: false,
17314 },
17315 ),
17316 },
17317 )))
17318 });
17319 let gen_series = Expression::GenerateSeries(Box::new(
17320 crate::expressions::GenerateSeries {
17321 start: start.map(Box::new),
17322 end: end.map(Box::new),
17323 step: step.map(Box::new),
17324 is_end_exclusive: None,
17325 },
17326 ));
17327 Ok(gen_series)
17328 } else if matches!(target, DialectType::Snowflake) {
17329 // Snowflake: keep as GENERATE_DATE_ARRAY for later transform
17330 if args.len() == 2 {
17331 let default_interval = Expression::Interval(Box::new(
17332 crate::expressions::Interval {
17333 this: Some(Expression::Literal(Literal::String(
17334 "1".to_string(),
17335 ))),
17336 unit: Some(
17337 crate::expressions::IntervalUnitSpec::Simple {
17338 unit: crate::expressions::IntervalUnit::Day,
17339 use_plural: false,
17340 },
17341 ),
17342 },
17343 ));
17344 args.push(default_interval);
17345 }
17346 Ok(Expression::Function(Box::new(Function::new(
17347 "GENERATE_DATE_ARRAY".to_string(),
17348 args,
17349 ))))
17350 } else if matches!(
17351 target,
17352 DialectType::MySQL
17353 | DialectType::TSQL
17354 | DialectType::Fabric
17355 | DialectType::Redshift
17356 ) {
17357 // MySQL/TSQL/Redshift: keep as GENERATE_DATE_ARRAY for the preprocess
17358 // step (unnest_generate_date_array_using_recursive_cte) to convert to CTE
17359 Ok(Expression::Function(Box::new(Function::new(
17360 "GENERATE_DATE_ARRAY".to_string(),
17361 args,
17362 ))))
17363 } else {
17364 // PostgreSQL/others: convert to GenerateSeries
17365 let start = args.get(0).cloned();
17366 let end = args.get(1).cloned();
17367 let step = args.get(2).cloned().or_else(|| {
17368 Some(Expression::Interval(Box::new(
17369 crate::expressions::Interval {
17370 this: Some(Expression::Literal(Literal::String(
17371 "1".to_string(),
17372 ))),
17373 unit: Some(
17374 crate::expressions::IntervalUnitSpec::Simple {
17375 unit: crate::expressions::IntervalUnit::Day,
17376 use_plural: false,
17377 },
17378 ),
17379 },
17380 )))
17381 });
17382 Ok(Expression::GenerateSeries(Box::new(
17383 crate::expressions::GenerateSeries {
17384 start: start.map(Box::new),
17385 end: end.map(Box::new),
17386 step: step.map(Box::new),
17387 is_end_exclusive: None,
17388 },
17389 )))
17390 }
17391 }
// Unrecognized function names pass through untouched.
_ => Ok(Expression::Function(f)),
}
} else if let Expression::AggregateFunction(mut af) = e {
    // Aggregate-function conversions, keyed on the (case-folded) name.
    let name = af.name.to_uppercase();
    match name.as_str() {
        // ARBITRARY(x): delegated to a shared converter for the target.
        "ARBITRARY" if af.args.len() == 1 => {
            let arg = af.args.into_iter().next().unwrap();
            Ok(convert_arbitrary(arg, target))
        }
        "JSON_ARRAYAGG" => {
            match target {
                DialectType::PostgreSQL => {
                    af.name = "JSON_AGG".to_string();
                    // Add NULLS FIRST to ORDER BY items for PostgreSQL,
                    // but only where no explicit ordering was given.
                    for ordered in af.order_by.iter_mut() {
                        if ordered.nulls_first.is_none() {
                            ordered.nulls_first = Some(true);
                        }
                    }
                    Ok(Expression::AggregateFunction(af))
                }
                _ => Ok(Expression::AggregateFunction(af)),
            }
        }
        _ => Ok(Expression::AggregateFunction(af)),
    }
} else if let Expression::JSONArrayAgg(ja) = e {
    // JSONArrayAgg -> JSON_AGG for PostgreSQL, JSON_ARRAYAGG for others
    match target {
        DialectType::PostgreSQL => {
            // Flatten the optional ORDER BY node into the aggregate's
            // order_by list, defaulting each item to NULLS FIRST
            // (PostgreSQL's implicit placement made explicit).
            let mut order_by = Vec::new();
            if let Some(order_expr) = ja.order {
                if let Expression::OrderBy(ob) = *order_expr {
                    for mut ordered in ob.expressions {
                        if ordered.nulls_first.is_none() {
                            ordered.nulls_first = Some(true);
                        }
                        order_by.push(ordered);
                    }
                }
            }
            Ok(Expression::AggregateFunction(Box::new(
                crate::expressions::AggregateFunction {
                    name: "JSON_AGG".to_string(),
                    args: vec![*ja.this],
                    distinct: false,
                    filter: None,
                    order_by,
                    limit: None,
                    ignore_nulls: None,
                },
            )))
        }
        _ => Ok(Expression::JSONArrayAgg(ja)),
    }
} else if let Expression::ToNumber(tn) = e {
    // TO_NUMBER(x) with no format/precision/scale -> CAST(x AS DOUBLE)
    let arg = *tn.this;
    Ok(Expression::Cast(Box::new(crate::expressions::Cast {
        this: arg,
        to: crate::expressions::DataType::Double {
            precision: None,
            scale: None,
        },
        double_colon_syntax: false,
        trailing_comments: Vec::new(),
        format: None,
        default: None,
    })))
} else {
    // Anything else passes through this action unchanged.
    Ok(e)
}
}
17465
17466 Action::RegexpLikeToDuckDB => {
17467 if let Expression::RegexpLike(f) = e {
17468 let mut args = vec![f.this, f.pattern];
17469 if let Some(flags) = f.flags {
17470 args.push(flags);
17471 }
17472 Ok(Expression::Function(Box::new(Function::new(
17473 "REGEXP_MATCHES".to_string(),
17474 args,
17475 ))))
17476 } else {
17477 Ok(e)
17478 }
17479 }
17480 Action::EpochConvert => {
17481 if let Expression::Epoch(f) = e {
17482 let arg = f.this;
17483 let name = match target {
17484 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
17485 "UNIX_TIMESTAMP"
17486 }
17487 DialectType::Presto | DialectType::Trino => "TO_UNIXTIME",
17488 DialectType::BigQuery => "TIME_TO_UNIX",
17489 _ => "EPOCH",
17490 };
17491 Ok(Expression::Function(Box::new(Function::new(
17492 name.to_string(),
17493 vec![arg],
17494 ))))
17495 } else {
17496 Ok(e)
17497 }
17498 }
Action::EpochMsConvert => {
    use crate::expressions::{BinaryOp, Cast};
    // EPOCH_MS(x): milliseconds-since-epoch -> timestamp, per target.
    // Targets without a millisecond constructor divide by POWER(10, 3)
    // and feed the seconds value to their unixtime converter.
    if let Expression::EpochMs(f) = e {
        let arg = f.this;
        match target {
            DialectType::Spark | DialectType::Databricks => {
                // Native millisecond constructor.
                Ok(Expression::Function(Box::new(Function::new(
                    "TIMESTAMP_MILLIS".to_string(),
                    vec![arg],
                ))))
            }
            DialectType::BigQuery => Ok(Expression::Function(Box::new(
                Function::new("TIMESTAMP_MILLIS".to_string(), vec![arg]),
            ))),
            DialectType::Presto | DialectType::Trino => {
                // FROM_UNIXTIME(CAST(x AS DOUBLE) / POW(10, 3))
                let cast_arg = Expression::Cast(Box::new(Cast {
                    this: arg,
                    to: DataType::Double {
                        precision: None,
                        scale: None,
                    },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                }));
                let div = Expression::Div(Box::new(BinaryOp::new(
                    cast_arg,
                    Expression::Function(Box::new(Function::new(
                        "POW".to_string(),
                        vec![Expression::number(10), Expression::number(3)],
                    ))),
                )));
                Ok(Expression::Function(Box::new(Function::new(
                    "FROM_UNIXTIME".to_string(),
                    vec![div],
                ))))
            }
            DialectType::MySQL => {
                // FROM_UNIXTIME(x / POWER(10, 3)) — no cast; MySQL's `/`
                // already produces a fractional result.
                let div = Expression::Div(Box::new(BinaryOp::new(
                    arg,
                    Expression::Function(Box::new(Function::new(
                        "POWER".to_string(),
                        vec![Expression::number(10), Expression::number(3)],
                    ))),
                )));
                Ok(Expression::Function(Box::new(Function::new(
                    "FROM_UNIXTIME".to_string(),
                    vec![div],
                ))))
            }
            DialectType::PostgreSQL | DialectType::Redshift => {
                // TO_TIMESTAMP(CAST(x AS DOUBLE PRECISION) / POWER(10, 3))
                let cast_arg = Expression::Cast(Box::new(Cast {
                    this: arg,
                    to: DataType::Custom {
                        name: "DOUBLE PRECISION".to_string(),
                    },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                }));
                let div = Expression::Div(Box::new(BinaryOp::new(
                    cast_arg,
                    Expression::Function(Box::new(Function::new(
                        "POWER".to_string(),
                        vec![Expression::number(10), Expression::number(3)],
                    ))),
                )));
                Ok(Expression::Function(Box::new(Function::new(
                    "TO_TIMESTAMP".to_string(),
                    vec![div],
                ))))
            }
            DialectType::ClickHouse => {
                // fromUnixTimestamp64Milli(CAST(x AS Nullable(Int64)))
                let cast_arg = Expression::Cast(Box::new(Cast {
                    this: arg,
                    to: DataType::Nullable {
                        inner: Box::new(DataType::BigInt { length: None }),
                    },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                }));
                Ok(Expression::Function(Box::new(Function::new(
                    "fromUnixTimestamp64Milli".to_string(),
                    vec![cast_arg],
                ))))
            }
            // Fallback: render as an EPOCH_MS call (DuckDB's spelling).
            _ => Ok(Expression::Function(Box::new(Function::new(
                "EPOCH_MS".to_string(),
                vec![arg],
            )))),
        }
    } else {
        Ok(e)
    }
}
Action::TSQLTypeNormalize => {
    // Normalize TSQL-specific type names into portable DataType variants.
    // Arms that should leave the type untouched early-return the original.
    if let Expression::DataType(dt) = e {
        let new_dt = match &dt {
            // MONEY -> DECIMAL(15, 4); mirrors TSQL's fixed 4-digit scale.
            DataType::Custom { name } if name.eq_ignore_ascii_case("MONEY") => {
                DataType::Decimal {
                    precision: Some(15),
                    scale: Some(4),
                }
            }
            DataType::Custom { name }
                if name.eq_ignore_ascii_case("SMALLMONEY") =>
            {
                DataType::Decimal {
                    precision: Some(6),
                    scale: Some(4),
                }
            }
            // Bare DATETIME2 (no precision) -> TIMESTAMP.
            DataType::Custom { name } if name.eq_ignore_ascii_case("DATETIME2") => {
                DataType::Timestamp {
                    timezone: false,
                    precision: None,
                }
            }
            // REAL spelled as a custom name -> canonical FLOAT.
            DataType::Custom { name } if name.eq_ignore_ascii_case("REAL") => {
                DataType::Float {
                    precision: None,
                    scale: None,
                    real_spelling: false,
                }
            }
            // REAL spelling on a parsed Float node: drop the spelling flag.
            DataType::Float {
                real_spelling: true,
                ..
            } => DataType::Float {
                precision: None,
                scale: None,
                real_spelling: false,
            },
            DataType::Custom { name } if name.eq_ignore_ascii_case("IMAGE") => {
                DataType::Custom {
                    name: "BLOB".to_string(),
                }
            }
            DataType::Custom { name } if name.eq_ignore_ascii_case("BIT") => {
                DataType::Boolean
            }
            DataType::Custom { name }
                if name.eq_ignore_ascii_case("ROWVERSION") =>
            {
                DataType::Custom {
                    name: "BINARY".to_string(),
                }
            }
            // UNIQUEIDENTIFIER: STRING on Hive-family targets, else VARCHAR(36)
            // (36 chars = canonical textual UUID length).
            DataType::Custom { name }
                if name.eq_ignore_ascii_case("UNIQUEIDENTIFIER") =>
            {
                match target {
                    DialectType::Spark
                    | DialectType::Databricks
                    | DialectType::Hive => DataType::Custom {
                        name: "STRING".to_string(),
                    },
                    _ => DataType::VarChar {
                        length: Some(36),
                        parenthesized_length: true,
                    },
                }
            }
            // DATETIMEOFFSET: Hive-family targets have no tz-aware timestamp.
            DataType::Custom { name }
                if name.eq_ignore_ascii_case("DATETIMEOFFSET") =>
            {
                match target {
                    DialectType::Spark
                    | DialectType::Databricks
                    | DialectType::Hive => DataType::Timestamp {
                        timezone: false,
                        precision: None,
                    },
                    _ => DataType::Timestamp {
                        timezone: true,
                        precision: None,
                    },
                }
            }
            DataType::Custom { ref name }
                if name.to_uppercase().starts_with("DATETIME2(") =>
            {
                // DATETIME2(n) -> TIMESTAMP (precision dropped)
                DataType::Timestamp {
                    timezone: false,
                    precision: None,
                }
            }
            DataType::Custom { ref name }
                if name.to_uppercase().starts_with("TIME(") =>
            {
                // TIME(n) -> TIMESTAMP for Spark, keep as TIME for others
                match target {
                    DialectType::Spark
                    | DialectType::Databricks
                    | DialectType::Hive => DataType::Timestamp {
                        timezone: false,
                        precision: None,
                    },
                    _ => return Ok(Expression::DataType(dt)),
                }
            }
            DataType::Custom { ref name }
                if name.to_uppercase().starts_with("NUMERIC") =>
            {
                // Parse NUMERIC(p,s) back to Decimal(p,s); unparsable
                // spellings (e.g. "NUMERICX") are left untouched.
                let upper = name.to_uppercase();
                if let Some(inner) = upper
                    .strip_prefix("NUMERIC(")
                    .and_then(|s| s.strip_suffix(')'))
                {
                    let parts: Vec<&str> = inner.split(',').collect();
                    let precision =
                        parts.first().and_then(|s| s.trim().parse::<u32>().ok());
                    let scale =
                        parts.get(1).and_then(|s| s.trim().parse::<u32>().ok());
                    DataType::Decimal { precision, scale }
                } else if upper == "NUMERIC" {
                    DataType::Decimal {
                        precision: None,
                        scale: None,
                    }
                } else {
                    return Ok(Expression::DataType(dt));
                }
            }
            DataType::Float {
                precision: Some(p), ..
            } => {
                // For Hive/Spark: FLOAT(1-32) -> FLOAT, FLOAT(33+) -> DOUBLE (IEEE 754 boundary)
                // For other targets: FLOAT(1-24) -> FLOAT, FLOAT(25+) -> DOUBLE (TSQL boundary)
                let boundary = match target {
                    DialectType::Hive
                    | DialectType::Spark
                    | DialectType::Databricks => 32,
                    _ => 24,
                };
                if *p <= boundary {
                    DataType::Float {
                        precision: None,
                        scale: None,
                        real_spelling: false,
                    }
                } else {
                    DataType::Double {
                        precision: None,
                        scale: None,
                    }
                }
            }
            // TSQL TINYINT is unsigned (0-255): DuckDB gets UTINYINT;
            // Hive-family widens to SMALLINT; others keep TINYINT.
            DataType::TinyInt { .. } => match target {
                DialectType::DuckDB => DataType::Custom {
                    name: "UTINYINT".to_string(),
                },
                DialectType::Hive
                | DialectType::Spark
                | DialectType::Databricks => DataType::SmallInt { length: None },
                _ => return Ok(Expression::DataType(dt)),
            },
            // INTEGER -> INT for Spark/Databricks
            DataType::Int {
                length,
                integer_spelling: true,
            } => DataType::Int {
                length: *length,
                integer_spelling: false,
            },
            _ => return Ok(Expression::DataType(dt)),
        };
        Ok(Expression::DataType(new_dt))
    } else {
        Ok(e)
    }
}
17781 Action::MySQLSafeDivide => {
17782 use crate::expressions::{BinaryOp, Cast};
17783 if let Expression::Div(op) = e {
17784 let left = op.left;
17785 let right = op.right;
17786 // For SQLite: CAST left as REAL but NO NULLIF wrapping
17787 if matches!(target, DialectType::SQLite) {
17788 let new_left = Expression::Cast(Box::new(Cast {
17789 this: left,
17790 to: DataType::Float {
17791 precision: None,
17792 scale: None,
17793 real_spelling: true,
17794 },
17795 trailing_comments: Vec::new(),
17796 double_colon_syntax: false,
17797 format: None,
17798 default: None,
17799 }));
17800 return Ok(Expression::Div(Box::new(BinaryOp::new(new_left, right))));
17801 }
17802 // Wrap right in NULLIF(right, 0)
17803 let nullif_right = Expression::Function(Box::new(Function::new(
17804 "NULLIF".to_string(),
17805 vec![right, Expression::number(0)],
17806 )));
17807 // For some dialects, also CAST the left side
17808 let new_left = match target {
17809 DialectType::PostgreSQL
17810 | DialectType::Redshift
17811 | DialectType::Teradata
17812 | DialectType::Materialize
17813 | DialectType::RisingWave => Expression::Cast(Box::new(Cast {
17814 this: left,
17815 to: DataType::Custom {
17816 name: "DOUBLE PRECISION".to_string(),
17817 },
17818 trailing_comments: Vec::new(),
17819 double_colon_syntax: false,
17820 format: None,
17821 default: None,
17822 })),
17823 DialectType::Drill
17824 | DialectType::Trino
17825 | DialectType::Presto
17826 | DialectType::Athena => Expression::Cast(Box::new(Cast {
17827 this: left,
17828 to: DataType::Double {
17829 precision: None,
17830 scale: None,
17831 },
17832 trailing_comments: Vec::new(),
17833 double_colon_syntax: false,
17834 format: None,
17835 default: None,
17836 })),
17837 DialectType::TSQL => Expression::Cast(Box::new(Cast {
17838 this: left,
17839 to: DataType::Float {
17840 precision: None,
17841 scale: None,
17842 real_spelling: false,
17843 },
17844 trailing_comments: Vec::new(),
17845 double_colon_syntax: false,
17846 format: None,
17847 default: None,
17848 })),
17849 _ => left,
17850 };
17851 Ok(Expression::Div(Box::new(BinaryOp::new(
17852 new_left,
17853 nullif_right,
17854 ))))
17855 } else {
17856 Ok(e)
17857 }
17858 }
17859 Action::AlterTableRenameStripSchema => {
17860 if let Expression::AlterTable(mut at) = e {
17861 if let Some(crate::expressions::AlterTableAction::RenameTable(
17862 ref mut new_tbl,
17863 )) = at.actions.first_mut()
17864 {
17865 new_tbl.schema = None;
17866 new_tbl.catalog = None;
17867 }
17868 Ok(Expression::AlterTable(at))
17869 } else {
17870 Ok(e)
17871 }
17872 }
17873 Action::NullsOrdering => {
17874 // Fill in the source dialect's implied null ordering default.
17875 // This makes implicit null ordering explicit so the target generator
17876 // can correctly strip or keep it.
17877 //
17878 // Dialect null ordering categories:
17879 // nulls_are_large (Oracle, PostgreSQL, Redshift, Snowflake):
17880 // ASC -> NULLS LAST, DESC -> NULLS FIRST
17881 // nulls_are_small (Spark, Hive, BigQuery, MySQL, Databricks, ClickHouse, etc.):
17882 // ASC -> NULLS FIRST, DESC -> NULLS LAST
17883 // nulls_are_last (DuckDB, Presto, Trino, Dremio, Athena):
17884 // NULLS LAST always (both ASC and DESC)
17885 if let Expression::Ordered(mut o) = e {
17886 let is_asc = !o.desc;
17887
17888 let is_source_nulls_large = matches!(
17889 source,
17890 DialectType::Oracle
17891 | DialectType::PostgreSQL
17892 | DialectType::Redshift
17893 | DialectType::Snowflake
17894 );
17895 let is_source_nulls_last = matches!(
17896 source,
17897 DialectType::DuckDB
17898 | DialectType::Presto
17899 | DialectType::Trino
17900 | DialectType::Dremio
17901 | DialectType::Athena
17902 | DialectType::ClickHouse
17903 | DialectType::Drill
17904 | DialectType::Exasol
17905 | DialectType::DataFusion
17906 );
17907
17908 // Determine target category to check if default matches
17909 let is_target_nulls_large = matches!(
17910 target,
17911 DialectType::Oracle
17912 | DialectType::PostgreSQL
17913 | DialectType::Redshift
17914 | DialectType::Snowflake
17915 );
17916 let is_target_nulls_last = matches!(
17917 target,
17918 DialectType::DuckDB
17919 | DialectType::Presto
17920 | DialectType::Trino
17921 | DialectType::Dremio
17922 | DialectType::Athena
17923 | DialectType::ClickHouse
17924 | DialectType::Drill
17925 | DialectType::Exasol
17926 | DialectType::DataFusion
17927 );
17928
17929 // Compute the implied nulls_first for source
17930 let source_nulls_first = if is_source_nulls_large {
17931 !is_asc // ASC -> NULLS LAST (false), DESC -> NULLS FIRST (true)
17932 } else if is_source_nulls_last {
17933 false // NULLS LAST always
17934 } else {
17935 is_asc // nulls_are_small: ASC -> NULLS FIRST (true), DESC -> NULLS LAST (false)
17936 };
17937
17938 // Compute the target's default
17939 let target_nulls_first = if is_target_nulls_large {
17940 !is_asc
17941 } else if is_target_nulls_last {
17942 false
17943 } else {
17944 is_asc
17945 };
17946
17947 // Only add explicit nulls ordering if source and target defaults differ
17948 if source_nulls_first != target_nulls_first {
17949 o.nulls_first = Some(source_nulls_first);
17950 }
17951 // If they match, leave nulls_first as None so the generator won't output it
17952
17953 Ok(Expression::Ordered(o))
17954 } else {
17955 Ok(e)
17956 }
17957 }
Action::StringAggConvert => {
    // Convert STRING_AGG in its various parsed shapes into the target's
    // preferred string-aggregation construct.
    match e {
        Expression::WithinGroup(wg) => {
            // STRING_AGG(x, sep) WITHIN GROUP (ORDER BY z) -> target-specific
            // Extract args and distinct flag from either Function, AggregateFunction, or StringAgg
            let (x_opt, sep_opt, distinct) = match wg.this {
                Expression::AggregateFunction(ref af)
                    if af.name.eq_ignore_ascii_case("STRING_AGG")
                        && af.args.len() >= 2 =>
                {
                    (
                        Some(af.args[0].clone()),
                        Some(af.args[1].clone()),
                        af.distinct,
                    )
                }
                // Plain Function carries no distinct flag -> false.
                Expression::Function(ref f)
                    if f.name.eq_ignore_ascii_case("STRING_AGG")
                        && f.args.len() >= 2 =>
                {
                    (Some(f.args[0].clone()), Some(f.args[1].clone()), false)
                }
                Expression::StringAgg(ref sa) => {
                    (Some(sa.this.clone()), sa.separator.clone(), sa.distinct)
                }
                _ => (None, None, false),
            };
            if let (Some(x), Some(sep)) = (x_opt, sep_opt) {
                let order_by = wg.order_by;

                match target {
                    DialectType::TSQL | DialectType::Fabric => {
                        // Keep as WithinGroup(StringAgg) for TSQL
                        Ok(Expression::WithinGroup(Box::new(
                            crate::expressions::WithinGroup {
                                this: Expression::StringAgg(Box::new(
                                    crate::expressions::StringAggFunc {
                                        this: x,
                                        separator: Some(sep),
                                        order_by: None, // order_by goes in WithinGroup, not StringAgg
                                        distinct,
                                        filter: None,
                                        limit: None,
                                    },
                                )),
                                order_by,
                            },
                        )))
                    }
                    DialectType::MySQL
                    | DialectType::SingleStore
                    | DialectType::Doris
                    | DialectType::StarRocks => {
                        // GROUP_CONCAT(x ORDER BY z SEPARATOR sep)
                        Ok(Expression::GroupConcat(Box::new(
                            crate::expressions::GroupConcatFunc {
                                this: x,
                                separator: Some(sep),
                                order_by: Some(order_by),
                                distinct,
                                filter: None,
                            },
                        )))
                    }
                    DialectType::SQLite => {
                        // GROUP_CONCAT(x, sep) - no ORDER BY support,
                        // so the ordering is intentionally dropped.
                        Ok(Expression::GroupConcat(Box::new(
                            crate::expressions::GroupConcatFunc {
                                this: x,
                                separator: Some(sep),
                                order_by: None,
                                distinct,
                                filter: None,
                            },
                        )))
                    }
                    DialectType::PostgreSQL | DialectType::Redshift => {
                        // STRING_AGG(x, sep ORDER BY z)
                        Ok(Expression::StringAgg(Box::new(
                            crate::expressions::StringAggFunc {
                                this: x,
                                separator: Some(sep),
                                order_by: Some(order_by),
                                distinct,
                                filter: None,
                                limit: None,
                            },
                        )))
                    }
                    _ => {
                        // Default: keep as STRING_AGG(x, sep) with ORDER BY inside
                        Ok(Expression::StringAgg(Box::new(
                            crate::expressions::StringAggFunc {
                                this: x,
                                separator: Some(sep),
                                order_by: Some(order_by),
                                distinct,
                                filter: None,
                                limit: None,
                            },
                        )))
                    }
                }
            } else {
                // Not a STRING_AGG-shaped WITHIN GROUP: pass through.
                Ok(Expression::WithinGroup(wg))
            }
        }
        Expression::StringAgg(sa) => {
            // Bare STRING_AGG (no WITHIN GROUP) conversions.
            match target {
                DialectType::MySQL
                | DialectType::SingleStore
                | DialectType::Doris
                | DialectType::StarRocks => {
                    // STRING_AGG(x, sep) -> GROUP_CONCAT(x SEPARATOR sep)
                    Ok(Expression::GroupConcat(Box::new(
                        crate::expressions::GroupConcatFunc {
                            this: sa.this,
                            separator: sa.separator,
                            order_by: sa.order_by,
                            distinct: sa.distinct,
                            filter: sa.filter,
                        },
                    )))
                }
                DialectType::SQLite => {
                    // STRING_AGG(x, sep) -> GROUP_CONCAT(x, sep)
                    Ok(Expression::GroupConcat(Box::new(
                        crate::expressions::GroupConcatFunc {
                            this: sa.this,
                            separator: sa.separator,
                            order_by: None, // SQLite doesn't support ORDER BY in GROUP_CONCAT
                            distinct: sa.distinct,
                            filter: sa.filter,
                        },
                    )))
                }
                DialectType::Spark | DialectType::Databricks => {
                    // STRING_AGG(x, sep) -> LISTAGG(x, sep)
                    Ok(Expression::ListAgg(Box::new(
                        crate::expressions::ListAggFunc {
                            this: sa.this,
                            separator: sa.separator,
                            on_overflow: None,
                            order_by: sa.order_by,
                            distinct: sa.distinct,
                            filter: None,
                        },
                    )))
                }
                _ => Ok(Expression::StringAgg(sa)),
            }
        }
        _ => Ok(e),
    }
}
18113 Action::GroupConcatConvert => {
18114 // Helper to expand CONCAT(a, b, c) -> a || b || c (for PostgreSQL/SQLite)
18115 // or CONCAT(a, b, c) -> a + b + c (for TSQL)
18116 fn expand_concat_to_dpipe(expr: Expression) -> Expression {
18117 if let Expression::Function(ref f) = expr {
18118 if f.name.to_uppercase() == "CONCAT" && f.args.len() > 1 {
18119 let mut result = f.args[0].clone();
18120 for arg in &f.args[1..] {
18121 result = Expression::Concat(Box::new(BinaryOp {
18122 left: result,
18123 right: arg.clone(),
18124 left_comments: vec![],
18125 operator_comments: vec![],
18126 trailing_comments: vec![],
18127 }));
18128 }
18129 return result;
18130 }
18131 }
18132 expr
18133 }
18134 fn expand_concat_to_plus(expr: Expression) -> Expression {
18135 if let Expression::Function(ref f) = expr {
18136 if f.name.to_uppercase() == "CONCAT" && f.args.len() > 1 {
18137 let mut result = f.args[0].clone();
18138 for arg in &f.args[1..] {
18139 result = Expression::Add(Box::new(BinaryOp {
18140 left: result,
18141 right: arg.clone(),
18142 left_comments: vec![],
18143 operator_comments: vec![],
18144 trailing_comments: vec![],
18145 }));
18146 }
18147 return result;
18148 }
18149 }
18150 expr
18151 }
18152 // Helper to wrap each arg in CAST(arg AS VARCHAR) for Presto/Trino CONCAT
18153 fn wrap_concat_args_in_varchar_cast(expr: Expression) -> Expression {
18154 if let Expression::Function(ref f) = expr {
18155 if f.name.to_uppercase() == "CONCAT" && f.args.len() > 1 {
18156 let new_args: Vec<Expression> = f
18157 .args
18158 .iter()
18159 .map(|arg| {
18160 Expression::Cast(Box::new(crate::expressions::Cast {
18161 this: arg.clone(),
18162 to: crate::expressions::DataType::VarChar {
18163 length: None,
18164 parenthesized_length: false,
18165 },
18166 trailing_comments: Vec::new(),
18167 double_colon_syntax: false,
18168 format: None,
18169 default: None,
18170 }))
18171 })
18172 .collect();
18173 return Expression::Function(Box::new(
18174 crate::expressions::Function::new(
18175 "CONCAT".to_string(),
18176 new_args,
18177 ),
18178 ));
18179 }
18180 }
18181 expr
18182 }
                    // Rewrite GROUP_CONCAT(x [, sep] [ORDER BY ...]) into each target
                    // dialect's native string-aggregation construct.
                    if let Expression::GroupConcat(gc) = e {
                        match target {
                            DialectType::Presto => {
                                // GROUP_CONCAT(x [, sep]) -> ARRAY_JOIN(ARRAY_AGG(x), sep)
                                let sep = gc.separator.unwrap_or(Expression::string(","));
                                // For multi-arg CONCAT, wrap each arg in CAST(... AS VARCHAR)
                                let this = wrap_concat_args_in_varchar_cast(gc.this);
                                let array_agg =
                                    Expression::ArrayAgg(Box::new(crate::expressions::AggFunc {
                                        this,
                                        distinct: gc.distinct,
                                        filter: gc.filter,
                                        // ORDER BY moves inside the ARRAY_AGG call.
                                        order_by: gc.order_by.unwrap_or_default(),
                                        name: None,
                                        ignore_nulls: None,
                                        having_max: None,
                                        limit: None,
                                    }));
                                Ok(Expression::ArrayJoin(Box::new(
                                    crate::expressions::ArrayJoinFunc {
                                        this: array_agg,
                                        separator: sep,
                                        null_replacement: None,
                                    },
                                )))
                            }
                            DialectType::Trino => {
                                // GROUP_CONCAT(x [, sep]) -> LISTAGG(x, sep)
                                let sep = gc.separator.unwrap_or(Expression::string(","));
                                // For multi-arg CONCAT, wrap each arg in CAST(... AS VARCHAR)
                                let this = wrap_concat_args_in_varchar_cast(gc.this);
                                Ok(Expression::ListAgg(Box::new(
                                    crate::expressions::ListAggFunc {
                                        this,
                                        separator: Some(sep),
                                        on_overflow: None,
                                        order_by: gc.order_by,
                                        distinct: gc.distinct,
                                        filter: gc.filter,
                                    },
                                )))
                            }
                            DialectType::PostgreSQL
                            | DialectType::Redshift
                            | DialectType::Snowflake
                            | DialectType::DuckDB
                            | DialectType::Hive
                            | DialectType::ClickHouse => {
                                // GROUP_CONCAT(x [, sep]) -> STRING_AGG(x, sep)
                                let sep = gc.separator.unwrap_or(Expression::string(","));
                                // Expand CONCAT(a,b,c) -> a || b || c for || dialects
                                let this = expand_concat_to_dpipe(gc.this);
                                // For PostgreSQL, add NULLS LAST for DESC / NULLS FIRST for ASC
                                // (only when the input left NULL ordering unspecified).
                                let order_by = if target == DialectType::PostgreSQL {
                                    gc.order_by.map(|ords| {
                                        ords.into_iter()
                                            .map(|mut o| {
                                                if o.nulls_first.is_none() {
                                                    if o.desc {
                                                        o.nulls_first = Some(false);
                                                        // NULLS LAST
                                                    } else {
                                                        o.nulls_first = Some(true);
                                                        // NULLS FIRST
                                                    }
                                                }
                                                o
                                            })
                                            .collect()
                                    })
                                } else {
                                    gc.order_by
                                };
                                Ok(Expression::StringAgg(Box::new(
                                    crate::expressions::StringAggFunc {
                                        this,
                                        separator: Some(sep),
                                        order_by,
                                        distinct: gc.distinct,
                                        filter: gc.filter,
                                        limit: None,
                                    },
                                )))
                            }
                            DialectType::TSQL => {
                                // GROUP_CONCAT(x [, sep]) -> STRING_AGG(x, sep) WITHIN GROUP (ORDER BY ...)
                                // TSQL doesn't support DISTINCT in STRING_AGG
                                let sep = gc.separator.unwrap_or(Expression::string(","));
                                // Expand CONCAT(a,b,c) -> a + b + c for TSQL
                                let this = expand_concat_to_plus(gc.this);
                                Ok(Expression::StringAgg(Box::new(
                                    crate::expressions::StringAggFunc {
                                        this,
                                        separator: Some(sep),
                                        order_by: gc.order_by,
                                        distinct: false, // TSQL doesn't support DISTINCT in STRING_AGG
                                        filter: gc.filter,
                                        limit: None,
                                    },
                                )))
                            }
                            DialectType::SQLite => {
                                // GROUP_CONCAT stays as GROUP_CONCAT but ORDER BY is removed
                                // SQLite GROUP_CONCAT doesn't support ORDER BY
                                // Expand CONCAT(a,b,c) -> a || b || c
                                let this = expand_concat_to_dpipe(gc.this);
                                Ok(Expression::GroupConcat(Box::new(
                                    crate::expressions::GroupConcatFunc {
                                        this,
                                        separator: gc.separator,
                                        order_by: None, // SQLite doesn't support ORDER BY in GROUP_CONCAT
                                        distinct: gc.distinct,
                                        filter: gc.filter,
                                    },
                                )))
                            }
                            DialectType::Spark | DialectType::Databricks => {
                                // GROUP_CONCAT(x [, sep]) -> LISTAGG(x, sep)
                                let sep = gc.separator.unwrap_or(Expression::string(","));
                                Ok(Expression::ListAgg(Box::new(
                                    crate::expressions::ListAggFunc {
                                        this: gc.this,
                                        separator: Some(sep),
                                        on_overflow: None,
                                        order_by: gc.order_by,
                                        distinct: gc.distinct,
                                        filter: None,
                                    },
                                )))
                            }
18313 DialectType::MySQL
18314 | DialectType::SingleStore
18315 | DialectType::StarRocks => {
18316 // MySQL GROUP_CONCAT should have explicit SEPARATOR (default ',')
18317 if gc.separator.is_none() {
18318 let mut gc = gc;
18319 gc.separator = Some(Expression::string(","));
18320 Ok(Expression::GroupConcat(gc))
18321 } else {
18322 Ok(Expression::GroupConcat(gc))
18323 }
18324 }
18325 _ => Ok(Expression::GroupConcat(gc)),
18326 }
18327 } else {
18328 Ok(e)
18329 }
18330 }
18331 Action::TempTableHash => {
18332 match e {
18333 Expression::CreateTable(mut ct) => {
18334 // TSQL #table -> TEMPORARY TABLE with # stripped from name
18335 let name = &ct.name.name.name;
18336 if name.starts_with('#') {
18337 ct.name.name.name = name.trim_start_matches('#').to_string();
18338 }
18339 // Set temporary flag
18340 ct.temporary = true;
18341 Ok(Expression::CreateTable(ct))
18342 }
18343 Expression::Table(mut tr) => {
18344 // Strip # from table references
18345 let name = &tr.name.name;
18346 if name.starts_with('#') {
18347 tr.name.name = name.trim_start_matches('#').to_string();
18348 }
18349 Ok(Expression::Table(tr))
18350 }
18351 Expression::DropTable(mut dt) => {
18352 // Strip # from DROP TABLE names
18353 for table_ref in &mut dt.names {
18354 if table_ref.name.name.starts_with('#') {
18355 table_ref.name.name =
18356 table_ref.name.name.trim_start_matches('#').to_string();
18357 }
18358 }
18359 Ok(Expression::DropTable(dt))
18360 }
18361 _ => Ok(e),
18362 }
18363 }
18364 Action::NvlClearOriginal => {
18365 if let Expression::Nvl(mut f) = e {
18366 f.original_name = None;
18367 Ok(Expression::Nvl(f))
18368 } else {
18369 Ok(e)
18370 }
18371 }
                Action::HiveCastToTryCast => {
                    // Convert Hive/Spark CAST to TRY_CAST for targets that support it
                    if let Expression::Cast(mut c) = e {
                        // For Spark/Hive -> DuckDB: TIMESTAMP -> TIMESTAMPTZ
                        // (Spark's TIMESTAMP is always timezone-aware)
                        if matches!(target, DialectType::DuckDB)
                            && matches!(source, DialectType::Spark | DialectType::Databricks)
                            && matches!(
                                c.to,
                                DataType::Timestamp {
                                    timezone: false,
                                    ..
                                }
                            )
                        {
                            // A Custom type preserves the exact TIMESTAMPTZ spelling.
                            c.to = DataType::Custom {
                                name: "TIMESTAMPTZ".to_string(),
                            };
                        }
                        // For Spark source -> Databricks: VARCHAR/CHAR -> STRING
                        // Spark parses VARCHAR(n)/CHAR(n) as TEXT, normalize to STRING
                        if matches!(target, DialectType::Databricks | DialectType::Spark)
                            && matches!(
                                source,
                                DialectType::Spark | DialectType::Databricks | DialectType::Hive
                            )
                            && Self::has_varchar_char_type(&c.to)
                        {
                            c.to = Self::normalize_varchar_to_string(c.to);
                        }
                        // Whatever the target type ended up as, the node itself becomes
                        // TRY_CAST.
                        Ok(Expression::TryCast(c))
                    } else {
                        Ok(e)
                    }
                }
                Action::XorExpand => {
                    // Expand XOR to (a AND NOT b) OR (NOT a AND b) for dialects without XOR keyword
                    // Snowflake: use BOOLXOR(a, b) instead
                    if let Expression::Xor(xor) = e {
                        // Collect all XOR operands
                        let mut operands = Vec::new();
                        if let Some(this) = xor.this {
                            operands.push(*this);
                        }
                        if let Some(expr) = xor.expression {
                            operands.push(*expr);
                        }
                        operands.extend(xor.expressions);

                        // Snowflake: use BOOLXOR(a, b)
                        // (only for the exact two-operand case; other arities fall
                        // through to the generic AND/OR expansion below)
                        if matches!(target, DialectType::Snowflake) && operands.len() == 2 {
                            let a = operands.remove(0);
                            let b = operands.remove(0);
                            return Ok(Expression::Function(Box::new(Function::new(
                                "BOOLXOR".to_string(),
                                vec![a, b],
                            ))));
                        }

                        // Helper to build (a AND NOT b) OR (NOT a AND b)
                        let make_xor = |a: Expression, b: Expression| -> Expression {
                            // a and b are cloned because each appears twice in the
                            // expanded form.
                            let not_b = Expression::Not(Box::new(
                                crate::expressions::UnaryOp::new(b.clone()),
                            ));
                            let not_a = Expression::Not(Box::new(
                                crate::expressions::UnaryOp::new(a.clone()),
                            ));
                            let left_and = Expression::And(Box::new(BinaryOp {
                                left: a,
                                right: Expression::Paren(Box::new(Paren {
                                    this: not_b,
                                    trailing_comments: Vec::new(),
                                })),
                                left_comments: Vec::new(),
                                operator_comments: Vec::new(),
                                trailing_comments: Vec::new(),
                            }));
                            let right_and = Expression::And(Box::new(BinaryOp {
                                left: Expression::Paren(Box::new(Paren {
                                    this: not_a,
                                    trailing_comments: Vec::new(),
                                })),
                                right: b,
                                left_comments: Vec::new(),
                                operator_comments: Vec::new(),
                                trailing_comments: Vec::new(),
                            }));
                            Expression::Or(Box::new(BinaryOp {
                                left: Expression::Paren(Box::new(Paren {
                                    this: left_and,
                                    trailing_comments: Vec::new(),
                                })),
                                right: Expression::Paren(Box::new(Paren {
                                    this: right_and,
                                    trailing_comments: Vec::new(),
                                })),
                                left_comments: Vec::new(),
                                operator_comments: Vec::new(),
                                trailing_comments: Vec::new(),
                            }))
                        };

                        // Left-fold: a XOR b XOR c expands as xor(xor(a, b), c).
                        if operands.len() >= 2 {
                            let mut result = make_xor(operands.remove(0), operands.remove(0));
                            for operand in operands {
                                result = make_xor(result, operand);
                            }
                            Ok(result)
                        } else if operands.len() == 1 {
                            Ok(operands.remove(0))
                        } else {
                            // No operands - return FALSE (shouldn't happen)
                            Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
                                value: false,
                            }))
                        }
                    } else {
                        Ok(e)
                    }
                }
18492 Action::DatePartUnquote => {
18493 // DATE_PART('month', x) -> DATE_PART(month, x) for Snowflake target
18494 // Convert the quoted string first arg to a bare Column/Identifier
18495 if let Expression::Function(mut f) = e {
18496 if let Some(Expression::Literal(crate::expressions::Literal::String(s))) =
18497 f.args.first()
18498 {
18499 let bare_name = s.to_lowercase();
18500 f.args[0] = Expression::Column(crate::expressions::Column {
18501 name: Identifier::new(bare_name),
18502 table: None,
18503 join_mark: false,
18504 trailing_comments: Vec::new(),
18505 });
18506 }
18507 Ok(Expression::Function(f))
18508 } else {
18509 Ok(e)
18510 }
18511 }
18512 Action::ArrayLengthConvert => {
18513 // Extract the argument from the expression
18514 let arg = match e {
18515 Expression::Cardinality(ref f) => f.this.clone(),
18516 Expression::ArrayLength(ref f) => f.this.clone(),
18517 Expression::ArraySize(ref f) => f.this.clone(),
18518 _ => return Ok(e),
18519 };
18520 match target {
18521 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
18522 Ok(Expression::Function(Box::new(Function::new(
18523 "SIZE".to_string(),
18524 vec![arg],
18525 ))))
18526 }
18527 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
18528 Ok(Expression::Cardinality(Box::new(
18529 crate::expressions::UnaryFunc::new(arg),
18530 )))
18531 }
18532 DialectType::BigQuery => Ok(Expression::ArrayLength(Box::new(
18533 crate::expressions::UnaryFunc::new(arg),
18534 ))),
18535 DialectType::DuckDB => Ok(Expression::ArrayLength(Box::new(
18536 crate::expressions::UnaryFunc::new(arg),
18537 ))),
18538 DialectType::PostgreSQL | DialectType::Redshift => {
18539 // PostgreSQL ARRAY_LENGTH requires dimension arg
18540 Ok(Expression::Function(Box::new(Function::new(
18541 "ARRAY_LENGTH".to_string(),
18542 vec![arg, Expression::number(1)],
18543 ))))
18544 }
18545 DialectType::Snowflake => Ok(Expression::ArraySize(Box::new(
18546 crate::expressions::UnaryFunc::new(arg),
18547 ))),
18548 _ => Ok(e), // Keep original
18549 }
18550 }
18551
18552 Action::JsonExtractToArrow => {
18553 // JSON_EXTRACT(x, path) -> x -> path for SQLite/DuckDB (set arrow_syntax = true)
18554 if let Expression::JsonExtract(mut f) = e {
18555 f.arrow_syntax = true;
18556 // Transform path: convert bracket notation to dot notation
18557 // SQLite strips wildcards, DuckDB preserves them
18558 if let Expression::Literal(Literal::String(ref s)) = f.path {
18559 let mut transformed = s.clone();
18560 if matches!(target, DialectType::SQLite) {
18561 transformed = Self::strip_json_wildcards(&transformed);
18562 }
18563 transformed = Self::bracket_to_dot_notation(&transformed);
18564 if transformed != *s {
18565 f.path = Expression::string(&transformed);
18566 }
18567 }
18568 Ok(Expression::JsonExtract(f))
18569 } else {
18570 Ok(e)
18571 }
18572 }
18573
                Action::JsonExtractToGetJsonObject => {
                    // JSON_EXTRACT -> JSON_EXTRACT_PATH[_TEXT] (PostgreSQL/Redshift)
                    // or GET_JSON_OBJECT (Hive/Spark-style targets).
                    if let Expression::JsonExtract(f) = e {
                        if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
                            // JSON_EXTRACT(x, '$.key') -> JSON_EXTRACT_PATH(x, 'key') for PostgreSQL
                            // Use proper decomposition that handles brackets
                            let keys: Vec<Expression> =
                                if let Expression::Literal(Literal::String(ref s)) = f.path {
                                    let parts = Self::decompose_json_path(s);
                                    parts.into_iter().map(|k| Expression::string(&k)).collect()
                                } else {
                                    // Non-literal paths are passed through as-is.
                                    vec![f.path]
                                };
                            let func_name = if matches!(target, DialectType::Redshift) {
                                "JSON_EXTRACT_PATH_TEXT"
                            } else {
                                "JSON_EXTRACT_PATH"
                            };
                            let mut args = vec![f.this];
                            args.extend(keys);
                            Ok(Expression::Function(Box::new(Function::new(
                                func_name.to_string(),
                                args,
                            ))))
                        } else {
                            // GET_JSON_OBJECT(x, '$.path') for Hive/Spark
                            // Convert bracket double quotes to single quotes
                            let path = if let Expression::Literal(Literal::String(ref s)) = f.path {
                                let normalized = Self::bracket_to_single_quotes(s);
                                if normalized != *s {
                                    Expression::string(&normalized)
                                } else {
                                    f.path
                                }
                            } else {
                                f.path
                            };
                            Ok(Expression::Function(Box::new(Function::new(
                                "GET_JSON_OBJECT".to_string(),
                                vec![f.this, path],
                            ))))
                        }
                    } else {
                        Ok(e)
                    }
                }
18619
18620 Action::JsonExtractScalarToGetJsonObject => {
18621 // JSON_EXTRACT_SCALAR(x, '$.path') -> GET_JSON_OBJECT(x, '$.path') for Hive/Spark
18622 if let Expression::JsonExtractScalar(f) = e {
18623 Ok(Expression::Function(Box::new(Function::new(
18624 "GET_JSON_OBJECT".to_string(),
18625 vec![f.this, f.path],
18626 ))))
18627 } else {
18628 Ok(e)
18629 }
18630 }
18631
18632 Action::JsonExtractToTsql => {
18633 // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> ISNULL(JSON_QUERY(x, path), JSON_VALUE(x, path)) for TSQL
18634 let (this, path) = match e {
18635 Expression::JsonExtract(f) => (f.this, f.path),
18636 Expression::JsonExtractScalar(f) => (f.this, f.path),
18637 _ => return Ok(e),
18638 };
18639 // Transform path: strip wildcards, convert bracket notation to dot notation
18640 let transformed_path = if let Expression::Literal(Literal::String(ref s)) = path
18641 {
18642 let stripped = Self::strip_json_wildcards(s);
18643 let dotted = Self::bracket_to_dot_notation(&stripped);
18644 Expression::string(&dotted)
18645 } else {
18646 path
18647 };
18648 let json_query = Expression::Function(Box::new(Function::new(
18649 "JSON_QUERY".to_string(),
18650 vec![this.clone(), transformed_path.clone()],
18651 )));
18652 let json_value = Expression::Function(Box::new(Function::new(
18653 "JSON_VALUE".to_string(),
18654 vec![this, transformed_path],
18655 )));
18656 Ok(Expression::Function(Box::new(Function::new(
18657 "ISNULL".to_string(),
18658 vec![json_query, json_value],
18659 ))))
18660 }
18661
18662 Action::JsonExtractToClickHouse => {
18663 // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> JSONExtractString(x, 'key1', idx, 'key2') for ClickHouse
18664 let (this, path) = match e {
18665 Expression::JsonExtract(f) => (f.this, f.path),
18666 Expression::JsonExtractScalar(f) => (f.this, f.path),
18667 _ => return Ok(e),
18668 };
18669 let args: Vec<Expression> =
18670 if let Expression::Literal(Literal::String(ref s)) = path {
18671 let parts = Self::decompose_json_path(s);
18672 let mut result = vec![this];
18673 for part in parts {
18674 // ClickHouse uses 1-based integer indices for array access
18675 if let Ok(idx) = part.parse::<i64>() {
18676 result.push(Expression::number(idx + 1));
18677 } else {
18678 result.push(Expression::string(&part));
18679 }
18680 }
18681 result
18682 } else {
18683 vec![this, path]
18684 };
18685 Ok(Expression::Function(Box::new(Function::new(
18686 "JSONExtractString".to_string(),
18687 args,
18688 ))))
18689 }
18690
                Action::JsonExtractScalarConvert => {
                    // JSON_EXTRACT_SCALAR -> target-specific
                    if let Expression::JsonExtractScalar(f) = e {
                        match target {
                            DialectType::PostgreSQL | DialectType::Redshift => {
                                // JSON_EXTRACT_SCALAR(x, '$.path') -> JSON_EXTRACT_PATH_TEXT(x, 'key1', 'key2')
                                let keys: Vec<Expression> =
                                    if let Expression::Literal(Literal::String(ref s)) = f.path {
                                        let parts = Self::decompose_json_path(s);
                                        parts.into_iter().map(|k| Expression::string(&k)).collect()
                                    } else {
                                        // Non-literal paths pass through as a single arg.
                                        vec![f.path]
                                    };
                                let mut args = vec![f.this];
                                args.extend(keys);
                                Ok(Expression::Function(Box::new(Function::new(
                                    "JSON_EXTRACT_PATH_TEXT".to_string(),
                                    args,
                                ))))
                            }
                            DialectType::Snowflake => {
                                // JSON_EXTRACT_SCALAR(x, '$.path') -> JSON_EXTRACT_PATH_TEXT(x, 'stripped_path')
                                let stripped_path =
                                    if let Expression::Literal(Literal::String(ref s)) = f.path {
                                        let stripped = Self::strip_json_dollar_prefix(s);
                                        Expression::string(&stripped)
                                    } else {
                                        f.path
                                    };
                                Ok(Expression::Function(Box::new(Function::new(
                                    "JSON_EXTRACT_PATH_TEXT".to_string(),
                                    vec![f.this, stripped_path],
                                ))))
                            }
                            DialectType::SQLite | DialectType::DuckDB => {
                                // JSON_EXTRACT_SCALAR(x, '$.path') -> x ->> '$.path'
                                // (same node, regenerated with arrow syntax enabled)
                                Ok(Expression::JsonExtractScalar(Box::new(
                                    crate::expressions::JsonExtractFunc {
                                        this: f.this,
                                        path: f.path,
                                        returning: f.returning,
                                        arrow_syntax: true,
                                        hash_arrow_syntax: false,
                                        wrapper_option: None,
                                        quotes_option: None,
                                        on_scalar_string: false,
                                        on_error: None,
                                    },
                                )))
                            }
                            // Other targets keep JSON_EXTRACT_SCALAR unchanged.
                            _ => Ok(Expression::JsonExtractScalar(f)),
                        }
                    } else {
                        Ok(e)
                    }
                }
18747
18748 Action::JsonPathNormalize => {
18749 // Normalize JSON path format for BigQuery, MySQL, etc.
18750 if let Expression::JsonExtract(mut f) = e {
18751 if let Expression::Literal(Literal::String(ref s)) = f.path {
18752 let mut normalized = s.clone();
18753 // Convert bracket notation and handle wildcards per dialect
18754 match target {
18755 DialectType::BigQuery => {
18756 // BigQuery strips wildcards and uses single quotes in brackets
18757 normalized = Self::strip_json_wildcards(&normalized);
18758 normalized = Self::bracket_to_single_quotes(&normalized);
18759 }
18760 DialectType::MySQL => {
18761 // MySQL preserves wildcards, converts brackets to dot notation
18762 normalized = Self::bracket_to_dot_notation(&normalized);
18763 }
18764 _ => {}
18765 }
18766 if normalized != *s {
18767 f.path = Expression::string(&normalized);
18768 }
18769 }
18770 Ok(Expression::JsonExtract(f))
18771 } else {
18772 Ok(e)
18773 }
18774 }
18775
                Action::JsonQueryValueConvert => {
                    // JsonQuery/JsonValue -> target-specific
                    let (f, is_query) = match e {
                        Expression::JsonQuery(f) => (f, true),
                        Expression::JsonValue(f) => (f, false),
                        _ => return Ok(e),
                    };
                    match target {
                        DialectType::TSQL | DialectType::Fabric => {
                            // ISNULL(JSON_QUERY(...), JSON_VALUE(...))
                            // this/path are cloned because they appear in both calls.
                            let json_query = Expression::Function(Box::new(Function::new(
                                "JSON_QUERY".to_string(),
                                vec![f.this.clone(), f.path.clone()],
                            )));
                            let json_value = Expression::Function(Box::new(Function::new(
                                "JSON_VALUE".to_string(),
                                vec![f.this, f.path],
                            )));
                            Ok(Expression::Function(Box::new(Function::new(
                                "ISNULL".to_string(),
                                vec![json_query, json_value],
                            ))))
                        }
                        DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
                            Ok(Expression::Function(Box::new(Function::new(
                                "GET_JSON_OBJECT".to_string(),
                                vec![f.this, f.path],
                            ))))
                        }
                        DialectType::PostgreSQL | DialectType::Redshift => {
                            Ok(Expression::Function(Box::new(Function::new(
                                "JSON_EXTRACT_PATH_TEXT".to_string(),
                                vec![f.this, f.path],
                            ))))
                        }
                        DialectType::DuckDB | DialectType::SQLite => {
                            // json -> path arrow syntax
                            Ok(Expression::JsonExtract(Box::new(
                                crate::expressions::JsonExtractFunc {
                                    this: f.this,
                                    path: f.path,
                                    returning: f.returning,
                                    arrow_syntax: true,
                                    hash_arrow_syntax: false,
                                    wrapper_option: f.wrapper_option,
                                    quotes_option: f.quotes_option,
                                    on_scalar_string: f.on_scalar_string,
                                    on_error: f.on_error,
                                },
                            )))
                        }
                        DialectType::Snowflake => {
                            // GET_PATH(PARSE_JSON(json), 'path')
                            // Strip $. prefix from path
                            // Only wrap in PARSE_JSON if not already a PARSE_JSON call or ParseJson expression
                            let json_expr = match &f.this {
                                Expression::Function(ref inner_f)
                                    if inner_f.name.eq_ignore_ascii_case("PARSE_JSON") =>
                                {
                                    f.this
                                }
                                Expression::ParseJson(_) => {
                                    // Already a ParseJson expression, which generates as PARSE_JSON(...)
                                    f.this
                                }
                                _ => Expression::Function(Box::new(Function::new(
                                    "PARSE_JSON".to_string(),
                                    vec![f.this],
                                ))),
                            };
                            let path_str = match &f.path {
                                Expression::Literal(Literal::String(s)) => {
                                    // Only a single leading "$." is removed here.
                                    let stripped = s.strip_prefix("$.").unwrap_or(s);
                                    Expression::Literal(Literal::String(stripped.to_string()))
                                }
                                other => other.clone(),
                            };
                            Ok(Expression::Function(Box::new(Function::new(
                                "GET_PATH".to_string(),
                                vec![json_expr, path_str],
                            ))))
                        }
                        _ => {
                            // Default: keep as JSON_QUERY/JSON_VALUE function
                            let func_name = if is_query { "JSON_QUERY" } else { "JSON_VALUE" };
                            Ok(Expression::Function(Box::new(Function::new(
                                func_name.to_string(),
                                vec![f.this, f.path],
                            ))))
                        }
                    }
                }
18868
18869 Action::JsonLiteralToJsonParse => {
18870 // CAST('x' AS JSON) -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake
18871 if let Expression::Cast(c) = e {
18872 let func_name = if matches!(target, DialectType::Snowflake) {
18873 "PARSE_JSON"
18874 } else {
18875 "JSON_PARSE"
18876 };
18877 Ok(Expression::Function(Box::new(Function::new(
18878 func_name.to_string(),
18879 vec![c.this],
18880 ))))
18881 } else {
18882 Ok(e)
18883 }
18884 }
18885
18886 Action::AtTimeZoneConvert => {
18887 // AT TIME ZONE -> target-specific conversion
18888 if let Expression::AtTimeZone(atz) = e {
18889 match target {
18890 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
18891 Ok(Expression::Function(Box::new(Function::new(
18892 "AT_TIMEZONE".to_string(),
18893 vec![atz.this, atz.zone],
18894 ))))
18895 }
18896 DialectType::Spark | DialectType::Databricks => {
18897 Ok(Expression::Function(Box::new(Function::new(
18898 "FROM_UTC_TIMESTAMP".to_string(),
18899 vec![atz.this, atz.zone],
18900 ))))
18901 }
18902 DialectType::Snowflake => {
18903 // CONVERT_TIMEZONE('zone', expr)
18904 Ok(Expression::Function(Box::new(Function::new(
18905 "CONVERT_TIMEZONE".to_string(),
18906 vec![atz.zone, atz.this],
18907 ))))
18908 }
18909 DialectType::BigQuery => {
18910 // TIMESTAMP(DATETIME(expr, 'zone'))
18911 let datetime_call = Expression::Function(Box::new(Function::new(
18912 "DATETIME".to_string(),
18913 vec![atz.this, atz.zone],
18914 )));
18915 Ok(Expression::Function(Box::new(Function::new(
18916 "TIMESTAMP".to_string(),
18917 vec![datetime_call],
18918 ))))
18919 }
18920 _ => Ok(Expression::Function(Box::new(Function::new(
18921 "AT_TIMEZONE".to_string(),
18922 vec![atz.this, atz.zone],
18923 )))),
18924 }
18925 } else {
18926 Ok(e)
18927 }
18928 }
18929
                Action::DayOfWeekConvert => {
                    // DAY_OF_WEEK -> ISODOW for DuckDB, ((DAYOFWEEK(x) % 7) + 1) for Spark
                    // NOTE(review): the Spark remap compensates for differing week-start
                    // numbering between DAY_OF_WEEK and Spark's DAYOFWEEK — confirm the
                    // exact convention against the source dialect's semantics.
                    if let Expression::DayOfWeek(f) = e {
                        match target {
                            DialectType::DuckDB => Ok(Expression::Function(Box::new(
                                Function::new("ISODOW".to_string(), vec![f.this]),
                            ))),
                            DialectType::Spark | DialectType::Databricks => {
                                // ((DAYOFWEEK(x) % 7) + 1)
                                let dayofweek = Expression::Function(Box::new(Function::new(
                                    "DAYOFWEEK".to_string(),
                                    vec![f.this],
                                )));
                                let modulo = Expression::Mod(Box::new(BinaryOp {
                                    left: dayofweek,
                                    right: Expression::number(7),
                                    left_comments: Vec::new(),
                                    operator_comments: Vec::new(),
                                    trailing_comments: Vec::new(),
                                }));
                                // Parenthesize the modulo so precedence survives printing.
                                let paren_mod = Expression::Paren(Box::new(Paren {
                                    this: modulo,
                                    trailing_comments: Vec::new(),
                                }));
                                let add_one = Expression::Add(Box::new(BinaryOp {
                                    left: paren_mod,
                                    right: Expression::number(1),
                                    left_comments: Vec::new(),
                                    operator_comments: Vec::new(),
                                    trailing_comments: Vec::new(),
                                }));
                                Ok(Expression::Paren(Box::new(Paren {
                                    this: add_one,
                                    trailing_comments: Vec::new(),
                                })))
                            }
                            // Other targets keep DAY_OF_WEEK unchanged.
                            _ => Ok(Expression::DayOfWeek(f)),
                        }
                    } else {
                        Ok(e)
                    }
                }
18972
18973 Action::MaxByMinByConvert => {
18974 // MAX_BY -> argMax for ClickHouse, drop 3rd arg for Spark
18975 // MIN_BY -> argMin for ClickHouse, ARG_MIN for DuckDB, drop 3rd arg for Spark/ClickHouse
18976 // Handle both Expression::Function and Expression::AggregateFunction
18977 let (is_max, args) = match &e {
18978 Expression::Function(f) => {
18979 (f.name.eq_ignore_ascii_case("MAX_BY"), f.args.clone())
18980 }
18981 Expression::AggregateFunction(af) => {
18982 (af.name.eq_ignore_ascii_case("MAX_BY"), af.args.clone())
18983 }
18984 _ => return Ok(e),
18985 };
18986 match target {
18987 DialectType::ClickHouse => {
18988 let name = if is_max { "argMax" } else { "argMin" };
18989 let mut args = args;
18990 args.truncate(2);
18991 Ok(Expression::Function(Box::new(Function::new(
18992 name.to_string(),
18993 args,
18994 ))))
18995 }
18996 DialectType::DuckDB => {
18997 let name = if is_max { "ARG_MAX" } else { "ARG_MIN" };
18998 Ok(Expression::Function(Box::new(Function::new(
18999 name.to_string(),
19000 args,
19001 ))))
19002 }
19003 DialectType::Spark | DialectType::Databricks => {
19004 let mut args = args;
19005 args.truncate(2);
19006 let name = if is_max { "MAX_BY" } else { "MIN_BY" };
19007 Ok(Expression::Function(Box::new(Function::new(
19008 name.to_string(),
19009 args,
19010 ))))
19011 }
19012 _ => Ok(e),
19013 }
19014 }
19015
19016 Action::ElementAtConvert => {
19017 // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
19018 let (arr, idx) = if let Expression::ElementAt(bf) = e {
19019 (bf.this, bf.expression)
19020 } else if let Expression::Function(ref f) = e {
19021 if f.args.len() >= 2 {
19022 if let Expression::Function(f) = e {
19023 let mut args = f.args;
19024 let arr = args.remove(0);
19025 let idx = args.remove(0);
19026 (arr, idx)
19027 } else {
19028 unreachable!("outer condition already matched Expression::Function")
19029 }
19030 } else {
19031 return Ok(e);
19032 }
19033 } else {
19034 return Ok(e);
19035 };
19036 match target {
19037 DialectType::PostgreSQL => {
19038 // Wrap array in parens for PostgreSQL: (ARRAY[1,2,3])[4]
19039 let arr_expr = Expression::Paren(Box::new(Paren {
19040 this: arr,
19041 trailing_comments: vec![],
19042 }));
19043 Ok(Expression::Subscript(Box::new(
19044 crate::expressions::Subscript {
19045 this: arr_expr,
19046 index: idx,
19047 },
19048 )))
19049 }
19050 DialectType::BigQuery => {
19051 // BigQuery: convert ARRAY[...] to bare [...] for subscript
19052 let arr_expr = match arr {
19053 Expression::ArrayFunc(af) => Expression::ArrayFunc(Box::new(
19054 crate::expressions::ArrayConstructor {
19055 expressions: af.expressions,
19056 bracket_notation: true,
19057 use_list_keyword: false,
19058 },
19059 )),
19060 other => other,
19061 };
19062 let safe_ordinal = Expression::Function(Box::new(Function::new(
19063 "SAFE_ORDINAL".to_string(),
19064 vec![idx],
19065 )));
19066 Ok(Expression::Subscript(Box::new(
19067 crate::expressions::Subscript {
19068 this: arr_expr,
19069 index: safe_ordinal,
19070 },
19071 )))
19072 }
19073 _ => Ok(Expression::Function(Box::new(Function::new(
19074 "ELEMENT_AT".to_string(),
19075 vec![arr, idx],
19076 )))),
19077 }
19078 }
19079
            Action::CurrentUserParens => {
                // Snowflake requires CURRENT_USER to be invoked with
                // parentheses: CURRENT_USER -> CURRENT_USER().
                Ok(Expression::Function(Box::new(Function::new(
                    "CURRENT_USER".to_string(),
                    vec![],
                ))))
            }

            Action::ArrayAggToCollectList => {
                // ARRAY_AGG(x ORDER BY ...) -> COLLECT_LIST(x) for Hive/Spark
                // Python sqlglot Hive.arrayagg_sql strips ORDER BY for simple cases
                // but preserves it when DISTINCT/IGNORE NULLS/LIMIT are present
                match e {
                    Expression::AggregateFunction(mut af) => {
                        // "Simple" = no modifier that would make the element
                        // ordering observable in the collected result.
                        let is_simple =
                            !af.distinct && af.ignore_nulls.is_none() && af.limit.is_none();
                        // COLLECT_LIST takes a single argument; any extra
                        // arguments are dropped.
                        let args = if af.args.is_empty() {
                            vec![]
                        } else {
                            vec![af.args[0].clone()]
                        };
                        af.name = "COLLECT_LIST".to_string();
                        af.args = args;
                        if is_simple {
                            af.order_by = Vec::new();
                        }
                        Ok(Expression::AggregateFunction(af))
                    }
                    Expression::ArrayAgg(agg) => {
                        let is_simple =
                            !agg.distinct && agg.ignore_nulls.is_none() && agg.limit.is_none();
                        // Rebuild the dedicated ArrayAgg node as a generic
                        // AggregateFunction named COLLECT_LIST, carrying over
                        // every modifier (DISTINCT, FILTER, LIMIT, IGNORE NULLS).
                        Ok(Expression::AggregateFunction(Box::new(
                            crate::expressions::AggregateFunction {
                                name: "COLLECT_LIST".to_string(),
                                args: vec![agg.this.clone()],
                                distinct: agg.distinct,
                                filter: agg.filter.clone(),
                                order_by: if is_simple {
                                    Vec::new()
                                } else {
                                    agg.order_by.clone()
                                },
                                limit: agg.limit.clone(),
                                ignore_nulls: agg.ignore_nulls,
                            },
                        )))
                    }
                    _ => Ok(e),
                }
            }
19130
            Action::ArraySyntaxConvert => {
                match e {
                    // ARRAY[1, 2] (ArrayFunc bracket_notation=false) -> set bracket_notation=true
                    // so the generator uses dialect-specific output (ARRAY() for Spark, [] for BigQuery)
                    Expression::ArrayFunc(arr) if !arr.bracket_notation => Ok(
                        Expression::ArrayFunc(Box::new(crate::expressions::ArrayConstructor {
                            expressions: arr.expressions,
                            bracket_notation: true,
                            use_list_keyword: false,
                        })),
                    ),
                    // ARRAY(y) function style -> ArrayFunc for target dialect
                    // bracket_notation=true for BigQuery/DuckDB/ClickHouse/StarRocks (output []), false for Presto (output ARRAY[])
                    Expression::Function(f) if f.name.eq_ignore_ascii_case("ARRAY") => {
                        let bracket = matches!(
                            target,
                            DialectType::BigQuery
                                | DialectType::DuckDB
                                | DialectType::ClickHouse
                                | DialectType::StarRocks
                        );
                        Ok(Expression::ArrayFunc(Box::new(
                            crate::expressions::ArrayConstructor {
                                expressions: f.args,
                                bracket_notation: bracket,
                                use_list_keyword: false,
                            },
                        )))
                    }
                    _ => Ok(e),
                }
            }

            Action::CastToJsonForSpark => {
                // CAST(x AS JSON) -> TO_JSON(x) for Spark.
                // The target type of the cast is discarded; only the operand
                // survives as the TO_JSON argument.
                if let Expression::Cast(c) = e {
                    Ok(Expression::Function(Box::new(Function::new(
                        "TO_JSON".to_string(),
                        vec![c.this],
                    ))))
                } else {
                    Ok(e)
                }
            }

            Action::CastJsonToFromJson => {
                // CAST(ParseJson(literal) AS ARRAY/MAP/STRUCT) -> FROM_JSON(literal, type_string) for Spark
                if let Expression::Cast(c) = e {
                    // Extract the string literal from ParseJson; if the operand
                    // is not a ParseJson wrapper, use it directly.
                    let literal_expr = if let Expression::ParseJson(pj) = c.this {
                        pj.this
                    } else {
                        c.this
                    };
                    // Convert the target DataType to Spark's type string format
                    // (e.g. "array<int>"), passed as FROM_JSON's second argument.
                    let type_str = Self::data_type_to_spark_string(&c.to);
                    Ok(Expression::Function(Box::new(Function::new(
                        "FROM_JSON".to_string(),
                        vec![literal_expr, Expression::Literal(Literal::String(type_str))],
                    ))))
                } else {
                    Ok(e)
                }
            }
19195
            Action::ToJsonConvert => {
                // TO_JSON(x) -> target-specific JSON-serialization spelling.
                if let Expression::ToJson(f) = e {
                    let arg = f.this;
                    match target {
                        DialectType::Presto | DialectType::Trino => {
                            // JSON_FORMAT(CAST(x AS JSON))
                            let cast_json = Expression::Cast(Box::new(Cast {
                                this: arg,
                                // JSON is not a first-class DataType variant
                                // here, so spell it as a custom type name.
                                to: DataType::Custom {
                                    name: "JSON".to_string(),
                                },
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                            }));
                            Ok(Expression::Function(Box::new(Function::new(
                                "JSON_FORMAT".to_string(),
                                vec![cast_json],
                            ))))
                        }
                        DialectType::BigQuery => Ok(Expression::Function(Box::new(
                            Function::new("TO_JSON_STRING".to_string(), vec![arg]),
                        ))),
                        DialectType::DuckDB => {
                            // CAST(TO_JSON(x) AS TEXT) so the result is a plain
                            // string value rather than a JSON value.
                            let to_json =
                                Expression::ToJson(Box::new(crate::expressions::UnaryFunc {
                                    this: arg,
                                    original_name: None,
                                }));
                            Ok(Expression::Cast(Box::new(Cast {
                                this: to_json,
                                to: DataType::Text,
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                            })))
                        }
                        // All other targets: rebuild the ToJson node unchanged.
                        _ => Ok(Expression::ToJson(Box::new(
                            crate::expressions::UnaryFunc {
                                this: arg,
                                original_name: None,
                            },
                        ))),
                    }
                } else {
                    Ok(e)
                }
            }

            Action::VarianceToClickHouse => {
                // VARIANCE(x) -> varSamp(x). ClickHouse function names are
                // case-sensitive, hence the camelCase spelling.
                if let Expression::Variance(f) = e {
                    Ok(Expression::Function(Box::new(Function::new(
                        "varSamp".to_string(),
                        vec![f.this],
                    ))))
                } else {
                    Ok(e)
                }
            }

            Action::StddevToClickHouse => {
                // STDDEV(x) -> stddevSamp(x) for ClickHouse (case-sensitive).
                if let Expression::Stddev(f) = e {
                    Ok(Expression::Function(Box::new(Function::new(
                        "stddevSamp".to_string(),
                        vec![f.this],
                    ))))
                } else {
                    Ok(e)
                }
            }

            Action::ApproxQuantileConvert => {
                // APPROX_QUANTILE(x[, q]) -> APPROX_PERCENTILE(x[, q]); the
                // quantile argument is optional and only emitted when present.
                if let Expression::ApproxQuantile(aq) = e {
                    let mut args = vec![*aq.this];
                    if let Some(q) = aq.quantile {
                        args.push(*q);
                    }
                    Ok(Expression::Function(Box::new(Function::new(
                        "APPROX_PERCENTILE".to_string(),
                        args,
                    ))))
                } else {
                    Ok(e)
                }
            }

            Action::DollarParamConvert => {
                // $-style bind parameters -> @-style; every other parameter
                // attribute (name, index, quoting, expression) is preserved.
                if let Expression::Parameter(p) = e {
                    Ok(Expression::Parameter(Box::new(
                        crate::expressions::Parameter {
                            name: p.name,
                            index: p.index,
                            style: crate::expressions::ParameterStyle::At,
                            quoted: p.quoted,
                            string_quoted: p.string_quoted,
                            expression: p.expression,
                        },
                    )))
                } else {
                    Ok(e)
                }
            }
19302
            Action::EscapeStringNormalize => {
                // Normalize an escape-string literal (E'...') so that control
                // characters round-trip as backslash escape sequences.
                if let Expression::Literal(Literal::EscapeString(s)) = e {
                    // Strip prefix (e.g., "e:" or "E:") if present from tokenizer
                    let stripped = if s.starts_with("e:") || s.starts_with("E:") {
                        s[2..].to_string()
                    } else {
                        s
                    };
                    // Re-escape literal control characters.
                    let normalized = stripped
                        .replace('\n', "\\n")
                        .replace('\r', "\\r")
                        .replace('\t', "\\t");
                    match target {
                        DialectType::BigQuery => {
                            // BigQuery: e'...' -> CAST(b'...' AS STRING)
                            // Use Raw for the b'...' part to avoid double-escaping
                            let raw_sql = format!("CAST(b'{}' AS STRING)", normalized);
                            Ok(Expression::Raw(crate::expressions::Raw { sql: raw_sql }))
                        }
                        _ => Ok(Expression::Literal(Literal::EscapeString(normalized))),
                    }
                } else {
                    Ok(e)
                }
            }

            Action::StraightJoinCase => {
                // straight_join: keep lowercase for DuckDB, quote for MySQL
                // (STRAIGHT_JOIN is a reserved keyword there).
                if let Expression::Column(col) = e {
                    if col.name.name == "STRAIGHT_JOIN" {
                        let mut new_col = col;
                        new_col.name.name = "straight_join".to_string();
                        if matches!(target, DialectType::MySQL) {
                            // MySQL: needs quoting since it's a reserved keyword
                            new_col.name.quoted = true;
                        }
                        Ok(Expression::Column(new_col))
                    } else {
                        Ok(Expression::Column(col))
                    }
                } else {
                    Ok(e)
                }
            }

            Action::TablesampleReservoir => {
                // TABLESAMPLE -> TABLESAMPLE RESERVOIR for DuckDB, which wants
                // an explicitly named sampling method.
                if let Expression::TableSample(mut ts) = e {
                    if let Some(ref mut sample) = ts.sample {
                        sample.method = crate::expressions::SampleMethod::Reservoir;
                        sample.explicit_method = true;
                    }
                    Ok(Expression::TableSample(ts))
                } else {
                    Ok(e)
                }
            }

            Action::TablesampleSnowflakeStrip => {
                // Strip method and PERCENT for Snowflake target from non-Snowflake source.
                // The sample clause can hang off either a TableSample node or
                // directly off a Table node; handle both shapes identically.
                match e {
                    Expression::TableSample(mut ts) => {
                        if let Some(ref mut sample) = ts.sample {
                            sample.suppress_method_output = true;
                            sample.unit_after_size = false;
                            sample.is_percent = false;
                        }
                        Ok(Expression::TableSample(ts))
                    }
                    Expression::Table(mut t) => {
                        if let Some(ref mut sample) = t.table_sample {
                            sample.suppress_method_output = true;
                            sample.unit_after_size = false;
                            sample.is_percent = false;
                        }
                        Ok(Expression::Table(t))
                    }
                    _ => Ok(e),
                }
            }

            Action::FirstToAnyValue => {
                // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB.
                // The IGNORE NULLS modifier is dropped in the process —
                // presumably redundant for DuckDB's ANY_VALUE; confirm against
                // DuckDB's aggregate documentation.
                if let Expression::First(mut agg) = e {
                    agg.ignore_nulls = None;
                    agg.name = Some("ANY_VALUE".to_string());
                    Ok(Expression::AnyValue(agg))
                } else {
                    Ok(e)
                }
            }
19394
            Action::ArrayIndexConvert => {
                // Subscript index: 1-based to 0-based for BigQuery.
                // Only literal integer indexes are rewritten; dynamic index
                // expressions (and unparseable numbers) pass through unchanged.
                if let Expression::Subscript(mut sub) = e {
                    if let Expression::Literal(Literal::Number(ref n)) = sub.index {
                        if let Ok(val) = n.parse::<i64>() {
                            sub.index =
                                Expression::Literal(Literal::Number((val - 1).to_string()));
                        }
                    }
                    Ok(Expression::Subscript(sub))
                } else {
                    Ok(e)
                }
            }

            Action::AnyValueIgnoreNulls => {
                // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark.
                // An explicit RESPECT NULLS (ignore_nulls == Some(false)) is
                // deliberately left untouched.
                if let Expression::AnyValue(mut av) = e {
                    if av.ignore_nulls.is_none() {
                        av.ignore_nulls = Some(true);
                    }
                    Ok(Expression::AnyValue(av))
                } else {
                    Ok(e)
                }
            }

            Action::BigQueryNullsOrdering => {
                // BigQuery doesn't support NULLS FIRST/LAST in window function ORDER BY.
                // Drop the qualifier from every window order key and from bare
                // Ordered nodes.
                if let Expression::WindowFunction(mut wf) = e {
                    for o in &mut wf.over.order_by {
                        o.nulls_first = None;
                    }
                    Ok(Expression::WindowFunction(wf))
                } else if let Expression::Ordered(mut o) = e {
                    o.nulls_first = None;
                    Ok(Expression::Ordered(o))
                } else {
                    Ok(e)
                }
            }

            Action::SnowflakeFloatProtect => {
                // Convert DataType::Float to DataType::Custom("FLOAT") to prevent
                // Snowflake's target transform from converting it to DOUBLE.
                // Non-Snowflake sources should keep their FLOAT spelling.
                // NOTE(review): any precision/scale carried by the Float variant
                // is dropped here — confirm that is acceptable for all callers.
                if let Expression::DataType(DataType::Float { .. }) = e {
                    Ok(Expression::DataType(DataType::Custom {
                        name: "FLOAT".to_string(),
                    }))
                } else {
                    Ok(e)
                }
            }
19449
19450 Action::MysqlNullsOrdering => {
19451 // MySQL doesn't support NULLS FIRST/LAST - strip or rewrite
19452 if let Expression::Ordered(mut o) = e {
19453 let nulls_last = o.nulls_first == Some(false);
19454 let desc = o.desc;
19455 // MySQL default: ASC -> NULLS LAST, DESC -> NULLS FIRST
19456 // If requested ordering matches default, just strip NULLS clause
19457 let matches_default = if desc {
19458 // DESC default is NULLS FIRST, so nulls_first=true matches
19459 o.nulls_first == Some(true)
19460 } else {
19461 // ASC default is NULLS LAST, so nulls_first=false matches
19462 nulls_last
19463 };
19464 if matches_default {
19465 o.nulls_first = None;
19466 Ok(Expression::Ordered(o))
19467 } else {
19468 // Need CASE WHEN x IS NULL THEN 0/1 ELSE 0/1 END, x
19469 // For ASC NULLS FIRST: ORDER BY CASE WHEN x IS NULL THEN 0 ELSE 1 END, x ASC
19470 // For DESC NULLS LAST: ORDER BY CASE WHEN x IS NULL THEN 1 ELSE 0 END, x DESC
19471 let null_val = if desc { 1 } else { 0 };
19472 let non_null_val = if desc { 0 } else { 1 };
19473 let _case_expr = Expression::Case(Box::new(Case {
19474 operand: None,
19475 whens: vec![(
19476 Expression::IsNull(Box::new(crate::expressions::IsNull {
19477 this: o.this.clone(),
19478 not: false,
19479 postfix_form: false,
19480 })),
19481 Expression::number(null_val),
19482 )],
19483 else_: Some(Expression::number(non_null_val)),
19484 comments: Vec::new(),
19485 }));
19486 o.nulls_first = None;
19487 // Return a tuple of [case_expr, ordered_expr]
19488 // We need to return both as part of the ORDER BY
19489 // But since transform_recursive processes individual expressions,
19490 // we can't easily add extra ORDER BY items here.
19491 // Instead, strip the nulls_first
19492 o.nulls_first = None;
19493 Ok(Expression::Ordered(o))
19494 }
19495 } else {
19496 Ok(e)
19497 }
19498 }
19499
            Action::MysqlNullsLastRewrite => {
                // DuckDB -> MySQL: Add CASE WHEN IS NULL THEN 1 ELSE 0 END to ORDER BY
                // to simulate NULLS LAST for ASC ordering
                if let Expression::WindowFunction(mut wf) = e {
                    // Rebuild the window's ORDER BY list, possibly inserting a
                    // synthetic sort key in front of each ASC item.
                    let mut new_order_by = Vec::new();
                    for o in wf.over.order_by {
                        if !o.desc {
                            // ASC: DuckDB has NULLS LAST, MySQL has NULLS FIRST
                            // Add CASE WHEN expr IS NULL THEN 1 ELSE 0 END before expr
                            // so NULL rows sort after non-NULL rows.
                            let case_expr = Expression::Case(Box::new(Case {
                                operand: None,
                                whens: vec![(
                                    Expression::IsNull(Box::new(crate::expressions::IsNull {
                                        this: o.this.clone(),
                                        not: false,
                                        postfix_form: false,
                                    })),
                                    Expression::Literal(Literal::Number("1".to_string())),
                                )],
                                else_: Some(Expression::Literal(Literal::Number(
                                    "0".to_string(),
                                ))),
                                comments: Vec::new(),
                            }));
                            new_order_by.push(crate::expressions::Ordered {
                                this: case_expr,
                                desc: false,
                                nulls_first: None,
                                explicit_asc: false,
                                with_fill: None,
                            });
                            // The original key follows the synthetic one, with
                            // its NULLS qualifier removed (invalid in MySQL).
                            let mut ordered = o;
                            ordered.nulls_first = None;
                            new_order_by.push(ordered);
                        } else {
                            // DESC: DuckDB has NULLS LAST, MySQL also has NULLS LAST (NULLs smallest in DESC)
                            // No change needed
                            let mut ordered = o;
                            ordered.nulls_first = None;
                            new_order_by.push(ordered);
                        }
                    }
                    wf.over.order_by = new_order_by;
                    Ok(Expression::WindowFunction(wf))
                } else {
                    Ok(e)
                }
            }

            Action::RespectNullsConvert => {
                // RESPECT NULLS -> strip for SQLite (FIRST_VALUE(c) OVER (...))
                // Only an explicit RESPECT NULLS (Some(false)) is cleared;
                // IGNORE NULLS (Some(true)) passes through unchanged.
                if let Expression::WindowFunction(mut wf) = e {
                    match &mut wf.this {
                        Expression::FirstValue(ref mut vf) => {
                            if vf.ignore_nulls == Some(false) {
                                vf.ignore_nulls = None;
                                // For SQLite, we'd need to add NULLS LAST to ORDER BY in the OVER clause
                                // but that's handled by the generator's NULLS ordering
                            }
                        }
                        Expression::LastValue(ref mut vf) => {
                            if vf.ignore_nulls == Some(false) {
                                vf.ignore_nulls = None;
                            }
                        }
                        _ => {}
                    }
                    Ok(Expression::WindowFunction(wf))
                } else {
                    Ok(e)
                }
            }
19572
            Action::CreateTableStripComment => {
                // Strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
                if let Expression::CreateTable(mut ct) = e {
                    for col in &mut ct.columns {
                        col.comment = None;
                        // Remove the Comment constraint itself...
                        col.constraints.retain(|c| {
                            !matches!(c, crate::expressions::ColumnConstraint::Comment(_))
                        });
                        // Also remove Comment from constraint_order
                        // so the generator's ordering metadata stays in sync.
                        col.constraint_order.retain(|c| {
                            !matches!(c, crate::expressions::ConstraintType::Comment)
                        });
                    }
                    // Strip properties (USING, PARTITIONED BY, etc.)
                    ct.properties.clear();
                    Ok(Expression::CreateTable(ct))
                } else {
                    Ok(e)
                }
            }

            Action::AlterTableToSpRename => {
                // ALTER TABLE db.t1 RENAME TO db.t2 -> EXEC sp_rename 'db.t1', 't2'
                // Only the first action is inspected; any other ALTER TABLE
                // action list passes through unchanged.
                if let Expression::AlterTable(ref at) = e {
                    if let Some(crate::expressions::AlterTableAction::RenameTable(
                        ref new_tbl,
                    )) = at.actions.first()
                    {
                        // Build the old table name using TSQL bracket quoting
                        let old_name = if let Some(ref schema) = at.name.schema {
                            if at.name.name.quoted || schema.quoted {
                                format!("[{}].[{}]", schema.name, at.name.name.name)
                            } else {
                                format!("{}.{}", schema.name, at.name.name.name)
                            }
                        } else {
                            if at.name.name.quoted {
                                format!("[{}]", at.name.name.name)
                            } else {
                                at.name.name.name.clone()
                            }
                        };
                        // sp_rename's new name is the bare table name (no schema).
                        let new_name = new_tbl.name.name.clone();
                        // EXEC sp_rename 'old_name', 'new_name'
                        // NOTE(review): names are interpolated into single-quoted
                        // literals without escaping embedded quotes — confirm
                        // identifiers cannot contain ' at this point.
                        let sql = format!("EXEC sp_rename '{}', '{}'", old_name, new_name);
                        Ok(Expression::Raw(crate::expressions::Raw { sql }))
                    } else {
                        Ok(e)
                    }
                } else {
                    Ok(e)
                }
            }

            Action::SnowflakeIntervalFormat => {
                // INTERVAL '2' HOUR -> INTERVAL '2 HOUR' for Snowflake
                // (value and unit folded into a single string literal).
                if let Expression::Interval(mut iv) = e {
                    if let (
                        Some(Expression::Literal(Literal::String(ref val))),
                        Some(ref unit_spec),
                    ) = (&iv.this, &iv.unit)
                    {
                        // Map the structured unit back to its SQL keyword;
                        // compound unit specs (e.g. YEAR TO MONTH) yield ""
                        // and are left untouched below.
                        let unit_str = match unit_spec {
                            crate::expressions::IntervalUnitSpec::Simple { unit, .. } => {
                                match unit {
                                    crate::expressions::IntervalUnit::Year => "YEAR",
                                    crate::expressions::IntervalUnit::Quarter => "QUARTER",
                                    crate::expressions::IntervalUnit::Month => "MONTH",
                                    crate::expressions::IntervalUnit::Week => "WEEK",
                                    crate::expressions::IntervalUnit::Day => "DAY",
                                    crate::expressions::IntervalUnit::Hour => "HOUR",
                                    crate::expressions::IntervalUnit::Minute => "MINUTE",
                                    crate::expressions::IntervalUnit::Second => "SECOND",
                                    crate::expressions::IntervalUnit::Millisecond => {
                                        "MILLISECOND"
                                    }
                                    crate::expressions::IntervalUnit::Microsecond => {
                                        "MICROSECOND"
                                    }
                                    crate::expressions::IntervalUnit::Nanosecond => {
                                        "NANOSECOND"
                                    }
                                }
                            }
                            _ => "",
                        };
                        if !unit_str.is_empty() {
                            let combined = format!("{} {}", val, unit_str);
                            iv.this = Some(Expression::Literal(Literal::String(combined)));
                            iv.unit = None;
                        }
                    }
                    Ok(Expression::Interval(iv))
                } else {
                    Ok(e)
                }
            }
19670
            Action::ArrayConcatBracketConvert => {
                // Expression::Array/ArrayFunc -> target-specific
                // For PostgreSQL: Array -> ArrayFunc (bracket_notation: false)
                // For Redshift: Array/ArrayFunc -> Function("ARRAY", args) to produce ARRAY(1, 2) with parens
                match e {
                    Expression::Array(arr) => {
                        if matches!(target, DialectType::Redshift) {
                            Ok(Expression::Function(Box::new(Function::new(
                                "ARRAY".to_string(),
                                arr.expressions,
                            ))))
                        } else {
                            // Non-Redshift (e.g. PostgreSQL): ARRAY[...] keyword form.
                            Ok(Expression::ArrayFunc(Box::new(
                                crate::expressions::ArrayConstructor {
                                    expressions: arr.expressions,
                                    bracket_notation: false,
                                    use_list_keyword: false,
                                },
                            )))
                        }
                    }
                    Expression::ArrayFunc(arr) => {
                        // Only for Redshift: convert bracket-notation ArrayFunc to Function("ARRAY")
                        if matches!(target, DialectType::Redshift) {
                            Ok(Expression::Function(Box::new(Function::new(
                                "ARRAY".to_string(),
                                arr.expressions,
                            ))))
                        } else {
                            Ok(Expression::ArrayFunc(arr))
                        }
                    }
                    _ => Ok(e),
                }
            }
19706
19707 Action::BitAggFloatCast => {
19708 // BIT_OR/BIT_AND/BIT_XOR with float/decimal cast arg -> wrap with ROUND+INT cast for DuckDB
19709 // For FLOAT/DOUBLE/REAL: CAST(ROUND(CAST(val AS type)) AS INT)
19710 // For DECIMAL: CAST(CAST(val AS DECIMAL(p,s)) AS INT)
19711 let int_type = DataType::Int {
19712 length: None,
19713 integer_spelling: false,
19714 };
19715 let wrap_agg = |agg_this: Expression, int_dt: DataType| -> Expression {
19716 if let Expression::Cast(c) = agg_this {
19717 match &c.to {
19718 DataType::Float { .. }
19719 | DataType::Double { .. }
19720 | DataType::Custom { .. } => {
19721 // FLOAT/DOUBLE/REAL: CAST(ROUND(CAST(val AS type)) AS INT)
19722 // Change FLOAT to REAL (Float with real_spelling=true) for DuckDB generator
19723 let inner_type = match &c.to {
19724 DataType::Float {
19725 precision, scale, ..
19726 } => DataType::Float {
19727 precision: *precision,
19728 scale: *scale,
19729 real_spelling: true,
19730 },
19731 other => other.clone(),
19732 };
19733 let inner_cast =
19734 Expression::Cast(Box::new(crate::expressions::Cast {
19735 this: c.this.clone(),
19736 to: inner_type,
19737 trailing_comments: Vec::new(),
19738 double_colon_syntax: false,
19739 format: None,
19740 default: None,
19741 }));
19742 let rounded = Expression::Function(Box::new(Function::new(
19743 "ROUND".to_string(),
19744 vec![inner_cast],
19745 )));
19746 Expression::Cast(Box::new(crate::expressions::Cast {
19747 this: rounded,
19748 to: int_dt,
19749 trailing_comments: Vec::new(),
19750 double_colon_syntax: false,
19751 format: None,
19752 default: None,
19753 }))
19754 }
19755 DataType::Decimal { .. } => {
19756 // DECIMAL: CAST(CAST(val AS DECIMAL(p,s)) AS INT)
19757 Expression::Cast(Box::new(crate::expressions::Cast {
19758 this: Expression::Cast(c),
19759 to: int_dt,
19760 trailing_comments: Vec::new(),
19761 double_colon_syntax: false,
19762 format: None,
19763 default: None,
19764 }))
19765 }
19766 _ => Expression::Cast(c),
19767 }
19768 } else {
19769 agg_this
19770 }
19771 };
19772 match e {
19773 Expression::BitwiseOrAgg(mut f) => {
19774 f.this = wrap_agg(f.this, int_type);
19775 Ok(Expression::BitwiseOrAgg(f))
19776 }
19777 Expression::BitwiseAndAgg(mut f) => {
19778 let int_type = DataType::Int {
19779 length: None,
19780 integer_spelling: false,
19781 };
19782 f.this = wrap_agg(f.this, int_type);
19783 Ok(Expression::BitwiseAndAgg(f))
19784 }
19785 Expression::BitwiseXorAgg(mut f) => {
19786 let int_type = DataType::Int {
19787 length: None,
19788 integer_spelling: false,
19789 };
19790 f.this = wrap_agg(f.this, int_type);
19791 Ok(Expression::BitwiseXorAgg(f))
19792 }
19793 _ => Ok(e),
19794 }
19795 }
19796
            Action::BitAggSnowflakeRename => {
                // BIT_OR -> BITORAGG, BIT_AND -> BITANDAGG, BIT_XOR -> BITXORAGG for Snowflake
                // Modifiers on the aggregate node (if any) are not carried over;
                // only the argument survives into the plain function call.
                match e {
                    Expression::BitwiseOrAgg(f) => Ok(Expression::Function(Box::new(
                        Function::new("BITORAGG".to_string(), vec![f.this]),
                    ))),
                    Expression::BitwiseAndAgg(f) => Ok(Expression::Function(Box::new(
                        Function::new("BITANDAGG".to_string(), vec![f.this]),
                    ))),
                    Expression::BitwiseXorAgg(f) => Ok(Expression::Function(Box::new(
                        Function::new("BITXORAGG".to_string(), vec![f.this]),
                    ))),
                    _ => Ok(e),
                }
            }

            Action::StrftimeCastTimestamp => {
                // CAST(x AS TIMESTAMP) -> CAST(x AS TIMESTAMP_NTZ) for Spark.
                // Only zone-less timestamps are rewritten; TIMESTAMP WITH TIME
                // ZONE is left alone.
                if let Expression::Cast(mut c) = e {
                    if matches!(
                        c.to,
                        DataType::Timestamp {
                            timezone: false,
                            ..
                        }
                    ) {
                        c.to = DataType::Custom {
                            name: "TIMESTAMP_NTZ".to_string(),
                        };
                    }
                    Ok(Expression::Cast(c))
                } else {
                    Ok(e)
                }
            }

            Action::DecimalDefaultPrecision => {
                // DECIMAL without precision -> DECIMAL(18, 3) for Snowflake
                if let Expression::Cast(mut c) = e {
                    if matches!(
                        c.to,
                        DataType::Decimal {
                            precision: None,
                            ..
                        }
                    ) {
                        c.to = DataType::Decimal {
                            precision: Some(18),
                            scale: Some(3),
                        };
                    }
                    Ok(Expression::Cast(c))
                } else {
                    Ok(e)
                }
            }
19853
            Action::FilterToIff => {
                // FILTER(WHERE cond) -> rewrite aggregate: AGG(IFF(cond, val, NULL))
                // for targets that lack the FILTER clause (e.g. Snowflake).
                if let Expression::Filter(f) = e {
                    let condition = *f.expression;
                    let agg = *f.this;
                    // Strip WHERE from condition
                    let cond = match condition {
                        Expression::Where(w) => w.this,
                        other => other,
                    };
                    // Extract the aggregate function and its argument
                    // We want AVG(IFF(condition, x, NULL))
                    match agg {
                        Expression::Function(mut func) => {
                            if !func.args.is_empty() {
                                // Wrap the first argument; remaining args stay as-is.
                                let orig_arg = func.args[0].clone();
                                let iff_call = Expression::Function(Box::new(Function::new(
                                    "IFF".to_string(),
                                    vec![cond, orig_arg, Expression::Null(Null)],
                                )));
                                func.args[0] = iff_call;
                                Ok(Expression::Function(func))
                            } else {
                                // Zero-arg function: no argument to wrap, so the
                                // FILTER clause is preserved as-is.
                                Ok(Expression::Filter(Box::new(crate::expressions::Filter {
                                    this: Box::new(Expression::Function(func)),
                                    expression: Box::new(cond),
                                })))
                            }
                        }
                        Expression::Avg(mut avg) => {
                            let iff_call = Expression::Function(Box::new(Function::new(
                                "IFF".to_string(),
                                vec![cond, avg.this.clone(), Expression::Null(Null)],
                            )));
                            avg.this = iff_call;
                            Ok(Expression::Avg(avg))
                        }
                        Expression::Sum(mut s) => {
                            let iff_call = Expression::Function(Box::new(Function::new(
                                "IFF".to_string(),
                                vec![cond, s.this.clone(), Expression::Null(Null)],
                            )));
                            s.this = iff_call;
                            Ok(Expression::Sum(s))
                        }
                        Expression::Count(mut c) => {
                            // COUNT(*) has no inner expression to wrap, so the
                            // condition is only applied when COUNT has an argument.
                            if let Some(ref this_expr) = c.this {
                                let iff_call = Expression::Function(Box::new(Function::new(
                                    "IFF".to_string(),
                                    vec![cond, this_expr.clone(), Expression::Null(Null)],
                                )));
                                c.this = Some(iff_call);
                            }
                            Ok(Expression::Count(c))
                        }
                        other => {
                            // Fallback: keep as Filter
                            Ok(Expression::Filter(Box::new(crate::expressions::Filter {
                                this: Box::new(other),
                                expression: Box::new(cond),
                            })))
                        }
                    }
                } else {
                    Ok(e)
                }
            }

            Action::AggFilterToIff => {
                // AggFunc.filter -> IFF wrapping: AVG(x) FILTER(WHERE cond) -> AVG(IFF(cond, x, NULL))
                // Helper macro to handle the common AggFunc case: take the
                // filter condition out of the node and fold it into the
                // aggregate's argument as IFF(cond, arg, NULL).
                macro_rules! handle_agg_filter_to_iff {
                    ($variant:ident, $agg:expr) => {{
                        let mut agg = $agg;
                        if let Some(filter_cond) = agg.filter.take() {
                            let iff_call = Expression::Function(Box::new(Function::new(
                                "IFF".to_string(),
                                vec![filter_cond, agg.this.clone(), Expression::Null(Null)],
                            )));
                            agg.this = iff_call;
                        }
                        Ok(Expression::$variant(agg))
                    }};
                }

                match e {
                    Expression::Avg(agg) => handle_agg_filter_to_iff!(Avg, agg),
                    Expression::Sum(agg) => handle_agg_filter_to_iff!(Sum, agg),
                    Expression::Min(agg) => handle_agg_filter_to_iff!(Min, agg),
                    Expression::Max(agg) => handle_agg_filter_to_iff!(Max, agg),
                    Expression::ArrayAgg(agg) => handle_agg_filter_to_iff!(ArrayAgg, agg),
                    Expression::CountIf(agg) => handle_agg_filter_to_iff!(CountIf, agg),
                    Expression::Stddev(agg) => handle_agg_filter_to_iff!(Stddev, agg),
                    Expression::StddevPop(agg) => handle_agg_filter_to_iff!(StddevPop, agg),
                    Expression::StddevSamp(agg) => handle_agg_filter_to_iff!(StddevSamp, agg),
                    Expression::Variance(agg) => handle_agg_filter_to_iff!(Variance, agg),
                    Expression::VarPop(agg) => handle_agg_filter_to_iff!(VarPop, agg),
                    Expression::VarSamp(agg) => handle_agg_filter_to_iff!(VarSamp, agg),
                    Expression::Median(agg) => handle_agg_filter_to_iff!(Median, agg),
                    Expression::Mode(agg) => handle_agg_filter_to_iff!(Mode, agg),
                    Expression::First(agg) => handle_agg_filter_to_iff!(First, agg),
                    Expression::Last(agg) => handle_agg_filter_to_iff!(Last, agg),
                    Expression::AnyValue(agg) => handle_agg_filter_to_iff!(AnyValue, agg),
                    Expression::ApproxDistinct(agg) => {
                        handle_agg_filter_to_iff!(ApproxDistinct, agg)
                    }
                    Expression::Count(mut c) => {
                        // COUNT is special-cased: its argument is Option<_>
                        // (COUNT(*) has none), so the macro doesn't apply.
                        if let Some(filter_cond) = c.filter.take() {
                            if let Some(ref this_expr) = c.this {
                                let iff_call = Expression::Function(Box::new(Function::new(
                                    "IFF".to_string(),
                                    vec![
                                        filter_cond,
                                        this_expr.clone(),
                                        Expression::Null(Null),
                                    ],
                                )));
                                c.this = Some(iff_call);
                            }
                        }
                        Ok(Expression::Count(c))
                    }
                    other => Ok(other),
                }
            }
19979
            Action::JsonToGetPath => {
                // JSON_EXTRACT(x, '$.key') -> GET_PATH(PARSE_JSON(x), 'key')
                if let Expression::JsonExtract(je) = e {
                    // Convert to PARSE_JSON() wrapper:
                    // - JSON(x) -> PARSE_JSON(x)
                    // - PARSE_JSON(x) -> keep as-is
                    // - anything else -> wrap in PARSE_JSON()
                    let this = match &je.this {
                        Expression::Function(f)
                            if f.name.eq_ignore_ascii_case("JSON") && f.args.len() == 1 =>
                        {
                            Expression::Function(Box::new(Function::new(
                                "PARSE_JSON".to_string(),
                                f.args.clone(),
                            )))
                        }
                        Expression::Function(f)
                            if f.name.eq_ignore_ascii_case("PARSE_JSON") =>
                        {
                            je.this.clone()
                        }
                        // GET_PATH result is already JSON, don't wrap
                        Expression::Function(f) if f.name.eq_ignore_ascii_case("GET_PATH") => {
                            je.this.clone()
                        }
                        other => {
                            // Wrap non-JSON expressions in PARSE_JSON()
                            Expression::Function(Box::new(Function::new(
                                "PARSE_JSON".to_string(),
                                vec![other.clone()],
                            )))
                        }
                    };
                    // Convert path: extract key from JSONPath or strip $. prefix from string
                    let path = match &je.path {
                        Expression::JSONPath(jp) => {
                            // Extract the key from JSONPath: $root.key -> 'key'
                            // Nested keys are joined with '.'; non-key segments
                            // (wildcards, indexes) are silently skipped.
                            let mut key_parts = Vec::new();
                            for expr in &jp.expressions {
                                match expr {
                                    Expression::JSONPathRoot(_) => {} // skip root
                                    Expression::JSONPathKey(k) => {
                                        if let Expression::Literal(Literal::String(s)) =
                                            &*k.this
                                        {
                                            key_parts.push(s.clone());
                                        }
                                    }
                                    _ => {}
                                }
                            }
                            if !key_parts.is_empty() {
                                Expression::Literal(Literal::String(key_parts.join(".")))
                            } else {
                                je.path.clone()
                            }
                        }
                        Expression::Literal(Literal::String(s)) if s.starts_with("$.") => {
                            // NOTE(review): the intermediate to_string() here
                            // allocates; &s[2..] alone may suffice depending on
                            // strip_json_wildcards' signature.
                            let stripped = Self::strip_json_wildcards(&s[2..].to_string());
                            Expression::Literal(Literal::String(stripped))
                        }
                        Expression::Literal(Literal::String(s)) if s.starts_with('$') => {
                            let stripped = Self::strip_json_wildcards(&s[1..].to_string());
                            Expression::Literal(Literal::String(stripped))
                        }
                        _ => je.path.clone(),
                    };
                    Ok(Expression::Function(Box::new(Function::new(
                        "GET_PATH".to_string(),
                        vec![this, path],
                    ))))
                } else {
                    Ok(e)
                }
            }
20055
20056 Action::StructToRow => {
20057 // DuckDB struct/dict -> BigQuery STRUCT(value AS key, ...) / Presto ROW
20058 // Handles both Expression::Struct and Expression::MapFunc(curly_brace_syntax=true)
20059
20060 // Extract key-value pairs from either Struct or MapFunc
20061 let kv_pairs: Option<Vec<(String, Expression)>> = match &e {
20062 Expression::Struct(s) => Some(
20063 s.fields
20064 .iter()
20065 .map(|(opt_name, field_expr)| {
20066 if let Some(name) = opt_name {
20067 (name.clone(), field_expr.clone())
20068 } else if let Expression::NamedArgument(na) = field_expr {
20069 (na.name.name.clone(), na.value.clone())
20070 } else {
20071 (String::new(), field_expr.clone())
20072 }
20073 })
20074 .collect(),
20075 ),
20076 Expression::MapFunc(m) if m.curly_brace_syntax => Some(
20077 m.keys
20078 .iter()
20079 .zip(m.values.iter())
20080 .map(|(key, value)| {
20081 let key_name = match key {
20082 Expression::Literal(Literal::String(s)) => s.clone(),
20083 Expression::Identifier(id) => id.name.clone(),
20084 _ => String::new(),
20085 };
20086 (key_name, value.clone())
20087 })
20088 .collect(),
20089 ),
20090 _ => None,
20091 };
20092
20093 if let Some(pairs) = kv_pairs {
20094 let mut named_args = Vec::new();
20095 for (key_name, value) in pairs {
20096 if matches!(target, DialectType::BigQuery) && !key_name.is_empty() {
20097 named_args.push(Expression::Alias(Box::new(
20098 crate::expressions::Alias::new(
20099 value,
20100 Identifier::new(key_name),
20101 ),
20102 )));
20103 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
20104 named_args.push(value);
20105 } else {
20106 named_args.push(value);
20107 }
20108 }
20109
20110 if matches!(target, DialectType::BigQuery) {
20111 Ok(Expression::Function(Box::new(Function::new(
20112 "STRUCT".to_string(),
20113 named_args,
20114 ))))
20115 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
20116 // For Presto/Trino, infer types and wrap in CAST(ROW(...) AS ROW(name TYPE, ...))
20117 let row_func = Expression::Function(Box::new(Function::new(
20118 "ROW".to_string(),
20119 named_args,
20120 )));
20121
20122 // Try to infer types for each pair
20123 let kv_pairs_again: Option<Vec<(String, Expression)>> = match &e {
20124 Expression::Struct(s) => Some(
20125 s.fields
20126 .iter()
20127 .map(|(opt_name, field_expr)| {
20128 if let Some(name) = opt_name {
20129 (name.clone(), field_expr.clone())
20130 } else if let Expression::NamedArgument(na) = field_expr
20131 {
20132 (na.name.name.clone(), na.value.clone())
20133 } else {
20134 (String::new(), field_expr.clone())
20135 }
20136 })
20137 .collect(),
20138 ),
20139 Expression::MapFunc(m) if m.curly_brace_syntax => Some(
20140 m.keys
20141 .iter()
20142 .zip(m.values.iter())
20143 .map(|(key, value)| {
20144 let key_name = match key {
20145 Expression::Literal(Literal::String(s)) => {
20146 s.clone()
20147 }
20148 Expression::Identifier(id) => id.name.clone(),
20149 _ => String::new(),
20150 };
20151 (key_name, value.clone())
20152 })
20153 .collect(),
20154 ),
20155 _ => None,
20156 };
20157
20158 if let Some(pairs) = kv_pairs_again {
20159 // Infer types for all values
20160 let mut all_inferred = true;
20161 let mut fields = Vec::new();
20162 for (name, value) in &pairs {
20163 let inferred_type = match value {
20164 Expression::Literal(Literal::Number(n)) => {
20165 if n.contains('.') {
20166 Some(DataType::Double {
20167 precision: None,
20168 scale: None,
20169 })
20170 } else {
20171 Some(DataType::Int {
20172 length: None,
20173 integer_spelling: true,
20174 })
20175 }
20176 }
20177 Expression::Literal(Literal::String(_)) => {
20178 Some(DataType::VarChar {
20179 length: None,
20180 parenthesized_length: false,
20181 })
20182 }
20183 Expression::Boolean(_) => Some(DataType::Boolean),
20184 _ => None,
20185 };
20186 if let Some(dt) = inferred_type {
20187 fields.push(crate::expressions::StructField::new(
20188 name.clone(),
20189 dt,
20190 ));
20191 } else {
20192 all_inferred = false;
20193 break;
20194 }
20195 }
20196
20197 if all_inferred && !fields.is_empty() {
20198 let row_type = DataType::Struct {
20199 fields,
20200 nested: true,
20201 };
20202 Ok(Expression::Cast(Box::new(Cast {
20203 this: row_func,
20204 to: row_type,
20205 trailing_comments: Vec::new(),
20206 double_colon_syntax: false,
20207 format: None,
20208 default: None,
20209 })))
20210 } else {
20211 Ok(row_func)
20212 }
20213 } else {
20214 Ok(row_func)
20215 }
20216 } else {
20217 Ok(Expression::Function(Box::new(Function::new(
20218 "ROW".to_string(),
20219 named_args,
20220 ))))
20221 }
20222 } else {
20223 Ok(e)
20224 }
20225 }
20226
Action::SparkStructConvert => {
    // Spark STRUCT(val AS name, ...) -> Presto CAST(ROW(...) AS ROW(name TYPE, ...))
    // or DuckDB {'name': val, ...}
    if let Expression::Function(f) = e {
        // Extract name-value pairs from aliased args.
        // Non-aliased args get an empty-string name (downstream targets may
        // render an unnamed field for these).
        let mut pairs: Vec<(String, Expression)> = Vec::new();
        for arg in &f.args {
            match arg {
                Expression::Alias(a) => {
                    pairs.push((a.alias.name.clone(), a.this.clone()));
                }
                _ => {
                    pairs.push((String::new(), arg.clone()));
                }
            }
        }

        match target {
            DialectType::DuckDB => {
                // Convert to DuckDB struct literal {'name': value, ...}
                // (MapConstructor with curly_brace_syntax renders the {...} form).
                let mut keys = Vec::new();
                let mut values = Vec::new();
                for (name, value) in &pairs {
                    keys.push(Expression::Literal(Literal::String(name.clone())));
                    values.push(value.clone());
                }
                Ok(Expression::MapFunc(Box::new(
                    crate::expressions::MapConstructor {
                        keys,
                        values,
                        curly_brace_syntax: true,
                        with_map_keyword: false,
                    },
                )))
            }
            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                // Convert to CAST(ROW(val1, val2) AS ROW(name1 TYPE1, name2 TYPE2))
                let row_args: Vec<Expression> =
                    pairs.iter().map(|(_, v)| v.clone()).collect();
                let row_func = Expression::Function(Box::new(Function::new(
                    "ROW".to_string(),
                    row_args,
                )));

                // Infer types. Only simple literals are inferable:
                // numbers (DOUBLE if they contain '.', else INT), strings
                // (VARCHAR), and booleans. Anything else aborts inference.
                let mut all_inferred = true;
                let mut fields = Vec::new();
                for (name, value) in &pairs {
                    let inferred_type = match value {
                        Expression::Literal(Literal::Number(n)) => {
                            if n.contains('.') {
                                Some(DataType::Double {
                                    precision: None,
                                    scale: None,
                                })
                            } else {
                                Some(DataType::Int {
                                    length: None,
                                    integer_spelling: true,
                                })
                            }
                        }
                        Expression::Literal(Literal::String(_)) => {
                            Some(DataType::VarChar {
                                length: None,
                                parenthesized_length: false,
                            })
                        }
                        Expression::Boolean(_) => Some(DataType::Boolean),
                        _ => None,
                    };
                    if let Some(dt) = inferred_type {
                        fields.push(crate::expressions::StructField::new(
                            name.clone(),
                            dt,
                        ));
                    } else {
                        all_inferred = false;
                        break;
                    }
                }

                // Only emit the CAST wrapper when every field type was inferred;
                // otherwise fall back to a bare ROW(...) call.
                if all_inferred && !fields.is_empty() {
                    let row_type = DataType::Struct {
                        fields,
                        nested: true,
                    };
                    Ok(Expression::Cast(Box::new(Cast {
                        this: row_func,
                        to: row_type,
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    })))
                } else {
                    Ok(row_func)
                }
            }
            // Other targets: leave the original function call untouched.
            _ => Ok(Expression::Function(f)),
        }
    } else {
        Ok(e)
    }
}
20332
20333 Action::ApproxCountDistinctToApproxDistinct => {
20334 // APPROX_COUNT_DISTINCT(x) -> APPROX_DISTINCT(x)
20335 if let Expression::ApproxCountDistinct(f) = e {
20336 Ok(Expression::ApproxDistinct(f))
20337 } else {
20338 Ok(e)
20339 }
20340 }
20341
Action::CollectListToArrayAgg => {
    // COLLECT_LIST(x) -> ARRAY_AGG(x) FILTER(WHERE x IS NOT NULL)
    if let Expression::AggregateFunction(f) = e {
        // Build the FILTER clause: `x IS NOT NULL` over the first argument.
        // With no arguments there is nothing to filter on, so no FILTER.
        let filter_expr = if !f.args.is_empty() {
            let arg = f.args[0].clone();
            Some(Expression::IsNull(Box::new(crate::expressions::IsNull {
                this: arg,
                not: true, // `not: true` renders IS NOT NULL
                postfix_form: false,
            })))
        } else {
            None
        };
        // Rebuild as an ArrayAgg node; DISTINCT and ORDER BY are carried over,
        // while the original's own filter (if any) is replaced by the
        // NOT NULL filter built above.
        let agg = crate::expressions::AggFunc {
            // Degenerate zero-arg call aggregates over NULL as a placeholder.
            this: if f.args.is_empty() {
                Expression::Null(crate::expressions::Null)
            } else {
                f.args[0].clone()
            },
            distinct: f.distinct,
            order_by: f.order_by.clone(),
            filter: filter_expr,
            ignore_nulls: None,
            name: None,
            having_max: None,
            limit: None,
        };
        Ok(Expression::ArrayAgg(Box::new(agg)))
    } else {
        Ok(e)
    }
}
20374
Action::CollectSetConvert => {
    // COLLECT_SET(x) -> target-specific
    if let Expression::AggregateFunction(f) = e {
        match target {
            // Presto: SET_AGG(x). DISTINCT is dropped (set semantics already
            // deduplicate); other clauses are carried over.
            DialectType::Presto => Ok(Expression::AggregateFunction(Box::new(
                crate::expressions::AggregateFunction {
                    name: "SET_AGG".to_string(),
                    args: f.args,
                    distinct: false,
                    order_by: f.order_by,
                    filter: f.filter,
                    limit: f.limit,
                    ignore_nulls: f.ignore_nulls,
                },
            ))),
            // Snowflake: ARRAY_UNIQUE_AGG(x), same clause handling as Presto.
            DialectType::Snowflake => Ok(Expression::AggregateFunction(Box::new(
                crate::expressions::AggregateFunction {
                    name: "ARRAY_UNIQUE_AGG".to_string(),
                    args: f.args,
                    distinct: false,
                    order_by: f.order_by,
                    filter: f.filter,
                    limit: f.limit,
                    ignore_nulls: f.ignore_nulls,
                },
            ))),
            // Trino/DuckDB: ARRAY_AGG(DISTINCT x). Note ORDER BY/FILTER from
            // the source call are intentionally discarded here.
            DialectType::Trino | DialectType::DuckDB => {
                let agg = crate::expressions::AggFunc {
                    // Zero-arg degenerate call aggregates over NULL.
                    this: if f.args.is_empty() {
                        Expression::Null(crate::expressions::Null)
                    } else {
                        f.args[0].clone()
                    },
                    distinct: true,
                    order_by: Vec::new(),
                    filter: None,
                    ignore_nulls: None,
                    name: None,
                    having_max: None,
                    limit: None,
                };
                Ok(Expression::ArrayAgg(Box::new(agg)))
            }
            // Other targets: keep COLLECT_SET as-is.
            _ => Ok(Expression::AggregateFunction(f)),
        }
    } else {
        Ok(e)
    }
}
20424
20425 Action::PercentileConvert => {
20426 // PERCENTILE(x, 0.5) -> QUANTILE(x, 0.5) / APPROX_PERCENTILE(x, 0.5)
20427 if let Expression::AggregateFunction(f) = e {
20428 let name = match target {
20429 DialectType::DuckDB => "QUANTILE",
20430 DialectType::Presto | DialectType::Trino => "APPROX_PERCENTILE",
20431 _ => "PERCENTILE",
20432 };
20433 Ok(Expression::AggregateFunction(Box::new(
20434 crate::expressions::AggregateFunction {
20435 name: name.to_string(),
20436 args: f.args,
20437 distinct: f.distinct,
20438 order_by: f.order_by,
20439 filter: f.filter,
20440 limit: f.limit,
20441 ignore_nulls: f.ignore_nulls,
20442 },
20443 )))
20444 } else {
20445 Ok(e)
20446 }
20447 }
20448
20449 Action::CorrIsnanWrap => {
20450 // CORR(a, b) -> CASE WHEN ISNAN(CORR(a, b)) THEN NULL ELSE CORR(a, b) END
20451 // The CORR expression could be AggregateFunction, WindowFunction, or Filter-wrapped
20452 let corr_clone = e.clone();
20453 let isnan = Expression::Function(Box::new(Function::new(
20454 "ISNAN".to_string(),
20455 vec![corr_clone.clone()],
20456 )));
20457 let case_expr = Expression::Case(Box::new(Case {
20458 operand: None,
20459 whens: vec![(isnan, Expression::Null(crate::expressions::Null))],
20460 else_: Some(corr_clone),
20461 comments: Vec::new(),
20462 }));
20463 Ok(case_expr)
20464 }
20465
Action::TruncToDateTrunc => {
    // TRUNC(timestamp, 'MONTH') -> DATE_TRUNC('MONTH', timestamp)
    // Note the argument order flips: TRUNC takes (value, unit) while
    // DATE_TRUNC takes (unit, value).
    if let Expression::Function(f) = e {
        if f.args.len() == 2 {
            let timestamp = f.args[0].clone();
            let unit_expr = f.args[1].clone();

            if matches!(target, DialectType::ClickHouse) {
                // For ClickHouse, produce Expression::DateTrunc which the generator
                // outputs as DATE_TRUNC(...) without going through the ClickHouse
                // target transform that would convert it to dateTrunc
                let unit_str = Self::get_unit_str_static(&unit_expr);
                // Map the unit string onto the typed DateTimeField enum;
                // unrecognized units are preserved via Custom.
                let dt_field = match unit_str.as_str() {
                    "YEAR" => DateTimeField::Year,
                    "MONTH" => DateTimeField::Month,
                    "DAY" => DateTimeField::Day,
                    "HOUR" => DateTimeField::Hour,
                    "MINUTE" => DateTimeField::Minute,
                    "SECOND" => DateTimeField::Second,
                    "WEEK" => DateTimeField::Week,
                    "QUARTER" => DateTimeField::Quarter,
                    _ => DateTimeField::Custom(unit_str),
                };
                Ok(Expression::DateTrunc(Box::new(
                    crate::expressions::DateTruncFunc {
                        this: timestamp,
                        unit: dt_field,
                    },
                )))
            } else {
                // Generic targets: plain DATE_TRUNC(unit, timestamp) call.
                let new_args = vec![unit_expr, timestamp];
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_TRUNC".to_string(),
                    new_args,
                ))))
            }
        } else {
            // Single-arg TRUNC is numeric truncation, not date truncation:
            // leave it alone.
            Ok(Expression::Function(f))
        }
    } else {
        Ok(e)
    }
}
20509
Action::ArrayContainsConvert => {
    if let Expression::ArrayContains(f) = e {
        match target {
            DialectType::Presto | DialectType::Trino => {
                // ARRAY_CONTAINS(arr, val) -> CONTAINS(arr, val)
                Ok(Expression::Function(Box::new(Function::new(
                    "CONTAINS".to_string(),
                    vec![f.this, f.expression],
                ))))
            }
            DialectType::Snowflake => {
                // ARRAY_CONTAINS(arr, val) -> ARRAY_CONTAINS(CAST(val AS VARIANT), arr)
                // Snowflake's argument order is (value, array), and the value
                // is cast to VARIANT.
                let cast_val =
                    Expression::Cast(Box::new(crate::expressions::Cast {
                        this: f.expression,
                        to: crate::expressions::DataType::Custom {
                            name: "VARIANT".to_string(),
                        },
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    }));
                Ok(Expression::Function(Box::new(Function::new(
                    "ARRAY_CONTAINS".to_string(),
                    vec![cast_val, f.this],
                ))))
            }
            // Other targets keep the dedicated ArrayContains node.
            _ => Ok(Expression::ArrayContains(f)),
        }
    } else {
        Ok(e)
    }
}
20544
Action::StrPositionExpand => {
    // StrPosition with position arg -> complex STRPOS expansion for Presto/DuckDB
    // LOCATE(substr, str, pos) / STRPOS(str, substr, pos) ->
    // For Presto: IF(STRPOS(SUBSTRING(str, pos), substr) = 0, 0, STRPOS(SUBSTRING(str, pos), substr) + pos - 1)
    // For DuckDB: CASE WHEN STRPOS(SUBSTRING(str, pos), substr) = 0 THEN 0 ELSE STRPOS(SUBSTRING(str, pos), substr) + pos - 1 END
    if let Expression::StrPosition(sp) = e {
        let crate::expressions::StrPosition {
            this,
            substr,
            position,
            occurrence,
        } = *sp;
        let string = *this;
        // Missing substring becomes a NULL literal; missing position
        // defaults to 1 (SQL string positions are 1-based).
        let substr_expr = match substr {
            Some(s) => *s,
            None => Expression::Null(Null),
        };
        let pos = match position {
            Some(p) => *p,
            None => Expression::number(1),
        };

        // SUBSTRING(string, pos)
        let substring_call = Expression::Function(Box::new(Function::new(
            "SUBSTRING".to_string(),
            vec![string.clone(), pos.clone()],
        )));
        // STRPOS(SUBSTRING(string, pos), substr)
        let strpos_call = Expression::Function(Box::new(Function::new(
            "STRPOS".to_string(),
            vec![substring_call, substr_expr.clone()],
        )));
        // STRPOS(...) + pos - 1  — shifts the match index from the
        // substring's coordinates back to the original string's.
        let pos_adjusted =
            Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
                Expression::Add(Box::new(crate::expressions::BinaryOp::new(
                    strpos_call.clone(),
                    pos.clone(),
                ))),
                Expression::number(1),
            )));
        // STRPOS(...) = 0  — no-match guard (STRPOS returns 0 when not found).
        let is_zero = Expression::Eq(Box::new(crate::expressions::BinaryOp::new(
            strpos_call.clone(),
            Expression::number(0),
        )));

        match target {
            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                // IF(STRPOS(SUBSTRING(str, pos), substr) = 0, 0, STRPOS(SUBSTRING(str, pos), substr) + pos - 1)
                Ok(Expression::Function(Box::new(Function::new(
                    "IF".to_string(),
                    vec![is_zero, Expression::number(0), pos_adjusted],
                ))))
            }
            DialectType::DuckDB => {
                // CASE WHEN STRPOS(SUBSTRING(str, pos), substr) = 0 THEN 0 ELSE STRPOS(SUBSTRING(str, pos), substr) + pos - 1 END
                Ok(Expression::Case(Box::new(Case {
                    operand: None,
                    whens: vec![(is_zero, Expression::number(0))],
                    else_: Some(pos_adjusted),
                    comments: Vec::new(),
                })))
            }
            _ => {
                // Reconstruct StrPosition.
                // NOTE(review): this normalizes the original node — a missing
                // substr becomes Some(NULL) and a missing position becomes
                // Some(1) — so the round-trip is not byte-identical; confirm
                // downstream generators treat these the same.
                Ok(Expression::StrPosition(Box::new(
                    crate::expressions::StrPosition {
                        this: Box::new(string),
                        substr: Some(Box::new(substr_expr)),
                        position: Some(Box::new(pos)),
                        occurrence,
                    },
                )))
            }
        }
    } else {
        Ok(e)
    }
}
20625
Action::MonthsBetweenConvert => {
    // MONTHS_BETWEEN(end, start) -> target-specific month-difference form.
    if let Expression::MonthsBetween(mb) = e {
        // In the BinaryFunc node, `this` holds the end date and
        // `expression` the start date.
        let crate::expressions::BinaryFunc {
            this: end_date,
            expression: start_date,
            ..
        } = *mb;
        match target {
            DialectType::DuckDB => {
                // DuckDB has no MONTHS_BETWEEN; emulate the fractional
                // semantics as:
                //   DATE_DIFF('MONTH', start, end)
                //   + CASE WHEN DAY(end) = DAY(LAST_DAY(end))
                //           AND DAY(start) = DAY(LAST_DAY(start))
                //          THEN 0
                //          ELSE (DAY(end) - DAY(start)) / 31.0 END
                // i.e. whole months plus a day-based fraction, with the
                // fraction dropped when both dates are month-ends.
                let cast_end = Self::ensure_cast_date(end_date);
                let cast_start = Self::ensure_cast_date(start_date);
                let dd = Expression::Function(Box::new(Function::new(
                    "DATE_DIFF".to_string(),
                    vec![
                        Expression::string("MONTH"),
                        cast_start.clone(),
                        cast_end.clone(),
                    ],
                )));
                let day_end = Expression::Function(Box::new(Function::new(
                    "DAY".to_string(),
                    vec![cast_end.clone()],
                )));
                let day_start = Expression::Function(Box::new(Function::new(
                    "DAY".to_string(),
                    vec![cast_start.clone()],
                )));
                let last_day_end = Expression::Function(Box::new(Function::new(
                    "LAST_DAY".to_string(),
                    vec![cast_end.clone()],
                )));
                let last_day_start = Expression::Function(Box::new(Function::new(
                    "LAST_DAY".to_string(),
                    vec![cast_start.clone()],
                )));
                let day_last_end = Expression::Function(Box::new(Function::new(
                    "DAY".to_string(),
                    vec![last_day_end],
                )));
                let day_last_start = Expression::Function(Box::new(Function::new(
                    "DAY".to_string(),
                    vec![last_day_start],
                )));
                // Both dates fall on the last day of their month?
                let cond1 = Expression::Eq(Box::new(BinaryOp::new(
                    day_end.clone(),
                    day_last_end,
                )));
                let cond2 = Expression::Eq(Box::new(BinaryOp::new(
                    day_start.clone(),
                    day_last_start,
                )));
                let both_cond =
                    Expression::And(Box::new(BinaryOp::new(cond1, cond2)));
                // (DAY(end) - DAY(start)) / 31.0 — parenthesized so the
                // division binds over the whole difference.
                let day_diff =
                    Expression::Sub(Box::new(BinaryOp::new(day_end, day_start)));
                let day_diff_paren =
                    Expression::Paren(Box::new(crate::expressions::Paren {
                        this: day_diff,
                        trailing_comments: Vec::new(),
                    }));
                let frac = Expression::Div(Box::new(BinaryOp::new(
                    day_diff_paren,
                    Expression::Literal(Literal::Number("31.0".to_string())),
                )));
                let case_expr = Expression::Case(Box::new(Case {
                    operand: None,
                    whens: vec![(both_cond, Expression::number(0))],
                    else_: Some(frac),
                    comments: Vec::new(),
                }));
                Ok(Expression::Add(Box::new(BinaryOp::new(dd, case_expr))))
            }
            DialectType::Snowflake | DialectType::Redshift => {
                // DATEDIFF(MONTH, start, end) — whole-month difference only
                // (no fractional part, unlike the DuckDB emulation above).
                let unit = Expression::Identifier(Identifier::new("MONTH"));
                Ok(Expression::Function(Box::new(Function::new(
                    "DATEDIFF".to_string(),
                    vec![unit, start_date, end_date],
                ))))
            }
            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                // DATE_DIFF('MONTH', start, end) with the unit as a string.
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_DIFF".to_string(),
                    vec![Expression::string("MONTH"), start_date, end_date],
                ))))
            }
            // Other targets: rebuild the MonthsBetween node unchanged.
            _ => Ok(Expression::MonthsBetween(Box::new(
                crate::expressions::BinaryFunc {
                    this: end_date,
                    expression: start_date,
                    original_name: None,
                },
            ))),
        }
    } else {
        Ok(e)
    }
}
20723
20724 Action::AddMonthsConvert => {
20725 if let Expression::AddMonths(am) = e {
20726 let date = am.this;
20727 let val = am.expression;
20728 match target {
20729 DialectType::TSQL | DialectType::Fabric => {
20730 let cast_date = Self::ensure_cast_datetime2(date);
20731 Ok(Expression::Function(Box::new(Function::new(
20732 "DATEADD".to_string(),
20733 vec![
20734 Expression::Identifier(Identifier::new("MONTH")),
20735 val,
20736 cast_date,
20737 ],
20738 ))))
20739 }
20740 DialectType::DuckDB if matches!(source, DialectType::Snowflake) => {
20741 // DuckDB ADD_MONTHS from Snowflake: CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(date + interval) ELSE date + interval END
20742 // Optionally wrapped in CAST(... AS type) if the input had a specific type
20743
20744 // Determine the cast type from the date expression
20745 let (cast_date, return_type) = match &date {
20746 Expression::Literal(Literal::String(_)) => {
20747 // String literal: CAST(str AS TIMESTAMP), no outer CAST
20748 (
20749 Expression::Cast(Box::new(Cast {
20750 this: date.clone(),
20751 to: DataType::Timestamp {
20752 precision: None,
20753 timezone: false,
20754 },
20755 trailing_comments: Vec::new(),
20756 double_colon_syntax: false,
20757 format: None,
20758 default: None,
20759 })),
20760 None,
20761 )
20762 }
20763 Expression::Cast(c) => {
20764 // Already cast (e.g., '2023-01-31'::DATE) - keep the cast, wrap result in CAST(... AS type)
20765 (date.clone(), Some(c.to.clone()))
20766 }
20767 _ => {
20768 // Expression or NULL::TYPE - keep as-is, check for cast type
20769 if let Expression::Cast(c) = &date {
20770 (date.clone(), Some(c.to.clone()))
20771 } else {
20772 (date.clone(), None)
20773 }
20774 }
20775 };
20776
20777 // Build the interval expression
20778 // For non-integer values (float, decimal, cast), use TO_MONTHS(CAST(ROUND(val) AS INT))
20779 // For integer values, use INTERVAL val MONTH
20780 let is_non_integer_val = match &val {
20781 Expression::Literal(Literal::Number(n)) => n.contains('.'),
20782 Expression::Cast(_) => true, // e.g., 3.2::DECIMAL(10,2)
20783 Expression::Neg(n) => {
20784 if let Expression::Literal(Literal::Number(s)) = &n.this {
20785 s.contains('.')
20786 } else {
20787 false
20788 }
20789 }
20790 _ => false,
20791 };
20792
20793 let add_interval = if is_non_integer_val {
20794 // TO_MONTHS(CAST(ROUND(val) AS INT))
20795 let round_val = Expression::Function(Box::new(Function::new(
20796 "ROUND".to_string(),
20797 vec![val.clone()],
20798 )));
20799 let cast_int = Expression::Cast(Box::new(Cast {
20800 this: round_val,
20801 to: DataType::Int {
20802 length: None,
20803 integer_spelling: false,
20804 },
20805 trailing_comments: Vec::new(),
20806 double_colon_syntax: false,
20807 format: None,
20808 default: None,
20809 }));
20810 Expression::Function(Box::new(Function::new(
20811 "TO_MONTHS".to_string(),
20812 vec![cast_int],
20813 )))
20814 } else {
20815 // INTERVAL val MONTH
20816 // For negative numbers, wrap in parens
20817 let interval_val = match &val {
20818 Expression::Literal(Literal::Number(n))
20819 if n.starts_with('-') =>
20820 {
20821 Expression::Paren(Box::new(Paren {
20822 this: val.clone(),
20823 trailing_comments: Vec::new(),
20824 }))
20825 }
20826 Expression::Neg(_) => Expression::Paren(Box::new(Paren {
20827 this: val.clone(),
20828 trailing_comments: Vec::new(),
20829 })),
20830 Expression::Null(_) => Expression::Paren(Box::new(Paren {
20831 this: val.clone(),
20832 trailing_comments: Vec::new(),
20833 })),
20834 _ => val.clone(),
20835 };
20836 Expression::Interval(Box::new(crate::expressions::Interval {
20837 this: Some(interval_val),
20838 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
20839 unit: crate::expressions::IntervalUnit::Month,
20840 use_plural: false,
20841 }),
20842 }))
20843 };
20844
20845 // Build: date + interval
20846 let date_plus_interval = Expression::Add(Box::new(BinaryOp::new(
20847 cast_date.clone(),
20848 add_interval.clone(),
20849 )));
20850
20851 // Build LAST_DAY(date)
20852 let last_day_date = Expression::Function(Box::new(Function::new(
20853 "LAST_DAY".to_string(),
20854 vec![cast_date.clone()],
20855 )));
20856
20857 // Build LAST_DAY(date + interval)
20858 let last_day_date_plus =
20859 Expression::Function(Box::new(Function::new(
20860 "LAST_DAY".to_string(),
20861 vec![date_plus_interval.clone()],
20862 )));
20863
20864 // Build: CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(date + interval) ELSE date + interval END
20865 let case_expr = Expression::Case(Box::new(Case {
20866 operand: None,
20867 whens: vec![(
20868 Expression::Eq(Box::new(BinaryOp::new(
20869 last_day_date,
20870 cast_date.clone(),
20871 ))),
20872 last_day_date_plus,
20873 )],
20874 else_: Some(date_plus_interval),
20875 comments: Vec::new(),
20876 }));
20877
20878 // Wrap in CAST(... AS type) if needed
20879 if let Some(dt) = return_type {
20880 Ok(Expression::Cast(Box::new(Cast {
20881 this: case_expr,
20882 to: dt,
20883 trailing_comments: Vec::new(),
20884 double_colon_syntax: false,
20885 format: None,
20886 default: None,
20887 })))
20888 } else {
20889 Ok(case_expr)
20890 }
20891 }
20892 DialectType::DuckDB => {
20893 // Non-Snowflake source: simple date + INTERVAL
20894 let cast_date =
20895 if matches!(&date, Expression::Literal(Literal::String(_))) {
20896 Expression::Cast(Box::new(Cast {
20897 this: date,
20898 to: DataType::Timestamp {
20899 precision: None,
20900 timezone: false,
20901 },
20902 trailing_comments: Vec::new(),
20903 double_colon_syntax: false,
20904 format: None,
20905 default: None,
20906 }))
20907 } else {
20908 date
20909 };
20910 let interval =
20911 Expression::Interval(Box::new(crate::expressions::Interval {
20912 this: Some(val),
20913 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
20914 unit: crate::expressions::IntervalUnit::Month,
20915 use_plural: false,
20916 }),
20917 }));
20918 Ok(Expression::Add(Box::new(BinaryOp::new(
20919 cast_date, interval,
20920 ))))
20921 }
20922 DialectType::Snowflake => {
20923 // Keep ADD_MONTHS when source is also Snowflake
20924 if matches!(source, DialectType::Snowflake) {
20925 Ok(Expression::Function(Box::new(Function::new(
20926 "ADD_MONTHS".to_string(),
20927 vec![date, val],
20928 ))))
20929 } else {
20930 Ok(Expression::Function(Box::new(Function::new(
20931 "DATEADD".to_string(),
20932 vec![
20933 Expression::Identifier(Identifier::new("MONTH")),
20934 val,
20935 date,
20936 ],
20937 ))))
20938 }
20939 }
20940 DialectType::Redshift => {
20941 Ok(Expression::Function(Box::new(Function::new(
20942 "DATEADD".to_string(),
20943 vec![
20944 Expression::Identifier(Identifier::new("MONTH")),
20945 val,
20946 date,
20947 ],
20948 ))))
20949 }
20950 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
20951 let cast_date =
20952 if matches!(&date, Expression::Literal(Literal::String(_))) {
20953 Expression::Cast(Box::new(Cast {
20954 this: date,
20955 to: DataType::Timestamp {
20956 precision: None,
20957 timezone: false,
20958 },
20959 trailing_comments: Vec::new(),
20960 double_colon_syntax: false,
20961 format: None,
20962 default: None,
20963 }))
20964 } else {
20965 date
20966 };
20967 Ok(Expression::Function(Box::new(Function::new(
20968 "DATE_ADD".to_string(),
20969 vec![Expression::string("MONTH"), val, cast_date],
20970 ))))
20971 }
20972 DialectType::BigQuery => {
20973 let interval =
20974 Expression::Interval(Box::new(crate::expressions::Interval {
20975 this: Some(val),
20976 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
20977 unit: crate::expressions::IntervalUnit::Month,
20978 use_plural: false,
20979 }),
20980 }));
20981 let cast_date =
20982 if matches!(&date, Expression::Literal(Literal::String(_))) {
20983 Expression::Cast(Box::new(Cast {
20984 this: date,
20985 to: DataType::Custom {
20986 name: "DATETIME".to_string(),
20987 },
20988 trailing_comments: Vec::new(),
20989 double_colon_syntax: false,
20990 format: None,
20991 default: None,
20992 }))
20993 } else {
20994 date
20995 };
20996 Ok(Expression::Function(Box::new(Function::new(
20997 "DATE_ADD".to_string(),
20998 vec![cast_date, interval],
20999 ))))
21000 }
21001 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
21002 Ok(Expression::Function(Box::new(Function::new(
21003 "ADD_MONTHS".to_string(),
21004 vec![date, val],
21005 ))))
21006 }
21007 _ => {
21008 // Default: keep as AddMonths expression
21009 Ok(Expression::AddMonths(Box::new(
21010 crate::expressions::BinaryFunc {
21011 this: date,
21012 expression: val,
21013 original_name: None,
21014 },
21015 )))
21016 }
21017 }
21018 } else {
21019 Ok(e)
21020 }
21021 }
21022
Action::PercentileContConvert => {
    // PERCENTILE_CONT(p) WITHIN GROUP (ORDER BY col) ->
    // Presto/Trino: APPROX_PERCENTILE(col, p)
    // Spark/Databricks: PERCENTILE_APPROX(col, p)
    if let Expression::WithinGroup(wg) = e {
        // Extract percentile value and order by column.
        // The inner call may be a plain Function, an AggregateFunction, or
        // a dedicated PercentileCont node; the percentile defaults to 0.5
        // when no argument is present.
        // NOTE(review): `_is_disc` (PERCENTILE_DISC detection) is computed
        // but never used — CONT and DISC currently map to the same approx
        // function; confirm that is intentional.
        let (percentile, _is_disc) = match &wg.this {
            Expression::Function(f) => {
                let is_disc = f.name.eq_ignore_ascii_case("PERCENTILE_DISC");
                let pct = f.args.first().cloned().unwrap_or(Expression::Literal(
                    Literal::Number("0.5".to_string()),
                ));
                (pct, is_disc)
            }
            Expression::AggregateFunction(af) => {
                let is_disc = af.name.eq_ignore_ascii_case("PERCENTILE_DISC");
                let pct = af.args.first().cloned().unwrap_or(Expression::Literal(
                    Literal::Number("0.5".to_string()),
                ));
                (pct, is_disc)
            }
            Expression::PercentileCont(pc) => (pc.percentile.clone(), false),
            // Unrecognized inner expression: leave the WITHIN GROUP intact.
            _ => return Ok(Expression::WithinGroup(wg)),
        };
        // First ORDER BY key becomes the aggregated column; literal 1 is a
        // positional fallback when ORDER BY is absent.
        let col = wg
            .order_by
            .first()
            .map(|o| o.this.clone())
            .unwrap_or(Expression::Literal(Literal::Number("1".to_string())));

        let func_name = match target {
            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                "APPROX_PERCENTILE"
            }
            _ => "PERCENTILE_APPROX", // Spark, Databricks
        };
        Ok(Expression::Function(Box::new(Function::new(
            func_name.to_string(),
            vec![col, percentile],
        ))))
    } else {
        Ok(e)
    }
}
21067
21068 Action::CurrentUserSparkParens => {
21069 // CURRENT_USER -> CURRENT_USER() for Spark
21070 if let Expression::CurrentUser(_) = e {
21071 Ok(Expression::Function(Box::new(Function::new(
21072 "CURRENT_USER".to_string(),
21073 vec![],
21074 ))))
21075 } else {
21076 Ok(e)
21077 }
21078 }
21079
Action::SparkDateFuncCast => {
    // MONTH/YEAR/DAY('string') from Spark -> wrap arg in CAST to DATE
    // Helper choosing the cast shape per target: Presto-family gets a
    // double cast (via TIMESTAMP then DATE), others a plain CAST AS DATE.
    let cast_arg = |arg: Expression| -> Expression {
        match target {
            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                Self::double_cast_timestamp_date(arg)
            }
            _ => {
                // DuckDB, PostgreSQL, etc: CAST(arg AS DATE)
                Self::ensure_cast_date(arg)
            }
        }
    };
    // Apply the cast to the single argument of the date-part node;
    // anything other than Month/Year/Day passes through untouched.
    match e {
        Expression::Month(f) => Ok(Expression::Month(Box::new(
            crate::expressions::UnaryFunc::new(cast_arg(f.this)),
        ))),
        Expression::Year(f) => Ok(Expression::Year(Box::new(
            crate::expressions::UnaryFunc::new(cast_arg(f.this)),
        ))),
        Expression::Day(f) => Ok(Expression::Day(Box::new(
            crate::expressions::UnaryFunc::new(cast_arg(f.this)),
        ))),
        other => Ok(other),
    }
}
21106
21107 Action::MapFromArraysConvert => {
21108 // Expression::MapFromArrays -> target-specific
21109 if let Expression::MapFromArrays(mfa) = e {
21110 let keys = mfa.this;
21111 let values = mfa.expression;
21112 match target {
21113 DialectType::Snowflake => Ok(Expression::Function(Box::new(
21114 Function::new("OBJECT_CONSTRUCT".to_string(), vec![keys, values]),
21115 ))),
21116 _ => {
21117 // Hive, Presto, DuckDB, etc.: MAP(keys, values)
21118 Ok(Expression::Function(Box::new(Function::new(
21119 "MAP".to_string(),
21120 vec![keys, values],
21121 ))))
21122 }
21123 }
21124 } else {
21125 Ok(e)
21126 }
21127 }
21128
Action::AnyToExists => {
    // `expr <op> ANY (array)` -> `EXISTS(array, x -> expr <op> x)`
    // (lambda-style EXISTS over array elements).
    if let Expression::Any(q) = e {
        if let Some(op) = q.op.clone() {
            // NOTE(review): the lambda parameter is hard-coded as `x`,
            // which could shadow a column named `x` referenced in the
            // comparison — confirm this is acceptable for callers.
            let lambda_param = crate::expressions::Identifier::new("x");
            let rhs = Expression::Identifier(lambda_param.clone());
            // Rebuild the comparison with the lambda variable on the right.
            let body = match op {
                crate::expressions::QuantifiedOp::Eq => {
                    Expression::Eq(Box::new(BinaryOp::new(q.this, rhs)))
                }
                crate::expressions::QuantifiedOp::Neq => {
                    Expression::Neq(Box::new(BinaryOp::new(q.this, rhs)))
                }
                crate::expressions::QuantifiedOp::Lt => {
                    Expression::Lt(Box::new(BinaryOp::new(q.this, rhs)))
                }
                crate::expressions::QuantifiedOp::Lte => {
                    Expression::Lte(Box::new(BinaryOp::new(q.this, rhs)))
                }
                crate::expressions::QuantifiedOp::Gt => {
                    Expression::Gt(Box::new(BinaryOp::new(q.this, rhs)))
                }
                crate::expressions::QuantifiedOp::Gte => {
                    Expression::Gte(Box::new(BinaryOp::new(q.this, rhs)))
                }
            };
            // Wrap the comparison in a single-parameter lambda `x -> body`.
            let lambda =
                Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                    parameters: vec![lambda_param],
                    body,
                    colon: false,
                    parameter_types: Vec::new(),
                }));
            Ok(Expression::Function(Box::new(Function::new(
                "EXISTS".to_string(),
                vec![q.subquery, lambda],
            ))))
        } else {
            // No quantified operator recorded: leave the ANY node alone.
            Ok(Expression::Any(q))
        }
    } else {
        Ok(e)
    }
}
21172
                Action::GenerateSeriesConvert => {
                    // GENERATE_SERIES(start, end[, step]) -> SEQUENCE for Spark/Databricks/Hive, wrapped in UNNEST/EXPLODE
                    // For DuckDB target: wrap in UNNEST(GENERATE_SERIES(...))
                    // For PG/Redshift target: keep as GENERATE_SERIES but normalize interval string step
                    if let Expression::Function(f) = e {
                        if f.name.eq_ignore_ascii_case("GENERATE_SERIES") && f.args.len() >= 2 {
                            let start = f.args[0].clone();
                            let end = f.args[1].clone();
                            // Optional third argument: the step (absent -> engine default).
                            let step = f.args.get(2).cloned();

                            // Normalize step: convert string interval like '1day' or ' 2 days ' to INTERVAL expression
                            let step = step.map(|s| Self::normalize_interval_string(s, target));

                            // Helper: wrap CURRENT_TIMESTAMP in CAST(... AS TIMESTAMP) for Presto/Trino/Spark
                            // family targets; other expressions pass through untouched.
                            let maybe_cast_timestamp = |arg: Expression| -> Expression {
                                if matches!(
                                    target,
                                    DialectType::Presto
                                        | DialectType::Trino
                                        | DialectType::Athena
                                        | DialectType::Spark
                                        | DialectType::Databricks
                                        | DialectType::Hive
                                ) {
                                    match &arg {
                                        Expression::CurrentTimestamp(_) => {
                                            Expression::Cast(Box::new(Cast {
                                                this: arg,
                                                to: DataType::Timestamp {
                                                    precision: None,
                                                    timezone: false,
                                                },
                                                trailing_comments: Vec::new(),
                                                double_colon_syntax: false,
                                                format: None,
                                                default: None,
                                            }))
                                        }
                                        _ => arg,
                                    }
                                } else {
                                    arg
                                }
                            };

                            let start = maybe_cast_timestamp(start);
                            let end = maybe_cast_timestamp(end);

                            // For PostgreSQL/Redshift target, keep as GENERATE_SERIES
                            if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
                                let mut gs_args = vec![start, end];
                                if let Some(step) = step {
                                    gs_args.push(step);
                                }
                                return Ok(Expression::Function(Box::new(Function::new(
                                    "GENERATE_SERIES".to_string(),
                                    gs_args,
                                ))));
                            }

                            // For DuckDB target: wrap in UNNEST(GENERATE_SERIES(...))
                            if matches!(target, DialectType::DuckDB) {
                                let mut gs_args = vec![start, end];
                                if let Some(step) = step {
                                    gs_args.push(step);
                                }
                                let gs = Expression::Function(Box::new(Function::new(
                                    "GENERATE_SERIES".to_string(),
                                    gs_args,
                                )));
                                return Ok(Expression::Function(Box::new(Function::new(
                                    "UNNEST".to_string(),
                                    vec![gs],
                                ))));
                            }

                            // All remaining targets spell the series as SEQUENCE(start, end[, step]).
                            let mut seq_args = vec![start, end];
                            if let Some(step) = step {
                                seq_args.push(step);
                            }

                            let seq = Expression::Function(Box::new(Function::new(
                                "SEQUENCE".to_string(),
                                seq_args,
                            )));

                            match target {
                                DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                                    // Wrap in UNNEST
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "UNNEST".to_string(),
                                        vec![seq],
                                    ))))
                                }
                                DialectType::Spark
                                | DialectType::Databricks
                                | DialectType::Hive => {
                                    // Wrap in EXPLODE
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "EXPLODE".to_string(),
                                        vec![seq],
                                    ))))
                                }
                                _ => {
                                    // Just SEQUENCE for others
                                    Ok(seq)
                                }
                            }
                        } else {
                            // Not GENERATE_SERIES, or fewer than two args: leave the call as-is.
                            Ok(Expression::Function(f))
                        }
                    } else {
                        Ok(e)
                    }
                }
21288
21289 Action::ConcatCoalesceWrap => {
21290 // CONCAT(a, b) function -> CONCAT(COALESCE(CAST(a AS VARCHAR), ''), ...) for Presto
21291 // CONCAT(a, b) function -> CONCAT(COALESCE(a, ''), ...) for ClickHouse
21292 if let Expression::Function(f) = e {
21293 if f.name.eq_ignore_ascii_case("CONCAT") {
21294 let new_args: Vec<Expression> = f
21295 .args
21296 .into_iter()
21297 .map(|arg| {
21298 let cast_arg = if matches!(
21299 target,
21300 DialectType::Presto
21301 | DialectType::Trino
21302 | DialectType::Athena
21303 ) {
21304 Expression::Cast(Box::new(Cast {
21305 this: arg,
21306 to: DataType::VarChar {
21307 length: None,
21308 parenthesized_length: false,
21309 },
21310 trailing_comments: Vec::new(),
21311 double_colon_syntax: false,
21312 format: None,
21313 default: None,
21314 }))
21315 } else {
21316 arg
21317 };
21318 Expression::Function(Box::new(Function::new(
21319 "COALESCE".to_string(),
21320 vec![cast_arg, Expression::string("")],
21321 )))
21322 })
21323 .collect();
21324 Ok(Expression::Function(Box::new(Function::new(
21325 "CONCAT".to_string(),
21326 new_args,
21327 ))))
21328 } else {
21329 Ok(Expression::Function(f))
21330 }
21331 } else {
21332 Ok(e)
21333 }
21334 }
21335
21336 Action::PipeConcatToConcat => {
21337 // a || b (Concat operator) -> CONCAT(CAST(a AS VARCHAR), CAST(b AS VARCHAR)) for Presto/Trino
21338 if let Expression::Concat(op) = e {
21339 let cast_left = Expression::Cast(Box::new(Cast {
21340 this: op.left,
21341 to: DataType::VarChar {
21342 length: None,
21343 parenthesized_length: false,
21344 },
21345 trailing_comments: Vec::new(),
21346 double_colon_syntax: false,
21347 format: None,
21348 default: None,
21349 }));
21350 let cast_right = Expression::Cast(Box::new(Cast {
21351 this: op.right,
21352 to: DataType::VarChar {
21353 length: None,
21354 parenthesized_length: false,
21355 },
21356 trailing_comments: Vec::new(),
21357 double_colon_syntax: false,
21358 format: None,
21359 default: None,
21360 }));
21361 Ok(Expression::Function(Box::new(Function::new(
21362 "CONCAT".to_string(),
21363 vec![cast_left, cast_right],
21364 ))))
21365 } else {
21366 Ok(e)
21367 }
21368 }
21369
                Action::DivFuncConvert => {
                    // DIV(a, b) -> target-specific integer division
                    if let Expression::Function(f) = e {
                        if f.name.eq_ignore_ascii_case("DIV") && f.args.len() == 2 {
                            let a = f.args[0].clone();
                            let b = f.args[1].clone();
                            match target {
                                DialectType::DuckDB => {
                                    // DIV(a, b) -> CAST(a // b AS DECIMAL)
                                    let int_div = Expression::IntDiv(Box::new(
                                        crate::expressions::BinaryFunc {
                                            this: a,
                                            expression: b,
                                            original_name: None,
                                        },
                                    ));
                                    Ok(Expression::Cast(Box::new(Cast {
                                        this: int_div,
                                        to: DataType::Decimal {
                                            precision: None,
                                            scale: None,
                                        },
                                        trailing_comments: Vec::new(),
                                        double_colon_syntax: false,
                                        format: None,
                                        default: None,
                                    })))
                                }
                                DialectType::BigQuery => {
                                    // DIV(a, b) -> CAST(DIV(a, b) AS NUMERIC)
                                    let div_func = Expression::Function(Box::new(Function::new(
                                        "DIV".to_string(),
                                        vec![a, b],
                                    )));
                                    Ok(Expression::Cast(Box::new(Cast {
                                        this: div_func,
                                        // NUMERIC has no dedicated DataType variant here,
                                        // so it is carried as a Custom type name.
                                        to: DataType::Custom {
                                            name: "NUMERIC".to_string(),
                                        },
                                        trailing_comments: Vec::new(),
                                        double_colon_syntax: false,
                                        format: None,
                                        default: None,
                                    })))
                                }
                                DialectType::SQLite => {
                                    // DIV(a, b) -> CAST(CAST(CAST(a AS REAL) / b AS INTEGER) AS REAL)
                                    // NOTE(review): relies on SQLite CAST-to-INTEGER truncation
                                    // matching the source dialect's DIV rounding — confirm.
                                    let cast_a = Expression::Cast(Box::new(Cast {
                                        this: a,
                                        to: DataType::Custom {
                                            name: "REAL".to_string(),
                                        },
                                        trailing_comments: Vec::new(),
                                        double_colon_syntax: false,
                                        format: None,
                                        default: None,
                                    }));
                                    let div = Expression::Div(Box::new(BinaryOp::new(cast_a, b)));
                                    let cast_int = Expression::Cast(Box::new(Cast {
                                        this: div,
                                        to: DataType::Int {
                                            length: None,
                                            integer_spelling: true,
                                        },
                                        trailing_comments: Vec::new(),
                                        double_colon_syntax: false,
                                        format: None,
                                        default: None,
                                    }));
                                    Ok(Expression::Cast(Box::new(Cast {
                                        this: cast_int,
                                        to: DataType::Custom {
                                            name: "REAL".to_string(),
                                        },
                                        trailing_comments: Vec::new(),
                                        double_colon_syntax: false,
                                        format: None,
                                        default: None,
                                    })))
                                }
                                // Other targets keep DIV(a, b) unchanged.
                                _ => Ok(Expression::Function(f)),
                            }
                        } else {
                            // Not DIV, or wrong arity: pass through untouched.
                            Ok(Expression::Function(f))
                        }
                    } else {
                        Ok(e)
                    }
                }
21459
21460 Action::JsonObjectAggConvert => {
21461 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
21462 match e {
21463 Expression::Function(f) => Ok(Expression::Function(Box::new(
21464 Function::new("JSON_GROUP_OBJECT".to_string(), f.args),
21465 ))),
21466 Expression::AggregateFunction(af) => {
21467 // AggregateFunction stores all args in the `args` vec
21468 Ok(Expression::Function(Box::new(Function::new(
21469 "JSON_GROUP_OBJECT".to_string(),
21470 af.args,
21471 ))))
21472 }
21473 other => Ok(other),
21474 }
21475 }
21476
21477 Action::JsonbExistsConvert => {
21478 // JSONB_EXISTS('json', 'key') -> JSON_EXISTS('json', '$.key') for DuckDB
21479 if let Expression::Function(f) = e {
21480 if f.args.len() == 2 {
21481 let json_expr = f.args[0].clone();
21482 let key = match &f.args[1] {
21483 Expression::Literal(crate::expressions::Literal::String(s)) => {
21484 format!("$.{}", s)
21485 }
21486 _ => return Ok(Expression::Function(f)),
21487 };
21488 Ok(Expression::Function(Box::new(Function::new(
21489 "JSON_EXISTS".to_string(),
21490 vec![json_expr, Expression::string(&key)],
21491 ))))
21492 } else {
21493 Ok(Expression::Function(f))
21494 }
21495 } else {
21496 Ok(e)
21497 }
21498 }
21499
21500 Action::DateBinConvert => {
21501 // DATE_BIN('interval', ts, origin) -> TIME_BUCKET('interval', ts, origin) for DuckDB
21502 if let Expression::Function(f) = e {
21503 Ok(Expression::Function(Box::new(Function::new(
21504 "TIME_BUCKET".to_string(),
21505 f.args,
21506 ))))
21507 } else {
21508 Ok(e)
21509 }
21510 }
21511
21512 Action::MysqlCastCharToText => {
21513 // MySQL CAST(x AS CHAR) was originally TEXT -> convert to target text type
21514 if let Expression::Cast(mut c) = e {
21515 c.to = DataType::Text;
21516 Ok(Expression::Cast(c))
21517 } else {
21518 Ok(e)
21519 }
21520 }
21521
21522 Action::SparkCastVarcharToString => {
21523 // Spark parses VARCHAR(n)/CHAR(n) as TEXT -> normalize to STRING
21524 match e {
21525 Expression::Cast(mut c) => {
21526 c.to = Self::normalize_varchar_to_string(c.to);
21527 Ok(Expression::Cast(c))
21528 }
21529 Expression::TryCast(mut c) => {
21530 c.to = Self::normalize_varchar_to_string(c.to);
21531 Ok(Expression::TryCast(c))
21532 }
21533 _ => Ok(e),
21534 }
21535 }
21536
21537 Action::MinMaxToLeastGreatest => {
21538 // Multi-arg MIN(a,b,c) -> LEAST(a,b,c), MAX(a,b,c) -> GREATEST(a,b,c)
21539 if let Expression::Function(f) = e {
21540 let name = f.name.to_uppercase();
21541 let new_name = match name.as_str() {
21542 "MIN" => "LEAST",
21543 "MAX" => "GREATEST",
21544 _ => return Ok(Expression::Function(f)),
21545 };
21546 Ok(Expression::Function(Box::new(Function::new(
21547 new_name.to_string(),
21548 f.args,
21549 ))))
21550 } else {
21551 Ok(e)
21552 }
21553 }
21554
21555 Action::ClickHouseUniqToApproxCountDistinct => {
21556 // ClickHouse uniq(x) -> APPROX_COUNT_DISTINCT(x) for non-ClickHouse targets
21557 if let Expression::Function(f) = e {
21558 Ok(Expression::Function(Box::new(Function::new(
21559 "APPROX_COUNT_DISTINCT".to_string(),
21560 f.args,
21561 ))))
21562 } else {
21563 Ok(e)
21564 }
21565 }
21566
21567 Action::ClickHouseAnyToAnyValue => {
21568 // ClickHouse any(x) -> ANY_VALUE(x) for non-ClickHouse targets
21569 if let Expression::Function(f) = e {
21570 Ok(Expression::Function(Box::new(Function::new(
21571 "ANY_VALUE".to_string(),
21572 f.args,
21573 ))))
21574 } else {
21575 Ok(e)
21576 }
21577 }
21578
21579 Action::OracleVarchar2ToVarchar => {
21580 // Oracle VARCHAR2(N CHAR/BYTE) / NVARCHAR2(N) -> VarChar(N) for non-Oracle targets
21581 if let Expression::DataType(DataType::Custom { ref name }) = e {
21582 let upper = name.to_uppercase();
21583 // Extract length from VARCHAR2(N ...) or NVARCHAR2(N ...)
21584 let inner =
21585 if upper.starts_with("VARCHAR2(") || upper.starts_with("NVARCHAR2(") {
21586 let start = if upper.starts_with("N") { 10 } else { 9 }; // skip "NVARCHAR2(" or "VARCHAR2("
21587 let end = name.len() - 1; // skip trailing ")"
21588 Some(&name[start..end])
21589 } else {
21590 Option::None
21591 };
21592 if let Some(inner_str) = inner {
21593 // Parse the number part, ignoring BYTE/CHAR qualifier
21594 let num_str = inner_str.split_whitespace().next().unwrap_or("");
21595 if let Ok(n) = num_str.parse::<u32>() {
21596 Ok(Expression::DataType(DataType::VarChar {
21597 length: Some(n),
21598 parenthesized_length: false,
21599 }))
21600 } else {
21601 Ok(e)
21602 }
21603 } else {
21604 // Plain VARCHAR2 / NVARCHAR2 without parens
21605 Ok(Expression::DataType(DataType::VarChar {
21606 length: Option::None,
21607 parenthesized_length: false,
21608 }))
21609 }
21610 } else {
21611 Ok(e)
21612 }
21613 }
21614
                Action::Nvl2Expand => {
                    // NVL2(a, b[, c]) -> CASE WHEN NOT a IS NULL THEN b [ELSE c] END
                    // But keep as NVL2 for dialects that support it natively
                    let nvl2_native = matches!(
                        target,
                        DialectType::Oracle
                            | DialectType::Snowflake
                            | DialectType::Redshift
                            | DialectType::Teradata
                            | DialectType::Spark
                            | DialectType::Databricks
                    );
                    // Accept either the dedicated Nvl2 node or a generic NVL2 function
                    // call, extracting (condition, then-value, optional else-value).
                    let (a, b, c) = if let Expression::Nvl2(nvl2) = e {
                        if nvl2_native {
                            return Ok(Expression::Nvl2(nvl2));
                        }
                        (nvl2.this, nvl2.true_value, Some(nvl2.false_value))
                    } else if let Expression::Function(f) = e {
                        if nvl2_native {
                            return Ok(Expression::Function(Box::new(Function::new(
                                "NVL2".to_string(),
                                f.args,
                            ))));
                        }
                        if f.args.len() < 2 {
                            // Malformed call: nothing to expand.
                            return Ok(Expression::Function(f));
                        }
                        let mut args = f.args;
                        let a = args.remove(0);
                        let b = args.remove(0);
                        // Optional third argument becomes the ELSE branch.
                        let c = if !args.is_empty() {
                            Some(args.remove(0))
                        } else {
                            Option::None
                        };
                        (a, b, c)
                    } else {
                        return Ok(e);
                    };
                    // Build: NOT (a IS NULL)
                    let is_null = Expression::IsNull(Box::new(IsNull {
                        this: a,
                        not: false,
                        postfix_form: false,
                    }));
                    let not_null =
                        Expression::Not(Box::new(crate::expressions::UnaryOp { this: is_null }));
                    Ok(Expression::Case(Box::new(Case {
                        operand: Option::None,
                        whens: vec![(not_null, b)],
                        else_: c,
                        comments: Vec::new(),
                    })))
                }
21669
21670 Action::IfnullToCoalesce => {
21671 // IFNULL(a, b) -> COALESCE(a, b): clear original_name to output COALESCE
21672 if let Expression::Coalesce(mut cf) = e {
21673 cf.original_name = Option::None;
21674 Ok(Expression::Coalesce(cf))
21675 } else if let Expression::Function(f) = e {
21676 Ok(Expression::Function(Box::new(Function::new(
21677 "COALESCE".to_string(),
21678 f.args,
21679 ))))
21680 } else {
21681 Ok(e)
21682 }
21683 }
21684
21685 Action::IsAsciiConvert => {
21686 // IS_ASCII(x) -> dialect-specific ASCII check
21687 if let Expression::Function(f) = e {
21688 let arg = f.args.into_iter().next().unwrap();
21689 match target {
21690 DialectType::MySQL | DialectType::SingleStore | DialectType::TiDB => {
21691 // REGEXP_LIKE(x, '^[[:ascii:]]*$')
21692 Ok(Expression::Function(Box::new(Function::new(
21693 "REGEXP_LIKE".to_string(),
21694 vec![
21695 arg,
21696 Expression::Literal(Literal::String(
21697 "^[[:ascii:]]*$".to_string(),
21698 )),
21699 ],
21700 ))))
21701 }
21702 DialectType::PostgreSQL
21703 | DialectType::Redshift
21704 | DialectType::Materialize
21705 | DialectType::RisingWave => {
21706 // (x ~ '^[[:ascii:]]*$')
21707 Ok(Expression::Paren(Box::new(Paren {
21708 this: Expression::RegexpLike(Box::new(
21709 crate::expressions::RegexpFunc {
21710 this: arg,
21711 pattern: Expression::Literal(Literal::String(
21712 "^[[:ascii:]]*$".to_string(),
21713 )),
21714 flags: Option::None,
21715 },
21716 )),
21717 trailing_comments: Vec::new(),
21718 })))
21719 }
21720 DialectType::SQLite => {
21721 // (NOT x GLOB CAST(x'2a5b5e012d7f5d2a' AS TEXT))
21722 let hex_lit = Expression::Literal(Literal::HexString(
21723 "2a5b5e012d7f5d2a".to_string(),
21724 ));
21725 let cast_expr = Expression::Cast(Box::new(Cast {
21726 this: hex_lit,
21727 to: DataType::Text,
21728 trailing_comments: Vec::new(),
21729 double_colon_syntax: false,
21730 format: Option::None,
21731 default: Option::None,
21732 }));
21733 let glob = Expression::Glob(Box::new(BinaryOp {
21734 left: arg,
21735 right: cast_expr,
21736 left_comments: Vec::new(),
21737 operator_comments: Vec::new(),
21738 trailing_comments: Vec::new(),
21739 }));
21740 Ok(Expression::Paren(Box::new(Paren {
21741 this: Expression::Not(Box::new(crate::expressions::UnaryOp {
21742 this: glob,
21743 })),
21744 trailing_comments: Vec::new(),
21745 })))
21746 }
21747 DialectType::TSQL | DialectType::Fabric => {
21748 // (PATINDEX(CONVERT(VARCHAR(MAX), 0x255b5e002d7f5d25) COLLATE Latin1_General_BIN, x) = 0)
21749 let hex_lit = Expression::Literal(Literal::HexNumber(
21750 "255b5e002d7f5d25".to_string(),
21751 ));
21752 let convert_expr = Expression::Convert(Box::new(
21753 crate::expressions::ConvertFunc {
21754 this: hex_lit,
21755 to: DataType::Text, // Text generates as VARCHAR(MAX) for TSQL
21756 style: None,
21757 },
21758 ));
21759 let collated = Expression::Collation(Box::new(
21760 crate::expressions::CollationExpr {
21761 this: convert_expr,
21762 collation: "Latin1_General_BIN".to_string(),
21763 quoted: false,
21764 double_quoted: false,
21765 },
21766 ));
21767 let patindex = Expression::Function(Box::new(Function::new(
21768 "PATINDEX".to_string(),
21769 vec![collated, arg],
21770 )));
21771 let zero = Expression::Literal(Literal::Number("0".to_string()));
21772 let eq_zero = Expression::Eq(Box::new(BinaryOp {
21773 left: patindex,
21774 right: zero,
21775 left_comments: Vec::new(),
21776 operator_comments: Vec::new(),
21777 trailing_comments: Vec::new(),
21778 }));
21779 Ok(Expression::Paren(Box::new(Paren {
21780 this: eq_zero,
21781 trailing_comments: Vec::new(),
21782 })))
21783 }
21784 DialectType::Oracle => {
21785 // NVL(REGEXP_LIKE(x, '^[' || CHR(1) || '-' || CHR(127) || ']*$'), TRUE)
21786 // Build the pattern: '^[' || CHR(1) || '-' || CHR(127) || ']*$'
21787 let s1 = Expression::Literal(Literal::String("^[".to_string()));
21788 let chr1 = Expression::Function(Box::new(Function::new(
21789 "CHR".to_string(),
21790 vec![Expression::Literal(Literal::Number("1".to_string()))],
21791 )));
21792 let dash = Expression::Literal(Literal::String("-".to_string()));
21793 let chr127 = Expression::Function(Box::new(Function::new(
21794 "CHR".to_string(),
21795 vec![Expression::Literal(Literal::Number("127".to_string()))],
21796 )));
21797 let s2 = Expression::Literal(Literal::String("]*$".to_string()));
21798 // Build: '^[' || CHR(1) || '-' || CHR(127) || ']*$'
21799 let concat1 =
21800 Expression::DPipe(Box::new(crate::expressions::DPipe {
21801 this: Box::new(s1),
21802 expression: Box::new(chr1),
21803 safe: None,
21804 }));
21805 let concat2 =
21806 Expression::DPipe(Box::new(crate::expressions::DPipe {
21807 this: Box::new(concat1),
21808 expression: Box::new(dash),
21809 safe: None,
21810 }));
21811 let concat3 =
21812 Expression::DPipe(Box::new(crate::expressions::DPipe {
21813 this: Box::new(concat2),
21814 expression: Box::new(chr127),
21815 safe: None,
21816 }));
21817 let concat4 =
21818 Expression::DPipe(Box::new(crate::expressions::DPipe {
21819 this: Box::new(concat3),
21820 expression: Box::new(s2),
21821 safe: None,
21822 }));
21823 let regexp_like = Expression::Function(Box::new(Function::new(
21824 "REGEXP_LIKE".to_string(),
21825 vec![arg, concat4],
21826 )));
21827 // Use Column("TRUE") to output literal TRUE keyword (not boolean 1/0)
21828 let true_expr = Expression::Column(crate::expressions::Column {
21829 name: Identifier {
21830 name: "TRUE".to_string(),
21831 quoted: false,
21832 trailing_comments: Vec::new(),
21833 },
21834 table: None,
21835 join_mark: false,
21836 trailing_comments: Vec::new(),
21837 });
21838 let nvl = Expression::Function(Box::new(Function::new(
21839 "NVL".to_string(),
21840 vec![regexp_like, true_expr],
21841 )));
21842 Ok(nvl)
21843 }
21844 _ => Ok(Expression::Function(Box::new(Function::new(
21845 "IS_ASCII".to_string(),
21846 vec![arg],
21847 )))),
21848 }
21849 } else {
21850 Ok(e)
21851 }
21852 }
21853
                Action::StrPositionConvert => {
                    // STR_POSITION(haystack, needle[, position[, occurrence]]) -> dialect-specific
                    if let Expression::Function(f) = e {
                        if f.args.len() < 2 {
                            return Ok(Expression::Function(f));
                        }
                        let mut args = f.args;

                        let haystack = args.remove(0);
                        let needle = args.remove(0);
                        // Optional 3rd arg: 1-based start position for the search.
                        let position = if !args.is_empty() {
                            Some(args.remove(0))
                        } else {
                            Option::None
                        };
                        // Optional 4th arg: which occurrence to find.
                        let occurrence = if !args.is_empty() {
                            Some(args.remove(0))
                        } else {
                            Option::None
                        };

                        // Helper to build: STRPOS/INSTR(SUBSTRING(haystack, pos), needle) expansion
                        // Returns: CASE/IF WHEN func(SUBSTRING(haystack, pos), needle[, occ]) = 0 THEN 0 ELSE ... + pos - 1 END
                        fn build_position_expansion(
                            haystack: Expression,
                            needle: Expression,
                            pos: Expression,
                            occurrence: Option<Expression>,
                            inner_func: &str,
                            wrapper: &str, // "CASE", "IF", "IIF"
                        ) -> Expression {
                            let substr = Expression::Function(Box::new(Function::new(
                                "SUBSTRING".to_string(),
                                vec![haystack, pos.clone()],
                            )));
                            let mut inner_args = vec![substr, needle];
                            if let Some(occ) = occurrence {
                                inner_args.push(occ);
                            }
                            let inner_call = Expression::Function(Box::new(Function::new(
                                inner_func.to_string(),
                                inner_args,
                            )));
                            let zero = Expression::Literal(Literal::Number("0".to_string()));
                            let one = Expression::Literal(Literal::Number("1".to_string()));
                            // inner_call = 0 : needle absent from the suffix starting at pos.
                            let eq_zero = Expression::Eq(Box::new(BinaryOp {
                                left: inner_call.clone(),
                                right: zero.clone(),
                                left_comments: Vec::new(),
                                operator_comments: Vec::new(),
                                trailing_comments: Vec::new(),
                            }));
                            // inner_call + pos - 1 : shift suffix-relative index back to
                            // a whole-string index.
                            let add_pos = Expression::Add(Box::new(BinaryOp {
                                left: inner_call,
                                right: pos,
                                left_comments: Vec::new(),
                                operator_comments: Vec::new(),
                                trailing_comments: Vec::new(),
                            }));
                            let sub_one = Expression::Sub(Box::new(BinaryOp {
                                left: add_pos,
                                right: one,
                                left_comments: Vec::new(),
                                operator_comments: Vec::new(),
                                trailing_comments: Vec::new(),
                            }));

                            match wrapper {
                                "CASE" => Expression::Case(Box::new(Case {
                                    operand: Option::None,
                                    whens: vec![(eq_zero, zero)],
                                    else_: Some(sub_one),
                                    comments: Vec::new(),
                                })),
                                "IIF" => Expression::Function(Box::new(Function::new(
                                    "IIF".to_string(),
                                    vec![eq_zero, zero, sub_one],
                                ))),
                                _ => Expression::Function(Box::new(Function::new(
                                    "IF".to_string(),
                                    vec![eq_zero, zero, sub_one],
                                ))),
                            }
                        }

                        match target {
                            // STRPOS group: Athena, DuckDB, Presto, Trino, Drill
                            DialectType::Athena
                            | DialectType::DuckDB
                            | DialectType::Presto
                            | DialectType::Trino
                            | DialectType::Drill => {
                                if let Some(pos) = position {
                                    let wrapper = if matches!(target, DialectType::DuckDB) {
                                        "CASE"
                                    } else {
                                        "IF"
                                    };
                                    let result = build_position_expansion(
                                        haystack, needle, pos, occurrence, "STRPOS", wrapper,
                                    );
                                    if matches!(target, DialectType::Drill) {
                                        // Drill uses backtick-quoted `IF`
                                        if let Expression::Function(mut f) = result {
                                            f.name = "`IF`".to_string();
                                            Ok(Expression::Function(f))
                                        } else {
                                            Ok(result)
                                        }
                                    } else {
                                        Ok(result)
                                    }
                                } else {
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "STRPOS".to_string(),
                                        vec![haystack, needle],
                                    ))))
                                }
                            }
                            // SQLite: IIF wrapper
                            DialectType::SQLite => {
                                if let Some(pos) = position {
                                    Ok(build_position_expansion(
                                        haystack, needle, pos, occurrence, "INSTR", "IIF",
                                    ))
                                } else {
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "INSTR".to_string(),
                                        vec![haystack, needle],
                                    ))))
                                }
                            }
                            // INSTR group: Teradata, BigQuery, Oracle
                            DialectType::Teradata | DialectType::BigQuery | DialectType::Oracle => {
                                let mut a = vec![haystack, needle];
                                if let Some(pos) = position {
                                    a.push(pos);
                                }
                                if let Some(occ) = occurrence {
                                    a.push(occ);
                                }
                                Ok(Expression::Function(Box::new(Function::new(
                                    "INSTR".to_string(),
                                    a,
                                ))))
                            }
                            // CHARINDEX group: Snowflake, TSQL
                            DialectType::Snowflake | DialectType::TSQL | DialectType::Fabric => {
                                // CHARINDEX takes the needle first.
                                let mut a = vec![needle, haystack];
                                if let Some(pos) = position {
                                    a.push(pos);
                                }
                                Ok(Expression::Function(Box::new(Function::new(
                                    "CHARINDEX".to_string(),
                                    a,
                                ))))
                            }
                            // POSITION(needle IN haystack): PostgreSQL, Materialize, RisingWave, Redshift
                            DialectType::PostgreSQL
                            | DialectType::Materialize
                            | DialectType::RisingWave
                            | DialectType::Redshift => {
                                if let Some(pos) = position {
                                    // Build: CASE WHEN POSITION(needle IN SUBSTRING(haystack FROM pos)) = 0 THEN 0
                                    //        ELSE POSITION(...) + pos - 1 END
                                    let substr = Expression::Substring(Box::new(
                                        crate::expressions::SubstringFunc {
                                            this: haystack,
                                            start: pos.clone(),
                                            length: Option::None,
                                            from_for_syntax: true,
                                        },
                                    ));
                                    let pos_in = Expression::StrPosition(Box::new(
                                        crate::expressions::StrPosition {
                                            this: Box::new(substr),
                                            substr: Some(Box::new(needle)),
                                            position: Option::None,
                                            occurrence: Option::None,
                                        },
                                    ));
                                    let zero =
                                        Expression::Literal(Literal::Number("0".to_string()));
                                    let one = Expression::Literal(Literal::Number("1".to_string()));
                                    let eq_zero = Expression::Eq(Box::new(BinaryOp {
                                        left: pos_in.clone(),
                                        right: zero.clone(),
                                        left_comments: Vec::new(),
                                        operator_comments: Vec::new(),
                                        trailing_comments: Vec::new(),
                                    }));
                                    let add_pos = Expression::Add(Box::new(BinaryOp {
                                        left: pos_in,
                                        right: pos,
                                        left_comments: Vec::new(),
                                        operator_comments: Vec::new(),
                                        trailing_comments: Vec::new(),
                                    }));
                                    let sub_one = Expression::Sub(Box::new(BinaryOp {
                                        left: add_pos,
                                        right: one,
                                        left_comments: Vec::new(),
                                        operator_comments: Vec::new(),
                                        trailing_comments: Vec::new(),
                                    }));
                                    Ok(Expression::Case(Box::new(Case {
                                        operand: Option::None,
                                        whens: vec![(eq_zero, zero)],
                                        else_: Some(sub_one),
                                        comments: Vec::new(),
                                    })))
                                } else {
                                    Ok(Expression::StrPosition(Box::new(
                                        crate::expressions::StrPosition {
                                            this: Box::new(haystack),
                                            substr: Some(Box::new(needle)),
                                            position: Option::None,
                                            occurrence: Option::None,
                                        },
                                    )))
                                }
                            }
                            // LOCATE group: MySQL, Hive, Spark, Databricks, Doris
                            DialectType::MySQL
                            | DialectType::SingleStore
                            | DialectType::TiDB
                            | DialectType::Hive
                            | DialectType::Spark
                            | DialectType::Databricks
                            | DialectType::Doris
                            | DialectType::StarRocks => {
                                // LOCATE also takes the needle first.
                                let mut a = vec![needle, haystack];
                                if let Some(pos) = position {
                                    a.push(pos);
                                }
                                Ok(Expression::Function(Box::new(Function::new(
                                    "LOCATE".to_string(),
                                    a,
                                ))))
                            }
                            // ClickHouse: POSITION(haystack, needle[, position])
                            DialectType::ClickHouse => {
                                let mut a = vec![haystack, needle];
                                if let Some(pos) = position {
                                    a.push(pos);
                                }
                                Ok(Expression::Function(Box::new(Function::new(
                                    "POSITION".to_string(),
                                    a,
                                ))))
                            }
                            _ => {
                                // Fallback: keep the normalized STR_POSITION spelling.
                                let mut a = vec![haystack, needle];
                                if let Some(pos) = position {
                                    a.push(pos);
                                }
                                if let Some(occ) = occurrence {
                                    a.push(occ);
                                }
                                Ok(Expression::Function(Box::new(Function::new(
                                    "STR_POSITION".to_string(),
                                    a,
                                ))))
                            }
                        }
                    } else {
                        Ok(e)
                    }
                }
22123
22124 Action::ArraySumConvert => {
22125 // ARRAY_SUM(arr) -> dialect-specific
22126 if let Expression::Function(f) = e {
22127 let args = f.args;
22128 match target {
22129 DialectType::DuckDB => Ok(Expression::Function(Box::new(
22130 Function::new("LIST_SUM".to_string(), args),
22131 ))),
22132 DialectType::Spark | DialectType::Databricks => {
22133 // AGGREGATE(arr, 0, (acc, x) -> acc + x, acc -> acc)
22134 let arr = args.into_iter().next().unwrap();
22135 let zero = Expression::Literal(Literal::Number("0".to_string()));
22136 let acc_id = Identifier::new("acc");
22137 let x_id = Identifier::new("x");
22138 let acc = Expression::Identifier(acc_id.clone());
22139 let x = Expression::Identifier(x_id.clone());
22140 let add = Expression::Add(Box::new(BinaryOp {
22141 left: acc.clone(),
22142 right: x,
22143 left_comments: Vec::new(),
22144 operator_comments: Vec::new(),
22145 trailing_comments: Vec::new(),
22146 }));
22147 let lambda1 =
22148 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
22149 parameters: vec![acc_id.clone(), x_id],
22150 body: add,
22151 colon: false,
22152 parameter_types: Vec::new(),
22153 }));
22154 let lambda2 =
22155 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
22156 parameters: vec![acc_id],
22157 body: acc,
22158 colon: false,
22159 parameter_types: Vec::new(),
22160 }));
22161 Ok(Expression::Function(Box::new(Function::new(
22162 "AGGREGATE".to_string(),
22163 vec![arr, zero, lambda1, lambda2],
22164 ))))
22165 }
22166 DialectType::Presto | DialectType::Athena => {
22167 // Presto/Athena keep ARRAY_SUM natively
22168 Ok(Expression::Function(Box::new(Function::new(
22169 "ARRAY_SUM".to_string(),
22170 args,
22171 ))))
22172 }
22173 DialectType::Trino => {
22174 // REDUCE(arr, 0, (acc, x) -> acc + x, acc -> acc)
22175 if args.len() == 1 {
22176 let arr = args.into_iter().next().unwrap();
22177 let zero =
22178 Expression::Literal(Literal::Number("0".to_string()));
22179 let acc_id = Identifier::new("acc");
22180 let x_id = Identifier::new("x");
22181 let acc = Expression::Identifier(acc_id.clone());
22182 let x = Expression::Identifier(x_id.clone());
22183 let add = Expression::Add(Box::new(BinaryOp {
22184 left: acc.clone(),
22185 right: x,
22186 left_comments: Vec::new(),
22187 operator_comments: Vec::new(),
22188 trailing_comments: Vec::new(),
22189 }));
22190 let lambda1 = Expression::Lambda(Box::new(
22191 crate::expressions::LambdaExpr {
22192 parameters: vec![acc_id.clone(), x_id],
22193 body: add,
22194 colon: false,
22195 parameter_types: Vec::new(),
22196 },
22197 ));
22198 let lambda2 = Expression::Lambda(Box::new(
22199 crate::expressions::LambdaExpr {
22200 parameters: vec![acc_id],
22201 body: acc,
22202 colon: false,
22203 parameter_types: Vec::new(),
22204 },
22205 ));
22206 Ok(Expression::Function(Box::new(Function::new(
22207 "REDUCE".to_string(),
22208 vec![arr, zero, lambda1, lambda2],
22209 ))))
22210 } else {
22211 Ok(Expression::Function(Box::new(Function::new(
22212 "ARRAY_SUM".to_string(),
22213 args,
22214 ))))
22215 }
22216 }
22217 DialectType::ClickHouse => {
22218 // arraySum(lambda, arr) or arraySum(arr)
22219 Ok(Expression::Function(Box::new(Function::new(
22220 "arraySum".to_string(),
22221 args,
22222 ))))
22223 }
22224 _ => Ok(Expression::Function(Box::new(Function::new(
22225 "ARRAY_SUM".to_string(),
22226 args,
22227 )))),
22228 }
22229 } else {
22230 Ok(e)
22231 }
22232 }
22233
22234 Action::ArraySizeConvert => {
22235 if let Expression::Function(f) = e {
22236 Ok(Expression::Function(Box::new(Function::new(
22237 "REPEATED_COUNT".to_string(),
22238 f.args,
22239 ))))
22240 } else {
22241 Ok(e)
22242 }
22243 }
22244
22245 Action::ArrayAnyConvert => {
22246 if let Expression::Function(f) = e {
22247 let mut args = f.args;
22248 if args.len() == 2 {
22249 let arr = args.remove(0);
22250 let lambda = args.remove(0);
22251
22252 // Extract lambda parameter name and body
22253 let (param_name, pred_body) =
22254 if let Expression::Lambda(ref lam) = lambda {
22255 let name = if let Some(p) = lam.parameters.first() {
22256 p.name.clone()
22257 } else {
22258 "x".to_string()
22259 };
22260 (name, lam.body.clone())
22261 } else {
22262 ("x".to_string(), lambda.clone())
22263 };
22264
22265 // Helper: build a function call Expression
22266 let make_func = |name: &str, args: Vec<Expression>| -> Expression {
22267 Expression::Function(Box::new(Function::new(
22268 name.to_string(),
22269 args,
22270 )))
22271 };
22272
22273 // Helper: build (len_func(arr) = 0 OR len_func(filter_expr) <> 0) wrapped in Paren
22274 let build_filter_pattern = |len_func: &str,
22275 len_args_extra: Vec<Expression>,
22276 filter_expr: Expression|
22277 -> Expression {
22278 // len_func(arr, ...extra) = 0
22279 let mut len_arr_args = vec![arr.clone()];
22280 len_arr_args.extend(len_args_extra.clone());
22281 let len_arr = make_func(len_func, len_arr_args);
22282 let eq_zero = Expression::Eq(Box::new(BinaryOp::new(
22283 len_arr,
22284 Expression::number(0),
22285 )));
22286
22287 // len_func(filter_expr, ...extra) <> 0
22288 let mut len_filter_args = vec![filter_expr];
22289 len_filter_args.extend(len_args_extra);
22290 let len_filter = make_func(len_func, len_filter_args);
22291 let neq_zero = Expression::Neq(Box::new(BinaryOp::new(
22292 len_filter,
22293 Expression::number(0),
22294 )));
22295
22296 // (eq_zero OR neq_zero)
22297 let or_expr =
22298 Expression::Or(Box::new(BinaryOp::new(eq_zero, neq_zero)));
22299 Expression::Paren(Box::new(Paren {
22300 this: or_expr,
22301 trailing_comments: Vec::new(),
22302 }))
22303 };
22304
22305 match target {
22306 DialectType::Trino | DialectType::Presto | DialectType::Athena => {
22307 Ok(make_func("ANY_MATCH", vec![arr, lambda]))
22308 }
22309 DialectType::ClickHouse => {
22310 // (LENGTH(arr) = 0 OR LENGTH(arrayFilter(x -> pred, arr)) <> 0)
22311 // ClickHouse arrayFilter takes lambda first, then array
22312 let filter_expr =
22313 make_func("arrayFilter", vec![lambda, arr.clone()]);
22314 Ok(build_filter_pattern("LENGTH", vec![], filter_expr))
22315 }
22316 DialectType::Databricks | DialectType::Spark => {
22317 // (SIZE(arr) = 0 OR SIZE(FILTER(arr, x -> pred)) <> 0)
22318 let filter_expr =
22319 make_func("FILTER", vec![arr.clone(), lambda]);
22320 Ok(build_filter_pattern("SIZE", vec![], filter_expr))
22321 }
22322 DialectType::DuckDB => {
22323 // (ARRAY_LENGTH(arr) = 0 OR ARRAY_LENGTH(LIST_FILTER(arr, x -> pred)) <> 0)
22324 let filter_expr =
22325 make_func("LIST_FILTER", vec![arr.clone(), lambda]);
22326 Ok(build_filter_pattern("ARRAY_LENGTH", vec![], filter_expr))
22327 }
22328 DialectType::Teradata => {
22329 // (CARDINALITY(arr) = 0 OR CARDINALITY(FILTER(arr, x -> pred)) <> 0)
22330 let filter_expr =
22331 make_func("FILTER", vec![arr.clone(), lambda]);
22332 Ok(build_filter_pattern("CARDINALITY", vec![], filter_expr))
22333 }
22334 DialectType::BigQuery => {
22335 // (ARRAY_LENGTH(arr) = 0 OR ARRAY_LENGTH(ARRAY(SELECT x FROM UNNEST(arr) AS x WHERE pred)) <> 0)
22336 // Build: SELECT x FROM UNNEST(arr) AS x WHERE pred
22337 let param_col = Expression::column(¶m_name);
22338 let unnest_expr = Expression::Unnest(Box::new(
22339 crate::expressions::UnnestFunc {
22340 this: arr.clone(),
22341 expressions: vec![],
22342 with_ordinality: false,
22343 alias: Some(Identifier::new(¶m_name)),
22344 offset_alias: None,
22345 },
22346 ));
22347 let mut sel = crate::expressions::Select::default();
22348 sel.expressions = vec![param_col];
22349 sel.from = Some(crate::expressions::From {
22350 expressions: vec![unnest_expr],
22351 });
22352 sel.where_clause =
22353 Some(crate::expressions::Where { this: pred_body });
22354 let array_subquery =
22355 make_func("ARRAY", vec![Expression::Select(Box::new(sel))]);
22356 Ok(build_filter_pattern("ARRAY_LENGTH", vec![], array_subquery))
22357 }
22358 DialectType::PostgreSQL => {
22359 // (ARRAY_LENGTH(arr, 1) = 0 OR ARRAY_LENGTH(ARRAY(SELECT x FROM UNNEST(arr) AS _t0(x) WHERE pred), 1) <> 0)
22360 // Build: SELECT x FROM UNNEST(arr) AS _t0(x) WHERE pred
22361 let param_col = Expression::column(¶m_name);
22362 // For PostgreSQL, UNNEST uses AS _t0(x) syntax - use TableAlias
22363 let unnest_with_alias =
22364 Expression::Alias(Box::new(crate::expressions::Alias {
22365 this: Expression::Unnest(Box::new(
22366 crate::expressions::UnnestFunc {
22367 this: arr.clone(),
22368 expressions: vec![],
22369 with_ordinality: false,
22370 alias: None,
22371 offset_alias: None,
22372 },
22373 )),
22374 alias: Identifier::new("_t0"),
22375 column_aliases: vec![Identifier::new(¶m_name)],
22376 pre_alias_comments: Vec::new(),
22377 trailing_comments: Vec::new(),
22378 }));
22379 let mut sel = crate::expressions::Select::default();
22380 sel.expressions = vec![param_col];
22381 sel.from = Some(crate::expressions::From {
22382 expressions: vec![unnest_with_alias],
22383 });
22384 sel.where_clause =
22385 Some(crate::expressions::Where { this: pred_body });
22386 let array_subquery =
22387 make_func("ARRAY", vec![Expression::Select(Box::new(sel))]);
22388 Ok(build_filter_pattern(
22389 "ARRAY_LENGTH",
22390 vec![Expression::number(1)],
22391 array_subquery,
22392 ))
22393 }
22394 _ => Ok(Expression::Function(Box::new(Function::new(
22395 "ARRAY_ANY".to_string(),
22396 vec![arr, lambda],
22397 )))),
22398 }
22399 } else {
22400 Ok(Expression::Function(Box::new(Function::new(
22401 "ARRAY_ANY".to_string(),
22402 args,
22403 ))))
22404 }
22405 } else {
22406 Ok(e)
22407 }
22408 }
22409
22410 Action::DecodeSimplify => {
22411 // DECODE(x, search1, result1, ..., default) -> CASE WHEN ... THEN result1 ... [ELSE default] END
22412 // For literal search values: CASE WHEN x = search THEN result
22413 // For NULL search: CASE WHEN x IS NULL THEN result
22414 // For non-literal (column, expr): CASE WHEN x = search OR (x IS NULL AND search IS NULL) THEN result
22415 fn is_decode_literal(e: &Expression) -> bool {
22416 matches!(
22417 e,
22418 Expression::Literal(_) | Expression::Boolean(_) | Expression::Neg(_)
22419 )
22420 }
22421
22422 let build_decode_case =
22423 |this_expr: Expression,
22424 pairs: Vec<(Expression, Expression)>,
22425 default: Option<Expression>| {
22426 let whens: Vec<(Expression, Expression)> = pairs
22427 .into_iter()
22428 .map(|(search, result)| {
22429 if matches!(&search, Expression::Null(_)) {
22430 // NULL search -> IS NULL
22431 let condition = Expression::Is(Box::new(BinaryOp {
22432 left: this_expr.clone(),
22433 right: Expression::Null(crate::expressions::Null),
22434 left_comments: Vec::new(),
22435 operator_comments: Vec::new(),
22436 trailing_comments: Vec::new(),
22437 }));
22438 (condition, result)
22439 } else if is_decode_literal(&search)
22440 || is_decode_literal(&this_expr)
22441 {
22442 // At least one side is a literal -> simple equality (no NULL check needed)
22443 let eq = Expression::Eq(Box::new(BinaryOp {
22444 left: this_expr.clone(),
22445 right: search,
22446 left_comments: Vec::new(),
22447 operator_comments: Vec::new(),
22448 trailing_comments: Vec::new(),
22449 }));
22450 (eq, result)
22451 } else {
22452 // Non-literal -> null-safe comparison
22453 let needs_paren = matches!(
22454 &search,
22455 Expression::Eq(_)
22456 | Expression::Neq(_)
22457 | Expression::Gt(_)
22458 | Expression::Gte(_)
22459 | Expression::Lt(_)
22460 | Expression::Lte(_)
22461 );
22462 let search_ref = if needs_paren {
22463 Expression::Paren(Box::new(crate::expressions::Paren {
22464 this: search.clone(),
22465 trailing_comments: Vec::new(),
22466 }))
22467 } else {
22468 search.clone()
22469 };
22470 // Build: x = search OR (x IS NULL AND search IS NULL)
22471 let eq = Expression::Eq(Box::new(BinaryOp {
22472 left: this_expr.clone(),
22473 right: search_ref,
22474 left_comments: Vec::new(),
22475 operator_comments: Vec::new(),
22476 trailing_comments: Vec::new(),
22477 }));
22478 let search_in_null = if needs_paren {
22479 Expression::Paren(Box::new(crate::expressions::Paren {
22480 this: search.clone(),
22481 trailing_comments: Vec::new(),
22482 }))
22483 } else {
22484 search.clone()
22485 };
22486 let x_is_null = Expression::Is(Box::new(BinaryOp {
22487 left: this_expr.clone(),
22488 right: Expression::Null(crate::expressions::Null),
22489 left_comments: Vec::new(),
22490 operator_comments: Vec::new(),
22491 trailing_comments: Vec::new(),
22492 }));
22493 let search_is_null = Expression::Is(Box::new(BinaryOp {
22494 left: search_in_null,
22495 right: Expression::Null(crate::expressions::Null),
22496 left_comments: Vec::new(),
22497 operator_comments: Vec::new(),
22498 trailing_comments: Vec::new(),
22499 }));
22500 let both_null = Expression::And(Box::new(BinaryOp {
22501 left: x_is_null,
22502 right: search_is_null,
22503 left_comments: Vec::new(),
22504 operator_comments: Vec::new(),
22505 trailing_comments: Vec::new(),
22506 }));
22507 let condition = Expression::Or(Box::new(BinaryOp {
22508 left: eq,
22509 right: Expression::Paren(Box::new(
22510 crate::expressions::Paren {
22511 this: both_null,
22512 trailing_comments: Vec::new(),
22513 },
22514 )),
22515 left_comments: Vec::new(),
22516 operator_comments: Vec::new(),
22517 trailing_comments: Vec::new(),
22518 }));
22519 (condition, result)
22520 }
22521 })
22522 .collect();
22523 Expression::Case(Box::new(Case {
22524 operand: None,
22525 whens,
22526 else_: default,
22527 comments: Vec::new(),
22528 }))
22529 };
22530
22531 if let Expression::Decode(decode) = e {
22532 Ok(build_decode_case(
22533 decode.this,
22534 decode.search_results,
22535 decode.default,
22536 ))
22537 } else if let Expression::DecodeCase(dc) = e {
22538 // DecodeCase has flat expressions: [x, s1, r1, s2, r2, ..., default?]
22539 let mut exprs = dc.expressions;
22540 if exprs.len() < 3 {
22541 return Ok(Expression::DecodeCase(Box::new(
22542 crate::expressions::DecodeCase { expressions: exprs },
22543 )));
22544 }
22545 let this_expr = exprs.remove(0);
22546 let mut pairs = Vec::new();
22547 let mut default = None;
22548 let mut i = 0;
22549 while i + 1 < exprs.len() {
22550 pairs.push((exprs[i].clone(), exprs[i + 1].clone()));
22551 i += 2;
22552 }
22553 if i < exprs.len() {
22554 // Odd remaining element is the default
22555 default = Some(exprs[i].clone());
22556 }
22557 Ok(build_decode_case(this_expr, pairs, default))
22558 } else {
22559 Ok(e)
22560 }
22561 }
22562
22563 Action::CreateTableLikeToCtas => {
22564 // CREATE TABLE a LIKE b -> CREATE TABLE a AS SELECT * FROM b LIMIT 0
22565 if let Expression::CreateTable(ct) = e {
22566 let like_source = ct.constraints.iter().find_map(|c| {
22567 if let crate::expressions::TableConstraint::Like { source, .. } = c {
22568 Some(source.clone())
22569 } else {
22570 None
22571 }
22572 });
22573 if let Some(source_table) = like_source {
22574 let mut new_ct = *ct;
22575 new_ct.constraints.clear();
22576 // Build: SELECT * FROM b LIMIT 0
22577 let select = Expression::Select(Box::new(crate::expressions::Select {
22578 expressions: vec![Expression::Star(crate::expressions::Star {
22579 table: None,
22580 except: None,
22581 replace: None,
22582 rename: None,
22583 trailing_comments: Vec::new(),
22584 })],
22585 from: Some(crate::expressions::From {
22586 expressions: vec![Expression::Table(source_table)],
22587 }),
22588 limit: Some(crate::expressions::Limit {
22589 this: Expression::Literal(Literal::Number("0".to_string())),
22590 percent: false,
22591 comments: Vec::new(),
22592 }),
22593 ..Default::default()
22594 }));
22595 new_ct.as_select = Some(select);
22596 Ok(Expression::CreateTable(Box::new(new_ct)))
22597 } else {
22598 Ok(Expression::CreateTable(ct))
22599 }
22600 } else {
22601 Ok(e)
22602 }
22603 }
22604
22605 Action::CreateTableLikeToSelectInto => {
22606 // CREATE TABLE a LIKE b -> SELECT TOP 0 * INTO a FROM b AS temp
22607 if let Expression::CreateTable(ct) = e {
22608 let like_source = ct.constraints.iter().find_map(|c| {
22609 if let crate::expressions::TableConstraint::Like { source, .. } = c {
22610 Some(source.clone())
22611 } else {
22612 None
22613 }
22614 });
22615 if let Some(source_table) = like_source {
22616 let mut aliased_source = source_table;
22617 aliased_source.alias = Some(Identifier::new("temp"));
22618 // Build: SELECT TOP 0 * INTO a FROM b AS temp
22619 let select = Expression::Select(Box::new(crate::expressions::Select {
22620 expressions: vec![Expression::Star(crate::expressions::Star {
22621 table: None,
22622 except: None,
22623 replace: None,
22624 rename: None,
22625 trailing_comments: Vec::new(),
22626 })],
22627 from: Some(crate::expressions::From {
22628 expressions: vec![Expression::Table(aliased_source)],
22629 }),
22630 into: Some(crate::expressions::SelectInto {
22631 this: Expression::Table(ct.name.clone()),
22632 temporary: false,
22633 unlogged: false,
22634 bulk_collect: false,
22635 expressions: Vec::new(),
22636 }),
22637 top: Some(crate::expressions::Top {
22638 this: Expression::Literal(Literal::Number("0".to_string())),
22639 percent: false,
22640 with_ties: false,
22641 parenthesized: false,
22642 }),
22643 ..Default::default()
22644 }));
22645 Ok(select)
22646 } else {
22647 Ok(Expression::CreateTable(ct))
22648 }
22649 } else {
22650 Ok(e)
22651 }
22652 }
22653
22654 Action::CreateTableLikeToAs => {
22655 // CREATE TABLE a LIKE b -> CREATE TABLE a AS b (ClickHouse)
22656 if let Expression::CreateTable(ct) = e {
22657 let like_source = ct.constraints.iter().find_map(|c| {
22658 if let crate::expressions::TableConstraint::Like { source, .. } = c {
22659 Some(source.clone())
22660 } else {
22661 None
22662 }
22663 });
22664 if let Some(source_table) = like_source {
22665 let mut new_ct = *ct;
22666 new_ct.constraints.clear();
22667 // AS b (just a table reference, not a SELECT)
22668 new_ct.as_select = Some(Expression::Table(source_table));
22669 Ok(Expression::CreateTable(Box::new(new_ct)))
22670 } else {
22671 Ok(Expression::CreateTable(ct))
22672 }
22673 } else {
22674 Ok(e)
22675 }
22676 }
22677
22678 Action::TsOrDsToDateConvert => {
22679 // TS_OR_DS_TO_DATE(x[, fmt]) -> dialect-specific date conversion
22680 if let Expression::Function(f) = e {
22681 let mut args = f.args;
22682 let this = args.remove(0);
22683 let fmt = if !args.is_empty() {
22684 match &args[0] {
22685 Expression::Literal(Literal::String(s)) => Some(s.clone()),
22686 _ => None,
22687 }
22688 } else {
22689 None
22690 };
22691 Ok(Expression::TsOrDsToDate(Box::new(
22692 crate::expressions::TsOrDsToDate {
22693 this: Box::new(this),
22694 format: fmt,
22695 safe: None,
22696 },
22697 )))
22698 } else {
22699 Ok(e)
22700 }
22701 }
22702
22703 Action::TsOrDsToDateStrConvert => {
22704 // TS_OR_DS_TO_DATE_STR(x) -> SUBSTRING(CAST(x AS type), 1, 10)
22705 if let Expression::Function(f) = e {
22706 let arg = f.args.into_iter().next().unwrap();
22707 let str_type = match target {
22708 DialectType::DuckDB
22709 | DialectType::PostgreSQL
22710 | DialectType::Materialize => DataType::Text,
22711 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
22712 DataType::Custom {
22713 name: "STRING".to_string(),
22714 }
22715 }
22716 DialectType::Presto
22717 | DialectType::Trino
22718 | DialectType::Athena
22719 | DialectType::Drill => DataType::VarChar {
22720 length: None,
22721 parenthesized_length: false,
22722 },
22723 DialectType::MySQL | DialectType::Doris | DialectType::StarRocks => {
22724 DataType::Custom {
22725 name: "STRING".to_string(),
22726 }
22727 }
22728 _ => DataType::VarChar {
22729 length: None,
22730 parenthesized_length: false,
22731 },
22732 };
22733 let cast_expr = Expression::Cast(Box::new(Cast {
22734 this: arg,
22735 to: str_type,
22736 double_colon_syntax: false,
22737 trailing_comments: Vec::new(),
22738 format: None,
22739 default: None,
22740 }));
22741 Ok(Expression::Substring(Box::new(
22742 crate::expressions::SubstringFunc {
22743 this: cast_expr,
22744 start: Expression::number(1),
22745 length: Some(Expression::number(10)),
22746 from_for_syntax: false,
22747 },
22748 )))
22749 } else {
22750 Ok(e)
22751 }
22752 }
22753
22754 Action::DateStrToDateConvert => {
22755 // DATE_STR_TO_DATE(x) -> dialect-specific
22756 if let Expression::Function(f) = e {
22757 let arg = f.args.into_iter().next().unwrap();
22758 match target {
22759 DialectType::SQLite => {
22760 // SQLite: just the bare expression (dates are strings)
22761 Ok(arg)
22762 }
22763 _ => Ok(Expression::Cast(Box::new(Cast {
22764 this: arg,
22765 to: DataType::Date,
22766 double_colon_syntax: false,
22767 trailing_comments: Vec::new(),
22768 format: None,
22769 default: None,
22770 }))),
22771 }
22772 } else {
22773 Ok(e)
22774 }
22775 }
22776
22777 Action::TimeStrToDateConvert => {
22778 // TIME_STR_TO_DATE(x) -> dialect-specific
22779 if let Expression::Function(f) = e {
22780 let arg = f.args.into_iter().next().unwrap();
22781 match target {
22782 DialectType::Hive
22783 | DialectType::Doris
22784 | DialectType::StarRocks
22785 | DialectType::Snowflake => Ok(Expression::Function(Box::new(
22786 Function::new("TO_DATE".to_string(), vec![arg]),
22787 ))),
22788 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
22789 // Presto: CAST(x AS TIMESTAMP)
22790 Ok(Expression::Cast(Box::new(Cast {
22791 this: arg,
22792 to: DataType::Timestamp {
22793 timezone: false,
22794 precision: None,
22795 },
22796 double_colon_syntax: false,
22797 trailing_comments: Vec::new(),
22798 format: None,
22799 default: None,
22800 })))
22801 }
22802 _ => {
22803 // Default: CAST(x AS DATE)
22804 Ok(Expression::Cast(Box::new(Cast {
22805 this: arg,
22806 to: DataType::Date,
22807 double_colon_syntax: false,
22808 trailing_comments: Vec::new(),
22809 format: None,
22810 default: None,
22811 })))
22812 }
22813 }
22814 } else {
22815 Ok(e)
22816 }
22817 }
22818
22819 Action::TimeStrToTimeConvert => {
22820 // TIME_STR_TO_TIME(x[, zone]) -> dialect-specific CAST to timestamp type
22821 if let Expression::Function(f) = e {
22822 let mut args = f.args;
22823 let this = args.remove(0);
22824 let zone = if !args.is_empty() {
22825 match &args[0] {
22826 Expression::Literal(Literal::String(s)) => Some(s.clone()),
22827 _ => None,
22828 }
22829 } else {
22830 None
22831 };
22832 let has_zone = zone.is_some();
22833
22834 match target {
22835 DialectType::SQLite => {
22836 // SQLite: just the bare expression
22837 Ok(this)
22838 }
22839 DialectType::MySQL => {
22840 if has_zone {
22841 // MySQL with zone: TIMESTAMP(x)
22842 Ok(Expression::Function(Box::new(Function::new(
22843 "TIMESTAMP".to_string(),
22844 vec![this],
22845 ))))
22846 } else {
22847 // MySQL: CAST(x AS DATETIME) or with precision
22848 // Use DataType::Custom to avoid MySQL's transform_cast converting
22849 // CAST(x AS TIMESTAMP) -> TIMESTAMP(x)
22850 let precision =
22851 if let Expression::Literal(Literal::String(ref s)) = this {
22852 if let Some(dot_pos) = s.rfind('.') {
22853 let frac = &s[dot_pos + 1..];
22854 let digit_count = frac
22855 .chars()
22856 .take_while(|c| c.is_ascii_digit())
22857 .count();
22858 if digit_count > 0 {
22859 Some(digit_count)
22860 } else {
22861 None
22862 }
22863 } else {
22864 None
22865 }
22866 } else {
22867 None
22868 };
22869 let type_name = match precision {
22870 Some(p) => format!("DATETIME({})", p),
22871 None => "DATETIME".to_string(),
22872 };
22873 Ok(Expression::Cast(Box::new(Cast {
22874 this,
22875 to: DataType::Custom { name: type_name },
22876 double_colon_syntax: false,
22877 trailing_comments: Vec::new(),
22878 format: None,
22879 default: None,
22880 })))
22881 }
22882 }
22883 DialectType::ClickHouse => {
22884 if has_zone {
22885 // ClickHouse with zone: CAST(x AS DateTime64(6, 'zone'))
22886 // We need to strip the timezone offset from the literal if present
22887 let clean_this =
22888 if let Expression::Literal(Literal::String(ref s)) = this {
22889 // Strip timezone offset like "-08:00" or "+00:00"
22890 let re_offset = s.rfind(|c: char| c == '+' || c == '-');
22891 if let Some(offset_pos) = re_offset {
22892 if offset_pos > 10 {
22893 // After the date part
22894 let trimmed = s[..offset_pos].to_string();
22895 Expression::Literal(Literal::String(trimmed))
22896 } else {
22897 this.clone()
22898 }
22899 } else {
22900 this.clone()
22901 }
22902 } else {
22903 this.clone()
22904 };
22905 let zone_str = zone.unwrap();
22906 // Build: CAST(x AS DateTime64(6, 'zone'))
22907 let type_name = format!("DateTime64(6, '{}')", zone_str);
22908 Ok(Expression::Cast(Box::new(Cast {
22909 this: clean_this,
22910 to: DataType::Custom { name: type_name },
22911 double_colon_syntax: false,
22912 trailing_comments: Vec::new(),
22913 format: None,
22914 default: None,
22915 })))
22916 } else {
22917 Ok(Expression::Cast(Box::new(Cast {
22918 this,
22919 to: DataType::Custom {
22920 name: "DateTime64(6)".to_string(),
22921 },
22922 double_colon_syntax: false,
22923 trailing_comments: Vec::new(),
22924 format: None,
22925 default: None,
22926 })))
22927 }
22928 }
22929 DialectType::BigQuery => {
22930 if has_zone {
22931 // BigQuery with zone: CAST(x AS TIMESTAMP)
22932 Ok(Expression::Cast(Box::new(Cast {
22933 this,
22934 to: DataType::Timestamp {
22935 timezone: false,
22936 precision: None,
22937 },
22938 double_colon_syntax: false,
22939 trailing_comments: Vec::new(),
22940 format: None,
22941 default: None,
22942 })))
22943 } else {
22944 // BigQuery: CAST(x AS DATETIME) - Timestamp{tz:false} renders as DATETIME for BigQuery
22945 Ok(Expression::Cast(Box::new(Cast {
22946 this,
22947 to: DataType::Custom {
22948 name: "DATETIME".to_string(),
22949 },
22950 double_colon_syntax: false,
22951 trailing_comments: Vec::new(),
22952 format: None,
22953 default: None,
22954 })))
22955 }
22956 }
22957 DialectType::Doris => {
22958 // Doris: CAST(x AS DATETIME)
22959 Ok(Expression::Cast(Box::new(Cast {
22960 this,
22961 to: DataType::Custom {
22962 name: "DATETIME".to_string(),
22963 },
22964 double_colon_syntax: false,
22965 trailing_comments: Vec::new(),
22966 format: None,
22967 default: None,
22968 })))
22969 }
22970 DialectType::TSQL | DialectType::Fabric => {
22971 if has_zone {
22972 // TSQL with zone: CAST(x AS DATETIMEOFFSET) AT TIME ZONE 'UTC'
22973 let cast_expr = Expression::Cast(Box::new(Cast {
22974 this,
22975 to: DataType::Custom {
22976 name: "DATETIMEOFFSET".to_string(),
22977 },
22978 double_colon_syntax: false,
22979 trailing_comments: Vec::new(),
22980 format: None,
22981 default: None,
22982 }));
22983 Ok(Expression::AtTimeZone(Box::new(
22984 crate::expressions::AtTimeZone {
22985 this: cast_expr,
22986 zone: Expression::Literal(Literal::String(
22987 "UTC".to_string(),
22988 )),
22989 },
22990 )))
22991 } else {
22992 // TSQL: CAST(x AS DATETIME2)
22993 Ok(Expression::Cast(Box::new(Cast {
22994 this,
22995 to: DataType::Custom {
22996 name: "DATETIME2".to_string(),
22997 },
22998 double_colon_syntax: false,
22999 trailing_comments: Vec::new(),
23000 format: None,
23001 default: None,
23002 })))
23003 }
23004 }
23005 DialectType::DuckDB => {
23006 if has_zone {
23007 // DuckDB with zone: CAST(x AS TIMESTAMPTZ)
23008 Ok(Expression::Cast(Box::new(Cast {
23009 this,
23010 to: DataType::Timestamp {
23011 timezone: true,
23012 precision: None,
23013 },
23014 double_colon_syntax: false,
23015 trailing_comments: Vec::new(),
23016 format: None,
23017 default: None,
23018 })))
23019 } else {
23020 // DuckDB: CAST(x AS TIMESTAMP)
23021 Ok(Expression::Cast(Box::new(Cast {
23022 this,
23023 to: DataType::Timestamp {
23024 timezone: false,
23025 precision: None,
23026 },
23027 double_colon_syntax: false,
23028 trailing_comments: Vec::new(),
23029 format: None,
23030 default: None,
23031 })))
23032 }
23033 }
23034 DialectType::PostgreSQL
23035 | DialectType::Materialize
23036 | DialectType::RisingWave => {
23037 if has_zone {
23038 // PostgreSQL with zone: CAST(x AS TIMESTAMPTZ)
23039 Ok(Expression::Cast(Box::new(Cast {
23040 this,
23041 to: DataType::Timestamp {
23042 timezone: true,
23043 precision: None,
23044 },
23045 double_colon_syntax: false,
23046 trailing_comments: Vec::new(),
23047 format: None,
23048 default: None,
23049 })))
23050 } else {
23051 // PostgreSQL: CAST(x AS TIMESTAMP)
23052 Ok(Expression::Cast(Box::new(Cast {
23053 this,
23054 to: DataType::Timestamp {
23055 timezone: false,
23056 precision: None,
23057 },
23058 double_colon_syntax: false,
23059 trailing_comments: Vec::new(),
23060 format: None,
23061 default: None,
23062 })))
23063 }
23064 }
23065 DialectType::Snowflake => {
23066 if has_zone {
23067 // Snowflake with zone: CAST(x AS TIMESTAMPTZ)
23068 Ok(Expression::Cast(Box::new(Cast {
23069 this,
23070 to: DataType::Timestamp {
23071 timezone: true,
23072 precision: None,
23073 },
23074 double_colon_syntax: false,
23075 trailing_comments: Vec::new(),
23076 format: None,
23077 default: None,
23078 })))
23079 } else {
23080 // Snowflake: CAST(x AS TIMESTAMP)
23081 Ok(Expression::Cast(Box::new(Cast {
23082 this,
23083 to: DataType::Timestamp {
23084 timezone: false,
23085 precision: None,
23086 },
23087 double_colon_syntax: false,
23088 trailing_comments: Vec::new(),
23089 format: None,
23090 default: None,
23091 })))
23092 }
23093 }
23094 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
23095 if has_zone {
23096 // Presto/Trino with zone: CAST(x AS TIMESTAMP WITH TIME ZONE)
23097 // Check for precision from sub-second digits
23098 let precision =
23099 if let Expression::Literal(Literal::String(ref s)) = this {
23100 if let Some(dot_pos) = s.rfind('.') {
23101 let frac = &s[dot_pos + 1..];
23102 let digit_count = frac
23103 .chars()
23104 .take_while(|c| c.is_ascii_digit())
23105 .count();
23106 if digit_count > 0
23107 && matches!(target, DialectType::Trino)
23108 {
23109 Some(digit_count as u32)
23110 } else {
23111 None
23112 }
23113 } else {
23114 None
23115 }
23116 } else {
23117 None
23118 };
23119 let dt = if let Some(prec) = precision {
23120 DataType::Timestamp {
23121 timezone: true,
23122 precision: Some(prec),
23123 }
23124 } else {
23125 DataType::Timestamp {
23126 timezone: true,
23127 precision: None,
23128 }
23129 };
23130 Ok(Expression::Cast(Box::new(Cast {
23131 this,
23132 to: dt,
23133 double_colon_syntax: false,
23134 trailing_comments: Vec::new(),
23135 format: None,
23136 default: None,
23137 })))
23138 } else {
23139 // Check for sub-second precision for Trino
23140 let precision =
23141 if let Expression::Literal(Literal::String(ref s)) = this {
23142 if let Some(dot_pos) = s.rfind('.') {
23143 let frac = &s[dot_pos + 1..];
23144 let digit_count = frac
23145 .chars()
23146 .take_while(|c| c.is_ascii_digit())
23147 .count();
23148 if digit_count > 0
23149 && matches!(target, DialectType::Trino)
23150 {
23151 Some(digit_count as u32)
23152 } else {
23153 None
23154 }
23155 } else {
23156 None
23157 }
23158 } else {
23159 None
23160 };
23161 let dt = DataType::Timestamp {
23162 timezone: false,
23163 precision,
23164 };
23165 Ok(Expression::Cast(Box::new(Cast {
23166 this,
23167 to: dt,
23168 double_colon_syntax: false,
23169 trailing_comments: Vec::new(),
23170 format: None,
23171 default: None,
23172 })))
23173 }
23174 }
23175 DialectType::Redshift => {
23176 if has_zone {
23177 // Redshift with zone: CAST(x AS TIMESTAMP WITH TIME ZONE)
23178 Ok(Expression::Cast(Box::new(Cast {
23179 this,
23180 to: DataType::Timestamp {
23181 timezone: true,
23182 precision: None,
23183 },
23184 double_colon_syntax: false,
23185 trailing_comments: Vec::new(),
23186 format: None,
23187 default: None,
23188 })))
23189 } else {
23190 // Redshift: CAST(x AS TIMESTAMP)
23191 Ok(Expression::Cast(Box::new(Cast {
23192 this,
23193 to: DataType::Timestamp {
23194 timezone: false,
23195 precision: None,
23196 },
23197 double_colon_syntax: false,
23198 trailing_comments: Vec::new(),
23199 format: None,
23200 default: None,
23201 })))
23202 }
23203 }
23204 _ => {
23205 // Default: CAST(x AS TIMESTAMP)
23206 Ok(Expression::Cast(Box::new(Cast {
23207 this,
23208 to: DataType::Timestamp {
23209 timezone: false,
23210 precision: None,
23211 },
23212 double_colon_syntax: false,
23213 trailing_comments: Vec::new(),
23214 format: None,
23215 default: None,
23216 })))
23217 }
23218 }
23219 } else {
23220 Ok(e)
23221 }
23222 }
23223
23224 Action::DateToDateStrConvert => {
23225 // DATE_TO_DATE_STR(x) -> CAST(x AS text_type) per dialect
23226 if let Expression::Function(f) = e {
23227 let arg = f.args.into_iter().next().unwrap();
23228 let str_type = match target {
23229 DialectType::DuckDB => DataType::Text,
23230 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
23231 DataType::Custom {
23232 name: "STRING".to_string(),
23233 }
23234 }
23235 DialectType::Presto
23236 | DialectType::Trino
23237 | DialectType::Athena
23238 | DialectType::Drill => DataType::VarChar {
23239 length: None,
23240 parenthesized_length: false,
23241 },
23242 _ => DataType::VarChar {
23243 length: None,
23244 parenthesized_length: false,
23245 },
23246 };
23247 Ok(Expression::Cast(Box::new(Cast {
23248 this: arg,
23249 to: str_type,
23250 double_colon_syntax: false,
23251 trailing_comments: Vec::new(),
23252 format: None,
23253 default: None,
23254 })))
23255 } else {
23256 Ok(e)
23257 }
23258 }
23259
23260 Action::DateToDiConvert => {
23261 // DATE_TO_DI(x) -> CAST(format_func(x, fmt) AS INT)
23262 if let Expression::Function(f) = e {
23263 let arg = f.args.into_iter().next().unwrap();
23264 let inner = match target {
23265 DialectType::DuckDB => {
23266 // STRFTIME(x, '%Y%m%d')
23267 Expression::Function(Box::new(Function::new(
23268 "STRFTIME".to_string(),
23269 vec![arg, Expression::string("%Y%m%d")],
23270 )))
23271 }
23272 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
23273 // DATE_FORMAT(x, 'yyyyMMdd')
23274 Expression::Function(Box::new(Function::new(
23275 "DATE_FORMAT".to_string(),
23276 vec![arg, Expression::string("yyyyMMdd")],
23277 )))
23278 }
23279 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
23280 // DATE_FORMAT(x, '%Y%m%d')
23281 Expression::Function(Box::new(Function::new(
23282 "DATE_FORMAT".to_string(),
23283 vec![arg, Expression::string("%Y%m%d")],
23284 )))
23285 }
23286 DialectType::Drill => {
23287 // TO_DATE(x, 'yyyyMMdd')
23288 Expression::Function(Box::new(Function::new(
23289 "TO_DATE".to_string(),
23290 vec![arg, Expression::string("yyyyMMdd")],
23291 )))
23292 }
23293 _ => {
23294 // Default: STRFTIME(x, '%Y%m%d')
23295 Expression::Function(Box::new(Function::new(
23296 "STRFTIME".to_string(),
23297 vec![arg, Expression::string("%Y%m%d")],
23298 )))
23299 }
23300 };
23301 // Use INT (not INTEGER) for Presto/Trino
23302 let int_type = match target {
23303 DialectType::Presto
23304 | DialectType::Trino
23305 | DialectType::Athena
23306 | DialectType::TSQL
23307 | DialectType::Fabric
23308 | DialectType::SQLite
23309 | DialectType::Redshift => DataType::Custom {
23310 name: "INT".to_string(),
23311 },
23312 _ => DataType::Int {
23313 length: None,
23314 integer_spelling: false,
23315 },
23316 };
23317 Ok(Expression::Cast(Box::new(Cast {
23318 this: inner,
23319 to: int_type,
23320 double_colon_syntax: false,
23321 trailing_comments: Vec::new(),
23322 format: None,
23323 default: None,
23324 })))
23325 } else {
23326 Ok(e)
23327 }
23328 }
23329
23330 Action::DiToDateConvert => {
23331 // DI_TO_DATE(x) -> dialect-specific integer-to-date conversion
23332 if let Expression::Function(f) = e {
23333 let arg = f.args.into_iter().next().unwrap();
23334 match target {
23335 DialectType::DuckDB => {
23336 // CAST(STRPTIME(CAST(x AS TEXT), '%Y%m%d') AS DATE)
23337 let cast_text = Expression::Cast(Box::new(Cast {
23338 this: arg,
23339 to: DataType::Text,
23340 double_colon_syntax: false,
23341 trailing_comments: Vec::new(),
23342 format: None,
23343 default: None,
23344 }));
23345 let strptime = Expression::Function(Box::new(Function::new(
23346 "STRPTIME".to_string(),
23347 vec![cast_text, Expression::string("%Y%m%d")],
23348 )));
23349 Ok(Expression::Cast(Box::new(Cast {
23350 this: strptime,
23351 to: DataType::Date,
23352 double_colon_syntax: false,
23353 trailing_comments: Vec::new(),
23354 format: None,
23355 default: None,
23356 })))
23357 }
23358 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
23359 // TO_DATE(CAST(x AS STRING), 'yyyyMMdd')
23360 let cast_str = Expression::Cast(Box::new(Cast {
23361 this: arg,
23362 to: DataType::Custom {
23363 name: "STRING".to_string(),
23364 },
23365 double_colon_syntax: false,
23366 trailing_comments: Vec::new(),
23367 format: None,
23368 default: None,
23369 }));
23370 Ok(Expression::Function(Box::new(Function::new(
23371 "TO_DATE".to_string(),
23372 vec![cast_str, Expression::string("yyyyMMdd")],
23373 ))))
23374 }
23375 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
23376 // CAST(DATE_PARSE(CAST(x AS VARCHAR), '%Y%m%d') AS DATE)
23377 let cast_varchar = Expression::Cast(Box::new(Cast {
23378 this: arg,
23379 to: DataType::VarChar {
23380 length: None,
23381 parenthesized_length: false,
23382 },
23383 double_colon_syntax: false,
23384 trailing_comments: Vec::new(),
23385 format: None,
23386 default: None,
23387 }));
23388 let date_parse = Expression::Function(Box::new(Function::new(
23389 "DATE_PARSE".to_string(),
23390 vec![cast_varchar, Expression::string("%Y%m%d")],
23391 )));
23392 Ok(Expression::Cast(Box::new(Cast {
23393 this: date_parse,
23394 to: DataType::Date,
23395 double_colon_syntax: false,
23396 trailing_comments: Vec::new(),
23397 format: None,
23398 default: None,
23399 })))
23400 }
23401 DialectType::Drill => {
23402 // TO_DATE(CAST(x AS VARCHAR), 'yyyyMMdd')
23403 let cast_varchar = Expression::Cast(Box::new(Cast {
23404 this: arg,
23405 to: DataType::VarChar {
23406 length: None,
23407 parenthesized_length: false,
23408 },
23409 double_colon_syntax: false,
23410 trailing_comments: Vec::new(),
23411 format: None,
23412 default: None,
23413 }));
23414 Ok(Expression::Function(Box::new(Function::new(
23415 "TO_DATE".to_string(),
23416 vec![cast_varchar, Expression::string("yyyyMMdd")],
23417 ))))
23418 }
23419 _ => Ok(Expression::Function(Box::new(Function::new(
23420 "DI_TO_DATE".to_string(),
23421 vec![arg],
23422 )))),
23423 }
23424 } else {
23425 Ok(e)
23426 }
23427 }
23428
23429 Action::TsOrDiToDiConvert => {
23430 // TS_OR_DI_TO_DI(x) -> CAST(SUBSTR(REPLACE(CAST(x AS type), '-', ''), 1, 8) AS INT)
23431 if let Expression::Function(f) = e {
23432 let arg = f.args.into_iter().next().unwrap();
23433 let str_type = match target {
23434 DialectType::DuckDB => DataType::Text,
23435 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
23436 DataType::Custom {
23437 name: "STRING".to_string(),
23438 }
23439 }
23440 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
23441 DataType::VarChar {
23442 length: None,
23443 parenthesized_length: false,
23444 }
23445 }
23446 _ => DataType::VarChar {
23447 length: None,
23448 parenthesized_length: false,
23449 },
23450 };
23451 let cast_str = Expression::Cast(Box::new(Cast {
23452 this: arg,
23453 to: str_type,
23454 double_colon_syntax: false,
23455 trailing_comments: Vec::new(),
23456 format: None,
23457 default: None,
23458 }));
23459 let replace_expr = Expression::Function(Box::new(Function::new(
23460 "REPLACE".to_string(),
23461 vec![cast_str, Expression::string("-"), Expression::string("")],
23462 )));
23463 let substr_name = match target {
23464 DialectType::DuckDB
23465 | DialectType::Hive
23466 | DialectType::Spark
23467 | DialectType::Databricks => "SUBSTR",
23468 _ => "SUBSTR",
23469 };
23470 let substr = Expression::Function(Box::new(Function::new(
23471 substr_name.to_string(),
23472 vec![replace_expr, Expression::number(1), Expression::number(8)],
23473 )));
23474 // Use INT (not INTEGER) for Presto/Trino etc.
23475 let int_type = match target {
23476 DialectType::Presto
23477 | DialectType::Trino
23478 | DialectType::Athena
23479 | DialectType::TSQL
23480 | DialectType::Fabric
23481 | DialectType::SQLite
23482 | DialectType::Redshift => DataType::Custom {
23483 name: "INT".to_string(),
23484 },
23485 _ => DataType::Int {
23486 length: None,
23487 integer_spelling: false,
23488 },
23489 };
23490 Ok(Expression::Cast(Box::new(Cast {
23491 this: substr,
23492 to: int_type,
23493 double_colon_syntax: false,
23494 trailing_comments: Vec::new(),
23495 format: None,
23496 default: None,
23497 })))
23498 } else {
23499 Ok(e)
23500 }
23501 }
23502
            Action::UnixToStrConvert => {
                // UNIX_TO_STR(x, fmt) -> convert to Expression::UnixToStr for generator
                //
                // Literal formats are carried on the UnixToStr node so the generator
                // can translate the format string; non-literal formats are expanded
                // into target-dialect SQL here, since the generator cannot rewrite an
                // arbitrary expression as a format string.
                if let Expression::Function(f) = e {
                    let mut args = f.args;
                    // NOTE(review): assumes the parser guarantees at least one
                    // argument; an empty argument list would panic here — confirm
                    // upstream arity checks.
                    let this = args.remove(0);
                    let fmt_expr = if !args.is_empty() {
                        Some(args.remove(0))
                    } else {
                        None
                    };

                    // Check if format is a string literal
                    let fmt_str = fmt_expr.as_ref().and_then(|f| {
                        if let Expression::Literal(Literal::String(s)) = f {
                            Some(s.clone())
                        } else {
                            None
                        }
                    });

                    if let Some(fmt_string) = fmt_str {
                        // String literal format -> use UnixToStr expression (generator handles it)
                        Ok(Expression::UnixToStr(Box::new(
                            crate::expressions::UnixToStr {
                                this: Box::new(this),
                                format: Some(fmt_string),
                            },
                        )))
                    } else if let Some(fmt_e) = fmt_expr {
                        // Non-literal format (e.g., identifier `y`) -> build target expression directly
                        match target {
                            DialectType::DuckDB => {
                                // STRFTIME(TO_TIMESTAMP(x), y)
                                let to_ts = Expression::Function(Box::new(Function::new(
                                    "TO_TIMESTAMP".to_string(),
                                    vec![this],
                                )));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "STRFTIME".to_string(),
                                    vec![to_ts, fmt_e],
                                ))))
                            }
                            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                                // DATE_FORMAT(FROM_UNIXTIME(x), y)
                                let from_unix = Expression::Function(Box::new(Function::new(
                                    "FROM_UNIXTIME".to_string(),
                                    vec![this],
                                )));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_FORMAT".to_string(),
                                    vec![from_unix, fmt_e],
                                ))))
                            }
                            DialectType::Hive
                            | DialectType::Spark
                            | DialectType::Databricks
                            | DialectType::Doris
                            | DialectType::StarRocks => {
                                // FROM_UNIXTIME(x, y)
                                Ok(Expression::Function(Box::new(Function::new(
                                    "FROM_UNIXTIME".to_string(),
                                    vec![this, fmt_e],
                                ))))
                            }
                            _ => {
                                // Default: keep as UNIX_TO_STR(x, y)
                                Ok(Expression::Function(Box::new(Function::new(
                                    "UNIX_TO_STR".to_string(),
                                    vec![this, fmt_e],
                                ))))
                            }
                        }
                    } else {
                        // No format argument at all: UnixToStr with format unset.
                        Ok(Expression::UnixToStr(Box::new(
                            crate::expressions::UnixToStr {
                                this: Box::new(this),
                                format: None,
                            },
                        )))
                    }
                } else {
                    Ok(e)
                }
            }
23587
            Action::UnixToTimeConvert => {
                // UNIX_TO_TIME(x) -> convert to Expression::UnixToTime for generator
                // All optional fields (scale/zone/offset/format/target_type) start
                // unset; the generator picks the dialect-specific rendering.
                if let Expression::Function(f) = e {
                    // NOTE(review): assumes at least one argument — panics otherwise.
                    let arg = f.args.into_iter().next().unwrap();
                    Ok(Expression::UnixToTime(Box::new(
                        crate::expressions::UnixToTime {
                            this: Box::new(arg),
                            scale: None,
                            zone: None,
                            hours: None,
                            minutes: None,
                            format: None,
                            target_type: None,
                        },
                    )))
                } else {
                    Ok(e)
                }
            }
23607
            Action::UnixToTimeStrConvert => {
                // UNIX_TO_TIME_STR(x) -> dialect-specific
                // Renders a Unix epoch as a timestamp *string*, using each target's
                // native epoch-to-timestamp function plus a string cast where needed.
                if let Expression::Function(f) = e {
                    // NOTE(review): assumes at least one argument — panics otherwise.
                    let arg = f.args.into_iter().next().unwrap();
                    match target {
                        DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
                            // FROM_UNIXTIME(x)
                            Ok(Expression::Function(Box::new(Function::new(
                                "FROM_UNIXTIME".to_string(),
                                vec![arg],
                            ))))
                        }
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // CAST(FROM_UNIXTIME(x) AS VARCHAR)
                            let from_unix = Expression::Function(Box::new(Function::new(
                                "FROM_UNIXTIME".to_string(),
                                vec![arg],
                            )));
                            Ok(Expression::Cast(Box::new(Cast {
                                this: from_unix,
                                to: DataType::VarChar {
                                    length: None,
                                    parenthesized_length: false,
                                },
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                            })))
                        }
                        DialectType::DuckDB => {
                            // CAST(TO_TIMESTAMP(x) AS TEXT)
                            let to_ts = Expression::Function(Box::new(Function::new(
                                "TO_TIMESTAMP".to_string(),
                                vec![arg],
                            )));
                            Ok(Expression::Cast(Box::new(Cast {
                                this: to_ts,
                                to: DataType::Text,
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                            })))
                        }
                        // Everything else: keep the pseudo-function untouched.
                        _ => Ok(Expression::Function(Box::new(Function::new(
                            "UNIX_TO_TIME_STR".to_string(),
                            vec![arg],
                        )))),
                    }
                } else {
                    Ok(e)
                }
            }
23662
            Action::TimeToUnixConvert => {
                // TIME_TO_UNIX(x) -> convert to Expression::TimeToUnix for generator
                if let Expression::Function(f) = e {
                    // NOTE(review): assumes at least one argument — panics otherwise.
                    let arg = f.args.into_iter().next().unwrap();
                    Ok(Expression::TimeToUnix(Box::new(
                        crate::expressions::UnaryFunc {
                            this: arg,
                            // No original name preserved: the generator emits the
                            // target dialect's canonical spelling.
                            original_name: None,
                        },
                    )))
                } else {
                    Ok(e)
                }
            }
23677
23678 Action::TimeToStrConvert => {
23679 // TIME_TO_STR(x, fmt) -> convert to Expression::TimeToStr for generator
23680 if let Expression::Function(f) = e {
23681 let mut args = f.args;
23682 let this = args.remove(0);
23683 let fmt = match args.remove(0) {
23684 Expression::Literal(Literal::String(s)) => s,
23685 other => {
23686 return Ok(Expression::Function(Box::new(Function::new(
23687 "TIME_TO_STR".to_string(),
23688 vec![this, other],
23689 ))));
23690 }
23691 };
23692 Ok(Expression::TimeToStr(Box::new(
23693 crate::expressions::TimeToStr {
23694 this: Box::new(this),
23695 format: fmt,
23696 culture: None,
23697 zone: None,
23698 },
23699 )))
23700 } else {
23701 Ok(e)
23702 }
23703 }
23704
23705 Action::StrToUnixConvert => {
23706 // STR_TO_UNIX(x, fmt) -> convert to Expression::StrToUnix for generator
23707 if let Expression::Function(f) = e {
23708 let mut args = f.args;
23709 let this = args.remove(0);
23710 let fmt = match args.remove(0) {
23711 Expression::Literal(Literal::String(s)) => s,
23712 other => {
23713 return Ok(Expression::Function(Box::new(Function::new(
23714 "STR_TO_UNIX".to_string(),
23715 vec![this, other],
23716 ))));
23717 }
23718 };
23719 Ok(Expression::StrToUnix(Box::new(
23720 crate::expressions::StrToUnix {
23721 this: Some(Box::new(this)),
23722 format: Some(fmt),
23723 },
23724 )))
23725 } else {
23726 Ok(e)
23727 }
23728 }
23729
            Action::TimeStrToUnixConvert => {
                // TIME_STR_TO_UNIX(x) -> dialect-specific
                // Parses a timestamp string and converts it to a Unix epoch value
                // with each target's native parse/epoch functions.
                if let Expression::Function(f) = e {
                    // NOTE(review): assumes at least one argument — panics otherwise.
                    let arg = f.args.into_iter().next().unwrap();
                    match target {
                        DialectType::DuckDB => {
                            // EPOCH(CAST(x AS TIMESTAMP))
                            let cast_ts = Expression::Cast(Box::new(Cast {
                                this: arg,
                                to: DataType::Timestamp {
                                    timezone: false,
                                    precision: None,
                                },
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                            }));
                            Ok(Expression::Function(Box::new(Function::new(
                                "EPOCH".to_string(),
                                vec![cast_ts],
                            ))))
                        }
                        DialectType::Hive
                        | DialectType::Doris
                        | DialectType::StarRocks
                        | DialectType::MySQL => {
                            // UNIX_TIMESTAMP(x)
                            Ok(Expression::Function(Box::new(Function::new(
                                "UNIX_TIMESTAMP".to_string(),
                                vec![arg],
                            ))))
                        }
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // TO_UNIXTIME(DATE_PARSE(x, '%Y-%m-%d %T'))
                            let date_parse = Expression::Function(Box::new(Function::new(
                                "DATE_PARSE".to_string(),
                                vec![arg, Expression::string("%Y-%m-%d %T")],
                            )));
                            Ok(Expression::Function(Box::new(Function::new(
                                "TO_UNIXTIME".to_string(),
                                vec![date_parse],
                            ))))
                        }
                        // Everything else: keep the pseudo-function untouched.
                        _ => Ok(Expression::Function(Box::new(Function::new(
                            "TIME_STR_TO_UNIX".to_string(),
                            vec![arg],
                        )))),
                    }
                } else {
                    Ok(e)
                }
            }
23783
            Action::TimeToTimeStrConvert => {
                // TIME_TO_TIME_STR(x) -> CAST(x AS str_type) per dialect
                if let Expression::Function(f) = e {
                    // NOTE(review): assumes at least one argument — panics otherwise.
                    let arg = f.args.into_iter().next().unwrap();
                    // Pick the target dialect's canonical "string" type for the CAST.
                    let str_type = match target {
                        DialectType::DuckDB => DataType::Text,
                        DialectType::Hive
                        | DialectType::Spark
                        | DialectType::Databricks
                        | DialectType::Doris
                        | DialectType::StarRocks => DataType::Custom {
                            name: "STRING".to_string(),
                        },
                        DialectType::Redshift => DataType::Custom {
                            name: "VARCHAR(MAX)".to_string(),
                        },
                        _ => DataType::VarChar {
                            length: None,
                            parenthesized_length: false,
                        },
                    };
                    Ok(Expression::Cast(Box::new(Cast {
                        this: arg,
                        to: str_type,
                        double_colon_syntax: false,
                        trailing_comments: Vec::new(),
                        format: None,
                        default: None,
                    })))
                } else {
                    Ok(e)
                }
            }
23817
            Action::DateTruncSwapArgs => {
                // DATE_TRUNC('unit', x) from Generic -> target-specific
                // Normalizes the (unit, expr) argument order — it differs between
                // dialects — or expands the call to an equivalent construct.
                if let Expression::Function(f) = e {
                    if f.args.len() == 2 {
                        let unit_arg = f.args[0].clone();
                        let expr_arg = f.args[1].clone();
                        // Extract unit string from the first arg
                        let unit_str = match &unit_arg {
                            Expression::Literal(Literal::String(s)) => s.to_uppercase(),
                            // Non-literal unit: leave the call untouched.
                            _ => return Ok(Expression::Function(f)),
                        };
                        match target {
                            DialectType::BigQuery => {
                                // BigQuery: DATE_TRUNC(x, UNIT) - unquoted unit
                                // The unit is rendered as a bare identifier (Column),
                                // not a string literal.
                                let unit_ident =
                                    Expression::Column(crate::expressions::Column {
                                        name: crate::expressions::Identifier::new(unit_str),
                                        table: None,
                                        join_mark: false,
                                        trailing_comments: Vec::new(),
                                    });
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![expr_arg, unit_ident],
                                ))))
                            }
                            DialectType::Doris => {
                                // Doris: DATE_TRUNC(x, 'UNIT')
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![expr_arg, Expression::string(&unit_str)],
                                ))))
                            }
                            DialectType::StarRocks => {
                                // StarRocks: DATE_TRUNC('UNIT', x) - keep standard order
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![Expression::string(&unit_str), expr_arg],
                                ))))
                            }
                            DialectType::Spark | DialectType::Databricks => {
                                // Spark: TRUNC(x, 'UNIT')
                                Ok(Expression::Function(Box::new(Function::new(
                                    "TRUNC".to_string(),
                                    vec![expr_arg, Expression::string(&unit_str)],
                                ))))
                            }
                            DialectType::MySQL => {
                                // MySQL: complex expansion based on unit
                                Self::date_trunc_to_mysql(&unit_str, &expr_arg)
                            }
                            _ => Ok(Expression::Function(f)),
                        }
                    } else {
                        Ok(Expression::Function(f))
                    }
                } else {
                    Ok(e)
                }
            }
23878
            Action::TimestampTruncConvert => {
                // TIMESTAMP_TRUNC(x, UNIT[, tz]) from Generic -> target-specific
                // Rewrites to the target's truncation function; the optional third
                // timezone argument is kept, wrapped as AT TIME ZONE, or dropped
                // depending on what the target supports.
                if let Expression::Function(f) = e {
                    if f.args.len() >= 2 {
                        let expr_arg = f.args[0].clone();
                        let unit_arg = f.args[1].clone();
                        let tz_arg = if f.args.len() >= 3 {
                            Some(f.args[2].clone())
                        } else {
                            None
                        };
                        // Extract unit string
                        // (accepts both a string literal and a bare identifier).
                        let unit_str = match &unit_arg {
                            Expression::Literal(Literal::String(s)) => s.to_uppercase(),
                            Expression::Column(c) => c.name.name.to_uppercase(),
                            _ => {
                                return Ok(Expression::Function(f));
                            }
                        };
                        match target {
                            DialectType::Spark | DialectType::Databricks => {
                                // Spark: DATE_TRUNC('UNIT', x)
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![Expression::string(&unit_str), expr_arg],
                                ))))
                            }
                            DialectType::Doris | DialectType::StarRocks => {
                                // Doris: DATE_TRUNC(x, 'UNIT')
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![expr_arg, Expression::string(&unit_str)],
                                ))))
                            }
                            DialectType::BigQuery => {
                                // BigQuery: TIMESTAMP_TRUNC(x, UNIT) - keep but with unquoted unit
                                let unit_ident =
                                    Expression::Column(crate::expressions::Column {
                                        name: crate::expressions::Identifier::new(unit_str),
                                        table: None,
                                        join_mark: false,
                                        trailing_comments: Vec::new(),
                                    });
                                let mut args = vec![expr_arg, unit_ident];
                                if let Some(tz) = tz_arg {
                                    args.push(tz);
                                }
                                Ok(Expression::Function(Box::new(Function::new(
                                    "TIMESTAMP_TRUNC".to_string(),
                                    args,
                                ))))
                            }
                            DialectType::DuckDB => {
                                // DuckDB with timezone: DATE_TRUNC('UNIT', x AT TIME ZONE 'tz') AT TIME ZONE 'tz'
                                if let Some(tz) = tz_arg {
                                    let tz_str = match &tz {
                                        Expression::Literal(Literal::String(s)) => s.clone(),
                                        // Non-literal zone: fall back to UTC.
                                        _ => "UTC".to_string(),
                                    };
                                    // x AT TIME ZONE 'tz'
                                    let at_tz = Expression::AtTimeZone(Box::new(
                                        crate::expressions::AtTimeZone {
                                            this: expr_arg,
                                            zone: Expression::string(&tz_str),
                                        },
                                    ));
                                    // DATE_TRUNC('UNIT', x AT TIME ZONE 'tz')
                                    let trunc = Expression::Function(Box::new(Function::new(
                                        "DATE_TRUNC".to_string(),
                                        vec![Expression::string(&unit_str), at_tz],
                                    )));
                                    // DATE_TRUNC(...) AT TIME ZONE 'tz'
                                    Ok(Expression::AtTimeZone(Box::new(
                                        crate::expressions::AtTimeZone {
                                            this: trunc,
                                            zone: Expression::string(&tz_str),
                                        },
                                    )))
                                } else {
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "DATE_TRUNC".to_string(),
                                        vec![Expression::string(&unit_str), expr_arg],
                                    ))))
                                }
                            }
                            DialectType::Presto
                            | DialectType::Trino
                            | DialectType::Athena
                            | DialectType::Snowflake => {
                                // Presto/Snowflake: DATE_TRUNC('UNIT', x) - drop timezone
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![Expression::string(&unit_str), expr_arg],
                                ))))
                            }
                            _ => {
                                // For most dialects: DATE_TRUNC('UNIT', x) + tz handling
                                let mut args = vec![Expression::string(&unit_str), expr_arg];
                                if let Some(tz) = tz_arg {
                                    args.push(tz);
                                }
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    args,
                                ))))
                            }
                        }
                    } else {
                        Ok(Expression::Function(f))
                    }
                } else {
                    Ok(e)
                }
            }
23993
            Action::StrToDateConvert => {
                // STR_TO_DATE(x, fmt) from Generic -> dialect-specific date parsing
                // Dispatches on three cases: (1) fmt is one of the two "default"
                // literal formats, (2) fmt is any other string literal, (3) fmt is
                // a non-literal expression (kept as-is).
                if let Expression::Function(f) = e {
                    if f.args.len() == 2 {
                        let mut args = f.args;
                        let this = args.remove(0);
                        let fmt_expr = args.remove(0);
                        let fmt_str = match &fmt_expr {
                            Expression::Literal(Literal::String(s)) => Some(s.clone()),
                            _ => None,
                        };
                        let default_date = "%Y-%m-%d";
                        let default_time = "%Y-%m-%d %H:%M:%S";
                        let is_default = fmt_str
                            .as_ref()
                            .map_or(false, |f| f == default_date || f == default_time);

                        if is_default {
                            // Default format: handle per-dialect
                            match target {
                                DialectType::MySQL
                                | DialectType::Doris
                                | DialectType::StarRocks => {
                                    // Keep STR_TO_DATE(x, fmt) as-is
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "STR_TO_DATE".to_string(),
                                        vec![this, fmt_expr],
                                    ))))
                                }
                                DialectType::Hive => {
                                    // Hive: CAST(x AS DATE)
                                    Ok(Expression::Cast(Box::new(Cast {
                                        this,
                                        to: DataType::Date,
                                        double_colon_syntax: false,
                                        trailing_comments: Vec::new(),
                                        format: None,
                                        default: None,
                                    })))
                                }
                                DialectType::Presto
                                | DialectType::Trino
                                | DialectType::Athena => {
                                    // Presto: CAST(DATE_PARSE(x, '%Y-%m-%d') AS DATE)
                                    let date_parse =
                                        Expression::Function(Box::new(Function::new(
                                            "DATE_PARSE".to_string(),
                                            vec![this, fmt_expr],
                                        )));
                                    Ok(Expression::Cast(Box::new(Cast {
                                        this: date_parse,
                                        to: DataType::Date,
                                        double_colon_syntax: false,
                                        trailing_comments: Vec::new(),
                                        format: None,
                                        default: None,
                                    })))
                                }
                                _ => {
                                    // Others: TsOrDsToDate (delegates to generator)
                                    Ok(Expression::TsOrDsToDate(Box::new(
                                        crate::expressions::TsOrDsToDate {
                                            this: Box::new(this),
                                            format: None,
                                            safe: None,
                                        },
                                    )))
                                }
                            }
                        } else if let Some(fmt) = fmt_str {
                            // Non-default string-literal format.
                            match target {
                                DialectType::Doris
                                | DialectType::StarRocks
                                | DialectType::MySQL => {
                                    // Keep STR_TO_DATE but with normalized format (%H:%M:%S -> %T, %-d -> %e)
                                    let mut normalized = fmt.clone();
                                    normalized = normalized.replace("%-d", "%e");
                                    normalized = normalized.replace("%-m", "%c");
                                    normalized = normalized.replace("%H:%M:%S", "%T");
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "STR_TO_DATE".to_string(),
                                        vec![this, Expression::string(&normalized)],
                                    ))))
                                }
                                DialectType::Hive => {
                                    // Hive: CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(x, java_fmt)) AS DATE)
                                    let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
                                    let unix_ts =
                                        Expression::Function(Box::new(Function::new(
                                            "UNIX_TIMESTAMP".to_string(),
                                            vec![this, Expression::string(&java_fmt)],
                                        )));
                                    let from_unix =
                                        Expression::Function(Box::new(Function::new(
                                            "FROM_UNIXTIME".to_string(),
                                            vec![unix_ts],
                                        )));
                                    Ok(Expression::Cast(Box::new(Cast {
                                        this: from_unix,
                                        to: DataType::Date,
                                        double_colon_syntax: false,
                                        trailing_comments: Vec::new(),
                                        format: None,
                                        default: None,
                                    })))
                                }
                                DialectType::Spark | DialectType::Databricks => {
                                    // Spark: TO_DATE(x, java_fmt)
                                    let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "TO_DATE".to_string(),
                                        vec![this, Expression::string(&java_fmt)],
                                    ))))
                                }
                                DialectType::Drill => {
                                    // Drill: TO_DATE(x, java_fmt) with T quoted as 'T' in Java format
                                    // The generator's string literal escaping will double the quotes: 'T' -> ''T''
                                    let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
                                    let java_fmt = java_fmt.replace('T', "'T'");
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "TO_DATE".to_string(),
                                        vec![this, Expression::string(&java_fmt)],
                                    ))))
                                }
                                _ => {
                                    // For other dialects: use TsOrDsToDate which delegates to generator
                                    Ok(Expression::TsOrDsToDate(Box::new(
                                        crate::expressions::TsOrDsToDate {
                                            this: Box::new(this),
                                            format: Some(fmt),
                                            safe: None,
                                        },
                                    )))
                                }
                            }
                        } else {
                            // Non-string format - keep as-is
                            let mut new_args = Vec::new();
                            new_args.push(this);
                            new_args.push(fmt_expr);
                            Ok(Expression::Function(Box::new(Function::new(
                                "STR_TO_DATE".to_string(),
                                new_args,
                            ))))
                        }
                    } else {
                        Ok(Expression::Function(f))
                    }
                } else {
                    Ok(e)
                }
            }
24146
24147 Action::TsOrDsAddConvert => {
24148 // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> dialect-specific DATE_ADD
24149 if let Expression::Function(f) = e {
24150 if f.args.len() == 3 {
24151 let mut args = f.args;
24152 let x = args.remove(0);
24153 let n = args.remove(0);
24154 let unit_expr = args.remove(0);
24155 let unit_str = match &unit_expr {
24156 Expression::Literal(Literal::String(s)) => s.to_uppercase(),
24157 _ => "DAY".to_string(),
24158 };
24159
24160 match target {
24161 DialectType::Hive
24162 | DialectType::Spark
24163 | DialectType::Databricks => {
24164 // DATE_ADD(x, n) - only supports DAY unit
24165 Ok(Expression::Function(Box::new(Function::new(
24166 "DATE_ADD".to_string(),
24167 vec![x, n],
24168 ))))
24169 }
24170 DialectType::MySQL => {
24171 // DATE_ADD(x, INTERVAL n UNIT)
24172 let iu = match unit_str.to_uppercase().as_str() {
24173 "YEAR" => crate::expressions::IntervalUnit::Year,
24174 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
24175 "MONTH" => crate::expressions::IntervalUnit::Month,
24176 "WEEK" => crate::expressions::IntervalUnit::Week,
24177 "HOUR" => crate::expressions::IntervalUnit::Hour,
24178 "MINUTE" => crate::expressions::IntervalUnit::Minute,
24179 "SECOND" => crate::expressions::IntervalUnit::Second,
24180 _ => crate::expressions::IntervalUnit::Day,
24181 };
24182 let interval = Expression::Interval(Box::new(
24183 crate::expressions::Interval {
24184 this: Some(n),
24185 unit: Some(
24186 crate::expressions::IntervalUnitSpec::Simple {
24187 unit: iu,
24188 use_plural: false,
24189 },
24190 ),
24191 },
24192 ));
24193 Ok(Expression::Function(Box::new(Function::new(
24194 "DATE_ADD".to_string(),
24195 vec![x, interval],
24196 ))))
24197 }
24198 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
24199 // DATE_ADD('UNIT', n, CAST(CAST(x AS TIMESTAMP) AS DATE))
24200 let cast_ts = Expression::Cast(Box::new(Cast {
24201 this: x,
24202 to: DataType::Timestamp {
24203 precision: None,
24204 timezone: false,
24205 },
24206 double_colon_syntax: false,
24207 trailing_comments: Vec::new(),
24208 format: None,
24209 default: None,
24210 }));
24211 let cast_date = Expression::Cast(Box::new(Cast {
24212 this: cast_ts,
24213 to: DataType::Date,
24214 double_colon_syntax: false,
24215 trailing_comments: Vec::new(),
24216 format: None,
24217 default: None,
24218 }));
24219 Ok(Expression::Function(Box::new(Function::new(
24220 "DATE_ADD".to_string(),
24221 vec![Expression::string(&unit_str), n, cast_date],
24222 ))))
24223 }
24224 DialectType::DuckDB => {
24225 // CAST(x AS DATE) + INTERVAL n UNIT
24226 let cast_date = Expression::Cast(Box::new(Cast {
24227 this: x,
24228 to: DataType::Date,
24229 double_colon_syntax: false,
24230 trailing_comments: Vec::new(),
24231 format: None,
24232 default: None,
24233 }));
24234 let iu = match unit_str.to_uppercase().as_str() {
24235 "YEAR" => crate::expressions::IntervalUnit::Year,
24236 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
24237 "MONTH" => crate::expressions::IntervalUnit::Month,
24238 "WEEK" => crate::expressions::IntervalUnit::Week,
24239 "HOUR" => crate::expressions::IntervalUnit::Hour,
24240 "MINUTE" => crate::expressions::IntervalUnit::Minute,
24241 "SECOND" => crate::expressions::IntervalUnit::Second,
24242 _ => crate::expressions::IntervalUnit::Day,
24243 };
24244 let interval = Expression::Interval(Box::new(
24245 crate::expressions::Interval {
24246 this: Some(n),
24247 unit: Some(
24248 crate::expressions::IntervalUnitSpec::Simple {
24249 unit: iu,
24250 use_plural: false,
24251 },
24252 ),
24253 },
24254 ));
24255 Ok(Expression::Add(Box::new(crate::expressions::BinaryOp {
24256 left: cast_date,
24257 right: interval,
24258 left_comments: Vec::new(),
24259 operator_comments: Vec::new(),
24260 trailing_comments: Vec::new(),
24261 })))
24262 }
24263 DialectType::Drill => {
24264 // DATE_ADD(CAST(x AS DATE), INTERVAL n UNIT)
24265 let cast_date = Expression::Cast(Box::new(Cast {
24266 this: x,
24267 to: DataType::Date,
24268 double_colon_syntax: false,
24269 trailing_comments: Vec::new(),
24270 format: None,
24271 default: None,
24272 }));
24273 let iu = match unit_str.to_uppercase().as_str() {
24274 "YEAR" => crate::expressions::IntervalUnit::Year,
24275 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
24276 "MONTH" => crate::expressions::IntervalUnit::Month,
24277 "WEEK" => crate::expressions::IntervalUnit::Week,
24278 "HOUR" => crate::expressions::IntervalUnit::Hour,
24279 "MINUTE" => crate::expressions::IntervalUnit::Minute,
24280 "SECOND" => crate::expressions::IntervalUnit::Second,
24281 _ => crate::expressions::IntervalUnit::Day,
24282 };
24283 let interval = Expression::Interval(Box::new(
24284 crate::expressions::Interval {
24285 this: Some(n),
24286 unit: Some(
24287 crate::expressions::IntervalUnitSpec::Simple {
24288 unit: iu,
24289 use_plural: false,
24290 },
24291 ),
24292 },
24293 ));
24294 Ok(Expression::Function(Box::new(Function::new(
24295 "DATE_ADD".to_string(),
24296 vec![cast_date, interval],
24297 ))))
24298 }
24299 _ => {
24300 // Default: keep as TS_OR_DS_ADD
24301 Ok(Expression::Function(Box::new(Function::new(
24302 "TS_OR_DS_ADD".to_string(),
24303 vec![x, n, unit_expr],
24304 ))))
24305 }
24306 }
24307 } else {
24308 Ok(Expression::Function(f))
24309 }
24310 } else {
24311 Ok(e)
24312 }
24313 }
24314
24315 Action::DateFromUnixDateConvert => {
24316 // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, CAST('1970-01-01' AS DATE))
24317 if let Expression::Function(f) = e {
24318 // Keep as-is for dialects that support DATE_FROM_UNIX_DATE natively
24319 if matches!(
24320 target,
24321 DialectType::Spark | DialectType::Databricks | DialectType::BigQuery
24322 ) {
24323 return Ok(Expression::Function(Box::new(Function::new(
24324 "DATE_FROM_UNIX_DATE".to_string(),
24325 f.args,
24326 ))));
24327 }
24328 let n = f.args.into_iter().next().unwrap();
24329 let epoch_date = Expression::Cast(Box::new(Cast {
24330 this: Expression::string("1970-01-01"),
24331 to: DataType::Date,
24332 double_colon_syntax: false,
24333 trailing_comments: Vec::new(),
24334 format: None,
24335 default: None,
24336 }));
24337 match target {
24338 DialectType::DuckDB => {
24339 // CAST('1970-01-01' AS DATE) + INTERVAL n DAY
24340 let interval =
24341 Expression::Interval(Box::new(crate::expressions::Interval {
24342 this: Some(n),
24343 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
24344 unit: crate::expressions::IntervalUnit::Day,
24345 use_plural: false,
24346 }),
24347 }));
24348 Ok(Expression::Add(Box::new(
24349 crate::expressions::BinaryOp::new(epoch_date, interval),
24350 )))
24351 }
24352 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
24353 // DATE_ADD('DAY', n, CAST('1970-01-01' AS DATE))
24354 Ok(Expression::Function(Box::new(Function::new(
24355 "DATE_ADD".to_string(),
24356 vec![Expression::string("DAY"), n, epoch_date],
24357 ))))
24358 }
24359 DialectType::Snowflake | DialectType::Redshift | DialectType::TSQL => {
24360 // DATEADD(DAY, n, CAST('1970-01-01' AS DATE))
24361 Ok(Expression::Function(Box::new(Function::new(
24362 "DATEADD".to_string(),
24363 vec![
24364 Expression::Identifier(Identifier::new("DAY")),
24365 n,
24366 epoch_date,
24367 ],
24368 ))))
24369 }
24370 DialectType::BigQuery => {
24371 // DATE_ADD(CAST('1970-01-01' AS DATE), INTERVAL n DAY)
24372 let interval =
24373 Expression::Interval(Box::new(crate::expressions::Interval {
24374 this: Some(n),
24375 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
24376 unit: crate::expressions::IntervalUnit::Day,
24377 use_plural: false,
24378 }),
24379 }));
24380 Ok(Expression::Function(Box::new(Function::new(
24381 "DATE_ADD".to_string(),
24382 vec![epoch_date, interval],
24383 ))))
24384 }
24385 DialectType::MySQL
24386 | DialectType::Doris
24387 | DialectType::StarRocks
24388 | DialectType::Drill => {
24389 // DATE_ADD(CAST('1970-01-01' AS DATE), INTERVAL n DAY)
24390 let interval =
24391 Expression::Interval(Box::new(crate::expressions::Interval {
24392 this: Some(n),
24393 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
24394 unit: crate::expressions::IntervalUnit::Day,
24395 use_plural: false,
24396 }),
24397 }));
24398 Ok(Expression::Function(Box::new(Function::new(
24399 "DATE_ADD".to_string(),
24400 vec![epoch_date, interval],
24401 ))))
24402 }
24403 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
24404 // DATE_ADD(CAST('1970-01-01' AS DATE), n)
24405 Ok(Expression::Function(Box::new(Function::new(
24406 "DATE_ADD".to_string(),
24407 vec![epoch_date, n],
24408 ))))
24409 }
24410 DialectType::PostgreSQL
24411 | DialectType::Materialize
24412 | DialectType::RisingWave => {
24413 // CAST('1970-01-01' AS DATE) + INTERVAL 'n DAY'
24414 let n_str = match &n {
24415 Expression::Literal(Literal::Number(s)) => s.clone(),
24416 _ => Self::expr_to_string_static(&n),
24417 };
24418 let interval =
24419 Expression::Interval(Box::new(crate::expressions::Interval {
24420 this: Some(Expression::string(&format!("{} DAY", n_str))),
24421 unit: None,
24422 }));
24423 Ok(Expression::Add(Box::new(
24424 crate::expressions::BinaryOp::new(epoch_date, interval),
24425 )))
24426 }
24427 _ => {
24428 // Default: keep as-is
24429 Ok(Expression::Function(Box::new(Function::new(
24430 "DATE_FROM_UNIX_DATE".to_string(),
24431 vec![n],
24432 ))))
24433 }
24434 }
24435 } else {
24436 Ok(e)
24437 }
24438 }
24439
24440 Action::ArrayRemoveConvert => {
24441 // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter
24442 if let Expression::ArrayRemove(bf) = e {
24443 let arr = bf.this;
24444 let target_val = bf.expression;
24445 match target {
24446 DialectType::DuckDB => {
24447 let u_id = crate::expressions::Identifier::new("_u");
24448 let lambda =
24449 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
24450 parameters: vec![u_id.clone()],
24451 body: Expression::Neq(Box::new(BinaryOp {
24452 left: Expression::Identifier(u_id),
24453 right: target_val,
24454 left_comments: Vec::new(),
24455 operator_comments: Vec::new(),
24456 trailing_comments: Vec::new(),
24457 })),
24458 colon: false,
24459 parameter_types: Vec::new(),
24460 }));
24461 Ok(Expression::Function(Box::new(Function::new(
24462 "LIST_FILTER".to_string(),
24463 vec![arr, lambda],
24464 ))))
24465 }
24466 DialectType::ClickHouse => {
24467 let u_id = crate::expressions::Identifier::new("_u");
24468 let lambda =
24469 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
24470 parameters: vec![u_id.clone()],
24471 body: Expression::Neq(Box::new(BinaryOp {
24472 left: Expression::Identifier(u_id),
24473 right: target_val,
24474 left_comments: Vec::new(),
24475 operator_comments: Vec::new(),
24476 trailing_comments: Vec::new(),
24477 })),
24478 colon: false,
24479 parameter_types: Vec::new(),
24480 }));
24481 Ok(Expression::Function(Box::new(Function::new(
24482 "arrayFilter".to_string(),
24483 vec![lambda, arr],
24484 ))))
24485 }
24486 DialectType::BigQuery => {
24487 // ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)
24488 let u_id = crate::expressions::Identifier::new("_u");
24489 let u_col = Expression::Column(crate::expressions::Column {
24490 name: u_id.clone(),
24491 table: None,
24492 join_mark: false,
24493 trailing_comments: Vec::new(),
24494 });
24495 let unnest_expr =
24496 Expression::Unnest(Box::new(crate::expressions::UnnestFunc {
24497 this: arr,
24498 expressions: Vec::new(),
24499 with_ordinality: false,
24500 alias: None,
24501 offset_alias: None,
24502 }));
24503 let aliased_unnest =
24504 Expression::Alias(Box::new(crate::expressions::Alias {
24505 this: unnest_expr,
24506 alias: u_id.clone(),
24507 column_aliases: Vec::new(),
24508 pre_alias_comments: Vec::new(),
24509 trailing_comments: Vec::new(),
24510 }));
24511 let where_cond = Expression::Neq(Box::new(BinaryOp {
24512 left: u_col.clone(),
24513 right: target_val,
24514 left_comments: Vec::new(),
24515 operator_comments: Vec::new(),
24516 trailing_comments: Vec::new(),
24517 }));
24518 let subquery = Expression::Select(Box::new(
24519 crate::expressions::Select::new()
24520 .column(u_col)
24521 .from(aliased_unnest)
24522 .where_(where_cond),
24523 ));
24524 Ok(Expression::ArrayFunc(Box::new(
24525 crate::expressions::ArrayConstructor {
24526 expressions: vec![subquery],
24527 bracket_notation: false,
24528 use_list_keyword: false,
24529 },
24530 )))
24531 }
24532 _ => Ok(Expression::ArrayRemove(Box::new(
24533 crate::expressions::BinaryFunc {
24534 original_name: None,
24535 this: arr,
24536 expression: target_val,
24537 },
24538 ))),
24539 }
24540 } else {
24541 Ok(e)
24542 }
24543 }
24544
24545 Action::ArrayReverseConvert => {
24546 // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
24547 if let Expression::ArrayReverse(af) = e {
24548 Ok(Expression::Function(Box::new(Function::new(
24549 "arrayReverse".to_string(),
24550 vec![af.this],
24551 ))))
24552 } else {
24553 Ok(e)
24554 }
24555 }
24556
24557 Action::JsonKeysConvert => {
24558 // JSON_KEYS(x) -> JSON_OBJECT_KEYS/OBJECT_KEYS
24559 if let Expression::JsonKeys(uf) = e {
24560 match target {
24561 DialectType::Spark | DialectType::Databricks => {
24562 Ok(Expression::Function(Box::new(Function::new(
24563 "JSON_OBJECT_KEYS".to_string(),
24564 vec![uf.this],
24565 ))))
24566 }
24567 DialectType::Snowflake => Ok(Expression::Function(Box::new(
24568 Function::new("OBJECT_KEYS".to_string(), vec![uf.this]),
24569 ))),
24570 _ => Ok(Expression::JsonKeys(uf)),
24571 }
24572 } else {
24573 Ok(e)
24574 }
24575 }
24576
24577 Action::ParseJsonStrip => {
24578 // PARSE_JSON(x) -> x (strip wrapper for SQLite/Doris)
24579 if let Expression::ParseJson(uf) = e {
24580 Ok(uf.this)
24581 } else {
24582 Ok(e)
24583 }
24584 }
24585
24586 Action::ArraySizeDrill => {
24587 // ARRAY_SIZE(x) -> REPEATED_COUNT(x) for Drill
24588 if let Expression::ArraySize(uf) = e {
24589 Ok(Expression::Function(Box::new(Function::new(
24590 "REPEATED_COUNT".to_string(),
24591 vec![uf.this],
24592 ))))
24593 } else {
24594 Ok(e)
24595 }
24596 }
24597
24598 Action::WeekOfYearToWeekIso => {
24599 // WEEKOFYEAR(x) -> WEEKISO(x) for Snowflake (cross-dialect normalization)
24600 if let Expression::WeekOfYear(uf) = e {
24601 Ok(Expression::Function(Box::new(Function::new(
24602 "WEEKISO".to_string(),
24603 vec![uf.this],
24604 ))))
24605 } else {
24606 Ok(e)
24607 }
24608 }
24609 }
24610 })
24611 }
24612
24613 /// Convert DATE_TRUNC('unit', x) to MySQL-specific expansion
24614 fn date_trunc_to_mysql(unit: &str, expr: &Expression) -> Result<Expression> {
24615 use crate::expressions::Function;
24616 match unit {
24617 "DAY" => {
24618 // DATE(x)
24619 Ok(Expression::Function(Box::new(Function::new(
24620 "DATE".to_string(),
24621 vec![expr.clone()],
24622 ))))
24623 }
24624 "WEEK" => {
24625 // STR_TO_DATE(CONCAT(YEAR(x), ' ', WEEK(x, 1), ' 1'), '%Y %u %w')
24626 let year_x = Expression::Function(Box::new(Function::new(
24627 "YEAR".to_string(),
24628 vec![expr.clone()],
24629 )));
24630 let week_x = Expression::Function(Box::new(Function::new(
24631 "WEEK".to_string(),
24632 vec![expr.clone(), Expression::number(1)],
24633 )));
24634 let concat_args = vec![
24635 year_x,
24636 Expression::string(" "),
24637 week_x,
24638 Expression::string(" 1"),
24639 ];
24640 let concat = Expression::Function(Box::new(Function::new(
24641 "CONCAT".to_string(),
24642 concat_args,
24643 )));
24644 Ok(Expression::Function(Box::new(Function::new(
24645 "STR_TO_DATE".to_string(),
24646 vec![concat, Expression::string("%Y %u %w")],
24647 ))))
24648 }
24649 "MONTH" => {
24650 // STR_TO_DATE(CONCAT(YEAR(x), ' ', MONTH(x), ' 1'), '%Y %c %e')
24651 let year_x = Expression::Function(Box::new(Function::new(
24652 "YEAR".to_string(),
24653 vec![expr.clone()],
24654 )));
24655 let month_x = Expression::Function(Box::new(Function::new(
24656 "MONTH".to_string(),
24657 vec![expr.clone()],
24658 )));
24659 let concat_args = vec![
24660 year_x,
24661 Expression::string(" "),
24662 month_x,
24663 Expression::string(" 1"),
24664 ];
24665 let concat = Expression::Function(Box::new(Function::new(
24666 "CONCAT".to_string(),
24667 concat_args,
24668 )));
24669 Ok(Expression::Function(Box::new(Function::new(
24670 "STR_TO_DATE".to_string(),
24671 vec![concat, Expression::string("%Y %c %e")],
24672 ))))
24673 }
24674 "QUARTER" => {
24675 // STR_TO_DATE(CONCAT(YEAR(x), ' ', QUARTER(x) * 3 - 2, ' 1'), '%Y %c %e')
24676 let year_x = Expression::Function(Box::new(Function::new(
24677 "YEAR".to_string(),
24678 vec![expr.clone()],
24679 )));
24680 let quarter_x = Expression::Function(Box::new(Function::new(
24681 "QUARTER".to_string(),
24682 vec![expr.clone()],
24683 )));
24684 // QUARTER(x) * 3 - 2
24685 let mul = Expression::Mul(Box::new(crate::expressions::BinaryOp {
24686 left: quarter_x,
24687 right: Expression::number(3),
24688 left_comments: Vec::new(),
24689 operator_comments: Vec::new(),
24690 trailing_comments: Vec::new(),
24691 }));
24692 let sub = Expression::Sub(Box::new(crate::expressions::BinaryOp {
24693 left: mul,
24694 right: Expression::number(2),
24695 left_comments: Vec::new(),
24696 operator_comments: Vec::new(),
24697 trailing_comments: Vec::new(),
24698 }));
24699 let concat_args = vec![
24700 year_x,
24701 Expression::string(" "),
24702 sub,
24703 Expression::string(" 1"),
24704 ];
24705 let concat = Expression::Function(Box::new(Function::new(
24706 "CONCAT".to_string(),
24707 concat_args,
24708 )));
24709 Ok(Expression::Function(Box::new(Function::new(
24710 "STR_TO_DATE".to_string(),
24711 vec![concat, Expression::string("%Y %c %e")],
24712 ))))
24713 }
24714 "YEAR" => {
24715 // STR_TO_DATE(CONCAT(YEAR(x), ' 1 1'), '%Y %c %e')
24716 let year_x = Expression::Function(Box::new(Function::new(
24717 "YEAR".to_string(),
24718 vec![expr.clone()],
24719 )));
24720 let concat_args = vec![year_x, Expression::string(" 1 1")];
24721 let concat = Expression::Function(Box::new(Function::new(
24722 "CONCAT".to_string(),
24723 concat_args,
24724 )));
24725 Ok(Expression::Function(Box::new(Function::new(
24726 "STR_TO_DATE".to_string(),
24727 vec![concat, Expression::string("%Y %c %e")],
24728 ))))
24729 }
24730 _ => {
24731 // Unsupported unit -> keep as DATE_TRUNC
24732 Ok(Expression::Function(Box::new(Function::new(
24733 "DATE_TRUNC".to_string(),
24734 vec![Expression::string(unit), expr.clone()],
24735 ))))
24736 }
24737 }
24738 }
24739
24740 /// Check if a DataType is or contains VARCHAR/CHAR (for Spark VARCHAR->STRING normalization)
24741 fn has_varchar_char_type(dt: &crate::expressions::DataType) -> bool {
24742 use crate::expressions::DataType;
24743 match dt {
24744 DataType::VarChar { .. } | DataType::Char { .. } => true,
24745 DataType::Struct { fields, .. } => fields
24746 .iter()
24747 .any(|f| Self::has_varchar_char_type(&f.data_type)),
24748 _ => false,
24749 }
24750 }
24751
24752 /// Recursively normalize VARCHAR/CHAR to STRING in a DataType (for Spark)
24753 fn normalize_varchar_to_string(
24754 dt: crate::expressions::DataType,
24755 ) -> crate::expressions::DataType {
24756 use crate::expressions::DataType;
24757 match dt {
24758 DataType::VarChar { .. } | DataType::Char { .. } => DataType::Custom {
24759 name: "STRING".to_string(),
24760 },
24761 DataType::Struct { fields, nested } => {
24762 let fields = fields
24763 .into_iter()
24764 .map(|mut f| {
24765 f.data_type = Self::normalize_varchar_to_string(f.data_type);
24766 f
24767 })
24768 .collect();
24769 DataType::Struct { fields, nested }
24770 }
24771 other => other,
24772 }
24773 }
24774
24775 /// Normalize an interval string like '1day' or ' 2 days ' to proper INTERVAL expression
24776 fn normalize_interval_string(expr: Expression, target: DialectType) -> Expression {
24777 if let Expression::Literal(crate::expressions::Literal::String(ref s)) = expr {
24778 // Try to parse patterns like '1day', '1 day', '2 days', ' 2 days '
24779 let trimmed = s.trim();
24780
24781 // Find where digits end and unit text begins
24782 let digit_end = trimmed
24783 .find(|c: char| !c.is_ascii_digit())
24784 .unwrap_or(trimmed.len());
24785 if digit_end == 0 || digit_end == trimmed.len() {
24786 return expr;
24787 }
24788 let num = &trimmed[..digit_end];
24789 let unit_text = trimmed[digit_end..].trim().to_uppercase();
24790 if unit_text.is_empty() {
24791 return expr;
24792 }
24793
24794 let known_units = [
24795 "DAY", "DAYS", "HOUR", "HOURS", "MINUTE", "MINUTES", "SECOND", "SECONDS", "WEEK",
24796 "WEEKS", "MONTH", "MONTHS", "YEAR", "YEARS",
24797 ];
24798 if !known_units.contains(&unit_text.as_str()) {
24799 return expr;
24800 }
24801
24802 let unit_str = unit_text.clone();
24803 // Singularize
24804 let unit_singular = if unit_str.ends_with('S') && unit_str.len() > 3 {
24805 &unit_str[..unit_str.len() - 1]
24806 } else {
24807 &unit_str
24808 };
24809 let unit = unit_singular;
24810
24811 match target {
24812 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
24813 // INTERVAL '2' DAY
24814 let iu = match unit {
24815 "DAY" => crate::expressions::IntervalUnit::Day,
24816 "HOUR" => crate::expressions::IntervalUnit::Hour,
24817 "MINUTE" => crate::expressions::IntervalUnit::Minute,
24818 "SECOND" => crate::expressions::IntervalUnit::Second,
24819 "WEEK" => crate::expressions::IntervalUnit::Week,
24820 "MONTH" => crate::expressions::IntervalUnit::Month,
24821 "YEAR" => crate::expressions::IntervalUnit::Year,
24822 _ => return expr,
24823 };
24824 return Expression::Interval(Box::new(crate::expressions::Interval {
24825 this: Some(Expression::string(num)),
24826 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
24827 unit: iu,
24828 use_plural: false,
24829 }),
24830 }));
24831 }
24832 DialectType::PostgreSQL | DialectType::Redshift | DialectType::DuckDB => {
24833 // INTERVAL '2 DAYS'
24834 let plural = if num != "1" && !unit_str.ends_with('S') {
24835 format!("{} {}S", num, unit)
24836 } else if unit_str.ends_with('S') {
24837 format!("{} {}", num, unit_str)
24838 } else {
24839 format!("{} {}", num, unit)
24840 };
24841 return Expression::Interval(Box::new(crate::expressions::Interval {
24842 this: Some(Expression::string(&plural)),
24843 unit: None,
24844 }));
24845 }
24846 _ => {
24847 // Spark/Databricks/Hive: INTERVAL '1' DAY
24848 let iu = match unit {
24849 "DAY" => crate::expressions::IntervalUnit::Day,
24850 "HOUR" => crate::expressions::IntervalUnit::Hour,
24851 "MINUTE" => crate::expressions::IntervalUnit::Minute,
24852 "SECOND" => crate::expressions::IntervalUnit::Second,
24853 "WEEK" => crate::expressions::IntervalUnit::Week,
24854 "MONTH" => crate::expressions::IntervalUnit::Month,
24855 "YEAR" => crate::expressions::IntervalUnit::Year,
24856 _ => return expr,
24857 };
24858 return Expression::Interval(Box::new(crate::expressions::Interval {
24859 this: Some(Expression::string(num)),
24860 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
24861 unit: iu,
24862 use_plural: false,
24863 }),
24864 }));
24865 }
24866 }
24867 }
24868 // If it's already an INTERVAL expression, pass through
24869 expr
24870 }
24871
    /// Rewrite SELECT expressions containing UNNEST into expanded form with CROSS JOINs.
    /// DuckDB: SELECT UNNEST(arr1), UNNEST(arr2) ->
    /// BigQuery: SELECT IF(pos = pos_2, col, NULL) AS col, ... FROM UNNEST(GENERATE_ARRAY(0, ...)) AS pos CROSS JOIN ...
    /// Presto: SELECT IF(_u.pos = _u_2.pos_2, _u_2.col) AS col, ... FROM UNNEST(SEQUENCE(1, ...)) AS _u(pos) CROSS JOIN ...
    ///
    /// Returns `Some(rewritten SELECT)` when at least one projection expression
    /// contains an UNNEST and the target dialect is handled (BigQuery,
    /// Presto/Trino, or Snowflake); returns `None` otherwise so the caller can
    /// keep the original SELECT unchanged.
    fn rewrite_unnest_expansion(
        select: &crate::expressions::Select,
        target: DialectType,
    ) -> Option<crate::expressions::Select> {
        use crate::expressions::{
            Alias, BinaryOp, Column, From, Function, Identifier, Join, JoinKind, Literal,
            UnnestFunc,
        };

        // Presto/Trino SEQUENCE positions are 1-based; the generated ranges for
        // the other targets start at 0.
        let index_offset: i64 = match target {
            DialectType::Presto | DialectType::Trino => 1,
            _ => 0, // BigQuery, Snowflake
        };

        // Conditional function name differs per dialect (Snowflake spells it IFF).
        let if_func_name = match target {
            DialectType::Snowflake => "IFF",
            _ => "IF",
        };

        // Array-length function used both to size the shared position series
        // and in the WHERE alignment conditions.
        let array_length_func = match target {
            DialectType::BigQuery => "ARRAY_LENGTH",
            DialectType::Presto | DialectType::Trino => "CARDINALITY",
            DialectType::Snowflake => "ARRAY_SIZE",
            _ => "ARRAY_LENGTH",
        };

        // Presto/Trino/Snowflake qualify columns with table aliases (_u, _u_2, ...);
        // BigQuery uses bare identifiers.
        let use_table_aliases = matches!(
            target,
            DialectType::Presto | DialectType::Trino | DialectType::Snowflake
        );
        // BigQuery/Snowflake get an explicit NULL third argument in IF/IFF.
        let null_third_arg = matches!(target, DialectType::BigQuery | DialectType::Snowflake);

        // Build a column reference, optionally qualified by a table alias.
        fn make_col(name: &str, table: Option<&str>) -> Expression {
            if let Some(tbl) = table {
                Expression::Column(Column {
                    name: Identifier::new(name.to_string()),
                    table: Some(Identifier::new(tbl.to_string())),
                    join_mark: false,
                    trailing_comments: Vec::new(),
                })
            } else {
                Expression::Identifier(Identifier::new(name.to_string()))
            }
        }

        // Wrap an expression as a bare CROSS JOIN (no ON/USING condition).
        fn make_join(this: Expression) -> Join {
            Join {
                this,
                on: None,
                using: Vec::new(),
                kind: JoinKind::Cross,
                use_inner_keyword: false,
                use_outer_keyword: false,
                deferred_condition: false,
                join_hint: None,
                match_condition: None,
                pivots: Vec::new(),
                comments: Vec::new(),
                nesting_group: 0,
                directed: false,
            }
        }

        // Collect UNNEST info from SELECT expressions
        struct UnnestInfo {
            arr_expr: Expression,            // the array argument of UNNEST
            col_alias: String,               // generated column alias ("col", "col_2", ...)
            pos_alias: String,               // generated position alias ("pos_2", "pos_3", ...)
            source_alias: String,            // generated table alias ("_u_2", "_u_3", ...)
            original_expr: Expression,       // full projection expression containing the UNNEST
            has_outer_alias: Option<String>, // explicit `AS name` on the projection, if any
        }

        let mut unnest_infos: Vec<UnnestInfo> = Vec::new();
        let mut col_counter = 0usize;
        // pos/source counters start at 1 so generated aliases begin at "_2",
        // leaving "pos"/"_u" free for the shared series source below.
        let mut pos_counter = 1usize;
        let mut source_counter = 1usize;

        // Pull the array argument out of an UNNEST found anywhere inside the
        // expression (possibly wrapped in an alias or +/-/*// arithmetic).
        fn extract_unnest_arg(expr: &Expression) -> Option<Expression> {
            match expr {
                Expression::Unnest(u) => Some(u.this.clone()),
                Expression::Function(f)
                    if f.name.eq_ignore_ascii_case("UNNEST") && !f.args.is_empty() =>
                {
                    Some(f.args[0].clone())
                }
                Expression::Alias(a) => extract_unnest_arg(&a.this),
                Expression::Add(op)
                | Expression::Sub(op)
                | Expression::Mul(op)
                | Expression::Div(op) => {
                    extract_unnest_arg(&op.left).or_else(|| extract_unnest_arg(&op.right))
                }
                _ => None,
            }
        }

        // Name from an explicit `expr AS name` alias, if present.
        fn get_alias_name(expr: &Expression) -> Option<String> {
            if let Expression::Alias(a) = expr {
                Some(a.alias.name.clone())
            } else {
                None
            }
        }

        for sel_expr in &select.expressions {
            if let Some(arr) = extract_unnest_arg(sel_expr) {
                col_counter += 1;
                pos_counter += 1;
                source_counter += 1;

                // The first column keeps the plain "col" name; later ones are numbered.
                let col_alias = if col_counter == 1 {
                    "col".to_string()
                } else {
                    format!("col_{}", col_counter)
                };
                let pos_alias = format!("pos_{}", pos_counter);
                let source_alias = format!("_u_{}", source_counter);
                let has_outer_alias = get_alias_name(sel_expr);

                unnest_infos.push(UnnestInfo {
                    arr_expr: arr,
                    col_alias,
                    pos_alias,
                    source_alias,
                    original_expr: sel_expr.clone(),
                    has_outer_alias,
                });
            }
        }

        // No UNNEST in the projection — nothing to rewrite.
        if unnest_infos.is_empty() {
            return None;
        }

        // Shared position series source: aliased `pos` (and `_u` for dialects
        // that qualify with table aliases).
        let series_alias = "pos".to_string();
        let series_source_alias = "_u".to_string();
        let tbl_ref = if use_table_aliases {
            Some(series_source_alias.as_str())
        } else {
            None
        };

        // Build new SELECT expressions
        let mut new_select_exprs = Vec::new();
        for info in &unnest_infos {
            // Prefer the user-supplied alias over the generated one.
            let actual_col_name = info.has_outer_alias.as_ref().unwrap_or(&info.col_alias);
            let src_ref = if use_table_aliases {
                Some(info.source_alias.as_str())
            } else {
                None
            };

            let pos_col = make_col(&series_alias, tbl_ref);
            let unnest_pos_col = make_col(&info.pos_alias, src_ref);
            let col_ref = make_col(actual_col_name, src_ref);

            // IF(pos = pos_n, col_n[, NULL]): emit the element only on the row
            // whose series position matches this array's position.
            let eq_cond = Expression::Eq(Box::new(BinaryOp::new(
                pos_col.clone(),
                unnest_pos_col.clone(),
            )));
            let mut if_args = vec![eq_cond, col_ref];
            if null_third_arg {
                if_args.push(Expression::Null(crate::expressions::Null));
            }

            let if_expr =
                Expression::Function(Box::new(Function::new(if_func_name.to_string(), if_args)));
            // Splice the IF back into the original projection shape so any
            // surrounding arithmetic is preserved (the alias is re-applied below).
            let final_expr = Self::replace_unnest_with_if(&info.original_expr, &if_expr);

            new_select_exprs.push(Expression::Alias(Box::new(Alias::new(
                final_expr,
                Identifier::new(actual_col_name.clone()),
            ))));
        }

        // Build array size expressions for GREATEST
        let size_exprs: Vec<Expression> = unnest_infos
            .iter()
            .map(|info| {
                Expression::Function(Box::new(Function::new(
                    array_length_func.to_string(),
                    vec![info.arr_expr.clone()],
                )))
            })
            .collect();

        // The shared series must cover the longest of the unnested arrays.
        let greatest =
            Expression::Function(Box::new(Function::new("GREATEST".to_string(), size_exprs)));

        // 0-based series ends at GREATEST(...) - 1; 1-based uses GREATEST(...) as-is.
        let series_end = if index_offset == 0 {
            Expression::Sub(Box::new(BinaryOp::new(
                greatest,
                Expression::Literal(Literal::Number("1".to_string())),
            )))
        } else {
            greatest
        };

        // Build the position array source
        let series_unnest_expr = match target {
            DialectType::BigQuery => {
                // UNNEST(GENERATE_ARRAY(0, GREATEST(...) - 1))
                let gen_array = Expression::Function(Box::new(Function::new(
                    "GENERATE_ARRAY".to_string(),
                    vec![
                        Expression::Literal(Literal::Number("0".to_string())),
                        series_end,
                    ],
                )));
                Expression::Unnest(Box::new(UnnestFunc {
                    this: gen_array,
                    expressions: Vec::new(),
                    with_ordinality: false,
                    alias: None,
                    offset_alias: None,
                }))
            }
            DialectType::Presto | DialectType::Trino => {
                // UNNEST(SEQUENCE(1, GREATEST(...)))
                let sequence = Expression::Function(Box::new(Function::new(
                    "SEQUENCE".to_string(),
                    vec![
                        Expression::Literal(Literal::Number("1".to_string())),
                        series_end,
                    ],
                )));
                Expression::Unnest(Box::new(UnnestFunc {
                    this: sequence,
                    expressions: Vec::new(),
                    with_ordinality: false,
                    alias: None,
                    offset_alias: None,
                }))
            }
            DialectType::Snowflake => {
                // TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (GREATEST(...) - 1) + 1)))
                // NOTE(review): the "+ 1" presumably compensates for an exclusive
                // end bound in ARRAY_GENERATE_RANGE — confirm against Snowflake docs.
                let range_end = Expression::Add(Box::new(BinaryOp::new(
                    Expression::Paren(Box::new(crate::expressions::Paren {
                        this: series_end,
                        trailing_comments: Vec::new(),
                    })),
                    Expression::Literal(Literal::Number("1".to_string())),
                )));
                let gen_range = Expression::Function(Box::new(Function::new(
                    "ARRAY_GENERATE_RANGE".to_string(),
                    vec![
                        Expression::Literal(Literal::Number("0".to_string())),
                        range_end,
                    ],
                )));
                let flatten_arg =
                    Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
                        name: Identifier::new("INPUT".to_string()),
                        value: gen_range,
                        separator: crate::expressions::NamedArgSeparator::DArrow,
                    }));
                let flatten = Expression::Function(Box::new(Function::new(
                    "FLATTEN".to_string(),
                    vec![flatten_arg],
                )));
                Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])))
            }
            // Unsupported target dialect: leave the query untouched.
            _ => return None,
        };

        // Build series alias expression
        let series_alias_expr = if use_table_aliases {
            // Snowflake's FLATTEN output is aliased with six column names; the
            // series position is the fifth column alias here.
            let col_aliases = if matches!(target, DialectType::Snowflake) {
                vec![
                    Identifier::new("seq".to_string()),
                    Identifier::new("key".to_string()),
                    Identifier::new("path".to_string()),
                    Identifier::new("index".to_string()),
                    Identifier::new(series_alias.clone()),
                    Identifier::new("this".to_string()),
                ]
            } else {
                vec![Identifier::new(series_alias.clone())]
            };
            Expression::Alias(Box::new(Alias {
                this: series_unnest_expr,
                alias: Identifier::new(series_source_alias.clone()),
                column_aliases: col_aliases,
                pre_alias_comments: Vec::new(),
                trailing_comments: Vec::new(),
            }))
        } else {
            Expression::Alias(Box::new(Alias::new(
                series_unnest_expr,
                Identifier::new(series_alias.clone()),
            )))
        };

        // Build CROSS JOINs for each UNNEST
        let mut joins = Vec::new();
        for info in &unnest_infos {
            let actual_col_name = info.has_outer_alias.as_ref().unwrap_or(&info.col_alias);

            let unnest_join_expr = match target {
                DialectType::BigQuery => {
                    // UNNEST([1,2,3]) AS col WITH OFFSET AS pos_2
                    let unnest = UnnestFunc {
                        this: info.arr_expr.clone(),
                        expressions: Vec::new(),
                        with_ordinality: true,
                        alias: Some(Identifier::new(actual_col_name.clone())),
                        offset_alias: Some(Identifier::new(info.pos_alias.clone())),
                    };
                    Expression::Unnest(Box::new(unnest))
                }
                DialectType::Presto | DialectType::Trino => {
                    // UNNEST(arr) WITH ORDINALITY AS _u_2(col, pos_2)
                    let unnest = UnnestFunc {
                        this: info.arr_expr.clone(),
                        expressions: Vec::new(),
                        with_ordinality: true,
                        alias: None,
                        offset_alias: None,
                    };
                    Expression::Alias(Box::new(Alias {
                        this: Expression::Unnest(Box::new(unnest)),
                        alias: Identifier::new(info.source_alias.clone()),
                        column_aliases: vec![
                            Identifier::new(actual_col_name.clone()),
                            Identifier::new(info.pos_alias.clone()),
                        ],
                        pre_alias_comments: Vec::new(),
                        trailing_comments: Vec::new(),
                    }))
                }
                DialectType::Snowflake => {
                    // TABLE(FLATTEN(INPUT => arr)) AS _u_2(seq, key, path, pos_2, col, this)
                    let flatten_arg =
                        Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
                            name: Identifier::new("INPUT".to_string()),
                            value: info.arr_expr.clone(),
                            separator: crate::expressions::NamedArgSeparator::DArrow,
                        }));
                    let flatten = Expression::Function(Box::new(Function::new(
                        "FLATTEN".to_string(),
                        vec![flatten_arg],
                    )));
                    let table_fn = Expression::Function(Box::new(Function::new(
                        "TABLE".to_string(),
                        vec![flatten],
                    )));
                    Expression::Alias(Box::new(Alias {
                        this: table_fn,
                        alias: Identifier::new(info.source_alias.clone()),
                        column_aliases: vec![
                            Identifier::new("seq".to_string()),
                            Identifier::new("key".to_string()),
                            Identifier::new("path".to_string()),
                            Identifier::new(info.pos_alias.clone()),
                            Identifier::new(actual_col_name.clone()),
                            Identifier::new("this".to_string()),
                        ],
                        pre_alias_comments: Vec::new(),
                        trailing_comments: Vec::new(),
                    }))
                }
                _ => return None,
            };

            joins.push(make_join(unnest_join_expr));
        }

        // Build WHERE clause
        // Each UNNEST contributes: pos = pos_n OR (pos > <last> AND pos_n = <last>),
        // where <last> is the array's final position. Rows are aligned by
        // position; once the series passes an array's end, the row holding its
        // last position is retained (the IF above yields the NULL padding).
        let mut where_conditions: Vec<Expression> = Vec::new();
        for info in &unnest_infos {
            let src_ref = if use_table_aliases {
                Some(info.source_alias.as_str())
            } else {
                None
            };
            let pos_col = make_col(&series_alias, tbl_ref);
            let unnest_pos_col = make_col(&info.pos_alias, src_ref);

            let arr_size = Expression::Function(Box::new(Function::new(
                array_length_func.to_string(),
                vec![info.arr_expr.clone()],
            )));

            // For 0-based dialects the last position is (size - 1); 1-based
            // dialects use the size directly.
            let size_ref = if index_offset == 0 {
                Expression::Paren(Box::new(crate::expressions::Paren {
                    this: Expression::Sub(Box::new(BinaryOp::new(
                        arr_size,
                        Expression::Literal(Literal::Number("1".to_string())),
                    ))),
                    trailing_comments: Vec::new(),
                }))
            } else {
                arr_size
            };

            let eq = Expression::Eq(Box::new(BinaryOp::new(
                pos_col.clone(),
                unnest_pos_col.clone(),
            )));
            let gt = Expression::Gt(Box::new(BinaryOp::new(pos_col, size_ref.clone())));
            let pos_eq_size = Expression::Eq(Box::new(BinaryOp::new(unnest_pos_col, size_ref)));
            let and_cond = Expression::And(Box::new(BinaryOp::new(gt, pos_eq_size)));
            let paren_and = Expression::Paren(Box::new(crate::expressions::Paren {
                this: and_cond,
                trailing_comments: Vec::new(),
            }));
            let or_cond = Expression::Or(Box::new(BinaryOp::new(eq, paren_and)));

            where_conditions.push(or_cond);
        }

        let where_expr = if where_conditions.len() == 1 {
            // Single condition: no parens needed
            where_conditions.into_iter().next().unwrap()
        } else {
            // Multiple conditions: wrap each OR in parens, then combine with AND
            let wrap = |e: Expression| {
                Expression::Paren(Box::new(crate::expressions::Paren {
                    this: e,
                    trailing_comments: Vec::new(),
                }))
            };
            let mut iter = where_conditions.into_iter();
            // len >= 2 in this branch, so the two unwraps below cannot panic.
            let first = wrap(iter.next().unwrap());
            let second = wrap(iter.next().unwrap());
            let mut combined = Expression::Paren(Box::new(crate::expressions::Paren {
                this: Expression::And(Box::new(BinaryOp::new(first, second))),
                trailing_comments: Vec::new(),
            }));
            for cond in iter {
                combined = Expression::And(Box::new(BinaryOp::new(combined, wrap(cond))));
            }
            combined
        };

        // Build the new SELECT
        let mut new_select = select.clone();
        new_select.expressions = new_select_exprs;

        // The series source becomes either an extra CROSS JOIN (when a FROM
        // already exists) or the FROM clause itself; the per-UNNEST joins
        // always append after it.
        if new_select.from.is_some() {
            let mut all_joins = vec![make_join(series_alias_expr)];
            all_joins.extend(joins);
            new_select.joins.extend(all_joins);
        } else {
            new_select.from = Some(From {
                expressions: vec![series_alias_expr],
            });
            new_select.joins.extend(joins);
        }

        // AND the position-alignment filter onto any pre-existing WHERE clause.
        if let Some(ref existing_where) = new_select.where_clause {
            let combined = Expression::And(Box::new(BinaryOp::new(
                existing_where.this.clone(),
                where_expr,
            )));
            new_select.where_clause = Some(crate::expressions::Where { this: combined });
        } else {
            new_select.where_clause = Some(crate::expressions::Where { this: where_expr });
        }

        Some(new_select)
    }
25334
25335 /// Helper to replace UNNEST(...) inside an expression with a replacement expression.
25336 fn replace_unnest_with_if(original: &Expression, replacement: &Expression) -> Expression {
25337 match original {
25338 Expression::Unnest(_) => replacement.clone(),
25339 Expression::Function(f) if f.name.eq_ignore_ascii_case("UNNEST") => replacement.clone(),
25340 Expression::Alias(a) => Self::replace_unnest_with_if(&a.this, replacement),
25341 Expression::Add(op) => {
25342 let left = Self::replace_unnest_with_if(&op.left, replacement);
25343 let right = Self::replace_unnest_with_if(&op.right, replacement);
25344 Expression::Add(Box::new(crate::expressions::BinaryOp::new(left, right)))
25345 }
25346 Expression::Sub(op) => {
25347 let left = Self::replace_unnest_with_if(&op.left, replacement);
25348 let right = Self::replace_unnest_with_if(&op.right, replacement);
25349 Expression::Sub(Box::new(crate::expressions::BinaryOp::new(left, right)))
25350 }
25351 Expression::Mul(op) => {
25352 let left = Self::replace_unnest_with_if(&op.left, replacement);
25353 let right = Self::replace_unnest_with_if(&op.right, replacement);
25354 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(left, right)))
25355 }
25356 Expression::Div(op) => {
25357 let left = Self::replace_unnest_with_if(&op.left, replacement);
25358 let right = Self::replace_unnest_with_if(&op.right, replacement);
25359 Expression::Div(Box::new(crate::expressions::BinaryOp::new(left, right)))
25360 }
25361 _ => original.clone(),
25362 }
25363 }
25364
25365 /// Decompose a JSON path like `$.y[0].z` into individual parts: `["y", "0", "z"]`.
25366 /// Strips `$` prefix, handles bracket notation, quoted strings, and removes `[*]` wildcards.
25367 fn decompose_json_path(path: &str) -> Vec<String> {
25368 let mut parts = Vec::new();
25369 let path = if path.starts_with("$.") {
25370 &path[2..]
25371 } else if path.starts_with('$') {
25372 &path[1..]
25373 } else {
25374 path
25375 };
25376 if path.is_empty() {
25377 return parts;
25378 }
25379 let mut current = String::new();
25380 let chars: Vec<char> = path.chars().collect();
25381 let mut i = 0;
25382 while i < chars.len() {
25383 match chars[i] {
25384 '.' => {
25385 if !current.is_empty() {
25386 parts.push(current.clone());
25387 current.clear();
25388 }
25389 i += 1;
25390 }
25391 '[' => {
25392 if !current.is_empty() {
25393 parts.push(current.clone());
25394 current.clear();
25395 }
25396 i += 1;
25397 let mut bracket_content = String::new();
25398 while i < chars.len() && chars[i] != ']' {
25399 if chars[i] == '"' || chars[i] == '\'' {
25400 let quote = chars[i];
25401 i += 1;
25402 while i < chars.len() && chars[i] != quote {
25403 bracket_content.push(chars[i]);
25404 i += 1;
25405 }
25406 if i < chars.len() {
25407 i += 1;
25408 }
25409 } else {
25410 bracket_content.push(chars[i]);
25411 i += 1;
25412 }
25413 }
25414 if i < chars.len() {
25415 i += 1;
25416 }
25417 if bracket_content != "*" {
25418 parts.push(bracket_content);
25419 }
25420 }
25421 _ => {
25422 current.push(chars[i]);
25423 i += 1;
25424 }
25425 }
25426 }
25427 if !current.is_empty() {
25428 parts.push(current);
25429 }
25430 parts
25431 }
25432
25433 /// Strip `$` prefix from a JSON path, keeping the rest.
25434 /// `$.y[0].z` -> `y[0].z`, `$["a b"]` -> `["a b"]`
25435 fn strip_json_dollar_prefix(path: &str) -> String {
25436 if path.starts_with("$.") {
25437 path[2..].to_string()
25438 } else if path.starts_with('$') {
25439 path[1..].to_string()
25440 } else {
25441 path.to_string()
25442 }
25443 }
25444
25445 /// Strip `[*]` wildcards from a JSON path.
25446 /// `$.y[*]` -> `$.y`, `$.y[*].z` -> `$.y.z`
25447 fn strip_json_wildcards(path: &str) -> String {
25448 path.replace("[*]", "")
25449 .replace("..", ".") // Clean double dots from `$.y[*].z` -> `$.y..z`
25450 .trim_end_matches('.')
25451 .to_string()
25452 }
25453
25454 /// Convert bracket notation to dot notation for JSON paths.
25455 /// `$["a b"]` -> `$."a b"`, `$["key"]` -> `$.key`
25456 fn bracket_to_dot_notation(path: &str) -> String {
25457 let mut result = String::new();
25458 let chars: Vec<char> = path.chars().collect();
25459 let mut i = 0;
25460 while i < chars.len() {
25461 if chars[i] == '[' {
25462 // Read bracket content
25463 i += 1;
25464 let mut bracket_content = String::new();
25465 let mut is_quoted = false;
25466 let mut _quote_char = '"';
25467 while i < chars.len() && chars[i] != ']' {
25468 if chars[i] == '"' || chars[i] == '\'' {
25469 is_quoted = true;
25470 _quote_char = chars[i];
25471 i += 1;
25472 while i < chars.len() && chars[i] != _quote_char {
25473 bracket_content.push(chars[i]);
25474 i += 1;
25475 }
25476 if i < chars.len() {
25477 i += 1;
25478 }
25479 } else {
25480 bracket_content.push(chars[i]);
25481 i += 1;
25482 }
25483 }
25484 if i < chars.len() {
25485 i += 1;
25486 } // skip ]
25487 if bracket_content == "*" {
25488 // Keep wildcard as-is
25489 result.push_str("[*]");
25490 } else if is_quoted {
25491 // Quoted bracket -> dot notation with quotes
25492 result.push('.');
25493 result.push('"');
25494 result.push_str(&bracket_content);
25495 result.push('"');
25496 } else {
25497 // Numeric index -> keep as bracket
25498 result.push('[');
25499 result.push_str(&bracket_content);
25500 result.push(']');
25501 }
25502 } else {
25503 result.push(chars[i]);
25504 i += 1;
25505 }
25506 }
25507 result
25508 }
25509
25510 /// Convert JSON path bracket quoted strings to use single quotes instead of double quotes.
25511 /// `$["a b"]` -> `$['a b']`
25512 fn bracket_to_single_quotes(path: &str) -> String {
25513 let mut result = String::new();
25514 let chars: Vec<char> = path.chars().collect();
25515 let mut i = 0;
25516 while i < chars.len() {
25517 if chars[i] == '[' && i + 1 < chars.len() && chars[i + 1] == '"' {
25518 result.push('[');
25519 result.push('\'');
25520 i += 2; // skip [ and "
25521 while i < chars.len() && chars[i] != '"' {
25522 result.push(chars[i]);
25523 i += 1;
25524 }
25525 if i < chars.len() {
25526 i += 1;
25527 } // skip closing "
25528 result.push('\'');
25529 } else {
25530 result.push(chars[i]);
25531 i += 1;
25532 }
25533 }
25534 result
25535 }
25536
    /// Transform TSQL SELECT INTO -> CREATE TABLE AS for DuckDB/Snowflake
    /// or PostgreSQL #temp -> TEMPORARY.
    /// Also strips # from INSERT INTO #table for non-TSQL targets.
    ///
    /// A `#` prefix on the target table name (TSQL local temp table) is the
    /// temporariness signal; it is stripped from the emitted name and carried
    /// over as `temporary = true` where the target dialect supports it.
    /// Expressions that are not INSERT/SELECT-INTO, and targets with no
    /// special handling, are returned unchanged.
    fn transform_select_into(
        expr: Expression,
        _source: DialectType,
        target: DialectType,
    ) -> Expression {
        use crate::expressions::{CreateTable, Expression, TableRef};

        // Handle INSERT INTO #temp -> INSERT INTO temp for non-TSQL targets
        // (TSQL and Fabric understand the # prefix natively, so keep it there).
        if let Expression::Insert(ref insert) = expr {
            if insert.table.name.name.starts_with('#')
                && !matches!(target, DialectType::TSQL | DialectType::Fabric)
            {
                let mut new_insert = insert.clone();
                new_insert.table.name.name =
                    insert.table.name.name.trim_start_matches('#').to_string();
                return Expression::Insert(new_insert);
            }
            return expr;
        }

        if let Expression::Select(ref select) = expr {
            if let Some(ref into) = select.into {
                // The INTO target may have been parsed as a table reference or
                // a bare identifier; anything else yields an empty name.
                let table_name_raw = match &into.this {
                    Expression::Table(tr) => tr.name.name.clone(),
                    Expression::Identifier(id) => id.name.clone(),
                    _ => String::new(),
                };
                let is_temp = table_name_raw.starts_with('#') || into.temporary;
                let clean_name = table_name_raw.trim_start_matches('#').to_string();

                match target {
                    DialectType::DuckDB | DialectType::Snowflake => {
                        // SELECT INTO -> CREATE TABLE AS SELECT
                        let mut new_select = select.clone();
                        new_select.into = None;
                        // Only name / temporary / as_select carry information;
                        // every other CreateTable option is an inert default.
                        let ct = CreateTable {
                            name: TableRef::new(clean_name),
                            on_cluster: None,
                            columns: Vec::new(),
                            constraints: Vec::new(),
                            if_not_exists: false,
                            temporary: is_temp,
                            or_replace: false,
                            table_modifier: None,
                            as_select: Some(Expression::Select(new_select)),
                            as_select_parenthesized: false,
                            on_commit: None,
                            clone_source: None,
                            clone_at_clause: None,
                            shallow_clone: false,
                            is_copy: false,
                            leading_comments: Vec::new(),
                            with_properties: Vec::new(),
                            teradata_post_name_options: Vec::new(),
                            with_data: None,
                            with_statistics: None,
                            teradata_indexes: Vec::new(),
                            with_cte: None,
                            properties: Vec::new(),
                            partition_of: None,
                            post_table_properties: Vec::new(),
                            mysql_table_options: Vec::new(),
                            inherits: Vec::new(),
                            on_property: None,
                            copy_grants: false,
                            using_template: None,
                            rollup: None,
                        };
                        return Expression::CreateTable(Box::new(ct));
                    }
                    DialectType::PostgreSQL | DialectType::Redshift => {
                        // PostgreSQL: #foo -> INTO TEMPORARY foo
                        // (only rewrite when TEMPORARY isn't already set).
                        if is_temp && !into.temporary {
                            let mut new_select = select.clone();
                            let mut new_into = into.clone();
                            new_into.temporary = true;
                            new_into.unlogged = false;
                            new_into.this = Expression::Table(TableRef::new(clean_name));
                            new_select.into = Some(new_into);
                            Expression::Select(new_select)
                        } else {
                            expr
                        }
                    }
                    _ => expr,
                }
            } else {
                expr
            }
        } else {
            expr
        }
    }
25633
    /// Transform CREATE TABLE WITH properties for cross-dialect transpilation.
    /// Handles FORMAT, PARTITIONED_BY, and other Presto WITH properties.
    ///
    /// Rewrites `ct` in place, in two passes:
    /// 1. Presto-style `with_properties` are drained and re-targeted: kept
    ///    (renormalized) for Presto/Trino/Athena, converted to STORED AS /
    ///    USING plus TBLPROPERTIES for Hive/Spark/Databricks, dropped for
    ///    DuckDB, and passed through for everything else.
    /// 2. Hive-style `properties` (STORED AS, TBLPROPERTIES, PARTITIONED BY)
    ///    are converted back into WITH properties for Presto-family targets,
    ///    stripped for DuckDB, and for other targets quoted STORED AS format
    ///    names are unquoted.
    fn transform_create_table_properties(
        ct: &mut crate::expressions::CreateTable,
        _source: DialectType,
        target: DialectType,
    ) {
        use crate::expressions::{
            BinaryOp, BooleanLiteral, Expression, FileFormatProperty, Identifier, Literal,
            Properties,
        };

        // Helper to convert a raw property value string to the correct Expression
        // (string literal, number, ARRAY(...) call, or bare identifier).
        let value_to_expr = |v: &str| -> Expression {
            let trimmed = v.trim();
            // Check if it's a quoted string (starts and ends with ')
            if trimmed.starts_with('\'') && trimmed.ends_with('\'') {
                Expression::Literal(Literal::String(trimmed[1..trimmed.len() - 1].to_string()))
            }
            // Check if it's a number
            else if trimmed.parse::<i64>().is_ok() || trimmed.parse::<f64>().is_ok() {
                Expression::Literal(Literal::Number(trimmed.to_string()))
            }
            // Check if it's ARRAY[...] or ARRAY(...)
            else if trimmed.to_uppercase().starts_with("ARRAY") {
                // Convert ARRAY['y'] to ARRAY('y') for Hive/Spark
                let inner = trimmed
                    .trim_start_matches(|c: char| c.is_alphabetic()) // Remove ARRAY
                    .trim_start_matches('[')
                    .trim_start_matches('(')
                    .trim_end_matches(']')
                    .trim_end_matches(')');
                let elements: Vec<Expression> = inner
                    .split(',')
                    .map(|e| {
                        let elem = e.trim().trim_matches('\'');
                        Expression::Literal(Literal::String(elem.to_string()))
                    })
                    .collect();
                Expression::Function(Box::new(crate::expressions::Function::new(
                    "ARRAY".to_string(),
                    elements,
                )))
            }
            // Otherwise, just output as identifier (unquoted)
            else {
                Expression::Identifier(Identifier::new(trimmed.to_string()))
            }
        };

        // Fast path: nothing to transform.
        if ct.with_properties.is_empty() && ct.properties.is_empty() {
            return;
        }

        // Handle Presto-style WITH properties
        if !ct.with_properties.is_empty() {
            // Extract FORMAT property and remaining properties
            let mut format_value: Option<String> = None;
            let mut partitioned_by: Option<String> = None;
            let mut other_props: Vec<(String, String)> = Vec::new();

            // Keys are matched case-insensitively; the drain empties
            // with_properties so each target arm can rebuild it.
            for (key, value) in ct.with_properties.drain(..) {
                let key_upper = key.to_uppercase();
                if key_upper == "FORMAT" {
                    // Strip surrounding quotes from value if present
                    format_value = Some(value.trim_matches('\'').to_string());
                } else if key_upper == "PARTITIONED_BY" {
                    partitioned_by = Some(value);
                } else {
                    other_props.push((key, value));
                }
            }

            match target {
                DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                    // Presto: keep WITH properties but lowercase 'format' key
                    if let Some(fmt) = format_value {
                        ct.with_properties
                            .push(("format".to_string(), format!("'{}'", fmt)));
                    }
                    if let Some(part) = partitioned_by {
                        // Convert (col1, col2) to ARRAY['col1', 'col2'] format
                        let trimmed = part.trim();
                        let inner = trimmed.trim_start_matches('(').trim_end_matches(')');
                        // Also handle ARRAY['...'] format - keep as-is
                        if trimmed.to_uppercase().starts_with("ARRAY") {
                            ct.with_properties
                                .push(("PARTITIONED_BY".to_string(), part));
                        } else {
                            // Parse column names from the parenthesized list
                            let cols: Vec<&str> = inner
                                .split(',')
                                .map(|c| c.trim().trim_matches('"').trim_matches('\''))
                                .collect();
                            let array_val = format!(
                                "ARRAY[{}]",
                                cols.iter()
                                    .map(|c| format!("'{}'", c))
                                    .collect::<Vec<_>>()
                                    .join(", ")
                            );
                            ct.with_properties
                                .push(("PARTITIONED_BY".to_string(), array_val));
                        }
                    }
                    ct.with_properties.extend(other_props);
                }
                DialectType::Hive => {
                    // Hive: FORMAT -> STORED AS, other props -> TBLPROPERTIES
                    if let Some(fmt) = format_value {
                        ct.properties.push(Expression::FileFormatProperty(Box::new(
                            FileFormatProperty {
                                this: Some(Box::new(Expression::Identifier(Identifier::new(fmt)))),
                                expressions: vec![],
                                // hive_format = true selects STORED AS rendering.
                                hive_format: Some(Box::new(Expression::Boolean(BooleanLiteral {
                                    value: true,
                                }))),
                            },
                        )));
                    }
                    if let Some(_part) = partitioned_by {
                        // PARTITIONED_BY handling is complex - move columns to partitioned by
                        // For now, the partition columns are extracted from the column list
                        Self::apply_partitioned_by(ct, &_part, target);
                    }
                    if !other_props.is_empty() {
                        // Remaining key/value pairs become TBLPROPERTIES ('k' = v, ...).
                        let eq_exprs: Vec<Expression> = other_props
                            .into_iter()
                            .map(|(k, v)| {
                                Expression::Eq(Box::new(BinaryOp::new(
                                    Expression::Literal(Literal::String(k)),
                                    value_to_expr(&v),
                                )))
                            })
                            .collect();
                        ct.properties
                            .push(Expression::Properties(Box::new(Properties {
                                expressions: eq_exprs,
                            })));
                    }
                }
                DialectType::Spark | DialectType::Databricks => {
                    // Spark: FORMAT -> USING, other props -> TBLPROPERTIES
                    if let Some(fmt) = format_value {
                        ct.properties.push(Expression::FileFormatProperty(Box::new(
                            FileFormatProperty {
                                this: Some(Box::new(Expression::Identifier(Identifier::new(fmt)))),
                                expressions: vec![],
                                hive_format: None, // None means USING syntax
                            },
                        )));
                    }
                    if let Some(_part) = partitioned_by {
                        Self::apply_partitioned_by(ct, &_part, target);
                    }
                    if !other_props.is_empty() {
                        let eq_exprs: Vec<Expression> = other_props
                            .into_iter()
                            .map(|(k, v)| {
                                Expression::Eq(Box::new(BinaryOp::new(
                                    Expression::Literal(Literal::String(k)),
                                    value_to_expr(&v),
                                )))
                            })
                            .collect();
                        ct.properties
                            .push(Expression::Properties(Box::new(Properties {
                                expressions: eq_exprs,
                            })));
                    }
                }
                DialectType::DuckDB => {
                    // DuckDB: strip all WITH properties (FORMAT, PARTITIONED_BY, etc.)
                    // Keep nothing
                }
                _ => {
                    // For other dialects, keep WITH properties as-is
                    if let Some(fmt) = format_value {
                        ct.with_properties
                            .push(("FORMAT".to_string(), format!("'{}'", fmt)));
                    }
                    if let Some(part) = partitioned_by {
                        ct.with_properties
                            .push(("PARTITIONED_BY".to_string(), part));
                    }
                    ct.with_properties.extend(other_props);
                }
            }
        }

        // Handle STORED AS 'PARQUET' (quoted format name) -> STORED AS PARQUET (unquoted)
        // and Hive STORED AS -> Presto WITH (format=...) conversion
        if !ct.properties.is_empty() {
            let is_presto_target = matches!(
                target,
                DialectType::Presto | DialectType::Trino | DialectType::Athena
            );
            let is_duckdb_target = matches!(target, DialectType::DuckDB);

            if is_presto_target || is_duckdb_target {
                // Rebuild ct.properties, converting (Presto) or dropping
                // (DuckDB) the recognized property kinds.
                let mut new_properties = Vec::new();
                for prop in ct.properties.drain(..) {
                    match &prop {
                        Expression::FileFormatProperty(ffp) => {
                            if is_presto_target {
                                // Convert STORED AS/USING to WITH (format=...)
                                if let Some(ref fmt_expr) = ffp.this {
                                    let fmt_str = match fmt_expr.as_ref() {
                                        Expression::Identifier(id) => id.name.clone(),
                                        Expression::Literal(Literal::String(s)) => s.clone(),
                                        _ => {
                                            new_properties.push(prop);
                                            continue;
                                        }
                                    };
                                    ct.with_properties
                                        .push(("format".to_string(), format!("'{}'", fmt_str)));
                                }
                            }
                            // DuckDB: just strip file format properties
                        }
                        // Convert TBLPROPERTIES to WITH properties for Presto target
                        Expression::Properties(props) if is_presto_target => {
                            for expr in &props.expressions {
                                if let Expression::Eq(eq) = expr {
                                    // Extract key and value from the Eq expression
                                    let key = match &eq.left {
                                        Expression::Literal(Literal::String(s)) => s.clone(),
                                        Expression::Identifier(id) => id.name.clone(),
                                        _ => continue,
                                    };
                                    let value = match &eq.right {
                                        Expression::Literal(Literal::String(s)) => {
                                            format!("'{}'", s)
                                        }
                                        Expression::Literal(Literal::Number(n)) => n.clone(),
                                        Expression::Identifier(id) => id.name.clone(),
                                        _ => continue,
                                    };
                                    ct.with_properties.push((key, value));
                                }
                            }
                        }
                        // Convert PartitionedByProperty for Presto target
                        Expression::PartitionedByProperty(ref pbp) if is_presto_target => {
                            // Check if it contains ColumnDef expressions (Hive-style with types)
                            if let Expression::Tuple(ref tuple) = *pbp.this {
                                let mut col_names: Vec<String> = Vec::new();
                                let mut col_defs: Vec<crate::expressions::ColumnDef> = Vec::new();
                                let mut has_col_defs = false;
                                for expr in &tuple.expressions {
                                    if let Expression::ColumnDef(ref cd) = expr {
                                        has_col_defs = true;
                                        col_names.push(cd.name.name.clone());
                                        col_defs.push(*cd.clone());
                                    } else if let Expression::Column(ref col) = expr {
                                        col_names.push(col.name.name.clone());
                                    } else if let Expression::Identifier(ref id) = expr {
                                        col_names.push(id.name.clone());
                                    } else {
                                        // For function expressions like MONTHS(y), serialize to SQL
                                        let generic = Dialect::get(DialectType::Generic);
                                        if let Ok(sql) = generic.generate(expr) {
                                            col_names.push(sql);
                                        }
                                    }
                                }
                                if has_col_defs {
                                    // Merge partition column defs into the main column list
                                    // (Presto keeps partition columns in the schema).
                                    for cd in col_defs {
                                        ct.columns.push(cd);
                                    }
                                }
                                if !col_names.is_empty() {
                                    // Add PARTITIONED_BY property
                                    let array_val = format!(
                                        "ARRAY[{}]",
                                        col_names
                                            .iter()
                                            .map(|n| format!("'{}'", n))
                                            .collect::<Vec<_>>()
                                            .join(", ")
                                    );
                                    ct.with_properties
                                        .push(("PARTITIONED_BY".to_string(), array_val));
                                }
                            }
                            // Skip - don't keep in properties
                        }
                        _ => {
                            if !is_duckdb_target {
                                new_properties.push(prop);
                            }
                        }
                    }
                }
                ct.properties = new_properties;
            } else {
                // For Hive/Spark targets, unquote format names in STORED AS
                for prop in &mut ct.properties {
                    if let Expression::FileFormatProperty(ref mut ffp) = prop {
                        if let Some(ref mut fmt_expr) = ffp.this {
                            if let Expression::Literal(Literal::String(s)) = fmt_expr.as_ref() {
                                // Convert STORED AS 'PARQUET' to STORED AS PARQUET (unquote)
                                let unquoted = s.clone();
                                *fmt_expr =
                                    Box::new(Expression::Identifier(Identifier::new(unquoted)));
                            }
                        }
                    }
                }
            }
        }
    }
25948
25949 /// Apply PARTITIONED_BY conversion: move partition columns from column list to PARTITIONED BY
25950 fn apply_partitioned_by(
25951 ct: &mut crate::expressions::CreateTable,
25952 partitioned_by_value: &str,
25953 target: DialectType,
25954 ) {
25955 use crate::expressions::{Column, Expression, Identifier, PartitionedByProperty, Tuple};
25956
25957 // Parse the ARRAY['col1', 'col2'] value to extract column names
25958 let mut col_names: Vec<String> = Vec::new();
25959 // The value looks like ARRAY['y', 'z'] or ARRAY('y', 'z')
25960 let inner = partitioned_by_value
25961 .trim()
25962 .trim_start_matches("ARRAY")
25963 .trim_start_matches('[')
25964 .trim_start_matches('(')
25965 .trim_end_matches(']')
25966 .trim_end_matches(')');
25967 for part in inner.split(',') {
25968 let col = part.trim().trim_matches('\'').trim_matches('"');
25969 if !col.is_empty() {
25970 col_names.push(col.to_string());
25971 }
25972 }
25973
25974 if col_names.is_empty() {
25975 return;
25976 }
25977
25978 if matches!(target, DialectType::Hive) {
25979 // Hive: PARTITIONED BY (col_name type, ...) - move columns out of column list
25980 let mut partition_col_defs = Vec::new();
25981 for col_name in &col_names {
25982 // Find and remove from columns
25983 if let Some(pos) = ct
25984 .columns
25985 .iter()
25986 .position(|c| c.name.name.eq_ignore_ascii_case(col_name))
25987 {
25988 let col_def = ct.columns.remove(pos);
25989 partition_col_defs.push(Expression::ColumnDef(Box::new(col_def)));
25990 }
25991 }
25992 if !partition_col_defs.is_empty() {
25993 ct.properties
25994 .push(Expression::PartitionedByProperty(Box::new(
25995 PartitionedByProperty {
25996 this: Box::new(Expression::Tuple(Box::new(Tuple {
25997 expressions: partition_col_defs,
25998 }))),
25999 },
26000 )));
26001 }
26002 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
26003 // Spark: PARTITIONED BY (col1, col2) - just column names, keep in column list
26004 // Use quoted identifiers to match the quoting style of the original column definitions
26005 let partition_exprs: Vec<Expression> = col_names
26006 .iter()
26007 .map(|name| {
26008 // Check if the column exists in the column list and use its quoting
26009 let is_quoted = ct
26010 .columns
26011 .iter()
26012 .any(|c| c.name.name.eq_ignore_ascii_case(name) && c.name.quoted);
26013 let ident = if is_quoted {
26014 Identifier::quoted(name.clone())
26015 } else {
26016 Identifier::new(name.clone())
26017 };
26018 Expression::Column(Column {
26019 name: ident,
26020 table: None,
26021 join_mark: false,
26022 trailing_comments: Vec::new(),
26023 })
26024 })
26025 .collect();
26026 ct.properties
26027 .push(Expression::PartitionedByProperty(Box::new(
26028 PartitionedByProperty {
26029 this: Box::new(Expression::Tuple(Box::new(Tuple {
26030 expressions: partition_exprs,
26031 }))),
26032 },
26033 )));
26034 }
26035 // DuckDB: strip partitioned_by entirely (already handled)
26036 }
26037
26038 /// Convert a DataType to Spark's type string format (using angle brackets)
26039 fn data_type_to_spark_string(dt: &crate::expressions::DataType) -> String {
26040 use crate::expressions::DataType;
26041 match dt {
26042 DataType::Int { .. } => "INT".to_string(),
26043 DataType::BigInt { .. } => "BIGINT".to_string(),
26044 DataType::SmallInt { .. } => "SMALLINT".to_string(),
26045 DataType::TinyInt { .. } => "TINYINT".to_string(),
26046 DataType::Float { .. } => "FLOAT".to_string(),
26047 DataType::Double { .. } => "DOUBLE".to_string(),
26048 DataType::Decimal {
26049 precision: Some(p),
26050 scale: Some(s),
26051 } => format!("DECIMAL({}, {})", p, s),
26052 DataType::Decimal {
26053 precision: Some(p), ..
26054 } => format!("DECIMAL({})", p),
26055 DataType::Decimal { .. } => "DECIMAL".to_string(),
26056 DataType::VarChar { .. } | DataType::Text | DataType::String { .. } => {
26057 "STRING".to_string()
26058 }
26059 DataType::Char { .. } => "STRING".to_string(),
26060 DataType::Boolean => "BOOLEAN".to_string(),
26061 DataType::Date => "DATE".to_string(),
26062 DataType::Timestamp { .. } => "TIMESTAMP".to_string(),
26063 DataType::Json | DataType::JsonB => "STRING".to_string(),
26064 DataType::Binary { .. } => "BINARY".to_string(),
26065 DataType::Array { element_type, .. } => {
26066 format!("ARRAY<{}>", Self::data_type_to_spark_string(element_type))
26067 }
26068 DataType::Map {
26069 key_type,
26070 value_type,
26071 } => format!(
26072 "MAP<{}, {}>",
26073 Self::data_type_to_spark_string(key_type),
26074 Self::data_type_to_spark_string(value_type)
26075 ),
26076 DataType::Struct { fields, .. } => {
26077 let field_strs: Vec<String> = fields
26078 .iter()
26079 .map(|f| {
26080 if f.name.is_empty() {
26081 Self::data_type_to_spark_string(&f.data_type)
26082 } else {
26083 format!(
26084 "{}: {}",
26085 f.name,
26086 Self::data_type_to_spark_string(&f.data_type)
26087 )
26088 }
26089 })
26090 .collect();
26091 format!("STRUCT<{}>", field_strs.join(", "))
26092 }
26093 DataType::Custom { name } => name.clone(),
26094 _ => format!("{:?}", dt),
26095 }
26096 }
26097
26098 /// Extract value and unit from an Interval expression
26099 /// Returns (value_expression, IntervalUnit)
26100 fn extract_interval_parts(
26101 interval_expr: &Expression,
26102 ) -> (Expression, crate::expressions::IntervalUnit) {
26103 use crate::expressions::{IntervalUnit, IntervalUnitSpec};
26104
26105 if let Expression::Interval(iv) = interval_expr {
26106 let val = iv.this.clone().unwrap_or(Expression::number(0));
26107 let unit = match &iv.unit {
26108 Some(IntervalUnitSpec::Simple { unit, .. }) => *unit,
26109 None => {
26110 // Unit might be embedded in the string value (Snowflake format: '5 DAY')
26111 if let Expression::Literal(crate::expressions::Literal::String(s)) = &val {
26112 let parts: Vec<&str> = s.trim().splitn(2, ' ').collect();
26113 if parts.len() == 2 {
26114 let unit_str = parts[1].trim().to_uppercase();
26115 let parsed_unit = match unit_str.as_str() {
26116 "YEAR" | "YEARS" => IntervalUnit::Year,
26117 "QUARTER" | "QUARTERS" => IntervalUnit::Quarter,
26118 "MONTH" | "MONTHS" => IntervalUnit::Month,
26119 "WEEK" | "WEEKS" | "ISOWEEK" => IntervalUnit::Week,
26120 "DAY" | "DAYS" => IntervalUnit::Day,
26121 "HOUR" | "HOURS" => IntervalUnit::Hour,
26122 "MINUTE" | "MINUTES" => IntervalUnit::Minute,
26123 "SECOND" | "SECONDS" => IntervalUnit::Second,
26124 "MILLISECOND" | "MILLISECONDS" => IntervalUnit::Millisecond,
26125 "MICROSECOND" | "MICROSECONDS" => IntervalUnit::Microsecond,
26126 _ => IntervalUnit::Day,
26127 };
26128 // Return just the numeric part as value and parsed unit
26129 return (
26130 Expression::Literal(crate::expressions::Literal::String(
26131 parts[0].to_string(),
26132 )),
26133 parsed_unit,
26134 );
26135 }
26136 IntervalUnit::Day
26137 } else {
26138 IntervalUnit::Day
26139 }
26140 }
26141 _ => IntervalUnit::Day,
26142 };
26143 (val, unit)
26144 } else {
26145 // Not an interval - pass through
26146 (interval_expr.clone(), crate::expressions::IntervalUnit::Day)
26147 }
26148 }
26149
26150 /// Normalize BigQuery-specific functions to standard forms that target dialects can handle
26151 fn normalize_bigquery_function(
26152 e: Expression,
26153 source: DialectType,
26154 target: DialectType,
26155 ) -> Result<Expression> {
26156 use crate::expressions::{BinaryOp, Cast, DataType, Function, Identifier, Literal, Paren};
26157
26158 let f = if let Expression::Function(f) = e {
26159 *f
26160 } else {
26161 return Ok(e);
26162 };
26163 let name = f.name.to_uppercase();
26164 let mut args = f.args;
26165
26166 /// Helper to extract unit string from an identifier, column, or literal expression
26167 fn get_unit_str(expr: &Expression) -> String {
26168 match expr {
26169 Expression::Identifier(id) => id.name.to_uppercase(),
26170 Expression::Literal(Literal::String(s)) => s.to_uppercase(),
26171 Expression::Column(col) => col.name.name.to_uppercase(),
26172 // Handle WEEK(MONDAY), WEEK(SUNDAY) etc. which are parsed as Function("WEEK", [Column("MONDAY")])
26173 Expression::Function(f) => {
26174 let base = f.name.to_uppercase();
26175 if !f.args.is_empty() {
26176 // e.g., WEEK(MONDAY) -> "WEEK(MONDAY)"
26177 let inner = get_unit_str(&f.args[0]);
26178 format!("{}({})", base, inner)
26179 } else {
26180 base
26181 }
26182 }
26183 _ => "DAY".to_string(),
26184 }
26185 }
26186
26187 /// Parse unit string to IntervalUnit
26188 fn parse_interval_unit(s: &str) -> crate::expressions::IntervalUnit {
26189 match s {
26190 "YEAR" => crate::expressions::IntervalUnit::Year,
26191 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
26192 "MONTH" => crate::expressions::IntervalUnit::Month,
26193 "WEEK" | "ISOWEEK" => crate::expressions::IntervalUnit::Week,
26194 "DAY" => crate::expressions::IntervalUnit::Day,
26195 "HOUR" => crate::expressions::IntervalUnit::Hour,
26196 "MINUTE" => crate::expressions::IntervalUnit::Minute,
26197 "SECOND" => crate::expressions::IntervalUnit::Second,
26198 "MILLISECOND" => crate::expressions::IntervalUnit::Millisecond,
26199 "MICROSECOND" => crate::expressions::IntervalUnit::Microsecond,
26200 _ if s.starts_with("WEEK(") => crate::expressions::IntervalUnit::Week,
26201 _ => crate::expressions::IntervalUnit::Day,
26202 }
26203 }
26204
26205 match name.as_str() {
26206 // TIMESTAMP_DIFF(date1, date2, unit) -> TIMESTAMPDIFF(unit, date2, date1)
26207 // (BigQuery: result = date1 - date2, Standard: result = end - start)
26208 "TIMESTAMP_DIFF" | "DATETIME_DIFF" | "TIME_DIFF" if args.len() == 3 => {
26209 let date1 = args.remove(0);
26210 let date2 = args.remove(0);
26211 let unit_expr = args.remove(0);
26212 let unit_str = get_unit_str(&unit_expr);
26213
26214 if matches!(target, DialectType::BigQuery) {
26215 // BigQuery -> BigQuery: just uppercase the unit
26216 let unit = Expression::Identifier(Identifier::new(unit_str.clone()));
26217 return Ok(Expression::Function(Box::new(Function::new(
26218 f.name,
26219 vec![date1, date2, unit],
26220 ))));
26221 }
26222
26223 // For Snowflake: use TimestampDiff expression so it generates TIMESTAMPDIFF
26224 // (Function("TIMESTAMPDIFF") would be converted to DATEDIFF by Snowflake's function normalization)
26225 if matches!(target, DialectType::Snowflake) {
26226 return Ok(Expression::TimestampDiff(Box::new(
26227 crate::expressions::TimestampDiff {
26228 this: Box::new(date2),
26229 expression: Box::new(date1),
26230 unit: Some(unit_str),
26231 },
26232 )));
26233 }
26234
26235 // For DuckDB: DATE_DIFF('UNIT', start, end) with proper CAST
26236 if matches!(target, DialectType::DuckDB) {
26237 let (cast_d1, cast_d2) = if name == "TIME_DIFF" {
26238 // CAST to TIME
26239 let cast_fn = |e: Expression| -> Expression {
26240 match e {
26241 Expression::Literal(Literal::String(s)) => {
26242 Expression::Cast(Box::new(Cast {
26243 this: Expression::Literal(Literal::String(s)),
26244 to: DataType::Custom {
26245 name: "TIME".to_string(),
26246 },
26247 trailing_comments: vec![],
26248 double_colon_syntax: false,
26249 format: None,
26250 default: None,
26251 }))
26252 }
26253 other => other,
26254 }
26255 };
26256 (cast_fn(date1), cast_fn(date2))
26257 } else if name == "DATETIME_DIFF" {
26258 // CAST to TIMESTAMP
26259 (
26260 Self::ensure_cast_timestamp(date1),
26261 Self::ensure_cast_timestamp(date2),
26262 )
26263 } else {
26264 // TIMESTAMP_DIFF: CAST to TIMESTAMPTZ
26265 (
26266 Self::ensure_cast_timestamptz(date1),
26267 Self::ensure_cast_timestamptz(date2),
26268 )
26269 };
26270 return Ok(Expression::Function(Box::new(Function::new(
26271 "DATE_DIFF".to_string(),
26272 vec![
26273 Expression::Literal(Literal::String(unit_str)),
26274 cast_d2,
26275 cast_d1,
26276 ],
26277 ))));
26278 }
26279
26280 // Convert to standard TIMESTAMPDIFF(unit, start, end)
26281 let unit = Expression::Identifier(Identifier::new(unit_str));
26282 Ok(Expression::Function(Box::new(Function::new(
26283 "TIMESTAMPDIFF".to_string(),
26284 vec![unit, date2, date1],
26285 ))))
26286 }
26287
26288 // DATEDIFF(unit, start, end) -> target-specific form
26289 // Used by: Redshift, Snowflake, TSQL, Databricks, Spark
26290 "DATEDIFF" if args.len() == 3 => {
26291 let arg0 = args.remove(0);
26292 let arg1 = args.remove(0);
26293 let arg2 = args.remove(0);
26294 let unit_str = get_unit_str(&arg0);
26295
26296 // Redshift DATEDIFF(unit, start, end) order: result = end - start
26297 // Snowflake DATEDIFF(unit, start, end) order: result = end - start
26298 // TSQL DATEDIFF(unit, start, end) order: result = end - start
26299
26300 if matches!(target, DialectType::Snowflake) {
26301 // Snowflake: DATEDIFF(UNIT, start, end) - uppercase unit
26302 let unit = Expression::Identifier(Identifier::new(unit_str));
26303 return Ok(Expression::Function(Box::new(Function::new(
26304 "DATEDIFF".to_string(),
26305 vec![unit, arg1, arg2],
26306 ))));
26307 }
26308
26309 if matches!(target, DialectType::DuckDB) {
26310 // DuckDB: DATE_DIFF('UNIT', start, end) with CAST
26311 let cast_d1 = Self::ensure_cast_timestamp(arg1);
26312 let cast_d2 = Self::ensure_cast_timestamp(arg2);
26313 return Ok(Expression::Function(Box::new(Function::new(
26314 "DATE_DIFF".to_string(),
26315 vec![
26316 Expression::Literal(Literal::String(unit_str)),
26317 cast_d1,
26318 cast_d2,
26319 ],
26320 ))));
26321 }
26322
26323 if matches!(target, DialectType::BigQuery) {
26324 // BigQuery: DATE_DIFF(end_date, start_date, UNIT) - reversed args, CAST to DATETIME
26325 let cast_d1 = Self::ensure_cast_datetime(arg1);
26326 let cast_d2 = Self::ensure_cast_datetime(arg2);
26327 let unit = Expression::Identifier(Identifier::new(unit_str));
26328 return Ok(Expression::Function(Box::new(Function::new(
26329 "DATE_DIFF".to_string(),
26330 vec![cast_d2, cast_d1, unit],
26331 ))));
26332 }
26333
26334 if matches!(target, DialectType::Spark | DialectType::Databricks) {
26335 // Spark/Databricks: DATEDIFF(UNIT, start, end) - uppercase unit
26336 let unit = Expression::Identifier(Identifier::new(unit_str));
26337 return Ok(Expression::Function(Box::new(Function::new(
26338 "DATEDIFF".to_string(),
26339 vec![unit, arg1, arg2],
26340 ))));
26341 }
26342
26343 if matches!(target, DialectType::Hive) {
26344 // Hive: DATEDIFF(end, start) for DAY only, use MONTHS_BETWEEN for MONTH
26345 match unit_str.as_str() {
26346 "MONTH" => {
26347 return Ok(Expression::Function(Box::new(Function::new(
26348 "CAST".to_string(),
26349 vec![Expression::Function(Box::new(Function::new(
26350 "MONTHS_BETWEEN".to_string(),
26351 vec![arg2, arg1],
26352 )))],
26353 ))));
26354 }
26355 "WEEK" => {
26356 return Ok(Expression::Cast(Box::new(Cast {
26357 this: Expression::Div(Box::new(crate::expressions::BinaryOp::new(
26358 Expression::Function(Box::new(Function::new(
26359 "DATEDIFF".to_string(),
26360 vec![arg2, arg1],
26361 ))),
26362 Expression::Literal(Literal::Number("7".to_string())),
26363 ))),
26364 to: DataType::Int {
26365 length: None,
26366 integer_spelling: false,
26367 },
26368 trailing_comments: vec![],
26369 double_colon_syntax: false,
26370 format: None,
26371 default: None,
26372 })));
26373 }
26374 _ => {
26375 // Default: DATEDIFF(end, start) for DAY
26376 return Ok(Expression::Function(Box::new(Function::new(
26377 "DATEDIFF".to_string(),
26378 vec![arg2, arg1],
26379 ))));
26380 }
26381 }
26382 }
26383
26384 if matches!(
26385 target,
26386 DialectType::Presto | DialectType::Trino | DialectType::Athena
26387 ) {
26388 // Presto/Trino: DATE_DIFF('UNIT', start, end)
26389 return Ok(Expression::Function(Box::new(Function::new(
26390 "DATE_DIFF".to_string(),
26391 vec![Expression::Literal(Literal::String(unit_str)), arg1, arg2],
26392 ))));
26393 }
26394
26395 if matches!(target, DialectType::TSQL) {
26396 // TSQL: DATEDIFF(UNIT, start, CAST(end AS DATETIME2))
26397 let cast_d2 = Self::ensure_cast_datetime2(arg2);
26398 let unit = Expression::Identifier(Identifier::new(unit_str));
26399 return Ok(Expression::Function(Box::new(Function::new(
26400 "DATEDIFF".to_string(),
26401 vec![unit, arg1, cast_d2],
26402 ))));
26403 }
26404
26405 if matches!(target, DialectType::PostgreSQL) {
26406 // PostgreSQL doesn't have DATEDIFF - use date subtraction or EXTRACT
26407 // For now, use DATEDIFF (passthrough) with uppercased unit
26408 let unit = Expression::Identifier(Identifier::new(unit_str));
26409 return Ok(Expression::Function(Box::new(Function::new(
26410 "DATEDIFF".to_string(),
26411 vec![unit, arg1, arg2],
26412 ))));
26413 }
26414
26415 // Default: DATEDIFF(UNIT, start, end) with uppercase unit
26416 let unit = Expression::Identifier(Identifier::new(unit_str));
26417 Ok(Expression::Function(Box::new(Function::new(
26418 "DATEDIFF".to_string(),
26419 vec![unit, arg1, arg2],
26420 ))))
26421 }
26422
26423 // DATE_DIFF(date1, date2, unit) -> standard form
26424 "DATE_DIFF" if args.len() == 3 => {
26425 let date1 = args.remove(0);
26426 let date2 = args.remove(0);
26427 let unit_expr = args.remove(0);
26428 let unit_str = get_unit_str(&unit_expr);
26429
26430 if matches!(target, DialectType::BigQuery) {
26431 // BigQuery -> BigQuery: just uppercase the unit, normalize WEEK(SUNDAY) -> WEEK
26432 let norm_unit = if unit_str == "WEEK(SUNDAY)" {
26433 "WEEK".to_string()
26434 } else {
26435 unit_str
26436 };
26437 let norm_d1 = Self::date_literal_to_cast(date1);
26438 let norm_d2 = Self::date_literal_to_cast(date2);
26439 let unit = Expression::Identifier(Identifier::new(norm_unit));
26440 return Ok(Expression::Function(Box::new(Function::new(
26441 f.name,
26442 vec![norm_d1, norm_d2, unit],
26443 ))));
26444 }
26445
26446 if matches!(target, DialectType::MySQL) {
26447 // MySQL DATEDIFF only takes 2 args (date1, date2), returns day difference
26448 let norm_d1 = Self::date_literal_to_cast(date1);
26449 let norm_d2 = Self::date_literal_to_cast(date2);
26450 return Ok(Expression::Function(Box::new(Function::new(
26451 "DATEDIFF".to_string(),
26452 vec![norm_d1, norm_d2],
26453 ))));
26454 }
26455
26456 if matches!(target, DialectType::StarRocks) {
26457 // StarRocks: DATE_DIFF('UNIT', date1, date2) - unit as string, args NOT swapped
26458 let norm_d1 = Self::date_literal_to_cast(date1);
26459 let norm_d2 = Self::date_literal_to_cast(date2);
26460 return Ok(Expression::Function(Box::new(Function::new(
26461 "DATE_DIFF".to_string(),
26462 vec![
26463 Expression::Literal(Literal::String(unit_str)),
26464 norm_d1,
26465 norm_d2,
26466 ],
26467 ))));
26468 }
26469
26470 if matches!(target, DialectType::DuckDB) {
26471 // DuckDB: DATE_DIFF('UNIT', date2, date1) with proper CAST for dates
26472 let norm_d1 = Self::ensure_cast_date(date1);
26473 let norm_d2 = Self::ensure_cast_date(date2);
26474
26475 // Handle WEEK variants: WEEK(MONDAY)/WEEK(SUNDAY)/ISOWEEK/WEEK
26476 let is_week_variant = unit_str == "WEEK"
26477 || unit_str.starts_with("WEEK(")
26478 || unit_str == "ISOWEEK";
26479 if is_week_variant {
26480 // For DuckDB, WEEK-based diffs use DATE_TRUNC approach
26481 // WEEK(MONDAY) / ISOWEEK: DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2), DATE_TRUNC('WEEK', d1))
26482 // WEEK / WEEK(SUNDAY): DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2 + INTERVAL '1' DAY), DATE_TRUNC('WEEK', d1 + INTERVAL '1' DAY))
26483 // WEEK(SATURDAY): DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2 + INTERVAL '-5' DAY), DATE_TRUNC('WEEK', d1 + INTERVAL '-5' DAY))
26484 let day_offset = if unit_str == "WEEK(MONDAY)" || unit_str == "ISOWEEK" {
26485 None // ISO weeks start on Monday, aligned with DATE_TRUNC('WEEK')
26486 } else if unit_str == "WEEK" || unit_str == "WEEK(SUNDAY)" {
26487 Some("1") // Shift Sunday to Monday alignment
26488 } else if unit_str == "WEEK(SATURDAY)" {
26489 Some("-5")
26490 } else if unit_str == "WEEK(TUESDAY)" {
26491 Some("-1")
26492 } else if unit_str == "WEEK(WEDNESDAY)" {
26493 Some("-2")
26494 } else if unit_str == "WEEK(THURSDAY)" {
26495 Some("-3")
26496 } else if unit_str == "WEEK(FRIDAY)" {
26497 Some("-4")
26498 } else {
26499 Some("1") // default to Sunday
26500 };
26501
26502 let make_trunc = |date: Expression, offset: Option<&str>| -> Expression {
26503 let shifted = if let Some(off) = offset {
26504 let interval =
26505 Expression::Interval(Box::new(crate::expressions::Interval {
26506 this: Some(Expression::Literal(Literal::String(
26507 off.to_string(),
26508 ))),
26509 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
26510 unit: crate::expressions::IntervalUnit::Day,
26511 use_plural: false,
26512 }),
26513 }));
26514 Expression::Add(Box::new(crate::expressions::BinaryOp::new(
26515 date, interval,
26516 )))
26517 } else {
26518 date
26519 };
26520 Expression::Function(Box::new(Function::new(
26521 "DATE_TRUNC".to_string(),
26522 vec![
26523 Expression::Literal(Literal::String("WEEK".to_string())),
26524 shifted,
26525 ],
26526 )))
26527 };
26528
26529 let trunc_d2 = make_trunc(norm_d2, day_offset);
26530 let trunc_d1 = make_trunc(norm_d1, day_offset);
26531 return Ok(Expression::Function(Box::new(Function::new(
26532 "DATE_DIFF".to_string(),
26533 vec![
26534 Expression::Literal(Literal::String("WEEK".to_string())),
26535 trunc_d2,
26536 trunc_d1,
26537 ],
26538 ))));
26539 }
26540
26541 return Ok(Expression::Function(Box::new(Function::new(
26542 "DATE_DIFF".to_string(),
26543 vec![
26544 Expression::Literal(Literal::String(unit_str)),
26545 norm_d2,
26546 norm_d1,
26547 ],
26548 ))));
26549 }
26550
26551 // Default: DATEDIFF(unit, date2, date1)
26552 let unit = Expression::Identifier(Identifier::new(unit_str));
26553 Ok(Expression::Function(Box::new(Function::new(
26554 "DATEDIFF".to_string(),
26555 vec![unit, date2, date1],
26556 ))))
26557 }
26558
26559 // TIMESTAMP_ADD(ts, INTERVAL n UNIT) -> target-specific
26560 "TIMESTAMP_ADD" | "DATETIME_ADD" | "TIME_ADD" if args.len() == 2 => {
26561 let ts = args.remove(0);
26562 let interval_expr = args.remove(0);
26563 let (val, unit) = Self::extract_interval_parts(&interval_expr);
26564
26565 match target {
26566 DialectType::Snowflake => {
26567 // TIMESTAMPADD(UNIT, val, CAST(ts AS TIMESTAMPTZ))
26568 // Use TimestampAdd expression so Snowflake generates TIMESTAMPADD
26569 // (Function("TIMESTAMPADD") would be converted to DATEADD by Snowflake's function normalization)
26570 let unit_str = Self::interval_unit_to_string(&unit);
26571 let cast_ts = Self::maybe_cast_ts_to_tz(ts, &name);
26572 Ok(Expression::TimestampAdd(Box::new(
26573 crate::expressions::TimestampAdd {
26574 this: Box::new(val),
26575 expression: Box::new(cast_ts),
26576 unit: Some(unit_str),
26577 },
26578 )))
26579 }
26580 DialectType::Spark | DialectType::Databricks => {
26581 if name == "DATETIME_ADD" && matches!(target, DialectType::Spark) {
26582 // Spark DATETIME_ADD: ts + INTERVAL val UNIT
26583 let interval =
26584 Expression::Interval(Box::new(crate::expressions::Interval {
26585 this: Some(val),
26586 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
26587 unit,
26588 use_plural: false,
26589 }),
26590 }));
26591 Ok(Expression::Add(Box::new(
26592 crate::expressions::BinaryOp::new(ts, interval),
26593 )))
26594 } else if name == "DATETIME_ADD"
26595 && matches!(target, DialectType::Databricks)
26596 {
26597 // Databricks DATETIME_ADD: TIMESTAMPADD(UNIT, val, ts)
26598 let unit_str = Self::interval_unit_to_string(&unit);
26599 Ok(Expression::Function(Box::new(Function::new(
26600 "TIMESTAMPADD".to_string(),
26601 vec![Expression::Identifier(Identifier::new(unit_str)), val, ts],
26602 ))))
26603 } else {
26604 // Presto-style: DATE_ADD('unit', val, CAST(ts AS TIMESTAMP))
26605 let unit_str = Self::interval_unit_to_string(&unit);
26606 let cast_ts =
26607 if name.starts_with("TIMESTAMP") || name.starts_with("DATETIME") {
26608 Self::maybe_cast_ts(ts)
26609 } else {
26610 ts
26611 };
26612 Ok(Expression::Function(Box::new(Function::new(
26613 "DATE_ADD".to_string(),
26614 vec![
26615 Expression::Identifier(Identifier::new(unit_str)),
26616 val,
26617 cast_ts,
26618 ],
26619 ))))
26620 }
26621 }
26622 DialectType::MySQL => {
26623 // DATE_ADD(TIMESTAMP(ts), INTERVAL val UNIT) for MySQL
26624 let mysql_ts = if name.starts_with("TIMESTAMP") {
26625 // Check if already wrapped in TIMESTAMP() function (from cross-dialect normalization)
26626 match &ts {
26627 Expression::Function(ref inner_f)
26628 if inner_f.name.eq_ignore_ascii_case("TIMESTAMP") =>
26629 {
26630 // Already wrapped, keep as-is
26631 ts
26632 }
26633 _ => {
26634 // Unwrap typed literals: TIMESTAMP '...' -> '...' for TIMESTAMP() wrapper
26635 let unwrapped = match ts {
26636 Expression::Literal(Literal::Timestamp(s)) => {
26637 Expression::Literal(Literal::String(s))
26638 }
26639 other => other,
26640 };
26641 Expression::Function(Box::new(Function::new(
26642 "TIMESTAMP".to_string(),
26643 vec![unwrapped],
26644 )))
26645 }
26646 }
26647 } else {
26648 ts
26649 };
26650 Ok(Expression::DateAdd(Box::new(
26651 crate::expressions::DateAddFunc {
26652 this: mysql_ts,
26653 interval: val,
26654 unit,
26655 },
26656 )))
26657 }
26658 _ => {
26659 // DuckDB and others use DateAdd expression (DuckDB converts to + INTERVAL)
26660 let cast_ts = if matches!(target, DialectType::DuckDB) {
26661 if name == "DATETIME_ADD" {
26662 Self::ensure_cast_timestamp(ts)
26663 } else if name.starts_with("TIMESTAMP") {
26664 Self::maybe_cast_ts_to_tz(ts, &name)
26665 } else {
26666 ts
26667 }
26668 } else {
26669 ts
26670 };
26671 Ok(Expression::DateAdd(Box::new(
26672 crate::expressions::DateAddFunc {
26673 this: cast_ts,
26674 interval: val,
26675 unit,
26676 },
26677 )))
26678 }
26679 }
26680 }
26681
26682 // TIMESTAMP_SUB(ts, INTERVAL n UNIT) -> target-specific
26683 "TIMESTAMP_SUB" | "DATETIME_SUB" | "TIME_SUB" if args.len() == 2 => {
26684 let ts = args.remove(0);
26685 let interval_expr = args.remove(0);
26686 let (val, unit) = Self::extract_interval_parts(&interval_expr);
26687
26688 match target {
26689 DialectType::Snowflake => {
26690 // TIMESTAMPADD(UNIT, val * -1, CAST(ts AS TIMESTAMPTZ))
26691 let unit_str = Self::interval_unit_to_string(&unit);
26692 let cast_ts = Self::maybe_cast_ts_to_tz(ts, &name);
26693 let neg_val = Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
26694 val,
26695 Expression::Neg(Box::new(crate::expressions::UnaryOp {
26696 this: Expression::number(1),
26697 })),
26698 )));
26699 Ok(Expression::TimestampAdd(Box::new(
26700 crate::expressions::TimestampAdd {
26701 this: Box::new(neg_val),
26702 expression: Box::new(cast_ts),
26703 unit: Some(unit_str),
26704 },
26705 )))
26706 }
26707 DialectType::Spark | DialectType::Databricks => {
26708 if (name == "DATETIME_SUB" && matches!(target, DialectType::Spark))
26709 || (name == "TIMESTAMP_SUB" && matches!(target, DialectType::Spark))
26710 {
26711 // Spark: ts - INTERVAL val UNIT
26712 let cast_ts = if name.starts_with("TIMESTAMP") {
26713 Self::maybe_cast_ts(ts)
26714 } else {
26715 ts
26716 };
26717 let interval =
26718 Expression::Interval(Box::new(crate::expressions::Interval {
26719 this: Some(val),
26720 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
26721 unit,
26722 use_plural: false,
26723 }),
26724 }));
26725 Ok(Expression::Sub(Box::new(
26726 crate::expressions::BinaryOp::new(cast_ts, interval),
26727 )))
26728 } else {
26729 // Databricks: TIMESTAMPADD(UNIT, val * -1, ts)
26730 let unit_str = Self::interval_unit_to_string(&unit);
26731 let neg_val =
26732 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
26733 val,
26734 Expression::Neg(Box::new(crate::expressions::UnaryOp {
26735 this: Expression::number(1),
26736 })),
26737 )));
26738 Ok(Expression::Function(Box::new(Function::new(
26739 "TIMESTAMPADD".to_string(),
26740 vec![
26741 Expression::Identifier(Identifier::new(unit_str)),
26742 neg_val,
26743 ts,
26744 ],
26745 ))))
26746 }
26747 }
26748 DialectType::MySQL => {
26749 let mysql_ts = if name.starts_with("TIMESTAMP") {
26750 // Check if already wrapped in TIMESTAMP() function (from cross-dialect normalization)
26751 match &ts {
26752 Expression::Function(ref inner_f)
26753 if inner_f.name.eq_ignore_ascii_case("TIMESTAMP") =>
26754 {
26755 // Already wrapped, keep as-is
26756 ts
26757 }
26758 _ => {
26759 let unwrapped = match ts {
26760 Expression::Literal(Literal::Timestamp(s)) => {
26761 Expression::Literal(Literal::String(s))
26762 }
26763 other => other,
26764 };
26765 Expression::Function(Box::new(Function::new(
26766 "TIMESTAMP".to_string(),
26767 vec![unwrapped],
26768 )))
26769 }
26770 }
26771 } else {
26772 ts
26773 };
26774 Ok(Expression::DateSub(Box::new(
26775 crate::expressions::DateAddFunc {
26776 this: mysql_ts,
26777 interval: val,
26778 unit,
26779 },
26780 )))
26781 }
26782 _ => {
26783 let cast_ts = if matches!(target, DialectType::DuckDB) {
26784 if name == "DATETIME_SUB" {
26785 Self::ensure_cast_timestamp(ts)
26786 } else if name.starts_with("TIMESTAMP") {
26787 Self::maybe_cast_ts_to_tz(ts, &name)
26788 } else {
26789 ts
26790 }
26791 } else {
26792 ts
26793 };
26794 Ok(Expression::DateSub(Box::new(
26795 crate::expressions::DateAddFunc {
26796 this: cast_ts,
26797 interval: val,
26798 unit,
26799 },
26800 )))
26801 }
26802 }
26803 }
26804
26805 // DATE_SUB(date, INTERVAL n UNIT) -> target-specific
26806 "DATE_SUB" if args.len() == 2 => {
26807 let date = args.remove(0);
26808 let interval_expr = args.remove(0);
26809 let (val, unit) = Self::extract_interval_parts(&interval_expr);
26810
26811 match target {
26812 DialectType::Databricks | DialectType::Spark => {
26813 // Databricks/Spark: DATE_ADD(date, -val)
26814 // Use DateAdd expression with negative val so it generates correctly
26815 // The generator will output DATE_ADD(date, INTERVAL -val DAY)
26816 // Then Databricks transform converts 2-arg DATE_ADD(date, interval) to DATEADD(DAY, interval, date)
26817 // Instead, we directly output as a simple negated DateSub
26818 Ok(Expression::DateSub(Box::new(
26819 crate::expressions::DateAddFunc {
26820 this: date,
26821 interval: val,
26822 unit,
26823 },
26824 )))
26825 }
26826 DialectType::DuckDB => {
26827 // DuckDB: CAST(date AS DATE) - INTERVAL 'val' UNIT
26828 let cast_date = Self::ensure_cast_date(date);
26829 let interval =
26830 Expression::Interval(Box::new(crate::expressions::Interval {
26831 this: Some(val),
26832 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
26833 unit,
26834 use_plural: false,
26835 }),
26836 }));
26837 Ok(Expression::Sub(Box::new(
26838 crate::expressions::BinaryOp::new(cast_date, interval),
26839 )))
26840 }
26841 DialectType::Snowflake => {
26842 // Snowflake: Let Snowflake's own DateSub -> DATEADD(UNIT, val * -1, date) handler work
26843 // Just ensure the date is cast properly
26844 let cast_date = Self::ensure_cast_date(date);
26845 Ok(Expression::DateSub(Box::new(
26846 crate::expressions::DateAddFunc {
26847 this: cast_date,
26848 interval: val,
26849 unit,
26850 },
26851 )))
26852 }
26853 DialectType::PostgreSQL => {
26854 // PostgreSQL: date - INTERVAL 'val UNIT'
26855 let unit_str = Self::interval_unit_to_string(&unit);
26856 let interval =
26857 Expression::Interval(Box::new(crate::expressions::Interval {
26858 this: Some(Expression::Literal(Literal::String(format!(
26859 "{} {}",
26860 Self::expr_to_string(&val),
26861 unit_str
26862 )))),
26863 unit: None,
26864 }));
26865 Ok(Expression::Sub(Box::new(
26866 crate::expressions::BinaryOp::new(date, interval),
26867 )))
26868 }
26869 _ => Ok(Expression::DateSub(Box::new(
26870 crate::expressions::DateAddFunc {
26871 this: date,
26872 interval: val,
26873 unit,
26874 },
26875 ))),
26876 }
26877 }
26878
26879 // DATEADD(unit, val, date) -> target-specific form
26880 // Used by: Redshift, Snowflake, TSQL, ClickHouse
26881 "DATEADD" if args.len() == 3 => {
26882 let arg0 = args.remove(0);
26883 let arg1 = args.remove(0);
26884 let arg2 = args.remove(0);
26885 let unit_str = get_unit_str(&arg0);
26886
26887 if matches!(target, DialectType::Snowflake | DialectType::TSQL) {
26888 // Keep DATEADD(UNIT, val, date) with uppercased unit
26889 let unit = Expression::Identifier(Identifier::new(unit_str));
26890 // Only CAST to DATETIME2 for TSQL target when source is NOT Spark/Databricks family
26891 let date = if matches!(target, DialectType::TSQL)
26892 && !matches!(
26893 source,
26894 DialectType::Spark | DialectType::Databricks | DialectType::Hive
26895 ) {
26896 Self::ensure_cast_datetime2(arg2)
26897 } else {
26898 arg2
26899 };
26900 return Ok(Expression::Function(Box::new(Function::new(
26901 "DATEADD".to_string(),
26902 vec![unit, arg1, date],
26903 ))));
26904 }
26905
26906 if matches!(target, DialectType::DuckDB) {
26907 // DuckDB: date + INTERVAL 'val' UNIT
26908 let iu = parse_interval_unit(&unit_str);
26909 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
26910 this: Some(arg1),
26911 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
26912 unit: iu,
26913 use_plural: false,
26914 }),
26915 }));
26916 let cast_date = Self::ensure_cast_timestamp(arg2);
26917 return Ok(Expression::Add(Box::new(
26918 crate::expressions::BinaryOp::new(cast_date, interval),
26919 )));
26920 }
26921
26922 if matches!(target, DialectType::BigQuery) {
26923 // BigQuery: DATE_ADD(date, INTERVAL val UNIT) or TIMESTAMP_ADD(ts, INTERVAL val UNIT)
26924 let iu = parse_interval_unit(&unit_str);
26925 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
26926 this: Some(arg1),
26927 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
26928 unit: iu,
26929 use_plural: false,
26930 }),
26931 }));
26932 return Ok(Expression::Function(Box::new(Function::new(
26933 "DATE_ADD".to_string(),
26934 vec![arg2, interval],
26935 ))));
26936 }
26937
26938 if matches!(target, DialectType::Databricks) {
26939 // Databricks: keep DATEADD(UNIT, val, date) format
26940 let unit = Expression::Identifier(Identifier::new(unit_str));
26941 return Ok(Expression::Function(Box::new(Function::new(
26942 "DATEADD".to_string(),
26943 vec![unit, arg1, arg2],
26944 ))));
26945 }
26946
26947 if matches!(target, DialectType::Spark) {
26948 // Spark: convert month-based units to ADD_MONTHS, rest to DATE_ADD
26949 fn multiply_expr_dateadd(expr: Expression, factor: i64) -> Expression {
26950 if let Expression::Literal(crate::expressions::Literal::Number(n)) = &expr {
26951 if let Ok(val) = n.parse::<i64>() {
26952 return Expression::Literal(crate::expressions::Literal::Number(
26953 (val * factor).to_string(),
26954 ));
26955 }
26956 }
26957 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
26958 expr,
26959 Expression::Literal(crate::expressions::Literal::Number(
26960 factor.to_string(),
26961 )),
26962 )))
26963 }
26964 match unit_str.as_str() {
26965 "YEAR" => {
26966 let months = multiply_expr_dateadd(arg1, 12);
26967 return Ok(Expression::Function(Box::new(Function::new(
26968 "ADD_MONTHS".to_string(),
26969 vec![arg2, months],
26970 ))));
26971 }
26972 "QUARTER" => {
26973 let months = multiply_expr_dateadd(arg1, 3);
26974 return Ok(Expression::Function(Box::new(Function::new(
26975 "ADD_MONTHS".to_string(),
26976 vec![arg2, months],
26977 ))));
26978 }
26979 "MONTH" => {
26980 return Ok(Expression::Function(Box::new(Function::new(
26981 "ADD_MONTHS".to_string(),
26982 vec![arg2, arg1],
26983 ))));
26984 }
26985 "WEEK" => {
26986 let days = multiply_expr_dateadd(arg1, 7);
26987 return Ok(Expression::Function(Box::new(Function::new(
26988 "DATE_ADD".to_string(),
26989 vec![arg2, days],
26990 ))));
26991 }
26992 "DAY" => {
26993 return Ok(Expression::Function(Box::new(Function::new(
26994 "DATE_ADD".to_string(),
26995 vec![arg2, arg1],
26996 ))));
26997 }
26998 _ => {
26999 let unit = Expression::Identifier(Identifier::new(unit_str));
27000 return Ok(Expression::Function(Box::new(Function::new(
27001 "DATE_ADD".to_string(),
27002 vec![unit, arg1, arg2],
27003 ))));
27004 }
27005 }
27006 }
27007
27008 if matches!(target, DialectType::Hive) {
27009 // Hive: DATE_ADD(date, val) for DAY, or date + INTERVAL for others
27010 match unit_str.as_str() {
27011 "DAY" => {
27012 return Ok(Expression::Function(Box::new(Function::new(
27013 "DATE_ADD".to_string(),
27014 vec![arg2, arg1],
27015 ))));
27016 }
27017 "MONTH" => {
27018 return Ok(Expression::Function(Box::new(Function::new(
27019 "ADD_MONTHS".to_string(),
27020 vec![arg2, arg1],
27021 ))));
27022 }
27023 _ => {
27024 let iu = parse_interval_unit(&unit_str);
27025 let interval =
27026 Expression::Interval(Box::new(crate::expressions::Interval {
27027 this: Some(arg1),
27028 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
27029 unit: iu,
27030 use_plural: false,
27031 }),
27032 }));
27033 return Ok(Expression::Add(Box::new(
27034 crate::expressions::BinaryOp::new(arg2, interval),
27035 )));
27036 }
27037 }
27038 }
27039
27040 if matches!(target, DialectType::PostgreSQL) {
27041 // PostgreSQL: date + INTERVAL 'val UNIT'
27042 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
27043 this: Some(Expression::Literal(Literal::String(format!(
27044 "{} {}",
27045 Self::expr_to_string(&arg1),
27046 unit_str
27047 )))),
27048 unit: None,
27049 }));
27050 return Ok(Expression::Add(Box::new(
27051 crate::expressions::BinaryOp::new(arg2, interval),
27052 )));
27053 }
27054
27055 if matches!(
27056 target,
27057 DialectType::Presto | DialectType::Trino | DialectType::Athena
27058 ) {
27059 // Presto/Trino: DATE_ADD('UNIT', val, date)
27060 return Ok(Expression::Function(Box::new(Function::new(
27061 "DATE_ADD".to_string(),
27062 vec![Expression::Literal(Literal::String(unit_str)), arg1, arg2],
27063 ))));
27064 }
27065
27066 if matches!(target, DialectType::ClickHouse) {
27067 // ClickHouse: DATE_ADD(UNIT, val, date)
27068 let unit = Expression::Identifier(Identifier::new(unit_str));
27069 return Ok(Expression::Function(Box::new(Function::new(
27070 "DATE_ADD".to_string(),
27071 vec![unit, arg1, arg2],
27072 ))));
27073 }
27074
27075 // Default: keep DATEADD with uppercased unit
27076 let unit = Expression::Identifier(Identifier::new(unit_str));
27077 Ok(Expression::Function(Box::new(Function::new(
27078 "DATEADD".to_string(),
27079 vec![unit, arg1, arg2],
27080 ))))
27081 }
27082
27083 // DATE_ADD(unit, val, date) - 3 arg form from ClickHouse/Presto
27084 "DATE_ADD" if args.len() == 3 => {
27085 let arg0 = args.remove(0);
27086 let arg1 = args.remove(0);
27087 let arg2 = args.remove(0);
27088 let unit_str = get_unit_str(&arg0);
27089
27090 if matches!(
27091 target,
27092 DialectType::Presto | DialectType::Trino | DialectType::Athena
27093 ) {
27094 // Presto/Trino: DATE_ADD('UNIT', val, date)
27095 return Ok(Expression::Function(Box::new(Function::new(
27096 "DATE_ADD".to_string(),
27097 vec![Expression::Literal(Literal::String(unit_str)), arg1, arg2],
27098 ))));
27099 }
27100
27101 if matches!(
27102 target,
27103 DialectType::Snowflake | DialectType::TSQL | DialectType::Redshift
27104 ) {
27105 // DATEADD(UNIT, val, date)
27106 let unit = Expression::Identifier(Identifier::new(unit_str));
27107 let date = if matches!(target, DialectType::TSQL) {
27108 Self::ensure_cast_datetime2(arg2)
27109 } else {
27110 arg2
27111 };
27112 return Ok(Expression::Function(Box::new(Function::new(
27113 "DATEADD".to_string(),
27114 vec![unit, arg1, date],
27115 ))));
27116 }
27117
27118 if matches!(target, DialectType::DuckDB) {
27119 // DuckDB: date + INTERVAL val UNIT
27120 let iu = parse_interval_unit(&unit_str);
27121 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
27122 this: Some(arg1),
27123 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
27124 unit: iu,
27125 use_plural: false,
27126 }),
27127 }));
27128 return Ok(Expression::Add(Box::new(
27129 crate::expressions::BinaryOp::new(arg2, interval),
27130 )));
27131 }
27132
27133 if matches!(target, DialectType::Spark | DialectType::Databricks) {
27134 // Spark: DATE_ADD(UNIT, val, date) with uppercased unit
27135 let unit = Expression::Identifier(Identifier::new(unit_str));
27136 return Ok(Expression::Function(Box::new(Function::new(
27137 "DATE_ADD".to_string(),
27138 vec![unit, arg1, arg2],
27139 ))));
27140 }
27141
27142 // Default: DATE_ADD(UNIT, val, date)
27143 let unit = Expression::Identifier(Identifier::new(unit_str));
27144 Ok(Expression::Function(Box::new(Function::new(
27145 "DATE_ADD".to_string(),
27146 vec![unit, arg1, arg2],
27147 ))))
27148 }
27149
27150 // DATE_ADD(date, INTERVAL val UNIT) - 2 arg BigQuery form
27151 "DATE_ADD" if args.len() == 2 => {
27152 let date = args.remove(0);
27153 let interval_expr = args.remove(0);
27154 let (val, unit) = Self::extract_interval_parts(&interval_expr);
27155 let unit_str = Self::interval_unit_to_string(&unit);
27156
27157 match target {
27158 DialectType::DuckDB => {
27159 // DuckDB: CAST(date AS DATE) + INTERVAL 'val' UNIT
27160 let cast_date = Self::ensure_cast_date(date);
27161 let quoted_val = Self::quote_interval_val(&val);
27162 let interval =
27163 Expression::Interval(Box::new(crate::expressions::Interval {
27164 this: Some(quoted_val),
27165 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
27166 unit,
27167 use_plural: false,
27168 }),
27169 }));
27170 Ok(Expression::Add(Box::new(
27171 crate::expressions::BinaryOp::new(cast_date, interval),
27172 )))
27173 }
27174 DialectType::PostgreSQL => {
27175 // PostgreSQL: date + INTERVAL 'val UNIT'
27176 let interval =
27177 Expression::Interval(Box::new(crate::expressions::Interval {
27178 this: Some(Expression::Literal(Literal::String(format!(
27179 "{} {}",
27180 Self::expr_to_string(&val),
27181 unit_str
27182 )))),
27183 unit: None,
27184 }));
27185 Ok(Expression::Add(Box::new(
27186 crate::expressions::BinaryOp::new(date, interval),
27187 )))
27188 }
27189 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
27190 // Presto: DATE_ADD('UNIT', CAST('val' AS BIGINT), date)
27191 let val_str = Self::expr_to_string(&val);
27192 Ok(Expression::Function(Box::new(Function::new(
27193 "DATE_ADD".to_string(),
27194 vec![
27195 Expression::Literal(Literal::String(unit_str)),
27196 Expression::Cast(Box::new(Cast {
27197 this: Expression::Literal(Literal::String(val_str)),
27198 to: DataType::BigInt { length: None },
27199 trailing_comments: vec![],
27200 double_colon_syntax: false,
27201 format: None,
27202 default: None,
27203 })),
27204 date,
27205 ],
27206 ))))
27207 }
27208 DialectType::Spark | DialectType::Hive => {
27209 // Spark/Hive: DATE_ADD(date, val) for DAY
27210 match unit_str.as_str() {
27211 "DAY" => Ok(Expression::Function(Box::new(Function::new(
27212 "DATE_ADD".to_string(),
27213 vec![date, val],
27214 )))),
27215 "MONTH" => Ok(Expression::Function(Box::new(Function::new(
27216 "ADD_MONTHS".to_string(),
27217 vec![date, val],
27218 )))),
27219 _ => {
27220 let iu = parse_interval_unit(&unit_str);
27221 let interval =
27222 Expression::Interval(Box::new(crate::expressions::Interval {
27223 this: Some(val),
27224 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
27225 unit: iu,
27226 use_plural: false,
27227 }),
27228 }));
27229 Ok(Expression::Function(Box::new(Function::new(
27230 "DATE_ADD".to_string(),
27231 vec![date, interval],
27232 ))))
27233 }
27234 }
27235 }
27236 DialectType::Snowflake => {
27237 // Snowflake: DATEADD(UNIT, 'val', CAST(date AS DATE))
27238 let cast_date = Self::ensure_cast_date(date);
27239 let val_str = Self::expr_to_string(&val);
27240 Ok(Expression::Function(Box::new(Function::new(
27241 "DATEADD".to_string(),
27242 vec![
27243 Expression::Identifier(Identifier::new(unit_str)),
27244 Expression::Literal(Literal::String(val_str)),
27245 cast_date,
27246 ],
27247 ))))
27248 }
27249 DialectType::TSQL | DialectType::Fabric => {
27250 let cast_date = Self::ensure_cast_datetime2(date);
27251 Ok(Expression::Function(Box::new(Function::new(
27252 "DATEADD".to_string(),
27253 vec![
27254 Expression::Identifier(Identifier::new(unit_str)),
27255 val,
27256 cast_date,
27257 ],
27258 ))))
27259 }
27260 DialectType::Redshift => Ok(Expression::Function(Box::new(Function::new(
27261 "DATEADD".to_string(),
27262 vec![Expression::Identifier(Identifier::new(unit_str)), val, date],
27263 )))),
27264 DialectType::MySQL => {
27265 // MySQL: DATE_ADD(date, INTERVAL 'val' UNIT)
27266 let quoted_val = Self::quote_interval_val(&val);
27267 let iu = parse_interval_unit(&unit_str);
27268 let interval =
27269 Expression::Interval(Box::new(crate::expressions::Interval {
27270 this: Some(quoted_val),
27271 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
27272 unit: iu,
27273 use_plural: false,
27274 }),
27275 }));
27276 Ok(Expression::Function(Box::new(Function::new(
27277 "DATE_ADD".to_string(),
27278 vec![date, interval],
27279 ))))
27280 }
27281 DialectType::BigQuery => {
27282 // BigQuery: DATE_ADD(date, INTERVAL 'val' UNIT)
27283 let quoted_val = Self::quote_interval_val(&val);
27284 let iu = parse_interval_unit(&unit_str);
27285 let interval =
27286 Expression::Interval(Box::new(crate::expressions::Interval {
27287 this: Some(quoted_val),
27288 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
27289 unit: iu,
27290 use_plural: false,
27291 }),
27292 }));
27293 Ok(Expression::Function(Box::new(Function::new(
27294 "DATE_ADD".to_string(),
27295 vec![date, interval],
27296 ))))
27297 }
27298 DialectType::Databricks => Ok(Expression::Function(Box::new(Function::new(
27299 "DATEADD".to_string(),
27300 vec![Expression::Identifier(Identifier::new(unit_str)), val, date],
27301 )))),
27302 _ => {
27303 // Default: keep as DATE_ADD with decomposed interval
27304 Ok(Expression::DateAdd(Box::new(
27305 crate::expressions::DateAddFunc {
27306 this: date,
27307 interval: val,
27308 unit,
27309 },
27310 )))
27311 }
27312 }
27313 }
27314
27315 // ADD_MONTHS(date, val) -> target-specific form
27316 "ADD_MONTHS" if args.len() == 2 => {
27317 let date = args.remove(0);
27318 let val = args.remove(0);
27319
27320 if matches!(target, DialectType::TSQL) {
27321 // TSQL: DATEADD(MONTH, val, CAST(date AS DATETIME2))
27322 let cast_date = Self::ensure_cast_datetime2(date);
27323 return Ok(Expression::Function(Box::new(Function::new(
27324 "DATEADD".to_string(),
27325 vec![
27326 Expression::Identifier(Identifier::new("MONTH")),
27327 val,
27328 cast_date,
27329 ],
27330 ))));
27331 }
27332
27333 if matches!(target, DialectType::DuckDB) {
27334 // DuckDB: date + INTERVAL val MONTH
27335 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
27336 this: Some(val),
27337 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
27338 unit: crate::expressions::IntervalUnit::Month,
27339 use_plural: false,
27340 }),
27341 }));
27342 return Ok(Expression::Add(Box::new(
27343 crate::expressions::BinaryOp::new(date, interval),
27344 )));
27345 }
27346
27347 if matches!(target, DialectType::Snowflake) {
27348 // Snowflake: keep ADD_MONTHS when source is also Snowflake, else DATEADD
27349 if matches!(source, DialectType::Snowflake) {
27350 return Ok(Expression::Function(Box::new(Function::new(
27351 "ADD_MONTHS".to_string(),
27352 vec![date, val],
27353 ))));
27354 }
27355 return Ok(Expression::Function(Box::new(Function::new(
27356 "DATEADD".to_string(),
27357 vec![Expression::Identifier(Identifier::new("MONTH")), val, date],
27358 ))));
27359 }
27360
27361 if matches!(target, DialectType::Spark | DialectType::Databricks) {
27362 // Spark: ADD_MONTHS(date, val) - keep as is
27363 return Ok(Expression::Function(Box::new(Function::new(
27364 "ADD_MONTHS".to_string(),
27365 vec![date, val],
27366 ))));
27367 }
27368
27369 if matches!(target, DialectType::Hive) {
27370 return Ok(Expression::Function(Box::new(Function::new(
27371 "ADD_MONTHS".to_string(),
27372 vec![date, val],
27373 ))));
27374 }
27375
27376 if matches!(
27377 target,
27378 DialectType::Presto | DialectType::Trino | DialectType::Athena
27379 ) {
27380 // Presto: DATE_ADD('MONTH', val, date)
27381 return Ok(Expression::Function(Box::new(Function::new(
27382 "DATE_ADD".to_string(),
27383 vec![
27384 Expression::Literal(Literal::String("MONTH".to_string())),
27385 val,
27386 date,
27387 ],
27388 ))));
27389 }
27390
27391 // Default: keep ADD_MONTHS
27392 Ok(Expression::Function(Box::new(Function::new(
27393 "ADD_MONTHS".to_string(),
27394 vec![date, val],
27395 ))))
27396 }
27397
            // SAFE_DIVIDE(x, y) -> target-specific form directly.
            // BigQuery's SAFE_DIVIDE returns NULL when y = 0; every target form
            // below reproduces that with a native conditional (CASE / IFF / IF).
            "SAFE_DIVIDE" if args.len() == 2 => {
                let x = args.remove(0);
                let y = args.remove(0);
                // Wrap x and y in parens if they're complex expressions so the
                // generated `y <> 0` and `x / y` keep their intended precedence.
                let y_ref = match &y {
                    Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => {
                        y.clone()
                    }
                    _ => Expression::Paren(Box::new(Paren {
                        this: y.clone(),
                        trailing_comments: vec![],
                    })),
                };
                let x_ref = match &x {
                    Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => {
                        x.clone()
                    }
                    _ => Expression::Paren(Box::new(Paren {
                        this: x.clone(),
                        trailing_comments: vec![],
                    })),
                };
                // Shared building blocks: the `y <> 0` guard and the plain quotient.
                let condition = Expression::Neq(Box::new(crate::expressions::BinaryOp::new(
                    y_ref.clone(),
                    Expression::number(0),
                )));
                let div_expr = Expression::Div(Box::new(crate::expressions::BinaryOp::new(
                    x_ref.clone(),
                    y_ref.clone(),
                )));

                match target {
                    DialectType::DuckDB | DialectType::PostgreSQL => {
                        // CASE WHEN y <> 0 THEN x / y ELSE NULL END
                        // PostgreSQL additionally casts x to DOUBLE PRECISION —
                        // presumably to avoid integer-division truncation; DuckDB
                        // uses the plain quotient. TODO confirm DuckDB `/` on
                        // integers matches BigQuery's float semantics here.
                        let result_div = if matches!(target, DialectType::PostgreSQL) {
                            let cast_x = Expression::Cast(Box::new(Cast {
                                this: x_ref,
                                to: DataType::Custom {
                                    name: "DOUBLE PRECISION".to_string(),
                                },
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                            }));
                            Expression::Div(Box::new(crate::expressions::BinaryOp::new(
                                cast_x, y_ref,
                            )))
                        } else {
                            div_expr
                        };
                        Ok(Expression::Case(Box::new(crate::expressions::Case {
                            operand: None,
                            whens: vec![(condition, result_div)],
                            else_: Some(Expression::Null(crate::expressions::Null)),
                            comments: Vec::new(),
                        })))
                    }
                    DialectType::Snowflake => {
                        // IFF(y <> 0, x / y, NULL) — original_name keeps the
                        // Snowflake spelling "IFF" through generation.
                        Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
                            condition,
                            true_value: div_expr,
                            false_value: Some(Expression::Null(crate::expressions::Null)),
                            original_name: Some("IFF".to_string()),
                        })))
                    }
                    DialectType::Presto | DialectType::Trino => {
                        // IF(y <> 0, CAST(x AS DOUBLE) / y, NULL)
                        let cast_x = Expression::Cast(Box::new(Cast {
                            this: x_ref,
                            to: DataType::Double {
                                precision: None,
                                scale: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        let cast_div = Expression::Div(Box::new(
                            crate::expressions::BinaryOp::new(cast_x, y_ref),
                        ));
                        Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
                            condition,
                            true_value: cast_div,
                            false_value: Some(Expression::Null(crate::expressions::Null)),
                            original_name: None,
                        })))
                    }
                    _ => {
                        // Default: IF(y <> 0, x / y, NULL)
                        Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
                            condition,
                            true_value: div_expr,
                            false_value: Some(Expression::Null(crate::expressions::Null)),
                            original_name: None,
                        })))
                    }
                }
            }
27500
            // GENERATE_UUID() -> UUID() with CAST to string.
            // BigQuery's GENERATE_UUID() yields a STRING, so for targets whose
            // UUID() is presumably a native UUID type the result is cast back to
            // the target's string type — TODO confirm per-target return types.
            "GENERATE_UUID" => {
                let uuid_expr = Expression::Uuid(Box::new(crate::expressions::Uuid {
                    this: None,
                    name: None,
                    is_string: None,
                }));
                // Most targets need CAST(UUID() AS TEXT/VARCHAR/STRING)
                let cast_type = match target {
                    DialectType::DuckDB => Some(DataType::Text),
                    DialectType::Presto | DialectType::Trino => Some(DataType::VarChar {
                        length: None,
                        parenthesized_length: false,
                    }),
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
                        Some(DataType::String { length: None })
                    }
                    _ => None,
                };
                if let Some(dt) = cast_type {
                    Ok(Expression::Cast(Box::new(Cast {
                        this: uuid_expr,
                        to: dt,
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    })))
                } else {
                    // Remaining targets take the bare UUID() expression.
                    Ok(uuid_expr)
                }
            }

            // COUNTIF(x) -> CountIf expression (normalized aggregate node;
            // dialect-specific rendering happens at generation time).
            "COUNTIF" if args.len() == 1 => {
                let arg = args.remove(0);
                Ok(Expression::CountIf(Box::new(crate::expressions::AggFunc {
                    this: arg,
                    distinct: false,
                    filter: None,
                    order_by: vec![],
                    name: None,
                    ignore_nulls: None,
                    having_max: None,
                    limit: None,
                })))
            }
27548
            // EDIT_DISTANCE(col1, col2, ...) -> Levenshtein expression.
            // BigQuery allows a named `max_distance => N` argument; named args are
            // flattened to positional ones before dispatch.
            "EDIT_DISTANCE" => {
                // Strip named arguments (max_distance => N) and pass as positional
                let mut positional_args: Vec<Expression> = vec![];
                for arg in args {
                    match arg {
                        Expression::NamedArgument(na) => {
                            positional_args.push(na.value);
                        }
                        other => positional_args.push(other),
                    }
                }
                if positional_args.len() >= 2 {
                    let col1 = positional_args.remove(0);
                    let col2 = positional_args.remove(0);
                    let levenshtein = crate::expressions::BinaryFunc {
                        this: col1,
                        expression: col2,
                        original_name: None,
                    };
                    // Pass extra args through a function wrapper with all args
                    if !positional_args.is_empty() {
                        let max_dist = positional_args.remove(0);
                        // DuckDB: CASE WHEN LEVENSHTEIN(a, b) IS NULL OR max IS NULL THEN NULL ELSE LEAST(LEVENSHTEIN(a, b), max) END
                        // (DuckDB's LEVENSHTEIN has no max-distance parameter, so the
                        // cap is emulated with LEAST plus explicit NULL propagation.)
                        if matches!(target, DialectType::DuckDB) {
                            let lev = Expression::Function(Box::new(Function::new(
                                "LEVENSHTEIN".to_string(),
                                vec![levenshtein.this, levenshtein.expression],
                            )));
                            let lev_is_null =
                                Expression::IsNull(Box::new(crate::expressions::IsNull {
                                    this: lev.clone(),
                                    not: false,
                                    postfix_form: false,
                                }));
                            let max_is_null =
                                Expression::IsNull(Box::new(crate::expressions::IsNull {
                                    this: max_dist.clone(),
                                    not: false,
                                    postfix_form: false,
                                }));
                            let null_check =
                                Expression::Or(Box::new(crate::expressions::BinaryOp {
                                    left: lev_is_null,
                                    right: max_is_null,
                                    left_comments: Vec::new(),
                                    operator_comments: Vec::new(),
                                    trailing_comments: Vec::new(),
                                }));
                            let least =
                                Expression::Least(Box::new(crate::expressions::VarArgFunc {
                                    expressions: vec![lev, max_dist],
                                    original_name: None,
                                }));
                            return Ok(Expression::Case(Box::new(crate::expressions::Case {
                                operand: None,
                                whens: vec![(
                                    null_check,
                                    Expression::Null(crate::expressions::Null),
                                )],
                                else_: Some(least),
                                comments: Vec::new(),
                            })));
                        }
                        let mut all_args = vec![levenshtein.this, levenshtein.expression, max_dist];
                        all_args.extend(positional_args);
                        // PostgreSQL: use LEVENSHTEIN_LESS_EQUAL when max_distance is provided
                        let func_name = if matches!(target, DialectType::PostgreSQL) {
                            "LEVENSHTEIN_LESS_EQUAL"
                        } else {
                            "LEVENSHTEIN"
                        };
                        return Ok(Expression::Function(Box::new(Function::new(
                            func_name.to_string(),
                            all_args,
                        ))));
                    }
                    // Two-argument form: emit the normalized Levenshtein node.
                    Ok(Expression::Levenshtein(Box::new(levenshtein)))
                } else {
                    // Fewer than two args: pass through unchanged.
                    Ok(Expression::Function(Box::new(Function::new(
                        "EDIT_DISTANCE".to_string(),
                        positional_args,
                    ))))
                }
            }
27634
            // TIMESTAMP_SECONDS(x) -> UnixToTime with scale 0.
            // The `scale` field encodes the sub-second precision of the epoch
            // input across these three arms: 0 = seconds, 3 = millis, 6 = micros.
            "TIMESTAMP_SECONDS" if args.len() == 1 => {
                let arg = args.remove(0);
                Ok(Expression::UnixToTime(Box::new(
                    crate::expressions::UnixToTime {
                        this: Box::new(arg),
                        scale: Some(0),
                        zone: None,
                        hours: None,
                        minutes: None,
                        format: None,
                        target_type: None,
                    },
                )))
            }

            // TIMESTAMP_MILLIS(x) -> UnixToTime with scale 3
            "TIMESTAMP_MILLIS" if args.len() == 1 => {
                let arg = args.remove(0);
                Ok(Expression::UnixToTime(Box::new(
                    crate::expressions::UnixToTime {
                        this: Box::new(arg),
                        scale: Some(3),
                        zone: None,
                        hours: None,
                        minutes: None,
                        format: None,
                        target_type: None,
                    },
                )))
            }

            // TIMESTAMP_MICROS(x) -> UnixToTime with scale 6
            "TIMESTAMP_MICROS" if args.len() == 1 => {
                let arg = args.remove(0);
                Ok(Expression::UnixToTime(Box::new(
                    crate::expressions::UnixToTime {
                        this: Box::new(arg),
                        scale: Some(6),
                        zone: None,
                        hours: None,
                        minutes: None,
                        format: None,
                        target_type: None,
                    },
                )))
            }

            // DIV(x, y) -> IntDiv expression (integer division, rendered in the
            // target's native spelling by the generator).
            "DIV" if args.len() == 2 => {
                let x = args.remove(0);
                let y = args.remove(0);
                Ok(Expression::IntDiv(Box::new(
                    crate::expressions::BinaryFunc {
                        this: x,
                        expression: y,
                        original_name: None,
                    },
                )))
            }
27695
            // TO_HEX(x) -> target-specific form.
            // BigQuery's TO_HEX produces lowercase hex, hence the LOWER(HEX(...))
            // wrappers for targets whose HEX/TO_HEX casing is not guaranteed.
            "TO_HEX" if args.len() == 1 => {
                let arg = args.remove(0);
                // Check if inner function already returns hex string in certain targets.
                // NOTE(review): this match is case-sensitive ("MD5", not "md5") —
                // confirm the parser has already upper-cased function names here.
                let inner_returns_hex = matches!(&arg, Expression::Function(f) if matches!(f.name.as_str(), "MD5" | "SHA1" | "SHA256" | "SHA512"));
                if matches!(target, DialectType::BigQuery) {
                    // BQ->BQ: keep as TO_HEX
                    Ok(Expression::Function(Box::new(Function::new(
                        "TO_HEX".to_string(),
                        vec![arg],
                    ))))
                } else if matches!(target, DialectType::DuckDB) && inner_returns_hex {
                    // DuckDB: MD5/SHA already return hex strings, so TO_HEX is redundant
                    Ok(arg)
                } else if matches!(target, DialectType::Snowflake) && inner_returns_hex {
                    // Snowflake: TO_HEX(SHA1(x)) -> TO_CHAR(SHA1_BINARY(x))
                    // TO_HEX(MD5(x)) -> TO_CHAR(MD5_BINARY(x))
                    // TO_HEX(SHA256(x)) -> TO_CHAR(SHA2_BINARY(x, 256))
                    // TO_HEX(SHA512(x)) -> TO_CHAR(SHA2_BINARY(x, 512))
                    if let Expression::Function(ref inner_f) = arg {
                        let inner_args = inner_f.args.clone();
                        let binary_func = match inner_f.name.to_uppercase().as_str() {
                            "SHA1" => Expression::Function(Box::new(Function::new(
                                "SHA1_BINARY".to_string(),
                                inner_args,
                            ))),
                            "MD5" => Expression::Function(Box::new(Function::new(
                                "MD5_BINARY".to_string(),
                                inner_args,
                            ))),
                            "SHA256" => {
                                // SHA2_BINARY takes the digest size as a second arg.
                                let mut a = inner_args;
                                a.push(Expression::number(256));
                                Expression::Function(Box::new(Function::new(
                                    "SHA2_BINARY".to_string(),
                                    a,
                                )))
                            }
                            "SHA512" => {
                                let mut a = inner_args;
                                a.push(Expression::number(512));
                                Expression::Function(Box::new(Function::new(
                                    "SHA2_BINARY".to_string(),
                                    a,
                                )))
                            }
                            // Defensive fallback; unreachable in practice because
                            // inner_returns_hex already restricted the name set.
                            _ => arg.clone(),
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "TO_CHAR".to_string(),
                            vec![binary_func],
                        ))))
                    } else {
                        // Defensive fallback; inner_returns_hex implies arg is a
                        // Function, so this branch should not be reachable.
                        let inner = Expression::Function(Box::new(Function::new(
                            "HEX".to_string(),
                            vec![arg],
                        )));
                        Ok(Expression::Lower(Box::new(
                            crate::expressions::UnaryFunc::new(inner),
                        )))
                    }
                } else if matches!(target, DialectType::Presto | DialectType::Trino) {
                    // Presto/Trino keep the TO_HEX name but need LOWER for casing.
                    let inner = Expression::Function(Box::new(Function::new(
                        "TO_HEX".to_string(),
                        vec![arg],
                    )));
                    Ok(Expression::Lower(Box::new(
                        crate::expressions::UnaryFunc::new(inner),
                    )))
                } else {
                    // Default: LOWER(HEX(x))
                    let inner =
                        Expression::Function(Box::new(Function::new("HEX".to_string(), vec![arg])));
                    Ok(Expression::Lower(Box::new(
                        crate::expressions::UnaryFunc::new(inner),
                    )))
                }
            }

            // LAST_DAY(date, unit) -> LAST_DAY(date): the unit argument is always
            // stripped (MONTH is BigQuery's default). NOTE(review): a non-MONTH
            // unit (e.g. YEAR/WEEK) is silently dropped here — confirm intended.
            "LAST_DAY" if args.len() == 2 => {
                let date = args.remove(0);
                let _unit = args.remove(0); // Strip the unit (MONTH is default)
                Ok(Expression::Function(Box::new(Function::new(
                    "LAST_DAY".to_string(),
                    vec![date],
                ))))
            }
27783
            // GENERATE_ARRAY(start, end, step?) -> GenerateSeries expression.
            // Missing args become None; the generator handles defaults.
            "GENERATE_ARRAY" => {
                let start = args.get(0).cloned();
                let end = args.get(1).cloned();
                let step = args.get(2).cloned();
                Ok(Expression::GenerateSeries(Box::new(
                    crate::expressions::GenerateSeries {
                        start: start.map(Box::new),
                        end: end.map(Box::new),
                        step: step.map(Box::new),
                        is_end_exclusive: None,
                    },
                )))
            }

            // GENERATE_TIMESTAMP_ARRAY(start, end, step) -> GenerateSeries expression
            "GENERATE_TIMESTAMP_ARRAY" => {
                let start = args.get(0).cloned();
                let end = args.get(1).cloned();
                let step = args.get(2).cloned();

                if matches!(target, DialectType::DuckDB) {
                    // DuckDB: GENERATE_SERIES(CAST(start AS TIMESTAMP), CAST(end AS TIMESTAMP), step)
                    // Only cast string literals - leave columns/expressions as-is
                    let maybe_cast_ts = |expr: Expression| -> Expression {
                        if matches!(&expr, Expression::Literal(Literal::String(_))) {
                            Expression::Cast(Box::new(Cast {
                                this: expr,
                                to: DataType::Timestamp {
                                    precision: None,
                                    timezone: false,
                                },
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                            }))
                        } else {
                            expr
                        }
                    };
                    let cast_start = start.map(maybe_cast_ts);
                    let cast_end = end.map(maybe_cast_ts);
                    Ok(Expression::GenerateSeries(Box::new(
                        crate::expressions::GenerateSeries {
                            start: cast_start.map(Box::new),
                            end: cast_end.map(Box::new),
                            step: step.map(Box::new),
                            is_end_exclusive: None,
                        },
                    )))
                } else {
                    // Other targets: pass args through to GenerateSeries unchanged.
                    Ok(Expression::GenerateSeries(Box::new(
                        crate::expressions::GenerateSeries {
                            start: start.map(Box::new),
                            end: end.map(Box::new),
                            step: step.map(Box::new),
                            is_end_exclusive: None,
                        },
                    )))
                }
            }
27846
            // TO_JSON(x) -> target-specific (from Spark/Hive).
            // NOTE(review): the Presto/Trino and DuckDB branches are duplicated in
            // the TO_JSON_STRING arm below — candidates for a shared helper.
            "TO_JSON" => {
                match target {
                    DialectType::Presto | DialectType::Trino => {
                        // JSON_FORMAT(CAST(x AS JSON))
                        let arg = args
                            .into_iter()
                            .next()
                            .unwrap_or(Expression::Null(crate::expressions::Null));
                        let cast_json = Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Custom {
                                name: "JSON".to_string(),
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "JSON_FORMAT".to_string(),
                            vec![cast_json],
                        ))))
                    }
                    DialectType::BigQuery => Ok(Expression::Function(Box::new(Function::new(
                        "TO_JSON_STRING".to_string(),
                        args,
                    )))),
                    DialectType::DuckDB => {
                        // CAST(TO_JSON(x) AS TEXT)
                        let arg = args
                            .into_iter()
                            .next()
                            .unwrap_or(Expression::Null(crate::expressions::Null));
                        let to_json = Expression::Function(Box::new(Function::new(
                            "TO_JSON".to_string(),
                            vec![arg],
                        )));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: to_json,
                            to: DataType::Text,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "TO_JSON".to_string(),
                        args,
                    )))),
                }
            }

            // TO_JSON_STRING(x) -> target-specific (BigQuery-native name).
            "TO_JSON_STRING" => {
                match target {
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive => Ok(
                        Expression::Function(Box::new(Function::new("TO_JSON".to_string(), args))),
                    ),
                    DialectType::Presto | DialectType::Trino => {
                        // JSON_FORMAT(CAST(x AS JSON))
                        let arg = args
                            .into_iter()
                            .next()
                            .unwrap_or(Expression::Null(crate::expressions::Null));
                        let cast_json = Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Custom {
                                name: "JSON".to_string(),
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "JSON_FORMAT".to_string(),
                            vec![cast_json],
                        ))))
                    }
                    DialectType::DuckDB => {
                        // CAST(TO_JSON(x) AS TEXT)
                        let arg = args
                            .into_iter()
                            .next()
                            .unwrap_or(Expression::Null(crate::expressions::Null));
                        let to_json = Expression::Function(Box::new(Function::new(
                            "TO_JSON".to_string(),
                            vec![arg],
                        )));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: to_json,
                            to: DataType::Text,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    }
                    DialectType::Snowflake => {
                        // TO_JSON(x)
                        Ok(Expression::Function(Box::new(Function::new(
                            "TO_JSON".to_string(),
                            args,
                        ))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "TO_JSON_STRING".to_string(),
                        args,
                    )))),
                }
            }
27960
            // SAFE_ADD(x, y) -> SafeAdd expression (normalized node; the
            // NULL-on-overflow semantics are handled at generation time).
            "SAFE_ADD" if args.len() == 2 => {
                let x = args.remove(0);
                let y = args.remove(0);
                Ok(Expression::SafeAdd(Box::new(crate::expressions::SafeAdd {
                    this: Box::new(x),
                    expression: Box::new(y),
                })))
            }

            // SAFE_SUBTRACT(x, y) -> SafeSubtract expression
            "SAFE_SUBTRACT" if args.len() == 2 => {
                let x = args.remove(0);
                let y = args.remove(0);
                Ok(Expression::SafeSubtract(Box::new(
                    crate::expressions::SafeSubtract {
                        this: Box::new(x),
                        expression: Box::new(y),
                    },
                )))
            }

            // SAFE_MULTIPLY(x, y) -> SafeMultiply expression
            "SAFE_MULTIPLY" if args.len() == 2 => {
                let x = args.remove(0);
                let y = args.remove(0);
                Ok(Expression::SafeMultiply(Box::new(
                    crate::expressions::SafeMultiply {
                        this: Box::new(x),
                        expression: Box::new(y),
                    },
                )))
            }

            // REGEXP_CONTAINS(str, pattern) -> RegexpLike expression
            "REGEXP_CONTAINS" if args.len() == 2 => {
                let str_expr = args.remove(0);
                let pattern = args.remove(0);
                Ok(Expression::RegexpLike(Box::new(
                    crate::expressions::RegexpFunc {
                        this: str_expr,
                        pattern,
                        flags: None,
                    },
                )))
            }

            // CONTAINS_SUBSTR(a, b) -> CONTAINS(LOWER(a), LOWER(b))
            // (LOWER on both sides reproduces BigQuery's case-insensitive match.)
            "CONTAINS_SUBSTR" if args.len() == 2 => {
                let a = args.remove(0);
                let b = args.remove(0);
                let lower_a = Expression::Lower(Box::new(crate::expressions::UnaryFunc::new(a)));
                let lower_b = Expression::Lower(Box::new(crate::expressions::UnaryFunc::new(b)));
                Ok(Expression::Function(Box::new(Function::new(
                    "CONTAINS".to_string(),
                    vec![lower_a, lower_b],
                ))))
            }

            // INT64(x) -> CAST(x AS BIGINT)
            "INT64" if args.len() == 1 => {
                let arg = args.remove(0);
                Ok(Expression::Cast(Box::new(Cast {
                    this: arg,
                    to: DataType::BigInt { length: None },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                })))
            }
28032
28033 // INSTR(str, substr) -> target-specific
28034 "INSTR" if args.len() >= 2 => {
28035 let str_expr = args.remove(0);
28036 let substr = args.remove(0);
28037 if matches!(target, DialectType::Snowflake) {
28038 // CHARINDEX(substr, str)
28039 Ok(Expression::Function(Box::new(Function::new(
28040 "CHARINDEX".to_string(),
28041 vec![substr, str_expr],
28042 ))))
28043 } else if matches!(target, DialectType::BigQuery) {
28044 // Keep as INSTR
28045 Ok(Expression::Function(Box::new(Function::new(
28046 "INSTR".to_string(),
28047 vec![str_expr, substr],
28048 ))))
28049 } else {
28050 // Default: keep as INSTR
28051 Ok(Expression::Function(Box::new(Function::new(
28052 "INSTR".to_string(),
28053 vec![str_expr, substr],
28054 ))))
28055 }
28056 }
28057
            // BigQuery DATE_TRUNC(expr, unit) -> DATE_TRUNC('unit', expr) for standard SQL.
            // BigQuery puts the unit second and unquoted; most other engines take
            // a quoted unit string first.
            "DATE_TRUNC" if args.len() == 2 => {
                let expr = args.remove(0);
                let unit_expr = args.remove(0);
                let unit_str = get_unit_str(&unit_expr);

                match target {
                    DialectType::DuckDB
                    | DialectType::Snowflake
                    | DialectType::PostgreSQL
                    | DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Databricks
                    | DialectType::Spark
                    | DialectType::Redshift
                    | DialectType::ClickHouse
                    | DialectType::TSQL => {
                        // Standard: DATE_TRUNC('UNIT', expr)
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_TRUNC".to_string(),
                            vec![Expression::Literal(Literal::String(unit_str)), expr],
                        ))))
                    }
                    _ => {
                        // Keep BigQuery arg order: DATE_TRUNC(expr, unit)
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_TRUNC".to_string(),
                            vec![expr, unit_expr],
                        ))))
                    }
                }
            }
28090
            // TIMESTAMP_TRUNC / DATETIME_TRUNC -> target-specific
            "TIMESTAMP_TRUNC" | "DATETIME_TRUNC" if args.len() >= 2 => {
                // TIMESTAMP_TRUNC(ts, unit) or TIMESTAMP_TRUNC(ts, unit, timezone)
                let ts = args.remove(0);
                let unit_expr = args.remove(0);
                let tz = if !args.is_empty() {
                    Some(args.remove(0))
                } else {
                    None
                };
                let unit_str = get_unit_str(&unit_expr);

                match target {
                    DialectType::DuckDB => {
                        // DuckDB: DATE_TRUNC('UNIT', CAST(ts AS TIMESTAMPTZ))
                        // With timezone: DATE_TRUNC('UNIT', ts AT TIME ZONE 'tz') AT TIME ZONE 'tz' (for DAY granularity)
                        // Without timezone for MINUTE+ granularity: just DATE_TRUNC
                        let is_coarse = matches!(
                            unit_str.as_str(),
                            "DAY" | "WEEK" | "MONTH" | "QUARTER" | "YEAR"
                        );
                        // For DATETIME_TRUNC, cast string args to TIMESTAMP
                        // (DATETIME is timezone-naive, so a plain TIMESTAMP cast is
                        // used instead of the TIMESTAMPTZ path below).
                        let cast_ts = if name == "DATETIME_TRUNC" {
                            match ts {
                                Expression::Literal(Literal::String(ref _s)) => {
                                    Expression::Cast(Box::new(Cast {
                                        this: ts,
                                        to: DataType::Timestamp {
                                            precision: None,
                                            timezone: false,
                                        },
                                        trailing_comments: vec![],
                                        double_colon_syntax: false,
                                        format: None,
                                        default: None,
                                    }))
                                }
                                _ => Self::maybe_cast_ts_to_tz(ts, &name),
                            }
                        } else {
                            Self::maybe_cast_ts_to_tz(ts, &name)
                        };

                        if let Some(tz_arg) = tz {
                            if is_coarse {
                                // DATE_TRUNC('UNIT', ts AT TIME ZONE 'tz') AT TIME ZONE 'tz'
                                // Shift into the zone, truncate, shift back.
                                let at_tz = Expression::AtTimeZone(Box::new(
                                    crate::expressions::AtTimeZone {
                                        this: cast_ts,
                                        zone: tz_arg.clone(),
                                    },
                                ));
                                let date_trunc = Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![Expression::Literal(Literal::String(unit_str)), at_tz],
                                )));
                                Ok(Expression::AtTimeZone(Box::new(
                                    crate::expressions::AtTimeZone {
                                        this: date_trunc,
                                        zone: tz_arg,
                                    },
                                )))
                            } else {
                                // For MINUTE/HOUR: no AT TIME ZONE wrapper, just DATE_TRUNC('UNIT', ts)
                                // (sub-day truncation is zone-independent; the tz arg is dropped).
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![Expression::Literal(Literal::String(unit_str)), cast_ts],
                                ))))
                            }
                        } else {
                            // No timezone: DATE_TRUNC('UNIT', CAST(ts AS TIMESTAMPTZ))
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_TRUNC".to_string(),
                                vec![Expression::Literal(Literal::String(unit_str)), cast_ts],
                            ))))
                        }
                    }
                    DialectType::Databricks | DialectType::Spark => {
                        // Databricks/Spark: DATE_TRUNC('UNIT', ts)
                        // NOTE(review): a third timezone arg is dropped here — confirm.
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_TRUNC".to_string(),
                            vec![Expression::Literal(Literal::String(unit_str)), ts],
                        ))))
                    }
                    _ => {
                        // Default: keep as TIMESTAMP_TRUNC('UNIT', ts, [tz])
                        let unit = Expression::Literal(Literal::String(unit_str));
                        let mut date_trunc_args = vec![unit, ts];
                        if let Some(tz_arg) = tz {
                            date_trunc_args.push(tz_arg);
                        }
                        Ok(Expression::Function(Box::new(Function::new(
                            "TIMESTAMP_TRUNC".to_string(),
                            date_trunc_args,
                        ))))
                    }
                }
            }
28189
            // TIME(h, m, s) -> target-specific, TIME('string') -> CAST('string' AS TIME)
            // Dispatches on arity: 3 = constructor from parts, 1 = parse/cast,
            // 2 = (expr, timezone) conversion; anything else passes through.
            "TIME" => {
                if args.len() == 3 {
                    // TIME(h, m, s) constructor
                    match target {
                        DialectType::TSQL => {
                            // TIMEFROMPARTS(h, m, s, 0, 0) — the two appended zeros
                            // are the fractional-seconds value and its precision.
                            args.push(Expression::number(0));
                            args.push(Expression::number(0));
                            Ok(Expression::Function(Box::new(Function::new(
                                "TIMEFROMPARTS".to_string(),
                                args,
                            ))))
                        }
                        DialectType::MySQL => Ok(Expression::Function(Box::new(Function::new(
                            "MAKETIME".to_string(),
                            args,
                        )))),
                        DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
                            Function::new("MAKE_TIME".to_string(), args),
                        ))),
                        _ => Ok(Expression::Function(Box::new(Function::new(
                            "TIME".to_string(),
                            args,
                        )))),
                    }
                } else if args.len() == 1 {
                    let arg = args.remove(0);
                    if matches!(target, DialectType::Spark) {
                        // Spark: CAST(x AS TIMESTAMP) (yes, TIMESTAMP not TIME)
                        Ok(Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    } else {
                        // Most targets: CAST(x AS TIME)
                        Ok(Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Time {
                                precision: None,
                                timezone: false,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    }
                } else if args.len() == 2 {
                    // TIME(expr, timezone) -> CAST(CAST(expr AS TIMESTAMPTZ) AT TIME ZONE tz AS TIME)
                    let expr = args.remove(0);
                    let tz = args.remove(0);
                    let cast_tstz = Expression::Cast(Box::new(Cast {
                        this: expr,
                        to: DataType::Timestamp {
                            timezone: true,
                            precision: None,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    }));
                    let at_tz = Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                        this: cast_tstz,
                        zone: tz,
                    }));
                    Ok(Expression::Cast(Box::new(Cast {
                        this: at_tz,
                        to: DataType::Time {
                            precision: None,
                            timezone: false,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    })))
                } else {
                    // Zero or 4+ args: pass through unchanged.
                    Ok(Expression::Function(Box::new(Function::new(
                        "TIME".to_string(),
                        args,
                    ))))
                }
            }
28282
            // DATETIME('string') -> CAST('string' AS TIMESTAMP)
            // DATETIME('date', TIME 'time') -> CAST(CAST('date' AS DATE) + CAST('time' AS TIME) AS TIMESTAMP)
            // DATETIME('string', 'timezone') -> CAST(CAST('string' AS TIMESTAMPTZ) AT TIME ZONE tz AS TIMESTAMP)
            // DATETIME(y, m, d, h, min, s) -> target-specific
            "DATETIME" => {
                // For BigQuery target: keep DATETIME function but convert TIME literal to CAST
                if matches!(target, DialectType::BigQuery) {
                    if args.len() == 2 {
                        let has_time_literal =
                            matches!(&args[1], Expression::Literal(Literal::Time(_)));
                        if has_time_literal {
                            let first = args.remove(0);
                            let second = args.remove(0);
                            // Re-spell TIME 'x' as CAST('x' AS TIME) so the
                            // generated call stays valid BigQuery syntax.
                            let time_as_cast = match second {
                                Expression::Literal(Literal::Time(s)) => {
                                    Expression::Cast(Box::new(Cast {
                                        this: Expression::Literal(Literal::String(s)),
                                        to: DataType::Time {
                                            precision: None,
                                            timezone: false,
                                        },
                                        trailing_comments: vec![],
                                        double_colon_syntax: false,
                                        format: None,
                                        default: None,
                                    }))
                                }
                                other => other,
                            };
                            return Ok(Expression::Function(Box::new(Function::new(
                                "DATETIME".to_string(),
                                vec![first, time_as_cast],
                            ))));
                        }
                    }
                    // BQ->BQ with any other arity: pass through unchanged.
                    return Ok(Expression::Function(Box::new(Function::new(
                        "DATETIME".to_string(),
                        args,
                    ))));
                }

                if args.len() == 1 {
                    // DATETIME('string') -> CAST('string' AS TIMESTAMP)
                    let arg = args.remove(0);
                    Ok(Expression::Cast(Box::new(Cast {
                        this: arg,
                        to: DataType::Timestamp {
                            timezone: false,
                            precision: None,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    })))
                } else if args.len() == 2 {
                    let first = args.remove(0);
                    let second = args.remove(0);
                    // Check if second arg is a TIME literal
                    let is_time_literal = matches!(&second, Expression::Literal(Literal::Time(_)));
                    if is_time_literal {
                        // DATETIME('date', TIME 'time') -> CAST(CAST(date AS DATE) + CAST('time' AS TIME) AS TIMESTAMP)
                        let cast_date = Expression::Cast(Box::new(Cast {
                            this: first,
                            to: DataType::Date,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        // Convert TIME 'x' literal to string 'x' so CAST produces CAST('x' AS TIME) not CAST(TIME 'x' AS TIME)
                        let time_as_string = match second {
                            Expression::Literal(Literal::Time(s)) => {
                                Expression::Literal(Literal::String(s))
                            }
                            other => other,
                        };
                        let cast_time = Expression::Cast(Box::new(Cast {
                            this: time_as_string,
                            to: DataType::Time {
                                precision: None,
                                timezone: false,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        let add_expr =
                            Expression::Add(Box::new(BinaryOp::new(cast_date, cast_time)));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: add_expr,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    } else {
                        // DATETIME('string', 'timezone')
                        let cast_tstz = Expression::Cast(Box::new(Cast {
                            this: first,
                            to: DataType::Timestamp {
                                timezone: true,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        let at_tz =
                            Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                                this: cast_tstz,
                                zone: second,
                            }));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: at_tz,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    }
                } else if args.len() >= 3 {
                    // DATETIME(y, m, d, h, min, s) -> TIMESTAMP_FROM_PARTS for Snowflake
                    // For other targets, use MAKE_TIMESTAMP or similar
                    // NOTE(review): only Snowflake is actually handled; everything
                    // else keeps the BigQuery-only DATETIME spelling — confirm.
                    if matches!(target, DialectType::Snowflake) {
                        Ok(Expression::Function(Box::new(Function::new(
                            "TIMESTAMP_FROM_PARTS".to_string(),
                            args,
                        ))))
                    } else {
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATETIME".to_string(),
                            args,
                        ))))
                    }
                } else {
                    // Zero args: pass through unchanged.
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATETIME".to_string(),
                        args,
                    ))))
                }
            }
28434
            // TIMESTAMP(x) -> CAST(x AS TIMESTAMP WITH TIME ZONE) for Presto
            // TIMESTAMP(x, tz) -> CAST(x AS TIMESTAMP) AT TIME ZONE tz for DuckDB
            // NOTE(review): the 1-arg cast below is applied for every target, not
            // only Presto as the comment suggests — confirm that is intended.
            "TIMESTAMP" => {
                if args.len() == 1 {
                    let arg = args.remove(0);
                    Ok(Expression::Cast(Box::new(Cast {
                        this: arg,
                        to: DataType::Timestamp {
                            timezone: true,
                            precision: None,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    })))
                } else if args.len() == 2 {
                    let arg = args.remove(0);
                    let tz = args.remove(0);
                    let cast_ts = Expression::Cast(Box::new(Cast {
                        this: arg,
                        to: DataType::Timestamp {
                            timezone: false,
                            precision: None,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    }));
                    if matches!(target, DialectType::Snowflake) {
                        // CONVERT_TIMEZONE('tz', CAST(x AS TIMESTAMP))
                        Ok(Expression::Function(Box::new(Function::new(
                            "CONVERT_TIMEZONE".to_string(),
                            vec![tz, cast_ts],
                        ))))
                    } else {
                        // Other targets: CAST(x AS TIMESTAMP) AT TIME ZONE tz
                        Ok(Expression::AtTimeZone(Box::new(
                            crate::expressions::AtTimeZone {
                                this: cast_ts,
                                zone: tz,
                            },
                        )))
                    }
                } else {
                    // Zero or 3+ args: pass through unchanged.
                    Ok(Expression::Function(Box::new(Function::new(
                        "TIMESTAMP".to_string(),
                        args,
                    ))))
                }
            }
28486
            // STRING(x) -> CAST(x AS VARCHAR/TEXT)
            // STRING(x, tz) -> CAST(CAST(x AS TIMESTAMP) AT TIME ZONE 'UTC' AT TIME ZONE tz AS VARCHAR/TEXT)
            "STRING" => {
                if args.len() == 1 {
                    // One-argument form: plain cast to the target's preferred string
                    // type (TEXT on DuckDB, VARCHAR elsewhere).
                    let arg = args.remove(0);
                    let cast_type = match target {
                        DialectType::DuckDB => DataType::Text,
                        _ => DataType::VarChar {
                            length: None,
                            parenthesized_length: false,
                        },
                    };
                    Ok(Expression::Cast(Box::new(Cast {
                        this: arg,
                        to: cast_type,
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                    })))
                } else if args.len() == 2 {
                    // Two-argument form: render a timestamp in the given time zone,
                    // then cast the result to the target's string type.
                    let arg = args.remove(0);
                    let tz = args.remove(0);
                    let cast_type = match target {
                        DialectType::DuckDB => DataType::Text,
                        _ => DataType::VarChar {
                            length: None,
                            parenthesized_length: false,
                        },
                    };
                    if matches!(target, DialectType::Snowflake) {
                        // STRING(x, tz) -> CAST(CONVERT_TIMEZONE('UTC', tz, x) AS VARCHAR)
                        let convert_tz = Expression::Function(Box::new(Function::new(
                            "CONVERT_TIMEZONE".to_string(),
                            vec![
                                Expression::Literal(Literal::String("UTC".to_string())),
                                tz,
                                arg,
                            ],
                        )));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: convert_tz,
                            to: cast_type,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    } else {
                        // STRING(x, tz) -> CAST(CAST(x AS TIMESTAMP) AT TIME ZONE 'UTC' AT TIME ZONE tz AS TEXT/VARCHAR)
                        // The first AT TIME ZONE pins the naive timestamp to UTC, the
                        // second converts it to the requested zone.
                        let cast_ts = Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        let at_utc =
                            Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                                this: cast_ts,
                                zone: Expression::Literal(Literal::String("UTC".to_string())),
                            }));
                        let at_tz =
                            Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                                this: at_utc,
                                zone: tz,
                            }));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: at_tz,
                            to: cast_type,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    }
                } else {
                    // Any other arity: leave the call untouched.
                    Ok(Expression::Function(Box::new(Function::new(
                        "STRING".to_string(),
                        args,
                    ))))
                }
            }
28574
            // UNIX_SECONDS, UNIX_MILLIS, UNIX_MICROS as functions (not expressions)
            //
            // NOTE(review): arms with the exact same patterns and guards for these
            // three names appear again much further down in this match. Match arms
            // are tried top-to-bottom, so those later copies look unreachable, and
            // their bodies differ subtly from these (different cast helpers, and a
            // plain TIMESTAMPDIFF function call for Snowflake instead of
            // Expression::TimestampDiff). Confirm which set is intended and remove
            // the other.
            "UNIX_SECONDS" if args.len() == 1 => {
                let ts = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // CAST(EPOCH(CAST(ts AS TIMESTAMPTZ)) AS BIGINT)
                        let cast_ts = Self::ensure_cast_timestamptz(ts);
                        let epoch = Expression::Function(Box::new(Function::new(
                            "EPOCH".to_string(),
                            vec![cast_ts],
                        )));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: epoch,
                            to: DataType::BigInt { length: None },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    }
                    DialectType::Snowflake => {
                        // TIMESTAMPDIFF(SECONDS, CAST('1970-01-01 00:00:00+00' AS TIMESTAMPTZ), ts)
                        let epoch = Expression::Cast(Box::new(Cast {
                            this: Expression::Literal(Literal::String(
                                "1970-01-01 00:00:00+00".to_string(),
                            )),
                            to: DataType::Timestamp {
                                timezone: true,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        Ok(Expression::TimestampDiff(Box::new(
                            crate::expressions::TimestampDiff {
                                this: Box::new(epoch),
                                expression: Box::new(ts),
                                unit: Some("SECONDS".to_string()),
                            },
                        )))
                    }
                    // Other targets: keep the call as-is.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "UNIX_SECONDS".to_string(),
                        vec![ts],
                    )))),
                }
            }

            "UNIX_MILLIS" if args.len() == 1 => {
                let ts = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // EPOCH_MS(CAST(ts AS TIMESTAMPTZ))
                        let cast_ts = Self::ensure_cast_timestamptz(ts);
                        Ok(Expression::Function(Box::new(Function::new(
                            "EPOCH_MS".to_string(),
                            vec![cast_ts],
                        ))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "UNIX_MILLIS".to_string(),
                        vec![ts],
                    )))),
                }
            }

            "UNIX_MICROS" if args.len() == 1 => {
                let ts = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // EPOCH_US(CAST(ts AS TIMESTAMPTZ))
                        let cast_ts = Self::ensure_cast_timestamptz(ts);
                        Ok(Expression::Function(Box::new(Function::new(
                            "EPOCH_US".to_string(),
                            vec![cast_ts],
                        ))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "UNIX_MICROS".to_string(),
                        vec![ts],
                    )))),
                }
            }
28660
28661 // ARRAY_CONCAT / LIST_CONCAT -> target-specific
28662 "ARRAY_CONCAT" | "LIST_CONCAT" => {
28663 match target {
28664 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
28665 // CONCAT(arr1, arr2, ...)
28666 Ok(Expression::Function(Box::new(Function::new(
28667 "CONCAT".to_string(),
28668 args,
28669 ))))
28670 }
28671 DialectType::Presto | DialectType::Trino => {
28672 // CONCAT(arr1, arr2, ...)
28673 Ok(Expression::Function(Box::new(Function::new(
28674 "CONCAT".to_string(),
28675 args,
28676 ))))
28677 }
28678 DialectType::Snowflake => {
28679 // ARRAY_CAT(arr1, ARRAY_CAT(arr2, arr3))
28680 if args.len() == 1 {
28681 // ARRAY_CAT requires 2 args, add empty array as []
28682 let empty_arr = Expression::ArrayFunc(Box::new(
28683 crate::expressions::ArrayConstructor {
28684 expressions: vec![],
28685 bracket_notation: true,
28686 use_list_keyword: false,
28687 },
28688 ));
28689 let mut new_args = args;
28690 new_args.push(empty_arr);
28691 Ok(Expression::Function(Box::new(Function::new(
28692 "ARRAY_CAT".to_string(),
28693 new_args,
28694 ))))
28695 } else if args.is_empty() {
28696 Ok(Expression::Function(Box::new(Function::new(
28697 "ARRAY_CAT".to_string(),
28698 args,
28699 ))))
28700 } else {
28701 let mut it = args.into_iter().rev();
28702 let mut result = it.next().unwrap();
28703 for arr in it {
28704 result = Expression::Function(Box::new(Function::new(
28705 "ARRAY_CAT".to_string(),
28706 vec![arr, result],
28707 )));
28708 }
28709 Ok(result)
28710 }
28711 }
28712 DialectType::PostgreSQL => {
28713 // ARRAY_CAT(arr1, ARRAY_CAT(arr2, arr3))
28714 if args.len() <= 1 {
28715 Ok(Expression::Function(Box::new(Function::new(
28716 "ARRAY_CAT".to_string(),
28717 args,
28718 ))))
28719 } else {
28720 let mut it = args.into_iter().rev();
28721 let mut result = it.next().unwrap();
28722 for arr in it {
28723 result = Expression::Function(Box::new(Function::new(
28724 "ARRAY_CAT".to_string(),
28725 vec![arr, result],
28726 )));
28727 }
28728 Ok(result)
28729 }
28730 }
28731 DialectType::Redshift => {
28732 // ARRAY_CONCAT(arr1, ARRAY_CONCAT(arr2, arr3))
28733 if args.len() <= 2 {
28734 Ok(Expression::Function(Box::new(Function::new(
28735 "ARRAY_CONCAT".to_string(),
28736 args,
28737 ))))
28738 } else {
28739 let mut it = args.into_iter().rev();
28740 let mut result = it.next().unwrap();
28741 for arr in it {
28742 result = Expression::Function(Box::new(Function::new(
28743 "ARRAY_CONCAT".to_string(),
28744 vec![arr, result],
28745 )));
28746 }
28747 Ok(result)
28748 }
28749 }
28750 DialectType::DuckDB => {
28751 // LIST_CONCAT supports multiple args natively in DuckDB
28752 Ok(Expression::Function(Box::new(Function::new(
28753 "LIST_CONCAT".to_string(),
28754 args,
28755 ))))
28756 }
28757 _ => Ok(Expression::Function(Box::new(Function::new(
28758 "ARRAY_CONCAT".to_string(),
28759 args,
28760 )))),
28761 }
28762 }
28763
28764 // ARRAY_CONCAT_AGG -> Snowflake: ARRAY_FLATTEN(ARRAY_AGG(x))
28765 "ARRAY_CONCAT_AGG" if args.len() == 1 => {
28766 let arg = args.remove(0);
28767 match target {
28768 DialectType::Snowflake => {
28769 let array_agg =
28770 Expression::ArrayAgg(Box::new(crate::expressions::AggFunc {
28771 this: arg,
28772 distinct: false,
28773 filter: None,
28774 order_by: vec![],
28775 name: None,
28776 ignore_nulls: None,
28777 having_max: None,
28778 limit: None,
28779 }));
28780 Ok(Expression::Function(Box::new(Function::new(
28781 "ARRAY_FLATTEN".to_string(),
28782 vec![array_agg],
28783 ))))
28784 }
28785 _ => Ok(Expression::Function(Box::new(Function::new(
28786 "ARRAY_CONCAT_AGG".to_string(),
28787 vec![arg],
28788 )))),
28789 }
28790 }
28791
28792 // MD5/SHA1/SHA256/SHA512 -> target-specific hash functions
28793 "MD5" if args.len() == 1 => {
28794 let arg = args.remove(0);
28795 match target {
28796 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
28797 // UNHEX(MD5(x))
28798 let md5 = Expression::Function(Box::new(Function::new(
28799 "MD5".to_string(),
28800 vec![arg],
28801 )));
28802 Ok(Expression::Function(Box::new(Function::new(
28803 "UNHEX".to_string(),
28804 vec![md5],
28805 ))))
28806 }
28807 DialectType::Snowflake => {
28808 // MD5_BINARY(x)
28809 Ok(Expression::Function(Box::new(Function::new(
28810 "MD5_BINARY".to_string(),
28811 vec![arg],
28812 ))))
28813 }
28814 _ => Ok(Expression::Function(Box::new(Function::new(
28815 "MD5".to_string(),
28816 vec![arg],
28817 )))),
28818 }
28819 }
28820
28821 "SHA1" if args.len() == 1 => {
28822 let arg = args.remove(0);
28823 match target {
28824 DialectType::DuckDB => {
28825 // UNHEX(SHA1(x))
28826 let sha1 = Expression::Function(Box::new(Function::new(
28827 "SHA1".to_string(),
28828 vec![arg],
28829 )));
28830 Ok(Expression::Function(Box::new(Function::new(
28831 "UNHEX".to_string(),
28832 vec![sha1],
28833 ))))
28834 }
28835 _ => Ok(Expression::Function(Box::new(Function::new(
28836 "SHA1".to_string(),
28837 vec![arg],
28838 )))),
28839 }
28840 }
28841
28842 "SHA256" if args.len() == 1 => {
28843 let arg = args.remove(0);
28844 match target {
28845 DialectType::DuckDB => {
28846 // UNHEX(SHA256(x))
28847 let sha = Expression::Function(Box::new(Function::new(
28848 "SHA256".to_string(),
28849 vec![arg],
28850 )));
28851 Ok(Expression::Function(Box::new(Function::new(
28852 "UNHEX".to_string(),
28853 vec![sha],
28854 ))))
28855 }
28856 DialectType::Snowflake => {
28857 // SHA2_BINARY(x, 256)
28858 Ok(Expression::Function(Box::new(Function::new(
28859 "SHA2_BINARY".to_string(),
28860 vec![arg, Expression::number(256)],
28861 ))))
28862 }
28863 DialectType::Redshift | DialectType::Spark => {
28864 // SHA2(x, 256)
28865 Ok(Expression::Function(Box::new(Function::new(
28866 "SHA2".to_string(),
28867 vec![arg, Expression::number(256)],
28868 ))))
28869 }
28870 _ => Ok(Expression::Function(Box::new(Function::new(
28871 "SHA256".to_string(),
28872 vec![arg],
28873 )))),
28874 }
28875 }
28876
28877 "SHA512" if args.len() == 1 => {
28878 let arg = args.remove(0);
28879 match target {
28880 DialectType::Snowflake => {
28881 // SHA2_BINARY(x, 512)
28882 Ok(Expression::Function(Box::new(Function::new(
28883 "SHA2_BINARY".to_string(),
28884 vec![arg, Expression::number(512)],
28885 ))))
28886 }
28887 DialectType::Redshift | DialectType::Spark => {
28888 // SHA2(x, 512)
28889 Ok(Expression::Function(Box::new(Function::new(
28890 "SHA2".to_string(),
28891 vec![arg, Expression::number(512)],
28892 ))))
28893 }
28894 _ => Ok(Expression::Function(Box::new(Function::new(
28895 "SHA512".to_string(),
28896 vec![arg],
28897 )))),
28898 }
28899 }
28900
            // REGEXP_EXTRACT_ALL(str, pattern) -> add default group arg
            "REGEXP_EXTRACT_ALL" if args.len() == 2 => {
                let str_expr = args.remove(0);
                let pattern = args.remove(0);

                // Check if pattern contains capturing groups (parentheses).
                // NOTE(review): this heuristic only inspects literal string patterns
                // and treats any '(' + ')' pair as a capturing group; escaped parens
                // (\() and non-capturing groups ((?:...)) are miscounted — confirm
                // that is acceptable for the supported inputs.
                let has_groups = match &pattern {
                    Expression::Literal(Literal::String(s)) => s.contains('(') && s.contains(')'),
                    _ => false,
                };

                match target {
                    DialectType::DuckDB => {
                        // DuckDB gets an explicit group index: 1 when the pattern
                        // captures, 0 (whole match) otherwise.
                        let group = if has_groups {
                            Expression::number(1)
                        } else {
                            Expression::number(0)
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "REGEXP_EXTRACT_ALL".to_string(),
                            vec![str_expr, pattern, group],
                        ))))
                    }
                    DialectType::Spark | DialectType::Databricks => {
                        // Spark's default group_index is 1 (same as BigQuery), so omit for capturing groups
                        if has_groups {
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![str_expr, pattern],
                            ))))
                        } else {
                            // No capture groups: force group 0 (the whole match).
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![str_expr, pattern, Expression::number(0)],
                            ))))
                        }
                    }
                    DialectType::Presto | DialectType::Trino => {
                        // Presto/Trino: explicit group index 1 only when the pattern
                        // captures; the two-argument form returns full matches.
                        if has_groups {
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![str_expr, pattern, Expression::number(1)],
                            ))))
                        } else {
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![str_expr, pattern],
                            ))))
                        }
                    }
                    DialectType::Snowflake => {
                        if has_groups {
                            // REGEXP_EXTRACT_ALL(str, pattern, 1, 1, 'c', 1)
                            // Extra args are presumably position, occurrence,
                            // regex parameters ('c' = case-sensitive), group_num —
                            // verify against the Snowflake docs.
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![
                                    str_expr,
                                    pattern,
                                    Expression::number(1),
                                    Expression::number(1),
                                    Expression::Literal(Literal::String("c".to_string())),
                                    Expression::number(1),
                                ],
                            ))))
                        } else {
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![str_expr, pattern],
                            ))))
                        }
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "REGEXP_EXTRACT_ALL".to_string(),
                        vec![str_expr, pattern],
                    )))),
                }
            }
28978
28979 // MOD(x, y) -> x % y for PostgreSQL/DuckDB
28980 "MOD" if args.len() == 2 => {
28981 match target {
28982 DialectType::PostgreSQL
28983 | DialectType::DuckDB
28984 | DialectType::Presto
28985 | DialectType::Trino
28986 | DialectType::Athena
28987 | DialectType::Snowflake => {
28988 let x = args.remove(0);
28989 let y = args.remove(0);
28990 // Wrap complex expressions in parens to preserve precedence
28991 let needs_paren = |e: &Expression| {
28992 matches!(
28993 e,
28994 Expression::Add(_)
28995 | Expression::Sub(_)
28996 | Expression::Mul(_)
28997 | Expression::Div(_)
28998 )
28999 };
29000 let x = if needs_paren(&x) {
29001 Expression::Paren(Box::new(crate::expressions::Paren {
29002 this: x,
29003 trailing_comments: vec![],
29004 }))
29005 } else {
29006 x
29007 };
29008 let y = if needs_paren(&y) {
29009 Expression::Paren(Box::new(crate::expressions::Paren {
29010 this: y,
29011 trailing_comments: vec![],
29012 }))
29013 } else {
29014 y
29015 };
29016 Ok(Expression::Mod(Box::new(
29017 crate::expressions::BinaryOp::new(x, y),
29018 )))
29019 }
29020 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
29021 // Hive/Spark: a % b
29022 let x = args.remove(0);
29023 let y = args.remove(0);
29024 let needs_paren = |e: &Expression| {
29025 matches!(
29026 e,
29027 Expression::Add(_)
29028 | Expression::Sub(_)
29029 | Expression::Mul(_)
29030 | Expression::Div(_)
29031 )
29032 };
29033 let x = if needs_paren(&x) {
29034 Expression::Paren(Box::new(crate::expressions::Paren {
29035 this: x,
29036 trailing_comments: vec![],
29037 }))
29038 } else {
29039 x
29040 };
29041 let y = if needs_paren(&y) {
29042 Expression::Paren(Box::new(crate::expressions::Paren {
29043 this: y,
29044 trailing_comments: vec![],
29045 }))
29046 } else {
29047 y
29048 };
29049 Ok(Expression::Mod(Box::new(
29050 crate::expressions::BinaryOp::new(x, y),
29051 )))
29052 }
29053 _ => Ok(Expression::Function(Box::new(Function::new(
29054 "MOD".to_string(),
29055 args,
29056 )))),
29057 }
29058 }
29059
29060 // ARRAY_FILTER(arr, lambda) -> FILTER for Hive/Spark/Presto, ARRAY_FILTER for StarRocks
29061 "ARRAY_FILTER" if args.len() == 2 => {
29062 let name = match target {
29063 DialectType::DuckDB => "LIST_FILTER",
29064 DialectType::StarRocks => "ARRAY_FILTER",
29065 _ => "FILTER",
29066 };
29067 Ok(Expression::Function(Box::new(Function::new(
29068 name.to_string(),
29069 args,
29070 ))))
29071 }
29072 // FILTER(arr, lambda) -> ARRAY_FILTER for StarRocks, LIST_FILTER for DuckDB
29073 "FILTER" if args.len() == 2 => {
29074 let name = match target {
29075 DialectType::DuckDB => "LIST_FILTER",
29076 DialectType::StarRocks => "ARRAY_FILTER",
29077 _ => "FILTER",
29078 };
29079 Ok(Expression::Function(Box::new(Function::new(
29080 name.to_string(),
29081 args,
29082 ))))
29083 }
29084 // REDUCE(arr, init, lambda1, lambda2) -> AGGREGATE for Spark
29085 "REDUCE" if args.len() >= 3 => {
29086 let name = match target {
29087 DialectType::Spark | DialectType::Databricks => "AGGREGATE",
29088 _ => "REDUCE",
29089 };
29090 Ok(Expression::Function(Box::new(Function::new(
29091 name.to_string(),
29092 args,
29093 ))))
29094 }
29095 // ARRAY_REVERSE(x) -> arrayReverse for ClickHouse (handled by generator)
29096 "ARRAY_REVERSE" if args.len() == 1 => Ok(Expression::Function(Box::new(
29097 Function::new("ARRAY_REVERSE".to_string(), args),
29098 ))),
29099
29100 // CONCAT(a, b, ...) -> a || b || ... for DuckDB with 3+ args
29101 "CONCAT" if args.len() > 2 => match target {
29102 DialectType::DuckDB => {
29103 let mut it = args.into_iter();
29104 let mut result = it.next().unwrap();
29105 for arg in it {
29106 result = Expression::DPipe(Box::new(crate::expressions::DPipe {
29107 this: Box::new(result),
29108 expression: Box::new(arg),
29109 safe: None,
29110 }));
29111 }
29112 Ok(result)
29113 }
29114 _ => Ok(Expression::Function(Box::new(Function::new(
29115 "CONCAT".to_string(),
29116 args,
29117 )))),
29118 },
29119
29120 // GENERATE_DATE_ARRAY(start, end[, step]) -> target-specific
29121 "GENERATE_DATE_ARRAY" => {
29122 if matches!(target, DialectType::BigQuery) {
29123 // BQ->BQ: add default interval if not present
29124 if args.len() == 2 {
29125 let start = args.remove(0);
29126 let end = args.remove(0);
29127 let default_interval =
29128 Expression::Interval(Box::new(crate::expressions::Interval {
29129 this: Some(Expression::Literal(Literal::String("1".to_string()))),
29130 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
29131 unit: crate::expressions::IntervalUnit::Day,
29132 use_plural: false,
29133 }),
29134 }));
29135 Ok(Expression::Function(Box::new(Function::new(
29136 "GENERATE_DATE_ARRAY".to_string(),
29137 vec![start, end, default_interval],
29138 ))))
29139 } else {
29140 Ok(Expression::Function(Box::new(Function::new(
29141 "GENERATE_DATE_ARRAY".to_string(),
29142 args,
29143 ))))
29144 }
29145 } else if matches!(target, DialectType::DuckDB) {
29146 // DuckDB: CAST(GENERATE_SERIES(CAST(start AS DATE), CAST(end AS DATE), step) AS DATE[])
29147 let start = args.get(0).cloned();
29148 let end = args.get(1).cloned();
29149 let step = args.get(2).cloned().or_else(|| {
29150 Some(Expression::Interval(Box::new(
29151 crate::expressions::Interval {
29152 this: Some(Expression::Literal(Literal::String("1".to_string()))),
29153 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
29154 unit: crate::expressions::IntervalUnit::Day,
29155 use_plural: false,
29156 }),
29157 },
29158 )))
29159 });
29160
29161 // Wrap start/end in CAST(... AS DATE) only for string literals
29162 let maybe_cast_date = |expr: Expression| -> Expression {
29163 if matches!(&expr, Expression::Literal(Literal::String(_))) {
29164 Expression::Cast(Box::new(Cast {
29165 this: expr,
29166 to: DataType::Date,
29167 trailing_comments: vec![],
29168 double_colon_syntax: false,
29169 format: None,
29170 default: None,
29171 }))
29172 } else {
29173 expr
29174 }
29175 };
29176 let cast_start = start.map(maybe_cast_date);
29177 let cast_end = end.map(maybe_cast_date);
29178
29179 let gen_series =
29180 Expression::GenerateSeries(Box::new(crate::expressions::GenerateSeries {
29181 start: cast_start.map(Box::new),
29182 end: cast_end.map(Box::new),
29183 step: step.map(Box::new),
29184 is_end_exclusive: None,
29185 }));
29186
29187 // Wrap in CAST(... AS DATE[])
29188 Ok(Expression::Cast(Box::new(Cast {
29189 this: gen_series,
29190 to: DataType::Array {
29191 element_type: Box::new(DataType::Date),
29192 dimension: None,
29193 },
29194 trailing_comments: vec![],
29195 double_colon_syntax: false,
29196 format: None,
29197 default: None,
29198 })))
29199 } else if matches!(target, DialectType::Snowflake) {
29200 // Snowflake: keep as GENERATE_DATE_ARRAY function for later transform
29201 // (transform_generate_date_array_snowflake will convert to ARRAY_GENERATE_RANGE + DATEADD)
29202 if args.len() == 2 {
29203 let start = args.remove(0);
29204 let end = args.remove(0);
29205 let default_interval =
29206 Expression::Interval(Box::new(crate::expressions::Interval {
29207 this: Some(Expression::Literal(Literal::String("1".to_string()))),
29208 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
29209 unit: crate::expressions::IntervalUnit::Day,
29210 use_plural: false,
29211 }),
29212 }));
29213 Ok(Expression::Function(Box::new(Function::new(
29214 "GENERATE_DATE_ARRAY".to_string(),
29215 vec![start, end, default_interval],
29216 ))))
29217 } else {
29218 Ok(Expression::Function(Box::new(Function::new(
29219 "GENERATE_DATE_ARRAY".to_string(),
29220 args,
29221 ))))
29222 }
29223 } else {
29224 // Convert to GenerateSeries for other targets
29225 let start = args.get(0).cloned();
29226 let end = args.get(1).cloned();
29227 let step = args.get(2).cloned().or_else(|| {
29228 Some(Expression::Interval(Box::new(
29229 crate::expressions::Interval {
29230 this: Some(Expression::Literal(Literal::String("1".to_string()))),
29231 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
29232 unit: crate::expressions::IntervalUnit::Day,
29233 use_plural: false,
29234 }),
29235 },
29236 )))
29237 });
29238 Ok(Expression::GenerateSeries(Box::new(
29239 crate::expressions::GenerateSeries {
29240 start: start.map(Box::new),
29241 end: end.map(Box::new),
29242 step: step.map(Box::new),
29243 is_end_exclusive: None,
29244 },
29245 )))
29246 }
29247 }
29248
            // PARSE_DATE(format, str) -> target-specific
            "PARSE_DATE" if args.len() == 2 => {
                let format = args.remove(0);
                let str_expr = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // CAST(STRPTIME(str, duck_format) AS DATE)
                        // The BigQuery %-style format is translated to DuckDB's
                        // format dialect first.
                        let duck_format = Self::bq_format_to_duckdb(&format);
                        let strptime = Expression::Function(Box::new(Function::new(
                            "STRPTIME".to_string(),
                            vec![str_expr, duck_format],
                        )));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: strptime,
                            to: DataType::Date,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    }
                    DialectType::Snowflake => {
                        // _POLYGLOT_DATE(str, snowflake_format)
                        // Use marker so Snowflake target transform keeps it as DATE() instead of TO_DATE()
                        let sf_format = Self::bq_format_to_snowflake(&format);
                        Ok(Expression::Function(Box::new(Function::new(
                            "_POLYGLOT_DATE".to_string(),
                            vec![str_expr, sf_format],
                        ))))
                    }
                    // Other targets: keep PARSE_DATE with its original argument order.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "PARSE_DATE".to_string(),
                        vec![format, str_expr],
                    )))),
                }
            }
29285
            // PARSE_TIMESTAMP(format, str) -> target-specific
            "PARSE_TIMESTAMP" if args.len() >= 2 => {
                let format = args.remove(0);
                let str_expr = args.remove(0);
                // Optional third argument: a time-zone name.
                let tz = if !args.is_empty() {
                    Some(args.remove(0))
                } else {
                    None
                };
                match target {
                    DialectType::DuckDB => {
                        // STRPTIME(str, duck_format)
                        // NOTE(review): the optional `tz` argument is silently dropped
                        // on this path — confirm whether the DuckDB output should
                        // apply it (e.g. via AT TIME ZONE) instead of ignoring it.
                        let duck_format = Self::bq_format_to_duckdb(&format);
                        let strptime = Expression::Function(Box::new(Function::new(
                            "STRPTIME".to_string(),
                            vec![str_expr, duck_format],
                        )));
                        Ok(strptime)
                    }
                    _ => {
                        // Re-assemble the original argument list for passthrough.
                        let mut result_args = vec![format, str_expr];
                        if let Some(tz_arg) = tz {
                            result_args.push(tz_arg);
                        }
                        Ok(Expression::Function(Box::new(Function::new(
                            "PARSE_TIMESTAMP".to_string(),
                            result_args,
                        ))))
                    }
                }
            }
29316
            // FORMAT_DATE(format, date) -> target-specific
            "FORMAT_DATE" if args.len() == 2 => {
                let format = args.remove(0);
                let date_expr = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // STRFTIME(CAST(date AS DATE), format)
                        // NOTE(review): unlike the PARSE_DATE / FORMAT_DATETIME arms,
                        // the format string is not run through bq_format_to_duckdb
                        // here — confirm whether that conversion is needed for
                        // FORMAT_DATE as well.
                        let cast_date = Expression::Cast(Box::new(Cast {
                            this: date_expr,
                            to: DataType::Date,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "STRFTIME".to_string(),
                            vec![cast_date, format],
                        ))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "FORMAT_DATE".to_string(),
                        vec![format, date_expr],
                    )))),
                }
            }
29343
            // FORMAT_DATETIME(format, datetime) -> target-specific
            "FORMAT_DATETIME" if args.len() == 2 => {
                let format = args.remove(0);
                let dt_expr = args.remove(0);

                if matches!(target, DialectType::BigQuery) {
                    // BQ->BQ: normalize %H:%M:%S to %T, %x to %D
                    let norm_format = Self::bq_format_normalize_bq(&format);
                    // Also strip DATETIME keyword from typed literals
                    // (a typed literal becomes CAST('...' AS DATETIME)).
                    let norm_dt = match dt_expr {
                        Expression::Literal(Literal::Timestamp(s)) => {
                            Expression::Cast(Box::new(Cast {
                                this: Expression::Literal(Literal::String(s)),
                                to: DataType::Custom {
                                    name: "DATETIME".to_string(),
                                },
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                            }))
                        }
                        other => other,
                    };
                    // Early return: the BigQuery path is complete at this point.
                    return Ok(Expression::Function(Box::new(Function::new(
                        "FORMAT_DATETIME".to_string(),
                        vec![norm_format, norm_dt],
                    ))));
                }

                match target {
                    DialectType::DuckDB => {
                        // STRFTIME(CAST(dt AS TIMESTAMP), duckdb_format)
                        let cast_dt = Self::ensure_cast_timestamp(dt_expr);
                        let duck_format = Self::bq_format_to_duckdb(&format);
                        Ok(Expression::Function(Box::new(Function::new(
                            "STRFTIME".to_string(),
                            vec![cast_dt, duck_format],
                        ))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "FORMAT_DATETIME".to_string(),
                        vec![format, dt_expr],
                    )))),
                }
            }
29390
            // FORMAT_TIMESTAMP(format, ts) -> target-specific
            "FORMAT_TIMESTAMP" if args.len() == 2 => {
                let format = args.remove(0);
                let ts_expr = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // STRFTIME(CAST(CAST(ts AS TIMESTAMPTZ) AS TIMESTAMP), format)
                        // NOTE(review): the format string is passed through unconverted
                        // here while the Snowflake arm converts it — confirm whether
                        // bq_format_to_duckdb should be applied as well.
                        let cast_tstz = Self::ensure_cast_timestamptz(ts_expr);
                        let cast_ts = Expression::Cast(Box::new(Cast {
                            this: cast_tstz,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "STRFTIME".to_string(),
                            vec![cast_ts, format],
                        ))))
                    }
                    DialectType::Snowflake => {
                        // TO_CHAR(CAST(CAST(ts AS TIMESTAMPTZ) AS TIMESTAMP), snowflake_format)
                        let cast_tstz = Self::ensure_cast_timestamptz(ts_expr);
                        let cast_ts = Expression::Cast(Box::new(Cast {
                            this: cast_tstz,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        let sf_format = Self::bq_format_to_snowflake(&format);
                        Ok(Expression::Function(Box::new(Function::new(
                            "TO_CHAR".to_string(),
                            vec![cast_ts, sf_format],
                        ))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "FORMAT_TIMESTAMP".to_string(),
                        vec![format, ts_expr],
                    )))),
                }
            }
29441
29442 // UNIX_DATE(date) -> DATE_DIFF('DAY', '1970-01-01', date) for DuckDB
29443 "UNIX_DATE" if args.len() == 1 => {
29444 let date = args.remove(0);
29445 match target {
29446 DialectType::DuckDB => {
29447 let epoch = Expression::Cast(Box::new(Cast {
29448 this: Expression::Literal(Literal::String("1970-01-01".to_string())),
29449 to: DataType::Date,
29450 trailing_comments: vec![],
29451 double_colon_syntax: false,
29452 format: None,
29453 default: None,
29454 }));
29455 // DATE_DIFF('DAY', epoch, date) but date might be DATE '...' literal
29456 // Need to convert DATE literal to CAST
29457 let norm_date = Self::date_literal_to_cast(date);
29458 Ok(Expression::Function(Box::new(Function::new(
29459 "DATE_DIFF".to_string(),
29460 vec![
29461 Expression::Literal(Literal::String("DAY".to_string())),
29462 epoch,
29463 norm_date,
29464 ],
29465 ))))
29466 }
29467 _ => Ok(Expression::Function(Box::new(Function::new(
29468 "UNIX_DATE".to_string(),
29469 vec![date],
29470 )))),
29471 }
29472 }
29473
            // UNIX_SECONDS(ts) -> target-specific
            //
            // NOTE(review): this arm and the two below (UNIX_MILLIS, UNIX_MICROS)
            // duplicate arms that occur much earlier in this match with the exact
            // same pattern and guard (`if args.len() == 1`). Match arms are tried
            // top-to-bottom, so these later copies appear to be unreachable dead
            // code. Their bodies also diverge from the live copies: these use
            // `ts_literal_to_cast_tz` instead of `ensure_cast_timestamptz`, and a
            // plain TIMESTAMPDIFF function call with an identifier unit instead of
            // `Expression::TimestampDiff` for Snowflake. Confirm which variant is
            // intended and delete the other set.
            "UNIX_SECONDS" if args.len() == 1 => {
                let ts = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // CAST(EPOCH(CAST(ts AS TIMESTAMPTZ)) AS BIGINT)
                        let norm_ts = Self::ts_literal_to_cast_tz(ts);
                        let epoch = Expression::Function(Box::new(Function::new(
                            "EPOCH".to_string(),
                            vec![norm_ts],
                        )));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: epoch,
                            to: DataType::BigInt { length: None },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        })))
                    }
                    DialectType::Snowflake => {
                        // TIMESTAMPDIFF(SECONDS, CAST('1970-01-01 00:00:00+00' AS TIMESTAMPTZ), ts)
                        let epoch = Expression::Cast(Box::new(Cast {
                            this: Expression::Literal(Literal::String(
                                "1970-01-01 00:00:00+00".to_string(),
                            )),
                            to: DataType::Timestamp {
                                timezone: true,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                        }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "TIMESTAMPDIFF".to_string(),
                            vec![
                                Expression::Identifier(Identifier::new("SECONDS".to_string())),
                                epoch,
                                ts,
                            ],
                        ))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "UNIX_SECONDS".to_string(),
                        vec![ts],
                    )))),
                }
            }

            // UNIX_MILLIS(ts) -> target-specific (see unreachability note above)
            "UNIX_MILLIS" if args.len() == 1 => {
                let ts = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        let norm_ts = Self::ts_literal_to_cast_tz(ts);
                        Ok(Expression::Function(Box::new(Function::new(
                            "EPOCH_MS".to_string(),
                            vec![norm_ts],
                        ))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "UNIX_MILLIS".to_string(),
                        vec![ts],
                    )))),
                }
            }

            // UNIX_MICROS(ts) -> target-specific (see unreachability note above)
            "UNIX_MICROS" if args.len() == 1 => {
                let ts = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        let norm_ts = Self::ts_literal_to_cast_tz(ts);
                        Ok(Expression::Function(Box::new(Function::new(
                            "EPOCH_US".to_string(),
                            vec![norm_ts],
                        ))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "UNIX_MICROS".to_string(),
                        vec![ts],
                    )))),
                }
            }
29560
29561 // INSTR(str, substr) -> target-specific
29562 "INSTR" => {
29563 if matches!(target, DialectType::BigQuery) {
29564 // BQ->BQ: keep as INSTR
29565 Ok(Expression::Function(Box::new(Function::new(
29566 "INSTR".to_string(),
29567 args,
29568 ))))
29569 } else if matches!(target, DialectType::Snowflake) && args.len() == 2 {
29570 // Snowflake: CHARINDEX(substr, str) - swap args
29571 let str_expr = args.remove(0);
29572 let substr = args.remove(0);
29573 Ok(Expression::Function(Box::new(Function::new(
29574 "CHARINDEX".to_string(),
29575 vec![substr, str_expr],
29576 ))))
29577 } else {
29578 // Keep as INSTR for other targets
29579 Ok(Expression::Function(Box::new(Function::new(
29580 "INSTR".to_string(),
29581 args,
29582 ))))
29583 }
29584 }
29585
29586 // CURRENT_TIMESTAMP / CURRENT_DATE handling - parens normalization and timezone
29587 "CURRENT_TIMESTAMP" | "CURRENT_DATE" | "CURRENT_DATETIME" | "CURRENT_TIME" => {
29588 if matches!(target, DialectType::BigQuery) {
29589 // BQ->BQ: always output with parens (function form), keep any timezone arg
29590 Ok(Expression::Function(Box::new(Function::new(name, args))))
29591 } else if name == "CURRENT_DATE" && args.len() == 1 {
29592 // CURRENT_DATE('UTC') - has timezone arg
29593 let tz_arg = args.remove(0);
29594 match target {
29595 DialectType::DuckDB => {
29596 // CAST(CURRENT_TIMESTAMP AT TIME ZONE 'UTC' AS DATE)
29597 let ct = Expression::CurrentTimestamp(
29598 crate::expressions::CurrentTimestamp {
29599 precision: None,
29600 sysdate: false,
29601 },
29602 );
29603 let at_tz =
29604 Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
29605 this: ct,
29606 zone: tz_arg,
29607 }));
29608 Ok(Expression::Cast(Box::new(Cast {
29609 this: at_tz,
29610 to: DataType::Date,
29611 trailing_comments: vec![],
29612 double_colon_syntax: false,
29613 format: None,
29614 default: None,
29615 })))
29616 }
29617 DialectType::Snowflake => {
29618 // CAST(CONVERT_TIMEZONE('UTC', CURRENT_TIMESTAMP()) AS DATE)
29619 let ct = Expression::Function(Box::new(Function::new(
29620 "CURRENT_TIMESTAMP".to_string(),
29621 vec![],
29622 )));
29623 let convert = Expression::Function(Box::new(Function::new(
29624 "CONVERT_TIMEZONE".to_string(),
29625 vec![tz_arg, ct],
29626 )));
29627 Ok(Expression::Cast(Box::new(Cast {
29628 this: convert,
29629 to: DataType::Date,
29630 trailing_comments: vec![],
29631 double_colon_syntax: false,
29632 format: None,
29633 default: None,
29634 })))
29635 }
29636 _ => {
29637 // PostgreSQL, MySQL, etc.: CURRENT_DATE AT TIME ZONE 'UTC'
29638 let cd = Expression::CurrentDate(crate::expressions::CurrentDate);
29639 Ok(Expression::AtTimeZone(Box::new(
29640 crate::expressions::AtTimeZone {
29641 this: cd,
29642 zone: tz_arg,
29643 },
29644 )))
29645 }
29646 }
29647 } else if (name == "CURRENT_TIMESTAMP"
29648 || name == "CURRENT_TIME"
29649 || name == "CURRENT_DATE")
29650 && args.is_empty()
29651 && matches!(
29652 target,
29653 DialectType::PostgreSQL
29654 | DialectType::DuckDB
29655 | DialectType::Presto
29656 | DialectType::Trino
29657 )
29658 {
29659 // These targets want no-parens CURRENT_TIMESTAMP / CURRENT_DATE / CURRENT_TIME
29660 if name == "CURRENT_TIMESTAMP" {
29661 Ok(Expression::CurrentTimestamp(
29662 crate::expressions::CurrentTimestamp {
29663 precision: None,
29664 sysdate: false,
29665 },
29666 ))
29667 } else if name == "CURRENT_DATE" {
29668 Ok(Expression::CurrentDate(crate::expressions::CurrentDate))
29669 } else {
29670 // CURRENT_TIME
29671 Ok(Expression::CurrentTime(crate::expressions::CurrentTime {
29672 precision: None,
29673 }))
29674 }
29675 } else {
29676 // All other targets: keep as function (with parens)
29677 Ok(Expression::Function(Box::new(Function::new(name, args))))
29678 }
29679 }
29680
29681 // JSON_QUERY(json, path) -> target-specific
29682 "JSON_QUERY" if args.len() == 2 => {
29683 match target {
29684 DialectType::DuckDB | DialectType::SQLite => {
29685 // json -> path syntax
29686 let json_expr = args.remove(0);
29687 let path = args.remove(0);
29688 Ok(Expression::JsonExtract(Box::new(
29689 crate::expressions::JsonExtractFunc {
29690 this: json_expr,
29691 path,
29692 returning: None,
29693 arrow_syntax: true,
29694 hash_arrow_syntax: false,
29695 wrapper_option: None,
29696 quotes_option: None,
29697 on_scalar_string: false,
29698 on_error: None,
29699 },
29700 )))
29701 }
29702 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
29703 Ok(Expression::Function(Box::new(Function::new(
29704 "GET_JSON_OBJECT".to_string(),
29705 args,
29706 ))))
29707 }
29708 DialectType::PostgreSQL | DialectType::Redshift => Ok(Expression::Function(
29709 Box::new(Function::new("JSON_EXTRACT_PATH".to_string(), args)),
29710 )),
29711 _ => Ok(Expression::Function(Box::new(Function::new(
29712 "JSON_QUERY".to_string(),
29713 args,
29714 )))),
29715 }
29716 }
29717
29718 // JSON_VALUE_ARRAY(json, path) -> target-specific
29719 "JSON_VALUE_ARRAY" if args.len() == 2 => {
29720 match target {
29721 DialectType::DuckDB => {
29722 // CAST(json -> path AS TEXT[])
29723 let json_expr = args.remove(0);
29724 let path = args.remove(0);
29725 let arrow = Expression::JsonExtract(Box::new(
29726 crate::expressions::JsonExtractFunc {
29727 this: json_expr,
29728 path,
29729 returning: None,
29730 arrow_syntax: true,
29731 hash_arrow_syntax: false,
29732 wrapper_option: None,
29733 quotes_option: None,
29734 on_scalar_string: false,
29735 on_error: None,
29736 },
29737 ));
29738 Ok(Expression::Cast(Box::new(Cast {
29739 this: arrow,
29740 to: DataType::Array {
29741 element_type: Box::new(DataType::Text),
29742 dimension: None,
29743 },
29744 trailing_comments: vec![],
29745 double_colon_syntax: false,
29746 format: None,
29747 default: None,
29748 })))
29749 }
29750 DialectType::Snowflake => {
29751 let json_expr = args.remove(0);
29752 let path_expr = args.remove(0);
29753 // Convert JSON path from $.path to just path
29754 let sf_path = if let Expression::Literal(Literal::String(ref s)) = path_expr
29755 {
29756 let trimmed = s.trim_start_matches('$').trim_start_matches('.');
29757 Expression::Literal(Literal::String(trimmed.to_string()))
29758 } else {
29759 path_expr
29760 };
29761 let parse_json = Expression::Function(Box::new(Function::new(
29762 "PARSE_JSON".to_string(),
29763 vec![json_expr],
29764 )));
29765 let get_path = Expression::Function(Box::new(Function::new(
29766 "GET_PATH".to_string(),
29767 vec![parse_json, sf_path],
29768 )));
29769 // TRANSFORM(get_path, x -> CAST(x AS VARCHAR))
29770 let cast_expr = Expression::Cast(Box::new(Cast {
29771 this: Expression::Identifier(Identifier::new("x")),
29772 to: DataType::VarChar {
29773 length: None,
29774 parenthesized_length: false,
29775 },
29776 trailing_comments: vec![],
29777 double_colon_syntax: false,
29778 format: None,
29779 default: None,
29780 }));
29781 let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
29782 parameters: vec![Identifier::new("x")],
29783 body: cast_expr,
29784 colon: false,
29785 parameter_types: vec![],
29786 }));
29787 Ok(Expression::Function(Box::new(Function::new(
29788 "TRANSFORM".to_string(),
29789 vec![get_path, lambda],
29790 ))))
29791 }
29792 _ => Ok(Expression::Function(Box::new(Function::new(
29793 "JSON_VALUE_ARRAY".to_string(),
29794 args,
29795 )))),
29796 }
29797 }
29798
29799 // BigQuery REGEXP_EXTRACT(val, regex[, position[, occurrence]]) -> target dialects
29800 // BigQuery's 3rd arg is "position" (starting char index), 4th is "occurrence" (which match to return)
29801 // This is different from Hive/Spark where 3rd arg is "group_index"
29802 "REGEXP_EXTRACT" if matches!(source, DialectType::BigQuery) => {
29803 match target {
29804 DialectType::DuckDB
29805 | DialectType::Presto
29806 | DialectType::Trino
29807 | DialectType::Athena => {
29808 if args.len() == 2 {
29809 // REGEXP_EXTRACT(val, regex) -> REGEXP_EXTRACT(val, regex, 1)
29810 args.push(Expression::number(1));
29811 Ok(Expression::Function(Box::new(Function::new(
29812 "REGEXP_EXTRACT".to_string(),
29813 args,
29814 ))))
29815 } else if args.len() == 3 {
29816 let val = args.remove(0);
29817 let regex = args.remove(0);
29818 let position = args.remove(0);
29819 let is_pos_1 = matches!(&position, Expression::Literal(Literal::Number(n)) if n == "1");
29820 if is_pos_1 {
29821 Ok(Expression::Function(Box::new(Function::new(
29822 "REGEXP_EXTRACT".to_string(),
29823 vec![val, regex, Expression::number(1)],
29824 ))))
29825 } else {
29826 let substring_expr = Expression::Function(Box::new(Function::new(
29827 "SUBSTRING".to_string(),
29828 vec![val, position],
29829 )));
29830 let nullif_expr = Expression::Function(Box::new(Function::new(
29831 "NULLIF".to_string(),
29832 vec![
29833 substring_expr,
29834 Expression::Literal(Literal::String(String::new())),
29835 ],
29836 )));
29837 Ok(Expression::Function(Box::new(Function::new(
29838 "REGEXP_EXTRACT".to_string(),
29839 vec![nullif_expr, regex, Expression::number(1)],
29840 ))))
29841 }
29842 } else if args.len() == 4 {
29843 let val = args.remove(0);
29844 let regex = args.remove(0);
29845 let position = args.remove(0);
29846 let occurrence = args.remove(0);
29847 let is_pos_1 = matches!(&position, Expression::Literal(Literal::Number(n)) if n == "1");
29848 let is_occ_1 = matches!(&occurrence, Expression::Literal(Literal::Number(n)) if n == "1");
29849 if is_pos_1 && is_occ_1 {
29850 Ok(Expression::Function(Box::new(Function::new(
29851 "REGEXP_EXTRACT".to_string(),
29852 vec![val, regex, Expression::number(1)],
29853 ))))
29854 } else {
29855 let subject = if is_pos_1 {
29856 val
29857 } else {
29858 let substring_expr = Expression::Function(Box::new(
29859 Function::new("SUBSTRING".to_string(), vec![val, position]),
29860 ));
29861 Expression::Function(Box::new(Function::new(
29862 "NULLIF".to_string(),
29863 vec![
29864 substring_expr,
29865 Expression::Literal(Literal::String(String::new())),
29866 ],
29867 )))
29868 };
29869 let extract_all = Expression::Function(Box::new(Function::new(
29870 "REGEXP_EXTRACT_ALL".to_string(),
29871 vec![subject, regex, Expression::number(1)],
29872 )));
29873 Ok(Expression::Function(Box::new(Function::new(
29874 "ARRAY_EXTRACT".to_string(),
29875 vec![extract_all, occurrence],
29876 ))))
29877 }
29878 } else {
29879 Ok(Expression::Function(Box::new(Function {
29880 name: f.name,
29881 args,
29882 distinct: f.distinct,
29883 trailing_comments: f.trailing_comments,
29884 use_bracket_syntax: f.use_bracket_syntax,
29885 no_parens: f.no_parens,
29886 quoted: f.quoted,
29887 })))
29888 }
29889 }
29890 DialectType::Snowflake => {
29891 // BigQuery REGEXP_EXTRACT -> Snowflake REGEXP_SUBSTR
29892 Ok(Expression::Function(Box::new(Function::new(
29893 "REGEXP_SUBSTR".to_string(),
29894 args,
29895 ))))
29896 }
29897 _ => {
29898 // For other targets (Hive/Spark/BigQuery): pass through as-is
29899 // BigQuery's default group behavior matches Hive/Spark for 2-arg case
29900 Ok(Expression::Function(Box::new(Function {
29901 name: f.name,
29902 args,
29903 distinct: f.distinct,
29904 trailing_comments: f.trailing_comments,
29905 use_bracket_syntax: f.use_bracket_syntax,
29906 no_parens: f.no_parens,
29907 quoted: f.quoted,
29908 })))
29909 }
29910 }
29911 }
29912
29913 // BigQuery STRUCT(args) -> target-specific struct expression
29914 "STRUCT" => {
29915 // Convert Function args to Struct fields
29916 let mut fields: Vec<(Option<String>, Expression)> = Vec::new();
29917 for (i, arg) in args.into_iter().enumerate() {
29918 match arg {
29919 Expression::Alias(a) => {
29920 // Named field: expr AS name
29921 fields.push((Some(a.alias.name.clone()), a.this));
29922 }
29923 other => {
29924 // Unnamed field: for Spark/Hive, keep as None
29925 // For Snowflake, auto-name as _N
29926 // For DuckDB, use column name for column refs, _N for others
29927 if matches!(target, DialectType::Snowflake) {
29928 fields.push((Some(format!("_{}", i)), other));
29929 } else if matches!(target, DialectType::DuckDB) {
29930 let auto_name = match &other {
29931 Expression::Column(col) => col.name.name.clone(),
29932 _ => format!("_{}", i),
29933 };
29934 fields.push((Some(auto_name), other));
29935 } else {
29936 fields.push((None, other));
29937 }
29938 }
29939 }
29940 }
29941
29942 match target {
29943 DialectType::Snowflake => {
29944 // OBJECT_CONSTRUCT('name', value, ...)
29945 let mut oc_args = Vec::new();
29946 for (name, val) in &fields {
29947 if let Some(n) = name {
29948 oc_args.push(Expression::Literal(Literal::String(n.clone())));
29949 oc_args.push(val.clone());
29950 } else {
29951 oc_args.push(val.clone());
29952 }
29953 }
29954 Ok(Expression::Function(Box::new(Function::new(
29955 "OBJECT_CONSTRUCT".to_string(),
29956 oc_args,
29957 ))))
29958 }
29959 DialectType::DuckDB => {
29960 // {'name': value, ...}
29961 Ok(Expression::Struct(Box::new(crate::expressions::Struct {
29962 fields,
29963 })))
29964 }
29965 DialectType::Hive => {
29966 // STRUCT(val1, val2, ...) - strip aliases
29967 let hive_fields: Vec<(Option<String>, Expression)> =
29968 fields.into_iter().map(|(_, v)| (None, v)).collect();
29969 Ok(Expression::Struct(Box::new(crate::expressions::Struct {
29970 fields: hive_fields,
29971 })))
29972 }
29973 DialectType::Spark | DialectType::Databricks => {
29974 // Use Expression::Struct to bypass Spark target transform auto-naming
29975 Ok(Expression::Struct(Box::new(crate::expressions::Struct {
29976 fields,
29977 })))
29978 }
29979 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
29980 // Check if all fields are named AND all have inferable types - if so, wrap in CAST(ROW(...) AS ROW(name TYPE, ...))
29981 let all_named =
29982 !fields.is_empty() && fields.iter().all(|(name, _)| name.is_some());
29983 let all_types_inferable = all_named
29984 && fields
29985 .iter()
29986 .all(|(_, val)| Self::can_infer_presto_type(val));
29987 let row_args: Vec<Expression> =
29988 fields.iter().map(|(_, v)| v.clone()).collect();
29989 let row_expr = Expression::Function(Box::new(Function::new(
29990 "ROW".to_string(),
29991 row_args,
29992 )));
29993 if all_named && all_types_inferable {
29994 // Build ROW type with inferred types
29995 let mut row_type_fields = Vec::new();
29996 for (name, val) in &fields {
29997 if let Some(n) = name {
29998 let type_str = Self::infer_sql_type_for_presto(val);
29999 row_type_fields.push(crate::expressions::StructField::new(
30000 n.clone(),
30001 crate::expressions::DataType::Custom { name: type_str },
30002 ));
30003 }
30004 }
30005 let row_type = crate::expressions::DataType::Struct {
30006 fields: row_type_fields,
30007 nested: true,
30008 };
30009 Ok(Expression::Cast(Box::new(Cast {
30010 this: row_expr,
30011 to: row_type,
30012 trailing_comments: Vec::new(),
30013 double_colon_syntax: false,
30014 format: None,
30015 default: None,
30016 })))
30017 } else {
30018 Ok(row_expr)
30019 }
30020 }
30021 _ => {
30022 // Default: keep as STRUCT function with original args
30023 let mut new_args = Vec::new();
30024 for (name, val) in fields {
30025 if let Some(n) = name {
30026 new_args.push(Expression::Alias(Box::new(
30027 crate::expressions::Alias::new(val, Identifier::new(n)),
30028 )));
30029 } else {
30030 new_args.push(val);
30031 }
30032 }
30033 Ok(Expression::Function(Box::new(Function::new(
30034 "STRUCT".to_string(),
30035 new_args,
30036 ))))
30037 }
30038 }
30039 }
30040
30041 // ROUND(x, n, 'ROUND_HALF_EVEN') -> ROUND_EVEN(x, n) for DuckDB
30042 "ROUND" if args.len() == 3 => {
30043 let x = args.remove(0);
30044 let n = args.remove(0);
30045 let mode = args.remove(0);
30046 // Check if mode is 'ROUND_HALF_EVEN'
30047 let is_half_even = matches!(&mode, Expression::Literal(Literal::String(s)) if s.eq_ignore_ascii_case("ROUND_HALF_EVEN"));
30048 if is_half_even && matches!(target, DialectType::DuckDB) {
30049 Ok(Expression::Function(Box::new(Function::new(
30050 "ROUND_EVEN".to_string(),
30051 vec![x, n],
30052 ))))
30053 } else {
30054 // Pass through with all args
30055 Ok(Expression::Function(Box::new(Function::new(
30056 "ROUND".to_string(),
30057 vec![x, n, mode],
30058 ))))
30059 }
30060 }
30061
30062 // MAKE_INTERVAL(year, month, named_args...) -> INTERVAL string for Snowflake/DuckDB
30063 "MAKE_INTERVAL" => {
30064 // MAKE_INTERVAL(1, 2, minute => 5, day => 3)
30065 // The positional args are: year, month
30066 // Named args are: day =>, minute =>, etc.
30067 // For Snowflake: INTERVAL '1 year, 2 month, 5 minute, 3 day'
30068 // For DuckDB: INTERVAL '1 year 2 month 5 minute 3 day'
30069 // For BigQuery->BigQuery: reorder named args (day before minute)
30070 if matches!(target, DialectType::Snowflake | DialectType::DuckDB) {
30071 let mut parts: Vec<(String, String)> = Vec::new();
30072 let mut pos_idx = 0;
30073 let pos_units = ["year", "month"];
30074 for arg in &args {
30075 if let Expression::NamedArgument(na) = arg {
30076 // Named arg like minute => 5
30077 let unit = na.name.name.clone();
30078 if let Expression::Literal(Literal::Number(n)) = &na.value {
30079 parts.push((unit, n.clone()));
30080 }
30081 } else if pos_idx < pos_units.len() {
30082 if let Expression::Literal(Literal::Number(n)) = arg {
30083 parts.push((pos_units[pos_idx].to_string(), n.clone()));
30084 }
30085 pos_idx += 1;
30086 }
30087 }
30088 // Don't sort - preserve original argument order
30089 let separator = if matches!(target, DialectType::Snowflake) {
30090 ", "
30091 } else {
30092 " "
30093 };
30094 let interval_str = parts
30095 .iter()
30096 .map(|(u, v)| format!("{} {}", v, u))
30097 .collect::<Vec<_>>()
30098 .join(separator);
30099 Ok(Expression::Interval(Box::new(
30100 crate::expressions::Interval {
30101 this: Some(Expression::Literal(Literal::String(interval_str))),
30102 unit: None,
30103 },
30104 )))
30105 } else if matches!(target, DialectType::BigQuery) {
30106 // BigQuery->BigQuery: reorder named args (day, minute, etc.)
30107 let mut positional = Vec::new();
30108 let mut named: Vec<(
30109 String,
30110 Expression,
30111 crate::expressions::NamedArgSeparator,
30112 )> = Vec::new();
30113 let _pos_units = ["year", "month"];
30114 let mut _pos_idx = 0;
30115 for arg in args {
30116 if let Expression::NamedArgument(na) = arg {
30117 named.push((na.name.name.clone(), na.value, na.separator));
30118 } else {
30119 positional.push(arg);
30120 _pos_idx += 1;
30121 }
30122 }
30123 // Sort named args by: day, hour, minute, second
30124 let unit_order = |u: &str| -> usize {
30125 match u.to_lowercase().as_str() {
30126 "day" => 0,
30127 "hour" => 1,
30128 "minute" => 2,
30129 "second" => 3,
30130 _ => 4,
30131 }
30132 };
30133 named.sort_by_key(|(u, _, _)| unit_order(u));
30134 let mut result_args = positional;
30135 for (name, value, sep) in named {
30136 result_args.push(Expression::NamedArgument(Box::new(
30137 crate::expressions::NamedArgument {
30138 name: Identifier::new(&name),
30139 value,
30140 separator: sep,
30141 },
30142 )));
30143 }
30144 Ok(Expression::Function(Box::new(Function::new(
30145 "MAKE_INTERVAL".to_string(),
30146 result_args,
30147 ))))
30148 } else {
30149 Ok(Expression::Function(Box::new(Function::new(
30150 "MAKE_INTERVAL".to_string(),
30151 args,
30152 ))))
30153 }
30154 }
30155
30156 // ARRAY_TO_STRING(array, sep, null_text) -> ARRAY_TO_STRING(LIST_TRANSFORM(array, x -> COALESCE(x, null_text)), sep) for DuckDB
30157 "ARRAY_TO_STRING" if args.len() == 3 => {
30158 let arr = args.remove(0);
30159 let sep = args.remove(0);
30160 let null_text = args.remove(0);
30161 match target {
30162 DialectType::DuckDB => {
30163 // LIST_TRANSFORM(array, x -> COALESCE(x, null_text))
30164 let _lambda_param =
30165 Expression::Identifier(crate::expressions::Identifier::new("x"));
30166 let coalesce =
30167 Expression::Coalesce(Box::new(crate::expressions::VarArgFunc {
30168 original_name: None,
30169 expressions: vec![
30170 Expression::Identifier(crate::expressions::Identifier::new(
30171 "x",
30172 )),
30173 null_text,
30174 ],
30175 }));
30176 let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
30177 parameters: vec![crate::expressions::Identifier::new("x")],
30178 body: coalesce,
30179 colon: false,
30180 parameter_types: vec![],
30181 }));
30182 let list_transform = Expression::Function(Box::new(Function::new(
30183 "LIST_TRANSFORM".to_string(),
30184 vec![arr, lambda],
30185 )));
30186 Ok(Expression::Function(Box::new(Function::new(
30187 "ARRAY_TO_STRING".to_string(),
30188 vec![list_transform, sep],
30189 ))))
30190 }
30191 _ => Ok(Expression::Function(Box::new(Function::new(
30192 "ARRAY_TO_STRING".to_string(),
30193 vec![arr, sep, null_text],
30194 )))),
30195 }
30196 }
30197
30198 // LENGTH(x) -> CASE TYPEOF(x) ... for DuckDB
30199 "LENGTH" if args.len() == 1 => {
30200 let arg = args.remove(0);
30201 match target {
30202 DialectType::DuckDB => {
30203 // CASE TYPEOF(foo) WHEN 'BLOB' THEN OCTET_LENGTH(CAST(foo AS BLOB)) ELSE LENGTH(CAST(foo AS TEXT)) END
30204 let typeof_func = Expression::Function(Box::new(Function::new(
30205 "TYPEOF".to_string(),
30206 vec![arg.clone()],
30207 )));
30208 let blob_cast = Expression::Cast(Box::new(Cast {
30209 this: arg.clone(),
30210 to: DataType::VarBinary { length: None },
30211 trailing_comments: vec![],
30212 double_colon_syntax: false,
30213 format: None,
30214 default: None,
30215 }));
30216 let octet_length = Expression::Function(Box::new(Function::new(
30217 "OCTET_LENGTH".to_string(),
30218 vec![blob_cast],
30219 )));
30220 let text_cast = Expression::Cast(Box::new(Cast {
30221 this: arg,
30222 to: DataType::Text,
30223 trailing_comments: vec![],
30224 double_colon_syntax: false,
30225 format: None,
30226 default: None,
30227 }));
30228 let length_text = Expression::Function(Box::new(Function::new(
30229 "LENGTH".to_string(),
30230 vec![text_cast],
30231 )));
30232 Ok(Expression::Case(Box::new(crate::expressions::Case {
30233 operand: Some(typeof_func),
30234 whens: vec![(
30235 Expression::Literal(Literal::String("BLOB".to_string())),
30236 octet_length,
30237 )],
30238 else_: Some(length_text),
30239 comments: Vec::new(),
30240 })))
30241 }
30242 _ => Ok(Expression::Function(Box::new(Function::new(
30243 "LENGTH".to_string(),
30244 vec![arg],
30245 )))),
30246 }
30247 }
30248
30249 // PERCENTILE_CONT(x, fraction RESPECT NULLS) -> QUANTILE_CONT(x, fraction) for DuckDB
30250 "PERCENTILE_CONT" if args.len() >= 2 && matches!(source, DialectType::BigQuery) => {
30251 // BigQuery PERCENTILE_CONT(x, fraction [RESPECT|IGNORE NULLS]) OVER ()
30252 // The args should be [x, fraction] with the null handling stripped
30253 // For DuckDB: QUANTILE_CONT(x, fraction)
30254 // For Spark: PERCENTILE_CONT(x, fraction) RESPECT NULLS (handled at window level)
30255 match target {
30256 DialectType::DuckDB => {
30257 // Strip down to just 2 args, rename to QUANTILE_CONT
30258 let x = args[0].clone();
30259 let frac = args[1].clone();
30260 Ok(Expression::Function(Box::new(Function::new(
30261 "QUANTILE_CONT".to_string(),
30262 vec![x, frac],
30263 ))))
30264 }
30265 _ => Ok(Expression::Function(Box::new(Function::new(
30266 "PERCENTILE_CONT".to_string(),
30267 args,
30268 )))),
30269 }
30270 }
30271
30272 // All others: pass through
30273 _ => Ok(Expression::Function(Box::new(Function {
30274 name: f.name,
30275 args,
30276 distinct: f.distinct,
30277 trailing_comments: f.trailing_comments,
30278 use_bracket_syntax: f.use_bracket_syntax,
30279 no_parens: f.no_parens,
30280 quoted: f.quoted,
30281 }))),
30282 }
30283 }
30284
30285 /// Check if we can reliably infer the SQL type for Presto/Trino ROW CAST.
30286 /// Returns false for column references and other non-literal expressions where the type is unknown.
30287 fn can_infer_presto_type(expr: &Expression) -> bool {
30288 match expr {
30289 Expression::Literal(_) => true,
30290 Expression::Boolean(_) => true,
30291 Expression::Array(_) | Expression::ArrayFunc(_) => true,
30292 Expression::Struct(_) | Expression::StructFunc(_) => true,
30293 Expression::Function(f) => {
30294 let up = f.name.to_uppercase();
30295 up == "STRUCT"
30296 || up == "ROW"
30297 || up == "CURRENT_DATE"
30298 || up == "CURRENT_TIMESTAMP"
30299 || up == "NOW"
30300 }
30301 Expression::Cast(_) => true,
30302 Expression::Neg(inner) => Self::can_infer_presto_type(&inner.this),
30303 _ => false,
30304 }
30305 }
30306
30307 /// Infer SQL type name for a Presto/Trino ROW CAST from a literal expression
30308 fn infer_sql_type_for_presto(expr: &Expression) -> String {
30309 use crate::expressions::Literal;
30310 match expr {
30311 Expression::Literal(Literal::String(_)) => "VARCHAR".to_string(),
30312 Expression::Literal(Literal::Number(n)) => {
30313 if n.contains('.') {
30314 "DOUBLE".to_string()
30315 } else {
30316 "INTEGER".to_string()
30317 }
30318 }
30319 Expression::Boolean(_) => "BOOLEAN".to_string(),
30320 Expression::Literal(Literal::Date(_)) => "DATE".to_string(),
30321 Expression::Literal(Literal::Timestamp(_)) => "TIMESTAMP".to_string(),
30322 Expression::Literal(Literal::Datetime(_)) => "TIMESTAMP".to_string(),
30323 Expression::Array(_) | Expression::ArrayFunc(_) => "ARRAY(VARCHAR)".to_string(),
30324 Expression::Struct(_) | Expression::StructFunc(_) => "ROW".to_string(),
30325 Expression::Function(f) => {
30326 let up = f.name.to_uppercase();
30327 if up == "STRUCT" || up == "ROW" {
30328 "ROW".to_string()
30329 } else if up == "CURRENT_DATE" {
30330 "DATE".to_string()
30331 } else if up == "CURRENT_TIMESTAMP" || up == "NOW" {
30332 "TIMESTAMP".to_string()
30333 } else {
30334 "VARCHAR".to_string()
30335 }
30336 }
30337 Expression::Cast(c) => {
30338 // If already cast, use the target type
30339 Self::data_type_to_presto_string(&c.to)
30340 }
30341 _ => "VARCHAR".to_string(),
30342 }
30343 }
30344
30345 /// Convert a DataType to its Presto/Trino string representation for ROW type
30346 fn data_type_to_presto_string(dt: &crate::expressions::DataType) -> String {
30347 use crate::expressions::DataType;
30348 match dt {
30349 DataType::VarChar { .. } | DataType::Text | DataType::String { .. } => {
30350 "VARCHAR".to_string()
30351 }
30352 DataType::Int { .. }
30353 | DataType::BigInt { .. }
30354 | DataType::SmallInt { .. }
30355 | DataType::TinyInt { .. } => "INTEGER".to_string(),
30356 DataType::Float { .. } | DataType::Double { .. } => "DOUBLE".to_string(),
30357 DataType::Boolean => "BOOLEAN".to_string(),
30358 DataType::Date => "DATE".to_string(),
30359 DataType::Timestamp { .. } => "TIMESTAMP".to_string(),
30360 DataType::Struct { fields, .. } => {
30361 let field_strs: Vec<String> = fields
30362 .iter()
30363 .map(|f| {
30364 format!(
30365 "{} {}",
30366 f.name,
30367 Self::data_type_to_presto_string(&f.data_type)
30368 )
30369 })
30370 .collect();
30371 format!("ROW({})", field_strs.join(", "))
30372 }
30373 DataType::Array { element_type, .. } => {
30374 format!("ARRAY({})", Self::data_type_to_presto_string(element_type))
30375 }
30376 DataType::Custom { name } => {
30377 // Pass through custom type names (e.g., "INTEGER", "VARCHAR" from earlier inference)
30378 name.clone()
30379 }
30380 _ => "VARCHAR".to_string(),
30381 }
30382 }
30383
30384 /// Convert IntervalUnit to string
30385 fn interval_unit_to_string(unit: &crate::expressions::IntervalUnit) -> String {
30386 match unit {
30387 crate::expressions::IntervalUnit::Year => "YEAR".to_string(),
30388 crate::expressions::IntervalUnit::Quarter => "QUARTER".to_string(),
30389 crate::expressions::IntervalUnit::Month => "MONTH".to_string(),
30390 crate::expressions::IntervalUnit::Week => "WEEK".to_string(),
30391 crate::expressions::IntervalUnit::Day => "DAY".to_string(),
30392 crate::expressions::IntervalUnit::Hour => "HOUR".to_string(),
30393 crate::expressions::IntervalUnit::Minute => "MINUTE".to_string(),
30394 crate::expressions::IntervalUnit::Second => "SECOND".to_string(),
30395 crate::expressions::IntervalUnit::Millisecond => "MILLISECOND".to_string(),
30396 crate::expressions::IntervalUnit::Microsecond => "MICROSECOND".to_string(),
30397 crate::expressions::IntervalUnit::Nanosecond => "NANOSECOND".to_string(),
30398 }
30399 }
30400
30401 /// Extract unit string from an expression (uppercased)
30402 fn get_unit_str_static(expr: &Expression) -> String {
30403 use crate::expressions::Literal;
30404 match expr {
30405 Expression::Identifier(id) => id.name.to_uppercase(),
30406 Expression::Literal(Literal::String(s)) => s.to_uppercase(),
30407 Expression::Column(col) => col.name.name.to_uppercase(),
30408 Expression::Function(f) => {
30409 let base = f.name.to_uppercase();
30410 if !f.args.is_empty() {
30411 let inner = Self::get_unit_str_static(&f.args[0]);
30412 format!("{}({})", base, inner)
30413 } else {
30414 base
30415 }
30416 }
30417 _ => "DAY".to_string(),
30418 }
30419 }
30420
30421 /// Parse unit string to IntervalUnit
30422 fn parse_interval_unit_static(s: &str) -> crate::expressions::IntervalUnit {
30423 match s {
30424 "YEAR" | "YY" | "YYYY" => crate::expressions::IntervalUnit::Year,
30425 "QUARTER" | "QQ" | "Q" => crate::expressions::IntervalUnit::Quarter,
30426 "MONTH" | "MM" | "M" => crate::expressions::IntervalUnit::Month,
30427 "WEEK" | "WK" | "WW" | "ISOWEEK" => crate::expressions::IntervalUnit::Week,
30428 "DAY" | "DD" | "D" | "DY" => crate::expressions::IntervalUnit::Day,
30429 "HOUR" | "HH" => crate::expressions::IntervalUnit::Hour,
30430 "MINUTE" | "MI" | "N" => crate::expressions::IntervalUnit::Minute,
30431 "SECOND" | "SS" | "S" => crate::expressions::IntervalUnit::Second,
30432 "MILLISECOND" | "MS" => crate::expressions::IntervalUnit::Millisecond,
30433 "MICROSECOND" | "MCS" | "US" => crate::expressions::IntervalUnit::Microsecond,
30434 _ if s.starts_with("WEEK(") => crate::expressions::IntervalUnit::Week,
30435 _ => crate::expressions::IntervalUnit::Day,
30436 }
30437 }
30438
30439 /// Convert expression to simple string for interval building
30440 fn expr_to_string_static(expr: &Expression) -> String {
30441 use crate::expressions::Literal;
30442 match expr {
30443 Expression::Literal(Literal::Number(s)) => s.clone(),
30444 Expression::Literal(Literal::String(s)) => s.clone(),
30445 Expression::Identifier(id) => id.name.clone(),
30446 Expression::Neg(f) => format!("-{}", Self::expr_to_string_static(&f.this)),
30447 _ => "1".to_string(),
30448 }
30449 }
30450
30451 /// Extract a simple string representation from a literal expression
30452 fn expr_to_string(expr: &Expression) -> String {
30453 use crate::expressions::Literal;
30454 match expr {
30455 Expression::Literal(Literal::Number(s)) => s.clone(),
30456 Expression::Literal(Literal::String(s)) => s.clone(),
30457 Expression::Neg(f) => format!("-{}", Self::expr_to_string(&f.this)),
30458 Expression::Identifier(id) => id.name.clone(),
30459 _ => "1".to_string(),
30460 }
30461 }
30462
30463 /// Quote an interval value expression as a string literal if it's a number (or negated number)
30464 fn quote_interval_val(expr: &Expression) -> Expression {
30465 use crate::expressions::Literal;
30466 match expr {
30467 Expression::Literal(Literal::Number(n)) => {
30468 Expression::Literal(Literal::String(n.clone()))
30469 }
30470 Expression::Literal(Literal::String(_)) => expr.clone(),
30471 Expression::Neg(inner) => {
30472 if let Expression::Literal(Literal::Number(n)) = &inner.this {
30473 Expression::Literal(Literal::String(format!("-{}", n)))
30474 } else {
30475 expr.clone()
30476 }
30477 }
30478 _ => expr.clone(),
30479 }
30480 }
30481
30482 /// Check if a timestamp string contains timezone info (offset like +02:00, or named timezone)
30483 fn timestamp_string_has_timezone(ts: &str) -> bool {
30484 let trimmed = ts.trim();
30485 // Check for numeric timezone offsets: +N, -N, +NN:NN, -NN:NN at end
30486 if let Some(last_space) = trimmed.rfind(' ') {
30487 let suffix = &trimmed[last_space + 1..];
30488 if (suffix.starts_with('+') || suffix.starts_with('-')) && suffix.len() > 1 {
30489 let rest = &suffix[1..];
30490 if rest.chars().all(|c| c.is_ascii_digit() || c == ':') {
30491 return true;
30492 }
30493 }
30494 }
30495 // Check for named timezone abbreviations
30496 let ts_lower = trimmed.to_lowercase();
30497 let tz_abbrevs = [" utc", " gmt", " cet", " est", " pst", " cst", " mst"];
30498 for abbrev in &tz_abbrevs {
30499 if ts_lower.ends_with(abbrev) {
30500 return true;
30501 }
30502 }
30503 false
30504 }
30505
30506 /// Maybe CAST timestamp literal to TIMESTAMPTZ for Snowflake
30507 fn maybe_cast_ts_to_tz(expr: Expression, func_name: &str) -> Expression {
30508 use crate::expressions::{Cast, DataType, Literal};
30509 match expr {
30510 Expression::Literal(Literal::Timestamp(s)) => {
30511 let tz = func_name.starts_with("TIMESTAMP");
30512 Expression::Cast(Box::new(Cast {
30513 this: Expression::Literal(Literal::String(s)),
30514 to: if tz {
30515 DataType::Timestamp {
30516 timezone: true,
30517 precision: None,
30518 }
30519 } else {
30520 DataType::Timestamp {
30521 timezone: false,
30522 precision: None,
30523 }
30524 },
30525 trailing_comments: vec![],
30526 double_colon_syntax: false,
30527 format: None,
30528 default: None,
30529 }))
30530 }
30531 other => other,
30532 }
30533 }
30534
30535 /// Maybe CAST timestamp literal to TIMESTAMP (no tz)
30536 fn maybe_cast_ts(expr: Expression) -> Expression {
30537 use crate::expressions::{Cast, DataType, Literal};
30538 match expr {
30539 Expression::Literal(Literal::Timestamp(s)) => Expression::Cast(Box::new(Cast {
30540 this: Expression::Literal(Literal::String(s)),
30541 to: DataType::Timestamp {
30542 timezone: false,
30543 precision: None,
30544 },
30545 trailing_comments: vec![],
30546 double_colon_syntax: false,
30547 format: None,
30548 default: None,
30549 })),
30550 other => other,
30551 }
30552 }
30553
30554 /// Convert DATE 'x' literal to CAST('x' AS DATE)
30555 fn date_literal_to_cast(expr: Expression) -> Expression {
30556 use crate::expressions::{Cast, DataType, Literal};
30557 match expr {
30558 Expression::Literal(Literal::Date(s)) => Expression::Cast(Box::new(Cast {
30559 this: Expression::Literal(Literal::String(s)),
30560 to: DataType::Date,
30561 trailing_comments: vec![],
30562 double_colon_syntax: false,
30563 format: None,
30564 default: None,
30565 })),
30566 other => other,
30567 }
30568 }
30569
30570 /// Ensure an expression that should be a date is CAST(... AS DATE).
30571 /// Handles both DATE literals and string literals that look like dates.
30572 fn ensure_cast_date(expr: Expression) -> Expression {
30573 use crate::expressions::{Cast, DataType, Literal};
30574 match expr {
30575 Expression::Literal(Literal::Date(s)) => Expression::Cast(Box::new(Cast {
30576 this: Expression::Literal(Literal::String(s)),
30577 to: DataType::Date,
30578 trailing_comments: vec![],
30579 double_colon_syntax: false,
30580 format: None,
30581 default: None,
30582 })),
30583 Expression::Literal(Literal::String(ref _s)) => {
30584 // String literal that should be a date -> CAST('s' AS DATE)
30585 Expression::Cast(Box::new(Cast {
30586 this: expr,
30587 to: DataType::Date,
30588 trailing_comments: vec![],
30589 double_colon_syntax: false,
30590 format: None,
30591 default: None,
30592 }))
30593 }
30594 // Already a CAST or other expression -> leave as-is
30595 other => other,
30596 }
30597 }
30598
30599 /// Force CAST(expr AS DATE) for any expression (not just literals)
30600 /// Skips if the expression is already a CAST to DATE
30601 fn force_cast_date(expr: Expression) -> Expression {
30602 use crate::expressions::{Cast, DataType};
30603 // If it's already a CAST to DATE, don't double-wrap
30604 if let Expression::Cast(ref c) = expr {
30605 if matches!(c.to, DataType::Date) {
30606 return expr;
30607 }
30608 }
30609 Expression::Cast(Box::new(Cast {
30610 this: expr,
30611 to: DataType::Date,
30612 trailing_comments: vec![],
30613 double_colon_syntax: false,
30614 format: None,
30615 default: None,
30616 }))
30617 }
30618
30619 /// Internal TO_DATE function that won't be converted to CAST by the Snowflake handler.
30620 /// Uses the name `_POLYGLOT_TO_DATE` which is not recognized by the TO_DATE -> CAST logic.
30621 /// The Snowflake DATEDIFF handler converts these back to TO_DATE.
30622 const PRESERVED_TO_DATE: &'static str = "_POLYGLOT_TO_DATE";
30623
30624 fn ensure_to_date_preserved(expr: Expression) -> Expression {
30625 use crate::expressions::{Function, Literal};
30626 if matches!(expr, Expression::Literal(Literal::String(_))) {
30627 Expression::Function(Box::new(Function::new(
30628 Self::PRESERVED_TO_DATE.to_string(),
30629 vec![expr],
30630 )))
30631 } else {
30632 expr
30633 }
30634 }
30635
30636 /// TRY_CAST(expr AS DATE) - used for DuckDB when TO_DATE is unwrapped
30637 fn try_cast_date(expr: Expression) -> Expression {
30638 use crate::expressions::{Cast, DataType};
30639 Expression::TryCast(Box::new(Cast {
30640 this: expr,
30641 to: DataType::Date,
30642 trailing_comments: vec![],
30643 double_colon_syntax: false,
30644 format: None,
30645 default: None,
30646 }))
30647 }
30648
30649 /// CAST(CAST(expr AS TIMESTAMP) AS DATE) - used when Hive string dates need to be cast
30650 fn double_cast_timestamp_date(expr: Expression) -> Expression {
30651 use crate::expressions::{Cast, DataType};
30652 let inner = Expression::Cast(Box::new(Cast {
30653 this: expr,
30654 to: DataType::Timestamp {
30655 timezone: false,
30656 precision: None,
30657 },
30658 trailing_comments: vec![],
30659 double_colon_syntax: false,
30660 format: None,
30661 default: None,
30662 }));
30663 Expression::Cast(Box::new(Cast {
30664 this: inner,
30665 to: DataType::Date,
30666 trailing_comments: vec![],
30667 double_colon_syntax: false,
30668 format: None,
30669 default: None,
30670 }))
30671 }
30672
30673 /// CAST(CAST(expr AS DATETIME) AS DATE) - BigQuery variant
30674 fn double_cast_datetime_date(expr: Expression) -> Expression {
30675 use crate::expressions::{Cast, DataType};
30676 let inner = Expression::Cast(Box::new(Cast {
30677 this: expr,
30678 to: DataType::Custom {
30679 name: "DATETIME".to_string(),
30680 },
30681 trailing_comments: vec![],
30682 double_colon_syntax: false,
30683 format: None,
30684 default: None,
30685 }));
30686 Expression::Cast(Box::new(Cast {
30687 this: inner,
30688 to: DataType::Date,
30689 trailing_comments: vec![],
30690 double_colon_syntax: false,
30691 format: None,
30692 default: None,
30693 }))
30694 }
30695
30696 /// CAST(CAST(expr AS DATETIME2) AS DATE) - TSQL variant
30697 fn double_cast_datetime2_date(expr: Expression) -> Expression {
30698 use crate::expressions::{Cast, DataType};
30699 let inner = Expression::Cast(Box::new(Cast {
30700 this: expr,
30701 to: DataType::Custom {
30702 name: "DATETIME2".to_string(),
30703 },
30704 trailing_comments: vec![],
30705 double_colon_syntax: false,
30706 format: None,
30707 default: None,
30708 }));
30709 Expression::Cast(Box::new(Cast {
30710 this: inner,
30711 to: DataType::Date,
30712 trailing_comments: vec![],
30713 double_colon_syntax: false,
30714 format: None,
30715 default: None,
30716 }))
30717 }
30718
30719 /// Convert Hive/Java-style date format strings to C-style (strftime) format
30720 /// e.g., "yyyy-MM-dd'T'HH" -> "%Y-%m-%d'T'%H"
30721 fn hive_format_to_c_format(fmt: &str) -> String {
30722 let mut result = String::new();
30723 let chars: Vec<char> = fmt.chars().collect();
30724 let mut i = 0;
30725 while i < chars.len() {
30726 match chars[i] {
30727 'y' => {
30728 let mut count = 0;
30729 while i < chars.len() && chars[i] == 'y' {
30730 count += 1;
30731 i += 1;
30732 }
30733 if count >= 4 {
30734 result.push_str("%Y");
30735 } else if count == 2 {
30736 result.push_str("%y");
30737 } else {
30738 result.push_str("%Y");
30739 }
30740 }
30741 'M' => {
30742 let mut count = 0;
30743 while i < chars.len() && chars[i] == 'M' {
30744 count += 1;
30745 i += 1;
30746 }
30747 if count >= 3 {
30748 result.push_str("%b");
30749 } else if count == 2 {
30750 result.push_str("%m");
30751 } else {
30752 result.push_str("%m");
30753 }
30754 }
30755 'd' => {
30756 let mut _count = 0;
30757 while i < chars.len() && chars[i] == 'd' {
30758 _count += 1;
30759 i += 1;
30760 }
30761 result.push_str("%d");
30762 }
30763 'H' => {
30764 let mut _count = 0;
30765 while i < chars.len() && chars[i] == 'H' {
30766 _count += 1;
30767 i += 1;
30768 }
30769 result.push_str("%H");
30770 }
30771 'h' => {
30772 let mut _count = 0;
30773 while i < chars.len() && chars[i] == 'h' {
30774 _count += 1;
30775 i += 1;
30776 }
30777 result.push_str("%I");
30778 }
30779 'm' => {
30780 let mut _count = 0;
30781 while i < chars.len() && chars[i] == 'm' {
30782 _count += 1;
30783 i += 1;
30784 }
30785 result.push_str("%M");
30786 }
30787 's' => {
30788 let mut _count = 0;
30789 while i < chars.len() && chars[i] == 's' {
30790 _count += 1;
30791 i += 1;
30792 }
30793 result.push_str("%S");
30794 }
30795 'S' => {
30796 // Fractional seconds - skip
30797 while i < chars.len() && chars[i] == 'S' {
30798 i += 1;
30799 }
30800 result.push_str("%f");
30801 }
30802 'a' => {
30803 // AM/PM
30804 while i < chars.len() && chars[i] == 'a' {
30805 i += 1;
30806 }
30807 result.push_str("%p");
30808 }
30809 'E' => {
30810 let mut count = 0;
30811 while i < chars.len() && chars[i] == 'E' {
30812 count += 1;
30813 i += 1;
30814 }
30815 if count >= 4 {
30816 result.push_str("%A");
30817 } else {
30818 result.push_str("%a");
30819 }
30820 }
30821 '\'' => {
30822 // Quoted literal text - pass through the quotes and content
30823 result.push('\'');
30824 i += 1;
30825 while i < chars.len() && chars[i] != '\'' {
30826 result.push(chars[i]);
30827 i += 1;
30828 }
30829 if i < chars.len() {
30830 result.push('\'');
30831 i += 1;
30832 }
30833 }
30834 c => {
30835 result.push(c);
30836 i += 1;
30837 }
30838 }
30839 }
30840 result
30841 }
30842
30843 /// Convert Hive/Java format to Presto format (uses %T for HH:mm:ss)
30844 fn hive_format_to_presto_format(fmt: &str) -> String {
30845 let c_fmt = Self::hive_format_to_c_format(fmt);
30846 // Presto uses %T for HH:MM:SS
30847 c_fmt.replace("%H:%M:%S", "%T")
30848 }
30849
30850 /// Ensure a timestamp-like expression for DuckDB with CAST(... AS TIMESTAMP)
30851 fn ensure_cast_timestamp(expr: Expression) -> Expression {
30852 use crate::expressions::{Cast, DataType, Literal};
30853 match expr {
30854 Expression::Literal(Literal::Timestamp(s)) => Expression::Cast(Box::new(Cast {
30855 this: Expression::Literal(Literal::String(s)),
30856 to: DataType::Timestamp {
30857 timezone: false,
30858 precision: None,
30859 },
30860 trailing_comments: vec![],
30861 double_colon_syntax: false,
30862 format: None,
30863 default: None,
30864 })),
30865 Expression::Literal(Literal::String(ref _s)) => Expression::Cast(Box::new(Cast {
30866 this: expr,
30867 to: DataType::Timestamp {
30868 timezone: false,
30869 precision: None,
30870 },
30871 trailing_comments: vec![],
30872 double_colon_syntax: false,
30873 format: None,
30874 default: None,
30875 })),
30876 Expression::Literal(Literal::Datetime(s)) => Expression::Cast(Box::new(Cast {
30877 this: Expression::Literal(Literal::String(s)),
30878 to: DataType::Timestamp {
30879 timezone: false,
30880 precision: None,
30881 },
30882 trailing_comments: vec![],
30883 double_colon_syntax: false,
30884 format: None,
30885 default: None,
30886 })),
30887 other => other,
30888 }
30889 }
30890
30891 /// Force CAST to TIMESTAMP for any expression (not just literals)
30892 /// Used when transpiling from Redshift/TSQL where DATEDIFF/DATEADD args need explicit timestamp cast
30893 fn force_cast_timestamp(expr: Expression) -> Expression {
30894 use crate::expressions::{Cast, DataType};
30895 // Don't double-wrap if already a CAST to TIMESTAMP
30896 if let Expression::Cast(ref c) = expr {
30897 if matches!(c.to, DataType::Timestamp { .. }) {
30898 return expr;
30899 }
30900 }
30901 Expression::Cast(Box::new(Cast {
30902 this: expr,
30903 to: DataType::Timestamp {
30904 timezone: false,
30905 precision: None,
30906 },
30907 trailing_comments: vec![],
30908 double_colon_syntax: false,
30909 format: None,
30910 default: None,
30911 }))
30912 }
30913
30914 /// Ensure a timestamp-like expression for DuckDB with CAST(... AS TIMESTAMPTZ)
30915 fn ensure_cast_timestamptz(expr: Expression) -> Expression {
30916 use crate::expressions::{Cast, DataType, Literal};
30917 match expr {
30918 Expression::Literal(Literal::Timestamp(s)) => Expression::Cast(Box::new(Cast {
30919 this: Expression::Literal(Literal::String(s)),
30920 to: DataType::Timestamp {
30921 timezone: true,
30922 precision: None,
30923 },
30924 trailing_comments: vec![],
30925 double_colon_syntax: false,
30926 format: None,
30927 default: None,
30928 })),
30929 Expression::Literal(Literal::String(ref _s)) => Expression::Cast(Box::new(Cast {
30930 this: expr,
30931 to: DataType::Timestamp {
30932 timezone: true,
30933 precision: None,
30934 },
30935 trailing_comments: vec![],
30936 double_colon_syntax: false,
30937 format: None,
30938 default: None,
30939 })),
30940 Expression::Literal(Literal::Datetime(s)) => Expression::Cast(Box::new(Cast {
30941 this: Expression::Literal(Literal::String(s)),
30942 to: DataType::Timestamp {
30943 timezone: true,
30944 precision: None,
30945 },
30946 trailing_comments: vec![],
30947 double_colon_syntax: false,
30948 format: None,
30949 default: None,
30950 })),
30951 other => other,
30952 }
30953 }
30954
30955 /// Ensure expression is CAST to DATETIME (for BigQuery)
30956 fn ensure_cast_datetime(expr: Expression) -> Expression {
30957 use crate::expressions::{Cast, DataType, Literal};
30958 match expr {
30959 Expression::Literal(Literal::String(ref _s)) => Expression::Cast(Box::new(Cast {
30960 this: expr,
30961 to: DataType::Custom {
30962 name: "DATETIME".to_string(),
30963 },
30964 trailing_comments: vec![],
30965 double_colon_syntax: false,
30966 format: None,
30967 default: None,
30968 })),
30969 other => other,
30970 }
30971 }
30972
30973 /// Force CAST expression to DATETIME (for BigQuery) - always wraps unless already DATETIME
30974 fn force_cast_datetime(expr: Expression) -> Expression {
30975 use crate::expressions::{Cast, DataType};
30976 if let Expression::Cast(ref c) = expr {
30977 if let DataType::Custom { ref name } = c.to {
30978 if name.eq_ignore_ascii_case("DATETIME") {
30979 return expr;
30980 }
30981 }
30982 }
30983 Expression::Cast(Box::new(Cast {
30984 this: expr,
30985 to: DataType::Custom {
30986 name: "DATETIME".to_string(),
30987 },
30988 trailing_comments: vec![],
30989 double_colon_syntax: false,
30990 format: None,
30991 default: None,
30992 }))
30993 }
30994
30995 /// Ensure expression is CAST to DATETIME2 (for TSQL)
30996 fn ensure_cast_datetime2(expr: Expression) -> Expression {
30997 use crate::expressions::{Cast, DataType, Literal};
30998 match expr {
30999 Expression::Literal(Literal::String(ref _s)) => Expression::Cast(Box::new(Cast {
31000 this: expr,
31001 to: DataType::Custom {
31002 name: "DATETIME2".to_string(),
31003 },
31004 trailing_comments: vec![],
31005 double_colon_syntax: false,
31006 format: None,
31007 default: None,
31008 })),
31009 other => other,
31010 }
31011 }
31012
31013 /// Convert TIMESTAMP 'x' literal to CAST('x' AS TIMESTAMPTZ) for DuckDB
31014 fn ts_literal_to_cast_tz(expr: Expression) -> Expression {
31015 use crate::expressions::{Cast, DataType, Literal};
31016 match expr {
31017 Expression::Literal(Literal::Timestamp(s)) => Expression::Cast(Box::new(Cast {
31018 this: Expression::Literal(Literal::String(s)),
31019 to: DataType::Timestamp {
31020 timezone: true,
31021 precision: None,
31022 },
31023 trailing_comments: vec![],
31024 double_colon_syntax: false,
31025 format: None,
31026 default: None,
31027 })),
31028 other => other,
31029 }
31030 }
31031
31032 /// Convert BigQuery format string to Snowflake format string
31033 fn bq_format_to_snowflake(format_expr: &Expression) -> Expression {
31034 use crate::expressions::Literal;
31035 if let Expression::Literal(Literal::String(s)) = format_expr {
31036 let sf = s
31037 .replace("%Y", "yyyy")
31038 .replace("%m", "mm")
31039 .replace("%d", "DD")
31040 .replace("%H", "HH24")
31041 .replace("%M", "MI")
31042 .replace("%S", "SS")
31043 .replace("%b", "mon")
31044 .replace("%B", "Month")
31045 .replace("%e", "FMDD");
31046 Expression::Literal(Literal::String(sf))
31047 } else {
31048 format_expr.clone()
31049 }
31050 }
31051
31052 /// Convert BigQuery format string to DuckDB format string
31053 fn bq_format_to_duckdb(format_expr: &Expression) -> Expression {
31054 use crate::expressions::Literal;
31055 if let Expression::Literal(Literal::String(s)) = format_expr {
31056 let duck = s
31057 .replace("%T", "%H:%M:%S")
31058 .replace("%F", "%Y-%m-%d")
31059 .replace("%D", "%m/%d/%y")
31060 .replace("%x", "%m/%d/%y")
31061 .replace("%c", "%a %b %-d %H:%M:%S %Y")
31062 .replace("%e", "%-d")
31063 .replace("%E6S", "%S.%f");
31064 Expression::Literal(Literal::String(duck))
31065 } else {
31066 format_expr.clone()
31067 }
31068 }
31069
31070 /// Convert BigQuery CAST FORMAT elements (like YYYY, MM, DD) to strftime (like %Y, %m, %d)
31071 fn bq_cast_format_to_strftime(format_expr: &Expression) -> Expression {
31072 use crate::expressions::Literal;
31073 if let Expression::Literal(Literal::String(s)) = format_expr {
31074 // Replace format elements from longest to shortest to avoid partial matches
31075 let result = s
31076 .replace("YYYYMMDD", "%Y%m%d")
31077 .replace("YYYY", "%Y")
31078 .replace("YY", "%y")
31079 .replace("MONTH", "%B")
31080 .replace("MON", "%b")
31081 .replace("MM", "%m")
31082 .replace("DD", "%d")
31083 .replace("HH24", "%H")
31084 .replace("HH12", "%I")
31085 .replace("HH", "%I")
31086 .replace("MI", "%M")
31087 .replace("SSTZH", "%S%z")
31088 .replace("SS", "%S")
31089 .replace("TZH", "%z");
31090 Expression::Literal(Literal::String(result))
31091 } else {
31092 format_expr.clone()
31093 }
31094 }
31095
31096 /// Normalize BigQuery format strings for BQ->BQ output
31097 fn bq_format_normalize_bq(format_expr: &Expression) -> Expression {
31098 use crate::expressions::Literal;
31099 if let Expression::Literal(Literal::String(s)) = format_expr {
31100 let norm = s.replace("%H:%M:%S", "%T").replace("%x", "%D");
31101 Expression::Literal(Literal::String(norm))
31102 } else {
31103 format_expr.clone()
31104 }
31105 }
31106}
31107
#[cfg(test)]
mod tests {
    // Unit tests exercise the public Dialect API end-to-end:
    // parse / transform / generate / transpile_to.
    // Several tests below are exploratory smoke tests that only eprintln! the
    // output (success is asserted via unwrap()); they are flagged inline.
    use super::*;

    // DialectType::from_str must accept common aliases (postgres/postgresql).
    #[test]
    fn test_dialect_type_from_str() {
        assert_eq!(
            "postgres".parse::<DialectType>().unwrap(),
            DialectType::PostgreSQL
        );
        assert_eq!(
            "postgresql".parse::<DialectType>().unwrap(),
            DialectType::PostgreSQL
        );
        assert_eq!("mysql".parse::<DialectType>().unwrap(), DialectType::MySQL);
        assert_eq!(
            "bigquery".parse::<DialectType>().unwrap(),
            DialectType::BigQuery
        );
    }

    // Trivial statement round-trips unchanged; one statement in, one out.
    #[test]
    fn test_basic_transpile() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect
            .transpile_to("SELECT 1", DialectType::PostgreSQL)
            .unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0], "SELECT 1");
    }

    #[test]
    fn test_function_transformation_mysql() {
        // NVL should be transformed to IFNULL in MySQL
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect
            .transpile_to("SELECT NVL(a, b)", DialectType::MySQL)
            .unwrap();
        assert_eq!(result[0], "SELECT IFNULL(a, b)");
    }

    // NOTE(review): smoke test — output printed, not pinned; only unwrap() asserts success.
    #[test]
    fn test_get_path_duckdb() {
        // Test: step by step
        let snowflake = Dialect::get(DialectType::Snowflake);

        // Step 1: Parse and check what Snowflake produces as intermediate
        let result_sf_sf = snowflake
            .transpile_to(
                "SELECT PARSE_JSON('{\"fruit\":\"banana\"}'):fruit",
                DialectType::Snowflake,
            )
            .unwrap();
        eprintln!("Snowflake->Snowflake colon: {}", result_sf_sf[0]);

        // Step 2: DuckDB target
        let result_sf_dk = snowflake
            .transpile_to(
                "SELECT PARSE_JSON('{\"fruit\":\"banana\"}'):fruit",
                DialectType::DuckDB,
            )
            .unwrap();
        eprintln!("Snowflake->DuckDB colon: {}", result_sf_dk[0]);

        // Step 3: GET_PATH directly
        let result_gp = snowflake
            .transpile_to(
                "SELECT GET_PATH(PARSE_JSON('{\"fruit\":\"banana\"}'), 'fruit')",
                DialectType::DuckDB,
            )
            .unwrap();
        eprintln!("Snowflake->DuckDB explicit GET_PATH: {}", result_gp[0]);
    }

    #[test]
    fn test_function_transformation_postgres() {
        // IFNULL should be transformed to COALESCE in PostgreSQL
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect
            .transpile_to("SELECT IFNULL(a, b)", DialectType::PostgreSQL)
            .unwrap();
        assert_eq!(result[0], "SELECT COALESCE(a, b)");

        // NVL should also be transformed to COALESCE
        let result = dialect
            .transpile_to("SELECT NVL(a, b)", DialectType::PostgreSQL)
            .unwrap();
        assert_eq!(result[0], "SELECT COALESCE(a, b)");
    }

    #[test]
    fn test_hive_cast_to_trycast() {
        // Hive CAST should become TRY_CAST for targets that support it
        let hive = Dialect::get(DialectType::Hive);
        let result = hive
            .transpile_to("CAST(1 AS INT)", DialectType::DuckDB)
            .unwrap();
        assert_eq!(result[0], "TRY_CAST(1 AS INT)");

        let result = hive
            .transpile_to("CAST(1 AS INT)", DialectType::Presto)
            .unwrap();
        assert_eq!(result[0], "TRY_CAST(1 AS INTEGER)");
    }

    #[test]
    fn test_hive_array_identity() {
        // Hive ARRAY<DATE> should preserve angle bracket syntax
        let sql = "CREATE EXTERNAL TABLE `my_table` (`a7` ARRAY<DATE>) ROW FORMAT SERDE 'a' STORED AS INPUTFORMAT 'b' OUTPUTFORMAT 'c' LOCATION 'd' TBLPROPERTIES ('e'='f')";
        let hive = Dialect::get(DialectType::Hive);

        // Test via transpile_to (this works)
        let result = hive.transpile_to(sql, DialectType::Hive).unwrap();
        eprintln!("Hive ARRAY via transpile_to: {}", result[0]);
        assert!(
            result[0].contains("ARRAY<DATE>"),
            "transpile_to: Expected ARRAY<DATE>, got: {}",
            result[0]
        );

        // Test via parse -> transform -> generate (identity test path)
        let ast = hive.parse(sql).unwrap();
        let transformed = hive.transform(ast[0].clone()).unwrap();
        let output = hive.generate(&transformed).unwrap();
        eprintln!("Hive ARRAY via identity path: {}", output);
        assert!(
            output.contains("ARRAY<DATE>"),
            "identity path: Expected ARRAY<DATE>, got: {}",
            output
        );
    }

    #[test]
    fn test_starrocks_delete_between_expansion() {
        // StarRocks doesn't support BETWEEN in DELETE statements
        let dialect = Dialect::get(DialectType::Generic);

        // BETWEEN should be expanded to >= AND <= in DELETE
        let result = dialect
            .transpile_to(
                "DELETE FROM t WHERE a BETWEEN b AND c",
                DialectType::StarRocks,
            )
            .unwrap();
        assert_eq!(result[0], "DELETE FROM t WHERE a >= b AND a <= c");

        // NOT BETWEEN should be expanded to < OR > in DELETE
        let result = dialect
            .transpile_to(
                "DELETE FROM t WHERE a NOT BETWEEN b AND c",
                DialectType::StarRocks,
            )
            .unwrap();
        assert_eq!(result[0], "DELETE FROM t WHERE a < b OR a > c");

        // BETWEEN in SELECT should NOT be expanded (StarRocks supports it there)
        let result = dialect
            .transpile_to(
                "SELECT * FROM t WHERE a BETWEEN b AND c",
                DialectType::StarRocks,
            )
            .unwrap();
        assert!(
            result[0].contains("BETWEEN"),
            "BETWEEN should be preserved in SELECT"
        );
    }

    // Regression: nested LTRIM/RTRIM must parse (only checks Ok, not the output).
    #[test]
    fn test_snowflake_ltrim_rtrim_parse() {
        let sf = Dialect::get(DialectType::Snowflake);
        let sql = "SELECT LTRIM(RTRIM(col)) FROM t1";
        let result = sf.transpile_to(sql, DialectType::DuckDB);
        match &result {
            Ok(r) => eprintln!("LTRIM/RTRIM result: {}", r[0]),
            Err(e) => eprintln!("LTRIM/RTRIM error: {}", e),
        }
        assert!(
            result.is_ok(),
            "Expected successful parse of LTRIM(RTRIM(col)), got error: {:?}",
            result.err()
        );
    }

    // Regression: COUNT_IF must parse in DuckDB (only checks Ok).
    #[test]
    fn test_duckdb_count_if_parse() {
        let duck = Dialect::get(DialectType::DuckDB);
        let sql = "COUNT_IF(x)";
        let result = duck.transpile_to(sql, DialectType::DuckDB);
        match &result {
            Ok(r) => eprintln!("COUNT_IF result: {}", r[0]),
            Err(e) => eprintln!("COUNT_IF error: {}", e),
        }
        assert!(
            result.is_ok(),
            "Expected successful parse of COUNT_IF(x), got error: {:?}",
            result.err()
        );
    }

    // Regression: TSQL TINYINT in CAST must transpile (only checks Ok).
    #[test]
    fn test_tsql_cast_tinyint_parse() {
        let tsql = Dialect::get(DialectType::TSQL);
        let sql = "CAST(X AS TINYINT)";
        let result = tsql.transpile_to(sql, DialectType::DuckDB);
        match &result {
            Ok(r) => eprintln!("TSQL CAST TINYINT result: {}", r[0]),
            Err(e) => eprintln!("TSQL CAST TINYINT error: {}", e),
        }
        assert!(
            result.is_ok(),
            "Expected successful transpile, got error: {:?}",
            result.err()
        );
    }

    // Postgres '#' (bitwise XOR) must survive a PG->PG round trip.
    #[test]
    fn test_pg_hash_bitwise_xor() {
        let dialect = Dialect::get(DialectType::PostgreSQL);
        let result = dialect
            .transpile_to("x # y", DialectType::PostgreSQL)
            .unwrap();
        assert_eq!(result[0], "x # y");
    }

    // ARRAY[...] constructor becomes DuckDB's bracket literal; @> is kept.
    #[test]
    fn test_pg_array_to_duckdb() {
        let dialect = Dialect::get(DialectType::PostgreSQL);
        let result = dialect
            .transpile_to("SELECT ARRAY[1, 2, 3] @> ARRAY[1, 2]", DialectType::DuckDB)
            .unwrap();
        assert_eq!(result[0], "SELECT [1, 2, 3] @> [1, 2]");
    }

    // BigQuery lacks ARRAY_REMOVE; expect the UNNEST-filter rewrite.
    #[test]
    fn test_array_remove_bigquery() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect
            .transpile_to("ARRAY_REMOVE(the_array, target)", DialectType::BigQuery)
            .unwrap();
        assert_eq!(
            result[0],
            "ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)"
        );
    }

    // NOTE(review): smoke test — output printed, not pinned; only unwrap() asserts success.
    #[test]
    fn test_map_clickhouse_case() {
        let dialect = Dialect::get(DialectType::Generic);
        let parsed = dialect
            .parse("CAST(MAP('a', '1') AS MAP(TEXT, TEXT))")
            .unwrap();
        eprintln!("MAP parsed: {:?}", parsed);
        let result = dialect
            .transpile_to(
                "CAST(MAP('a', '1') AS MAP(TEXT, TEXT))",
                DialectType::ClickHouse,
            )
            .unwrap();
        eprintln!("MAP result: {}", result[0]);
    }

    // GENERATE_DATE_ARRAY -> Presto SEQUENCE with the interval rewritten to days.
    #[test]
    fn test_generate_date_array_presto() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile_to(
            "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
            DialectType::Presto,
        ).unwrap();
        eprintln!("GDA -> Presto: {}", result[0]);
        assert_eq!(result[0], "SELECT * FROM UNNEST(SEQUENCE(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), (1 * INTERVAL '7' DAY)))");
    }

    // NOTE(review): the remaining GDA tests are smoke tests — output printed, not pinned.
    #[test]
    fn test_generate_date_array_postgres() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile_to(
            "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
            DialectType::PostgreSQL,
        ).unwrap();
        eprintln!("GDA -> PostgreSQL: {}", result[0]);
    }

    #[test]
    fn test_generate_date_array_snowflake() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile_to(
            "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
            DialectType::Snowflake,
        ).unwrap();
        eprintln!("GDA -> Snowflake: {}", result[0]);
    }

    #[test]
    fn test_array_length_generate_date_array_snowflake() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile_to(
            "SELECT ARRAY_LENGTH(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
            DialectType::Snowflake,
        ).unwrap();
        eprintln!("ARRAY_LENGTH(GDA) -> Snowflake: {}", result[0]);
    }

    #[test]
    fn test_generate_date_array_mysql() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile_to(
            "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
            DialectType::MySQL,
        ).unwrap();
        eprintln!("GDA -> MySQL: {}", result[0]);
    }

    #[test]
    fn test_generate_date_array_redshift() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile_to(
            "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
            DialectType::Redshift,
        ).unwrap();
        eprintln!("GDA -> Redshift: {}", result[0]);
    }

    #[test]
    fn test_generate_date_array_tsql() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile_to(
            "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
            DialectType::TSQL,
        ).unwrap();
        eprintln!("GDA -> TSQL: {}", result[0]);
    }

    // NOTE(review): smoke test — prints both outcomes, asserts nothing beyond not panicking.
    #[test]
    fn test_struct_colon_syntax() {
        let dialect = Dialect::get(DialectType::Generic);
        // Test without colon first
        let result = dialect.transpile_to(
            "CAST((1, 2, 3, 4) AS STRUCT<a TINYINT, b SMALLINT, c INT, d BIGINT>)",
            DialectType::ClickHouse,
        );
        match result {
            Ok(r) => eprintln!("STRUCT no colon -> ClickHouse: {}", r[0]),
            Err(e) => eprintln!("STRUCT no colon error: {}", e),
        }
        // Now test with colon
        let result = dialect.transpile_to(
            "CAST((1, 2, 3, 4) AS STRUCT<a: TINYINT, b: SMALLINT, c: INT, d: BIGINT>)",
            DialectType::ClickHouse,
        );
        match result {
            Ok(r) => eprintln!("STRUCT colon -> ClickHouse: {}", r[0]),
            Err(e) => eprintln!("STRUCT colon error: {}", e),
        }
    }

    // NOTE(review): smoke tests — CTE-wrapped GDA output printed, not pinned.
    #[test]
    fn test_generate_date_array_cte_wrapped_mysql() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile_to(
            "WITH dates AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))) SELECT * FROM dates",
            DialectType::MySQL,
        ).unwrap();
        eprintln!("GDA CTE -> MySQL: {}", result[0]);
    }

    #[test]
    fn test_generate_date_array_cte_wrapped_tsql() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile_to(
            "WITH dates AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))) SELECT * FROM dates",
            DialectType::TSQL,
        ).unwrap();
        eprintln!("GDA CTE -> TSQL: {}", result[0]);
    }

    #[test]
    fn test_decode_literal_no_null_check() {
        // Oracle DECODE with all literals should produce simple equality, no IS NULL
        let dialect = Dialect::get(DialectType::Oracle);
        let result = dialect
            .transpile_to("SELECT decode(1,2,3,4)", DialectType::DuckDB)
            .unwrap();
        assert_eq!(
            result[0], "SELECT CASE WHEN 1 = 2 THEN 3 ELSE 4 END",
            "Literal DECODE should not have IS NULL checks"
        );
    }

    #[test]
    fn test_decode_column_vs_literal_no_null_check() {
        // Oracle DECODE with column vs literal should use simple equality (like sqlglot)
        let dialect = Dialect::get(DialectType::Oracle);
        let result = dialect
            .transpile_to("SELECT decode(col, 2, 3, 4) FROM t", DialectType::DuckDB)
            .unwrap();
        assert_eq!(
            result[0], "SELECT CASE WHEN col = 2 THEN 3 ELSE 4 END FROM t",
            "Column vs literal DECODE should not have IS NULL checks"
        );
    }

    #[test]
    fn test_decode_column_vs_column_keeps_null_check() {
        // Oracle DECODE with column vs column should keep null-safe comparison
        let dialect = Dialect::get(DialectType::Oracle);
        let result = dialect
            .transpile_to("SELECT decode(col, col2, 3, 4) FROM t", DialectType::DuckDB)
            .unwrap();
        assert!(
            result[0].contains("IS NULL"),
            "Column vs column DECODE should have IS NULL checks, got: {}",
            result[0]
        );
    }

    #[test]
    fn test_decode_null_search() {
        // Oracle DECODE with NULL search should use IS NULL
        let dialect = Dialect::get(DialectType::Oracle);
        let result = dialect
            .transpile_to("SELECT decode(col, NULL, 3, 4) FROM t", DialectType::DuckDB)
            .unwrap();
        assert_eq!(
            result[0],
            "SELECT CASE WHEN col IS NULL THEN 3 ELSE 4 END FROM t",
        );
    }
}