polyglot_sql/dialects/mod.rs
1//! SQL Dialect System
2//!
3//! This module implements the dialect abstraction layer that enables SQL transpilation
4//! between 30+ database engines. Each dialect encapsulates three concerns:
5//!
//! - **Tokenization**: Dialect-specific lexing rules (e.g., MySQL and BigQuery quote
//!   identifiers with backticks, while TSQL uses square brackets).
8//! - **Generation**: How AST nodes are rendered back to SQL text, including identifier quoting
9//! style, function name casing, and syntax variations.
10//! - **Transformation**: AST-level rewrites that convert dialect-specific constructs to/from
11//! a normalized form (e.g., Snowflake `SQUARE(x)` becomes `POWER(x, 2)`).
12//!
13//! The primary entry point is [`Dialect::get`], which returns a configured [`Dialect`] instance
14//! for a given [`DialectType`]. From there, callers can [`parse`](Dialect::parse),
15//! [`generate`](Dialect::generate), [`transform`](Dialect::transform), or
16//! [`transpile_to`](Dialect::transpile_to) another dialect in a single call.
17//!
18//! Each concrete dialect (e.g., `PostgresDialect`, `BigQueryDialect`) implements the
19//! [`DialectImpl`] trait, which provides configuration hooks and expression-level transforms.
20//! Dialect modules live in submodules of this module and are re-exported here.
21
22mod generic; // Always compiled
23
24#[cfg(feature = "dialect-athena")]
25mod athena;
26#[cfg(feature = "dialect-bigquery")]
27mod bigquery;
28#[cfg(feature = "dialect-clickhouse")]
29mod clickhouse;
30#[cfg(feature = "dialect-cockroachdb")]
31mod cockroachdb;
32#[cfg(feature = "dialect-databricks")]
33mod databricks;
34#[cfg(feature = "dialect-datafusion")]
35mod datafusion;
36#[cfg(feature = "dialect-doris")]
37mod doris;
38#[cfg(feature = "dialect-dremio")]
39mod dremio;
40#[cfg(feature = "dialect-drill")]
41mod drill;
42#[cfg(feature = "dialect-druid")]
43mod druid;
44#[cfg(feature = "dialect-duckdb")]
45mod duckdb;
46#[cfg(feature = "dialect-dune")]
47mod dune;
48#[cfg(feature = "dialect-exasol")]
49mod exasol;
50#[cfg(feature = "dialect-fabric")]
51mod fabric;
52#[cfg(feature = "dialect-hive")]
53mod hive;
54#[cfg(feature = "dialect-materialize")]
55mod materialize;
56#[cfg(feature = "dialect-mysql")]
57mod mysql;
58#[cfg(feature = "dialect-oracle")]
59mod oracle;
60#[cfg(feature = "dialect-postgresql")]
61mod postgres;
62#[cfg(feature = "dialect-presto")]
63mod presto;
64#[cfg(feature = "dialect-redshift")]
65mod redshift;
66#[cfg(feature = "dialect-risingwave")]
67mod risingwave;
68#[cfg(feature = "dialect-singlestore")]
69mod singlestore;
70#[cfg(feature = "dialect-snowflake")]
71mod snowflake;
72#[cfg(feature = "dialect-solr")]
73mod solr;
74#[cfg(feature = "dialect-spark")]
75mod spark;
76#[cfg(feature = "dialect-sqlite")]
77mod sqlite;
78#[cfg(feature = "dialect-starrocks")]
79mod starrocks;
80#[cfg(feature = "dialect-tableau")]
81mod tableau;
82#[cfg(feature = "dialect-teradata")]
83mod teradata;
84#[cfg(feature = "dialect-tidb")]
85mod tidb;
86#[cfg(feature = "dialect-trino")]
87mod trino;
88#[cfg(feature = "dialect-tsql")]
89mod tsql;
90
91pub use generic::GenericDialect; // Always available
92
93#[cfg(feature = "dialect-athena")]
94pub use athena::AthenaDialect;
95#[cfg(feature = "dialect-bigquery")]
96pub use bigquery::BigQueryDialect;
97#[cfg(feature = "dialect-clickhouse")]
98pub use clickhouse::ClickHouseDialect;
99#[cfg(feature = "dialect-cockroachdb")]
100pub use cockroachdb::CockroachDBDialect;
101#[cfg(feature = "dialect-databricks")]
102pub use databricks::DatabricksDialect;
103#[cfg(feature = "dialect-datafusion")]
104pub use datafusion::DataFusionDialect;
105#[cfg(feature = "dialect-doris")]
106pub use doris::DorisDialect;
107#[cfg(feature = "dialect-dremio")]
108pub use dremio::DremioDialect;
109#[cfg(feature = "dialect-drill")]
110pub use drill::DrillDialect;
111#[cfg(feature = "dialect-druid")]
112pub use druid::DruidDialect;
113#[cfg(feature = "dialect-duckdb")]
114pub use duckdb::DuckDBDialect;
115#[cfg(feature = "dialect-dune")]
116pub use dune::DuneDialect;
117#[cfg(feature = "dialect-exasol")]
118pub use exasol::ExasolDialect;
119#[cfg(feature = "dialect-fabric")]
120pub use fabric::FabricDialect;
121#[cfg(feature = "dialect-hive")]
122pub use hive::HiveDialect;
123#[cfg(feature = "dialect-materialize")]
124pub use materialize::MaterializeDialect;
125#[cfg(feature = "dialect-mysql")]
126pub use mysql::MySQLDialect;
127#[cfg(feature = "dialect-oracle")]
128pub use oracle::OracleDialect;
129#[cfg(feature = "dialect-postgresql")]
130pub use postgres::PostgresDialect;
131#[cfg(feature = "dialect-presto")]
132pub use presto::PrestoDialect;
133#[cfg(feature = "dialect-redshift")]
134pub use redshift::RedshiftDialect;
135#[cfg(feature = "dialect-risingwave")]
136pub use risingwave::RisingWaveDialect;
137#[cfg(feature = "dialect-singlestore")]
138pub use singlestore::SingleStoreDialect;
139#[cfg(feature = "dialect-snowflake")]
140pub use snowflake::SnowflakeDialect;
141#[cfg(feature = "dialect-solr")]
142pub use solr::SolrDialect;
143#[cfg(feature = "dialect-spark")]
144pub use spark::SparkDialect;
145#[cfg(feature = "dialect-sqlite")]
146pub use sqlite::SQLiteDialect;
147#[cfg(feature = "dialect-starrocks")]
148pub use starrocks::StarRocksDialect;
149#[cfg(feature = "dialect-tableau")]
150pub use tableau::TableauDialect;
151#[cfg(feature = "dialect-teradata")]
152pub use teradata::TeradataDialect;
153#[cfg(feature = "dialect-tidb")]
154pub use tidb::TiDBDialect;
155#[cfg(feature = "dialect-trino")]
156pub use trino::TrinoDialect;
157#[cfg(feature = "dialect-tsql")]
158pub use tsql::TSQLDialect;
159
160use crate::error::Result;
161use crate::expressions::{Expression, FunctionBody};
162use crate::generator::{Generator, GeneratorConfig};
163use crate::parser::Parser;
164use crate::tokens::{Token, Tokenizer, TokenizerConfig};
165use serde::{Deserialize, Serialize};
166use std::collections::HashMap;
167use std::sync::{Arc, LazyLock, RwLock};
168
/// Enumeration of all supported SQL dialects.
///
/// Each variant corresponds to a specific SQL database engine or query language.
/// The `Generic` variant represents standard SQL with no dialect-specific behavior,
/// and is used as the default when no dialect is specified.
///
/// Dialect names are case-insensitive when parsed from strings via [`FromStr`].
/// Some dialects accept aliases (e.g., "mssql" and "sqlserver" both resolve to [`TSQL`](DialectType::TSQL)).
/// Every [`Display`](std::fmt::Display) output round-trips through [`FromStr`],
/// so `ty.to_string().parse()` yields the same variant back.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum DialectType {
    /// Standard SQL with no dialect-specific behavior (default).
    Generic,
    /// PostgreSQL -- advanced open-source relational database.
    PostgreSQL,
    /// MySQL -- widely-used open-source relational database (also accepts "mysql").
    MySQL,
    /// Google BigQuery -- serverless cloud data warehouse with unique syntax (backtick quoting, STRUCT types, QUALIFY).
    BigQuery,
    /// Snowflake -- cloud data platform with QUALIFY clause, FLATTEN, and variant types.
    Snowflake,
    /// DuckDB -- in-process analytical database with modern SQL extensions.
    DuckDB,
    /// SQLite -- lightweight embedded relational database.
    SQLite,
    /// Apache Hive -- data warehouse on Hadoop with HiveQL syntax.
    Hive,
    /// Apache Spark SQL -- distributed query engine (also accepts "spark2").
    Spark,
    /// Trino -- distributed SQL query engine (formerly PrestoSQL).
    Trino,
    /// PrestoDB -- distributed SQL query engine for big data.
    Presto,
    /// Amazon Redshift -- cloud data warehouse based on PostgreSQL.
    Redshift,
    /// Transact-SQL (T-SQL) -- Microsoft SQL Server and Azure SQL (also accepts "mssql", "sqlserver").
    TSQL,
    /// Oracle Database -- commercial relational database with PL/SQL extensions.
    Oracle,
    /// ClickHouse -- column-oriented OLAP database for real-time analytics.
    ClickHouse,
    /// Databricks SQL -- Spark-based lakehouse platform with QUALIFY support.
    Databricks,
    /// Amazon Athena -- serverless query service (hybrid Trino/Hive engine).
    Athena,
    /// Teradata -- enterprise data warehouse with proprietary SQL extensions.
    Teradata,
    /// Apache Doris -- real-time analytical database (MySQL-compatible).
    Doris,
    /// StarRocks -- sub-second OLAP database (MySQL-compatible).
    StarRocks,
    /// Materialize -- streaming SQL database built on differential dataflow.
    Materialize,
    /// RisingWave -- distributed streaming database with PostgreSQL compatibility.
    RisingWave,
    /// SingleStore (formerly MemSQL) -- distributed SQL database (also accepts "memsql").
    SingleStore,
    /// CockroachDB -- distributed SQL database with PostgreSQL compatibility (also accepts "cockroach").
    CockroachDB,
    /// TiDB -- distributed HTAP database with MySQL compatibility.
    TiDB,
    /// Apache Druid -- real-time analytics database.
    Druid,
    /// Apache Solr -- search platform with SQL interface.
    Solr,
    /// Tableau -- data visualization platform with its own SQL dialect.
    Tableau,
    /// Dune Analytics -- blockchain analytics SQL engine.
    Dune,
    /// Microsoft Fabric -- unified analytics platform (T-SQL based).
    Fabric,
    /// Apache Drill -- schema-free SQL query engine for big data.
    Drill,
    /// Dremio -- data lakehouse platform with Arrow-based query engine.
    Dremio,
    /// Exasol -- in-memory analytic database.
    Exasol,
    /// Apache DataFusion -- Arrow-based query engine with modern SQL extensions.
    DataFusion,
}
249
250impl Default for DialectType {
251 fn default() -> Self {
252 DialectType::Generic
253 }
254}
255
256impl std::fmt::Display for DialectType {
257 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
258 match self {
259 DialectType::Generic => write!(f, "generic"),
260 DialectType::PostgreSQL => write!(f, "postgresql"),
261 DialectType::MySQL => write!(f, "mysql"),
262 DialectType::BigQuery => write!(f, "bigquery"),
263 DialectType::Snowflake => write!(f, "snowflake"),
264 DialectType::DuckDB => write!(f, "duckdb"),
265 DialectType::SQLite => write!(f, "sqlite"),
266 DialectType::Hive => write!(f, "hive"),
267 DialectType::Spark => write!(f, "spark"),
268 DialectType::Trino => write!(f, "trino"),
269 DialectType::Presto => write!(f, "presto"),
270 DialectType::Redshift => write!(f, "redshift"),
271 DialectType::TSQL => write!(f, "tsql"),
272 DialectType::Oracle => write!(f, "oracle"),
273 DialectType::ClickHouse => write!(f, "clickhouse"),
274 DialectType::Databricks => write!(f, "databricks"),
275 DialectType::Athena => write!(f, "athena"),
276 DialectType::Teradata => write!(f, "teradata"),
277 DialectType::Doris => write!(f, "doris"),
278 DialectType::StarRocks => write!(f, "starrocks"),
279 DialectType::Materialize => write!(f, "materialize"),
280 DialectType::RisingWave => write!(f, "risingwave"),
281 DialectType::SingleStore => write!(f, "singlestore"),
282 DialectType::CockroachDB => write!(f, "cockroachdb"),
283 DialectType::TiDB => write!(f, "tidb"),
284 DialectType::Druid => write!(f, "druid"),
285 DialectType::Solr => write!(f, "solr"),
286 DialectType::Tableau => write!(f, "tableau"),
287 DialectType::Dune => write!(f, "dune"),
288 DialectType::Fabric => write!(f, "fabric"),
289 DialectType::Drill => write!(f, "drill"),
290 DialectType::Dremio => write!(f, "dremio"),
291 DialectType::Exasol => write!(f, "exasol"),
292 DialectType::DataFusion => write!(f, "datafusion"),
293 }
294 }
295}
296
297impl std::str::FromStr for DialectType {
298 type Err = crate::error::Error;
299
300 fn from_str(s: &str) -> Result<Self> {
301 match s.to_lowercase().as_str() {
302 "generic" | "" => Ok(DialectType::Generic),
303 "postgres" | "postgresql" => Ok(DialectType::PostgreSQL),
304 "mysql" => Ok(DialectType::MySQL),
305 "bigquery" => Ok(DialectType::BigQuery),
306 "snowflake" => Ok(DialectType::Snowflake),
307 "duckdb" => Ok(DialectType::DuckDB),
308 "sqlite" => Ok(DialectType::SQLite),
309 "hive" => Ok(DialectType::Hive),
310 "spark" | "spark2" => Ok(DialectType::Spark),
311 "trino" => Ok(DialectType::Trino),
312 "presto" => Ok(DialectType::Presto),
313 "redshift" => Ok(DialectType::Redshift),
314 "tsql" | "mssql" | "sqlserver" => Ok(DialectType::TSQL),
315 "oracle" => Ok(DialectType::Oracle),
316 "clickhouse" => Ok(DialectType::ClickHouse),
317 "databricks" => Ok(DialectType::Databricks),
318 "athena" => Ok(DialectType::Athena),
319 "teradata" => Ok(DialectType::Teradata),
320 "doris" => Ok(DialectType::Doris),
321 "starrocks" => Ok(DialectType::StarRocks),
322 "materialize" => Ok(DialectType::Materialize),
323 "risingwave" => Ok(DialectType::RisingWave),
324 "singlestore" | "memsql" => Ok(DialectType::SingleStore),
325 "cockroachdb" | "cockroach" => Ok(DialectType::CockroachDB),
326 "tidb" => Ok(DialectType::TiDB),
327 "druid" => Ok(DialectType::Druid),
328 "solr" => Ok(DialectType::Solr),
329 "tableau" => Ok(DialectType::Tableau),
330 "dune" => Ok(DialectType::Dune),
331 "fabric" => Ok(DialectType::Fabric),
332 "drill" => Ok(DialectType::Drill),
333 "dremio" => Ok(DialectType::Dremio),
334 "exasol" => Ok(DialectType::Exasol),
335 "datafusion" | "arrow-datafusion" | "arrow_datafusion" => Ok(DialectType::DataFusion),
336 _ => Err(crate::error::Error::parse(
337 format!("Unknown dialect: {}", s),
338 0,
339 0,
340 0,
341 0,
342 )),
343 }
344 }
345}
346
/// Trait that each concrete SQL dialect must implement.
///
/// `DialectImpl` provides the configuration hooks and per-expression transform logic
/// that distinguish one dialect from another. Implementors supply:
///
/// - A [`DialectType`] identifier.
/// - Optional overrides for tokenizer and generator configuration (defaults to generic SQL).
/// - An expression-level transform function ([`transform_expr`](DialectImpl::transform_expr))
///   that rewrites individual AST nodes for this dialect (e.g., converting `NVL` to `COALESCE`).
/// - An optional preprocessing step ([`preprocess`](DialectImpl::preprocess)) for whole-tree
///   rewrites that must run before the recursive per-node transform (e.g., eliminating QUALIFY).
///
/// The default implementations are no-ops, so a minimal dialect only needs to provide
/// [`dialect_type`](DialectImpl::dialect_type) and override the methods that differ from
/// standard SQL.
pub trait DialectImpl {
    /// Returns the [`DialectType`] that identifies this dialect.
    ///
    /// This is the only method without a default implementation.
    fn dialect_type(&self) -> DialectType;

    /// Returns the tokenizer configuration for this dialect.
    ///
    /// Override to customize identifier quoting characters, string escape rules,
    /// comment styles, and other lexing behavior. The default is the generic
    /// [`TokenizerConfig`].
    fn tokenizer_config(&self) -> TokenizerConfig {
        TokenizerConfig::default()
    }

    /// Returns the generator configuration for this dialect.
    ///
    /// Override to customize identifier quoting style, function name casing,
    /// keyword casing, and other SQL generation behavior. The default is the generic
    /// [`GeneratorConfig`].
    fn generator_config(&self) -> GeneratorConfig {
        GeneratorConfig::default()
    }

    /// Returns a generator configuration tailored to a specific expression.
    ///
    /// Override this for hybrid dialects like Athena that route to different SQL engines
    /// based on expression type (e.g., Hive-style generation for DDL, Trino-style for DML).
    /// The default delegates to [`generator_config`](DialectImpl::generator_config) and
    /// ignores the expression.
    fn generator_config_for_expr(&self, _expr: &Expression) -> GeneratorConfig {
        self.generator_config()
    }

    /// Transforms a single expression node for this dialect, without recursing into children.
    ///
    /// This is the per-node rewrite hook invoked by [`transform_recursive`]. Return the
    /// expression unchanged if no dialect-specific rewrite is needed. Transformations
    /// typically include function renaming, operator substitution, and type mapping.
    /// The default returns the node untouched.
    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
        Ok(expr)
    }

    /// Applies whole-tree preprocessing transforms before the recursive per-node pass.
    ///
    /// Override this to apply structural rewrites that must see the entire tree at once,
    /// such as `eliminate_qualify`, `eliminate_distinct_on`, `ensure_bools`, or
    /// `explode_projection_to_unnest`. The default is a no-op pass-through.
    fn preprocess(&self, expr: Expression) -> Result<Expression> {
        Ok(expr)
    }
}
409
410/// Recursively transforms a [`DataType`](crate::expressions::DataType), handling nested
411/// parametric types such as `ARRAY<INT>`, `STRUCT<a INT, b TEXT>`, and `MAP<STRING, INT>`.
412///
413/// The outer type is first passed through `transform_fn` as an `Expression::DataType`,
414/// and then nested element/field types are recursed into. This ensures that dialect-level
415/// type mappings (e.g., `INT` to `INTEGER`) propagate into complex nested types.
416fn transform_data_type_recursive<F>(
417 dt: crate::expressions::DataType,
418 transform_fn: &F,
419) -> Result<crate::expressions::DataType>
420where
421 F: Fn(Expression) -> Result<Expression>,
422{
423 use crate::expressions::DataType;
424 // First, transform the outermost type through the expression system
425 let dt_expr = transform_fn(Expression::DataType(dt))?;
426 let dt = match dt_expr {
427 Expression::DataType(d) => d,
428 _ => {
429 return Ok(match dt_expr {
430 _ => DataType::Custom {
431 name: "UNKNOWN".to_string(),
432 },
433 })
434 }
435 };
436 // Then recurse into nested types
437 match dt {
438 DataType::Array {
439 element_type,
440 dimension,
441 } => {
442 let inner = transform_data_type_recursive(*element_type, transform_fn)?;
443 Ok(DataType::Array {
444 element_type: Box::new(inner),
445 dimension,
446 })
447 }
448 DataType::List { element_type } => {
449 let inner = transform_data_type_recursive(*element_type, transform_fn)?;
450 Ok(DataType::List {
451 element_type: Box::new(inner),
452 })
453 }
454 DataType::Struct { fields, nested } => {
455 let mut new_fields = Vec::new();
456 for mut field in fields {
457 field.data_type = transform_data_type_recursive(field.data_type, transform_fn)?;
458 new_fields.push(field);
459 }
460 Ok(DataType::Struct {
461 fields: new_fields,
462 nested,
463 })
464 }
465 DataType::Map {
466 key_type,
467 value_type,
468 } => {
469 let k = transform_data_type_recursive(*key_type, transform_fn)?;
470 let v = transform_data_type_recursive(*value_type, transform_fn)?;
471 Ok(DataType::Map {
472 key_type: Box::new(k),
473 value_type: Box::new(v),
474 })
475 }
476 other => Ok(other),
477 }
478}
479
/// Convert DuckDB C-style format strings to Presto C-style format strings.
/// DuckDB and Presto both use C-style % directives but with different specifiers for some cases.
#[cfg(feature = "transpile")]
fn duckdb_to_presto_format(fmt: &str) -> String {
    // Each entry: (DuckDB pattern, temporary placeholder, Presto replacement).
    // The \x01-delimited placeholders cannot appear in a real format string, so they
    // shield multi-character patterns from the single-specifier substitutions below.
    // Order matters: longer patterns must be protected before shorter ones rewrite.
    const PROTECTED: [(&str, &str, &str); 6] = [
        ("%-m", "\x01NOPADM\x01", "%c"),
        ("%-d", "\x01NOPADD\x01", "%e"),
        ("%-I", "\x01NOPADI\x01", "%l"),
        ("%-H", "\x01NOPADH\x01", "%k"),
        ("%H:%M:%S", "\x01HMS\x01", "%T"),
        ("%Y-%m-%d", "\x01YMD\x01", "%Y-%m-%d"),
    ];

    // Pass 1: hide multi-character patterns behind placeholders.
    let mut out = fmt.to_string();
    for (pattern, marker, _) in PROTECTED {
        out = out.replace(pattern, marker);
    }

    // Pass 2: rewrite the individual specifiers that differ (minutes, seconds).
    out = out.replace("%M", "%i").replace("%S", "%s");

    // Pass 3: restore placeholders with their Presto equivalents.
    for (_, marker, presto) in PROTECTED {
        out = out.replace(marker, presto);
    }
    out
}
505
/// Convert DuckDB C-style format strings to BigQuery format strings.
/// BigQuery uses a mix of strftime-like directives.
#[cfg(feature = "transpile")]
fn duckdb_to_bigquery_format(fmt: &str) -> String {
    // Longer patterns are rewritten first so the full datetime pattern collapses to
    // "%F %T" before its date/time halves could match independently.
    fmt.replace("%-d", "%e")
        .replace("%Y-%m-%d %H:%M:%S", "%F %T")
        .replace("%Y-%m-%d", "%F")
        .replace("%H:%M:%S", "%T")
}
518
519/// Applies a transform function bottom-up through an entire expression tree.
520///
521/// This is the core tree-rewriting engine used by the dialect system. It performs
522/// a post-order (children-first) traversal: for each node, all children are recursively
523/// transformed before the node itself is passed to `transform_fn`. This bottom-up
524/// strategy means that when `transform_fn` sees a node, its children have already
525/// been rewritten, which simplifies pattern matching on sub-expressions.
526///
527/// The function handles all expression variants including SELECT clauses (FROM, WHERE,
528/// GROUP BY, HAVING, ORDER BY, QUALIFY, WITH/CTEs, WINDOW), binary operators,
529/// function calls, CASE expressions, date/time functions, and more.
530///
531/// # Arguments
532///
533/// * `expr` - The root expression to transform (consumed).
534/// * `transform_fn` - A closure that receives each expression node (after its children
535/// have been transformed) and returns a possibly-rewritten expression.
536///
537/// # Errors
538///
539/// Returns an error if `transform_fn` returns an error for any node.
540pub fn transform_recursive<F>(expr: Expression, transform_fn: &F) -> Result<Expression>
541where
542 F: Fn(Expression) -> Result<Expression>,
543{
544 use crate::expressions::BinaryOp;
545
546 // Helper macro to transform binary ops with Box<BinaryOp>
547 macro_rules! transform_binary {
548 ($variant:ident, $op:expr) => {{
549 let left = transform_recursive($op.left, transform_fn)?;
550 let right = transform_recursive($op.right, transform_fn)?;
551 Expression::$variant(Box::new(BinaryOp {
552 left,
553 right,
554 left_comments: $op.left_comments,
555 operator_comments: $op.operator_comments,
556 trailing_comments: $op.trailing_comments,
557 inferred_type: $op.inferred_type,
558 }))
559 }};
560 }
561
562 // First recursively transform children, then apply the transform function
563 let expr = match expr {
564 Expression::Select(mut select) => {
565 select.expressions = select
566 .expressions
567 .into_iter()
568 .map(|e| transform_recursive(e, transform_fn))
569 .collect::<Result<Vec<_>>>()?;
570
571 // Transform FROM clause
572 if let Some(mut from) = select.from.take() {
573 from.expressions = from
574 .expressions
575 .into_iter()
576 .map(|e| transform_recursive(e, transform_fn))
577 .collect::<Result<Vec<_>>>()?;
578 select.from = Some(from);
579 }
580
581 // Transform JOINs - important for CROSS APPLY / LATERAL transformations
582 select.joins = select
583 .joins
584 .into_iter()
585 .map(|mut join| {
586 join.this = transform_recursive(join.this, transform_fn)?;
587 if let Some(on) = join.on.take() {
588 join.on = Some(transform_recursive(on, transform_fn)?);
589 }
590 // Wrap join in Expression::Join to allow transform_fn to transform it
591 match transform_fn(Expression::Join(Box::new(join)))? {
592 Expression::Join(j) => Ok(*j),
593 _ => Err(crate::error::Error::parse(
594 "Join transformation returned non-join expression",
595 0,
596 0,
597 0,
598 0,
599 )),
600 }
601 })
602 .collect::<Result<Vec<_>>>()?;
603
604 // Transform LATERAL VIEW expressions (Hive/Spark)
605 select.lateral_views = select
606 .lateral_views
607 .into_iter()
608 .map(|mut lv| {
609 lv.this = transform_recursive(lv.this, transform_fn)?;
610 Ok(lv)
611 })
612 .collect::<Result<Vec<_>>>()?;
613
614 // Transform WHERE clause
615 if let Some(mut where_clause) = select.where_clause.take() {
616 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
617 select.where_clause = Some(where_clause);
618 }
619
620 // Transform GROUP BY
621 if let Some(mut group_by) = select.group_by.take() {
622 group_by.expressions = group_by
623 .expressions
624 .into_iter()
625 .map(|e| transform_recursive(e, transform_fn))
626 .collect::<Result<Vec<_>>>()?;
627 select.group_by = Some(group_by);
628 }
629
630 // Transform HAVING
631 if let Some(mut having) = select.having.take() {
632 having.this = transform_recursive(having.this, transform_fn)?;
633 select.having = Some(having);
634 }
635
636 // Transform WITH (CTEs)
637 if let Some(mut with) = select.with.take() {
638 with.ctes = with
639 .ctes
640 .into_iter()
641 .map(|mut cte| {
642 let original = cte.this.clone();
643 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
644 cte
645 })
646 .collect();
647 select.with = Some(with);
648 }
649
650 // Transform ORDER BY
651 if let Some(mut order) = select.order_by.take() {
652 order.expressions = order
653 .expressions
654 .into_iter()
655 .map(|o| {
656 let mut o = o;
657 let original = o.this.clone();
658 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
659 // Also apply transform to the Ordered wrapper itself (for NULLS FIRST etc.)
660 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
661 Ok(Expression::Ordered(transformed)) => *transformed,
662 Ok(_) | Err(_) => o,
663 }
664 })
665 .collect();
666 select.order_by = Some(order);
667 }
668
669 // Transform WINDOW clause order_by
670 if let Some(ref mut windows) = select.windows {
671 for nw in windows.iter_mut() {
672 nw.spec.order_by = std::mem::take(&mut nw.spec.order_by)
673 .into_iter()
674 .map(|o| {
675 let mut o = o;
676 let original = o.this.clone();
677 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
678 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
679 Ok(Expression::Ordered(transformed)) => *transformed,
680 Ok(_) | Err(_) => o,
681 }
682 })
683 .collect();
684 }
685 }
686
687 // Transform QUALIFY
688 if let Some(mut qual) = select.qualify.take() {
689 qual.this = transform_recursive(qual.this, transform_fn)?;
690 select.qualify = Some(qual);
691 }
692
693 Expression::Select(select)
694 }
695 Expression::Function(mut f) => {
696 f.args = f
697 .args
698 .into_iter()
699 .map(|e| transform_recursive(e, transform_fn))
700 .collect::<Result<Vec<_>>>()?;
701 Expression::Function(f)
702 }
703 Expression::AggregateFunction(mut f) => {
704 f.args = f
705 .args
706 .into_iter()
707 .map(|e| transform_recursive(e, transform_fn))
708 .collect::<Result<Vec<_>>>()?;
709 if let Some(filter) = f.filter {
710 f.filter = Some(transform_recursive(filter, transform_fn)?);
711 }
712 Expression::AggregateFunction(f)
713 }
714 Expression::WindowFunction(mut wf) => {
715 wf.this = transform_recursive(wf.this, transform_fn)?;
716 wf.over.partition_by = wf
717 .over
718 .partition_by
719 .into_iter()
720 .map(|e| transform_recursive(e, transform_fn))
721 .collect::<Result<Vec<_>>>()?;
722 // Transform order_by items through Expression::Ordered wrapper
723 wf.over.order_by = wf
724 .over
725 .order_by
726 .into_iter()
727 .map(|o| {
728 let mut o = o;
729 o.this = transform_recursive(o.this, transform_fn)?;
730 match transform_fn(Expression::Ordered(Box::new(o)))? {
731 Expression::Ordered(transformed) => Ok(*transformed),
732 _ => Err(crate::error::Error::parse(
733 "Ordered transformation returned non-Ordered expression",
734 0,
735 0,
736 0,
737 0,
738 )),
739 }
740 })
741 .collect::<Result<Vec<_>>>()?;
742 Expression::WindowFunction(wf)
743 }
744 Expression::Alias(mut a) => {
745 a.this = transform_recursive(a.this, transform_fn)?;
746 Expression::Alias(a)
747 }
748 Expression::Cast(mut c) => {
749 c.this = transform_recursive(c.this, transform_fn)?;
750 // Also transform the target data type (recursively for nested types like ARRAY<INT>, STRUCT<a INT>)
751 c.to = transform_data_type_recursive(c.to, transform_fn)?;
752 Expression::Cast(c)
753 }
754 Expression::And(op) => transform_binary!(And, *op),
755 Expression::Or(op) => transform_binary!(Or, *op),
756 Expression::Add(op) => transform_binary!(Add, *op),
757 Expression::Sub(op) => transform_binary!(Sub, *op),
758 Expression::Mul(op) => transform_binary!(Mul, *op),
759 Expression::Div(op) => transform_binary!(Div, *op),
760 Expression::Eq(op) => transform_binary!(Eq, *op),
761 Expression::Lt(op) => transform_binary!(Lt, *op),
762 Expression::Gt(op) => transform_binary!(Gt, *op),
763 Expression::Paren(mut p) => {
764 p.this = transform_recursive(p.this, transform_fn)?;
765 Expression::Paren(p)
766 }
767 Expression::Coalesce(mut f) => {
768 f.expressions = f
769 .expressions
770 .into_iter()
771 .map(|e| transform_recursive(e, transform_fn))
772 .collect::<Result<Vec<_>>>()?;
773 Expression::Coalesce(f)
774 }
775 Expression::IfNull(mut f) => {
776 f.this = transform_recursive(f.this, transform_fn)?;
777 f.expression = transform_recursive(f.expression, transform_fn)?;
778 Expression::IfNull(f)
779 }
780 Expression::Nvl(mut f) => {
781 f.this = transform_recursive(f.this, transform_fn)?;
782 f.expression = transform_recursive(f.expression, transform_fn)?;
783 Expression::Nvl(f)
784 }
785 Expression::In(mut i) => {
786 i.this = transform_recursive(i.this, transform_fn)?;
787 i.expressions = i
788 .expressions
789 .into_iter()
790 .map(|e| transform_recursive(e, transform_fn))
791 .collect::<Result<Vec<_>>>()?;
792 if let Some(query) = i.query {
793 i.query = Some(transform_recursive(query, transform_fn)?);
794 }
795 Expression::In(i)
796 }
797 Expression::Not(mut n) => {
798 n.this = transform_recursive(n.this, transform_fn)?;
799 Expression::Not(n)
800 }
801 Expression::ArraySlice(mut s) => {
802 s.this = transform_recursive(s.this, transform_fn)?;
803 if let Some(start) = s.start {
804 s.start = Some(transform_recursive(start, transform_fn)?);
805 }
806 if let Some(end) = s.end {
807 s.end = Some(transform_recursive(end, transform_fn)?);
808 }
809 Expression::ArraySlice(s)
810 }
811 Expression::Subscript(mut s) => {
812 s.this = transform_recursive(s.this, transform_fn)?;
813 s.index = transform_recursive(s.index, transform_fn)?;
814 Expression::Subscript(s)
815 }
816 Expression::Array(mut a) => {
817 a.expressions = a
818 .expressions
819 .into_iter()
820 .map(|e| transform_recursive(e, transform_fn))
821 .collect::<Result<Vec<_>>>()?;
822 Expression::Array(a)
823 }
824 Expression::Struct(mut s) => {
825 let mut new_fields = Vec::new();
826 for (name, expr) in s.fields {
827 let transformed = transform_recursive(expr, transform_fn)?;
828 new_fields.push((name, transformed));
829 }
830 s.fields = new_fields;
831 Expression::Struct(s)
832 }
833 Expression::NamedArgument(mut na) => {
834 na.value = transform_recursive(na.value, transform_fn)?;
835 Expression::NamedArgument(na)
836 }
837 Expression::MapFunc(mut m) => {
838 m.keys = m
839 .keys
840 .into_iter()
841 .map(|e| transform_recursive(e, transform_fn))
842 .collect::<Result<Vec<_>>>()?;
843 m.values = m
844 .values
845 .into_iter()
846 .map(|e| transform_recursive(e, transform_fn))
847 .collect::<Result<Vec<_>>>()?;
848 Expression::MapFunc(m)
849 }
850 Expression::ArrayFunc(mut a) => {
851 a.expressions = a
852 .expressions
853 .into_iter()
854 .map(|e| transform_recursive(e, transform_fn))
855 .collect::<Result<Vec<_>>>()?;
856 Expression::ArrayFunc(a)
857 }
858 Expression::Lambda(mut l) => {
859 l.body = transform_recursive(l.body, transform_fn)?;
860 Expression::Lambda(l)
861 }
862 Expression::JsonExtract(mut f) => {
863 f.this = transform_recursive(f.this, transform_fn)?;
864 f.path = transform_recursive(f.path, transform_fn)?;
865 Expression::JsonExtract(f)
866 }
867 Expression::JsonExtractScalar(mut f) => {
868 f.this = transform_recursive(f.this, transform_fn)?;
869 f.path = transform_recursive(f.path, transform_fn)?;
870 Expression::JsonExtractScalar(f)
871 }
872
873 // ===== UnaryFunc-based expressions =====
874 // These all have a single `this: Expression` child
875 Expression::Length(mut f) => {
876 f.this = transform_recursive(f.this, transform_fn)?;
877 Expression::Length(f)
878 }
879 Expression::Upper(mut f) => {
880 f.this = transform_recursive(f.this, transform_fn)?;
881 Expression::Upper(f)
882 }
883 Expression::Lower(mut f) => {
884 f.this = transform_recursive(f.this, transform_fn)?;
885 Expression::Lower(f)
886 }
887 Expression::LTrim(mut f) => {
888 f.this = transform_recursive(f.this, transform_fn)?;
889 Expression::LTrim(f)
890 }
891 Expression::RTrim(mut f) => {
892 f.this = transform_recursive(f.this, transform_fn)?;
893 Expression::RTrim(f)
894 }
895 Expression::Reverse(mut f) => {
896 f.this = transform_recursive(f.this, transform_fn)?;
897 Expression::Reverse(f)
898 }
899 Expression::Abs(mut f) => {
900 f.this = transform_recursive(f.this, transform_fn)?;
901 Expression::Abs(f)
902 }
903 Expression::Ceil(mut f) => {
904 f.this = transform_recursive(f.this, transform_fn)?;
905 Expression::Ceil(f)
906 }
907 Expression::Floor(mut f) => {
908 f.this = transform_recursive(f.this, transform_fn)?;
909 Expression::Floor(f)
910 }
911 Expression::Sign(mut f) => {
912 f.this = transform_recursive(f.this, transform_fn)?;
913 Expression::Sign(f)
914 }
915 Expression::Sqrt(mut f) => {
916 f.this = transform_recursive(f.this, transform_fn)?;
917 Expression::Sqrt(f)
918 }
919 Expression::Cbrt(mut f) => {
920 f.this = transform_recursive(f.this, transform_fn)?;
921 Expression::Cbrt(f)
922 }
923 Expression::Ln(mut f) => {
924 f.this = transform_recursive(f.this, transform_fn)?;
925 Expression::Ln(f)
926 }
927 Expression::Log(mut f) => {
928 f.this = transform_recursive(f.this, transform_fn)?;
929 if let Some(base) = f.base {
930 f.base = Some(transform_recursive(base, transform_fn)?);
931 }
932 Expression::Log(f)
933 }
934 Expression::Exp(mut f) => {
935 f.this = transform_recursive(f.this, transform_fn)?;
936 Expression::Exp(f)
937 }
938 Expression::Date(mut f) => {
939 f.this = transform_recursive(f.this, transform_fn)?;
940 Expression::Date(f)
941 }
942 Expression::Stddev(mut f) => {
943 f.this = transform_recursive(f.this, transform_fn)?;
944 Expression::Stddev(f)
945 }
946 Expression::Variance(mut f) => {
947 f.this = transform_recursive(f.this, transform_fn)?;
948 Expression::Variance(f)
949 }
950
951 // ===== BinaryFunc-based expressions =====
952 Expression::ModFunc(mut f) => {
953 f.this = transform_recursive(f.this, transform_fn)?;
954 f.expression = transform_recursive(f.expression, transform_fn)?;
955 Expression::ModFunc(f)
956 }
957 Expression::Power(mut f) => {
958 f.this = transform_recursive(f.this, transform_fn)?;
959 f.expression = transform_recursive(f.expression, transform_fn)?;
960 Expression::Power(f)
961 }
962 Expression::MapFromArrays(mut f) => {
963 f.this = transform_recursive(f.this, transform_fn)?;
964 f.expression = transform_recursive(f.expression, transform_fn)?;
965 Expression::MapFromArrays(f)
966 }
967 Expression::ElementAt(mut f) => {
968 f.this = transform_recursive(f.this, transform_fn)?;
969 f.expression = transform_recursive(f.expression, transform_fn)?;
970 Expression::ElementAt(f)
971 }
972 Expression::MapContainsKey(mut f) => {
973 f.this = transform_recursive(f.this, transform_fn)?;
974 f.expression = transform_recursive(f.expression, transform_fn)?;
975 Expression::MapContainsKey(f)
976 }
977 Expression::Left(mut f) => {
978 f.this = transform_recursive(f.this, transform_fn)?;
979 f.length = transform_recursive(f.length, transform_fn)?;
980 Expression::Left(f)
981 }
982 Expression::Right(mut f) => {
983 f.this = transform_recursive(f.this, transform_fn)?;
984 f.length = transform_recursive(f.length, transform_fn)?;
985 Expression::Right(f)
986 }
987 Expression::Repeat(mut f) => {
988 f.this = transform_recursive(f.this, transform_fn)?;
989 f.times = transform_recursive(f.times, transform_fn)?;
990 Expression::Repeat(f)
991 }
992
993 // ===== Complex function expressions =====
994 Expression::Substring(mut f) => {
995 f.this = transform_recursive(f.this, transform_fn)?;
996 f.start = transform_recursive(f.start, transform_fn)?;
997 if let Some(len) = f.length {
998 f.length = Some(transform_recursive(len, transform_fn)?);
999 }
1000 Expression::Substring(f)
1001 }
1002 Expression::Replace(mut f) => {
1003 f.this = transform_recursive(f.this, transform_fn)?;
1004 f.old = transform_recursive(f.old, transform_fn)?;
1005 f.new = transform_recursive(f.new, transform_fn)?;
1006 Expression::Replace(f)
1007 }
1008 Expression::ConcatWs(mut f) => {
1009 f.separator = transform_recursive(f.separator, transform_fn)?;
1010 f.expressions = f
1011 .expressions
1012 .into_iter()
1013 .map(|e| transform_recursive(e, transform_fn))
1014 .collect::<Result<Vec<_>>>()?;
1015 Expression::ConcatWs(f)
1016 }
1017 Expression::Trim(mut f) => {
1018 f.this = transform_recursive(f.this, transform_fn)?;
1019 if let Some(chars) = f.characters {
1020 f.characters = Some(transform_recursive(chars, transform_fn)?);
1021 }
1022 Expression::Trim(f)
1023 }
1024 Expression::Split(mut f) => {
1025 f.this = transform_recursive(f.this, transform_fn)?;
1026 f.delimiter = transform_recursive(f.delimiter, transform_fn)?;
1027 Expression::Split(f)
1028 }
1029 Expression::Lpad(mut f) => {
1030 f.this = transform_recursive(f.this, transform_fn)?;
1031 f.length = transform_recursive(f.length, transform_fn)?;
1032 if let Some(fill) = f.fill {
1033 f.fill = Some(transform_recursive(fill, transform_fn)?);
1034 }
1035 Expression::Lpad(f)
1036 }
1037 Expression::Rpad(mut f) => {
1038 f.this = transform_recursive(f.this, transform_fn)?;
1039 f.length = transform_recursive(f.length, transform_fn)?;
1040 if let Some(fill) = f.fill {
1041 f.fill = Some(transform_recursive(fill, transform_fn)?);
1042 }
1043 Expression::Rpad(f)
1044 }
1045
1046 // ===== Conditional expressions =====
1047 Expression::Case(mut c) => {
1048 if let Some(operand) = c.operand {
1049 c.operand = Some(transform_recursive(operand, transform_fn)?);
1050 }
1051 c.whens = c
1052 .whens
1053 .into_iter()
1054 .map(|(cond, then)| {
1055 let new_cond = transform_recursive(cond.clone(), transform_fn).unwrap_or(cond);
1056 let new_then = transform_recursive(then.clone(), transform_fn).unwrap_or(then);
1057 (new_cond, new_then)
1058 })
1059 .collect();
1060 if let Some(else_expr) = c.else_ {
1061 c.else_ = Some(transform_recursive(else_expr, transform_fn)?);
1062 }
1063 Expression::Case(c)
1064 }
1065 Expression::IfFunc(mut f) => {
1066 f.condition = transform_recursive(f.condition, transform_fn)?;
1067 f.true_value = transform_recursive(f.true_value, transform_fn)?;
1068 if let Some(false_val) = f.false_value {
1069 f.false_value = Some(transform_recursive(false_val, transform_fn)?);
1070 }
1071 Expression::IfFunc(f)
1072 }
1073
1074 // ===== Date/Time expressions =====
1075 Expression::DateAdd(mut f) => {
1076 f.this = transform_recursive(f.this, transform_fn)?;
1077 f.interval = transform_recursive(f.interval, transform_fn)?;
1078 Expression::DateAdd(f)
1079 }
1080 Expression::DateSub(mut f) => {
1081 f.this = transform_recursive(f.this, transform_fn)?;
1082 f.interval = transform_recursive(f.interval, transform_fn)?;
1083 Expression::DateSub(f)
1084 }
1085 Expression::DateDiff(mut f) => {
1086 f.this = transform_recursive(f.this, transform_fn)?;
1087 f.expression = transform_recursive(f.expression, transform_fn)?;
1088 Expression::DateDiff(f)
1089 }
1090 Expression::DateTrunc(mut f) => {
1091 f.this = transform_recursive(f.this, transform_fn)?;
1092 Expression::DateTrunc(f)
1093 }
1094 Expression::Extract(mut f) => {
1095 f.this = transform_recursive(f.this, transform_fn)?;
1096 Expression::Extract(f)
1097 }
1098
1099 // ===== JSON expressions =====
1100 Expression::JsonObject(mut f) => {
1101 f.pairs = f
1102 .pairs
1103 .into_iter()
1104 .map(|(k, v)| {
1105 let new_k = transform_recursive(k, transform_fn)?;
1106 let new_v = transform_recursive(v, transform_fn)?;
1107 Ok((new_k, new_v))
1108 })
1109 .collect::<Result<Vec<_>>>()?;
1110 Expression::JsonObject(f)
1111 }
1112
1113 // ===== Subquery expressions =====
1114 Expression::Subquery(mut s) => {
1115 s.this = transform_recursive(s.this, transform_fn)?;
1116 Expression::Subquery(s)
1117 }
1118 Expression::Exists(mut e) => {
1119 e.this = transform_recursive(e.this, transform_fn)?;
1120 Expression::Exists(e)
1121 }
1122
1123 // ===== Set operations =====
1124 Expression::Union(mut u) => {
1125 u.left = transform_recursive(u.left, transform_fn)?;
1126 u.right = transform_recursive(u.right, transform_fn)?;
1127 Expression::Union(u)
1128 }
1129 Expression::Intersect(mut i) => {
1130 i.left = transform_recursive(i.left, transform_fn)?;
1131 i.right = transform_recursive(i.right, transform_fn)?;
1132 Expression::Intersect(i)
1133 }
1134 Expression::Except(mut e) => {
1135 e.left = transform_recursive(e.left, transform_fn)?;
1136 e.right = transform_recursive(e.right, transform_fn)?;
1137 Expression::Except(e)
1138 }
1139
1140 // ===== DML expressions =====
1141 Expression::Insert(mut ins) => {
1142 // Transform VALUES clause expressions
1143 let mut new_values = Vec::new();
1144 for row in ins.values {
1145 let mut new_row = Vec::new();
1146 for e in row {
1147 new_row.push(transform_recursive(e, transform_fn)?);
1148 }
1149 new_values.push(new_row);
1150 }
1151 ins.values = new_values;
1152
1153 // Transform query (for INSERT ... SELECT)
1154 if let Some(query) = ins.query {
1155 ins.query = Some(transform_recursive(query, transform_fn)?);
1156 }
1157
1158 // Transform RETURNING clause
1159 let mut new_returning = Vec::new();
1160 for e in ins.returning {
1161 new_returning.push(transform_recursive(e, transform_fn)?);
1162 }
1163 ins.returning = new_returning;
1164
1165 // Transform ON CONFLICT clause
1166 if let Some(on_conflict) = ins.on_conflict {
1167 ins.on_conflict = Some(Box::new(transform_recursive(*on_conflict, transform_fn)?));
1168 }
1169
1170 Expression::Insert(ins)
1171 }
1172 Expression::Update(mut upd) => {
1173 upd.set = upd
1174 .set
1175 .into_iter()
1176 .map(|(id, val)| {
1177 let new_val = transform_recursive(val.clone(), transform_fn).unwrap_or(val);
1178 (id, new_val)
1179 })
1180 .collect();
1181 if let Some(mut where_clause) = upd.where_clause.take() {
1182 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
1183 upd.where_clause = Some(where_clause);
1184 }
1185 Expression::Update(upd)
1186 }
1187 Expression::Delete(mut del) => {
1188 if let Some(mut where_clause) = del.where_clause.take() {
1189 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
1190 del.where_clause = Some(where_clause);
1191 }
1192 Expression::Delete(del)
1193 }
1194
1195 // ===== CTE expressions =====
1196 Expression::With(mut w) => {
1197 w.ctes = w
1198 .ctes
1199 .into_iter()
1200 .map(|mut cte| {
1201 let original = cte.this.clone();
1202 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
1203 cte
1204 })
1205 .collect();
1206 Expression::With(w)
1207 }
1208 Expression::Cte(mut c) => {
1209 c.this = transform_recursive(c.this, transform_fn)?;
1210 Expression::Cte(c)
1211 }
1212
1213 // ===== Order expressions =====
1214 Expression::Ordered(mut o) => {
1215 o.this = transform_recursive(o.this, transform_fn)?;
1216 Expression::Ordered(o)
1217 }
1218
1219 // ===== Negation =====
1220 Expression::Neg(mut n) => {
1221 n.this = transform_recursive(n.this, transform_fn)?;
1222 Expression::Neg(n)
1223 }
1224
1225 // ===== Between =====
1226 Expression::Between(mut b) => {
1227 b.this = transform_recursive(b.this, transform_fn)?;
1228 b.low = transform_recursive(b.low, transform_fn)?;
1229 b.high = transform_recursive(b.high, transform_fn)?;
1230 Expression::Between(b)
1231 }
1232 Expression::IsNull(mut i) => {
1233 i.this = transform_recursive(i.this, transform_fn)?;
1234 Expression::IsNull(i)
1235 }
1236 Expression::IsTrue(mut i) => {
1237 i.this = transform_recursive(i.this, transform_fn)?;
1238 Expression::IsTrue(i)
1239 }
1240 Expression::IsFalse(mut i) => {
1241 i.this = transform_recursive(i.this, transform_fn)?;
1242 Expression::IsFalse(i)
1243 }
1244
1245 // ===== Like expressions =====
1246 Expression::Like(mut l) => {
1247 l.left = transform_recursive(l.left, transform_fn)?;
1248 l.right = transform_recursive(l.right, transform_fn)?;
1249 Expression::Like(l)
1250 }
1251 Expression::ILike(mut l) => {
1252 l.left = transform_recursive(l.left, transform_fn)?;
1253 l.right = transform_recursive(l.right, transform_fn)?;
1254 Expression::ILike(l)
1255 }
1256
1257 // ===== Additional binary ops not covered by macro =====
1258 Expression::Neq(op) => transform_binary!(Neq, *op),
1259 Expression::Lte(op) => transform_binary!(Lte, *op),
1260 Expression::Gte(op) => transform_binary!(Gte, *op),
1261 Expression::Mod(op) => transform_binary!(Mod, *op),
1262 Expression::Concat(op) => transform_binary!(Concat, *op),
1263 Expression::BitwiseAnd(op) => transform_binary!(BitwiseAnd, *op),
1264 Expression::BitwiseOr(op) => transform_binary!(BitwiseOr, *op),
1265 Expression::BitwiseXor(op) => transform_binary!(BitwiseXor, *op),
1266 Expression::Is(op) => transform_binary!(Is, *op),
1267
1268 // ===== TryCast / SafeCast =====
1269 Expression::TryCast(mut c) => {
1270 c.this = transform_recursive(c.this, transform_fn)?;
1271 c.to = transform_data_type_recursive(c.to, transform_fn)?;
1272 Expression::TryCast(c)
1273 }
1274 Expression::SafeCast(mut c) => {
1275 c.this = transform_recursive(c.this, transform_fn)?;
1276 c.to = transform_data_type_recursive(c.to, transform_fn)?;
1277 Expression::SafeCast(c)
1278 }
1279
1280 // ===== Misc =====
1281 Expression::Unnest(mut f) => {
1282 f.this = transform_recursive(f.this, transform_fn)?;
1283 f.expressions = f
1284 .expressions
1285 .into_iter()
1286 .map(|e| transform_recursive(e, transform_fn))
1287 .collect::<Result<Vec<_>>>()?;
1288 Expression::Unnest(f)
1289 }
1290 Expression::Explode(mut f) => {
1291 f.this = transform_recursive(f.this, transform_fn)?;
1292 Expression::Explode(f)
1293 }
1294 Expression::GroupConcat(mut f) => {
1295 f.this = transform_recursive(f.this, transform_fn)?;
1296 Expression::GroupConcat(f)
1297 }
1298 Expression::StringAgg(mut f) => {
1299 f.this = transform_recursive(f.this, transform_fn)?;
1300 Expression::StringAgg(f)
1301 }
1302 Expression::ListAgg(mut f) => {
1303 f.this = transform_recursive(f.this, transform_fn)?;
1304 Expression::ListAgg(f)
1305 }
1306 Expression::ArrayAgg(mut f) => {
1307 f.this = transform_recursive(f.this, transform_fn)?;
1308 Expression::ArrayAgg(f)
1309 }
1310 Expression::ParseJson(mut f) => {
1311 f.this = transform_recursive(f.this, transform_fn)?;
1312 Expression::ParseJson(f)
1313 }
1314 Expression::ToJson(mut f) => {
1315 f.this = transform_recursive(f.this, transform_fn)?;
1316 Expression::ToJson(f)
1317 }
1318 Expression::JSONExtract(mut e) => {
1319 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1320 e.expression = Box::new(transform_recursive(*e.expression, transform_fn)?);
1321 Expression::JSONExtract(e)
1322 }
1323 Expression::JSONExtractScalar(mut e) => {
1324 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1325 e.expression = Box::new(transform_recursive(*e.expression, transform_fn)?);
1326 Expression::JSONExtractScalar(e)
1327 }
1328
1329 // StrToTime: recurse into this
1330 Expression::StrToTime(mut e) => {
1331 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1332 Expression::StrToTime(e)
1333 }
1334
1335 // UnixToTime: recurse into this
1336 Expression::UnixToTime(mut e) => {
1337 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1338 Expression::UnixToTime(e)
1339 }
1340
1341 // CreateTable: recurse into column defaults, on_update expressions, and data types
1342 Expression::CreateTable(mut ct) => {
1343 for col in &mut ct.columns {
1344 if let Some(default_expr) = col.default.take() {
1345 col.default = Some(transform_recursive(default_expr, transform_fn)?);
1346 }
1347 if let Some(on_update_expr) = col.on_update.take() {
1348 col.on_update = Some(transform_recursive(on_update_expr, transform_fn)?);
1349 }
1350 // Note: Column data type transformations (INT -> INT64 for BigQuery, etc.)
1351 // are NOT applied here because per-dialect transforms are designed for CAST/expression
1352 // contexts and may not produce correct results for DDL column definitions.
1353 // The DDL type mappings would need dedicated handling per source/target pair.
1354 }
1355 if let Some(as_select) = ct.as_select.take() {
1356 ct.as_select = Some(transform_recursive(as_select, transform_fn)?);
1357 }
1358 Expression::CreateTable(ct)
1359 }
1360
1361 // CreateProcedure: recurse into body expressions
1362 Expression::CreateProcedure(mut cp) => {
1363 if let Some(body) = cp.body.take() {
1364 cp.body = Some(match body {
1365 FunctionBody::Expression(expr) => {
1366 FunctionBody::Expression(transform_recursive(expr, transform_fn)?)
1367 }
1368 FunctionBody::Return(expr) => {
1369 FunctionBody::Return(transform_recursive(expr, transform_fn)?)
1370 }
1371 FunctionBody::Statements(stmts) => {
1372 let transformed_stmts = stmts
1373 .into_iter()
1374 .map(|s| transform_recursive(s, transform_fn))
1375 .collect::<Result<Vec<_>>>()?;
1376 FunctionBody::Statements(transformed_stmts)
1377 }
1378 other => other,
1379 });
1380 }
1381 Expression::CreateProcedure(cp)
1382 }
1383
1384 // CreateFunction: recurse into body expressions
1385 Expression::CreateFunction(mut cf) => {
1386 if let Some(body) = cf.body.take() {
1387 cf.body = Some(match body {
1388 FunctionBody::Expression(expr) => {
1389 FunctionBody::Expression(transform_recursive(expr, transform_fn)?)
1390 }
1391 FunctionBody::Return(expr) => {
1392 FunctionBody::Return(transform_recursive(expr, transform_fn)?)
1393 }
1394 FunctionBody::Statements(stmts) => {
1395 let transformed_stmts = stmts
1396 .into_iter()
1397 .map(|s| transform_recursive(s, transform_fn))
1398 .collect::<Result<Vec<_>>>()?;
1399 FunctionBody::Statements(transformed_stmts)
1400 }
1401 other => other,
1402 });
1403 }
1404 Expression::CreateFunction(cf)
1405 }
1406
1407 // MemberOf: recurse into left and right operands
1408 Expression::MemberOf(op) => transform_binary!(MemberOf, *op),
1409 // ArrayContainsAll (@>): recurse into left and right operands
1410 Expression::ArrayContainsAll(op) => transform_binary!(ArrayContainsAll, *op),
1411 // ArrayContainedBy (<@): recurse into left and right operands
1412 Expression::ArrayContainedBy(op) => transform_binary!(ArrayContainedBy, *op),
1413 // ArrayOverlaps (&&): recurse into left and right operands
1414 Expression::ArrayOverlaps(op) => transform_binary!(ArrayOverlaps, *op),
1415 // TsMatch (@@): recurse into left and right operands
1416 Expression::TsMatch(op) => transform_binary!(TsMatch, *op),
1417 // Adjacent (-|-): recurse into left and right operands
1418 Expression::Adjacent(op) => transform_binary!(Adjacent, *op),
1419
1420 // Table: recurse into when (HistoricalData) and changes fields
1421 Expression::Table(mut t) => {
1422 if let Some(when) = t.when.take() {
1423 let transformed =
1424 transform_recursive(Expression::HistoricalData(when), transform_fn)?;
1425 if let Expression::HistoricalData(hd) = transformed {
1426 t.when = Some(hd);
1427 }
1428 }
1429 if let Some(changes) = t.changes.take() {
1430 let transformed = transform_recursive(Expression::Changes(changes), transform_fn)?;
1431 if let Expression::Changes(c) = transformed {
1432 t.changes = Some(c);
1433 }
1434 }
1435 Expression::Table(t)
1436 }
1437
1438 // HistoricalData (Snowflake time travel): recurse into expression
1439 Expression::HistoricalData(mut hd) => {
1440 *hd.expression = transform_recursive(*hd.expression, transform_fn)?;
1441 Expression::HistoricalData(hd)
1442 }
1443
1444 // Changes (Snowflake CHANGES clause): recurse into at_before and end
1445 Expression::Changes(mut c) => {
1446 if let Some(at_before) = c.at_before.take() {
1447 c.at_before = Some(Box::new(transform_recursive(*at_before, transform_fn)?));
1448 }
1449 if let Some(end) = c.end.take() {
1450 c.end = Some(Box::new(transform_recursive(*end, transform_fn)?));
1451 }
1452 Expression::Changes(c)
1453 }
1454
1455 // TableArgument: TABLE(expr) or MODEL(expr)
1456 Expression::TableArgument(mut ta) => {
1457 ta.this = transform_recursive(ta.this, transform_fn)?;
1458 Expression::TableArgument(ta)
1459 }
1460
1461 // JoinedTable: (tbl1 JOIN tbl2 ON ...) - recurse into left and join tables
1462 Expression::JoinedTable(mut jt) => {
1463 jt.left = transform_recursive(jt.left, transform_fn)?;
1464 for join in &mut jt.joins {
1465 join.this = transform_recursive(
1466 std::mem::replace(&mut join.this, Expression::Null(crate::expressions::Null)),
1467 transform_fn,
1468 )?;
1469 if let Some(on) = join.on.take() {
1470 join.on = Some(transform_recursive(on, transform_fn)?);
1471 }
1472 }
1473 jt.lateral_views = jt
1474 .lateral_views
1475 .into_iter()
1476 .map(|mut lv| {
1477 lv.this = transform_recursive(lv.this, transform_fn)?;
1478 Ok(lv)
1479 })
1480 .collect::<Result<Vec<_>>>()?;
1481 Expression::JoinedTable(jt)
1482 }
1483
1484 // Lateral: LATERAL func() - recurse into the function expression
1485 Expression::Lateral(mut lat) => {
1486 *lat.this = transform_recursive(*lat.this, transform_fn)?;
1487 Expression::Lateral(lat)
1488 }
1489
1490 // WithinGroup: recurse into order_by items (for NULLS FIRST/LAST etc.)
1491 // but NOT into wg.this - the inner function is handled by StringAggConvert/GroupConcatConvert
1492 // as a unit together with the WithinGroup wrapper
1493 Expression::WithinGroup(mut wg) => {
1494 wg.order_by = wg
1495 .order_by
1496 .into_iter()
1497 .map(|mut o| {
1498 let original = o.this.clone();
1499 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
1500 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
1501 Ok(Expression::Ordered(transformed)) => *transformed,
1502 Ok(_) | Err(_) => o,
1503 }
1504 })
1505 .collect();
1506 Expression::WithinGroup(wg)
1507 }
1508
1509 // Filter: recurse into both the aggregate and the filter condition
1510 Expression::Filter(mut f) => {
1511 f.this = Box::new(transform_recursive(*f.this, transform_fn)?);
1512 f.expression = Box::new(transform_recursive(*f.expression, transform_fn)?);
1513 Expression::Filter(f)
1514 }
1515
1516 // BitwiseOrAgg/BitwiseAndAgg/BitwiseXorAgg: recurse into the aggregate argument
1517 Expression::BitwiseOrAgg(mut f) => {
1518 f.this = transform_recursive(f.this, transform_fn)?;
1519 Expression::BitwiseOrAgg(f)
1520 }
1521 Expression::BitwiseAndAgg(mut f) => {
1522 f.this = transform_recursive(f.this, transform_fn)?;
1523 Expression::BitwiseAndAgg(f)
1524 }
1525 Expression::BitwiseXorAgg(mut f) => {
1526 f.this = transform_recursive(f.this, transform_fn)?;
1527 Expression::BitwiseXorAgg(f)
1528 }
1529 Expression::PipeOperator(mut pipe) => {
1530 pipe.this = transform_recursive(pipe.this, transform_fn)?;
1531 pipe.expression = transform_recursive(pipe.expression, transform_fn)?;
1532 Expression::PipeOperator(pipe)
1533 }
1534
1535 // Pass through leaf nodes unchanged
1536 other => other,
1537 };
1538
1539 // Then apply the transform function
1540 transform_fn(expr)
1541}
1542
/// Returns the tokenizer config, generator config, and expression transform closure
/// for a built-in dialect type. This is the shared implementation used by both
/// `Dialect::get()` and custom dialect construction.
///
/// Every arm is gated on its `dialect-*` Cargo feature, so a dialect compiled out
/// of the build (or any `DialectType` without a dedicated arm) falls through to
/// the wildcard arm and receives `GenericDialect` configuration.
fn configs_for_dialect_type(
    dt: DialectType,
) -> (
    TokenizerConfig,
    GeneratorConfig,
    Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>,
) {
    // `$dialect_struct` is a unit-struct name used as a value: `d` supplies the
    // two configs, and a fresh instance is constructed inside the boxed closure
    // each call, so the closure captures nothing and is trivially `Send + Sync`.
    macro_rules! dialect_configs {
        ($dialect_struct:ident) => {{
            let d = $dialect_struct;
            (
                d.tokenizer_config(),
                d.generator_config(),
                Box::new(move |e| $dialect_struct.transform_expr(e)),
            )
        }};
    }
    // NOTE(review): because of the `_` fallback, adding a new DialectType variant
    // without an arm here silently degrades it to GenericDialect — confirm that
    // is the intended behavior when introducing new variants.
    match dt {
        #[cfg(feature = "dialect-postgresql")]
        DialectType::PostgreSQL => dialect_configs!(PostgresDialect),
        #[cfg(feature = "dialect-mysql")]
        DialectType::MySQL => dialect_configs!(MySQLDialect),
        #[cfg(feature = "dialect-bigquery")]
        DialectType::BigQuery => dialect_configs!(BigQueryDialect),
        #[cfg(feature = "dialect-snowflake")]
        DialectType::Snowflake => dialect_configs!(SnowflakeDialect),
        #[cfg(feature = "dialect-duckdb")]
        DialectType::DuckDB => dialect_configs!(DuckDBDialect),
        #[cfg(feature = "dialect-tsql")]
        DialectType::TSQL => dialect_configs!(TSQLDialect),
        #[cfg(feature = "dialect-oracle")]
        DialectType::Oracle => dialect_configs!(OracleDialect),
        #[cfg(feature = "dialect-hive")]
        DialectType::Hive => dialect_configs!(HiveDialect),
        #[cfg(feature = "dialect-spark")]
        DialectType::Spark => dialect_configs!(SparkDialect),
        #[cfg(feature = "dialect-sqlite")]
        DialectType::SQLite => dialect_configs!(SQLiteDialect),
        #[cfg(feature = "dialect-presto")]
        DialectType::Presto => dialect_configs!(PrestoDialect),
        #[cfg(feature = "dialect-trino")]
        DialectType::Trino => dialect_configs!(TrinoDialect),
        #[cfg(feature = "dialect-redshift")]
        DialectType::Redshift => dialect_configs!(RedshiftDialect),
        #[cfg(feature = "dialect-clickhouse")]
        DialectType::ClickHouse => dialect_configs!(ClickHouseDialect),
        #[cfg(feature = "dialect-databricks")]
        DialectType::Databricks => dialect_configs!(DatabricksDialect),
        #[cfg(feature = "dialect-athena")]
        DialectType::Athena => dialect_configs!(AthenaDialect),
        #[cfg(feature = "dialect-teradata")]
        DialectType::Teradata => dialect_configs!(TeradataDialect),
        #[cfg(feature = "dialect-doris")]
        DialectType::Doris => dialect_configs!(DorisDialect),
        #[cfg(feature = "dialect-starrocks")]
        DialectType::StarRocks => dialect_configs!(StarRocksDialect),
        #[cfg(feature = "dialect-materialize")]
        DialectType::Materialize => dialect_configs!(MaterializeDialect),
        #[cfg(feature = "dialect-risingwave")]
        DialectType::RisingWave => dialect_configs!(RisingWaveDialect),
        #[cfg(feature = "dialect-singlestore")]
        DialectType::SingleStore => dialect_configs!(SingleStoreDialect),
        #[cfg(feature = "dialect-cockroachdb")]
        DialectType::CockroachDB => dialect_configs!(CockroachDBDialect),
        #[cfg(feature = "dialect-tidb")]
        DialectType::TiDB => dialect_configs!(TiDBDialect),
        #[cfg(feature = "dialect-druid")]
        DialectType::Druid => dialect_configs!(DruidDialect),
        #[cfg(feature = "dialect-solr")]
        DialectType::Solr => dialect_configs!(SolrDialect),
        #[cfg(feature = "dialect-tableau")]
        DialectType::Tableau => dialect_configs!(TableauDialect),
        #[cfg(feature = "dialect-dune")]
        DialectType::Dune => dialect_configs!(DuneDialect),
        #[cfg(feature = "dialect-fabric")]
        DialectType::Fabric => dialect_configs!(FabricDialect),
        #[cfg(feature = "dialect-drill")]
        DialectType::Drill => dialect_configs!(DrillDialect),
        #[cfg(feature = "dialect-dremio")]
        DialectType::Dremio => dialect_configs!(DremioDialect),
        #[cfg(feature = "dialect-exasol")]
        DialectType::Exasol => dialect_configs!(ExasolDialect),
        #[cfg(feature = "dialect-datafusion")]
        DialectType::DataFusion => dialect_configs!(DataFusionDialect),
        _ => dialect_configs!(GenericDialect),
    }
}
1633
1634// ---------------------------------------------------------------------------
1635// Custom dialect registry
1636// ---------------------------------------------------------------------------
1637
/// Global registry of user-defined dialects, keyed by their registered name.
/// Lazily initialized on first use; guarded by an `RwLock` so concurrent
/// lookups (reads) do not contend with registration/removal (writes).
static CUSTOM_DIALECT_REGISTRY: LazyLock<RwLock<HashMap<String, Arc<CustomDialectConfig>>>> =
    LazyLock::new(|| RwLock::new(HashMap::new()));
1640
/// Finalized configuration for a registered custom dialect, as stored in
/// [`CUSTOM_DIALECT_REGISTRY`]. Built by [`CustomDialectBuilder::register`].
struct CustomDialectConfig {
    /// Registry key: the name the dialect was registered under.
    name: String,
    /// Built-in dialect this configuration was derived from.
    base_dialect: DialectType,
    /// Tokenizer config inherited from the base dialect, with any builder
    /// modifier already applied.
    tokenizer_config: TokenizerConfig,
    /// Generator config inherited from the base dialect, with any builder
    /// modifier already applied.
    generator_config: GeneratorConfig,
    /// Optional per-node expression transform supplied via
    /// [`CustomDialectBuilder::transform_fn`]; `None` if the builder set none.
    transform: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
    /// Optional whole-tree preprocessing pass supplied via
    /// [`CustomDialectBuilder::preprocess_fn`]; `None` if the builder set none.
    preprocess: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
1649
1650/// Fluent builder for creating and registering custom SQL dialects.
1651///
1652/// A custom dialect is based on an existing built-in dialect and allows selective
1653/// overrides of tokenizer configuration, generator configuration, and expression
1654/// transforms.
1655///
1656/// # Example
1657///
1658/// ```rust,ignore
1659/// use polyglot_sql::dialects::{CustomDialectBuilder, DialectType, Dialect};
1660/// use polyglot_sql::generator::NormalizeFunctions;
1661///
1662/// CustomDialectBuilder::new("my_postgres")
1663/// .based_on(DialectType::PostgreSQL)
1664/// .generator_config_modifier(|gc| {
1665/// gc.normalize_functions = NormalizeFunctions::Lower;
1666/// })
1667/// .register()
1668/// .unwrap();
1669///
1670/// let d = Dialect::get_by_name("my_postgres").unwrap();
1671/// let exprs = d.parse("SELECT COUNT(*)").unwrap();
1672/// let sql = d.generate(&exprs[0]).unwrap();
1673/// assert_eq!(sql, "select count(*)");
1674///
1675/// polyglot_sql::unregister_custom_dialect("my_postgres");
1676/// ```
pub struct CustomDialectBuilder {
    /// Registry name for the dialect being built.
    name: String,
    /// Built-in dialect to inherit tokenizer/generator configuration from.
    base_dialect: DialectType,
    /// Deferred tweak to the inherited tokenizer config; consumed once at
    /// `register()` time (hence `FnOnce`).
    tokenizer_modifier: Option<Box<dyn FnOnce(&mut TokenizerConfig)>>,
    /// Deferred tweak to the inherited generator config; consumed once at
    /// `register()` time.
    generator_modifier: Option<Box<dyn FnOnce(&mut GeneratorConfig)>>,
    /// Optional per-node expression transform that replaces the base dialect's.
    transform: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
    /// Optional whole-tree preprocessing pass that replaces the base dialect's.
    preprocess: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
1685
1686impl CustomDialectBuilder {
1687 /// Create a new builder with the given name. Defaults to `Generic` as the base dialect.
1688 pub fn new(name: impl Into<String>) -> Self {
1689 Self {
1690 name: name.into(),
1691 base_dialect: DialectType::Generic,
1692 tokenizer_modifier: None,
1693 generator_modifier: None,
1694 transform: None,
1695 preprocess: None,
1696 }
1697 }
1698
1699 /// Set the base built-in dialect to inherit configuration from.
1700 pub fn based_on(mut self, dialect: DialectType) -> Self {
1701 self.base_dialect = dialect;
1702 self
1703 }
1704
1705 /// Provide a closure that modifies the tokenizer configuration inherited from the base dialect.
1706 pub fn tokenizer_config_modifier<F>(mut self, f: F) -> Self
1707 where
1708 F: FnOnce(&mut TokenizerConfig) + 'static,
1709 {
1710 self.tokenizer_modifier = Some(Box::new(f));
1711 self
1712 }
1713
1714 /// Provide a closure that modifies the generator configuration inherited from the base dialect.
1715 pub fn generator_config_modifier<F>(mut self, f: F) -> Self
1716 where
1717 F: FnOnce(&mut GeneratorConfig) + 'static,
1718 {
1719 self.generator_modifier = Some(Box::new(f));
1720 self
1721 }
1722
1723 /// Set a custom per-node expression transform function.
1724 ///
1725 /// This replaces the base dialect's transform. It is called on every expression
1726 /// node during the recursive transform pass.
1727 pub fn transform_fn<F>(mut self, f: F) -> Self
1728 where
1729 F: Fn(Expression) -> Result<Expression> + Send + Sync + 'static,
1730 {
1731 self.transform = Some(Arc::new(f));
1732 self
1733 }
1734
1735 /// Set a custom whole-tree preprocessing function.
1736 ///
1737 /// This replaces the base dialect's built-in preprocessing. It is called once
1738 /// on the entire expression tree before the recursive per-node transform.
1739 pub fn preprocess_fn<F>(mut self, f: F) -> Self
1740 where
1741 F: Fn(Expression) -> Result<Expression> + Send + Sync + 'static,
1742 {
1743 self.preprocess = Some(Arc::new(f));
1744 self
1745 }
1746
1747 /// Build the custom dialect configuration and register it in the global registry.
1748 ///
1749 /// Returns an error if:
1750 /// - The name collides with a built-in dialect name
1751 /// - A custom dialect with the same name is already registered
1752 pub fn register(self) -> Result<()> {
1753 // Reject names that collide with built-in dialects
1754 if DialectType::from_str(&self.name).is_ok() {
1755 return Err(crate::error::Error::parse(
1756 format!(
1757 "Cannot register custom dialect '{}': name collides with built-in dialect",
1758 self.name
1759 ),
1760 0,
1761 0,
1762 0,
1763 0,
1764 ));
1765 }
1766
1767 // Get base configs
1768 let (mut tok_config, mut gen_config, _base_transform) =
1769 configs_for_dialect_type(self.base_dialect);
1770
1771 // Apply modifiers
1772 if let Some(tok_mod) = self.tokenizer_modifier {
1773 tok_mod(&mut tok_config);
1774 }
1775 if let Some(gen_mod) = self.generator_modifier {
1776 gen_mod(&mut gen_config);
1777 }
1778
1779 let config = CustomDialectConfig {
1780 name: self.name.clone(),
1781 base_dialect: self.base_dialect,
1782 tokenizer_config: tok_config,
1783 generator_config: gen_config,
1784 transform: self.transform,
1785 preprocess: self.preprocess,
1786 };
1787
1788 register_custom_dialect(config)
1789 }
1790}
1791
1792use std::str::FromStr;
1793
1794fn register_custom_dialect(config: CustomDialectConfig) -> Result<()> {
1795 let mut registry = CUSTOM_DIALECT_REGISTRY.write().map_err(|e| {
1796 crate::error::Error::parse(format!("Registry lock poisoned: {}", e), 0, 0, 0, 0)
1797 })?;
1798
1799 if registry.contains_key(&config.name) {
1800 return Err(crate::error::Error::parse(
1801 format!("Custom dialect '{}' is already registered", config.name),
1802 0,
1803 0,
1804 0,
1805 0,
1806 ));
1807 }
1808
1809 registry.insert(config.name.clone(), Arc::new(config));
1810 Ok(())
1811}
1812
1813/// Remove a custom dialect from the global registry.
1814///
1815/// Returns `true` if a dialect with that name was found and removed,
1816/// `false` if no such custom dialect existed.
1817pub fn unregister_custom_dialect(name: &str) -> bool {
1818 if let Ok(mut registry) = CUSTOM_DIALECT_REGISTRY.write() {
1819 registry.remove(name).is_some()
1820 } else {
1821 false
1822 }
1823}
1824
1825fn get_custom_dialect_config(name: &str) -> Option<Arc<CustomDialectConfig>> {
1826 CUSTOM_DIALECT_REGISTRY
1827 .read()
1828 .ok()
1829 .and_then(|registry| registry.get(name).cloned())
1830}
1831
/// Main entry point for dialect-specific SQL operations.
///
/// A `Dialect` bundles together a tokenizer, generator configuration, and expression
/// transformer for a specific SQL database engine. It is the high-level API through
/// which callers parse, generate, transform, and transpile SQL.
///
/// # Usage
///
/// ```rust,ignore
/// use polyglot_sql::dialects::{Dialect, DialectType};
///
/// // Parse PostgreSQL SQL into an AST
/// let pg = Dialect::get(DialectType::PostgreSQL);
/// let exprs = pg.parse("SELECT id, name FROM users WHERE active")?;
///
/// // Transpile from PostgreSQL to BigQuery
/// let results = pg.transpile_to("SELECT NOW()", DialectType::BigQuery)?;
/// assert_eq!(results[0], "SELECT CURRENT_TIMESTAMP()");
/// ```
///
/// Obtain an instance via [`Dialect::get`] or [`Dialect::get_by_name`].
/// The struct is `Send + Sync` safe so it can be shared across threads.
pub struct Dialect {
    /// Engine this instance targets (for custom dialects, the base dialect it inherits from).
    dialect_type: DialectType,
    /// Tokenizer preconfigured with this dialect's lexing rules.
    tokenizer: Tokenizer,
    /// Dialect-wide default generator configuration.
    generator_config: GeneratorConfig,
    /// Per-node expression transform applied during [`Dialect::transform`].
    transformer: Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>,
    /// Optional function to get expression-specific generator config (for hybrid dialects like Athena).
    generator_config_for_expr: Option<Box<dyn Fn(&Expression) -> GeneratorConfig + Send + Sync>>,
    /// Optional custom preprocessing function (overrides built-in preprocess for custom dialects).
    custom_preprocess: Option<Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
1864
1865impl Dialect {
1866 /// Creates a fully configured [`Dialect`] instance for the given [`DialectType`].
1867 ///
1868 /// This is the primary constructor. It initializes the tokenizer, generator config,
1869 /// and expression transformer based on the dialect's [`DialectImpl`] implementation.
1870 /// For hybrid dialects like Athena, it also sets up expression-specific generator
1871 /// config routing.
1872 pub fn get(dialect_type: DialectType) -> Self {
1873 let (tokenizer_config, generator_config, transformer) =
1874 configs_for_dialect_type(dialect_type);
1875
1876 // Set up expression-specific generator config for hybrid dialects
1877 let generator_config_for_expr: Option<
1878 Box<dyn Fn(&Expression) -> GeneratorConfig + Send + Sync>,
1879 > = match dialect_type {
1880 #[cfg(feature = "dialect-athena")]
1881 DialectType::Athena => Some(Box::new(|expr| {
1882 AthenaDialect.generator_config_for_expr(expr)
1883 })),
1884 _ => None,
1885 };
1886
1887 Self {
1888 dialect_type,
1889 tokenizer: Tokenizer::new(tokenizer_config),
1890 generator_config,
1891 transformer,
1892 generator_config_for_expr,
1893 custom_preprocess: None,
1894 }
1895 }
1896
1897 /// Look up a dialect by string name.
1898 ///
1899 /// Checks built-in dialect names first (via [`DialectType::from_str`]), then
1900 /// falls back to the custom dialect registry. Returns `None` if no dialect
1901 /// with the given name exists.
1902 pub fn get_by_name(name: &str) -> Option<Self> {
1903 // Try built-in first
1904 if let Ok(dt) = DialectType::from_str(name) {
1905 return Some(Self::get(dt));
1906 }
1907
1908 // Try custom registry
1909 let config = get_custom_dialect_config(name)?;
1910 Some(Self::from_custom_config(&config))
1911 }
1912
1913 /// Construct a `Dialect` from a custom dialect configuration.
1914 fn from_custom_config(config: &CustomDialectConfig) -> Self {
1915 // Build the transformer: use custom if provided, else use base dialect's
1916 let transformer: Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync> =
1917 if let Some(ref custom_transform) = config.transform {
1918 let t = Arc::clone(custom_transform);
1919 Box::new(move |e| t(e))
1920 } else {
1921 let (_, _, base_transform) = configs_for_dialect_type(config.base_dialect);
1922 base_transform
1923 };
1924
1925 // Build the custom preprocess: use custom if provided
1926 let custom_preprocess: Option<Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>> =
1927 config.preprocess.as_ref().map(|p| {
1928 let p = Arc::clone(p);
1929 Box::new(move |e: Expression| p(e))
1930 as Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>
1931 });
1932
1933 Self {
1934 dialect_type: config.base_dialect,
1935 tokenizer: Tokenizer::new(config.tokenizer_config.clone()),
1936 generator_config: config.generator_config.clone(),
1937 transformer,
1938 generator_config_for_expr: None,
1939 custom_preprocess,
1940 }
1941 }
1942
    /// Get the dialect type.
    ///
    /// For custom dialects this is the base dialect they were built from
    /// (see `from_custom_config`).
    pub fn dialect_type(&self) -> DialectType {
        self.dialect_type
    }
1947
    /// Get the generator configuration.
    ///
    /// This is the dialect-wide default; hybrid dialects may still substitute a
    /// per-expression config internally (see `get_config_for_expr`).
    pub fn generator_config(&self) -> &GeneratorConfig {
        &self.generator_config
    }
1952
1953 /// Parses a SQL string into a list of [`Expression`] AST nodes.
1954 ///
1955 /// The input may contain multiple semicolon-separated statements; each one
1956 /// produces a separate element in the returned vector. Tokenization uses
1957 /// this dialect's configured tokenizer, and parsing uses the dialect-aware parser.
1958 pub fn parse(&self, sql: &str) -> Result<Vec<Expression>> {
1959 let tokens = self.tokenizer.tokenize(sql)?;
1960 let config = crate::parser::ParserConfig {
1961 dialect: Some(self.dialect_type),
1962 ..Default::default()
1963 };
1964 let mut parser = Parser::with_source(tokens, config, sql.to_string());
1965 parser.parse()
1966 }
1967
    /// Tokenize SQL using this dialect's tokenizer configuration.
    ///
    /// Exposes the raw token stream without running the parser.
    pub fn tokenize(&self, sql: &str) -> Result<Vec<Token>> {
        self.tokenizer.tokenize(sql)
    }
1972
1973 /// Get the generator config for a specific expression (supports hybrid dialects)
1974 fn get_config_for_expr(&self, expr: &Expression) -> GeneratorConfig {
1975 if let Some(ref config_fn) = self.generator_config_for_expr {
1976 config_fn(expr)
1977 } else {
1978 self.generator_config.clone()
1979 }
1980 }
1981
1982 /// Generates a SQL string from an [`Expression`] AST node.
1983 ///
1984 /// The output uses this dialect's generator configuration for identifier quoting,
1985 /// keyword casing, function name normalization, and syntax style. The result is
1986 /// a single-line (non-pretty) SQL string.
1987 pub fn generate(&self, expr: &Expression) -> Result<String> {
1988 let config = self.get_config_for_expr(expr);
1989 let mut generator = Generator::with_config(config);
1990 generator.generate(expr)
1991 }
1992
1993 /// Generate SQL from an expression with pretty printing enabled
1994 pub fn generate_pretty(&self, expr: &Expression) -> Result<String> {
1995 let mut config = self.get_config_for_expr(expr);
1996 config.pretty = true;
1997 let mut generator = Generator::with_config(config);
1998 generator.generate(expr)
1999 }
2000
2001 /// Generate SQL from an expression with source dialect info (for transpilation)
2002 pub fn generate_with_source(&self, expr: &Expression, source: DialectType) -> Result<String> {
2003 let mut config = self.get_config_for_expr(expr);
2004 config.source_dialect = Some(source);
2005 let mut generator = Generator::with_config(config);
2006 generator.generate(expr)
2007 }
2008
2009 /// Generate SQL from an expression with pretty printing and source dialect info
2010 pub fn generate_pretty_with_source(
2011 &self,
2012 expr: &Expression,
2013 source: DialectType,
2014 ) -> Result<String> {
2015 let mut config = self.get_config_for_expr(expr);
2016 config.pretty = true;
2017 config.source_dialect = Some(source);
2018 let mut generator = Generator::with_config(config);
2019 generator.generate(expr)
2020 }
2021
2022 /// Generate SQL from an expression with forced identifier quoting (identify=True)
2023 pub fn generate_with_identify(&self, expr: &Expression) -> Result<String> {
2024 let mut config = self.get_config_for_expr(expr);
2025 config.always_quote_identifiers = true;
2026 let mut generator = Generator::with_config(config);
2027 generator.generate(expr)
2028 }
2029
2030 /// Generate SQL from an expression with pretty printing and forced identifier quoting
2031 pub fn generate_pretty_with_identify(&self, expr: &Expression) -> Result<String> {
2032 let mut config = self.generator_config.clone();
2033 config.pretty = true;
2034 config.always_quote_identifiers = true;
2035 let mut generator = Generator::with_config(config);
2036 generator.generate(expr)
2037 }
2038
2039 /// Generate SQL from an expression with caller-specified config overrides
2040 pub fn generate_with_overrides(
2041 &self,
2042 expr: &Expression,
2043 overrides: impl FnOnce(&mut GeneratorConfig),
2044 ) -> Result<String> {
2045 let mut config = self.get_config_for_expr(expr);
2046 overrides(&mut config);
2047 let mut generator = Generator::with_config(config);
2048 generator.generate(expr)
2049 }
2050
2051 /// Transforms an expression tree to conform to this dialect's syntax and semantics.
2052 ///
2053 /// The transformation proceeds in two phases:
2054 /// 1. **Preprocessing** -- whole-tree structural rewrites such as eliminating QUALIFY,
2055 /// ensuring boolean predicates, or converting DISTINCT ON to a window-function pattern.
2056 /// 2. **Recursive per-node transform** -- a bottom-up pass via [`transform_recursive`]
2057 /// that applies this dialect's [`DialectImpl::transform_expr`] to every node.
2058 ///
2059 /// This method is used both during transpilation (to rewrite an AST for a target dialect)
2060 /// and for identity transforms (normalizing SQL within the same dialect).
2061 pub fn transform(&self, expr: Expression) -> Result<Expression> {
2062 // Apply preprocessing transforms based on dialect
2063 let preprocessed = self.preprocess(expr)?;
2064 // Then apply recursive transformation
2065 transform_recursive(preprocessed, &self.transformer)
2066 }
2067
    /// Apply dialect-specific preprocessing transforms.
    ///
    /// This is the whole-tree phase of [`transform`](Self::transform): structural
    /// rewrites (QUALIFY elimination, CTE hoisting, join rewrites, ...) that need
    /// to see the full statement rather than a single node. Custom dialects can
    /// replace this entirely via `custom_preprocess`. The transform order within
    /// each arm is significant — later rewrites assume earlier ones have run.
    fn preprocess(&self, expr: Expression) -> Result<Expression> {
        // If a custom preprocess function is set, use it instead of the built-in logic
        if let Some(ref custom_preprocess) = self.custom_preprocess {
            return custom_preprocess(expr);
        }

        // `transforms` is only needed by the feature-gated arms below, so the
        // import is gated on the union of those features.
        #[cfg(any(
            feature = "dialect-mysql",
            feature = "dialect-postgresql",
            feature = "dialect-bigquery",
            feature = "dialect-snowflake",
            feature = "dialect-tsql",
            feature = "dialect-spark",
            feature = "dialect-databricks",
            feature = "dialect-hive",
            feature = "dialect-sqlite",
            feature = "dialect-trino",
            feature = "dialect-presto",
            feature = "dialect-duckdb",
            feature = "dialect-redshift",
            feature = "dialect-starrocks",
            feature = "dialect-oracle",
            feature = "dialect-clickhouse",
        ))]
        use crate::transforms;

        match self.dialect_type {
            // MySQL doesn't support QUALIFY, DISTINCT ON, FULL OUTER JOIN
            // MySQL doesn't natively support GENERATE_DATE_ARRAY (expand to recursive CTE)
            #[cfg(feature = "dialect-mysql")]
            DialectType::MySQL => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_full_outer_join(expr)?;
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
                Ok(expr)
            }
            // PostgreSQL doesn't support QUALIFY
            // PostgreSQL: UNNEST(GENERATE_SERIES) -> subquery wrapping
            // PostgreSQL: Normalize SET ... TO to SET ... = in CREATE FUNCTION
            #[cfg(feature = "dialect-postgresql")]
            DialectType::PostgreSQL => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::unwrap_unnest_generate_series_for_postgres(expr)?;
                // Normalize SET ... TO to SET ... = in CREATE FUNCTION
                // Only normalize when sqlglot would fully parse (no body) —
                // sqlglot falls back to Command for complex function bodies,
                // preserving the original text including TO.
                let expr = if let Expression::CreateFunction(mut cf) = expr {
                    if cf.body.is_none() {
                        for opt in &mut cf.set_options {
                            if let crate::expressions::FunctionSetValue::Value { use_to, .. } =
                                &mut opt.value
                            {
                                *use_to = false;
                            }
                        }
                    }
                    Expression::CreateFunction(cf)
                } else {
                    expr
                };
                Ok(expr)
            }
            // BigQuery doesn't support DISTINCT ON or CTE column aliases
            #[cfg(feature = "dialect-bigquery")]
            DialectType::BigQuery => {
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::pushdown_cte_column_names(expr)?;
                let expr = transforms::explode_projection_to_unnest(expr, DialectType::BigQuery)?;
                Ok(expr)
            }
            // Snowflake
            #[cfg(feature = "dialect-snowflake")]
            DialectType::Snowflake => {
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::eliminate_window_clause(expr)?;
                let expr = transforms::snowflake_flatten_projection_to_unnest(expr)?;
                Ok(expr)
            }
            // TSQL doesn't support QUALIFY
            // TSQL requires boolean expressions in WHERE/HAVING (no implicit truthiness)
            // TSQL doesn't support CTEs in subqueries (hoist to top level)
            // NOTE: no_limit_order_by_union is handled in cross_dialect_normalize (not preprocess)
            // to avoid breaking TSQL identity tests where ORDER BY on UNION is valid
            #[cfg(feature = "dialect-tsql")]
            DialectType::TSQL => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::ensure_bools(expr)?;
                let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
                let expr = transforms::move_ctes_to_top_level(expr)?;
                let expr = transforms::qualify_derived_table_outputs(expr)?;
                Ok(expr)
            }
            // Spark doesn't support QUALIFY (but Databricks does)
            // Spark doesn't support CTEs in subqueries (hoist to top level)
            #[cfg(feature = "dialect-spark")]
            DialectType::Spark => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::add_auto_table_alias(expr)?;
                let expr = transforms::simplify_nested_paren_values(expr)?;
                let expr = transforms::move_ctes_to_top_level(expr)?;
                Ok(expr)
            }
            // Databricks supports QUALIFY natively
            // Databricks doesn't support CTEs in subqueries (hoist to top level)
            #[cfg(feature = "dialect-databricks")]
            DialectType::Databricks => {
                let expr = transforms::add_auto_table_alias(expr)?;
                let expr = transforms::simplify_nested_paren_values(expr)?;
                let expr = transforms::move_ctes_to_top_level(expr)?;
                Ok(expr)
            }
            // Hive doesn't support QUALIFY or CTEs in subqueries
            #[cfg(feature = "dialect-hive")]
            DialectType::Hive => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::move_ctes_to_top_level(expr)?;
                Ok(expr)
            }
            // SQLite doesn't support QUALIFY
            #[cfg(feature = "dialect-sqlite")]
            DialectType::SQLite => {
                let expr = transforms::eliminate_qualify(expr)?;
                Ok(expr)
            }
            // Trino doesn't support QUALIFY
            #[cfg(feature = "dialect-trino")]
            DialectType::Trino => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::explode_projection_to_unnest(expr, DialectType::Trino)?;
                Ok(expr)
            }
            // Presto doesn't support QUALIFY or WINDOW clause
            #[cfg(feature = "dialect-presto")]
            DialectType::Presto => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_window_clause(expr)?;
                let expr = transforms::explode_projection_to_unnest(expr, DialectType::Presto)?;
                Ok(expr)
            }
            // DuckDB supports QUALIFY - no elimination needed
            // Expand POSEXPLODE to GENERATE_SUBSCRIPTS + UNNEST
            // Expand LIKE ANY / ILIKE ANY to OR chains (DuckDB doesn't support quantifiers)
            #[cfg(feature = "dialect-duckdb")]
            DialectType::DuckDB => {
                let expr = transforms::expand_posexplode_duckdb(expr)?;
                let expr = transforms::expand_like_any(expr)?;
                Ok(expr)
            }
            // Redshift doesn't support QUALIFY, WINDOW clause, or GENERATE_DATE_ARRAY
            #[cfg(feature = "dialect-redshift")]
            DialectType::Redshift => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_window_clause(expr)?;
                let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
                Ok(expr)
            }
            // StarRocks doesn't support BETWEEN in DELETE statements or QUALIFY
            #[cfg(feature = "dialect-starrocks")]
            DialectType::StarRocks => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::expand_between_in_delete(expr)?;
                Ok(expr)
            }
            // DataFusion supports QUALIFY and semi/anti joins natively
            #[cfg(feature = "dialect-datafusion")]
            DialectType::DataFusion => Ok(expr),
            // Oracle doesn't support QUALIFY
            #[cfg(feature = "dialect-oracle")]
            DialectType::Oracle => {
                let expr = transforms::eliminate_qualify(expr)?;
                Ok(expr)
            }
            // Drill - no special preprocessing needed
            #[cfg(feature = "dialect-drill")]
            DialectType::Drill => Ok(expr),
            // Teradata - no special preprocessing needed
            #[cfg(feature = "dialect-teradata")]
            DialectType::Teradata => Ok(expr),
            // ClickHouse doesn't support ORDER BY/LIMIT directly on UNION
            #[cfg(feature = "dialect-clickhouse")]
            DialectType::ClickHouse => {
                let expr = transforms::no_limit_order_by_union(expr)?;
                Ok(expr)
            }
            // Other dialects - no preprocessing
            _ => Ok(expr),
        }
    }
2261
    /// Transpile SQL from this dialect to another.
    ///
    /// Parses `sql` with this (source) dialect, rewrites each statement's AST for
    /// `target`, and renders one output string per statement (single-line output).
    pub fn transpile_to(&self, sql: &str, target: DialectType) -> Result<Vec<String>> {
        self.transpile_to_inner(sql, target, false)
    }
2266
    /// Transpile SQL from this dialect to another with pretty printing enabled.
    ///
    /// Same pipeline as [`transpile_to`](Self::transpile_to), but output is
    /// multi-line pretty-printed SQL.
    pub fn transpile_to_pretty(&self, sql: &str, target: DialectType) -> Result<Vec<String>> {
        self.transpile_to_inner(sql, target, true)
    }
2271
2272 #[cfg(not(feature = "transpile"))]
2273 fn transpile_to_inner(
2274 &self,
2275 sql: &str,
2276 target: DialectType,
2277 pretty: bool,
2278 ) -> Result<Vec<String>> {
2279 // Without the transpile feature, only same-dialect or to/from generic is supported
2280 if self.dialect_type != target
2281 && self.dialect_type != DialectType::Generic
2282 && target != DialectType::Generic
2283 {
2284 return Err(crate::error::Error::parse(
2285 "Cross-dialect transpilation not available in this build",
2286 0,
2287 0,
2288 0,
2289 0,
2290 ));
2291 }
2292
2293 let expressions = self.parse(sql)?;
2294 let target_dialect = Dialect::get(target);
2295 let generic_identity =
2296 self.dialect_type == DialectType::Generic && target == DialectType::Generic;
2297
2298 if generic_identity {
2299 return expressions
2300 .into_iter()
2301 .map(|expr| {
2302 if pretty {
2303 target_dialect.generate_pretty_with_source(&expr, self.dialect_type)
2304 } else {
2305 target_dialect.generate_with_source(&expr, self.dialect_type)
2306 }
2307 })
2308 .collect();
2309 }
2310
2311 expressions
2312 .into_iter()
2313 .map(|expr| {
2314 let transformed = target_dialect.transform(expr)?;
2315 if pretty {
2316 target_dialect.generate_pretty_with_source(&transformed, self.dialect_type)
2317 } else {
2318 target_dialect.generate_with_source(&transformed, self.dialect_type)
2319 }
2320 })
2321 .collect()
2322 }
2323
    /// Full cross-dialect transpilation pipeline (requires the `transpile` feature).
    ///
    /// For each parsed statement the pipeline runs, in order:
    /// 1. source-specific AST fixups (e.g. DuckDB VARCHAR/CHAR -> TEXT),
    /// 2. the source dialect's own transform to normalize its constructs,
    /// 3. a series of (source, target)-keyed rewrites plus
    ///    `cross_dialect_normalize`,
    /// 4. the target dialect's transform, and
    /// 5. generation with the target's config (plus a Snowflake pretty-print fixup).
    ///
    /// The ordering of these steps is load-bearing; several rewrites assume the
    /// alias/CTE structure produced by the steps before them.
    #[cfg(feature = "transpile")]
    fn transpile_to_inner(
        &self,
        sql: &str,
        target: DialectType,
        pretty: bool,
    ) -> Result<Vec<String>> {
        let expressions = self.parse(sql)?;
        let target_dialect = Dialect::get(target);
        let generic_identity =
            self.dialect_type == DialectType::Generic && target == DialectType::Generic;

        if generic_identity {
            return expressions
                .into_iter()
                .map(|expr| {
                    if pretty {
                        target_dialect.generate_pretty_with_source(&expr, self.dialect_type)
                    } else {
                        target_dialect.generate_with_source(&expr, self.dialect_type)
                    }
                })
                .collect();
        }

        expressions
            .into_iter()
            .map(|expr| {
                // DuckDB source: normalize VARCHAR/CHAR to TEXT (DuckDB doesn't support
                // VARCHAR length constraints). This emulates Python sqlglot's DuckDB parser
                // where VARCHAR_LENGTH = None and VARCHAR maps to TEXT.
                let expr = if matches!(self.dialect_type, DialectType::DuckDB) {
                    use crate::expressions::DataType as DT;
                    transform_recursive(expr, &|e| match e {
                        Expression::DataType(DT::VarChar { .. }) => {
                            Ok(Expression::DataType(DT::Text))
                        }
                        Expression::DataType(DT::Char { .. }) => Ok(Expression::DataType(DT::Text)),
                        _ => Ok(e),
                    })?
                } else {
                    expr
                };

                // When source and target differ, first normalize the source dialect's
                // AST constructs to standard SQL, so that the target dialect can handle them.
                // This handles cases like Snowflake's SQUARE -> POWER, DIV0 -> CASE, etc.
                let normalized =
                    if self.dialect_type != target && self.dialect_type != DialectType::Generic {
                        self.transform(expr)?
                    } else {
                        expr
                    };

                // For TSQL source targeting non-TSQL: unwrap ISNULL(JSON_QUERY(...), JSON_VALUE(...))
                // to just JSON_QUERY(...) so cross_dialect_normalize can convert it cleanly.
                // The TSQL read transform wraps JsonQuery in ISNULL for identity, but for
                // cross-dialect transpilation we need the unwrapped JSON_QUERY.
                let normalized =
                    if matches!(self.dialect_type, DialectType::TSQL | DialectType::Fabric)
                        && !matches!(target, DialectType::TSQL | DialectType::Fabric)
                    {
                        transform_recursive(normalized, &|e| {
                            if let Expression::Function(ref f) = e {
                                if f.name.eq_ignore_ascii_case("ISNULL") && f.args.len() == 2 {
                                    // Check if first arg is JSON_QUERY and second is JSON_VALUE
                                    if let (
                                        Expression::Function(ref jq),
                                        Expression::Function(ref jv),
                                    ) = (&f.args[0], &f.args[1])
                                    {
                                        if jq.name.eq_ignore_ascii_case("JSON_QUERY")
                                            && jv.name.eq_ignore_ascii_case("JSON_VALUE")
                                        {
                                            // Unwrap: return just JSON_QUERY(...)
                                            return Ok(f.args[0].clone());
                                        }
                                    }
                                }
                            }
                            Ok(e)
                        })?
                    } else {
                        normalized
                    };

                // Snowflake source to non-Snowflake target: CURRENT_TIME -> LOCALTIME
                // Snowflake's CURRENT_TIME is equivalent to LOCALTIME in other dialects.
                // Python sqlglot parses Snowflake's CURRENT_TIME as Localtime expression.
                let normalized = if matches!(self.dialect_type, DialectType::Snowflake)
                    && !matches!(target, DialectType::Snowflake)
                {
                    transform_recursive(normalized, &|e| {
                        if let Expression::Function(ref f) = e {
                            if f.name.eq_ignore_ascii_case("CURRENT_TIME") {
                                return Ok(Expression::Localtime(Box::new(
                                    crate::expressions::Localtime { this: None },
                                )));
                            }
                        }
                        Ok(e)
                    })?
                } else {
                    normalized
                };

                // Snowflake source to DuckDB target: REPEAT(' ', n) -> REPEAT(' ', CAST(n AS BIGINT))
                // Snowflake's SPACE(n) is converted to REPEAT(' ', n) by the Snowflake source
                // transform. DuckDB requires the count argument to be BIGINT.
                let normalized = if matches!(self.dialect_type, DialectType::Snowflake)
                    && matches!(target, DialectType::DuckDB)
                {
                    transform_recursive(normalized, &|e| {
                        if let Expression::Function(ref f) = e {
                            if f.name.eq_ignore_ascii_case("REPEAT") && f.args.len() == 2 {
                                // Check if first arg is space string literal
                                if let Expression::Literal(crate::expressions::Literal::String(
                                    ref s,
                                )) = f.args[0]
                                {
                                    if s == " " {
                                        // Wrap second arg in CAST(... AS BIGINT) if not already
                                        if !matches!(f.args[1], Expression::Cast(_)) {
                                            let mut new_args = f.args.clone();
                                            new_args[1] = Expression::Cast(Box::new(
                                                crate::expressions::Cast {
                                                    this: new_args[1].clone(),
                                                    to: crate::expressions::DataType::BigInt {
                                                        length: None,
                                                    },
                                                    trailing_comments: Vec::new(),
                                                    double_colon_syntax: false,
                                                    format: None,
                                                    default: None,
                                                    inferred_type: None,
                                                },
                                            ));
                                            return Ok(Expression::Function(Box::new(
                                                crate::expressions::Function {
                                                    name: f.name.clone(),
                                                    args: new_args,
                                                    distinct: f.distinct,
                                                    trailing_comments: f.trailing_comments.clone(),
                                                    use_bracket_syntax: f.use_bracket_syntax,
                                                    no_parens: f.no_parens,
                                                    quoted: f.quoted,
                                                    span: None,
                                                    inferred_type: None,
                                                },
                                            )));
                                        }
                                    }
                                }
                            }
                        }
                        Ok(e)
                    })?
                } else {
                    normalized
                };

                // Propagate struct field names in arrays (for BigQuery source to non-BigQuery target)
                // BigQuery->BigQuery should NOT propagate names (BigQuery handles implicit inheritance)
                let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
                    && !matches!(target, DialectType::BigQuery)
                {
                    crate::transforms::propagate_struct_field_names(normalized)?
                } else {
                    normalized
                };

                // Apply cross-dialect semantic normalizations
                let normalized =
                    Self::cross_dialect_normalize(normalized, self.dialect_type, target)?;

                // For DuckDB target from BigQuery source: wrap UNNEST of struct arrays in
                // (SELECT UNNEST(..., max_depth => 2)) subquery
                // Must run BEFORE unnest_alias_to_column_alias since it changes alias structure
                let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
                    && matches!(target, DialectType::DuckDB)
                {
                    crate::transforms::wrap_duckdb_unnest_struct(normalized)?
                } else {
                    normalized
                };

                // Convert BigQuery UNNEST aliases to column-alias format for DuckDB/Presto/Spark
                // UNNEST(arr) AS x -> UNNEST(arr) AS _t0(x)
                let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
                    && matches!(
                        target,
                        DialectType::DuckDB
                            | DialectType::Presto
                            | DialectType::Trino
                            | DialectType::Athena
                            | DialectType::Spark
                            | DialectType::Databricks
                    ) {
                    crate::transforms::unnest_alias_to_column_alias(normalized)?
                } else if matches!(self.dialect_type, DialectType::BigQuery)
                    && matches!(target, DialectType::BigQuery | DialectType::Redshift)
                {
                    // For BigQuery/Redshift targets: move UNNEST FROM items to CROSS JOINs
                    // but don't convert alias format (no _t0 wrapper)
                    let result = crate::transforms::unnest_from_to_cross_join(normalized)?;
                    // For Redshift: strip UNNEST when arg is a column reference path
                    if matches!(target, DialectType::Redshift) {
                        crate::transforms::strip_unnest_column_refs(result)?
                    } else {
                        result
                    }
                } else {
                    normalized
                };

                // For Presto/Trino targets from PostgreSQL/Redshift source:
                // Wrap UNNEST aliases from GENERATE_SERIES conversion: AS s -> AS _u(s)
                let normalized = if matches!(
                    self.dialect_type,
                    DialectType::PostgreSQL | DialectType::Redshift
                ) && matches!(
                    target,
                    DialectType::Presto | DialectType::Trino | DialectType::Athena
                ) {
                    crate::transforms::wrap_unnest_join_aliases(normalized)?
                } else {
                    normalized
                };

                // Eliminate DISTINCT ON with target-dialect awareness
                // This must happen after source transform (which may produce DISTINCT ON)
                // and before target transform, with knowledge of the target dialect's NULL ordering behavior
                let normalized =
                    crate::transforms::eliminate_distinct_on_for_dialect(normalized, Some(target))?;

                // GENERATE_DATE_ARRAY in UNNEST -> Snowflake ARRAY_GENERATE_RANGE + DATEADD
                let normalized = if matches!(target, DialectType::Snowflake) {
                    Self::transform_generate_date_array_snowflake(normalized)?
                } else {
                    normalized
                };

                // CROSS JOIN UNNEST -> LATERAL VIEW EXPLODE/INLINE for Spark/Hive/Databricks
                let normalized = if matches!(
                    target,
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive
                ) {
                    crate::transforms::unnest_to_explode_select(normalized)?
                } else {
                    normalized
                };

                // Wrap UNION with ORDER BY/LIMIT in a subquery for dialects that require it
                let normalized = if matches!(target, DialectType::ClickHouse | DialectType::TSQL) {
                    crate::transforms::no_limit_order_by_union(normalized)?
                } else {
                    normalized
                };

                // TSQL: Convert COUNT(*) -> COUNT_BIG(*) when source is not TSQL/Fabric
                // Python sqlglot does this in the TSQL generator, but we can't do it there
                // because it would break TSQL -> TSQL identity
                let normalized = if matches!(target, DialectType::TSQL | DialectType::Fabric)
                    && !matches!(self.dialect_type, DialectType::TSQL | DialectType::Fabric)
                {
                    transform_recursive(normalized, &|e| {
                        if let Expression::Count(ref c) = e {
                            // Build COUNT_BIG(...) as an AggregateFunction
                            let args = if c.star {
                                vec![Expression::Star(crate::expressions::Star {
                                    table: None,
                                    except: None,
                                    replace: None,
                                    rename: None,
                                    trailing_comments: Vec::new(),
                                    span: None,
                                })]
                            } else if let Some(ref this) = c.this {
                                vec![this.clone()]
                            } else {
                                vec![]
                            };
                            Ok(Expression::AggregateFunction(Box::new(
                                crate::expressions::AggregateFunction {
                                    name: "COUNT_BIG".to_string(),
                                    args,
                                    distinct: c.distinct,
                                    filter: c.filter.clone(),
                                    order_by: Vec::new(),
                                    limit: None,
                                    ignore_nulls: None,
                                    inferred_type: None,
                                },
                            )))
                        } else {
                            Ok(e)
                        }
                    })?
                } else {
                    normalized
                };

                let transformed = target_dialect.transform(normalized)?;
                let mut sql = if pretty {
                    target_dialect.generate_pretty_with_source(&transformed, self.dialect_type)?
                } else {
                    target_dialect.generate_with_source(&transformed, self.dialect_type)?
                };

                // Align a known Snowflake pretty-print edge case with Python sqlglot output.
                if pretty && target == DialectType::Snowflake {
                    sql = Self::normalize_snowflake_pretty(sql);
                }

                Ok(sql)
            })
            .collect()
    }
2642}
2643
2644// Transpile-only methods: cross-dialect normalization and helpers
2645#[cfg(feature = "transpile")]
2646impl Dialect {
    /// Transform BigQuery GENERATE_DATE_ARRAY in UNNEST for Snowflake target.
    /// Converts:
    ///   SELECT ..., alias, ... FROM t CROSS JOIN UNNEST(GENERATE_DATE_ARRAY(start, end, INTERVAL '1' unit)) AS alias
    /// To:
    ///   SELECT ..., DATEADD(unit, CAST(alias AS INT), CAST(start AS DATE)) AS alias, ...
    ///   FROM t, LATERAL FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (DATEDIFF(unit, start, end) + 1 - 1) + 1)) AS _t0(seq, key, path, index, alias, this)
    ///
    /// Only the first matching join is rewritten per SELECT; if no join matches,
    /// the FROM clause is checked instead (see `try_transform_from_gda_snowflake`).
    /// Also rewrites ARRAY_SIZE(GENERATE_DATE_ARRAY(...)) via
    /// `convert_array_size_gda_snowflake`.
    fn transform_generate_date_array_snowflake(expr: Expression) -> Result<Expression> {
        use crate::expressions::*;
        transform_recursive(expr, &|e| {
            // Handle ARRAY_SIZE(GENERATE_DATE_ARRAY(...)) -> ARRAY_SIZE((SELECT ARRAY_AGG(*) FROM subquery))
            if let Expression::ArraySize(ref af) = e {
                if let Expression::Function(ref f) = af.this {
                    if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY") && f.args.len() >= 2 {
                        let result = Self::convert_array_size_gda_snowflake(f)?;
                        return Ok(result);
                    }
                }
            }

            // Everything below only applies to SELECT nodes; pass others through.
            let Expression::Select(mut sel) = e else {
                return Ok(e);
            };

            // Find joins with UNNEST containing GenerateSeries (from GENERATE_DATE_ARRAY conversion)
            let mut gda_info: Option<(String, Expression, Expression, String)> = None; // (alias_name, start_expr, end_expr, unit)
            let mut gda_join_idx: Option<usize> = None;

            for (idx, join) in sel.joins.iter().enumerate() {
                // The join.this may be:
                // 1. Unnest(UnnestFunc { alias: Some("mnth"), ... })
                // 2. Alias(Alias { this: Unnest(UnnestFunc { alias: None, ... }), alias: "mnth", ... })
                let (unnest_ref, alias_name) = match &join.this {
                    Expression::Unnest(ref unnest) => {
                        let alias = unnest.alias.as_ref().map(|id| id.name.clone());
                        (Some(unnest.as_ref()), alias)
                    }
                    Expression::Alias(ref a) => {
                        if let Expression::Unnest(ref unnest) = a.this {
                            (Some(unnest.as_ref()), Some(a.alias.name.clone()))
                        } else {
                            (None, None)
                        }
                    }
                    _ => (None, None),
                };

                if let (Some(unnest), Some(alias)) = (unnest_ref, alias_name) {
                    // Check the main expression (this) of the UNNEST for GENERATE_DATE_ARRAY function
                    if let Expression::Function(ref f) = unnest.this {
                        if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY") && f.args.len() >= 2 {
                            let start_expr = f.args[0].clone();
                            let end_expr = f.args[1].clone();
                            let step = f.args.get(2).cloned();

                            // Extract unit from step interval.
                            // NOTE(review): unlike `extract_interval_unit_str`, this inline
                            // version yields None when the step argument is absent, so a
                            // step-less GENERATE_DATE_ARRAY in a JOIN is left untransformed
                            // — confirm whether that asymmetry is intentional.
                            let unit = if let Some(Expression::Interval(ref iv)) = step {
                                if let Some(IntervalUnitSpec::Simple { ref unit, .. }) = iv.unit {
                                    Some(format!("{:?}", unit).to_uppercase())
                                } else if let Some(ref this) = iv.this {
                                    // The interval may be stored as a string like "1 MONTH"
                                    if let Expression::Literal(Literal::String(ref s)) = this {
                                        let parts: Vec<&str> = s.split_whitespace().collect();
                                        if parts.len() == 2 {
                                            Some(parts[1].to_uppercase())
                                        } else if parts.len() == 1 {
                                            // Single word like "MONTH" or just "1"
                                            let upper = parts[0].to_uppercase();
                                            if matches!(
                                                upper.as_str(),
                                                "YEAR"
                                                    | "QUARTER"
                                                    | "MONTH"
                                                    | "WEEK"
                                                    | "DAY"
                                                    | "HOUR"
                                                    | "MINUTE"
                                                    | "SECOND"
                                            ) {
                                                Some(upper)
                                            } else {
                                                None
                                            }
                                        } else {
                                            None
                                        }
                                    } else {
                                        None
                                    }
                                } else {
                                    None
                                }
                            } else {
                                None
                            };

                            if let Some(unit_str) = unit {
                                gda_info = Some((alias, start_expr, end_expr, unit_str));
                                gda_join_idx = Some(idx);
                            }
                        }
                    }
                }
                // Stop at the first matching join; only one is rewritten per SELECT.
                if gda_info.is_some() {
                    break;
                }
            }

            let Some((alias_name, start_expr, end_expr, unit_str)) = gda_info else {
                // Also check FROM clause for UNNEST(GENERATE_DATE_ARRAY(...)) patterns
                // This handles Generic->Snowflake where GENERATE_DATE_ARRAY is in FROM, not in JOIN
                let result = Self::try_transform_from_gda_snowflake(sel);
                return result;
            };
            // Safe: gda_join_idx is always set together with gda_info above.
            let join_idx = gda_join_idx.unwrap();

            // Build ARRAY_GENERATE_RANGE(0, (DATEDIFF(unit, start, end) + 1 - 1) + 1)
            let datediff = Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![
                    // The unit keyword is emitted as a bare column so it renders
                    // unquoted (e.g. DATEDIFF(MONTH, ...)).
                    Expression::Column(Column {
                        name: Identifier::new(&unit_str),
                        table: None,
                        join_mark: false,
                        trailing_comments: vec![],
                        span: None,
                        inferred_type: None,
                    }),
                    start_expr.clone(),
                    end_expr.clone(),
                ],
            )));
            // (DATEDIFF(...) + 1 - 1) + 1
            // NOTE(review): the redundant "+ 1 - 1" is built deliberately — it
            // appears to mirror the exact arithmetic of the reference output;
            // do not simplify without checking golden outputs.
            let plus_one = Expression::Add(Box::new(BinaryOp {
                left: datediff,
                right: Expression::Literal(Literal::Number("1".to_string())),
                left_comments: vec![],
                operator_comments: vec![],
                trailing_comments: vec![],
                inferred_type: None,
            }));
            let minus_one = Expression::Sub(Box::new(BinaryOp {
                left: plus_one,
                right: Expression::Literal(Literal::Number("1".to_string())),
                left_comments: vec![],
                operator_comments: vec![],
                trailing_comments: vec![],
                inferred_type: None,
            }));
            let paren_inner = Expression::Paren(Box::new(Paren {
                this: minus_one,
                trailing_comments: vec![],
            }));
            let outer_plus_one = Expression::Add(Box::new(BinaryOp {
                left: paren_inner,
                right: Expression::Literal(Literal::Number("1".to_string())),
                left_comments: vec![],
                operator_comments: vec![],
                trailing_comments: vec![],
                inferred_type: None,
            }));

            let array_gen_range = Expression::Function(Box::new(Function::new(
                "ARRAY_GENERATE_RANGE".to_string(),
                vec![
                    Expression::Literal(Literal::Number("0".to_string())),
                    outer_plus_one,
                ],
            )));

            // Build FLATTEN(INPUT => ARRAY_GENERATE_RANGE(...))
            let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
                name: Identifier::new("INPUT"),
                value: array_gen_range,
                separator: crate::expressions::NamedArgSeparator::DArrow,
            }));
            let flatten = Expression::Function(Box::new(Function::new(
                "FLATTEN".to_string(),
                vec![flatten_input],
            )));

            // Build LATERAL FLATTEN(...) AS _t0(seq, key, path, index, alias, this)
            // The column-alias list follows FLATTEN's fixed output columns; the
            // original UNNEST alias takes the slot of FLATTEN's "value" column.
            let alias_table = Alias {
                this: flatten,
                alias: Identifier::new("_t0"),
                column_aliases: vec![
                    Identifier::new("seq"),
                    Identifier::new("key"),
                    Identifier::new("path"),
                    Identifier::new("index"),
                    Identifier::new(&alias_name),
                    Identifier::new("this"),
                ],
                pre_alias_comments: vec![],
                trailing_comments: vec![],
                inferred_type: None,
            };
            let lateral_expr = Expression::Lateral(Box::new(Lateral {
                this: Box::new(Expression::Alias(Box::new(alias_table))),
                view: None,
                outer: None,
                alias: None,
                alias_quoted: false,
                cross_apply: None,
                ordinality: None,
                column_aliases: vec![],
            }));

            // Remove the original join and add to FROM expressions
            sel.joins.remove(join_idx);
            if let Some(ref mut from) = sel.from {
                from.expressions.push(lateral_expr);
            }

            // Build DATEADD(unit, CAST(alias AS INT), CAST(start AS DATE))
            let dateadd_expr = Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Column(Column {
                        name: Identifier::new(&unit_str),
                        table: None,
                        join_mark: false,
                        trailing_comments: vec![],
                        span: None,
                        inferred_type: None,
                    }),
                    Expression::Cast(Box::new(Cast {
                        this: Expression::Column(Column {
                            name: Identifier::new(&alias_name),
                            table: None,
                            join_mark: false,
                            trailing_comments: vec![],
                            span: None,
                            inferred_type: None,
                        }),
                        to: DataType::Int {
                            length: None,
                            integer_spelling: false,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    })),
                    Expression::Cast(Box::new(Cast {
                        this: start_expr.clone(),
                        to: DataType::Date,
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    })),
                ],
            )));

            // Replace references to the alias in the SELECT list
            let new_exprs: Vec<Expression> = sel
                .expressions
                .iter()
                .map(|expr| Self::replace_column_ref_with_dateadd(expr, &alias_name, &dateadd_expr))
                .collect();
            sel.expressions = new_exprs;

            Ok(Expression::Select(sel))
        })
    }
2914
2915 /// Helper: replace column references to `alias_name` with dateadd expression
2916 fn replace_column_ref_with_dateadd(
2917 expr: &Expression,
2918 alias_name: &str,
2919 dateadd: &Expression,
2920 ) -> Expression {
2921 use crate::expressions::*;
2922 match expr {
2923 Expression::Column(c) if c.name.name == alias_name && c.table.is_none() => {
2924 // Plain column reference -> DATEADD(...) AS alias_name
2925 Expression::Alias(Box::new(Alias {
2926 this: dateadd.clone(),
2927 alias: Identifier::new(alias_name),
2928 column_aliases: vec![],
2929 pre_alias_comments: vec![],
2930 trailing_comments: vec![],
2931 inferred_type: None,
2932 }))
2933 }
2934 Expression::Alias(a) => {
2935 // Check if the inner expression references the alias
2936 let new_this = Self::replace_column_ref_inner(&a.this, alias_name, dateadd);
2937 Expression::Alias(Box::new(Alias {
2938 this: new_this,
2939 alias: a.alias.clone(),
2940 column_aliases: a.column_aliases.clone(),
2941 pre_alias_comments: a.pre_alias_comments.clone(),
2942 trailing_comments: a.trailing_comments.clone(),
2943 inferred_type: None,
2944 }))
2945 }
2946 _ => expr.clone(),
2947 }
2948 }
2949
2950 /// Helper: replace column references in inner expression (not top-level)
2951 fn replace_column_ref_inner(
2952 expr: &Expression,
2953 alias_name: &str,
2954 dateadd: &Expression,
2955 ) -> Expression {
2956 use crate::expressions::*;
2957 match expr {
2958 Expression::Column(c) if c.name.name == alias_name && c.table.is_none() => {
2959 dateadd.clone()
2960 }
2961 Expression::Add(op) => {
2962 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
2963 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
2964 Expression::Add(Box::new(BinaryOp {
2965 left,
2966 right,
2967 left_comments: op.left_comments.clone(),
2968 operator_comments: op.operator_comments.clone(),
2969 trailing_comments: op.trailing_comments.clone(),
2970 inferred_type: None,
2971 }))
2972 }
2973 Expression::Sub(op) => {
2974 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
2975 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
2976 Expression::Sub(Box::new(BinaryOp {
2977 left,
2978 right,
2979 left_comments: op.left_comments.clone(),
2980 operator_comments: op.operator_comments.clone(),
2981 trailing_comments: op.trailing_comments.clone(),
2982 inferred_type: None,
2983 }))
2984 }
2985 Expression::Mul(op) => {
2986 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
2987 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
2988 Expression::Mul(Box::new(BinaryOp {
2989 left,
2990 right,
2991 left_comments: op.left_comments.clone(),
2992 operator_comments: op.operator_comments.clone(),
2993 trailing_comments: op.trailing_comments.clone(),
2994 inferred_type: None,
2995 }))
2996 }
2997 _ => expr.clone(),
2998 }
2999 }
3000
    /// Handle UNNEST(GENERATE_DATE_ARRAY(...)) in FROM clause for Snowflake target.
    /// Converts to a subquery with DATEADD + TABLE(FLATTEN(ARRAY_GENERATE_RANGE(...))).
    ///
    /// Returns the SELECT unchanged when no FROM entry matches; otherwise only
    /// the first matching FROM entry is replaced.
    fn try_transform_from_gda_snowflake(
        mut sel: Box<crate::expressions::Select>,
    ) -> Result<Expression> {
        use crate::expressions::*;

        // Extract GDA info from FROM clause
        let mut gda_info: Option<(
            usize,
            String,
            Expression,
            Expression,
            String,
            Option<(String, Vec<Identifier>)>,
        )> = None; // (from_idx, col_name, start, end, unit, outer_alias)

        if let Some(ref from) = sel.from {
            for (idx, table_expr) in from.expressions.iter().enumerate() {
                // Pattern 1: UNNEST(GENERATE_DATE_ARRAY(...))
                // Pattern 2: Alias(UNNEST(GENERATE_DATE_ARRAY(...))) AS _q(date_week)
                let (unnest_opt, outer_alias_info) = match table_expr {
                    Expression::Unnest(ref unnest) => (Some(unnest.as_ref()), None),
                    Expression::Alias(ref a) => {
                        if let Expression::Unnest(ref unnest) = a.this {
                            let alias_info = (a.alias.name.clone(), a.column_aliases.clone());
                            (Some(unnest.as_ref()), Some(alias_info))
                        } else {
                            (None, None)
                        }
                    }
                    _ => (None, None),
                };

                if let Some(unnest) = unnest_opt {
                    // Check for GENERATE_DATE_ARRAY function
                    let func_opt = match &unnest.this {
                        Expression::Function(ref f)
                            if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY")
                                && f.args.len() >= 2 =>
                        {
                            Some(f)
                        }
                        // Also check for GenerateSeries (from earlier normalization)
                        _ => None,
                    };

                    if let Some(f) = func_opt {
                        let start_expr = f.args[0].clone();
                        let end_expr = f.args[1].clone();
                        let step = f.args.get(2).cloned();

                        // Extract unit and column name. With no explicit column
                        // alias the output column falls back to "value".
                        let unit = Self::extract_interval_unit_str(&step);
                        let col_name = outer_alias_info
                            .as_ref()
                            .and_then(|(_, cols)| cols.first().map(|id| id.name.clone()))
                            .unwrap_or_else(|| "value".to_string());

                        if let Some(unit_str) = unit {
                            gda_info = Some((
                                idx,
                                col_name,
                                start_expr,
                                end_expr,
                                unit_str,
                                outer_alias_info,
                            ));
                            break;
                        }
                    }
                }
            }
        }

        // Nothing to rewrite: return the SELECT as-is.
        let Some((from_idx, col_name, start_expr, end_expr, unit_str, outer_alias_info)) = gda_info
        else {
            return Ok(Expression::Select(sel));
        };

        // Build the Snowflake subquery:
        // (SELECT DATEADD(unit, CAST(col_name AS INT), CAST(start AS DATE)) AS col_name
        //  FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (DATEDIFF(unit, start, end) + 1 - 1) + 1))) AS _t0(seq, key, path, index, col_name, this))

        // DATEDIFF(unit, start, end) — unit is a bare column so it renders unquoted.
        let datediff = Expression::Function(Box::new(Function::new(
            "DATEDIFF".to_string(),
            vec![
                Expression::Column(Column {
                    name: Identifier::new(&unit_str),
                    table: None,
                    join_mark: false,
                    trailing_comments: vec![],
                    span: None,
                    inferred_type: None,
                }),
                start_expr.clone(),
                end_expr.clone(),
            ],
        )));
        // (DATEDIFF(...) + 1 - 1) + 1
        // NOTE(review): the redundant "+ 1 - 1" is intentional — it matches the
        // reference output shape; do not simplify without checking golden outputs.
        let plus_one = Expression::Add(Box::new(BinaryOp {
            left: datediff,
            right: Expression::Literal(Literal::Number("1".to_string())),
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        }));
        let minus_one = Expression::Sub(Box::new(BinaryOp {
            left: plus_one,
            right: Expression::Literal(Literal::Number("1".to_string())),
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        }));
        let paren_inner = Expression::Paren(Box::new(Paren {
            this: minus_one,
            trailing_comments: vec![],
        }));
        let outer_plus_one = Expression::Add(Box::new(BinaryOp {
            left: paren_inner,
            right: Expression::Literal(Literal::Number("1".to_string())),
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        }));

        let array_gen_range = Expression::Function(Box::new(Function::new(
            "ARRAY_GENERATE_RANGE".to_string(),
            vec![
                Expression::Literal(Literal::Number("0".to_string())),
                outer_plus_one,
            ],
        )));

        // TABLE(FLATTEN(INPUT => ...))
        let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
            name: Identifier::new("INPUT"),
            value: array_gen_range,
            separator: crate::expressions::NamedArgSeparator::DArrow,
        }));
        let flatten = Expression::Function(Box::new(Function::new(
            "FLATTEN".to_string(),
            vec![flatten_input],
        )));

        // Determine alias name for the table: use outer alias or _t0
        let table_alias_name = outer_alias_info
            .as_ref()
            .map(|(name, _)| name.clone())
            .unwrap_or_else(|| "_t0".to_string());

        // TABLE(FLATTEN(...)) AS _t0(seq, key, path, index, col_name, this)
        // Column aliases follow FLATTEN's fixed output columns; col_name takes
        // the slot of FLATTEN's "value" column.
        let table_func =
            Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])));
        let flatten_aliased = Expression::Alias(Box::new(Alias {
            this: table_func,
            alias: Identifier::new(&table_alias_name),
            column_aliases: vec![
                Identifier::new("seq"),
                Identifier::new("key"),
                Identifier::new("path"),
                Identifier::new("index"),
                Identifier::new(&col_name),
                Identifier::new("this"),
            ],
            pre_alias_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        }));

        // SELECT DATEADD(unit, CAST(col_name AS INT), CAST(start AS DATE)) AS col_name
        let dateadd_expr = Expression::Function(Box::new(Function::new(
            "DATEADD".to_string(),
            vec![
                Expression::Column(Column {
                    name: Identifier::new(&unit_str),
                    table: None,
                    join_mark: false,
                    trailing_comments: vec![],
                    span: None,
                    inferred_type: None,
                }),
                Expression::Cast(Box::new(Cast {
                    this: Expression::Column(Column {
                        name: Identifier::new(&col_name),
                        table: None,
                        join_mark: false,
                        trailing_comments: vec![],
                        span: None,
                        inferred_type: None,
                    }),
                    to: DataType::Int {
                        length: None,
                        integer_spelling: false,
                    },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                })),
                // Use start_expr directly - it's already been normalized (DATE literal -> CAST)
                start_expr.clone(),
            ],
        )));
        let dateadd_aliased = Expression::Alias(Box::new(Alias {
            this: dateadd_expr,
            alias: Identifier::new(&col_name),
            column_aliases: vec![],
            pre_alias_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        }));

        // Build inner SELECT
        let mut inner_select = Select::new();
        inner_select.expressions = vec![dateadd_aliased];
        inner_select.from = Some(From {
            expressions: vec![flatten_aliased],
        });

        let inner_select_expr = Expression::Select(Box::new(inner_select));
        let subquery = Expression::Subquery(Box::new(Subquery {
            this: inner_select_expr,
            alias: None,
            column_aliases: vec![],
            order_by: None,
            limit: None,
            offset: None,
            distribute_by: None,
            sort_by: None,
            cluster_by: None,
            lateral: false,
            modifiers_inside: false,
            trailing_comments: vec![],
            inferred_type: None,
        }));

        // If there was an outer alias (e.g., AS _q(date_week)), wrap with alias
        let replacement = if let Some((alias_name, col_aliases)) = outer_alias_info {
            Expression::Alias(Box::new(Alias {
                this: subquery,
                alias: Identifier::new(&alias_name),
                column_aliases: col_aliases,
                pre_alias_comments: vec![],
                trailing_comments: vec![],
                inferred_type: None,
            }))
        } else {
            subquery
        };

        // Replace the FROM expression
        if let Some(ref mut from) = sel.from {
            from.expressions[from_idx] = replacement;
        }

        Ok(Expression::Select(sel))
    }
3264
    /// Convert ARRAY_SIZE(GENERATE_DATE_ARRAY(start, end, step)) for Snowflake.
    /// Produces: ARRAY_SIZE((SELECT ARRAY_AGG(*) FROM (SELECT DATEADD(unit, CAST(value AS INT), start) AS value
    /// FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (DATEDIFF(unit, start, end) + 1 - 1) + 1))) AS _t0(...))))
    ///
    /// The caller guarantees `f.args.len() >= 2` (checked at the ARRAY_SIZE
    /// match site in `transform_generate_date_array_snowflake`), so indexing
    /// args[0]/args[1] is safe here.
    fn convert_array_size_gda_snowflake(f: &crate::expressions::Function) -> Result<Expression> {
        use crate::expressions::*;

        let start_expr = f.args[0].clone();
        let end_expr = f.args[1].clone();
        let step = f.args.get(2).cloned();
        // Unrecognised step intervals fall back to DAY here (unlike the JOIN
        // path, which skips the transform entirely).
        let unit_str = Self::extract_interval_unit_str(&step).unwrap_or_else(|| "DAY".to_string());
        let col_name = "value";

        // Build the inner subquery: same as try_transform_from_gda_snowflake
        let datediff = Expression::Function(Box::new(Function::new(
            "DATEDIFF".to_string(),
            vec![
                // Unit is emitted as a bare column so it renders unquoted.
                Expression::Column(Column {
                    name: Identifier::new(&unit_str),
                    table: None,
                    join_mark: false,
                    trailing_comments: vec![],
                    span: None,
                    inferred_type: None,
                }),
                start_expr.clone(),
                end_expr.clone(),
            ],
        )));
        // (DATEDIFF(...) + 1 - 1) + 1 — the redundant arithmetic is deliberate;
        // it matches the expected output shape, so keep it verbatim.
        let plus_one = Expression::Add(Box::new(BinaryOp {
            left: datediff,
            right: Expression::Literal(Literal::Number("1".to_string())),
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        }));
        let minus_one = Expression::Sub(Box::new(BinaryOp {
            left: plus_one,
            right: Expression::Literal(Literal::Number("1".to_string())),
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        }));
        let paren_inner = Expression::Paren(Box::new(Paren {
            this: minus_one,
            trailing_comments: vec![],
        }));
        let outer_plus_one = Expression::Add(Box::new(BinaryOp {
            left: paren_inner,
            right: Expression::Literal(Literal::Number("1".to_string())),
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        }));

        let array_gen_range = Expression::Function(Box::new(Function::new(
            "ARRAY_GENERATE_RANGE".to_string(),
            vec![
                Expression::Literal(Literal::Number("0".to_string())),
                outer_plus_one,
            ],
        )));

        // FLATTEN(INPUT => ARRAY_GENERATE_RANGE(...)) wrapped in TABLE(...)
        let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
            name: Identifier::new("INPUT"),
            value: array_gen_range,
            separator: crate::expressions::NamedArgSeparator::DArrow,
        }));
        let flatten = Expression::Function(Box::new(Function::new(
            "FLATTEN".to_string(),
            vec![flatten_input],
        )));

        let table_func =
            Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])));
        // Column aliases follow FLATTEN's fixed output columns; "value" takes
        // the slot of FLATTEN's value column.
        let flatten_aliased = Expression::Alias(Box::new(Alias {
            this: table_func,
            alias: Identifier::new("_t0"),
            column_aliases: vec![
                Identifier::new("seq"),
                Identifier::new("key"),
                Identifier::new("path"),
                Identifier::new("index"),
                Identifier::new(col_name),
                Identifier::new("this"),
            ],
            pre_alias_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        }));

        let dateadd_expr = Expression::Function(Box::new(Function::new(
            "DATEADD".to_string(),
            vec![
                Expression::Column(Column {
                    name: Identifier::new(&unit_str),
                    table: None,
                    join_mark: false,
                    trailing_comments: vec![],
                    span: None,
                    inferred_type: None,
                }),
                Expression::Cast(Box::new(Cast {
                    this: Expression::Column(Column {
                        name: Identifier::new(col_name),
                        table: None,
                        join_mark: false,
                        trailing_comments: vec![],
                        span: None,
                        inferred_type: None,
                    }),
                    to: DataType::Int {
                        length: None,
                        integer_spelling: false,
                    },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                })),
                // start_expr is used as-is — presumably already normalized
                // upstream, mirroring try_transform_from_gda_snowflake.
                start_expr.clone(),
            ],
        )));
        let dateadd_aliased = Expression::Alias(Box::new(Alias {
            this: dateadd_expr,
            alias: Identifier::new(col_name),
            column_aliases: vec![],
            pre_alias_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        }));

        // Inner SELECT: SELECT DATEADD(...) AS value FROM TABLE(FLATTEN(...)) AS _t0(...)
        let mut inner_select = Select::new();
        inner_select.expressions = vec![dateadd_aliased];
        inner_select.from = Some(From {
            expressions: vec![flatten_aliased],
        });

        // Wrap in subquery for the inner part
        let inner_subquery = Expression::Subquery(Box::new(Subquery {
            this: Expression::Select(Box::new(inner_select)),
            alias: None,
            column_aliases: vec![],
            order_by: None,
            limit: None,
            offset: None,
            distribute_by: None,
            sort_by: None,
            cluster_by: None,
            lateral: false,
            modifiers_inside: false,
            trailing_comments: vec![],
            inferred_type: None,
        }));

        // Outer: SELECT ARRAY_AGG(*) FROM (inner_subquery)
        let star = Expression::Star(Star {
            table: None,
            except: None,
            replace: None,
            rename: None,
            trailing_comments: vec![],
            span: None,
        });
        let array_agg = Expression::ArrayAgg(Box::new(AggFunc {
            this: star,
            distinct: false,
            filter: None,
            order_by: vec![],
            name: Some("ARRAY_AGG".to_string()),
            ignore_nulls: None,
            having_max: None,
            limit: None,
            inferred_type: None,
        }));

        let mut outer_select = Select::new();
        outer_select.expressions = vec![array_agg];
        outer_select.from = Some(From {
            expressions: vec![inner_subquery],
        });

        // Wrap in a subquery
        let outer_subquery = Expression::Subquery(Box::new(Subquery {
            this: Expression::Select(Box::new(outer_select)),
            alias: None,
            column_aliases: vec![],
            order_by: None,
            limit: None,
            offset: None,
            distribute_by: None,
            sort_by: None,
            cluster_by: None,
            lateral: false,
            modifiers_inside: false,
            trailing_comments: vec![],
            inferred_type: None,
        }));

        // ARRAY_SIZE(subquery)
        Ok(Expression::ArraySize(Box::new(UnaryFunc::new(
            outer_subquery,
        ))))
    }
3473
3474 /// Extract interval unit string from an optional step expression.
3475 fn extract_interval_unit_str(step: &Option<Expression>) -> Option<String> {
3476 use crate::expressions::*;
3477 if let Some(Expression::Interval(ref iv)) = step {
3478 if let Some(IntervalUnitSpec::Simple { ref unit, .. }) = iv.unit {
3479 return Some(format!("{:?}", unit).to_uppercase());
3480 }
3481 if let Some(ref this) = iv.this {
3482 if let Expression::Literal(Literal::String(ref s)) = this {
3483 let parts: Vec<&str> = s.split_whitespace().collect();
3484 if parts.len() == 2 {
3485 return Some(parts[1].to_uppercase());
3486 } else if parts.len() == 1 {
3487 let upper = parts[0].to_uppercase();
3488 if matches!(
3489 upper.as_str(),
3490 "YEAR"
3491 | "QUARTER"
3492 | "MONTH"
3493 | "WEEK"
3494 | "DAY"
3495 | "HOUR"
3496 | "MINUTE"
3497 | "SECOND"
3498 ) {
3499 return Some(upper);
3500 }
3501 }
3502 }
3503 }
3504 }
3505 // Default to DAY if no step or no interval
3506 if step.is_none() {
3507 return Some("DAY".to_string());
3508 }
3509 None
3510 }
3511
3512 fn normalize_snowflake_pretty(mut sql: String) -> String {
3513 if sql.contains("LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)")
3514 && sql.contains("ARRAY_GENERATE_RANGE(0, (GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1) + 1)")
3515 {
3516 sql = sql.replace(
3517 "AND uc.user_id <> ALL (SELECT DISTINCT\n _id\n FROM users, LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)\n WHERE\n GET_PATH(datasource.value, 'name') = 'something')",
3518 "AND uc.user_id <> ALL (\n SELECT DISTINCT\n _id\n FROM users, LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)\n WHERE\n GET_PATH(datasource.value, 'name') = 'something'\n )",
3519 );
3520
3521 sql = sql.replace(
3522 "CROSS JOIN TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1) + 1))) AS _u(seq, key, path, index, pos, this)",
3523 "CROSS JOIN TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (\n GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1\n) + 1))) AS _u(seq, key, path, index, pos, this)",
3524 );
3525
3526 sql = sql.replace(
3527 "OR (_u.pos > (ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1)\n AND _u_2.pos_2 = (ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1))",
3528 "OR (\n _u.pos > (\n ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1\n )\n AND _u_2.pos_2 = (\n ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1\n )\n )",
3529 );
3530 }
3531
3532 sql
3533 }
3534
3535 /// Apply cross-dialect semantic normalizations that depend on knowing both source and target.
3536 /// This handles cases where the same syntax has different semantics across dialects.
3537 fn cross_dialect_normalize(
3538 expr: Expression,
3539 source: DialectType,
3540 target: DialectType,
3541 ) -> Result<Expression> {
3542 use crate::expressions::{
3543 AggFunc, BinaryOp, Case, Cast, ConvertTimezone, DataType, DateTimeField, DateTruncFunc,
3544 Function, Identifier, IsNull, Literal, Null, Paren,
3545 };
3546
3547 // Helper to tag which kind of transform to apply
3548 #[derive(Debug)]
3549 enum Action {
3550 None,
3551 GreatestLeastNull,
3552 ArrayGenerateRange,
3553 Div0TypedDivision,
3554 ArrayAggCollectList,
3555 ArrayAggWithinGroupFilter,
3556 ArrayAggFilter,
3557 CastTimestampToDatetime,
3558 DateTruncWrapCast,
3559 ToDateToCast,
3560 ConvertTimezoneToExpr,
3561 SetToVariable,
3562 RegexpReplaceSnowflakeToDuckDB,
3563 BigQueryFunctionNormalize,
3564 BigQuerySafeDivide,
3565 BigQueryCastType,
3566 BigQueryToHexBare, // _BQ_TO_HEX(x) with no LOWER/UPPER wrapper
3567 BigQueryToHexLower, // LOWER(_BQ_TO_HEX(x))
3568 BigQueryToHexUpper, // UPPER(_BQ_TO_HEX(x))
3569 BigQueryLastDayStripUnit, // LAST_DAY(date, MONTH) -> LAST_DAY(date)
3570 BigQueryCastFormat, // CAST(x AS type FORMAT 'fmt') -> PARSE_DATE/PARSE_TIMESTAMP etc.
3571 BigQueryAnyValueHaving, // ANY_VALUE(x HAVING MAX/MIN y) -> ARG_MAX_NULL/ARG_MIN_NULL for DuckDB
3572 BigQueryApproxQuantiles, // APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
3573 GenericFunctionNormalize, // Cross-dialect function renaming (non-BigQuery sources)
3574 RegexpLikeToDuckDB, // RegexpLike -> REGEXP_MATCHES for DuckDB target (partial match)
3575 EpochConvert, // Expression::Epoch -> target-specific epoch function
3576 EpochMsConvert, // Expression::EpochMs -> target-specific epoch ms function
3577 TSQLTypeNormalize, // TSQL types (MONEY, SMALLMONEY, REAL, DATETIME2) -> standard types
3578 MySQLSafeDivide, // MySQL a/b -> a / NULLIF(b, 0) with optional CAST
3579 NullsOrdering, // Add NULLS FIRST/LAST for ORDER BY
3580 AlterTableRenameStripSchema, // ALTER TABLE db.t1 RENAME TO db.t2 -> ALTER TABLE db.t1 RENAME TO t2
3581 StringAggConvert, // STRING_AGG/WITHIN GROUP -> target-specific aggregate
3582 GroupConcatConvert, // GROUP_CONCAT -> target-specific aggregate
3583 TempTableHash, // TSQL #table -> temp table normalization
3584 ArrayLengthConvert, // CARDINALITY/ARRAY_LENGTH/ARRAY_SIZE -> target-specific
3585 DatePartUnquote, // DATE_PART('month', x) -> DATE_PART(month, x) for Snowflake target
3586 NvlClearOriginal, // Clear NVL original_name for cross-dialect transpilation
3587 HiveCastToTryCast, // Hive/Spark CAST -> TRY_CAST for targets that support it
3588 XorExpand, // MySQL XOR -> (a AND NOT b) OR (NOT a AND b) for non-XOR targets
3589 CastTimestampStripTz, // CAST(x AS TIMESTAMP WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark
3590 JsonExtractToGetJsonObject, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
3591 JsonExtractScalarToGetJsonObject, // JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
3592 JsonQueryValueConvert, // JsonQuery/JsonValue -> target-specific (ISNULL wrapper for TSQL, GET_JSON_OBJECT for Spark, etc.)
3593 JsonLiteralToJsonParse, // JSON 'x' -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake
3594 ArraySyntaxConvert, // ARRAY[x] -> ARRAY(x) for Spark, [x] for BigQuery/DuckDB
3595 AtTimeZoneConvert, // AT TIME ZONE -> AT_TIMEZONE (Presto) / FROM_UTC_TIMESTAMP (Spark)
3596 DayOfWeekConvert, // DAY_OF_WEEK -> dialect-specific
3597 MaxByMinByConvert, // MAX_BY/MIN_BY -> argMax/argMin for ClickHouse
3598 ArrayAggToCollectList, // ARRAY_AGG(x ORDER BY ...) -> COLLECT_LIST(x) for Hive/Spark
3599 ArrayAggToGroupConcat, // ARRAY_AGG(x) -> GROUP_CONCAT(x) for MySQL-like targets
3600 ElementAtConvert, // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
3601 CurrentUserParens, // CURRENT_USER -> CURRENT_USER() for Snowflake
3602 CastToJsonForSpark, // CAST(x AS JSON) -> TO_JSON(x) for Spark
3603 CastJsonToFromJson, // CAST(JSON_PARSE(literal) AS ARRAY/MAP) -> FROM_JSON(literal, type_string)
3604 ToJsonConvert, // TO_JSON(x) -> JSON_FORMAT(CAST(x AS JSON)) for Presto etc.
3605 ArrayAggNullFilter, // ARRAY_AGG(x) FILTER(WHERE cond) -> add AND NOT x IS NULL for DuckDB
3606 ArrayAggIgnoreNullsDuckDB, // ARRAY_AGG(x IGNORE NULLS ORDER BY ...) -> ARRAY_AGG(x ORDER BY a NULLS FIRST, ...) for DuckDB
3607 BigQueryPercentileContToDuckDB, // PERCENTILE_CONT(x, frac RESPECT NULLS) -> QUANTILE_CONT(x, frac) for DuckDB
3608 BigQueryArraySelectAsStructToSnowflake, // ARRAY(SELECT AS STRUCT ...) -> (SELECT ARRAY_AGG(OBJECT_CONSTRUCT(...)))
3609 CountDistinctMultiArg, // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN ... END)
3610 VarianceToClickHouse, // Expression::Variance -> varSamp for ClickHouse
3611 StddevToClickHouse, // Expression::Stddev -> stddevSamp for ClickHouse
3612 ApproxQuantileConvert, // Expression::ApproxQuantile -> APPROX_PERCENTILE for Snowflake
3613 ArrayIndexConvert, // array[1] -> array[0] for BigQuery (1-based to 0-based)
3614 DollarParamConvert, // $foo -> @foo for BigQuery
3615 TablesampleReservoir, // TABLESAMPLE (n ROWS) -> TABLESAMPLE RESERVOIR (n ROWS) for DuckDB
3616 BitAggFloatCast, // BIT_OR/BIT_AND/BIT_XOR float arg -> CAST(ROUND(CAST(arg)) AS INT) for DuckDB
3617 BitAggSnowflakeRename, // BIT_OR -> BITORAGG, BIT_AND -> BITANDAGG etc. for Snowflake
3618 StrftimeCastTimestamp, // CAST TIMESTAMP -> TIMESTAMP_NTZ for Spark in STRFTIME
3619 AnyValueIgnoreNulls, // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
3620 CreateTableStripComment, // Strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
3621 EscapeStringNormalize, // e'Hello\nworld' literal newline -> \n
3622 AnyToExists, // PostgreSQL x <op> ANY(array) -> EXISTS(array, x -> ...)
3623 ArrayConcatBracketConvert, // [1,2] -> ARRAY[1,2] for PostgreSQL in ARRAY_CAT
3624 SnowflakeIntervalFormat, // INTERVAL '2' HOUR -> INTERVAL '2 HOUR' for Snowflake
3625 AlterTableToSpRename, // ALTER TABLE RENAME -> EXEC sp_rename for TSQL
3626 StraightJoinCase, // STRAIGHT_JOIN -> straight_join for DuckDB
3627 RespectNullsConvert, // RESPECT NULLS window function handling
3628 MysqlNullsOrdering, // MySQL doesn't support NULLS ordering
3629 MysqlNullsLastRewrite, // Add CASE WHEN to ORDER BY for DuckDB -> MySQL (NULLS LAST simulation)
3630 BigQueryNullsOrdering, // BigQuery doesn't support NULLS FIRST/LAST - strip
3631 SnowflakeFloatProtect, // Protect FLOAT from being converted to DOUBLE by Snowflake target transform
3632 JsonToGetPath, // JSON arrow -> GET_PATH/PARSE_JSON for Snowflake
3633 FilterToIff, // FILTER(WHERE) -> IFF wrapping for Snowflake
3634 AggFilterToIff, // AggFunc.filter -> IFF wrapping for Snowflake (e.g., AVG(x) FILTER(WHERE cond))
3635 StructToRow, // DuckDB struct -> Presto ROW / BigQuery STRUCT
3636 SparkStructConvert, // Spark STRUCT(x AS col1, ...) -> ROW/DuckDB struct
3637 DecimalDefaultPrecision, // DECIMAL -> DECIMAL(18, 3) for Snowflake in BIT agg
3638 ApproxCountDistinctToApproxDistinct, // APPROX_COUNT_DISTINCT -> APPROX_DISTINCT for Presto/Trino
3639 CollectListToArrayAgg, // COLLECT_LIST -> ARRAY_AGG for Presto/DuckDB
3640 CollectSetConvert, // COLLECT_SET -> SET_AGG/ARRAY_AGG(DISTINCT)/ARRAY_UNIQUE_AGG
3641 PercentileConvert, // PERCENTILE -> QUANTILE/APPROX_PERCENTILE
3642 CorrIsnanWrap, // CORR(a,b) -> CASE WHEN ISNAN(CORR(a,b)) THEN NULL ELSE CORR(a,b) END
3643 TruncToDateTrunc, // TRUNC(ts, unit) -> DATE_TRUNC(unit, ts)
3644 ArrayContainsConvert, // ARRAY_CONTAINS -> CONTAINS/target-specific
3645 StrPositionExpand, // StrPosition with position -> complex STRPOS expansion for Presto/DuckDB
3646 TablesampleSnowflakeStrip, // Strip method and PERCENT for Snowflake target
3647 FirstToAnyValue, // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
3648 MonthsBetweenConvert, // Expression::MonthsBetween -> target-specific
3649 CurrentUserSparkParens, // CURRENT_USER -> CURRENT_USER() for Spark
3650 SparkDateFuncCast, // MONTH/YEAR/DAY('str') -> MONTH/YEAR/DAY(CAST('str' AS DATE)) from Spark
3651 MapFromArraysConvert, // Expression::MapFromArrays -> MAP/OBJECT_CONSTRUCT/MAP_FROM_ARRAYS
3652 AddMonthsConvert, // Expression::AddMonths -> target-specific DATEADD/DATE_ADD
3653 PercentileContConvert, // PERCENTILE_CONT/DISC WITHIN GROUP -> APPROX_PERCENTILE/PERCENTILE_APPROX
3654 GenerateSeriesConvert, // GENERATE_SERIES -> SEQUENCE/UNNEST(SEQUENCE)/EXPLODE(SEQUENCE)
3655 ConcatCoalesceWrap, // CONCAT(a, b) -> CONCAT(COALESCE(CAST(a), ''), ...) for Presto/ClickHouse
3656 PipeConcatToConcat, // a || b -> CONCAT(CAST(a), CAST(b)) for Presto
3657 DivFuncConvert, // DIV(a, b) -> a // b for DuckDB, CAST for BigQuery
3658 JsonObjectAggConvert, // JSON_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
3659 JsonbExistsConvert, // JSONB_EXISTS -> JSON_EXISTS for DuckDB
3660 DateBinConvert, // DATE_BIN -> TIME_BUCKET for DuckDB
3661 MysqlCastCharToText, // MySQL CAST(x AS CHAR) -> CAST(x AS TEXT/VARCHAR/STRING) for targets
3662 SparkCastVarcharToString, // Spark CAST(x AS VARCHAR/CHAR) -> CAST(x AS STRING) for Spark targets
3663 JsonExtractToArrow, // JSON_EXTRACT(x, path) -> x -> path for SQLite/DuckDB
3664 JsonExtractToTsql, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
3665 JsonExtractToClickHouse, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> JSONExtractString for ClickHouse
3666 JsonExtractScalarConvert, // JSON_EXTRACT_SCALAR -> target-specific (PostgreSQL, Snowflake, SQLite)
3667 JsonPathNormalize, // Normalize JSON path format (brackets, wildcards, quotes) for various dialects
3668 MinMaxToLeastGreatest, // Multi-arg MIN(a,b,c) -> LEAST(a,b,c), MAX(a,b,c) -> GREATEST(a,b,c)
3669 ClickHouseUniqToApproxCountDistinct, // uniq(x) -> APPROX_COUNT_DISTINCT(x) for non-ClickHouse targets
3670 ClickHouseAnyToAnyValue, // any(x) -> ANY_VALUE(x) for non-ClickHouse targets
3671 OracleVarchar2ToVarchar, // VARCHAR2(N CHAR/BYTE) -> VARCHAR(N) for non-Oracle targets
3672 Nvl2Expand, // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b ELSE c END
3673 IfnullToCoalesce, // IFNULL(a, b) -> COALESCE(a, b)
3674 IsAsciiConvert, // IS_ASCII(x) -> dialect-specific ASCII check
3675 StrPositionConvert, // STR_POSITION(haystack, needle[, pos]) -> dialect-specific
3676 DecodeSimplify, // DECODE with null-safe -> simple = comparison
3677 ArraySumConvert, // ARRAY_SUM -> target-specific
3678 ArraySizeConvert, // ARRAY_SIZE -> target-specific
3679 ArrayAnyConvert, // ARRAY_ANY -> target-specific
3680 CastTimestamptzToFunc, // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) for MySQL/StarRocks
3681 TsOrDsToDateConvert, // TS_OR_DS_TO_DATE(x[, fmt]) -> dialect-specific
3682 TsOrDsToDateStrConvert, // TS_OR_DS_TO_DATE_STR(x) -> SUBSTRING(CAST(x AS type), 1, 10)
3683 DateStrToDateConvert, // DATE_STR_TO_DATE(x) -> CAST(x AS DATE)
3684 TimeStrToDateConvert, // TIME_STR_TO_DATE(x) -> CAST(x AS DATE)
3685 TimeStrToTimeConvert, // TIME_STR_TO_TIME(x) -> CAST(x AS TIMESTAMP)
3686 DateToDateStrConvert, // DATE_TO_DATE_STR(x) -> CAST(x AS TEXT/VARCHAR/STRING)
3687 DateToDiConvert, // DATE_TO_DI(x) -> dialect-specific (CAST date to YYYYMMDD integer)
3688 DiToDateConvert, // DI_TO_DATE(x) -> dialect-specific (integer YYYYMMDD to date)
3689 TsOrDiToDiConvert, // TS_OR_DI_TO_DI(x) -> dialect-specific
3690 UnixToStrConvert, // UNIX_TO_STR(x, fmt) -> dialect-specific
3691 UnixToTimeConvert, // UNIX_TO_TIME(x) -> dialect-specific
3692 UnixToTimeStrConvert, // UNIX_TO_TIME_STR(x) -> dialect-specific
3693 TimeToUnixConvert, // TIME_TO_UNIX(x) -> dialect-specific
3694 TimeToStrConvert, // TIME_TO_STR(x, fmt) -> dialect-specific
3695 StrToUnixConvert, // STR_TO_UNIX(x, fmt) -> dialect-specific
3696 DateTruncSwapArgs, // DATE_TRUNC('unit', x) -> DATE_TRUNC(x, unit) / TRUNC(x, unit)
3697 TimestampTruncConvert, // TIMESTAMP_TRUNC(x, UNIT[, tz]) -> dialect-specific
3698 StrToDateConvert, // STR_TO_DATE(x, fmt) from Generic -> CAST(StrToTime(x,fmt) AS DATE)
3699 TsOrDsAddConvert, // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> DATE_ADD per dialect
3700 DateFromUnixDateConvert, // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, '1970-01-01')
3701 TimeStrToUnixConvert, // TIME_STR_TO_UNIX(x) -> dialect-specific
3702 TimeToTimeStrConvert, // TIME_TO_TIME_STR(x) -> CAST(x AS type)
3703 CreateTableLikeToCtas, // CREATE TABLE a LIKE b -> CREATE TABLE a AS SELECT * FROM b LIMIT 0
3704 CreateTableLikeToSelectInto, // CREATE TABLE a LIKE b -> SELECT TOP 0 * INTO a FROM b AS temp
3705 CreateTableLikeToAs, // CREATE TABLE a LIKE b -> CREATE TABLE a AS b (ClickHouse)
3706 ArrayRemoveConvert, // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter/ARRAY subquery
3707 ArrayReverseConvert, // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
3708 JsonKeysConvert, // JSON_KEYS -> JSON_OBJECT_KEYS/OBJECT_KEYS
3709 ParseJsonStrip, // PARSE_JSON(x) -> x (strip wrapper)
3710 ArraySizeDrill, // ARRAY_SIZE -> REPEATED_COUNT for Drill
3711 WeekOfYearToWeekIso, // WEEKOFYEAR -> WEEKISO for Snowflake cross-dialect
3712 }
3713
3714 // Handle SELECT INTO -> CREATE TABLE AS for DuckDB/Snowflake/etc.
3715 let expr = if matches!(source, DialectType::TSQL | DialectType::Fabric) {
3716 Self::transform_select_into(expr, source, target)
3717 } else {
3718 expr
3719 };
3720
3721 // Strip OFFSET ROWS for non-TSQL/Oracle targets
3722 let expr = if !matches!(
3723 target,
3724 DialectType::TSQL | DialectType::Oracle | DialectType::Fabric
3725 ) {
3726 if let Expression::Select(mut select) = expr {
3727 if let Some(ref mut offset) = select.offset {
3728 offset.rows = None;
3729 }
3730 Expression::Select(select)
3731 } else {
3732 expr
3733 }
3734 } else {
3735 expr
3736 };
3737
3738 // Oracle: LIMIT -> FETCH FIRST, OFFSET -> OFFSET ROWS
3739 let expr = if matches!(target, DialectType::Oracle) {
3740 if let Expression::Select(mut select) = expr {
3741 if let Some(limit) = select.limit.take() {
3742 // Convert LIMIT to FETCH FIRST n ROWS ONLY
3743 select.fetch = Some(crate::expressions::Fetch {
3744 direction: "FIRST".to_string(),
3745 count: Some(limit.this),
3746 percent: false,
3747 rows: true,
3748 with_ties: false,
3749 });
3750 }
3751 // Add ROWS to OFFSET if present
3752 if let Some(ref mut offset) = select.offset {
3753 offset.rows = Some(true);
3754 }
3755 Expression::Select(select)
3756 } else {
3757 expr
3758 }
3759 } else {
3760 expr
3761 };
3762
        // Handle CreateTable WITH properties transformation before recursive transforms.
        // This statement owns all CREATE TABLE target-dialect fixups: partition-column
        // merging, property stripping, constraint/column-type adjustments, and
        // IDENTITY/NOT NULL ordering. Each section below is independent and keyed
        // on `target` (and occasionally `source`); non-CreateTable expressions pass
        // through unchanged.
        let expr = if let Expression::CreateTable(mut ct) = expr {
            Self::transform_create_table_properties(&mut ct, source, target);

            // Handle Hive-style PARTITIONED BY (col_name type, ...) -> target-specific
            // When the PARTITIONED BY clause contains column definitions, merge them into the
            // main column list and adjust the PARTITIONED BY clause for the target dialect.
            if matches!(
                source,
                DialectType::Hive | DialectType::Spark | DialectType::Databricks
            ) {
                let mut partition_col_names: Vec<String> = Vec::new();
                let mut partition_col_defs: Vec<crate::expressions::ColumnDef> = Vec::new();
                let mut has_col_def_partitions = false;

                // Check if any PARTITIONED BY property contains ColumnDef expressions
                // (Hive declares partition columns with full type info, separate from
                // the main column list).
                for prop in &ct.properties {
                    if let Expression::PartitionedByProperty(ref pbp) = prop {
                        if let Expression::Tuple(ref tuple) = *pbp.this {
                            for expr in &tuple.expressions {
                                if let Expression::ColumnDef(ref cd) = expr {
                                    has_col_def_partitions = true;
                                    partition_col_names.push(cd.name.name.clone());
                                    partition_col_defs.push(*cd.clone());
                                }
                            }
                        }
                    }
                }

                // Hive targets keep the original form; everyone else gets the
                // partition columns folded into the main column list.
                if has_col_def_partitions && !matches!(target, DialectType::Hive) {
                    // Merge partition columns into main column list
                    for cd in partition_col_defs {
                        ct.columns.push(cd);
                    }

                    // Replace PARTITIONED BY property with column-name-only version
                    ct.properties
                        .retain(|p| !matches!(p, Expression::PartitionedByProperty(_)));

                    if matches!(
                        target,
                        DialectType::Presto | DialectType::Trino | DialectType::Athena
                    ) {
                        // Presto: WITH (PARTITIONED_BY=ARRAY['y', 'z'])
                        let array_elements: Vec<String> = partition_col_names
                            .iter()
                            .map(|n| format!("'{}'", n))
                            .collect();
                        let array_value = format!("ARRAY[{}]", array_elements.join(", "));
                        ct.with_properties
                            .push(("PARTITIONED_BY".to_string(), array_value));
                    } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
                        // Spark: PARTITIONED BY (y, z) - just column names
                        let name_exprs: Vec<Expression> = partition_col_names
                            .iter()
                            .map(|n| {
                                Expression::Column(crate::expressions::Column {
                                    name: crate::expressions::Identifier::new(n.clone()),
                                    table: None,
                                    join_mark: false,
                                    trailing_comments: Vec::new(),
                                    span: None,
                                    inferred_type: None,
                                })
                            })
                            .collect();
                        ct.properties.insert(
                            0,
                            Expression::PartitionedByProperty(Box::new(
                                crate::expressions::PartitionedByProperty {
                                    this: Box::new(Expression::Tuple(Box::new(
                                        crate::expressions::Tuple {
                                            expressions: name_exprs,
                                        },
                                    ))),
                                },
                            )),
                        );
                    }
                    // For DuckDB and other targets, just drop the PARTITIONED BY (already retained above)
                }

                // Note: Non-ColumnDef partitions (e.g., function expressions like MONTHS(y))
                // are handled by transform_create_table_properties which runs first
            }

            // Strip LOCATION property for Presto/Trino (not supported)
            if matches!(
                target,
                DialectType::Presto | DialectType::Trino | DialectType::Athena
            ) {
                ct.properties
                    .retain(|p| !matches!(p, Expression::LocationProperty(_)));
            }

            // Strip table-level constraints for Spark/Hive/Databricks
            // Keep PRIMARY KEY and LIKE constraints but strip TSQL-specific modifiers; remove all others
            if matches!(
                target,
                DialectType::Spark | DialectType::Databricks | DialectType::Hive
            ) {
                ct.constraints.retain(|c| {
                    matches!(
                        c,
                        crate::expressions::TableConstraint::PrimaryKey { .. }
                            | crate::expressions::TableConstraint::Like { .. }
                    )
                });
                for constraint in &mut ct.constraints {
                    if let crate::expressions::TableConstraint::PrimaryKey {
                        columns,
                        modifiers,
                        ..
                    } = constraint
                    {
                        // Strip ASC/DESC from column names
                        // (the ordering keyword is stored as a text suffix on the
                        // column name, so it is removed by truncating the string).
                        for col in columns.iter_mut() {
                            if col.name.ends_with(" ASC") {
                                col.name = col.name[..col.name.len() - 4].to_string();
                            } else if col.name.ends_with(" DESC") {
                                col.name = col.name[..col.name.len() - 5].to_string();
                            }
                        }
                        // Strip TSQL-specific modifiers
                        modifiers.clustered = None;
                        modifiers.with_options.clear();
                        modifiers.on_filegroup = None;
                    }
                }
            }

            // Databricks: IDENTITY columns with INT/INTEGER -> BIGINT
            if matches!(target, DialectType::Databricks) {
                for col in &mut ct.columns {
                    if col.auto_increment {
                        if matches!(col.data_type, crate::expressions::DataType::Int { .. }) {
                            col.data_type = crate::expressions::DataType::BigInt { length: None };
                        }
                    }
                }
            }

            // Spark/Databricks: INTEGER -> INT in column definitions
            // Python sqlglot always outputs INT for Spark/Databricks
            if matches!(target, DialectType::Spark | DialectType::Databricks) {
                for col in &mut ct.columns {
                    if let crate::expressions::DataType::Int {
                        integer_spelling, ..
                    } = &mut col.data_type
                    {
                        // false = render as "INT" rather than "INTEGER".
                        *integer_spelling = false;
                    }
                }
            }

            // Strip explicit NULL constraints for Hive/Spark (B INTEGER NULL -> B INTEGER)
            if matches!(target, DialectType::Hive | DialectType::Spark) {
                for col in &mut ct.columns {
                    // If nullable is explicitly true (NULL), change to None (omit it)
                    if col.nullable == Some(true) {
                        col.nullable = None;
                    }
                    // Also remove from constraints if stored there
                    col.constraints
                        .retain(|c| !matches!(c, crate::expressions::ColumnConstraint::Null));
                }
            }

            // Strip TSQL ON filegroup for non-TSQL/Fabric targets
            if ct.on_property.is_some()
                && !matches!(target, DialectType::TSQL | DialectType::Fabric)
            {
                ct.on_property = None;
            }

            // Snowflake: strip ARRAY type parameters (ARRAY<INT> -> ARRAY, ARRAY<ARRAY<INT>> -> ARRAY)
            // Snowflake doesn't support typed arrays in DDL
            if matches!(target, DialectType::Snowflake) {
                // Replaces the entire Array type (element type included) with a
                // bare Custom "ARRAY" type; no recursion into nested types needed
                // since the whole node is overwritten.
                fn strip_array_type_params(dt: &mut crate::expressions::DataType) {
                    if let crate::expressions::DataType::Array { .. } = dt {
                        *dt = crate::expressions::DataType::Custom {
                            name: "ARRAY".to_string(),
                        };
                    }
                }
                for col in &mut ct.columns {
                    strip_array_type_params(&mut col.data_type);
                }
            }

            // PostgreSQL target: ensure IDENTITY columns have NOT NULL
            // If NOT NULL was explicit in source (present in constraint_order), preserve original order.
            // If NOT NULL was not explicit, add it after IDENTITY (GENERATED BY DEFAULT AS IDENTITY NOT NULL).
            if matches!(target, DialectType::PostgreSQL) {
                for col in &mut ct.columns {
                    if col.auto_increment && !col.constraint_order.is_empty() {
                        use crate::expressions::ConstraintType;
                        // NB: the closure parameter `ct` below shadows the outer
                        // CreateTable binding; it is a &ConstraintType here.
                        let has_explicit_not_null = col
                            .constraint_order
                            .iter()
                            .any(|ct| *ct == ConstraintType::NotNull);

                        if has_explicit_not_null {
                            // Source had explicit NOT NULL - preserve original order
                            // Just ensure nullable is set
                            if col.nullable != Some(false) {
                                col.nullable = Some(false);
                            }
                        } else {
                            // Source didn't have explicit NOT NULL - build order with
                            // AutoIncrement + NotNull first, then remaining constraints
                            let mut new_order = Vec::new();
                            // Put AutoIncrement (IDENTITY) first, followed by synthetic NotNull
                            new_order.push(ConstraintType::AutoIncrement);
                            new_order.push(ConstraintType::NotNull);
                            // Add remaining constraints in original order (except AutoIncrement)
                            for ct_type in &col.constraint_order {
                                if *ct_type != ConstraintType::AutoIncrement {
                                    new_order.push(ct_type.clone());
                                }
                            }
                            col.constraint_order = new_order;
                            col.nullable = Some(false);
                        }
                    }
                }
            }

            Expression::CreateTable(ct)
        } else {
            expr
        };
3996
3997 // Handle CreateView column stripping for Presto/Trino target
3998 let expr = if let Expression::CreateView(mut cv) = expr {
3999 // Presto/Trino: drop column list when view has a SELECT body
4000 if matches!(target, DialectType::Presto | DialectType::Trino) && !cv.columns.is_empty()
4001 {
4002 if !matches!(&cv.query, Expression::Null(_)) {
4003 cv.columns.clear();
4004 }
4005 }
4006 Expression::CreateView(cv)
4007 } else {
4008 expr
4009 };
4010
        // Wrap bare VALUES in CTE bodies with SELECT * FROM (...) AS _values for generic/non-Presto targets
        // Presto/Trino/Athena accept `WITH x AS (VALUES ...)` directly; other
        // targets get each VALUES CTE body rewritten to a SELECT so the CTE is a
        // proper query expression.
        let expr = if !matches!(
            target,
            DialectType::Presto | DialectType::Trino | DialectType::Athena
        ) {
            if let Expression::Select(mut select) = expr {
                if let Some(ref mut with) = select.with {
                    for cte in &mut with.ctes {
                        if let Expression::Values(ref vals) = cte.this {
                            // Build: SELECT * FROM (VALUES ...) AS _values
                            // The derived table is aliased "_values" with no
                            // modifiers; all optional subquery fields stay unset.
                            let values_subquery =
                                Expression::Subquery(Box::new(crate::expressions::Subquery {
                                    this: Expression::Values(vals.clone()),
                                    alias: Some(Identifier::new("_values".to_string())),
                                    column_aliases: Vec::new(),
                                    order_by: None,
                                    limit: None,
                                    offset: None,
                                    distribute_by: None,
                                    sort_by: None,
                                    cluster_by: None,
                                    lateral: false,
                                    modifiers_inside: false,
                                    trailing_comments: Vec::new(),
                                    inferred_type: None,
                                }));
                            // Projection is a bare `*` with no EXCEPT/REPLACE/RENAME.
                            let mut new_select = crate::expressions::Select::new();
                            new_select.expressions =
                                vec![Expression::Star(crate::expressions::Star {
                                    table: None,
                                    except: None,
                                    replace: None,
                                    rename: None,
                                    trailing_comments: Vec::new(),
                                    span: None,
                                })];
                            new_select.from = Some(crate::expressions::From {
                                expressions: vec![values_subquery],
                            });
                            cte.this = Expression::Select(Box::new(new_select));
                        }
                    }
                }
                Expression::Select(select)
            } else {
                expr
            }
        } else {
            expr
        };
4061
4062 // PostgreSQL CREATE INDEX: add NULLS FIRST to index columns that don't have nulls ordering
4063 let expr = if matches!(target, DialectType::PostgreSQL) {
4064 if let Expression::CreateIndex(mut ci) = expr {
4065 for col in &mut ci.columns {
4066 if col.nulls_first.is_none() {
4067 col.nulls_first = Some(true);
4068 }
4069 }
4070 Expression::CreateIndex(ci)
4071 } else {
4072 expr
4073 }
4074 } else {
4075 expr
4076 };
4077
4078 transform_recursive(expr, &|e| {
            // BigQuery CAST(ARRAY[STRUCT(...)] AS STRUCT_TYPE[]) -> DuckDB: convert unnamed Structs to ROW()
            // This converts auto-named struct literals {'_0': x, '_1': y} inside typed arrays to ROW(x, y)
            if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
                if let Expression::Cast(ref c) = e {
                    // Check if this is a CAST of an array to a struct array type
                    let is_struct_array_cast =
                        matches!(&c.to, crate::expressions::DataType::Array { .. });
                    if is_struct_array_cast {
                        // A struct element counts as "auto-named" when every field
                        // name is either absent or of the form `_<digits>` (the
                        // placeholder names given to positional struct fields).
                        // Array and ArrayFunc forms are checked identically.
                        let has_auto_named_structs = match &c.this {
                            Expression::Array(arr) => arr.expressions.iter().any(|elem| {
                                if let Expression::Struct(s) = elem {
                                    s.fields.iter().all(|(name, _)| {
                                        name.as_ref().map_or(true, |n| {
                                            n.starts_with('_') && n[1..].parse::<usize>().is_ok()
                                        })
                                    })
                                } else {
                                    false
                                }
                            }),
                            Expression::ArrayFunc(arr) => arr.expressions.iter().any(|elem| {
                                if let Expression::Struct(s) = elem {
                                    s.fields.iter().all(|(name, _)| {
                                        name.as_ref().map_or(true, |n| {
                                            n.starts_with('_') && n[1..].parse::<usize>().is_ok()
                                        })
                                    })
                                } else {
                                    false
                                }
                            }),
                            _ => false,
                        };
                        if has_auto_named_structs {
                            // Drop the placeholder field names: STRUCT fields become
                            // positional arguments of a ROW(...) function call.
                            let convert_struct_to_row = |elem: Expression| -> Expression {
                                if let Expression::Struct(s) = elem {
                                    let row_args: Vec<Expression> =
                                        s.fields.into_iter().map(|(_, v)| v).collect();
                                    Expression::Function(Box::new(Function::new(
                                        "ROW".to_string(),
                                        row_args,
                                    )))
                                } else {
                                    elem
                                }
                            };
                            // Rebuild the CAST with converted array elements and
                            // short-circuit the rest of the closure.
                            let mut c_clone = c.as_ref().clone();
                            match &mut c_clone.this {
                                Expression::Array(arr) => {
                                    arr.expressions = arr
                                        .expressions
                                        .drain(..)
                                        .map(convert_struct_to_row)
                                        .collect();
                                }
                                Expression::ArrayFunc(arr) => {
                                    arr.expressions = arr
                                        .expressions
                                        .drain(..)
                                        .map(convert_struct_to_row)
                                        .collect();
                                }
                                _ => {}
                            }
                            return Ok(Expression::Cast(Box::new(c_clone)));
                        }
                    }
                }
            }
4148
4149 // BigQuery SELECT AS STRUCT -> DuckDB struct literal {'key': value, ...}
4150 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
4151 if let Expression::Select(ref sel) = e {
4152 if sel.kind.as_deref() == Some("STRUCT") {
4153 let mut fields = Vec::new();
4154 for expr in &sel.expressions {
4155 match expr {
4156 Expression::Alias(a) => {
4157 fields.push((Some(a.alias.name.clone()), a.this.clone()));
4158 }
4159 Expression::Column(c) => {
4160 fields.push((Some(c.name.name.clone()), expr.clone()));
4161 }
4162 _ => {
4163 fields.push((None, expr.clone()));
4164 }
4165 }
4166 }
4167 let struct_lit =
4168 Expression::Struct(Box::new(crate::expressions::Struct { fields }));
4169 let mut new_select = sel.as_ref().clone();
4170 new_select.kind = None;
4171 new_select.expressions = vec![struct_lit];
4172 return Ok(Expression::Select(Box::new(new_select)));
4173 }
4174 }
4175 }
4176
            // Convert @variable -> ${variable} for Spark/Hive/Databricks
            // Handles both representations of a TSQL variable: a Parameter node
            // with At style, and a Column whose name still carries the '@' prefix
            // (the fallback form some parse paths produce).
            if matches!(source, DialectType::TSQL | DialectType::Fabric)
                && matches!(
                    target,
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive
                )
            {
                if let Expression::Parameter(ref p) = e {
                    if p.style == crate::expressions::ParameterStyle::At {
                        if let Some(ref name) = p.name {
                            // Same parameter, restyled as ${name}; index/quoting
                            // flags are carried over, any expression is dropped.
                            return Ok(Expression::Parameter(Box::new(
                                crate::expressions::Parameter {
                                    name: Some(name.clone()),
                                    index: p.index,
                                    style: crate::expressions::ParameterStyle::DollarBrace,
                                    quoted: p.quoted,
                                    string_quoted: p.string_quoted,
                                    expression: None,
                                },
                            )));
                        }
                    }
                }
                // Also handle Column("@x") -> Parameter("x", DollarBrace) for TSQL vars
                if let Expression::Column(ref col) = e {
                    // Only bare columns qualify; a table-qualified "@x" is left alone.
                    if col.name.name.starts_with('@') && col.table.is_none() {
                        let var_name = col.name.name.trim_start_matches('@').to_string();
                        return Ok(Expression::Parameter(Box::new(
                            crate::expressions::Parameter {
                                name: Some(var_name),
                                index: None,
                                style: crate::expressions::ParameterStyle::DollarBrace,
                                quoted: false,
                                string_quoted: false,
                                expression: None,
                            },
                        )));
                    }
                }
            }
4217
            // Convert @variable -> variable in SET statements for Spark/Databricks
            // The SET target may have been parsed as a Parameter, an Identifier, or
            // a Column depending on the parse path; all three are normalized to a
            // plain Identifier without the '@' prefix. The statement is only
            // rebuilt when at least one item actually changed.
            if matches!(source, DialectType::TSQL | DialectType::Fabric)
                && matches!(target, DialectType::Spark | DialectType::Databricks)
            {
                if let Expression::SetStatement(ref s) = e {
                    let mut new_items = s.items.clone();
                    let mut changed = false;
                    for item in &mut new_items {
                        // Strip @ from the SET name (Parameter style)
                        if let Expression::Parameter(ref p) = item.name {
                            if p.style == crate::expressions::ParameterStyle::At {
                                if let Some(ref name) = p.name {
                                    item.name = Expression::Identifier(Identifier::new(name));
                                    changed = true;
                                }
                            }
                        }
                        // Strip @ from the SET name (Identifier style - SET parser)
                        if let Expression::Identifier(ref id) = item.name {
                            if id.name.starts_with('@') {
                                let var_name = id.name.trim_start_matches('@').to_string();
                                item.name = Expression::Identifier(Identifier::new(&var_name));
                                changed = true;
                            }
                        }
                        // Strip @ from the SET name (Column style - alternative parsing)
                        if let Expression::Column(ref col) = item.name {
                            if col.name.name.starts_with('@') && col.table.is_none() {
                                let var_name = col.name.name.trim_start_matches('@').to_string();
                                item.name = Expression::Identifier(Identifier::new(&var_name));
                                changed = true;
                            }
                        }
                    }
                    if changed {
                        let mut new_set = (**s).clone();
                        new_set.items = new_items;
                        return Ok(Expression::SetStatement(Box::new(new_set)));
                    }
                }
            }
4259
4260 // Strip NOLOCK hint for non-TSQL targets
4261 if matches!(source, DialectType::TSQL | DialectType::Fabric)
4262 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
4263 {
4264 if let Expression::Table(ref tr) = e {
4265 if !tr.hints.is_empty() {
4266 let mut new_tr = tr.clone();
4267 new_tr.hints.clear();
4268 return Ok(Expression::Table(new_tr));
4269 }
4270 }
4271 }
4272
4273 // Snowflake: TRUE IS TRUE -> TRUE, FALSE IS FALSE -> FALSE
4274 // Snowflake simplifies IS TRUE/IS FALSE on boolean literals
4275 if matches!(target, DialectType::Snowflake) {
4276 if let Expression::IsTrue(ref itf) = e {
4277 if let Expression::Boolean(ref b) = itf.this {
4278 if !itf.not {
4279 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
4280 value: b.value,
4281 }));
4282 } else {
4283 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
4284 value: !b.value,
4285 }));
4286 }
4287 }
4288 }
4289 if let Expression::IsFalse(ref itf) = e {
4290 if let Expression::Boolean(ref b) = itf.this {
4291 if !itf.not {
4292 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
4293 value: !b.value,
4294 }));
4295 } else {
4296 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
4297 value: b.value,
4298 }));
4299 }
4300 }
4301 }
4302 }
4303
            // BigQuery: split dotted backtick identifiers in table names
            // e.g., `a.b.c` -> "a"."b"."c" when source is BigQuery and target is not BigQuery
            // Applies to CREATE TABLE names and to CLONE sources. Only names that
            // have no schema yet and contain a '.' are split; 3 parts map to
            // catalog.schema.name, 2 parts to schema.name, anything else is left
            // untouched. Quoting of the original identifier is preserved on each part.
            if matches!(source, DialectType::BigQuery) && !matches!(target, DialectType::BigQuery) {
                if let Expression::CreateTable(ref ct) = e {
                    let mut changed = false;
                    let mut new_ct = ct.clone();
                    // Split the table name
                    if ct.name.schema.is_none() && ct.name.name.name.contains('.') {
                        let parts: Vec<&str> = ct.name.name.name.split('.').collect();
                        // Use quoted identifiers when the original was quoted (backtick in BigQuery)
                        let was_quoted = ct.name.name.quoted;
                        let mk_id = |s: &str| {
                            if was_quoted {
                                Identifier::quoted(s)
                            } else {
                                Identifier::new(s)
                            }
                        };
                        if parts.len() == 3 {
                            new_ct.name.catalog = Some(mk_id(parts[0]));
                            new_ct.name.schema = Some(mk_id(parts[1]));
                            new_ct.name.name = mk_id(parts[2]);
                            changed = true;
                        } else if parts.len() == 2 {
                            new_ct.name.schema = Some(mk_id(parts[0]));
                            new_ct.name.name = mk_id(parts[1]);
                            changed = true;
                        }
                    }
                    // Split the clone source name
                    if let Some(ref clone_src) = ct.clone_source {
                        if clone_src.schema.is_none() && clone_src.name.name.contains('.') {
                            let parts: Vec<&str> = clone_src.name.name.split('.').collect();
                            let was_quoted = clone_src.name.quoted;
                            let mk_id = |s: &str| {
                                if was_quoted {
                                    Identifier::quoted(s)
                                } else {
                                    Identifier::new(s)
                                }
                            };
                            let mut new_src = clone_src.clone();
                            if parts.len() == 3 {
                                new_src.catalog = Some(mk_id(parts[0]));
                                new_src.schema = Some(mk_id(parts[1]));
                                new_src.name = mk_id(parts[2]);
                                new_ct.clone_source = Some(new_src);
                                changed = true;
                            } else if parts.len() == 2 {
                                new_src.schema = Some(mk_id(parts[0]));
                                new_src.name = mk_id(parts[1]);
                                new_ct.clone_source = Some(new_src);
                                changed = true;
                            }
                        }
                    }
                    // Only rebuild the node when a split actually happened.
                    if changed {
                        return Ok(Expression::CreateTable(new_ct));
                    }
                }
            }
4365
// BigQuery array subscript: a[1], b[OFFSET(1)], c[ORDINAL(1)], d[SAFE_OFFSET(1)], e[SAFE_ORDINAL(1)]
// -> DuckDB/Presto: convert 0-based to 1-based, handle SAFE_* -> ELEMENT_AT for Presto
//
// `new_index` is the rewritten (1-based) index expression, or None when the
// subscript shape is not recognized (in which case we fall through without
// rewriting). `is_safe` records whether the original used a SAFE_* accessor,
// which for Presto-family targets maps to the null-returning ELEMENT_AT.
if matches!(source, DialectType::BigQuery)
    && matches!(
        target,
        DialectType::DuckDB
            | DialectType::Presto
            | DialectType::Trino
            | DialectType::Athena
    )
{
    if let Expression::Subscript(ref sub) = e {
        let (new_index, is_safe) = match &sub.index {
            // a[1] -> a[1+1] = a[2] (plain index is 0-based in BQ)
            Expression::Literal(Literal::Number(n)) => {
                if let Ok(val) = n.parse::<i64>() {
                    (
                        Some(Expression::Literal(Literal::Number(
                            (val + 1).to_string(),
                        ))),
                        false,
                    )
                } else {
                    // Non-integer numeric literal: leave the subscript alone.
                    (None, false)
                }
            }
            // OFFSET(n) -> n+1 (0-based)
            Expression::Function(ref f)
                if f.name.eq_ignore_ascii_case("OFFSET") && f.args.len() == 1 =>
            {
                if let Expression::Literal(Literal::Number(n)) = &f.args[0] {
                    if let Ok(val) = n.parse::<i64>() {
                        // Constant index: fold the +1 at transpile time.
                        (
                            Some(Expression::Literal(Literal::Number(
                                (val + 1).to_string(),
                            ))),
                            false,
                        )
                    } else {
                        // Unparseable number: emit a runtime `arg + 1`.
                        (
                            Some(Expression::Add(Box::new(
                                crate::expressions::BinaryOp::new(
                                    f.args[0].clone(),
                                    Expression::number(1),
                                ),
                            ))),
                            false,
                        )
                    }
                } else {
                    // Dynamic index expression: emit a runtime `arg + 1`.
                    (
                        Some(Expression::Add(Box::new(
                            crate::expressions::BinaryOp::new(
                                f.args[0].clone(),
                                Expression::number(1),
                            ),
                        ))),
                        false,
                    )
                }
            }
            // ORDINAL(n) -> n (already 1-based)
            Expression::Function(ref f)
                if f.name.eq_ignore_ascii_case("ORDINAL") && f.args.len() == 1 =>
            {
                (Some(f.args[0].clone()), false)
            }
            // SAFE_OFFSET(n) -> n+1 (0-based, safe)
            Expression::Function(ref f)
                if f.name.eq_ignore_ascii_case("SAFE_OFFSET") && f.args.len() == 1 =>
            {
                if let Expression::Literal(Literal::Number(n)) = &f.args[0] {
                    if let Ok(val) = n.parse::<i64>() {
                        (
                            Some(Expression::Literal(Literal::Number(
                                (val + 1).to_string(),
                            ))),
                            true,
                        )
                    } else {
                        (
                            Some(Expression::Add(Box::new(
                                crate::expressions::BinaryOp::new(
                                    f.args[0].clone(),
                                    Expression::number(1),
                                ),
                            ))),
                            true,
                        )
                    }
                } else {
                    (
                        Some(Expression::Add(Box::new(
                            crate::expressions::BinaryOp::new(
                                f.args[0].clone(),
                                Expression::number(1),
                            ),
                        ))),
                        true,
                    )
                }
            }
            // SAFE_ORDINAL(n) -> n (already 1-based, safe)
            Expression::Function(ref f)
                if f.name.eq_ignore_ascii_case("SAFE_ORDINAL") && f.args.len() == 1 =>
            {
                (Some(f.args[0].clone()), true)
            }
            _ => (None, false),
        };
        if let Some(idx) = new_index {
            if is_safe
                && matches!(
                    target,
                    DialectType::Presto | DialectType::Trino | DialectType::Athena
                )
            {
                // Presto: SAFE_OFFSET/SAFE_ORDINAL -> ELEMENT_AT(arr, idx)
                return Ok(Expression::Function(Box::new(Function::new(
                    "ELEMENT_AT".to_string(),
                    vec![sub.this.clone(), idx],
                ))));
            } else {
                // DuckDB or non-safe: just use subscript with converted index
                // NOTE(review): for DuckDB, SAFE_* also lands here and keeps
                // plain subscript semantics — confirm that matches intent.
                return Ok(Expression::Subscript(Box::new(
                    crate::expressions::Subscript {
                        this: sub.this.clone(),
                        index: idx,
                    },
                )));
            }
        }
    }
}
4500
// BigQuery LENGTH(x) -> DuckDB CASE TYPEOF(x) WHEN 'BLOB' THEN OCTET_LENGTH(...) ELSE LENGTH(...) END
//
// BigQuery's LENGTH works on both STRING (characters) and BYTES (bytes);
// DuckDB needs a runtime type dispatch to reproduce that: byte length for
// BLOBs via OCTET_LENGTH, character length for everything else. Both
// branches re-cast the argument so the inner function sees the expected type.
if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
    if let Expression::Length(ref uf) = e {
        let arg = uf.this.clone();
        // Runtime type probe used as the CASE operand.
        let typeof_func = Expression::Function(Box::new(Function::new(
            "TYPEOF".to_string(),
            vec![arg.clone()],
        )));
        // BLOB branch: OCTET_LENGTH(CAST(arg AS VARBINARY)).
        let blob_cast = Expression::Cast(Box::new(Cast {
            this: arg.clone(),
            to: DataType::VarBinary { length: None },
            trailing_comments: vec![],
            double_colon_syntax: false,
            format: None,
            default: None,
            inferred_type: None,
        }));
        let octet_length = Expression::Function(Box::new(Function::new(
            "OCTET_LENGTH".to_string(),
            vec![blob_cast],
        )));
        // Default branch: LENGTH(CAST(arg AS TEXT)).
        let text_cast = Expression::Cast(Box::new(Cast {
            this: arg,
            to: DataType::Text,
            trailing_comments: vec![],
            double_colon_syntax: false,
            format: None,
            default: None,
            inferred_type: None,
        }));
        let length_text = Expression::Length(Box::new(crate::expressions::UnaryFunc {
            this: text_cast,
            original_name: None,
            inferred_type: None,
        }));
        return Ok(Expression::Case(Box::new(Case {
            operand: Some(typeof_func),
            whens: vec![(
                Expression::Literal(Literal::String("BLOB".to_string())),
                octet_length,
            )],
            else_: Some(length_text),
            comments: Vec::new(),
            inferred_type: None,
        })));
    }
}
4548
4549 // BigQuery UNNEST alias handling (only for non-BigQuery sources):
4550 // UNNEST(...) AS x -> UNNEST(...) (drop unused table alias)
4551 // UNNEST(...) AS x(y) -> UNNEST(...) AS y (use column alias as main alias)
4552 if matches!(target, DialectType::BigQuery) && !matches!(source, DialectType::BigQuery) {
4553 if let Expression::Alias(ref a) = e {
4554 if matches!(&a.this, Expression::Unnest(_)) {
4555 if a.column_aliases.is_empty() {
4556 // Drop the entire alias, return just the UNNEST expression
4557 return Ok(a.this.clone());
4558 } else {
4559 // Use first column alias as the main alias
4560 let mut new_alias = a.as_ref().clone();
4561 new_alias.alias = a.column_aliases[0].clone();
4562 new_alias.column_aliases.clear();
4563 return Ok(Expression::Alias(Box::new(new_alias)));
4564 }
4565 }
4566 }
4567 }
4568
// BigQuery IN UNNEST(expr) -> IN (SELECT UNNEST/EXPLODE(expr)) for non-BigQuery targets
//
// The BigQuery-only `x IN UNNEST(arr)` form is lowered to a standard
// membership subquery. Hive-family targets spell the table function EXPLODE;
// everything else gets a plain UNNEST node. NOT/GLOBAL flags are carried over
// and the `unnest` slot is cleared so the rewrite is not re-applied.
if matches!(source, DialectType::BigQuery) && !matches!(target, DialectType::BigQuery) {
    if let Expression::In(ref in_expr) = e {
        if let Some(ref unnest_inner) = in_expr.unnest {
            // Build the function call for the target dialect
            let func_expr = if matches!(
                target,
                DialectType::Hive | DialectType::Spark | DialectType::Databricks
            ) {
                // Use EXPLODE for Hive/Spark
                Expression::Function(Box::new(Function::new(
                    "EXPLODE".to_string(),
                    vec![*unnest_inner.clone()],
                )))
            } else {
                // Use UNNEST for Presto/Trino/DuckDB/etc.
                Expression::Unnest(Box::new(crate::expressions::UnnestFunc {
                    this: *unnest_inner.clone(),
                    expressions: Vec::new(),
                    with_ordinality: false,
                    alias: None,
                    offset_alias: None,
                }))
            };

            // Wrap in SELECT
            let mut inner_select = crate::expressions::Select::new();
            inner_select.expressions = vec![func_expr];

            let subquery_expr = Expression::Select(Box::new(inner_select));

            return Ok(Expression::In(Box::new(crate::expressions::In {
                this: in_expr.this.clone(),
                expressions: Vec::new(),
                query: Some(subquery_expr),
                not: in_expr.not,
                global: in_expr.global,
                unnest: None,
                is_field: false,
            })));
        }
    }
}
4612
// SQLite: GENERATE_SERIES AS t(i) -> (SELECT value AS i FROM GENERATE_SERIES(...)) AS t
// This handles the subquery wrapping for RANGE -> GENERATE_SERIES in FROM context
//
// SQLite's generate_series virtual table exposes its output as a column
// named `value` and does not accept column aliases on the table alias, so
// the column alias is re-established by projecting `value AS <col>` inside
// a derived table that keeps the original table alias.
// NOTE(review): only the first column alias is honored — confirm callers
// never pass more than one.
if matches!(target, DialectType::SQLite) && matches!(source, DialectType::DuckDB) {
    if let Expression::Alias(ref a) = e {
        if let Expression::Function(ref f) = a.this {
            if f.name.eq_ignore_ascii_case("GENERATE_SERIES")
                && !a.column_aliases.is_empty()
            {
                // Build: (SELECT value AS col_alias FROM GENERATE_SERIES(start, end)) AS table_alias
                let col_alias = a.column_aliases[0].clone();
                let mut inner_select = crate::expressions::Select::new();
                inner_select.expressions =
                    vec![Expression::Alias(Box::new(crate::expressions::Alias::new(
                        Expression::Identifier(Identifier::new("value".to_string())),
                        col_alias,
                    )))];
                inner_select.from = Some(crate::expressions::From {
                    expressions: vec![a.this.clone()],
                });
                let subquery =
                    Expression::Subquery(Box::new(crate::expressions::Subquery {
                        this: Expression::Select(Box::new(inner_select)),
                        alias: Some(a.alias.clone()),
                        column_aliases: Vec::new(),
                        order_by: None,
                        limit: None,
                        offset: None,
                        lateral: false,
                        modifiers_inside: false,
                        trailing_comments: Vec::new(),
                        distribute_by: None,
                        sort_by: None,
                        cluster_by: None,
                        inferred_type: None,
                    }));
                return Ok(subquery);
            }
        }
    }
}
4653
// BigQuery implicit UNNEST: comma-join on array path -> CROSS JOIN UNNEST
// e.g., SELECT results FROM Coordinates, Coordinates.position AS results
// -> SELECT results FROM Coordinates CROSS JOIN UNNEST(Coordinates.position) AS results
//
// Detection: a FROM entry after the first that is either schema-qualified
// with the first table's name (Coordinates.position) or a single quoted
// identifier containing a dot (`Coordinates.position`) is treated as an
// array path into that table and turned into a CROSS JOIN.
// NOTE(review): only the FIRST FROM expression contributes to
// `first_tables` (`.take(1)`) — paths into later comma-joined tables are
// not recognized; confirm this is intentional.
if matches!(source, DialectType::BigQuery) {
    if let Expression::Select(ref s) = e {
        if let Some(ref from) = s.from {
            if from.expressions.len() >= 2 {
                // Collect table names from first expression
                let first_tables: Vec<String> = from
                    .expressions
                    .iter()
                    .take(1)
                    .filter_map(|expr| {
                        if let Expression::Table(t) = expr {
                            Some(t.name.name.to_lowercase())
                        } else {
                            None
                        }
                    })
                    .collect();

                // Check if any subsequent FROM expressions are schema-qualified with a matching table name
                // or have a dotted name matching a table
                let mut needs_rewrite = false;
                for expr in from.expressions.iter().skip(1) {
                    if let Expression::Table(t) = expr {
                        if let Some(ref schema) = t.schema {
                            if first_tables.contains(&schema.name.to_lowercase()) {
                                needs_rewrite = true;
                                break;
                            }
                        }
                        // Also check dotted names in quoted identifiers (e.g., `Coordinates.position`)
                        if t.schema.is_none() && t.name.name.contains('.') {
                            let parts: Vec<&str> = t.name.name.split('.').collect();
                            if parts.len() >= 2
                                && first_tables.contains(&parts[0].to_lowercase())
                            {
                                needs_rewrite = true;
                                break;
                            }
                        }
                    }
                }

                if needs_rewrite {
                    let mut new_select = s.clone();
                    // Keep the first FROM entry; every later entry either
                    // becomes a CROSS JOIN (array path) or is preserved as-is.
                    let mut new_from_exprs = vec![from.expressions[0].clone()];
                    let mut new_joins = s.joins.clone();

                    for expr in from.expressions.iter().skip(1) {
                        if let Expression::Table(ref t) = expr {
                            if let Some(ref schema) = t.schema {
                                if first_tables.contains(&schema.name.to_lowercase()) {
                                    // This is an array path reference, convert to CROSS JOIN UNNEST
                                    let col_expr = Expression::Column(
                                        crate::expressions::Column {
                                            name: t.name.clone(),
                                            table: Some(schema.clone()),
                                            join_mark: false,
                                            trailing_comments: vec![],
                                            span: None,
                                            inferred_type: None,
                                        },
                                    );
                                    let unnest_expr = Expression::Unnest(Box::new(
                                        crate::expressions::UnnestFunc {
                                            this: col_expr,
                                            expressions: Vec::new(),
                                            with_ordinality: false,
                                            alias: None,
                                            offset_alias: None,
                                        },
                                    ));
                                    let join_this = if let Some(ref alias) = t.alias {
                                        if matches!(
                                            target,
                                            DialectType::Presto
                                                | DialectType::Trino
                                                | DialectType::Athena
                                        ) {
                                            // Presto: UNNEST(x) AS _t0(results)
                                            // (table alias is synthetic; the
                                            // original alias becomes the column)
                                            Expression::Alias(Box::new(
                                                crate::expressions::Alias {
                                                    this: unnest_expr,
                                                    alias: Identifier::new("_t0"),
                                                    column_aliases: vec![alias.clone()],
                                                    pre_alias_comments: vec![],
                                                    trailing_comments: vec![],
                                                    inferred_type: None,
                                                },
                                            ))
                                        } else {
                                            // BigQuery: UNNEST(x) AS results
                                            Expression::Alias(Box::new(
                                                crate::expressions::Alias {
                                                    this: unnest_expr,
                                                    alias: alias.clone(),
                                                    column_aliases: vec![],
                                                    pre_alias_comments: vec![],
                                                    trailing_comments: vec![],
                                                    inferred_type: None,
                                                },
                                            ))
                                        }
                                    } else {
                                        unnest_expr
                                    };
                                    new_joins.push(crate::expressions::Join {
                                        kind: crate::expressions::JoinKind::Cross,
                                        this: join_this,
                                        on: None,
                                        using: Vec::new(),
                                        use_inner_keyword: false,
                                        use_outer_keyword: false,
                                        deferred_condition: false,
                                        join_hint: None,
                                        match_condition: None,
                                        pivots: Vec::new(),
                                        comments: Vec::new(),
                                        nesting_group: 0,
                                        directed: false,
                                    });
                                } else {
                                    new_from_exprs.push(expr.clone());
                                }
                            } else if t.schema.is_none() && t.name.name.contains('.') {
                                // Dotted name in quoted identifier: `Coordinates.position`
                                let parts: Vec<&str> = t.name.name.split('.').collect();
                                if parts.len() >= 2
                                    && first_tables.contains(&parts[0].to_lowercase())
                                {
                                    let join_this =
                                        if matches!(target, DialectType::BigQuery) {
                                            // BigQuery: keep as single quoted identifier, just convert comma -> CROSS JOIN
                                            Expression::Table(t.clone())
                                        } else {
                                            // Other targets: split into "schema"."name"
                                            let mut new_t = t.clone();
                                            new_t.schema =
                                                Some(Identifier::quoted(parts[0]));
                                            new_t.name = Identifier::quoted(parts[1]);
                                            Expression::Table(new_t)
                                        };
                                    new_joins.push(crate::expressions::Join {
                                        kind: crate::expressions::JoinKind::Cross,
                                        this: join_this,
                                        on: None,
                                        using: Vec::new(),
                                        use_inner_keyword: false,
                                        use_outer_keyword: false,
                                        deferred_condition: false,
                                        join_hint: None,
                                        match_condition: None,
                                        pivots: Vec::new(),
                                        comments: Vec::new(),
                                        nesting_group: 0,
                                        directed: false,
                                    });
                                } else {
                                    new_from_exprs.push(expr.clone());
                                }
                            } else {
                                new_from_exprs.push(expr.clone());
                            }
                        } else {
                            new_from_exprs.push(expr.clone());
                        }
                    }

                    new_select.from = Some(crate::expressions::From {
                        expressions: new_from_exprs,
                        ..from.clone()
                    });
                    new_select.joins = new_joins;
                    return Ok(Expression::Select(new_select));
                }
            }
        }
    }
}
4835
// CROSS JOIN UNNEST -> LATERAL VIEW EXPLODE for Hive/Spark
//
// Each CROSS JOIN whose right side is an UNNEST (or an EXPLODE call) is
// removed from the join list and appended to `select.lateral_views`:
//   UNNEST(x)                  -> LATERAL VIEW EXPLODE(x)
//   UNNEST(x) WITH ORDINALITY  -> LATERAL VIEW POSEXPLODE(x) (prepends `pos`)
//   UNNEST(y, z)               -> LATERAL VIEW INLINE(ARRAYS_ZIP(y, z))
// Non-matching CROSS JOINs and all other join kinds are kept unchanged.
if matches!(
    target,
    DialectType::Hive | DialectType::Spark | DialectType::Databricks
) {
    if let Expression::Select(ref s) = e {
        // Check if any joins are CROSS JOIN with UNNEST/EXPLODE
        let is_unnest_or_explode_expr = |expr: &Expression| -> bool {
            matches!(expr, Expression::Unnest(_))
                || matches!(expr, Expression::Function(f) if f.name.eq_ignore_ascii_case("EXPLODE"))
        };
        let has_unnest_join = s.joins.iter().any(|j| {
            j.kind == crate::expressions::JoinKind::Cross && (
                matches!(&j.this, Expression::Alias(a) if is_unnest_or_explode_expr(&a.this))
                || is_unnest_or_explode_expr(&j.this)
            )
        });
        if has_unnest_join {
            let mut select = s.clone();
            let mut new_joins = Vec::new();
            for join in select.joins.drain(..) {
                if join.kind == crate::expressions::JoinKind::Cross {
                    // Extract the UNNEST/EXPLODE from the join
                    // (func_expr == None means "not convertible; keep join").
                    let (func_expr, table_alias, col_aliases) = match &join.this {
                        Expression::Alias(a) => {
                            let ta = if a.alias.is_empty() {
                                None
                            } else {
                                Some(a.alias.clone())
                            };
                            let cas = a.column_aliases.clone();
                            match &a.this {
                                Expression::Unnest(u) => {
                                    // Multi-arg UNNEST(y, z) -> INLINE(ARRAYS_ZIP(y, z))
                                    if !u.expressions.is_empty() {
                                        let mut all_args = vec![u.this.clone()];
                                        all_args.extend(u.expressions.clone());
                                        let arrays_zip =
                                            Expression::Function(Box::new(
                                                crate::expressions::Function::new(
                                                    "ARRAYS_ZIP".to_string(),
                                                    all_args,
                                                ),
                                            ));
                                        let inline = Expression::Function(Box::new(
                                            crate::expressions::Function::new(
                                                "INLINE".to_string(),
                                                vec![arrays_zip],
                                            ),
                                        ));
                                        (Some(inline), ta, a.column_aliases.clone())
                                    } else {
                                        // Convert UNNEST(x) to EXPLODE(x) or POSEXPLODE(x)
                                        let func_name = if u.with_ordinality {
                                            "POSEXPLODE"
                                        } else {
                                            "EXPLODE"
                                        };
                                        let explode = Expression::Function(Box::new(
                                            crate::expressions::Function::new(
                                                func_name.to_string(),
                                                vec![u.this.clone()],
                                            ),
                                        ));
                                        // For POSEXPLODE, add 'pos' to column aliases
                                        let cas = if u.with_ordinality {
                                            let mut pos_aliases =
                                                vec![Identifier::new(
                                                    "pos".to_string(),
                                                )];
                                            pos_aliases
                                                .extend(a.column_aliases.clone());
                                            pos_aliases
                                        } else {
                                            a.column_aliases.clone()
                                        };
                                        (Some(explode), ta, cas)
                                    }
                                }
                                Expression::Function(f)
                                    if f.name.eq_ignore_ascii_case("EXPLODE") =>
                                {
                                    (Some(Expression::Function(f.clone())), ta, cas)
                                }
                                _ => (None, None, Vec::new()),
                            }
                        }
                        Expression::Unnest(u) => {
                            // Bare (un-aliased) UNNEST directly under the join.
                            let func_name = if u.with_ordinality {
                                "POSEXPLODE"
                            } else {
                                "EXPLODE"
                            };
                            let explode = Expression::Function(Box::new(
                                crate::expressions::Function::new(
                                    func_name.to_string(),
                                    vec![u.this.clone()],
                                ),
                            ));
                            let ta = u.alias.clone();
                            let col_aliases = if u.with_ordinality {
                                vec![Identifier::new("pos".to_string())]
                            } else {
                                Vec::new()
                            };
                            (Some(explode), ta, col_aliases)
                        }
                        _ => (None, None, Vec::new()),
                    };
                    if let Some(func) = func_expr {
                        select.lateral_views.push(crate::expressions::LateralView {
                            this: func,
                            table_alias,
                            column_aliases: col_aliases,
                            outer: false,
                        });
                    } else {
                        new_joins.push(join);
                    }
                } else {
                    new_joins.push(join);
                }
            }
            select.joins = new_joins;
            return Ok(Expression::Select(select));
        }
    }
}
4964
// UNNEST expansion: DuckDB SELECT UNNEST(arr) in SELECT list -> expanded query
// for BigQuery, Presto/Trino, Snowflake
//
// This is only the detection gate: a recursive scan of each projection
// (through aliases and the four arithmetic binary ops) looks for an UNNEST
// node or an UNNEST() call; the actual rewrite is delegated to
// `rewrite_unnest_expansion`, which may decline by returning None.
if matches!(source, DialectType::DuckDB | DialectType::PostgreSQL)
    && matches!(
        target,
        DialectType::BigQuery
            | DialectType::Presto
            | DialectType::Trino
            | DialectType::Snowflake
    )
{
    if let Expression::Select(ref s) = e {
        // Check if any SELECT expressions contain UNNEST
        // Note: UNNEST can appear as Expression::Unnest OR Expression::Function("UNNEST")
        let has_unnest_in_select = s.expressions.iter().any(|expr| {
            fn contains_unnest(e: &Expression) -> bool {
                match e {
                    Expression::Unnest(_) => true,
                    Expression::Function(f)
                        if f.name.eq_ignore_ascii_case("UNNEST") =>
                    {
                        true
                    }
                    Expression::Alias(a) => contains_unnest(&a.this),
                    // Arithmetic over an UNNEST still counts (e.g. UNNEST(a) + 1).
                    Expression::Add(op)
                    | Expression::Sub(op)
                    | Expression::Mul(op)
                    | Expression::Div(op) => {
                        contains_unnest(&op.left) || contains_unnest(&op.right)
                    }
                    _ => false,
                }
            }
            contains_unnest(expr)
        });

        if has_unnest_in_select {
            let rewritten = Self::rewrite_unnest_expansion(s, target);
            if let Some(new_select) = rewritten {
                return Ok(Expression::Select(Box::new(new_select)));
            }
        }
    }
}
5009
5010 // BigQuery -> PostgreSQL: convert escape sequences in string literals to actual characters
5011 // BigQuery '\n' -> PostgreSQL literal newline in string
5012 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::PostgreSQL)
5013 {
5014 if let Expression::Literal(Literal::String(ref s)) = e {
5015 if s.contains("\\n")
5016 || s.contains("\\t")
5017 || s.contains("\\r")
5018 || s.contains("\\\\")
5019 {
5020 let converted = s
5021 .replace("\\n", "\n")
5022 .replace("\\t", "\t")
5023 .replace("\\r", "\r")
5024 .replace("\\\\", "\\");
5025 return Ok(Expression::Literal(Literal::String(converted)));
5026 }
5027 }
5028 }
5029
// Cross-dialect: convert Literal::Timestamp to target-specific CAST form
// when source != target (identity tests keep the Literal::Timestamp for native handling)
//
// The timestamp literal becomes CAST('<s>' AS <dt>) where <dt> depends on
// the target dialect and — because BigQuery's TIMESTAMP is always
// timezone-aware — sometimes on the source as well. MySQL is special-cased
// first because it uses a TIMESTAMP() function call rather than a CAST for
// the timezone-aware case.
if source != target {
    if let Expression::Literal(Literal::Timestamp(ref s)) = e {
        let s = s.clone();
        // MySQL: TIMESTAMP handling depends on source dialect
        // BigQuery TIMESTAMP is timezone-aware -> TIMESTAMP() function in MySQL
        // Other sources' TIMESTAMP is non-timezone -> CAST('x' AS DATETIME) in MySQL
        if matches!(target, DialectType::MySQL) {
            if matches!(source, DialectType::BigQuery) {
                // BigQuery TIMESTAMP is timezone-aware -> MySQL TIMESTAMP() function
                return Ok(Expression::Function(Box::new(Function::new(
                    "TIMESTAMP".to_string(),
                    vec![Expression::Literal(Literal::String(s))],
                ))));
            } else {
                // Non-timezone TIMESTAMP -> CAST('x' AS DATETIME) in MySQL
                return Ok(Expression::Cast(Box::new(Cast {
                    this: Expression::Literal(Literal::String(s)),
                    to: DataType::Custom {
                        name: "DATETIME".to_string(),
                    },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                })));
            }
        }
        // All other targets: pick the CAST target type.
        let dt = match target {
            DialectType::BigQuery | DialectType::StarRocks => DataType::Custom {
                name: "DATETIME".to_string(),
            },
            DialectType::Snowflake => {
                // BigQuery TIMESTAMP is timezone-aware -> use TIMESTAMPTZ for Snowflake
                if matches!(source, DialectType::BigQuery) {
                    DataType::Custom {
                        name: "TIMESTAMPTZ".to_string(),
                    }
                } else if matches!(
                    source,
                    DialectType::PostgreSQL
                        | DialectType::Redshift
                        | DialectType::Snowflake
                ) {
                    DataType::Timestamp {
                        precision: None,
                        timezone: false,
                    }
                } else {
                    DataType::Custom {
                        name: "TIMESTAMPNTZ".to_string(),
                    }
                }
            }
            DialectType::Spark | DialectType::Databricks => {
                // BigQuery TIMESTAMP is timezone-aware -> use plain TIMESTAMP for Spark/Databricks
                if matches!(source, DialectType::BigQuery) {
                    DataType::Timestamp {
                        precision: None,
                        timezone: false,
                    }
                } else {
                    DataType::Custom {
                        name: "TIMESTAMP_NTZ".to_string(),
                    }
                }
            }
            DialectType::ClickHouse => DataType::Nullable {
                inner: Box::new(DataType::Custom {
                    name: "DateTime".to_string(),
                }),
            },
            DialectType::TSQL | DialectType::Fabric => DataType::Custom {
                name: "DATETIME2".to_string(),
            },
            DialectType::DuckDB => {
                // DuckDB: use TIMESTAMPTZ when source is BigQuery (BQ TIMESTAMP is always UTC/tz-aware)
                // or when the timestamp string explicitly has timezone info
                if matches!(source, DialectType::BigQuery)
                    || Self::timestamp_string_has_timezone(&s)
                {
                    DataType::Custom {
                        name: "TIMESTAMPTZ".to_string(),
                    }
                } else {
                    DataType::Timestamp {
                        precision: None,
                        timezone: false,
                    }
                }
            }
            _ => DataType::Timestamp {
                precision: None,
                timezone: false,
            },
        };
        return Ok(Expression::Cast(Box::new(Cast {
            this: Expression::Literal(Literal::String(s)),
            to: dt,
            trailing_comments: vec![],
            double_colon_syntax: false,
            format: None,
            default: None,
            inferred_type: None,
        })));
    }
}
5139
5140 // PostgreSQL DELETE requires explicit AS for table aliases
5141 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
5142 if let Expression::Delete(ref del) = e {
5143 if del.alias.is_some() && !del.alias_explicit_as {
5144 let mut new_del = del.clone();
5145 new_del.alias_explicit_as = true;
5146 return Ok(Expression::Delete(new_del));
5147 }
5148 }
5149 }
5150
// UNION/INTERSECT/EXCEPT DISTINCT handling:
// Some dialects require explicit DISTINCT (BigQuery, ClickHouse),
// while others don't support it (Presto, Spark, DuckDB, etc.)
//
// Two directions, driven only by the target:
//   - needs_distinct: add an explicit DISTINCT to non-ALL set operations.
//   - drop_distinct: remove an explicit DISTINCT (plain set op is already
//     distinct in these dialects).
// The `!u.all` guard ensures ALL set operations are never touched.
{
    let needs_distinct =
        matches!(target, DialectType::BigQuery | DialectType::ClickHouse);
    let drop_distinct = matches!(
        target,
        DialectType::Presto
            | DialectType::Trino
            | DialectType::Athena
            | DialectType::Spark
            | DialectType::Databricks
            | DialectType::DuckDB
            | DialectType::Hive
            | DialectType::MySQL
            | DialectType::PostgreSQL
            | DialectType::SQLite
            | DialectType::TSQL
            | DialectType::Redshift
            | DialectType::Snowflake
            | DialectType::Oracle
            | DialectType::Teradata
            | DialectType::Drill
            | DialectType::Doris
            | DialectType::StarRocks
    );
    match &e {
        Expression::Union(u) if !u.all && needs_distinct && !u.distinct => {
            let mut new_u = (**u).clone();
            new_u.distinct = true;
            return Ok(Expression::Union(Box::new(new_u)));
        }
        Expression::Intersect(i) if !i.all && needs_distinct && !i.distinct => {
            let mut new_i = (**i).clone();
            new_i.distinct = true;
            return Ok(Expression::Intersect(Box::new(new_i)));
        }
        Expression::Except(ex) if !ex.all && needs_distinct && !ex.distinct => {
            let mut new_ex = (**ex).clone();
            new_ex.distinct = true;
            return Ok(Expression::Except(Box::new(new_ex)));
        }
        Expression::Union(u) if u.distinct && drop_distinct => {
            let mut new_u = (**u).clone();
            new_u.distinct = false;
            return Ok(Expression::Union(Box::new(new_u)));
        }
        Expression::Intersect(i) if i.distinct && drop_distinct => {
            let mut new_i = (**i).clone();
            new_i.distinct = false;
            return Ok(Expression::Intersect(Box::new(new_i)));
        }
        Expression::Except(ex) if ex.distinct && drop_distinct => {
            let mut new_ex = (**ex).clone();
            new_ex.distinct = false;
            return Ok(Expression::Except(Box::new(new_ex)));
        }
        _ => {}
    }
}
5212
5213 // ClickHouse: MAP('a', '1') -> map('a', '1') (lowercase function name)
5214 if matches!(target, DialectType::ClickHouse) {
5215 if let Expression::Function(ref f) = e {
5216 if f.name.eq_ignore_ascii_case("MAP") && !f.args.is_empty() {
5217 let mut new_f = f.as_ref().clone();
5218 new_f.name = "map".to_string();
5219 return Ok(Expression::Function(Box::new(new_f)));
5220 }
5221 }
5222 }
5223
5224 // ClickHouse: INTERSECT ALL -> INTERSECT (ClickHouse doesn't support ALL on INTERSECT)
5225 if matches!(target, DialectType::ClickHouse) {
5226 if let Expression::Intersect(ref i) = e {
5227 if i.all {
5228 let mut new_i = (**i).clone();
5229 new_i.all = false;
5230 return Ok(Expression::Intersect(Box::new(new_i)));
5231 }
5232 }
5233 }
5234
// Integer division: a / b -> CAST(a AS DOUBLE) / b for dialects that need it
// Only from Generic source, to prevent double-wrapping
//
// Dialects listed here perform integer (truncating) division when both
// operands are integers; casting the left operand forces float division to
// match the Generic dialect's semantics. TSQL/Fabric use FLOAT, the rest
// DOUBLE. Targets not listed keep the division untouched.
if matches!(source, DialectType::Generic) {
    if let Expression::Div(ref op) = e {
        let cast_type = match target {
            DialectType::TSQL | DialectType::Fabric => Some(DataType::Float {
                precision: None,
                scale: None,
                real_spelling: false,
            }),
            DialectType::Drill
            | DialectType::Trino
            | DialectType::Athena
            | DialectType::Presto => Some(DataType::Double {
                precision: None,
                scale: None,
            }),
            DialectType::PostgreSQL
            | DialectType::Redshift
            | DialectType::Materialize
            | DialectType::Teradata
            | DialectType::RisingWave => Some(DataType::Double {
                precision: None,
                scale: None,
            }),
            _ => None,
        };
        if let Some(dt) = cast_type {
            // Wrap only the left operand; casting one side is enough to
            // promote the whole division to floating point.
            let cast_left = Expression::Cast(Box::new(Cast {
                this: op.left.clone(),
                to: dt,
                double_colon_syntax: false,
                trailing_comments: Vec::new(),
                format: None,
                default: None,
                inferred_type: None,
            }));
            let new_op = crate::expressions::BinaryOp {
                left: cast_left,
                right: op.right.clone(),
                left_comments: op.left_comments.clone(),
                operator_comments: op.operator_comments.clone(),
                trailing_comments: op.trailing_comments.clone(),
                inferred_type: None,
            };
            return Ok(Expression::Div(Box::new(new_op)));
        }
    }
}
5284
// CREATE DATABASE -> CREATE SCHEMA for DuckDB target
// (DuckDB has no CREATE/DROP DATABASE statement; schemas play that role.
// IF [NOT] EXISTS flags are carried over.)
if matches!(target, DialectType::DuckDB) {
    if let Expression::CreateDatabase(db) = e {
        let mut schema = crate::expressions::CreateSchema::new(db.name.name.clone());
        schema.if_not_exists = db.if_not_exists;
        return Ok(Expression::CreateSchema(Box::new(schema)));
    }
    if let Expression::DropDatabase(db) = e {
        let mut schema = crate::expressions::DropSchema::new(db.name.name.clone());
        schema.if_exists = db.if_exists;
        return Ok(Expression::DropSchema(Box::new(schema)));
    }
}
5298
// Strip ClickHouse Nullable(...) wrapper for non-ClickHouse targets
//
// CAST(x AS Nullable(T)) becomes CAST(x AS T'), where T' maps common
// ClickHouse inner types to portable DataType variants; unknown inner types
// are kept verbatim as Custom. NOTE(review): the `[9..len-1]` byte slice
// assumes the type name is ASCII (true for ClickHouse type names) — a
// multi-byte name could panic; confirm the parser can't produce one.
if matches!(source, DialectType::ClickHouse)
    && !matches!(target, DialectType::ClickHouse)
{
    if let Expression::Cast(ref c) = e {
        if let DataType::Custom { ref name } = c.to {
            let upper = name.to_uppercase();
            if upper.starts_with("NULLABLE(") && upper.ends_with(")") {
                let inner = &name[9..name.len() - 1]; // strip "Nullable(" and ")"
                let inner_upper = inner.to_uppercase();
                let new_dt = match inner_upper.as_str() {
                    "DATETIME" | "DATETIME64" => DataType::Timestamp {
                        precision: None,
                        timezone: false,
                    },
                    "DATE" => DataType::Date,
                    "INT64" | "BIGINT" => DataType::BigInt { length: None },
                    "INT32" | "INT" | "INTEGER" => DataType::Int {
                        length: None,
                        integer_spelling: false,
                    },
                    "FLOAT64" | "DOUBLE" => DataType::Double {
                        precision: None,
                        scale: None,
                    },
                    "STRING" => DataType::Text,
                    // Unrecognized inner type: preserve the original spelling.
                    _ => DataType::Custom {
                        name: inner.to_string(),
                    },
                };
                let mut new_cast = c.clone();
                new_cast.to = new_dt;
                return Ok(Expression::Cast(new_cast));
            }
        }
    }
}
5336
5337 // ARRAY_CONCAT_AGG -> Snowflake: ARRAY_FLATTEN(ARRAY_AGG(...))
5338 if matches!(target, DialectType::Snowflake) {
5339 if let Expression::ArrayConcatAgg(ref agg) = e {
5340 let mut agg_clone = agg.as_ref().clone();
5341 agg_clone.name = None; // Clear name so generator uses default "ARRAY_AGG"
5342 let array_agg = Expression::ArrayAgg(Box::new(agg_clone));
5343 let flatten = Expression::Function(Box::new(Function::new(
5344 "ARRAY_FLATTEN".to_string(),
5345 vec![array_agg],
5346 )));
5347 return Ok(flatten);
5348 }
5349 }
5350
5351 // ARRAY_CONCAT_AGG -> others: keep as function for cross-dialect
5352 if !matches!(target, DialectType::BigQuery | DialectType::Snowflake) {
5353 if let Expression::ArrayConcatAgg(agg) = e {
5354 let arg = agg.this;
5355 return Ok(Expression::Function(Box::new(Function::new(
5356 "ARRAY_CONCAT_AGG".to_string(),
5357 vec![arg],
5358 ))));
5359 }
5360 }
5361
5362 // Determine what action to take by inspecting e immutably
5363 let action = {
5364 let source_propagates_nulls =
5365 matches!(source, DialectType::Snowflake | DialectType::BigQuery);
5366 let target_ignores_nulls =
5367 matches!(target, DialectType::DuckDB | DialectType::PostgreSQL);
5368
5369 match &e {
5370 Expression::Function(f) => {
5371 let name = f.name.to_uppercase();
5372 // DATE_PART: strip quotes from first arg when target is Snowflake (source != Snowflake)
5373 if (name == "DATE_PART" || name == "DATEPART")
5374 && f.args.len() == 2
5375 && matches!(target, DialectType::Snowflake)
5376 && !matches!(source, DialectType::Snowflake)
5377 && matches!(
5378 &f.args[0],
5379 Expression::Literal(crate::expressions::Literal::String(_))
5380 )
5381 {
5382 Action::DatePartUnquote
5383 } else if source_propagates_nulls
5384 && target_ignores_nulls
5385 && (name == "GREATEST" || name == "LEAST")
5386 && f.args.len() >= 2
5387 {
5388 Action::GreatestLeastNull
5389 } else if matches!(source, DialectType::Snowflake)
5390 && name == "ARRAY_GENERATE_RANGE"
5391 && f.args.len() >= 2
5392 {
5393 Action::ArrayGenerateRange
5394 } else if matches!(source, DialectType::Snowflake)
5395 && matches!(target, DialectType::DuckDB)
5396 && name == "DATE_TRUNC"
5397 && f.args.len() == 2
5398 {
5399 // Determine if DuckDB DATE_TRUNC needs CAST wrapping to preserve input type.
5400 // Logic based on Python sqlglot's input_type_preserved flag:
5401 // - DATE + non-date-unit (HOUR, MINUTE, etc.) -> wrap
5402 // - TIMESTAMP + date-unit (YEAR, QUARTER, MONTH, WEEK, DAY) -> wrap
5403 // - TIMESTAMPTZ/TIMESTAMPLTZ/TIME -> always wrap
5404 let unit_str = match &f.args[0] {
5405 Expression::Literal(crate::expressions::Literal::String(s)) => {
5406 Some(s.to_uppercase())
5407 }
5408 _ => None,
5409 };
5410 let is_date_unit = unit_str.as_ref().map_or(false, |u| {
5411 matches!(u.as_str(), "YEAR" | "QUARTER" | "MONTH" | "WEEK" | "DAY")
5412 });
5413 match &f.args[1] {
5414 Expression::Cast(c) => match &c.to {
5415 DataType::Time { .. } => Action::DateTruncWrapCast,
5416 DataType::Custom { name }
5417 if name.eq_ignore_ascii_case("TIMESTAMPTZ")
5418 || name.eq_ignore_ascii_case("TIMESTAMPLTZ") =>
5419 {
5420 Action::DateTruncWrapCast
5421 }
5422 DataType::Timestamp { timezone: true, .. } => {
5423 Action::DateTruncWrapCast
5424 }
5425 DataType::Date if !is_date_unit => Action::DateTruncWrapCast,
5426 DataType::Timestamp {
5427 timezone: false, ..
5428 } if is_date_unit => Action::DateTruncWrapCast,
5429 _ => Action::None,
5430 },
5431 _ => Action::None,
5432 }
5433 } else if matches!(source, DialectType::Snowflake)
5434 && matches!(target, DialectType::DuckDB)
5435 && name == "TO_DATE"
5436 && f.args.len() == 1
5437 && !matches!(
5438 &f.args[0],
5439 Expression::Literal(crate::expressions::Literal::String(_))
5440 )
5441 {
5442 Action::ToDateToCast
5443 } else if !matches!(source, DialectType::Redshift)
5444 && matches!(target, DialectType::Redshift)
5445 && name == "CONVERT_TIMEZONE"
5446 && (f.args.len() == 2 || f.args.len() == 3)
5447 {
5448 // Convert Function("CONVERT_TIMEZONE") to Expression::ConvertTimezone
5449 // so Redshift's transform_expr won't expand 2-arg to 3-arg with 'UTC'.
5450 // The Redshift parser adds 'UTC' as default source_tz, but when
5451 // transpiling from other dialects, we should preserve the original form.
5452 Action::ConvertTimezoneToExpr
5453 } else if matches!(source, DialectType::Snowflake)
5454 && matches!(target, DialectType::DuckDB)
5455 && name == "REGEXP_REPLACE"
5456 && f.args.len() == 4
5457 && !matches!(
5458 &f.args[3],
5459 Expression::Literal(crate::expressions::Literal::String(_))
5460 )
5461 {
5462 // Snowflake REGEXP_REPLACE with position arg -> DuckDB needs 'g' flag
5463 Action::RegexpReplaceSnowflakeToDuckDB
5464 } else if name == "_BQ_TO_HEX" {
5465 // Internal marker from TO_HEX conversion - bare (no LOWER/UPPER wrapper)
5466 Action::BigQueryToHexBare
5467 } else if matches!(source, DialectType::BigQuery)
5468 && !matches!(target, DialectType::BigQuery)
5469 {
5470 // BigQuery-specific functions that need to be converted to standard forms
5471 match name.as_str() {
5472 "TIMESTAMP_DIFF" | "DATETIME_DIFF" | "TIME_DIFF"
5473 | "DATE_DIFF"
5474 | "TIMESTAMP_ADD" | "TIMESTAMP_SUB"
5475 | "DATETIME_ADD" | "DATETIME_SUB"
5476 | "TIME_ADD" | "TIME_SUB"
5477 | "DATE_ADD" | "DATE_SUB"
5478 | "SAFE_DIVIDE"
5479 | "GENERATE_UUID"
5480 | "COUNTIF"
5481 | "EDIT_DISTANCE"
5482 | "TIMESTAMP_SECONDS" | "TIMESTAMP_MILLIS" | "TIMESTAMP_MICROS"
5483 | "TIMESTAMP_TRUNC" | "DATETIME_TRUNC" | "DATE_TRUNC"
5484 | "TO_HEX"
5485 | "TO_JSON_STRING"
5486 | "GENERATE_ARRAY" | "GENERATE_TIMESTAMP_ARRAY"
5487 | "DIV"
5488 | "UNIX_DATE" | "UNIX_SECONDS" | "UNIX_MILLIS" | "UNIX_MICROS"
5489 | "LAST_DAY"
5490 | "TIME" | "DATETIME" | "TIMESTAMP" | "STRING"
5491 | "REGEXP_CONTAINS"
5492 | "CONTAINS_SUBSTR"
5493 | "SAFE_ADD" | "SAFE_SUBTRACT" | "SAFE_MULTIPLY"
5494 | "SAFE_CAST"
5495 | "GENERATE_DATE_ARRAY"
5496 | "PARSE_DATE" | "PARSE_TIMESTAMP"
5497 | "FORMAT_DATE" | "FORMAT_DATETIME" | "FORMAT_TIMESTAMP"
5498 | "ARRAY_CONCAT"
5499 | "JSON_QUERY" | "JSON_VALUE_ARRAY"
5500 | "INSTR"
5501 | "MD5" | "SHA1" | "SHA256" | "SHA512"
5502 | "GENERATE_UUID()" // just in case
5503 | "REGEXP_EXTRACT_ALL"
5504 | "REGEXP_EXTRACT"
5505 | "INT64"
5506 | "ARRAY_CONCAT_AGG"
5507 | "DATE_DIFF(" // just in case
5508 | "TO_HEX_MD5" // internal
5509 | "MOD"
5510 | "CONCAT"
5511 | "CURRENT_TIMESTAMP" | "CURRENT_DATE" | "CURRENT_DATETIME" | "CURRENT_TIME"
5512 | "STRUCT"
5513 | "ROUND"
5514 | "MAKE_INTERVAL"
5515 | "ARRAY_TO_STRING"
5516 | "PERCENTILE_CONT"
5517 => Action::BigQueryFunctionNormalize,
5518 "ARRAY" if matches!(target, DialectType::Snowflake)
5519 && f.args.len() == 1
5520 && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT"))
5521 => Action::BigQueryArraySelectAsStructToSnowflake,
5522 _ => Action::None,
5523 }
5524 } else if matches!(source, DialectType::BigQuery)
5525 && matches!(target, DialectType::BigQuery)
5526 {
5527 // BigQuery -> BigQuery normalizations
5528 match name.as_str() {
5529 "TIMESTAMP_DIFF"
5530 | "DATETIME_DIFF"
5531 | "TIME_DIFF"
5532 | "DATE_DIFF"
5533 | "DATE_ADD"
5534 | "TO_HEX"
5535 | "CURRENT_TIMESTAMP"
5536 | "CURRENT_DATE"
5537 | "CURRENT_TIME"
5538 | "CURRENT_DATETIME"
5539 | "GENERATE_DATE_ARRAY"
5540 | "INSTR"
5541 | "FORMAT_DATETIME"
5542 | "DATETIME"
5543 | "MAKE_INTERVAL" => Action::BigQueryFunctionNormalize,
5544 _ => Action::None,
5545 }
5546 } else {
5547 // Generic function normalization for non-BigQuery sources
5548 match name.as_str() {
5549 "ARBITRARY" | "AGGREGATE"
5550 | "REGEXP_MATCHES" | "REGEXP_FULL_MATCH"
5551 | "STRUCT_EXTRACT"
5552 | "LIST_FILTER" | "LIST_TRANSFORM" | "LIST_SORT" | "LIST_REVERSE_SORT"
5553 | "STRING_TO_ARRAY" | "STR_SPLIT" | "STR_SPLIT_REGEX" | "SPLIT_TO_ARRAY"
5554 | "SUBSTRINGINDEX"
5555 | "ARRAY_LENGTH" | "SIZE" | "CARDINALITY"
5556 | "UNICODE"
5557 | "XOR"
5558 | "ARRAY_REVERSE_SORT"
5559 | "ENCODE" | "DECODE"
5560 | "QUANTILE"
5561 | "EPOCH" | "EPOCH_MS"
5562 | "HASHBYTES"
5563 | "JSON_EXTRACT_PATH" | "JSON_EXTRACT_PATH_TEXT"
5564 | "APPROX_DISTINCT"
5565 | "DATE_PARSE" | "FORMAT_DATETIME"
5566 | "REGEXP_EXTRACT" | "REGEXP_SUBSTR" | "TO_DAYS"
5567 | "RLIKE"
5568 | "DATEDIFF" | "DATE_DIFF" | "MONTHS_BETWEEN"
5569 | "ADD_MONTHS" | "DATEADD" | "DATE_ADD" | "DATE_SUB" | "DATETRUNC"
5570 | "LAST_DAY" | "LAST_DAY_OF_MONTH" | "EOMONTH"
5571 | "ARRAY_CONSTRUCT" | "ARRAY_CAT" | "ARRAY_COMPACT"
5572 | "ARRAY_FILTER" | "FILTER" | "REDUCE" | "ARRAY_REVERSE"
5573 | "MAP" | "MAP_FROM_ENTRIES"
5574 | "COLLECT_LIST" | "COLLECT_SET"
5575 | "ISNAN" | "IS_NAN"
5576 | "TO_UTC_TIMESTAMP" | "FROM_UTC_TIMESTAMP"
5577 | "FORMAT_NUMBER"
5578 | "TOMONDAY" | "TOSTARTOFWEEK" | "TOSTARTOFMONTH" | "TOSTARTOFYEAR"
5579 | "ELEMENT_AT"
5580 | "EXPLODE" | "EXPLODE_OUTER" | "POSEXPLODE"
5581 | "SPLIT_PART"
5582 // GENERATE_SERIES: handled separately below
5583 | "JSON_EXTRACT" | "JSON_EXTRACT_SCALAR"
5584 | "JSON_QUERY" | "JSON_VALUE"
5585 | "JSON_SEARCH"
5586 | "JSON_EXTRACT_JSON" | "BSON_EXTRACT_BSON"
5587 | "TO_UNIX_TIMESTAMP" | "UNIX_TIMESTAMP"
5588 | "CURDATE" | "CURTIME"
5589 | "ARRAY_TO_STRING"
5590 | "ARRAY_SORT" | "SORT_ARRAY"
5591 | "LEFT" | "RIGHT"
5592 | "MAP_FROM_ARRAYS"
5593 | "LIKE" | "ILIKE"
5594 | "ARRAY_CONCAT" | "LIST_CONCAT"
5595 | "QUANTILE_CONT" | "QUANTILE_DISC"
5596 | "PERCENTILE_CONT" | "PERCENTILE_DISC"
5597 | "PERCENTILE_APPROX" | "APPROX_PERCENTILE"
5598 | "LOCATE" | "STRPOS" | "INSTR"
5599 | "CHAR"
5600 // CONCAT: handled separately for COALESCE wrapping
5601 | "ARRAY_JOIN"
5602 | "ARRAY_CONTAINS" | "HAS" | "CONTAINS"
5603 | "ISNULL"
5604 | "MONTHNAME"
5605 | "TO_TIMESTAMP"
5606 | "TO_DATE"
5607 | "TO_JSON"
5608 | "REGEXP_SPLIT"
5609 | "SPLIT"
5610 | "FORMATDATETIME"
5611 | "ARRAYJOIN"
5612 | "SPLITBYSTRING" | "SPLITBYREGEXP"
5613 | "NVL"
5614 | "TO_CHAR"
5615 | "DBMS_RANDOM.VALUE"
5616 | "REGEXP_LIKE"
5617 | "REPLICATE"
5618 | "LEN"
5619 | "COUNT_BIG"
5620 | "DATEFROMPARTS"
5621 | "DATETIMEFROMPARTS"
5622 | "CONVERT" | "TRY_CONVERT"
5623 | "STRFTIME" | "STRPTIME"
5624 | "DATE_FORMAT" | "FORMAT_DATE"
5625 | "PARSE_TIMESTAMP" | "PARSE_DATE"
5626 | "FROM_BASE64" | "TO_BASE64"
5627 | "GETDATE"
5628 | "TO_HEX" | "FROM_HEX" | "UNHEX" | "HEX"
5629 | "TO_UTF8" | "FROM_UTF8"
5630 | "STARTS_WITH" | "STARTSWITH"
5631 | "APPROX_COUNT_DISTINCT"
5632 | "JSON_FORMAT"
5633 | "SYSDATE"
5634 | "LOGICAL_OR" | "LOGICAL_AND"
5635 | "MONTHS_ADD"
5636 | "SCHEMA_NAME"
5637 | "STRTOL"
5638 | "EDITDIST3"
5639 | "FORMAT"
5640 | "LIST_CONTAINS" | "LIST_HAS"
5641 | "VARIANCE" | "STDDEV"
5642 | "ISINF"
5643 | "TO_UNIXTIME"
5644 | "FROM_UNIXTIME"
5645 | "DATEPART" | "DATE_PART"
5646 | "DATENAME"
5647 | "STRING_AGG"
5648 | "JSON_ARRAYAGG"
5649 | "APPROX_QUANTILE"
5650 | "MAKE_DATE"
5651 | "LIST_HAS_ANY" | "ARRAY_HAS_ANY"
5652 | "RANGE"
5653 | "TRY_ELEMENT_AT"
5654 | "STR_TO_MAP"
5655 | "STRING"
5656 | "STR_TO_TIME"
5657 | "CURRENT_SCHEMA"
5658 | "LTRIM" | "RTRIM"
5659 | "UUID"
5660 | "FARM_FINGERPRINT"
5661 | "JSON_KEYS"
5662 | "WEEKOFYEAR"
5663 | "CONCAT_WS"
5664 | "ARRAY_SLICE"
5665 | "ARRAY_PREPEND"
5666 | "ARRAY_REMOVE"
5667 | "GENERATE_DATE_ARRAY"
5668 | "PARSE_JSON"
5669 | "JSON_REMOVE"
5670 | "JSON_SET"
5671 | "LEVENSHTEIN"
5672 => Action::GenericFunctionNormalize,
5673 // Canonical date functions -> dialect-specific
5674 "TS_OR_DS_TO_DATE" => Action::TsOrDsToDateConvert,
5675 "TS_OR_DS_TO_DATE_STR" if f.args.len() == 1 => Action::TsOrDsToDateStrConvert,
5676 "DATE_STR_TO_DATE" if f.args.len() == 1 => Action::DateStrToDateConvert,
5677 "TIME_STR_TO_DATE" if f.args.len() == 1 => Action::TimeStrToDateConvert,
5678 "TIME_STR_TO_TIME" if f.args.len() <= 2 => Action::TimeStrToTimeConvert,
5679 "TIME_STR_TO_UNIX" if f.args.len() == 1 => Action::TimeStrToUnixConvert,
5680 "TIME_TO_TIME_STR" if f.args.len() == 1 => Action::TimeToTimeStrConvert,
5681 "DATE_TO_DATE_STR" if f.args.len() == 1 => Action::DateToDateStrConvert,
5682 "DATE_TO_DI" if f.args.len() == 1 => Action::DateToDiConvert,
5683 "DI_TO_DATE" if f.args.len() == 1 => Action::DiToDateConvert,
5684 "TS_OR_DI_TO_DI" if f.args.len() == 1 => Action::TsOrDiToDiConvert,
5685 "UNIX_TO_STR" if f.args.len() == 2 => Action::UnixToStrConvert,
5686 "UNIX_TO_TIME" if f.args.len() == 1 => Action::UnixToTimeConvert,
5687 "UNIX_TO_TIME_STR" if f.args.len() == 1 => Action::UnixToTimeStrConvert,
5688 "TIME_TO_UNIX" if f.args.len() == 1 => Action::TimeToUnixConvert,
5689 "TIME_TO_STR" if f.args.len() == 2 => Action::TimeToStrConvert,
5690 "STR_TO_UNIX" if f.args.len() == 2 => Action::StrToUnixConvert,
5691 // STR_TO_DATE(x, fmt) -> dialect-specific
5692 "STR_TO_DATE" if f.args.len() == 2
5693 && matches!(source, DialectType::Generic) => Action::StrToDateConvert,
5694 "STR_TO_DATE" => Action::GenericFunctionNormalize,
5695 // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> dialect-specific DATE_ADD
5696 "TS_OR_DS_ADD" if f.args.len() == 3
5697 && matches!(source, DialectType::Generic) => Action::TsOrDsAddConvert,
5698 // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, '1970-01-01')
5699 "DATE_FROM_UNIX_DATE" if f.args.len() == 1 => Action::DateFromUnixDateConvert,
5700 // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b [ELSE c] END
5701 "NVL2" if (f.args.len() == 2 || f.args.len() == 3) => Action::Nvl2Expand,
5702 // IFNULL(a, b) -> COALESCE(a, b) when coming from Generic source
5703 "IFNULL" if f.args.len() == 2 => Action::IfnullToCoalesce,
5704 // IS_ASCII(x) -> dialect-specific
5705 "IS_ASCII" if f.args.len() == 1 => Action::IsAsciiConvert,
5706 // STR_POSITION(haystack, needle[, pos[, occ]]) -> dialect-specific
5707 "STR_POSITION" => Action::StrPositionConvert,
5708 // ARRAY_SUM -> dialect-specific
5709 "ARRAY_SUM" => Action::ArraySumConvert,
5710 // ARRAY_SIZE -> dialect-specific (Drill only)
5711 "ARRAY_SIZE" if matches!(target, DialectType::Drill) => Action::ArraySizeConvert,
5712 // ARRAY_ANY -> dialect-specific
5713 "ARRAY_ANY" if f.args.len() == 2 => Action::ArrayAnyConvert,
5714 // Functions needing specific cross-dialect transforms
5715 "MAX_BY" | "MIN_BY" if matches!(target, DialectType::ClickHouse | DialectType::Spark | DialectType::Databricks | DialectType::DuckDB) => Action::MaxByMinByConvert,
5716 "STRUCT" if matches!(source, DialectType::Spark | DialectType::Databricks)
5717 && !matches!(target, DialectType::Spark | DialectType::Databricks | DialectType::Hive) => Action::SparkStructConvert,
5718 "ARRAY" if matches!(source, DialectType::BigQuery)
5719 && matches!(target, DialectType::Snowflake)
5720 && f.args.len() == 1
5721 && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT")) => Action::BigQueryArraySelectAsStructToSnowflake,
5722 "ARRAY" if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena | DialectType::BigQuery | DialectType::DuckDB | DialectType::ClickHouse | DialectType::StarRocks) => Action::ArraySyntaxConvert,
5723 "TRUNC" if f.args.len() == 2 && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::ClickHouse) => Action::TruncToDateTrunc,
5724 // DATE_TRUNC('unit', x) from Generic source -> arg swap for BigQuery/Doris/Spark/MySQL
5725 "DATE_TRUNC" if f.args.len() == 2
5726 && matches!(source, DialectType::Generic)
5727 && matches!(target, DialectType::BigQuery | DialectType::Doris | DialectType::StarRocks
5728 | DialectType::Spark | DialectType::Databricks | DialectType::MySQL) => Action::DateTruncSwapArgs,
5729 // TIMESTAMP_TRUNC(x, UNIT) from Generic source -> convert to per-dialect
5730 "TIMESTAMP_TRUNC" if f.args.len() >= 2
5731 && matches!(source, DialectType::Generic) => Action::TimestampTruncConvert,
5732 "UNIFORM" if matches!(target, DialectType::Snowflake) => Action::GenericFunctionNormalize,
5733 // GENERATE_SERIES -> SEQUENCE/UNNEST/EXPLODE for target dialects
5734 "GENERATE_SERIES" if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
5735 && !matches!(target, DialectType::PostgreSQL | DialectType::Redshift | DialectType::TSQL | DialectType::Fabric) => Action::GenerateSeriesConvert,
5736 // GENERATE_SERIES with interval normalization for PG target
5737 "GENERATE_SERIES" if f.args.len() >= 3
5738 && matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
5739 && matches!(target, DialectType::PostgreSQL | DialectType::Redshift) => Action::GenerateSeriesConvert,
5740 "GENERATE_SERIES" => Action::None, // passthrough for other cases
5741 // CONCAT(a, b) -> COALESCE wrapping for Presto/ClickHouse from PostgreSQL
5742 "CONCAT" if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
5743 && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::ClickHouse) => Action::ConcatCoalesceWrap,
5744 "CONCAT" => Action::GenericFunctionNormalize,
5745 // DIV(a, b) -> target-specific integer division
5746 "DIV" if f.args.len() == 2
5747 && matches!(source, DialectType::PostgreSQL)
5748 && matches!(target, DialectType::DuckDB | DialectType::BigQuery | DialectType::SQLite) => Action::DivFuncConvert,
5749 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
5750 "JSON_OBJECT_AGG" | "JSONB_OBJECT_AGG" if f.args.len() == 2
5751 && matches!(target, DialectType::DuckDB) => Action::JsonObjectAggConvert,
5752 // JSONB_EXISTS -> JSON_EXISTS for DuckDB
5753 "JSONB_EXISTS" if f.args.len() == 2
5754 && matches!(target, DialectType::DuckDB) => Action::JsonbExistsConvert,
5755 // DATE_BIN -> TIME_BUCKET for DuckDB
5756 "DATE_BIN" if matches!(target, DialectType::DuckDB) => Action::DateBinConvert,
5757 // Multi-arg MIN(a,b,c) -> LEAST, MAX(a,b,c) -> GREATEST
5758 "MIN" | "MAX" if f.args.len() > 1 && !matches!(target, DialectType::SQLite) => Action::MinMaxToLeastGreatest,
5759 // ClickHouse uniq -> APPROX_COUNT_DISTINCT for other dialects
5760 "UNIQ" if matches!(source, DialectType::ClickHouse) && !matches!(target, DialectType::ClickHouse) => Action::ClickHouseUniqToApproxCountDistinct,
5761 // ClickHouse any -> ANY_VALUE for other dialects
5762 "ANY" if f.args.len() == 1 && matches!(source, DialectType::ClickHouse) && !matches!(target, DialectType::ClickHouse) => Action::ClickHouseAnyToAnyValue,
5763 _ => Action::None,
5764 }
5765 }
5766 }
5767 Expression::AggregateFunction(af) => {
5768 let name = af.name.to_uppercase();
5769 match name.as_str() {
5770 "ARBITRARY" | "AGGREGATE" => Action::GenericFunctionNormalize,
5771 "JSON_ARRAYAGG" => Action::GenericFunctionNormalize,
5772 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
5773 "JSON_OBJECT_AGG" | "JSONB_OBJECT_AGG"
5774 if matches!(target, DialectType::DuckDB) =>
5775 {
5776 Action::JsonObjectAggConvert
5777 }
5778 "ARRAY_AGG"
5779 if matches!(
5780 target,
5781 DialectType::Hive
5782 | DialectType::Spark
5783 | DialectType::Databricks
5784 ) =>
5785 {
5786 Action::ArrayAggToCollectList
5787 }
5788 "MAX_BY" | "MIN_BY"
5789 if matches!(
5790 target,
5791 DialectType::ClickHouse
5792 | DialectType::Spark
5793 | DialectType::Databricks
5794 | DialectType::DuckDB
5795 ) =>
5796 {
5797 Action::MaxByMinByConvert
5798 }
5799 "COLLECT_LIST"
5800 if matches!(
5801 target,
5802 DialectType::Presto | DialectType::Trino | DialectType::DuckDB
5803 ) =>
5804 {
5805 Action::CollectListToArrayAgg
5806 }
5807 "COLLECT_SET"
5808 if matches!(
5809 target,
5810 DialectType::Presto
5811 | DialectType::Trino
5812 | DialectType::Snowflake
5813 | DialectType::DuckDB
5814 ) =>
5815 {
5816 Action::CollectSetConvert
5817 }
5818 "PERCENTILE"
5819 if matches!(
5820 target,
5821 DialectType::DuckDB | DialectType::Presto | DialectType::Trino
5822 ) =>
5823 {
5824 Action::PercentileConvert
5825 }
5826 // CORR -> CASE WHEN ISNAN(CORR(a,b)) THEN NULL ELSE CORR(a,b) END for DuckDB
5827 "CORR"
5828 if matches!(target, DialectType::DuckDB)
5829 && matches!(source, DialectType::Snowflake) =>
5830 {
5831 Action::CorrIsnanWrap
5832 }
5833 // BigQuery APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
5834 "APPROX_QUANTILES"
5835 if matches!(source, DialectType::BigQuery)
5836 && matches!(target, DialectType::DuckDB) =>
5837 {
5838 Action::BigQueryApproxQuantiles
5839 }
5840 // BigQuery PERCENTILE_CONT(x, frac RESPECT NULLS) -> QUANTILE_CONT(x, frac) for DuckDB
5841 "PERCENTILE_CONT"
5842 if matches!(source, DialectType::BigQuery)
5843 && matches!(target, DialectType::DuckDB)
5844 && af.args.len() >= 2 =>
5845 {
5846 Action::BigQueryPercentileContToDuckDB
5847 }
5848 _ => Action::None,
5849 }
5850 }
5851 Expression::JSONArrayAgg(_) => match target {
5852 DialectType::PostgreSQL => Action::GenericFunctionNormalize,
5853 _ => Action::None,
5854 },
5855 Expression::ToNumber(tn) => {
5856 // TO_NUMBER(x) with 1 arg -> CAST(x AS DOUBLE) for most targets
5857 if tn.format.is_none() && tn.precision.is_none() && tn.scale.is_none() {
5858 match target {
5859 DialectType::Oracle
5860 | DialectType::Snowflake
5861 | DialectType::Teradata => Action::None,
5862 _ => Action::GenericFunctionNormalize,
5863 }
5864 } else {
5865 Action::None
5866 }
5867 }
5868 Expression::Nvl2(_) => {
5869 // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b ELSE c END for most dialects
5870 // Keep as NVL2 for dialects that support it natively
5871 match target {
5872 DialectType::Oracle
5873 | DialectType::Snowflake
5874 | DialectType::Teradata
5875 | DialectType::Spark
5876 | DialectType::Databricks
5877 | DialectType::Redshift => Action::None,
5878 _ => Action::Nvl2Expand,
5879 }
5880 }
5881 Expression::Decode(_) | Expression::DecodeCase(_) => {
5882 // DECODE(a, b, c[, d, e[, ...]]) -> CASE WHEN with null-safe comparisons
5883 // Keep as DECODE for Oracle/Snowflake
5884 match target {
5885 DialectType::Oracle | DialectType::Snowflake => Action::None,
5886 _ => Action::DecodeSimplify,
5887 }
5888 }
5889 Expression::Coalesce(ref cf) => {
5890 // IFNULL(a, b) -> COALESCE(a, b): clear original_name for cross-dialect
5891 // BigQuery keeps IFNULL natively when source is also BigQuery
5892 if cf.original_name.as_deref() == Some("IFNULL")
5893 && !(matches!(source, DialectType::BigQuery)
5894 && matches!(target, DialectType::BigQuery))
5895 {
5896 Action::IfnullToCoalesce
5897 } else {
5898 Action::None
5899 }
5900 }
5901 Expression::IfFunc(if_func) => {
5902 if matches!(source, DialectType::Snowflake)
5903 && matches!(
5904 target,
5905 DialectType::Presto | DialectType::Trino | DialectType::SQLite
5906 )
5907 && matches!(if_func.false_value, Some(Expression::Div(_)))
5908 {
5909 Action::Div0TypedDivision
5910 } else {
5911 Action::None
5912 }
5913 }
5914 Expression::ToJson(_) => match target {
5915 DialectType::Presto | DialectType::Trino => Action::ToJsonConvert,
5916 DialectType::BigQuery => Action::ToJsonConvert,
5917 DialectType::DuckDB => Action::ToJsonConvert,
5918 _ => Action::None,
5919 },
5920 Expression::ArrayAgg(ref agg) => {
5921 if matches!(target, DialectType::MySQL | DialectType::SingleStore) {
5922 Action::ArrayAggToGroupConcat
5923 } else if matches!(
5924 target,
5925 DialectType::Hive | DialectType::Spark | DialectType::Databricks
5926 ) {
5927 // Any source -> Hive/Spark: convert ARRAY_AGG to COLLECT_LIST
5928 Action::ArrayAggToCollectList
5929 } else if matches!(
5930 source,
5931 DialectType::Spark | DialectType::Databricks | DialectType::Hive
5932 ) && matches!(target, DialectType::DuckDB)
5933 && agg.filter.is_some()
5934 {
5935 // Spark/Hive ARRAY_AGG excludes NULLs, DuckDB includes them
5936 // Need to add NOT x IS NULL to existing filter
5937 Action::ArrayAggNullFilter
5938 } else if matches!(target, DialectType::DuckDB)
5939 && agg.ignore_nulls == Some(true)
5940 && !agg.order_by.is_empty()
5941 {
5942 // BigQuery ARRAY_AGG(x IGNORE NULLS ORDER BY ...) -> DuckDB ARRAY_AGG(x ORDER BY a NULLS FIRST, ...)
5943 Action::ArrayAggIgnoreNullsDuckDB
5944 } else if !matches!(source, DialectType::Snowflake) {
5945 Action::None
5946 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
5947 let is_array_agg = agg.name.as_deref().map(|n| n.to_uppercase())
5948 == Some("ARRAY_AGG".to_string())
5949 || agg.name.is_none();
5950 if is_array_agg {
5951 Action::ArrayAggCollectList
5952 } else {
5953 Action::None
5954 }
5955 } else if matches!(
5956 target,
5957 DialectType::DuckDB | DialectType::Presto | DialectType::Trino
5958 ) && agg.filter.is_none()
5959 {
5960 Action::ArrayAggFilter
5961 } else {
5962 Action::None
5963 }
5964 }
5965 Expression::WithinGroup(wg) => {
5966 if matches!(source, DialectType::Snowflake)
5967 && matches!(
5968 target,
5969 DialectType::DuckDB | DialectType::Presto | DialectType::Trino
5970 )
5971 && matches!(wg.this, Expression::ArrayAgg(_))
5972 {
5973 Action::ArrayAggWithinGroupFilter
5974 } else if matches!(&wg.this, Expression::AggregateFunction(af) if af.name.eq_ignore_ascii_case("STRING_AGG"))
5975 || matches!(&wg.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("STRING_AGG"))
5976 || matches!(&wg.this, Expression::StringAgg(_))
5977 {
5978 Action::StringAggConvert
5979 } else if matches!(
5980 target,
5981 DialectType::Presto
5982 | DialectType::Trino
5983 | DialectType::Athena
5984 | DialectType::Spark
5985 | DialectType::Databricks
5986 ) && (matches!(&wg.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("PERCENTILE_CONT") || f.name.eq_ignore_ascii_case("PERCENTILE_DISC"))
5987 || matches!(&wg.this, Expression::AggregateFunction(af) if af.name.eq_ignore_ascii_case("PERCENTILE_CONT") || af.name.eq_ignore_ascii_case("PERCENTILE_DISC"))
5988 || matches!(&wg.this, Expression::PercentileCont(_)))
5989 {
5990 Action::PercentileContConvert
5991 } else {
5992 Action::None
5993 }
5994 }
5995 // For BigQuery: CAST(x AS TIMESTAMP) -> CAST(x AS DATETIME)
5996 // because BigQuery's TIMESTAMP is really TIMESTAMPTZ, and
5997 // DATETIME is the timezone-unaware type
5998 Expression::Cast(ref c) => {
5999 if c.format.is_some()
6000 && (matches!(source, DialectType::BigQuery)
6001 || matches!(source, DialectType::Teradata))
6002 {
6003 Action::BigQueryCastFormat
6004 } else if matches!(target, DialectType::BigQuery)
6005 && !matches!(source, DialectType::BigQuery)
6006 && matches!(
6007 c.to,
6008 DataType::Timestamp {
6009 timezone: false,
6010 ..
6011 }
6012 )
6013 {
6014 Action::CastTimestampToDatetime
6015 } else if matches!(target, DialectType::MySQL | DialectType::StarRocks)
6016 && !matches!(source, DialectType::MySQL | DialectType::StarRocks)
6017 && matches!(
6018 c.to,
6019 DataType::Timestamp {
6020 timezone: false,
6021 ..
6022 }
6023 )
6024 {
6025 // Generic/other -> MySQL/StarRocks: CAST(x AS TIMESTAMP) -> CAST(x AS DATETIME)
6026 // but MySQL-native CAST(x AS TIMESTAMP) stays as TIMESTAMP(x) via transform_cast
6027 Action::CastTimestampToDatetime
6028 } else if matches!(
6029 source,
6030 DialectType::Hive | DialectType::Spark | DialectType::Databricks
6031 ) && matches!(
6032 target,
6033 DialectType::Presto
6034 | DialectType::Trino
6035 | DialectType::Athena
6036 | DialectType::DuckDB
6037 | DialectType::Snowflake
6038 | DialectType::BigQuery
6039 | DialectType::Databricks
6040 | DialectType::TSQL
6041 ) {
6042 Action::HiveCastToTryCast
6043 } else if matches!(c.to, DataType::Timestamp { timezone: true, .. })
6044 && matches!(target, DialectType::MySQL | DialectType::StarRocks)
6045 {
6046 // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) function for MySQL/StarRocks
6047 Action::CastTimestamptzToFunc
6048 } else if matches!(c.to, DataType::Timestamp { timezone: true, .. })
6049 && matches!(
6050 target,
6051 DialectType::Hive
6052 | DialectType::Spark
6053 | DialectType::Databricks
6054 | DialectType::BigQuery
6055 )
6056 {
6057 // CAST(x AS TIMESTAMP WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark/BigQuery
6058 Action::CastTimestampStripTz
6059 } else if matches!(&c.to, DataType::Json)
6060 && matches!(&c.this, Expression::Literal(Literal::String(_)))
6061 && matches!(
6062 target,
6063 DialectType::Presto
6064 | DialectType::Trino
6065 | DialectType::Athena
6066 | DialectType::Snowflake
6067 )
6068 {
6069 // CAST('x' AS JSON) -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake
6070 // Only when the input is a string literal (JSON 'value' syntax)
6071 Action::JsonLiteralToJsonParse
6072 } else if matches!(&c.to, DataType::Json | DataType::JsonB)
6073 && matches!(target, DialectType::Spark | DialectType::Databricks)
6074 {
6075 // CAST(x AS JSON) -> TO_JSON(x) for Spark
6076 Action::CastToJsonForSpark
6077 } else if (matches!(
6078 &c.to,
6079 DataType::Array { .. } | DataType::Map { .. } | DataType::Struct { .. }
6080 )) && matches!(
6081 target,
6082 DialectType::Spark | DialectType::Databricks
6083 ) && (matches!(&c.this, Expression::ParseJson(_))
6084 || matches!(
6085 &c.this,
6086 Expression::Function(f)
6087 if f.name.eq_ignore_ascii_case("JSON_EXTRACT")
6088 || f.name.eq_ignore_ascii_case("JSON_EXTRACT_SCALAR")
6089 || f.name.eq_ignore_ascii_case("GET_JSON_OBJECT")
6090 ))
6091 {
6092 // CAST(JSON_PARSE(...) AS ARRAY/MAP) or CAST(JSON_EXTRACT/GET_JSON_OBJECT(...) AS ARRAY/MAP)
6093 // -> FROM_JSON(..., type_string) for Spark
6094 Action::CastJsonToFromJson
6095 } else if matches!(target, DialectType::Spark | DialectType::Databricks)
6096 && matches!(
6097 c.to,
6098 DataType::Timestamp {
6099 timezone: false,
6100 ..
6101 }
6102 )
6103 && matches!(source, DialectType::DuckDB)
6104 {
6105 Action::StrftimeCastTimestamp
6106 } else if matches!(source, DialectType::DuckDB)
6107 && matches!(
6108 c.to,
6109 DataType::Decimal {
6110 precision: None,
6111 ..
6112 }
6113 )
6114 {
6115 Action::DecimalDefaultPrecision
6116 } else if matches!(source, DialectType::MySQL | DialectType::SingleStore)
6117 && matches!(c.to, DataType::Char { length: None })
6118 && !matches!(target, DialectType::MySQL | DialectType::SingleStore)
6119 {
6120 // MySQL CAST(x AS CHAR) was originally TEXT - convert to target text type
6121 Action::MysqlCastCharToText
6122 } else if matches!(
6123 source,
6124 DialectType::Spark | DialectType::Databricks | DialectType::Hive
6125 ) && matches!(
6126 target,
6127 DialectType::Spark | DialectType::Databricks | DialectType::Hive
6128 ) && Self::has_varchar_char_type(&c.to)
6129 {
6130 // Spark parses VARCHAR(n)/CHAR(n) as TEXT, so normalize back to STRING
6131 Action::SparkCastVarcharToString
6132 } else {
6133 Action::None
6134 }
6135 }
6136 Expression::SafeCast(ref c) => {
6137 if c.format.is_some()
6138 && matches!(source, DialectType::BigQuery)
6139 && !matches!(target, DialectType::BigQuery)
6140 {
6141 Action::BigQueryCastFormat
6142 } else {
6143 Action::None
6144 }
6145 }
6146 // For DuckDB: DATE_TRUNC should preserve the input type
6147 Expression::DateTrunc(_) | Expression::TimestampTrunc(_) => {
6148 if matches!(source, DialectType::Snowflake)
6149 && matches!(target, DialectType::DuckDB)
6150 {
6151 Action::DateTruncWrapCast
6152 } else {
6153 Action::None
6154 }
6155 }
6156 // For DuckDB: SET a = 1 -> SET VARIABLE a = 1
6157 Expression::SetStatement(s) => {
6158 if matches!(target, DialectType::DuckDB)
6159 && !matches!(source, DialectType::TSQL | DialectType::Fabric)
6160 && s.items.iter().any(|item| item.kind.is_none())
6161 {
6162 Action::SetToVariable
6163 } else {
6164 Action::None
6165 }
6166 }
6167 // Cross-dialect NULL ordering normalization.
6168 // When nulls_first is not specified, fill in the source dialect's implied
6169 // default so the target generator can correctly add/strip NULLS FIRST/LAST.
6170 Expression::Ordered(o) => {
6171 // MySQL doesn't support NULLS FIRST/LAST - strip or rewrite
6172 if matches!(target, DialectType::MySQL) && o.nulls_first.is_some() {
6173 Action::MysqlNullsOrdering
6174 } else {
6175 // Skip targets that don't support NULLS FIRST/LAST syntax
6176 let target_supports_nulls = !matches!(
6177 target,
6178 DialectType::MySQL
6179 | DialectType::TSQL
6180 | DialectType::StarRocks
6181 | DialectType::Doris
6182 );
6183 if o.nulls_first.is_none() && source != target && target_supports_nulls
6184 {
6185 Action::NullsOrdering
6186 } else {
6187 Action::None
6188 }
6189 }
6190 }
6191 // BigQuery data types: convert INT64, BYTES, NUMERIC etc. to standard types
6192 Expression::DataType(dt) => {
6193 if matches!(source, DialectType::BigQuery)
6194 && !matches!(target, DialectType::BigQuery)
6195 {
6196 match dt {
6197 DataType::Custom { ref name }
6198 if name.eq_ignore_ascii_case("INT64")
6199 || name.eq_ignore_ascii_case("FLOAT64")
6200 || name.eq_ignore_ascii_case("BOOL")
6201 || name.eq_ignore_ascii_case("BYTES")
6202 || name.eq_ignore_ascii_case("NUMERIC")
6203 || name.eq_ignore_ascii_case("STRING")
6204 || name.eq_ignore_ascii_case("DATETIME") =>
6205 {
6206 Action::BigQueryCastType
6207 }
6208 _ => Action::None,
6209 }
6210 } else if matches!(source, DialectType::TSQL) {
6211 // For TSQL source -> any target (including TSQL itself for REAL)
6212 match dt {
6213 // REAL -> FLOAT even for TSQL->TSQL
6214 DataType::Custom { ref name }
6215 if name.eq_ignore_ascii_case("REAL") =>
6216 {
6217 Action::TSQLTypeNormalize
6218 }
6219 DataType::Float {
6220 real_spelling: true,
6221 ..
6222 } => Action::TSQLTypeNormalize,
6223 // Other TSQL type normalizations only for non-TSQL targets
6224 DataType::Custom { ref name }
6225 if !matches!(target, DialectType::TSQL)
6226 && (name.eq_ignore_ascii_case("MONEY")
6227 || name.eq_ignore_ascii_case("SMALLMONEY")
6228 || name.eq_ignore_ascii_case("DATETIME2")
6229 || name.eq_ignore_ascii_case("IMAGE")
6230 || name.eq_ignore_ascii_case("BIT")
6231 || name.eq_ignore_ascii_case("ROWVERSION")
6232 || name.eq_ignore_ascii_case("UNIQUEIDENTIFIER")
6233 || name.eq_ignore_ascii_case("DATETIMEOFFSET")
6234 || name.to_uppercase().starts_with("NUMERIC")
6235 || name.to_uppercase().starts_with("DATETIME2(")
6236 || name.to_uppercase().starts_with("TIME(")) =>
6237 {
6238 Action::TSQLTypeNormalize
6239 }
6240 DataType::Float {
6241 precision: Some(_), ..
6242 } if !matches!(target, DialectType::TSQL) => {
6243 Action::TSQLTypeNormalize
6244 }
6245 DataType::TinyInt { .. }
6246 if !matches!(target, DialectType::TSQL) =>
6247 {
6248 Action::TSQLTypeNormalize
6249 }
6250 // INTEGER -> INT for Databricks/Spark targets
6251 DataType::Int {
6252 integer_spelling: true,
6253 ..
6254 } if matches!(
6255 target,
6256 DialectType::Databricks | DialectType::Spark
6257 ) =>
6258 {
6259 Action::TSQLTypeNormalize
6260 }
6261 _ => Action::None,
6262 }
6263 } else if (matches!(source, DialectType::Oracle)
6264 || matches!(source, DialectType::Generic))
6265 && !matches!(target, DialectType::Oracle)
6266 {
6267 match dt {
6268 DataType::Custom { ref name }
6269 if name.to_uppercase().starts_with("VARCHAR2(")
6270 || name.to_uppercase().starts_with("NVARCHAR2(")
6271 || name.eq_ignore_ascii_case("VARCHAR2")
6272 || name.eq_ignore_ascii_case("NVARCHAR2") =>
6273 {
6274 Action::OracleVarchar2ToVarchar
6275 }
6276 _ => Action::None,
6277 }
6278 } else if matches!(target, DialectType::Snowflake)
6279 && !matches!(source, DialectType::Snowflake)
6280 {
6281 // When target is Snowflake but source is NOT Snowflake,
6282 // protect FLOAT from being converted to DOUBLE by Snowflake's transform.
6283 // Snowflake treats FLOAT=DOUBLE internally, but non-Snowflake sources
6284 // should keep their FLOAT spelling.
6285 match dt {
6286 DataType::Float { .. } => Action::SnowflakeFloatProtect,
6287 _ => Action::None,
6288 }
6289 } else {
6290 Action::None
6291 }
6292 }
6293 // LOWER patterns from BigQuery TO_HEX conversions:
6294 // - LOWER(LOWER(HEX(x))) from non-BQ targets: flatten
6295 // - LOWER(Function("TO_HEX")) for BQ->BQ: strip LOWER
6296 Expression::Lower(uf) => {
6297 if matches!(source, DialectType::BigQuery) {
6298 match &uf.this {
6299 Expression::Lower(_) => Action::BigQueryToHexLower,
6300 Expression::Function(f)
6301 if f.name == "TO_HEX"
6302 && matches!(target, DialectType::BigQuery) =>
6303 {
6304 // BQ->BQ: LOWER(TO_HEX(x)) -> TO_HEX(x)
6305 Action::BigQueryToHexLower
6306 }
6307 _ => Action::None,
6308 }
6309 } else {
6310 Action::None
6311 }
6312 }
6313 // UPPER patterns from BigQuery TO_HEX conversions:
6314 // - UPPER(LOWER(HEX(x))) from non-BQ targets: extract inner
6315 // - UPPER(Function("TO_HEX")) for BQ->BQ: keep as UPPER(TO_HEX(x))
6316 Expression::Upper(uf) => {
6317 if matches!(source, DialectType::BigQuery) {
6318 match &uf.this {
6319 Expression::Lower(_) => Action::BigQueryToHexUpper,
6320 _ => Action::None,
6321 }
6322 } else {
6323 Action::None
6324 }
6325 }
6326 // BigQuery LAST_DAY(date, unit) -> strip unit for non-BigQuery targets
6327 // Snowflake supports LAST_DAY with unit, so keep it there
6328 Expression::LastDay(ld) => {
6329 if matches!(source, DialectType::BigQuery)
6330 && !matches!(target, DialectType::BigQuery | DialectType::Snowflake)
6331 && ld.unit.is_some()
6332 {
6333 Action::BigQueryLastDayStripUnit
6334 } else {
6335 Action::None
6336 }
6337 }
6338 // BigQuery SafeDivide expressions (already parsed as SafeDivide)
6339 Expression::SafeDivide(_) => {
6340 if matches!(source, DialectType::BigQuery)
6341 && !matches!(target, DialectType::BigQuery)
6342 {
6343 Action::BigQuerySafeDivide
6344 } else {
6345 Action::None
6346 }
6347 }
6348 // BigQuery ANY_VALUE(x HAVING MAX/MIN y) -> ARG_MAX_NULL/ARG_MIN_NULL for DuckDB
6349 // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
6350 Expression::AnyValue(ref agg) => {
6351 if matches!(source, DialectType::BigQuery)
6352 && matches!(target, DialectType::DuckDB)
6353 && agg.having_max.is_some()
6354 {
6355 Action::BigQueryAnyValueHaving
6356 } else if matches!(target, DialectType::Spark | DialectType::Databricks)
6357 && !matches!(source, DialectType::Spark | DialectType::Databricks)
6358 && agg.ignore_nulls.is_none()
6359 {
6360 Action::AnyValueIgnoreNulls
6361 } else {
6362 Action::None
6363 }
6364 }
6365 Expression::Any(ref q) => {
6366 if matches!(source, DialectType::PostgreSQL)
6367 && matches!(
6368 target,
6369 DialectType::Spark | DialectType::Databricks | DialectType::Hive
6370 )
6371 && q.op.is_some()
6372 && !matches!(
6373 q.subquery,
6374 Expression::Select(_) | Expression::Subquery(_)
6375 )
6376 {
6377 Action::AnyToExists
6378 } else {
6379 Action::None
6380 }
6381 }
6382 // BigQuery APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
6383 // RegexpLike from non-DuckDB sources -> REGEXP_MATCHES for DuckDB target
6384 // DuckDB's ~ is a full match, but other dialects' REGEXP/RLIKE is a partial match
6385 Expression::RegexpLike(_)
6386 if !matches!(source, DialectType::DuckDB)
6387 && matches!(target, DialectType::DuckDB) =>
6388 {
6389 Action::RegexpLikeToDuckDB
6390 }
6391 // Safe-division source -> non-safe target: NULLIF wrapping and/or CAST
6392 // Safe-division dialects: MySQL, DuckDB, SingleStore, TiDB, ClickHouse, Doris
6393 Expression::Div(ref op)
6394 if matches!(
6395 source,
6396 DialectType::MySQL
6397 | DialectType::DuckDB
6398 | DialectType::SingleStore
6399 | DialectType::TiDB
6400 | DialectType::ClickHouse
6401 | DialectType::Doris
6402 ) && matches!(
6403 target,
6404 DialectType::PostgreSQL
6405 | DialectType::Redshift
6406 | DialectType::Drill
6407 | DialectType::Trino
6408 | DialectType::Presto
6409 | DialectType::Athena
6410 | DialectType::TSQL
6411 | DialectType::Teradata
6412 | DialectType::SQLite
6413 | DialectType::BigQuery
6414 | DialectType::Snowflake
6415 | DialectType::Databricks
6416 | DialectType::Oracle
6417 | DialectType::Materialize
6418 | DialectType::RisingWave
6419 ) =>
6420 {
6421 // Only wrap if RHS is not already NULLIF
6422 if !matches!(&op.right, Expression::Function(f) if f.name.eq_ignore_ascii_case("NULLIF"))
6423 {
6424 Action::MySQLSafeDivide
6425 } else {
6426 Action::None
6427 }
6428 }
6429 // ALTER TABLE ... RENAME TO <schema>.<table> -> strip schema for most targets
6430 // For TSQL/Fabric, convert to sp_rename instead
6431 Expression::AlterTable(ref at) if !at.actions.is_empty() => {
6432 if let Some(crate::expressions::AlterTableAction::RenameTable(
6433 ref new_tbl,
6434 )) = at.actions.first()
6435 {
6436 if matches!(target, DialectType::TSQL | DialectType::Fabric) {
6437 // TSQL: ALTER TABLE RENAME -> EXEC sp_rename
6438 Action::AlterTableToSpRename
6439 } else if new_tbl.schema.is_some()
6440 && matches!(
6441 target,
6442 DialectType::BigQuery
6443 | DialectType::Doris
6444 | DialectType::StarRocks
6445 | DialectType::DuckDB
6446 | DialectType::PostgreSQL
6447 | DialectType::Redshift
6448 )
6449 {
6450 Action::AlterTableRenameStripSchema
6451 } else {
6452 Action::None
6453 }
6454 } else {
6455 Action::None
6456 }
6457 }
6458 // EPOCH(x) expression -> target-specific epoch conversion
6459 Expression::Epoch(_) if !matches!(target, DialectType::DuckDB) => {
6460 Action::EpochConvert
6461 }
6462 // EPOCH_MS(x) expression -> target-specific epoch ms conversion
6463 Expression::EpochMs(_) if !matches!(target, DialectType::DuckDB) => {
6464 Action::EpochMsConvert
6465 }
6466 // STRING_AGG -> GROUP_CONCAT for MySQL/SQLite
6467 Expression::StringAgg(_) => {
6468 if matches!(
6469 target,
6470 DialectType::MySQL
6471 | DialectType::SingleStore
6472 | DialectType::Doris
6473 | DialectType::StarRocks
6474 | DialectType::SQLite
6475 ) {
6476 Action::StringAggConvert
6477 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
6478 Action::StringAggConvert
6479 } else {
6480 Action::None
6481 }
6482 }
6483 // GROUP_CONCAT -> STRING_AGG for PostgreSQL/Presto/etc.
6484 // Also handles GROUP_CONCAT normalization for MySQL/SQLite targets
6485 Expression::GroupConcat(_) => Action::GroupConcatConvert,
6486 // CARDINALITY/ARRAY_LENGTH/ARRAY_SIZE -> target-specific array length
6487 Expression::Cardinality(_) | Expression::ArrayLength(_) => {
6488 Action::ArrayLengthConvert
6489 }
6490 Expression::ArraySize(_) => {
6491 if matches!(target, DialectType::Drill) {
6492 Action::ArraySizeDrill
6493 } else {
6494 Action::ArrayLengthConvert
6495 }
6496 }
6497 // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter/ARRAY subquery
6498 Expression::ArrayRemove(_) => match target {
6499 DialectType::DuckDB | DialectType::ClickHouse | DialectType::BigQuery => {
6500 Action::ArrayRemoveConvert
6501 }
6502 _ => Action::None,
6503 },
6504 // ARRAY_REVERSE(x) -> arrayReverse for ClickHouse
6505 Expression::ArrayReverse(_) => match target {
6506 DialectType::ClickHouse => Action::ArrayReverseConvert,
6507 _ => Action::None,
6508 },
6509 // JSON_KEYS(x) -> JSON_OBJECT_KEYS/OBJECT_KEYS for Spark/Databricks/Snowflake
6510 Expression::JsonKeys(_) => match target {
6511 DialectType::Spark | DialectType::Databricks | DialectType::Snowflake => {
6512 Action::JsonKeysConvert
6513 }
6514 _ => Action::None,
6515 },
6516 // PARSE_JSON(x) -> strip for SQLite/Doris/MySQL/StarRocks
6517 Expression::ParseJson(_) => match target {
6518 DialectType::SQLite
6519 | DialectType::Doris
6520 | DialectType::MySQL
6521 | DialectType::StarRocks => Action::ParseJsonStrip,
6522 _ => Action::None,
6523 },
6524 // WeekOfYear -> WEEKISO for Snowflake (cross-dialect only)
6525 Expression::WeekOfYear(_)
6526 if matches!(target, DialectType::Snowflake)
6527 && !matches!(source, DialectType::Snowflake) =>
6528 {
6529 Action::WeekOfYearToWeekIso
6530 }
6531 // NVL: clear original_name so generator uses dialect-specific function names
6532 Expression::Nvl(f) if f.original_name.is_some() => Action::NvlClearOriginal,
6533 // XOR: expand for dialects that don't support the XOR keyword
6534 Expression::Xor(_) => {
6535 let target_supports_xor = matches!(
6536 target,
6537 DialectType::MySQL
6538 | DialectType::SingleStore
6539 | DialectType::Doris
6540 | DialectType::StarRocks
6541 );
6542 if !target_supports_xor {
6543 Action::XorExpand
6544 } else {
6545 Action::None
6546 }
6547 }
6548 // TSQL #table -> temp table normalization (CREATE TABLE)
6549 Expression::CreateTable(ct)
6550 if matches!(source, DialectType::TSQL | DialectType::Fabric)
6551 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
6552 && ct.name.name.name.starts_with('#') =>
6553 {
6554 Action::TempTableHash
6555 }
6556 // TSQL #table -> strip # from table references in SELECT/etc.
6557 Expression::Table(tr)
6558 if matches!(source, DialectType::TSQL | DialectType::Fabric)
6559 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
6560 && tr.name.name.starts_with('#') =>
6561 {
6562 Action::TempTableHash
6563 }
6564 // TSQL #table -> strip # from DROP TABLE names
6565 Expression::DropTable(ref dt)
6566 if matches!(source, DialectType::TSQL | DialectType::Fabric)
6567 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
6568 && dt.names.iter().any(|n| n.name.name.starts_with('#')) =>
6569 {
6570 Action::TempTableHash
6571 }
6572 // JSON_EXTRACT -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
6573 Expression::JsonExtract(_)
6574 if matches!(target, DialectType::TSQL | DialectType::Fabric) =>
6575 {
6576 Action::JsonExtractToTsql
6577 }
6578 // JSON_EXTRACT_SCALAR -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
6579 Expression::JsonExtractScalar(_)
6580 if matches!(target, DialectType::TSQL | DialectType::Fabric) =>
6581 {
6582 Action::JsonExtractToTsql
6583 }
6584 // JSON_EXTRACT -> JSONExtractString for ClickHouse
6585 Expression::JsonExtract(_) if matches!(target, DialectType::ClickHouse) => {
6586 Action::JsonExtractToClickHouse
6587 }
6588 // JSON_EXTRACT_SCALAR -> JSONExtractString for ClickHouse
6589 Expression::JsonExtractScalar(_)
6590 if matches!(target, DialectType::ClickHouse) =>
6591 {
6592 Action::JsonExtractToClickHouse
6593 }
6594 // JSON_EXTRACT -> arrow syntax for SQLite/DuckDB
6595 Expression::JsonExtract(ref f)
6596 if !f.arrow_syntax
6597 && matches!(target, DialectType::SQLite | DialectType::DuckDB) =>
6598 {
6599 Action::JsonExtractToArrow
6600 }
6601 // JSON_EXTRACT with JSONPath -> JSON_EXTRACT_PATH for PostgreSQL (non-PG sources only)
6602 Expression::JsonExtract(ref f)
6603 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift)
6604 && !matches!(
6605 source,
6606 DialectType::PostgreSQL
6607 | DialectType::Redshift
6608 | DialectType::Materialize
6609 )
6610 && matches!(&f.path, Expression::Literal(Literal::String(s)) if s.starts_with('$')) =>
6611 {
6612 Action::JsonExtractToGetJsonObject
6613 }
6614 // JSON_EXTRACT -> GET_JSON_OBJECT for Hive/Spark
6615 Expression::JsonExtract(_)
6616 if matches!(
6617 target,
6618 DialectType::Hive | DialectType::Spark | DialectType::Databricks
6619 ) =>
6620 {
6621 Action::JsonExtractToGetJsonObject
6622 }
6623 // JSON_EXTRACT_SCALAR -> target-specific for PostgreSQL, Snowflake, SQLite
6624 // Skip if already in arrow/hash_arrow syntax (same-dialect identity case)
6625 Expression::JsonExtractScalar(ref f)
6626 if !f.arrow_syntax
6627 && !f.hash_arrow_syntax
6628 && matches!(
6629 target,
6630 DialectType::PostgreSQL
6631 | DialectType::Redshift
6632 | DialectType::Snowflake
6633 | DialectType::SQLite
6634 | DialectType::DuckDB
6635 ) =>
6636 {
6637 Action::JsonExtractScalarConvert
6638 }
6639 // JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
6640 Expression::JsonExtractScalar(_)
6641 if matches!(
6642 target,
6643 DialectType::Hive | DialectType::Spark | DialectType::Databricks
6644 ) =>
6645 {
6646 Action::JsonExtractScalarToGetJsonObject
6647 }
6648 // JSON_EXTRACT path normalization for BigQuery, MySQL (bracket/wildcard handling)
6649 Expression::JsonExtract(ref f)
6650 if !f.arrow_syntax
6651 && matches!(target, DialectType::BigQuery | DialectType::MySQL) =>
6652 {
6653 Action::JsonPathNormalize
6654 }
6655 // JsonQuery (parsed JSON_QUERY) -> target-specific
6656 Expression::JsonQuery(_) => Action::JsonQueryValueConvert,
6657 // JsonValue (parsed JSON_VALUE) -> target-specific
6658 Expression::JsonValue(_) => Action::JsonQueryValueConvert,
6659 // AT TIME ZONE -> AT_TIMEZONE for Presto, FROM_UTC_TIMESTAMP for Spark,
6660 // TIMESTAMP(DATETIME(...)) for BigQuery, CONVERT_TIMEZONE for Snowflake
6661 Expression::AtTimeZone(_)
6662 if matches!(
6663 target,
6664 DialectType::Presto
6665 | DialectType::Trino
6666 | DialectType::Athena
6667 | DialectType::Spark
6668 | DialectType::Databricks
6669 | DialectType::BigQuery
6670 | DialectType::Snowflake
6671 ) =>
6672 {
6673 Action::AtTimeZoneConvert
6674 }
6675 // DAY_OF_WEEK -> dialect-specific
6676 Expression::DayOfWeek(_)
6677 if matches!(
6678 target,
6679 DialectType::DuckDB | DialectType::Spark | DialectType::Databricks
6680 ) =>
6681 {
6682 Action::DayOfWeekConvert
6683 }
6684 // CURRENT_USER -> CURRENT_USER() for Snowflake
6685 Expression::CurrentUser(_) if matches!(target, DialectType::Snowflake) => {
6686 Action::CurrentUserParens
6687 }
6688 // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
6689 Expression::ElementAt(_)
6690 if matches!(target, DialectType::PostgreSQL | DialectType::BigQuery) =>
6691 {
6692 Action::ElementAtConvert
6693 }
6694 // ARRAY[...] (ArrayFunc bracket_notation=false) -> convert for target dialect
6695 Expression::ArrayFunc(ref arr)
6696 if !arr.bracket_notation
6697 && matches!(
6698 target,
6699 DialectType::Spark
6700 | DialectType::Databricks
6701 | DialectType::Hive
6702 | DialectType::BigQuery
6703 | DialectType::DuckDB
6704 | DialectType::Snowflake
6705 | DialectType::Presto
6706 | DialectType::Trino
6707 | DialectType::Athena
6708 | DialectType::ClickHouse
6709 | DialectType::StarRocks
6710 ) =>
6711 {
6712 Action::ArraySyntaxConvert
6713 }
6714 // VARIANCE expression -> varSamp for ClickHouse
6715 Expression::Variance(_) if matches!(target, DialectType::ClickHouse) => {
6716 Action::VarianceToClickHouse
6717 }
6718 // STDDEV expression -> stddevSamp for ClickHouse
6719 Expression::Stddev(_) if matches!(target, DialectType::ClickHouse) => {
6720 Action::StddevToClickHouse
6721 }
6722 // ApproxQuantile -> APPROX_PERCENTILE for Snowflake
6723 Expression::ApproxQuantile(_) if matches!(target, DialectType::Snowflake) => {
6724 Action::ApproxQuantileConvert
6725 }
6726 // MonthsBetween -> target-specific
6727 Expression::MonthsBetween(_)
6728 if !matches!(
6729 target,
6730 DialectType::Spark | DialectType::Databricks | DialectType::Hive
6731 ) =>
6732 {
6733 Action::MonthsBetweenConvert
6734 }
6735 // AddMonths -> target-specific DATEADD/DATE_ADD
6736 Expression::AddMonths(_) => Action::AddMonthsConvert,
6737 // MapFromArrays -> target-specific (MAP, OBJECT_CONSTRUCT, MAP_FROM_ARRAYS)
6738 Expression::MapFromArrays(_)
6739 if !matches!(target, DialectType::Spark | DialectType::Databricks) =>
6740 {
6741 Action::MapFromArraysConvert
6742 }
6743 // CURRENT_USER -> CURRENT_USER() for Spark
6744 Expression::CurrentUser(_)
6745 if matches!(target, DialectType::Spark | DialectType::Databricks) =>
6746 {
6747 Action::CurrentUserSparkParens
6748 }
6749 // MONTH/YEAR/DAY('string') from Spark -> cast string to DATE for DuckDB/Presto
6750 Expression::Month(ref f) | Expression::Year(ref f) | Expression::Day(ref f)
6751 if matches!(
6752 source,
6753 DialectType::Spark | DialectType::Databricks | DialectType::Hive
6754 ) && matches!(&f.this, Expression::Literal(Literal::String(_)))
6755 && matches!(
6756 target,
6757 DialectType::DuckDB
6758 | DialectType::Presto
6759 | DialectType::Trino
6760 | DialectType::Athena
6761 | DialectType::PostgreSQL
6762 | DialectType::Redshift
6763 ) =>
6764 {
6765 Action::SparkDateFuncCast
6766 }
6767 // $parameter -> @parameter for BigQuery
6768 Expression::Parameter(ref p)
6769 if matches!(target, DialectType::BigQuery)
6770 && matches!(source, DialectType::DuckDB)
6771 && (p.style == crate::expressions::ParameterStyle::Dollar
6772 || p.style == crate::expressions::ParameterStyle::DoubleDollar) =>
6773 {
6774 Action::DollarParamConvert
6775 }
6776 // EscapeString literal: normalize literal newlines to \n
6777 Expression::Literal(Literal::EscapeString(ref s))
6778 if s.contains('\n') || s.contains('\r') || s.contains('\t') =>
6779 {
6780 Action::EscapeStringNormalize
6781 }
6782 // straight_join: keep lowercase for DuckDB, quote for MySQL
6783 Expression::Column(ref col)
6784 if col.name.name == "STRAIGHT_JOIN"
6785 && col.table.is_none()
6786 && matches!(source, DialectType::DuckDB)
6787 && matches!(target, DialectType::DuckDB | DialectType::MySQL) =>
6788 {
6789 Action::StraightJoinCase
6790 }
6791 // DATE and TIMESTAMP literal type conversions are now handled in the generator directly
6792 // Snowflake INTERVAL format: INTERVAL '2' HOUR -> INTERVAL '2 HOUR'
6793 Expression::Interval(ref iv)
6794 if matches!(
6795 target,
6796 DialectType::Snowflake
6797 | DialectType::PostgreSQL
6798 | DialectType::Redshift
6799 ) && iv.unit.is_some()
6800 && matches!(
6801 &iv.this,
6802 Some(Expression::Literal(Literal::String(_)))
6803 ) =>
6804 {
6805 Action::SnowflakeIntervalFormat
6806 }
6807 // TABLESAMPLE -> TABLESAMPLE RESERVOIR for DuckDB target
6808 Expression::TableSample(ref ts) if matches!(target, DialectType::DuckDB) => {
6809 if let Some(ref sample) = ts.sample {
6810 if !sample.explicit_method {
6811 Action::TablesampleReservoir
6812 } else {
6813 Action::None
6814 }
6815 } else {
6816 Action::None
6817 }
6818 }
6819 // TABLESAMPLE from non-Snowflake source to Snowflake: strip method and PERCENT
6820 // Handles both Expression::TableSample wrapper and Expression::Table with table_sample
6821 Expression::TableSample(ref ts)
6822 if matches!(target, DialectType::Snowflake)
6823 && !matches!(source, DialectType::Snowflake)
6824 && ts.sample.is_some() =>
6825 {
6826 if let Some(ref sample) = ts.sample {
6827 if !sample.explicit_method {
6828 Action::TablesampleSnowflakeStrip
6829 } else {
6830 Action::None
6831 }
6832 } else {
6833 Action::None
6834 }
6835 }
6836 Expression::Table(ref t)
6837 if matches!(target, DialectType::Snowflake)
6838 && !matches!(source, DialectType::Snowflake)
6839 && t.table_sample.is_some() =>
6840 {
6841 if let Some(ref sample) = t.table_sample {
6842 if !sample.explicit_method {
6843 Action::TablesampleSnowflakeStrip
6844 } else {
6845 Action::None
6846 }
6847 } else {
6848 Action::None
6849 }
6850 }
6851 // ALTER TABLE RENAME -> EXEC sp_rename for TSQL
6852 Expression::AlterTable(ref at)
6853 if matches!(target, DialectType::TSQL | DialectType::Fabric)
6854 && !at.actions.is_empty()
6855 && matches!(
6856 at.actions.first(),
6857 Some(crate::expressions::AlterTableAction::RenameTable(_))
6858 ) =>
6859 {
6860 Action::AlterTableToSpRename
6861 }
6862 // Subscript index: 1-based to 0-based for BigQuery/Hive/Spark
6863 Expression::Subscript(ref sub)
6864 if matches!(
6865 target,
6866 DialectType::BigQuery
6867 | DialectType::Hive
6868 | DialectType::Spark
6869 | DialectType::Databricks
6870 ) && matches!(
6871 source,
6872 DialectType::DuckDB
6873 | DialectType::PostgreSQL
6874 | DialectType::Presto
6875 | DialectType::Trino
6876 | DialectType::Redshift
6877 | DialectType::ClickHouse
6878 ) && matches!(&sub.index, Expression::Literal(Literal::Number(ref n)) if n.parse::<i64>().unwrap_or(0) > 0) =>
6879 {
6880 Action::ArrayIndexConvert
6881 }
6882 // ANY_VALUE IGNORE NULLS detection moved to the AnyValue arm above
6883 // MysqlNullsOrdering for Ordered is now handled in the Ordered arm above
6884 // RESPECT NULLS handling for SQLite (strip it, add NULLS LAST to ORDER BY)
6885 // and for MySQL (rewrite ORDER BY with CASE WHEN for null ordering)
6886 Expression::WindowFunction(ref wf) => {
6887 // BigQuery doesn't support NULLS FIRST/LAST in window function ORDER BY
6888 // EXCEPT for ROW_NUMBER which keeps NULLS LAST
6889 let is_row_number = matches!(wf.this, Expression::RowNumber(_));
6890 if matches!(target, DialectType::BigQuery)
6891 && !is_row_number
6892 && !wf.over.order_by.is_empty()
6893 && wf.over.order_by.iter().any(|o| o.nulls_first.is_some())
6894 {
6895 Action::BigQueryNullsOrdering
6896 // DuckDB -> MySQL: Add CASE WHEN for NULLS LAST simulation in window ORDER BY
6897 // But NOT when frame is RANGE/GROUPS, since adding CASE WHEN would break value-based frames
6898 } else {
6899 let source_nulls_last = matches!(source, DialectType::DuckDB);
6900 let has_range_frame = wf.over.frame.as_ref().map_or(false, |f| {
6901 matches!(
6902 f.kind,
6903 crate::expressions::WindowFrameKind::Range
6904 | crate::expressions::WindowFrameKind::Groups
6905 )
6906 });
6907 if source_nulls_last
6908 && matches!(target, DialectType::MySQL)
6909 && !wf.over.order_by.is_empty()
6910 && wf.over.order_by.iter().any(|o| !o.desc)
6911 && !has_range_frame
6912 {
6913 Action::MysqlNullsLastRewrite
6914 } else {
6915 match &wf.this {
6916 Expression::FirstValue(ref vf)
6917 | Expression::LastValue(ref vf)
6918 if vf.ignore_nulls == Some(false) =>
6919 {
6920 // RESPECT NULLS
6921 match target {
6922 DialectType::SQLite => Action::RespectNullsConvert,
6923 _ => Action::None,
6924 }
6925 }
6926 _ => Action::None,
6927 }
6928 }
6929 }
6930 }
6931 // CREATE TABLE a LIKE b -> dialect-specific transformations
6932 Expression::CreateTable(ref ct)
6933 if ct.columns.is_empty()
6934 && ct.constraints.iter().any(|c| {
6935 matches!(c, crate::expressions::TableConstraint::Like { .. })
6936 })
6937 && matches!(
6938 target,
6939 DialectType::DuckDB | DialectType::SQLite | DialectType::Drill
6940 ) =>
6941 {
6942 Action::CreateTableLikeToCtas
6943 }
6944 Expression::CreateTable(ref ct)
6945 if ct.columns.is_empty()
6946 && ct.constraints.iter().any(|c| {
6947 matches!(c, crate::expressions::TableConstraint::Like { .. })
6948 })
6949 && matches!(target, DialectType::TSQL | DialectType::Fabric) =>
6950 {
6951 Action::CreateTableLikeToSelectInto
6952 }
6953 Expression::CreateTable(ref ct)
6954 if ct.columns.is_empty()
6955 && ct.constraints.iter().any(|c| {
6956 matches!(c, crate::expressions::TableConstraint::Like { .. })
6957 })
6958 && matches!(target, DialectType::ClickHouse) =>
6959 {
6960 Action::CreateTableLikeToAs
6961 }
6962 // CREATE TABLE: strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
6963 Expression::CreateTable(ref ct)
6964 if matches!(target, DialectType::DuckDB)
6965 && matches!(
6966 source,
6967 DialectType::DuckDB
6968 | DialectType::Spark
6969 | DialectType::Databricks
6970 | DialectType::Hive
6971 ) =>
6972 {
6973 let has_comment = ct.columns.iter().any(|c| {
6974 c.comment.is_some()
6975 || c.constraints.iter().any(|con| {
6976 matches!(con, crate::expressions::ColumnConstraint::Comment(_))
6977 })
6978 });
6979 let has_props = !ct.properties.is_empty();
6980 if has_comment || has_props {
6981 Action::CreateTableStripComment
6982 } else {
6983 Action::None
6984 }
6985 }
6986 // Array conversion: Expression::Array -> Expression::ArrayFunc for PostgreSQL
6987 Expression::Array(_)
6988 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) =>
6989 {
6990 Action::ArrayConcatBracketConvert
6991 }
6992 // ArrayFunc (bracket notation) -> Function("ARRAY") for Redshift (from BigQuery source)
6993 Expression::ArrayFunc(ref arr)
6994 if arr.bracket_notation
6995 && matches!(source, DialectType::BigQuery)
6996 && matches!(target, DialectType::Redshift) =>
6997 {
6998 Action::ArrayConcatBracketConvert
6999 }
7000 // BIT_OR/BIT_AND/BIT_XOR: float/decimal arg cast for DuckDB, or rename for Snowflake
7001 Expression::BitwiseOrAgg(ref f)
7002 | Expression::BitwiseAndAgg(ref f)
7003 | Expression::BitwiseXorAgg(ref f) => {
7004 if matches!(target, DialectType::DuckDB) {
7005 // Check if the arg is CAST(val AS FLOAT/DOUBLE/DECIMAL/REAL)
7006 if let Expression::Cast(ref c) = f.this {
7007 match &c.to {
7008 DataType::Float { .. }
7009 | DataType::Double { .. }
7010 | DataType::Decimal { .. } => Action::BitAggFloatCast,
7011 DataType::Custom { ref name }
7012 if name.eq_ignore_ascii_case("REAL") =>
7013 {
7014 Action::BitAggFloatCast
7015 }
7016 _ => Action::None,
7017 }
7018 } else {
7019 Action::None
7020 }
7021 } else if matches!(target, DialectType::Snowflake) {
7022 Action::BitAggSnowflakeRename
7023 } else {
7024 Action::None
7025 }
7026 }
7027 // FILTER -> IFF for Snowflake (aggregate functions with FILTER clause)
7028 Expression::Filter(ref _f) if matches!(target, DialectType::Snowflake) => {
7029 Action::FilterToIff
7030 }
7031 // AggFunc.filter -> IFF wrapping for Snowflake (e.g., AVG(x) FILTER(WHERE cond))
7032 Expression::Avg(ref f)
7033 | Expression::Sum(ref f)
7034 | Expression::Min(ref f)
7035 | Expression::Max(ref f)
7036 | Expression::CountIf(ref f)
7037 | Expression::Stddev(ref f)
7038 | Expression::StddevPop(ref f)
7039 | Expression::StddevSamp(ref f)
7040 | Expression::Variance(ref f)
7041 | Expression::VarPop(ref f)
7042 | Expression::VarSamp(ref f)
7043 | Expression::Median(ref f)
7044 | Expression::Mode(ref f)
7045 | Expression::First(ref f)
7046 | Expression::Last(ref f)
7047 | Expression::ApproxDistinct(ref f)
7048 if f.filter.is_some() && matches!(target, DialectType::Snowflake) =>
7049 {
7050 Action::AggFilterToIff
7051 }
7052 Expression::Count(ref c)
7053 if c.filter.is_some() && matches!(target, DialectType::Snowflake) =>
7054 {
7055 Action::AggFilterToIff
7056 }
7057 // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN ... END) for dialects that don't support multi-arg DISTINCT
7058 Expression::Count(ref c)
7059 if c.distinct
7060 && matches!(&c.this, Some(Expression::Tuple(_)))
7061 && matches!(
7062 target,
7063 DialectType::Presto
7064 | DialectType::Trino
7065 | DialectType::DuckDB
7066 | DialectType::PostgreSQL
7067 ) =>
7068 {
7069 Action::CountDistinctMultiArg
7070 }
7071 // JSON arrow -> GET_PATH/PARSE_JSON for Snowflake
7072 Expression::JsonExtract(_) if matches!(target, DialectType::Snowflake) => {
7073 Action::JsonToGetPath
7074 }
7075 // DuckDB struct/dict -> BigQuery STRUCT / Presto ROW
7076 Expression::Struct(_)
7077 if matches!(
7078 target,
7079 DialectType::BigQuery | DialectType::Presto | DialectType::Trino
7080 ) && matches!(source, DialectType::DuckDB) =>
7081 {
7082 Action::StructToRow
7083 }
7084 // DuckDB curly-brace dict {'key': value} -> BigQuery STRUCT / Presto ROW
7085 Expression::MapFunc(ref m)
7086 if m.curly_brace_syntax
7087 && matches!(
7088 target,
7089 DialectType::BigQuery | DialectType::Presto | DialectType::Trino
7090 )
7091 && matches!(source, DialectType::DuckDB) =>
7092 {
7093 Action::StructToRow
7094 }
7095 // APPROX_COUNT_DISTINCT -> APPROX_DISTINCT for Presto/Trino
7096 Expression::ApproxCountDistinct(_)
7097 if matches!(
7098 target,
7099 DialectType::Presto | DialectType::Trino | DialectType::Athena
7100 ) =>
7101 {
7102 Action::ApproxCountDistinctToApproxDistinct
7103 }
7104 // ARRAY_CONTAINS(arr, val) -> CONTAINS(arr, val) for Presto, ARRAY_CONTAINS(CAST(val AS VARIANT), arr) for Snowflake
7105 Expression::ArrayContains(_)
7106 if matches!(
7107 target,
7108 DialectType::Presto | DialectType::Trino | DialectType::Snowflake
7109 ) =>
7110 {
7111 Action::ArrayContainsConvert
7112 }
7113 // StrPosition with position -> complex expansion for Presto/DuckDB
7114 // STRPOS doesn't support a position arg in these dialects
7115 Expression::StrPosition(ref sp)
7116 if sp.position.is_some()
7117 && matches!(
7118 target,
7119 DialectType::Presto
7120 | DialectType::Trino
7121 | DialectType::Athena
7122 | DialectType::DuckDB
7123 ) =>
7124 {
7125 Action::StrPositionExpand
7126 }
7127 // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
7128 Expression::First(ref f)
7129 if f.ignore_nulls == Some(true)
7130 && matches!(target, DialectType::DuckDB) =>
7131 {
7132 Action::FirstToAnyValue
7133 }
7134 // BEGIN -> START TRANSACTION for Presto/Trino
7135 Expression::Command(ref cmd)
7136 if cmd.this.eq_ignore_ascii_case("BEGIN")
7137 && matches!(
7138 target,
7139 DialectType::Presto | DialectType::Trino | DialectType::Athena
7140 ) =>
7141 {
7142 // Handled inline below
7143 Action::None // We'll handle it directly
7144 }
7145 // Note: PostgreSQL ^ is now parsed as Power directly (not BitwiseXor).
7146 // PostgreSQL # is parsed as BitwiseXor (which is correct).
7147 // a || b (Concat operator) -> CONCAT function for Presto/Trino
7148 Expression::Concat(ref _op)
7149 if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
7150 && matches!(target, DialectType::Presto | DialectType::Trino) =>
7151 {
7152 Action::PipeConcatToConcat
7153 }
7154 _ => Action::None,
7155 }
7156 };
7157
7158 match action {
7159 Action::None => {
7160 // Handle inline transforms that don't need a dedicated action
7161
7162 // BETWEEN SYMMETRIC/ASYMMETRIC expansion for non-PostgreSQL/Dremio targets
7163 if let Expression::Between(ref b) = e {
7164 if let Some(sym) = b.symmetric {
7165 let keeps_symmetric =
7166 matches!(target, DialectType::PostgreSQL | DialectType::Dremio);
7167 if !keeps_symmetric {
7168 if sym {
7169 // SYMMETRIC: expand to (x BETWEEN a AND b OR x BETWEEN b AND a)
7170 let b = if let Expression::Between(b) = e {
7171 *b
7172 } else {
7173 unreachable!()
7174 };
7175 let between1 = Expression::Between(Box::new(
7176 crate::expressions::Between {
7177 this: b.this.clone(),
7178 low: b.low.clone(),
7179 high: b.high.clone(),
7180 not: b.not,
7181 symmetric: None,
7182 },
7183 ));
7184 let between2 = Expression::Between(Box::new(
7185 crate::expressions::Between {
7186 this: b.this,
7187 low: b.high,
7188 high: b.low,
7189 not: b.not,
7190 symmetric: None,
7191 },
7192 ));
7193 return Ok(Expression::Paren(Box::new(
7194 crate::expressions::Paren {
7195 this: Expression::Or(Box::new(
7196 crate::expressions::BinaryOp::new(
7197 between1, between2,
7198 ),
7199 )),
7200 trailing_comments: vec![],
7201 },
7202 )));
7203 } else {
7204 // ASYMMETRIC: strip qualifier, keep as regular BETWEEN
7205 let b = if let Expression::Between(b) = e {
7206 *b
7207 } else {
7208 unreachable!()
7209 };
7210 return Ok(Expression::Between(Box::new(
7211 crate::expressions::Between {
7212 this: b.this,
7213 low: b.low,
7214 high: b.high,
7215 not: b.not,
7216 symmetric: None,
7217 },
7218 )));
7219 }
7220 }
7221 }
7222 }
7223
7224 // ILIKE -> LOWER(x) LIKE LOWER(y) for StarRocks/Doris
7225 if let Expression::ILike(ref _like) = e {
7226 if matches!(target, DialectType::StarRocks | DialectType::Doris) {
7227 let like = if let Expression::ILike(l) = e {
7228 *l
7229 } else {
7230 unreachable!()
7231 };
7232 let lower_left = Expression::Function(Box::new(Function::new(
7233 "LOWER".to_string(),
7234 vec![like.left],
7235 )));
7236 let lower_right = Expression::Function(Box::new(Function::new(
7237 "LOWER".to_string(),
7238 vec![like.right],
7239 )));
7240 return Ok(Expression::Like(Box::new(crate::expressions::LikeOp {
7241 left: lower_left,
7242 right: lower_right,
7243 escape: like.escape,
7244 quantifier: like.quantifier,
7245 inferred_type: None,
7246 })));
7247 }
7248 }
7249
7250 // Oracle DBMS_RANDOM.VALUE() -> RANDOM() for PostgreSQL, RAND() for others
7251 if let Expression::MethodCall(ref mc) = e {
7252 if matches!(source, DialectType::Oracle)
7253 && mc.method.name.eq_ignore_ascii_case("VALUE")
7254 && mc.args.is_empty()
7255 {
7256 let is_dbms_random = match &mc.this {
7257 Expression::Identifier(id) => {
7258 id.name.eq_ignore_ascii_case("DBMS_RANDOM")
7259 }
7260 Expression::Column(col) => {
7261 col.table.is_none()
7262 && col.name.name.eq_ignore_ascii_case("DBMS_RANDOM")
7263 }
7264 _ => false,
7265 };
7266 if is_dbms_random {
7267 let func_name = match target {
7268 DialectType::PostgreSQL
7269 | DialectType::Redshift
7270 | DialectType::DuckDB
7271 | DialectType::SQLite => "RANDOM",
7272 DialectType::Oracle => "DBMS_RANDOM.VALUE",
7273 _ => "RAND",
7274 };
7275 return Ok(Expression::Function(Box::new(Function::new(
7276 func_name.to_string(),
7277 vec![],
7278 ))));
7279 }
7280 }
7281 }
7282 // TRIM without explicit position -> add BOTH for ClickHouse
7283 if let Expression::Trim(ref trim) = e {
7284 if matches!(target, DialectType::ClickHouse)
7285 && trim.sql_standard_syntax
7286 && trim.characters.is_some()
7287 && !trim.position_explicit
7288 {
7289 let mut new_trim = (**trim).clone();
7290 new_trim.position_explicit = true;
7291 return Ok(Expression::Trim(Box::new(new_trim)));
7292 }
7293 }
7294 // BEGIN -> START TRANSACTION for Presto/Trino
7295 if let Expression::Transaction(ref txn) = e {
7296 if matches!(
7297 target,
7298 DialectType::Presto | DialectType::Trino | DialectType::Athena
7299 ) {
7300 // Convert BEGIN to START TRANSACTION by setting mark to "START"
7301 let mut txn = txn.clone();
7302 txn.mark = Some(Box::new(Expression::Identifier(Identifier::new(
7303 "START".to_string(),
7304 ))));
7305 return Ok(Expression::Transaction(Box::new(*txn)));
7306 }
7307 }
7308 // IS TRUE/FALSE -> simplified forms for Presto/Trino
7309 if matches!(
7310 target,
7311 DialectType::Presto | DialectType::Trino | DialectType::Athena
7312 ) {
7313 match &e {
7314 Expression::IsTrue(itf) if !itf.not => {
7315 // x IS TRUE -> x
7316 return Ok(itf.this.clone());
7317 }
7318 Expression::IsTrue(itf) if itf.not => {
7319 // x IS NOT TRUE -> NOT x
7320 return Ok(Expression::Not(Box::new(
7321 crate::expressions::UnaryOp {
7322 this: itf.this.clone(),
7323 inferred_type: None,
7324 },
7325 )));
7326 }
7327 Expression::IsFalse(itf) if !itf.not => {
7328 // x IS FALSE -> NOT x
7329 return Ok(Expression::Not(Box::new(
7330 crate::expressions::UnaryOp {
7331 this: itf.this.clone(),
7332 inferred_type: None,
7333 },
7334 )));
7335 }
7336 Expression::IsFalse(itf) if itf.not => {
7337 // x IS NOT FALSE -> NOT NOT x
7338 let not_x =
7339 Expression::Not(Box::new(crate::expressions::UnaryOp {
7340 this: itf.this.clone(),
7341 inferred_type: None,
7342 }));
7343 return Ok(Expression::Not(Box::new(
7344 crate::expressions::UnaryOp {
7345 this: not_x,
7346 inferred_type: None,
7347 },
7348 )));
7349 }
7350 _ => {}
7351 }
7352 }
7353 // x IS NOT FALSE -> NOT x IS FALSE for Redshift
7354 if matches!(target, DialectType::Redshift) {
7355 if let Expression::IsFalse(ref itf) = e {
7356 if itf.not {
7357 return Ok(Expression::Not(Box::new(
7358 crate::expressions::UnaryOp {
7359 this: Expression::IsFalse(Box::new(
7360 crate::expressions::IsTrueFalse {
7361 this: itf.this.clone(),
7362 not: false,
7363 },
7364 )),
7365 inferred_type: None,
7366 },
7367 )));
7368 }
7369 }
7370 }
7371 // REGEXP_REPLACE: add 'g' flag when source defaults to global replacement
7372 // Snowflake default is global, PostgreSQL/DuckDB default is first-match-only
7373 if let Expression::Function(ref f) = e {
7374 if f.name.eq_ignore_ascii_case("REGEXP_REPLACE")
7375 && matches!(source, DialectType::Snowflake)
7376 && matches!(target, DialectType::PostgreSQL | DialectType::DuckDB)
7377 {
7378 if f.args.len() == 3 {
7379 let mut args = f.args.clone();
7380 args.push(Expression::string("g"));
7381 return Ok(Expression::Function(Box::new(Function::new(
7382 "REGEXP_REPLACE".to_string(),
7383 args,
7384 ))));
7385 } else if f.args.len() == 4 {
7386 // 4th arg might be position, add 'g' as 5th
7387 let mut args = f.args.clone();
7388 args.push(Expression::string("g"));
7389 return Ok(Expression::Function(Box::new(Function::new(
7390 "REGEXP_REPLACE".to_string(),
7391 args,
7392 ))));
7393 }
7394 }
7395 }
7396 Ok(e)
7397 }
7398
7399 Action::GreatestLeastNull => {
7400 let f = if let Expression::Function(f) = e {
7401 *f
7402 } else {
7403 unreachable!("action only triggered for Function expressions")
7404 };
7405 let mut null_checks: Vec<Expression> = f
7406 .args
7407 .iter()
7408 .map(|a| {
7409 Expression::IsNull(Box::new(IsNull {
7410 this: a.clone(),
7411 not: false,
7412 postfix_form: false,
7413 }))
7414 })
7415 .collect();
7416 let condition = if null_checks.len() == 1 {
7417 null_checks.remove(0)
7418 } else {
7419 let first = null_checks.remove(0);
7420 null_checks.into_iter().fold(first, |acc, check| {
7421 Expression::Or(Box::new(BinaryOp::new(acc, check)))
7422 })
7423 };
7424 Ok(Expression::Case(Box::new(Case {
7425 operand: None,
7426 whens: vec![(condition, Expression::Null(Null))],
7427 else_: Some(Expression::Function(Box::new(Function::new(
7428 f.name, f.args,
7429 )))),
7430 comments: Vec::new(),
7431 inferred_type: None,
7432 })))
7433 }
7434
7435 Action::ArrayGenerateRange => {
7436 let f = if let Expression::Function(f) = e {
7437 *f
7438 } else {
7439 unreachable!("action only triggered for Function expressions")
7440 };
7441 let start = f.args[0].clone();
7442 let end = f.args[1].clone();
7443 let step = f.args.get(2).cloned();
7444
7445 let end_minus_1 = Expression::Sub(Box::new(BinaryOp::new(
7446 end.clone(),
7447 Expression::number(1),
7448 )));
7449
7450 match target {
7451 DialectType::PostgreSQL | DialectType::Redshift => {
7452 let mut args = vec![start, end_minus_1];
7453 if let Some(s) = step {
7454 args.push(s);
7455 }
7456 Ok(Expression::Function(Box::new(Function::new(
7457 "GENERATE_SERIES".to_string(),
7458 args,
7459 ))))
7460 }
7461 DialectType::Presto | DialectType::Trino => {
7462 let mut args = vec![start, end_minus_1];
7463 if let Some(s) = step {
7464 args.push(s);
7465 }
7466 Ok(Expression::Function(Box::new(Function::new(
7467 "SEQUENCE".to_string(),
7468 args,
7469 ))))
7470 }
7471 DialectType::BigQuery => {
7472 let mut args = vec![start, end_minus_1];
7473 if let Some(s) = step {
7474 args.push(s);
7475 }
7476 Ok(Expression::Function(Box::new(Function::new(
7477 "GENERATE_ARRAY".to_string(),
7478 args,
7479 ))))
7480 }
7481 DialectType::Snowflake => {
7482 let normalized_end = Expression::Add(Box::new(BinaryOp::new(
7483 Expression::Paren(Box::new(Paren {
7484 this: end_minus_1,
7485 trailing_comments: vec![],
7486 })),
7487 Expression::number(1),
7488 )));
7489 let mut args = vec![start, normalized_end];
7490 if let Some(s) = step {
7491 args.push(s);
7492 }
7493 Ok(Expression::Function(Box::new(Function::new(
7494 "ARRAY_GENERATE_RANGE".to_string(),
7495 args,
7496 ))))
7497 }
7498 _ => Ok(Expression::Function(Box::new(Function::new(
7499 f.name, f.args,
7500 )))),
7501 }
7502 }
7503
Action::Div0TypedDivision => {
    // Rewrites IF(cond, t, a / b) so the division in the false branch is
    // performed in floating point by casting its left operand — prevents
    // integer/integer truncation on the target.
    // NOTE(review): assumes DIV0 was already lowered to this IfFunc shape
    // upstream — this action only fires on IfFunc expressions.
    let if_func = if let Expression::IfFunc(f) = e {
        *f
    } else {
        unreachable!("action only triggered for IfFunc expressions")
    };
    if let Some(Expression::Div(div)) = if_func.false_value {
        // SQLite spells its floating-point type REAL instead of DOUBLE.
        let cast_type = if matches!(target, DialectType::SQLite) {
            DataType::Float {
                precision: None,
                scale: None,
                real_spelling: true,
            }
        } else {
            DataType::Double {
                precision: None,
                scale: None,
            }
        };
        let casted_left = Expression::Cast(Box::new(Cast {
            this: div.left,
            to: cast_type,
            trailing_comments: vec![],
            double_colon_syntax: false,
            format: None,
            default: None,
            inferred_type: None,
        }));
        // Rebuild the IfFunc with CAST(left)/right in the false branch;
        // condition/true branch/original name are carried over unchanged.
        Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
            condition: if_func.condition,
            true_value: if_func.true_value,
            false_value: Some(Expression::Div(Box::new(BinaryOp::new(
                casted_left,
                div.right,
            )))),
            original_name: if_func.original_name,
            inferred_type: None,
        })))
    } else {
        // False branch is not a division — nothing to retype; reconstruct as-is.
        Ok(Expression::IfFunc(Box::new(if_func)))
    }
}
7547
7548 Action::ArrayAggCollectList => {
7549 let agg = if let Expression::ArrayAgg(a) = e {
7550 *a
7551 } else {
7552 unreachable!("action only triggered for ArrayAgg expressions")
7553 };
7554 Ok(Expression::ArrayAgg(Box::new(AggFunc {
7555 name: Some("COLLECT_LIST".to_string()),
7556 ..agg
7557 })))
7558 }
7559
7560 Action::ArrayAggToGroupConcat => {
7561 let agg = if let Expression::ArrayAgg(a) = e {
7562 *a
7563 } else {
7564 unreachable!("action only triggered for ArrayAgg expressions")
7565 };
7566 Ok(Expression::ArrayAgg(Box::new(AggFunc {
7567 name: Some("GROUP_CONCAT".to_string()),
7568 ..agg
7569 })))
7570 }
7571
Action::ArrayAggWithinGroupFilter => {
    // ARRAY_AGG(x) WITHIN GROUP (ORDER BY ...) ->
    //   ARRAY_AGG(x ORDER BY ...) FILTER (WHERE x IS NOT NULL)
    // Folds the WITHIN GROUP ordering into the aggregate and adds an explicit
    // NULL-excluding filter. Note: any pre-existing filter on the inner
    // aggregate is replaced by the new one.
    let wg = if let Expression::WithinGroup(w) = e {
        *w
    } else {
        unreachable!("action only triggered for WithinGroup expressions")
    };
    if let Expression::ArrayAgg(inner_agg) = wg.this {
        let col = inner_agg.this.clone();
        // FILTER (WHERE <col> IS NOT NULL)
        let filter = Expression::IsNull(Box::new(IsNull {
            this: col,
            not: true,
            postfix_form: false,
        }));
        // For DuckDB, add explicit NULLS FIRST for DESC ordering
        let order_by = if matches!(target, DialectType::DuckDB) {
            wg.order_by
                .into_iter()
                .map(|mut o| {
                    if o.desc && o.nulls_first.is_none() {
                        o.nulls_first = Some(true);
                    }
                    o
                })
                .collect()
        } else {
            wg.order_by
        };
        // Rebuild the aggregate with the hoisted ORDER BY and the new filter;
        // the remaining aggregate attributes are carried over unchanged.
        Ok(Expression::ArrayAgg(Box::new(AggFunc {
            this: inner_agg.this,
            distinct: inner_agg.distinct,
            filter: Some(filter),
            order_by,
            name: inner_agg.name,
            ignore_nulls: inner_agg.ignore_nulls,
            having_max: inner_agg.having_max,
            limit: inner_agg.limit,
            inferred_type: None,
        })))
    } else {
        // WITHIN GROUP over something other than ARRAY_AGG: leave untouched.
        Ok(Expression::WithinGroup(Box::new(wg)))
    }
}
7614
7615 Action::ArrayAggFilter => {
7616 let agg = if let Expression::ArrayAgg(a) = e {
7617 *a
7618 } else {
7619 unreachable!("action only triggered for ArrayAgg expressions")
7620 };
7621 let col = agg.this.clone();
7622 let filter = Expression::IsNull(Box::new(IsNull {
7623 this: col,
7624 not: true,
7625 postfix_form: false,
7626 }));
7627 Ok(Expression::ArrayAgg(Box::new(AggFunc {
7628 filter: Some(filter),
7629 ..agg
7630 })))
7631 }
7632
7633 Action::ArrayAggNullFilter => {
7634 // ARRAY_AGG(x) FILTER(WHERE cond) -> ARRAY_AGG(x) FILTER(WHERE cond AND NOT x IS NULL)
7635 // For source dialects that exclude NULLs (Spark/Hive) targeting DuckDB which includes them
7636 let agg = if let Expression::ArrayAgg(a) = e {
7637 *a
7638 } else {
7639 unreachable!("action only triggered for ArrayAgg expressions")
7640 };
7641 let col = agg.this.clone();
7642 let not_null = Expression::IsNull(Box::new(IsNull {
7643 this: col,
7644 not: true,
7645 postfix_form: true, // Use "NOT x IS NULL" form (prefix NOT)
7646 }));
7647 let new_filter = if let Some(existing_filter) = agg.filter {
7648 // AND the NOT IS NULL with existing filter
7649 Expression::And(Box::new(crate::expressions::BinaryOp::new(
7650 existing_filter,
7651 not_null,
7652 )))
7653 } else {
7654 not_null
7655 };
7656 Ok(Expression::ArrayAgg(Box::new(AggFunc {
7657 filter: Some(new_filter),
7658 ..agg
7659 })))
7660 }
7661
Action::BigQueryArraySelectAsStructToSnowflake => {
    // ARRAY(SELECT AS STRUCT x1 AS x1, x2 AS x2 FROM t)
    // -> (SELECT ARRAY_AGG(OBJECT_CONSTRUCT('x1', x1, 'x2', x2)) FROM t)
    if let Expression::Function(mut f) = e {
        // Only fire on ARRAY(<select>) where the subquery is SELECT AS STRUCT.
        let is_match = f.args.len() == 1
            && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT"));
        if is_match {
            let inner_select = match f.args.remove(0) {
                Expression::Select(s) => *s,
                _ => unreachable!(
                    "argument already verified to be a Select expression"
                ),
            };
            // Build OBJECT_CONSTRUCT args from SELECT expressions:
            // each projection contributes a ('name', value) pair.
            let mut oc_args = Vec::new();
            for expr in &inner_select.expressions {
                match expr {
                    Expression::Alias(a) => {
                        // Aliased projection: key off the alias name.
                        let key = Expression::Literal(Literal::String(
                            a.alias.name.clone(),
                        ));
                        let value = a.this.clone();
                        oc_args.push(key);
                        oc_args.push(value);
                    }
                    Expression::Column(c) => {
                        // Bare column: key off the column's own name.
                        let key = Expression::Literal(Literal::String(
                            c.name.name.clone(),
                        ));
                        oc_args.push(key);
                        oc_args.push(expr.clone());
                    }
                    _ => {
                        // Non-column projection: no obvious key — pass the
                        // expression through positionally.
                        oc_args.push(expr.clone());
                    }
                }
            }
            let object_construct = Expression::Function(Box::new(Function::new(
                "OBJECT_CONSTRUCT".to_string(),
                oc_args,
            )));
            let array_agg = Expression::Function(Box::new(Function::new(
                "ARRAY_AGG".to_string(),
                vec![object_construct],
            )));
            // Re-home the original FROM/WHERE/GROUP BY/HAVING/JOIN clauses
            // onto a fresh SELECT that projects only the aggregate.
            let mut new_select = crate::expressions::Select::new();
            new_select.expressions = vec![array_agg];
            new_select.from = inner_select.from.clone();
            new_select.where_clause = inner_select.where_clause.clone();
            new_select.group_by = inner_select.group_by.clone();
            new_select.having = inner_select.having.clone();
            new_select.joins = inner_select.joins.clone();
            // Wrap in a scalar subquery so the result is usable in
            // expression position.
            Ok(Expression::Subquery(Box::new(
                crate::expressions::Subquery {
                    this: Expression::Select(Box::new(new_select)),
                    alias: None,
                    column_aliases: Vec::new(),
                    order_by: None,
                    limit: None,
                    offset: None,
                    distribute_by: None,
                    sort_by: None,
                    cluster_by: None,
                    lateral: false,
                    modifiers_inside: false,
                    trailing_comments: Vec::new(),
                    inferred_type: None,
                },
            )))
        } else {
            Ok(Expression::Function(f))
        }
    } else {
        Ok(e)
    }
}
7738
7739 Action::BigQueryPercentileContToDuckDB => {
7740 // PERCENTILE_CONT(x, frac [RESPECT NULLS]) -> QUANTILE_CONT(x, frac) for DuckDB
7741 if let Expression::AggregateFunction(mut af) = e {
7742 af.name = "QUANTILE_CONT".to_string();
7743 af.ignore_nulls = None; // Strip RESPECT/IGNORE NULLS
7744 // Keep only first 2 args
7745 if af.args.len() > 2 {
7746 af.args.truncate(2);
7747 }
7748 Ok(Expression::AggregateFunction(af))
7749 } else {
7750 Ok(e)
7751 }
7752 }
7753
7754 Action::ArrayAggIgnoreNullsDuckDB => {
7755 // ARRAY_AGG(x IGNORE NULLS ORDER BY a, b DESC) -> ARRAY_AGG(x ORDER BY a NULLS FIRST, b DESC)
7756 // Strip IGNORE NULLS, add NULLS FIRST to first ORDER BY column
7757 let mut agg = if let Expression::ArrayAgg(a) = e {
7758 *a
7759 } else {
7760 unreachable!("action only triggered for ArrayAgg expressions")
7761 };
7762 agg.ignore_nulls = None; // Strip IGNORE NULLS
7763 if !agg.order_by.is_empty() {
7764 agg.order_by[0].nulls_first = Some(true);
7765 }
7766 Ok(Expression::ArrayAgg(Box::new(agg)))
7767 }
7768
Action::CountDistinctMultiArg => {
    // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN a IS NULL THEN NULL WHEN b IS NULL THEN NULL ELSE (a, b) END)
    // For targets without multi-argument COUNT(DISTINCT ...): the CASE ladder
    // collapses any row with a NULL argument to NULL so it is excluded from
    // the distinct count.
    if let Expression::Count(c) = e {
        if let Some(Expression::Tuple(t)) = c.this {
            let args = t.expressions;
            // Build CASE expression:
            // WHEN a IS NULL THEN NULL WHEN b IS NULL THEN NULL ELSE (a, b) END
            let mut whens = Vec::new();
            for arg in &args {
                whens.push((
                    Expression::IsNull(Box::new(IsNull {
                        this: arg.clone(),
                        not: false,
                        postfix_form: false,
                    })),
                    Expression::Null(crate::expressions::Null),
                ));
            }
            // Build the tuple for ELSE
            let tuple_expr =
                Expression::Tuple(Box::new(crate::expressions::Tuple {
                    expressions: args,
                }));
            let case_expr = Expression::Case(Box::new(crate::expressions::Case {
                operand: None,
                whens,
                else_: Some(tuple_expr),
                comments: Vec::new(),
                inferred_type: None,
            }));
            // Re-assemble COUNT(DISTINCT <case>), carrying over the original
            // FILTER / IGNORE NULLS / name attributes.
            Ok(Expression::Count(Box::new(crate::expressions::CountFunc {
                this: Some(case_expr),
                star: false,
                distinct: true,
                filter: c.filter,
                ignore_nulls: c.ignore_nulls,
                original_name: c.original_name,
                inferred_type: None,
            })))
        } else {
            // Not a multi-argument (tuple) COUNT: nothing to rewrite.
            Ok(Expression::Count(c))
        }
    } else {
        Ok(e)
    }
}
7815
7816 Action::CastTimestampToDatetime => {
7817 let c = if let Expression::Cast(c) = e {
7818 *c
7819 } else {
7820 unreachable!("action only triggered for Cast expressions")
7821 };
7822 Ok(Expression::Cast(Box::new(Cast {
7823 to: DataType::Custom {
7824 name: "DATETIME".to_string(),
7825 },
7826 ..c
7827 })))
7828 }
7829
7830 Action::CastTimestampStripTz => {
7831 // CAST(x AS TIMESTAMP(n) WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark/BigQuery
7832 let c = if let Expression::Cast(c) = e {
7833 *c
7834 } else {
7835 unreachable!("action only triggered for Cast expressions")
7836 };
7837 Ok(Expression::Cast(Box::new(Cast {
7838 to: DataType::Timestamp {
7839 precision: None,
7840 timezone: false,
7841 },
7842 ..c
7843 })))
7844 }
7845
7846 Action::CastTimestamptzToFunc => {
7847 // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) function for MySQL/StarRocks
7848 let c = if let Expression::Cast(c) = e {
7849 *c
7850 } else {
7851 unreachable!("action only triggered for Cast expressions")
7852 };
7853 Ok(Expression::Function(Box::new(Function::new(
7854 "TIMESTAMP".to_string(),
7855 vec![c.this],
7856 ))))
7857 }
7858
7859 Action::ToDateToCast => {
7860 // Convert TO_DATE(x) -> CAST(x AS DATE) for DuckDB
7861 if let Expression::Function(f) = e {
7862 let arg = f.args.into_iter().next().unwrap();
7863 Ok(Expression::Cast(Box::new(Cast {
7864 this: arg,
7865 to: DataType::Date,
7866 double_colon_syntax: false,
7867 trailing_comments: vec![],
7868 format: None,
7869 default: None,
7870 inferred_type: None,
7871 })))
7872 } else {
7873 Ok(e)
7874 }
7875 }
Action::DateTruncWrapCast => {
    // Handle both Expression::DateTrunc/TimestampTrunc and
    // Expression::Function("DATE_TRUNC", [unit, expr])
    //
    // When the truncated operand is an explicit CAST, re-wrap the whole
    // truncation in a CAST back to the operand's declared type. TIME operands
    // get special handling: the value is anchored to the epoch date
    // ('1970-01-01' + t), truncated, and then cast back to TIME.
    match e {
        Expression::DateTrunc(d) | Expression::TimestampTrunc(d) => {
            // Remember the operand's declared type, if it was a CAST.
            let input_type = match &d.this {
                Expression::Cast(c) => Some(c.to.clone()),
                _ => None,
            };
            if let Some(cast_type) = input_type {
                let is_time = matches!(cast_type, DataType::Time { .. });
                if is_time {
                    // CAST('1970-01-01' AS DATE) + <time> anchors the TIME
                    // value to a full date so it can be truncated.
                    let date_expr = Expression::Cast(Box::new(Cast {
                        this: Expression::Literal(
                            crate::expressions::Literal::String(
                                "1970-01-01".to_string(),
                            ),
                        ),
                        to: DataType::Date,
                        double_colon_syntax: false,
                        trailing_comments: vec![],
                        format: None,
                        default: None,
                        inferred_type: None,
                    }));
                    let add_expr =
                        Expression::Add(Box::new(BinaryOp::new(date_expr, d.this)));
                    let inner = Expression::DateTrunc(Box::new(DateTruncFunc {
                        this: add_expr,
                        unit: d.unit,
                    }));
                    // Cast the truncated value back to the original TIME type.
                    Ok(Expression::Cast(Box::new(Cast {
                        this: inner,
                        to: cast_type,
                        double_colon_syntax: false,
                        trailing_comments: vec![],
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                } else {
                    // Non-TIME operand: just wrap the truncation in a cast
                    // back to the declared type.
                    let inner = Expression::DateTrunc(Box::new(*d));
                    Ok(Expression::Cast(Box::new(Cast {
                        this: inner,
                        to: cast_type,
                        double_colon_syntax: false,
                        trailing_comments: vec![],
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                }
            } else {
                // Operand carries no explicit type: leave the truncation alone.
                Ok(Expression::DateTrunc(d))
            }
        }
        Expression::Function(f) if f.args.len() == 2 => {
            // Function-based DATE_TRUNC(unit, expr) — same logic as above,
            // with the operand in the second argument slot.
            let input_type = match &f.args[1] {
                Expression::Cast(c) => Some(c.to.clone()),
                _ => None,
            };
            if let Some(cast_type) = input_type {
                let is_time = matches!(cast_type, DataType::Time { .. });
                if is_time {
                    let date_expr = Expression::Cast(Box::new(Cast {
                        this: Expression::Literal(
                            crate::expressions::Literal::String(
                                "1970-01-01".to_string(),
                            ),
                        ),
                        to: DataType::Date,
                        double_colon_syntax: false,
                        trailing_comments: vec![],
                        format: None,
                        default: None,
                        inferred_type: None,
                    }));
                    let mut args = f.args;
                    let unit_arg = args.remove(0);
                    let time_expr = args.remove(0);
                    let add_expr = Expression::Add(Box::new(BinaryOp::new(
                        date_expr, time_expr,
                    )));
                    let inner = Expression::Function(Box::new(Function::new(
                        "DATE_TRUNC".to_string(),
                        vec![unit_arg, add_expr],
                    )));
                    Ok(Expression::Cast(Box::new(Cast {
                        this: inner,
                        to: cast_type,
                        double_colon_syntax: false,
                        trailing_comments: vec![],
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                } else {
                    // Wrap the function in CAST
                    Ok(Expression::Cast(Box::new(Cast {
                        this: Expression::Function(f),
                        to: cast_type,
                        double_colon_syntax: false,
                        trailing_comments: vec![],
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                }
            } else {
                Ok(Expression::Function(f))
            }
        }
        // Anything else (including DATE_TRUNC calls of another arity)
        // passes through unchanged.
        other => Ok(other),
    }
}
7992
7993 Action::RegexpReplaceSnowflakeToDuckDB => {
7994 // Snowflake REGEXP_REPLACE(s, p, r, position) -> REGEXP_REPLACE(s, p, r, 'g')
7995 if let Expression::Function(f) = e {
7996 let mut args = f.args;
7997 let subject = args.remove(0);
7998 let pattern = args.remove(0);
7999 let replacement = args.remove(0);
8000 Ok(Expression::Function(Box::new(Function::new(
8001 "REGEXP_REPLACE".to_string(),
8002 vec![
8003 subject,
8004 pattern,
8005 replacement,
8006 Expression::Literal(crate::expressions::Literal::String(
8007 "g".to_string(),
8008 )),
8009 ],
8010 ))))
8011 } else {
8012 Ok(e)
8013 }
8014 }
8015
8016 Action::SetToVariable => {
8017 // For DuckDB: SET a = 1 -> SET VARIABLE a = 1
8018 if let Expression::SetStatement(mut s) = e {
8019 for item in &mut s.items {
8020 if item.kind.is_none() {
8021 // Check if name already has VARIABLE prefix (from DuckDB source parsing)
8022 let already_variable = match &item.name {
8023 Expression::Identifier(id) => id.name.starts_with("VARIABLE "),
8024 _ => false,
8025 };
8026 if already_variable {
8027 // Extract the actual name and set kind
8028 if let Expression::Identifier(ref mut id) = item.name {
8029 let actual_name = id.name["VARIABLE ".len()..].to_string();
8030 id.name = actual_name;
8031 }
8032 }
8033 item.kind = Some("VARIABLE".to_string());
8034 }
8035 }
8036 Ok(Expression::SetStatement(s))
8037 } else {
8038 Ok(e)
8039 }
8040 }
8041
8042 Action::ConvertTimezoneToExpr => {
8043 // Convert Function("CONVERT_TIMEZONE", args) to Expression::ConvertTimezone
8044 // This prevents Redshift's transform_expr from expanding 2-arg to 3-arg with 'UTC'
8045 if let Expression::Function(f) = e {
8046 if f.args.len() == 2 {
8047 let mut args = f.args;
8048 let target_tz = args.remove(0);
8049 let timestamp = args.remove(0);
8050 Ok(Expression::ConvertTimezone(Box::new(ConvertTimezone {
8051 source_tz: None,
8052 target_tz: Some(Box::new(target_tz)),
8053 timestamp: Some(Box::new(timestamp)),
8054 options: vec![],
8055 })))
8056 } else if f.args.len() == 3 {
8057 let mut args = f.args;
8058 let source_tz = args.remove(0);
8059 let target_tz = args.remove(0);
8060 let timestamp = args.remove(0);
8061 Ok(Expression::ConvertTimezone(Box::new(ConvertTimezone {
8062 source_tz: Some(Box::new(source_tz)),
8063 target_tz: Some(Box::new(target_tz)),
8064 timestamp: Some(Box::new(timestamp)),
8065 options: vec![],
8066 })))
8067 } else {
8068 Ok(Expression::Function(f))
8069 }
8070 } else {
8071 Ok(e)
8072 }
8073 }
8074
8075 Action::BigQueryCastType => {
8076 // Convert BigQuery types to standard SQL types
8077 if let Expression::DataType(dt) = e {
8078 match dt {
8079 DataType::Custom { ref name } if name.eq_ignore_ascii_case("INT64") => {
8080 Ok(Expression::DataType(DataType::BigInt { length: None }))
8081 }
8082 DataType::Custom { ref name }
8083 if name.eq_ignore_ascii_case("FLOAT64") =>
8084 {
8085 Ok(Expression::DataType(DataType::Double {
8086 precision: None,
8087 scale: None,
8088 }))
8089 }
8090 DataType::Custom { ref name } if name.eq_ignore_ascii_case("BOOL") => {
8091 Ok(Expression::DataType(DataType::Boolean))
8092 }
8093 DataType::Custom { ref name } if name.eq_ignore_ascii_case("BYTES") => {
8094 Ok(Expression::DataType(DataType::VarBinary { length: None }))
8095 }
8096 DataType::Custom { ref name }
8097 if name.eq_ignore_ascii_case("NUMERIC") =>
8098 {
8099 // For DuckDB target, use Custom("DECIMAL") to avoid DuckDB's
8100 // default precision (18, 3) being added to bare DECIMAL
8101 if matches!(target, DialectType::DuckDB) {
8102 Ok(Expression::DataType(DataType::Custom {
8103 name: "DECIMAL".to_string(),
8104 }))
8105 } else {
8106 Ok(Expression::DataType(DataType::Decimal {
8107 precision: None,
8108 scale: None,
8109 }))
8110 }
8111 }
8112 DataType::Custom { ref name }
8113 if name.eq_ignore_ascii_case("STRING") =>
8114 {
8115 Ok(Expression::DataType(DataType::String { length: None }))
8116 }
8117 DataType::Custom { ref name }
8118 if name.eq_ignore_ascii_case("DATETIME") =>
8119 {
8120 Ok(Expression::DataType(DataType::Timestamp {
8121 precision: None,
8122 timezone: false,
8123 }))
8124 }
8125 _ => Ok(Expression::DataType(dt)),
8126 }
8127 } else {
8128 Ok(e)
8129 }
8130 }
8131
Action::BigQuerySafeDivide => {
    // SAFE_DIVIDE(x, y): division that yields NULL instead of erroring when
    // y = 0 — lowered to an IF/IFF/CASE form depending on the target.
    if let Expression::SafeDivide(sd) = e {
        let x = *sd.this;
        let y = *sd.expression;
        // Wrap x and y in parens if they're complex expressions, so the
        // rewritten arithmetic keeps its original precedence.
        let y_ref = match &y {
            Expression::Column(_)
            | Expression::Literal(_)
            | Expression::Identifier(_) => y.clone(),
            _ => Expression::Paren(Box::new(Paren {
                this: y.clone(),
                trailing_comments: vec![],
            })),
        };
        let x_ref = match &x {
            Expression::Column(_)
            | Expression::Literal(_)
            | Expression::Identifier(_) => x.clone(),
            _ => Expression::Paren(Box::new(Paren {
                this: x.clone(),
                trailing_comments: vec![],
            })),
        };
        // Shared guard used by every target form: y <> 0.
        let condition = Expression::Neq(Box::new(BinaryOp::new(
            y_ref.clone(),
            Expression::number(0),
        )));
        let div_expr = Expression::Div(Box::new(BinaryOp::new(x_ref, y_ref)));

        if matches!(target, DialectType::Presto | DialectType::Trino) {
            // Presto/Trino: IF(y <> 0, CAST(x AS DOUBLE) / y, NULL)
            let cast_x = Expression::Cast(Box::new(Cast {
                this: match &x {
                    Expression::Column(_)
                    | Expression::Literal(_)
                    | Expression::Identifier(_) => x,
                    _ => Expression::Paren(Box::new(Paren {
                        this: x,
                        trailing_comments: vec![],
                    })),
                },
                to: DataType::Double {
                    precision: None,
                    scale: None,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }));
            let cast_div = Expression::Div(Box::new(BinaryOp::new(
                cast_x,
                match &y {
                    Expression::Column(_)
                    | Expression::Literal(_)
                    | Expression::Identifier(_) => y,
                    _ => Expression::Paren(Box::new(Paren {
                        this: y,
                        trailing_comments: vec![],
                    })),
                },
            )));
            Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
                condition,
                true_value: cast_div,
                false_value: Some(Expression::Null(Null)),
                original_name: None,
                inferred_type: None,
            })))
        } else if matches!(target, DialectType::PostgreSQL) {
            // PostgreSQL: CASE WHEN y <> 0 THEN CAST(x AS DOUBLE PRECISION) / y ELSE NULL END
            let cast_x = Expression::Cast(Box::new(Cast {
                this: match &x {
                    Expression::Column(_)
                    | Expression::Literal(_)
                    | Expression::Identifier(_) => x,
                    _ => Expression::Paren(Box::new(Paren {
                        this: x,
                        trailing_comments: vec![],
                    })),
                },
                // Spelled as a custom type so it renders as DOUBLE PRECISION.
                to: DataType::Custom {
                    name: "DOUBLE PRECISION".to_string(),
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }));
            let y_paren = match &y {
                Expression::Column(_)
                | Expression::Literal(_)
                | Expression::Identifier(_) => y,
                _ => Expression::Paren(Box::new(Paren {
                    this: y,
                    trailing_comments: vec![],
                })),
            };
            let cast_div =
                Expression::Div(Box::new(BinaryOp::new(cast_x, y_paren)));
            Ok(Expression::Case(Box::new(Case {
                operand: None,
                whens: vec![(condition, cast_div)],
                else_: Some(Expression::Null(Null)),
                comments: Vec::new(),
                inferred_type: None,
            })))
        } else if matches!(target, DialectType::DuckDB) {
            // DuckDB: CASE WHEN y <> 0 THEN x / y ELSE NULL END
            Ok(Expression::Case(Box::new(Case {
                operand: None,
                whens: vec![(condition, div_expr)],
                else_: Some(Expression::Null(Null)),
                comments: Vec::new(),
                inferred_type: None,
            })))
        } else if matches!(target, DialectType::Snowflake) {
            // Snowflake: IFF(y <> 0, x / y, NULL)
            Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
                condition,
                true_value: div_expr,
                false_value: Some(Expression::Null(Null)),
                original_name: Some("IFF".to_string()),
                inferred_type: None,
            })))
        } else {
            // All others: IF(y <> 0, x / y, NULL)
            Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
                condition,
                true_value: div_expr,
                false_value: Some(Expression::Null(Null)),
                original_name: None,
                inferred_type: None,
            })))
        }
    } else {
        Ok(e)
    }
}
8274
Action::BigQueryLastDayStripUnit => {
    // LAST_DAY(date[, unit]) -> target-specific equivalent, always dropping
    // the explicit unit argument first.
    if let Expression::LastDay(mut ld) = e {
        ld.unit = None; // Strip the unit (MONTH is default)
        match target {
            DialectType::PostgreSQL => {
                // LAST_DAY(date) -> CAST(DATE_TRUNC('MONTH', date) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)
                let date_trunc = Expression::Function(Box::new(Function::new(
                    "DATE_TRUNC".to_string(),
                    vec![
                        Expression::Literal(crate::expressions::Literal::String(
                            "MONTH".to_string(),
                        )),
                        ld.this.clone(),
                    ],
                )));
                // Advance to the start of the following month...
                let plus_month =
                    Expression::Add(Box::new(crate::expressions::BinaryOp::new(
                        date_trunc,
                        Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(Expression::Literal(
                                    crate::expressions::Literal::String(
                                        "1 MONTH".to_string(),
                                    ),
                                )),
                                unit: None,
                            },
                        )),
                    )));
                // ...then step back one day to land on the month's last day.
                let minus_day =
                    Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
                        plus_month,
                        Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(Expression::Literal(
                                    crate::expressions::Literal::String(
                                        "1 DAY".to_string(),
                                    ),
                                )),
                                unit: None,
                            },
                        )),
                    )));
                Ok(Expression::Cast(Box::new(Cast {
                    this: minus_day,
                    to: DataType::Date,
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                })))
            }
            DialectType::Presto => {
                // LAST_DAY(date) -> LAST_DAY_OF_MONTH(date)
                Ok(Expression::Function(Box::new(Function::new(
                    "LAST_DAY_OF_MONTH".to_string(),
                    vec![ld.this],
                ))))
            }
            DialectType::ClickHouse => {
                // ClickHouse LAST_DAY(CAST(x AS Nullable(DATE)))
                // Need to wrap the DATE type in Nullable
                let nullable_date = match ld.this {
                    Expression::Cast(mut c) => {
                        c.to = DataType::Nullable {
                            inner: Box::new(DataType::Date),
                        };
                        Expression::Cast(c)
                    }
                    other => other,
                };
                ld.this = nullable_date;
                Ok(Expression::LastDay(ld))
            }
            // Other targets keep LAST_DAY, just without the explicit unit.
            _ => Ok(Expression::LastDay(ld)),
        }
    } else {
        Ok(e)
    }
}
8356
Action::BigQueryCastFormat => {
    // CAST(x AS DATE FORMAT 'fmt') -> PARSE_DATE('%m/%d/%Y', x) for BigQuery
    // CAST(x AS TIMESTAMP FORMAT 'fmt') -> PARSE_TIMESTAMP(...) for BigQuery
    // SAFE_CAST(x AS DATE FORMAT 'fmt') -> CAST(TRY_STRPTIME(x, ...) AS DATE) for DuckDB
    //
    // Destructure either a Cast or SafeCast carrying a FORMAT clause;
    // anything else passes through untouched.
    let (this, to, format_expr, is_safe) = match e {
        Expression::Cast(ref c) if c.format.is_some() => (
            c.this.clone(),
            c.to.clone(),
            c.format.as_ref().unwrap().as_ref().clone(),
            false,
        ),
        Expression::SafeCast(ref c) if c.format.is_some() => (
            c.this.clone(),
            c.to.clone(),
            c.format.as_ref().unwrap().as_ref().clone(),
            true,
        ),
        _ => return Ok(e),
    };
    // For CAST(x AS STRING FORMAT ...) when target is BigQuery, keep as-is
    if matches!(target, DialectType::BigQuery) {
        match &to {
            DataType::String { .. } | DataType::VarChar { .. } | DataType::Text => {
                // CAST(x AS STRING FORMAT 'fmt') stays as CAST expression for BigQuery
                return Ok(e);
            }
            _ => {}
        }
    }
    // Extract timezone from format if AT TIME ZONE is present
    let (actual_format_expr, timezone) = match &format_expr {
        Expression::AtTimeZone(ref atz) => {
            (atz.this.clone(), Some(atz.zone.clone()))
        }
        _ => (format_expr.clone(), None),
    };
    // Translate the SQL FORMAT picture into a strftime-style pattern.
    let strftime_fmt = Self::bq_cast_format_to_strftime(&actual_format_expr);
    match target {
        DialectType::BigQuery => {
            // CAST(x AS DATE FORMAT 'fmt') -> PARSE_DATE(strftime_fmt, x)
            // CAST(x AS TIMESTAMP FORMAT 'fmt' AT TIME ZONE 'tz') -> PARSE_TIMESTAMP(strftime_fmt, x, tz)
            let func_name = match &to {
                DataType::Date => "PARSE_DATE",
                DataType::Timestamp { .. } => "PARSE_TIMESTAMP",
                DataType::Time { .. } => "PARSE_TIMESTAMP",
                _ => "PARSE_TIMESTAMP",
            };
            let mut func_args = vec![strftime_fmt, this];
            if let Some(tz) = timezone {
                func_args.push(tz);
            }
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                func_args,
            ))))
        }
        DialectType::DuckDB => {
            // SAFE_CAST(x AS DATE FORMAT 'fmt') -> CAST(TRY_STRPTIME(x, fmt) AS DATE)
            // CAST(x AS DATE FORMAT 'fmt') -> CAST(STRPTIME(x, fmt) AS DATE)
            let duck_fmt = Self::bq_format_to_duckdb(&strftime_fmt);
            let parse_fn_name = if is_safe { "TRY_STRPTIME" } else { "STRPTIME" };
            let parse_call = Expression::Function(Box::new(Function::new(
                parse_fn_name.to_string(),
                vec![this, duck_fmt],
            )));
            Ok(Expression::Cast(Box::new(Cast {
                this: parse_call,
                to,
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })))
        }
        // Other targets: leave the original (Safe)Cast unchanged.
        _ => Ok(e),
    }
}
8435
            Action::BigQueryFunctionNormalize => {
                // Delegate all BigQuery-specific function renames/rewrites to the
                // shared helper; source and target dialects drive the direction.
                Self::normalize_bigquery_function(e, source, target)
            }

            Action::BigQueryToHexBare => {
                // Not used anymore - handled directly in normalize_bigquery_function
                Ok(e)
            }
8444
            Action::BigQueryToHexLower => {
                // Simplifies LOWER(...) wrappers around hex-conversion calls:
                //   BQ->BQ: LOWER(TO_HEX(x)) -> TO_HEX(x)
                //   any:    LOWER(LOWER(x))  -> LOWER(x)   (LOWER is idempotent)
                if let Expression::Lower(uf) = e {
                    match uf.this {
                        // BQ->BQ: LOWER(TO_HEX(x)) -> TO_HEX(x)
                        Expression::Function(f)
                            if matches!(target, DialectType::BigQuery)
                                && f.name == "TO_HEX" =>
                        {
                            Ok(Expression::Function(f))
                        }
                        // LOWER(LOWER(HEX/TO_HEX(x))) patterns
                        Expression::Lower(inner_uf) => {
                            if matches!(target, DialectType::BigQuery) {
                                // BQ->BQ: extract TO_HEX (the inner call's name is
                                // forced back to TO_HEX regardless of what it was)
                                if let Expression::Function(f) = inner_uf.this {
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "TO_HEX".to_string(),
                                        f.args,
                                    ))))
                                } else {
                                    Ok(Expression::Lower(inner_uf))
                                }
                            } else {
                                // Flatten: LOWER(LOWER(x)) -> LOWER(x)
                                Ok(Expression::Lower(inner_uf))
                            }
                        }
                        other => {
                            // No rewrite applies: rebuild the LOWER wrapper around the
                            // unchanged operand.
                            // NOTE(review): this drops any `original_name` the outer
                            // LOWER carried - confirm that is intentional.
                            Ok(Expression::Lower(Box::new(crate::expressions::UnaryFunc {
                                this: other,
                                original_name: None,
                                inferred_type: None,
                            })))
                        }
                    }
                } else {
                    Ok(e)
                }
            }
8484
            Action::BigQueryToHexUpper => {
                // UPPER(LOWER(HEX(x))) -> HEX(x) (UPPER cancels LOWER, HEX is already uppercase)
                // UPPER(LOWER(TO_HEX(x))) -> TO_HEX(x) for Presto/Trino
                if let Expression::Upper(uf) = e {
                    if let Expression::Lower(inner_uf) = uf.this {
                        // For BQ->BQ: UPPER(TO_HEX(x)) should stay as UPPER(TO_HEX(x))
                        if matches!(target, DialectType::BigQuery) {
                            // Restore TO_HEX name in inner function
                            if let Expression::Function(f) = inner_uf.this {
                                let restored = Expression::Function(Box::new(Function::new(
                                    "TO_HEX".to_string(),
                                    f.args,
                                )));
                                Ok(Expression::Upper(Box::new(
                                    crate::expressions::UnaryFunc::new(restored),
                                )))
                            } else {
                                // Inner operand is not a function call: drop only the
                                // inner LOWER, keeping the UPPER wrapper.
                                Ok(Expression::Upper(inner_uf))
                            }
                        } else {
                            // Extract the inner HEX/TO_HEX function (UPPER(LOWER(x)) = x when HEX is uppercase)
                            Ok(inner_uf.this)
                        }
                    } else {
                        // UPPER over anything other than LOWER(...): keep untouched.
                        Ok(Expression::Upper(uf))
                    }
                } else {
                    Ok(e)
                }
            }
8515
8516 Action::BigQueryAnyValueHaving => {
8517 // ANY_VALUE(x HAVING MAX y) -> ARG_MAX_NULL(x, y)
8518 // ANY_VALUE(x HAVING MIN y) -> ARG_MIN_NULL(x, y)
8519 if let Expression::AnyValue(agg) = e {
8520 if let Some((having_expr, is_max)) = agg.having_max {
8521 let func_name = if is_max {
8522 "ARG_MAX_NULL"
8523 } else {
8524 "ARG_MIN_NULL"
8525 };
8526 Ok(Expression::Function(Box::new(Function::new(
8527 func_name.to_string(),
8528 vec![agg.this, *having_expr],
8529 ))))
8530 } else {
8531 Ok(Expression::AnyValue(agg))
8532 }
8533 } else {
8534 Ok(e)
8535 }
8536 }
8537
8538 Action::BigQueryApproxQuantiles => {
8539 // APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [0, 1/n, 2/n, ..., 1])
8540 // APPROX_QUANTILES(DISTINCT x, n) -> APPROX_QUANTILE(DISTINCT x, [0, 1/n, ..., 1])
8541 if let Expression::AggregateFunction(agg) = e {
8542 if agg.args.len() >= 2 {
8543 let x_expr = agg.args[0].clone();
8544 let n_expr = &agg.args[1];
8545
8546 // Extract the numeric value from n_expr
8547 let n = match n_expr {
8548 Expression::Literal(crate::expressions::Literal::Number(s)) => {
8549 s.parse::<usize>().unwrap_or(2)
8550 }
8551 _ => 2,
8552 };
8553
8554 // Generate quantile array: [0, 1/n, 2/n, ..., 1]
8555 let mut quantiles = Vec::new();
8556 for i in 0..=n {
8557 let q = i as f64 / n as f64;
8558 // Format nicely: 0 -> 0, 0.25 -> 0.25, 1 -> 1
8559 if q == 0.0 {
8560 quantiles.push(Expression::number(0));
8561 } else if q == 1.0 {
8562 quantiles.push(Expression::number(1));
8563 } else {
8564 quantiles.push(Expression::Literal(
8565 crate::expressions::Literal::Number(format!("{}", q)),
8566 ));
8567 }
8568 }
8569
8570 let array_expr =
8571 Expression::Array(Box::new(crate::expressions::Array {
8572 expressions: quantiles,
8573 }));
8574
8575 // Preserve DISTINCT modifier
8576 let mut new_func = Function::new(
8577 "APPROX_QUANTILE".to_string(),
8578 vec![x_expr, array_expr],
8579 );
8580 new_func.distinct = agg.distinct;
8581 Ok(Expression::Function(Box::new(new_func)))
8582 } else {
8583 Ok(Expression::AggregateFunction(agg))
8584 }
8585 } else {
8586 Ok(e)
8587 }
8588 }
8589
8590 Action::GenericFunctionNormalize => {
8591 // Helper closure to convert ARBITRARY to target-specific function
8592 fn convert_arbitrary(arg: Expression, target: DialectType) -> Expression {
8593 let name = match target {
8594 DialectType::ClickHouse => "any",
8595 DialectType::TSQL | DialectType::SQLite => "MAX",
8596 DialectType::Hive => "FIRST",
8597 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
8598 "ARBITRARY"
8599 }
8600 _ => "ANY_VALUE",
8601 };
8602 Expression::Function(Box::new(Function::new(name.to_string(), vec![arg])))
8603 }
8604
8605 if let Expression::Function(f) = e {
8606 let name = f.name.to_uppercase();
8607 match name.as_str() {
8608 "ARBITRARY" if f.args.len() == 1 => {
8609 let arg = f.args.into_iter().next().unwrap();
8610 Ok(convert_arbitrary(arg, target))
8611 }
8612 "TO_NUMBER" if f.args.len() == 1 => {
8613 let arg = f.args.into_iter().next().unwrap();
8614 match target {
8615 DialectType::Oracle | DialectType::Snowflake => {
8616 Ok(Expression::Function(Box::new(Function::new(
8617 "TO_NUMBER".to_string(),
8618 vec![arg],
8619 ))))
8620 }
8621 _ => Ok(Expression::Cast(Box::new(crate::expressions::Cast {
8622 this: arg,
8623 to: crate::expressions::DataType::Double {
8624 precision: None,
8625 scale: None,
8626 },
8627 double_colon_syntax: false,
8628 trailing_comments: Vec::new(),
8629 format: None,
8630 default: None,
8631 inferred_type: None,
8632 }))),
8633 }
8634 }
8635 "AGGREGATE" if f.args.len() >= 3 => match target {
8636 DialectType::DuckDB
8637 | DialectType::Hive
8638 | DialectType::Presto
8639 | DialectType::Trino => Ok(Expression::Function(Box::new(
8640 Function::new("REDUCE".to_string(), f.args),
8641 ))),
8642 _ => Ok(Expression::Function(f)),
8643 },
                        // REGEXP_MATCHES(x, y) -> RegexpLike for most targets, keep for DuckDB
                        "REGEXP_MATCHES" if f.args.len() >= 2 => {
                            if matches!(target, DialectType::DuckDB) {
                                Ok(Expression::Function(f))
                            } else {
                                // Split positional args into subject, pattern, and
                                // optional flags, then build the normalized node.
                                let mut args = f.args;
                                let this = args.remove(0);
                                let pattern = args.remove(0);
                                let flags = if args.is_empty() {
                                    None
                                } else {
                                    Some(args.remove(0))
                                };
                                Ok(Expression::RegexpLike(Box::new(
                                    crate::expressions::RegexpFunc {
                                        this,
                                        pattern,
                                        flags,
                                    },
                                )))
                            }
                        }
                        // REGEXP_FULL_MATCH (Hive REGEXP) -> RegexpLike
                        // NOTE(review): body is identical to REGEXP_MATCHES above; if
                        // full-match semantics require pattern anchoring, it is not
                        // applied here - confirm intended.
                        "REGEXP_FULL_MATCH" if f.args.len() >= 2 => {
                            if matches!(target, DialectType::DuckDB) {
                                Ok(Expression::Function(f))
                            } else {
                                let mut args = f.args;
                                let this = args.remove(0);
                                let pattern = args.remove(0);
                                let flags = if args.is_empty() {
                                    None
                                } else {
                                    Some(args.remove(0))
                                };
                                Ok(Expression::RegexpLike(Box::new(
                                    crate::expressions::RegexpFunc {
                                        this,
                                        pattern,
                                        flags,
                                    },
                                )))
                            }
                        }
                        // STRUCT_EXTRACT(x, 'field') -> x.field (StructExtract expression)
                        "STRUCT_EXTRACT" if f.args.len() == 2 => {
                            let mut args = f.args;
                            let this = args.remove(0);
                            let field_expr = args.remove(0);
                            // Extract string literal to get field name
                            let field_name = match &field_expr {
                                Expression::Literal(crate::expressions::Literal::String(s)) => {
                                    s.clone()
                                }
                                Expression::Identifier(id) => id.name.clone(),
                                _ => {
                                    // Dynamic field expression: cannot become dot
                                    // access, so rebuild the original call unchanged.
                                    return Ok(Expression::Function(Box::new(Function::new(
                                        "STRUCT_EXTRACT".to_string(),
                                        vec![this, field_expr],
                                    ))))
                                }
                            };
                            Ok(Expression::StructExtract(Box::new(
                                crate::expressions::StructExtractFunc {
                                    this,
                                    field: crate::expressions::Identifier::new(field_name),
                                },
                            )))
                        }
                        // LIST_FILTER([4,5,6], x -> x > 4) -> FILTER(ARRAY(4,5,6), x -> x > 4)
                        // Only DuckDB keeps the LIST_ spelling for these lambdas.
                        "LIST_FILTER" if f.args.len() == 2 => {
                            let name = match target {
                                DialectType::DuckDB => "LIST_FILTER",
                                _ => "FILTER",
                            };
                            Ok(Expression::Function(Box::new(Function::new(
                                name.to_string(),
                                f.args,
                            ))))
                        }
                        // LIST_TRANSFORM(x, y -> y + 1) -> TRANSFORM(x, y -> y + 1)
                        "LIST_TRANSFORM" if f.args.len() == 2 => {
                            let name = match target {
                                DialectType::DuckDB => "LIST_TRANSFORM",
                                _ => "TRANSFORM",
                            };
                            Ok(Expression::Function(Box::new(Function::new(
                                name.to_string(),
                                f.args,
                            ))))
                        }
                        // LIST_SORT(x) -> SORT_ARRAY(x) / ARRAY_SORT(x)
                        "LIST_SORT" if f.args.len() >= 1 => {
                            let name = match target {
                                DialectType::DuckDB
                                | DialectType::Presto
                                | DialectType::Trino => "ARRAY_SORT",
                                _ => "SORT_ARRAY",
                            };
                            Ok(Expression::Function(Box::new(Function::new(
                                name.to_string(),
                                f.args,
                            ))))
                        }
                        // LIST_REVERSE_SORT(x) -> SORT_ARRAY(x, FALSE) for Spark/Hive, ARRAY_SORT(x, lambda) for Presto
                        "LIST_REVERSE_SORT" if f.args.len() >= 1 => {
                            match target {
                                DialectType::DuckDB => Ok(Expression::Function(Box::new(
                                    Function::new("ARRAY_REVERSE_SORT".to_string(), f.args),
                                ))),
                                DialectType::Spark
                                | DialectType::Databricks
                                | DialectType::Hive => {
                                    // SORT_ARRAY's second argument FALSE = descending.
                                    let mut args = f.args;
                                    args.push(Expression::Identifier(
                                        crate::expressions::Identifier::new("FALSE"),
                                    ));
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "SORT_ARRAY".to_string(),
                                        args,
                                    ))))
                                }
                                DialectType::Presto
                                | DialectType::Trino
                                | DialectType::Athena => {
                                    // ARRAY_SORT(x, (a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END)
                                    // i.e. a comparator lambda that inverts natural order.
                                    let arr = f.args.into_iter().next().unwrap();
                                    let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                                        parameters: vec![
                                            crate::expressions::Identifier::new("a"),
                                            crate::expressions::Identifier::new("b"),
                                        ],
                                        body: Expression::Case(Box::new(Case {
                                            operand: None,
                                            whens: vec![
                                                (
                                                    Expression::Lt(Box::new(BinaryOp::new(
                                                        Expression::Identifier(crate::expressions::Identifier::new("a")),
                                                        Expression::Identifier(crate::expressions::Identifier::new("b")),
                                                    ))),
                                                    Expression::number(1),
                                                ),
                                                (
                                                    Expression::Gt(Box::new(BinaryOp::new(
                                                        Expression::Identifier(crate::expressions::Identifier::new("a")),
                                                        Expression::Identifier(crate::expressions::Identifier::new("b")),
                                                    ))),
                                                    Expression::Literal(Literal::Number("-1".to_string())),
                                                ),
                                            ],
                                            else_: Some(Expression::number(0)),
                                            comments: Vec::new(),
                                            inferred_type: None,
                                        })),
                                        colon: false,
                                        parameter_types: Vec::new(),
                                    }));
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "ARRAY_SORT".to_string(),
                                        vec![arr, lambda],
                                    ))))
                                }
                                // Unknown target: keep the DuckDB-style name.
                                _ => Ok(Expression::Function(Box::new(Function::new(
                                    "LIST_REVERSE_SORT".to_string(),
                                    f.args,
                                )))),
                            }
                        }
8812 // SPLIT_TO_ARRAY(x) with 1 arg -> add default ',' separator and rename
8813 "SPLIT_TO_ARRAY" if f.args.len() == 1 => {
8814 let mut args = f.args;
8815 args.push(Expression::string(","));
8816 let name = match target {
8817 DialectType::DuckDB => "STR_SPLIT",
8818 DialectType::Presto | DialectType::Trino => "SPLIT",
8819 DialectType::Spark
8820 | DialectType::Databricks
8821 | DialectType::Hive => "SPLIT",
8822 DialectType::PostgreSQL => "STRING_TO_ARRAY",
8823 DialectType::Redshift => "SPLIT_TO_ARRAY",
8824 _ => "SPLIT",
8825 };
8826 Ok(Expression::Function(Box::new(Function::new(
8827 name.to_string(),
8828 args,
8829 ))))
8830 }
8831 // SPLIT_TO_ARRAY(x, sep) with 2 args -> rename based on target
8832 "SPLIT_TO_ARRAY" if f.args.len() == 2 => {
8833 let name = match target {
8834 DialectType::DuckDB => "STR_SPLIT",
8835 DialectType::Presto | DialectType::Trino => "SPLIT",
8836 DialectType::Spark
8837 | DialectType::Databricks
8838 | DialectType::Hive => "SPLIT",
8839 DialectType::PostgreSQL => "STRING_TO_ARRAY",
8840 DialectType::Redshift => "SPLIT_TO_ARRAY",
8841 _ => "SPLIT",
8842 };
8843 Ok(Expression::Function(Box::new(Function::new(
8844 name.to_string(),
8845 f.args,
8846 ))))
8847 }
                        // STRING_TO_ARRAY/STR_SPLIT -> target-specific split function
                        "STRING_TO_ARRAY" | "STR_SPLIT" if f.args.len() >= 2 => {
                            let name = match target {
                                DialectType::DuckDB => "STR_SPLIT",
                                DialectType::Presto | DialectType::Trino => "SPLIT",
                                DialectType::Spark
                                | DialectType::Databricks
                                | DialectType::Hive => "SPLIT",
                                DialectType::Doris | DialectType::StarRocks => {
                                    "SPLIT_BY_STRING"
                                }
                                DialectType::PostgreSQL | DialectType::Redshift => {
                                    "STRING_TO_ARRAY"
                                }
                                _ => "SPLIT",
                            };
                            // For Spark/Hive, SPLIT uses regex - need to escape literal with \Q...\E
                            if matches!(
                                target,
                                DialectType::Spark
                                    | DialectType::Databricks
                                    | DialectType::Hive
                            ) {
                                let mut args = f.args;
                                let x = args.remove(0);
                                let sep = args.remove(0);
                                // Wrap separator in CONCAT('\\Q', sep, '\\E') (regex
                                // quote markers) so a literal separator is not
                                // interpreted as a pattern.
                                let escaped_sep =
                                    Expression::Function(Box::new(Function::new(
                                        "CONCAT".to_string(),
                                        vec![
                                            Expression::string("\\Q"),
                                            sep,
                                            Expression::string("\\E"),
                                        ],
                                    )));
                                Ok(Expression::Function(Box::new(Function::new(
                                    name.to_string(),
                                    vec![x, escaped_sep],
                                ))))
                            } else {
                                Ok(Expression::Function(Box::new(Function::new(
                                    name.to_string(),
                                    f.args,
                                ))))
                            }
                        }
8895 // STR_SPLIT_REGEX(x, 'a') / REGEXP_SPLIT(x, 'a') -> target-specific regex split
8896 "STR_SPLIT_REGEX" | "REGEXP_SPLIT" if f.args.len() == 2 => {
8897 let name = match target {
8898 DialectType::DuckDB => "STR_SPLIT_REGEX",
8899 DialectType::Presto | DialectType::Trino => "REGEXP_SPLIT",
8900 DialectType::Spark
8901 | DialectType::Databricks
8902 | DialectType::Hive => "SPLIT",
8903 _ => "REGEXP_SPLIT",
8904 };
8905 Ok(Expression::Function(Box::new(Function::new(
8906 name.to_string(),
8907 f.args,
8908 ))))
8909 }
                        // SPLIT(x, sep) from Presto/StarRocks/Doris -> target-specific split with regex escaping for Hive/Spark
                        "SPLIT"
                            if f.args.len() == 2
                                && matches!(
                                    source,
                                    DialectType::Presto
                                        | DialectType::Trino
                                        | DialectType::Athena
                                        | DialectType::StarRocks
                                        | DialectType::Doris
                                )
                                && matches!(
                                    target,
                                    DialectType::Spark
                                        | DialectType::Databricks
                                        | DialectType::Hive
                                ) =>
                        {
                            // Presto/StarRocks SPLIT is literal, Hive/Spark SPLIT is regex
                            // so the separator must be quoted with \Q...\E markers.
                            let mut args = f.args;
                            let x = args.remove(0);
                            let sep = args.remove(0);
                            let escaped_sep = Expression::Function(Box::new(Function::new(
                                "CONCAT".to_string(),
                                vec![Expression::string("\\Q"), sep, Expression::string("\\E")],
                            )));
                            Ok(Expression::Function(Box::new(Function::new(
                                "SPLIT".to_string(),
                                vec![x, escaped_sep],
                            ))))
                        }
8941 // SUBSTRINGINDEX -> SUBSTRING_INDEX (ClickHouse camelCase to standard)
8942 // For ClickHouse target, preserve original name to maintain camelCase
8943 "SUBSTRINGINDEX" => {
8944 let name = if matches!(target, DialectType::ClickHouse) {
8945 f.name.clone()
8946 } else {
8947 "SUBSTRING_INDEX".to_string()
8948 };
8949 Ok(Expression::Function(Box::new(Function::new(name, f.args))))
8950 }
                        // ARRAY_LENGTH/SIZE/CARDINALITY -> target-specific array length function
                        "ARRAY_LENGTH" | "SIZE" | "CARDINALITY" => {
                            // Get the array argument (first arg, drop dimension args)
                            let mut args = f.args;
                            let arr = if args.is_empty() {
                                // No arguments at all: rebuild under the normalized
                                // (uppercased) name and bail out.
                                return Ok(Expression::Function(Box::new(Function::new(
                                    name.to_string(),
                                    args,
                                ))));
                            } else {
                                args.remove(0)
                            };
                            let name =
                                match target {
                                    DialectType::Spark
                                    | DialectType::Databricks
                                    | DialectType::Hive => "SIZE",
                                    DialectType::Presto | DialectType::Trino => "CARDINALITY",
                                    DialectType::BigQuery => "ARRAY_LENGTH",
                                    DialectType::DuckDB => {
                                        // DuckDB: use ARRAY_LENGTH with all args
                                        let mut all_args = vec![arr];
                                        all_args.extend(args);
                                        return Ok(Expression::Function(Box::new(
                                            Function::new("ARRAY_LENGTH".to_string(), all_args),
                                        )));
                                    }
                                    DialectType::PostgreSQL | DialectType::Redshift => {
                                        // Keep ARRAY_LENGTH with dimension arg
                                        let mut all_args = vec![arr];
                                        all_args.extend(args);
                                        return Ok(Expression::Function(Box::new(
                                            Function::new("ARRAY_LENGTH".to_string(), all_args),
                                        )));
                                    }
                                    DialectType::ClickHouse => "LENGTH",
                                    _ => "ARRAY_LENGTH",
                                };
                            // Single-argument form: any extra dimension args are
                            // dropped for these targets.
                            Ok(Expression::Function(Box::new(Function::new(
                                name.to_string(),
                                vec![arr],
                            ))))
                        }
                        // UNICODE(x) -> target-specific codepoint function
                        "UNICODE" if f.args.len() == 1 => {
                            match target {
                                DialectType::SQLite | DialectType::DuckDB => {
                                    // Supported natively; keep the call unchanged.
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "UNICODE".to_string(),
                                        f.args,
                                    ))))
                                }
                                DialectType::Oracle => {
                                    // ASCII(UNISTR(x))
                                    let inner = Expression::Function(Box::new(Function::new(
                                        "UNISTR".to_string(),
                                        f.args,
                                    )));
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "ASCII".to_string(),
                                        vec![inner],
                                    ))))
                                }
                                DialectType::MySQL => {
                                    // ORD(CONVERT(x USING utf32))
                                    let arg = f.args.into_iter().next().unwrap();
                                    let convert_expr = Expression::ConvertToCharset(Box::new(
                                        crate::expressions::ConvertToCharset {
                                            this: Box::new(arg),
                                            dest: Some(Box::new(Expression::Identifier(
                                                crate::expressions::Identifier::new("utf32"),
                                            ))),
                                            source: None,
                                        },
                                    ));
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "ORD".to_string(),
                                        vec![convert_expr],
                                    ))))
                                }
                                // Fallback: ASCII(x).
                                // NOTE(review): plain ASCII covers only single-byte
                                // codepoints on some engines - confirm acceptable.
                                _ => Ok(Expression::Function(Box::new(Function::new(
                                    "ASCII".to_string(),
                                    f.args,
                                )))),
                            }
                        }
                        // XOR(a, b, ...) -> a XOR b XOR ... for MySQL, BITWISE_XOR for Presto/Trino, # for PostgreSQL, ^ for BigQuery
                        "XOR" if f.args.len() >= 2 => {
                            match target {
                                DialectType::ClickHouse => {
                                    // ClickHouse: keep as xor() function with lowercase name
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "xor".to_string(),
                                        f.args,
                                    ))))
                                }
                                DialectType::Presto | DialectType::Trino => {
                                    if f.args.len() == 2 {
                                        Ok(Expression::Function(Box::new(Function::new(
                                            "BITWISE_XOR".to_string(),
                                            f.args,
                                        ))))
                                    } else {
                                        // Nest: BITWISE_XOR(BITWISE_XOR(a, b), c)
                                        // via a left-fold over the remaining operands.
                                        let mut args = f.args;
                                        let first = args.remove(0);
                                        let second = args.remove(0);
                                        let mut result =
                                            Expression::Function(Box::new(Function::new(
                                                "BITWISE_XOR".to_string(),
                                                vec![first, second],
                                            )));
                                        for arg in args {
                                            result =
                                                Expression::Function(Box::new(Function::new(
                                                    "BITWISE_XOR".to_string(),
                                                    vec![result, arg],
                                                )));
                                        }
                                        Ok(result)
                                    }
                                }
                                DialectType::MySQL
                                | DialectType::SingleStore
                                | DialectType::Doris
                                | DialectType::StarRocks => {
                                    // Convert XOR(a, b, c) -> Expression::Xor with expressions list
                                    // (rendered as the infix chain `a XOR b XOR c`).
                                    let args = f.args;
                                    Ok(Expression::Xor(Box::new(crate::expressions::Xor {
                                        this: None,
                                        expression: None,
                                        expressions: args,
                                    })))
                                }
                                DialectType::PostgreSQL | DialectType::Redshift => {
                                    // PostgreSQL: a # b (hash operator for XOR)
                                    // Left-fold extra operands into nested BitwiseXor.
                                    let mut args = f.args;
                                    let first = args.remove(0);
                                    let second = args.remove(0);
                                    let mut result = Expression::BitwiseXor(Box::new(
                                        BinaryOp::new(first, second),
                                    ));
                                    for arg in args {
                                        result = Expression::BitwiseXor(Box::new(
                                            BinaryOp::new(result, arg),
                                        ));
                                    }
                                    Ok(result)
                                }
                                DialectType::DuckDB => {
                                    // DuckDB: keep as XOR function (DuckDB ^ is Power, not XOR)
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "XOR".to_string(),
                                        f.args,
                                    ))))
                                }
                                DialectType::BigQuery => {
                                    // BigQuery: a ^ b (caret operator for XOR)
                                    let mut args = f.args;
                                    let first = args.remove(0);
                                    let second = args.remove(0);
                                    let mut result = Expression::BitwiseXor(Box::new(
                                        BinaryOp::new(first, second),
                                    ));
                                    for arg in args {
                                        result = Expression::BitwiseXor(Box::new(
                                            BinaryOp::new(result, arg),
                                        ));
                                    }
                                    Ok(result)
                                }
                                // Default: keep the normalized XOR call.
                                _ => Ok(Expression::Function(Box::new(Function::new(
                                    "XOR".to_string(),
                                    f.args,
                                )))),
                            }
                        }
                        // ARRAY_REVERSE_SORT(x) -> SORT_ARRAY(x, FALSE) for Spark/Hive, ARRAY_SORT(x, lambda) for Presto
                        "ARRAY_REVERSE_SORT" if f.args.len() >= 1 => {
                            match target {
                                DialectType::Spark
                                | DialectType::Databricks
                                | DialectType::Hive => {
                                    // SORT_ARRAY's second argument FALSE = descending.
                                    let mut args = f.args;
                                    args.push(Expression::Identifier(
                                        crate::expressions::Identifier::new("FALSE"),
                                    ));
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "SORT_ARRAY".to_string(),
                                        args,
                                    ))))
                                }
                                DialectType::Presto
                                | DialectType::Trino
                                | DialectType::Athena => {
                                    // ARRAY_SORT(x, (a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END)
                                    // NOTE(review): the LIST_REVERSE_SORT arm builds the -1
                                    // as Literal::Number("-1") while this arm uses Neg(1);
                                    // confirm both render identically in generated SQL.
                                    let arr = f.args.into_iter().next().unwrap();
                                    let lambda = Expression::Lambda(Box::new(
                                        crate::expressions::LambdaExpr {
                                            parameters: vec![
                                                Identifier::new("a"),
                                                Identifier::new("b"),
                                            ],
                                            colon: false,
                                            parameter_types: Vec::new(),
                                            body: Expression::Case(Box::new(Case {
                                                operand: None,
                                                whens: vec![
                                                    (
                                                        Expression::Lt(Box::new(
                                                            BinaryOp::new(
                                                                Expression::Identifier(
                                                                    Identifier::new("a"),
                                                                ),
                                                                Expression::Identifier(
                                                                    Identifier::new("b"),
                                                                ),
                                                            ),
                                                        )),
                                                        Expression::number(1),
                                                    ),
                                                    (
                                                        Expression::Gt(Box::new(
                                                            BinaryOp::new(
                                                                Expression::Identifier(
                                                                    Identifier::new("a"),
                                                                ),
                                                                Expression::Identifier(
                                                                    Identifier::new("b"),
                                                                ),
                                                            ),
                                                        )),
                                                        Expression::Neg(Box::new(
                                                            crate::expressions::UnaryOp {
                                                                this: Expression::number(1),
                                                                inferred_type: None,
                                                            },
                                                        )),
                                                    ),
                                                ],
                                                else_: Some(Expression::number(0)),
                                                comments: Vec::new(),
                                                inferred_type: None,
                                            })),
                                        },
                                    ));
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "ARRAY_SORT".to_string(),
                                        vec![arr, lambda],
                                    ))))
                                }
                                // Unknown target: keep the DuckDB-style name.
                                _ => Ok(Expression::Function(Box::new(Function::new(
                                    "ARRAY_REVERSE_SORT".to_string(),
                                    f.args,
                                )))),
                            }
                        }
                        // ENCODE(x) -> ENCODE(x, 'utf-8') for Spark/Hive, TO_UTF8(x) for Presto
                        "ENCODE" if f.args.len() == 1 => match target {
                            DialectType::Spark
                            | DialectType::Databricks
                            | DialectType::Hive => {
                                // Spark/Hive ENCODE requires an explicit charset arg.
                                let mut args = f.args;
                                args.push(Expression::string("utf-8"));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "ENCODE".to_string(),
                                    args,
                                ))))
                            }
                            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "TO_UTF8".to_string(),
                                    f.args,
                                ))))
                            }
                            _ => Ok(Expression::Function(Box::new(Function::new(
                                "ENCODE".to_string(),
                                f.args,
                            )))),
                        },
                        // DECODE(x) -> DECODE(x, 'utf-8') for Spark/Hive, FROM_UTF8(x) for Presto
                        // (mirror of the ENCODE mapping above)
                        "DECODE" if f.args.len() == 1 => match target {
                            DialectType::Spark
                            | DialectType::Databricks
                            | DialectType::Hive => {
                                let mut args = f.args;
                                args.push(Expression::string("utf-8"));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DECODE".to_string(),
                                    args,
                                ))))
                            }
                            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "FROM_UTF8".to_string(),
                                    f.args,
                                ))))
                            }
                            _ => Ok(Expression::Function(Box::new(Function::new(
                                "DECODE".to_string(),
                                f.args,
                            )))),
                        },
9254 // QUANTILE(x, p) -> PERCENTILE(x, p) for Spark/Hive
9255 "QUANTILE" if f.args.len() == 2 => {
9256 let name = match target {
9257 DialectType::Spark
9258 | DialectType::Databricks
9259 | DialectType::Hive => "PERCENTILE",
9260 DialectType::Presto | DialectType::Trino => "APPROX_PERCENTILE",
9261 DialectType::BigQuery => "PERCENTILE_CONT",
9262 _ => "QUANTILE",
9263 };
9264 Ok(Expression::Function(Box::new(Function::new(
9265 name.to_string(),
9266 f.args,
9267 ))))
9268 }
                        // QUANTILE_CONT(x, q) -> PERCENTILE_CONT(q) WITHIN GROUP (ORDER BY x) for PostgreSQL/Snowflake
                        "QUANTILE_CONT" if f.args.len() == 2 => {
                            let mut args = f.args;
                            let column = args.remove(0);
                            let quantile = args.remove(0);
                            match target {
                                DialectType::DuckDB => {
                                    // DuckDB supports the two-argument form natively.
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "QUANTILE_CONT".to_string(),
                                        vec![column, quantile],
                                    ))))
                                }
                                DialectType::PostgreSQL
                                | DialectType::Redshift
                                | DialectType::Snowflake => {
                                    // PERCENTILE_CONT(q) WITHIN GROUP (ORDER BY x)
                                    // (the column appears both as the aggregate input
                                    // and as the ORDER BY key, hence the clone)
                                    let inner = Expression::PercentileCont(Box::new(
                                        crate::expressions::PercentileFunc {
                                            this: column.clone(),
                                            percentile: quantile,
                                            order_by: None,
                                            filter: None,
                                        },
                                    ));
                                    Ok(Expression::WithinGroup(Box::new(
                                        crate::expressions::WithinGroup {
                                            this: inner,
                                            order_by: vec![crate::expressions::Ordered {
                                                this: column,
                                                desc: false,
                                                nulls_first: None,
                                                explicit_asc: false,
                                                with_fill: None,
                                            }],
                                        },
                                    )))
                                }
                                _ => Ok(Expression::Function(Box::new(Function::new(
                                    "QUANTILE_CONT".to_string(),
                                    vec![column, quantile],
                                )))),
                            }
                        }
                        // QUANTILE_DISC(x, q) -> PERCENTILE_DISC(q) WITHIN GROUP (ORDER BY x) for PostgreSQL/Snowflake
                        // (mirrors QUANTILE_CONT above, discrete variant)
                        "QUANTILE_DISC" if f.args.len() == 2 => {
                            let mut args = f.args;
                            let column = args.remove(0);
                            let quantile = args.remove(0);
                            match target {
                                DialectType::DuckDB => {
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "QUANTILE_DISC".to_string(),
                                        vec![column, quantile],
                                    ))))
                                }
                                DialectType::PostgreSQL
                                | DialectType::Redshift
                                | DialectType::Snowflake => {
                                    // PERCENTILE_DISC(q) WITHIN GROUP (ORDER BY x)
                                    let inner = Expression::PercentileDisc(Box::new(
                                        crate::expressions::PercentileFunc {
                                            this: column.clone(),
                                            percentile: quantile,
                                            order_by: None,
                                            filter: None,
                                        },
                                    ));
                                    Ok(Expression::WithinGroup(Box::new(
                                        crate::expressions::WithinGroup {
                                            this: inner,
                                            order_by: vec![crate::expressions::Ordered {
                                                this: column,
                                                desc: false,
                                                nulls_first: None,
                                                explicit_asc: false,
                                                with_fill: None,
                                            }],
                                        },
                                    )))
                                }
                                _ => Ok(Expression::Function(Box::new(Function::new(
                                    "QUANTILE_DISC".to_string(),
                                    vec![column, quantile],
                                )))),
                            }
                        }
                        // PERCENTILE_APPROX(x, p) / APPROX_PERCENTILE(x, p) -> target-specific
                        "PERCENTILE_APPROX" | "APPROX_PERCENTILE" if f.args.len() >= 2 => {
                            let name = match target {
                                DialectType::Presto
                                | DialectType::Trino
                                | DialectType::Athena => "APPROX_PERCENTILE",
                                DialectType::Spark
                                | DialectType::Databricks
                                | DialectType::Hive => "PERCENTILE_APPROX",
                                DialectType::DuckDB => "APPROX_QUANTILE",
                                DialectType::PostgreSQL | DialectType::Redshift => {
                                    "PERCENTILE_CONT"
                                }
                                // Unknown target: keep whichever spelling came in.
                                _ => &f.name,
                            };
                            Ok(Expression::Function(Box::new(Function::new(
                                name.to_string(),
                                f.args,
                            ))))
                        }
                        // EPOCH(x) -> UNIX_TIMESTAMP(x) for Spark/Hive
                        "EPOCH" if f.args.len() == 1 => {
                            let name = match target {
                                DialectType::Spark
                                | DialectType::Databricks
                                | DialectType::Hive => "UNIX_TIMESTAMP",
                                DialectType::Presto | DialectType::Trino => "TO_UNIXTIME",
                                _ => "EPOCH",
                            };
                            Ok(Expression::Function(Box::new(Function::new(
                                name.to_string(),
                                f.args,
                            ))))
                        }
                        // EPOCH_MS(x) -> target-specific epoch milliseconds conversion
                        "EPOCH_MS" if f.args.len() == 1 => {
                            match target {
                                DialectType::Spark | DialectType::Databricks => {
                                    // Native millisecond constructor.
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "TIMESTAMP_MILLIS".to_string(),
                                        f.args,
                                    ))))
                                }
                                DialectType::Hive => {
                                    // Hive: FROM_UNIXTIME(x / 1000)
                                    let arg = f.args.into_iter().next().unwrap();
                                    let div_expr = Expression::Div(Box::new(
                                        crate::expressions::BinaryOp::new(
                                            arg,
                                            Expression::number(1000),
                                        ),
                                    ));
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "FROM_UNIXTIME".to_string(),
                                        vec![div_expr],
                                    ))))
                                }
                                DialectType::Presto | DialectType::Trino => {
                                    // Same seconds-based conversion as Hive.
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "FROM_UNIXTIME".to_string(),
                                        vec![Expression::Div(Box::new(
                                            crate::expressions::BinaryOp::new(
                                                f.args.into_iter().next().unwrap(),
                                                Expression::number(1000),
                                            ),
                                        ))],
                                    ))))
                                }
                                _ => Ok(Expression::Function(Box::new(Function::new(
                                    "EPOCH_MS".to_string(),
                                    f.args,
                                )))),
                            }
                        }
                        // HASHBYTES('algorithm', x) -> target-specific hash function
                        "HASHBYTES" if f.args.len() == 2 => {
                            // Keep HASHBYTES as-is for TSQL target
                            if matches!(target, DialectType::TSQL) {
                                return Ok(Expression::Function(f));
                            }
                            let algo_expr = &f.args[0];
                            let algo = match algo_expr {
                                Expression::Literal(crate::expressions::Literal::String(s)) => {
                                    s.to_uppercase()
                                }
                                // Non-literal algorithm name: cannot dispatch, keep call.
                                _ => return Ok(Expression::Function(f)),
                            };
                            let data_arg = f.args.into_iter().nth(1).unwrap();
                            match algo.as_str() {
                                "SHA1" => {
                                    let name = match target {
                                        DialectType::Spark | DialectType::Databricks => "SHA",
                                        DialectType::Hive => "SHA1",
                                        _ => "SHA1",
                                    };
                                    Ok(Expression::Function(Box::new(Function::new(
                                        name.to_string(),
                                        vec![data_arg],
                                    ))))
                                }
                                "SHA2_256" => {
                                    // SHA2(x, 256)
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "SHA2".to_string(),
                                        vec![data_arg, Expression::number(256)],
                                    ))))
                                }
                                "SHA2_512" => {
                                    // SHA2(x, 512)
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "SHA2".to_string(),
                                        vec![data_arg, Expression::number(512)],
                                    ))))
                                }
                                "MD5" => Ok(Expression::Function(Box::new(Function::new(
                                    "MD5".to_string(),
                                    vec![data_arg],
                                )))),
                                // Unknown algorithm: rebuild HASHBYTES.
                                // NOTE(review): the algorithm literal is re-emitted
                                // uppercased, which may differ from the input casing.
                                _ => Ok(Expression::Function(Box::new(Function::new(
                                    "HASHBYTES".to_string(),
                                    vec![Expression::string(&algo), data_arg],
                                )))),
                            }
                        }
// JSON_EXTRACT_PATH(json, key1, key2, ...) -> target-specific JSON extraction
// The *_TEXT variant requests a scalar/text result rather than a JSON fragment.
"JSON_EXTRACT_PATH" | "JSON_EXTRACT_PATH_TEXT" if f.args.len() >= 2 => {
    let is_text = name == "JSON_EXTRACT_PATH_TEXT";
    let mut args = f.args;
    // First argument is the JSON document; the rest are path keys.
    let json_expr = args.remove(0);
    // Build JSON path from remaining keys: $.key1.key2 or $.key1[0]
    let mut json_path = "$".to_string();
    for a in &args {
        match a {
            Expression::Literal(
                crate::expressions::Literal::String(s),
            ) => {
                // Numeric string keys become array indices: [0]
                // (note: an empty string also satisfies `all` and becomes `[]`)
                if s.chars().all(|c| c.is_ascii_digit()) {
                    json_path.push('[');
                    json_path.push_str(s);
                    json_path.push(']');
                } else {
                    json_path.push('.');
                    json_path.push_str(s);
                }
            }
            _ => {
                // Non-literal keys cannot be folded into a static path;
                // emit a placeholder segment instead.
                json_path.push_str(".?");
            }
        }
    }
    match target {
        // Spark-family: single GET_JSON_OBJECT(json, '$.path') call.
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => {
            Ok(Expression::Function(Box::new(Function::new(
                "GET_JSON_OBJECT".to_string(),
                vec![json_expr, Expression::string(&json_path)],
            ))))
        }
        // Presto/Trino: JSON_EXTRACT returns JSON, JSON_EXTRACT_SCALAR text.
        DialectType::Presto | DialectType::Trino => {
            let func_name = if is_text {
                "JSON_EXTRACT_SCALAR"
            } else {
                "JSON_EXTRACT"
            };
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                vec![json_expr, Expression::string(&json_path)],
            ))))
        }
        // BigQuery/MySQL use the same function-name pair as Presto.
        DialectType::BigQuery | DialectType::MySQL => {
            let func_name = if is_text {
                "JSON_EXTRACT_SCALAR"
            } else {
                "JSON_EXTRACT"
            };
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                vec![json_expr, Expression::string(&json_path)],
            ))))
        }
        DialectType::PostgreSQL | DialectType::Materialize => {
            // Keep as JSON_EXTRACT_PATH_TEXT / JSON_EXTRACT_PATH for PostgreSQL/Materialize
            // (variadic key form, not a single path string).
            let func_name = if is_text {
                "JSON_EXTRACT_PATH_TEXT"
            } else {
                "JSON_EXTRACT_PATH"
            };
            let mut new_args = vec![json_expr];
            new_args.extend(args);
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                new_args,
            ))))
        }
        DialectType::DuckDB | DialectType::SQLite => {
            // Use -> for JSON_EXTRACT_PATH, ->> for JSON_EXTRACT_PATH_TEXT
            // (emitted via the dedicated JsonExtract* AST nodes with
            // arrow_syntax so the generator prints operators, not calls).
            if is_text {
                Ok(Expression::JsonExtractScalar(Box::new(
                    crate::expressions::JsonExtractFunc {
                        this: json_expr,
                        path: Expression::string(&json_path),
                        returning: None,
                        arrow_syntax: true,
                        hash_arrow_syntax: false,
                        wrapper_option: None,
                        quotes_option: None,
                        on_scalar_string: false,
                        on_error: None,
                    },
                )))
            } else {
                Ok(Expression::JsonExtract(Box::new(
                    crate::expressions::JsonExtractFunc {
                        this: json_expr,
                        path: Expression::string(&json_path),
                        returning: None,
                        arrow_syntax: true,
                        hash_arrow_syntax: false,
                        wrapper_option: None,
                        quotes_option: None,
                        on_scalar_string: false,
                        on_error: None,
                    },
                )))
            }
        }
        DialectType::Redshift => {
            // Keep as JSON_EXTRACT_PATH_TEXT for Redshift
            // (both variants collapse to the TEXT form here).
            let mut new_args = vec![json_expr];
            new_args.extend(args);
            Ok(Expression::Function(Box::new(Function::new(
                "JSON_EXTRACT_PATH_TEXT".to_string(),
                new_args,
            ))))
        }
        DialectType::TSQL => {
            // ISNULL(JSON_QUERY(json, '$.path'), JSON_VALUE(json, '$.path'))
            // JSON_QUERY handles objects/arrays, JSON_VALUE scalars; the
            // ISNULL fallback covers both shapes with one expression.
            let jq = Expression::Function(Box::new(Function::new(
                "JSON_QUERY".to_string(),
                vec![json_expr.clone(), Expression::string(&json_path)],
            )));
            let jv = Expression::Function(Box::new(Function::new(
                "JSON_VALUE".to_string(),
                vec![json_expr, Expression::string(&json_path)],
            )));
            Ok(Expression::Function(Box::new(Function::new(
                "ISNULL".to_string(),
                vec![jq, jv],
            ))))
        }
        DialectType::ClickHouse => {
            // ClickHouse takes the keys as separate arguments, so the
            // original key list is forwarded, not the folded path string.
            let func_name = if is_text {
                "JSONExtractString"
            } else {
                "JSONExtractRaw"
            };
            let mut new_args = vec![json_expr];
            new_args.extend(args);
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                new_args,
            ))))
        }
        _ => {
            // Default: Presto-style names with the folded path string.
            let func_name = if is_text {
                "JSON_EXTRACT_SCALAR"
            } else {
                "JSON_EXTRACT"
            };
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                vec![json_expr, Expression::string(&json_path)],
            ))))
        }
    }
}
9631 // APPROX_DISTINCT(x) -> APPROX_COUNT_DISTINCT(x) for Spark/Hive/BigQuery
9632 "APPROX_DISTINCT" if f.args.len() >= 1 => {
9633 let name = match target {
9634 DialectType::Spark
9635 | DialectType::Databricks
9636 | DialectType::Hive
9637 | DialectType::BigQuery => "APPROX_COUNT_DISTINCT",
9638 _ => "APPROX_DISTINCT",
9639 };
9640 let mut args = f.args;
9641 // Hive doesn't support the accuracy parameter
9642 if name == "APPROX_COUNT_DISTINCT"
9643 && matches!(target, DialectType::Hive)
9644 {
9645 args.truncate(1);
9646 }
9647 Ok(Expression::Function(Box::new(Function::new(
9648 name.to_string(),
9649 args,
9650 ))))
9651 }
9652 // REGEXP_EXTRACT(x, pattern) - normalize default group index
9653 "REGEXP_EXTRACT" if f.args.len() == 2 => {
9654 // Determine source default group index
9655 let source_default = match source {
9656 DialectType::Presto
9657 | DialectType::Trino
9658 | DialectType::DuckDB => 0,
9659 _ => 1, // Hive/Spark/Databricks default = 1
9660 };
9661 // Determine target default group index
9662 let target_default = match target {
9663 DialectType::Presto
9664 | DialectType::Trino
9665 | DialectType::DuckDB
9666 | DialectType::BigQuery => 0,
9667 DialectType::Snowflake => {
9668 // Snowflake uses REGEXP_SUBSTR
9669 return Ok(Expression::Function(Box::new(Function::new(
9670 "REGEXP_SUBSTR".to_string(),
9671 f.args,
9672 ))));
9673 }
9674 _ => 1, // Hive/Spark/Databricks default = 1
9675 };
9676 if source_default != target_default {
9677 let mut args = f.args;
9678 args.push(Expression::number(source_default));
9679 Ok(Expression::Function(Box::new(Function::new(
9680 "REGEXP_EXTRACT".to_string(),
9681 args,
9682 ))))
9683 } else {
9684 Ok(Expression::Function(Box::new(Function::new(
9685 "REGEXP_EXTRACT".to_string(),
9686 f.args,
9687 ))))
9688 }
9689 }
9690 // RLIKE(str, pattern) -> RegexpLike expression (generates as target-specific form)
9691 "RLIKE" if f.args.len() == 2 => {
9692 let mut args = f.args;
9693 let str_expr = args.remove(0);
9694 let pattern = args.remove(0);
9695 match target {
9696 DialectType::DuckDB => {
9697 // REGEXP_MATCHES(str, pattern)
9698 Ok(Expression::Function(Box::new(Function::new(
9699 "REGEXP_MATCHES".to_string(),
9700 vec![str_expr, pattern],
9701 ))))
9702 }
9703 _ => {
9704 // Convert to RegexpLike which generates as RLIKE/~/REGEXP_LIKE per dialect
9705 Ok(Expression::RegexpLike(Box::new(
9706 crate::expressions::RegexpFunc {
9707 this: str_expr,
9708 pattern,
9709 flags: None,
9710 },
9711 )))
9712 }
9713 }
9714 }
// EOMONTH(date[, month_offset]) -> target-specific
// TSQL's EOMONTH returns the last day of the month of `date`, optionally
// shifted by `month_offset` months. Each branch reproduces that contract
// with the target dialect's native last-day / date-arithmetic functions.
"EOMONTH" if f.args.len() >= 1 => {
    let mut args = f.args;
    let date_arg = args.remove(0);
    // Optional second argument: number of months to shift before
    // taking the end of month.
    let month_offset = if !args.is_empty() {
        Some(args.remove(0))
    } else {
        None
    };

    // Helper: wrap date in CAST to DATE
    let cast_to_date = |e: Expression| -> Expression {
        Expression::Cast(Box::new(Cast {
            this: e,
            to: DataType::Date,
            trailing_comments: vec![],
            double_colon_syntax: false,
            format: None,
            default: None,
            inferred_type: None,
        }))
    };

    match target {
        DialectType::TSQL | DialectType::Fabric => {
            // TSQL: EOMONTH(CAST(date AS DATE)) or EOMONTH(DATEADD(MONTH, offset, CAST(date AS DATE)))
            let date = cast_to_date(date_arg);
            let date = if let Some(offset) = month_offset {
                Expression::Function(Box::new(Function::new(
                    "DATEADD".to_string(),
                    vec![
                        // DATEADD's unit is a bare keyword, hence an
                        // identifier rather than a string literal.
                        Expression::Identifier(Identifier::new(
                            "MONTH",
                        )),
                        offset,
                        date,
                    ],
                )))
            } else {
                date
            };
            Ok(Expression::Function(Box::new(Function::new(
                "EOMONTH".to_string(),
                vec![date],
            ))))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // Presto: LAST_DAY_OF_MONTH(CAST(CAST(date AS TIMESTAMP) AS DATE))
            // or with offset: LAST_DAY_OF_MONTH(DATE_ADD('MONTH', offset, CAST(CAST(date AS TIMESTAMP) AS DATE)))
            // The intermediate TIMESTAMP cast lets string inputs with a
            // time component survive the conversion to DATE.
            let cast_ts = Expression::Cast(Box::new(Cast {
                this: date_arg,
                to: DataType::Timestamp {
                    timezone: false,
                    precision: None,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }));
            let date = cast_to_date(cast_ts);
            let date = if let Some(offset) = month_offset {
                Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    // Presto's DATE_ADD takes the unit as a string.
                    vec![Expression::string("MONTH"), offset, date],
                )))
            } else {
                date
            };
            Ok(Expression::Function(Box::new(Function::new(
                "LAST_DAY_OF_MONTH".to_string(),
                vec![date],
            ))))
        }
        DialectType::PostgreSQL => {
            // PostgreSQL: CAST(DATE_TRUNC('MONTH', CAST(date AS DATE) [+ INTERVAL 'offset MONTH']) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)
            let date = cast_to_date(date_arg);
            let date = if let Some(offset) = month_offset {
                // The offset is rendered into the interval literal text,
                // e.g. INTERVAL '3 MONTH'.
                let interval_str = format!(
                    "{} MONTH",
                    Self::expr_to_string_static(&offset)
                );
                Expression::Add(Box::new(
                    crate::expressions::BinaryOp::new(
                        date,
                        Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(Expression::string(
                                    &interval_str,
                                )),
                                unit: None,
                            },
                        )),
                    ),
                ))
            } else {
                date
            };
            // first-of-month + 1 month - 1 day == last day of month
            let truncated =
                Expression::Function(Box::new(Function::new(
                    "DATE_TRUNC".to_string(),
                    vec![Expression::string("MONTH"), date],
                )));
            let plus_month = Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(
                    truncated,
                    Expression::Interval(Box::new(
                        crate::expressions::Interval {
                            this: Some(Expression::string("1 MONTH")),
                            unit: None,
                        },
                    )),
                ),
            ));
            let minus_day = Expression::Sub(Box::new(
                crate::expressions::BinaryOp::new(
                    plus_month,
                    Expression::Interval(Box::new(
                        crate::expressions::Interval {
                            this: Some(Expression::string("1 DAY")),
                            unit: None,
                        },
                    )),
                ),
            ));
            Ok(Expression::Cast(Box::new(Cast {
                this: minus_day,
                to: DataType::Date,
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })))
        }
        DialectType::DuckDB => {
            // DuckDB: LAST_DAY(CAST(date AS DATE) [+ INTERVAL (offset) MONTH])
            let date = cast_to_date(date_arg);
            let date = if let Some(offset) = month_offset {
                // Wrap negative numbers in parentheses for DuckDB INTERVAL
                let interval_val =
                    if matches!(&offset, Expression::Neg(_)) {
                        Expression::Paren(Box::new(
                            crate::expressions::Paren {
                                this: offset,
                                trailing_comments: Vec::new(),
                            },
                        ))
                    } else {
                        offset
                    };
                Expression::Add(Box::new(crate::expressions::BinaryOp::new(
                    date,
                    Expression::Interval(Box::new(crate::expressions::Interval {
                        this: Some(interval_val),
                        unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                            unit: crate::expressions::IntervalUnit::Month,
                            use_plural: false,
                        }),
                    })),
                )))
            } else {
                date
            };
            Ok(Expression::Function(Box::new(Function::new(
                "LAST_DAY".to_string(),
                vec![date],
            ))))
        }
        DialectType::Snowflake | DialectType::Redshift => {
            // Snowflake/Redshift: LAST_DAY(TO_DATE(date) or CAST(date AS DATE))
            // With offset: LAST_DAY(DATEADD(MONTH, offset, TO_DATE(date)))
            let date = if matches!(target, DialectType::Snowflake) {
                Expression::Function(Box::new(Function::new(
                    "TO_DATE".to_string(),
                    vec![date_arg],
                )))
            } else {
                cast_to_date(date_arg)
            };
            let date = if let Some(offset) = month_offset {
                Expression::Function(Box::new(Function::new(
                    "DATEADD".to_string(),
                    vec![
                        Expression::Identifier(Identifier::new(
                            "MONTH",
                        )),
                        offset,
                        date,
                    ],
                )))
            } else {
                date
            };
            Ok(Expression::Function(Box::new(Function::new(
                "LAST_DAY".to_string(),
                vec![date],
            ))))
        }
        DialectType::Spark | DialectType::Databricks => {
            // Spark: LAST_DAY(TO_DATE(date))
            // With offset: LAST_DAY(ADD_MONTHS(TO_DATE(date), offset))
            let date = Expression::Function(Box::new(Function::new(
                "TO_DATE".to_string(),
                vec![date_arg],
            )));
            let date = if let Some(offset) = month_offset {
                Expression::Function(Box::new(Function::new(
                    "ADD_MONTHS".to_string(),
                    vec![date, offset],
                )))
            } else {
                date
            };
            Ok(Expression::Function(Box::new(Function::new(
                "LAST_DAY".to_string(),
                vec![date],
            ))))
        }
        DialectType::MySQL => {
            // MySQL: LAST_DAY(DATE(date)) - no offset
            // With offset: LAST_DAY(DATE_ADD(date, INTERVAL offset MONTH)) - no DATE() wrapper
            let date = if let Some(offset) = month_offset {
                let iu = crate::expressions::IntervalUnit::Month;
                // DateAdd renders as DATE_ADD(date, INTERVAL n MONTH).
                Expression::DateAdd(Box::new(
                    crate::expressions::DateAddFunc {
                        this: date_arg,
                        interval: offset,
                        unit: iu,
                    },
                ))
            } else {
                Expression::Function(Box::new(Function::new(
                    "DATE".to_string(),
                    vec![date_arg],
                )))
            };
            Ok(Expression::Function(Box::new(Function::new(
                "LAST_DAY".to_string(),
                vec![date],
            ))))
        }
        DialectType::BigQuery => {
            // BigQuery: LAST_DAY(CAST(date AS DATE))
            // With offset: LAST_DAY(DATE_ADD(CAST(date AS DATE), INTERVAL offset MONTH))
            let date = cast_to_date(date_arg);
            let date = if let Some(offset) = month_offset {
                let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                    this: Some(offset),
                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                        unit: crate::expressions::IntervalUnit::Month,
                        use_plural: false,
                    }),
                }));
                Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![date, interval],
                )))
            } else {
                date
            };
            Ok(Expression::Function(Box::new(Function::new(
                "LAST_DAY".to_string(),
                vec![date],
            ))))
        }
        DialectType::ClickHouse => {
            // ClickHouse: LAST_DAY(CAST(date AS Nullable(DATE)))
            // Nullable keeps invalid dates from raising instead of
            // returning NULL.
            let date = Expression::Cast(Box::new(Cast {
                this: date_arg,
                to: DataType::Nullable {
                    inner: Box::new(DataType::Date),
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }));
            let date = if let Some(offset) = month_offset {
                Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![
                        Expression::Identifier(Identifier::new(
                            "MONTH",
                        )),
                        offset,
                        date,
                    ],
                )))
            } else {
                date
            };
            Ok(Expression::Function(Box::new(Function::new(
                "LAST_DAY".to_string(),
                vec![date],
            ))))
        }
        DialectType::Hive => {
            // Hive: LAST_DAY(date)
            // With offset: LAST_DAY(ADD_MONTHS(date, offset))
            let date = if let Some(offset) = month_offset {
                Expression::Function(Box::new(Function::new(
                    "ADD_MONTHS".to_string(),
                    vec![date_arg, offset],
                )))
            } else {
                date_arg
            };
            Ok(Expression::Function(Box::new(Function::new(
                "LAST_DAY".to_string(),
                vec![date],
            ))))
        }
        _ => {
            // Default: LAST_DAY(date), shifting with TSQL-style
            // DATEADD(MONTH, offset, date) when an offset is present.
            let date = if let Some(offset) = month_offset {
                let unit =
                    Expression::Identifier(Identifier::new("MONTH"));
                Expression::Function(Box::new(Function::new(
                    "DATEADD".to_string(),
                    vec![unit, offset, date_arg],
                )))
            } else {
                date_arg
            };
            Ok(Expression::Function(Box::new(Function::new(
                "LAST_DAY".to_string(),
                vec![date],
            ))))
        }
    }
}
10050 // LAST_DAY(x) / LAST_DAY_OF_MONTH(x) -> target-specific
10051 "LAST_DAY" | "LAST_DAY_OF_MONTH"
10052 if !matches!(source, DialectType::BigQuery)
10053 && f.args.len() >= 1 =>
10054 {
10055 let first_arg = f.args.into_iter().next().unwrap();
10056 match target {
10057 DialectType::TSQL | DialectType::Fabric => {
10058 Ok(Expression::Function(Box::new(Function::new(
10059 "EOMONTH".to_string(),
10060 vec![first_arg],
10061 ))))
10062 }
10063 DialectType::Presto
10064 | DialectType::Trino
10065 | DialectType::Athena => {
10066 Ok(Expression::Function(Box::new(Function::new(
10067 "LAST_DAY_OF_MONTH".to_string(),
10068 vec![first_arg],
10069 ))))
10070 }
10071 _ => Ok(Expression::Function(Box::new(Function::new(
10072 "LAST_DAY".to_string(),
10073 vec![first_arg],
10074 )))),
10075 }
10076 }
10077 // MAP(keys_array, vals_array) from Presto (2-arg form) -> target-specific
10078 "MAP"
10079 if f.args.len() == 2
10080 && matches!(
10081 source,
10082 DialectType::Presto
10083 | DialectType::Trino
10084 | DialectType::Athena
10085 ) =>
10086 {
10087 let keys_arg = f.args[0].clone();
10088 let vals_arg = f.args[1].clone();
10089
10090 // Helper: extract array elements from Array/ArrayFunc/Function("ARRAY") expressions
10091 fn extract_array_elements(
10092 expr: &Expression,
10093 ) -> Option<&Vec<Expression>> {
10094 match expr {
10095 Expression::Array(arr) => Some(&arr.expressions),
10096 Expression::ArrayFunc(arr) => Some(&arr.expressions),
10097 Expression::Function(f)
10098 if f.name.eq_ignore_ascii_case("ARRAY") =>
10099 {
10100 Some(&f.args)
10101 }
10102 _ => None,
10103 }
10104 }
10105
10106 match target {
10107 DialectType::Spark | DialectType::Databricks => {
10108 // Presto MAP(keys, vals) -> Spark MAP_FROM_ARRAYS(keys, vals)
10109 Ok(Expression::Function(Box::new(Function::new(
10110 "MAP_FROM_ARRAYS".to_string(),
10111 f.args,
10112 ))))
10113 }
10114 DialectType::Hive => {
10115 // Presto MAP(ARRAY[k1,k2], ARRAY[v1,v2]) -> Hive MAP(k1, v1, k2, v2)
10116 if let (Some(keys), Some(vals)) = (
10117 extract_array_elements(&keys_arg),
10118 extract_array_elements(&vals_arg),
10119 ) {
10120 if keys.len() == vals.len() {
10121 let mut interleaved = Vec::new();
10122 for (k, v) in keys.iter().zip(vals.iter()) {
10123 interleaved.push(k.clone());
10124 interleaved.push(v.clone());
10125 }
10126 Ok(Expression::Function(Box::new(Function::new(
10127 "MAP".to_string(),
10128 interleaved,
10129 ))))
10130 } else {
10131 Ok(Expression::Function(Box::new(Function::new(
10132 "MAP".to_string(),
10133 f.args,
10134 ))))
10135 }
10136 } else {
10137 Ok(Expression::Function(Box::new(Function::new(
10138 "MAP".to_string(),
10139 f.args,
10140 ))))
10141 }
10142 }
10143 DialectType::Snowflake => {
10144 // Presto MAP(ARRAY[k1,k2], ARRAY[v1,v2]) -> Snowflake OBJECT_CONSTRUCT(k1, v1, k2, v2)
10145 if let (Some(keys), Some(vals)) = (
10146 extract_array_elements(&keys_arg),
10147 extract_array_elements(&vals_arg),
10148 ) {
10149 if keys.len() == vals.len() {
10150 let mut interleaved = Vec::new();
10151 for (k, v) in keys.iter().zip(vals.iter()) {
10152 interleaved.push(k.clone());
10153 interleaved.push(v.clone());
10154 }
10155 Ok(Expression::Function(Box::new(Function::new(
10156 "OBJECT_CONSTRUCT".to_string(),
10157 interleaved,
10158 ))))
10159 } else {
10160 Ok(Expression::Function(Box::new(Function::new(
10161 "MAP".to_string(),
10162 f.args,
10163 ))))
10164 }
10165 } else {
10166 Ok(Expression::Function(Box::new(Function::new(
10167 "MAP".to_string(),
10168 f.args,
10169 ))))
10170 }
10171 }
10172 _ => Ok(Expression::Function(f)),
10173 }
10174 }
10175 // MAP() with 0 args from Spark -> MAP(ARRAY[], ARRAY[]) for Presto/Trino
10176 "MAP"
10177 if f.args.is_empty()
10178 && matches!(
10179 source,
10180 DialectType::Hive
10181 | DialectType::Spark
10182 | DialectType::Databricks
10183 )
10184 && matches!(
10185 target,
10186 DialectType::Presto
10187 | DialectType::Trino
10188 | DialectType::Athena
10189 ) =>
10190 {
10191 let empty_keys =
10192 Expression::Array(Box::new(crate::expressions::Array {
10193 expressions: vec![],
10194 }));
10195 let empty_vals =
10196 Expression::Array(Box::new(crate::expressions::Array {
10197 expressions: vec![],
10198 }));
10199 Ok(Expression::Function(Box::new(Function::new(
10200 "MAP".to_string(),
10201 vec![empty_keys, empty_vals],
10202 ))))
10203 }
10204 // MAP(k1, v1, k2, v2, ...) from Hive/Spark -> target-specific
10205 "MAP"
10206 if f.args.len() >= 2
10207 && f.args.len() % 2 == 0
10208 && matches!(
10209 source,
10210 DialectType::Hive
10211 | DialectType::Spark
10212 | DialectType::Databricks
10213 | DialectType::ClickHouse
10214 ) =>
10215 {
10216 let args = f.args;
10217 match target {
10218 DialectType::DuckDB => {
10219 // MAP([k1, k2], [v1, v2])
10220 let mut keys = Vec::new();
10221 let mut vals = Vec::new();
10222 for (i, arg) in args.into_iter().enumerate() {
10223 if i % 2 == 0 {
10224 keys.push(arg);
10225 } else {
10226 vals.push(arg);
10227 }
10228 }
10229 let keys_arr = Expression::Array(Box::new(
10230 crate::expressions::Array { expressions: keys },
10231 ));
10232 let vals_arr = Expression::Array(Box::new(
10233 crate::expressions::Array { expressions: vals },
10234 ));
10235 Ok(Expression::Function(Box::new(Function::new(
10236 "MAP".to_string(),
10237 vec![keys_arr, vals_arr],
10238 ))))
10239 }
10240 DialectType::Presto | DialectType::Trino => {
10241 // MAP(ARRAY[k1, k2], ARRAY[v1, v2])
10242 let mut keys = Vec::new();
10243 let mut vals = Vec::new();
10244 for (i, arg) in args.into_iter().enumerate() {
10245 if i % 2 == 0 {
10246 keys.push(arg);
10247 } else {
10248 vals.push(arg);
10249 }
10250 }
10251 let keys_arr = Expression::Array(Box::new(
10252 crate::expressions::Array { expressions: keys },
10253 ));
10254 let vals_arr = Expression::Array(Box::new(
10255 crate::expressions::Array { expressions: vals },
10256 ));
10257 Ok(Expression::Function(Box::new(Function::new(
10258 "MAP".to_string(),
10259 vec![keys_arr, vals_arr],
10260 ))))
10261 }
10262 DialectType::Snowflake => Ok(Expression::Function(Box::new(
10263 Function::new("OBJECT_CONSTRUCT".to_string(), args),
10264 ))),
10265 DialectType::ClickHouse => Ok(Expression::Function(Box::new(
10266 Function::new("map".to_string(), args),
10267 ))),
10268 _ => Ok(Expression::Function(Box::new(Function::new(
10269 "MAP".to_string(),
10270 args,
10271 )))),
10272 }
10273 }
10274 // COLLECT_LIST(x) -> ARRAY_AGG(x) for most targets
10275 "COLLECT_LIST" if f.args.len() >= 1 => {
10276 let name = match target {
10277 DialectType::Spark
10278 | DialectType::Databricks
10279 | DialectType::Hive => "COLLECT_LIST",
10280 DialectType::DuckDB
10281 | DialectType::PostgreSQL
10282 | DialectType::Redshift
10283 | DialectType::Snowflake
10284 | DialectType::BigQuery => "ARRAY_AGG",
10285 DialectType::Presto | DialectType::Trino => "ARRAY_AGG",
10286 _ => "ARRAY_AGG",
10287 };
10288 Ok(Expression::Function(Box::new(Function::new(
10289 name.to_string(),
10290 f.args,
10291 ))))
10292 }
10293 // COLLECT_SET(x) -> target-specific distinct array aggregation
10294 "COLLECT_SET" if f.args.len() >= 1 => {
10295 let name = match target {
10296 DialectType::Spark
10297 | DialectType::Databricks
10298 | DialectType::Hive => "COLLECT_SET",
10299 DialectType::Presto
10300 | DialectType::Trino
10301 | DialectType::Athena => "SET_AGG",
10302 DialectType::Snowflake => "ARRAY_UNIQUE_AGG",
10303 _ => "ARRAY_AGG",
10304 };
10305 Ok(Expression::Function(Box::new(Function::new(
10306 name.to_string(),
10307 f.args,
10308 ))))
10309 }
10310 // ISNAN(x) / IS_NAN(x) - normalize
10311 "ISNAN" | "IS_NAN" => {
10312 let name = match target {
10313 DialectType::Spark
10314 | DialectType::Databricks
10315 | DialectType::Hive => "ISNAN",
10316 DialectType::Presto
10317 | DialectType::Trino
10318 | DialectType::Athena => "IS_NAN",
10319 DialectType::BigQuery
10320 | DialectType::PostgreSQL
10321 | DialectType::Redshift => "IS_NAN",
10322 DialectType::ClickHouse => "IS_NAN",
10323 _ => "ISNAN",
10324 };
10325 Ok(Expression::Function(Box::new(Function::new(
10326 name.to_string(),
10327 f.args,
10328 ))))
10329 }
10330 // SPLIT_PART(str, delim, index) -> target-specific
10331 "SPLIT_PART" if f.args.len() == 3 => {
10332 match target {
10333 DialectType::Spark | DialectType::Databricks => {
10334 // Keep as SPLIT_PART (Spark 3.4+)
10335 Ok(Expression::Function(Box::new(Function::new(
10336 "SPLIT_PART".to_string(),
10337 f.args,
10338 ))))
10339 }
10340 DialectType::DuckDB
10341 | DialectType::PostgreSQL
10342 | DialectType::Snowflake
10343 | DialectType::Redshift
10344 | DialectType::Trino
10345 | DialectType::Presto => Ok(Expression::Function(Box::new(
10346 Function::new("SPLIT_PART".to_string(), f.args),
10347 ))),
10348 DialectType::Hive => {
10349 // SPLIT(str, delim)[index]
10350 // Complex conversion, just keep as-is for now
10351 Ok(Expression::Function(Box::new(Function::new(
10352 "SPLIT_PART".to_string(),
10353 f.args,
10354 ))))
10355 }
10356 _ => Ok(Expression::Function(Box::new(Function::new(
10357 "SPLIT_PART".to_string(),
10358 f.args,
10359 )))),
10360 }
10361 }
// JSON_EXTRACT(json, path) -> target-specific JSON extraction
// JSON_EXTRACT returns a JSON fragment; JSON_EXTRACT_SCALAR a scalar value.
"JSON_EXTRACT" | "JSON_EXTRACT_SCALAR" if f.args.len() == 2 => {
    let is_scalar = name == "JSON_EXTRACT_SCALAR";
    match target {
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => {
            let mut args = f.args;
            // Spark/Hive don't support Presto's TRY(expr) wrapper form here.
            // Mirror sqlglot by unwrapping TRY(expr) to expr before GET_JSON_OBJECT.
            if let Some(Expression::Function(inner)) = args.first() {
                if inner.name.eq_ignore_ascii_case("TRY")
                    && inner.args.len() == 1
                {
                    // Clone is needed: `inner` borrows from `args`,
                    // which is mutated on the next line.
                    let mut inner_args = inner.args.clone();
                    args[0] = inner_args.remove(0);
                }
            }
            Ok(Expression::Function(Box::new(Function::new(
                "GET_JSON_OBJECT".to_string(),
                args,
            ))))
        }
        DialectType::DuckDB | DialectType::SQLite => {
            // json -> path syntax
            // NOTE(review): the scalar variant also maps to JsonExtract
            // (arrow `->`), not JsonExtractScalar (`->>`), here —
            // confirm this asymmetry with JSON_EXTRACT_PATH is intended.
            let mut args = f.args;
            let json_expr = args.remove(0);
            let path = args.remove(0);
            Ok(Expression::JsonExtract(Box::new(
                crate::expressions::JsonExtractFunc {
                    this: json_expr,
                    path,
                    returning: None,
                    arrow_syntax: true,
                    hash_arrow_syntax: false,
                    wrapper_option: None,
                    quotes_option: None,
                    on_scalar_string: false,
                    on_error: None,
                },
            )))
        }
        DialectType::TSQL => {
            // TSQL: JSON_VALUE for scalars, JSON_QUERY for fragments.
            let func_name = if is_scalar {
                "JSON_VALUE"
            } else {
                "JSON_QUERY"
            };
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                f.args,
            ))))
        }
        DialectType::PostgreSQL | DialectType::Redshift => {
            // PostgreSQL family: *_PATH_TEXT for scalars, *_PATH otherwise.
            let func_name = if is_scalar {
                "JSON_EXTRACT_PATH_TEXT"
            } else {
                "JSON_EXTRACT_PATH"
            };
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                f.args,
            ))))
        }
        // Any other target keeps the source function name unchanged.
        _ => Ok(Expression::Function(Box::new(Function::new(
            name.to_string(),
            f.args,
        )))),
    }
}
// MySQL JSON_SEARCH(json_doc, mode, search[, escape_char[, path]]) -> DuckDB json_tree-based lookup
// DuckDB has no JSON_SEARCH; emulate it with a scalar subquery over the
// json_tree() table function, returned as a Raw SQL fragment.
"JSON_SEARCH"
    if matches!(target, DialectType::DuckDB)
        && (3..=5).contains(&f.args.len()) =>
{
    let args = &f.args;

    // Only rewrite deterministic modes and NULL/no escape-char variant.
    // Everything else passes through unchanged.
    let mode = match &args[1] {
        Expression::Literal(crate::expressions::Literal::String(s)) => {
            s.to_ascii_lowercase()
        }
        _ => return Ok(Expression::Function(f)),
    };
    if mode != "one" && mode != "all" {
        return Ok(Expression::Function(f));
    }
    // A non-NULL escape character cannot be emulated; bail out.
    if args.len() >= 4 && !matches!(&args[3], Expression::Null(_)) {
        return Ok(Expression::Function(f));
    }

    // Render the sub-expressions to SQL text for splicing into the
    // Raw rewrite; any generation failure falls back to pass-through.
    let json_doc_sql = match Generator::sql(&args[0]) {
        Ok(sql) => sql,
        Err(_) => return Ok(Expression::Function(f)),
    };
    let search_sql = match Generator::sql(&args[2]) {
        Ok(sql) => sql,
        Err(_) => return Ok(Expression::Function(f)),
    };
    // Optional fifth argument restricts the search to a sub-path;
    // default is the document root.
    let path_sql = if args.len() == 5 {
        match Generator::sql(&args[4]) {
            Ok(sql) => sql,
            Err(_) => return Ok(Expression::Function(f)),
        }
    } else {
        "'$'".to_string()
    };

    // 'all' returns every matching path as a JSON array; 'one' returns
    // the first match in document order (ORDER BY __jt.id LIMIT 1).
    let rewrite_sql = if mode == "all" {
        format!(
            "(SELECT TO_JSON(LIST(__jt.fullkey)) FROM json_tree({}, {}) AS __jt WHERE __jt.atom = TO_JSON({}))",
            json_doc_sql, path_sql, search_sql
        )
    } else {
        format!(
            "(SELECT TO_JSON(__jt.fullkey) FROM json_tree({}, {}) AS __jt WHERE __jt.atom = TO_JSON({}) ORDER BY __jt.id LIMIT 1)",
            json_doc_sql, path_sql, search_sql
        )
    };

    Ok(Expression::Raw(crate::expressions::Raw {
        sql: rewrite_sql,
    }))
}
10486 // SingleStore JSON_EXTRACT_JSON(json, key1, key2, ...) -> JSON_EXTRACT(json, '$.key1.key2' or '$.key1[key2]')
10487 // BSON_EXTRACT_BSON(json, key1, ...) -> JSONB_EXTRACT(json, '$.key1')
10488 "JSON_EXTRACT_JSON" | "BSON_EXTRACT_BSON"
10489 if f.args.len() >= 2
10490 && matches!(source, DialectType::SingleStore) =>
10491 {
10492 let is_bson = name == "BSON_EXTRACT_BSON";
10493 let mut args = f.args;
10494 let json_expr = args.remove(0);
10495
10496 // Build JSONPath from remaining arguments
10497 let mut path = String::from("$");
10498 for arg in &args {
10499 if let Expression::Literal(
10500 crate::expressions::Literal::String(s),
10501 ) = arg
10502 {
10503 // Check if it's a numeric string (array index)
10504 if s.parse::<i64>().is_ok() {
10505 path.push('[');
10506 path.push_str(s);
10507 path.push(']');
10508 } else {
10509 path.push('.');
10510 path.push_str(s);
10511 }
10512 }
10513 }
10514
10515 let target_func = if is_bson {
10516 "JSONB_EXTRACT"
10517 } else {
10518 "JSON_EXTRACT"
10519 };
10520 Ok(Expression::Function(Box::new(Function::new(
10521 target_func.to_string(),
10522 vec![json_expr, Expression::string(&path)],
10523 ))))
10524 }
10525 // ARRAY_SUM(lambda, array) from Doris -> ClickHouse arraySum
10526 "ARRAY_SUM" if matches!(target, DialectType::ClickHouse) => {
10527 Ok(Expression::Function(Box::new(Function {
10528 name: "arraySum".to_string(),
10529 args: f.args,
10530 distinct: f.distinct,
10531 trailing_comments: f.trailing_comments,
10532 use_bracket_syntax: f.use_bracket_syntax,
10533 no_parens: f.no_parens,
10534 quoted: f.quoted,
10535 span: None,
10536 inferred_type: None,
10537 })))
10538 }
10539 // TSQL JSON_QUERY/JSON_VALUE -> target-specific
10540 // Note: For TSQL->TSQL, JsonQuery stays as Expression::JsonQuery (source transform not called)
10541 // and is handled by JsonQueryValueConvert action. This handles the case where
10542 // TSQL read transform converted JsonQuery to Function("JSON_QUERY") for cross-dialect.
10543 "JSON_QUERY" | "JSON_VALUE"
10544 if f.args.len() == 2
10545 && matches!(
10546 source,
10547 DialectType::TSQL | DialectType::Fabric
10548 ) =>
10549 {
10550 match target {
10551 DialectType::Spark
10552 | DialectType::Databricks
10553 | DialectType::Hive => Ok(Expression::Function(Box::new(
10554 Function::new("GET_JSON_OBJECT".to_string(), f.args),
10555 ))),
10556 _ => Ok(Expression::Function(Box::new(Function::new(
10557 name.to_string(),
10558 f.args,
10559 )))),
10560 }
10561 }
10562 // UNIX_TIMESTAMP(x) -> TO_UNIXTIME(x) for Presto
10563 "UNIX_TIMESTAMP" if f.args.len() == 1 => {
10564 let arg = f.args.into_iter().next().unwrap();
10565 let is_hive_source = matches!(
10566 source,
10567 DialectType::Hive
10568 | DialectType::Spark
10569 | DialectType::Databricks
10570 );
10571 match target {
10572 DialectType::DuckDB if is_hive_source => {
10573 // DuckDB: EPOCH(STRPTIME(x, '%Y-%m-%d %H:%M:%S'))
10574 let strptime =
10575 Expression::Function(Box::new(Function::new(
10576 "STRPTIME".to_string(),
10577 vec![arg, Expression::string("%Y-%m-%d %H:%M:%S")],
10578 )));
10579 Ok(Expression::Function(Box::new(Function::new(
10580 "EPOCH".to_string(),
10581 vec![strptime],
10582 ))))
10583 }
10584 DialectType::Presto | DialectType::Trino if is_hive_source => {
10585 // Presto: TO_UNIXTIME(COALESCE(TRY(DATE_PARSE(CAST(x AS VARCHAR), '%Y-%m-%d %T')), PARSE_DATETIME(DATE_FORMAT(x, '%Y-%m-%d %T'), 'yyyy-MM-dd HH:mm:ss')))
10586 let cast_varchar =
10587 Expression::Cast(Box::new(crate::expressions::Cast {
10588 this: arg.clone(),
10589 to: DataType::VarChar {
10590 length: None,
10591 parenthesized_length: false,
10592 },
10593 trailing_comments: vec![],
10594 double_colon_syntax: false,
10595 format: None,
10596 default: None,
10597 inferred_type: None,
10598 }));
10599 let date_parse =
10600 Expression::Function(Box::new(Function::new(
10601 "DATE_PARSE".to_string(),
10602 vec![
10603 cast_varchar,
10604 Expression::string("%Y-%m-%d %T"),
10605 ],
10606 )));
10607 let try_expr = Expression::Function(Box::new(
10608 Function::new("TRY".to_string(), vec![date_parse]),
10609 ));
10610 let date_format =
10611 Expression::Function(Box::new(Function::new(
10612 "DATE_FORMAT".to_string(),
10613 vec![arg, Expression::string("%Y-%m-%d %T")],
10614 )));
10615 let parse_datetime =
10616 Expression::Function(Box::new(Function::new(
10617 "PARSE_DATETIME".to_string(),
10618 vec![
10619 date_format,
10620 Expression::string("yyyy-MM-dd HH:mm:ss"),
10621 ],
10622 )));
10623 let coalesce =
10624 Expression::Function(Box::new(Function::new(
10625 "COALESCE".to_string(),
10626 vec![try_expr, parse_datetime],
10627 )));
10628 Ok(Expression::Function(Box::new(Function::new(
10629 "TO_UNIXTIME".to_string(),
10630 vec![coalesce],
10631 ))))
10632 }
10633 DialectType::Presto | DialectType::Trino => {
10634 Ok(Expression::Function(Box::new(Function::new(
10635 "TO_UNIXTIME".to_string(),
10636 vec![arg],
10637 ))))
10638 }
10639 _ => Ok(Expression::Function(Box::new(Function::new(
10640 "UNIX_TIMESTAMP".to_string(),
10641 vec![arg],
10642 )))),
10643 }
10644 }
10645 // TO_UNIX_TIMESTAMP(x) -> UNIX_TIMESTAMP(x) for Spark/Hive
10646 "TO_UNIX_TIMESTAMP" if f.args.len() >= 1 => match target {
10647 DialectType::Spark
10648 | DialectType::Databricks
10649 | DialectType::Hive => Ok(Expression::Function(Box::new(
10650 Function::new("UNIX_TIMESTAMP".to_string(), f.args),
10651 ))),
10652 _ => Ok(Expression::Function(Box::new(Function::new(
10653 "TO_UNIX_TIMESTAMP".to_string(),
10654 f.args,
10655 )))),
10656 },
10657 // CURDATE() -> CURRENT_DATE
10658 "CURDATE" => {
10659 Ok(Expression::CurrentDate(crate::expressions::CurrentDate))
10660 }
10661 // CURTIME() -> CURRENT_TIME
10662 "CURTIME" => {
10663 Ok(Expression::CurrentTime(crate::expressions::CurrentTime {
10664 precision: None,
10665 }))
10666 }
10667 // ARRAY_SORT(x) or ARRAY_SORT(x, lambda) -> SORT_ARRAY(x) for Hive (drop lambda)
10668 "ARRAY_SORT" if f.args.len() >= 1 => {
10669 match target {
10670 DialectType::Hive => {
10671 let mut args = f.args;
10672 args.truncate(1); // Drop lambda comparator
10673 Ok(Expression::Function(Box::new(Function::new(
10674 "SORT_ARRAY".to_string(),
10675 args,
10676 ))))
10677 }
10678 _ => Ok(Expression::Function(f)),
10679 }
10680 }
10681 // SORT_ARRAY(x) -> ARRAY_SORT(x) for non-Hive/Spark
10682 "SORT_ARRAY" if f.args.len() == 1 => match target {
10683 DialectType::Hive
10684 | DialectType::Spark
10685 | DialectType::Databricks => Ok(Expression::Function(f)),
10686 _ => Ok(Expression::Function(Box::new(Function::new(
10687 "ARRAY_SORT".to_string(),
10688 f.args,
10689 )))),
10690 },
10691 // SORT_ARRAY(x, FALSE) -> ARRAY_REVERSE_SORT(x) for DuckDB, ARRAY_SORT(x, lambda) for Presto
10692 "SORT_ARRAY" if f.args.len() == 2 => {
10693 let is_desc =
10694 matches!(&f.args[1], Expression::Boolean(b) if !b.value);
10695 if is_desc {
10696 match target {
10697 DialectType::DuckDB => {
10698 Ok(Expression::Function(Box::new(Function::new(
10699 "ARRAY_REVERSE_SORT".to_string(),
10700 vec![f.args.into_iter().next().unwrap()],
10701 ))))
10702 }
10703 DialectType::Presto | DialectType::Trino => {
10704 let arr_arg = f.args.into_iter().next().unwrap();
10705 let a =
10706 Expression::Column(crate::expressions::Column {
10707 name: crate::expressions::Identifier::new("a"),
10708 table: None,
10709 join_mark: false,
10710 trailing_comments: Vec::new(),
10711 span: None,
10712 inferred_type: None,
10713 });
10714 let b =
10715 Expression::Column(crate::expressions::Column {
10716 name: crate::expressions::Identifier::new("b"),
10717 table: None,
10718 join_mark: false,
10719 trailing_comments: Vec::new(),
10720 span: None,
10721 inferred_type: None,
10722 });
10723 let case_expr = Expression::Case(Box::new(
10724 crate::expressions::Case {
10725 operand: None,
10726 whens: vec![
10727 (
10728 Expression::Lt(Box::new(
10729 BinaryOp::new(a.clone(), b.clone()),
10730 )),
10731 Expression::Literal(Literal::Number(
10732 "1".to_string(),
10733 )),
10734 ),
10735 (
10736 Expression::Gt(Box::new(
10737 BinaryOp::new(a.clone(), b.clone()),
10738 )),
10739 Expression::Literal(Literal::Number(
10740 "-1".to_string(),
10741 )),
10742 ),
10743 ],
10744 else_: Some(Expression::Literal(
10745 Literal::Number("0".to_string()),
10746 )),
10747 comments: Vec::new(),
10748 inferred_type: None,
10749 },
10750 ));
10751 let lambda = Expression::Lambda(Box::new(
10752 crate::expressions::LambdaExpr {
10753 parameters: vec![
10754 crate::expressions::Identifier::new("a"),
10755 crate::expressions::Identifier::new("b"),
10756 ],
10757 body: case_expr,
10758 colon: false,
10759 parameter_types: Vec::new(),
10760 },
10761 ));
10762 Ok(Expression::Function(Box::new(Function::new(
10763 "ARRAY_SORT".to_string(),
10764 vec![arr_arg, lambda],
10765 ))))
10766 }
10767 _ => Ok(Expression::Function(f)),
10768 }
10769 } else {
10770 // SORT_ARRAY(x, TRUE) -> ARRAY_SORT(x)
10771 match target {
10772 DialectType::Hive => Ok(Expression::Function(f)),
10773 _ => Ok(Expression::Function(Box::new(Function::new(
10774 "ARRAY_SORT".to_string(),
10775 vec![f.args.into_iter().next().unwrap()],
10776 )))),
10777 }
10778 }
10779 }
10780 // LEFT(x, n), RIGHT(x, n) -> SUBSTRING for targets without LEFT/RIGHT
10781 "LEFT" if f.args.len() == 2 => {
10782 match target {
10783 DialectType::Hive
10784 | DialectType::Presto
10785 | DialectType::Trino
10786 | DialectType::Athena => {
10787 let x = f.args[0].clone();
10788 let n = f.args[1].clone();
10789 Ok(Expression::Function(Box::new(Function::new(
10790 "SUBSTRING".to_string(),
10791 vec![x, Expression::number(1), n],
10792 ))))
10793 }
10794 DialectType::Spark | DialectType::Databricks
10795 if matches!(
10796 source,
10797 DialectType::TSQL | DialectType::Fabric
10798 ) =>
10799 {
10800 // TSQL LEFT(x, n) -> LEFT(CAST(x AS STRING), n) for Spark
10801 let x = f.args[0].clone();
10802 let n = f.args[1].clone();
10803 let cast_x = Expression::Cast(Box::new(Cast {
10804 this: x,
10805 to: DataType::VarChar {
10806 length: None,
10807 parenthesized_length: false,
10808 },
10809 double_colon_syntax: false,
10810 trailing_comments: Vec::new(),
10811 format: None,
10812 default: None,
10813 inferred_type: None,
10814 }));
10815 Ok(Expression::Function(Box::new(Function::new(
10816 "LEFT".to_string(),
10817 vec![cast_x, n],
10818 ))))
10819 }
10820 _ => Ok(Expression::Function(f)),
10821 }
10822 }
10823 "RIGHT" if f.args.len() == 2 => {
10824 match target {
10825 DialectType::Hive
10826 | DialectType::Presto
10827 | DialectType::Trino
10828 | DialectType::Athena => {
10829 let x = f.args[0].clone();
10830 let n = f.args[1].clone();
10831 // SUBSTRING(x, LENGTH(x) - (n - 1))
10832 let len_x = Expression::Function(Box::new(Function::new(
10833 "LENGTH".to_string(),
10834 vec![x.clone()],
10835 )));
10836 let n_minus_1 = Expression::Sub(Box::new(
10837 crate::expressions::BinaryOp::new(
10838 n,
10839 Expression::number(1),
10840 ),
10841 ));
10842 let n_minus_1_paren = Expression::Paren(Box::new(
10843 crate::expressions::Paren {
10844 this: n_minus_1,
10845 trailing_comments: Vec::new(),
10846 },
10847 ));
10848 let offset = Expression::Sub(Box::new(
10849 crate::expressions::BinaryOp::new(
10850 len_x,
10851 n_minus_1_paren,
10852 ),
10853 ));
10854 Ok(Expression::Function(Box::new(Function::new(
10855 "SUBSTRING".to_string(),
10856 vec![x, offset],
10857 ))))
10858 }
10859 DialectType::Spark | DialectType::Databricks
10860 if matches!(
10861 source,
10862 DialectType::TSQL | DialectType::Fabric
10863 ) =>
10864 {
10865 // TSQL RIGHT(x, n) -> RIGHT(CAST(x AS STRING), n) for Spark
10866 let x = f.args[0].clone();
10867 let n = f.args[1].clone();
10868 let cast_x = Expression::Cast(Box::new(Cast {
10869 this: x,
10870 to: DataType::VarChar {
10871 length: None,
10872 parenthesized_length: false,
10873 },
10874 double_colon_syntax: false,
10875 trailing_comments: Vec::new(),
10876 format: None,
10877 default: None,
10878 inferred_type: None,
10879 }));
10880 Ok(Expression::Function(Box::new(Function::new(
10881 "RIGHT".to_string(),
10882 vec![cast_x, n],
10883 ))))
10884 }
10885 _ => Ok(Expression::Function(f)),
10886 }
10887 }
10888 // MAP_FROM_ARRAYS(keys, vals) -> target-specific map construction
10889 "MAP_FROM_ARRAYS" if f.args.len() == 2 => match target {
10890 DialectType::Snowflake => Ok(Expression::Function(Box::new(
10891 Function::new("OBJECT_CONSTRUCT".to_string(), f.args),
10892 ))),
10893 DialectType::Spark | DialectType::Databricks => {
10894 Ok(Expression::Function(Box::new(Function::new(
10895 "MAP_FROM_ARRAYS".to_string(),
10896 f.args,
10897 ))))
10898 }
10899 _ => Ok(Expression::Function(Box::new(Function::new(
10900 "MAP".to_string(),
10901 f.args,
10902 )))),
10903 },
10904 // LIKE(foo, 'pat') -> foo LIKE 'pat'; LIKE(foo, 'pat', '!') -> foo LIKE 'pat' ESCAPE '!'
10905 // SQLite uses LIKE(pattern, string[, escape]) with args in reverse order
10906 "LIKE" if f.args.len() >= 2 => {
10907 let (this, pattern) = if matches!(source, DialectType::SQLite) {
10908 // SQLite: LIKE(pattern, string) -> string LIKE pattern
10909 (f.args[1].clone(), f.args[0].clone())
10910 } else {
10911 // Standard: LIKE(string, pattern) -> string LIKE pattern
10912 (f.args[0].clone(), f.args[1].clone())
10913 };
10914 let escape = if f.args.len() >= 3 {
10915 Some(f.args[2].clone())
10916 } else {
10917 None
10918 };
10919 Ok(Expression::Like(Box::new(crate::expressions::LikeOp {
10920 left: this,
10921 right: pattern,
10922 escape,
10923 quantifier: None,
10924 inferred_type: None,
10925 })))
10926 }
10927 // ILIKE(foo, 'pat') -> foo ILIKE 'pat'
10928 "ILIKE" if f.args.len() >= 2 => {
10929 let this = f.args[0].clone();
10930 let pattern = f.args[1].clone();
10931 let escape = if f.args.len() >= 3 {
10932 Some(f.args[2].clone())
10933 } else {
10934 None
10935 };
10936 Ok(Expression::ILike(Box::new(crate::expressions::LikeOp {
10937 left: this,
10938 right: pattern,
10939 escape,
10940 quantifier: None,
10941 inferred_type: None,
10942 })))
10943 }
10944 // CHAR(n) -> CHR(n) for non-MySQL/non-TSQL targets
10945 "CHAR" if f.args.len() == 1 => match target {
10946 DialectType::MySQL
10947 | DialectType::SingleStore
10948 | DialectType::TSQL => Ok(Expression::Function(f)),
10949 _ => Ok(Expression::Function(Box::new(Function::new(
10950 "CHR".to_string(),
10951 f.args,
10952 )))),
10953 },
10954 // CONCAT(a, b) -> a || b for PostgreSQL
10955 "CONCAT"
10956 if f.args.len() == 2
10957 && matches!(target, DialectType::PostgreSQL)
10958 && matches!(
10959 source,
10960 DialectType::ClickHouse | DialectType::MySQL
10961 ) =>
10962 {
10963 let mut args = f.args;
10964 let right = args.pop().unwrap();
10965 let left = args.pop().unwrap();
10966 Ok(Expression::DPipe(Box::new(crate::expressions::DPipe {
10967 this: Box::new(left),
10968 expression: Box::new(right),
10969 safe: None,
10970 })))
10971 }
10972 // ARRAY_TO_STRING(arr, delim) -> target-specific
10973 "ARRAY_TO_STRING" if f.args.len() >= 2 => match target {
10974 DialectType::Presto | DialectType::Trino => {
10975 Ok(Expression::Function(Box::new(Function::new(
10976 "ARRAY_JOIN".to_string(),
10977 f.args,
10978 ))))
10979 }
10980 DialectType::TSQL => Ok(Expression::Function(Box::new(
10981 Function::new("STRING_AGG".to_string(), f.args),
10982 ))),
10983 _ => Ok(Expression::Function(f)),
10984 },
10985 // ARRAY_CONCAT / LIST_CONCAT -> target-specific
10986 "ARRAY_CONCAT" | "LIST_CONCAT" if f.args.len() == 2 => match target {
10987 DialectType::Spark
10988 | DialectType::Databricks
10989 | DialectType::Hive => Ok(Expression::Function(Box::new(
10990 Function::new("CONCAT".to_string(), f.args),
10991 ))),
10992 DialectType::Snowflake => Ok(Expression::Function(Box::new(
10993 Function::new("ARRAY_CAT".to_string(), f.args),
10994 ))),
10995 DialectType::Redshift => Ok(Expression::Function(Box::new(
10996 Function::new("ARRAY_CONCAT".to_string(), f.args),
10997 ))),
10998 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
10999 Function::new("ARRAY_CAT".to_string(), f.args),
11000 ))),
11001 DialectType::DuckDB => Ok(Expression::Function(Box::new(
11002 Function::new("LIST_CONCAT".to_string(), f.args),
11003 ))),
11004 DialectType::Presto | DialectType::Trino => {
11005 Ok(Expression::Function(Box::new(Function::new(
11006 "CONCAT".to_string(),
11007 f.args,
11008 ))))
11009 }
11010 DialectType::BigQuery => Ok(Expression::Function(Box::new(
11011 Function::new("ARRAY_CONCAT".to_string(), f.args),
11012 ))),
11013 _ => Ok(Expression::Function(f)),
11014 },
11015 // ARRAY_CONTAINS(arr, x) / HAS(arr, x) / CONTAINS(arr, x) normalization
11016 "HAS" if f.args.len() == 2 => match target {
11017 DialectType::Spark
11018 | DialectType::Databricks
11019 | DialectType::Hive => Ok(Expression::Function(Box::new(
11020 Function::new("ARRAY_CONTAINS".to_string(), f.args),
11021 ))),
11022 DialectType::Presto | DialectType::Trino => {
11023 Ok(Expression::Function(Box::new(Function::new(
11024 "CONTAINS".to_string(),
11025 f.args,
11026 ))))
11027 }
11028 _ => Ok(Expression::Function(f)),
11029 },
11030 // NVL(a, b, c, d) -> COALESCE(a, b, c, d) - NVL should keep all args
11031 "NVL" if f.args.len() > 2 => Ok(Expression::Function(Box::new(
11032 Function::new("COALESCE".to_string(), f.args),
11033 ))),
11034 // ISNULL(x) in MySQL -> (x IS NULL)
11035 "ISNULL"
11036 if f.args.len() == 1
11037 && matches!(source, DialectType::MySQL)
11038 && matches!(target, DialectType::MySQL) =>
11039 {
11040 let arg = f.args.into_iter().next().unwrap();
11041 Ok(Expression::Paren(Box::new(crate::expressions::Paren {
11042 this: Expression::IsNull(Box::new(
11043 crate::expressions::IsNull {
11044 this: arg,
11045 not: false,
11046 postfix_form: false,
11047 },
11048 )),
11049 trailing_comments: Vec::new(),
11050 })))
11051 }
11052 // MONTHNAME(x) -> DATE_FORMAT(x, '%M') for MySQL -> MySQL
11053 "MONTHNAME"
11054 if f.args.len() == 1 && matches!(target, DialectType::MySQL) =>
11055 {
11056 let arg = f.args.into_iter().next().unwrap();
11057 Ok(Expression::Function(Box::new(Function::new(
11058 "DATE_FORMAT".to_string(),
11059 vec![arg, Expression::string("%M")],
11060 ))))
11061 }
11062 // ClickHouse splitByString('s', x) -> DuckDB STR_SPLIT(x, 's') / Hive SPLIT(x, CONCAT('\\Q', 's', '\\E'))
11063 "SPLITBYSTRING" if f.args.len() == 2 => {
11064 let sep = f.args[0].clone();
11065 let str_arg = f.args[1].clone();
11066 match target {
11067 DialectType::DuckDB => Ok(Expression::Function(Box::new(
11068 Function::new("STR_SPLIT".to_string(), vec![str_arg, sep]),
11069 ))),
11070 DialectType::Doris => {
11071 Ok(Expression::Function(Box::new(Function::new(
11072 "SPLIT_BY_STRING".to_string(),
11073 vec![str_arg, sep],
11074 ))))
11075 }
11076 DialectType::Hive
11077 | DialectType::Spark
11078 | DialectType::Databricks => {
11079 // SPLIT(x, CONCAT('\\Q', sep, '\\E'))
11080 let escaped =
11081 Expression::Function(Box::new(Function::new(
11082 "CONCAT".to_string(),
11083 vec![
11084 Expression::string("\\Q"),
11085 sep,
11086 Expression::string("\\E"),
11087 ],
11088 )));
11089 Ok(Expression::Function(Box::new(Function::new(
11090 "SPLIT".to_string(),
11091 vec![str_arg, escaped],
11092 ))))
11093 }
11094 _ => Ok(Expression::Function(f)),
11095 }
11096 }
11097 // ClickHouse splitByRegexp('pattern', x) -> DuckDB STR_SPLIT_REGEX(x, 'pattern')
11098 "SPLITBYREGEXP" if f.args.len() == 2 => {
11099 let sep = f.args[0].clone();
11100 let str_arg = f.args[1].clone();
11101 match target {
11102 DialectType::DuckDB => {
11103 Ok(Expression::Function(Box::new(Function::new(
11104 "STR_SPLIT_REGEX".to_string(),
11105 vec![str_arg, sep],
11106 ))))
11107 }
11108 DialectType::Hive
11109 | DialectType::Spark
11110 | DialectType::Databricks => {
11111 Ok(Expression::Function(Box::new(Function::new(
11112 "SPLIT".to_string(),
11113 vec![str_arg, sep],
11114 ))))
11115 }
11116 _ => Ok(Expression::Function(f)),
11117 }
11118 }
11119 // ClickHouse toMonday(x) -> DATE_TRUNC('WEEK', x) / DATE_TRUNC(x, 'WEEK') for Doris
11120 "TOMONDAY" => {
11121 if f.args.len() == 1 {
11122 let arg = f.args.into_iter().next().unwrap();
11123 match target {
11124 DialectType::Doris => {
11125 Ok(Expression::Function(Box::new(Function::new(
11126 "DATE_TRUNC".to_string(),
11127 vec![arg, Expression::string("WEEK")],
11128 ))))
11129 }
11130 _ => Ok(Expression::Function(Box::new(Function::new(
11131 "DATE_TRUNC".to_string(),
11132 vec![Expression::string("WEEK"), arg],
11133 )))),
11134 }
11135 } else {
11136 Ok(Expression::Function(f))
11137 }
11138 }
11139 // COLLECT_LIST with FILTER(WHERE x IS NOT NULL) for targets that need it
11140 "COLLECT_LIST" if f.args.len() == 1 => match target {
11141 DialectType::Spark
11142 | DialectType::Databricks
11143 | DialectType::Hive => Ok(Expression::Function(f)),
11144 _ => Ok(Expression::Function(Box::new(Function::new(
11145 "ARRAY_AGG".to_string(),
11146 f.args,
11147 )))),
11148 },
11149 // TO_CHAR(x) with 1 arg -> CAST(x AS STRING) for Doris
11150 "TO_CHAR"
11151 if f.args.len() == 1 && matches!(target, DialectType::Doris) =>
11152 {
11153 let arg = f.args.into_iter().next().unwrap();
11154 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
11155 this: arg,
11156 to: DataType::Custom {
11157 name: "STRING".to_string(),
11158 },
11159 double_colon_syntax: false,
11160 trailing_comments: Vec::new(),
11161 format: None,
11162 default: None,
11163 inferred_type: None,
11164 })))
11165 }
11166 // DBMS_RANDOM.VALUE() -> RANDOM() for PostgreSQL
11167 "DBMS_RANDOM.VALUE" if f.args.is_empty() => match target {
11168 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
11169 Function::new("RANDOM".to_string(), vec![]),
11170 ))),
11171 _ => Ok(Expression::Function(f)),
11172 },
11173 // ClickHouse formatDateTime -> target-specific
11174 "FORMATDATETIME" if f.args.len() >= 2 => match target {
11175 DialectType::MySQL => Ok(Expression::Function(Box::new(
11176 Function::new("DATE_FORMAT".to_string(), f.args),
11177 ))),
11178 _ => Ok(Expression::Function(f)),
11179 },
11180 // REPLICATE('x', n) -> REPEAT('x', n) for non-TSQL targets
11181 "REPLICATE" if f.args.len() == 2 => match target {
11182 DialectType::TSQL => Ok(Expression::Function(f)),
11183 _ => Ok(Expression::Function(Box::new(Function::new(
11184 "REPEAT".to_string(),
11185 f.args,
11186 )))),
11187 },
11188 // LEN(x) -> LENGTH(x) for non-TSQL targets
11189 // No CAST needed when arg is already a string literal
11190 "LEN" if f.args.len() == 1 => {
11191 match target {
11192 DialectType::TSQL => Ok(Expression::Function(f)),
11193 DialectType::Spark | DialectType::Databricks => {
11194 let arg = f.args.into_iter().next().unwrap();
11195 // Don't wrap string literals with CAST - they're already strings
11196 let is_string = matches!(
11197 &arg,
11198 Expression::Literal(
11199 crate::expressions::Literal::String(_)
11200 )
11201 );
11202 let final_arg = if is_string {
11203 arg
11204 } else {
11205 Expression::Cast(Box::new(Cast {
11206 this: arg,
11207 to: DataType::VarChar {
11208 length: None,
11209 parenthesized_length: false,
11210 },
11211 double_colon_syntax: false,
11212 trailing_comments: Vec::new(),
11213 format: None,
11214 default: None,
11215 inferred_type: None,
11216 }))
11217 };
11218 Ok(Expression::Function(Box::new(Function::new(
11219 "LENGTH".to_string(),
11220 vec![final_arg],
11221 ))))
11222 }
11223 _ => {
11224 let arg = f.args.into_iter().next().unwrap();
11225 Ok(Expression::Function(Box::new(Function::new(
11226 "LENGTH".to_string(),
11227 vec![arg],
11228 ))))
11229 }
11230 }
11231 }
11232 // COUNT_BIG(x) -> COUNT(x) for non-TSQL targets
11233 "COUNT_BIG" if f.args.len() == 1 => match target {
11234 DialectType::TSQL => Ok(Expression::Function(f)),
11235 _ => Ok(Expression::Function(Box::new(Function::new(
11236 "COUNT".to_string(),
11237 f.args,
11238 )))),
11239 },
11240 // DATEFROMPARTS(y, m, d) -> MAKE_DATE(y, m, d) for non-TSQL targets
11241 "DATEFROMPARTS" if f.args.len() == 3 => match target {
11242 DialectType::TSQL => Ok(Expression::Function(f)),
11243 _ => Ok(Expression::Function(Box::new(Function::new(
11244 "MAKE_DATE".to_string(),
11245 f.args,
11246 )))),
11247 },
11248 // REGEXP_LIKE(str, pattern) -> RegexpLike expression (target-specific output)
11249 "REGEXP_LIKE" if f.args.len() >= 2 => {
11250 let str_expr = f.args[0].clone();
11251 let pattern = f.args[1].clone();
11252 let flags = if f.args.len() >= 3 {
11253 Some(f.args[2].clone())
11254 } else {
11255 None
11256 };
11257 match target {
11258 DialectType::DuckDB => {
11259 let mut new_args = vec![str_expr, pattern];
11260 if let Some(fl) = flags {
11261 new_args.push(fl);
11262 }
11263 Ok(Expression::Function(Box::new(Function::new(
11264 "REGEXP_MATCHES".to_string(),
11265 new_args,
11266 ))))
11267 }
11268 _ => Ok(Expression::RegexpLike(Box::new(
11269 crate::expressions::RegexpFunc {
11270 this: str_expr,
11271 pattern,
11272 flags,
11273 },
11274 ))),
11275 }
11276 }
11277 // ClickHouse arrayJoin -> UNNEST for PostgreSQL
11278 "ARRAYJOIN" if f.args.len() == 1 => match target {
11279 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
11280 Function::new("UNNEST".to_string(), f.args),
11281 ))),
11282 _ => Ok(Expression::Function(f)),
11283 },
11284 // DATETIMEFROMPARTS(y, m, d, h, mi, s, ms) -> MAKE_TIMESTAMP / TIMESTAMP_FROM_PARTS
11285 "DATETIMEFROMPARTS" if f.args.len() == 7 => {
11286 match target {
11287 DialectType::TSQL => Ok(Expression::Function(f)),
11288 DialectType::DuckDB => {
11289 // MAKE_TIMESTAMP(y, m, d, h, mi, s + (ms / 1000.0))
11290 let mut args = f.args;
11291 let ms = args.pop().unwrap();
11292 let s = args.pop().unwrap();
11293 // s + (ms / 1000.0)
11294 let ms_frac = Expression::Div(Box::new(BinaryOp::new(
11295 ms,
11296 Expression::Literal(
11297 crate::expressions::Literal::Number(
11298 "1000.0".to_string(),
11299 ),
11300 ),
11301 )));
11302 let s_with_ms = Expression::Add(Box::new(BinaryOp::new(
11303 s,
11304 Expression::Paren(Box::new(Paren {
11305 this: ms_frac,
11306 trailing_comments: vec![],
11307 })),
11308 )));
11309 args.push(s_with_ms);
11310 Ok(Expression::Function(Box::new(Function::new(
11311 "MAKE_TIMESTAMP".to_string(),
11312 args,
11313 ))))
11314 }
11315 DialectType::Snowflake => {
11316 // TIMESTAMP_FROM_PARTS(y, m, d, h, mi, s, ms * 1000000)
11317 let mut args = f.args;
11318 let ms = args.pop().unwrap();
11319 // ms * 1000000
11320 let ns = Expression::Mul(Box::new(BinaryOp::new(
11321 ms,
11322 Expression::number(1000000),
11323 )));
11324 args.push(ns);
11325 Ok(Expression::Function(Box::new(Function::new(
11326 "TIMESTAMP_FROM_PARTS".to_string(),
11327 args,
11328 ))))
11329 }
11330 _ => {
11331 // Default: keep function name for other targets
11332 Ok(Expression::Function(Box::new(Function::new(
11333 "DATETIMEFROMPARTS".to_string(),
11334 f.args,
11335 ))))
11336 }
11337 }
11338 }
11339 // CONVERT(type, expr [, style]) -> CAST(expr AS type) for non-TSQL targets
11340 // TRY_CONVERT(type, expr [, style]) -> TRY_CAST(expr AS type) for non-TSQL targets
11341 "CONVERT" | "TRY_CONVERT" if f.args.len() >= 2 => {
11342 let is_try = name == "TRY_CONVERT";
11343 let type_expr = f.args[0].clone();
11344 let value_expr = f.args[1].clone();
11345 let style = if f.args.len() >= 3 {
11346 Some(&f.args[2])
11347 } else {
11348 None
11349 };
11350
11351 // For TSQL->TSQL, normalize types and preserve CONVERT/TRY_CONVERT
11352 if matches!(target, DialectType::TSQL) {
11353 let normalized_type = match &type_expr {
11354 Expression::DataType(dt) => {
11355 let new_dt = match dt {
11356 DataType::Int { .. } => DataType::Custom {
11357 name: "INTEGER".to_string(),
11358 },
11359 _ => dt.clone(),
11360 };
11361 Expression::DataType(new_dt)
11362 }
11363 Expression::Identifier(id) => {
11364 let upper = id.name.to_uppercase();
11365 let normalized = match upper.as_str() {
11366 "INT" => "INTEGER",
11367 _ => &upper,
11368 };
11369 Expression::Identifier(
11370 crate::expressions::Identifier::new(normalized),
11371 )
11372 }
11373 Expression::Column(col) => {
11374 let upper = col.name.name.to_uppercase();
11375 let normalized = match upper.as_str() {
11376 "INT" => "INTEGER",
11377 _ => &upper,
11378 };
11379 Expression::Identifier(
11380 crate::expressions::Identifier::new(normalized),
11381 )
11382 }
11383 _ => type_expr.clone(),
11384 };
11385 let func_name = if is_try { "TRY_CONVERT" } else { "CONVERT" };
11386 let mut new_args = vec![normalized_type, value_expr];
11387 if let Some(s) = style {
11388 new_args.push(s.clone());
11389 }
11390 return Ok(Expression::Function(Box::new(Function::new(
11391 func_name.to_string(),
11392 new_args,
11393 ))));
11394 }
11395
11396 // For other targets: CONVERT(type, expr) -> CAST(expr AS type)
11397 fn expr_to_datatype(e: &Expression) -> Option<DataType> {
11398 match e {
11399 Expression::DataType(dt) => {
11400 // Convert NVARCHAR/NCHAR Custom types to standard VarChar/Char
11401 match dt {
11402 DataType::Custom { name }
11403 if name.starts_with("NVARCHAR(")
11404 || name.starts_with("NCHAR(") =>
11405 {
11406 // Extract the length from "NVARCHAR(200)" or "NCHAR(40)"
11407 let inner = &name[name.find('(').unwrap() + 1
11408 ..name.len() - 1];
11409 if inner.eq_ignore_ascii_case("MAX") {
11410 Some(DataType::Text)
11411 } else if let Ok(len) = inner.parse::<u32>() {
11412 if name.starts_with("NCHAR") {
11413 Some(DataType::Char {
11414 length: Some(len),
11415 })
11416 } else {
11417 Some(DataType::VarChar {
11418 length: Some(len),
11419 parenthesized_length: false,
11420 })
11421 }
11422 } else {
11423 Some(dt.clone())
11424 }
11425 }
11426 DataType::Custom { name } if name == "NVARCHAR" => {
11427 Some(DataType::VarChar {
11428 length: None,
11429 parenthesized_length: false,
11430 })
11431 }
11432 DataType::Custom { name } if name == "NCHAR" => {
11433 Some(DataType::Char { length: None })
11434 }
11435 DataType::Custom { name }
11436 if name == "NVARCHAR(MAX)"
11437 || name == "VARCHAR(MAX)" =>
11438 {
11439 Some(DataType::Text)
11440 }
11441 _ => Some(dt.clone()),
11442 }
11443 }
11444 Expression::Identifier(id) => {
11445 let name = id.name.to_uppercase();
11446 match name.as_str() {
11447 "INT" | "INTEGER" => Some(DataType::Int {
11448 length: None,
11449 integer_spelling: false,
11450 }),
11451 "BIGINT" => Some(DataType::BigInt { length: None }),
11452 "SMALLINT" => {
11453 Some(DataType::SmallInt { length: None })
11454 }
11455 "TINYINT" => {
11456 Some(DataType::TinyInt { length: None })
11457 }
11458 "FLOAT" => Some(DataType::Float {
11459 precision: None,
11460 scale: None,
11461 real_spelling: false,
11462 }),
11463 "REAL" => Some(DataType::Float {
11464 precision: None,
11465 scale: None,
11466 real_spelling: true,
11467 }),
11468 "DATETIME" | "DATETIME2" => {
11469 Some(DataType::Timestamp {
11470 timezone: false,
11471 precision: None,
11472 })
11473 }
11474 "DATE" => Some(DataType::Date),
11475 "BIT" => Some(DataType::Boolean),
11476 "TEXT" => Some(DataType::Text),
11477 "NUMERIC" => Some(DataType::Decimal {
11478 precision: None,
11479 scale: None,
11480 }),
11481 "MONEY" => Some(DataType::Decimal {
11482 precision: Some(15),
11483 scale: Some(4),
11484 }),
11485 "SMALLMONEY" => Some(DataType::Decimal {
11486 precision: Some(6),
11487 scale: Some(4),
11488 }),
11489 "VARCHAR" => Some(DataType::VarChar {
11490 length: None,
11491 parenthesized_length: false,
11492 }),
11493 "NVARCHAR" => Some(DataType::VarChar {
11494 length: None,
11495 parenthesized_length: false,
11496 }),
11497 "CHAR" => Some(DataType::Char { length: None }),
11498 "NCHAR" => Some(DataType::Char { length: None }),
11499 _ => Some(DataType::Custom { name }),
11500 }
11501 }
11502 Expression::Column(col) => {
11503 let name = col.name.name.to_uppercase();
11504 match name.as_str() {
11505 "INT" | "INTEGER" => Some(DataType::Int {
11506 length: None,
11507 integer_spelling: false,
11508 }),
11509 "BIGINT" => Some(DataType::BigInt { length: None }),
11510 "FLOAT" => Some(DataType::Float {
11511 precision: None,
11512 scale: None,
11513 real_spelling: false,
11514 }),
11515 "DATETIME" | "DATETIME2" => {
11516 Some(DataType::Timestamp {
11517 timezone: false,
11518 precision: None,
11519 })
11520 }
11521 "DATE" => Some(DataType::Date),
11522 "NUMERIC" => Some(DataType::Decimal {
11523 precision: None,
11524 scale: None,
11525 }),
11526 "VARCHAR" => Some(DataType::VarChar {
11527 length: None,
11528 parenthesized_length: false,
11529 }),
11530 "NVARCHAR" => Some(DataType::VarChar {
11531 length: None,
11532 parenthesized_length: false,
11533 }),
11534 "CHAR" => Some(DataType::Char { length: None }),
11535 "NCHAR" => Some(DataType::Char { length: None }),
11536 _ => Some(DataType::Custom { name }),
11537 }
11538 }
11539 // NVARCHAR(200) parsed as Function("NVARCHAR", [200])
11540 Expression::Function(f) => {
11541 let fname = f.name.to_uppercase();
11542 match fname.as_str() {
11543 "VARCHAR" | "NVARCHAR" => {
11544 let len = f.args.first().and_then(|a| {
11545 if let Expression::Literal(
11546 crate::expressions::Literal::Number(n),
11547 ) = a
11548 {
11549 n.parse::<u32>().ok()
11550 } else if let Expression::Identifier(id) = a
11551 {
11552 if id.name.eq_ignore_ascii_case("MAX") {
11553 None
11554 } else {
11555 None
11556 }
11557 } else {
11558 None
11559 }
11560 });
11561 // Check for VARCHAR(MAX) -> TEXT
11562 let is_max = f.args.first().map_or(false, |a| {
11563 matches!(a, Expression::Identifier(id) if id.name.eq_ignore_ascii_case("MAX"))
11564 || matches!(a, Expression::Column(col) if col.name.name.eq_ignore_ascii_case("MAX"))
11565 });
11566 if is_max {
11567 Some(DataType::Text)
11568 } else {
11569 Some(DataType::VarChar {
11570 length: len,
11571 parenthesized_length: false,
11572 })
11573 }
11574 }
11575 "NCHAR" | "CHAR" => {
11576 let len = f.args.first().and_then(|a| {
11577 if let Expression::Literal(
11578 crate::expressions::Literal::Number(n),
11579 ) = a
11580 {
11581 n.parse::<u32>().ok()
11582 } else {
11583 None
11584 }
11585 });
11586 Some(DataType::Char { length: len })
11587 }
11588 "NUMERIC" | "DECIMAL" => {
11589 let precision = f.args.first().and_then(|a| {
11590 if let Expression::Literal(
11591 crate::expressions::Literal::Number(n),
11592 ) = a
11593 {
11594 n.parse::<u32>().ok()
11595 } else {
11596 None
11597 }
11598 });
11599 let scale = f.args.get(1).and_then(|a| {
11600 if let Expression::Literal(
11601 crate::expressions::Literal::Number(n),
11602 ) = a
11603 {
11604 n.parse::<u32>().ok()
11605 } else {
11606 None
11607 }
11608 });
11609 Some(DataType::Decimal { precision, scale })
11610 }
11611 _ => None,
11612 }
11613 }
11614 _ => None,
11615 }
11616 }
11617
11618 if let Some(mut dt) = expr_to_datatype(&type_expr) {
11619 // For TSQL source: VARCHAR/CHAR without length defaults to 30
11620 let is_tsql_source =
11621 matches!(source, DialectType::TSQL | DialectType::Fabric);
11622 if is_tsql_source {
11623 match &dt {
11624 DataType::VarChar { length: None, .. } => {
11625 dt = DataType::VarChar {
11626 length: Some(30),
11627 parenthesized_length: false,
11628 };
11629 }
11630 DataType::Char { length: None } => {
11631 dt = DataType::Char { length: Some(30) };
11632 }
11633 _ => {}
11634 }
11635 }
11636
11637 // Determine if this is a string type
11638 let is_string_type = matches!(
11639 dt,
11640 DataType::VarChar { .. }
11641 | DataType::Char { .. }
11642 | DataType::Text
11643 ) || matches!(&dt, DataType::Custom { name } if name == "NVARCHAR" || name == "NCHAR"
11644 || name.starts_with("NVARCHAR(") || name.starts_with("NCHAR(")
11645 || name.starts_with("VARCHAR(") || name == "VARCHAR"
11646 || name == "STRING");
11647
11648 // Determine if this is a date/time type
11649 let is_datetime_type = matches!(
11650 dt,
11651 DataType::Timestamp { .. } | DataType::Date
11652 ) || matches!(&dt, DataType::Custom { name } if name == "DATETIME"
11653 || name == "DATETIME2" || name == "SMALLDATETIME");
11654
11655 // Check for date conversion with style
11656 if style.is_some() {
11657 let style_num = style.and_then(|s| {
11658 if let Expression::Literal(
11659 crate::expressions::Literal::Number(n),
11660 ) = s
11661 {
11662 n.parse::<u32>().ok()
11663 } else {
11664 None
11665 }
11666 });
11667
11668 // TSQL CONVERT date styles (Java format)
11669 let format_str = style_num.and_then(|n| match n {
11670 101 => Some("MM/dd/yyyy"),
11671 102 => Some("yyyy.MM.dd"),
11672 103 => Some("dd/MM/yyyy"),
11673 104 => Some("dd.MM.yyyy"),
11674 105 => Some("dd-MM-yyyy"),
11675 108 => Some("HH:mm:ss"),
11676 110 => Some("MM-dd-yyyy"),
11677 112 => Some("yyyyMMdd"),
11678 120 | 20 => Some("yyyy-MM-dd HH:mm:ss"),
11679 121 | 21 => Some("yyyy-MM-dd HH:mm:ss.SSSSSS"),
11680 126 | 127 => Some("yyyy-MM-dd'T'HH:mm:ss.SSS"),
11681 _ => None,
11682 });
11683
11684 // Non-string, non-datetime types with style: just CAST, ignore the style
11685 if !is_string_type && !is_datetime_type {
11686 let cast_expr = if is_try {
11687 Expression::TryCast(Box::new(
11688 crate::expressions::Cast {
11689 this: value_expr,
11690 to: dt,
11691 trailing_comments: Vec::new(),
11692 double_colon_syntax: false,
11693 format: None,
11694 default: None,
11695 inferred_type: None,
11696 },
11697 ))
11698 } else {
11699 Expression::Cast(Box::new(
11700 crate::expressions::Cast {
11701 this: value_expr,
11702 to: dt,
11703 trailing_comments: Vec::new(),
11704 double_colon_syntax: false,
11705 format: None,
11706 default: None,
11707 inferred_type: None,
11708 },
11709 ))
11710 };
11711 return Ok(cast_expr);
11712 }
11713
11714 if let Some(java_fmt) = format_str {
11715 let c_fmt = java_fmt
11716 .replace("yyyy", "%Y")
11717 .replace("MM", "%m")
11718 .replace("dd", "%d")
11719 .replace("HH", "%H")
11720 .replace("mm", "%M")
11721 .replace("ss", "%S")
11722 .replace("SSSSSS", "%f")
11723 .replace("SSS", "%f")
11724 .replace("'T'", "T");
11725
11726 // For datetime target types: style is the INPUT format for parsing strings -> dates
11727 if is_datetime_type {
11728 match target {
11729 DialectType::DuckDB => {
11730 return Ok(Expression::Function(Box::new(
11731 Function::new(
11732 "STRPTIME".to_string(),
11733 vec![
11734 value_expr,
11735 Expression::string(&c_fmt),
11736 ],
11737 ),
11738 )));
11739 }
11740 DialectType::Spark
11741 | DialectType::Databricks => {
11742 // CONVERT(DATETIME, x, style) -> TO_TIMESTAMP(x, fmt)
11743 // CONVERT(DATE, x, style) -> TO_DATE(x, fmt)
11744 let func_name =
11745 if matches!(dt, DataType::Date) {
11746 "TO_DATE"
11747 } else {
11748 "TO_TIMESTAMP"
11749 };
11750 return Ok(Expression::Function(Box::new(
11751 Function::new(
11752 func_name.to_string(),
11753 vec![
11754 value_expr,
11755 Expression::string(java_fmt),
11756 ],
11757 ),
11758 )));
11759 }
11760 DialectType::Hive => {
11761 return Ok(Expression::Function(Box::new(
11762 Function::new(
11763 "TO_TIMESTAMP".to_string(),
11764 vec![
11765 value_expr,
11766 Expression::string(java_fmt),
11767 ],
11768 ),
11769 )));
11770 }
11771 _ => {
11772 return Ok(Expression::Cast(Box::new(
11773 crate::expressions::Cast {
11774 this: value_expr,
11775 to: dt,
11776 trailing_comments: Vec::new(),
11777 double_colon_syntax: false,
11778 format: None,
11779 default: None,
11780 inferred_type: None,
11781 },
11782 )));
11783 }
11784 }
11785 }
11786
11787 // For string target types: style is the OUTPUT format for dates -> strings
11788 match target {
11789 DialectType::DuckDB => Ok(Expression::Function(
11790 Box::new(Function::new(
11791 "STRPTIME".to_string(),
11792 vec![
11793 value_expr,
11794 Expression::string(&c_fmt),
11795 ],
11796 )),
11797 )),
11798 DialectType::Spark | DialectType::Databricks => {
11799 // For string target types with style: CAST(DATE_FORMAT(x, fmt) AS type)
11800 // Determine the target string type
11801 let string_dt = match &dt {
11802 DataType::VarChar {
11803 length: Some(l),
11804 ..
11805 } => DataType::VarChar {
11806 length: Some(*l),
11807 parenthesized_length: false,
11808 },
11809 DataType::Text => DataType::Custom {
11810 name: "STRING".to_string(),
11811 },
11812 _ => DataType::Custom {
11813 name: "STRING".to_string(),
11814 },
11815 };
11816 let date_format_expr = Expression::Function(
11817 Box::new(Function::new(
11818 "DATE_FORMAT".to_string(),
11819 vec![
11820 value_expr,
11821 Expression::string(java_fmt),
11822 ],
11823 )),
11824 );
11825 let cast_expr = if is_try {
11826 Expression::TryCast(Box::new(
11827 crate::expressions::Cast {
11828 this: date_format_expr,
11829 to: string_dt,
11830 trailing_comments: Vec::new(),
11831 double_colon_syntax: false,
11832 format: None,
11833 default: None,
11834 inferred_type: None,
11835 },
11836 ))
11837 } else {
11838 Expression::Cast(Box::new(
11839 crate::expressions::Cast {
11840 this: date_format_expr,
11841 to: string_dt,
11842 trailing_comments: Vec::new(),
11843 double_colon_syntax: false,
11844 format: None,
11845 default: None,
11846 inferred_type: None,
11847 },
11848 ))
11849 };
11850 Ok(cast_expr)
11851 }
11852 DialectType::MySQL | DialectType::SingleStore => {
11853 // For MySQL: CAST(DATE_FORMAT(x, mysql_fmt) AS CHAR(n))
11854 let mysql_fmt = java_fmt
11855 .replace("yyyy", "%Y")
11856 .replace("MM", "%m")
11857 .replace("dd", "%d")
11858 .replace("HH:mm:ss.SSSSSS", "%T")
11859 .replace("HH:mm:ss", "%T")
11860 .replace("HH", "%H")
11861 .replace("mm", "%i")
11862 .replace("ss", "%S");
11863 let date_format_expr = Expression::Function(
11864 Box::new(Function::new(
11865 "DATE_FORMAT".to_string(),
11866 vec![
11867 value_expr,
11868 Expression::string(&mysql_fmt),
11869 ],
11870 )),
11871 );
11872 // MySQL uses CHAR for string casts
11873 let mysql_dt = match &dt {
11874 DataType::VarChar { length, .. } => {
11875 DataType::Char { length: *length }
11876 }
11877 _ => dt,
11878 };
11879 Ok(Expression::Cast(Box::new(
11880 crate::expressions::Cast {
11881 this: date_format_expr,
11882 to: mysql_dt,
11883 trailing_comments: Vec::new(),
11884 double_colon_syntax: false,
11885 format: None,
11886 default: None,
11887 inferred_type: None,
11888 },
11889 )))
11890 }
11891 DialectType::Hive => {
11892 let func_name = "TO_TIMESTAMP";
11893 Ok(Expression::Function(Box::new(
11894 Function::new(
11895 func_name.to_string(),
11896 vec![
11897 value_expr,
11898 Expression::string(java_fmt),
11899 ],
11900 ),
11901 )))
11902 }
11903 _ => Ok(Expression::Cast(Box::new(
11904 crate::expressions::Cast {
11905 this: value_expr,
11906 to: dt,
11907 trailing_comments: Vec::new(),
11908 double_colon_syntax: false,
11909 format: None,
11910 default: None,
11911 inferred_type: None,
11912 },
11913 ))),
11914 }
11915 } else {
11916 // Unknown style, just CAST
11917 let cast_expr = if is_try {
11918 Expression::TryCast(Box::new(
11919 crate::expressions::Cast {
11920 this: value_expr,
11921 to: dt,
11922 trailing_comments: Vec::new(),
11923 double_colon_syntax: false,
11924 format: None,
11925 default: None,
11926 inferred_type: None,
11927 },
11928 ))
11929 } else {
11930 Expression::Cast(Box::new(
11931 crate::expressions::Cast {
11932 this: value_expr,
11933 to: dt,
11934 trailing_comments: Vec::new(),
11935 double_colon_syntax: false,
11936 format: None,
11937 default: None,
11938 inferred_type: None,
11939 },
11940 ))
11941 };
11942 Ok(cast_expr)
11943 }
11944 } else {
11945 // No style - simple CAST
11946 let final_dt = if matches!(
11947 target,
11948 DialectType::MySQL | DialectType::SingleStore
11949 ) {
11950 match &dt {
11951 DataType::Int { .. }
11952 | DataType::BigInt { .. }
11953 | DataType::SmallInt { .. }
11954 | DataType::TinyInt { .. } => DataType::Custom {
11955 name: "SIGNED".to_string(),
11956 },
11957 DataType::VarChar { length, .. } => {
11958 DataType::Char { length: *length }
11959 }
11960 _ => dt,
11961 }
11962 } else {
11963 dt
11964 };
11965 let cast_expr = if is_try {
11966 Expression::TryCast(Box::new(
11967 crate::expressions::Cast {
11968 this: value_expr,
11969 to: final_dt,
11970 trailing_comments: Vec::new(),
11971 double_colon_syntax: false,
11972 format: None,
11973 default: None,
11974 inferred_type: None,
11975 },
11976 ))
11977 } else {
11978 Expression::Cast(Box::new(crate::expressions::Cast {
11979 this: value_expr,
11980 to: final_dt,
11981 trailing_comments: Vec::new(),
11982 double_colon_syntax: false,
11983 format: None,
11984 default: None,
11985 inferred_type: None,
11986 }))
11987 };
11988 Ok(cast_expr)
11989 }
11990 } else {
11991 // Can't convert type expression - keep as CONVERT/TRY_CONVERT function
11992 Ok(Expression::Function(f))
11993 }
11994 }
11995 // STRFTIME(val, fmt) from DuckDB / STRFTIME(fmt, val) from SQLite -> target-specific
11996 "STRFTIME" if f.args.len() == 2 => {
11997 // SQLite uses STRFTIME(fmt, val); DuckDB uses STRFTIME(val, fmt)
11998 let (val, fmt_expr) = if matches!(source, DialectType::SQLite) {
11999 // SQLite: args[0] = format, args[1] = value
12000 (f.args[1].clone(), &f.args[0])
12001 } else {
12002 // DuckDB and others: args[0] = value, args[1] = format
12003 (f.args[0].clone(), &f.args[1])
12004 };
12005
12006 // Helper to convert C-style format to Java-style
// Translate a C/strftime-style format string (e.g. "%Y-%m-%d") into the
// Java/SimpleDateFormat-style pattern (e.g. "yyyy-MM-dd") used by
// Spark/Hive DATE_FORMAT and TSQL FORMAT.
//
// Every source directive starts with '%' and every emitted Java token is
// '%'-free, so the pairwise substitutions below cannot interfere with one
// another; the table preserves the original substitution order.
fn c_to_java_format(fmt: &str) -> String {
    const MAP: [(&str, &str); 19] = [
        ("%Y", "yyyy"),
        ("%m", "MM"),
        ("%d", "dd"),
        ("%H", "HH"),
        ("%M", "mm"),
        ("%S", "ss"),
        ("%f", "SSSSSS"),
        ("%y", "yy"),
        ("%-m", "M"),
        ("%-d", "d"),
        ("%-H", "H"),
        ("%-I", "h"),
        ("%I", "hh"),
        ("%p", "a"),
        ("%j", "DDD"),
        ("%a", "EEE"),
        ("%b", "MMM"),
        ("%F", "yyyy-MM-dd"),
        ("%T", "HH:mm:ss"),
    ];
    MAP.iter()
        .fold(fmt.to_string(), |acc, (from, to)| acc.replace(from, to))
}
12028
12029 // Helper: recursively convert format strings within expressions (handles CONCAT)
12030 fn convert_fmt_expr(
12031 expr: &Expression,
12032 converter: &dyn Fn(&str) -> String,
12033 ) -> Expression {
12034 match expr {
12035 Expression::Literal(
12036 crate::expressions::Literal::String(s),
12037 ) => Expression::string(&converter(s)),
12038 Expression::Function(func)
12039 if func.name.eq_ignore_ascii_case("CONCAT") =>
12040 {
12041 let new_args: Vec<Expression> = func
12042 .args
12043 .iter()
12044 .map(|a| convert_fmt_expr(a, converter))
12045 .collect();
12046 Expression::Function(Box::new(Function::new(
12047 "CONCAT".to_string(),
12048 new_args,
12049 )))
12050 }
12051 other => other.clone(),
12052 }
12053 }
12054
12055 match target {
12056 DialectType::DuckDB => {
12057 if matches!(source, DialectType::SQLite) {
12058 // SQLite STRFTIME(fmt, val) -> DuckDB STRFTIME(CAST(val AS TIMESTAMP), fmt)
12059 let cast_val = Expression::Cast(Box::new(Cast {
12060 this: val,
12061 to: crate::expressions::DataType::Timestamp {
12062 precision: None,
12063 timezone: false,
12064 },
12065 trailing_comments: Vec::new(),
12066 double_colon_syntax: false,
12067 format: None,
12068 default: None,
12069 inferred_type: None,
12070 }));
12071 Ok(Expression::Function(Box::new(Function::new(
12072 "STRFTIME".to_string(),
12073 vec![cast_val, fmt_expr.clone()],
12074 ))))
12075 } else {
12076 Ok(Expression::Function(f))
12077 }
12078 }
12079 DialectType::Spark
12080 | DialectType::Databricks
12081 | DialectType::Hive => {
12082 // STRFTIME(val, fmt) -> DATE_FORMAT(val, java_fmt)
12083 let converted_fmt =
12084 convert_fmt_expr(fmt_expr, &c_to_java_format);
12085 Ok(Expression::Function(Box::new(Function::new(
12086 "DATE_FORMAT".to_string(),
12087 vec![val, converted_fmt],
12088 ))))
12089 }
12090 DialectType::TSQL | DialectType::Fabric => {
12091 // STRFTIME(val, fmt) -> FORMAT(val, java_fmt)
12092 let converted_fmt =
12093 convert_fmt_expr(fmt_expr, &c_to_java_format);
12094 Ok(Expression::Function(Box::new(Function::new(
12095 "FORMAT".to_string(),
12096 vec![val, converted_fmt],
12097 ))))
12098 }
12099 DialectType::Presto
12100 | DialectType::Trino
12101 | DialectType::Athena => {
12102 // STRFTIME(val, fmt) -> DATE_FORMAT(val, presto_fmt) (convert DuckDB format to Presto)
12103 if let Expression::Literal(
12104 crate::expressions::Literal::String(s),
12105 ) = fmt_expr
12106 {
12107 let presto_fmt = duckdb_to_presto_format(s);
12108 Ok(Expression::Function(Box::new(Function::new(
12109 "DATE_FORMAT".to_string(),
12110 vec![val, Expression::string(&presto_fmt)],
12111 ))))
12112 } else {
12113 Ok(Expression::Function(Box::new(Function::new(
12114 "DATE_FORMAT".to_string(),
12115 vec![val, fmt_expr.clone()],
12116 ))))
12117 }
12118 }
12119 DialectType::BigQuery => {
12120 // STRFTIME(val, fmt) -> FORMAT_DATE(bq_fmt, val) - note reversed arg order
12121 if let Expression::Literal(
12122 crate::expressions::Literal::String(s),
12123 ) = fmt_expr
12124 {
12125 let bq_fmt = duckdb_to_bigquery_format(s);
12126 Ok(Expression::Function(Box::new(Function::new(
12127 "FORMAT_DATE".to_string(),
12128 vec![Expression::string(&bq_fmt), val],
12129 ))))
12130 } else {
12131 Ok(Expression::Function(Box::new(Function::new(
12132 "FORMAT_DATE".to_string(),
12133 vec![fmt_expr.clone(), val],
12134 ))))
12135 }
12136 }
12137 DialectType::PostgreSQL | DialectType::Redshift => {
12138 // STRFTIME(val, fmt) -> TO_CHAR(val, pg_fmt)
12139 if let Expression::Literal(
12140 crate::expressions::Literal::String(s),
12141 ) = fmt_expr
12142 {
12143 let pg_fmt = s
12144 .replace("%Y", "YYYY")
12145 .replace("%m", "MM")
12146 .replace("%d", "DD")
12147 .replace("%H", "HH24")
12148 .replace("%M", "MI")
12149 .replace("%S", "SS")
12150 .replace("%y", "YY")
12151 .replace("%-m", "FMMM")
12152 .replace("%-d", "FMDD")
12153 .replace("%-H", "FMHH24")
12154 .replace("%-I", "FMHH12")
12155 .replace("%p", "AM")
12156 .replace("%F", "YYYY-MM-DD")
12157 .replace("%T", "HH24:MI:SS");
12158 Ok(Expression::Function(Box::new(Function::new(
12159 "TO_CHAR".to_string(),
12160 vec![val, Expression::string(&pg_fmt)],
12161 ))))
12162 } else {
12163 Ok(Expression::Function(Box::new(Function::new(
12164 "TO_CHAR".to_string(),
12165 vec![val, fmt_expr.clone()],
12166 ))))
12167 }
12168 }
12169 _ => Ok(Expression::Function(f)),
12170 }
12171 }
12172 // STRPTIME(val, fmt) from DuckDB -> target-specific date parse function
12173 "STRPTIME" if f.args.len() == 2 => {
12174 let val = f.args[0].clone();
12175 let fmt_expr = &f.args[1];
12176
// Translate a C/strftime parse format into the Java-style pattern expected
// by TO_TIMESTAMP / UNIX_TIMESTAMP when parsing strings into timestamps.
// Mirrors `c_to_java_format` used for STRFTIME, but omits the day-of-year /
// weekday / month-name directives that are not supported here.
fn c_to_java_format_parse(fmt: &str) -> String {
    let pairs = [
        ("%Y", "yyyy"),
        ("%m", "MM"),
        ("%d", "dd"),
        ("%H", "HH"),
        ("%M", "mm"),
        ("%S", "ss"),
        ("%f", "SSSSSS"),
        ("%y", "yy"),
        ("%-m", "M"),
        ("%-d", "d"),
        ("%-H", "H"),
        ("%-I", "h"),
        ("%I", "hh"),
        ("%p", "a"),
        ("%F", "yyyy-MM-dd"),
        ("%T", "HH:mm:ss"),
    ];
    let mut out = fmt.to_string();
    for (from, to) in pairs {
        out = out.replace(from, to);
    }
    out
}
12195
12196 match target {
12197 DialectType::DuckDB => Ok(Expression::Function(f)),
12198 DialectType::Spark | DialectType::Databricks => {
12199 // STRPTIME(val, fmt) -> TO_TIMESTAMP(val, java_fmt)
12200 if let Expression::Literal(
12201 crate::expressions::Literal::String(s),
12202 ) = fmt_expr
12203 {
12204 let java_fmt = c_to_java_format_parse(s);
12205 Ok(Expression::Function(Box::new(Function::new(
12206 "TO_TIMESTAMP".to_string(),
12207 vec![val, Expression::string(&java_fmt)],
12208 ))))
12209 } else {
12210 Ok(Expression::Function(Box::new(Function::new(
12211 "TO_TIMESTAMP".to_string(),
12212 vec![val, fmt_expr.clone()],
12213 ))))
12214 }
12215 }
12216 DialectType::Hive => {
12217 // STRPTIME(val, fmt) -> CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(val, java_fmt)) AS TIMESTAMP)
12218 if let Expression::Literal(
12219 crate::expressions::Literal::String(s),
12220 ) = fmt_expr
12221 {
12222 let java_fmt = c_to_java_format_parse(s);
12223 let unix_ts =
12224 Expression::Function(Box::new(Function::new(
12225 "UNIX_TIMESTAMP".to_string(),
12226 vec![val, Expression::string(&java_fmt)],
12227 )));
12228 let from_unix =
12229 Expression::Function(Box::new(Function::new(
12230 "FROM_UNIXTIME".to_string(),
12231 vec![unix_ts],
12232 )));
12233 Ok(Expression::Cast(Box::new(
12234 crate::expressions::Cast {
12235 this: from_unix,
12236 to: DataType::Timestamp {
12237 timezone: false,
12238 precision: None,
12239 },
12240 trailing_comments: Vec::new(),
12241 double_colon_syntax: false,
12242 format: None,
12243 default: None,
12244 inferred_type: None,
12245 },
12246 )))
12247 } else {
12248 Ok(Expression::Function(f))
12249 }
12250 }
12251 DialectType::Presto
12252 | DialectType::Trino
12253 | DialectType::Athena => {
12254 // STRPTIME(val, fmt) -> DATE_PARSE(val, presto_fmt) (convert DuckDB format to Presto)
12255 if let Expression::Literal(
12256 crate::expressions::Literal::String(s),
12257 ) = fmt_expr
12258 {
12259 let presto_fmt = duckdb_to_presto_format(s);
12260 Ok(Expression::Function(Box::new(Function::new(
12261 "DATE_PARSE".to_string(),
12262 vec![val, Expression::string(&presto_fmt)],
12263 ))))
12264 } else {
12265 Ok(Expression::Function(Box::new(Function::new(
12266 "DATE_PARSE".to_string(),
12267 vec![val, fmt_expr.clone()],
12268 ))))
12269 }
12270 }
12271 DialectType::BigQuery => {
12272 // STRPTIME(val, fmt) -> PARSE_TIMESTAMP(bq_fmt, val) - note reversed arg order
12273 if let Expression::Literal(
12274 crate::expressions::Literal::String(s),
12275 ) = fmt_expr
12276 {
12277 let bq_fmt = duckdb_to_bigquery_format(s);
12278 Ok(Expression::Function(Box::new(Function::new(
12279 "PARSE_TIMESTAMP".to_string(),
12280 vec![Expression::string(&bq_fmt), val],
12281 ))))
12282 } else {
12283 Ok(Expression::Function(Box::new(Function::new(
12284 "PARSE_TIMESTAMP".to_string(),
12285 vec![fmt_expr.clone(), val],
12286 ))))
12287 }
12288 }
12289 _ => Ok(Expression::Function(f)),
12290 }
12291 }
12292 // DATE_FORMAT(val, fmt) from Presto source (C-style format) -> target-specific
12293 "DATE_FORMAT"
12294 if f.args.len() >= 2
12295 && matches!(
12296 source,
12297 DialectType::Presto
12298 | DialectType::Trino
12299 | DialectType::Athena
12300 ) =>
12301 {
12302 let val = f.args[0].clone();
12303 let fmt_expr = &f.args[1];
12304
12305 match target {
12306 DialectType::Presto
12307 | DialectType::Trino
12308 | DialectType::Athena => {
12309 // Presto -> Presto: normalize format (e.g., %H:%i:%S -> %T)
12310 if let Expression::Literal(
12311 crate::expressions::Literal::String(s),
12312 ) = fmt_expr
12313 {
12314 let normalized = crate::dialects::presto::PrestoDialect::normalize_presto_format(s);
12315 Ok(Expression::Function(Box::new(Function::new(
12316 "DATE_FORMAT".to_string(),
12317 vec![val, Expression::string(&normalized)],
12318 ))))
12319 } else {
12320 Ok(Expression::Function(f))
12321 }
12322 }
12323 DialectType::Hive
12324 | DialectType::Spark
12325 | DialectType::Databricks => {
12326 // Convert Presto C-style to Java-style format
12327 if let Expression::Literal(
12328 crate::expressions::Literal::String(s),
12329 ) = fmt_expr
12330 {
12331 let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
12332 Ok(Expression::Function(Box::new(Function::new(
12333 "DATE_FORMAT".to_string(),
12334 vec![val, Expression::string(&java_fmt)],
12335 ))))
12336 } else {
12337 Ok(Expression::Function(f))
12338 }
12339 }
12340 DialectType::DuckDB => {
12341 // Convert to STRFTIME(val, duckdb_fmt)
12342 if let Expression::Literal(
12343 crate::expressions::Literal::String(s),
12344 ) = fmt_expr
12345 {
12346 let duckdb_fmt = crate::dialects::presto::PrestoDialect::presto_to_duckdb_format(s);
12347 Ok(Expression::Function(Box::new(Function::new(
12348 "STRFTIME".to_string(),
12349 vec![val, Expression::string(&duckdb_fmt)],
12350 ))))
12351 } else {
12352 Ok(Expression::Function(Box::new(Function::new(
12353 "STRFTIME".to_string(),
12354 vec![val, fmt_expr.clone()],
12355 ))))
12356 }
12357 }
12358 DialectType::BigQuery => {
12359 // Convert to FORMAT_DATE(bq_fmt, val) - reversed args
12360 if let Expression::Literal(
12361 crate::expressions::Literal::String(s),
12362 ) = fmt_expr
12363 {
12364 let bq_fmt = crate::dialects::presto::PrestoDialect::presto_to_bigquery_format(s);
12365 Ok(Expression::Function(Box::new(Function::new(
12366 "FORMAT_DATE".to_string(),
12367 vec![Expression::string(&bq_fmt), val],
12368 ))))
12369 } else {
12370 Ok(Expression::Function(Box::new(Function::new(
12371 "FORMAT_DATE".to_string(),
12372 vec![fmt_expr.clone(), val],
12373 ))))
12374 }
12375 }
12376 _ => Ok(Expression::Function(f)),
12377 }
12378 }
12379 // DATE_PARSE(val, fmt) from Presto source -> target-specific parse function
12380 "DATE_PARSE"
12381 if f.args.len() >= 2
12382 && matches!(
12383 source,
12384 DialectType::Presto
12385 | DialectType::Trino
12386 | DialectType::Athena
12387 ) =>
12388 {
12389 let val = f.args[0].clone();
12390 let fmt_expr = &f.args[1];
12391
12392 match target {
12393 DialectType::Presto
12394 | DialectType::Trino
12395 | DialectType::Athena => {
12396 // Presto -> Presto: normalize format
12397 if let Expression::Literal(
12398 crate::expressions::Literal::String(s),
12399 ) = fmt_expr
12400 {
12401 let normalized = crate::dialects::presto::PrestoDialect::normalize_presto_format(s);
12402 Ok(Expression::Function(Box::new(Function::new(
12403 "DATE_PARSE".to_string(),
12404 vec![val, Expression::string(&normalized)],
12405 ))))
12406 } else {
12407 Ok(Expression::Function(f))
12408 }
12409 }
12410 DialectType::Hive => {
12411 // Presto -> Hive: if default format, just CAST(x AS TIMESTAMP)
12412 if let Expression::Literal(
12413 crate::expressions::Literal::String(s),
12414 ) = fmt_expr
12415 {
12416 if crate::dialects::presto::PrestoDialect::is_default_timestamp_format(s)
12417 || crate::dialects::presto::PrestoDialect::is_default_date_format(s) {
12418 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
12419 this: val,
12420 to: DataType::Timestamp { timezone: false, precision: None },
12421 trailing_comments: Vec::new(),
12422 double_colon_syntax: false,
12423 format: None,
12424 default: None,
12425 inferred_type: None,
12426 })))
12427 } else {
12428 let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
12429 Ok(Expression::Function(Box::new(Function::new(
12430 "TO_TIMESTAMP".to_string(),
12431 vec![val, Expression::string(&java_fmt)],
12432 ))))
12433 }
12434 } else {
12435 Ok(Expression::Function(f))
12436 }
12437 }
12438 DialectType::Spark | DialectType::Databricks => {
12439 // Presto -> Spark: TO_TIMESTAMP(val, java_fmt)
12440 if let Expression::Literal(
12441 crate::expressions::Literal::String(s),
12442 ) = fmt_expr
12443 {
12444 let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
12445 Ok(Expression::Function(Box::new(Function::new(
12446 "TO_TIMESTAMP".to_string(),
12447 vec![val, Expression::string(&java_fmt)],
12448 ))))
12449 } else {
12450 Ok(Expression::Function(f))
12451 }
12452 }
12453 DialectType::DuckDB => {
12454 // Presto -> DuckDB: STRPTIME(val, duckdb_fmt)
12455 if let Expression::Literal(
12456 crate::expressions::Literal::String(s),
12457 ) = fmt_expr
12458 {
12459 let duckdb_fmt = crate::dialects::presto::PrestoDialect::presto_to_duckdb_format(s);
12460 Ok(Expression::Function(Box::new(Function::new(
12461 "STRPTIME".to_string(),
12462 vec![val, Expression::string(&duckdb_fmt)],
12463 ))))
12464 } else {
12465 Ok(Expression::Function(Box::new(Function::new(
12466 "STRPTIME".to_string(),
12467 vec![val, fmt_expr.clone()],
12468 ))))
12469 }
12470 }
12471 _ => Ok(Expression::Function(f)),
12472 }
12473 }
12474 // FROM_BASE64(x) / TO_BASE64(x) from Presto -> Hive-specific renames
12475 "FROM_BASE64"
12476 if f.args.len() == 1 && matches!(target, DialectType::Hive) =>
12477 {
12478 Ok(Expression::Function(Box::new(Function::new(
12479 "UNBASE64".to_string(),
12480 f.args,
12481 ))))
12482 }
12483 "TO_BASE64"
12484 if f.args.len() == 1 && matches!(target, DialectType::Hive) =>
12485 {
12486 Ok(Expression::Function(Box::new(Function::new(
12487 "BASE64".to_string(),
12488 f.args,
12489 ))))
12490 }
12491 // FROM_UNIXTIME(x) -> CAST(FROM_UNIXTIME(x) AS TIMESTAMP) for Spark
12492 "FROM_UNIXTIME"
12493 if f.args.len() == 1
12494 && matches!(
12495 source,
12496 DialectType::Presto
12497 | DialectType::Trino
12498 | DialectType::Athena
12499 )
12500 && matches!(
12501 target,
12502 DialectType::Spark | DialectType::Databricks
12503 ) =>
12504 {
12505 // Wrap FROM_UNIXTIME(x) in CAST(... AS TIMESTAMP)
12506 let from_unix = Expression::Function(Box::new(Function::new(
12507 "FROM_UNIXTIME".to_string(),
12508 f.args,
12509 )));
12510 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
12511 this: from_unix,
12512 to: DataType::Timestamp {
12513 timezone: false,
12514 precision: None,
12515 },
12516 trailing_comments: Vec::new(),
12517 double_colon_syntax: false,
12518 format: None,
12519 default: None,
12520 inferred_type: None,
12521 })))
12522 }
12523 // DATE_FORMAT(val, fmt) from Hive/Spark/MySQL -> target-specific format function
12524 "DATE_FORMAT"
12525 if f.args.len() >= 2
12526 && !matches!(
12527 target,
12528 DialectType::Hive
12529 | DialectType::Spark
12530 | DialectType::Databricks
12531 | DialectType::MySQL
12532 | DialectType::SingleStore
12533 ) =>
12534 {
12535 let val = f.args[0].clone();
12536 let fmt_expr = &f.args[1];
12537 let is_hive_source = matches!(
12538 source,
12539 DialectType::Hive
12540 | DialectType::Spark
12541 | DialectType::Databricks
12542 );
12543
// Translate a Java/SimpleDateFormat pattern into a C/strftime pattern.
//
// Pass 1 rewrites the multi-character Java tokens, longest first so that
// e.g. "yyyy" wins over "yy" and "SSSSSS" is consumed whole.
// Pass 2 walks the intermediate result to map the single-character
// timezone tokens — 'z' (zone name) -> "%Z" and 'Z' (zone offset) -> "%z" —
// while skipping over the "%x" specifiers emitted by pass 1 so their
// letters are never re-interpreted.
fn java_to_c_format(fmt: &str) -> String {
    let pass1 = fmt
        .replace("yyyy", "%Y")
        .replace("SSSSSS", "%f")
        .replace("EEEE", "%W")
        .replace("MM", "%m")
        .replace("dd", "%d")
        .replace("HH", "%H")
        .replace("mm", "%M")
        .replace("ss", "%S")
        .replace("yy", "%y");

    let mut out = String::with_capacity(pass1.len());
    let mut it = pass1.chars().peekable();
    while let Some(c) = it.next() {
        match c {
            '%' if it.peek().is_some() => {
                // Copy the complete "%x" specifier through untouched.
                out.push('%');
                out.push(it.next().unwrap());
            }
            'z' => out.push_str("%Z"),
            'Z' => out.push_str("%z"),
            other => out.push(other),
        }
    }
    out
}
12583
12584 fn java_to_presto_format(fmt: &str) -> String {
12585 // Presto uses %T for HH:MM:SS
12586 let c_fmt = java_to_c_format(fmt);
12587 c_fmt.replace("%H:%M:%S", "%T")
12588 }
12589
12590 fn java_to_bq_format(fmt: &str) -> String {
12591 // BigQuery uses %F for yyyy-MM-dd and %T for HH:mm:ss
12592 let c_fmt = java_to_c_format(fmt);
12593 c_fmt.replace("%Y-%m-%d", "%F").replace("%H:%M:%S", "%T")
12594 }
12595
12596 // For Hive source, CAST string literals to appropriate type
12597 let cast_val = if is_hive_source {
12598 match &val {
12599 Expression::Literal(
12600 crate::expressions::Literal::String(_),
12601 ) => {
12602 match target {
12603 DialectType::DuckDB
12604 | DialectType::Presto
12605 | DialectType::Trino
12606 | DialectType::Athena => {
12607 Self::ensure_cast_timestamp(val.clone())
12608 }
12609 DialectType::BigQuery => {
12610 // BigQuery: CAST(val AS DATETIME)
12611 Expression::Cast(Box::new(
12612 crate::expressions::Cast {
12613 this: val.clone(),
12614 to: DataType::Custom {
12615 name: "DATETIME".to_string(),
12616 },
12617 trailing_comments: vec![],
12618 double_colon_syntax: false,
12619 format: None,
12620 default: None,
12621 inferred_type: None,
12622 },
12623 ))
12624 }
12625 _ => val.clone(),
12626 }
12627 }
12628 // For CAST(x AS DATE) or DATE literal, Presto needs CAST(CAST(x AS DATE) AS TIMESTAMP)
12629 Expression::Cast(c)
12630 if matches!(c.to, DataType::Date)
12631 && matches!(
12632 target,
12633 DialectType::Presto
12634 | DialectType::Trino
12635 | DialectType::Athena
12636 ) =>
12637 {
12638 Expression::Cast(Box::new(crate::expressions::Cast {
12639 this: val.clone(),
12640 to: DataType::Timestamp {
12641 timezone: false,
12642 precision: None,
12643 },
12644 trailing_comments: vec![],
12645 double_colon_syntax: false,
12646 format: None,
12647 default: None,
12648 inferred_type: None,
12649 }))
12650 }
12651 Expression::Literal(crate::expressions::Literal::Date(
12652 _,
12653 )) if matches!(
12654 target,
12655 DialectType::Presto
12656 | DialectType::Trino
12657 | DialectType::Athena
12658 ) =>
12659 {
12660 // DATE 'x' -> CAST(CAST('x' AS DATE) AS TIMESTAMP)
12661 let cast_date = Self::date_literal_to_cast(val.clone());
12662 Expression::Cast(Box::new(crate::expressions::Cast {
12663 this: cast_date,
12664 to: DataType::Timestamp {
12665 timezone: false,
12666 precision: None,
12667 },
12668 trailing_comments: vec![],
12669 double_colon_syntax: false,
12670 format: None,
12671 default: None,
12672 inferred_type: None,
12673 }))
12674 }
12675 _ => val.clone(),
12676 }
12677 } else {
12678 val.clone()
12679 };
12680
// Render the (already prepared) value/format pair for the requested target.
// `cast_val`, `fmt_expr`, `is_hive_source`, `val` and `f` are bound earlier
// in this arm (before this view). When the format is a string literal it is
// translated between token styles; otherwise it is passed through untouched.
match target {
    DialectType::DuckDB => {
        // DuckDB formats via STRFTIME(value, 'c-format').
        if let Expression::Literal(
            crate::expressions::Literal::String(s),
        ) = fmt_expr
        {
            // Hive-style sources carry Java tokens (yyyy, dd, ...);
            // convert to C strftime tokens. Other sources keep the
            // literal as-is.
            let c_fmt = if is_hive_source {
                java_to_c_format(s)
            } else {
                s.clone()
            };
            Ok(Expression::Function(Box::new(Function::new(
                "STRFTIME".to_string(),
                vec![cast_val, Expression::string(&c_fmt)],
            ))))
        } else {
            // Non-literal format expression: no translation possible.
            Ok(Expression::Function(Box::new(Function::new(
                "STRFTIME".to_string(),
                vec![cast_val, fmt_expr.clone()],
            ))))
        }
    }
    DialectType::Presto
    | DialectType::Trino
    | DialectType::Athena => {
        if is_hive_source {
            // Hive Java tokens -> Presto %-style tokens.
            if let Expression::Literal(
                crate::expressions::Literal::String(s),
            ) = fmt_expr
            {
                let p_fmt = java_to_presto_format(s);
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_FORMAT".to_string(),
                    vec![cast_val, Expression::string(&p_fmt)],
                ))))
            } else {
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_FORMAT".to_string(),
                    vec![cast_val, fmt_expr.clone()],
                ))))
            }
        } else {
            // Non-Hive source: emit DATE_FORMAT with the original,
            // untouched argument list.
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_FORMAT".to_string(),
                f.args,
            ))))
        }
    }
    DialectType::BigQuery => {
        // DATE_FORMAT(val, fmt) -> FORMAT_DATE(fmt, val)
        // (BigQuery takes the format string FIRST.)
        if let Expression::Literal(
            crate::expressions::Literal::String(s),
        ) = fmt_expr
        {
            // Hive sources: Java tokens -> BigQuery elements;
            // other sources are run through java_to_c_format.
            let bq_fmt = if is_hive_source {
                java_to_bq_format(s)
            } else {
                java_to_c_format(s)
            };
            Ok(Expression::Function(Box::new(Function::new(
                "FORMAT_DATE".to_string(),
                vec![Expression::string(&bq_fmt), cast_val],
            ))))
        } else {
            Ok(Expression::Function(Box::new(Function::new(
                "FORMAT_DATE".to_string(),
                vec![fmt_expr.clone(), cast_val],
            ))))
        }
    }
    DialectType::PostgreSQL | DialectType::Redshift => {
        if let Expression::Literal(
            crate::expressions::Literal::String(s),
        ) = fmt_expr
        {
            // Java-style tokens -> TO_CHAR template elements.
            // NOTE(review): "MM" -> "MM" is a no-op as written —
            // presumably kept because the month token is spelled the
            // same in both formats; confirm.
            let pg_fmt = s
                .replace("yyyy", "YYYY")
                .replace("MM", "MM")
                .replace("dd", "DD")
                .replace("HH", "HH24")
                .replace("mm", "MI")
                .replace("ss", "SS")
                .replace("yy", "YY");
            // Note: these arms use the uncast `val`, unlike the arms
            // above which use `cast_val`.
            Ok(Expression::Function(Box::new(Function::new(
                "TO_CHAR".to_string(),
                vec![val, Expression::string(&pg_fmt)],
            ))))
        } else {
            Ok(Expression::Function(Box::new(Function::new(
                "TO_CHAR".to_string(),
                vec![val, fmt_expr.clone()],
            ))))
        }
    }
    // Any other target: leave the original call unchanged.
    _ => Ok(Expression::Function(f)),
}
12777 }
// DATEDIFF(unit, start, end) - 3-arg form
// SQLite uses DATEDIFF(date1, date2, unit_string) instead
"DATEDIFF" if f.args.len() == 3 => {
    let mut args = f.args;
    // SQLite source: args = (date1, date2, unit_string)
    // Standard source: args = (unit, start, end)
    // The binding below normalizes both layouts to (unit, start, end)
    // plus the unit as an uppercase-ish string (via get_unit_str_static).
    let (_arg0, arg1, arg2, unit_str) =
        if matches!(source, DialectType::SQLite) {
            let date1 = args.remove(0);
            let date2 = args.remove(0);
            let unit_expr = args.remove(0);
            let unit_s = Self::get_unit_str_static(&unit_expr);

            // For SQLite target, generate JULIANDAY arithmetic directly:
            // (JULIANDAY(date1) - JULIANDAY(date2)) scaled per unit.
            if matches!(target, DialectType::SQLite) {
                let jd_first = Expression::Function(Box::new(
                    Function::new("JULIANDAY".to_string(), vec![date1]),
                ));
                let jd_second = Expression::Function(Box::new(
                    Function::new("JULIANDAY".to_string(), vec![date2]),
                ));
                let diff = Expression::Sub(Box::new(
                    crate::expressions::BinaryOp::new(
                        jd_first, jd_second,
                    ),
                ));
                // Parenthesize so the scaling below applies to the whole
                // subtraction, not just its right operand.
                let paren_diff = Expression::Paren(Box::new(
                    crate::expressions::Paren {
                        this: diff,
                        trailing_comments: Vec::new(),
                    },
                ));
                // Scale the day-granularity delta into the requested unit.
                // MONTH/YEAR use fixed 30.0 / 365.0-day approximations.
                let adjusted = match unit_s.as_str() {
                    "HOUR" => Expression::Mul(Box::new(
                        crate::expressions::BinaryOp::new(
                            paren_diff,
                            Expression::Literal(Literal::Number(
                                "24.0".to_string(),
                            )),
                        ),
                    )),
                    "MINUTE" => Expression::Mul(Box::new(
                        crate::expressions::BinaryOp::new(
                            paren_diff,
                            Expression::Literal(Literal::Number(
                                "1440.0".to_string(),
                            )),
                        ),
                    )),
                    "SECOND" => Expression::Mul(Box::new(
                        crate::expressions::BinaryOp::new(
                            paren_diff,
                            Expression::Literal(Literal::Number(
                                "86400.0".to_string(),
                            )),
                        ),
                    )),
                    "MONTH" => Expression::Div(Box::new(
                        crate::expressions::BinaryOp::new(
                            paren_diff,
                            Expression::Literal(Literal::Number(
                                "30.0".to_string(),
                            )),
                        ),
                    )),
                    "YEAR" => Expression::Div(Box::new(
                        crate::expressions::BinaryOp::new(
                            paren_diff,
                            Expression::Literal(Literal::Number(
                                "365.0".to_string(),
                            )),
                        ),
                    )),
                    // DAY (and anything unrecognized): raw day delta.
                    _ => paren_diff,
                };
                // Truncate the fractional julian-day result to an integer.
                return Ok(Expression::Cast(Box::new(Cast {
                    this: adjusted,
                    to: DataType::Int {
                        length: None,
                        integer_spelling: true,
                    },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                })));
            }

            // For other targets, remap to standard (unit, start, end) form
            let unit_ident =
                Expression::Identifier(Identifier::new(&unit_s));
            (unit_ident, date1, date2, unit_s)
        } else {
            let arg0 = args.remove(0);
            let arg1 = args.remove(0);
            let arg2 = args.remove(0);
            let unit_s = Self::get_unit_str_static(&arg0);
            (arg0, arg1, arg2, unit_s)
        };

    // For Hive/Spark source, string literal dates need to be cast
    // Note: Databricks is excluded - it handles string args like standard SQL
    let is_hive_spark =
        matches!(source, DialectType::Hive | DialectType::Spark);

    match target {
        DialectType::Snowflake => {
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            // Use ensure_to_date_preserved to add TO_DATE with a marker
            // that prevents the Snowflake TO_DATE handler from converting it to CAST
            let d1 = if is_hive_spark {
                Self::ensure_to_date_preserved(arg1)
            } else {
                arg1
            };
            let d2 = if is_hive_spark {
                Self::ensure_to_date_preserved(arg2)
            } else {
                arg2
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![unit, d1, d2],
            ))))
        }
        DialectType::Redshift => {
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            // Redshift path wraps Hive/Spark args in CAST(... AS DATE)
            // rather than the TO_DATE used for Snowflake above.
            let d1 = if is_hive_spark {
                Self::ensure_cast_date(arg1)
            } else {
                arg1
            };
            let d2 = if is_hive_spark {
                Self::ensure_cast_date(arg2)
            } else {
                arg2
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![unit, d1, d2],
            ))))
        }
        DialectType::TSQL => {
            // TSQL keeps the standard 3-arg DATEDIFF; no casts added.
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
        DialectType::DuckDB => {
            let is_redshift_tsql = matches!(
                source,
                DialectType::Redshift | DialectType::TSQL
            );
            if is_hive_spark {
                // For Hive/Spark source, CAST string args to DATE and emit DATE_DIFF directly
                let d1 = Self::ensure_cast_date(arg1);
                let d2 = Self::ensure_cast_date(arg2);
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_DIFF".to_string(),
                    vec![Expression::string(&unit_str), d1, d2],
                ))))
            } else if matches!(source, DialectType::Snowflake) {
                // For Snowflake source: special handling per unit
                match unit_str.as_str() {
                    "NANOSECOND" => {
                        // DATEDIFF(NANOSECOND, start, end) -> EPOCH_NS(CAST(end AS TIMESTAMP_NS)) - EPOCH_NS(CAST(start AS TIMESTAMP_NS))
                        // Local helper: CAST(expr AS TIMESTAMP_NS).
                        fn cast_to_timestamp_ns(
                            expr: Expression,
                        ) -> Expression
                        {
                            Expression::Cast(Box::new(Cast {
                                this: expr,
                                to: DataType::Custom {
                                    name: "TIMESTAMP_NS".to_string(),
                                },
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            }))
                        }
                        let epoch_end = Expression::Function(Box::new(
                            Function::new(
                                "EPOCH_NS".to_string(),
                                vec![cast_to_timestamp_ns(arg2)],
                            ),
                        ));
                        let epoch_start = Expression::Function(
                            Box::new(Function::new(
                                "EPOCH_NS".to_string(),
                                vec![cast_to_timestamp_ns(arg1)],
                            )),
                        );
                        Ok(Expression::Sub(Box::new(BinaryOp::new(
                            epoch_end,
                            epoch_start,
                        ))))
                    }
                    "WEEK" => {
                        // DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST(x AS DATE)), DATE_TRUNC('WEEK', CAST(y AS DATE)))
                        let d1 = Self::force_cast_date(arg1);
                        let d2 = Self::force_cast_date(arg2);
                        let dt1 = Expression::Function(Box::new(
                            Function::new(
                                "DATE_TRUNC".to_string(),
                                vec![Expression::string("WEEK"), d1],
                            ),
                        ));
                        let dt2 = Expression::Function(Box::new(
                            Function::new(
                                "DATE_TRUNC".to_string(),
                                vec![Expression::string("WEEK"), d2],
                            ),
                        ));
                        Ok(Expression::Function(Box::new(
                            Function::new(
                                "DATE_DIFF".to_string(),
                                vec![
                                    Expression::string(&unit_str),
                                    dt1,
                                    dt2,
                                ],
                            ),
                        )))
                    }
                    _ => {
                        // YEAR, MONTH, QUARTER, DAY, etc.: CAST to DATE
                        let d1 = Self::force_cast_date(arg1);
                        let d2 = Self::force_cast_date(arg2);
                        Ok(Expression::Function(Box::new(
                            Function::new(
                                "DATE_DIFF".to_string(),
                                vec![
                                    Expression::string(&unit_str),
                                    d1,
                                    d2,
                                ],
                            ),
                        )))
                    }
                }
            } else if is_redshift_tsql {
                // For Redshift/TSQL source, CAST args to TIMESTAMP (always)
                let d1 = Self::force_cast_timestamp(arg1);
                let d2 = Self::force_cast_timestamp(arg2);
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_DIFF".to_string(),
                    vec![Expression::string(&unit_str), d1, d2],
                ))))
            } else {
                // Keep as DATEDIFF so DuckDB's transform_datediff handles
                // DATE_TRUNC for WEEK, CAST for string literals, etc.
                let unit =
                    Expression::Identifier(Identifier::new(&unit_str));
                Ok(Expression::Function(Box::new(Function::new(
                    "DATEDIFF".to_string(),
                    vec![unit, arg1, arg2],
                ))))
            }
        }
        DialectType::BigQuery => {
            // NOTE(review): despite its name, this flag also covers
            // Snowflake sources in this arm (and the Presto arm below).
            let is_redshift_tsql = matches!(
                source,
                DialectType::Redshift
                    | DialectType::TSQL
                    | DialectType::Snowflake
            );
            let cast_d1 = if is_hive_spark {
                Self::ensure_cast_date(arg1)
            } else if is_redshift_tsql {
                Self::force_cast_datetime(arg1)
            } else {
                Self::ensure_cast_datetime(arg1)
            };
            let cast_d2 = if is_hive_spark {
                Self::ensure_cast_date(arg2)
            } else if is_redshift_tsql {
                Self::force_cast_datetime(arg2)
            } else {
                Self::ensure_cast_datetime(arg2)
            };
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            // BigQuery argument order: DATE_DIFF(end, start, unit).
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_DIFF".to_string(),
                vec![cast_d2, cast_d1, unit],
            ))))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // For Hive/Spark source, string literals need double-cast: CAST(CAST(x AS TIMESTAMP) AS DATE)
            // For Redshift/TSQL source, args need CAST to TIMESTAMP (always)
            // NOTE(review): the flag below also covers Snowflake sources.
            let is_redshift_tsql = matches!(
                source,
                DialectType::Redshift
                    | DialectType::TSQL
                    | DialectType::Snowflake
            );
            let d1 = if is_hive_spark {
                Self::double_cast_timestamp_date(arg1)
            } else if is_redshift_tsql {
                Self::force_cast_timestamp(arg1)
            } else {
                arg1
            };
            let d2 = if is_hive_spark {
                Self::double_cast_timestamp_date(arg2)
            } else if is_redshift_tsql {
                Self::force_cast_timestamp(arg2)
            } else {
                arg2
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_DIFF".to_string(),
                vec![Expression::string(&unit_str), d1, d2],
            ))))
        }
        // Hive's DATEDIFF is 2-arg (end, start); other units are emulated.
        DialectType::Hive => match unit_str.as_str() {
            // MONTH: CAST(MONTHS_BETWEEN(end, start) AS INT)
            "MONTH" => Ok(Expression::Cast(Box::new(Cast {
                this: Expression::Function(Box::new(Function::new(
                    "MONTHS_BETWEEN".to_string(),
                    vec![arg2, arg1],
                ))),
                to: DataType::Int {
                    length: None,
                    integer_spelling: false,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }))),
            // WEEK: CAST(DATEDIFF(end, start) / 7 AS INT)
            "WEEK" => Ok(Expression::Cast(Box::new(Cast {
                this: Expression::Div(Box::new(
                    crate::expressions::BinaryOp::new(
                        Expression::Function(Box::new(Function::new(
                            "DATEDIFF".to_string(),
                            vec![arg2, arg1],
                        ))),
                        Expression::number(7),
                    ),
                )),
                to: DataType::Int {
                    length: None,
                    integer_spelling: false,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }))),
            // Everything else falls back to the day-granularity 2-arg
            // form; the unit is dropped (no scaling applied here).
            _ => Ok(Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![arg2, arg1],
            )))),
        },
        DialectType::Spark | DialectType::Databricks => {
            // Spark/Databricks accept 3-arg DATEDIFF(unit, start, end).
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
        _ => {
            // For Hive/Spark source targeting PostgreSQL etc., cast string literals to DATE
            let d1 = if is_hive_spark {
                Self::ensure_cast_date(arg1)
            } else {
                arg1
            };
            let d2 = if is_hive_spark {
                Self::ensure_cast_date(arg2)
            } else {
                arg2
            };
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![unit, d1, d2],
            ))))
        }
    }
}
// DATEDIFF(end, start) - 2-arg form from Hive/MySQL
"DATEDIFF" if f.args.len() == 2 => {
    let mut args = f.args;
    // Per the header comment: arg0 = end, arg1 = start.
    let arg0 = args.remove(0);
    let arg1 = args.remove(0);

    // Helper: unwrap TO_DATE(x) -> x (extracts inner arg)
    // Also recognizes TryCast/Cast to DATE that may have been produced by
    // cross-dialect TO_DATE -> TRY_CAST conversion
    // Returns (expression, true) when the arg was already a date-producing
    // wrapper; (expression, false) otherwise.
    let unwrap_to_date = |e: Expression| -> (Expression, bool) {
        if let Expression::Function(ref f) = e {
            if f.name.eq_ignore_ascii_case("TO_DATE")
                && f.args.len() == 1
            {
                return (f.args[0].clone(), true);
            }
        }
        // Also recognize TryCast(x, Date) as an already-converted TO_DATE
        if let Expression::TryCast(ref c) = e {
            if matches!(c.to, DataType::Date) {
                return (e, true); // Already properly cast, return as-is
            }
        }
        (e, false)
    };

    match target {
        DialectType::DuckDB => {
            // For Hive source, always CAST to DATE
            // If arg is TO_DATE(x) or TRY_CAST(x AS DATE), use it directly
            // Note the swap: cast_d0 is built from arg1 (start) and
            // cast_d1 from arg0 (end), converting (end, start) into
            // DuckDB's DATE_DIFF('DAY', start, end) argument order.
            let cast_d0 = if matches!(
                source,
                DialectType::Hive
                    | DialectType::Spark
                    | DialectType::Databricks
            ) {
                let (inner, was_to_date) = unwrap_to_date(arg1);
                if was_to_date {
                    // Already a date expression, use directly
                    if matches!(&inner, Expression::TryCast(_)) {
                        inner // Already TRY_CAST(x AS DATE)
                    } else {
                        Self::try_cast_date(inner)
                    }
                } else {
                    Self::force_cast_date(inner)
                }
            } else {
                Self::ensure_cast_date(arg1)
            };
            let cast_d1 = if matches!(
                source,
                DialectType::Hive
                    | DialectType::Spark
                    | DialectType::Databricks
            ) {
                let (inner, was_to_date) = unwrap_to_date(arg0);
                if was_to_date {
                    if matches!(&inner, Expression::TryCast(_)) {
                        inner
                    } else {
                        Self::try_cast_date(inner)
                    }
                } else {
                    Self::force_cast_date(inner)
                }
            } else {
                Self::ensure_cast_date(arg0)
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_DIFF".to_string(),
                vec![Expression::string("DAY"), cast_d0, cast_d1],
            ))))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // For Hive/Spark source, apply double_cast_timestamp_date
            // For other sources (MySQL etc.), just swap args without casting
            if matches!(
                source,
                DialectType::Hive
                    | DialectType::Spark
                    | DialectType::Databricks
            ) {
                let cast_fn = |e: Expression| -> Expression {
                    let (inner, was_to_date) = unwrap_to_date(e);
                    if was_to_date {
                        // NOTE(review): the double-cast helper is applied
                        // twice in this branch but only once in the else
                        // branch below — confirm the nesting is intended.
                        let first_cast =
                            Self::double_cast_timestamp_date(inner);
                        Self::double_cast_timestamp_date(first_cast)
                    } else {
                        Self::double_cast_timestamp_date(inner)
                    }
                };
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_DIFF".to_string(),
                    vec![
                        Expression::string("DAY"),
                        cast_fn(arg1),
                        cast_fn(arg0),
                    ],
                ))))
            } else {
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_DIFF".to_string(),
                    vec![Expression::string("DAY"), arg1, arg0],
                ))))
            }
        }
        DialectType::Redshift => {
            // Redshift: DATEDIFF(DAY, start, end) — args swapped, no casts.
            let unit = Expression::Identifier(Identifier::new("DAY"));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![unit, arg1, arg0],
            ))))
        }
        // Other targets: keep the 2-arg call in original order.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "DATEDIFF".to_string(),
            vec![arg0, arg1],
        )))),
    }
}
13295 // DATE_DIFF(unit, start, end) - 3-arg with string unit (ClickHouse/DuckDB style)
13296 "DATE_DIFF" if f.args.len() == 3 => {
13297 let mut args = f.args;
13298 let arg0 = args.remove(0);
13299 let arg1 = args.remove(0);
13300 let arg2 = args.remove(0);
13301 let unit_str = Self::get_unit_str_static(&arg0);
13302
13303 match target {
13304 DialectType::DuckDB => {
13305 // DuckDB: DATE_DIFF('UNIT', start, end)
13306 Ok(Expression::Function(Box::new(Function::new(
13307 "DATE_DIFF".to_string(),
13308 vec![Expression::string(&unit_str), arg1, arg2],
13309 ))))
13310 }
13311 DialectType::Presto
13312 | DialectType::Trino
13313 | DialectType::Athena => {
13314 Ok(Expression::Function(Box::new(Function::new(
13315 "DATE_DIFF".to_string(),
13316 vec![Expression::string(&unit_str), arg1, arg2],
13317 ))))
13318 }
13319 DialectType::ClickHouse => {
13320 // ClickHouse: DATE_DIFF(UNIT, start, end) - identifier unit
13321 let unit =
13322 Expression::Identifier(Identifier::new(&unit_str));
13323 Ok(Expression::Function(Box::new(Function::new(
13324 "DATE_DIFF".to_string(),
13325 vec![unit, arg1, arg2],
13326 ))))
13327 }
13328 DialectType::Snowflake | DialectType::Redshift => {
13329 let unit =
13330 Expression::Identifier(Identifier::new(&unit_str));
13331 Ok(Expression::Function(Box::new(Function::new(
13332 "DATEDIFF".to_string(),
13333 vec![unit, arg1, arg2],
13334 ))))
13335 }
13336 _ => {
13337 let unit =
13338 Expression::Identifier(Identifier::new(&unit_str));
13339 Ok(Expression::Function(Box::new(Function::new(
13340 "DATEDIFF".to_string(),
13341 vec![unit, arg1, arg2],
13342 ))))
13343 }
13344 }
13345 }
// DATEADD(unit, val, date) - 3-arg form
"DATEADD" if f.args.len() == 3 => {
    let mut args = f.args;
    let arg0 = args.remove(0); // unit
    let arg1 = args.remove(0); // amount to add
    let arg2 = args.remove(0); // date/timestamp expression
    let unit_str = Self::get_unit_str_static(&arg0);

    // Normalize TSQL unit abbreviations to standard names
    let unit_str = match unit_str.as_str() {
        "YY" | "YYYY" => "YEAR".to_string(),
        "QQ" | "Q" => "QUARTER".to_string(),
        "MM" | "M" => "MONTH".to_string(),
        "WK" | "WW" => "WEEK".to_string(),
        "DD" | "D" | "DY" => "DAY".to_string(),
        "HH" => "HOUR".to_string(),
        "MI" | "N" => "MINUTE".to_string(),
        "SS" | "S" => "SECOND".to_string(),
        "MS" => "MILLISECOND".to_string(),
        "MCS" | "US" => "MICROSECOND".to_string(),
        _ => unit_str,
    };
    match target {
        DialectType::Snowflake => {
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            // Cast string literal to TIMESTAMP, but not for Snowflake source
            // (Snowflake natively accepts string literals in DATEADD)
            let arg2 = if matches!(
                &arg2,
                Expression::Literal(Literal::String(_))
            ) && !matches!(source, DialectType::Snowflake)
            {
                Expression::Cast(Box::new(Cast {
                    this: arg2,
                    to: DataType::Timestamp {
                        precision: None,
                        timezone: false,
                    },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                }))
            } else {
                arg2
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
        DialectType::TSQL => {
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            // Cast string literal to DATETIME2, but not when source is Spark/Databricks family
            let arg2 = if matches!(
                &arg2,
                Expression::Literal(Literal::String(_))
            ) && !matches!(
                source,
                DialectType::Spark
                    | DialectType::Databricks
                    | DialectType::Hive
            ) {
                Expression::Cast(Box::new(Cast {
                    this: arg2,
                    to: DataType::Custom {
                        name: "DATETIME2".to_string(),
                    },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                }))
            } else {
                arg2
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
        DialectType::Redshift => {
            // Redshift keeps the 3-arg DATEADD form unchanged.
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
        DialectType::Databricks => {
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            // Sources with native DATEADD (TSQL, Databricks, Snowflake) -> DATEADD
            // Other sources (Redshift TsOrDsAdd, etc.) -> DATE_ADD
            let func_name = if matches!(
                source,
                DialectType::TSQL
                    | DialectType::Fabric
                    | DialectType::Databricks
                    | DialectType::Snowflake
            ) {
                "DATEADD"
            } else {
                "DATE_ADD"
            };
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
        DialectType::DuckDB => {
            // Special handling for NANOSECOND from Snowflake
            if unit_str == "NANOSECOND"
                && matches!(source, DialectType::Snowflake)
            {
                // DATEADD(NANOSECOND, offset, ts) -> MAKE_TIMESTAMP_NS(EPOCH_NS(CAST(ts AS TIMESTAMP_NS)) + offset)
                let cast_ts = Expression::Cast(Box::new(Cast {
                    this: arg2,
                    to: DataType::Custom {
                        name: "TIMESTAMP_NS".to_string(),
                    },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                }));
                let epoch_ns =
                    Expression::Function(Box::new(Function::new(
                        "EPOCH_NS".to_string(),
                        vec![cast_ts],
                    )));
                let sum = Expression::Add(Box::new(BinaryOp::new(
                    epoch_ns, arg1,
                )));
                Ok(Expression::Function(Box::new(Function::new(
                    "MAKE_TIMESTAMP_NS".to_string(),
                    vec![sum],
                ))))
            } else {
                // DuckDB: convert to date + INTERVAL syntax with CAST
                let iu = Self::parse_interval_unit_static(&unit_str);
                let interval = Expression::Interval(Box::new(
                    crate::expressions::Interval {
                        this: Some(arg1),
                        unit: Some(
                            crate::expressions::IntervalUnitSpec::Simple {
                                unit: iu,
                                use_plural: false,
                            },
                        ),
                    },
                ));
                // Cast string literal to TIMESTAMP
                let arg2 = if matches!(
                    &arg2,
                    Expression::Literal(Literal::String(_))
                ) {
                    Expression::Cast(Box::new(Cast {
                        this: arg2,
                        to: DataType::Timestamp {
                            precision: None,
                            timezone: false,
                        },
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    }))
                } else {
                    arg2
                };
                Ok(Expression::Add(Box::new(
                    crate::expressions::BinaryOp::new(arg2, interval),
                )))
            }
        }
        DialectType::Spark => {
            // For TSQL source: convert to ADD_MONTHS/DATE_ADD(date, val)
            // For other sources: keep 3-arg DATE_ADD(UNIT, val, date) form
            if matches!(source, DialectType::TSQL | DialectType::Fabric)
            {
                // Local helper: multiply the amount by a constant factor,
                // folding the product when the amount is a numeric literal.
                fn multiply_expr_spark(
                    expr: Expression,
                    factor: i64,
                ) -> Expression
                {
                    if let Expression::Literal(
                        crate::expressions::Literal::Number(n),
                    ) = &expr
                    {
                        if let Ok(val) = n.parse::<i64>() {
                            return Expression::Literal(
                                crate::expressions::Literal::Number(
                                    (val * factor).to_string(),
                                ),
                            );
                        }
                    }
                    Expression::Mul(Box::new(
                        crate::expressions::BinaryOp::new(
                            expr,
                            Expression::Literal(
                                crate::expressions::Literal::Number(
                                    factor.to_string(),
                                ),
                            ),
                        ),
                    ))
                }
                // NOTE(review): the abbreviation alternatives below look
                // redundant — unit_str was already normalized from TSQL
                // abbreviations at the top of this arm.
                let normalized_unit = match unit_str.as_str() {
                    "YEAR" | "YY" | "YYYY" => "YEAR",
                    "QUARTER" | "QQ" | "Q" => "QUARTER",
                    "MONTH" | "MM" | "M" => "MONTH",
                    "WEEK" | "WK" | "WW" => "WEEK",
                    "DAY" | "DD" | "D" | "DY" => "DAY",
                    _ => &unit_str,
                };
                match normalized_unit {
                    // YEAR/QUARTER are expressed in months for ADD_MONTHS.
                    "YEAR" => {
                        let months = multiply_expr_spark(arg1, 12);
                        Ok(Expression::Function(Box::new(
                            Function::new(
                                "ADD_MONTHS".to_string(),
                                vec![arg2, months],
                            ),
                        )))
                    }
                    "QUARTER" => {
                        let months = multiply_expr_spark(arg1, 3);
                        Ok(Expression::Function(Box::new(
                            Function::new(
                                "ADD_MONTHS".to_string(),
                                vec![arg2, months],
                            ),
                        )))
                    }
                    "MONTH" => Ok(Expression::Function(Box::new(
                        Function::new(
                            "ADD_MONTHS".to_string(),
                            vec![arg2, arg1],
                        ),
                    ))),
                    // WEEK is expressed in days for DATE_ADD.
                    "WEEK" => {
                        let days = multiply_expr_spark(arg1, 7);
                        Ok(Expression::Function(Box::new(
                            Function::new(
                                "DATE_ADD".to_string(),
                                vec![arg2, days],
                            ),
                        )))
                    }
                    "DAY" => Ok(Expression::Function(Box::new(
                        Function::new(
                            "DATE_ADD".to_string(),
                            vec![arg2, arg1],
                        ),
                    ))),
                    // Sub-day units: fall back to the 3-arg form.
                    _ => {
                        let unit = Expression::Identifier(
                            Identifier::new(&unit_str),
                        );
                        Ok(Expression::Function(Box::new(
                            Function::new(
                                "DATE_ADD".to_string(),
                                vec![unit, arg1, arg2],
                            ),
                        )))
                    }
                }
            } else {
                // Non-TSQL source: keep 3-arg DATE_ADD(UNIT, val, date)
                let unit =
                    Expression::Identifier(Identifier::new(&unit_str));
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![unit, arg1, arg2],
                ))))
            }
        }
        // Hive: MONTH -> ADD_MONTHS(date, n); everything else is emitted
        // as 2-arg DATE_ADD(date, n) with the unit dropped.
        DialectType::Hive => match unit_str.as_str() {
            "MONTH" => {
                Ok(Expression::Function(Box::new(Function::new(
                    "ADD_MONTHS".to_string(),
                    vec![arg2, arg1],
                ))))
            }
            _ => Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![arg2, arg1],
            )))),
        },
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // Cast string literal date to TIMESTAMP
            let arg2 = if matches!(
                &arg2,
                Expression::Literal(Literal::String(_))
            ) {
                Expression::Cast(Box::new(Cast {
                    this: arg2,
                    to: DataType::Timestamp {
                        precision: None,
                        timezone: false,
                    },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                }))
            } else {
                arg2
            };
            // Presto family takes the unit as a string literal.
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![Expression::string(&unit_str), arg1, arg2],
            ))))
        }
        DialectType::MySQL => {
            // MySQL uses a dedicated DateAdd AST node
            // (DATE_ADD(date, INTERVAL n UNIT) syntax).
            let iu = Self::parse_interval_unit_static(&unit_str);
            Ok(Expression::DateAdd(Box::new(
                crate::expressions::DateAddFunc {
                    this: arg2,
                    interval: arg1,
                    unit: iu,
                },
            )))
        }
        DialectType::PostgreSQL => {
            // Cast string literal date to TIMESTAMP
            let arg2 = if matches!(
                &arg2,
                Expression::Literal(Literal::String(_))
            ) {
                Expression::Cast(Box::new(Cast {
                    this: arg2,
                    to: DataType::Timestamp {
                        precision: None,
                        timezone: false,
                    },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                }))
            } else {
                arg2
            };
            // PostgreSQL: date + INTERVAL 'n UNIT' (amount and unit are
            // flattened into one string literal).
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(Expression::string(&format!(
                        "{} {}",
                        Self::expr_to_string_static(&arg1),
                        unit_str
                    ))),
                    unit: None,
                },
            ));
            Ok(Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(arg2, interval),
            )))
        }
        DialectType::BigQuery => {
            // BigQuery: DATE_ADD(date, INTERVAL n UNIT).
            let iu = Self::parse_interval_unit_static(&unit_str);
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(arg1),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit: iu,
                            use_plural: false,
                        },
                    ),
                },
            ));
            // Non-TSQL sources: CAST string literal to DATETIME
            let arg2 = if !matches!(
                source,
                DialectType::TSQL | DialectType::Fabric
            ) && matches!(
                &arg2,
                Expression::Literal(Literal::String(_))
            ) {
                Expression::Cast(Box::new(Cast {
                    this: arg2,
                    to: DataType::Custom {
                        name: "DATETIME".to_string(),
                    },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                }))
            } else {
                arg2
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![arg2, interval],
            ))))
        }
        _ => {
            // Default: keep the standard 3-arg DATEADD form.
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
    }
}
13759 // DATE_ADD - 3-arg: either (unit, val, date) from Presto/ClickHouse
13760 // or (date, val, 'UNIT') from Generic canonical form
13761 "DATE_ADD" if f.args.len() == 3 => {
13762 let mut args = f.args;
13763 let arg0 = args.remove(0);
13764 let arg1 = args.remove(0);
13765 let arg2 = args.remove(0);
13766 // Detect Generic canonical form: DATE_ADD(date, amount, 'UNIT')
13767 // where arg2 is a string literal matching a unit name
13768 let arg2_unit = match &arg2 {
13769 Expression::Literal(Literal::String(s)) => {
13770 let u = s.to_uppercase();
13771 if matches!(
13772 u.as_str(),
13773 "DAY"
13774 | "MONTH"
13775 | "YEAR"
13776 | "HOUR"
13777 | "MINUTE"
13778 | "SECOND"
13779 | "WEEK"
13780 | "QUARTER"
13781 | "MILLISECOND"
13782 | "MICROSECOND"
13783 ) {
13784 Some(u)
13785 } else {
13786 None
13787 }
13788 }
13789 _ => None,
13790 };
13791 // Reorder: if arg2 is the unit, swap to (unit, val, date) form
13792 let (unit_str, val, date) = if let Some(u) = arg2_unit {
13793 (u, arg1, arg0)
13794 } else {
13795 (Self::get_unit_str_static(&arg0), arg1, arg2)
13796 };
13797 // Alias for backward compat with the rest of the match
13798 let arg1 = val;
13799 let arg2 = date;
13800
13801 match target {
13802 DialectType::Presto
13803 | DialectType::Trino
13804 | DialectType::Athena => {
13805 Ok(Expression::Function(Box::new(Function::new(
13806 "DATE_ADD".to_string(),
13807 vec![Expression::string(&unit_str), arg1, arg2],
13808 ))))
13809 }
13810 DialectType::DuckDB => {
13811 let iu = Self::parse_interval_unit_static(&unit_str);
13812 let interval = Expression::Interval(Box::new(
13813 crate::expressions::Interval {
13814 this: Some(arg1),
13815 unit: Some(
13816 crate::expressions::IntervalUnitSpec::Simple {
13817 unit: iu,
13818 use_plural: false,
13819 },
13820 ),
13821 },
13822 ));
13823 Ok(Expression::Add(Box::new(
13824 crate::expressions::BinaryOp::new(arg2, interval),
13825 )))
13826 }
13827 DialectType::PostgreSQL
13828 | DialectType::Materialize
13829 | DialectType::RisingWave => {
13830 // PostgreSQL: x + INTERVAL '1 DAY'
13831 let amount_str = Self::expr_to_string_static(&arg1);
13832 let interval = Expression::Interval(Box::new(
13833 crate::expressions::Interval {
13834 this: Some(Expression::string(&format!(
13835 "{} {}",
13836 amount_str, unit_str
13837 ))),
13838 unit: None,
13839 },
13840 ));
13841 Ok(Expression::Add(Box::new(
13842 crate::expressions::BinaryOp::new(arg2, interval),
13843 )))
13844 }
13845 DialectType::Snowflake
13846 | DialectType::TSQL
13847 | DialectType::Redshift => {
13848 let unit =
13849 Expression::Identifier(Identifier::new(&unit_str));
13850 Ok(Expression::Function(Box::new(Function::new(
13851 "DATEADD".to_string(),
13852 vec![unit, arg1, arg2],
13853 ))))
13854 }
13855 DialectType::BigQuery
13856 | DialectType::MySQL
13857 | DialectType::Doris
13858 | DialectType::StarRocks
13859 | DialectType::Drill => {
13860 // DATE_ADD(date, INTERVAL amount UNIT)
13861 let iu = Self::parse_interval_unit_static(&unit_str);
13862 let interval = Expression::Interval(Box::new(
13863 crate::expressions::Interval {
13864 this: Some(arg1),
13865 unit: Some(
13866 crate::expressions::IntervalUnitSpec::Simple {
13867 unit: iu,
13868 use_plural: false,
13869 },
13870 ),
13871 },
13872 ));
13873 Ok(Expression::Function(Box::new(Function::new(
13874 "DATE_ADD".to_string(),
13875 vec![arg2, interval],
13876 ))))
13877 }
13878 DialectType::SQLite => {
13879 // SQLite: DATE(x, '1 DAY')
13880 // Build the string '1 DAY' from amount and unit
13881 let amount_str = match &arg1 {
13882 Expression::Literal(Literal::Number(n)) => n.clone(),
13883 _ => "1".to_string(),
13884 };
13885 Ok(Expression::Function(Box::new(Function::new(
13886 "DATE".to_string(),
13887 vec![
13888 arg2,
13889 Expression::string(format!(
13890 "{} {}",
13891 amount_str, unit_str
13892 )),
13893 ],
13894 ))))
13895 }
13896 DialectType::Dremio => {
13897 // Dremio: DATE_ADD(date, amount) - drops unit
13898 Ok(Expression::Function(Box::new(Function::new(
13899 "DATE_ADD".to_string(),
13900 vec![arg2, arg1],
13901 ))))
13902 }
13903 DialectType::Spark => {
13904 // Spark: DATE_ADD(date, val) for DAY, or DATEADD(UNIT, val, date)
13905 if unit_str == "DAY" {
13906 Ok(Expression::Function(Box::new(Function::new(
13907 "DATE_ADD".to_string(),
13908 vec![arg2, arg1],
13909 ))))
13910 } else {
13911 let unit =
13912 Expression::Identifier(Identifier::new(&unit_str));
13913 Ok(Expression::Function(Box::new(Function::new(
13914 "DATE_ADD".to_string(),
13915 vec![unit, arg1, arg2],
13916 ))))
13917 }
13918 }
13919 DialectType::Databricks => {
13920 let unit =
13921 Expression::Identifier(Identifier::new(&unit_str));
13922 Ok(Expression::Function(Box::new(Function::new(
13923 "DATE_ADD".to_string(),
13924 vec![unit, arg1, arg2],
13925 ))))
13926 }
13927 DialectType::Hive => {
13928 // Hive: DATE_ADD(date, val) for DAY
13929 Ok(Expression::Function(Box::new(Function::new(
13930 "DATE_ADD".to_string(),
13931 vec![arg2, arg1],
13932 ))))
13933 }
13934 _ => {
13935 let unit =
13936 Expression::Identifier(Identifier::new(&unit_str));
13937 Ok(Expression::Function(Box::new(Function::new(
13938 "DATE_ADD".to_string(),
13939 vec![unit, arg1, arg2],
13940 ))))
13941 }
13942 }
13943 }
13944 // DATE_ADD(date, days) - 2-arg Hive/Spark/Generic form (add days)
13945 "DATE_ADD"
13946 if f.args.len() == 2
13947 && matches!(
13948 source,
13949 DialectType::Hive
13950 | DialectType::Spark
13951 | DialectType::Databricks
13952 | DialectType::Generic
13953 ) =>
13954 {
13955 let mut args = f.args;
13956 let date = args.remove(0);
13957 let days = args.remove(0);
13958 match target {
13959 DialectType::Hive | DialectType::Spark => {
13960 // Keep as DATE_ADD(date, days) for Hive/Spark
13961 Ok(Expression::Function(Box::new(Function::new(
13962 "DATE_ADD".to_string(),
13963 vec![date, days],
13964 ))))
13965 }
13966 DialectType::Databricks => {
13967 // Databricks: DATEADD(DAY, days, date)
13968 Ok(Expression::Function(Box::new(Function::new(
13969 "DATEADD".to_string(),
13970 vec![
13971 Expression::Identifier(Identifier::new("DAY")),
13972 days,
13973 date,
13974 ],
13975 ))))
13976 }
13977 DialectType::DuckDB => {
13978 // DuckDB: CAST(date AS DATE) + INTERVAL days DAY
13979 let cast_date = Self::ensure_cast_date(date);
13980 // Wrap complex expressions (like Mul from DATE_SUB negation) in Paren
13981 let interval_val = if matches!(
13982 days,
13983 Expression::Mul(_)
13984 | Expression::Sub(_)
13985 | Expression::Add(_)
13986 ) {
13987 Expression::Paren(Box::new(crate::expressions::Paren {
13988 this: days,
13989 trailing_comments: vec![],
13990 }))
13991 } else {
13992 days
13993 };
13994 let interval = Expression::Interval(Box::new(
13995 crate::expressions::Interval {
13996 this: Some(interval_val),
13997 unit: Some(
13998 crate::expressions::IntervalUnitSpec::Simple {
13999 unit: crate::expressions::IntervalUnit::Day,
14000 use_plural: false,
14001 },
14002 ),
14003 },
14004 ));
14005 Ok(Expression::Add(Box::new(
14006 crate::expressions::BinaryOp::new(cast_date, interval),
14007 )))
14008 }
14009 DialectType::Snowflake => {
14010 // For Hive source with string literal date, use CAST(CAST(date AS TIMESTAMP) AS DATE)
14011 let cast_date = if matches!(
14012 source,
14013 DialectType::Hive
14014 | DialectType::Spark
14015 | DialectType::Databricks
14016 ) {
14017 if matches!(
14018 date,
14019 Expression::Literal(Literal::String(_))
14020 ) {
14021 Self::double_cast_timestamp_date(date)
14022 } else {
14023 date
14024 }
14025 } else {
14026 date
14027 };
14028 Ok(Expression::Function(Box::new(Function::new(
14029 "DATEADD".to_string(),
14030 vec![
14031 Expression::Identifier(Identifier::new("DAY")),
14032 days,
14033 cast_date,
14034 ],
14035 ))))
14036 }
14037 DialectType::Redshift => {
14038 Ok(Expression::Function(Box::new(Function::new(
14039 "DATEADD".to_string(),
14040 vec![
14041 Expression::Identifier(Identifier::new("DAY")),
14042 days,
14043 date,
14044 ],
14045 ))))
14046 }
14047 DialectType::TSQL | DialectType::Fabric => {
14048 // For Hive source with string literal date, use CAST(CAST(date AS DATETIME2) AS DATE)
14049 // But Databricks DATE_ADD doesn't need this wrapping for TSQL
14050 let cast_date = if matches!(
14051 source,
14052 DialectType::Hive | DialectType::Spark
14053 ) {
14054 if matches!(
14055 date,
14056 Expression::Literal(Literal::String(_))
14057 ) {
14058 Self::double_cast_datetime2_date(date)
14059 } else {
14060 date
14061 }
14062 } else {
14063 date
14064 };
14065 Ok(Expression::Function(Box::new(Function::new(
14066 "DATEADD".to_string(),
14067 vec![
14068 Expression::Identifier(Identifier::new("DAY")),
14069 days,
14070 cast_date,
14071 ],
14072 ))))
14073 }
14074 DialectType::Presto
14075 | DialectType::Trino
14076 | DialectType::Athena => {
14077 // For Hive source with string literal date, use CAST(CAST(date AS TIMESTAMP) AS DATE)
14078 let cast_date = if matches!(
14079 source,
14080 DialectType::Hive
14081 | DialectType::Spark
14082 | DialectType::Databricks
14083 ) {
14084 if matches!(
14085 date,
14086 Expression::Literal(Literal::String(_))
14087 ) {
14088 Self::double_cast_timestamp_date(date)
14089 } else {
14090 date
14091 }
14092 } else {
14093 date
14094 };
14095 Ok(Expression::Function(Box::new(Function::new(
14096 "DATE_ADD".to_string(),
14097 vec![Expression::string("DAY"), days, cast_date],
14098 ))))
14099 }
14100 DialectType::BigQuery => {
14101 // For Hive/Spark source, wrap date in CAST(CAST(date AS DATETIME) AS DATE)
14102 let cast_date = if matches!(
14103 source,
14104 DialectType::Hive
14105 | DialectType::Spark
14106 | DialectType::Databricks
14107 ) {
14108 Self::double_cast_datetime_date(date)
14109 } else {
14110 date
14111 };
14112 // Wrap complex expressions in Paren for interval
14113 let interval_val = if matches!(
14114 days,
14115 Expression::Mul(_)
14116 | Expression::Sub(_)
14117 | Expression::Add(_)
14118 ) {
14119 Expression::Paren(Box::new(crate::expressions::Paren {
14120 this: days,
14121 trailing_comments: vec![],
14122 }))
14123 } else {
14124 days
14125 };
14126 let interval = Expression::Interval(Box::new(
14127 crate::expressions::Interval {
14128 this: Some(interval_val),
14129 unit: Some(
14130 crate::expressions::IntervalUnitSpec::Simple {
14131 unit: crate::expressions::IntervalUnit::Day,
14132 use_plural: false,
14133 },
14134 ),
14135 },
14136 ));
14137 Ok(Expression::Function(Box::new(Function::new(
14138 "DATE_ADD".to_string(),
14139 vec![cast_date, interval],
14140 ))))
14141 }
14142 DialectType::MySQL => {
14143 let iu = crate::expressions::IntervalUnit::Day;
14144 Ok(Expression::DateAdd(Box::new(
14145 crate::expressions::DateAddFunc {
14146 this: date,
14147 interval: days,
14148 unit: iu,
14149 },
14150 )))
14151 }
14152 DialectType::PostgreSQL => {
14153 let interval = Expression::Interval(Box::new(
14154 crate::expressions::Interval {
14155 this: Some(Expression::string(&format!(
14156 "{} DAY",
14157 Self::expr_to_string_static(&days)
14158 ))),
14159 unit: None,
14160 },
14161 ));
14162 Ok(Expression::Add(Box::new(
14163 crate::expressions::BinaryOp::new(date, interval),
14164 )))
14165 }
14166 DialectType::Doris
14167 | DialectType::StarRocks
14168 | DialectType::Drill => {
14169 // DATE_ADD(date, INTERVAL days DAY)
14170 let interval = Expression::Interval(Box::new(
14171 crate::expressions::Interval {
14172 this: Some(days),
14173 unit: Some(
14174 crate::expressions::IntervalUnitSpec::Simple {
14175 unit: crate::expressions::IntervalUnit::Day,
14176 use_plural: false,
14177 },
14178 ),
14179 },
14180 ));
14181 Ok(Expression::Function(Box::new(Function::new(
14182 "DATE_ADD".to_string(),
14183 vec![date, interval],
14184 ))))
14185 }
14186 _ => Ok(Expression::Function(Box::new(Function::new(
14187 "DATE_ADD".to_string(),
14188 vec![date, days],
14189 )))),
14190 }
14191 }
14192 // DATE_SUB(date, days) - 2-arg Hive/Spark form (subtract days)
14193 "DATE_SUB"
14194 if f.args.len() == 2
14195 && matches!(
14196 source,
14197 DialectType::Hive
14198 | DialectType::Spark
14199 | DialectType::Databricks
14200 ) =>
14201 {
14202 let mut args = f.args;
14203 let date = args.remove(0);
14204 let days = args.remove(0);
14205 // Helper to create days * -1
14206 let make_neg_days = |d: Expression| -> Expression {
14207 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
14208 d,
14209 Expression::Literal(Literal::Number("-1".to_string())),
14210 )))
14211 };
14212 let is_string_literal =
14213 matches!(date, Expression::Literal(Literal::String(_)));
14214 match target {
14215 DialectType::Hive
14216 | DialectType::Spark
14217 | DialectType::Databricks => {
14218 // Keep as DATE_SUB(date, days) for Hive/Spark
14219 Ok(Expression::Function(Box::new(Function::new(
14220 "DATE_SUB".to_string(),
14221 vec![date, days],
14222 ))))
14223 }
14224 DialectType::DuckDB => {
14225 let cast_date = Self::ensure_cast_date(date);
14226 let neg = make_neg_days(days);
14227 let interval = Expression::Interval(Box::new(
14228 crate::expressions::Interval {
14229 this: Some(Expression::Paren(Box::new(
14230 crate::expressions::Paren {
14231 this: neg,
14232 trailing_comments: vec![],
14233 },
14234 ))),
14235 unit: Some(
14236 crate::expressions::IntervalUnitSpec::Simple {
14237 unit: crate::expressions::IntervalUnit::Day,
14238 use_plural: false,
14239 },
14240 ),
14241 },
14242 ));
14243 Ok(Expression::Add(Box::new(
14244 crate::expressions::BinaryOp::new(cast_date, interval),
14245 )))
14246 }
14247 DialectType::Snowflake => {
14248 let cast_date = if is_string_literal {
14249 Self::double_cast_timestamp_date(date)
14250 } else {
14251 date
14252 };
14253 let neg = make_neg_days(days);
14254 Ok(Expression::Function(Box::new(Function::new(
14255 "DATEADD".to_string(),
14256 vec![
14257 Expression::Identifier(Identifier::new("DAY")),
14258 neg,
14259 cast_date,
14260 ],
14261 ))))
14262 }
14263 DialectType::Redshift => {
14264 let neg = make_neg_days(days);
14265 Ok(Expression::Function(Box::new(Function::new(
14266 "DATEADD".to_string(),
14267 vec![
14268 Expression::Identifier(Identifier::new("DAY")),
14269 neg,
14270 date,
14271 ],
14272 ))))
14273 }
14274 DialectType::TSQL | DialectType::Fabric => {
14275 let cast_date = if is_string_literal {
14276 Self::double_cast_datetime2_date(date)
14277 } else {
14278 date
14279 };
14280 let neg = make_neg_days(days);
14281 Ok(Expression::Function(Box::new(Function::new(
14282 "DATEADD".to_string(),
14283 vec![
14284 Expression::Identifier(Identifier::new("DAY")),
14285 neg,
14286 cast_date,
14287 ],
14288 ))))
14289 }
14290 DialectType::Presto
14291 | DialectType::Trino
14292 | DialectType::Athena => {
14293 let cast_date = if is_string_literal {
14294 Self::double_cast_timestamp_date(date)
14295 } else {
14296 date
14297 };
14298 let neg = make_neg_days(days);
14299 Ok(Expression::Function(Box::new(Function::new(
14300 "DATE_ADD".to_string(),
14301 vec![Expression::string("DAY"), neg, cast_date],
14302 ))))
14303 }
14304 DialectType::BigQuery => {
14305 let cast_date = if is_string_literal {
14306 Self::double_cast_datetime_date(date)
14307 } else {
14308 date
14309 };
14310 let neg = make_neg_days(days);
14311 let interval = Expression::Interval(Box::new(
14312 crate::expressions::Interval {
14313 this: Some(Expression::Paren(Box::new(
14314 crate::expressions::Paren {
14315 this: neg,
14316 trailing_comments: vec![],
14317 },
14318 ))),
14319 unit: Some(
14320 crate::expressions::IntervalUnitSpec::Simple {
14321 unit: crate::expressions::IntervalUnit::Day,
14322 use_plural: false,
14323 },
14324 ),
14325 },
14326 ));
14327 Ok(Expression::Function(Box::new(Function::new(
14328 "DATE_ADD".to_string(),
14329 vec![cast_date, interval],
14330 ))))
14331 }
14332 _ => Ok(Expression::Function(Box::new(Function::new(
14333 "DATE_SUB".to_string(),
14334 vec![date, days],
14335 )))),
14336 }
14337 }
14338 // ADD_MONTHS(date, val) -> target-specific
14339 "ADD_MONTHS" if f.args.len() == 2 => {
14340 let mut args = f.args;
14341 let date = args.remove(0);
14342 let val = args.remove(0);
14343 match target {
14344 DialectType::TSQL => {
14345 let cast_date = Self::ensure_cast_datetime2(date);
14346 Ok(Expression::Function(Box::new(Function::new(
14347 "DATEADD".to_string(),
14348 vec![
14349 Expression::Identifier(Identifier::new("MONTH")),
14350 val,
14351 cast_date,
14352 ],
14353 ))))
14354 }
14355 DialectType::DuckDB => {
14356 let interval = Expression::Interval(Box::new(
14357 crate::expressions::Interval {
14358 this: Some(val),
14359 unit: Some(
14360 crate::expressions::IntervalUnitSpec::Simple {
14361 unit:
14362 crate::expressions::IntervalUnit::Month,
14363 use_plural: false,
14364 },
14365 ),
14366 },
14367 ));
14368 Ok(Expression::Add(Box::new(
14369 crate::expressions::BinaryOp::new(date, interval),
14370 )))
14371 }
14372 DialectType::Snowflake => {
14373 // Keep ADD_MONTHS when source is Snowflake
14374 if matches!(source, DialectType::Snowflake) {
14375 Ok(Expression::Function(Box::new(Function::new(
14376 "ADD_MONTHS".to_string(),
14377 vec![date, val],
14378 ))))
14379 } else {
14380 Ok(Expression::Function(Box::new(Function::new(
14381 "DATEADD".to_string(),
14382 vec![
14383 Expression::Identifier(Identifier::new(
14384 "MONTH",
14385 )),
14386 val,
14387 date,
14388 ],
14389 ))))
14390 }
14391 }
14392 DialectType::Redshift => {
14393 Ok(Expression::Function(Box::new(Function::new(
14394 "DATEADD".to_string(),
14395 vec![
14396 Expression::Identifier(Identifier::new("MONTH")),
14397 val,
14398 date,
14399 ],
14400 ))))
14401 }
14402 DialectType::Presto
14403 | DialectType::Trino
14404 | DialectType::Athena => {
14405 Ok(Expression::Function(Box::new(Function::new(
14406 "DATE_ADD".to_string(),
14407 vec![Expression::string("MONTH"), val, date],
14408 ))))
14409 }
14410 DialectType::BigQuery => {
14411 let interval = Expression::Interval(Box::new(
14412 crate::expressions::Interval {
14413 this: Some(val),
14414 unit: Some(
14415 crate::expressions::IntervalUnitSpec::Simple {
14416 unit:
14417 crate::expressions::IntervalUnit::Month,
14418 use_plural: false,
14419 },
14420 ),
14421 },
14422 ));
14423 Ok(Expression::Function(Box::new(Function::new(
14424 "DATE_ADD".to_string(),
14425 vec![date, interval],
14426 ))))
14427 }
14428 _ => Ok(Expression::Function(Box::new(Function::new(
14429 "ADD_MONTHS".to_string(),
14430 vec![date, val],
14431 )))),
14432 }
14433 }
14434 // DATETRUNC(unit, date) - TSQL form -> DATE_TRUNC for other targets
14435 "DATETRUNC" if f.args.len() == 2 => {
14436 let mut args = f.args;
14437 let arg0 = args.remove(0);
14438 let arg1 = args.remove(0);
14439 let unit_str = Self::get_unit_str_static(&arg0);
14440 match target {
14441 DialectType::TSQL | DialectType::Fabric => {
14442 // Keep as DATETRUNC for TSQL - the target handler will uppercase the unit
14443 Ok(Expression::Function(Box::new(Function::new(
14444 "DATETRUNC".to_string(),
14445 vec![
14446 Expression::Identifier(Identifier::new(&unit_str)),
14447 arg1,
14448 ],
14449 ))))
14450 }
14451 DialectType::DuckDB => {
14452 // DuckDB: DATE_TRUNC('UNIT', expr) with CAST for string literals
14453 let date = Self::ensure_cast_timestamp(arg1);
14454 Ok(Expression::Function(Box::new(Function::new(
14455 "DATE_TRUNC".to_string(),
14456 vec![Expression::string(&unit_str), date],
14457 ))))
14458 }
14459 DialectType::ClickHouse => {
14460 // ClickHouse: dateTrunc('UNIT', expr)
14461 Ok(Expression::Function(Box::new(Function::new(
14462 "dateTrunc".to_string(),
14463 vec![Expression::string(&unit_str), arg1],
14464 ))))
14465 }
14466 _ => {
14467 // Standard: DATE_TRUNC('UNIT', expr)
14468 let unit = Expression::string(&unit_str);
14469 Ok(Expression::Function(Box::new(Function::new(
14470 "DATE_TRUNC".to_string(),
14471 vec![unit, arg1],
14472 ))))
14473 }
14474 }
14475 }
14476 // GETDATE() -> CURRENT_TIMESTAMP for non-TSQL targets
14477 "GETDATE" if f.args.is_empty() => match target {
14478 DialectType::TSQL => Ok(Expression::Function(f)),
14479 DialectType::Redshift => Ok(Expression::Function(Box::new(
14480 Function::new("GETDATE".to_string(), vec![]),
14481 ))),
14482 _ => Ok(Expression::CurrentTimestamp(
14483 crate::expressions::CurrentTimestamp {
14484 precision: None,
14485 sysdate: false,
14486 },
14487 )),
14488 },
14489 // TO_HEX(x) / HEX(x) -> target-specific hex function
14490 "TO_HEX" | "HEX" if f.args.len() == 1 => {
14491 let name = match target {
14492 DialectType::Presto | DialectType::Trino => "TO_HEX",
14493 DialectType::Spark
14494 | DialectType::Databricks
14495 | DialectType::Hive => "HEX",
14496 DialectType::DuckDB
14497 | DialectType::PostgreSQL
14498 | DialectType::Redshift => "TO_HEX",
14499 _ => &f.name,
14500 };
14501 Ok(Expression::Function(Box::new(Function::new(
14502 name.to_string(),
14503 f.args,
14504 ))))
14505 }
14506 // FROM_HEX(x) / UNHEX(x) -> target-specific hex decode function
14507 "FROM_HEX" | "UNHEX" if f.args.len() == 1 => {
14508 match target {
14509 DialectType::BigQuery => {
14510 // BigQuery: UNHEX(x) -> FROM_HEX(x)
14511 // Special case: UNHEX(MD5(x)) -> FROM_HEX(TO_HEX(MD5(x)))
14512 // because BigQuery MD5 returns BYTES, not hex string
14513 let arg = &f.args[0];
14514 let wrapped_arg = match arg {
14515 Expression::Function(inner_f)
14516 if inner_f.name.to_uppercase() == "MD5"
14517 || inner_f.name.to_uppercase() == "SHA1"
14518 || inner_f.name.to_uppercase() == "SHA256"
14519 || inner_f.name.to_uppercase() == "SHA512" =>
14520 {
14521 // Wrap hash function in TO_HEX for BigQuery
14522 Expression::Function(Box::new(Function::new(
14523 "TO_HEX".to_string(),
14524 vec![arg.clone()],
14525 )))
14526 }
14527 _ => f.args.into_iter().next().unwrap(),
14528 };
14529 Ok(Expression::Function(Box::new(Function::new(
14530 "FROM_HEX".to_string(),
14531 vec![wrapped_arg],
14532 ))))
14533 }
14534 _ => {
14535 let name = match target {
14536 DialectType::Presto | DialectType::Trino => "FROM_HEX",
14537 DialectType::Spark
14538 | DialectType::Databricks
14539 | DialectType::Hive => "UNHEX",
14540 _ => &f.name,
14541 };
14542 Ok(Expression::Function(Box::new(Function::new(
14543 name.to_string(),
14544 f.args,
14545 ))))
14546 }
14547 }
14548 }
14549 // TO_UTF8(x) -> ENCODE(x, 'utf-8') for Spark
14550 "TO_UTF8" if f.args.len() == 1 => match target {
14551 DialectType::Spark | DialectType::Databricks => {
14552 let mut args = f.args;
14553 args.push(Expression::string("utf-8"));
14554 Ok(Expression::Function(Box::new(Function::new(
14555 "ENCODE".to_string(),
14556 args,
14557 ))))
14558 }
14559 _ => Ok(Expression::Function(f)),
14560 },
14561 // FROM_UTF8(x) -> DECODE(x, 'utf-8') for Spark
14562 "FROM_UTF8" if f.args.len() == 1 => match target {
14563 DialectType::Spark | DialectType::Databricks => {
14564 let mut args = f.args;
14565 args.push(Expression::string("utf-8"));
14566 Ok(Expression::Function(Box::new(Function::new(
14567 "DECODE".to_string(),
14568 args,
14569 ))))
14570 }
14571 _ => Ok(Expression::Function(f)),
14572 },
14573 // STARTS_WITH(x, y) / STARTSWITH(x, y) -> target-specific
14574 "STARTS_WITH" | "STARTSWITH" if f.args.len() == 2 => {
14575 let name = match target {
14576 DialectType::Spark | DialectType::Databricks => "STARTSWITH",
14577 DialectType::Presto | DialectType::Trino => "STARTS_WITH",
14578 DialectType::PostgreSQL | DialectType::Redshift => {
14579 "STARTS_WITH"
14580 }
14581 _ => &f.name,
14582 };
14583 Ok(Expression::Function(Box::new(Function::new(
14584 name.to_string(),
14585 f.args,
14586 ))))
14587 }
14588 // APPROX_COUNT_DISTINCT(x) <-> APPROX_DISTINCT(x)
14589 "APPROX_COUNT_DISTINCT" if f.args.len() >= 1 => {
14590 let name = match target {
14591 DialectType::Presto
14592 | DialectType::Trino
14593 | DialectType::Athena => "APPROX_DISTINCT",
14594 _ => "APPROX_COUNT_DISTINCT",
14595 };
14596 Ok(Expression::Function(Box::new(Function::new(
14597 name.to_string(),
14598 f.args,
14599 ))))
14600 }
14601 // JSON_EXTRACT -> GET_JSON_OBJECT for Spark/Hive
14602 "JSON_EXTRACT"
14603 if f.args.len() == 2
14604 && !matches!(source, DialectType::BigQuery)
14605 && matches!(
14606 target,
14607 DialectType::Spark
14608 | DialectType::Databricks
14609 | DialectType::Hive
14610 ) =>
14611 {
14612 Ok(Expression::Function(Box::new(Function::new(
14613 "GET_JSON_OBJECT".to_string(),
14614 f.args,
14615 ))))
14616 }
14617 // JSON_EXTRACT(x, path) -> x -> path for SQLite (arrow syntax)
14618 "JSON_EXTRACT"
14619 if f.args.len() == 2 && matches!(target, DialectType::SQLite) =>
14620 {
14621 let mut args = f.args;
14622 let path = args.remove(1);
14623 let this = args.remove(0);
14624 Ok(Expression::JsonExtract(Box::new(
14625 crate::expressions::JsonExtractFunc {
14626 this,
14627 path,
14628 returning: None,
14629 arrow_syntax: true,
14630 hash_arrow_syntax: false,
14631 wrapper_option: None,
14632 quotes_option: None,
14633 on_scalar_string: false,
14634 on_error: None,
14635 },
14636 )))
14637 }
14638 // JSON_FORMAT(x) -> TO_JSON(x) for Spark, TO_JSON_STRING for BigQuery, CAST(TO_JSON(x) AS TEXT) for DuckDB
14639 "JSON_FORMAT" if f.args.len() == 1 => {
14640 match target {
14641 DialectType::Spark | DialectType::Databricks => {
14642 // Presto JSON_FORMAT(JSON '...') needs Spark's string-unquoting flow:
14643 // REGEXP_EXTRACT(TO_JSON(FROM_JSON('[...]', SCHEMA_OF_JSON('[...]'))), '^.(.*).$', 1)
14644 if matches!(
14645 source,
14646 DialectType::Presto
14647 | DialectType::Trino
14648 | DialectType::Athena
14649 ) {
14650 if let Some(Expression::ParseJson(pj)) = f.args.first()
14651 {
14652 if let Expression::Literal(Literal::String(s)) =
14653 &pj.this
14654 {
14655 let wrapped = Expression::Literal(
14656 Literal::String(format!("[{}]", s)),
14657 );
14658 let schema_of_json = Expression::Function(
14659 Box::new(Function::new(
14660 "SCHEMA_OF_JSON".to_string(),
14661 vec![wrapped.clone()],
14662 )),
14663 );
14664 let from_json = Expression::Function(Box::new(
14665 Function::new(
14666 "FROM_JSON".to_string(),
14667 vec![wrapped, schema_of_json],
14668 ),
14669 ));
14670 let to_json = Expression::Function(Box::new(
14671 Function::new(
14672 "TO_JSON".to_string(),
14673 vec![from_json],
14674 ),
14675 ));
14676 return Ok(Expression::Function(Box::new(
14677 Function::new(
14678 "REGEXP_EXTRACT".to_string(),
14679 vec![
14680 to_json,
14681 Expression::Literal(
14682 Literal::String(
14683 "^.(.*).$".to_string(),
14684 ),
14685 ),
14686 Expression::Literal(
14687 Literal::Number(
14688 "1".to_string(),
14689 ),
14690 ),
14691 ],
14692 ),
14693 )));
14694 }
14695 }
14696 }
14697
14698 // Strip inner CAST(... AS JSON) or TO_JSON() if present
14699 // The CastToJsonForSpark may have already converted CAST(x AS JSON) to TO_JSON(x)
14700 let mut args = f.args;
14701 if let Some(Expression::Cast(ref c)) = args.first() {
14702 if matches!(&c.to, DataType::Json | DataType::JsonB) {
14703 args = vec![c.this.clone()];
14704 }
14705 } else if let Some(Expression::Function(ref inner_f)) =
14706 args.first()
14707 {
14708 if inner_f.name.eq_ignore_ascii_case("TO_JSON")
14709 && inner_f.args.len() == 1
14710 {
14711 // Already TO_JSON(x) from CastToJsonForSpark, just use the inner arg
14712 args = inner_f.args.clone();
14713 }
14714 }
14715 Ok(Expression::Function(Box::new(Function::new(
14716 "TO_JSON".to_string(),
14717 args,
14718 ))))
14719 }
14720 DialectType::BigQuery => Ok(Expression::Function(Box::new(
14721 Function::new("TO_JSON_STRING".to_string(), f.args),
14722 ))),
14723 DialectType::DuckDB => {
14724 // CAST(TO_JSON(x) AS TEXT)
14725 let to_json = Expression::Function(Box::new(
14726 Function::new("TO_JSON".to_string(), f.args),
14727 ));
14728 Ok(Expression::Cast(Box::new(Cast {
14729 this: to_json,
14730 to: DataType::Text,
14731 trailing_comments: Vec::new(),
14732 double_colon_syntax: false,
14733 format: None,
14734 default: None,
14735 inferred_type: None,
14736 })))
14737 }
14738 _ => Ok(Expression::Function(f)),
14739 }
14740 }
14741 // SYSDATE -> CURRENT_TIMESTAMP for non-Oracle/Redshift/Snowflake targets
14742 "SYSDATE" if f.args.is_empty() => {
14743 match target {
14744 DialectType::Oracle | DialectType::Redshift => {
14745 Ok(Expression::Function(f))
14746 }
14747 DialectType::Snowflake => {
14748 // Snowflake uses SYSDATE() with parens
14749 let mut f = *f;
14750 f.no_parens = false;
14751 Ok(Expression::Function(Box::new(f)))
14752 }
14753 DialectType::DuckDB => {
14754 // DuckDB: SYSDATE() -> CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
14755 Ok(Expression::AtTimeZone(Box::new(
14756 crate::expressions::AtTimeZone {
14757 this: Expression::CurrentTimestamp(
14758 crate::expressions::CurrentTimestamp {
14759 precision: None,
14760 sysdate: false,
14761 },
14762 ),
14763 zone: Expression::Literal(Literal::String(
14764 "UTC".to_string(),
14765 )),
14766 },
14767 )))
14768 }
14769 _ => Ok(Expression::CurrentTimestamp(
14770 crate::expressions::CurrentTimestamp {
14771 precision: None,
14772 sysdate: true,
14773 },
14774 )),
14775 }
14776 }
14777 // LOGICAL_OR(x) -> BOOL_OR(x)
14778 "LOGICAL_OR" if f.args.len() == 1 => {
14779 let name = match target {
14780 DialectType::Spark | DialectType::Databricks => "BOOL_OR",
14781 _ => &f.name,
14782 };
14783 Ok(Expression::Function(Box::new(Function::new(
14784 name.to_string(),
14785 f.args,
14786 ))))
14787 }
14788 // LOGICAL_AND(x) -> BOOL_AND(x)
14789 "LOGICAL_AND" if f.args.len() == 1 => {
14790 let name = match target {
14791 DialectType::Spark | DialectType::Databricks => "BOOL_AND",
14792 _ => &f.name,
14793 };
14794 Ok(Expression::Function(Box::new(Function::new(
14795 name.to_string(),
14796 f.args,
14797 ))))
14798 }
14799 // MONTHS_ADD(d, n) -> ADD_MONTHS(d, n) for Oracle
14800 "MONTHS_ADD" if f.args.len() == 2 => match target {
14801 DialectType::Oracle => Ok(Expression::Function(Box::new(
14802 Function::new("ADD_MONTHS".to_string(), f.args),
14803 ))),
14804 _ => Ok(Expression::Function(f)),
14805 },
14806 // ARRAY_JOIN(arr, sep[, null_replacement]) -> target-specific
14807 "ARRAY_JOIN" if f.args.len() >= 2 => {
14808 match target {
14809 DialectType::Spark | DialectType::Databricks => {
14810 // Keep as ARRAY_JOIN for Spark (it supports null_replacement)
14811 Ok(Expression::Function(f))
14812 }
14813 DialectType::Hive => {
14814 // ARRAY_JOIN(arr, sep[, null_rep]) -> CONCAT_WS(sep, arr) (drop null_replacement)
14815 let mut args = f.args;
14816 let arr = args.remove(0);
14817 let sep = args.remove(0);
14818 // Drop any remaining args (null_replacement)
14819 Ok(Expression::Function(Box::new(Function::new(
14820 "CONCAT_WS".to_string(),
14821 vec![sep, arr],
14822 ))))
14823 }
14824 DialectType::Presto | DialectType::Trino => {
14825 Ok(Expression::Function(f))
14826 }
14827 _ => Ok(Expression::Function(f)),
14828 }
14829 }
14830 // LOCATE(substr, str, pos) 3-arg -> target-specific
14831 // For Presto/DuckDB: STRPOS doesn't support 3-arg, need complex expansion
14832 "LOCATE"
14833 if f.args.len() == 3
14834 && matches!(
14835 target,
14836 DialectType::Presto
14837 | DialectType::Trino
14838 | DialectType::Athena
14839 | DialectType::DuckDB
14840 ) =>
14841 {
14842 let mut args = f.args;
14843 let substr = args.remove(0);
14844 let string = args.remove(0);
14845 let pos = args.remove(0);
14846 // STRPOS(SUBSTRING(string, pos), substr)
14847 let substring_call = Expression::Function(Box::new(Function::new(
14848 "SUBSTRING".to_string(),
14849 vec![string.clone(), pos.clone()],
14850 )));
14851 let strpos_call = Expression::Function(Box::new(Function::new(
14852 "STRPOS".to_string(),
14853 vec![substring_call, substr.clone()],
14854 )));
14855 // STRPOS(...) + pos - 1
14856 let pos_adjusted =
14857 Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
14858 Expression::Add(Box::new(
14859 crate::expressions::BinaryOp::new(
14860 strpos_call.clone(),
14861 pos.clone(),
14862 ),
14863 )),
14864 Expression::number(1),
14865 )));
14866 // STRPOS(...) = 0
14867 let is_zero =
14868 Expression::Eq(Box::new(crate::expressions::BinaryOp::new(
14869 strpos_call.clone(),
14870 Expression::number(0),
14871 )));
14872
14873 match target {
14874 DialectType::Presto
14875 | DialectType::Trino
14876 | DialectType::Athena => {
14877 // IF(STRPOS(...) = 0, 0, STRPOS(...) + pos - 1)
14878 Ok(Expression::Function(Box::new(Function::new(
14879 "IF".to_string(),
14880 vec![is_zero, Expression::number(0), pos_adjusted],
14881 ))))
14882 }
14883 DialectType::DuckDB => {
14884 // CASE WHEN STRPOS(...) = 0 THEN 0 ELSE STRPOS(...) + pos - 1 END
14885 Ok(Expression::Case(Box::new(crate::expressions::Case {
14886 operand: None,
14887 whens: vec![(is_zero, Expression::number(0))],
14888 else_: Some(pos_adjusted),
14889 comments: Vec::new(),
14890 inferred_type: None,
14891 })))
14892 }
14893 _ => Ok(Expression::Function(Box::new(Function::new(
14894 "LOCATE".to_string(),
14895 vec![substr, string, pos],
14896 )))),
14897 }
14898 }
14899 // STRPOS(haystack, needle, occurrence) 3-arg -> INSTR(haystack, needle, 1, occurrence)
14900 "STRPOS"
14901 if f.args.len() == 3
14902 && matches!(
14903 target,
14904 DialectType::BigQuery
14905 | DialectType::Oracle
14906 | DialectType::Teradata
14907 ) =>
14908 {
14909 let mut args = f.args;
14910 let haystack = args.remove(0);
14911 let needle = args.remove(0);
14912 let occurrence = args.remove(0);
14913 Ok(Expression::Function(Box::new(Function::new(
14914 "INSTR".to_string(),
14915 vec![haystack, needle, Expression::number(1), occurrence],
14916 ))))
14917 }
14918 // SCHEMA_NAME(id) -> target-specific
14919 "SCHEMA_NAME" if f.args.len() <= 1 => match target {
14920 DialectType::MySQL | DialectType::SingleStore => {
14921 Ok(Expression::Function(Box::new(Function::new(
14922 "SCHEMA".to_string(),
14923 vec![],
14924 ))))
14925 }
14926 DialectType::PostgreSQL => Ok(Expression::CurrentSchema(Box::new(
14927 crate::expressions::CurrentSchema { this: None },
14928 ))),
14929 DialectType::SQLite => Ok(Expression::string("main")),
14930 _ => Ok(Expression::Function(f)),
14931 },
14932 // STRTOL(str, base) -> FROM_BASE(str, base) for Trino/Presto
14933 "STRTOL" if f.args.len() == 2 => match target {
14934 DialectType::Presto | DialectType::Trino => {
14935 Ok(Expression::Function(Box::new(Function::new(
14936 "FROM_BASE".to_string(),
14937 f.args,
14938 ))))
14939 }
14940 _ => Ok(Expression::Function(f)),
14941 },
14942 // EDITDIST3(a, b) -> LEVENSHTEIN(a, b) for Spark
14943 "EDITDIST3" if f.args.len() == 2 => match target {
14944 DialectType::Spark | DialectType::Databricks => {
14945 Ok(Expression::Function(Box::new(Function::new(
14946 "LEVENSHTEIN".to_string(),
14947 f.args,
14948 ))))
14949 }
14950 _ => Ok(Expression::Function(f)),
14951 },
// FORMAT(num, decimals) from MySQL -> DuckDB FORMAT('{:,.Xf}', num)
"FORMAT"
    if f.args.len() == 2
        && matches!(
            source,
            DialectType::MySQL | DialectType::SingleStore
        )
        && matches!(target, DialectType::DuckDB) =>
{
    let mut args = f.args;
    let num_expr = args.remove(0);
    let decimals_expr = args.remove(0);
    // Extract decimal count. DuckDB needs the precision embedded in the
    // format-string literal, so a non-literal decimals argument falls
    // back to 0 decimal places.
    let dec_count = match &decimals_expr {
        Expression::Literal(Literal::Number(n)) => n.clone(),
        _ => "0".to_string(),
    };
    // fmt-style spec: thousands separator (,) plus fixed precision.
    let fmt_str = format!("{{:,.{}f}}", dec_count);
    Ok(Expression::Function(Box::new(Function::new(
        "FORMAT".to_string(),
        vec![Expression::string(&fmt_str), num_expr],
    ))))
}
// FORMAT(x, fmt) from TSQL -> DATE_FORMAT for Spark, or expand short codes
"FORMAT"
    if f.args.len() == 2
        && matches!(
            source,
            DialectType::TSQL | DialectType::Fabric
        ) =>
{
    let val_expr = f.args[0].clone();
    let fmt_expr = f.args[1].clone();
    // Expand unambiguous .NET single-char date format shortcodes to full patterns.
    // Only expand shortcodes that are NOT also valid numeric format specifiers.
    // Ambiguous: d, D, f, F, g, G (used for both dates and numbers)
    // Unambiguous date: m/M (Month day), t/T (Time), y/Y (Year month)
    let (expanded_fmt, is_shortcode) = match &fmt_expr {
        Expression::Literal(crate::expressions::Literal::String(s)) => {
            match s.as_str() {
                "m" | "M" => (Expression::string("MMMM d"), true),
                "t" => (Expression::string("h:mm tt"), true),
                "T" => (Expression::string("h:mm:ss tt"), true),
                "y" | "Y" => (Expression::string("MMMM yyyy"), true),
                _ => (fmt_expr.clone(), false),
            }
        }
        // Non-literal format: cannot classify, pass through unchanged.
        _ => (fmt_expr.clone(), false),
    };
    // Check if the format looks like a date format (heuristic: presence
    // of common date/time pattern tokens in the literal).
    let is_date_format = is_shortcode
        || match &expanded_fmt {
            Expression::Literal(
                crate::expressions::Literal::String(s),
            ) => {
                // Date formats typically contain yyyy, MM, dd, MMMM, HH, etc.
                s.contains("yyyy")
                    || s.contains("YYYY")
                    || s.contains("MM")
                    || s.contains("dd")
                    || s.contains("MMMM")
                    || s.contains("HH")
                    || s.contains("hh")
                    || s.contains("ss")
            }
            _ => false,
        };
    match target {
        DialectType::Spark | DialectType::Databricks => {
            // Spark splits TSQL's dual-purpose FORMAT into DATE_FORMAT
            // for dates vs FORMAT_NUMBER for numbers.
            let func_name = if is_date_format {
                "DATE_FORMAT"
            } else {
                "FORMAT_NUMBER"
            };
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                vec![val_expr, expanded_fmt],
            ))))
        }
        _ => {
            // For TSQL and other targets, expand shortcodes but keep FORMAT
            if is_shortcode {
                Ok(Expression::Function(Box::new(Function::new(
                    "FORMAT".to_string(),
                    vec![val_expr, expanded_fmt],
                ))))
            } else {
                Ok(Expression::Function(f))
            }
        }
    }
}
// FORMAT('%s', x) from Trino/Presto -> target-specific
"FORMAT"
    if f.args.len() >= 2
        && matches!(
            source,
            DialectType::Trino
                | DialectType::Presto
                | DialectType::Athena
        ) =>
{
    let fmt_expr = f.args[0].clone();
    // All arguments after the format string are the values to format.
    let value_args: Vec<Expression> = f.args[1..].to_vec();
    match target {
        // DuckDB: replace %s with {} in format string
        DialectType::DuckDB => {
            let new_fmt = match &fmt_expr {
                Expression::Literal(Literal::String(s)) => {
                    Expression::Literal(Literal::String(
                        s.replace("%s", "{}"),
                    ))
                }
                // Non-literal format strings pass through untouched.
                _ => fmt_expr,
            };
            let mut args = vec![new_fmt];
            args.extend(value_args);
            Ok(Expression::Function(Box::new(Function::new(
                "FORMAT".to_string(),
                args,
            ))))
        }
        // Snowflake: FORMAT('%s', x) -> TO_CHAR(x) when just %s
        DialectType::Snowflake => match &fmt_expr {
            Expression::Literal(Literal::String(s))
                if s == "%s" && value_args.len() == 1 =>
            {
                Ok(Expression::Function(Box::new(Function::new(
                    "TO_CHAR".to_string(),
                    value_args,
                ))))
            }
            // Anything more complex than a lone %s is left as-is.
            _ => Ok(Expression::Function(f)),
        },
        // Default: keep FORMAT as-is
        _ => Ok(Expression::Function(f)),
    }
}
15090 // LIST_CONTAINS / LIST_HAS / ARRAY_CONTAINS -> target-specific
15091 "LIST_CONTAINS" | "LIST_HAS" | "ARRAY_CONTAINS"
15092 if f.args.len() == 2 =>
15093 {
15094 match target {
15095 DialectType::PostgreSQL | DialectType::Redshift => {
15096 // CASE WHEN needle IS NULL THEN NULL ELSE COALESCE(needle = ANY(arr), FALSE) END
15097 let arr = f.args[0].clone();
15098 let needle = f.args[1].clone();
15099 // Convert [] to ARRAY[] for PostgreSQL
15100 let pg_arr = match arr {
15101 Expression::Array(a) => Expression::ArrayFunc(
15102 Box::new(crate::expressions::ArrayConstructor {
15103 expressions: a.expressions,
15104 bracket_notation: false,
15105 use_list_keyword: false,
15106 }),
15107 ),
15108 _ => arr,
15109 };
15110 // needle = ANY(arr) using the Any quantified expression
15111 let any_expr = Expression::Any(Box::new(
15112 crate::expressions::QuantifiedExpr {
15113 this: needle.clone(),
15114 subquery: pg_arr,
15115 op: Some(crate::expressions::QuantifiedOp::Eq),
15116 },
15117 ));
15118 let coalesce = Expression::Coalesce(Box::new(
15119 crate::expressions::VarArgFunc {
15120 expressions: vec![
15121 any_expr,
15122 Expression::Boolean(
15123 crate::expressions::BooleanLiteral {
15124 value: false,
15125 },
15126 ),
15127 ],
15128 original_name: None,
15129 inferred_type: None,
15130 },
15131 ));
15132 let is_null_check = Expression::IsNull(Box::new(
15133 crate::expressions::IsNull {
15134 this: needle,
15135 not: false,
15136 postfix_form: false,
15137 },
15138 ));
15139 Ok(Expression::Case(Box::new(Case {
15140 operand: None,
15141 whens: vec![(
15142 is_null_check,
15143 Expression::Null(crate::expressions::Null),
15144 )],
15145 else_: Some(coalesce),
15146 comments: Vec::new(),
15147 inferred_type: None,
15148 })))
15149 }
15150 _ => Ok(Expression::Function(Box::new(Function::new(
15151 "ARRAY_CONTAINS".to_string(),
15152 f.args,
15153 )))),
15154 }
15155 }
15156 // LIST_HAS_ANY / ARRAY_HAS_ANY -> target-specific overlap operator
15157 "LIST_HAS_ANY" | "ARRAY_HAS_ANY" if f.args.len() == 2 => {
15158 match target {
15159 DialectType::PostgreSQL | DialectType::Redshift => {
15160 // arr1 && arr2 with ARRAY[] syntax
15161 let mut args = f.args;
15162 let arr1 = args.remove(0);
15163 let arr2 = args.remove(0);
15164 let pg_arr1 = match arr1 {
15165 Expression::Array(a) => Expression::ArrayFunc(
15166 Box::new(crate::expressions::ArrayConstructor {
15167 expressions: a.expressions,
15168 bracket_notation: false,
15169 use_list_keyword: false,
15170 }),
15171 ),
15172 _ => arr1,
15173 };
15174 let pg_arr2 = match arr2 {
15175 Expression::Array(a) => Expression::ArrayFunc(
15176 Box::new(crate::expressions::ArrayConstructor {
15177 expressions: a.expressions,
15178 bracket_notation: false,
15179 use_list_keyword: false,
15180 }),
15181 ),
15182 _ => arr2,
15183 };
15184 Ok(Expression::ArrayOverlaps(Box::new(BinaryOp::new(
15185 pg_arr1, pg_arr2,
15186 ))))
15187 }
15188 DialectType::DuckDB => {
15189 // DuckDB: arr1 && arr2 (native support)
15190 let mut args = f.args;
15191 let arr1 = args.remove(0);
15192 let arr2 = args.remove(0);
15193 Ok(Expression::ArrayOverlaps(Box::new(BinaryOp::new(
15194 arr1, arr2,
15195 ))))
15196 }
15197 _ => Ok(Expression::Function(Box::new(Function::new(
15198 "LIST_HAS_ANY".to_string(),
15199 f.args,
15200 )))),
15201 }
15202 }
15203 // APPROX_QUANTILE(x, q) -> target-specific
15204 "APPROX_QUANTILE" if f.args.len() == 2 => match target {
15205 DialectType::Snowflake => Ok(Expression::Function(Box::new(
15206 Function::new("APPROX_PERCENTILE".to_string(), f.args),
15207 ))),
15208 DialectType::DuckDB => Ok(Expression::Function(f)),
15209 _ => Ok(Expression::Function(f)),
15210 },
15211 // MAKE_DATE(y, m, d) -> DATE(y, m, d) for BigQuery
15212 "MAKE_DATE" if f.args.len() == 3 => match target {
15213 DialectType::BigQuery => Ok(Expression::Function(Box::new(
15214 Function::new("DATE".to_string(), f.args),
15215 ))),
15216 _ => Ok(Expression::Function(f)),
15217 },
15218 // RANGE(start, end[, step]) -> target-specific
15219 "RANGE"
15220 if f.args.len() >= 2 && !matches!(target, DialectType::DuckDB) =>
15221 {
15222 let start = f.args[0].clone();
15223 let end = f.args[1].clone();
15224 let step = f.args.get(2).cloned();
15225 match target {
15226 DialectType::Spark | DialectType::Databricks => {
15227 // RANGE(start, end) -> SEQUENCE(start, end-1)
15228 // RANGE(start, end, step) -> SEQUENCE(start, end-step, step) when step constant
15229 // RANGE(start, start) -> ARRAY() (empty)
15230 // RANGE(start, end, 0) -> ARRAY() (empty)
15231 // When end is variable: IF((end - 1) <= start, ARRAY(), SEQUENCE(start, (end - 1)))
15232
15233 // Check for constant args
15234 fn extract_i64(e: &Expression) -> Option<i64> {
15235 match e {
15236 Expression::Literal(Literal::Number(n)) => {
15237 n.parse::<i64>().ok()
15238 }
15239 Expression::Neg(u) => {
15240 if let Expression::Literal(Literal::Number(n)) =
15241 &u.this
15242 {
15243 n.parse::<i64>().ok().map(|v| -v)
15244 } else {
15245 None
15246 }
15247 }
15248 _ => None,
15249 }
15250 }
15251 let start_val = extract_i64(&start);
15252 let end_val = extract_i64(&end);
15253 let step_val = step.as_ref().and_then(|s| extract_i64(s));
15254
15255 // Check for RANGE(x, x) or RANGE(x, y, 0) -> empty array
15256 if step_val == Some(0) {
15257 return Ok(Expression::Function(Box::new(
15258 Function::new("ARRAY".to_string(), vec![]),
15259 )));
15260 }
15261 if let (Some(s), Some(e_val)) = (start_val, end_val) {
15262 if s == e_val {
15263 return Ok(Expression::Function(Box::new(
15264 Function::new("ARRAY".to_string(), vec![]),
15265 )));
15266 }
15267 }
15268
15269 if let (Some(_s_val), Some(e_val)) = (start_val, end_val) {
15270 // All constants - compute new end = end - step (if step provided) or end - 1
15271 match step_val {
15272 Some(st) if st < 0 => {
15273 // Negative step: SEQUENCE(start, end - step, step)
15274 let new_end = e_val - st; // end - step (= end + |step|)
15275 let mut args =
15276 vec![start, Expression::number(new_end)];
15277 if let Some(s) = step {
15278 args.push(s);
15279 }
15280 Ok(Expression::Function(Box::new(
15281 Function::new("SEQUENCE".to_string(), args),
15282 )))
15283 }
15284 Some(st) => {
15285 let new_end = e_val - st;
15286 let mut args =
15287 vec![start, Expression::number(new_end)];
15288 if let Some(s) = step {
15289 args.push(s);
15290 }
15291 Ok(Expression::Function(Box::new(
15292 Function::new("SEQUENCE".to_string(), args),
15293 )))
15294 }
15295 None => {
15296 // No step: SEQUENCE(start, end - 1)
15297 let new_end = e_val - 1;
15298 Ok(Expression::Function(Box::new(
15299 Function::new(
15300 "SEQUENCE".to_string(),
15301 vec![
15302 start,
15303 Expression::number(new_end),
15304 ],
15305 ),
15306 )))
15307 }
15308 }
15309 } else {
15310 // Variable end: IF((end - 1) <= start, ARRAY(), SEQUENCE(start, (end - 1)))
15311 let end_m1 = Expression::Sub(Box::new(BinaryOp::new(
15312 end.clone(),
15313 Expression::number(1),
15314 )));
15315 let cond = Expression::Lte(Box::new(BinaryOp::new(
15316 Expression::Paren(Box::new(Paren {
15317 this: end_m1.clone(),
15318 trailing_comments: Vec::new(),
15319 })),
15320 start.clone(),
15321 )));
15322 let empty = Expression::Function(Box::new(
15323 Function::new("ARRAY".to_string(), vec![]),
15324 ));
15325 let mut seq_args = vec![
15326 start,
15327 Expression::Paren(Box::new(Paren {
15328 this: end_m1,
15329 trailing_comments: Vec::new(),
15330 })),
15331 ];
15332 if let Some(s) = step {
15333 seq_args.push(s);
15334 }
15335 let seq = Expression::Function(Box::new(
15336 Function::new("SEQUENCE".to_string(), seq_args),
15337 ));
15338 Ok(Expression::IfFunc(Box::new(
15339 crate::expressions::IfFunc {
15340 condition: cond,
15341 true_value: empty,
15342 false_value: Some(seq),
15343 original_name: None,
15344 inferred_type: None,
15345 },
15346 )))
15347 }
15348 }
15349 DialectType::SQLite => {
15350 // RANGE(start, end) -> GENERATE_SERIES(start, end)
15351 // The subquery wrapping is handled at the Alias level
15352 let mut args = vec![start, end];
15353 if let Some(s) = step {
15354 args.push(s);
15355 }
15356 Ok(Expression::Function(Box::new(Function::new(
15357 "GENERATE_SERIES".to_string(),
15358 args,
15359 ))))
15360 }
15361 _ => Ok(Expression::Function(f)),
15362 }
15363 }
// ARRAY_REVERSE_SORT -> target-specific
// (handled above as well, but also need DuckDB self-normalization)
// MAP_FROM_ARRAYS(keys, values) -> target-specific map construction
"MAP_FROM_ARRAYS" if f.args.len() == 2 => match target {
    // Snowflake builds objects from key/value pairs via OBJECT_CONSTRUCT.
    DialectType::Snowflake => Ok(Expression::Function(Box::new(
        Function::new("OBJECT_CONSTRUCT".to_string(), f.args),
    ))),
    // Spark/Databricks support MAP_FROM_ARRAYS natively; the node is
    // rebuilt under the same name.
    DialectType::Spark | DialectType::Databricks => {
        Ok(Expression::Function(Box::new(Function::new(
            "MAP_FROM_ARRAYS".to_string(),
            f.args,
        ))))
    }
    // Default: two-array MAP(keys, values) constructor.
    _ => Ok(Expression::Function(Box::new(Function::new(
        "MAP".to_string(),
        f.args,
    )))),
},
15382 // VARIANCE(x) -> varSamp(x) for ClickHouse
15383 "VARIANCE" if f.args.len() == 1 => match target {
15384 DialectType::ClickHouse => Ok(Expression::Function(Box::new(
15385 Function::new("varSamp".to_string(), f.args),
15386 ))),
15387 _ => Ok(Expression::Function(f)),
15388 },
15389 // STDDEV(x) -> stddevSamp(x) for ClickHouse
15390 "STDDEV" if f.args.len() == 1 => match target {
15391 DialectType::ClickHouse => Ok(Expression::Function(Box::new(
15392 Function::new("stddevSamp".to_string(), f.args),
15393 ))),
15394 _ => Ok(Expression::Function(f)),
15395 },
15396 // ISINF(x) -> IS_INF(x) for BigQuery
15397 "ISINF" if f.args.len() == 1 => match target {
15398 DialectType::BigQuery => Ok(Expression::Function(Box::new(
15399 Function::new("IS_INF".to_string(), f.args),
15400 ))),
15401 _ => Ok(Expression::Function(f)),
15402 },
// CONTAINS(arr, x) -> ARRAY_CONTAINS(arr, x) for Spark/Hive
"CONTAINS" if f.args.len() == 2 => match target {
    DialectType::Spark
    | DialectType::Databricks
    | DialectType::Hive => Ok(Expression::Function(Box::new(
        Function::new("ARRAY_CONTAINS".to_string(), f.args),
    ))),
    _ => Ok(Expression::Function(f)),
},
// ARRAY_CONTAINS(arr, x) -> CONTAINS(arr, x) for Presto
// NOTE(review): this arm is unreachable — an earlier arm matches
// "ARRAY_CONTAINS" with the same `f.args.len() == 2` guard and has a
// wildcard target branch, so it always wins. The Presto-family
// conversion intended here never fires from this arm.
"ARRAY_CONTAINS" if f.args.len() == 2 => match target {
    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
        Ok(Expression::Function(Box::new(Function::new(
            "CONTAINS".to_string(),
            f.args,
        ))))
    }
    DialectType::DuckDB => Ok(Expression::Function(Box::new(
        Function::new("ARRAY_CONTAINS".to_string(), f.args),
    ))),
    _ => Ok(Expression::Function(f)),
},
// TO_UNIXTIME(x) -> UNIX_TIMESTAMP(x) for Hive/Spark
"TO_UNIXTIME" if f.args.len() == 1 => match target {
    DialectType::Hive
    | DialectType::Spark
    | DialectType::Databricks => Ok(Expression::Function(Box::new(
        Function::new("UNIX_TIMESTAMP".to_string(), f.args),
    ))),
    _ => Ok(Expression::Function(f)),
},
15434 // FROM_UNIXTIME(x) -> target-specific
15435 "FROM_UNIXTIME" if f.args.len() == 1 => {
15436 match target {
15437 DialectType::Hive
15438 | DialectType::Spark
15439 | DialectType::Databricks
15440 | DialectType::Presto
15441 | DialectType::Trino => Ok(Expression::Function(f)),
15442 DialectType::DuckDB => {
15443 // DuckDB: TO_TIMESTAMP(x)
15444 let arg = f.args.into_iter().next().unwrap();
15445 Ok(Expression::Function(Box::new(Function::new(
15446 "TO_TIMESTAMP".to_string(),
15447 vec![arg],
15448 ))))
15449 }
15450 DialectType::PostgreSQL => {
15451 // PG: TO_TIMESTAMP(col)
15452 let arg = f.args.into_iter().next().unwrap();
15453 Ok(Expression::Function(Box::new(Function::new(
15454 "TO_TIMESTAMP".to_string(),
15455 vec![arg],
15456 ))))
15457 }
15458 DialectType::Redshift => {
15459 // Redshift: (TIMESTAMP 'epoch' + col * INTERVAL '1 SECOND')
15460 let arg = f.args.into_iter().next().unwrap();
15461 let epoch_ts = Expression::Literal(Literal::Timestamp(
15462 "epoch".to_string(),
15463 ));
15464 let interval = Expression::Interval(Box::new(
15465 crate::expressions::Interval {
15466 this: Some(Expression::string("1 SECOND")),
15467 unit: None,
15468 },
15469 ));
15470 let mul =
15471 Expression::Mul(Box::new(BinaryOp::new(arg, interval)));
15472 let add =
15473 Expression::Add(Box::new(BinaryOp::new(epoch_ts, mul)));
15474 Ok(Expression::Paren(Box::new(crate::expressions::Paren {
15475 this: add,
15476 trailing_comments: Vec::new(),
15477 })))
15478 }
15479 _ => Ok(Expression::Function(f)),
15480 }
15481 }
// FROM_UNIXTIME(x, fmt) with 2 args from Hive/Spark -> target-specific
"FROM_UNIXTIME"
    if f.args.len() == 2
        && matches!(
            source,
            DialectType::Hive
                | DialectType::Spark
                | DialectType::Databricks
        ) =>
{
    let mut args = f.args;
    let unix_ts = args.remove(0);
    let fmt_expr = args.remove(0);
    match target {
        DialectType::DuckDB => {
            // DuckDB: STRFTIME(TO_TIMESTAMP(x), c_fmt)
            let to_ts = Expression::Function(Box::new(Function::new(
                "TO_TIMESTAMP".to_string(),
                vec![unix_ts],
            )));
            // A literal format string is translated from Hive/Java style
            // to C strftime style; non-literal formats pass through.
            if let Expression::Literal(
                crate::expressions::Literal::String(s),
            ) = &fmt_expr
            {
                let c_fmt = Self::hive_format_to_c_format(s);
                Ok(Expression::Function(Box::new(Function::new(
                    "STRFTIME".to_string(),
                    vec![to_ts, Expression::string(&c_fmt)],
                ))))
            } else {
                Ok(Expression::Function(Box::new(Function::new(
                    "STRFTIME".to_string(),
                    vec![to_ts, fmt_expr],
                ))))
            }
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // Presto: DATE_FORMAT(FROM_UNIXTIME(x), presto_fmt)
            let from_unix =
                Expression::Function(Box::new(Function::new(
                    "FROM_UNIXTIME".to_string(),
                    vec![unix_ts],
                )));
            // Same literal-vs-dynamic split as the DuckDB branch above.
            if let Expression::Literal(
                crate::expressions::Literal::String(s),
            ) = &fmt_expr
            {
                let p_fmt = Self::hive_format_to_presto_format(s);
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_FORMAT".to_string(),
                    vec![from_unix, Expression::string(&p_fmt)],
                ))))
            } else {
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_FORMAT".to_string(),
                    vec![from_unix, fmt_expr],
                ))))
            }
        }
        _ => {
            // Keep as FROM_UNIXTIME(x, fmt) for other targets
            Ok(Expression::Function(Box::new(Function::new(
                "FROM_UNIXTIME".to_string(),
                vec![unix_ts, fmt_expr],
            ))))
        }
    }
}
15552 // DATEPART(unit, expr) -> EXTRACT(unit FROM expr) for Spark
15553 "DATEPART" | "DATE_PART" if f.args.len() == 2 => {
15554 let unit_str = Self::get_unit_str_static(&f.args[0]);
15555 // Get the raw unit text preserving original case
15556 let raw_unit = match &f.args[0] {
15557 Expression::Identifier(id) => id.name.clone(),
15558 Expression::Literal(crate::expressions::Literal::String(s)) => {
15559 s.clone()
15560 }
15561 Expression::Column(col) => col.name.name.clone(),
15562 _ => unit_str.clone(),
15563 };
15564 match target {
15565 DialectType::TSQL | DialectType::Fabric => {
15566 // Preserve original case of unit for TSQL
15567 let unit_name = match unit_str.as_str() {
15568 "YY" | "YYYY" => "YEAR".to_string(),
15569 "QQ" | "Q" => "QUARTER".to_string(),
15570 "MM" | "M" => "MONTH".to_string(),
15571 "WK" | "WW" => "WEEK".to_string(),
15572 "DD" | "D" | "DY" => "DAY".to_string(),
15573 "HH" => "HOUR".to_string(),
15574 "MI" | "N" => "MINUTE".to_string(),
15575 "SS" | "S" => "SECOND".to_string(),
15576 _ => raw_unit.clone(), // preserve original case
15577 };
15578 let mut args = f.args;
15579 args[0] =
15580 Expression::Identifier(Identifier::new(&unit_name));
15581 Ok(Expression::Function(Box::new(Function::new(
15582 "DATEPART".to_string(),
15583 args,
15584 ))))
15585 }
15586 DialectType::Spark | DialectType::Databricks => {
15587 // DATEPART(unit, expr) -> EXTRACT(unit FROM expr)
15588 // Preserve original case for non-abbreviation units
15589 let unit = match unit_str.as_str() {
15590 "YY" | "YYYY" => "YEAR".to_string(),
15591 "QQ" | "Q" => "QUARTER".to_string(),
15592 "MM" | "M" => "MONTH".to_string(),
15593 "WK" | "WW" => "WEEK".to_string(),
15594 "DD" | "D" | "DY" => "DAY".to_string(),
15595 "HH" => "HOUR".to_string(),
15596 "MI" | "N" => "MINUTE".to_string(),
15597 "SS" | "S" => "SECOND".to_string(),
15598 _ => raw_unit, // preserve original case
15599 };
15600 Ok(Expression::Extract(Box::new(
15601 crate::expressions::ExtractFunc {
15602 this: f.args[1].clone(),
15603 field: crate::expressions::DateTimeField::Custom(
15604 unit,
15605 ),
15606 },
15607 )))
15608 }
15609 _ => Ok(Expression::Function(Box::new(Function::new(
15610 "DATE_PART".to_string(),
15611 f.args,
15612 )))),
15613 }
15614 }
15615 // DATENAME(mm, date) -> FORMAT(CAST(date AS DATETIME2), 'MMMM') for TSQL
15616 // DATENAME(dw, date) -> FORMAT(CAST(date AS DATETIME2), 'dddd') for TSQL
15617 // DATENAME(mm, date) -> DATE_FORMAT(CAST(date AS TIMESTAMP), 'MMMM') for Spark
15618 // DATENAME(dw, date) -> DATE_FORMAT(CAST(date AS TIMESTAMP), 'EEEE') for Spark
15619 "DATENAME" if f.args.len() == 2 => {
15620 let unit_str = Self::get_unit_str_static(&f.args[0]);
15621 let date_expr = f.args[1].clone();
15622 match unit_str.as_str() {
15623 "MM" | "M" | "MONTH" => match target {
15624 DialectType::TSQL => {
15625 let cast_date = Expression::Cast(Box::new(
15626 crate::expressions::Cast {
15627 this: date_expr,
15628 to: DataType::Custom {
15629 name: "DATETIME2".to_string(),
15630 },
15631 trailing_comments: Vec::new(),
15632 double_colon_syntax: false,
15633 format: None,
15634 default: None,
15635 inferred_type: None,
15636 },
15637 ));
15638 Ok(Expression::Function(Box::new(Function::new(
15639 "FORMAT".to_string(),
15640 vec![cast_date, Expression::string("MMMM")],
15641 ))))
15642 }
15643 DialectType::Spark | DialectType::Databricks => {
15644 let cast_date = Expression::Cast(Box::new(
15645 crate::expressions::Cast {
15646 this: date_expr,
15647 to: DataType::Timestamp {
15648 timezone: false,
15649 precision: None,
15650 },
15651 trailing_comments: Vec::new(),
15652 double_colon_syntax: false,
15653 format: None,
15654 default: None,
15655 inferred_type: None,
15656 },
15657 ));
15658 Ok(Expression::Function(Box::new(Function::new(
15659 "DATE_FORMAT".to_string(),
15660 vec![cast_date, Expression::string("MMMM")],
15661 ))))
15662 }
15663 _ => Ok(Expression::Function(f)),
15664 },
15665 "DW" | "WEEKDAY" => match target {
15666 DialectType::TSQL => {
15667 let cast_date = Expression::Cast(Box::new(
15668 crate::expressions::Cast {
15669 this: date_expr,
15670 to: DataType::Custom {
15671 name: "DATETIME2".to_string(),
15672 },
15673 trailing_comments: Vec::new(),
15674 double_colon_syntax: false,
15675 format: None,
15676 default: None,
15677 inferred_type: None,
15678 },
15679 ));
15680 Ok(Expression::Function(Box::new(Function::new(
15681 "FORMAT".to_string(),
15682 vec![cast_date, Expression::string("dddd")],
15683 ))))
15684 }
15685 DialectType::Spark | DialectType::Databricks => {
15686 let cast_date = Expression::Cast(Box::new(
15687 crate::expressions::Cast {
15688 this: date_expr,
15689 to: DataType::Timestamp {
15690 timezone: false,
15691 precision: None,
15692 },
15693 trailing_comments: Vec::new(),
15694 double_colon_syntax: false,
15695 format: None,
15696 default: None,
15697 inferred_type: None,
15698 },
15699 ));
15700 Ok(Expression::Function(Box::new(Function::new(
15701 "DATE_FORMAT".to_string(),
15702 vec![cast_date, Expression::string("EEEE")],
15703 ))))
15704 }
15705 _ => Ok(Expression::Function(f)),
15706 },
15707 _ => Ok(Expression::Function(f)),
15708 }
15709 }
15710 // STRING_AGG(x, sep) without WITHIN GROUP -> target-specific
15711 "STRING_AGG" if f.args.len() >= 2 => {
15712 let x = f.args[0].clone();
15713 let sep = f.args[1].clone();
15714 match target {
15715 DialectType::MySQL
15716 | DialectType::SingleStore
15717 | DialectType::Doris
15718 | DialectType::StarRocks => Ok(Expression::GroupConcat(
15719 Box::new(crate::expressions::GroupConcatFunc {
15720 this: x,
15721 separator: Some(sep),
15722 order_by: None,
15723 distinct: false,
15724 filter: None,
15725 inferred_type: None,
15726 }),
15727 )),
15728 DialectType::SQLite => Ok(Expression::GroupConcat(Box::new(
15729 crate::expressions::GroupConcatFunc {
15730 this: x,
15731 separator: Some(sep),
15732 order_by: None,
15733 distinct: false,
15734 filter: None,
15735 inferred_type: None,
15736 },
15737 ))),
15738 DialectType::PostgreSQL | DialectType::Redshift => {
15739 Ok(Expression::StringAgg(Box::new(
15740 crate::expressions::StringAggFunc {
15741 this: x,
15742 separator: Some(sep),
15743 order_by: None,
15744 distinct: false,
15745 filter: None,
15746 limit: None,
15747 inferred_type: None,
15748 },
15749 )))
15750 }
15751 _ => Ok(Expression::Function(f)),
15752 }
15753 }
// JSON_ARRAYAGG -> JSON_AGG for PostgreSQL
"JSON_ARRAYAGG" => match target {
    DialectType::PostgreSQL => {
        // Struct-update syntax renames the function while preserving all
        // other Function fields from the original call.
        Ok(Expression::Function(Box::new(Function {
            name: "JSON_AGG".to_string(),
            ..(*f)
        })))
    }
    _ => Ok(Expression::Function(f)),
},
// SCHEMA_NAME(id) -> CURRENT_SCHEMA for PostgreSQL, 'main' for SQLite
// NOTE(review): an earlier "SCHEMA_NAME" arm is guarded by
// `f.args.len() <= 1`, so this fallback only fires for calls carrying
// two or more arguments.
"SCHEMA_NAME" => match target {
    DialectType::PostgreSQL => Ok(Expression::CurrentSchema(Box::new(
        crate::expressions::CurrentSchema { this: None },
    ))),
    DialectType::SQLite => Ok(Expression::string("main")),
    _ => Ok(Expression::Function(f)),
},
// TO_TIMESTAMP(x, fmt) 2-arg from Spark/Hive: convert Java format to target format
"TO_TIMESTAMP"
    if f.args.len() == 2
        && matches!(
            source,
            DialectType::Spark
                | DialectType::Databricks
                | DialectType::Hive
        )
        && matches!(target, DialectType::DuckDB) =>
{
    let mut args = f.args;
    let val = args.remove(0);
    let fmt_expr = args.remove(0);
    if let Expression::Literal(Literal::String(ref s)) = fmt_expr {
        // Convert Java/Spark format to C strptime format
        fn java_to_c_fmt(fmt: &str) -> String {
            // Pass 1: replace multi-char Java tokens with C specifiers.
            // Order matters: longer tokens first, so e.g. "yyyy" is not
            // half-consumed by the later "yy" replacement.
            let result = fmt
                .replace("yyyy", "%Y")
                .replace("SSSSSS", "%f")
                .replace("EEEE", "%W")
                .replace("MM", "%m")
                .replace("dd", "%d")
                .replace("HH", "%H")
                .replace("mm", "%M")
                .replace("ss", "%S")
                .replace("yy", "%y");
            // Pass 2: map timezone chars z/Z, skipping over the %X pairs
            // emitted by pass 1 so their letters are not re-translated.
            let mut out = String::new();
            let chars: Vec<char> = result.chars().collect();
            let mut i = 0;
            while i < chars.len() {
                if chars[i] == '%' && i + 1 < chars.len() {
                    out.push(chars[i]);
                    out.push(chars[i + 1]);
                    i += 2;
                } else if chars[i] == 'z' {
                    out.push_str("%Z");
                    i += 1;
                } else if chars[i] == 'Z' {
                    out.push_str("%z");
                    i += 1;
                } else {
                    out.push(chars[i]);
                    i += 1;
                }
            }
            out
        }
        let c_fmt = java_to_c_fmt(s);
        Ok(Expression::Function(Box::new(Function::new(
            "STRPTIME".to_string(),
            vec![val, Expression::string(&c_fmt)],
        ))))
    } else {
        // Non-literal format: pass it through to STRPTIME untranslated.
        Ok(Expression::Function(Box::new(Function::new(
            "STRPTIME".to_string(),
            vec![val, fmt_expr],
        ))))
    }
}
// TO_DATE(x) 1-arg from Doris: date conversion
"TO_DATE"
    if f.args.len() == 1
        && matches!(
            source,
            DialectType::Doris | DialectType::StarRocks
        ) =>
{
    // Guard guarantees exactly one argument, so the unwrap cannot fail.
    let arg = f.args.into_iter().next().unwrap();
    match target {
        DialectType::Oracle
        | DialectType::DuckDB
        | DialectType::TSQL => {
            // CAST(x AS DATE)
            Ok(Expression::Cast(Box::new(Cast {
                this: arg,
                to: DataType::Date,
                double_colon_syntax: false,
                trailing_comments: vec![],
                format: None,
                default: None,
                inferred_type: None,
            })))
        }
        DialectType::MySQL | DialectType::SingleStore => {
            // DATE(x)
            Ok(Expression::Function(Box::new(Function::new(
                "DATE".to_string(),
                vec![arg],
            ))))
        }
        _ => {
            // Default: keep as TO_DATE(x) (Spark, PostgreSQL, etc.)
            Ok(Expression::Function(Box::new(Function::new(
                "TO_DATE".to_string(),
                vec![arg],
            ))))
        }
    }
}
// TO_DATE(x) 1-arg from Spark/Hive: safe date conversion
"TO_DATE"
    if f.args.len() == 1
        && matches!(
            source,
            DialectType::Spark
                | DialectType::Databricks
                | DialectType::Hive
        ) =>
{
    // Guard guarantees exactly one argument, so the unwrap cannot fail.
    let arg = f.args.into_iter().next().unwrap();
    match target {
        DialectType::DuckDB => {
            // Spark TO_DATE is safe (returns NULL on bad input)
            // -> TRY_CAST(x AS DATE)
            Ok(Expression::TryCast(Box::new(Cast {
                this: arg,
                to: DataType::Date,
                double_colon_syntax: false,
                trailing_comments: vec![],
                format: None,
                default: None,
                inferred_type: None,
            })))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // CAST(CAST(x AS TIMESTAMP) AS DATE)
            Ok(Self::double_cast_timestamp_date(arg))
        }
        DialectType::Snowflake => {
            // Spark's TO_DATE is safe -> TRY_TO_DATE(x, 'yyyy-mm-DD')
            // The default Spark format 'yyyy-MM-dd' maps to Snowflake 'yyyy-mm-DD'
            Ok(Expression::Function(Box::new(Function::new(
                "TRY_TO_DATE".to_string(),
                vec![arg, Expression::string("yyyy-mm-DD")],
            ))))
        }
        _ => {
            // Default: keep as TO_DATE(x)
            Ok(Expression::Function(Box::new(Function::new(
                "TO_DATE".to_string(),
                vec![arg],
            ))))
        }
    }
}
15919 // TO_DATE(x, fmt) 2-arg from Spark/Hive: format-based date conversion
15920 "TO_DATE"
15921 if f.args.len() == 2
15922 && matches!(
15923 source,
15924 DialectType::Spark
15925 | DialectType::Databricks
15926 | DialectType::Hive
15927 ) =>
15928 {
15929 let mut args = f.args;
15930 let val = args.remove(0);
15931 let fmt_expr = args.remove(0);
15932 let is_default_format = matches!(&fmt_expr, Expression::Literal(Literal::String(s)) if s == "yyyy-MM-dd");
15933
15934 if is_default_format {
15935 // Default format: same as 1-arg form
15936 match target {
15937 DialectType::DuckDB => {
15938 Ok(Expression::TryCast(Box::new(Cast {
15939 this: val,
15940 to: DataType::Date,
15941 double_colon_syntax: false,
15942 trailing_comments: vec![],
15943 format: None,
15944 default: None,
15945 inferred_type: None,
15946 })))
15947 }
15948 DialectType::Presto
15949 | DialectType::Trino
15950 | DialectType::Athena => {
15951 Ok(Self::double_cast_timestamp_date(val))
15952 }
15953 DialectType::Snowflake => {
15954 // TRY_TO_DATE(x, format) with Snowflake format mapping
15955 let sf_fmt = "yyyy-MM-dd"
15956 .replace("yyyy", "yyyy")
15957 .replace("MM", "mm")
15958 .replace("dd", "DD");
15959 Ok(Expression::Function(Box::new(Function::new(
15960 "TRY_TO_DATE".to_string(),
15961 vec![val, Expression::string(&sf_fmt)],
15962 ))))
15963 }
15964 _ => Ok(Expression::Function(Box::new(Function::new(
15965 "TO_DATE".to_string(),
15966 vec![val],
15967 )))),
15968 }
15969 } else {
15970 // Non-default format: use format-based parsing
15971 if let Expression::Literal(Literal::String(ref s)) = fmt_expr {
15972 match target {
15973 DialectType::DuckDB => {
15974 // CAST(CAST(TRY_STRPTIME(x, c_fmt) AS TIMESTAMP) AS DATE)
// Translate a Java/Spark `SimpleDateFormat` pattern into the C-style
// strptime format understood by DuckDB's TRY_STRPTIME.
//
// The `replace` chain handles multi-letter tokens first (longest token
// first, e.g. `yyyy` before `yy`, so the shorter pattern cannot eat part
// of the longer one). The character loop afterwards copies the
// already-produced `%x` pairs verbatim and maps the single-letter
// timezone tokens: Java `z` (zone name) -> `%Z`, Java `Z` (offset) -> `%z`.
fn java_to_c_fmt_todate(fmt: &str) -> String {
    let result = fmt
        .replace("yyyy", "%Y")
        .replace("SSSSSS", "%f")
        // Java EEEE is the full weekday name -> strptime %A.
        // (%W would be the week-of-year number, not a weekday name.)
        .replace("EEEE", "%A")
        .replace("MM", "%m")
        .replace("dd", "%d")
        .replace("HH", "%H")
        .replace("mm", "%M")
        .replace("ss", "%S")
        .replace("yy", "%y");
    let mut out = String::new();
    let chars: Vec<char> = result.chars().collect();
    let mut i = 0;
    while i < chars.len() {
        if chars[i] == '%' && i + 1 < chars.len() {
            // An already-translated "%x" pair: copy untouched so the
            // z/Z handling below cannot corrupt it (e.g. "%Z").
            out.push(chars[i]);
            out.push(chars[i + 1]);
            i += 2;
        } else if chars[i] == 'z' {
            out.push_str("%Z");
            i += 1;
        } else if chars[i] == 'Z' {
            out.push_str("%z");
            i += 1;
        } else {
            out.push(chars[i]);
            i += 1;
        }
    }
    out
}
16007 let c_fmt = java_to_c_fmt_todate(s);
16008 // CAST(CAST(TRY_STRPTIME(x, fmt) AS TIMESTAMP) AS DATE)
16009 let try_strptime =
16010 Expression::Function(Box::new(Function::new(
16011 "TRY_STRPTIME".to_string(),
16012 vec![val, Expression::string(&c_fmt)],
16013 )));
16014 let cast_ts = Expression::Cast(Box::new(Cast {
16015 this: try_strptime,
16016 to: DataType::Timestamp {
16017 precision: None,
16018 timezone: false,
16019 },
16020 double_colon_syntax: false,
16021 trailing_comments: vec![],
16022 format: None,
16023 default: None,
16024 inferred_type: None,
16025 }));
16026 Ok(Expression::Cast(Box::new(Cast {
16027 this: cast_ts,
16028 to: DataType::Date,
16029 double_colon_syntax: false,
16030 trailing_comments: vec![],
16031 format: None,
16032 default: None,
16033 inferred_type: None,
16034 })))
16035 }
16036 DialectType::Presto
16037 | DialectType::Trino
16038 | DialectType::Athena => {
16039 // CAST(DATE_PARSE(x, presto_fmt) AS DATE)
16040 let p_fmt = s
16041 .replace("yyyy", "%Y")
16042 .replace("SSSSSS", "%f")
16043 .replace("MM", "%m")
16044 .replace("dd", "%d")
16045 .replace("HH", "%H")
16046 .replace("mm", "%M")
16047 .replace("ss", "%S")
16048 .replace("yy", "%y");
16049 let date_parse =
16050 Expression::Function(Box::new(Function::new(
16051 "DATE_PARSE".to_string(),
16052 vec![val, Expression::string(&p_fmt)],
16053 )));
16054 Ok(Expression::Cast(Box::new(Cast {
16055 this: date_parse,
16056 to: DataType::Date,
16057 double_colon_syntax: false,
16058 trailing_comments: vec![],
16059 format: None,
16060 default: None,
16061 inferred_type: None,
16062 })))
16063 }
16064 DialectType::Snowflake => {
16065 // TRY_TO_DATE(x, snowflake_fmt)
16066 Ok(Expression::Function(Box::new(Function::new(
16067 "TRY_TO_DATE".to_string(),
16068 vec![val, Expression::string(s)],
16069 ))))
16070 }
16071 _ => Ok(Expression::Function(Box::new(Function::new(
16072 "TO_DATE".to_string(),
16073 vec![val, fmt_expr],
16074 )))),
16075 }
16076 } else {
16077 Ok(Expression::Function(Box::new(Function::new(
16078 "TO_DATE".to_string(),
16079 vec![val, fmt_expr],
16080 ))))
16081 }
16082 }
16083 }
16084 // TO_TIMESTAMP(x) 1-arg: epoch conversion
16085 "TO_TIMESTAMP"
16086 if f.args.len() == 1
16087 && matches!(source, DialectType::DuckDB)
16088 && matches!(
16089 target,
16090 DialectType::BigQuery
16091 | DialectType::Presto
16092 | DialectType::Trino
16093 | DialectType::Hive
16094 | DialectType::Spark
16095 | DialectType::Databricks
16096 | DialectType::Athena
16097 ) =>
16098 {
16099 let arg = f.args.into_iter().next().unwrap();
16100 let func_name = match target {
16101 DialectType::BigQuery => "TIMESTAMP_SECONDS",
16102 DialectType::Presto
16103 | DialectType::Trino
16104 | DialectType::Athena
16105 | DialectType::Hive
16106 | DialectType::Spark
16107 | DialectType::Databricks => "FROM_UNIXTIME",
16108 _ => "TO_TIMESTAMP",
16109 };
16110 Ok(Expression::Function(Box::new(Function::new(
16111 func_name.to_string(),
16112 vec![arg],
16113 ))))
16114 }
16115 // CONCAT(x) single-arg: -> CONCAT(COALESCE(x, '')) for Spark
16116 "CONCAT" if f.args.len() == 1 => {
16117 let arg = f.args.into_iter().next().unwrap();
16118 match target {
16119 DialectType::Presto
16120 | DialectType::Trino
16121 | DialectType::Athena => {
16122 // CONCAT(a) -> CAST(a AS VARCHAR)
16123 Ok(Expression::Cast(Box::new(Cast {
16124 this: arg,
16125 to: DataType::VarChar {
16126 length: None,
16127 parenthesized_length: false,
16128 },
16129 trailing_comments: vec![],
16130 double_colon_syntax: false,
16131 format: None,
16132 default: None,
16133 inferred_type: None,
16134 })))
16135 }
16136 DialectType::TSQL => {
16137 // CONCAT(a) -> a
16138 Ok(arg)
16139 }
16140 DialectType::DuckDB => {
16141 // Keep CONCAT(a) for DuckDB (native support)
16142 Ok(Expression::Function(Box::new(Function::new(
16143 "CONCAT".to_string(),
16144 vec![arg],
16145 ))))
16146 }
16147 DialectType::Spark | DialectType::Databricks => {
16148 let coalesced = Expression::Coalesce(Box::new(
16149 crate::expressions::VarArgFunc {
16150 expressions: vec![arg, Expression::string("")],
16151 original_name: None,
16152 inferred_type: None,
16153 },
16154 ));
16155 Ok(Expression::Function(Box::new(Function::new(
16156 "CONCAT".to_string(),
16157 vec![coalesced],
16158 ))))
16159 }
16160 _ => Ok(Expression::Function(Box::new(Function::new(
16161 "CONCAT".to_string(),
16162 vec![arg],
16163 )))),
16164 }
16165 }
16166 // REGEXP_EXTRACT(a, p) 2-arg: BigQuery default group is 0 (no 3rd arg needed)
16167 "REGEXP_EXTRACT"
16168 if f.args.len() == 3 && matches!(target, DialectType::BigQuery) =>
16169 {
16170 // If group_index is 0, drop it
16171 let drop_group = match &f.args[2] {
16172 Expression::Literal(Literal::Number(n)) => n == "0",
16173 _ => false,
16174 };
16175 if drop_group {
16176 let mut args = f.args;
16177 args.truncate(2);
16178 Ok(Expression::Function(Box::new(Function::new(
16179 "REGEXP_EXTRACT".to_string(),
16180 args,
16181 ))))
16182 } else {
16183 Ok(Expression::Function(f))
16184 }
16185 }
16186 // REGEXP_EXTRACT(a, pattern, group, flags) 4-arg -> REGEXP_SUBSTR for Snowflake
16187 "REGEXP_EXTRACT"
16188 if f.args.len() == 4
16189 && matches!(target, DialectType::Snowflake) =>
16190 {
16191 // REGEXP_EXTRACT(a, 'pattern', 2, 'i') -> REGEXP_SUBSTR(a, 'pattern', 1, 1, 'i', 2)
16192 let mut args = f.args;
16193 let this = args.remove(0);
16194 let pattern = args.remove(0);
16195 let group = args.remove(0);
16196 let flags = args.remove(0);
16197 Ok(Expression::Function(Box::new(Function::new(
16198 "REGEXP_SUBSTR".to_string(),
16199 vec![
16200 this,
16201 pattern,
16202 Expression::number(1),
16203 Expression::number(1),
16204 flags,
16205 group,
16206 ],
16207 ))))
16208 }
16209 // REGEXP_SUBSTR(a, pattern, position) 3-arg -> REGEXP_EXTRACT(SUBSTRING(a, pos), pattern)
16210 "REGEXP_SUBSTR"
16211 if f.args.len() == 3
16212 && matches!(
16213 target,
16214 DialectType::DuckDB
16215 | DialectType::Presto
16216 | DialectType::Trino
16217 | DialectType::Spark
16218 | DialectType::Databricks
16219 ) =>
16220 {
16221 let mut args = f.args;
16222 let this = args.remove(0);
16223 let pattern = args.remove(0);
16224 let position = args.remove(0);
16225 // Wrap subject in SUBSTRING(this, position) to apply the offset
16226 let substring_expr = Expression::Function(Box::new(Function::new(
16227 "SUBSTRING".to_string(),
16228 vec![this, position],
16229 )));
16230 let target_name = match target {
16231 DialectType::DuckDB => "REGEXP_EXTRACT",
16232 _ => "REGEXP_EXTRACT",
16233 };
16234 Ok(Expression::Function(Box::new(Function::new(
16235 target_name.to_string(),
16236 vec![substring_expr, pattern],
16237 ))))
16238 }
16239 // TO_DAYS(x) -> (DATEDIFF(x, '0000-01-01') + 1) or target-specific
16240 "TO_DAYS" if f.args.len() == 1 => {
16241 let x = f.args.into_iter().next().unwrap();
16242 let epoch = Expression::string("0000-01-01");
16243 // Build the final target-specific expression directly
16244 let datediff_expr = match target {
16245 DialectType::MySQL | DialectType::SingleStore => {
16246 // MySQL: (DATEDIFF(x, '0000-01-01') + 1)
16247 Expression::Function(Box::new(Function::new(
16248 "DATEDIFF".to_string(),
16249 vec![x, epoch],
16250 )))
16251 }
16252 DialectType::DuckDB => {
16253 // DuckDB: (DATE_DIFF('DAY', CAST('0000-01-01' AS DATE), CAST(x AS DATE)) + 1)
16254 let cast_epoch = Expression::Cast(Box::new(Cast {
16255 this: epoch,
16256 to: DataType::Date,
16257 trailing_comments: Vec::new(),
16258 double_colon_syntax: false,
16259 format: None,
16260 default: None,
16261 inferred_type: None,
16262 }));
16263 let cast_x = Expression::Cast(Box::new(Cast {
16264 this: x,
16265 to: DataType::Date,
16266 trailing_comments: Vec::new(),
16267 double_colon_syntax: false,
16268 format: None,
16269 default: None,
16270 inferred_type: None,
16271 }));
16272 Expression::Function(Box::new(Function::new(
16273 "DATE_DIFF".to_string(),
16274 vec![Expression::string("DAY"), cast_epoch, cast_x],
16275 )))
16276 }
16277 DialectType::Presto
16278 | DialectType::Trino
16279 | DialectType::Athena => {
16280 // Presto: (DATE_DIFF('DAY', CAST(CAST('0000-01-01' AS TIMESTAMP) AS DATE), CAST(CAST(x AS TIMESTAMP) AS DATE)) + 1)
16281 let cast_epoch = Self::double_cast_timestamp_date(epoch);
16282 let cast_x = Self::double_cast_timestamp_date(x);
16283 Expression::Function(Box::new(Function::new(
16284 "DATE_DIFF".to_string(),
16285 vec![Expression::string("DAY"), cast_epoch, cast_x],
16286 )))
16287 }
16288 _ => {
16289 // Default: (DATEDIFF(x, '0000-01-01') + 1)
16290 Expression::Function(Box::new(Function::new(
16291 "DATEDIFF".to_string(),
16292 vec![x, epoch],
16293 )))
16294 }
16295 };
16296 let add_one = Expression::Add(Box::new(BinaryOp::new(
16297 datediff_expr,
16298 Expression::number(1),
16299 )));
16300 Ok(Expression::Paren(Box::new(crate::expressions::Paren {
16301 this: add_one,
16302 trailing_comments: Vec::new(),
16303 })))
16304 }
16305 // STR_TO_DATE(x, format) -> DATE_PARSE / STRPTIME / TO_DATE etc.
16306 "STR_TO_DATE"
16307 if f.args.len() == 2
16308 && matches!(
16309 target,
16310 DialectType::Presto | DialectType::Trino
16311 ) =>
16312 {
16313 let mut args = f.args;
16314 let x = args.remove(0);
16315 let format_expr = args.remove(0);
16316 // Check if the format contains time components
16317 let has_time =
16318 if let Expression::Literal(Literal::String(ref fmt)) =
16319 format_expr
16320 {
16321 fmt.contains("%H")
16322 || fmt.contains("%T")
16323 || fmt.contains("%M")
16324 || fmt.contains("%S")
16325 || fmt.contains("%I")
16326 || fmt.contains("%p")
16327 } else {
16328 false
16329 };
16330 let date_parse = Expression::Function(Box::new(Function::new(
16331 "DATE_PARSE".to_string(),
16332 vec![x, format_expr],
16333 )));
16334 if has_time {
16335 // Has time components: just DATE_PARSE
16336 Ok(date_parse)
16337 } else {
16338 // Date-only: CAST(DATE_PARSE(...) AS DATE)
16339 Ok(Expression::Cast(Box::new(Cast {
16340 this: date_parse,
16341 to: DataType::Date,
16342 trailing_comments: Vec::new(),
16343 double_colon_syntax: false,
16344 format: None,
16345 default: None,
16346 inferred_type: None,
16347 })))
16348 }
16349 }
16350 "STR_TO_DATE"
16351 if f.args.len() == 2
16352 && matches!(
16353 target,
16354 DialectType::PostgreSQL | DialectType::Redshift
16355 ) =>
16356 {
16357 let mut args = f.args;
16358 let x = args.remove(0);
16359 let fmt = args.remove(0);
16360 let pg_fmt = match fmt {
16361 Expression::Literal(Literal::String(s)) => Expression::string(
16362 &s.replace("%Y", "YYYY")
16363 .replace("%m", "MM")
16364 .replace("%d", "DD")
16365 .replace("%H", "HH24")
16366 .replace("%M", "MI")
16367 .replace("%S", "SS"),
16368 ),
16369 other => other,
16370 };
16371 let to_date = Expression::Function(Box::new(Function::new(
16372 "TO_DATE".to_string(),
16373 vec![x, pg_fmt],
16374 )));
16375 Ok(Expression::Cast(Box::new(Cast {
16376 this: to_date,
16377 to: DataType::Timestamp {
16378 timezone: false,
16379 precision: None,
16380 },
16381 trailing_comments: Vec::new(),
16382 double_colon_syntax: false,
16383 format: None,
16384 default: None,
16385 inferred_type: None,
16386 })))
16387 }
16388 // RANGE(start, end) -> GENERATE_SERIES for SQLite
16389 "RANGE"
16390 if (f.args.len() == 1 || f.args.len() == 2)
16391 && matches!(target, DialectType::SQLite) =>
16392 {
16393 if f.args.len() == 2 {
16394 // RANGE(start, end) -> (SELECT value AS col_alias FROM GENERATE_SERIES(start, end))
16395 // For SQLite, RANGE is exclusive on end, GENERATE_SERIES is inclusive
16396 let mut args = f.args;
16397 let start = args.remove(0);
16398 let end = args.remove(0);
16399 Ok(Expression::Function(Box::new(Function::new(
16400 "GENERATE_SERIES".to_string(),
16401 vec![start, end],
16402 ))))
16403 } else {
16404 Ok(Expression::Function(f))
16405 }
16406 }
16407 // UNIFORM(low, high[, seed]) -> UNIFORM(low, high, RANDOM([seed])) for Snowflake
16408 // When source is Snowflake, keep as-is (args already in correct form)
16409 "UNIFORM"
16410 if matches!(target, DialectType::Snowflake)
16411 && (f.args.len() == 2 || f.args.len() == 3) =>
16412 {
16413 if matches!(source, DialectType::Snowflake) {
16414 // Snowflake -> Snowflake: keep as-is
16415 Ok(Expression::Function(f))
16416 } else {
16417 let mut args = f.args;
16418 let low = args.remove(0);
16419 let high = args.remove(0);
16420 let random = if !args.is_empty() {
16421 let seed = args.remove(0);
16422 Expression::Function(Box::new(Function::new(
16423 "RANDOM".to_string(),
16424 vec![seed],
16425 )))
16426 } else {
16427 Expression::Function(Box::new(Function::new(
16428 "RANDOM".to_string(),
16429 vec![],
16430 )))
16431 };
16432 Ok(Expression::Function(Box::new(Function::new(
16433 "UNIFORM".to_string(),
16434 vec![low, high, random],
16435 ))))
16436 }
16437 }
16438 // TO_UTC_TIMESTAMP(ts, tz) -> target-specific UTC conversion
16439 "TO_UTC_TIMESTAMP" if f.args.len() == 2 => {
16440 let mut args = f.args;
16441 let ts_arg = args.remove(0);
16442 let tz_arg = args.remove(0);
16443 // Cast string literal to TIMESTAMP for all targets
16444 let ts_cast =
16445 if matches!(&ts_arg, Expression::Literal(Literal::String(_))) {
16446 Expression::Cast(Box::new(Cast {
16447 this: ts_arg,
16448 to: DataType::Timestamp {
16449 timezone: false,
16450 precision: None,
16451 },
16452 trailing_comments: vec![],
16453 double_colon_syntax: false,
16454 format: None,
16455 default: None,
16456 inferred_type: None,
16457 }))
16458 } else {
16459 ts_arg
16460 };
16461 match target {
16462 DialectType::Spark | DialectType::Databricks => {
16463 Ok(Expression::Function(Box::new(Function::new(
16464 "TO_UTC_TIMESTAMP".to_string(),
16465 vec![ts_cast, tz_arg],
16466 ))))
16467 }
16468 DialectType::Snowflake => {
16469 // CONVERT_TIMEZONE(tz, 'UTC', CAST(ts AS TIMESTAMP))
16470 Ok(Expression::Function(Box::new(Function::new(
16471 "CONVERT_TIMEZONE".to_string(),
16472 vec![tz_arg, Expression::string("UTC"), ts_cast],
16473 ))))
16474 }
16475 DialectType::Presto
16476 | DialectType::Trino
16477 | DialectType::Athena => {
16478 // WITH_TIMEZONE(CAST(ts AS TIMESTAMP), tz) AT TIME ZONE 'UTC'
16479 let wtz = Expression::Function(Box::new(Function::new(
16480 "WITH_TIMEZONE".to_string(),
16481 vec![ts_cast, tz_arg],
16482 )));
16483 Ok(Expression::AtTimeZone(Box::new(
16484 crate::expressions::AtTimeZone {
16485 this: wtz,
16486 zone: Expression::string("UTC"),
16487 },
16488 )))
16489 }
16490 DialectType::BigQuery => {
16491 // DATETIME(TIMESTAMP(CAST(ts AS DATETIME), tz), 'UTC')
16492 let cast_dt = Expression::Cast(Box::new(Cast {
16493 this: if let Expression::Cast(c) = ts_cast {
16494 c.this
16495 } else {
16496 ts_cast.clone()
16497 },
16498 to: DataType::Custom {
16499 name: "DATETIME".to_string(),
16500 },
16501 trailing_comments: vec![],
16502 double_colon_syntax: false,
16503 format: None,
16504 default: None,
16505 inferred_type: None,
16506 }));
16507 let ts_func =
16508 Expression::Function(Box::new(Function::new(
16509 "TIMESTAMP".to_string(),
16510 vec![cast_dt, tz_arg],
16511 )));
16512 Ok(Expression::Function(Box::new(Function::new(
16513 "DATETIME".to_string(),
16514 vec![ts_func, Expression::string("UTC")],
16515 ))))
16516 }
16517 _ => {
16518 // DuckDB, PostgreSQL, Redshift: CAST(ts AS TIMESTAMP) AT TIME ZONE tz AT TIME ZONE 'UTC'
16519 let atz1 = Expression::AtTimeZone(Box::new(
16520 crate::expressions::AtTimeZone {
16521 this: ts_cast,
16522 zone: tz_arg,
16523 },
16524 ));
16525 Ok(Expression::AtTimeZone(Box::new(
16526 crate::expressions::AtTimeZone {
16527 this: atz1,
16528 zone: Expression::string("UTC"),
16529 },
16530 )))
16531 }
16532 }
16533 }
16534 // FROM_UTC_TIMESTAMP(ts, tz) -> target-specific UTC conversion
16535 "FROM_UTC_TIMESTAMP" if f.args.len() == 2 => {
16536 let mut args = f.args;
16537 let ts_arg = args.remove(0);
16538 let tz_arg = args.remove(0);
16539 // Cast string literal to TIMESTAMP
16540 let ts_cast =
16541 if matches!(&ts_arg, Expression::Literal(Literal::String(_))) {
16542 Expression::Cast(Box::new(Cast {
16543 this: ts_arg,
16544 to: DataType::Timestamp {
16545 timezone: false,
16546 precision: None,
16547 },
16548 trailing_comments: vec![],
16549 double_colon_syntax: false,
16550 format: None,
16551 default: None,
16552 inferred_type: None,
16553 }))
16554 } else {
16555 ts_arg
16556 };
16557 match target {
16558 DialectType::Spark | DialectType::Databricks => {
16559 Ok(Expression::Function(Box::new(Function::new(
16560 "FROM_UTC_TIMESTAMP".to_string(),
16561 vec![ts_cast, tz_arg],
16562 ))))
16563 }
16564 DialectType::Presto
16565 | DialectType::Trino
16566 | DialectType::Athena => {
16567 // AT_TIMEZONE(CAST(ts AS TIMESTAMP), tz)
16568 Ok(Expression::Function(Box::new(Function::new(
16569 "AT_TIMEZONE".to_string(),
16570 vec![ts_cast, tz_arg],
16571 ))))
16572 }
16573 DialectType::Snowflake => {
16574 // CONVERT_TIMEZONE('UTC', tz, CAST(ts AS TIMESTAMP))
16575 Ok(Expression::Function(Box::new(Function::new(
16576 "CONVERT_TIMEZONE".to_string(),
16577 vec![Expression::string("UTC"), tz_arg, ts_cast],
16578 ))))
16579 }
16580 _ => {
16581 // DuckDB, PostgreSQL, Redshift: CAST(ts AS TIMESTAMP) AT TIME ZONE tz
16582 Ok(Expression::AtTimeZone(Box::new(
16583 crate::expressions::AtTimeZone {
16584 this: ts_cast,
16585 zone: tz_arg,
16586 },
16587 )))
16588 }
16589 }
16590 }
16591 // MAP_FROM_ARRAYS(keys, values) -> target-specific map construction
16592 "MAP_FROM_ARRAYS" if f.args.len() == 2 => {
16593 let name = match target {
16594 DialectType::Snowflake => "OBJECT_CONSTRUCT",
16595 _ => "MAP",
16596 };
16597 Ok(Expression::Function(Box::new(Function::new(
16598 name.to_string(),
16599 f.args,
16600 ))))
16601 }
16602 // STR_TO_MAP(s, pair_delim, kv_delim) -> SPLIT_TO_MAP for Presto
16603 "STR_TO_MAP" if f.args.len() >= 1 => match target {
16604 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
16605 Ok(Expression::Function(Box::new(Function::new(
16606 "SPLIT_TO_MAP".to_string(),
16607 f.args,
16608 ))))
16609 }
16610 _ => Ok(Expression::Function(f)),
16611 },
16612 // TIME_TO_STR(x, fmt) -> Expression::TimeToStr for proper generation
16613 "TIME_TO_STR" if f.args.len() == 2 => {
16614 let mut args = f.args;
16615 let this = args.remove(0);
16616 let fmt_expr = args.remove(0);
16617 let format =
16618 if let Expression::Literal(Literal::String(s)) = fmt_expr {
16619 s
16620 } else {
16621 "%Y-%m-%d %H:%M:%S".to_string()
16622 };
16623 Ok(Expression::TimeToStr(Box::new(
16624 crate::expressions::TimeToStr {
16625 this: Box::new(this),
16626 format,
16627 culture: None,
16628 zone: None,
16629 },
16630 )))
16631 }
16632 // STR_TO_TIME(x, fmt) -> Expression::StrToTime for proper generation
16633 "STR_TO_TIME" if f.args.len() == 2 => {
16634 let mut args = f.args;
16635 let this = args.remove(0);
16636 let fmt_expr = args.remove(0);
16637 let format =
16638 if let Expression::Literal(Literal::String(s)) = fmt_expr {
16639 s
16640 } else {
16641 "%Y-%m-%d %H:%M:%S".to_string()
16642 };
16643 Ok(Expression::StrToTime(Box::new(
16644 crate::expressions::StrToTime {
16645 this: Box::new(this),
16646 format,
16647 zone: None,
16648 safe: None,
16649 target_type: None,
16650 },
16651 )))
16652 }
16653 // STR_TO_UNIX(x, fmt) -> Expression::StrToUnix for proper generation
16654 "STR_TO_UNIX" if f.args.len() >= 1 => {
16655 let mut args = f.args;
16656 let this = args.remove(0);
16657 let format = if !args.is_empty() {
16658 if let Expression::Literal(Literal::String(s)) = args.remove(0)
16659 {
16660 Some(s)
16661 } else {
16662 None
16663 }
16664 } else {
16665 None
16666 };
16667 Ok(Expression::StrToUnix(Box::new(
16668 crate::expressions::StrToUnix {
16669 this: Some(Box::new(this)),
16670 format,
16671 },
16672 )))
16673 }
16674 // TIME_TO_UNIX(x) -> Expression::TimeToUnix for proper generation
16675 "TIME_TO_UNIX" if f.args.len() == 1 => {
16676 let mut args = f.args;
16677 let this = args.remove(0);
16678 Ok(Expression::TimeToUnix(Box::new(
16679 crate::expressions::UnaryFunc {
16680 this,
16681 original_name: None,
16682 inferred_type: None,
16683 },
16684 )))
16685 }
16686 // UNIX_TO_STR(x, fmt) -> Expression::UnixToStr for proper generation
16687 "UNIX_TO_STR" if f.args.len() >= 1 => {
16688 let mut args = f.args;
16689 let this = args.remove(0);
16690 let format = if !args.is_empty() {
16691 if let Expression::Literal(Literal::String(s)) = args.remove(0)
16692 {
16693 Some(s)
16694 } else {
16695 None
16696 }
16697 } else {
16698 None
16699 };
16700 Ok(Expression::UnixToStr(Box::new(
16701 crate::expressions::UnixToStr {
16702 this: Box::new(this),
16703 format,
16704 },
16705 )))
16706 }
16707 // UNIX_TO_TIME(x) -> Expression::UnixToTime for proper generation
16708 "UNIX_TO_TIME" if f.args.len() == 1 => {
16709 let mut args = f.args;
16710 let this = args.remove(0);
16711 Ok(Expression::UnixToTime(Box::new(
16712 crate::expressions::UnixToTime {
16713 this: Box::new(this),
16714 scale: None,
16715 zone: None,
16716 hours: None,
16717 minutes: None,
16718 format: None,
16719 target_type: None,
16720 },
16721 )))
16722 }
16723 // TIME_STR_TO_DATE(x) -> Expression::TimeStrToDate for proper generation
16724 "TIME_STR_TO_DATE" if f.args.len() == 1 => {
16725 let mut args = f.args;
16726 let this = args.remove(0);
16727 Ok(Expression::TimeStrToDate(Box::new(
16728 crate::expressions::UnaryFunc {
16729 this,
16730 original_name: None,
16731 inferred_type: None,
16732 },
16733 )))
16734 }
16735 // TIME_STR_TO_TIME(x) -> Expression::TimeStrToTime for proper generation
16736 "TIME_STR_TO_TIME" if f.args.len() == 1 => {
16737 let mut args = f.args;
16738 let this = args.remove(0);
16739 Ok(Expression::TimeStrToTime(Box::new(
16740 crate::expressions::TimeStrToTime {
16741 this: Box::new(this),
16742 zone: None,
16743 },
16744 )))
16745 }
16746 // MONTHS_BETWEEN(end, start) -> DuckDB complex expansion
16747 "MONTHS_BETWEEN" if f.args.len() == 2 => {
16748 match target {
16749 DialectType::DuckDB => {
16750 let mut args = f.args;
16751 let end_date = args.remove(0);
16752 let start_date = args.remove(0);
16753 let cast_end = Self::ensure_cast_date(end_date);
16754 let cast_start = Self::ensure_cast_date(start_date);
16755 // DATE_DIFF('MONTH', start, end) + CASE WHEN DAY(end) = DAY(LAST_DAY(end)) AND DAY(start) = DAY(LAST_DAY(start)) THEN 0 ELSE (DAY(end) - DAY(start)) / 31.0 END
16756 let dd = Expression::Function(Box::new(Function::new(
16757 "DATE_DIFF".to_string(),
16758 vec![
16759 Expression::string("MONTH"),
16760 cast_start.clone(),
16761 cast_end.clone(),
16762 ],
16763 )));
16764 let day_end =
16765 Expression::Function(Box::new(Function::new(
16766 "DAY".to_string(),
16767 vec![cast_end.clone()],
16768 )));
16769 let day_start =
16770 Expression::Function(Box::new(Function::new(
16771 "DAY".to_string(),
16772 vec![cast_start.clone()],
16773 )));
16774 let last_day_end =
16775 Expression::Function(Box::new(Function::new(
16776 "LAST_DAY".to_string(),
16777 vec![cast_end.clone()],
16778 )));
16779 let last_day_start =
16780 Expression::Function(Box::new(Function::new(
16781 "LAST_DAY".to_string(),
16782 vec![cast_start.clone()],
16783 )));
16784 let day_last_end = Expression::Function(Box::new(
16785 Function::new("DAY".to_string(), vec![last_day_end]),
16786 ));
16787 let day_last_start = Expression::Function(Box::new(
16788 Function::new("DAY".to_string(), vec![last_day_start]),
16789 ));
16790 let cond1 = Expression::Eq(Box::new(BinaryOp::new(
16791 day_end.clone(),
16792 day_last_end,
16793 )));
16794 let cond2 = Expression::Eq(Box::new(BinaryOp::new(
16795 day_start.clone(),
16796 day_last_start,
16797 )));
16798 let both_cond =
16799 Expression::And(Box::new(BinaryOp::new(cond1, cond2)));
16800 let day_diff = Expression::Sub(Box::new(BinaryOp::new(
16801 day_end, day_start,
16802 )));
16803 let day_diff_paren = Expression::Paren(Box::new(
16804 crate::expressions::Paren {
16805 this: day_diff,
16806 trailing_comments: Vec::new(),
16807 },
16808 ));
16809 let frac = Expression::Div(Box::new(BinaryOp::new(
16810 day_diff_paren,
16811 Expression::Literal(Literal::Number(
16812 "31.0".to_string(),
16813 )),
16814 )));
16815 let case_expr = Expression::Case(Box::new(Case {
16816 operand: None,
16817 whens: vec![(both_cond, Expression::number(0))],
16818 else_: Some(frac),
16819 comments: Vec::new(),
16820 inferred_type: None,
16821 }));
16822 Ok(Expression::Add(Box::new(BinaryOp::new(dd, case_expr))))
16823 }
16824 DialectType::Snowflake | DialectType::Redshift => {
16825 let mut args = f.args;
16826 let end_date = args.remove(0);
16827 let start_date = args.remove(0);
16828 let unit = Expression::Identifier(Identifier::new("MONTH"));
16829 Ok(Expression::Function(Box::new(Function::new(
16830 "DATEDIFF".to_string(),
16831 vec![unit, start_date, end_date],
16832 ))))
16833 }
16834 DialectType::Presto
16835 | DialectType::Trino
16836 | DialectType::Athena => {
16837 let mut args = f.args;
16838 let end_date = args.remove(0);
16839 let start_date = args.remove(0);
16840 Ok(Expression::Function(Box::new(Function::new(
16841 "DATE_DIFF".to_string(),
16842 vec![Expression::string("MONTH"), start_date, end_date],
16843 ))))
16844 }
16845 _ => Ok(Expression::Function(f)),
16846 }
16847 }
16848 // MONTHS_BETWEEN(end, start, roundOff) - 3-arg form (Spark-specific)
16849 // Drop the roundOff arg for non-Spark targets, keep it for Spark
16850 "MONTHS_BETWEEN" if f.args.len() == 3 => {
16851 match target {
16852 DialectType::Spark | DialectType::Databricks => {
16853 Ok(Expression::Function(f))
16854 }
16855 _ => {
16856 // Drop the 3rd arg and delegate to the 2-arg logic
16857 let mut args = f.args;
16858 let end_date = args.remove(0);
16859 let start_date = args.remove(0);
16860 // Re-create as 2-arg and process
16861 let f2 = Function::new(
16862 "MONTHS_BETWEEN".to_string(),
16863 vec![end_date, start_date],
16864 );
16865 let e2 = Expression::Function(Box::new(f2));
16866 Self::cross_dialect_normalize(e2, source, target)
16867 }
16868 }
16869 }
16870 // TO_TIMESTAMP(x) with 1 arg -> CAST(x AS TIMESTAMP) for most targets
16871 "TO_TIMESTAMP"
16872 if f.args.len() == 1
16873 && matches!(
16874 source,
16875 DialectType::Spark
16876 | DialectType::Databricks
16877 | DialectType::Hive
16878 ) =>
16879 {
16880 let arg = f.args.into_iter().next().unwrap();
16881 Ok(Expression::Cast(Box::new(Cast {
16882 this: arg,
16883 to: DataType::Timestamp {
16884 timezone: false,
16885 precision: None,
16886 },
16887 trailing_comments: vec![],
16888 double_colon_syntax: false,
16889 format: None,
16890 default: None,
16891 inferred_type: None,
16892 })))
16893 }
16894 // STRING(x) -> CAST(x AS STRING) for Spark target
16895 "STRING"
16896 if f.args.len() == 1
16897 && matches!(
16898 source,
16899 DialectType::Spark | DialectType::Databricks
16900 ) =>
16901 {
16902 let arg = f.args.into_iter().next().unwrap();
16903 let dt = match target {
16904 DialectType::Spark
16905 | DialectType::Databricks
16906 | DialectType::Hive => DataType::Custom {
16907 name: "STRING".to_string(),
16908 },
16909 _ => DataType::Text,
16910 };
16911 Ok(Expression::Cast(Box::new(Cast {
16912 this: arg,
16913 to: dt,
16914 trailing_comments: vec![],
16915 double_colon_syntax: false,
16916 format: None,
16917 default: None,
16918 inferred_type: None,
16919 })))
16920 }
16921 // LOGICAL_OR(x) -> BOOL_OR(x) for Spark target
16922 "LOGICAL_OR" if f.args.len() == 1 => {
16923 let name = match target {
16924 DialectType::Spark | DialectType::Databricks => "BOOL_OR",
16925 _ => "LOGICAL_OR",
16926 };
16927 Ok(Expression::Function(Box::new(Function::new(
16928 name.to_string(),
16929 f.args,
16930 ))))
16931 }
16932 // SPLIT(x, pattern) from Spark -> STR_SPLIT_REGEX for DuckDB, REGEXP_SPLIT for Presto
16933 "SPLIT"
16934 if f.args.len() == 2
16935 && matches!(
16936 source,
16937 DialectType::Spark
16938 | DialectType::Databricks
16939 | DialectType::Hive
16940 ) =>
16941 {
16942 let name = match target {
16943 DialectType::DuckDB => "STR_SPLIT_REGEX",
16944 DialectType::Presto
16945 | DialectType::Trino
16946 | DialectType::Athena => "REGEXP_SPLIT",
16947 DialectType::Spark
16948 | DialectType::Databricks
16949 | DialectType::Hive => "SPLIT",
16950 _ => "SPLIT",
16951 };
16952 Ok(Expression::Function(Box::new(Function::new(
16953 name.to_string(),
16954 f.args,
16955 ))))
16956 }
// TRY_ELEMENT_AT -> ELEMENT_AT for Presto, array[idx] for DuckDB
// NOTE(review): both rewrites presumably preserve the "try" (NULL on
// out-of-bounds) semantics — Presto's ELEMENT_AT and DuckDB's 1-based
// subscript are documented to return NULL for an out-of-range index —
// TODO confirm behavior for negative indices on each target.
"TRY_ELEMENT_AT" if f.args.len() == 2 => match target {
    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
        // Presto family: plain rename, argument order unchanged.
        Ok(Expression::Function(Box::new(Function::new(
            "ELEMENT_AT".to_string(),
            f.args,
        ))))
    }
    DialectType::DuckDB => {
        // DuckDB: rewrite to subscript syntax arr[idx].
        let mut args = f.args;
        let arr = args.remove(0);
        let idx = args.remove(0);
        Ok(Expression::Subscript(Box::new(
            crate::expressions::Subscript {
                this: arr,
                index: idx,
            },
        )))
    }
    // Any other target keeps the function call untouched.
    _ => Ok(Expression::Function(f)),
},
16978 // ARRAY_FILTER(arr, lambda) -> FILTER for Hive/Spark/Presto, LIST_FILTER for DuckDB
16979 "ARRAY_FILTER" if f.args.len() == 2 => {
16980 let name = match target {
16981 DialectType::DuckDB => "LIST_FILTER",
16982 DialectType::StarRocks => "ARRAY_FILTER",
16983 _ => "FILTER",
16984 };
16985 Ok(Expression::Function(Box::new(Function::new(
16986 name.to_string(),
16987 f.args,
16988 ))))
16989 }
16990 // FILTER(arr, lambda) -> ARRAY_FILTER for StarRocks, LIST_FILTER for DuckDB
16991 "FILTER" if f.args.len() == 2 => {
16992 let name = match target {
16993 DialectType::DuckDB => "LIST_FILTER",
16994 DialectType::StarRocks => "ARRAY_FILTER",
16995 _ => "FILTER",
16996 };
16997 Ok(Expression::Function(Box::new(Function::new(
16998 name.to_string(),
16999 f.args,
17000 ))))
17001 }
17002 // REDUCE(arr, init, lambda1, lambda2) -> AGGREGATE for Spark
17003 "REDUCE" if f.args.len() >= 3 => {
17004 let name = match target {
17005 DialectType::Spark | DialectType::Databricks => "AGGREGATE",
17006 _ => "REDUCE",
17007 };
17008 Ok(Expression::Function(Box::new(Function::new(
17009 name.to_string(),
17010 f.args,
17011 ))))
17012 }
17013 // CURRENT_SCHEMA() -> dialect-specific
17014 "CURRENT_SCHEMA" => {
17015 match target {
17016 DialectType::PostgreSQL => {
17017 // PostgreSQL: CURRENT_SCHEMA (no parens)
17018 Ok(Expression::Function(Box::new(Function {
17019 name: "CURRENT_SCHEMA".to_string(),
17020 args: vec![],
17021 distinct: false,
17022 trailing_comments: vec![],
17023 use_bracket_syntax: false,
17024 no_parens: true,
17025 quoted: false,
17026 span: None,
17027 inferred_type: None,
17028 })))
17029 }
17030 DialectType::MySQL
17031 | DialectType::Doris
17032 | DialectType::StarRocks => Ok(Expression::Function(Box::new(
17033 Function::new("SCHEMA".to_string(), vec![]),
17034 ))),
17035 DialectType::TSQL => Ok(Expression::Function(Box::new(
17036 Function::new("SCHEMA_NAME".to_string(), vec![]),
17037 ))),
17038 DialectType::SQLite => {
17039 Ok(Expression::Literal(Literal::String("main".to_string())))
17040 }
17041 _ => Ok(Expression::Function(f)),
17042 }
17043 }
17044 // LTRIM(str, chars) 2-arg -> TRIM(LEADING chars FROM str) for Spark/Hive/Databricks/ClickHouse
17045 "LTRIM" if f.args.len() == 2 => match target {
17046 DialectType::Spark
17047 | DialectType::Hive
17048 | DialectType::Databricks
17049 | DialectType::ClickHouse => {
17050 let mut args = f.args;
17051 let str_expr = args.remove(0);
17052 let chars = args.remove(0);
17053 Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
17054 this: str_expr,
17055 characters: Some(chars),
17056 position: crate::expressions::TrimPosition::Leading,
17057 sql_standard_syntax: true,
17058 position_explicit: true,
17059 })))
17060 }
17061 _ => Ok(Expression::Function(f)),
17062 },
17063 // RTRIM(str, chars) 2-arg -> TRIM(TRAILING chars FROM str) for Spark/Hive/Databricks/ClickHouse
17064 "RTRIM" if f.args.len() == 2 => match target {
17065 DialectType::Spark
17066 | DialectType::Hive
17067 | DialectType::Databricks
17068 | DialectType::ClickHouse => {
17069 let mut args = f.args;
17070 let str_expr = args.remove(0);
17071 let chars = args.remove(0);
17072 Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
17073 this: str_expr,
17074 characters: Some(chars),
17075 position: crate::expressions::TrimPosition::Trailing,
17076 sql_standard_syntax: true,
17077 position_explicit: true,
17078 })))
17079 }
17080 _ => Ok(Expression::Function(f)),
17081 },
17082 // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
17083 "ARRAY_REVERSE" if f.args.len() == 1 => match target {
17084 DialectType::ClickHouse => {
17085 let mut new_f = *f;
17086 new_f.name = "arrayReverse".to_string();
17087 Ok(Expression::Function(Box::new(new_f)))
17088 }
17089 _ => Ok(Expression::Function(f)),
17090 },
17091 // UUID() -> NEWID() for TSQL
17092 "UUID" if f.args.is_empty() => match target {
17093 DialectType::TSQL | DialectType::Fabric => {
17094 Ok(Expression::Function(Box::new(Function::new(
17095 "NEWID".to_string(),
17096 vec![],
17097 ))))
17098 }
17099 _ => Ok(Expression::Function(f)),
17100 },
17101 // FARM_FINGERPRINT(x) -> farmFingerprint64(x) for ClickHouse, FARMFINGERPRINT64(x) for Redshift
17102 "FARM_FINGERPRINT" if f.args.len() == 1 => match target {
17103 DialectType::ClickHouse => {
17104 let mut new_f = *f;
17105 new_f.name = "farmFingerprint64".to_string();
17106 Ok(Expression::Function(Box::new(new_f)))
17107 }
17108 DialectType::Redshift => {
17109 let mut new_f = *f;
17110 new_f.name = "FARMFINGERPRINT64".to_string();
17111 Ok(Expression::Function(Box::new(new_f)))
17112 }
17113 _ => Ok(Expression::Function(f)),
17114 },
17115 // JSON_KEYS(x) -> JSON_OBJECT_KEYS(x) for Databricks/Spark, OBJECT_KEYS(x) for Snowflake
17116 "JSON_KEYS" => match target {
17117 DialectType::Databricks | DialectType::Spark => {
17118 let mut new_f = *f;
17119 new_f.name = "JSON_OBJECT_KEYS".to_string();
17120 Ok(Expression::Function(Box::new(new_f)))
17121 }
17122 DialectType::Snowflake => {
17123 let mut new_f = *f;
17124 new_f.name = "OBJECT_KEYS".to_string();
17125 Ok(Expression::Function(Box::new(new_f)))
17126 }
17127 _ => Ok(Expression::Function(f)),
17128 },
17129 // WEEKOFYEAR(x) -> WEEKISO(x) for Snowflake
17130 "WEEKOFYEAR" => match target {
17131 DialectType::Snowflake => {
17132 let mut new_f = *f;
17133 new_f.name = "WEEKISO".to_string();
17134 Ok(Expression::Function(Box::new(new_f)))
17135 }
17136 _ => Ok(Expression::Function(f)),
17137 },
17138 // FORMAT(fmt, args...) -> FORMAT_STRING(fmt, args...) for Databricks
17139 "FORMAT"
17140 if f.args.len() >= 2 && matches!(source, DialectType::Generic) =>
17141 {
17142 match target {
17143 DialectType::Databricks | DialectType::Spark => {
17144 let mut new_f = *f;
17145 new_f.name = "FORMAT_STRING".to_string();
17146 Ok(Expression::Function(Box::new(new_f)))
17147 }
17148 _ => Ok(Expression::Function(f)),
17149 }
17150 }
17151 // CONCAT_WS('-', args...) -> CONCAT_WS('-', CAST(arg AS VARCHAR), ...) for Presto/Trino
17152 "CONCAT_WS" if f.args.len() >= 2 => match target {
17153 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
17154 let mut args = f.args;
17155 let sep = args.remove(0);
17156 let cast_args: Vec<Expression> = args
17157 .into_iter()
17158 .map(|a| {
17159 Expression::Cast(Box::new(Cast {
17160 this: a,
17161 to: DataType::VarChar {
17162 length: None,
17163 parenthesized_length: false,
17164 },
17165 double_colon_syntax: false,
17166 trailing_comments: Vec::new(),
17167 format: None,
17168 default: None,
17169 inferred_type: None,
17170 }))
17171 })
17172 .collect();
17173 let mut new_args = vec![sep];
17174 new_args.extend(cast_args);
17175 Ok(Expression::Function(Box::new(Function::new(
17176 "CONCAT_WS".to_string(),
17177 new_args,
17178 ))))
17179 }
17180 _ => Ok(Expression::Function(f)),
17181 },
17182 // ARRAY_SLICE(x, start, end) -> SLICE(x, start, end) for Presto/Trino/Databricks, arraySlice for ClickHouse
17183 "ARRAY_SLICE" if f.args.len() >= 2 => match target {
17184 DialectType::Presto
17185 | DialectType::Trino
17186 | DialectType::Athena
17187 | DialectType::Databricks
17188 | DialectType::Spark => {
17189 let mut new_f = *f;
17190 new_f.name = "SLICE".to_string();
17191 Ok(Expression::Function(Box::new(new_f)))
17192 }
17193 DialectType::ClickHouse => {
17194 let mut new_f = *f;
17195 new_f.name = "arraySlice".to_string();
17196 Ok(Expression::Function(Box::new(new_f)))
17197 }
17198 _ => Ok(Expression::Function(f)),
17199 },
17200 // ARRAY_PREPEND(arr, x) -> LIST_PREPEND(x, arr) for DuckDB (swap args)
17201 "ARRAY_PREPEND" if f.args.len() == 2 => match target {
17202 DialectType::DuckDB => {
17203 let mut args = f.args;
17204 let arr = args.remove(0);
17205 let val = args.remove(0);
17206 Ok(Expression::Function(Box::new(Function::new(
17207 "LIST_PREPEND".to_string(),
17208 vec![val, arr],
17209 ))))
17210 }
17211 _ => Ok(Expression::Function(f)),
17212 },
17213 // ARRAY_REMOVE(arr, target) -> dialect-specific
17214 "ARRAY_REMOVE" if f.args.len() == 2 => {
17215 match target {
17216 DialectType::DuckDB => {
17217 let mut args = f.args;
17218 let arr = args.remove(0);
17219 let target_val = args.remove(0);
17220 let u_id = crate::expressions::Identifier::new("_u");
17221 // LIST_FILTER(arr, _u -> _u <> target)
17222 let lambda = Expression::Lambda(Box::new(
17223 crate::expressions::LambdaExpr {
17224 parameters: vec![u_id.clone()],
17225 body: Expression::Neq(Box::new(BinaryOp {
17226 left: Expression::Identifier(u_id),
17227 right: target_val,
17228 left_comments: Vec::new(),
17229 operator_comments: Vec::new(),
17230 trailing_comments: Vec::new(),
17231 inferred_type: None,
17232 })),
17233 colon: false,
17234 parameter_types: Vec::new(),
17235 },
17236 ));
17237 Ok(Expression::Function(Box::new(Function::new(
17238 "LIST_FILTER".to_string(),
17239 vec![arr, lambda],
17240 ))))
17241 }
17242 DialectType::ClickHouse => {
17243 let mut args = f.args;
17244 let arr = args.remove(0);
17245 let target_val = args.remove(0);
17246 let u_id = crate::expressions::Identifier::new("_u");
17247 // arrayFilter(_u -> _u <> target, arr)
17248 let lambda = Expression::Lambda(Box::new(
17249 crate::expressions::LambdaExpr {
17250 parameters: vec![u_id.clone()],
17251 body: Expression::Neq(Box::new(BinaryOp {
17252 left: Expression::Identifier(u_id),
17253 right: target_val,
17254 left_comments: Vec::new(),
17255 operator_comments: Vec::new(),
17256 trailing_comments: Vec::new(),
17257 inferred_type: None,
17258 })),
17259 colon: false,
17260 parameter_types: Vec::new(),
17261 },
17262 ));
17263 Ok(Expression::Function(Box::new(Function::new(
17264 "arrayFilter".to_string(),
17265 vec![lambda, arr],
17266 ))))
17267 }
17268 DialectType::BigQuery => {
17269 // ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)
17270 let mut args = f.args;
17271 let arr = args.remove(0);
17272 let target_val = args.remove(0);
17273 let u_id = crate::expressions::Identifier::new("_u");
17274 let u_col =
17275 Expression::Column(crate::expressions::Column {
17276 name: u_id.clone(),
17277 table: None,
17278 join_mark: false,
17279 trailing_comments: Vec::new(),
17280 span: None,
17281 inferred_type: None,
17282 });
17283 // UNNEST(the_array) AS _u
17284 let unnest_expr = Expression::Unnest(Box::new(
17285 crate::expressions::UnnestFunc {
17286 this: arr,
17287 expressions: Vec::new(),
17288 with_ordinality: false,
17289 alias: None,
17290 offset_alias: None,
17291 },
17292 ));
17293 let aliased_unnest = Expression::Alias(Box::new(
17294 crate::expressions::Alias {
17295 this: unnest_expr,
17296 alias: u_id.clone(),
17297 column_aliases: Vec::new(),
17298 pre_alias_comments: Vec::new(),
17299 trailing_comments: Vec::new(),
17300 inferred_type: None,
17301 },
17302 ));
17303 // _u <> target
17304 let where_cond = Expression::Neq(Box::new(BinaryOp {
17305 left: u_col.clone(),
17306 right: target_val,
17307 left_comments: Vec::new(),
17308 operator_comments: Vec::new(),
17309 trailing_comments: Vec::new(),
17310 inferred_type: None,
17311 }));
17312 // SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target
17313 let subquery = Expression::Select(Box::new(
17314 crate::expressions::Select::new()
17315 .column(u_col)
17316 .from(aliased_unnest)
17317 .where_(where_cond),
17318 ));
17319 // ARRAY(subquery) -- use ArrayFunc with subquery as single element
17320 Ok(Expression::ArrayFunc(Box::new(
17321 crate::expressions::ArrayConstructor {
17322 expressions: vec![subquery],
17323 bracket_notation: false,
17324 use_list_keyword: false,
17325 },
17326 )))
17327 }
17328 _ => Ok(Expression::Function(f)),
17329 }
17330 }
17331 // PARSE_JSON(str) -> remove for SQLite/Doris (just use the string literal)
17332 "PARSE_JSON" if f.args.len() == 1 => {
17333 match target {
17334 DialectType::SQLite
17335 | DialectType::Doris
17336 | DialectType::MySQL
17337 | DialectType::StarRocks => {
17338 // Strip PARSE_JSON, return the inner argument
17339 Ok(f.args.into_iter().next().unwrap())
17340 }
17341 _ => Ok(Expression::Function(f)),
17342 }
17343 }
17344 // JSON_REMOVE(PARSE_JSON(str), path...) -> for SQLite strip PARSE_JSON
17345 // This is handled by PARSE_JSON stripping above; JSON_REMOVE is passed through
17346 "JSON_REMOVE" => Ok(Expression::Function(f)),
17347 // JSON_SET(PARSE_JSON(str), path, PARSE_JSON(val)) -> for SQLite strip PARSE_JSON
17348 // This is handled by PARSE_JSON stripping above; JSON_SET is passed through
17349 "JSON_SET" => Ok(Expression::Function(f)),
17350 // DECODE(x, search1, result1, ..., default) -> CASE WHEN
17351 // Behavior per search value type:
17352 // NULL literal -> CASE WHEN x IS NULL THEN result
17353 // Literal (number, string, bool) -> CASE WHEN x = literal THEN result
17354 // Non-literal (column, expr) -> CASE WHEN x = search OR (x IS NULL AND search IS NULL) THEN result
17355 "DECODE" if f.args.len() >= 3 => {
17356 // Keep as DECODE for targets that support it natively
17357 let keep_as_decode = matches!(
17358 target,
17359 DialectType::Oracle
17360 | DialectType::Snowflake
17361 | DialectType::Redshift
17362 | DialectType::Teradata
17363 | DialectType::Spark
17364 | DialectType::Databricks
17365 );
17366 if keep_as_decode {
17367 return Ok(Expression::Function(f));
17368 }
17369
17370 let mut args = f.args;
17371 let this_expr = args.remove(0);
17372 let mut pairs = Vec::new();
17373 let mut default = None;
17374 let mut i = 0;
17375 while i + 1 < args.len() {
17376 pairs.push((args[i].clone(), args[i + 1].clone()));
17377 i += 2;
17378 }
17379 if i < args.len() {
17380 default = Some(args[i].clone());
17381 }
17382 // Helper: check if expression is a literal value
17383 fn is_literal(e: &Expression) -> bool {
17384 matches!(
17385 e,
17386 Expression::Literal(_)
17387 | Expression::Boolean(_)
17388 | Expression::Neg(_)
17389 )
17390 }
17391 let whens: Vec<(Expression, Expression)> = pairs
17392 .into_iter()
17393 .map(|(search, result)| {
17394 if matches!(&search, Expression::Null(_)) {
17395 // NULL search -> IS NULL
17396 let condition = Expression::Is(Box::new(BinaryOp {
17397 left: this_expr.clone(),
17398 right: Expression::Null(crate::expressions::Null),
17399 left_comments: Vec::new(),
17400 operator_comments: Vec::new(),
17401 trailing_comments: Vec::new(),
17402 inferred_type: None,
17403 }));
17404 (condition, result)
17405 } else if is_literal(&search) {
17406 // Literal search -> simple equality
17407 let eq = Expression::Eq(Box::new(BinaryOp {
17408 left: this_expr.clone(),
17409 right: search,
17410 left_comments: Vec::new(),
17411 operator_comments: Vec::new(),
17412 trailing_comments: Vec::new(),
17413 inferred_type: None,
17414 }));
17415 (eq, result)
17416 } else {
17417 // Non-literal (column ref, expression) -> null-safe comparison
17418 let needs_paren = matches!(
17419 &search,
17420 Expression::Eq(_)
17421 | Expression::Neq(_)
17422 | Expression::Gt(_)
17423 | Expression::Gte(_)
17424 | Expression::Lt(_)
17425 | Expression::Lte(_)
17426 );
17427 let search_for_eq = if needs_paren {
17428 Expression::Paren(Box::new(
17429 crate::expressions::Paren {
17430 this: search.clone(),
17431 trailing_comments: Vec::new(),
17432 },
17433 ))
17434 } else {
17435 search.clone()
17436 };
17437 let eq = Expression::Eq(Box::new(BinaryOp {
17438 left: this_expr.clone(),
17439 right: search_for_eq,
17440 left_comments: Vec::new(),
17441 operator_comments: Vec::new(),
17442 trailing_comments: Vec::new(),
17443 inferred_type: None,
17444 }));
17445 let search_for_null = if needs_paren {
17446 Expression::Paren(Box::new(
17447 crate::expressions::Paren {
17448 this: search.clone(),
17449 trailing_comments: Vec::new(),
17450 },
17451 ))
17452 } else {
17453 search.clone()
17454 };
17455 let x_is_null = Expression::Is(Box::new(BinaryOp {
17456 left: this_expr.clone(),
17457 right: Expression::Null(crate::expressions::Null),
17458 left_comments: Vec::new(),
17459 operator_comments: Vec::new(),
17460 trailing_comments: Vec::new(),
17461 inferred_type: None,
17462 }));
17463 let s_is_null = Expression::Is(Box::new(BinaryOp {
17464 left: search_for_null,
17465 right: Expression::Null(crate::expressions::Null),
17466 left_comments: Vec::new(),
17467 operator_comments: Vec::new(),
17468 trailing_comments: Vec::new(),
17469 inferred_type: None,
17470 }));
17471 let both_null = Expression::And(Box::new(BinaryOp {
17472 left: x_is_null,
17473 right: s_is_null,
17474 left_comments: Vec::new(),
17475 operator_comments: Vec::new(),
17476 trailing_comments: Vec::new(),
17477 inferred_type: None,
17478 }));
17479 let condition = Expression::Or(Box::new(BinaryOp {
17480 left: eq,
17481 right: Expression::Paren(Box::new(
17482 crate::expressions::Paren {
17483 this: both_null,
17484 trailing_comments: Vec::new(),
17485 },
17486 )),
17487 left_comments: Vec::new(),
17488 operator_comments: Vec::new(),
17489 trailing_comments: Vec::new(),
17490 inferred_type: None,
17491 }));
17492 (condition, result)
17493 }
17494 })
17495 .collect();
17496 Ok(Expression::Case(Box::new(Case {
17497 operand: None,
17498 whens,
17499 else_: default,
17500 comments: Vec::new(),
17501 inferred_type: None,
17502 })))
17503 }
17504 // LEVENSHTEIN(a, b, ...) -> dialect-specific
17505 "LEVENSHTEIN" => {
17506 match target {
17507 DialectType::BigQuery => {
17508 let mut new_f = *f;
17509 new_f.name = "EDIT_DISTANCE".to_string();
17510 Ok(Expression::Function(Box::new(new_f)))
17511 }
17512 DialectType::Drill => {
17513 let mut new_f = *f;
17514 new_f.name = "LEVENSHTEIN_DISTANCE".to_string();
17515 Ok(Expression::Function(Box::new(new_f)))
17516 }
17517 DialectType::PostgreSQL if f.args.len() == 6 => {
17518 // PostgreSQL: LEVENSHTEIN(src, tgt, ins, del, sub, max_d) -> LEVENSHTEIN_LESS_EQUAL
17519 // 2 args: basic, 5 args: with costs, 6 args: with costs + max_distance
17520 let mut new_f = *f;
17521 new_f.name = "LEVENSHTEIN_LESS_EQUAL".to_string();
17522 Ok(Expression::Function(Box::new(new_f)))
17523 }
17524 _ => Ok(Expression::Function(f)),
17525 }
17526 }
17527 // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
17528 "ARRAY_REVERSE" => match target {
17529 DialectType::ClickHouse => {
17530 let mut new_f = *f;
17531 new_f.name = "arrayReverse".to_string();
17532 Ok(Expression::Function(Box::new(new_f)))
17533 }
17534 _ => Ok(Expression::Function(f)),
17535 },
17536 // GENERATE_DATE_ARRAY(start, end[, step]) -> target-specific
17537 "GENERATE_DATE_ARRAY" => {
17538 let mut args = f.args;
17539 if matches!(target, DialectType::BigQuery) {
17540 // BigQuery keeps GENERATE_DATE_ARRAY; add default interval if not present
17541 if args.len() == 2 {
17542 let default_interval = Expression::Interval(Box::new(
17543 crate::expressions::Interval {
17544 this: Some(Expression::Literal(Literal::String(
17545 "1".to_string(),
17546 ))),
17547 unit: Some(
17548 crate::expressions::IntervalUnitSpec::Simple {
17549 unit: crate::expressions::IntervalUnit::Day,
17550 use_plural: false,
17551 },
17552 ),
17553 },
17554 ));
17555 args.push(default_interval);
17556 }
17557 Ok(Expression::Function(Box::new(Function::new(
17558 "GENERATE_DATE_ARRAY".to_string(),
17559 args,
17560 ))))
17561 } else if matches!(target, DialectType::DuckDB) {
17562 // DuckDB: CAST(GENERATE_SERIES(start, end, step) AS DATE[])
17563 let start = args.get(0).cloned();
17564 let end = args.get(1).cloned();
17565 let step = args.get(2).cloned().or_else(|| {
17566 Some(Expression::Interval(Box::new(
17567 crate::expressions::Interval {
17568 this: Some(Expression::Literal(Literal::String(
17569 "1".to_string(),
17570 ))),
17571 unit: Some(
17572 crate::expressions::IntervalUnitSpec::Simple {
17573 unit: crate::expressions::IntervalUnit::Day,
17574 use_plural: false,
17575 },
17576 ),
17577 },
17578 )))
17579 });
17580 let gen_series = Expression::GenerateSeries(Box::new(
17581 crate::expressions::GenerateSeries {
17582 start: start.map(Box::new),
17583 end: end.map(Box::new),
17584 step: step.map(Box::new),
17585 is_end_exclusive: None,
17586 },
17587 ));
17588 Ok(Expression::Cast(Box::new(Cast {
17589 this: gen_series,
17590 to: DataType::Array {
17591 element_type: Box::new(DataType::Date),
17592 dimension: None,
17593 },
17594 trailing_comments: vec![],
17595 double_colon_syntax: false,
17596 format: None,
17597 default: None,
17598 inferred_type: None,
17599 })))
17600 } else if matches!(
17601 target,
17602 DialectType::Presto | DialectType::Trino | DialectType::Athena
17603 ) {
17604 // Presto/Trino: SEQUENCE(start, end, interval) with interval normalization
17605 let start = args.get(0).cloned();
17606 let end = args.get(1).cloned();
17607 let step = args.get(2).cloned().or_else(|| {
17608 Some(Expression::Interval(Box::new(
17609 crate::expressions::Interval {
17610 this: Some(Expression::Literal(Literal::String(
17611 "1".to_string(),
17612 ))),
17613 unit: Some(
17614 crate::expressions::IntervalUnitSpec::Simple {
17615 unit: crate::expressions::IntervalUnit::Day,
17616 use_plural: false,
17617 },
17618 ),
17619 },
17620 )))
17621 });
17622 let gen_series = Expression::GenerateSeries(Box::new(
17623 crate::expressions::GenerateSeries {
17624 start: start.map(Box::new),
17625 end: end.map(Box::new),
17626 step: step.map(Box::new),
17627 is_end_exclusive: None,
17628 },
17629 ));
17630 Ok(gen_series)
17631 } else if matches!(
17632 target,
17633 DialectType::Spark | DialectType::Databricks
17634 ) {
17635 // Spark/Databricks: SEQUENCE(start, end, step) - keep step as-is
17636 let start = args.get(0).cloned();
17637 let end = args.get(1).cloned();
17638 let step = args.get(2).cloned().or_else(|| {
17639 Some(Expression::Interval(Box::new(
17640 crate::expressions::Interval {
17641 this: Some(Expression::Literal(Literal::String(
17642 "1".to_string(),
17643 ))),
17644 unit: Some(
17645 crate::expressions::IntervalUnitSpec::Simple {
17646 unit: crate::expressions::IntervalUnit::Day,
17647 use_plural: false,
17648 },
17649 ),
17650 },
17651 )))
17652 });
17653 let gen_series = Expression::GenerateSeries(Box::new(
17654 crate::expressions::GenerateSeries {
17655 start: start.map(Box::new),
17656 end: end.map(Box::new),
17657 step: step.map(Box::new),
17658 is_end_exclusive: None,
17659 },
17660 ));
17661 Ok(gen_series)
17662 } else if matches!(target, DialectType::Snowflake) {
17663 // Snowflake: keep as GENERATE_DATE_ARRAY for later transform
17664 if args.len() == 2 {
17665 let default_interval = Expression::Interval(Box::new(
17666 crate::expressions::Interval {
17667 this: Some(Expression::Literal(Literal::String(
17668 "1".to_string(),
17669 ))),
17670 unit: Some(
17671 crate::expressions::IntervalUnitSpec::Simple {
17672 unit: crate::expressions::IntervalUnit::Day,
17673 use_plural: false,
17674 },
17675 ),
17676 },
17677 ));
17678 args.push(default_interval);
17679 }
17680 Ok(Expression::Function(Box::new(Function::new(
17681 "GENERATE_DATE_ARRAY".to_string(),
17682 args,
17683 ))))
17684 } else if matches!(
17685 target,
17686 DialectType::MySQL
17687 | DialectType::TSQL
17688 | DialectType::Fabric
17689 | DialectType::Redshift
17690 ) {
17691 // MySQL/TSQL/Redshift: keep as GENERATE_DATE_ARRAY for the preprocess
17692 // step (unnest_generate_date_array_using_recursive_cte) to convert to CTE
17693 Ok(Expression::Function(Box::new(Function::new(
17694 "GENERATE_DATE_ARRAY".to_string(),
17695 args,
17696 ))))
17697 } else {
17698 // PostgreSQL/others: convert to GenerateSeries
17699 let start = args.get(0).cloned();
17700 let end = args.get(1).cloned();
17701 let step = args.get(2).cloned().or_else(|| {
17702 Some(Expression::Interval(Box::new(
17703 crate::expressions::Interval {
17704 this: Some(Expression::Literal(Literal::String(
17705 "1".to_string(),
17706 ))),
17707 unit: Some(
17708 crate::expressions::IntervalUnitSpec::Simple {
17709 unit: crate::expressions::IntervalUnit::Day,
17710 use_plural: false,
17711 },
17712 ),
17713 },
17714 )))
17715 });
17716 Ok(Expression::GenerateSeries(Box::new(
17717 crate::expressions::GenerateSeries {
17718 start: start.map(Box::new),
17719 end: end.map(Box::new),
17720 step: step.map(Box::new),
17721 is_end_exclusive: None,
17722 },
17723 )))
17724 }
17725 }
17726 _ => Ok(Expression::Function(f)),
17727 }
17728 } else if let Expression::AggregateFunction(mut af) = e {
17729 let name = af.name.to_uppercase();
17730 match name.as_str() {
17731 "ARBITRARY" if af.args.len() == 1 => {
17732 let arg = af.args.into_iter().next().unwrap();
17733 Ok(convert_arbitrary(arg, target))
17734 }
17735 "JSON_ARRAYAGG" => {
17736 match target {
17737 DialectType::PostgreSQL => {
17738 af.name = "JSON_AGG".to_string();
17739 // Add NULLS FIRST to ORDER BY items for PostgreSQL
17740 for ordered in af.order_by.iter_mut() {
17741 if ordered.nulls_first.is_none() {
17742 ordered.nulls_first = Some(true);
17743 }
17744 }
17745 Ok(Expression::AggregateFunction(af))
17746 }
17747 _ => Ok(Expression::AggregateFunction(af)),
17748 }
17749 }
17750 _ => Ok(Expression::AggregateFunction(af)),
17751 }
17752 } else if let Expression::JSONArrayAgg(ja) = e {
17753 // JSONArrayAgg -> JSON_AGG for PostgreSQL, JSON_ARRAYAGG for others
17754 match target {
17755 DialectType::PostgreSQL => {
17756 let mut order_by = Vec::new();
17757 if let Some(order_expr) = ja.order {
17758 if let Expression::OrderBy(ob) = *order_expr {
17759 for mut ordered in ob.expressions {
17760 if ordered.nulls_first.is_none() {
17761 ordered.nulls_first = Some(true);
17762 }
17763 order_by.push(ordered);
17764 }
17765 }
17766 }
17767 Ok(Expression::AggregateFunction(Box::new(
17768 crate::expressions::AggregateFunction {
17769 name: "JSON_AGG".to_string(),
17770 args: vec![*ja.this],
17771 distinct: false,
17772 filter: None,
17773 order_by,
17774 limit: None,
17775 ignore_nulls: None,
17776 inferred_type: None,
17777 },
17778 )))
17779 }
17780 _ => Ok(Expression::JSONArrayAgg(ja)),
17781 }
17782 } else if let Expression::ToNumber(tn) = e {
17783 // TO_NUMBER(x) with no format/precision/scale -> CAST(x AS DOUBLE)
17784 let arg = *tn.this;
17785 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
17786 this: arg,
17787 to: crate::expressions::DataType::Double {
17788 precision: None,
17789 scale: None,
17790 },
17791 double_colon_syntax: false,
17792 trailing_comments: Vec::new(),
17793 format: None,
17794 default: None,
17795 inferred_type: None,
17796 })))
17797 } else {
17798 Ok(e)
17799 }
17800 }
17801
17802 Action::RegexpLikeToDuckDB => {
17803 if let Expression::RegexpLike(f) = e {
17804 let mut args = vec![f.this, f.pattern];
17805 if let Some(flags) = f.flags {
17806 args.push(flags);
17807 }
17808 Ok(Expression::Function(Box::new(Function::new(
17809 "REGEXP_MATCHES".to_string(),
17810 args,
17811 ))))
17812 } else {
17813 Ok(e)
17814 }
17815 }
17816 Action::EpochConvert => {
17817 if let Expression::Epoch(f) = e {
17818 let arg = f.this;
17819 let name = match target {
17820 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
17821 "UNIX_TIMESTAMP"
17822 }
17823 DialectType::Presto | DialectType::Trino => "TO_UNIXTIME",
17824 DialectType::BigQuery => "TIME_TO_UNIX",
17825 _ => "EPOCH",
17826 };
17827 Ok(Expression::Function(Box::new(Function::new(
17828 name.to_string(),
17829 vec![arg],
17830 ))))
17831 } else {
17832 Ok(e)
17833 }
17834 }
17835 Action::EpochMsConvert => {
17836 use crate::expressions::{BinaryOp, Cast};
17837 if let Expression::EpochMs(f) = e {
17838 let arg = f.this;
17839 match target {
17840 DialectType::Spark | DialectType::Databricks => {
17841 Ok(Expression::Function(Box::new(Function::new(
17842 "TIMESTAMP_MILLIS".to_string(),
17843 vec![arg],
17844 ))))
17845 }
17846 DialectType::BigQuery => Ok(Expression::Function(Box::new(
17847 Function::new("TIMESTAMP_MILLIS".to_string(), vec![arg]),
17848 ))),
17849 DialectType::Presto | DialectType::Trino => {
17850 // FROM_UNIXTIME(CAST(x AS DOUBLE) / POW(10, 3))
17851 let cast_arg = Expression::Cast(Box::new(Cast {
17852 this: arg,
17853 to: DataType::Double {
17854 precision: None,
17855 scale: None,
17856 },
17857 trailing_comments: Vec::new(),
17858 double_colon_syntax: false,
17859 format: None,
17860 default: None,
17861 inferred_type: None,
17862 }));
17863 let div = Expression::Div(Box::new(BinaryOp::new(
17864 cast_arg,
17865 Expression::Function(Box::new(Function::new(
17866 "POW".to_string(),
17867 vec![Expression::number(10), Expression::number(3)],
17868 ))),
17869 )));
17870 Ok(Expression::Function(Box::new(Function::new(
17871 "FROM_UNIXTIME".to_string(),
17872 vec![div],
17873 ))))
17874 }
17875 DialectType::MySQL => {
17876 // FROM_UNIXTIME(x / POWER(10, 3))
17877 let div = Expression::Div(Box::new(BinaryOp::new(
17878 arg,
17879 Expression::Function(Box::new(Function::new(
17880 "POWER".to_string(),
17881 vec![Expression::number(10), Expression::number(3)],
17882 ))),
17883 )));
17884 Ok(Expression::Function(Box::new(Function::new(
17885 "FROM_UNIXTIME".to_string(),
17886 vec![div],
17887 ))))
17888 }
17889 DialectType::PostgreSQL | DialectType::Redshift => {
17890 // TO_TIMESTAMP(CAST(x AS DOUBLE PRECISION) / POWER(10, 3))
17891 let cast_arg = Expression::Cast(Box::new(Cast {
17892 this: arg,
17893 to: DataType::Custom {
17894 name: "DOUBLE PRECISION".to_string(),
17895 },
17896 trailing_comments: Vec::new(),
17897 double_colon_syntax: false,
17898 format: None,
17899 default: None,
17900 inferred_type: None,
17901 }));
17902 let div = Expression::Div(Box::new(BinaryOp::new(
17903 cast_arg,
17904 Expression::Function(Box::new(Function::new(
17905 "POWER".to_string(),
17906 vec![Expression::number(10), Expression::number(3)],
17907 ))),
17908 )));
17909 Ok(Expression::Function(Box::new(Function::new(
17910 "TO_TIMESTAMP".to_string(),
17911 vec![div],
17912 ))))
17913 }
17914 DialectType::ClickHouse => {
17915 // fromUnixTimestamp64Milli(CAST(x AS Nullable(Int64)))
17916 let cast_arg = Expression::Cast(Box::new(Cast {
17917 this: arg,
17918 to: DataType::Nullable {
17919 inner: Box::new(DataType::BigInt { length: None }),
17920 },
17921 trailing_comments: Vec::new(),
17922 double_colon_syntax: false,
17923 format: None,
17924 default: None,
17925 inferred_type: None,
17926 }));
17927 Ok(Expression::Function(Box::new(Function::new(
17928 "fromUnixTimestamp64Milli".to_string(),
17929 vec![cast_arg],
17930 ))))
17931 }
17932 _ => Ok(Expression::Function(Box::new(Function::new(
17933 "EPOCH_MS".to_string(),
17934 vec![arg],
17935 )))),
17936 }
17937 } else {
17938 Ok(e)
17939 }
17940 }
17941 Action::TSQLTypeNormalize => {
17942 if let Expression::DataType(dt) = e {
17943 let new_dt = match &dt {
17944 DataType::Custom { name } if name.eq_ignore_ascii_case("MONEY") => {
17945 DataType::Decimal {
17946 precision: Some(15),
17947 scale: Some(4),
17948 }
17949 }
17950 DataType::Custom { name }
17951 if name.eq_ignore_ascii_case("SMALLMONEY") =>
17952 {
17953 DataType::Decimal {
17954 precision: Some(6),
17955 scale: Some(4),
17956 }
17957 }
17958 DataType::Custom { name } if name.eq_ignore_ascii_case("DATETIME2") => {
17959 DataType::Timestamp {
17960 timezone: false,
17961 precision: None,
17962 }
17963 }
17964 DataType::Custom { name } if name.eq_ignore_ascii_case("REAL") => {
17965 DataType::Float {
17966 precision: None,
17967 scale: None,
17968 real_spelling: false,
17969 }
17970 }
17971 DataType::Float {
17972 real_spelling: true,
17973 ..
17974 } => DataType::Float {
17975 precision: None,
17976 scale: None,
17977 real_spelling: false,
17978 },
17979 DataType::Custom { name } if name.eq_ignore_ascii_case("IMAGE") => {
17980 DataType::Custom {
17981 name: "BLOB".to_string(),
17982 }
17983 }
17984 DataType::Custom { name } if name.eq_ignore_ascii_case("BIT") => {
17985 DataType::Boolean
17986 }
17987 DataType::Custom { name }
17988 if name.eq_ignore_ascii_case("ROWVERSION") =>
17989 {
17990 DataType::Custom {
17991 name: "BINARY".to_string(),
17992 }
17993 }
17994 DataType::Custom { name }
17995 if name.eq_ignore_ascii_case("UNIQUEIDENTIFIER") =>
17996 {
17997 match target {
17998 DialectType::Spark
17999 | DialectType::Databricks
18000 | DialectType::Hive => DataType::Custom {
18001 name: "STRING".to_string(),
18002 },
18003 _ => DataType::VarChar {
18004 length: Some(36),
18005 parenthesized_length: true,
18006 },
18007 }
18008 }
18009 DataType::Custom { name }
18010 if name.eq_ignore_ascii_case("DATETIMEOFFSET") =>
18011 {
18012 match target {
18013 DialectType::Spark
18014 | DialectType::Databricks
18015 | DialectType::Hive => DataType::Timestamp {
18016 timezone: false,
18017 precision: None,
18018 },
18019 _ => DataType::Timestamp {
18020 timezone: true,
18021 precision: None,
18022 },
18023 }
18024 }
18025 DataType::Custom { ref name }
18026 if name.to_uppercase().starts_with("DATETIME2(") =>
18027 {
18028 // DATETIME2(n) -> TIMESTAMP
18029 DataType::Timestamp {
18030 timezone: false,
18031 precision: None,
18032 }
18033 }
18034 DataType::Custom { ref name }
18035 if name.to_uppercase().starts_with("TIME(") =>
18036 {
18037 // TIME(n) -> TIMESTAMP for Spark, keep as TIME for others
18038 match target {
18039 DialectType::Spark
18040 | DialectType::Databricks
18041 | DialectType::Hive => DataType::Timestamp {
18042 timezone: false,
18043 precision: None,
18044 },
18045 _ => return Ok(Expression::DataType(dt)),
18046 }
18047 }
18048 DataType::Custom { ref name }
18049 if name.to_uppercase().starts_with("NUMERIC") =>
18050 {
18051 // Parse NUMERIC(p,s) back to Decimal(p,s)
18052 let upper = name.to_uppercase();
18053 if let Some(inner) = upper
18054 .strip_prefix("NUMERIC(")
18055 .and_then(|s| s.strip_suffix(')'))
18056 {
18057 let parts: Vec<&str> = inner.split(',').collect();
18058 let precision =
18059 parts.first().and_then(|s| s.trim().parse::<u32>().ok());
18060 let scale =
18061 parts.get(1).and_then(|s| s.trim().parse::<u32>().ok());
18062 DataType::Decimal { precision, scale }
18063 } else if upper == "NUMERIC" {
18064 DataType::Decimal {
18065 precision: None,
18066 scale: None,
18067 }
18068 } else {
18069 return Ok(Expression::DataType(dt));
18070 }
18071 }
18072 DataType::Float {
18073 precision: Some(p), ..
18074 } => {
18075 // For Hive/Spark: FLOAT(1-32) -> FLOAT, FLOAT(33+) -> DOUBLE (IEEE 754 boundary)
18076 // For other targets: FLOAT(1-24) -> FLOAT, FLOAT(25+) -> DOUBLE (TSQL boundary)
18077 let boundary = match target {
18078 DialectType::Hive
18079 | DialectType::Spark
18080 | DialectType::Databricks => 32,
18081 _ => 24,
18082 };
18083 if *p <= boundary {
18084 DataType::Float {
18085 precision: None,
18086 scale: None,
18087 real_spelling: false,
18088 }
18089 } else {
18090 DataType::Double {
18091 precision: None,
18092 scale: None,
18093 }
18094 }
18095 }
18096 DataType::TinyInt { .. } => match target {
18097 DialectType::DuckDB => DataType::Custom {
18098 name: "UTINYINT".to_string(),
18099 },
18100 DialectType::Hive
18101 | DialectType::Spark
18102 | DialectType::Databricks => DataType::SmallInt { length: None },
18103 _ => return Ok(Expression::DataType(dt)),
18104 },
18105 // INTEGER -> INT for Spark/Databricks
18106 DataType::Int {
18107 length,
18108 integer_spelling: true,
18109 } => DataType::Int {
18110 length: *length,
18111 integer_spelling: false,
18112 },
18113 _ => return Ok(Expression::DataType(dt)),
18114 };
18115 Ok(Expression::DataType(new_dt))
18116 } else {
18117 Ok(e)
18118 }
18119 }
            Action::MySQLSafeDivide => {
                use crate::expressions::{BinaryOp, Cast};
                // Emulate MySQL division semantics on the target: `x / 0` yields NULL
                // rather than erroring, and the result is fractional. The divisor is
                // wrapped in NULLIF(right, 0); several targets also need the dividend
                // cast to a float type so integer operands don't truncate.
                if let Expression::Div(op) = e {
                    let left = op.left;
                    let right = op.right;
                    // For SQLite: CAST left as REAL but NO NULLIF wrapping
                    // (NOTE(review): presumably SQLite already yields NULL on x/0 —
                    // confirm against SQLite docs).
                    if matches!(target, DialectType::SQLite) {
                        let new_left = Expression::Cast(Box::new(Cast {
                            this: left,
                            to: DataType::Float {
                                precision: None,
                                scale: None,
                                real_spelling: true, // render as REAL, not FLOAT
                            },
                            trailing_comments: Vec::new(),
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        return Ok(Expression::Div(Box::new(BinaryOp::new(new_left, right))));
                    }
                    // Wrap right in NULLIF(right, 0) so division by zero produces NULL.
                    let nullif_right = Expression::Function(Box::new(Function::new(
                        "NULLIF".to_string(),
                        vec![right, Expression::number(0)],
                    )));
                    // For some dialects, also CAST the left side. Only the spelling of
                    // the float type differs between the branches below.
                    let new_left = match target {
                        // CAST(left AS DOUBLE PRECISION)
                        DialectType::PostgreSQL
                        | DialectType::Redshift
                        | DialectType::Teradata
                        | DialectType::Materialize
                        | DialectType::RisingWave => Expression::Cast(Box::new(Cast {
                            this: left,
                            to: DataType::Custom {
                                name: "DOUBLE PRECISION".to_string(),
                            },
                            trailing_comments: Vec::new(),
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })),
                        // CAST(left AS DOUBLE)
                        DialectType::Drill
                        | DialectType::Trino
                        | DialectType::Presto
                        | DialectType::Athena => Expression::Cast(Box::new(Cast {
                            this: left,
                            to: DataType::Double {
                                precision: None,
                                scale: None,
                            },
                            trailing_comments: Vec::new(),
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })),
                        // CAST(left AS FLOAT)
                        DialectType::TSQL => Expression::Cast(Box::new(Cast {
                            this: left,
                            to: DataType::Float {
                                precision: None,
                                scale: None,
                                real_spelling: false,
                            },
                            trailing_comments: Vec::new(),
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })),
                        // Other targets: leave the dividend untouched.
                        _ => left,
                    };
                    Ok(Expression::Div(Box::new(BinaryOp::new(
                        new_left,
                        nullif_right,
                    ))))
                } else {
                    Ok(e)
                }
            }
18202 Action::AlterTableRenameStripSchema => {
18203 if let Expression::AlterTable(mut at) = e {
18204 if let Some(crate::expressions::AlterTableAction::RenameTable(
18205 ref mut new_tbl,
18206 )) = at.actions.first_mut()
18207 {
18208 new_tbl.schema = None;
18209 new_tbl.catalog = None;
18210 }
18211 Ok(Expression::AlterTable(at))
18212 } else {
18213 Ok(e)
18214 }
18215 }
18216 Action::NullsOrdering => {
18217 // Fill in the source dialect's implied null ordering default.
18218 // This makes implicit null ordering explicit so the target generator
18219 // can correctly strip or keep it.
18220 //
18221 // Dialect null ordering categories:
18222 // nulls_are_large (Oracle, PostgreSQL, Redshift, Snowflake):
18223 // ASC -> NULLS LAST, DESC -> NULLS FIRST
18224 // nulls_are_small (Spark, Hive, BigQuery, MySQL, Databricks, ClickHouse, etc.):
18225 // ASC -> NULLS FIRST, DESC -> NULLS LAST
18226 // nulls_are_last (DuckDB, Presto, Trino, Dremio, Athena):
18227 // NULLS LAST always (both ASC and DESC)
18228 if let Expression::Ordered(mut o) = e {
18229 let is_asc = !o.desc;
18230
18231 let is_source_nulls_large = matches!(
18232 source,
18233 DialectType::Oracle
18234 | DialectType::PostgreSQL
18235 | DialectType::Redshift
18236 | DialectType::Snowflake
18237 );
18238 let is_source_nulls_last = matches!(
18239 source,
18240 DialectType::DuckDB
18241 | DialectType::Presto
18242 | DialectType::Trino
18243 | DialectType::Dremio
18244 | DialectType::Athena
18245 | DialectType::ClickHouse
18246 | DialectType::Drill
18247 | DialectType::Exasol
18248 | DialectType::DataFusion
18249 );
18250
18251 // Determine target category to check if default matches
18252 let is_target_nulls_large = matches!(
18253 target,
18254 DialectType::Oracle
18255 | DialectType::PostgreSQL
18256 | DialectType::Redshift
18257 | DialectType::Snowflake
18258 );
18259 let is_target_nulls_last = matches!(
18260 target,
18261 DialectType::DuckDB
18262 | DialectType::Presto
18263 | DialectType::Trino
18264 | DialectType::Dremio
18265 | DialectType::Athena
18266 | DialectType::ClickHouse
18267 | DialectType::Drill
18268 | DialectType::Exasol
18269 | DialectType::DataFusion
18270 );
18271
18272 // Compute the implied nulls_first for source
18273 let source_nulls_first = if is_source_nulls_large {
18274 !is_asc // ASC -> NULLS LAST (false), DESC -> NULLS FIRST (true)
18275 } else if is_source_nulls_last {
18276 false // NULLS LAST always
18277 } else {
18278 is_asc // nulls_are_small: ASC -> NULLS FIRST (true), DESC -> NULLS LAST (false)
18279 };
18280
18281 // Compute the target's default
18282 let target_nulls_first = if is_target_nulls_large {
18283 !is_asc
18284 } else if is_target_nulls_last {
18285 false
18286 } else {
18287 is_asc
18288 };
18289
18290 // Only add explicit nulls ordering if source and target defaults differ
18291 if source_nulls_first != target_nulls_first {
18292 o.nulls_first = Some(source_nulls_first);
18293 }
18294 // If they match, leave nulls_first as None so the generator won't output it
18295
18296 Ok(Expression::Ordered(o))
18297 } else {
18298 Ok(e)
18299 }
18300 }
            Action::StringAggConvert => {
                // Convert STRING_AGG (optionally carrying a WITHIN GROUP ordering
                // clause) into the target dialect's string-aggregation spelling.
                match e {
                    Expression::WithinGroup(wg) => {
                        // STRING_AGG(x, sep) WITHIN GROUP (ORDER BY z) -> target-specific
                        // Extract args and distinct flag from either Function, AggregateFunction, or StringAgg
                        let (x_opt, sep_opt, distinct) = match wg.this {
                            Expression::AggregateFunction(ref af)
                                if af.name.eq_ignore_ascii_case("STRING_AGG")
                                    && af.args.len() >= 2 =>
                            {
                                (
                                    Some(af.args[0].clone()),
                                    Some(af.args[1].clone()),
                                    af.distinct,
                                )
                            }
                            Expression::Function(ref f)
                                if f.name.eq_ignore_ascii_case("STRING_AGG")
                                    && f.args.len() >= 2 =>
                            {
                                // Plain Function nodes carry no DISTINCT flag.
                                (Some(f.args[0].clone()), Some(f.args[1].clone()), false)
                            }
                            Expression::StringAgg(ref sa) => {
                                (Some(sa.this.clone()), sa.separator.clone(), sa.distinct)
                            }
                            _ => (None, None, false),
                        };
                        if let (Some(x), Some(sep)) = (x_opt, sep_opt) {
                            let order_by = wg.order_by;

                            match target {
                                DialectType::TSQL | DialectType::Fabric => {
                                    // Keep as WithinGroup(StringAgg) for TSQL
                                    Ok(Expression::WithinGroup(Box::new(
                                        crate::expressions::WithinGroup {
                                            this: Expression::StringAgg(Box::new(
                                                crate::expressions::StringAggFunc {
                                                    this: x,
                                                    separator: Some(sep),
                                                    order_by: None, // order_by goes in WithinGroup, not StringAgg
                                                    distinct,
                                                    filter: None,
                                                    limit: None,
                                                    inferred_type: None,
                                                },
                                            )),
                                            order_by,
                                        },
                                    )))
                                }
                                DialectType::MySQL
                                | DialectType::SingleStore
                                | DialectType::Doris
                                | DialectType::StarRocks => {
                                    // GROUP_CONCAT(x ORDER BY z SEPARATOR sep)
                                    Ok(Expression::GroupConcat(Box::new(
                                        crate::expressions::GroupConcatFunc {
                                            this: x,
                                            separator: Some(sep),
                                            order_by: Some(order_by),
                                            distinct,
                                            filter: None,
                                            inferred_type: None,
                                        },
                                    )))
                                }
                                DialectType::SQLite => {
                                    // GROUP_CONCAT(x, sep) - no ORDER BY support,
                                    // so the ordering clause is silently dropped.
                                    Ok(Expression::GroupConcat(Box::new(
                                        crate::expressions::GroupConcatFunc {
                                            this: x,
                                            separator: Some(sep),
                                            order_by: None,
                                            distinct,
                                            filter: None,
                                            inferred_type: None,
                                        },
                                    )))
                                }
                                DialectType::PostgreSQL | DialectType::Redshift => {
                                    // STRING_AGG(x, sep ORDER BY z)
                                    Ok(Expression::StringAgg(Box::new(
                                        crate::expressions::StringAggFunc {
                                            this: x,
                                            separator: Some(sep),
                                            order_by: Some(order_by),
                                            distinct,
                                            filter: None,
                                            limit: None,
                                            inferred_type: None,
                                        },
                                    )))
                                }
                                _ => {
                                    // Default: keep as STRING_AGG(x, sep) with ORDER BY inside
                                    Ok(Expression::StringAgg(Box::new(
                                        crate::expressions::StringAggFunc {
                                            this: x,
                                            separator: Some(sep),
                                            order_by: Some(order_by),
                                            distinct,
                                            filter: None,
                                            limit: None,
                                            inferred_type: None,
                                        },
                                    )))
                                }
                            }
                        } else {
                            // Inner expression was not a 2-arg STRING_AGG: leave the
                            // WITHIN GROUP node untouched.
                            Ok(Expression::WithinGroup(wg))
                        }
                    }
                    Expression::StringAgg(sa) => {
                        // Bare STRING_AGG without a WITHIN GROUP clause.
                        match target {
                            DialectType::MySQL
                            | DialectType::SingleStore
                            | DialectType::Doris
                            | DialectType::StarRocks => {
                                // STRING_AGG(x, sep) -> GROUP_CONCAT(x SEPARATOR sep)
                                Ok(Expression::GroupConcat(Box::new(
                                    crate::expressions::GroupConcatFunc {
                                        this: sa.this,
                                        separator: sa.separator,
                                        order_by: sa.order_by,
                                        distinct: sa.distinct,
                                        filter: sa.filter,
                                        inferred_type: None,
                                    },
                                )))
                            }
                            DialectType::SQLite => {
                                // STRING_AGG(x, sep) -> GROUP_CONCAT(x, sep)
                                Ok(Expression::GroupConcat(Box::new(
                                    crate::expressions::GroupConcatFunc {
                                        this: sa.this,
                                        separator: sa.separator,
                                        order_by: None, // SQLite doesn't support ORDER BY in GROUP_CONCAT
                                        distinct: sa.distinct,
                                        filter: sa.filter,
                                        inferred_type: None,
                                    },
                                )))
                            }
                            DialectType::Spark | DialectType::Databricks => {
                                // STRING_AGG(x, sep) -> LISTAGG(x, sep)
                                // NOTE(review): the filter is dropped here, unlike the
                                // GROUP_CONCAT branches above — confirm intentional.
                                Ok(Expression::ListAgg(Box::new(
                                    crate::expressions::ListAggFunc {
                                        this: sa.this,
                                        separator: sa.separator,
                                        on_overflow: None,
                                        order_by: sa.order_by,
                                        distinct: sa.distinct,
                                        filter: None,
                                        inferred_type: None,
                                    },
                                )))
                            }
                            _ => Ok(Expression::StringAgg(sa)),
                        }
                    }
                    _ => Ok(e),
                }
            }
            Action::GroupConcatConvert => {
                // Convert GROUP_CONCAT into the target dialect's aggregation form.
                // The aggregated expression itself may also be rewritten, because
                // not every target accepts a variadic CONCAT in that position.

                // Helper to expand CONCAT(a, b, c) -> a || b || c (for PostgreSQL/SQLite)
                // or CONCAT(a, b, c) -> a + b + c (for TSQL)
                fn expand_concat_to_dpipe(expr: Expression) -> Expression {
                    if let Expression::Function(ref f) = expr {
                        if f.name.to_uppercase() == "CONCAT" && f.args.len() > 1 {
                            // Left-fold the args into a chain of || nodes.
                            let mut result = f.args[0].clone();
                            for arg in &f.args[1..] {
                                result = Expression::Concat(Box::new(BinaryOp {
                                    left: result,
                                    right: arg.clone(),
                                    left_comments: vec![],
                                    operator_comments: vec![],
                                    trailing_comments: vec![],
                                    inferred_type: None,
                                }));
                            }
                            return result;
                        }
                    }
                    expr
                }
                // Same left-fold, but with + (TSQL string concatenation operator).
                fn expand_concat_to_plus(expr: Expression) -> Expression {
                    if let Expression::Function(ref f) = expr {
                        if f.name.to_uppercase() == "CONCAT" && f.args.len() > 1 {
                            let mut result = f.args[0].clone();
                            for arg in &f.args[1..] {
                                result = Expression::Add(Box::new(BinaryOp {
                                    left: result,
                                    right: arg.clone(),
                                    left_comments: vec![],
                                    operator_comments: vec![],
                                    trailing_comments: vec![],
                                    inferred_type: None,
                                }));
                            }
                            return result;
                        }
                    }
                    expr
                }
                // Helper to wrap each arg in CAST(arg AS VARCHAR) for Presto/Trino CONCAT
                // (presumably because their CONCAT is strict about operand types —
                // NOTE(review): confirm against Presto/Trino docs).
                fn wrap_concat_args_in_varchar_cast(expr: Expression) -> Expression {
                    if let Expression::Function(ref f) = expr {
                        if f.name.to_uppercase() == "CONCAT" && f.args.len() > 1 {
                            let new_args: Vec<Expression> = f
                                .args
                                .iter()
                                .map(|arg| {
                                    Expression::Cast(Box::new(crate::expressions::Cast {
                                        this: arg.clone(),
                                        to: crate::expressions::DataType::VarChar {
                                            length: None,
                                            parenthesized_length: false,
                                        },
                                        trailing_comments: Vec::new(),
                                        double_colon_syntax: false,
                                        format: None,
                                        default: None,
                                        inferred_type: None,
                                    }))
                                })
                                .collect();
                            return Expression::Function(Box::new(
                                crate::expressions::Function::new(
                                    "CONCAT".to_string(),
                                    new_args,
                                ),
                            ));
                        }
                    }
                    expr
                }
                if let Expression::GroupConcat(gc) = e {
                    match target {
                        DialectType::Presto => {
                            // GROUP_CONCAT(x [, sep]) -> ARRAY_JOIN(ARRAY_AGG(x), sep)
                            // Missing separator defaults to ','.
                            let sep = gc.separator.unwrap_or(Expression::string(","));
                            // For multi-arg CONCAT, wrap each arg in CAST(... AS VARCHAR)
                            let this = wrap_concat_args_in_varchar_cast(gc.this);
                            let array_agg =
                                Expression::ArrayAgg(Box::new(crate::expressions::AggFunc {
                                    this,
                                    distinct: gc.distinct,
                                    filter: gc.filter,
                                    order_by: gc.order_by.unwrap_or_default(),
                                    name: None,
                                    ignore_nulls: None,
                                    having_max: None,
                                    limit: None,
                                    inferred_type: None,
                                }));
                            Ok(Expression::ArrayJoin(Box::new(
                                crate::expressions::ArrayJoinFunc {
                                    this: array_agg,
                                    separator: sep,
                                    null_replacement: None,
                                },
                            )))
                        }
                        DialectType::Trino => {
                            // GROUP_CONCAT(x [, sep]) -> LISTAGG(x, sep)
                            let sep = gc.separator.unwrap_or(Expression::string(","));
                            // For multi-arg CONCAT, wrap each arg in CAST(... AS VARCHAR)
                            let this = wrap_concat_args_in_varchar_cast(gc.this);
                            Ok(Expression::ListAgg(Box::new(
                                crate::expressions::ListAggFunc {
                                    this,
                                    separator: Some(sep),
                                    on_overflow: None,
                                    order_by: gc.order_by,
                                    distinct: gc.distinct,
                                    filter: gc.filter,
                                    inferred_type: None,
                                },
                            )))
                        }
                        DialectType::PostgreSQL
                        | DialectType::Redshift
                        | DialectType::Snowflake
                        | DialectType::DuckDB
                        | DialectType::Hive
                        | DialectType::ClickHouse => {
                            // GROUP_CONCAT(x [, sep]) -> STRING_AGG(x, sep)
                            let sep = gc.separator.unwrap_or(Expression::string(","));
                            // Expand CONCAT(a,b,c) -> a || b || c for || dialects
                            let this = expand_concat_to_dpipe(gc.this);
                            // For PostgreSQL, add NULLS LAST for DESC / NULLS FIRST for ASC
                            // (makes the implicit default explicit on each sort key that
                            // doesn't already specify one).
                            let order_by = if target == DialectType::PostgreSQL {
                                gc.order_by.map(|ords| {
                                    ords.into_iter()
                                        .map(|mut o| {
                                            if o.nulls_first.is_none() {
                                                if o.desc {
                                                    o.nulls_first = Some(false);
                                                    // NULLS LAST
                                                } else {
                                                    o.nulls_first = Some(true);
                                                    // NULLS FIRST
                                                }
                                            }
                                            o
                                        })
                                        .collect()
                                })
                            } else {
                                gc.order_by
                            };
                            Ok(Expression::StringAgg(Box::new(
                                crate::expressions::StringAggFunc {
                                    this,
                                    separator: Some(sep),
                                    order_by,
                                    distinct: gc.distinct,
                                    filter: gc.filter,
                                    limit: None,
                                    inferred_type: None,
                                },
                            )))
                        }
                        DialectType::TSQL => {
                            // GROUP_CONCAT(x [, sep]) -> STRING_AGG(x, sep) WITHIN GROUP (ORDER BY ...)
                            // TSQL doesn't support DISTINCT in STRING_AGG
                            let sep = gc.separator.unwrap_or(Expression::string(","));
                            // Expand CONCAT(a,b,c) -> a + b + c for TSQL
                            let this = expand_concat_to_plus(gc.this);
                            Ok(Expression::StringAgg(Box::new(
                                crate::expressions::StringAggFunc {
                                    this,
                                    separator: Some(sep),
                                    order_by: gc.order_by,
                                    distinct: false, // TSQL doesn't support DISTINCT in STRING_AGG
                                    filter: gc.filter,
                                    limit: None,
                                    inferred_type: None,
                                },
                            )))
                        }
                        DialectType::SQLite => {
                            // GROUP_CONCAT stays as GROUP_CONCAT but ORDER BY is removed
                            // SQLite GROUP_CONCAT doesn't support ORDER BY
                            // Expand CONCAT(a,b,c) -> a || b || c
                            let this = expand_concat_to_dpipe(gc.this);
                            Ok(Expression::GroupConcat(Box::new(
                                crate::expressions::GroupConcatFunc {
                                    this,
                                    separator: gc.separator,
                                    order_by: None, // SQLite doesn't support ORDER BY in GROUP_CONCAT
                                    distinct: gc.distinct,
                                    filter: gc.filter,
                                    inferred_type: None,
                                },
                            )))
                        }
                        DialectType::Spark | DialectType::Databricks => {
                            // GROUP_CONCAT(x [, sep]) -> LISTAGG(x, sep)
                            let sep = gc.separator.unwrap_or(Expression::string(","));
                            Ok(Expression::ListAgg(Box::new(
                                crate::expressions::ListAggFunc {
                                    this: gc.this,
                                    separator: Some(sep),
                                    on_overflow: None,
                                    order_by: gc.order_by,
                                    distinct: gc.distinct,
                                    filter: None,
                                    inferred_type: None,
                                },
                            )))
                        }
                        DialectType::MySQL
                        | DialectType::SingleStore
                        | DialectType::StarRocks => {
                            // MySQL GROUP_CONCAT should have explicit SEPARATOR (default ',')
                            // NOTE(review): Doris is grouped with MySQL in StringAggConvert
                            // but falls through to the default arm here — confirm intentional.
                            if gc.separator.is_none() {
                                let mut gc = gc;
                                gc.separator = Some(Expression::string(","));
                                Ok(Expression::GroupConcat(gc))
                            } else {
                                Ok(Expression::GroupConcat(gc))
                            }
                        }
                        _ => Ok(Expression::GroupConcat(gc)),
                    }
                } else {
                    Ok(e)
                }
            }
18691 Action::TempTableHash => {
18692 match e {
18693 Expression::CreateTable(mut ct) => {
18694 // TSQL #table -> TEMPORARY TABLE with # stripped from name
18695 let name = &ct.name.name.name;
18696 if name.starts_with('#') {
18697 ct.name.name.name = name.trim_start_matches('#').to_string();
18698 }
18699 // Set temporary flag
18700 ct.temporary = true;
18701 Ok(Expression::CreateTable(ct))
18702 }
18703 Expression::Table(mut tr) => {
18704 // Strip # from table references
18705 let name = &tr.name.name;
18706 if name.starts_with('#') {
18707 tr.name.name = name.trim_start_matches('#').to_string();
18708 }
18709 Ok(Expression::Table(tr))
18710 }
18711 Expression::DropTable(mut dt) => {
18712 // Strip # from DROP TABLE names
18713 for table_ref in &mut dt.names {
18714 if table_ref.name.name.starts_with('#') {
18715 table_ref.name.name =
18716 table_ref.name.name.trim_start_matches('#').to_string();
18717 }
18718 }
18719 Ok(Expression::DropTable(dt))
18720 }
18721 _ => Ok(e),
18722 }
18723 }
18724 Action::NvlClearOriginal => {
18725 if let Expression::Nvl(mut f) = e {
18726 f.original_name = None;
18727 Ok(Expression::Nvl(f))
18728 } else {
18729 Ok(e)
18730 }
18731 }
            Action::HiveCastToTryCast => {
                // Convert Hive/Spark CAST to TRY_CAST for targets that support it.
                // The Cast payload is reused; only the node kind flips, after two
                // type fix-ups that depend on the source/target pair.
                if let Expression::Cast(mut c) = e {
                    // For Spark/Hive -> DuckDB: TIMESTAMP -> TIMESTAMPTZ
                    // (Spark's TIMESTAMP is always timezone-aware)
                    if matches!(target, DialectType::DuckDB)
                        && matches!(source, DialectType::Spark | DialectType::Databricks)
                        && matches!(
                            c.to,
                            DataType::Timestamp {
                                timezone: false,
                                ..
                            }
                        )
                    {
                        c.to = DataType::Custom {
                            name: "TIMESTAMPTZ".to_string(),
                        };
                    }
                    // For Spark source -> Databricks: VARCHAR/CHAR -> STRING
                    // Spark parses VARCHAR(n)/CHAR(n) as TEXT, normalize to STRING
                    if matches!(target, DialectType::Databricks | DialectType::Spark)
                        && matches!(
                            source,
                            DialectType::Spark | DialectType::Databricks | DialectType::Hive
                        )
                        && Self::has_varchar_char_type(&c.to)
                    {
                        c.to = Self::normalize_varchar_to_string(c.to);
                    }
                    Ok(Expression::TryCast(c))
                } else {
                    Ok(e)
                }
            }
            Action::XorExpand => {
                // Expand XOR to (a AND NOT b) OR (NOT a AND b) for dialects without XOR keyword
                // Snowflake: use BOOLXOR(a, b) instead
                if let Expression::Xor(xor) = e {
                    // Collect all XOR operands. The node carries two positional
                    // slots plus a trailing list (presumably for chained
                    // a XOR b XOR c — confirm against the Xor node definition).
                    let mut operands = Vec::new();
                    if let Some(this) = xor.this {
                        operands.push(*this);
                    }
                    if let Some(expr) = xor.expression {
                        operands.push(*expr);
                    }
                    operands.extend(xor.expressions);

                    // Snowflake: use BOOLXOR(a, b). Only for exactly two operands;
                    // longer chains fall through to the generic expansion below.
                    if matches!(target, DialectType::Snowflake) && operands.len() == 2 {
                        let a = operands.remove(0);
                        let b = operands.remove(0);
                        return Ok(Expression::Function(Box::new(Function::new(
                            "BOOLXOR".to_string(),
                            vec![a, b],
                        ))));
                    }

                    // Helper to build (a AND NOT b) OR (NOT a AND b)
                    let make_xor = |a: Expression, b: Expression| -> Expression {
                        let not_b = Expression::Not(Box::new(
                            crate::expressions::UnaryOp::new(b.clone()),
                        ));
                        let not_a = Expression::Not(Box::new(
                            crate::expressions::UnaryOp::new(a.clone()),
                        ));
                        // a AND (NOT b)
                        let left_and = Expression::And(Box::new(BinaryOp {
                            left: a,
                            right: Expression::Paren(Box::new(Paren {
                                this: not_b,
                                trailing_comments: Vec::new(),
                            })),
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                            inferred_type: None,
                        }));
                        // (NOT a) AND b
                        let right_and = Expression::And(Box::new(BinaryOp {
                            left: Expression::Paren(Box::new(Paren {
                                this: not_a,
                                trailing_comments: Vec::new(),
                            })),
                            right: b,
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                            inferred_type: None,
                        }));
                        // (a AND NOT b) OR (NOT a AND b) — both sides parenthesized
                        // so operator precedence is explicit in the generated SQL.
                        Expression::Or(Box::new(BinaryOp {
                            left: Expression::Paren(Box::new(Paren {
                                this: left_and,
                                trailing_comments: Vec::new(),
                            })),
                            right: Expression::Paren(Box::new(Paren {
                                this: right_and,
                                trailing_comments: Vec::new(),
                            })),
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                            inferred_type: None,
                        }))
                    };

                    if operands.len() >= 2 {
                        // Left-associative expansion: ((a XOR b) XOR c) ...
                        let mut result = make_xor(operands.remove(0), operands.remove(0));
                        for operand in operands {
                            result = make_xor(result, operand);
                        }
                        Ok(result)
                    } else if operands.len() == 1 {
                        // Degenerate single-operand XOR is just the operand itself.
                        Ok(operands.remove(0))
                    } else {
                        // No operands - return FALSE (shouldn't happen)
                        Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
                            value: false,
                        }))
                    }
                } else {
                    Ok(e)
                }
            }
18855 Action::DatePartUnquote => {
18856 // DATE_PART('month', x) -> DATE_PART(month, x) for Snowflake target
18857 // Convert the quoted string first arg to a bare Column/Identifier
18858 if let Expression::Function(mut f) = e {
18859 if let Some(Expression::Literal(crate::expressions::Literal::String(s))) =
18860 f.args.first()
18861 {
18862 let bare_name = s.to_lowercase();
18863 f.args[0] = Expression::Column(crate::expressions::Column {
18864 name: Identifier::new(bare_name),
18865 table: None,
18866 join_mark: false,
18867 trailing_comments: Vec::new(),
18868 span: None,
18869 inferred_type: None,
18870 });
18871 }
18872 Ok(Expression::Function(f))
18873 } else {
18874 Ok(e)
18875 }
18876 }
18877 Action::ArrayLengthConvert => {
18878 // Extract the argument from the expression
18879 let arg = match e {
18880 Expression::Cardinality(ref f) => f.this.clone(),
18881 Expression::ArrayLength(ref f) => f.this.clone(),
18882 Expression::ArraySize(ref f) => f.this.clone(),
18883 _ => return Ok(e),
18884 };
18885 match target {
18886 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
18887 Ok(Expression::Function(Box::new(Function::new(
18888 "SIZE".to_string(),
18889 vec![arg],
18890 ))))
18891 }
18892 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
18893 Ok(Expression::Cardinality(Box::new(
18894 crate::expressions::UnaryFunc::new(arg),
18895 )))
18896 }
18897 DialectType::BigQuery => Ok(Expression::ArrayLength(Box::new(
18898 crate::expressions::UnaryFunc::new(arg),
18899 ))),
18900 DialectType::DuckDB => Ok(Expression::ArrayLength(Box::new(
18901 crate::expressions::UnaryFunc::new(arg),
18902 ))),
18903 DialectType::PostgreSQL | DialectType::Redshift => {
18904 // PostgreSQL ARRAY_LENGTH requires dimension arg
18905 Ok(Expression::Function(Box::new(Function::new(
18906 "ARRAY_LENGTH".to_string(),
18907 vec![arg, Expression::number(1)],
18908 ))))
18909 }
18910 DialectType::Snowflake => Ok(Expression::ArraySize(Box::new(
18911 crate::expressions::UnaryFunc::new(arg),
18912 ))),
18913 _ => Ok(e), // Keep original
18914 }
18915 }
18916
            Action::JsonExtractToArrow => {
                // JSON_EXTRACT(x, path) -> x -> path for SQLite/DuckDB (set arrow_syntax = true)
                if let Expression::JsonExtract(mut f) = e {
                    f.arrow_syntax = true;
                    // Transform path: convert bracket notation to dot notation
                    // SQLite strips wildcards, DuckDB preserves them.
                    // Only string-literal paths are rewritten; dynamic paths pass
                    // through untouched (aside from the arrow flag above).
                    if let Expression::Literal(Literal::String(ref s)) = f.path {
                        let mut transformed = s.clone();
                        if matches!(target, DialectType::SQLite) {
                            transformed = Self::strip_json_wildcards(&transformed);
                        }
                        transformed = Self::bracket_to_dot_notation(&transformed);
                        // Avoid rebuilding the literal when nothing changed.
                        if transformed != *s {
                            f.path = Expression::string(&transformed);
                        }
                    }
                    Ok(Expression::JsonExtract(f))
                } else {
                    Ok(e)
                }
            }
18938
            Action::JsonExtractToGetJsonObject => {
                // Route JSON_EXTRACT either to the PostgreSQL-family path functions
                // or to Hive/Spark-style GET_JSON_OBJECT, depending on the target.
                if let Expression::JsonExtract(f) = e {
                    if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
                        // JSON_EXTRACT(x, '$.key') -> JSON_EXTRACT_PATH(x, 'key') for PostgreSQL
                        // Use proper decomposition that handles brackets
                        let keys: Vec<Expression> =
                            if let Expression::Literal(Literal::String(ref s)) = f.path {
                                let parts = Self::decompose_json_path(s);
                                parts.into_iter().map(|k| Expression::string(&k)).collect()
                            } else {
                                // Dynamic path: pass through as a single argument.
                                vec![f.path]
                            };
                        // Redshift uses the _TEXT variant of the function name.
                        let func_name = if matches!(target, DialectType::Redshift) {
                            "JSON_EXTRACT_PATH_TEXT"
                        } else {
                            "JSON_EXTRACT_PATH"
                        };
                        let mut args = vec![f.this];
                        args.extend(keys);
                        Ok(Expression::Function(Box::new(Function::new(
                            func_name.to_string(),
                            args,
                        ))))
                    } else {
                        // GET_JSON_OBJECT(x, '$.path') for Hive/Spark
                        // Convert bracket double quotes to single quotes
                        let path = if let Expression::Literal(Literal::String(ref s)) = f.path {
                            let normalized = Self::bracket_to_single_quotes(s);
                            // Only rebuild the literal if normalization changed it.
                            if normalized != *s {
                                Expression::string(&normalized)
                            } else {
                                f.path
                            }
                        } else {
                            f.path
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "GET_JSON_OBJECT".to_string(),
                            vec![f.this, path],
                        ))))
                    }
                } else {
                    Ok(e)
                }
            }
18984
18985 Action::JsonExtractScalarToGetJsonObject => {
18986 // JSON_EXTRACT_SCALAR(x, '$.path') -> GET_JSON_OBJECT(x, '$.path') for Hive/Spark
18987 if let Expression::JsonExtractScalar(f) = e {
18988 Ok(Expression::Function(Box::new(Function::new(
18989 "GET_JSON_OBJECT".to_string(),
18990 vec![f.this, f.path],
18991 ))))
18992 } else {
18993 Ok(e)
18994 }
18995 }
18996
            Action::JsonExtractToTsql => {
                // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> ISNULL(JSON_QUERY(x, path), JSON_VALUE(x, path)) for TSQL
                // (per T-SQL docs JSON_QUERY covers object/array results and
                // JSON_VALUE covers scalars; ISNULL picks whichever succeeded).
                let (this, path) = match e {
                    Expression::JsonExtract(f) => (f.this, f.path),
                    Expression::JsonExtractScalar(f) => (f.this, f.path),
                    _ => return Ok(e),
                };
                // Transform path: strip wildcards, convert bracket notation to dot notation
                // (only possible for string-literal paths).
                let transformed_path = if let Expression::Literal(Literal::String(ref s)) = path
                {
                    let stripped = Self::strip_json_wildcards(s);
                    let dotted = Self::bracket_to_dot_notation(&stripped);
                    Expression::string(&dotted)
                } else {
                    path
                };
                let json_query = Expression::Function(Box::new(Function::new(
                    "JSON_QUERY".to_string(),
                    vec![this.clone(), transformed_path.clone()],
                )));
                let json_value = Expression::Function(Box::new(Function::new(
                    "JSON_VALUE".to_string(),
                    vec![this, transformed_path],
                )));
                Ok(Expression::Function(Box::new(Function::new(
                    "ISNULL".to_string(),
                    vec![json_query, json_value],
                ))))
            }
19026
            Action::JsonExtractToClickHouse => {
                // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> JSONExtractString(x, 'key1', idx, 'key2') for ClickHouse
                let (this, path) = match e {
                    Expression::JsonExtract(f) => (f.this, f.path),
                    Expression::JsonExtractScalar(f) => (f.this, f.path),
                    _ => return Ok(e),
                };
                // Split a literal path into individual key/index arguments; a
                // non-literal path is passed through as a single argument.
                let args: Vec<Expression> =
                    if let Expression::Literal(Literal::String(ref s)) = path {
                        let parts = Self::decompose_json_path(s);
                        let mut result = vec![this];
                        for part in parts {
                            // ClickHouse uses 1-based integer indices for array access,
                            // so purely-numeric path segments are shifted by one.
                            if let Ok(idx) = part.parse::<i64>() {
                                result.push(Expression::number(idx + 1));
                            } else {
                                result.push(Expression::string(&part));
                            }
                        }
                        result
                    } else {
                        vec![this, path]
                    };
                Ok(Expression::Function(Box::new(Function::new(
                    "JSONExtractString".to_string(),
                    args,
                ))))
            }
19055
            Action::JsonExtractScalarConvert => {
                // JSON_EXTRACT_SCALAR -> target-specific scalar extraction spelling.
                if let Expression::JsonExtractScalar(f) = e {
                    match target {
                        DialectType::PostgreSQL | DialectType::Redshift => {
                            // JSON_EXTRACT_SCALAR(x, '$.path') -> JSON_EXTRACT_PATH_TEXT(x, 'key1', 'key2')
                            // Only string-literal paths can be decomposed into keys;
                            // anything else is forwarded as one path argument.
                            let keys: Vec<Expression> =
                                if let Expression::Literal(Literal::String(ref s)) = f.path {
                                    let parts = Self::decompose_json_path(s);
                                    parts.into_iter().map(|k| Expression::string(&k)).collect()
                                } else {
                                    vec![f.path]
                                };
                            let mut args = vec![f.this];
                            args.extend(keys);
                            Ok(Expression::Function(Box::new(Function::new(
                                "JSON_EXTRACT_PATH_TEXT".to_string(),
                                args,
                            ))))
                        }
                        DialectType::Snowflake => {
                            // JSON_EXTRACT_SCALAR(x, '$.path') -> JSON_EXTRACT_PATH_TEXT(x, 'stripped_path')
                            // Snowflake takes the dotted path whole, minus the '$.' prefix.
                            let stripped_path =
                                if let Expression::Literal(Literal::String(ref s)) = f.path {
                                    let stripped = Self::strip_json_dollar_prefix(s);
                                    Expression::string(&stripped)
                                } else {
                                    f.path
                                };
                            Ok(Expression::Function(Box::new(Function::new(
                                "JSON_EXTRACT_PATH_TEXT".to_string(),
                                vec![f.this, stripped_path],
                            ))))
                        }
                        DialectType::SQLite | DialectType::DuckDB => {
                            // JSON_EXTRACT_SCALAR(x, '$.path') -> x ->> '$.path'
                            // Same node kind, rebuilt with the arrow-syntax flag set.
                            Ok(Expression::JsonExtractScalar(Box::new(
                                crate::expressions::JsonExtractFunc {
                                    this: f.this,
                                    path: f.path,
                                    returning: f.returning,
                                    arrow_syntax: true,
                                    hash_arrow_syntax: false,
                                    wrapper_option: None,
                                    quotes_option: None,
                                    on_scalar_string: false,
                                    on_error: None,
                                },
                            )))
                        }
                        _ => Ok(Expression::JsonExtractScalar(f)),
                    }
                } else {
                    Ok(e)
                }
            }
19112
            Action::JsonPathNormalize => {
                // Normalize JSON path format for BigQuery, MySQL, etc.
                // Only string-literal paths are rewritten; dynamic paths pass through.
                if let Expression::JsonExtract(mut f) = e {
                    if let Expression::Literal(Literal::String(ref s)) = f.path {
                        let mut normalized = s.clone();
                        // Convert bracket notation and handle wildcards per dialect
                        match target {
                            DialectType::BigQuery => {
                                // BigQuery strips wildcards and uses single quotes in brackets
                                normalized = Self::strip_json_wildcards(&normalized);
                                normalized = Self::bracket_to_single_quotes(&normalized);
                            }
                            DialectType::MySQL => {
                                // MySQL preserves wildcards, converts brackets to dot notation
                                normalized = Self::bracket_to_dot_notation(&normalized);
                            }
                            _ => {}
                        }
                        // Avoid rebuilding the literal when nothing changed.
                        if normalized != *s {
                            f.path = Expression::string(&normalized);
                        }
                    }
                    Ok(Expression::JsonExtract(f))
                } else {
                    Ok(e)
                }
            }
19140
            Action::JsonQueryValueConvert => {
                // Rewrite JSON_QUERY/JSON_VALUE for the target dialect.
                // `is_query` records which source form matched; note it is only
                // consulted by the default arm — all dialect-specific arms emit
                // the same shape regardless of the source form.
                let (f, is_query) = match e {
                    Expression::JsonQuery(f) => (f, true),
                    Expression::JsonValue(f) => (f, false),
                    _ => return Ok(e),
                };
                match target {
                    DialectType::TSQL | DialectType::Fabric => {
                        // ISNULL(JSON_QUERY(...), JSON_VALUE(...)):
                        // JSON_QUERY handles object/array results and JSON_VALUE
                        // handles scalars, so coalescing covers both payloads.
                        let json_query = Expression::Function(Box::new(Function::new(
                            "JSON_QUERY".to_string(),
                            vec![f.this.clone(), f.path.clone()],
                        )));
                        let json_value = Expression::Function(Box::new(Function::new(
                            "JSON_VALUE".to_string(),
                            vec![f.this, f.path],
                        )));
                        Ok(Expression::Function(Box::new(Function::new(
                            "ISNULL".to_string(),
                            vec![json_query, json_value],
                        ))))
                    }
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
                        // Hive-family dialects extract with GET_JSON_OBJECT.
                        Ok(Expression::Function(Box::new(Function::new(
                            "GET_JSON_OBJECT".to_string(),
                            vec![f.this, f.path],
                        ))))
                    }
                    DialectType::PostgreSQL | DialectType::Redshift => {
                        // NOTE(review): the path is passed through unchanged here,
                        // unlike the JSON_EXTRACT_PATH_TEXT conversion elsewhere
                        // which strips the '$.' prefix — confirm intended.
                        Ok(Expression::Function(Box::new(Function::new(
                            "JSON_EXTRACT_PATH_TEXT".to_string(),
                            vec![f.this, f.path],
                        ))))
                    }
                    DialectType::DuckDB | DialectType::SQLite => {
                        // json -> path arrow syntax; other JSON options are
                        // carried over onto the JsonExtract node.
                        Ok(Expression::JsonExtract(Box::new(
                            crate::expressions::JsonExtractFunc {
                                this: f.this,
                                path: f.path,
                                returning: f.returning,
                                arrow_syntax: true,
                                hash_arrow_syntax: false,
                                wrapper_option: f.wrapper_option,
                                quotes_option: f.quotes_option,
                                on_scalar_string: f.on_scalar_string,
                                on_error: f.on_error,
                            },
                        )))
                    }
                    DialectType::Snowflake => {
                        // GET_PATH(PARSE_JSON(json), 'path')
                        // Strip $. prefix from path
                        // Only wrap in PARSE_JSON if not already a PARSE_JSON call or ParseJson expression
                        let json_expr = match &f.this {
                            Expression::Function(ref inner_f)
                                if inner_f.name.eq_ignore_ascii_case("PARSE_JSON") =>
                            {
                                f.this
                            }
                            Expression::ParseJson(_) => {
                                // Already a ParseJson expression, which generates as PARSE_JSON(...)
                                f.this
                            }
                            _ => Expression::Function(Box::new(Function::new(
                                "PARSE_JSON".to_string(),
                                vec![f.this],
                            ))),
                        };
                        // GET_PATH takes the bare path, without the JSONPath
                        // '$.' root marker.
                        let path_str = match &f.path {
                            Expression::Literal(Literal::String(s)) => {
                                let stripped = s.strip_prefix("$.").unwrap_or(s);
                                Expression::Literal(Literal::String(stripped.to_string()))
                            }
                            other => other.clone(),
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "GET_PATH".to_string(),
                            vec![json_expr, path_str],
                        ))))
                    }
                    _ => {
                        // Default: keep as JSON_QUERY/JSON_VALUE function
                        let func_name = if is_query { "JSON_QUERY" } else { "JSON_VALUE" };
                        Ok(Expression::Function(Box::new(Function::new(
                            func_name.to_string(),
                            vec![f.this, f.path],
                        ))))
                    }
                }
            }
19233
19234 Action::JsonLiteralToJsonParse => {
19235 // CAST('x' AS JSON) -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake
19236 if let Expression::Cast(c) = e {
19237 let func_name = if matches!(target, DialectType::Snowflake) {
19238 "PARSE_JSON"
19239 } else {
19240 "JSON_PARSE"
19241 };
19242 Ok(Expression::Function(Box::new(Function::new(
19243 func_name.to_string(),
19244 vec![c.this],
19245 ))))
19246 } else {
19247 Ok(e)
19248 }
19249 }
19250
            Action::AtTimeZoneConvert => {
                // AT TIME ZONE -> the target's equivalent function. Note that
                // the argument order differs per target: Snowflake's
                // CONVERT_TIMEZONE takes the zone first, the others take the
                // expression first.
                if let Expression::AtTimeZone(atz) = e {
                    match target {
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // AT_TIMEZONE(expr, zone)
                            Ok(Expression::Function(Box::new(Function::new(
                                "AT_TIMEZONE".to_string(),
                                vec![atz.this, atz.zone],
                            ))))
                        }
                        DialectType::Spark | DialectType::Databricks => {
                            // FROM_UTC_TIMESTAMP(expr, zone)
                            // NOTE(review): assumes the source value is UTC —
                            // confirm this matches AT TIME ZONE semantics for
                            // the source dialect.
                            Ok(Expression::Function(Box::new(Function::new(
                                "FROM_UTC_TIMESTAMP".to_string(),
                                vec![atz.this, atz.zone],
                            ))))
                        }
                        DialectType::Snowflake => {
                            // CONVERT_TIMEZONE('zone', expr)
                            Ok(Expression::Function(Box::new(Function::new(
                                "CONVERT_TIMEZONE".to_string(),
                                vec![atz.zone, atz.this],
                            ))))
                        }
                        DialectType::BigQuery => {
                            // TIMESTAMP(DATETIME(expr, 'zone'))
                            let datetime_call = Expression::Function(Box::new(Function::new(
                                "DATETIME".to_string(),
                                vec![atz.this, atz.zone],
                            )));
                            Ok(Expression::Function(Box::new(Function::new(
                                "TIMESTAMP".to_string(),
                                vec![datetime_call],
                            ))))
                        }
                        // Fallback: Presto-style AT_TIMEZONE.
                        _ => Ok(Expression::Function(Box::new(Function::new(
                            "AT_TIMEZONE".to_string(),
                            vec![atz.this, atz.zone],
                        )))),
                    }
                } else {
                    Ok(e)
                }
            }
19294
            Action::DayOfWeekConvert => {
                // DAY_OF_WEEK -> ISODOW for DuckDB, ((DAYOFWEEK(x) % 7) + 1) for Spark
                // The Spark arithmetic shifts Spark's DAYOFWEEK numbering onto
                // the source dialect's numbering; explicit Paren nodes are
                // inserted so the generated SQL keeps the intended precedence.
                if let Expression::DayOfWeek(f) = e {
                    match target {
                        DialectType::DuckDB => Ok(Expression::Function(Box::new(
                            Function::new("ISODOW".to_string(), vec![f.this]),
                        ))),
                        DialectType::Spark | DialectType::Databricks => {
                            // ((DAYOFWEEK(x) % 7) + 1)
                            let dayofweek = Expression::Function(Box::new(Function::new(
                                "DAYOFWEEK".to_string(),
                                vec![f.this],
                            )));
                            // DAYOFWEEK(x) % 7
                            let modulo = Expression::Mod(Box::new(BinaryOp {
                                left: dayofweek,
                                right: Expression::number(7),
                                left_comments: Vec::new(),
                                operator_comments: Vec::new(),
                                trailing_comments: Vec::new(),
                                inferred_type: None,
                            }));
                            // (DAYOFWEEK(x) % 7)
                            let paren_mod = Expression::Paren(Box::new(Paren {
                                this: modulo,
                                trailing_comments: Vec::new(),
                            }));
                            // (DAYOFWEEK(x) % 7) + 1
                            let add_one = Expression::Add(Box::new(BinaryOp {
                                left: paren_mod,
                                right: Expression::number(1),
                                left_comments: Vec::new(),
                                operator_comments: Vec::new(),
                                trailing_comments: Vec::new(),
                                inferred_type: None,
                            }));
                            // Outer parens: ((DAYOFWEEK(x) % 7) + 1)
                            Ok(Expression::Paren(Box::new(Paren {
                                this: add_one,
                                trailing_comments: Vec::new(),
                            })))
                        }
                        // Other targets keep the DayOfWeek node unchanged.
                        _ => Ok(Expression::DayOfWeek(f)),
                    }
                } else {
                    Ok(e)
                }
            }
19339
19340 Action::MaxByMinByConvert => {
19341 // MAX_BY -> argMax for ClickHouse, drop 3rd arg for Spark
19342 // MIN_BY -> argMin for ClickHouse, ARG_MIN for DuckDB, drop 3rd arg for Spark/ClickHouse
19343 // Handle both Expression::Function and Expression::AggregateFunction
19344 let (is_max, args) = match &e {
19345 Expression::Function(f) => {
19346 (f.name.eq_ignore_ascii_case("MAX_BY"), f.args.clone())
19347 }
19348 Expression::AggregateFunction(af) => {
19349 (af.name.eq_ignore_ascii_case("MAX_BY"), af.args.clone())
19350 }
19351 _ => return Ok(e),
19352 };
19353 match target {
19354 DialectType::ClickHouse => {
19355 let name = if is_max { "argMax" } else { "argMin" };
19356 let mut args = args;
19357 args.truncate(2);
19358 Ok(Expression::Function(Box::new(Function::new(
19359 name.to_string(),
19360 args,
19361 ))))
19362 }
19363 DialectType::DuckDB => {
19364 let name = if is_max { "ARG_MAX" } else { "ARG_MIN" };
19365 Ok(Expression::Function(Box::new(Function::new(
19366 name.to_string(),
19367 args,
19368 ))))
19369 }
19370 DialectType::Spark | DialectType::Databricks => {
19371 let mut args = args;
19372 args.truncate(2);
19373 let name = if is_max { "MAX_BY" } else { "MIN_BY" };
19374 Ok(Expression::Function(Box::new(Function::new(
19375 name.to_string(),
19376 args,
19377 ))))
19378 }
19379 _ => Ok(e),
19380 }
19381 }
19382
19383 Action::ElementAtConvert => {
19384 // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
19385 let (arr, idx) = if let Expression::ElementAt(bf) = e {
19386 (bf.this, bf.expression)
19387 } else if let Expression::Function(ref f) = e {
19388 if f.args.len() >= 2 {
19389 if let Expression::Function(f) = e {
19390 let mut args = f.args;
19391 let arr = args.remove(0);
19392 let idx = args.remove(0);
19393 (arr, idx)
19394 } else {
19395 unreachable!("outer condition already matched Expression::Function")
19396 }
19397 } else {
19398 return Ok(e);
19399 }
19400 } else {
19401 return Ok(e);
19402 };
19403 match target {
19404 DialectType::PostgreSQL => {
19405 // Wrap array in parens for PostgreSQL: (ARRAY[1,2,3])[4]
19406 let arr_expr = Expression::Paren(Box::new(Paren {
19407 this: arr,
19408 trailing_comments: vec![],
19409 }));
19410 Ok(Expression::Subscript(Box::new(
19411 crate::expressions::Subscript {
19412 this: arr_expr,
19413 index: idx,
19414 },
19415 )))
19416 }
19417 DialectType::BigQuery => {
19418 // BigQuery: convert ARRAY[...] to bare [...] for subscript
19419 let arr_expr = match arr {
19420 Expression::ArrayFunc(af) => Expression::ArrayFunc(Box::new(
19421 crate::expressions::ArrayConstructor {
19422 expressions: af.expressions,
19423 bracket_notation: true,
19424 use_list_keyword: false,
19425 },
19426 )),
19427 other => other,
19428 };
19429 let safe_ordinal = Expression::Function(Box::new(Function::new(
19430 "SAFE_ORDINAL".to_string(),
19431 vec![idx],
19432 )));
19433 Ok(Expression::Subscript(Box::new(
19434 crate::expressions::Subscript {
19435 this: arr_expr,
19436 index: safe_ordinal,
19437 },
19438 )))
19439 }
19440 _ => Ok(Expression::Function(Box::new(Function::new(
19441 "ELEMENT_AT".to_string(),
19442 vec![arr, idx],
19443 )))),
19444 }
19445 }
19446
19447 Action::CurrentUserParens => {
19448 // CURRENT_USER -> CURRENT_USER() for Snowflake
19449 Ok(Expression::Function(Box::new(Function::new(
19450 "CURRENT_USER".to_string(),
19451 vec![],
19452 ))))
19453 }
19454
            Action::ArrayAggToCollectList => {
                // ARRAY_AGG(x ORDER BY ...) -> COLLECT_LIST(x) for Hive/Spark
                // Python sqlglot Hive.arrayagg_sql strips ORDER BY for simple cases
                // but preserves it when DISTINCT/IGNORE NULLS/LIMIT are present
                // ("simple" below means none of those modifiers is set).
                match e {
                    Expression::AggregateFunction(mut af) => {
                        let is_simple =
                            !af.distinct && af.ignore_nulls.is_none() && af.limit.is_none();
                        // COLLECT_LIST takes a single argument; extras dropped.
                        let args = if af.args.is_empty() {
                            vec![]
                        } else {
                            vec![af.args[0].clone()]
                        };
                        af.name = "COLLECT_LIST".to_string();
                        af.args = args;
                        if is_simple {
                            af.order_by = Vec::new();
                        }
                        Ok(Expression::AggregateFunction(af))
                    }
                    Expression::ArrayAgg(agg) => {
                        let is_simple =
                            !agg.distinct && agg.ignore_nulls.is_none() && agg.limit.is_none();
                        // Rebuild as an AggregateFunction, carrying the ArrayAgg
                        // modifiers over; ORDER BY is stripped in simple cases.
                        Ok(Expression::AggregateFunction(Box::new(
                            crate::expressions::AggregateFunction {
                                name: "COLLECT_LIST".to_string(),
                                args: vec![agg.this.clone()],
                                distinct: agg.distinct,
                                filter: agg.filter.clone(),
                                order_by: if is_simple {
                                    Vec::new()
                                } else {
                                    agg.order_by.clone()
                                },
                                limit: agg.limit.clone(),
                                ignore_nulls: agg.ignore_nulls,
                                inferred_type: None,
                            },
                        )))
                    }
                    _ => Ok(e),
                }
            }
19498
            Action::ArraySyntaxConvert => {
                match e {
                    // ARRAY[1, 2] (ArrayFunc bracket_notation=false) -> set bracket_notation=true
                    // so the generator uses dialect-specific output (ARRAY() for Spark, [] for BigQuery)
                    Expression::ArrayFunc(arr) if !arr.bracket_notation => Ok(
                        Expression::ArrayFunc(Box::new(crate::expressions::ArrayConstructor {
                            expressions: arr.expressions,
                            bracket_notation: true,
                            use_list_keyword: false,
                        })),
                    ),
                    // ARRAY(y) function style -> ArrayFunc for target dialect
                    // bracket_notation=true for BigQuery/DuckDB/ClickHouse/StarRocks (output []), false for Presto (output ARRAY[])
                    Expression::Function(f) if f.name.eq_ignore_ascii_case("ARRAY") => {
                        let bracket = matches!(
                            target,
                            DialectType::BigQuery
                                | DialectType::DuckDB
                                | DialectType::ClickHouse
                                | DialectType::StarRocks
                        );
                        Ok(Expression::ArrayFunc(Box::new(
                            crate::expressions::ArrayConstructor {
                                expressions: f.args,
                                bracket_notation: bracket,
                                use_list_keyword: false,
                            },
                        )))
                    }
                    // Everything else (including bracket-notation ArrayFunc)
                    // passes through unchanged.
                    _ => Ok(e),
                }
            }
19531
19532 Action::CastToJsonForSpark => {
19533 // CAST(x AS JSON) -> TO_JSON(x) for Spark
19534 if let Expression::Cast(c) = e {
19535 Ok(Expression::Function(Box::new(Function::new(
19536 "TO_JSON".to_string(),
19537 vec![c.this],
19538 ))))
19539 } else {
19540 Ok(e)
19541 }
19542 }
19543
19544 Action::CastJsonToFromJson => {
19545 // CAST(ParseJson(literal) AS ARRAY/MAP/STRUCT) -> FROM_JSON(literal, type_string) for Spark
19546 if let Expression::Cast(c) = e {
19547 // Extract the string literal from ParseJson
19548 let literal_expr = if let Expression::ParseJson(pj) = c.this {
19549 pj.this
19550 } else {
19551 c.this
19552 };
19553 // Convert the target DataType to Spark's type string format
19554 let type_str = Self::data_type_to_spark_string(&c.to);
19555 Ok(Expression::Function(Box::new(Function::new(
19556 "FROM_JSON".to_string(),
19557 vec![literal_expr, Expression::Literal(Literal::String(type_str))],
19558 ))))
19559 } else {
19560 Ok(e)
19561 }
19562 }
19563
            Action::ToJsonConvert => {
                // TO_JSON(x) -> target-specific conversion
                if let Expression::ToJson(f) = e {
                    let arg = f.this;
                    match target {
                        DialectType::Presto | DialectType::Trino => {
                            // JSON_FORMAT(CAST(x AS JSON))
                            // JSON is not a built-in DataType variant here, so a
                            // Custom type carries the spelling through.
                            let cast_json = Expression::Cast(Box::new(Cast {
                                this: arg,
                                to: DataType::Custom {
                                    name: "JSON".to_string(),
                                },
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            }));
                            Ok(Expression::Function(Box::new(Function::new(
                                "JSON_FORMAT".to_string(),
                                vec![cast_json],
                            ))))
                        }
                        // BigQuery renders a JSON string directly.
                        DialectType::BigQuery => Ok(Expression::Function(Box::new(
                            Function::new("TO_JSON_STRING".to_string(), vec![arg]),
                        ))),
                        DialectType::DuckDB => {
                            // CAST(TO_JSON(x) AS TEXT)
                            // DuckDB's TO_JSON yields a JSON value; the outer
                            // cast converts it to its string form.
                            let to_json =
                                Expression::ToJson(Box::new(crate::expressions::UnaryFunc {
                                    this: arg,
                                    original_name: None,
                                    inferred_type: None,
                                }));
                            Ok(Expression::Cast(Box::new(Cast {
                                this: to_json,
                                to: DataType::Text,
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            })))
                        }
                        // Other targets: rebuild the ToJson node unchanged.
                        _ => Ok(Expression::ToJson(Box::new(
                            crate::expressions::UnaryFunc {
                                this: arg,
                                original_name: None,
                                inferred_type: None,
                            },
                        ))),
                    }
                } else {
                    Ok(e)
                }
            }
19620
19621 Action::VarianceToClickHouse => {
19622 if let Expression::Variance(f) = e {
19623 Ok(Expression::Function(Box::new(Function::new(
19624 "varSamp".to_string(),
19625 vec![f.this],
19626 ))))
19627 } else {
19628 Ok(e)
19629 }
19630 }
19631
19632 Action::StddevToClickHouse => {
19633 if let Expression::Stddev(f) = e {
19634 Ok(Expression::Function(Box::new(Function::new(
19635 "stddevSamp".to_string(),
19636 vec![f.this],
19637 ))))
19638 } else {
19639 Ok(e)
19640 }
19641 }
19642
19643 Action::ApproxQuantileConvert => {
19644 if let Expression::ApproxQuantile(aq) = e {
19645 let mut args = vec![*aq.this];
19646 if let Some(q) = aq.quantile {
19647 args.push(*q);
19648 }
19649 Ok(Expression::Function(Box::new(Function::new(
19650 "APPROX_PERCENTILE".to_string(),
19651 args,
19652 ))))
19653 } else {
19654 Ok(e)
19655 }
19656 }
19657
19658 Action::DollarParamConvert => {
19659 if let Expression::Parameter(p) = e {
19660 Ok(Expression::Parameter(Box::new(
19661 crate::expressions::Parameter {
19662 name: p.name,
19663 index: p.index,
19664 style: crate::expressions::ParameterStyle::At,
19665 quoted: p.quoted,
19666 string_quoted: p.string_quoted,
19667 expression: p.expression,
19668 },
19669 )))
19670 } else {
19671 Ok(e)
19672 }
19673 }
19674
19675 Action::EscapeStringNormalize => {
19676 if let Expression::Literal(Literal::EscapeString(s)) = e {
19677 // Strip prefix (e.g., "e:" or "E:") if present from tokenizer
19678 let stripped = if s.starts_with("e:") || s.starts_with("E:") {
19679 s[2..].to_string()
19680 } else {
19681 s
19682 };
19683 let normalized = stripped
19684 .replace('\n', "\\n")
19685 .replace('\r', "\\r")
19686 .replace('\t', "\\t");
19687 match target {
19688 DialectType::BigQuery => {
19689 // BigQuery: e'...' -> CAST(b'...' AS STRING)
19690 // Use Raw for the b'...' part to avoid double-escaping
19691 let raw_sql = format!("CAST(b'{}' AS STRING)", normalized);
19692 Ok(Expression::Raw(crate::expressions::Raw { sql: raw_sql }))
19693 }
19694 _ => Ok(Expression::Literal(Literal::EscapeString(normalized))),
19695 }
19696 } else {
19697 Ok(e)
19698 }
19699 }
19700
19701 Action::StraightJoinCase => {
19702 // straight_join: keep lowercase for DuckDB, quote for MySQL
19703 if let Expression::Column(col) = e {
19704 if col.name.name == "STRAIGHT_JOIN" {
19705 let mut new_col = col;
19706 new_col.name.name = "straight_join".to_string();
19707 if matches!(target, DialectType::MySQL) {
19708 // MySQL: needs quoting since it's a reserved keyword
19709 new_col.name.quoted = true;
19710 }
19711 Ok(Expression::Column(new_col))
19712 } else {
19713 Ok(Expression::Column(col))
19714 }
19715 } else {
19716 Ok(e)
19717 }
19718 }
19719
19720 Action::TablesampleReservoir => {
19721 // TABLESAMPLE -> TABLESAMPLE RESERVOIR for DuckDB
19722 if let Expression::TableSample(mut ts) = e {
19723 if let Some(ref mut sample) = ts.sample {
19724 sample.method = crate::expressions::SampleMethod::Reservoir;
19725 sample.explicit_method = true;
19726 }
19727 Ok(Expression::TableSample(ts))
19728 } else {
19729 Ok(e)
19730 }
19731 }
19732
            Action::TablesampleSnowflakeStrip => {
                // Strip method and PERCENT for Snowflake target from non-Snowflake source
                // The same three flags are cleared whether the sample hangs off
                // a standalone TableSample node or a Table node.
                match e {
                    Expression::TableSample(mut ts) => {
                        if let Some(ref mut sample) = ts.sample {
                            sample.suppress_method_output = true;
                            sample.unit_after_size = false;
                            sample.is_percent = false;
                        }
                        Ok(Expression::TableSample(ts))
                    }
                    Expression::Table(mut t) => {
                        if let Some(ref mut sample) = t.table_sample {
                            sample.suppress_method_output = true;
                            sample.unit_after_size = false;
                            sample.is_percent = false;
                        }
                        Ok(Expression::Table(t))
                    }
                    _ => Ok(e),
                }
            }
19755
19756 Action::FirstToAnyValue => {
19757 // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
19758 if let Expression::First(mut agg) = e {
19759 agg.ignore_nulls = None;
19760 agg.name = Some("ANY_VALUE".to_string());
19761 Ok(Expression::AnyValue(agg))
19762 } else {
19763 Ok(e)
19764 }
19765 }
19766
            Action::ArrayIndexConvert => {
                // Rewrite a literal numeric subscript from 1-based to 0-based
                // (e.g. arr[1] -> arr[0]). Non-literal or unparseable indices
                // are left untouched.
                // NOTE(review): a literal 0 would become -1 here — presumably
                // the source dialect is strictly 1-based so 0 never appears;
                // confirm with the action-selection logic.
                if let Expression::Subscript(mut sub) = e {
                    if let Expression::Literal(Literal::Number(ref n)) = sub.index {
                        if let Ok(val) = n.parse::<i64>() {
                            sub.index =
                                Expression::Literal(Literal::Number((val - 1).to_string()));
                        }
                    }
                    Ok(Expression::Subscript(sub))
                } else {
                    Ok(e)
                }
            }
19781
19782 Action::AnyValueIgnoreNulls => {
19783 // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
19784 if let Expression::AnyValue(mut av) = e {
19785 if av.ignore_nulls.is_none() {
19786 av.ignore_nulls = Some(true);
19787 }
19788 Ok(Expression::AnyValue(av))
19789 } else {
19790 Ok(e)
19791 }
19792 }
19793
19794 Action::BigQueryNullsOrdering => {
19795 // BigQuery doesn't support NULLS FIRST/LAST in window function ORDER BY
19796 if let Expression::WindowFunction(mut wf) = e {
19797 for o in &mut wf.over.order_by {
19798 o.nulls_first = None;
19799 }
19800 Ok(Expression::WindowFunction(wf))
19801 } else if let Expression::Ordered(mut o) = e {
19802 o.nulls_first = None;
19803 Ok(Expression::Ordered(o))
19804 } else {
19805 Ok(e)
19806 }
19807 }
19808
19809 Action::SnowflakeFloatProtect => {
19810 // Convert DataType::Float to DataType::Custom("FLOAT") to prevent
19811 // Snowflake's target transform from converting it to DOUBLE.
19812 // Non-Snowflake sources should keep their FLOAT spelling.
19813 if let Expression::DataType(DataType::Float { .. }) = e {
19814 Ok(Expression::DataType(DataType::Custom {
19815 name: "FLOAT".to_string(),
19816 }))
19817 } else {
19818 Ok(e)
19819 }
19820 }
19821
19822 Action::MysqlNullsOrdering => {
19823 // MySQL doesn't support NULLS FIRST/LAST - strip or rewrite
19824 if let Expression::Ordered(mut o) = e {
19825 let nulls_last = o.nulls_first == Some(false);
19826 let desc = o.desc;
19827 // MySQL default: ASC -> NULLS LAST, DESC -> NULLS FIRST
19828 // If requested ordering matches default, just strip NULLS clause
19829 let matches_default = if desc {
19830 // DESC default is NULLS FIRST, so nulls_first=true matches
19831 o.nulls_first == Some(true)
19832 } else {
19833 // ASC default is NULLS LAST, so nulls_first=false matches
19834 nulls_last
19835 };
19836 if matches_default {
19837 o.nulls_first = None;
19838 Ok(Expression::Ordered(o))
19839 } else {
19840 // Need CASE WHEN x IS NULL THEN 0/1 ELSE 0/1 END, x
19841 // For ASC NULLS FIRST: ORDER BY CASE WHEN x IS NULL THEN 0 ELSE 1 END, x ASC
19842 // For DESC NULLS LAST: ORDER BY CASE WHEN x IS NULL THEN 1 ELSE 0 END, x DESC
19843 let null_val = if desc { 1 } else { 0 };
19844 let non_null_val = if desc { 0 } else { 1 };
19845 let _case_expr = Expression::Case(Box::new(Case {
19846 operand: None,
19847 whens: vec![(
19848 Expression::IsNull(Box::new(crate::expressions::IsNull {
19849 this: o.this.clone(),
19850 not: false,
19851 postfix_form: false,
19852 })),
19853 Expression::number(null_val),
19854 )],
19855 else_: Some(Expression::number(non_null_val)),
19856 comments: Vec::new(),
19857 inferred_type: None,
19858 }));
19859 o.nulls_first = None;
19860 // Return a tuple of [case_expr, ordered_expr]
19861 // We need to return both as part of the ORDER BY
19862 // But since transform_recursive processes individual expressions,
19863 // we can't easily add extra ORDER BY items here.
19864 // Instead, strip the nulls_first
19865 o.nulls_first = None;
19866 Ok(Expression::Ordered(o))
19867 }
19868 } else {
19869 Ok(e)
19870 }
19871 }
19872
19873 Action::MysqlNullsLastRewrite => {
19874 // DuckDB -> MySQL: Add CASE WHEN IS NULL THEN 1 ELSE 0 END to ORDER BY
19875 // to simulate NULLS LAST for ASC ordering
19876 if let Expression::WindowFunction(mut wf) = e {
19877 let mut new_order_by = Vec::new();
19878 for o in wf.over.order_by {
19879 if !o.desc {
19880 // ASC: DuckDB has NULLS LAST, MySQL has NULLS FIRST
19881 // Add CASE WHEN expr IS NULL THEN 1 ELSE 0 END before expr
19882 let case_expr = Expression::Case(Box::new(Case {
19883 operand: None,
19884 whens: vec![(
19885 Expression::IsNull(Box::new(crate::expressions::IsNull {
19886 this: o.this.clone(),
19887 not: false,
19888 postfix_form: false,
19889 })),
19890 Expression::Literal(Literal::Number("1".to_string())),
19891 )],
19892 else_: Some(Expression::Literal(Literal::Number(
19893 "0".to_string(),
19894 ))),
19895 comments: Vec::new(),
19896 inferred_type: None,
19897 }));
19898 new_order_by.push(crate::expressions::Ordered {
19899 this: case_expr,
19900 desc: false,
19901 nulls_first: None,
19902 explicit_asc: false,
19903 with_fill: None,
19904 });
19905 let mut ordered = o;
19906 ordered.nulls_first = None;
19907 new_order_by.push(ordered);
19908 } else {
19909 // DESC: DuckDB has NULLS LAST, MySQL also has NULLS LAST (NULLs smallest in DESC)
19910 // No change needed
19911 let mut ordered = o;
19912 ordered.nulls_first = None;
19913 new_order_by.push(ordered);
19914 }
19915 }
19916 wf.over.order_by = new_order_by;
19917 Ok(Expression::WindowFunction(wf))
19918 } else {
19919 Ok(e)
19920 }
19921 }
19922
19923 Action::RespectNullsConvert => {
19924 // RESPECT NULLS -> strip for SQLite (FIRST_VALUE(c) OVER (...))
19925 if let Expression::WindowFunction(mut wf) = e {
19926 match &mut wf.this {
19927 Expression::FirstValue(ref mut vf) => {
19928 if vf.ignore_nulls == Some(false) {
19929 vf.ignore_nulls = None;
19930 // For SQLite, we'd need to add NULLS LAST to ORDER BY in the OVER clause
19931 // but that's handled by the generator's NULLS ordering
19932 }
19933 }
19934 Expression::LastValue(ref mut vf) => {
19935 if vf.ignore_nulls == Some(false) {
19936 vf.ignore_nulls = None;
19937 }
19938 }
19939 _ => {}
19940 }
19941 Ok(Expression::WindowFunction(wf))
19942 } else {
19943 Ok(e)
19944 }
19945 }
19946
            Action::CreateTableStripComment => {
                // Strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
                // Comments are tracked in three places (the column's own field,
                // its constraints list, and the constraint ordering) — all
                // three must be cleared to keep the column consistent.
                if let Expression::CreateTable(mut ct) = e {
                    for col in &mut ct.columns {
                        col.comment = None;
                        col.constraints.retain(|c| {
                            !matches!(c, crate::expressions::ColumnConstraint::Comment(_))
                        });
                        // Also remove Comment from constraint_order
                        col.constraint_order.retain(|c| {
                            !matches!(c, crate::expressions::ConstraintType::Comment)
                        });
                    }
                    // Strip properties (USING, PARTITIONED BY, etc.)
                    ct.properties.clear();
                    Ok(Expression::CreateTable(ct))
                } else {
                    Ok(e)
                }
            }
19967
19968 Action::AlterTableToSpRename => {
19969 // ALTER TABLE db.t1 RENAME TO db.t2 -> EXEC sp_rename 'db.t1', 't2'
19970 if let Expression::AlterTable(ref at) = e {
19971 if let Some(crate::expressions::AlterTableAction::RenameTable(
19972 ref new_tbl,
19973 )) = at.actions.first()
19974 {
19975 // Build the old table name using TSQL bracket quoting
19976 let old_name = if let Some(ref schema) = at.name.schema {
19977 if at.name.name.quoted || schema.quoted {
19978 format!("[{}].[{}]", schema.name, at.name.name.name)
19979 } else {
19980 format!("{}.{}", schema.name, at.name.name.name)
19981 }
19982 } else {
19983 if at.name.name.quoted {
19984 format!("[{}]", at.name.name.name)
19985 } else {
19986 at.name.name.name.clone()
19987 }
19988 };
19989 let new_name = new_tbl.name.name.clone();
19990 // EXEC sp_rename 'old_name', 'new_name'
19991 let sql = format!("EXEC sp_rename '{}', '{}'", old_name, new_name);
19992 Ok(Expression::Raw(crate::expressions::Raw { sql }))
19993 } else {
19994 Ok(e)
19995 }
19996 } else {
19997 Ok(e)
19998 }
19999 }
20000
            Action::SnowflakeIntervalFormat => {
                // INTERVAL '2' HOUR -> INTERVAL '2 HOUR' for Snowflake
                // Snowflake puts the unit inside the quoted literal; fold the
                // unit keyword into the string and clear the separate unit.
                // Only simple string-literal intervals with a Simple unit spec
                // are rewritten; anything else is left as-is.
                if let Expression::Interval(mut iv) = e {
                    if let (
                        Some(Expression::Literal(Literal::String(ref val))),
                        Some(ref unit_spec),
                    ) = (&iv.this, &iv.unit)
                    {
                        let unit_str = match unit_spec {
                            crate::expressions::IntervalUnitSpec::Simple { unit, .. } => {
                                match unit {
                                    crate::expressions::IntervalUnit::Year => "YEAR",
                                    crate::expressions::IntervalUnit::Quarter => "QUARTER",
                                    crate::expressions::IntervalUnit::Month => "MONTH",
                                    crate::expressions::IntervalUnit::Week => "WEEK",
                                    crate::expressions::IntervalUnit::Day => "DAY",
                                    crate::expressions::IntervalUnit::Hour => "HOUR",
                                    crate::expressions::IntervalUnit::Minute => "MINUTE",
                                    crate::expressions::IntervalUnit::Second => "SECOND",
                                    crate::expressions::IntervalUnit::Millisecond => {
                                        "MILLISECOND"
                                    }
                                    crate::expressions::IntervalUnit::Microsecond => {
                                        "MICROSECOND"
                                    }
                                    crate::expressions::IntervalUnit::Nanosecond => {
                                        "NANOSECOND"
                                    }
                                }
                            }
                            // Non-simple unit specs (e.g. ranges) are skipped
                            // via the empty-string sentinel below.
                            _ => "",
                        };
                        if !unit_str.is_empty() {
                            let combined = format!("{} {}", val, unit_str);
                            iv.this = Some(Expression::Literal(Literal::String(combined)));
                            iv.unit = None;
                        }
                    }
                    Ok(Expression::Interval(iv))
                } else {
                    Ok(e)
                }
            }
20044
20045 Action::ArrayConcatBracketConvert => {
20046 // Expression::Array/ArrayFunc -> target-specific
20047 // For PostgreSQL: Array -> ArrayFunc (bracket_notation: false)
20048 // For Redshift: Array/ArrayFunc -> Function("ARRAY", args) to produce ARRAY(1, 2) with parens
20049 match e {
20050 Expression::Array(arr) => {
20051 if matches!(target, DialectType::Redshift) {
20052 Ok(Expression::Function(Box::new(Function::new(
20053 "ARRAY".to_string(),
20054 arr.expressions,
20055 ))))
20056 } else {
20057 Ok(Expression::ArrayFunc(Box::new(
20058 crate::expressions::ArrayConstructor {
20059 expressions: arr.expressions,
20060 bracket_notation: false,
20061 use_list_keyword: false,
20062 },
20063 )))
20064 }
20065 }
20066 Expression::ArrayFunc(arr) => {
20067 // Only for Redshift: convert bracket-notation ArrayFunc to Function("ARRAY")
20068 if matches!(target, DialectType::Redshift) {
20069 Ok(Expression::Function(Box::new(Function::new(
20070 "ARRAY".to_string(),
20071 arr.expressions,
20072 ))))
20073 } else {
20074 Ok(Expression::ArrayFunc(arr))
20075 }
20076 }
20077 _ => Ok(e),
20078 }
20079 }
20080
20081 Action::BitAggFloatCast => {
20082 // BIT_OR/BIT_AND/BIT_XOR with float/decimal cast arg -> wrap with ROUND+INT cast for DuckDB
20083 // For FLOAT/DOUBLE/REAL: CAST(ROUND(CAST(val AS type)) AS INT)
20084 // For DECIMAL: CAST(CAST(val AS DECIMAL(p,s)) AS INT)
20085 let int_type = DataType::Int {
20086 length: None,
20087 integer_spelling: false,
20088 };
20089 let wrap_agg = |agg_this: Expression, int_dt: DataType| -> Expression {
20090 if let Expression::Cast(c) = agg_this {
20091 match &c.to {
20092 DataType::Float { .. }
20093 | DataType::Double { .. }
20094 | DataType::Custom { .. } => {
20095 // FLOAT/DOUBLE/REAL: CAST(ROUND(CAST(val AS type)) AS INT)
20096 // Change FLOAT to REAL (Float with real_spelling=true) for DuckDB generator
20097 let inner_type = match &c.to {
20098 DataType::Float {
20099 precision, scale, ..
20100 } => DataType::Float {
20101 precision: *precision,
20102 scale: *scale,
20103 real_spelling: true,
20104 },
20105 other => other.clone(),
20106 };
20107 let inner_cast =
20108 Expression::Cast(Box::new(crate::expressions::Cast {
20109 this: c.this.clone(),
20110 to: inner_type,
20111 trailing_comments: Vec::new(),
20112 double_colon_syntax: false,
20113 format: None,
20114 default: None,
20115 inferred_type: None,
20116 }));
20117 let rounded = Expression::Function(Box::new(Function::new(
20118 "ROUND".to_string(),
20119 vec![inner_cast],
20120 )));
20121 Expression::Cast(Box::new(crate::expressions::Cast {
20122 this: rounded,
20123 to: int_dt,
20124 trailing_comments: Vec::new(),
20125 double_colon_syntax: false,
20126 format: None,
20127 default: None,
20128 inferred_type: None,
20129 }))
20130 }
20131 DataType::Decimal { .. } => {
20132 // DECIMAL: CAST(CAST(val AS DECIMAL(p,s)) AS INT)
20133 Expression::Cast(Box::new(crate::expressions::Cast {
20134 this: Expression::Cast(c),
20135 to: int_dt,
20136 trailing_comments: Vec::new(),
20137 double_colon_syntax: false,
20138 format: None,
20139 default: None,
20140 inferred_type: None,
20141 }))
20142 }
20143 _ => Expression::Cast(c),
20144 }
20145 } else {
20146 agg_this
20147 }
20148 };
20149 match e {
20150 Expression::BitwiseOrAgg(mut f) => {
20151 f.this = wrap_agg(f.this, int_type);
20152 Ok(Expression::BitwiseOrAgg(f))
20153 }
20154 Expression::BitwiseAndAgg(mut f) => {
20155 let int_type = DataType::Int {
20156 length: None,
20157 integer_spelling: false,
20158 };
20159 f.this = wrap_agg(f.this, int_type);
20160 Ok(Expression::BitwiseAndAgg(f))
20161 }
20162 Expression::BitwiseXorAgg(mut f) => {
20163 let int_type = DataType::Int {
20164 length: None,
20165 integer_spelling: false,
20166 };
20167 f.this = wrap_agg(f.this, int_type);
20168 Ok(Expression::BitwiseXorAgg(f))
20169 }
20170 _ => Ok(e),
20171 }
20172 }
20173
20174 Action::BitAggSnowflakeRename => {
20175 // BIT_OR -> BITORAGG, BIT_AND -> BITANDAGG, BIT_XOR -> BITXORAGG for Snowflake
20176 match e {
20177 Expression::BitwiseOrAgg(f) => Ok(Expression::Function(Box::new(
20178 Function::new("BITORAGG".to_string(), vec![f.this]),
20179 ))),
20180 Expression::BitwiseAndAgg(f) => Ok(Expression::Function(Box::new(
20181 Function::new("BITANDAGG".to_string(), vec![f.this]),
20182 ))),
20183 Expression::BitwiseXorAgg(f) => Ok(Expression::Function(Box::new(
20184 Function::new("BITXORAGG".to_string(), vec![f.this]),
20185 ))),
20186 _ => Ok(e),
20187 }
20188 }
20189
20190 Action::StrftimeCastTimestamp => {
20191 // CAST(x AS TIMESTAMP) -> CAST(x AS TIMESTAMP_NTZ) for Spark
20192 if let Expression::Cast(mut c) = e {
20193 if matches!(
20194 c.to,
20195 DataType::Timestamp {
20196 timezone: false,
20197 ..
20198 }
20199 ) {
20200 c.to = DataType::Custom {
20201 name: "TIMESTAMP_NTZ".to_string(),
20202 };
20203 }
20204 Ok(Expression::Cast(c))
20205 } else {
20206 Ok(e)
20207 }
20208 }
20209
20210 Action::DecimalDefaultPrecision => {
20211 // DECIMAL without precision -> DECIMAL(18, 3) for Snowflake
20212 if let Expression::Cast(mut c) = e {
20213 if matches!(
20214 c.to,
20215 DataType::Decimal {
20216 precision: None,
20217 ..
20218 }
20219 ) {
20220 c.to = DataType::Decimal {
20221 precision: Some(18),
20222 scale: Some(3),
20223 };
20224 }
20225 Ok(Expression::Cast(c))
20226 } else {
20227 Ok(e)
20228 }
20229 }
20230
            Action::FilterToIff => {
                // FILTER(WHERE cond) -> rewrite aggregate: AGG(IFF(cond, val, NULL))
                // For targets (e.g. Snowflake) lacking the SQL-standard FILTER clause,
                // the condition is folded into the aggregate's operand.
                if let Expression::Filter(f) = e {
                    let condition = *f.expression;
                    let agg = *f.this;
                    // Strip WHERE from condition
                    let cond = match condition {
                        Expression::Where(w) => w.this,
                        other => other,
                    };
                    // Extract the aggregate function and its argument
                    // We want AVG(IFF(condition, x, NULL))
                    match agg {
                        // Generic function call: wrap its first argument in IFF.
                        Expression::Function(mut func) => {
                            if !func.args.is_empty() {
                                let orig_arg = func.args[0].clone();
                                let iff_call = Expression::Function(Box::new(Function::new(
                                    "IFF".to_string(),
                                    vec![cond, orig_arg, Expression::Null(Null)],
                                )));
                                func.args[0] = iff_call;
                                Ok(Expression::Function(func))
                            } else {
                                // Zero-argument call: nothing to wrap, keep the Filter node.
                                Ok(Expression::Filter(Box::new(crate::expressions::Filter {
                                    this: Box::new(Expression::Function(func)),
                                    expression: Box::new(cond),
                                })))
                            }
                        }
                        // Dedicated aggregate nodes: wrap their single operand.
                        Expression::Avg(mut avg) => {
                            let iff_call = Expression::Function(Box::new(Function::new(
                                "IFF".to_string(),
                                vec![cond, avg.this.clone(), Expression::Null(Null)],
                            )));
                            avg.this = iff_call;
                            Ok(Expression::Avg(avg))
                        }
                        Expression::Sum(mut s) => {
                            let iff_call = Expression::Function(Box::new(Function::new(
                                "IFF".to_string(),
                                vec![cond, s.this.clone(), Expression::Null(Null)],
                            )));
                            s.this = iff_call;
                            Ok(Expression::Sum(s))
                        }
                        Expression::Count(mut c) => {
                            // NOTE(review): when c.this is None (COUNT(*)), the filter
                            // condition is silently dropped here — confirm this is
                            // intended rather than e.g. COUNT(IFF(cond, 1, NULL)).
                            if let Some(ref this_expr) = c.this {
                                let iff_call = Expression::Function(Box::new(Function::new(
                                    "IFF".to_string(),
                                    vec![cond, this_expr.clone(), Expression::Null(Null)],
                                )));
                                c.this = Some(iff_call);
                            }
                            Ok(Expression::Count(c))
                        }
                        other => {
                            // Fallback: keep as Filter
                            Ok(Expression::Filter(Box::new(crate::expressions::Filter {
                                this: Box::new(other),
                                expression: Box::new(cond),
                            })))
                        }
                    }
                } else {
                    Ok(e)
                }
            }
20298
            Action::AggFilterToIff => {
                // AggFunc.filter -> IFF wrapping: AVG(x) FILTER(WHERE cond) -> AVG(IFF(cond, x, NULL))
                // Helper macro to handle the common AggFunc case
                macro_rules! handle_agg_filter_to_iff {
                    ($variant:ident, $agg:expr) => {{
                        let mut agg = $agg;
                        // Detach the FILTER clause (if any) and fold it into the
                        // aggregate's operand as IFF(cond, operand, NULL).
                        if let Some(filter_cond) = agg.filter.take() {
                            let iff_call = Expression::Function(Box::new(Function::new(
                                "IFF".to_string(),
                                vec![filter_cond, agg.this.clone(), Expression::Null(Null)],
                            )));
                            agg.this = iff_call;
                        }
                        Ok(Expression::$variant(agg))
                    }};
                }

                match e {
                    Expression::Avg(agg) => handle_agg_filter_to_iff!(Avg, agg),
                    Expression::Sum(agg) => handle_agg_filter_to_iff!(Sum, agg),
                    Expression::Min(agg) => handle_agg_filter_to_iff!(Min, agg),
                    Expression::Max(agg) => handle_agg_filter_to_iff!(Max, agg),
                    Expression::ArrayAgg(agg) => handle_agg_filter_to_iff!(ArrayAgg, agg),
                    Expression::CountIf(agg) => handle_agg_filter_to_iff!(CountIf, agg),
                    Expression::Stddev(agg) => handle_agg_filter_to_iff!(Stddev, agg),
                    Expression::StddevPop(agg) => handle_agg_filter_to_iff!(StddevPop, agg),
                    Expression::StddevSamp(agg) => handle_agg_filter_to_iff!(StddevSamp, agg),
                    Expression::Variance(agg) => handle_agg_filter_to_iff!(Variance, agg),
                    Expression::VarPop(agg) => handle_agg_filter_to_iff!(VarPop, agg),
                    Expression::VarSamp(agg) => handle_agg_filter_to_iff!(VarSamp, agg),
                    Expression::Median(agg) => handle_agg_filter_to_iff!(Median, agg),
                    Expression::Mode(agg) => handle_agg_filter_to_iff!(Mode, agg),
                    Expression::First(agg) => handle_agg_filter_to_iff!(First, agg),
                    Expression::Last(agg) => handle_agg_filter_to_iff!(Last, agg),
                    Expression::AnyValue(agg) => handle_agg_filter_to_iff!(AnyValue, agg),
                    Expression::ApproxDistinct(agg) => {
                        handle_agg_filter_to_iff!(ApproxDistinct, agg)
                    }
                    // COUNT carries an optional operand, so it cannot use the macro.
                    Expression::Count(mut c) => {
                        // NOTE(review): for COUNT(*) (c.this == None) the filter is
                        // removed by take() and then discarded — confirm intended.
                        if let Some(filter_cond) = c.filter.take() {
                            if let Some(ref this_expr) = c.this {
                                let iff_call = Expression::Function(Box::new(Function::new(
                                    "IFF".to_string(),
                                    vec![
                                        filter_cond,
                                        this_expr.clone(),
                                        Expression::Null(Null),
                                    ],
                                )));
                                c.this = Some(iff_call);
                            }
                        }
                        Ok(Expression::Count(c))
                    }
                    other => Ok(other),
                }
            }
20356
            Action::JsonToGetPath => {
                // JSON_EXTRACT(x, '$.key') -> GET_PATH(PARSE_JSON(x), 'key')
                if let Expression::JsonExtract(je) = e {
                    // Convert to PARSE_JSON() wrapper:
                    // - JSON(x) -> PARSE_JSON(x)
                    // - PARSE_JSON(x) -> keep as-is
                    // - anything else -> wrap in PARSE_JSON()
                    let this = match &je.this {
                        Expression::Function(f)
                            if f.name.eq_ignore_ascii_case("JSON") && f.args.len() == 1 =>
                        {
                            Expression::Function(Box::new(Function::new(
                                "PARSE_JSON".to_string(),
                                f.args.clone(),
                            )))
                        }
                        Expression::Function(f)
                            if f.name.eq_ignore_ascii_case("PARSE_JSON") =>
                        {
                            je.this.clone()
                        }
                        // GET_PATH result is already JSON, don't wrap
                        Expression::Function(f) if f.name.eq_ignore_ascii_case("GET_PATH") => {
                            je.this.clone()
                        }
                        other => {
                            // Wrap non-JSON expressions in PARSE_JSON()
                            Expression::Function(Box::new(Function::new(
                                "PARSE_JSON".to_string(),
                                vec![other.clone()],
                            )))
                        }
                    };
                    // Convert path: extract key from JSONPath or strip $. prefix from string
                    let path = match &je.path {
                        Expression::JSONPath(jp) => {
                            // Extract the key from JSONPath: $root.key -> 'key'
                            let mut key_parts = Vec::new();
                            for expr in &jp.expressions {
                                match expr {
                                    Expression::JSONPathRoot(_) => {} // skip root
                                    Expression::JSONPathKey(k) => {
                                        if let Expression::Literal(Literal::String(s)) =
                                            &*k.this
                                        {
                                            key_parts.push(s.clone());
                                        }
                                    }
                                    // Non-key segments (subscripts, wildcards) are ignored.
                                    _ => {}
                                }
                            }
                            if !key_parts.is_empty() {
                                // Dotted key chain, e.g. ['a', 'b'] -> 'a.b'.
                                Expression::Literal(Literal::String(key_parts.join(".")))
                            } else {
                                // No usable keys: keep the original path expression.
                                je.path.clone()
                            }
                        }
                        Expression::Literal(Literal::String(s)) if s.starts_with("$.") => {
                            let stripped = Self::strip_json_wildcards(&s[2..].to_string());
                            Expression::Literal(Literal::String(stripped))
                        }
                        Expression::Literal(Literal::String(s)) if s.starts_with('$') => {
                            let stripped = Self::strip_json_wildcards(&s[1..].to_string());
                            Expression::Literal(Literal::String(stripped))
                        }
                        _ => je.path.clone(),
                    };
                    Ok(Expression::Function(Box::new(Function::new(
                        "GET_PATH".to_string(),
                        vec![this, path],
                    ))))
                } else {
                    Ok(e)
                }
            }
20432
20433 Action::StructToRow => {
20434 // DuckDB struct/dict -> BigQuery STRUCT(value AS key, ...) / Presto ROW
20435 // Handles both Expression::Struct and Expression::MapFunc(curly_brace_syntax=true)
20436
20437 // Extract key-value pairs from either Struct or MapFunc
20438 let kv_pairs: Option<Vec<(String, Expression)>> = match &e {
20439 Expression::Struct(s) => Some(
20440 s.fields
20441 .iter()
20442 .map(|(opt_name, field_expr)| {
20443 if let Some(name) = opt_name {
20444 (name.clone(), field_expr.clone())
20445 } else if let Expression::NamedArgument(na) = field_expr {
20446 (na.name.name.clone(), na.value.clone())
20447 } else {
20448 (String::new(), field_expr.clone())
20449 }
20450 })
20451 .collect(),
20452 ),
20453 Expression::MapFunc(m) if m.curly_brace_syntax => Some(
20454 m.keys
20455 .iter()
20456 .zip(m.values.iter())
20457 .map(|(key, value)| {
20458 let key_name = match key {
20459 Expression::Literal(Literal::String(s)) => s.clone(),
20460 Expression::Identifier(id) => id.name.clone(),
20461 _ => String::new(),
20462 };
20463 (key_name, value.clone())
20464 })
20465 .collect(),
20466 ),
20467 _ => None,
20468 };
20469
20470 if let Some(pairs) = kv_pairs {
20471 let mut named_args = Vec::new();
20472 for (key_name, value) in pairs {
20473 if matches!(target, DialectType::BigQuery) && !key_name.is_empty() {
20474 named_args.push(Expression::Alias(Box::new(
20475 crate::expressions::Alias::new(
20476 value,
20477 Identifier::new(key_name),
20478 ),
20479 )));
20480 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
20481 named_args.push(value);
20482 } else {
20483 named_args.push(value);
20484 }
20485 }
20486
20487 if matches!(target, DialectType::BigQuery) {
20488 Ok(Expression::Function(Box::new(Function::new(
20489 "STRUCT".to_string(),
20490 named_args,
20491 ))))
20492 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
20493 // For Presto/Trino, infer types and wrap in CAST(ROW(...) AS ROW(name TYPE, ...))
20494 let row_func = Expression::Function(Box::new(Function::new(
20495 "ROW".to_string(),
20496 named_args,
20497 )));
20498
20499 // Try to infer types for each pair
20500 let kv_pairs_again: Option<Vec<(String, Expression)>> = match &e {
20501 Expression::Struct(s) => Some(
20502 s.fields
20503 .iter()
20504 .map(|(opt_name, field_expr)| {
20505 if let Some(name) = opt_name {
20506 (name.clone(), field_expr.clone())
20507 } else if let Expression::NamedArgument(na) = field_expr
20508 {
20509 (na.name.name.clone(), na.value.clone())
20510 } else {
20511 (String::new(), field_expr.clone())
20512 }
20513 })
20514 .collect(),
20515 ),
20516 Expression::MapFunc(m) if m.curly_brace_syntax => Some(
20517 m.keys
20518 .iter()
20519 .zip(m.values.iter())
20520 .map(|(key, value)| {
20521 let key_name = match key {
20522 Expression::Literal(Literal::String(s)) => {
20523 s.clone()
20524 }
20525 Expression::Identifier(id) => id.name.clone(),
20526 _ => String::new(),
20527 };
20528 (key_name, value.clone())
20529 })
20530 .collect(),
20531 ),
20532 _ => None,
20533 };
20534
20535 if let Some(pairs) = kv_pairs_again {
20536 // Infer types for all values
20537 let mut all_inferred = true;
20538 let mut fields = Vec::new();
20539 for (name, value) in &pairs {
20540 let inferred_type = match value {
20541 Expression::Literal(Literal::Number(n)) => {
20542 if n.contains('.') {
20543 Some(DataType::Double {
20544 precision: None,
20545 scale: None,
20546 })
20547 } else {
20548 Some(DataType::Int {
20549 length: None,
20550 integer_spelling: true,
20551 })
20552 }
20553 }
20554 Expression::Literal(Literal::String(_)) => {
20555 Some(DataType::VarChar {
20556 length: None,
20557 parenthesized_length: false,
20558 })
20559 }
20560 Expression::Boolean(_) => Some(DataType::Boolean),
20561 _ => None,
20562 };
20563 if let Some(dt) = inferred_type {
20564 fields.push(crate::expressions::StructField::new(
20565 name.clone(),
20566 dt,
20567 ));
20568 } else {
20569 all_inferred = false;
20570 break;
20571 }
20572 }
20573
20574 if all_inferred && !fields.is_empty() {
20575 let row_type = DataType::Struct {
20576 fields,
20577 nested: true,
20578 };
20579 Ok(Expression::Cast(Box::new(Cast {
20580 this: row_func,
20581 to: row_type,
20582 trailing_comments: Vec::new(),
20583 double_colon_syntax: false,
20584 format: None,
20585 default: None,
20586 inferred_type: None,
20587 })))
20588 } else {
20589 Ok(row_func)
20590 }
20591 } else {
20592 Ok(row_func)
20593 }
20594 } else {
20595 Ok(Expression::Function(Box::new(Function::new(
20596 "ROW".to_string(),
20597 named_args,
20598 ))))
20599 }
20600 } else {
20601 Ok(e)
20602 }
20603 }
20604
            Action::SparkStructConvert => {
                // Spark STRUCT(val AS name, ...) -> Presto CAST(ROW(...) AS ROW(name TYPE, ...))
                // or DuckDB {'name': val, ...}
                if let Expression::Function(f) = e {
                    // Extract name-value pairs from aliased args
                    // (unaliased arguments get an empty-string name).
                    let mut pairs: Vec<(String, Expression)> = Vec::new();
                    for arg in &f.args {
                        match arg {
                            Expression::Alias(a) => {
                                pairs.push((a.alias.name.clone(), a.this.clone()));
                            }
                            _ => {
                                pairs.push((String::new(), arg.clone()));
                            }
                        }
                    }

                    match target {
                        DialectType::DuckDB => {
                            // Convert to DuckDB struct literal {'name': value, ...}
                            let mut keys = Vec::new();
                            let mut values = Vec::new();
                            for (name, value) in &pairs {
                                keys.push(Expression::Literal(Literal::String(name.clone())));
                                values.push(value.clone());
                            }
                            Ok(Expression::MapFunc(Box::new(
                                crate::expressions::MapConstructor {
                                    keys,
                                    values,
                                    curly_brace_syntax: true,
                                    with_map_keyword: false,
                                },
                            )))
                        }
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // Convert to CAST(ROW(val1, val2) AS ROW(name1 TYPE1, name2 TYPE2))
                            let row_args: Vec<Expression> =
                                pairs.iter().map(|(_, v)| v.clone()).collect();
                            let row_func = Expression::Function(Box::new(Function::new(
                                "ROW".to_string(),
                                row_args,
                            )));

                            // Infer types
                            // (only literal numbers/strings/booleans are inferable;
                            // anything else falls back to a bare ROW call below).
                            let mut all_inferred = true;
                            let mut fields = Vec::new();
                            for (name, value) in &pairs {
                                let inferred_type = match value {
                                    Expression::Literal(Literal::Number(n)) => {
                                        if n.contains('.') {
                                            Some(DataType::Double {
                                                precision: None,
                                                scale: None,
                                            })
                                        } else {
                                            Some(DataType::Int {
                                                length: None,
                                                integer_spelling: true,
                                            })
                                        }
                                    }
                                    Expression::Literal(Literal::String(_)) => {
                                        Some(DataType::VarChar {
                                            length: None,
                                            parenthesized_length: false,
                                        })
                                    }
                                    Expression::Boolean(_) => Some(DataType::Boolean),
                                    _ => None,
                                };
                                if let Some(dt) = inferred_type {
                                    fields.push(crate::expressions::StructField::new(
                                        name.clone(),
                                        dt,
                                    ));
                                } else {
                                    all_inferred = false;
                                    break;
                                }
                            }

                            if all_inferred && !fields.is_empty() {
                                let row_type = DataType::Struct {
                                    fields,
                                    nested: true,
                                };
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: row_func,
                                    to: row_type,
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                })))
                            } else {
                                Ok(row_func)
                            }
                        }
                        // Other targets keep the original STRUCT call unchanged.
                        _ => Ok(Expression::Function(f)),
                    }
                } else {
                    Ok(e)
                }
            }
20711
20712 Action::ApproxCountDistinctToApproxDistinct => {
20713 // APPROX_COUNT_DISTINCT(x) -> APPROX_DISTINCT(x)
20714 if let Expression::ApproxCountDistinct(f) = e {
20715 Ok(Expression::ApproxDistinct(f))
20716 } else {
20717 Ok(e)
20718 }
20719 }
20720
20721 Action::CollectListToArrayAgg => {
20722 // COLLECT_LIST(x) -> ARRAY_AGG(x) FILTER(WHERE x IS NOT NULL)
20723 if let Expression::AggregateFunction(f) = e {
20724 let filter_expr = if !f.args.is_empty() {
20725 let arg = f.args[0].clone();
20726 Some(Expression::IsNull(Box::new(crate::expressions::IsNull {
20727 this: arg,
20728 not: true,
20729 postfix_form: false,
20730 })))
20731 } else {
20732 None
20733 };
20734 let agg = crate::expressions::AggFunc {
20735 this: if f.args.is_empty() {
20736 Expression::Null(crate::expressions::Null)
20737 } else {
20738 f.args[0].clone()
20739 },
20740 distinct: f.distinct,
20741 order_by: f.order_by.clone(),
20742 filter: filter_expr,
20743 ignore_nulls: None,
20744 name: None,
20745 having_max: None,
20746 limit: None,
20747 inferred_type: None,
20748 };
20749 Ok(Expression::ArrayAgg(Box::new(agg)))
20750 } else {
20751 Ok(e)
20752 }
20753 }
20754
20755 Action::CollectSetConvert => {
20756 // COLLECT_SET(x) -> target-specific
20757 if let Expression::AggregateFunction(f) = e {
20758 match target {
20759 DialectType::Presto => Ok(Expression::AggregateFunction(Box::new(
20760 crate::expressions::AggregateFunction {
20761 name: "SET_AGG".to_string(),
20762 args: f.args,
20763 distinct: false,
20764 order_by: f.order_by,
20765 filter: f.filter,
20766 limit: f.limit,
20767 ignore_nulls: f.ignore_nulls,
20768 inferred_type: None,
20769 },
20770 ))),
20771 DialectType::Snowflake => Ok(Expression::AggregateFunction(Box::new(
20772 crate::expressions::AggregateFunction {
20773 name: "ARRAY_UNIQUE_AGG".to_string(),
20774 args: f.args,
20775 distinct: false,
20776 order_by: f.order_by,
20777 filter: f.filter,
20778 limit: f.limit,
20779 ignore_nulls: f.ignore_nulls,
20780 inferred_type: None,
20781 },
20782 ))),
20783 DialectType::Trino | DialectType::DuckDB => {
20784 let agg = crate::expressions::AggFunc {
20785 this: if f.args.is_empty() {
20786 Expression::Null(crate::expressions::Null)
20787 } else {
20788 f.args[0].clone()
20789 },
20790 distinct: true,
20791 order_by: Vec::new(),
20792 filter: None,
20793 ignore_nulls: None,
20794 name: None,
20795 having_max: None,
20796 limit: None,
20797 inferred_type: None,
20798 };
20799 Ok(Expression::ArrayAgg(Box::new(agg)))
20800 }
20801 _ => Ok(Expression::AggregateFunction(f)),
20802 }
20803 } else {
20804 Ok(e)
20805 }
20806 }
20807
20808 Action::PercentileConvert => {
20809 // PERCENTILE(x, 0.5) -> QUANTILE(x, 0.5) / APPROX_PERCENTILE(x, 0.5)
20810 if let Expression::AggregateFunction(f) = e {
20811 let name = match target {
20812 DialectType::DuckDB => "QUANTILE",
20813 DialectType::Presto | DialectType::Trino => "APPROX_PERCENTILE",
20814 _ => "PERCENTILE",
20815 };
20816 Ok(Expression::AggregateFunction(Box::new(
20817 crate::expressions::AggregateFunction {
20818 name: name.to_string(),
20819 args: f.args,
20820 distinct: f.distinct,
20821 order_by: f.order_by,
20822 filter: f.filter,
20823 limit: f.limit,
20824 ignore_nulls: f.ignore_nulls,
20825 inferred_type: None,
20826 },
20827 )))
20828 } else {
20829 Ok(e)
20830 }
20831 }
20832
20833 Action::CorrIsnanWrap => {
20834 // CORR(a, b) -> CASE WHEN ISNAN(CORR(a, b)) THEN NULL ELSE CORR(a, b) END
20835 // The CORR expression could be AggregateFunction, WindowFunction, or Filter-wrapped
20836 let corr_clone = e.clone();
20837 let isnan = Expression::Function(Box::new(Function::new(
20838 "ISNAN".to_string(),
20839 vec![corr_clone.clone()],
20840 )));
20841 let case_expr = Expression::Case(Box::new(Case {
20842 operand: None,
20843 whens: vec![(isnan, Expression::Null(crate::expressions::Null))],
20844 else_: Some(corr_clone),
20845 comments: Vec::new(),
20846 inferred_type: None,
20847 }));
20848 Ok(case_expr)
20849 }
20850
            Action::TruncToDateTrunc => {
                // TRUNC(timestamp, 'MONTH') -> DATE_TRUNC('MONTH', timestamp)
                if let Expression::Function(f) = e {
                    // Only the two-argument TRUNC form is date truncation; a
                    // single-argument (numeric) TRUNC is left untouched.
                    if f.args.len() == 2 {
                        let timestamp = f.args[0].clone();
                        let unit_expr = f.args[1].clone();

                        if matches!(target, DialectType::ClickHouse) {
                            // For ClickHouse, produce Expression::DateTrunc which the generator
                            // outputs as DATE_TRUNC(...) without going through the ClickHouse
                            // target transform that would convert it to dateTrunc
                            let unit_str = Self::get_unit_str_static(&unit_expr);
                            let dt_field = match unit_str.as_str() {
                                "YEAR" => DateTimeField::Year,
                                "MONTH" => DateTimeField::Month,
                                "DAY" => DateTimeField::Day,
                                "HOUR" => DateTimeField::Hour,
                                "MINUTE" => DateTimeField::Minute,
                                "SECOND" => DateTimeField::Second,
                                "WEEK" => DateTimeField::Week,
                                "QUARTER" => DateTimeField::Quarter,
                                // Unrecognized units pass through verbatim.
                                _ => DateTimeField::Custom(unit_str),
                            };
                            Ok(Expression::DateTrunc(Box::new(
                                crate::expressions::DateTruncFunc {
                                    this: timestamp,
                                    unit: dt_field,
                                },
                            )))
                        } else {
                            // Other targets: swap argument order, DATE_TRUNC(unit, ts).
                            let new_args = vec![unit_expr, timestamp];
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_TRUNC".to_string(),
                                new_args,
                            ))))
                        }
                    } else {
                        Ok(Expression::Function(f))
                    }
                } else {
                    Ok(e)
                }
            }
20894
            Action::ArrayContainsConvert => {
                // ARRAY_CONTAINS(arr, val) is spelled differently per target dialect.
                if let Expression::ArrayContains(f) = e {
                    match target {
                        DialectType::Presto | DialectType::Trino => {
                            // ARRAY_CONTAINS(arr, val) -> CONTAINS(arr, val)
                            Ok(Expression::Function(Box::new(Function::new(
                                "CONTAINS".to_string(),
                                vec![f.this, f.expression],
                            ))))
                        }
                        DialectType::Snowflake => {
                            // ARRAY_CONTAINS(arr, val) -> ARRAY_CONTAINS(CAST(val AS VARIANT), arr)
                            // Snowflake takes the searched value first and requires VARIANT.
                            let cast_val =
                                Expression::Cast(Box::new(crate::expressions::Cast {
                                    this: f.expression,
                                    to: crate::expressions::DataType::Custom {
                                        name: "VARIANT".to_string(),
                                    },
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                }));
                            Ok(Expression::Function(Box::new(Function::new(
                                "ARRAY_CONTAINS".to_string(),
                                vec![cast_val, f.this],
                            ))))
                        }
                        // Other targets keep the original node.
                        _ => Ok(Expression::ArrayContains(f)),
                    }
                } else {
                    Ok(e)
                }
            }
20930
            Action::StrPositionExpand => {
                // StrPosition with position arg -> complex STRPOS expansion for Presto/DuckDB
                // LOCATE(substr, str, pos) / STRPOS(str, substr, pos) ->
                // For Presto: IF(STRPOS(SUBSTRING(str, pos), substr) = 0, 0, STRPOS(SUBSTRING(str, pos), substr) + pos - 1)
                // For DuckDB: CASE WHEN STRPOS(SUBSTRING(str, pos), substr) = 0 THEN 0 ELSE STRPOS(SUBSTRING(str, pos), substr) + pos - 1 END
                if let Expression::StrPosition(sp) = e {
                    let crate::expressions::StrPosition {
                        this,
                        substr,
                        position,
                        occurrence,
                    } = *sp;
                    let string = *this;
                    // A missing substring degrades to NULL rather than erroring.
                    let substr_expr = match substr {
                        Some(s) => *s,
                        None => Expression::Null(Null),
                    };
                    // SQL string positions are 1-based; default start is 1.
                    let pos = match position {
                        Some(p) => *p,
                        None => Expression::number(1),
                    };

                    // SUBSTRING(string, pos)
                    let substring_call = Expression::Function(Box::new(Function::new(
                        "SUBSTRING".to_string(),
                        vec![string.clone(), pos.clone()],
                    )));
                    // STRPOS(SUBSTRING(string, pos), substr)
                    let strpos_call = Expression::Function(Box::new(Function::new(
                        "STRPOS".to_string(),
                        vec![substring_call, substr_expr.clone()],
                    )));
                    // STRPOS(...) + pos - 1
                    let pos_adjusted =
                        Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
                            Expression::Add(Box::new(crate::expressions::BinaryOp::new(
                                strpos_call.clone(),
                                pos.clone(),
                            ))),
                            Expression::number(1),
                        )));
                    // STRPOS(...) = 0
                    let is_zero = Expression::Eq(Box::new(crate::expressions::BinaryOp::new(
                        strpos_call.clone(),
                        Expression::number(0),
                    )));

                    // NOTE(review): `occurrence` is only preserved in the fallback
                    // branch below; the Presto/DuckDB expansions drop it — confirm
                    // callers never set it for these targets.
                    match target {
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // IF(STRPOS(SUBSTRING(str, pos), substr) = 0, 0, STRPOS(SUBSTRING(str, pos), substr) + pos - 1)
                            Ok(Expression::Function(Box::new(Function::new(
                                "IF".to_string(),
                                vec![is_zero, Expression::number(0), pos_adjusted],
                            ))))
                        }
                        DialectType::DuckDB => {
                            // CASE WHEN STRPOS(SUBSTRING(str, pos), substr) = 0 THEN 0 ELSE STRPOS(SUBSTRING(str, pos), substr) + pos - 1 END
                            Ok(Expression::Case(Box::new(Case {
                                operand: None,
                                whens: vec![(is_zero, Expression::number(0))],
                                else_: Some(pos_adjusted),
                                comments: Vec::new(),
                                inferred_type: None,
                            })))
                        }
                        _ => {
                            // Reconstruct StrPosition
                            Ok(Expression::StrPosition(Box::new(
                                crate::expressions::StrPosition {
                                    this: Box::new(string),
                                    substr: Some(Box::new(substr_expr)),
                                    position: Some(Box::new(pos)),
                                    occurrence,
                                },
                            )))
                        }
                    }
                } else {
                    Ok(e)
                }
            }
21012
            Action::MonthsBetweenConvert => {
                // MONTHS_BETWEEN(end, start) rewritten per target dialect.
                if let Expression::MonthsBetween(mb) = e {
                    let crate::expressions::BinaryFunc {
                        this: end_date,
                        expression: start_date,
                        ..
                    } = *mb;
                    match target {
                        DialectType::DuckDB => {
                            // Emulate MONTHS_BETWEEN: whole-month DATE_DIFF plus a
                            // day-based fraction, with the special case that two
                            // month-end dates count as an exact number of months.
                            let cast_end = Self::ensure_cast_date(end_date);
                            let cast_start = Self::ensure_cast_date(start_date);
                            let dd = Expression::Function(Box::new(Function::new(
                                "DATE_DIFF".to_string(),
                                vec![
                                    Expression::string("MONTH"),
                                    cast_start.clone(),
                                    cast_end.clone(),
                                ],
                            )));
                            let day_end = Expression::Function(Box::new(Function::new(
                                "DAY".to_string(),
                                vec![cast_end.clone()],
                            )));
                            let day_start = Expression::Function(Box::new(Function::new(
                                "DAY".to_string(),
                                vec![cast_start.clone()],
                            )));
                            let last_day_end = Expression::Function(Box::new(Function::new(
                                "LAST_DAY".to_string(),
                                vec![cast_end.clone()],
                            )));
                            let last_day_start = Expression::Function(Box::new(Function::new(
                                "LAST_DAY".to_string(),
                                vec![cast_start.clone()],
                            )));
                            let day_last_end = Expression::Function(Box::new(Function::new(
                                "DAY".to_string(),
                                vec![last_day_end],
                            )));
                            let day_last_start = Expression::Function(Box::new(Function::new(
                                "DAY".to_string(),
                                vec![last_day_start],
                            )));
                            // Do both dates fall on their month's last day?
                            let cond1 = Expression::Eq(Box::new(BinaryOp::new(
                                day_end.clone(),
                                day_last_end,
                            )));
                            let cond2 = Expression::Eq(Box::new(BinaryOp::new(
                                day_start.clone(),
                                day_last_start,
                            )));
                            let both_cond =
                                Expression::And(Box::new(BinaryOp::new(cond1, cond2)));
                            // Fractional remainder: (DAY(end) - DAY(start)) / 31.0.
                            let day_diff =
                                Expression::Sub(Box::new(BinaryOp::new(day_end, day_start)));
                            let day_diff_paren =
                                Expression::Paren(Box::new(crate::expressions::Paren {
                                    this: day_diff,
                                    trailing_comments: Vec::new(),
                                }));
                            let frac = Expression::Div(Box::new(BinaryOp::new(
                                day_diff_paren,
                                Expression::Literal(Literal::Number("31.0".to_string())),
                            )));
                            let case_expr = Expression::Case(Box::new(Case {
                                operand: None,
                                whens: vec![(both_cond, Expression::number(0))],
                                else_: Some(frac),
                                comments: Vec::new(),
                                inferred_type: None,
                            }));
                            Ok(Expression::Add(Box::new(BinaryOp::new(dd, case_expr))))
                        }
                        DialectType::Snowflake | DialectType::Redshift => {
                            // NOTE(review): DATEDIFF(MONTH, start, end) yields whole
                            // months only, while MONTHS_BETWEEN can be fractional —
                            // confirm this precision loss is acceptable here.
                            let unit = Expression::Identifier(Identifier::new("MONTH"));
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATEDIFF".to_string(),
                                vec![unit, start_date, end_date],
                            ))))
                        }
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_DIFF".to_string(),
                                vec![Expression::string("MONTH"), start_date, end_date],
                            ))))
                        }
                        // Other targets: rebuild the original node unchanged.
                        _ => Ok(Expression::MonthsBetween(Box::new(
                            crate::expressions::BinaryFunc {
                                this: end_date,
                                expression: start_date,
                                original_name: None,
                                inferred_type: None,
                            },
                        ))),
                    }
                } else {
                    Ok(e)
                }
            }
21112
21113 Action::AddMonthsConvert => {
21114 if let Expression::AddMonths(am) = e {
21115 let date = am.this;
21116 let val = am.expression;
21117 match target {
21118 DialectType::TSQL | DialectType::Fabric => {
21119 let cast_date = Self::ensure_cast_datetime2(date);
21120 Ok(Expression::Function(Box::new(Function::new(
21121 "DATEADD".to_string(),
21122 vec![
21123 Expression::Identifier(Identifier::new("MONTH")),
21124 val,
21125 cast_date,
21126 ],
21127 ))))
21128 }
21129 DialectType::DuckDB if matches!(source, DialectType::Snowflake) => {
21130 // DuckDB ADD_MONTHS from Snowflake: CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(date + interval) ELSE date + interval END
21131 // Optionally wrapped in CAST(... AS type) if the input had a specific type
21132
21133 // Determine the cast type from the date expression
21134 let (cast_date, return_type) = match &date {
21135 Expression::Literal(Literal::String(_)) => {
21136 // String literal: CAST(str AS TIMESTAMP), no outer CAST
21137 (
21138 Expression::Cast(Box::new(Cast {
21139 this: date.clone(),
21140 to: DataType::Timestamp {
21141 precision: None,
21142 timezone: false,
21143 },
21144 trailing_comments: Vec::new(),
21145 double_colon_syntax: false,
21146 format: None,
21147 default: None,
21148 inferred_type: None,
21149 })),
21150 None,
21151 )
21152 }
21153 Expression::Cast(c) => {
21154 // Already cast (e.g., '2023-01-31'::DATE) - keep the cast, wrap result in CAST(... AS type)
21155 (date.clone(), Some(c.to.clone()))
21156 }
21157 _ => {
21158 // Expression or NULL::TYPE - keep as-is, check for cast type
21159 if let Expression::Cast(c) = &date {
21160 (date.clone(), Some(c.to.clone()))
21161 } else {
21162 (date.clone(), None)
21163 }
21164 }
21165 };
21166
21167 // Build the interval expression
21168 // For non-integer values (float, decimal, cast), use TO_MONTHS(CAST(ROUND(val) AS INT))
21169 // For integer values, use INTERVAL val MONTH
21170 let is_non_integer_val = match &val {
21171 Expression::Literal(Literal::Number(n)) => n.contains('.'),
21172 Expression::Cast(_) => true, // e.g., 3.2::DECIMAL(10,2)
21173 Expression::Neg(n) => {
21174 if let Expression::Literal(Literal::Number(s)) = &n.this {
21175 s.contains('.')
21176 } else {
21177 false
21178 }
21179 }
21180 _ => false,
21181 };
21182
21183 let add_interval = if is_non_integer_val {
21184 // TO_MONTHS(CAST(ROUND(val) AS INT))
21185 let round_val = Expression::Function(Box::new(Function::new(
21186 "ROUND".to_string(),
21187 vec![val.clone()],
21188 )));
21189 let cast_int = Expression::Cast(Box::new(Cast {
21190 this: round_val,
21191 to: DataType::Int {
21192 length: None,
21193 integer_spelling: false,
21194 },
21195 trailing_comments: Vec::new(),
21196 double_colon_syntax: false,
21197 format: None,
21198 default: None,
21199 inferred_type: None,
21200 }));
21201 Expression::Function(Box::new(Function::new(
21202 "TO_MONTHS".to_string(),
21203 vec![cast_int],
21204 )))
21205 } else {
21206 // INTERVAL val MONTH
21207 // For negative numbers, wrap in parens
21208 let interval_val = match &val {
21209 Expression::Literal(Literal::Number(n))
21210 if n.starts_with('-') =>
21211 {
21212 Expression::Paren(Box::new(Paren {
21213 this: val.clone(),
21214 trailing_comments: Vec::new(),
21215 }))
21216 }
21217 Expression::Neg(_) => Expression::Paren(Box::new(Paren {
21218 this: val.clone(),
21219 trailing_comments: Vec::new(),
21220 })),
21221 Expression::Null(_) => Expression::Paren(Box::new(Paren {
21222 this: val.clone(),
21223 trailing_comments: Vec::new(),
21224 })),
21225 _ => val.clone(),
21226 };
21227 Expression::Interval(Box::new(crate::expressions::Interval {
21228 this: Some(interval_val),
21229 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
21230 unit: crate::expressions::IntervalUnit::Month,
21231 use_plural: false,
21232 }),
21233 }))
21234 };
21235
21236 // Build: date + interval
21237 let date_plus_interval = Expression::Add(Box::new(BinaryOp::new(
21238 cast_date.clone(),
21239 add_interval.clone(),
21240 )));
21241
21242 // Build LAST_DAY(date)
21243 let last_day_date = Expression::Function(Box::new(Function::new(
21244 "LAST_DAY".to_string(),
21245 vec![cast_date.clone()],
21246 )));
21247
21248 // Build LAST_DAY(date + interval)
21249 let last_day_date_plus =
21250 Expression::Function(Box::new(Function::new(
21251 "LAST_DAY".to_string(),
21252 vec![date_plus_interval.clone()],
21253 )));
21254
21255 // Build: CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(date + interval) ELSE date + interval END
21256 let case_expr = Expression::Case(Box::new(Case {
21257 operand: None,
21258 whens: vec![(
21259 Expression::Eq(Box::new(BinaryOp::new(
21260 last_day_date,
21261 cast_date.clone(),
21262 ))),
21263 last_day_date_plus,
21264 )],
21265 else_: Some(date_plus_interval),
21266 comments: Vec::new(),
21267 inferred_type: None,
21268 }));
21269
21270 // Wrap in CAST(... AS type) if needed
21271 if let Some(dt) = return_type {
21272 Ok(Expression::Cast(Box::new(Cast {
21273 this: case_expr,
21274 to: dt,
21275 trailing_comments: Vec::new(),
21276 double_colon_syntax: false,
21277 format: None,
21278 default: None,
21279 inferred_type: None,
21280 })))
21281 } else {
21282 Ok(case_expr)
21283 }
21284 }
21285 DialectType::DuckDB => {
21286 // Non-Snowflake source: simple date + INTERVAL
21287 let cast_date =
21288 if matches!(&date, Expression::Literal(Literal::String(_))) {
21289 Expression::Cast(Box::new(Cast {
21290 this: date,
21291 to: DataType::Timestamp {
21292 precision: None,
21293 timezone: false,
21294 },
21295 trailing_comments: Vec::new(),
21296 double_colon_syntax: false,
21297 format: None,
21298 default: None,
21299 inferred_type: None,
21300 }))
21301 } else {
21302 date
21303 };
21304 let interval =
21305 Expression::Interval(Box::new(crate::expressions::Interval {
21306 this: Some(val),
21307 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
21308 unit: crate::expressions::IntervalUnit::Month,
21309 use_plural: false,
21310 }),
21311 }));
21312 Ok(Expression::Add(Box::new(BinaryOp::new(
21313 cast_date, interval,
21314 ))))
21315 }
21316 DialectType::Snowflake => {
21317 // Keep ADD_MONTHS when source is also Snowflake
21318 if matches!(source, DialectType::Snowflake) {
21319 Ok(Expression::Function(Box::new(Function::new(
21320 "ADD_MONTHS".to_string(),
21321 vec![date, val],
21322 ))))
21323 } else {
21324 Ok(Expression::Function(Box::new(Function::new(
21325 "DATEADD".to_string(),
21326 vec![
21327 Expression::Identifier(Identifier::new("MONTH")),
21328 val,
21329 date,
21330 ],
21331 ))))
21332 }
21333 }
21334 DialectType::Redshift => {
21335 Ok(Expression::Function(Box::new(Function::new(
21336 "DATEADD".to_string(),
21337 vec![
21338 Expression::Identifier(Identifier::new("MONTH")),
21339 val,
21340 date,
21341 ],
21342 ))))
21343 }
21344 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
21345 let cast_date =
21346 if matches!(&date, Expression::Literal(Literal::String(_))) {
21347 Expression::Cast(Box::new(Cast {
21348 this: date,
21349 to: DataType::Timestamp {
21350 precision: None,
21351 timezone: false,
21352 },
21353 trailing_comments: Vec::new(),
21354 double_colon_syntax: false,
21355 format: None,
21356 default: None,
21357 inferred_type: None,
21358 }))
21359 } else {
21360 date
21361 };
21362 Ok(Expression::Function(Box::new(Function::new(
21363 "DATE_ADD".to_string(),
21364 vec![Expression::string("MONTH"), val, cast_date],
21365 ))))
21366 }
21367 DialectType::BigQuery => {
21368 let interval =
21369 Expression::Interval(Box::new(crate::expressions::Interval {
21370 this: Some(val),
21371 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
21372 unit: crate::expressions::IntervalUnit::Month,
21373 use_plural: false,
21374 }),
21375 }));
21376 let cast_date =
21377 if matches!(&date, Expression::Literal(Literal::String(_))) {
21378 Expression::Cast(Box::new(Cast {
21379 this: date,
21380 to: DataType::Custom {
21381 name: "DATETIME".to_string(),
21382 },
21383 trailing_comments: Vec::new(),
21384 double_colon_syntax: false,
21385 format: None,
21386 default: None,
21387 inferred_type: None,
21388 }))
21389 } else {
21390 date
21391 };
21392 Ok(Expression::Function(Box::new(Function::new(
21393 "DATE_ADD".to_string(),
21394 vec![cast_date, interval],
21395 ))))
21396 }
21397 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
21398 Ok(Expression::Function(Box::new(Function::new(
21399 "ADD_MONTHS".to_string(),
21400 vec![date, val],
21401 ))))
21402 }
21403 _ => {
21404 // Default: keep as AddMonths expression
21405 Ok(Expression::AddMonths(Box::new(
21406 crate::expressions::BinaryFunc {
21407 this: date,
21408 expression: val,
21409 original_name: None,
21410 inferred_type: None,
21411 },
21412 )))
21413 }
21414 }
21415 } else {
21416 Ok(e)
21417 }
21418 }
21419
            Action::PercentileContConvert => {
                // PERCENTILE_CONT(p) WITHIN GROUP (ORDER BY col) ->
                // Presto/Trino/Athena: APPROX_PERCENTILE(col, p)
                // Spark/Databricks:    PERCENTILE_APPROX(col, p)
                // NOTE(review): both replacements are *approximate* percentile functions,
                // so this is a best-effort mapping for engines without WITHIN GROUP support.
                if let Expression::WithinGroup(wg) = e {
                    // Extract percentile value and order by column. The inner expression
                    // may have been parsed as several different node shapes; when the
                    // percentile argument is missing, fall back to the median (0.5).
                    let (percentile, _is_disc) = match &wg.this {
                        Expression::Function(f) => {
                            let is_disc = f.name.eq_ignore_ascii_case("PERCENTILE_DISC");
                            let pct = f.args.first().cloned().unwrap_or(Expression::Literal(
                                Literal::Number("0.5".to_string()),
                            ));
                            (pct, is_disc)
                        }
                        Expression::AggregateFunction(af) => {
                            let is_disc = af.name.eq_ignore_ascii_case("PERCENTILE_DISC");
                            let pct = af.args.first().cloned().unwrap_or(Expression::Literal(
                                Literal::Number("0.5".to_string()),
                            ));
                            (pct, is_disc)
                        }
                        Expression::PercentileCont(pc) => (pc.percentile.clone(), false),
                        // Unknown inner shape: leave the WITHIN GROUP node untouched.
                        _ => return Ok(Expression::WithinGroup(wg)),
                    };
                    // The aggregated column is the first ORDER BY key; default to the
                    // constant 1 if the clause is (unexpectedly) empty.
                    let col = wg
                        .order_by
                        .first()
                        .map(|o| o.this.clone())
                        .unwrap_or(Expression::Literal(Literal::Number("1".to_string())));

                    let func_name = match target {
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            "APPROX_PERCENTILE"
                        }
                        _ => "PERCENTILE_APPROX", // Spark, Databricks
                    };
                    Ok(Expression::Function(Box::new(Function::new(
                        func_name.to_string(),
                        vec![col, percentile],
                    ))))
                } else {
                    Ok(e)
                }
            }
21464
21465 Action::CurrentUserSparkParens => {
21466 // CURRENT_USER -> CURRENT_USER() for Spark
21467 if let Expression::CurrentUser(_) = e {
21468 Ok(Expression::Function(Box::new(Function::new(
21469 "CURRENT_USER".to_string(),
21470 vec![],
21471 ))))
21472 } else {
21473 Ok(e)
21474 }
21475 }
21476
21477 Action::SparkDateFuncCast => {
21478 // MONTH/YEAR/DAY('string') from Spark -> wrap arg in CAST to DATE
21479 let cast_arg = |arg: Expression| -> Expression {
21480 match target {
21481 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
21482 Self::double_cast_timestamp_date(arg)
21483 }
21484 _ => {
21485 // DuckDB, PostgreSQL, etc: CAST(arg AS DATE)
21486 Self::ensure_cast_date(arg)
21487 }
21488 }
21489 };
21490 match e {
21491 Expression::Month(f) => Ok(Expression::Month(Box::new(
21492 crate::expressions::UnaryFunc::new(cast_arg(f.this)),
21493 ))),
21494 Expression::Year(f) => Ok(Expression::Year(Box::new(
21495 crate::expressions::UnaryFunc::new(cast_arg(f.this)),
21496 ))),
21497 Expression::Day(f) => Ok(Expression::Day(Box::new(
21498 crate::expressions::UnaryFunc::new(cast_arg(f.this)),
21499 ))),
21500 other => Ok(other),
21501 }
21502 }
21503
21504 Action::MapFromArraysConvert => {
21505 // Expression::MapFromArrays -> target-specific
21506 if let Expression::MapFromArrays(mfa) = e {
21507 let keys = mfa.this;
21508 let values = mfa.expression;
21509 match target {
21510 DialectType::Snowflake => Ok(Expression::Function(Box::new(
21511 Function::new("OBJECT_CONSTRUCT".to_string(), vec![keys, values]),
21512 ))),
21513 _ => {
21514 // Hive, Presto, DuckDB, etc.: MAP(keys, values)
21515 Ok(Expression::Function(Box::new(Function::new(
21516 "MAP".to_string(),
21517 vec![keys, values],
21518 ))))
21519 }
21520 }
21521 } else {
21522 Ok(e)
21523 }
21524 }
21525
            Action::AnyToExists => {
                // `expr <op> ANY (array)` -> `EXISTS(array, x -> expr <op> x)`
                // — a lambda-based rewrite for engines with higher-order functions.
                if let Expression::Any(q) = e {
                    if let Some(op) = q.op.clone() {
                        // Lambda parameter standing in for each array element.
                        let lambda_param = crate::expressions::Identifier::new("x");
                        let rhs = Expression::Identifier(lambda_param.clone());
                        // Rebuild the original comparison with the lambda variable on
                        // the right-hand side.
                        let body = match op {
                            crate::expressions::QuantifiedOp::Eq => {
                                Expression::Eq(Box::new(BinaryOp::new(q.this, rhs)))
                            }
                            crate::expressions::QuantifiedOp::Neq => {
                                Expression::Neq(Box::new(BinaryOp::new(q.this, rhs)))
                            }
                            crate::expressions::QuantifiedOp::Lt => {
                                Expression::Lt(Box::new(BinaryOp::new(q.this, rhs)))
                            }
                            crate::expressions::QuantifiedOp::Lte => {
                                Expression::Lte(Box::new(BinaryOp::new(q.this, rhs)))
                            }
                            crate::expressions::QuantifiedOp::Gt => {
                                Expression::Gt(Box::new(BinaryOp::new(q.this, rhs)))
                            }
                            crate::expressions::QuantifiedOp::Gte => {
                                Expression::Gte(Box::new(BinaryOp::new(q.this, rhs)))
                            }
                        };
                        let lambda =
                            Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                                parameters: vec![lambda_param],
                                body,
                                colon: false,
                                parameter_types: Vec::new(),
                            }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "EXISTS".to_string(),
                            vec![q.subquery, lambda],
                        ))))
                    } else {
                        // No quantified operator recorded: nothing safe to rewrite.
                        Ok(Expression::Any(q))
                    }
                } else {
                    Ok(e)
                }
            }
21569
            Action::GenerateSeriesConvert => {
                // GENERATE_SERIES(start, end[, step]) -> SEQUENCE for Spark/Databricks/Hive, wrapped in UNNEST/EXPLODE
                // For DuckDB target: wrap in UNNEST(GENERATE_SERIES(...))
                // For PG/Redshift target: keep as GENERATE_SERIES but normalize interval string step
                if let Expression::Function(f) = e {
                    if f.name.eq_ignore_ascii_case("GENERATE_SERIES") && f.args.len() >= 2 {
                        let start = f.args[0].clone();
                        let end = f.args[1].clone();
                        // Optional third argument: the step.
                        let step = f.args.get(2).cloned();

                        // Normalize step: convert string interval like '1day' or ' 2 days ' to INTERVAL expression
                        let step = step.map(|s| Self::normalize_interval_string(s, target));

                        // Helper: wrap CURRENT_TIMESTAMP in CAST(... AS TIMESTAMP) for Presto/Trino/Spark
                        // (other argument shapes pass through unchanged).
                        let maybe_cast_timestamp = |arg: Expression| -> Expression {
                            if matches!(
                                target,
                                DialectType::Presto
                                    | DialectType::Trino
                                    | DialectType::Athena
                                    | DialectType::Spark
                                    | DialectType::Databricks
                                    | DialectType::Hive
                            ) {
                                match &arg {
                                    Expression::CurrentTimestamp(_) => {
                                        Expression::Cast(Box::new(Cast {
                                            this: arg,
                                            to: DataType::Timestamp {
                                                precision: None,
                                                timezone: false,
                                            },
                                            trailing_comments: Vec::new(),
                                            double_colon_syntax: false,
                                            format: None,
                                            default: None,
                                            inferred_type: None,
                                        }))
                                    }
                                    _ => arg,
                                }
                            } else {
                                arg
                            }
                        };

                        // Shadow the bounds with their (possibly) cast versions.
                        let start = maybe_cast_timestamp(start);
                        let end = maybe_cast_timestamp(end);

                        // For PostgreSQL/Redshift target, keep as GENERATE_SERIES
                        if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
                            let mut gs_args = vec![start, end];
                            if let Some(step) = step {
                                gs_args.push(step);
                            }
                            return Ok(Expression::Function(Box::new(Function::new(
                                "GENERATE_SERIES".to_string(),
                                gs_args,
                            ))));
                        }

                        // For DuckDB target: wrap in UNNEST(GENERATE_SERIES(...))
                        if matches!(target, DialectType::DuckDB) {
                            let mut gs_args = vec![start, end];
                            if let Some(step) = step {
                                gs_args.push(step);
                            }
                            let gs = Expression::Function(Box::new(Function::new(
                                "GENERATE_SERIES".to_string(),
                                gs_args,
                            )));
                            return Ok(Expression::Function(Box::new(Function::new(
                                "UNNEST".to_string(),
                                vec![gs],
                            ))));
                        }

                        // All remaining targets use SEQUENCE(start, end[, step]).
                        let mut seq_args = vec![start, end];
                        if let Some(step) = step {
                            seq_args.push(step);
                        }

                        let seq = Expression::Function(Box::new(Function::new(
                            "SEQUENCE".to_string(),
                            seq_args,
                        )));

                        match target {
                            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                                // Wrap in UNNEST
                                Ok(Expression::Function(Box::new(Function::new(
                                    "UNNEST".to_string(),
                                    vec![seq],
                                ))))
                            }
                            DialectType::Spark
                            | DialectType::Databricks
                            | DialectType::Hive => {
                                // Wrap in EXPLODE
                                Ok(Expression::Function(Box::new(Function::new(
                                    "EXPLODE".to_string(),
                                    vec![seq],
                                ))))
                            }
                            _ => {
                                // Just SEQUENCE for others
                                Ok(seq)
                            }
                        }
                    } else {
                        // Not GENERATE_SERIES (or too few args): leave untouched.
                        Ok(Expression::Function(f))
                    }
                } else {
                    Ok(e)
                }
            }
21686
            Action::ConcatCoalesceWrap => {
                // CONCAT(a, b) function -> CONCAT(COALESCE(CAST(a AS VARCHAR), ''), ...) for Presto
                // CONCAT(a, b) function -> CONCAT(COALESCE(a, ''), ...) for ClickHouse
                // (NULL arguments would otherwise poison the whole concatenation.)
                if let Expression::Function(f) = e {
                    if f.name.eq_ignore_ascii_case("CONCAT") {
                        // Rewrap every argument; only the Presto family also needs the
                        // explicit VARCHAR cast inside the COALESCE.
                        let new_args: Vec<Expression> = f
                            .args
                            .into_iter()
                            .map(|arg| {
                                let cast_arg = if matches!(
                                    target,
                                    DialectType::Presto
                                        | DialectType::Trino
                                        | DialectType::Athena
                                ) {
                                    Expression::Cast(Box::new(Cast {
                                        this: arg,
                                        to: DataType::VarChar {
                                            length: None,
                                            parenthesized_length: false,
                                        },
                                        trailing_comments: Vec::new(),
                                        double_colon_syntax: false,
                                        format: None,
                                        default: None,
                                        inferred_type: None,
                                    }))
                                } else {
                                    arg
                                };
                                // COALESCE(arg, '') so NULLs behave as empty strings.
                                Expression::Function(Box::new(Function::new(
                                    "COALESCE".to_string(),
                                    vec![cast_arg, Expression::string("")],
                                )))
                            })
                            .collect();
                        Ok(Expression::Function(Box::new(Function::new(
                            "CONCAT".to_string(),
                            new_args,
                        ))))
                    } else {
                        Ok(Expression::Function(f))
                    }
                } else {
                    Ok(e)
                }
            }
21734
21735 Action::PipeConcatToConcat => {
21736 // a || b (Concat operator) -> CONCAT(CAST(a AS VARCHAR), CAST(b AS VARCHAR)) for Presto/Trino
21737 if let Expression::Concat(op) = e {
21738 let cast_left = Expression::Cast(Box::new(Cast {
21739 this: op.left,
21740 to: DataType::VarChar {
21741 length: None,
21742 parenthesized_length: false,
21743 },
21744 trailing_comments: Vec::new(),
21745 double_colon_syntax: false,
21746 format: None,
21747 default: None,
21748 inferred_type: None,
21749 }));
21750 let cast_right = Expression::Cast(Box::new(Cast {
21751 this: op.right,
21752 to: DataType::VarChar {
21753 length: None,
21754 parenthesized_length: false,
21755 },
21756 trailing_comments: Vec::new(),
21757 double_colon_syntax: false,
21758 format: None,
21759 default: None,
21760 inferred_type: None,
21761 }));
21762 Ok(Expression::Function(Box::new(Function::new(
21763 "CONCAT".to_string(),
21764 vec![cast_left, cast_right],
21765 ))))
21766 } else {
21767 Ok(e)
21768 }
21769 }
21770
            Action::DivFuncConvert => {
                // DIV(a, b) -> target-specific integer division. Only the exact
                // two-argument DIV form is rewritten; anything else passes through.
                if let Expression::Function(f) = e {
                    if f.name.eq_ignore_ascii_case("DIV") && f.args.len() == 2 {
                        let a = f.args[0].clone();
                        let b = f.args[1].clone();
                        match target {
                            DialectType::DuckDB => {
                                // DIV(a, b) -> CAST(a // b AS DECIMAL)
                                let int_div = Expression::IntDiv(Box::new(
                                    crate::expressions::BinaryFunc {
                                        this: a,
                                        expression: b,
                                        original_name: None,
                                        inferred_type: None,
                                    },
                                ));
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: int_div,
                                    to: DataType::Decimal {
                                        precision: None,
                                        scale: None,
                                    },
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                })))
                            }
                            DialectType::BigQuery => {
                                // DIV(a, b) -> CAST(DIV(a, b) AS NUMERIC)
                                // BigQuery has native DIV; only the result type is adjusted.
                                let div_func = Expression::Function(Box::new(Function::new(
                                    "DIV".to_string(),
                                    vec![a, b],
                                )));
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: div_func,
                                    to: DataType::Custom {
                                        name: "NUMERIC".to_string(),
                                    },
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                })))
                            }
                            DialectType::SQLite => {
                                // DIV(a, b) -> CAST(CAST(CAST(a AS REAL) / b AS INTEGER) AS REAL)
                                // SQLite has no DIV; emulate it with a real division
                                // truncated to integer, then widened back to REAL.
                                let cast_a = Expression::Cast(Box::new(Cast {
                                    this: a,
                                    to: DataType::Custom {
                                        name: "REAL".to_string(),
                                    },
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                }));
                                let div = Expression::Div(Box::new(BinaryOp::new(cast_a, b)));
                                let cast_int = Expression::Cast(Box::new(Cast {
                                    this: div,
                                    to: DataType::Int {
                                        length: None,
                                        integer_spelling: true,
                                    },
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                }));
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: cast_int,
                                    to: DataType::Custom {
                                        name: "REAL".to_string(),
                                    },
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                })))
                            }
                            _ => Ok(Expression::Function(f)),
                        }
                    } else {
                        Ok(Expression::Function(f))
                    }
                } else {
                    Ok(e)
                }
            }
21866
21867 Action::JsonObjectAggConvert => {
21868 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
21869 match e {
21870 Expression::Function(f) => Ok(Expression::Function(Box::new(
21871 Function::new("JSON_GROUP_OBJECT".to_string(), f.args),
21872 ))),
21873 Expression::AggregateFunction(af) => {
21874 // AggregateFunction stores all args in the `args` vec
21875 Ok(Expression::Function(Box::new(Function::new(
21876 "JSON_GROUP_OBJECT".to_string(),
21877 af.args,
21878 ))))
21879 }
21880 other => Ok(other),
21881 }
21882 }
21883
21884 Action::JsonbExistsConvert => {
21885 // JSONB_EXISTS('json', 'key') -> JSON_EXISTS('json', '$.key') for DuckDB
21886 if let Expression::Function(f) = e {
21887 if f.args.len() == 2 {
21888 let json_expr = f.args[0].clone();
21889 let key = match &f.args[1] {
21890 Expression::Literal(crate::expressions::Literal::String(s)) => {
21891 format!("$.{}", s)
21892 }
21893 _ => return Ok(Expression::Function(f)),
21894 };
21895 Ok(Expression::Function(Box::new(Function::new(
21896 "JSON_EXISTS".to_string(),
21897 vec![json_expr, Expression::string(&key)],
21898 ))))
21899 } else {
21900 Ok(Expression::Function(f))
21901 }
21902 } else {
21903 Ok(e)
21904 }
21905 }
21906
21907 Action::DateBinConvert => {
21908 // DATE_BIN('interval', ts, origin) -> TIME_BUCKET('interval', ts, origin) for DuckDB
21909 if let Expression::Function(f) = e {
21910 Ok(Expression::Function(Box::new(Function::new(
21911 "TIME_BUCKET".to_string(),
21912 f.args,
21913 ))))
21914 } else {
21915 Ok(e)
21916 }
21917 }
21918
21919 Action::MysqlCastCharToText => {
21920 // MySQL CAST(x AS CHAR) was originally TEXT -> convert to target text type
21921 if let Expression::Cast(mut c) = e {
21922 c.to = DataType::Text;
21923 Ok(Expression::Cast(c))
21924 } else {
21925 Ok(e)
21926 }
21927 }
21928
21929 Action::SparkCastVarcharToString => {
21930 // Spark parses VARCHAR(n)/CHAR(n) as TEXT -> normalize to STRING
21931 match e {
21932 Expression::Cast(mut c) => {
21933 c.to = Self::normalize_varchar_to_string(c.to);
21934 Ok(Expression::Cast(c))
21935 }
21936 Expression::TryCast(mut c) => {
21937 c.to = Self::normalize_varchar_to_string(c.to);
21938 Ok(Expression::TryCast(c))
21939 }
21940 _ => Ok(e),
21941 }
21942 }
21943
21944 Action::MinMaxToLeastGreatest => {
21945 // Multi-arg MIN(a,b,c) -> LEAST(a,b,c), MAX(a,b,c) -> GREATEST(a,b,c)
21946 if let Expression::Function(f) = e {
21947 let name = f.name.to_uppercase();
21948 let new_name = match name.as_str() {
21949 "MIN" => "LEAST",
21950 "MAX" => "GREATEST",
21951 _ => return Ok(Expression::Function(f)),
21952 };
21953 Ok(Expression::Function(Box::new(Function::new(
21954 new_name.to_string(),
21955 f.args,
21956 ))))
21957 } else {
21958 Ok(e)
21959 }
21960 }
21961
21962 Action::ClickHouseUniqToApproxCountDistinct => {
21963 // ClickHouse uniq(x) -> APPROX_COUNT_DISTINCT(x) for non-ClickHouse targets
21964 if let Expression::Function(f) = e {
21965 Ok(Expression::Function(Box::new(Function::new(
21966 "APPROX_COUNT_DISTINCT".to_string(),
21967 f.args,
21968 ))))
21969 } else {
21970 Ok(e)
21971 }
21972 }
21973
21974 Action::ClickHouseAnyToAnyValue => {
21975 // ClickHouse any(x) -> ANY_VALUE(x) for non-ClickHouse targets
21976 if let Expression::Function(f) = e {
21977 Ok(Expression::Function(Box::new(Function::new(
21978 "ANY_VALUE".to_string(),
21979 f.args,
21980 ))))
21981 } else {
21982 Ok(e)
21983 }
21984 }
21985
21986 Action::OracleVarchar2ToVarchar => {
21987 // Oracle VARCHAR2(N CHAR/BYTE) / NVARCHAR2(N) -> VarChar(N) for non-Oracle targets
21988 if let Expression::DataType(DataType::Custom { ref name }) = e {
21989 let upper = name.to_uppercase();
21990 // Extract length from VARCHAR2(N ...) or NVARCHAR2(N ...)
21991 let inner =
21992 if upper.starts_with("VARCHAR2(") || upper.starts_with("NVARCHAR2(") {
21993 let start = if upper.starts_with("N") { 10 } else { 9 }; // skip "NVARCHAR2(" or "VARCHAR2("
21994 let end = name.len() - 1; // skip trailing ")"
21995 Some(&name[start..end])
21996 } else {
21997 Option::None
21998 };
21999 if let Some(inner_str) = inner {
22000 // Parse the number part, ignoring BYTE/CHAR qualifier
22001 let num_str = inner_str.split_whitespace().next().unwrap_or("");
22002 if let Ok(n) = num_str.parse::<u32>() {
22003 Ok(Expression::DataType(DataType::VarChar {
22004 length: Some(n),
22005 parenthesized_length: false,
22006 }))
22007 } else {
22008 Ok(e)
22009 }
22010 } else {
22011 // Plain VARCHAR2 / NVARCHAR2 without parens
22012 Ok(Expression::DataType(DataType::VarChar {
22013 length: Option::None,
22014 parenthesized_length: false,
22015 }))
22016 }
22017 } else {
22018 Ok(e)
22019 }
22020 }
22021
            Action::Nvl2Expand => {
                // NVL2(a, b[, c]) -> CASE WHEN NOT a IS NULL THEN b [ELSE c] END
                // But keep as NVL2 for dialects that support it natively
                let nvl2_native = matches!(
                    target,
                    DialectType::Oracle
                        | DialectType::Snowflake
                        | DialectType::Redshift
                        | DialectType::Teradata
                        | DialectType::Spark
                        | DialectType::Databricks
                );
                // Pull (a, b, optional c) out of whichever node shape we received;
                // native targets return early without rewriting.
                let (a, b, c) = if let Expression::Nvl2(nvl2) = e {
                    if nvl2_native {
                        return Ok(Expression::Nvl2(nvl2));
                    }
                    (nvl2.this, nvl2.true_value, Some(nvl2.false_value))
                } else if let Expression::Function(f) = e {
                    if nvl2_native {
                        // Re-emit under the canonical NVL2 spelling.
                        return Ok(Expression::Function(Box::new(Function::new(
                            "NVL2".to_string(),
                            f.args,
                        ))));
                    }
                    if f.args.len() < 2 {
                        // Too few arguments to expand; pass through untouched.
                        return Ok(Expression::Function(f));
                    }
                    let mut args = f.args;
                    let a = args.remove(0);
                    let b = args.remove(0);
                    let c = if !args.is_empty() {
                        Some(args.remove(0))
                    } else {
                        Option::None
                    };
                    (a, b, c)
                } else {
                    return Ok(e);
                };
                // Build: NOT (a IS NULL)
                let is_null = Expression::IsNull(Box::new(IsNull {
                    this: a,
                    not: false,
                    postfix_form: false,
                }));
                let not_null = Expression::Not(Box::new(crate::expressions::UnaryOp {
                    this: is_null,
                    inferred_type: None,
                }));
                // CASE WHEN NOT a IS NULL THEN b [ELSE c] END — the ELSE is omitted
                // when NVL2 was called without a third argument.
                Ok(Expression::Case(Box::new(Case {
                    operand: Option::None,
                    whens: vec![(not_null, b)],
                    else_: c,
                    comments: Vec::new(),
                    inferred_type: None,
                })))
            }
22079
22080 Action::IfnullToCoalesce => {
22081 // IFNULL(a, b) -> COALESCE(a, b): clear original_name to output COALESCE
22082 if let Expression::Coalesce(mut cf) = e {
22083 cf.original_name = Option::None;
22084 Ok(Expression::Coalesce(cf))
22085 } else if let Expression::Function(f) = e {
22086 Ok(Expression::Function(Box::new(Function::new(
22087 "COALESCE".to_string(),
22088 f.args,
22089 ))))
22090 } else {
22091 Ok(e)
22092 }
22093 }
22094
22095 Action::IsAsciiConvert => {
22096 // IS_ASCII(x) -> dialect-specific ASCII check
22097 if let Expression::Function(f) = e {
22098 let arg = f.args.into_iter().next().unwrap();
22099 match target {
22100 DialectType::MySQL | DialectType::SingleStore | DialectType::TiDB => {
22101 // REGEXP_LIKE(x, '^[[:ascii:]]*$')
22102 Ok(Expression::Function(Box::new(Function::new(
22103 "REGEXP_LIKE".to_string(),
22104 vec![
22105 arg,
22106 Expression::Literal(Literal::String(
22107 "^[[:ascii:]]*$".to_string(),
22108 )),
22109 ],
22110 ))))
22111 }
22112 DialectType::PostgreSQL
22113 | DialectType::Redshift
22114 | DialectType::Materialize
22115 | DialectType::RisingWave => {
22116 // (x ~ '^[[:ascii:]]*$')
22117 Ok(Expression::Paren(Box::new(Paren {
22118 this: Expression::RegexpLike(Box::new(
22119 crate::expressions::RegexpFunc {
22120 this: arg,
22121 pattern: Expression::Literal(Literal::String(
22122 "^[[:ascii:]]*$".to_string(),
22123 )),
22124 flags: Option::None,
22125 },
22126 )),
22127 trailing_comments: Vec::new(),
22128 })))
22129 }
22130 DialectType::SQLite => {
22131 // (NOT x GLOB CAST(x'2a5b5e012d7f5d2a' AS TEXT))
22132 let hex_lit = Expression::Literal(Literal::HexString(
22133 "2a5b5e012d7f5d2a".to_string(),
22134 ));
22135 let cast_expr = Expression::Cast(Box::new(Cast {
22136 this: hex_lit,
22137 to: DataType::Text,
22138 trailing_comments: Vec::new(),
22139 double_colon_syntax: false,
22140 format: Option::None,
22141 default: Option::None,
22142 inferred_type: None,
22143 }));
22144 let glob = Expression::Glob(Box::new(BinaryOp {
22145 left: arg,
22146 right: cast_expr,
22147 left_comments: Vec::new(),
22148 operator_comments: Vec::new(),
22149 trailing_comments: Vec::new(),
22150 inferred_type: None,
22151 }));
22152 Ok(Expression::Paren(Box::new(Paren {
22153 this: Expression::Not(Box::new(crate::expressions::UnaryOp {
22154 this: glob,
22155 inferred_type: None,
22156 })),
22157 trailing_comments: Vec::new(),
22158 })))
22159 }
22160 DialectType::TSQL | DialectType::Fabric => {
22161 // (PATINDEX(CONVERT(VARCHAR(MAX), 0x255b5e002d7f5d25) COLLATE Latin1_General_BIN, x) = 0)
22162 let hex_lit = Expression::Literal(Literal::HexNumber(
22163 "255b5e002d7f5d25".to_string(),
22164 ));
22165 let convert_expr = Expression::Convert(Box::new(
22166 crate::expressions::ConvertFunc {
22167 this: hex_lit,
22168 to: DataType::Text, // Text generates as VARCHAR(MAX) for TSQL
22169 style: None,
22170 },
22171 ));
22172 let collated = Expression::Collation(Box::new(
22173 crate::expressions::CollationExpr {
22174 this: convert_expr,
22175 collation: "Latin1_General_BIN".to_string(),
22176 quoted: false,
22177 double_quoted: false,
22178 },
22179 ));
22180 let patindex = Expression::Function(Box::new(Function::new(
22181 "PATINDEX".to_string(),
22182 vec![collated, arg],
22183 )));
22184 let zero = Expression::Literal(Literal::Number("0".to_string()));
22185 let eq_zero = Expression::Eq(Box::new(BinaryOp {
22186 left: patindex,
22187 right: zero,
22188 left_comments: Vec::new(),
22189 operator_comments: Vec::new(),
22190 trailing_comments: Vec::new(),
22191 inferred_type: None,
22192 }));
22193 Ok(Expression::Paren(Box::new(Paren {
22194 this: eq_zero,
22195 trailing_comments: Vec::new(),
22196 })))
22197 }
22198 DialectType::Oracle => {
22199 // NVL(REGEXP_LIKE(x, '^[' || CHR(1) || '-' || CHR(127) || ']*$'), TRUE)
22200 // Build the pattern: '^[' || CHR(1) || '-' || CHR(127) || ']*$'
22201 let s1 = Expression::Literal(Literal::String("^[".to_string()));
22202 let chr1 = Expression::Function(Box::new(Function::new(
22203 "CHR".to_string(),
22204 vec![Expression::Literal(Literal::Number("1".to_string()))],
22205 )));
22206 let dash = Expression::Literal(Literal::String("-".to_string()));
22207 let chr127 = Expression::Function(Box::new(Function::new(
22208 "CHR".to_string(),
22209 vec![Expression::Literal(Literal::Number("127".to_string()))],
22210 )));
22211 let s2 = Expression::Literal(Literal::String("]*$".to_string()));
22212 // Build: '^[' || CHR(1) || '-' || CHR(127) || ']*$'
22213 let concat1 =
22214 Expression::DPipe(Box::new(crate::expressions::DPipe {
22215 this: Box::new(s1),
22216 expression: Box::new(chr1),
22217 safe: None,
22218 }));
22219 let concat2 =
22220 Expression::DPipe(Box::new(crate::expressions::DPipe {
22221 this: Box::new(concat1),
22222 expression: Box::new(dash),
22223 safe: None,
22224 }));
22225 let concat3 =
22226 Expression::DPipe(Box::new(crate::expressions::DPipe {
22227 this: Box::new(concat2),
22228 expression: Box::new(chr127),
22229 safe: None,
22230 }));
22231 let concat4 =
22232 Expression::DPipe(Box::new(crate::expressions::DPipe {
22233 this: Box::new(concat3),
22234 expression: Box::new(s2),
22235 safe: None,
22236 }));
22237 let regexp_like = Expression::Function(Box::new(Function::new(
22238 "REGEXP_LIKE".to_string(),
22239 vec![arg, concat4],
22240 )));
22241 // Use Column("TRUE") to output literal TRUE keyword (not boolean 1/0)
22242 let true_expr = Expression::Column(crate::expressions::Column {
22243 name: Identifier {
22244 name: "TRUE".to_string(),
22245 quoted: false,
22246 trailing_comments: Vec::new(),
22247 span: None,
22248 },
22249 table: None,
22250 join_mark: false,
22251 trailing_comments: Vec::new(),
22252 span: None,
22253 inferred_type: None,
22254 });
22255 let nvl = Expression::Function(Box::new(Function::new(
22256 "NVL".to_string(),
22257 vec![regexp_like, true_expr],
22258 )));
22259 Ok(nvl)
22260 }
22261 _ => Ok(Expression::Function(Box::new(Function::new(
22262 "IS_ASCII".to_string(),
22263 vec![arg],
22264 )))),
22265 }
22266 } else {
22267 Ok(e)
22268 }
22269 }
22270
Action::StrPositionConvert => {
    // STR_POSITION(haystack, needle[, position[, occurrence]]) -> dialect-specific
    if let Expression::Function(f) = e {
        // Fewer than two arguments is malformed input: pass the call through untouched.
        if f.args.len() < 2 {
            return Ok(Expression::Function(f));
        }
        let mut args = f.args;

        // Peel off the positional arguments in order; the last two are optional.
        let haystack = args.remove(0);
        let needle = args.remove(0);
        let position = if !args.is_empty() {
            Some(args.remove(0))
        } else {
            Option::None
        };
        let occurrence = if !args.is_empty() {
            Some(args.remove(0))
        } else {
            Option::None
        };

        // Helper to build: STRPOS/INSTR(SUBSTRING(haystack, pos), needle) expansion
        // Returns: CASE/IF WHEN func(SUBSTRING(haystack, pos), needle[, occ]) = 0 THEN 0 ELSE ... + pos - 1 END
        // Used for dialects whose native position function has no start-position
        // parameter: search the suffix starting at `pos`, then shift a hit back
        // into whole-string coordinates (+ pos - 1); a miss (inner = 0) stays 0.
        fn build_position_expansion(
            haystack: Expression,
            needle: Expression,
            pos: Expression,
            occurrence: Option<Expression>,
            inner_func: &str,
            wrapper: &str, // "CASE", "IF", "IIF"
        ) -> Expression {
            let substr = Expression::Function(Box::new(Function::new(
                "SUBSTRING".to_string(),
                vec![haystack, pos.clone()],
            )));
            let mut inner_args = vec![substr, needle];
            if let Some(occ) = occurrence {
                inner_args.push(occ);
            }
            let inner_call = Expression::Function(Box::new(Function::new(
                inner_func.to_string(),
                inner_args,
            )));
            let zero = Expression::Literal(Literal::Number("0".to_string()));
            let one = Expression::Literal(Literal::Number("1".to_string()));
            // inner_call = 0 means "not found": the result must then be 0 rather
            // than the shifted offset, hence the conditional wrapper below.
            let eq_zero = Expression::Eq(Box::new(BinaryOp {
                left: inner_call.clone(),
                right: zero.clone(),
                left_comments: Vec::new(),
                operator_comments: Vec::new(),
                trailing_comments: Vec::new(),
                inferred_type: None,
            }));
            // inner_call + pos - 1: converts suffix-relative index to absolute.
            let add_pos = Expression::Add(Box::new(BinaryOp {
                left: inner_call,
                right: pos,
                left_comments: Vec::new(),
                operator_comments: Vec::new(),
                trailing_comments: Vec::new(),
                inferred_type: None,
            }));
            let sub_one = Expression::Sub(Box::new(BinaryOp {
                left: add_pos,
                right: one,
                left_comments: Vec::new(),
                operator_comments: Vec::new(),
                trailing_comments: Vec::new(),
                inferred_type: None,
            }));

            match wrapper {
                "CASE" => Expression::Case(Box::new(Case {
                    operand: Option::None,
                    whens: vec![(eq_zero, zero)],
                    else_: Some(sub_one),
                    comments: Vec::new(),
                    inferred_type: None,
                })),
                "IIF" => Expression::Function(Box::new(Function::new(
                    "IIF".to_string(),
                    vec![eq_zero, zero, sub_one],
                ))),
                // Any other wrapper value falls back to a plain IF() call.
                _ => Expression::Function(Box::new(Function::new(
                    "IF".to_string(),
                    vec![eq_zero, zero, sub_one],
                ))),
            }
        }

        match target {
            // STRPOS group: Athena, DuckDB, Presto, Trino, Drill
            DialectType::Athena
            | DialectType::DuckDB
            | DialectType::Presto
            | DialectType::Trino
            | DialectType::Drill => {
                if let Some(pos) = position {
                    // DuckDB renders the expansion as CASE; the rest use IF.
                    let wrapper = if matches!(target, DialectType::DuckDB) {
                        "CASE"
                    } else {
                        "IF"
                    };
                    let result = build_position_expansion(
                        haystack, needle, pos, occurrence, "STRPOS", wrapper,
                    );
                    if matches!(target, DialectType::Drill) {
                        // Drill uses backtick-quoted `IF`
                        if let Expression::Function(mut f) = result {
                            f.name = "`IF`".to_string();
                            Ok(Expression::Function(f))
                        } else {
                            Ok(result)
                        }
                    } else {
                        Ok(result)
                    }
                } else {
                    Ok(Expression::Function(Box::new(Function::new(
                        "STRPOS".to_string(),
                        vec![haystack, needle],
                    ))))
                }
            }
            // SQLite: IIF wrapper
            DialectType::SQLite => {
                if let Some(pos) = position {
                    Ok(build_position_expansion(
                        haystack, needle, pos, occurrence, "INSTR", "IIF",
                    ))
                } else {
                    Ok(Expression::Function(Box::new(Function::new(
                        "INSTR".to_string(),
                        vec![haystack, needle],
                    ))))
                }
            }
            // INSTR group: Teradata, BigQuery, Oracle
            // These accept position and occurrence natively, so all four
            // arguments are forwarded as-is.
            DialectType::Teradata | DialectType::BigQuery | DialectType::Oracle => {
                let mut a = vec![haystack, needle];
                if let Some(pos) = position {
                    a.push(pos);
                }
                if let Some(occ) = occurrence {
                    a.push(occ);
                }
                Ok(Expression::Function(Box::new(Function::new(
                    "INSTR".to_string(),
                    a,
                ))))
            }
            // CHARINDEX group: Snowflake, TSQL
            // Note the argument order flip (needle first). `occurrence` is not
            // representable here and is dropped.
            DialectType::Snowflake | DialectType::TSQL | DialectType::Fabric => {
                let mut a = vec![needle, haystack];
                if let Some(pos) = position {
                    a.push(pos);
                }
                Ok(Expression::Function(Box::new(Function::new(
                    "CHARINDEX".to_string(),
                    a,
                ))))
            }
            // POSITION(needle IN haystack): PostgreSQL, Materialize, RisingWave, Redshift
            DialectType::PostgreSQL
            | DialectType::Materialize
            | DialectType::RisingWave
            | DialectType::Redshift => {
                if let Some(pos) = position {
                    // Build: CASE WHEN POSITION(needle IN SUBSTRING(haystack FROM pos)) = 0 THEN 0
                    //        ELSE POSITION(...) + pos - 1 END
                    // Same shape as build_position_expansion, but using the
                    // StrPosition/Substring AST nodes so generation emits the
                    // POSITION(.. IN ..) / SUBSTRING(.. FROM ..) syntax.
                    let substr = Expression::Substring(Box::new(
                        crate::expressions::SubstringFunc {
                            this: haystack,
                            start: pos.clone(),
                            length: Option::None,
                            from_for_syntax: true,
                        },
                    ));
                    let pos_in = Expression::StrPosition(Box::new(
                        crate::expressions::StrPosition {
                            this: Box::new(substr),
                            substr: Some(Box::new(needle)),
                            position: Option::None,
                            occurrence: Option::None,
                        },
                    ));
                    let zero =
                        Expression::Literal(Literal::Number("0".to_string()));
                    let one = Expression::Literal(Literal::Number("1".to_string()));
                    let eq_zero = Expression::Eq(Box::new(BinaryOp {
                        left: pos_in.clone(),
                        right: zero.clone(),
                        left_comments: Vec::new(),
                        operator_comments: Vec::new(),
                        trailing_comments: Vec::new(),
                        inferred_type: None,
                    }));
                    let add_pos = Expression::Add(Box::new(BinaryOp {
                        left: pos_in,
                        right: pos,
                        left_comments: Vec::new(),
                        operator_comments: Vec::new(),
                        trailing_comments: Vec::new(),
                        inferred_type: None,
                    }));
                    let sub_one = Expression::Sub(Box::new(BinaryOp {
                        left: add_pos,
                        right: one,
                        left_comments: Vec::new(),
                        operator_comments: Vec::new(),
                        trailing_comments: Vec::new(),
                        inferred_type: None,
                    }));
                    Ok(Expression::Case(Box::new(Case {
                        operand: Option::None,
                        whens: vec![(eq_zero, zero)],
                        else_: Some(sub_one),
                        comments: Vec::new(),
                        inferred_type: None,
                    })))
                } else {
                    Ok(Expression::StrPosition(Box::new(
                        crate::expressions::StrPosition {
                            this: Box::new(haystack),
                            substr: Some(Box::new(needle)),
                            position: Option::None,
                            occurrence: Option::None,
                        },
                    )))
                }
            }
            // LOCATE group: MySQL, Hive, Spark, Databricks, Doris
            // LOCATE(needle, haystack[, pos]); `occurrence` is dropped.
            DialectType::MySQL
            | DialectType::SingleStore
            | DialectType::TiDB
            | DialectType::Hive
            | DialectType::Spark
            | DialectType::Databricks
            | DialectType::Doris
            | DialectType::StarRocks => {
                let mut a = vec![needle, haystack];
                if let Some(pos) = position {
                    a.push(pos);
                }
                Ok(Expression::Function(Box::new(Function::new(
                    "LOCATE".to_string(),
                    a,
                ))))
            }
            // ClickHouse: POSITION(haystack, needle[, position])
            DialectType::ClickHouse => {
                let mut a = vec![haystack, needle];
                if let Some(pos) = position {
                    a.push(pos);
                }
                Ok(Expression::Function(Box::new(Function::new(
                    "POSITION".to_string(),
                    a,
                ))))
            }
            // Fallback: keep the normalized STR_POSITION form with all args.
            _ => {
                let mut a = vec![haystack, needle];
                if let Some(pos) = position {
                    a.push(pos);
                }
                if let Some(occ) = occurrence {
                    a.push(occ);
                }
                Ok(Expression::Function(Box::new(Function::new(
                    "STR_POSITION".to_string(),
                    a,
                ))))
            }
        }
    } else {
        Ok(e)
    }
}
22548
22549 Action::ArraySumConvert => {
22550 // ARRAY_SUM(arr) -> dialect-specific
22551 if let Expression::Function(f) = e {
22552 let args = f.args;
22553 match target {
22554 DialectType::DuckDB => Ok(Expression::Function(Box::new(
22555 Function::new("LIST_SUM".to_string(), args),
22556 ))),
22557 DialectType::Spark | DialectType::Databricks => {
22558 // AGGREGATE(arr, 0, (acc, x) -> acc + x, acc -> acc)
22559 let arr = args.into_iter().next().unwrap();
22560 let zero = Expression::Literal(Literal::Number("0".to_string()));
22561 let acc_id = Identifier::new("acc");
22562 let x_id = Identifier::new("x");
22563 let acc = Expression::Identifier(acc_id.clone());
22564 let x = Expression::Identifier(x_id.clone());
22565 let add = Expression::Add(Box::new(BinaryOp {
22566 left: acc.clone(),
22567 right: x,
22568 left_comments: Vec::new(),
22569 operator_comments: Vec::new(),
22570 trailing_comments: Vec::new(),
22571 inferred_type: None,
22572 }));
22573 let lambda1 =
22574 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
22575 parameters: vec![acc_id.clone(), x_id],
22576 body: add,
22577 colon: false,
22578 parameter_types: Vec::new(),
22579 }));
22580 let lambda2 =
22581 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
22582 parameters: vec![acc_id],
22583 body: acc,
22584 colon: false,
22585 parameter_types: Vec::new(),
22586 }));
22587 Ok(Expression::Function(Box::new(Function::new(
22588 "AGGREGATE".to_string(),
22589 vec![arr, zero, lambda1, lambda2],
22590 ))))
22591 }
22592 DialectType::Presto | DialectType::Athena => {
22593 // Presto/Athena keep ARRAY_SUM natively
22594 Ok(Expression::Function(Box::new(Function::new(
22595 "ARRAY_SUM".to_string(),
22596 args,
22597 ))))
22598 }
22599 DialectType::Trino => {
22600 // REDUCE(arr, 0, (acc, x) -> acc + x, acc -> acc)
22601 if args.len() == 1 {
22602 let arr = args.into_iter().next().unwrap();
22603 let zero =
22604 Expression::Literal(Literal::Number("0".to_string()));
22605 let acc_id = Identifier::new("acc");
22606 let x_id = Identifier::new("x");
22607 let acc = Expression::Identifier(acc_id.clone());
22608 let x = Expression::Identifier(x_id.clone());
22609 let add = Expression::Add(Box::new(BinaryOp {
22610 left: acc.clone(),
22611 right: x,
22612 left_comments: Vec::new(),
22613 operator_comments: Vec::new(),
22614 trailing_comments: Vec::new(),
22615 inferred_type: None,
22616 }));
22617 let lambda1 = Expression::Lambda(Box::new(
22618 crate::expressions::LambdaExpr {
22619 parameters: vec![acc_id.clone(), x_id],
22620 body: add,
22621 colon: false,
22622 parameter_types: Vec::new(),
22623 },
22624 ));
22625 let lambda2 = Expression::Lambda(Box::new(
22626 crate::expressions::LambdaExpr {
22627 parameters: vec![acc_id],
22628 body: acc,
22629 colon: false,
22630 parameter_types: Vec::new(),
22631 },
22632 ));
22633 Ok(Expression::Function(Box::new(Function::new(
22634 "REDUCE".to_string(),
22635 vec![arr, zero, lambda1, lambda2],
22636 ))))
22637 } else {
22638 Ok(Expression::Function(Box::new(Function::new(
22639 "ARRAY_SUM".to_string(),
22640 args,
22641 ))))
22642 }
22643 }
22644 DialectType::ClickHouse => {
22645 // arraySum(lambda, arr) or arraySum(arr)
22646 Ok(Expression::Function(Box::new(Function::new(
22647 "arraySum".to_string(),
22648 args,
22649 ))))
22650 }
22651 _ => Ok(Expression::Function(Box::new(Function::new(
22652 "ARRAY_SUM".to_string(),
22653 args,
22654 )))),
22655 }
22656 } else {
22657 Ok(e)
22658 }
22659 }
22660
22661 Action::ArraySizeConvert => {
22662 if let Expression::Function(f) = e {
22663 Ok(Expression::Function(Box::new(Function::new(
22664 "REPEATED_COUNT".to_string(),
22665 f.args,
22666 ))))
22667 } else {
22668 Ok(e)
22669 }
22670 }
22671
Action::ArrayAnyConvert => {
    // ARRAY_ANY(arr, x -> pred) -> dialect-specific "does any element match".
    // For dialects without a native ANY_MATCH, emits the pattern
    // (len(arr) = 0 OR len(filter(arr, pred)) <> 0); note this makes an empty
    // array evaluate to TRUE, matching the expansion's first disjunct.
    if let Expression::Function(f) = e {
        let mut args = f.args;
        if args.len() == 2 {
            let arr = args.remove(0);
            let lambda = args.remove(0);

            // Extract lambda parameter name and body
            // (falls back to "x" / the raw expression when the second arg
            // is not a lambda).
            let (param_name, pred_body) =
                if let Expression::Lambda(ref lam) = lambda {
                    let name = if let Some(p) = lam.parameters.first() {
                        p.name.clone()
                    } else {
                        "x".to_string()
                    };
                    (name, lam.body.clone())
                } else {
                    ("x".to_string(), lambda.clone())
                };

            // Helper: build a function call Expression
            let make_func = |name: &str, args: Vec<Expression>| -> Expression {
                Expression::Function(Box::new(Function::new(
                    name.to_string(),
                    args,
                )))
            };

            // Helper: build (len_func(arr) = 0 OR len_func(filter_expr) <> 0) wrapped in Paren
            // `len_args_extra` carries trailing args for length functions that
            // need them (e.g. PostgreSQL's ARRAY_LENGTH(arr, 1)).
            // Captures `arr` and `make_func` from the enclosing scope.
            let build_filter_pattern = |len_func: &str,
                                        len_args_extra: Vec<Expression>,
                                        filter_expr: Expression|
             -> Expression {
                // len_func(arr, ...extra) = 0
                let mut len_arr_args = vec![arr.clone()];
                len_arr_args.extend(len_args_extra.clone());
                let len_arr = make_func(len_func, len_arr_args);
                let eq_zero = Expression::Eq(Box::new(BinaryOp::new(
                    len_arr,
                    Expression::number(0),
                )));

                // len_func(filter_expr, ...extra) <> 0
                let mut len_filter_args = vec![filter_expr];
                len_filter_args.extend(len_args_extra);
                let len_filter = make_func(len_func, len_filter_args);
                let neq_zero = Expression::Neq(Box::new(BinaryOp::new(
                    len_filter,
                    Expression::number(0),
                )));

                // (eq_zero OR neq_zero)
                let or_expr =
                    Expression::Or(Box::new(BinaryOp::new(eq_zero, neq_zero)));
                Expression::Paren(Box::new(Paren {
                    this: or_expr,
                    trailing_comments: Vec::new(),
                }))
            };

            match target {
                // Presto family has a native ANY_MATCH(arr, lambda).
                DialectType::Trino | DialectType::Presto | DialectType::Athena => {
                    Ok(make_func("ANY_MATCH", vec![arr, lambda]))
                }
                DialectType::ClickHouse => {
                    // (LENGTH(arr) = 0 OR LENGTH(arrayFilter(x -> pred, arr)) <> 0)
                    // ClickHouse arrayFilter takes lambda first, then array
                    let filter_expr =
                        make_func("arrayFilter", vec![lambda, arr.clone()]);
                    Ok(build_filter_pattern("LENGTH", vec![], filter_expr))
                }
                DialectType::Databricks | DialectType::Spark => {
                    // (SIZE(arr) = 0 OR SIZE(FILTER(arr, x -> pred)) <> 0)
                    let filter_expr =
                        make_func("FILTER", vec![arr.clone(), lambda]);
                    Ok(build_filter_pattern("SIZE", vec![], filter_expr))
                }
                DialectType::DuckDB => {
                    // (ARRAY_LENGTH(arr) = 0 OR ARRAY_LENGTH(LIST_FILTER(arr, x -> pred)) <> 0)
                    let filter_expr =
                        make_func("LIST_FILTER", vec![arr.clone(), lambda]);
                    Ok(build_filter_pattern("ARRAY_LENGTH", vec![], filter_expr))
                }
                DialectType::Teradata => {
                    // (CARDINALITY(arr) = 0 OR CARDINALITY(FILTER(arr, x -> pred)) <> 0)
                    let filter_expr =
                        make_func("FILTER", vec![arr.clone(), lambda]);
                    Ok(build_filter_pattern("CARDINALITY", vec![], filter_expr))
                }
                DialectType::BigQuery => {
                    // (ARRAY_LENGTH(arr) = 0 OR ARRAY_LENGTH(ARRAY(SELECT x FROM UNNEST(arr) AS x WHERE pred)) <> 0)
                    // Build: SELECT x FROM UNNEST(arr) AS x WHERE pred
                    // BigQuery has no FILTER; the lambda body becomes a WHERE
                    // clause over an UNNEST aliased to the lambda's parameter.
                    let param_col = Expression::column(&param_name);
                    let unnest_expr = Expression::Unnest(Box::new(
                        crate::expressions::UnnestFunc {
                            this: arr.clone(),
                            expressions: vec![],
                            with_ordinality: false,
                            alias: Some(Identifier::new(&param_name)),
                            offset_alias: None,
                        },
                    ));
                    let mut sel = crate::expressions::Select::default();
                    sel.expressions = vec![param_col];
                    sel.from = Some(crate::expressions::From {
                        expressions: vec![unnest_expr],
                    });
                    sel.where_clause =
                        Some(crate::expressions::Where { this: pred_body });
                    let array_subquery =
                        make_func("ARRAY", vec![Expression::Select(Box::new(sel))]);
                    Ok(build_filter_pattern("ARRAY_LENGTH", vec![], array_subquery))
                }
                DialectType::PostgreSQL => {
                    // (ARRAY_LENGTH(arr, 1) = 0 OR ARRAY_LENGTH(ARRAY(SELECT x FROM UNNEST(arr) AS _t0(x) WHERE pred), 1) <> 0)
                    // Build: SELECT x FROM UNNEST(arr) AS _t0(x) WHERE pred
                    let param_col = Expression::column(&param_name);
                    // For PostgreSQL, UNNEST uses AS _t0(x) syntax - use TableAlias
                    let unnest_with_alias =
                        Expression::Alias(Box::new(crate::expressions::Alias {
                            this: Expression::Unnest(Box::new(
                                crate::expressions::UnnestFunc {
                                    this: arr.clone(),
                                    expressions: vec![],
                                    with_ordinality: false,
                                    alias: None,
                                    offset_alias: None,
                                },
                            )),
                            alias: Identifier::new("_t0"),
                            column_aliases: vec![Identifier::new(&param_name)],
                            pre_alias_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                            inferred_type: None,
                        }));
                    let mut sel = crate::expressions::Select::default();
                    sel.expressions = vec![param_col];
                    sel.from = Some(crate::expressions::From {
                        expressions: vec![unnest_with_alias],
                    });
                    sel.where_clause =
                        Some(crate::expressions::Where { this: pred_body });
                    let array_subquery =
                        make_func("ARRAY", vec![Expression::Select(Box::new(sel))]);
                    // PostgreSQL's ARRAY_LENGTH needs the dimension argument (1).
                    Ok(build_filter_pattern(
                        "ARRAY_LENGTH",
                        vec![Expression::number(1)],
                        array_subquery,
                    ))
                }
                // Fallback: keep the normalized ARRAY_ANY form.
                _ => Ok(Expression::Function(Box::new(Function::new(
                    "ARRAY_ANY".to_string(),
                    vec![arr, lambda],
                )))),
            }
        } else {
            // Unexpected arity: keep the call under its normalized name.
            Ok(Expression::Function(Box::new(Function::new(
                "ARRAY_ANY".to_string(),
                args,
            ))))
        }
    } else {
        Ok(e)
    }
}
22837
Action::DecodeSimplify => {
    // DECODE(x, search1, result1, ..., default) -> CASE WHEN ... THEN result1 ... [ELSE default] END
    // For literal search values: CASE WHEN x = search THEN result
    // For NULL search: CASE WHEN x IS NULL THEN result
    // For non-literal (column, expr): CASE WHEN x = search OR (x IS NULL AND search IS NULL) THEN result

    // A "literal" for DECODE purposes: plain literal, boolean, or a negated
    // value. Literals can never be NULL, so plain equality suffices.
    fn is_decode_literal(e: &Expression) -> bool {
        matches!(
            e,
            Expression::Literal(_) | Expression::Boolean(_) | Expression::Neg(_)
        )
    }

    // Builds the CASE expression from the comparison operand, the
    // (search, result) pairs, and the optional trailing default.
    let build_decode_case =
        |this_expr: Expression,
         pairs: Vec<(Expression, Expression)>,
         default: Option<Expression>| {
            let whens: Vec<(Expression, Expression)> = pairs
                .into_iter()
                .map(|(search, result)| {
                    if matches!(&search, Expression::Null(_)) {
                        // NULL search -> IS NULL
                        let condition = Expression::Is(Box::new(BinaryOp {
                            left: this_expr.clone(),
                            right: Expression::Null(crate::expressions::Null),
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                            inferred_type: None,
                        }));
                        (condition, result)
                    } else if is_decode_literal(&search)
                        || is_decode_literal(&this_expr)
                    {
                        // At least one side is a literal -> simple equality (no NULL check needed)
                        let eq = Expression::Eq(Box::new(BinaryOp {
                            left: this_expr.clone(),
                            right: search,
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                            inferred_type: None,
                        }));
                        (eq, result)
                    } else {
                        // Non-literal -> null-safe comparison
                        // Comparison-operator searches get parenthesized so the
                        // generated `=` / IS NULL binds correctly.
                        let needs_paren = matches!(
                            &search,
                            Expression::Eq(_)
                                | Expression::Neq(_)
                                | Expression::Gt(_)
                                | Expression::Gte(_)
                                | Expression::Lt(_)
                                | Expression::Lte(_)
                        );
                        let search_ref = if needs_paren {
                            Expression::Paren(Box::new(crate::expressions::Paren {
                                this: search.clone(),
                                trailing_comments: Vec::new(),
                            }))
                        } else {
                            search.clone()
                        };
                        // Build: x = search OR (x IS NULL AND search IS NULL)
                        let eq = Expression::Eq(Box::new(BinaryOp {
                            left: this_expr.clone(),
                            right: search_ref,
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                            inferred_type: None,
                        }));
                        // Second, independent copy of `search` for the IS NULL arm.
                        let search_in_null = if needs_paren {
                            Expression::Paren(Box::new(crate::expressions::Paren {
                                this: search.clone(),
                                trailing_comments: Vec::new(),
                            }))
                        } else {
                            search.clone()
                        };
                        let x_is_null = Expression::Is(Box::new(BinaryOp {
                            left: this_expr.clone(),
                            right: Expression::Null(crate::expressions::Null),
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                            inferred_type: None,
                        }));
                        let search_is_null = Expression::Is(Box::new(BinaryOp {
                            left: search_in_null,
                            right: Expression::Null(crate::expressions::Null),
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                            inferred_type: None,
                        }));
                        let both_null = Expression::And(Box::new(BinaryOp {
                            left: x_is_null,
                            right: search_is_null,
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                            inferred_type: None,
                        }));
                        let condition = Expression::Or(Box::new(BinaryOp {
                            left: eq,
                            right: Expression::Paren(Box::new(
                                crate::expressions::Paren {
                                    this: both_null,
                                    trailing_comments: Vec::new(),
                                },
                            )),
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                            inferred_type: None,
                        }));
                        (condition, result)
                    }
                })
                .collect();
            Expression::Case(Box::new(Case {
                operand: None,
                whens,
                else_: default,
                comments: Vec::new(),
                inferred_type: None,
            }))
        };

    if let Expression::Decode(decode) = e {
        // Structured Decode node: fields are already split out.
        Ok(build_decode_case(
            decode.this,
            decode.search_results,
            decode.default,
        ))
    } else if let Expression::DecodeCase(dc) = e {
        // DecodeCase has flat expressions: [x, s1, r1, s2, r2, ..., default?]
        let mut exprs = dc.expressions;
        if exprs.len() < 3 {
            // Too few args to form even one (search, result) pair: re-wrap unchanged.
            return Ok(Expression::DecodeCase(Box::new(
                crate::expressions::DecodeCase { expressions: exprs },
            )));
        }
        let this_expr = exprs.remove(0);
        let mut pairs = Vec::new();
        let mut default = None;
        let mut i = 0;
        // Consume (search, result) pairs two at a time.
        while i + 1 < exprs.len() {
            pairs.push((exprs[i].clone(), exprs[i + 1].clone()));
            i += 2;
        }
        if i < exprs.len() {
            // Odd remaining element is the default
            default = Some(exprs[i].clone());
        }
        Ok(build_decode_case(this_expr, pairs, default))
    } else {
        Ok(e)
    }
}
22998
22999 Action::CreateTableLikeToCtas => {
23000 // CREATE TABLE a LIKE b -> CREATE TABLE a AS SELECT * FROM b LIMIT 0
23001 if let Expression::CreateTable(ct) = e {
23002 let like_source = ct.constraints.iter().find_map(|c| {
23003 if let crate::expressions::TableConstraint::Like { source, .. } = c {
23004 Some(source.clone())
23005 } else {
23006 None
23007 }
23008 });
23009 if let Some(source_table) = like_source {
23010 let mut new_ct = *ct;
23011 new_ct.constraints.clear();
23012 // Build: SELECT * FROM b LIMIT 0
23013 let select = Expression::Select(Box::new(crate::expressions::Select {
23014 expressions: vec![Expression::Star(crate::expressions::Star {
23015 table: None,
23016 except: None,
23017 replace: None,
23018 rename: None,
23019 trailing_comments: Vec::new(),
23020 span: None,
23021 })],
23022 from: Some(crate::expressions::From {
23023 expressions: vec![Expression::Table(source_table)],
23024 }),
23025 limit: Some(crate::expressions::Limit {
23026 this: Expression::Literal(Literal::Number("0".to_string())),
23027 percent: false,
23028 comments: Vec::new(),
23029 }),
23030 ..Default::default()
23031 }));
23032 new_ct.as_select = Some(select);
23033 Ok(Expression::CreateTable(Box::new(new_ct)))
23034 } else {
23035 Ok(Expression::CreateTable(ct))
23036 }
23037 } else {
23038 Ok(e)
23039 }
23040 }
23041
23042 Action::CreateTableLikeToSelectInto => {
23043 // CREATE TABLE a LIKE b -> SELECT TOP 0 * INTO a FROM b AS temp
23044 if let Expression::CreateTable(ct) = e {
23045 let like_source = ct.constraints.iter().find_map(|c| {
23046 if let crate::expressions::TableConstraint::Like { source, .. } = c {
23047 Some(source.clone())
23048 } else {
23049 None
23050 }
23051 });
23052 if let Some(source_table) = like_source {
23053 let mut aliased_source = source_table;
23054 aliased_source.alias = Some(Identifier::new("temp"));
23055 // Build: SELECT TOP 0 * INTO a FROM b AS temp
23056 let select = Expression::Select(Box::new(crate::expressions::Select {
23057 expressions: vec![Expression::Star(crate::expressions::Star {
23058 table: None,
23059 except: None,
23060 replace: None,
23061 rename: None,
23062 trailing_comments: Vec::new(),
23063 span: None,
23064 })],
23065 from: Some(crate::expressions::From {
23066 expressions: vec![Expression::Table(aliased_source)],
23067 }),
23068 into: Some(crate::expressions::SelectInto {
23069 this: Expression::Table(ct.name.clone()),
23070 temporary: false,
23071 unlogged: false,
23072 bulk_collect: false,
23073 expressions: Vec::new(),
23074 }),
23075 top: Some(crate::expressions::Top {
23076 this: Expression::Literal(Literal::Number("0".to_string())),
23077 percent: false,
23078 with_ties: false,
23079 parenthesized: false,
23080 }),
23081 ..Default::default()
23082 }));
23083 Ok(select)
23084 } else {
23085 Ok(Expression::CreateTable(ct))
23086 }
23087 } else {
23088 Ok(e)
23089 }
23090 }
23091
23092 Action::CreateTableLikeToAs => {
23093 // CREATE TABLE a LIKE b -> CREATE TABLE a AS b (ClickHouse)
23094 if let Expression::CreateTable(ct) = e {
23095 let like_source = ct.constraints.iter().find_map(|c| {
23096 if let crate::expressions::TableConstraint::Like { source, .. } = c {
23097 Some(source.clone())
23098 } else {
23099 None
23100 }
23101 });
23102 if let Some(source_table) = like_source {
23103 let mut new_ct = *ct;
23104 new_ct.constraints.clear();
23105 // AS b (just a table reference, not a SELECT)
23106 new_ct.as_select = Some(Expression::Table(source_table));
23107 Ok(Expression::CreateTable(Box::new(new_ct)))
23108 } else {
23109 Ok(Expression::CreateTable(ct))
23110 }
23111 } else {
23112 Ok(e)
23113 }
23114 }
23115
23116 Action::TsOrDsToDateConvert => {
23117 // TS_OR_DS_TO_DATE(x[, fmt]) -> dialect-specific date conversion
23118 if let Expression::Function(f) = e {
23119 let mut args = f.args;
23120 let this = args.remove(0);
23121 let fmt = if !args.is_empty() {
23122 match &args[0] {
23123 Expression::Literal(Literal::String(s)) => Some(s.clone()),
23124 _ => None,
23125 }
23126 } else {
23127 None
23128 };
23129 Ok(Expression::TsOrDsToDate(Box::new(
23130 crate::expressions::TsOrDsToDate {
23131 this: Box::new(this),
23132 format: fmt,
23133 safe: None,
23134 },
23135 )))
23136 } else {
23137 Ok(e)
23138 }
23139 }
23140
23141 Action::TsOrDsToDateStrConvert => {
23142 // TS_OR_DS_TO_DATE_STR(x) -> SUBSTRING(CAST(x AS type), 1, 10)
23143 if let Expression::Function(f) = e {
23144 let arg = f.args.into_iter().next().unwrap();
23145 let str_type = match target {
23146 DialectType::DuckDB
23147 | DialectType::PostgreSQL
23148 | DialectType::Materialize => DataType::Text,
23149 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
23150 DataType::Custom {
23151 name: "STRING".to_string(),
23152 }
23153 }
23154 DialectType::Presto
23155 | DialectType::Trino
23156 | DialectType::Athena
23157 | DialectType::Drill => DataType::VarChar {
23158 length: None,
23159 parenthesized_length: false,
23160 },
23161 DialectType::MySQL | DialectType::Doris | DialectType::StarRocks => {
23162 DataType::Custom {
23163 name: "STRING".to_string(),
23164 }
23165 }
23166 _ => DataType::VarChar {
23167 length: None,
23168 parenthesized_length: false,
23169 },
23170 };
23171 let cast_expr = Expression::Cast(Box::new(Cast {
23172 this: arg,
23173 to: str_type,
23174 double_colon_syntax: false,
23175 trailing_comments: Vec::new(),
23176 format: None,
23177 default: None,
23178 inferred_type: None,
23179 }));
23180 Ok(Expression::Substring(Box::new(
23181 crate::expressions::SubstringFunc {
23182 this: cast_expr,
23183 start: Expression::number(1),
23184 length: Some(Expression::number(10)),
23185 from_for_syntax: false,
23186 },
23187 )))
23188 } else {
23189 Ok(e)
23190 }
23191 }
23192
23193 Action::DateStrToDateConvert => {
23194 // DATE_STR_TO_DATE(x) -> dialect-specific
23195 if let Expression::Function(f) = e {
23196 let arg = f.args.into_iter().next().unwrap();
23197 match target {
23198 DialectType::SQLite => {
23199 // SQLite: just the bare expression (dates are strings)
23200 Ok(arg)
23201 }
23202 _ => Ok(Expression::Cast(Box::new(Cast {
23203 this: arg,
23204 to: DataType::Date,
23205 double_colon_syntax: false,
23206 trailing_comments: Vec::new(),
23207 format: None,
23208 default: None,
23209 inferred_type: None,
23210 }))),
23211 }
23212 } else {
23213 Ok(e)
23214 }
23215 }
23216
23217 Action::TimeStrToDateConvert => {
23218 // TIME_STR_TO_DATE(x) -> dialect-specific
23219 if let Expression::Function(f) = e {
23220 let arg = f.args.into_iter().next().unwrap();
23221 match target {
23222 DialectType::Hive
23223 | DialectType::Doris
23224 | DialectType::StarRocks
23225 | DialectType::Snowflake => Ok(Expression::Function(Box::new(
23226 Function::new("TO_DATE".to_string(), vec![arg]),
23227 ))),
23228 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
23229 // Presto: CAST(x AS TIMESTAMP)
23230 Ok(Expression::Cast(Box::new(Cast {
23231 this: arg,
23232 to: DataType::Timestamp {
23233 timezone: false,
23234 precision: None,
23235 },
23236 double_colon_syntax: false,
23237 trailing_comments: Vec::new(),
23238 format: None,
23239 default: None,
23240 inferred_type: None,
23241 })))
23242 }
23243 _ => {
23244 // Default: CAST(x AS DATE)
23245 Ok(Expression::Cast(Box::new(Cast {
23246 this: arg,
23247 to: DataType::Date,
23248 double_colon_syntax: false,
23249 trailing_comments: Vec::new(),
23250 format: None,
23251 default: None,
23252 inferred_type: None,
23253 })))
23254 }
23255 }
23256 } else {
23257 Ok(e)
23258 }
23259 }
23260
23261 Action::TimeStrToTimeConvert => {
23262 // TIME_STR_TO_TIME(x[, zone]) -> dialect-specific CAST to timestamp type
23263 if let Expression::Function(f) = e {
23264 let mut args = f.args;
23265 let this = args.remove(0);
23266 let zone = if !args.is_empty() {
23267 match &args[0] {
23268 Expression::Literal(Literal::String(s)) => Some(s.clone()),
23269 _ => None,
23270 }
23271 } else {
23272 None
23273 };
23274 let has_zone = zone.is_some();
23275
23276 match target {
23277 DialectType::SQLite => {
23278 // SQLite: just the bare expression
23279 Ok(this)
23280 }
23281 DialectType::MySQL => {
23282 if has_zone {
23283 // MySQL with zone: TIMESTAMP(x)
23284 Ok(Expression::Function(Box::new(Function::new(
23285 "TIMESTAMP".to_string(),
23286 vec![this],
23287 ))))
23288 } else {
23289 // MySQL: CAST(x AS DATETIME) or with precision
23290 // Use DataType::Custom to avoid MySQL's transform_cast converting
23291 // CAST(x AS TIMESTAMP) -> TIMESTAMP(x)
23292 let precision =
23293 if let Expression::Literal(Literal::String(ref s)) = this {
23294 if let Some(dot_pos) = s.rfind('.') {
23295 let frac = &s[dot_pos + 1..];
23296 let digit_count = frac
23297 .chars()
23298 .take_while(|c| c.is_ascii_digit())
23299 .count();
23300 if digit_count > 0 {
23301 Some(digit_count)
23302 } else {
23303 None
23304 }
23305 } else {
23306 None
23307 }
23308 } else {
23309 None
23310 };
23311 let type_name = match precision {
23312 Some(p) => format!("DATETIME({})", p),
23313 None => "DATETIME".to_string(),
23314 };
23315 Ok(Expression::Cast(Box::new(Cast {
23316 this,
23317 to: DataType::Custom { name: type_name },
23318 double_colon_syntax: false,
23319 trailing_comments: Vec::new(),
23320 format: None,
23321 default: None,
23322 inferred_type: None,
23323 })))
23324 }
23325 }
23326 DialectType::ClickHouse => {
23327 if has_zone {
23328 // ClickHouse with zone: CAST(x AS DateTime64(6, 'zone'))
23329 // We need to strip the timezone offset from the literal if present
23330 let clean_this =
23331 if let Expression::Literal(Literal::String(ref s)) = this {
23332 // Strip timezone offset like "-08:00" or "+00:00"
23333 let re_offset = s.rfind(|c: char| c == '+' || c == '-');
23334 if let Some(offset_pos) = re_offset {
23335 if offset_pos > 10 {
23336 // After the date part
23337 let trimmed = s[..offset_pos].to_string();
23338 Expression::Literal(Literal::String(trimmed))
23339 } else {
23340 this.clone()
23341 }
23342 } else {
23343 this.clone()
23344 }
23345 } else {
23346 this.clone()
23347 };
23348 let zone_str = zone.unwrap();
23349 // Build: CAST(x AS DateTime64(6, 'zone'))
23350 let type_name = format!("DateTime64(6, '{}')", zone_str);
23351 Ok(Expression::Cast(Box::new(Cast {
23352 this: clean_this,
23353 to: DataType::Custom { name: type_name },
23354 double_colon_syntax: false,
23355 trailing_comments: Vec::new(),
23356 format: None,
23357 default: None,
23358 inferred_type: None,
23359 })))
23360 } else {
23361 Ok(Expression::Cast(Box::new(Cast {
23362 this,
23363 to: DataType::Custom {
23364 name: "DateTime64(6)".to_string(),
23365 },
23366 double_colon_syntax: false,
23367 trailing_comments: Vec::new(),
23368 format: None,
23369 default: None,
23370 inferred_type: None,
23371 })))
23372 }
23373 }
23374 DialectType::BigQuery => {
23375 if has_zone {
23376 // BigQuery with zone: CAST(x AS TIMESTAMP)
23377 Ok(Expression::Cast(Box::new(Cast {
23378 this,
23379 to: DataType::Timestamp {
23380 timezone: false,
23381 precision: None,
23382 },
23383 double_colon_syntax: false,
23384 trailing_comments: Vec::new(),
23385 format: None,
23386 default: None,
23387 inferred_type: None,
23388 })))
23389 } else {
23390 // BigQuery: CAST(x AS DATETIME) - Timestamp{tz:false} renders as DATETIME for BigQuery
23391 Ok(Expression::Cast(Box::new(Cast {
23392 this,
23393 to: DataType::Custom {
23394 name: "DATETIME".to_string(),
23395 },
23396 double_colon_syntax: false,
23397 trailing_comments: Vec::new(),
23398 format: None,
23399 default: None,
23400 inferred_type: None,
23401 })))
23402 }
23403 }
23404 DialectType::Doris => {
23405 // Doris: CAST(x AS DATETIME)
23406 Ok(Expression::Cast(Box::new(Cast {
23407 this,
23408 to: DataType::Custom {
23409 name: "DATETIME".to_string(),
23410 },
23411 double_colon_syntax: false,
23412 trailing_comments: Vec::new(),
23413 format: None,
23414 default: None,
23415 inferred_type: None,
23416 })))
23417 }
23418 DialectType::TSQL | DialectType::Fabric => {
23419 if has_zone {
23420 // TSQL with zone: CAST(x AS DATETIMEOFFSET) AT TIME ZONE 'UTC'
23421 let cast_expr = Expression::Cast(Box::new(Cast {
23422 this,
23423 to: DataType::Custom {
23424 name: "DATETIMEOFFSET".to_string(),
23425 },
23426 double_colon_syntax: false,
23427 trailing_comments: Vec::new(),
23428 format: None,
23429 default: None,
23430 inferred_type: None,
23431 }));
23432 Ok(Expression::AtTimeZone(Box::new(
23433 crate::expressions::AtTimeZone {
23434 this: cast_expr,
23435 zone: Expression::Literal(Literal::String(
23436 "UTC".to_string(),
23437 )),
23438 },
23439 )))
23440 } else {
23441 // TSQL: CAST(x AS DATETIME2)
23442 Ok(Expression::Cast(Box::new(Cast {
23443 this,
23444 to: DataType::Custom {
23445 name: "DATETIME2".to_string(),
23446 },
23447 double_colon_syntax: false,
23448 trailing_comments: Vec::new(),
23449 format: None,
23450 default: None,
23451 inferred_type: None,
23452 })))
23453 }
23454 }
23455 DialectType::DuckDB => {
23456 if has_zone {
23457 // DuckDB with zone: CAST(x AS TIMESTAMPTZ)
23458 Ok(Expression::Cast(Box::new(Cast {
23459 this,
23460 to: DataType::Timestamp {
23461 timezone: true,
23462 precision: None,
23463 },
23464 double_colon_syntax: false,
23465 trailing_comments: Vec::new(),
23466 format: None,
23467 default: None,
23468 inferred_type: None,
23469 })))
23470 } else {
23471 // DuckDB: CAST(x AS TIMESTAMP)
23472 Ok(Expression::Cast(Box::new(Cast {
23473 this,
23474 to: DataType::Timestamp {
23475 timezone: false,
23476 precision: None,
23477 },
23478 double_colon_syntax: false,
23479 trailing_comments: Vec::new(),
23480 format: None,
23481 default: None,
23482 inferred_type: None,
23483 })))
23484 }
23485 }
23486 DialectType::PostgreSQL
23487 | DialectType::Materialize
23488 | DialectType::RisingWave => {
23489 if has_zone {
23490 // PostgreSQL with zone: CAST(x AS TIMESTAMPTZ)
23491 Ok(Expression::Cast(Box::new(Cast {
23492 this,
23493 to: DataType::Timestamp {
23494 timezone: true,
23495 precision: None,
23496 },
23497 double_colon_syntax: false,
23498 trailing_comments: Vec::new(),
23499 format: None,
23500 default: None,
23501 inferred_type: None,
23502 })))
23503 } else {
23504 // PostgreSQL: CAST(x AS TIMESTAMP)
23505 Ok(Expression::Cast(Box::new(Cast {
23506 this,
23507 to: DataType::Timestamp {
23508 timezone: false,
23509 precision: None,
23510 },
23511 double_colon_syntax: false,
23512 trailing_comments: Vec::new(),
23513 format: None,
23514 default: None,
23515 inferred_type: None,
23516 })))
23517 }
23518 }
23519 DialectType::Snowflake => {
23520 if has_zone {
23521 // Snowflake with zone: CAST(x AS TIMESTAMPTZ)
23522 Ok(Expression::Cast(Box::new(Cast {
23523 this,
23524 to: DataType::Timestamp {
23525 timezone: true,
23526 precision: None,
23527 },
23528 double_colon_syntax: false,
23529 trailing_comments: Vec::new(),
23530 format: None,
23531 default: None,
23532 inferred_type: None,
23533 })))
23534 } else {
23535 // Snowflake: CAST(x AS TIMESTAMP)
23536 Ok(Expression::Cast(Box::new(Cast {
23537 this,
23538 to: DataType::Timestamp {
23539 timezone: false,
23540 precision: None,
23541 },
23542 double_colon_syntax: false,
23543 trailing_comments: Vec::new(),
23544 format: None,
23545 default: None,
23546 inferred_type: None,
23547 })))
23548 }
23549 }
23550 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
23551 if has_zone {
23552 // Presto/Trino with zone: CAST(x AS TIMESTAMP WITH TIME ZONE)
23553 // Check for precision from sub-second digits
23554 let precision =
23555 if let Expression::Literal(Literal::String(ref s)) = this {
23556 if let Some(dot_pos) = s.rfind('.') {
23557 let frac = &s[dot_pos + 1..];
23558 let digit_count = frac
23559 .chars()
23560 .take_while(|c| c.is_ascii_digit())
23561 .count();
23562 if digit_count > 0
23563 && matches!(target, DialectType::Trino)
23564 {
23565 Some(digit_count as u32)
23566 } else {
23567 None
23568 }
23569 } else {
23570 None
23571 }
23572 } else {
23573 None
23574 };
23575 let dt = if let Some(prec) = precision {
23576 DataType::Timestamp {
23577 timezone: true,
23578 precision: Some(prec),
23579 }
23580 } else {
23581 DataType::Timestamp {
23582 timezone: true,
23583 precision: None,
23584 }
23585 };
23586 Ok(Expression::Cast(Box::new(Cast {
23587 this,
23588 to: dt,
23589 double_colon_syntax: false,
23590 trailing_comments: Vec::new(),
23591 format: None,
23592 default: None,
23593 inferred_type: None,
23594 })))
23595 } else {
23596 // Check for sub-second precision for Trino
23597 let precision =
23598 if let Expression::Literal(Literal::String(ref s)) = this {
23599 if let Some(dot_pos) = s.rfind('.') {
23600 let frac = &s[dot_pos + 1..];
23601 let digit_count = frac
23602 .chars()
23603 .take_while(|c| c.is_ascii_digit())
23604 .count();
23605 if digit_count > 0
23606 && matches!(target, DialectType::Trino)
23607 {
23608 Some(digit_count as u32)
23609 } else {
23610 None
23611 }
23612 } else {
23613 None
23614 }
23615 } else {
23616 None
23617 };
23618 let dt = DataType::Timestamp {
23619 timezone: false,
23620 precision,
23621 };
23622 Ok(Expression::Cast(Box::new(Cast {
23623 this,
23624 to: dt,
23625 double_colon_syntax: false,
23626 trailing_comments: Vec::new(),
23627 format: None,
23628 default: None,
23629 inferred_type: None,
23630 })))
23631 }
23632 }
23633 DialectType::Redshift => {
23634 if has_zone {
23635 // Redshift with zone: CAST(x AS TIMESTAMP WITH TIME ZONE)
23636 Ok(Expression::Cast(Box::new(Cast {
23637 this,
23638 to: DataType::Timestamp {
23639 timezone: true,
23640 precision: None,
23641 },
23642 double_colon_syntax: false,
23643 trailing_comments: Vec::new(),
23644 format: None,
23645 default: None,
23646 inferred_type: None,
23647 })))
23648 } else {
23649 // Redshift: CAST(x AS TIMESTAMP)
23650 Ok(Expression::Cast(Box::new(Cast {
23651 this,
23652 to: DataType::Timestamp {
23653 timezone: false,
23654 precision: None,
23655 },
23656 double_colon_syntax: false,
23657 trailing_comments: Vec::new(),
23658 format: None,
23659 default: None,
23660 inferred_type: None,
23661 })))
23662 }
23663 }
23664 _ => {
23665 // Default: CAST(x AS TIMESTAMP)
23666 Ok(Expression::Cast(Box::new(Cast {
23667 this,
23668 to: DataType::Timestamp {
23669 timezone: false,
23670 precision: None,
23671 },
23672 double_colon_syntax: false,
23673 trailing_comments: Vec::new(),
23674 format: None,
23675 default: None,
23676 inferred_type: None,
23677 })))
23678 }
23679 }
23680 } else {
23681 Ok(e)
23682 }
23683 }
23684
            Action::DateToDateStrConvert => {
                // DATE_TO_DATE_STR(x) -> CAST(x AS <text type>), where the text type is
                // chosen per target dialect (TEXT for DuckDB, STRING for the Hive
                // family, VARCHAR otherwise).
                if let Expression::Function(f) = e {
                    // NOTE(review): panics if the function was parsed with zero
                    // arguments; assumed guaranteed by the action mapping — confirm.
                    let arg = f.args.into_iter().next().unwrap();
                    let str_type = match target {
                        DialectType::DuckDB => DataType::Text,
                        DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
                            DataType::Custom {
                                name: "STRING".to_string(),
                            }
                        }
                        // NOTE(review): this arm is identical to the `_` fallback below;
                        // presumably kept explicit to document the dialects it covers.
                        DialectType::Presto
                        | DialectType::Trino
                        | DialectType::Athena
                        | DialectType::Drill => DataType::VarChar {
                            length: None,
                            parenthesized_length: false,
                        },
                        _ => DataType::VarChar {
                            length: None,
                            parenthesized_length: false,
                        },
                    };
                    Ok(Expression::Cast(Box::new(Cast {
                        this: arg,
                        to: str_type,
                        double_colon_syntax: false,
                        trailing_comments: Vec::new(),
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                } else {
                    // Not a function call: leave the expression untouched.
                    Ok(e)
                }
            }
23721
            Action::DateToDiConvert => {
                // DATE_TO_DI(x) -> CAST(<format_func>(x, fmt) AS <int>): render the date
                // as a yyyymmdd string with the dialect's formatting function, then cast
                // the result to an integer.
                if let Expression::Function(f) = e {
                    // NOTE(review): panics on a zero-argument call; assumed guaranteed upstream.
                    let arg = f.args.into_iter().next().unwrap();
                    let inner = match target {
                        DialectType::DuckDB => {
                            // STRFTIME(x, '%Y%m%d')
                            Expression::Function(Box::new(Function::new(
                                "STRFTIME".to_string(),
                                vec![arg, Expression::string("%Y%m%d")],
                            )))
                        }
                        DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
                            // DATE_FORMAT(x, 'yyyyMMdd') — Java-style format tokens
                            Expression::Function(Box::new(Function::new(
                                "DATE_FORMAT".to_string(),
                                vec![arg, Expression::string("yyyyMMdd")],
                            )))
                        }
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // DATE_FORMAT(x, '%Y%m%d') — MySQL-style format tokens
                            Expression::Function(Box::new(Function::new(
                                "DATE_FORMAT".to_string(),
                                vec![arg, Expression::string("%Y%m%d")],
                            )))
                        }
                        DialectType::Drill => {
                            // TO_DATE(x, 'yyyyMMdd')
                            // NOTE(review): Drill's TO_DATE parses a string into a date
                            // rather than formatting one; TO_CHAR(x, 'yyyyMMdd') may be
                            // the intended formatting function here — verify.
                            Expression::Function(Box::new(Function::new(
                                "TO_DATE".to_string(),
                                vec![arg, Expression::string("yyyyMMdd")],
                            )))
                        }
                        _ => {
                            // Default: STRFTIME(x, '%Y%m%d')
                            Expression::Function(Box::new(Function::new(
                                "STRFTIME".to_string(),
                                vec![arg, Expression::string("%Y%m%d")],
                            )))
                        }
                    };
                    // Use INT (not INTEGER) for dialects that reject the long spelling.
                    let int_type = match target {
                        DialectType::Presto
                        | DialectType::Trino
                        | DialectType::Athena
                        | DialectType::TSQL
                        | DialectType::Fabric
                        | DialectType::SQLite
                        | DialectType::Redshift => DataType::Custom {
                            name: "INT".to_string(),
                        },
                        _ => DataType::Int {
                            length: None,
                            integer_spelling: false,
                        },
                    };
                    Ok(Expression::Cast(Box::new(Cast {
                        this: inner,
                        to: int_type,
                        double_colon_syntax: false,
                        trailing_comments: Vec::new(),
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                } else {
                    // Not a function call: leave the expression untouched.
                    Ok(e)
                }
            }
23792
            Action::DiToDateConvert => {
                // DI_TO_DATE(x) -> dialect-specific conversion of a yyyymmdd integer
                // into a DATE value (cast to string, parse, then cast to date if the
                // parse function does not already yield a DATE).
                if let Expression::Function(f) = e {
                    // NOTE(review): panics on a zero-argument call; assumed guaranteed upstream.
                    let arg = f.args.into_iter().next().unwrap();
                    match target {
                        DialectType::DuckDB => {
                            // CAST(STRPTIME(CAST(x AS TEXT), '%Y%m%d') AS DATE)
                            let cast_text = Expression::Cast(Box::new(Cast {
                                this: arg,
                                to: DataType::Text,
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                                inferred_type: None,
                            }));
                            let strptime = Expression::Function(Box::new(Function::new(
                                "STRPTIME".to_string(),
                                vec![cast_text, Expression::string("%Y%m%d")],
                            )));
                            Ok(Expression::Cast(Box::new(Cast {
                                this: strptime,
                                to: DataType::Date,
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                                inferred_type: None,
                            })))
                        }
                        DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
                            // TO_DATE(CAST(x AS STRING), 'yyyyMMdd') — TO_DATE already
                            // returns a DATE here, so no outer cast is emitted.
                            let cast_str = Expression::Cast(Box::new(Cast {
                                this: arg,
                                to: DataType::Custom {
                                    name: "STRING".to_string(),
                                },
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                                inferred_type: None,
                            }));
                            Ok(Expression::Function(Box::new(Function::new(
                                "TO_DATE".to_string(),
                                vec![cast_str, Expression::string("yyyyMMdd")],
                            ))))
                        }
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // CAST(DATE_PARSE(CAST(x AS VARCHAR), '%Y%m%d') AS DATE) —
                            // DATE_PARSE yields a timestamp, hence the outer cast.
                            let cast_varchar = Expression::Cast(Box::new(Cast {
                                this: arg,
                                to: DataType::VarChar {
                                    length: None,
                                    parenthesized_length: false,
                                },
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                                inferred_type: None,
                            }));
                            let date_parse = Expression::Function(Box::new(Function::new(
                                "DATE_PARSE".to_string(),
                                vec![cast_varchar, Expression::string("%Y%m%d")],
                            )));
                            Ok(Expression::Cast(Box::new(Cast {
                                this: date_parse,
                                to: DataType::Date,
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                                inferred_type: None,
                            })))
                        }
                        DialectType::Drill => {
                            // TO_DATE(CAST(x AS VARCHAR), 'yyyyMMdd')
                            let cast_varchar = Expression::Cast(Box::new(Cast {
                                this: arg,
                                to: DataType::VarChar {
                                    length: None,
                                    parenthesized_length: false,
                                },
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                                inferred_type: None,
                            }));
                            Ok(Expression::Function(Box::new(Function::new(
                                "TO_DATE".to_string(),
                                vec![cast_varchar, Expression::string("yyyyMMdd")],
                            ))))
                        }
                        // Unhandled targets: re-emit the canonical DI_TO_DATE call.
                        _ => Ok(Expression::Function(Box::new(Function::new(
                            "DI_TO_DATE".to_string(),
                            vec![arg],
                        )))),
                    }
                } else {
                    // Not a function call: leave the expression untouched.
                    Ok(e)
                }
            }
23897
23898 Action::TsOrDiToDiConvert => {
23899 // TS_OR_DI_TO_DI(x) -> CAST(SUBSTR(REPLACE(CAST(x AS type), '-', ''), 1, 8) AS INT)
23900 if let Expression::Function(f) = e {
23901 let arg = f.args.into_iter().next().unwrap();
23902 let str_type = match target {
23903 DialectType::DuckDB => DataType::Text,
23904 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
23905 DataType::Custom {
23906 name: "STRING".to_string(),
23907 }
23908 }
23909 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
23910 DataType::VarChar {
23911 length: None,
23912 parenthesized_length: false,
23913 }
23914 }
23915 _ => DataType::VarChar {
23916 length: None,
23917 parenthesized_length: false,
23918 },
23919 };
23920 let cast_str = Expression::Cast(Box::new(Cast {
23921 this: arg,
23922 to: str_type,
23923 double_colon_syntax: false,
23924 trailing_comments: Vec::new(),
23925 format: None,
23926 default: None,
23927 inferred_type: None,
23928 }));
23929 let replace_expr = Expression::Function(Box::new(Function::new(
23930 "REPLACE".to_string(),
23931 vec![cast_str, Expression::string("-"), Expression::string("")],
23932 )));
23933 let substr_name = match target {
23934 DialectType::DuckDB
23935 | DialectType::Hive
23936 | DialectType::Spark
23937 | DialectType::Databricks => "SUBSTR",
23938 _ => "SUBSTR",
23939 };
23940 let substr = Expression::Function(Box::new(Function::new(
23941 substr_name.to_string(),
23942 vec![replace_expr, Expression::number(1), Expression::number(8)],
23943 )));
23944 // Use INT (not INTEGER) for Presto/Trino etc.
23945 let int_type = match target {
23946 DialectType::Presto
23947 | DialectType::Trino
23948 | DialectType::Athena
23949 | DialectType::TSQL
23950 | DialectType::Fabric
23951 | DialectType::SQLite
23952 | DialectType::Redshift => DataType::Custom {
23953 name: "INT".to_string(),
23954 },
23955 _ => DataType::Int {
23956 length: None,
23957 integer_spelling: false,
23958 },
23959 };
23960 Ok(Expression::Cast(Box::new(Cast {
23961 this: substr,
23962 to: int_type,
23963 double_colon_syntax: false,
23964 trailing_comments: Vec::new(),
23965 format: None,
23966 default: None,
23967 inferred_type: None,
23968 })))
23969 } else {
23970 Ok(e)
23971 }
23972 }
23973
            Action::UnixToStrConvert => {
                // UNIX_TO_STR(x, fmt) — format a unix epoch as a string.
                //
                // Literal formats are wrapped in Expression::UnixToStr and rendered by
                // the generator; non-literal formats (e.g. a column reference) are
                // expanded into the target dialect's function calls right here, since
                // the UnixToStr node only carries a literal format string.
                if let Expression::Function(f) = e {
                    let mut args = f.args;
                    // NOTE(review): panics if no arguments were parsed; assumed upstream.
                    let this = args.remove(0);
                    let fmt_expr = if !args.is_empty() {
                        Some(args.remove(0))
                    } else {
                        None
                    };

                    // Extract the format only when it is a plain string literal.
                    let fmt_str = fmt_expr.as_ref().and_then(|f| {
                        if let Expression::Literal(Literal::String(s)) = f {
                            Some(s.clone())
                        } else {
                            None
                        }
                    });

                    if let Some(fmt_string) = fmt_str {
                        // String literal format -> use UnixToStr expression (generator handles it)
                        Ok(Expression::UnixToStr(Box::new(
                            crate::expressions::UnixToStr {
                                this: Box::new(this),
                                format: Some(fmt_string),
                            },
                        )))
                    } else if let Some(fmt_e) = fmt_expr {
                        // Non-literal format (e.g. identifier `y`) -> build target expression directly
                        match target {
                            DialectType::DuckDB => {
                                // STRFTIME(TO_TIMESTAMP(x), y)
                                let to_ts = Expression::Function(Box::new(Function::new(
                                    "TO_TIMESTAMP".to_string(),
                                    vec![this],
                                )));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "STRFTIME".to_string(),
                                    vec![to_ts, fmt_e],
                                ))))
                            }
                            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                                // DATE_FORMAT(FROM_UNIXTIME(x), y)
                                let from_unix = Expression::Function(Box::new(Function::new(
                                    "FROM_UNIXTIME".to_string(),
                                    vec![this],
                                )));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_FORMAT".to_string(),
                                    vec![from_unix, fmt_e],
                                ))))
                            }
                            DialectType::Hive
                            | DialectType::Spark
                            | DialectType::Databricks
                            | DialectType::Doris
                            | DialectType::StarRocks => {
                                // FROM_UNIXTIME(x, y) — these dialects accept the format directly
                                Ok(Expression::Function(Box::new(Function::new(
                                    "FROM_UNIXTIME".to_string(),
                                    vec![this, fmt_e],
                                ))))
                            }
                            _ => {
                                // Default: keep as UNIX_TO_STR(x, y)
                                Ok(Expression::Function(Box::new(Function::new(
                                    "UNIX_TO_STR".to_string(),
                                    vec![this, fmt_e],
                                ))))
                            }
                        }
                    } else {
                        // No format argument at all: let the generator pick its default.
                        Ok(Expression::UnixToStr(Box::new(
                            crate::expressions::UnixToStr {
                                this: Box::new(this),
                                format: None,
                            },
                        )))
                    }
                } else {
                    // Not a function call: leave the expression untouched.
                    Ok(e)
                }
            }
24058
24059 Action::UnixToTimeConvert => {
24060 // UNIX_TO_TIME(x) -> convert to Expression::UnixToTime for generator
24061 if let Expression::Function(f) = e {
24062 let arg = f.args.into_iter().next().unwrap();
24063 Ok(Expression::UnixToTime(Box::new(
24064 crate::expressions::UnixToTime {
24065 this: Box::new(arg),
24066 scale: None,
24067 zone: None,
24068 hours: None,
24069 minutes: None,
24070 format: None,
24071 target_type: None,
24072 },
24073 )))
24074 } else {
24075 Ok(e)
24076 }
24077 }
24078
            Action::UnixToTimeStrConvert => {
                // UNIX_TO_TIME_STR(x) -> dialect-specific "epoch to timestamp string".
                if let Expression::Function(f) = e {
                    // NOTE(review): panics on a zero-argument call; assumed guaranteed upstream.
                    let arg = f.args.into_iter().next().unwrap();
                    match target {
                        DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
                            // FROM_UNIXTIME(x) — returns a string directly in these dialects.
                            Ok(Expression::Function(Box::new(Function::new(
                                "FROM_UNIXTIME".to_string(),
                                vec![arg],
                            ))))
                        }
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // CAST(FROM_UNIXTIME(x) AS VARCHAR) — FROM_UNIXTIME returns a
                            // timestamp here, so an explicit cast to text is required.
                            let from_unix = Expression::Function(Box::new(Function::new(
                                "FROM_UNIXTIME".to_string(),
                                vec![arg],
                            )));
                            Ok(Expression::Cast(Box::new(Cast {
                                this: from_unix,
                                to: DataType::VarChar {
                                    length: None,
                                    parenthesized_length: false,
                                },
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                                inferred_type: None,
                            })))
                        }
                        DialectType::DuckDB => {
                            // CAST(TO_TIMESTAMP(x) AS TEXT)
                            let to_ts = Expression::Function(Box::new(Function::new(
                                "TO_TIMESTAMP".to_string(),
                                vec![arg],
                            )));
                            Ok(Expression::Cast(Box::new(Cast {
                                this: to_ts,
                                to: DataType::Text,
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                                inferred_type: None,
                            })))
                        }
                        // Unhandled targets: re-emit the canonical call.
                        _ => Ok(Expression::Function(Box::new(Function::new(
                            "UNIX_TO_TIME_STR".to_string(),
                            vec![arg],
                        )))),
                    }
                } else {
                    // Not a function call: leave the expression untouched.
                    Ok(e)
                }
            }
24135
24136 Action::TimeToUnixConvert => {
24137 // TIME_TO_UNIX(x) -> convert to Expression::TimeToUnix for generator
24138 if let Expression::Function(f) = e {
24139 let arg = f.args.into_iter().next().unwrap();
24140 Ok(Expression::TimeToUnix(Box::new(
24141 crate::expressions::UnaryFunc {
24142 this: arg,
24143 original_name: None,
24144 inferred_type: None,
24145 },
24146 )))
24147 } else {
24148 Ok(e)
24149 }
24150 }
24151
24152 Action::TimeToStrConvert => {
24153 // TIME_TO_STR(x, fmt) -> convert to Expression::TimeToStr for generator
24154 if let Expression::Function(f) = e {
24155 let mut args = f.args;
24156 let this = args.remove(0);
24157 let fmt = match args.remove(0) {
24158 Expression::Literal(Literal::String(s)) => s,
24159 other => {
24160 return Ok(Expression::Function(Box::new(Function::new(
24161 "TIME_TO_STR".to_string(),
24162 vec![this, other],
24163 ))));
24164 }
24165 };
24166 Ok(Expression::TimeToStr(Box::new(
24167 crate::expressions::TimeToStr {
24168 this: Box::new(this),
24169 format: fmt,
24170 culture: None,
24171 zone: None,
24172 },
24173 )))
24174 } else {
24175 Ok(e)
24176 }
24177 }
24178
24179 Action::StrToUnixConvert => {
24180 // STR_TO_UNIX(x, fmt) -> convert to Expression::StrToUnix for generator
24181 if let Expression::Function(f) = e {
24182 let mut args = f.args;
24183 let this = args.remove(0);
24184 let fmt = match args.remove(0) {
24185 Expression::Literal(Literal::String(s)) => s,
24186 other => {
24187 return Ok(Expression::Function(Box::new(Function::new(
24188 "STR_TO_UNIX".to_string(),
24189 vec![this, other],
24190 ))));
24191 }
24192 };
24193 Ok(Expression::StrToUnix(Box::new(
24194 crate::expressions::StrToUnix {
24195 this: Some(Box::new(this)),
24196 format: Some(fmt),
24197 },
24198 )))
24199 } else {
24200 Ok(e)
24201 }
24202 }
24203
            Action::TimeStrToUnixConvert => {
                // TIME_STR_TO_UNIX(x) -> dialect-specific "timestamp string to epoch".
                if let Expression::Function(f) = e {
                    // NOTE(review): panics on a zero-argument call; assumed guaranteed upstream.
                    let arg = f.args.into_iter().next().unwrap();
                    match target {
                        DialectType::DuckDB => {
                            // EPOCH(CAST(x AS TIMESTAMP))
                            let cast_ts = Expression::Cast(Box::new(Cast {
                                this: arg,
                                to: DataType::Timestamp {
                                    timezone: false,
                                    precision: None,
                                },
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                                inferred_type: None,
                            }));
                            Ok(Expression::Function(Box::new(Function::new(
                                "EPOCH".to_string(),
                                vec![cast_ts],
                            ))))
                        }
                        DialectType::Hive
                        | DialectType::Doris
                        | DialectType::StarRocks
                        | DialectType::MySQL => {
                            // UNIX_TIMESTAMP(x) — parses the default timestamp format.
                            Ok(Expression::Function(Box::new(Function::new(
                                "UNIX_TIMESTAMP".to_string(),
                                vec![arg],
                            ))))
                        }
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // TO_UNIXTIME(DATE_PARSE(x, '%Y-%m-%d %T'))
                            let date_parse = Expression::Function(Box::new(Function::new(
                                "DATE_PARSE".to_string(),
                                vec![arg, Expression::string("%Y-%m-%d %T")],
                            )));
                            Ok(Expression::Function(Box::new(Function::new(
                                "TO_UNIXTIME".to_string(),
                                vec![date_parse],
                            ))))
                        }
                        // Unhandled targets: re-emit the canonical call.
                        _ => Ok(Expression::Function(Box::new(Function::new(
                            "TIME_STR_TO_UNIX".to_string(),
                            vec![arg],
                        )))),
                    }
                } else {
                    // Not a function call: leave the expression untouched.
                    Ok(e)
                }
            }
24258
            Action::TimeToTimeStrConvert => {
                // TIME_TO_TIME_STR(x) -> CAST(x AS <str type>), chosen per dialect.
                if let Expression::Function(f) = e {
                    // NOTE(review): panics on a zero-argument call; assumed guaranteed upstream.
                    let arg = f.args.into_iter().next().unwrap();
                    let str_type = match target {
                        DialectType::DuckDB => DataType::Text,
                        DialectType::Hive
                        | DialectType::Spark
                        | DialectType::Databricks
                        | DialectType::Doris
                        | DialectType::StarRocks => DataType::Custom {
                            name: "STRING".to_string(),
                        },
                        // Redshift uses the explicit VARCHAR(MAX) spelling.
                        DialectType::Redshift => DataType::Custom {
                            name: "VARCHAR(MAX)".to_string(),
                        },
                        _ => DataType::VarChar {
                            length: None,
                            parenthesized_length: false,
                        },
                    };
                    Ok(Expression::Cast(Box::new(Cast {
                        this: arg,
                        to: str_type,
                        double_colon_syntax: false,
                        trailing_comments: Vec::new(),
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                } else {
                    // Not a function call: leave the expression untouched.
                    Ok(e)
                }
            }
24293
            Action::DateTruncSwapArgs => {
                // DATE_TRUNC('unit', x) in the Generic argument order -> the target
                // dialect's argument order and/or function name.
                if let Expression::Function(f) = e {
                    if f.args.len() == 2 {
                        let unit_arg = f.args[0].clone();
                        let expr_arg = f.args[1].clone();
                        // The unit must be a string literal; anything else passes through.
                        let unit_str = match &unit_arg {
                            Expression::Literal(Literal::String(s)) => s.to_uppercase(),
                            _ => return Ok(Expression::Function(f)),
                        };
                        match target {
                            DialectType::BigQuery => {
                                // BigQuery: DATE_TRUNC(x, UNIT) — the unit is a bare
                                // keyword, so it is emitted as an unquoted identifier.
                                let unit_ident =
                                    Expression::Column(crate::expressions::Column {
                                        name: crate::expressions::Identifier::new(unit_str),
                                        table: None,
                                        join_mark: false,
                                        trailing_comments: Vec::new(),
                                        span: None,
                                        inferred_type: None,
                                    });
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![expr_arg, unit_ident],
                                ))))
                            }
                            DialectType::Doris => {
                                // Doris: DATE_TRUNC(x, 'UNIT') — swapped, quoted unit
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![expr_arg, Expression::string(&unit_str)],
                                ))))
                            }
                            DialectType::StarRocks => {
                                // StarRocks: DATE_TRUNC('UNIT', x) — keep standard order
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![Expression::string(&unit_str), expr_arg],
                                ))))
                            }
                            DialectType::Spark | DialectType::Databricks => {
                                // Spark: TRUNC(x, 'UNIT')
                                Ok(Expression::Function(Box::new(Function::new(
                                    "TRUNC".to_string(),
                                    vec![expr_arg, Expression::string(&unit_str)],
                                ))))
                            }
                            DialectType::MySQL => {
                                // MySQL has no DATE_TRUNC; expand per unit via helper.
                                Self::date_trunc_to_mysql(&unit_str, &expr_arg)
                            }
                            // Other targets already accept the Generic form.
                            _ => Ok(Expression::Function(f)),
                        }
                    } else {
                        // Unexpected arity: pass the call through unchanged.
                        Ok(Expression::Function(f))
                    }
                } else {
                    // Not a function call: leave the expression untouched.
                    Ok(e)
                }
            }
24356
            Action::TimestampTruncConvert => {
                // TIMESTAMP_TRUNC(x, UNIT[, tz]) in the Generic form -> target-specific
                // truncation function, argument order, and timezone handling.
                if let Expression::Function(f) = e {
                    if f.args.len() >= 2 {
                        let expr_arg = f.args[0].clone();
                        let unit_arg = f.args[1].clone();
                        // Optional third argument is a timezone.
                        let tz_arg = if f.args.len() >= 3 {
                            Some(f.args[2].clone())
                        } else {
                            None
                        };
                        // The unit may arrive as a string literal or as a bare
                        // identifier (parsed as a column); anything else passes through.
                        let unit_str = match &unit_arg {
                            Expression::Literal(Literal::String(s)) => s.to_uppercase(),
                            Expression::Column(c) => c.name.name.to_uppercase(),
                            _ => {
                                return Ok(Expression::Function(f));
                            }
                        };
                        match target {
                            DialectType::Spark | DialectType::Databricks => {
                                // Spark: DATE_TRUNC('UNIT', x)
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![Expression::string(&unit_str), expr_arg],
                                ))))
                            }
                            DialectType::Doris | DialectType::StarRocks => {
                                // Doris/StarRocks: DATE_TRUNC(x, 'UNIT') — swapped order
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![expr_arg, Expression::string(&unit_str)],
                                ))))
                            }
                            DialectType::BigQuery => {
                                // BigQuery: TIMESTAMP_TRUNC(x, UNIT[, tz]) — keep the
                                // function name but emit the unit as a bare identifier.
                                let unit_ident =
                                    Expression::Column(crate::expressions::Column {
                                        name: crate::expressions::Identifier::new(unit_str),
                                        table: None,
                                        join_mark: false,
                                        trailing_comments: Vec::new(),
                                        span: None,
                                        inferred_type: None,
                                    });
                                let mut args = vec![expr_arg, unit_ident];
                                if let Some(tz) = tz_arg {
                                    args.push(tz);
                                }
                                Ok(Expression::Function(Box::new(Function::new(
                                    "TIMESTAMP_TRUNC".to_string(),
                                    args,
                                ))))
                            }
                            DialectType::DuckDB => {
                                // DuckDB with timezone: DATE_TRUNC('UNIT', x AT TIME ZONE 'tz') AT TIME ZONE 'tz'
                                if let Some(tz) = tz_arg {
                                    // Non-literal timezone expressions fall back to 'UTC'.
                                    let tz_str = match &tz {
                                        Expression::Literal(Literal::String(s)) => s.clone(),
                                        _ => "UTC".to_string(),
                                    };
                                    // x AT TIME ZONE 'tz'
                                    let at_tz = Expression::AtTimeZone(Box::new(
                                        crate::expressions::AtTimeZone {
                                            this: expr_arg,
                                            zone: Expression::string(&tz_str),
                                        },
                                    ));
                                    // DATE_TRUNC('UNIT', x AT TIME ZONE 'tz')
                                    let trunc = Expression::Function(Box::new(Function::new(
                                        "DATE_TRUNC".to_string(),
                                        vec![Expression::string(&unit_str), at_tz],
                                    )));
                                    // DATE_TRUNC(...) AT TIME ZONE 'tz'
                                    Ok(Expression::AtTimeZone(Box::new(
                                        crate::expressions::AtTimeZone {
                                            this: trunc,
                                            zone: Expression::string(&tz_str),
                                        },
                                    )))
                                } else {
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "DATE_TRUNC".to_string(),
                                        vec![Expression::string(&unit_str), expr_arg],
                                    ))))
                                }
                            }
                            DialectType::Presto
                            | DialectType::Trino
                            | DialectType::Athena
                            | DialectType::Snowflake => {
                                // Presto/Snowflake: DATE_TRUNC('UNIT', x) — the timezone
                                // argument, if any, is intentionally dropped.
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![Expression::string(&unit_str), expr_arg],
                                ))))
                            }
                            _ => {
                                // Most other dialects: DATE_TRUNC('UNIT', x[, tz]),
                                // forwarding the timezone argument verbatim.
                                let mut args = vec![Expression::string(&unit_str), expr_arg];
                                if let Some(tz) = tz_arg {
                                    args.push(tz);
                                }
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    args,
                                ))))
                            }
                        }
                    } else {
                        // Fewer than two args: pass the call through unchanged.
                        Ok(Expression::Function(f))
                    }
                } else {
                    // Not a function call: leave the expression untouched.
                    Ok(e)
                }
            }
24473
24474 Action::StrToDateConvert => {
24475 // STR_TO_DATE(x, fmt) from Generic -> dialect-specific date parsing
24476 if let Expression::Function(f) = e {
24477 if f.args.len() == 2 {
24478 let mut args = f.args;
24479 let this = args.remove(0);
24480 let fmt_expr = args.remove(0);
24481 let fmt_str = match &fmt_expr {
24482 Expression::Literal(Literal::String(s)) => Some(s.clone()),
24483 _ => None,
24484 };
24485 let default_date = "%Y-%m-%d";
24486 let default_time = "%Y-%m-%d %H:%M:%S";
24487 let is_default = fmt_str
24488 .as_ref()
24489 .map_or(false, |f| f == default_date || f == default_time);
24490
24491 if is_default {
24492 // Default format: handle per-dialect
24493 match target {
24494 DialectType::MySQL
24495 | DialectType::Doris
24496 | DialectType::StarRocks => {
24497 // Keep STR_TO_DATE(x, fmt) as-is
24498 Ok(Expression::Function(Box::new(Function::new(
24499 "STR_TO_DATE".to_string(),
24500 vec![this, fmt_expr],
24501 ))))
24502 }
24503 DialectType::Hive => {
24504 // Hive: CAST(x AS DATE)
24505 Ok(Expression::Cast(Box::new(Cast {
24506 this,
24507 to: DataType::Date,
24508 double_colon_syntax: false,
24509 trailing_comments: Vec::new(),
24510 format: None,
24511 default: None,
24512 inferred_type: None,
24513 })))
24514 }
24515 DialectType::Presto
24516 | DialectType::Trino
24517 | DialectType::Athena => {
24518 // Presto: CAST(DATE_PARSE(x, '%Y-%m-%d') AS DATE)
24519 let date_parse =
24520 Expression::Function(Box::new(Function::new(
24521 "DATE_PARSE".to_string(),
24522 vec![this, fmt_expr],
24523 )));
24524 Ok(Expression::Cast(Box::new(Cast {
24525 this: date_parse,
24526 to: DataType::Date,
24527 double_colon_syntax: false,
24528 trailing_comments: Vec::new(),
24529 format: None,
24530 default: None,
24531 inferred_type: None,
24532 })))
24533 }
24534 _ => {
24535 // Others: TsOrDsToDate (delegates to generator)
24536 Ok(Expression::TsOrDsToDate(Box::new(
24537 crate::expressions::TsOrDsToDate {
24538 this: Box::new(this),
24539 format: None,
24540 safe: None,
24541 },
24542 )))
24543 }
24544 }
24545 } else if let Some(fmt) = fmt_str {
24546 match target {
24547 DialectType::Doris
24548 | DialectType::StarRocks
24549 | DialectType::MySQL => {
24550 // Keep STR_TO_DATE but with normalized format (%H:%M:%S -> %T, %-d -> %e)
24551 let mut normalized = fmt.clone();
24552 normalized = normalized.replace("%-d", "%e");
24553 normalized = normalized.replace("%-m", "%c");
24554 normalized = normalized.replace("%H:%M:%S", "%T");
24555 Ok(Expression::Function(Box::new(Function::new(
24556 "STR_TO_DATE".to_string(),
24557 vec![this, Expression::string(&normalized)],
24558 ))))
24559 }
24560 DialectType::Hive => {
24561 // Hive: CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(x, java_fmt)) AS DATE)
24562 let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
24563 let unix_ts =
24564 Expression::Function(Box::new(Function::new(
24565 "UNIX_TIMESTAMP".to_string(),
24566 vec![this, Expression::string(&java_fmt)],
24567 )));
24568 let from_unix =
24569 Expression::Function(Box::new(Function::new(
24570 "FROM_UNIXTIME".to_string(),
24571 vec![unix_ts],
24572 )));
24573 Ok(Expression::Cast(Box::new(Cast {
24574 this: from_unix,
24575 to: DataType::Date,
24576 double_colon_syntax: false,
24577 trailing_comments: Vec::new(),
24578 format: None,
24579 default: None,
24580 inferred_type: None,
24581 })))
24582 }
24583 DialectType::Spark | DialectType::Databricks => {
24584 // Spark: TO_DATE(x, java_fmt)
24585 let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
24586 Ok(Expression::Function(Box::new(Function::new(
24587 "TO_DATE".to_string(),
24588 vec![this, Expression::string(&java_fmt)],
24589 ))))
24590 }
24591 DialectType::Drill => {
24592 // Drill: TO_DATE(x, java_fmt) with T quoted as 'T' in Java format
24593 // The generator's string literal escaping will double the quotes: 'T' -> ''T''
24594 let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
24595 let java_fmt = java_fmt.replace('T', "'T'");
24596 Ok(Expression::Function(Box::new(Function::new(
24597 "TO_DATE".to_string(),
24598 vec![this, Expression::string(&java_fmt)],
24599 ))))
24600 }
24601 _ => {
24602 // For other dialects: use TsOrDsToDate which delegates to generator
24603 Ok(Expression::TsOrDsToDate(Box::new(
24604 crate::expressions::TsOrDsToDate {
24605 this: Box::new(this),
24606 format: Some(fmt),
24607 safe: None,
24608 },
24609 )))
24610 }
24611 }
24612 } else {
24613 // Non-string format - keep as-is
24614 let mut new_args = Vec::new();
24615 new_args.push(this);
24616 new_args.push(fmt_expr);
24617 Ok(Expression::Function(Box::new(Function::new(
24618 "STR_TO_DATE".to_string(),
24619 new_args,
24620 ))))
24621 }
24622 } else {
24623 Ok(Expression::Function(f))
24624 }
24625 } else {
24626 Ok(e)
24627 }
24628 }
24629
24630 Action::TsOrDsAddConvert => {
24631 // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> dialect-specific DATE_ADD
24632 if let Expression::Function(f) = e {
24633 if f.args.len() == 3 {
24634 let mut args = f.args;
24635 let x = args.remove(0);
24636 let n = args.remove(0);
24637 let unit_expr = args.remove(0);
24638 let unit_str = match &unit_expr {
24639 Expression::Literal(Literal::String(s)) => s.to_uppercase(),
24640 _ => "DAY".to_string(),
24641 };
24642
24643 match target {
24644 DialectType::Hive
24645 | DialectType::Spark
24646 | DialectType::Databricks => {
24647 // DATE_ADD(x, n) - only supports DAY unit
24648 Ok(Expression::Function(Box::new(Function::new(
24649 "DATE_ADD".to_string(),
24650 vec![x, n],
24651 ))))
24652 }
24653 DialectType::MySQL => {
24654 // DATE_ADD(x, INTERVAL n UNIT)
24655 let iu = match unit_str.to_uppercase().as_str() {
24656 "YEAR" => crate::expressions::IntervalUnit::Year,
24657 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
24658 "MONTH" => crate::expressions::IntervalUnit::Month,
24659 "WEEK" => crate::expressions::IntervalUnit::Week,
24660 "HOUR" => crate::expressions::IntervalUnit::Hour,
24661 "MINUTE" => crate::expressions::IntervalUnit::Minute,
24662 "SECOND" => crate::expressions::IntervalUnit::Second,
24663 _ => crate::expressions::IntervalUnit::Day,
24664 };
24665 let interval = Expression::Interval(Box::new(
24666 crate::expressions::Interval {
24667 this: Some(n),
24668 unit: Some(
24669 crate::expressions::IntervalUnitSpec::Simple {
24670 unit: iu,
24671 use_plural: false,
24672 },
24673 ),
24674 },
24675 ));
24676 Ok(Expression::Function(Box::new(Function::new(
24677 "DATE_ADD".to_string(),
24678 vec![x, interval],
24679 ))))
24680 }
24681 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
24682 // DATE_ADD('UNIT', n, CAST(CAST(x AS TIMESTAMP) AS DATE))
24683 let cast_ts = Expression::Cast(Box::new(Cast {
24684 this: x,
24685 to: DataType::Timestamp {
24686 precision: None,
24687 timezone: false,
24688 },
24689 double_colon_syntax: false,
24690 trailing_comments: Vec::new(),
24691 format: None,
24692 default: None,
24693 inferred_type: None,
24694 }));
24695 let cast_date = Expression::Cast(Box::new(Cast {
24696 this: cast_ts,
24697 to: DataType::Date,
24698 double_colon_syntax: false,
24699 trailing_comments: Vec::new(),
24700 format: None,
24701 default: None,
24702 inferred_type: None,
24703 }));
24704 Ok(Expression::Function(Box::new(Function::new(
24705 "DATE_ADD".to_string(),
24706 vec![Expression::string(&unit_str), n, cast_date],
24707 ))))
24708 }
24709 DialectType::DuckDB => {
24710 // CAST(x AS DATE) + INTERVAL n UNIT
24711 let cast_date = Expression::Cast(Box::new(Cast {
24712 this: x,
24713 to: DataType::Date,
24714 double_colon_syntax: false,
24715 trailing_comments: Vec::new(),
24716 format: None,
24717 default: None,
24718 inferred_type: None,
24719 }));
24720 let iu = match unit_str.to_uppercase().as_str() {
24721 "YEAR" => crate::expressions::IntervalUnit::Year,
24722 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
24723 "MONTH" => crate::expressions::IntervalUnit::Month,
24724 "WEEK" => crate::expressions::IntervalUnit::Week,
24725 "HOUR" => crate::expressions::IntervalUnit::Hour,
24726 "MINUTE" => crate::expressions::IntervalUnit::Minute,
24727 "SECOND" => crate::expressions::IntervalUnit::Second,
24728 _ => crate::expressions::IntervalUnit::Day,
24729 };
24730 let interval = Expression::Interval(Box::new(
24731 crate::expressions::Interval {
24732 this: Some(n),
24733 unit: Some(
24734 crate::expressions::IntervalUnitSpec::Simple {
24735 unit: iu,
24736 use_plural: false,
24737 },
24738 ),
24739 },
24740 ));
24741 Ok(Expression::Add(Box::new(crate::expressions::BinaryOp {
24742 left: cast_date,
24743 right: interval,
24744 left_comments: Vec::new(),
24745 operator_comments: Vec::new(),
24746 trailing_comments: Vec::new(),
24747 inferred_type: None,
24748 })))
24749 }
24750 DialectType::Drill => {
24751 // DATE_ADD(CAST(x AS DATE), INTERVAL n UNIT)
24752 let cast_date = Expression::Cast(Box::new(Cast {
24753 this: x,
24754 to: DataType::Date,
24755 double_colon_syntax: false,
24756 trailing_comments: Vec::new(),
24757 format: None,
24758 default: None,
24759 inferred_type: None,
24760 }));
24761 let iu = match unit_str.to_uppercase().as_str() {
24762 "YEAR" => crate::expressions::IntervalUnit::Year,
24763 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
24764 "MONTH" => crate::expressions::IntervalUnit::Month,
24765 "WEEK" => crate::expressions::IntervalUnit::Week,
24766 "HOUR" => crate::expressions::IntervalUnit::Hour,
24767 "MINUTE" => crate::expressions::IntervalUnit::Minute,
24768 "SECOND" => crate::expressions::IntervalUnit::Second,
24769 _ => crate::expressions::IntervalUnit::Day,
24770 };
24771 let interval = Expression::Interval(Box::new(
24772 crate::expressions::Interval {
24773 this: Some(n),
24774 unit: Some(
24775 crate::expressions::IntervalUnitSpec::Simple {
24776 unit: iu,
24777 use_plural: false,
24778 },
24779 ),
24780 },
24781 ));
24782 Ok(Expression::Function(Box::new(Function::new(
24783 "DATE_ADD".to_string(),
24784 vec![cast_date, interval],
24785 ))))
24786 }
24787 _ => {
24788 // Default: keep as TS_OR_DS_ADD
24789 Ok(Expression::Function(Box::new(Function::new(
24790 "TS_OR_DS_ADD".to_string(),
24791 vec![x, n, unit_expr],
24792 ))))
24793 }
24794 }
24795 } else {
24796 Ok(Expression::Function(f))
24797 }
24798 } else {
24799 Ok(e)
24800 }
24801 }
24802
24803 Action::DateFromUnixDateConvert => {
24804 // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, CAST('1970-01-01' AS DATE))
24805 if let Expression::Function(f) = e {
24806 // Keep as-is for dialects that support DATE_FROM_UNIX_DATE natively
24807 if matches!(
24808 target,
24809 DialectType::Spark | DialectType::Databricks | DialectType::BigQuery
24810 ) {
24811 return Ok(Expression::Function(Box::new(Function::new(
24812 "DATE_FROM_UNIX_DATE".to_string(),
24813 f.args,
24814 ))));
24815 }
24816 let n = f.args.into_iter().next().unwrap();
24817 let epoch_date = Expression::Cast(Box::new(Cast {
24818 this: Expression::string("1970-01-01"),
24819 to: DataType::Date,
24820 double_colon_syntax: false,
24821 trailing_comments: Vec::new(),
24822 format: None,
24823 default: None,
24824 inferred_type: None,
24825 }));
24826 match target {
24827 DialectType::DuckDB => {
24828 // CAST('1970-01-01' AS DATE) + INTERVAL n DAY
24829 let interval =
24830 Expression::Interval(Box::new(crate::expressions::Interval {
24831 this: Some(n),
24832 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
24833 unit: crate::expressions::IntervalUnit::Day,
24834 use_plural: false,
24835 }),
24836 }));
24837 Ok(Expression::Add(Box::new(
24838 crate::expressions::BinaryOp::new(epoch_date, interval),
24839 )))
24840 }
24841 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
24842 // DATE_ADD('DAY', n, CAST('1970-01-01' AS DATE))
24843 Ok(Expression::Function(Box::new(Function::new(
24844 "DATE_ADD".to_string(),
24845 vec![Expression::string("DAY"), n, epoch_date],
24846 ))))
24847 }
24848 DialectType::Snowflake | DialectType::Redshift | DialectType::TSQL => {
24849 // DATEADD(DAY, n, CAST('1970-01-01' AS DATE))
24850 Ok(Expression::Function(Box::new(Function::new(
24851 "DATEADD".to_string(),
24852 vec![
24853 Expression::Identifier(Identifier::new("DAY")),
24854 n,
24855 epoch_date,
24856 ],
24857 ))))
24858 }
24859 DialectType::BigQuery => {
24860 // DATE_ADD(CAST('1970-01-01' AS DATE), INTERVAL n DAY)
24861 let interval =
24862 Expression::Interval(Box::new(crate::expressions::Interval {
24863 this: Some(n),
24864 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
24865 unit: crate::expressions::IntervalUnit::Day,
24866 use_plural: false,
24867 }),
24868 }));
24869 Ok(Expression::Function(Box::new(Function::new(
24870 "DATE_ADD".to_string(),
24871 vec![epoch_date, interval],
24872 ))))
24873 }
24874 DialectType::MySQL
24875 | DialectType::Doris
24876 | DialectType::StarRocks
24877 | DialectType::Drill => {
24878 // DATE_ADD(CAST('1970-01-01' AS DATE), INTERVAL n DAY)
24879 let interval =
24880 Expression::Interval(Box::new(crate::expressions::Interval {
24881 this: Some(n),
24882 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
24883 unit: crate::expressions::IntervalUnit::Day,
24884 use_plural: false,
24885 }),
24886 }));
24887 Ok(Expression::Function(Box::new(Function::new(
24888 "DATE_ADD".to_string(),
24889 vec![epoch_date, interval],
24890 ))))
24891 }
24892 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
24893 // DATE_ADD(CAST('1970-01-01' AS DATE), n)
24894 Ok(Expression::Function(Box::new(Function::new(
24895 "DATE_ADD".to_string(),
24896 vec![epoch_date, n],
24897 ))))
24898 }
24899 DialectType::PostgreSQL
24900 | DialectType::Materialize
24901 | DialectType::RisingWave => {
24902 // CAST('1970-01-01' AS DATE) + INTERVAL 'n DAY'
24903 let n_str = match &n {
24904 Expression::Literal(Literal::Number(s)) => s.clone(),
24905 _ => Self::expr_to_string_static(&n),
24906 };
24907 let interval =
24908 Expression::Interval(Box::new(crate::expressions::Interval {
24909 this: Some(Expression::string(&format!("{} DAY", n_str))),
24910 unit: None,
24911 }));
24912 Ok(Expression::Add(Box::new(
24913 crate::expressions::BinaryOp::new(epoch_date, interval),
24914 )))
24915 }
24916 _ => {
24917 // Default: keep as-is
24918 Ok(Expression::Function(Box::new(Function::new(
24919 "DATE_FROM_UNIX_DATE".to_string(),
24920 vec![n],
24921 ))))
24922 }
24923 }
24924 } else {
24925 Ok(e)
24926 }
24927 }
24928
24929 Action::ArrayRemoveConvert => {
24930 // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter
24931 if let Expression::ArrayRemove(bf) = e {
24932 let arr = bf.this;
24933 let target_val = bf.expression;
24934 match target {
24935 DialectType::DuckDB => {
24936 let u_id = crate::expressions::Identifier::new("_u");
24937 let lambda =
24938 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
24939 parameters: vec![u_id.clone()],
24940 body: Expression::Neq(Box::new(BinaryOp {
24941 left: Expression::Identifier(u_id),
24942 right: target_val,
24943 left_comments: Vec::new(),
24944 operator_comments: Vec::new(),
24945 trailing_comments: Vec::new(),
24946 inferred_type: None,
24947 })),
24948 colon: false,
24949 parameter_types: Vec::new(),
24950 }));
24951 Ok(Expression::Function(Box::new(Function::new(
24952 "LIST_FILTER".to_string(),
24953 vec![arr, lambda],
24954 ))))
24955 }
24956 DialectType::ClickHouse => {
24957 let u_id = crate::expressions::Identifier::new("_u");
24958 let lambda =
24959 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
24960 parameters: vec![u_id.clone()],
24961 body: Expression::Neq(Box::new(BinaryOp {
24962 left: Expression::Identifier(u_id),
24963 right: target_val,
24964 left_comments: Vec::new(),
24965 operator_comments: Vec::new(),
24966 trailing_comments: Vec::new(),
24967 inferred_type: None,
24968 })),
24969 colon: false,
24970 parameter_types: Vec::new(),
24971 }));
24972 Ok(Expression::Function(Box::new(Function::new(
24973 "arrayFilter".to_string(),
24974 vec![lambda, arr],
24975 ))))
24976 }
24977 DialectType::BigQuery => {
24978 // ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)
24979 let u_id = crate::expressions::Identifier::new("_u");
24980 let u_col = Expression::Column(crate::expressions::Column {
24981 name: u_id.clone(),
24982 table: None,
24983 join_mark: false,
24984 trailing_comments: Vec::new(),
24985 span: None,
24986 inferred_type: None,
24987 });
24988 let unnest_expr =
24989 Expression::Unnest(Box::new(crate::expressions::UnnestFunc {
24990 this: arr,
24991 expressions: Vec::new(),
24992 with_ordinality: false,
24993 alias: None,
24994 offset_alias: None,
24995 }));
24996 let aliased_unnest =
24997 Expression::Alias(Box::new(crate::expressions::Alias {
24998 this: unnest_expr,
24999 alias: u_id.clone(),
25000 column_aliases: Vec::new(),
25001 pre_alias_comments: Vec::new(),
25002 trailing_comments: Vec::new(),
25003 inferred_type: None,
25004 }));
25005 let where_cond = Expression::Neq(Box::new(BinaryOp {
25006 left: u_col.clone(),
25007 right: target_val,
25008 left_comments: Vec::new(),
25009 operator_comments: Vec::new(),
25010 trailing_comments: Vec::new(),
25011 inferred_type: None,
25012 }));
25013 let subquery = Expression::Select(Box::new(
25014 crate::expressions::Select::new()
25015 .column(u_col)
25016 .from(aliased_unnest)
25017 .where_(where_cond),
25018 ));
25019 Ok(Expression::ArrayFunc(Box::new(
25020 crate::expressions::ArrayConstructor {
25021 expressions: vec![subquery],
25022 bracket_notation: false,
25023 use_list_keyword: false,
25024 },
25025 )))
25026 }
25027 _ => Ok(Expression::ArrayRemove(Box::new(
25028 crate::expressions::BinaryFunc {
25029 original_name: None,
25030 this: arr,
25031 expression: target_val,
25032 inferred_type: None,
25033 },
25034 ))),
25035 }
25036 } else {
25037 Ok(e)
25038 }
25039 }
25040
25041 Action::ArrayReverseConvert => {
25042 // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
25043 if let Expression::ArrayReverse(af) = e {
25044 Ok(Expression::Function(Box::new(Function::new(
25045 "arrayReverse".to_string(),
25046 vec![af.this],
25047 ))))
25048 } else {
25049 Ok(e)
25050 }
25051 }
25052
25053 Action::JsonKeysConvert => {
25054 // JSON_KEYS(x) -> JSON_OBJECT_KEYS/OBJECT_KEYS
25055 if let Expression::JsonKeys(uf) = e {
25056 match target {
25057 DialectType::Spark | DialectType::Databricks => {
25058 Ok(Expression::Function(Box::new(Function::new(
25059 "JSON_OBJECT_KEYS".to_string(),
25060 vec![uf.this],
25061 ))))
25062 }
25063 DialectType::Snowflake => Ok(Expression::Function(Box::new(
25064 Function::new("OBJECT_KEYS".to_string(), vec![uf.this]),
25065 ))),
25066 _ => Ok(Expression::JsonKeys(uf)),
25067 }
25068 } else {
25069 Ok(e)
25070 }
25071 }
25072
25073 Action::ParseJsonStrip => {
25074 // PARSE_JSON(x) -> x (strip wrapper for SQLite/Doris)
25075 if let Expression::ParseJson(uf) = e {
25076 Ok(uf.this)
25077 } else {
25078 Ok(e)
25079 }
25080 }
25081
25082 Action::ArraySizeDrill => {
25083 // ARRAY_SIZE(x) -> REPEATED_COUNT(x) for Drill
25084 if let Expression::ArraySize(uf) = e {
25085 Ok(Expression::Function(Box::new(Function::new(
25086 "REPEATED_COUNT".to_string(),
25087 vec![uf.this],
25088 ))))
25089 } else {
25090 Ok(e)
25091 }
25092 }
25093
25094 Action::WeekOfYearToWeekIso => {
25095 // WEEKOFYEAR(x) -> WEEKISO(x) for Snowflake (cross-dialect normalization)
25096 if let Expression::WeekOfYear(uf) = e {
25097 Ok(Expression::Function(Box::new(Function::new(
25098 "WEEKISO".to_string(),
25099 vec![uf.this],
25100 ))))
25101 } else {
25102 Ok(e)
25103 }
25104 }
25105 }
25106 })
25107 }
25108
25109 /// Convert DATE_TRUNC('unit', x) to MySQL-specific expansion
25110 fn date_trunc_to_mysql(unit: &str, expr: &Expression) -> Result<Expression> {
25111 use crate::expressions::Function;
25112 match unit {
25113 "DAY" => {
25114 // DATE(x)
25115 Ok(Expression::Function(Box::new(Function::new(
25116 "DATE".to_string(),
25117 vec![expr.clone()],
25118 ))))
25119 }
25120 "WEEK" => {
25121 // STR_TO_DATE(CONCAT(YEAR(x), ' ', WEEK(x, 1), ' 1'), '%Y %u %w')
25122 let year_x = Expression::Function(Box::new(Function::new(
25123 "YEAR".to_string(),
25124 vec![expr.clone()],
25125 )));
25126 let week_x = Expression::Function(Box::new(Function::new(
25127 "WEEK".to_string(),
25128 vec![expr.clone(), Expression::number(1)],
25129 )));
25130 let concat_args = vec![
25131 year_x,
25132 Expression::string(" "),
25133 week_x,
25134 Expression::string(" 1"),
25135 ];
25136 let concat = Expression::Function(Box::new(Function::new(
25137 "CONCAT".to_string(),
25138 concat_args,
25139 )));
25140 Ok(Expression::Function(Box::new(Function::new(
25141 "STR_TO_DATE".to_string(),
25142 vec![concat, Expression::string("%Y %u %w")],
25143 ))))
25144 }
25145 "MONTH" => {
25146 // STR_TO_DATE(CONCAT(YEAR(x), ' ', MONTH(x), ' 1'), '%Y %c %e')
25147 let year_x = Expression::Function(Box::new(Function::new(
25148 "YEAR".to_string(),
25149 vec![expr.clone()],
25150 )));
25151 let month_x = Expression::Function(Box::new(Function::new(
25152 "MONTH".to_string(),
25153 vec![expr.clone()],
25154 )));
25155 let concat_args = vec![
25156 year_x,
25157 Expression::string(" "),
25158 month_x,
25159 Expression::string(" 1"),
25160 ];
25161 let concat = Expression::Function(Box::new(Function::new(
25162 "CONCAT".to_string(),
25163 concat_args,
25164 )));
25165 Ok(Expression::Function(Box::new(Function::new(
25166 "STR_TO_DATE".to_string(),
25167 vec![concat, Expression::string("%Y %c %e")],
25168 ))))
25169 }
25170 "QUARTER" => {
25171 // STR_TO_DATE(CONCAT(YEAR(x), ' ', QUARTER(x) * 3 - 2, ' 1'), '%Y %c %e')
25172 let year_x = Expression::Function(Box::new(Function::new(
25173 "YEAR".to_string(),
25174 vec![expr.clone()],
25175 )));
25176 let quarter_x = Expression::Function(Box::new(Function::new(
25177 "QUARTER".to_string(),
25178 vec![expr.clone()],
25179 )));
25180 // QUARTER(x) * 3 - 2
25181 let mul = Expression::Mul(Box::new(crate::expressions::BinaryOp {
25182 left: quarter_x,
25183 right: Expression::number(3),
25184 left_comments: Vec::new(),
25185 operator_comments: Vec::new(),
25186 trailing_comments: Vec::new(),
25187 inferred_type: None,
25188 }));
25189 let sub = Expression::Sub(Box::new(crate::expressions::BinaryOp {
25190 left: mul,
25191 right: Expression::number(2),
25192 left_comments: Vec::new(),
25193 operator_comments: Vec::new(),
25194 trailing_comments: Vec::new(),
25195 inferred_type: None,
25196 }));
25197 let concat_args = vec![
25198 year_x,
25199 Expression::string(" "),
25200 sub,
25201 Expression::string(" 1"),
25202 ];
25203 let concat = Expression::Function(Box::new(Function::new(
25204 "CONCAT".to_string(),
25205 concat_args,
25206 )));
25207 Ok(Expression::Function(Box::new(Function::new(
25208 "STR_TO_DATE".to_string(),
25209 vec![concat, Expression::string("%Y %c %e")],
25210 ))))
25211 }
25212 "YEAR" => {
25213 // STR_TO_DATE(CONCAT(YEAR(x), ' 1 1'), '%Y %c %e')
25214 let year_x = Expression::Function(Box::new(Function::new(
25215 "YEAR".to_string(),
25216 vec![expr.clone()],
25217 )));
25218 let concat_args = vec![year_x, Expression::string(" 1 1")];
25219 let concat = Expression::Function(Box::new(Function::new(
25220 "CONCAT".to_string(),
25221 concat_args,
25222 )));
25223 Ok(Expression::Function(Box::new(Function::new(
25224 "STR_TO_DATE".to_string(),
25225 vec![concat, Expression::string("%Y %c %e")],
25226 ))))
25227 }
25228 _ => {
25229 // Unsupported unit -> keep as DATE_TRUNC
25230 Ok(Expression::Function(Box::new(Function::new(
25231 "DATE_TRUNC".to_string(),
25232 vec![Expression::string(unit), expr.clone()],
25233 ))))
25234 }
25235 }
25236 }
25237
25238 /// Check if a DataType is or contains VARCHAR/CHAR (for Spark VARCHAR->STRING normalization)
25239 fn has_varchar_char_type(dt: &crate::expressions::DataType) -> bool {
25240 use crate::expressions::DataType;
25241 match dt {
25242 DataType::VarChar { .. } | DataType::Char { .. } => true,
25243 DataType::Struct { fields, .. } => fields
25244 .iter()
25245 .any(|f| Self::has_varchar_char_type(&f.data_type)),
25246 _ => false,
25247 }
25248 }
25249
25250 /// Recursively normalize VARCHAR/CHAR to STRING in a DataType (for Spark)
25251 fn normalize_varchar_to_string(
25252 dt: crate::expressions::DataType,
25253 ) -> crate::expressions::DataType {
25254 use crate::expressions::DataType;
25255 match dt {
25256 DataType::VarChar { .. } | DataType::Char { .. } => DataType::Custom {
25257 name: "STRING".to_string(),
25258 },
25259 DataType::Struct { fields, nested } => {
25260 let fields = fields
25261 .into_iter()
25262 .map(|mut f| {
25263 f.data_type = Self::normalize_varchar_to_string(f.data_type);
25264 f
25265 })
25266 .collect();
25267 DataType::Struct { fields, nested }
25268 }
25269 other => other,
25270 }
25271 }
25272
25273 /// Normalize an interval string like '1day' or ' 2 days ' to proper INTERVAL expression
25274 fn normalize_interval_string(expr: Expression, target: DialectType) -> Expression {
25275 if let Expression::Literal(crate::expressions::Literal::String(ref s)) = expr {
25276 // Try to parse patterns like '1day', '1 day', '2 days', ' 2 days '
25277 let trimmed = s.trim();
25278
25279 // Find where digits end and unit text begins
25280 let digit_end = trimmed
25281 .find(|c: char| !c.is_ascii_digit())
25282 .unwrap_or(trimmed.len());
25283 if digit_end == 0 || digit_end == trimmed.len() {
25284 return expr;
25285 }
25286 let num = &trimmed[..digit_end];
25287 let unit_text = trimmed[digit_end..].trim().to_uppercase();
25288 if unit_text.is_empty() {
25289 return expr;
25290 }
25291
25292 let known_units = [
25293 "DAY", "DAYS", "HOUR", "HOURS", "MINUTE", "MINUTES", "SECOND", "SECONDS", "WEEK",
25294 "WEEKS", "MONTH", "MONTHS", "YEAR", "YEARS",
25295 ];
25296 if !known_units.contains(&unit_text.as_str()) {
25297 return expr;
25298 }
25299
25300 let unit_str = unit_text.clone();
25301 // Singularize
25302 let unit_singular = if unit_str.ends_with('S') && unit_str.len() > 3 {
25303 &unit_str[..unit_str.len() - 1]
25304 } else {
25305 &unit_str
25306 };
25307 let unit = unit_singular;
25308
25309 match target {
25310 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
25311 // INTERVAL '2' DAY
25312 let iu = match unit {
25313 "DAY" => crate::expressions::IntervalUnit::Day,
25314 "HOUR" => crate::expressions::IntervalUnit::Hour,
25315 "MINUTE" => crate::expressions::IntervalUnit::Minute,
25316 "SECOND" => crate::expressions::IntervalUnit::Second,
25317 "WEEK" => crate::expressions::IntervalUnit::Week,
25318 "MONTH" => crate::expressions::IntervalUnit::Month,
25319 "YEAR" => crate::expressions::IntervalUnit::Year,
25320 _ => return expr,
25321 };
25322 return Expression::Interval(Box::new(crate::expressions::Interval {
25323 this: Some(Expression::string(num)),
25324 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
25325 unit: iu,
25326 use_plural: false,
25327 }),
25328 }));
25329 }
25330 DialectType::PostgreSQL | DialectType::Redshift | DialectType::DuckDB => {
25331 // INTERVAL '2 DAYS'
25332 let plural = if num != "1" && !unit_str.ends_with('S') {
25333 format!("{} {}S", num, unit)
25334 } else if unit_str.ends_with('S') {
25335 format!("{} {}", num, unit_str)
25336 } else {
25337 format!("{} {}", num, unit)
25338 };
25339 return Expression::Interval(Box::new(crate::expressions::Interval {
25340 this: Some(Expression::string(&plural)),
25341 unit: None,
25342 }));
25343 }
25344 _ => {
25345 // Spark/Databricks/Hive: INTERVAL '1' DAY
25346 let iu = match unit {
25347 "DAY" => crate::expressions::IntervalUnit::Day,
25348 "HOUR" => crate::expressions::IntervalUnit::Hour,
25349 "MINUTE" => crate::expressions::IntervalUnit::Minute,
25350 "SECOND" => crate::expressions::IntervalUnit::Second,
25351 "WEEK" => crate::expressions::IntervalUnit::Week,
25352 "MONTH" => crate::expressions::IntervalUnit::Month,
25353 "YEAR" => crate::expressions::IntervalUnit::Year,
25354 _ => return expr,
25355 };
25356 return Expression::Interval(Box::new(crate::expressions::Interval {
25357 this: Some(Expression::string(num)),
25358 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
25359 unit: iu,
25360 use_plural: false,
25361 }),
25362 }));
25363 }
25364 }
25365 }
25366 // If it's already an INTERVAL expression, pass through
25367 expr
25368 }
25369
25370 /// Rewrite SELECT expressions containing UNNEST into expanded form with CROSS JOINs.
25371 /// DuckDB: SELECT UNNEST(arr1), UNNEST(arr2) ->
25372 /// BigQuery: SELECT IF(pos = pos_2, col, NULL) AS col, ... FROM UNNEST(GENERATE_ARRAY(0, ...)) AS pos CROSS JOIN ...
25373 /// Presto: SELECT IF(_u.pos = _u_2.pos_2, _u_2.col) AS col, ... FROM UNNEST(SEQUENCE(1, ...)) AS _u(pos) CROSS JOIN ...
25374 fn rewrite_unnest_expansion(
25375 select: &crate::expressions::Select,
25376 target: DialectType,
25377 ) -> Option<crate::expressions::Select> {
25378 use crate::expressions::{
25379 Alias, BinaryOp, Column, From, Function, Identifier, Join, JoinKind, Literal,
25380 UnnestFunc,
25381 };
25382
25383 let index_offset: i64 = match target {
25384 DialectType::Presto | DialectType::Trino => 1,
25385 _ => 0, // BigQuery, Snowflake
25386 };
25387
25388 let if_func_name = match target {
25389 DialectType::Snowflake => "IFF",
25390 _ => "IF",
25391 };
25392
25393 let array_length_func = match target {
25394 DialectType::BigQuery => "ARRAY_LENGTH",
25395 DialectType::Presto | DialectType::Trino => "CARDINALITY",
25396 DialectType::Snowflake => "ARRAY_SIZE",
25397 _ => "ARRAY_LENGTH",
25398 };
25399
25400 let use_table_aliases = matches!(
25401 target,
25402 DialectType::Presto | DialectType::Trino | DialectType::Snowflake
25403 );
25404 let null_third_arg = matches!(target, DialectType::BigQuery | DialectType::Snowflake);
25405
25406 fn make_col(name: &str, table: Option<&str>) -> Expression {
25407 if let Some(tbl) = table {
25408 Expression::Column(Column {
25409 name: Identifier::new(name.to_string()),
25410 table: Some(Identifier::new(tbl.to_string())),
25411 join_mark: false,
25412 trailing_comments: Vec::new(),
25413 span: None,
25414 inferred_type: None,
25415 })
25416 } else {
25417 Expression::Identifier(Identifier::new(name.to_string()))
25418 }
25419 }
25420
25421 fn make_join(this: Expression) -> Join {
25422 Join {
25423 this,
25424 on: None,
25425 using: Vec::new(),
25426 kind: JoinKind::Cross,
25427 use_inner_keyword: false,
25428 use_outer_keyword: false,
25429 deferred_condition: false,
25430 join_hint: None,
25431 match_condition: None,
25432 pivots: Vec::new(),
25433 comments: Vec::new(),
25434 nesting_group: 0,
25435 directed: false,
25436 }
25437 }
25438
25439 // Collect UNNEST info from SELECT expressions
25440 struct UnnestInfo {
25441 arr_expr: Expression,
25442 col_alias: String,
25443 pos_alias: String,
25444 source_alias: String,
25445 original_expr: Expression,
25446 has_outer_alias: Option<String>,
25447 }
25448
25449 let mut unnest_infos: Vec<UnnestInfo> = Vec::new();
25450 let mut col_counter = 0usize;
25451 let mut pos_counter = 1usize;
25452 let mut source_counter = 1usize;
25453
25454 fn extract_unnest_arg(expr: &Expression) -> Option<Expression> {
25455 match expr {
25456 Expression::Unnest(u) => Some(u.this.clone()),
25457 Expression::Function(f)
25458 if f.name.eq_ignore_ascii_case("UNNEST") && !f.args.is_empty() =>
25459 {
25460 Some(f.args[0].clone())
25461 }
25462 Expression::Alias(a) => extract_unnest_arg(&a.this),
25463 Expression::Add(op)
25464 | Expression::Sub(op)
25465 | Expression::Mul(op)
25466 | Expression::Div(op) => {
25467 extract_unnest_arg(&op.left).or_else(|| extract_unnest_arg(&op.right))
25468 }
25469 _ => None,
25470 }
25471 }
25472
25473 fn get_alias_name(expr: &Expression) -> Option<String> {
25474 if let Expression::Alias(a) = expr {
25475 Some(a.alias.name.clone())
25476 } else {
25477 None
25478 }
25479 }
25480
25481 for sel_expr in &select.expressions {
25482 if let Some(arr) = extract_unnest_arg(sel_expr) {
25483 col_counter += 1;
25484 pos_counter += 1;
25485 source_counter += 1;
25486
25487 let col_alias = if col_counter == 1 {
25488 "col".to_string()
25489 } else {
25490 format!("col_{}", col_counter)
25491 };
25492 let pos_alias = format!("pos_{}", pos_counter);
25493 let source_alias = format!("_u_{}", source_counter);
25494 let has_outer_alias = get_alias_name(sel_expr);
25495
25496 unnest_infos.push(UnnestInfo {
25497 arr_expr: arr,
25498 col_alias,
25499 pos_alias,
25500 source_alias,
25501 original_expr: sel_expr.clone(),
25502 has_outer_alias,
25503 });
25504 }
25505 }
25506
25507 if unnest_infos.is_empty() {
25508 return None;
25509 }
25510
25511 let series_alias = "pos".to_string();
25512 let series_source_alias = "_u".to_string();
25513 let tbl_ref = if use_table_aliases {
25514 Some(series_source_alias.as_str())
25515 } else {
25516 None
25517 };
25518
25519 // Build new SELECT expressions
25520 let mut new_select_exprs = Vec::new();
25521 for info in &unnest_infos {
25522 let actual_col_name = info.has_outer_alias.as_ref().unwrap_or(&info.col_alias);
25523 let src_ref = if use_table_aliases {
25524 Some(info.source_alias.as_str())
25525 } else {
25526 None
25527 };
25528
25529 let pos_col = make_col(&series_alias, tbl_ref);
25530 let unnest_pos_col = make_col(&info.pos_alias, src_ref);
25531 let col_ref = make_col(actual_col_name, src_ref);
25532
25533 let eq_cond = Expression::Eq(Box::new(BinaryOp::new(
25534 pos_col.clone(),
25535 unnest_pos_col.clone(),
25536 )));
25537 let mut if_args = vec![eq_cond, col_ref];
25538 if null_third_arg {
25539 if_args.push(Expression::Null(crate::expressions::Null));
25540 }
25541
25542 let if_expr =
25543 Expression::Function(Box::new(Function::new(if_func_name.to_string(), if_args)));
25544 let final_expr = Self::replace_unnest_with_if(&info.original_expr, &if_expr);
25545
25546 new_select_exprs.push(Expression::Alias(Box::new(Alias::new(
25547 final_expr,
25548 Identifier::new(actual_col_name.clone()),
25549 ))));
25550 }
25551
25552 // Build array size expressions for GREATEST
25553 let size_exprs: Vec<Expression> = unnest_infos
25554 .iter()
25555 .map(|info| {
25556 Expression::Function(Box::new(Function::new(
25557 array_length_func.to_string(),
25558 vec![info.arr_expr.clone()],
25559 )))
25560 })
25561 .collect();
25562
25563 let greatest =
25564 Expression::Function(Box::new(Function::new("GREATEST".to_string(), size_exprs)));
25565
25566 let series_end = if index_offset == 0 {
25567 Expression::Sub(Box::new(BinaryOp::new(
25568 greatest,
25569 Expression::Literal(Literal::Number("1".to_string())),
25570 )))
25571 } else {
25572 greatest
25573 };
25574
25575 // Build the position array source
25576 let series_unnest_expr = match target {
25577 DialectType::BigQuery => {
25578 let gen_array = Expression::Function(Box::new(Function::new(
25579 "GENERATE_ARRAY".to_string(),
25580 vec![
25581 Expression::Literal(Literal::Number("0".to_string())),
25582 series_end,
25583 ],
25584 )));
25585 Expression::Unnest(Box::new(UnnestFunc {
25586 this: gen_array,
25587 expressions: Vec::new(),
25588 with_ordinality: false,
25589 alias: None,
25590 offset_alias: None,
25591 }))
25592 }
25593 DialectType::Presto | DialectType::Trino => {
25594 let sequence = Expression::Function(Box::new(Function::new(
25595 "SEQUENCE".to_string(),
25596 vec![
25597 Expression::Literal(Literal::Number("1".to_string())),
25598 series_end,
25599 ],
25600 )));
25601 Expression::Unnest(Box::new(UnnestFunc {
25602 this: sequence,
25603 expressions: Vec::new(),
25604 with_ordinality: false,
25605 alias: None,
25606 offset_alias: None,
25607 }))
25608 }
25609 DialectType::Snowflake => {
25610 let range_end = Expression::Add(Box::new(BinaryOp::new(
25611 Expression::Paren(Box::new(crate::expressions::Paren {
25612 this: series_end,
25613 trailing_comments: Vec::new(),
25614 })),
25615 Expression::Literal(Literal::Number("1".to_string())),
25616 )));
25617 let gen_range = Expression::Function(Box::new(Function::new(
25618 "ARRAY_GENERATE_RANGE".to_string(),
25619 vec![
25620 Expression::Literal(Literal::Number("0".to_string())),
25621 range_end,
25622 ],
25623 )));
25624 let flatten_arg =
25625 Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
25626 name: Identifier::new("INPUT".to_string()),
25627 value: gen_range,
25628 separator: crate::expressions::NamedArgSeparator::DArrow,
25629 }));
25630 let flatten = Expression::Function(Box::new(Function::new(
25631 "FLATTEN".to_string(),
25632 vec![flatten_arg],
25633 )));
25634 Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])))
25635 }
25636 _ => return None,
25637 };
25638
25639 // Build series alias expression
25640 let series_alias_expr = if use_table_aliases {
25641 let col_aliases = if matches!(target, DialectType::Snowflake) {
25642 vec![
25643 Identifier::new("seq".to_string()),
25644 Identifier::new("key".to_string()),
25645 Identifier::new("path".to_string()),
25646 Identifier::new("index".to_string()),
25647 Identifier::new(series_alias.clone()),
25648 Identifier::new("this".to_string()),
25649 ]
25650 } else {
25651 vec![Identifier::new(series_alias.clone())]
25652 };
25653 Expression::Alias(Box::new(Alias {
25654 this: series_unnest_expr,
25655 alias: Identifier::new(series_source_alias.clone()),
25656 column_aliases: col_aliases,
25657 pre_alias_comments: Vec::new(),
25658 trailing_comments: Vec::new(),
25659 inferred_type: None,
25660 }))
25661 } else {
25662 Expression::Alias(Box::new(Alias::new(
25663 series_unnest_expr,
25664 Identifier::new(series_alias.clone()),
25665 )))
25666 };
25667
25668 // Build CROSS JOINs for each UNNEST
25669 let mut joins = Vec::new();
25670 for info in &unnest_infos {
25671 let actual_col_name = info.has_outer_alias.as_ref().unwrap_or(&info.col_alias);
25672
25673 let unnest_join_expr = match target {
25674 DialectType::BigQuery => {
25675 // UNNEST([1,2,3]) AS col WITH OFFSET AS pos_2
25676 let unnest = UnnestFunc {
25677 this: info.arr_expr.clone(),
25678 expressions: Vec::new(),
25679 with_ordinality: true,
25680 alias: Some(Identifier::new(actual_col_name.clone())),
25681 offset_alias: Some(Identifier::new(info.pos_alias.clone())),
25682 };
25683 Expression::Unnest(Box::new(unnest))
25684 }
25685 DialectType::Presto | DialectType::Trino => {
25686 let unnest = UnnestFunc {
25687 this: info.arr_expr.clone(),
25688 expressions: Vec::new(),
25689 with_ordinality: true,
25690 alias: None,
25691 offset_alias: None,
25692 };
25693 Expression::Alias(Box::new(Alias {
25694 this: Expression::Unnest(Box::new(unnest)),
25695 alias: Identifier::new(info.source_alias.clone()),
25696 column_aliases: vec![
25697 Identifier::new(actual_col_name.clone()),
25698 Identifier::new(info.pos_alias.clone()),
25699 ],
25700 pre_alias_comments: Vec::new(),
25701 trailing_comments: Vec::new(),
25702 inferred_type: None,
25703 }))
25704 }
25705 DialectType::Snowflake => {
25706 let flatten_arg =
25707 Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
25708 name: Identifier::new("INPUT".to_string()),
25709 value: info.arr_expr.clone(),
25710 separator: crate::expressions::NamedArgSeparator::DArrow,
25711 }));
25712 let flatten = Expression::Function(Box::new(Function::new(
25713 "FLATTEN".to_string(),
25714 vec![flatten_arg],
25715 )));
25716 let table_fn = Expression::Function(Box::new(Function::new(
25717 "TABLE".to_string(),
25718 vec![flatten],
25719 )));
25720 Expression::Alias(Box::new(Alias {
25721 this: table_fn,
25722 alias: Identifier::new(info.source_alias.clone()),
25723 column_aliases: vec![
25724 Identifier::new("seq".to_string()),
25725 Identifier::new("key".to_string()),
25726 Identifier::new("path".to_string()),
25727 Identifier::new(info.pos_alias.clone()),
25728 Identifier::new(actual_col_name.clone()),
25729 Identifier::new("this".to_string()),
25730 ],
25731 pre_alias_comments: Vec::new(),
25732 trailing_comments: Vec::new(),
25733 inferred_type: None,
25734 }))
25735 }
25736 _ => return None,
25737 };
25738
25739 joins.push(make_join(unnest_join_expr));
25740 }
25741
25742 // Build WHERE clause
25743 let mut where_conditions: Vec<Expression> = Vec::new();
25744 for info in &unnest_infos {
25745 let src_ref = if use_table_aliases {
25746 Some(info.source_alias.as_str())
25747 } else {
25748 None
25749 };
25750 let pos_col = make_col(&series_alias, tbl_ref);
25751 let unnest_pos_col = make_col(&info.pos_alias, src_ref);
25752
25753 let arr_size = Expression::Function(Box::new(Function::new(
25754 array_length_func.to_string(),
25755 vec![info.arr_expr.clone()],
25756 )));
25757
25758 let size_ref = if index_offset == 0 {
25759 Expression::Paren(Box::new(crate::expressions::Paren {
25760 this: Expression::Sub(Box::new(BinaryOp::new(
25761 arr_size,
25762 Expression::Literal(Literal::Number("1".to_string())),
25763 ))),
25764 trailing_comments: Vec::new(),
25765 }))
25766 } else {
25767 arr_size
25768 };
25769
25770 let eq = Expression::Eq(Box::new(BinaryOp::new(
25771 pos_col.clone(),
25772 unnest_pos_col.clone(),
25773 )));
25774 let gt = Expression::Gt(Box::new(BinaryOp::new(pos_col, size_ref.clone())));
25775 let pos_eq_size = Expression::Eq(Box::new(BinaryOp::new(unnest_pos_col, size_ref)));
25776 let and_cond = Expression::And(Box::new(BinaryOp::new(gt, pos_eq_size)));
25777 let paren_and = Expression::Paren(Box::new(crate::expressions::Paren {
25778 this: and_cond,
25779 trailing_comments: Vec::new(),
25780 }));
25781 let or_cond = Expression::Or(Box::new(BinaryOp::new(eq, paren_and)));
25782
25783 where_conditions.push(or_cond);
25784 }
25785
25786 let where_expr = if where_conditions.len() == 1 {
25787 // Single condition: no parens needed
25788 where_conditions.into_iter().next().unwrap()
25789 } else {
25790 // Multiple conditions: wrap each OR in parens, then combine with AND
25791 let wrap = |e: Expression| {
25792 Expression::Paren(Box::new(crate::expressions::Paren {
25793 this: e,
25794 trailing_comments: Vec::new(),
25795 }))
25796 };
25797 let mut iter = where_conditions.into_iter();
25798 let first = wrap(iter.next().unwrap());
25799 let second = wrap(iter.next().unwrap());
25800 let mut combined = Expression::Paren(Box::new(crate::expressions::Paren {
25801 this: Expression::And(Box::new(BinaryOp::new(first, second))),
25802 trailing_comments: Vec::new(),
25803 }));
25804 for cond in iter {
25805 combined = Expression::And(Box::new(BinaryOp::new(combined, wrap(cond))));
25806 }
25807 combined
25808 };
25809
25810 // Build the new SELECT
25811 let mut new_select = select.clone();
25812 new_select.expressions = new_select_exprs;
25813
25814 if new_select.from.is_some() {
25815 let mut all_joins = vec![make_join(series_alias_expr)];
25816 all_joins.extend(joins);
25817 new_select.joins.extend(all_joins);
25818 } else {
25819 new_select.from = Some(From {
25820 expressions: vec![series_alias_expr],
25821 });
25822 new_select.joins.extend(joins);
25823 }
25824
25825 if let Some(ref existing_where) = new_select.where_clause {
25826 let combined = Expression::And(Box::new(BinaryOp::new(
25827 existing_where.this.clone(),
25828 where_expr,
25829 )));
25830 new_select.where_clause = Some(crate::expressions::Where { this: combined });
25831 } else {
25832 new_select.where_clause = Some(crate::expressions::Where { this: where_expr });
25833 }
25834
25835 Some(new_select)
25836 }
25837
25838 /// Helper to replace UNNEST(...) inside an expression with a replacement expression.
25839 fn replace_unnest_with_if(original: &Expression, replacement: &Expression) -> Expression {
25840 match original {
25841 Expression::Unnest(_) => replacement.clone(),
25842 Expression::Function(f) if f.name.eq_ignore_ascii_case("UNNEST") => replacement.clone(),
25843 Expression::Alias(a) => Self::replace_unnest_with_if(&a.this, replacement),
25844 Expression::Add(op) => {
25845 let left = Self::replace_unnest_with_if(&op.left, replacement);
25846 let right = Self::replace_unnest_with_if(&op.right, replacement);
25847 Expression::Add(Box::new(crate::expressions::BinaryOp::new(left, right)))
25848 }
25849 Expression::Sub(op) => {
25850 let left = Self::replace_unnest_with_if(&op.left, replacement);
25851 let right = Self::replace_unnest_with_if(&op.right, replacement);
25852 Expression::Sub(Box::new(crate::expressions::BinaryOp::new(left, right)))
25853 }
25854 Expression::Mul(op) => {
25855 let left = Self::replace_unnest_with_if(&op.left, replacement);
25856 let right = Self::replace_unnest_with_if(&op.right, replacement);
25857 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(left, right)))
25858 }
25859 Expression::Div(op) => {
25860 let left = Self::replace_unnest_with_if(&op.left, replacement);
25861 let right = Self::replace_unnest_with_if(&op.right, replacement);
25862 Expression::Div(Box::new(crate::expressions::BinaryOp::new(left, right)))
25863 }
25864 _ => original.clone(),
25865 }
25866 }
25867
/// Decompose a JSON path like `$.y[0].z` into individual parts: `["y", "0", "z"]`.
/// Strips the leading `$` (or `$.`), splits on dots, unwraps bracket segments
/// (including quoted ones like `["a b"]`), and drops `[*]` wildcards.
fn decompose_json_path(path: &str) -> Vec<String> {
    // Remove the `$.` / `$` root marker if present.
    let trimmed = path
        .strip_prefix("$.")
        .or_else(|| path.strip_prefix('$'))
        .unwrap_or(path);

    let mut segments = Vec::new();
    if trimmed.is_empty() {
        return segments;
    }

    let cs: Vec<char> = trimmed.chars().collect();
    let mut idx = 0;
    let mut buf = String::new();
    while idx < cs.len() {
        match cs[idx] {
            '.' => {
                // Dot terminates the current dotted segment (if any).
                if !buf.is_empty() {
                    segments.push(std::mem::take(&mut buf));
                }
                idx += 1;
            }
            '[' => {
                // A bracket also terminates the pending dotted segment.
                if !buf.is_empty() {
                    segments.push(std::mem::take(&mut buf));
                }
                idx += 1;
                // Collect everything up to the matching `]`, unwrapping any
                // single- or double-quoted run inside the brackets.
                let mut inner = String::new();
                while idx < cs.len() && cs[idx] != ']' {
                    let c = cs[idx];
                    if c == '"' || c == '\'' {
                        idx += 1; // opening quote
                        while idx < cs.len() && cs[idx] != c {
                            inner.push(cs[idx]);
                            idx += 1;
                        }
                        if idx < cs.len() {
                            idx += 1; // closing quote
                        }
                    } else {
                        inner.push(c);
                        idx += 1;
                    }
                }
                if idx < cs.len() {
                    idx += 1; // skip `]`
                }
                // `[*]` wildcards are dropped entirely.
                if inner != "*" {
                    segments.push(inner);
                }
            }
            other => {
                buf.push(other);
                idx += 1;
            }
        }
    }
    if !buf.is_empty() {
        segments.push(buf);
    }
    segments
}
25935
/// Strip the root `$` marker from a JSON path, keeping the rest.
/// `$.y[0].z` -> `y[0].z`, `$["a b"]` -> `["a b"]`; paths without a `$`
/// prefix are returned unchanged.
fn strip_json_dollar_prefix(path: &str) -> String {
    path.strip_prefix("$.")
        .or_else(|| path.strip_prefix('$'))
        .unwrap_or(path)
        .to_string()
}
25947
/// Strip `[*]` wildcards from a JSON path.
/// `$.y[*]` -> `$.y`, `$.y[*].z` -> `$.y.z`. Any double dots produced by the
/// removal are collapsed, and a trailing dot is trimmed.
fn strip_json_wildcards(path: &str) -> String {
    let without_wildcards = path.replace("[*]", "");
    let collapsed = without_wildcards.replace("..", ".");
    collapsed.trim_end_matches('.').to_string()
}
25956
/// Convert bracket notation to dot notation for JSON paths.
/// Quoted brackets become quoted dot segments: `$["a b"]` -> `$."a b"`,
/// `$["key"]` -> `$."key"`. Numeric/unquoted indexes (`[0]`) and `[*]`
/// wildcards are kept in bracket form.
fn bracket_to_dot_notation(path: &str) -> String {
    let cs: Vec<char> = path.chars().collect();
    let mut out = String::with_capacity(path.len());
    let mut idx = 0;
    while idx < cs.len() {
        if cs[idx] != '[' {
            // Ordinary character: copy through verbatim.
            out.push(cs[idx]);
            idx += 1;
            continue;
        }
        // Consume `[` ... `]`, unwrapping a quoted run if present and
        // remembering whether the content was quoted.
        idx += 1;
        let mut inner = String::new();
        let mut quoted = false;
        while idx < cs.len() && cs[idx] != ']' {
            let c = cs[idx];
            if c == '"' || c == '\'' {
                quoted = true;
                idx += 1; // opening quote
                while idx < cs.len() && cs[idx] != c {
                    inner.push(cs[idx]);
                    idx += 1;
                }
                if idx < cs.len() {
                    idx += 1; // closing quote
                }
            } else {
                inner.push(c);
                idx += 1;
            }
        }
        if idx < cs.len() {
            idx += 1; // skip `]`
        }
        if inner == "*" {
            // Wildcards stay in bracket form (even if they were quoted).
            out.push_str("[*]");
        } else if quoted {
            // Quoted bracket -> dot segment with double quotes.
            out.push('.');
            out.push('"');
            out.push_str(&inner);
            out.push('"');
        } else {
            // Numeric/unquoted index -> keep brackets.
            out.push('[');
            out.push_str(&inner);
            out.push(']');
        }
    }
    out
}
26012
/// Convert double-quoted bracket segments in a JSON path to single quotes.
/// `$["a b"]` -> `$['a b']`. Brackets that are not immediately followed by a
/// double quote (numeric indexes, wildcards, already-single-quoted) are left
/// untouched.
fn bracket_to_single_quotes(path: &str) -> String {
    let cs: Vec<char> = path.chars().collect();
    let mut out = String::with_capacity(path.len());
    let mut idx = 0;
    while idx < cs.len() {
        let quoted_bracket = cs[idx] == '[' && cs.get(idx + 1) == Some(&'"');
        if quoted_bracket {
            out.push_str("['");
            idx += 2; // skip `[` and the opening `"`
            // Copy the quoted content through unchanged.
            while idx < cs.len() && cs[idx] != '"' {
                out.push(cs[idx]);
                idx += 1;
            }
            if idx < cs.len() {
                idx += 1; // skip the closing `"`
            }
            out.push('\'');
        } else {
            out.push(cs[idx]);
            idx += 1;
        }
    }
    out
}
26039
/// Transform TSQL SELECT INTO -> CREATE TABLE AS for DuckDB/Snowflake
/// or PostgreSQL #temp -> TEMPORARY.
/// Also strips # from INSERT INTO #table for non-TSQL targets.
///
/// Returns the rewritten expression, or the input unchanged when no rewrite
/// applies (non-SELECT/INSERT nodes, SELECT without INTO, other targets).
fn transform_select_into(
    expr: Expression,
    _source: DialectType,
    target: DialectType,
) -> Expression {
    use crate::expressions::{CreateTable, Expression, TableRef};

    // Handle INSERT INTO #temp -> INSERT INTO temp for non-TSQL targets.
    // TSQL/Fabric keep the `#` prefix (it marks a temp table there).
    if let Expression::Insert(ref insert) = expr {
        if insert.table.name.name.starts_with('#')
            && !matches!(target, DialectType::TSQL | DialectType::Fabric)
        {
            let mut new_insert = insert.clone();
            new_insert.table.name.name =
                insert.table.name.name.trim_start_matches('#').to_string();
            return Expression::Insert(new_insert);
        }
        // No `#` prefix, or a TSQL-family target: leave the INSERT untouched.
        return expr;
    }

    if let Expression::Select(ref select) = expr {
        if let Some(ref into) = select.into {
            // Extract the raw INTO target name; falls back to an empty string
            // when the INTO target is neither a table nor a bare identifier.
            let table_name_raw = match &into.this {
                Expression::Table(tr) => tr.name.name.clone(),
                Expression::Identifier(id) => id.name.clone(),
                _ => String::new(),
            };
            // TSQL spells temp tables with a leading `#`; also honor an
            // explicit TEMPORARY already present on the INTO clause.
            let is_temp = table_name_raw.starts_with('#') || into.temporary;
            let clean_name = table_name_raw.trim_start_matches('#').to_string();

            match target {
                DialectType::DuckDB | DialectType::Snowflake => {
                    // SELECT INTO -> CREATE TABLE AS SELECT
                    let mut new_select = select.clone();
                    new_select.into = None;
                    // Build a minimal CREATE TABLE: only the name, the
                    // temporary flag, and the AS SELECT body are populated;
                    // every other field keeps its neutral default.
                    let ct = CreateTable {
                        name: TableRef::new(clean_name),
                        on_cluster: None,
                        columns: Vec::new(),
                        constraints: Vec::new(),
                        if_not_exists: false,
                        temporary: is_temp,
                        or_replace: false,
                        table_modifier: None,
                        as_select: Some(Expression::Select(new_select)),
                        as_select_parenthesized: false,
                        on_commit: None,
                        clone_source: None,
                        clone_at_clause: None,
                        shallow_clone: false,
                        is_copy: false,
                        leading_comments: Vec::new(),
                        with_properties: Vec::new(),
                        teradata_post_name_options: Vec::new(),
                        with_data: None,
                        with_statistics: None,
                        teradata_indexes: Vec::new(),
                        with_cte: None,
                        properties: Vec::new(),
                        partition_of: None,
                        post_table_properties: Vec::new(),
                        mysql_table_options: Vec::new(),
                        inherits: Vec::new(),
                        on_property: None,
                        copy_grants: false,
                        using_template: None,
                        rollup: None,
                    };
                    return Expression::CreateTable(Box::new(ct));
                }
                DialectType::PostgreSQL | DialectType::Redshift => {
                    // PostgreSQL: #foo -> INTO TEMPORARY foo
                    if is_temp && !into.temporary {
                        let mut new_select = select.clone();
                        let mut new_into = into.clone();
                        new_into.temporary = true;
                        new_into.unlogged = false;
                        // Rebuild the target as a plain table ref without `#`.
                        new_into.this = Expression::Table(TableRef::new(clean_name));
                        new_select.into = Some(new_into);
                        Expression::Select(new_select)
                    } else {
                        expr
                    }
                }
                _ => expr,
            }
        } else {
            expr
        }
    } else {
        expr
    }
}
26136
/// Transform CREATE TABLE WITH properties for cross-dialect transpilation.
/// Handles FORMAT, PARTITIONED_BY, and other Presto WITH properties.
///
/// Mutates `ct` in place, moving property data between the Presto-style
/// `with_properties` key/value list and the `properties` expression list
/// depending on `target`:
/// - Presto/Trino/Athena: normalize to WITH (format='...', PARTITIONED_BY=ARRAY[...], ...)
/// - Hive: FORMAT -> STORED AS, PARTITIONED_BY -> partition columns, rest -> TBLPROPERTIES
/// - Spark/Databricks: FORMAT -> USING, PARTITIONED_BY -> partition columns, rest -> TBLPROPERTIES
/// - DuckDB: strip storage-related properties entirely
/// - other targets: pass WITH properties through unchanged
fn transform_create_table_properties(
    ct: &mut crate::expressions::CreateTable,
    _source: DialectType,
    target: DialectType,
) {
    use crate::expressions::{
        BinaryOp, BooleanLiteral, Expression, FileFormatProperty, Identifier, Literal,
        Properties,
    };

    // Helper to convert a raw property value string to the correct Expression:
    // quoted string -> string literal, numeric -> number literal,
    // ARRAY[...] / ARRAY(...) -> ARRAY function call, anything else -> identifier.
    let value_to_expr = |v: &str| -> Expression {
        let trimmed = v.trim();
        // Check if it's a quoted string (starts and ends with ')
        if trimmed.starts_with('\'') && trimmed.ends_with('\'') {
            Expression::Literal(Literal::String(trimmed[1..trimmed.len() - 1].to_string()))
        }
        // Check if it's a number
        else if trimmed.parse::<i64>().is_ok() || trimmed.parse::<f64>().is_ok() {
            Expression::Literal(Literal::Number(trimmed.to_string()))
        }
        // Check if it's ARRAY[...] or ARRAY(...)
        else if trimmed.to_uppercase().starts_with("ARRAY") {
            // Convert ARRAY['y'] to ARRAY('y') for Hive/Spark
            let inner = trimmed
                .trim_start_matches(|c: char| c.is_alphabetic()) // Remove ARRAY
                .trim_start_matches('[')
                .trim_start_matches('(')
                .trim_end_matches(']')
                .trim_end_matches(')');
            let elements: Vec<Expression> = inner
                .split(',')
                .map(|e| {
                    let elem = e.trim().trim_matches('\'');
                    Expression::Literal(Literal::String(elem.to_string()))
                })
                .collect();
            Expression::Function(Box::new(crate::expressions::Function::new(
                "ARRAY".to_string(),
                elements,
            )))
        }
        // Otherwise, just output as identifier (unquoted)
        else {
            Expression::Identifier(Identifier::new(trimmed.to_string()))
        }
    };

    // Nothing to do when the statement carries no properties at all.
    if ct.with_properties.is_empty() && ct.properties.is_empty() {
        return;
    }

    // Handle Presto-style WITH properties
    if !ct.with_properties.is_empty() {
        // Extract FORMAT property and remaining properties
        let mut format_value: Option<String> = None;
        let mut partitioned_by: Option<String> = None;
        let mut other_props: Vec<(String, String)> = Vec::new();

        // Drain the list so it can be rebuilt per-target below.
        for (key, value) in ct.with_properties.drain(..) {
            let key_upper = key.to_uppercase();
            if key_upper == "FORMAT" {
                // Strip surrounding quotes from value if present
                format_value = Some(value.trim_matches('\'').to_string());
            } else if key_upper == "PARTITIONED_BY" {
                partitioned_by = Some(value);
            } else {
                other_props.push((key, value));
            }
        }

        match target {
            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                // Presto: keep WITH properties but lowercase 'format' key
                if let Some(fmt) = format_value {
                    ct.with_properties
                        .push(("format".to_string(), format!("'{}'", fmt)));
                }
                if let Some(part) = partitioned_by {
                    // Convert (col1, col2) to ARRAY['col1', 'col2'] format
                    let trimmed = part.trim();
                    let inner = trimmed.trim_start_matches('(').trim_end_matches(')');
                    // Also handle ARRAY['...'] format - keep as-is
                    if trimmed.to_uppercase().starts_with("ARRAY") {
                        ct.with_properties
                            .push(("PARTITIONED_BY".to_string(), part));
                    } else {
                        // Parse column names from the parenthesized list
                        let cols: Vec<&str> = inner
                            .split(',')
                            .map(|c| c.trim().trim_matches('"').trim_matches('\''))
                            .collect();
                        let array_val = format!(
                            "ARRAY[{}]",
                            cols.iter()
                                .map(|c| format!("'{}'", c))
                                .collect::<Vec<_>>()
                                .join(", ")
                        );
                        ct.with_properties
                            .push(("PARTITIONED_BY".to_string(), array_val));
                    }
                }
                ct.with_properties.extend(other_props);
            }
            DialectType::Hive => {
                // Hive: FORMAT -> STORED AS, other props -> TBLPROPERTIES
                if let Some(fmt) = format_value {
                    ct.properties.push(Expression::FileFormatProperty(Box::new(
                        FileFormatProperty {
                            this: Some(Box::new(Expression::Identifier(Identifier::new(fmt)))),
                            expressions: vec![],
                            // Some(true) marks Hive's STORED AS rendering
                            // (contrast with None = USING in the Spark arm).
                            hive_format: Some(Box::new(Expression::Boolean(BooleanLiteral {
                                value: true,
                            }))),
                        },
                    )));
                }
                if let Some(_part) = partitioned_by {
                    // PARTITIONED_BY handling is complex - move columns to partitioned by
                    // For now, the partition columns are extracted from the column list
                    Self::apply_partitioned_by(ct, &_part, target);
                }
                if !other_props.is_empty() {
                    // Remaining key/value pairs become TBLPROPERTIES ('k' = v, ...).
                    let eq_exprs: Vec<Expression> = other_props
                        .into_iter()
                        .map(|(k, v)| {
                            Expression::Eq(Box::new(BinaryOp::new(
                                Expression::Literal(Literal::String(k)),
                                value_to_expr(&v),
                            )))
                        })
                        .collect();
                    ct.properties
                        .push(Expression::Properties(Box::new(Properties {
                            expressions: eq_exprs,
                        })));
                }
            }
            DialectType::Spark | DialectType::Databricks => {
                // Spark: FORMAT -> USING, other props -> TBLPROPERTIES
                if let Some(fmt) = format_value {
                    ct.properties.push(Expression::FileFormatProperty(Box::new(
                        FileFormatProperty {
                            this: Some(Box::new(Expression::Identifier(Identifier::new(fmt)))),
                            expressions: vec![],
                            hive_format: None, // None means USING syntax
                        },
                    )));
                }
                if let Some(_part) = partitioned_by {
                    Self::apply_partitioned_by(ct, &_part, target);
                }
                if !other_props.is_empty() {
                    // Same TBLPROPERTIES construction as the Hive arm.
                    let eq_exprs: Vec<Expression> = other_props
                        .into_iter()
                        .map(|(k, v)| {
                            Expression::Eq(Box::new(BinaryOp::new(
                                Expression::Literal(Literal::String(k)),
                                value_to_expr(&v),
                            )))
                        })
                        .collect();
                    ct.properties
                        .push(Expression::Properties(Box::new(Properties {
                            expressions: eq_exprs,
                        })));
                }
            }
            DialectType::DuckDB => {
                // DuckDB: strip all WITH properties (FORMAT, PARTITIONED_BY, etc.)
                // Keep nothing
            }
            _ => {
                // For other dialects, keep WITH properties as-is
                if let Some(fmt) = format_value {
                    ct.with_properties
                        .push(("FORMAT".to_string(), format!("'{}'", fmt)));
                }
                if let Some(part) = partitioned_by {
                    ct.with_properties
                        .push(("PARTITIONED_BY".to_string(), part));
                }
                ct.with_properties.extend(other_props);
            }
        }
    }

    // Handle STORED AS 'PARQUET' (quoted format name) -> STORED AS PARQUET (unquoted)
    // and Hive STORED AS -> Presto WITH (format=...) conversion
    if !ct.properties.is_empty() {
        let is_presto_target = matches!(
            target,
            DialectType::Presto | DialectType::Trino | DialectType::Athena
        );
        let is_duckdb_target = matches!(target, DialectType::DuckDB);

        if is_presto_target || is_duckdb_target {
            // Rebuild the properties list, converting or dropping entries.
            let mut new_properties = Vec::new();
            for prop in ct.properties.drain(..) {
                match &prop {
                    Expression::FileFormatProperty(ffp) => {
                        if is_presto_target {
                            // Convert STORED AS/USING to WITH (format=...)
                            if let Some(ref fmt_expr) = ffp.this {
                                let fmt_str = match fmt_expr.as_ref() {
                                    Expression::Identifier(id) => id.name.clone(),
                                    Expression::Literal(Literal::String(s)) => s.clone(),
                                    _ => {
                                        // Unrecognized format expression: keep untouched.
                                        new_properties.push(prop);
                                        continue;
                                    }
                                };
                                ct.with_properties
                                    .push(("format".to_string(), format!("'{}'", fmt_str)));
                            }
                        }
                        // DuckDB: just strip file format properties
                    }
                    // Convert TBLPROPERTIES to WITH properties for Presto target
                    Expression::Properties(props) if is_presto_target => {
                        for expr in &props.expressions {
                            if let Expression::Eq(eq) = expr {
                                // Extract key and value from the Eq expression;
                                // unsupported node kinds are silently skipped.
                                let key = match &eq.left {
                                    Expression::Literal(Literal::String(s)) => s.clone(),
                                    Expression::Identifier(id) => id.name.clone(),
                                    _ => continue,
                                };
                                let value = match &eq.right {
                                    Expression::Literal(Literal::String(s)) => {
                                        format!("'{}'", s)
                                    }
                                    Expression::Literal(Literal::Number(n)) => n.clone(),
                                    Expression::Identifier(id) => id.name.clone(),
                                    _ => continue,
                                };
                                ct.with_properties.push((key, value));
                            }
                        }
                    }
                    // Convert PartitionedByProperty for Presto target
                    Expression::PartitionedByProperty(ref pbp) if is_presto_target => {
                        // Check if it contains ColumnDef expressions (Hive-style with types)
                        if let Expression::Tuple(ref tuple) = *pbp.this {
                            let mut col_names: Vec<String> = Vec::new();
                            let mut col_defs: Vec<crate::expressions::ColumnDef> = Vec::new();
                            let mut has_col_defs = false;
                            for expr in &tuple.expressions {
                                if let Expression::ColumnDef(ref cd) = expr {
                                    has_col_defs = true;
                                    col_names.push(cd.name.name.clone());
                                    col_defs.push(*cd.clone());
                                } else if let Expression::Column(ref col) = expr {
                                    col_names.push(col.name.name.clone());
                                } else if let Expression::Identifier(ref id) = expr {
                                    col_names.push(id.name.clone());
                                } else {
                                    // For function expressions like MONTHS(y), serialize to SQL
                                    let generic = Dialect::get(DialectType::Generic);
                                    if let Ok(sql) = generic.generate(expr) {
                                        col_names.push(sql);
                                    }
                                }
                            }
                            if has_col_defs {
                                // Merge partition column defs into the main column list
                                for cd in col_defs {
                                    ct.columns.push(cd);
                                }
                            }
                            if !col_names.is_empty() {
                                // Add PARTITIONED_BY property
                                let array_val = format!(
                                    "ARRAY[{}]",
                                    col_names
                                        .iter()
                                        .map(|n| format!("'{}'", n))
                                        .collect::<Vec<_>>()
                                        .join(", ")
                                );
                                ct.with_properties
                                    .push(("PARTITIONED_BY".to_string(), array_val));
                            }
                        }
                        // Skip - don't keep in properties
                    }
                    _ => {
                        // DuckDB drops everything; Presto keeps unrelated properties.
                        if !is_duckdb_target {
                            new_properties.push(prop);
                        }
                    }
                }
            }
            ct.properties = new_properties;
        } else {
            // For Hive/Spark targets, unquote format names in STORED AS
            for prop in &mut ct.properties {
                if let Expression::FileFormatProperty(ref mut ffp) = prop {
                    if let Some(ref mut fmt_expr) = ffp.this {
                        if let Expression::Literal(Literal::String(s)) = fmt_expr.as_ref() {
                            // Convert STORED AS 'PARQUET' to STORED AS PARQUET (unquote)
                            let unquoted = s.clone();
                            *fmt_expr =
                                Box::new(Expression::Identifier(Identifier::new(unquoted)));
                        }
                    }
                }
            }
        }
    }
}
26451
26452 /// Apply PARTITIONED_BY conversion: move partition columns from column list to PARTITIONED BY
26453 fn apply_partitioned_by(
26454 ct: &mut crate::expressions::CreateTable,
26455 partitioned_by_value: &str,
26456 target: DialectType,
26457 ) {
26458 use crate::expressions::{Column, Expression, Identifier, PartitionedByProperty, Tuple};
26459
26460 // Parse the ARRAY['col1', 'col2'] value to extract column names
26461 let mut col_names: Vec<String> = Vec::new();
26462 // The value looks like ARRAY['y', 'z'] or ARRAY('y', 'z')
26463 let inner = partitioned_by_value
26464 .trim()
26465 .trim_start_matches("ARRAY")
26466 .trim_start_matches('[')
26467 .trim_start_matches('(')
26468 .trim_end_matches(']')
26469 .trim_end_matches(')');
26470 for part in inner.split(',') {
26471 let col = part.trim().trim_matches('\'').trim_matches('"');
26472 if !col.is_empty() {
26473 col_names.push(col.to_string());
26474 }
26475 }
26476
26477 if col_names.is_empty() {
26478 return;
26479 }
26480
26481 if matches!(target, DialectType::Hive) {
26482 // Hive: PARTITIONED BY (col_name type, ...) - move columns out of column list
26483 let mut partition_col_defs = Vec::new();
26484 for col_name in &col_names {
26485 // Find and remove from columns
26486 if let Some(pos) = ct
26487 .columns
26488 .iter()
26489 .position(|c| c.name.name.eq_ignore_ascii_case(col_name))
26490 {
26491 let col_def = ct.columns.remove(pos);
26492 partition_col_defs.push(Expression::ColumnDef(Box::new(col_def)));
26493 }
26494 }
26495 if !partition_col_defs.is_empty() {
26496 ct.properties
26497 .push(Expression::PartitionedByProperty(Box::new(
26498 PartitionedByProperty {
26499 this: Box::new(Expression::Tuple(Box::new(Tuple {
26500 expressions: partition_col_defs,
26501 }))),
26502 },
26503 )));
26504 }
26505 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
26506 // Spark: PARTITIONED BY (col1, col2) - just column names, keep in column list
26507 // Use quoted identifiers to match the quoting style of the original column definitions
26508 let partition_exprs: Vec<Expression> = col_names
26509 .iter()
26510 .map(|name| {
26511 // Check if the column exists in the column list and use its quoting
26512 let is_quoted = ct
26513 .columns
26514 .iter()
26515 .any(|c| c.name.name.eq_ignore_ascii_case(name) && c.name.quoted);
26516 let ident = if is_quoted {
26517 Identifier::quoted(name.clone())
26518 } else {
26519 Identifier::new(name.clone())
26520 };
26521 Expression::Column(Column {
26522 name: ident,
26523 table: None,
26524 join_mark: false,
26525 trailing_comments: Vec::new(),
26526 span: None,
26527 inferred_type: None,
26528 })
26529 })
26530 .collect();
26531 ct.properties
26532 .push(Expression::PartitionedByProperty(Box::new(
26533 PartitionedByProperty {
26534 this: Box::new(Expression::Tuple(Box::new(Tuple {
26535 expressions: partition_exprs,
26536 }))),
26537 },
26538 )));
26539 }
26540 // DuckDB: strip partitioned_by entirely (already handled)
26541 }
26542
26543 /// Convert a DataType to Spark's type string format (using angle brackets)
26544 fn data_type_to_spark_string(dt: &crate::expressions::DataType) -> String {
26545 use crate::expressions::DataType;
26546 match dt {
26547 DataType::Int { .. } => "INT".to_string(),
26548 DataType::BigInt { .. } => "BIGINT".to_string(),
26549 DataType::SmallInt { .. } => "SMALLINT".to_string(),
26550 DataType::TinyInt { .. } => "TINYINT".to_string(),
26551 DataType::Float { .. } => "FLOAT".to_string(),
26552 DataType::Double { .. } => "DOUBLE".to_string(),
26553 DataType::Decimal {
26554 precision: Some(p),
26555 scale: Some(s),
26556 } => format!("DECIMAL({}, {})", p, s),
26557 DataType::Decimal {
26558 precision: Some(p), ..
26559 } => format!("DECIMAL({})", p),
26560 DataType::Decimal { .. } => "DECIMAL".to_string(),
26561 DataType::VarChar { .. } | DataType::Text | DataType::String { .. } => {
26562 "STRING".to_string()
26563 }
26564 DataType::Char { .. } => "STRING".to_string(),
26565 DataType::Boolean => "BOOLEAN".to_string(),
26566 DataType::Date => "DATE".to_string(),
26567 DataType::Timestamp { .. } => "TIMESTAMP".to_string(),
26568 DataType::Json | DataType::JsonB => "STRING".to_string(),
26569 DataType::Binary { .. } => "BINARY".to_string(),
26570 DataType::Array { element_type, .. } => {
26571 format!("ARRAY<{}>", Self::data_type_to_spark_string(element_type))
26572 }
26573 DataType::Map {
26574 key_type,
26575 value_type,
26576 } => format!(
26577 "MAP<{}, {}>",
26578 Self::data_type_to_spark_string(key_type),
26579 Self::data_type_to_spark_string(value_type)
26580 ),
26581 DataType::Struct { fields, .. } => {
26582 let field_strs: Vec<String> = fields
26583 .iter()
26584 .map(|f| {
26585 if f.name.is_empty() {
26586 Self::data_type_to_spark_string(&f.data_type)
26587 } else {
26588 format!(
26589 "{}: {}",
26590 f.name,
26591 Self::data_type_to_spark_string(&f.data_type)
26592 )
26593 }
26594 })
26595 .collect();
26596 format!("STRUCT<{}>", field_strs.join(", "))
26597 }
26598 DataType::Custom { name } => name.clone(),
26599 _ => format!("{:?}", dt),
26600 }
26601 }
26602
26603 /// Extract value and unit from an Interval expression
26604 /// Returns (value_expression, IntervalUnit)
26605 fn extract_interval_parts(
26606 interval_expr: &Expression,
26607 ) -> (Expression, crate::expressions::IntervalUnit) {
26608 use crate::expressions::{IntervalUnit, IntervalUnitSpec};
26609
26610 if let Expression::Interval(iv) = interval_expr {
26611 let val = iv.this.clone().unwrap_or(Expression::number(0));
26612 let unit = match &iv.unit {
26613 Some(IntervalUnitSpec::Simple { unit, .. }) => *unit,
26614 None => {
26615 // Unit might be embedded in the string value (Snowflake format: '5 DAY')
26616 if let Expression::Literal(crate::expressions::Literal::String(s)) = &val {
26617 let parts: Vec<&str> = s.trim().splitn(2, ' ').collect();
26618 if parts.len() == 2 {
26619 let unit_str = parts[1].trim().to_uppercase();
26620 let parsed_unit = match unit_str.as_str() {
26621 "YEAR" | "YEARS" => IntervalUnit::Year,
26622 "QUARTER" | "QUARTERS" => IntervalUnit::Quarter,
26623 "MONTH" | "MONTHS" => IntervalUnit::Month,
26624 "WEEK" | "WEEKS" | "ISOWEEK" => IntervalUnit::Week,
26625 "DAY" | "DAYS" => IntervalUnit::Day,
26626 "HOUR" | "HOURS" => IntervalUnit::Hour,
26627 "MINUTE" | "MINUTES" => IntervalUnit::Minute,
26628 "SECOND" | "SECONDS" => IntervalUnit::Second,
26629 "MILLISECOND" | "MILLISECONDS" => IntervalUnit::Millisecond,
26630 "MICROSECOND" | "MICROSECONDS" => IntervalUnit::Microsecond,
26631 _ => IntervalUnit::Day,
26632 };
26633 // Return just the numeric part as value and parsed unit
26634 return (
26635 Expression::Literal(crate::expressions::Literal::String(
26636 parts[0].to_string(),
26637 )),
26638 parsed_unit,
26639 );
26640 }
26641 IntervalUnit::Day
26642 } else {
26643 IntervalUnit::Day
26644 }
26645 }
26646 _ => IntervalUnit::Day,
26647 };
26648 (val, unit)
26649 } else {
26650 // Not an interval - pass through
26651 (interval_expr.clone(), crate::expressions::IntervalUnit::Day)
26652 }
26653 }
26654
26655 /// Normalize BigQuery-specific functions to standard forms that target dialects can handle
26656 fn normalize_bigquery_function(
26657 e: Expression,
26658 source: DialectType,
26659 target: DialectType,
26660 ) -> Result<Expression> {
26661 use crate::expressions::{BinaryOp, Cast, DataType, Function, Identifier, Literal, Paren};
26662
26663 let f = if let Expression::Function(f) = e {
26664 *f
26665 } else {
26666 return Ok(e);
26667 };
26668 let name = f.name.to_uppercase();
26669 let mut args = f.args;
26670
26671 /// Helper to extract unit string from an identifier, column, or literal expression
26672 fn get_unit_str(expr: &Expression) -> String {
26673 match expr {
26674 Expression::Identifier(id) => id.name.to_uppercase(),
26675 Expression::Literal(Literal::String(s)) => s.to_uppercase(),
26676 Expression::Column(col) => col.name.name.to_uppercase(),
26677 // Handle WEEK(MONDAY), WEEK(SUNDAY) etc. which are parsed as Function("WEEK", [Column("MONDAY")])
26678 Expression::Function(f) => {
26679 let base = f.name.to_uppercase();
26680 if !f.args.is_empty() {
26681 // e.g., WEEK(MONDAY) -> "WEEK(MONDAY)"
26682 let inner = get_unit_str(&f.args[0]);
26683 format!("{}({})", base, inner)
26684 } else {
26685 base
26686 }
26687 }
26688 _ => "DAY".to_string(),
26689 }
26690 }
26691
26692 /// Parse unit string to IntervalUnit
26693 fn parse_interval_unit(s: &str) -> crate::expressions::IntervalUnit {
26694 match s {
26695 "YEAR" => crate::expressions::IntervalUnit::Year,
26696 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
26697 "MONTH" => crate::expressions::IntervalUnit::Month,
26698 "WEEK" | "ISOWEEK" => crate::expressions::IntervalUnit::Week,
26699 "DAY" => crate::expressions::IntervalUnit::Day,
26700 "HOUR" => crate::expressions::IntervalUnit::Hour,
26701 "MINUTE" => crate::expressions::IntervalUnit::Minute,
26702 "SECOND" => crate::expressions::IntervalUnit::Second,
26703 "MILLISECOND" => crate::expressions::IntervalUnit::Millisecond,
26704 "MICROSECOND" => crate::expressions::IntervalUnit::Microsecond,
26705 _ if s.starts_with("WEEK(") => crate::expressions::IntervalUnit::Week,
26706 _ => crate::expressions::IntervalUnit::Day,
26707 }
26708 }
26709
26710 match name.as_str() {
26711 // TIMESTAMP_DIFF(date1, date2, unit) -> TIMESTAMPDIFF(unit, date2, date1)
26712 // (BigQuery: result = date1 - date2, Standard: result = end - start)
26713 "TIMESTAMP_DIFF" | "DATETIME_DIFF" | "TIME_DIFF" if args.len() == 3 => {
26714 let date1 = args.remove(0);
26715 let date2 = args.remove(0);
26716 let unit_expr = args.remove(0);
26717 let unit_str = get_unit_str(&unit_expr);
26718
26719 if matches!(target, DialectType::BigQuery) {
26720 // BigQuery -> BigQuery: just uppercase the unit
26721 let unit = Expression::Identifier(Identifier::new(unit_str.clone()));
26722 return Ok(Expression::Function(Box::new(Function::new(
26723 f.name,
26724 vec![date1, date2, unit],
26725 ))));
26726 }
26727
26728 // For Snowflake: use TimestampDiff expression so it generates TIMESTAMPDIFF
26729 // (Function("TIMESTAMPDIFF") would be converted to DATEDIFF by Snowflake's function normalization)
26730 if matches!(target, DialectType::Snowflake) {
26731 return Ok(Expression::TimestampDiff(Box::new(
26732 crate::expressions::TimestampDiff {
26733 this: Box::new(date2),
26734 expression: Box::new(date1),
26735 unit: Some(unit_str),
26736 },
26737 )));
26738 }
26739
26740 // For DuckDB: DATE_DIFF('UNIT', start, end) with proper CAST
26741 if matches!(target, DialectType::DuckDB) {
26742 let (cast_d1, cast_d2) = if name == "TIME_DIFF" {
26743 // CAST to TIME
26744 let cast_fn = |e: Expression| -> Expression {
26745 match e {
26746 Expression::Literal(Literal::String(s)) => {
26747 Expression::Cast(Box::new(Cast {
26748 this: Expression::Literal(Literal::String(s)),
26749 to: DataType::Custom {
26750 name: "TIME".to_string(),
26751 },
26752 trailing_comments: vec![],
26753 double_colon_syntax: false,
26754 format: None,
26755 default: None,
26756 inferred_type: None,
26757 }))
26758 }
26759 other => other,
26760 }
26761 };
26762 (cast_fn(date1), cast_fn(date2))
26763 } else if name == "DATETIME_DIFF" {
26764 // CAST to TIMESTAMP
26765 (
26766 Self::ensure_cast_timestamp(date1),
26767 Self::ensure_cast_timestamp(date2),
26768 )
26769 } else {
26770 // TIMESTAMP_DIFF: CAST to TIMESTAMPTZ
26771 (
26772 Self::ensure_cast_timestamptz(date1),
26773 Self::ensure_cast_timestamptz(date2),
26774 )
26775 };
26776 return Ok(Expression::Function(Box::new(Function::new(
26777 "DATE_DIFF".to_string(),
26778 vec![
26779 Expression::Literal(Literal::String(unit_str)),
26780 cast_d2,
26781 cast_d1,
26782 ],
26783 ))));
26784 }
26785
26786 // Convert to standard TIMESTAMPDIFF(unit, start, end)
26787 let unit = Expression::Identifier(Identifier::new(unit_str));
26788 Ok(Expression::Function(Box::new(Function::new(
26789 "TIMESTAMPDIFF".to_string(),
26790 vec![unit, date2, date1],
26791 ))))
26792 }
26793
26794 // DATEDIFF(unit, start, end) -> target-specific form
26795 // Used by: Redshift, Snowflake, TSQL, Databricks, Spark
26796 "DATEDIFF" if args.len() == 3 => {
26797 let arg0 = args.remove(0);
26798 let arg1 = args.remove(0);
26799 let arg2 = args.remove(0);
26800 let unit_str = get_unit_str(&arg0);
26801
26802 // Redshift DATEDIFF(unit, start, end) order: result = end - start
26803 // Snowflake DATEDIFF(unit, start, end) order: result = end - start
26804 // TSQL DATEDIFF(unit, start, end) order: result = end - start
26805
26806 if matches!(target, DialectType::Snowflake) {
26807 // Snowflake: DATEDIFF(UNIT, start, end) - uppercase unit
26808 let unit = Expression::Identifier(Identifier::new(unit_str));
26809 return Ok(Expression::Function(Box::new(Function::new(
26810 "DATEDIFF".to_string(),
26811 vec![unit, arg1, arg2],
26812 ))));
26813 }
26814
26815 if matches!(target, DialectType::DuckDB) {
26816 // DuckDB: DATE_DIFF('UNIT', start, end) with CAST
26817 let cast_d1 = Self::ensure_cast_timestamp(arg1);
26818 let cast_d2 = Self::ensure_cast_timestamp(arg2);
26819 return Ok(Expression::Function(Box::new(Function::new(
26820 "DATE_DIFF".to_string(),
26821 vec![
26822 Expression::Literal(Literal::String(unit_str)),
26823 cast_d1,
26824 cast_d2,
26825 ],
26826 ))));
26827 }
26828
26829 if matches!(target, DialectType::BigQuery) {
26830 // BigQuery: DATE_DIFF(end_date, start_date, UNIT) - reversed args, CAST to DATETIME
26831 let cast_d1 = Self::ensure_cast_datetime(arg1);
26832 let cast_d2 = Self::ensure_cast_datetime(arg2);
26833 let unit = Expression::Identifier(Identifier::new(unit_str));
26834 return Ok(Expression::Function(Box::new(Function::new(
26835 "DATE_DIFF".to_string(),
26836 vec![cast_d2, cast_d1, unit],
26837 ))));
26838 }
26839
26840 if matches!(target, DialectType::Spark | DialectType::Databricks) {
26841 // Spark/Databricks: DATEDIFF(UNIT, start, end) - uppercase unit
26842 let unit = Expression::Identifier(Identifier::new(unit_str));
26843 return Ok(Expression::Function(Box::new(Function::new(
26844 "DATEDIFF".to_string(),
26845 vec![unit, arg1, arg2],
26846 ))));
26847 }
26848
26849 if matches!(target, DialectType::Hive) {
26850 // Hive: DATEDIFF(end, start) for DAY only, use MONTHS_BETWEEN for MONTH
26851 match unit_str.as_str() {
26852 "MONTH" => {
26853 return Ok(Expression::Function(Box::new(Function::new(
26854 "CAST".to_string(),
26855 vec![Expression::Function(Box::new(Function::new(
26856 "MONTHS_BETWEEN".to_string(),
26857 vec![arg2, arg1],
26858 )))],
26859 ))));
26860 }
26861 "WEEK" => {
26862 return Ok(Expression::Cast(Box::new(Cast {
26863 this: Expression::Div(Box::new(crate::expressions::BinaryOp::new(
26864 Expression::Function(Box::new(Function::new(
26865 "DATEDIFF".to_string(),
26866 vec![arg2, arg1],
26867 ))),
26868 Expression::Literal(Literal::Number("7".to_string())),
26869 ))),
26870 to: DataType::Int {
26871 length: None,
26872 integer_spelling: false,
26873 },
26874 trailing_comments: vec![],
26875 double_colon_syntax: false,
26876 format: None,
26877 default: None,
26878 inferred_type: None,
26879 })));
26880 }
26881 _ => {
26882 // Default: DATEDIFF(end, start) for DAY
26883 return Ok(Expression::Function(Box::new(Function::new(
26884 "DATEDIFF".to_string(),
26885 vec![arg2, arg1],
26886 ))));
26887 }
26888 }
26889 }
26890
26891 if matches!(
26892 target,
26893 DialectType::Presto | DialectType::Trino | DialectType::Athena
26894 ) {
26895 // Presto/Trino: DATE_DIFF('UNIT', start, end)
26896 return Ok(Expression::Function(Box::new(Function::new(
26897 "DATE_DIFF".to_string(),
26898 vec![Expression::Literal(Literal::String(unit_str)), arg1, arg2],
26899 ))));
26900 }
26901
26902 if matches!(target, DialectType::TSQL) {
26903 // TSQL: DATEDIFF(UNIT, start, CAST(end AS DATETIME2))
26904 let cast_d2 = Self::ensure_cast_datetime2(arg2);
26905 let unit = Expression::Identifier(Identifier::new(unit_str));
26906 return Ok(Expression::Function(Box::new(Function::new(
26907 "DATEDIFF".to_string(),
26908 vec![unit, arg1, cast_d2],
26909 ))));
26910 }
26911
26912 if matches!(target, DialectType::PostgreSQL) {
26913 // PostgreSQL doesn't have DATEDIFF - use date subtraction or EXTRACT
26914 // For now, use DATEDIFF (passthrough) with uppercased unit
26915 let unit = Expression::Identifier(Identifier::new(unit_str));
26916 return Ok(Expression::Function(Box::new(Function::new(
26917 "DATEDIFF".to_string(),
26918 vec![unit, arg1, arg2],
26919 ))));
26920 }
26921
26922 // Default: DATEDIFF(UNIT, start, end) with uppercase unit
26923 let unit = Expression::Identifier(Identifier::new(unit_str));
26924 Ok(Expression::Function(Box::new(Function::new(
26925 "DATEDIFF".to_string(),
26926 vec![unit, arg1, arg2],
26927 ))))
26928 }
26929
26930 // DATE_DIFF(date1, date2, unit) -> standard form
26931 "DATE_DIFF" if args.len() == 3 => {
26932 let date1 = args.remove(0);
26933 let date2 = args.remove(0);
26934 let unit_expr = args.remove(0);
26935 let unit_str = get_unit_str(&unit_expr);
26936
26937 if matches!(target, DialectType::BigQuery) {
26938 // BigQuery -> BigQuery: just uppercase the unit, normalize WEEK(SUNDAY) -> WEEK
26939 let norm_unit = if unit_str == "WEEK(SUNDAY)" {
26940 "WEEK".to_string()
26941 } else {
26942 unit_str
26943 };
26944 let norm_d1 = Self::date_literal_to_cast(date1);
26945 let norm_d2 = Self::date_literal_to_cast(date2);
26946 let unit = Expression::Identifier(Identifier::new(norm_unit));
26947 return Ok(Expression::Function(Box::new(Function::new(
26948 f.name,
26949 vec![norm_d1, norm_d2, unit],
26950 ))));
26951 }
26952
26953 if matches!(target, DialectType::MySQL) {
26954 // MySQL DATEDIFF only takes 2 args (date1, date2), returns day difference
26955 let norm_d1 = Self::date_literal_to_cast(date1);
26956 let norm_d2 = Self::date_literal_to_cast(date2);
26957 return Ok(Expression::Function(Box::new(Function::new(
26958 "DATEDIFF".to_string(),
26959 vec![norm_d1, norm_d2],
26960 ))));
26961 }
26962
26963 if matches!(target, DialectType::StarRocks) {
26964 // StarRocks: DATE_DIFF('UNIT', date1, date2) - unit as string, args NOT swapped
26965 let norm_d1 = Self::date_literal_to_cast(date1);
26966 let norm_d2 = Self::date_literal_to_cast(date2);
26967 return Ok(Expression::Function(Box::new(Function::new(
26968 "DATE_DIFF".to_string(),
26969 vec![
26970 Expression::Literal(Literal::String(unit_str)),
26971 norm_d1,
26972 norm_d2,
26973 ],
26974 ))));
26975 }
26976
26977 if matches!(target, DialectType::DuckDB) {
26978 // DuckDB: DATE_DIFF('UNIT', date2, date1) with proper CAST for dates
26979 let norm_d1 = Self::ensure_cast_date(date1);
26980 let norm_d2 = Self::ensure_cast_date(date2);
26981
26982 // Handle WEEK variants: WEEK(MONDAY)/WEEK(SUNDAY)/ISOWEEK/WEEK
26983 let is_week_variant = unit_str == "WEEK"
26984 || unit_str.starts_with("WEEK(")
26985 || unit_str == "ISOWEEK";
26986 if is_week_variant {
26987 // For DuckDB, WEEK-based diffs use DATE_TRUNC approach
26988 // WEEK(MONDAY) / ISOWEEK: DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2), DATE_TRUNC('WEEK', d1))
26989 // WEEK / WEEK(SUNDAY): DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2 + INTERVAL '1' DAY), DATE_TRUNC('WEEK', d1 + INTERVAL '1' DAY))
26990 // WEEK(SATURDAY): DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2 + INTERVAL '-5' DAY), DATE_TRUNC('WEEK', d1 + INTERVAL '-5' DAY))
26991 let day_offset = if unit_str == "WEEK(MONDAY)" || unit_str == "ISOWEEK" {
26992 None // ISO weeks start on Monday, aligned with DATE_TRUNC('WEEK')
26993 } else if unit_str == "WEEK" || unit_str == "WEEK(SUNDAY)" {
26994 Some("1") // Shift Sunday to Monday alignment
26995 } else if unit_str == "WEEK(SATURDAY)" {
26996 Some("-5")
26997 } else if unit_str == "WEEK(TUESDAY)" {
26998 Some("-1")
26999 } else if unit_str == "WEEK(WEDNESDAY)" {
27000 Some("-2")
27001 } else if unit_str == "WEEK(THURSDAY)" {
27002 Some("-3")
27003 } else if unit_str == "WEEK(FRIDAY)" {
27004 Some("-4")
27005 } else {
27006 Some("1") // default to Sunday
27007 };
27008
27009 let make_trunc = |date: Expression, offset: Option<&str>| -> Expression {
27010 let shifted = if let Some(off) = offset {
27011 let interval =
27012 Expression::Interval(Box::new(crate::expressions::Interval {
27013 this: Some(Expression::Literal(Literal::String(
27014 off.to_string(),
27015 ))),
27016 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
27017 unit: crate::expressions::IntervalUnit::Day,
27018 use_plural: false,
27019 }),
27020 }));
27021 Expression::Add(Box::new(crate::expressions::BinaryOp::new(
27022 date, interval,
27023 )))
27024 } else {
27025 date
27026 };
27027 Expression::Function(Box::new(Function::new(
27028 "DATE_TRUNC".to_string(),
27029 vec![
27030 Expression::Literal(Literal::String("WEEK".to_string())),
27031 shifted,
27032 ],
27033 )))
27034 };
27035
27036 let trunc_d2 = make_trunc(norm_d2, day_offset);
27037 let trunc_d1 = make_trunc(norm_d1, day_offset);
27038 return Ok(Expression::Function(Box::new(Function::new(
27039 "DATE_DIFF".to_string(),
27040 vec![
27041 Expression::Literal(Literal::String("WEEK".to_string())),
27042 trunc_d2,
27043 trunc_d1,
27044 ],
27045 ))));
27046 }
27047
27048 return Ok(Expression::Function(Box::new(Function::new(
27049 "DATE_DIFF".to_string(),
27050 vec![
27051 Expression::Literal(Literal::String(unit_str)),
27052 norm_d2,
27053 norm_d1,
27054 ],
27055 ))));
27056 }
27057
27058 // Default: DATEDIFF(unit, date2, date1)
27059 let unit = Expression::Identifier(Identifier::new(unit_str));
27060 Ok(Expression::Function(Box::new(Function::new(
27061 "DATEDIFF".to_string(),
27062 vec![unit, date2, date1],
27063 ))))
27064 }
27065
27066 // TIMESTAMP_ADD(ts, INTERVAL n UNIT) -> target-specific
27067 "TIMESTAMP_ADD" | "DATETIME_ADD" | "TIME_ADD" if args.len() == 2 => {
27068 let ts = args.remove(0);
27069 let interval_expr = args.remove(0);
27070 let (val, unit) = Self::extract_interval_parts(&interval_expr);
27071
27072 match target {
27073 DialectType::Snowflake => {
27074 // TIMESTAMPADD(UNIT, val, CAST(ts AS TIMESTAMPTZ))
27075 // Use TimestampAdd expression so Snowflake generates TIMESTAMPADD
27076 // (Function("TIMESTAMPADD") would be converted to DATEADD by Snowflake's function normalization)
27077 let unit_str = Self::interval_unit_to_string(&unit);
27078 let cast_ts = Self::maybe_cast_ts_to_tz(ts, &name);
27079 Ok(Expression::TimestampAdd(Box::new(
27080 crate::expressions::TimestampAdd {
27081 this: Box::new(val),
27082 expression: Box::new(cast_ts),
27083 unit: Some(unit_str),
27084 },
27085 )))
27086 }
27087 DialectType::Spark | DialectType::Databricks => {
27088 if name == "DATETIME_ADD" && matches!(target, DialectType::Spark) {
27089 // Spark DATETIME_ADD: ts + INTERVAL val UNIT
27090 let interval =
27091 Expression::Interval(Box::new(crate::expressions::Interval {
27092 this: Some(val),
27093 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
27094 unit,
27095 use_plural: false,
27096 }),
27097 }));
27098 Ok(Expression::Add(Box::new(
27099 crate::expressions::BinaryOp::new(ts, interval),
27100 )))
27101 } else if name == "DATETIME_ADD"
27102 && matches!(target, DialectType::Databricks)
27103 {
27104 // Databricks DATETIME_ADD: TIMESTAMPADD(UNIT, val, ts)
27105 let unit_str = Self::interval_unit_to_string(&unit);
27106 Ok(Expression::Function(Box::new(Function::new(
27107 "TIMESTAMPADD".to_string(),
27108 vec![Expression::Identifier(Identifier::new(unit_str)), val, ts],
27109 ))))
27110 } else {
27111 // Presto-style: DATE_ADD('unit', val, CAST(ts AS TIMESTAMP))
27112 let unit_str = Self::interval_unit_to_string(&unit);
27113 let cast_ts =
27114 if name.starts_with("TIMESTAMP") || name.starts_with("DATETIME") {
27115 Self::maybe_cast_ts(ts)
27116 } else {
27117 ts
27118 };
27119 Ok(Expression::Function(Box::new(Function::new(
27120 "DATE_ADD".to_string(),
27121 vec![
27122 Expression::Identifier(Identifier::new(unit_str)),
27123 val,
27124 cast_ts,
27125 ],
27126 ))))
27127 }
27128 }
27129 DialectType::MySQL => {
27130 // DATE_ADD(TIMESTAMP(ts), INTERVAL val UNIT) for MySQL
27131 let mysql_ts = if name.starts_with("TIMESTAMP") {
27132 // Check if already wrapped in TIMESTAMP() function (from cross-dialect normalization)
27133 match &ts {
27134 Expression::Function(ref inner_f)
27135 if inner_f.name.eq_ignore_ascii_case("TIMESTAMP") =>
27136 {
27137 // Already wrapped, keep as-is
27138 ts
27139 }
27140 _ => {
27141 // Unwrap typed literals: TIMESTAMP '...' -> '...' for TIMESTAMP() wrapper
27142 let unwrapped = match ts {
27143 Expression::Literal(Literal::Timestamp(s)) => {
27144 Expression::Literal(Literal::String(s))
27145 }
27146 other => other,
27147 };
27148 Expression::Function(Box::new(Function::new(
27149 "TIMESTAMP".to_string(),
27150 vec![unwrapped],
27151 )))
27152 }
27153 }
27154 } else {
27155 ts
27156 };
27157 Ok(Expression::DateAdd(Box::new(
27158 crate::expressions::DateAddFunc {
27159 this: mysql_ts,
27160 interval: val,
27161 unit,
27162 },
27163 )))
27164 }
27165 _ => {
27166 // DuckDB and others use DateAdd expression (DuckDB converts to + INTERVAL)
27167 let cast_ts = if matches!(target, DialectType::DuckDB) {
27168 if name == "DATETIME_ADD" {
27169 Self::ensure_cast_timestamp(ts)
27170 } else if name.starts_with("TIMESTAMP") {
27171 Self::maybe_cast_ts_to_tz(ts, &name)
27172 } else {
27173 ts
27174 }
27175 } else {
27176 ts
27177 };
27178 Ok(Expression::DateAdd(Box::new(
27179 crate::expressions::DateAddFunc {
27180 this: cast_ts,
27181 interval: val,
27182 unit,
27183 },
27184 )))
27185 }
27186 }
27187 }
27188
27189 // TIMESTAMP_SUB(ts, INTERVAL n UNIT) -> target-specific
27190 "TIMESTAMP_SUB" | "DATETIME_SUB" | "TIME_SUB" if args.len() == 2 => {
27191 let ts = args.remove(0);
27192 let interval_expr = args.remove(0);
27193 let (val, unit) = Self::extract_interval_parts(&interval_expr);
27194
27195 match target {
27196 DialectType::Snowflake => {
27197 // TIMESTAMPADD(UNIT, val * -1, CAST(ts AS TIMESTAMPTZ))
27198 let unit_str = Self::interval_unit_to_string(&unit);
27199 let cast_ts = Self::maybe_cast_ts_to_tz(ts, &name);
27200 let neg_val = Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
27201 val,
27202 Expression::Neg(Box::new(crate::expressions::UnaryOp {
27203 this: Expression::number(1),
27204 inferred_type: None,
27205 })),
27206 )));
27207 Ok(Expression::TimestampAdd(Box::new(
27208 crate::expressions::TimestampAdd {
27209 this: Box::new(neg_val),
27210 expression: Box::new(cast_ts),
27211 unit: Some(unit_str),
27212 },
27213 )))
27214 }
27215 DialectType::Spark | DialectType::Databricks => {
27216 if (name == "DATETIME_SUB" && matches!(target, DialectType::Spark))
27217 || (name == "TIMESTAMP_SUB" && matches!(target, DialectType::Spark))
27218 {
27219 // Spark: ts - INTERVAL val UNIT
27220 let cast_ts = if name.starts_with("TIMESTAMP") {
27221 Self::maybe_cast_ts(ts)
27222 } else {
27223 ts
27224 };
27225 let interval =
27226 Expression::Interval(Box::new(crate::expressions::Interval {
27227 this: Some(val),
27228 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
27229 unit,
27230 use_plural: false,
27231 }),
27232 }));
27233 Ok(Expression::Sub(Box::new(
27234 crate::expressions::BinaryOp::new(cast_ts, interval),
27235 )))
27236 } else {
27237 // Databricks: TIMESTAMPADD(UNIT, val * -1, ts)
27238 let unit_str = Self::interval_unit_to_string(&unit);
27239 let neg_val =
27240 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
27241 val,
27242 Expression::Neg(Box::new(crate::expressions::UnaryOp {
27243 this: Expression::number(1),
27244 inferred_type: None,
27245 })),
27246 )));
27247 Ok(Expression::Function(Box::new(Function::new(
27248 "TIMESTAMPADD".to_string(),
27249 vec![
27250 Expression::Identifier(Identifier::new(unit_str)),
27251 neg_val,
27252 ts,
27253 ],
27254 ))))
27255 }
27256 }
27257 DialectType::MySQL => {
27258 let mysql_ts = if name.starts_with("TIMESTAMP") {
27259 // Check if already wrapped in TIMESTAMP() function (from cross-dialect normalization)
27260 match &ts {
27261 Expression::Function(ref inner_f)
27262 if inner_f.name.eq_ignore_ascii_case("TIMESTAMP") =>
27263 {
27264 // Already wrapped, keep as-is
27265 ts
27266 }
27267 _ => {
27268 let unwrapped = match ts {
27269 Expression::Literal(Literal::Timestamp(s)) => {
27270 Expression::Literal(Literal::String(s))
27271 }
27272 other => other,
27273 };
27274 Expression::Function(Box::new(Function::new(
27275 "TIMESTAMP".to_string(),
27276 vec![unwrapped],
27277 )))
27278 }
27279 }
27280 } else {
27281 ts
27282 };
27283 Ok(Expression::DateSub(Box::new(
27284 crate::expressions::DateAddFunc {
27285 this: mysql_ts,
27286 interval: val,
27287 unit,
27288 },
27289 )))
27290 }
27291 _ => {
27292 let cast_ts = if matches!(target, DialectType::DuckDB) {
27293 if name == "DATETIME_SUB" {
27294 Self::ensure_cast_timestamp(ts)
27295 } else if name.starts_with("TIMESTAMP") {
27296 Self::maybe_cast_ts_to_tz(ts, &name)
27297 } else {
27298 ts
27299 }
27300 } else {
27301 ts
27302 };
27303 Ok(Expression::DateSub(Box::new(
27304 crate::expressions::DateAddFunc {
27305 this: cast_ts,
27306 interval: val,
27307 unit,
27308 },
27309 )))
27310 }
27311 }
27312 }
27313
27314 // DATE_SUB(date, INTERVAL n UNIT) -> target-specific
27315 "DATE_SUB" if args.len() == 2 => {
27316 let date = args.remove(0);
27317 let interval_expr = args.remove(0);
27318 let (val, unit) = Self::extract_interval_parts(&interval_expr);
27319
27320 match target {
27321 DialectType::Databricks | DialectType::Spark => {
27322 // Databricks/Spark: DATE_ADD(date, -val)
27323 // Use DateAdd expression with negative val so it generates correctly
27324 // The generator will output DATE_ADD(date, INTERVAL -val DAY)
27325 // Then Databricks transform converts 2-arg DATE_ADD(date, interval) to DATEADD(DAY, interval, date)
27326 // Instead, we directly output as a simple negated DateSub
27327 Ok(Expression::DateSub(Box::new(
27328 crate::expressions::DateAddFunc {
27329 this: date,
27330 interval: val,
27331 unit,
27332 },
27333 )))
27334 }
27335 DialectType::DuckDB => {
27336 // DuckDB: CAST(date AS DATE) - INTERVAL 'val' UNIT
27337 let cast_date = Self::ensure_cast_date(date);
27338 let interval =
27339 Expression::Interval(Box::new(crate::expressions::Interval {
27340 this: Some(val),
27341 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
27342 unit,
27343 use_plural: false,
27344 }),
27345 }));
27346 Ok(Expression::Sub(Box::new(
27347 crate::expressions::BinaryOp::new(cast_date, interval),
27348 )))
27349 }
27350 DialectType::Snowflake => {
27351 // Snowflake: Let Snowflake's own DateSub -> DATEADD(UNIT, val * -1, date) handler work
27352 // Just ensure the date is cast properly
27353 let cast_date = Self::ensure_cast_date(date);
27354 Ok(Expression::DateSub(Box::new(
27355 crate::expressions::DateAddFunc {
27356 this: cast_date,
27357 interval: val,
27358 unit,
27359 },
27360 )))
27361 }
27362 DialectType::PostgreSQL => {
27363 // PostgreSQL: date - INTERVAL 'val UNIT'
27364 let unit_str = Self::interval_unit_to_string(&unit);
27365 let interval =
27366 Expression::Interval(Box::new(crate::expressions::Interval {
27367 this: Some(Expression::Literal(Literal::String(format!(
27368 "{} {}",
27369 Self::expr_to_string(&val),
27370 unit_str
27371 )))),
27372 unit: None,
27373 }));
27374 Ok(Expression::Sub(Box::new(
27375 crate::expressions::BinaryOp::new(date, interval),
27376 )))
27377 }
27378 _ => Ok(Expression::DateSub(Box::new(
27379 crate::expressions::DateAddFunc {
27380 this: date,
27381 interval: val,
27382 unit,
27383 },
27384 ))),
27385 }
27386 }
27387
27388 // DATEADD(unit, val, date) -> target-specific form
27389 // Used by: Redshift, Snowflake, TSQL, ClickHouse
27390 "DATEADD" if args.len() == 3 => {
27391 let arg0 = args.remove(0);
27392 let arg1 = args.remove(0);
27393 let arg2 = args.remove(0);
27394 let unit_str = get_unit_str(&arg0);
27395
27396 if matches!(target, DialectType::Snowflake | DialectType::TSQL) {
27397 // Keep DATEADD(UNIT, val, date) with uppercased unit
27398 let unit = Expression::Identifier(Identifier::new(unit_str));
27399 // Only CAST to DATETIME2 for TSQL target when source is NOT Spark/Databricks family
27400 let date = if matches!(target, DialectType::TSQL)
27401 && !matches!(
27402 source,
27403 DialectType::Spark | DialectType::Databricks | DialectType::Hive
27404 ) {
27405 Self::ensure_cast_datetime2(arg2)
27406 } else {
27407 arg2
27408 };
27409 return Ok(Expression::Function(Box::new(Function::new(
27410 "DATEADD".to_string(),
27411 vec![unit, arg1, date],
27412 ))));
27413 }
27414
27415 if matches!(target, DialectType::DuckDB) {
27416 // DuckDB: date + INTERVAL 'val' UNIT
27417 let iu = parse_interval_unit(&unit_str);
27418 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
27419 this: Some(arg1),
27420 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
27421 unit: iu,
27422 use_plural: false,
27423 }),
27424 }));
27425 let cast_date = Self::ensure_cast_timestamp(arg2);
27426 return Ok(Expression::Add(Box::new(
27427 crate::expressions::BinaryOp::new(cast_date, interval),
27428 )));
27429 }
27430
27431 if matches!(target, DialectType::BigQuery) {
27432 // BigQuery: DATE_ADD(date, INTERVAL val UNIT) or TIMESTAMP_ADD(ts, INTERVAL val UNIT)
27433 let iu = parse_interval_unit(&unit_str);
27434 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
27435 this: Some(arg1),
27436 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
27437 unit: iu,
27438 use_plural: false,
27439 }),
27440 }));
27441 return Ok(Expression::Function(Box::new(Function::new(
27442 "DATE_ADD".to_string(),
27443 vec![arg2, interval],
27444 ))));
27445 }
27446
27447 if matches!(target, DialectType::Databricks) {
27448 // Databricks: keep DATEADD(UNIT, val, date) format
27449 let unit = Expression::Identifier(Identifier::new(unit_str));
27450 return Ok(Expression::Function(Box::new(Function::new(
27451 "DATEADD".to_string(),
27452 vec![unit, arg1, arg2],
27453 ))));
27454 }
27455
27456 if matches!(target, DialectType::Spark) {
27457 // Spark: convert month-based units to ADD_MONTHS, rest to DATE_ADD
27458 fn multiply_expr_dateadd(expr: Expression, factor: i64) -> Expression {
27459 if let Expression::Literal(crate::expressions::Literal::Number(n)) = &expr {
27460 if let Ok(val) = n.parse::<i64>() {
27461 return Expression::Literal(crate::expressions::Literal::Number(
27462 (val * factor).to_string(),
27463 ));
27464 }
27465 }
27466 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
27467 expr,
27468 Expression::Literal(crate::expressions::Literal::Number(
27469 factor.to_string(),
27470 )),
27471 )))
27472 }
27473 match unit_str.as_str() {
27474 "YEAR" => {
27475 let months = multiply_expr_dateadd(arg1, 12);
27476 return Ok(Expression::Function(Box::new(Function::new(
27477 "ADD_MONTHS".to_string(),
27478 vec![arg2, months],
27479 ))));
27480 }
27481 "QUARTER" => {
27482 let months = multiply_expr_dateadd(arg1, 3);
27483 return Ok(Expression::Function(Box::new(Function::new(
27484 "ADD_MONTHS".to_string(),
27485 vec![arg2, months],
27486 ))));
27487 }
27488 "MONTH" => {
27489 return Ok(Expression::Function(Box::new(Function::new(
27490 "ADD_MONTHS".to_string(),
27491 vec![arg2, arg1],
27492 ))));
27493 }
27494 "WEEK" => {
27495 let days = multiply_expr_dateadd(arg1, 7);
27496 return Ok(Expression::Function(Box::new(Function::new(
27497 "DATE_ADD".to_string(),
27498 vec![arg2, days],
27499 ))));
27500 }
27501 "DAY" => {
27502 return Ok(Expression::Function(Box::new(Function::new(
27503 "DATE_ADD".to_string(),
27504 vec![arg2, arg1],
27505 ))));
27506 }
27507 _ => {
27508 let unit = Expression::Identifier(Identifier::new(unit_str));
27509 return Ok(Expression::Function(Box::new(Function::new(
27510 "DATE_ADD".to_string(),
27511 vec![unit, arg1, arg2],
27512 ))));
27513 }
27514 }
27515 }
27516
27517 if matches!(target, DialectType::Hive) {
27518 // Hive: DATE_ADD(date, val) for DAY, or date + INTERVAL for others
27519 match unit_str.as_str() {
27520 "DAY" => {
27521 return Ok(Expression::Function(Box::new(Function::new(
27522 "DATE_ADD".to_string(),
27523 vec![arg2, arg1],
27524 ))));
27525 }
27526 "MONTH" => {
27527 return Ok(Expression::Function(Box::new(Function::new(
27528 "ADD_MONTHS".to_string(),
27529 vec![arg2, arg1],
27530 ))));
27531 }
27532 _ => {
27533 let iu = parse_interval_unit(&unit_str);
27534 let interval =
27535 Expression::Interval(Box::new(crate::expressions::Interval {
27536 this: Some(arg1),
27537 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
27538 unit: iu,
27539 use_plural: false,
27540 }),
27541 }));
27542 return Ok(Expression::Add(Box::new(
27543 crate::expressions::BinaryOp::new(arg2, interval),
27544 )));
27545 }
27546 }
27547 }
27548
27549 if matches!(target, DialectType::PostgreSQL) {
27550 // PostgreSQL: date + INTERVAL 'val UNIT'
27551 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
27552 this: Some(Expression::Literal(Literal::String(format!(
27553 "{} {}",
27554 Self::expr_to_string(&arg1),
27555 unit_str
27556 )))),
27557 unit: None,
27558 }));
27559 return Ok(Expression::Add(Box::new(
27560 crate::expressions::BinaryOp::new(arg2, interval),
27561 )));
27562 }
27563
27564 if matches!(
27565 target,
27566 DialectType::Presto | DialectType::Trino | DialectType::Athena
27567 ) {
27568 // Presto/Trino: DATE_ADD('UNIT', val, date)
27569 return Ok(Expression::Function(Box::new(Function::new(
27570 "DATE_ADD".to_string(),
27571 vec![Expression::Literal(Literal::String(unit_str)), arg1, arg2],
27572 ))));
27573 }
27574
27575 if matches!(target, DialectType::ClickHouse) {
27576 // ClickHouse: DATE_ADD(UNIT, val, date)
27577 let unit = Expression::Identifier(Identifier::new(unit_str));
27578 return Ok(Expression::Function(Box::new(Function::new(
27579 "DATE_ADD".to_string(),
27580 vec![unit, arg1, arg2],
27581 ))));
27582 }
27583
27584 // Default: keep DATEADD with uppercased unit
27585 let unit = Expression::Identifier(Identifier::new(unit_str));
27586 Ok(Expression::Function(Box::new(Function::new(
27587 "DATEADD".to_string(),
27588 vec![unit, arg1, arg2],
27589 ))))
27590 }
27591
27592 // DATE_ADD(unit, val, date) - 3 arg form from ClickHouse/Presto
27593 "DATE_ADD" if args.len() == 3 => {
27594 let arg0 = args.remove(0);
27595 let arg1 = args.remove(0);
27596 let arg2 = args.remove(0);
27597 let unit_str = get_unit_str(&arg0);
27598
27599 if matches!(
27600 target,
27601 DialectType::Presto | DialectType::Trino | DialectType::Athena
27602 ) {
27603 // Presto/Trino: DATE_ADD('UNIT', val, date)
27604 return Ok(Expression::Function(Box::new(Function::new(
27605 "DATE_ADD".to_string(),
27606 vec![Expression::Literal(Literal::String(unit_str)), arg1, arg2],
27607 ))));
27608 }
27609
27610 if matches!(
27611 target,
27612 DialectType::Snowflake | DialectType::TSQL | DialectType::Redshift
27613 ) {
27614 // DATEADD(UNIT, val, date)
27615 let unit = Expression::Identifier(Identifier::new(unit_str));
27616 let date = if matches!(target, DialectType::TSQL) {
27617 Self::ensure_cast_datetime2(arg2)
27618 } else {
27619 arg2
27620 };
27621 return Ok(Expression::Function(Box::new(Function::new(
27622 "DATEADD".to_string(),
27623 vec![unit, arg1, date],
27624 ))));
27625 }
27626
27627 if matches!(target, DialectType::DuckDB) {
27628 // DuckDB: date + INTERVAL val UNIT
27629 let iu = parse_interval_unit(&unit_str);
27630 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
27631 this: Some(arg1),
27632 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
27633 unit: iu,
27634 use_plural: false,
27635 }),
27636 }));
27637 return Ok(Expression::Add(Box::new(
27638 crate::expressions::BinaryOp::new(arg2, interval),
27639 )));
27640 }
27641
27642 if matches!(target, DialectType::Spark | DialectType::Databricks) {
27643 // Spark: DATE_ADD(UNIT, val, date) with uppercased unit
27644 let unit = Expression::Identifier(Identifier::new(unit_str));
27645 return Ok(Expression::Function(Box::new(Function::new(
27646 "DATE_ADD".to_string(),
27647 vec![unit, arg1, arg2],
27648 ))));
27649 }
27650
27651 // Default: DATE_ADD(UNIT, val, date)
27652 let unit = Expression::Identifier(Identifier::new(unit_str));
27653 Ok(Expression::Function(Box::new(Function::new(
27654 "DATE_ADD".to_string(),
27655 vec![unit, arg1, arg2],
27656 ))))
27657 }
27658
            // DATE_ADD(date, INTERVAL val UNIT) - 2 arg BigQuery form
            "DATE_ADD" if args.len() == 2 => {
                // Split the INTERVAL argument into its value and unit so each
                // target dialect can re-assemble them in its own syntax.
                let date = args.remove(0);
                let interval_expr = args.remove(0);
                let (val, unit) = Self::extract_interval_parts(&interval_expr);
                let unit_str = Self::interval_unit_to_string(&unit);

                match target {
                    DialectType::DuckDB => {
                        // DuckDB: CAST(date AS DATE) + INTERVAL 'val' UNIT
                        let cast_date = Self::ensure_cast_date(date);
                        let quoted_val = Self::quote_interval_val(&val);
                        let interval =
                            Expression::Interval(Box::new(crate::expressions::Interval {
                                this: Some(quoted_val),
                                unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                    unit,
                                    use_plural: false,
                                }),
                            }));
                        Ok(Expression::Add(Box::new(
                            crate::expressions::BinaryOp::new(cast_date, interval),
                        )))
                    }
                    DialectType::PostgreSQL => {
                        // PostgreSQL: date + INTERVAL 'val UNIT'
                        // Value and unit are folded into one quoted string.
                        let interval =
                            Expression::Interval(Box::new(crate::expressions::Interval {
                                this: Some(Expression::Literal(Literal::String(format!(
                                    "{} {}",
                                    Self::expr_to_string(&val),
                                    unit_str
                                )))),
                                unit: None,
                            }));
                        Ok(Expression::Add(Box::new(
                            crate::expressions::BinaryOp::new(date, interval),
                        )))
                    }
                    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                        // Presto: DATE_ADD('UNIT', CAST('val' AS BIGINT), date)
                        // The value is stringified then cast back, matching Presto's
                        // signature which takes a bigint amount.
                        let val_str = Self::expr_to_string(&val);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![
                                Expression::Literal(Literal::String(unit_str)),
                                Expression::Cast(Box::new(Cast {
                                    this: Expression::Literal(Literal::String(val_str)),
                                    to: DataType::BigInt { length: None },
                                    trailing_comments: vec![],
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                })),
                                date,
                            ],
                        ))))
                    }
                    DialectType::Spark | DialectType::Hive => {
                        // Spark/Hive: DATE_ADD(date, val) for DAY,
                        // ADD_MONTHS(date, val) for MONTH, interval otherwise.
                        match unit_str.as_str() {
                            "DAY" => Ok(Expression::Function(Box::new(Function::new(
                                "DATE_ADD".to_string(),
                                vec![date, val],
                            )))),
                            "MONTH" => Ok(Expression::Function(Box::new(Function::new(
                                "ADD_MONTHS".to_string(),
                                vec![date, val],
                            )))),
                            _ => {
                                // Other units fall back to DATE_ADD(date, INTERVAL val UNIT).
                                let iu = parse_interval_unit(&unit_str);
                                let interval =
                                    Expression::Interval(Box::new(crate::expressions::Interval {
                                        this: Some(val),
                                        unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                            unit: iu,
                                            use_plural: false,
                                        }),
                                    }));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_ADD".to_string(),
                                    vec![date, interval],
                                ))))
                            }
                        }
                    }
                    DialectType::Snowflake => {
                        // Snowflake: DATEADD(UNIT, 'val', CAST(date AS DATE))
                        let cast_date = Self::ensure_cast_date(date);
                        let val_str = Self::expr_to_string(&val);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(),
                            vec![
                                Expression::Identifier(Identifier::new(unit_str)),
                                Expression::Literal(Literal::String(val_str)),
                                cast_date,
                            ],
                        ))))
                    }
                    DialectType::TSQL | DialectType::Fabric => {
                        // TSQL/Fabric: DATEADD(UNIT, val, CAST(date AS DATETIME2))
                        let cast_date = Self::ensure_cast_datetime2(date);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(),
                            vec![
                                Expression::Identifier(Identifier::new(unit_str)),
                                val,
                                cast_date,
                            ],
                        ))))
                    }
                    // Redshift: DATEADD(UNIT, val, date) with no cast.
                    DialectType::Redshift => Ok(Expression::Function(Box::new(Function::new(
                        "DATEADD".to_string(),
                        vec![Expression::Identifier(Identifier::new(unit_str)), val, date],
                    )))),
                    DialectType::MySQL => {
                        // MySQL: DATE_ADD(date, INTERVAL 'val' UNIT)
                        let quoted_val = Self::quote_interval_val(&val);
                        let iu = parse_interval_unit(&unit_str);
                        let interval =
                            Expression::Interval(Box::new(crate::expressions::Interval {
                                this: Some(quoted_val),
                                unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                    unit: iu,
                                    use_plural: false,
                                }),
                            }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![date, interval],
                        ))))
                    }
                    DialectType::BigQuery => {
                        // BigQuery: DATE_ADD(date, INTERVAL 'val' UNIT) — round-trip
                        // keeps the decomposed interval form.
                        let quoted_val = Self::quote_interval_val(&val);
                        let iu = parse_interval_unit(&unit_str);
                        let interval =
                            Expression::Interval(Box::new(crate::expressions::Interval {
                                this: Some(quoted_val),
                                unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                    unit: iu,
                                    use_plural: false,
                                }),
                            }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![date, interval],
                        ))))
                    }
                    // Databricks: DATEADD(UNIT, val, date).
                    DialectType::Databricks => Ok(Expression::Function(Box::new(Function::new(
                        "DATEADD".to_string(),
                        vec![Expression::Identifier(Identifier::new(unit_str)), val, date],
                    )))),
                    _ => {
                        // Default: keep as DATE_ADD with decomposed interval; the
                        // generator for the target renders DateAdd natively.
                        Ok(Expression::DateAdd(Box::new(
                            crate::expressions::DateAddFunc {
                                this: date,
                                interval: val,
                                unit,
                            },
                        )))
                    }
                }
            }
27824
27825 // ADD_MONTHS(date, val) -> target-specific form
27826 "ADD_MONTHS" if args.len() == 2 => {
27827 let date = args.remove(0);
27828 let val = args.remove(0);
27829
27830 if matches!(target, DialectType::TSQL) {
27831 // TSQL: DATEADD(MONTH, val, CAST(date AS DATETIME2))
27832 let cast_date = Self::ensure_cast_datetime2(date);
27833 return Ok(Expression::Function(Box::new(Function::new(
27834 "DATEADD".to_string(),
27835 vec![
27836 Expression::Identifier(Identifier::new("MONTH")),
27837 val,
27838 cast_date,
27839 ],
27840 ))));
27841 }
27842
27843 if matches!(target, DialectType::DuckDB) {
27844 // DuckDB: date + INTERVAL val MONTH
27845 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
27846 this: Some(val),
27847 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
27848 unit: crate::expressions::IntervalUnit::Month,
27849 use_plural: false,
27850 }),
27851 }));
27852 return Ok(Expression::Add(Box::new(
27853 crate::expressions::BinaryOp::new(date, interval),
27854 )));
27855 }
27856
27857 if matches!(target, DialectType::Snowflake) {
27858 // Snowflake: keep ADD_MONTHS when source is also Snowflake, else DATEADD
27859 if matches!(source, DialectType::Snowflake) {
27860 return Ok(Expression::Function(Box::new(Function::new(
27861 "ADD_MONTHS".to_string(),
27862 vec![date, val],
27863 ))));
27864 }
27865 return Ok(Expression::Function(Box::new(Function::new(
27866 "DATEADD".to_string(),
27867 vec![Expression::Identifier(Identifier::new("MONTH")), val, date],
27868 ))));
27869 }
27870
27871 if matches!(target, DialectType::Spark | DialectType::Databricks) {
27872 // Spark: ADD_MONTHS(date, val) - keep as is
27873 return Ok(Expression::Function(Box::new(Function::new(
27874 "ADD_MONTHS".to_string(),
27875 vec![date, val],
27876 ))));
27877 }
27878
27879 if matches!(target, DialectType::Hive) {
27880 return Ok(Expression::Function(Box::new(Function::new(
27881 "ADD_MONTHS".to_string(),
27882 vec![date, val],
27883 ))));
27884 }
27885
27886 if matches!(
27887 target,
27888 DialectType::Presto | DialectType::Trino | DialectType::Athena
27889 ) {
27890 // Presto: DATE_ADD('MONTH', val, date)
27891 return Ok(Expression::Function(Box::new(Function::new(
27892 "DATE_ADD".to_string(),
27893 vec![
27894 Expression::Literal(Literal::String("MONTH".to_string())),
27895 val,
27896 date,
27897 ],
27898 ))));
27899 }
27900
27901 // Default: keep ADD_MONTHS
27902 Ok(Expression::Function(Box::new(Function::new(
27903 "ADD_MONTHS".to_string(),
27904 vec![date, val],
27905 ))))
27906 }
27907
            // SAFE_DIVIDE(x, y) -> target-specific form directly
            // BigQuery's SAFE_DIVIDE returns NULL on division by zero; emulate
            // that with CASE/IF guarded by `y <> 0` in each target dialect.
            "SAFE_DIVIDE" if args.len() == 2 => {
                let x = args.remove(0);
                let y = args.remove(0);
                // Wrap x and y in parens if they're complex expressions
                // (simple columns/literals/identifiers are left bare so the
                // output stays readable).
                let y_ref = match &y {
                    Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => {
                        y.clone()
                    }
                    _ => Expression::Paren(Box::new(Paren {
                        this: y.clone(),
                        trailing_comments: vec![],
                    })),
                };
                let x_ref = match &x {
                    Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => {
                        x.clone()
                    }
                    _ => Expression::Paren(Box::new(Paren {
                        this: x.clone(),
                        trailing_comments: vec![],
                    })),
                };
                // `condition` and `div_expr` clone the refs because some target
                // branches below also consume x_ref / y_ref by move.
                let condition = Expression::Neq(Box::new(crate::expressions::BinaryOp::new(
                    y_ref.clone(),
                    Expression::number(0),
                )));
                let div_expr = Expression::Div(Box::new(crate::expressions::BinaryOp::new(
                    x_ref.clone(),
                    y_ref.clone(),
                )));

                match target {
                    DialectType::DuckDB | DialectType::PostgreSQL => {
                        // CASE WHEN y <> 0 THEN x / y ELSE NULL END
                        // PostgreSQL additionally casts x to DOUBLE PRECISION so
                        // integer / integer does not truncate.
                        let result_div = if matches!(target, DialectType::PostgreSQL) {
                            let cast_x = Expression::Cast(Box::new(Cast {
                                this: x_ref,
                                to: DataType::Custom {
                                    name: "DOUBLE PRECISION".to_string(),
                                },
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            }));
                            Expression::Div(Box::new(crate::expressions::BinaryOp::new(
                                cast_x, y_ref,
                            )))
                        } else {
                            div_expr
                        };
                        Ok(Expression::Case(Box::new(crate::expressions::Case {
                            operand: None,
                            whens: vec![(condition, result_div)],
                            else_: Some(Expression::Null(crate::expressions::Null)),
                            comments: Vec::new(),
                            inferred_type: None,
                        })))
                    }
                    DialectType::Snowflake => {
                        // IFF(y <> 0, x / y, NULL)
                        Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
                            condition,
                            true_value: div_expr,
                            false_value: Some(Expression::Null(crate::expressions::Null)),
                            original_name: Some("IFF".to_string()),
                            inferred_type: None,
                        })))
                    }
                    DialectType::Presto | DialectType::Trino => {
                        // IF(y <> 0, CAST(x AS DOUBLE) / y, NULL) — the cast avoids
                        // integer division semantics in Presto/Trino.
                        let cast_x = Expression::Cast(Box::new(Cast {
                            this: x_ref,
                            to: DataType::Double {
                                precision: None,
                                scale: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        let cast_div = Expression::Div(Box::new(
                            crate::expressions::BinaryOp::new(cast_x, y_ref),
                        ));
                        Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
                            condition,
                            true_value: cast_div,
                            false_value: Some(Expression::Null(crate::expressions::Null)),
                            original_name: None,
                            inferred_type: None,
                        })))
                    }
                    _ => {
                        // IF(y <> 0, x / y, NULL)
                        Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
                            condition,
                            true_value: div_expr,
                            false_value: Some(Expression::Null(crate::expressions::Null)),
                            original_name: None,
                            inferred_type: None,
                        })))
                    }
                }
            }
28016
28017 // GENERATE_UUID() -> UUID() with CAST to string
28018 "GENERATE_UUID" => {
28019 let uuid_expr = Expression::Uuid(Box::new(crate::expressions::Uuid {
28020 this: None,
28021 name: None,
28022 is_string: None,
28023 }));
28024 // Most targets need CAST(UUID() AS TEXT/VARCHAR/STRING)
28025 let cast_type = match target {
28026 DialectType::DuckDB => Some(DataType::Text),
28027 DialectType::Presto | DialectType::Trino => Some(DataType::VarChar {
28028 length: None,
28029 parenthesized_length: false,
28030 }),
28031 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
28032 Some(DataType::String { length: None })
28033 }
28034 _ => None,
28035 };
28036 if let Some(dt) = cast_type {
28037 Ok(Expression::Cast(Box::new(Cast {
28038 this: uuid_expr,
28039 to: dt,
28040 trailing_comments: vec![],
28041 double_colon_syntax: false,
28042 format: None,
28043 default: None,
28044 inferred_type: None,
28045 })))
28046 } else {
28047 Ok(uuid_expr)
28048 }
28049 }
28050
            // COUNTIF(x) -> CountIf expression
            // Normalized into a dedicated AST node; the target generator decides
            // whether to render COUNTIF, COUNT_IF, or SUM(CASE ...).
            "COUNTIF" if args.len() == 1 => {
                let arg = args.remove(0);
                Ok(Expression::CountIf(Box::new(crate::expressions::AggFunc {
                    this: arg,
                    distinct: false,
                    filter: None,
                    order_by: vec![],
                    name: None,
                    ignore_nulls: None,
                    having_max: None,
                    limit: None,
                    inferred_type: None,
                })))
            }
28066
28067 // EDIT_DISTANCE(col1, col2, ...) -> Levenshtein expression
28068 "EDIT_DISTANCE" => {
28069 // Strip named arguments (max_distance => N) and pass as positional
28070 let mut positional_args: Vec<Expression> = vec![];
28071 for arg in args {
28072 match arg {
28073 Expression::NamedArgument(na) => {
28074 positional_args.push(na.value);
28075 }
28076 other => positional_args.push(other),
28077 }
28078 }
28079 if positional_args.len() >= 2 {
28080 let col1 = positional_args.remove(0);
28081 let col2 = positional_args.remove(0);
28082 let levenshtein = crate::expressions::BinaryFunc {
28083 this: col1,
28084 expression: col2,
28085 original_name: None,
28086 inferred_type: None,
28087 };
28088 // Pass extra args through a function wrapper with all args
28089 if !positional_args.is_empty() {
28090 let max_dist = positional_args.remove(0);
28091 // DuckDB: CASE WHEN LEVENSHTEIN(a, b) IS NULL OR max IS NULL THEN NULL ELSE LEAST(LEVENSHTEIN(a, b), max) END
28092 if matches!(target, DialectType::DuckDB) {
28093 let lev = Expression::Function(Box::new(Function::new(
28094 "LEVENSHTEIN".to_string(),
28095 vec![levenshtein.this, levenshtein.expression],
28096 )));
28097 let lev_is_null =
28098 Expression::IsNull(Box::new(crate::expressions::IsNull {
28099 this: lev.clone(),
28100 not: false,
28101 postfix_form: false,
28102 }));
28103 let max_is_null =
28104 Expression::IsNull(Box::new(crate::expressions::IsNull {
28105 this: max_dist.clone(),
28106 not: false,
28107 postfix_form: false,
28108 }));
28109 let null_check =
28110 Expression::Or(Box::new(crate::expressions::BinaryOp {
28111 left: lev_is_null,
28112 right: max_is_null,
28113 left_comments: Vec::new(),
28114 operator_comments: Vec::new(),
28115 trailing_comments: Vec::new(),
28116 inferred_type: None,
28117 }));
28118 let least =
28119 Expression::Least(Box::new(crate::expressions::VarArgFunc {
28120 expressions: vec![lev, max_dist],
28121 original_name: None,
28122 inferred_type: None,
28123 }));
28124 return Ok(Expression::Case(Box::new(crate::expressions::Case {
28125 operand: None,
28126 whens: vec![(
28127 null_check,
28128 Expression::Null(crate::expressions::Null),
28129 )],
28130 else_: Some(least),
28131 comments: Vec::new(),
28132 inferred_type: None,
28133 })));
28134 }
28135 let mut all_args = vec![levenshtein.this, levenshtein.expression, max_dist];
28136 all_args.extend(positional_args);
28137 // PostgreSQL: use LEVENSHTEIN_LESS_EQUAL when max_distance is provided
28138 let func_name = if matches!(target, DialectType::PostgreSQL) {
28139 "LEVENSHTEIN_LESS_EQUAL"
28140 } else {
28141 "LEVENSHTEIN"
28142 };
28143 return Ok(Expression::Function(Box::new(Function::new(
28144 func_name.to_string(),
28145 all_args,
28146 ))));
28147 }
28148 Ok(Expression::Levenshtein(Box::new(levenshtein)))
28149 } else {
28150 Ok(Expression::Function(Box::new(Function::new(
28151 "EDIT_DISTANCE".to_string(),
28152 positional_args,
28153 ))))
28154 }
28155 }
28156
            // TIMESTAMP_SECONDS(x) -> UnixToTime with scale 0
            // The three TIMESTAMP_* epoch converters normalize to a single
            // UnixToTime node, differing only in the epoch scale (power of ten).
            "TIMESTAMP_SECONDS" if args.len() == 1 => {
                let arg = args.remove(0);
                Ok(Expression::UnixToTime(Box::new(
                    crate::expressions::UnixToTime {
                        this: Box::new(arg),
                        scale: Some(0), // seconds
                        zone: None,
                        hours: None,
                        minutes: None,
                        format: None,
                        target_type: None,
                    },
                )))
            }

            // TIMESTAMP_MILLIS(x) -> UnixToTime with scale 3
            "TIMESTAMP_MILLIS" if args.len() == 1 => {
                let arg = args.remove(0);
                Ok(Expression::UnixToTime(Box::new(
                    crate::expressions::UnixToTime {
                        this: Box::new(arg),
                        scale: Some(3), // milliseconds
                        zone: None,
                        hours: None,
                        minutes: None,
                        format: None,
                        target_type: None,
                    },
                )))
            }

            // TIMESTAMP_MICROS(x) -> UnixToTime with scale 6
            "TIMESTAMP_MICROS" if args.len() == 1 => {
                let arg = args.remove(0);
                Ok(Expression::UnixToTime(Box::new(
                    crate::expressions::UnixToTime {
                        this: Box::new(arg),
                        scale: Some(6), // microseconds
                        zone: None,
                        hours: None,
                        minutes: None,
                        format: None,
                        target_type: None,
                    },
                )))
            }
28204
28205 // DIV(x, y) -> IntDiv expression
28206 "DIV" if args.len() == 2 => {
28207 let x = args.remove(0);
28208 let y = args.remove(0);
28209 Ok(Expression::IntDiv(Box::new(
28210 crate::expressions::BinaryFunc {
28211 this: x,
28212 expression: y,
28213 original_name: None,
28214 inferred_type: None,
28215 },
28216 )))
28217 }
28218
28219 // TO_HEX(x) -> target-specific form
28220 "TO_HEX" if args.len() == 1 => {
28221 let arg = args.remove(0);
28222 // Check if inner function already returns hex string in certain targets
28223 let inner_returns_hex = matches!(&arg, Expression::Function(f) if matches!(f.name.as_str(), "MD5" | "SHA1" | "SHA256" | "SHA512"));
28224 if matches!(target, DialectType::BigQuery) {
28225 // BQ->BQ: keep as TO_HEX
28226 Ok(Expression::Function(Box::new(Function::new(
28227 "TO_HEX".to_string(),
28228 vec![arg],
28229 ))))
28230 } else if matches!(target, DialectType::DuckDB) && inner_returns_hex {
28231 // DuckDB: MD5/SHA already return hex strings, so TO_HEX is redundant
28232 Ok(arg)
28233 } else if matches!(target, DialectType::Snowflake) && inner_returns_hex {
28234 // Snowflake: TO_HEX(SHA1(x)) -> TO_CHAR(SHA1_BINARY(x))
28235 // TO_HEX(MD5(x)) -> TO_CHAR(MD5_BINARY(x))
28236 // TO_HEX(SHA256(x)) -> TO_CHAR(SHA2_BINARY(x, 256))
28237 // TO_HEX(SHA512(x)) -> TO_CHAR(SHA2_BINARY(x, 512))
28238 if let Expression::Function(ref inner_f) = arg {
28239 let inner_args = inner_f.args.clone();
28240 let binary_func = match inner_f.name.to_uppercase().as_str() {
28241 "SHA1" => Expression::Function(Box::new(Function::new(
28242 "SHA1_BINARY".to_string(),
28243 inner_args,
28244 ))),
28245 "MD5" => Expression::Function(Box::new(Function::new(
28246 "MD5_BINARY".to_string(),
28247 inner_args,
28248 ))),
28249 "SHA256" => {
28250 let mut a = inner_args;
28251 a.push(Expression::number(256));
28252 Expression::Function(Box::new(Function::new(
28253 "SHA2_BINARY".to_string(),
28254 a,
28255 )))
28256 }
28257 "SHA512" => {
28258 let mut a = inner_args;
28259 a.push(Expression::number(512));
28260 Expression::Function(Box::new(Function::new(
28261 "SHA2_BINARY".to_string(),
28262 a,
28263 )))
28264 }
28265 _ => arg.clone(),
28266 };
28267 Ok(Expression::Function(Box::new(Function::new(
28268 "TO_CHAR".to_string(),
28269 vec![binary_func],
28270 ))))
28271 } else {
28272 let inner = Expression::Function(Box::new(Function::new(
28273 "HEX".to_string(),
28274 vec![arg],
28275 )));
28276 Ok(Expression::Lower(Box::new(
28277 crate::expressions::UnaryFunc::new(inner),
28278 )))
28279 }
28280 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
28281 let inner = Expression::Function(Box::new(Function::new(
28282 "TO_HEX".to_string(),
28283 vec![arg],
28284 )));
28285 Ok(Expression::Lower(Box::new(
28286 crate::expressions::UnaryFunc::new(inner),
28287 )))
28288 } else {
28289 let inner =
28290 Expression::Function(Box::new(Function::new("HEX".to_string(), vec![arg])));
28291 Ok(Expression::Lower(Box::new(
28292 crate::expressions::UnaryFunc::new(inner),
28293 )))
28294 }
28295 }
28296
            // LAST_DAY(date, unit) -> LAST_DAY(date)
            // The optional unit argument is dropped unconditionally; MONTH is the
            // implicit default in targets that only accept one argument.
            // NOTE(review): the previous comment mentioned a PostgreSQL-specific
            // transform, but no such branch exists here — all targets get the
            // unit-stripped form.
            "LAST_DAY" if args.len() == 2 => {
                let date = args.remove(0);
                let _unit = args.remove(0); // Strip the unit (MONTH is default)
                Ok(Expression::Function(Box::new(Function::new(
                    "LAST_DAY".to_string(),
                    vec![date],
                ))))
            }
28306
28307 // GENERATE_ARRAY(start, end, step?) -> GenerateSeries expression
28308 "GENERATE_ARRAY" => {
28309 let start = args.get(0).cloned();
28310 let end = args.get(1).cloned();
28311 let step = args.get(2).cloned();
28312 Ok(Expression::GenerateSeries(Box::new(
28313 crate::expressions::GenerateSeries {
28314 start: start.map(Box::new),
28315 end: end.map(Box::new),
28316 step: step.map(Box::new),
28317 is_end_exclusive: None,
28318 },
28319 )))
28320 }
28321
28322 // GENERATE_TIMESTAMP_ARRAY(start, end, step) -> GenerateSeries expression
28323 "GENERATE_TIMESTAMP_ARRAY" => {
28324 let start = args.get(0).cloned();
28325 let end = args.get(1).cloned();
28326 let step = args.get(2).cloned();
28327
28328 if matches!(target, DialectType::DuckDB) {
28329 // DuckDB: GENERATE_SERIES(CAST(start AS TIMESTAMP), CAST(end AS TIMESTAMP), step)
28330 // Only cast string literals - leave columns/expressions as-is
28331 let maybe_cast_ts = |expr: Expression| -> Expression {
28332 if matches!(&expr, Expression::Literal(Literal::String(_))) {
28333 Expression::Cast(Box::new(Cast {
28334 this: expr,
28335 to: DataType::Timestamp {
28336 precision: None,
28337 timezone: false,
28338 },
28339 trailing_comments: vec![],
28340 double_colon_syntax: false,
28341 format: None,
28342 default: None,
28343 inferred_type: None,
28344 }))
28345 } else {
28346 expr
28347 }
28348 };
28349 let cast_start = start.map(maybe_cast_ts);
28350 let cast_end = end.map(maybe_cast_ts);
28351 Ok(Expression::GenerateSeries(Box::new(
28352 crate::expressions::GenerateSeries {
28353 start: cast_start.map(Box::new),
28354 end: cast_end.map(Box::new),
28355 step: step.map(Box::new),
28356 is_end_exclusive: None,
28357 },
28358 )))
28359 } else {
28360 Ok(Expression::GenerateSeries(Box::new(
28361 crate::expressions::GenerateSeries {
28362 start: start.map(Box::new),
28363 end: end.map(Box::new),
28364 step: step.map(Box::new),
28365 is_end_exclusive: None,
28366 },
28367 )))
28368 }
28369 }
28370
            // TO_JSON(x) -> target-specific (from Spark/Hive)
            "TO_JSON" => {
                match target {
                    DialectType::Presto | DialectType::Trino => {
                        // JSON_FORMAT(CAST(x AS JSON))
                        // Missing argument degrades to NULL rather than panicking.
                        let arg = args
                            .into_iter()
                            .next()
                            .unwrap_or(Expression::Null(crate::expressions::Null));
                        let cast_json = Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Custom {
                                name: "JSON".to_string(),
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "JSON_FORMAT".to_string(),
                            vec![cast_json],
                        ))))
                    }
                    // BigQuery's equivalent is TO_JSON_STRING.
                    DialectType::BigQuery => Ok(Expression::Function(Box::new(Function::new(
                        "TO_JSON_STRING".to_string(),
                        args,
                    )))),
                    DialectType::DuckDB => {
                        // CAST(TO_JSON(x) AS TEXT) — DuckDB's TO_JSON returns a JSON
                        // value, so cast to TEXT to match Spark's string output.
                        let arg = args
                            .into_iter()
                            .next()
                            .unwrap_or(Expression::Null(crate::expressions::Null));
                        let to_json = Expression::Function(Box::new(Function::new(
                            "TO_JSON".to_string(),
                            vec![arg],
                        )));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: to_json,
                            to: DataType::Text,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    }
                    // Default: pass TO_JSON through unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "TO_JSON".to_string(),
                        args,
                    )))),
                }
            }
28426
            // TO_JSON_STRING(x) -> target-specific
            // Mirrors the TO_JSON arm above but in the BigQuery->other direction.
            "TO_JSON_STRING" => {
                match target {
                    // Spark-family spells it TO_JSON.
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive => Ok(
                        Expression::Function(Box::new(Function::new("TO_JSON".to_string(), args))),
                    ),
                    DialectType::Presto | DialectType::Trino => {
                        // JSON_FORMAT(CAST(x AS JSON))
                        // Missing argument degrades to NULL rather than panicking.
                        let arg = args
                            .into_iter()
                            .next()
                            .unwrap_or(Expression::Null(crate::expressions::Null));
                        let cast_json = Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Custom {
                                name: "JSON".to_string(),
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "JSON_FORMAT".to_string(),
                            vec![cast_json],
                        ))))
                    }
                    DialectType::DuckDB => {
                        // CAST(TO_JSON(x) AS TEXT) — force a string result.
                        let arg = args
                            .into_iter()
                            .next()
                            .unwrap_or(Expression::Null(crate::expressions::Null));
                        let to_json = Expression::Function(Box::new(Function::new(
                            "TO_JSON".to_string(),
                            vec![arg],
                        )));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: to_json,
                            to: DataType::Text,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    }
                    DialectType::Snowflake => {
                        // TO_JSON(x)
                        Ok(Expression::Function(Box::new(Function::new(
                            "TO_JSON".to_string(),
                            args,
                        ))))
                    }
                    // Default: keep TO_JSON_STRING as-is.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "TO_JSON_STRING".to_string(),
                        args,
                    )))),
                }
            }
28488
            // SAFE_ADD(x, y) -> SafeAdd expression
            // The three SAFE_* arithmetic wrappers normalize to dedicated AST
            // nodes; target generators decide how (or whether) to emulate the
            // NULL-on-overflow semantics.
            "SAFE_ADD" if args.len() == 2 => {
                let x = args.remove(0);
                let y = args.remove(0);
                Ok(Expression::SafeAdd(Box::new(crate::expressions::SafeAdd {
                    this: Box::new(x),
                    expression: Box::new(y),
                })))
            }

            // SAFE_SUBTRACT(x, y) -> SafeSubtract expression
            "SAFE_SUBTRACT" if args.len() == 2 => {
                let x = args.remove(0);
                let y = args.remove(0);
                Ok(Expression::SafeSubtract(Box::new(
                    crate::expressions::SafeSubtract {
                        this: Box::new(x),
                        expression: Box::new(y),
                    },
                )))
            }

            // SAFE_MULTIPLY(x, y) -> SafeMultiply expression
            "SAFE_MULTIPLY" if args.len() == 2 => {
                let x = args.remove(0);
                let y = args.remove(0);
                Ok(Expression::SafeMultiply(Box::new(
                    crate::expressions::SafeMultiply {
                        this: Box::new(x),
                        expression: Box::new(y),
                    },
                )))
            }
28522
28523 // REGEXP_CONTAINS(str, pattern) -> RegexpLike expression
28524 "REGEXP_CONTAINS" if args.len() == 2 => {
28525 let str_expr = args.remove(0);
28526 let pattern = args.remove(0);
28527 Ok(Expression::RegexpLike(Box::new(
28528 crate::expressions::RegexpFunc {
28529 this: str_expr,
28530 pattern,
28531 flags: None,
28532 },
28533 )))
28534 }
28535
28536 // CONTAINS_SUBSTR(a, b) -> CONTAINS(LOWER(a), LOWER(b))
28537 "CONTAINS_SUBSTR" if args.len() == 2 => {
28538 let a = args.remove(0);
28539 let b = args.remove(0);
28540 let lower_a = Expression::Lower(Box::new(crate::expressions::UnaryFunc::new(a)));
28541 let lower_b = Expression::Lower(Box::new(crate::expressions::UnaryFunc::new(b)));
28542 Ok(Expression::Function(Box::new(Function::new(
28543 "CONTAINS".to_string(),
28544 vec![lower_a, lower_b],
28545 ))))
28546 }
28547
28548 // INT64(x) -> CAST(x AS BIGINT)
28549 "INT64" if args.len() == 1 => {
28550 let arg = args.remove(0);
28551 Ok(Expression::Cast(Box::new(Cast {
28552 this: arg,
28553 to: DataType::BigInt { length: None },
28554 trailing_comments: vec![],
28555 double_colon_syntax: false,
28556 format: None,
28557 default: None,
28558 inferred_type: None,
28559 })))
28560 }
28561
28562 // INSTR(str, substr) -> target-specific
28563 "INSTR" if args.len() >= 2 => {
28564 let str_expr = args.remove(0);
28565 let substr = args.remove(0);
28566 if matches!(target, DialectType::Snowflake) {
28567 // CHARINDEX(substr, str)
28568 Ok(Expression::Function(Box::new(Function::new(
28569 "CHARINDEX".to_string(),
28570 vec![substr, str_expr],
28571 ))))
28572 } else if matches!(target, DialectType::BigQuery) {
28573 // Keep as INSTR
28574 Ok(Expression::Function(Box::new(Function::new(
28575 "INSTR".to_string(),
28576 vec![str_expr, substr],
28577 ))))
28578 } else {
28579 // Default: keep as INSTR
28580 Ok(Expression::Function(Box::new(Function::new(
28581 "INSTR".to_string(),
28582 vec![str_expr, substr],
28583 ))))
28584 }
28585 }
28586
28587 // BigQuery DATE_TRUNC(expr, unit) -> DATE_TRUNC('unit', expr) for standard SQL
28588 "DATE_TRUNC" if args.len() == 2 => {
28589 let expr = args.remove(0);
28590 let unit_expr = args.remove(0);
28591 let unit_str = get_unit_str(&unit_expr);
28592
28593 match target {
28594 DialectType::DuckDB
28595 | DialectType::Snowflake
28596 | DialectType::PostgreSQL
28597 | DialectType::Presto
28598 | DialectType::Trino
28599 | DialectType::Databricks
28600 | DialectType::Spark
28601 | DialectType::Redshift
28602 | DialectType::ClickHouse
28603 | DialectType::TSQL => {
28604 // Standard: DATE_TRUNC('UNIT', expr)
28605 Ok(Expression::Function(Box::new(Function::new(
28606 "DATE_TRUNC".to_string(),
28607 vec![Expression::Literal(Literal::String(unit_str)), expr],
28608 ))))
28609 }
28610 _ => {
28611 // Keep BigQuery arg order: DATE_TRUNC(expr, unit)
28612 Ok(Expression::Function(Box::new(Function::new(
28613 "DATE_TRUNC".to_string(),
28614 vec![expr, unit_expr],
28615 ))))
28616 }
28617 }
28618 }
28619
            // TIMESTAMP_TRUNC / DATETIME_TRUNC -> target-specific
            //
            // Accepts TIMESTAMP_TRUNC(ts, unit) or TIMESTAMP_TRUNC(ts, unit,
            // timezone); DATETIME_TRUNC shares the shape. The unit is
            // normalized via `get_unit_str` and emitted as a string literal
            // on targets that expect one.
            "TIMESTAMP_TRUNC" | "DATETIME_TRUNC" if args.len() >= 2 => {
                // TIMESTAMP_TRUNC(ts, unit) or TIMESTAMP_TRUNC(ts, unit, timezone)
                let ts = args.remove(0);
                let unit_expr = args.remove(0);
                // Optional third argument: a timezone expression.
                let tz = if !args.is_empty() {
                    Some(args.remove(0))
                } else {
                    None
                };
                let unit_str = get_unit_str(&unit_expr);

                match target {
                    DialectType::DuckDB => {
                        // DuckDB: DATE_TRUNC('UNIT', CAST(ts AS TIMESTAMPTZ))
                        // With timezone: DATE_TRUNC('UNIT', ts AT TIME ZONE 'tz') AT TIME ZONE 'tz' (for DAY granularity)
                        // Without timezone for MINUTE+ granularity: just DATE_TRUNC
                        let is_coarse = matches!(
                            unit_str.as_str(),
                            "DAY" | "WEEK" | "MONTH" | "QUARTER" | "YEAR"
                        );
                        // For DATETIME_TRUNC, cast string args to TIMESTAMP
                        let cast_ts = if name == "DATETIME_TRUNC" {
                            match ts {
                                // A bare string literal gets an explicit
                                // CAST(... AS TIMESTAMP) (no time zone).
                                Expression::Literal(Literal::String(ref _s)) => {
                                    Expression::Cast(Box::new(Cast {
                                        this: ts,
                                        to: DataType::Timestamp {
                                            precision: None,
                                            timezone: false,
                                        },
                                        trailing_comments: vec![],
                                        double_colon_syntax: false,
                                        format: None,
                                        default: None,
                                        inferred_type: None,
                                    }))
                                }
                                // Otherwise defer to the shared helper.
                                _ => Self::maybe_cast_ts_to_tz(ts, &name),
                            }
                        } else {
                            Self::maybe_cast_ts_to_tz(ts, &name)
                        };

                        if let Some(tz_arg) = tz {
                            if is_coarse {
                                // DATE_TRUNC('UNIT', ts AT TIME ZONE 'tz') AT TIME ZONE 'tz'
                                let at_tz = Expression::AtTimeZone(Box::new(
                                    crate::expressions::AtTimeZone {
                                        this: cast_ts,
                                        zone: tz_arg.clone(),
                                    },
                                ));
                                let date_trunc = Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![Expression::Literal(Literal::String(unit_str)), at_tz],
                                )));
                                Ok(Expression::AtTimeZone(Box::new(
                                    crate::expressions::AtTimeZone {
                                        this: date_trunc,
                                        zone: tz_arg,
                                    },
                                )))
                            } else {
                                // For MINUTE/HOUR: no AT TIME ZONE wrapper, just DATE_TRUNC('UNIT', ts)
                                // NOTE(review): the timezone argument is dropped
                                // for sub-day units — confirm this is intended.
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![Expression::Literal(Literal::String(unit_str)), cast_ts],
                                ))))
                            }
                        } else {
                            // No timezone: DATE_TRUNC('UNIT', CAST(ts AS TIMESTAMPTZ))
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_TRUNC".to_string(),
                                vec![Expression::Literal(Literal::String(unit_str)), cast_ts],
                            ))))
                        }
                    }
                    DialectType::Databricks | DialectType::Spark => {
                        // Databricks/Spark: DATE_TRUNC('UNIT', ts)
                        // (a timezone argument, if present, is not emitted here)
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_TRUNC".to_string(),
                            vec![Expression::Literal(Literal::String(unit_str)), ts],
                        ))))
                    }
                    _ => {
                        // Default: keep as TIMESTAMP_TRUNC('UNIT', ts, [tz])
                        let unit = Expression::Literal(Literal::String(unit_str));
                        let mut date_trunc_args = vec![unit, ts];
                        if let Some(tz_arg) = tz {
                            date_trunc_args.push(tz_arg);
                        }
                        Ok(Expression::Function(Box::new(Function::new(
                            "TIMESTAMP_TRUNC".to_string(),
                            date_trunc_args,
                        ))))
                    }
                }
            }
28719
            // TIME(h, m, s) -> target-specific, TIME('string') -> CAST('string' AS TIME)
            //
            // Dispatches on argument count:
            //   3 args: component constructor (h, m, s)
            //   1 arg:  cast to TIME (TIMESTAMP on Spark)
            //   2 args: value + timezone -> cast through TIMESTAMPTZ
            //   other:  passed through unchanged
            "TIME" => {
                if args.len() == 3 {
                    // TIME(h, m, s) constructor
                    match target {
                        DialectType::TSQL => {
                            // TIMEFROMPARTS(h, m, s, 0, 0)
                            // Two trailing zero literals pad TSQL's 5-argument
                            // form (fractions and precision — TODO confirm).
                            args.push(Expression::number(0));
                            args.push(Expression::number(0));
                            Ok(Expression::Function(Box::new(Function::new(
                                "TIMEFROMPARTS".to_string(),
                                args,
                            ))))
                        }
                        DialectType::MySQL => Ok(Expression::Function(Box::new(Function::new(
                            "MAKETIME".to_string(),
                            args,
                        )))),
                        DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
                            Function::new("MAKE_TIME".to_string(), args),
                        ))),
                        _ => Ok(Expression::Function(Box::new(Function::new(
                            "TIME".to_string(),
                            args,
                        )))),
                    }
                } else if args.len() == 1 {
                    let arg = args.remove(0);
                    if matches!(target, DialectType::Spark) {
                        // Spark: CAST(x AS TIMESTAMP) (yes, TIMESTAMP not TIME)
                        Ok(Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    } else {
                        // Most targets: CAST(x AS TIME)
                        Ok(Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Time {
                                precision: None,
                                timezone: false,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    }
                } else if args.len() == 2 {
                    // TIME(expr, timezone) -> CAST(CAST(expr AS TIMESTAMPTZ) AT TIME ZONE tz AS TIME)
                    let expr = args.remove(0);
                    let tz = args.remove(0);
                    // Inner cast: interpret the value as a timezone-aware timestamp.
                    let cast_tstz = Expression::Cast(Box::new(Cast {
                        this: expr,
                        to: DataType::Timestamp {
                            timezone: true,
                            precision: None,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    }));
                    // Shift into the requested zone, then truncate to TIME.
                    let at_tz = Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                        this: cast_tstz,
                        zone: tz,
                    }));
                    Ok(Expression::Cast(Box::new(Cast {
                        this: at_tz,
                        to: DataType::Time {
                            precision: None,
                            timezone: false,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                } else {
                    // Unexpected arity: pass through unchanged.
                    Ok(Expression::Function(Box::new(Function::new(
                        "TIME".to_string(),
                        args,
                    ))))
                }
            }
28816
            // DATETIME('string') -> CAST('string' AS TIMESTAMP)
            // DATETIME('date', TIME 'time') -> CAST(CAST('date' AS DATE) + CAST('time' AS TIME) AS TIMESTAMP)
            // DATETIME('string', 'timezone') -> CAST(CAST('string' AS TIMESTAMPTZ) AT TIME ZONE tz AS TIMESTAMP)
            // DATETIME(y, m, d, h, min, s) -> target-specific
            "DATETIME" => {
                // For BigQuery target: keep DATETIME function but convert TIME literal to CAST
                if matches!(target, DialectType::BigQuery) {
                    if args.len() == 2 {
                        let has_time_literal =
                            matches!(&args[1], Expression::Literal(Literal::Time(_)));
                        if has_time_literal {
                            let first = args.remove(0);
                            let second = args.remove(0);
                            // Rewrite TIME 'x' as CAST('x' AS TIME) so the
                            // generator does not emit a TIME literal.
                            let time_as_cast = match second {
                                Expression::Literal(Literal::Time(s)) => {
                                    Expression::Cast(Box::new(Cast {
                                        this: Expression::Literal(Literal::String(s)),
                                        to: DataType::Time {
                                            precision: None,
                                            timezone: false,
                                        },
                                        trailing_comments: vec![],
                                        double_colon_syntax: false,
                                        format: None,
                                        default: None,
                                        inferred_type: None,
                                    }))
                                }
                                other => other,
                            };
                            return Ok(Expression::Function(Box::new(Function::new(
                                "DATETIME".to_string(),
                                vec![first, time_as_cast],
                            ))));
                        }
                    }
                    // BigQuery keeps DATETIME untouched in every other case.
                    return Ok(Expression::Function(Box::new(Function::new(
                        "DATETIME".to_string(),
                        args,
                    ))));
                }

                // Non-BigQuery targets below (the BigQuery path returned early).
                if args.len() == 1 {
                    // DATETIME('string') -> CAST('string' AS TIMESTAMP)
                    let arg = args.remove(0);
                    Ok(Expression::Cast(Box::new(Cast {
                        this: arg,
                        to: DataType::Timestamp {
                            timezone: false,
                            precision: None,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                } else if args.len() == 2 {
                    let first = args.remove(0);
                    let second = args.remove(0);
                    // Check if second arg is a TIME literal
                    let is_time_literal = matches!(&second, Expression::Literal(Literal::Time(_)));
                    if is_time_literal {
                        // DATETIME('date', TIME 'time') -> CAST(CAST(date AS DATE) + CAST('time' AS TIME) AS TIMESTAMP)
                        let cast_date = Expression::Cast(Box::new(Cast {
                            this: first,
                            to: DataType::Date,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        // Convert TIME 'x' literal to string 'x' so CAST produces CAST('x' AS TIME) not CAST(TIME 'x' AS TIME)
                        let time_as_string = match second {
                            Expression::Literal(Literal::Time(s)) => {
                                Expression::Literal(Literal::String(s))
                            }
                            other => other,
                        };
                        let cast_time = Expression::Cast(Box::new(Cast {
                            this: time_as_string,
                            to: DataType::Time {
                                precision: None,
                                timezone: false,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        // date + time, then cast the sum to TIMESTAMP.
                        let add_expr =
                            Expression::Add(Box::new(BinaryOp::new(cast_date, cast_time)));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: add_expr,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    } else {
                        // DATETIME('string', 'timezone')
                        // CAST through TIMESTAMPTZ, shift with AT TIME ZONE,
                        // then strip the zone by casting to plain TIMESTAMP.
                        let cast_tstz = Expression::Cast(Box::new(Cast {
                            this: first,
                            to: DataType::Timestamp {
                                timezone: true,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        let at_tz =
                            Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                                this: cast_tstz,
                                zone: second,
                            }));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: at_tz,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    }
                } else if args.len() >= 3 {
                    // DATETIME(y, m, d, h, min, s) -> TIMESTAMP_FROM_PARTS for Snowflake
                    // For other targets, use MAKE_TIMESTAMP or similar
                    // NOTE(review): only the Snowflake mapping is implemented;
                    // other targets keep DATETIME as-is despite the comment.
                    if matches!(target, DialectType::Snowflake) {
                        Ok(Expression::Function(Box::new(Function::new(
                            "TIMESTAMP_FROM_PARTS".to_string(),
                            args,
                        ))))
                    } else {
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATETIME".to_string(),
                            args,
                        ))))
                    }
                } else {
                    // Zero args: pass through unchanged.
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATETIME".to_string(),
                        args,
                    ))))
                }
            }
28975
            // TIMESTAMP(x) -> CAST(x AS TIMESTAMP WITH TIME ZONE) for Presto
            // TIMESTAMP(x, tz) -> CAST(x AS TIMESTAMP) AT TIME ZONE tz for DuckDB
            "TIMESTAMP" => {
                if args.len() == 1 {
                    // NOTE(review): this timezone-aware cast is applied for
                    // every target, not only Presto as the header comment
                    // suggests — confirm that is intended.
                    let arg = args.remove(0);
                    Ok(Expression::Cast(Box::new(Cast {
                        this: arg,
                        to: DataType::Timestamp {
                            timezone: true,
                            precision: None,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                } else if args.len() == 2 {
                    let arg = args.remove(0);
                    let tz = args.remove(0);
                    // Inner cast to a plain (zone-less) TIMESTAMP.
                    let cast_ts = Expression::Cast(Box::new(Cast {
                        this: arg,
                        to: DataType::Timestamp {
                            timezone: false,
                            precision: None,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    }));
                    if matches!(target, DialectType::Snowflake) {
                        // CONVERT_TIMEZONE('tz', CAST(x AS TIMESTAMP))
                        Ok(Expression::Function(Box::new(Function::new(
                            "CONVERT_TIMEZONE".to_string(),
                            vec![tz, cast_ts],
                        ))))
                    } else {
                        // Everyone else: CAST(x AS TIMESTAMP) AT TIME ZONE tz
                        Ok(Expression::AtTimeZone(Box::new(
                            crate::expressions::AtTimeZone {
                                this: cast_ts,
                                zone: tz,
                            },
                        )))
                    }
                } else {
                    // Unexpected arity: pass through unchanged.
                    Ok(Expression::Function(Box::new(Function::new(
                        "TIMESTAMP".to_string(),
                        args,
                    ))))
                }
            }
29029
            // STRING(x) -> CAST(x AS VARCHAR/TEXT)
            // STRING(x, tz) -> CAST(CAST(x AS TIMESTAMP) AT TIME ZONE 'UTC' AT TIME ZONE tz AS VARCHAR/TEXT)
            "STRING" => {
                if args.len() == 1 {
                    let arg = args.remove(0);
                    // DuckDB prefers TEXT; everyone else gets VARCHAR.
                    let cast_type = match target {
                        DialectType::DuckDB => DataType::Text,
                        _ => DataType::VarChar {
                            length: None,
                            parenthesized_length: false,
                        },
                    };
                    Ok(Expression::Cast(Box::new(Cast {
                        this: arg,
                        to: cast_type,
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                } else if args.len() == 2 {
                    let arg = args.remove(0);
                    let tz = args.remove(0);
                    // Same target-specific string type as the 1-arg form.
                    let cast_type = match target {
                        DialectType::DuckDB => DataType::Text,
                        _ => DataType::VarChar {
                            length: None,
                            parenthesized_length: false,
                        },
                    };
                    if matches!(target, DialectType::Snowflake) {
                        // STRING(x, tz) -> CAST(CONVERT_TIMEZONE('UTC', tz, x) AS VARCHAR)
                        let convert_tz = Expression::Function(Box::new(Function::new(
                            "CONVERT_TIMEZONE".to_string(),
                            vec![
                                Expression::Literal(Literal::String("UTC".to_string())),
                                tz,
                                arg,
                            ],
                        )));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: convert_tz,
                            to: cast_type,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    } else {
                        // STRING(x, tz) -> CAST(CAST(x AS TIMESTAMP) AT TIME ZONE 'UTC' AT TIME ZONE tz AS TEXT/VARCHAR)
                        // Anchor the value at UTC first, then shift into the
                        // requested zone before stringifying.
                        let cast_ts = Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        let at_utc =
                            Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                                this: cast_ts,
                                zone: Expression::Literal(Literal::String("UTC".to_string())),
                            }));
                        let at_tz =
                            Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                                this: at_utc,
                                zone: tz,
                            }));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: at_tz,
                            to: cast_type,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    }
                } else {
                    // Unexpected arity: pass through unchanged.
                    Ok(Expression::Function(Box::new(Function::new(
                        "STRING".to_string(),
                        args,
                    ))))
                }
            }
29121
            // UNIX_SECONDS, UNIX_MILLIS, UNIX_MICROS as functions (not expressions)
            "UNIX_SECONDS" if args.len() == 1 => {
                let ts = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // CAST(EPOCH(CAST(ts AS TIMESTAMPTZ)) AS BIGINT)
                        // The outer BIGINT cast pins the integer result type.
                        let cast_ts = Self::ensure_cast_timestamptz(ts);
                        let epoch = Expression::Function(Box::new(Function::new(
                            "EPOCH".to_string(),
                            vec![cast_ts],
                        )));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: epoch,
                            to: DataType::BigInt { length: None },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    }
                    DialectType::Snowflake => {
                        // TIMESTAMPDIFF(SECONDS, CAST('1970-01-01 00:00:00+00' AS TIMESTAMPTZ), ts)
                        // i.e. seconds elapsed since the Unix epoch literal.
                        let epoch = Expression::Cast(Box::new(Cast {
                            this: Expression::Literal(Literal::String(
                                "1970-01-01 00:00:00+00".to_string(),
                            )),
                            to: DataType::Timestamp {
                                timezone: true,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        Ok(Expression::TimestampDiff(Box::new(
                            crate::expressions::TimestampDiff {
                                this: Box::new(epoch),
                                expression: Box::new(ts),
                                unit: Some("SECONDS".to_string()),
                            },
                        )))
                    }
                    // Other targets keep UNIX_SECONDS untouched.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "UNIX_SECONDS".to_string(),
                        vec![ts],
                    )))),
                }
            }
29173
29174 "UNIX_MILLIS" if args.len() == 1 => {
29175 let ts = args.remove(0);
29176 match target {
29177 DialectType::DuckDB => {
29178 // EPOCH_MS(CAST(ts AS TIMESTAMPTZ))
29179 let cast_ts = Self::ensure_cast_timestamptz(ts);
29180 Ok(Expression::Function(Box::new(Function::new(
29181 "EPOCH_MS".to_string(),
29182 vec![cast_ts],
29183 ))))
29184 }
29185 _ => Ok(Expression::Function(Box::new(Function::new(
29186 "UNIX_MILLIS".to_string(),
29187 vec![ts],
29188 )))),
29189 }
29190 }
29191
29192 "UNIX_MICROS" if args.len() == 1 => {
29193 let ts = args.remove(0);
29194 match target {
29195 DialectType::DuckDB => {
29196 // EPOCH_US(CAST(ts AS TIMESTAMPTZ))
29197 let cast_ts = Self::ensure_cast_timestamptz(ts);
29198 Ok(Expression::Function(Box::new(Function::new(
29199 "EPOCH_US".to_string(),
29200 vec![cast_ts],
29201 ))))
29202 }
29203 _ => Ok(Expression::Function(Box::new(Function::new(
29204 "UNIX_MICROS".to_string(),
29205 vec![ts],
29206 )))),
29207 }
29208 }
29209
29210 // ARRAY_CONCAT / LIST_CONCAT -> target-specific
29211 "ARRAY_CONCAT" | "LIST_CONCAT" => {
29212 match target {
29213 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
29214 // CONCAT(arr1, arr2, ...)
29215 Ok(Expression::Function(Box::new(Function::new(
29216 "CONCAT".to_string(),
29217 args,
29218 ))))
29219 }
29220 DialectType::Presto | DialectType::Trino => {
29221 // CONCAT(arr1, arr2, ...)
29222 Ok(Expression::Function(Box::new(Function::new(
29223 "CONCAT".to_string(),
29224 args,
29225 ))))
29226 }
29227 DialectType::Snowflake => {
29228 // ARRAY_CAT(arr1, ARRAY_CAT(arr2, arr3))
29229 if args.len() == 1 {
29230 // ARRAY_CAT requires 2 args, add empty array as []
29231 let empty_arr = Expression::ArrayFunc(Box::new(
29232 crate::expressions::ArrayConstructor {
29233 expressions: vec![],
29234 bracket_notation: true,
29235 use_list_keyword: false,
29236 },
29237 ));
29238 let mut new_args = args;
29239 new_args.push(empty_arr);
29240 Ok(Expression::Function(Box::new(Function::new(
29241 "ARRAY_CAT".to_string(),
29242 new_args,
29243 ))))
29244 } else if args.is_empty() {
29245 Ok(Expression::Function(Box::new(Function::new(
29246 "ARRAY_CAT".to_string(),
29247 args,
29248 ))))
29249 } else {
29250 let mut it = args.into_iter().rev();
29251 let mut result = it.next().unwrap();
29252 for arr in it {
29253 result = Expression::Function(Box::new(Function::new(
29254 "ARRAY_CAT".to_string(),
29255 vec![arr, result],
29256 )));
29257 }
29258 Ok(result)
29259 }
29260 }
29261 DialectType::PostgreSQL => {
29262 // ARRAY_CAT(arr1, ARRAY_CAT(arr2, arr3))
29263 if args.len() <= 1 {
29264 Ok(Expression::Function(Box::new(Function::new(
29265 "ARRAY_CAT".to_string(),
29266 args,
29267 ))))
29268 } else {
29269 let mut it = args.into_iter().rev();
29270 let mut result = it.next().unwrap();
29271 for arr in it {
29272 result = Expression::Function(Box::new(Function::new(
29273 "ARRAY_CAT".to_string(),
29274 vec![arr, result],
29275 )));
29276 }
29277 Ok(result)
29278 }
29279 }
29280 DialectType::Redshift => {
29281 // ARRAY_CONCAT(arr1, ARRAY_CONCAT(arr2, arr3))
29282 if args.len() <= 2 {
29283 Ok(Expression::Function(Box::new(Function::new(
29284 "ARRAY_CONCAT".to_string(),
29285 args,
29286 ))))
29287 } else {
29288 let mut it = args.into_iter().rev();
29289 let mut result = it.next().unwrap();
29290 for arr in it {
29291 result = Expression::Function(Box::new(Function::new(
29292 "ARRAY_CONCAT".to_string(),
29293 vec![arr, result],
29294 )));
29295 }
29296 Ok(result)
29297 }
29298 }
29299 DialectType::DuckDB => {
29300 // LIST_CONCAT supports multiple args natively in DuckDB
29301 Ok(Expression::Function(Box::new(Function::new(
29302 "LIST_CONCAT".to_string(),
29303 args,
29304 ))))
29305 }
29306 _ => Ok(Expression::Function(Box::new(Function::new(
29307 "ARRAY_CONCAT".to_string(),
29308 args,
29309 )))),
29310 }
29311 }
29312
29313 // ARRAY_CONCAT_AGG -> Snowflake: ARRAY_FLATTEN(ARRAY_AGG(x))
29314 "ARRAY_CONCAT_AGG" if args.len() == 1 => {
29315 let arg = args.remove(0);
29316 match target {
29317 DialectType::Snowflake => {
29318 let array_agg =
29319 Expression::ArrayAgg(Box::new(crate::expressions::AggFunc {
29320 this: arg,
29321 distinct: false,
29322 filter: None,
29323 order_by: vec![],
29324 name: None,
29325 ignore_nulls: None,
29326 having_max: None,
29327 limit: None,
29328 inferred_type: None,
29329 }));
29330 Ok(Expression::Function(Box::new(Function::new(
29331 "ARRAY_FLATTEN".to_string(),
29332 vec![array_agg],
29333 ))))
29334 }
29335 _ => Ok(Expression::Function(Box::new(Function::new(
29336 "ARRAY_CONCAT_AGG".to_string(),
29337 vec![arg],
29338 )))),
29339 }
29340 }
29341
29342 // MD5/SHA1/SHA256/SHA512 -> target-specific hash functions
29343 "MD5" if args.len() == 1 => {
29344 let arg = args.remove(0);
29345 match target {
29346 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
29347 // UNHEX(MD5(x))
29348 let md5 = Expression::Function(Box::new(Function::new(
29349 "MD5".to_string(),
29350 vec![arg],
29351 )));
29352 Ok(Expression::Function(Box::new(Function::new(
29353 "UNHEX".to_string(),
29354 vec![md5],
29355 ))))
29356 }
29357 DialectType::Snowflake => {
29358 // MD5_BINARY(x)
29359 Ok(Expression::Function(Box::new(Function::new(
29360 "MD5_BINARY".to_string(),
29361 vec![arg],
29362 ))))
29363 }
29364 _ => Ok(Expression::Function(Box::new(Function::new(
29365 "MD5".to_string(),
29366 vec![arg],
29367 )))),
29368 }
29369 }
29370
29371 "SHA1" if args.len() == 1 => {
29372 let arg = args.remove(0);
29373 match target {
29374 DialectType::DuckDB => {
29375 // UNHEX(SHA1(x))
29376 let sha1 = Expression::Function(Box::new(Function::new(
29377 "SHA1".to_string(),
29378 vec![arg],
29379 )));
29380 Ok(Expression::Function(Box::new(Function::new(
29381 "UNHEX".to_string(),
29382 vec![sha1],
29383 ))))
29384 }
29385 _ => Ok(Expression::Function(Box::new(Function::new(
29386 "SHA1".to_string(),
29387 vec![arg],
29388 )))),
29389 }
29390 }
29391
29392 "SHA256" if args.len() == 1 => {
29393 let arg = args.remove(0);
29394 match target {
29395 DialectType::DuckDB => {
29396 // UNHEX(SHA256(x))
29397 let sha = Expression::Function(Box::new(Function::new(
29398 "SHA256".to_string(),
29399 vec![arg],
29400 )));
29401 Ok(Expression::Function(Box::new(Function::new(
29402 "UNHEX".to_string(),
29403 vec![sha],
29404 ))))
29405 }
29406 DialectType::Snowflake => {
29407 // SHA2_BINARY(x, 256)
29408 Ok(Expression::Function(Box::new(Function::new(
29409 "SHA2_BINARY".to_string(),
29410 vec![arg, Expression::number(256)],
29411 ))))
29412 }
29413 DialectType::Redshift | DialectType::Spark => {
29414 // SHA2(x, 256)
29415 Ok(Expression::Function(Box::new(Function::new(
29416 "SHA2".to_string(),
29417 vec![arg, Expression::number(256)],
29418 ))))
29419 }
29420 _ => Ok(Expression::Function(Box::new(Function::new(
29421 "SHA256".to_string(),
29422 vec![arg],
29423 )))),
29424 }
29425 }
29426
29427 "SHA512" if args.len() == 1 => {
29428 let arg = args.remove(0);
29429 match target {
29430 DialectType::Snowflake => {
29431 // SHA2_BINARY(x, 512)
29432 Ok(Expression::Function(Box::new(Function::new(
29433 "SHA2_BINARY".to_string(),
29434 vec![arg, Expression::number(512)],
29435 ))))
29436 }
29437 DialectType::Redshift | DialectType::Spark => {
29438 // SHA2(x, 512)
29439 Ok(Expression::Function(Box::new(Function::new(
29440 "SHA2".to_string(),
29441 vec![arg, Expression::number(512)],
29442 ))))
29443 }
29444 _ => Ok(Expression::Function(Box::new(Function::new(
29445 "SHA512".to_string(),
29446 vec![arg],
29447 )))),
29448 }
29449 }
29450
            // REGEXP_EXTRACT_ALL(str, pattern) -> add default group arg
            //
            // Targets disagree on the default group index, so the rewrite
            // adds or omits an explicit group argument per dialect.
            "REGEXP_EXTRACT_ALL" if args.len() == 2 => {
                let str_expr = args.remove(0);
                let pattern = args.remove(0);

                // Check if pattern contains capturing groups (parentheses)
                // NOTE(review): this literal-only heuristic also fires on
                // non-capturing `(?:...)` groups and escaped parentheses —
                // confirm that is acceptable for the supported patterns.
                let has_groups = match &pattern {
                    Expression::Literal(Literal::String(s)) => s.contains('(') && s.contains(')'),
                    _ => false,
                };

                match target {
                    DialectType::DuckDB => {
                        // DuckDB always gets an explicit group: 1 when the
                        // pattern captures, 0 (whole match) otherwise.
                        let group = if has_groups {
                            Expression::number(1)
                        } else {
                            Expression::number(0)
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "REGEXP_EXTRACT_ALL".to_string(),
                            vec![str_expr, pattern, group],
                        ))))
                    }
                    DialectType::Spark | DialectType::Databricks => {
                        // Spark's default group_index is 1 (same as BigQuery), so omit for capturing groups
                        if has_groups {
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![str_expr, pattern],
                            ))))
                        } else {
                            // No captures: force group 0 (the whole match).
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![str_expr, pattern, Expression::number(0)],
                            ))))
                        }
                    }
                    DialectType::Presto | DialectType::Trino => {
                        // Presto/Trino: explicit group 1 only when capturing.
                        if has_groups {
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![str_expr, pattern, Expression::number(1)],
                            ))))
                        } else {
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![str_expr, pattern],
                            ))))
                        }
                    }
                    DialectType::Snowflake => {
                        if has_groups {
                            // REGEXP_EXTRACT_ALL(str, pattern, 1, 1, 'c', 1)
                            // (position, occurrence, parameters, group — the
                            // full Snowflake argument list)
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![
                                    str_expr,
                                    pattern,
                                    Expression::number(1),
                                    Expression::number(1),
                                    Expression::Literal(Literal::String("c".to_string())),
                                    Expression::number(1),
                                ],
                            ))))
                        } else {
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![str_expr, pattern],
                            ))))
                        }
                    }
                    // Default: two-argument form unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "REGEXP_EXTRACT_ALL".to_string(),
                        vec![str_expr, pattern],
                    )))),
                }
            }
29528
29529 // MOD(x, y) -> x % y for PostgreSQL/DuckDB
29530 "MOD" if args.len() == 2 => {
29531 match target {
29532 DialectType::PostgreSQL
29533 | DialectType::DuckDB
29534 | DialectType::Presto
29535 | DialectType::Trino
29536 | DialectType::Athena
29537 | DialectType::Snowflake => {
29538 let x = args.remove(0);
29539 let y = args.remove(0);
29540 // Wrap complex expressions in parens to preserve precedence
29541 let needs_paren = |e: &Expression| {
29542 matches!(
29543 e,
29544 Expression::Add(_)
29545 | Expression::Sub(_)
29546 | Expression::Mul(_)
29547 | Expression::Div(_)
29548 )
29549 };
29550 let x = if needs_paren(&x) {
29551 Expression::Paren(Box::new(crate::expressions::Paren {
29552 this: x,
29553 trailing_comments: vec![],
29554 }))
29555 } else {
29556 x
29557 };
29558 let y = if needs_paren(&y) {
29559 Expression::Paren(Box::new(crate::expressions::Paren {
29560 this: y,
29561 trailing_comments: vec![],
29562 }))
29563 } else {
29564 y
29565 };
29566 Ok(Expression::Mod(Box::new(
29567 crate::expressions::BinaryOp::new(x, y),
29568 )))
29569 }
29570 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
29571 // Hive/Spark: a % b
29572 let x = args.remove(0);
29573 let y = args.remove(0);
29574 let needs_paren = |e: &Expression| {
29575 matches!(
29576 e,
29577 Expression::Add(_)
29578 | Expression::Sub(_)
29579 | Expression::Mul(_)
29580 | Expression::Div(_)
29581 )
29582 };
29583 let x = if needs_paren(&x) {
29584 Expression::Paren(Box::new(crate::expressions::Paren {
29585 this: x,
29586 trailing_comments: vec![],
29587 }))
29588 } else {
29589 x
29590 };
29591 let y = if needs_paren(&y) {
29592 Expression::Paren(Box::new(crate::expressions::Paren {
29593 this: y,
29594 trailing_comments: vec![],
29595 }))
29596 } else {
29597 y
29598 };
29599 Ok(Expression::Mod(Box::new(
29600 crate::expressions::BinaryOp::new(x, y),
29601 )))
29602 }
29603 _ => Ok(Expression::Function(Box::new(Function::new(
29604 "MOD".to_string(),
29605 args,
29606 )))),
29607 }
29608 }
29609
29610 // ARRAY_FILTER(arr, lambda) -> FILTER for Hive/Spark/Presto, ARRAY_FILTER for StarRocks
29611 "ARRAY_FILTER" if args.len() == 2 => {
29612 let name = match target {
29613 DialectType::DuckDB => "LIST_FILTER",
29614 DialectType::StarRocks => "ARRAY_FILTER",
29615 _ => "FILTER",
29616 };
29617 Ok(Expression::Function(Box::new(Function::new(
29618 name.to_string(),
29619 args,
29620 ))))
29621 }
29622 // FILTER(arr, lambda) -> ARRAY_FILTER for StarRocks, LIST_FILTER for DuckDB
29623 "FILTER" if args.len() == 2 => {
29624 let name = match target {
29625 DialectType::DuckDB => "LIST_FILTER",
29626 DialectType::StarRocks => "ARRAY_FILTER",
29627 _ => "FILTER",
29628 };
29629 Ok(Expression::Function(Box::new(Function::new(
29630 name.to_string(),
29631 args,
29632 ))))
29633 }
29634 // REDUCE(arr, init, lambda1, lambda2) -> AGGREGATE for Spark
29635 "REDUCE" if args.len() >= 3 => {
29636 let name = match target {
29637 DialectType::Spark | DialectType::Databricks => "AGGREGATE",
29638 _ => "REDUCE",
29639 };
29640 Ok(Expression::Function(Box::new(Function::new(
29641 name.to_string(),
29642 args,
29643 ))))
29644 }
29645 // ARRAY_REVERSE(x) -> arrayReverse for ClickHouse (handled by generator)
29646 "ARRAY_REVERSE" if args.len() == 1 => Ok(Expression::Function(Box::new(
29647 Function::new("ARRAY_REVERSE".to_string(), args),
29648 ))),
29649
29650 // CONCAT(a, b, ...) -> a || b || ... for DuckDB with 3+ args
29651 "CONCAT" if args.len() > 2 => match target {
29652 DialectType::DuckDB => {
29653 let mut it = args.into_iter();
29654 let mut result = it.next().unwrap();
29655 for arg in it {
29656 result = Expression::DPipe(Box::new(crate::expressions::DPipe {
29657 this: Box::new(result),
29658 expression: Box::new(arg),
29659 safe: None,
29660 }));
29661 }
29662 Ok(result)
29663 }
29664 _ => Ok(Expression::Function(Box::new(Function::new(
29665 "CONCAT".to_string(),
29666 args,
29667 )))),
29668 },
29669
            // GENERATE_DATE_ARRAY(start, end[, step]) -> target-specific
            //
            // Four strategies, chosen by target:
            //   - BigQuery:  keep the function, filling in a default INTERVAL '1' DAY step;
            //   - DuckDB:    CAST(GENERATE_SERIES(...) AS DATE[]);
            //   - Snowflake: keep the function (with default step) for a later transform;
            //   - others:    lower to a GenerateSeries node.
            "GENERATE_DATE_ARRAY" => {
                if matches!(target, DialectType::BigQuery) {
                    // BQ->BQ: add default interval if not present
                    if args.len() == 2 {
                        let start = args.remove(0);
                        let end = args.remove(0);
                        // Default step is one day: INTERVAL '1' DAY.
                        let default_interval =
                            Expression::Interval(Box::new(crate::expressions::Interval {
                                this: Some(Expression::Literal(Literal::String("1".to_string()))),
                                unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                    unit: crate::expressions::IntervalUnit::Day,
                                    use_plural: false,
                                }),
                            }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "GENERATE_DATE_ARRAY".to_string(),
                            vec![start, end, default_interval],
                        ))))
                    } else {
                        // Already has an explicit step (or unusual arity): pass through.
                        Ok(Expression::Function(Box::new(Function::new(
                            "GENERATE_DATE_ARRAY".to_string(),
                            args,
                        ))))
                    }
                } else if matches!(target, DialectType::DuckDB) {
                    // DuckDB: CAST(GENERATE_SERIES(CAST(start AS DATE), CAST(end AS DATE), step) AS DATE[])
                    let start = args.get(0).cloned();
                    let end = args.get(1).cloned();
                    // Missing step defaults to INTERVAL '1' DAY.
                    let step = args.get(2).cloned().or_else(|| {
                        Some(Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(Expression::Literal(Literal::String("1".to_string()))),
                                unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                    unit: crate::expressions::IntervalUnit::Day,
                                    use_plural: false,
                                }),
                            },
                        )))
                    });

                    // Wrap start/end in CAST(... AS DATE) only for string literals
                    // (other expressions are assumed to already be date-typed).
                    let maybe_cast_date = |expr: Expression| -> Expression {
                        if matches!(&expr, Expression::Literal(Literal::String(_))) {
                            Expression::Cast(Box::new(Cast {
                                this: expr,
                                to: DataType::Date,
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            }))
                        } else {
                            expr
                        }
                    };
                    let cast_start = start.map(maybe_cast_date);
                    let cast_end = end.map(maybe_cast_date);

                    let gen_series =
                        Expression::GenerateSeries(Box::new(crate::expressions::GenerateSeries {
                            start: cast_start.map(Box::new),
                            end: cast_end.map(Box::new),
                            step: step.map(Box::new),
                            is_end_exclusive: None,
                        }));

                    // Wrap in CAST(... AS DATE[]) so the result is an array of dates,
                    // matching BigQuery's return type.
                    Ok(Expression::Cast(Box::new(Cast {
                        this: gen_series,
                        to: DataType::Array {
                            element_type: Box::new(DataType::Date),
                            dimension: None,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                } else if matches!(target, DialectType::Snowflake) {
                    // Snowflake: keep as GENERATE_DATE_ARRAY function for later transform
                    // (transform_generate_date_array_snowflake will convert to ARRAY_GENERATE_RANGE + DATEADD)
                    if args.len() == 2 {
                        let start = args.remove(0);
                        let end = args.remove(0);
                        // Same default step as the BigQuery branch: INTERVAL '1' DAY.
                        let default_interval =
                            Expression::Interval(Box::new(crate::expressions::Interval {
                                this: Some(Expression::Literal(Literal::String("1".to_string()))),
                                unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                    unit: crate::expressions::IntervalUnit::Day,
                                    use_plural: false,
                                }),
                            }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "GENERATE_DATE_ARRAY".to_string(),
                            vec![start, end, default_interval],
                        ))))
                    } else {
                        Ok(Expression::Function(Box::new(Function::new(
                            "GENERATE_DATE_ARRAY".to_string(),
                            args,
                        ))))
                    }
                } else {
                    // Convert to GenerateSeries for other targets
                    let start = args.get(0).cloned();
                    let end = args.get(1).cloned();
                    // Missing step defaults to INTERVAL '1' DAY, as above.
                    let step = args.get(2).cloned().or_else(|| {
                        Some(Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(Expression::Literal(Literal::String("1".to_string()))),
                                unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                    unit: crate::expressions::IntervalUnit::Day,
                                    use_plural: false,
                                }),
                            },
                        )))
                    });
                    Ok(Expression::GenerateSeries(Box::new(
                        crate::expressions::GenerateSeries {
                            start: start.map(Box::new),
                            end: end.map(Box::new),
                            step: step.map(Box::new),
                            is_end_exclusive: None,
                        },
                    )))
                }
            }
29800
            // PARSE_DATE(format, str) -> target-specific
            //
            // BigQuery's argument order is (format, string); both rewrites below keep
            // the parsed-string-first convention of the target dialect.
            "PARSE_DATE" if args.len() == 2 => {
                let format = args.remove(0);
                let str_expr = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // CAST(STRPTIME(str, duck_format) AS DATE)
                        // The BigQuery %-format is translated to DuckDB's strptime codes.
                        let duck_format = Self::bq_format_to_duckdb(&format);
                        let strptime = Expression::Function(Box::new(Function::new(
                            "STRPTIME".to_string(),
                            vec![str_expr, duck_format],
                        )));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: strptime,
                            to: DataType::Date,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    }
                    DialectType::Snowflake => {
                        // _POLYGLOT_DATE(str, snowflake_format)
                        // Use marker so Snowflake target transform keeps it as DATE() instead of TO_DATE()
                        let sf_format = Self::bq_format_to_snowflake(&format);
                        Ok(Expression::Function(Box::new(Function::new(
                            "_POLYGLOT_DATE".to_string(),
                            vec![str_expr, sf_format],
                        ))))
                    }
                    // All other targets: pass through in the original argument order.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "PARSE_DATE".to_string(),
                        vec![format, str_expr],
                    )))),
                }
            }
29838
            // PARSE_TIMESTAMP(format, str[, timezone]) -> target-specific
            "PARSE_TIMESTAMP" if args.len() >= 2 => {
                let format = args.remove(0);
                let str_expr = args.remove(0);
                // Optional third argument is a timezone string.
                let tz = if !args.is_empty() {
                    Some(args.remove(0))
                } else {
                    None
                };
                match target {
                    DialectType::DuckDB => {
                        // STRPTIME(str, duck_format) with the format codes translated.
                        // NOTE(review): the optional timezone argument is dropped on
                        // this path — confirm that is intentional for DuckDB.
                        let duck_format = Self::bq_format_to_duckdb(&format);
                        let strptime = Expression::Function(Box::new(Function::new(
                            "STRPTIME".to_string(),
                            vec![str_expr, duck_format],
                        )));
                        Ok(strptime)
                    }
                    _ => {
                        // Pass through, re-appending the timezone when it was supplied.
                        let mut result_args = vec![format, str_expr];
                        if let Some(tz_arg) = tz {
                            result_args.push(tz_arg);
                        }
                        Ok(Expression::Function(Box::new(Function::new(
                            "PARSE_TIMESTAMP".to_string(),
                            result_args,
                        ))))
                    }
                }
            }
29869
            // FORMAT_DATE(format, date) -> target-specific
            "FORMAT_DATE" if args.len() == 2 => {
                let format = args.remove(0);
                let date_expr = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // STRFTIME(CAST(date AS DATE), format)
                        // NOTE(review): unlike the FORMAT_DATETIME arm below, the format
                        // string is NOT run through bq_format_to_duckdb here — confirm
                        // whether BigQuery date formats are always strftime-compatible.
                        let cast_date = Expression::Cast(Box::new(Cast {
                            this: date_expr,
                            to: DataType::Date,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "STRFTIME".to_string(),
                            vec![cast_date, format],
                        ))))
                    }
                    // All other targets: pass through unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "FORMAT_DATE".to_string(),
                        vec![format, date_expr],
                    )))),
                }
            }
29897
            // FORMAT_DATETIME(format, datetime) -> target-specific
            "FORMAT_DATETIME" if args.len() == 2 => {
                let format = args.remove(0);
                let dt_expr = args.remove(0);

                if matches!(target, DialectType::BigQuery) {
                    // BQ->BQ: normalize %H:%M:%S to %T, %x to %D
                    let norm_format = Self::bq_format_normalize_bq(&format);
                    // Also strip DATETIME keyword from typed literals
                    // (DATETIME '...' becomes CAST('...' AS DATETIME)).
                    let norm_dt = match dt_expr {
                        Expression::Literal(Literal::Timestamp(s)) => {
                            Expression::Cast(Box::new(Cast {
                                this: Expression::Literal(Literal::String(s)),
                                to: DataType::Custom {
                                    name: "DATETIME".to_string(),
                                },
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            }))
                        }
                        other => other,
                    };
                    // Early return: the BQ->BQ path skips the target match below.
                    return Ok(Expression::Function(Box::new(Function::new(
                        "FORMAT_DATETIME".to_string(),
                        vec![norm_format, norm_dt],
                    ))));
                }

                match target {
                    DialectType::DuckDB => {
                        // STRFTIME(CAST(dt AS TIMESTAMP), duckdb_format)
                        let cast_dt = Self::ensure_cast_timestamp(dt_expr);
                        let duck_format = Self::bq_format_to_duckdb(&format);
                        Ok(Expression::Function(Box::new(Function::new(
                            "STRFTIME".to_string(),
                            vec![cast_dt, duck_format],
                        ))))
                    }
                    // All other targets: pass through unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "FORMAT_DATETIME".to_string(),
                        vec![format, dt_expr],
                    )))),
                }
            }
29945
            // FORMAT_TIMESTAMP(format, ts) -> target-specific
            //
            // Both rewrites double-cast: first to TIMESTAMPTZ (to anchor the instant),
            // then to plain TIMESTAMP (to drop the zone before formatting).
            "FORMAT_TIMESTAMP" if args.len() == 2 => {
                let format = args.remove(0);
                let ts_expr = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // STRFTIME(CAST(CAST(ts AS TIMESTAMPTZ) AS TIMESTAMP), format)
                        let cast_tstz = Self::ensure_cast_timestamptz(ts_expr);
                        let cast_ts = Expression::Cast(Box::new(Cast {
                            this: cast_tstz,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "STRFTIME".to_string(),
                            vec![cast_ts, format],
                        ))))
                    }
                    DialectType::Snowflake => {
                        // TO_CHAR(CAST(CAST(ts AS TIMESTAMPTZ) AS TIMESTAMP), snowflake_format)
                        let cast_tstz = Self::ensure_cast_timestamptz(ts_expr);
                        let cast_ts = Expression::Cast(Box::new(Cast {
                            this: cast_tstz,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        // Snowflake uses its own format-code vocabulary (YYYY, MM, ...).
                        let sf_format = Self::bq_format_to_snowflake(&format);
                        Ok(Expression::Function(Box::new(Function::new(
                            "TO_CHAR".to_string(),
                            vec![cast_ts, sf_format],
                        ))))
                    }
                    // All other targets: pass through unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "FORMAT_TIMESTAMP".to_string(),
                        vec![format, ts_expr],
                    )))),
                }
            }
29998
            // UNIX_DATE(date) -> DATE_DIFF('DAY', '1970-01-01', date) for DuckDB
            //
            // UNIX_DATE returns the number of whole days since the Unix epoch.
            "UNIX_DATE" if args.len() == 1 => {
                let date = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // Epoch anchor: CAST('1970-01-01' AS DATE).
                        let epoch = Expression::Cast(Box::new(Cast {
                            this: Expression::Literal(Literal::String("1970-01-01".to_string())),
                            to: DataType::Date,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        // DATE_DIFF('DAY', epoch, date) but date might be DATE '...' literal
                        // Need to convert DATE literal to CAST
                        let norm_date = Self::date_literal_to_cast(date);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_DIFF".to_string(),
                            vec![
                                Expression::Literal(Literal::String("DAY".to_string())),
                                epoch,
                                norm_date,
                            ],
                        ))))
                    }
                    // All other targets: pass through unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "UNIX_DATE".to_string(),
                        vec![date],
                    )))),
                }
            }
30031
            // UNIX_SECONDS(ts) -> target-specific
            //
            // Returns the number of seconds since the Unix epoch.
            "UNIX_SECONDS" if args.len() == 1 => {
                let ts = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // CAST(EPOCH(CAST(ts AS TIMESTAMPTZ)) AS BIGINT)
                        // ts_literal_to_cast_tz normalizes TIMESTAMP '...' literals first.
                        let norm_ts = Self::ts_literal_to_cast_tz(ts);
                        let epoch = Expression::Function(Box::new(Function::new(
                            "EPOCH".to_string(),
                            vec![norm_ts],
                        )));
                        // Outer CAST to BIGINT matches BigQuery's INT64 return type.
                        Ok(Expression::Cast(Box::new(Cast {
                            this: epoch,
                            to: DataType::BigInt { length: None },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    }
                    DialectType::Snowflake => {
                        // TIMESTAMPDIFF(SECONDS, CAST('1970-01-01 00:00:00+00' AS TIMESTAMPTZ), ts)
                        let epoch = Expression::Cast(Box::new(Cast {
                            this: Expression::Literal(Literal::String(
                                "1970-01-01 00:00:00+00".to_string(),
                            )),
                            to: DataType::Timestamp {
                                timezone: true,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "TIMESTAMPDIFF".to_string(),
                            vec![
                                // Unit is an identifier, not a string literal.
                                Expression::Identifier(Identifier::new("SECONDS".to_string())),
                                epoch,
                                ts,
                            ],
                        ))))
                    }
                    // All other targets: pass through unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "UNIX_SECONDS".to_string(),
                        vec![ts],
                    )))),
                }
            }
30084
30085 // UNIX_MILLIS(ts) -> target-specific
30086 "UNIX_MILLIS" if args.len() == 1 => {
30087 let ts = args.remove(0);
30088 match target {
30089 DialectType::DuckDB => {
30090 let norm_ts = Self::ts_literal_to_cast_tz(ts);
30091 Ok(Expression::Function(Box::new(Function::new(
30092 "EPOCH_MS".to_string(),
30093 vec![norm_ts],
30094 ))))
30095 }
30096 _ => Ok(Expression::Function(Box::new(Function::new(
30097 "UNIX_MILLIS".to_string(),
30098 vec![ts],
30099 )))),
30100 }
30101 }
30102
30103 // UNIX_MICROS(ts) -> target-specific
30104 "UNIX_MICROS" if args.len() == 1 => {
30105 let ts = args.remove(0);
30106 match target {
30107 DialectType::DuckDB => {
30108 let norm_ts = Self::ts_literal_to_cast_tz(ts);
30109 Ok(Expression::Function(Box::new(Function::new(
30110 "EPOCH_US".to_string(),
30111 vec![norm_ts],
30112 ))))
30113 }
30114 _ => Ok(Expression::Function(Box::new(Function::new(
30115 "UNIX_MICROS".to_string(),
30116 vec![ts],
30117 )))),
30118 }
30119 }
30120
30121 // INSTR(str, substr) -> target-specific
30122 "INSTR" => {
30123 if matches!(target, DialectType::BigQuery) {
30124 // BQ->BQ: keep as INSTR
30125 Ok(Expression::Function(Box::new(Function::new(
30126 "INSTR".to_string(),
30127 args,
30128 ))))
30129 } else if matches!(target, DialectType::Snowflake) && args.len() == 2 {
30130 // Snowflake: CHARINDEX(substr, str) - swap args
30131 let str_expr = args.remove(0);
30132 let substr = args.remove(0);
30133 Ok(Expression::Function(Box::new(Function::new(
30134 "CHARINDEX".to_string(),
30135 vec![substr, str_expr],
30136 ))))
30137 } else {
30138 // Keep as INSTR for other targets
30139 Ok(Expression::Function(Box::new(Function::new(
30140 "INSTR".to_string(),
30141 args,
30142 ))))
30143 }
30144 }
30145
            // CURRENT_TIMESTAMP / CURRENT_DATE handling - parens normalization and timezone
            //
            // Three concerns are folded together here:
            //   1. BQ->BQ keeps the parenthesized function form;
            //   2. CURRENT_DATE('tz') gets its timezone argument lowered per target;
            //   3. some targets (Postgres/DuckDB/Presto/Trino) want the bare keyword
            //      form (no parens) for the zero-argument variants.
            "CURRENT_TIMESTAMP" | "CURRENT_DATE" | "CURRENT_DATETIME" | "CURRENT_TIME" => {
                if matches!(target, DialectType::BigQuery) {
                    // BQ->BQ: always output with parens (function form), keep any timezone arg
                    Ok(Expression::Function(Box::new(Function::new(name, args))))
                } else if name == "CURRENT_DATE" && args.len() == 1 {
                    // CURRENT_DATE('UTC') - has timezone arg
                    let tz_arg = args.remove(0);
                    match target {
                        DialectType::DuckDB => {
                            // CAST(CURRENT_TIMESTAMP AT TIME ZONE 'UTC' AS DATE)
                            let ct = Expression::CurrentTimestamp(
                                crate::expressions::CurrentTimestamp {
                                    precision: None,
                                    sysdate: false,
                                },
                            );
                            let at_tz =
                                Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                                    this: ct,
                                    zone: tz_arg,
                                }));
                            Ok(Expression::Cast(Box::new(Cast {
                                this: at_tz,
                                to: DataType::Date,
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            })))
                        }
                        DialectType::Snowflake => {
                            // CAST(CONVERT_TIMEZONE('UTC', CURRENT_TIMESTAMP()) AS DATE)
                            let ct = Expression::Function(Box::new(Function::new(
                                "CURRENT_TIMESTAMP".to_string(),
                                vec![],
                            )));
                            let convert = Expression::Function(Box::new(Function::new(
                                "CONVERT_TIMEZONE".to_string(),
                                vec![tz_arg, ct],
                            )));
                            Ok(Expression::Cast(Box::new(Cast {
                                this: convert,
                                to: DataType::Date,
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            })))
                        }
                        _ => {
                            // PostgreSQL, MySQL, etc.: CURRENT_DATE AT TIME ZONE 'UTC'
                            let cd = Expression::CurrentDate(crate::expressions::CurrentDate);
                            Ok(Expression::AtTimeZone(Box::new(
                                crate::expressions::AtTimeZone {
                                    this: cd,
                                    zone: tz_arg,
                                },
                            )))
                        }
                    }
                } else if (name == "CURRENT_TIMESTAMP"
                    || name == "CURRENT_TIME"
                    || name == "CURRENT_DATE")
                    && args.is_empty()
                    && matches!(
                        target,
                        DialectType::PostgreSQL
                            | DialectType::DuckDB
                            | DialectType::Presto
                            | DialectType::Trino
                    )
                {
                    // These targets want no-parens CURRENT_TIMESTAMP / CURRENT_DATE / CURRENT_TIME
                    // (represented by dedicated AST nodes rather than Function).
                    if name == "CURRENT_TIMESTAMP" {
                        Ok(Expression::CurrentTimestamp(
                            crate::expressions::CurrentTimestamp {
                                precision: None,
                                sysdate: false,
                            },
                        ))
                    } else if name == "CURRENT_DATE" {
                        Ok(Expression::CurrentDate(crate::expressions::CurrentDate))
                    } else {
                        // CURRENT_TIME
                        Ok(Expression::CurrentTime(crate::expressions::CurrentTime {
                            precision: None,
                        }))
                    }
                } else {
                    // All other targets: keep as function (with parens)
                    Ok(Expression::Function(Box::new(Function::new(name, args))))
                }
            }
30242
            // JSON_QUERY(json, path) -> target-specific
            "JSON_QUERY" if args.len() == 2 => {
                match target {
                    DialectType::DuckDB | DialectType::SQLite => {
                        // json -> path syntax (arrow-style JSON extraction)
                        let json_expr = args.remove(0);
                        let path = args.remove(0);
                        Ok(Expression::JsonExtract(Box::new(
                            crate::expressions::JsonExtractFunc {
                                this: json_expr,
                                path,
                                returning: None,
                                arrow_syntax: true,
                                hash_arrow_syntax: false,
                                wrapper_option: None,
                                quotes_option: None,
                                on_scalar_string: false,
                                on_error: None,
                            },
                        )))
                    }
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
                        // Hive-family spelling: GET_JSON_OBJECT(json, path).
                        Ok(Expression::Function(Box::new(Function::new(
                            "GET_JSON_OBJECT".to_string(),
                            args,
                        ))))
                    }
                    // Postgres family: JSON_EXTRACT_PATH(json, path).
                    DialectType::PostgreSQL | DialectType::Redshift => Ok(Expression::Function(
                        Box::new(Function::new("JSON_EXTRACT_PATH".to_string(), args)),
                    )),
                    // All other targets: pass through unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "JSON_QUERY".to_string(),
                        args,
                    )))),
                }
            }
30279
            // JSON_VALUE_ARRAY(json, path) -> target-specific
            //
            // BigQuery's JSON_VALUE_ARRAY returns an array of strings extracted from
            // a JSON array at the given path.
            "JSON_VALUE_ARRAY" if args.len() == 2 => {
                match target {
                    DialectType::DuckDB => {
                        // CAST(json -> path AS TEXT[])
                        let json_expr = args.remove(0);
                        let path = args.remove(0);
                        let arrow = Expression::JsonExtract(Box::new(
                            crate::expressions::JsonExtractFunc {
                                this: json_expr,
                                path,
                                returning: None,
                                arrow_syntax: true,
                                hash_arrow_syntax: false,
                                wrapper_option: None,
                                quotes_option: None,
                                on_scalar_string: false,
                                on_error: None,
                            },
                        ));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: arrow,
                            to: DataType::Array {
                                element_type: Box::new(DataType::Text),
                                dimension: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    }
                    DialectType::Snowflake => {
                        // TRANSFORM(GET_PATH(PARSE_JSON(json), path), x -> CAST(x AS VARCHAR))
                        let json_expr = args.remove(0);
                        let path_expr = args.remove(0);
                        // Convert JSON path from $.path to just path
                        // (Snowflake GET_PATH takes a bare dotted path).
                        let sf_path = if let Expression::Literal(Literal::String(ref s)) = path_expr
                        {
                            let trimmed = s.trim_start_matches('$').trim_start_matches('.');
                            Expression::Literal(Literal::String(trimmed.to_string()))
                        } else {
                            path_expr
                        };
                        let parse_json = Expression::Function(Box::new(Function::new(
                            "PARSE_JSON".to_string(),
                            vec![json_expr],
                        )));
                        let get_path = Expression::Function(Box::new(Function::new(
                            "GET_PATH".to_string(),
                            vec![parse_json, sf_path],
                        )));
                        // TRANSFORM(get_path, x -> CAST(x AS VARCHAR))
                        // stringifies each extracted element.
                        let cast_expr = Expression::Cast(Box::new(Cast {
                            this: Expression::Identifier(Identifier::new("x")),
                            to: DataType::VarChar {
                                length: None,
                                parenthesized_length: false,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                            parameters: vec![Identifier::new("x")],
                            body: cast_expr,
                            colon: false,
                            parameter_types: vec![],
                        }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "TRANSFORM".to_string(),
                            vec![get_path, lambda],
                        ))))
                    }
                    // All other targets: pass through unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "JSON_VALUE_ARRAY".to_string(),
                        args,
                    )))),
                }
            }
30362
            // BigQuery REGEXP_EXTRACT(val, regex[, position[, occurrence]]) -> target dialects
            // BigQuery's 3rd arg is "position" (starting char index), 4th is "occurrence" (which match to return)
            // This is different from Hive/Spark where 3rd arg is "group_index"
            "REGEXP_EXTRACT" if matches!(source, DialectType::BigQuery) => {
                match target {
                    DialectType::DuckDB
                    | DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Athena => {
                        if args.len() == 2 {
                            // REGEXP_EXTRACT(val, regex) -> REGEXP_EXTRACT(val, regex, 1)
                            // (explicit group 1 reproduces BigQuery's default group behavior)
                            args.push(Expression::number(1));
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT".to_string(),
                                args,
                            ))))
                        } else if args.len() == 3 {
                            // 3-arg form: third argument is the starting position.
                            let val = args.remove(0);
                            let regex = args.remove(0);
                            let position = args.remove(0);
                            let is_pos_1 = matches!(&position, Expression::Literal(Literal::Number(n)) if n == "1");
                            if is_pos_1 {
                                // position 1 is a no-op: keep plain REGEXP_EXTRACT.
                                Ok(Expression::Function(Box::new(Function::new(
                                    "REGEXP_EXTRACT".to_string(),
                                    vec![val, regex, Expression::number(1)],
                                ))))
                            } else {
                                // Emulate position via SUBSTRING; NULLIF turns an empty
                                // suffix into NULL so the extract also yields NULL.
                                let substring_expr = Expression::Function(Box::new(Function::new(
                                    "SUBSTRING".to_string(),
                                    vec![val, position],
                                )));
                                let nullif_expr = Expression::Function(Box::new(Function::new(
                                    "NULLIF".to_string(),
                                    vec![
                                        substring_expr,
                                        Expression::Literal(Literal::String(String::new())),
                                    ],
                                )));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "REGEXP_EXTRACT".to_string(),
                                    vec![nullif_expr, regex, Expression::number(1)],
                                ))))
                            }
                        } else if args.len() == 4 {
                            // 4-arg form: position plus occurrence (which match to return).
                            let val = args.remove(0);
                            let regex = args.remove(0);
                            let position = args.remove(0);
                            let occurrence = args.remove(0);
                            let is_pos_1 = matches!(&position, Expression::Literal(Literal::Number(n)) if n == "1");
                            let is_occ_1 = matches!(&occurrence, Expression::Literal(Literal::Number(n)) if n == "1");
                            if is_pos_1 && is_occ_1 {
                                // Both defaults: degrade to the simple 3-arg form.
                                Ok(Expression::Function(Box::new(Function::new(
                                    "REGEXP_EXTRACT".to_string(),
                                    vec![val, regex, Expression::number(1)],
                                ))))
                            } else {
                                // Apply the SUBSTRING/NULLIF position trick when needed...
                                let subject = if is_pos_1 {
                                    val
                                } else {
                                    let substring_expr = Expression::Function(Box::new(
                                        Function::new("SUBSTRING".to_string(), vec![val, position]),
                                    ));
                                    Expression::Function(Box::new(Function::new(
                                        "NULLIF".to_string(),
                                        vec![
                                            substring_expr,
                                            Expression::Literal(Literal::String(String::new())),
                                        ],
                                    )))
                                };
                                // ...then pick the Nth match via REGEXP_EXTRACT_ALL + ARRAY_EXTRACT.
                                let extract_all = Expression::Function(Box::new(Function::new(
                                    "REGEXP_EXTRACT_ALL".to_string(),
                                    vec![subject, regex, Expression::number(1)],
                                )));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "ARRAY_EXTRACT".to_string(),
                                    vec![extract_all, occurrence],
                                ))))
                            }
                        } else {
                            // Unexpected arity: rebuild the original node unchanged
                            // (preserving flags like distinct/no_parens from `f`).
                            Ok(Expression::Function(Box::new(Function {
                                name: f.name,
                                args,
                                distinct: f.distinct,
                                trailing_comments: f.trailing_comments,
                                use_bracket_syntax: f.use_bracket_syntax,
                                no_parens: f.no_parens,
                                quoted: f.quoted,
                                span: None,
                                inferred_type: None,
                            })))
                        }
                    }
                    DialectType::Snowflake => {
                        // BigQuery REGEXP_EXTRACT -> Snowflake REGEXP_SUBSTR
                        Ok(Expression::Function(Box::new(Function::new(
                            "REGEXP_SUBSTR".to_string(),
                            args,
                        ))))
                    }
                    _ => {
                        // For other targets (Hive/Spark/BigQuery): pass through as-is
                        // BigQuery's default group behavior matches Hive/Spark for 2-arg case
                        Ok(Expression::Function(Box::new(Function {
                            name: f.name,
                            args,
                            distinct: f.distinct,
                            trailing_comments: f.trailing_comments,
                            use_bracket_syntax: f.use_bracket_syntax,
                            no_parens: f.no_parens,
                            quoted: f.quoted,
                            span: None,
                            inferred_type: None,
                        })))
                    }
                }
            }
30480
            // BigQuery STRUCT(args) -> target-specific struct expression
            "STRUCT" => {
                // Convert Function args to Struct fields
                // (a field is (optional name, value); `expr AS name` supplies the name).
                let mut fields: Vec<(Option<String>, Expression)> = Vec::new();
                for (i, arg) in args.into_iter().enumerate() {
                    match arg {
                        Expression::Alias(a) => {
                            // Named field: expr AS name
                            fields.push((Some(a.alias.name.clone()), a.this));
                        }
                        other => {
                            // Unnamed field: for Spark/Hive, keep as None
                            // For Snowflake, auto-name as _N
                            // For DuckDB, use column name for column refs, _N for others
                            // NOTE(review): auto-names use the 0-based position `i`;
                            // confirm this matches the intended field-naming scheme.
                            if matches!(target, DialectType::Snowflake) {
                                fields.push((Some(format!("_{}", i)), other));
                            } else if matches!(target, DialectType::DuckDB) {
                                let auto_name = match &other {
                                    Expression::Column(col) => col.name.name.clone(),
                                    _ => format!("_{}", i),
                                };
                                fields.push((Some(auto_name), other));
                            } else {
                                fields.push((None, other));
                            }
                        }
                    }
                }

                match target {
                    DialectType::Snowflake => {
                        // OBJECT_CONSTRUCT('name', value, ...)
                        // Arguments alternate key string, value; unnamed values are
                        // pushed without a key.
                        let mut oc_args = Vec::new();
                        for (name, val) in &fields {
                            if let Some(n) = name {
                                oc_args.push(Expression::Literal(Literal::String(n.clone())));
                                oc_args.push(val.clone());
                            } else {
                                oc_args.push(val.clone());
                            }
                        }
                        Ok(Expression::Function(Box::new(Function::new(
                            "OBJECT_CONSTRUCT".to_string(),
                            oc_args,
                        ))))
                    }
                    DialectType::DuckDB => {
                        // {'name': value, ...} struct literal
                        Ok(Expression::Struct(Box::new(crate::expressions::Struct {
                            fields,
                        })))
                    }
                    DialectType::Hive => {
                        // STRUCT(val1, val2, ...) - strip aliases
                        let hive_fields: Vec<(Option<String>, Expression)> =
                            fields.into_iter().map(|(_, v)| (None, v)).collect();
                        Ok(Expression::Struct(Box::new(crate::expressions::Struct {
                            fields: hive_fields,
                        })))
                    }
                    DialectType::Spark | DialectType::Databricks => {
                        // Use Expression::Struct to bypass Spark target transform auto-naming
                        Ok(Expression::Struct(Box::new(crate::expressions::Struct {
                            fields,
                        })))
                    }
                    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                        // Check if all fields are named AND all have inferable types - if so, wrap in CAST(ROW(...) AS ROW(name TYPE, ...))
                        let all_named =
                            !fields.is_empty() && fields.iter().all(|(name, _)| name.is_some());
                        let all_types_inferable = all_named
                            && fields
                                .iter()
                                .all(|(_, val)| Self::can_infer_presto_type(val));
                        let row_args: Vec<Expression> =
                            fields.iter().map(|(_, v)| v.clone()).collect();
                        let row_expr = Expression::Function(Box::new(Function::new(
                            "ROW".to_string(),
                            row_args,
                        )));
                        if all_named && all_types_inferable {
                            // Build ROW type with inferred types
                            let mut row_type_fields = Vec::new();
                            for (name, val) in &fields {
                                if let Some(n) = name {
                                    let type_str = Self::infer_sql_type_for_presto(val);
                                    row_type_fields.push(crate::expressions::StructField::new(
                                        n.clone(),
                                        crate::expressions::DataType::Custom { name: type_str },
                                    ));
                                }
                            }
                            let row_type = crate::expressions::DataType::Struct {
                                fields: row_type_fields,
                                nested: true,
                            };
                            Ok(Expression::Cast(Box::new(Cast {
                                this: row_expr,
                                to: row_type,
                                trailing_comments: Vec::new(),
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            })))
                        } else {
                            // Un-inferable types: emit a bare ROW(...) with no CAST.
                            Ok(row_expr)
                        }
                    }
                    _ => {
                        // Default: keep as STRUCT function with original args
                        // (re-wrapping named fields back into Alias nodes).
                        let mut new_args = Vec::new();
                        for (name, val) in fields {
                            if let Some(n) = name {
                                new_args.push(Expression::Alias(Box::new(
                                    crate::expressions::Alias::new(val, Identifier::new(n)),
                                )));
                            } else {
                                new_args.push(val);
                            }
                        }
                        Ok(Expression::Function(Box::new(Function::new(
                            "STRUCT".to_string(),
                            new_args,
                        ))))
                    }
                }
            }
30609
30610 // ROUND(x, n, 'ROUND_HALF_EVEN') -> ROUND_EVEN(x, n) for DuckDB
30611 "ROUND" if args.len() == 3 => {
30612 let x = args.remove(0);
30613 let n = args.remove(0);
30614 let mode = args.remove(0);
30615 // Check if mode is 'ROUND_HALF_EVEN'
30616 let is_half_even = matches!(&mode, Expression::Literal(Literal::String(s)) if s.eq_ignore_ascii_case("ROUND_HALF_EVEN"));
30617 if is_half_even && matches!(target, DialectType::DuckDB) {
30618 Ok(Expression::Function(Box::new(Function::new(
30619 "ROUND_EVEN".to_string(),
30620 vec![x, n],
30621 ))))
30622 } else {
30623 // Pass through with all args
30624 Ok(Expression::Function(Box::new(Function::new(
30625 "ROUND".to_string(),
30626 vec![x, n, mode],
30627 ))))
30628 }
30629 }
30630
30631 // MAKE_INTERVAL(year, month, named_args...) -> INTERVAL string for Snowflake/DuckDB
30632 "MAKE_INTERVAL" => {
30633 // MAKE_INTERVAL(1, 2, minute => 5, day => 3)
30634 // The positional args are: year, month
30635 // Named args are: day =>, minute =>, etc.
30636 // For Snowflake: INTERVAL '1 year, 2 month, 5 minute, 3 day'
30637 // For DuckDB: INTERVAL '1 year 2 month 5 minute 3 day'
30638 // For BigQuery->BigQuery: reorder named args (day before minute)
30639 if matches!(target, DialectType::Snowflake | DialectType::DuckDB) {
30640 let mut parts: Vec<(String, String)> = Vec::new();
30641 let mut pos_idx = 0;
30642 let pos_units = ["year", "month"];
30643 for arg in &args {
30644 if let Expression::NamedArgument(na) = arg {
30645 // Named arg like minute => 5
30646 let unit = na.name.name.clone();
30647 if let Expression::Literal(Literal::Number(n)) = &na.value {
30648 parts.push((unit, n.clone()));
30649 }
30650 } else if pos_idx < pos_units.len() {
30651 if let Expression::Literal(Literal::Number(n)) = arg {
30652 parts.push((pos_units[pos_idx].to_string(), n.clone()));
30653 }
30654 pos_idx += 1;
30655 }
30656 }
30657 // Don't sort - preserve original argument order
30658 let separator = if matches!(target, DialectType::Snowflake) {
30659 ", "
30660 } else {
30661 " "
30662 };
30663 let interval_str = parts
30664 .iter()
30665 .map(|(u, v)| format!("{} {}", v, u))
30666 .collect::<Vec<_>>()
30667 .join(separator);
30668 Ok(Expression::Interval(Box::new(
30669 crate::expressions::Interval {
30670 this: Some(Expression::Literal(Literal::String(interval_str))),
30671 unit: None,
30672 },
30673 )))
30674 } else if matches!(target, DialectType::BigQuery) {
30675 // BigQuery->BigQuery: reorder named args (day, minute, etc.)
30676 let mut positional = Vec::new();
30677 let mut named: Vec<(
30678 String,
30679 Expression,
30680 crate::expressions::NamedArgSeparator,
30681 )> = Vec::new();
30682 let _pos_units = ["year", "month"];
30683 let mut _pos_idx = 0;
30684 for arg in args {
30685 if let Expression::NamedArgument(na) = arg {
30686 named.push((na.name.name.clone(), na.value, na.separator));
30687 } else {
30688 positional.push(arg);
30689 _pos_idx += 1;
30690 }
30691 }
30692 // Sort named args by: day, hour, minute, second
30693 let unit_order = |u: &str| -> usize {
30694 match u.to_lowercase().as_str() {
30695 "day" => 0,
30696 "hour" => 1,
30697 "minute" => 2,
30698 "second" => 3,
30699 _ => 4,
30700 }
30701 };
30702 named.sort_by_key(|(u, _, _)| unit_order(u));
30703 let mut result_args = positional;
30704 for (name, value, sep) in named {
30705 result_args.push(Expression::NamedArgument(Box::new(
30706 crate::expressions::NamedArgument {
30707 name: Identifier::new(&name),
30708 value,
30709 separator: sep,
30710 },
30711 )));
30712 }
30713 Ok(Expression::Function(Box::new(Function::new(
30714 "MAKE_INTERVAL".to_string(),
30715 result_args,
30716 ))))
30717 } else {
30718 Ok(Expression::Function(Box::new(Function::new(
30719 "MAKE_INTERVAL".to_string(),
30720 args,
30721 ))))
30722 }
30723 }
30724
30725 // ARRAY_TO_STRING(array, sep, null_text) -> ARRAY_TO_STRING(LIST_TRANSFORM(array, x -> COALESCE(x, null_text)), sep) for DuckDB
30726 "ARRAY_TO_STRING" if args.len() == 3 => {
30727 let arr = args.remove(0);
30728 let sep = args.remove(0);
30729 let null_text = args.remove(0);
30730 match target {
30731 DialectType::DuckDB => {
30732 // LIST_TRANSFORM(array, x -> COALESCE(x, null_text))
30733 let _lambda_param =
30734 Expression::Identifier(crate::expressions::Identifier::new("x"));
30735 let coalesce =
30736 Expression::Coalesce(Box::new(crate::expressions::VarArgFunc {
30737 original_name: None,
30738 expressions: vec![
30739 Expression::Identifier(crate::expressions::Identifier::new(
30740 "x",
30741 )),
30742 null_text,
30743 ],
30744 inferred_type: None,
30745 }));
30746 let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
30747 parameters: vec![crate::expressions::Identifier::new("x")],
30748 body: coalesce,
30749 colon: false,
30750 parameter_types: vec![],
30751 }));
30752 let list_transform = Expression::Function(Box::new(Function::new(
30753 "LIST_TRANSFORM".to_string(),
30754 vec![arr, lambda],
30755 )));
30756 Ok(Expression::Function(Box::new(Function::new(
30757 "ARRAY_TO_STRING".to_string(),
30758 vec![list_transform, sep],
30759 ))))
30760 }
30761 _ => Ok(Expression::Function(Box::new(Function::new(
30762 "ARRAY_TO_STRING".to_string(),
30763 vec![arr, sep, null_text],
30764 )))),
30765 }
30766 }
30767
30768 // LENGTH(x) -> CASE TYPEOF(x) ... for DuckDB
30769 "LENGTH" if args.len() == 1 => {
30770 let arg = args.remove(0);
30771 match target {
30772 DialectType::DuckDB => {
30773 // CASE TYPEOF(foo) WHEN 'BLOB' THEN OCTET_LENGTH(CAST(foo AS BLOB)) ELSE LENGTH(CAST(foo AS TEXT)) END
30774 let typeof_func = Expression::Function(Box::new(Function::new(
30775 "TYPEOF".to_string(),
30776 vec![arg.clone()],
30777 )));
30778 let blob_cast = Expression::Cast(Box::new(Cast {
30779 this: arg.clone(),
30780 to: DataType::VarBinary { length: None },
30781 trailing_comments: vec![],
30782 double_colon_syntax: false,
30783 format: None,
30784 default: None,
30785 inferred_type: None,
30786 }));
30787 let octet_length = Expression::Function(Box::new(Function::new(
30788 "OCTET_LENGTH".to_string(),
30789 vec![blob_cast],
30790 )));
30791 let text_cast = Expression::Cast(Box::new(Cast {
30792 this: arg,
30793 to: DataType::Text,
30794 trailing_comments: vec![],
30795 double_colon_syntax: false,
30796 format: None,
30797 default: None,
30798 inferred_type: None,
30799 }));
30800 let length_text = Expression::Function(Box::new(Function::new(
30801 "LENGTH".to_string(),
30802 vec![text_cast],
30803 )));
30804 Ok(Expression::Case(Box::new(crate::expressions::Case {
30805 operand: Some(typeof_func),
30806 whens: vec![(
30807 Expression::Literal(Literal::String("BLOB".to_string())),
30808 octet_length,
30809 )],
30810 else_: Some(length_text),
30811 comments: Vec::new(),
30812 inferred_type: None,
30813 })))
30814 }
30815 _ => Ok(Expression::Function(Box::new(Function::new(
30816 "LENGTH".to_string(),
30817 vec![arg],
30818 )))),
30819 }
30820 }
30821
30822 // PERCENTILE_CONT(x, fraction RESPECT NULLS) -> QUANTILE_CONT(x, fraction) for DuckDB
30823 "PERCENTILE_CONT" if args.len() >= 2 && matches!(source, DialectType::BigQuery) => {
30824 // BigQuery PERCENTILE_CONT(x, fraction [RESPECT|IGNORE NULLS]) OVER ()
30825 // The args should be [x, fraction] with the null handling stripped
30826 // For DuckDB: QUANTILE_CONT(x, fraction)
30827 // For Spark: PERCENTILE_CONT(x, fraction) RESPECT NULLS (handled at window level)
30828 match target {
30829 DialectType::DuckDB => {
30830 // Strip down to just 2 args, rename to QUANTILE_CONT
30831 let x = args[0].clone();
30832 let frac = args[1].clone();
30833 Ok(Expression::Function(Box::new(Function::new(
30834 "QUANTILE_CONT".to_string(),
30835 vec![x, frac],
30836 ))))
30837 }
30838 _ => Ok(Expression::Function(Box::new(Function::new(
30839 "PERCENTILE_CONT".to_string(),
30840 args,
30841 )))),
30842 }
30843 }
30844
30845 // All others: pass through
30846 _ => Ok(Expression::Function(Box::new(Function {
30847 name: f.name,
30848 args,
30849 distinct: f.distinct,
30850 trailing_comments: f.trailing_comments,
30851 use_bracket_syntax: f.use_bracket_syntax,
30852 no_parens: f.no_parens,
30853 quoted: f.quoted,
30854 span: None,
30855 inferred_type: None,
30856 }))),
30857 }
30858 }
30859
30860 /// Check if we can reliably infer the SQL type for Presto/Trino ROW CAST.
30861 /// Returns false for column references and other non-literal expressions where the type is unknown.
30862 fn can_infer_presto_type(expr: &Expression) -> bool {
30863 match expr {
30864 Expression::Literal(_) => true,
30865 Expression::Boolean(_) => true,
30866 Expression::Array(_) | Expression::ArrayFunc(_) => true,
30867 Expression::Struct(_) | Expression::StructFunc(_) => true,
30868 Expression::Function(f) => {
30869 let up = f.name.to_uppercase();
30870 up == "STRUCT"
30871 || up == "ROW"
30872 || up == "CURRENT_DATE"
30873 || up == "CURRENT_TIMESTAMP"
30874 || up == "NOW"
30875 }
30876 Expression::Cast(_) => true,
30877 Expression::Neg(inner) => Self::can_infer_presto_type(&inner.this),
30878 _ => false,
30879 }
30880 }
30881
30882 /// Infer SQL type name for a Presto/Trino ROW CAST from a literal expression
30883 fn infer_sql_type_for_presto(expr: &Expression) -> String {
30884 use crate::expressions::Literal;
30885 match expr {
30886 Expression::Literal(Literal::String(_)) => "VARCHAR".to_string(),
30887 Expression::Literal(Literal::Number(n)) => {
30888 if n.contains('.') {
30889 "DOUBLE".to_string()
30890 } else {
30891 "INTEGER".to_string()
30892 }
30893 }
30894 Expression::Boolean(_) => "BOOLEAN".to_string(),
30895 Expression::Literal(Literal::Date(_)) => "DATE".to_string(),
30896 Expression::Literal(Literal::Timestamp(_)) => "TIMESTAMP".to_string(),
30897 Expression::Literal(Literal::Datetime(_)) => "TIMESTAMP".to_string(),
30898 Expression::Array(_) | Expression::ArrayFunc(_) => "ARRAY(VARCHAR)".to_string(),
30899 Expression::Struct(_) | Expression::StructFunc(_) => "ROW".to_string(),
30900 Expression::Function(f) => {
30901 let up = f.name.to_uppercase();
30902 if up == "STRUCT" || up == "ROW" {
30903 "ROW".to_string()
30904 } else if up == "CURRENT_DATE" {
30905 "DATE".to_string()
30906 } else if up == "CURRENT_TIMESTAMP" || up == "NOW" {
30907 "TIMESTAMP".to_string()
30908 } else {
30909 "VARCHAR".to_string()
30910 }
30911 }
30912 Expression::Cast(c) => {
30913 // If already cast, use the target type
30914 Self::data_type_to_presto_string(&c.to)
30915 }
30916 _ => "VARCHAR".to_string(),
30917 }
30918 }
30919
30920 /// Convert a DataType to its Presto/Trino string representation for ROW type
30921 fn data_type_to_presto_string(dt: &crate::expressions::DataType) -> String {
30922 use crate::expressions::DataType;
30923 match dt {
30924 DataType::VarChar { .. } | DataType::Text | DataType::String { .. } => {
30925 "VARCHAR".to_string()
30926 }
30927 DataType::Int { .. }
30928 | DataType::BigInt { .. }
30929 | DataType::SmallInt { .. }
30930 | DataType::TinyInt { .. } => "INTEGER".to_string(),
30931 DataType::Float { .. } | DataType::Double { .. } => "DOUBLE".to_string(),
30932 DataType::Boolean => "BOOLEAN".to_string(),
30933 DataType::Date => "DATE".to_string(),
30934 DataType::Timestamp { .. } => "TIMESTAMP".to_string(),
30935 DataType::Struct { fields, .. } => {
30936 let field_strs: Vec<String> = fields
30937 .iter()
30938 .map(|f| {
30939 format!(
30940 "{} {}",
30941 f.name,
30942 Self::data_type_to_presto_string(&f.data_type)
30943 )
30944 })
30945 .collect();
30946 format!("ROW({})", field_strs.join(", "))
30947 }
30948 DataType::Array { element_type, .. } => {
30949 format!("ARRAY({})", Self::data_type_to_presto_string(element_type))
30950 }
30951 DataType::Custom { name } => {
30952 // Pass through custom type names (e.g., "INTEGER", "VARCHAR" from earlier inference)
30953 name.clone()
30954 }
30955 _ => "VARCHAR".to_string(),
30956 }
30957 }
30958
30959 /// Convert IntervalUnit to string
30960 fn interval_unit_to_string(unit: &crate::expressions::IntervalUnit) -> String {
30961 match unit {
30962 crate::expressions::IntervalUnit::Year => "YEAR".to_string(),
30963 crate::expressions::IntervalUnit::Quarter => "QUARTER".to_string(),
30964 crate::expressions::IntervalUnit::Month => "MONTH".to_string(),
30965 crate::expressions::IntervalUnit::Week => "WEEK".to_string(),
30966 crate::expressions::IntervalUnit::Day => "DAY".to_string(),
30967 crate::expressions::IntervalUnit::Hour => "HOUR".to_string(),
30968 crate::expressions::IntervalUnit::Minute => "MINUTE".to_string(),
30969 crate::expressions::IntervalUnit::Second => "SECOND".to_string(),
30970 crate::expressions::IntervalUnit::Millisecond => "MILLISECOND".to_string(),
30971 crate::expressions::IntervalUnit::Microsecond => "MICROSECOND".to_string(),
30972 crate::expressions::IntervalUnit::Nanosecond => "NANOSECOND".to_string(),
30973 }
30974 }
30975
30976 /// Extract unit string from an expression (uppercased)
30977 fn get_unit_str_static(expr: &Expression) -> String {
30978 use crate::expressions::Literal;
30979 match expr {
30980 Expression::Identifier(id) => id.name.to_uppercase(),
30981 Expression::Literal(Literal::String(s)) => s.to_uppercase(),
30982 Expression::Column(col) => col.name.name.to_uppercase(),
30983 Expression::Function(f) => {
30984 let base = f.name.to_uppercase();
30985 if !f.args.is_empty() {
30986 let inner = Self::get_unit_str_static(&f.args[0]);
30987 format!("{}({})", base, inner)
30988 } else {
30989 base
30990 }
30991 }
30992 _ => "DAY".to_string(),
30993 }
30994 }
30995
30996 /// Parse unit string to IntervalUnit
30997 fn parse_interval_unit_static(s: &str) -> crate::expressions::IntervalUnit {
30998 match s {
30999 "YEAR" | "YY" | "YYYY" => crate::expressions::IntervalUnit::Year,
31000 "QUARTER" | "QQ" | "Q" => crate::expressions::IntervalUnit::Quarter,
31001 "MONTH" | "MM" | "M" => crate::expressions::IntervalUnit::Month,
31002 "WEEK" | "WK" | "WW" | "ISOWEEK" => crate::expressions::IntervalUnit::Week,
31003 "DAY" | "DD" | "D" | "DY" => crate::expressions::IntervalUnit::Day,
31004 "HOUR" | "HH" => crate::expressions::IntervalUnit::Hour,
31005 "MINUTE" | "MI" | "N" => crate::expressions::IntervalUnit::Minute,
31006 "SECOND" | "SS" | "S" => crate::expressions::IntervalUnit::Second,
31007 "MILLISECOND" | "MS" => crate::expressions::IntervalUnit::Millisecond,
31008 "MICROSECOND" | "MCS" | "US" => crate::expressions::IntervalUnit::Microsecond,
31009 _ if s.starts_with("WEEK(") => crate::expressions::IntervalUnit::Week,
31010 _ => crate::expressions::IntervalUnit::Day,
31011 }
31012 }
31013
31014 /// Convert expression to simple string for interval building
31015 fn expr_to_string_static(expr: &Expression) -> String {
31016 use crate::expressions::Literal;
31017 match expr {
31018 Expression::Literal(Literal::Number(s)) => s.clone(),
31019 Expression::Literal(Literal::String(s)) => s.clone(),
31020 Expression::Identifier(id) => id.name.clone(),
31021 Expression::Neg(f) => format!("-{}", Self::expr_to_string_static(&f.this)),
31022 _ => "1".to_string(),
31023 }
31024 }
31025
31026 /// Extract a simple string representation from a literal expression
31027 fn expr_to_string(expr: &Expression) -> String {
31028 use crate::expressions::Literal;
31029 match expr {
31030 Expression::Literal(Literal::Number(s)) => s.clone(),
31031 Expression::Literal(Literal::String(s)) => s.clone(),
31032 Expression::Neg(f) => format!("-{}", Self::expr_to_string(&f.this)),
31033 Expression::Identifier(id) => id.name.clone(),
31034 _ => "1".to_string(),
31035 }
31036 }
31037
31038 /// Quote an interval value expression as a string literal if it's a number (or negated number)
31039 fn quote_interval_val(expr: &Expression) -> Expression {
31040 use crate::expressions::Literal;
31041 match expr {
31042 Expression::Literal(Literal::Number(n)) => {
31043 Expression::Literal(Literal::String(n.clone()))
31044 }
31045 Expression::Literal(Literal::String(_)) => expr.clone(),
31046 Expression::Neg(inner) => {
31047 if let Expression::Literal(Literal::Number(n)) = &inner.this {
31048 Expression::Literal(Literal::String(format!("-{}", n)))
31049 } else {
31050 expr.clone()
31051 }
31052 }
31053 _ => expr.clone(),
31054 }
31055 }
31056
31057 /// Check if a timestamp string contains timezone info (offset like +02:00, or named timezone)
31058 fn timestamp_string_has_timezone(ts: &str) -> bool {
31059 let trimmed = ts.trim();
31060 // Check for numeric timezone offsets: +N, -N, +NN:NN, -NN:NN at end
31061 if let Some(last_space) = trimmed.rfind(' ') {
31062 let suffix = &trimmed[last_space + 1..];
31063 if (suffix.starts_with('+') || suffix.starts_with('-')) && suffix.len() > 1 {
31064 let rest = &suffix[1..];
31065 if rest.chars().all(|c| c.is_ascii_digit() || c == ':') {
31066 return true;
31067 }
31068 }
31069 }
31070 // Check for named timezone abbreviations
31071 let ts_lower = trimmed.to_lowercase();
31072 let tz_abbrevs = [" utc", " gmt", " cet", " est", " pst", " cst", " mst"];
31073 for abbrev in &tz_abbrevs {
31074 if ts_lower.ends_with(abbrev) {
31075 return true;
31076 }
31077 }
31078 false
31079 }
31080
31081 /// Maybe CAST timestamp literal to TIMESTAMPTZ for Snowflake
31082 fn maybe_cast_ts_to_tz(expr: Expression, func_name: &str) -> Expression {
31083 use crate::expressions::{Cast, DataType, Literal};
31084 match expr {
31085 Expression::Literal(Literal::Timestamp(s)) => {
31086 let tz = func_name.starts_with("TIMESTAMP");
31087 Expression::Cast(Box::new(Cast {
31088 this: Expression::Literal(Literal::String(s)),
31089 to: if tz {
31090 DataType::Timestamp {
31091 timezone: true,
31092 precision: None,
31093 }
31094 } else {
31095 DataType::Timestamp {
31096 timezone: false,
31097 precision: None,
31098 }
31099 },
31100 trailing_comments: vec![],
31101 double_colon_syntax: false,
31102 format: None,
31103 default: None,
31104 inferred_type: None,
31105 }))
31106 }
31107 other => other,
31108 }
31109 }
31110
31111 /// Maybe CAST timestamp literal to TIMESTAMP (no tz)
31112 fn maybe_cast_ts(expr: Expression) -> Expression {
31113 use crate::expressions::{Cast, DataType, Literal};
31114 match expr {
31115 Expression::Literal(Literal::Timestamp(s)) => Expression::Cast(Box::new(Cast {
31116 this: Expression::Literal(Literal::String(s)),
31117 to: DataType::Timestamp {
31118 timezone: false,
31119 precision: None,
31120 },
31121 trailing_comments: vec![],
31122 double_colon_syntax: false,
31123 format: None,
31124 default: None,
31125 inferred_type: None,
31126 })),
31127 other => other,
31128 }
31129 }
31130
31131 /// Convert DATE 'x' literal to CAST('x' AS DATE)
31132 fn date_literal_to_cast(expr: Expression) -> Expression {
31133 use crate::expressions::{Cast, DataType, Literal};
31134 match expr {
31135 Expression::Literal(Literal::Date(s)) => Expression::Cast(Box::new(Cast {
31136 this: Expression::Literal(Literal::String(s)),
31137 to: DataType::Date,
31138 trailing_comments: vec![],
31139 double_colon_syntax: false,
31140 format: None,
31141 default: None,
31142 inferred_type: None,
31143 })),
31144 other => other,
31145 }
31146 }
31147
31148 /// Ensure an expression that should be a date is CAST(... AS DATE).
31149 /// Handles both DATE literals and string literals that look like dates.
31150 fn ensure_cast_date(expr: Expression) -> Expression {
31151 use crate::expressions::{Cast, DataType, Literal};
31152 match expr {
31153 Expression::Literal(Literal::Date(s)) => Expression::Cast(Box::new(Cast {
31154 this: Expression::Literal(Literal::String(s)),
31155 to: DataType::Date,
31156 trailing_comments: vec![],
31157 double_colon_syntax: false,
31158 format: None,
31159 default: None,
31160 inferred_type: None,
31161 })),
31162 Expression::Literal(Literal::String(ref _s)) => {
31163 // String literal that should be a date -> CAST('s' AS DATE)
31164 Expression::Cast(Box::new(Cast {
31165 this: expr,
31166 to: DataType::Date,
31167 trailing_comments: vec![],
31168 double_colon_syntax: false,
31169 format: None,
31170 default: None,
31171 inferred_type: None,
31172 }))
31173 }
31174 // Already a CAST or other expression -> leave as-is
31175 other => other,
31176 }
31177 }
31178
31179 /// Force CAST(expr AS DATE) for any expression (not just literals)
31180 /// Skips if the expression is already a CAST to DATE
31181 fn force_cast_date(expr: Expression) -> Expression {
31182 use crate::expressions::{Cast, DataType};
31183 // If it's already a CAST to DATE, don't double-wrap
31184 if let Expression::Cast(ref c) = expr {
31185 if matches!(c.to, DataType::Date) {
31186 return expr;
31187 }
31188 }
31189 Expression::Cast(Box::new(Cast {
31190 this: expr,
31191 to: DataType::Date,
31192 trailing_comments: vec![],
31193 double_colon_syntax: false,
31194 format: None,
31195 default: None,
31196 inferred_type: None,
31197 }))
31198 }
31199
31200 /// Internal TO_DATE function that won't be converted to CAST by the Snowflake handler.
31201 /// Uses the name `_POLYGLOT_TO_DATE` which is not recognized by the TO_DATE -> CAST logic.
31202 /// The Snowflake DATEDIFF handler converts these back to TO_DATE.
31203 const PRESERVED_TO_DATE: &'static str = "_POLYGLOT_TO_DATE";
31204
31205 fn ensure_to_date_preserved(expr: Expression) -> Expression {
31206 use crate::expressions::{Function, Literal};
31207 if matches!(expr, Expression::Literal(Literal::String(_))) {
31208 Expression::Function(Box::new(Function::new(
31209 Self::PRESERVED_TO_DATE.to_string(),
31210 vec![expr],
31211 )))
31212 } else {
31213 expr
31214 }
31215 }
31216
31217 /// TRY_CAST(expr AS DATE) - used for DuckDB when TO_DATE is unwrapped
31218 fn try_cast_date(expr: Expression) -> Expression {
31219 use crate::expressions::{Cast, DataType};
31220 Expression::TryCast(Box::new(Cast {
31221 this: expr,
31222 to: DataType::Date,
31223 trailing_comments: vec![],
31224 double_colon_syntax: false,
31225 format: None,
31226 default: None,
31227 inferred_type: None,
31228 }))
31229 }
31230
31231 /// CAST(CAST(expr AS TIMESTAMP) AS DATE) - used when Hive string dates need to be cast
31232 fn double_cast_timestamp_date(expr: Expression) -> Expression {
31233 use crate::expressions::{Cast, DataType};
31234 let inner = Expression::Cast(Box::new(Cast {
31235 this: expr,
31236 to: DataType::Timestamp {
31237 timezone: false,
31238 precision: None,
31239 },
31240 trailing_comments: vec![],
31241 double_colon_syntax: false,
31242 format: None,
31243 default: None,
31244 inferred_type: None,
31245 }));
31246 Expression::Cast(Box::new(Cast {
31247 this: inner,
31248 to: DataType::Date,
31249 trailing_comments: vec![],
31250 double_colon_syntax: false,
31251 format: None,
31252 default: None,
31253 inferred_type: None,
31254 }))
31255 }
31256
31257 /// CAST(CAST(expr AS DATETIME) AS DATE) - BigQuery variant
31258 fn double_cast_datetime_date(expr: Expression) -> Expression {
31259 use crate::expressions::{Cast, DataType};
31260 let inner = Expression::Cast(Box::new(Cast {
31261 this: expr,
31262 to: DataType::Custom {
31263 name: "DATETIME".to_string(),
31264 },
31265 trailing_comments: vec![],
31266 double_colon_syntax: false,
31267 format: None,
31268 default: None,
31269 inferred_type: None,
31270 }));
31271 Expression::Cast(Box::new(Cast {
31272 this: inner,
31273 to: DataType::Date,
31274 trailing_comments: vec![],
31275 double_colon_syntax: false,
31276 format: None,
31277 default: None,
31278 inferred_type: None,
31279 }))
31280 }
31281
31282 /// CAST(CAST(expr AS DATETIME2) AS DATE) - TSQL variant
31283 fn double_cast_datetime2_date(expr: Expression) -> Expression {
31284 use crate::expressions::{Cast, DataType};
31285 let inner = Expression::Cast(Box::new(Cast {
31286 this: expr,
31287 to: DataType::Custom {
31288 name: "DATETIME2".to_string(),
31289 },
31290 trailing_comments: vec![],
31291 double_colon_syntax: false,
31292 format: None,
31293 default: None,
31294 inferred_type: None,
31295 }));
31296 Expression::Cast(Box::new(Cast {
31297 this: inner,
31298 to: DataType::Date,
31299 trailing_comments: vec![],
31300 double_colon_syntax: false,
31301 format: None,
31302 default: None,
31303 inferred_type: None,
31304 }))
31305 }
31306
31307 /// Convert Hive/Java-style date format strings to C-style (strftime) format
31308 /// e.g., "yyyy-MM-dd'T'HH" -> "%Y-%m-%d'T'%H"
31309 fn hive_format_to_c_format(fmt: &str) -> String {
31310 let mut result = String::new();
31311 let chars: Vec<char> = fmt.chars().collect();
31312 let mut i = 0;
31313 while i < chars.len() {
31314 match chars[i] {
31315 'y' => {
31316 let mut count = 0;
31317 while i < chars.len() && chars[i] == 'y' {
31318 count += 1;
31319 i += 1;
31320 }
31321 if count >= 4 {
31322 result.push_str("%Y");
31323 } else if count == 2 {
31324 result.push_str("%y");
31325 } else {
31326 result.push_str("%Y");
31327 }
31328 }
31329 'M' => {
31330 let mut count = 0;
31331 while i < chars.len() && chars[i] == 'M' {
31332 count += 1;
31333 i += 1;
31334 }
31335 if count >= 3 {
31336 result.push_str("%b");
31337 } else if count == 2 {
31338 result.push_str("%m");
31339 } else {
31340 result.push_str("%m");
31341 }
31342 }
31343 'd' => {
31344 let mut _count = 0;
31345 while i < chars.len() && chars[i] == 'd' {
31346 _count += 1;
31347 i += 1;
31348 }
31349 result.push_str("%d");
31350 }
31351 'H' => {
31352 let mut _count = 0;
31353 while i < chars.len() && chars[i] == 'H' {
31354 _count += 1;
31355 i += 1;
31356 }
31357 result.push_str("%H");
31358 }
31359 'h' => {
31360 let mut _count = 0;
31361 while i < chars.len() && chars[i] == 'h' {
31362 _count += 1;
31363 i += 1;
31364 }
31365 result.push_str("%I");
31366 }
31367 'm' => {
31368 let mut _count = 0;
31369 while i < chars.len() && chars[i] == 'm' {
31370 _count += 1;
31371 i += 1;
31372 }
31373 result.push_str("%M");
31374 }
31375 's' => {
31376 let mut _count = 0;
31377 while i < chars.len() && chars[i] == 's' {
31378 _count += 1;
31379 i += 1;
31380 }
31381 result.push_str("%S");
31382 }
31383 'S' => {
31384 // Fractional seconds - skip
31385 while i < chars.len() && chars[i] == 'S' {
31386 i += 1;
31387 }
31388 result.push_str("%f");
31389 }
31390 'a' => {
31391 // AM/PM
31392 while i < chars.len() && chars[i] == 'a' {
31393 i += 1;
31394 }
31395 result.push_str("%p");
31396 }
31397 'E' => {
31398 let mut count = 0;
31399 while i < chars.len() && chars[i] == 'E' {
31400 count += 1;
31401 i += 1;
31402 }
31403 if count >= 4 {
31404 result.push_str("%A");
31405 } else {
31406 result.push_str("%a");
31407 }
31408 }
31409 '\'' => {
31410 // Quoted literal text - pass through the quotes and content
31411 result.push('\'');
31412 i += 1;
31413 while i < chars.len() && chars[i] != '\'' {
31414 result.push(chars[i]);
31415 i += 1;
31416 }
31417 if i < chars.len() {
31418 result.push('\'');
31419 i += 1;
31420 }
31421 }
31422 c => {
31423 result.push(c);
31424 i += 1;
31425 }
31426 }
31427 }
31428 result
31429 }
31430
31431 /// Convert Hive/Java format to Presto format (uses %T for HH:mm:ss)
31432 fn hive_format_to_presto_format(fmt: &str) -> String {
31433 let c_fmt = Self::hive_format_to_c_format(fmt);
31434 // Presto uses %T for HH:MM:SS
31435 c_fmt.replace("%H:%M:%S", "%T")
31436 }
31437
31438 /// Ensure a timestamp-like expression for DuckDB with CAST(... AS TIMESTAMP)
31439 fn ensure_cast_timestamp(expr: Expression) -> Expression {
31440 use crate::expressions::{Cast, DataType, Literal};
31441 match expr {
31442 Expression::Literal(Literal::Timestamp(s)) => Expression::Cast(Box::new(Cast {
31443 this: Expression::Literal(Literal::String(s)),
31444 to: DataType::Timestamp {
31445 timezone: false,
31446 precision: None,
31447 },
31448 trailing_comments: vec![],
31449 double_colon_syntax: false,
31450 format: None,
31451 default: None,
31452 inferred_type: None,
31453 })),
31454 Expression::Literal(Literal::String(ref _s)) => Expression::Cast(Box::new(Cast {
31455 this: expr,
31456 to: DataType::Timestamp {
31457 timezone: false,
31458 precision: None,
31459 },
31460 trailing_comments: vec![],
31461 double_colon_syntax: false,
31462 format: None,
31463 default: None,
31464 inferred_type: None,
31465 })),
31466 Expression::Literal(Literal::Datetime(s)) => Expression::Cast(Box::new(Cast {
31467 this: Expression::Literal(Literal::String(s)),
31468 to: DataType::Timestamp {
31469 timezone: false,
31470 precision: None,
31471 },
31472 trailing_comments: vec![],
31473 double_colon_syntax: false,
31474 format: None,
31475 default: None,
31476 inferred_type: None,
31477 })),
31478 other => other,
31479 }
31480 }
31481
31482 /// Force CAST to TIMESTAMP for any expression (not just literals)
31483 /// Used when transpiling from Redshift/TSQL where DATEDIFF/DATEADD args need explicit timestamp cast
31484 fn force_cast_timestamp(expr: Expression) -> Expression {
31485 use crate::expressions::{Cast, DataType};
31486 // Don't double-wrap if already a CAST to TIMESTAMP
31487 if let Expression::Cast(ref c) = expr {
31488 if matches!(c.to, DataType::Timestamp { .. }) {
31489 return expr;
31490 }
31491 }
31492 Expression::Cast(Box::new(Cast {
31493 this: expr,
31494 to: DataType::Timestamp {
31495 timezone: false,
31496 precision: None,
31497 },
31498 trailing_comments: vec![],
31499 double_colon_syntax: false,
31500 format: None,
31501 default: None,
31502 inferred_type: None,
31503 }))
31504 }
31505
31506 /// Ensure a timestamp-like expression for DuckDB with CAST(... AS TIMESTAMPTZ)
31507 fn ensure_cast_timestamptz(expr: Expression) -> Expression {
31508 use crate::expressions::{Cast, DataType, Literal};
31509 match expr {
31510 Expression::Literal(Literal::Timestamp(s)) => Expression::Cast(Box::new(Cast {
31511 this: Expression::Literal(Literal::String(s)),
31512 to: DataType::Timestamp {
31513 timezone: true,
31514 precision: None,
31515 },
31516 trailing_comments: vec![],
31517 double_colon_syntax: false,
31518 format: None,
31519 default: None,
31520 inferred_type: None,
31521 })),
31522 Expression::Literal(Literal::String(ref _s)) => Expression::Cast(Box::new(Cast {
31523 this: expr,
31524 to: DataType::Timestamp {
31525 timezone: true,
31526 precision: None,
31527 },
31528 trailing_comments: vec![],
31529 double_colon_syntax: false,
31530 format: None,
31531 default: None,
31532 inferred_type: None,
31533 })),
31534 Expression::Literal(Literal::Datetime(s)) => Expression::Cast(Box::new(Cast {
31535 this: Expression::Literal(Literal::String(s)),
31536 to: DataType::Timestamp {
31537 timezone: true,
31538 precision: None,
31539 },
31540 trailing_comments: vec![],
31541 double_colon_syntax: false,
31542 format: None,
31543 default: None,
31544 inferred_type: None,
31545 })),
31546 other => other,
31547 }
31548 }
31549
31550 /// Ensure expression is CAST to DATETIME (for BigQuery)
31551 fn ensure_cast_datetime(expr: Expression) -> Expression {
31552 use crate::expressions::{Cast, DataType, Literal};
31553 match expr {
31554 Expression::Literal(Literal::String(ref _s)) => Expression::Cast(Box::new(Cast {
31555 this: expr,
31556 to: DataType::Custom {
31557 name: "DATETIME".to_string(),
31558 },
31559 trailing_comments: vec![],
31560 double_colon_syntax: false,
31561 format: None,
31562 default: None,
31563 inferred_type: None,
31564 })),
31565 other => other,
31566 }
31567 }
31568
31569 /// Force CAST expression to DATETIME (for BigQuery) - always wraps unless already DATETIME
31570 fn force_cast_datetime(expr: Expression) -> Expression {
31571 use crate::expressions::{Cast, DataType};
31572 if let Expression::Cast(ref c) = expr {
31573 if let DataType::Custom { ref name } = c.to {
31574 if name.eq_ignore_ascii_case("DATETIME") {
31575 return expr;
31576 }
31577 }
31578 }
31579 Expression::Cast(Box::new(Cast {
31580 this: expr,
31581 to: DataType::Custom {
31582 name: "DATETIME".to_string(),
31583 },
31584 trailing_comments: vec![],
31585 double_colon_syntax: false,
31586 format: None,
31587 default: None,
31588 inferred_type: None,
31589 }))
31590 }
31591
31592 /// Ensure expression is CAST to DATETIME2 (for TSQL)
31593 fn ensure_cast_datetime2(expr: Expression) -> Expression {
31594 use crate::expressions::{Cast, DataType, Literal};
31595 match expr {
31596 Expression::Literal(Literal::String(ref _s)) => Expression::Cast(Box::new(Cast {
31597 this: expr,
31598 to: DataType::Custom {
31599 name: "DATETIME2".to_string(),
31600 },
31601 trailing_comments: vec![],
31602 double_colon_syntax: false,
31603 format: None,
31604 default: None,
31605 inferred_type: None,
31606 })),
31607 other => other,
31608 }
31609 }
31610
31611 /// Convert TIMESTAMP 'x' literal to CAST('x' AS TIMESTAMPTZ) for DuckDB
31612 fn ts_literal_to_cast_tz(expr: Expression) -> Expression {
31613 use crate::expressions::{Cast, DataType, Literal};
31614 match expr {
31615 Expression::Literal(Literal::Timestamp(s)) => Expression::Cast(Box::new(Cast {
31616 this: Expression::Literal(Literal::String(s)),
31617 to: DataType::Timestamp {
31618 timezone: true,
31619 precision: None,
31620 },
31621 trailing_comments: vec![],
31622 double_colon_syntax: false,
31623 format: None,
31624 default: None,
31625 inferred_type: None,
31626 })),
31627 other => other,
31628 }
31629 }
31630
31631 /// Convert BigQuery format string to Snowflake format string
31632 fn bq_format_to_snowflake(format_expr: &Expression) -> Expression {
31633 use crate::expressions::Literal;
31634 if let Expression::Literal(Literal::String(s)) = format_expr {
31635 let sf = s
31636 .replace("%Y", "yyyy")
31637 .replace("%m", "mm")
31638 .replace("%d", "DD")
31639 .replace("%H", "HH24")
31640 .replace("%M", "MI")
31641 .replace("%S", "SS")
31642 .replace("%b", "mon")
31643 .replace("%B", "Month")
31644 .replace("%e", "FMDD");
31645 Expression::Literal(Literal::String(sf))
31646 } else {
31647 format_expr.clone()
31648 }
31649 }
31650
31651 /// Convert BigQuery format string to DuckDB format string
31652 fn bq_format_to_duckdb(format_expr: &Expression) -> Expression {
31653 use crate::expressions::Literal;
31654 if let Expression::Literal(Literal::String(s)) = format_expr {
31655 let duck = s
31656 .replace("%T", "%H:%M:%S")
31657 .replace("%F", "%Y-%m-%d")
31658 .replace("%D", "%m/%d/%y")
31659 .replace("%x", "%m/%d/%y")
31660 .replace("%c", "%a %b %-d %H:%M:%S %Y")
31661 .replace("%e", "%-d")
31662 .replace("%E6S", "%S.%f");
31663 Expression::Literal(Literal::String(duck))
31664 } else {
31665 format_expr.clone()
31666 }
31667 }
31668
31669 /// Convert BigQuery CAST FORMAT elements (like YYYY, MM, DD) to strftime (like %Y, %m, %d)
31670 fn bq_cast_format_to_strftime(format_expr: &Expression) -> Expression {
31671 use crate::expressions::Literal;
31672 if let Expression::Literal(Literal::String(s)) = format_expr {
31673 // Replace format elements from longest to shortest to avoid partial matches
31674 let result = s
31675 .replace("YYYYMMDD", "%Y%m%d")
31676 .replace("YYYY", "%Y")
31677 .replace("YY", "%y")
31678 .replace("MONTH", "%B")
31679 .replace("MON", "%b")
31680 .replace("MM", "%m")
31681 .replace("DD", "%d")
31682 .replace("HH24", "%H")
31683 .replace("HH12", "%I")
31684 .replace("HH", "%I")
31685 .replace("MI", "%M")
31686 .replace("SSTZH", "%S%z")
31687 .replace("SS", "%S")
31688 .replace("TZH", "%z");
31689 Expression::Literal(Literal::String(result))
31690 } else {
31691 format_expr.clone()
31692 }
31693 }
31694
31695 /// Normalize BigQuery format strings for BQ->BQ output
31696 fn bq_format_normalize_bq(format_expr: &Expression) -> Expression {
31697 use crate::expressions::Literal;
31698 if let Expression::Literal(Literal::String(s)) = format_expr {
31699 let norm = s.replace("%H:%M:%S", "%T").replace("%x", "%D");
31700 Expression::Literal(Literal::String(norm))
31701 } else {
31702 format_expr.clone()
31703 }
31704 }
31705}
31706
// Unit tests for the dialect layer: DialectType parsing, basic transpilation,
// function rewrites, and dialect-specific transforms.
//
// NOTE(review): several tests below only `eprintln!` the transpiled output and
// assert nothing beyond "transpilation did not panic/err". They are flagged
// individually; pinning expected outputs would strengthen them.
#[cfg(test)]
mod tests {
    use super::*;

    // DialectType's FromStr accepts canonical names and aliases
    // (both "postgres" and "postgresql" map to PostgreSQL).
    #[test]
    fn test_dialect_type_from_str() {
        assert_eq!(
            "postgres".parse::<DialectType>().unwrap(),
            DialectType::PostgreSQL
        );
        assert_eq!(
            "postgresql".parse::<DialectType>().unwrap(),
            DialectType::PostgreSQL
        );
        assert_eq!("mysql".parse::<DialectType>().unwrap(), DialectType::MySQL);
        assert_eq!(
            "bigquery".parse::<DialectType>().unwrap(),
            DialectType::BigQuery
        );
    }

    // A trivial statement survives Generic -> PostgreSQL unchanged, and one
    // input statement yields exactly one output statement.
    #[test]
    fn test_basic_transpile() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect
            .transpile_to("SELECT 1", DialectType::PostgreSQL)
            .unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0], "SELECT 1");
    }

    #[test]
    fn test_function_transformation_mysql() {
        // NVL should be transformed to IFNULL in MySQL
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect
            .transpile_to("SELECT NVL(a, b)", DialectType::MySQL)
            .unwrap();
        assert_eq!(result[0], "SELECT IFNULL(a, b)");
    }

    // NOTE(review): smoke test only — prints three transpilation results for
    // Snowflake colon-path / GET_PATH handling but asserts nothing. Pin the
    // expected outputs once they are stable.
    #[test]
    fn test_get_path_duckdb() {
        // Test: step by step
        let snowflake = Dialect::get(DialectType::Snowflake);

        // Step 1: Parse and check what Snowflake produces as intermediate
        let result_sf_sf = snowflake
            .transpile_to(
                "SELECT PARSE_JSON('{\"fruit\":\"banana\"}'):fruit",
                DialectType::Snowflake,
            )
            .unwrap();
        eprintln!("Snowflake->Snowflake colon: {}", result_sf_sf[0]);

        // Step 2: DuckDB target
        let result_sf_dk = snowflake
            .transpile_to(
                "SELECT PARSE_JSON('{\"fruit\":\"banana\"}'):fruit",
                DialectType::DuckDB,
            )
            .unwrap();
        eprintln!("Snowflake->DuckDB colon: {}", result_sf_dk[0]);

        // Step 3: GET_PATH directly
        let result_gp = snowflake
            .transpile_to(
                "SELECT GET_PATH(PARSE_JSON('{\"fruit\":\"banana\"}'), 'fruit')",
                DialectType::DuckDB,
            )
            .unwrap();
        eprintln!("Snowflake->DuckDB explicit GET_PATH: {}", result_gp[0]);
    }

    #[test]
    fn test_function_transformation_postgres() {
        // IFNULL should be transformed to COALESCE in PostgreSQL
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect
            .transpile_to("SELECT IFNULL(a, b)", DialectType::PostgreSQL)
            .unwrap();
        assert_eq!(result[0], "SELECT COALESCE(a, b)");

        // NVL should also be transformed to COALESCE
        let result = dialect
            .transpile_to("SELECT NVL(a, b)", DialectType::PostgreSQL)
            .unwrap();
        assert_eq!(result[0], "SELECT COALESCE(a, b)");
    }

    #[test]
    fn test_hive_cast_to_trycast() {
        // Hive CAST should become TRY_CAST for targets that support it
        let hive = Dialect::get(DialectType::Hive);
        let result = hive
            .transpile_to("CAST(1 AS INT)", DialectType::DuckDB)
            .unwrap();
        assert_eq!(result[0], "TRY_CAST(1 AS INT)");

        // Presto additionally canonicalizes INT -> INTEGER.
        let result = hive
            .transpile_to("CAST(1 AS INT)", DialectType::Presto)
            .unwrap();
        assert_eq!(result[0], "TRY_CAST(1 AS INTEGER)");
    }

    #[test]
    fn test_hive_array_identity() {
        // Hive ARRAY<DATE> should preserve angle bracket syntax
        let sql = "CREATE EXTERNAL TABLE `my_table` (`a7` ARRAY<DATE>) ROW FORMAT SERDE 'a' STORED AS INPUTFORMAT 'b' OUTPUTFORMAT 'c' LOCATION 'd' TBLPROPERTIES ('e'='f')";
        let hive = Dialect::get(DialectType::Hive);

        // Test via transpile_to (this works)
        let result = hive.transpile_to(sql, DialectType::Hive).unwrap();
        eprintln!("Hive ARRAY via transpile_to: {}", result[0]);
        assert!(
            result[0].contains("ARRAY<DATE>"),
            "transpile_to: Expected ARRAY<DATE>, got: {}",
            result[0]
        );

        // Test via parse -> transform -> generate (identity test path)
        // Exercises the three-step pipeline separately to catch divergence
        // from the one-shot transpile_to path above.
        let ast = hive.parse(sql).unwrap();
        let transformed = hive.transform(ast[0].clone()).unwrap();
        let output = hive.generate(&transformed).unwrap();
        eprintln!("Hive ARRAY via identity path: {}", output);
        assert!(
            output.contains("ARRAY<DATE>"),
            "identity path: Expected ARRAY<DATE>, got: {}",
            output
        );
    }

    #[test]
    fn test_starrocks_delete_between_expansion() {
        // StarRocks doesn't support BETWEEN in DELETE statements
        let dialect = Dialect::get(DialectType::Generic);

        // BETWEEN should be expanded to >= AND <= in DELETE
        let result = dialect
            .transpile_to(
                "DELETE FROM t WHERE a BETWEEN b AND c",
                DialectType::StarRocks,
            )
            .unwrap();
        assert_eq!(result[0], "DELETE FROM t WHERE a >= b AND a <= c");

        // NOT BETWEEN should be expanded to < OR > in DELETE
        let result = dialect
            .transpile_to(
                "DELETE FROM t WHERE a NOT BETWEEN b AND c",
                DialectType::StarRocks,
            )
            .unwrap();
        assert_eq!(result[0], "DELETE FROM t WHERE a < b OR a > c");

        // BETWEEN in SELECT should NOT be expanded (StarRocks supports it there)
        let result = dialect
            .transpile_to(
                "SELECT * FROM t WHERE a BETWEEN b AND c",
                DialectType::StarRocks,
            )
            .unwrap();
        assert!(
            result[0].contains("BETWEEN"),
            "BETWEEN should be preserved in SELECT"
        );
    }

    // Regression test: nested LTRIM(RTRIM(...)) must parse/transpile without
    // error. NOTE(review): only checks Ok, not the rendered output.
    #[test]
    fn test_snowflake_ltrim_rtrim_parse() {
        let sf = Dialect::get(DialectType::Snowflake);
        let sql = "SELECT LTRIM(RTRIM(col)) FROM t1";
        let result = sf.transpile_to(sql, DialectType::DuckDB);
        match &result {
            Ok(r) => eprintln!("LTRIM/RTRIM result: {}", r[0]),
            Err(e) => eprintln!("LTRIM/RTRIM error: {}", e),
        }
        assert!(
            result.is_ok(),
            "Expected successful parse of LTRIM(RTRIM(col)), got error: {:?}",
            result.err()
        );
    }

    // Regression test: COUNT_IF must be accepted by the DuckDB dialect.
    // NOTE(review): only checks Ok, not the rendered output.
    #[test]
    fn test_duckdb_count_if_parse() {
        let duck = Dialect::get(DialectType::DuckDB);
        let sql = "COUNT_IF(x)";
        let result = duck.transpile_to(sql, DialectType::DuckDB);
        match &result {
            Ok(r) => eprintln!("COUNT_IF result: {}", r[0]),
            Err(e) => eprintln!("COUNT_IF error: {}", e),
        }
        assert!(
            result.is_ok(),
            "Expected successful parse of COUNT_IF(x), got error: {:?}",
            result.err()
        );
    }

    // Regression test: TSQL TINYINT in a CAST must transpile to DuckDB.
    // NOTE(review): only checks Ok, not the rendered output.
    #[test]
    fn test_tsql_cast_tinyint_parse() {
        let tsql = Dialect::get(DialectType::TSQL);
        let sql = "CAST(X AS TINYINT)";
        let result = tsql.transpile_to(sql, DialectType::DuckDB);
        match &result {
            Ok(r) => eprintln!("TSQL CAST TINYINT result: {}", r[0]),
            Err(e) => eprintln!("TSQL CAST TINYINT error: {}", e),
        }
        assert!(
            result.is_ok(),
            "Expected successful transpile, got error: {:?}",
            result.err()
        );
    }

    // PostgreSQL's `#` (bitwise XOR) operator round-trips unchanged PG -> PG.
    #[test]
    fn test_pg_hash_bitwise_xor() {
        let dialect = Dialect::get(DialectType::PostgreSQL);
        let result = dialect
            .transpile_to("x # y", DialectType::PostgreSQL)
            .unwrap();
        assert_eq!(result[0], "x # y");
    }

    // PG ARRAY[...] constructor becomes DuckDB bracket syntax; the containment
    // operator `@>` is preserved.
    #[test]
    fn test_pg_array_to_duckdb() {
        let dialect = Dialect::get(DialectType::PostgreSQL);
        let result = dialect
            .transpile_to("SELECT ARRAY[1, 2, 3] @> ARRAY[1, 2]", DialectType::DuckDB)
            .unwrap();
        assert_eq!(result[0], "SELECT [1, 2, 3] @> [1, 2]");
    }

    // ARRAY_REMOVE has no BigQuery builtin; it is rewritten into a filtering
    // ARRAY(SELECT ... FROM UNNEST(...)) subquery.
    #[test]
    fn test_array_remove_bigquery() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect
            .transpile_to("ARRAY_REMOVE(the_array, target)", DialectType::BigQuery)
            .unwrap();
        assert_eq!(
            result[0],
            "ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)"
        );
    }

    // NOTE(review): smoke test only — prints the parsed AST and the ClickHouse
    // output for a MAP cast but asserts nothing beyond unwrap() succeeding.
    #[test]
    fn test_map_clickhouse_case() {
        let dialect = Dialect::get(DialectType::Generic);
        let parsed = dialect
            .parse("CAST(MAP('a', '1') AS MAP(TEXT, TEXT))")
            .unwrap();
        eprintln!("MAP parsed: {:?}", parsed);
        let result = dialect
            .transpile_to(
                "CAST(MAP('a', '1') AS MAP(TEXT, TEXT))",
                DialectType::ClickHouse,
            )
            .unwrap();
        eprintln!("MAP result: {}", result[0]);
    }

    // GENERATE_DATE_ARRAY -> Presto SEQUENCE, with the INTERVAL 1 WEEK step
    // rewritten as (1 * INTERVAL '7' DAY).
    #[test]
    fn test_generate_date_array_presto() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile_to(
            "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
            DialectType::Presto,
        ).unwrap();
        eprintln!("GDA -> Presto: {}", result[0]);
        assert_eq!(result[0], "SELECT * FROM UNNEST(SEQUENCE(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), (1 * INTERVAL '7' DAY)))");
    }

    // NOTE(review): smoke test only — no assertion on the PostgreSQL output.
    #[test]
    fn test_generate_date_array_postgres() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile_to(
            "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
            DialectType::PostgreSQL,
        ).unwrap();
        eprintln!("GDA -> PostgreSQL: {}", result[0]);
    }

    // NOTE(review): smoke test only — no assertion on the Snowflake output.
    // Runs on a dedicated 16 MiB stack — presumably the Snowflake rewrite
    // recurses deeply enough to overflow the default test stack; confirm.
    #[test]
    fn test_generate_date_array_snowflake() {
        std::thread::Builder::new()
            .stack_size(16 * 1024 * 1024)
            .spawn(|| {
                let dialect = Dialect::get(DialectType::Generic);
                let result = dialect.transpile_to(
                    "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
                    DialectType::Snowflake,
                ).unwrap();
                eprintln!("GDA -> Snowflake: {}", result[0]);
            })
            .unwrap()
            .join()
            .unwrap();
    }

    // NOTE(review): smoke test only — no assertion on the Snowflake output.
    #[test]
    fn test_array_length_generate_date_array_snowflake() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile_to(
            "SELECT ARRAY_LENGTH(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
            DialectType::Snowflake,
        ).unwrap();
        eprintln!("ARRAY_LENGTH(GDA) -> Snowflake: {}", result[0]);
    }

    // NOTE(review): smoke test only — no assertion on the MySQL output.
    #[test]
    fn test_generate_date_array_mysql() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile_to(
            "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
            DialectType::MySQL,
        ).unwrap();
        eprintln!("GDA -> MySQL: {}", result[0]);
    }

    // NOTE(review): smoke test only — no assertion on the Redshift output.
    #[test]
    fn test_generate_date_array_redshift() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile_to(
            "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
            DialectType::Redshift,
        ).unwrap();
        eprintln!("GDA -> Redshift: {}", result[0]);
    }

    // NOTE(review): smoke test only — no assertion on the TSQL output.
    #[test]
    fn test_generate_date_array_tsql() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile_to(
            "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
            DialectType::TSQL,
        ).unwrap();
        eprintln!("GDA -> TSQL: {}", result[0]);
    }

    // NOTE(review): smoke test only — exercises STRUCT casts with and without
    // field-name colons; both arms are printed, neither is asserted.
    #[test]
    fn test_struct_colon_syntax() {
        let dialect = Dialect::get(DialectType::Generic);
        // Test without colon first
        let result = dialect.transpile_to(
            "CAST((1, 2, 3, 4) AS STRUCT<a TINYINT, b SMALLINT, c INT, d BIGINT>)",
            DialectType::ClickHouse,
        );
        match result {
            Ok(r) => eprintln!("STRUCT no colon -> ClickHouse: {}", r[0]),
            Err(e) => eprintln!("STRUCT no colon error: {}", e),
        }
        // Now test with colon
        let result = dialect.transpile_to(
            "CAST((1, 2, 3, 4) AS STRUCT<a: TINYINT, b: SMALLINT, c: INT, d: BIGINT>)",
            DialectType::ClickHouse,
        );
        match result {
            Ok(r) => eprintln!("STRUCT colon -> ClickHouse: {}", r[0]),
            Err(e) => eprintln!("STRUCT colon error: {}", e),
        }
    }

    // NOTE(review): smoke test only — GENERATE_DATE_ARRAY inside a CTE for
    // MySQL; output is printed, not asserted.
    #[test]
    fn test_generate_date_array_cte_wrapped_mysql() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile_to(
            "WITH dates AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))) SELECT * FROM dates",
            DialectType::MySQL,
        ).unwrap();
        eprintln!("GDA CTE -> MySQL: {}", result[0]);
    }

    // NOTE(review): smoke test only — GENERATE_DATE_ARRAY inside a CTE for
    // TSQL; output is printed, not asserted.
    #[test]
    fn test_generate_date_array_cte_wrapped_tsql() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile_to(
            "WITH dates AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))) SELECT * FROM dates",
            DialectType::TSQL,
        ).unwrap();
        eprintln!("GDA CTE -> TSQL: {}", result[0]);
    }

    #[test]
    fn test_decode_literal_no_null_check() {
        // Oracle DECODE with all literals should produce simple equality, no IS NULL
        let dialect = Dialect::get(DialectType::Oracle);
        let result = dialect
            .transpile_to("SELECT decode(1,2,3,4)", DialectType::DuckDB)
            .unwrap();
        assert_eq!(
            result[0], "SELECT CASE WHEN 1 = 2 THEN 3 ELSE 4 END",
            "Literal DECODE should not have IS NULL checks"
        );
    }

    #[test]
    fn test_decode_column_vs_literal_no_null_check() {
        // Oracle DECODE with column vs literal should use simple equality (like sqlglot)
        let dialect = Dialect::get(DialectType::Oracle);
        let result = dialect
            .transpile_to("SELECT decode(col, 2, 3, 4) FROM t", DialectType::DuckDB)
            .unwrap();
        assert_eq!(
            result[0], "SELECT CASE WHEN col = 2 THEN 3 ELSE 4 END FROM t",
            "Column vs literal DECODE should not have IS NULL checks"
        );
    }

    #[test]
    fn test_decode_column_vs_column_keeps_null_check() {
        // Oracle DECODE with column vs column should keep null-safe comparison
        let dialect = Dialect::get(DialectType::Oracle);
        let result = dialect
            .transpile_to("SELECT decode(col, col2, 3, 4) FROM t", DialectType::DuckDB)
            .unwrap();
        assert!(
            result[0].contains("IS NULL"),
            "Column vs column DECODE should have IS NULL checks, got: {}",
            result[0]
        );
    }

    #[test]
    fn test_decode_null_search() {
        // Oracle DECODE with NULL search should use IS NULL
        let dialect = Dialect::get(DialectType::Oracle);
        let result = dialect
            .transpile_to("SELECT decode(col, NULL, 3, 4) FROM t", DialectType::DuckDB)
            .unwrap();
        assert_eq!(
            result[0],
            "SELECT CASE WHEN col IS NULL THEN 3 ELSE 4 END FROM t",
        );
    }
}