polyglot_sql/dialects/mod.rs
1//! SQL Dialect System
2//!
3//! This module implements the dialect abstraction layer that enables SQL transpilation
4//! between 30+ database engines. Each dialect encapsulates three concerns:
5//!
//! - **Tokenization**: Dialect-specific lexing rules (e.g., BigQuery and MySQL use backtick
//! quoting for identifiers, while TSQL uses square brackets).
8//! - **Generation**: How AST nodes are rendered back to SQL text, including identifier quoting
9//! style, function name casing, and syntax variations.
10//! - **Transformation**: AST-level rewrites that convert dialect-specific constructs to/from
11//! a normalized form (e.g., Snowflake `SQUARE(x)` becomes `POWER(x, 2)`).
12//!
13//! The primary entry point is [`Dialect::get`], which returns a configured [`Dialect`] instance
14//! for a given [`DialectType`]. From there, callers can [`parse`](Dialect::parse),
15//! [`generate`](Dialect::generate), [`transform`](Dialect::transform), or
16//! [`transpile_to`](Dialect::transpile_to) another dialect in a single call.
17//!
18//! Each concrete dialect (e.g., `PostgresDialect`, `BigQueryDialect`) implements the
19//! [`DialectImpl`] trait, which provides configuration hooks and expression-level transforms.
20//! Dialect modules live in submodules of this module and are re-exported here.
21
22mod generic; // Always compiled
23
24#[cfg(feature = "dialect-athena")]
25mod athena;
26#[cfg(feature = "dialect-bigquery")]
27mod bigquery;
28#[cfg(feature = "dialect-clickhouse")]
29mod clickhouse;
30#[cfg(feature = "dialect-cockroachdb")]
31mod cockroachdb;
32#[cfg(feature = "dialect-databricks")]
33mod databricks;
34#[cfg(feature = "dialect-datafusion")]
35mod datafusion;
36#[cfg(feature = "dialect-doris")]
37mod doris;
38#[cfg(feature = "dialect-dremio")]
39mod dremio;
40#[cfg(feature = "dialect-drill")]
41mod drill;
42#[cfg(feature = "dialect-druid")]
43mod druid;
44#[cfg(feature = "dialect-duckdb")]
45mod duckdb;
46#[cfg(feature = "dialect-dune")]
47mod dune;
48#[cfg(feature = "dialect-exasol")]
49mod exasol;
50#[cfg(feature = "dialect-fabric")]
51mod fabric;
52#[cfg(feature = "dialect-hive")]
53mod hive;
54#[cfg(feature = "dialect-materialize")]
55mod materialize;
56#[cfg(feature = "dialect-mysql")]
57mod mysql;
58#[cfg(feature = "dialect-oracle")]
59mod oracle;
60#[cfg(feature = "dialect-postgresql")]
61mod postgres;
62#[cfg(feature = "dialect-presto")]
63mod presto;
64#[cfg(feature = "dialect-redshift")]
65mod redshift;
66#[cfg(feature = "dialect-risingwave")]
67mod risingwave;
68#[cfg(feature = "dialect-singlestore")]
69mod singlestore;
70#[cfg(feature = "dialect-snowflake")]
71mod snowflake;
72#[cfg(feature = "dialect-solr")]
73mod solr;
74#[cfg(feature = "dialect-spark")]
75mod spark;
76#[cfg(feature = "dialect-sqlite")]
77mod sqlite;
78#[cfg(feature = "dialect-starrocks")]
79mod starrocks;
80#[cfg(feature = "dialect-tableau")]
81mod tableau;
82#[cfg(feature = "dialect-teradata")]
83mod teradata;
84#[cfg(feature = "dialect-tidb")]
85mod tidb;
86#[cfg(feature = "dialect-trino")]
87mod trino;
88#[cfg(feature = "dialect-tsql")]
89mod tsql;
90
91pub use generic::GenericDialect; // Always available
92
93#[cfg(feature = "dialect-athena")]
94pub use athena::AthenaDialect;
95#[cfg(feature = "dialect-bigquery")]
96pub use bigquery::BigQueryDialect;
97#[cfg(feature = "dialect-clickhouse")]
98pub use clickhouse::ClickHouseDialect;
99#[cfg(feature = "dialect-cockroachdb")]
100pub use cockroachdb::CockroachDBDialect;
101#[cfg(feature = "dialect-databricks")]
102pub use databricks::DatabricksDialect;
103#[cfg(feature = "dialect-datafusion")]
104pub use datafusion::DataFusionDialect;
105#[cfg(feature = "dialect-doris")]
106pub use doris::DorisDialect;
107#[cfg(feature = "dialect-dremio")]
108pub use dremio::DremioDialect;
109#[cfg(feature = "dialect-drill")]
110pub use drill::DrillDialect;
111#[cfg(feature = "dialect-druid")]
112pub use druid::DruidDialect;
113#[cfg(feature = "dialect-duckdb")]
114pub use duckdb::DuckDBDialect;
115#[cfg(feature = "dialect-dune")]
116pub use dune::DuneDialect;
117#[cfg(feature = "dialect-exasol")]
118pub use exasol::ExasolDialect;
119#[cfg(feature = "dialect-fabric")]
120pub use fabric::FabricDialect;
121#[cfg(feature = "dialect-hive")]
122pub use hive::HiveDialect;
123#[cfg(feature = "dialect-materialize")]
124pub use materialize::MaterializeDialect;
125#[cfg(feature = "dialect-mysql")]
126pub use mysql::MySQLDialect;
127#[cfg(feature = "dialect-oracle")]
128pub use oracle::OracleDialect;
129#[cfg(feature = "dialect-postgresql")]
130pub use postgres::PostgresDialect;
131#[cfg(feature = "dialect-presto")]
132pub use presto::PrestoDialect;
133#[cfg(feature = "dialect-redshift")]
134pub use redshift::RedshiftDialect;
135#[cfg(feature = "dialect-risingwave")]
136pub use risingwave::RisingWaveDialect;
137#[cfg(feature = "dialect-singlestore")]
138pub use singlestore::SingleStoreDialect;
139#[cfg(feature = "dialect-snowflake")]
140pub use snowflake::SnowflakeDialect;
141#[cfg(feature = "dialect-solr")]
142pub use solr::SolrDialect;
143#[cfg(feature = "dialect-spark")]
144pub use spark::SparkDialect;
145#[cfg(feature = "dialect-sqlite")]
146pub use sqlite::SQLiteDialect;
147#[cfg(feature = "dialect-starrocks")]
148pub use starrocks::StarRocksDialect;
149#[cfg(feature = "dialect-tableau")]
150pub use tableau::TableauDialect;
151#[cfg(feature = "dialect-teradata")]
152pub use teradata::TeradataDialect;
153#[cfg(feature = "dialect-tidb")]
154pub use tidb::TiDBDialect;
155#[cfg(feature = "dialect-trino")]
156pub use trino::TrinoDialect;
157#[cfg(feature = "dialect-tsql")]
158pub use tsql::TSQLDialect;
159
160use crate::error::Result;
161use crate::expressions::{Expression, FunctionBody};
162use crate::generator::{Generator, GeneratorConfig};
163use crate::parser::Parser;
164use crate::tokens::{Token, Tokenizer, TokenizerConfig};
165use serde::{Deserialize, Serialize};
166use std::collections::HashMap;
167use std::sync::{Arc, LazyLock, RwLock};
168
/// Enumeration of all supported SQL dialects.
///
/// Each variant corresponds to a specific SQL database engine or query language.
/// The `Generic` variant represents standard SQL with no dialect-specific behavior,
/// and is used as the default when no dialect is specified.
///
/// Dialect names are case-insensitive when parsed from strings via [`FromStr`].
/// Some dialects accept aliases (e.g., "mssql" and "sqlserver" both resolve to [`TSQL`](DialectType::TSQL)).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum DialectType {
    /// Standard SQL with no dialect-specific behavior (default).
    Generic,
    /// PostgreSQL -- advanced open-source relational database (also accepts "postgres").
    PostgreSQL,
    /// MySQL -- widely-used open-source relational database.
    MySQL,
    /// Google BigQuery -- serverless cloud data warehouse with unique syntax (backtick quoting, STRUCT types, QUALIFY).
    BigQuery,
    /// Snowflake -- cloud data platform with QUALIFY clause, FLATTEN, and variant types.
    Snowflake,
    /// DuckDB -- in-process analytical database with modern SQL extensions.
    DuckDB,
    /// SQLite -- lightweight embedded relational database.
    SQLite,
    /// Apache Hive -- data warehouse on Hadoop with HiveQL syntax.
    Hive,
    /// Apache Spark SQL -- distributed query engine (also accepts "spark2").
    Spark,
    /// Trino -- distributed SQL query engine (formerly PrestoSQL).
    Trino,
    /// PrestoDB -- distributed SQL query engine for big data.
    Presto,
    /// Amazon Redshift -- cloud data warehouse based on PostgreSQL.
    Redshift,
    /// Transact-SQL (T-SQL) -- Microsoft SQL Server and Azure SQL (also accepts "mssql", "sqlserver").
    TSQL,
    /// Oracle Database -- commercial relational database with PL/SQL extensions.
    Oracle,
    /// ClickHouse -- column-oriented OLAP database for real-time analytics.
    ClickHouse,
    /// Databricks SQL -- Spark-based lakehouse platform with QUALIFY support.
    Databricks,
    /// Amazon Athena -- serverless query service (hybrid Trino/Hive engine).
    Athena,
    /// Teradata -- enterprise data warehouse with proprietary SQL extensions.
    Teradata,
    /// Apache Doris -- real-time analytical database (MySQL-compatible).
    Doris,
    /// StarRocks -- sub-second OLAP database (MySQL-compatible).
    StarRocks,
    /// Materialize -- streaming SQL database built on differential dataflow.
    Materialize,
    /// RisingWave -- distributed streaming database with PostgreSQL compatibility.
    RisingWave,
    /// SingleStore (formerly MemSQL) -- distributed SQL database (also accepts "memsql").
    SingleStore,
    /// CockroachDB -- distributed SQL database with PostgreSQL compatibility (also accepts "cockroach").
    CockroachDB,
    /// TiDB -- distributed HTAP database with MySQL compatibility.
    TiDB,
    /// Apache Druid -- real-time analytics database.
    Druid,
    /// Apache Solr -- search platform with SQL interface.
    Solr,
    /// Tableau -- data visualization platform with its own SQL dialect.
    Tableau,
    /// Dune Analytics -- blockchain analytics SQL engine.
    Dune,
    /// Microsoft Fabric -- unified analytics platform (T-SQL based).
    Fabric,
    /// Apache Drill -- schema-free SQL query engine for big data.
    Drill,
    /// Dremio -- data lakehouse platform with Arrow-based query engine.
    Dremio,
    /// Exasol -- in-memory analytic database.
    Exasol,
    /// Apache DataFusion -- Arrow-based query engine with modern SQL extensions (also accepts "arrow-datafusion", "arrow_datafusion").
    DataFusion,
}
249
250impl Default for DialectType {
251 fn default() -> Self {
252 DialectType::Generic
253 }
254}
255
256impl std::fmt::Display for DialectType {
257 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
258 match self {
259 DialectType::Generic => write!(f, "generic"),
260 DialectType::PostgreSQL => write!(f, "postgresql"),
261 DialectType::MySQL => write!(f, "mysql"),
262 DialectType::BigQuery => write!(f, "bigquery"),
263 DialectType::Snowflake => write!(f, "snowflake"),
264 DialectType::DuckDB => write!(f, "duckdb"),
265 DialectType::SQLite => write!(f, "sqlite"),
266 DialectType::Hive => write!(f, "hive"),
267 DialectType::Spark => write!(f, "spark"),
268 DialectType::Trino => write!(f, "trino"),
269 DialectType::Presto => write!(f, "presto"),
270 DialectType::Redshift => write!(f, "redshift"),
271 DialectType::TSQL => write!(f, "tsql"),
272 DialectType::Oracle => write!(f, "oracle"),
273 DialectType::ClickHouse => write!(f, "clickhouse"),
274 DialectType::Databricks => write!(f, "databricks"),
275 DialectType::Athena => write!(f, "athena"),
276 DialectType::Teradata => write!(f, "teradata"),
277 DialectType::Doris => write!(f, "doris"),
278 DialectType::StarRocks => write!(f, "starrocks"),
279 DialectType::Materialize => write!(f, "materialize"),
280 DialectType::RisingWave => write!(f, "risingwave"),
281 DialectType::SingleStore => write!(f, "singlestore"),
282 DialectType::CockroachDB => write!(f, "cockroachdb"),
283 DialectType::TiDB => write!(f, "tidb"),
284 DialectType::Druid => write!(f, "druid"),
285 DialectType::Solr => write!(f, "solr"),
286 DialectType::Tableau => write!(f, "tableau"),
287 DialectType::Dune => write!(f, "dune"),
288 DialectType::Fabric => write!(f, "fabric"),
289 DialectType::Drill => write!(f, "drill"),
290 DialectType::Dremio => write!(f, "dremio"),
291 DialectType::Exasol => write!(f, "exasol"),
292 DialectType::DataFusion => write!(f, "datafusion"),
293 }
294 }
295}
296
297impl std::str::FromStr for DialectType {
298 type Err = crate::error::Error;
299
300 fn from_str(s: &str) -> Result<Self> {
301 match s.to_lowercase().as_str() {
302 "generic" | "" => Ok(DialectType::Generic),
303 "postgres" | "postgresql" => Ok(DialectType::PostgreSQL),
304 "mysql" => Ok(DialectType::MySQL),
305 "bigquery" => Ok(DialectType::BigQuery),
306 "snowflake" => Ok(DialectType::Snowflake),
307 "duckdb" => Ok(DialectType::DuckDB),
308 "sqlite" => Ok(DialectType::SQLite),
309 "hive" => Ok(DialectType::Hive),
310 "spark" | "spark2" => Ok(DialectType::Spark),
311 "trino" => Ok(DialectType::Trino),
312 "presto" => Ok(DialectType::Presto),
313 "redshift" => Ok(DialectType::Redshift),
314 "tsql" | "mssql" | "sqlserver" => Ok(DialectType::TSQL),
315 "oracle" => Ok(DialectType::Oracle),
316 "clickhouse" => Ok(DialectType::ClickHouse),
317 "databricks" => Ok(DialectType::Databricks),
318 "athena" => Ok(DialectType::Athena),
319 "teradata" => Ok(DialectType::Teradata),
320 "doris" => Ok(DialectType::Doris),
321 "starrocks" => Ok(DialectType::StarRocks),
322 "materialize" => Ok(DialectType::Materialize),
323 "risingwave" => Ok(DialectType::RisingWave),
324 "singlestore" | "memsql" => Ok(DialectType::SingleStore),
325 "cockroachdb" | "cockroach" => Ok(DialectType::CockroachDB),
326 "tidb" => Ok(DialectType::TiDB),
327 "druid" => Ok(DialectType::Druid),
328 "solr" => Ok(DialectType::Solr),
329 "tableau" => Ok(DialectType::Tableau),
330 "dune" => Ok(DialectType::Dune),
331 "fabric" => Ok(DialectType::Fabric),
332 "drill" => Ok(DialectType::Drill),
333 "dremio" => Ok(DialectType::Dremio),
334 "exasol" => Ok(DialectType::Exasol),
335 "datafusion" | "arrow-datafusion" | "arrow_datafusion" => Ok(DialectType::DataFusion),
336 _ => Err(crate::error::Error::parse(
337 format!("Unknown dialect: {}", s),
338 0,
339 0,
340 0,
341 0,
342 )),
343 }
344 }
345}
346
/// Trait that each concrete SQL dialect must implement.
///
/// `DialectImpl` provides the configuration hooks and per-expression transform logic
/// that distinguish one dialect from another. Implementors supply:
///
/// - A [`DialectType`] identifier.
/// - Optional overrides for tokenizer and generator configuration (defaults to generic SQL).
/// - An expression-level transform function ([`transform_expr`](DialectImpl::transform_expr))
///   that rewrites individual AST nodes for this dialect (e.g., converting `NVL` to `COALESCE`).
/// - An optional preprocessing step ([`preprocess`](DialectImpl::preprocess)) for whole-tree
///   rewrites that must run before the recursive per-node transform (e.g., eliminating QUALIFY).
///
/// The default implementations are no-ops, so a minimal dialect only needs to provide
/// [`dialect_type`](DialectImpl::dialect_type) and override the methods that differ from
/// standard SQL.
pub trait DialectImpl {
    /// Returns the [`DialectType`] that identifies this dialect.
    ///
    /// This is the only required method; everything else has a generic-SQL default.
    fn dialect_type(&self) -> DialectType;

    /// Returns the tokenizer configuration for this dialect.
    ///
    /// Override to customize identifier quoting characters, string escape rules,
    /// comment styles, and other lexing behavior. The default is
    /// `TokenizerConfig::default()` (generic SQL lexing).
    fn tokenizer_config(&self) -> TokenizerConfig {
        TokenizerConfig::default()
    }

    /// Returns the generator configuration for this dialect.
    ///
    /// Override to customize identifier quoting style, function name casing,
    /// keyword casing, and other SQL generation behavior. The default is
    /// `GeneratorConfig::default()` (generic SQL rendering).
    fn generator_config(&self) -> GeneratorConfig {
        GeneratorConfig::default()
    }

    /// Returns a generator configuration tailored to a specific expression.
    ///
    /// Override this for hybrid dialects like Athena that route to different SQL engines
    /// based on expression type (e.g., Hive-style generation for DDL, Trino-style for DML).
    /// The default ignores the expression and delegates to
    /// [`generator_config`](DialectImpl::generator_config).
    fn generator_config_for_expr(&self, _expr: &Expression) -> GeneratorConfig {
        self.generator_config()
    }

    /// Transforms a single expression node for this dialect, without recursing into children.
    ///
    /// This is the per-node rewrite hook invoked by [`transform_recursive`]. Return the
    /// expression unchanged if no dialect-specific rewrite is needed. Transformations
    /// typically include function renaming, operator substitution, and type mapping.
    /// The default returns the expression unchanged.
    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
        Ok(expr)
    }

    /// Applies whole-tree preprocessing transforms before the recursive per-node pass.
    ///
    /// Override this to apply structural rewrites that must see the entire tree at once,
    /// such as `eliminate_qualify`, `eliminate_distinct_on`, `ensure_bools`, or
    /// `explode_projection_to_unnest`. The default is a no-op pass-through.
    fn preprocess(&self, expr: Expression) -> Result<Expression> {
        Ok(expr)
    }
}
409
410/// Recursively transforms a [`DataType`](crate::expressions::DataType), handling nested
411/// parametric types such as `ARRAY<INT>`, `STRUCT<a INT, b TEXT>`, and `MAP<STRING, INT>`.
412///
413/// The outer type is first passed through `transform_fn` as an `Expression::DataType`,
414/// and then nested element/field types are recursed into. This ensures that dialect-level
415/// type mappings (e.g., `INT` to `INTEGER`) propagate into complex nested types.
416fn transform_data_type_recursive<F>(
417 dt: crate::expressions::DataType,
418 transform_fn: &F,
419) -> Result<crate::expressions::DataType>
420where
421 F: Fn(Expression) -> Result<Expression>,
422{
423 use crate::expressions::DataType;
424 // First, transform the outermost type through the expression system
425 let dt_expr = transform_fn(Expression::DataType(dt))?;
426 let dt = match dt_expr {
427 Expression::DataType(d) => d,
428 _ => {
429 return Ok(match dt_expr {
430 _ => DataType::Custom {
431 name: "UNKNOWN".to_string(),
432 },
433 })
434 }
435 };
436 // Then recurse into nested types
437 match dt {
438 DataType::Array {
439 element_type,
440 dimension,
441 } => {
442 let inner = transform_data_type_recursive(*element_type, transform_fn)?;
443 Ok(DataType::Array {
444 element_type: Box::new(inner),
445 dimension,
446 })
447 }
448 DataType::List { element_type } => {
449 let inner = transform_data_type_recursive(*element_type, transform_fn)?;
450 Ok(DataType::List {
451 element_type: Box::new(inner),
452 })
453 }
454 DataType::Struct { fields, nested } => {
455 let mut new_fields = Vec::new();
456 for mut field in fields {
457 field.data_type = transform_data_type_recursive(field.data_type, transform_fn)?;
458 new_fields.push(field);
459 }
460 Ok(DataType::Struct {
461 fields: new_fields,
462 nested,
463 })
464 }
465 DataType::Map {
466 key_type,
467 value_type,
468 } => {
469 let k = transform_data_type_recursive(*key_type, transform_fn)?;
470 let v = transform_data_type_recursive(*value_type, transform_fn)?;
471 Ok(DataType::Map {
472 key_type: Box::new(k),
473 value_type: Box::new(v),
474 })
475 }
476 other => Ok(other),
477 }
478}
479
/// Convert DuckDB C-style format strings to Presto C-style format strings.
/// DuckDB and Presto both use C-style % directives but with different specifiers for some cases.
#[cfg(feature = "transpile")]
fn duckdb_to_presto_format(fmt: &str) -> String {
    // (duckdb pattern, temporary sentinel, presto replacement).
    // Sentinels use \x01 delimiters (never expected in real format strings) so the
    // single-specifier conversions below cannot clobber the longer patterns.
    // Order matters: longer patterns are protected before any specifier is rewritten.
    const PROTECTED: [(&str, &str, &str); 6] = [
        ("%-m", "\x01NOPADM\x01", "%c"),
        ("%-d", "\x01NOPADD\x01", "%e"),
        ("%-I", "\x01NOPADI\x01", "%l"),
        ("%-H", "\x01NOPADH\x01", "%k"),
        ("%H:%M:%S", "\x01HMS\x01", "%T"),
        ("%Y-%m-%d", "\x01YMD\x01", "%Y-%m-%d"),
    ];
    // Phase 1: hide the multi-character patterns behind sentinels.
    let mut out = fmt.to_string();
    for (pattern, sentinel, _) in PROTECTED {
        out = out.replace(pattern, sentinel);
    }
    // Phase 2: convert the individual specifiers that differ between the dialects.
    out = out.replace("%M", "%i").replace("%S", "%s");
    // Phase 3: swap the sentinels for their Presto equivalents.
    for (_, sentinel, replacement) in PROTECTED {
        out = out.replace(sentinel, replacement);
    }
    out
}
505
/// Convert DuckDB C-style format strings to BigQuery format strings.
/// BigQuery uses a mix of strftime-like directives.
#[cfg(feature = "transpile")]
fn duckdb_to_bigquery_format(fmt: &str) -> String {
    // Replace the longest patterns first so the full datetime form wins over
    // its date-only and time-only sub-patterns.
    fmt.replace("%-d", "%e")
        .replace("%Y-%m-%d %H:%M:%S", "%F %T")
        .replace("%Y-%m-%d", "%F")
        .replace("%H:%M:%S", "%T")
}
518
519/// Applies a transform function bottom-up through an entire expression tree.
520///
521/// This is the core tree-rewriting engine used by the dialect system. It performs
522/// a post-order (children-first) traversal: for each node, all children are recursively
523/// transformed before the node itself is passed to `transform_fn`. This bottom-up
524/// strategy means that when `transform_fn` sees a node, its children have already
525/// been rewritten, which simplifies pattern matching on sub-expressions.
526///
527/// The function handles all expression variants including SELECT clauses (FROM, WHERE,
528/// GROUP BY, HAVING, ORDER BY, QUALIFY, WITH/CTEs, WINDOW), binary operators,
529/// function calls, CASE expressions, date/time functions, and more.
530///
531/// # Arguments
532///
533/// * `expr` - The root expression to transform (consumed).
534/// * `transform_fn` - A closure that receives each expression node (after its children
535/// have been transformed) and returns a possibly-rewritten expression.
536///
537/// # Errors
538///
539/// Returns an error if `transform_fn` returns an error for any node.
540pub fn transform_recursive<F>(expr: Expression, transform_fn: &F) -> Result<Expression>
541where
542 F: Fn(Expression) -> Result<Expression>,
543{
544 use crate::expressions::BinaryOp;
545
546 // Helper macro to transform binary ops with Box<BinaryOp>
547 macro_rules! transform_binary {
548 ($variant:ident, $op:expr) => {{
549 let left = transform_recursive($op.left, transform_fn)?;
550 let right = transform_recursive($op.right, transform_fn)?;
551 Expression::$variant(Box::new(BinaryOp {
552 left,
553 right,
554 left_comments: $op.left_comments,
555 operator_comments: $op.operator_comments,
556 trailing_comments: $op.trailing_comments,
557 inferred_type: $op.inferred_type,
558 }))
559 }};
560 }
561
562 // First recursively transform children, then apply the transform function
563 let expr = match expr {
564 Expression::Select(mut select) => {
565 select.expressions = select
566 .expressions
567 .into_iter()
568 .map(|e| transform_recursive(e, transform_fn))
569 .collect::<Result<Vec<_>>>()?;
570
571 // Transform FROM clause
572 if let Some(mut from) = select.from.take() {
573 from.expressions = from
574 .expressions
575 .into_iter()
576 .map(|e| transform_recursive(e, transform_fn))
577 .collect::<Result<Vec<_>>>()?;
578 select.from = Some(from);
579 }
580
581 // Transform JOINs - important for CROSS APPLY / LATERAL transformations
582 select.joins = select
583 .joins
584 .into_iter()
585 .map(|mut join| {
586 join.this = transform_recursive(join.this, transform_fn)?;
587 if let Some(on) = join.on.take() {
588 join.on = Some(transform_recursive(on, transform_fn)?);
589 }
590 // Wrap join in Expression::Join to allow transform_fn to transform it
591 match transform_fn(Expression::Join(Box::new(join)))? {
592 Expression::Join(j) => Ok(*j),
593 _ => Err(crate::error::Error::parse(
594 "Join transformation returned non-join expression",
595 0,
596 0,
597 0,
598 0,
599 )),
600 }
601 })
602 .collect::<Result<Vec<_>>>()?;
603
604 // Transform LATERAL VIEW expressions (Hive/Spark)
605 select.lateral_views = select
606 .lateral_views
607 .into_iter()
608 .map(|mut lv| {
609 lv.this = transform_recursive(lv.this, transform_fn)?;
610 Ok(lv)
611 })
612 .collect::<Result<Vec<_>>>()?;
613
614 // Transform WHERE clause
615 if let Some(mut where_clause) = select.where_clause.take() {
616 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
617 select.where_clause = Some(where_clause);
618 }
619
620 // Transform GROUP BY
621 if let Some(mut group_by) = select.group_by.take() {
622 group_by.expressions = group_by
623 .expressions
624 .into_iter()
625 .map(|e| transform_recursive(e, transform_fn))
626 .collect::<Result<Vec<_>>>()?;
627 select.group_by = Some(group_by);
628 }
629
630 // Transform HAVING
631 if let Some(mut having) = select.having.take() {
632 having.this = transform_recursive(having.this, transform_fn)?;
633 select.having = Some(having);
634 }
635
636 // Transform WITH (CTEs)
637 if let Some(mut with) = select.with.take() {
638 with.ctes = with
639 .ctes
640 .into_iter()
641 .map(|mut cte| {
642 let original = cte.this.clone();
643 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
644 cte
645 })
646 .collect();
647 select.with = Some(with);
648 }
649
650 // Transform ORDER BY
651 if let Some(mut order) = select.order_by.take() {
652 order.expressions = order
653 .expressions
654 .into_iter()
655 .map(|o| {
656 let mut o = o;
657 let original = o.this.clone();
658 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
659 // Also apply transform to the Ordered wrapper itself (for NULLS FIRST etc.)
660 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
661 Ok(Expression::Ordered(transformed)) => *transformed,
662 Ok(_) | Err(_) => o,
663 }
664 })
665 .collect();
666 select.order_by = Some(order);
667 }
668
669 // Transform WINDOW clause order_by
670 if let Some(ref mut windows) = select.windows {
671 for nw in windows.iter_mut() {
672 nw.spec.order_by = std::mem::take(&mut nw.spec.order_by)
673 .into_iter()
674 .map(|o| {
675 let mut o = o;
676 let original = o.this.clone();
677 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
678 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
679 Ok(Expression::Ordered(transformed)) => *transformed,
680 Ok(_) | Err(_) => o,
681 }
682 })
683 .collect();
684 }
685 }
686
687 // Transform QUALIFY
688 if let Some(mut qual) = select.qualify.take() {
689 qual.this = transform_recursive(qual.this, transform_fn)?;
690 select.qualify = Some(qual);
691 }
692
693 Expression::Select(select)
694 }
695 Expression::Function(mut f) => {
696 f.args = f
697 .args
698 .into_iter()
699 .map(|e| transform_recursive(e, transform_fn))
700 .collect::<Result<Vec<_>>>()?;
701 Expression::Function(f)
702 }
703 Expression::AggregateFunction(mut f) => {
704 f.args = f
705 .args
706 .into_iter()
707 .map(|e| transform_recursive(e, transform_fn))
708 .collect::<Result<Vec<_>>>()?;
709 if let Some(filter) = f.filter {
710 f.filter = Some(transform_recursive(filter, transform_fn)?);
711 }
712 Expression::AggregateFunction(f)
713 }
714 Expression::WindowFunction(mut wf) => {
715 wf.this = transform_recursive(wf.this, transform_fn)?;
716 wf.over.partition_by = wf
717 .over
718 .partition_by
719 .into_iter()
720 .map(|e| transform_recursive(e, transform_fn))
721 .collect::<Result<Vec<_>>>()?;
722 // Transform order_by items through Expression::Ordered wrapper
723 wf.over.order_by = wf
724 .over
725 .order_by
726 .into_iter()
727 .map(|o| {
728 let mut o = o;
729 o.this = transform_recursive(o.this, transform_fn)?;
730 match transform_fn(Expression::Ordered(Box::new(o)))? {
731 Expression::Ordered(transformed) => Ok(*transformed),
732 _ => Err(crate::error::Error::parse(
733 "Ordered transformation returned non-Ordered expression",
734 0,
735 0,
736 0,
737 0,
738 )),
739 }
740 })
741 .collect::<Result<Vec<_>>>()?;
742 Expression::WindowFunction(wf)
743 }
744 Expression::Alias(mut a) => {
745 a.this = transform_recursive(a.this, transform_fn)?;
746 Expression::Alias(a)
747 }
748 Expression::Cast(mut c) => {
749 c.this = transform_recursive(c.this, transform_fn)?;
750 // Also transform the target data type (recursively for nested types like ARRAY<INT>, STRUCT<a INT>)
751 c.to = transform_data_type_recursive(c.to, transform_fn)?;
752 Expression::Cast(c)
753 }
754 Expression::And(op) => transform_binary!(And, *op),
755 Expression::Or(op) => transform_binary!(Or, *op),
756 Expression::Add(op) => transform_binary!(Add, *op),
757 Expression::Sub(op) => transform_binary!(Sub, *op),
758 Expression::Mul(op) => transform_binary!(Mul, *op),
759 Expression::Div(op) => transform_binary!(Div, *op),
760 Expression::Eq(op) => transform_binary!(Eq, *op),
761 Expression::Lt(op) => transform_binary!(Lt, *op),
762 Expression::Gt(op) => transform_binary!(Gt, *op),
763 Expression::Paren(mut p) => {
764 p.this = transform_recursive(p.this, transform_fn)?;
765 Expression::Paren(p)
766 }
767 Expression::Coalesce(mut f) => {
768 f.expressions = f
769 .expressions
770 .into_iter()
771 .map(|e| transform_recursive(e, transform_fn))
772 .collect::<Result<Vec<_>>>()?;
773 Expression::Coalesce(f)
774 }
775 Expression::IfNull(mut f) => {
776 f.this = transform_recursive(f.this, transform_fn)?;
777 f.expression = transform_recursive(f.expression, transform_fn)?;
778 Expression::IfNull(f)
779 }
780 Expression::Nvl(mut f) => {
781 f.this = transform_recursive(f.this, transform_fn)?;
782 f.expression = transform_recursive(f.expression, transform_fn)?;
783 Expression::Nvl(f)
784 }
785 Expression::In(mut i) => {
786 i.this = transform_recursive(i.this, transform_fn)?;
787 i.expressions = i
788 .expressions
789 .into_iter()
790 .map(|e| transform_recursive(e, transform_fn))
791 .collect::<Result<Vec<_>>>()?;
792 if let Some(query) = i.query {
793 i.query = Some(transform_recursive(query, transform_fn)?);
794 }
795 Expression::In(i)
796 }
797 Expression::Not(mut n) => {
798 n.this = transform_recursive(n.this, transform_fn)?;
799 Expression::Not(n)
800 }
801 Expression::ArraySlice(mut s) => {
802 s.this = transform_recursive(s.this, transform_fn)?;
803 if let Some(start) = s.start {
804 s.start = Some(transform_recursive(start, transform_fn)?);
805 }
806 if let Some(end) = s.end {
807 s.end = Some(transform_recursive(end, transform_fn)?);
808 }
809 Expression::ArraySlice(s)
810 }
811 Expression::Subscript(mut s) => {
812 s.this = transform_recursive(s.this, transform_fn)?;
813 s.index = transform_recursive(s.index, transform_fn)?;
814 Expression::Subscript(s)
815 }
816 Expression::Array(mut a) => {
817 a.expressions = a
818 .expressions
819 .into_iter()
820 .map(|e| transform_recursive(e, transform_fn))
821 .collect::<Result<Vec<_>>>()?;
822 Expression::Array(a)
823 }
824 Expression::Struct(mut s) => {
825 let mut new_fields = Vec::new();
826 for (name, expr) in s.fields {
827 let transformed = transform_recursive(expr, transform_fn)?;
828 new_fields.push((name, transformed));
829 }
830 s.fields = new_fields;
831 Expression::Struct(s)
832 }
833 Expression::NamedArgument(mut na) => {
834 na.value = transform_recursive(na.value, transform_fn)?;
835 Expression::NamedArgument(na)
836 }
837 Expression::MapFunc(mut m) => {
838 m.keys = m
839 .keys
840 .into_iter()
841 .map(|e| transform_recursive(e, transform_fn))
842 .collect::<Result<Vec<_>>>()?;
843 m.values = m
844 .values
845 .into_iter()
846 .map(|e| transform_recursive(e, transform_fn))
847 .collect::<Result<Vec<_>>>()?;
848 Expression::MapFunc(m)
849 }
850 Expression::ArrayFunc(mut a) => {
851 a.expressions = a
852 .expressions
853 .into_iter()
854 .map(|e| transform_recursive(e, transform_fn))
855 .collect::<Result<Vec<_>>>()?;
856 Expression::ArrayFunc(a)
857 }
858 Expression::Lambda(mut l) => {
859 l.body = transform_recursive(l.body, transform_fn)?;
860 Expression::Lambda(l)
861 }
862 Expression::JsonExtract(mut f) => {
863 f.this = transform_recursive(f.this, transform_fn)?;
864 f.path = transform_recursive(f.path, transform_fn)?;
865 Expression::JsonExtract(f)
866 }
867 Expression::JsonExtractScalar(mut f) => {
868 f.this = transform_recursive(f.this, transform_fn)?;
869 f.path = transform_recursive(f.path, transform_fn)?;
870 Expression::JsonExtractScalar(f)
871 }
872
873 // ===== UnaryFunc-based expressions =====
874 // These all have a single `this: Expression` child
875 Expression::Length(mut f) => {
876 f.this = transform_recursive(f.this, transform_fn)?;
877 Expression::Length(f)
878 }
879 Expression::Upper(mut f) => {
880 f.this = transform_recursive(f.this, transform_fn)?;
881 Expression::Upper(f)
882 }
883 Expression::Lower(mut f) => {
884 f.this = transform_recursive(f.this, transform_fn)?;
885 Expression::Lower(f)
886 }
887 Expression::LTrim(mut f) => {
888 f.this = transform_recursive(f.this, transform_fn)?;
889 Expression::LTrim(f)
890 }
891 Expression::RTrim(mut f) => {
892 f.this = transform_recursive(f.this, transform_fn)?;
893 Expression::RTrim(f)
894 }
895 Expression::Reverse(mut f) => {
896 f.this = transform_recursive(f.this, transform_fn)?;
897 Expression::Reverse(f)
898 }
899 Expression::Abs(mut f) => {
900 f.this = transform_recursive(f.this, transform_fn)?;
901 Expression::Abs(f)
902 }
903 Expression::Ceil(mut f) => {
904 f.this = transform_recursive(f.this, transform_fn)?;
905 Expression::Ceil(f)
906 }
907 Expression::Floor(mut f) => {
908 f.this = transform_recursive(f.this, transform_fn)?;
909 Expression::Floor(f)
910 }
911 Expression::Sign(mut f) => {
912 f.this = transform_recursive(f.this, transform_fn)?;
913 Expression::Sign(f)
914 }
915 Expression::Sqrt(mut f) => {
916 f.this = transform_recursive(f.this, transform_fn)?;
917 Expression::Sqrt(f)
918 }
919 Expression::Cbrt(mut f) => {
920 f.this = transform_recursive(f.this, transform_fn)?;
921 Expression::Cbrt(f)
922 }
923 Expression::Ln(mut f) => {
924 f.this = transform_recursive(f.this, transform_fn)?;
925 Expression::Ln(f)
926 }
927 Expression::Log(mut f) => {
928 f.this = transform_recursive(f.this, transform_fn)?;
929 if let Some(base) = f.base {
930 f.base = Some(transform_recursive(base, transform_fn)?);
931 }
932 Expression::Log(f)
933 }
934 Expression::Exp(mut f) => {
935 f.this = transform_recursive(f.this, transform_fn)?;
936 Expression::Exp(f)
937 }
938 Expression::Date(mut f) => {
939 f.this = transform_recursive(f.this, transform_fn)?;
940 Expression::Date(f)
941 }
942 Expression::Stddev(mut f) => {
943 f.this = transform_recursive(f.this, transform_fn)?;
944 Expression::Stddev(f)
945 }
946 Expression::Variance(mut f) => {
947 f.this = transform_recursive(f.this, transform_fn)?;
948 Expression::Variance(f)
949 }
950
951 // ===== BinaryFunc-based expressions =====
952 Expression::ModFunc(mut f) => {
953 f.this = transform_recursive(f.this, transform_fn)?;
954 f.expression = transform_recursive(f.expression, transform_fn)?;
955 Expression::ModFunc(f)
956 }
957 Expression::Power(mut f) => {
958 f.this = transform_recursive(f.this, transform_fn)?;
959 f.expression = transform_recursive(f.expression, transform_fn)?;
960 Expression::Power(f)
961 }
962 Expression::MapFromArrays(mut f) => {
963 f.this = transform_recursive(f.this, transform_fn)?;
964 f.expression = transform_recursive(f.expression, transform_fn)?;
965 Expression::MapFromArrays(f)
966 }
967 Expression::ElementAt(mut f) => {
968 f.this = transform_recursive(f.this, transform_fn)?;
969 f.expression = transform_recursive(f.expression, transform_fn)?;
970 Expression::ElementAt(f)
971 }
972 Expression::MapContainsKey(mut f) => {
973 f.this = transform_recursive(f.this, transform_fn)?;
974 f.expression = transform_recursive(f.expression, transform_fn)?;
975 Expression::MapContainsKey(f)
976 }
977 Expression::Left(mut f) => {
978 f.this = transform_recursive(f.this, transform_fn)?;
979 f.length = transform_recursive(f.length, transform_fn)?;
980 Expression::Left(f)
981 }
982 Expression::Right(mut f) => {
983 f.this = transform_recursive(f.this, transform_fn)?;
984 f.length = transform_recursive(f.length, transform_fn)?;
985 Expression::Right(f)
986 }
987 Expression::Repeat(mut f) => {
988 f.this = transform_recursive(f.this, transform_fn)?;
989 f.times = transform_recursive(f.times, transform_fn)?;
990 Expression::Repeat(f)
991 }
992
993 // ===== Complex function expressions =====
994 Expression::Substring(mut f) => {
995 f.this = transform_recursive(f.this, transform_fn)?;
996 f.start = transform_recursive(f.start, transform_fn)?;
997 if let Some(len) = f.length {
998 f.length = Some(transform_recursive(len, transform_fn)?);
999 }
1000 Expression::Substring(f)
1001 }
1002 Expression::Replace(mut f) => {
1003 f.this = transform_recursive(f.this, transform_fn)?;
1004 f.old = transform_recursive(f.old, transform_fn)?;
1005 f.new = transform_recursive(f.new, transform_fn)?;
1006 Expression::Replace(f)
1007 }
1008 Expression::ConcatWs(mut f) => {
1009 f.separator = transform_recursive(f.separator, transform_fn)?;
1010 f.expressions = f
1011 .expressions
1012 .into_iter()
1013 .map(|e| transform_recursive(e, transform_fn))
1014 .collect::<Result<Vec<_>>>()?;
1015 Expression::ConcatWs(f)
1016 }
1017 Expression::Trim(mut f) => {
1018 f.this = transform_recursive(f.this, transform_fn)?;
1019 if let Some(chars) = f.characters {
1020 f.characters = Some(transform_recursive(chars, transform_fn)?);
1021 }
1022 Expression::Trim(f)
1023 }
1024 Expression::Split(mut f) => {
1025 f.this = transform_recursive(f.this, transform_fn)?;
1026 f.delimiter = transform_recursive(f.delimiter, transform_fn)?;
1027 Expression::Split(f)
1028 }
1029 Expression::Lpad(mut f) => {
1030 f.this = transform_recursive(f.this, transform_fn)?;
1031 f.length = transform_recursive(f.length, transform_fn)?;
1032 if let Some(fill) = f.fill {
1033 f.fill = Some(transform_recursive(fill, transform_fn)?);
1034 }
1035 Expression::Lpad(f)
1036 }
1037 Expression::Rpad(mut f) => {
1038 f.this = transform_recursive(f.this, transform_fn)?;
1039 f.length = transform_recursive(f.length, transform_fn)?;
1040 if let Some(fill) = f.fill {
1041 f.fill = Some(transform_recursive(fill, transform_fn)?);
1042 }
1043 Expression::Rpad(f)
1044 }
1045
1046 // ===== Conditional expressions =====
1047 Expression::Case(mut c) => {
1048 if let Some(operand) = c.operand {
1049 c.operand = Some(transform_recursive(operand, transform_fn)?);
1050 }
1051 c.whens = c
1052 .whens
1053 .into_iter()
1054 .map(|(cond, then)| {
1055 let new_cond = transform_recursive(cond.clone(), transform_fn).unwrap_or(cond);
1056 let new_then = transform_recursive(then.clone(), transform_fn).unwrap_or(then);
1057 (new_cond, new_then)
1058 })
1059 .collect();
1060 if let Some(else_expr) = c.else_ {
1061 c.else_ = Some(transform_recursive(else_expr, transform_fn)?);
1062 }
1063 Expression::Case(c)
1064 }
1065 Expression::IfFunc(mut f) => {
1066 f.condition = transform_recursive(f.condition, transform_fn)?;
1067 f.true_value = transform_recursive(f.true_value, transform_fn)?;
1068 if let Some(false_val) = f.false_value {
1069 f.false_value = Some(transform_recursive(false_val, transform_fn)?);
1070 }
1071 Expression::IfFunc(f)
1072 }
1073
1074 // ===== Date/Time expressions =====
1075 Expression::DateAdd(mut f) => {
1076 f.this = transform_recursive(f.this, transform_fn)?;
1077 f.interval = transform_recursive(f.interval, transform_fn)?;
1078 Expression::DateAdd(f)
1079 }
1080 Expression::DateSub(mut f) => {
1081 f.this = transform_recursive(f.this, transform_fn)?;
1082 f.interval = transform_recursive(f.interval, transform_fn)?;
1083 Expression::DateSub(f)
1084 }
1085 Expression::DateDiff(mut f) => {
1086 f.this = transform_recursive(f.this, transform_fn)?;
1087 f.expression = transform_recursive(f.expression, transform_fn)?;
1088 Expression::DateDiff(f)
1089 }
1090 Expression::DateTrunc(mut f) => {
1091 f.this = transform_recursive(f.this, transform_fn)?;
1092 Expression::DateTrunc(f)
1093 }
1094 Expression::Extract(mut f) => {
1095 f.this = transform_recursive(f.this, transform_fn)?;
1096 Expression::Extract(f)
1097 }
1098
1099 // ===== JSON expressions =====
1100 Expression::JsonObject(mut f) => {
1101 f.pairs = f
1102 .pairs
1103 .into_iter()
1104 .map(|(k, v)| {
1105 let new_k = transform_recursive(k, transform_fn)?;
1106 let new_v = transform_recursive(v, transform_fn)?;
1107 Ok((new_k, new_v))
1108 })
1109 .collect::<Result<Vec<_>>>()?;
1110 Expression::JsonObject(f)
1111 }
1112
1113 // ===== Subquery expressions =====
1114 Expression::Subquery(mut s) => {
1115 s.this = transform_recursive(s.this, transform_fn)?;
1116 Expression::Subquery(s)
1117 }
1118 Expression::Exists(mut e) => {
1119 e.this = transform_recursive(e.this, transform_fn)?;
1120 Expression::Exists(e)
1121 }
1122
1123 // ===== Set operations =====
1124 Expression::Union(mut u) => {
1125 u.left = transform_recursive(u.left, transform_fn)?;
1126 u.right = transform_recursive(u.right, transform_fn)?;
1127 Expression::Union(u)
1128 }
1129 Expression::Intersect(mut i) => {
1130 i.left = transform_recursive(i.left, transform_fn)?;
1131 i.right = transform_recursive(i.right, transform_fn)?;
1132 Expression::Intersect(i)
1133 }
1134 Expression::Except(mut e) => {
1135 e.left = transform_recursive(e.left, transform_fn)?;
1136 e.right = transform_recursive(e.right, transform_fn)?;
1137 Expression::Except(e)
1138 }
1139
1140 // ===== DML expressions =====
1141 Expression::Insert(mut ins) => {
1142 // Transform VALUES clause expressions
1143 let mut new_values = Vec::new();
1144 for row in ins.values {
1145 let mut new_row = Vec::new();
1146 for e in row {
1147 new_row.push(transform_recursive(e, transform_fn)?);
1148 }
1149 new_values.push(new_row);
1150 }
1151 ins.values = new_values;
1152
1153 // Transform query (for INSERT ... SELECT)
1154 if let Some(query) = ins.query {
1155 ins.query = Some(transform_recursive(query, transform_fn)?);
1156 }
1157
1158 // Transform RETURNING clause
1159 let mut new_returning = Vec::new();
1160 for e in ins.returning {
1161 new_returning.push(transform_recursive(e, transform_fn)?);
1162 }
1163 ins.returning = new_returning;
1164
1165 // Transform ON CONFLICT clause
1166 if let Some(on_conflict) = ins.on_conflict {
1167 ins.on_conflict = Some(Box::new(transform_recursive(*on_conflict, transform_fn)?));
1168 }
1169
1170 Expression::Insert(ins)
1171 }
1172 Expression::Update(mut upd) => {
1173 upd.set = upd
1174 .set
1175 .into_iter()
1176 .map(|(id, val)| {
1177 let new_val = transform_recursive(val.clone(), transform_fn).unwrap_or(val);
1178 (id, new_val)
1179 })
1180 .collect();
1181 if let Some(mut where_clause) = upd.where_clause.take() {
1182 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
1183 upd.where_clause = Some(where_clause);
1184 }
1185 Expression::Update(upd)
1186 }
1187 Expression::Delete(mut del) => {
1188 if let Some(mut where_clause) = del.where_clause.take() {
1189 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
1190 del.where_clause = Some(where_clause);
1191 }
1192 Expression::Delete(del)
1193 }
1194
1195 // ===== CTE expressions =====
1196 Expression::With(mut w) => {
1197 w.ctes = w
1198 .ctes
1199 .into_iter()
1200 .map(|mut cte| {
1201 let original = cte.this.clone();
1202 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
1203 cte
1204 })
1205 .collect();
1206 Expression::With(w)
1207 }
1208 Expression::Cte(mut c) => {
1209 c.this = transform_recursive(c.this, transform_fn)?;
1210 Expression::Cte(c)
1211 }
1212
1213 // ===== Order expressions =====
1214 Expression::Ordered(mut o) => {
1215 o.this = transform_recursive(o.this, transform_fn)?;
1216 Expression::Ordered(o)
1217 }
1218
1219 // ===== Negation =====
1220 Expression::Neg(mut n) => {
1221 n.this = transform_recursive(n.this, transform_fn)?;
1222 Expression::Neg(n)
1223 }
1224
1225 // ===== Between =====
1226 Expression::Between(mut b) => {
1227 b.this = transform_recursive(b.this, transform_fn)?;
1228 b.low = transform_recursive(b.low, transform_fn)?;
1229 b.high = transform_recursive(b.high, transform_fn)?;
1230 Expression::Between(b)
1231 }
1232 Expression::IsNull(mut i) => {
1233 i.this = transform_recursive(i.this, transform_fn)?;
1234 Expression::IsNull(i)
1235 }
1236 Expression::IsTrue(mut i) => {
1237 i.this = transform_recursive(i.this, transform_fn)?;
1238 Expression::IsTrue(i)
1239 }
1240 Expression::IsFalse(mut i) => {
1241 i.this = transform_recursive(i.this, transform_fn)?;
1242 Expression::IsFalse(i)
1243 }
1244
1245 // ===== Like expressions =====
1246 Expression::Like(mut l) => {
1247 l.left = transform_recursive(l.left, transform_fn)?;
1248 l.right = transform_recursive(l.right, transform_fn)?;
1249 Expression::Like(l)
1250 }
1251 Expression::ILike(mut l) => {
1252 l.left = transform_recursive(l.left, transform_fn)?;
1253 l.right = transform_recursive(l.right, transform_fn)?;
1254 Expression::ILike(l)
1255 }
1256
1257 // ===== Additional binary ops not covered by macro =====
1258 Expression::Neq(op) => transform_binary!(Neq, *op),
1259 Expression::Lte(op) => transform_binary!(Lte, *op),
1260 Expression::Gte(op) => transform_binary!(Gte, *op),
1261 Expression::Mod(op) => transform_binary!(Mod, *op),
1262 Expression::Concat(op) => transform_binary!(Concat, *op),
1263 Expression::BitwiseAnd(op) => transform_binary!(BitwiseAnd, *op),
1264 Expression::BitwiseOr(op) => transform_binary!(BitwiseOr, *op),
1265 Expression::BitwiseXor(op) => transform_binary!(BitwiseXor, *op),
1266 Expression::Is(op) => transform_binary!(Is, *op),
1267
1268 // ===== TryCast / SafeCast =====
1269 Expression::TryCast(mut c) => {
1270 c.this = transform_recursive(c.this, transform_fn)?;
1271 c.to = transform_data_type_recursive(c.to, transform_fn)?;
1272 Expression::TryCast(c)
1273 }
1274 Expression::SafeCast(mut c) => {
1275 c.this = transform_recursive(c.this, transform_fn)?;
1276 c.to = transform_data_type_recursive(c.to, transform_fn)?;
1277 Expression::SafeCast(c)
1278 }
1279
1280 // ===== Misc =====
1281 Expression::Unnest(mut f) => {
1282 f.this = transform_recursive(f.this, transform_fn)?;
1283 f.expressions = f
1284 .expressions
1285 .into_iter()
1286 .map(|e| transform_recursive(e, transform_fn))
1287 .collect::<Result<Vec<_>>>()?;
1288 Expression::Unnest(f)
1289 }
1290 Expression::Explode(mut f) => {
1291 f.this = transform_recursive(f.this, transform_fn)?;
1292 Expression::Explode(f)
1293 }
1294 Expression::GroupConcat(mut f) => {
1295 f.this = transform_recursive(f.this, transform_fn)?;
1296 Expression::GroupConcat(f)
1297 }
1298 Expression::StringAgg(mut f) => {
1299 f.this = transform_recursive(f.this, transform_fn)?;
1300 Expression::StringAgg(f)
1301 }
1302 Expression::ListAgg(mut f) => {
1303 f.this = transform_recursive(f.this, transform_fn)?;
1304 Expression::ListAgg(f)
1305 }
1306 Expression::ArrayAgg(mut f) => {
1307 f.this = transform_recursive(f.this, transform_fn)?;
1308 Expression::ArrayAgg(f)
1309 }
1310 Expression::ParseJson(mut f) => {
1311 f.this = transform_recursive(f.this, transform_fn)?;
1312 Expression::ParseJson(f)
1313 }
1314 Expression::ToJson(mut f) => {
1315 f.this = transform_recursive(f.this, transform_fn)?;
1316 Expression::ToJson(f)
1317 }
1318 Expression::JSONExtract(mut e) => {
1319 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1320 e.expression = Box::new(transform_recursive(*e.expression, transform_fn)?);
1321 Expression::JSONExtract(e)
1322 }
1323 Expression::JSONExtractScalar(mut e) => {
1324 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1325 e.expression = Box::new(transform_recursive(*e.expression, transform_fn)?);
1326 Expression::JSONExtractScalar(e)
1327 }
1328
1329 // StrToTime: recurse into this
1330 Expression::StrToTime(mut e) => {
1331 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1332 Expression::StrToTime(e)
1333 }
1334
1335 // UnixToTime: recurse into this
1336 Expression::UnixToTime(mut e) => {
1337 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1338 Expression::UnixToTime(e)
1339 }
1340
1341 // CreateTable: recurse into column defaults, on_update expressions, and data types
1342 Expression::CreateTable(mut ct) => {
1343 for col in &mut ct.columns {
1344 if let Some(default_expr) = col.default.take() {
1345 col.default = Some(transform_recursive(default_expr, transform_fn)?);
1346 }
1347 if let Some(on_update_expr) = col.on_update.take() {
1348 col.on_update = Some(transform_recursive(on_update_expr, transform_fn)?);
1349 }
1350 // Note: Column data type transformations (INT -> INT64 for BigQuery, etc.)
1351 // are NOT applied here because per-dialect transforms are designed for CAST/expression
1352 // contexts and may not produce correct results for DDL column definitions.
1353 // The DDL type mappings would need dedicated handling per source/target pair.
1354 }
1355 if let Some(as_select) = ct.as_select.take() {
1356 ct.as_select = Some(transform_recursive(as_select, transform_fn)?);
1357 }
1358 Expression::CreateTable(ct)
1359 }
1360
1361 // CreateProcedure: recurse into body expressions
1362 Expression::CreateProcedure(mut cp) => {
1363 if let Some(body) = cp.body.take() {
1364 cp.body = Some(match body {
1365 FunctionBody::Expression(expr) => {
1366 FunctionBody::Expression(transform_recursive(expr, transform_fn)?)
1367 }
1368 FunctionBody::Return(expr) => {
1369 FunctionBody::Return(transform_recursive(expr, transform_fn)?)
1370 }
1371 FunctionBody::Statements(stmts) => {
1372 let transformed_stmts = stmts
1373 .into_iter()
1374 .map(|s| transform_recursive(s, transform_fn))
1375 .collect::<Result<Vec<_>>>()?;
1376 FunctionBody::Statements(transformed_stmts)
1377 }
1378 other => other,
1379 });
1380 }
1381 Expression::CreateProcedure(cp)
1382 }
1383
1384 // CreateFunction: recurse into body expressions
1385 Expression::CreateFunction(mut cf) => {
1386 if let Some(body) = cf.body.take() {
1387 cf.body = Some(match body {
1388 FunctionBody::Expression(expr) => {
1389 FunctionBody::Expression(transform_recursive(expr, transform_fn)?)
1390 }
1391 FunctionBody::Return(expr) => {
1392 FunctionBody::Return(transform_recursive(expr, transform_fn)?)
1393 }
1394 FunctionBody::Statements(stmts) => {
1395 let transformed_stmts = stmts
1396 .into_iter()
1397 .map(|s| transform_recursive(s, transform_fn))
1398 .collect::<Result<Vec<_>>>()?;
1399 FunctionBody::Statements(transformed_stmts)
1400 }
1401 other => other,
1402 });
1403 }
1404 Expression::CreateFunction(cf)
1405 }
1406
1407 // MemberOf: recurse into left and right operands
1408 Expression::MemberOf(op) => transform_binary!(MemberOf, *op),
1409 // ArrayContainsAll (@>): recurse into left and right operands
1410 Expression::ArrayContainsAll(op) => transform_binary!(ArrayContainsAll, *op),
1411 // ArrayContainedBy (<@): recurse into left and right operands
1412 Expression::ArrayContainedBy(op) => transform_binary!(ArrayContainedBy, *op),
1413 // ArrayOverlaps (&&): recurse into left and right operands
1414 Expression::ArrayOverlaps(op) => transform_binary!(ArrayOverlaps, *op),
1415 // TsMatch (@@): recurse into left and right operands
1416 Expression::TsMatch(op) => transform_binary!(TsMatch, *op),
1417 // Adjacent (-|-): recurse into left and right operands
1418 Expression::Adjacent(op) => transform_binary!(Adjacent, *op),
1419
1420 // Table: recurse into when (HistoricalData) and changes fields
1421 Expression::Table(mut t) => {
1422 if let Some(when) = t.when.take() {
1423 let transformed =
1424 transform_recursive(Expression::HistoricalData(when), transform_fn)?;
1425 if let Expression::HistoricalData(hd) = transformed {
1426 t.when = Some(hd);
1427 }
1428 }
1429 if let Some(changes) = t.changes.take() {
1430 let transformed = transform_recursive(Expression::Changes(changes), transform_fn)?;
1431 if let Expression::Changes(c) = transformed {
1432 t.changes = Some(c);
1433 }
1434 }
1435 Expression::Table(t)
1436 }
1437
1438 // HistoricalData (Snowflake time travel): recurse into expression
1439 Expression::HistoricalData(mut hd) => {
1440 *hd.expression = transform_recursive(*hd.expression, transform_fn)?;
1441 Expression::HistoricalData(hd)
1442 }
1443
1444 // Changes (Snowflake CHANGES clause): recurse into at_before and end
1445 Expression::Changes(mut c) => {
1446 if let Some(at_before) = c.at_before.take() {
1447 c.at_before = Some(Box::new(transform_recursive(*at_before, transform_fn)?));
1448 }
1449 if let Some(end) = c.end.take() {
1450 c.end = Some(Box::new(transform_recursive(*end, transform_fn)?));
1451 }
1452 Expression::Changes(c)
1453 }
1454
1455 // TableArgument: TABLE(expr) or MODEL(expr)
1456 Expression::TableArgument(mut ta) => {
1457 ta.this = transform_recursive(ta.this, transform_fn)?;
1458 Expression::TableArgument(ta)
1459 }
1460
1461 // JoinedTable: (tbl1 JOIN tbl2 ON ...) - recurse into left and join tables
1462 Expression::JoinedTable(mut jt) => {
1463 jt.left = transform_recursive(jt.left, transform_fn)?;
1464 for join in &mut jt.joins {
1465 join.this = transform_recursive(
1466 std::mem::replace(&mut join.this, Expression::Null(crate::expressions::Null)),
1467 transform_fn,
1468 )?;
1469 if let Some(on) = join.on.take() {
1470 join.on = Some(transform_recursive(on, transform_fn)?);
1471 }
1472 }
1473 jt.lateral_views = jt
1474 .lateral_views
1475 .into_iter()
1476 .map(|mut lv| {
1477 lv.this = transform_recursive(lv.this, transform_fn)?;
1478 Ok(lv)
1479 })
1480 .collect::<Result<Vec<_>>>()?;
1481 Expression::JoinedTable(jt)
1482 }
1483
1484 // Lateral: LATERAL func() - recurse into the function expression
1485 Expression::Lateral(mut lat) => {
1486 *lat.this = transform_recursive(*lat.this, transform_fn)?;
1487 Expression::Lateral(lat)
1488 }
1489
1490 // WithinGroup: recurse into order_by items (for NULLS FIRST/LAST etc.)
1491 // but NOT into wg.this - the inner function is handled by StringAggConvert/GroupConcatConvert
1492 // as a unit together with the WithinGroup wrapper
1493 Expression::WithinGroup(mut wg) => {
1494 wg.order_by = wg
1495 .order_by
1496 .into_iter()
1497 .map(|mut o| {
1498 let original = o.this.clone();
1499 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
1500 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
1501 Ok(Expression::Ordered(transformed)) => *transformed,
1502 Ok(_) | Err(_) => o,
1503 }
1504 })
1505 .collect();
1506 Expression::WithinGroup(wg)
1507 }
1508
1509 // Filter: recurse into both the aggregate and the filter condition
1510 Expression::Filter(mut f) => {
1511 f.this = Box::new(transform_recursive(*f.this, transform_fn)?);
1512 f.expression = Box::new(transform_recursive(*f.expression, transform_fn)?);
1513 Expression::Filter(f)
1514 }
1515
1516 // BitwiseOrAgg/BitwiseAndAgg/BitwiseXorAgg: recurse into the aggregate argument
1517 Expression::BitwiseOrAgg(mut f) => {
1518 f.this = transform_recursive(f.this, transform_fn)?;
1519 Expression::BitwiseOrAgg(f)
1520 }
1521 Expression::BitwiseAndAgg(mut f) => {
1522 f.this = transform_recursive(f.this, transform_fn)?;
1523 Expression::BitwiseAndAgg(f)
1524 }
1525 Expression::BitwiseXorAgg(mut f) => {
1526 f.this = transform_recursive(f.this, transform_fn)?;
1527 Expression::BitwiseXorAgg(f)
1528 }
1529 Expression::PipeOperator(mut pipe) => {
1530 pipe.this = transform_recursive(pipe.this, transform_fn)?;
1531 pipe.expression = transform_recursive(pipe.expression, transform_fn)?;
1532 Expression::PipeOperator(pipe)
1533 }
1534
1535 // Pass through leaf nodes unchanged
1536 other => other,
1537 };
1538
1539 // Then apply the transform function
1540 transform_fn(expr)
1541}
1542
/// Returns the tokenizer config, generator config, and expression transform closure
/// for a built-in dialect type. This is the shared implementation used by both
/// `Dialect::get()` and custom dialect construction.
///
/// Every arm is gated on its `dialect-*` cargo feature; disabled dialects are
/// compiled out entirely and fall through to the final `_` arm, which uses the
/// always-compiled `GenericDialect`.
fn configs_for_dialect_type(
    dt: DialectType,
) -> (
    TokenizerConfig,
    GeneratorConfig,
    Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>,
) {
    // Expands to the `(tokenizer, generator, transform)` triple for one dialect
    // struct. NOTE(review): the closure names `$dialect_struct` again rather than
    // capturing `d` — equivalent as long as dialect structs are stateless unit
    // structs, which the construction `let d = $dialect_struct;` suggests; confirm
    // if a dialect ever gains fields.
    macro_rules! dialect_configs {
        ($dialect_struct:ident) => {{
            let d = $dialect_struct;
            (
                d.tokenizer_config(),
                d.generator_config(),
                Box::new(move |e| $dialect_struct.transform_expr(e)),
            )
        }};
    }
    match dt {
        #[cfg(feature = "dialect-postgresql")]
        DialectType::PostgreSQL => dialect_configs!(PostgresDialect),
        #[cfg(feature = "dialect-mysql")]
        DialectType::MySQL => dialect_configs!(MySQLDialect),
        #[cfg(feature = "dialect-bigquery")]
        DialectType::BigQuery => dialect_configs!(BigQueryDialect),
        #[cfg(feature = "dialect-snowflake")]
        DialectType::Snowflake => dialect_configs!(SnowflakeDialect),
        #[cfg(feature = "dialect-duckdb")]
        DialectType::DuckDB => dialect_configs!(DuckDBDialect),
        #[cfg(feature = "dialect-tsql")]
        DialectType::TSQL => dialect_configs!(TSQLDialect),
        #[cfg(feature = "dialect-oracle")]
        DialectType::Oracle => dialect_configs!(OracleDialect),
        #[cfg(feature = "dialect-hive")]
        DialectType::Hive => dialect_configs!(HiveDialect),
        #[cfg(feature = "dialect-spark")]
        DialectType::Spark => dialect_configs!(SparkDialect),
        #[cfg(feature = "dialect-sqlite")]
        DialectType::SQLite => dialect_configs!(SQLiteDialect),
        #[cfg(feature = "dialect-presto")]
        DialectType::Presto => dialect_configs!(PrestoDialect),
        #[cfg(feature = "dialect-trino")]
        DialectType::Trino => dialect_configs!(TrinoDialect),
        #[cfg(feature = "dialect-redshift")]
        DialectType::Redshift => dialect_configs!(RedshiftDialect),
        #[cfg(feature = "dialect-clickhouse")]
        DialectType::ClickHouse => dialect_configs!(ClickHouseDialect),
        #[cfg(feature = "dialect-databricks")]
        DialectType::Databricks => dialect_configs!(DatabricksDialect),
        #[cfg(feature = "dialect-athena")]
        DialectType::Athena => dialect_configs!(AthenaDialect),
        #[cfg(feature = "dialect-teradata")]
        DialectType::Teradata => dialect_configs!(TeradataDialect),
        #[cfg(feature = "dialect-doris")]
        DialectType::Doris => dialect_configs!(DorisDialect),
        #[cfg(feature = "dialect-starrocks")]
        DialectType::StarRocks => dialect_configs!(StarRocksDialect),
        #[cfg(feature = "dialect-materialize")]
        DialectType::Materialize => dialect_configs!(MaterializeDialect),
        #[cfg(feature = "dialect-risingwave")]
        DialectType::RisingWave => dialect_configs!(RisingWaveDialect),
        #[cfg(feature = "dialect-singlestore")]
        DialectType::SingleStore => dialect_configs!(SingleStoreDialect),
        #[cfg(feature = "dialect-cockroachdb")]
        DialectType::CockroachDB => dialect_configs!(CockroachDBDialect),
        #[cfg(feature = "dialect-tidb")]
        DialectType::TiDB => dialect_configs!(TiDBDialect),
        #[cfg(feature = "dialect-druid")]
        DialectType::Druid => dialect_configs!(DruidDialect),
        #[cfg(feature = "dialect-solr")]
        DialectType::Solr => dialect_configs!(SolrDialect),
        #[cfg(feature = "dialect-tableau")]
        DialectType::Tableau => dialect_configs!(TableauDialect),
        #[cfg(feature = "dialect-dune")]
        DialectType::Dune => dialect_configs!(DuneDialect),
        #[cfg(feature = "dialect-fabric")]
        DialectType::Fabric => dialect_configs!(FabricDialect),
        #[cfg(feature = "dialect-drill")]
        DialectType::Drill => dialect_configs!(DrillDialect),
        #[cfg(feature = "dialect-dremio")]
        DialectType::Dremio => dialect_configs!(DremioDialect),
        #[cfg(feature = "dialect-exasol")]
        DialectType::Exasol => dialect_configs!(ExasolDialect),
        #[cfg(feature = "dialect-datafusion")]
        DialectType::DataFusion => dialect_configs!(DataFusionDialect),
        // Catch-all: `DialectType::Generic` plus any dialect whose feature flag
        // is disabled degrades to the generic dialect.
        _ => dialect_configs!(GenericDialect),
    }
}
1633
1634// ---------------------------------------------------------------------------
1635// Custom dialect registry
1636// ---------------------------------------------------------------------------
1637
/// Global registry of user-defined dialects, keyed by dialect name.
/// Configs are stored behind `Arc` so lookups can hand out a shared snapshot
/// without holding the `RwLock` beyond the map access.
static CUSTOM_DIALECT_REGISTRY: LazyLock<RwLock<HashMap<String, Arc<CustomDialectConfig>>>> =
    LazyLock::new(|| RwLock::new(HashMap::new()));
1640
/// Immutable configuration snapshot for one registered custom dialect.
struct CustomDialectConfig {
    /// Registry key; the name callers pass to look the dialect up.
    name: String,
    /// Built-in dialect this custom dialect was derived from.
    base_dialect: DialectType,
    /// Tokenizer settings: the base dialect's, with any builder modifier applied.
    tokenizer_config: TokenizerConfig,
    /// Generator settings: the base dialect's, with any builder modifier applied.
    generator_config: GeneratorConfig,
    /// Optional per-node expression transform; when set it replaces the base
    /// dialect's transform (see `CustomDialectBuilder::transform_fn`).
    transform: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
    /// Optional whole-tree preprocessing pass run before the per-node transform
    /// (see `CustomDialectBuilder::preprocess_fn`).
    preprocess: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
1649
1650/// Fluent builder for creating and registering custom SQL dialects.
1651///
1652/// A custom dialect is based on an existing built-in dialect and allows selective
1653/// overrides of tokenizer configuration, generator configuration, and expression
1654/// transforms.
1655///
1656/// # Example
1657///
1658/// ```rust,ignore
1659/// use polyglot_sql::dialects::{CustomDialectBuilder, DialectType, Dialect};
1660/// use polyglot_sql::generator::NormalizeFunctions;
1661///
1662/// CustomDialectBuilder::new("my_postgres")
1663/// .based_on(DialectType::PostgreSQL)
1664/// .generator_config_modifier(|gc| {
1665/// gc.normalize_functions = NormalizeFunctions::Lower;
1666/// })
1667/// .register()
1668/// .unwrap();
1669///
1670/// let d = Dialect::get_by_name("my_postgres").unwrap();
1671/// let exprs = d.parse("SELECT COUNT(*)").unwrap();
1672/// let sql = d.generate(&exprs[0]).unwrap();
1673/// assert_eq!(sql, "select count(*)");
1674///
1675/// polyglot_sql::unregister_custom_dialect("my_postgres");
1676/// ```
pub struct CustomDialectBuilder {
    /// Name under which the dialect will be registered.
    name: String,
    /// Built-in dialect whose configuration is inherited (defaults to `Generic`).
    base_dialect: DialectType,
    /// One-shot tweak applied to the inherited tokenizer config at `register()` time.
    tokenizer_modifier: Option<Box<dyn FnOnce(&mut TokenizerConfig)>>,
    /// One-shot tweak applied to the inherited generator config at `register()` time.
    generator_modifier: Option<Box<dyn FnOnce(&mut GeneratorConfig)>>,
    /// Replacement per-node expression transform, if provided.
    transform: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
    /// Replacement whole-tree preprocessing pass, if provided.
    preprocess: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
1685
1686impl CustomDialectBuilder {
1687 /// Create a new builder with the given name. Defaults to `Generic` as the base dialect.
1688 pub fn new(name: impl Into<String>) -> Self {
1689 Self {
1690 name: name.into(),
1691 base_dialect: DialectType::Generic,
1692 tokenizer_modifier: None,
1693 generator_modifier: None,
1694 transform: None,
1695 preprocess: None,
1696 }
1697 }
1698
1699 /// Set the base built-in dialect to inherit configuration from.
1700 pub fn based_on(mut self, dialect: DialectType) -> Self {
1701 self.base_dialect = dialect;
1702 self
1703 }
1704
1705 /// Provide a closure that modifies the tokenizer configuration inherited from the base dialect.
1706 pub fn tokenizer_config_modifier<F>(mut self, f: F) -> Self
1707 where
1708 F: FnOnce(&mut TokenizerConfig) + 'static,
1709 {
1710 self.tokenizer_modifier = Some(Box::new(f));
1711 self
1712 }
1713
1714 /// Provide a closure that modifies the generator configuration inherited from the base dialect.
1715 pub fn generator_config_modifier<F>(mut self, f: F) -> Self
1716 where
1717 F: FnOnce(&mut GeneratorConfig) + 'static,
1718 {
1719 self.generator_modifier = Some(Box::new(f));
1720 self
1721 }
1722
1723 /// Set a custom per-node expression transform function.
1724 ///
1725 /// This replaces the base dialect's transform. It is called on every expression
1726 /// node during the recursive transform pass.
1727 pub fn transform_fn<F>(mut self, f: F) -> Self
1728 where
1729 F: Fn(Expression) -> Result<Expression> + Send + Sync + 'static,
1730 {
1731 self.transform = Some(Arc::new(f));
1732 self
1733 }
1734
1735 /// Set a custom whole-tree preprocessing function.
1736 ///
1737 /// This replaces the base dialect's built-in preprocessing. It is called once
1738 /// on the entire expression tree before the recursive per-node transform.
1739 pub fn preprocess_fn<F>(mut self, f: F) -> Self
1740 where
1741 F: Fn(Expression) -> Result<Expression> + Send + Sync + 'static,
1742 {
1743 self.preprocess = Some(Arc::new(f));
1744 self
1745 }
1746
1747 /// Build the custom dialect configuration and register it in the global registry.
1748 ///
1749 /// Returns an error if:
1750 /// - The name collides with a built-in dialect name
1751 /// - A custom dialect with the same name is already registered
1752 pub fn register(self) -> Result<()> {
1753 // Reject names that collide with built-in dialects
1754 if DialectType::from_str(&self.name).is_ok() {
1755 return Err(crate::error::Error::parse(
1756 format!(
1757 "Cannot register custom dialect '{}': name collides with built-in dialect",
1758 self.name
1759 ),
1760 0,
1761 0,
1762 0,
1763 0,
1764 ));
1765 }
1766
1767 // Get base configs
1768 let (mut tok_config, mut gen_config, _base_transform) =
1769 configs_for_dialect_type(self.base_dialect);
1770
1771 // Apply modifiers
1772 if let Some(tok_mod) = self.tokenizer_modifier {
1773 tok_mod(&mut tok_config);
1774 }
1775 if let Some(gen_mod) = self.generator_modifier {
1776 gen_mod(&mut gen_config);
1777 }
1778
1779 let config = CustomDialectConfig {
1780 name: self.name.clone(),
1781 base_dialect: self.base_dialect,
1782 tokenizer_config: tok_config,
1783 generator_config: gen_config,
1784 transform: self.transform,
1785 preprocess: self.preprocess,
1786 };
1787
1788 register_custom_dialect(config)
1789 }
1790}
1791
1792use std::str::FromStr;
1793
1794fn register_custom_dialect(config: CustomDialectConfig) -> Result<()> {
1795 let mut registry = CUSTOM_DIALECT_REGISTRY.write().map_err(|e| {
1796 crate::error::Error::parse(format!("Registry lock poisoned: {}", e), 0, 0, 0, 0)
1797 })?;
1798
1799 if registry.contains_key(&config.name) {
1800 return Err(crate::error::Error::parse(
1801 format!("Custom dialect '{}' is already registered", config.name),
1802 0,
1803 0,
1804 0,
1805 0,
1806 ));
1807 }
1808
1809 registry.insert(config.name.clone(), Arc::new(config));
1810 Ok(())
1811}
1812
1813/// Remove a custom dialect from the global registry.
1814///
1815/// Returns `true` if a dialect with that name was found and removed,
1816/// `false` if no such custom dialect existed.
1817pub fn unregister_custom_dialect(name: &str) -> bool {
1818 if let Ok(mut registry) = CUSTOM_DIALECT_REGISTRY.write() {
1819 registry.remove(name).is_some()
1820 } else {
1821 false
1822 }
1823}
1824
1825fn get_custom_dialect_config(name: &str) -> Option<Arc<CustomDialectConfig>> {
1826 CUSTOM_DIALECT_REGISTRY
1827 .read()
1828 .ok()
1829 .and_then(|registry| registry.get(name).cloned())
1830}
1831
/// Main entry point for dialect-specific SQL operations.
///
/// A `Dialect` bundles together a tokenizer, generator configuration, and expression
/// transformer for a specific SQL database engine. It is the high-level API through
/// which callers parse, generate, transform, and transpile SQL.
///
/// # Usage
///
/// ```rust,ignore
/// use polyglot_sql::dialects::{Dialect, DialectType};
///
/// // Parse PostgreSQL SQL into an AST
/// let pg = Dialect::get(DialectType::PostgreSQL);
/// let exprs = pg.parse("SELECT id, name FROM users WHERE active")?;
///
/// // Transpile from PostgreSQL to BigQuery
/// let results = pg.transpile_to("SELECT NOW()", DialectType::BigQuery)?;
/// assert_eq!(results[0], "SELECT CURRENT_TIMESTAMP()");
/// ```
///
/// Obtain an instance via [`Dialect::get`] or [`Dialect::get_by_name`].
/// The struct is `Send + Sync` safe so it can be shared across threads.
pub struct Dialect {
    /// The dialect this instance operates as. For custom dialects this is the
    /// *base* dialect type they inherit from (see `from_custom_config`).
    dialect_type: DialectType,
    /// Tokenizer preconfigured with this dialect's lexing rules.
    tokenizer: Tokenizer,
    /// Dialect-wide generator configuration, used when no per-expression
    /// config routing applies.
    generator_config: GeneratorConfig,
    /// Per-node expression transform applied during [`Dialect::transform`].
    transformer: Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>,
    /// Optional function to get expression-specific generator config (for hybrid dialects like Athena).
    generator_config_for_expr: Option<Box<dyn Fn(&Expression) -> GeneratorConfig + Send + Sync>>,
    /// Optional custom preprocessing function (overrides built-in preprocess for custom dialects).
    custom_preprocess: Option<Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
1864
1865impl Dialect {
1866 /// Creates a fully configured [`Dialect`] instance for the given [`DialectType`].
1867 ///
1868 /// This is the primary constructor. It initializes the tokenizer, generator config,
1869 /// and expression transformer based on the dialect's [`DialectImpl`] implementation.
1870 /// For hybrid dialects like Athena, it also sets up expression-specific generator
1871 /// config routing.
1872 pub fn get(dialect_type: DialectType) -> Self {
1873 let (tokenizer_config, generator_config, transformer) =
1874 configs_for_dialect_type(dialect_type);
1875
1876 // Set up expression-specific generator config for hybrid dialects
1877 let generator_config_for_expr: Option<
1878 Box<dyn Fn(&Expression) -> GeneratorConfig + Send + Sync>,
1879 > = match dialect_type {
1880 #[cfg(feature = "dialect-athena")]
1881 DialectType::Athena => Some(Box::new(|expr| {
1882 AthenaDialect.generator_config_for_expr(expr)
1883 })),
1884 _ => None,
1885 };
1886
1887 Self {
1888 dialect_type,
1889 tokenizer: Tokenizer::new(tokenizer_config),
1890 generator_config,
1891 transformer,
1892 generator_config_for_expr,
1893 custom_preprocess: None,
1894 }
1895 }
1896
1897 /// Look up a dialect by string name.
1898 ///
1899 /// Checks built-in dialect names first (via [`DialectType::from_str`]), then
1900 /// falls back to the custom dialect registry. Returns `None` if no dialect
1901 /// with the given name exists.
1902 pub fn get_by_name(name: &str) -> Option<Self> {
1903 // Try built-in first
1904 if let Ok(dt) = DialectType::from_str(name) {
1905 return Some(Self::get(dt));
1906 }
1907
1908 // Try custom registry
1909 let config = get_custom_dialect_config(name)?;
1910 Some(Self::from_custom_config(&config))
1911 }
1912
1913 /// Construct a `Dialect` from a custom dialect configuration.
1914 fn from_custom_config(config: &CustomDialectConfig) -> Self {
1915 // Build the transformer: use custom if provided, else use base dialect's
1916 let transformer: Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync> =
1917 if let Some(ref custom_transform) = config.transform {
1918 let t = Arc::clone(custom_transform);
1919 Box::new(move |e| t(e))
1920 } else {
1921 let (_, _, base_transform) = configs_for_dialect_type(config.base_dialect);
1922 base_transform
1923 };
1924
1925 // Build the custom preprocess: use custom if provided
1926 let custom_preprocess: Option<Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>> =
1927 config.preprocess.as_ref().map(|p| {
1928 let p = Arc::clone(p);
1929 Box::new(move |e: Expression| p(e))
1930 as Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>
1931 });
1932
1933 Self {
1934 dialect_type: config.base_dialect,
1935 tokenizer: Tokenizer::new(config.tokenizer_config.clone()),
1936 generator_config: config.generator_config.clone(),
1937 transformer,
1938 generator_config_for_expr: None,
1939 custom_preprocess,
1940 }
1941 }
1942
    /// Returns the [`DialectType`] this instance was constructed for.
    ///
    /// For custom dialects this is the *base* dialect type they inherit from.
    pub fn dialect_type(&self) -> DialectType {
        self.dialect_type
    }
1947
    /// Returns a reference to this dialect's generator configuration.
    pub fn generator_config(&self) -> &GeneratorConfig {
        &self.generator_config
    }
1952
1953 /// Parses a SQL string into a list of [`Expression`] AST nodes.
1954 ///
1955 /// The input may contain multiple semicolon-separated statements; each one
1956 /// produces a separate element in the returned vector. Tokenization uses
1957 /// this dialect's configured tokenizer, and parsing uses the dialect-aware parser.
1958 pub fn parse(&self, sql: &str) -> Result<Vec<Expression>> {
1959 let tokens = self.tokenizer.tokenize(sql)?;
1960 let config = crate::parser::ParserConfig {
1961 dialect: Some(self.dialect_type),
1962 ..Default::default()
1963 };
1964 let mut parser = Parser::with_source(tokens, config, sql.to_string());
1965 parser.parse()
1966 }
1967
    /// Tokenize SQL using this dialect's tokenizer configuration.
    ///
    /// Exposes the raw token stream without parsing, for callers that only
    /// need dialect-aware lexing.
    pub fn tokenize(&self, sql: &str) -> Result<Vec<Token>> {
        self.tokenizer.tokenize(sql)
    }
1972
1973 /// Get the generator config for a specific expression (supports hybrid dialects)
1974 fn get_config_for_expr(&self, expr: &Expression) -> GeneratorConfig {
1975 if let Some(ref config_fn) = self.generator_config_for_expr {
1976 config_fn(expr)
1977 } else {
1978 self.generator_config.clone()
1979 }
1980 }
1981
1982 /// Generates a SQL string from an [`Expression`] AST node.
1983 ///
1984 /// The output uses this dialect's generator configuration for identifier quoting,
1985 /// keyword casing, function name normalization, and syntax style. The result is
1986 /// a single-line (non-pretty) SQL string.
1987 pub fn generate(&self, expr: &Expression) -> Result<String> {
1988 let config = self.get_config_for_expr(expr);
1989 let mut generator = Generator::with_config(config);
1990 generator.generate(expr)
1991 }
1992
1993 /// Generate SQL from an expression with pretty printing enabled
1994 pub fn generate_pretty(&self, expr: &Expression) -> Result<String> {
1995 let mut config = self.get_config_for_expr(expr);
1996 config.pretty = true;
1997 let mut generator = Generator::with_config(config);
1998 generator.generate(expr)
1999 }
2000
2001 /// Generate SQL from an expression with source dialect info (for transpilation)
2002 pub fn generate_with_source(&self, expr: &Expression, source: DialectType) -> Result<String> {
2003 let mut config = self.get_config_for_expr(expr);
2004 config.source_dialect = Some(source);
2005 let mut generator = Generator::with_config(config);
2006 generator.generate(expr)
2007 }
2008
2009 /// Generate SQL from an expression with pretty printing and source dialect info
2010 pub fn generate_pretty_with_source(
2011 &self,
2012 expr: &Expression,
2013 source: DialectType,
2014 ) -> Result<String> {
2015 let mut config = self.get_config_for_expr(expr);
2016 config.pretty = true;
2017 config.source_dialect = Some(source);
2018 let mut generator = Generator::with_config(config);
2019 generator.generate(expr)
2020 }
2021
2022 /// Generate SQL from an expression with forced identifier quoting (identify=True)
2023 pub fn generate_with_identify(&self, expr: &Expression) -> Result<String> {
2024 let mut config = self.get_config_for_expr(expr);
2025 config.always_quote_identifiers = true;
2026 let mut generator = Generator::with_config(config);
2027 generator.generate(expr)
2028 }
2029
2030 /// Generate SQL from an expression with pretty printing and forced identifier quoting
2031 pub fn generate_pretty_with_identify(&self, expr: &Expression) -> Result<String> {
2032 let mut config = self.generator_config.clone();
2033 config.pretty = true;
2034 config.always_quote_identifiers = true;
2035 let mut generator = Generator::with_config(config);
2036 generator.generate(expr)
2037 }
2038
2039 /// Generate SQL from an expression with caller-specified config overrides
2040 pub fn generate_with_overrides(
2041 &self,
2042 expr: &Expression,
2043 overrides: impl FnOnce(&mut GeneratorConfig),
2044 ) -> Result<String> {
2045 let mut config = self.get_config_for_expr(expr);
2046 overrides(&mut config);
2047 let mut generator = Generator::with_config(config);
2048 generator.generate(expr)
2049 }
2050
2051 /// Transforms an expression tree to conform to this dialect's syntax and semantics.
2052 ///
2053 /// The transformation proceeds in two phases:
2054 /// 1. **Preprocessing** -- whole-tree structural rewrites such as eliminating QUALIFY,
2055 /// ensuring boolean predicates, or converting DISTINCT ON to a window-function pattern.
2056 /// 2. **Recursive per-node transform** -- a bottom-up pass via [`transform_recursive`]
2057 /// that applies this dialect's [`DialectImpl::transform_expr`] to every node.
2058 ///
2059 /// This method is used both during transpilation (to rewrite an AST for a target dialect)
2060 /// and for identity transforms (normalizing SQL within the same dialect).
2061 pub fn transform(&self, expr: Expression) -> Result<Expression> {
2062 // Apply preprocessing transforms based on dialect
2063 let preprocessed = self.preprocess(expr)?;
2064 // Then apply recursive transformation
2065 transform_recursive(preprocessed, &self.transformer)
2066 }
2067
    /// Apply dialect-specific whole-tree preprocessing transforms.
    ///
    /// Runs once on the entire expression tree before the recursive per-node
    /// transform (see [`Dialect::transform`]). When a custom preprocess
    /// function is installed (custom dialects), it fully replaces the
    /// built-in per-dialect logic below.
    ///
    /// NOTE(review): the relative order of the `transforms::*` calls within
    /// each arm looks significant (later passes appear to consume the output
    /// shape of earlier ones) — preserve ordering when editing.
    fn preprocess(&self, expr: Expression) -> Result<Expression> {
        // If a custom preprocess function is set, use it instead of the built-in logic
        if let Some(ref custom_preprocess) = self.custom_preprocess {
            return custom_preprocess(expr);
        }

        #[cfg(any(
            feature = "dialect-mysql",
            feature = "dialect-postgresql",
            feature = "dialect-bigquery",
            feature = "dialect-snowflake",
            feature = "dialect-tsql",
            feature = "dialect-spark",
            feature = "dialect-databricks",
            feature = "dialect-hive",
            feature = "dialect-sqlite",
            feature = "dialect-trino",
            feature = "dialect-presto",
            feature = "dialect-duckdb",
            feature = "dialect-redshift",
            feature = "dialect-starrocks",
            feature = "dialect-oracle",
            feature = "dialect-clickhouse",
        ))]
        use crate::transforms;

        match self.dialect_type {
            // MySQL doesn't support QUALIFY, DISTINCT ON, FULL OUTER JOIN
            // MySQL doesn't natively support GENERATE_DATE_ARRAY (expand to recursive CTE)
            #[cfg(feature = "dialect-mysql")]
            DialectType::MySQL => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_full_outer_join(expr)?;
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
                Ok(expr)
            }
            // PostgreSQL doesn't support QUALIFY
            // PostgreSQL: UNNEST(GENERATE_SERIES) -> subquery wrapping
            // PostgreSQL: Normalize SET ... TO to SET ... = in CREATE FUNCTION
            #[cfg(feature = "dialect-postgresql")]
            DialectType::PostgreSQL => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::unwrap_unnest_generate_series_for_postgres(expr)?;
                // Normalize SET ... TO to SET ... = in CREATE FUNCTION
                // Only normalize when sqlglot would fully parse (no body) —
                // sqlglot falls back to Command for complex function bodies,
                // preserving the original text including TO.
                let expr = if let Expression::CreateFunction(mut cf) = expr {
                    if cf.body.is_none() {
                        for opt in &mut cf.set_options {
                            if let crate::expressions::FunctionSetValue::Value { use_to, .. } =
                                &mut opt.value
                            {
                                *use_to = false;
                            }
                        }
                    }
                    Expression::CreateFunction(cf)
                } else {
                    expr
                };
                Ok(expr)
            }
            // BigQuery doesn't support DISTINCT ON or CTE column aliases
            #[cfg(feature = "dialect-bigquery")]
            DialectType::BigQuery => {
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::pushdown_cte_column_names(expr)?;
                let expr = transforms::explode_projection_to_unnest(expr, DialectType::BigQuery)?;
                Ok(expr)
            }
            // Snowflake
            #[cfg(feature = "dialect-snowflake")]
            DialectType::Snowflake => {
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::eliminate_window_clause(expr)?;
                let expr = transforms::snowflake_flatten_projection_to_unnest(expr)?;
                Ok(expr)
            }
            // TSQL doesn't support QUALIFY
            // TSQL requires boolean expressions in WHERE/HAVING (no implicit truthiness)
            // TSQL doesn't support CTEs in subqueries (hoist to top level)
            // NOTE: no_limit_order_by_union is handled in cross_dialect_normalize (not preprocess)
            // to avoid breaking TSQL identity tests where ORDER BY on UNION is valid
            #[cfg(feature = "dialect-tsql")]
            DialectType::TSQL => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::ensure_bools(expr)?;
                let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
                let expr = transforms::move_ctes_to_top_level(expr)?;
                let expr = transforms::qualify_derived_table_outputs(expr)?;
                Ok(expr)
            }
            // Spark doesn't support QUALIFY (but Databricks does)
            // Spark doesn't support CTEs in subqueries (hoist to top level)
            #[cfg(feature = "dialect-spark")]
            DialectType::Spark => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::add_auto_table_alias(expr)?;
                let expr = transforms::simplify_nested_paren_values(expr)?;
                let expr = transforms::move_ctes_to_top_level(expr)?;
                Ok(expr)
            }
            // Databricks supports QUALIFY natively
            // Databricks doesn't support CTEs in subqueries (hoist to top level)
            #[cfg(feature = "dialect-databricks")]
            DialectType::Databricks => {
                let expr = transforms::add_auto_table_alias(expr)?;
                let expr = transforms::simplify_nested_paren_values(expr)?;
                let expr = transforms::move_ctes_to_top_level(expr)?;
                Ok(expr)
            }
            // Hive doesn't support QUALIFY or CTEs in subqueries
            #[cfg(feature = "dialect-hive")]
            DialectType::Hive => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::move_ctes_to_top_level(expr)?;
                Ok(expr)
            }
            // SQLite doesn't support QUALIFY
            #[cfg(feature = "dialect-sqlite")]
            DialectType::SQLite => {
                let expr = transforms::eliminate_qualify(expr)?;
                Ok(expr)
            }
            // Trino doesn't support QUALIFY
            #[cfg(feature = "dialect-trino")]
            DialectType::Trino => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::explode_projection_to_unnest(expr, DialectType::Trino)?;
                Ok(expr)
            }
            // Presto doesn't support QUALIFY or WINDOW clause
            #[cfg(feature = "dialect-presto")]
            DialectType::Presto => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_window_clause(expr)?;
                let expr = transforms::explode_projection_to_unnest(expr, DialectType::Presto)?;
                Ok(expr)
            }
            // DuckDB supports QUALIFY - no elimination needed
            // Expand POSEXPLODE to GENERATE_SUBSCRIPTS + UNNEST
            // Expand LIKE ANY / ILIKE ANY to OR chains (DuckDB doesn't support quantifiers)
            #[cfg(feature = "dialect-duckdb")]
            DialectType::DuckDB => {
                let expr = transforms::expand_posexplode_duckdb(expr)?;
                let expr = transforms::expand_like_any(expr)?;
                Ok(expr)
            }
            // Redshift doesn't support QUALIFY, WINDOW clause, or GENERATE_DATE_ARRAY
            #[cfg(feature = "dialect-redshift")]
            DialectType::Redshift => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_window_clause(expr)?;
                let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
                Ok(expr)
            }
            // StarRocks doesn't support BETWEEN in DELETE statements or QUALIFY
            #[cfg(feature = "dialect-starrocks")]
            DialectType::StarRocks => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::expand_between_in_delete(expr)?;
                Ok(expr)
            }
            // DataFusion supports QUALIFY and semi/anti joins natively
            #[cfg(feature = "dialect-datafusion")]
            DialectType::DataFusion => Ok(expr),
            // Oracle doesn't support QUALIFY
            #[cfg(feature = "dialect-oracle")]
            DialectType::Oracle => {
                let expr = transforms::eliminate_qualify(expr)?;
                Ok(expr)
            }
            // Drill - no special preprocessing needed
            #[cfg(feature = "dialect-drill")]
            DialectType::Drill => Ok(expr),
            // Teradata - no special preprocessing needed
            #[cfg(feature = "dialect-teradata")]
            DialectType::Teradata => Ok(expr),
            // ClickHouse doesn't support ORDER BY/LIMIT directly on UNION
            #[cfg(feature = "dialect-clickhouse")]
            DialectType::ClickHouse => {
                let expr = transforms::no_limit_order_by_union(expr)?;
                Ok(expr)
            }
            // Other dialects - no preprocessing
            _ => Ok(expr),
        }
    }
2261
    /// Transpile SQL from this dialect to another.
    ///
    /// Parses `sql` as this (source) dialect, rewrites the AST for `target`,
    /// and renders one output string per input statement.
    pub fn transpile_to(&self, sql: &str, target: DialectType) -> Result<Vec<String>> {
        self.transpile_to_inner(sql, target, false)
    }
2266
    /// Transpile SQL from this dialect to another with pretty printing enabled.
    ///
    /// Identical to [`Dialect::transpile_to`] except the output is pretty-printed.
    pub fn transpile_to_pretty(&self, sql: &str, target: DialectType) -> Result<Vec<String>> {
        self.transpile_to_inner(sql, target, true)
    }
2271
2272 #[cfg(not(feature = "transpile"))]
2273 fn transpile_to_inner(
2274 &self,
2275 sql: &str,
2276 target: DialectType,
2277 pretty: bool,
2278 ) -> Result<Vec<String>> {
2279 // Without the transpile feature, only same-dialect or to/from generic is supported
2280 if self.dialect_type != target
2281 && self.dialect_type != DialectType::Generic
2282 && target != DialectType::Generic
2283 {
2284 return Err(crate::error::Error::parse(
2285 "Cross-dialect transpilation not available in this build",
2286 0,
2287 0,
2288 0,
2289 0,
2290 ));
2291 }
2292
2293 let expressions = self.parse(sql)?;
2294 let target_dialect = Dialect::get(target);
2295 let generic_identity =
2296 self.dialect_type == DialectType::Generic && target == DialectType::Generic;
2297
2298 if generic_identity {
2299 return expressions
2300 .into_iter()
2301 .map(|expr| {
2302 if pretty {
2303 target_dialect.generate_pretty_with_source(&expr, self.dialect_type)
2304 } else {
2305 target_dialect.generate_with_source(&expr, self.dialect_type)
2306 }
2307 })
2308 .collect();
2309 }
2310
2311 expressions
2312 .into_iter()
2313 .map(|expr| {
2314 let transformed = target_dialect.transform(expr)?;
2315 if pretty {
2316 target_dialect.generate_pretty_with_source(&transformed, self.dialect_type)
2317 } else {
2318 target_dialect.generate_with_source(&transformed, self.dialect_type)
2319 }
2320 })
2321 .collect()
2322 }
2323
    /// Full cross-dialect transpilation pipeline (compiled with the `transpile` feature).
    ///
    /// Per parsed statement, in order: source-specific AST normalizations,
    /// cross-dialect semantic normalization, target-specific rewrites, then
    /// the target dialect's transform and final generation. `pretty` selects
    /// pretty-printed output. The inline comments below document why each
    /// normalization step exists and, where relevant, its required ordering.
    #[cfg(feature = "transpile")]
    fn transpile_to_inner(
        &self,
        sql: &str,
        target: DialectType,
        pretty: bool,
    ) -> Result<Vec<String>> {
        let expressions = self.parse(sql)?;
        let target_dialect = Dialect::get(target);
        let generic_identity =
            self.dialect_type == DialectType::Generic && target == DialectType::Generic;

        if generic_identity {
            return expressions
                .into_iter()
                .map(|expr| {
                    if pretty {
                        target_dialect.generate_pretty_with_source(&expr, self.dialect_type)
                    } else {
                        target_dialect.generate_with_source(&expr, self.dialect_type)
                    }
                })
                .collect();
        }

        expressions
            .into_iter()
            .map(|expr| {
                // DuckDB source: normalize VARCHAR/CHAR to TEXT (DuckDB doesn't support
                // VARCHAR length constraints). This emulates Python sqlglot's DuckDB parser
                // where VARCHAR_LENGTH = None and VARCHAR maps to TEXT.
                let expr = if matches!(self.dialect_type, DialectType::DuckDB) {
                    use crate::expressions::DataType as DT;
                    transform_recursive(expr, &|e| match e {
                        Expression::DataType(DT::VarChar { .. }) => {
                            Ok(Expression::DataType(DT::Text))
                        }
                        Expression::DataType(DT::Char { .. }) => Ok(Expression::DataType(DT::Text)),
                        _ => Ok(e),
                    })?
                } else {
                    expr
                };

                // When source and target differ, first normalize the source dialect's
                // AST constructs to standard SQL, so that the target dialect can handle them.
                // This handles cases like Snowflake's SQUARE -> POWER, DIV0 -> CASE, etc.
                let normalized =
                    if self.dialect_type != target && self.dialect_type != DialectType::Generic {
                        self.transform(expr)?
                    } else {
                        expr
                    };

                // For TSQL source targeting non-TSQL: unwrap ISNULL(JSON_QUERY(...), JSON_VALUE(...))
                // to just JSON_QUERY(...) so cross_dialect_normalize can convert it cleanly.
                // The TSQL read transform wraps JsonQuery in ISNULL for identity, but for
                // cross-dialect transpilation we need the unwrapped JSON_QUERY.
                let normalized =
                    if matches!(self.dialect_type, DialectType::TSQL | DialectType::Fabric)
                        && !matches!(target, DialectType::TSQL | DialectType::Fabric)
                    {
                        transform_recursive(normalized, &|e| {
                            if let Expression::Function(ref f) = e {
                                if f.name.eq_ignore_ascii_case("ISNULL") && f.args.len() == 2 {
                                    // Check if first arg is JSON_QUERY and second is JSON_VALUE
                                    if let (
                                        Expression::Function(ref jq),
                                        Expression::Function(ref jv),
                                    ) = (&f.args[0], &f.args[1])
                                    {
                                        if jq.name.eq_ignore_ascii_case("JSON_QUERY")
                                            && jv.name.eq_ignore_ascii_case("JSON_VALUE")
                                        {
                                            // Unwrap: return just JSON_QUERY(...)
                                            return Ok(f.args[0].clone());
                                        }
                                    }
                                }
                            }
                            Ok(e)
                        })?
                    } else {
                        normalized
                    };

                // Snowflake source to non-Snowflake target: CURRENT_TIME -> LOCALTIME
                // Snowflake's CURRENT_TIME is equivalent to LOCALTIME in other dialects.
                // Python sqlglot parses Snowflake's CURRENT_TIME as Localtime expression.
                let normalized = if matches!(self.dialect_type, DialectType::Snowflake)
                    && !matches!(target, DialectType::Snowflake)
                {
                    transform_recursive(normalized, &|e| {
                        if let Expression::Function(ref f) = e {
                            if f.name.eq_ignore_ascii_case("CURRENT_TIME") {
                                return Ok(Expression::Localtime(Box::new(
                                    crate::expressions::Localtime { this: None },
                                )));
                            }
                        }
                        Ok(e)
                    })?
                } else {
                    normalized
                };

                // Snowflake source to DuckDB target: REPEAT(' ', n) -> REPEAT(' ', CAST(n AS BIGINT))
                // Snowflake's SPACE(n) is converted to REPEAT(' ', n) by the Snowflake source
                // transform. DuckDB requires the count argument to be BIGINT.
                let normalized = if matches!(self.dialect_type, DialectType::Snowflake)
                    && matches!(target, DialectType::DuckDB)
                {
                    transform_recursive(normalized, &|e| {
                        if let Expression::Function(ref f) = e {
                            if f.name.eq_ignore_ascii_case("REPEAT") && f.args.len() == 2 {
                                // Check if first arg is space string literal
                                if let Expression::Literal(crate::expressions::Literal::String(
                                    ref s,
                                )) = f.args[0]
                                {
                                    if s == " " {
                                        // Wrap second arg in CAST(... AS BIGINT) if not already
                                        if !matches!(f.args[1], Expression::Cast(_)) {
                                            let mut new_args = f.args.clone();
                                            new_args[1] = Expression::Cast(Box::new(
                                                crate::expressions::Cast {
                                                    this: new_args[1].clone(),
                                                    to: crate::expressions::DataType::BigInt {
                                                        length: None,
                                                    },
                                                    trailing_comments: Vec::new(),
                                                    double_colon_syntax: false,
                                                    format: None,
                                                    default: None,
                                                    inferred_type: None,
                                                },
                                            ));
                                            return Ok(Expression::Function(Box::new(
                                                crate::expressions::Function {
                                                    name: f.name.clone(),
                                                    args: new_args,
                                                    distinct: f.distinct,
                                                    trailing_comments: f.trailing_comments.clone(),
                                                    use_bracket_syntax: f.use_bracket_syntax,
                                                    no_parens: f.no_parens,
                                                    quoted: f.quoted,
                                                    span: None,
                                                    inferred_type: None,
                                                },
                                            )));
                                        }
                                    }
                                }
                            }
                        }
                        Ok(e)
                    })?
                } else {
                    normalized
                };

                // Propagate struct field names in arrays (for BigQuery source to non-BigQuery target)
                // BigQuery->BigQuery should NOT propagate names (BigQuery handles implicit inheritance)
                let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
                    && !matches!(target, DialectType::BigQuery)
                {
                    crate::transforms::propagate_struct_field_names(normalized)?
                } else {
                    normalized
                };

                // Apply cross-dialect semantic normalizations
                let normalized =
                    Self::cross_dialect_normalize(normalized, self.dialect_type, target)?;

                // For DuckDB target from BigQuery source: wrap UNNEST of struct arrays in
                // (SELECT UNNEST(..., max_depth => 2)) subquery
                // Must run BEFORE unnest_alias_to_column_alias since it changes alias structure
                let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
                    && matches!(target, DialectType::DuckDB)
                {
                    crate::transforms::wrap_duckdb_unnest_struct(normalized)?
                } else {
                    normalized
                };

                // Convert BigQuery UNNEST aliases to column-alias format for DuckDB/Presto/Spark
                // UNNEST(arr) AS x -> UNNEST(arr) AS _t0(x)
                let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
                    && matches!(
                        target,
                        DialectType::DuckDB
                            | DialectType::Presto
                            | DialectType::Trino
                            | DialectType::Athena
                            | DialectType::Spark
                            | DialectType::Databricks
                    ) {
                    crate::transforms::unnest_alias_to_column_alias(normalized)?
                } else if matches!(self.dialect_type, DialectType::BigQuery)
                    && matches!(target, DialectType::BigQuery | DialectType::Redshift)
                {
                    // For BigQuery/Redshift targets: move UNNEST FROM items to CROSS JOINs
                    // but don't convert alias format (no _t0 wrapper)
                    let result = crate::transforms::unnest_from_to_cross_join(normalized)?;
                    // For Redshift: strip UNNEST when arg is a column reference path
                    if matches!(target, DialectType::Redshift) {
                        crate::transforms::strip_unnest_column_refs(result)?
                    } else {
                        result
                    }
                } else {
                    normalized
                };

                // For Presto/Trino targets from PostgreSQL/Redshift source:
                // Wrap UNNEST aliases from GENERATE_SERIES conversion: AS s -> AS _u(s)
                let normalized = if matches!(
                    self.dialect_type,
                    DialectType::PostgreSQL | DialectType::Redshift
                ) && matches!(
                    target,
                    DialectType::Presto | DialectType::Trino | DialectType::Athena
                ) {
                    crate::transforms::wrap_unnest_join_aliases(normalized)?
                } else {
                    normalized
                };

                // Eliminate DISTINCT ON with target-dialect awareness
                // This must happen after source transform (which may produce DISTINCT ON)
                // and before target transform, with knowledge of the target dialect's NULL ordering behavior
                let normalized =
                    crate::transforms::eliminate_distinct_on_for_dialect(normalized, Some(target))?;

                // GENERATE_DATE_ARRAY in UNNEST -> Snowflake ARRAY_GENERATE_RANGE + DATEADD
                let normalized = if matches!(target, DialectType::Snowflake) {
                    Self::transform_generate_date_array_snowflake(normalized)?
                } else {
                    normalized
                };

                // CROSS JOIN UNNEST -> LATERAL VIEW EXPLODE/INLINE for Spark/Hive/Databricks
                let normalized = if matches!(
                    target,
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive
                ) {
                    crate::transforms::unnest_to_explode_select(normalized)?
                } else {
                    normalized
                };

                // Wrap UNION with ORDER BY/LIMIT in a subquery for dialects that require it
                let normalized = if matches!(target, DialectType::ClickHouse | DialectType::TSQL) {
                    crate::transforms::no_limit_order_by_union(normalized)?
                } else {
                    normalized
                };

                // TSQL: Convert COUNT(*) -> COUNT_BIG(*) when source is not TSQL/Fabric
                // Python sqlglot does this in the TSQL generator, but we can't do it there
                // because it would break TSQL -> TSQL identity
                let normalized = if matches!(target, DialectType::TSQL | DialectType::Fabric)
                    && !matches!(self.dialect_type, DialectType::TSQL | DialectType::Fabric)
                {
                    transform_recursive(normalized, &|e| {
                        if let Expression::Count(ref c) = e {
                            // Build COUNT_BIG(...) as an AggregateFunction
                            let args = if c.star {
                                vec![Expression::Star(crate::expressions::Star {
                                    table: None,
                                    except: None,
                                    replace: None,
                                    rename: None,
                                    trailing_comments: Vec::new(),
                                    span: None,
                                })]
                            } else if let Some(ref this) = c.this {
                                vec![this.clone()]
                            } else {
                                vec![]
                            };
                            Ok(Expression::AggregateFunction(Box::new(
                                crate::expressions::AggregateFunction {
                                    name: "COUNT_BIG".to_string(),
                                    args,
                                    distinct: c.distinct,
                                    filter: c.filter.clone(),
                                    order_by: Vec::new(),
                                    limit: None,
                                    ignore_nulls: None,
                                    inferred_type: None,
                                },
                            )))
                        } else {
                            Ok(e)
                        }
                    })?
                } else {
                    normalized
                };

                let transformed = target_dialect.transform(normalized)?;
                let mut sql = if pretty {
                    target_dialect.generate_pretty_with_source(&transformed, self.dialect_type)?
                } else {
                    target_dialect.generate_with_source(&transformed, self.dialect_type)?
                };

                // Align a known Snowflake pretty-print edge case with Python sqlglot output.
                if pretty && target == DialectType::Snowflake {
                    sql = Self::normalize_snowflake_pretty(sql);
                }

                Ok(sql)
            })
            .collect()
    }
2642}
2643
2644// Transpile-only methods: cross-dialect normalization and helpers
2645#[cfg(feature = "transpile")]
2646impl Dialect {
2647 /// Transform BigQuery GENERATE_DATE_ARRAY in UNNEST for Snowflake target.
2648 /// Converts:
2649 /// SELECT ..., alias, ... FROM t CROSS JOIN UNNEST(GENERATE_DATE_ARRAY(start, end, INTERVAL '1' unit)) AS alias
2650 /// To:
2651 /// SELECT ..., DATEADD(unit, CAST(alias AS INT), CAST(start AS DATE)) AS alias, ...
2652 /// FROM t, LATERAL FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (DATEDIFF(unit, start, end) + 1 - 1) + 1)) AS _t0(seq, key, path, index, alias, this)
2653 fn transform_generate_date_array_snowflake(expr: Expression) -> Result<Expression> {
2654 use crate::expressions::*;
2655 transform_recursive(expr, &|e| {
2656 // Handle ARRAY_SIZE(GENERATE_DATE_ARRAY(...)) -> ARRAY_SIZE((SELECT ARRAY_AGG(*) FROM subquery))
2657 if let Expression::ArraySize(ref af) = e {
2658 if let Expression::Function(ref f) = af.this {
2659 if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY") && f.args.len() >= 2 {
2660 let result = Self::convert_array_size_gda_snowflake(f)?;
2661 return Ok(result);
2662 }
2663 }
2664 }
2665
2666 let Expression::Select(mut sel) = e else {
2667 return Ok(e);
2668 };
2669
2670 // Find joins with UNNEST containing GenerateSeries (from GENERATE_DATE_ARRAY conversion)
2671 let mut gda_info: Option<(String, Expression, Expression, String)> = None; // (alias_name, start_expr, end_expr, unit)
2672 let mut gda_join_idx: Option<usize> = None;
2673
2674 for (idx, join) in sel.joins.iter().enumerate() {
2675 // The join.this may be:
2676 // 1. Unnest(UnnestFunc { alias: Some("mnth"), ... })
2677 // 2. Alias(Alias { this: Unnest(UnnestFunc { alias: None, ... }), alias: "mnth", ... })
2678 let (unnest_ref, alias_name) = match &join.this {
2679 Expression::Unnest(ref unnest) => {
2680 let alias = unnest.alias.as_ref().map(|id| id.name.clone());
2681 (Some(unnest.as_ref()), alias)
2682 }
2683 Expression::Alias(ref a) => {
2684 if let Expression::Unnest(ref unnest) = a.this {
2685 (Some(unnest.as_ref()), Some(a.alias.name.clone()))
2686 } else {
2687 (None, None)
2688 }
2689 }
2690 _ => (None, None),
2691 };
2692
2693 if let (Some(unnest), Some(alias)) = (unnest_ref, alias_name) {
2694 // Check the main expression (this) of the UNNEST for GENERATE_DATE_ARRAY function
2695 if let Expression::Function(ref f) = unnest.this {
2696 if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY") && f.args.len() >= 2 {
2697 let start_expr = f.args[0].clone();
2698 let end_expr = f.args[1].clone();
2699 let step = f.args.get(2).cloned();
2700
2701 // Extract unit from step interval
2702 let unit = if let Some(Expression::Interval(ref iv)) = step {
2703 if let Some(IntervalUnitSpec::Simple { ref unit, .. }) = iv.unit {
2704 Some(format!("{:?}", unit).to_uppercase())
2705 } else if let Some(ref this) = iv.this {
2706 // The interval may be stored as a string like "1 MONTH"
2707 if let Expression::Literal(Literal::String(ref s)) = this {
2708 let parts: Vec<&str> = s.split_whitespace().collect();
2709 if parts.len() == 2 {
2710 Some(parts[1].to_uppercase())
2711 } else if parts.len() == 1 {
2712 // Single word like "MONTH" or just "1"
2713 let upper = parts[0].to_uppercase();
2714 if matches!(
2715 upper.as_str(),
2716 "YEAR"
2717 | "QUARTER"
2718 | "MONTH"
2719 | "WEEK"
2720 | "DAY"
2721 | "HOUR"
2722 | "MINUTE"
2723 | "SECOND"
2724 ) {
2725 Some(upper)
2726 } else {
2727 None
2728 }
2729 } else {
2730 None
2731 }
2732 } else {
2733 None
2734 }
2735 } else {
2736 None
2737 }
2738 } else {
2739 None
2740 };
2741
2742 if let Some(unit_str) = unit {
2743 gda_info = Some((alias, start_expr, end_expr, unit_str));
2744 gda_join_idx = Some(idx);
2745 }
2746 }
2747 }
2748 }
2749 if gda_info.is_some() {
2750 break;
2751 }
2752 }
2753
2754 let Some((alias_name, start_expr, end_expr, unit_str)) = gda_info else {
2755 // Also check FROM clause for UNNEST(GENERATE_DATE_ARRAY(...)) patterns
2756 // This handles Generic->Snowflake where GENERATE_DATE_ARRAY is in FROM, not in JOIN
2757 let result = Self::try_transform_from_gda_snowflake(sel);
2758 return result;
2759 };
2760 let join_idx = gda_join_idx.unwrap();
2761
2762 // Build ARRAY_GENERATE_RANGE(0, (DATEDIFF(unit, start, end) + 1 - 1) + 1)
2763 let datediff = Expression::Function(Box::new(Function::new(
2764 "DATEDIFF".to_string(),
2765 vec![
2766 Expression::Column(Column {
2767 name: Identifier::new(&unit_str),
2768 table: None,
2769 join_mark: false,
2770 trailing_comments: vec![],
2771 span: None,
2772 inferred_type: None,
2773 }),
2774 start_expr.clone(),
2775 end_expr.clone(),
2776 ],
2777 )));
2778 // (DATEDIFF(...) + 1 - 1) + 1
2779 let plus_one = Expression::Add(Box::new(BinaryOp {
2780 left: datediff,
2781 right: Expression::Literal(Literal::Number("1".to_string())),
2782 left_comments: vec![],
2783 operator_comments: vec![],
2784 trailing_comments: vec![],
2785 inferred_type: None,
2786 }));
2787 let minus_one = Expression::Sub(Box::new(BinaryOp {
2788 left: plus_one,
2789 right: Expression::Literal(Literal::Number("1".to_string())),
2790 left_comments: vec![],
2791 operator_comments: vec![],
2792 trailing_comments: vec![],
2793 inferred_type: None,
2794 }));
2795 let paren_inner = Expression::Paren(Box::new(Paren {
2796 this: minus_one,
2797 trailing_comments: vec![],
2798 }));
2799 let outer_plus_one = Expression::Add(Box::new(BinaryOp {
2800 left: paren_inner,
2801 right: Expression::Literal(Literal::Number("1".to_string())),
2802 left_comments: vec![],
2803 operator_comments: vec![],
2804 trailing_comments: vec![],
2805 inferred_type: None,
2806 }));
2807
2808 let array_gen_range = Expression::Function(Box::new(Function::new(
2809 "ARRAY_GENERATE_RANGE".to_string(),
2810 vec![
2811 Expression::Literal(Literal::Number("0".to_string())),
2812 outer_plus_one,
2813 ],
2814 )));
2815
2816 // Build FLATTEN(INPUT => ARRAY_GENERATE_RANGE(...))
2817 let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
2818 name: Identifier::new("INPUT"),
2819 value: array_gen_range,
2820 separator: crate::expressions::NamedArgSeparator::DArrow,
2821 }));
2822 let flatten = Expression::Function(Box::new(Function::new(
2823 "FLATTEN".to_string(),
2824 vec![flatten_input],
2825 )));
2826
2827 // Build LATERAL FLATTEN(...) AS _t0(seq, key, path, index, alias, this)
2828 let alias_table = Alias {
2829 this: flatten,
2830 alias: Identifier::new("_t0"),
2831 column_aliases: vec![
2832 Identifier::new("seq"),
2833 Identifier::new("key"),
2834 Identifier::new("path"),
2835 Identifier::new("index"),
2836 Identifier::new(&alias_name),
2837 Identifier::new("this"),
2838 ],
2839 pre_alias_comments: vec![],
2840 trailing_comments: vec![],
2841 inferred_type: None,
2842 };
2843 let lateral_expr = Expression::Lateral(Box::new(Lateral {
2844 this: Box::new(Expression::Alias(Box::new(alias_table))),
2845 view: None,
2846 outer: None,
2847 alias: None,
2848 alias_quoted: false,
2849 cross_apply: None,
2850 ordinality: None,
2851 column_aliases: vec![],
2852 }));
2853
2854 // Remove the original join and add to FROM expressions
2855 sel.joins.remove(join_idx);
2856 if let Some(ref mut from) = sel.from {
2857 from.expressions.push(lateral_expr);
2858 }
2859
2860 // Build DATEADD(unit, CAST(alias AS INT), CAST(start AS DATE))
2861 let dateadd_expr = Expression::Function(Box::new(Function::new(
2862 "DATEADD".to_string(),
2863 vec![
2864 Expression::Column(Column {
2865 name: Identifier::new(&unit_str),
2866 table: None,
2867 join_mark: false,
2868 trailing_comments: vec![],
2869 span: None,
2870 inferred_type: None,
2871 }),
2872 Expression::Cast(Box::new(Cast {
2873 this: Expression::Column(Column {
2874 name: Identifier::new(&alias_name),
2875 table: None,
2876 join_mark: false,
2877 trailing_comments: vec![],
2878 span: None,
2879 inferred_type: None,
2880 }),
2881 to: DataType::Int {
2882 length: None,
2883 integer_spelling: false,
2884 },
2885 trailing_comments: vec![],
2886 double_colon_syntax: false,
2887 format: None,
2888 default: None,
2889 inferred_type: None,
2890 })),
2891 Expression::Cast(Box::new(Cast {
2892 this: start_expr.clone(),
2893 to: DataType::Date,
2894 trailing_comments: vec![],
2895 double_colon_syntax: false,
2896 format: None,
2897 default: None,
2898 inferred_type: None,
2899 })),
2900 ],
2901 )));
2902
2903 // Replace references to the alias in the SELECT list
2904 let new_exprs: Vec<Expression> = sel
2905 .expressions
2906 .iter()
2907 .map(|expr| Self::replace_column_ref_with_dateadd(expr, &alias_name, &dateadd_expr))
2908 .collect();
2909 sel.expressions = new_exprs;
2910
2911 Ok(Expression::Select(sel))
2912 })
2913 }
2914
2915 /// Helper: replace column references to `alias_name` with dateadd expression
2916 fn replace_column_ref_with_dateadd(
2917 expr: &Expression,
2918 alias_name: &str,
2919 dateadd: &Expression,
2920 ) -> Expression {
2921 use crate::expressions::*;
2922 match expr {
2923 Expression::Column(c) if c.name.name == alias_name && c.table.is_none() => {
2924 // Plain column reference -> DATEADD(...) AS alias_name
2925 Expression::Alias(Box::new(Alias {
2926 this: dateadd.clone(),
2927 alias: Identifier::new(alias_name),
2928 column_aliases: vec![],
2929 pre_alias_comments: vec![],
2930 trailing_comments: vec![],
2931 inferred_type: None,
2932 }))
2933 }
2934 Expression::Alias(a) => {
2935 // Check if the inner expression references the alias
2936 let new_this = Self::replace_column_ref_inner(&a.this, alias_name, dateadd);
2937 Expression::Alias(Box::new(Alias {
2938 this: new_this,
2939 alias: a.alias.clone(),
2940 column_aliases: a.column_aliases.clone(),
2941 pre_alias_comments: a.pre_alias_comments.clone(),
2942 trailing_comments: a.trailing_comments.clone(),
2943 inferred_type: None,
2944 }))
2945 }
2946 _ => expr.clone(),
2947 }
2948 }
2949
2950 /// Helper: replace column references in inner expression (not top-level)
2951 fn replace_column_ref_inner(
2952 expr: &Expression,
2953 alias_name: &str,
2954 dateadd: &Expression,
2955 ) -> Expression {
2956 use crate::expressions::*;
2957 match expr {
2958 Expression::Column(c) if c.name.name == alias_name && c.table.is_none() => {
2959 dateadd.clone()
2960 }
2961 Expression::Add(op) => {
2962 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
2963 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
2964 Expression::Add(Box::new(BinaryOp {
2965 left,
2966 right,
2967 left_comments: op.left_comments.clone(),
2968 operator_comments: op.operator_comments.clone(),
2969 trailing_comments: op.trailing_comments.clone(),
2970 inferred_type: None,
2971 }))
2972 }
2973 Expression::Sub(op) => {
2974 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
2975 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
2976 Expression::Sub(Box::new(BinaryOp {
2977 left,
2978 right,
2979 left_comments: op.left_comments.clone(),
2980 operator_comments: op.operator_comments.clone(),
2981 trailing_comments: op.trailing_comments.clone(),
2982 inferred_type: None,
2983 }))
2984 }
2985 Expression::Mul(op) => {
2986 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
2987 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
2988 Expression::Mul(Box::new(BinaryOp {
2989 left,
2990 right,
2991 left_comments: op.left_comments.clone(),
2992 operator_comments: op.operator_comments.clone(),
2993 trailing_comments: op.trailing_comments.clone(),
2994 inferred_type: None,
2995 }))
2996 }
2997 _ => expr.clone(),
2998 }
2999 }
3000
    /// Handle UNNEST(GENERATE_DATE_ARRAY(...)) in FROM clause for Snowflake target.
    /// Converts to a subquery with DATEADD + TABLE(FLATTEN(ARRAY_GENERATE_RANGE(...))).
    ///
    /// Only the first matching FROM entry is rewritten; a SELECT with no
    /// matching entry is returned unchanged. The rewrite intentionally mirrors
    /// Python sqlglot's output, including the `(x + 1 - 1) + 1` arithmetic.
    fn try_transform_from_gda_snowflake(
        mut sel: Box<crate::expressions::Select>,
    ) -> Result<Expression> {
        use crate::expressions::*;

        // Extract GDA info from FROM clause
        let mut gda_info: Option<(
            usize,
            String,
            Expression,
            Expression,
            String,
            Option<(String, Vec<Identifier>)>,
        )> = None; // (from_idx, col_name, start, end, unit, outer_alias)

        if let Some(ref from) = sel.from {
            for (idx, table_expr) in from.expressions.iter().enumerate() {
                // Pattern 1: UNNEST(GENERATE_DATE_ARRAY(...))
                // Pattern 2: Alias(UNNEST(GENERATE_DATE_ARRAY(...))) AS _q(date_week)
                let (unnest_opt, outer_alias_info) = match table_expr {
                    Expression::Unnest(ref unnest) => (Some(unnest.as_ref()), None),
                    Expression::Alias(ref a) => {
                        if let Expression::Unnest(ref unnest) = a.this {
                            let alias_info = (a.alias.name.clone(), a.column_aliases.clone());
                            (Some(unnest.as_ref()), Some(alias_info))
                        } else {
                            (None, None)
                        }
                    }
                    _ => (None, None),
                };

                if let Some(unnest) = unnest_opt {
                    // Check for GENERATE_DATE_ARRAY function
                    let func_opt = match &unnest.this {
                        Expression::Function(ref f)
                            if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY")
                                && f.args.len() >= 2 =>
                        {
                            Some(f)
                        }
                        // Also check for GenerateSeries (from earlier normalization)
                        _ => None,
                    };

                    if let Some(f) = func_opt {
                        // Indexing is safe: the match arm requires args.len() >= 2.
                        let start_expr = f.args[0].clone();
                        let end_expr = f.args[1].clone();
                        let step = f.args.get(2).cloned();

                        // Extract unit and column name. With no outer column
                        // alias, the flattened value column falls back to the
                        // name "value".
                        let unit = Self::extract_interval_unit_str(&step);
                        let col_name = outer_alias_info
                            .as_ref()
                            .and_then(|(_, cols)| cols.first().map(|id| id.name.clone()))
                            .unwrap_or_else(|| "value".to_string());

                        if let Some(unit_str) = unit {
                            gda_info = Some((
                                idx,
                                col_name,
                                start_expr,
                                end_expr,
                                unit_str,
                                outer_alias_info,
                            ));
                            break;
                        }
                    }
                }
            }
        }

        // Nothing matched: hand the SELECT back untouched.
        let Some((from_idx, col_name, start_expr, end_expr, unit_str, outer_alias_info)) = gda_info
        else {
            return Ok(Expression::Select(sel));
        };

        // Build the Snowflake subquery:
        // (SELECT DATEADD(unit, CAST(col_name AS INT), CAST(start AS DATE)) AS col_name
        //  FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (DATEDIFF(unit, start, end) + 1 - 1) + 1))) AS _t0(seq, key, path, index, col_name, this))

        // DATEDIFF(unit, start, end)
        let datediff = Expression::Function(Box::new(Function::new(
            "DATEDIFF".to_string(),
            vec![
                Expression::Column(Column {
                    name: Identifier::new(&unit_str),
                    table: None,
                    join_mark: false,
                    trailing_comments: vec![],
                    span: None,
                    inferred_type: None,
                }),
                start_expr.clone(),
                end_expr.clone(),
            ],
        )));
        // (DATEDIFF(...) + 1 - 1) + 1
        // The redundant arithmetic matches Python sqlglot's output exactly.
        let plus_one = Expression::Add(Box::new(BinaryOp {
            left: datediff,
            right: Expression::Literal(Literal::Number("1".to_string())),
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        }));
        let minus_one = Expression::Sub(Box::new(BinaryOp {
            left: plus_one,
            right: Expression::Literal(Literal::Number("1".to_string())),
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        }));
        let paren_inner = Expression::Paren(Box::new(Paren {
            this: minus_one,
            trailing_comments: vec![],
        }));
        let outer_plus_one = Expression::Add(Box::new(BinaryOp {
            left: paren_inner,
            right: Expression::Literal(Literal::Number("1".to_string())),
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        }));

        let array_gen_range = Expression::Function(Box::new(Function::new(
            "ARRAY_GENERATE_RANGE".to_string(),
            vec![
                Expression::Literal(Literal::Number("0".to_string())),
                outer_plus_one,
            ],
        )));

        // TABLE(FLATTEN(INPUT => ...))
        let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
            name: Identifier::new("INPUT"),
            value: array_gen_range,
            separator: crate::expressions::NamedArgSeparator::DArrow,
        }));
        let flatten = Expression::Function(Box::new(Function::new(
            "FLATTEN".to_string(),
            vec![flatten_input],
        )));

        // Determine alias name for the table: use outer alias or _t0
        let table_alias_name = outer_alias_info
            .as_ref()
            .map(|(name, _)| name.clone())
            .unwrap_or_else(|| "_t0".to_string());

        // TABLE(FLATTEN(...)) AS _t0(seq, key, path, index, col_name, this)
        // The six column aliases follow Snowflake FLATTEN's output columns;
        // the GDA value takes the VALUE position under `col_name`.
        let table_func =
            Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])));
        let flatten_aliased = Expression::Alias(Box::new(Alias {
            this: table_func,
            alias: Identifier::new(&table_alias_name),
            column_aliases: vec![
                Identifier::new("seq"),
                Identifier::new("key"),
                Identifier::new("path"),
                Identifier::new("index"),
                Identifier::new(&col_name),
                Identifier::new("this"),
            ],
            pre_alias_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        }));

        // SELECT DATEADD(unit, CAST(col_name AS INT), CAST(start AS DATE)) AS col_name
        let dateadd_expr = Expression::Function(Box::new(Function::new(
            "DATEADD".to_string(),
            vec![
                Expression::Column(Column {
                    name: Identifier::new(&unit_str),
                    table: None,
                    join_mark: false,
                    trailing_comments: vec![],
                    span: None,
                    inferred_type: None,
                }),
                Expression::Cast(Box::new(Cast {
                    this: Expression::Column(Column {
                        name: Identifier::new(&col_name),
                        table: None,
                        join_mark: false,
                        trailing_comments: vec![],
                        span: None,
                        inferred_type: None,
                    }),
                    to: DataType::Int {
                        length: None,
                        integer_spelling: false,
                    },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                })),
                // Use start_expr directly - it's already been normalized (DATE literal -> CAST)
                start_expr.clone(),
            ],
        )));
        let dateadd_aliased = Expression::Alias(Box::new(Alias {
            this: dateadd_expr,
            alias: Identifier::new(&col_name),
            column_aliases: vec![],
            pre_alias_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        }));

        // Build inner SELECT
        let mut inner_select = Select::new();
        inner_select.expressions = vec![dateadd_aliased];
        inner_select.from = Some(From {
            expressions: vec![flatten_aliased],
        });

        let inner_select_expr = Expression::Select(Box::new(inner_select));
        let subquery = Expression::Subquery(Box::new(Subquery {
            this: inner_select_expr,
            alias: None,
            column_aliases: vec![],
            order_by: None,
            limit: None,
            offset: None,
            distribute_by: None,
            sort_by: None,
            cluster_by: None,
            lateral: false,
            modifiers_inside: false,
            trailing_comments: vec![],
            inferred_type: None,
        }));

        // If there was an outer alias (e.g., AS _q(date_week)), wrap with alias
        let replacement = if let Some((alias_name, col_aliases)) = outer_alias_info {
            Expression::Alias(Box::new(Alias {
                this: subquery,
                alias: Identifier::new(&alias_name),
                column_aliases: col_aliases,
                pre_alias_comments: vec![],
                trailing_comments: vec![],
                inferred_type: None,
            }))
        } else {
            subquery
        };

        // Replace the FROM expression
        if let Some(ref mut from) = sel.from {
            from.expressions[from_idx] = replacement;
        }

        Ok(Expression::Select(sel))
    }
3264
    /// Convert ARRAY_SIZE(GENERATE_DATE_ARRAY(start, end, step)) for Snowflake.
    /// Produces: ARRAY_SIZE((SELECT ARRAY_AGG(*) FROM (SELECT DATEADD(unit, CAST(value AS INT), start) AS value
    ///   FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (DATEDIFF(unit, start, end) + 1 - 1) + 1))) AS _t0(...))))
    ///
    /// Callers must guarantee `f.args.len() >= 2` (the dispatch in
    /// `transform_generate_date_array_snowflake` checks this) — the first two
    /// args are indexed directly below. A missing or unrecognized step
    /// defaults the unit to DAY.
    fn convert_array_size_gda_snowflake(f: &crate::expressions::Function) -> Result<Expression> {
        use crate::expressions::*;

        // Safe indexing: caller checks args.len() >= 2 before dispatching here.
        let start_expr = f.args[0].clone();
        let end_expr = f.args[1].clone();
        let step = f.args.get(2).cloned();
        let unit_str = Self::extract_interval_unit_str(&step).unwrap_or_else(|| "DAY".to_string());
        // Snowflake FLATTEN exposes the element under the VALUE column.
        let col_name = "value";

        // Build the inner subquery: same as try_transform_from_gda_snowflake
        let datediff = Expression::Function(Box::new(Function::new(
            "DATEDIFF".to_string(),
            vec![
                Expression::Column(Column {
                    name: Identifier::new(&unit_str),
                    table: None,
                    join_mark: false,
                    trailing_comments: vec![],
                    span: None,
                    inferred_type: None,
                }),
                start_expr.clone(),
                end_expr.clone(),
            ],
        )));
        // (DATEDIFF(...) + 1 - 1) + 1 — redundant on purpose, to match Python
        // sqlglot's generated SQL byte-for-byte.
        let plus_one = Expression::Add(Box::new(BinaryOp {
            left: datediff,
            right: Expression::Literal(Literal::Number("1".to_string())),
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        }));
        let minus_one = Expression::Sub(Box::new(BinaryOp {
            left: plus_one,
            right: Expression::Literal(Literal::Number("1".to_string())),
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        }));
        let paren_inner = Expression::Paren(Box::new(Paren {
            this: minus_one,
            trailing_comments: vec![],
        }));
        let outer_plus_one = Expression::Add(Box::new(BinaryOp {
            left: paren_inner,
            right: Expression::Literal(Literal::Number("1".to_string())),
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        }));

        let array_gen_range = Expression::Function(Box::new(Function::new(
            "ARRAY_GENERATE_RANGE".to_string(),
            vec![
                Expression::Literal(Literal::Number("0".to_string())),
                outer_plus_one,
            ],
        )));

        // FLATTEN(INPUT => ARRAY_GENERATE_RANGE(...))
        let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
            name: Identifier::new("INPUT"),
            value: array_gen_range,
            separator: crate::expressions::NamedArgSeparator::DArrow,
        }));
        let flatten = Expression::Function(Box::new(Function::new(
            "FLATTEN".to_string(),
            vec![flatten_input],
        )));

        // TABLE(FLATTEN(...)) AS _t0(seq, key, path, index, value, this) —
        // the column aliases follow Snowflake FLATTEN's output columns.
        let table_func =
            Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])));
        let flatten_aliased = Expression::Alias(Box::new(Alias {
            this: table_func,
            alias: Identifier::new("_t0"),
            column_aliases: vec![
                Identifier::new("seq"),
                Identifier::new("key"),
                Identifier::new("path"),
                Identifier::new("index"),
                Identifier::new(col_name),
                Identifier::new("this"),
            ],
            pre_alias_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        }));

        // DATEADD(unit, CAST(value AS INT), start)
        let dateadd_expr = Expression::Function(Box::new(Function::new(
            "DATEADD".to_string(),
            vec![
                Expression::Column(Column {
                    name: Identifier::new(&unit_str),
                    table: None,
                    join_mark: false,
                    trailing_comments: vec![],
                    span: None,
                    inferred_type: None,
                }),
                Expression::Cast(Box::new(Cast {
                    this: Expression::Column(Column {
                        name: Identifier::new(col_name),
                        table: None,
                        join_mark: false,
                        trailing_comments: vec![],
                        span: None,
                        inferred_type: None,
                    }),
                    to: DataType::Int {
                        length: None,
                        integer_spelling: false,
                    },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                })),
                start_expr.clone(),
            ],
        )));
        let dateadd_aliased = Expression::Alias(Box::new(Alias {
            this: dateadd_expr,
            alias: Identifier::new(col_name),
            column_aliases: vec![],
            pre_alias_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        }));

        // Inner SELECT: SELECT DATEADD(...) AS value FROM TABLE(FLATTEN(...)) AS _t0(...)
        let mut inner_select = Select::new();
        inner_select.expressions = vec![dateadd_aliased];
        inner_select.from = Some(From {
            expressions: vec![flatten_aliased],
        });

        // Wrap in subquery for the inner part
        let inner_subquery = Expression::Subquery(Box::new(Subquery {
            this: Expression::Select(Box::new(inner_select)),
            alias: None,
            column_aliases: vec![],
            order_by: None,
            limit: None,
            offset: None,
            distribute_by: None,
            sort_by: None,
            cluster_by: None,
            lateral: false,
            modifiers_inside: false,
            trailing_comments: vec![],
            inferred_type: None,
        }));

        // Outer: SELECT ARRAY_AGG(*) FROM (inner_subquery)
        let star = Expression::Star(Star {
            table: None,
            except: None,
            replace: None,
            rename: None,
            trailing_comments: vec![],
            span: None,
        });
        let array_agg = Expression::ArrayAgg(Box::new(AggFunc {
            this: star,
            distinct: false,
            filter: None,
            order_by: vec![],
            name: Some("ARRAY_AGG".to_string()),
            ignore_nulls: None,
            having_max: None,
            limit: None,
            inferred_type: None,
        }));

        let mut outer_select = Select::new();
        outer_select.expressions = vec![array_agg];
        outer_select.from = Some(From {
            expressions: vec![inner_subquery],
        });

        // Wrap in a subquery
        let outer_subquery = Expression::Subquery(Box::new(Subquery {
            this: Expression::Select(Box::new(outer_select)),
            alias: None,
            column_aliases: vec![],
            order_by: None,
            limit: None,
            offset: None,
            distribute_by: None,
            sort_by: None,
            cluster_by: None,
            lateral: false,
            modifiers_inside: false,
            trailing_comments: vec![],
            inferred_type: None,
        }));

        // ARRAY_SIZE(subquery)
        Ok(Expression::ArraySize(Box::new(UnaryFunc::new(
            outer_subquery,
        ))))
    }
3473
3474 /// Extract interval unit string from an optional step expression.
3475 fn extract_interval_unit_str(step: &Option<Expression>) -> Option<String> {
3476 use crate::expressions::*;
3477 if let Some(Expression::Interval(ref iv)) = step {
3478 if let Some(IntervalUnitSpec::Simple { ref unit, .. }) = iv.unit {
3479 return Some(format!("{:?}", unit).to_uppercase());
3480 }
3481 if let Some(ref this) = iv.this {
3482 if let Expression::Literal(Literal::String(ref s)) = this {
3483 let parts: Vec<&str> = s.split_whitespace().collect();
3484 if parts.len() == 2 {
3485 return Some(parts[1].to_uppercase());
3486 } else if parts.len() == 1 {
3487 let upper = parts[0].to_uppercase();
3488 if matches!(
3489 upper.as_str(),
3490 "YEAR"
3491 | "QUARTER"
3492 | "MONTH"
3493 | "WEEK"
3494 | "DAY"
3495 | "HOUR"
3496 | "MINUTE"
3497 | "SECOND"
3498 ) {
3499 return Some(upper);
3500 }
3501 }
3502 }
3503 }
3504 }
3505 // Default to DAY if no step or no interval
3506 if step.is_none() {
3507 return Some("DAY".to_string());
3508 }
3509 None
3510 }
3511
    /// Post-process Snowflake pretty-printed SQL to match Python sqlglot's
    /// line-breaking for one known LATERAL FLATTEN query shape.
    ///
    /// Both marker substrings must be present before anything is rewritten;
    /// otherwise the input is returned unchanged. The rewrites are exact
    /// string substitutions (no re-parsing), so each one only fires when its
    /// search text matches byte-for-byte — do not reformat these literals.
    fn normalize_snowflake_pretty(mut sql: String) -> String {
        if sql.contains("LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)")
            && sql.contains("ARRAY_GENERATE_RANGE(0, (GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1) + 1)")
        {
            // Break the `<> ALL (...)` subquery across multiple lines.
            sql = sql.replace(
                "AND uc.user_id <> ALL (SELECT DISTINCT\n _id\n FROM users, LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)\n WHERE\n GET_PATH(datasource.value, 'name') = 'something')",
                "AND uc.user_id <> ALL (\n SELECT DISTINCT\n _id\n FROM users, LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)\n WHERE\n GET_PATH(datasource.value, 'name') = 'something'\n )",
            );

            // Break the ARRAY_GENERATE_RANGE bound expression in the CROSS JOIN.
            sql = sql.replace(
                "CROSS JOIN TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1) + 1))) AS _u(seq, key, path, index, pos, this)",
                "CROSS JOIN TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (\n GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1\n) + 1))) AS _u(seq, key, path, index, pos, this)",
            );

            // Break the OR predicate on the position bounds.
            sql = sql.replace(
                "OR (_u.pos > (ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1)\n AND _u_2.pos_2 = (ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1))",
                "OR (\n _u.pos > (\n ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1\n )\n AND _u_2.pos_2 = (\n ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1\n )\n )",
            );
        }

        sql
    }
3534
3535 /// Apply cross-dialect semantic normalizations that depend on knowing both source and target.
3536 /// This handles cases where the same syntax has different semantics across dialects.
3537 fn cross_dialect_normalize(
3538 expr: Expression,
3539 source: DialectType,
3540 target: DialectType,
3541 ) -> Result<Expression> {
3542 use crate::expressions::{
3543 AggFunc, BinaryOp, Case, Cast, ConvertTimezone, DataType, DateTimeField, DateTruncFunc,
3544 Function, Identifier, IsNull, Literal, Null, Paren,
3545 };
3546
3547 // Helper to tag which kind of transform to apply
3548 #[derive(Debug)]
3549 enum Action {
3550 None,
3551 GreatestLeastNull,
3552 ArrayGenerateRange,
3553 Div0TypedDivision,
3554 ArrayAggCollectList,
3555 ArrayAggWithinGroupFilter,
3556 ArrayAggFilter,
3557 CastTimestampToDatetime,
3558 DateTruncWrapCast,
3559 ToDateToCast,
3560 ConvertTimezoneToExpr,
3561 SetToVariable,
3562 RegexpReplaceSnowflakeToDuckDB,
3563 BigQueryFunctionNormalize,
3564 BigQuerySafeDivide,
3565 BigQueryCastType,
3566 BigQueryToHexBare, // _BQ_TO_HEX(x) with no LOWER/UPPER wrapper
3567 BigQueryToHexLower, // LOWER(_BQ_TO_HEX(x))
3568 BigQueryToHexUpper, // UPPER(_BQ_TO_HEX(x))
3569 BigQueryLastDayStripUnit, // LAST_DAY(date, MONTH) -> LAST_DAY(date)
3570 BigQueryCastFormat, // CAST(x AS type FORMAT 'fmt') -> PARSE_DATE/PARSE_TIMESTAMP etc.
3571 BigQueryAnyValueHaving, // ANY_VALUE(x HAVING MAX/MIN y) -> ARG_MAX_NULL/ARG_MIN_NULL for DuckDB
3572 BigQueryApproxQuantiles, // APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
3573 GenericFunctionNormalize, // Cross-dialect function renaming (non-BigQuery sources)
3574 RegexpLikeToDuckDB, // RegexpLike -> REGEXP_MATCHES for DuckDB target (partial match)
3575 EpochConvert, // Expression::Epoch -> target-specific epoch function
3576 EpochMsConvert, // Expression::EpochMs -> target-specific epoch ms function
3577 TSQLTypeNormalize, // TSQL types (MONEY, SMALLMONEY, REAL, DATETIME2) -> standard types
3578 MySQLSafeDivide, // MySQL a/b -> a / NULLIF(b, 0) with optional CAST
3579 NullsOrdering, // Add NULLS FIRST/LAST for ORDER BY
3580 AlterTableRenameStripSchema, // ALTER TABLE db.t1 RENAME TO db.t2 -> ALTER TABLE db.t1 RENAME TO t2
3581 StringAggConvert, // STRING_AGG/WITHIN GROUP -> target-specific aggregate
3582 GroupConcatConvert, // GROUP_CONCAT -> target-specific aggregate
3583 TempTableHash, // TSQL #table -> temp table normalization
3584 ArrayLengthConvert, // CARDINALITY/ARRAY_LENGTH/ARRAY_SIZE -> target-specific
3585 DatePartUnquote, // DATE_PART('month', x) -> DATE_PART(month, x) for Snowflake target
3586 NvlClearOriginal, // Clear NVL original_name for cross-dialect transpilation
3587 HiveCastToTryCast, // Hive/Spark CAST -> TRY_CAST for targets that support it
3588 XorExpand, // MySQL XOR -> (a AND NOT b) OR (NOT a AND b) for non-XOR targets
3589 CastTimestampStripTz, // CAST(x AS TIMESTAMP WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark
3590 JsonExtractToGetJsonObject, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
3591 JsonExtractScalarToGetJsonObject, // JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
3592 JsonQueryValueConvert, // JsonQuery/JsonValue -> target-specific (ISNULL wrapper for TSQL, GET_JSON_OBJECT for Spark, etc.)
3593 JsonLiteralToJsonParse, // JSON 'x' -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake
3594 ArraySyntaxConvert, // ARRAY[x] -> ARRAY(x) for Spark, [x] for BigQuery/DuckDB
3595 AtTimeZoneConvert, // AT TIME ZONE -> AT_TIMEZONE (Presto) / FROM_UTC_TIMESTAMP (Spark)
3596 DayOfWeekConvert, // DAY_OF_WEEK -> dialect-specific
3597 MaxByMinByConvert, // MAX_BY/MIN_BY -> argMax/argMin for ClickHouse
3598 ArrayAggToCollectList, // ARRAY_AGG(x ORDER BY ...) -> COLLECT_LIST(x) for Hive/Spark
3599 ElementAtConvert, // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
3600 CurrentUserParens, // CURRENT_USER -> CURRENT_USER() for Snowflake
3601 CastToJsonForSpark, // CAST(x AS JSON) -> TO_JSON(x) for Spark
3602 CastJsonToFromJson, // CAST(JSON_PARSE(literal) AS ARRAY/MAP) -> FROM_JSON(literal, type_string)
3603 ToJsonConvert, // TO_JSON(x) -> JSON_FORMAT(CAST(x AS JSON)) for Presto etc.
3604 ArrayAggNullFilter, // ARRAY_AGG(x) FILTER(WHERE cond) -> add AND NOT x IS NULL for DuckDB
3605 ArrayAggIgnoreNullsDuckDB, // ARRAY_AGG(x IGNORE NULLS ORDER BY ...) -> ARRAY_AGG(x ORDER BY a NULLS FIRST, ...) for DuckDB
3606 BigQueryPercentileContToDuckDB, // PERCENTILE_CONT(x, frac RESPECT NULLS) -> QUANTILE_CONT(x, frac) for DuckDB
3607 BigQueryArraySelectAsStructToSnowflake, // ARRAY(SELECT AS STRUCT ...) -> (SELECT ARRAY_AGG(OBJECT_CONSTRUCT(...)))
3608 CountDistinctMultiArg, // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN ... END)
3609 VarianceToClickHouse, // Expression::Variance -> varSamp for ClickHouse
3610 StddevToClickHouse, // Expression::Stddev -> stddevSamp for ClickHouse
3611 ApproxQuantileConvert, // Expression::ApproxQuantile -> APPROX_PERCENTILE for Snowflake
3612 ArrayIndexConvert, // array[1] -> array[0] for BigQuery (1-based to 0-based)
3613 DollarParamConvert, // $foo -> @foo for BigQuery
3614 TablesampleReservoir, // TABLESAMPLE (n ROWS) -> TABLESAMPLE RESERVOIR (n ROWS) for DuckDB
3615 BitAggFloatCast, // BIT_OR/BIT_AND/BIT_XOR float arg -> CAST(ROUND(CAST(arg)) AS INT) for DuckDB
3616 BitAggSnowflakeRename, // BIT_OR -> BITORAGG, BIT_AND -> BITANDAGG etc. for Snowflake
3617 StrftimeCastTimestamp, // CAST TIMESTAMP -> TIMESTAMP_NTZ for Spark in STRFTIME
3618 AnyValueIgnoreNulls, // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
3619 CreateTableStripComment, // Strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
3620 EscapeStringNormalize, // e'Hello\nworld' literal newline -> \n
3621 AnyToExists, // PostgreSQL x <op> ANY(array) -> EXISTS(array, x -> ...)
3622 ArrayConcatBracketConvert, // [1,2] -> ARRAY[1,2] for PostgreSQL in ARRAY_CAT
3623 SnowflakeIntervalFormat, // INTERVAL '2' HOUR -> INTERVAL '2 HOUR' for Snowflake
3624 AlterTableToSpRename, // ALTER TABLE RENAME -> EXEC sp_rename for TSQL
3625 StraightJoinCase, // STRAIGHT_JOIN -> straight_join for DuckDB
3626 RespectNullsConvert, // RESPECT NULLS window function handling
3627 MysqlNullsOrdering, // MySQL doesn't support NULLS ordering
3628 MysqlNullsLastRewrite, // Add CASE WHEN to ORDER BY for DuckDB -> MySQL (NULLS LAST simulation)
3629 BigQueryNullsOrdering, // BigQuery doesn't support NULLS FIRST/LAST - strip
3630 SnowflakeFloatProtect, // Protect FLOAT from being converted to DOUBLE by Snowflake target transform
3631 JsonToGetPath, // JSON arrow -> GET_PATH/PARSE_JSON for Snowflake
3632 FilterToIff, // FILTER(WHERE) -> IFF wrapping for Snowflake
3633 AggFilterToIff, // AggFunc.filter -> IFF wrapping for Snowflake (e.g., AVG(x) FILTER(WHERE cond))
3634 StructToRow, // DuckDB struct -> Presto ROW / BigQuery STRUCT
3635 SparkStructConvert, // Spark STRUCT(x AS col1, ...) -> ROW/DuckDB struct
3636 DecimalDefaultPrecision, // DECIMAL -> DECIMAL(18, 3) for Snowflake in BIT agg
3637 ApproxCountDistinctToApproxDistinct, // APPROX_COUNT_DISTINCT -> APPROX_DISTINCT for Presto/Trino
3638 CollectListToArrayAgg, // COLLECT_LIST -> ARRAY_AGG for Presto/DuckDB
3639 CollectSetConvert, // COLLECT_SET -> SET_AGG/ARRAY_AGG(DISTINCT)/ARRAY_UNIQUE_AGG
3640 PercentileConvert, // PERCENTILE -> QUANTILE/APPROX_PERCENTILE
3641 CorrIsnanWrap, // CORR(a,b) -> CASE WHEN ISNAN(CORR(a,b)) THEN NULL ELSE CORR(a,b) END
3642 TruncToDateTrunc, // TRUNC(ts, unit) -> DATE_TRUNC(unit, ts)
3643 ArrayContainsConvert, // ARRAY_CONTAINS -> CONTAINS/target-specific
3644 StrPositionExpand, // StrPosition with position -> complex STRPOS expansion for Presto/DuckDB
3645 TablesampleSnowflakeStrip, // Strip method and PERCENT for Snowflake target
3646 FirstToAnyValue, // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
3647 MonthsBetweenConvert, // Expression::MonthsBetween -> target-specific
3648 CurrentUserSparkParens, // CURRENT_USER -> CURRENT_USER() for Spark
3649 SparkDateFuncCast, // MONTH/YEAR/DAY('str') -> MONTH/YEAR/DAY(CAST('str' AS DATE)) from Spark
3650 MapFromArraysConvert, // Expression::MapFromArrays -> MAP/OBJECT_CONSTRUCT/MAP_FROM_ARRAYS
3651 AddMonthsConvert, // Expression::AddMonths -> target-specific DATEADD/DATE_ADD
3652 PercentileContConvert, // PERCENTILE_CONT/DISC WITHIN GROUP -> APPROX_PERCENTILE/PERCENTILE_APPROX
3653 GenerateSeriesConvert, // GENERATE_SERIES -> SEQUENCE/UNNEST(SEQUENCE)/EXPLODE(SEQUENCE)
3654 ConcatCoalesceWrap, // CONCAT(a, b) -> CONCAT(COALESCE(CAST(a), ''), ...) for Presto/ClickHouse
3655 PipeConcatToConcat, // a || b -> CONCAT(CAST(a), CAST(b)) for Presto
3656 DivFuncConvert, // DIV(a, b) -> a // b for DuckDB, CAST for BigQuery
3657 JsonObjectAggConvert, // JSON_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
3658 JsonbExistsConvert, // JSONB_EXISTS -> JSON_EXISTS for DuckDB
3659 DateBinConvert, // DATE_BIN -> TIME_BUCKET for DuckDB
3660 MysqlCastCharToText, // MySQL CAST(x AS CHAR) -> CAST(x AS TEXT/VARCHAR/STRING) for targets
3661 SparkCastVarcharToString, // Spark CAST(x AS VARCHAR/CHAR) -> CAST(x AS STRING) for Spark targets
3662 JsonExtractToArrow, // JSON_EXTRACT(x, path) -> x -> path for SQLite/DuckDB
3663 JsonExtractToTsql, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
3664 JsonExtractToClickHouse, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> JSONExtractString for ClickHouse
3665 JsonExtractScalarConvert, // JSON_EXTRACT_SCALAR -> target-specific (PostgreSQL, Snowflake, SQLite)
3666 JsonPathNormalize, // Normalize JSON path format (brackets, wildcards, quotes) for various dialects
3667 MinMaxToLeastGreatest, // Multi-arg MIN(a,b,c) -> LEAST(a,b,c), MAX(a,b,c) -> GREATEST(a,b,c)
3668 ClickHouseUniqToApproxCountDistinct, // uniq(x) -> APPROX_COUNT_DISTINCT(x) for non-ClickHouse targets
3669 ClickHouseAnyToAnyValue, // any(x) -> ANY_VALUE(x) for non-ClickHouse targets
3670 OracleVarchar2ToVarchar, // VARCHAR2(N CHAR/BYTE) -> VARCHAR(N) for non-Oracle targets
3671 Nvl2Expand, // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b ELSE c END
3672 IfnullToCoalesce, // IFNULL(a, b) -> COALESCE(a, b)
3673 IsAsciiConvert, // IS_ASCII(x) -> dialect-specific ASCII check
3674 StrPositionConvert, // STR_POSITION(haystack, needle[, pos]) -> dialect-specific
3675 DecodeSimplify, // DECODE with null-safe -> simple = comparison
3676 ArraySumConvert, // ARRAY_SUM -> target-specific
3677 ArraySizeConvert, // ARRAY_SIZE -> target-specific
3678 ArrayAnyConvert, // ARRAY_ANY -> target-specific
3679 CastTimestamptzToFunc, // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) for MySQL/StarRocks
3680 TsOrDsToDateConvert, // TS_OR_DS_TO_DATE(x[, fmt]) -> dialect-specific
3681 TsOrDsToDateStrConvert, // TS_OR_DS_TO_DATE_STR(x) -> SUBSTRING(CAST(x AS type), 1, 10)
3682 DateStrToDateConvert, // DATE_STR_TO_DATE(x) -> CAST(x AS DATE)
3683 TimeStrToDateConvert, // TIME_STR_TO_DATE(x) -> CAST(x AS DATE)
3684 TimeStrToTimeConvert, // TIME_STR_TO_TIME(x) -> CAST(x AS TIMESTAMP)
3685 DateToDateStrConvert, // DATE_TO_DATE_STR(x) -> CAST(x AS TEXT/VARCHAR/STRING)
3686 DateToDiConvert, // DATE_TO_DI(x) -> dialect-specific (CAST date to YYYYMMDD integer)
3687 DiToDateConvert, // DI_TO_DATE(x) -> dialect-specific (integer YYYYMMDD to date)
3688 TsOrDiToDiConvert, // TS_OR_DI_TO_DI(x) -> dialect-specific
3689 UnixToStrConvert, // UNIX_TO_STR(x, fmt) -> dialect-specific
3690 UnixToTimeConvert, // UNIX_TO_TIME(x) -> dialect-specific
3691 UnixToTimeStrConvert, // UNIX_TO_TIME_STR(x) -> dialect-specific
3692 TimeToUnixConvert, // TIME_TO_UNIX(x) -> dialect-specific
3693 TimeToStrConvert, // TIME_TO_STR(x, fmt) -> dialect-specific
3694 StrToUnixConvert, // STR_TO_UNIX(x, fmt) -> dialect-specific
3695 DateTruncSwapArgs, // DATE_TRUNC('unit', x) -> DATE_TRUNC(x, unit) / TRUNC(x, unit)
3696 TimestampTruncConvert, // TIMESTAMP_TRUNC(x, UNIT[, tz]) -> dialect-specific
3697 StrToDateConvert, // STR_TO_DATE(x, fmt) from Generic -> CAST(StrToTime(x,fmt) AS DATE)
3698 TsOrDsAddConvert, // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> DATE_ADD per dialect
3699 DateFromUnixDateConvert, // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, '1970-01-01')
3700 TimeStrToUnixConvert, // TIME_STR_TO_UNIX(x) -> dialect-specific
3701 TimeToTimeStrConvert, // TIME_TO_TIME_STR(x) -> CAST(x AS type)
3702 CreateTableLikeToCtas, // CREATE TABLE a LIKE b -> CREATE TABLE a AS SELECT * FROM b LIMIT 0
3703 CreateTableLikeToSelectInto, // CREATE TABLE a LIKE b -> SELECT TOP 0 * INTO a FROM b AS temp
3704 CreateTableLikeToAs, // CREATE TABLE a LIKE b -> CREATE TABLE a AS b (ClickHouse)
3705 ArrayRemoveConvert, // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter/ARRAY subquery
3706 ArrayReverseConvert, // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
3707 JsonKeysConvert, // JSON_KEYS -> JSON_OBJECT_KEYS/OBJECT_KEYS
3708 ParseJsonStrip, // PARSE_JSON(x) -> x (strip wrapper)
3709 ArraySizeDrill, // ARRAY_SIZE -> REPEATED_COUNT for Drill
3710 WeekOfYearToWeekIso, // WEEKOFYEAR -> WEEKISO for Snowflake cross-dialect
3711 }
3712
3713 // Handle SELECT INTO -> CREATE TABLE AS for DuckDB/Snowflake/etc.
3714 let expr = if matches!(source, DialectType::TSQL | DialectType::Fabric) {
3715 Self::transform_select_into(expr, source, target)
3716 } else {
3717 expr
3718 };
3719
3720 // Strip OFFSET ROWS for non-TSQL/Oracle targets
3721 let expr = if !matches!(
3722 target,
3723 DialectType::TSQL | DialectType::Oracle | DialectType::Fabric
3724 ) {
3725 if let Expression::Select(mut select) = expr {
3726 if let Some(ref mut offset) = select.offset {
3727 offset.rows = None;
3728 }
3729 Expression::Select(select)
3730 } else {
3731 expr
3732 }
3733 } else {
3734 expr
3735 };
3736
3737 // Oracle: LIMIT -> FETCH FIRST, OFFSET -> OFFSET ROWS
3738 let expr = if matches!(target, DialectType::Oracle) {
3739 if let Expression::Select(mut select) = expr {
3740 if let Some(limit) = select.limit.take() {
3741 // Convert LIMIT to FETCH FIRST n ROWS ONLY
3742 select.fetch = Some(crate::expressions::Fetch {
3743 direction: "FIRST".to_string(),
3744 count: Some(limit.this),
3745 percent: false,
3746 rows: true,
3747 with_ties: false,
3748 });
3749 }
3750 // Add ROWS to OFFSET if present
3751 if let Some(ref mut offset) = select.offset {
3752 offset.rows = Some(true);
3753 }
3754 Expression::Select(select)
3755 } else {
3756 expr
3757 }
3758 } else {
3759 expr
3760 };
3761
3762 // Handle CreateTable WITH properties transformation before recursive transforms
3763 let expr = if let Expression::CreateTable(mut ct) = expr {
3764 Self::transform_create_table_properties(&mut ct, source, target);
3765
3766 // Handle Hive-style PARTITIONED BY (col_name type, ...) -> target-specific
3767 // When the PARTITIONED BY clause contains column definitions, merge them into the
3768 // main column list and adjust the PARTITIONED BY clause for the target dialect.
3769 if matches!(
3770 source,
3771 DialectType::Hive | DialectType::Spark | DialectType::Databricks
3772 ) {
3773 let mut partition_col_names: Vec<String> = Vec::new();
3774 let mut partition_col_defs: Vec<crate::expressions::ColumnDef> = Vec::new();
3775 let mut has_col_def_partitions = false;
3776
3777 // Check if any PARTITIONED BY property contains ColumnDef expressions
3778 for prop in &ct.properties {
3779 if let Expression::PartitionedByProperty(ref pbp) = prop {
3780 if let Expression::Tuple(ref tuple) = *pbp.this {
3781 for expr in &tuple.expressions {
3782 if let Expression::ColumnDef(ref cd) = expr {
3783 has_col_def_partitions = true;
3784 partition_col_names.push(cd.name.name.clone());
3785 partition_col_defs.push(*cd.clone());
3786 }
3787 }
3788 }
3789 }
3790 }
3791
3792 if has_col_def_partitions && !matches!(target, DialectType::Hive) {
3793 // Merge partition columns into main column list
3794 for cd in partition_col_defs {
3795 ct.columns.push(cd);
3796 }
3797
3798 // Replace PARTITIONED BY property with column-name-only version
3799 ct.properties
3800 .retain(|p| !matches!(p, Expression::PartitionedByProperty(_)));
3801
3802 if matches!(
3803 target,
3804 DialectType::Presto | DialectType::Trino | DialectType::Athena
3805 ) {
3806 // Presto: WITH (PARTITIONED_BY=ARRAY['y', 'z'])
3807 let array_elements: Vec<String> = partition_col_names
3808 .iter()
3809 .map(|n| format!("'{}'", n))
3810 .collect();
3811 let array_value = format!("ARRAY[{}]", array_elements.join(", "));
3812 ct.with_properties
3813 .push(("PARTITIONED_BY".to_string(), array_value));
3814 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
3815 // Spark: PARTITIONED BY (y, z) - just column names
3816 let name_exprs: Vec<Expression> = partition_col_names
3817 .iter()
3818 .map(|n| {
3819 Expression::Column(crate::expressions::Column {
3820 name: crate::expressions::Identifier::new(n.clone()),
3821 table: None,
3822 join_mark: false,
3823 trailing_comments: Vec::new(),
3824 span: None,
3825 inferred_type: None,
3826 })
3827 })
3828 .collect();
3829 ct.properties.insert(
3830 0,
3831 Expression::PartitionedByProperty(Box::new(
3832 crate::expressions::PartitionedByProperty {
3833 this: Box::new(Expression::Tuple(Box::new(
3834 crate::expressions::Tuple {
3835 expressions: name_exprs,
3836 },
3837 ))),
3838 },
3839 )),
3840 );
3841 }
3842 // For DuckDB and other targets, just drop the PARTITIONED BY (already retained above)
3843 }
3844
3845 // Note: Non-ColumnDef partitions (e.g., function expressions like MONTHS(y))
3846 // are handled by transform_create_table_properties which runs first
3847 }
3848
3849 // Strip LOCATION property for Presto/Trino (not supported)
3850 if matches!(
3851 target,
3852 DialectType::Presto | DialectType::Trino | DialectType::Athena
3853 ) {
3854 ct.properties
3855 .retain(|p| !matches!(p, Expression::LocationProperty(_)));
3856 }
3857
3858 // Strip table-level constraints for Spark/Hive/Databricks
3859 // Keep PRIMARY KEY and LIKE constraints but strip TSQL-specific modifiers; remove all others
3860 if matches!(
3861 target,
3862 DialectType::Spark | DialectType::Databricks | DialectType::Hive
3863 ) {
3864 ct.constraints.retain(|c| {
3865 matches!(
3866 c,
3867 crate::expressions::TableConstraint::PrimaryKey { .. }
3868 | crate::expressions::TableConstraint::Like { .. }
3869 )
3870 });
3871 for constraint in &mut ct.constraints {
3872 if let crate::expressions::TableConstraint::PrimaryKey {
3873 columns,
3874 modifiers,
3875 ..
3876 } = constraint
3877 {
3878 // Strip ASC/DESC from column names
3879 for col in columns.iter_mut() {
3880 if col.name.ends_with(" ASC") {
3881 col.name = col.name[..col.name.len() - 4].to_string();
3882 } else if col.name.ends_with(" DESC") {
3883 col.name = col.name[..col.name.len() - 5].to_string();
3884 }
3885 }
3886 // Strip TSQL-specific modifiers
3887 modifiers.clustered = None;
3888 modifiers.with_options.clear();
3889 modifiers.on_filegroup = None;
3890 }
3891 }
3892 }
3893
3894 // Databricks: IDENTITY columns with INT/INTEGER -> BIGINT
3895 if matches!(target, DialectType::Databricks) {
3896 for col in &mut ct.columns {
3897 if col.auto_increment {
3898 if matches!(col.data_type, crate::expressions::DataType::Int { .. }) {
3899 col.data_type = crate::expressions::DataType::BigInt { length: None };
3900 }
3901 }
3902 }
3903 }
3904
3905 // Spark/Databricks: INTEGER -> INT in column definitions
3906 // Python sqlglot always outputs INT for Spark/Databricks
3907 if matches!(target, DialectType::Spark | DialectType::Databricks) {
3908 for col in &mut ct.columns {
3909 if let crate::expressions::DataType::Int {
3910 integer_spelling, ..
3911 } = &mut col.data_type
3912 {
3913 *integer_spelling = false;
3914 }
3915 }
3916 }
3917
3918 // Strip explicit NULL constraints for Hive/Spark (B INTEGER NULL -> B INTEGER)
3919 if matches!(target, DialectType::Hive | DialectType::Spark) {
3920 for col in &mut ct.columns {
3921 // If nullable is explicitly true (NULL), change to None (omit it)
3922 if col.nullable == Some(true) {
3923 col.nullable = None;
3924 }
3925 // Also remove from constraints if stored there
3926 col.constraints
3927 .retain(|c| !matches!(c, crate::expressions::ColumnConstraint::Null));
3928 }
3929 }
3930
3931 // Strip TSQL ON filegroup for non-TSQL/Fabric targets
3932 if ct.on_property.is_some()
3933 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
3934 {
3935 ct.on_property = None;
3936 }
3937
3938 // Snowflake: strip ARRAY type parameters (ARRAY<INT> -> ARRAY, ARRAY<ARRAY<INT>> -> ARRAY)
3939 // Snowflake doesn't support typed arrays in DDL
3940 if matches!(target, DialectType::Snowflake) {
3941 fn strip_array_type_params(dt: &mut crate::expressions::DataType) {
3942 if let crate::expressions::DataType::Array { .. } = dt {
3943 *dt = crate::expressions::DataType::Custom {
3944 name: "ARRAY".to_string(),
3945 };
3946 }
3947 }
3948 for col in &mut ct.columns {
3949 strip_array_type_params(&mut col.data_type);
3950 }
3951 }
3952
3953 // PostgreSQL target: ensure IDENTITY columns have NOT NULL
3954 // If NOT NULL was explicit in source (present in constraint_order), preserve original order.
3955 // If NOT NULL was not explicit, add it after IDENTITY (GENERATED BY DEFAULT AS IDENTITY NOT NULL).
3956 if matches!(target, DialectType::PostgreSQL) {
3957 for col in &mut ct.columns {
3958 if col.auto_increment && !col.constraint_order.is_empty() {
3959 use crate::expressions::ConstraintType;
3960 let has_explicit_not_null = col
3961 .constraint_order
3962 .iter()
3963 .any(|ct| *ct == ConstraintType::NotNull);
3964
3965 if has_explicit_not_null {
3966 // Source had explicit NOT NULL - preserve original order
3967 // Just ensure nullable is set
3968 if col.nullable != Some(false) {
3969 col.nullable = Some(false);
3970 }
3971 } else {
3972 // Source didn't have explicit NOT NULL - build order with
3973 // AutoIncrement + NotNull first, then remaining constraints
3974 let mut new_order = Vec::new();
3975 // Put AutoIncrement (IDENTITY) first, followed by synthetic NotNull
3976 new_order.push(ConstraintType::AutoIncrement);
3977 new_order.push(ConstraintType::NotNull);
3978 // Add remaining constraints in original order (except AutoIncrement)
3979 for ct_type in &col.constraint_order {
3980 if *ct_type != ConstraintType::AutoIncrement {
3981 new_order.push(ct_type.clone());
3982 }
3983 }
3984 col.constraint_order = new_order;
3985 col.nullable = Some(false);
3986 }
3987 }
3988 }
3989 }
3990
3991 Expression::CreateTable(ct)
3992 } else {
3993 expr
3994 };
3995
3996 // Handle CreateView column stripping for Presto/Trino target
3997 let expr = if let Expression::CreateView(mut cv) = expr {
3998 // Presto/Trino: drop column list when view has a SELECT body
3999 if matches!(target, DialectType::Presto | DialectType::Trino) && !cv.columns.is_empty()
4000 {
4001 if !matches!(&cv.query, Expression::Null(_)) {
4002 cv.columns.clear();
4003 }
4004 }
4005 Expression::CreateView(cv)
4006 } else {
4007 expr
4008 };
4009
4010 // Wrap bare VALUES in CTE bodies with SELECT * FROM (...) AS _values for generic/non-Presto targets
4011 let expr = if !matches!(
4012 target,
4013 DialectType::Presto | DialectType::Trino | DialectType::Athena
4014 ) {
4015 if let Expression::Select(mut select) = expr {
4016 if let Some(ref mut with) = select.with {
4017 for cte in &mut with.ctes {
4018 if let Expression::Values(ref vals) = cte.this {
4019 // Build: SELECT * FROM (VALUES ...) AS _values
4020 let values_subquery =
4021 Expression::Subquery(Box::new(crate::expressions::Subquery {
4022 this: Expression::Values(vals.clone()),
4023 alias: Some(Identifier::new("_values".to_string())),
4024 column_aliases: Vec::new(),
4025 order_by: None,
4026 limit: None,
4027 offset: None,
4028 distribute_by: None,
4029 sort_by: None,
4030 cluster_by: None,
4031 lateral: false,
4032 modifiers_inside: false,
4033 trailing_comments: Vec::new(),
4034 inferred_type: None,
4035 }));
4036 let mut new_select = crate::expressions::Select::new();
4037 new_select.expressions =
4038 vec![Expression::Star(crate::expressions::Star {
4039 table: None,
4040 except: None,
4041 replace: None,
4042 rename: None,
4043 trailing_comments: Vec::new(),
4044 span: None,
4045 })];
4046 new_select.from = Some(crate::expressions::From {
4047 expressions: vec![values_subquery],
4048 });
4049 cte.this = Expression::Select(Box::new(new_select));
4050 }
4051 }
4052 }
4053 Expression::Select(select)
4054 } else {
4055 expr
4056 }
4057 } else {
4058 expr
4059 };
4060
4061 // PostgreSQL CREATE INDEX: add NULLS FIRST to index columns that don't have nulls ordering
4062 let expr = if matches!(target, DialectType::PostgreSQL) {
4063 if let Expression::CreateIndex(mut ci) = expr {
4064 for col in &mut ci.columns {
4065 if col.nulls_first.is_none() {
4066 col.nulls_first = Some(true);
4067 }
4068 }
4069 Expression::CreateIndex(ci)
4070 } else {
4071 expr
4072 }
4073 } else {
4074 expr
4075 };
4076
4077 transform_recursive(expr, &|e| {
4078 // BigQuery CAST(ARRAY[STRUCT(...)] AS STRUCT_TYPE[]) -> DuckDB: convert unnamed Structs to ROW()
4079 // This converts auto-named struct literals {'_0': x, '_1': y} inside typed arrays to ROW(x, y)
4080 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
4081 if let Expression::Cast(ref c) = e {
4082 // Check if this is a CAST of an array to a struct array type
4083 let is_struct_array_cast =
4084 matches!(&c.to, crate::expressions::DataType::Array { .. });
4085 if is_struct_array_cast {
4086 let has_auto_named_structs = match &c.this {
4087 Expression::Array(arr) => arr.expressions.iter().any(|elem| {
4088 if let Expression::Struct(s) = elem {
4089 s.fields.iter().all(|(name, _)| {
4090 name.as_ref().map_or(true, |n| {
4091 n.starts_with('_') && n[1..].parse::<usize>().is_ok()
4092 })
4093 })
4094 } else {
4095 false
4096 }
4097 }),
4098 Expression::ArrayFunc(arr) => arr.expressions.iter().any(|elem| {
4099 if let Expression::Struct(s) = elem {
4100 s.fields.iter().all(|(name, _)| {
4101 name.as_ref().map_or(true, |n| {
4102 n.starts_with('_') && n[1..].parse::<usize>().is_ok()
4103 })
4104 })
4105 } else {
4106 false
4107 }
4108 }),
4109 _ => false,
4110 };
4111 if has_auto_named_structs {
4112 let convert_struct_to_row = |elem: Expression| -> Expression {
4113 if let Expression::Struct(s) = elem {
4114 let row_args: Vec<Expression> =
4115 s.fields.into_iter().map(|(_, v)| v).collect();
4116 Expression::Function(Box::new(Function::new(
4117 "ROW".to_string(),
4118 row_args,
4119 )))
4120 } else {
4121 elem
4122 }
4123 };
4124 let mut c_clone = c.as_ref().clone();
4125 match &mut c_clone.this {
4126 Expression::Array(arr) => {
4127 arr.expressions = arr
4128 .expressions
4129 .drain(..)
4130 .map(convert_struct_to_row)
4131 .collect();
4132 }
4133 Expression::ArrayFunc(arr) => {
4134 arr.expressions = arr
4135 .expressions
4136 .drain(..)
4137 .map(convert_struct_to_row)
4138 .collect();
4139 }
4140 _ => {}
4141 }
4142 return Ok(Expression::Cast(Box::new(c_clone)));
4143 }
4144 }
4145 }
4146 }
4147
4148 // BigQuery SELECT AS STRUCT -> DuckDB struct literal {'key': value, ...}
4149 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
4150 if let Expression::Select(ref sel) = e {
4151 if sel.kind.as_deref() == Some("STRUCT") {
4152 let mut fields = Vec::new();
4153 for expr in &sel.expressions {
4154 match expr {
4155 Expression::Alias(a) => {
4156 fields.push((Some(a.alias.name.clone()), a.this.clone()));
4157 }
4158 Expression::Column(c) => {
4159 fields.push((Some(c.name.name.clone()), expr.clone()));
4160 }
4161 _ => {
4162 fields.push((None, expr.clone()));
4163 }
4164 }
4165 }
4166 let struct_lit =
4167 Expression::Struct(Box::new(crate::expressions::Struct { fields }));
4168 let mut new_select = sel.as_ref().clone();
4169 new_select.kind = None;
4170 new_select.expressions = vec![struct_lit];
4171 return Ok(Expression::Select(Box::new(new_select)));
4172 }
4173 }
4174 }
4175
4176 // Convert @variable -> ${variable} for Spark/Hive/Databricks
4177 if matches!(source, DialectType::TSQL | DialectType::Fabric)
4178 && matches!(
4179 target,
4180 DialectType::Spark | DialectType::Databricks | DialectType::Hive
4181 )
4182 {
4183 if let Expression::Parameter(ref p) = e {
4184 if p.style == crate::expressions::ParameterStyle::At {
4185 if let Some(ref name) = p.name {
4186 return Ok(Expression::Parameter(Box::new(
4187 crate::expressions::Parameter {
4188 name: Some(name.clone()),
4189 index: p.index,
4190 style: crate::expressions::ParameterStyle::DollarBrace,
4191 quoted: p.quoted,
4192 string_quoted: p.string_quoted,
4193 expression: None,
4194 },
4195 )));
4196 }
4197 }
4198 }
4199 // Also handle Column("@x") -> Parameter("x", DollarBrace) for TSQL vars
4200 if let Expression::Column(ref col) = e {
4201 if col.name.name.starts_with('@') && col.table.is_none() {
4202 let var_name = col.name.name.trim_start_matches('@').to_string();
4203 return Ok(Expression::Parameter(Box::new(
4204 crate::expressions::Parameter {
4205 name: Some(var_name),
4206 index: None,
4207 style: crate::expressions::ParameterStyle::DollarBrace,
4208 quoted: false,
4209 string_quoted: false,
4210 expression: None,
4211 },
4212 )));
4213 }
4214 }
4215 }
4216
4217 // Convert @variable -> variable in SET statements for Spark/Databricks
4218 if matches!(source, DialectType::TSQL | DialectType::Fabric)
4219 && matches!(target, DialectType::Spark | DialectType::Databricks)
4220 {
4221 if let Expression::SetStatement(ref s) = e {
4222 let mut new_items = s.items.clone();
4223 let mut changed = false;
4224 for item in &mut new_items {
4225 // Strip @ from the SET name (Parameter style)
4226 if let Expression::Parameter(ref p) = item.name {
4227 if p.style == crate::expressions::ParameterStyle::At {
4228 if let Some(ref name) = p.name {
4229 item.name = Expression::Identifier(Identifier::new(name));
4230 changed = true;
4231 }
4232 }
4233 }
4234 // Strip @ from the SET name (Identifier style - SET parser)
4235 if let Expression::Identifier(ref id) = item.name {
4236 if id.name.starts_with('@') {
4237 let var_name = id.name.trim_start_matches('@').to_string();
4238 item.name = Expression::Identifier(Identifier::new(&var_name));
4239 changed = true;
4240 }
4241 }
4242 // Strip @ from the SET name (Column style - alternative parsing)
4243 if let Expression::Column(ref col) = item.name {
4244 if col.name.name.starts_with('@') && col.table.is_none() {
4245 let var_name = col.name.name.trim_start_matches('@').to_string();
4246 item.name = Expression::Identifier(Identifier::new(&var_name));
4247 changed = true;
4248 }
4249 }
4250 }
4251 if changed {
4252 let mut new_set = (**s).clone();
4253 new_set.items = new_items;
4254 return Ok(Expression::SetStatement(Box::new(new_set)));
4255 }
4256 }
4257 }
4258
4259 // Strip NOLOCK hint for non-TSQL targets
4260 if matches!(source, DialectType::TSQL | DialectType::Fabric)
4261 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
4262 {
4263 if let Expression::Table(ref tr) = e {
4264 if !tr.hints.is_empty() {
4265 let mut new_tr = tr.clone();
4266 new_tr.hints.clear();
4267 return Ok(Expression::Table(new_tr));
4268 }
4269 }
4270 }
4271
4272 // Snowflake: TRUE IS TRUE -> TRUE, FALSE IS FALSE -> FALSE
4273 // Snowflake simplifies IS TRUE/IS FALSE on boolean literals
4274 if matches!(target, DialectType::Snowflake) {
4275 if let Expression::IsTrue(ref itf) = e {
4276 if let Expression::Boolean(ref b) = itf.this {
4277 if !itf.not {
4278 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
4279 value: b.value,
4280 }));
4281 } else {
4282 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
4283 value: !b.value,
4284 }));
4285 }
4286 }
4287 }
4288 if let Expression::IsFalse(ref itf) = e {
4289 if let Expression::Boolean(ref b) = itf.this {
4290 if !itf.not {
4291 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
4292 value: !b.value,
4293 }));
4294 } else {
4295 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
4296 value: b.value,
4297 }));
4298 }
4299 }
4300 }
4301 }
4302
4303 // BigQuery: split dotted backtick identifiers in table names
4304 // e.g., `a.b.c` -> "a"."b"."c" when source is BigQuery and target is not BigQuery
4305 if matches!(source, DialectType::BigQuery) && !matches!(target, DialectType::BigQuery) {
4306 if let Expression::CreateTable(ref ct) = e {
4307 let mut changed = false;
4308 let mut new_ct = ct.clone();
4309 // Split the table name
4310 if ct.name.schema.is_none() && ct.name.name.name.contains('.') {
4311 let parts: Vec<&str> = ct.name.name.name.split('.').collect();
4312 // Use quoted identifiers when the original was quoted (backtick in BigQuery)
4313 let was_quoted = ct.name.name.quoted;
4314 let mk_id = |s: &str| {
4315 if was_quoted {
4316 Identifier::quoted(s)
4317 } else {
4318 Identifier::new(s)
4319 }
4320 };
4321 if parts.len() == 3 {
4322 new_ct.name.catalog = Some(mk_id(parts[0]));
4323 new_ct.name.schema = Some(mk_id(parts[1]));
4324 new_ct.name.name = mk_id(parts[2]);
4325 changed = true;
4326 } else if parts.len() == 2 {
4327 new_ct.name.schema = Some(mk_id(parts[0]));
4328 new_ct.name.name = mk_id(parts[1]);
4329 changed = true;
4330 }
4331 }
4332 // Split the clone source name
4333 if let Some(ref clone_src) = ct.clone_source {
4334 if clone_src.schema.is_none() && clone_src.name.name.contains('.') {
4335 let parts: Vec<&str> = clone_src.name.name.split('.').collect();
4336 let was_quoted = clone_src.name.quoted;
4337 let mk_id = |s: &str| {
4338 if was_quoted {
4339 Identifier::quoted(s)
4340 } else {
4341 Identifier::new(s)
4342 }
4343 };
4344 let mut new_src = clone_src.clone();
4345 if parts.len() == 3 {
4346 new_src.catalog = Some(mk_id(parts[0]));
4347 new_src.schema = Some(mk_id(parts[1]));
4348 new_src.name = mk_id(parts[2]);
4349 new_ct.clone_source = Some(new_src);
4350 changed = true;
4351 } else if parts.len() == 2 {
4352 new_src.schema = Some(mk_id(parts[0]));
4353 new_src.name = mk_id(parts[1]);
4354 new_ct.clone_source = Some(new_src);
4355 changed = true;
4356 }
4357 }
4358 }
4359 if changed {
4360 return Ok(Expression::CreateTable(new_ct));
4361 }
4362 }
4363 }
4364
4365 // BigQuery array subscript: a[1], b[OFFSET(1)], c[ORDINAL(1)], d[SAFE_OFFSET(1)], e[SAFE_ORDINAL(1)]
4366 // -> DuckDB/Presto: convert 0-based to 1-based, handle SAFE_* -> ELEMENT_AT for Presto
4367 if matches!(source, DialectType::BigQuery)
4368 && matches!(
4369 target,
4370 DialectType::DuckDB
4371 | DialectType::Presto
4372 | DialectType::Trino
4373 | DialectType::Athena
4374 )
4375 {
4376 if let Expression::Subscript(ref sub) = e {
4377 let (new_index, is_safe) = match &sub.index {
4378 // a[1] -> a[1+1] = a[2] (plain index is 0-based in BQ)
4379 Expression::Literal(Literal::Number(n)) => {
4380 if let Ok(val) = n.parse::<i64>() {
4381 (
4382 Some(Expression::Literal(Literal::Number(
4383 (val + 1).to_string(),
4384 ))),
4385 false,
4386 )
4387 } else {
4388 (None, false)
4389 }
4390 }
4391 // OFFSET(n) -> n+1 (0-based)
4392 Expression::Function(ref f)
4393 if f.name.eq_ignore_ascii_case("OFFSET") && f.args.len() == 1 =>
4394 {
4395 if let Expression::Literal(Literal::Number(n)) = &f.args[0] {
4396 if let Ok(val) = n.parse::<i64>() {
4397 (
4398 Some(Expression::Literal(Literal::Number(
4399 (val + 1).to_string(),
4400 ))),
4401 false,
4402 )
4403 } else {
4404 (
4405 Some(Expression::Add(Box::new(
4406 crate::expressions::BinaryOp::new(
4407 f.args[0].clone(),
4408 Expression::number(1),
4409 ),
4410 ))),
4411 false,
4412 )
4413 }
4414 } else {
4415 (
4416 Some(Expression::Add(Box::new(
4417 crate::expressions::BinaryOp::new(
4418 f.args[0].clone(),
4419 Expression::number(1),
4420 ),
4421 ))),
4422 false,
4423 )
4424 }
4425 }
4426 // ORDINAL(n) -> n (already 1-based)
4427 Expression::Function(ref f)
4428 if f.name.eq_ignore_ascii_case("ORDINAL") && f.args.len() == 1 =>
4429 {
4430 (Some(f.args[0].clone()), false)
4431 }
4432 // SAFE_OFFSET(n) -> n+1 (0-based, safe)
4433 Expression::Function(ref f)
4434 if f.name.eq_ignore_ascii_case("SAFE_OFFSET") && f.args.len() == 1 =>
4435 {
4436 if let Expression::Literal(Literal::Number(n)) = &f.args[0] {
4437 if let Ok(val) = n.parse::<i64>() {
4438 (
4439 Some(Expression::Literal(Literal::Number(
4440 (val + 1).to_string(),
4441 ))),
4442 true,
4443 )
4444 } else {
4445 (
4446 Some(Expression::Add(Box::new(
4447 crate::expressions::BinaryOp::new(
4448 f.args[0].clone(),
4449 Expression::number(1),
4450 ),
4451 ))),
4452 true,
4453 )
4454 }
4455 } else {
4456 (
4457 Some(Expression::Add(Box::new(
4458 crate::expressions::BinaryOp::new(
4459 f.args[0].clone(),
4460 Expression::number(1),
4461 ),
4462 ))),
4463 true,
4464 )
4465 }
4466 }
4467 // SAFE_ORDINAL(n) -> n (already 1-based, safe)
4468 Expression::Function(ref f)
4469 if f.name.eq_ignore_ascii_case("SAFE_ORDINAL") && f.args.len() == 1 =>
4470 {
4471 (Some(f.args[0].clone()), true)
4472 }
4473 _ => (None, false),
4474 };
4475 if let Some(idx) = new_index {
4476 if is_safe
4477 && matches!(
4478 target,
4479 DialectType::Presto | DialectType::Trino | DialectType::Athena
4480 )
4481 {
4482 // Presto: SAFE_OFFSET/SAFE_ORDINAL -> ELEMENT_AT(arr, idx)
4483 return Ok(Expression::Function(Box::new(Function::new(
4484 "ELEMENT_AT".to_string(),
4485 vec![sub.this.clone(), idx],
4486 ))));
4487 } else {
4488 // DuckDB or non-safe: just use subscript with converted index
4489 return Ok(Expression::Subscript(Box::new(
4490 crate::expressions::Subscript {
4491 this: sub.this.clone(),
4492 index: idx,
4493 },
4494 )));
4495 }
4496 }
4497 }
4498 }
4499
4500 // BigQuery LENGTH(x) -> DuckDB CASE TYPEOF(x) WHEN 'BLOB' THEN OCTET_LENGTH(...) ELSE LENGTH(...) END
4501 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
4502 if let Expression::Length(ref uf) = e {
4503 let arg = uf.this.clone();
4504 let typeof_func = Expression::Function(Box::new(Function::new(
4505 "TYPEOF".to_string(),
4506 vec![arg.clone()],
4507 )));
4508 let blob_cast = Expression::Cast(Box::new(Cast {
4509 this: arg.clone(),
4510 to: DataType::VarBinary { length: None },
4511 trailing_comments: vec![],
4512 double_colon_syntax: false,
4513 format: None,
4514 default: None,
4515 inferred_type: None,
4516 }));
4517 let octet_length = Expression::Function(Box::new(Function::new(
4518 "OCTET_LENGTH".to_string(),
4519 vec![blob_cast],
4520 )));
4521 let text_cast = Expression::Cast(Box::new(Cast {
4522 this: arg,
4523 to: DataType::Text,
4524 trailing_comments: vec![],
4525 double_colon_syntax: false,
4526 format: None,
4527 default: None,
4528 inferred_type: None,
4529 }));
4530 let length_text = Expression::Length(Box::new(crate::expressions::UnaryFunc {
4531 this: text_cast,
4532 original_name: None,
4533 inferred_type: None,
4534 }));
4535 return Ok(Expression::Case(Box::new(Case {
4536 operand: Some(typeof_func),
4537 whens: vec![(
4538 Expression::Literal(Literal::String("BLOB".to_string())),
4539 octet_length,
4540 )],
4541 else_: Some(length_text),
4542 comments: Vec::new(),
4543 inferred_type: None,
4544 })));
4545 }
4546 }
4547
4548 // BigQuery UNNEST alias handling (only for non-BigQuery sources):
4549 // UNNEST(...) AS x -> UNNEST(...) (drop unused table alias)
4550 // UNNEST(...) AS x(y) -> UNNEST(...) AS y (use column alias as main alias)
4551 if matches!(target, DialectType::BigQuery) && !matches!(source, DialectType::BigQuery) {
4552 if let Expression::Alias(ref a) = e {
4553 if matches!(&a.this, Expression::Unnest(_)) {
4554 if a.column_aliases.is_empty() {
4555 // Drop the entire alias, return just the UNNEST expression
4556 return Ok(a.this.clone());
4557 } else {
4558 // Use first column alias as the main alias
4559 let mut new_alias = a.as_ref().clone();
4560 new_alias.alias = a.column_aliases[0].clone();
4561 new_alias.column_aliases.clear();
4562 return Ok(Expression::Alias(Box::new(new_alias)));
4563 }
4564 }
4565 }
4566 }
4567
4568 // BigQuery IN UNNEST(expr) -> IN (SELECT UNNEST/EXPLODE(expr)) for non-BigQuery targets
4569 if matches!(source, DialectType::BigQuery) && !matches!(target, DialectType::BigQuery) {
4570 if let Expression::In(ref in_expr) = e {
4571 if let Some(ref unnest_inner) = in_expr.unnest {
4572 // Build the function call for the target dialect
4573 let func_expr = if matches!(
4574 target,
4575 DialectType::Hive | DialectType::Spark | DialectType::Databricks
4576 ) {
4577 // Use EXPLODE for Hive/Spark
4578 Expression::Function(Box::new(Function::new(
4579 "EXPLODE".to_string(),
4580 vec![*unnest_inner.clone()],
4581 )))
4582 } else {
4583 // Use UNNEST for Presto/Trino/DuckDB/etc.
4584 Expression::Unnest(Box::new(crate::expressions::UnnestFunc {
4585 this: *unnest_inner.clone(),
4586 expressions: Vec::new(),
4587 with_ordinality: false,
4588 alias: None,
4589 offset_alias: None,
4590 }))
4591 };
4592
4593 // Wrap in SELECT
4594 let mut inner_select = crate::expressions::Select::new();
4595 inner_select.expressions = vec![func_expr];
4596
4597 let subquery_expr = Expression::Select(Box::new(inner_select));
4598
4599 return Ok(Expression::In(Box::new(crate::expressions::In {
4600 this: in_expr.this.clone(),
4601 expressions: Vec::new(),
4602 query: Some(subquery_expr),
4603 not: in_expr.not,
4604 global: in_expr.global,
4605 unnest: None,
4606 is_field: false,
4607 })));
4608 }
4609 }
4610 }
4611
4612 // SQLite: GENERATE_SERIES AS t(i) -> (SELECT value AS i FROM GENERATE_SERIES(...)) AS t
4613 // This handles the subquery wrapping for RANGE -> GENERATE_SERIES in FROM context
4614 if matches!(target, DialectType::SQLite) && matches!(source, DialectType::DuckDB) {
4615 if let Expression::Alias(ref a) = e {
4616 if let Expression::Function(ref f) = a.this {
4617 if f.name.eq_ignore_ascii_case("GENERATE_SERIES")
4618 && !a.column_aliases.is_empty()
4619 {
4620 // Build: (SELECT value AS col_alias FROM GENERATE_SERIES(start, end)) AS table_alias
4621 let col_alias = a.column_aliases[0].clone();
4622 let mut inner_select = crate::expressions::Select::new();
4623 inner_select.expressions =
4624 vec![Expression::Alias(Box::new(crate::expressions::Alias::new(
4625 Expression::Identifier(Identifier::new("value".to_string())),
4626 col_alias,
4627 )))];
4628 inner_select.from = Some(crate::expressions::From {
4629 expressions: vec![a.this.clone()],
4630 });
4631 let subquery =
4632 Expression::Subquery(Box::new(crate::expressions::Subquery {
4633 this: Expression::Select(Box::new(inner_select)),
4634 alias: Some(a.alias.clone()),
4635 column_aliases: Vec::new(),
4636 order_by: None,
4637 limit: None,
4638 offset: None,
4639 lateral: false,
4640 modifiers_inside: false,
4641 trailing_comments: Vec::new(),
4642 distribute_by: None,
4643 sort_by: None,
4644 cluster_by: None,
4645 inferred_type: None,
4646 }));
4647 return Ok(subquery);
4648 }
4649 }
4650 }
4651 }
4652
4653 // BigQuery implicit UNNEST: comma-join on array path -> CROSS JOIN UNNEST
4654 // e.g., SELECT results FROM Coordinates, Coordinates.position AS results
4655 // -> SELECT results FROM Coordinates CROSS JOIN UNNEST(Coordinates.position) AS results
4656 if matches!(source, DialectType::BigQuery) {
4657 if let Expression::Select(ref s) = e {
4658 if let Some(ref from) = s.from {
4659 if from.expressions.len() >= 2 {
4660 // Collect table names from first expression
4661 let first_tables: Vec<String> = from
4662 .expressions
4663 .iter()
4664 .take(1)
4665 .filter_map(|expr| {
4666 if let Expression::Table(t) = expr {
4667 Some(t.name.name.to_lowercase())
4668 } else {
4669 None
4670 }
4671 })
4672 .collect();
4673
4674 // Check if any subsequent FROM expressions are schema-qualified with a matching table name
4675 // or have a dotted name matching a table
4676 let mut needs_rewrite = false;
4677 for expr in from.expressions.iter().skip(1) {
4678 if let Expression::Table(t) = expr {
4679 if let Some(ref schema) = t.schema {
4680 if first_tables.contains(&schema.name.to_lowercase()) {
4681 needs_rewrite = true;
4682 break;
4683 }
4684 }
4685 // Also check dotted names in quoted identifiers (e.g., `Coordinates.position`)
4686 if t.schema.is_none() && t.name.name.contains('.') {
4687 let parts: Vec<&str> = t.name.name.split('.').collect();
4688 if parts.len() >= 2
4689 && first_tables.contains(&parts[0].to_lowercase())
4690 {
4691 needs_rewrite = true;
4692 break;
4693 }
4694 }
4695 }
4696 }
4697
4698 if needs_rewrite {
4699 let mut new_select = s.clone();
4700 let mut new_from_exprs = vec![from.expressions[0].clone()];
4701 let mut new_joins = s.joins.clone();
4702
4703 for expr in from.expressions.iter().skip(1) {
4704 if let Expression::Table(ref t) = expr {
4705 if let Some(ref schema) = t.schema {
4706 if first_tables.contains(&schema.name.to_lowercase()) {
4707 // This is an array path reference, convert to CROSS JOIN UNNEST
4708 let col_expr = Expression::Column(
4709 crate::expressions::Column {
4710 name: t.name.clone(),
4711 table: Some(schema.clone()),
4712 join_mark: false,
4713 trailing_comments: vec![],
4714 span: None,
4715 inferred_type: None,
4716 },
4717 );
4718 let unnest_expr = Expression::Unnest(Box::new(
4719 crate::expressions::UnnestFunc {
4720 this: col_expr,
4721 expressions: Vec::new(),
4722 with_ordinality: false,
4723 alias: None,
4724 offset_alias: None,
4725 },
4726 ));
4727 let join_this = if let Some(ref alias) = t.alias {
4728 if matches!(
4729 target,
4730 DialectType::Presto
4731 | DialectType::Trino
4732 | DialectType::Athena
4733 ) {
4734 // Presto: UNNEST(x) AS _t0(results)
4735 Expression::Alias(Box::new(
4736 crate::expressions::Alias {
4737 this: unnest_expr,
4738 alias: Identifier::new("_t0"),
4739 column_aliases: vec![alias.clone()],
4740 pre_alias_comments: vec![],
4741 trailing_comments: vec![],
4742 inferred_type: None,
4743 },
4744 ))
4745 } else {
4746 // BigQuery: UNNEST(x) AS results
4747 Expression::Alias(Box::new(
4748 crate::expressions::Alias {
4749 this: unnest_expr,
4750 alias: alias.clone(),
4751 column_aliases: vec![],
4752 pre_alias_comments: vec![],
4753 trailing_comments: vec![],
4754 inferred_type: None,
4755 },
4756 ))
4757 }
4758 } else {
4759 unnest_expr
4760 };
4761 new_joins.push(crate::expressions::Join {
4762 kind: crate::expressions::JoinKind::Cross,
4763 this: join_this,
4764 on: None,
4765 using: Vec::new(),
4766 use_inner_keyword: false,
4767 use_outer_keyword: false,
4768 deferred_condition: false,
4769 join_hint: None,
4770 match_condition: None,
4771 pivots: Vec::new(),
4772 comments: Vec::new(),
4773 nesting_group: 0,
4774 directed: false,
4775 });
4776 } else {
4777 new_from_exprs.push(expr.clone());
4778 }
4779 } else if t.schema.is_none() && t.name.name.contains('.') {
4780 // Dotted name in quoted identifier: `Coordinates.position`
4781 let parts: Vec<&str> = t.name.name.split('.').collect();
4782 if parts.len() >= 2
4783 && first_tables.contains(&parts[0].to_lowercase())
4784 {
4785 let join_this =
4786 if matches!(target, DialectType::BigQuery) {
4787 // BigQuery: keep as single quoted identifier, just convert comma -> CROSS JOIN
4788 Expression::Table(t.clone())
4789 } else {
4790 // Other targets: split into "schema"."name"
4791 let mut new_t = t.clone();
4792 new_t.schema =
4793 Some(Identifier::quoted(parts[0]));
4794 new_t.name = Identifier::quoted(parts[1]);
4795 Expression::Table(new_t)
4796 };
4797 new_joins.push(crate::expressions::Join {
4798 kind: crate::expressions::JoinKind::Cross,
4799 this: join_this,
4800 on: None,
4801 using: Vec::new(),
4802 use_inner_keyword: false,
4803 use_outer_keyword: false,
4804 deferred_condition: false,
4805 join_hint: None,
4806 match_condition: None,
4807 pivots: Vec::new(),
4808 comments: Vec::new(),
4809 nesting_group: 0,
4810 directed: false,
4811 });
4812 } else {
4813 new_from_exprs.push(expr.clone());
4814 }
4815 } else {
4816 new_from_exprs.push(expr.clone());
4817 }
4818 } else {
4819 new_from_exprs.push(expr.clone());
4820 }
4821 }
4822
4823 new_select.from = Some(crate::expressions::From {
4824 expressions: new_from_exprs,
4825 ..from.clone()
4826 });
4827 new_select.joins = new_joins;
4828 return Ok(Expression::Select(new_select));
4829 }
4830 }
4831 }
4832 }
4833 }
4834
4835 // CROSS JOIN UNNEST -> LATERAL VIEW EXPLODE for Hive/Spark
4836 if matches!(
4837 target,
4838 DialectType::Hive | DialectType::Spark | DialectType::Databricks
4839 ) {
4840 if let Expression::Select(ref s) = e {
4841 // Check if any joins are CROSS JOIN with UNNEST/EXPLODE
4842 let is_unnest_or_explode_expr = |expr: &Expression| -> bool {
4843 matches!(expr, Expression::Unnest(_))
4844 || matches!(expr, Expression::Function(f) if f.name.eq_ignore_ascii_case("EXPLODE"))
4845 };
4846 let has_unnest_join = s.joins.iter().any(|j| {
4847 j.kind == crate::expressions::JoinKind::Cross && (
4848 matches!(&j.this, Expression::Alias(a) if is_unnest_or_explode_expr(&a.this))
4849 || is_unnest_or_explode_expr(&j.this)
4850 )
4851 });
4852 if has_unnest_join {
4853 let mut select = s.clone();
4854 let mut new_joins = Vec::new();
4855 for join in select.joins.drain(..) {
4856 if join.kind == crate::expressions::JoinKind::Cross {
4857 // Extract the UNNEST/EXPLODE from the join
4858 let (func_expr, table_alias, col_aliases) = match &join.this {
4859 Expression::Alias(a) => {
4860 let ta = if a.alias.is_empty() {
4861 None
4862 } else {
4863 Some(a.alias.clone())
4864 };
4865 let cas = a.column_aliases.clone();
4866 match &a.this {
4867 Expression::Unnest(u) => {
4868 // Multi-arg UNNEST(y, z) -> INLINE(ARRAYS_ZIP(y, z))
4869 if !u.expressions.is_empty() {
4870 let mut all_args = vec![u.this.clone()];
4871 all_args.extend(u.expressions.clone());
4872 let arrays_zip =
4873 Expression::Function(Box::new(
4874 crate::expressions::Function::new(
4875 "ARRAYS_ZIP".to_string(),
4876 all_args,
4877 ),
4878 ));
4879 let inline = Expression::Function(Box::new(
4880 crate::expressions::Function::new(
4881 "INLINE".to_string(),
4882 vec![arrays_zip],
4883 ),
4884 ));
4885 (Some(inline), ta, a.column_aliases.clone())
4886 } else {
4887 // Convert UNNEST(x) to EXPLODE(x) or POSEXPLODE(x)
4888 let func_name = if u.with_ordinality {
4889 "POSEXPLODE"
4890 } else {
4891 "EXPLODE"
4892 };
4893 let explode = Expression::Function(Box::new(
4894 crate::expressions::Function::new(
4895 func_name.to_string(),
4896 vec![u.this.clone()],
4897 ),
4898 ));
4899 // For POSEXPLODE, add 'pos' to column aliases
4900 let cas = if u.with_ordinality {
4901 let mut pos_aliases =
4902 vec![Identifier::new(
4903 "pos".to_string(),
4904 )];
4905 pos_aliases
4906 .extend(a.column_aliases.clone());
4907 pos_aliases
4908 } else {
4909 a.column_aliases.clone()
4910 };
4911 (Some(explode), ta, cas)
4912 }
4913 }
4914 Expression::Function(f)
4915 if f.name.eq_ignore_ascii_case("EXPLODE") =>
4916 {
4917 (Some(Expression::Function(f.clone())), ta, cas)
4918 }
4919 _ => (None, None, Vec::new()),
4920 }
4921 }
4922 Expression::Unnest(u) => {
4923 let func_name = if u.with_ordinality {
4924 "POSEXPLODE"
4925 } else {
4926 "EXPLODE"
4927 };
4928 let explode = Expression::Function(Box::new(
4929 crate::expressions::Function::new(
4930 func_name.to_string(),
4931 vec![u.this.clone()],
4932 ),
4933 ));
4934 let ta = u.alias.clone();
4935 let col_aliases = if u.with_ordinality {
4936 vec![Identifier::new("pos".to_string())]
4937 } else {
4938 Vec::new()
4939 };
4940 (Some(explode), ta, col_aliases)
4941 }
4942 _ => (None, None, Vec::new()),
4943 };
4944 if let Some(func) = func_expr {
4945 select.lateral_views.push(crate::expressions::LateralView {
4946 this: func,
4947 table_alias,
4948 column_aliases: col_aliases,
4949 outer: false,
4950 });
4951 } else {
4952 new_joins.push(join);
4953 }
4954 } else {
4955 new_joins.push(join);
4956 }
4957 }
4958 select.joins = new_joins;
4959 return Ok(Expression::Select(select));
4960 }
4961 }
4962 }
4963
4964 // UNNEST expansion: DuckDB SELECT UNNEST(arr) in SELECT list -> expanded query
4965 // for BigQuery, Presto/Trino, Snowflake
4966 if matches!(source, DialectType::DuckDB | DialectType::PostgreSQL)
4967 && matches!(
4968 target,
4969 DialectType::BigQuery
4970 | DialectType::Presto
4971 | DialectType::Trino
4972 | DialectType::Snowflake
4973 )
4974 {
4975 if let Expression::Select(ref s) = e {
4976 // Check if any SELECT expressions contain UNNEST
4977 // Note: UNNEST can appear as Expression::Unnest OR Expression::Function("UNNEST")
4978 let has_unnest_in_select = s.expressions.iter().any(|expr| {
4979 fn contains_unnest(e: &Expression) -> bool {
4980 match e {
4981 Expression::Unnest(_) => true,
4982 Expression::Function(f)
4983 if f.name.eq_ignore_ascii_case("UNNEST") =>
4984 {
4985 true
4986 }
4987 Expression::Alias(a) => contains_unnest(&a.this),
4988 Expression::Add(op)
4989 | Expression::Sub(op)
4990 | Expression::Mul(op)
4991 | Expression::Div(op) => {
4992 contains_unnest(&op.left) || contains_unnest(&op.right)
4993 }
4994 _ => false,
4995 }
4996 }
4997 contains_unnest(expr)
4998 });
4999
5000 if has_unnest_in_select {
5001 let rewritten = Self::rewrite_unnest_expansion(s, target);
5002 if let Some(new_select) = rewritten {
5003 return Ok(Expression::Select(Box::new(new_select)));
5004 }
5005 }
5006 }
5007 }
5008
5009 // BigQuery -> PostgreSQL: convert escape sequences in string literals to actual characters
5010 // BigQuery '\n' -> PostgreSQL literal newline in string
5011 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::PostgreSQL)
5012 {
5013 if let Expression::Literal(Literal::String(ref s)) = e {
5014 if s.contains("\\n")
5015 || s.contains("\\t")
5016 || s.contains("\\r")
5017 || s.contains("\\\\")
5018 {
5019 let converted = s
5020 .replace("\\n", "\n")
5021 .replace("\\t", "\t")
5022 .replace("\\r", "\r")
5023 .replace("\\\\", "\\");
5024 return Ok(Expression::Literal(Literal::String(converted)));
5025 }
5026 }
5027 }
5028
5029 // Cross-dialect: convert Literal::Timestamp to target-specific CAST form
5030 // when source != target (identity tests keep the Literal::Timestamp for native handling)
5031 if source != target {
5032 if let Expression::Literal(Literal::Timestamp(ref s)) = e {
5033 let s = s.clone();
5034 // MySQL: TIMESTAMP handling depends on source dialect
5035 // BigQuery TIMESTAMP is timezone-aware -> TIMESTAMP() function in MySQL
5036 // Other sources' TIMESTAMP is non-timezone -> CAST('x' AS DATETIME) in MySQL
5037 if matches!(target, DialectType::MySQL) {
5038 if matches!(source, DialectType::BigQuery) {
5039 // BigQuery TIMESTAMP is timezone-aware -> MySQL TIMESTAMP() function
5040 return Ok(Expression::Function(Box::new(Function::new(
5041 "TIMESTAMP".to_string(),
5042 vec![Expression::Literal(Literal::String(s))],
5043 ))));
5044 } else {
5045 // Non-timezone TIMESTAMP -> CAST('x' AS DATETIME) in MySQL
5046 return Ok(Expression::Cast(Box::new(Cast {
5047 this: Expression::Literal(Literal::String(s)),
5048 to: DataType::Custom {
5049 name: "DATETIME".to_string(),
5050 },
5051 trailing_comments: Vec::new(),
5052 double_colon_syntax: false,
5053 format: None,
5054 default: None,
5055 inferred_type: None,
5056 })));
5057 }
5058 }
5059 let dt = match target {
5060 DialectType::BigQuery | DialectType::StarRocks => DataType::Custom {
5061 name: "DATETIME".to_string(),
5062 },
5063 DialectType::Snowflake => {
5064 // BigQuery TIMESTAMP is timezone-aware -> use TIMESTAMPTZ for Snowflake
5065 if matches!(source, DialectType::BigQuery) {
5066 DataType::Custom {
5067 name: "TIMESTAMPTZ".to_string(),
5068 }
5069 } else if matches!(
5070 source,
5071 DialectType::PostgreSQL
5072 | DialectType::Redshift
5073 | DialectType::Snowflake
5074 ) {
5075 DataType::Timestamp {
5076 precision: None,
5077 timezone: false,
5078 }
5079 } else {
5080 DataType::Custom {
5081 name: "TIMESTAMPNTZ".to_string(),
5082 }
5083 }
5084 }
5085 DialectType::Spark | DialectType::Databricks => {
5086 // BigQuery TIMESTAMP is timezone-aware -> use plain TIMESTAMP for Spark/Databricks
5087 if matches!(source, DialectType::BigQuery) {
5088 DataType::Timestamp {
5089 precision: None,
5090 timezone: false,
5091 }
5092 } else {
5093 DataType::Custom {
5094 name: "TIMESTAMP_NTZ".to_string(),
5095 }
5096 }
5097 }
5098 DialectType::ClickHouse => DataType::Nullable {
5099 inner: Box::new(DataType::Custom {
5100 name: "DateTime".to_string(),
5101 }),
5102 },
5103 DialectType::TSQL | DialectType::Fabric => DataType::Custom {
5104 name: "DATETIME2".to_string(),
5105 },
5106 DialectType::DuckDB => {
5107 // DuckDB: use TIMESTAMPTZ when source is BigQuery (BQ TIMESTAMP is always UTC/tz-aware)
5108 // or when the timestamp string explicitly has timezone info
5109 if matches!(source, DialectType::BigQuery)
5110 || Self::timestamp_string_has_timezone(&s)
5111 {
5112 DataType::Custom {
5113 name: "TIMESTAMPTZ".to_string(),
5114 }
5115 } else {
5116 DataType::Timestamp {
5117 precision: None,
5118 timezone: false,
5119 }
5120 }
5121 }
5122 _ => DataType::Timestamp {
5123 precision: None,
5124 timezone: false,
5125 },
5126 };
5127 return Ok(Expression::Cast(Box::new(Cast {
5128 this: Expression::Literal(Literal::String(s)),
5129 to: dt,
5130 trailing_comments: vec![],
5131 double_colon_syntax: false,
5132 format: None,
5133 default: None,
5134 inferred_type: None,
5135 })));
5136 }
5137 }
5138
5139 // PostgreSQL DELETE requires explicit AS for table aliases
5140 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
5141 if let Expression::Delete(ref del) = e {
5142 if del.alias.is_some() && !del.alias_explicit_as {
5143 let mut new_del = del.clone();
5144 new_del.alias_explicit_as = true;
5145 return Ok(Expression::Delete(new_del));
5146 }
5147 }
5148 }
5149
5150 // UNION/INTERSECT/EXCEPT DISTINCT handling:
5151 // Some dialects require explicit DISTINCT (BigQuery, ClickHouse),
5152 // while others don't support it (Presto, Spark, DuckDB, etc.)
5153 {
5154 let needs_distinct =
5155 matches!(target, DialectType::BigQuery | DialectType::ClickHouse);
5156 let drop_distinct = matches!(
5157 target,
5158 DialectType::Presto
5159 | DialectType::Trino
5160 | DialectType::Athena
5161 | DialectType::Spark
5162 | DialectType::Databricks
5163 | DialectType::DuckDB
5164 | DialectType::Hive
5165 | DialectType::MySQL
5166 | DialectType::PostgreSQL
5167 | DialectType::SQLite
5168 | DialectType::TSQL
5169 | DialectType::Redshift
5170 | DialectType::Snowflake
5171 | DialectType::Oracle
5172 | DialectType::Teradata
5173 | DialectType::Drill
5174 | DialectType::Doris
5175 | DialectType::StarRocks
5176 );
5177 match &e {
5178 Expression::Union(u) if !u.all && needs_distinct && !u.distinct => {
5179 let mut new_u = (**u).clone();
5180 new_u.distinct = true;
5181 return Ok(Expression::Union(Box::new(new_u)));
5182 }
5183 Expression::Intersect(i) if !i.all && needs_distinct && !i.distinct => {
5184 let mut new_i = (**i).clone();
5185 new_i.distinct = true;
5186 return Ok(Expression::Intersect(Box::new(new_i)));
5187 }
5188 Expression::Except(ex) if !ex.all && needs_distinct && !ex.distinct => {
5189 let mut new_ex = (**ex).clone();
5190 new_ex.distinct = true;
5191 return Ok(Expression::Except(Box::new(new_ex)));
5192 }
5193 Expression::Union(u) if u.distinct && drop_distinct => {
5194 let mut new_u = (**u).clone();
5195 new_u.distinct = false;
5196 return Ok(Expression::Union(Box::new(new_u)));
5197 }
5198 Expression::Intersect(i) if i.distinct && drop_distinct => {
5199 let mut new_i = (**i).clone();
5200 new_i.distinct = false;
5201 return Ok(Expression::Intersect(Box::new(new_i)));
5202 }
5203 Expression::Except(ex) if ex.distinct && drop_distinct => {
5204 let mut new_ex = (**ex).clone();
5205 new_ex.distinct = false;
5206 return Ok(Expression::Except(Box::new(new_ex)));
5207 }
5208 _ => {}
5209 }
5210 }
5211
5212 // ClickHouse: MAP('a', '1') -> map('a', '1') (lowercase function name)
5213 if matches!(target, DialectType::ClickHouse) {
5214 if let Expression::Function(ref f) = e {
5215 if f.name.eq_ignore_ascii_case("MAP") && !f.args.is_empty() {
5216 let mut new_f = f.as_ref().clone();
5217 new_f.name = "map".to_string();
5218 return Ok(Expression::Function(Box::new(new_f)));
5219 }
5220 }
5221 }
5222
5223 // ClickHouse: INTERSECT ALL -> INTERSECT (ClickHouse doesn't support ALL on INTERSECT)
5224 if matches!(target, DialectType::ClickHouse) {
5225 if let Expression::Intersect(ref i) = e {
5226 if i.all {
5227 let mut new_i = (**i).clone();
5228 new_i.all = false;
5229 return Ok(Expression::Intersect(Box::new(new_i)));
5230 }
5231 }
5232 }
5233
5234 // Integer division: a / b -> CAST(a AS DOUBLE) / b for dialects that need it
5235 // Only from Generic source, to prevent double-wrapping
5236 if matches!(source, DialectType::Generic) {
5237 if let Expression::Div(ref op) = e {
5238 let cast_type = match target {
5239 DialectType::TSQL | DialectType::Fabric => Some(DataType::Float {
5240 precision: None,
5241 scale: None,
5242 real_spelling: false,
5243 }),
5244 DialectType::Drill
5245 | DialectType::Trino
5246 | DialectType::Athena
5247 | DialectType::Presto => Some(DataType::Double {
5248 precision: None,
5249 scale: None,
5250 }),
5251 DialectType::PostgreSQL
5252 | DialectType::Redshift
5253 | DialectType::Materialize
5254 | DialectType::Teradata
5255 | DialectType::RisingWave => Some(DataType::Double {
5256 precision: None,
5257 scale: None,
5258 }),
5259 _ => None,
5260 };
5261 if let Some(dt) = cast_type {
5262 let cast_left = Expression::Cast(Box::new(Cast {
5263 this: op.left.clone(),
5264 to: dt,
5265 double_colon_syntax: false,
5266 trailing_comments: Vec::new(),
5267 format: None,
5268 default: None,
5269 inferred_type: None,
5270 }));
5271 let new_op = crate::expressions::BinaryOp {
5272 left: cast_left,
5273 right: op.right.clone(),
5274 left_comments: op.left_comments.clone(),
5275 operator_comments: op.operator_comments.clone(),
5276 trailing_comments: op.trailing_comments.clone(),
5277 inferred_type: None,
5278 };
5279 return Ok(Expression::Div(Box::new(new_op)));
5280 }
5281 }
5282 }
5283
5284 // CREATE DATABASE -> CREATE SCHEMA for DuckDB target
5285 if matches!(target, DialectType::DuckDB) {
5286 if let Expression::CreateDatabase(db) = e {
5287 let mut schema = crate::expressions::CreateSchema::new(db.name.name.clone());
5288 schema.if_not_exists = db.if_not_exists;
5289 return Ok(Expression::CreateSchema(Box::new(schema)));
5290 }
5291 if let Expression::DropDatabase(db) = e {
5292 let mut schema = crate::expressions::DropSchema::new(db.name.name.clone());
5293 schema.if_exists = db.if_exists;
5294 return Ok(Expression::DropSchema(Box::new(schema)));
5295 }
5296 }
5297
5298 // Strip ClickHouse Nullable(...) wrapper for non-ClickHouse targets
5299 if matches!(source, DialectType::ClickHouse)
5300 && !matches!(target, DialectType::ClickHouse)
5301 {
5302 if let Expression::Cast(ref c) = e {
5303 if let DataType::Custom { ref name } = c.to {
5304 let upper = name.to_uppercase();
5305 if upper.starts_with("NULLABLE(") && upper.ends_with(")") {
5306 let inner = &name[9..name.len() - 1]; // strip "Nullable(" and ")"
5307 let inner_upper = inner.to_uppercase();
5308 let new_dt = match inner_upper.as_str() {
5309 "DATETIME" | "DATETIME64" => DataType::Timestamp {
5310 precision: None,
5311 timezone: false,
5312 },
5313 "DATE" => DataType::Date,
5314 "INT64" | "BIGINT" => DataType::BigInt { length: None },
5315 "INT32" | "INT" | "INTEGER" => DataType::Int {
5316 length: None,
5317 integer_spelling: false,
5318 },
5319 "FLOAT64" | "DOUBLE" => DataType::Double {
5320 precision: None,
5321 scale: None,
5322 },
5323 "STRING" => DataType::Text,
5324 _ => DataType::Custom {
5325 name: inner.to_string(),
5326 },
5327 };
5328 let mut new_cast = c.clone();
5329 new_cast.to = new_dt;
5330 return Ok(Expression::Cast(new_cast));
5331 }
5332 }
5333 }
5334 }
5335
5336 // ARRAY_CONCAT_AGG -> Snowflake: ARRAY_FLATTEN(ARRAY_AGG(...))
5337 if matches!(target, DialectType::Snowflake) {
5338 if let Expression::ArrayConcatAgg(ref agg) = e {
5339 let mut agg_clone = agg.as_ref().clone();
5340 agg_clone.name = None; // Clear name so generator uses default "ARRAY_AGG"
5341 let array_agg = Expression::ArrayAgg(Box::new(agg_clone));
5342 let flatten = Expression::Function(Box::new(Function::new(
5343 "ARRAY_FLATTEN".to_string(),
5344 vec![array_agg],
5345 )));
5346 return Ok(flatten);
5347 }
5348 }
5349
5350 // ARRAY_CONCAT_AGG -> others: keep as function for cross-dialect
5351 if !matches!(target, DialectType::BigQuery | DialectType::Snowflake) {
5352 if let Expression::ArrayConcatAgg(agg) = e {
5353 let arg = agg.this;
5354 return Ok(Expression::Function(Box::new(Function::new(
5355 "ARRAY_CONCAT_AGG".to_string(),
5356 vec![arg],
5357 ))));
5358 }
5359 }
5360
5361 // Determine what action to take by inspecting e immutably
5362 let action = {
5363 let source_propagates_nulls =
5364 matches!(source, DialectType::Snowflake | DialectType::BigQuery);
5365 let target_ignores_nulls =
5366 matches!(target, DialectType::DuckDB | DialectType::PostgreSQL);
5367
5368 match &e {
5369 Expression::Function(f) => {
5370 let name = f.name.to_uppercase();
5371 // DATE_PART: strip quotes from first arg when target is Snowflake (source != Snowflake)
5372 if (name == "DATE_PART" || name == "DATEPART")
5373 && f.args.len() == 2
5374 && matches!(target, DialectType::Snowflake)
5375 && !matches!(source, DialectType::Snowflake)
5376 && matches!(
5377 &f.args[0],
5378 Expression::Literal(crate::expressions::Literal::String(_))
5379 )
5380 {
5381 Action::DatePartUnquote
5382 } else if source_propagates_nulls
5383 && target_ignores_nulls
5384 && (name == "GREATEST" || name == "LEAST")
5385 && f.args.len() >= 2
5386 {
5387 Action::GreatestLeastNull
5388 } else if matches!(source, DialectType::Snowflake)
5389 && name == "ARRAY_GENERATE_RANGE"
5390 && f.args.len() >= 2
5391 {
5392 Action::ArrayGenerateRange
5393 } else if matches!(source, DialectType::Snowflake)
5394 && matches!(target, DialectType::DuckDB)
5395 && name == "DATE_TRUNC"
5396 && f.args.len() == 2
5397 {
5398 // Determine if DuckDB DATE_TRUNC needs CAST wrapping to preserve input type.
5399 // Logic based on Python sqlglot's input_type_preserved flag:
5400 // - DATE + non-date-unit (HOUR, MINUTE, etc.) -> wrap
5401 // - TIMESTAMP + date-unit (YEAR, QUARTER, MONTH, WEEK, DAY) -> wrap
5402 // - TIMESTAMPTZ/TIMESTAMPLTZ/TIME -> always wrap
5403 let unit_str = match &f.args[0] {
5404 Expression::Literal(crate::expressions::Literal::String(s)) => {
5405 Some(s.to_uppercase())
5406 }
5407 _ => None,
5408 };
5409 let is_date_unit = unit_str.as_ref().map_or(false, |u| {
5410 matches!(u.as_str(), "YEAR" | "QUARTER" | "MONTH" | "WEEK" | "DAY")
5411 });
5412 match &f.args[1] {
5413 Expression::Cast(c) => match &c.to {
5414 DataType::Time { .. } => Action::DateTruncWrapCast,
5415 DataType::Custom { name }
5416 if name.eq_ignore_ascii_case("TIMESTAMPTZ")
5417 || name.eq_ignore_ascii_case("TIMESTAMPLTZ") =>
5418 {
5419 Action::DateTruncWrapCast
5420 }
5421 DataType::Timestamp { timezone: true, .. } => {
5422 Action::DateTruncWrapCast
5423 }
5424 DataType::Date if !is_date_unit => Action::DateTruncWrapCast,
5425 DataType::Timestamp {
5426 timezone: false, ..
5427 } if is_date_unit => Action::DateTruncWrapCast,
5428 _ => Action::None,
5429 },
5430 _ => Action::None,
5431 }
5432 } else if matches!(source, DialectType::Snowflake)
5433 && matches!(target, DialectType::DuckDB)
5434 && name == "TO_DATE"
5435 && f.args.len() == 1
5436 && !matches!(
5437 &f.args[0],
5438 Expression::Literal(crate::expressions::Literal::String(_))
5439 )
5440 {
5441 Action::ToDateToCast
5442 } else if !matches!(source, DialectType::Redshift)
5443 && matches!(target, DialectType::Redshift)
5444 && name == "CONVERT_TIMEZONE"
5445 && (f.args.len() == 2 || f.args.len() == 3)
5446 {
5447 // Convert Function("CONVERT_TIMEZONE") to Expression::ConvertTimezone
5448 // so Redshift's transform_expr won't expand 2-arg to 3-arg with 'UTC'.
5449 // The Redshift parser adds 'UTC' as default source_tz, but when
5450 // transpiling from other dialects, we should preserve the original form.
5451 Action::ConvertTimezoneToExpr
5452 } else if matches!(source, DialectType::Snowflake)
5453 && matches!(target, DialectType::DuckDB)
5454 && name == "REGEXP_REPLACE"
5455 && f.args.len() == 4
5456 && !matches!(
5457 &f.args[3],
5458 Expression::Literal(crate::expressions::Literal::String(_))
5459 )
5460 {
5461 // Snowflake REGEXP_REPLACE with position arg -> DuckDB needs 'g' flag
5462 Action::RegexpReplaceSnowflakeToDuckDB
5463 } else if name == "_BQ_TO_HEX" {
5464 // Internal marker from TO_HEX conversion - bare (no LOWER/UPPER wrapper)
5465 Action::BigQueryToHexBare
5466 } else if matches!(source, DialectType::BigQuery)
5467 && !matches!(target, DialectType::BigQuery)
5468 {
5469 // BigQuery-specific functions that need to be converted to standard forms
5470 match name.as_str() {
5471 "TIMESTAMP_DIFF" | "DATETIME_DIFF" | "TIME_DIFF"
5472 | "DATE_DIFF"
5473 | "TIMESTAMP_ADD" | "TIMESTAMP_SUB"
5474 | "DATETIME_ADD" | "DATETIME_SUB"
5475 | "TIME_ADD" | "TIME_SUB"
5476 | "DATE_ADD" | "DATE_SUB"
5477 | "SAFE_DIVIDE"
5478 | "GENERATE_UUID"
5479 | "COUNTIF"
5480 | "EDIT_DISTANCE"
5481 | "TIMESTAMP_SECONDS" | "TIMESTAMP_MILLIS" | "TIMESTAMP_MICROS"
5482 | "TIMESTAMP_TRUNC" | "DATETIME_TRUNC" | "DATE_TRUNC"
5483 | "TO_HEX"
5484 | "TO_JSON_STRING"
5485 | "GENERATE_ARRAY" | "GENERATE_TIMESTAMP_ARRAY"
5486 | "DIV"
5487 | "UNIX_DATE" | "UNIX_SECONDS" | "UNIX_MILLIS" | "UNIX_MICROS"
5488 | "LAST_DAY"
5489 | "TIME" | "DATETIME" | "TIMESTAMP" | "STRING"
5490 | "REGEXP_CONTAINS"
5491 | "CONTAINS_SUBSTR"
5492 | "SAFE_ADD" | "SAFE_SUBTRACT" | "SAFE_MULTIPLY"
5493 | "SAFE_CAST"
5494 | "GENERATE_DATE_ARRAY"
5495 | "PARSE_DATE" | "PARSE_TIMESTAMP"
5496 | "FORMAT_DATE" | "FORMAT_DATETIME" | "FORMAT_TIMESTAMP"
5497 | "ARRAY_CONCAT"
5498 | "JSON_QUERY" | "JSON_VALUE_ARRAY"
5499 | "INSTR"
5500 | "MD5" | "SHA1" | "SHA256" | "SHA512"
5501 | "GENERATE_UUID()" // just in case
5502 | "REGEXP_EXTRACT_ALL"
5503 | "REGEXP_EXTRACT"
5504 | "INT64"
5505 | "ARRAY_CONCAT_AGG"
5506 | "DATE_DIFF(" // just in case
5507 | "TO_HEX_MD5" // internal
5508 | "MOD"
5509 | "CONCAT"
5510 | "CURRENT_TIMESTAMP" | "CURRENT_DATE" | "CURRENT_DATETIME" | "CURRENT_TIME"
5511 | "STRUCT"
5512 | "ROUND"
5513 | "MAKE_INTERVAL"
5514 | "ARRAY_TO_STRING"
5515 | "PERCENTILE_CONT"
5516 => Action::BigQueryFunctionNormalize,
5517 "ARRAY" if matches!(target, DialectType::Snowflake)
5518 && f.args.len() == 1
5519 && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT"))
5520 => Action::BigQueryArraySelectAsStructToSnowflake,
5521 _ => Action::None,
5522 }
5523 } else if matches!(source, DialectType::BigQuery)
5524 && matches!(target, DialectType::BigQuery)
5525 {
5526 // BigQuery -> BigQuery normalizations
5527 match name.as_str() {
5528 "TIMESTAMP_DIFF"
5529 | "DATETIME_DIFF"
5530 | "TIME_DIFF"
5531 | "DATE_DIFF"
5532 | "DATE_ADD"
5533 | "TO_HEX"
5534 | "CURRENT_TIMESTAMP"
5535 | "CURRENT_DATE"
5536 | "CURRENT_TIME"
5537 | "CURRENT_DATETIME"
5538 | "GENERATE_DATE_ARRAY"
5539 | "INSTR"
5540 | "FORMAT_DATETIME"
5541 | "DATETIME"
5542 | "MAKE_INTERVAL" => Action::BigQueryFunctionNormalize,
5543 _ => Action::None,
5544 }
5545 } else {
5546 // Generic function normalization for non-BigQuery sources
5547 match name.as_str() {
5548 "ARBITRARY" | "AGGREGATE"
5549 | "REGEXP_MATCHES" | "REGEXP_FULL_MATCH"
5550 | "STRUCT_EXTRACT"
5551 | "LIST_FILTER" | "LIST_TRANSFORM" | "LIST_SORT" | "LIST_REVERSE_SORT"
5552 | "STRING_TO_ARRAY" | "STR_SPLIT" | "STR_SPLIT_REGEX" | "SPLIT_TO_ARRAY"
5553 | "SUBSTRINGINDEX"
5554 | "ARRAY_LENGTH" | "SIZE" | "CARDINALITY"
5555 | "UNICODE"
5556 | "XOR"
5557 | "ARRAY_REVERSE_SORT"
5558 | "ENCODE" | "DECODE"
5559 | "QUANTILE"
5560 | "EPOCH" | "EPOCH_MS"
5561 | "HASHBYTES"
5562 | "JSON_EXTRACT_PATH" | "JSON_EXTRACT_PATH_TEXT"
5563 | "APPROX_DISTINCT"
5564 | "DATE_PARSE" | "FORMAT_DATETIME"
5565 | "REGEXP_EXTRACT" | "REGEXP_SUBSTR" | "TO_DAYS"
5566 | "RLIKE"
5567 | "DATEDIFF" | "DATE_DIFF" | "MONTHS_BETWEEN"
5568 | "ADD_MONTHS" | "DATEADD" | "DATE_ADD" | "DATE_SUB" | "DATETRUNC"
5569 | "LAST_DAY" | "LAST_DAY_OF_MONTH" | "EOMONTH"
5570 | "ARRAY_CONSTRUCT" | "ARRAY_CAT" | "ARRAY_COMPACT"
5571 | "ARRAY_FILTER" | "FILTER" | "REDUCE" | "ARRAY_REVERSE"
5572 | "MAP" | "MAP_FROM_ENTRIES"
5573 | "COLLECT_LIST" | "COLLECT_SET"
5574 | "ISNAN" | "IS_NAN"
5575 | "TO_UTC_TIMESTAMP" | "FROM_UTC_TIMESTAMP"
5576 | "FORMAT_NUMBER"
5577 | "TOMONDAY" | "TOSTARTOFWEEK" | "TOSTARTOFMONTH" | "TOSTARTOFYEAR"
5578 | "ELEMENT_AT"
5579 | "EXPLODE" | "EXPLODE_OUTER" | "POSEXPLODE"
5580 | "SPLIT_PART"
5581 // GENERATE_SERIES: handled separately below
5582 | "JSON_EXTRACT" | "JSON_EXTRACT_SCALAR"
5583 | "JSON_QUERY" | "JSON_VALUE"
5584 | "JSON_SEARCH"
5585 | "JSON_EXTRACT_JSON" | "BSON_EXTRACT_BSON"
5586 | "TO_UNIX_TIMESTAMP" | "UNIX_TIMESTAMP"
5587 | "CURDATE" | "CURTIME"
5588 | "ARRAY_TO_STRING"
5589 | "ARRAY_SORT" | "SORT_ARRAY"
5590 | "LEFT" | "RIGHT"
5591 | "MAP_FROM_ARRAYS"
5592 | "LIKE" | "ILIKE"
5593 | "ARRAY_CONCAT" | "LIST_CONCAT"
5594 | "QUANTILE_CONT" | "QUANTILE_DISC"
5595 | "PERCENTILE_CONT" | "PERCENTILE_DISC"
5596 | "PERCENTILE_APPROX" | "APPROX_PERCENTILE"
5597 | "LOCATE" | "STRPOS" | "INSTR"
5598 | "CHAR"
5599 // CONCAT: handled separately for COALESCE wrapping
5600 | "ARRAY_JOIN"
5601 | "ARRAY_CONTAINS" | "HAS" | "CONTAINS"
5602 | "ISNULL"
5603 | "MONTHNAME"
5604 | "TO_TIMESTAMP"
5605 | "TO_DATE"
5606 | "TO_JSON"
5607 | "REGEXP_SPLIT"
5608 | "SPLIT"
5609 | "FORMATDATETIME"
5610 | "ARRAYJOIN"
5611 | "SPLITBYSTRING" | "SPLITBYREGEXP"
5612 | "NVL"
5613 | "TO_CHAR"
5614 | "DBMS_RANDOM.VALUE"
5615 | "REGEXP_LIKE"
5616 | "REPLICATE"
5617 | "LEN"
5618 | "COUNT_BIG"
5619 | "DATEFROMPARTS"
5620 | "DATETIMEFROMPARTS"
5621 | "CONVERT" | "TRY_CONVERT"
5622 | "STRFTIME" | "STRPTIME"
5623 | "DATE_FORMAT" | "FORMAT_DATE"
5624 | "PARSE_TIMESTAMP" | "PARSE_DATE"
5625 | "FROM_BASE64" | "TO_BASE64"
5626 | "GETDATE"
5627 | "TO_HEX" | "FROM_HEX" | "UNHEX" | "HEX"
5628 | "TO_UTF8" | "FROM_UTF8"
5629 | "STARTS_WITH" | "STARTSWITH"
5630 | "APPROX_COUNT_DISTINCT"
5631 | "JSON_FORMAT"
5632 | "SYSDATE"
5633 | "LOGICAL_OR" | "LOGICAL_AND"
5634 | "MONTHS_ADD"
5635 | "SCHEMA_NAME"
5636 | "STRTOL"
5637 | "EDITDIST3"
5638 | "FORMAT"
5639 | "LIST_CONTAINS" | "LIST_HAS"
5640 | "VARIANCE" | "STDDEV"
5641 | "ISINF"
5642 | "TO_UNIXTIME"
5643 | "FROM_UNIXTIME"
5644 | "DATEPART" | "DATE_PART"
5645 | "DATENAME"
5646 | "STRING_AGG"
5647 | "JSON_ARRAYAGG"
5648 | "APPROX_QUANTILE"
5649 | "MAKE_DATE"
5650 | "LIST_HAS_ANY" | "ARRAY_HAS_ANY"
5651 | "RANGE"
5652 | "TRY_ELEMENT_AT"
5653 | "STR_TO_MAP"
5654 | "STRING"
5655 | "STR_TO_TIME"
5656 | "CURRENT_SCHEMA"
5657 | "LTRIM" | "RTRIM"
5658 | "UUID"
5659 | "FARM_FINGERPRINT"
5660 | "JSON_KEYS"
5661 | "WEEKOFYEAR"
5662 | "CONCAT_WS"
5663 | "ARRAY_SLICE"
5664 | "ARRAY_PREPEND"
5665 | "ARRAY_REMOVE"
5666 | "GENERATE_DATE_ARRAY"
5667 | "PARSE_JSON"
5668 | "JSON_REMOVE"
5669 | "JSON_SET"
5670 | "LEVENSHTEIN"
5671 => Action::GenericFunctionNormalize,
5672 // Canonical date functions -> dialect-specific
5673 "TS_OR_DS_TO_DATE" => Action::TsOrDsToDateConvert,
5674 "TS_OR_DS_TO_DATE_STR" if f.args.len() == 1 => Action::TsOrDsToDateStrConvert,
5675 "DATE_STR_TO_DATE" if f.args.len() == 1 => Action::DateStrToDateConvert,
5676 "TIME_STR_TO_DATE" if f.args.len() == 1 => Action::TimeStrToDateConvert,
5677 "TIME_STR_TO_TIME" if f.args.len() <= 2 => Action::TimeStrToTimeConvert,
5678 "TIME_STR_TO_UNIX" if f.args.len() == 1 => Action::TimeStrToUnixConvert,
5679 "TIME_TO_TIME_STR" if f.args.len() == 1 => Action::TimeToTimeStrConvert,
5680 "DATE_TO_DATE_STR" if f.args.len() == 1 => Action::DateToDateStrConvert,
5681 "DATE_TO_DI" if f.args.len() == 1 => Action::DateToDiConvert,
5682 "DI_TO_DATE" if f.args.len() == 1 => Action::DiToDateConvert,
5683 "TS_OR_DI_TO_DI" if f.args.len() == 1 => Action::TsOrDiToDiConvert,
5684 "UNIX_TO_STR" if f.args.len() == 2 => Action::UnixToStrConvert,
5685 "UNIX_TO_TIME" if f.args.len() == 1 => Action::UnixToTimeConvert,
5686 "UNIX_TO_TIME_STR" if f.args.len() == 1 => Action::UnixToTimeStrConvert,
5687 "TIME_TO_UNIX" if f.args.len() == 1 => Action::TimeToUnixConvert,
5688 "TIME_TO_STR" if f.args.len() == 2 => Action::TimeToStrConvert,
5689 "STR_TO_UNIX" if f.args.len() == 2 => Action::StrToUnixConvert,
5690 // STR_TO_DATE(x, fmt) -> dialect-specific
5691 "STR_TO_DATE" if f.args.len() == 2
5692 && matches!(source, DialectType::Generic) => Action::StrToDateConvert,
5693 "STR_TO_DATE" => Action::GenericFunctionNormalize,
5694 // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> dialect-specific DATE_ADD
5695 "TS_OR_DS_ADD" if f.args.len() == 3
5696 && matches!(source, DialectType::Generic) => Action::TsOrDsAddConvert,
5697 // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, '1970-01-01')
5698 "DATE_FROM_UNIX_DATE" if f.args.len() == 1 => Action::DateFromUnixDateConvert,
5699 // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b [ELSE c] END
5700 "NVL2" if (f.args.len() == 2 || f.args.len() == 3) => Action::Nvl2Expand,
5701 // IFNULL(a, b) -> COALESCE(a, b) when coming from Generic source
5702 "IFNULL" if f.args.len() == 2 => Action::IfnullToCoalesce,
5703 // IS_ASCII(x) -> dialect-specific
5704 "IS_ASCII" if f.args.len() == 1 => Action::IsAsciiConvert,
5705 // STR_POSITION(haystack, needle[, pos[, occ]]) -> dialect-specific
5706 "STR_POSITION" => Action::StrPositionConvert,
5707 // ARRAY_SUM -> dialect-specific
5708 "ARRAY_SUM" => Action::ArraySumConvert,
5709 // ARRAY_SIZE -> dialect-specific (Drill only)
5710 "ARRAY_SIZE" if matches!(target, DialectType::Drill) => Action::ArraySizeConvert,
5711 // ARRAY_ANY -> dialect-specific
5712 "ARRAY_ANY" if f.args.len() == 2 => Action::ArrayAnyConvert,
5713 // Functions needing specific cross-dialect transforms
5714 "MAX_BY" | "MIN_BY" if matches!(target, DialectType::ClickHouse | DialectType::Spark | DialectType::Databricks | DialectType::DuckDB) => Action::MaxByMinByConvert,
5715 "STRUCT" if matches!(source, DialectType::Spark | DialectType::Databricks)
5716 && !matches!(target, DialectType::Spark | DialectType::Databricks | DialectType::Hive) => Action::SparkStructConvert,
5717 "ARRAY" if matches!(source, DialectType::BigQuery)
5718 && matches!(target, DialectType::Snowflake)
5719 && f.args.len() == 1
5720 && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT")) => Action::BigQueryArraySelectAsStructToSnowflake,
5721 "ARRAY" if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena | DialectType::BigQuery | DialectType::DuckDB | DialectType::ClickHouse | DialectType::StarRocks) => Action::ArraySyntaxConvert,
5722 "TRUNC" if f.args.len() == 2 && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::ClickHouse) => Action::TruncToDateTrunc,
5723 // DATE_TRUNC('unit', x) from Generic source -> arg swap for BigQuery/Doris/Spark/MySQL
5724 "DATE_TRUNC" if f.args.len() == 2
5725 && matches!(source, DialectType::Generic)
5726 && matches!(target, DialectType::BigQuery | DialectType::Doris | DialectType::StarRocks
5727 | DialectType::Spark | DialectType::Databricks | DialectType::MySQL) => Action::DateTruncSwapArgs,
5728 // TIMESTAMP_TRUNC(x, UNIT) from Generic source -> convert to per-dialect
5729 "TIMESTAMP_TRUNC" if f.args.len() >= 2
5730 && matches!(source, DialectType::Generic) => Action::TimestampTruncConvert,
5731 "UNIFORM" if matches!(target, DialectType::Snowflake) => Action::GenericFunctionNormalize,
5732 // GENERATE_SERIES -> SEQUENCE/UNNEST/EXPLODE for target dialects
5733 "GENERATE_SERIES" if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
5734 && !matches!(target, DialectType::PostgreSQL | DialectType::Redshift | DialectType::TSQL | DialectType::Fabric) => Action::GenerateSeriesConvert,
5735 // GENERATE_SERIES with interval normalization for PG target
5736 "GENERATE_SERIES" if f.args.len() >= 3
5737 && matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
5738 && matches!(target, DialectType::PostgreSQL | DialectType::Redshift) => Action::GenerateSeriesConvert,
5739 "GENERATE_SERIES" => Action::None, // passthrough for other cases
5740 // CONCAT(a, b) -> COALESCE wrapping for Presto/ClickHouse from PostgreSQL
5741 "CONCAT" if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
5742 && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::ClickHouse) => Action::ConcatCoalesceWrap,
5743 "CONCAT" => Action::GenericFunctionNormalize,
5744 // DIV(a, b) -> target-specific integer division
5745 "DIV" if f.args.len() == 2
5746 && matches!(source, DialectType::PostgreSQL)
5747 && matches!(target, DialectType::DuckDB | DialectType::BigQuery | DialectType::SQLite) => Action::DivFuncConvert,
5748 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
5749 "JSON_OBJECT_AGG" | "JSONB_OBJECT_AGG" if f.args.len() == 2
5750 && matches!(target, DialectType::DuckDB) => Action::JsonObjectAggConvert,
5751 // JSONB_EXISTS -> JSON_EXISTS for DuckDB
5752 "JSONB_EXISTS" if f.args.len() == 2
5753 && matches!(target, DialectType::DuckDB) => Action::JsonbExistsConvert,
5754 // DATE_BIN -> TIME_BUCKET for DuckDB
5755 "DATE_BIN" if matches!(target, DialectType::DuckDB) => Action::DateBinConvert,
5756 // Multi-arg MIN(a,b,c) -> LEAST, MAX(a,b,c) -> GREATEST
5757 "MIN" | "MAX" if f.args.len() > 1 && !matches!(target, DialectType::SQLite) => Action::MinMaxToLeastGreatest,
5758 // ClickHouse uniq -> APPROX_COUNT_DISTINCT for other dialects
5759 "UNIQ" if matches!(source, DialectType::ClickHouse) && !matches!(target, DialectType::ClickHouse) => Action::ClickHouseUniqToApproxCountDistinct,
5760 // ClickHouse any -> ANY_VALUE for other dialects
5761 "ANY" if f.args.len() == 1 && matches!(source, DialectType::ClickHouse) && !matches!(target, DialectType::ClickHouse) => Action::ClickHouseAnyToAnyValue,
5762 _ => Action::None,
5763 }
5764 }
5765 }
5766 Expression::AggregateFunction(af) => {
5767 let name = af.name.to_uppercase();
5768 match name.as_str() {
5769 "ARBITRARY" | "AGGREGATE" => Action::GenericFunctionNormalize,
5770 "JSON_ARRAYAGG" => Action::GenericFunctionNormalize,
5771 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
5772 "JSON_OBJECT_AGG" | "JSONB_OBJECT_AGG"
5773 if matches!(target, DialectType::DuckDB) =>
5774 {
5775 Action::JsonObjectAggConvert
5776 }
5777 "ARRAY_AGG"
5778 if matches!(
5779 target,
5780 DialectType::Hive
5781 | DialectType::Spark
5782 | DialectType::Databricks
5783 ) =>
5784 {
5785 Action::ArrayAggToCollectList
5786 }
5787 "MAX_BY" | "MIN_BY"
5788 if matches!(
5789 target,
5790 DialectType::ClickHouse
5791 | DialectType::Spark
5792 | DialectType::Databricks
5793 | DialectType::DuckDB
5794 ) =>
5795 {
5796 Action::MaxByMinByConvert
5797 }
5798 "COLLECT_LIST"
5799 if matches!(
5800 target,
5801 DialectType::Presto | DialectType::Trino | DialectType::DuckDB
5802 ) =>
5803 {
5804 Action::CollectListToArrayAgg
5805 }
5806 "COLLECT_SET"
5807 if matches!(
5808 target,
5809 DialectType::Presto
5810 | DialectType::Trino
5811 | DialectType::Snowflake
5812 | DialectType::DuckDB
5813 ) =>
5814 {
5815 Action::CollectSetConvert
5816 }
5817 "PERCENTILE"
5818 if matches!(
5819 target,
5820 DialectType::DuckDB | DialectType::Presto | DialectType::Trino
5821 ) =>
5822 {
5823 Action::PercentileConvert
5824 }
5825 // CORR -> CASE WHEN ISNAN(CORR(a,b)) THEN NULL ELSE CORR(a,b) END for DuckDB
5826 "CORR"
5827 if matches!(target, DialectType::DuckDB)
5828 && matches!(source, DialectType::Snowflake) =>
5829 {
5830 Action::CorrIsnanWrap
5831 }
5832 // BigQuery APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
5833 "APPROX_QUANTILES"
5834 if matches!(source, DialectType::BigQuery)
5835 && matches!(target, DialectType::DuckDB) =>
5836 {
5837 Action::BigQueryApproxQuantiles
5838 }
5839 // BigQuery PERCENTILE_CONT(x, frac RESPECT NULLS) -> QUANTILE_CONT(x, frac) for DuckDB
5840 "PERCENTILE_CONT"
5841 if matches!(source, DialectType::BigQuery)
5842 && matches!(target, DialectType::DuckDB)
5843 && af.args.len() >= 2 =>
5844 {
5845 Action::BigQueryPercentileContToDuckDB
5846 }
5847 _ => Action::None,
5848 }
5849 }
5850 Expression::JSONArrayAgg(_) => match target {
5851 DialectType::PostgreSQL => Action::GenericFunctionNormalize,
5852 _ => Action::None,
5853 },
5854 Expression::ToNumber(tn) => {
5855 // TO_NUMBER(x) with 1 arg -> CAST(x AS DOUBLE) for most targets
5856 if tn.format.is_none() && tn.precision.is_none() && tn.scale.is_none() {
5857 match target {
5858 DialectType::Oracle
5859 | DialectType::Snowflake
5860 | DialectType::Teradata => Action::None,
5861 _ => Action::GenericFunctionNormalize,
5862 }
5863 } else {
5864 Action::None
5865 }
5866 }
5867 Expression::Nvl2(_) => {
5868 // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b ELSE c END for most dialects
5869 // Keep as NVL2 for dialects that support it natively
5870 match target {
5871 DialectType::Oracle
5872 | DialectType::Snowflake
5873 | DialectType::Teradata
5874 | DialectType::Spark
5875 | DialectType::Databricks
5876 | DialectType::Redshift => Action::None,
5877 _ => Action::Nvl2Expand,
5878 }
5879 }
5880 Expression::Decode(_) | Expression::DecodeCase(_) => {
5881 // DECODE(a, b, c[, d, e[, ...]]) -> CASE WHEN with null-safe comparisons
5882 // Keep as DECODE for Oracle/Snowflake
5883 match target {
5884 DialectType::Oracle | DialectType::Snowflake => Action::None,
5885 _ => Action::DecodeSimplify,
5886 }
5887 }
5888 Expression::Coalesce(ref cf) => {
5889 // IFNULL(a, b) -> COALESCE(a, b): clear original_name for cross-dialect
5890 // BigQuery keeps IFNULL natively when source is also BigQuery
5891 if cf.original_name.as_deref() == Some("IFNULL")
5892 && !(matches!(source, DialectType::BigQuery)
5893 && matches!(target, DialectType::BigQuery))
5894 {
5895 Action::IfnullToCoalesce
5896 } else {
5897 Action::None
5898 }
5899 }
5900 Expression::IfFunc(if_func) => {
5901 if matches!(source, DialectType::Snowflake)
5902 && matches!(
5903 target,
5904 DialectType::Presto | DialectType::Trino | DialectType::SQLite
5905 )
5906 && matches!(if_func.false_value, Some(Expression::Div(_)))
5907 {
5908 Action::Div0TypedDivision
5909 } else {
5910 Action::None
5911 }
5912 }
5913 Expression::ToJson(_) => match target {
5914 DialectType::Presto | DialectType::Trino => Action::ToJsonConvert,
5915 DialectType::BigQuery => Action::ToJsonConvert,
5916 DialectType::DuckDB => Action::ToJsonConvert,
5917 _ => Action::None,
5918 },
5919 Expression::ArrayAgg(ref agg) => {
5920 if matches!(
5921 target,
5922 DialectType::Hive | DialectType::Spark | DialectType::Databricks
5923 ) {
5924 // Any source -> Hive/Spark: convert ARRAY_AGG to COLLECT_LIST
5925 Action::ArrayAggToCollectList
5926 } else if matches!(
5927 source,
5928 DialectType::Spark | DialectType::Databricks | DialectType::Hive
5929 ) && matches!(target, DialectType::DuckDB)
5930 && agg.filter.is_some()
5931 {
5932 // Spark/Hive ARRAY_AGG excludes NULLs, DuckDB includes them
5933 // Need to add NOT x IS NULL to existing filter
5934 Action::ArrayAggNullFilter
5935 } else if matches!(target, DialectType::DuckDB)
5936 && agg.ignore_nulls == Some(true)
5937 && !agg.order_by.is_empty()
5938 {
5939 // BigQuery ARRAY_AGG(x IGNORE NULLS ORDER BY ...) -> DuckDB ARRAY_AGG(x ORDER BY a NULLS FIRST, ...)
5940 Action::ArrayAggIgnoreNullsDuckDB
5941 } else if !matches!(source, DialectType::Snowflake) {
5942 Action::None
5943 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
5944 let is_array_agg = agg.name.as_deref().map(|n| n.to_uppercase())
5945 == Some("ARRAY_AGG".to_string())
5946 || agg.name.is_none();
5947 if is_array_agg {
5948 Action::ArrayAggCollectList
5949 } else {
5950 Action::None
5951 }
5952 } else if matches!(
5953 target,
5954 DialectType::DuckDB | DialectType::Presto | DialectType::Trino
5955 ) && agg.filter.is_none()
5956 {
5957 Action::ArrayAggFilter
5958 } else {
5959 Action::None
5960 }
5961 }
5962 Expression::WithinGroup(wg) => {
5963 if matches!(source, DialectType::Snowflake)
5964 && matches!(
5965 target,
5966 DialectType::DuckDB | DialectType::Presto | DialectType::Trino
5967 )
5968 && matches!(wg.this, Expression::ArrayAgg(_))
5969 {
5970 Action::ArrayAggWithinGroupFilter
5971 } else if matches!(&wg.this, Expression::AggregateFunction(af) if af.name.eq_ignore_ascii_case("STRING_AGG"))
5972 || matches!(&wg.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("STRING_AGG"))
5973 || matches!(&wg.this, Expression::StringAgg(_))
5974 {
5975 Action::StringAggConvert
5976 } else if matches!(
5977 target,
5978 DialectType::Presto
5979 | DialectType::Trino
5980 | DialectType::Athena
5981 | DialectType::Spark
5982 | DialectType::Databricks
5983 ) && (matches!(&wg.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("PERCENTILE_CONT") || f.name.eq_ignore_ascii_case("PERCENTILE_DISC"))
5984 || matches!(&wg.this, Expression::AggregateFunction(af) if af.name.eq_ignore_ascii_case("PERCENTILE_CONT") || af.name.eq_ignore_ascii_case("PERCENTILE_DISC"))
5985 || matches!(&wg.this, Expression::PercentileCont(_)))
5986 {
5987 Action::PercentileContConvert
5988 } else {
5989 Action::None
5990 }
5991 }
5992 // For BigQuery: CAST(x AS TIMESTAMP) -> CAST(x AS DATETIME)
5993 // because BigQuery's TIMESTAMP is really TIMESTAMPTZ, and
5994 // DATETIME is the timezone-unaware type
5995 Expression::Cast(ref c) => {
5996 if c.format.is_some()
5997 && (matches!(source, DialectType::BigQuery)
5998 || matches!(source, DialectType::Teradata))
5999 {
6000 Action::BigQueryCastFormat
6001 } else if matches!(target, DialectType::BigQuery)
6002 && !matches!(source, DialectType::BigQuery)
6003 && matches!(
6004 c.to,
6005 DataType::Timestamp {
6006 timezone: false,
6007 ..
6008 }
6009 )
6010 {
6011 Action::CastTimestampToDatetime
6012 } else if matches!(target, DialectType::MySQL | DialectType::StarRocks)
6013 && !matches!(source, DialectType::MySQL | DialectType::StarRocks)
6014 && matches!(
6015 c.to,
6016 DataType::Timestamp {
6017 timezone: false,
6018 ..
6019 }
6020 )
6021 {
6022 // Generic/other -> MySQL/StarRocks: CAST(x AS TIMESTAMP) -> CAST(x AS DATETIME)
6023 // but MySQL-native CAST(x AS TIMESTAMP) stays as TIMESTAMP(x) via transform_cast
6024 Action::CastTimestampToDatetime
6025 } else if matches!(
6026 source,
6027 DialectType::Hive | DialectType::Spark | DialectType::Databricks
6028 ) && matches!(
6029 target,
6030 DialectType::Presto
6031 | DialectType::Trino
6032 | DialectType::Athena
6033 | DialectType::DuckDB
6034 | DialectType::Snowflake
6035 | DialectType::BigQuery
6036 | DialectType::Databricks
6037 | DialectType::TSQL
6038 ) {
6039 Action::HiveCastToTryCast
6040 } else if matches!(c.to, DataType::Timestamp { timezone: true, .. })
6041 && matches!(target, DialectType::MySQL | DialectType::StarRocks)
6042 {
6043 // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) function for MySQL/StarRocks
6044 Action::CastTimestamptzToFunc
6045 } else if matches!(c.to, DataType::Timestamp { timezone: true, .. })
6046 && matches!(
6047 target,
6048 DialectType::Hive
6049 | DialectType::Spark
6050 | DialectType::Databricks
6051 | DialectType::BigQuery
6052 )
6053 {
6054 // CAST(x AS TIMESTAMP WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark/BigQuery
6055 Action::CastTimestampStripTz
6056 } else if matches!(&c.to, DataType::Json)
6057 && matches!(&c.this, Expression::Literal(Literal::String(_)))
6058 && matches!(
6059 target,
6060 DialectType::Presto
6061 | DialectType::Trino
6062 | DialectType::Athena
6063 | DialectType::Snowflake
6064 )
6065 {
6066 // CAST('x' AS JSON) -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake
6067 // Only when the input is a string literal (JSON 'value' syntax)
6068 Action::JsonLiteralToJsonParse
6069 } else if matches!(&c.to, DataType::Json | DataType::JsonB)
6070 && matches!(target, DialectType::Spark | DialectType::Databricks)
6071 {
6072 // CAST(x AS JSON) -> TO_JSON(x) for Spark
6073 Action::CastToJsonForSpark
6074 } else if (matches!(
6075 &c.to,
6076 DataType::Array { .. } | DataType::Map { .. } | DataType::Struct { .. }
6077 )) && matches!(
6078 target,
6079 DialectType::Spark | DialectType::Databricks
6080 ) && (matches!(&c.this, Expression::ParseJson(_))
6081 || matches!(
6082 &c.this,
6083 Expression::Function(f)
6084 if f.name.eq_ignore_ascii_case("JSON_EXTRACT")
6085 || f.name.eq_ignore_ascii_case("JSON_EXTRACT_SCALAR")
6086 || f.name.eq_ignore_ascii_case("GET_JSON_OBJECT")
6087 ))
6088 {
6089 // CAST(JSON_PARSE(...) AS ARRAY/MAP) or CAST(JSON_EXTRACT/GET_JSON_OBJECT(...) AS ARRAY/MAP)
6090 // -> FROM_JSON(..., type_string) for Spark
6091 Action::CastJsonToFromJson
6092 } else if matches!(target, DialectType::Spark | DialectType::Databricks)
6093 && matches!(
6094 c.to,
6095 DataType::Timestamp {
6096 timezone: false,
6097 ..
6098 }
6099 )
6100 && matches!(source, DialectType::DuckDB)
6101 {
6102 Action::StrftimeCastTimestamp
6103 } else if matches!(source, DialectType::DuckDB)
6104 && matches!(
6105 c.to,
6106 DataType::Decimal {
6107 precision: None,
6108 ..
6109 }
6110 )
6111 {
6112 Action::DecimalDefaultPrecision
6113 } else if matches!(source, DialectType::MySQL | DialectType::SingleStore)
6114 && matches!(c.to, DataType::Char { length: None })
6115 && !matches!(target, DialectType::MySQL | DialectType::SingleStore)
6116 {
6117 // MySQL CAST(x AS CHAR) was originally TEXT - convert to target text type
6118 Action::MysqlCastCharToText
6119 } else if matches!(
6120 source,
6121 DialectType::Spark | DialectType::Databricks | DialectType::Hive
6122 ) && matches!(
6123 target,
6124 DialectType::Spark | DialectType::Databricks | DialectType::Hive
6125 ) && Self::has_varchar_char_type(&c.to)
6126 {
6127 // Spark parses VARCHAR(n)/CHAR(n) as TEXT, so normalize back to STRING
6128 Action::SparkCastVarcharToString
6129 } else {
6130 Action::None
6131 }
6132 }
6133 Expression::SafeCast(ref c) => {
6134 if c.format.is_some()
6135 && matches!(source, DialectType::BigQuery)
6136 && !matches!(target, DialectType::BigQuery)
6137 {
6138 Action::BigQueryCastFormat
6139 } else {
6140 Action::None
6141 }
6142 }
6143 // For DuckDB: DATE_TRUNC should preserve the input type
6144 Expression::DateTrunc(_) | Expression::TimestampTrunc(_) => {
6145 if matches!(source, DialectType::Snowflake)
6146 && matches!(target, DialectType::DuckDB)
6147 {
6148 Action::DateTruncWrapCast
6149 } else {
6150 Action::None
6151 }
6152 }
6153 // For DuckDB: SET a = 1 -> SET VARIABLE a = 1
6154 Expression::SetStatement(s) => {
6155 if matches!(target, DialectType::DuckDB)
6156 && !matches!(source, DialectType::TSQL | DialectType::Fabric)
6157 && s.items.iter().any(|item| item.kind.is_none())
6158 {
6159 Action::SetToVariable
6160 } else {
6161 Action::None
6162 }
6163 }
6164 // Cross-dialect NULL ordering normalization.
6165 // When nulls_first is not specified, fill in the source dialect's implied
6166 // default so the target generator can correctly add/strip NULLS FIRST/LAST.
6167 Expression::Ordered(o) => {
6168 // MySQL doesn't support NULLS FIRST/LAST - strip or rewrite
6169 if matches!(target, DialectType::MySQL) && o.nulls_first.is_some() {
6170 Action::MysqlNullsOrdering
6171 } else {
6172 // Skip targets that don't support NULLS FIRST/LAST syntax
6173 let target_supports_nulls = !matches!(
6174 target,
6175 DialectType::MySQL
6176 | DialectType::TSQL
6177 | DialectType::StarRocks
6178 | DialectType::Doris
6179 );
6180 if o.nulls_first.is_none() && source != target && target_supports_nulls
6181 {
6182 Action::NullsOrdering
6183 } else {
6184 Action::None
6185 }
6186 }
6187 }
6188 // BigQuery data types: convert INT64, BYTES, NUMERIC etc. to standard types
6189 Expression::DataType(dt) => {
6190 if matches!(source, DialectType::BigQuery)
6191 && !matches!(target, DialectType::BigQuery)
6192 {
6193 match dt {
6194 DataType::Custom { ref name }
6195 if name.eq_ignore_ascii_case("INT64")
6196 || name.eq_ignore_ascii_case("FLOAT64")
6197 || name.eq_ignore_ascii_case("BOOL")
6198 || name.eq_ignore_ascii_case("BYTES")
6199 || name.eq_ignore_ascii_case("NUMERIC")
6200 || name.eq_ignore_ascii_case("STRING")
6201 || name.eq_ignore_ascii_case("DATETIME") =>
6202 {
6203 Action::BigQueryCastType
6204 }
6205 _ => Action::None,
6206 }
6207 } else if matches!(source, DialectType::TSQL) {
6208 // For TSQL source -> any target (including TSQL itself for REAL)
6209 match dt {
6210 // REAL -> FLOAT even for TSQL->TSQL
6211 DataType::Custom { ref name }
6212 if name.eq_ignore_ascii_case("REAL") =>
6213 {
6214 Action::TSQLTypeNormalize
6215 }
6216 DataType::Float {
6217 real_spelling: true,
6218 ..
6219 } => Action::TSQLTypeNormalize,
6220 // Other TSQL type normalizations only for non-TSQL targets
6221 DataType::Custom { ref name }
6222 if !matches!(target, DialectType::TSQL)
6223 && (name.eq_ignore_ascii_case("MONEY")
6224 || name.eq_ignore_ascii_case("SMALLMONEY")
6225 || name.eq_ignore_ascii_case("DATETIME2")
6226 || name.eq_ignore_ascii_case("IMAGE")
6227 || name.eq_ignore_ascii_case("BIT")
6228 || name.eq_ignore_ascii_case("ROWVERSION")
6229 || name.eq_ignore_ascii_case("UNIQUEIDENTIFIER")
6230 || name.eq_ignore_ascii_case("DATETIMEOFFSET")
6231 || name.to_uppercase().starts_with("NUMERIC")
6232 || name.to_uppercase().starts_with("DATETIME2(")
6233 || name.to_uppercase().starts_with("TIME(")) =>
6234 {
6235 Action::TSQLTypeNormalize
6236 }
6237 DataType::Float {
6238 precision: Some(_), ..
6239 } if !matches!(target, DialectType::TSQL) => {
6240 Action::TSQLTypeNormalize
6241 }
6242 DataType::TinyInt { .. }
6243 if !matches!(target, DialectType::TSQL) =>
6244 {
6245 Action::TSQLTypeNormalize
6246 }
6247 // INTEGER -> INT for Databricks/Spark targets
6248 DataType::Int {
6249 integer_spelling: true,
6250 ..
6251 } if matches!(
6252 target,
6253 DialectType::Databricks | DialectType::Spark
6254 ) =>
6255 {
6256 Action::TSQLTypeNormalize
6257 }
6258 _ => Action::None,
6259 }
6260 } else if (matches!(source, DialectType::Oracle)
6261 || matches!(source, DialectType::Generic))
6262 && !matches!(target, DialectType::Oracle)
6263 {
6264 match dt {
6265 DataType::Custom { ref name }
6266 if name.to_uppercase().starts_with("VARCHAR2(")
6267 || name.to_uppercase().starts_with("NVARCHAR2(")
6268 || name.eq_ignore_ascii_case("VARCHAR2")
6269 || name.eq_ignore_ascii_case("NVARCHAR2") =>
6270 {
6271 Action::OracleVarchar2ToVarchar
6272 }
6273 _ => Action::None,
6274 }
6275 } else if matches!(target, DialectType::Snowflake)
6276 && !matches!(source, DialectType::Snowflake)
6277 {
6278 // When target is Snowflake but source is NOT Snowflake,
6279 // protect FLOAT from being converted to DOUBLE by Snowflake's transform.
6280 // Snowflake treats FLOAT=DOUBLE internally, but non-Snowflake sources
6281 // should keep their FLOAT spelling.
6282 match dt {
6283 DataType::Float { .. } => Action::SnowflakeFloatProtect,
6284 _ => Action::None,
6285 }
6286 } else {
6287 Action::None
6288 }
6289 }
6290 // LOWER patterns from BigQuery TO_HEX conversions:
6291 // - LOWER(LOWER(HEX(x))) from non-BQ targets: flatten
6292 // - LOWER(Function("TO_HEX")) for BQ->BQ: strip LOWER
6293 Expression::Lower(uf) => {
6294 if matches!(source, DialectType::BigQuery) {
6295 match &uf.this {
6296 Expression::Lower(_) => Action::BigQueryToHexLower,
6297 Expression::Function(f)
6298 if f.name == "TO_HEX"
6299 && matches!(target, DialectType::BigQuery) =>
6300 {
6301 // BQ->BQ: LOWER(TO_HEX(x)) -> TO_HEX(x)
6302 Action::BigQueryToHexLower
6303 }
6304 _ => Action::None,
6305 }
6306 } else {
6307 Action::None
6308 }
6309 }
6310 // UPPER patterns from BigQuery TO_HEX conversions:
6311 // - UPPER(LOWER(HEX(x))) from non-BQ targets: extract inner
6312 // - UPPER(Function("TO_HEX")) for BQ->BQ: keep as UPPER(TO_HEX(x))
6313 Expression::Upper(uf) => {
6314 if matches!(source, DialectType::BigQuery) {
6315 match &uf.this {
6316 Expression::Lower(_) => Action::BigQueryToHexUpper,
6317 _ => Action::None,
6318 }
6319 } else {
6320 Action::None
6321 }
6322 }
6323 // BigQuery LAST_DAY(date, unit) -> strip unit for non-BigQuery targets
6324 // Snowflake supports LAST_DAY with unit, so keep it there
6325 Expression::LastDay(ld) => {
6326 if matches!(source, DialectType::BigQuery)
6327 && !matches!(target, DialectType::BigQuery | DialectType::Snowflake)
6328 && ld.unit.is_some()
6329 {
6330 Action::BigQueryLastDayStripUnit
6331 } else {
6332 Action::None
6333 }
6334 }
6335 // BigQuery SafeDivide expressions (already parsed as SafeDivide)
6336 Expression::SafeDivide(_) => {
6337 if matches!(source, DialectType::BigQuery)
6338 && !matches!(target, DialectType::BigQuery)
6339 {
6340 Action::BigQuerySafeDivide
6341 } else {
6342 Action::None
6343 }
6344 }
6345 // BigQuery ANY_VALUE(x HAVING MAX/MIN y) -> ARG_MAX_NULL/ARG_MIN_NULL for DuckDB
6346 // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
6347 Expression::AnyValue(ref agg) => {
6348 if matches!(source, DialectType::BigQuery)
6349 && matches!(target, DialectType::DuckDB)
6350 && agg.having_max.is_some()
6351 {
6352 Action::BigQueryAnyValueHaving
6353 } else if matches!(target, DialectType::Spark | DialectType::Databricks)
6354 && !matches!(source, DialectType::Spark | DialectType::Databricks)
6355 && agg.ignore_nulls.is_none()
6356 {
6357 Action::AnyValueIgnoreNulls
6358 } else {
6359 Action::None
6360 }
6361 }
6362 Expression::Any(ref q) => {
6363 if matches!(source, DialectType::PostgreSQL)
6364 && matches!(
6365 target,
6366 DialectType::Spark | DialectType::Databricks | DialectType::Hive
6367 )
6368 && q.op.is_some()
6369 && !matches!(
6370 q.subquery,
6371 Expression::Select(_) | Expression::Subquery(_)
6372 )
6373 {
6374 Action::AnyToExists
6375 } else {
6376 Action::None
6377 }
6378 }
6379 // BigQuery APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
6380 // RegexpLike from non-DuckDB sources -> REGEXP_MATCHES for DuckDB target
6381 // DuckDB's ~ is a full match, but other dialects' REGEXP/RLIKE is a partial match
6382 Expression::RegexpLike(_)
6383 if !matches!(source, DialectType::DuckDB)
6384 && matches!(target, DialectType::DuckDB) =>
6385 {
6386 Action::RegexpLikeToDuckDB
6387 }
6388 // Safe-division source -> non-safe target: NULLIF wrapping and/or CAST
6389 // Safe-division dialects: MySQL, DuckDB, SingleStore, TiDB, ClickHouse, Doris
6390 Expression::Div(ref op)
6391 if matches!(
6392 source,
6393 DialectType::MySQL
6394 | DialectType::DuckDB
6395 | DialectType::SingleStore
6396 | DialectType::TiDB
6397 | DialectType::ClickHouse
6398 | DialectType::Doris
6399 ) && matches!(
6400 target,
6401 DialectType::PostgreSQL
6402 | DialectType::Redshift
6403 | DialectType::Drill
6404 | DialectType::Trino
6405 | DialectType::Presto
6406 | DialectType::Athena
6407 | DialectType::TSQL
6408 | DialectType::Teradata
6409 | DialectType::SQLite
6410 | DialectType::BigQuery
6411 | DialectType::Snowflake
6412 | DialectType::Databricks
6413 | DialectType::Oracle
6414 | DialectType::Materialize
6415 | DialectType::RisingWave
6416 ) =>
6417 {
6418 // Only wrap if RHS is not already NULLIF
6419 if !matches!(&op.right, Expression::Function(f) if f.name.eq_ignore_ascii_case("NULLIF"))
6420 {
6421 Action::MySQLSafeDivide
6422 } else {
6423 Action::None
6424 }
6425 }
// ALTER TABLE ... RENAME TO <schema>.<table> -> strip schema for most targets
// For TSQL/Fabric, convert to sp_rename instead
// Only the FIRST action of the ALTER TABLE statement is inspected; any
// further actions after a RENAME are not considered by this dispatch.
// NOTE(review): because this arm matches every AlterTable with a non-empty
// action list, the later dedicated TSQL/Fabric sp_rename arm in this same
// match can never be reached.
Expression::AlterTable(ref at) if !at.actions.is_empty() => {
    if let Some(crate::expressions::AlterTableAction::RenameTable(
        ref new_tbl,
    )) = at.actions.first()
    {
        if matches!(target, DialectType::TSQL | DialectType::Fabric) {
            // TSQL: ALTER TABLE RENAME -> EXEC sp_rename
            Action::AlterTableToSpRename
        } else if new_tbl.schema.is_some()
            && matches!(
                target,
                DialectType::BigQuery
                    | DialectType::Doris
                    | DialectType::StarRocks
                    | DialectType::DuckDB
                    | DialectType::PostgreSQL
                    | DialectType::Redshift
            )
        {
            // These targets reject a schema-qualified name in RENAME TO:
            // drop the schema portion and keep only the bare table name.
            Action::AlterTableRenameStripSchema
        } else {
            Action::None
        }
    } else {
        // First action is not a table rename: nothing to rewrite here.
        Action::None
    }
}
6455 // EPOCH(x) expression -> target-specific epoch conversion
6456 Expression::Epoch(_) if !matches!(target, DialectType::DuckDB) => {
6457 Action::EpochConvert
6458 }
6459 // EPOCH_MS(x) expression -> target-specific epoch ms conversion
6460 Expression::EpochMs(_) if !matches!(target, DialectType::DuckDB) => {
6461 Action::EpochMsConvert
6462 }
// STRING_AGG -> GROUP_CONCAT for MySQL/SQLite
// NOTE(review): both branches below map to the same Action::StringAggConvert;
// the Spark/Databricks check could be folded into the first matches! unless
// the split is kept deliberately for readability — confirm before merging.
Expression::StringAgg(_) => {
    if matches!(
        target,
        DialectType::MySQL
            | DialectType::SingleStore
            | DialectType::Doris
            | DialectType::StarRocks
            | DialectType::SQLite
    ) {
        Action::StringAggConvert
    } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
        Action::StringAggConvert
    } else {
        Action::None
    }
}
6480 // GROUP_CONCAT -> STRING_AGG for PostgreSQL/Presto/etc.
6481 // Also handles GROUP_CONCAT normalization for MySQL/SQLite targets
6482 Expression::GroupConcat(_) => Action::GroupConcatConvert,
6483 // CARDINALITY/ARRAY_LENGTH/ARRAY_SIZE -> target-specific array length
6484 Expression::Cardinality(_) | Expression::ArrayLength(_) => {
6485 Action::ArrayLengthConvert
6486 }
6487 Expression::ArraySize(_) => {
6488 if matches!(target, DialectType::Drill) {
6489 Action::ArraySizeDrill
6490 } else {
6491 Action::ArrayLengthConvert
6492 }
6493 }
6494 // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter/ARRAY subquery
6495 Expression::ArrayRemove(_) => match target {
6496 DialectType::DuckDB | DialectType::ClickHouse | DialectType::BigQuery => {
6497 Action::ArrayRemoveConvert
6498 }
6499 _ => Action::None,
6500 },
6501 // ARRAY_REVERSE(x) -> arrayReverse for ClickHouse
6502 Expression::ArrayReverse(_) => match target {
6503 DialectType::ClickHouse => Action::ArrayReverseConvert,
6504 _ => Action::None,
6505 },
6506 // JSON_KEYS(x) -> JSON_OBJECT_KEYS/OBJECT_KEYS for Spark/Databricks/Snowflake
6507 Expression::JsonKeys(_) => match target {
6508 DialectType::Spark | DialectType::Databricks | DialectType::Snowflake => {
6509 Action::JsonKeysConvert
6510 }
6511 _ => Action::None,
6512 },
6513 // PARSE_JSON(x) -> strip for SQLite/Doris/MySQL/StarRocks
6514 Expression::ParseJson(_) => match target {
6515 DialectType::SQLite
6516 | DialectType::Doris
6517 | DialectType::MySQL
6518 | DialectType::StarRocks => Action::ParseJsonStrip,
6519 _ => Action::None,
6520 },
6521 // WeekOfYear -> WEEKISO for Snowflake (cross-dialect only)
6522 Expression::WeekOfYear(_)
6523 if matches!(target, DialectType::Snowflake)
6524 && !matches!(source, DialectType::Snowflake) =>
6525 {
6526 Action::WeekOfYearToWeekIso
6527 }
6528 // NVL: clear original_name so generator uses dialect-specific function names
6529 Expression::Nvl(f) if f.original_name.is_some() => Action::NvlClearOriginal,
6530 // XOR: expand for dialects that don't support the XOR keyword
6531 Expression::Xor(_) => {
6532 let target_supports_xor = matches!(
6533 target,
6534 DialectType::MySQL
6535 | DialectType::SingleStore
6536 | DialectType::Doris
6537 | DialectType::StarRocks
6538 );
6539 if !target_supports_xor {
6540 Action::XorExpand
6541 } else {
6542 Action::None
6543 }
6544 }
6545 // TSQL #table -> temp table normalization (CREATE TABLE)
6546 Expression::CreateTable(ct)
6547 if matches!(source, DialectType::TSQL | DialectType::Fabric)
6548 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
6549 && ct.name.name.name.starts_with('#') =>
6550 {
6551 Action::TempTableHash
6552 }
6553 // TSQL #table -> strip # from table references in SELECT/etc.
6554 Expression::Table(tr)
6555 if matches!(source, DialectType::TSQL | DialectType::Fabric)
6556 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
6557 && tr.name.name.starts_with('#') =>
6558 {
6559 Action::TempTableHash
6560 }
6561 // TSQL #table -> strip # from DROP TABLE names
6562 Expression::DropTable(ref dt)
6563 if matches!(source, DialectType::TSQL | DialectType::Fabric)
6564 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
6565 && dt.names.iter().any(|n| n.name.name.starts_with('#')) =>
6566 {
6567 Action::TempTableHash
6568 }
6569 // JSON_EXTRACT -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
6570 Expression::JsonExtract(_)
6571 if matches!(target, DialectType::TSQL | DialectType::Fabric) =>
6572 {
6573 Action::JsonExtractToTsql
6574 }
6575 // JSON_EXTRACT_SCALAR -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
6576 Expression::JsonExtractScalar(_)
6577 if matches!(target, DialectType::TSQL | DialectType::Fabric) =>
6578 {
6579 Action::JsonExtractToTsql
6580 }
6581 // JSON_EXTRACT -> JSONExtractString for ClickHouse
6582 Expression::JsonExtract(_) if matches!(target, DialectType::ClickHouse) => {
6583 Action::JsonExtractToClickHouse
6584 }
6585 // JSON_EXTRACT_SCALAR -> JSONExtractString for ClickHouse
6586 Expression::JsonExtractScalar(_)
6587 if matches!(target, DialectType::ClickHouse) =>
6588 {
6589 Action::JsonExtractToClickHouse
6590 }
6591 // JSON_EXTRACT -> arrow syntax for SQLite/DuckDB
6592 Expression::JsonExtract(ref f)
6593 if !f.arrow_syntax
6594 && matches!(target, DialectType::SQLite | DialectType::DuckDB) =>
6595 {
6596 Action::JsonExtractToArrow
6597 }
6598 // JSON_EXTRACT with JSONPath -> JSON_EXTRACT_PATH for PostgreSQL (non-PG sources only)
6599 Expression::JsonExtract(ref f)
6600 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift)
6601 && !matches!(
6602 source,
6603 DialectType::PostgreSQL
6604 | DialectType::Redshift
6605 | DialectType::Materialize
6606 )
6607 && matches!(&f.path, Expression::Literal(Literal::String(s)) if s.starts_with('$')) =>
6608 {
6609 Action::JsonExtractToGetJsonObject
6610 }
6611 // JSON_EXTRACT -> GET_JSON_OBJECT for Hive/Spark
6612 Expression::JsonExtract(_)
6613 if matches!(
6614 target,
6615 DialectType::Hive | DialectType::Spark | DialectType::Databricks
6616 ) =>
6617 {
6618 Action::JsonExtractToGetJsonObject
6619 }
6620 // JSON_EXTRACT_SCALAR -> target-specific for PostgreSQL, Snowflake, SQLite
6621 // Skip if already in arrow/hash_arrow syntax (same-dialect identity case)
6622 Expression::JsonExtractScalar(ref f)
6623 if !f.arrow_syntax
6624 && !f.hash_arrow_syntax
6625 && matches!(
6626 target,
6627 DialectType::PostgreSQL
6628 | DialectType::Redshift
6629 | DialectType::Snowflake
6630 | DialectType::SQLite
6631 | DialectType::DuckDB
6632 ) =>
6633 {
6634 Action::JsonExtractScalarConvert
6635 }
6636 // JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
6637 Expression::JsonExtractScalar(_)
6638 if matches!(
6639 target,
6640 DialectType::Hive | DialectType::Spark | DialectType::Databricks
6641 ) =>
6642 {
6643 Action::JsonExtractScalarToGetJsonObject
6644 }
6645 // JSON_EXTRACT path normalization for BigQuery, MySQL (bracket/wildcard handling)
6646 Expression::JsonExtract(ref f)
6647 if !f.arrow_syntax
6648 && matches!(target, DialectType::BigQuery | DialectType::MySQL) =>
6649 {
6650 Action::JsonPathNormalize
6651 }
6652 // JsonQuery (parsed JSON_QUERY) -> target-specific
6653 Expression::JsonQuery(_) => Action::JsonQueryValueConvert,
6654 // JsonValue (parsed JSON_VALUE) -> target-specific
6655 Expression::JsonValue(_) => Action::JsonQueryValueConvert,
6656 // AT TIME ZONE -> AT_TIMEZONE for Presto, FROM_UTC_TIMESTAMP for Spark,
6657 // TIMESTAMP(DATETIME(...)) for BigQuery, CONVERT_TIMEZONE for Snowflake
6658 Expression::AtTimeZone(_)
6659 if matches!(
6660 target,
6661 DialectType::Presto
6662 | DialectType::Trino
6663 | DialectType::Athena
6664 | DialectType::Spark
6665 | DialectType::Databricks
6666 | DialectType::BigQuery
6667 | DialectType::Snowflake
6668 ) =>
6669 {
6670 Action::AtTimeZoneConvert
6671 }
6672 // DAY_OF_WEEK -> dialect-specific
6673 Expression::DayOfWeek(_)
6674 if matches!(
6675 target,
6676 DialectType::DuckDB | DialectType::Spark | DialectType::Databricks
6677 ) =>
6678 {
6679 Action::DayOfWeekConvert
6680 }
6681 // CURRENT_USER -> CURRENT_USER() for Snowflake
6682 Expression::CurrentUser(_) if matches!(target, DialectType::Snowflake) => {
6683 Action::CurrentUserParens
6684 }
6685 // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
6686 Expression::ElementAt(_)
6687 if matches!(target, DialectType::PostgreSQL | DialectType::BigQuery) =>
6688 {
6689 Action::ElementAtConvert
6690 }
6691 // ARRAY[...] (ArrayFunc bracket_notation=false) -> convert for target dialect
6692 Expression::ArrayFunc(ref arr)
6693 if !arr.bracket_notation
6694 && matches!(
6695 target,
6696 DialectType::Spark
6697 | DialectType::Databricks
6698 | DialectType::Hive
6699 | DialectType::BigQuery
6700 | DialectType::DuckDB
6701 | DialectType::Snowflake
6702 | DialectType::Presto
6703 | DialectType::Trino
6704 | DialectType::Athena
6705 | DialectType::ClickHouse
6706 | DialectType::StarRocks
6707 ) =>
6708 {
6709 Action::ArraySyntaxConvert
6710 }
6711 // VARIANCE expression -> varSamp for ClickHouse
6712 Expression::Variance(_) if matches!(target, DialectType::ClickHouse) => {
6713 Action::VarianceToClickHouse
6714 }
6715 // STDDEV expression -> stddevSamp for ClickHouse
6716 Expression::Stddev(_) if matches!(target, DialectType::ClickHouse) => {
6717 Action::StddevToClickHouse
6718 }
6719 // ApproxQuantile -> APPROX_PERCENTILE for Snowflake
6720 Expression::ApproxQuantile(_) if matches!(target, DialectType::Snowflake) => {
6721 Action::ApproxQuantileConvert
6722 }
6723 // MonthsBetween -> target-specific
6724 Expression::MonthsBetween(_)
6725 if !matches!(
6726 target,
6727 DialectType::Spark | DialectType::Databricks | DialectType::Hive
6728 ) =>
6729 {
6730 Action::MonthsBetweenConvert
6731 }
6732 // AddMonths -> target-specific DATEADD/DATE_ADD
6733 Expression::AddMonths(_) => Action::AddMonthsConvert,
6734 // MapFromArrays -> target-specific (MAP, OBJECT_CONSTRUCT, MAP_FROM_ARRAYS)
6735 Expression::MapFromArrays(_)
6736 if !matches!(target, DialectType::Spark | DialectType::Databricks) =>
6737 {
6738 Action::MapFromArraysConvert
6739 }
6740 // CURRENT_USER -> CURRENT_USER() for Spark
6741 Expression::CurrentUser(_)
6742 if matches!(target, DialectType::Spark | DialectType::Databricks) =>
6743 {
6744 Action::CurrentUserSparkParens
6745 }
6746 // MONTH/YEAR/DAY('string') from Spark -> cast string to DATE for DuckDB/Presto
6747 Expression::Month(ref f) | Expression::Year(ref f) | Expression::Day(ref f)
6748 if matches!(
6749 source,
6750 DialectType::Spark | DialectType::Databricks | DialectType::Hive
6751 ) && matches!(&f.this, Expression::Literal(Literal::String(_)))
6752 && matches!(
6753 target,
6754 DialectType::DuckDB
6755 | DialectType::Presto
6756 | DialectType::Trino
6757 | DialectType::Athena
6758 | DialectType::PostgreSQL
6759 | DialectType::Redshift
6760 ) =>
6761 {
6762 Action::SparkDateFuncCast
6763 }
6764 // $parameter -> @parameter for BigQuery
6765 Expression::Parameter(ref p)
6766 if matches!(target, DialectType::BigQuery)
6767 && matches!(source, DialectType::DuckDB)
6768 && (p.style == crate::expressions::ParameterStyle::Dollar
6769 || p.style == crate::expressions::ParameterStyle::DoubleDollar) =>
6770 {
6771 Action::DollarParamConvert
6772 }
6773 // EscapeString literal: normalize literal newlines to \n
6774 Expression::Literal(Literal::EscapeString(ref s))
6775 if s.contains('\n') || s.contains('\r') || s.contains('\t') =>
6776 {
6777 Action::EscapeStringNormalize
6778 }
6779 // straight_join: keep lowercase for DuckDB, quote for MySQL
6780 Expression::Column(ref col)
6781 if col.name.name == "STRAIGHT_JOIN"
6782 && col.table.is_none()
6783 && matches!(source, DialectType::DuckDB)
6784 && matches!(target, DialectType::DuckDB | DialectType::MySQL) =>
6785 {
6786 Action::StraightJoinCase
6787 }
6788 // DATE and TIMESTAMP literal type conversions are now handled in the generator directly
6789 // Snowflake INTERVAL format: INTERVAL '2' HOUR -> INTERVAL '2 HOUR'
6790 Expression::Interval(ref iv)
6791 if matches!(
6792 target,
6793 DialectType::Snowflake
6794 | DialectType::PostgreSQL
6795 | DialectType::Redshift
6796 ) && iv.unit.is_some()
6797 && matches!(
6798 &iv.this,
6799 Some(Expression::Literal(Literal::String(_)))
6800 ) =>
6801 {
6802 Action::SnowflakeIntervalFormat
6803 }
6804 // TABLESAMPLE -> TABLESAMPLE RESERVOIR for DuckDB target
6805 Expression::TableSample(ref ts) if matches!(target, DialectType::DuckDB) => {
6806 if let Some(ref sample) = ts.sample {
6807 if !sample.explicit_method {
6808 Action::TablesampleReservoir
6809 } else {
6810 Action::None
6811 }
6812 } else {
6813 Action::None
6814 }
6815 }
6816 // TABLESAMPLE from non-Snowflake source to Snowflake: strip method and PERCENT
6817 // Handles both Expression::TableSample wrapper and Expression::Table with table_sample
6818 Expression::TableSample(ref ts)
6819 if matches!(target, DialectType::Snowflake)
6820 && !matches!(source, DialectType::Snowflake)
6821 && ts.sample.is_some() =>
6822 {
6823 if let Some(ref sample) = ts.sample {
6824 if !sample.explicit_method {
6825 Action::TablesampleSnowflakeStrip
6826 } else {
6827 Action::None
6828 }
6829 } else {
6830 Action::None
6831 }
6832 }
6833 Expression::Table(ref t)
6834 if matches!(target, DialectType::Snowflake)
6835 && !matches!(source, DialectType::Snowflake)
6836 && t.table_sample.is_some() =>
6837 {
6838 if let Some(ref sample) = t.table_sample {
6839 if !sample.explicit_method {
6840 Action::TablesampleSnowflakeStrip
6841 } else {
6842 Action::None
6843 }
6844 } else {
6845 Action::None
6846 }
6847 }
// ALTER TABLE RENAME -> EXEC sp_rename for TSQL
// NOTE(review): this arm appears to be dead code. Match arms are tried in
// order, and the earlier `Expression::AlterTable(ref at) if
// !at.actions.is_empty()` arm matches every AlterTable with actions and
// already returns Action::AlterTableToSpRename for TSQL/Fabric targets, so
// control never reaches here. Consider removing it or merging the guards.
Expression::AlterTable(ref at)
    if matches!(target, DialectType::TSQL | DialectType::Fabric)
        && !at.actions.is_empty()
        && matches!(
            at.actions.first(),
            Some(crate::expressions::AlterTableAction::RenameTable(_))
        ) =>
{
    Action::AlterTableToSpRename
}
6859 // Subscript index: 1-based to 0-based for BigQuery/Hive/Spark
6860 Expression::Subscript(ref sub)
6861 if matches!(
6862 target,
6863 DialectType::BigQuery
6864 | DialectType::Hive
6865 | DialectType::Spark
6866 | DialectType::Databricks
6867 ) && matches!(
6868 source,
6869 DialectType::DuckDB
6870 | DialectType::PostgreSQL
6871 | DialectType::Presto
6872 | DialectType::Trino
6873 | DialectType::Redshift
6874 | DialectType::ClickHouse
6875 ) && matches!(&sub.index, Expression::Literal(Literal::Number(ref n)) if n.parse::<i64>().unwrap_or(0) > 0) =>
6876 {
6877 Action::ArrayIndexConvert
6878 }
// ANY_VALUE IGNORE NULLS detection moved to the AnyValue arm above
// MysqlNullsOrdering for Ordered is now handled in the Ordered arm above
// RESPECT NULLS handling for SQLite (strip it, add NULLS LAST to ORDER BY)
// and for MySQL (rewrite ORDER BY with CASE WHEN for null ordering)
Expression::WindowFunction(ref wf) => {
    // BigQuery doesn't support NULLS FIRST/LAST in window function ORDER BY
    // EXCEPT for ROW_NUMBER which keeps NULLS LAST
    let is_row_number = matches!(wf.this, Expression::RowNumber(_));
    if matches!(target, DialectType::BigQuery)
        && !is_row_number
        && !wf.over.order_by.is_empty()
        && wf.over.order_by.iter().any(|o| o.nulls_first.is_some())
    {
        Action::BigQueryNullsOrdering
    } else {
        // DuckDB -> MySQL: Add CASE WHEN for NULLS LAST simulation in window ORDER BY
        // But NOT when frame is RANGE/GROUPS, since adding CASE WHEN would break value-based frames
        let source_nulls_last = matches!(source, DialectType::DuckDB);
        // RANGE/GROUPS frames are defined over the ORDER BY key values
        // themselves, so wrapping the key in CASE WHEN would change the
        // frame boundaries, not just the sort order.
        let has_range_frame = wf.over.frame.as_ref().map_or(false, |f| {
            matches!(
                f.kind,
                crate::expressions::WindowFrameKind::Range
                    | crate::expressions::WindowFrameKind::Groups
            )
        });
        if source_nulls_last
            && matches!(target, DialectType::MySQL)
            && !wf.over.order_by.is_empty()
            // Only ascending keys trigger the rewrite — presumably because
            // MySQL's ASC NULL placement disagrees with DuckDB's default;
            // confirm against the MysqlNullsLastRewrite implementation.
            && wf.over.order_by.iter().any(|o| !o.desc)
            && !has_range_frame
        {
            Action::MysqlNullsLastRewrite
        } else {
            match &wf.this {
                // ignore_nulls == Some(false) encodes an explicit RESPECT NULLS
                Expression::FirstValue(ref vf)
                | Expression::LastValue(ref vf)
                    if vf.ignore_nulls == Some(false) =>
                {
                    // RESPECT NULLS
                    match target {
                        DialectType::SQLite => Action::RespectNullsConvert,
                        _ => Action::None,
                    }
                }
                _ => Action::None,
            }
        }
    }
}
6928 // CREATE TABLE a LIKE b -> dialect-specific transformations
6929 Expression::CreateTable(ref ct)
6930 if ct.columns.is_empty()
6931 && ct.constraints.iter().any(|c| {
6932 matches!(c, crate::expressions::TableConstraint::Like { .. })
6933 })
6934 && matches!(
6935 target,
6936 DialectType::DuckDB | DialectType::SQLite | DialectType::Drill
6937 ) =>
6938 {
6939 Action::CreateTableLikeToCtas
6940 }
6941 Expression::CreateTable(ref ct)
6942 if ct.columns.is_empty()
6943 && ct.constraints.iter().any(|c| {
6944 matches!(c, crate::expressions::TableConstraint::Like { .. })
6945 })
6946 && matches!(target, DialectType::TSQL | DialectType::Fabric) =>
6947 {
6948 Action::CreateTableLikeToSelectInto
6949 }
6950 Expression::CreateTable(ref ct)
6951 if ct.columns.is_empty()
6952 && ct.constraints.iter().any(|c| {
6953 matches!(c, crate::expressions::TableConstraint::Like { .. })
6954 })
6955 && matches!(target, DialectType::ClickHouse) =>
6956 {
6957 Action::CreateTableLikeToAs
6958 }
6959 // CREATE TABLE: strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
6960 Expression::CreateTable(ref ct)
6961 if matches!(target, DialectType::DuckDB)
6962 && matches!(
6963 source,
6964 DialectType::DuckDB
6965 | DialectType::Spark
6966 | DialectType::Databricks
6967 | DialectType::Hive
6968 ) =>
6969 {
6970 let has_comment = ct.columns.iter().any(|c| {
6971 c.comment.is_some()
6972 || c.constraints.iter().any(|con| {
6973 matches!(con, crate::expressions::ColumnConstraint::Comment(_))
6974 })
6975 });
6976 let has_props = !ct.properties.is_empty();
6977 if has_comment || has_props {
6978 Action::CreateTableStripComment
6979 } else {
6980 Action::None
6981 }
6982 }
6983 // Array conversion: Expression::Array -> Expression::ArrayFunc for PostgreSQL
6984 Expression::Array(_)
6985 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) =>
6986 {
6987 Action::ArrayConcatBracketConvert
6988 }
6989 // ArrayFunc (bracket notation) -> Function("ARRAY") for Redshift (from BigQuery source)
6990 Expression::ArrayFunc(ref arr)
6991 if arr.bracket_notation
6992 && matches!(source, DialectType::BigQuery)
6993 && matches!(target, DialectType::Redshift) =>
6994 {
6995 Action::ArrayConcatBracketConvert
6996 }
6997 // BIT_OR/BIT_AND/BIT_XOR: float/decimal arg cast for DuckDB, or rename for Snowflake
6998 Expression::BitwiseOrAgg(ref f)
6999 | Expression::BitwiseAndAgg(ref f)
7000 | Expression::BitwiseXorAgg(ref f) => {
7001 if matches!(target, DialectType::DuckDB) {
7002 // Check if the arg is CAST(val AS FLOAT/DOUBLE/DECIMAL/REAL)
7003 if let Expression::Cast(ref c) = f.this {
7004 match &c.to {
7005 DataType::Float { .. }
7006 | DataType::Double { .. }
7007 | DataType::Decimal { .. } => Action::BitAggFloatCast,
7008 DataType::Custom { ref name }
7009 if name.eq_ignore_ascii_case("REAL") =>
7010 {
7011 Action::BitAggFloatCast
7012 }
7013 _ => Action::None,
7014 }
7015 } else {
7016 Action::None
7017 }
7018 } else if matches!(target, DialectType::Snowflake) {
7019 Action::BitAggSnowflakeRename
7020 } else {
7021 Action::None
7022 }
7023 }
7024 // FILTER -> IFF for Snowflake (aggregate functions with FILTER clause)
7025 Expression::Filter(ref _f) if matches!(target, DialectType::Snowflake) => {
7026 Action::FilterToIff
7027 }
7028 // AggFunc.filter -> IFF wrapping for Snowflake (e.g., AVG(x) FILTER(WHERE cond))
7029 Expression::Avg(ref f)
7030 | Expression::Sum(ref f)
7031 | Expression::Min(ref f)
7032 | Expression::Max(ref f)
7033 | Expression::CountIf(ref f)
7034 | Expression::Stddev(ref f)
7035 | Expression::StddevPop(ref f)
7036 | Expression::StddevSamp(ref f)
7037 | Expression::Variance(ref f)
7038 | Expression::VarPop(ref f)
7039 | Expression::VarSamp(ref f)
7040 | Expression::Median(ref f)
7041 | Expression::Mode(ref f)
7042 | Expression::First(ref f)
7043 | Expression::Last(ref f)
7044 | Expression::ApproxDistinct(ref f)
7045 if f.filter.is_some() && matches!(target, DialectType::Snowflake) =>
7046 {
7047 Action::AggFilterToIff
7048 }
7049 Expression::Count(ref c)
7050 if c.filter.is_some() && matches!(target, DialectType::Snowflake) =>
7051 {
7052 Action::AggFilterToIff
7053 }
7054 // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN ... END) for dialects that don't support multi-arg DISTINCT
7055 Expression::Count(ref c)
7056 if c.distinct
7057 && matches!(&c.this, Some(Expression::Tuple(_)))
7058 && matches!(
7059 target,
7060 DialectType::Presto
7061 | DialectType::Trino
7062 | DialectType::DuckDB
7063 | DialectType::PostgreSQL
7064 ) =>
7065 {
7066 Action::CountDistinctMultiArg
7067 }
7068 // JSON arrow -> GET_PATH/PARSE_JSON for Snowflake
7069 Expression::JsonExtract(_) if matches!(target, DialectType::Snowflake) => {
7070 Action::JsonToGetPath
7071 }
7072 // DuckDB struct/dict -> BigQuery STRUCT / Presto ROW
7073 Expression::Struct(_)
7074 if matches!(
7075 target,
7076 DialectType::BigQuery | DialectType::Presto | DialectType::Trino
7077 ) && matches!(source, DialectType::DuckDB) =>
7078 {
7079 Action::StructToRow
7080 }
7081 // DuckDB curly-brace dict {'key': value} -> BigQuery STRUCT / Presto ROW
7082 Expression::MapFunc(ref m)
7083 if m.curly_brace_syntax
7084 && matches!(
7085 target,
7086 DialectType::BigQuery | DialectType::Presto | DialectType::Trino
7087 )
7088 && matches!(source, DialectType::DuckDB) =>
7089 {
7090 Action::StructToRow
7091 }
7092 // APPROX_COUNT_DISTINCT -> APPROX_DISTINCT for Presto/Trino
7093 Expression::ApproxCountDistinct(_)
7094 if matches!(
7095 target,
7096 DialectType::Presto | DialectType::Trino | DialectType::Athena
7097 ) =>
7098 {
7099 Action::ApproxCountDistinctToApproxDistinct
7100 }
7101 // ARRAY_CONTAINS(arr, val) -> CONTAINS(arr, val) for Presto, ARRAY_CONTAINS(CAST(val AS VARIANT), arr) for Snowflake
7102 Expression::ArrayContains(_)
7103 if matches!(
7104 target,
7105 DialectType::Presto | DialectType::Trino | DialectType::Snowflake
7106 ) =>
7107 {
7108 Action::ArrayContainsConvert
7109 }
7110 // StrPosition with position -> complex expansion for Presto/DuckDB
7111 // STRPOS doesn't support a position arg in these dialects
7112 Expression::StrPosition(ref sp)
7113 if sp.position.is_some()
7114 && matches!(
7115 target,
7116 DialectType::Presto
7117 | DialectType::Trino
7118 | DialectType::Athena
7119 | DialectType::DuckDB
7120 ) =>
7121 {
7122 Action::StrPositionExpand
7123 }
7124 // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
7125 Expression::First(ref f)
7126 if f.ignore_nulls == Some(true)
7127 && matches!(target, DialectType::DuckDB) =>
7128 {
7129 Action::FirstToAnyValue
7130 }
// BEGIN -> START TRANSACTION for Presto/Trino
// NOTE(review): this arm returns Action::None, which is exactly what the
// catch-all arm would produce, so it currently only serves as documentation.
// The "inline" handler referred to below matches Expression::Transaction,
// not Expression::Command — confirm that a parsed BEGIN surfaces as a
// Transaction node, otherwise Command("BEGIN") is never actually rewritten.
Expression::Command(ref cmd)
    if cmd.this.eq_ignore_ascii_case("BEGIN")
        && matches!(
            target,
            DialectType::Presto | DialectType::Trino | DialectType::Athena
        ) =>
{
    // Handled inline below
    Action::None // We'll handle it directly
}
7142 // Note: PostgreSQL ^ is now parsed as Power directly (not BitwiseXor).
7143 // PostgreSQL # is parsed as BitwiseXor (which is correct).
7144 // a || b (Concat operator) -> CONCAT function for Presto/Trino
7145 Expression::Concat(ref _op)
7146 if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
7147 && matches!(target, DialectType::Presto | DialectType::Trino) =>
7148 {
7149 Action::PipeConcatToConcat
7150 }
7151 _ => Action::None,
7152 }
7153 };
7154
7155 match action {
7156 Action::None => {
7157 // Handle inline transforms that don't need a dedicated action
7158
7159 // BETWEEN SYMMETRIC/ASYMMETRIC expansion for non-PostgreSQL/Dremio targets
7160 if let Expression::Between(ref b) = e {
7161 if let Some(sym) = b.symmetric {
7162 let keeps_symmetric =
7163 matches!(target, DialectType::PostgreSQL | DialectType::Dremio);
7164 if !keeps_symmetric {
7165 if sym {
7166 // SYMMETRIC: expand to (x BETWEEN a AND b OR x BETWEEN b AND a)
7167 let b = if let Expression::Between(b) = e {
7168 *b
7169 } else {
7170 unreachable!()
7171 };
7172 let between1 = Expression::Between(Box::new(
7173 crate::expressions::Between {
7174 this: b.this.clone(),
7175 low: b.low.clone(),
7176 high: b.high.clone(),
7177 not: b.not,
7178 symmetric: None,
7179 },
7180 ));
7181 let between2 = Expression::Between(Box::new(
7182 crate::expressions::Between {
7183 this: b.this,
7184 low: b.high,
7185 high: b.low,
7186 not: b.not,
7187 symmetric: None,
7188 },
7189 ));
7190 return Ok(Expression::Paren(Box::new(
7191 crate::expressions::Paren {
7192 this: Expression::Or(Box::new(
7193 crate::expressions::BinaryOp::new(
7194 between1, between2,
7195 ),
7196 )),
7197 trailing_comments: vec![],
7198 },
7199 )));
7200 } else {
7201 // ASYMMETRIC: strip qualifier, keep as regular BETWEEN
7202 let b = if let Expression::Between(b) = e {
7203 *b
7204 } else {
7205 unreachable!()
7206 };
7207 return Ok(Expression::Between(Box::new(
7208 crate::expressions::Between {
7209 this: b.this,
7210 low: b.low,
7211 high: b.high,
7212 not: b.not,
7213 symmetric: None,
7214 },
7215 )));
7216 }
7217 }
7218 }
7219 }
7220
7221 // ILIKE -> LOWER(x) LIKE LOWER(y) for StarRocks/Doris
7222 if let Expression::ILike(ref _like) = e {
7223 if matches!(target, DialectType::StarRocks | DialectType::Doris) {
7224 let like = if let Expression::ILike(l) = e {
7225 *l
7226 } else {
7227 unreachable!()
7228 };
7229 let lower_left = Expression::Function(Box::new(Function::new(
7230 "LOWER".to_string(),
7231 vec![like.left],
7232 )));
7233 let lower_right = Expression::Function(Box::new(Function::new(
7234 "LOWER".to_string(),
7235 vec![like.right],
7236 )));
7237 return Ok(Expression::Like(Box::new(crate::expressions::LikeOp {
7238 left: lower_left,
7239 right: lower_right,
7240 escape: like.escape,
7241 quantifier: like.quantifier,
7242 inferred_type: None,
7243 })));
7244 }
7245 }
7246
7247 // Oracle DBMS_RANDOM.VALUE() -> RANDOM() for PostgreSQL, RAND() for others
7248 if let Expression::MethodCall(ref mc) = e {
7249 if matches!(source, DialectType::Oracle)
7250 && mc.method.name.eq_ignore_ascii_case("VALUE")
7251 && mc.args.is_empty()
7252 {
7253 let is_dbms_random = match &mc.this {
7254 Expression::Identifier(id) => {
7255 id.name.eq_ignore_ascii_case("DBMS_RANDOM")
7256 }
7257 Expression::Column(col) => {
7258 col.table.is_none()
7259 && col.name.name.eq_ignore_ascii_case("DBMS_RANDOM")
7260 }
7261 _ => false,
7262 };
7263 if is_dbms_random {
7264 let func_name = match target {
7265 DialectType::PostgreSQL
7266 | DialectType::Redshift
7267 | DialectType::DuckDB
7268 | DialectType::SQLite => "RANDOM",
7269 DialectType::Oracle => "DBMS_RANDOM.VALUE",
7270 _ => "RAND",
7271 };
7272 return Ok(Expression::Function(Box::new(Function::new(
7273 func_name.to_string(),
7274 vec![],
7275 ))));
7276 }
7277 }
7278 }
7279 // TRIM without explicit position -> add BOTH for ClickHouse
7280 if let Expression::Trim(ref trim) = e {
7281 if matches!(target, DialectType::ClickHouse)
7282 && trim.sql_standard_syntax
7283 && trim.characters.is_some()
7284 && !trim.position_explicit
7285 {
7286 let mut new_trim = (**trim).clone();
7287 new_trim.position_explicit = true;
7288 return Ok(Expression::Trim(Box::new(new_trim)));
7289 }
7290 }
7291 // BEGIN -> START TRANSACTION for Presto/Trino
7292 if let Expression::Transaction(ref txn) = e {
7293 if matches!(
7294 target,
7295 DialectType::Presto | DialectType::Trino | DialectType::Athena
7296 ) {
7297 // Convert BEGIN to START TRANSACTION by setting mark to "START"
7298 let mut txn = txn.clone();
7299 txn.mark = Some(Box::new(Expression::Identifier(Identifier::new(
7300 "START".to_string(),
7301 ))));
7302 return Ok(Expression::Transaction(Box::new(*txn)));
7303 }
7304 }
7305 // IS TRUE/FALSE -> simplified forms for Presto/Trino
7306 if matches!(
7307 target,
7308 DialectType::Presto | DialectType::Trino | DialectType::Athena
7309 ) {
7310 match &e {
7311 Expression::IsTrue(itf) if !itf.not => {
7312 // x IS TRUE -> x
7313 return Ok(itf.this.clone());
7314 }
7315 Expression::IsTrue(itf) if itf.not => {
7316 // x IS NOT TRUE -> NOT x
7317 return Ok(Expression::Not(Box::new(
7318 crate::expressions::UnaryOp {
7319 this: itf.this.clone(),
7320 inferred_type: None,
7321 },
7322 )));
7323 }
7324 Expression::IsFalse(itf) if !itf.not => {
7325 // x IS FALSE -> NOT x
7326 return Ok(Expression::Not(Box::new(
7327 crate::expressions::UnaryOp {
7328 this: itf.this.clone(),
7329 inferred_type: None,
7330 },
7331 )));
7332 }
7333 Expression::IsFalse(itf) if itf.not => {
7334 // x IS NOT FALSE -> NOT NOT x
7335 let not_x =
7336 Expression::Not(Box::new(crate::expressions::UnaryOp {
7337 this: itf.this.clone(),
7338 inferred_type: None,
7339 }));
7340 return Ok(Expression::Not(Box::new(
7341 crate::expressions::UnaryOp {
7342 this: not_x,
7343 inferred_type: None,
7344 },
7345 )));
7346 }
7347 _ => {}
7348 }
7349 }
7350 // x IS NOT FALSE -> NOT x IS FALSE for Redshift
7351 if matches!(target, DialectType::Redshift) {
7352 if let Expression::IsFalse(ref itf) = e {
7353 if itf.not {
7354 return Ok(Expression::Not(Box::new(
7355 crate::expressions::UnaryOp {
7356 this: Expression::IsFalse(Box::new(
7357 crate::expressions::IsTrueFalse {
7358 this: itf.this.clone(),
7359 not: false,
7360 },
7361 )),
7362 inferred_type: None,
7363 },
7364 )));
7365 }
7366 }
7367 }
7368 // REGEXP_REPLACE: add 'g' flag when source defaults to global replacement
7369 // Snowflake default is global, PostgreSQL/DuckDB default is first-match-only
7370 if let Expression::Function(ref f) = e {
7371 if f.name.eq_ignore_ascii_case("REGEXP_REPLACE")
7372 && matches!(source, DialectType::Snowflake)
7373 && matches!(target, DialectType::PostgreSQL | DialectType::DuckDB)
7374 {
7375 if f.args.len() == 3 {
7376 let mut args = f.args.clone();
7377 args.push(Expression::string("g"));
7378 return Ok(Expression::Function(Box::new(Function::new(
7379 "REGEXP_REPLACE".to_string(),
7380 args,
7381 ))));
7382 } else if f.args.len() == 4 {
7383 // 4th arg might be position, add 'g' as 5th
7384 let mut args = f.args.clone();
7385 args.push(Expression::string("g"));
7386 return Ok(Expression::Function(Box::new(Function::new(
7387 "REGEXP_REPLACE".to_string(),
7388 args,
7389 ))));
7390 }
7391 }
7392 }
7393 Ok(e)
7394 }
7395
7396 Action::GreatestLeastNull => {
7397 let f = if let Expression::Function(f) = e {
7398 *f
7399 } else {
7400 unreachable!("action only triggered for Function expressions")
7401 };
7402 let mut null_checks: Vec<Expression> = f
7403 .args
7404 .iter()
7405 .map(|a| {
7406 Expression::IsNull(Box::new(IsNull {
7407 this: a.clone(),
7408 not: false,
7409 postfix_form: false,
7410 }))
7411 })
7412 .collect();
7413 let condition = if null_checks.len() == 1 {
7414 null_checks.remove(0)
7415 } else {
7416 let first = null_checks.remove(0);
7417 null_checks.into_iter().fold(first, |acc, check| {
7418 Expression::Or(Box::new(BinaryOp::new(acc, check)))
7419 })
7420 };
7421 Ok(Expression::Case(Box::new(Case {
7422 operand: None,
7423 whens: vec![(condition, Expression::Null(Null))],
7424 else_: Some(Expression::Function(Box::new(Function::new(
7425 f.name, f.args,
7426 )))),
7427 comments: Vec::new(),
7428 inferred_type: None,
7429 })))
7430 }
7431
7432 Action::ArrayGenerateRange => {
7433 let f = if let Expression::Function(f) = e {
7434 *f
7435 } else {
7436 unreachable!("action only triggered for Function expressions")
7437 };
7438 let start = f.args[0].clone();
7439 let end = f.args[1].clone();
7440 let step = f.args.get(2).cloned();
7441
7442 let end_minus_1 = Expression::Sub(Box::new(BinaryOp::new(
7443 end.clone(),
7444 Expression::number(1),
7445 )));
7446
7447 match target {
7448 DialectType::PostgreSQL | DialectType::Redshift => {
7449 let mut args = vec![start, end_minus_1];
7450 if let Some(s) = step {
7451 args.push(s);
7452 }
7453 Ok(Expression::Function(Box::new(Function::new(
7454 "GENERATE_SERIES".to_string(),
7455 args,
7456 ))))
7457 }
7458 DialectType::Presto | DialectType::Trino => {
7459 let mut args = vec![start, end_minus_1];
7460 if let Some(s) = step {
7461 args.push(s);
7462 }
7463 Ok(Expression::Function(Box::new(Function::new(
7464 "SEQUENCE".to_string(),
7465 args,
7466 ))))
7467 }
7468 DialectType::BigQuery => {
7469 let mut args = vec![start, end_minus_1];
7470 if let Some(s) = step {
7471 args.push(s);
7472 }
7473 Ok(Expression::Function(Box::new(Function::new(
7474 "GENERATE_ARRAY".to_string(),
7475 args,
7476 ))))
7477 }
7478 DialectType::Snowflake => {
7479 let normalized_end = Expression::Add(Box::new(BinaryOp::new(
7480 Expression::Paren(Box::new(Paren {
7481 this: end_minus_1,
7482 trailing_comments: vec![],
7483 })),
7484 Expression::number(1),
7485 )));
7486 let mut args = vec![start, normalized_end];
7487 if let Some(s) = step {
7488 args.push(s);
7489 }
7490 Ok(Expression::Function(Box::new(Function::new(
7491 "ARRAY_GENERATE_RANGE".to_string(),
7492 args,
7493 ))))
7494 }
7495 _ => Ok(Expression::Function(Box::new(Function::new(
7496 f.name, f.args,
7497 )))),
7498 }
7499 }
7500
Action::Div0TypedDivision => {
    // Expects an IF whose false branch is a division (the shape DIV0 is
    // rewritten into upstream — NOTE(review): confirm at the trigger site).
    // Casts the dividend to a floating type so the fallback division is
    // performed in floating point on the target.
    let if_func = if let Expression::IfFunc(f) = e {
        *f
    } else {
        unreachable!("action only triggered for IfFunc expressions")
    };
    if let Some(Expression::Div(div)) = if_func.false_value {
        // SQLite target: FLOAT rendered with the REAL spelling; all other
        // targets: DOUBLE.
        let cast_type = if matches!(target, DialectType::SQLite) {
            DataType::Float {
                precision: None,
                scale: None,
                real_spelling: true,
            }
        } else {
            DataType::Double {
                precision: None,
                scale: None,
            }
        };
        // Only the left operand is cast; the right operand stays as-is.
        let casted_left = Expression::Cast(Box::new(Cast {
            this: div.left,
            to: cast_type,
            trailing_comments: vec![],
            double_colon_syntax: false,
            format: None,
            default: None,
            inferred_type: None,
        }));
        // Rebuild the IF with the same condition/true branch and the
        // retyped division as the false branch.
        Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
            condition: if_func.condition,
            true_value: if_func.true_value,
            false_value: Some(Expression::Div(Box::new(BinaryOp::new(
                casted_left,
                div.right,
            )))),
            original_name: if_func.original_name,
        })))
    } else {
        // Not actually a Div, reconstruct
        Ok(Expression::IfFunc(Box::new(if_func)))
    }
}
7543
7544 Action::ArrayAggCollectList => {
7545 let agg = if let Expression::ArrayAgg(a) = e {
7546 *a
7547 } else {
7548 unreachable!("action only triggered for ArrayAgg expressions")
7549 };
7550 Ok(Expression::ArrayAgg(Box::new(AggFunc {
7551 name: Some("COLLECT_LIST".to_string()),
7552 ..agg
7553 })))
7554 }
7555
Action::ArrayAggWithinGroupFilter => {
    // ARRAY_AGG(x) WITHIN GROUP (ORDER BY ...) -> plain ARRAY_AGG with the
    // ORDER BY moved inside the call and FILTER(WHERE x IS NOT NULL) added.
    let wg = if let Expression::WithinGroup(w) = e {
        *w
    } else {
        unreachable!("action only triggered for WithinGroup expressions")
    };
    if let Expression::ArrayAgg(inner_agg) = wg.this {
        // Filter removes NULLs of the aggregated column.
        let col = inner_agg.this.clone();
        let filter = Expression::IsNull(Box::new(IsNull {
            this: col,
            not: true,
            postfix_form: false,
        }));
        // For DuckDB, add explicit NULLS FIRST for DESC ordering
        let order_by = if matches!(target, DialectType::DuckDB) {
            wg.order_by
                .into_iter()
                .map(|mut o| {
                    // Only fill in an explicit choice where none was given.
                    if o.desc && o.nulls_first.is_none() {
                        o.nulls_first = Some(true);
                    }
                    o
                })
                .collect()
        } else {
            wg.order_by
        };
        // Rebuild the aggregate; note any filter already present on the
        // inner ARRAY_AGG is replaced by the IS NOT NULL filter.
        Ok(Expression::ArrayAgg(Box::new(AggFunc {
            this: inner_agg.this,
            distinct: inner_agg.distinct,
            filter: Some(filter),
            order_by,
            name: inner_agg.name,
            ignore_nulls: inner_agg.ignore_nulls,
            having_max: inner_agg.having_max,
            limit: inner_agg.limit,
            inferred_type: None,
        })))
    } else {
        // WITHIN GROUP over something other than ARRAY_AGG: leave as-is.
        Ok(Expression::WithinGroup(Box::new(wg)))
    }
}
7598
7599 Action::ArrayAggFilter => {
7600 let agg = if let Expression::ArrayAgg(a) = e {
7601 *a
7602 } else {
7603 unreachable!("action only triggered for ArrayAgg expressions")
7604 };
7605 let col = agg.this.clone();
7606 let filter = Expression::IsNull(Box::new(IsNull {
7607 this: col,
7608 not: true,
7609 postfix_form: false,
7610 }));
7611 Ok(Expression::ArrayAgg(Box::new(AggFunc {
7612 filter: Some(filter),
7613 ..agg
7614 })))
7615 }
7616
7617 Action::ArrayAggNullFilter => {
7618 // ARRAY_AGG(x) FILTER(WHERE cond) -> ARRAY_AGG(x) FILTER(WHERE cond AND NOT x IS NULL)
7619 // For source dialects that exclude NULLs (Spark/Hive) targeting DuckDB which includes them
7620 let agg = if let Expression::ArrayAgg(a) = e {
7621 *a
7622 } else {
7623 unreachable!("action only triggered for ArrayAgg expressions")
7624 };
7625 let col = agg.this.clone();
7626 let not_null = Expression::IsNull(Box::new(IsNull {
7627 this: col,
7628 not: true,
7629 postfix_form: true, // Use "NOT x IS NULL" form (prefix NOT)
7630 }));
7631 let new_filter = if let Some(existing_filter) = agg.filter {
7632 // AND the NOT IS NULL with existing filter
7633 Expression::And(Box::new(crate::expressions::BinaryOp::new(
7634 existing_filter,
7635 not_null,
7636 )))
7637 } else {
7638 not_null
7639 };
7640 Ok(Expression::ArrayAgg(Box::new(AggFunc {
7641 filter: Some(new_filter),
7642 ..agg
7643 })))
7644 }
7645
Action::BigQueryArraySelectAsStructToSnowflake => {
    // ARRAY(SELECT AS STRUCT x1 AS x1, x2 AS x2 FROM t)
    // -> (SELECT ARRAY_AGG(OBJECT_CONSTRUCT('x1', x1, 'x2', x2)) FROM t)
    if let Expression::Function(mut f) = e {
        // Match only ARRAY(...) with a single SELECT AS STRUCT argument.
        let is_match = f.args.len() == 1
            && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT"));
        if is_match {
            let inner_select = match f.args.remove(0) {
                Expression::Select(s) => *s,
                _ => unreachable!(
                    "argument already verified to be a Select expression"
                ),
            };
            // Build OBJECT_CONSTRUCT args from SELECT expressions
            let mut oc_args = Vec::new();
            for expr in &inner_select.expressions {
                match expr {
                    // Aliased item: the alias becomes the object key.
                    Expression::Alias(a) => {
                        let key = Expression::Literal(Literal::String(
                            a.alias.name.clone(),
                        ));
                        let value = a.this.clone();
                        oc_args.push(key);
                        oc_args.push(value);
                    }
                    // Bare column: the column name doubles as the key.
                    Expression::Column(c) => {
                        let key = Expression::Literal(Literal::String(
                            c.name.name.clone(),
                        ));
                        oc_args.push(key);
                        oc_args.push(expr.clone());
                    }
                    // Anything else is pushed without a key — NOTE(review):
                    // this yields an odd-length OBJECT_CONSTRUCT arg list;
                    // confirm intended for keyless items.
                    _ => {
                        oc_args.push(expr.clone());
                    }
                }
            }
            let object_construct = Expression::Function(Box::new(Function::new(
                "OBJECT_CONSTRUCT".to_string(),
                oc_args,
            )));
            let array_agg = Expression::Function(Box::new(Function::new(
                "ARRAY_AGG".to_string(),
                vec![object_construct],
            )));
            // Only FROM/WHERE/GROUP BY/HAVING/JOINs are carried over from
            // the inner select; other clauses are dropped.
            let mut new_select = crate::expressions::Select::new();
            new_select.expressions = vec![array_agg];
            new_select.from = inner_select.from.clone();
            new_select.where_clause = inner_select.where_clause.clone();
            new_select.group_by = inner_select.group_by.clone();
            new_select.having = inner_select.having.clone();
            new_select.joins = inner_select.joins.clone();
            // Wrap as a bare (un-aliased) subquery expression.
            Ok(Expression::Subquery(Box::new(
                crate::expressions::Subquery {
                    this: Expression::Select(Box::new(new_select)),
                    alias: None,
                    column_aliases: Vec::new(),
                    order_by: None,
                    limit: None,
                    offset: None,
                    distribute_by: None,
                    sort_by: None,
                    cluster_by: None,
                    lateral: false,
                    modifiers_inside: false,
                    trailing_comments: Vec::new(),
                    inferred_type: None,
                },
            )))
        } else {
            Ok(Expression::Function(f))
        }
    } else {
        Ok(e)
    }
}
7722
7723 Action::BigQueryPercentileContToDuckDB => {
7724 // PERCENTILE_CONT(x, frac [RESPECT NULLS]) -> QUANTILE_CONT(x, frac) for DuckDB
7725 if let Expression::AggregateFunction(mut af) = e {
7726 af.name = "QUANTILE_CONT".to_string();
7727 af.ignore_nulls = None; // Strip RESPECT/IGNORE NULLS
7728 // Keep only first 2 args
7729 if af.args.len() > 2 {
7730 af.args.truncate(2);
7731 }
7732 Ok(Expression::AggregateFunction(af))
7733 } else {
7734 Ok(e)
7735 }
7736 }
7737
7738 Action::ArrayAggIgnoreNullsDuckDB => {
7739 // ARRAY_AGG(x IGNORE NULLS ORDER BY a, b DESC) -> ARRAY_AGG(x ORDER BY a NULLS FIRST, b DESC)
7740 // Strip IGNORE NULLS, add NULLS FIRST to first ORDER BY column
7741 let mut agg = if let Expression::ArrayAgg(a) = e {
7742 *a
7743 } else {
7744 unreachable!("action only triggered for ArrayAgg expressions")
7745 };
7746 agg.ignore_nulls = None; // Strip IGNORE NULLS
7747 if !agg.order_by.is_empty() {
7748 agg.order_by[0].nulls_first = Some(true);
7749 }
7750 Ok(Expression::ArrayAgg(Box::new(agg)))
7751 }
7752
Action::CountDistinctMultiArg => {
    // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN a IS NULL THEN NULL WHEN b IS NULL THEN NULL ELSE (a, b) END)
    if let Expression::Count(c) = e {
        // Multi-argument form is represented as a tuple operand.
        if let Some(Expression::Tuple(t)) = c.this {
            let args = t.expressions;
            // Build CASE expression:
            // WHEN a IS NULL THEN NULL WHEN b IS NULL THEN NULL ELSE (a, b) END
            let mut whens = Vec::new();
            for arg in &args {
                whens.push((
                    Expression::IsNull(Box::new(IsNull {
                        this: arg.clone(),
                        not: false,
                        postfix_form: false,
                    })),
                    Expression::Null(crate::expressions::Null),
                ));
            }
            // Build the tuple for ELSE
            let tuple_expr =
                Expression::Tuple(Box::new(crate::expressions::Tuple {
                    expressions: args,
                }));
            let case_expr = Expression::Case(Box::new(crate::expressions::Case {
                operand: None,
                whens,
                else_: Some(tuple_expr),
                comments: Vec::new(),
                inferred_type: None,
            }));
            // NOTE(review): distinct is forced to true here — presumably
            // this action fires only for COUNT(DISTINCT ...); confirm at
            // the trigger site. Filter and other options are preserved.
            Ok(Expression::Count(Box::new(crate::expressions::CountFunc {
                this: Some(case_expr),
                star: false,
                distinct: true,
                filter: c.filter,
                ignore_nulls: c.ignore_nulls,
                original_name: c.original_name,
            })))
        } else {
            // Single-argument COUNT: no rewrite needed.
            Ok(Expression::Count(c))
        }
    } else {
        Ok(e)
    }
}
7798
7799 Action::CastTimestampToDatetime => {
7800 let c = if let Expression::Cast(c) = e {
7801 *c
7802 } else {
7803 unreachable!("action only triggered for Cast expressions")
7804 };
7805 Ok(Expression::Cast(Box::new(Cast {
7806 to: DataType::Custom {
7807 name: "DATETIME".to_string(),
7808 },
7809 ..c
7810 })))
7811 }
7812
7813 Action::CastTimestampStripTz => {
7814 // CAST(x AS TIMESTAMP(n) WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark/BigQuery
7815 let c = if let Expression::Cast(c) = e {
7816 *c
7817 } else {
7818 unreachable!("action only triggered for Cast expressions")
7819 };
7820 Ok(Expression::Cast(Box::new(Cast {
7821 to: DataType::Timestamp {
7822 precision: None,
7823 timezone: false,
7824 },
7825 ..c
7826 })))
7827 }
7828
7829 Action::CastTimestamptzToFunc => {
7830 // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) function for MySQL/StarRocks
7831 let c = if let Expression::Cast(c) = e {
7832 *c
7833 } else {
7834 unreachable!("action only triggered for Cast expressions")
7835 };
7836 Ok(Expression::Function(Box::new(Function::new(
7837 "TIMESTAMP".to_string(),
7838 vec![c.this],
7839 ))))
7840 }
7841
7842 Action::ToDateToCast => {
7843 // Convert TO_DATE(x) -> CAST(x AS DATE) for DuckDB
7844 if let Expression::Function(f) = e {
7845 let arg = f.args.into_iter().next().unwrap();
7846 Ok(Expression::Cast(Box::new(Cast {
7847 this: arg,
7848 to: DataType::Date,
7849 double_colon_syntax: false,
7850 trailing_comments: vec![],
7851 format: None,
7852 default: None,
7853 inferred_type: None,
7854 })))
7855 } else {
7856 Ok(e)
7857 }
7858 }
7859 Action::DateTruncWrapCast => {
7860 // Handle both Expression::DateTrunc/TimestampTrunc and
7861 // Expression::Function("DATE_TRUNC", [unit, expr])
7862 match e {
7863 Expression::DateTrunc(d) | Expression::TimestampTrunc(d) => {
7864 let input_type = match &d.this {
7865 Expression::Cast(c) => Some(c.to.clone()),
7866 _ => None,
7867 };
7868 if let Some(cast_type) = input_type {
7869 let is_time = matches!(cast_type, DataType::Time { .. });
7870 if is_time {
7871 let date_expr = Expression::Cast(Box::new(Cast {
7872 this: Expression::Literal(
7873 crate::expressions::Literal::String(
7874 "1970-01-01".to_string(),
7875 ),
7876 ),
7877 to: DataType::Date,
7878 double_colon_syntax: false,
7879 trailing_comments: vec![],
7880 format: None,
7881 default: None,
7882 inferred_type: None,
7883 }));
7884 let add_expr =
7885 Expression::Add(Box::new(BinaryOp::new(date_expr, d.this)));
7886 let inner = Expression::DateTrunc(Box::new(DateTruncFunc {
7887 this: add_expr,
7888 unit: d.unit,
7889 }));
7890 Ok(Expression::Cast(Box::new(Cast {
7891 this: inner,
7892 to: cast_type,
7893 double_colon_syntax: false,
7894 trailing_comments: vec![],
7895 format: None,
7896 default: None,
7897 inferred_type: None,
7898 })))
7899 } else {
7900 let inner = Expression::DateTrunc(Box::new(*d));
7901 Ok(Expression::Cast(Box::new(Cast {
7902 this: inner,
7903 to: cast_type,
7904 double_colon_syntax: false,
7905 trailing_comments: vec![],
7906 format: None,
7907 default: None,
7908 inferred_type: None,
7909 })))
7910 }
7911 } else {
7912 Ok(Expression::DateTrunc(d))
7913 }
7914 }
7915 Expression::Function(f) if f.args.len() == 2 => {
7916 // Function-based DATE_TRUNC(unit, expr)
7917 let input_type = match &f.args[1] {
7918 Expression::Cast(c) => Some(c.to.clone()),
7919 _ => None,
7920 };
7921 if let Some(cast_type) = input_type {
7922 let is_time = matches!(cast_type, DataType::Time { .. });
7923 if is_time {
7924 let date_expr = Expression::Cast(Box::new(Cast {
7925 this: Expression::Literal(
7926 crate::expressions::Literal::String(
7927 "1970-01-01".to_string(),
7928 ),
7929 ),
7930 to: DataType::Date,
7931 double_colon_syntax: false,
7932 trailing_comments: vec![],
7933 format: None,
7934 default: None,
7935 inferred_type: None,
7936 }));
7937 let mut args = f.args;
7938 let unit_arg = args.remove(0);
7939 let time_expr = args.remove(0);
7940 let add_expr = Expression::Add(Box::new(BinaryOp::new(
7941 date_expr, time_expr,
7942 )));
7943 let inner = Expression::Function(Box::new(Function::new(
7944 "DATE_TRUNC".to_string(),
7945 vec![unit_arg, add_expr],
7946 )));
7947 Ok(Expression::Cast(Box::new(Cast {
7948 this: inner,
7949 to: cast_type,
7950 double_colon_syntax: false,
7951 trailing_comments: vec![],
7952 format: None,
7953 default: None,
7954 inferred_type: None,
7955 })))
7956 } else {
7957 // Wrap the function in CAST
7958 Ok(Expression::Cast(Box::new(Cast {
7959 this: Expression::Function(f),
7960 to: cast_type,
7961 double_colon_syntax: false,
7962 trailing_comments: vec![],
7963 format: None,
7964 default: None,
7965 inferred_type: None,
7966 })))
7967 }
7968 } else {
7969 Ok(Expression::Function(f))
7970 }
7971 }
7972 other => Ok(other),
7973 }
7974 }
7975
7976 Action::RegexpReplaceSnowflakeToDuckDB => {
7977 // Snowflake REGEXP_REPLACE(s, p, r, position) -> REGEXP_REPLACE(s, p, r, 'g')
7978 if let Expression::Function(f) = e {
7979 let mut args = f.args;
7980 let subject = args.remove(0);
7981 let pattern = args.remove(0);
7982 let replacement = args.remove(0);
7983 Ok(Expression::Function(Box::new(Function::new(
7984 "REGEXP_REPLACE".to_string(),
7985 vec![
7986 subject,
7987 pattern,
7988 replacement,
7989 Expression::Literal(crate::expressions::Literal::String(
7990 "g".to_string(),
7991 )),
7992 ],
7993 ))))
7994 } else {
7995 Ok(e)
7996 }
7997 }
7998
7999 Action::SetToVariable => {
8000 // For DuckDB: SET a = 1 -> SET VARIABLE a = 1
8001 if let Expression::SetStatement(mut s) = e {
8002 for item in &mut s.items {
8003 if item.kind.is_none() {
8004 // Check if name already has VARIABLE prefix (from DuckDB source parsing)
8005 let already_variable = match &item.name {
8006 Expression::Identifier(id) => id.name.starts_with("VARIABLE "),
8007 _ => false,
8008 };
8009 if already_variable {
8010 // Extract the actual name and set kind
8011 if let Expression::Identifier(ref mut id) = item.name {
8012 let actual_name = id.name["VARIABLE ".len()..].to_string();
8013 id.name = actual_name;
8014 }
8015 }
8016 item.kind = Some("VARIABLE".to_string());
8017 }
8018 }
8019 Ok(Expression::SetStatement(s))
8020 } else {
8021 Ok(e)
8022 }
8023 }
8024
8025 Action::ConvertTimezoneToExpr => {
8026 // Convert Function("CONVERT_TIMEZONE", args) to Expression::ConvertTimezone
8027 // This prevents Redshift's transform_expr from expanding 2-arg to 3-arg with 'UTC'
8028 if let Expression::Function(f) = e {
8029 if f.args.len() == 2 {
8030 let mut args = f.args;
8031 let target_tz = args.remove(0);
8032 let timestamp = args.remove(0);
8033 Ok(Expression::ConvertTimezone(Box::new(ConvertTimezone {
8034 source_tz: None,
8035 target_tz: Some(Box::new(target_tz)),
8036 timestamp: Some(Box::new(timestamp)),
8037 options: vec![],
8038 })))
8039 } else if f.args.len() == 3 {
8040 let mut args = f.args;
8041 let source_tz = args.remove(0);
8042 let target_tz = args.remove(0);
8043 let timestamp = args.remove(0);
8044 Ok(Expression::ConvertTimezone(Box::new(ConvertTimezone {
8045 source_tz: Some(Box::new(source_tz)),
8046 target_tz: Some(Box::new(target_tz)),
8047 timestamp: Some(Box::new(timestamp)),
8048 options: vec![],
8049 })))
8050 } else {
8051 Ok(Expression::Function(f))
8052 }
8053 } else {
8054 Ok(e)
8055 }
8056 }
8057
8058 Action::BigQueryCastType => {
8059 // Convert BigQuery types to standard SQL types
8060 if let Expression::DataType(dt) = e {
8061 match dt {
8062 DataType::Custom { ref name } if name.eq_ignore_ascii_case("INT64") => {
8063 Ok(Expression::DataType(DataType::BigInt { length: None }))
8064 }
8065 DataType::Custom { ref name }
8066 if name.eq_ignore_ascii_case("FLOAT64") =>
8067 {
8068 Ok(Expression::DataType(DataType::Double {
8069 precision: None,
8070 scale: None,
8071 }))
8072 }
8073 DataType::Custom { ref name } if name.eq_ignore_ascii_case("BOOL") => {
8074 Ok(Expression::DataType(DataType::Boolean))
8075 }
8076 DataType::Custom { ref name } if name.eq_ignore_ascii_case("BYTES") => {
8077 Ok(Expression::DataType(DataType::VarBinary { length: None }))
8078 }
8079 DataType::Custom { ref name }
8080 if name.eq_ignore_ascii_case("NUMERIC") =>
8081 {
8082 // For DuckDB target, use Custom("DECIMAL") to avoid DuckDB's
8083 // default precision (18, 3) being added to bare DECIMAL
8084 if matches!(target, DialectType::DuckDB) {
8085 Ok(Expression::DataType(DataType::Custom {
8086 name: "DECIMAL".to_string(),
8087 }))
8088 } else {
8089 Ok(Expression::DataType(DataType::Decimal {
8090 precision: None,
8091 scale: None,
8092 }))
8093 }
8094 }
8095 DataType::Custom { ref name }
8096 if name.eq_ignore_ascii_case("STRING") =>
8097 {
8098 Ok(Expression::DataType(DataType::String { length: None }))
8099 }
8100 DataType::Custom { ref name }
8101 if name.eq_ignore_ascii_case("DATETIME") =>
8102 {
8103 Ok(Expression::DataType(DataType::Timestamp {
8104 precision: None,
8105 timezone: false,
8106 }))
8107 }
8108 _ => Ok(Expression::DataType(dt)),
8109 }
8110 } else {
8111 Ok(e)
8112 }
8113 }
8114
8115 Action::BigQuerySafeDivide => {
8116 // Convert SafeDivide expression to IF/CASE form for most targets
8117 if let Expression::SafeDivide(sd) = e {
8118 let x = *sd.this;
8119 let y = *sd.expression;
8120 // Wrap x and y in parens if they're complex expressions
8121 let y_ref = match &y {
8122 Expression::Column(_)
8123 | Expression::Literal(_)
8124 | Expression::Identifier(_) => y.clone(),
8125 _ => Expression::Paren(Box::new(Paren {
8126 this: y.clone(),
8127 trailing_comments: vec![],
8128 })),
8129 };
8130 let x_ref = match &x {
8131 Expression::Column(_)
8132 | Expression::Literal(_)
8133 | Expression::Identifier(_) => x.clone(),
8134 _ => Expression::Paren(Box::new(Paren {
8135 this: x.clone(),
8136 trailing_comments: vec![],
8137 })),
8138 };
8139 let condition = Expression::Neq(Box::new(BinaryOp::new(
8140 y_ref.clone(),
8141 Expression::number(0),
8142 )));
8143 let div_expr = Expression::Div(Box::new(BinaryOp::new(x_ref, y_ref)));
8144
8145 if matches!(target, DialectType::Presto | DialectType::Trino) {
8146 // Presto/Trino: IF(y <> 0, CAST(x AS DOUBLE) / y, NULL)
8147 let cast_x = Expression::Cast(Box::new(Cast {
8148 this: match &x {
8149 Expression::Column(_)
8150 | Expression::Literal(_)
8151 | Expression::Identifier(_) => x,
8152 _ => Expression::Paren(Box::new(Paren {
8153 this: x,
8154 trailing_comments: vec![],
8155 })),
8156 },
8157 to: DataType::Double {
8158 precision: None,
8159 scale: None,
8160 },
8161 trailing_comments: vec![],
8162 double_colon_syntax: false,
8163 format: None,
8164 default: None,
8165 inferred_type: None,
8166 }));
8167 let cast_div = Expression::Div(Box::new(BinaryOp::new(
8168 cast_x,
8169 match &y {
8170 Expression::Column(_)
8171 | Expression::Literal(_)
8172 | Expression::Identifier(_) => y,
8173 _ => Expression::Paren(Box::new(Paren {
8174 this: y,
8175 trailing_comments: vec![],
8176 })),
8177 },
8178 )));
8179 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
8180 condition,
8181 true_value: cast_div,
8182 false_value: Some(Expression::Null(Null)),
8183 original_name: None,
8184 })))
8185 } else if matches!(target, DialectType::PostgreSQL) {
8186 // PostgreSQL: CASE WHEN y <> 0 THEN CAST(x AS DOUBLE PRECISION) / y ELSE NULL END
8187 let cast_x = Expression::Cast(Box::new(Cast {
8188 this: match &x {
8189 Expression::Column(_)
8190 | Expression::Literal(_)
8191 | Expression::Identifier(_) => x,
8192 _ => Expression::Paren(Box::new(Paren {
8193 this: x,
8194 trailing_comments: vec![],
8195 })),
8196 },
8197 to: DataType::Custom {
8198 name: "DOUBLE PRECISION".to_string(),
8199 },
8200 trailing_comments: vec![],
8201 double_colon_syntax: false,
8202 format: None,
8203 default: None,
8204 inferred_type: None,
8205 }));
8206 let y_paren = match &y {
8207 Expression::Column(_)
8208 | Expression::Literal(_)
8209 | Expression::Identifier(_) => y,
8210 _ => Expression::Paren(Box::new(Paren {
8211 this: y,
8212 trailing_comments: vec![],
8213 })),
8214 };
8215 let cast_div =
8216 Expression::Div(Box::new(BinaryOp::new(cast_x, y_paren)));
8217 Ok(Expression::Case(Box::new(Case {
8218 operand: None,
8219 whens: vec![(condition, cast_div)],
8220 else_: Some(Expression::Null(Null)),
8221 comments: Vec::new(),
8222 inferred_type: None,
8223 })))
8224 } else if matches!(target, DialectType::DuckDB) {
8225 // DuckDB: CASE WHEN y <> 0 THEN x / y ELSE NULL END
8226 Ok(Expression::Case(Box::new(Case {
8227 operand: None,
8228 whens: vec![(condition, div_expr)],
8229 else_: Some(Expression::Null(Null)),
8230 comments: Vec::new(),
8231 inferred_type: None,
8232 })))
8233 } else if matches!(target, DialectType::Snowflake) {
8234 // Snowflake: IFF(y <> 0, x / y, NULL)
8235 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
8236 condition,
8237 true_value: div_expr,
8238 false_value: Some(Expression::Null(Null)),
8239 original_name: Some("IFF".to_string()),
8240 })))
8241 } else {
8242 // All others: IF(y <> 0, x / y, NULL)
8243 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
8244 condition,
8245 true_value: div_expr,
8246 false_value: Some(Expression::Null(Null)),
8247 original_name: None,
8248 })))
8249 }
8250 } else {
8251 Ok(e)
8252 }
8253 }
8254
8255 Action::BigQueryLastDayStripUnit => {
8256 if let Expression::LastDay(mut ld) = e {
8257 ld.unit = None; // Strip the unit (MONTH is default)
8258 match target {
8259 DialectType::PostgreSQL => {
8260 // LAST_DAY(date) -> CAST(DATE_TRUNC('MONTH', date) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)
8261 let date_trunc = Expression::Function(Box::new(Function::new(
8262 "DATE_TRUNC".to_string(),
8263 vec![
8264 Expression::Literal(crate::expressions::Literal::String(
8265 "MONTH".to_string(),
8266 )),
8267 ld.this.clone(),
8268 ],
8269 )));
8270 let plus_month =
8271 Expression::Add(Box::new(crate::expressions::BinaryOp::new(
8272 date_trunc,
8273 Expression::Interval(Box::new(
8274 crate::expressions::Interval {
8275 this: Some(Expression::Literal(
8276 crate::expressions::Literal::String(
8277 "1 MONTH".to_string(),
8278 ),
8279 )),
8280 unit: None,
8281 },
8282 )),
8283 )));
8284 let minus_day =
8285 Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
8286 plus_month,
8287 Expression::Interval(Box::new(
8288 crate::expressions::Interval {
8289 this: Some(Expression::Literal(
8290 crate::expressions::Literal::String(
8291 "1 DAY".to_string(),
8292 ),
8293 )),
8294 unit: None,
8295 },
8296 )),
8297 )));
8298 Ok(Expression::Cast(Box::new(Cast {
8299 this: minus_day,
8300 to: DataType::Date,
8301 trailing_comments: vec![],
8302 double_colon_syntax: false,
8303 format: None,
8304 default: None,
8305 inferred_type: None,
8306 })))
8307 }
8308 DialectType::Presto => {
8309 // LAST_DAY(date) -> LAST_DAY_OF_MONTH(date)
8310 Ok(Expression::Function(Box::new(Function::new(
8311 "LAST_DAY_OF_MONTH".to_string(),
8312 vec![ld.this],
8313 ))))
8314 }
8315 DialectType::ClickHouse => {
8316 // ClickHouse LAST_DAY(CAST(x AS Nullable(DATE)))
8317 // Need to wrap the DATE type in Nullable
8318 let nullable_date = match ld.this {
8319 Expression::Cast(mut c) => {
8320 c.to = DataType::Nullable {
8321 inner: Box::new(DataType::Date),
8322 };
8323 Expression::Cast(c)
8324 }
8325 other => other,
8326 };
8327 ld.this = nullable_date;
8328 Ok(Expression::LastDay(ld))
8329 }
8330 _ => Ok(Expression::LastDay(ld)),
8331 }
8332 } else {
8333 Ok(e)
8334 }
8335 }
8336
Action::BigQueryCastFormat => {
    // CAST(x AS DATE FORMAT 'fmt') -> PARSE_DATE('%m/%d/%Y', x) for BigQuery
    // CAST(x AS TIMESTAMP FORMAT 'fmt') -> PARSE_TIMESTAMP(...) for BigQuery
    // SAFE_CAST(x AS DATE FORMAT 'fmt') -> CAST(TRY_STRPTIME(x, ...) AS DATE) for DuckDB
    //
    // Pull apart a CAST or SAFE_CAST that carries a FORMAT clause; anything
    // else passes through unchanged.
    let (this, to, format_expr, is_safe) = match e {
        Expression::Cast(ref c) if c.format.is_some() => (
            c.this.clone(),
            c.to.clone(),
            c.format.as_ref().unwrap().as_ref().clone(),
            false,
        ),
        Expression::SafeCast(ref c) if c.format.is_some() => (
            c.this.clone(),
            c.to.clone(),
            c.format.as_ref().unwrap().as_ref().clone(),
            true,
        ),
        _ => return Ok(e),
    };
    // For CAST(x AS STRING FORMAT ...) when target is BigQuery, keep as-is
    if matches!(target, DialectType::BigQuery) {
        match &to {
            DataType::String { .. } | DataType::VarChar { .. } | DataType::Text => {
                // CAST(x AS STRING FORMAT 'fmt') stays as CAST expression for BigQuery
                return Ok(e);
            }
            _ => {}
        }
    }
    // Extract timezone from format if AT TIME ZONE is present
    let (actual_format_expr, timezone) = match &format_expr {
        Expression::AtTimeZone(ref atz) => {
            (atz.this.clone(), Some(atz.zone.clone()))
        }
        _ => (format_expr.clone(), None),
    };
    // Translate the SQL FORMAT picture into a strftime-style format.
    let strftime_fmt = Self::bq_cast_format_to_strftime(&actual_format_expr);
    match target {
        DialectType::BigQuery => {
            // CAST(x AS DATE FORMAT 'fmt') -> PARSE_DATE(strftime_fmt, x)
            // CAST(x AS TIMESTAMP FORMAT 'fmt' AT TIME ZONE 'tz') -> PARSE_TIMESTAMP(strftime_fmt, x, tz)
            // Non-DATE targets all fall back to PARSE_TIMESTAMP.
            let func_name = match &to {
                DataType::Date => "PARSE_DATE",
                DataType::Timestamp { .. } => "PARSE_TIMESTAMP",
                DataType::Time { .. } => "PARSE_TIMESTAMP",
                _ => "PARSE_TIMESTAMP",
            };
            let mut func_args = vec![strftime_fmt, this];
            if let Some(tz) = timezone {
                func_args.push(tz);
            }
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                func_args,
            ))))
        }
        DialectType::DuckDB => {
            // SAFE_CAST(x AS DATE FORMAT 'fmt') -> CAST(TRY_STRPTIME(x, fmt) AS DATE)
            // CAST(x AS DATE FORMAT 'fmt') -> CAST(STRPTIME(x, fmt) AS DATE)
            let duck_fmt = Self::bq_format_to_duckdb(&strftime_fmt);
            let parse_fn_name = if is_safe { "TRY_STRPTIME" } else { "STRPTIME" };
            let parse_call = Expression::Function(Box::new(Function::new(
                parse_fn_name.to_string(),
                vec![this, duck_fmt],
            )));
            // Re-cast the parsed value to the original target type, with
            // the FORMAT clause removed.
            Ok(Expression::Cast(Box::new(Cast {
                this: parse_call,
                to,
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })))
        }
        // Other targets keep the original CAST/SAFE_CAST untouched.
        _ => Ok(e),
    }
}
8415
8416 Action::BigQueryFunctionNormalize => {
8417 Self::normalize_bigquery_function(e, source, target)
8418 }
8419
8420 Action::BigQueryToHexBare => {
8421 // Not used anymore - handled directly in normalize_bigquery_function
8422 Ok(e)
8423 }
8424
8425 Action::BigQueryToHexLower => {
8426 if let Expression::Lower(uf) = e {
8427 match uf.this {
8428 // BQ->BQ: LOWER(TO_HEX(x)) -> TO_HEX(x)
8429 Expression::Function(f)
8430 if matches!(target, DialectType::BigQuery)
8431 && f.name == "TO_HEX" =>
8432 {
8433 Ok(Expression::Function(f))
8434 }
8435 // LOWER(LOWER(HEX/TO_HEX(x))) patterns
8436 Expression::Lower(inner_uf) => {
8437 if matches!(target, DialectType::BigQuery) {
8438 // BQ->BQ: extract TO_HEX
8439 if let Expression::Function(f) = inner_uf.this {
8440 Ok(Expression::Function(Box::new(Function::new(
8441 "TO_HEX".to_string(),
8442 f.args,
8443 ))))
8444 } else {
8445 Ok(Expression::Lower(inner_uf))
8446 }
8447 } else {
8448 // Flatten: LOWER(LOWER(x)) -> LOWER(x)
8449 Ok(Expression::Lower(inner_uf))
8450 }
8451 }
8452 other => {
8453 Ok(Expression::Lower(Box::new(crate::expressions::UnaryFunc {
8454 this: other,
8455 original_name: None,
8456 inferred_type: None,
8457 })))
8458 }
8459 }
8460 } else {
8461 Ok(e)
8462 }
8463 }
8464
8465 Action::BigQueryToHexUpper => {
8466 // UPPER(LOWER(HEX(x))) -> HEX(x) (UPPER cancels LOWER, HEX is already uppercase)
8467 // UPPER(LOWER(TO_HEX(x))) -> TO_HEX(x) for Presto/Trino
8468 if let Expression::Upper(uf) = e {
8469 if let Expression::Lower(inner_uf) = uf.this {
8470 // For BQ->BQ: UPPER(TO_HEX(x)) should stay as UPPER(TO_HEX(x))
8471 if matches!(target, DialectType::BigQuery) {
8472 // Restore TO_HEX name in inner function
8473 if let Expression::Function(f) = inner_uf.this {
8474 let restored = Expression::Function(Box::new(Function::new(
8475 "TO_HEX".to_string(),
8476 f.args,
8477 )));
8478 Ok(Expression::Upper(Box::new(
8479 crate::expressions::UnaryFunc::new(restored),
8480 )))
8481 } else {
8482 Ok(Expression::Upper(inner_uf))
8483 }
8484 } else {
8485 // Extract the inner HEX/TO_HEX function (UPPER(LOWER(x)) = x when HEX is uppercase)
8486 Ok(inner_uf.this)
8487 }
8488 } else {
8489 Ok(Expression::Upper(uf))
8490 }
8491 } else {
8492 Ok(e)
8493 }
8494 }
8495
8496 Action::BigQueryAnyValueHaving => {
8497 // ANY_VALUE(x HAVING MAX y) -> ARG_MAX_NULL(x, y)
8498 // ANY_VALUE(x HAVING MIN y) -> ARG_MIN_NULL(x, y)
8499 if let Expression::AnyValue(agg) = e {
8500 if let Some((having_expr, is_max)) = agg.having_max {
8501 let func_name = if is_max {
8502 "ARG_MAX_NULL"
8503 } else {
8504 "ARG_MIN_NULL"
8505 };
8506 Ok(Expression::Function(Box::new(Function::new(
8507 func_name.to_string(),
8508 vec![agg.this, *having_expr],
8509 ))))
8510 } else {
8511 Ok(Expression::AnyValue(agg))
8512 }
8513 } else {
8514 Ok(e)
8515 }
8516 }
8517
8518 Action::BigQueryApproxQuantiles => {
8519 // APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [0, 1/n, 2/n, ..., 1])
8520 // APPROX_QUANTILES(DISTINCT x, n) -> APPROX_QUANTILE(DISTINCT x, [0, 1/n, ..., 1])
8521 if let Expression::AggregateFunction(agg) = e {
8522 if agg.args.len() >= 2 {
8523 let x_expr = agg.args[0].clone();
8524 let n_expr = &agg.args[1];
8525
8526 // Extract the numeric value from n_expr
8527 let n = match n_expr {
8528 Expression::Literal(crate::expressions::Literal::Number(s)) => {
8529 s.parse::<usize>().unwrap_or(2)
8530 }
8531 _ => 2,
8532 };
8533
8534 // Generate quantile array: [0, 1/n, 2/n, ..., 1]
8535 let mut quantiles = Vec::new();
8536 for i in 0..=n {
8537 let q = i as f64 / n as f64;
8538 // Format nicely: 0 -> 0, 0.25 -> 0.25, 1 -> 1
8539 if q == 0.0 {
8540 quantiles.push(Expression::number(0));
8541 } else if q == 1.0 {
8542 quantiles.push(Expression::number(1));
8543 } else {
8544 quantiles.push(Expression::Literal(
8545 crate::expressions::Literal::Number(format!("{}", q)),
8546 ));
8547 }
8548 }
8549
8550 let array_expr =
8551 Expression::Array(Box::new(crate::expressions::Array {
8552 expressions: quantiles,
8553 }));
8554
8555 // Preserve DISTINCT modifier
8556 let mut new_func = Function::new(
8557 "APPROX_QUANTILE".to_string(),
8558 vec![x_expr, array_expr],
8559 );
8560 new_func.distinct = agg.distinct;
8561 Ok(Expression::Function(Box::new(new_func)))
8562 } else {
8563 Ok(Expression::AggregateFunction(agg))
8564 }
8565 } else {
8566 Ok(e)
8567 }
8568 }
8569
8570 Action::GenericFunctionNormalize => {
8571 // Helper closure to convert ARBITRARY to target-specific function
8572 fn convert_arbitrary(arg: Expression, target: DialectType) -> Expression {
8573 let name = match target {
8574 DialectType::ClickHouse => "any",
8575 DialectType::TSQL | DialectType::SQLite => "MAX",
8576 DialectType::Hive => "FIRST",
8577 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
8578 "ARBITRARY"
8579 }
8580 _ => "ANY_VALUE",
8581 };
8582 Expression::Function(Box::new(Function::new(name.to_string(), vec![arg])))
8583 }
8584
8585 if let Expression::Function(f) = e {
8586 let name = f.name.to_uppercase();
8587 match name.as_str() {
8588 "ARBITRARY" if f.args.len() == 1 => {
8589 let arg = f.args.into_iter().next().unwrap();
8590 Ok(convert_arbitrary(arg, target))
8591 }
8592 "TO_NUMBER" if f.args.len() == 1 => {
8593 let arg = f.args.into_iter().next().unwrap();
8594 match target {
8595 DialectType::Oracle | DialectType::Snowflake => {
8596 Ok(Expression::Function(Box::new(Function::new(
8597 "TO_NUMBER".to_string(),
8598 vec![arg],
8599 ))))
8600 }
8601 _ => Ok(Expression::Cast(Box::new(crate::expressions::Cast {
8602 this: arg,
8603 to: crate::expressions::DataType::Double {
8604 precision: None,
8605 scale: None,
8606 },
8607 double_colon_syntax: false,
8608 trailing_comments: Vec::new(),
8609 format: None,
8610 default: None,
8611 inferred_type: None,
8612 }))),
8613 }
8614 }
8615 "AGGREGATE" if f.args.len() >= 3 => match target {
8616 DialectType::DuckDB
8617 | DialectType::Hive
8618 | DialectType::Presto
8619 | DialectType::Trino => Ok(Expression::Function(Box::new(
8620 Function::new("REDUCE".to_string(), f.args),
8621 ))),
8622 _ => Ok(Expression::Function(f)),
8623 },
8624 // REGEXP_MATCHES(x, y) -> RegexpLike for most targets, keep for DuckDB
8625 "REGEXP_MATCHES" if f.args.len() >= 2 => {
8626 if matches!(target, DialectType::DuckDB) {
8627 Ok(Expression::Function(f))
8628 } else {
8629 let mut args = f.args;
8630 let this = args.remove(0);
8631 let pattern = args.remove(0);
8632 let flags = if args.is_empty() {
8633 None
8634 } else {
8635 Some(args.remove(0))
8636 };
8637 Ok(Expression::RegexpLike(Box::new(
8638 crate::expressions::RegexpFunc {
8639 this,
8640 pattern,
8641 flags,
8642 },
8643 )))
8644 }
8645 }
8646 // REGEXP_FULL_MATCH (Hive REGEXP) -> RegexpLike
8647 "REGEXP_FULL_MATCH" if f.args.len() >= 2 => {
8648 if matches!(target, DialectType::DuckDB) {
8649 Ok(Expression::Function(f))
8650 } else {
8651 let mut args = f.args;
8652 let this = args.remove(0);
8653 let pattern = args.remove(0);
8654 let flags = if args.is_empty() {
8655 None
8656 } else {
8657 Some(args.remove(0))
8658 };
8659 Ok(Expression::RegexpLike(Box::new(
8660 crate::expressions::RegexpFunc {
8661 this,
8662 pattern,
8663 flags,
8664 },
8665 )))
8666 }
8667 }
8668 // STRUCT_EXTRACT(x, 'field') -> x.field (StructExtract expression)
8669 "STRUCT_EXTRACT" if f.args.len() == 2 => {
8670 let mut args = f.args;
8671 let this = args.remove(0);
8672 let field_expr = args.remove(0);
8673 // Extract string literal to get field name
8674 let field_name = match &field_expr {
8675 Expression::Literal(crate::expressions::Literal::String(s)) => {
8676 s.clone()
8677 }
8678 Expression::Identifier(id) => id.name.clone(),
8679 _ => {
8680 return Ok(Expression::Function(Box::new(Function::new(
8681 "STRUCT_EXTRACT".to_string(),
8682 vec![this, field_expr],
8683 ))))
8684 }
8685 };
8686 Ok(Expression::StructExtract(Box::new(
8687 crate::expressions::StructExtractFunc {
8688 this,
8689 field: crate::expressions::Identifier::new(field_name),
8690 },
8691 )))
8692 }
8693 // LIST_FILTER([4,5,6], x -> x > 4) -> FILTER(ARRAY(4,5,6), x -> x > 4)
8694 "LIST_FILTER" if f.args.len() == 2 => {
8695 let name = match target {
8696 DialectType::DuckDB => "LIST_FILTER",
8697 _ => "FILTER",
8698 };
8699 Ok(Expression::Function(Box::new(Function::new(
8700 name.to_string(),
8701 f.args,
8702 ))))
8703 }
8704 // LIST_TRANSFORM(x, y -> y + 1) -> TRANSFORM(x, y -> y + 1)
8705 "LIST_TRANSFORM" if f.args.len() == 2 => {
8706 let name = match target {
8707 DialectType::DuckDB => "LIST_TRANSFORM",
8708 _ => "TRANSFORM",
8709 };
8710 Ok(Expression::Function(Box::new(Function::new(
8711 name.to_string(),
8712 f.args,
8713 ))))
8714 }
8715 // LIST_SORT(x) -> SORT_ARRAY(x) / ARRAY_SORT(x)
8716 "LIST_SORT" if f.args.len() >= 1 => {
8717 let name = match target {
8718 DialectType::DuckDB
8719 | DialectType::Presto
8720 | DialectType::Trino => "ARRAY_SORT",
8721 _ => "SORT_ARRAY",
8722 };
8723 Ok(Expression::Function(Box::new(Function::new(
8724 name.to_string(),
8725 f.args,
8726 ))))
8727 }
8728 // LIST_REVERSE_SORT(x) -> SORT_ARRAY(x, FALSE) for Spark/Hive, ARRAY_SORT(x, lambda) for Presto
8729 "LIST_REVERSE_SORT" if f.args.len() >= 1 => {
8730 match target {
8731 DialectType::DuckDB => Ok(Expression::Function(Box::new(
8732 Function::new("ARRAY_REVERSE_SORT".to_string(), f.args),
8733 ))),
8734 DialectType::Spark
8735 | DialectType::Databricks
8736 | DialectType::Hive => {
8737 let mut args = f.args;
8738 args.push(Expression::Identifier(
8739 crate::expressions::Identifier::new("FALSE"),
8740 ));
8741 Ok(Expression::Function(Box::new(Function::new(
8742 "SORT_ARRAY".to_string(),
8743 args,
8744 ))))
8745 }
8746 DialectType::Presto
8747 | DialectType::Trino
8748 | DialectType::Athena => {
8749 // ARRAY_SORT(x, (a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END)
8750 let arr = f.args.into_iter().next().unwrap();
8751 let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
8752 parameters: vec![
8753 crate::expressions::Identifier::new("a"),
8754 crate::expressions::Identifier::new("b"),
8755 ],
8756 body: Expression::Case(Box::new(Case {
8757 operand: None,
8758 whens: vec![
8759 (
8760 Expression::Lt(Box::new(BinaryOp::new(
8761 Expression::Identifier(crate::expressions::Identifier::new("a")),
8762 Expression::Identifier(crate::expressions::Identifier::new("b")),
8763 ))),
8764 Expression::number(1),
8765 ),
8766 (
8767 Expression::Gt(Box::new(BinaryOp::new(
8768 Expression::Identifier(crate::expressions::Identifier::new("a")),
8769 Expression::Identifier(crate::expressions::Identifier::new("b")),
8770 ))),
8771 Expression::Literal(Literal::Number("-1".to_string())),
8772 ),
8773 ],
8774 else_: Some(Expression::number(0)),
8775 comments: Vec::new(),
8776 inferred_type: None,
8777 })),
8778 colon: false,
8779 parameter_types: Vec::new(),
8780 }));
8781 Ok(Expression::Function(Box::new(Function::new(
8782 "ARRAY_SORT".to_string(),
8783 vec![arr, lambda],
8784 ))))
8785 }
8786 _ => Ok(Expression::Function(Box::new(Function::new(
8787 "LIST_REVERSE_SORT".to_string(),
8788 f.args,
8789 )))),
8790 }
8791 }
8792 // SPLIT_TO_ARRAY(x) with 1 arg -> add default ',' separator and rename
8793 "SPLIT_TO_ARRAY" if f.args.len() == 1 => {
8794 let mut args = f.args;
8795 args.push(Expression::string(","));
8796 let name = match target {
8797 DialectType::DuckDB => "STR_SPLIT",
8798 DialectType::Presto | DialectType::Trino => "SPLIT",
8799 DialectType::Spark
8800 | DialectType::Databricks
8801 | DialectType::Hive => "SPLIT",
8802 DialectType::PostgreSQL => "STRING_TO_ARRAY",
8803 DialectType::Redshift => "SPLIT_TO_ARRAY",
8804 _ => "SPLIT",
8805 };
8806 Ok(Expression::Function(Box::new(Function::new(
8807 name.to_string(),
8808 args,
8809 ))))
8810 }
8811 // SPLIT_TO_ARRAY(x, sep) with 2 args -> rename based on target
8812 "SPLIT_TO_ARRAY" if f.args.len() == 2 => {
8813 let name = match target {
8814 DialectType::DuckDB => "STR_SPLIT",
8815 DialectType::Presto | DialectType::Trino => "SPLIT",
8816 DialectType::Spark
8817 | DialectType::Databricks
8818 | DialectType::Hive => "SPLIT",
8819 DialectType::PostgreSQL => "STRING_TO_ARRAY",
8820 DialectType::Redshift => "SPLIT_TO_ARRAY",
8821 _ => "SPLIT",
8822 };
8823 Ok(Expression::Function(Box::new(Function::new(
8824 name.to_string(),
8825 f.args,
8826 ))))
8827 }
8828 // STRING_TO_ARRAY/STR_SPLIT -> target-specific split function
8829 "STRING_TO_ARRAY" | "STR_SPLIT" if f.args.len() >= 2 => {
8830 let name = match target {
8831 DialectType::DuckDB => "STR_SPLIT",
8832 DialectType::Presto | DialectType::Trino => "SPLIT",
8833 DialectType::Spark
8834 | DialectType::Databricks
8835 | DialectType::Hive => "SPLIT",
8836 DialectType::Doris | DialectType::StarRocks => {
8837 "SPLIT_BY_STRING"
8838 }
8839 DialectType::PostgreSQL | DialectType::Redshift => {
8840 "STRING_TO_ARRAY"
8841 }
8842 _ => "SPLIT",
8843 };
8844 // For Spark/Hive, SPLIT uses regex - need to escape literal with \Q...\E
8845 if matches!(
8846 target,
8847 DialectType::Spark
8848 | DialectType::Databricks
8849 | DialectType::Hive
8850 ) {
8851 let mut args = f.args;
8852 let x = args.remove(0);
8853 let sep = args.remove(0);
8854 // Wrap separator in CONCAT('\\Q', sep, '\\E')
8855 let escaped_sep =
8856 Expression::Function(Box::new(Function::new(
8857 "CONCAT".to_string(),
8858 vec![
8859 Expression::string("\\Q"),
8860 sep,
8861 Expression::string("\\E"),
8862 ],
8863 )));
8864 Ok(Expression::Function(Box::new(Function::new(
8865 name.to_string(),
8866 vec![x, escaped_sep],
8867 ))))
8868 } else {
8869 Ok(Expression::Function(Box::new(Function::new(
8870 name.to_string(),
8871 f.args,
8872 ))))
8873 }
8874 }
8875 // STR_SPLIT_REGEX(x, 'a') / REGEXP_SPLIT(x, 'a') -> target-specific regex split
8876 "STR_SPLIT_REGEX" | "REGEXP_SPLIT" if f.args.len() == 2 => {
8877 let name = match target {
8878 DialectType::DuckDB => "STR_SPLIT_REGEX",
8879 DialectType::Presto | DialectType::Trino => "REGEXP_SPLIT",
8880 DialectType::Spark
8881 | DialectType::Databricks
8882 | DialectType::Hive => "SPLIT",
8883 _ => "REGEXP_SPLIT",
8884 };
8885 Ok(Expression::Function(Box::new(Function::new(
8886 name.to_string(),
8887 f.args,
8888 ))))
8889 }
8890 // SPLIT(x, sep) from Presto/StarRocks/Doris -> target-specific split with regex escaping for Hive/Spark
8891 "SPLIT"
8892 if f.args.len() == 2
8893 && matches!(
8894 source,
8895 DialectType::Presto
8896 | DialectType::Trino
8897 | DialectType::Athena
8898 | DialectType::StarRocks
8899 | DialectType::Doris
8900 )
8901 && matches!(
8902 target,
8903 DialectType::Spark
8904 | DialectType::Databricks
8905 | DialectType::Hive
8906 ) =>
8907 {
8908 // Presto/StarRocks SPLIT is literal, Hive/Spark SPLIT is regex
8909 let mut args = f.args;
8910 let x = args.remove(0);
8911 let sep = args.remove(0);
8912 let escaped_sep = Expression::Function(Box::new(Function::new(
8913 "CONCAT".to_string(),
8914 vec![Expression::string("\\Q"), sep, Expression::string("\\E")],
8915 )));
8916 Ok(Expression::Function(Box::new(Function::new(
8917 "SPLIT".to_string(),
8918 vec![x, escaped_sep],
8919 ))))
8920 }
8921 // SUBSTRINGINDEX -> SUBSTRING_INDEX (ClickHouse camelCase to standard)
8922 // For ClickHouse target, preserve original name to maintain camelCase
8923 "SUBSTRINGINDEX" => {
8924 let name = if matches!(target, DialectType::ClickHouse) {
8925 f.name.clone()
8926 } else {
8927 "SUBSTRING_INDEX".to_string()
8928 };
8929 Ok(Expression::Function(Box::new(Function::new(name, f.args))))
8930 }
8931 // ARRAY_LENGTH/SIZE/CARDINALITY -> target-specific array length function
8932 "ARRAY_LENGTH" | "SIZE" | "CARDINALITY" => {
8933 // Get the array argument (first arg, drop dimension args)
8934 let mut args = f.args;
8935 let arr = if args.is_empty() {
8936 return Ok(Expression::Function(Box::new(Function::new(
8937 name.to_string(),
8938 args,
8939 ))));
8940 } else {
8941 args.remove(0)
8942 };
8943 let name =
8944 match target {
8945 DialectType::Spark
8946 | DialectType::Databricks
8947 | DialectType::Hive => "SIZE",
8948 DialectType::Presto | DialectType::Trino => "CARDINALITY",
8949 DialectType::BigQuery => "ARRAY_LENGTH",
8950 DialectType::DuckDB => {
8951 // DuckDB: use ARRAY_LENGTH with all args
8952 let mut all_args = vec![arr];
8953 all_args.extend(args);
8954 return Ok(Expression::Function(Box::new(
8955 Function::new("ARRAY_LENGTH".to_string(), all_args),
8956 )));
8957 }
8958 DialectType::PostgreSQL | DialectType::Redshift => {
8959 // Keep ARRAY_LENGTH with dimension arg
8960 let mut all_args = vec![arr];
8961 all_args.extend(args);
8962 return Ok(Expression::Function(Box::new(
8963 Function::new("ARRAY_LENGTH".to_string(), all_args),
8964 )));
8965 }
8966 DialectType::ClickHouse => "LENGTH",
8967 _ => "ARRAY_LENGTH",
8968 };
8969 Ok(Expression::Function(Box::new(Function::new(
8970 name.to_string(),
8971 vec![arr],
8972 ))))
8973 }
8974 // UNICODE(x) -> target-specific codepoint function
8975 "UNICODE" if f.args.len() == 1 => {
8976 match target {
8977 DialectType::SQLite | DialectType::DuckDB => {
8978 Ok(Expression::Function(Box::new(Function::new(
8979 "UNICODE".to_string(),
8980 f.args,
8981 ))))
8982 }
8983 DialectType::Oracle => {
8984 // ASCII(UNISTR(x))
8985 let inner = Expression::Function(Box::new(Function::new(
8986 "UNISTR".to_string(),
8987 f.args,
8988 )));
8989 Ok(Expression::Function(Box::new(Function::new(
8990 "ASCII".to_string(),
8991 vec![inner],
8992 ))))
8993 }
8994 DialectType::MySQL => {
8995 // ORD(CONVERT(x USING utf32))
8996 let arg = f.args.into_iter().next().unwrap();
8997 let convert_expr = Expression::ConvertToCharset(Box::new(
8998 crate::expressions::ConvertToCharset {
8999 this: Box::new(arg),
9000 dest: Some(Box::new(Expression::Identifier(
9001 crate::expressions::Identifier::new("utf32"),
9002 ))),
9003 source: None,
9004 },
9005 ));
9006 Ok(Expression::Function(Box::new(Function::new(
9007 "ORD".to_string(),
9008 vec![convert_expr],
9009 ))))
9010 }
9011 _ => Ok(Expression::Function(Box::new(Function::new(
9012 "ASCII".to_string(),
9013 f.args,
9014 )))),
9015 }
9016 }
9017 // XOR(a, b, ...) -> a XOR b XOR ... for MySQL, BITWISE_XOR for Presto/Trino, # for PostgreSQL, ^ for BigQuery
9018 "XOR" if f.args.len() >= 2 => {
9019 match target {
9020 DialectType::ClickHouse => {
9021 // ClickHouse: keep as xor() function with lowercase name
9022 Ok(Expression::Function(Box::new(Function::new(
9023 "xor".to_string(),
9024 f.args,
9025 ))))
9026 }
9027 DialectType::Presto | DialectType::Trino => {
9028 if f.args.len() == 2 {
9029 Ok(Expression::Function(Box::new(Function::new(
9030 "BITWISE_XOR".to_string(),
9031 f.args,
9032 ))))
9033 } else {
9034 // Nest: BITWISE_XOR(BITWISE_XOR(a, b), c)
9035 let mut args = f.args;
9036 let first = args.remove(0);
9037 let second = args.remove(0);
9038 let mut result =
9039 Expression::Function(Box::new(Function::new(
9040 "BITWISE_XOR".to_string(),
9041 vec![first, second],
9042 )));
9043 for arg in args {
9044 result =
9045 Expression::Function(Box::new(Function::new(
9046 "BITWISE_XOR".to_string(),
9047 vec![result, arg],
9048 )));
9049 }
9050 Ok(result)
9051 }
9052 }
9053 DialectType::MySQL
9054 | DialectType::SingleStore
9055 | DialectType::Doris
9056 | DialectType::StarRocks => {
9057 // Convert XOR(a, b, c) -> Expression::Xor with expressions list
9058 let args = f.args;
9059 Ok(Expression::Xor(Box::new(crate::expressions::Xor {
9060 this: None,
9061 expression: None,
9062 expressions: args,
9063 })))
9064 }
9065 DialectType::PostgreSQL | DialectType::Redshift => {
9066 // PostgreSQL: a # b (hash operator for XOR)
9067 let mut args = f.args;
9068 let first = args.remove(0);
9069 let second = args.remove(0);
9070 let mut result = Expression::BitwiseXor(Box::new(
9071 BinaryOp::new(first, second),
9072 ));
9073 for arg in args {
9074 result = Expression::BitwiseXor(Box::new(
9075 BinaryOp::new(result, arg),
9076 ));
9077 }
9078 Ok(result)
9079 }
9080 DialectType::DuckDB => {
9081 // DuckDB: keep as XOR function (DuckDB ^ is Power, not XOR)
9082 Ok(Expression::Function(Box::new(Function::new(
9083 "XOR".to_string(),
9084 f.args,
9085 ))))
9086 }
9087 DialectType::BigQuery => {
9088 // BigQuery: a ^ b (caret operator for XOR)
9089 let mut args = f.args;
9090 let first = args.remove(0);
9091 let second = args.remove(0);
9092 let mut result = Expression::BitwiseXor(Box::new(
9093 BinaryOp::new(first, second),
9094 ));
9095 for arg in args {
9096 result = Expression::BitwiseXor(Box::new(
9097 BinaryOp::new(result, arg),
9098 ));
9099 }
9100 Ok(result)
9101 }
9102 _ => Ok(Expression::Function(Box::new(Function::new(
9103 "XOR".to_string(),
9104 f.args,
9105 )))),
9106 }
9107 }
9108 // ARRAY_REVERSE_SORT(x) -> SORT_ARRAY(x, FALSE) for Spark/Hive, ARRAY_SORT(x, lambda) for Presto
9109 "ARRAY_REVERSE_SORT" if f.args.len() >= 1 => {
9110 match target {
9111 DialectType::Spark
9112 | DialectType::Databricks
9113 | DialectType::Hive => {
9114 let mut args = f.args;
9115 args.push(Expression::Identifier(
9116 crate::expressions::Identifier::new("FALSE"),
9117 ));
9118 Ok(Expression::Function(Box::new(Function::new(
9119 "SORT_ARRAY".to_string(),
9120 args,
9121 ))))
9122 }
9123 DialectType::Presto
9124 | DialectType::Trino
9125 | DialectType::Athena => {
9126 // ARRAY_SORT(x, (a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END)
9127 let arr = f.args.into_iter().next().unwrap();
9128 let lambda = Expression::Lambda(Box::new(
9129 crate::expressions::LambdaExpr {
9130 parameters: vec![
9131 Identifier::new("a"),
9132 Identifier::new("b"),
9133 ],
9134 colon: false,
9135 parameter_types: Vec::new(),
9136 body: Expression::Case(Box::new(Case {
9137 operand: None,
9138 whens: vec![
9139 (
9140 Expression::Lt(Box::new(
9141 BinaryOp::new(
9142 Expression::Identifier(
9143 Identifier::new("a"),
9144 ),
9145 Expression::Identifier(
9146 Identifier::new("b"),
9147 ),
9148 ),
9149 )),
9150 Expression::number(1),
9151 ),
9152 (
9153 Expression::Gt(Box::new(
9154 BinaryOp::new(
9155 Expression::Identifier(
9156 Identifier::new("a"),
9157 ),
9158 Expression::Identifier(
9159 Identifier::new("b"),
9160 ),
9161 ),
9162 )),
9163 Expression::Neg(Box::new(
9164 crate::expressions::UnaryOp {
9165 this: Expression::number(1),
9166 inferred_type: None,
9167 },
9168 )),
9169 ),
9170 ],
9171 else_: Some(Expression::number(0)),
9172 comments: Vec::new(),
9173 inferred_type: None,
9174 })),
9175 },
9176 ));
9177 Ok(Expression::Function(Box::new(Function::new(
9178 "ARRAY_SORT".to_string(),
9179 vec![arr, lambda],
9180 ))))
9181 }
9182 _ => Ok(Expression::Function(Box::new(Function::new(
9183 "ARRAY_REVERSE_SORT".to_string(),
9184 f.args,
9185 )))),
9186 }
9187 }
9188 // ENCODE(x) -> ENCODE(x, 'utf-8') for Spark/Hive, TO_UTF8(x) for Presto
9189 "ENCODE" if f.args.len() == 1 => match target {
9190 DialectType::Spark
9191 | DialectType::Databricks
9192 | DialectType::Hive => {
9193 let mut args = f.args;
9194 args.push(Expression::string("utf-8"));
9195 Ok(Expression::Function(Box::new(Function::new(
9196 "ENCODE".to_string(),
9197 args,
9198 ))))
9199 }
9200 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
9201 Ok(Expression::Function(Box::new(Function::new(
9202 "TO_UTF8".to_string(),
9203 f.args,
9204 ))))
9205 }
9206 _ => Ok(Expression::Function(Box::new(Function::new(
9207 "ENCODE".to_string(),
9208 f.args,
9209 )))),
9210 },
9211 // DECODE(x) -> DECODE(x, 'utf-8') for Spark/Hive, FROM_UTF8(x) for Presto
9212 "DECODE" if f.args.len() == 1 => match target {
9213 DialectType::Spark
9214 | DialectType::Databricks
9215 | DialectType::Hive => {
9216 let mut args = f.args;
9217 args.push(Expression::string("utf-8"));
9218 Ok(Expression::Function(Box::new(Function::new(
9219 "DECODE".to_string(),
9220 args,
9221 ))))
9222 }
9223 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
9224 Ok(Expression::Function(Box::new(Function::new(
9225 "FROM_UTF8".to_string(),
9226 f.args,
9227 ))))
9228 }
9229 _ => Ok(Expression::Function(Box::new(Function::new(
9230 "DECODE".to_string(),
9231 f.args,
9232 )))),
9233 },
9234 // QUANTILE(x, p) -> PERCENTILE(x, p) for Spark/Hive
9235 "QUANTILE" if f.args.len() == 2 => {
9236 let name = match target {
9237 DialectType::Spark
9238 | DialectType::Databricks
9239 | DialectType::Hive => "PERCENTILE",
9240 DialectType::Presto | DialectType::Trino => "APPROX_PERCENTILE",
9241 DialectType::BigQuery => "PERCENTILE_CONT",
9242 _ => "QUANTILE",
9243 };
9244 Ok(Expression::Function(Box::new(Function::new(
9245 name.to_string(),
9246 f.args,
9247 ))))
9248 }
9249 // QUANTILE_CONT(x, q) -> PERCENTILE_CONT(q) WITHIN GROUP (ORDER BY x) for PostgreSQL/Snowflake
9250 "QUANTILE_CONT" if f.args.len() == 2 => {
9251 let mut args = f.args;
9252 let column = args.remove(0);
9253 let quantile = args.remove(0);
9254 match target {
9255 DialectType::DuckDB => {
9256 Ok(Expression::Function(Box::new(Function::new(
9257 "QUANTILE_CONT".to_string(),
9258 vec![column, quantile],
9259 ))))
9260 }
9261 DialectType::PostgreSQL
9262 | DialectType::Redshift
9263 | DialectType::Snowflake => {
9264 // PERCENTILE_CONT(q) WITHIN GROUP (ORDER BY x)
9265 let inner = Expression::PercentileCont(Box::new(
9266 crate::expressions::PercentileFunc {
9267 this: column.clone(),
9268 percentile: quantile,
9269 order_by: None,
9270 filter: None,
9271 },
9272 ));
9273 Ok(Expression::WithinGroup(Box::new(
9274 crate::expressions::WithinGroup {
9275 this: inner,
9276 order_by: vec![crate::expressions::Ordered {
9277 this: column,
9278 desc: false,
9279 nulls_first: None,
9280 explicit_asc: false,
9281 with_fill: None,
9282 }],
9283 },
9284 )))
9285 }
9286 _ => Ok(Expression::Function(Box::new(Function::new(
9287 "QUANTILE_CONT".to_string(),
9288 vec![column, quantile],
9289 )))),
9290 }
9291 }
9292 // QUANTILE_DISC(x, q) -> PERCENTILE_DISC(q) WITHIN GROUP (ORDER BY x) for PostgreSQL/Snowflake
9293 "QUANTILE_DISC" if f.args.len() == 2 => {
9294 let mut args = f.args;
9295 let column = args.remove(0);
9296 let quantile = args.remove(0);
9297 match target {
9298 DialectType::DuckDB => {
9299 Ok(Expression::Function(Box::new(Function::new(
9300 "QUANTILE_DISC".to_string(),
9301 vec![column, quantile],
9302 ))))
9303 }
9304 DialectType::PostgreSQL
9305 | DialectType::Redshift
9306 | DialectType::Snowflake => {
9307 // PERCENTILE_DISC(q) WITHIN GROUP (ORDER BY x)
9308 let inner = Expression::PercentileDisc(Box::new(
9309 crate::expressions::PercentileFunc {
9310 this: column.clone(),
9311 percentile: quantile,
9312 order_by: None,
9313 filter: None,
9314 },
9315 ));
9316 Ok(Expression::WithinGroup(Box::new(
9317 crate::expressions::WithinGroup {
9318 this: inner,
9319 order_by: vec![crate::expressions::Ordered {
9320 this: column,
9321 desc: false,
9322 nulls_first: None,
9323 explicit_asc: false,
9324 with_fill: None,
9325 }],
9326 },
9327 )))
9328 }
9329 _ => Ok(Expression::Function(Box::new(Function::new(
9330 "QUANTILE_DISC".to_string(),
9331 vec![column, quantile],
9332 )))),
9333 }
9334 }
9335 // PERCENTILE_APPROX(x, p) / APPROX_PERCENTILE(x, p) -> target-specific
9336 "PERCENTILE_APPROX" | "APPROX_PERCENTILE" if f.args.len() >= 2 => {
9337 let name = match target {
9338 DialectType::Presto
9339 | DialectType::Trino
9340 | DialectType::Athena => "APPROX_PERCENTILE",
9341 DialectType::Spark
9342 | DialectType::Databricks
9343 | DialectType::Hive => "PERCENTILE_APPROX",
9344 DialectType::DuckDB => "APPROX_QUANTILE",
9345 DialectType::PostgreSQL | DialectType::Redshift => {
9346 "PERCENTILE_CONT"
9347 }
9348 _ => &f.name,
9349 };
9350 Ok(Expression::Function(Box::new(Function::new(
9351 name.to_string(),
9352 f.args,
9353 ))))
9354 }
9355 // EPOCH(x) -> UNIX_TIMESTAMP(x) for Spark/Hive
9356 "EPOCH" if f.args.len() == 1 => {
9357 let name = match target {
9358 DialectType::Spark
9359 | DialectType::Databricks
9360 | DialectType::Hive => "UNIX_TIMESTAMP",
9361 DialectType::Presto | DialectType::Trino => "TO_UNIXTIME",
9362 _ => "EPOCH",
9363 };
9364 Ok(Expression::Function(Box::new(Function::new(
9365 name.to_string(),
9366 f.args,
9367 ))))
9368 }
9369 // EPOCH_MS(x) -> target-specific epoch milliseconds conversion
9370 "EPOCH_MS" if f.args.len() == 1 => {
9371 match target {
9372 DialectType::Spark | DialectType::Databricks => {
9373 Ok(Expression::Function(Box::new(Function::new(
9374 "TIMESTAMP_MILLIS".to_string(),
9375 f.args,
9376 ))))
9377 }
9378 DialectType::Hive => {
9379 // Hive: FROM_UNIXTIME(x / 1000)
9380 let arg = f.args.into_iter().next().unwrap();
9381 let div_expr = Expression::Div(Box::new(
9382 crate::expressions::BinaryOp::new(
9383 arg,
9384 Expression::number(1000),
9385 ),
9386 ));
9387 Ok(Expression::Function(Box::new(Function::new(
9388 "FROM_UNIXTIME".to_string(),
9389 vec![div_expr],
9390 ))))
9391 }
9392 DialectType::Presto | DialectType::Trino => {
9393 Ok(Expression::Function(Box::new(Function::new(
9394 "FROM_UNIXTIME".to_string(),
9395 vec![Expression::Div(Box::new(
9396 crate::expressions::BinaryOp::new(
9397 f.args.into_iter().next().unwrap(),
9398 Expression::number(1000),
9399 ),
9400 ))],
9401 ))))
9402 }
9403 _ => Ok(Expression::Function(Box::new(Function::new(
9404 "EPOCH_MS".to_string(),
9405 f.args,
9406 )))),
9407 }
9408 }
9409 // HASHBYTES('algorithm', x) -> target-specific hash function
9410 "HASHBYTES" if f.args.len() == 2 => {
9411 // Keep HASHBYTES as-is for TSQL target
9412 if matches!(target, DialectType::TSQL) {
9413 return Ok(Expression::Function(f));
9414 }
9415 let algo_expr = &f.args[0];
9416 let algo = match algo_expr {
9417 Expression::Literal(crate::expressions::Literal::String(s)) => {
9418 s.to_uppercase()
9419 }
9420 _ => return Ok(Expression::Function(f)),
9421 };
9422 let data_arg = f.args.into_iter().nth(1).unwrap();
9423 match algo.as_str() {
9424 "SHA1" => {
9425 let name = match target {
9426 DialectType::Spark | DialectType::Databricks => "SHA",
9427 DialectType::Hive => "SHA1",
9428 _ => "SHA1",
9429 };
9430 Ok(Expression::Function(Box::new(Function::new(
9431 name.to_string(),
9432 vec![data_arg],
9433 ))))
9434 }
9435 "SHA2_256" => {
9436 Ok(Expression::Function(Box::new(Function::new(
9437 "SHA2".to_string(),
9438 vec![data_arg, Expression::number(256)],
9439 ))))
9440 }
9441 "SHA2_512" => {
9442 Ok(Expression::Function(Box::new(Function::new(
9443 "SHA2".to_string(),
9444 vec![data_arg, Expression::number(512)],
9445 ))))
9446 }
9447 "MD5" => Ok(Expression::Function(Box::new(Function::new(
9448 "MD5".to_string(),
9449 vec![data_arg],
9450 )))),
9451 _ => Ok(Expression::Function(Box::new(Function::new(
9452 "HASHBYTES".to_string(),
9453 vec![Expression::string(&algo), data_arg],
9454 )))),
9455 }
9456 }
            // JSON_EXTRACT_PATH(json, key1, key2, ...) -> target-specific JSON extraction
            "JSON_EXTRACT_PATH" | "JSON_EXTRACT_PATH_TEXT" if f.args.len() >= 2 => {
                // The _TEXT variant extracts a scalar as text; the plain
                // variant returns a JSON fragment.
                let is_text = name == "JSON_EXTRACT_PATH_TEXT";
                let mut args = f.args;
                let json_expr = args.remove(0);
                // Build JSON path from remaining keys: $.key1.key2 or $.key1[0]
                let mut json_path = "$".to_string();
                for a in &args {
                    match a {
                        Expression::Literal(
                            crate::expressions::Literal::String(s),
                        ) => {
                            // Numeric string keys become array indices: [0]
                            // NOTE(review): an empty string key also satisfies
                            // all(is_ascii_digit) and would emit "[]"; the
                            // SingleStore arm uses parse::<i64>() instead —
                            // confirm empty keys cannot reach this point.
                            if s.chars().all(|c| c.is_ascii_digit()) {
                                json_path.push('[');
                                json_path.push_str(s);
                                json_path.push(']');
                            } else {
                                json_path.push('.');
                                json_path.push_str(s);
                            }
                        }
                        _ => {
                            // Non-literal keys cannot be folded into a static
                            // path; emit a ".?" placeholder segment.
                            json_path.push_str(".?");
                        }
                    }
                }
9484 match target {
9485 DialectType::Spark
9486 | DialectType::Databricks
9487 | DialectType::Hive => {
9488 Ok(Expression::Function(Box::new(Function::new(
9489 "GET_JSON_OBJECT".to_string(),
9490 vec![json_expr, Expression::string(&json_path)],
9491 ))))
9492 }
9493 DialectType::Presto | DialectType::Trino => {
9494 let func_name = if is_text {
9495 "JSON_EXTRACT_SCALAR"
9496 } else {
9497 "JSON_EXTRACT"
9498 };
9499 Ok(Expression::Function(Box::new(Function::new(
9500 func_name.to_string(),
9501 vec![json_expr, Expression::string(&json_path)],
9502 ))))
9503 }
9504 DialectType::BigQuery | DialectType::MySQL => {
9505 let func_name = if is_text {
9506 "JSON_EXTRACT_SCALAR"
9507 } else {
9508 "JSON_EXTRACT"
9509 };
9510 Ok(Expression::Function(Box::new(Function::new(
9511 func_name.to_string(),
9512 vec![json_expr, Expression::string(&json_path)],
9513 ))))
9514 }
9515 DialectType::PostgreSQL | DialectType::Materialize => {
9516 // Keep as JSON_EXTRACT_PATH_TEXT / JSON_EXTRACT_PATH for PostgreSQL/Materialize
9517 let func_name = if is_text {
9518 "JSON_EXTRACT_PATH_TEXT"
9519 } else {
9520 "JSON_EXTRACT_PATH"
9521 };
9522 let mut new_args = vec![json_expr];
9523 new_args.extend(args);
9524 Ok(Expression::Function(Box::new(Function::new(
9525 func_name.to_string(),
9526 new_args,
9527 ))))
9528 }
9529 DialectType::DuckDB | DialectType::SQLite => {
9530 // Use -> for JSON_EXTRACT_PATH, ->> for JSON_EXTRACT_PATH_TEXT
9531 if is_text {
9532 Ok(Expression::JsonExtractScalar(Box::new(
9533 crate::expressions::JsonExtractFunc {
9534 this: json_expr,
9535 path: Expression::string(&json_path),
9536 returning: None,
9537 arrow_syntax: true,
9538 hash_arrow_syntax: false,
9539 wrapper_option: None,
9540 quotes_option: None,
9541 on_scalar_string: false,
9542 on_error: None,
9543 },
9544 )))
9545 } else {
9546 Ok(Expression::JsonExtract(Box::new(
9547 crate::expressions::JsonExtractFunc {
9548 this: json_expr,
9549 path: Expression::string(&json_path),
9550 returning: None,
9551 arrow_syntax: true,
9552 hash_arrow_syntax: false,
9553 wrapper_option: None,
9554 quotes_option: None,
9555 on_scalar_string: false,
9556 on_error: None,
9557 },
9558 )))
9559 }
9560 }
9561 DialectType::Redshift => {
9562 // Keep as JSON_EXTRACT_PATH_TEXT for Redshift
9563 let mut new_args = vec![json_expr];
9564 new_args.extend(args);
9565 Ok(Expression::Function(Box::new(Function::new(
9566 "JSON_EXTRACT_PATH_TEXT".to_string(),
9567 new_args,
9568 ))))
9569 }
9570 DialectType::TSQL => {
9571 // ISNULL(JSON_QUERY(json, '$.path'), JSON_VALUE(json, '$.path'))
9572 let jq = Expression::Function(Box::new(Function::new(
9573 "JSON_QUERY".to_string(),
9574 vec![json_expr.clone(), Expression::string(&json_path)],
9575 )));
9576 let jv = Expression::Function(Box::new(Function::new(
9577 "JSON_VALUE".to_string(),
9578 vec![json_expr, Expression::string(&json_path)],
9579 )));
9580 Ok(Expression::Function(Box::new(Function::new(
9581 "ISNULL".to_string(),
9582 vec![jq, jv],
9583 ))))
9584 }
9585 DialectType::ClickHouse => {
9586 let func_name = if is_text {
9587 "JSONExtractString"
9588 } else {
9589 "JSONExtractRaw"
9590 };
9591 let mut new_args = vec![json_expr];
9592 new_args.extend(args);
9593 Ok(Expression::Function(Box::new(Function::new(
9594 func_name.to_string(),
9595 new_args,
9596 ))))
9597 }
9598 _ => {
9599 let func_name = if is_text {
9600 "JSON_EXTRACT_SCALAR"
9601 } else {
9602 "JSON_EXTRACT"
9603 };
9604 Ok(Expression::Function(Box::new(Function::new(
9605 func_name.to_string(),
9606 vec![json_expr, Expression::string(&json_path)],
9607 ))))
9608 }
9609 }
9610 }
9611 // APPROX_DISTINCT(x) -> APPROX_COUNT_DISTINCT(x) for Spark/Hive/BigQuery
9612 "APPROX_DISTINCT" if f.args.len() >= 1 => {
9613 let name = match target {
9614 DialectType::Spark
9615 | DialectType::Databricks
9616 | DialectType::Hive
9617 | DialectType::BigQuery => "APPROX_COUNT_DISTINCT",
9618 _ => "APPROX_DISTINCT",
9619 };
9620 let mut args = f.args;
9621 // Hive doesn't support the accuracy parameter
9622 if name == "APPROX_COUNT_DISTINCT"
9623 && matches!(target, DialectType::Hive)
9624 {
9625 args.truncate(1);
9626 }
9627 Ok(Expression::Function(Box::new(Function::new(
9628 name.to_string(),
9629 args,
9630 ))))
9631 }
            // REGEXP_EXTRACT(x, pattern) - normalize default group index
            "REGEXP_EXTRACT" if f.args.len() == 2 => {
                // Determine source default group index
                let source_default = match source {
                    DialectType::Presto
                    | DialectType::Trino
                    | DialectType::DuckDB => 0,
                    _ => 1, // Hive/Spark/Databricks default = 1
                };
                // Determine target default group index
                let target_default = match target {
                    DialectType::Presto
                    | DialectType::Trino
                    | DialectType::DuckDB
                    | DialectType::BigQuery => 0,
                    DialectType::Snowflake => {
                        // Snowflake uses REGEXP_SUBSTR
                        return Ok(Expression::Function(Box::new(Function::new(
                            "REGEXP_SUBSTR".to_string(),
                            f.args,
                        ))));
                    }
                    _ => 1, // Hive/Spark/Databricks default = 1
                };
                // When the implicit defaults differ, append the source's
                // default group index explicitly so the target extracts the
                // same capture group the source would have.
                if source_default != target_default {
                    let mut args = f.args;
                    args.push(Expression::number(source_default));
                    Ok(Expression::Function(Box::new(Function::new(
                        "REGEXP_EXTRACT".to_string(),
                        args,
                    ))))
                } else {
                    Ok(Expression::Function(Box::new(Function::new(
                        "REGEXP_EXTRACT".to_string(),
                        f.args,
                    ))))
                }
            }
9670 // RLIKE(str, pattern) -> RegexpLike expression (generates as target-specific form)
9671 "RLIKE" if f.args.len() == 2 => {
9672 let mut args = f.args;
9673 let str_expr = args.remove(0);
9674 let pattern = args.remove(0);
9675 match target {
9676 DialectType::DuckDB => {
9677 // REGEXP_MATCHES(str, pattern)
9678 Ok(Expression::Function(Box::new(Function::new(
9679 "REGEXP_MATCHES".to_string(),
9680 vec![str_expr, pattern],
9681 ))))
9682 }
9683 _ => {
9684 // Convert to RegexpLike which generates as RLIKE/~/REGEXP_LIKE per dialect
9685 Ok(Expression::RegexpLike(Box::new(
9686 crate::expressions::RegexpFunc {
9687 this: str_expr,
9688 pattern,
9689 flags: None,
9690 },
9691 )))
9692 }
9693 }
9694 }
            // EOMONTH(date[, month_offset]) -> target-specific
            "EOMONTH" if f.args.len() >= 1 => {
                let mut args = f.args;
                let date_arg = args.remove(0);
                // Optional second argument: number of months to shift the
                // date by before taking the end-of-month.
                let month_offset = if !args.is_empty() {
                    Some(args.remove(0))
                } else {
                    None
                };

                // Helper: wrap date in CAST to DATE
                let cast_to_date = |e: Expression| -> Expression {
                    Expression::Cast(Box::new(Cast {
                        this: e,
                        to: DataType::Date,
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    }))
                };
9717
9718 match target {
9719 DialectType::TSQL | DialectType::Fabric => {
9720 // TSQL: EOMONTH(CAST(date AS DATE)) or EOMONTH(DATEADD(MONTH, offset, CAST(date AS DATE)))
9721 let date = cast_to_date(date_arg);
9722 let date = if let Some(offset) = month_offset {
9723 Expression::Function(Box::new(Function::new(
9724 "DATEADD".to_string(),
9725 vec![
9726 Expression::Identifier(Identifier::new(
9727 "MONTH",
9728 )),
9729 offset,
9730 date,
9731 ],
9732 )))
9733 } else {
9734 date
9735 };
9736 Ok(Expression::Function(Box::new(Function::new(
9737 "EOMONTH".to_string(),
9738 vec![date],
9739 ))))
9740 }
9741 DialectType::Presto
9742 | DialectType::Trino
9743 | DialectType::Athena => {
9744 // Presto: LAST_DAY_OF_MONTH(CAST(CAST(date AS TIMESTAMP) AS DATE))
9745 // or with offset: LAST_DAY_OF_MONTH(DATE_ADD('MONTH', offset, CAST(CAST(date AS TIMESTAMP) AS DATE)))
9746 let cast_ts = Expression::Cast(Box::new(Cast {
9747 this: date_arg,
9748 to: DataType::Timestamp {
9749 timezone: false,
9750 precision: None,
9751 },
9752 trailing_comments: vec![],
9753 double_colon_syntax: false,
9754 format: None,
9755 default: None,
9756 inferred_type: None,
9757 }));
9758 let date = cast_to_date(cast_ts);
9759 let date = if let Some(offset) = month_offset {
9760 Expression::Function(Box::new(Function::new(
9761 "DATE_ADD".to_string(),
9762 vec![Expression::string("MONTH"), offset, date],
9763 )))
9764 } else {
9765 date
9766 };
9767 Ok(Expression::Function(Box::new(Function::new(
9768 "LAST_DAY_OF_MONTH".to_string(),
9769 vec![date],
9770 ))))
9771 }
9772 DialectType::PostgreSQL => {
9773 // PostgreSQL: CAST(DATE_TRUNC('MONTH', CAST(date AS DATE) [+ INTERVAL 'offset MONTH']) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)
9774 let date = cast_to_date(date_arg);
9775 let date = if let Some(offset) = month_offset {
9776 let interval_str = format!(
9777 "{} MONTH",
9778 Self::expr_to_string_static(&offset)
9779 );
9780 Expression::Add(Box::new(
9781 crate::expressions::BinaryOp::new(
9782 date,
9783 Expression::Interval(Box::new(
9784 crate::expressions::Interval {
9785 this: Some(Expression::string(
9786 &interval_str,
9787 )),
9788 unit: None,
9789 },
9790 )),
9791 ),
9792 ))
9793 } else {
9794 date
9795 };
9796 let truncated =
9797 Expression::Function(Box::new(Function::new(
9798 "DATE_TRUNC".to_string(),
9799 vec![Expression::string("MONTH"), date],
9800 )));
9801 let plus_month = Expression::Add(Box::new(
9802 crate::expressions::BinaryOp::new(
9803 truncated,
9804 Expression::Interval(Box::new(
9805 crate::expressions::Interval {
9806 this: Some(Expression::string("1 MONTH")),
9807 unit: None,
9808 },
9809 )),
9810 ),
9811 ));
9812 let minus_day = Expression::Sub(Box::new(
9813 crate::expressions::BinaryOp::new(
9814 plus_month,
9815 Expression::Interval(Box::new(
9816 crate::expressions::Interval {
9817 this: Some(Expression::string("1 DAY")),
9818 unit: None,
9819 },
9820 )),
9821 ),
9822 ));
9823 Ok(Expression::Cast(Box::new(Cast {
9824 this: minus_day,
9825 to: DataType::Date,
9826 trailing_comments: vec![],
9827 double_colon_syntax: false,
9828 format: None,
9829 default: None,
9830 inferred_type: None,
9831 })))
9832 }
9833 DialectType::DuckDB => {
9834 // DuckDB: LAST_DAY(CAST(date AS DATE) [+ INTERVAL (offset) MONTH])
9835 let date = cast_to_date(date_arg);
9836 let date = if let Some(offset) = month_offset {
9837 // Wrap negative numbers in parentheses for DuckDB INTERVAL
9838 let interval_val =
9839 if matches!(&offset, Expression::Neg(_)) {
9840 Expression::Paren(Box::new(
9841 crate::expressions::Paren {
9842 this: offset,
9843 trailing_comments: Vec::new(),
9844 },
9845 ))
9846 } else {
9847 offset
9848 };
9849 Expression::Add(Box::new(crate::expressions::BinaryOp::new(
9850 date,
9851 Expression::Interval(Box::new(crate::expressions::Interval {
9852 this: Some(interval_val),
9853 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
9854 unit: crate::expressions::IntervalUnit::Month,
9855 use_plural: false,
9856 }),
9857 })),
9858 )))
9859 } else {
9860 date
9861 };
9862 Ok(Expression::Function(Box::new(Function::new(
9863 "LAST_DAY".to_string(),
9864 vec![date],
9865 ))))
9866 }
9867 DialectType::Snowflake | DialectType::Redshift => {
9868 // Snowflake/Redshift: LAST_DAY(TO_DATE(date) or CAST(date AS DATE))
9869 // With offset: LAST_DAY(DATEADD(MONTH, offset, TO_DATE(date)))
9870 let date = if matches!(target, DialectType::Snowflake) {
9871 Expression::Function(Box::new(Function::new(
9872 "TO_DATE".to_string(),
9873 vec![date_arg],
9874 )))
9875 } else {
9876 cast_to_date(date_arg)
9877 };
9878 let date = if let Some(offset) = month_offset {
9879 Expression::Function(Box::new(Function::new(
9880 "DATEADD".to_string(),
9881 vec![
9882 Expression::Identifier(Identifier::new(
9883 "MONTH",
9884 )),
9885 offset,
9886 date,
9887 ],
9888 )))
9889 } else {
9890 date
9891 };
9892 Ok(Expression::Function(Box::new(Function::new(
9893 "LAST_DAY".to_string(),
9894 vec![date],
9895 ))))
9896 }
9897 DialectType::Spark | DialectType::Databricks => {
9898 // Spark: LAST_DAY(TO_DATE(date))
9899 // With offset: LAST_DAY(ADD_MONTHS(TO_DATE(date), offset))
9900 let date = Expression::Function(Box::new(Function::new(
9901 "TO_DATE".to_string(),
9902 vec![date_arg],
9903 )));
9904 let date = if let Some(offset) = month_offset {
9905 Expression::Function(Box::new(Function::new(
9906 "ADD_MONTHS".to_string(),
9907 vec![date, offset],
9908 )))
9909 } else {
9910 date
9911 };
9912 Ok(Expression::Function(Box::new(Function::new(
9913 "LAST_DAY".to_string(),
9914 vec![date],
9915 ))))
9916 }
9917 DialectType::MySQL => {
9918 // MySQL: LAST_DAY(DATE(date)) - no offset
9919 // With offset: LAST_DAY(DATE_ADD(date, INTERVAL offset MONTH)) - no DATE() wrapper
9920 let date = if let Some(offset) = month_offset {
9921 let iu = crate::expressions::IntervalUnit::Month;
9922 Expression::DateAdd(Box::new(
9923 crate::expressions::DateAddFunc {
9924 this: date_arg,
9925 interval: offset,
9926 unit: iu,
9927 },
9928 ))
9929 } else {
9930 Expression::Function(Box::new(Function::new(
9931 "DATE".to_string(),
9932 vec![date_arg],
9933 )))
9934 };
9935 Ok(Expression::Function(Box::new(Function::new(
9936 "LAST_DAY".to_string(),
9937 vec![date],
9938 ))))
9939 }
9940 DialectType::BigQuery => {
9941 // BigQuery: LAST_DAY(CAST(date AS DATE))
9942 // With offset: LAST_DAY(DATE_ADD(CAST(date AS DATE), INTERVAL offset MONTH))
9943 let date = cast_to_date(date_arg);
9944 let date = if let Some(offset) = month_offset {
9945 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
9946 this: Some(offset),
9947 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
9948 unit: crate::expressions::IntervalUnit::Month,
9949 use_plural: false,
9950 }),
9951 }));
9952 Expression::Function(Box::new(Function::new(
9953 "DATE_ADD".to_string(),
9954 vec![date, interval],
9955 )))
9956 } else {
9957 date
9958 };
9959 Ok(Expression::Function(Box::new(Function::new(
9960 "LAST_DAY".to_string(),
9961 vec![date],
9962 ))))
9963 }
9964 DialectType::ClickHouse => {
9965 // ClickHouse: LAST_DAY(CAST(date AS Nullable(DATE)))
9966 let date = Expression::Cast(Box::new(Cast {
9967 this: date_arg,
9968 to: DataType::Nullable {
9969 inner: Box::new(DataType::Date),
9970 },
9971 trailing_comments: vec![],
9972 double_colon_syntax: false,
9973 format: None,
9974 default: None,
9975 inferred_type: None,
9976 }));
9977 let date = if let Some(offset) = month_offset {
9978 Expression::Function(Box::new(Function::new(
9979 "DATE_ADD".to_string(),
9980 vec![
9981 Expression::Identifier(Identifier::new(
9982 "MONTH",
9983 )),
9984 offset,
9985 date,
9986 ],
9987 )))
9988 } else {
9989 date
9990 };
9991 Ok(Expression::Function(Box::new(Function::new(
9992 "LAST_DAY".to_string(),
9993 vec![date],
9994 ))))
9995 }
9996 DialectType::Hive => {
9997 // Hive: LAST_DAY(date)
9998 let date = if let Some(offset) = month_offset {
9999 Expression::Function(Box::new(Function::new(
10000 "ADD_MONTHS".to_string(),
10001 vec![date_arg, offset],
10002 )))
10003 } else {
10004 date_arg
10005 };
10006 Ok(Expression::Function(Box::new(Function::new(
10007 "LAST_DAY".to_string(),
10008 vec![date],
10009 ))))
10010 }
10011 _ => {
10012 // Default: LAST_DAY(date)
10013 let date = if let Some(offset) = month_offset {
10014 let unit =
10015 Expression::Identifier(Identifier::new("MONTH"));
10016 Expression::Function(Box::new(Function::new(
10017 "DATEADD".to_string(),
10018 vec![unit, offset, date_arg],
10019 )))
10020 } else {
10021 date_arg
10022 };
10023 Ok(Expression::Function(Box::new(Function::new(
10024 "LAST_DAY".to_string(),
10025 vec![date],
10026 ))))
10027 }
10028 }
10029 }
10030 // LAST_DAY(x) / LAST_DAY_OF_MONTH(x) -> target-specific
10031 "LAST_DAY" | "LAST_DAY_OF_MONTH"
10032 if !matches!(source, DialectType::BigQuery)
10033 && f.args.len() >= 1 =>
10034 {
10035 let first_arg = f.args.into_iter().next().unwrap();
10036 match target {
10037 DialectType::TSQL | DialectType::Fabric => {
10038 Ok(Expression::Function(Box::new(Function::new(
10039 "EOMONTH".to_string(),
10040 vec![first_arg],
10041 ))))
10042 }
10043 DialectType::Presto
10044 | DialectType::Trino
10045 | DialectType::Athena => {
10046 Ok(Expression::Function(Box::new(Function::new(
10047 "LAST_DAY_OF_MONTH".to_string(),
10048 vec![first_arg],
10049 ))))
10050 }
10051 _ => Ok(Expression::Function(Box::new(Function::new(
10052 "LAST_DAY".to_string(),
10053 vec![first_arg],
10054 )))),
10055 }
10056 }
10057 // MAP(keys_array, vals_array) from Presto (2-arg form) -> target-specific
10058 "MAP"
10059 if f.args.len() == 2
10060 && matches!(
10061 source,
10062 DialectType::Presto
10063 | DialectType::Trino
10064 | DialectType::Athena
10065 ) =>
10066 {
10067 let keys_arg = f.args[0].clone();
10068 let vals_arg = f.args[1].clone();
10069
10070 // Helper: extract array elements from Array/ArrayFunc/Function("ARRAY") expressions
10071 fn extract_array_elements(
10072 expr: &Expression,
10073 ) -> Option<&Vec<Expression>> {
10074 match expr {
10075 Expression::Array(arr) => Some(&arr.expressions),
10076 Expression::ArrayFunc(arr) => Some(&arr.expressions),
10077 Expression::Function(f)
10078 if f.name.eq_ignore_ascii_case("ARRAY") =>
10079 {
10080 Some(&f.args)
10081 }
10082 _ => None,
10083 }
10084 }
10085
10086 match target {
10087 DialectType::Spark | DialectType::Databricks => {
10088 // Presto MAP(keys, vals) -> Spark MAP_FROM_ARRAYS(keys, vals)
10089 Ok(Expression::Function(Box::new(Function::new(
10090 "MAP_FROM_ARRAYS".to_string(),
10091 f.args,
10092 ))))
10093 }
10094 DialectType::Hive => {
10095 // Presto MAP(ARRAY[k1,k2], ARRAY[v1,v2]) -> Hive MAP(k1, v1, k2, v2)
10096 if let (Some(keys), Some(vals)) = (
10097 extract_array_elements(&keys_arg),
10098 extract_array_elements(&vals_arg),
10099 ) {
10100 if keys.len() == vals.len() {
10101 let mut interleaved = Vec::new();
10102 for (k, v) in keys.iter().zip(vals.iter()) {
10103 interleaved.push(k.clone());
10104 interleaved.push(v.clone());
10105 }
10106 Ok(Expression::Function(Box::new(Function::new(
10107 "MAP".to_string(),
10108 interleaved,
10109 ))))
10110 } else {
10111 Ok(Expression::Function(Box::new(Function::new(
10112 "MAP".to_string(),
10113 f.args,
10114 ))))
10115 }
10116 } else {
10117 Ok(Expression::Function(Box::new(Function::new(
10118 "MAP".to_string(),
10119 f.args,
10120 ))))
10121 }
10122 }
10123 DialectType::Snowflake => {
10124 // Presto MAP(ARRAY[k1,k2], ARRAY[v1,v2]) -> Snowflake OBJECT_CONSTRUCT(k1, v1, k2, v2)
10125 if let (Some(keys), Some(vals)) = (
10126 extract_array_elements(&keys_arg),
10127 extract_array_elements(&vals_arg),
10128 ) {
10129 if keys.len() == vals.len() {
10130 let mut interleaved = Vec::new();
10131 for (k, v) in keys.iter().zip(vals.iter()) {
10132 interleaved.push(k.clone());
10133 interleaved.push(v.clone());
10134 }
10135 Ok(Expression::Function(Box::new(Function::new(
10136 "OBJECT_CONSTRUCT".to_string(),
10137 interleaved,
10138 ))))
10139 } else {
10140 Ok(Expression::Function(Box::new(Function::new(
10141 "MAP".to_string(),
10142 f.args,
10143 ))))
10144 }
10145 } else {
10146 Ok(Expression::Function(Box::new(Function::new(
10147 "MAP".to_string(),
10148 f.args,
10149 ))))
10150 }
10151 }
10152 _ => Ok(Expression::Function(f)),
10153 }
10154 }
10155 // MAP() with 0 args from Spark -> MAP(ARRAY[], ARRAY[]) for Presto/Trino
10156 "MAP"
10157 if f.args.is_empty()
10158 && matches!(
10159 source,
10160 DialectType::Hive
10161 | DialectType::Spark
10162 | DialectType::Databricks
10163 )
10164 && matches!(
10165 target,
10166 DialectType::Presto
10167 | DialectType::Trino
10168 | DialectType::Athena
10169 ) =>
10170 {
10171 let empty_keys =
10172 Expression::Array(Box::new(crate::expressions::Array {
10173 expressions: vec![],
10174 }));
10175 let empty_vals =
10176 Expression::Array(Box::new(crate::expressions::Array {
10177 expressions: vec![],
10178 }));
10179 Ok(Expression::Function(Box::new(Function::new(
10180 "MAP".to_string(),
10181 vec![empty_keys, empty_vals],
10182 ))))
10183 }
10184 // MAP(k1, v1, k2, v2, ...) from Hive/Spark -> target-specific
10185 "MAP"
10186 if f.args.len() >= 2
10187 && f.args.len() % 2 == 0
10188 && matches!(
10189 source,
10190 DialectType::Hive
10191 | DialectType::Spark
10192 | DialectType::Databricks
10193 | DialectType::ClickHouse
10194 ) =>
10195 {
10196 let args = f.args;
10197 match target {
10198 DialectType::DuckDB => {
10199 // MAP([k1, k2], [v1, v2])
10200 let mut keys = Vec::new();
10201 let mut vals = Vec::new();
10202 for (i, arg) in args.into_iter().enumerate() {
10203 if i % 2 == 0 {
10204 keys.push(arg);
10205 } else {
10206 vals.push(arg);
10207 }
10208 }
10209 let keys_arr = Expression::Array(Box::new(
10210 crate::expressions::Array { expressions: keys },
10211 ));
10212 let vals_arr = Expression::Array(Box::new(
10213 crate::expressions::Array { expressions: vals },
10214 ));
10215 Ok(Expression::Function(Box::new(Function::new(
10216 "MAP".to_string(),
10217 vec![keys_arr, vals_arr],
10218 ))))
10219 }
10220 DialectType::Presto | DialectType::Trino => {
10221 // MAP(ARRAY[k1, k2], ARRAY[v1, v2])
10222 let mut keys = Vec::new();
10223 let mut vals = Vec::new();
10224 for (i, arg) in args.into_iter().enumerate() {
10225 if i % 2 == 0 {
10226 keys.push(arg);
10227 } else {
10228 vals.push(arg);
10229 }
10230 }
10231 let keys_arr = Expression::Array(Box::new(
10232 crate::expressions::Array { expressions: keys },
10233 ));
10234 let vals_arr = Expression::Array(Box::new(
10235 crate::expressions::Array { expressions: vals },
10236 ));
10237 Ok(Expression::Function(Box::new(Function::new(
10238 "MAP".to_string(),
10239 vec![keys_arr, vals_arr],
10240 ))))
10241 }
10242 DialectType::Snowflake => Ok(Expression::Function(Box::new(
10243 Function::new("OBJECT_CONSTRUCT".to_string(), args),
10244 ))),
10245 DialectType::ClickHouse => Ok(Expression::Function(Box::new(
10246 Function::new("map".to_string(), args),
10247 ))),
10248 _ => Ok(Expression::Function(Box::new(Function::new(
10249 "MAP".to_string(),
10250 args,
10251 )))),
10252 }
10253 }
10254 // COLLECT_LIST(x) -> ARRAY_AGG(x) for most targets
10255 "COLLECT_LIST" if f.args.len() >= 1 => {
10256 let name = match target {
10257 DialectType::Spark
10258 | DialectType::Databricks
10259 | DialectType::Hive => "COLLECT_LIST",
10260 DialectType::DuckDB
10261 | DialectType::PostgreSQL
10262 | DialectType::Redshift
10263 | DialectType::Snowflake
10264 | DialectType::BigQuery => "ARRAY_AGG",
10265 DialectType::Presto | DialectType::Trino => "ARRAY_AGG",
10266 _ => "ARRAY_AGG",
10267 };
10268 Ok(Expression::Function(Box::new(Function::new(
10269 name.to_string(),
10270 f.args,
10271 ))))
10272 }
10273 // COLLECT_SET(x) -> target-specific distinct array aggregation
10274 "COLLECT_SET" if f.args.len() >= 1 => {
10275 let name = match target {
10276 DialectType::Spark
10277 | DialectType::Databricks
10278 | DialectType::Hive => "COLLECT_SET",
10279 DialectType::Presto
10280 | DialectType::Trino
10281 | DialectType::Athena => "SET_AGG",
10282 DialectType::Snowflake => "ARRAY_UNIQUE_AGG",
10283 _ => "ARRAY_AGG",
10284 };
10285 Ok(Expression::Function(Box::new(Function::new(
10286 name.to_string(),
10287 f.args,
10288 ))))
10289 }
10290 // ISNAN(x) / IS_NAN(x) - normalize
10291 "ISNAN" | "IS_NAN" => {
10292 let name = match target {
10293 DialectType::Spark
10294 | DialectType::Databricks
10295 | DialectType::Hive => "ISNAN",
10296 DialectType::Presto
10297 | DialectType::Trino
10298 | DialectType::Athena => "IS_NAN",
10299 DialectType::BigQuery
10300 | DialectType::PostgreSQL
10301 | DialectType::Redshift => "IS_NAN",
10302 DialectType::ClickHouse => "IS_NAN",
10303 _ => "ISNAN",
10304 };
10305 Ok(Expression::Function(Box::new(Function::new(
10306 name.to_string(),
10307 f.args,
10308 ))))
10309 }
10310 // SPLIT_PART(str, delim, index) -> target-specific
10311 "SPLIT_PART" if f.args.len() == 3 => {
10312 match target {
10313 DialectType::Spark | DialectType::Databricks => {
10314 // Keep as SPLIT_PART (Spark 3.4+)
10315 Ok(Expression::Function(Box::new(Function::new(
10316 "SPLIT_PART".to_string(),
10317 f.args,
10318 ))))
10319 }
10320 DialectType::DuckDB
10321 | DialectType::PostgreSQL
10322 | DialectType::Snowflake
10323 | DialectType::Redshift
10324 | DialectType::Trino
10325 | DialectType::Presto => Ok(Expression::Function(Box::new(
10326 Function::new("SPLIT_PART".to_string(), f.args),
10327 ))),
10328 DialectType::Hive => {
10329 // SPLIT(str, delim)[index]
10330 // Complex conversion, just keep as-is for now
10331 Ok(Expression::Function(Box::new(Function::new(
10332 "SPLIT_PART".to_string(),
10333 f.args,
10334 ))))
10335 }
10336 _ => Ok(Expression::Function(Box::new(Function::new(
10337 "SPLIT_PART".to_string(),
10338 f.args,
10339 )))),
10340 }
10341 }
            // JSON_EXTRACT(json, path) -> target-specific JSON extraction
            "JSON_EXTRACT" | "JSON_EXTRACT_SCALAR" if f.args.len() == 2 => {
                // _SCALAR extracts a scalar value; plain JSON_EXTRACT returns
                // a JSON fragment.
                let is_scalar = name == "JSON_EXTRACT_SCALAR";
                match target {
                    DialectType::Spark
                    | DialectType::Databricks
                    | DialectType::Hive => {
                        let mut args = f.args;
                        // Spark/Hive don't support Presto's TRY(expr) wrapper form here.
                        // Mirror sqlglot by unwrapping TRY(expr) to expr before GET_JSON_OBJECT.
                        if let Some(Expression::Function(inner)) = args.first() {
                            if inner.name.eq_ignore_ascii_case("TRY")
                                && inner.args.len() == 1
                            {
                                let mut inner_args = inner.args.clone();
                                args[0] = inner_args.remove(0);
                            }
                        }
                        Ok(Expression::Function(Box::new(Function::new(
                            "GET_JSON_OBJECT".to_string(),
                            args,
                        ))))
                    }
                    DialectType::DuckDB | DialectType::SQLite => {
                        // json -> path syntax
                        let mut args = f.args;
                        let json_expr = args.remove(0);
                        let path = args.remove(0);
                        Ok(Expression::JsonExtract(Box::new(
                            crate::expressions::JsonExtractFunc {
                                this: json_expr,
                                path,
                                returning: None,
                                // Render with the arrow operator rather than a
                                // function call.
                                arrow_syntax: true,
                                hash_arrow_syntax: false,
                                wrapper_option: None,
                                quotes_option: None,
                                on_scalar_string: false,
                                on_error: None,
                            },
                        )))
                    }
                    DialectType::TSQL => {
                        // TSQL splits extraction into JSON_VALUE (scalars)
                        // and JSON_QUERY (objects/arrays).
                        let func_name = if is_scalar {
                            "JSON_VALUE"
                        } else {
                            "JSON_QUERY"
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            func_name.to_string(),
                            f.args,
                        ))))
                    }
                    DialectType::PostgreSQL | DialectType::Redshift => {
                        // NOTE(review): JSON_EXTRACT's second argument is a
                        // JSONPath string ('$.a.b'), while JSON_EXTRACT_PATH
                        // takes individual key arguments — confirm the path
                        // argument is compatible for these targets.
                        let func_name = if is_scalar {
                            "JSON_EXTRACT_PATH_TEXT"
                        } else {
                            "JSON_EXTRACT_PATH"
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            func_name.to_string(),
                            f.args,
                        ))))
                    }
                    // All other targets keep the original spelling.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        name.to_string(),
                        f.args,
                    )))),
                }
            }
            // MySQL JSON_SEARCH(json_doc, mode, search[, escape_char[, path]]) -> DuckDB json_tree-based lookup
            "JSON_SEARCH"
                if matches!(target, DialectType::DuckDB)
                    && (3..=5).contains(&f.args.len()) =>
            {
                let args = &f.args;

                // Only rewrite deterministic modes and NULL/no escape-char variant.
                let mode = match &args[1] {
                    Expression::Literal(crate::expressions::Literal::String(s)) => {
                        s.to_ascii_lowercase()
                    }
                    _ => return Ok(Expression::Function(f)),
                };
                if mode != "one" && mode != "all" {
                    return Ok(Expression::Function(f));
                }
                // A non-NULL escape character has no json_tree equivalent; bail out.
                if args.len() >= 4 && !matches!(&args[3], Expression::Null(_)) {
                    return Ok(Expression::Function(f));
                }

                // Render the sub-expressions to SQL text so they can be
                // spliced into the raw rewrite below; any generation failure
                // falls back to the untouched call.
                let json_doc_sql = match Generator::sql(&args[0]) {
                    Ok(sql) => sql,
                    Err(_) => return Ok(Expression::Function(f)),
                };
                let search_sql = match Generator::sql(&args[2]) {
                    Ok(sql) => sql,
                    Err(_) => return Ok(Expression::Function(f)),
                };
                let path_sql = if args.len() == 5 {
                    match Generator::sql(&args[4]) {
                        Ok(sql) => sql,
                        Err(_) => return Ok(Expression::Function(f)),
                    }
                } else {
                    "'$'".to_string()
                };

                // 'all' returns a JSON array of every matching path; 'one'
                // returns only the first match in document order.
                let rewrite_sql = if mode == "all" {
                    format!(
                        "(SELECT TO_JSON(LIST(__jt.fullkey)) FROM json_tree({}, {}) AS __jt WHERE __jt.atom = TO_JSON({}))",
                        json_doc_sql, path_sql, search_sql
                    )
                } else {
                    format!(
                        "(SELECT TO_JSON(__jt.fullkey) FROM json_tree({}, {}) AS __jt WHERE __jt.atom = TO_JSON({}) ORDER BY __jt.id LIMIT 1)",
                        json_doc_sql, path_sql, search_sql
                    )
                };

                // Emit as raw SQL: the rewrite is a scalar subquery, not a
                // plain function call, so it cannot be expressed as an AST node.
                Ok(Expression::Raw(crate::expressions::Raw {
                    sql: rewrite_sql,
                }))
            }
            // SingleStore JSON_EXTRACT_JSON(json, key1, key2, ...) -> JSON_EXTRACT(json, '$.key1.key2' or '$.key1[key2]')
            // BSON_EXTRACT_BSON(json, key1, ...) -> JSONB_EXTRACT(json, '$.key1')
            "JSON_EXTRACT_JSON" | "BSON_EXTRACT_BSON"
                if f.args.len() >= 2
                    && matches!(source, DialectType::SingleStore) =>
            {
                let is_bson = name == "BSON_EXTRACT_BSON";
                let mut args = f.args;
                let json_expr = args.remove(0);

                // Build JSONPath from remaining arguments
                let mut path = String::from("$");
                for arg in &args {
                    if let Expression::Literal(
                        crate::expressions::Literal::String(s),
                    ) = arg
                    {
                        // Check if it's a numeric string (array index)
                        if s.parse::<i64>().is_ok() {
                            path.push('[');
                            path.push_str(s);
                            path.push(']');
                        } else {
                            path.push('.');
                            path.push_str(s);
                        }
                    }
                    // NOTE(review): non-literal key arguments are silently
                    // dropped from the path here, whereas the
                    // JSON_EXTRACT_PATH arm emits a ".?" placeholder —
                    // confirm which behavior is intended.
                }

                let target_func = if is_bson {
                    "JSONB_EXTRACT"
                } else {
                    "JSON_EXTRACT"
                };
                Ok(Expression::Function(Box::new(Function::new(
                    target_func.to_string(),
                    vec![json_expr, Expression::string(&path)],
                ))))
            }
10505 // ARRAY_SUM(lambda, array) from Doris -> ClickHouse arraySum
10506 "ARRAY_SUM" if matches!(target, DialectType::ClickHouse) => {
10507 Ok(Expression::Function(Box::new(Function {
10508 name: "arraySum".to_string(),
10509 args: f.args,
10510 distinct: f.distinct,
10511 trailing_comments: f.trailing_comments,
10512 use_bracket_syntax: f.use_bracket_syntax,
10513 no_parens: f.no_parens,
10514 quoted: f.quoted,
10515 span: None,
10516 inferred_type: None,
10517 })))
10518 }
10519 // TSQL JSON_QUERY/JSON_VALUE -> target-specific
10520 // Note: For TSQL->TSQL, JsonQuery stays as Expression::JsonQuery (source transform not called)
10521 // and is handled by JsonQueryValueConvert action. This handles the case where
10522 // TSQL read transform converted JsonQuery to Function("JSON_QUERY") for cross-dialect.
10523 "JSON_QUERY" | "JSON_VALUE"
10524 if f.args.len() == 2
10525 && matches!(
10526 source,
10527 DialectType::TSQL | DialectType::Fabric
10528 ) =>
10529 {
10530 match target {
10531 DialectType::Spark
10532 | DialectType::Databricks
10533 | DialectType::Hive => Ok(Expression::Function(Box::new(
10534 Function::new("GET_JSON_OBJECT".to_string(), f.args),
10535 ))),
10536 _ => Ok(Expression::Function(Box::new(Function::new(
10537 name.to_string(),
10538 f.args,
10539 )))),
10540 }
10541 }
            // UNIX_TIMESTAMP(x) -> TO_UNIXTIME(x) for Presto
            "UNIX_TIMESTAMP" if f.args.len() == 1 => {
                let arg = f.args.into_iter().next().unwrap();
                // Hive-family sources accept a string timestamp in
                // 'yyyy-MM-dd HH:mm:ss' form, which needs explicit parsing on
                // targets with stricter typing.
                let is_hive_source = matches!(
                    source,
                    DialectType::Hive
                        | DialectType::Spark
                        | DialectType::Databricks
                );
                match target {
                    DialectType::DuckDB if is_hive_source => {
                        // DuckDB: EPOCH(STRPTIME(x, '%Y-%m-%d %H:%M:%S'))
                        let strptime =
                            Expression::Function(Box::new(Function::new(
                                "STRPTIME".to_string(),
                                vec![arg, Expression::string("%Y-%m-%d %H:%M:%S")],
                            )));
                        Ok(Expression::Function(Box::new(Function::new(
                            "EPOCH".to_string(),
                            vec![strptime],
                        ))))
                    }
                    DialectType::Presto | DialectType::Trino if is_hive_source => {
                        // Presto: TO_UNIXTIME(COALESCE(TRY(DATE_PARSE(CAST(x AS VARCHAR), '%Y-%m-%d %T')), PARSE_DATETIME(DATE_FORMAT(x, '%Y-%m-%d %T'), 'yyyy-MM-dd HH:mm:ss')))
                        // First attempt: treat x as a string timestamp and
                        // parse it.
                        let cast_varchar =
                            Expression::Cast(Box::new(crate::expressions::Cast {
                                this: arg.clone(),
                                to: DataType::VarChar {
                                    length: None,
                                    parenthesized_length: false,
                                },
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            }));
                        let date_parse =
                            Expression::Function(Box::new(Function::new(
                                "DATE_PARSE".to_string(),
                                vec![
                                    cast_varchar,
                                    Expression::string("%Y-%m-%d %T"),
                                ],
                            )));
                        // TRY(...) yields NULL instead of erroring when x is
                        // not a parseable string.
                        let try_expr = Expression::Function(Box::new(
                            Function::new("TRY".to_string(), vec![date_parse]),
                        ));
                        // Fallback: treat x as a timestamp value and
                        // round-trip it through DATE_FORMAT/PARSE_DATETIME.
                        let date_format =
                            Expression::Function(Box::new(Function::new(
                                "DATE_FORMAT".to_string(),
                                vec![arg, Expression::string("%Y-%m-%d %T")],
                            )));
                        let parse_datetime =
                            Expression::Function(Box::new(Function::new(
                                "PARSE_DATETIME".to_string(),
                                vec![
                                    date_format,
                                    Expression::string("yyyy-MM-dd HH:mm:ss"),
                                ],
                            )));
                        let coalesce =
                            Expression::Function(Box::new(Function::new(
                                "COALESCE".to_string(),
                                vec![try_expr, parse_datetime],
                            )));
                        Ok(Expression::Function(Box::new(Function::new(
                            "TO_UNIXTIME".to_string(),
                            vec![coalesce],
                        ))))
                    }
                    DialectType::Presto | DialectType::Trino => {
                        // Non-Hive sources: plain rename to TO_UNIXTIME.
                        Ok(Expression::Function(Box::new(Function::new(
                            "TO_UNIXTIME".to_string(),
                            vec![arg],
                        ))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "UNIX_TIMESTAMP".to_string(),
                        vec![arg],
                    )))),
                }
            }
10625 // TO_UNIX_TIMESTAMP(x) -> UNIX_TIMESTAMP(x) for Spark/Hive
10626 "TO_UNIX_TIMESTAMP" if f.args.len() >= 1 => match target {
10627 DialectType::Spark
10628 | DialectType::Databricks
10629 | DialectType::Hive => Ok(Expression::Function(Box::new(
10630 Function::new("UNIX_TIMESTAMP".to_string(), f.args),
10631 ))),
10632 _ => Ok(Expression::Function(Box::new(Function::new(
10633 "TO_UNIX_TIMESTAMP".to_string(),
10634 f.args,
10635 )))),
10636 },
10637 // CURDATE() -> CURRENT_DATE
10638 "CURDATE" => {
10639 Ok(Expression::CurrentDate(crate::expressions::CurrentDate))
10640 }
10641 // CURTIME() -> CURRENT_TIME
10642 "CURTIME" => {
10643 Ok(Expression::CurrentTime(crate::expressions::CurrentTime {
10644 precision: None,
10645 }))
10646 }
10647 // ARRAY_SORT(x) or ARRAY_SORT(x, lambda) -> SORT_ARRAY(x) for Hive (drop lambda)
10648 "ARRAY_SORT" if f.args.len() >= 1 => {
10649 match target {
10650 DialectType::Hive => {
10651 let mut args = f.args;
10652 args.truncate(1); // Drop lambda comparator
10653 Ok(Expression::Function(Box::new(Function::new(
10654 "SORT_ARRAY".to_string(),
10655 args,
10656 ))))
10657 }
10658 _ => Ok(Expression::Function(f)),
10659 }
10660 }
10661 // SORT_ARRAY(x) -> ARRAY_SORT(x) for non-Hive/Spark
10662 "SORT_ARRAY" if f.args.len() == 1 => match target {
10663 DialectType::Hive
10664 | DialectType::Spark
10665 | DialectType::Databricks => Ok(Expression::Function(f)),
10666 _ => Ok(Expression::Function(Box::new(Function::new(
10667 "ARRAY_SORT".to_string(),
10668 f.args,
10669 )))),
10670 },
10671 // SORT_ARRAY(x, FALSE) -> ARRAY_REVERSE_SORT(x) for DuckDB, ARRAY_SORT(x, lambda) for Presto
10672 "SORT_ARRAY" if f.args.len() == 2 => {
10673 let is_desc =
10674 matches!(&f.args[1], Expression::Boolean(b) if !b.value);
10675 if is_desc {
10676 match target {
10677 DialectType::DuckDB => {
10678 Ok(Expression::Function(Box::new(Function::new(
10679 "ARRAY_REVERSE_SORT".to_string(),
10680 vec![f.args.into_iter().next().unwrap()],
10681 ))))
10682 }
10683 DialectType::Presto | DialectType::Trino => {
10684 let arr_arg = f.args.into_iter().next().unwrap();
10685 let a =
10686 Expression::Column(crate::expressions::Column {
10687 name: crate::expressions::Identifier::new("a"),
10688 table: None,
10689 join_mark: false,
10690 trailing_comments: Vec::new(),
10691 span: None,
10692 inferred_type: None,
10693 });
10694 let b =
10695 Expression::Column(crate::expressions::Column {
10696 name: crate::expressions::Identifier::new("b"),
10697 table: None,
10698 join_mark: false,
10699 trailing_comments: Vec::new(),
10700 span: None,
10701 inferred_type: None,
10702 });
10703 let case_expr = Expression::Case(Box::new(
10704 crate::expressions::Case {
10705 operand: None,
10706 whens: vec![
10707 (
10708 Expression::Lt(Box::new(
10709 BinaryOp::new(a.clone(), b.clone()),
10710 )),
10711 Expression::Literal(Literal::Number(
10712 "1".to_string(),
10713 )),
10714 ),
10715 (
10716 Expression::Gt(Box::new(
10717 BinaryOp::new(a.clone(), b.clone()),
10718 )),
10719 Expression::Literal(Literal::Number(
10720 "-1".to_string(),
10721 )),
10722 ),
10723 ],
10724 else_: Some(Expression::Literal(
10725 Literal::Number("0".to_string()),
10726 )),
10727 comments: Vec::new(),
10728 inferred_type: None,
10729 },
10730 ));
10731 let lambda = Expression::Lambda(Box::new(
10732 crate::expressions::LambdaExpr {
10733 parameters: vec![
10734 crate::expressions::Identifier::new("a"),
10735 crate::expressions::Identifier::new("b"),
10736 ],
10737 body: case_expr,
10738 colon: false,
10739 parameter_types: Vec::new(),
10740 },
10741 ));
10742 Ok(Expression::Function(Box::new(Function::new(
10743 "ARRAY_SORT".to_string(),
10744 vec![arr_arg, lambda],
10745 ))))
10746 }
10747 _ => Ok(Expression::Function(f)),
10748 }
10749 } else {
10750 // SORT_ARRAY(x, TRUE) -> ARRAY_SORT(x)
10751 match target {
10752 DialectType::Hive => Ok(Expression::Function(f)),
10753 _ => Ok(Expression::Function(Box::new(Function::new(
10754 "ARRAY_SORT".to_string(),
10755 vec![f.args.into_iter().next().unwrap()],
10756 )))),
10757 }
10758 }
10759 }
10760 // LEFT(x, n), RIGHT(x, n) -> SUBSTRING for targets without LEFT/RIGHT
10761 "LEFT" if f.args.len() == 2 => {
10762 match target {
10763 DialectType::Hive
10764 | DialectType::Presto
10765 | DialectType::Trino
10766 | DialectType::Athena => {
10767 let x = f.args[0].clone();
10768 let n = f.args[1].clone();
10769 Ok(Expression::Function(Box::new(Function::new(
10770 "SUBSTRING".to_string(),
10771 vec![x, Expression::number(1), n],
10772 ))))
10773 }
10774 DialectType::Spark | DialectType::Databricks
10775 if matches!(
10776 source,
10777 DialectType::TSQL | DialectType::Fabric
10778 ) =>
10779 {
10780 // TSQL LEFT(x, n) -> LEFT(CAST(x AS STRING), n) for Spark
10781 let x = f.args[0].clone();
10782 let n = f.args[1].clone();
10783 let cast_x = Expression::Cast(Box::new(Cast {
10784 this: x,
10785 to: DataType::VarChar {
10786 length: None,
10787 parenthesized_length: false,
10788 },
10789 double_colon_syntax: false,
10790 trailing_comments: Vec::new(),
10791 format: None,
10792 default: None,
10793 inferred_type: None,
10794 }));
10795 Ok(Expression::Function(Box::new(Function::new(
10796 "LEFT".to_string(),
10797 vec![cast_x, n],
10798 ))))
10799 }
10800 _ => Ok(Expression::Function(f)),
10801 }
10802 }
10803 "RIGHT" if f.args.len() == 2 => {
10804 match target {
10805 DialectType::Hive
10806 | DialectType::Presto
10807 | DialectType::Trino
10808 | DialectType::Athena => {
10809 let x = f.args[0].clone();
10810 let n = f.args[1].clone();
10811 // SUBSTRING(x, LENGTH(x) - (n - 1))
10812 let len_x = Expression::Function(Box::new(Function::new(
10813 "LENGTH".to_string(),
10814 vec![x.clone()],
10815 )));
10816 let n_minus_1 = Expression::Sub(Box::new(
10817 crate::expressions::BinaryOp::new(
10818 n,
10819 Expression::number(1),
10820 ),
10821 ));
10822 let n_minus_1_paren = Expression::Paren(Box::new(
10823 crate::expressions::Paren {
10824 this: n_minus_1,
10825 trailing_comments: Vec::new(),
10826 },
10827 ));
10828 let offset = Expression::Sub(Box::new(
10829 crate::expressions::BinaryOp::new(
10830 len_x,
10831 n_minus_1_paren,
10832 ),
10833 ));
10834 Ok(Expression::Function(Box::new(Function::new(
10835 "SUBSTRING".to_string(),
10836 vec![x, offset],
10837 ))))
10838 }
10839 DialectType::Spark | DialectType::Databricks
10840 if matches!(
10841 source,
10842 DialectType::TSQL | DialectType::Fabric
10843 ) =>
10844 {
10845 // TSQL RIGHT(x, n) -> RIGHT(CAST(x AS STRING), n) for Spark
10846 let x = f.args[0].clone();
10847 let n = f.args[1].clone();
10848 let cast_x = Expression::Cast(Box::new(Cast {
10849 this: x,
10850 to: DataType::VarChar {
10851 length: None,
10852 parenthesized_length: false,
10853 },
10854 double_colon_syntax: false,
10855 trailing_comments: Vec::new(),
10856 format: None,
10857 default: None,
10858 inferred_type: None,
10859 }));
10860 Ok(Expression::Function(Box::new(Function::new(
10861 "RIGHT".to_string(),
10862 vec![cast_x, n],
10863 ))))
10864 }
10865 _ => Ok(Expression::Function(f)),
10866 }
10867 }
10868 // MAP_FROM_ARRAYS(keys, vals) -> target-specific map construction
10869 "MAP_FROM_ARRAYS" if f.args.len() == 2 => match target {
10870 DialectType::Snowflake => Ok(Expression::Function(Box::new(
10871 Function::new("OBJECT_CONSTRUCT".to_string(), f.args),
10872 ))),
10873 DialectType::Spark | DialectType::Databricks => {
10874 Ok(Expression::Function(Box::new(Function::new(
10875 "MAP_FROM_ARRAYS".to_string(),
10876 f.args,
10877 ))))
10878 }
10879 _ => Ok(Expression::Function(Box::new(Function::new(
10880 "MAP".to_string(),
10881 f.args,
10882 )))),
10883 },
10884 // LIKE(foo, 'pat') -> foo LIKE 'pat'; LIKE(foo, 'pat', '!') -> foo LIKE 'pat' ESCAPE '!'
10885 // SQLite uses LIKE(pattern, string[, escape]) with args in reverse order
10886 "LIKE" if f.args.len() >= 2 => {
10887 let (this, pattern) = if matches!(source, DialectType::SQLite) {
10888 // SQLite: LIKE(pattern, string) -> string LIKE pattern
10889 (f.args[1].clone(), f.args[0].clone())
10890 } else {
10891 // Standard: LIKE(string, pattern) -> string LIKE pattern
10892 (f.args[0].clone(), f.args[1].clone())
10893 };
10894 let escape = if f.args.len() >= 3 {
10895 Some(f.args[2].clone())
10896 } else {
10897 None
10898 };
10899 Ok(Expression::Like(Box::new(crate::expressions::LikeOp {
10900 left: this,
10901 right: pattern,
10902 escape,
10903 quantifier: None,
10904 inferred_type: None,
10905 })))
10906 }
10907 // ILIKE(foo, 'pat') -> foo ILIKE 'pat'
10908 "ILIKE" if f.args.len() >= 2 => {
10909 let this = f.args[0].clone();
10910 let pattern = f.args[1].clone();
10911 let escape = if f.args.len() >= 3 {
10912 Some(f.args[2].clone())
10913 } else {
10914 None
10915 };
10916 Ok(Expression::ILike(Box::new(crate::expressions::LikeOp {
10917 left: this,
10918 right: pattern,
10919 escape,
10920 quantifier: None,
10921 inferred_type: None,
10922 })))
10923 }
10924 // CHAR(n) -> CHR(n) for non-MySQL/non-TSQL targets
10925 "CHAR" if f.args.len() == 1 => match target {
10926 DialectType::MySQL
10927 | DialectType::SingleStore
10928 | DialectType::TSQL => Ok(Expression::Function(f)),
10929 _ => Ok(Expression::Function(Box::new(Function::new(
10930 "CHR".to_string(),
10931 f.args,
10932 )))),
10933 },
10934 // CONCAT(a, b) -> a || b for PostgreSQL
10935 "CONCAT"
10936 if f.args.len() == 2
10937 && matches!(target, DialectType::PostgreSQL)
10938 && matches!(
10939 source,
10940 DialectType::ClickHouse | DialectType::MySQL
10941 ) =>
10942 {
10943 let mut args = f.args;
10944 let right = args.pop().unwrap();
10945 let left = args.pop().unwrap();
10946 Ok(Expression::DPipe(Box::new(crate::expressions::DPipe {
10947 this: Box::new(left),
10948 expression: Box::new(right),
10949 safe: None,
10950 })))
10951 }
10952 // ARRAY_TO_STRING(arr, delim) -> target-specific
10953 "ARRAY_TO_STRING" if f.args.len() >= 2 => match target {
10954 DialectType::Presto | DialectType::Trino => {
10955 Ok(Expression::Function(Box::new(Function::new(
10956 "ARRAY_JOIN".to_string(),
10957 f.args,
10958 ))))
10959 }
10960 DialectType::TSQL => Ok(Expression::Function(Box::new(
10961 Function::new("STRING_AGG".to_string(), f.args),
10962 ))),
10963 _ => Ok(Expression::Function(f)),
10964 },
10965 // ARRAY_CONCAT / LIST_CONCAT -> target-specific
10966 "ARRAY_CONCAT" | "LIST_CONCAT" if f.args.len() == 2 => match target {
10967 DialectType::Spark
10968 | DialectType::Databricks
10969 | DialectType::Hive => Ok(Expression::Function(Box::new(
10970 Function::new("CONCAT".to_string(), f.args),
10971 ))),
10972 DialectType::Snowflake => Ok(Expression::Function(Box::new(
10973 Function::new("ARRAY_CAT".to_string(), f.args),
10974 ))),
10975 DialectType::Redshift => Ok(Expression::Function(Box::new(
10976 Function::new("ARRAY_CONCAT".to_string(), f.args),
10977 ))),
10978 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
10979 Function::new("ARRAY_CAT".to_string(), f.args),
10980 ))),
10981 DialectType::DuckDB => Ok(Expression::Function(Box::new(
10982 Function::new("LIST_CONCAT".to_string(), f.args),
10983 ))),
10984 DialectType::Presto | DialectType::Trino => {
10985 Ok(Expression::Function(Box::new(Function::new(
10986 "CONCAT".to_string(),
10987 f.args,
10988 ))))
10989 }
10990 DialectType::BigQuery => Ok(Expression::Function(Box::new(
10991 Function::new("ARRAY_CONCAT".to_string(), f.args),
10992 ))),
10993 _ => Ok(Expression::Function(f)),
10994 },
10995 // ARRAY_CONTAINS(arr, x) / HAS(arr, x) / CONTAINS(arr, x) normalization
10996 "HAS" if f.args.len() == 2 => match target {
10997 DialectType::Spark
10998 | DialectType::Databricks
10999 | DialectType::Hive => Ok(Expression::Function(Box::new(
11000 Function::new("ARRAY_CONTAINS".to_string(), f.args),
11001 ))),
11002 DialectType::Presto | DialectType::Trino => {
11003 Ok(Expression::Function(Box::new(Function::new(
11004 "CONTAINS".to_string(),
11005 f.args,
11006 ))))
11007 }
11008 _ => Ok(Expression::Function(f)),
11009 },
11010 // NVL(a, b, c, d) -> COALESCE(a, b, c, d) - NVL should keep all args
11011 "NVL" if f.args.len() > 2 => Ok(Expression::Function(Box::new(
11012 Function::new("COALESCE".to_string(), f.args),
11013 ))),
11014 // ISNULL(x) in MySQL -> (x IS NULL)
11015 "ISNULL"
11016 if f.args.len() == 1
11017 && matches!(source, DialectType::MySQL)
11018 && matches!(target, DialectType::MySQL) =>
11019 {
11020 let arg = f.args.into_iter().next().unwrap();
11021 Ok(Expression::Paren(Box::new(crate::expressions::Paren {
11022 this: Expression::IsNull(Box::new(
11023 crate::expressions::IsNull {
11024 this: arg,
11025 not: false,
11026 postfix_form: false,
11027 },
11028 )),
11029 trailing_comments: Vec::new(),
11030 })))
11031 }
11032 // MONTHNAME(x) -> DATE_FORMAT(x, '%M') for MySQL -> MySQL
11033 "MONTHNAME"
11034 if f.args.len() == 1 && matches!(target, DialectType::MySQL) =>
11035 {
11036 let arg = f.args.into_iter().next().unwrap();
11037 Ok(Expression::Function(Box::new(Function::new(
11038 "DATE_FORMAT".to_string(),
11039 vec![arg, Expression::string("%M")],
11040 ))))
11041 }
11042 // ClickHouse splitByString('s', x) -> DuckDB STR_SPLIT(x, 's') / Hive SPLIT(x, CONCAT('\\Q', 's', '\\E'))
11043 "SPLITBYSTRING" if f.args.len() == 2 => {
11044 let sep = f.args[0].clone();
11045 let str_arg = f.args[1].clone();
11046 match target {
11047 DialectType::DuckDB => Ok(Expression::Function(Box::new(
11048 Function::new("STR_SPLIT".to_string(), vec![str_arg, sep]),
11049 ))),
11050 DialectType::Doris => {
11051 Ok(Expression::Function(Box::new(Function::new(
11052 "SPLIT_BY_STRING".to_string(),
11053 vec![str_arg, sep],
11054 ))))
11055 }
11056 DialectType::Hive
11057 | DialectType::Spark
11058 | DialectType::Databricks => {
11059 // SPLIT(x, CONCAT('\\Q', sep, '\\E'))
11060 let escaped =
11061 Expression::Function(Box::new(Function::new(
11062 "CONCAT".to_string(),
11063 vec![
11064 Expression::string("\\Q"),
11065 sep,
11066 Expression::string("\\E"),
11067 ],
11068 )));
11069 Ok(Expression::Function(Box::new(Function::new(
11070 "SPLIT".to_string(),
11071 vec![str_arg, escaped],
11072 ))))
11073 }
11074 _ => Ok(Expression::Function(f)),
11075 }
11076 }
11077 // ClickHouse splitByRegexp('pattern', x) -> DuckDB STR_SPLIT_REGEX(x, 'pattern')
11078 "SPLITBYREGEXP" if f.args.len() == 2 => {
11079 let sep = f.args[0].clone();
11080 let str_arg = f.args[1].clone();
11081 match target {
11082 DialectType::DuckDB => {
11083 Ok(Expression::Function(Box::new(Function::new(
11084 "STR_SPLIT_REGEX".to_string(),
11085 vec![str_arg, sep],
11086 ))))
11087 }
11088 DialectType::Hive
11089 | DialectType::Spark
11090 | DialectType::Databricks => {
11091 Ok(Expression::Function(Box::new(Function::new(
11092 "SPLIT".to_string(),
11093 vec![str_arg, sep],
11094 ))))
11095 }
11096 _ => Ok(Expression::Function(f)),
11097 }
11098 }
11099 // ClickHouse toMonday(x) -> DATE_TRUNC('WEEK', x) / DATE_TRUNC(x, 'WEEK') for Doris
11100 "TOMONDAY" => {
11101 if f.args.len() == 1 {
11102 let arg = f.args.into_iter().next().unwrap();
11103 match target {
11104 DialectType::Doris => {
11105 Ok(Expression::Function(Box::new(Function::new(
11106 "DATE_TRUNC".to_string(),
11107 vec![arg, Expression::string("WEEK")],
11108 ))))
11109 }
11110 _ => Ok(Expression::Function(Box::new(Function::new(
11111 "DATE_TRUNC".to_string(),
11112 vec![Expression::string("WEEK"), arg],
11113 )))),
11114 }
11115 } else {
11116 Ok(Expression::Function(f))
11117 }
11118 }
11119 // COLLECT_LIST with FILTER(WHERE x IS NOT NULL) for targets that need it
11120 "COLLECT_LIST" if f.args.len() == 1 => match target {
11121 DialectType::Spark
11122 | DialectType::Databricks
11123 | DialectType::Hive => Ok(Expression::Function(f)),
11124 _ => Ok(Expression::Function(Box::new(Function::new(
11125 "ARRAY_AGG".to_string(),
11126 f.args,
11127 )))),
11128 },
11129 // TO_CHAR(x) with 1 arg -> CAST(x AS STRING) for Doris
11130 "TO_CHAR"
11131 if f.args.len() == 1 && matches!(target, DialectType::Doris) =>
11132 {
11133 let arg = f.args.into_iter().next().unwrap();
11134 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
11135 this: arg,
11136 to: DataType::Custom {
11137 name: "STRING".to_string(),
11138 },
11139 double_colon_syntax: false,
11140 trailing_comments: Vec::new(),
11141 format: None,
11142 default: None,
11143 inferred_type: None,
11144 })))
11145 }
11146 // DBMS_RANDOM.VALUE() -> RANDOM() for PostgreSQL
11147 "DBMS_RANDOM.VALUE" if f.args.is_empty() => match target {
11148 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
11149 Function::new("RANDOM".to_string(), vec![]),
11150 ))),
11151 _ => Ok(Expression::Function(f)),
11152 },
11153 // ClickHouse formatDateTime -> target-specific
11154 "FORMATDATETIME" if f.args.len() >= 2 => match target {
11155 DialectType::MySQL => Ok(Expression::Function(Box::new(
11156 Function::new("DATE_FORMAT".to_string(), f.args),
11157 ))),
11158 _ => Ok(Expression::Function(f)),
11159 },
11160 // REPLICATE('x', n) -> REPEAT('x', n) for non-TSQL targets
11161 "REPLICATE" if f.args.len() == 2 => match target {
11162 DialectType::TSQL => Ok(Expression::Function(f)),
11163 _ => Ok(Expression::Function(Box::new(Function::new(
11164 "REPEAT".to_string(),
11165 f.args,
11166 )))),
11167 },
11168 // LEN(x) -> LENGTH(x) for non-TSQL targets
11169 // No CAST needed when arg is already a string literal
11170 "LEN" if f.args.len() == 1 => {
11171 match target {
11172 DialectType::TSQL => Ok(Expression::Function(f)),
11173 DialectType::Spark | DialectType::Databricks => {
11174 let arg = f.args.into_iter().next().unwrap();
11175 // Don't wrap string literals with CAST - they're already strings
11176 let is_string = matches!(
11177 &arg,
11178 Expression::Literal(
11179 crate::expressions::Literal::String(_)
11180 )
11181 );
11182 let final_arg = if is_string {
11183 arg
11184 } else {
11185 Expression::Cast(Box::new(Cast {
11186 this: arg,
11187 to: DataType::VarChar {
11188 length: None,
11189 parenthesized_length: false,
11190 },
11191 double_colon_syntax: false,
11192 trailing_comments: Vec::new(),
11193 format: None,
11194 default: None,
11195 inferred_type: None,
11196 }))
11197 };
11198 Ok(Expression::Function(Box::new(Function::new(
11199 "LENGTH".to_string(),
11200 vec![final_arg],
11201 ))))
11202 }
11203 _ => {
11204 let arg = f.args.into_iter().next().unwrap();
11205 Ok(Expression::Function(Box::new(Function::new(
11206 "LENGTH".to_string(),
11207 vec![arg],
11208 ))))
11209 }
11210 }
11211 }
11212 // COUNT_BIG(x) -> COUNT(x) for non-TSQL targets
11213 "COUNT_BIG" if f.args.len() == 1 => match target {
11214 DialectType::TSQL => Ok(Expression::Function(f)),
11215 _ => Ok(Expression::Function(Box::new(Function::new(
11216 "COUNT".to_string(),
11217 f.args,
11218 )))),
11219 },
11220 // DATEFROMPARTS(y, m, d) -> MAKE_DATE(y, m, d) for non-TSQL targets
11221 "DATEFROMPARTS" if f.args.len() == 3 => match target {
11222 DialectType::TSQL => Ok(Expression::Function(f)),
11223 _ => Ok(Expression::Function(Box::new(Function::new(
11224 "MAKE_DATE".to_string(),
11225 f.args,
11226 )))),
11227 },
11228 // REGEXP_LIKE(str, pattern) -> RegexpLike expression (target-specific output)
11229 "REGEXP_LIKE" if f.args.len() >= 2 => {
11230 let str_expr = f.args[0].clone();
11231 let pattern = f.args[1].clone();
11232 let flags = if f.args.len() >= 3 {
11233 Some(f.args[2].clone())
11234 } else {
11235 None
11236 };
11237 match target {
11238 DialectType::DuckDB => {
11239 let mut new_args = vec![str_expr, pattern];
11240 if let Some(fl) = flags {
11241 new_args.push(fl);
11242 }
11243 Ok(Expression::Function(Box::new(Function::new(
11244 "REGEXP_MATCHES".to_string(),
11245 new_args,
11246 ))))
11247 }
11248 _ => Ok(Expression::RegexpLike(Box::new(
11249 crate::expressions::RegexpFunc {
11250 this: str_expr,
11251 pattern,
11252 flags,
11253 },
11254 ))),
11255 }
11256 }
11257 // ClickHouse arrayJoin -> UNNEST for PostgreSQL
11258 "ARRAYJOIN" if f.args.len() == 1 => match target {
11259 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
11260 Function::new("UNNEST".to_string(), f.args),
11261 ))),
11262 _ => Ok(Expression::Function(f)),
11263 },
11264 // DATETIMEFROMPARTS(y, m, d, h, mi, s, ms) -> MAKE_TIMESTAMP / TIMESTAMP_FROM_PARTS
11265 "DATETIMEFROMPARTS" if f.args.len() == 7 => {
11266 match target {
11267 DialectType::TSQL => Ok(Expression::Function(f)),
11268 DialectType::DuckDB => {
11269 // MAKE_TIMESTAMP(y, m, d, h, mi, s + (ms / 1000.0))
11270 let mut args = f.args;
11271 let ms = args.pop().unwrap();
11272 let s = args.pop().unwrap();
11273 // s + (ms / 1000.0)
11274 let ms_frac = Expression::Div(Box::new(BinaryOp::new(
11275 ms,
11276 Expression::Literal(
11277 crate::expressions::Literal::Number(
11278 "1000.0".to_string(),
11279 ),
11280 ),
11281 )));
11282 let s_with_ms = Expression::Add(Box::new(BinaryOp::new(
11283 s,
11284 Expression::Paren(Box::new(Paren {
11285 this: ms_frac,
11286 trailing_comments: vec![],
11287 })),
11288 )));
11289 args.push(s_with_ms);
11290 Ok(Expression::Function(Box::new(Function::new(
11291 "MAKE_TIMESTAMP".to_string(),
11292 args,
11293 ))))
11294 }
11295 DialectType::Snowflake => {
11296 // TIMESTAMP_FROM_PARTS(y, m, d, h, mi, s, ms * 1000000)
11297 let mut args = f.args;
11298 let ms = args.pop().unwrap();
11299 // ms * 1000000
11300 let ns = Expression::Mul(Box::new(BinaryOp::new(
11301 ms,
11302 Expression::number(1000000),
11303 )));
11304 args.push(ns);
11305 Ok(Expression::Function(Box::new(Function::new(
11306 "TIMESTAMP_FROM_PARTS".to_string(),
11307 args,
11308 ))))
11309 }
11310 _ => {
11311 // Default: keep function name for other targets
11312 Ok(Expression::Function(Box::new(Function::new(
11313 "DATETIMEFROMPARTS".to_string(),
11314 f.args,
11315 ))))
11316 }
11317 }
11318 }
11319 // CONVERT(type, expr [, style]) -> CAST(expr AS type) for non-TSQL targets
11320 // TRY_CONVERT(type, expr [, style]) -> TRY_CAST(expr AS type) for non-TSQL targets
11321 "CONVERT" | "TRY_CONVERT" if f.args.len() >= 2 => {
11322 let is_try = name == "TRY_CONVERT";
11323 let type_expr = f.args[0].clone();
11324 let value_expr = f.args[1].clone();
11325 let style = if f.args.len() >= 3 {
11326 Some(&f.args[2])
11327 } else {
11328 None
11329 };
11330
11331 // For TSQL->TSQL, normalize types and preserve CONVERT/TRY_CONVERT
11332 if matches!(target, DialectType::TSQL) {
11333 let normalized_type = match &type_expr {
11334 Expression::DataType(dt) => {
11335 let new_dt = match dt {
11336 DataType::Int { .. } => DataType::Custom {
11337 name: "INTEGER".to_string(),
11338 },
11339 _ => dt.clone(),
11340 };
11341 Expression::DataType(new_dt)
11342 }
11343 Expression::Identifier(id) => {
11344 let upper = id.name.to_uppercase();
11345 let normalized = match upper.as_str() {
11346 "INT" => "INTEGER",
11347 _ => &upper,
11348 };
11349 Expression::Identifier(
11350 crate::expressions::Identifier::new(normalized),
11351 )
11352 }
11353 Expression::Column(col) => {
11354 let upper = col.name.name.to_uppercase();
11355 let normalized = match upper.as_str() {
11356 "INT" => "INTEGER",
11357 _ => &upper,
11358 };
11359 Expression::Identifier(
11360 crate::expressions::Identifier::new(normalized),
11361 )
11362 }
11363 _ => type_expr.clone(),
11364 };
11365 let func_name = if is_try { "TRY_CONVERT" } else { "CONVERT" };
11366 let mut new_args = vec![normalized_type, value_expr];
11367 if let Some(s) = style {
11368 new_args.push(s.clone());
11369 }
11370 return Ok(Expression::Function(Box::new(Function::new(
11371 func_name.to_string(),
11372 new_args,
11373 ))));
11374 }
11375
11376 // For other targets: CONVERT(type, expr) -> CAST(expr AS type)
11377 fn expr_to_datatype(e: &Expression) -> Option<DataType> {
11378 match e {
11379 Expression::DataType(dt) => {
11380 // Convert NVARCHAR/NCHAR Custom types to standard VarChar/Char
11381 match dt {
11382 DataType::Custom { name }
11383 if name.starts_with("NVARCHAR(")
11384 || name.starts_with("NCHAR(") =>
11385 {
11386 // Extract the length from "NVARCHAR(200)" or "NCHAR(40)"
11387 let inner = &name[name.find('(').unwrap() + 1
11388 ..name.len() - 1];
11389 if inner.eq_ignore_ascii_case("MAX") {
11390 Some(DataType::Text)
11391 } else if let Ok(len) = inner.parse::<u32>() {
11392 if name.starts_with("NCHAR") {
11393 Some(DataType::Char {
11394 length: Some(len),
11395 })
11396 } else {
11397 Some(DataType::VarChar {
11398 length: Some(len),
11399 parenthesized_length: false,
11400 })
11401 }
11402 } else {
11403 Some(dt.clone())
11404 }
11405 }
11406 DataType::Custom { name } if name == "NVARCHAR" => {
11407 Some(DataType::VarChar {
11408 length: None,
11409 parenthesized_length: false,
11410 })
11411 }
11412 DataType::Custom { name } if name == "NCHAR" => {
11413 Some(DataType::Char { length: None })
11414 }
11415 DataType::Custom { name }
11416 if name == "NVARCHAR(MAX)"
11417 || name == "VARCHAR(MAX)" =>
11418 {
11419 Some(DataType::Text)
11420 }
11421 _ => Some(dt.clone()),
11422 }
11423 }
11424 Expression::Identifier(id) => {
11425 let name = id.name.to_uppercase();
11426 match name.as_str() {
11427 "INT" | "INTEGER" => Some(DataType::Int {
11428 length: None,
11429 integer_spelling: false,
11430 }),
11431 "BIGINT" => Some(DataType::BigInt { length: None }),
11432 "SMALLINT" => {
11433 Some(DataType::SmallInt { length: None })
11434 }
11435 "TINYINT" => {
11436 Some(DataType::TinyInt { length: None })
11437 }
11438 "FLOAT" => Some(DataType::Float {
11439 precision: None,
11440 scale: None,
11441 real_spelling: false,
11442 }),
11443 "REAL" => Some(DataType::Float {
11444 precision: None,
11445 scale: None,
11446 real_spelling: true,
11447 }),
11448 "DATETIME" | "DATETIME2" => {
11449 Some(DataType::Timestamp {
11450 timezone: false,
11451 precision: None,
11452 })
11453 }
11454 "DATE" => Some(DataType::Date),
11455 "BIT" => Some(DataType::Boolean),
11456 "TEXT" => Some(DataType::Text),
11457 "NUMERIC" => Some(DataType::Decimal {
11458 precision: None,
11459 scale: None,
11460 }),
11461 "MONEY" => Some(DataType::Decimal {
11462 precision: Some(15),
11463 scale: Some(4),
11464 }),
11465 "SMALLMONEY" => Some(DataType::Decimal {
11466 precision: Some(6),
11467 scale: Some(4),
11468 }),
11469 "VARCHAR" => Some(DataType::VarChar {
11470 length: None,
11471 parenthesized_length: false,
11472 }),
11473 "NVARCHAR" => Some(DataType::VarChar {
11474 length: None,
11475 parenthesized_length: false,
11476 }),
11477 "CHAR" => Some(DataType::Char { length: None }),
11478 "NCHAR" => Some(DataType::Char { length: None }),
11479 _ => Some(DataType::Custom { name }),
11480 }
11481 }
11482 Expression::Column(col) => {
11483 let name = col.name.name.to_uppercase();
11484 match name.as_str() {
11485 "INT" | "INTEGER" => Some(DataType::Int {
11486 length: None,
11487 integer_spelling: false,
11488 }),
11489 "BIGINT" => Some(DataType::BigInt { length: None }),
11490 "FLOAT" => Some(DataType::Float {
11491 precision: None,
11492 scale: None,
11493 real_spelling: false,
11494 }),
11495 "DATETIME" | "DATETIME2" => {
11496 Some(DataType::Timestamp {
11497 timezone: false,
11498 precision: None,
11499 })
11500 }
11501 "DATE" => Some(DataType::Date),
11502 "NUMERIC" => Some(DataType::Decimal {
11503 precision: None,
11504 scale: None,
11505 }),
11506 "VARCHAR" => Some(DataType::VarChar {
11507 length: None,
11508 parenthesized_length: false,
11509 }),
11510 "NVARCHAR" => Some(DataType::VarChar {
11511 length: None,
11512 parenthesized_length: false,
11513 }),
11514 "CHAR" => Some(DataType::Char { length: None }),
11515 "NCHAR" => Some(DataType::Char { length: None }),
11516 _ => Some(DataType::Custom { name }),
11517 }
11518 }
11519 // NVARCHAR(200) parsed as Function("NVARCHAR", [200])
11520 Expression::Function(f) => {
11521 let fname = f.name.to_uppercase();
11522 match fname.as_str() {
11523 "VARCHAR" | "NVARCHAR" => {
11524 let len = f.args.first().and_then(|a| {
11525 if let Expression::Literal(
11526 crate::expressions::Literal::Number(n),
11527 ) = a
11528 {
11529 n.parse::<u32>().ok()
11530 } else if let Expression::Identifier(id) = a
11531 {
11532 if id.name.eq_ignore_ascii_case("MAX") {
11533 None
11534 } else {
11535 None
11536 }
11537 } else {
11538 None
11539 }
11540 });
11541 // Check for VARCHAR(MAX) -> TEXT
11542 let is_max = f.args.first().map_or(false, |a| {
11543 matches!(a, Expression::Identifier(id) if id.name.eq_ignore_ascii_case("MAX"))
11544 || matches!(a, Expression::Column(col) if col.name.name.eq_ignore_ascii_case("MAX"))
11545 });
11546 if is_max {
11547 Some(DataType::Text)
11548 } else {
11549 Some(DataType::VarChar {
11550 length: len,
11551 parenthesized_length: false,
11552 })
11553 }
11554 }
11555 "NCHAR" | "CHAR" => {
11556 let len = f.args.first().and_then(|a| {
11557 if let Expression::Literal(
11558 crate::expressions::Literal::Number(n),
11559 ) = a
11560 {
11561 n.parse::<u32>().ok()
11562 } else {
11563 None
11564 }
11565 });
11566 Some(DataType::Char { length: len })
11567 }
11568 "NUMERIC" | "DECIMAL" => {
11569 let precision = f.args.first().and_then(|a| {
11570 if let Expression::Literal(
11571 crate::expressions::Literal::Number(n),
11572 ) = a
11573 {
11574 n.parse::<u32>().ok()
11575 } else {
11576 None
11577 }
11578 });
11579 let scale = f.args.get(1).and_then(|a| {
11580 if let Expression::Literal(
11581 crate::expressions::Literal::Number(n),
11582 ) = a
11583 {
11584 n.parse::<u32>().ok()
11585 } else {
11586 None
11587 }
11588 });
11589 Some(DataType::Decimal { precision, scale })
11590 }
11591 _ => None,
11592 }
11593 }
11594 _ => None,
11595 }
11596 }
11597
11598 if let Some(mut dt) = expr_to_datatype(&type_expr) {
11599 // For TSQL source: VARCHAR/CHAR without length defaults to 30
11600 let is_tsql_source =
11601 matches!(source, DialectType::TSQL | DialectType::Fabric);
11602 if is_tsql_source {
11603 match &dt {
11604 DataType::VarChar { length: None, .. } => {
11605 dt = DataType::VarChar {
11606 length: Some(30),
11607 parenthesized_length: false,
11608 };
11609 }
11610 DataType::Char { length: None } => {
11611 dt = DataType::Char { length: Some(30) };
11612 }
11613 _ => {}
11614 }
11615 }
11616
11617 // Determine if this is a string type
11618 let is_string_type = matches!(
11619 dt,
11620 DataType::VarChar { .. }
11621 | DataType::Char { .. }
11622 | DataType::Text
11623 ) || matches!(&dt, DataType::Custom { name } if name == "NVARCHAR" || name == "NCHAR"
11624 || name.starts_with("NVARCHAR(") || name.starts_with("NCHAR(")
11625 || name.starts_with("VARCHAR(") || name == "VARCHAR"
11626 || name == "STRING");
11627
11628 // Determine if this is a date/time type
11629 let is_datetime_type = matches!(
11630 dt,
11631 DataType::Timestamp { .. } | DataType::Date
11632 ) || matches!(&dt, DataType::Custom { name } if name == "DATETIME"
11633 || name == "DATETIME2" || name == "SMALLDATETIME");
11634
11635 // Check for date conversion with style
11636 if style.is_some() {
11637 let style_num = style.and_then(|s| {
11638 if let Expression::Literal(
11639 crate::expressions::Literal::Number(n),
11640 ) = s
11641 {
11642 n.parse::<u32>().ok()
11643 } else {
11644 None
11645 }
11646 });
11647
11648 // TSQL CONVERT date styles (Java format)
11649 let format_str = style_num.and_then(|n| match n {
11650 101 => Some("MM/dd/yyyy"),
11651 102 => Some("yyyy.MM.dd"),
11652 103 => Some("dd/MM/yyyy"),
11653 104 => Some("dd.MM.yyyy"),
11654 105 => Some("dd-MM-yyyy"),
11655 108 => Some("HH:mm:ss"),
11656 110 => Some("MM-dd-yyyy"),
11657 112 => Some("yyyyMMdd"),
11658 120 | 20 => Some("yyyy-MM-dd HH:mm:ss"),
11659 121 | 21 => Some("yyyy-MM-dd HH:mm:ss.SSSSSS"),
11660 126 | 127 => Some("yyyy-MM-dd'T'HH:mm:ss.SSS"),
11661 _ => None,
11662 });
11663
11664 // Non-string, non-datetime types with style: just CAST, ignore the style
11665 if !is_string_type && !is_datetime_type {
11666 let cast_expr = if is_try {
11667 Expression::TryCast(Box::new(
11668 crate::expressions::Cast {
11669 this: value_expr,
11670 to: dt,
11671 trailing_comments: Vec::new(),
11672 double_colon_syntax: false,
11673 format: None,
11674 default: None,
11675 inferred_type: None,
11676 },
11677 ))
11678 } else {
11679 Expression::Cast(Box::new(
11680 crate::expressions::Cast {
11681 this: value_expr,
11682 to: dt,
11683 trailing_comments: Vec::new(),
11684 double_colon_syntax: false,
11685 format: None,
11686 default: None,
11687 inferred_type: None,
11688 },
11689 ))
11690 };
11691 return Ok(cast_expr);
11692 }
11693
11694 if let Some(java_fmt) = format_str {
11695 let c_fmt = java_fmt
11696 .replace("yyyy", "%Y")
11697 .replace("MM", "%m")
11698 .replace("dd", "%d")
11699 .replace("HH", "%H")
11700 .replace("mm", "%M")
11701 .replace("ss", "%S")
11702 .replace("SSSSSS", "%f")
11703 .replace("SSS", "%f")
11704 .replace("'T'", "T");
11705
11706 // For datetime target types: style is the INPUT format for parsing strings -> dates
11707 if is_datetime_type {
11708 match target {
11709 DialectType::DuckDB => {
11710 return Ok(Expression::Function(Box::new(
11711 Function::new(
11712 "STRPTIME".to_string(),
11713 vec![
11714 value_expr,
11715 Expression::string(&c_fmt),
11716 ],
11717 ),
11718 )));
11719 }
11720 DialectType::Spark
11721 | DialectType::Databricks => {
11722 // CONVERT(DATETIME, x, style) -> TO_TIMESTAMP(x, fmt)
11723 // CONVERT(DATE, x, style) -> TO_DATE(x, fmt)
11724 let func_name =
11725 if matches!(dt, DataType::Date) {
11726 "TO_DATE"
11727 } else {
11728 "TO_TIMESTAMP"
11729 };
11730 return Ok(Expression::Function(Box::new(
11731 Function::new(
11732 func_name.to_string(),
11733 vec![
11734 value_expr,
11735 Expression::string(java_fmt),
11736 ],
11737 ),
11738 )));
11739 }
11740 DialectType::Hive => {
11741 return Ok(Expression::Function(Box::new(
11742 Function::new(
11743 "TO_TIMESTAMP".to_string(),
11744 vec![
11745 value_expr,
11746 Expression::string(java_fmt),
11747 ],
11748 ),
11749 )));
11750 }
11751 _ => {
11752 return Ok(Expression::Cast(Box::new(
11753 crate::expressions::Cast {
11754 this: value_expr,
11755 to: dt,
11756 trailing_comments: Vec::new(),
11757 double_colon_syntax: false,
11758 format: None,
11759 default: None,
11760 inferred_type: None,
11761 },
11762 )));
11763 }
11764 }
11765 }
11766
11767 // For string target types: style is the OUTPUT format for dates -> strings
11768 match target {
11769 DialectType::DuckDB => Ok(Expression::Function(
11770 Box::new(Function::new(
11771 "STRPTIME".to_string(),
11772 vec![
11773 value_expr,
11774 Expression::string(&c_fmt),
11775 ],
11776 )),
11777 )),
11778 DialectType::Spark | DialectType::Databricks => {
11779 // For string target types with style: CAST(DATE_FORMAT(x, fmt) AS type)
11780 // Determine the target string type
11781 let string_dt = match &dt {
11782 DataType::VarChar {
11783 length: Some(l),
11784 ..
11785 } => DataType::VarChar {
11786 length: Some(*l),
11787 parenthesized_length: false,
11788 },
11789 DataType::Text => DataType::Custom {
11790 name: "STRING".to_string(),
11791 },
11792 _ => DataType::Custom {
11793 name: "STRING".to_string(),
11794 },
11795 };
11796 let date_format_expr = Expression::Function(
11797 Box::new(Function::new(
11798 "DATE_FORMAT".to_string(),
11799 vec![
11800 value_expr,
11801 Expression::string(java_fmt),
11802 ],
11803 )),
11804 );
11805 let cast_expr = if is_try {
11806 Expression::TryCast(Box::new(
11807 crate::expressions::Cast {
11808 this: date_format_expr,
11809 to: string_dt,
11810 trailing_comments: Vec::new(),
11811 double_colon_syntax: false,
11812 format: None,
11813 default: None,
11814 inferred_type: None,
11815 },
11816 ))
11817 } else {
11818 Expression::Cast(Box::new(
11819 crate::expressions::Cast {
11820 this: date_format_expr,
11821 to: string_dt,
11822 trailing_comments: Vec::new(),
11823 double_colon_syntax: false,
11824 format: None,
11825 default: None,
11826 inferred_type: None,
11827 },
11828 ))
11829 };
11830 Ok(cast_expr)
11831 }
11832 DialectType::MySQL | DialectType::SingleStore => {
11833 // For MySQL: CAST(DATE_FORMAT(x, mysql_fmt) AS CHAR(n))
11834 let mysql_fmt = java_fmt
11835 .replace("yyyy", "%Y")
11836 .replace("MM", "%m")
11837 .replace("dd", "%d")
11838 .replace("HH:mm:ss.SSSSSS", "%T")
11839 .replace("HH:mm:ss", "%T")
11840 .replace("HH", "%H")
11841 .replace("mm", "%i")
11842 .replace("ss", "%S");
11843 let date_format_expr = Expression::Function(
11844 Box::new(Function::new(
11845 "DATE_FORMAT".to_string(),
11846 vec![
11847 value_expr,
11848 Expression::string(&mysql_fmt),
11849 ],
11850 )),
11851 );
11852 // MySQL uses CHAR for string casts
11853 let mysql_dt = match &dt {
11854 DataType::VarChar { length, .. } => {
11855 DataType::Char { length: *length }
11856 }
11857 _ => dt,
11858 };
11859 Ok(Expression::Cast(Box::new(
11860 crate::expressions::Cast {
11861 this: date_format_expr,
11862 to: mysql_dt,
11863 trailing_comments: Vec::new(),
11864 double_colon_syntax: false,
11865 format: None,
11866 default: None,
11867 inferred_type: None,
11868 },
11869 )))
11870 }
11871 DialectType::Hive => {
11872 let func_name = "TO_TIMESTAMP";
11873 Ok(Expression::Function(Box::new(
11874 Function::new(
11875 func_name.to_string(),
11876 vec![
11877 value_expr,
11878 Expression::string(java_fmt),
11879 ],
11880 ),
11881 )))
11882 }
11883 _ => Ok(Expression::Cast(Box::new(
11884 crate::expressions::Cast {
11885 this: value_expr,
11886 to: dt,
11887 trailing_comments: Vec::new(),
11888 double_colon_syntax: false,
11889 format: None,
11890 default: None,
11891 inferred_type: None,
11892 },
11893 ))),
11894 }
11895 } else {
11896 // Unknown style, just CAST
11897 let cast_expr = if is_try {
11898 Expression::TryCast(Box::new(
11899 crate::expressions::Cast {
11900 this: value_expr,
11901 to: dt,
11902 trailing_comments: Vec::new(),
11903 double_colon_syntax: false,
11904 format: None,
11905 default: None,
11906 inferred_type: None,
11907 },
11908 ))
11909 } else {
11910 Expression::Cast(Box::new(
11911 crate::expressions::Cast {
11912 this: value_expr,
11913 to: dt,
11914 trailing_comments: Vec::new(),
11915 double_colon_syntax: false,
11916 format: None,
11917 default: None,
11918 inferred_type: None,
11919 },
11920 ))
11921 };
11922 Ok(cast_expr)
11923 }
11924 } else {
11925 // No style - simple CAST
11926 let final_dt = if matches!(
11927 target,
11928 DialectType::MySQL | DialectType::SingleStore
11929 ) {
11930 match &dt {
11931 DataType::Int { .. }
11932 | DataType::BigInt { .. }
11933 | DataType::SmallInt { .. }
11934 | DataType::TinyInt { .. } => DataType::Custom {
11935 name: "SIGNED".to_string(),
11936 },
11937 DataType::VarChar { length, .. } => {
11938 DataType::Char { length: *length }
11939 }
11940 _ => dt,
11941 }
11942 } else {
11943 dt
11944 };
11945 let cast_expr = if is_try {
11946 Expression::TryCast(Box::new(
11947 crate::expressions::Cast {
11948 this: value_expr,
11949 to: final_dt,
11950 trailing_comments: Vec::new(),
11951 double_colon_syntax: false,
11952 format: None,
11953 default: None,
11954 inferred_type: None,
11955 },
11956 ))
11957 } else {
11958 Expression::Cast(Box::new(crate::expressions::Cast {
11959 this: value_expr,
11960 to: final_dt,
11961 trailing_comments: Vec::new(),
11962 double_colon_syntax: false,
11963 format: None,
11964 default: None,
11965 inferred_type: None,
11966 }))
11967 };
11968 Ok(cast_expr)
11969 }
11970 } else {
11971 // Can't convert type expression - keep as CONVERT/TRY_CONVERT function
11972 Ok(Expression::Function(f))
11973 }
11974 }
11975 // STRFTIME(val, fmt) from DuckDB / STRFTIME(fmt, val) from SQLite -> target-specific
11976 "STRFTIME" if f.args.len() == 2 => {
11977 // SQLite uses STRFTIME(fmt, val); DuckDB uses STRFTIME(val, fmt)
11978 let (val, fmt_expr) = if matches!(source, DialectType::SQLite) {
11979 // SQLite: args[0] = format, args[1] = value
11980 (f.args[1].clone(), &f.args[0])
11981 } else {
11982 // DuckDB and others: args[0] = value, args[1] = format
11983 (f.args[0].clone(), &f.args[1])
11984 };
11985
// Translate a C/strftime-style format string into a Java
// (SimpleDateFormat-style) pattern.
//
// The substitutions are applied in a fixed sequence; the order is
// significant only in that multi-character specifiers must not be
// clobbered by shorter ones they contain, which the table preserves.
fn c_to_java_format(fmt: &str) -> String {
    const SUBS: [(&str, &str); 19] = [
        ("%Y", "yyyy"),
        ("%m", "MM"),
        ("%d", "dd"),
        ("%H", "HH"),
        ("%M", "mm"),
        ("%S", "ss"),
        ("%f", "SSSSSS"),
        ("%y", "yy"),
        ("%-m", "M"),
        ("%-d", "d"),
        ("%-H", "H"),
        ("%-I", "h"),
        ("%I", "hh"),
        ("%p", "a"),
        ("%j", "DDD"),
        ("%a", "EEE"),
        ("%b", "MMM"),
        ("%F", "yyyy-MM-dd"),
        ("%T", "HH:mm:ss"),
    ];
    SUBS.iter().fold(fmt.to_string(), |acc, &(c_spec, java_spec)| {
        acc.replace(c_spec, java_spec)
    })
}
12008
12009 // Helper: recursively convert format strings within expressions (handles CONCAT)
12010 fn convert_fmt_expr(
12011 expr: &Expression,
12012 converter: &dyn Fn(&str) -> String,
12013 ) -> Expression {
12014 match expr {
12015 Expression::Literal(
12016 crate::expressions::Literal::String(s),
12017 ) => Expression::string(&converter(s)),
12018 Expression::Function(func)
12019 if func.name.eq_ignore_ascii_case("CONCAT") =>
12020 {
12021 let new_args: Vec<Expression> = func
12022 .args
12023 .iter()
12024 .map(|a| convert_fmt_expr(a, converter))
12025 .collect();
12026 Expression::Function(Box::new(Function::new(
12027 "CONCAT".to_string(),
12028 new_args,
12029 )))
12030 }
12031 other => other.clone(),
12032 }
12033 }
12034
12035 match target {
12036 DialectType::DuckDB => {
12037 if matches!(source, DialectType::SQLite) {
12038 // SQLite STRFTIME(fmt, val) -> DuckDB STRFTIME(CAST(val AS TIMESTAMP), fmt)
12039 let cast_val = Expression::Cast(Box::new(Cast {
12040 this: val,
12041 to: crate::expressions::DataType::Timestamp {
12042 precision: None,
12043 timezone: false,
12044 },
12045 trailing_comments: Vec::new(),
12046 double_colon_syntax: false,
12047 format: None,
12048 default: None,
12049 inferred_type: None,
12050 }));
12051 Ok(Expression::Function(Box::new(Function::new(
12052 "STRFTIME".to_string(),
12053 vec![cast_val, fmt_expr.clone()],
12054 ))))
12055 } else {
12056 Ok(Expression::Function(f))
12057 }
12058 }
12059 DialectType::Spark
12060 | DialectType::Databricks
12061 | DialectType::Hive => {
12062 // STRFTIME(val, fmt) -> DATE_FORMAT(val, java_fmt)
12063 let converted_fmt =
12064 convert_fmt_expr(fmt_expr, &c_to_java_format);
12065 Ok(Expression::Function(Box::new(Function::new(
12066 "DATE_FORMAT".to_string(),
12067 vec![val, converted_fmt],
12068 ))))
12069 }
12070 DialectType::TSQL | DialectType::Fabric => {
12071 // STRFTIME(val, fmt) -> FORMAT(val, java_fmt)
12072 let converted_fmt =
12073 convert_fmt_expr(fmt_expr, &c_to_java_format);
12074 Ok(Expression::Function(Box::new(Function::new(
12075 "FORMAT".to_string(),
12076 vec![val, converted_fmt],
12077 ))))
12078 }
12079 DialectType::Presto
12080 | DialectType::Trino
12081 | DialectType::Athena => {
12082 // STRFTIME(val, fmt) -> DATE_FORMAT(val, presto_fmt) (convert DuckDB format to Presto)
12083 if let Expression::Literal(
12084 crate::expressions::Literal::String(s),
12085 ) = fmt_expr
12086 {
12087 let presto_fmt = duckdb_to_presto_format(s);
12088 Ok(Expression::Function(Box::new(Function::new(
12089 "DATE_FORMAT".to_string(),
12090 vec![val, Expression::string(&presto_fmt)],
12091 ))))
12092 } else {
12093 Ok(Expression::Function(Box::new(Function::new(
12094 "DATE_FORMAT".to_string(),
12095 vec![val, fmt_expr.clone()],
12096 ))))
12097 }
12098 }
12099 DialectType::BigQuery => {
12100 // STRFTIME(val, fmt) -> FORMAT_DATE(bq_fmt, val) - note reversed arg order
12101 if let Expression::Literal(
12102 crate::expressions::Literal::String(s),
12103 ) = fmt_expr
12104 {
12105 let bq_fmt = duckdb_to_bigquery_format(s);
12106 Ok(Expression::Function(Box::new(Function::new(
12107 "FORMAT_DATE".to_string(),
12108 vec![Expression::string(&bq_fmt), val],
12109 ))))
12110 } else {
12111 Ok(Expression::Function(Box::new(Function::new(
12112 "FORMAT_DATE".to_string(),
12113 vec![fmt_expr.clone(), val],
12114 ))))
12115 }
12116 }
12117 DialectType::PostgreSQL | DialectType::Redshift => {
12118 // STRFTIME(val, fmt) -> TO_CHAR(val, pg_fmt)
12119 if let Expression::Literal(
12120 crate::expressions::Literal::String(s),
12121 ) = fmt_expr
12122 {
12123 let pg_fmt = s
12124 .replace("%Y", "YYYY")
12125 .replace("%m", "MM")
12126 .replace("%d", "DD")
12127 .replace("%H", "HH24")
12128 .replace("%M", "MI")
12129 .replace("%S", "SS")
12130 .replace("%y", "YY")
12131 .replace("%-m", "FMMM")
12132 .replace("%-d", "FMDD")
12133 .replace("%-H", "FMHH24")
12134 .replace("%-I", "FMHH12")
12135 .replace("%p", "AM")
12136 .replace("%F", "YYYY-MM-DD")
12137 .replace("%T", "HH24:MI:SS");
12138 Ok(Expression::Function(Box::new(Function::new(
12139 "TO_CHAR".to_string(),
12140 vec![val, Expression::string(&pg_fmt)],
12141 ))))
12142 } else {
12143 Ok(Expression::Function(Box::new(Function::new(
12144 "TO_CHAR".to_string(),
12145 vec![val, fmt_expr.clone()],
12146 ))))
12147 }
12148 }
12149 _ => Ok(Expression::Function(f)),
12150 }
12151 }
12152 // STRPTIME(val, fmt) from DuckDB -> target-specific date parse function
12153 "STRPTIME" if f.args.len() == 2 => {
12154 let val = f.args[0].clone();
12155 let fmt_expr = &f.args[1];
12156
// Translate a C/strftime-style *parse* format into a Java
// (SimpleDateFormat-style) pattern. Same substitution order as the
// formatting variant, minus the output-only fields (%j/%a/%b).
fn c_to_java_format_parse(fmt: &str) -> String {
    let substitutions = [
        ("%Y", "yyyy"),
        ("%m", "MM"),
        ("%d", "dd"),
        ("%H", "HH"),
        ("%M", "mm"),
        ("%S", "ss"),
        ("%f", "SSSSSS"),
        ("%y", "yy"),
        ("%-m", "M"),
        ("%-d", "d"),
        ("%-H", "H"),
        ("%-I", "h"),
        ("%I", "hh"),
        ("%p", "a"),
        ("%F", "yyyy-MM-dd"),
        ("%T", "HH:mm:ss"),
    ];
    let mut out = fmt.to_string();
    for &(from, to) in substitutions.iter() {
        out = out.replace(from, to);
    }
    out
}
12175
12176 match target {
12177 DialectType::DuckDB => Ok(Expression::Function(f)),
12178 DialectType::Spark | DialectType::Databricks => {
12179 // STRPTIME(val, fmt) -> TO_TIMESTAMP(val, java_fmt)
12180 if let Expression::Literal(
12181 crate::expressions::Literal::String(s),
12182 ) = fmt_expr
12183 {
12184 let java_fmt = c_to_java_format_parse(s);
12185 Ok(Expression::Function(Box::new(Function::new(
12186 "TO_TIMESTAMP".to_string(),
12187 vec![val, Expression::string(&java_fmt)],
12188 ))))
12189 } else {
12190 Ok(Expression::Function(Box::new(Function::new(
12191 "TO_TIMESTAMP".to_string(),
12192 vec![val, fmt_expr.clone()],
12193 ))))
12194 }
12195 }
12196 DialectType::Hive => {
12197 // STRPTIME(val, fmt) -> CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(val, java_fmt)) AS TIMESTAMP)
12198 if let Expression::Literal(
12199 crate::expressions::Literal::String(s),
12200 ) = fmt_expr
12201 {
12202 let java_fmt = c_to_java_format_parse(s);
12203 let unix_ts =
12204 Expression::Function(Box::new(Function::new(
12205 "UNIX_TIMESTAMP".to_string(),
12206 vec![val, Expression::string(&java_fmt)],
12207 )));
12208 let from_unix =
12209 Expression::Function(Box::new(Function::new(
12210 "FROM_UNIXTIME".to_string(),
12211 vec![unix_ts],
12212 )));
12213 Ok(Expression::Cast(Box::new(
12214 crate::expressions::Cast {
12215 this: from_unix,
12216 to: DataType::Timestamp {
12217 timezone: false,
12218 precision: None,
12219 },
12220 trailing_comments: Vec::new(),
12221 double_colon_syntax: false,
12222 format: None,
12223 default: None,
12224 inferred_type: None,
12225 },
12226 )))
12227 } else {
12228 Ok(Expression::Function(f))
12229 }
12230 }
12231 DialectType::Presto
12232 | DialectType::Trino
12233 | DialectType::Athena => {
12234 // STRPTIME(val, fmt) -> DATE_PARSE(val, presto_fmt) (convert DuckDB format to Presto)
12235 if let Expression::Literal(
12236 crate::expressions::Literal::String(s),
12237 ) = fmt_expr
12238 {
12239 let presto_fmt = duckdb_to_presto_format(s);
12240 Ok(Expression::Function(Box::new(Function::new(
12241 "DATE_PARSE".to_string(),
12242 vec![val, Expression::string(&presto_fmt)],
12243 ))))
12244 } else {
12245 Ok(Expression::Function(Box::new(Function::new(
12246 "DATE_PARSE".to_string(),
12247 vec![val, fmt_expr.clone()],
12248 ))))
12249 }
12250 }
12251 DialectType::BigQuery => {
12252 // STRPTIME(val, fmt) -> PARSE_TIMESTAMP(bq_fmt, val) - note reversed arg order
12253 if let Expression::Literal(
12254 crate::expressions::Literal::String(s),
12255 ) = fmt_expr
12256 {
12257 let bq_fmt = duckdb_to_bigquery_format(s);
12258 Ok(Expression::Function(Box::new(Function::new(
12259 "PARSE_TIMESTAMP".to_string(),
12260 vec![Expression::string(&bq_fmt), val],
12261 ))))
12262 } else {
12263 Ok(Expression::Function(Box::new(Function::new(
12264 "PARSE_TIMESTAMP".to_string(),
12265 vec![fmt_expr.clone(), val],
12266 ))))
12267 }
12268 }
12269 _ => Ok(Expression::Function(f)),
12270 }
12271 }
12272 // DATE_FORMAT(val, fmt) from Presto source (C-style format) -> target-specific
12273 "DATE_FORMAT"
12274 if f.args.len() >= 2
12275 && matches!(
12276 source,
12277 DialectType::Presto
12278 | DialectType::Trino
12279 | DialectType::Athena
12280 ) =>
12281 {
12282 let val = f.args[0].clone();
12283 let fmt_expr = &f.args[1];
12284
12285 match target {
12286 DialectType::Presto
12287 | DialectType::Trino
12288 | DialectType::Athena => {
12289 // Presto -> Presto: normalize format (e.g., %H:%i:%S -> %T)
12290 if let Expression::Literal(
12291 crate::expressions::Literal::String(s),
12292 ) = fmt_expr
12293 {
12294 let normalized = crate::dialects::presto::PrestoDialect::normalize_presto_format(s);
12295 Ok(Expression::Function(Box::new(Function::new(
12296 "DATE_FORMAT".to_string(),
12297 vec![val, Expression::string(&normalized)],
12298 ))))
12299 } else {
12300 Ok(Expression::Function(f))
12301 }
12302 }
12303 DialectType::Hive
12304 | DialectType::Spark
12305 | DialectType::Databricks => {
12306 // Convert Presto C-style to Java-style format
12307 if let Expression::Literal(
12308 crate::expressions::Literal::String(s),
12309 ) = fmt_expr
12310 {
12311 let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
12312 Ok(Expression::Function(Box::new(Function::new(
12313 "DATE_FORMAT".to_string(),
12314 vec![val, Expression::string(&java_fmt)],
12315 ))))
12316 } else {
12317 Ok(Expression::Function(f))
12318 }
12319 }
12320 DialectType::DuckDB => {
12321 // Convert to STRFTIME(val, duckdb_fmt)
12322 if let Expression::Literal(
12323 crate::expressions::Literal::String(s),
12324 ) = fmt_expr
12325 {
12326 let duckdb_fmt = crate::dialects::presto::PrestoDialect::presto_to_duckdb_format(s);
12327 Ok(Expression::Function(Box::new(Function::new(
12328 "STRFTIME".to_string(),
12329 vec![val, Expression::string(&duckdb_fmt)],
12330 ))))
12331 } else {
12332 Ok(Expression::Function(Box::new(Function::new(
12333 "STRFTIME".to_string(),
12334 vec![val, fmt_expr.clone()],
12335 ))))
12336 }
12337 }
12338 DialectType::BigQuery => {
12339 // Convert to FORMAT_DATE(bq_fmt, val) - reversed args
12340 if let Expression::Literal(
12341 crate::expressions::Literal::String(s),
12342 ) = fmt_expr
12343 {
12344 let bq_fmt = crate::dialects::presto::PrestoDialect::presto_to_bigquery_format(s);
12345 Ok(Expression::Function(Box::new(Function::new(
12346 "FORMAT_DATE".to_string(),
12347 vec![Expression::string(&bq_fmt), val],
12348 ))))
12349 } else {
12350 Ok(Expression::Function(Box::new(Function::new(
12351 "FORMAT_DATE".to_string(),
12352 vec![fmt_expr.clone(), val],
12353 ))))
12354 }
12355 }
12356 _ => Ok(Expression::Function(f)),
12357 }
12358 }
12359 // DATE_PARSE(val, fmt) from Presto source -> target-specific parse function
12360 "DATE_PARSE"
12361 if f.args.len() >= 2
12362 && matches!(
12363 source,
12364 DialectType::Presto
12365 | DialectType::Trino
12366 | DialectType::Athena
12367 ) =>
12368 {
12369 let val = f.args[0].clone();
12370 let fmt_expr = &f.args[1];
12371
12372 match target {
12373 DialectType::Presto
12374 | DialectType::Trino
12375 | DialectType::Athena => {
12376 // Presto -> Presto: normalize format
12377 if let Expression::Literal(
12378 crate::expressions::Literal::String(s),
12379 ) = fmt_expr
12380 {
12381 let normalized = crate::dialects::presto::PrestoDialect::normalize_presto_format(s);
12382 Ok(Expression::Function(Box::new(Function::new(
12383 "DATE_PARSE".to_string(),
12384 vec![val, Expression::string(&normalized)],
12385 ))))
12386 } else {
12387 Ok(Expression::Function(f))
12388 }
12389 }
12390 DialectType::Hive => {
12391 // Presto -> Hive: if default format, just CAST(x AS TIMESTAMP)
12392 if let Expression::Literal(
12393 crate::expressions::Literal::String(s),
12394 ) = fmt_expr
12395 {
12396 if crate::dialects::presto::PrestoDialect::is_default_timestamp_format(s)
12397 || crate::dialects::presto::PrestoDialect::is_default_date_format(s) {
12398 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
12399 this: val,
12400 to: DataType::Timestamp { timezone: false, precision: None },
12401 trailing_comments: Vec::new(),
12402 double_colon_syntax: false,
12403 format: None,
12404 default: None,
12405 inferred_type: None,
12406 })))
12407 } else {
12408 let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
12409 Ok(Expression::Function(Box::new(Function::new(
12410 "TO_TIMESTAMP".to_string(),
12411 vec![val, Expression::string(&java_fmt)],
12412 ))))
12413 }
12414 } else {
12415 Ok(Expression::Function(f))
12416 }
12417 }
12418 DialectType::Spark | DialectType::Databricks => {
12419 // Presto -> Spark: TO_TIMESTAMP(val, java_fmt)
12420 if let Expression::Literal(
12421 crate::expressions::Literal::String(s),
12422 ) = fmt_expr
12423 {
12424 let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
12425 Ok(Expression::Function(Box::new(Function::new(
12426 "TO_TIMESTAMP".to_string(),
12427 vec![val, Expression::string(&java_fmt)],
12428 ))))
12429 } else {
12430 Ok(Expression::Function(f))
12431 }
12432 }
12433 DialectType::DuckDB => {
12434 // Presto -> DuckDB: STRPTIME(val, duckdb_fmt)
12435 if let Expression::Literal(
12436 crate::expressions::Literal::String(s),
12437 ) = fmt_expr
12438 {
12439 let duckdb_fmt = crate::dialects::presto::PrestoDialect::presto_to_duckdb_format(s);
12440 Ok(Expression::Function(Box::new(Function::new(
12441 "STRPTIME".to_string(),
12442 vec![val, Expression::string(&duckdb_fmt)],
12443 ))))
12444 } else {
12445 Ok(Expression::Function(Box::new(Function::new(
12446 "STRPTIME".to_string(),
12447 vec![val, fmt_expr.clone()],
12448 ))))
12449 }
12450 }
12451 _ => Ok(Expression::Function(f)),
12452 }
12453 }
12454 // FROM_BASE64(x) / TO_BASE64(x) from Presto -> Hive-specific renames
12455 "FROM_BASE64"
12456 if f.args.len() == 1 && matches!(target, DialectType::Hive) =>
12457 {
12458 Ok(Expression::Function(Box::new(Function::new(
12459 "UNBASE64".to_string(),
12460 f.args,
12461 ))))
12462 }
12463 "TO_BASE64"
12464 if f.args.len() == 1 && matches!(target, DialectType::Hive) =>
12465 {
12466 Ok(Expression::Function(Box::new(Function::new(
12467 "BASE64".to_string(),
12468 f.args,
12469 ))))
12470 }
12471 // FROM_UNIXTIME(x) -> CAST(FROM_UNIXTIME(x) AS TIMESTAMP) for Spark
12472 "FROM_UNIXTIME"
12473 if f.args.len() == 1
12474 && matches!(
12475 source,
12476 DialectType::Presto
12477 | DialectType::Trino
12478 | DialectType::Athena
12479 )
12480 && matches!(
12481 target,
12482 DialectType::Spark | DialectType::Databricks
12483 ) =>
12484 {
12485 // Wrap FROM_UNIXTIME(x) in CAST(... AS TIMESTAMP)
12486 let from_unix = Expression::Function(Box::new(Function::new(
12487 "FROM_UNIXTIME".to_string(),
12488 f.args,
12489 )));
12490 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
12491 this: from_unix,
12492 to: DataType::Timestamp {
12493 timezone: false,
12494 precision: None,
12495 },
12496 trailing_comments: Vec::new(),
12497 double_colon_syntax: false,
12498 format: None,
12499 default: None,
12500 inferred_type: None,
12501 })))
12502 }
12503 // DATE_FORMAT(val, fmt) from Hive/Spark/MySQL -> target-specific format function
12504 "DATE_FORMAT"
12505 if f.args.len() >= 2
12506 && !matches!(
12507 target,
12508 DialectType::Hive
12509 | DialectType::Spark
12510 | DialectType::Databricks
12511 | DialectType::MySQL
12512 | DialectType::SingleStore
12513 ) =>
12514 {
12515 let val = f.args[0].clone();
12516 let fmt_expr = &f.args[1];
12517 let is_hive_source = matches!(
12518 source,
12519 DialectType::Hive
12520 | DialectType::Spark
12521 | DialectType::Databricks
12522 );
12523
// Translate a Java (SimpleDateFormat-style) pattern into a C
// strftime-style pattern.
//
// Pass 1 replaces the multi-character Java fields, longest first, so
// e.g. "yyyy" is consumed before the trailing "yy" rule can see it.
// Pass 2 walks the result character by character to map the
// single-letter timezone fields (Java 'z' -> %Z name, 'Z' -> %z
// offset), skipping over the "%X" pairs already produced by pass 1 so
// the 'Y'/'S'/etc. inside them are never touched.
fn java_to_c_format(fmt: &str) -> String {
    let result = fmt
        .replace("yyyy", "%Y")
        .replace("SSSSSS", "%f")
        // Java EEEE is the *full weekday name*; in strftime that is
        // %A. (%W is the week-of-year number — the previous mapping
        // produced week numbers where DuckDB/BigQuery callers expect
        // a day name. MySQL-style consumers that want %W must map
        // EEEE themselves before calling this.)
        .replace("EEEE", "%A")
        .replace("MM", "%m")
        .replace("dd", "%d")
        .replace("HH", "%H")
        .replace("mm", "%M")
        .replace("ss", "%S")
        .replace("yy", "%y");
    let mut out = String::new();
    let chars: Vec<char> = result.chars().collect();
    let mut i = 0;
    while i < chars.len() {
        if chars[i] == '%' && i + 1 < chars.len() {
            // Copy an existing "%X" specifier verbatim.
            out.push(chars[i]);
            out.push(chars[i + 1]);
            i += 2;
        } else if chars[i] == 'z' {
            out.push_str("%Z");
            i += 1;
        } else if chars[i] == 'Z' {
            out.push_str("%z");
            i += 1;
        } else {
            out.push(chars[i]);
            i += 1;
        }
    }
    out
}
12563
12564 fn java_to_presto_format(fmt: &str) -> String {
12565 // Presto uses %T for HH:MM:SS
12566 let c_fmt = java_to_c_format(fmt);
12567 c_fmt.replace("%H:%M:%S", "%T")
12568 }
12569
12570 fn java_to_bq_format(fmt: &str) -> String {
12571 // BigQuery uses %F for yyyy-MM-dd and %T for HH:mm:ss
12572 let c_fmt = java_to_c_format(fmt);
12573 c_fmt.replace("%Y-%m-%d", "%F").replace("%H:%M:%S", "%T")
12574 }
12575
12576 // For Hive source, CAST string literals to appropriate type
12577 let cast_val = if is_hive_source {
12578 match &val {
12579 Expression::Literal(
12580 crate::expressions::Literal::String(_),
12581 ) => {
12582 match target {
12583 DialectType::DuckDB
12584 | DialectType::Presto
12585 | DialectType::Trino
12586 | DialectType::Athena => {
12587 Self::ensure_cast_timestamp(val.clone())
12588 }
12589 DialectType::BigQuery => {
12590 // BigQuery: CAST(val AS DATETIME)
12591 Expression::Cast(Box::new(
12592 crate::expressions::Cast {
12593 this: val.clone(),
12594 to: DataType::Custom {
12595 name: "DATETIME".to_string(),
12596 },
12597 trailing_comments: vec![],
12598 double_colon_syntax: false,
12599 format: None,
12600 default: None,
12601 inferred_type: None,
12602 },
12603 ))
12604 }
12605 _ => val.clone(),
12606 }
12607 }
12608 // For CAST(x AS DATE) or DATE literal, Presto needs CAST(CAST(x AS DATE) AS TIMESTAMP)
12609 Expression::Cast(c)
12610 if matches!(c.to, DataType::Date)
12611 && matches!(
12612 target,
12613 DialectType::Presto
12614 | DialectType::Trino
12615 | DialectType::Athena
12616 ) =>
12617 {
12618 Expression::Cast(Box::new(crate::expressions::Cast {
12619 this: val.clone(),
12620 to: DataType::Timestamp {
12621 timezone: false,
12622 precision: None,
12623 },
12624 trailing_comments: vec![],
12625 double_colon_syntax: false,
12626 format: None,
12627 default: None,
12628 inferred_type: None,
12629 }))
12630 }
12631 Expression::Literal(crate::expressions::Literal::Date(
12632 _,
12633 )) if matches!(
12634 target,
12635 DialectType::Presto
12636 | DialectType::Trino
12637 | DialectType::Athena
12638 ) =>
12639 {
12640 // DATE 'x' -> CAST(CAST('x' AS DATE) AS TIMESTAMP)
12641 let cast_date = Self::date_literal_to_cast(val.clone());
12642 Expression::Cast(Box::new(crate::expressions::Cast {
12643 this: cast_date,
12644 to: DataType::Timestamp {
12645 timezone: false,
12646 precision: None,
12647 },
12648 trailing_comments: vec![],
12649 double_colon_syntax: false,
12650 format: None,
12651 default: None,
12652 inferred_type: None,
12653 }))
12654 }
12655 _ => val.clone(),
12656 }
12657 } else {
12658 val.clone()
12659 };
12660
12661 match target {
12662 DialectType::DuckDB => {
12663 if let Expression::Literal(
12664 crate::expressions::Literal::String(s),
12665 ) = fmt_expr
12666 {
12667 let c_fmt = if is_hive_source {
12668 java_to_c_format(s)
12669 } else {
12670 s.clone()
12671 };
12672 Ok(Expression::Function(Box::new(Function::new(
12673 "STRFTIME".to_string(),
12674 vec![cast_val, Expression::string(&c_fmt)],
12675 ))))
12676 } else {
12677 Ok(Expression::Function(Box::new(Function::new(
12678 "STRFTIME".to_string(),
12679 vec![cast_val, fmt_expr.clone()],
12680 ))))
12681 }
12682 }
12683 DialectType::Presto
12684 | DialectType::Trino
12685 | DialectType::Athena => {
12686 if is_hive_source {
12687 if let Expression::Literal(
12688 crate::expressions::Literal::String(s),
12689 ) = fmt_expr
12690 {
12691 let p_fmt = java_to_presto_format(s);
12692 Ok(Expression::Function(Box::new(Function::new(
12693 "DATE_FORMAT".to_string(),
12694 vec![cast_val, Expression::string(&p_fmt)],
12695 ))))
12696 } else {
12697 Ok(Expression::Function(Box::new(Function::new(
12698 "DATE_FORMAT".to_string(),
12699 vec![cast_val, fmt_expr.clone()],
12700 ))))
12701 }
12702 } else {
12703 Ok(Expression::Function(Box::new(Function::new(
12704 "DATE_FORMAT".to_string(),
12705 f.args,
12706 ))))
12707 }
12708 }
12709 DialectType::BigQuery => {
12710 // DATE_FORMAT(val, fmt) -> FORMAT_DATE(fmt, val)
12711 if let Expression::Literal(
12712 crate::expressions::Literal::String(s),
12713 ) = fmt_expr
12714 {
12715 let bq_fmt = if is_hive_source {
12716 java_to_bq_format(s)
12717 } else {
12718 java_to_c_format(s)
12719 };
12720 Ok(Expression::Function(Box::new(Function::new(
12721 "FORMAT_DATE".to_string(),
12722 vec![Expression::string(&bq_fmt), cast_val],
12723 ))))
12724 } else {
12725 Ok(Expression::Function(Box::new(Function::new(
12726 "FORMAT_DATE".to_string(),
12727 vec![fmt_expr.clone(), cast_val],
12728 ))))
12729 }
12730 }
12731 DialectType::PostgreSQL | DialectType::Redshift => {
12732 if let Expression::Literal(
12733 crate::expressions::Literal::String(s),
12734 ) = fmt_expr
12735 {
12736 let pg_fmt = s
12737 .replace("yyyy", "YYYY")
12738 .replace("MM", "MM")
12739 .replace("dd", "DD")
12740 .replace("HH", "HH24")
12741 .replace("mm", "MI")
12742 .replace("ss", "SS")
12743 .replace("yy", "YY");
12744 Ok(Expression::Function(Box::new(Function::new(
12745 "TO_CHAR".to_string(),
12746 vec![val, Expression::string(&pg_fmt)],
12747 ))))
12748 } else {
12749 Ok(Expression::Function(Box::new(Function::new(
12750 "TO_CHAR".to_string(),
12751 vec![val, fmt_expr.clone()],
12752 ))))
12753 }
12754 }
12755 _ => Ok(Expression::Function(f)),
12756 }
12757 }
// DATEDIFF(unit, start, end) - 3-arg form
// SQLite uses DATEDIFF(date1, date2, unit_string) instead
//
// This arm first normalizes the argument order (SQLite supplies the unit
// last), then renders the target dialect's spelling: DATEDIFF vs DATE_DIFF,
// identifier vs string-literal unit, argument order, and source-specific
// casts for string-literal date arguments.
"DATEDIFF" if f.args.len() == 3 => {
    let mut args = f.args;
    // SQLite source: args = (date1, date2, unit_string)
    // Standard source: args = (unit, start, end)
    let (_arg0, arg1, arg2, unit_str) =
        if matches!(source, DialectType::SQLite) {
            let date1 = args.remove(0);
            let date2 = args.remove(0);
            let unit_expr = args.remove(0);
            let unit_s = Self::get_unit_str_static(&unit_expr);

            // For SQLite target, generate JULIANDAY arithmetic directly
            if matches!(target, DialectType::SQLite) {
                let jd_first = Expression::Function(Box::new(
                    Function::new("JULIANDAY".to_string(), vec![date1]),
                ));
                let jd_second = Expression::Function(Box::new(
                    Function::new("JULIANDAY".to_string(), vec![date2]),
                ));
                // JULIANDAY yields fractional day counts; the difference is
                // parenthesized and scaled to the requested unit below.
                let diff = Expression::Sub(Box::new(
                    crate::expressions::BinaryOp::new(
                        jd_first, jd_second,
                    ),
                ));
                let paren_diff = Expression::Paren(Box::new(
                    crate::expressions::Paren {
                        this: diff,
                        trailing_comments: Vec::new(),
                    },
                ));
                // Conversion factors are expressed in days. MONTH and YEAR
                // use the approximations 30.0 and 365.0 days respectively.
                let adjusted = match unit_s.as_str() {
                    "HOUR" => Expression::Mul(Box::new(
                        crate::expressions::BinaryOp::new(
                            paren_diff,
                            Expression::Literal(Literal::Number(
                                "24.0".to_string(),
                            )),
                        ),
                    )),
                    "MINUTE" => Expression::Mul(Box::new(
                        crate::expressions::BinaryOp::new(
                            paren_diff,
                            Expression::Literal(Literal::Number(
                                "1440.0".to_string(),
                            )),
                        ),
                    )),
                    "SECOND" => Expression::Mul(Box::new(
                        crate::expressions::BinaryOp::new(
                            paren_diff,
                            Expression::Literal(Literal::Number(
                                "86400.0".to_string(),
                            )),
                        ),
                    )),
                    "MONTH" => Expression::Div(Box::new(
                        crate::expressions::BinaryOp::new(
                            paren_diff,
                            Expression::Literal(Literal::Number(
                                "30.0".to_string(),
                            )),
                        ),
                    )),
                    "YEAR" => Expression::Div(Box::new(
                        crate::expressions::BinaryOp::new(
                            paren_diff,
                            Expression::Literal(Literal::Number(
                                "365.0".to_string(),
                            )),
                        ),
                    )),
                    // DAY (and unrecognized units) use the raw day diff.
                    _ => paren_diff,
                };
                // Truncate the fractional result to an integer count;
                // short-circuits the rest of the arm via early return.
                return Ok(Expression::Cast(Box::new(Cast {
                    this: adjusted,
                    to: DataType::Int {
                        length: None,
                        integer_spelling: true,
                    },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                })));
            }

            // For other targets, remap to standard (unit, start, end) form
            let unit_ident =
                Expression::Identifier(Identifier::new(&unit_s));
            (unit_ident, date1, date2, unit_s)
        } else {
            let arg0 = args.remove(0);
            let arg1 = args.remove(0);
            let arg2 = args.remove(0);
            let unit_s = Self::get_unit_str_static(&arg0);
            (arg0, arg1, arg2, unit_s)
        };

    // For Hive/Spark source, string literal dates need to be cast
    // Note: Databricks is excluded - it handles string args like standard SQL
    let is_hive_spark =
        matches!(source, DialectType::Hive | DialectType::Spark);

    match target {
        DialectType::Snowflake => {
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            // Use ensure_to_date_preserved to add TO_DATE with a marker
            // that prevents the Snowflake TO_DATE handler from converting it to CAST
            let d1 = if is_hive_spark {
                Self::ensure_to_date_preserved(arg1)
            } else {
                arg1
            };
            let d2 = if is_hive_spark {
                Self::ensure_to_date_preserved(arg2)
            } else {
                arg2
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![unit, d1, d2],
            ))))
        }
        DialectType::Redshift => {
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            // Redshift keeps the DATEDIFF spelling; Hive/Spark string
            // dates are wrapped in a CAST to DATE.
            let d1 = if is_hive_spark {
                Self::ensure_cast_date(arg1)
            } else {
                arg1
            };
            let d2 = if is_hive_spark {
                Self::ensure_cast_date(arg2)
            } else {
                arg2
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![unit, d1, d2],
            ))))
        }
        DialectType::TSQL => {
            // TSQL takes DATEDIFF(unit, start, end) unchanged.
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
        DialectType::DuckDB => {
            let is_redshift_tsql = matches!(
                source,
                DialectType::Redshift | DialectType::TSQL
            );
            if is_hive_spark {
                // For Hive/Spark source, CAST string args to DATE and emit DATE_DIFF directly
                let d1 = Self::ensure_cast_date(arg1);
                let d2 = Self::ensure_cast_date(arg2);
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_DIFF".to_string(),
                    vec![Expression::string(&unit_str), d1, d2],
                ))))
            } else if matches!(source, DialectType::Snowflake) {
                // For Snowflake source: special handling per unit
                match unit_str.as_str() {
                    "NANOSECOND" => {
                        // DATEDIFF(NANOSECOND, start, end) -> EPOCH_NS(CAST(end AS TIMESTAMP_NS)) - EPOCH_NS(CAST(start AS TIMESTAMP_NS))
                        // Local helper: wrap an expression in
                        // CAST(... AS TIMESTAMP_NS) for EPOCH_NS input.
                        fn cast_to_timestamp_ns(
                            expr: Expression,
                        ) -> Expression
                        {
                            Expression::Cast(Box::new(Cast {
                                this: expr,
                                to: DataType::Custom {
                                    name: "TIMESTAMP_NS".to_string(),
                                },
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            }))
                        }
                        let epoch_end = Expression::Function(Box::new(
                            Function::new(
                                "EPOCH_NS".to_string(),
                                vec![cast_to_timestamp_ns(arg2)],
                            ),
                        ));
                        let epoch_start = Expression::Function(
                            Box::new(Function::new(
                                "EPOCH_NS".to_string(),
                                vec![cast_to_timestamp_ns(arg1)],
                            )),
                        );
                        Ok(Expression::Sub(Box::new(BinaryOp::new(
                            epoch_end,
                            epoch_start,
                        ))))
                    }
                    "WEEK" => {
                        // DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST(x AS DATE)), DATE_TRUNC('WEEK', CAST(y AS DATE)))
                        let d1 = Self::force_cast_date(arg1);
                        let d2 = Self::force_cast_date(arg2);
                        let dt1 = Expression::Function(Box::new(
                            Function::new(
                                "DATE_TRUNC".to_string(),
                                vec![Expression::string("WEEK"), d1],
                            ),
                        ));
                        let dt2 = Expression::Function(Box::new(
                            Function::new(
                                "DATE_TRUNC".to_string(),
                                vec![Expression::string("WEEK"), d2],
                            ),
                        ));
                        Ok(Expression::Function(Box::new(
                            Function::new(
                                "DATE_DIFF".to_string(),
                                vec![
                                    Expression::string(&unit_str),
                                    dt1,
                                    dt2,
                                ],
                            ),
                        )))
                    }
                    _ => {
                        // YEAR, MONTH, QUARTER, DAY, etc.: CAST to DATE
                        let d1 = Self::force_cast_date(arg1);
                        let d2 = Self::force_cast_date(arg2);
                        Ok(Expression::Function(Box::new(
                            Function::new(
                                "DATE_DIFF".to_string(),
                                vec![
                                    Expression::string(&unit_str),
                                    d1,
                                    d2,
                                ],
                            ),
                        )))
                    }
                }
            } else if is_redshift_tsql {
                // For Redshift/TSQL source, CAST args to TIMESTAMP (always)
                let d1 = Self::force_cast_timestamp(arg1);
                let d2 = Self::force_cast_timestamp(arg2);
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_DIFF".to_string(),
                    vec![Expression::string(&unit_str), d1, d2],
                ))))
            } else {
                // Keep as DATEDIFF so DuckDB's transform_datediff handles
                // DATE_TRUNC for WEEK, CAST for string literals, etc.
                let unit =
                    Expression::Identifier(Identifier::new(&unit_str));
                Ok(Expression::Function(Box::new(Function::new(
                    "DATEDIFF".to_string(),
                    vec![unit, arg1, arg2],
                ))))
            }
        }
        DialectType::BigQuery => {
            let is_redshift_tsql = matches!(
                source,
                DialectType::Redshift
                    | DialectType::TSQL
                    | DialectType::Snowflake
            );
            let cast_d1 = if is_hive_spark {
                Self::ensure_cast_date(arg1)
            } else if is_redshift_tsql {
                Self::force_cast_datetime(arg1)
            } else {
                Self::ensure_cast_datetime(arg1)
            };
            let cast_d2 = if is_hive_spark {
                Self::ensure_cast_date(arg2)
            } else if is_redshift_tsql {
                Self::force_cast_datetime(arg2)
            } else {
                Self::ensure_cast_datetime(arg2)
            };
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            // BigQuery's DATE_DIFF signature is (end, start, unit), so
            // the two date operands are emitted in reversed order here.
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_DIFF".to_string(),
                vec![cast_d2, cast_d1, unit],
            ))))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // For Hive/Spark source, string literals need double-cast: CAST(CAST(x AS TIMESTAMP) AS DATE)
            // For Redshift/TSQL source, args need CAST to TIMESTAMP (always)
            let is_redshift_tsql = matches!(
                source,
                DialectType::Redshift
                    | DialectType::TSQL
                    | DialectType::Snowflake
            );
            let d1 = if is_hive_spark {
                Self::double_cast_timestamp_date(arg1)
            } else if is_redshift_tsql {
                Self::force_cast_timestamp(arg1)
            } else {
                arg1
            };
            let d2 = if is_hive_spark {
                Self::double_cast_timestamp_date(arg2)
            } else if is_redshift_tsql {
                Self::force_cast_timestamp(arg2)
            } else {
                arg2
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_DIFF".to_string(),
                vec![Expression::string(&unit_str), d1, d2],
            ))))
        }
        // Hive's DATEDIFF takes no unit argument: MONTH goes through
        // MONTHS_BETWEEN, WEEK through a day-count divided by 7, and
        // everything else falls back to the 2-arg day difference.
        DialectType::Hive => match unit_str.as_str() {
            "MONTH" => Ok(Expression::Cast(Box::new(Cast {
                this: Expression::Function(Box::new(Function::new(
                    "MONTHS_BETWEEN".to_string(),
                    vec![arg2, arg1],
                ))),
                to: DataType::Int {
                    length: None,
                    integer_spelling: false,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }))),
            "WEEK" => Ok(Expression::Cast(Box::new(Cast {
                this: Expression::Div(Box::new(
                    crate::expressions::BinaryOp::new(
                        Expression::Function(Box::new(Function::new(
                            "DATEDIFF".to_string(),
                            vec![arg2, arg1],
                        ))),
                        Expression::number(7),
                    ),
                )),
                to: DataType::Int {
                    length: None,
                    integer_spelling: false,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }))),
            _ => Ok(Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![arg2, arg1],
            )))),
        },
        DialectType::Spark | DialectType::Databricks => {
            // Spark/Databricks accept the 3-arg DATEDIFF form directly.
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
        _ => {
            // For Hive/Spark source targeting PostgreSQL etc., cast string literals to DATE
            let d1 = if is_hive_spark {
                Self::ensure_cast_date(arg1)
            } else {
                arg1
            };
            let d2 = if is_hive_spark {
                Self::ensure_cast_date(arg2)
            } else {
                arg2
            };
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![unit, d1, d2],
            ))))
        }
    }
}
// DATEDIFF(end, start) - 2-arg form from Hive/MySQL
//
// Both dialects compute a day difference (end - start). Targets whose
// function takes a unit receive an explicit DAY unit; the two date
// operands are swapped where the target expects (start, end) order.
"DATEDIFF" if f.args.len() == 2 => {
    let mut args = f.args;
    let arg0 = args.remove(0);
    let arg1 = args.remove(0);

    // Helper: unwrap TO_DATE(x) -> x (extracts inner arg)
    // Also recognizes TryCast/Cast to DATE that may have been produced by
    // cross-dialect TO_DATE -> TRY_CAST conversion
    let unwrap_to_date = |e: Expression| -> (Expression, bool) {
        if let Expression::Function(ref f) = e {
            if f.name.eq_ignore_ascii_case("TO_DATE")
                && f.args.len() == 1
            {
                return (f.args[0].clone(), true);
            }
        }
        // Also recognize TryCast(x, Date) as an already-converted TO_DATE
        if let Expression::TryCast(ref c) = e {
            if matches!(c.to, DataType::Date) {
                return (e, true); // Already properly cast, return as-is
            }
        }
        (e, false)
    };

    match target {
        DialectType::DuckDB => {
            // For Hive source, always CAST to DATE
            // If arg is TO_DATE(x) or TRY_CAST(x AS DATE), use it directly
            let cast_d0 = if matches!(
                source,
                DialectType::Hive
                    | DialectType::Spark
                    | DialectType::Databricks
            ) {
                let (inner, was_to_date) = unwrap_to_date(arg1);
                if was_to_date {
                    // Already a date expression, use directly
                    if matches!(&inner, Expression::TryCast(_)) {
                        inner // Already TRY_CAST(x AS DATE)
                    } else {
                        Self::try_cast_date(inner)
                    }
                } else {
                    Self::force_cast_date(inner)
                }
            } else {
                Self::ensure_cast_date(arg1)
            };
            // Same treatment for the first original argument (the end
            // date), which becomes the last DATE_DIFF operand.
            let cast_d1 = if matches!(
                source,
                DialectType::Hive
                    | DialectType::Spark
                    | DialectType::Databricks
            ) {
                let (inner, was_to_date) = unwrap_to_date(arg0);
                if was_to_date {
                    if matches!(&inner, Expression::TryCast(_)) {
                        inner
                    } else {
                        Self::try_cast_date(inner)
                    }
                } else {
                    Self::force_cast_date(inner)
                }
            } else {
                Self::ensure_cast_date(arg0)
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_DIFF".to_string(),
                vec![Expression::string("DAY"), cast_d0, cast_d1],
            ))))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // For Hive/Spark source, apply double_cast_timestamp_date
            // For other sources (MySQL etc.), just swap args without casting
            if matches!(
                source,
                DialectType::Hive
                    | DialectType::Spark
                    | DialectType::Databricks
            ) {
                let cast_fn = |e: Expression| -> Expression {
                    let (inner, was_to_date) = unwrap_to_date(e);
                    if was_to_date {
                        // NOTE(review): the double-cast wrapper is applied
                        // twice when the arg was TO_DATE(x), nesting the
                        // CAST(... AS TIMESTAMP)/CAST(... AS DATE) pair —
                        // confirm this doubled nesting is intentional.
                        let first_cast =
                            Self::double_cast_timestamp_date(inner);
                        Self::double_cast_timestamp_date(first_cast)
                    } else {
                        Self::double_cast_timestamp_date(inner)
                    }
                };
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_DIFF".to_string(),
                    vec![
                        Expression::string("DAY"),
                        cast_fn(arg1),
                        cast_fn(arg0),
                    ],
                ))))
            } else {
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_DIFF".to_string(),
                    vec![Expression::string("DAY"), arg1, arg0],
                ))))
            }
        }
        DialectType::Redshift => {
            // Redshift: DATEDIFF(DAY, start, end) with identifier unit.
            let unit = Expression::Identifier(Identifier::new("DAY"));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![unit, arg1, arg0],
            ))))
        }
        // Other targets keep the original 2-arg (end, start) order.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "DATEDIFF".to_string(),
            vec![arg0, arg1],
        )))),
    }
}
// DATE_DIFF(unit, start, end) - 3-arg with string unit (ClickHouse/DuckDB style)
//
// Only the function spelling and the unit's rendering (string literal vs
// identifier) vary by target; argument order stays (unit, start, end).
"DATE_DIFF" if f.args.len() == 3 => {
    let mut args = f.args;
    let arg0 = args.remove(0);
    let arg1 = args.remove(0);
    let arg2 = args.remove(0);
    let unit_str = Self::get_unit_str_static(&arg0);

    match target {
        DialectType::DuckDB => {
            // DuckDB: DATE_DIFF('UNIT', start, end)
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_DIFF".to_string(),
                vec![Expression::string(&unit_str), arg1, arg2],
            ))))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // Presto family also uses a string-literal unit.
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_DIFF".to_string(),
                vec![Expression::string(&unit_str), arg1, arg2],
            ))))
        }
        DialectType::ClickHouse => {
            // ClickHouse: DATE_DIFF(UNIT, start, end) - identifier unit
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_DIFF".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
        DialectType::Snowflake | DialectType::Redshift => {
            // Snowflake/Redshift spell it DATEDIFF with an identifier unit.
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
        _ => {
            // Default: normalize to DATEDIFF(unit, start, end).
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
    }
}
// DATEADD(unit, val, date) - 3-arg form
//
// Normalizes TSQL unit abbreviations first, then renders per target:
// function-based forms (DATEADD/DATE_ADD/ADD_MONTHS) or arithmetic with
// INTERVAL where the target has no 3-arg add function.
"DATEADD" if f.args.len() == 3 => {
    let mut args = f.args;
    let arg0 = args.remove(0);
    let arg1 = args.remove(0);
    let arg2 = args.remove(0);
    let unit_str = Self::get_unit_str_static(&arg0);

    // Normalize TSQL unit abbreviations to standard names
    let unit_str = match unit_str.as_str() {
        "YY" | "YYYY" => "YEAR".to_string(),
        "QQ" | "Q" => "QUARTER".to_string(),
        "MM" | "M" => "MONTH".to_string(),
        "WK" | "WW" => "WEEK".to_string(),
        "DD" | "D" | "DY" => "DAY".to_string(),
        "HH" => "HOUR".to_string(),
        "MI" | "N" => "MINUTE".to_string(),
        "SS" | "S" => "SECOND".to_string(),
        "MS" => "MILLISECOND".to_string(),
        "MCS" | "US" => "MICROSECOND".to_string(),
        _ => unit_str,
    };
    match target {
        DialectType::Snowflake => {
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            // Cast string literal to TIMESTAMP, but not for Snowflake source
            // (Snowflake natively accepts string literals in DATEADD)
            let arg2 = if matches!(
                &arg2,
                Expression::Literal(Literal::String(_))
            ) && !matches!(source, DialectType::Snowflake)
            {
                Expression::Cast(Box::new(Cast {
                    this: arg2,
                    to: DataType::Timestamp {
                        precision: None,
                        timezone: false,
                    },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                }))
            } else {
                arg2
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
        DialectType::TSQL => {
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            // Cast string literal to DATETIME2, but not when source is Spark/Databricks family
            let arg2 = if matches!(
                &arg2,
                Expression::Literal(Literal::String(_))
            ) && !matches!(
                source,
                DialectType::Spark
                    | DialectType::Databricks
                    | DialectType::Hive
            ) {
                Expression::Cast(Box::new(Cast {
                    this: arg2,
                    to: DataType::Custom {
                        name: "DATETIME2".to_string(),
                    },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                }))
            } else {
                arg2
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
        DialectType::Redshift => {
            // Redshift accepts DATEADD(unit, val, date) unchanged.
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
        DialectType::Databricks => {
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            // Sources with native DATEADD (TSQL, Databricks, Snowflake) -> DATEADD
            // Other sources (Redshift TsOrDsAdd, etc.) -> DATE_ADD
            let func_name = if matches!(
                source,
                DialectType::TSQL
                    | DialectType::Fabric
                    | DialectType::Databricks
                    | DialectType::Snowflake
            ) {
                "DATEADD"
            } else {
                "DATE_ADD"
            };
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
        DialectType::DuckDB => {
            // Special handling for NANOSECOND from Snowflake
            if unit_str == "NANOSECOND"
                && matches!(source, DialectType::Snowflake)
            {
                // DATEADD(NANOSECOND, offset, ts) -> MAKE_TIMESTAMP_NS(EPOCH_NS(CAST(ts AS TIMESTAMP_NS)) + offset)
                let cast_ts = Expression::Cast(Box::new(Cast {
                    this: arg2,
                    to: DataType::Custom {
                        name: "TIMESTAMP_NS".to_string(),
                    },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                }));
                let epoch_ns =
                    Expression::Function(Box::new(Function::new(
                        "EPOCH_NS".to_string(),
                        vec![cast_ts],
                    )));
                let sum = Expression::Add(Box::new(BinaryOp::new(
                    epoch_ns, arg1,
                )));
                Ok(Expression::Function(Box::new(Function::new(
                    "MAKE_TIMESTAMP_NS".to_string(),
                    vec![sum],
                ))))
            } else {
                // DuckDB: convert to date + INTERVAL syntax with CAST
                let iu = Self::parse_interval_unit_static(&unit_str);
                let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                    this: Some(arg1),
                    unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: iu, use_plural: false }),
                }));
                // Cast string literal to TIMESTAMP
                let arg2 = if matches!(
                    &arg2,
                    Expression::Literal(Literal::String(_))
                ) {
                    Expression::Cast(Box::new(Cast {
                        this: arg2,
                        to: DataType::Timestamp {
                            precision: None,
                            timezone: false,
                        },
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    }))
                } else {
                    arg2
                };
                Ok(Expression::Add(Box::new(
                    crate::expressions::BinaryOp::new(arg2, interval),
                )))
            }
        }
        DialectType::Spark => {
            // For TSQL source: convert to ADD_MONTHS/DATE_ADD(date, val)
            // For other sources: keep 3-arg DATE_ADD(UNIT, val, date) form
            if matches!(source, DialectType::TSQL | DialectType::Fabric)
            {
                // Local helper: multiply the amount by a constant factor,
                // folding the product when the amount is a numeric literal
                // and emitting an explicit `expr * factor` otherwise.
                fn multiply_expr_spark(
                    expr: Expression,
                    factor: i64,
                ) -> Expression
                {
                    if let Expression::Literal(
                        crate::expressions::Literal::Number(n),
                    ) = &expr
                    {
                        if let Ok(val) = n.parse::<i64>() {
                            return Expression::Literal(
                                crate::expressions::Literal::Number(
                                    (val * factor).to_string(),
                                ),
                            );
                        }
                    }
                    Expression::Mul(Box::new(
                        crate::expressions::BinaryOp::new(
                            expr,
                            Expression::Literal(
                                crate::expressions::Literal::Number(
                                    factor.to_string(),
                                ),
                            ),
                        ),
                    ))
                }
                // NOTE(review): the abbreviation alternates below are
                // already rewritten to full names by the normalization at
                // the top of this arm, so they are redundant (harmless).
                let normalized_unit = match unit_str.as_str() {
                    "YEAR" | "YY" | "YYYY" => "YEAR",
                    "QUARTER" | "QQ" | "Q" => "QUARTER",
                    "MONTH" | "MM" | "M" => "MONTH",
                    "WEEK" | "WK" | "WW" => "WEEK",
                    "DAY" | "DD" | "D" | "DY" => "DAY",
                    _ => &unit_str,
                };
                match normalized_unit {
                    "YEAR" => {
                        // 1 year = 12 months for ADD_MONTHS.
                        let months = multiply_expr_spark(arg1, 12);
                        Ok(Expression::Function(Box::new(
                            Function::new(
                                "ADD_MONTHS".to_string(),
                                vec![arg2, months],
                            ),
                        )))
                    }
                    "QUARTER" => {
                        // 1 quarter = 3 months for ADD_MONTHS.
                        let months = multiply_expr_spark(arg1, 3);
                        Ok(Expression::Function(Box::new(
                            Function::new(
                                "ADD_MONTHS".to_string(),
                                vec![arg2, months],
                            ),
                        )))
                    }
                    "MONTH" => Ok(Expression::Function(Box::new(
                        Function::new(
                            "ADD_MONTHS".to_string(),
                            vec![arg2, arg1],
                        ),
                    ))),
                    "WEEK" => {
                        // 1 week = 7 days for DATE_ADD.
                        let days = multiply_expr_spark(arg1, 7);
                        Ok(Expression::Function(Box::new(
                            Function::new(
                                "DATE_ADD".to_string(),
                                vec![arg2, days],
                            ),
                        )))
                    }
                    "DAY" => Ok(Expression::Function(Box::new(
                        Function::new(
                            "DATE_ADD".to_string(),
                            vec![arg2, arg1],
                        ),
                    ))),
                    _ => {
                        // Sub-day units keep the 3-arg form.
                        let unit = Expression::Identifier(
                            Identifier::new(&unit_str),
                        );
                        Ok(Expression::Function(Box::new(
                            Function::new(
                                "DATE_ADD".to_string(),
                                vec![unit, arg1, arg2],
                            ),
                        )))
                    }
                }
            } else {
                // Non-TSQL source: keep 3-arg DATE_ADD(UNIT, val, date)
                let unit =
                    Expression::Identifier(Identifier::new(&unit_str));
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![unit, arg1, arg2],
                ))))
            }
        }
        // Hive has no unit parameter: months via ADD_MONTHS, everything
        // else via the 2-arg DATE_ADD(date, val).
        DialectType::Hive => match unit_str.as_str() {
            "MONTH" => {
                Ok(Expression::Function(Box::new(Function::new(
                    "ADD_MONTHS".to_string(),
                    vec![arg2, arg1],
                ))))
            }
            _ => Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![arg2, arg1],
            )))),
        },
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // Cast string literal date to TIMESTAMP
            let arg2 = if matches!(
                &arg2,
                Expression::Literal(Literal::String(_))
            ) {
                Expression::Cast(Box::new(Cast {
                    this: arg2,
                    to: DataType::Timestamp {
                        precision: None,
                        timezone: false,
                    },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                }))
            } else {
                arg2
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![Expression::string(&unit_str), arg1, arg2],
            ))))
        }
        DialectType::MySQL => {
            // MySQL uses the dedicated DateAdd AST node:
            // DATE_ADD(date, INTERVAL val unit).
            let iu = Self::parse_interval_unit_static(&unit_str);
            Ok(Expression::DateAdd(Box::new(
                crate::expressions::DateAddFunc {
                    this: arg2,
                    interval: arg1,
                    unit: iu,
                },
            )))
        }
        DialectType::PostgreSQL => {
            // Cast string literal date to TIMESTAMP
            let arg2 = if matches!(
                &arg2,
                Expression::Literal(Literal::String(_))
            ) {
                Expression::Cast(Box::new(Cast {
                    this: arg2,
                    to: DataType::Timestamp {
                        precision: None,
                        timezone: false,
                    },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                }))
            } else {
                arg2
            };
            // Render as date + INTERVAL 'val unit'; the amount is
            // stringified into the interval literal itself.
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(Expression::string(&format!(
                        "{} {}",
                        Self::expr_to_string_static(&arg1),
                        unit_str
                    ))),
                    unit: None,
                },
            ));
            Ok(Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(arg2, interval),
            )))
        }
        DialectType::BigQuery => {
            // BigQuery: DATE_ADD(date, INTERVAL val unit).
            let iu = Self::parse_interval_unit_static(&unit_str);
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(arg1),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit: iu,
                            use_plural: false,
                        },
                    ),
                },
            ));
            // Non-TSQL sources: CAST string literal to DATETIME
            let arg2 = if !matches!(
                source,
                DialectType::TSQL | DialectType::Fabric
            ) && matches!(
                &arg2,
                Expression::Literal(Literal::String(_))
            ) {
                Expression::Cast(Box::new(Cast {
                    this: arg2,
                    to: DataType::Custom {
                        name: "DATETIME".to_string(),
                    },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                }))
            } else {
                arg2
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![arg2, interval],
            ))))
        }
        _ => {
            // Default: keep DATEADD(unit, val, date).
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
    }
}
// DATE_ADD - 3-arg: either (unit, val, date) from Presto/ClickHouse
// or (date, val, 'UNIT') from Generic canonical form
//
// The two input orders are first unified into (unit, val, date), then the
// target's add syntax is emitted (function call, INTERVAL arithmetic, or
// SQLite's modifier-string form).
"DATE_ADD" if f.args.len() == 3 => {
    let mut args = f.args;
    let arg0 = args.remove(0);
    let arg1 = args.remove(0);
    let arg2 = args.remove(0);
    // Detect Generic canonical form: DATE_ADD(date, amount, 'UNIT')
    // where arg2 is a string literal matching a unit name
    let arg2_unit = match &arg2 {
        Expression::Literal(Literal::String(s)) => {
            let u = s.to_uppercase();
            if matches!(
                u.as_str(),
                "DAY"
                    | "MONTH"
                    | "YEAR"
                    | "HOUR"
                    | "MINUTE"
                    | "SECOND"
                    | "WEEK"
                    | "QUARTER"
                    | "MILLISECOND"
                    | "MICROSECOND"
            ) {
                Some(u)
            } else {
                None
            }
        }
        _ => None,
    };
    // Reorder: if arg2 is the unit, swap to (unit, val, date) form
    let (unit_str, val, date) = if let Some(u) = arg2_unit {
        (u, arg1, arg0)
    } else {
        (Self::get_unit_str_static(&arg0), arg1, arg2)
    };
    // Alias for backward compat with the rest of the match
    let arg1 = val;
    let arg2 = date;

    match target {
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // Presto family: DATE_ADD('unit', val, date).
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![Expression::string(&unit_str), arg1, arg2],
            ))))
        }
        DialectType::DuckDB => {
            // DuckDB: date + INTERVAL val unit.
            let iu = Self::parse_interval_unit_static(&unit_str);
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(arg1),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit: iu,
                            use_plural: false,
                        },
                    ),
                },
            ));
            Ok(Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(arg2, interval),
            )))
        }
        DialectType::PostgreSQL
        | DialectType::Materialize
        | DialectType::RisingWave => {
            // PostgreSQL: x + INTERVAL '1 DAY'
            let amount_str = Self::expr_to_string_static(&arg1);
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(Expression::string(&format!(
                        "{} {}",
                        amount_str, unit_str
                    ))),
                    unit: None,
                },
            ));
            Ok(Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(arg2, interval),
            )))
        }
        DialectType::Snowflake
        | DialectType::TSQL
        | DialectType::Redshift => {
            // These targets share the DATEADD(unit, val, date) spelling.
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
        DialectType::BigQuery
        | DialectType::MySQL
        | DialectType::Doris
        | DialectType::StarRocks
        | DialectType::Drill => {
            // DATE_ADD(date, INTERVAL amount UNIT)
            let iu = Self::parse_interval_unit_static(&unit_str);
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(arg1),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit: iu,
                            use_plural: false,
                        },
                    ),
                },
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![arg2, interval],
            ))))
        }
        DialectType::SQLite => {
            // SQLite: DATE(x, '1 DAY')
            // Build the string '1 DAY' from amount and unit
            // NOTE(review): a non-numeric-literal amount silently falls
            // back to "1" here — confirm that is the intended behavior
            // for expression amounts.
            let amount_str = match &arg1 {
                Expression::Literal(Literal::Number(n)) => n.clone(),
                _ => "1".to_string(),
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATE".to_string(),
                vec![
                    arg2,
                    Expression::string(format!(
                        "{} {}",
                        amount_str, unit_str
                    )),
                ],
            ))))
        }
        DialectType::Dremio => {
            // Dremio: DATE_ADD(date, amount) - drops unit
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![arg2, arg1],
            ))))
        }
        DialectType::Spark => {
            // Spark: DATE_ADD(date, val) for DAY, or DATEADD(UNIT, val, date)
            if unit_str == "DAY" {
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![arg2, arg1],
                ))))
            } else {
                let unit =
                    Expression::Identifier(Identifier::new(&unit_str));
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![unit, arg1, arg2],
                ))))
            }
        }
        DialectType::Databricks => {
            // Databricks keeps the 3-arg DATE_ADD(UNIT, val, date) form.
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
        DialectType::Hive => {
            // Hive: DATE_ADD(date, val) for DAY
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![arg2, arg1],
            ))))
        }
        _ => {
            // Default: 3-arg DATE_ADD(UNIT, val, date).
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
    }
}
// DATE_ADD(date, days) - 2-arg Hive/Spark/Generic form (add days)
//
// Normalizes the two-argument "add N days" form into each target dialect's
// native day-arithmetic construct. `date` and `days` are taken positionally
// from the source call. For Hive/Spark-family sources, several targets wrap a
// string-literal `date` in a double CAST (via the `double_cast_*` helpers) so
// the literal is parsed as a timestamp before being truncated to a date.
"DATE_ADD"
    if f.args.len() == 2
        && matches!(
            source,
            DialectType::Hive
                | DialectType::Spark
                | DialectType::Databricks
                | DialectType::Generic
        ) =>
{
    let mut args = f.args;
    let date = args.remove(0);
    let days = args.remove(0);
    match target {
        DialectType::Hive | DialectType::Spark => {
            // Keep as DATE_ADD(date, days) for Hive/Spark
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![date, days],
            ))))
        }
        DialectType::Databricks => {
            // Databricks: DATEADD(DAY, days, date)
            // Note: argument order flips to (unit, amount, date).
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new("DAY")),
                    days,
                    date,
                ],
            ))))
        }
        DialectType::DuckDB => {
            // DuckDB: CAST(date AS DATE) + INTERVAL days DAY
            let cast_date = Self::ensure_cast_date(date);
            // Wrap complex expressions (like Mul from DATE_SUB negation) in Paren
            // so the rendered INTERVAL operand keeps its precedence.
            let interval_val = if matches!(
                days,
                Expression::Mul(_)
                    | Expression::Sub(_)
                    | Expression::Add(_)
            ) {
                Expression::Paren(Box::new(crate::expressions::Paren {
                    this: days,
                    trailing_comments: vec![],
                }))
            } else {
                days
            };
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(interval_val),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit: crate::expressions::IntervalUnit::Day,
                            use_plural: false,
                        },
                    ),
                },
            ));
            Ok(Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(cast_date, interval),
            )))
        }
        DialectType::Snowflake => {
            // For Hive source with string literal date, use CAST(CAST(date AS TIMESTAMP) AS DATE)
            let cast_date = if matches!(
                source,
                DialectType::Hive
                    | DialectType::Spark
                    | DialectType::Databricks
            ) {
                if matches!(
                    date,
                    Expression::Literal(Literal::String(_))
                ) {
                    Self::double_cast_timestamp_date(date)
                } else {
                    date
                }
            } else {
                date
            };
            // Snowflake: DATEADD(DAY, days, date)
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new("DAY")),
                    days,
                    cast_date,
                ],
            ))))
        }
        DialectType::Redshift => {
            // Redshift: DATEADD(DAY, days, date) — no literal re-casting.
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new("DAY")),
                    days,
                    date,
                ],
            ))))
        }
        DialectType::TSQL | DialectType::Fabric => {
            // For Hive source with string literal date, use CAST(CAST(date AS DATETIME2) AS DATE)
            // But Databricks DATE_ADD doesn't need this wrapping for TSQL
            // (note the source matcher here omits Databricks, unlike Snowflake/Presto).
            let cast_date = if matches!(
                source,
                DialectType::Hive | DialectType::Spark
            ) {
                if matches!(
                    date,
                    Expression::Literal(Literal::String(_))
                ) {
                    Self::double_cast_datetime2_date(date)
                } else {
                    date
                }
            } else {
                date
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new("DAY")),
                    days,
                    cast_date,
                ],
            ))))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // For Hive source with string literal date, use CAST(CAST(date AS TIMESTAMP) AS DATE)
            let cast_date = if matches!(
                source,
                DialectType::Hive
                    | DialectType::Spark
                    | DialectType::Databricks
            ) {
                if matches!(
                    date,
                    Expression::Literal(Literal::String(_))
                ) {
                    Self::double_cast_timestamp_date(date)
                } else {
                    date
                }
            } else {
                date
            };
            // Presto-family takes the unit as a *string* literal, not an identifier.
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![Expression::string("DAY"), days, cast_date],
            ))))
        }
        DialectType::BigQuery => {
            // For Hive/Spark source, wrap date in CAST(CAST(date AS DATETIME) AS DATE)
            // (applied unconditionally here — not just for string literals).
            let cast_date = if matches!(
                source,
                DialectType::Hive
                    | DialectType::Spark
                    | DialectType::Databricks
            ) {
                Self::double_cast_datetime_date(date)
            } else {
                date
            };
            // Wrap complex expressions in Paren for interval
            let interval_val = if matches!(
                days,
                Expression::Mul(_)
                    | Expression::Sub(_)
                    | Expression::Add(_)
            ) {
                Expression::Paren(Box::new(crate::expressions::Paren {
                    this: days,
                    trailing_comments: vec![],
                }))
            } else {
                days
            };
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(interval_val),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit: crate::expressions::IntervalUnit::Day,
                            use_plural: false,
                        },
                    ),
                },
            ));
            // BigQuery: DATE_ADD(date, INTERVAL days DAY)
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![cast_date, interval],
            ))))
        }
        DialectType::MySQL => {
            // MySQL uses the dedicated DateAdd AST node (renders as
            // DATE_ADD(date, INTERVAL days DAY) by the MySQL generator).
            let iu = crate::expressions::IntervalUnit::Day;
            Ok(Expression::DateAdd(Box::new(
                crate::expressions::DateAddFunc {
                    this: date,
                    interval: days,
                    unit: iu,
                },
            )))
        }
        DialectType::PostgreSQL => {
            // PostgreSQL: date + INTERVAL 'N DAY'. The amount is stringified,
            // so a non-literal `days` expression is flattened to text here.
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(Expression::string(&format!(
                        "{} DAY",
                        Self::expr_to_string_static(&days)
                    ))),
                    unit: None,
                },
            ));
            Ok(Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(date, interval),
            )))
        }
        DialectType::Doris
        | DialectType::StarRocks
        | DialectType::Drill => {
            // DATE_ADD(date, INTERVAL days DAY)
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(days),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit: crate::expressions::IntervalUnit::Day,
                            use_plural: false,
                        },
                    ),
                },
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![date, interval],
            ))))
        }
        // Fallback (including Generic): keep the 2-arg form unchanged.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "DATE_ADD".to_string(),
            vec![date, days],
        )))),
    }
}
// DATE_SUB(date, days) - 2-arg Hive/Spark form (subtract days)
//
// Every non-Hive-family target expresses the subtraction as an *addition* of
// `days * -1` (built by `make_neg_days`), reusing the target's DATEADD /
// interval-addition shape from the DATE_ADD arm above.
"DATE_SUB"
    if f.args.len() == 2
        && matches!(
            source,
            DialectType::Hive
                | DialectType::Spark
                | DialectType::Databricks
        ) =>
{
    let mut args = f.args;
    let date = args.remove(0);
    let days = args.remove(0);
    // Helper to create days * -1
    let make_neg_days = |d: Expression| -> Expression {
        Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
            d,
            Expression::Literal(Literal::Number("-1".to_string())),
        )))
    };
    // Computed up-front because `date` is moved into the per-target branches.
    let is_string_literal =
        matches!(date, Expression::Literal(Literal::String(_)));
    match target {
        DialectType::Hive
        | DialectType::Spark
        | DialectType::Databricks => {
            // Keep as DATE_SUB(date, days) for Hive/Spark
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_SUB".to_string(),
                vec![date, days],
            ))))
        }
        DialectType::DuckDB => {
            // DuckDB: CAST(date AS DATE) + INTERVAL (days * -1) DAY
            // The negation is always paren-wrapped inside the interval.
            let cast_date = Self::ensure_cast_date(date);
            let neg = make_neg_days(days);
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(Expression::Paren(Box::new(
                        crate::expressions::Paren {
                            this: neg,
                            trailing_comments: vec![],
                        },
                    ))),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit: crate::expressions::IntervalUnit::Day,
                            use_plural: false,
                        },
                    ),
                },
            ));
            Ok(Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(cast_date, interval),
            )))
        }
        DialectType::Snowflake => {
            // Snowflake: DATEADD(DAY, days * -1, date), double-casting
            // string-literal dates through TIMESTAMP first.
            let cast_date = if is_string_literal {
                Self::double_cast_timestamp_date(date)
            } else {
                date
            };
            let neg = make_neg_days(days);
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new("DAY")),
                    neg,
                    cast_date,
                ],
            ))))
        }
        DialectType::Redshift => {
            // Redshift: DATEADD(DAY, days * -1, date) — no re-casting.
            let neg = make_neg_days(days);
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new("DAY")),
                    neg,
                    date,
                ],
            ))))
        }
        DialectType::TSQL | DialectType::Fabric => {
            // TSQL/Fabric: DATEADD(DAY, days * -1, date) with DATETIME2
            // double-cast for string-literal dates.
            let cast_date = if is_string_literal {
                Self::double_cast_datetime2_date(date)
            } else {
                date
            };
            let neg = make_neg_days(days);
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new("DAY")),
                    neg,
                    cast_date,
                ],
            ))))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // Presto-family: DATE_ADD('DAY', days * -1, date) — unit is a string.
            let cast_date = if is_string_literal {
                Self::double_cast_timestamp_date(date)
            } else {
                date
            };
            let neg = make_neg_days(days);
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![Expression::string("DAY"), neg, cast_date],
            ))))
        }
        DialectType::BigQuery => {
            // BigQuery: DATE_ADD(date, INTERVAL (days * -1) DAY)
            let cast_date = if is_string_literal {
                Self::double_cast_datetime_date(date)
            } else {
                date
            };
            let neg = make_neg_days(days);
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(Expression::Paren(Box::new(
                        crate::expressions::Paren {
                            this: neg,
                            trailing_comments: vec![],
                        },
                    ))),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit: crate::expressions::IntervalUnit::Day,
                            use_plural: false,
                        },
                    ),
                },
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![cast_date, interval],
            ))))
        }
        // Fallback: keep DATE_SUB unchanged for other targets.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "DATE_SUB".to_string(),
            vec![date, days],
        )))),
    }
}
// ADD_MONTHS(date, val) -> target-specific
//
// Rewrites the Oracle/Snowflake-style ADD_MONTHS into the target's month
// arithmetic. Unhandled targets keep ADD_MONTHS as-is.
"ADD_MONTHS" if f.args.len() == 2 => {
    let mut args = f.args;
    let date = args.remove(0);
    let val = args.remove(0);
    match target {
        DialectType::TSQL => {
            // TSQL: DATEADD(MONTH, val, CAST(date AS DATETIME2))
            let cast_date = Self::ensure_cast_datetime2(date);
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new("MONTH")),
                    val,
                    cast_date,
                ],
            ))))
        }
        DialectType::DuckDB => {
            // DuckDB: date + INTERVAL val MONTH
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(val),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit: crate::expressions::IntervalUnit::Month,
                            use_plural: false,
                        },
                    ),
                },
            ));
            Ok(Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(date, interval),
            )))
        }
        DialectType::Snowflake => {
            // Keep ADD_MONTHS when source is Snowflake
            // (round-trip preservation); otherwise emit DATEADD(MONTH, ...).
            if matches!(source, DialectType::Snowflake) {
                Ok(Expression::Function(Box::new(Function::new(
                    "ADD_MONTHS".to_string(),
                    vec![date, val],
                ))))
            } else {
                Ok(Expression::Function(Box::new(Function::new(
                    "DATEADD".to_string(),
                    vec![
                        Expression::Identifier(Identifier::new(
                            "MONTH",
                        )),
                        val,
                        date,
                    ],
                ))))
            }
        }
        DialectType::Redshift => {
            // Redshift: DATEADD(MONTH, val, date)
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new("MONTH")),
                    val,
                    date,
                ],
            ))))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // Presto-family: DATE_ADD('MONTH', val, date) — unit as string.
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![Expression::string("MONTH"), val, date],
            ))))
        }
        DialectType::BigQuery => {
            // BigQuery: DATE_ADD(date, INTERVAL val MONTH)
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(val),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit: crate::expressions::IntervalUnit::Month,
                            use_plural: false,
                        },
                    ),
                },
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![date, interval],
            ))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new(
            "ADD_MONTHS".to_string(),
            vec![date, val],
        )))),
    }
}
// DATETRUNC(unit, date) - TSQL form -> DATE_TRUNC for other targets
//
// The unit is normalized to a canonical string via `get_unit_str_static`,
// then rendered as an identifier (TSQL) or string literal (everyone else).
"DATETRUNC" if f.args.len() == 2 => {
    let mut args = f.args;
    let arg0 = args.remove(0);
    let arg1 = args.remove(0);
    let unit_str = Self::get_unit_str_static(&arg0);
    match target {
        DialectType::TSQL | DialectType::Fabric => {
            // Keep as DATETRUNC for TSQL - the target handler will uppercase the unit
            Ok(Expression::Function(Box::new(Function::new(
                "DATETRUNC".to_string(),
                vec![
                    Expression::Identifier(Identifier::new(&unit_str)),
                    arg1,
                ],
            ))))
        }
        DialectType::DuckDB => {
            // DuckDB: DATE_TRUNC('UNIT', expr) with CAST for string literals
            let date = Self::ensure_cast_timestamp(arg1);
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_TRUNC".to_string(),
                vec![Expression::string(&unit_str), date],
            ))))
        }
        DialectType::ClickHouse => {
            // ClickHouse: dateTrunc('UNIT', expr) — camelCase name.
            Ok(Expression::Function(Box::new(Function::new(
                "dateTrunc".to_string(),
                vec![Expression::string(&unit_str), arg1],
            ))))
        }
        _ => {
            // Standard: DATE_TRUNC('UNIT', expr)
            let unit = Expression::string(&unit_str);
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_TRUNC".to_string(),
                vec![unit, arg1],
            ))))
        }
    }
}
// GETDATE() -> CURRENT_TIMESTAMP for non-TSQL targets
//
// NOTE(review): Fabric is NOT grouped with TSQL here, unlike the DATEADD /
// DATETRUNC arms above, so Fabric falls through to CURRENT_TIMESTAMP —
// confirm that is intended.
// The `sysdate: false` flag distinguishes this from the SYSDATE arm below.
"GETDATE" if f.args.is_empty() => match target {
    DialectType::TSQL => Ok(Expression::Function(f)),
    DialectType::Redshift => Ok(Expression::Function(Box::new(
        Function::new("GETDATE".to_string(), vec![]),
    ))),
    _ => Ok(Expression::CurrentTimestamp(
        crate::expressions::CurrentTimestamp {
            precision: None,
            sysdate: false,
        },
    )),
},
// TO_HEX(x) / HEX(x) -> target-specific hex function
//
// Pure rename: picks the target's spelling, keeping the original name for
// targets not listed.
"TO_HEX" | "HEX" if f.args.len() == 1 => {
    let name = match target {
        DialectType::Presto | DialectType::Trino => "TO_HEX",
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => "HEX",
        DialectType::DuckDB
        | DialectType::PostgreSQL
        | DialectType::Redshift => "TO_HEX",
        _ => &f.name,
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        f.args,
    ))))
}
// FROM_HEX(x) / UNHEX(x) -> target-specific hex decode function
"FROM_HEX" | "UNHEX" if f.args.len() == 1 => {
    match target {
        DialectType::BigQuery => {
            // BigQuery: UNHEX(x) -> FROM_HEX(x)
            // Special case: UNHEX(MD5(x)) -> FROM_HEX(TO_HEX(MD5(x)))
            // because BigQuery MD5 returns BYTES, not hex string
            // NOTE(review): `to_uppercase()` is recomputed per comparison;
            // could be hoisted, behavior unaffected.
            let arg = &f.args[0];
            let wrapped_arg = match arg {
                Expression::Function(inner_f)
                    if inner_f.name.to_uppercase() == "MD5"
                        || inner_f.name.to_uppercase() == "SHA1"
                        || inner_f.name.to_uppercase() == "SHA256"
                        || inner_f.name.to_uppercase() == "SHA512" =>
                {
                    // Wrap hash function in TO_HEX for BigQuery
                    Expression::Function(Box::new(Function::new(
                        "TO_HEX".to_string(),
                        vec![arg.clone()],
                    )))
                }
                // Otherwise take ownership of the single argument as-is.
                _ => f.args.into_iter().next().unwrap(),
            };
            Ok(Expression::Function(Box::new(Function::new(
                "FROM_HEX".to_string(),
                vec![wrapped_arg],
            ))))
        }
        _ => {
            // Non-BigQuery targets: plain rename.
            let name = match target {
                DialectType::Presto | DialectType::Trino => "FROM_HEX",
                DialectType::Spark
                | DialectType::Databricks
                | DialectType::Hive => "UNHEX",
                _ => &f.name,
            };
            Ok(Expression::Function(Box::new(Function::new(
                name.to_string(),
                f.args,
            ))))
        }
    }
}
// TO_UTF8(x) -> ENCODE(x, 'utf-8') for Spark
// (the utf-8 charset argument is appended; other targets pass through).
"TO_UTF8" if f.args.len() == 1 => match target {
    DialectType::Spark | DialectType::Databricks => {
        let mut args = f.args;
        args.push(Expression::string("utf-8"));
        Ok(Expression::Function(Box::new(Function::new(
            "ENCODE".to_string(),
            args,
        ))))
    }
    _ => Ok(Expression::Function(f)),
},
// FROM_UTF8(x) -> DECODE(x, 'utf-8') for Spark
// (mirror of TO_UTF8 above).
"FROM_UTF8" if f.args.len() == 1 => match target {
    DialectType::Spark | DialectType::Databricks => {
        let mut args = f.args;
        args.push(Expression::string("utf-8"));
        Ok(Expression::Function(Box::new(Function::new(
            "DECODE".to_string(),
            args,
        ))))
    }
    _ => Ok(Expression::Function(f)),
},
// STARTS_WITH(x, y) / STARTSWITH(x, y) -> target-specific
// Pure rename between the underscored and fused spellings.
"STARTS_WITH" | "STARTSWITH" if f.args.len() == 2 => {
    let name = match target {
        DialectType::Spark | DialectType::Databricks => "STARTSWITH",
        DialectType::Presto | DialectType::Trino => "STARTS_WITH",
        DialectType::PostgreSQL | DialectType::Redshift => {
            "STARTS_WITH"
        }
        _ => &f.name,
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        f.args,
    ))))
}
// APPROX_COUNT_DISTINCT(x) <-> APPROX_DISTINCT(x)
// Presto-family uses APPROX_DISTINCT; everyone else keeps the long name.
"APPROX_COUNT_DISTINCT" if f.args.len() >= 1 => {
    let name = match target {
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => "APPROX_DISTINCT",
        _ => "APPROX_COUNT_DISTINCT",
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        f.args,
    ))))
}
// JSON_EXTRACT -> GET_JSON_OBJECT for Spark/Hive
//
// BigQuery sources are excluded (their JSON_EXTRACT path semantics are
// handled elsewhere, per the `!matches!(source, ...)` guard).
"JSON_EXTRACT"
    if f.args.len() == 2
        && !matches!(source, DialectType::BigQuery)
        && matches!(
            target,
            DialectType::Spark
                | DialectType::Databricks
                | DialectType::Hive
        ) =>
{
    Ok(Expression::Function(Box::new(Function::new(
        "GET_JSON_OBJECT".to_string(),
        f.args,
    ))))
}
// JSON_EXTRACT(x, path) -> x -> path for SQLite (arrow syntax)
"JSON_EXTRACT"
    if f.args.len() == 2 && matches!(target, DialectType::SQLite) =>
{
    let mut args = f.args;
    // Remove index 1 first so index 0 is still the subject expression.
    let path = args.remove(1);
    let this = args.remove(0);
    // `arrow_syntax: true` makes the generator emit `x -> path`.
    Ok(Expression::JsonExtract(Box::new(
        crate::expressions::JsonExtractFunc {
            this,
            path,
            returning: None,
            arrow_syntax: true,
            hash_arrow_syntax: false,
            wrapper_option: None,
            quotes_option: None,
            on_scalar_string: false,
            on_error: None,
        },
    )))
}
// JSON_FORMAT(x) -> TO_JSON(x) for Spark, TO_JSON_STRING for BigQuery, CAST(TO_JSON(x) AS TEXT) for DuckDB
"JSON_FORMAT" if f.args.len() == 1 => {
    match target {
        DialectType::Spark | DialectType::Databricks => {
            // Presto JSON_FORMAT(JSON '...') needs Spark's string-unquoting flow:
            // REGEXP_EXTRACT(TO_JSON(FROM_JSON('[...]', SCHEMA_OF_JSON('[...]'))), '^.(.*).$', 1)
            // The literal is wrapped in [...] so FROM_JSON sees an array,
            // then the regexp strips the added brackets from the output.
            if matches!(
                source,
                DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Athena
            ) {
                if let Some(Expression::ParseJson(pj)) = f.args.first()
                {
                    if let Expression::Literal(Literal::String(s)) =
                        &pj.this
                    {
                        let wrapped = Expression::Literal(
                            Literal::String(format!("[{}]", s)),
                        );
                        let schema_of_json = Expression::Function(
                            Box::new(Function::new(
                                "SCHEMA_OF_JSON".to_string(),
                                vec![wrapped.clone()],
                            )),
                        );
                        let from_json = Expression::Function(Box::new(
                            Function::new(
                                "FROM_JSON".to_string(),
                                vec![wrapped, schema_of_json],
                            ),
                        ));
                        let to_json = Expression::Function(Box::new(
                            Function::new(
                                "TO_JSON".to_string(),
                                vec![from_json],
                            ),
                        ));
                        // NOTE: early `return` exits the enclosing
                        // transform function, not just this match.
                        return Ok(Expression::Function(Box::new(
                            Function::new(
                                "REGEXP_EXTRACT".to_string(),
                                vec![
                                    to_json,
                                    Expression::Literal(
                                        Literal::String(
                                            "^.(.*).$".to_string(),
                                        ),
                                    ),
                                    Expression::Literal(
                                        Literal::Number(
                                            "1".to_string(),
                                        ),
                                    ),
                                ],
                            ),
                        )));
                    }
                }
            }

            // Strip inner CAST(... AS JSON) or TO_JSON() if present
            // The CastToJsonForSpark may have already converted CAST(x AS JSON) to TO_JSON(x)
            let mut args = f.args;
            if let Some(Expression::Cast(ref c)) = args.first() {
                if matches!(&c.to, DataType::Json | DataType::JsonB) {
                    args = vec![c.this.clone()];
                }
            } else if let Some(Expression::Function(ref inner_f)) =
                args.first()
            {
                if inner_f.name.eq_ignore_ascii_case("TO_JSON")
                    && inner_f.args.len() == 1
                {
                    // Already TO_JSON(x) from CastToJsonForSpark, just use the inner arg
                    args = inner_f.args.clone();
                }
            }
            Ok(Expression::Function(Box::new(Function::new(
                "TO_JSON".to_string(),
                args,
            ))))
        }
        DialectType::BigQuery => Ok(Expression::Function(Box::new(
            Function::new("TO_JSON_STRING".to_string(), f.args),
        ))),
        DialectType::DuckDB => {
            // CAST(TO_JSON(x) AS TEXT)
            let to_json = Expression::Function(Box::new(
                Function::new("TO_JSON".to_string(), f.args),
            ));
            Ok(Expression::Cast(Box::new(Cast {
                this: to_json,
                to: DataType::Text,
                trailing_comments: Vec::new(),
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })))
        }
        _ => Ok(Expression::Function(f)),
    }
}
// SYSDATE -> CURRENT_TIMESTAMP for non-Oracle/Redshift/Snowflake targets
//
// The fallback emits CurrentTimestamp with `sysdate: true` (contrast with
// the GETDATE arm, which sets `sysdate: false`) so generators can
// distinguish the two origins.
"SYSDATE" if f.args.is_empty() => {
    match target {
        DialectType::Oracle | DialectType::Redshift => {
            // Native SYSDATE: pass through untouched.
            Ok(Expression::Function(f))
        }
        DialectType::Snowflake => {
            // Snowflake uses SYSDATE() with parens
            let mut f = *f;
            f.no_parens = false;
            Ok(Expression::Function(Box::new(f)))
        }
        DialectType::DuckDB => {
            // DuckDB: SYSDATE() -> CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
            Ok(Expression::AtTimeZone(Box::new(
                crate::expressions::AtTimeZone {
                    this: Expression::CurrentTimestamp(
                        crate::expressions::CurrentTimestamp {
                            precision: None,
                            sysdate: false,
                        },
                    ),
                    zone: Expression::Literal(Literal::String(
                        "UTC".to_string(),
                    )),
                },
            )))
        }
        _ => Ok(Expression::CurrentTimestamp(
            crate::expressions::CurrentTimestamp {
                precision: None,
                sysdate: true,
            },
        )),
    }
}
// LOGICAL_OR(x) -> BOOL_OR(x)
// Rename for Spark/Databricks only; others keep the original name.
"LOGICAL_OR" if f.args.len() == 1 => {
    let name = match target {
        DialectType::Spark | DialectType::Databricks => "BOOL_OR",
        _ => &f.name,
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        f.args,
    ))))
}
// LOGICAL_AND(x) -> BOOL_AND(x)
// Mirror of LOGICAL_OR above.
"LOGICAL_AND" if f.args.len() == 1 => {
    let name = match target {
        DialectType::Spark | DialectType::Databricks => "BOOL_AND",
        _ => &f.name,
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        f.args,
    ))))
}
// MONTHS_ADD(d, n) -> ADD_MONTHS(d, n) for Oracle
"MONTHS_ADD" if f.args.len() == 2 => match target {
    DialectType::Oracle => Ok(Expression::Function(Box::new(
        Function::new("ADD_MONTHS".to_string(), f.args),
    ))),
    _ => Ok(Expression::Function(f)),
},
// ARRAY_JOIN(arr, sep[, null_replacement]) -> target-specific
"ARRAY_JOIN" if f.args.len() >= 2 => {
    match target {
        DialectType::Spark | DialectType::Databricks => {
            // Keep as ARRAY_JOIN for Spark (it supports null_replacement)
            Ok(Expression::Function(f))
        }
        DialectType::Hive => {
            // ARRAY_JOIN(arr, sep[, null_rep]) -> CONCAT_WS(sep, arr) (drop null_replacement)
            // Note the argument order flips: separator comes first in CONCAT_WS.
            let mut args = f.args;
            let arr = args.remove(0);
            let sep = args.remove(0);
            // Drop any remaining args (null_replacement)
            Ok(Expression::Function(Box::new(Function::new(
                "CONCAT_WS".to_string(),
                vec![sep, arr],
            ))))
        }
        DialectType::Presto | DialectType::Trino => {
            // Presto/Trino support ARRAY_JOIN natively.
            Ok(Expression::Function(f))
        }
        _ => Ok(Expression::Function(f)),
    }
}
// LOCATE(substr, str, pos) 3-arg -> target-specific
// For Presto/DuckDB: STRPOS doesn't support 3-arg, need complex expansion:
// search the suffix starting at `pos`, then re-base the hit with `+ pos - 1`,
// preserving LOCATE's "0 when not found" contract via the is_zero guard.
"LOCATE"
    if f.args.len() == 3
        && matches!(
            target,
            DialectType::Presto
                | DialectType::Trino
                | DialectType::Athena
                | DialectType::DuckDB
        ) =>
{
    let mut args = f.args;
    let substr = args.remove(0);
    let string = args.remove(0);
    let pos = args.remove(0);
    // STRPOS(SUBSTRING(string, pos), substr)
    let substring_call = Expression::Function(Box::new(Function::new(
        "SUBSTRING".to_string(),
        vec![string.clone(), pos.clone()],
    )));
    let strpos_call = Expression::Function(Box::new(Function::new(
        "STRPOS".to_string(),
        vec![substring_call, substr.clone()],
    )));
    // STRPOS(...) + pos - 1
    let pos_adjusted =
        Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
            Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(
                    strpos_call.clone(),
                    pos.clone(),
                ),
            )),
            Expression::number(1),
        )));
    // STRPOS(...) = 0
    let is_zero =
        Expression::Eq(Box::new(crate::expressions::BinaryOp::new(
            strpos_call.clone(),
            Expression::number(0),
        )));

    match target {
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // IF(STRPOS(...) = 0, 0, STRPOS(...) + pos - 1)
            Ok(Expression::Function(Box::new(Function::new(
                "IF".to_string(),
                vec![is_zero, Expression::number(0), pos_adjusted],
            ))))
        }
        DialectType::DuckDB => {
            // CASE WHEN STRPOS(...) = 0 THEN 0 ELSE STRPOS(...) + pos - 1 END
            Ok(Expression::Case(Box::new(crate::expressions::Case {
                operand: None,
                whens: vec![(is_zero, Expression::number(0))],
                else_: Some(pos_adjusted),
                comments: Vec::new(),
                inferred_type: None,
            })))
        }
        // Unreachable given the outer guard, but kept as a safe fallback.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "LOCATE".to_string(),
            vec![substr, string, pos],
        )))),
    }
}
// STRPOS(haystack, needle, occurrence) 3-arg -> INSTR(haystack, needle, 1, occurrence)
// The literal 1 is INSTR's start position.
"STRPOS"
    if f.args.len() == 3
        && matches!(
            target,
            DialectType::BigQuery
                | DialectType::Oracle
                | DialectType::Teradata
        ) =>
{
    let mut args = f.args;
    let haystack = args.remove(0);
    let needle = args.remove(0);
    let occurrence = args.remove(0);
    Ok(Expression::Function(Box::new(Function::new(
        "INSTR".to_string(),
        vec![haystack, needle, Expression::number(1), occurrence],
    ))))
}
// SCHEMA_NAME(id) -> target-specific
//
// Any id argument is discarded for MySQL/SingleStore (SCHEMA() takes none)
// and for SQLite, whose main schema is always the literal 'main'.
"SCHEMA_NAME" if f.args.len() <= 1 => match target {
    DialectType::MySQL | DialectType::SingleStore => {
        Ok(Expression::Function(Box::new(Function::new(
            "SCHEMA".to_string(),
            vec![],
        ))))
    }
    DialectType::PostgreSQL => Ok(Expression::CurrentSchema(Box::new(
        crate::expressions::CurrentSchema { this: None },
    ))),
    DialectType::SQLite => Ok(Expression::string("main")),
    _ => Ok(Expression::Function(f)),
},
// STRTOL(str, base) -> FROM_BASE(str, base) for Trino/Presto
"STRTOL" if f.args.len() == 2 => match target {
    DialectType::Presto | DialectType::Trino => {
        Ok(Expression::Function(Box::new(Function::new(
            "FROM_BASE".to_string(),
            f.args,
        ))))
    }
    _ => Ok(Expression::Function(f)),
},
// EDITDIST3(a, b) -> LEVENSHTEIN(a, b) for Spark
"EDITDIST3" if f.args.len() == 2 => match target {
    DialectType::Spark | DialectType::Databricks => {
        Ok(Expression::Function(Box::new(Function::new(
            "LEVENSHTEIN".to_string(),
            f.args,
        ))))
    }
    _ => Ok(Expression::Function(f)),
},
// FORMAT(num, decimals) from MySQL -> DuckDB FORMAT('{:,.Xf}', num)
//
// Builds a fmt-style spec with thousands separator and a fixed number of
// decimals. A non-literal decimals argument falls back to 0 decimal places.
"FORMAT"
    if f.args.len() == 2
        && matches!(
            source,
            DialectType::MySQL | DialectType::SingleStore
        )
        && matches!(target, DialectType::DuckDB) =>
{
    let mut args = f.args;
    let num_expr = args.remove(0);
    let decimals_expr = args.remove(0);
    // Extract decimal count
    let dec_count = match &decimals_expr {
        Expression::Literal(Literal::Number(n)) => n.clone(),
        _ => "0".to_string(),
    };
    // `{{`/`}}` escape literal braces, yielding e.g. "{:,.2f}".
    let fmt_str = format!("{{:,.{}f}}", dec_count);
    Ok(Expression::Function(Box::new(Function::new(
        "FORMAT".to_string(),
        vec![Expression::string(&fmt_str), num_expr],
    ))))
}
14955 // FORMAT(x, fmt) from TSQL -> DATE_FORMAT for Spark, or expand short codes
14956 "FORMAT"
14957 if f.args.len() == 2
14958 && matches!(
14959 source,
14960 DialectType::TSQL | DialectType::Fabric
14961 ) =>
14962 {
14963 let val_expr = f.args[0].clone();
14964 let fmt_expr = f.args[1].clone();
14965 // Expand unambiguous .NET single-char date format shortcodes to full patterns.
14966 // Only expand shortcodes that are NOT also valid numeric format specifiers.
14967 // Ambiguous: d, D, f, F, g, G (used for both dates and numbers)
14968 // Unambiguous date: m/M (Month day), t/T (Time), y/Y (Year month)
14969 let (expanded_fmt, is_shortcode) = match &fmt_expr {
14970 Expression::Literal(crate::expressions::Literal::String(s)) => {
14971 match s.as_str() {
14972 "m" | "M" => (Expression::string("MMMM d"), true),
14973 "t" => (Expression::string("h:mm tt"), true),
14974 "T" => (Expression::string("h:mm:ss tt"), true),
14975 "y" | "Y" => (Expression::string("MMMM yyyy"), true),
14976 _ => (fmt_expr.clone(), false),
14977 }
14978 }
14979 _ => (fmt_expr.clone(), false),
14980 };
14981 // Check if the format looks like a date format
14982 let is_date_format = is_shortcode
14983 || match &expanded_fmt {
14984 Expression::Literal(
14985 crate::expressions::Literal::String(s),
14986 ) => {
14987 // Date formats typically contain yyyy, MM, dd, MMMM, HH, etc.
14988 s.contains("yyyy")
14989 || s.contains("YYYY")
14990 || s.contains("MM")
14991 || s.contains("dd")
14992 || s.contains("MMMM")
14993 || s.contains("HH")
14994 || s.contains("hh")
14995 || s.contains("ss")
14996 }
14997 _ => false,
14998 };
14999 match target {
15000 DialectType::Spark | DialectType::Databricks => {
15001 let func_name = if is_date_format {
15002 "DATE_FORMAT"
15003 } else {
15004 "FORMAT_NUMBER"
15005 };
15006 Ok(Expression::Function(Box::new(Function::new(
15007 func_name.to_string(),
15008 vec![val_expr, expanded_fmt],
15009 ))))
15010 }
15011 _ => {
15012 // For TSQL and other targets, expand shortcodes but keep FORMAT
15013 if is_shortcode {
15014 Ok(Expression::Function(Box::new(Function::new(
15015 "FORMAT".to_string(),
15016 vec![val_expr, expanded_fmt],
15017 ))))
15018 } else {
15019 Ok(Expression::Function(f))
15020 }
15021 }
15022 }
15023 }
15024 // FORMAT('%s', x) from Trino/Presto -> target-specific
15025 "FORMAT"
15026 if f.args.len() >= 2
15027 && matches!(
15028 source,
15029 DialectType::Trino
15030 | DialectType::Presto
15031 | DialectType::Athena
15032 ) =>
15033 {
15034 let fmt_expr = f.args[0].clone();
15035 let value_args: Vec<Expression> = f.args[1..].to_vec();
15036 match target {
15037 // DuckDB: replace %s with {} in format string
15038 DialectType::DuckDB => {
15039 let new_fmt = match &fmt_expr {
15040 Expression::Literal(Literal::String(s)) => {
15041 Expression::Literal(Literal::String(
15042 s.replace("%s", "{}"),
15043 ))
15044 }
15045 _ => fmt_expr,
15046 };
15047 let mut args = vec![new_fmt];
15048 args.extend(value_args);
15049 Ok(Expression::Function(Box::new(Function::new(
15050 "FORMAT".to_string(),
15051 args,
15052 ))))
15053 }
15054 // Snowflake: FORMAT('%s', x) -> TO_CHAR(x) when just %s
15055 DialectType::Snowflake => match &fmt_expr {
15056 Expression::Literal(Literal::String(s))
15057 if s == "%s" && value_args.len() == 1 =>
15058 {
15059 Ok(Expression::Function(Box::new(Function::new(
15060 "TO_CHAR".to_string(),
15061 value_args,
15062 ))))
15063 }
15064 _ => Ok(Expression::Function(f)),
15065 },
15066 // Default: keep FORMAT as-is
15067 _ => Ok(Expression::Function(f)),
15068 }
15069 }
15070 // LIST_CONTAINS / LIST_HAS / ARRAY_CONTAINS -> target-specific
15071 "LIST_CONTAINS" | "LIST_HAS" | "ARRAY_CONTAINS"
15072 if f.args.len() == 2 =>
15073 {
15074 match target {
15075 DialectType::PostgreSQL | DialectType::Redshift => {
15076 // CASE WHEN needle IS NULL THEN NULL ELSE COALESCE(needle = ANY(arr), FALSE) END
15077 let arr = f.args[0].clone();
15078 let needle = f.args[1].clone();
15079 // Convert [] to ARRAY[] for PostgreSQL
15080 let pg_arr = match arr {
15081 Expression::Array(a) => Expression::ArrayFunc(
15082 Box::new(crate::expressions::ArrayConstructor {
15083 expressions: a.expressions,
15084 bracket_notation: false,
15085 use_list_keyword: false,
15086 }),
15087 ),
15088 _ => arr,
15089 };
15090 // needle = ANY(arr) using the Any quantified expression
15091 let any_expr = Expression::Any(Box::new(
15092 crate::expressions::QuantifiedExpr {
15093 this: needle.clone(),
15094 subquery: pg_arr,
15095 op: Some(crate::expressions::QuantifiedOp::Eq),
15096 },
15097 ));
15098 let coalesce = Expression::Coalesce(Box::new(
15099 crate::expressions::VarArgFunc {
15100 expressions: vec![
15101 any_expr,
15102 Expression::Boolean(
15103 crate::expressions::BooleanLiteral {
15104 value: false,
15105 },
15106 ),
15107 ],
15108 original_name: None,
15109 inferred_type: None,
15110 },
15111 ));
15112 let is_null_check = Expression::IsNull(Box::new(
15113 crate::expressions::IsNull {
15114 this: needle,
15115 not: false,
15116 postfix_form: false,
15117 },
15118 ));
15119 Ok(Expression::Case(Box::new(Case {
15120 operand: None,
15121 whens: vec![(
15122 is_null_check,
15123 Expression::Null(crate::expressions::Null),
15124 )],
15125 else_: Some(coalesce),
15126 comments: Vec::new(),
15127 inferred_type: None,
15128 })))
15129 }
15130 _ => Ok(Expression::Function(Box::new(Function::new(
15131 "ARRAY_CONTAINS".to_string(),
15132 f.args,
15133 )))),
15134 }
15135 }
15136 // LIST_HAS_ANY / ARRAY_HAS_ANY -> target-specific overlap operator
15137 "LIST_HAS_ANY" | "ARRAY_HAS_ANY" if f.args.len() == 2 => {
15138 match target {
15139 DialectType::PostgreSQL | DialectType::Redshift => {
15140 // arr1 && arr2 with ARRAY[] syntax
15141 let mut args = f.args;
15142 let arr1 = args.remove(0);
15143 let arr2 = args.remove(0);
15144 let pg_arr1 = match arr1 {
15145 Expression::Array(a) => Expression::ArrayFunc(
15146 Box::new(crate::expressions::ArrayConstructor {
15147 expressions: a.expressions,
15148 bracket_notation: false,
15149 use_list_keyword: false,
15150 }),
15151 ),
15152 _ => arr1,
15153 };
15154 let pg_arr2 = match arr2 {
15155 Expression::Array(a) => Expression::ArrayFunc(
15156 Box::new(crate::expressions::ArrayConstructor {
15157 expressions: a.expressions,
15158 bracket_notation: false,
15159 use_list_keyword: false,
15160 }),
15161 ),
15162 _ => arr2,
15163 };
15164 Ok(Expression::ArrayOverlaps(Box::new(BinaryOp::new(
15165 pg_arr1, pg_arr2,
15166 ))))
15167 }
15168 DialectType::DuckDB => {
15169 // DuckDB: arr1 && arr2 (native support)
15170 let mut args = f.args;
15171 let arr1 = args.remove(0);
15172 let arr2 = args.remove(0);
15173 Ok(Expression::ArrayOverlaps(Box::new(BinaryOp::new(
15174 arr1, arr2,
15175 ))))
15176 }
15177 _ => Ok(Expression::Function(Box::new(Function::new(
15178 "LIST_HAS_ANY".to_string(),
15179 f.args,
15180 )))),
15181 }
15182 }
15183 // APPROX_QUANTILE(x, q) -> target-specific
15184 "APPROX_QUANTILE" if f.args.len() == 2 => match target {
15185 DialectType::Snowflake => Ok(Expression::Function(Box::new(
15186 Function::new("APPROX_PERCENTILE".to_string(), f.args),
15187 ))),
15188 DialectType::DuckDB => Ok(Expression::Function(f)),
15189 _ => Ok(Expression::Function(f)),
15190 },
15191 // MAKE_DATE(y, m, d) -> DATE(y, m, d) for BigQuery
15192 "MAKE_DATE" if f.args.len() == 3 => match target {
15193 DialectType::BigQuery => Ok(Expression::Function(Box::new(
15194 Function::new("DATE".to_string(), f.args),
15195 ))),
15196 _ => Ok(Expression::Function(f)),
15197 },
15198 // RANGE(start, end[, step]) -> target-specific
15199 "RANGE"
15200 if f.args.len() >= 2 && !matches!(target, DialectType::DuckDB) =>
15201 {
15202 let start = f.args[0].clone();
15203 let end = f.args[1].clone();
15204 let step = f.args.get(2).cloned();
15205 match target {
15206 DialectType::Spark | DialectType::Databricks => {
15207 // RANGE(start, end) -> SEQUENCE(start, end-1)
15208 // RANGE(start, end, step) -> SEQUENCE(start, end-step, step) when step constant
15209 // RANGE(start, start) -> ARRAY() (empty)
15210 // RANGE(start, end, 0) -> ARRAY() (empty)
15211 // When end is variable: IF((end - 1) <= start, ARRAY(), SEQUENCE(start, (end - 1)))
15212
15213 // Check for constant args
15214 fn extract_i64(e: &Expression) -> Option<i64> {
15215 match e {
15216 Expression::Literal(Literal::Number(n)) => {
15217 n.parse::<i64>().ok()
15218 }
15219 Expression::Neg(u) => {
15220 if let Expression::Literal(Literal::Number(n)) =
15221 &u.this
15222 {
15223 n.parse::<i64>().ok().map(|v| -v)
15224 } else {
15225 None
15226 }
15227 }
15228 _ => None,
15229 }
15230 }
15231 let start_val = extract_i64(&start);
15232 let end_val = extract_i64(&end);
15233 let step_val = step.as_ref().and_then(|s| extract_i64(s));
15234
15235 // Check for RANGE(x, x) or RANGE(x, y, 0) -> empty array
15236 if step_val == Some(0) {
15237 return Ok(Expression::Function(Box::new(
15238 Function::new("ARRAY".to_string(), vec![]),
15239 )));
15240 }
15241 if let (Some(s), Some(e_val)) = (start_val, end_val) {
15242 if s == e_val {
15243 return Ok(Expression::Function(Box::new(
15244 Function::new("ARRAY".to_string(), vec![]),
15245 )));
15246 }
15247 }
15248
15249 if let (Some(_s_val), Some(e_val)) = (start_val, end_val) {
15250 // All constants - compute new end = end - step (if step provided) or end - 1
15251 match step_val {
15252 Some(st) if st < 0 => {
15253 // Negative step: SEQUENCE(start, end - step, step)
15254 let new_end = e_val - st; // end - step (= end + |step|)
15255 let mut args =
15256 vec![start, Expression::number(new_end)];
15257 if let Some(s) = step {
15258 args.push(s);
15259 }
15260 Ok(Expression::Function(Box::new(
15261 Function::new("SEQUENCE".to_string(), args),
15262 )))
15263 }
15264 Some(st) => {
15265 let new_end = e_val - st;
15266 let mut args =
15267 vec![start, Expression::number(new_end)];
15268 if let Some(s) = step {
15269 args.push(s);
15270 }
15271 Ok(Expression::Function(Box::new(
15272 Function::new("SEQUENCE".to_string(), args),
15273 )))
15274 }
15275 None => {
15276 // No step: SEQUENCE(start, end - 1)
15277 let new_end = e_val - 1;
15278 Ok(Expression::Function(Box::new(
15279 Function::new(
15280 "SEQUENCE".to_string(),
15281 vec![
15282 start,
15283 Expression::number(new_end),
15284 ],
15285 ),
15286 )))
15287 }
15288 }
15289 } else {
15290 // Variable end: IF((end - 1) <= start, ARRAY(), SEQUENCE(start, (end - 1)))
15291 let end_m1 = Expression::Sub(Box::new(BinaryOp::new(
15292 end.clone(),
15293 Expression::number(1),
15294 )));
15295 let cond = Expression::Lte(Box::new(BinaryOp::new(
15296 Expression::Paren(Box::new(Paren {
15297 this: end_m1.clone(),
15298 trailing_comments: Vec::new(),
15299 })),
15300 start.clone(),
15301 )));
15302 let empty = Expression::Function(Box::new(
15303 Function::new("ARRAY".to_string(), vec![]),
15304 ));
15305 let mut seq_args = vec![
15306 start,
15307 Expression::Paren(Box::new(Paren {
15308 this: end_m1,
15309 trailing_comments: Vec::new(),
15310 })),
15311 ];
15312 if let Some(s) = step {
15313 seq_args.push(s);
15314 }
15315 let seq = Expression::Function(Box::new(
15316 Function::new("SEQUENCE".to_string(), seq_args),
15317 ));
15318 Ok(Expression::IfFunc(Box::new(
15319 crate::expressions::IfFunc {
15320 condition: cond,
15321 true_value: empty,
15322 false_value: Some(seq),
15323 original_name: None,
15324 },
15325 )))
15326 }
15327 }
15328 DialectType::SQLite => {
15329 // RANGE(start, end) -> GENERATE_SERIES(start, end)
15330 // The subquery wrapping is handled at the Alias level
15331 let mut args = vec![start, end];
15332 if let Some(s) = step {
15333 args.push(s);
15334 }
15335 Ok(Expression::Function(Box::new(Function::new(
15336 "GENERATE_SERIES".to_string(),
15337 args,
15338 ))))
15339 }
15340 _ => Ok(Expression::Function(f)),
15341 }
15342 }
15343 // ARRAY_REVERSE_SORT -> target-specific
15344 // (handled above as well, but also need DuckDB self-normalization)
15345 // MAP_FROM_ARRAYS(keys, values) -> target-specific map construction
15346 "MAP_FROM_ARRAYS" if f.args.len() == 2 => match target {
15347 DialectType::Snowflake => Ok(Expression::Function(Box::new(
15348 Function::new("OBJECT_CONSTRUCT".to_string(), f.args),
15349 ))),
15350 DialectType::Spark | DialectType::Databricks => {
15351 Ok(Expression::Function(Box::new(Function::new(
15352 "MAP_FROM_ARRAYS".to_string(),
15353 f.args,
15354 ))))
15355 }
15356 _ => Ok(Expression::Function(Box::new(Function::new(
15357 "MAP".to_string(),
15358 f.args,
15359 )))),
15360 },
15361 // VARIANCE(x) -> varSamp(x) for ClickHouse
15362 "VARIANCE" if f.args.len() == 1 => match target {
15363 DialectType::ClickHouse => Ok(Expression::Function(Box::new(
15364 Function::new("varSamp".to_string(), f.args),
15365 ))),
15366 _ => Ok(Expression::Function(f)),
15367 },
15368 // STDDEV(x) -> stddevSamp(x) for ClickHouse
15369 "STDDEV" if f.args.len() == 1 => match target {
15370 DialectType::ClickHouse => Ok(Expression::Function(Box::new(
15371 Function::new("stddevSamp".to_string(), f.args),
15372 ))),
15373 _ => Ok(Expression::Function(f)),
15374 },
15375 // ISINF(x) -> IS_INF(x) for BigQuery
15376 "ISINF" if f.args.len() == 1 => match target {
15377 DialectType::BigQuery => Ok(Expression::Function(Box::new(
15378 Function::new("IS_INF".to_string(), f.args),
15379 ))),
15380 _ => Ok(Expression::Function(f)),
15381 },
15382 // CONTAINS(arr, x) -> ARRAY_CONTAINS(arr, x) for Spark/Hive
15383 "CONTAINS" if f.args.len() == 2 => match target {
15384 DialectType::Spark
15385 | DialectType::Databricks
15386 | DialectType::Hive => Ok(Expression::Function(Box::new(
15387 Function::new("ARRAY_CONTAINS".to_string(), f.args),
15388 ))),
15389 _ => Ok(Expression::Function(f)),
15390 },
15391 // ARRAY_CONTAINS(arr, x) -> CONTAINS(arr, x) for Presto
15392 "ARRAY_CONTAINS" if f.args.len() == 2 => match target {
15393 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
15394 Ok(Expression::Function(Box::new(Function::new(
15395 "CONTAINS".to_string(),
15396 f.args,
15397 ))))
15398 }
15399 DialectType::DuckDB => Ok(Expression::Function(Box::new(
15400 Function::new("ARRAY_CONTAINS".to_string(), f.args),
15401 ))),
15402 _ => Ok(Expression::Function(f)),
15403 },
15404 // TO_UNIXTIME(x) -> UNIX_TIMESTAMP(x) for Hive/Spark
15405 "TO_UNIXTIME" if f.args.len() == 1 => match target {
15406 DialectType::Hive
15407 | DialectType::Spark
15408 | DialectType::Databricks => Ok(Expression::Function(Box::new(
15409 Function::new("UNIX_TIMESTAMP".to_string(), f.args),
15410 ))),
15411 _ => Ok(Expression::Function(f)),
15412 },
15413 // FROM_UNIXTIME(x) -> target-specific
15414 "FROM_UNIXTIME" if f.args.len() == 1 => {
15415 match target {
15416 DialectType::Hive
15417 | DialectType::Spark
15418 | DialectType::Databricks
15419 | DialectType::Presto
15420 | DialectType::Trino => Ok(Expression::Function(f)),
15421 DialectType::DuckDB => {
15422 // DuckDB: TO_TIMESTAMP(x)
15423 let arg = f.args.into_iter().next().unwrap();
15424 Ok(Expression::Function(Box::new(Function::new(
15425 "TO_TIMESTAMP".to_string(),
15426 vec![arg],
15427 ))))
15428 }
15429 DialectType::PostgreSQL => {
15430 // PG: TO_TIMESTAMP(col)
15431 let arg = f.args.into_iter().next().unwrap();
15432 Ok(Expression::Function(Box::new(Function::new(
15433 "TO_TIMESTAMP".to_string(),
15434 vec![arg],
15435 ))))
15436 }
15437 DialectType::Redshift => {
15438 // Redshift: (TIMESTAMP 'epoch' + col * INTERVAL '1 SECOND')
15439 let arg = f.args.into_iter().next().unwrap();
15440 let epoch_ts = Expression::Literal(Literal::Timestamp(
15441 "epoch".to_string(),
15442 ));
15443 let interval = Expression::Interval(Box::new(
15444 crate::expressions::Interval {
15445 this: Some(Expression::string("1 SECOND")),
15446 unit: None,
15447 },
15448 ));
15449 let mul =
15450 Expression::Mul(Box::new(BinaryOp::new(arg, interval)));
15451 let add =
15452 Expression::Add(Box::new(BinaryOp::new(epoch_ts, mul)));
15453 Ok(Expression::Paren(Box::new(crate::expressions::Paren {
15454 this: add,
15455 trailing_comments: Vec::new(),
15456 })))
15457 }
15458 _ => Ok(Expression::Function(f)),
15459 }
15460 }
15461 // FROM_UNIXTIME(x, fmt) with 2 args from Hive/Spark -> target-specific
15462 "FROM_UNIXTIME"
15463 if f.args.len() == 2
15464 && matches!(
15465 source,
15466 DialectType::Hive
15467 | DialectType::Spark
15468 | DialectType::Databricks
15469 ) =>
15470 {
15471 let mut args = f.args;
15472 let unix_ts = args.remove(0);
15473 let fmt_expr = args.remove(0);
15474 match target {
15475 DialectType::DuckDB => {
15476 // DuckDB: STRFTIME(TO_TIMESTAMP(x), c_fmt)
15477 let to_ts = Expression::Function(Box::new(Function::new(
15478 "TO_TIMESTAMP".to_string(),
15479 vec![unix_ts],
15480 )));
15481 if let Expression::Literal(
15482 crate::expressions::Literal::String(s),
15483 ) = &fmt_expr
15484 {
15485 let c_fmt = Self::hive_format_to_c_format(s);
15486 Ok(Expression::Function(Box::new(Function::new(
15487 "STRFTIME".to_string(),
15488 vec![to_ts, Expression::string(&c_fmt)],
15489 ))))
15490 } else {
15491 Ok(Expression::Function(Box::new(Function::new(
15492 "STRFTIME".to_string(),
15493 vec![to_ts, fmt_expr],
15494 ))))
15495 }
15496 }
15497 DialectType::Presto
15498 | DialectType::Trino
15499 | DialectType::Athena => {
15500 // Presto: DATE_FORMAT(FROM_UNIXTIME(x), presto_fmt)
15501 let from_unix =
15502 Expression::Function(Box::new(Function::new(
15503 "FROM_UNIXTIME".to_string(),
15504 vec![unix_ts],
15505 )));
15506 if let Expression::Literal(
15507 crate::expressions::Literal::String(s),
15508 ) = &fmt_expr
15509 {
15510 let p_fmt = Self::hive_format_to_presto_format(s);
15511 Ok(Expression::Function(Box::new(Function::new(
15512 "DATE_FORMAT".to_string(),
15513 vec![from_unix, Expression::string(&p_fmt)],
15514 ))))
15515 } else {
15516 Ok(Expression::Function(Box::new(Function::new(
15517 "DATE_FORMAT".to_string(),
15518 vec![from_unix, fmt_expr],
15519 ))))
15520 }
15521 }
15522 _ => {
15523 // Keep as FROM_UNIXTIME(x, fmt) for other targets
15524 Ok(Expression::Function(Box::new(Function::new(
15525 "FROM_UNIXTIME".to_string(),
15526 vec![unix_ts, fmt_expr],
15527 ))))
15528 }
15529 }
15530 }
15531 // DATEPART(unit, expr) -> EXTRACT(unit FROM expr) for Spark
15532 "DATEPART" | "DATE_PART" if f.args.len() == 2 => {
15533 let unit_str = Self::get_unit_str_static(&f.args[0]);
15534 // Get the raw unit text preserving original case
15535 let raw_unit = match &f.args[0] {
15536 Expression::Identifier(id) => id.name.clone(),
15537 Expression::Literal(crate::expressions::Literal::String(s)) => {
15538 s.clone()
15539 }
15540 Expression::Column(col) => col.name.name.clone(),
15541 _ => unit_str.clone(),
15542 };
15543 match target {
15544 DialectType::TSQL | DialectType::Fabric => {
15545 // Preserve original case of unit for TSQL
15546 let unit_name = match unit_str.as_str() {
15547 "YY" | "YYYY" => "YEAR".to_string(),
15548 "QQ" | "Q" => "QUARTER".to_string(),
15549 "MM" | "M" => "MONTH".to_string(),
15550 "WK" | "WW" => "WEEK".to_string(),
15551 "DD" | "D" | "DY" => "DAY".to_string(),
15552 "HH" => "HOUR".to_string(),
15553 "MI" | "N" => "MINUTE".to_string(),
15554 "SS" | "S" => "SECOND".to_string(),
15555 _ => raw_unit.clone(), // preserve original case
15556 };
15557 let mut args = f.args;
15558 args[0] =
15559 Expression::Identifier(Identifier::new(&unit_name));
15560 Ok(Expression::Function(Box::new(Function::new(
15561 "DATEPART".to_string(),
15562 args,
15563 ))))
15564 }
15565 DialectType::Spark | DialectType::Databricks => {
15566 // DATEPART(unit, expr) -> EXTRACT(unit FROM expr)
15567 // Preserve original case for non-abbreviation units
15568 let unit = match unit_str.as_str() {
15569 "YY" | "YYYY" => "YEAR".to_string(),
15570 "QQ" | "Q" => "QUARTER".to_string(),
15571 "MM" | "M" => "MONTH".to_string(),
15572 "WK" | "WW" => "WEEK".to_string(),
15573 "DD" | "D" | "DY" => "DAY".to_string(),
15574 "HH" => "HOUR".to_string(),
15575 "MI" | "N" => "MINUTE".to_string(),
15576 "SS" | "S" => "SECOND".to_string(),
15577 _ => raw_unit, // preserve original case
15578 };
15579 Ok(Expression::Extract(Box::new(
15580 crate::expressions::ExtractFunc {
15581 this: f.args[1].clone(),
15582 field: crate::expressions::DateTimeField::Custom(
15583 unit,
15584 ),
15585 },
15586 )))
15587 }
15588 _ => Ok(Expression::Function(Box::new(Function::new(
15589 "DATE_PART".to_string(),
15590 f.args,
15591 )))),
15592 }
15593 }
15594 // DATENAME(mm, date) -> FORMAT(CAST(date AS DATETIME2), 'MMMM') for TSQL
15595 // DATENAME(dw, date) -> FORMAT(CAST(date AS DATETIME2), 'dddd') for TSQL
15596 // DATENAME(mm, date) -> DATE_FORMAT(CAST(date AS TIMESTAMP), 'MMMM') for Spark
15597 // DATENAME(dw, date) -> DATE_FORMAT(CAST(date AS TIMESTAMP), 'EEEE') for Spark
15598 "DATENAME" if f.args.len() == 2 => {
15599 let unit_str = Self::get_unit_str_static(&f.args[0]);
15600 let date_expr = f.args[1].clone();
15601 match unit_str.as_str() {
15602 "MM" | "M" | "MONTH" => match target {
15603 DialectType::TSQL => {
15604 let cast_date = Expression::Cast(Box::new(
15605 crate::expressions::Cast {
15606 this: date_expr,
15607 to: DataType::Custom {
15608 name: "DATETIME2".to_string(),
15609 },
15610 trailing_comments: Vec::new(),
15611 double_colon_syntax: false,
15612 format: None,
15613 default: None,
15614 inferred_type: None,
15615 },
15616 ));
15617 Ok(Expression::Function(Box::new(Function::new(
15618 "FORMAT".to_string(),
15619 vec![cast_date, Expression::string("MMMM")],
15620 ))))
15621 }
15622 DialectType::Spark | DialectType::Databricks => {
15623 let cast_date = Expression::Cast(Box::new(
15624 crate::expressions::Cast {
15625 this: date_expr,
15626 to: DataType::Timestamp {
15627 timezone: false,
15628 precision: None,
15629 },
15630 trailing_comments: Vec::new(),
15631 double_colon_syntax: false,
15632 format: None,
15633 default: None,
15634 inferred_type: None,
15635 },
15636 ));
15637 Ok(Expression::Function(Box::new(Function::new(
15638 "DATE_FORMAT".to_string(),
15639 vec![cast_date, Expression::string("MMMM")],
15640 ))))
15641 }
15642 _ => Ok(Expression::Function(f)),
15643 },
15644 "DW" | "WEEKDAY" => match target {
15645 DialectType::TSQL => {
15646 let cast_date = Expression::Cast(Box::new(
15647 crate::expressions::Cast {
15648 this: date_expr,
15649 to: DataType::Custom {
15650 name: "DATETIME2".to_string(),
15651 },
15652 trailing_comments: Vec::new(),
15653 double_colon_syntax: false,
15654 format: None,
15655 default: None,
15656 inferred_type: None,
15657 },
15658 ));
15659 Ok(Expression::Function(Box::new(Function::new(
15660 "FORMAT".to_string(),
15661 vec![cast_date, Expression::string("dddd")],
15662 ))))
15663 }
15664 DialectType::Spark | DialectType::Databricks => {
15665 let cast_date = Expression::Cast(Box::new(
15666 crate::expressions::Cast {
15667 this: date_expr,
15668 to: DataType::Timestamp {
15669 timezone: false,
15670 precision: None,
15671 },
15672 trailing_comments: Vec::new(),
15673 double_colon_syntax: false,
15674 format: None,
15675 default: None,
15676 inferred_type: None,
15677 },
15678 ));
15679 Ok(Expression::Function(Box::new(Function::new(
15680 "DATE_FORMAT".to_string(),
15681 vec![cast_date, Expression::string("EEEE")],
15682 ))))
15683 }
15684 _ => Ok(Expression::Function(f)),
15685 },
15686 _ => Ok(Expression::Function(f)),
15687 }
15688 }
15689 // STRING_AGG(x, sep) without WITHIN GROUP -> target-specific
15690 "STRING_AGG" if f.args.len() >= 2 => {
15691 let x = f.args[0].clone();
15692 let sep = f.args[1].clone();
15693 match target {
15694 DialectType::MySQL
15695 | DialectType::SingleStore
15696 | DialectType::Doris
15697 | DialectType::StarRocks => Ok(Expression::GroupConcat(
15698 Box::new(crate::expressions::GroupConcatFunc {
15699 this: x,
15700 separator: Some(sep),
15701 order_by: None,
15702 distinct: false,
15703 filter: None,
15704 }),
15705 )),
15706 DialectType::SQLite => Ok(Expression::GroupConcat(Box::new(
15707 crate::expressions::GroupConcatFunc {
15708 this: x,
15709 separator: Some(sep),
15710 order_by: None,
15711 distinct: false,
15712 filter: None,
15713 },
15714 ))),
15715 DialectType::PostgreSQL | DialectType::Redshift => {
15716 Ok(Expression::StringAgg(Box::new(
15717 crate::expressions::StringAggFunc {
15718 this: x,
15719 separator: Some(sep),
15720 order_by: None,
15721 distinct: false,
15722 filter: None,
15723 limit: None,
15724 },
15725 )))
15726 }
15727 _ => Ok(Expression::Function(f)),
15728 }
15729 }
15730 // JSON_ARRAYAGG -> JSON_AGG for PostgreSQL
15731 "JSON_ARRAYAGG" => match target {
15732 DialectType::PostgreSQL => {
15733 Ok(Expression::Function(Box::new(Function {
15734 name: "JSON_AGG".to_string(),
15735 ..(*f)
15736 })))
15737 }
15738 _ => Ok(Expression::Function(f)),
15739 },
15740 // SCHEMA_NAME(id) -> CURRENT_SCHEMA for PostgreSQL, 'main' for SQLite
15741 "SCHEMA_NAME" => match target {
15742 DialectType::PostgreSQL => Ok(Expression::CurrentSchema(Box::new(
15743 crate::expressions::CurrentSchema { this: None },
15744 ))),
15745 DialectType::SQLite => Ok(Expression::string("main")),
15746 _ => Ok(Expression::Function(f)),
15747 },
15748 // TO_TIMESTAMP(x, fmt) 2-arg from Spark/Hive: convert Java format to target format
15749 "TO_TIMESTAMP"
15750 if f.args.len() == 2
15751 && matches!(
15752 source,
15753 DialectType::Spark
15754 | DialectType::Databricks
15755 | DialectType::Hive
15756 )
15757 && matches!(target, DialectType::DuckDB) =>
15758 {
15759 let mut args = f.args;
15760 let val = args.remove(0);
15761 let fmt_expr = args.remove(0);
15762 if let Expression::Literal(Literal::String(ref s)) = fmt_expr {
15763 // Convert Java/Spark format to C strptime format
15764 fn java_to_c_fmt(fmt: &str) -> String {
15765 let result = fmt
15766 .replace("yyyy", "%Y")
15767 .replace("SSSSSS", "%f")
15768 .replace("EEEE", "%W")
15769 .replace("MM", "%m")
15770 .replace("dd", "%d")
15771 .replace("HH", "%H")
15772 .replace("mm", "%M")
15773 .replace("ss", "%S")
15774 .replace("yy", "%y");
15775 let mut out = String::new();
15776 let chars: Vec<char> = result.chars().collect();
15777 let mut i = 0;
15778 while i < chars.len() {
15779 if chars[i] == '%' && i + 1 < chars.len() {
15780 out.push(chars[i]);
15781 out.push(chars[i + 1]);
15782 i += 2;
15783 } else if chars[i] == 'z' {
15784 out.push_str("%Z");
15785 i += 1;
15786 } else if chars[i] == 'Z' {
15787 out.push_str("%z");
15788 i += 1;
15789 } else {
15790 out.push(chars[i]);
15791 i += 1;
15792 }
15793 }
15794 out
15795 }
15796 let c_fmt = java_to_c_fmt(s);
15797 Ok(Expression::Function(Box::new(Function::new(
15798 "STRPTIME".to_string(),
15799 vec![val, Expression::string(&c_fmt)],
15800 ))))
15801 } else {
15802 Ok(Expression::Function(Box::new(Function::new(
15803 "STRPTIME".to_string(),
15804 vec![val, fmt_expr],
15805 ))))
15806 }
15807 }
15808 // TO_DATE(x) 1-arg from Doris: date conversion
15809 "TO_DATE"
15810 if f.args.len() == 1
15811 && matches!(
15812 source,
15813 DialectType::Doris | DialectType::StarRocks
15814 ) =>
15815 {
15816 let arg = f.args.into_iter().next().unwrap();
15817 match target {
15818 DialectType::Oracle
15819 | DialectType::DuckDB
15820 | DialectType::TSQL => {
15821 // CAST(x AS DATE)
15822 Ok(Expression::Cast(Box::new(Cast {
15823 this: arg,
15824 to: DataType::Date,
15825 double_colon_syntax: false,
15826 trailing_comments: vec![],
15827 format: None,
15828 default: None,
15829 inferred_type: None,
15830 })))
15831 }
15832 DialectType::MySQL | DialectType::SingleStore => {
15833 // DATE(x)
15834 Ok(Expression::Function(Box::new(Function::new(
15835 "DATE".to_string(),
15836 vec![arg],
15837 ))))
15838 }
15839 _ => {
15840 // Default: keep as TO_DATE(x) (Spark, PostgreSQL, etc.)
15841 Ok(Expression::Function(Box::new(Function::new(
15842 "TO_DATE".to_string(),
15843 vec![arg],
15844 ))))
15845 }
15846 }
15847 }
15848 // TO_DATE(x) 1-arg from Spark/Hive: safe date conversion
15849 "TO_DATE"
15850 if f.args.len() == 1
15851 && matches!(
15852 source,
15853 DialectType::Spark
15854 | DialectType::Databricks
15855 | DialectType::Hive
15856 ) =>
15857 {
15858 let arg = f.args.into_iter().next().unwrap();
15859 match target {
15860 DialectType::DuckDB => {
15861 // Spark TO_DATE is safe -> TRY_CAST(x AS DATE)
15862 Ok(Expression::TryCast(Box::new(Cast {
15863 this: arg,
15864 to: DataType::Date,
15865 double_colon_syntax: false,
15866 trailing_comments: vec![],
15867 format: None,
15868 default: None,
15869 inferred_type: None,
15870 })))
15871 }
15872 DialectType::Presto
15873 | DialectType::Trino
15874 | DialectType::Athena => {
15875 // CAST(CAST(x AS TIMESTAMP) AS DATE)
15876 Ok(Self::double_cast_timestamp_date(arg))
15877 }
15878 DialectType::Snowflake => {
15879 // Spark's TO_DATE is safe -> TRY_TO_DATE(x, 'yyyy-mm-DD')
15880 // The default Spark format 'yyyy-MM-dd' maps to Snowflake 'yyyy-mm-DD'
15881 Ok(Expression::Function(Box::new(Function::new(
15882 "TRY_TO_DATE".to_string(),
15883 vec![arg, Expression::string("yyyy-mm-DD")],
15884 ))))
15885 }
15886 _ => {
15887 // Default: keep as TO_DATE(x)
15888 Ok(Expression::Function(Box::new(Function::new(
15889 "TO_DATE".to_string(),
15890 vec![arg],
15891 ))))
15892 }
15893 }
15894 }
15895 // TO_DATE(x, fmt) 2-arg from Spark/Hive: format-based date conversion
15896 "TO_DATE"
15897 if f.args.len() == 2
15898 && matches!(
15899 source,
15900 DialectType::Spark
15901 | DialectType::Databricks
15902 | DialectType::Hive
15903 ) =>
15904 {
15905 let mut args = f.args;
15906 let val = args.remove(0);
15907 let fmt_expr = args.remove(0);
15908 let is_default_format = matches!(&fmt_expr, Expression::Literal(Literal::String(s)) if s == "yyyy-MM-dd");
15909
15910 if is_default_format {
15911 // Default format: same as 1-arg form
15912 match target {
15913 DialectType::DuckDB => {
15914 Ok(Expression::TryCast(Box::new(Cast {
15915 this: val,
15916 to: DataType::Date,
15917 double_colon_syntax: false,
15918 trailing_comments: vec![],
15919 format: None,
15920 default: None,
15921 inferred_type: None,
15922 })))
15923 }
15924 DialectType::Presto
15925 | DialectType::Trino
15926 | DialectType::Athena => {
15927 Ok(Self::double_cast_timestamp_date(val))
15928 }
15929 DialectType::Snowflake => {
15930 // TRY_TO_DATE(x, format) with Snowflake format mapping
15931 let sf_fmt = "yyyy-MM-dd"
15932 .replace("yyyy", "yyyy")
15933 .replace("MM", "mm")
15934 .replace("dd", "DD");
15935 Ok(Expression::Function(Box::new(Function::new(
15936 "TRY_TO_DATE".to_string(),
15937 vec![val, Expression::string(&sf_fmt)],
15938 ))))
15939 }
15940 _ => Ok(Expression::Function(Box::new(Function::new(
15941 "TO_DATE".to_string(),
15942 vec![val],
15943 )))),
15944 }
15945 } else {
15946 // Non-default format: use format-based parsing
15947 if let Expression::Literal(Literal::String(ref s)) = fmt_expr {
15948 match target {
15949 DialectType::DuckDB => {
15950 // CAST(CAST(TRY_STRPTIME(x, c_fmt) AS TIMESTAMP) AS DATE)
15951 fn java_to_c_fmt_todate(fmt: &str) -> String {
15952 let result = fmt
15953 .replace("yyyy", "%Y")
15954 .replace("SSSSSS", "%f")
15955 .replace("EEEE", "%W")
15956 .replace("MM", "%m")
15957 .replace("dd", "%d")
15958 .replace("HH", "%H")
15959 .replace("mm", "%M")
15960 .replace("ss", "%S")
15961 .replace("yy", "%y");
15962 let mut out = String::new();
15963 let chars: Vec<char> = result.chars().collect();
15964 let mut i = 0;
15965 while i < chars.len() {
15966 if chars[i] == '%' && i + 1 < chars.len() {
15967 out.push(chars[i]);
15968 out.push(chars[i + 1]);
15969 i += 2;
15970 } else if chars[i] == 'z' {
15971 out.push_str("%Z");
15972 i += 1;
15973 } else if chars[i] == 'Z' {
15974 out.push_str("%z");
15975 i += 1;
15976 } else {
15977 out.push(chars[i]);
15978 i += 1;
15979 }
15980 }
15981 out
15982 }
15983 let c_fmt = java_to_c_fmt_todate(s);
15984 // CAST(CAST(TRY_STRPTIME(x, fmt) AS TIMESTAMP) AS DATE)
15985 let try_strptime =
15986 Expression::Function(Box::new(Function::new(
15987 "TRY_STRPTIME".to_string(),
15988 vec![val, Expression::string(&c_fmt)],
15989 )));
15990 let cast_ts = Expression::Cast(Box::new(Cast {
15991 this: try_strptime,
15992 to: DataType::Timestamp {
15993 precision: None,
15994 timezone: false,
15995 },
15996 double_colon_syntax: false,
15997 trailing_comments: vec![],
15998 format: None,
15999 default: None,
16000 inferred_type: None,
16001 }));
16002 Ok(Expression::Cast(Box::new(Cast {
16003 this: cast_ts,
16004 to: DataType::Date,
16005 double_colon_syntax: false,
16006 trailing_comments: vec![],
16007 format: None,
16008 default: None,
16009 inferred_type: None,
16010 })))
16011 }
16012 DialectType::Presto
16013 | DialectType::Trino
16014 | DialectType::Athena => {
16015 // CAST(DATE_PARSE(x, presto_fmt) AS DATE)
16016 let p_fmt = s
16017 .replace("yyyy", "%Y")
16018 .replace("SSSSSS", "%f")
16019 .replace("MM", "%m")
16020 .replace("dd", "%d")
16021 .replace("HH", "%H")
16022 .replace("mm", "%M")
16023 .replace("ss", "%S")
16024 .replace("yy", "%y");
16025 let date_parse =
16026 Expression::Function(Box::new(Function::new(
16027 "DATE_PARSE".to_string(),
16028 vec![val, Expression::string(&p_fmt)],
16029 )));
16030 Ok(Expression::Cast(Box::new(Cast {
16031 this: date_parse,
16032 to: DataType::Date,
16033 double_colon_syntax: false,
16034 trailing_comments: vec![],
16035 format: None,
16036 default: None,
16037 inferred_type: None,
16038 })))
16039 }
16040 DialectType::Snowflake => {
16041 // TRY_TO_DATE(x, snowflake_fmt)
16042 Ok(Expression::Function(Box::new(Function::new(
16043 "TRY_TO_DATE".to_string(),
16044 vec![val, Expression::string(s)],
16045 ))))
16046 }
16047 _ => Ok(Expression::Function(Box::new(Function::new(
16048 "TO_DATE".to_string(),
16049 vec![val, fmt_expr],
16050 )))),
16051 }
16052 } else {
16053 Ok(Expression::Function(Box::new(Function::new(
16054 "TO_DATE".to_string(),
16055 vec![val, fmt_expr],
16056 ))))
16057 }
16058 }
16059 }
16060 // TO_TIMESTAMP(x) 1-arg: epoch conversion
16061 "TO_TIMESTAMP"
16062 if f.args.len() == 1
16063 && matches!(source, DialectType::DuckDB)
16064 && matches!(
16065 target,
16066 DialectType::BigQuery
16067 | DialectType::Presto
16068 | DialectType::Trino
16069 | DialectType::Hive
16070 | DialectType::Spark
16071 | DialectType::Databricks
16072 | DialectType::Athena
16073 ) =>
16074 {
16075 let arg = f.args.into_iter().next().unwrap();
16076 let func_name = match target {
16077 DialectType::BigQuery => "TIMESTAMP_SECONDS",
16078 DialectType::Presto
16079 | DialectType::Trino
16080 | DialectType::Athena
16081 | DialectType::Hive
16082 | DialectType::Spark
16083 | DialectType::Databricks => "FROM_UNIXTIME",
16084 _ => "TO_TIMESTAMP",
16085 };
16086 Ok(Expression::Function(Box::new(Function::new(
16087 func_name.to_string(),
16088 vec![arg],
16089 ))))
16090 }
16091 // CONCAT(x) single-arg: -> CONCAT(COALESCE(x, '')) for Spark
16092 "CONCAT" if f.args.len() == 1 => {
16093 let arg = f.args.into_iter().next().unwrap();
16094 match target {
16095 DialectType::Presto
16096 | DialectType::Trino
16097 | DialectType::Athena => {
16098 // CONCAT(a) -> CAST(a AS VARCHAR)
16099 Ok(Expression::Cast(Box::new(Cast {
16100 this: arg,
16101 to: DataType::VarChar {
16102 length: None,
16103 parenthesized_length: false,
16104 },
16105 trailing_comments: vec![],
16106 double_colon_syntax: false,
16107 format: None,
16108 default: None,
16109 inferred_type: None,
16110 })))
16111 }
16112 DialectType::TSQL => {
16113 // CONCAT(a) -> a
16114 Ok(arg)
16115 }
16116 DialectType::DuckDB => {
16117 // Keep CONCAT(a) for DuckDB (native support)
16118 Ok(Expression::Function(Box::new(Function::new(
16119 "CONCAT".to_string(),
16120 vec![arg],
16121 ))))
16122 }
16123 DialectType::Spark | DialectType::Databricks => {
16124 let coalesced = Expression::Coalesce(Box::new(
16125 crate::expressions::VarArgFunc {
16126 expressions: vec![arg, Expression::string("")],
16127 original_name: None,
16128 inferred_type: None,
16129 },
16130 ));
16131 Ok(Expression::Function(Box::new(Function::new(
16132 "CONCAT".to_string(),
16133 vec![coalesced],
16134 ))))
16135 }
16136 _ => Ok(Expression::Function(Box::new(Function::new(
16137 "CONCAT".to_string(),
16138 vec![arg],
16139 )))),
16140 }
16141 }
16142 // REGEXP_EXTRACT(a, p) 2-arg: BigQuery default group is 0 (no 3rd arg needed)
16143 "REGEXP_EXTRACT"
16144 if f.args.len() == 3 && matches!(target, DialectType::BigQuery) =>
16145 {
16146 // If group_index is 0, drop it
16147 let drop_group = match &f.args[2] {
16148 Expression::Literal(Literal::Number(n)) => n == "0",
16149 _ => false,
16150 };
16151 if drop_group {
16152 let mut args = f.args;
16153 args.truncate(2);
16154 Ok(Expression::Function(Box::new(Function::new(
16155 "REGEXP_EXTRACT".to_string(),
16156 args,
16157 ))))
16158 } else {
16159 Ok(Expression::Function(f))
16160 }
16161 }
16162 // REGEXP_EXTRACT(a, pattern, group, flags) 4-arg -> REGEXP_SUBSTR for Snowflake
16163 "REGEXP_EXTRACT"
16164 if f.args.len() == 4
16165 && matches!(target, DialectType::Snowflake) =>
16166 {
16167 // REGEXP_EXTRACT(a, 'pattern', 2, 'i') -> REGEXP_SUBSTR(a, 'pattern', 1, 1, 'i', 2)
16168 let mut args = f.args;
16169 let this = args.remove(0);
16170 let pattern = args.remove(0);
16171 let group = args.remove(0);
16172 let flags = args.remove(0);
16173 Ok(Expression::Function(Box::new(Function::new(
16174 "REGEXP_SUBSTR".to_string(),
16175 vec![
16176 this,
16177 pattern,
16178 Expression::number(1),
16179 Expression::number(1),
16180 flags,
16181 group,
16182 ],
16183 ))))
16184 }
16185 // REGEXP_SUBSTR(a, pattern, position) 3-arg -> REGEXP_EXTRACT(SUBSTRING(a, pos), pattern)
16186 "REGEXP_SUBSTR"
16187 if f.args.len() == 3
16188 && matches!(
16189 target,
16190 DialectType::DuckDB
16191 | DialectType::Presto
16192 | DialectType::Trino
16193 | DialectType::Spark
16194 | DialectType::Databricks
16195 ) =>
16196 {
16197 let mut args = f.args;
16198 let this = args.remove(0);
16199 let pattern = args.remove(0);
16200 let position = args.remove(0);
16201 // Wrap subject in SUBSTRING(this, position) to apply the offset
16202 let substring_expr = Expression::Function(Box::new(Function::new(
16203 "SUBSTRING".to_string(),
16204 vec![this, position],
16205 )));
16206 let target_name = match target {
16207 DialectType::DuckDB => "REGEXP_EXTRACT",
16208 _ => "REGEXP_EXTRACT",
16209 };
16210 Ok(Expression::Function(Box::new(Function::new(
16211 target_name.to_string(),
16212 vec![substring_expr, pattern],
16213 ))))
16214 }
16215 // TO_DAYS(x) -> (DATEDIFF(x, '0000-01-01') + 1) or target-specific
16216 "TO_DAYS" if f.args.len() == 1 => {
16217 let x = f.args.into_iter().next().unwrap();
16218 let epoch = Expression::string("0000-01-01");
16219 // Build the final target-specific expression directly
16220 let datediff_expr = match target {
16221 DialectType::MySQL | DialectType::SingleStore => {
16222 // MySQL: (DATEDIFF(x, '0000-01-01') + 1)
16223 Expression::Function(Box::new(Function::new(
16224 "DATEDIFF".to_string(),
16225 vec![x, epoch],
16226 )))
16227 }
16228 DialectType::DuckDB => {
16229 // DuckDB: (DATE_DIFF('DAY', CAST('0000-01-01' AS DATE), CAST(x AS DATE)) + 1)
16230 let cast_epoch = Expression::Cast(Box::new(Cast {
16231 this: epoch,
16232 to: DataType::Date,
16233 trailing_comments: Vec::new(),
16234 double_colon_syntax: false,
16235 format: None,
16236 default: None,
16237 inferred_type: None,
16238 }));
16239 let cast_x = Expression::Cast(Box::new(Cast {
16240 this: x,
16241 to: DataType::Date,
16242 trailing_comments: Vec::new(),
16243 double_colon_syntax: false,
16244 format: None,
16245 default: None,
16246 inferred_type: None,
16247 }));
16248 Expression::Function(Box::new(Function::new(
16249 "DATE_DIFF".to_string(),
16250 vec![Expression::string("DAY"), cast_epoch, cast_x],
16251 )))
16252 }
16253 DialectType::Presto
16254 | DialectType::Trino
16255 | DialectType::Athena => {
16256 // Presto: (DATE_DIFF('DAY', CAST(CAST('0000-01-01' AS TIMESTAMP) AS DATE), CAST(CAST(x AS TIMESTAMP) AS DATE)) + 1)
16257 let cast_epoch = Self::double_cast_timestamp_date(epoch);
16258 let cast_x = Self::double_cast_timestamp_date(x);
16259 Expression::Function(Box::new(Function::new(
16260 "DATE_DIFF".to_string(),
16261 vec![Expression::string("DAY"), cast_epoch, cast_x],
16262 )))
16263 }
16264 _ => {
16265 // Default: (DATEDIFF(x, '0000-01-01') + 1)
16266 Expression::Function(Box::new(Function::new(
16267 "DATEDIFF".to_string(),
16268 vec![x, epoch],
16269 )))
16270 }
16271 };
16272 let add_one = Expression::Add(Box::new(BinaryOp::new(
16273 datediff_expr,
16274 Expression::number(1),
16275 )));
16276 Ok(Expression::Paren(Box::new(crate::expressions::Paren {
16277 this: add_one,
16278 trailing_comments: Vec::new(),
16279 })))
16280 }
16281 // STR_TO_DATE(x, format) -> DATE_PARSE / STRPTIME / TO_DATE etc.
16282 "STR_TO_DATE"
16283 if f.args.len() == 2
16284 && matches!(
16285 target,
16286 DialectType::Presto | DialectType::Trino
16287 ) =>
16288 {
16289 let mut args = f.args;
16290 let x = args.remove(0);
16291 let format_expr = args.remove(0);
16292 // Check if the format contains time components
16293 let has_time =
16294 if let Expression::Literal(Literal::String(ref fmt)) =
16295 format_expr
16296 {
16297 fmt.contains("%H")
16298 || fmt.contains("%T")
16299 || fmt.contains("%M")
16300 || fmt.contains("%S")
16301 || fmt.contains("%I")
16302 || fmt.contains("%p")
16303 } else {
16304 false
16305 };
16306 let date_parse = Expression::Function(Box::new(Function::new(
16307 "DATE_PARSE".to_string(),
16308 vec![x, format_expr],
16309 )));
16310 if has_time {
16311 // Has time components: just DATE_PARSE
16312 Ok(date_parse)
16313 } else {
16314 // Date-only: CAST(DATE_PARSE(...) AS DATE)
16315 Ok(Expression::Cast(Box::new(Cast {
16316 this: date_parse,
16317 to: DataType::Date,
16318 trailing_comments: Vec::new(),
16319 double_colon_syntax: false,
16320 format: None,
16321 default: None,
16322 inferred_type: None,
16323 })))
16324 }
16325 }
16326 "STR_TO_DATE"
16327 if f.args.len() == 2
16328 && matches!(
16329 target,
16330 DialectType::PostgreSQL | DialectType::Redshift
16331 ) =>
16332 {
16333 let mut args = f.args;
16334 let x = args.remove(0);
16335 let fmt = args.remove(0);
16336 let pg_fmt = match fmt {
16337 Expression::Literal(Literal::String(s)) => Expression::string(
16338 &s.replace("%Y", "YYYY")
16339 .replace("%m", "MM")
16340 .replace("%d", "DD")
16341 .replace("%H", "HH24")
16342 .replace("%M", "MI")
16343 .replace("%S", "SS"),
16344 ),
16345 other => other,
16346 };
16347 let to_date = Expression::Function(Box::new(Function::new(
16348 "TO_DATE".to_string(),
16349 vec![x, pg_fmt],
16350 )));
16351 Ok(Expression::Cast(Box::new(Cast {
16352 this: to_date,
16353 to: DataType::Timestamp {
16354 timezone: false,
16355 precision: None,
16356 },
16357 trailing_comments: Vec::new(),
16358 double_colon_syntax: false,
16359 format: None,
16360 default: None,
16361 inferred_type: None,
16362 })))
16363 }
16364 // RANGE(start, end) -> GENERATE_SERIES for SQLite
16365 "RANGE"
16366 if (f.args.len() == 1 || f.args.len() == 2)
16367 && matches!(target, DialectType::SQLite) =>
16368 {
16369 if f.args.len() == 2 {
16370 // RANGE(start, end) -> (SELECT value AS col_alias FROM GENERATE_SERIES(start, end))
16371 // For SQLite, RANGE is exclusive on end, GENERATE_SERIES is inclusive
16372 let mut args = f.args;
16373 let start = args.remove(0);
16374 let end = args.remove(0);
16375 Ok(Expression::Function(Box::new(Function::new(
16376 "GENERATE_SERIES".to_string(),
16377 vec![start, end],
16378 ))))
16379 } else {
16380 Ok(Expression::Function(f))
16381 }
16382 }
16383 // UNIFORM(low, high[, seed]) -> UNIFORM(low, high, RANDOM([seed])) for Snowflake
16384 // When source is Snowflake, keep as-is (args already in correct form)
16385 "UNIFORM"
16386 if matches!(target, DialectType::Snowflake)
16387 && (f.args.len() == 2 || f.args.len() == 3) =>
16388 {
16389 if matches!(source, DialectType::Snowflake) {
16390 // Snowflake -> Snowflake: keep as-is
16391 Ok(Expression::Function(f))
16392 } else {
16393 let mut args = f.args;
16394 let low = args.remove(0);
16395 let high = args.remove(0);
16396 let random = if !args.is_empty() {
16397 let seed = args.remove(0);
16398 Expression::Function(Box::new(Function::new(
16399 "RANDOM".to_string(),
16400 vec![seed],
16401 )))
16402 } else {
16403 Expression::Function(Box::new(Function::new(
16404 "RANDOM".to_string(),
16405 vec![],
16406 )))
16407 };
16408 Ok(Expression::Function(Box::new(Function::new(
16409 "UNIFORM".to_string(),
16410 vec![low, high, random],
16411 ))))
16412 }
16413 }
16414 // TO_UTC_TIMESTAMP(ts, tz) -> target-specific UTC conversion
16415 "TO_UTC_TIMESTAMP" if f.args.len() == 2 => {
16416 let mut args = f.args;
16417 let ts_arg = args.remove(0);
16418 let tz_arg = args.remove(0);
16419 // Cast string literal to TIMESTAMP for all targets
16420 let ts_cast =
16421 if matches!(&ts_arg, Expression::Literal(Literal::String(_))) {
16422 Expression::Cast(Box::new(Cast {
16423 this: ts_arg,
16424 to: DataType::Timestamp {
16425 timezone: false,
16426 precision: None,
16427 },
16428 trailing_comments: vec![],
16429 double_colon_syntax: false,
16430 format: None,
16431 default: None,
16432 inferred_type: None,
16433 }))
16434 } else {
16435 ts_arg
16436 };
16437 match target {
16438 DialectType::Spark | DialectType::Databricks => {
16439 Ok(Expression::Function(Box::new(Function::new(
16440 "TO_UTC_TIMESTAMP".to_string(),
16441 vec![ts_cast, tz_arg],
16442 ))))
16443 }
16444 DialectType::Snowflake => {
16445 // CONVERT_TIMEZONE(tz, 'UTC', CAST(ts AS TIMESTAMP))
16446 Ok(Expression::Function(Box::new(Function::new(
16447 "CONVERT_TIMEZONE".to_string(),
16448 vec![tz_arg, Expression::string("UTC"), ts_cast],
16449 ))))
16450 }
16451 DialectType::Presto
16452 | DialectType::Trino
16453 | DialectType::Athena => {
16454 // WITH_TIMEZONE(CAST(ts AS TIMESTAMP), tz) AT TIME ZONE 'UTC'
16455 let wtz = Expression::Function(Box::new(Function::new(
16456 "WITH_TIMEZONE".to_string(),
16457 vec![ts_cast, tz_arg],
16458 )));
16459 Ok(Expression::AtTimeZone(Box::new(
16460 crate::expressions::AtTimeZone {
16461 this: wtz,
16462 zone: Expression::string("UTC"),
16463 },
16464 )))
16465 }
16466 DialectType::BigQuery => {
16467 // DATETIME(TIMESTAMP(CAST(ts AS DATETIME), tz), 'UTC')
16468 let cast_dt = Expression::Cast(Box::new(Cast {
16469 this: if let Expression::Cast(c) = ts_cast {
16470 c.this
16471 } else {
16472 ts_cast.clone()
16473 },
16474 to: DataType::Custom {
16475 name: "DATETIME".to_string(),
16476 },
16477 trailing_comments: vec![],
16478 double_colon_syntax: false,
16479 format: None,
16480 default: None,
16481 inferred_type: None,
16482 }));
16483 let ts_func =
16484 Expression::Function(Box::new(Function::new(
16485 "TIMESTAMP".to_string(),
16486 vec![cast_dt, tz_arg],
16487 )));
16488 Ok(Expression::Function(Box::new(Function::new(
16489 "DATETIME".to_string(),
16490 vec![ts_func, Expression::string("UTC")],
16491 ))))
16492 }
16493 _ => {
16494 // DuckDB, PostgreSQL, Redshift: CAST(ts AS TIMESTAMP) AT TIME ZONE tz AT TIME ZONE 'UTC'
16495 let atz1 = Expression::AtTimeZone(Box::new(
16496 crate::expressions::AtTimeZone {
16497 this: ts_cast,
16498 zone: tz_arg,
16499 },
16500 ));
16501 Ok(Expression::AtTimeZone(Box::new(
16502 crate::expressions::AtTimeZone {
16503 this: atz1,
16504 zone: Expression::string("UTC"),
16505 },
16506 )))
16507 }
16508 }
16509 }
16510 // FROM_UTC_TIMESTAMP(ts, tz) -> target-specific UTC conversion
16511 "FROM_UTC_TIMESTAMP" if f.args.len() == 2 => {
16512 let mut args = f.args;
16513 let ts_arg = args.remove(0);
16514 let tz_arg = args.remove(0);
16515 // Cast string literal to TIMESTAMP
16516 let ts_cast =
16517 if matches!(&ts_arg, Expression::Literal(Literal::String(_))) {
16518 Expression::Cast(Box::new(Cast {
16519 this: ts_arg,
16520 to: DataType::Timestamp {
16521 timezone: false,
16522 precision: None,
16523 },
16524 trailing_comments: vec![],
16525 double_colon_syntax: false,
16526 format: None,
16527 default: None,
16528 inferred_type: None,
16529 }))
16530 } else {
16531 ts_arg
16532 };
16533 match target {
16534 DialectType::Spark | DialectType::Databricks => {
16535 Ok(Expression::Function(Box::new(Function::new(
16536 "FROM_UTC_TIMESTAMP".to_string(),
16537 vec![ts_cast, tz_arg],
16538 ))))
16539 }
16540 DialectType::Presto
16541 | DialectType::Trino
16542 | DialectType::Athena => {
16543 // AT_TIMEZONE(CAST(ts AS TIMESTAMP), tz)
16544 Ok(Expression::Function(Box::new(Function::new(
16545 "AT_TIMEZONE".to_string(),
16546 vec![ts_cast, tz_arg],
16547 ))))
16548 }
16549 DialectType::Snowflake => {
16550 // CONVERT_TIMEZONE('UTC', tz, CAST(ts AS TIMESTAMP))
16551 Ok(Expression::Function(Box::new(Function::new(
16552 "CONVERT_TIMEZONE".to_string(),
16553 vec![Expression::string("UTC"), tz_arg, ts_cast],
16554 ))))
16555 }
16556 _ => {
16557 // DuckDB, PostgreSQL, Redshift: CAST(ts AS TIMESTAMP) AT TIME ZONE tz
16558 Ok(Expression::AtTimeZone(Box::new(
16559 crate::expressions::AtTimeZone {
16560 this: ts_cast,
16561 zone: tz_arg,
16562 },
16563 )))
16564 }
16565 }
16566 }
16567 // MAP_FROM_ARRAYS(keys, values) -> target-specific map construction
16568 "MAP_FROM_ARRAYS" if f.args.len() == 2 => {
16569 let name = match target {
16570 DialectType::Snowflake => "OBJECT_CONSTRUCT",
16571 _ => "MAP",
16572 };
16573 Ok(Expression::Function(Box::new(Function::new(
16574 name.to_string(),
16575 f.args,
16576 ))))
16577 }
16578 // STR_TO_MAP(s, pair_delim, kv_delim) -> SPLIT_TO_MAP for Presto
16579 "STR_TO_MAP" if f.args.len() >= 1 => match target {
16580 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
16581 Ok(Expression::Function(Box::new(Function::new(
16582 "SPLIT_TO_MAP".to_string(),
16583 f.args,
16584 ))))
16585 }
16586 _ => Ok(Expression::Function(f)),
16587 },
16588 // TIME_TO_STR(x, fmt) -> Expression::TimeToStr for proper generation
16589 "TIME_TO_STR" if f.args.len() == 2 => {
16590 let mut args = f.args;
16591 let this = args.remove(0);
16592 let fmt_expr = args.remove(0);
16593 let format =
16594 if let Expression::Literal(Literal::String(s)) = fmt_expr {
16595 s
16596 } else {
16597 "%Y-%m-%d %H:%M:%S".to_string()
16598 };
16599 Ok(Expression::TimeToStr(Box::new(
16600 crate::expressions::TimeToStr {
16601 this: Box::new(this),
16602 format,
16603 culture: None,
16604 zone: None,
16605 },
16606 )))
16607 }
16608 // STR_TO_TIME(x, fmt) -> Expression::StrToTime for proper generation
16609 "STR_TO_TIME" if f.args.len() == 2 => {
16610 let mut args = f.args;
16611 let this = args.remove(0);
16612 let fmt_expr = args.remove(0);
16613 let format =
16614 if let Expression::Literal(Literal::String(s)) = fmt_expr {
16615 s
16616 } else {
16617 "%Y-%m-%d %H:%M:%S".to_string()
16618 };
16619 Ok(Expression::StrToTime(Box::new(
16620 crate::expressions::StrToTime {
16621 this: Box::new(this),
16622 format,
16623 zone: None,
16624 safe: None,
16625 target_type: None,
16626 },
16627 )))
16628 }
16629 // STR_TO_UNIX(x, fmt) -> Expression::StrToUnix for proper generation
16630 "STR_TO_UNIX" if f.args.len() >= 1 => {
16631 let mut args = f.args;
16632 let this = args.remove(0);
16633 let format = if !args.is_empty() {
16634 if let Expression::Literal(Literal::String(s)) = args.remove(0)
16635 {
16636 Some(s)
16637 } else {
16638 None
16639 }
16640 } else {
16641 None
16642 };
16643 Ok(Expression::StrToUnix(Box::new(
16644 crate::expressions::StrToUnix {
16645 this: Some(Box::new(this)),
16646 format,
16647 },
16648 )))
16649 }
16650 // TIME_TO_UNIX(x) -> Expression::TimeToUnix for proper generation
16651 "TIME_TO_UNIX" if f.args.len() == 1 => {
16652 let mut args = f.args;
16653 let this = args.remove(0);
16654 Ok(Expression::TimeToUnix(Box::new(
16655 crate::expressions::UnaryFunc {
16656 this,
16657 original_name: None,
16658 inferred_type: None,
16659 },
16660 )))
16661 }
16662 // UNIX_TO_STR(x, fmt) -> Expression::UnixToStr for proper generation
16663 "UNIX_TO_STR" if f.args.len() >= 1 => {
16664 let mut args = f.args;
16665 let this = args.remove(0);
16666 let format = if !args.is_empty() {
16667 if let Expression::Literal(Literal::String(s)) = args.remove(0)
16668 {
16669 Some(s)
16670 } else {
16671 None
16672 }
16673 } else {
16674 None
16675 };
16676 Ok(Expression::UnixToStr(Box::new(
16677 crate::expressions::UnixToStr {
16678 this: Box::new(this),
16679 format,
16680 },
16681 )))
16682 }
16683 // UNIX_TO_TIME(x) -> Expression::UnixToTime for proper generation
16684 "UNIX_TO_TIME" if f.args.len() == 1 => {
16685 let mut args = f.args;
16686 let this = args.remove(0);
16687 Ok(Expression::UnixToTime(Box::new(
16688 crate::expressions::UnixToTime {
16689 this: Box::new(this),
16690 scale: None,
16691 zone: None,
16692 hours: None,
16693 minutes: None,
16694 format: None,
16695 target_type: None,
16696 },
16697 )))
16698 }
16699 // TIME_STR_TO_DATE(x) -> Expression::TimeStrToDate for proper generation
16700 "TIME_STR_TO_DATE" if f.args.len() == 1 => {
16701 let mut args = f.args;
16702 let this = args.remove(0);
16703 Ok(Expression::TimeStrToDate(Box::new(
16704 crate::expressions::UnaryFunc {
16705 this,
16706 original_name: None,
16707 inferred_type: None,
16708 },
16709 )))
16710 }
16711 // TIME_STR_TO_TIME(x) -> Expression::TimeStrToTime for proper generation
16712 "TIME_STR_TO_TIME" if f.args.len() == 1 => {
16713 let mut args = f.args;
16714 let this = args.remove(0);
16715 Ok(Expression::TimeStrToTime(Box::new(
16716 crate::expressions::TimeStrToTime {
16717 this: Box::new(this),
16718 zone: None,
16719 },
16720 )))
16721 }
16722 // MONTHS_BETWEEN(end, start) -> DuckDB complex expansion
16723 "MONTHS_BETWEEN" if f.args.len() == 2 => {
16724 match target {
16725 DialectType::DuckDB => {
16726 let mut args = f.args;
16727 let end_date = args.remove(0);
16728 let start_date = args.remove(0);
16729 let cast_end = Self::ensure_cast_date(end_date);
16730 let cast_start = Self::ensure_cast_date(start_date);
16731 // DATE_DIFF('MONTH', start, end) + CASE WHEN DAY(end) = DAY(LAST_DAY(end)) AND DAY(start) = DAY(LAST_DAY(start)) THEN 0 ELSE (DAY(end) - DAY(start)) / 31.0 END
16732 let dd = Expression::Function(Box::new(Function::new(
16733 "DATE_DIFF".to_string(),
16734 vec![
16735 Expression::string("MONTH"),
16736 cast_start.clone(),
16737 cast_end.clone(),
16738 ],
16739 )));
16740 let day_end =
16741 Expression::Function(Box::new(Function::new(
16742 "DAY".to_string(),
16743 vec![cast_end.clone()],
16744 )));
16745 let day_start =
16746 Expression::Function(Box::new(Function::new(
16747 "DAY".to_string(),
16748 vec![cast_start.clone()],
16749 )));
16750 let last_day_end =
16751 Expression::Function(Box::new(Function::new(
16752 "LAST_DAY".to_string(),
16753 vec![cast_end.clone()],
16754 )));
16755 let last_day_start =
16756 Expression::Function(Box::new(Function::new(
16757 "LAST_DAY".to_string(),
16758 vec![cast_start.clone()],
16759 )));
16760 let day_last_end = Expression::Function(Box::new(
16761 Function::new("DAY".to_string(), vec![last_day_end]),
16762 ));
16763 let day_last_start = Expression::Function(Box::new(
16764 Function::new("DAY".to_string(), vec![last_day_start]),
16765 ));
16766 let cond1 = Expression::Eq(Box::new(BinaryOp::new(
16767 day_end.clone(),
16768 day_last_end,
16769 )));
16770 let cond2 = Expression::Eq(Box::new(BinaryOp::new(
16771 day_start.clone(),
16772 day_last_start,
16773 )));
16774 let both_cond =
16775 Expression::And(Box::new(BinaryOp::new(cond1, cond2)));
16776 let day_diff = Expression::Sub(Box::new(BinaryOp::new(
16777 day_end, day_start,
16778 )));
16779 let day_diff_paren = Expression::Paren(Box::new(
16780 crate::expressions::Paren {
16781 this: day_diff,
16782 trailing_comments: Vec::new(),
16783 },
16784 ));
16785 let frac = Expression::Div(Box::new(BinaryOp::new(
16786 day_diff_paren,
16787 Expression::Literal(Literal::Number(
16788 "31.0".to_string(),
16789 )),
16790 )));
16791 let case_expr = Expression::Case(Box::new(Case {
16792 operand: None,
16793 whens: vec![(both_cond, Expression::number(0))],
16794 else_: Some(frac),
16795 comments: Vec::new(),
16796 inferred_type: None,
16797 }));
16798 Ok(Expression::Add(Box::new(BinaryOp::new(dd, case_expr))))
16799 }
16800 DialectType::Snowflake | DialectType::Redshift => {
16801 let mut args = f.args;
16802 let end_date = args.remove(0);
16803 let start_date = args.remove(0);
16804 let unit = Expression::Identifier(Identifier::new("MONTH"));
16805 Ok(Expression::Function(Box::new(Function::new(
16806 "DATEDIFF".to_string(),
16807 vec![unit, start_date, end_date],
16808 ))))
16809 }
16810 DialectType::Presto
16811 | DialectType::Trino
16812 | DialectType::Athena => {
16813 let mut args = f.args;
16814 let end_date = args.remove(0);
16815 let start_date = args.remove(0);
16816 Ok(Expression::Function(Box::new(Function::new(
16817 "DATE_DIFF".to_string(),
16818 vec![Expression::string("MONTH"), start_date, end_date],
16819 ))))
16820 }
16821 _ => Ok(Expression::Function(f)),
16822 }
16823 }
16824 // MONTHS_BETWEEN(end, start, roundOff) - 3-arg form (Spark-specific)
16825 // Drop the roundOff arg for non-Spark targets, keep it for Spark
16826 "MONTHS_BETWEEN" if f.args.len() == 3 => {
16827 match target {
16828 DialectType::Spark | DialectType::Databricks => {
16829 Ok(Expression::Function(f))
16830 }
16831 _ => {
16832 // Drop the 3rd arg and delegate to the 2-arg logic
16833 let mut args = f.args;
16834 let end_date = args.remove(0);
16835 let start_date = args.remove(0);
16836 // Re-create as 2-arg and process
16837 let f2 = Function::new(
16838 "MONTHS_BETWEEN".to_string(),
16839 vec![end_date, start_date],
16840 );
16841 let e2 = Expression::Function(Box::new(f2));
16842 Self::cross_dialect_normalize(e2, source, target)
16843 }
16844 }
16845 }
16846 // TO_TIMESTAMP(x) with 1 arg -> CAST(x AS TIMESTAMP) for most targets
16847 "TO_TIMESTAMP"
16848 if f.args.len() == 1
16849 && matches!(
16850 source,
16851 DialectType::Spark
16852 | DialectType::Databricks
16853 | DialectType::Hive
16854 ) =>
16855 {
16856 let arg = f.args.into_iter().next().unwrap();
16857 Ok(Expression::Cast(Box::new(Cast {
16858 this: arg,
16859 to: DataType::Timestamp {
16860 timezone: false,
16861 precision: None,
16862 },
16863 trailing_comments: vec![],
16864 double_colon_syntax: false,
16865 format: None,
16866 default: None,
16867 inferred_type: None,
16868 })))
16869 }
16870 // STRING(x) -> CAST(x AS STRING) for Spark target
16871 "STRING"
16872 if f.args.len() == 1
16873 && matches!(
16874 source,
16875 DialectType::Spark | DialectType::Databricks
16876 ) =>
16877 {
16878 let arg = f.args.into_iter().next().unwrap();
16879 let dt = match target {
16880 DialectType::Spark
16881 | DialectType::Databricks
16882 | DialectType::Hive => DataType::Custom {
16883 name: "STRING".to_string(),
16884 },
16885 _ => DataType::Text,
16886 };
16887 Ok(Expression::Cast(Box::new(Cast {
16888 this: arg,
16889 to: dt,
16890 trailing_comments: vec![],
16891 double_colon_syntax: false,
16892 format: None,
16893 default: None,
16894 inferred_type: None,
16895 })))
16896 }
16897 // LOGICAL_OR(x) -> BOOL_OR(x) for Spark target
16898 "LOGICAL_OR" if f.args.len() == 1 => {
16899 let name = match target {
16900 DialectType::Spark | DialectType::Databricks => "BOOL_OR",
16901 _ => "LOGICAL_OR",
16902 };
16903 Ok(Expression::Function(Box::new(Function::new(
16904 name.to_string(),
16905 f.args,
16906 ))))
16907 }
16908 // SPLIT(x, pattern) from Spark -> STR_SPLIT_REGEX for DuckDB, REGEXP_SPLIT for Presto
16909 "SPLIT"
16910 if f.args.len() == 2
16911 && matches!(
16912 source,
16913 DialectType::Spark
16914 | DialectType::Databricks
16915 | DialectType::Hive
16916 ) =>
16917 {
16918 let name = match target {
16919 DialectType::DuckDB => "STR_SPLIT_REGEX",
16920 DialectType::Presto
16921 | DialectType::Trino
16922 | DialectType::Athena => "REGEXP_SPLIT",
16923 DialectType::Spark
16924 | DialectType::Databricks
16925 | DialectType::Hive => "SPLIT",
16926 _ => "SPLIT",
16927 };
16928 Ok(Expression::Function(Box::new(Function::new(
16929 name.to_string(),
16930 f.args,
16931 ))))
16932 }
16933 // TRY_ELEMENT_AT -> ELEMENT_AT for Presto, array[idx] for DuckDB
16934 "TRY_ELEMENT_AT" if f.args.len() == 2 => match target {
16935 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
16936 Ok(Expression::Function(Box::new(Function::new(
16937 "ELEMENT_AT".to_string(),
16938 f.args,
16939 ))))
16940 }
16941 DialectType::DuckDB => {
16942 let mut args = f.args;
16943 let arr = args.remove(0);
16944 let idx = args.remove(0);
16945 Ok(Expression::Subscript(Box::new(
16946 crate::expressions::Subscript {
16947 this: arr,
16948 index: idx,
16949 },
16950 )))
16951 }
16952 _ => Ok(Expression::Function(f)),
16953 },
16954 // ARRAY_FILTER(arr, lambda) -> FILTER for Hive/Spark/Presto, LIST_FILTER for DuckDB
16955 "ARRAY_FILTER" if f.args.len() == 2 => {
16956 let name = match target {
16957 DialectType::DuckDB => "LIST_FILTER",
16958 DialectType::StarRocks => "ARRAY_FILTER",
16959 _ => "FILTER",
16960 };
16961 Ok(Expression::Function(Box::new(Function::new(
16962 name.to_string(),
16963 f.args,
16964 ))))
16965 }
16966 // FILTER(arr, lambda) -> ARRAY_FILTER for StarRocks, LIST_FILTER for DuckDB
16967 "FILTER" if f.args.len() == 2 => {
16968 let name = match target {
16969 DialectType::DuckDB => "LIST_FILTER",
16970 DialectType::StarRocks => "ARRAY_FILTER",
16971 _ => "FILTER",
16972 };
16973 Ok(Expression::Function(Box::new(Function::new(
16974 name.to_string(),
16975 f.args,
16976 ))))
16977 }
16978 // REDUCE(arr, init, lambda1, lambda2) -> AGGREGATE for Spark
16979 "REDUCE" if f.args.len() >= 3 => {
16980 let name = match target {
16981 DialectType::Spark | DialectType::Databricks => "AGGREGATE",
16982 _ => "REDUCE",
16983 };
16984 Ok(Expression::Function(Box::new(Function::new(
16985 name.to_string(),
16986 f.args,
16987 ))))
16988 }
16989 // CURRENT_SCHEMA() -> dialect-specific
16990 "CURRENT_SCHEMA" => {
16991 match target {
16992 DialectType::PostgreSQL => {
16993 // PostgreSQL: CURRENT_SCHEMA (no parens)
16994 Ok(Expression::Function(Box::new(Function {
16995 name: "CURRENT_SCHEMA".to_string(),
16996 args: vec![],
16997 distinct: false,
16998 trailing_comments: vec![],
16999 use_bracket_syntax: false,
17000 no_parens: true,
17001 quoted: false,
17002 span: None,
17003 inferred_type: None,
17004 })))
17005 }
17006 DialectType::MySQL
17007 | DialectType::Doris
17008 | DialectType::StarRocks => Ok(Expression::Function(Box::new(
17009 Function::new("SCHEMA".to_string(), vec![]),
17010 ))),
17011 DialectType::TSQL => Ok(Expression::Function(Box::new(
17012 Function::new("SCHEMA_NAME".to_string(), vec![]),
17013 ))),
17014 DialectType::SQLite => {
17015 Ok(Expression::Literal(Literal::String("main".to_string())))
17016 }
17017 _ => Ok(Expression::Function(f)),
17018 }
17019 }
17020 // LTRIM(str, chars) 2-arg -> TRIM(LEADING chars FROM str) for Spark/Hive/Databricks/ClickHouse
17021 "LTRIM" if f.args.len() == 2 => match target {
17022 DialectType::Spark
17023 | DialectType::Hive
17024 | DialectType::Databricks
17025 | DialectType::ClickHouse => {
17026 let mut args = f.args;
17027 let str_expr = args.remove(0);
17028 let chars = args.remove(0);
17029 Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
17030 this: str_expr,
17031 characters: Some(chars),
17032 position: crate::expressions::TrimPosition::Leading,
17033 sql_standard_syntax: true,
17034 position_explicit: true,
17035 })))
17036 }
17037 _ => Ok(Expression::Function(f)),
17038 },
17039 // RTRIM(str, chars) 2-arg -> TRIM(TRAILING chars FROM str) for Spark/Hive/Databricks/ClickHouse
17040 "RTRIM" if f.args.len() == 2 => match target {
17041 DialectType::Spark
17042 | DialectType::Hive
17043 | DialectType::Databricks
17044 | DialectType::ClickHouse => {
17045 let mut args = f.args;
17046 let str_expr = args.remove(0);
17047 let chars = args.remove(0);
17048 Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
17049 this: str_expr,
17050 characters: Some(chars),
17051 position: crate::expressions::TrimPosition::Trailing,
17052 sql_standard_syntax: true,
17053 position_explicit: true,
17054 })))
17055 }
17056 _ => Ok(Expression::Function(f)),
17057 },
17058 // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
17059 "ARRAY_REVERSE" if f.args.len() == 1 => match target {
17060 DialectType::ClickHouse => {
17061 let mut new_f = *f;
17062 new_f.name = "arrayReverse".to_string();
17063 Ok(Expression::Function(Box::new(new_f)))
17064 }
17065 _ => Ok(Expression::Function(f)),
17066 },
17067 // UUID() -> NEWID() for TSQL
17068 "UUID" if f.args.is_empty() => match target {
17069 DialectType::TSQL | DialectType::Fabric => {
17070 Ok(Expression::Function(Box::new(Function::new(
17071 "NEWID".to_string(),
17072 vec![],
17073 ))))
17074 }
17075 _ => Ok(Expression::Function(f)),
17076 },
17077 // FARM_FINGERPRINT(x) -> farmFingerprint64(x) for ClickHouse, FARMFINGERPRINT64(x) for Redshift
17078 "FARM_FINGERPRINT" if f.args.len() == 1 => match target {
17079 DialectType::ClickHouse => {
17080 let mut new_f = *f;
17081 new_f.name = "farmFingerprint64".to_string();
17082 Ok(Expression::Function(Box::new(new_f)))
17083 }
17084 DialectType::Redshift => {
17085 let mut new_f = *f;
17086 new_f.name = "FARMFINGERPRINT64".to_string();
17087 Ok(Expression::Function(Box::new(new_f)))
17088 }
17089 _ => Ok(Expression::Function(f)),
17090 },
17091 // JSON_KEYS(x) -> JSON_OBJECT_KEYS(x) for Databricks/Spark, OBJECT_KEYS(x) for Snowflake
17092 "JSON_KEYS" => match target {
17093 DialectType::Databricks | DialectType::Spark => {
17094 let mut new_f = *f;
17095 new_f.name = "JSON_OBJECT_KEYS".to_string();
17096 Ok(Expression::Function(Box::new(new_f)))
17097 }
17098 DialectType::Snowflake => {
17099 let mut new_f = *f;
17100 new_f.name = "OBJECT_KEYS".to_string();
17101 Ok(Expression::Function(Box::new(new_f)))
17102 }
17103 _ => Ok(Expression::Function(f)),
17104 },
17105 // WEEKOFYEAR(x) -> WEEKISO(x) for Snowflake
17106 "WEEKOFYEAR" => match target {
17107 DialectType::Snowflake => {
17108 let mut new_f = *f;
17109 new_f.name = "WEEKISO".to_string();
17110 Ok(Expression::Function(Box::new(new_f)))
17111 }
17112 _ => Ok(Expression::Function(f)),
17113 },
17114 // FORMAT(fmt, args...) -> FORMAT_STRING(fmt, args...) for Databricks
17115 "FORMAT"
17116 if f.args.len() >= 2 && matches!(source, DialectType::Generic) =>
17117 {
17118 match target {
17119 DialectType::Databricks | DialectType::Spark => {
17120 let mut new_f = *f;
17121 new_f.name = "FORMAT_STRING".to_string();
17122 Ok(Expression::Function(Box::new(new_f)))
17123 }
17124 _ => Ok(Expression::Function(f)),
17125 }
17126 }
17127 // CONCAT_WS('-', args...) -> CONCAT_WS('-', CAST(arg AS VARCHAR), ...) for Presto/Trino
17128 "CONCAT_WS" if f.args.len() >= 2 => match target {
17129 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
17130 let mut args = f.args;
17131 let sep = args.remove(0);
17132 let cast_args: Vec<Expression> = args
17133 .into_iter()
17134 .map(|a| {
17135 Expression::Cast(Box::new(Cast {
17136 this: a,
17137 to: DataType::VarChar {
17138 length: None,
17139 parenthesized_length: false,
17140 },
17141 double_colon_syntax: false,
17142 trailing_comments: Vec::new(),
17143 format: None,
17144 default: None,
17145 inferred_type: None,
17146 }))
17147 })
17148 .collect();
17149 let mut new_args = vec![sep];
17150 new_args.extend(cast_args);
17151 Ok(Expression::Function(Box::new(Function::new(
17152 "CONCAT_WS".to_string(),
17153 new_args,
17154 ))))
17155 }
17156 _ => Ok(Expression::Function(f)),
17157 },
17158 // ARRAY_SLICE(x, start, end) -> SLICE(x, start, end) for Presto/Trino/Databricks, arraySlice for ClickHouse
17159 "ARRAY_SLICE" if f.args.len() >= 2 => match target {
17160 DialectType::Presto
17161 | DialectType::Trino
17162 | DialectType::Athena
17163 | DialectType::Databricks
17164 | DialectType::Spark => {
17165 let mut new_f = *f;
17166 new_f.name = "SLICE".to_string();
17167 Ok(Expression::Function(Box::new(new_f)))
17168 }
17169 DialectType::ClickHouse => {
17170 let mut new_f = *f;
17171 new_f.name = "arraySlice".to_string();
17172 Ok(Expression::Function(Box::new(new_f)))
17173 }
17174 _ => Ok(Expression::Function(f)),
17175 },
17176 // ARRAY_PREPEND(arr, x) -> LIST_PREPEND(x, arr) for DuckDB (swap args)
17177 "ARRAY_PREPEND" if f.args.len() == 2 => match target {
17178 DialectType::DuckDB => {
17179 let mut args = f.args;
17180 let arr = args.remove(0);
17181 let val = args.remove(0);
17182 Ok(Expression::Function(Box::new(Function::new(
17183 "LIST_PREPEND".to_string(),
17184 vec![val, arr],
17185 ))))
17186 }
17187 _ => Ok(Expression::Function(f)),
17188 },
17189 // ARRAY_REMOVE(arr, target) -> dialect-specific
17190 "ARRAY_REMOVE" if f.args.len() == 2 => {
17191 match target {
17192 DialectType::DuckDB => {
17193 let mut args = f.args;
17194 let arr = args.remove(0);
17195 let target_val = args.remove(0);
17196 let u_id = crate::expressions::Identifier::new("_u");
17197 // LIST_FILTER(arr, _u -> _u <> target)
17198 let lambda = Expression::Lambda(Box::new(
17199 crate::expressions::LambdaExpr {
17200 parameters: vec![u_id.clone()],
17201 body: Expression::Neq(Box::new(BinaryOp {
17202 left: Expression::Identifier(u_id),
17203 right: target_val,
17204 left_comments: Vec::new(),
17205 operator_comments: Vec::new(),
17206 trailing_comments: Vec::new(),
17207 inferred_type: None,
17208 })),
17209 colon: false,
17210 parameter_types: Vec::new(),
17211 },
17212 ));
17213 Ok(Expression::Function(Box::new(Function::new(
17214 "LIST_FILTER".to_string(),
17215 vec![arr, lambda],
17216 ))))
17217 }
17218 DialectType::ClickHouse => {
17219 let mut args = f.args;
17220 let arr = args.remove(0);
17221 let target_val = args.remove(0);
17222 let u_id = crate::expressions::Identifier::new("_u");
17223 // arrayFilter(_u -> _u <> target, arr)
17224 let lambda = Expression::Lambda(Box::new(
17225 crate::expressions::LambdaExpr {
17226 parameters: vec![u_id.clone()],
17227 body: Expression::Neq(Box::new(BinaryOp {
17228 left: Expression::Identifier(u_id),
17229 right: target_val,
17230 left_comments: Vec::new(),
17231 operator_comments: Vec::new(),
17232 trailing_comments: Vec::new(),
17233 inferred_type: None,
17234 })),
17235 colon: false,
17236 parameter_types: Vec::new(),
17237 },
17238 ));
17239 Ok(Expression::Function(Box::new(Function::new(
17240 "arrayFilter".to_string(),
17241 vec![lambda, arr],
17242 ))))
17243 }
17244 DialectType::BigQuery => {
17245 // ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)
17246 let mut args = f.args;
17247 let arr = args.remove(0);
17248 let target_val = args.remove(0);
17249 let u_id = crate::expressions::Identifier::new("_u");
17250 let u_col =
17251 Expression::Column(crate::expressions::Column {
17252 name: u_id.clone(),
17253 table: None,
17254 join_mark: false,
17255 trailing_comments: Vec::new(),
17256 span: None,
17257 inferred_type: None,
17258 });
17259 // UNNEST(the_array) AS _u
17260 let unnest_expr = Expression::Unnest(Box::new(
17261 crate::expressions::UnnestFunc {
17262 this: arr,
17263 expressions: Vec::new(),
17264 with_ordinality: false,
17265 alias: None,
17266 offset_alias: None,
17267 },
17268 ));
17269 let aliased_unnest = Expression::Alias(Box::new(
17270 crate::expressions::Alias {
17271 this: unnest_expr,
17272 alias: u_id.clone(),
17273 column_aliases: Vec::new(),
17274 pre_alias_comments: Vec::new(),
17275 trailing_comments: Vec::new(),
17276 inferred_type: None,
17277 },
17278 ));
17279 // _u <> target
17280 let where_cond = Expression::Neq(Box::new(BinaryOp {
17281 left: u_col.clone(),
17282 right: target_val,
17283 left_comments: Vec::new(),
17284 operator_comments: Vec::new(),
17285 trailing_comments: Vec::new(),
17286 inferred_type: None,
17287 }));
17288 // SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target
17289 let subquery = Expression::Select(Box::new(
17290 crate::expressions::Select::new()
17291 .column(u_col)
17292 .from(aliased_unnest)
17293 .where_(where_cond),
17294 ));
17295 // ARRAY(subquery) -- use ArrayFunc with subquery as single element
17296 Ok(Expression::ArrayFunc(Box::new(
17297 crate::expressions::ArrayConstructor {
17298 expressions: vec![subquery],
17299 bracket_notation: false,
17300 use_list_keyword: false,
17301 },
17302 )))
17303 }
17304 _ => Ok(Expression::Function(f)),
17305 }
17306 }
17307 // PARSE_JSON(str) -> remove for SQLite/Doris (just use the string literal)
17308 "PARSE_JSON" if f.args.len() == 1 => {
17309 match target {
17310 DialectType::SQLite
17311 | DialectType::Doris
17312 | DialectType::MySQL
17313 | DialectType::StarRocks => {
17314 // Strip PARSE_JSON, return the inner argument
17315 Ok(f.args.into_iter().next().unwrap())
17316 }
17317 _ => Ok(Expression::Function(f)),
17318 }
17319 }
17320 // JSON_REMOVE(PARSE_JSON(str), path...) -> for SQLite strip PARSE_JSON
17321 // This is handled by PARSE_JSON stripping above; JSON_REMOVE is passed through
17322 "JSON_REMOVE" => Ok(Expression::Function(f)),
17323 // JSON_SET(PARSE_JSON(str), path, PARSE_JSON(val)) -> for SQLite strip PARSE_JSON
17324 // This is handled by PARSE_JSON stripping above; JSON_SET is passed through
17325 "JSON_SET" => Ok(Expression::Function(f)),
17326 // DECODE(x, search1, result1, ..., default) -> CASE WHEN
17327 // Behavior per search value type:
17328 // NULL literal -> CASE WHEN x IS NULL THEN result
17329 // Literal (number, string, bool) -> CASE WHEN x = literal THEN result
17330 // Non-literal (column, expr) -> CASE WHEN x = search OR (x IS NULL AND search IS NULL) THEN result
17331 "DECODE" if f.args.len() >= 3 => {
17332 // Keep as DECODE for targets that support it natively
17333 let keep_as_decode = matches!(
17334 target,
17335 DialectType::Oracle
17336 | DialectType::Snowflake
17337 | DialectType::Redshift
17338 | DialectType::Teradata
17339 | DialectType::Spark
17340 | DialectType::Databricks
17341 );
17342 if keep_as_decode {
17343 return Ok(Expression::Function(f));
17344 }
17345
17346 let mut args = f.args;
17347 let this_expr = args.remove(0);
17348 let mut pairs = Vec::new();
17349 let mut default = None;
17350 let mut i = 0;
17351 while i + 1 < args.len() {
17352 pairs.push((args[i].clone(), args[i + 1].clone()));
17353 i += 2;
17354 }
17355 if i < args.len() {
17356 default = Some(args[i].clone());
17357 }
17358 // Helper: check if expression is a literal value
17359 fn is_literal(e: &Expression) -> bool {
17360 matches!(
17361 e,
17362 Expression::Literal(_)
17363 | Expression::Boolean(_)
17364 | Expression::Neg(_)
17365 )
17366 }
17367 let whens: Vec<(Expression, Expression)> = pairs
17368 .into_iter()
17369 .map(|(search, result)| {
17370 if matches!(&search, Expression::Null(_)) {
17371 // NULL search -> IS NULL
17372 let condition = Expression::Is(Box::new(BinaryOp {
17373 left: this_expr.clone(),
17374 right: Expression::Null(crate::expressions::Null),
17375 left_comments: Vec::new(),
17376 operator_comments: Vec::new(),
17377 trailing_comments: Vec::new(),
17378 inferred_type: None,
17379 }));
17380 (condition, result)
17381 } else if is_literal(&search) {
17382 // Literal search -> simple equality
17383 let eq = Expression::Eq(Box::new(BinaryOp {
17384 left: this_expr.clone(),
17385 right: search,
17386 left_comments: Vec::new(),
17387 operator_comments: Vec::new(),
17388 trailing_comments: Vec::new(),
17389 inferred_type: None,
17390 }));
17391 (eq, result)
17392 } else {
17393 // Non-literal (column ref, expression) -> null-safe comparison
17394 let needs_paren = matches!(
17395 &search,
17396 Expression::Eq(_)
17397 | Expression::Neq(_)
17398 | Expression::Gt(_)
17399 | Expression::Gte(_)
17400 | Expression::Lt(_)
17401 | Expression::Lte(_)
17402 );
17403 let search_for_eq = if needs_paren {
17404 Expression::Paren(Box::new(
17405 crate::expressions::Paren {
17406 this: search.clone(),
17407 trailing_comments: Vec::new(),
17408 },
17409 ))
17410 } else {
17411 search.clone()
17412 };
17413 let eq = Expression::Eq(Box::new(BinaryOp {
17414 left: this_expr.clone(),
17415 right: search_for_eq,
17416 left_comments: Vec::new(),
17417 operator_comments: Vec::new(),
17418 trailing_comments: Vec::new(),
17419 inferred_type: None,
17420 }));
17421 let search_for_null = if needs_paren {
17422 Expression::Paren(Box::new(
17423 crate::expressions::Paren {
17424 this: search.clone(),
17425 trailing_comments: Vec::new(),
17426 },
17427 ))
17428 } else {
17429 search.clone()
17430 };
17431 let x_is_null = Expression::Is(Box::new(BinaryOp {
17432 left: this_expr.clone(),
17433 right: Expression::Null(crate::expressions::Null),
17434 left_comments: Vec::new(),
17435 operator_comments: Vec::new(),
17436 trailing_comments: Vec::new(),
17437 inferred_type: None,
17438 }));
17439 let s_is_null = Expression::Is(Box::new(BinaryOp {
17440 left: search_for_null,
17441 right: Expression::Null(crate::expressions::Null),
17442 left_comments: Vec::new(),
17443 operator_comments: Vec::new(),
17444 trailing_comments: Vec::new(),
17445 inferred_type: None,
17446 }));
17447 let both_null = Expression::And(Box::new(BinaryOp {
17448 left: x_is_null,
17449 right: s_is_null,
17450 left_comments: Vec::new(),
17451 operator_comments: Vec::new(),
17452 trailing_comments: Vec::new(),
17453 inferred_type: None,
17454 }));
17455 let condition = Expression::Or(Box::new(BinaryOp {
17456 left: eq,
17457 right: Expression::Paren(Box::new(
17458 crate::expressions::Paren {
17459 this: both_null,
17460 trailing_comments: Vec::new(),
17461 },
17462 )),
17463 left_comments: Vec::new(),
17464 operator_comments: Vec::new(),
17465 trailing_comments: Vec::new(),
17466 inferred_type: None,
17467 }));
17468 (condition, result)
17469 }
17470 })
17471 .collect();
17472 Ok(Expression::Case(Box::new(Case {
17473 operand: None,
17474 whens,
17475 else_: default,
17476 comments: Vec::new(),
17477 inferred_type: None,
17478 })))
17479 }
17480 // LEVENSHTEIN(a, b, ...) -> dialect-specific
17481 "LEVENSHTEIN" => {
17482 match target {
17483 DialectType::BigQuery => {
17484 let mut new_f = *f;
17485 new_f.name = "EDIT_DISTANCE".to_string();
17486 Ok(Expression::Function(Box::new(new_f)))
17487 }
17488 DialectType::Drill => {
17489 let mut new_f = *f;
17490 new_f.name = "LEVENSHTEIN_DISTANCE".to_string();
17491 Ok(Expression::Function(Box::new(new_f)))
17492 }
17493 DialectType::PostgreSQL if f.args.len() == 6 => {
17494 // PostgreSQL: LEVENSHTEIN(src, tgt, ins, del, sub, max_d) -> LEVENSHTEIN_LESS_EQUAL
17495 // 2 args: basic, 5 args: with costs, 6 args: with costs + max_distance
17496 let mut new_f = *f;
17497 new_f.name = "LEVENSHTEIN_LESS_EQUAL".to_string();
17498 Ok(Expression::Function(Box::new(new_f)))
17499 }
17500 _ => Ok(Expression::Function(f)),
17501 }
17502 }
17503 // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
17504 "ARRAY_REVERSE" => match target {
17505 DialectType::ClickHouse => {
17506 let mut new_f = *f;
17507 new_f.name = "arrayReverse".to_string();
17508 Ok(Expression::Function(Box::new(new_f)))
17509 }
17510 _ => Ok(Expression::Function(f)),
17511 },
17512 // GENERATE_DATE_ARRAY(start, end[, step]) -> target-specific
17513 "GENERATE_DATE_ARRAY" => {
17514 let mut args = f.args;
17515 if matches!(target, DialectType::BigQuery) {
17516 // BigQuery keeps GENERATE_DATE_ARRAY; add default interval if not present
17517 if args.len() == 2 {
17518 let default_interval = Expression::Interval(Box::new(
17519 crate::expressions::Interval {
17520 this: Some(Expression::Literal(Literal::String(
17521 "1".to_string(),
17522 ))),
17523 unit: Some(
17524 crate::expressions::IntervalUnitSpec::Simple {
17525 unit: crate::expressions::IntervalUnit::Day,
17526 use_plural: false,
17527 },
17528 ),
17529 },
17530 ));
17531 args.push(default_interval);
17532 }
17533 Ok(Expression::Function(Box::new(Function::new(
17534 "GENERATE_DATE_ARRAY".to_string(),
17535 args,
17536 ))))
17537 } else if matches!(target, DialectType::DuckDB) {
17538 // DuckDB: CAST(GENERATE_SERIES(start, end, step) AS DATE[])
17539 let start = args.get(0).cloned();
17540 let end = args.get(1).cloned();
17541 let step = args.get(2).cloned().or_else(|| {
17542 Some(Expression::Interval(Box::new(
17543 crate::expressions::Interval {
17544 this: Some(Expression::Literal(Literal::String(
17545 "1".to_string(),
17546 ))),
17547 unit: Some(
17548 crate::expressions::IntervalUnitSpec::Simple {
17549 unit: crate::expressions::IntervalUnit::Day,
17550 use_plural: false,
17551 },
17552 ),
17553 },
17554 )))
17555 });
17556 let gen_series = Expression::GenerateSeries(Box::new(
17557 crate::expressions::GenerateSeries {
17558 start: start.map(Box::new),
17559 end: end.map(Box::new),
17560 step: step.map(Box::new),
17561 is_end_exclusive: None,
17562 },
17563 ));
17564 Ok(Expression::Cast(Box::new(Cast {
17565 this: gen_series,
17566 to: DataType::Array {
17567 element_type: Box::new(DataType::Date),
17568 dimension: None,
17569 },
17570 trailing_comments: vec![],
17571 double_colon_syntax: false,
17572 format: None,
17573 default: None,
17574 inferred_type: None,
17575 })))
17576 } else if matches!(
17577 target,
17578 DialectType::Presto | DialectType::Trino | DialectType::Athena
17579 ) {
17580 // Presto/Trino: SEQUENCE(start, end, interval) with interval normalization
17581 let start = args.get(0).cloned();
17582 let end = args.get(1).cloned();
17583 let step = args.get(2).cloned().or_else(|| {
17584 Some(Expression::Interval(Box::new(
17585 crate::expressions::Interval {
17586 this: Some(Expression::Literal(Literal::String(
17587 "1".to_string(),
17588 ))),
17589 unit: Some(
17590 crate::expressions::IntervalUnitSpec::Simple {
17591 unit: crate::expressions::IntervalUnit::Day,
17592 use_plural: false,
17593 },
17594 ),
17595 },
17596 )))
17597 });
17598 let gen_series = Expression::GenerateSeries(Box::new(
17599 crate::expressions::GenerateSeries {
17600 start: start.map(Box::new),
17601 end: end.map(Box::new),
17602 step: step.map(Box::new),
17603 is_end_exclusive: None,
17604 },
17605 ));
17606 Ok(gen_series)
17607 } else if matches!(
17608 target,
17609 DialectType::Spark | DialectType::Databricks
17610 ) {
17611 // Spark/Databricks: SEQUENCE(start, end, step) - keep step as-is
17612 let start = args.get(0).cloned();
17613 let end = args.get(1).cloned();
17614 let step = args.get(2).cloned().or_else(|| {
17615 Some(Expression::Interval(Box::new(
17616 crate::expressions::Interval {
17617 this: Some(Expression::Literal(Literal::String(
17618 "1".to_string(),
17619 ))),
17620 unit: Some(
17621 crate::expressions::IntervalUnitSpec::Simple {
17622 unit: crate::expressions::IntervalUnit::Day,
17623 use_plural: false,
17624 },
17625 ),
17626 },
17627 )))
17628 });
17629 let gen_series = Expression::GenerateSeries(Box::new(
17630 crate::expressions::GenerateSeries {
17631 start: start.map(Box::new),
17632 end: end.map(Box::new),
17633 step: step.map(Box::new),
17634 is_end_exclusive: None,
17635 },
17636 ));
17637 Ok(gen_series)
17638 } else if matches!(target, DialectType::Snowflake) {
17639 // Snowflake: keep as GENERATE_DATE_ARRAY for later transform
17640 if args.len() == 2 {
17641 let default_interval = Expression::Interval(Box::new(
17642 crate::expressions::Interval {
17643 this: Some(Expression::Literal(Literal::String(
17644 "1".to_string(),
17645 ))),
17646 unit: Some(
17647 crate::expressions::IntervalUnitSpec::Simple {
17648 unit: crate::expressions::IntervalUnit::Day,
17649 use_plural: false,
17650 },
17651 ),
17652 },
17653 ));
17654 args.push(default_interval);
17655 }
17656 Ok(Expression::Function(Box::new(Function::new(
17657 "GENERATE_DATE_ARRAY".to_string(),
17658 args,
17659 ))))
17660 } else if matches!(
17661 target,
17662 DialectType::MySQL
17663 | DialectType::TSQL
17664 | DialectType::Fabric
17665 | DialectType::Redshift
17666 ) {
17667 // MySQL/TSQL/Redshift: keep as GENERATE_DATE_ARRAY for the preprocess
17668 // step (unnest_generate_date_array_using_recursive_cte) to convert to CTE
17669 Ok(Expression::Function(Box::new(Function::new(
17670 "GENERATE_DATE_ARRAY".to_string(),
17671 args,
17672 ))))
17673 } else {
17674 // PostgreSQL/others: convert to GenerateSeries
17675 let start = args.get(0).cloned();
17676 let end = args.get(1).cloned();
17677 let step = args.get(2).cloned().or_else(|| {
17678 Some(Expression::Interval(Box::new(
17679 crate::expressions::Interval {
17680 this: Some(Expression::Literal(Literal::String(
17681 "1".to_string(),
17682 ))),
17683 unit: Some(
17684 crate::expressions::IntervalUnitSpec::Simple {
17685 unit: crate::expressions::IntervalUnit::Day,
17686 use_plural: false,
17687 },
17688 ),
17689 },
17690 )))
17691 });
17692 Ok(Expression::GenerateSeries(Box::new(
17693 crate::expressions::GenerateSeries {
17694 start: start.map(Box::new),
17695 end: end.map(Box::new),
17696 step: step.map(Box::new),
17697 is_end_exclusive: None,
17698 },
17699 )))
17700 }
17701 }
17702 _ => Ok(Expression::Function(f)),
17703 }
17704 } else if let Expression::AggregateFunction(mut af) = e {
17705 let name = af.name.to_uppercase();
17706 match name.as_str() {
17707 "ARBITRARY" if af.args.len() == 1 => {
17708 let arg = af.args.into_iter().next().unwrap();
17709 Ok(convert_arbitrary(arg, target))
17710 }
17711 "JSON_ARRAYAGG" => {
17712 match target {
17713 DialectType::PostgreSQL => {
17714 af.name = "JSON_AGG".to_string();
17715 // Add NULLS FIRST to ORDER BY items for PostgreSQL
17716 for ordered in af.order_by.iter_mut() {
17717 if ordered.nulls_first.is_none() {
17718 ordered.nulls_first = Some(true);
17719 }
17720 }
17721 Ok(Expression::AggregateFunction(af))
17722 }
17723 _ => Ok(Expression::AggregateFunction(af)),
17724 }
17725 }
17726 _ => Ok(Expression::AggregateFunction(af)),
17727 }
17728 } else if let Expression::JSONArrayAgg(ja) = e {
17729 // JSONArrayAgg -> JSON_AGG for PostgreSQL, JSON_ARRAYAGG for others
17730 match target {
17731 DialectType::PostgreSQL => {
17732 let mut order_by = Vec::new();
17733 if let Some(order_expr) = ja.order {
17734 if let Expression::OrderBy(ob) = *order_expr {
17735 for mut ordered in ob.expressions {
17736 if ordered.nulls_first.is_none() {
17737 ordered.nulls_first = Some(true);
17738 }
17739 order_by.push(ordered);
17740 }
17741 }
17742 }
17743 Ok(Expression::AggregateFunction(Box::new(
17744 crate::expressions::AggregateFunction {
17745 name: "JSON_AGG".to_string(),
17746 args: vec![*ja.this],
17747 distinct: false,
17748 filter: None,
17749 order_by,
17750 limit: None,
17751 ignore_nulls: None,
17752 inferred_type: None,
17753 },
17754 )))
17755 }
17756 _ => Ok(Expression::JSONArrayAgg(ja)),
17757 }
17758 } else if let Expression::ToNumber(tn) = e {
17759 // TO_NUMBER(x) with no format/precision/scale -> CAST(x AS DOUBLE)
17760 let arg = *tn.this;
17761 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
17762 this: arg,
17763 to: crate::expressions::DataType::Double {
17764 precision: None,
17765 scale: None,
17766 },
17767 double_colon_syntax: false,
17768 trailing_comments: Vec::new(),
17769 format: None,
17770 default: None,
17771 inferred_type: None,
17772 })))
17773 } else {
17774 Ok(e)
17775 }
17776 }
17777
17778 Action::RegexpLikeToDuckDB => {
17779 if let Expression::RegexpLike(f) = e {
17780 let mut args = vec![f.this, f.pattern];
17781 if let Some(flags) = f.flags {
17782 args.push(flags);
17783 }
17784 Ok(Expression::Function(Box::new(Function::new(
17785 "REGEXP_MATCHES".to_string(),
17786 args,
17787 ))))
17788 } else {
17789 Ok(e)
17790 }
17791 }
17792 Action::EpochConvert => {
17793 if let Expression::Epoch(f) = e {
17794 let arg = f.this;
17795 let name = match target {
17796 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
17797 "UNIX_TIMESTAMP"
17798 }
17799 DialectType::Presto | DialectType::Trino => "TO_UNIXTIME",
17800 DialectType::BigQuery => "TIME_TO_UNIX",
17801 _ => "EPOCH",
17802 };
17803 Ok(Expression::Function(Box::new(Function::new(
17804 name.to_string(),
17805 vec![arg],
17806 ))))
17807 } else {
17808 Ok(e)
17809 }
17810 }
17811 Action::EpochMsConvert => {
17812 use crate::expressions::{BinaryOp, Cast};
17813 if let Expression::EpochMs(f) = e {
17814 let arg = f.this;
17815 match target {
17816 DialectType::Spark | DialectType::Databricks => {
17817 Ok(Expression::Function(Box::new(Function::new(
17818 "TIMESTAMP_MILLIS".to_string(),
17819 vec![arg],
17820 ))))
17821 }
17822 DialectType::BigQuery => Ok(Expression::Function(Box::new(
17823 Function::new("TIMESTAMP_MILLIS".to_string(), vec![arg]),
17824 ))),
17825 DialectType::Presto | DialectType::Trino => {
17826 // FROM_UNIXTIME(CAST(x AS DOUBLE) / POW(10, 3))
17827 let cast_arg = Expression::Cast(Box::new(Cast {
17828 this: arg,
17829 to: DataType::Double {
17830 precision: None,
17831 scale: None,
17832 },
17833 trailing_comments: Vec::new(),
17834 double_colon_syntax: false,
17835 format: None,
17836 default: None,
17837 inferred_type: None,
17838 }));
17839 let div = Expression::Div(Box::new(BinaryOp::new(
17840 cast_arg,
17841 Expression::Function(Box::new(Function::new(
17842 "POW".to_string(),
17843 vec![Expression::number(10), Expression::number(3)],
17844 ))),
17845 )));
17846 Ok(Expression::Function(Box::new(Function::new(
17847 "FROM_UNIXTIME".to_string(),
17848 vec![div],
17849 ))))
17850 }
17851 DialectType::MySQL => {
17852 // FROM_UNIXTIME(x / POWER(10, 3))
17853 let div = Expression::Div(Box::new(BinaryOp::new(
17854 arg,
17855 Expression::Function(Box::new(Function::new(
17856 "POWER".to_string(),
17857 vec![Expression::number(10), Expression::number(3)],
17858 ))),
17859 )));
17860 Ok(Expression::Function(Box::new(Function::new(
17861 "FROM_UNIXTIME".to_string(),
17862 vec![div],
17863 ))))
17864 }
17865 DialectType::PostgreSQL | DialectType::Redshift => {
17866 // TO_TIMESTAMP(CAST(x AS DOUBLE PRECISION) / POWER(10, 3))
17867 let cast_arg = Expression::Cast(Box::new(Cast {
17868 this: arg,
17869 to: DataType::Custom {
17870 name: "DOUBLE PRECISION".to_string(),
17871 },
17872 trailing_comments: Vec::new(),
17873 double_colon_syntax: false,
17874 format: None,
17875 default: None,
17876 inferred_type: None,
17877 }));
17878 let div = Expression::Div(Box::new(BinaryOp::new(
17879 cast_arg,
17880 Expression::Function(Box::new(Function::new(
17881 "POWER".to_string(),
17882 vec![Expression::number(10), Expression::number(3)],
17883 ))),
17884 )));
17885 Ok(Expression::Function(Box::new(Function::new(
17886 "TO_TIMESTAMP".to_string(),
17887 vec![div],
17888 ))))
17889 }
17890 DialectType::ClickHouse => {
17891 // fromUnixTimestamp64Milli(CAST(x AS Nullable(Int64)))
17892 let cast_arg = Expression::Cast(Box::new(Cast {
17893 this: arg,
17894 to: DataType::Nullable {
17895 inner: Box::new(DataType::BigInt { length: None }),
17896 },
17897 trailing_comments: Vec::new(),
17898 double_colon_syntax: false,
17899 format: None,
17900 default: None,
17901 inferred_type: None,
17902 }));
17903 Ok(Expression::Function(Box::new(Function::new(
17904 "fromUnixTimestamp64Milli".to_string(),
17905 vec![cast_arg],
17906 ))))
17907 }
17908 _ => Ok(Expression::Function(Box::new(Function::new(
17909 "EPOCH_MS".to_string(),
17910 vec![arg],
17911 )))),
17912 }
17913 } else {
17914 Ok(e)
17915 }
17916 }
17917 Action::TSQLTypeNormalize => {
17918 if let Expression::DataType(dt) = e {
17919 let new_dt = match &dt {
17920 DataType::Custom { name } if name.eq_ignore_ascii_case("MONEY") => {
17921 DataType::Decimal {
17922 precision: Some(15),
17923 scale: Some(4),
17924 }
17925 }
17926 DataType::Custom { name }
17927 if name.eq_ignore_ascii_case("SMALLMONEY") =>
17928 {
17929 DataType::Decimal {
17930 precision: Some(6),
17931 scale: Some(4),
17932 }
17933 }
17934 DataType::Custom { name } if name.eq_ignore_ascii_case("DATETIME2") => {
17935 DataType::Timestamp {
17936 timezone: false,
17937 precision: None,
17938 }
17939 }
17940 DataType::Custom { name } if name.eq_ignore_ascii_case("REAL") => {
17941 DataType::Float {
17942 precision: None,
17943 scale: None,
17944 real_spelling: false,
17945 }
17946 }
17947 DataType::Float {
17948 real_spelling: true,
17949 ..
17950 } => DataType::Float {
17951 precision: None,
17952 scale: None,
17953 real_spelling: false,
17954 },
17955 DataType::Custom { name } if name.eq_ignore_ascii_case("IMAGE") => {
17956 DataType::Custom {
17957 name: "BLOB".to_string(),
17958 }
17959 }
17960 DataType::Custom { name } if name.eq_ignore_ascii_case("BIT") => {
17961 DataType::Boolean
17962 }
17963 DataType::Custom { name }
17964 if name.eq_ignore_ascii_case("ROWVERSION") =>
17965 {
17966 DataType::Custom {
17967 name: "BINARY".to_string(),
17968 }
17969 }
17970 DataType::Custom { name }
17971 if name.eq_ignore_ascii_case("UNIQUEIDENTIFIER") =>
17972 {
17973 match target {
17974 DialectType::Spark
17975 | DialectType::Databricks
17976 | DialectType::Hive => DataType::Custom {
17977 name: "STRING".to_string(),
17978 },
17979 _ => DataType::VarChar {
17980 length: Some(36),
17981 parenthesized_length: true,
17982 },
17983 }
17984 }
17985 DataType::Custom { name }
17986 if name.eq_ignore_ascii_case("DATETIMEOFFSET") =>
17987 {
17988 match target {
17989 DialectType::Spark
17990 | DialectType::Databricks
17991 | DialectType::Hive => DataType::Timestamp {
17992 timezone: false,
17993 precision: None,
17994 },
17995 _ => DataType::Timestamp {
17996 timezone: true,
17997 precision: None,
17998 },
17999 }
18000 }
18001 DataType::Custom { ref name }
18002 if name.to_uppercase().starts_with("DATETIME2(") =>
18003 {
18004 // DATETIME2(n) -> TIMESTAMP
18005 DataType::Timestamp {
18006 timezone: false,
18007 precision: None,
18008 }
18009 }
18010 DataType::Custom { ref name }
18011 if name.to_uppercase().starts_with("TIME(") =>
18012 {
18013 // TIME(n) -> TIMESTAMP for Spark, keep as TIME for others
18014 match target {
18015 DialectType::Spark
18016 | DialectType::Databricks
18017 | DialectType::Hive => DataType::Timestamp {
18018 timezone: false,
18019 precision: None,
18020 },
18021 _ => return Ok(Expression::DataType(dt)),
18022 }
18023 }
18024 DataType::Custom { ref name }
18025 if name.to_uppercase().starts_with("NUMERIC") =>
18026 {
18027 // Parse NUMERIC(p,s) back to Decimal(p,s)
18028 let upper = name.to_uppercase();
18029 if let Some(inner) = upper
18030 .strip_prefix("NUMERIC(")
18031 .and_then(|s| s.strip_suffix(')'))
18032 {
18033 let parts: Vec<&str> = inner.split(',').collect();
18034 let precision =
18035 parts.first().and_then(|s| s.trim().parse::<u32>().ok());
18036 let scale =
18037 parts.get(1).and_then(|s| s.trim().parse::<u32>().ok());
18038 DataType::Decimal { precision, scale }
18039 } else if upper == "NUMERIC" {
18040 DataType::Decimal {
18041 precision: None,
18042 scale: None,
18043 }
18044 } else {
18045 return Ok(Expression::DataType(dt));
18046 }
18047 }
18048 DataType::Float {
18049 precision: Some(p), ..
18050 } => {
18051 // For Hive/Spark: FLOAT(1-32) -> FLOAT, FLOAT(33+) -> DOUBLE (IEEE 754 boundary)
18052 // For other targets: FLOAT(1-24) -> FLOAT, FLOAT(25+) -> DOUBLE (TSQL boundary)
18053 let boundary = match target {
18054 DialectType::Hive
18055 | DialectType::Spark
18056 | DialectType::Databricks => 32,
18057 _ => 24,
18058 };
18059 if *p <= boundary {
18060 DataType::Float {
18061 precision: None,
18062 scale: None,
18063 real_spelling: false,
18064 }
18065 } else {
18066 DataType::Double {
18067 precision: None,
18068 scale: None,
18069 }
18070 }
18071 }
18072 DataType::TinyInt { .. } => match target {
18073 DialectType::DuckDB => DataType::Custom {
18074 name: "UTINYINT".to_string(),
18075 },
18076 DialectType::Hive
18077 | DialectType::Spark
18078 | DialectType::Databricks => DataType::SmallInt { length: None },
18079 _ => return Ok(Expression::DataType(dt)),
18080 },
18081 // INTEGER -> INT for Spark/Databricks
18082 DataType::Int {
18083 length,
18084 integer_spelling: true,
18085 } => DataType::Int {
18086 length: *length,
18087 integer_spelling: false,
18088 },
18089 _ => return Ok(Expression::DataType(dt)),
18090 };
18091 Ok(Expression::DataType(new_dt))
18092 } else {
18093 Ok(e)
18094 }
18095 }
18096 Action::MySQLSafeDivide => {
18097 use crate::expressions::{BinaryOp, Cast};
18098 if let Expression::Div(op) = e {
18099 let left = op.left;
18100 let right = op.right;
18101 // For SQLite: CAST left as REAL but NO NULLIF wrapping
18102 if matches!(target, DialectType::SQLite) {
18103 let new_left = Expression::Cast(Box::new(Cast {
18104 this: left,
18105 to: DataType::Float {
18106 precision: None,
18107 scale: None,
18108 real_spelling: true,
18109 },
18110 trailing_comments: Vec::new(),
18111 double_colon_syntax: false,
18112 format: None,
18113 default: None,
18114 inferred_type: None,
18115 }));
18116 return Ok(Expression::Div(Box::new(BinaryOp::new(new_left, right))));
18117 }
18118 // Wrap right in NULLIF(right, 0)
18119 let nullif_right = Expression::Function(Box::new(Function::new(
18120 "NULLIF".to_string(),
18121 vec![right, Expression::number(0)],
18122 )));
18123 // For some dialects, also CAST the left side
18124 let new_left = match target {
18125 DialectType::PostgreSQL
18126 | DialectType::Redshift
18127 | DialectType::Teradata
18128 | DialectType::Materialize
18129 | DialectType::RisingWave => Expression::Cast(Box::new(Cast {
18130 this: left,
18131 to: DataType::Custom {
18132 name: "DOUBLE PRECISION".to_string(),
18133 },
18134 trailing_comments: Vec::new(),
18135 double_colon_syntax: false,
18136 format: None,
18137 default: None,
18138 inferred_type: None,
18139 })),
18140 DialectType::Drill
18141 | DialectType::Trino
18142 | DialectType::Presto
18143 | DialectType::Athena => Expression::Cast(Box::new(Cast {
18144 this: left,
18145 to: DataType::Double {
18146 precision: None,
18147 scale: None,
18148 },
18149 trailing_comments: Vec::new(),
18150 double_colon_syntax: false,
18151 format: None,
18152 default: None,
18153 inferred_type: None,
18154 })),
18155 DialectType::TSQL => Expression::Cast(Box::new(Cast {
18156 this: left,
18157 to: DataType::Float {
18158 precision: None,
18159 scale: None,
18160 real_spelling: false,
18161 },
18162 trailing_comments: Vec::new(),
18163 double_colon_syntax: false,
18164 format: None,
18165 default: None,
18166 inferred_type: None,
18167 })),
18168 _ => left,
18169 };
18170 Ok(Expression::Div(Box::new(BinaryOp::new(
18171 new_left,
18172 nullif_right,
18173 ))))
18174 } else {
18175 Ok(e)
18176 }
18177 }
18178 Action::AlterTableRenameStripSchema => {
18179 if let Expression::AlterTable(mut at) = e {
18180 if let Some(crate::expressions::AlterTableAction::RenameTable(
18181 ref mut new_tbl,
18182 )) = at.actions.first_mut()
18183 {
18184 new_tbl.schema = None;
18185 new_tbl.catalog = None;
18186 }
18187 Ok(Expression::AlterTable(at))
18188 } else {
18189 Ok(e)
18190 }
18191 }
18192 Action::NullsOrdering => {
18193 // Fill in the source dialect's implied null ordering default.
18194 // This makes implicit null ordering explicit so the target generator
18195 // can correctly strip or keep it.
18196 //
18197 // Dialect null ordering categories:
18198 // nulls_are_large (Oracle, PostgreSQL, Redshift, Snowflake):
18199 // ASC -> NULLS LAST, DESC -> NULLS FIRST
18200 // nulls_are_small (Spark, Hive, BigQuery, MySQL, Databricks, ClickHouse, etc.):
18201 // ASC -> NULLS FIRST, DESC -> NULLS LAST
18202 // nulls_are_last (DuckDB, Presto, Trino, Dremio, Athena):
18203 // NULLS LAST always (both ASC and DESC)
18204 if let Expression::Ordered(mut o) = e {
18205 let is_asc = !o.desc;
18206
18207 let is_source_nulls_large = matches!(
18208 source,
18209 DialectType::Oracle
18210 | DialectType::PostgreSQL
18211 | DialectType::Redshift
18212 | DialectType::Snowflake
18213 );
18214 let is_source_nulls_last = matches!(
18215 source,
18216 DialectType::DuckDB
18217 | DialectType::Presto
18218 | DialectType::Trino
18219 | DialectType::Dremio
18220 | DialectType::Athena
18221 | DialectType::ClickHouse
18222 | DialectType::Drill
18223 | DialectType::Exasol
18224 | DialectType::DataFusion
18225 );
18226
18227 // Determine target category to check if default matches
18228 let is_target_nulls_large = matches!(
18229 target,
18230 DialectType::Oracle
18231 | DialectType::PostgreSQL
18232 | DialectType::Redshift
18233 | DialectType::Snowflake
18234 );
18235 let is_target_nulls_last = matches!(
18236 target,
18237 DialectType::DuckDB
18238 | DialectType::Presto
18239 | DialectType::Trino
18240 | DialectType::Dremio
18241 | DialectType::Athena
18242 | DialectType::ClickHouse
18243 | DialectType::Drill
18244 | DialectType::Exasol
18245 | DialectType::DataFusion
18246 );
18247
18248 // Compute the implied nulls_first for source
18249 let source_nulls_first = if is_source_nulls_large {
18250 !is_asc // ASC -> NULLS LAST (false), DESC -> NULLS FIRST (true)
18251 } else if is_source_nulls_last {
18252 false // NULLS LAST always
18253 } else {
18254 is_asc // nulls_are_small: ASC -> NULLS FIRST (true), DESC -> NULLS LAST (false)
18255 };
18256
18257 // Compute the target's default
18258 let target_nulls_first = if is_target_nulls_large {
18259 !is_asc
18260 } else if is_target_nulls_last {
18261 false
18262 } else {
18263 is_asc
18264 };
18265
18266 // Only add explicit nulls ordering if source and target defaults differ
18267 if source_nulls_first != target_nulls_first {
18268 o.nulls_first = Some(source_nulls_first);
18269 }
18270 // If they match, leave nulls_first as None so the generator won't output it
18271
18272 Ok(Expression::Ordered(o))
18273 } else {
18274 Ok(e)
18275 }
18276 }
            Action::StringAggConvert => {
                // Rewrites STRING_AGG (optionally inside WITHIN GROUP) into the target
                // dialect's preferred aggregate spelling. Two shapes are handled:
                //   1. WithinGroup wrapping STRING_AGG (TSQL-style)
                //   2. A bare StringAgg node
                match e {
                    Expression::WithinGroup(wg) => {
                        // STRING_AGG(x, sep) WITHIN GROUP (ORDER BY z) -> target-specific
                        // Extract args and distinct flag from either Function, AggregateFunction, or StringAgg
                        let (x_opt, sep_opt, distinct) = match wg.this {
                            Expression::AggregateFunction(ref af)
                                if af.name.eq_ignore_ascii_case("STRING_AGG")
                                    && af.args.len() >= 2 =>
                            {
                                (
                                    Some(af.args[0].clone()),
                                    Some(af.args[1].clone()),
                                    af.distinct,
                                )
                            }
                            Expression::Function(ref f)
                                if f.name.eq_ignore_ascii_case("STRING_AGG")
                                    && f.args.len() >= 2 =>
                            {
                                // Plain Function nodes carry no DISTINCT flag.
                                (Some(f.args[0].clone()), Some(f.args[1].clone()), false)
                            }
                            Expression::StringAgg(ref sa) => {
                                (Some(sa.this.clone()), sa.separator.clone(), sa.distinct)
                            }
                            // Anything else inside WITHIN GROUP is left untouched below.
                            _ => (None, None, false),
                        };
                        if let (Some(x), Some(sep)) = (x_opt, sep_opt) {
                            let order_by = wg.order_by;

                            match target {
                                DialectType::TSQL | DialectType::Fabric => {
                                    // Keep as WithinGroup(StringAgg) for TSQL
                                    Ok(Expression::WithinGroup(Box::new(
                                        crate::expressions::WithinGroup {
                                            this: Expression::StringAgg(Box::new(
                                                crate::expressions::StringAggFunc {
                                                    this: x,
                                                    separator: Some(sep),
                                                    order_by: None, // order_by goes in WithinGroup, not StringAgg
                                                    distinct,
                                                    filter: None,
                                                    limit: None,
                                                },
                                            )),
                                            order_by,
                                        },
                                    )))
                                }
                                DialectType::MySQL
                                | DialectType::SingleStore
                                | DialectType::Doris
                                | DialectType::StarRocks => {
                                    // GROUP_CONCAT(x ORDER BY z SEPARATOR sep)
                                    Ok(Expression::GroupConcat(Box::new(
                                        crate::expressions::GroupConcatFunc {
                                            this: x,
                                            separator: Some(sep),
                                            order_by: Some(order_by),
                                            distinct,
                                            filter: None,
                                        },
                                    )))
                                }
                                DialectType::SQLite => {
                                    // GROUP_CONCAT(x, sep) - no ORDER BY support
                                    Ok(Expression::GroupConcat(Box::new(
                                        crate::expressions::GroupConcatFunc {
                                            this: x,
                                            separator: Some(sep),
                                            order_by: None,
                                            distinct,
                                            filter: None,
                                        },
                                    )))
                                }
                                DialectType::PostgreSQL | DialectType::Redshift => {
                                    // STRING_AGG(x, sep ORDER BY z)
                                    Ok(Expression::StringAgg(Box::new(
                                        crate::expressions::StringAggFunc {
                                            this: x,
                                            separator: Some(sep),
                                            order_by: Some(order_by),
                                            distinct,
                                            filter: None,
                                            limit: None,
                                        },
                                    )))
                                }
                                _ => {
                                    // Default: keep as STRING_AGG(x, sep) with ORDER BY inside
                                    Ok(Expression::StringAgg(Box::new(
                                        crate::expressions::StringAggFunc {
                                            this: x,
                                            separator: Some(sep),
                                            order_by: Some(order_by),
                                            distinct,
                                            filter: None,
                                            limit: None,
                                        },
                                    )))
                                }
                            }
                        } else {
                            // Not a recognized STRING_AGG shape: keep the WITHIN GROUP as-is.
                            Ok(Expression::WithinGroup(wg))
                        }
                    }
                    Expression::StringAgg(sa) => {
                        match target {
                            DialectType::MySQL
                            | DialectType::SingleStore
                            | DialectType::Doris
                            | DialectType::StarRocks => {
                                // STRING_AGG(x, sep) -> GROUP_CONCAT(x SEPARATOR sep)
                                Ok(Expression::GroupConcat(Box::new(
                                    crate::expressions::GroupConcatFunc {
                                        this: sa.this,
                                        separator: sa.separator,
                                        order_by: sa.order_by,
                                        distinct: sa.distinct,
                                        filter: sa.filter,
                                    },
                                )))
                            }
                            DialectType::SQLite => {
                                // STRING_AGG(x, sep) -> GROUP_CONCAT(x, sep)
                                Ok(Expression::GroupConcat(Box::new(
                                    crate::expressions::GroupConcatFunc {
                                        this: sa.this,
                                        separator: sa.separator,
                                        order_by: None, // SQLite doesn't support ORDER BY in GROUP_CONCAT
                                        distinct: sa.distinct,
                                        filter: sa.filter,
                                    },
                                )))
                            }
                            DialectType::Spark | DialectType::Databricks => {
                                // STRING_AGG(x, sep) -> LISTAGG(x, sep)
                                Ok(Expression::ListAgg(Box::new(
                                    crate::expressions::ListAggFunc {
                                        this: sa.this,
                                        separator: sa.separator,
                                        on_overflow: None,
                                        order_by: sa.order_by,
                                        distinct: sa.distinct,
                                        filter: None,
                                    },
                                )))
                            }
                            _ => Ok(Expression::StringAgg(sa)),
                        }
                    }
                    _ => Ok(e),
                }
            }
18432 Action::GroupConcatConvert => {
18433 // Helper to expand CONCAT(a, b, c) -> a || b || c (for PostgreSQL/SQLite)
18434 // or CONCAT(a, b, c) -> a + b + c (for TSQL)
18435 fn expand_concat_to_dpipe(expr: Expression) -> Expression {
18436 if let Expression::Function(ref f) = expr {
18437 if f.name.to_uppercase() == "CONCAT" && f.args.len() > 1 {
18438 let mut result = f.args[0].clone();
18439 for arg in &f.args[1..] {
18440 result = Expression::Concat(Box::new(BinaryOp {
18441 left: result,
18442 right: arg.clone(),
18443 left_comments: vec![],
18444 operator_comments: vec![],
18445 trailing_comments: vec![],
18446 inferred_type: None,
18447 }));
18448 }
18449 return result;
18450 }
18451 }
18452 expr
18453 }
18454 fn expand_concat_to_plus(expr: Expression) -> Expression {
18455 if let Expression::Function(ref f) = expr {
18456 if f.name.to_uppercase() == "CONCAT" && f.args.len() > 1 {
18457 let mut result = f.args[0].clone();
18458 for arg in &f.args[1..] {
18459 result = Expression::Add(Box::new(BinaryOp {
18460 left: result,
18461 right: arg.clone(),
18462 left_comments: vec![],
18463 operator_comments: vec![],
18464 trailing_comments: vec![],
18465 inferred_type: None,
18466 }));
18467 }
18468 return result;
18469 }
18470 }
18471 expr
18472 }
18473 // Helper to wrap each arg in CAST(arg AS VARCHAR) for Presto/Trino CONCAT
18474 fn wrap_concat_args_in_varchar_cast(expr: Expression) -> Expression {
18475 if let Expression::Function(ref f) = expr {
18476 if f.name.to_uppercase() == "CONCAT" && f.args.len() > 1 {
18477 let new_args: Vec<Expression> = f
18478 .args
18479 .iter()
18480 .map(|arg| {
18481 Expression::Cast(Box::new(crate::expressions::Cast {
18482 this: arg.clone(),
18483 to: crate::expressions::DataType::VarChar {
18484 length: None,
18485 parenthesized_length: false,
18486 },
18487 trailing_comments: Vec::new(),
18488 double_colon_syntax: false,
18489 format: None,
18490 default: None,
18491 inferred_type: None,
18492 }))
18493 })
18494 .collect();
18495 return Expression::Function(Box::new(
18496 crate::expressions::Function::new(
18497 "CONCAT".to_string(),
18498 new_args,
18499 ),
18500 ));
18501 }
18502 }
18503 expr
18504 }
18505 if let Expression::GroupConcat(gc) = e {
18506 match target {
18507 DialectType::Presto => {
18508 // GROUP_CONCAT(x [, sep]) -> ARRAY_JOIN(ARRAY_AGG(x), sep)
18509 let sep = gc.separator.unwrap_or(Expression::string(","));
18510 // For multi-arg CONCAT, wrap each arg in CAST(... AS VARCHAR)
18511 let this = wrap_concat_args_in_varchar_cast(gc.this);
18512 let array_agg =
18513 Expression::ArrayAgg(Box::new(crate::expressions::AggFunc {
18514 this,
18515 distinct: gc.distinct,
18516 filter: gc.filter,
18517 order_by: gc.order_by.unwrap_or_default(),
18518 name: None,
18519 ignore_nulls: None,
18520 having_max: None,
18521 limit: None,
18522 inferred_type: None,
18523 }));
18524 Ok(Expression::ArrayJoin(Box::new(
18525 crate::expressions::ArrayJoinFunc {
18526 this: array_agg,
18527 separator: sep,
18528 null_replacement: None,
18529 },
18530 )))
18531 }
18532 DialectType::Trino => {
18533 // GROUP_CONCAT(x [, sep]) -> LISTAGG(x, sep)
18534 let sep = gc.separator.unwrap_or(Expression::string(","));
18535 // For multi-arg CONCAT, wrap each arg in CAST(... AS VARCHAR)
18536 let this = wrap_concat_args_in_varchar_cast(gc.this);
18537 Ok(Expression::ListAgg(Box::new(
18538 crate::expressions::ListAggFunc {
18539 this,
18540 separator: Some(sep),
18541 on_overflow: None,
18542 order_by: gc.order_by,
18543 distinct: gc.distinct,
18544 filter: gc.filter,
18545 },
18546 )))
18547 }
18548 DialectType::PostgreSQL
18549 | DialectType::Redshift
18550 | DialectType::Snowflake
18551 | DialectType::DuckDB
18552 | DialectType::Hive
18553 | DialectType::ClickHouse => {
18554 // GROUP_CONCAT(x [, sep]) -> STRING_AGG(x, sep)
18555 let sep = gc.separator.unwrap_or(Expression::string(","));
18556 // Expand CONCAT(a,b,c) -> a || b || c for || dialects
18557 let this = expand_concat_to_dpipe(gc.this);
18558 // For PostgreSQL, add NULLS LAST for DESC / NULLS FIRST for ASC
18559 let order_by = if target == DialectType::PostgreSQL {
18560 gc.order_by.map(|ords| {
18561 ords.into_iter()
18562 .map(|mut o| {
18563 if o.nulls_first.is_none() {
18564 if o.desc {
18565 o.nulls_first = Some(false);
18566 // NULLS LAST
18567 } else {
18568 o.nulls_first = Some(true);
18569 // NULLS FIRST
18570 }
18571 }
18572 o
18573 })
18574 .collect()
18575 })
18576 } else {
18577 gc.order_by
18578 };
18579 Ok(Expression::StringAgg(Box::new(
18580 crate::expressions::StringAggFunc {
18581 this,
18582 separator: Some(sep),
18583 order_by,
18584 distinct: gc.distinct,
18585 filter: gc.filter,
18586 limit: None,
18587 },
18588 )))
18589 }
18590 DialectType::TSQL => {
18591 // GROUP_CONCAT(x [, sep]) -> STRING_AGG(x, sep) WITHIN GROUP (ORDER BY ...)
18592 // TSQL doesn't support DISTINCT in STRING_AGG
18593 let sep = gc.separator.unwrap_or(Expression::string(","));
18594 // Expand CONCAT(a,b,c) -> a + b + c for TSQL
18595 let this = expand_concat_to_plus(gc.this);
18596 Ok(Expression::StringAgg(Box::new(
18597 crate::expressions::StringAggFunc {
18598 this,
18599 separator: Some(sep),
18600 order_by: gc.order_by,
18601 distinct: false, // TSQL doesn't support DISTINCT in STRING_AGG
18602 filter: gc.filter,
18603 limit: None,
18604 },
18605 )))
18606 }
18607 DialectType::SQLite => {
18608 // GROUP_CONCAT stays as GROUP_CONCAT but ORDER BY is removed
18609 // SQLite GROUP_CONCAT doesn't support ORDER BY
18610 // Expand CONCAT(a,b,c) -> a || b || c
18611 let this = expand_concat_to_dpipe(gc.this);
18612 Ok(Expression::GroupConcat(Box::new(
18613 crate::expressions::GroupConcatFunc {
18614 this,
18615 separator: gc.separator,
18616 order_by: None, // SQLite doesn't support ORDER BY in GROUP_CONCAT
18617 distinct: gc.distinct,
18618 filter: gc.filter,
18619 },
18620 )))
18621 }
18622 DialectType::Spark | DialectType::Databricks => {
18623 // GROUP_CONCAT(x [, sep]) -> LISTAGG(x, sep)
18624 let sep = gc.separator.unwrap_or(Expression::string(","));
18625 Ok(Expression::ListAgg(Box::new(
18626 crate::expressions::ListAggFunc {
18627 this: gc.this,
18628 separator: Some(sep),
18629 on_overflow: None,
18630 order_by: gc.order_by,
18631 distinct: gc.distinct,
18632 filter: None,
18633 },
18634 )))
18635 }
18636 DialectType::MySQL
18637 | DialectType::SingleStore
18638 | DialectType::StarRocks => {
18639 // MySQL GROUP_CONCAT should have explicit SEPARATOR (default ',')
18640 if gc.separator.is_none() {
18641 let mut gc = gc;
18642 gc.separator = Some(Expression::string(","));
18643 Ok(Expression::GroupConcat(gc))
18644 } else {
18645 Ok(Expression::GroupConcat(gc))
18646 }
18647 }
18648 _ => Ok(Expression::GroupConcat(gc)),
18649 }
18650 } else {
18651 Ok(e)
18652 }
18653 }
18654 Action::TempTableHash => {
18655 match e {
18656 Expression::CreateTable(mut ct) => {
18657 // TSQL #table -> TEMPORARY TABLE with # stripped from name
18658 let name = &ct.name.name.name;
18659 if name.starts_with('#') {
18660 ct.name.name.name = name.trim_start_matches('#').to_string();
18661 }
18662 // Set temporary flag
18663 ct.temporary = true;
18664 Ok(Expression::CreateTable(ct))
18665 }
18666 Expression::Table(mut tr) => {
18667 // Strip # from table references
18668 let name = &tr.name.name;
18669 if name.starts_with('#') {
18670 tr.name.name = name.trim_start_matches('#').to_string();
18671 }
18672 Ok(Expression::Table(tr))
18673 }
18674 Expression::DropTable(mut dt) => {
18675 // Strip # from DROP TABLE names
18676 for table_ref in &mut dt.names {
18677 if table_ref.name.name.starts_with('#') {
18678 table_ref.name.name =
18679 table_ref.name.name.trim_start_matches('#').to_string();
18680 }
18681 }
18682 Ok(Expression::DropTable(dt))
18683 }
18684 _ => Ok(e),
18685 }
18686 }
18687 Action::NvlClearOriginal => {
18688 if let Expression::Nvl(mut f) = e {
18689 f.original_name = None;
18690 Ok(Expression::Nvl(f))
18691 } else {
18692 Ok(e)
18693 }
18694 }
18695 Action::HiveCastToTryCast => {
18696 // Convert Hive/Spark CAST to TRY_CAST for targets that support it
18697 if let Expression::Cast(mut c) = e {
18698 // For Spark/Hive -> DuckDB: TIMESTAMP -> TIMESTAMPTZ
18699 // (Spark's TIMESTAMP is always timezone-aware)
18700 if matches!(target, DialectType::DuckDB)
18701 && matches!(source, DialectType::Spark | DialectType::Databricks)
18702 && matches!(
18703 c.to,
18704 DataType::Timestamp {
18705 timezone: false,
18706 ..
18707 }
18708 )
18709 {
18710 c.to = DataType::Custom {
18711 name: "TIMESTAMPTZ".to_string(),
18712 };
18713 }
18714 // For Spark source -> Databricks: VARCHAR/CHAR -> STRING
18715 // Spark parses VARCHAR(n)/CHAR(n) as TEXT, normalize to STRING
18716 if matches!(target, DialectType::Databricks | DialectType::Spark)
18717 && matches!(
18718 source,
18719 DialectType::Spark | DialectType::Databricks | DialectType::Hive
18720 )
18721 && Self::has_varchar_char_type(&c.to)
18722 {
18723 c.to = Self::normalize_varchar_to_string(c.to);
18724 }
18725 Ok(Expression::TryCast(c))
18726 } else {
18727 Ok(e)
18728 }
18729 }
            Action::XorExpand => {
                // Expand XOR to (a AND NOT b) OR (NOT a AND b) for dialects without XOR keyword
                // Snowflake: use BOOLXOR(a, b) instead
                if let Expression::Xor(xor) = e {
                    // Collect all XOR operands (the node stores up to two named slots
                    // plus a variadic tail).
                    let mut operands = Vec::new();
                    if let Some(this) = xor.this {
                        operands.push(*this);
                    }
                    if let Some(expr) = xor.expression {
                        operands.push(*expr);
                    }
                    operands.extend(xor.expressions);

                    // Snowflake: use BOOLXOR(a, b) — only for the exact binary case.
                    if matches!(target, DialectType::Snowflake) && operands.len() == 2 {
                        let a = operands.remove(0);
                        let b = operands.remove(0);
                        return Ok(Expression::Function(Box::new(Function::new(
                            "BOOLXOR".to_string(),
                            vec![a, b],
                        ))));
                    }

                    // Helper to build (a AND NOT b) OR (NOT a AND b).
                    // Parens are inserted around each NOT and each AND so the generated
                    // SQL keeps this exact grouping regardless of target precedence.
                    let make_xor = |a: Expression, b: Expression| -> Expression {
                        let not_b = Expression::Not(Box::new(
                            crate::expressions::UnaryOp::new(b.clone()),
                        ));
                        let not_a = Expression::Not(Box::new(
                            crate::expressions::UnaryOp::new(a.clone()),
                        ));
                        let left_and = Expression::And(Box::new(BinaryOp {
                            left: a,
                            right: Expression::Paren(Box::new(Paren {
                                this: not_b,
                                trailing_comments: Vec::new(),
                            })),
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                            inferred_type: None,
                        }));
                        let right_and = Expression::And(Box::new(BinaryOp {
                            left: Expression::Paren(Box::new(Paren {
                                this: not_a,
                                trailing_comments: Vec::new(),
                            })),
                            right: b,
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                            inferred_type: None,
                        }));
                        Expression::Or(Box::new(BinaryOp {
                            left: Expression::Paren(Box::new(Paren {
                                this: left_and,
                                trailing_comments: Vec::new(),
                            })),
                            right: Expression::Paren(Box::new(Paren {
                                this: right_and,
                                trailing_comments: Vec::new(),
                            })),
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                            inferred_type: None,
                        }))
                    };

                    if operands.len() >= 2 {
                        // Left-fold: a XOR b XOR c -> xor(xor(a, b), c).
                        let mut result = make_xor(operands.remove(0), operands.remove(0));
                        for operand in operands {
                            result = make_xor(result, operand);
                        }
                        Ok(result)
                    } else if operands.len() == 1 {
                        // Degenerate single-operand XOR is just the operand itself.
                        Ok(operands.remove(0))
                    } else {
                        // No operands - return FALSE (shouldn't happen)
                        Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
                            value: false,
                        }))
                    }
                } else {
                    Ok(e)
                }
            }
18818 Action::DatePartUnquote => {
18819 // DATE_PART('month', x) -> DATE_PART(month, x) for Snowflake target
18820 // Convert the quoted string first arg to a bare Column/Identifier
18821 if let Expression::Function(mut f) = e {
18822 if let Some(Expression::Literal(crate::expressions::Literal::String(s))) =
18823 f.args.first()
18824 {
18825 let bare_name = s.to_lowercase();
18826 f.args[0] = Expression::Column(crate::expressions::Column {
18827 name: Identifier::new(bare_name),
18828 table: None,
18829 join_mark: false,
18830 trailing_comments: Vec::new(),
18831 span: None,
18832 inferred_type: None,
18833 });
18834 }
18835 Ok(Expression::Function(f))
18836 } else {
18837 Ok(e)
18838 }
18839 }
18840 Action::ArrayLengthConvert => {
18841 // Extract the argument from the expression
18842 let arg = match e {
18843 Expression::Cardinality(ref f) => f.this.clone(),
18844 Expression::ArrayLength(ref f) => f.this.clone(),
18845 Expression::ArraySize(ref f) => f.this.clone(),
18846 _ => return Ok(e),
18847 };
18848 match target {
18849 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
18850 Ok(Expression::Function(Box::new(Function::new(
18851 "SIZE".to_string(),
18852 vec![arg],
18853 ))))
18854 }
18855 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
18856 Ok(Expression::Cardinality(Box::new(
18857 crate::expressions::UnaryFunc::new(arg),
18858 )))
18859 }
18860 DialectType::BigQuery => Ok(Expression::ArrayLength(Box::new(
18861 crate::expressions::UnaryFunc::new(arg),
18862 ))),
18863 DialectType::DuckDB => Ok(Expression::ArrayLength(Box::new(
18864 crate::expressions::UnaryFunc::new(arg),
18865 ))),
18866 DialectType::PostgreSQL | DialectType::Redshift => {
18867 // PostgreSQL ARRAY_LENGTH requires dimension arg
18868 Ok(Expression::Function(Box::new(Function::new(
18869 "ARRAY_LENGTH".to_string(),
18870 vec![arg, Expression::number(1)],
18871 ))))
18872 }
18873 DialectType::Snowflake => Ok(Expression::ArraySize(Box::new(
18874 crate::expressions::UnaryFunc::new(arg),
18875 ))),
18876 _ => Ok(e), // Keep original
18877 }
18878 }
18879
18880 Action::JsonExtractToArrow => {
18881 // JSON_EXTRACT(x, path) -> x -> path for SQLite/DuckDB (set arrow_syntax = true)
18882 if let Expression::JsonExtract(mut f) = e {
18883 f.arrow_syntax = true;
18884 // Transform path: convert bracket notation to dot notation
18885 // SQLite strips wildcards, DuckDB preserves them
18886 if let Expression::Literal(Literal::String(ref s)) = f.path {
18887 let mut transformed = s.clone();
18888 if matches!(target, DialectType::SQLite) {
18889 transformed = Self::strip_json_wildcards(&transformed);
18890 }
18891 transformed = Self::bracket_to_dot_notation(&transformed);
18892 if transformed != *s {
18893 f.path = Expression::string(&transformed);
18894 }
18895 }
18896 Ok(Expression::JsonExtract(f))
18897 } else {
18898 Ok(e)
18899 }
18900 }
18901
            Action::JsonExtractToGetJsonObject => {
                // Rewrites JSON_EXTRACT for targets that use key-path functions:
                // PostgreSQL/Redshift get JSON_EXTRACT_PATH(_TEXT), any other target
                // routed here gets Hive/Spark's GET_JSON_OBJECT.
                if let Expression::JsonExtract(f) = e {
                    if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
                        // JSON_EXTRACT(x, '$.key') -> JSON_EXTRACT_PATH(x, 'key') for PostgreSQL
                        // Use proper decomposition that handles brackets
                        let keys: Vec<Expression> =
                            if let Expression::Literal(Literal::String(ref s)) = f.path {
                                let parts = Self::decompose_json_path(s);
                                parts.into_iter().map(|k| Expression::string(&k)).collect()
                            } else {
                                // Non-literal path: pass it through as the sole key arg.
                                vec![f.path]
                            };
                        // Redshift's variant returns text, hence the _TEXT spelling.
                        let func_name = if matches!(target, DialectType::Redshift) {
                            "JSON_EXTRACT_PATH_TEXT"
                        } else {
                            "JSON_EXTRACT_PATH"
                        };
                        let mut args = vec![f.this];
                        args.extend(keys);
                        Ok(Expression::Function(Box::new(Function::new(
                            func_name.to_string(),
                            args,
                        ))))
                    } else {
                        // GET_JSON_OBJECT(x, '$.path') for Hive/Spark
                        // Convert bracket double quotes to single quotes
                        let path = if let Expression::Literal(Literal::String(ref s)) = f.path {
                            let normalized = Self::bracket_to_single_quotes(s);
                            if normalized != *s {
                                Expression::string(&normalized)
                            } else {
                                // Unchanged: reuse the original path expression.
                                f.path
                            }
                        } else {
                            f.path
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "GET_JSON_OBJECT".to_string(),
                            vec![f.this, path],
                        ))))
                    }
                } else {
                    Ok(e)
                }
            }
18947
18948 Action::JsonExtractScalarToGetJsonObject => {
18949 // JSON_EXTRACT_SCALAR(x, '$.path') -> GET_JSON_OBJECT(x, '$.path') for Hive/Spark
18950 if let Expression::JsonExtractScalar(f) = e {
18951 Ok(Expression::Function(Box::new(Function::new(
18952 "GET_JSON_OBJECT".to_string(),
18953 vec![f.this, f.path],
18954 ))))
18955 } else {
18956 Ok(e)
18957 }
18958 }
18959
18960 Action::JsonExtractToTsql => {
18961 // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> ISNULL(JSON_QUERY(x, path), JSON_VALUE(x, path)) for TSQL
18962 let (this, path) = match e {
18963 Expression::JsonExtract(f) => (f.this, f.path),
18964 Expression::JsonExtractScalar(f) => (f.this, f.path),
18965 _ => return Ok(e),
18966 };
18967 // Transform path: strip wildcards, convert bracket notation to dot notation
18968 let transformed_path = if let Expression::Literal(Literal::String(ref s)) = path
18969 {
18970 let stripped = Self::strip_json_wildcards(s);
18971 let dotted = Self::bracket_to_dot_notation(&stripped);
18972 Expression::string(&dotted)
18973 } else {
18974 path
18975 };
18976 let json_query = Expression::Function(Box::new(Function::new(
18977 "JSON_QUERY".to_string(),
18978 vec![this.clone(), transformed_path.clone()],
18979 )));
18980 let json_value = Expression::Function(Box::new(Function::new(
18981 "JSON_VALUE".to_string(),
18982 vec![this, transformed_path],
18983 )));
18984 Ok(Expression::Function(Box::new(Function::new(
18985 "ISNULL".to_string(),
18986 vec![json_query, json_value],
18987 ))))
18988 }
18989
18990 Action::JsonExtractToClickHouse => {
18991 // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> JSONExtractString(x, 'key1', idx, 'key2') for ClickHouse
18992 let (this, path) = match e {
18993 Expression::JsonExtract(f) => (f.this, f.path),
18994 Expression::JsonExtractScalar(f) => (f.this, f.path),
18995 _ => return Ok(e),
18996 };
18997 let args: Vec<Expression> =
18998 if let Expression::Literal(Literal::String(ref s)) = path {
18999 let parts = Self::decompose_json_path(s);
19000 let mut result = vec![this];
19001 for part in parts {
19002 // ClickHouse uses 1-based integer indices for array access
19003 if let Ok(idx) = part.parse::<i64>() {
19004 result.push(Expression::number(idx + 1));
19005 } else {
19006 result.push(Expression::string(&part));
19007 }
19008 }
19009 result
19010 } else {
19011 vec![this, path]
19012 };
19013 Ok(Expression::Function(Box::new(Function::new(
19014 "JSONExtractString".to_string(),
19015 args,
19016 ))))
19017 }
19018
            Action::JsonExtractScalarConvert => {
                // JSON_EXTRACT_SCALAR -> target-specific
                if let Expression::JsonExtractScalar(f) = e {
                    match target {
                        DialectType::PostgreSQL | DialectType::Redshift => {
                            // JSON_EXTRACT_SCALAR(x, '$.path') -> JSON_EXTRACT_PATH_TEXT(x, 'key1', 'key2')
                            let keys: Vec<Expression> =
                                if let Expression::Literal(Literal::String(ref s)) = f.path {
                                    // Split the '$.a.b[0]' path into individual keys.
                                    let parts = Self::decompose_json_path(s);
                                    parts.into_iter().map(|k| Expression::string(&k)).collect()
                                } else {
                                    // Non-literal path: pass it through as the sole key.
                                    vec![f.path]
                                };
                            let mut args = vec![f.this];
                            args.extend(keys);
                            Ok(Expression::Function(Box::new(Function::new(
                                "JSON_EXTRACT_PATH_TEXT".to_string(),
                                args,
                            ))))
                        }
                        DialectType::Snowflake => {
                            // JSON_EXTRACT_SCALAR(x, '$.path') -> JSON_EXTRACT_PATH_TEXT(x, 'stripped_path')
                            let stripped_path =
                                if let Expression::Literal(Literal::String(ref s)) = f.path {
                                    // Snowflake takes the path without the '$.' prefix.
                                    let stripped = Self::strip_json_dollar_prefix(s);
                                    Expression::string(&stripped)
                                } else {
                                    f.path
                                };
                            Ok(Expression::Function(Box::new(Function::new(
                                "JSON_EXTRACT_PATH_TEXT".to_string(),
                                vec![f.this, stripped_path],
                            ))))
                        }
                        DialectType::SQLite | DialectType::DuckDB => {
                            // JSON_EXTRACT_SCALAR(x, '$.path') -> x ->> '$.path'
                            // (same node, rendered with arrow syntax).
                            Ok(Expression::JsonExtractScalar(Box::new(
                                crate::expressions::JsonExtractFunc {
                                    this: f.this,
                                    path: f.path,
                                    returning: f.returning,
                                    arrow_syntax: true,
                                    hash_arrow_syntax: false,
                                    wrapper_option: None,
                                    quotes_option: None,
                                    on_scalar_string: false,
                                    on_error: None,
                                },
                            )))
                        }
                        _ => Ok(Expression::JsonExtractScalar(f)),
                    }
                } else {
                    Ok(e)
                }
            }
19075
19076 Action::JsonPathNormalize => {
19077 // Normalize JSON path format for BigQuery, MySQL, etc.
19078 if let Expression::JsonExtract(mut f) = e {
19079 if let Expression::Literal(Literal::String(ref s)) = f.path {
19080 let mut normalized = s.clone();
19081 // Convert bracket notation and handle wildcards per dialect
19082 match target {
19083 DialectType::BigQuery => {
19084 // BigQuery strips wildcards and uses single quotes in brackets
19085 normalized = Self::strip_json_wildcards(&normalized);
19086 normalized = Self::bracket_to_single_quotes(&normalized);
19087 }
19088 DialectType::MySQL => {
19089 // MySQL preserves wildcards, converts brackets to dot notation
19090 normalized = Self::bracket_to_dot_notation(&normalized);
19091 }
19092 _ => {}
19093 }
19094 if normalized != *s {
19095 f.path = Expression::string(&normalized);
19096 }
19097 }
19098 Ok(Expression::JsonExtract(f))
19099 } else {
19100 Ok(e)
19101 }
19102 }
19103
            Action::JsonQueryValueConvert => {
                // JsonQuery/JsonValue -> target-specific
                let (f, is_query) = match e {
                    Expression::JsonQuery(f) => (f, true),
                    Expression::JsonValue(f) => (f, false),
                    _ => return Ok(e),
                };
                match target {
                    DialectType::TSQL | DialectType::Fabric => {
                        // ISNULL(JSON_QUERY(...), JSON_VALUE(...)) — JSON_QUERY handles
                        // objects/arrays, JSON_VALUE handles scalars; together they
                        // cover both result kinds.
                        let json_query = Expression::Function(Box::new(Function::new(
                            "JSON_QUERY".to_string(),
                            vec![f.this.clone(), f.path.clone()],
                        )));
                        let json_value = Expression::Function(Box::new(Function::new(
                            "JSON_VALUE".to_string(),
                            vec![f.this, f.path],
                        )));
                        Ok(Expression::Function(Box::new(Function::new(
                            "ISNULL".to_string(),
                            vec![json_query, json_value],
                        ))))
                    }
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
                        // Hive family: GET_JSON_OBJECT(x, path).
                        Ok(Expression::Function(Box::new(Function::new(
                            "GET_JSON_OBJECT".to_string(),
                            vec![f.this, f.path],
                        ))))
                    }
                    DialectType::PostgreSQL | DialectType::Redshift => {
                        Ok(Expression::Function(Box::new(Function::new(
                            "JSON_EXTRACT_PATH_TEXT".to_string(),
                            vec![f.this, f.path],
                        ))))
                    }
                    DialectType::DuckDB | DialectType::SQLite => {
                        // json -> path arrow syntax
                        Ok(Expression::JsonExtract(Box::new(
                            crate::expressions::JsonExtractFunc {
                                this: f.this,
                                path: f.path,
                                returning: f.returning,
                                arrow_syntax: true,
                                hash_arrow_syntax: false,
                                wrapper_option: f.wrapper_option,
                                quotes_option: f.quotes_option,
                                on_scalar_string: f.on_scalar_string,
                                on_error: f.on_error,
                            },
                        )))
                    }
                    DialectType::Snowflake => {
                        // GET_PATH(PARSE_JSON(json), 'path')
                        // Strip $. prefix from path
                        // Only wrap in PARSE_JSON if not already a PARSE_JSON call or ParseJson expression
                        let json_expr = match &f.this {
                            Expression::Function(ref inner_f)
                                if inner_f.name.eq_ignore_ascii_case("PARSE_JSON") =>
                            {
                                f.this
                            }
                            Expression::ParseJson(_) => {
                                // Already a ParseJson expression, which generates as PARSE_JSON(...)
                                f.this
                            }
                            _ => Expression::Function(Box::new(Function::new(
                                "PARSE_JSON".to_string(),
                                vec![f.this],
                            ))),
                        };
                        let path_str = match &f.path {
                            Expression::Literal(Literal::String(s)) => {
                                // GET_PATH takes a bare path ('a.b'), not '$.a.b'.
                                let stripped = s.strip_prefix("$.").unwrap_or(s);
                                Expression::Literal(Literal::String(stripped.to_string()))
                            }
                            other => other.clone(),
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "GET_PATH".to_string(),
                            vec![json_expr, path_str],
                        ))))
                    }
                    _ => {
                        // Default: keep as JSON_QUERY/JSON_VALUE function
                        let func_name = if is_query { "JSON_QUERY" } else { "JSON_VALUE" };
                        Ok(Expression::Function(Box::new(Function::new(
                            func_name.to_string(),
                            vec![f.this, f.path],
                        ))))
                    }
                }
            }
19196
19197 Action::JsonLiteralToJsonParse => {
19198 // CAST('x' AS JSON) -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake
19199 if let Expression::Cast(c) = e {
19200 let func_name = if matches!(target, DialectType::Snowflake) {
19201 "PARSE_JSON"
19202 } else {
19203 "JSON_PARSE"
19204 };
19205 Ok(Expression::Function(Box::new(Function::new(
19206 func_name.to_string(),
19207 vec![c.this],
19208 ))))
19209 } else {
19210 Ok(e)
19211 }
19212 }
19213
19214 Action::AtTimeZoneConvert => {
19215 // AT TIME ZONE -> target-specific conversion
19216 if let Expression::AtTimeZone(atz) = e {
19217 match target {
19218 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
19219 Ok(Expression::Function(Box::new(Function::new(
19220 "AT_TIMEZONE".to_string(),
19221 vec![atz.this, atz.zone],
19222 ))))
19223 }
19224 DialectType::Spark | DialectType::Databricks => {
19225 Ok(Expression::Function(Box::new(Function::new(
19226 "FROM_UTC_TIMESTAMP".to_string(),
19227 vec![atz.this, atz.zone],
19228 ))))
19229 }
19230 DialectType::Snowflake => {
19231 // CONVERT_TIMEZONE('zone', expr)
19232 Ok(Expression::Function(Box::new(Function::new(
19233 "CONVERT_TIMEZONE".to_string(),
19234 vec![atz.zone, atz.this],
19235 ))))
19236 }
19237 DialectType::BigQuery => {
19238 // TIMESTAMP(DATETIME(expr, 'zone'))
19239 let datetime_call = Expression::Function(Box::new(Function::new(
19240 "DATETIME".to_string(),
19241 vec![atz.this, atz.zone],
19242 )));
19243 Ok(Expression::Function(Box::new(Function::new(
19244 "TIMESTAMP".to_string(),
19245 vec![datetime_call],
19246 ))))
19247 }
19248 _ => Ok(Expression::Function(Box::new(Function::new(
19249 "AT_TIMEZONE".to_string(),
19250 vec![atz.this, atz.zone],
19251 )))),
19252 }
19253 } else {
19254 Ok(e)
19255 }
19256 }
19257
            Action::DayOfWeekConvert => {
                // DAY_OF_WEEK -> ISODOW for DuckDB, ((DAYOFWEEK(x) % 7) + 1) for Spark
                if let Expression::DayOfWeek(f) = e {
                    match target {
                        DialectType::DuckDB => Ok(Expression::Function(Box::new(
                            Function::new("ISODOW".to_string(), vec![f.this]),
                        ))),
                        DialectType::Spark | DialectType::Databricks => {
                            // ((DAYOFWEEK(x) % 7) + 1)
                            // Remaps Spark's DAYOFWEEK numbering to the source
                            // dialect's — NOTE(review): confirm the intended
                            // source numbering against the reference transpiler.
                            let dayofweek = Expression::Function(Box::new(Function::new(
                                "DAYOFWEEK".to_string(),
                                vec![f.this],
                            )));
                            // (DAYOFWEEK(x) % 7), parenthesized explicitly so
                            // precedence survives regeneration.
                            let modulo = Expression::Mod(Box::new(BinaryOp {
                                left: dayofweek,
                                right: Expression::number(7),
                                left_comments: Vec::new(),
                                operator_comments: Vec::new(),
                                trailing_comments: Vec::new(),
                                inferred_type: None,
                            }));
                            let paren_mod = Expression::Paren(Box::new(Paren {
                                this: modulo,
                                trailing_comments: Vec::new(),
                            }));
                            let add_one = Expression::Add(Box::new(BinaryOp {
                                left: paren_mod,
                                right: Expression::number(1),
                                left_comments: Vec::new(),
                                operator_comments: Vec::new(),
                                trailing_comments: Vec::new(),
                                inferred_type: None,
                            }));
                            // Outer parens keep the whole expression atomic.
                            Ok(Expression::Paren(Box::new(Paren {
                                this: add_one,
                                trailing_comments: Vec::new(),
                            })))
                        }
                        // Other targets keep the DAY_OF_WEEK node unchanged.
                        _ => Ok(Expression::DayOfWeek(f)),
                    }
                } else {
                    Ok(e)
                }
            }
19302
            Action::MaxByMinByConvert => {
                // MAX_BY -> argMax for ClickHouse, drop 3rd arg for Spark
                // MIN_BY -> argMin for ClickHouse, ARG_MIN for DuckDB, drop 3rd arg for Spark/ClickHouse
                // Handle both Expression::Function and Expression::AggregateFunction
                //
                // The args are cloned (rather than moved) so `e` survives for
                // the passthrough arm at the bottom of the target match.
                let (is_max, args) = match &e {
                    Expression::Function(f) => {
                        (f.name.eq_ignore_ascii_case("MAX_BY"), f.args.clone())
                    }
                    Expression::AggregateFunction(af) => {
                        (af.name.eq_ignore_ascii_case("MAX_BY"), af.args.clone())
                    }
                    _ => return Ok(e),
                };
                match target {
                    DialectType::ClickHouse => {
                        // argMax/argMin are two-argument; extras are dropped.
                        let name = if is_max { "argMax" } else { "argMin" };
                        let mut args = args;
                        args.truncate(2);
                        Ok(Expression::Function(Box::new(Function::new(
                            name.to_string(),
                            args,
                        ))))
                    }
                    DialectType::DuckDB => {
                        // DuckDB keeps the full argument list — NOTE(review):
                        // confirm ARG_MAX/ARG_MIN accept a third argument.
                        let name = if is_max { "ARG_MAX" } else { "ARG_MIN" };
                        Ok(Expression::Function(Box::new(Function::new(
                            name.to_string(),
                            args,
                        ))))
                    }
                    DialectType::Spark | DialectType::Databricks => {
                        // Spark's MAX_BY/MIN_BY take exactly two arguments.
                        let mut args = args;
                        args.truncate(2);
                        let name = if is_max { "MAX_BY" } else { "MIN_BY" };
                        Ok(Expression::Function(Box::new(Function::new(
                            name.to_string(),
                            args,
                        ))))
                    }
                    // Any other target keeps the original expression intact.
                    _ => Ok(e),
                }
            }
19345
19346 Action::ElementAtConvert => {
19347 // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
19348 let (arr, idx) = if let Expression::ElementAt(bf) = e {
19349 (bf.this, bf.expression)
19350 } else if let Expression::Function(ref f) = e {
19351 if f.args.len() >= 2 {
19352 if let Expression::Function(f) = e {
19353 let mut args = f.args;
19354 let arr = args.remove(0);
19355 let idx = args.remove(0);
19356 (arr, idx)
19357 } else {
19358 unreachable!("outer condition already matched Expression::Function")
19359 }
19360 } else {
19361 return Ok(e);
19362 }
19363 } else {
19364 return Ok(e);
19365 };
19366 match target {
19367 DialectType::PostgreSQL => {
19368 // Wrap array in parens for PostgreSQL: (ARRAY[1,2,3])[4]
19369 let arr_expr = Expression::Paren(Box::new(Paren {
19370 this: arr,
19371 trailing_comments: vec![],
19372 }));
19373 Ok(Expression::Subscript(Box::new(
19374 crate::expressions::Subscript {
19375 this: arr_expr,
19376 index: idx,
19377 },
19378 )))
19379 }
19380 DialectType::BigQuery => {
19381 // BigQuery: convert ARRAY[...] to bare [...] for subscript
19382 let arr_expr = match arr {
19383 Expression::ArrayFunc(af) => Expression::ArrayFunc(Box::new(
19384 crate::expressions::ArrayConstructor {
19385 expressions: af.expressions,
19386 bracket_notation: true,
19387 use_list_keyword: false,
19388 },
19389 )),
19390 other => other,
19391 };
19392 let safe_ordinal = Expression::Function(Box::new(Function::new(
19393 "SAFE_ORDINAL".to_string(),
19394 vec![idx],
19395 )));
19396 Ok(Expression::Subscript(Box::new(
19397 crate::expressions::Subscript {
19398 this: arr_expr,
19399 index: safe_ordinal,
19400 },
19401 )))
19402 }
19403 _ => Ok(Expression::Function(Box::new(Function::new(
19404 "ELEMENT_AT".to_string(),
19405 vec![arr, idx],
19406 )))),
19407 }
19408 }
19409
19410 Action::CurrentUserParens => {
19411 // CURRENT_USER -> CURRENT_USER() for Snowflake
19412 Ok(Expression::Function(Box::new(Function::new(
19413 "CURRENT_USER".to_string(),
19414 vec![],
19415 ))))
19416 }
19417
19418 Action::ArrayAggToCollectList => {
19419 // ARRAY_AGG(x ORDER BY ...) -> COLLECT_LIST(x) for Hive/Spark
19420 // Python sqlglot Hive.arrayagg_sql strips ORDER BY for simple cases
19421 // but preserves it when DISTINCT/IGNORE NULLS/LIMIT are present
19422 match e {
19423 Expression::AggregateFunction(mut af) => {
19424 let is_simple =
19425 !af.distinct && af.ignore_nulls.is_none() && af.limit.is_none();
19426 let args = if af.args.is_empty() {
19427 vec![]
19428 } else {
19429 vec![af.args[0].clone()]
19430 };
19431 af.name = "COLLECT_LIST".to_string();
19432 af.args = args;
19433 if is_simple {
19434 af.order_by = Vec::new();
19435 }
19436 Ok(Expression::AggregateFunction(af))
19437 }
19438 Expression::ArrayAgg(agg) => {
19439 let is_simple =
19440 !agg.distinct && agg.ignore_nulls.is_none() && agg.limit.is_none();
19441 Ok(Expression::AggregateFunction(Box::new(
19442 crate::expressions::AggregateFunction {
19443 name: "COLLECT_LIST".to_string(),
19444 args: vec![agg.this.clone()],
19445 distinct: agg.distinct,
19446 filter: agg.filter.clone(),
19447 order_by: if is_simple {
19448 Vec::new()
19449 } else {
19450 agg.order_by.clone()
19451 },
19452 limit: agg.limit.clone(),
19453 ignore_nulls: agg.ignore_nulls,
19454 inferred_type: None,
19455 },
19456 )))
19457 }
19458 _ => Ok(e),
19459 }
19460 }
19461
19462 Action::ArraySyntaxConvert => {
19463 match e {
19464 // ARRAY[1, 2] (ArrayFunc bracket_notation=false) -> set bracket_notation=true
19465 // so the generator uses dialect-specific output (ARRAY() for Spark, [] for BigQuery)
19466 Expression::ArrayFunc(arr) if !arr.bracket_notation => Ok(
19467 Expression::ArrayFunc(Box::new(crate::expressions::ArrayConstructor {
19468 expressions: arr.expressions,
19469 bracket_notation: true,
19470 use_list_keyword: false,
19471 })),
19472 ),
19473 // ARRAY(y) function style -> ArrayFunc for target dialect
19474 // bracket_notation=true for BigQuery/DuckDB/ClickHouse/StarRocks (output []), false for Presto (output ARRAY[])
19475 Expression::Function(f) if f.name.eq_ignore_ascii_case("ARRAY") => {
19476 let bracket = matches!(
19477 target,
19478 DialectType::BigQuery
19479 | DialectType::DuckDB
19480 | DialectType::ClickHouse
19481 | DialectType::StarRocks
19482 );
19483 Ok(Expression::ArrayFunc(Box::new(
19484 crate::expressions::ArrayConstructor {
19485 expressions: f.args,
19486 bracket_notation: bracket,
19487 use_list_keyword: false,
19488 },
19489 )))
19490 }
19491 _ => Ok(e),
19492 }
19493 }
19494
19495 Action::CastToJsonForSpark => {
19496 // CAST(x AS JSON) -> TO_JSON(x) for Spark
19497 if let Expression::Cast(c) = e {
19498 Ok(Expression::Function(Box::new(Function::new(
19499 "TO_JSON".to_string(),
19500 vec![c.this],
19501 ))))
19502 } else {
19503 Ok(e)
19504 }
19505 }
19506
19507 Action::CastJsonToFromJson => {
19508 // CAST(ParseJson(literal) AS ARRAY/MAP/STRUCT) -> FROM_JSON(literal, type_string) for Spark
19509 if let Expression::Cast(c) = e {
19510 // Extract the string literal from ParseJson
19511 let literal_expr = if let Expression::ParseJson(pj) = c.this {
19512 pj.this
19513 } else {
19514 c.this
19515 };
19516 // Convert the target DataType to Spark's type string format
19517 let type_str = Self::data_type_to_spark_string(&c.to);
19518 Ok(Expression::Function(Box::new(Function::new(
19519 "FROM_JSON".to_string(),
19520 vec![literal_expr, Expression::Literal(Literal::String(type_str))],
19521 ))))
19522 } else {
19523 Ok(e)
19524 }
19525 }
19526
            Action::ToJsonConvert => {
                // TO_JSON(x) -> target-specific conversion
                if let Expression::ToJson(f) = e {
                    let arg = f.this;
                    match target {
                        DialectType::Presto | DialectType::Trino => {
                            // JSON_FORMAT(CAST(x AS JSON))
                            // JSON is spelled via DataType::Custom here (no
                            // dedicated DataType variant is used).
                            let cast_json = Expression::Cast(Box::new(Cast {
                                this: arg,
                                to: DataType::Custom {
                                    name: "JSON".to_string(),
                                },
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            }));
                            Ok(Expression::Function(Box::new(Function::new(
                                "JSON_FORMAT".to_string(),
                                vec![cast_json],
                            ))))
                        }
                        DialectType::BigQuery => Ok(Expression::Function(Box::new(
                            Function::new("TO_JSON_STRING".to_string(), vec![arg]),
                        ))),
                        DialectType::DuckDB => {
                            // CAST(TO_JSON(x) AS TEXT)
                            // Keep the ToJson node (generator emits DuckDB's
                            // TO_JSON) and add the textual cast around it.
                            let to_json =
                                Expression::ToJson(Box::new(crate::expressions::UnaryFunc {
                                    this: arg,
                                    original_name: None,
                                    inferred_type: None,
                                }));
                            Ok(Expression::Cast(Box::new(Cast {
                                this: to_json,
                                to: DataType::Text,
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            })))
                        }
                        // Default: rebuild the ToJson node; `arg` was moved
                        // out above. NOTE(review): the source node's
                        // original_name/inferred_type are dropped here —
                        // confirm that is intentional.
                        _ => Ok(Expression::ToJson(Box::new(
                            crate::expressions::UnaryFunc {
                                this: arg,
                                original_name: None,
                                inferred_type: None,
                            },
                        ))),
                    }
                } else {
                    Ok(e)
                }
            }
19583
19584 Action::VarianceToClickHouse => {
19585 if let Expression::Variance(f) = e {
19586 Ok(Expression::Function(Box::new(Function::new(
19587 "varSamp".to_string(),
19588 vec![f.this],
19589 ))))
19590 } else {
19591 Ok(e)
19592 }
19593 }
19594
19595 Action::StddevToClickHouse => {
19596 if let Expression::Stddev(f) = e {
19597 Ok(Expression::Function(Box::new(Function::new(
19598 "stddevSamp".to_string(),
19599 vec![f.this],
19600 ))))
19601 } else {
19602 Ok(e)
19603 }
19604 }
19605
19606 Action::ApproxQuantileConvert => {
19607 if let Expression::ApproxQuantile(aq) = e {
19608 let mut args = vec![*aq.this];
19609 if let Some(q) = aq.quantile {
19610 args.push(*q);
19611 }
19612 Ok(Expression::Function(Box::new(Function::new(
19613 "APPROX_PERCENTILE".to_string(),
19614 args,
19615 ))))
19616 } else {
19617 Ok(e)
19618 }
19619 }
19620
19621 Action::DollarParamConvert => {
19622 if let Expression::Parameter(p) = e {
19623 Ok(Expression::Parameter(Box::new(
19624 crate::expressions::Parameter {
19625 name: p.name,
19626 index: p.index,
19627 style: crate::expressions::ParameterStyle::At,
19628 quoted: p.quoted,
19629 string_quoted: p.string_quoted,
19630 expression: p.expression,
19631 },
19632 )))
19633 } else {
19634 Ok(e)
19635 }
19636 }
19637
19638 Action::EscapeStringNormalize => {
19639 if let Expression::Literal(Literal::EscapeString(s)) = e {
19640 // Strip prefix (e.g., "e:" or "E:") if present from tokenizer
19641 let stripped = if s.starts_with("e:") || s.starts_with("E:") {
19642 s[2..].to_string()
19643 } else {
19644 s
19645 };
19646 let normalized = stripped
19647 .replace('\n', "\\n")
19648 .replace('\r', "\\r")
19649 .replace('\t', "\\t");
19650 match target {
19651 DialectType::BigQuery => {
19652 // BigQuery: e'...' -> CAST(b'...' AS STRING)
19653 // Use Raw for the b'...' part to avoid double-escaping
19654 let raw_sql = format!("CAST(b'{}' AS STRING)", normalized);
19655 Ok(Expression::Raw(crate::expressions::Raw { sql: raw_sql }))
19656 }
19657 _ => Ok(Expression::Literal(Literal::EscapeString(normalized))),
19658 }
19659 } else {
19660 Ok(e)
19661 }
19662 }
19663
19664 Action::StraightJoinCase => {
19665 // straight_join: keep lowercase for DuckDB, quote for MySQL
19666 if let Expression::Column(col) = e {
19667 if col.name.name == "STRAIGHT_JOIN" {
19668 let mut new_col = col;
19669 new_col.name.name = "straight_join".to_string();
19670 if matches!(target, DialectType::MySQL) {
19671 // MySQL: needs quoting since it's a reserved keyword
19672 new_col.name.quoted = true;
19673 }
19674 Ok(Expression::Column(new_col))
19675 } else {
19676 Ok(Expression::Column(col))
19677 }
19678 } else {
19679 Ok(e)
19680 }
19681 }
19682
19683 Action::TablesampleReservoir => {
19684 // TABLESAMPLE -> TABLESAMPLE RESERVOIR for DuckDB
19685 if let Expression::TableSample(mut ts) = e {
19686 if let Some(ref mut sample) = ts.sample {
19687 sample.method = crate::expressions::SampleMethod::Reservoir;
19688 sample.explicit_method = true;
19689 }
19690 Ok(Expression::TableSample(ts))
19691 } else {
19692 Ok(e)
19693 }
19694 }
19695
19696 Action::TablesampleSnowflakeStrip => {
19697 // Strip method and PERCENT for Snowflake target from non-Snowflake source
19698 match e {
19699 Expression::TableSample(mut ts) => {
19700 if let Some(ref mut sample) = ts.sample {
19701 sample.suppress_method_output = true;
19702 sample.unit_after_size = false;
19703 sample.is_percent = false;
19704 }
19705 Ok(Expression::TableSample(ts))
19706 }
19707 Expression::Table(mut t) => {
19708 if let Some(ref mut sample) = t.table_sample {
19709 sample.suppress_method_output = true;
19710 sample.unit_after_size = false;
19711 sample.is_percent = false;
19712 }
19713 Ok(Expression::Table(t))
19714 }
19715 _ => Ok(e),
19716 }
19717 }
19718
19719 Action::FirstToAnyValue => {
19720 // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
19721 if let Expression::First(mut agg) = e {
19722 agg.ignore_nulls = None;
19723 agg.name = Some("ANY_VALUE".to_string());
19724 Ok(Expression::AnyValue(agg))
19725 } else {
19726 Ok(e)
19727 }
19728 }
19729
            Action::ArrayIndexConvert => {
                // Subscript index: 1-based to 0-based for BigQuery
                // Only integer-literal indices are rewritten; a computed
                // index (column, expression) passes through unchanged —
                // NOTE(review): confirm non-literal indices should not be
                // rewritten as `idx - 1` instead.
                if let Expression::Subscript(mut sub) = e {
                    if let Expression::Literal(Literal::Number(ref n)) = sub.index {
                        // parse::<i64> fails for floats / out-of-range values;
                        // such literals are deliberately left untouched.
                        if let Ok(val) = n.parse::<i64>() {
                            sub.index =
                                Expression::Literal(Literal::Number((val - 1).to_string()));
                        }
                    }
                    Ok(Expression::Subscript(sub))
                } else {
                    Ok(e)
                }
            }
19744
19745 Action::AnyValueIgnoreNulls => {
19746 // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
19747 if let Expression::AnyValue(mut av) = e {
19748 if av.ignore_nulls.is_none() {
19749 av.ignore_nulls = Some(true);
19750 }
19751 Ok(Expression::AnyValue(av))
19752 } else {
19753 Ok(e)
19754 }
19755 }
19756
19757 Action::BigQueryNullsOrdering => {
19758 // BigQuery doesn't support NULLS FIRST/LAST in window function ORDER BY
19759 if let Expression::WindowFunction(mut wf) = e {
19760 for o in &mut wf.over.order_by {
19761 o.nulls_first = None;
19762 }
19763 Ok(Expression::WindowFunction(wf))
19764 } else if let Expression::Ordered(mut o) = e {
19765 o.nulls_first = None;
19766 Ok(Expression::Ordered(o))
19767 } else {
19768 Ok(e)
19769 }
19770 }
19771
19772 Action::SnowflakeFloatProtect => {
19773 // Convert DataType::Float to DataType::Custom("FLOAT") to prevent
19774 // Snowflake's target transform from converting it to DOUBLE.
19775 // Non-Snowflake sources should keep their FLOAT spelling.
19776 if let Expression::DataType(DataType::Float { .. }) = e {
19777 Ok(Expression::DataType(DataType::Custom {
19778 name: "FLOAT".to_string(),
19779 }))
19780 } else {
19781 Ok(e)
19782 }
19783 }
19784
19785 Action::MysqlNullsOrdering => {
19786 // MySQL doesn't support NULLS FIRST/LAST - strip or rewrite
19787 if let Expression::Ordered(mut o) = e {
19788 let nulls_last = o.nulls_first == Some(false);
19789 let desc = o.desc;
19790 // MySQL default: ASC -> NULLS LAST, DESC -> NULLS FIRST
19791 // If requested ordering matches default, just strip NULLS clause
19792 let matches_default = if desc {
19793 // DESC default is NULLS FIRST, so nulls_first=true matches
19794 o.nulls_first == Some(true)
19795 } else {
19796 // ASC default is NULLS LAST, so nulls_first=false matches
19797 nulls_last
19798 };
19799 if matches_default {
19800 o.nulls_first = None;
19801 Ok(Expression::Ordered(o))
19802 } else {
19803 // Need CASE WHEN x IS NULL THEN 0/1 ELSE 0/1 END, x
19804 // For ASC NULLS FIRST: ORDER BY CASE WHEN x IS NULL THEN 0 ELSE 1 END, x ASC
19805 // For DESC NULLS LAST: ORDER BY CASE WHEN x IS NULL THEN 1 ELSE 0 END, x DESC
19806 let null_val = if desc { 1 } else { 0 };
19807 let non_null_val = if desc { 0 } else { 1 };
19808 let _case_expr = Expression::Case(Box::new(Case {
19809 operand: None,
19810 whens: vec![(
19811 Expression::IsNull(Box::new(crate::expressions::IsNull {
19812 this: o.this.clone(),
19813 not: false,
19814 postfix_form: false,
19815 })),
19816 Expression::number(null_val),
19817 )],
19818 else_: Some(Expression::number(non_null_val)),
19819 comments: Vec::new(),
19820 inferred_type: None,
19821 }));
19822 o.nulls_first = None;
19823 // Return a tuple of [case_expr, ordered_expr]
19824 // We need to return both as part of the ORDER BY
19825 // But since transform_recursive processes individual expressions,
19826 // we can't easily add extra ORDER BY items here.
19827 // Instead, strip the nulls_first
19828 o.nulls_first = None;
19829 Ok(Expression::Ordered(o))
19830 }
19831 } else {
19832 Ok(e)
19833 }
19834 }
19835
19836 Action::MysqlNullsLastRewrite => {
19837 // DuckDB -> MySQL: Add CASE WHEN IS NULL THEN 1 ELSE 0 END to ORDER BY
19838 // to simulate NULLS LAST for ASC ordering
19839 if let Expression::WindowFunction(mut wf) = e {
19840 let mut new_order_by = Vec::new();
19841 for o in wf.over.order_by {
19842 if !o.desc {
19843 // ASC: DuckDB has NULLS LAST, MySQL has NULLS FIRST
19844 // Add CASE WHEN expr IS NULL THEN 1 ELSE 0 END before expr
19845 let case_expr = Expression::Case(Box::new(Case {
19846 operand: None,
19847 whens: vec![(
19848 Expression::IsNull(Box::new(crate::expressions::IsNull {
19849 this: o.this.clone(),
19850 not: false,
19851 postfix_form: false,
19852 })),
19853 Expression::Literal(Literal::Number("1".to_string())),
19854 )],
19855 else_: Some(Expression::Literal(Literal::Number(
19856 "0".to_string(),
19857 ))),
19858 comments: Vec::new(),
19859 inferred_type: None,
19860 }));
19861 new_order_by.push(crate::expressions::Ordered {
19862 this: case_expr,
19863 desc: false,
19864 nulls_first: None,
19865 explicit_asc: false,
19866 with_fill: None,
19867 });
19868 let mut ordered = o;
19869 ordered.nulls_first = None;
19870 new_order_by.push(ordered);
19871 } else {
19872 // DESC: DuckDB has NULLS LAST, MySQL also has NULLS LAST (NULLs smallest in DESC)
19873 // No change needed
19874 let mut ordered = o;
19875 ordered.nulls_first = None;
19876 new_order_by.push(ordered);
19877 }
19878 }
19879 wf.over.order_by = new_order_by;
19880 Ok(Expression::WindowFunction(wf))
19881 } else {
19882 Ok(e)
19883 }
19884 }
19885
            Action::RespectNullsConvert => {
                // RESPECT NULLS -> strip for SQLite (FIRST_VALUE(c) OVER (...))
                // Only an explicit RESPECT NULLS (ignore_nulls == Some(false))
                // is removed; IGNORE NULLS (Some(true)) is left for other
                // actions/generators to handle.
                if let Expression::WindowFunction(mut wf) = e {
                    match &mut wf.this {
                        Expression::FirstValue(ref mut vf) => {
                            if vf.ignore_nulls == Some(false) {
                                vf.ignore_nulls = None;
                                // For SQLite, we'd need to add NULLS LAST to ORDER BY in the OVER clause
                                // but that's handled by the generator's NULLS ordering
                            }
                        }
                        Expression::LastValue(ref mut vf) => {
                            if vf.ignore_nulls == Some(false) {
                                vf.ignore_nulls = None;
                            }
                        }
                        // Other window functions pass through untouched.
                        _ => {}
                    }
                    Ok(Expression::WindowFunction(wf))
                } else {
                    Ok(e)
                }
            }
19909
19910 Action::CreateTableStripComment => {
19911 // Strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
19912 if let Expression::CreateTable(mut ct) = e {
19913 for col in &mut ct.columns {
19914 col.comment = None;
19915 col.constraints.retain(|c| {
19916 !matches!(c, crate::expressions::ColumnConstraint::Comment(_))
19917 });
19918 // Also remove Comment from constraint_order
19919 col.constraint_order.retain(|c| {
19920 !matches!(c, crate::expressions::ConstraintType::Comment)
19921 });
19922 }
19923 // Strip properties (USING, PARTITIONED BY, etc.)
19924 ct.properties.clear();
19925 Ok(Expression::CreateTable(ct))
19926 } else {
19927 Ok(e)
19928 }
19929 }
19930
19931 Action::AlterTableToSpRename => {
19932 // ALTER TABLE db.t1 RENAME TO db.t2 -> EXEC sp_rename 'db.t1', 't2'
19933 if let Expression::AlterTable(ref at) = e {
19934 if let Some(crate::expressions::AlterTableAction::RenameTable(
19935 ref new_tbl,
19936 )) = at.actions.first()
19937 {
19938 // Build the old table name using TSQL bracket quoting
19939 let old_name = if let Some(ref schema) = at.name.schema {
19940 if at.name.name.quoted || schema.quoted {
19941 format!("[{}].[{}]", schema.name, at.name.name.name)
19942 } else {
19943 format!("{}.{}", schema.name, at.name.name.name)
19944 }
19945 } else {
19946 if at.name.name.quoted {
19947 format!("[{}]", at.name.name.name)
19948 } else {
19949 at.name.name.name.clone()
19950 }
19951 };
19952 let new_name = new_tbl.name.name.clone();
19953 // EXEC sp_rename 'old_name', 'new_name'
19954 let sql = format!("EXEC sp_rename '{}', '{}'", old_name, new_name);
19955 Ok(Expression::Raw(crate::expressions::Raw { sql }))
19956 } else {
19957 Ok(e)
19958 }
19959 } else {
19960 Ok(e)
19961 }
19962 }
19963
            Action::SnowflakeIntervalFormat => {
                // INTERVAL '2' HOUR -> INTERVAL '2 HOUR' for Snowflake
                // The unit keyword is folded into the quoted literal and the
                // separate unit specifier is cleared.
                if let Expression::Interval(mut iv) = e {
                    if let (
                        Some(Expression::Literal(Literal::String(ref val))),
                        Some(ref unit_spec),
                    ) = (&iv.this, &iv.unit)
                    {
                        // Map the structured unit back to its keyword. Only
                        // the Simple form is handled; any other spec yields
                        // an empty string and the interval is left unchanged.
                        let unit_str = match unit_spec {
                            crate::expressions::IntervalUnitSpec::Simple { unit, .. } => {
                                match unit {
                                    crate::expressions::IntervalUnit::Year => "YEAR",
                                    crate::expressions::IntervalUnit::Quarter => "QUARTER",
                                    crate::expressions::IntervalUnit::Month => "MONTH",
                                    crate::expressions::IntervalUnit::Week => "WEEK",
                                    crate::expressions::IntervalUnit::Day => "DAY",
                                    crate::expressions::IntervalUnit::Hour => "HOUR",
                                    crate::expressions::IntervalUnit::Minute => "MINUTE",
                                    crate::expressions::IntervalUnit::Second => "SECOND",
                                    crate::expressions::IntervalUnit::Millisecond => {
                                        "MILLISECOND"
                                    }
                                    crate::expressions::IntervalUnit::Microsecond => {
                                        "MICROSECOND"
                                    }
                                    crate::expressions::IntervalUnit::Nanosecond => {
                                        "NANOSECOND"
                                    }
                                }
                            }
                            _ => "",
                        };
                        if !unit_str.is_empty() {
                            let combined = format!("{} {}", val, unit_str);
                            iv.this = Some(Expression::Literal(Literal::String(combined)));
                            iv.unit = None;
                        }
                    }
                    Ok(Expression::Interval(iv))
                } else {
                    Ok(e)
                }
            }
20007
20008 Action::ArrayConcatBracketConvert => {
20009 // Expression::Array/ArrayFunc -> target-specific
20010 // For PostgreSQL: Array -> ArrayFunc (bracket_notation: false)
20011 // For Redshift: Array/ArrayFunc -> Function("ARRAY", args) to produce ARRAY(1, 2) with parens
20012 match e {
20013 Expression::Array(arr) => {
20014 if matches!(target, DialectType::Redshift) {
20015 Ok(Expression::Function(Box::new(Function::new(
20016 "ARRAY".to_string(),
20017 arr.expressions,
20018 ))))
20019 } else {
20020 Ok(Expression::ArrayFunc(Box::new(
20021 crate::expressions::ArrayConstructor {
20022 expressions: arr.expressions,
20023 bracket_notation: false,
20024 use_list_keyword: false,
20025 },
20026 )))
20027 }
20028 }
20029 Expression::ArrayFunc(arr) => {
20030 // Only for Redshift: convert bracket-notation ArrayFunc to Function("ARRAY")
20031 if matches!(target, DialectType::Redshift) {
20032 Ok(Expression::Function(Box::new(Function::new(
20033 "ARRAY".to_string(),
20034 arr.expressions,
20035 ))))
20036 } else {
20037 Ok(Expression::ArrayFunc(arr))
20038 }
20039 }
20040 _ => Ok(e),
20041 }
20042 }
20043
20044 Action::BitAggFloatCast => {
20045 // BIT_OR/BIT_AND/BIT_XOR with float/decimal cast arg -> wrap with ROUND+INT cast for DuckDB
20046 // For FLOAT/DOUBLE/REAL: CAST(ROUND(CAST(val AS type)) AS INT)
20047 // For DECIMAL: CAST(CAST(val AS DECIMAL(p,s)) AS INT)
20048 let int_type = DataType::Int {
20049 length: None,
20050 integer_spelling: false,
20051 };
20052 let wrap_agg = |agg_this: Expression, int_dt: DataType| -> Expression {
20053 if let Expression::Cast(c) = agg_this {
20054 match &c.to {
20055 DataType::Float { .. }
20056 | DataType::Double { .. }
20057 | DataType::Custom { .. } => {
20058 // FLOAT/DOUBLE/REAL: CAST(ROUND(CAST(val AS type)) AS INT)
20059 // Change FLOAT to REAL (Float with real_spelling=true) for DuckDB generator
20060 let inner_type = match &c.to {
20061 DataType::Float {
20062 precision, scale, ..
20063 } => DataType::Float {
20064 precision: *precision,
20065 scale: *scale,
20066 real_spelling: true,
20067 },
20068 other => other.clone(),
20069 };
20070 let inner_cast =
20071 Expression::Cast(Box::new(crate::expressions::Cast {
20072 this: c.this.clone(),
20073 to: inner_type,
20074 trailing_comments: Vec::new(),
20075 double_colon_syntax: false,
20076 format: None,
20077 default: None,
20078 inferred_type: None,
20079 }));
20080 let rounded = Expression::Function(Box::new(Function::new(
20081 "ROUND".to_string(),
20082 vec![inner_cast],
20083 )));
20084 Expression::Cast(Box::new(crate::expressions::Cast {
20085 this: rounded,
20086 to: int_dt,
20087 trailing_comments: Vec::new(),
20088 double_colon_syntax: false,
20089 format: None,
20090 default: None,
20091 inferred_type: None,
20092 }))
20093 }
20094 DataType::Decimal { .. } => {
20095 // DECIMAL: CAST(CAST(val AS DECIMAL(p,s)) AS INT)
20096 Expression::Cast(Box::new(crate::expressions::Cast {
20097 this: Expression::Cast(c),
20098 to: int_dt,
20099 trailing_comments: Vec::new(),
20100 double_colon_syntax: false,
20101 format: None,
20102 default: None,
20103 inferred_type: None,
20104 }))
20105 }
20106 _ => Expression::Cast(c),
20107 }
20108 } else {
20109 agg_this
20110 }
20111 };
20112 match e {
20113 Expression::BitwiseOrAgg(mut f) => {
20114 f.this = wrap_agg(f.this, int_type);
20115 Ok(Expression::BitwiseOrAgg(f))
20116 }
20117 Expression::BitwiseAndAgg(mut f) => {
20118 let int_type = DataType::Int {
20119 length: None,
20120 integer_spelling: false,
20121 };
20122 f.this = wrap_agg(f.this, int_type);
20123 Ok(Expression::BitwiseAndAgg(f))
20124 }
20125 Expression::BitwiseXorAgg(mut f) => {
20126 let int_type = DataType::Int {
20127 length: None,
20128 integer_spelling: false,
20129 };
20130 f.this = wrap_agg(f.this, int_type);
20131 Ok(Expression::BitwiseXorAgg(f))
20132 }
20133 _ => Ok(e),
20134 }
20135 }
20136
20137 Action::BitAggSnowflakeRename => {
20138 // BIT_OR -> BITORAGG, BIT_AND -> BITANDAGG, BIT_XOR -> BITXORAGG for Snowflake
20139 match e {
20140 Expression::BitwiseOrAgg(f) => Ok(Expression::Function(Box::new(
20141 Function::new("BITORAGG".to_string(), vec![f.this]),
20142 ))),
20143 Expression::BitwiseAndAgg(f) => Ok(Expression::Function(Box::new(
20144 Function::new("BITANDAGG".to_string(), vec![f.this]),
20145 ))),
20146 Expression::BitwiseXorAgg(f) => Ok(Expression::Function(Box::new(
20147 Function::new("BITXORAGG".to_string(), vec![f.this]),
20148 ))),
20149 _ => Ok(e),
20150 }
20151 }
20152
20153 Action::StrftimeCastTimestamp => {
20154 // CAST(x AS TIMESTAMP) -> CAST(x AS TIMESTAMP_NTZ) for Spark
20155 if let Expression::Cast(mut c) = e {
20156 if matches!(
20157 c.to,
20158 DataType::Timestamp {
20159 timezone: false,
20160 ..
20161 }
20162 ) {
20163 c.to = DataType::Custom {
20164 name: "TIMESTAMP_NTZ".to_string(),
20165 };
20166 }
20167 Ok(Expression::Cast(c))
20168 } else {
20169 Ok(e)
20170 }
20171 }
20172
20173 Action::DecimalDefaultPrecision => {
20174 // DECIMAL without precision -> DECIMAL(18, 3) for Snowflake
20175 if let Expression::Cast(mut c) = e {
20176 if matches!(
20177 c.to,
20178 DataType::Decimal {
20179 precision: None,
20180 ..
20181 }
20182 ) {
20183 c.to = DataType::Decimal {
20184 precision: Some(18),
20185 scale: Some(3),
20186 };
20187 }
20188 Ok(Expression::Cast(c))
20189 } else {
20190 Ok(e)
20191 }
20192 }
20193
            Action::FilterToIff => {
                // FILTER(WHERE cond) -> rewrite aggregate: AGG(IFF(cond, val, NULL))
                // Pushes the filter predicate into the aggregate's argument
                // for targets without a FILTER clause.
                if let Expression::Filter(f) = e {
                    let condition = *f.expression;
                    let agg = *f.this;
                    // Strip WHERE from condition
                    let cond = match condition {
                        Expression::Where(w) => w.this,
                        other => other,
                    };
                    // Extract the aggregate function and its argument
                    // We want AVG(IFF(condition, x, NULL))
                    match agg {
                        Expression::Function(mut func) => {
                            if !func.args.is_empty() {
                                // Wrap only the first argument in IFF.
                                let orig_arg = func.args[0].clone();
                                let iff_call = Expression::Function(Box::new(Function::new(
                                    "IFF".to_string(),
                                    vec![cond, orig_arg, Expression::Null(Null)],
                                )));
                                func.args[0] = iff_call;
                                Ok(Expression::Function(func))
                            } else {
                                // No argument to wrap: keep the Filter intact.
                                Ok(Expression::Filter(Box::new(crate::expressions::Filter {
                                    this: Box::new(Expression::Function(func)),
                                    expression: Box::new(cond),
                                })))
                            }
                        }
                        Expression::Avg(mut avg) => {
                            let iff_call = Expression::Function(Box::new(Function::new(
                                "IFF".to_string(),
                                vec![cond, avg.this.clone(), Expression::Null(Null)],
                            )));
                            avg.this = iff_call;
                            Ok(Expression::Avg(avg))
                        }
                        Expression::Sum(mut s) => {
                            let iff_call = Expression::Function(Box::new(Function::new(
                                "IFF".to_string(),
                                vec![cond, s.this.clone(), Expression::Null(Null)],
                            )));
                            s.this = iff_call;
                            Ok(Expression::Sum(s))
                        }
                        Expression::Count(mut c) => {
                            // Only COUNT(x) gains the IFF wrapper; a count
                            // without an argument is returned unchanged (the
                            // filter predicate is dropped in that case).
                            if let Some(ref this_expr) = c.this {
                                let iff_call = Expression::Function(Box::new(Function::new(
                                    "IFF".to_string(),
                                    vec![cond, this_expr.clone(), Expression::Null(Null)],
                                )));
                                c.this = Some(iff_call);
                            }
                            Ok(Expression::Count(c))
                        }
                        other => {
                            // Fallback: keep as Filter
                            Ok(Expression::Filter(Box::new(crate::expressions::Filter {
                                this: Box::new(other),
                                expression: Box::new(cond),
                            })))
                        }
                    }
                } else {
                    Ok(e)
                }
            }
20261
Action::AggFilterToIff => {
    // Folds an aggregate's embedded `filter` field into its argument:
    // AVG(x) FILTER (WHERE cond) -> AVG(IFF(cond, x, NULL)).
    // Unlike FilterToIff above, this handles the case where the FILTER was
    // parsed into the AggFunc node itself rather than a Filter wrapper.
    // All AggFunc-shaped variants share a field layout, so one macro covers them.
    macro_rules! handle_agg_filter_to_iff {
        ($variant:ident, $agg:expr) => {{
            let mut agg = $agg;
            // take() simultaneously removes the FILTER and yields its condition.
            if let Some(filter_cond) = agg.filter.take() {
                let iff_call = Expression::Function(Box::new(Function::new(
                    "IFF".to_string(),
                    vec![filter_cond, agg.this.clone(), Expression::Null(Null)],
                )));
                agg.this = iff_call;
            }
            Ok(Expression::$variant(agg))
        }};
    }

    match e {
        Expression::Avg(agg) => handle_agg_filter_to_iff!(Avg, agg),
        Expression::Sum(agg) => handle_agg_filter_to_iff!(Sum, agg),
        Expression::Min(agg) => handle_agg_filter_to_iff!(Min, agg),
        Expression::Max(agg) => handle_agg_filter_to_iff!(Max, agg),
        Expression::ArrayAgg(agg) => handle_agg_filter_to_iff!(ArrayAgg, agg),
        Expression::CountIf(agg) => handle_agg_filter_to_iff!(CountIf, agg),
        Expression::Stddev(agg) => handle_agg_filter_to_iff!(Stddev, agg),
        Expression::StddevPop(agg) => handle_agg_filter_to_iff!(StddevPop, agg),
        Expression::StddevSamp(agg) => handle_agg_filter_to_iff!(StddevSamp, agg),
        Expression::Variance(agg) => handle_agg_filter_to_iff!(Variance, agg),
        Expression::VarPop(agg) => handle_agg_filter_to_iff!(VarPop, agg),
        Expression::VarSamp(agg) => handle_agg_filter_to_iff!(VarSamp, agg),
        Expression::Median(agg) => handle_agg_filter_to_iff!(Median, agg),
        Expression::Mode(agg) => handle_agg_filter_to_iff!(Mode, agg),
        Expression::First(agg) => handle_agg_filter_to_iff!(First, agg),
        Expression::Last(agg) => handle_agg_filter_to_iff!(Last, agg),
        Expression::AnyValue(agg) => handle_agg_filter_to_iff!(AnyValue, agg),
        Expression::ApproxDistinct(agg) => {
            handle_agg_filter_to_iff!(ApproxDistinct, agg)
        }
        Expression::Count(mut c) => {
            // COUNT uses its own node with an optional argument, so it can't
            // go through the macro. NOTE(review): for COUNT(*) with a filter,
            // the filter is removed without being applied — confirm upstream
            // never produces that combination.
            if let Some(filter_cond) = c.filter.take() {
                if let Some(ref this_expr) = c.this {
                    let iff_call = Expression::Function(Box::new(Function::new(
                        "IFF".to_string(),
                        vec![
                            filter_cond,
                            this_expr.clone(),
                            Expression::Null(Null),
                        ],
                    )));
                    c.this = Some(iff_call);
                }
            }
            Ok(Expression::Count(c))
        }
        other => Ok(other),
    }
}
20319
Action::JsonToGetPath => {
    // JSON_EXTRACT(x, '$.key') -> GET_PATH(PARSE_JSON(x), 'key')
    if let Expression::JsonExtract(je) = e {
        // Normalize the subject so GET_PATH always receives a JSON value:
        // - JSON(x) -> PARSE_JSON(x)
        // - PARSE_JSON(x) -> keep as-is
        // - GET_PATH(...) -> keep as-is (its result is already JSON)
        // - anything else -> wrap in PARSE_JSON()
        let this = match &je.this {
            Expression::Function(f)
                if f.name.eq_ignore_ascii_case("JSON") && f.args.len() == 1 =>
            {
                Expression::Function(Box::new(Function::new(
                    "PARSE_JSON".to_string(),
                    f.args.clone(),
                )))
            }
            Expression::Function(f)
                if f.name.eq_ignore_ascii_case("PARSE_JSON") =>
            {
                je.this.clone()
            }
            // GET_PATH result is already JSON, don't wrap
            Expression::Function(f) if f.name.eq_ignore_ascii_case("GET_PATH") => {
                je.this.clone()
            }
            other => {
                // Wrap non-JSON expressions in PARSE_JSON()
                Expression::Function(Box::new(Function::new(
                    "PARSE_JSON".to_string(),
                    vec![other.clone()],
                )))
            }
        };
        // Convert path: extract key from JSONPath or strip $. prefix from string.
        // A structured JSONPath $a.b.c becomes the dotted literal 'a.b.c'.
        let path = match &je.path {
            Expression::JSONPath(jp) => {
                // Collect string keys, skipping the root marker.
                // NOTE(review): non-key segments (array indexes, wildcards)
                // are silently dropped here — confirm that is intended.
                let mut key_parts = Vec::new();
                for expr in &jp.expressions {
                    match expr {
                        Expression::JSONPathRoot(_) => {} // skip root
                        Expression::JSONPathKey(k) => {
                            if let Expression::Literal(Literal::String(s)) =
                                &*k.this
                            {
                                key_parts.push(s.clone());
                            }
                        }
                        _ => {}
                    }
                }
                if !key_parts.is_empty() {
                    Expression::Literal(Literal::String(key_parts.join(".")))
                } else {
                    // No usable keys (e.g. a bare "$"): keep the original path.
                    je.path.clone()
                }
            }
            // String paths: drop the "$."/"$" prefix and wildcard segments.
            Expression::Literal(Literal::String(s)) if s.starts_with("$.") => {
                let stripped = Self::strip_json_wildcards(&s[2..].to_string());
                Expression::Literal(Literal::String(stripped))
            }
            Expression::Literal(Literal::String(s)) if s.starts_with('$') => {
                let stripped = Self::strip_json_wildcards(&s[1..].to_string());
                Expression::Literal(Literal::String(stripped))
            }
            _ => je.path.clone(),
        };
        Ok(Expression::Function(Box::new(Function::new(
            "GET_PATH".to_string(),
            vec![this, path],
        ))))
    } else {
        Ok(e)
    }
}
20395
20396 Action::StructToRow => {
20397 // DuckDB struct/dict -> BigQuery STRUCT(value AS key, ...) / Presto ROW
20398 // Handles both Expression::Struct and Expression::MapFunc(curly_brace_syntax=true)
20399
20400 // Extract key-value pairs from either Struct or MapFunc
20401 let kv_pairs: Option<Vec<(String, Expression)>> = match &e {
20402 Expression::Struct(s) => Some(
20403 s.fields
20404 .iter()
20405 .map(|(opt_name, field_expr)| {
20406 if let Some(name) = opt_name {
20407 (name.clone(), field_expr.clone())
20408 } else if let Expression::NamedArgument(na) = field_expr {
20409 (na.name.name.clone(), na.value.clone())
20410 } else {
20411 (String::new(), field_expr.clone())
20412 }
20413 })
20414 .collect(),
20415 ),
20416 Expression::MapFunc(m) if m.curly_brace_syntax => Some(
20417 m.keys
20418 .iter()
20419 .zip(m.values.iter())
20420 .map(|(key, value)| {
20421 let key_name = match key {
20422 Expression::Literal(Literal::String(s)) => s.clone(),
20423 Expression::Identifier(id) => id.name.clone(),
20424 _ => String::new(),
20425 };
20426 (key_name, value.clone())
20427 })
20428 .collect(),
20429 ),
20430 _ => None,
20431 };
20432
20433 if let Some(pairs) = kv_pairs {
20434 let mut named_args = Vec::new();
20435 for (key_name, value) in pairs {
20436 if matches!(target, DialectType::BigQuery) && !key_name.is_empty() {
20437 named_args.push(Expression::Alias(Box::new(
20438 crate::expressions::Alias::new(
20439 value,
20440 Identifier::new(key_name),
20441 ),
20442 )));
20443 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
20444 named_args.push(value);
20445 } else {
20446 named_args.push(value);
20447 }
20448 }
20449
20450 if matches!(target, DialectType::BigQuery) {
20451 Ok(Expression::Function(Box::new(Function::new(
20452 "STRUCT".to_string(),
20453 named_args,
20454 ))))
20455 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
20456 // For Presto/Trino, infer types and wrap in CAST(ROW(...) AS ROW(name TYPE, ...))
20457 let row_func = Expression::Function(Box::new(Function::new(
20458 "ROW".to_string(),
20459 named_args,
20460 )));
20461
20462 // Try to infer types for each pair
20463 let kv_pairs_again: Option<Vec<(String, Expression)>> = match &e {
20464 Expression::Struct(s) => Some(
20465 s.fields
20466 .iter()
20467 .map(|(opt_name, field_expr)| {
20468 if let Some(name) = opt_name {
20469 (name.clone(), field_expr.clone())
20470 } else if let Expression::NamedArgument(na) = field_expr
20471 {
20472 (na.name.name.clone(), na.value.clone())
20473 } else {
20474 (String::new(), field_expr.clone())
20475 }
20476 })
20477 .collect(),
20478 ),
20479 Expression::MapFunc(m) if m.curly_brace_syntax => Some(
20480 m.keys
20481 .iter()
20482 .zip(m.values.iter())
20483 .map(|(key, value)| {
20484 let key_name = match key {
20485 Expression::Literal(Literal::String(s)) => {
20486 s.clone()
20487 }
20488 Expression::Identifier(id) => id.name.clone(),
20489 _ => String::new(),
20490 };
20491 (key_name, value.clone())
20492 })
20493 .collect(),
20494 ),
20495 _ => None,
20496 };
20497
20498 if let Some(pairs) = kv_pairs_again {
20499 // Infer types for all values
20500 let mut all_inferred = true;
20501 let mut fields = Vec::new();
20502 for (name, value) in &pairs {
20503 let inferred_type = match value {
20504 Expression::Literal(Literal::Number(n)) => {
20505 if n.contains('.') {
20506 Some(DataType::Double {
20507 precision: None,
20508 scale: None,
20509 })
20510 } else {
20511 Some(DataType::Int {
20512 length: None,
20513 integer_spelling: true,
20514 })
20515 }
20516 }
20517 Expression::Literal(Literal::String(_)) => {
20518 Some(DataType::VarChar {
20519 length: None,
20520 parenthesized_length: false,
20521 })
20522 }
20523 Expression::Boolean(_) => Some(DataType::Boolean),
20524 _ => None,
20525 };
20526 if let Some(dt) = inferred_type {
20527 fields.push(crate::expressions::StructField::new(
20528 name.clone(),
20529 dt,
20530 ));
20531 } else {
20532 all_inferred = false;
20533 break;
20534 }
20535 }
20536
20537 if all_inferred && !fields.is_empty() {
20538 let row_type = DataType::Struct {
20539 fields,
20540 nested: true,
20541 };
20542 Ok(Expression::Cast(Box::new(Cast {
20543 this: row_func,
20544 to: row_type,
20545 trailing_comments: Vec::new(),
20546 double_colon_syntax: false,
20547 format: None,
20548 default: None,
20549 inferred_type: None,
20550 })))
20551 } else {
20552 Ok(row_func)
20553 }
20554 } else {
20555 Ok(row_func)
20556 }
20557 } else {
20558 Ok(Expression::Function(Box::new(Function::new(
20559 "ROW".to_string(),
20560 named_args,
20561 ))))
20562 }
20563 } else {
20564 Ok(e)
20565 }
20566 }
20567
Action::SparkStructConvert => {
    // Spark STRUCT(val AS name, ...) -> Presto CAST(ROW(...) AS ROW(name TYPE, ...))
    // or DuckDB {'name': val, ...}
    if let Expression::Function(f) = e {
        // Extract (name, value) pairs from the aliased args; unaliased args
        // keep an empty name and stay positional.
        let mut pairs: Vec<(String, Expression)> = Vec::new();
        for arg in &f.args {
            match arg {
                Expression::Alias(a) => {
                    pairs.push((a.alias.name.clone(), a.this.clone()));
                }
                _ => {
                    pairs.push((String::new(), arg.clone()));
                }
            }
        }

        match target {
            DialectType::DuckDB => {
                // Convert to DuckDB struct literal {'name': value, ...}
                let mut keys = Vec::new();
                let mut values = Vec::new();
                for (name, value) in &pairs {
                    keys.push(Expression::Literal(Literal::String(name.clone())));
                    values.push(value.clone());
                }
                Ok(Expression::MapFunc(Box::new(
                    crate::expressions::MapConstructor {
                        keys,
                        values,
                        curly_brace_syntax: true,
                        with_map_keyword: false,
                    },
                )))
            }
            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                // Convert to CAST(ROW(val1, val2) AS ROW(name1 TYPE1, name2 TYPE2))
                let row_args: Vec<Expression> =
                    pairs.iter().map(|(_, v)| v.clone()).collect();
                let row_func = Expression::Function(Box::new(Function::new(
                    "ROW".to_string(),
                    row_args,
                )));

                // Infer a ROW field type per value from its literal form; any
                // non-literal value aborts inference and we fall back to a
                // bare ROW(...) without the CAST.
                let mut all_inferred = true;
                let mut fields = Vec::new();
                for (name, value) in &pairs {
                    let inferred_type = match value {
                        Expression::Literal(Literal::Number(n)) => {
                            // A decimal point means DOUBLE, otherwise INT.
                            if n.contains('.') {
                                Some(DataType::Double {
                                    precision: None,
                                    scale: None,
                                })
                            } else {
                                Some(DataType::Int {
                                    length: None,
                                    integer_spelling: true,
                                })
                            }
                        }
                        Expression::Literal(Literal::String(_)) => {
                            Some(DataType::VarChar {
                                length: None,
                                parenthesized_length: false,
                            })
                        }
                        Expression::Boolean(_) => Some(DataType::Boolean),
                        _ => None,
                    };
                    if let Some(dt) = inferred_type {
                        fields.push(crate::expressions::StructField::new(
                            name.clone(),
                            dt,
                        ));
                    } else {
                        all_inferred = false;
                        break;
                    }
                }

                if all_inferred && !fields.is_empty() {
                    let row_type = DataType::Struct {
                        fields,
                        nested: true,
                    };
                    Ok(Expression::Cast(Box::new(Cast {
                        this: row_func,
                        to: row_type,
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                } else {
                    // Untyped fallback: bare ROW(...) without a CAST.
                    Ok(row_func)
                }
            }
            // Other targets keep the original STRUCT(...) call untouched.
            _ => Ok(Expression::Function(f)),
        }
    } else {
        Ok(e)
    }
}
20674
20675 Action::ApproxCountDistinctToApproxDistinct => {
20676 // APPROX_COUNT_DISTINCT(x) -> APPROX_DISTINCT(x)
20677 if let Expression::ApproxCountDistinct(f) = e {
20678 Ok(Expression::ApproxDistinct(f))
20679 } else {
20680 Ok(e)
20681 }
20682 }
20683
20684 Action::CollectListToArrayAgg => {
20685 // COLLECT_LIST(x) -> ARRAY_AGG(x) FILTER(WHERE x IS NOT NULL)
20686 if let Expression::AggregateFunction(f) = e {
20687 let filter_expr = if !f.args.is_empty() {
20688 let arg = f.args[0].clone();
20689 Some(Expression::IsNull(Box::new(crate::expressions::IsNull {
20690 this: arg,
20691 not: true,
20692 postfix_form: false,
20693 })))
20694 } else {
20695 None
20696 };
20697 let agg = crate::expressions::AggFunc {
20698 this: if f.args.is_empty() {
20699 Expression::Null(crate::expressions::Null)
20700 } else {
20701 f.args[0].clone()
20702 },
20703 distinct: f.distinct,
20704 order_by: f.order_by.clone(),
20705 filter: filter_expr,
20706 ignore_nulls: None,
20707 name: None,
20708 having_max: None,
20709 limit: None,
20710 inferred_type: None,
20711 };
20712 Ok(Expression::ArrayAgg(Box::new(agg)))
20713 } else {
20714 Ok(e)
20715 }
20716 }
20717
Action::CollectSetConvert => {
    // COLLECT_SET(x) -> target-specific:
    //   Presto       -> SET_AGG(x)
    //   Snowflake    -> ARRAY_UNIQUE_AGG(x)
    //   Trino/DuckDB -> ARRAY_AGG(DISTINCT x)
    //   others       -> unchanged
    if let Expression::AggregateFunction(f) = e {
        match target {
            DialectType::Presto => Ok(Expression::AggregateFunction(Box::new(
                crate::expressions::AggregateFunction {
                    name: "SET_AGG".to_string(),
                    args: f.args,
                    // SET_AGG deduplicates by definition, so DISTINCT is not emitted.
                    distinct: false,
                    order_by: f.order_by,
                    filter: f.filter,
                    limit: f.limit,
                    ignore_nulls: f.ignore_nulls,
                    inferred_type: None,
                },
            ))),
            DialectType::Snowflake => Ok(Expression::AggregateFunction(Box::new(
                crate::expressions::AggregateFunction {
                    name: "ARRAY_UNIQUE_AGG".to_string(),
                    args: f.args,
                    // ARRAY_UNIQUE_AGG deduplicates by definition as well.
                    distinct: false,
                    order_by: f.order_by,
                    filter: f.filter,
                    limit: f.limit,
                    ignore_nulls: f.ignore_nulls,
                    inferred_type: None,
                },
            ))),
            DialectType::Trino | DialectType::DuckDB => {
                // Use the typed ArrayAgg node with DISTINCT set.
                // NOTE(review): order_by/filter/limit from the source call are
                // intentionally not carried over here — confirm COLLECT_SET
                // inputs never carry them.
                let agg = crate::expressions::AggFunc {
                    this: if f.args.is_empty() {
                        Expression::Null(crate::expressions::Null)
                    } else {
                        f.args[0].clone()
                    },
                    distinct: true,
                    order_by: Vec::new(),
                    filter: None,
                    ignore_nulls: None,
                    name: None,
                    having_max: None,
                    limit: None,
                    inferred_type: None,
                };
                Ok(Expression::ArrayAgg(Box::new(agg)))
            }
            _ => Ok(Expression::AggregateFunction(f)),
        }
    } else {
        Ok(e)
    }
}
20770
20771 Action::PercentileConvert => {
20772 // PERCENTILE(x, 0.5) -> QUANTILE(x, 0.5) / APPROX_PERCENTILE(x, 0.5)
20773 if let Expression::AggregateFunction(f) = e {
20774 let name = match target {
20775 DialectType::DuckDB => "QUANTILE",
20776 DialectType::Presto | DialectType::Trino => "APPROX_PERCENTILE",
20777 _ => "PERCENTILE",
20778 };
20779 Ok(Expression::AggregateFunction(Box::new(
20780 crate::expressions::AggregateFunction {
20781 name: name.to_string(),
20782 args: f.args,
20783 distinct: f.distinct,
20784 order_by: f.order_by,
20785 filter: f.filter,
20786 limit: f.limit,
20787 ignore_nulls: f.ignore_nulls,
20788 inferred_type: None,
20789 },
20790 )))
20791 } else {
20792 Ok(e)
20793 }
20794 }
20795
20796 Action::CorrIsnanWrap => {
20797 // CORR(a, b) -> CASE WHEN ISNAN(CORR(a, b)) THEN NULL ELSE CORR(a, b) END
20798 // The CORR expression could be AggregateFunction, WindowFunction, or Filter-wrapped
20799 let corr_clone = e.clone();
20800 let isnan = Expression::Function(Box::new(Function::new(
20801 "ISNAN".to_string(),
20802 vec![corr_clone.clone()],
20803 )));
20804 let case_expr = Expression::Case(Box::new(Case {
20805 operand: None,
20806 whens: vec![(isnan, Expression::Null(crate::expressions::Null))],
20807 else_: Some(corr_clone),
20808 comments: Vec::new(),
20809 inferred_type: None,
20810 }));
20811 Ok(case_expr)
20812 }
20813
Action::TruncToDateTrunc => {
    // TRUNC(timestamp, 'MONTH') -> DATE_TRUNC('MONTH', timestamp).
    // Note the argument order flips: TRUNC takes (value, unit) while
    // DATE_TRUNC takes (unit, value).
    if let Expression::Function(f) = e {
        if f.args.len() == 2 {
            let timestamp = f.args[0].clone();
            let unit_expr = f.args[1].clone();

            if matches!(target, DialectType::ClickHouse) {
                // For ClickHouse, produce Expression::DateTrunc which the generator
                // outputs as DATE_TRUNC(...) without going through the ClickHouse
                // target transform that would convert it to dateTrunc
                let unit_str = Self::get_unit_str_static(&unit_expr);
                // Map the textual unit onto the typed field; unrecognized
                // units round-trip through Custom so nothing is lost.
                let dt_field = match unit_str.as_str() {
                    "YEAR" => DateTimeField::Year,
                    "MONTH" => DateTimeField::Month,
                    "DAY" => DateTimeField::Day,
                    "HOUR" => DateTimeField::Hour,
                    "MINUTE" => DateTimeField::Minute,
                    "SECOND" => DateTimeField::Second,
                    "WEEK" => DateTimeField::Week,
                    "QUARTER" => DateTimeField::Quarter,
                    _ => DateTimeField::Custom(unit_str),
                };
                Ok(Expression::DateTrunc(Box::new(
                    crate::expressions::DateTruncFunc {
                        this: timestamp,
                        unit: dt_field,
                    },
                )))
            } else {
                // Other targets: plain DATE_TRUNC call with swapped args.
                let new_args = vec![unit_expr, timestamp];
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_TRUNC".to_string(),
                    new_args,
                ))))
            }
        } else {
            // Not a two-argument TRUNC (e.g. numeric truncation): leave it alone.
            Ok(Expression::Function(f))
        }
    } else {
        Ok(e)
    }
}
20857
20858 Action::ArrayContainsConvert => {
20859 if let Expression::ArrayContains(f) = e {
20860 match target {
20861 DialectType::Presto | DialectType::Trino => {
20862 // ARRAY_CONTAINS(arr, val) -> CONTAINS(arr, val)
20863 Ok(Expression::Function(Box::new(Function::new(
20864 "CONTAINS".to_string(),
20865 vec![f.this, f.expression],
20866 ))))
20867 }
20868 DialectType::Snowflake => {
20869 // ARRAY_CONTAINS(arr, val) -> ARRAY_CONTAINS(CAST(val AS VARIANT), arr)
20870 let cast_val =
20871 Expression::Cast(Box::new(crate::expressions::Cast {
20872 this: f.expression,
20873 to: crate::expressions::DataType::Custom {
20874 name: "VARIANT".to_string(),
20875 },
20876 trailing_comments: Vec::new(),
20877 double_colon_syntax: false,
20878 format: None,
20879 default: None,
20880 inferred_type: None,
20881 }));
20882 Ok(Expression::Function(Box::new(Function::new(
20883 "ARRAY_CONTAINS".to_string(),
20884 vec![cast_val, f.this],
20885 ))))
20886 }
20887 _ => Ok(Expression::ArrayContains(f)),
20888 }
20889 } else {
20890 Ok(e)
20891 }
20892 }
20893
Action::StrPositionExpand => {
    // StrPosition with position arg -> complex STRPOS expansion for Presto/DuckDB
    // LOCATE(substr, str, pos) / STRPOS(str, substr, pos) ->
    // For Presto: IF(STRPOS(SUBSTRING(str, pos), substr) = 0, 0, STRPOS(SUBSTRING(str, pos), substr) + pos - 1)
    // For DuckDB: CASE WHEN STRPOS(SUBSTRING(str, pos), substr) = 0 THEN 0 ELSE STRPOS(SUBSTRING(str, pos), substr) + pos - 1 END
    // The search is restricted to the suffix starting at `pos`, then the
    // found index is shifted back into the coordinates of the full string.
    if let Expression::StrPosition(sp) = e {
        let crate::expressions::StrPosition {
            this,
            substr,
            position,
            occurrence,
        } = *sp;
        let string = *this;
        // A missing substring degrades to NULL (the whole call yields NULL).
        let substr_expr = match substr {
            Some(s) => *s,
            None => Expression::Null(Null),
        };
        // Default start position is 1 (SQL string indexing is 1-based).
        let pos = match position {
            Some(p) => *p,
            None => Expression::number(1),
        };

        // SUBSTRING(string, pos)
        let substring_call = Expression::Function(Box::new(Function::new(
            "SUBSTRING".to_string(),
            vec![string.clone(), pos.clone()],
        )));
        // STRPOS(SUBSTRING(string, pos), substr)
        let strpos_call = Expression::Function(Box::new(Function::new(
            "STRPOS".to_string(),
            vec![substring_call, substr_expr.clone()],
        )));
        // STRPOS(...) + pos - 1  (shift back to full-string coordinates)
        let pos_adjusted =
            Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
                Expression::Add(Box::new(crate::expressions::BinaryOp::new(
                    strpos_call.clone(),
                    pos.clone(),
                ))),
                Expression::number(1),
            )));
        // STRPOS(...) = 0  (not found in the suffix -> overall result 0)
        let is_zero = Expression::Eq(Box::new(crate::expressions::BinaryOp::new(
            strpos_call.clone(),
            Expression::number(0),
        )));

        // NOTE(review): `occurrence` is only preserved in the fallback arm;
        // the Presto/Trino/DuckDB expansions find the first occurrence only —
        // confirm upstream never sends occurrence > 1 to these targets.
        match target {
            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                // IF(STRPOS(SUBSTRING(str, pos), substr) = 0, 0, STRPOS(SUBSTRING(str, pos), substr) + pos - 1)
                Ok(Expression::Function(Box::new(Function::new(
                    "IF".to_string(),
                    vec![is_zero, Expression::number(0), pos_adjusted],
                ))))
            }
            DialectType::DuckDB => {
                // CASE WHEN STRPOS(SUBSTRING(str, pos), substr) = 0 THEN 0 ELSE STRPOS(SUBSTRING(str, pos), substr) + pos - 1 END
                Ok(Expression::Case(Box::new(Case {
                    operand: None,
                    whens: vec![(is_zero, Expression::number(0))],
                    else_: Some(pos_adjusted),
                    comments: Vec::new(),
                    inferred_type: None,
                })))
            }
            _ => {
                // Reconstruct StrPosition unchanged for all other targets.
                Ok(Expression::StrPosition(Box::new(
                    crate::expressions::StrPosition {
                        this: Box::new(string),
                        substr: Some(Box::new(substr_expr)),
                        position: Some(Box::new(pos)),
                        occurrence,
                    },
                )))
            }
        }
    } else {
        Ok(e)
    }
}
20975
Action::MonthsBetweenConvert => {
    // MONTHS_BETWEEN(end, start) per target:
    //   DuckDB              -> whole-month diff + fractional remainder (below)
    //   Snowflake/Redshift  -> DATEDIFF(MONTH, start, end)
    //   Presto/Trino/Athena -> DATE_DIFF('MONTH', start, end)
    //   others              -> unchanged
    if let Expression::MonthsBetween(mb) = e {
        let crate::expressions::BinaryFunc {
            this: end_date,
            expression: start_date,
            ..
        } = *mb;
        match target {
            DialectType::DuckDB => {
                // Emulates fractional MONTHS_BETWEEN:
                //   DATE_DIFF('MONTH', start, end)
                //   + CASE WHEN DAY(end) = DAY(LAST_DAY(end))
                //          AND DAY(start) = DAY(LAST_DAY(start))
                //     THEN 0 ELSE (DAY(end) - DAY(start)) / 31.0 END
                // i.e. no fraction when both dates are month-ends, otherwise
                // a day-difference fraction over a 31-day month.
                // NOTE(review): presumably mirrors Snowflake/Oracle
                // MONTHS_BETWEEN semantics — confirm against reference output.
                let cast_end = Self::ensure_cast_date(end_date);
                let cast_start = Self::ensure_cast_date(start_date);
                let dd = Expression::Function(Box::new(Function::new(
                    "DATE_DIFF".to_string(),
                    vec![
                        Expression::string("MONTH"),
                        cast_start.clone(),
                        cast_end.clone(),
                    ],
                )));
                let day_end = Expression::Function(Box::new(Function::new(
                    "DAY".to_string(),
                    vec![cast_end.clone()],
                )));
                let day_start = Expression::Function(Box::new(Function::new(
                    "DAY".to_string(),
                    vec![cast_start.clone()],
                )));
                let last_day_end = Expression::Function(Box::new(Function::new(
                    "LAST_DAY".to_string(),
                    vec![cast_end.clone()],
                )));
                let last_day_start = Expression::Function(Box::new(Function::new(
                    "LAST_DAY".to_string(),
                    vec![cast_start.clone()],
                )));
                let day_last_end = Expression::Function(Box::new(Function::new(
                    "DAY".to_string(),
                    vec![last_day_end],
                )));
                let day_last_start = Expression::Function(Box::new(Function::new(
                    "DAY".to_string(),
                    vec![last_day_start],
                )));
                // DAY(end) = DAY(LAST_DAY(end))
                let cond1 = Expression::Eq(Box::new(BinaryOp::new(
                    day_end.clone(),
                    day_last_end,
                )));
                // DAY(start) = DAY(LAST_DAY(start))
                let cond2 = Expression::Eq(Box::new(BinaryOp::new(
                    day_start.clone(),
                    day_last_start,
                )));
                let both_cond =
                    Expression::And(Box::new(BinaryOp::new(cond1, cond2)));
                let day_diff =
                    Expression::Sub(Box::new(BinaryOp::new(day_end, day_start)));
                // Parenthesized so the division binds to the whole difference.
                let day_diff_paren =
                    Expression::Paren(Box::new(crate::expressions::Paren {
                        this: day_diff,
                        trailing_comments: Vec::new(),
                    }));
                let frac = Expression::Div(Box::new(BinaryOp::new(
                    day_diff_paren,
                    Expression::Literal(Literal::Number("31.0".to_string())),
                )));
                let case_expr = Expression::Case(Box::new(Case {
                    operand: None,
                    whens: vec![(both_cond, Expression::number(0))],
                    else_: Some(frac),
                    comments: Vec::new(),
                    inferred_type: None,
                }));
                Ok(Expression::Add(Box::new(BinaryOp::new(dd, case_expr))))
            }
            DialectType::Snowflake | DialectType::Redshift => {
                // DATEDIFF takes the unit as a bare identifier, not a string.
                let unit = Expression::Identifier(Identifier::new("MONTH"));
                Ok(Expression::Function(Box::new(Function::new(
                    "DATEDIFF".to_string(),
                    vec![unit, start_date, end_date],
                ))))
            }
            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_DIFF".to_string(),
                    vec![Expression::string("MONTH"), start_date, end_date],
                ))))
            }
            // Rebuild the original node for all other targets.
            _ => Ok(Expression::MonthsBetween(Box::new(
                crate::expressions::BinaryFunc {
                    this: end_date,
                    expression: start_date,
                    original_name: None,
                    inferred_type: None,
                },
            ))),
        }
    } else {
        Ok(e)
    }
}
21075
21076 Action::AddMonthsConvert => {
21077 if let Expression::AddMonths(am) = e {
21078 let date = am.this;
21079 let val = am.expression;
21080 match target {
21081 DialectType::TSQL | DialectType::Fabric => {
21082 let cast_date = Self::ensure_cast_datetime2(date);
21083 Ok(Expression::Function(Box::new(Function::new(
21084 "DATEADD".to_string(),
21085 vec![
21086 Expression::Identifier(Identifier::new("MONTH")),
21087 val,
21088 cast_date,
21089 ],
21090 ))))
21091 }
21092 DialectType::DuckDB if matches!(source, DialectType::Snowflake) => {
21093 // DuckDB ADD_MONTHS from Snowflake: CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(date + interval) ELSE date + interval END
21094 // Optionally wrapped in CAST(... AS type) if the input had a specific type
21095
21096 // Determine the cast type from the date expression
21097 let (cast_date, return_type) = match &date {
21098 Expression::Literal(Literal::String(_)) => {
21099 // String literal: CAST(str AS TIMESTAMP), no outer CAST
21100 (
21101 Expression::Cast(Box::new(Cast {
21102 this: date.clone(),
21103 to: DataType::Timestamp {
21104 precision: None,
21105 timezone: false,
21106 },
21107 trailing_comments: Vec::new(),
21108 double_colon_syntax: false,
21109 format: None,
21110 default: None,
21111 inferred_type: None,
21112 })),
21113 None,
21114 )
21115 }
21116 Expression::Cast(c) => {
21117 // Already cast (e.g., '2023-01-31'::DATE) - keep the cast, wrap result in CAST(... AS type)
21118 (date.clone(), Some(c.to.clone()))
21119 }
21120 _ => {
21121 // Expression or NULL::TYPE - keep as-is, check for cast type
21122 if let Expression::Cast(c) = &date {
21123 (date.clone(), Some(c.to.clone()))
21124 } else {
21125 (date.clone(), None)
21126 }
21127 }
21128 };
21129
21130 // Build the interval expression
21131 // For non-integer values (float, decimal, cast), use TO_MONTHS(CAST(ROUND(val) AS INT))
21132 // For integer values, use INTERVAL val MONTH
21133 let is_non_integer_val = match &val {
21134 Expression::Literal(Literal::Number(n)) => n.contains('.'),
21135 Expression::Cast(_) => true, // e.g., 3.2::DECIMAL(10,2)
21136 Expression::Neg(n) => {
21137 if let Expression::Literal(Literal::Number(s)) = &n.this {
21138 s.contains('.')
21139 } else {
21140 false
21141 }
21142 }
21143 _ => false,
21144 };
21145
21146 let add_interval = if is_non_integer_val {
21147 // TO_MONTHS(CAST(ROUND(val) AS INT))
21148 let round_val = Expression::Function(Box::new(Function::new(
21149 "ROUND".to_string(),
21150 vec![val.clone()],
21151 )));
21152 let cast_int = Expression::Cast(Box::new(Cast {
21153 this: round_val,
21154 to: DataType::Int {
21155 length: None,
21156 integer_spelling: false,
21157 },
21158 trailing_comments: Vec::new(),
21159 double_colon_syntax: false,
21160 format: None,
21161 default: None,
21162 inferred_type: None,
21163 }));
21164 Expression::Function(Box::new(Function::new(
21165 "TO_MONTHS".to_string(),
21166 vec![cast_int],
21167 )))
21168 } else {
21169 // INTERVAL val MONTH
21170 // For negative numbers, wrap in parens
21171 let interval_val = match &val {
21172 Expression::Literal(Literal::Number(n))
21173 if n.starts_with('-') =>
21174 {
21175 Expression::Paren(Box::new(Paren {
21176 this: val.clone(),
21177 trailing_comments: Vec::new(),
21178 }))
21179 }
21180 Expression::Neg(_) => Expression::Paren(Box::new(Paren {
21181 this: val.clone(),
21182 trailing_comments: Vec::new(),
21183 })),
21184 Expression::Null(_) => Expression::Paren(Box::new(Paren {
21185 this: val.clone(),
21186 trailing_comments: Vec::new(),
21187 })),
21188 _ => val.clone(),
21189 };
21190 Expression::Interval(Box::new(crate::expressions::Interval {
21191 this: Some(interval_val),
21192 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
21193 unit: crate::expressions::IntervalUnit::Month,
21194 use_plural: false,
21195 }),
21196 }))
21197 };
21198
21199 // Build: date + interval
21200 let date_plus_interval = Expression::Add(Box::new(BinaryOp::new(
21201 cast_date.clone(),
21202 add_interval.clone(),
21203 )));
21204
21205 // Build LAST_DAY(date)
21206 let last_day_date = Expression::Function(Box::new(Function::new(
21207 "LAST_DAY".to_string(),
21208 vec![cast_date.clone()],
21209 )));
21210
21211 // Build LAST_DAY(date + interval)
21212 let last_day_date_plus =
21213 Expression::Function(Box::new(Function::new(
21214 "LAST_DAY".to_string(),
21215 vec![date_plus_interval.clone()],
21216 )));
21217
21218 // Build: CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(date + interval) ELSE date + interval END
21219 let case_expr = Expression::Case(Box::new(Case {
21220 operand: None,
21221 whens: vec![(
21222 Expression::Eq(Box::new(BinaryOp::new(
21223 last_day_date,
21224 cast_date.clone(),
21225 ))),
21226 last_day_date_plus,
21227 )],
21228 else_: Some(date_plus_interval),
21229 comments: Vec::new(),
21230 inferred_type: None,
21231 }));
21232
21233 // Wrap in CAST(... AS type) if needed
21234 if let Some(dt) = return_type {
21235 Ok(Expression::Cast(Box::new(Cast {
21236 this: case_expr,
21237 to: dt,
21238 trailing_comments: Vec::new(),
21239 double_colon_syntax: false,
21240 format: None,
21241 default: None,
21242 inferred_type: None,
21243 })))
21244 } else {
21245 Ok(case_expr)
21246 }
21247 }
21248 DialectType::DuckDB => {
21249 // Non-Snowflake source: simple date + INTERVAL
21250 let cast_date =
21251 if matches!(&date, Expression::Literal(Literal::String(_))) {
21252 Expression::Cast(Box::new(Cast {
21253 this: date,
21254 to: DataType::Timestamp {
21255 precision: None,
21256 timezone: false,
21257 },
21258 trailing_comments: Vec::new(),
21259 double_colon_syntax: false,
21260 format: None,
21261 default: None,
21262 inferred_type: None,
21263 }))
21264 } else {
21265 date
21266 };
21267 let interval =
21268 Expression::Interval(Box::new(crate::expressions::Interval {
21269 this: Some(val),
21270 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
21271 unit: crate::expressions::IntervalUnit::Month,
21272 use_plural: false,
21273 }),
21274 }));
21275 Ok(Expression::Add(Box::new(BinaryOp::new(
21276 cast_date, interval,
21277 ))))
21278 }
21279 DialectType::Snowflake => {
21280 // Keep ADD_MONTHS when source is also Snowflake
21281 if matches!(source, DialectType::Snowflake) {
21282 Ok(Expression::Function(Box::new(Function::new(
21283 "ADD_MONTHS".to_string(),
21284 vec![date, val],
21285 ))))
21286 } else {
21287 Ok(Expression::Function(Box::new(Function::new(
21288 "DATEADD".to_string(),
21289 vec![
21290 Expression::Identifier(Identifier::new("MONTH")),
21291 val,
21292 date,
21293 ],
21294 ))))
21295 }
21296 }
21297 DialectType::Redshift => {
21298 Ok(Expression::Function(Box::new(Function::new(
21299 "DATEADD".to_string(),
21300 vec![
21301 Expression::Identifier(Identifier::new("MONTH")),
21302 val,
21303 date,
21304 ],
21305 ))))
21306 }
21307 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
21308 let cast_date =
21309 if matches!(&date, Expression::Literal(Literal::String(_))) {
21310 Expression::Cast(Box::new(Cast {
21311 this: date,
21312 to: DataType::Timestamp {
21313 precision: None,
21314 timezone: false,
21315 },
21316 trailing_comments: Vec::new(),
21317 double_colon_syntax: false,
21318 format: None,
21319 default: None,
21320 inferred_type: None,
21321 }))
21322 } else {
21323 date
21324 };
21325 Ok(Expression::Function(Box::new(Function::new(
21326 "DATE_ADD".to_string(),
21327 vec![Expression::string("MONTH"), val, cast_date],
21328 ))))
21329 }
21330 DialectType::BigQuery => {
21331 let interval =
21332 Expression::Interval(Box::new(crate::expressions::Interval {
21333 this: Some(val),
21334 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
21335 unit: crate::expressions::IntervalUnit::Month,
21336 use_plural: false,
21337 }),
21338 }));
21339 let cast_date =
21340 if matches!(&date, Expression::Literal(Literal::String(_))) {
21341 Expression::Cast(Box::new(Cast {
21342 this: date,
21343 to: DataType::Custom {
21344 name: "DATETIME".to_string(),
21345 },
21346 trailing_comments: Vec::new(),
21347 double_colon_syntax: false,
21348 format: None,
21349 default: None,
21350 inferred_type: None,
21351 }))
21352 } else {
21353 date
21354 };
21355 Ok(Expression::Function(Box::new(Function::new(
21356 "DATE_ADD".to_string(),
21357 vec![cast_date, interval],
21358 ))))
21359 }
21360 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
21361 Ok(Expression::Function(Box::new(Function::new(
21362 "ADD_MONTHS".to_string(),
21363 vec![date, val],
21364 ))))
21365 }
21366 _ => {
21367 // Default: keep as AddMonths expression
21368 Ok(Expression::AddMonths(Box::new(
21369 crate::expressions::BinaryFunc {
21370 this: date,
21371 expression: val,
21372 original_name: None,
21373 inferred_type: None,
21374 },
21375 )))
21376 }
21377 }
21378 } else {
21379 Ok(e)
21380 }
21381 }
21382
Action::PercentileContConvert => {
    // PERCENTILE_CONT(p) WITHIN GROUP (ORDER BY col) ->
    // Presto/Trino: APPROX_PERCENTILE(col, p)
    // Spark/Databricks: PERCENTILE_APPROX(col, p)
    if let Expression::WithinGroup(wg) = e {
        // Extract percentile value and order by column.
        // NOTE(review): `_is_disc` records whether the wrapped call was
        // PERCENTILE_DISC, but it is currently unused — CONT and DISC both
        // map to the same approximate function on these targets.
        let (percentile, _is_disc) = match &wg.this {
            Expression::Function(f) => {
                let is_disc = f.name.eq_ignore_ascii_case("PERCENTILE_DISC");
                // Default to the median (0.5) when no percentile arg is given.
                let pct = f.args.first().cloned().unwrap_or(Expression::Literal(
                    Literal::Number("0.5".to_string()),
                ));
                (pct, is_disc)
            }
            Expression::AggregateFunction(af) => {
                let is_disc = af.name.eq_ignore_ascii_case("PERCENTILE_DISC");
                let pct = af.args.first().cloned().unwrap_or(Expression::Literal(
                    Literal::Number("0.5".to_string()),
                ));
                (pct, is_disc)
            }
            Expression::PercentileCont(pc) => (pc.percentile.clone(), false),
            // Anything else under WITHIN GROUP: leave the node untouched.
            _ => return Ok(Expression::WithinGroup(wg)),
        };
        // Aggregated column = first ORDER BY key; fall back to the constant 1
        // when the ORDER BY list is empty.
        let col = wg
            .order_by
            .first()
            .map(|o| o.this.clone())
            .unwrap_or(Expression::Literal(Literal::Number("1".to_string())));

        // Pick the target-specific approximate-percentile function name.
        let func_name = match target {
            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                "APPROX_PERCENTILE"
            }
            _ => "PERCENTILE_APPROX", // Spark, Databricks
        };
        Ok(Expression::Function(Box::new(Function::new(
            func_name.to_string(),
            vec![col, percentile],
        ))))
    } else {
        Ok(e)
    }
}
21427
21428 Action::CurrentUserSparkParens => {
21429 // CURRENT_USER -> CURRENT_USER() for Spark
21430 if let Expression::CurrentUser(_) = e {
21431 Ok(Expression::Function(Box::new(Function::new(
21432 "CURRENT_USER".to_string(),
21433 vec![],
21434 ))))
21435 } else {
21436 Ok(e)
21437 }
21438 }
21439
21440 Action::SparkDateFuncCast => {
21441 // MONTH/YEAR/DAY('string') from Spark -> wrap arg in CAST to DATE
21442 let cast_arg = |arg: Expression| -> Expression {
21443 match target {
21444 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
21445 Self::double_cast_timestamp_date(arg)
21446 }
21447 _ => {
21448 // DuckDB, PostgreSQL, etc: CAST(arg AS DATE)
21449 Self::ensure_cast_date(arg)
21450 }
21451 }
21452 };
21453 match e {
21454 Expression::Month(f) => Ok(Expression::Month(Box::new(
21455 crate::expressions::UnaryFunc::new(cast_arg(f.this)),
21456 ))),
21457 Expression::Year(f) => Ok(Expression::Year(Box::new(
21458 crate::expressions::UnaryFunc::new(cast_arg(f.this)),
21459 ))),
21460 Expression::Day(f) => Ok(Expression::Day(Box::new(
21461 crate::expressions::UnaryFunc::new(cast_arg(f.this)),
21462 ))),
21463 other => Ok(other),
21464 }
21465 }
21466
21467 Action::MapFromArraysConvert => {
21468 // Expression::MapFromArrays -> target-specific
21469 if let Expression::MapFromArrays(mfa) = e {
21470 let keys = mfa.this;
21471 let values = mfa.expression;
21472 match target {
21473 DialectType::Snowflake => Ok(Expression::Function(Box::new(
21474 Function::new("OBJECT_CONSTRUCT".to_string(), vec![keys, values]),
21475 ))),
21476 _ => {
21477 // Hive, Presto, DuckDB, etc.: MAP(keys, values)
21478 Ok(Expression::Function(Box::new(Function::new(
21479 "MAP".to_string(),
21480 vec![keys, values],
21481 ))))
21482 }
21483 }
21484 } else {
21485 Ok(e)
21486 }
21487 }
21488
21489 Action::AnyToExists => {
21490 if let Expression::Any(q) = e {
21491 if let Some(op) = q.op.clone() {
21492 let lambda_param = crate::expressions::Identifier::new("x");
21493 let rhs = Expression::Identifier(lambda_param.clone());
21494 let body = match op {
21495 crate::expressions::QuantifiedOp::Eq => {
21496 Expression::Eq(Box::new(BinaryOp::new(q.this, rhs)))
21497 }
21498 crate::expressions::QuantifiedOp::Neq => {
21499 Expression::Neq(Box::new(BinaryOp::new(q.this, rhs)))
21500 }
21501 crate::expressions::QuantifiedOp::Lt => {
21502 Expression::Lt(Box::new(BinaryOp::new(q.this, rhs)))
21503 }
21504 crate::expressions::QuantifiedOp::Lte => {
21505 Expression::Lte(Box::new(BinaryOp::new(q.this, rhs)))
21506 }
21507 crate::expressions::QuantifiedOp::Gt => {
21508 Expression::Gt(Box::new(BinaryOp::new(q.this, rhs)))
21509 }
21510 crate::expressions::QuantifiedOp::Gte => {
21511 Expression::Gte(Box::new(BinaryOp::new(q.this, rhs)))
21512 }
21513 };
21514 let lambda =
21515 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
21516 parameters: vec![lambda_param],
21517 body,
21518 colon: false,
21519 parameter_types: Vec::new(),
21520 }));
21521 Ok(Expression::Function(Box::new(Function::new(
21522 "EXISTS".to_string(),
21523 vec![q.subquery, lambda],
21524 ))))
21525 } else {
21526 Ok(Expression::Any(q))
21527 }
21528 } else {
21529 Ok(e)
21530 }
21531 }
21532
Action::GenerateSeriesConvert => {
    // GENERATE_SERIES(start, end[, step]) -> SEQUENCE for Spark/Databricks/Hive, wrapped in UNNEST/EXPLODE
    // For DuckDB target: wrap in UNNEST(GENERATE_SERIES(...))
    // For PG/Redshift target: keep as GENERATE_SERIES but normalize interval string step
    if let Expression::Function(f) = e {
        if f.name.eq_ignore_ascii_case("GENERATE_SERIES") && f.args.len() >= 2 {
            let start = f.args[0].clone();
            let end = f.args[1].clone();
            // Optional third argument: the step.
            let step = f.args.get(2).cloned();

            // Normalize step: convert string interval like '1day' or ' 2 days ' to INTERVAL expression
            let step = step.map(|s| Self::normalize_interval_string(s, target));

            // Helper: wrap CURRENT_TIMESTAMP in CAST(... AS TIMESTAMP) for Presto/Trino/Spark
            let maybe_cast_timestamp = |arg: Expression| -> Expression {
                if matches!(
                    target,
                    DialectType::Presto
                        | DialectType::Trino
                        | DialectType::Athena
                        | DialectType::Spark
                        | DialectType::Databricks
                        | DialectType::Hive
                ) {
                    match &arg {
                        Expression::CurrentTimestamp(_) => {
                            Expression::Cast(Box::new(Cast {
                                this: arg,
                                to: DataType::Timestamp {
                                    precision: None,
                                    timezone: false,
                                },
                                trailing_comments: Vec::new(),
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            }))
                        }
                        // Any other bound expression is passed through as-is.
                        _ => arg,
                    }
                } else {
                    arg
                }
            };

            let start = maybe_cast_timestamp(start);
            let end = maybe_cast_timestamp(end);

            // For PostgreSQL/Redshift target, keep as GENERATE_SERIES
            if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
                let mut gs_args = vec![start, end];
                if let Some(step) = step {
                    gs_args.push(step);
                }
                return Ok(Expression::Function(Box::new(Function::new(
                    "GENERATE_SERIES".to_string(),
                    gs_args,
                ))));
            }

            // For DuckDB target: wrap in UNNEST(GENERATE_SERIES(...))
            if matches!(target, DialectType::DuckDB) {
                let mut gs_args = vec![start, end];
                if let Some(step) = step {
                    gs_args.push(step);
                }
                let gs = Expression::Function(Box::new(Function::new(
                    "GENERATE_SERIES".to_string(),
                    gs_args,
                )));
                return Ok(Expression::Function(Box::new(Function::new(
                    "UNNEST".to_string(),
                    vec![gs],
                ))));
            }

            // Remaining targets use SEQUENCE(start, end[, step]) as the base.
            let mut seq_args = vec![start, end];
            if let Some(step) = step {
                seq_args.push(step);
            }

            let seq = Expression::Function(Box::new(Function::new(
                "SEQUENCE".to_string(),
                seq_args,
            )));

            // Choose the set-returning wrapper around SEQUENCE per target.
            match target {
                DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                    // Wrap in UNNEST
                    Ok(Expression::Function(Box::new(Function::new(
                        "UNNEST".to_string(),
                        vec![seq],
                    ))))
                }
                DialectType::Spark
                | DialectType::Databricks
                | DialectType::Hive => {
                    // Wrap in EXPLODE
                    Ok(Expression::Function(Box::new(Function::new(
                        "EXPLODE".to_string(),
                        vec![seq],
                    ))))
                }
                _ => {
                    // Just SEQUENCE for others
                    Ok(seq)
                }
            }
        } else {
            // Different function name or too few args: pass through unchanged.
            Ok(Expression::Function(f))
        }
    } else {
        Ok(e)
    }
}
21649
21650 Action::ConcatCoalesceWrap => {
21651 // CONCAT(a, b) function -> CONCAT(COALESCE(CAST(a AS VARCHAR), ''), ...) for Presto
21652 // CONCAT(a, b) function -> CONCAT(COALESCE(a, ''), ...) for ClickHouse
21653 if let Expression::Function(f) = e {
21654 if f.name.eq_ignore_ascii_case("CONCAT") {
21655 let new_args: Vec<Expression> = f
21656 .args
21657 .into_iter()
21658 .map(|arg| {
21659 let cast_arg = if matches!(
21660 target,
21661 DialectType::Presto
21662 | DialectType::Trino
21663 | DialectType::Athena
21664 ) {
21665 Expression::Cast(Box::new(Cast {
21666 this: arg,
21667 to: DataType::VarChar {
21668 length: None,
21669 parenthesized_length: false,
21670 },
21671 trailing_comments: Vec::new(),
21672 double_colon_syntax: false,
21673 format: None,
21674 default: None,
21675 inferred_type: None,
21676 }))
21677 } else {
21678 arg
21679 };
21680 Expression::Function(Box::new(Function::new(
21681 "COALESCE".to_string(),
21682 vec![cast_arg, Expression::string("")],
21683 )))
21684 })
21685 .collect();
21686 Ok(Expression::Function(Box::new(Function::new(
21687 "CONCAT".to_string(),
21688 new_args,
21689 ))))
21690 } else {
21691 Ok(Expression::Function(f))
21692 }
21693 } else {
21694 Ok(e)
21695 }
21696 }
21697
21698 Action::PipeConcatToConcat => {
21699 // a || b (Concat operator) -> CONCAT(CAST(a AS VARCHAR), CAST(b AS VARCHAR)) for Presto/Trino
21700 if let Expression::Concat(op) = e {
21701 let cast_left = Expression::Cast(Box::new(Cast {
21702 this: op.left,
21703 to: DataType::VarChar {
21704 length: None,
21705 parenthesized_length: false,
21706 },
21707 trailing_comments: Vec::new(),
21708 double_colon_syntax: false,
21709 format: None,
21710 default: None,
21711 inferred_type: None,
21712 }));
21713 let cast_right = Expression::Cast(Box::new(Cast {
21714 this: op.right,
21715 to: DataType::VarChar {
21716 length: None,
21717 parenthesized_length: false,
21718 },
21719 trailing_comments: Vec::new(),
21720 double_colon_syntax: false,
21721 format: None,
21722 default: None,
21723 inferred_type: None,
21724 }));
21725 Ok(Expression::Function(Box::new(Function::new(
21726 "CONCAT".to_string(),
21727 vec![cast_left, cast_right],
21728 ))))
21729 } else {
21730 Ok(e)
21731 }
21732 }
21733
Action::DivFuncConvert => {
    // DIV(a, b) -> target-specific integer division
    if let Expression::Function(f) = e {
        if f.name.eq_ignore_ascii_case("DIV") && f.args.len() == 2 {
            let a = f.args[0].clone();
            let b = f.args[1].clone();
            match target {
                DialectType::DuckDB => {
                    // DIV(a, b) -> CAST(a // b AS DECIMAL)
                    let int_div = Expression::IntDiv(Box::new(
                        crate::expressions::BinaryFunc {
                            this: a,
                            expression: b,
                            original_name: None,
                            inferred_type: None,
                        },
                    ));
                    // Outer cast keeps the result type DECIMAL rather than
                    // whatever `//` would yield natively.
                    Ok(Expression::Cast(Box::new(Cast {
                        this: int_div,
                        to: DataType::Decimal {
                            precision: None,
                            scale: None,
                        },
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                }
                DialectType::BigQuery => {
                    // DIV(a, b) -> CAST(DIV(a, b) AS NUMERIC)
                    let div_func = Expression::Function(Box::new(Function::new(
                        "DIV".to_string(),
                        vec![a, b],
                    )));
                    Ok(Expression::Cast(Box::new(Cast {
                        this: div_func,
                        to: DataType::Custom {
                            name: "NUMERIC".to_string(),
                        },
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                }
                DialectType::SQLite => {
                    // DIV(a, b) -> CAST(CAST(CAST(a AS REAL) / b AS INTEGER) AS REAL)
                    // Innermost cast forces real division; the INTEGER cast
                    // truncates toward zero; the final REAL cast restores the
                    // numeric result type.
                    let cast_a = Expression::Cast(Box::new(Cast {
                        this: a,
                        to: DataType::Custom {
                            name: "REAL".to_string(),
                        },
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    }));
                    let div = Expression::Div(Box::new(BinaryOp::new(cast_a, b)));
                    let cast_int = Expression::Cast(Box::new(Cast {
                        this: div,
                        to: DataType::Int {
                            length: None,
                            // Spell the type as INTEGER (not INT).
                            integer_spelling: true,
                        },
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    }));
                    Ok(Expression::Cast(Box::new(Cast {
                        this: cast_int,
                        to: DataType::Custom {
                            name: "REAL".to_string(),
                        },
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                }
                // Other targets keep DIV(a, b) as-is.
                _ => Ok(Expression::Function(f)),
            }
        } else {
            // Not a two-argument DIV call: pass through unchanged.
            Ok(Expression::Function(f))
        }
    } else {
        Ok(e)
    }
}
21829
21830 Action::JsonObjectAggConvert => {
21831 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
21832 match e {
21833 Expression::Function(f) => Ok(Expression::Function(Box::new(
21834 Function::new("JSON_GROUP_OBJECT".to_string(), f.args),
21835 ))),
21836 Expression::AggregateFunction(af) => {
21837 // AggregateFunction stores all args in the `args` vec
21838 Ok(Expression::Function(Box::new(Function::new(
21839 "JSON_GROUP_OBJECT".to_string(),
21840 af.args,
21841 ))))
21842 }
21843 other => Ok(other),
21844 }
21845 }
21846
21847 Action::JsonbExistsConvert => {
21848 // JSONB_EXISTS('json', 'key') -> JSON_EXISTS('json', '$.key') for DuckDB
21849 if let Expression::Function(f) = e {
21850 if f.args.len() == 2 {
21851 let json_expr = f.args[0].clone();
21852 let key = match &f.args[1] {
21853 Expression::Literal(crate::expressions::Literal::String(s)) => {
21854 format!("$.{}", s)
21855 }
21856 _ => return Ok(Expression::Function(f)),
21857 };
21858 Ok(Expression::Function(Box::new(Function::new(
21859 "JSON_EXISTS".to_string(),
21860 vec![json_expr, Expression::string(&key)],
21861 ))))
21862 } else {
21863 Ok(Expression::Function(f))
21864 }
21865 } else {
21866 Ok(e)
21867 }
21868 }
21869
21870 Action::DateBinConvert => {
21871 // DATE_BIN('interval', ts, origin) -> TIME_BUCKET('interval', ts, origin) for DuckDB
21872 if let Expression::Function(f) = e {
21873 Ok(Expression::Function(Box::new(Function::new(
21874 "TIME_BUCKET".to_string(),
21875 f.args,
21876 ))))
21877 } else {
21878 Ok(e)
21879 }
21880 }
21881
21882 Action::MysqlCastCharToText => {
21883 // MySQL CAST(x AS CHAR) was originally TEXT -> convert to target text type
21884 if let Expression::Cast(mut c) = e {
21885 c.to = DataType::Text;
21886 Ok(Expression::Cast(c))
21887 } else {
21888 Ok(e)
21889 }
21890 }
21891
21892 Action::SparkCastVarcharToString => {
21893 // Spark parses VARCHAR(n)/CHAR(n) as TEXT -> normalize to STRING
21894 match e {
21895 Expression::Cast(mut c) => {
21896 c.to = Self::normalize_varchar_to_string(c.to);
21897 Ok(Expression::Cast(c))
21898 }
21899 Expression::TryCast(mut c) => {
21900 c.to = Self::normalize_varchar_to_string(c.to);
21901 Ok(Expression::TryCast(c))
21902 }
21903 _ => Ok(e),
21904 }
21905 }
21906
21907 Action::MinMaxToLeastGreatest => {
21908 // Multi-arg MIN(a,b,c) -> LEAST(a,b,c), MAX(a,b,c) -> GREATEST(a,b,c)
21909 if let Expression::Function(f) = e {
21910 let name = f.name.to_uppercase();
21911 let new_name = match name.as_str() {
21912 "MIN" => "LEAST",
21913 "MAX" => "GREATEST",
21914 _ => return Ok(Expression::Function(f)),
21915 };
21916 Ok(Expression::Function(Box::new(Function::new(
21917 new_name.to_string(),
21918 f.args,
21919 ))))
21920 } else {
21921 Ok(e)
21922 }
21923 }
21924
21925 Action::ClickHouseUniqToApproxCountDistinct => {
21926 // ClickHouse uniq(x) -> APPROX_COUNT_DISTINCT(x) for non-ClickHouse targets
21927 if let Expression::Function(f) = e {
21928 Ok(Expression::Function(Box::new(Function::new(
21929 "APPROX_COUNT_DISTINCT".to_string(),
21930 f.args,
21931 ))))
21932 } else {
21933 Ok(e)
21934 }
21935 }
21936
21937 Action::ClickHouseAnyToAnyValue => {
21938 // ClickHouse any(x) -> ANY_VALUE(x) for non-ClickHouse targets
21939 if let Expression::Function(f) = e {
21940 Ok(Expression::Function(Box::new(Function::new(
21941 "ANY_VALUE".to_string(),
21942 f.args,
21943 ))))
21944 } else {
21945 Ok(e)
21946 }
21947 }
21948
21949 Action::OracleVarchar2ToVarchar => {
21950 // Oracle VARCHAR2(N CHAR/BYTE) / NVARCHAR2(N) -> VarChar(N) for non-Oracle targets
21951 if let Expression::DataType(DataType::Custom { ref name }) = e {
21952 let upper = name.to_uppercase();
21953 // Extract length from VARCHAR2(N ...) or NVARCHAR2(N ...)
21954 let inner =
21955 if upper.starts_with("VARCHAR2(") || upper.starts_with("NVARCHAR2(") {
21956 let start = if upper.starts_with("N") { 10 } else { 9 }; // skip "NVARCHAR2(" or "VARCHAR2("
21957 let end = name.len() - 1; // skip trailing ")"
21958 Some(&name[start..end])
21959 } else {
21960 Option::None
21961 };
21962 if let Some(inner_str) = inner {
21963 // Parse the number part, ignoring BYTE/CHAR qualifier
21964 let num_str = inner_str.split_whitespace().next().unwrap_or("");
21965 if let Ok(n) = num_str.parse::<u32>() {
21966 Ok(Expression::DataType(DataType::VarChar {
21967 length: Some(n),
21968 parenthesized_length: false,
21969 }))
21970 } else {
21971 Ok(e)
21972 }
21973 } else {
21974 // Plain VARCHAR2 / NVARCHAR2 without parens
21975 Ok(Expression::DataType(DataType::VarChar {
21976 length: Option::None,
21977 parenthesized_length: false,
21978 }))
21979 }
21980 } else {
21981 Ok(e)
21982 }
21983 }
21984
Action::Nvl2Expand => {
    // NVL2(a, b[, c]) -> CASE WHEN NOT a IS NULL THEN b [ELSE c] END
    // But keep as NVL2 for dialects that support it natively
    let nvl2_native = matches!(
        target,
        DialectType::Oracle
            | DialectType::Snowflake
            | DialectType::Redshift
            | DialectType::Teradata
            | DialectType::Spark
            | DialectType::Databricks
    );
    // Pull (a, b, optional c) out of either the dedicated Nvl2 node or a
    // generic Function node; native targets short-circuit and keep NVL2.
    let (a, b, c) = if let Expression::Nvl2(nvl2) = e {
        if nvl2_native {
            // Native target: keep the dedicated node untouched.
            return Ok(Expression::Nvl2(nvl2));
        }
        (nvl2.this, nvl2.true_value, Some(nvl2.false_value))
    } else if let Expression::Function(f) = e {
        if nvl2_native {
            // Native target: normalize the function spelling to NVL2.
            return Ok(Expression::Function(Box::new(Function::new(
                "NVL2".to_string(),
                f.args,
            ))));
        }
        if f.args.len() < 2 {
            // Too few arguments to expand: pass through unchanged.
            return Ok(Expression::Function(f));
        }
        let mut args = f.args;
        let a = args.remove(0);
        let b = args.remove(0);
        // The ELSE value is optional in the function form.
        let c = if !args.is_empty() {
            Some(args.remove(0))
        } else {
            Option::None
        };
        (a, b, c)
    } else {
        return Ok(e);
    };
    // Build: NOT (a IS NULL)
    let is_null = Expression::IsNull(Box::new(IsNull {
        this: a,
        not: false,
        postfix_form: false,
    }));
    let not_null = Expression::Not(Box::new(crate::expressions::UnaryOp {
        this: is_null,
        inferred_type: None,
    }));
    // CASE WHEN NOT a IS NULL THEN b [ELSE c] END
    Ok(Expression::Case(Box::new(Case {
        operand: Option::None,
        whens: vec![(not_null, b)],
        else_: c,
        comments: Vec::new(),
        inferred_type: None,
    })))
}
22042
22043 Action::IfnullToCoalesce => {
22044 // IFNULL(a, b) -> COALESCE(a, b): clear original_name to output COALESCE
22045 if let Expression::Coalesce(mut cf) = e {
22046 cf.original_name = Option::None;
22047 Ok(Expression::Coalesce(cf))
22048 } else if let Expression::Function(f) = e {
22049 Ok(Expression::Function(Box::new(Function::new(
22050 "COALESCE".to_string(),
22051 f.args,
22052 ))))
22053 } else {
22054 Ok(e)
22055 }
22056 }
22057
22058 Action::IsAsciiConvert => {
22059 // IS_ASCII(x) -> dialect-specific ASCII check
22060 if let Expression::Function(f) = e {
22061 let arg = f.args.into_iter().next().unwrap();
22062 match target {
22063 DialectType::MySQL | DialectType::SingleStore | DialectType::TiDB => {
22064 // REGEXP_LIKE(x, '^[[:ascii:]]*$')
22065 Ok(Expression::Function(Box::new(Function::new(
22066 "REGEXP_LIKE".to_string(),
22067 vec![
22068 arg,
22069 Expression::Literal(Literal::String(
22070 "^[[:ascii:]]*$".to_string(),
22071 )),
22072 ],
22073 ))))
22074 }
22075 DialectType::PostgreSQL
22076 | DialectType::Redshift
22077 | DialectType::Materialize
22078 | DialectType::RisingWave => {
22079 // (x ~ '^[[:ascii:]]*$')
22080 Ok(Expression::Paren(Box::new(Paren {
22081 this: Expression::RegexpLike(Box::new(
22082 crate::expressions::RegexpFunc {
22083 this: arg,
22084 pattern: Expression::Literal(Literal::String(
22085 "^[[:ascii:]]*$".to_string(),
22086 )),
22087 flags: Option::None,
22088 },
22089 )),
22090 trailing_comments: Vec::new(),
22091 })))
22092 }
22093 DialectType::SQLite => {
22094 // (NOT x GLOB CAST(x'2a5b5e012d7f5d2a' AS TEXT))
22095 let hex_lit = Expression::Literal(Literal::HexString(
22096 "2a5b5e012d7f5d2a".to_string(),
22097 ));
22098 let cast_expr = Expression::Cast(Box::new(Cast {
22099 this: hex_lit,
22100 to: DataType::Text,
22101 trailing_comments: Vec::new(),
22102 double_colon_syntax: false,
22103 format: Option::None,
22104 default: Option::None,
22105 inferred_type: None,
22106 }));
22107 let glob = Expression::Glob(Box::new(BinaryOp {
22108 left: arg,
22109 right: cast_expr,
22110 left_comments: Vec::new(),
22111 operator_comments: Vec::new(),
22112 trailing_comments: Vec::new(),
22113 inferred_type: None,
22114 }));
22115 Ok(Expression::Paren(Box::new(Paren {
22116 this: Expression::Not(Box::new(crate::expressions::UnaryOp {
22117 this: glob,
22118 inferred_type: None,
22119 })),
22120 trailing_comments: Vec::new(),
22121 })))
22122 }
22123 DialectType::TSQL | DialectType::Fabric => {
22124 // (PATINDEX(CONVERT(VARCHAR(MAX), 0x255b5e002d7f5d25) COLLATE Latin1_General_BIN, x) = 0)
22125 let hex_lit = Expression::Literal(Literal::HexNumber(
22126 "255b5e002d7f5d25".to_string(),
22127 ));
22128 let convert_expr = Expression::Convert(Box::new(
22129 crate::expressions::ConvertFunc {
22130 this: hex_lit,
22131 to: DataType::Text, // Text generates as VARCHAR(MAX) for TSQL
22132 style: None,
22133 },
22134 ));
22135 let collated = Expression::Collation(Box::new(
22136 crate::expressions::CollationExpr {
22137 this: convert_expr,
22138 collation: "Latin1_General_BIN".to_string(),
22139 quoted: false,
22140 double_quoted: false,
22141 },
22142 ));
22143 let patindex = Expression::Function(Box::new(Function::new(
22144 "PATINDEX".to_string(),
22145 vec![collated, arg],
22146 )));
22147 let zero = Expression::Literal(Literal::Number("0".to_string()));
22148 let eq_zero = Expression::Eq(Box::new(BinaryOp {
22149 left: patindex,
22150 right: zero,
22151 left_comments: Vec::new(),
22152 operator_comments: Vec::new(),
22153 trailing_comments: Vec::new(),
22154 inferred_type: None,
22155 }));
22156 Ok(Expression::Paren(Box::new(Paren {
22157 this: eq_zero,
22158 trailing_comments: Vec::new(),
22159 })))
22160 }
22161 DialectType::Oracle => {
22162 // NVL(REGEXP_LIKE(x, '^[' || CHR(1) || '-' || CHR(127) || ']*$'), TRUE)
22163 // Build the pattern: '^[' || CHR(1) || '-' || CHR(127) || ']*$'
22164 let s1 = Expression::Literal(Literal::String("^[".to_string()));
22165 let chr1 = Expression::Function(Box::new(Function::new(
22166 "CHR".to_string(),
22167 vec![Expression::Literal(Literal::Number("1".to_string()))],
22168 )));
22169 let dash = Expression::Literal(Literal::String("-".to_string()));
22170 let chr127 = Expression::Function(Box::new(Function::new(
22171 "CHR".to_string(),
22172 vec![Expression::Literal(Literal::Number("127".to_string()))],
22173 )));
22174 let s2 = Expression::Literal(Literal::String("]*$".to_string()));
22175 // Build: '^[' || CHR(1) || '-' || CHR(127) || ']*$'
22176 let concat1 =
22177 Expression::DPipe(Box::new(crate::expressions::DPipe {
22178 this: Box::new(s1),
22179 expression: Box::new(chr1),
22180 safe: None,
22181 }));
22182 let concat2 =
22183 Expression::DPipe(Box::new(crate::expressions::DPipe {
22184 this: Box::new(concat1),
22185 expression: Box::new(dash),
22186 safe: None,
22187 }));
22188 let concat3 =
22189 Expression::DPipe(Box::new(crate::expressions::DPipe {
22190 this: Box::new(concat2),
22191 expression: Box::new(chr127),
22192 safe: None,
22193 }));
22194 let concat4 =
22195 Expression::DPipe(Box::new(crate::expressions::DPipe {
22196 this: Box::new(concat3),
22197 expression: Box::new(s2),
22198 safe: None,
22199 }));
22200 let regexp_like = Expression::Function(Box::new(Function::new(
22201 "REGEXP_LIKE".to_string(),
22202 vec![arg, concat4],
22203 )));
22204 // Use Column("TRUE") to output literal TRUE keyword (not boolean 1/0)
22205 let true_expr = Expression::Column(crate::expressions::Column {
22206 name: Identifier {
22207 name: "TRUE".to_string(),
22208 quoted: false,
22209 trailing_comments: Vec::new(),
22210 span: None,
22211 },
22212 table: None,
22213 join_mark: false,
22214 trailing_comments: Vec::new(),
22215 span: None,
22216 inferred_type: None,
22217 });
22218 let nvl = Expression::Function(Box::new(Function::new(
22219 "NVL".to_string(),
22220 vec![regexp_like, true_expr],
22221 )));
22222 Ok(nvl)
22223 }
22224 _ => Ok(Expression::Function(Box::new(Function::new(
22225 "IS_ASCII".to_string(),
22226 vec![arg],
22227 )))),
22228 }
22229 } else {
22230 Ok(e)
22231 }
22232 }
22233
22234 Action::StrPositionConvert => {
22235 // STR_POSITION(haystack, needle[, position[, occurrence]]) -> dialect-specific
22236 if let Expression::Function(f) = e {
22237 if f.args.len() < 2 {
22238 return Ok(Expression::Function(f));
22239 }
22240 let mut args = f.args;
22241
22242 let haystack = args.remove(0);
22243 let needle = args.remove(0);
22244 let position = if !args.is_empty() {
22245 Some(args.remove(0))
22246 } else {
22247 Option::None
22248 };
22249 let occurrence = if !args.is_empty() {
22250 Some(args.remove(0))
22251 } else {
22252 Option::None
22253 };
22254
22255 // Helper to build: STRPOS/INSTR(SUBSTRING(haystack, pos), needle) expansion
22256 // Returns: CASE/IF WHEN func(SUBSTRING(haystack, pos), needle[, occ]) = 0 THEN 0 ELSE ... + pos - 1 END
// Helper to build: STRPOS/INSTR(SUBSTRING(haystack, pos), needle) expansion
// Returns: CASE/IF WHEN func(SUBSTRING(haystack, pos), needle[, occ]) = 0 THEN 0 ELSE ... + pos - 1 END
fn build_position_expansion(
    haystack: Expression,
    needle: Expression,
    pos: Expression,
    occurrence: Option<Expression>,
    inner_func: &str,
    wrapper: &str, // "CASE", "IF", "IIF"
) -> Expression {
    // SUBSTRING(haystack, pos): restrict the search to the requested start.
    let substr = Expression::Function(Box::new(Function::new(
        "SUBSTRING".to_string(),
        vec![haystack, pos.clone()],
    )));
    let mut inner_args = vec![substr, needle];
    if let Some(occ) = occurrence {
        inner_args.push(occ);
    }
    // inner_func(SUBSTRING(haystack, pos), needle[, occurrence])
    let inner_call = Expression::Function(Box::new(Function::new(
        inner_func.to_string(),
        inner_args,
    )));
    let zero = Expression::Literal(Literal::Number("0".to_string()));
    let one = Expression::Literal(Literal::Number("1".to_string()));
    // inner_call = 0 : the needle was not found in the substring.
    let eq_zero = Expression::Eq(Box::new(BinaryOp {
        left: inner_call.clone(),
        right: zero.clone(),
        left_comments: Vec::new(),
        operator_comments: Vec::new(),
        trailing_comments: Vec::new(),
        inferred_type: None,
    }));
    // inner_call + pos - 1 : rebase the 1-based offset within the substring
    // back onto the full haystack.
    let add_pos = Expression::Add(Box::new(BinaryOp {
        left: inner_call,
        right: pos,
        left_comments: Vec::new(),
        operator_comments: Vec::new(),
        trailing_comments: Vec::new(),
        inferred_type: None,
    }));
    let sub_one = Expression::Sub(Box::new(BinaryOp {
        left: add_pos,
        right: one,
        left_comments: Vec::new(),
        operator_comments: Vec::new(),
        trailing_comments: Vec::new(),
        inferred_type: None,
    }));

    // Wrap the not-found test in the conditional form the target supports.
    match wrapper {
        // CASE WHEN inner = 0 THEN 0 ELSE inner + pos - 1 END
        "CASE" => Expression::Case(Box::new(Case {
            operand: Option::None,
            whens: vec![(eq_zero, zero)],
            else_: Some(sub_one),
            comments: Vec::new(),
            inferred_type: None,
        })),
        // IIF(inner = 0, 0, inner + pos - 1)
        "IIF" => Expression::Function(Box::new(Function::new(
            "IIF".to_string(),
            vec![eq_zero, zero, sub_one],
        ))),
        // Any other wrapper value falls back to IF(inner = 0, 0, ...).
        _ => Expression::Function(Box::new(Function::new(
            "IF".to_string(),
            vec![eq_zero, zero, sub_one],
        ))),
    }
}
22322
22323 match target {
22324 // STRPOS group: Athena, DuckDB, Presto, Trino, Drill
22325 DialectType::Athena
22326 | DialectType::DuckDB
22327 | DialectType::Presto
22328 | DialectType::Trino
22329 | DialectType::Drill => {
22330 if let Some(pos) = position {
22331 let wrapper = if matches!(target, DialectType::DuckDB) {
22332 "CASE"
22333 } else {
22334 "IF"
22335 };
22336 let result = build_position_expansion(
22337 haystack, needle, pos, occurrence, "STRPOS", wrapper,
22338 );
22339 if matches!(target, DialectType::Drill) {
22340 // Drill uses backtick-quoted `IF`
22341 if let Expression::Function(mut f) = result {
22342 f.name = "`IF`".to_string();
22343 Ok(Expression::Function(f))
22344 } else {
22345 Ok(result)
22346 }
22347 } else {
22348 Ok(result)
22349 }
22350 } else {
22351 Ok(Expression::Function(Box::new(Function::new(
22352 "STRPOS".to_string(),
22353 vec![haystack, needle],
22354 ))))
22355 }
22356 }
22357 // SQLite: IIF wrapper
22358 DialectType::SQLite => {
22359 if let Some(pos) = position {
22360 Ok(build_position_expansion(
22361 haystack, needle, pos, occurrence, "INSTR", "IIF",
22362 ))
22363 } else {
22364 Ok(Expression::Function(Box::new(Function::new(
22365 "INSTR".to_string(),
22366 vec![haystack, needle],
22367 ))))
22368 }
22369 }
22370 // INSTR group: Teradata, BigQuery, Oracle
22371 DialectType::Teradata | DialectType::BigQuery | DialectType::Oracle => {
22372 let mut a = vec![haystack, needle];
22373 if let Some(pos) = position {
22374 a.push(pos);
22375 }
22376 if let Some(occ) = occurrence {
22377 a.push(occ);
22378 }
22379 Ok(Expression::Function(Box::new(Function::new(
22380 "INSTR".to_string(),
22381 a,
22382 ))))
22383 }
22384 // CHARINDEX group: Snowflake, TSQL
22385 DialectType::Snowflake | DialectType::TSQL | DialectType::Fabric => {
22386 let mut a = vec![needle, haystack];
22387 if let Some(pos) = position {
22388 a.push(pos);
22389 }
22390 Ok(Expression::Function(Box::new(Function::new(
22391 "CHARINDEX".to_string(),
22392 a,
22393 ))))
22394 }
22395 // POSITION(needle IN haystack): PostgreSQL, Materialize, RisingWave, Redshift
22396 DialectType::PostgreSQL
22397 | DialectType::Materialize
22398 | DialectType::RisingWave
22399 | DialectType::Redshift => {
22400 if let Some(pos) = position {
22401 // Build: CASE WHEN POSITION(needle IN SUBSTRING(haystack FROM pos)) = 0 THEN 0
22402 // ELSE POSITION(...) + pos - 1 END
22403 let substr = Expression::Substring(Box::new(
22404 crate::expressions::SubstringFunc {
22405 this: haystack,
22406 start: pos.clone(),
22407 length: Option::None,
22408 from_for_syntax: true,
22409 },
22410 ));
22411 let pos_in = Expression::StrPosition(Box::new(
22412 crate::expressions::StrPosition {
22413 this: Box::new(substr),
22414 substr: Some(Box::new(needle)),
22415 position: Option::None,
22416 occurrence: Option::None,
22417 },
22418 ));
22419 let zero =
22420 Expression::Literal(Literal::Number("0".to_string()));
22421 let one = Expression::Literal(Literal::Number("1".to_string()));
22422 let eq_zero = Expression::Eq(Box::new(BinaryOp {
22423 left: pos_in.clone(),
22424 right: zero.clone(),
22425 left_comments: Vec::new(),
22426 operator_comments: Vec::new(),
22427 trailing_comments: Vec::new(),
22428 inferred_type: None,
22429 }));
22430 let add_pos = Expression::Add(Box::new(BinaryOp {
22431 left: pos_in,
22432 right: pos,
22433 left_comments: Vec::new(),
22434 operator_comments: Vec::new(),
22435 trailing_comments: Vec::new(),
22436 inferred_type: None,
22437 }));
22438 let sub_one = Expression::Sub(Box::new(BinaryOp {
22439 left: add_pos,
22440 right: one,
22441 left_comments: Vec::new(),
22442 operator_comments: Vec::new(),
22443 trailing_comments: Vec::new(),
22444 inferred_type: None,
22445 }));
22446 Ok(Expression::Case(Box::new(Case {
22447 operand: Option::None,
22448 whens: vec![(eq_zero, zero)],
22449 else_: Some(sub_one),
22450 comments: Vec::new(),
22451 inferred_type: None,
22452 })))
22453 } else {
22454 Ok(Expression::StrPosition(Box::new(
22455 crate::expressions::StrPosition {
22456 this: Box::new(haystack),
22457 substr: Some(Box::new(needle)),
22458 position: Option::None,
22459 occurrence: Option::None,
22460 },
22461 )))
22462 }
22463 }
22464 // LOCATE group: MySQL, Hive, Spark, Databricks, Doris
22465 DialectType::MySQL
22466 | DialectType::SingleStore
22467 | DialectType::TiDB
22468 | DialectType::Hive
22469 | DialectType::Spark
22470 | DialectType::Databricks
22471 | DialectType::Doris
22472 | DialectType::StarRocks => {
22473 let mut a = vec![needle, haystack];
22474 if let Some(pos) = position {
22475 a.push(pos);
22476 }
22477 Ok(Expression::Function(Box::new(Function::new(
22478 "LOCATE".to_string(),
22479 a,
22480 ))))
22481 }
22482 // ClickHouse: POSITION(haystack, needle[, position])
22483 DialectType::ClickHouse => {
22484 let mut a = vec![haystack, needle];
22485 if let Some(pos) = position {
22486 a.push(pos);
22487 }
22488 Ok(Expression::Function(Box::new(Function::new(
22489 "POSITION".to_string(),
22490 a,
22491 ))))
22492 }
22493 _ => {
22494 let mut a = vec![haystack, needle];
22495 if let Some(pos) = position {
22496 a.push(pos);
22497 }
22498 if let Some(occ) = occurrence {
22499 a.push(occ);
22500 }
22501 Ok(Expression::Function(Box::new(Function::new(
22502 "STR_POSITION".to_string(),
22503 a,
22504 ))))
22505 }
22506 }
22507 } else {
22508 Ok(e)
22509 }
22510 }
22511
22512 Action::ArraySumConvert => {
22513 // ARRAY_SUM(arr) -> dialect-specific
22514 if let Expression::Function(f) = e {
22515 let args = f.args;
22516 match target {
22517 DialectType::DuckDB => Ok(Expression::Function(Box::new(
22518 Function::new("LIST_SUM".to_string(), args),
22519 ))),
22520 DialectType::Spark | DialectType::Databricks => {
22521 // AGGREGATE(arr, 0, (acc, x) -> acc + x, acc -> acc)
22522 let arr = args.into_iter().next().unwrap();
22523 let zero = Expression::Literal(Literal::Number("0".to_string()));
22524 let acc_id = Identifier::new("acc");
22525 let x_id = Identifier::new("x");
22526 let acc = Expression::Identifier(acc_id.clone());
22527 let x = Expression::Identifier(x_id.clone());
22528 let add = Expression::Add(Box::new(BinaryOp {
22529 left: acc.clone(),
22530 right: x,
22531 left_comments: Vec::new(),
22532 operator_comments: Vec::new(),
22533 trailing_comments: Vec::new(),
22534 inferred_type: None,
22535 }));
22536 let lambda1 =
22537 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
22538 parameters: vec![acc_id.clone(), x_id],
22539 body: add,
22540 colon: false,
22541 parameter_types: Vec::new(),
22542 }));
22543 let lambda2 =
22544 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
22545 parameters: vec![acc_id],
22546 body: acc,
22547 colon: false,
22548 parameter_types: Vec::new(),
22549 }));
22550 Ok(Expression::Function(Box::new(Function::new(
22551 "AGGREGATE".to_string(),
22552 vec![arr, zero, lambda1, lambda2],
22553 ))))
22554 }
22555 DialectType::Presto | DialectType::Athena => {
22556 // Presto/Athena keep ARRAY_SUM natively
22557 Ok(Expression::Function(Box::new(Function::new(
22558 "ARRAY_SUM".to_string(),
22559 args,
22560 ))))
22561 }
22562 DialectType::Trino => {
22563 // REDUCE(arr, 0, (acc, x) -> acc + x, acc -> acc)
22564 if args.len() == 1 {
22565 let arr = args.into_iter().next().unwrap();
22566 let zero =
22567 Expression::Literal(Literal::Number("0".to_string()));
22568 let acc_id = Identifier::new("acc");
22569 let x_id = Identifier::new("x");
22570 let acc = Expression::Identifier(acc_id.clone());
22571 let x = Expression::Identifier(x_id.clone());
22572 let add = Expression::Add(Box::new(BinaryOp {
22573 left: acc.clone(),
22574 right: x,
22575 left_comments: Vec::new(),
22576 operator_comments: Vec::new(),
22577 trailing_comments: Vec::new(),
22578 inferred_type: None,
22579 }));
22580 let lambda1 = Expression::Lambda(Box::new(
22581 crate::expressions::LambdaExpr {
22582 parameters: vec![acc_id.clone(), x_id],
22583 body: add,
22584 colon: false,
22585 parameter_types: Vec::new(),
22586 },
22587 ));
22588 let lambda2 = Expression::Lambda(Box::new(
22589 crate::expressions::LambdaExpr {
22590 parameters: vec![acc_id],
22591 body: acc,
22592 colon: false,
22593 parameter_types: Vec::new(),
22594 },
22595 ));
22596 Ok(Expression::Function(Box::new(Function::new(
22597 "REDUCE".to_string(),
22598 vec![arr, zero, lambda1, lambda2],
22599 ))))
22600 } else {
22601 Ok(Expression::Function(Box::new(Function::new(
22602 "ARRAY_SUM".to_string(),
22603 args,
22604 ))))
22605 }
22606 }
22607 DialectType::ClickHouse => {
22608 // arraySum(lambda, arr) or arraySum(arr)
22609 Ok(Expression::Function(Box::new(Function::new(
22610 "arraySum".to_string(),
22611 args,
22612 ))))
22613 }
22614 _ => Ok(Expression::Function(Box::new(Function::new(
22615 "ARRAY_SUM".to_string(),
22616 args,
22617 )))),
22618 }
22619 } else {
22620 Ok(e)
22621 }
22622 }
22623
22624 Action::ArraySizeConvert => {
22625 if let Expression::Function(f) = e {
22626 Ok(Expression::Function(Box::new(Function::new(
22627 "REPEATED_COUNT".to_string(),
22628 f.args,
22629 ))))
22630 } else {
22631 Ok(e)
22632 }
22633 }
22634
            Action::ArrayAnyConvert => {
                // ARRAY_ANY(arr, lambda) -> dialect-specific "does any element satisfy
                // the predicate?" rewrite. Only the two-argument form is rewritten;
                // any other arity is re-emitted as an ARRAY_ANY call unchanged.
                if let Expression::Function(f) = e {
                    let mut args = f.args;
                    if args.len() == 2 {
                        let arr = args.remove(0);
                        let lambda = args.remove(0);

                        // Extract lambda parameter name and body
                        // ("x" is the fallback name when the second argument is not
                        // actually a lambda or has no parameters).
                        let (param_name, pred_body) =
                            if let Expression::Lambda(ref lam) = lambda {
                                let name = if let Some(p) = lam.parameters.first() {
                                    p.name.clone()
                                } else {
                                    "x".to_string()
                                };
                                (name, lam.body.clone())
                            } else {
                                ("x".to_string(), lambda.clone())
                            };

                        // Helper: build a function call Expression
                        let make_func = |name: &str, args: Vec<Expression>| -> Expression {
                            Expression::Function(Box::new(Function::new(
                                name.to_string(),
                                args,
                            )))
                        };

                        // Helper: build (len_func(arr) = 0 OR len_func(filter_expr) <> 0) wrapped in Paren.
                        // The `= 0` arm makes an empty array count as a vacuous match;
                        // `len_args_extra` carries trailing length-function arguments
                        // (e.g. PostgreSQL's dimension argument `1`).
                        let build_filter_pattern = |len_func: &str,
                                                    len_args_extra: Vec<Expression>,
                                                    filter_expr: Expression|
                         -> Expression {
                            // len_func(arr, ...extra) = 0
                            let mut len_arr_args = vec![arr.clone()];
                            len_arr_args.extend(len_args_extra.clone());
                            let len_arr = make_func(len_func, len_arr_args);
                            let eq_zero = Expression::Eq(Box::new(BinaryOp::new(
                                len_arr,
                                Expression::number(0),
                            )));

                            // len_func(filter_expr, ...extra) <> 0
                            let mut len_filter_args = vec![filter_expr];
                            len_filter_args.extend(len_args_extra);
                            let len_filter = make_func(len_func, len_filter_args);
                            let neq_zero = Expression::Neq(Box::new(BinaryOp::new(
                                len_filter,
                                Expression::number(0),
                            )));

                            // (eq_zero OR neq_zero)
                            let or_expr =
                                Expression::Or(Box::new(BinaryOp::new(eq_zero, neq_zero)));
                            Expression::Paren(Box::new(Paren {
                                this: or_expr,
                                trailing_comments: Vec::new(),
                            }))
                        };

                        match target {
                            // Presto family has a native ANY_MATCH higher-order function.
                            DialectType::Trino | DialectType::Presto | DialectType::Athena => {
                                Ok(make_func("ANY_MATCH", vec![arr, lambda]))
                            }
                            DialectType::ClickHouse => {
                                // (LENGTH(arr) = 0 OR LENGTH(arrayFilter(x -> pred, arr)) <> 0)
                                // ClickHouse arrayFilter takes lambda first, then array
                                let filter_expr =
                                    make_func("arrayFilter", vec![lambda, arr.clone()]);
                                Ok(build_filter_pattern("LENGTH", vec![], filter_expr))
                            }
                            DialectType::Databricks | DialectType::Spark => {
                                // (SIZE(arr) = 0 OR SIZE(FILTER(arr, x -> pred)) <> 0)
                                let filter_expr =
                                    make_func("FILTER", vec![arr.clone(), lambda]);
                                Ok(build_filter_pattern("SIZE", vec![], filter_expr))
                            }
                            DialectType::DuckDB => {
                                // (ARRAY_LENGTH(arr) = 0 OR ARRAY_LENGTH(LIST_FILTER(arr, x -> pred)) <> 0)
                                let filter_expr =
                                    make_func("LIST_FILTER", vec![arr.clone(), lambda]);
                                Ok(build_filter_pattern("ARRAY_LENGTH", vec![], filter_expr))
                            }
                            DialectType::Teradata => {
                                // (CARDINALITY(arr) = 0 OR CARDINALITY(FILTER(arr, x -> pred)) <> 0)
                                let filter_expr =
                                    make_func("FILTER", vec![arr.clone(), lambda]);
                                Ok(build_filter_pattern("CARDINALITY", vec![], filter_expr))
                            }
                            DialectType::BigQuery => {
                                // (ARRAY_LENGTH(arr) = 0 OR ARRAY_LENGTH(ARRAY(SELECT x FROM UNNEST(arr) AS x WHERE pred)) <> 0)
                                // Build: SELECT x FROM UNNEST(arr) AS x WHERE pred
                                // BigQuery has no FILTER higher-order function here, so
                                // the predicate is rewritten as a correlated subquery.
                                let param_col = Expression::column(&param_name);
                                let unnest_expr = Expression::Unnest(Box::new(
                                    crate::expressions::UnnestFunc {
                                        this: arr.clone(),
                                        expressions: vec![],
                                        with_ordinality: false,
                                        alias: Some(Identifier::new(&param_name)),
                                        offset_alias: None,
                                    },
                                ));
                                let mut sel = crate::expressions::Select::default();
                                sel.expressions = vec![param_col];
                                sel.from = Some(crate::expressions::From {
                                    expressions: vec![unnest_expr],
                                });
                                sel.where_clause =
                                    Some(crate::expressions::Where { this: pred_body });
                                let array_subquery =
                                    make_func("ARRAY", vec![Expression::Select(Box::new(sel))]);
                                Ok(build_filter_pattern("ARRAY_LENGTH", vec![], array_subquery))
                            }
                            DialectType::PostgreSQL => {
                                // (ARRAY_LENGTH(arr, 1) = 0 OR ARRAY_LENGTH(ARRAY(SELECT x FROM UNNEST(arr) AS _t0(x) WHERE pred), 1) <> 0)
                                // Build: SELECT x FROM UNNEST(arr) AS _t0(x) WHERE pred
                                let param_col = Expression::column(&param_name);
                                // For PostgreSQL, UNNEST uses AS _t0(x) syntax - use TableAlias
                                let unnest_with_alias =
                                    Expression::Alias(Box::new(crate::expressions::Alias {
                                        this: Expression::Unnest(Box::new(
                                            crate::expressions::UnnestFunc {
                                                this: arr.clone(),
                                                expressions: vec![],
                                                with_ordinality: false,
                                                alias: None,
                                                offset_alias: None,
                                            },
                                        )),
                                        alias: Identifier::new("_t0"),
                                        column_aliases: vec![Identifier::new(&param_name)],
                                        pre_alias_comments: Vec::new(),
                                        trailing_comments: Vec::new(),
                                        inferred_type: None,
                                    }));
                                let mut sel = crate::expressions::Select::default();
                                sel.expressions = vec![param_col];
                                sel.from = Some(crate::expressions::From {
                                    expressions: vec![unnest_with_alias],
                                });
                                sel.where_clause =
                                    Some(crate::expressions::Where { this: pred_body });
                                let array_subquery =
                                    make_func("ARRAY", vec![Expression::Select(Box::new(sel))]);
                                Ok(build_filter_pattern(
                                    "ARRAY_LENGTH",
                                    vec![Expression::number(1)],
                                    array_subquery,
                                ))
                            }
                            // Unknown targets: re-emit as ARRAY_ANY and let the
                            // generator deal with it.
                            _ => Ok(Expression::Function(Box::new(Function::new(
                                "ARRAY_ANY".to_string(),
                                vec![arr, lambda],
                            )))),
                        }
                    } else {
                        Ok(Expression::Function(Box::new(Function::new(
                            "ARRAY_ANY".to_string(),
                            args,
                        ))))
                    }
                } else {
                    Ok(e)
                }
            }
22800
22801 Action::DecodeSimplify => {
22802 // DECODE(x, search1, result1, ..., default) -> CASE WHEN ... THEN result1 ... [ELSE default] END
22803 // For literal search values: CASE WHEN x = search THEN result
22804 // For NULL search: CASE WHEN x IS NULL THEN result
22805 // For non-literal (column, expr): CASE WHEN x = search OR (x IS NULL AND search IS NULL) THEN result
22806 fn is_decode_literal(e: &Expression) -> bool {
22807 matches!(
22808 e,
22809 Expression::Literal(_) | Expression::Boolean(_) | Expression::Neg(_)
22810 )
22811 }
22812
22813 let build_decode_case =
22814 |this_expr: Expression,
22815 pairs: Vec<(Expression, Expression)>,
22816 default: Option<Expression>| {
22817 let whens: Vec<(Expression, Expression)> = pairs
22818 .into_iter()
22819 .map(|(search, result)| {
22820 if matches!(&search, Expression::Null(_)) {
22821 // NULL search -> IS NULL
22822 let condition = Expression::Is(Box::new(BinaryOp {
22823 left: this_expr.clone(),
22824 right: Expression::Null(crate::expressions::Null),
22825 left_comments: Vec::new(),
22826 operator_comments: Vec::new(),
22827 trailing_comments: Vec::new(),
22828 inferred_type: None,
22829 }));
22830 (condition, result)
22831 } else if is_decode_literal(&search)
22832 || is_decode_literal(&this_expr)
22833 {
22834 // At least one side is a literal -> simple equality (no NULL check needed)
22835 let eq = Expression::Eq(Box::new(BinaryOp {
22836 left: this_expr.clone(),
22837 right: search,
22838 left_comments: Vec::new(),
22839 operator_comments: Vec::new(),
22840 trailing_comments: Vec::new(),
22841 inferred_type: None,
22842 }));
22843 (eq, result)
22844 } else {
22845 // Non-literal -> null-safe comparison
22846 let needs_paren = matches!(
22847 &search,
22848 Expression::Eq(_)
22849 | Expression::Neq(_)
22850 | Expression::Gt(_)
22851 | Expression::Gte(_)
22852 | Expression::Lt(_)
22853 | Expression::Lte(_)
22854 );
22855 let search_ref = if needs_paren {
22856 Expression::Paren(Box::new(crate::expressions::Paren {
22857 this: search.clone(),
22858 trailing_comments: Vec::new(),
22859 }))
22860 } else {
22861 search.clone()
22862 };
22863 // Build: x = search OR (x IS NULL AND search IS NULL)
22864 let eq = Expression::Eq(Box::new(BinaryOp {
22865 left: this_expr.clone(),
22866 right: search_ref,
22867 left_comments: Vec::new(),
22868 operator_comments: Vec::new(),
22869 trailing_comments: Vec::new(),
22870 inferred_type: None,
22871 }));
22872 let search_in_null = if needs_paren {
22873 Expression::Paren(Box::new(crate::expressions::Paren {
22874 this: search.clone(),
22875 trailing_comments: Vec::new(),
22876 }))
22877 } else {
22878 search.clone()
22879 };
22880 let x_is_null = Expression::Is(Box::new(BinaryOp {
22881 left: this_expr.clone(),
22882 right: Expression::Null(crate::expressions::Null),
22883 left_comments: Vec::new(),
22884 operator_comments: Vec::new(),
22885 trailing_comments: Vec::new(),
22886 inferred_type: None,
22887 }));
22888 let search_is_null = Expression::Is(Box::new(BinaryOp {
22889 left: search_in_null,
22890 right: Expression::Null(crate::expressions::Null),
22891 left_comments: Vec::new(),
22892 operator_comments: Vec::new(),
22893 trailing_comments: Vec::new(),
22894 inferred_type: None,
22895 }));
22896 let both_null = Expression::And(Box::new(BinaryOp {
22897 left: x_is_null,
22898 right: search_is_null,
22899 left_comments: Vec::new(),
22900 operator_comments: Vec::new(),
22901 trailing_comments: Vec::new(),
22902 inferred_type: None,
22903 }));
22904 let condition = Expression::Or(Box::new(BinaryOp {
22905 left: eq,
22906 right: Expression::Paren(Box::new(
22907 crate::expressions::Paren {
22908 this: both_null,
22909 trailing_comments: Vec::new(),
22910 },
22911 )),
22912 left_comments: Vec::new(),
22913 operator_comments: Vec::new(),
22914 trailing_comments: Vec::new(),
22915 inferred_type: None,
22916 }));
22917 (condition, result)
22918 }
22919 })
22920 .collect();
22921 Expression::Case(Box::new(Case {
22922 operand: None,
22923 whens,
22924 else_: default,
22925 comments: Vec::new(),
22926 inferred_type: None,
22927 }))
22928 };
22929
22930 if let Expression::Decode(decode) = e {
22931 Ok(build_decode_case(
22932 decode.this,
22933 decode.search_results,
22934 decode.default,
22935 ))
22936 } else if let Expression::DecodeCase(dc) = e {
22937 // DecodeCase has flat expressions: [x, s1, r1, s2, r2, ..., default?]
22938 let mut exprs = dc.expressions;
22939 if exprs.len() < 3 {
22940 return Ok(Expression::DecodeCase(Box::new(
22941 crate::expressions::DecodeCase { expressions: exprs },
22942 )));
22943 }
22944 let this_expr = exprs.remove(0);
22945 let mut pairs = Vec::new();
22946 let mut default = None;
22947 let mut i = 0;
22948 while i + 1 < exprs.len() {
22949 pairs.push((exprs[i].clone(), exprs[i + 1].clone()));
22950 i += 2;
22951 }
22952 if i < exprs.len() {
22953 // Odd remaining element is the default
22954 default = Some(exprs[i].clone());
22955 }
22956 Ok(build_decode_case(this_expr, pairs, default))
22957 } else {
22958 Ok(e)
22959 }
22960 }
22961
22962 Action::CreateTableLikeToCtas => {
22963 // CREATE TABLE a LIKE b -> CREATE TABLE a AS SELECT * FROM b LIMIT 0
22964 if let Expression::CreateTable(ct) = e {
22965 let like_source = ct.constraints.iter().find_map(|c| {
22966 if let crate::expressions::TableConstraint::Like { source, .. } = c {
22967 Some(source.clone())
22968 } else {
22969 None
22970 }
22971 });
22972 if let Some(source_table) = like_source {
22973 let mut new_ct = *ct;
22974 new_ct.constraints.clear();
22975 // Build: SELECT * FROM b LIMIT 0
22976 let select = Expression::Select(Box::new(crate::expressions::Select {
22977 expressions: vec![Expression::Star(crate::expressions::Star {
22978 table: None,
22979 except: None,
22980 replace: None,
22981 rename: None,
22982 trailing_comments: Vec::new(),
22983 span: None,
22984 })],
22985 from: Some(crate::expressions::From {
22986 expressions: vec![Expression::Table(source_table)],
22987 }),
22988 limit: Some(crate::expressions::Limit {
22989 this: Expression::Literal(Literal::Number("0".to_string())),
22990 percent: false,
22991 comments: Vec::new(),
22992 }),
22993 ..Default::default()
22994 }));
22995 new_ct.as_select = Some(select);
22996 Ok(Expression::CreateTable(Box::new(new_ct)))
22997 } else {
22998 Ok(Expression::CreateTable(ct))
22999 }
23000 } else {
23001 Ok(e)
23002 }
23003 }
23004
            Action::CreateTableLikeToSelectInto => {
                // CREATE TABLE a LIKE b -> SELECT TOP 0 * INTO a FROM b AS temp
                if let Expression::CreateTable(ct) = e {
                    // Pull the source table out of a LIKE constraint, if one exists.
                    let like_source = ct.constraints.iter().find_map(|c| {
                        if let crate::expressions::TableConstraint::Like { source, .. } = c {
                            Some(source.clone())
                        } else {
                            None
                        }
                    });
                    if let Some(source_table) = like_source {
                        // Alias the source as `temp` so the generated statement reads
                        // `FROM b AS temp` (T-SQL-style SELECT ... INTO).
                        let mut aliased_source = source_table;
                        aliased_source.alias = Some(Identifier::new("temp"));
                        // Build: SELECT TOP 0 * INTO a FROM b AS temp
                        // TOP 0 copies the column definitions without copying any rows.
                        let select = Expression::Select(Box::new(crate::expressions::Select {
                            expressions: vec![Expression::Star(crate::expressions::Star {
                                table: None,
                                except: None,
                                replace: None,
                                rename: None,
                                trailing_comments: Vec::new(),
                                span: None,
                            })],
                            from: Some(crate::expressions::From {
                                expressions: vec![Expression::Table(aliased_source)],
                            }),
                            // INTO <name of the table being created>
                            into: Some(crate::expressions::SelectInto {
                                this: Expression::Table(ct.name.clone()),
                                temporary: false,
                                unlogged: false,
                                bulk_collect: false,
                                expressions: Vec::new(),
                            }),
                            top: Some(crate::expressions::Top {
                                this: Expression::Literal(Literal::Number("0".to_string())),
                                percent: false,
                                with_ties: false,
                                parenthesized: false,
                            }),
                            ..Default::default()
                        }));
                        Ok(select)
                    } else {
                        // No LIKE constraint: leave the CREATE TABLE untouched.
                        Ok(Expression::CreateTable(ct))
                    }
                } else {
                    Ok(e)
                }
            }
23054
23055 Action::CreateTableLikeToAs => {
23056 // CREATE TABLE a LIKE b -> CREATE TABLE a AS b (ClickHouse)
23057 if let Expression::CreateTable(ct) = e {
23058 let like_source = ct.constraints.iter().find_map(|c| {
23059 if let crate::expressions::TableConstraint::Like { source, .. } = c {
23060 Some(source.clone())
23061 } else {
23062 None
23063 }
23064 });
23065 if let Some(source_table) = like_source {
23066 let mut new_ct = *ct;
23067 new_ct.constraints.clear();
23068 // AS b (just a table reference, not a SELECT)
23069 new_ct.as_select = Some(Expression::Table(source_table));
23070 Ok(Expression::CreateTable(Box::new(new_ct)))
23071 } else {
23072 Ok(Expression::CreateTable(ct))
23073 }
23074 } else {
23075 Ok(e)
23076 }
23077 }
23078
23079 Action::TsOrDsToDateConvert => {
23080 // TS_OR_DS_TO_DATE(x[, fmt]) -> dialect-specific date conversion
23081 if let Expression::Function(f) = e {
23082 let mut args = f.args;
23083 let this = args.remove(0);
23084 let fmt = if !args.is_empty() {
23085 match &args[0] {
23086 Expression::Literal(Literal::String(s)) => Some(s.clone()),
23087 _ => None,
23088 }
23089 } else {
23090 None
23091 };
23092 Ok(Expression::TsOrDsToDate(Box::new(
23093 crate::expressions::TsOrDsToDate {
23094 this: Box::new(this),
23095 format: fmt,
23096 safe: None,
23097 },
23098 )))
23099 } else {
23100 Ok(e)
23101 }
23102 }
23103
23104 Action::TsOrDsToDateStrConvert => {
23105 // TS_OR_DS_TO_DATE_STR(x) -> SUBSTRING(CAST(x AS type), 1, 10)
23106 if let Expression::Function(f) = e {
23107 let arg = f.args.into_iter().next().unwrap();
23108 let str_type = match target {
23109 DialectType::DuckDB
23110 | DialectType::PostgreSQL
23111 | DialectType::Materialize => DataType::Text,
23112 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
23113 DataType::Custom {
23114 name: "STRING".to_string(),
23115 }
23116 }
23117 DialectType::Presto
23118 | DialectType::Trino
23119 | DialectType::Athena
23120 | DialectType::Drill => DataType::VarChar {
23121 length: None,
23122 parenthesized_length: false,
23123 },
23124 DialectType::MySQL | DialectType::Doris | DialectType::StarRocks => {
23125 DataType::Custom {
23126 name: "STRING".to_string(),
23127 }
23128 }
23129 _ => DataType::VarChar {
23130 length: None,
23131 parenthesized_length: false,
23132 },
23133 };
23134 let cast_expr = Expression::Cast(Box::new(Cast {
23135 this: arg,
23136 to: str_type,
23137 double_colon_syntax: false,
23138 trailing_comments: Vec::new(),
23139 format: None,
23140 default: None,
23141 inferred_type: None,
23142 }));
23143 Ok(Expression::Substring(Box::new(
23144 crate::expressions::SubstringFunc {
23145 this: cast_expr,
23146 start: Expression::number(1),
23147 length: Some(Expression::number(10)),
23148 from_for_syntax: false,
23149 },
23150 )))
23151 } else {
23152 Ok(e)
23153 }
23154 }
23155
23156 Action::DateStrToDateConvert => {
23157 // DATE_STR_TO_DATE(x) -> dialect-specific
23158 if let Expression::Function(f) = e {
23159 let arg = f.args.into_iter().next().unwrap();
23160 match target {
23161 DialectType::SQLite => {
23162 // SQLite: just the bare expression (dates are strings)
23163 Ok(arg)
23164 }
23165 _ => Ok(Expression::Cast(Box::new(Cast {
23166 this: arg,
23167 to: DataType::Date,
23168 double_colon_syntax: false,
23169 trailing_comments: Vec::new(),
23170 format: None,
23171 default: None,
23172 inferred_type: None,
23173 }))),
23174 }
23175 } else {
23176 Ok(e)
23177 }
23178 }
23179
23180 Action::TimeStrToDateConvert => {
23181 // TIME_STR_TO_DATE(x) -> dialect-specific
23182 if let Expression::Function(f) = e {
23183 let arg = f.args.into_iter().next().unwrap();
23184 match target {
23185 DialectType::Hive
23186 | DialectType::Doris
23187 | DialectType::StarRocks
23188 | DialectType::Snowflake => Ok(Expression::Function(Box::new(
23189 Function::new("TO_DATE".to_string(), vec![arg]),
23190 ))),
23191 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
23192 // Presto: CAST(x AS TIMESTAMP)
23193 Ok(Expression::Cast(Box::new(Cast {
23194 this: arg,
23195 to: DataType::Timestamp {
23196 timezone: false,
23197 precision: None,
23198 },
23199 double_colon_syntax: false,
23200 trailing_comments: Vec::new(),
23201 format: None,
23202 default: None,
23203 inferred_type: None,
23204 })))
23205 }
23206 _ => {
23207 // Default: CAST(x AS DATE)
23208 Ok(Expression::Cast(Box::new(Cast {
23209 this: arg,
23210 to: DataType::Date,
23211 double_colon_syntax: false,
23212 trailing_comments: Vec::new(),
23213 format: None,
23214 default: None,
23215 inferred_type: None,
23216 })))
23217 }
23218 }
23219 } else {
23220 Ok(e)
23221 }
23222 }
23223
23224 Action::TimeStrToTimeConvert => {
23225 // TIME_STR_TO_TIME(x[, zone]) -> dialect-specific CAST to timestamp type
23226 if let Expression::Function(f) = e {
23227 let mut args = f.args;
23228 let this = args.remove(0);
23229 let zone = if !args.is_empty() {
23230 match &args[0] {
23231 Expression::Literal(Literal::String(s)) => Some(s.clone()),
23232 _ => None,
23233 }
23234 } else {
23235 None
23236 };
23237 let has_zone = zone.is_some();
23238
23239 match target {
23240 DialectType::SQLite => {
23241 // SQLite: just the bare expression
23242 Ok(this)
23243 }
23244 DialectType::MySQL => {
23245 if has_zone {
23246 // MySQL with zone: TIMESTAMP(x)
23247 Ok(Expression::Function(Box::new(Function::new(
23248 "TIMESTAMP".to_string(),
23249 vec![this],
23250 ))))
23251 } else {
23252 // MySQL: CAST(x AS DATETIME) or with precision
23253 // Use DataType::Custom to avoid MySQL's transform_cast converting
23254 // CAST(x AS TIMESTAMP) -> TIMESTAMP(x)
23255 let precision =
23256 if let Expression::Literal(Literal::String(ref s)) = this {
23257 if let Some(dot_pos) = s.rfind('.') {
23258 let frac = &s[dot_pos + 1..];
23259 let digit_count = frac
23260 .chars()
23261 .take_while(|c| c.is_ascii_digit())
23262 .count();
23263 if digit_count > 0 {
23264 Some(digit_count)
23265 } else {
23266 None
23267 }
23268 } else {
23269 None
23270 }
23271 } else {
23272 None
23273 };
23274 let type_name = match precision {
23275 Some(p) => format!("DATETIME({})", p),
23276 None => "DATETIME".to_string(),
23277 };
23278 Ok(Expression::Cast(Box::new(Cast {
23279 this,
23280 to: DataType::Custom { name: type_name },
23281 double_colon_syntax: false,
23282 trailing_comments: Vec::new(),
23283 format: None,
23284 default: None,
23285 inferred_type: None,
23286 })))
23287 }
23288 }
23289 DialectType::ClickHouse => {
23290 if has_zone {
23291 // ClickHouse with zone: CAST(x AS DateTime64(6, 'zone'))
23292 // We need to strip the timezone offset from the literal if present
23293 let clean_this =
23294 if let Expression::Literal(Literal::String(ref s)) = this {
23295 // Strip timezone offset like "-08:00" or "+00:00"
23296 let re_offset = s.rfind(|c: char| c == '+' || c == '-');
23297 if let Some(offset_pos) = re_offset {
23298 if offset_pos > 10 {
23299 // After the date part
23300 let trimmed = s[..offset_pos].to_string();
23301 Expression::Literal(Literal::String(trimmed))
23302 } else {
23303 this.clone()
23304 }
23305 } else {
23306 this.clone()
23307 }
23308 } else {
23309 this.clone()
23310 };
23311 let zone_str = zone.unwrap();
23312 // Build: CAST(x AS DateTime64(6, 'zone'))
23313 let type_name = format!("DateTime64(6, '{}')", zone_str);
23314 Ok(Expression::Cast(Box::new(Cast {
23315 this: clean_this,
23316 to: DataType::Custom { name: type_name },
23317 double_colon_syntax: false,
23318 trailing_comments: Vec::new(),
23319 format: None,
23320 default: None,
23321 inferred_type: None,
23322 })))
23323 } else {
23324 Ok(Expression::Cast(Box::new(Cast {
23325 this,
23326 to: DataType::Custom {
23327 name: "DateTime64(6)".to_string(),
23328 },
23329 double_colon_syntax: false,
23330 trailing_comments: Vec::new(),
23331 format: None,
23332 default: None,
23333 inferred_type: None,
23334 })))
23335 }
23336 }
23337 DialectType::BigQuery => {
23338 if has_zone {
23339 // BigQuery with zone: CAST(x AS TIMESTAMP)
23340 Ok(Expression::Cast(Box::new(Cast {
23341 this,
23342 to: DataType::Timestamp {
23343 timezone: false,
23344 precision: None,
23345 },
23346 double_colon_syntax: false,
23347 trailing_comments: Vec::new(),
23348 format: None,
23349 default: None,
23350 inferred_type: None,
23351 })))
23352 } else {
23353 // BigQuery: CAST(x AS DATETIME) - Timestamp{tz:false} renders as DATETIME for BigQuery
23354 Ok(Expression::Cast(Box::new(Cast {
23355 this,
23356 to: DataType::Custom {
23357 name: "DATETIME".to_string(),
23358 },
23359 double_colon_syntax: false,
23360 trailing_comments: Vec::new(),
23361 format: None,
23362 default: None,
23363 inferred_type: None,
23364 })))
23365 }
23366 }
23367 DialectType::Doris => {
23368 // Doris: CAST(x AS DATETIME)
23369 Ok(Expression::Cast(Box::new(Cast {
23370 this,
23371 to: DataType::Custom {
23372 name: "DATETIME".to_string(),
23373 },
23374 double_colon_syntax: false,
23375 trailing_comments: Vec::new(),
23376 format: None,
23377 default: None,
23378 inferred_type: None,
23379 })))
23380 }
23381 DialectType::TSQL | DialectType::Fabric => {
23382 if has_zone {
23383 // TSQL with zone: CAST(x AS DATETIMEOFFSET) AT TIME ZONE 'UTC'
23384 let cast_expr = Expression::Cast(Box::new(Cast {
23385 this,
23386 to: DataType::Custom {
23387 name: "DATETIMEOFFSET".to_string(),
23388 },
23389 double_colon_syntax: false,
23390 trailing_comments: Vec::new(),
23391 format: None,
23392 default: None,
23393 inferred_type: None,
23394 }));
23395 Ok(Expression::AtTimeZone(Box::new(
23396 crate::expressions::AtTimeZone {
23397 this: cast_expr,
23398 zone: Expression::Literal(Literal::String(
23399 "UTC".to_string(),
23400 )),
23401 },
23402 )))
23403 } else {
23404 // TSQL: CAST(x AS DATETIME2)
23405 Ok(Expression::Cast(Box::new(Cast {
23406 this,
23407 to: DataType::Custom {
23408 name: "DATETIME2".to_string(),
23409 },
23410 double_colon_syntax: false,
23411 trailing_comments: Vec::new(),
23412 format: None,
23413 default: None,
23414 inferred_type: None,
23415 })))
23416 }
23417 }
23418 DialectType::DuckDB => {
23419 if has_zone {
23420 // DuckDB with zone: CAST(x AS TIMESTAMPTZ)
23421 Ok(Expression::Cast(Box::new(Cast {
23422 this,
23423 to: DataType::Timestamp {
23424 timezone: true,
23425 precision: None,
23426 },
23427 double_colon_syntax: false,
23428 trailing_comments: Vec::new(),
23429 format: None,
23430 default: None,
23431 inferred_type: None,
23432 })))
23433 } else {
23434 // DuckDB: CAST(x AS TIMESTAMP)
23435 Ok(Expression::Cast(Box::new(Cast {
23436 this,
23437 to: DataType::Timestamp {
23438 timezone: false,
23439 precision: None,
23440 },
23441 double_colon_syntax: false,
23442 trailing_comments: Vec::new(),
23443 format: None,
23444 default: None,
23445 inferred_type: None,
23446 })))
23447 }
23448 }
23449 DialectType::PostgreSQL
23450 | DialectType::Materialize
23451 | DialectType::RisingWave => {
23452 if has_zone {
23453 // PostgreSQL with zone: CAST(x AS TIMESTAMPTZ)
23454 Ok(Expression::Cast(Box::new(Cast {
23455 this,
23456 to: DataType::Timestamp {
23457 timezone: true,
23458 precision: None,
23459 },
23460 double_colon_syntax: false,
23461 trailing_comments: Vec::new(),
23462 format: None,
23463 default: None,
23464 inferred_type: None,
23465 })))
23466 } else {
23467 // PostgreSQL: CAST(x AS TIMESTAMP)
23468 Ok(Expression::Cast(Box::new(Cast {
23469 this,
23470 to: DataType::Timestamp {
23471 timezone: false,
23472 precision: None,
23473 },
23474 double_colon_syntax: false,
23475 trailing_comments: Vec::new(),
23476 format: None,
23477 default: None,
23478 inferred_type: None,
23479 })))
23480 }
23481 }
23482 DialectType::Snowflake => {
23483 if has_zone {
23484 // Snowflake with zone: CAST(x AS TIMESTAMPTZ)
23485 Ok(Expression::Cast(Box::new(Cast {
23486 this,
23487 to: DataType::Timestamp {
23488 timezone: true,
23489 precision: None,
23490 },
23491 double_colon_syntax: false,
23492 trailing_comments: Vec::new(),
23493 format: None,
23494 default: None,
23495 inferred_type: None,
23496 })))
23497 } else {
23498 // Snowflake: CAST(x AS TIMESTAMP)
23499 Ok(Expression::Cast(Box::new(Cast {
23500 this,
23501 to: DataType::Timestamp {
23502 timezone: false,
23503 precision: None,
23504 },
23505 double_colon_syntax: false,
23506 trailing_comments: Vec::new(),
23507 format: None,
23508 default: None,
23509 inferred_type: None,
23510 })))
23511 }
23512 }
23513 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
23514 if has_zone {
23515 // Presto/Trino with zone: CAST(x AS TIMESTAMP WITH TIME ZONE)
23516 // Check for precision from sub-second digits
23517 let precision =
23518 if let Expression::Literal(Literal::String(ref s)) = this {
23519 if let Some(dot_pos) = s.rfind('.') {
23520 let frac = &s[dot_pos + 1..];
23521 let digit_count = frac
23522 .chars()
23523 .take_while(|c| c.is_ascii_digit())
23524 .count();
23525 if digit_count > 0
23526 && matches!(target, DialectType::Trino)
23527 {
23528 Some(digit_count as u32)
23529 } else {
23530 None
23531 }
23532 } else {
23533 None
23534 }
23535 } else {
23536 None
23537 };
23538 let dt = if let Some(prec) = precision {
23539 DataType::Timestamp {
23540 timezone: true,
23541 precision: Some(prec),
23542 }
23543 } else {
23544 DataType::Timestamp {
23545 timezone: true,
23546 precision: None,
23547 }
23548 };
23549 Ok(Expression::Cast(Box::new(Cast {
23550 this,
23551 to: dt,
23552 double_colon_syntax: false,
23553 trailing_comments: Vec::new(),
23554 format: None,
23555 default: None,
23556 inferred_type: None,
23557 })))
23558 } else {
23559 // Check for sub-second precision for Trino
23560 let precision =
23561 if let Expression::Literal(Literal::String(ref s)) = this {
23562 if let Some(dot_pos) = s.rfind('.') {
23563 let frac = &s[dot_pos + 1..];
23564 let digit_count = frac
23565 .chars()
23566 .take_while(|c| c.is_ascii_digit())
23567 .count();
23568 if digit_count > 0
23569 && matches!(target, DialectType::Trino)
23570 {
23571 Some(digit_count as u32)
23572 } else {
23573 None
23574 }
23575 } else {
23576 None
23577 }
23578 } else {
23579 None
23580 };
23581 let dt = DataType::Timestamp {
23582 timezone: false,
23583 precision,
23584 };
23585 Ok(Expression::Cast(Box::new(Cast {
23586 this,
23587 to: dt,
23588 double_colon_syntax: false,
23589 trailing_comments: Vec::new(),
23590 format: None,
23591 default: None,
23592 inferred_type: None,
23593 })))
23594 }
23595 }
23596 DialectType::Redshift => {
23597 if has_zone {
23598 // Redshift with zone: CAST(x AS TIMESTAMP WITH TIME ZONE)
23599 Ok(Expression::Cast(Box::new(Cast {
23600 this,
23601 to: DataType::Timestamp {
23602 timezone: true,
23603 precision: None,
23604 },
23605 double_colon_syntax: false,
23606 trailing_comments: Vec::new(),
23607 format: None,
23608 default: None,
23609 inferred_type: None,
23610 })))
23611 } else {
23612 // Redshift: CAST(x AS TIMESTAMP)
23613 Ok(Expression::Cast(Box::new(Cast {
23614 this,
23615 to: DataType::Timestamp {
23616 timezone: false,
23617 precision: None,
23618 },
23619 double_colon_syntax: false,
23620 trailing_comments: Vec::new(),
23621 format: None,
23622 default: None,
23623 inferred_type: None,
23624 })))
23625 }
23626 }
23627 _ => {
23628 // Default: CAST(x AS TIMESTAMP)
23629 Ok(Expression::Cast(Box::new(Cast {
23630 this,
23631 to: DataType::Timestamp {
23632 timezone: false,
23633 precision: None,
23634 },
23635 double_colon_syntax: false,
23636 trailing_comments: Vec::new(),
23637 format: None,
23638 default: None,
23639 inferred_type: None,
23640 })))
23641 }
23642 }
23643 } else {
23644 Ok(e)
23645 }
23646 }
23647
23648 Action::DateToDateStrConvert => {
23649 // DATE_TO_DATE_STR(x) -> CAST(x AS text_type) per dialect
23650 if let Expression::Function(f) = e {
23651 let arg = f.args.into_iter().next().unwrap();
23652 let str_type = match target {
23653 DialectType::DuckDB => DataType::Text,
23654 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
23655 DataType::Custom {
23656 name: "STRING".to_string(),
23657 }
23658 }
23659 DialectType::Presto
23660 | DialectType::Trino
23661 | DialectType::Athena
23662 | DialectType::Drill => DataType::VarChar {
23663 length: None,
23664 parenthesized_length: false,
23665 },
23666 _ => DataType::VarChar {
23667 length: None,
23668 parenthesized_length: false,
23669 },
23670 };
23671 Ok(Expression::Cast(Box::new(Cast {
23672 this: arg,
23673 to: str_type,
23674 double_colon_syntax: false,
23675 trailing_comments: Vec::new(),
23676 format: None,
23677 default: None,
23678 inferred_type: None,
23679 })))
23680 } else {
23681 Ok(e)
23682 }
23683 }
23684
            Action::DateToDiConvert => {
                // DATE_TO_DI(x): render a date as the integer yyyymmdd, i.e.
                // CAST(<format_func>(x, fmt) AS INT). Both the formatting function and
                // the integer type spelling are chosen per target dialect.
                if let Expression::Function(f) = e {
                    // Generic form has exactly one argument; an empty arg list would
                    // be a parser bug, hence the unwrap.
                    let arg = f.args.into_iter().next().unwrap();
                    let inner = match target {
                        DialectType::DuckDB => {
                            // STRFTIME(x, '%Y%m%d')
                            Expression::Function(Box::new(Function::new(
                                "STRFTIME".to_string(),
                                vec![arg, Expression::string("%Y%m%d")],
                            )))
                        }
                        DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
                            // DATE_FORMAT(x, 'yyyyMMdd') - Java-style format string
                            Expression::Function(Box::new(Function::new(
                                "DATE_FORMAT".to_string(),
                                vec![arg, Expression::string("yyyyMMdd")],
                            )))
                        }
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // DATE_FORMAT(x, '%Y%m%d') - MySQL-style format string
                            Expression::Function(Box::new(Function::new(
                                "DATE_FORMAT".to_string(),
                                vec![arg, Expression::string("%Y%m%d")],
                            )))
                        }
                        DialectType::Drill => {
                            // TO_DATE(x, 'yyyyMMdd')
                            // NOTE(review): TO_DATE produces a DATE value, which is
                            // then cast to INT below; verify Drill shouldn't use a
                            // string-producing function (e.g. TO_CHAR) here instead.
                            Expression::Function(Box::new(Function::new(
                                "TO_DATE".to_string(),
                                vec![arg, Expression::string("yyyyMMdd")],
                            )))
                        }
                        _ => {
                            // Default: STRFTIME(x, '%Y%m%d')
                            Expression::Function(Box::new(Function::new(
                                "STRFTIME".to_string(),
                                vec![arg, Expression::string("%Y%m%d")],
                            )))
                        }
                    };
                    // Use INT (not INTEGER) for Presto/Trino and the other dialects in
                    // the first arm; everything else uses the structured Int type.
                    let int_type = match target {
                        DialectType::Presto
                        | DialectType::Trino
                        | DialectType::Athena
                        | DialectType::TSQL
                        | DialectType::Fabric
                        | DialectType::SQLite
                        | DialectType::Redshift => DataType::Custom {
                            name: "INT".to_string(),
                        },
                        _ => DataType::Int {
                            length: None,
                            integer_spelling: false,
                        },
                    };
                    Ok(Expression::Cast(Box::new(Cast {
                        this: inner,
                        to: int_type,
                        double_colon_syntax: false,
                        trailing_comments: Vec::new(),
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                } else {
                    // Not a function call: pass through untouched.
                    Ok(e)
                }
            }
23755
            Action::DiToDateConvert => {
                // DI_TO_DATE(x): parse an integer of the form yyyymmdd back into a
                // DATE, using each dialect's native string-parsing machinery.
                if let Expression::Function(f) = e {
                    // Generic form has exactly one argument.
                    let arg = f.args.into_iter().next().unwrap();
                    match target {
                        DialectType::DuckDB => {
                            // CAST(STRPTIME(CAST(x AS TEXT), '%Y%m%d') AS DATE)
                            let cast_text = Expression::Cast(Box::new(Cast {
                                this: arg,
                                to: DataType::Text,
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                                inferred_type: None,
                            }));
                            let strptime = Expression::Function(Box::new(Function::new(
                                "STRPTIME".to_string(),
                                vec![cast_text, Expression::string("%Y%m%d")],
                            )));
                            // Outer cast narrows the parsed timestamp to DATE.
                            Ok(Expression::Cast(Box::new(Cast {
                                this: strptime,
                                to: DataType::Date,
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                                inferred_type: None,
                            })))
                        }
                        DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
                            // TO_DATE(CAST(x AS STRING), 'yyyyMMdd') - Java format
                            let cast_str = Expression::Cast(Box::new(Cast {
                                this: arg,
                                to: DataType::Custom {
                                    name: "STRING".to_string(),
                                },
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                                inferred_type: None,
                            }));
                            Ok(Expression::Function(Box::new(Function::new(
                                "TO_DATE".to_string(),
                                vec![cast_str, Expression::string("yyyyMMdd")],
                            ))))
                        }
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // CAST(DATE_PARSE(CAST(x AS VARCHAR), '%Y%m%d') AS DATE)
                            let cast_varchar = Expression::Cast(Box::new(Cast {
                                this: arg,
                                to: DataType::VarChar {
                                    length: None,
                                    parenthesized_length: false,
                                },
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                                inferred_type: None,
                            }));
                            let date_parse = Expression::Function(Box::new(Function::new(
                                "DATE_PARSE".to_string(),
                                vec![cast_varchar, Expression::string("%Y%m%d")],
                            )));
                            // Outer cast narrows the parsed timestamp to DATE.
                            Ok(Expression::Cast(Box::new(Cast {
                                this: date_parse,
                                to: DataType::Date,
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                                inferred_type: None,
                            })))
                        }
                        DialectType::Drill => {
                            // TO_DATE(CAST(x AS VARCHAR), 'yyyyMMdd')
                            let cast_varchar = Expression::Cast(Box::new(Cast {
                                this: arg,
                                to: DataType::VarChar {
                                    length: None,
                                    parenthesized_length: false,
                                },
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                                inferred_type: None,
                            }));
                            Ok(Expression::Function(Box::new(Function::new(
                                "TO_DATE".to_string(),
                                vec![cast_varchar, Expression::string("yyyyMMdd")],
                            ))))
                        }
                        // Unknown target: re-emit the canonical DI_TO_DATE call so a
                        // later pass (or the generator) can handle it.
                        _ => Ok(Expression::Function(Box::new(Function::new(
                            "DI_TO_DATE".to_string(),
                            vec![arg],
                        )))),
                    }
                } else {
                    // Not a function call: pass through untouched.
                    Ok(e)
                }
            }
23860
23861 Action::TsOrDiToDiConvert => {
23862 // TS_OR_DI_TO_DI(x) -> CAST(SUBSTR(REPLACE(CAST(x AS type), '-', ''), 1, 8) AS INT)
23863 if let Expression::Function(f) = e {
23864 let arg = f.args.into_iter().next().unwrap();
23865 let str_type = match target {
23866 DialectType::DuckDB => DataType::Text,
23867 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
23868 DataType::Custom {
23869 name: "STRING".to_string(),
23870 }
23871 }
23872 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
23873 DataType::VarChar {
23874 length: None,
23875 parenthesized_length: false,
23876 }
23877 }
23878 _ => DataType::VarChar {
23879 length: None,
23880 parenthesized_length: false,
23881 },
23882 };
23883 let cast_str = Expression::Cast(Box::new(Cast {
23884 this: arg,
23885 to: str_type,
23886 double_colon_syntax: false,
23887 trailing_comments: Vec::new(),
23888 format: None,
23889 default: None,
23890 inferred_type: None,
23891 }));
23892 let replace_expr = Expression::Function(Box::new(Function::new(
23893 "REPLACE".to_string(),
23894 vec![cast_str, Expression::string("-"), Expression::string("")],
23895 )));
23896 let substr_name = match target {
23897 DialectType::DuckDB
23898 | DialectType::Hive
23899 | DialectType::Spark
23900 | DialectType::Databricks => "SUBSTR",
23901 _ => "SUBSTR",
23902 };
23903 let substr = Expression::Function(Box::new(Function::new(
23904 substr_name.to_string(),
23905 vec![replace_expr, Expression::number(1), Expression::number(8)],
23906 )));
23907 // Use INT (not INTEGER) for Presto/Trino etc.
23908 let int_type = match target {
23909 DialectType::Presto
23910 | DialectType::Trino
23911 | DialectType::Athena
23912 | DialectType::TSQL
23913 | DialectType::Fabric
23914 | DialectType::SQLite
23915 | DialectType::Redshift => DataType::Custom {
23916 name: "INT".to_string(),
23917 },
23918 _ => DataType::Int {
23919 length: None,
23920 integer_spelling: false,
23921 },
23922 };
23923 Ok(Expression::Cast(Box::new(Cast {
23924 this: substr,
23925 to: int_type,
23926 double_colon_syntax: false,
23927 trailing_comments: Vec::new(),
23928 format: None,
23929 default: None,
23930 inferred_type: None,
23931 })))
23932 } else {
23933 Ok(e)
23934 }
23935 }
23936
            Action::UnixToStrConvert => {
                // UNIX_TO_STR(x, fmt): format a unix epoch as a string.
                //   - literal fmt    -> Expression::UnixToStr (generator renders it)
                //   - non-literal fmt-> build the target dialect's call tree here
                //   - no fmt at all  -> Expression::UnixToStr with format: None
                if let Expression::Function(f) = e {
                    let mut args = f.args;
                    // First argument is the epoch value (panics if absent - parser bug).
                    let this = args.remove(0);
                    // Optional second argument: the format expression.
                    let fmt_expr = if !args.is_empty() {
                        Some(args.remove(0))
                    } else {
                        None
                    };

                    // Check if format is a string literal
                    let fmt_str = fmt_expr.as_ref().and_then(|f| {
                        if let Expression::Literal(Literal::String(s)) = f {
                            Some(s.clone())
                        } else {
                            None
                        }
                    });

                    if let Some(fmt_string) = fmt_str {
                        // String literal format -> use UnixToStr expression (generator handles it)
                        Ok(Expression::UnixToStr(Box::new(
                            crate::expressions::UnixToStr {
                                this: Box::new(this),
                                format: Some(fmt_string),
                            },
                        )))
                    } else if let Some(fmt_e) = fmt_expr {
                        // Non-literal format (e.g., identifier `y`) -> build target expression directly
                        match target {
                            DialectType::DuckDB => {
                                // STRFTIME(TO_TIMESTAMP(x), y)
                                let to_ts = Expression::Function(Box::new(Function::new(
                                    "TO_TIMESTAMP".to_string(),
                                    vec![this],
                                )));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "STRFTIME".to_string(),
                                    vec![to_ts, fmt_e],
                                ))))
                            }
                            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                                // DATE_FORMAT(FROM_UNIXTIME(x), y)
                                let from_unix = Expression::Function(Box::new(Function::new(
                                    "FROM_UNIXTIME".to_string(),
                                    vec![this],
                                )));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_FORMAT".to_string(),
                                    vec![from_unix, fmt_e],
                                ))))
                            }
                            DialectType::Hive
                            | DialectType::Spark
                            | DialectType::Databricks
                            | DialectType::Doris
                            | DialectType::StarRocks => {
                                // FROM_UNIXTIME(x, y) - takes the format directly
                                Ok(Expression::Function(Box::new(Function::new(
                                    "FROM_UNIXTIME".to_string(),
                                    vec![this, fmt_e],
                                ))))
                            }
                            _ => {
                                // Default: keep as UNIX_TO_STR(x, y)
                                Ok(Expression::Function(Box::new(Function::new(
                                    "UNIX_TO_STR".to_string(),
                                    vec![this, fmt_e],
                                ))))
                            }
                        }
                    } else {
                        // No format argument at all.
                        Ok(Expression::UnixToStr(Box::new(
                            crate::expressions::UnixToStr {
                                this: Box::new(this),
                                format: None,
                            },
                        )))
                    }
                } else {
                    // Not a function call: pass through untouched.
                    Ok(e)
                }
            }
24021
24022 Action::UnixToTimeConvert => {
24023 // UNIX_TO_TIME(x) -> convert to Expression::UnixToTime for generator
24024 if let Expression::Function(f) = e {
24025 let arg = f.args.into_iter().next().unwrap();
24026 Ok(Expression::UnixToTime(Box::new(
24027 crate::expressions::UnixToTime {
24028 this: Box::new(arg),
24029 scale: None,
24030 zone: None,
24031 hours: None,
24032 minutes: None,
24033 format: None,
24034 target_type: None,
24035 },
24036 )))
24037 } else {
24038 Ok(e)
24039 }
24040 }
24041
            Action::UnixToTimeStrConvert => {
                // UNIX_TO_TIME_STR(x): unix epoch -> timestamp string, per dialect.
                if let Expression::Function(f) = e {
                    // Generic form has exactly one argument.
                    let arg = f.args.into_iter().next().unwrap();
                    match target {
                        DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
                            // FROM_UNIXTIME(x) - no extra cast for these dialects
                            Ok(Expression::Function(Box::new(Function::new(
                                "FROM_UNIXTIME".to_string(),
                                vec![arg],
                            ))))
                        }
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // CAST(FROM_UNIXTIME(x) AS VARCHAR)
                            let from_unix = Expression::Function(Box::new(Function::new(
                                "FROM_UNIXTIME".to_string(),
                                vec![arg],
                            )));
                            Ok(Expression::Cast(Box::new(Cast {
                                this: from_unix,
                                to: DataType::VarChar {
                                    length: None,
                                    parenthesized_length: false,
                                },
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                                inferred_type: None,
                            })))
                        }
                        DialectType::DuckDB => {
                            // CAST(TO_TIMESTAMP(x) AS TEXT)
                            let to_ts = Expression::Function(Box::new(Function::new(
                                "TO_TIMESTAMP".to_string(),
                                vec![arg],
                            )));
                            Ok(Expression::Cast(Box::new(Cast {
                                this: to_ts,
                                to: DataType::Text,
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                                inferred_type: None,
                            })))
                        }
                        // Unknown target: keep the canonical call form.
                        _ => Ok(Expression::Function(Box::new(Function::new(
                            "UNIX_TO_TIME_STR".to_string(),
                            vec![arg],
                        )))),
                    }
                } else {
                    // Not a function call: pass through untouched.
                    Ok(e)
                }
            }
24098
24099 Action::TimeToUnixConvert => {
24100 // TIME_TO_UNIX(x) -> convert to Expression::TimeToUnix for generator
24101 if let Expression::Function(f) = e {
24102 let arg = f.args.into_iter().next().unwrap();
24103 Ok(Expression::TimeToUnix(Box::new(
24104 crate::expressions::UnaryFunc {
24105 this: arg,
24106 original_name: None,
24107 inferred_type: None,
24108 },
24109 )))
24110 } else {
24111 Ok(e)
24112 }
24113 }
24114
24115 Action::TimeToStrConvert => {
24116 // TIME_TO_STR(x, fmt) -> convert to Expression::TimeToStr for generator
24117 if let Expression::Function(f) = e {
24118 let mut args = f.args;
24119 let this = args.remove(0);
24120 let fmt = match args.remove(0) {
24121 Expression::Literal(Literal::String(s)) => s,
24122 other => {
24123 return Ok(Expression::Function(Box::new(Function::new(
24124 "TIME_TO_STR".to_string(),
24125 vec![this, other],
24126 ))));
24127 }
24128 };
24129 Ok(Expression::TimeToStr(Box::new(
24130 crate::expressions::TimeToStr {
24131 this: Box::new(this),
24132 format: fmt,
24133 culture: None,
24134 zone: None,
24135 },
24136 )))
24137 } else {
24138 Ok(e)
24139 }
24140 }
24141
24142 Action::StrToUnixConvert => {
24143 // STR_TO_UNIX(x, fmt) -> convert to Expression::StrToUnix for generator
24144 if let Expression::Function(f) = e {
24145 let mut args = f.args;
24146 let this = args.remove(0);
24147 let fmt = match args.remove(0) {
24148 Expression::Literal(Literal::String(s)) => s,
24149 other => {
24150 return Ok(Expression::Function(Box::new(Function::new(
24151 "STR_TO_UNIX".to_string(),
24152 vec![this, other],
24153 ))));
24154 }
24155 };
24156 Ok(Expression::StrToUnix(Box::new(
24157 crate::expressions::StrToUnix {
24158 this: Some(Box::new(this)),
24159 format: Some(fmt),
24160 },
24161 )))
24162 } else {
24163 Ok(e)
24164 }
24165 }
24166
            Action::TimeStrToUnixConvert => {
                // TIME_STR_TO_UNIX(x): timestamp string -> unix epoch, per dialect.
                if let Expression::Function(f) = e {
                    // Generic form has exactly one argument.
                    let arg = f.args.into_iter().next().unwrap();
                    match target {
                        DialectType::DuckDB => {
                            // EPOCH(CAST(x AS TIMESTAMP))
                            let cast_ts = Expression::Cast(Box::new(Cast {
                                this: arg,
                                to: DataType::Timestamp {
                                    timezone: false,
                                    precision: None,
                                },
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                                inferred_type: None,
                            }));
                            Ok(Expression::Function(Box::new(Function::new(
                                "EPOCH".to_string(),
                                vec![cast_ts],
                            ))))
                        }
                        DialectType::Hive
                        | DialectType::Doris
                        | DialectType::StarRocks
                        | DialectType::MySQL => {
                            // UNIX_TIMESTAMP(x) - parses the string directly
                            Ok(Expression::Function(Box::new(Function::new(
                                "UNIX_TIMESTAMP".to_string(),
                                vec![arg],
                            ))))
                        }
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // TO_UNIXTIME(DATE_PARSE(x, '%Y-%m-%d %T'))
                            // NOTE(review): hard-codes the canonical
                            // 'YYYY-MM-DD hh:mm:ss' input layout - confirm callers
                            // never pass other layouts through this path.
                            let date_parse = Expression::Function(Box::new(Function::new(
                                "DATE_PARSE".to_string(),
                                vec![arg, Expression::string("%Y-%m-%d %T")],
                            )));
                            Ok(Expression::Function(Box::new(Function::new(
                                "TO_UNIXTIME".to_string(),
                                vec![date_parse],
                            ))))
                        }
                        // Unknown target: keep the canonical call form.
                        _ => Ok(Expression::Function(Box::new(Function::new(
                            "TIME_STR_TO_UNIX".to_string(),
                            vec![arg],
                        )))),
                    }
                } else {
                    // Not a function call: pass through untouched.
                    Ok(e)
                }
            }
24221
            Action::TimeToTimeStrConvert => {
                // TIME_TO_TIME_STR(x) -> CAST(x AS <string type>) per dialect.
                if let Expression::Function(f) = e {
                    // Generic form has exactly one argument.
                    let arg = f.args.into_iter().next().unwrap();
                    let str_type = match target {
                        DialectType::DuckDB => DataType::Text,
                        DialectType::Hive
                        | DialectType::Spark
                        | DialectType::Databricks
                        | DialectType::Doris
                        | DialectType::StarRocks => DataType::Custom {
                            name: "STRING".to_string(),
                        },
                        // Redshift gets an explicit VARCHAR(MAX) - presumably to avoid
                        // a default VARCHAR length limit; confirm against generator.
                        DialectType::Redshift => DataType::Custom {
                            name: "VARCHAR(MAX)".to_string(),
                        },
                        _ => DataType::VarChar {
                            length: None,
                            parenthesized_length: false,
                        },
                    };
                    Ok(Expression::Cast(Box::new(Cast {
                        this: arg,
                        to: str_type,
                        double_colon_syntax: false,
                        trailing_comments: Vec::new(),
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                } else {
                    // Not a function call: pass through untouched.
                    Ok(e)
                }
            }
24256
            Action::DateTruncSwapArgs => {
                // DATE_TRUNC('unit', x) from Generic -> target-specific argument
                // order / function name. Only fires when the unit is a string literal
                // and the call has exactly two arguments; anything else passes through.
                if let Expression::Function(f) = e {
                    if f.args.len() == 2 {
                        // Clone rather than consume: `f` must remain intact for the
                        // pass-through branches below.
                        let unit_arg = f.args[0].clone();
                        let expr_arg = f.args[1].clone();
                        // Extract unit string from the first arg
                        let unit_str = match &unit_arg {
                            Expression::Literal(Literal::String(s)) => s.to_uppercase(),
                            _ => return Ok(Expression::Function(f)),
                        };
                        match target {
                            DialectType::BigQuery => {
                                // BigQuery: DATE_TRUNC(x, UNIT) - the unit is rendered
                                // as a bare identifier (Column), not a quoted string
                                let unit_ident =
                                    Expression::Column(crate::expressions::Column {
                                        name: crate::expressions::Identifier::new(unit_str),
                                        table: None,
                                        join_mark: false,
                                        trailing_comments: Vec::new(),
                                        span: None,
                                        inferred_type: None,
                                    });
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![expr_arg, unit_ident],
                                ))))
                            }
                            DialectType::Doris => {
                                // Doris: DATE_TRUNC(x, 'UNIT') - swapped argument order
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![expr_arg, Expression::string(&unit_str)],
                                ))))
                            }
                            DialectType::StarRocks => {
                                // StarRocks: DATE_TRUNC('UNIT', x) - keep standard order
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![Expression::string(&unit_str), expr_arg],
                                ))))
                            }
                            DialectType::Spark | DialectType::Databricks => {
                                // Spark: TRUNC(x, 'UNIT')
                                Ok(Expression::Function(Box::new(Function::new(
                                    "TRUNC".to_string(),
                                    vec![expr_arg, Expression::string(&unit_str)],
                                ))))
                            }
                            DialectType::MySQL => {
                                // MySQL: complex expansion based on unit, delegated to
                                // the dedicated helper.
                                Self::date_trunc_to_mysql(&unit_str, &expr_arg)
                            }
                            // Any other dialect keeps the original call untouched.
                            _ => Ok(Expression::Function(f)),
                        }
                    } else {
                        // Wrong arity - leave the call as-is.
                        Ok(Expression::Function(f))
                    }
                } else {
                    // Not a function call: pass through untouched.
                    Ok(e)
                }
            }
24319
            Action::TimestampTruncConvert => {
                // TIMESTAMP_TRUNC(x, UNIT[, tz]) from Generic -> target-specific
                // truncation call. The unit may arrive either as a string literal or
                // as a bare identifier (Column); anything else passes through.
                if let Expression::Function(f) = e {
                    if f.args.len() >= 2 {
                        // Clone rather than consume: `f` must remain intact for the
                        // pass-through branches below.
                        let expr_arg = f.args[0].clone();
                        let unit_arg = f.args[1].clone();
                        // Optional third argument: time zone.
                        let tz_arg = if f.args.len() >= 3 {
                            Some(f.args[2].clone())
                        } else {
                            None
                        };
                        // Extract unit string
                        let unit_str = match &unit_arg {
                            Expression::Literal(Literal::String(s)) => s.to_uppercase(),
                            Expression::Column(c) => c.name.name.to_uppercase(),
                            _ => {
                                return Ok(Expression::Function(f));
                            }
                        };
                        match target {
                            DialectType::Spark | DialectType::Databricks => {
                                // Spark: DATE_TRUNC('UNIT', x)
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![Expression::string(&unit_str), expr_arg],
                                ))))
                            }
                            DialectType::Doris | DialectType::StarRocks => {
                                // Doris: DATE_TRUNC(x, 'UNIT') - swapped order
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![expr_arg, Expression::string(&unit_str)],
                                ))))
                            }
                            DialectType::BigQuery => {
                                // BigQuery: TIMESTAMP_TRUNC(x, UNIT) - keep the function
                                // but render the unit as an unquoted identifier; the
                                // optional tz rides along as a third argument.
                                let unit_ident =
                                    Expression::Column(crate::expressions::Column {
                                        name: crate::expressions::Identifier::new(unit_str),
                                        table: None,
                                        join_mark: false,
                                        trailing_comments: Vec::new(),
                                        span: None,
                                        inferred_type: None,
                                    });
                                let mut args = vec![expr_arg, unit_ident];
                                if let Some(tz) = tz_arg {
                                    args.push(tz);
                                }
                                Ok(Expression::Function(Box::new(Function::new(
                                    "TIMESTAMP_TRUNC".to_string(),
                                    args,
                                ))))
                            }
                            DialectType::DuckDB => {
                                // DuckDB with timezone: DATE_TRUNC('UNIT', x AT TIME ZONE 'tz') AT TIME ZONE 'tz'
                                if let Some(tz) = tz_arg {
                                    // Non-literal zone expressions fall back to 'UTC'.
                                    let tz_str = match &tz {
                                        Expression::Literal(Literal::String(s)) => s.clone(),
                                        _ => "UTC".to_string(),
                                    };
                                    // x AT TIME ZONE 'tz'
                                    let at_tz = Expression::AtTimeZone(Box::new(
                                        crate::expressions::AtTimeZone {
                                            this: expr_arg,
                                            zone: Expression::string(&tz_str),
                                        },
                                    ));
                                    // DATE_TRUNC('UNIT', x AT TIME ZONE 'tz')
                                    let trunc = Expression::Function(Box::new(Function::new(
                                        "DATE_TRUNC".to_string(),
                                        vec![Expression::string(&unit_str), at_tz],
                                    )));
                                    // DATE_TRUNC(...) AT TIME ZONE 'tz'
                                    Ok(Expression::AtTimeZone(Box::new(
                                        crate::expressions::AtTimeZone {
                                            this: trunc,
                                            zone: Expression::string(&tz_str),
                                        },
                                    )))
                                } else {
                                    // No timezone: plain DATE_TRUNC('UNIT', x).
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "DATE_TRUNC".to_string(),
                                        vec![Expression::string(&unit_str), expr_arg],
                                    ))))
                                }
                            }
                            DialectType::Presto
                            | DialectType::Trino
                            | DialectType::Athena
                            | DialectType::Snowflake => {
                                // Presto/Snowflake: DATE_TRUNC('UNIT', x) - drop timezone
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![Expression::string(&unit_str), expr_arg],
                                ))))
                            }
                            _ => {
                                // For most dialects: DATE_TRUNC('UNIT', x) + tz handling
                                let mut args = vec![Expression::string(&unit_str), expr_arg];
                                if let Some(tz) = tz_arg {
                                    args.push(tz);
                                }
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    args,
                                ))))
                            }
                        }
                    } else {
                        // Fewer than two arguments - leave the call as-is.
                        Ok(Expression::Function(f))
                    }
                } else {
                    // Not a function call: pass through untouched.
                    Ok(e)
                }
            }
24436
24437 Action::StrToDateConvert => {
24438 // STR_TO_DATE(x, fmt) from Generic -> dialect-specific date parsing
24439 if let Expression::Function(f) = e {
24440 if f.args.len() == 2 {
24441 let mut args = f.args;
24442 let this = args.remove(0);
24443 let fmt_expr = args.remove(0);
24444 let fmt_str = match &fmt_expr {
24445 Expression::Literal(Literal::String(s)) => Some(s.clone()),
24446 _ => None,
24447 };
24448 let default_date = "%Y-%m-%d";
24449 let default_time = "%Y-%m-%d %H:%M:%S";
24450 let is_default = fmt_str
24451 .as_ref()
24452 .map_or(false, |f| f == default_date || f == default_time);
24453
24454 if is_default {
24455 // Default format: handle per-dialect
24456 match target {
24457 DialectType::MySQL
24458 | DialectType::Doris
24459 | DialectType::StarRocks => {
24460 // Keep STR_TO_DATE(x, fmt) as-is
24461 Ok(Expression::Function(Box::new(Function::new(
24462 "STR_TO_DATE".to_string(),
24463 vec![this, fmt_expr],
24464 ))))
24465 }
24466 DialectType::Hive => {
24467 // Hive: CAST(x AS DATE)
24468 Ok(Expression::Cast(Box::new(Cast {
24469 this,
24470 to: DataType::Date,
24471 double_colon_syntax: false,
24472 trailing_comments: Vec::new(),
24473 format: None,
24474 default: None,
24475 inferred_type: None,
24476 })))
24477 }
24478 DialectType::Presto
24479 | DialectType::Trino
24480 | DialectType::Athena => {
24481 // Presto: CAST(DATE_PARSE(x, '%Y-%m-%d') AS DATE)
24482 let date_parse =
24483 Expression::Function(Box::new(Function::new(
24484 "DATE_PARSE".to_string(),
24485 vec![this, fmt_expr],
24486 )));
24487 Ok(Expression::Cast(Box::new(Cast {
24488 this: date_parse,
24489 to: DataType::Date,
24490 double_colon_syntax: false,
24491 trailing_comments: Vec::new(),
24492 format: None,
24493 default: None,
24494 inferred_type: None,
24495 })))
24496 }
24497 _ => {
24498 // Others: TsOrDsToDate (delegates to generator)
24499 Ok(Expression::TsOrDsToDate(Box::new(
24500 crate::expressions::TsOrDsToDate {
24501 this: Box::new(this),
24502 format: None,
24503 safe: None,
24504 },
24505 )))
24506 }
24507 }
24508 } else if let Some(fmt) = fmt_str {
24509 match target {
24510 DialectType::Doris
24511 | DialectType::StarRocks
24512 | DialectType::MySQL => {
24513 // Keep STR_TO_DATE but with normalized format (%H:%M:%S -> %T, %-d -> %e)
24514 let mut normalized = fmt.clone();
24515 normalized = normalized.replace("%-d", "%e");
24516 normalized = normalized.replace("%-m", "%c");
24517 normalized = normalized.replace("%H:%M:%S", "%T");
24518 Ok(Expression::Function(Box::new(Function::new(
24519 "STR_TO_DATE".to_string(),
24520 vec![this, Expression::string(&normalized)],
24521 ))))
24522 }
24523 DialectType::Hive => {
24524 // Hive: CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(x, java_fmt)) AS DATE)
24525 let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
24526 let unix_ts =
24527 Expression::Function(Box::new(Function::new(
24528 "UNIX_TIMESTAMP".to_string(),
24529 vec![this, Expression::string(&java_fmt)],
24530 )));
24531 let from_unix =
24532 Expression::Function(Box::new(Function::new(
24533 "FROM_UNIXTIME".to_string(),
24534 vec![unix_ts],
24535 )));
24536 Ok(Expression::Cast(Box::new(Cast {
24537 this: from_unix,
24538 to: DataType::Date,
24539 double_colon_syntax: false,
24540 trailing_comments: Vec::new(),
24541 format: None,
24542 default: None,
24543 inferred_type: None,
24544 })))
24545 }
24546 DialectType::Spark | DialectType::Databricks => {
24547 // Spark: TO_DATE(x, java_fmt)
24548 let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
24549 Ok(Expression::Function(Box::new(Function::new(
24550 "TO_DATE".to_string(),
24551 vec![this, Expression::string(&java_fmt)],
24552 ))))
24553 }
24554 DialectType::Drill => {
24555 // Drill: TO_DATE(x, java_fmt) with T quoted as 'T' in Java format
24556 // The generator's string literal escaping will double the quotes: 'T' -> ''T''
24557 let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
24558 let java_fmt = java_fmt.replace('T', "'T'");
24559 Ok(Expression::Function(Box::new(Function::new(
24560 "TO_DATE".to_string(),
24561 vec![this, Expression::string(&java_fmt)],
24562 ))))
24563 }
24564 _ => {
24565 // For other dialects: use TsOrDsToDate which delegates to generator
24566 Ok(Expression::TsOrDsToDate(Box::new(
24567 crate::expressions::TsOrDsToDate {
24568 this: Box::new(this),
24569 format: Some(fmt),
24570 safe: None,
24571 },
24572 )))
24573 }
24574 }
24575 } else {
24576 // Non-string format - keep as-is
24577 let mut new_args = Vec::new();
24578 new_args.push(this);
24579 new_args.push(fmt_expr);
24580 Ok(Expression::Function(Box::new(Function::new(
24581 "STR_TO_DATE".to_string(),
24582 new_args,
24583 ))))
24584 }
24585 } else {
24586 Ok(Expression::Function(f))
24587 }
24588 } else {
24589 Ok(e)
24590 }
24591 }
24592
24593 Action::TsOrDsAddConvert => {
24594 // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> dialect-specific DATE_ADD
24595 if let Expression::Function(f) = e {
24596 if f.args.len() == 3 {
24597 let mut args = f.args;
24598 let x = args.remove(0);
24599 let n = args.remove(0);
24600 let unit_expr = args.remove(0);
24601 let unit_str = match &unit_expr {
24602 Expression::Literal(Literal::String(s)) => s.to_uppercase(),
24603 _ => "DAY".to_string(),
24604 };
24605
24606 match target {
24607 DialectType::Hive
24608 | DialectType::Spark
24609 | DialectType::Databricks => {
24610 // DATE_ADD(x, n) - only supports DAY unit
24611 Ok(Expression::Function(Box::new(Function::new(
24612 "DATE_ADD".to_string(),
24613 vec![x, n],
24614 ))))
24615 }
24616 DialectType::MySQL => {
24617 // DATE_ADD(x, INTERVAL n UNIT)
24618 let iu = match unit_str.to_uppercase().as_str() {
24619 "YEAR" => crate::expressions::IntervalUnit::Year,
24620 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
24621 "MONTH" => crate::expressions::IntervalUnit::Month,
24622 "WEEK" => crate::expressions::IntervalUnit::Week,
24623 "HOUR" => crate::expressions::IntervalUnit::Hour,
24624 "MINUTE" => crate::expressions::IntervalUnit::Minute,
24625 "SECOND" => crate::expressions::IntervalUnit::Second,
24626 _ => crate::expressions::IntervalUnit::Day,
24627 };
24628 let interval = Expression::Interval(Box::new(
24629 crate::expressions::Interval {
24630 this: Some(n),
24631 unit: Some(
24632 crate::expressions::IntervalUnitSpec::Simple {
24633 unit: iu,
24634 use_plural: false,
24635 },
24636 ),
24637 },
24638 ));
24639 Ok(Expression::Function(Box::new(Function::new(
24640 "DATE_ADD".to_string(),
24641 vec![x, interval],
24642 ))))
24643 }
24644 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
24645 // DATE_ADD('UNIT', n, CAST(CAST(x AS TIMESTAMP) AS DATE))
24646 let cast_ts = Expression::Cast(Box::new(Cast {
24647 this: x,
24648 to: DataType::Timestamp {
24649 precision: None,
24650 timezone: false,
24651 },
24652 double_colon_syntax: false,
24653 trailing_comments: Vec::new(),
24654 format: None,
24655 default: None,
24656 inferred_type: None,
24657 }));
24658 let cast_date = Expression::Cast(Box::new(Cast {
24659 this: cast_ts,
24660 to: DataType::Date,
24661 double_colon_syntax: false,
24662 trailing_comments: Vec::new(),
24663 format: None,
24664 default: None,
24665 inferred_type: None,
24666 }));
24667 Ok(Expression::Function(Box::new(Function::new(
24668 "DATE_ADD".to_string(),
24669 vec![Expression::string(&unit_str), n, cast_date],
24670 ))))
24671 }
24672 DialectType::DuckDB => {
24673 // CAST(x AS DATE) + INTERVAL n UNIT
24674 let cast_date = Expression::Cast(Box::new(Cast {
24675 this: x,
24676 to: DataType::Date,
24677 double_colon_syntax: false,
24678 trailing_comments: Vec::new(),
24679 format: None,
24680 default: None,
24681 inferred_type: None,
24682 }));
24683 let iu = match unit_str.to_uppercase().as_str() {
24684 "YEAR" => crate::expressions::IntervalUnit::Year,
24685 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
24686 "MONTH" => crate::expressions::IntervalUnit::Month,
24687 "WEEK" => crate::expressions::IntervalUnit::Week,
24688 "HOUR" => crate::expressions::IntervalUnit::Hour,
24689 "MINUTE" => crate::expressions::IntervalUnit::Minute,
24690 "SECOND" => crate::expressions::IntervalUnit::Second,
24691 _ => crate::expressions::IntervalUnit::Day,
24692 };
24693 let interval = Expression::Interval(Box::new(
24694 crate::expressions::Interval {
24695 this: Some(n),
24696 unit: Some(
24697 crate::expressions::IntervalUnitSpec::Simple {
24698 unit: iu,
24699 use_plural: false,
24700 },
24701 ),
24702 },
24703 ));
24704 Ok(Expression::Add(Box::new(crate::expressions::BinaryOp {
24705 left: cast_date,
24706 right: interval,
24707 left_comments: Vec::new(),
24708 operator_comments: Vec::new(),
24709 trailing_comments: Vec::new(),
24710 inferred_type: None,
24711 })))
24712 }
24713 DialectType::Drill => {
24714 // DATE_ADD(CAST(x AS DATE), INTERVAL n UNIT)
24715 let cast_date = Expression::Cast(Box::new(Cast {
24716 this: x,
24717 to: DataType::Date,
24718 double_colon_syntax: false,
24719 trailing_comments: Vec::new(),
24720 format: None,
24721 default: None,
24722 inferred_type: None,
24723 }));
24724 let iu = match unit_str.to_uppercase().as_str() {
24725 "YEAR" => crate::expressions::IntervalUnit::Year,
24726 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
24727 "MONTH" => crate::expressions::IntervalUnit::Month,
24728 "WEEK" => crate::expressions::IntervalUnit::Week,
24729 "HOUR" => crate::expressions::IntervalUnit::Hour,
24730 "MINUTE" => crate::expressions::IntervalUnit::Minute,
24731 "SECOND" => crate::expressions::IntervalUnit::Second,
24732 _ => crate::expressions::IntervalUnit::Day,
24733 };
24734 let interval = Expression::Interval(Box::new(
24735 crate::expressions::Interval {
24736 this: Some(n),
24737 unit: Some(
24738 crate::expressions::IntervalUnitSpec::Simple {
24739 unit: iu,
24740 use_plural: false,
24741 },
24742 ),
24743 },
24744 ));
24745 Ok(Expression::Function(Box::new(Function::new(
24746 "DATE_ADD".to_string(),
24747 vec![cast_date, interval],
24748 ))))
24749 }
24750 _ => {
24751 // Default: keep as TS_OR_DS_ADD
24752 Ok(Expression::Function(Box::new(Function::new(
24753 "TS_OR_DS_ADD".to_string(),
24754 vec![x, n, unit_expr],
24755 ))))
24756 }
24757 }
24758 } else {
24759 Ok(Expression::Function(f))
24760 }
24761 } else {
24762 Ok(e)
24763 }
24764 }
24765
24766 Action::DateFromUnixDateConvert => {
24767 // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, CAST('1970-01-01' AS DATE))
24768 if let Expression::Function(f) = e {
24769 // Keep as-is for dialects that support DATE_FROM_UNIX_DATE natively
24770 if matches!(
24771 target,
24772 DialectType::Spark | DialectType::Databricks | DialectType::BigQuery
24773 ) {
24774 return Ok(Expression::Function(Box::new(Function::new(
24775 "DATE_FROM_UNIX_DATE".to_string(),
24776 f.args,
24777 ))));
24778 }
24779 let n = f.args.into_iter().next().unwrap();
24780 let epoch_date = Expression::Cast(Box::new(Cast {
24781 this: Expression::string("1970-01-01"),
24782 to: DataType::Date,
24783 double_colon_syntax: false,
24784 trailing_comments: Vec::new(),
24785 format: None,
24786 default: None,
24787 inferred_type: None,
24788 }));
24789 match target {
24790 DialectType::DuckDB => {
24791 // CAST('1970-01-01' AS DATE) + INTERVAL n DAY
24792 let interval =
24793 Expression::Interval(Box::new(crate::expressions::Interval {
24794 this: Some(n),
24795 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
24796 unit: crate::expressions::IntervalUnit::Day,
24797 use_plural: false,
24798 }),
24799 }));
24800 Ok(Expression::Add(Box::new(
24801 crate::expressions::BinaryOp::new(epoch_date, interval),
24802 )))
24803 }
24804 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
24805 // DATE_ADD('DAY', n, CAST('1970-01-01' AS DATE))
24806 Ok(Expression::Function(Box::new(Function::new(
24807 "DATE_ADD".to_string(),
24808 vec![Expression::string("DAY"), n, epoch_date],
24809 ))))
24810 }
24811 DialectType::Snowflake | DialectType::Redshift | DialectType::TSQL => {
24812 // DATEADD(DAY, n, CAST('1970-01-01' AS DATE))
24813 Ok(Expression::Function(Box::new(Function::new(
24814 "DATEADD".to_string(),
24815 vec![
24816 Expression::Identifier(Identifier::new("DAY")),
24817 n,
24818 epoch_date,
24819 ],
24820 ))))
24821 }
24822 DialectType::BigQuery => {
24823 // DATE_ADD(CAST('1970-01-01' AS DATE), INTERVAL n DAY)
24824 let interval =
24825 Expression::Interval(Box::new(crate::expressions::Interval {
24826 this: Some(n),
24827 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
24828 unit: crate::expressions::IntervalUnit::Day,
24829 use_plural: false,
24830 }),
24831 }));
24832 Ok(Expression::Function(Box::new(Function::new(
24833 "DATE_ADD".to_string(),
24834 vec![epoch_date, interval],
24835 ))))
24836 }
24837 DialectType::MySQL
24838 | DialectType::Doris
24839 | DialectType::StarRocks
24840 | DialectType::Drill => {
24841 // DATE_ADD(CAST('1970-01-01' AS DATE), INTERVAL n DAY)
24842 let interval =
24843 Expression::Interval(Box::new(crate::expressions::Interval {
24844 this: Some(n),
24845 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
24846 unit: crate::expressions::IntervalUnit::Day,
24847 use_plural: false,
24848 }),
24849 }));
24850 Ok(Expression::Function(Box::new(Function::new(
24851 "DATE_ADD".to_string(),
24852 vec![epoch_date, interval],
24853 ))))
24854 }
24855 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
24856 // DATE_ADD(CAST('1970-01-01' AS DATE), n)
24857 Ok(Expression::Function(Box::new(Function::new(
24858 "DATE_ADD".to_string(),
24859 vec![epoch_date, n],
24860 ))))
24861 }
24862 DialectType::PostgreSQL
24863 | DialectType::Materialize
24864 | DialectType::RisingWave => {
24865 // CAST('1970-01-01' AS DATE) + INTERVAL 'n DAY'
24866 let n_str = match &n {
24867 Expression::Literal(Literal::Number(s)) => s.clone(),
24868 _ => Self::expr_to_string_static(&n),
24869 };
24870 let interval =
24871 Expression::Interval(Box::new(crate::expressions::Interval {
24872 this: Some(Expression::string(&format!("{} DAY", n_str))),
24873 unit: None,
24874 }));
24875 Ok(Expression::Add(Box::new(
24876 crate::expressions::BinaryOp::new(epoch_date, interval),
24877 )))
24878 }
24879 _ => {
24880 // Default: keep as-is
24881 Ok(Expression::Function(Box::new(Function::new(
24882 "DATE_FROM_UNIX_DATE".to_string(),
24883 vec![n],
24884 ))))
24885 }
24886 }
24887 } else {
24888 Ok(e)
24889 }
24890 }
24891
24892 Action::ArrayRemoveConvert => {
24893 // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter
24894 if let Expression::ArrayRemove(bf) = e {
24895 let arr = bf.this;
24896 let target_val = bf.expression;
24897 match target {
24898 DialectType::DuckDB => {
24899 let u_id = crate::expressions::Identifier::new("_u");
24900 let lambda =
24901 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
24902 parameters: vec![u_id.clone()],
24903 body: Expression::Neq(Box::new(BinaryOp {
24904 left: Expression::Identifier(u_id),
24905 right: target_val,
24906 left_comments: Vec::new(),
24907 operator_comments: Vec::new(),
24908 trailing_comments: Vec::new(),
24909 inferred_type: None,
24910 })),
24911 colon: false,
24912 parameter_types: Vec::new(),
24913 }));
24914 Ok(Expression::Function(Box::new(Function::new(
24915 "LIST_FILTER".to_string(),
24916 vec![arr, lambda],
24917 ))))
24918 }
24919 DialectType::ClickHouse => {
24920 let u_id = crate::expressions::Identifier::new("_u");
24921 let lambda =
24922 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
24923 parameters: vec![u_id.clone()],
24924 body: Expression::Neq(Box::new(BinaryOp {
24925 left: Expression::Identifier(u_id),
24926 right: target_val,
24927 left_comments: Vec::new(),
24928 operator_comments: Vec::new(),
24929 trailing_comments: Vec::new(),
24930 inferred_type: None,
24931 })),
24932 colon: false,
24933 parameter_types: Vec::new(),
24934 }));
24935 Ok(Expression::Function(Box::new(Function::new(
24936 "arrayFilter".to_string(),
24937 vec![lambda, arr],
24938 ))))
24939 }
24940 DialectType::BigQuery => {
24941 // ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)
24942 let u_id = crate::expressions::Identifier::new("_u");
24943 let u_col = Expression::Column(crate::expressions::Column {
24944 name: u_id.clone(),
24945 table: None,
24946 join_mark: false,
24947 trailing_comments: Vec::new(),
24948 span: None,
24949 inferred_type: None,
24950 });
24951 let unnest_expr =
24952 Expression::Unnest(Box::new(crate::expressions::UnnestFunc {
24953 this: arr,
24954 expressions: Vec::new(),
24955 with_ordinality: false,
24956 alias: None,
24957 offset_alias: None,
24958 }));
24959 let aliased_unnest =
24960 Expression::Alias(Box::new(crate::expressions::Alias {
24961 this: unnest_expr,
24962 alias: u_id.clone(),
24963 column_aliases: Vec::new(),
24964 pre_alias_comments: Vec::new(),
24965 trailing_comments: Vec::new(),
24966 inferred_type: None,
24967 }));
24968 let where_cond = Expression::Neq(Box::new(BinaryOp {
24969 left: u_col.clone(),
24970 right: target_val,
24971 left_comments: Vec::new(),
24972 operator_comments: Vec::new(),
24973 trailing_comments: Vec::new(),
24974 inferred_type: None,
24975 }));
24976 let subquery = Expression::Select(Box::new(
24977 crate::expressions::Select::new()
24978 .column(u_col)
24979 .from(aliased_unnest)
24980 .where_(where_cond),
24981 ));
24982 Ok(Expression::ArrayFunc(Box::new(
24983 crate::expressions::ArrayConstructor {
24984 expressions: vec![subquery],
24985 bracket_notation: false,
24986 use_list_keyword: false,
24987 },
24988 )))
24989 }
24990 _ => Ok(Expression::ArrayRemove(Box::new(
24991 crate::expressions::BinaryFunc {
24992 original_name: None,
24993 this: arr,
24994 expression: target_val,
24995 inferred_type: None,
24996 },
24997 ))),
24998 }
24999 } else {
25000 Ok(e)
25001 }
25002 }
25003
25004 Action::ArrayReverseConvert => {
25005 // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
25006 if let Expression::ArrayReverse(af) = e {
25007 Ok(Expression::Function(Box::new(Function::new(
25008 "arrayReverse".to_string(),
25009 vec![af.this],
25010 ))))
25011 } else {
25012 Ok(e)
25013 }
25014 }
25015
25016 Action::JsonKeysConvert => {
25017 // JSON_KEYS(x) -> JSON_OBJECT_KEYS/OBJECT_KEYS
25018 if let Expression::JsonKeys(uf) = e {
25019 match target {
25020 DialectType::Spark | DialectType::Databricks => {
25021 Ok(Expression::Function(Box::new(Function::new(
25022 "JSON_OBJECT_KEYS".to_string(),
25023 vec![uf.this],
25024 ))))
25025 }
25026 DialectType::Snowflake => Ok(Expression::Function(Box::new(
25027 Function::new("OBJECT_KEYS".to_string(), vec![uf.this]),
25028 ))),
25029 _ => Ok(Expression::JsonKeys(uf)),
25030 }
25031 } else {
25032 Ok(e)
25033 }
25034 }
25035
25036 Action::ParseJsonStrip => {
25037 // PARSE_JSON(x) -> x (strip wrapper for SQLite/Doris)
25038 if let Expression::ParseJson(uf) = e {
25039 Ok(uf.this)
25040 } else {
25041 Ok(e)
25042 }
25043 }
25044
25045 Action::ArraySizeDrill => {
25046 // ARRAY_SIZE(x) -> REPEATED_COUNT(x) for Drill
25047 if let Expression::ArraySize(uf) = e {
25048 Ok(Expression::Function(Box::new(Function::new(
25049 "REPEATED_COUNT".to_string(),
25050 vec![uf.this],
25051 ))))
25052 } else {
25053 Ok(e)
25054 }
25055 }
25056
25057 Action::WeekOfYearToWeekIso => {
25058 // WEEKOFYEAR(x) -> WEEKISO(x) for Snowflake (cross-dialect normalization)
25059 if let Expression::WeekOfYear(uf) = e {
25060 Ok(Expression::Function(Box::new(Function::new(
25061 "WEEKISO".to_string(),
25062 vec![uf.this],
25063 ))))
25064 } else {
25065 Ok(e)
25066 }
25067 }
25068 }
25069 })
25070 }
25071
25072 /// Convert DATE_TRUNC('unit', x) to MySQL-specific expansion
25073 fn date_trunc_to_mysql(unit: &str, expr: &Expression) -> Result<Expression> {
25074 use crate::expressions::Function;
25075 match unit {
25076 "DAY" => {
25077 // DATE(x)
25078 Ok(Expression::Function(Box::new(Function::new(
25079 "DATE".to_string(),
25080 vec![expr.clone()],
25081 ))))
25082 }
25083 "WEEK" => {
25084 // STR_TO_DATE(CONCAT(YEAR(x), ' ', WEEK(x, 1), ' 1'), '%Y %u %w')
25085 let year_x = Expression::Function(Box::new(Function::new(
25086 "YEAR".to_string(),
25087 vec![expr.clone()],
25088 )));
25089 let week_x = Expression::Function(Box::new(Function::new(
25090 "WEEK".to_string(),
25091 vec![expr.clone(), Expression::number(1)],
25092 )));
25093 let concat_args = vec![
25094 year_x,
25095 Expression::string(" "),
25096 week_x,
25097 Expression::string(" 1"),
25098 ];
25099 let concat = Expression::Function(Box::new(Function::new(
25100 "CONCAT".to_string(),
25101 concat_args,
25102 )));
25103 Ok(Expression::Function(Box::new(Function::new(
25104 "STR_TO_DATE".to_string(),
25105 vec![concat, Expression::string("%Y %u %w")],
25106 ))))
25107 }
25108 "MONTH" => {
25109 // STR_TO_DATE(CONCAT(YEAR(x), ' ', MONTH(x), ' 1'), '%Y %c %e')
25110 let year_x = Expression::Function(Box::new(Function::new(
25111 "YEAR".to_string(),
25112 vec![expr.clone()],
25113 )));
25114 let month_x = Expression::Function(Box::new(Function::new(
25115 "MONTH".to_string(),
25116 vec![expr.clone()],
25117 )));
25118 let concat_args = vec![
25119 year_x,
25120 Expression::string(" "),
25121 month_x,
25122 Expression::string(" 1"),
25123 ];
25124 let concat = Expression::Function(Box::new(Function::new(
25125 "CONCAT".to_string(),
25126 concat_args,
25127 )));
25128 Ok(Expression::Function(Box::new(Function::new(
25129 "STR_TO_DATE".to_string(),
25130 vec![concat, Expression::string("%Y %c %e")],
25131 ))))
25132 }
25133 "QUARTER" => {
25134 // STR_TO_DATE(CONCAT(YEAR(x), ' ', QUARTER(x) * 3 - 2, ' 1'), '%Y %c %e')
25135 let year_x = Expression::Function(Box::new(Function::new(
25136 "YEAR".to_string(),
25137 vec![expr.clone()],
25138 )));
25139 let quarter_x = Expression::Function(Box::new(Function::new(
25140 "QUARTER".to_string(),
25141 vec![expr.clone()],
25142 )));
25143 // QUARTER(x) * 3 - 2
25144 let mul = Expression::Mul(Box::new(crate::expressions::BinaryOp {
25145 left: quarter_x,
25146 right: Expression::number(3),
25147 left_comments: Vec::new(),
25148 operator_comments: Vec::new(),
25149 trailing_comments: Vec::new(),
25150 inferred_type: None,
25151 }));
25152 let sub = Expression::Sub(Box::new(crate::expressions::BinaryOp {
25153 left: mul,
25154 right: Expression::number(2),
25155 left_comments: Vec::new(),
25156 operator_comments: Vec::new(),
25157 trailing_comments: Vec::new(),
25158 inferred_type: None,
25159 }));
25160 let concat_args = vec![
25161 year_x,
25162 Expression::string(" "),
25163 sub,
25164 Expression::string(" 1"),
25165 ];
25166 let concat = Expression::Function(Box::new(Function::new(
25167 "CONCAT".to_string(),
25168 concat_args,
25169 )));
25170 Ok(Expression::Function(Box::new(Function::new(
25171 "STR_TO_DATE".to_string(),
25172 vec![concat, Expression::string("%Y %c %e")],
25173 ))))
25174 }
25175 "YEAR" => {
25176 // STR_TO_DATE(CONCAT(YEAR(x), ' 1 1'), '%Y %c %e')
25177 let year_x = Expression::Function(Box::new(Function::new(
25178 "YEAR".to_string(),
25179 vec![expr.clone()],
25180 )));
25181 let concat_args = vec![year_x, Expression::string(" 1 1")];
25182 let concat = Expression::Function(Box::new(Function::new(
25183 "CONCAT".to_string(),
25184 concat_args,
25185 )));
25186 Ok(Expression::Function(Box::new(Function::new(
25187 "STR_TO_DATE".to_string(),
25188 vec![concat, Expression::string("%Y %c %e")],
25189 ))))
25190 }
25191 _ => {
25192 // Unsupported unit -> keep as DATE_TRUNC
25193 Ok(Expression::Function(Box::new(Function::new(
25194 "DATE_TRUNC".to_string(),
25195 vec![Expression::string(unit), expr.clone()],
25196 ))))
25197 }
25198 }
25199 }
25200
25201 /// Check if a DataType is or contains VARCHAR/CHAR (for Spark VARCHAR->STRING normalization)
25202 fn has_varchar_char_type(dt: &crate::expressions::DataType) -> bool {
25203 use crate::expressions::DataType;
25204 match dt {
25205 DataType::VarChar { .. } | DataType::Char { .. } => true,
25206 DataType::Struct { fields, .. } => fields
25207 .iter()
25208 .any(|f| Self::has_varchar_char_type(&f.data_type)),
25209 _ => false,
25210 }
25211 }
25212
25213 /// Recursively normalize VARCHAR/CHAR to STRING in a DataType (for Spark)
25214 fn normalize_varchar_to_string(
25215 dt: crate::expressions::DataType,
25216 ) -> crate::expressions::DataType {
25217 use crate::expressions::DataType;
25218 match dt {
25219 DataType::VarChar { .. } | DataType::Char { .. } => DataType::Custom {
25220 name: "STRING".to_string(),
25221 },
25222 DataType::Struct { fields, nested } => {
25223 let fields = fields
25224 .into_iter()
25225 .map(|mut f| {
25226 f.data_type = Self::normalize_varchar_to_string(f.data_type);
25227 f
25228 })
25229 .collect();
25230 DataType::Struct { fields, nested }
25231 }
25232 other => other,
25233 }
25234 }
25235
25236 /// Normalize an interval string like '1day' or ' 2 days ' to proper INTERVAL expression
25237 fn normalize_interval_string(expr: Expression, target: DialectType) -> Expression {
25238 if let Expression::Literal(crate::expressions::Literal::String(ref s)) = expr {
25239 // Try to parse patterns like '1day', '1 day', '2 days', ' 2 days '
25240 let trimmed = s.trim();
25241
25242 // Find where digits end and unit text begins
25243 let digit_end = trimmed
25244 .find(|c: char| !c.is_ascii_digit())
25245 .unwrap_or(trimmed.len());
25246 if digit_end == 0 || digit_end == trimmed.len() {
25247 return expr;
25248 }
25249 let num = &trimmed[..digit_end];
25250 let unit_text = trimmed[digit_end..].trim().to_uppercase();
25251 if unit_text.is_empty() {
25252 return expr;
25253 }
25254
25255 let known_units = [
25256 "DAY", "DAYS", "HOUR", "HOURS", "MINUTE", "MINUTES", "SECOND", "SECONDS", "WEEK",
25257 "WEEKS", "MONTH", "MONTHS", "YEAR", "YEARS",
25258 ];
25259 if !known_units.contains(&unit_text.as_str()) {
25260 return expr;
25261 }
25262
25263 let unit_str = unit_text.clone();
25264 // Singularize
25265 let unit_singular = if unit_str.ends_with('S') && unit_str.len() > 3 {
25266 &unit_str[..unit_str.len() - 1]
25267 } else {
25268 &unit_str
25269 };
25270 let unit = unit_singular;
25271
25272 match target {
25273 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
25274 // INTERVAL '2' DAY
25275 let iu = match unit {
25276 "DAY" => crate::expressions::IntervalUnit::Day,
25277 "HOUR" => crate::expressions::IntervalUnit::Hour,
25278 "MINUTE" => crate::expressions::IntervalUnit::Minute,
25279 "SECOND" => crate::expressions::IntervalUnit::Second,
25280 "WEEK" => crate::expressions::IntervalUnit::Week,
25281 "MONTH" => crate::expressions::IntervalUnit::Month,
25282 "YEAR" => crate::expressions::IntervalUnit::Year,
25283 _ => return expr,
25284 };
25285 return Expression::Interval(Box::new(crate::expressions::Interval {
25286 this: Some(Expression::string(num)),
25287 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
25288 unit: iu,
25289 use_plural: false,
25290 }),
25291 }));
25292 }
25293 DialectType::PostgreSQL | DialectType::Redshift | DialectType::DuckDB => {
25294 // INTERVAL '2 DAYS'
25295 let plural = if num != "1" && !unit_str.ends_with('S') {
25296 format!("{} {}S", num, unit)
25297 } else if unit_str.ends_with('S') {
25298 format!("{} {}", num, unit_str)
25299 } else {
25300 format!("{} {}", num, unit)
25301 };
25302 return Expression::Interval(Box::new(crate::expressions::Interval {
25303 this: Some(Expression::string(&plural)),
25304 unit: None,
25305 }));
25306 }
25307 _ => {
25308 // Spark/Databricks/Hive: INTERVAL '1' DAY
25309 let iu = match unit {
25310 "DAY" => crate::expressions::IntervalUnit::Day,
25311 "HOUR" => crate::expressions::IntervalUnit::Hour,
25312 "MINUTE" => crate::expressions::IntervalUnit::Minute,
25313 "SECOND" => crate::expressions::IntervalUnit::Second,
25314 "WEEK" => crate::expressions::IntervalUnit::Week,
25315 "MONTH" => crate::expressions::IntervalUnit::Month,
25316 "YEAR" => crate::expressions::IntervalUnit::Year,
25317 _ => return expr,
25318 };
25319 return Expression::Interval(Box::new(crate::expressions::Interval {
25320 this: Some(Expression::string(num)),
25321 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
25322 unit: iu,
25323 use_plural: false,
25324 }),
25325 }));
25326 }
25327 }
25328 }
25329 // If it's already an INTERVAL expression, pass through
25330 expr
25331 }
25332
25333 /// Rewrite SELECT expressions containing UNNEST into expanded form with CROSS JOINs.
25334 /// DuckDB: SELECT UNNEST(arr1), UNNEST(arr2) ->
25335 /// BigQuery: SELECT IF(pos = pos_2, col, NULL) AS col, ... FROM UNNEST(GENERATE_ARRAY(0, ...)) AS pos CROSS JOIN ...
25336 /// Presto: SELECT IF(_u.pos = _u_2.pos_2, _u_2.col) AS col, ... FROM UNNEST(SEQUENCE(1, ...)) AS _u(pos) CROSS JOIN ...
25337 fn rewrite_unnest_expansion(
25338 select: &crate::expressions::Select,
25339 target: DialectType,
25340 ) -> Option<crate::expressions::Select> {
25341 use crate::expressions::{
25342 Alias, BinaryOp, Column, From, Function, Identifier, Join, JoinKind, Literal,
25343 UnnestFunc,
25344 };
25345
25346 let index_offset: i64 = match target {
25347 DialectType::Presto | DialectType::Trino => 1,
25348 _ => 0, // BigQuery, Snowflake
25349 };
25350
25351 let if_func_name = match target {
25352 DialectType::Snowflake => "IFF",
25353 _ => "IF",
25354 };
25355
25356 let array_length_func = match target {
25357 DialectType::BigQuery => "ARRAY_LENGTH",
25358 DialectType::Presto | DialectType::Trino => "CARDINALITY",
25359 DialectType::Snowflake => "ARRAY_SIZE",
25360 _ => "ARRAY_LENGTH",
25361 };
25362
25363 let use_table_aliases = matches!(
25364 target,
25365 DialectType::Presto | DialectType::Trino | DialectType::Snowflake
25366 );
25367 let null_third_arg = matches!(target, DialectType::BigQuery | DialectType::Snowflake);
25368
25369 fn make_col(name: &str, table: Option<&str>) -> Expression {
25370 if let Some(tbl) = table {
25371 Expression::Column(Column {
25372 name: Identifier::new(name.to_string()),
25373 table: Some(Identifier::new(tbl.to_string())),
25374 join_mark: false,
25375 trailing_comments: Vec::new(),
25376 span: None,
25377 inferred_type: None,
25378 })
25379 } else {
25380 Expression::Identifier(Identifier::new(name.to_string()))
25381 }
25382 }
25383
25384 fn make_join(this: Expression) -> Join {
25385 Join {
25386 this,
25387 on: None,
25388 using: Vec::new(),
25389 kind: JoinKind::Cross,
25390 use_inner_keyword: false,
25391 use_outer_keyword: false,
25392 deferred_condition: false,
25393 join_hint: None,
25394 match_condition: None,
25395 pivots: Vec::new(),
25396 comments: Vec::new(),
25397 nesting_group: 0,
25398 directed: false,
25399 }
25400 }
25401
25402 // Collect UNNEST info from SELECT expressions
25403 struct UnnestInfo {
25404 arr_expr: Expression,
25405 col_alias: String,
25406 pos_alias: String,
25407 source_alias: String,
25408 original_expr: Expression,
25409 has_outer_alias: Option<String>,
25410 }
25411
25412 let mut unnest_infos: Vec<UnnestInfo> = Vec::new();
25413 let mut col_counter = 0usize;
25414 let mut pos_counter = 1usize;
25415 let mut source_counter = 1usize;
25416
25417 fn extract_unnest_arg(expr: &Expression) -> Option<Expression> {
25418 match expr {
25419 Expression::Unnest(u) => Some(u.this.clone()),
25420 Expression::Function(f)
25421 if f.name.eq_ignore_ascii_case("UNNEST") && !f.args.is_empty() =>
25422 {
25423 Some(f.args[0].clone())
25424 }
25425 Expression::Alias(a) => extract_unnest_arg(&a.this),
25426 Expression::Add(op)
25427 | Expression::Sub(op)
25428 | Expression::Mul(op)
25429 | Expression::Div(op) => {
25430 extract_unnest_arg(&op.left).or_else(|| extract_unnest_arg(&op.right))
25431 }
25432 _ => None,
25433 }
25434 }
25435
25436 fn get_alias_name(expr: &Expression) -> Option<String> {
25437 if let Expression::Alias(a) = expr {
25438 Some(a.alias.name.clone())
25439 } else {
25440 None
25441 }
25442 }
25443
25444 for sel_expr in &select.expressions {
25445 if let Some(arr) = extract_unnest_arg(sel_expr) {
25446 col_counter += 1;
25447 pos_counter += 1;
25448 source_counter += 1;
25449
25450 let col_alias = if col_counter == 1 {
25451 "col".to_string()
25452 } else {
25453 format!("col_{}", col_counter)
25454 };
25455 let pos_alias = format!("pos_{}", pos_counter);
25456 let source_alias = format!("_u_{}", source_counter);
25457 let has_outer_alias = get_alias_name(sel_expr);
25458
25459 unnest_infos.push(UnnestInfo {
25460 arr_expr: arr,
25461 col_alias,
25462 pos_alias,
25463 source_alias,
25464 original_expr: sel_expr.clone(),
25465 has_outer_alias,
25466 });
25467 }
25468 }
25469
25470 if unnest_infos.is_empty() {
25471 return None;
25472 }
25473
25474 let series_alias = "pos".to_string();
25475 let series_source_alias = "_u".to_string();
25476 let tbl_ref = if use_table_aliases {
25477 Some(series_source_alias.as_str())
25478 } else {
25479 None
25480 };
25481
25482 // Build new SELECT expressions
25483 let mut new_select_exprs = Vec::new();
25484 for info in &unnest_infos {
25485 let actual_col_name = info.has_outer_alias.as_ref().unwrap_or(&info.col_alias);
25486 let src_ref = if use_table_aliases {
25487 Some(info.source_alias.as_str())
25488 } else {
25489 None
25490 };
25491
25492 let pos_col = make_col(&series_alias, tbl_ref);
25493 let unnest_pos_col = make_col(&info.pos_alias, src_ref);
25494 let col_ref = make_col(actual_col_name, src_ref);
25495
25496 let eq_cond = Expression::Eq(Box::new(BinaryOp::new(
25497 pos_col.clone(),
25498 unnest_pos_col.clone(),
25499 )));
25500 let mut if_args = vec![eq_cond, col_ref];
25501 if null_third_arg {
25502 if_args.push(Expression::Null(crate::expressions::Null));
25503 }
25504
25505 let if_expr =
25506 Expression::Function(Box::new(Function::new(if_func_name.to_string(), if_args)));
25507 let final_expr = Self::replace_unnest_with_if(&info.original_expr, &if_expr);
25508
25509 new_select_exprs.push(Expression::Alias(Box::new(Alias::new(
25510 final_expr,
25511 Identifier::new(actual_col_name.clone()),
25512 ))));
25513 }
25514
25515 // Build array size expressions for GREATEST
25516 let size_exprs: Vec<Expression> = unnest_infos
25517 .iter()
25518 .map(|info| {
25519 Expression::Function(Box::new(Function::new(
25520 array_length_func.to_string(),
25521 vec![info.arr_expr.clone()],
25522 )))
25523 })
25524 .collect();
25525
25526 let greatest =
25527 Expression::Function(Box::new(Function::new("GREATEST".to_string(), size_exprs)));
25528
25529 let series_end = if index_offset == 0 {
25530 Expression::Sub(Box::new(BinaryOp::new(
25531 greatest,
25532 Expression::Literal(Literal::Number("1".to_string())),
25533 )))
25534 } else {
25535 greatest
25536 };
25537
25538 // Build the position array source
25539 let series_unnest_expr = match target {
25540 DialectType::BigQuery => {
25541 let gen_array = Expression::Function(Box::new(Function::new(
25542 "GENERATE_ARRAY".to_string(),
25543 vec![
25544 Expression::Literal(Literal::Number("0".to_string())),
25545 series_end,
25546 ],
25547 )));
25548 Expression::Unnest(Box::new(UnnestFunc {
25549 this: gen_array,
25550 expressions: Vec::new(),
25551 with_ordinality: false,
25552 alias: None,
25553 offset_alias: None,
25554 }))
25555 }
25556 DialectType::Presto | DialectType::Trino => {
25557 let sequence = Expression::Function(Box::new(Function::new(
25558 "SEQUENCE".to_string(),
25559 vec![
25560 Expression::Literal(Literal::Number("1".to_string())),
25561 series_end,
25562 ],
25563 )));
25564 Expression::Unnest(Box::new(UnnestFunc {
25565 this: sequence,
25566 expressions: Vec::new(),
25567 with_ordinality: false,
25568 alias: None,
25569 offset_alias: None,
25570 }))
25571 }
25572 DialectType::Snowflake => {
25573 let range_end = Expression::Add(Box::new(BinaryOp::new(
25574 Expression::Paren(Box::new(crate::expressions::Paren {
25575 this: series_end,
25576 trailing_comments: Vec::new(),
25577 })),
25578 Expression::Literal(Literal::Number("1".to_string())),
25579 )));
25580 let gen_range = Expression::Function(Box::new(Function::new(
25581 "ARRAY_GENERATE_RANGE".to_string(),
25582 vec![
25583 Expression::Literal(Literal::Number("0".to_string())),
25584 range_end,
25585 ],
25586 )));
25587 let flatten_arg =
25588 Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
25589 name: Identifier::new("INPUT".to_string()),
25590 value: gen_range,
25591 separator: crate::expressions::NamedArgSeparator::DArrow,
25592 }));
25593 let flatten = Expression::Function(Box::new(Function::new(
25594 "FLATTEN".to_string(),
25595 vec![flatten_arg],
25596 )));
25597 Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])))
25598 }
25599 _ => return None,
25600 };
25601
25602 // Build series alias expression
25603 let series_alias_expr = if use_table_aliases {
25604 let col_aliases = if matches!(target, DialectType::Snowflake) {
25605 vec![
25606 Identifier::new("seq".to_string()),
25607 Identifier::new("key".to_string()),
25608 Identifier::new("path".to_string()),
25609 Identifier::new("index".to_string()),
25610 Identifier::new(series_alias.clone()),
25611 Identifier::new("this".to_string()),
25612 ]
25613 } else {
25614 vec![Identifier::new(series_alias.clone())]
25615 };
25616 Expression::Alias(Box::new(Alias {
25617 this: series_unnest_expr,
25618 alias: Identifier::new(series_source_alias.clone()),
25619 column_aliases: col_aliases,
25620 pre_alias_comments: Vec::new(),
25621 trailing_comments: Vec::new(),
25622 inferred_type: None,
25623 }))
25624 } else {
25625 Expression::Alias(Box::new(Alias::new(
25626 series_unnest_expr,
25627 Identifier::new(series_alias.clone()),
25628 )))
25629 };
25630
25631 // Build CROSS JOINs for each UNNEST
25632 let mut joins = Vec::new();
25633 for info in &unnest_infos {
25634 let actual_col_name = info.has_outer_alias.as_ref().unwrap_or(&info.col_alias);
25635
25636 let unnest_join_expr = match target {
25637 DialectType::BigQuery => {
25638 // UNNEST([1,2,3]) AS col WITH OFFSET AS pos_2
25639 let unnest = UnnestFunc {
25640 this: info.arr_expr.clone(),
25641 expressions: Vec::new(),
25642 with_ordinality: true,
25643 alias: Some(Identifier::new(actual_col_name.clone())),
25644 offset_alias: Some(Identifier::new(info.pos_alias.clone())),
25645 };
25646 Expression::Unnest(Box::new(unnest))
25647 }
25648 DialectType::Presto | DialectType::Trino => {
25649 let unnest = UnnestFunc {
25650 this: info.arr_expr.clone(),
25651 expressions: Vec::new(),
25652 with_ordinality: true,
25653 alias: None,
25654 offset_alias: None,
25655 };
25656 Expression::Alias(Box::new(Alias {
25657 this: Expression::Unnest(Box::new(unnest)),
25658 alias: Identifier::new(info.source_alias.clone()),
25659 column_aliases: vec![
25660 Identifier::new(actual_col_name.clone()),
25661 Identifier::new(info.pos_alias.clone()),
25662 ],
25663 pre_alias_comments: Vec::new(),
25664 trailing_comments: Vec::new(),
25665 inferred_type: None,
25666 }))
25667 }
25668 DialectType::Snowflake => {
25669 let flatten_arg =
25670 Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
25671 name: Identifier::new("INPUT".to_string()),
25672 value: info.arr_expr.clone(),
25673 separator: crate::expressions::NamedArgSeparator::DArrow,
25674 }));
25675 let flatten = Expression::Function(Box::new(Function::new(
25676 "FLATTEN".to_string(),
25677 vec![flatten_arg],
25678 )));
25679 let table_fn = Expression::Function(Box::new(Function::new(
25680 "TABLE".to_string(),
25681 vec![flatten],
25682 )));
25683 Expression::Alias(Box::new(Alias {
25684 this: table_fn,
25685 alias: Identifier::new(info.source_alias.clone()),
25686 column_aliases: vec![
25687 Identifier::new("seq".to_string()),
25688 Identifier::new("key".to_string()),
25689 Identifier::new("path".to_string()),
25690 Identifier::new(info.pos_alias.clone()),
25691 Identifier::new(actual_col_name.clone()),
25692 Identifier::new("this".to_string()),
25693 ],
25694 pre_alias_comments: Vec::new(),
25695 trailing_comments: Vec::new(),
25696 inferred_type: None,
25697 }))
25698 }
25699 _ => return None,
25700 };
25701
25702 joins.push(make_join(unnest_join_expr));
25703 }
25704
25705 // Build WHERE clause
25706 let mut where_conditions: Vec<Expression> = Vec::new();
25707 for info in &unnest_infos {
25708 let src_ref = if use_table_aliases {
25709 Some(info.source_alias.as_str())
25710 } else {
25711 None
25712 };
25713 let pos_col = make_col(&series_alias, tbl_ref);
25714 let unnest_pos_col = make_col(&info.pos_alias, src_ref);
25715
25716 let arr_size = Expression::Function(Box::new(Function::new(
25717 array_length_func.to_string(),
25718 vec![info.arr_expr.clone()],
25719 )));
25720
25721 let size_ref = if index_offset == 0 {
25722 Expression::Paren(Box::new(crate::expressions::Paren {
25723 this: Expression::Sub(Box::new(BinaryOp::new(
25724 arr_size,
25725 Expression::Literal(Literal::Number("1".to_string())),
25726 ))),
25727 trailing_comments: Vec::new(),
25728 }))
25729 } else {
25730 arr_size
25731 };
25732
25733 let eq = Expression::Eq(Box::new(BinaryOp::new(
25734 pos_col.clone(),
25735 unnest_pos_col.clone(),
25736 )));
25737 let gt = Expression::Gt(Box::new(BinaryOp::new(pos_col, size_ref.clone())));
25738 let pos_eq_size = Expression::Eq(Box::new(BinaryOp::new(unnest_pos_col, size_ref)));
25739 let and_cond = Expression::And(Box::new(BinaryOp::new(gt, pos_eq_size)));
25740 let paren_and = Expression::Paren(Box::new(crate::expressions::Paren {
25741 this: and_cond,
25742 trailing_comments: Vec::new(),
25743 }));
25744 let or_cond = Expression::Or(Box::new(BinaryOp::new(eq, paren_and)));
25745
25746 where_conditions.push(or_cond);
25747 }
25748
25749 let where_expr = if where_conditions.len() == 1 {
25750 // Single condition: no parens needed
25751 where_conditions.into_iter().next().unwrap()
25752 } else {
25753 // Multiple conditions: wrap each OR in parens, then combine with AND
25754 let wrap = |e: Expression| {
25755 Expression::Paren(Box::new(crate::expressions::Paren {
25756 this: e,
25757 trailing_comments: Vec::new(),
25758 }))
25759 };
25760 let mut iter = where_conditions.into_iter();
25761 let first = wrap(iter.next().unwrap());
25762 let second = wrap(iter.next().unwrap());
25763 let mut combined = Expression::Paren(Box::new(crate::expressions::Paren {
25764 this: Expression::And(Box::new(BinaryOp::new(first, second))),
25765 trailing_comments: Vec::new(),
25766 }));
25767 for cond in iter {
25768 combined = Expression::And(Box::new(BinaryOp::new(combined, wrap(cond))));
25769 }
25770 combined
25771 };
25772
25773 // Build the new SELECT
25774 let mut new_select = select.clone();
25775 new_select.expressions = new_select_exprs;
25776
25777 if new_select.from.is_some() {
25778 let mut all_joins = vec![make_join(series_alias_expr)];
25779 all_joins.extend(joins);
25780 new_select.joins.extend(all_joins);
25781 } else {
25782 new_select.from = Some(From {
25783 expressions: vec![series_alias_expr],
25784 });
25785 new_select.joins.extend(joins);
25786 }
25787
25788 if let Some(ref existing_where) = new_select.where_clause {
25789 let combined = Expression::And(Box::new(BinaryOp::new(
25790 existing_where.this.clone(),
25791 where_expr,
25792 )));
25793 new_select.where_clause = Some(crate::expressions::Where { this: combined });
25794 } else {
25795 new_select.where_clause = Some(crate::expressions::Where { this: where_expr });
25796 }
25797
25798 Some(new_select)
25799 }
25800
25801 /// Helper to replace UNNEST(...) inside an expression with a replacement expression.
25802 fn replace_unnest_with_if(original: &Expression, replacement: &Expression) -> Expression {
25803 match original {
25804 Expression::Unnest(_) => replacement.clone(),
25805 Expression::Function(f) if f.name.eq_ignore_ascii_case("UNNEST") => replacement.clone(),
25806 Expression::Alias(a) => Self::replace_unnest_with_if(&a.this, replacement),
25807 Expression::Add(op) => {
25808 let left = Self::replace_unnest_with_if(&op.left, replacement);
25809 let right = Self::replace_unnest_with_if(&op.right, replacement);
25810 Expression::Add(Box::new(crate::expressions::BinaryOp::new(left, right)))
25811 }
25812 Expression::Sub(op) => {
25813 let left = Self::replace_unnest_with_if(&op.left, replacement);
25814 let right = Self::replace_unnest_with_if(&op.right, replacement);
25815 Expression::Sub(Box::new(crate::expressions::BinaryOp::new(left, right)))
25816 }
25817 Expression::Mul(op) => {
25818 let left = Self::replace_unnest_with_if(&op.left, replacement);
25819 let right = Self::replace_unnest_with_if(&op.right, replacement);
25820 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(left, right)))
25821 }
25822 Expression::Div(op) => {
25823 let left = Self::replace_unnest_with_if(&op.left, replacement);
25824 let right = Self::replace_unnest_with_if(&op.right, replacement);
25825 Expression::Div(Box::new(crate::expressions::BinaryOp::new(left, right)))
25826 }
25827 _ => original.clone(),
25828 }
25829 }
25830
25831 /// Decompose a JSON path like `$.y[0].z` into individual parts: `["y", "0", "z"]`.
25832 /// Strips `$` prefix, handles bracket notation, quoted strings, and removes `[*]` wildcards.
25833 fn decompose_json_path(path: &str) -> Vec<String> {
25834 let mut parts = Vec::new();
25835 let path = if path.starts_with("$.") {
25836 &path[2..]
25837 } else if path.starts_with('$') {
25838 &path[1..]
25839 } else {
25840 path
25841 };
25842 if path.is_empty() {
25843 return parts;
25844 }
25845 let mut current = String::new();
25846 let chars: Vec<char> = path.chars().collect();
25847 let mut i = 0;
25848 while i < chars.len() {
25849 match chars[i] {
25850 '.' => {
25851 if !current.is_empty() {
25852 parts.push(current.clone());
25853 current.clear();
25854 }
25855 i += 1;
25856 }
25857 '[' => {
25858 if !current.is_empty() {
25859 parts.push(current.clone());
25860 current.clear();
25861 }
25862 i += 1;
25863 let mut bracket_content = String::new();
25864 while i < chars.len() && chars[i] != ']' {
25865 if chars[i] == '"' || chars[i] == '\'' {
25866 let quote = chars[i];
25867 i += 1;
25868 while i < chars.len() && chars[i] != quote {
25869 bracket_content.push(chars[i]);
25870 i += 1;
25871 }
25872 if i < chars.len() {
25873 i += 1;
25874 }
25875 } else {
25876 bracket_content.push(chars[i]);
25877 i += 1;
25878 }
25879 }
25880 if i < chars.len() {
25881 i += 1;
25882 }
25883 if bracket_content != "*" {
25884 parts.push(bracket_content);
25885 }
25886 }
25887 _ => {
25888 current.push(chars[i]);
25889 i += 1;
25890 }
25891 }
25892 }
25893 if !current.is_empty() {
25894 parts.push(current);
25895 }
25896 parts
25897 }
25898
25899 /// Strip `$` prefix from a JSON path, keeping the rest.
25900 /// `$.y[0].z` -> `y[0].z`, `$["a b"]` -> `["a b"]`
25901 fn strip_json_dollar_prefix(path: &str) -> String {
25902 if path.starts_with("$.") {
25903 path[2..].to_string()
25904 } else if path.starts_with('$') {
25905 path[1..].to_string()
25906 } else {
25907 path.to_string()
25908 }
25909 }
25910
25911 /// Strip `[*]` wildcards from a JSON path.
25912 /// `$.y[*]` -> `$.y`, `$.y[*].z` -> `$.y.z`
25913 fn strip_json_wildcards(path: &str) -> String {
25914 path.replace("[*]", "")
25915 .replace("..", ".") // Clean double dots from `$.y[*].z` -> `$.y..z`
25916 .trim_end_matches('.')
25917 .to_string()
25918 }
25919
25920 /// Convert bracket notation to dot notation for JSON paths.
25921 /// `$["a b"]` -> `$."a b"`, `$["key"]` -> `$.key`
25922 fn bracket_to_dot_notation(path: &str) -> String {
25923 let mut result = String::new();
25924 let chars: Vec<char> = path.chars().collect();
25925 let mut i = 0;
25926 while i < chars.len() {
25927 if chars[i] == '[' {
25928 // Read bracket content
25929 i += 1;
25930 let mut bracket_content = String::new();
25931 let mut is_quoted = false;
25932 let mut _quote_char = '"';
25933 while i < chars.len() && chars[i] != ']' {
25934 if chars[i] == '"' || chars[i] == '\'' {
25935 is_quoted = true;
25936 _quote_char = chars[i];
25937 i += 1;
25938 while i < chars.len() && chars[i] != _quote_char {
25939 bracket_content.push(chars[i]);
25940 i += 1;
25941 }
25942 if i < chars.len() {
25943 i += 1;
25944 }
25945 } else {
25946 bracket_content.push(chars[i]);
25947 i += 1;
25948 }
25949 }
25950 if i < chars.len() {
25951 i += 1;
25952 } // skip ]
25953 if bracket_content == "*" {
25954 // Keep wildcard as-is
25955 result.push_str("[*]");
25956 } else if is_quoted {
25957 // Quoted bracket -> dot notation with quotes
25958 result.push('.');
25959 result.push('"');
25960 result.push_str(&bracket_content);
25961 result.push('"');
25962 } else {
25963 // Numeric index -> keep as bracket
25964 result.push('[');
25965 result.push_str(&bracket_content);
25966 result.push(']');
25967 }
25968 } else {
25969 result.push(chars[i]);
25970 i += 1;
25971 }
25972 }
25973 result
25974 }
25975
25976 /// Convert JSON path bracket quoted strings to use single quotes instead of double quotes.
25977 /// `$["a b"]` -> `$['a b']`
25978 fn bracket_to_single_quotes(path: &str) -> String {
25979 let mut result = String::new();
25980 let chars: Vec<char> = path.chars().collect();
25981 let mut i = 0;
25982 while i < chars.len() {
25983 if chars[i] == '[' && i + 1 < chars.len() && chars[i + 1] == '"' {
25984 result.push('[');
25985 result.push('\'');
25986 i += 2; // skip [ and "
25987 while i < chars.len() && chars[i] != '"' {
25988 result.push(chars[i]);
25989 i += 1;
25990 }
25991 if i < chars.len() {
25992 i += 1;
25993 } // skip closing "
25994 result.push('\'');
25995 } else {
25996 result.push(chars[i]);
25997 i += 1;
25998 }
25999 }
26000 result
26001 }
26002
    /// Transform TSQL SELECT INTO -> CREATE TABLE AS for DuckDB/Snowflake
    /// or PostgreSQL #temp -> TEMPORARY.
    /// Also strips # from INSERT INTO #table for non-TSQL targets.
    ///
    /// Returns the (possibly rewritten) expression. Anything that is not an
    /// INSERT into a `#`-prefixed table or a `SELECT ... INTO` passes through
    /// unchanged.
    fn transform_select_into(
        expr: Expression,
        _source: DialectType,
        target: DialectType,
    ) -> Expression {
        use crate::expressions::{CreateTable, Expression, TableRef};

        // Handle INSERT INTO #temp -> INSERT INTO temp for non-TSQL targets
        if let Expression::Insert(ref insert) = expr {
            // `#name` is TSQL's temp-table marker; targets other than
            // TSQL/Fabric don't accept it, so drop the prefix there.
            if insert.table.name.name.starts_with('#')
                && !matches!(target, DialectType::TSQL | DialectType::Fabric)
            {
                let mut new_insert = insert.clone();
                new_insert.table.name.name =
                    insert.table.name.name.trim_start_matches('#').to_string();
                return Expression::Insert(new_insert);
            }
            // INSERT without a `#` table (or a TSQL target): leave as-is.
            return expr;
        }

        if let Expression::Select(ref select) = expr {
            if let Some(ref into) = select.into {
                // Pull the raw target-table name out of the INTO clause;
                // unknown node shapes yield "" and effectively no-op below.
                let table_name_raw = match &into.this {
                    Expression::Table(tr) => tr.name.name.clone(),
                    Expression::Identifier(id) => id.name.clone(),
                    _ => String::new(),
                };
                // A `#` prefix or an explicit temporary INTO both mean temp.
                let is_temp = table_name_raw.starts_with('#') || into.temporary;
                let clean_name = table_name_raw.trim_start_matches('#').to_string();

                match target {
                    DialectType::DuckDB | DialectType::Snowflake => {
                        // SELECT INTO -> CREATE TABLE AS SELECT
                        // The SELECT loses its INTO clause and becomes the
                        // AS-subquery of a fresh CreateTable node.
                        let mut new_select = select.clone();
                        new_select.into = None;
                        let ct = CreateTable {
                            name: TableRef::new(clean_name),
                            on_cluster: None,
                            columns: Vec::new(),
                            constraints: Vec::new(),
                            if_not_exists: false,
                            temporary: is_temp,
                            or_replace: false,
                            table_modifier: None,
                            as_select: Some(Expression::Select(new_select)),
                            as_select_parenthesized: false,
                            on_commit: None,
                            clone_source: None,
                            clone_at_clause: None,
                            shallow_clone: false,
                            is_copy: false,
                            leading_comments: Vec::new(),
                            with_properties: Vec::new(),
                            teradata_post_name_options: Vec::new(),
                            with_data: None,
                            with_statistics: None,
                            teradata_indexes: Vec::new(),
                            with_cte: None,
                            properties: Vec::new(),
                            partition_of: None,
                            post_table_properties: Vec::new(),
                            mysql_table_options: Vec::new(),
                            inherits: Vec::new(),
                            on_property: None,
                            copy_grants: false,
                            using_template: None,
                            rollup: None,
                        };
                        return Expression::CreateTable(Box::new(ct));
                    }
                    DialectType::PostgreSQL | DialectType::Redshift => {
                        // PostgreSQL: #foo -> INTO TEMPORARY foo
                        // Only rewrite when the temp-ness comes from the `#`
                        // prefix; an already-TEMPORARY INTO is left alone.
                        if is_temp && !into.temporary {
                            let mut new_select = select.clone();
                            let mut new_into = into.clone();
                            new_into.temporary = true;
                            new_into.unlogged = false;
                            new_into.this = Expression::Table(TableRef::new(clean_name));
                            new_select.into = Some(new_into);
                            Expression::Select(new_select)
                        } else {
                            expr
                        }
                    }
                    _ => expr,
                }
            } else {
                expr
            }
        } else {
            expr
        }
    }
26099
26100 /// Transform CREATE TABLE WITH properties for cross-dialect transpilation.
26101 /// Handles FORMAT, PARTITIONED_BY, and other Presto WITH properties.
26102 fn transform_create_table_properties(
26103 ct: &mut crate::expressions::CreateTable,
26104 _source: DialectType,
26105 target: DialectType,
26106 ) {
26107 use crate::expressions::{
26108 BinaryOp, BooleanLiteral, Expression, FileFormatProperty, Identifier, Literal,
26109 Properties,
26110 };
26111
26112 // Helper to convert a raw property value string to the correct Expression
26113 let value_to_expr = |v: &str| -> Expression {
26114 let trimmed = v.trim();
26115 // Check if it's a quoted string (starts and ends with ')
26116 if trimmed.starts_with('\'') && trimmed.ends_with('\'') {
26117 Expression::Literal(Literal::String(trimmed[1..trimmed.len() - 1].to_string()))
26118 }
26119 // Check if it's a number
26120 else if trimmed.parse::<i64>().is_ok() || trimmed.parse::<f64>().is_ok() {
26121 Expression::Literal(Literal::Number(trimmed.to_string()))
26122 }
26123 // Check if it's ARRAY[...] or ARRAY(...)
26124 else if trimmed.to_uppercase().starts_with("ARRAY") {
26125 // Convert ARRAY['y'] to ARRAY('y') for Hive/Spark
26126 let inner = trimmed
26127 .trim_start_matches(|c: char| c.is_alphabetic()) // Remove ARRAY
26128 .trim_start_matches('[')
26129 .trim_start_matches('(')
26130 .trim_end_matches(']')
26131 .trim_end_matches(')');
26132 let elements: Vec<Expression> = inner
26133 .split(',')
26134 .map(|e| {
26135 let elem = e.trim().trim_matches('\'');
26136 Expression::Literal(Literal::String(elem.to_string()))
26137 })
26138 .collect();
26139 Expression::Function(Box::new(crate::expressions::Function::new(
26140 "ARRAY".to_string(),
26141 elements,
26142 )))
26143 }
26144 // Otherwise, just output as identifier (unquoted)
26145 else {
26146 Expression::Identifier(Identifier::new(trimmed.to_string()))
26147 }
26148 };
26149
26150 if ct.with_properties.is_empty() && ct.properties.is_empty() {
26151 return;
26152 }
26153
26154 // Handle Presto-style WITH properties
26155 if !ct.with_properties.is_empty() {
26156 // Extract FORMAT property and remaining properties
26157 let mut format_value: Option<String> = None;
26158 let mut partitioned_by: Option<String> = None;
26159 let mut other_props: Vec<(String, String)> = Vec::new();
26160
26161 for (key, value) in ct.with_properties.drain(..) {
26162 let key_upper = key.to_uppercase();
26163 if key_upper == "FORMAT" {
26164 // Strip surrounding quotes from value if present
26165 format_value = Some(value.trim_matches('\'').to_string());
26166 } else if key_upper == "PARTITIONED_BY" {
26167 partitioned_by = Some(value);
26168 } else {
26169 other_props.push((key, value));
26170 }
26171 }
26172
26173 match target {
26174 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
26175 // Presto: keep WITH properties but lowercase 'format' key
26176 if let Some(fmt) = format_value {
26177 ct.with_properties
26178 .push(("format".to_string(), format!("'{}'", fmt)));
26179 }
26180 if let Some(part) = partitioned_by {
26181 // Convert (col1, col2) to ARRAY['col1', 'col2'] format
26182 let trimmed = part.trim();
26183 let inner = trimmed.trim_start_matches('(').trim_end_matches(')');
26184 // Also handle ARRAY['...'] format - keep as-is
26185 if trimmed.to_uppercase().starts_with("ARRAY") {
26186 ct.with_properties
26187 .push(("PARTITIONED_BY".to_string(), part));
26188 } else {
26189 // Parse column names from the parenthesized list
26190 let cols: Vec<&str> = inner
26191 .split(',')
26192 .map(|c| c.trim().trim_matches('"').trim_matches('\''))
26193 .collect();
26194 let array_val = format!(
26195 "ARRAY[{}]",
26196 cols.iter()
26197 .map(|c| format!("'{}'", c))
26198 .collect::<Vec<_>>()
26199 .join(", ")
26200 );
26201 ct.with_properties
26202 .push(("PARTITIONED_BY".to_string(), array_val));
26203 }
26204 }
26205 ct.with_properties.extend(other_props);
26206 }
26207 DialectType::Hive => {
26208 // Hive: FORMAT -> STORED AS, other props -> TBLPROPERTIES
26209 if let Some(fmt) = format_value {
26210 ct.properties.push(Expression::FileFormatProperty(Box::new(
26211 FileFormatProperty {
26212 this: Some(Box::new(Expression::Identifier(Identifier::new(fmt)))),
26213 expressions: vec![],
26214 hive_format: Some(Box::new(Expression::Boolean(BooleanLiteral {
26215 value: true,
26216 }))),
26217 },
26218 )));
26219 }
26220 if let Some(_part) = partitioned_by {
26221 // PARTITIONED_BY handling is complex - move columns to partitioned by
26222 // For now, the partition columns are extracted from the column list
26223 Self::apply_partitioned_by(ct, &_part, target);
26224 }
26225 if !other_props.is_empty() {
26226 let eq_exprs: Vec<Expression> = other_props
26227 .into_iter()
26228 .map(|(k, v)| {
26229 Expression::Eq(Box::new(BinaryOp::new(
26230 Expression::Literal(Literal::String(k)),
26231 value_to_expr(&v),
26232 )))
26233 })
26234 .collect();
26235 ct.properties
26236 .push(Expression::Properties(Box::new(Properties {
26237 expressions: eq_exprs,
26238 })));
26239 }
26240 }
26241 DialectType::Spark | DialectType::Databricks => {
26242 // Spark: FORMAT -> USING, other props -> TBLPROPERTIES
26243 if let Some(fmt) = format_value {
26244 ct.properties.push(Expression::FileFormatProperty(Box::new(
26245 FileFormatProperty {
26246 this: Some(Box::new(Expression::Identifier(Identifier::new(fmt)))),
26247 expressions: vec![],
26248 hive_format: None, // None means USING syntax
26249 },
26250 )));
26251 }
26252 if let Some(_part) = partitioned_by {
26253 Self::apply_partitioned_by(ct, &_part, target);
26254 }
26255 if !other_props.is_empty() {
26256 let eq_exprs: Vec<Expression> = other_props
26257 .into_iter()
26258 .map(|(k, v)| {
26259 Expression::Eq(Box::new(BinaryOp::new(
26260 Expression::Literal(Literal::String(k)),
26261 value_to_expr(&v),
26262 )))
26263 })
26264 .collect();
26265 ct.properties
26266 .push(Expression::Properties(Box::new(Properties {
26267 expressions: eq_exprs,
26268 })));
26269 }
26270 }
26271 DialectType::DuckDB => {
26272 // DuckDB: strip all WITH properties (FORMAT, PARTITIONED_BY, etc.)
26273 // Keep nothing
26274 }
26275 _ => {
26276 // For other dialects, keep WITH properties as-is
26277 if let Some(fmt) = format_value {
26278 ct.with_properties
26279 .push(("FORMAT".to_string(), format!("'{}'", fmt)));
26280 }
26281 if let Some(part) = partitioned_by {
26282 ct.with_properties
26283 .push(("PARTITIONED_BY".to_string(), part));
26284 }
26285 ct.with_properties.extend(other_props);
26286 }
26287 }
26288 }
26289
26290 // Handle STORED AS 'PARQUET' (quoted format name) -> STORED AS PARQUET (unquoted)
26291 // and Hive STORED AS -> Presto WITH (format=...) conversion
26292 if !ct.properties.is_empty() {
26293 let is_presto_target = matches!(
26294 target,
26295 DialectType::Presto | DialectType::Trino | DialectType::Athena
26296 );
26297 let is_duckdb_target = matches!(target, DialectType::DuckDB);
26298
26299 if is_presto_target || is_duckdb_target {
26300 let mut new_properties = Vec::new();
26301 for prop in ct.properties.drain(..) {
26302 match &prop {
26303 Expression::FileFormatProperty(ffp) => {
26304 if is_presto_target {
26305 // Convert STORED AS/USING to WITH (format=...)
26306 if let Some(ref fmt_expr) = ffp.this {
26307 let fmt_str = match fmt_expr.as_ref() {
26308 Expression::Identifier(id) => id.name.clone(),
26309 Expression::Literal(Literal::String(s)) => s.clone(),
26310 _ => {
26311 new_properties.push(prop);
26312 continue;
26313 }
26314 };
26315 ct.with_properties
26316 .push(("format".to_string(), format!("'{}'", fmt_str)));
26317 }
26318 }
26319 // DuckDB: just strip file format properties
26320 }
26321 // Convert TBLPROPERTIES to WITH properties for Presto target
26322 Expression::Properties(props) if is_presto_target => {
26323 for expr in &props.expressions {
26324 if let Expression::Eq(eq) = expr {
26325 // Extract key and value from the Eq expression
26326 let key = match &eq.left {
26327 Expression::Literal(Literal::String(s)) => s.clone(),
26328 Expression::Identifier(id) => id.name.clone(),
26329 _ => continue,
26330 };
26331 let value = match &eq.right {
26332 Expression::Literal(Literal::String(s)) => {
26333 format!("'{}'", s)
26334 }
26335 Expression::Literal(Literal::Number(n)) => n.clone(),
26336 Expression::Identifier(id) => id.name.clone(),
26337 _ => continue,
26338 };
26339 ct.with_properties.push((key, value));
26340 }
26341 }
26342 }
26343 // Convert PartitionedByProperty for Presto target
26344 Expression::PartitionedByProperty(ref pbp) if is_presto_target => {
26345 // Check if it contains ColumnDef expressions (Hive-style with types)
26346 if let Expression::Tuple(ref tuple) = *pbp.this {
26347 let mut col_names: Vec<String> = Vec::new();
26348 let mut col_defs: Vec<crate::expressions::ColumnDef> = Vec::new();
26349 let mut has_col_defs = false;
26350 for expr in &tuple.expressions {
26351 if let Expression::ColumnDef(ref cd) = expr {
26352 has_col_defs = true;
26353 col_names.push(cd.name.name.clone());
26354 col_defs.push(*cd.clone());
26355 } else if let Expression::Column(ref col) = expr {
26356 col_names.push(col.name.name.clone());
26357 } else if let Expression::Identifier(ref id) = expr {
26358 col_names.push(id.name.clone());
26359 } else {
26360 // For function expressions like MONTHS(y), serialize to SQL
26361 let generic = Dialect::get(DialectType::Generic);
26362 if let Ok(sql) = generic.generate(expr) {
26363 col_names.push(sql);
26364 }
26365 }
26366 }
26367 if has_col_defs {
26368 // Merge partition column defs into the main column list
26369 for cd in col_defs {
26370 ct.columns.push(cd);
26371 }
26372 }
26373 if !col_names.is_empty() {
26374 // Add PARTITIONED_BY property
26375 let array_val = format!(
26376 "ARRAY[{}]",
26377 col_names
26378 .iter()
26379 .map(|n| format!("'{}'", n))
26380 .collect::<Vec<_>>()
26381 .join(", ")
26382 );
26383 ct.with_properties
26384 .push(("PARTITIONED_BY".to_string(), array_val));
26385 }
26386 }
26387 // Skip - don't keep in properties
26388 }
26389 _ => {
26390 if !is_duckdb_target {
26391 new_properties.push(prop);
26392 }
26393 }
26394 }
26395 }
26396 ct.properties = new_properties;
26397 } else {
26398 // For Hive/Spark targets, unquote format names in STORED AS
26399 for prop in &mut ct.properties {
26400 if let Expression::FileFormatProperty(ref mut ffp) = prop {
26401 if let Some(ref mut fmt_expr) = ffp.this {
26402 if let Expression::Literal(Literal::String(s)) = fmt_expr.as_ref() {
26403 // Convert STORED AS 'PARQUET' to STORED AS PARQUET (unquote)
26404 let unquoted = s.clone();
26405 *fmt_expr =
26406 Box::new(Expression::Identifier(Identifier::new(unquoted)));
26407 }
26408 }
26409 }
26410 }
26411 }
26412 }
26413 }
26414
26415 /// Apply PARTITIONED_BY conversion: move partition columns from column list to PARTITIONED BY
26416 fn apply_partitioned_by(
26417 ct: &mut crate::expressions::CreateTable,
26418 partitioned_by_value: &str,
26419 target: DialectType,
26420 ) {
26421 use crate::expressions::{Column, Expression, Identifier, PartitionedByProperty, Tuple};
26422
26423 // Parse the ARRAY['col1', 'col2'] value to extract column names
26424 let mut col_names: Vec<String> = Vec::new();
26425 // The value looks like ARRAY['y', 'z'] or ARRAY('y', 'z')
26426 let inner = partitioned_by_value
26427 .trim()
26428 .trim_start_matches("ARRAY")
26429 .trim_start_matches('[')
26430 .trim_start_matches('(')
26431 .trim_end_matches(']')
26432 .trim_end_matches(')');
26433 for part in inner.split(',') {
26434 let col = part.trim().trim_matches('\'').trim_matches('"');
26435 if !col.is_empty() {
26436 col_names.push(col.to_string());
26437 }
26438 }
26439
26440 if col_names.is_empty() {
26441 return;
26442 }
26443
26444 if matches!(target, DialectType::Hive) {
26445 // Hive: PARTITIONED BY (col_name type, ...) - move columns out of column list
26446 let mut partition_col_defs = Vec::new();
26447 for col_name in &col_names {
26448 // Find and remove from columns
26449 if let Some(pos) = ct
26450 .columns
26451 .iter()
26452 .position(|c| c.name.name.eq_ignore_ascii_case(col_name))
26453 {
26454 let col_def = ct.columns.remove(pos);
26455 partition_col_defs.push(Expression::ColumnDef(Box::new(col_def)));
26456 }
26457 }
26458 if !partition_col_defs.is_empty() {
26459 ct.properties
26460 .push(Expression::PartitionedByProperty(Box::new(
26461 PartitionedByProperty {
26462 this: Box::new(Expression::Tuple(Box::new(Tuple {
26463 expressions: partition_col_defs,
26464 }))),
26465 },
26466 )));
26467 }
26468 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
26469 // Spark: PARTITIONED BY (col1, col2) - just column names, keep in column list
26470 // Use quoted identifiers to match the quoting style of the original column definitions
26471 let partition_exprs: Vec<Expression> = col_names
26472 .iter()
26473 .map(|name| {
26474 // Check if the column exists in the column list and use its quoting
26475 let is_quoted = ct
26476 .columns
26477 .iter()
26478 .any(|c| c.name.name.eq_ignore_ascii_case(name) && c.name.quoted);
26479 let ident = if is_quoted {
26480 Identifier::quoted(name.clone())
26481 } else {
26482 Identifier::new(name.clone())
26483 };
26484 Expression::Column(Column {
26485 name: ident,
26486 table: None,
26487 join_mark: false,
26488 trailing_comments: Vec::new(),
26489 span: None,
26490 inferred_type: None,
26491 })
26492 })
26493 .collect();
26494 ct.properties
26495 .push(Expression::PartitionedByProperty(Box::new(
26496 PartitionedByProperty {
26497 this: Box::new(Expression::Tuple(Box::new(Tuple {
26498 expressions: partition_exprs,
26499 }))),
26500 },
26501 )));
26502 }
26503 // DuckDB: strip partitioned_by entirely (already handled)
26504 }
26505
26506 /// Convert a DataType to Spark's type string format (using angle brackets)
26507 fn data_type_to_spark_string(dt: &crate::expressions::DataType) -> String {
26508 use crate::expressions::DataType;
26509 match dt {
26510 DataType::Int { .. } => "INT".to_string(),
26511 DataType::BigInt { .. } => "BIGINT".to_string(),
26512 DataType::SmallInt { .. } => "SMALLINT".to_string(),
26513 DataType::TinyInt { .. } => "TINYINT".to_string(),
26514 DataType::Float { .. } => "FLOAT".to_string(),
26515 DataType::Double { .. } => "DOUBLE".to_string(),
26516 DataType::Decimal {
26517 precision: Some(p),
26518 scale: Some(s),
26519 } => format!("DECIMAL({}, {})", p, s),
26520 DataType::Decimal {
26521 precision: Some(p), ..
26522 } => format!("DECIMAL({})", p),
26523 DataType::Decimal { .. } => "DECIMAL".to_string(),
26524 DataType::VarChar { .. } | DataType::Text | DataType::String { .. } => {
26525 "STRING".to_string()
26526 }
26527 DataType::Char { .. } => "STRING".to_string(),
26528 DataType::Boolean => "BOOLEAN".to_string(),
26529 DataType::Date => "DATE".to_string(),
26530 DataType::Timestamp { .. } => "TIMESTAMP".to_string(),
26531 DataType::Json | DataType::JsonB => "STRING".to_string(),
26532 DataType::Binary { .. } => "BINARY".to_string(),
26533 DataType::Array { element_type, .. } => {
26534 format!("ARRAY<{}>", Self::data_type_to_spark_string(element_type))
26535 }
26536 DataType::Map {
26537 key_type,
26538 value_type,
26539 } => format!(
26540 "MAP<{}, {}>",
26541 Self::data_type_to_spark_string(key_type),
26542 Self::data_type_to_spark_string(value_type)
26543 ),
26544 DataType::Struct { fields, .. } => {
26545 let field_strs: Vec<String> = fields
26546 .iter()
26547 .map(|f| {
26548 if f.name.is_empty() {
26549 Self::data_type_to_spark_string(&f.data_type)
26550 } else {
26551 format!(
26552 "{}: {}",
26553 f.name,
26554 Self::data_type_to_spark_string(&f.data_type)
26555 )
26556 }
26557 })
26558 .collect();
26559 format!("STRUCT<{}>", field_strs.join(", "))
26560 }
26561 DataType::Custom { name } => name.clone(),
26562 _ => format!("{:?}", dt),
26563 }
26564 }
26565
    /// Extract the value expression and unit from an `Expression::Interval`.
    ///
    /// Returns `(value_expression, IntervalUnit)`. When the interval has no
    /// explicit unit spec, the unit may be embedded in a string-literal value
    /// (Snowflake format, e.g. `'5 DAY'`); in that case the string is split
    /// and only the numeric part is returned as the value. Non-interval
    /// expressions pass through unchanged with a default unit of `Day`.
    fn extract_interval_parts(
        interval_expr: &Expression,
    ) -> (Expression, crate::expressions::IntervalUnit) {
        use crate::expressions::{IntervalUnit, IntervalUnitSpec};

        if let Expression::Interval(iv) = interval_expr {
            // Missing interval value defaults to 0 so callers always receive
            // a usable expression.
            let val = iv.this.clone().unwrap_or(Expression::number(0));
            let unit = match &iv.unit {
                Some(IntervalUnitSpec::Simple { unit, .. }) => *unit,
                None => {
                    // Unit might be embedded in the string value (Snowflake format: '5 DAY')
                    if let Expression::Literal(crate::expressions::Literal::String(s)) = &val {
                        // Split into at most two parts: "<value> <unit words...>".
                        let parts: Vec<&str> = s.trim().splitn(2, ' ').collect();
                        if parts.len() == 2 {
                            let unit_str = parts[1].trim().to_uppercase();
                            // Accept both singular and plural unit spellings.
                            let parsed_unit = match unit_str.as_str() {
                                "YEAR" | "YEARS" => IntervalUnit::Year,
                                "QUARTER" | "QUARTERS" => IntervalUnit::Quarter,
                                "MONTH" | "MONTHS" => IntervalUnit::Month,
                                "WEEK" | "WEEKS" | "ISOWEEK" => IntervalUnit::Week,
                                "DAY" | "DAYS" => IntervalUnit::Day,
                                "HOUR" | "HOURS" => IntervalUnit::Hour,
                                "MINUTE" | "MINUTES" => IntervalUnit::Minute,
                                "SECOND" | "SECONDS" => IntervalUnit::Second,
                                "MILLISECOND" | "MILLISECONDS" => IntervalUnit::Millisecond,
                                "MICROSECOND" | "MICROSECONDS" => IntervalUnit::Microsecond,
                                // Unrecognized unit words fall back to DAY.
                                _ => IntervalUnit::Day,
                            };
                            // Return just the numeric part as value and parsed unit
                            return (
                                Expression::Literal(crate::expressions::Literal::String(
                                    parts[0].to_string(),
                                )),
                                parsed_unit,
                            );
                        }
                        // Single-token string value: no embedded unit; default to DAY.
                        IntervalUnit::Day
                    } else {
                        // Non-string value with no unit spec: default to DAY.
                        IntervalUnit::Day
                    }
                }
                // Any other unit-spec variant is not handled here; default to DAY.
                _ => IntervalUnit::Day,
            };
            (val, unit)
        } else {
            // Not an interval - pass through
            (interval_expr.clone(), crate::expressions::IntervalUnit::Day)
        }
    }
26617
26618 /// Normalize BigQuery-specific functions to standard forms that target dialects can handle
26619 fn normalize_bigquery_function(
26620 e: Expression,
26621 source: DialectType,
26622 target: DialectType,
26623 ) -> Result<Expression> {
26624 use crate::expressions::{BinaryOp, Cast, DataType, Function, Identifier, Literal, Paren};
26625
26626 let f = if let Expression::Function(f) = e {
26627 *f
26628 } else {
26629 return Ok(e);
26630 };
26631 let name = f.name.to_uppercase();
26632 let mut args = f.args;
26633
26634 /// Helper to extract unit string from an identifier, column, or literal expression
26635 fn get_unit_str(expr: &Expression) -> String {
26636 match expr {
26637 Expression::Identifier(id) => id.name.to_uppercase(),
26638 Expression::Literal(Literal::String(s)) => s.to_uppercase(),
26639 Expression::Column(col) => col.name.name.to_uppercase(),
26640 // Handle WEEK(MONDAY), WEEK(SUNDAY) etc. which are parsed as Function("WEEK", [Column("MONDAY")])
26641 Expression::Function(f) => {
26642 let base = f.name.to_uppercase();
26643 if !f.args.is_empty() {
26644 // e.g., WEEK(MONDAY) -> "WEEK(MONDAY)"
26645 let inner = get_unit_str(&f.args[0]);
26646 format!("{}({})", base, inner)
26647 } else {
26648 base
26649 }
26650 }
26651 _ => "DAY".to_string(),
26652 }
26653 }
26654
26655 /// Parse unit string to IntervalUnit
26656 fn parse_interval_unit(s: &str) -> crate::expressions::IntervalUnit {
26657 match s {
26658 "YEAR" => crate::expressions::IntervalUnit::Year,
26659 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
26660 "MONTH" => crate::expressions::IntervalUnit::Month,
26661 "WEEK" | "ISOWEEK" => crate::expressions::IntervalUnit::Week,
26662 "DAY" => crate::expressions::IntervalUnit::Day,
26663 "HOUR" => crate::expressions::IntervalUnit::Hour,
26664 "MINUTE" => crate::expressions::IntervalUnit::Minute,
26665 "SECOND" => crate::expressions::IntervalUnit::Second,
26666 "MILLISECOND" => crate::expressions::IntervalUnit::Millisecond,
26667 "MICROSECOND" => crate::expressions::IntervalUnit::Microsecond,
26668 _ if s.starts_with("WEEK(") => crate::expressions::IntervalUnit::Week,
26669 _ => crate::expressions::IntervalUnit::Day,
26670 }
26671 }
26672
26673 match name.as_str() {
26674 // TIMESTAMP_DIFF(date1, date2, unit) -> TIMESTAMPDIFF(unit, date2, date1)
26675 // (BigQuery: result = date1 - date2, Standard: result = end - start)
26676 "TIMESTAMP_DIFF" | "DATETIME_DIFF" | "TIME_DIFF" if args.len() == 3 => {
26677 let date1 = args.remove(0);
26678 let date2 = args.remove(0);
26679 let unit_expr = args.remove(0);
26680 let unit_str = get_unit_str(&unit_expr);
26681
26682 if matches!(target, DialectType::BigQuery) {
26683 // BigQuery -> BigQuery: just uppercase the unit
26684 let unit = Expression::Identifier(Identifier::new(unit_str.clone()));
26685 return Ok(Expression::Function(Box::new(Function::new(
26686 f.name,
26687 vec![date1, date2, unit],
26688 ))));
26689 }
26690
26691 // For Snowflake: use TimestampDiff expression so it generates TIMESTAMPDIFF
26692 // (Function("TIMESTAMPDIFF") would be converted to DATEDIFF by Snowflake's function normalization)
26693 if matches!(target, DialectType::Snowflake) {
26694 return Ok(Expression::TimestampDiff(Box::new(
26695 crate::expressions::TimestampDiff {
26696 this: Box::new(date2),
26697 expression: Box::new(date1),
26698 unit: Some(unit_str),
26699 },
26700 )));
26701 }
26702
26703 // For DuckDB: DATE_DIFF('UNIT', start, end) with proper CAST
26704 if matches!(target, DialectType::DuckDB) {
26705 let (cast_d1, cast_d2) = if name == "TIME_DIFF" {
26706 // CAST to TIME
26707 let cast_fn = |e: Expression| -> Expression {
26708 match e {
26709 Expression::Literal(Literal::String(s)) => {
26710 Expression::Cast(Box::new(Cast {
26711 this: Expression::Literal(Literal::String(s)),
26712 to: DataType::Custom {
26713 name: "TIME".to_string(),
26714 },
26715 trailing_comments: vec![],
26716 double_colon_syntax: false,
26717 format: None,
26718 default: None,
26719 inferred_type: None,
26720 }))
26721 }
26722 other => other,
26723 }
26724 };
26725 (cast_fn(date1), cast_fn(date2))
26726 } else if name == "DATETIME_DIFF" {
26727 // CAST to TIMESTAMP
26728 (
26729 Self::ensure_cast_timestamp(date1),
26730 Self::ensure_cast_timestamp(date2),
26731 )
26732 } else {
26733 // TIMESTAMP_DIFF: CAST to TIMESTAMPTZ
26734 (
26735 Self::ensure_cast_timestamptz(date1),
26736 Self::ensure_cast_timestamptz(date2),
26737 )
26738 };
26739 return Ok(Expression::Function(Box::new(Function::new(
26740 "DATE_DIFF".to_string(),
26741 vec![
26742 Expression::Literal(Literal::String(unit_str)),
26743 cast_d2,
26744 cast_d1,
26745 ],
26746 ))));
26747 }
26748
26749 // Convert to standard TIMESTAMPDIFF(unit, start, end)
26750 let unit = Expression::Identifier(Identifier::new(unit_str));
26751 Ok(Expression::Function(Box::new(Function::new(
26752 "TIMESTAMPDIFF".to_string(),
26753 vec![unit, date2, date1],
26754 ))))
26755 }
26756
26757 // DATEDIFF(unit, start, end) -> target-specific form
26758 // Used by: Redshift, Snowflake, TSQL, Databricks, Spark
26759 "DATEDIFF" if args.len() == 3 => {
26760 let arg0 = args.remove(0);
26761 let arg1 = args.remove(0);
26762 let arg2 = args.remove(0);
26763 let unit_str = get_unit_str(&arg0);
26764
26765 // Redshift DATEDIFF(unit, start, end) order: result = end - start
26766 // Snowflake DATEDIFF(unit, start, end) order: result = end - start
26767 // TSQL DATEDIFF(unit, start, end) order: result = end - start
26768
26769 if matches!(target, DialectType::Snowflake) {
26770 // Snowflake: DATEDIFF(UNIT, start, end) - uppercase unit
26771 let unit = Expression::Identifier(Identifier::new(unit_str));
26772 return Ok(Expression::Function(Box::new(Function::new(
26773 "DATEDIFF".to_string(),
26774 vec![unit, arg1, arg2],
26775 ))));
26776 }
26777
26778 if matches!(target, DialectType::DuckDB) {
26779 // DuckDB: DATE_DIFF('UNIT', start, end) with CAST
26780 let cast_d1 = Self::ensure_cast_timestamp(arg1);
26781 let cast_d2 = Self::ensure_cast_timestamp(arg2);
26782 return Ok(Expression::Function(Box::new(Function::new(
26783 "DATE_DIFF".to_string(),
26784 vec![
26785 Expression::Literal(Literal::String(unit_str)),
26786 cast_d1,
26787 cast_d2,
26788 ],
26789 ))));
26790 }
26791
26792 if matches!(target, DialectType::BigQuery) {
26793 // BigQuery: DATE_DIFF(end_date, start_date, UNIT) - reversed args, CAST to DATETIME
26794 let cast_d1 = Self::ensure_cast_datetime(arg1);
26795 let cast_d2 = Self::ensure_cast_datetime(arg2);
26796 let unit = Expression::Identifier(Identifier::new(unit_str));
26797 return Ok(Expression::Function(Box::new(Function::new(
26798 "DATE_DIFF".to_string(),
26799 vec![cast_d2, cast_d1, unit],
26800 ))));
26801 }
26802
26803 if matches!(target, DialectType::Spark | DialectType::Databricks) {
26804 // Spark/Databricks: DATEDIFF(UNIT, start, end) - uppercase unit
26805 let unit = Expression::Identifier(Identifier::new(unit_str));
26806 return Ok(Expression::Function(Box::new(Function::new(
26807 "DATEDIFF".to_string(),
26808 vec![unit, arg1, arg2],
26809 ))));
26810 }
26811
26812 if matches!(target, DialectType::Hive) {
26813 // Hive: DATEDIFF(end, start) for DAY only, use MONTHS_BETWEEN for MONTH
26814 match unit_str.as_str() {
26815 "MONTH" => {
26816 return Ok(Expression::Function(Box::new(Function::new(
26817 "CAST".to_string(),
26818 vec![Expression::Function(Box::new(Function::new(
26819 "MONTHS_BETWEEN".to_string(),
26820 vec![arg2, arg1],
26821 )))],
26822 ))));
26823 }
26824 "WEEK" => {
26825 return Ok(Expression::Cast(Box::new(Cast {
26826 this: Expression::Div(Box::new(crate::expressions::BinaryOp::new(
26827 Expression::Function(Box::new(Function::new(
26828 "DATEDIFF".to_string(),
26829 vec![arg2, arg1],
26830 ))),
26831 Expression::Literal(Literal::Number("7".to_string())),
26832 ))),
26833 to: DataType::Int {
26834 length: None,
26835 integer_spelling: false,
26836 },
26837 trailing_comments: vec![],
26838 double_colon_syntax: false,
26839 format: None,
26840 default: None,
26841 inferred_type: None,
26842 })));
26843 }
26844 _ => {
26845 // Default: DATEDIFF(end, start) for DAY
26846 return Ok(Expression::Function(Box::new(Function::new(
26847 "DATEDIFF".to_string(),
26848 vec![arg2, arg1],
26849 ))));
26850 }
26851 }
26852 }
26853
26854 if matches!(
26855 target,
26856 DialectType::Presto | DialectType::Trino | DialectType::Athena
26857 ) {
26858 // Presto/Trino: DATE_DIFF('UNIT', start, end)
26859 return Ok(Expression::Function(Box::new(Function::new(
26860 "DATE_DIFF".to_string(),
26861 vec![Expression::Literal(Literal::String(unit_str)), arg1, arg2],
26862 ))));
26863 }
26864
26865 if matches!(target, DialectType::TSQL) {
26866 // TSQL: DATEDIFF(UNIT, start, CAST(end AS DATETIME2))
26867 let cast_d2 = Self::ensure_cast_datetime2(arg2);
26868 let unit = Expression::Identifier(Identifier::new(unit_str));
26869 return Ok(Expression::Function(Box::new(Function::new(
26870 "DATEDIFF".to_string(),
26871 vec![unit, arg1, cast_d2],
26872 ))));
26873 }
26874
26875 if matches!(target, DialectType::PostgreSQL) {
26876 // PostgreSQL doesn't have DATEDIFF - use date subtraction or EXTRACT
26877 // For now, use DATEDIFF (passthrough) with uppercased unit
26878 let unit = Expression::Identifier(Identifier::new(unit_str));
26879 return Ok(Expression::Function(Box::new(Function::new(
26880 "DATEDIFF".to_string(),
26881 vec![unit, arg1, arg2],
26882 ))));
26883 }
26884
26885 // Default: DATEDIFF(UNIT, start, end) with uppercase unit
26886 let unit = Expression::Identifier(Identifier::new(unit_str));
26887 Ok(Expression::Function(Box::new(Function::new(
26888 "DATEDIFF".to_string(),
26889 vec![unit, arg1, arg2],
26890 ))))
26891 }
26892
26893 // DATE_DIFF(date1, date2, unit) -> standard form
26894 "DATE_DIFF" if args.len() == 3 => {
26895 let date1 = args.remove(0);
26896 let date2 = args.remove(0);
26897 let unit_expr = args.remove(0);
26898 let unit_str = get_unit_str(&unit_expr);
26899
26900 if matches!(target, DialectType::BigQuery) {
26901 // BigQuery -> BigQuery: just uppercase the unit, normalize WEEK(SUNDAY) -> WEEK
26902 let norm_unit = if unit_str == "WEEK(SUNDAY)" {
26903 "WEEK".to_string()
26904 } else {
26905 unit_str
26906 };
26907 let norm_d1 = Self::date_literal_to_cast(date1);
26908 let norm_d2 = Self::date_literal_to_cast(date2);
26909 let unit = Expression::Identifier(Identifier::new(norm_unit));
26910 return Ok(Expression::Function(Box::new(Function::new(
26911 f.name,
26912 vec![norm_d1, norm_d2, unit],
26913 ))));
26914 }
26915
26916 if matches!(target, DialectType::MySQL) {
26917 // MySQL DATEDIFF only takes 2 args (date1, date2), returns day difference
26918 let norm_d1 = Self::date_literal_to_cast(date1);
26919 let norm_d2 = Self::date_literal_to_cast(date2);
26920 return Ok(Expression::Function(Box::new(Function::new(
26921 "DATEDIFF".to_string(),
26922 vec![norm_d1, norm_d2],
26923 ))));
26924 }
26925
26926 if matches!(target, DialectType::StarRocks) {
26927 // StarRocks: DATE_DIFF('UNIT', date1, date2) - unit as string, args NOT swapped
26928 let norm_d1 = Self::date_literal_to_cast(date1);
26929 let norm_d2 = Self::date_literal_to_cast(date2);
26930 return Ok(Expression::Function(Box::new(Function::new(
26931 "DATE_DIFF".to_string(),
26932 vec![
26933 Expression::Literal(Literal::String(unit_str)),
26934 norm_d1,
26935 norm_d2,
26936 ],
26937 ))));
26938 }
26939
26940 if matches!(target, DialectType::DuckDB) {
26941 // DuckDB: DATE_DIFF('UNIT', date2, date1) with proper CAST for dates
26942 let norm_d1 = Self::ensure_cast_date(date1);
26943 let norm_d2 = Self::ensure_cast_date(date2);
26944
26945 // Handle WEEK variants: WEEK(MONDAY)/WEEK(SUNDAY)/ISOWEEK/WEEK
26946 let is_week_variant = unit_str == "WEEK"
26947 || unit_str.starts_with("WEEK(")
26948 || unit_str == "ISOWEEK";
26949 if is_week_variant {
26950 // For DuckDB, WEEK-based diffs use DATE_TRUNC approach
26951 // WEEK(MONDAY) / ISOWEEK: DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2), DATE_TRUNC('WEEK', d1))
26952 // WEEK / WEEK(SUNDAY): DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2 + INTERVAL '1' DAY), DATE_TRUNC('WEEK', d1 + INTERVAL '1' DAY))
26953 // WEEK(SATURDAY): DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2 + INTERVAL '-5' DAY), DATE_TRUNC('WEEK', d1 + INTERVAL '-5' DAY))
26954 let day_offset = if unit_str == "WEEK(MONDAY)" || unit_str == "ISOWEEK" {
26955 None // ISO weeks start on Monday, aligned with DATE_TRUNC('WEEK')
26956 } else if unit_str == "WEEK" || unit_str == "WEEK(SUNDAY)" {
26957 Some("1") // Shift Sunday to Monday alignment
26958 } else if unit_str == "WEEK(SATURDAY)" {
26959 Some("-5")
26960 } else if unit_str == "WEEK(TUESDAY)" {
26961 Some("-1")
26962 } else if unit_str == "WEEK(WEDNESDAY)" {
26963 Some("-2")
26964 } else if unit_str == "WEEK(THURSDAY)" {
26965 Some("-3")
26966 } else if unit_str == "WEEK(FRIDAY)" {
26967 Some("-4")
26968 } else {
26969 Some("1") // default to Sunday
26970 };
26971
26972 let make_trunc = |date: Expression, offset: Option<&str>| -> Expression {
26973 let shifted = if let Some(off) = offset {
26974 let interval =
26975 Expression::Interval(Box::new(crate::expressions::Interval {
26976 this: Some(Expression::Literal(Literal::String(
26977 off.to_string(),
26978 ))),
26979 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
26980 unit: crate::expressions::IntervalUnit::Day,
26981 use_plural: false,
26982 }),
26983 }));
26984 Expression::Add(Box::new(crate::expressions::BinaryOp::new(
26985 date, interval,
26986 )))
26987 } else {
26988 date
26989 };
26990 Expression::Function(Box::new(Function::new(
26991 "DATE_TRUNC".to_string(),
26992 vec![
26993 Expression::Literal(Literal::String("WEEK".to_string())),
26994 shifted,
26995 ],
26996 )))
26997 };
26998
26999 let trunc_d2 = make_trunc(norm_d2, day_offset);
27000 let trunc_d1 = make_trunc(norm_d1, day_offset);
27001 return Ok(Expression::Function(Box::new(Function::new(
27002 "DATE_DIFF".to_string(),
27003 vec![
27004 Expression::Literal(Literal::String("WEEK".to_string())),
27005 trunc_d2,
27006 trunc_d1,
27007 ],
27008 ))));
27009 }
27010
27011 return Ok(Expression::Function(Box::new(Function::new(
27012 "DATE_DIFF".to_string(),
27013 vec![
27014 Expression::Literal(Literal::String(unit_str)),
27015 norm_d2,
27016 norm_d1,
27017 ],
27018 ))));
27019 }
27020
27021 // Default: DATEDIFF(unit, date2, date1)
27022 let unit = Expression::Identifier(Identifier::new(unit_str));
27023 Ok(Expression::Function(Box::new(Function::new(
27024 "DATEDIFF".to_string(),
27025 vec![unit, date2, date1],
27026 ))))
27027 }
27028
27029 // TIMESTAMP_ADD(ts, INTERVAL n UNIT) -> target-specific
27030 "TIMESTAMP_ADD" | "DATETIME_ADD" | "TIME_ADD" if args.len() == 2 => {
27031 let ts = args.remove(0);
27032 let interval_expr = args.remove(0);
27033 let (val, unit) = Self::extract_interval_parts(&interval_expr);
27034
27035 match target {
27036 DialectType::Snowflake => {
27037 // TIMESTAMPADD(UNIT, val, CAST(ts AS TIMESTAMPTZ))
27038 // Use TimestampAdd expression so Snowflake generates TIMESTAMPADD
27039 // (Function("TIMESTAMPADD") would be converted to DATEADD by Snowflake's function normalization)
27040 let unit_str = Self::interval_unit_to_string(&unit);
27041 let cast_ts = Self::maybe_cast_ts_to_tz(ts, &name);
27042 Ok(Expression::TimestampAdd(Box::new(
27043 crate::expressions::TimestampAdd {
27044 this: Box::new(val),
27045 expression: Box::new(cast_ts),
27046 unit: Some(unit_str),
27047 },
27048 )))
27049 }
27050 DialectType::Spark | DialectType::Databricks => {
27051 if name == "DATETIME_ADD" && matches!(target, DialectType::Spark) {
27052 // Spark DATETIME_ADD: ts + INTERVAL val UNIT
27053 let interval =
27054 Expression::Interval(Box::new(crate::expressions::Interval {
27055 this: Some(val),
27056 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
27057 unit,
27058 use_plural: false,
27059 }),
27060 }));
27061 Ok(Expression::Add(Box::new(
27062 crate::expressions::BinaryOp::new(ts, interval),
27063 )))
27064 } else if name == "DATETIME_ADD"
27065 && matches!(target, DialectType::Databricks)
27066 {
27067 // Databricks DATETIME_ADD: TIMESTAMPADD(UNIT, val, ts)
27068 let unit_str = Self::interval_unit_to_string(&unit);
27069 Ok(Expression::Function(Box::new(Function::new(
27070 "TIMESTAMPADD".to_string(),
27071 vec![Expression::Identifier(Identifier::new(unit_str)), val, ts],
27072 ))))
27073 } else {
27074 // Presto-style: DATE_ADD('unit', val, CAST(ts AS TIMESTAMP))
27075 let unit_str = Self::interval_unit_to_string(&unit);
27076 let cast_ts =
27077 if name.starts_with("TIMESTAMP") || name.starts_with("DATETIME") {
27078 Self::maybe_cast_ts(ts)
27079 } else {
27080 ts
27081 };
27082 Ok(Expression::Function(Box::new(Function::new(
27083 "DATE_ADD".to_string(),
27084 vec![
27085 Expression::Identifier(Identifier::new(unit_str)),
27086 val,
27087 cast_ts,
27088 ],
27089 ))))
27090 }
27091 }
27092 DialectType::MySQL => {
27093 // DATE_ADD(TIMESTAMP(ts), INTERVAL val UNIT) for MySQL
27094 let mysql_ts = if name.starts_with("TIMESTAMP") {
27095 // Check if already wrapped in TIMESTAMP() function (from cross-dialect normalization)
27096 match &ts {
27097 Expression::Function(ref inner_f)
27098 if inner_f.name.eq_ignore_ascii_case("TIMESTAMP") =>
27099 {
27100 // Already wrapped, keep as-is
27101 ts
27102 }
27103 _ => {
27104 // Unwrap typed literals: TIMESTAMP '...' -> '...' for TIMESTAMP() wrapper
27105 let unwrapped = match ts {
27106 Expression::Literal(Literal::Timestamp(s)) => {
27107 Expression::Literal(Literal::String(s))
27108 }
27109 other => other,
27110 };
27111 Expression::Function(Box::new(Function::new(
27112 "TIMESTAMP".to_string(),
27113 vec![unwrapped],
27114 )))
27115 }
27116 }
27117 } else {
27118 ts
27119 };
27120 Ok(Expression::DateAdd(Box::new(
27121 crate::expressions::DateAddFunc {
27122 this: mysql_ts,
27123 interval: val,
27124 unit,
27125 },
27126 )))
27127 }
27128 _ => {
27129 // DuckDB and others use DateAdd expression (DuckDB converts to + INTERVAL)
27130 let cast_ts = if matches!(target, DialectType::DuckDB) {
27131 if name == "DATETIME_ADD" {
27132 Self::ensure_cast_timestamp(ts)
27133 } else if name.starts_with("TIMESTAMP") {
27134 Self::maybe_cast_ts_to_tz(ts, &name)
27135 } else {
27136 ts
27137 }
27138 } else {
27139 ts
27140 };
27141 Ok(Expression::DateAdd(Box::new(
27142 crate::expressions::DateAddFunc {
27143 this: cast_ts,
27144 interval: val,
27145 unit,
27146 },
27147 )))
27148 }
27149 }
27150 }
27151
27152 // TIMESTAMP_SUB(ts, INTERVAL n UNIT) -> target-specific
27153 "TIMESTAMP_SUB" | "DATETIME_SUB" | "TIME_SUB" if args.len() == 2 => {
27154 let ts = args.remove(0);
27155 let interval_expr = args.remove(0);
27156 let (val, unit) = Self::extract_interval_parts(&interval_expr);
27157
27158 match target {
27159 DialectType::Snowflake => {
27160 // TIMESTAMPADD(UNIT, val * -1, CAST(ts AS TIMESTAMPTZ))
27161 let unit_str = Self::interval_unit_to_string(&unit);
27162 let cast_ts = Self::maybe_cast_ts_to_tz(ts, &name);
27163 let neg_val = Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
27164 val,
27165 Expression::Neg(Box::new(crate::expressions::UnaryOp {
27166 this: Expression::number(1),
27167 inferred_type: None,
27168 })),
27169 )));
27170 Ok(Expression::TimestampAdd(Box::new(
27171 crate::expressions::TimestampAdd {
27172 this: Box::new(neg_val),
27173 expression: Box::new(cast_ts),
27174 unit: Some(unit_str),
27175 },
27176 )))
27177 }
27178 DialectType::Spark | DialectType::Databricks => {
27179 if (name == "DATETIME_SUB" && matches!(target, DialectType::Spark))
27180 || (name == "TIMESTAMP_SUB" && matches!(target, DialectType::Spark))
27181 {
27182 // Spark: ts - INTERVAL val UNIT
27183 let cast_ts = if name.starts_with("TIMESTAMP") {
27184 Self::maybe_cast_ts(ts)
27185 } else {
27186 ts
27187 };
27188 let interval =
27189 Expression::Interval(Box::new(crate::expressions::Interval {
27190 this: Some(val),
27191 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
27192 unit,
27193 use_plural: false,
27194 }),
27195 }));
27196 Ok(Expression::Sub(Box::new(
27197 crate::expressions::BinaryOp::new(cast_ts, interval),
27198 )))
27199 } else {
27200 // Databricks: TIMESTAMPADD(UNIT, val * -1, ts)
27201 let unit_str = Self::interval_unit_to_string(&unit);
27202 let neg_val =
27203 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
27204 val,
27205 Expression::Neg(Box::new(crate::expressions::UnaryOp {
27206 this: Expression::number(1),
27207 inferred_type: None,
27208 })),
27209 )));
27210 Ok(Expression::Function(Box::new(Function::new(
27211 "TIMESTAMPADD".to_string(),
27212 vec![
27213 Expression::Identifier(Identifier::new(unit_str)),
27214 neg_val,
27215 ts,
27216 ],
27217 ))))
27218 }
27219 }
27220 DialectType::MySQL => {
27221 let mysql_ts = if name.starts_with("TIMESTAMP") {
27222 // Check if already wrapped in TIMESTAMP() function (from cross-dialect normalization)
27223 match &ts {
27224 Expression::Function(ref inner_f)
27225 if inner_f.name.eq_ignore_ascii_case("TIMESTAMP") =>
27226 {
27227 // Already wrapped, keep as-is
27228 ts
27229 }
27230 _ => {
27231 let unwrapped = match ts {
27232 Expression::Literal(Literal::Timestamp(s)) => {
27233 Expression::Literal(Literal::String(s))
27234 }
27235 other => other,
27236 };
27237 Expression::Function(Box::new(Function::new(
27238 "TIMESTAMP".to_string(),
27239 vec![unwrapped],
27240 )))
27241 }
27242 }
27243 } else {
27244 ts
27245 };
27246 Ok(Expression::DateSub(Box::new(
27247 crate::expressions::DateAddFunc {
27248 this: mysql_ts,
27249 interval: val,
27250 unit,
27251 },
27252 )))
27253 }
27254 _ => {
27255 let cast_ts = if matches!(target, DialectType::DuckDB) {
27256 if name == "DATETIME_SUB" {
27257 Self::ensure_cast_timestamp(ts)
27258 } else if name.starts_with("TIMESTAMP") {
27259 Self::maybe_cast_ts_to_tz(ts, &name)
27260 } else {
27261 ts
27262 }
27263 } else {
27264 ts
27265 };
27266 Ok(Expression::DateSub(Box::new(
27267 crate::expressions::DateAddFunc {
27268 this: cast_ts,
27269 interval: val,
27270 unit,
27271 },
27272 )))
27273 }
27274 }
27275 }
27276
27277 // DATE_SUB(date, INTERVAL n UNIT) -> target-specific
27278 "DATE_SUB" if args.len() == 2 => {
27279 let date = args.remove(0);
27280 let interval_expr = args.remove(0);
27281 let (val, unit) = Self::extract_interval_parts(&interval_expr);
27282
27283 match target {
27284 DialectType::Databricks | DialectType::Spark => {
27285 // Databricks/Spark: DATE_ADD(date, -val)
27286 // Use DateAdd expression with negative val so it generates correctly
27287 // The generator will output DATE_ADD(date, INTERVAL -val DAY)
27288 // Then Databricks transform converts 2-arg DATE_ADD(date, interval) to DATEADD(DAY, interval, date)
27289 // Instead, we directly output as a simple negated DateSub
27290 Ok(Expression::DateSub(Box::new(
27291 crate::expressions::DateAddFunc {
27292 this: date,
27293 interval: val,
27294 unit,
27295 },
27296 )))
27297 }
27298 DialectType::DuckDB => {
27299 // DuckDB: CAST(date AS DATE) - INTERVAL 'val' UNIT
27300 let cast_date = Self::ensure_cast_date(date);
27301 let interval =
27302 Expression::Interval(Box::new(crate::expressions::Interval {
27303 this: Some(val),
27304 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
27305 unit,
27306 use_plural: false,
27307 }),
27308 }));
27309 Ok(Expression::Sub(Box::new(
27310 crate::expressions::BinaryOp::new(cast_date, interval),
27311 )))
27312 }
27313 DialectType::Snowflake => {
27314 // Snowflake: Let Snowflake's own DateSub -> DATEADD(UNIT, val * -1, date) handler work
27315 // Just ensure the date is cast properly
27316 let cast_date = Self::ensure_cast_date(date);
27317 Ok(Expression::DateSub(Box::new(
27318 crate::expressions::DateAddFunc {
27319 this: cast_date,
27320 interval: val,
27321 unit,
27322 },
27323 )))
27324 }
27325 DialectType::PostgreSQL => {
27326 // PostgreSQL: date - INTERVAL 'val UNIT'
27327 let unit_str = Self::interval_unit_to_string(&unit);
27328 let interval =
27329 Expression::Interval(Box::new(crate::expressions::Interval {
27330 this: Some(Expression::Literal(Literal::String(format!(
27331 "{} {}",
27332 Self::expr_to_string(&val),
27333 unit_str
27334 )))),
27335 unit: None,
27336 }));
27337 Ok(Expression::Sub(Box::new(
27338 crate::expressions::BinaryOp::new(date, interval),
27339 )))
27340 }
27341 _ => Ok(Expression::DateSub(Box::new(
27342 crate::expressions::DateAddFunc {
27343 this: date,
27344 interval: val,
27345 unit,
27346 },
27347 ))),
27348 }
27349 }
27350
27351 // DATEADD(unit, val, date) -> target-specific form
27352 // Used by: Redshift, Snowflake, TSQL, ClickHouse
27353 "DATEADD" if args.len() == 3 => {
27354 let arg0 = args.remove(0);
27355 let arg1 = args.remove(0);
27356 let arg2 = args.remove(0);
27357 let unit_str = get_unit_str(&arg0);
27358
27359 if matches!(target, DialectType::Snowflake | DialectType::TSQL) {
27360 // Keep DATEADD(UNIT, val, date) with uppercased unit
27361 let unit = Expression::Identifier(Identifier::new(unit_str));
27362 // Only CAST to DATETIME2 for TSQL target when source is NOT Spark/Databricks family
27363 let date = if matches!(target, DialectType::TSQL)
27364 && !matches!(
27365 source,
27366 DialectType::Spark | DialectType::Databricks | DialectType::Hive
27367 ) {
27368 Self::ensure_cast_datetime2(arg2)
27369 } else {
27370 arg2
27371 };
27372 return Ok(Expression::Function(Box::new(Function::new(
27373 "DATEADD".to_string(),
27374 vec![unit, arg1, date],
27375 ))));
27376 }
27377
27378 if matches!(target, DialectType::DuckDB) {
27379 // DuckDB: date + INTERVAL 'val' UNIT
27380 let iu = parse_interval_unit(&unit_str);
27381 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
27382 this: Some(arg1),
27383 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
27384 unit: iu,
27385 use_plural: false,
27386 }),
27387 }));
27388 let cast_date = Self::ensure_cast_timestamp(arg2);
27389 return Ok(Expression::Add(Box::new(
27390 crate::expressions::BinaryOp::new(cast_date, interval),
27391 )));
27392 }
27393
27394 if matches!(target, DialectType::BigQuery) {
27395 // BigQuery: DATE_ADD(date, INTERVAL val UNIT) or TIMESTAMP_ADD(ts, INTERVAL val UNIT)
27396 let iu = parse_interval_unit(&unit_str);
27397 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
27398 this: Some(arg1),
27399 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
27400 unit: iu,
27401 use_plural: false,
27402 }),
27403 }));
27404 return Ok(Expression::Function(Box::new(Function::new(
27405 "DATE_ADD".to_string(),
27406 vec![arg2, interval],
27407 ))));
27408 }
27409
27410 if matches!(target, DialectType::Databricks) {
27411 // Databricks: keep DATEADD(UNIT, val, date) format
27412 let unit = Expression::Identifier(Identifier::new(unit_str));
27413 return Ok(Expression::Function(Box::new(Function::new(
27414 "DATEADD".to_string(),
27415 vec![unit, arg1, arg2],
27416 ))));
27417 }
27418
27419 if matches!(target, DialectType::Spark) {
27420 // Spark: convert month-based units to ADD_MONTHS, rest to DATE_ADD
27421 fn multiply_expr_dateadd(expr: Expression, factor: i64) -> Expression {
27422 if let Expression::Literal(crate::expressions::Literal::Number(n)) = &expr {
27423 if let Ok(val) = n.parse::<i64>() {
27424 return Expression::Literal(crate::expressions::Literal::Number(
27425 (val * factor).to_string(),
27426 ));
27427 }
27428 }
27429 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
27430 expr,
27431 Expression::Literal(crate::expressions::Literal::Number(
27432 factor.to_string(),
27433 )),
27434 )))
27435 }
27436 match unit_str.as_str() {
27437 "YEAR" => {
27438 let months = multiply_expr_dateadd(arg1, 12);
27439 return Ok(Expression::Function(Box::new(Function::new(
27440 "ADD_MONTHS".to_string(),
27441 vec![arg2, months],
27442 ))));
27443 }
27444 "QUARTER" => {
27445 let months = multiply_expr_dateadd(arg1, 3);
27446 return Ok(Expression::Function(Box::new(Function::new(
27447 "ADD_MONTHS".to_string(),
27448 vec![arg2, months],
27449 ))));
27450 }
27451 "MONTH" => {
27452 return Ok(Expression::Function(Box::new(Function::new(
27453 "ADD_MONTHS".to_string(),
27454 vec![arg2, arg1],
27455 ))));
27456 }
27457 "WEEK" => {
27458 let days = multiply_expr_dateadd(arg1, 7);
27459 return Ok(Expression::Function(Box::new(Function::new(
27460 "DATE_ADD".to_string(),
27461 vec![arg2, days],
27462 ))));
27463 }
27464 "DAY" => {
27465 return Ok(Expression::Function(Box::new(Function::new(
27466 "DATE_ADD".to_string(),
27467 vec![arg2, arg1],
27468 ))));
27469 }
27470 _ => {
27471 let unit = Expression::Identifier(Identifier::new(unit_str));
27472 return Ok(Expression::Function(Box::new(Function::new(
27473 "DATE_ADD".to_string(),
27474 vec![unit, arg1, arg2],
27475 ))));
27476 }
27477 }
27478 }
27479
27480 if matches!(target, DialectType::Hive) {
27481 // Hive: DATE_ADD(date, val) for DAY, or date + INTERVAL for others
27482 match unit_str.as_str() {
27483 "DAY" => {
27484 return Ok(Expression::Function(Box::new(Function::new(
27485 "DATE_ADD".to_string(),
27486 vec![arg2, arg1],
27487 ))));
27488 }
27489 "MONTH" => {
27490 return Ok(Expression::Function(Box::new(Function::new(
27491 "ADD_MONTHS".to_string(),
27492 vec![arg2, arg1],
27493 ))));
27494 }
27495 _ => {
27496 let iu = parse_interval_unit(&unit_str);
27497 let interval =
27498 Expression::Interval(Box::new(crate::expressions::Interval {
27499 this: Some(arg1),
27500 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
27501 unit: iu,
27502 use_plural: false,
27503 }),
27504 }));
27505 return Ok(Expression::Add(Box::new(
27506 crate::expressions::BinaryOp::new(arg2, interval),
27507 )));
27508 }
27509 }
27510 }
27511
27512 if matches!(target, DialectType::PostgreSQL) {
27513 // PostgreSQL: date + INTERVAL 'val UNIT'
27514 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
27515 this: Some(Expression::Literal(Literal::String(format!(
27516 "{} {}",
27517 Self::expr_to_string(&arg1),
27518 unit_str
27519 )))),
27520 unit: None,
27521 }));
27522 return Ok(Expression::Add(Box::new(
27523 crate::expressions::BinaryOp::new(arg2, interval),
27524 )));
27525 }
27526
27527 if matches!(
27528 target,
27529 DialectType::Presto | DialectType::Trino | DialectType::Athena
27530 ) {
27531 // Presto/Trino: DATE_ADD('UNIT', val, date)
27532 return Ok(Expression::Function(Box::new(Function::new(
27533 "DATE_ADD".to_string(),
27534 vec![Expression::Literal(Literal::String(unit_str)), arg1, arg2],
27535 ))));
27536 }
27537
27538 if matches!(target, DialectType::ClickHouse) {
27539 // ClickHouse: DATE_ADD(UNIT, val, date)
27540 let unit = Expression::Identifier(Identifier::new(unit_str));
27541 return Ok(Expression::Function(Box::new(Function::new(
27542 "DATE_ADD".to_string(),
27543 vec![unit, arg1, arg2],
27544 ))));
27545 }
27546
27547 // Default: keep DATEADD with uppercased unit
27548 let unit = Expression::Identifier(Identifier::new(unit_str));
27549 Ok(Expression::Function(Box::new(Function::new(
27550 "DATEADD".to_string(),
27551 vec![unit, arg1, arg2],
27552 ))))
27553 }
27554
// DATE_ADD(unit, val, date) - 3 arg form from ClickHouse/Presto
"DATE_ADD" if args.len() == 3 => {
    // Decompose the 3-arg form: unit, amount, then the date operand.
    let arg0 = args.remove(0);
    let arg1 = args.remove(0);
    let arg2 = args.remove(0);
    // Extract the unit name from the unit expression (see get_unit_str).
    let unit_str = get_unit_str(&arg0);

    if matches!(
        target,
        DialectType::Presto | DialectType::Trino | DialectType::Athena
    ) {
        // Presto/Trino/Athena: DATE_ADD('UNIT', val, date) - unit as a string literal
        return Ok(Expression::Function(Box::new(Function::new(
            "DATE_ADD".to_string(),
            vec![Expression::Literal(Literal::String(unit_str)), arg1, arg2],
        ))));
    }

    if matches!(
        target,
        DialectType::Snowflake | DialectType::TSQL | DialectType::Redshift
    ) {
        // DATEADD(UNIT, val, date); for TSQL the date operand is additionally
        // wrapped via ensure_cast_datetime2 before being emitted.
        let unit = Expression::Identifier(Identifier::new(unit_str));
        let date = if matches!(target, DialectType::TSQL) {
            Self::ensure_cast_datetime2(arg2)
        } else {
            arg2
        };
        return Ok(Expression::Function(Box::new(Function::new(
            "DATEADD".to_string(),
            vec![unit, arg1, date],
        ))));
    }

    if matches!(target, DialectType::DuckDB) {
        // DuckDB: date + INTERVAL val UNIT (binary addition, not a function call)
        let iu = parse_interval_unit(&unit_str);
        let interval = Expression::Interval(Box::new(crate::expressions::Interval {
            this: Some(arg1),
            unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                unit: iu,
                use_plural: false,
            }),
        }));
        return Ok(Expression::Add(Box::new(
            crate::expressions::BinaryOp::new(arg2, interval),
        )));
    }

    if matches!(target, DialectType::Spark | DialectType::Databricks) {
        // Spark/Databricks: DATE_ADD(UNIT, val, date) with the unit as an identifier
        let unit = Expression::Identifier(Identifier::new(unit_str));
        return Ok(Expression::Function(Box::new(Function::new(
            "DATE_ADD".to_string(),
            vec![unit, arg1, arg2],
        ))));
    }

    // Default: DATE_ADD(UNIT, val, date)
    let unit = Expression::Identifier(Identifier::new(unit_str));
    Ok(Expression::Function(Box::new(Function::new(
        "DATE_ADD".to_string(),
        vec![unit, arg1, arg2],
    ))))
}
27621
// DATE_ADD(date, INTERVAL val UNIT) - 2 arg BigQuery form
"DATE_ADD" if args.len() == 2 => {
    let date = args.remove(0);
    let interval_expr = args.remove(0);
    // Split the INTERVAL argument into its amount (`val`) and unit parts,
    // plus a string form of the unit for branches that need it.
    let (val, unit) = Self::extract_interval_parts(&interval_expr);
    let unit_str = Self::interval_unit_to_string(&unit);

    match target {
        DialectType::DuckDB => {
            // DuckDB: CAST(date AS DATE) + INTERVAL 'val' UNIT
            let cast_date = Self::ensure_cast_date(date);
            let quoted_val = Self::quote_interval_val(&val);
            let interval =
                Expression::Interval(Box::new(crate::expressions::Interval {
                    this: Some(quoted_val),
                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                        unit,
                        use_plural: false,
                    }),
                }));
            Ok(Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(cast_date, interval),
            )))
        }
        DialectType::PostgreSQL => {
            // PostgreSQL: date + INTERVAL 'val UNIT' - the amount and unit are
            // folded into a single string literal, with no separate unit spec.
            let interval =
                Expression::Interval(Box::new(crate::expressions::Interval {
                    this: Some(Expression::Literal(Literal::String(format!(
                        "{} {}",
                        Self::expr_to_string(&val),
                        unit_str
                    )))),
                    unit: None,
                }));
            Ok(Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(date, interval),
            )))
        }
        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
            // Presto: DATE_ADD('UNIT', CAST('val' AS BIGINT), date)
            let val_str = Self::expr_to_string(&val);
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![
                    Expression::Literal(Literal::String(unit_str)),
                    Expression::Cast(Box::new(Cast {
                        this: Expression::Literal(Literal::String(val_str)),
                        to: DataType::BigInt { length: None },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    })),
                    date,
                ],
            ))))
        }
        DialectType::Spark | DialectType::Hive => {
            // Spark/Hive: DATE_ADD(date, val) for DAY, ADD_MONTHS for MONTH,
            // otherwise DATE_ADD(date, INTERVAL val UNIT).
            match unit_str.as_str() {
                "DAY" => Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![date, val],
                )))),
                "MONTH" => Ok(Expression::Function(Box::new(Function::new(
                    "ADD_MONTHS".to_string(),
                    vec![date, val],
                )))),
                _ => {
                    let iu = parse_interval_unit(&unit_str);
                    let interval =
                        Expression::Interval(Box::new(crate::expressions::Interval {
                            this: Some(val),
                            unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                unit: iu,
                                use_plural: false,
                            }),
                        }));
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATE_ADD".to_string(),
                        vec![date, interval],
                    ))))
                }
            }
        }
        DialectType::Snowflake => {
            // Snowflake: DATEADD(UNIT, 'val', CAST(date AS DATE))
            let cast_date = Self::ensure_cast_date(date);
            let val_str = Self::expr_to_string(&val);
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new(unit_str)),
                    Expression::Literal(Literal::String(val_str)),
                    cast_date,
                ],
            ))))
        }
        DialectType::TSQL | DialectType::Fabric => {
            // TSQL/Fabric: DATEADD(UNIT, val, <date cast via ensure_cast_datetime2>)
            let cast_date = Self::ensure_cast_datetime2(date);
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new(unit_str)),
                    val,
                    cast_date,
                ],
            ))))
        }
        // Redshift: DATEADD(UNIT, val, date) - no cast on the date operand
        DialectType::Redshift => Ok(Expression::Function(Box::new(Function::new(
            "DATEADD".to_string(),
            vec![Expression::Identifier(Identifier::new(unit_str)), val, date],
        )))),
        DialectType::MySQL => {
            // MySQL: DATE_ADD(date, INTERVAL 'val' UNIT)
            let quoted_val = Self::quote_interval_val(&val);
            let iu = parse_interval_unit(&unit_str);
            let interval =
                Expression::Interval(Box::new(crate::expressions::Interval {
                    this: Some(quoted_val),
                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                        unit: iu,
                        use_plural: false,
                    }),
                }));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![date, interval],
            ))))
        }
        DialectType::BigQuery => {
            // BigQuery: DATE_ADD(date, INTERVAL 'val' UNIT) - round-trips the input form
            let quoted_val = Self::quote_interval_val(&val);
            let iu = parse_interval_unit(&unit_str);
            let interval =
                Expression::Interval(Box::new(crate::expressions::Interval {
                    this: Some(quoted_val),
                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                        unit: iu,
                        use_plural: false,
                    }),
                }));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![date, interval],
            ))))
        }
        // Databricks: DATEADD(UNIT, val, date)
        DialectType::Databricks => Ok(Expression::Function(Box::new(Function::new(
            "DATEADD".to_string(),
            vec![Expression::Identifier(Identifier::new(unit_str)), val, date],
        )))),
        _ => {
            // Default: keep as a dedicated DateAdd node with the decomposed
            // interval; the target generator decides the final rendering.
            Ok(Expression::DateAdd(Box::new(
                crate::expressions::DateAddFunc {
                    this: date,
                    interval: val,
                    unit,
                },
            )))
        }
    }
}
27787
// ADD_MONTHS(date, val) -> target-specific form
"ADD_MONTHS" if args.len() == 2 => {
    let date = args.remove(0);
    let val = args.remove(0);

    if matches!(target, DialectType::TSQL) {
        // TSQL: DATEADD(MONTH, val, CAST(date AS DATETIME2))
        let cast_date = Self::ensure_cast_datetime2(date);
        return Ok(Expression::Function(Box::new(Function::new(
            "DATEADD".to_string(),
            vec![
                Expression::Identifier(Identifier::new("MONTH")),
                val,
                cast_date,
            ],
        ))));
    }

    if matches!(target, DialectType::DuckDB) {
        // DuckDB: date + INTERVAL val MONTH
        let interval = Expression::Interval(Box::new(crate::expressions::Interval {
            this: Some(val),
            unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                unit: crate::expressions::IntervalUnit::Month,
                use_plural: false,
            }),
        }));
        return Ok(Expression::Add(Box::new(
            crate::expressions::BinaryOp::new(date, interval),
        )));
    }

    if matches!(target, DialectType::Snowflake) {
        // Snowflake: keep ADD_MONTHS on a Snowflake->Snowflake round trip,
        // otherwise normalize to DATEADD(MONTH, val, date).
        if matches!(source, DialectType::Snowflake) {
            return Ok(Expression::Function(Box::new(Function::new(
                "ADD_MONTHS".to_string(),
                vec![date, val],
            ))));
        }
        return Ok(Expression::Function(Box::new(Function::new(
            "DATEADD".to_string(),
            vec![Expression::Identifier(Identifier::new("MONTH")), val, date],
        ))));
    }

    if matches!(target, DialectType::Spark | DialectType::Databricks) {
        // Spark/Databricks: ADD_MONTHS(date, val) - keep as is
        return Ok(Expression::Function(Box::new(Function::new(
            "ADD_MONTHS".to_string(),
            vec![date, val],
        ))));
    }

    if matches!(target, DialectType::Hive) {
        // Hive: ADD_MONTHS(date, val) - keep as is
        return Ok(Expression::Function(Box::new(Function::new(
            "ADD_MONTHS".to_string(),
            vec![date, val],
        ))));
    }

    if matches!(
        target,
        DialectType::Presto | DialectType::Trino | DialectType::Athena
    ) {
        // Presto/Trino/Athena: DATE_ADD('MONTH', val, date)
        return Ok(Expression::Function(Box::new(Function::new(
            "DATE_ADD".to_string(),
            vec![
                Expression::Literal(Literal::String("MONTH".to_string())),
                val,
                date,
            ],
        ))));
    }

    // Default: keep ADD_MONTHS unchanged
    Ok(Expression::Function(Box::new(Function::new(
        "ADD_MONTHS".to_string(),
        vec![date, val],
    ))))
}
27870
// SAFE_DIVIDE(x, y) -> target-specific form directly
"SAFE_DIVIDE" if args.len() == 2 => {
    let x = args.remove(0);
    let y = args.remove(0);
    // Wrap x and y in parens if they're complex expressions, so the generated
    // comparison and division keep their intended grouping. Simple operands
    // (columns, literals, identifiers) are used as-is.
    let y_ref = match &y {
        Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => {
            y.clone()
        }
        _ => Expression::Paren(Box::new(Paren {
            this: y.clone(),
            trailing_comments: vec![],
        })),
    };
    let x_ref = match &x {
        Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => {
            x.clone()
        }
        _ => Expression::Paren(Box::new(Paren {
            this: x.clone(),
            trailing_comments: vec![],
        })),
    };
    // Shared building blocks: the `y <> 0` guard and the plain `x / y`.
    let condition = Expression::Neq(Box::new(crate::expressions::BinaryOp::new(
        y_ref.clone(),
        Expression::number(0),
    )));
    let div_expr = Expression::Div(Box::new(crate::expressions::BinaryOp::new(
        x_ref.clone(),
        y_ref.clone(),
    )));

    match target {
        DialectType::DuckDB | DialectType::PostgreSQL => {
            // CASE WHEN y <> 0 THEN x / y ELSE NULL END.
            // PostgreSQL additionally casts x to DOUBLE PRECISION
            // (presumably to avoid integer division - confirm intent).
            let result_div = if matches!(target, DialectType::PostgreSQL) {
                let cast_x = Expression::Cast(Box::new(Cast {
                    this: x_ref,
                    to: DataType::Custom {
                        name: "DOUBLE PRECISION".to_string(),
                    },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                }));
                Expression::Div(Box::new(crate::expressions::BinaryOp::new(
                    cast_x, y_ref,
                )))
            } else {
                div_expr
            };
            Ok(Expression::Case(Box::new(crate::expressions::Case {
                operand: None,
                whens: vec![(condition, result_div)],
                else_: Some(Expression::Null(crate::expressions::Null)),
                comments: Vec::new(),
                inferred_type: None,
            })))
        }
        DialectType::Snowflake => {
            // IFF(y <> 0, x / y, NULL)
            Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
                condition,
                true_value: div_expr,
                false_value: Some(Expression::Null(crate::expressions::Null)),
                original_name: Some("IFF".to_string()),
            })))
        }
        DialectType::Presto | DialectType::Trino => {
            // IF(y <> 0, CAST(x AS DOUBLE) / y, NULL)
            let cast_x = Expression::Cast(Box::new(Cast {
                this: x_ref,
                to: DataType::Double {
                    precision: None,
                    scale: None,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }));
            let cast_div = Expression::Div(Box::new(
                crate::expressions::BinaryOp::new(cast_x, y_ref),
            ));
            Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
                condition,
                true_value: cast_div,
                false_value: Some(Expression::Null(crate::expressions::Null)),
                original_name: None,
            })))
        }
        _ => {
            // Default: IF(y <> 0, x / y, NULL)
            Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
                condition,
                true_value: div_expr,
                false_value: Some(Expression::Null(crate::expressions::Null)),
                original_name: None,
            })))
        }
    }
}
27976
27977 // GENERATE_UUID() -> UUID() with CAST to string
27978 "GENERATE_UUID" => {
27979 let uuid_expr = Expression::Uuid(Box::new(crate::expressions::Uuid {
27980 this: None,
27981 name: None,
27982 is_string: None,
27983 }));
27984 // Most targets need CAST(UUID() AS TEXT/VARCHAR/STRING)
27985 let cast_type = match target {
27986 DialectType::DuckDB => Some(DataType::Text),
27987 DialectType::Presto | DialectType::Trino => Some(DataType::VarChar {
27988 length: None,
27989 parenthesized_length: false,
27990 }),
27991 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
27992 Some(DataType::String { length: None })
27993 }
27994 _ => None,
27995 };
27996 if let Some(dt) = cast_type {
27997 Ok(Expression::Cast(Box::new(Cast {
27998 this: uuid_expr,
27999 to: dt,
28000 trailing_comments: vec![],
28001 double_colon_syntax: false,
28002 format: None,
28003 default: None,
28004 inferred_type: None,
28005 })))
28006 } else {
28007 Ok(uuid_expr)
28008 }
28009 }
28010
// COUNTIF(x) -> dedicated CountIf aggregate node; all optional aggregate
// modifiers (DISTINCT, FILTER, ORDER BY, ...) are left unset.
"COUNTIF" if args.len() == 1 => {
    let arg = args.remove(0);
    Ok(Expression::CountIf(Box::new(crate::expressions::AggFunc {
        this: arg,
        distinct: false,
        filter: None,
        order_by: vec![],
        name: None,
        ignore_nulls: None,
        having_max: None,
        limit: None,
        inferred_type: None,
    })))
}
28026
// EDIT_DISTANCE(col1, col2, ...) -> Levenshtein expression
"EDIT_DISTANCE" => {
    // Strip named arguments (max_distance => N) and pass as positional;
    // named and positional args are flattened in their original order.
    let mut positional_args: Vec<Expression> = vec![];
    for arg in args {
        match arg {
            Expression::NamedArgument(na) => {
                positional_args.push(na.value);
            }
            other => positional_args.push(other),
        }
    }
    if positional_args.len() >= 2 {
        let col1 = positional_args.remove(0);
        let col2 = positional_args.remove(0);
        let levenshtein = crate::expressions::BinaryFunc {
            this: col1,
            expression: col2,
            original_name: None,
            inferred_type: None,
        };
        // Pass extra args through a function wrapper with all args
        if !positional_args.is_empty() {
            let max_dist = positional_args.remove(0);
            // DuckDB: CASE WHEN LEVENSHTEIN(a, b) IS NULL OR max IS NULL THEN NULL ELSE LEAST(LEVENSHTEIN(a, b), max) END
            if matches!(target, DialectType::DuckDB) {
                let lev = Expression::Function(Box::new(Function::new(
                    "LEVENSHTEIN".to_string(),
                    vec![levenshtein.this, levenshtein.expression],
                )));
                let lev_is_null =
                    Expression::IsNull(Box::new(crate::expressions::IsNull {
                        this: lev.clone(),
                        not: false,
                        postfix_form: false,
                    }));
                let max_is_null =
                    Expression::IsNull(Box::new(crate::expressions::IsNull {
                        this: max_dist.clone(),
                        not: false,
                        postfix_form: false,
                    }));
                let null_check =
                    Expression::Or(Box::new(crate::expressions::BinaryOp {
                        left: lev_is_null,
                        right: max_is_null,
                        left_comments: Vec::new(),
                        operator_comments: Vec::new(),
                        trailing_comments: Vec::new(),
                        inferred_type: None,
                    }));
                // LEAST caps the computed distance at max_dist.
                let least =
                    Expression::Least(Box::new(crate::expressions::VarArgFunc {
                        expressions: vec![lev, max_dist],
                        original_name: None,
                        inferred_type: None,
                    }));
                return Ok(Expression::Case(Box::new(crate::expressions::Case {
                    operand: None,
                    whens: vec![(
                        null_check,
                        Expression::Null(crate::expressions::Null),
                    )],
                    else_: Some(least),
                    comments: Vec::new(),
                    inferred_type: None,
                })));
            }
            let mut all_args = vec![levenshtein.this, levenshtein.expression, max_dist];
            all_args.extend(positional_args);
            // PostgreSQL: use LEVENSHTEIN_LESS_EQUAL when max_distance is provided
            let func_name = if matches!(target, DialectType::PostgreSQL) {
                "LEVENSHTEIN_LESS_EQUAL"
            } else {
                "LEVENSHTEIN"
            };
            return Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                all_args,
            ))));
        }
        // Plain 2-arg form: dedicated Levenshtein node.
        Ok(Expression::Levenshtein(Box::new(levenshtein)))
    } else {
        // Fewer than 2 usable args: pass through unchanged.
        Ok(Expression::Function(Box::new(Function::new(
            "EDIT_DISTANCE".to_string(),
            positional_args,
        ))))
    }
}
28116
28117 // TIMESTAMP_SECONDS(x) -> UnixToTime with scale 0
28118 "TIMESTAMP_SECONDS" if args.len() == 1 => {
28119 let arg = args.remove(0);
28120 Ok(Expression::UnixToTime(Box::new(
28121 crate::expressions::UnixToTime {
28122 this: Box::new(arg),
28123 scale: Some(0),
28124 zone: None,
28125 hours: None,
28126 minutes: None,
28127 format: None,
28128 target_type: None,
28129 },
28130 )))
28131 }
28132
28133 // TIMESTAMP_MILLIS(x) -> UnixToTime with scale 3
28134 "TIMESTAMP_MILLIS" if args.len() == 1 => {
28135 let arg = args.remove(0);
28136 Ok(Expression::UnixToTime(Box::new(
28137 crate::expressions::UnixToTime {
28138 this: Box::new(arg),
28139 scale: Some(3),
28140 zone: None,
28141 hours: None,
28142 minutes: None,
28143 format: None,
28144 target_type: None,
28145 },
28146 )))
28147 }
28148
28149 // TIMESTAMP_MICROS(x) -> UnixToTime with scale 6
28150 "TIMESTAMP_MICROS" if args.len() == 1 => {
28151 let arg = args.remove(0);
28152 Ok(Expression::UnixToTime(Box::new(
28153 crate::expressions::UnixToTime {
28154 this: Box::new(arg),
28155 scale: Some(6),
28156 zone: None,
28157 hours: None,
28158 minutes: None,
28159 format: None,
28160 target_type: None,
28161 },
28162 )))
28163 }
28164
28165 // DIV(x, y) -> IntDiv expression
28166 "DIV" if args.len() == 2 => {
28167 let x = args.remove(0);
28168 let y = args.remove(0);
28169 Ok(Expression::IntDiv(Box::new(
28170 crate::expressions::BinaryFunc {
28171 this: x,
28172 expression: y,
28173 original_name: None,
28174 inferred_type: None,
28175 },
28176 )))
28177 }
28178
// TO_HEX(x) -> target-specific form
"TO_HEX" if args.len() == 1 => {
    let arg = args.remove(0);
    // Check if inner function already returns hex string in certain targets.
    // NOTE(review): this match is case-sensitive ("MD5", "SHA1", ...) while
    // the Snowflake branch below uppercases the inner name before matching -
    // confirm function names are normalized to uppercase before this point.
    let inner_returns_hex = matches!(&arg, Expression::Function(f) if matches!(f.name.as_str(), "MD5" | "SHA1" | "SHA256" | "SHA512"));
    if matches!(target, DialectType::BigQuery) {
        // BQ->BQ: keep as TO_HEX
        Ok(Expression::Function(Box::new(Function::new(
            "TO_HEX".to_string(),
            vec![arg],
        ))))
    } else if matches!(target, DialectType::DuckDB) && inner_returns_hex {
        // DuckDB: MD5/SHA already return hex strings, so TO_HEX is redundant
        Ok(arg)
    } else if matches!(target, DialectType::Snowflake) && inner_returns_hex {
        // Snowflake: TO_HEX(SHA1(x)) -> TO_CHAR(SHA1_BINARY(x))
        // TO_HEX(MD5(x)) -> TO_CHAR(MD5_BINARY(x))
        // TO_HEX(SHA256(x)) -> TO_CHAR(SHA2_BINARY(x, 256))
        // TO_HEX(SHA512(x)) -> TO_CHAR(SHA2_BINARY(x, 512))
        if let Expression::Function(ref inner_f) = arg {
            let inner_args = inner_f.args.clone();
            let binary_func = match inner_f.name.to_uppercase().as_str() {
                "SHA1" => Expression::Function(Box::new(Function::new(
                    "SHA1_BINARY".to_string(),
                    inner_args,
                ))),
                "MD5" => Expression::Function(Box::new(Function::new(
                    "MD5_BINARY".to_string(),
                    inner_args,
                ))),
                "SHA256" => {
                    // SHA2_BINARY takes the digest width as a second argument.
                    let mut a = inner_args;
                    a.push(Expression::number(256));
                    Expression::Function(Box::new(Function::new(
                        "SHA2_BINARY".to_string(),
                        a,
                    )))
                }
                "SHA512" => {
                    let mut a = inner_args;
                    a.push(Expression::number(512));
                    Expression::Function(Box::new(Function::new(
                        "SHA2_BINARY".to_string(),
                        a,
                    )))
                }
                _ => arg.clone(),
            };
            Ok(Expression::Function(Box::new(Function::new(
                "TO_CHAR".to_string(),
                vec![binary_func],
            ))))
        } else {
            // Unreachable in practice: inner_returns_hex implies arg is a
            // Function; kept as a defensive LOWER(HEX(arg)) fallback.
            let inner = Expression::Function(Box::new(Function::new(
                "HEX".to_string(),
                vec![arg],
            )));
            Ok(Expression::Lower(Box::new(
                crate::expressions::UnaryFunc::new(inner),
            )))
        }
    } else if matches!(target, DialectType::Presto | DialectType::Trino) {
        // Presto/Trino: LOWER(TO_HEX(x)) - TO_HEX exists but emits uppercase
        let inner = Expression::Function(Box::new(Function::new(
            "TO_HEX".to_string(),
            vec![arg],
        )));
        Ok(Expression::Lower(Box::new(
            crate::expressions::UnaryFunc::new(inner),
        )))
    } else {
        // Default: LOWER(HEX(x))
        let inner =
            Expression::Function(Box::new(Function::new("HEX".to_string(), vec![arg])));
        Ok(Expression::Lower(Box::new(
            crate::expressions::UnaryFunc::new(inner),
        )))
    }
}
28256
// LAST_DAY(date, unit) -> LAST_DAY(date): the optional unit argument is
// dropped for all targets (MONTH is the default).
// NOTE(review): no PostgreSQL-specific transform is performed here despite
// what an earlier comment claimed; PostgreSQL also gets plain LAST_DAY(date).
"LAST_DAY" if args.len() == 2 => {
    let date = args.remove(0);
    let _unit = args.remove(0); // Strip the unit (MONTH is default)
    Ok(Expression::Function(Box::new(Function::new(
        "LAST_DAY".to_string(),
        vec![date],
    ))))
}
28266
28267 // GENERATE_ARRAY(start, end, step?) -> GenerateSeries expression
28268 "GENERATE_ARRAY" => {
28269 let start = args.get(0).cloned();
28270 let end = args.get(1).cloned();
28271 let step = args.get(2).cloned();
28272 Ok(Expression::GenerateSeries(Box::new(
28273 crate::expressions::GenerateSeries {
28274 start: start.map(Box::new),
28275 end: end.map(Box::new),
28276 step: step.map(Box::new),
28277 is_end_exclusive: None,
28278 },
28279 )))
28280 }
28281
28282 // GENERATE_TIMESTAMP_ARRAY(start, end, step) -> GenerateSeries expression
28283 "GENERATE_TIMESTAMP_ARRAY" => {
28284 let start = args.get(0).cloned();
28285 let end = args.get(1).cloned();
28286 let step = args.get(2).cloned();
28287
28288 if matches!(target, DialectType::DuckDB) {
28289 // DuckDB: GENERATE_SERIES(CAST(start AS TIMESTAMP), CAST(end AS TIMESTAMP), step)
28290 // Only cast string literals - leave columns/expressions as-is
28291 let maybe_cast_ts = |expr: Expression| -> Expression {
28292 if matches!(&expr, Expression::Literal(Literal::String(_))) {
28293 Expression::Cast(Box::new(Cast {
28294 this: expr,
28295 to: DataType::Timestamp {
28296 precision: None,
28297 timezone: false,
28298 },
28299 trailing_comments: vec![],
28300 double_colon_syntax: false,
28301 format: None,
28302 default: None,
28303 inferred_type: None,
28304 }))
28305 } else {
28306 expr
28307 }
28308 };
28309 let cast_start = start.map(maybe_cast_ts);
28310 let cast_end = end.map(maybe_cast_ts);
28311 Ok(Expression::GenerateSeries(Box::new(
28312 crate::expressions::GenerateSeries {
28313 start: cast_start.map(Box::new),
28314 end: cast_end.map(Box::new),
28315 step: step.map(Box::new),
28316 is_end_exclusive: None,
28317 },
28318 )))
28319 } else {
28320 Ok(Expression::GenerateSeries(Box::new(
28321 crate::expressions::GenerateSeries {
28322 start: start.map(Box::new),
28323 end: end.map(Box::new),
28324 step: step.map(Box::new),
28325 is_end_exclusive: None,
28326 },
28327 )))
28328 }
28329 }
28330
// TO_JSON(x) -> target-specific (from Spark/Hive)
"TO_JSON" => {
    match target {
        DialectType::Presto | DialectType::Trino => {
            // JSON_FORMAT(CAST(x AS JSON)); a missing argument becomes NULL.
            let arg = args
                .into_iter()
                .next()
                .unwrap_or(Expression::Null(crate::expressions::Null));
            let cast_json = Expression::Cast(Box::new(Cast {
                this: arg,
                to: DataType::Custom {
                    name: "JSON".to_string(),
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }));
            Ok(Expression::Function(Box::new(Function::new(
                "JSON_FORMAT".to_string(),
                vec![cast_json],
            ))))
        }
        // BigQuery: TO_JSON_STRING(x)
        DialectType::BigQuery => Ok(Expression::Function(Box::new(Function::new(
            "TO_JSON_STRING".to_string(),
            args,
        )))),
        DialectType::DuckDB => {
            // CAST(TO_JSON(x) AS TEXT); a missing argument becomes NULL.
            let arg = args
                .into_iter()
                .next()
                .unwrap_or(Expression::Null(crate::expressions::Null));
            let to_json = Expression::Function(Box::new(Function::new(
                "TO_JSON".to_string(),
                vec![arg],
            )));
            Ok(Expression::Cast(Box::new(Cast {
                this: to_json,
                to: DataType::Text,
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })))
        }
        // Default: keep TO_JSON unchanged
        _ => Ok(Expression::Function(Box::new(Function::new(
            "TO_JSON".to_string(),
            args,
        )))),
    }
}
28386
// TO_JSON_STRING(x) -> target-specific.
// NOTE(review): the Presto/Trino and DuckDB branches duplicate the TO_JSON
// arm above; a shared helper would keep the two arms in sync.
"TO_JSON_STRING" => {
    match target {
        // Spark/Databricks/Hive: TO_JSON(x)
        DialectType::Spark | DialectType::Databricks | DialectType::Hive => Ok(
            Expression::Function(Box::new(Function::new("TO_JSON".to_string(), args))),
        ),
        DialectType::Presto | DialectType::Trino => {
            // JSON_FORMAT(CAST(x AS JSON)); a missing argument becomes NULL.
            let arg = args
                .into_iter()
                .next()
                .unwrap_or(Expression::Null(crate::expressions::Null));
            let cast_json = Expression::Cast(Box::new(Cast {
                this: arg,
                to: DataType::Custom {
                    name: "JSON".to_string(),
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }));
            Ok(Expression::Function(Box::new(Function::new(
                "JSON_FORMAT".to_string(),
                vec![cast_json],
            ))))
        }
        DialectType::DuckDB => {
            // CAST(TO_JSON(x) AS TEXT); a missing argument becomes NULL.
            let arg = args
                .into_iter()
                .next()
                .unwrap_or(Expression::Null(crate::expressions::Null));
            let to_json = Expression::Function(Box::new(Function::new(
                "TO_JSON".to_string(),
                vec![arg],
            )));
            Ok(Expression::Cast(Box::new(Cast {
                this: to_json,
                to: DataType::Text,
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })))
        }
        DialectType::Snowflake => {
            // TO_JSON(x)
            Ok(Expression::Function(Box::new(Function::new(
                "TO_JSON".to_string(),
                args,
            ))))
        }
        // Default: keep TO_JSON_STRING unchanged
        _ => Ok(Expression::Function(Box::new(Function::new(
            "TO_JSON_STRING".to_string(),
            args,
        )))),
    }
}
28448
// SAFE_ADD(x, y) -> dedicated SafeAdd node; rendering is target-specific.
"SAFE_ADD" if args.len() == 2 => {
    let x = args.remove(0);
    let y = args.remove(0);
    Ok(Expression::SafeAdd(Box::new(crate::expressions::SafeAdd {
        this: Box::new(x),
        expression: Box::new(y),
    })))
}

// SAFE_SUBTRACT(x, y) -> dedicated SafeSubtract node.
"SAFE_SUBTRACT" if args.len() == 2 => {
    let x = args.remove(0);
    let y = args.remove(0);
    Ok(Expression::SafeSubtract(Box::new(
        crate::expressions::SafeSubtract {
            this: Box::new(x),
            expression: Box::new(y),
        },
    )))
}

// SAFE_MULTIPLY(x, y) -> dedicated SafeMultiply node.
"SAFE_MULTIPLY" if args.len() == 2 => {
    let x = args.remove(0);
    let y = args.remove(0);
    Ok(Expression::SafeMultiply(Box::new(
        crate::expressions::SafeMultiply {
            this: Box::new(x),
            expression: Box::new(y),
        },
    )))
}
28482
// REGEXP_CONTAINS(str, pattern) -> RegexpLike node (no flags argument).
"REGEXP_CONTAINS" if args.len() == 2 => {
    let str_expr = args.remove(0);
    let pattern = args.remove(0);
    Ok(Expression::RegexpLike(Box::new(
        crate::expressions::RegexpFunc {
            this: str_expr,
            pattern,
            flags: None,
        },
    )))
}

// CONTAINS_SUBSTR(a, b) -> CONTAINS(LOWER(a), LOWER(b)); both sides are
// lower-cased, which approximates case-insensitive matching.
"CONTAINS_SUBSTR" if args.len() == 2 => {
    let a = args.remove(0);
    let b = args.remove(0);
    let lower_a = Expression::Lower(Box::new(crate::expressions::UnaryFunc::new(a)));
    let lower_b = Expression::Lower(Box::new(crate::expressions::UnaryFunc::new(b)));
    Ok(Expression::Function(Box::new(Function::new(
        "CONTAINS".to_string(),
        vec![lower_a, lower_b],
    ))))
}

// INT64(x) -> CAST(x AS BIGINT)
"INT64" if args.len() == 1 => {
    let arg = args.remove(0);
    Ok(Expression::Cast(Box::new(Cast {
        this: arg,
        to: DataType::BigInt { length: None },
        trailing_comments: vec![],
        double_colon_syntax: false,
        format: None,
        default: None,
        inferred_type: None,
    })))
}
28521
28522 // INSTR(str, substr) -> target-specific
28523 "INSTR" if args.len() >= 2 => {
28524 let str_expr = args.remove(0);
28525 let substr = args.remove(0);
28526 if matches!(target, DialectType::Snowflake) {
28527 // CHARINDEX(substr, str)
28528 Ok(Expression::Function(Box::new(Function::new(
28529 "CHARINDEX".to_string(),
28530 vec![substr, str_expr],
28531 ))))
28532 } else if matches!(target, DialectType::BigQuery) {
28533 // Keep as INSTR
28534 Ok(Expression::Function(Box::new(Function::new(
28535 "INSTR".to_string(),
28536 vec![str_expr, substr],
28537 ))))
28538 } else {
28539 // Default: keep as INSTR
28540 Ok(Expression::Function(Box::new(Function::new(
28541 "INSTR".to_string(),
28542 vec![str_expr, substr],
28543 ))))
28544 }
28545 }
28546
            // BigQuery DATE_TRUNC(expr, unit) -> DATE_TRUNC('unit', expr) for standard SQL
            "DATE_TRUNC" if args.len() == 2 => {
                let expr = args.remove(0);
                let unit_expr = args.remove(0);
                // Stringified form of the unit, used when the target wants a literal.
                let unit_str = get_unit_str(&unit_expr);

                match target {
                    // These targets take the unit FIRST, as a string literal —
                    // the reverse of BigQuery's (expr, unit) order.
                    DialectType::DuckDB
                    | DialectType::Snowflake
                    | DialectType::PostgreSQL
                    | DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Databricks
                    | DialectType::Spark
                    | DialectType::Redshift
                    | DialectType::ClickHouse
                    | DialectType::TSQL => {
                        // Standard: DATE_TRUNC('UNIT', expr)
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_TRUNC".to_string(),
                            vec![Expression::Literal(Literal::String(unit_str)), expr],
                        ))))
                    }
                    _ => {
                        // Keep BigQuery arg order: DATE_TRUNC(expr, unit)
                        // (the original unit expression is preserved, not stringified)
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_TRUNC".to_string(),
                            vec![expr, unit_expr],
                        ))))
                    }
                }
            }
28579
            // TIMESTAMP_TRUNC / DATETIME_TRUNC -> target-specific.
            // Accepts (ts, unit) or (ts, unit, timezone); the optional third
            // argument only affects the DuckDB path below.
            "TIMESTAMP_TRUNC" | "DATETIME_TRUNC" if args.len() >= 2 => {
                // TIMESTAMP_TRUNC(ts, unit) or TIMESTAMP_TRUNC(ts, unit, timezone)
                let ts = args.remove(0);
                let unit_expr = args.remove(0);
                let tz = if !args.is_empty() {
                    Some(args.remove(0))
                } else {
                    None
                };
                let unit_str = get_unit_str(&unit_expr);

                match target {
                    DialectType::DuckDB => {
                        // DuckDB: DATE_TRUNC('UNIT', CAST(ts AS TIMESTAMPTZ))
                        // With timezone: DATE_TRUNC('UNIT', ts AT TIME ZONE 'tz') AT TIME ZONE 'tz' (for DAY granularity)
                        // Without timezone for MINUTE+ granularity: just DATE_TRUNC
                        let is_coarse = matches!(
                            unit_str.as_str(),
                            "DAY" | "WEEK" | "MONTH" | "QUARTER" | "YEAR"
                        );
                        // For DATETIME_TRUNC, cast string args to TIMESTAMP
                        let cast_ts = if name == "DATETIME_TRUNC" {
                            match ts {
                                // String literals get a plain TIMESTAMP cast instead
                                // of going through maybe_cast_ts_to_tz.
                                Expression::Literal(Literal::String(ref _s)) => {
                                    Expression::Cast(Box::new(Cast {
                                        this: ts,
                                        to: DataType::Timestamp {
                                            precision: None,
                                            timezone: false,
                                        },
                                        trailing_comments: vec![],
                                        double_colon_syntax: false,
                                        format: None,
                                        default: None,
                                        inferred_type: None,
                                    }))
                                }
                                _ => Self::maybe_cast_ts_to_tz(ts, &name),
                            }
                        } else {
                            Self::maybe_cast_ts_to_tz(ts, &name)
                        };

                        if let Some(tz_arg) = tz {
                            if is_coarse {
                                // DATE_TRUNC('UNIT', ts AT TIME ZONE 'tz') AT TIME ZONE 'tz'
                                let at_tz = Expression::AtTimeZone(Box::new(
                                    crate::expressions::AtTimeZone {
                                        this: cast_ts,
                                        zone: tz_arg.clone(),
                                    },
                                ));
                                let date_trunc = Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![Expression::Literal(Literal::String(unit_str)), at_tz],
                                )));
                                Ok(Expression::AtTimeZone(Box::new(
                                    crate::expressions::AtTimeZone {
                                        this: date_trunc,
                                        zone: tz_arg,
                                    },
                                )))
                            } else {
                                // For MINUTE/HOUR: no AT TIME ZONE wrapper, just DATE_TRUNC('UNIT', ts)
                                // (the timezone argument is intentionally dropped here)
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![Expression::Literal(Literal::String(unit_str)), cast_ts],
                                ))))
                            }
                        } else {
                            // No timezone: DATE_TRUNC('UNIT', CAST(ts AS TIMESTAMPTZ))
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_TRUNC".to_string(),
                                vec![Expression::Literal(Literal::String(unit_str)), cast_ts],
                            ))))
                        }
                    }
                    DialectType::Databricks | DialectType::Spark => {
                        // Databricks/Spark: DATE_TRUNC('UNIT', ts)
                        // (any timezone argument is dropped for these targets)
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_TRUNC".to_string(),
                            vec![Expression::Literal(Literal::String(unit_str)), ts],
                        ))))
                    }
                    _ => {
                        // Default: keep as TIMESTAMP_TRUNC('UNIT', ts, [tz])
                        // NOTE(review): DATETIME_TRUNC input also lands here and is
                        // emitted as TIMESTAMP_TRUNC — confirm that rename is intended.
                        let unit = Expression::Literal(Literal::String(unit_str));
                        let mut date_trunc_args = vec![unit, ts];
                        if let Some(tz_arg) = tz {
                            date_trunc_args.push(tz_arg);
                        }
                        Ok(Expression::Function(Box::new(Function::new(
                            "TIMESTAMP_TRUNC".to_string(),
                            date_trunc_args,
                        ))))
                    }
                }
            }
28679
            // TIME(h, m, s) -> target-specific, TIME('string') -> CAST('string' AS TIME)
            // Arity dispatch: 3 args = constructor, 1 arg = cast, 2 args = value + timezone.
            "TIME" => {
                if args.len() == 3 {
                    // TIME(h, m, s) constructor
                    match target {
                        DialectType::TSQL => {
                            // TIMEFROMPARTS(h, m, s, 0, 0)
                            // Pad the two remaining TIMEFROMPARTS arguments with zeros.
                            args.push(Expression::number(0));
                            args.push(Expression::number(0));
                            Ok(Expression::Function(Box::new(Function::new(
                                "TIMEFROMPARTS".to_string(),
                                args,
                            ))))
                        }
                        DialectType::MySQL => Ok(Expression::Function(Box::new(Function::new(
                            "MAKETIME".to_string(),
                            args,
                        )))),
                        DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
                            Function::new("MAKE_TIME".to_string(), args),
                        ))),
                        _ => Ok(Expression::Function(Box::new(Function::new(
                            "TIME".to_string(),
                            args,
                        )))),
                    }
                } else if args.len() == 1 {
                    let arg = args.remove(0);
                    if matches!(target, DialectType::Spark) {
                        // Spark: CAST(x AS TIMESTAMP) (yes, TIMESTAMP not TIME)
                        Ok(Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    } else {
                        // Most targets: CAST(x AS TIME)
                        Ok(Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Time {
                                precision: None,
                                timezone: false,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    }
                } else if args.len() == 2 {
                    // TIME(expr, timezone) -> CAST(CAST(expr AS TIMESTAMPTZ) AT TIME ZONE tz AS TIME)
                    let expr = args.remove(0);
                    let tz = args.remove(0);
                    let cast_tstz = Expression::Cast(Box::new(Cast {
                        this: expr,
                        to: DataType::Timestamp {
                            timezone: true,
                            precision: None,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    }));
                    let at_tz = Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                        this: cast_tstz,
                        zone: tz,
                    }));
                    Ok(Expression::Cast(Box::new(Cast {
                        this: at_tz,
                        to: DataType::Time {
                            precision: None,
                            timezone: false,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                } else {
                    // Any other arity: pass through unchanged.
                    Ok(Expression::Function(Box::new(Function::new(
                        "TIME".to_string(),
                        args,
                    ))))
                }
            }
28776
            // DATETIME('string') -> CAST('string' AS TIMESTAMP)
            // DATETIME('date', TIME 'time') -> CAST(CAST('date' AS DATE) + CAST('time' AS TIME) AS TIMESTAMP)
            // DATETIME('string', 'timezone') -> CAST(CAST('string' AS TIMESTAMPTZ) AT TIME ZONE tz AS TIMESTAMP)
            // DATETIME(y, m, d, h, min, s) -> target-specific
            "DATETIME" => {
                // For BigQuery target: keep DATETIME function but convert TIME literal to CAST
                if matches!(target, DialectType::BigQuery) {
                    if args.len() == 2 {
                        let has_time_literal =
                            matches!(&args[1], Expression::Literal(Literal::Time(_)));
                        if has_time_literal {
                            let first = args.remove(0);
                            let second = args.remove(0);
                            // Rewrite TIME 'x' -> CAST('x' AS TIME); anything else passes through.
                            let time_as_cast = match second {
                                Expression::Literal(Literal::Time(s)) => {
                                    Expression::Cast(Box::new(Cast {
                                        this: Expression::Literal(Literal::String(s)),
                                        to: DataType::Time {
                                            precision: None,
                                            timezone: false,
                                        },
                                        trailing_comments: vec![],
                                        double_colon_syntax: false,
                                        format: None,
                                        default: None,
                                        inferred_type: None,
                                    }))
                                }
                                other => other,
                            };
                            return Ok(Expression::Function(Box::new(Function::new(
                                "DATETIME".to_string(),
                                vec![first, time_as_cast],
                            ))));
                        }
                    }
                    // BQ->BQ with any other shape: keep DATETIME untouched.
                    return Ok(Expression::Function(Box::new(Function::new(
                        "DATETIME".to_string(),
                        args,
                    ))));
                }

                if args.len() == 1 {
                    // Single argument: plain CAST to TIMESTAMP.
                    let arg = args.remove(0);
                    Ok(Expression::Cast(Box::new(Cast {
                        this: arg,
                        to: DataType::Timestamp {
                            timezone: false,
                            precision: None,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                } else if args.len() == 2 {
                    let first = args.remove(0);
                    let second = args.remove(0);
                    // Check if second arg is a TIME literal
                    let is_time_literal = matches!(&second, Expression::Literal(Literal::Time(_)));
                    if is_time_literal {
                        // DATETIME('date', TIME 'time') -> CAST(CAST(date AS DATE) + CAST('time' AS TIME) AS TIMESTAMP)
                        let cast_date = Expression::Cast(Box::new(Cast {
                            this: first,
                            to: DataType::Date,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        // Convert TIME 'x' literal to string 'x' so CAST produces CAST('x' AS TIME) not CAST(TIME 'x' AS TIME)
                        let time_as_string = match second {
                            Expression::Literal(Literal::Time(s)) => {
                                Expression::Literal(Literal::String(s))
                            }
                            other => other,
                        };
                        let cast_time = Expression::Cast(Box::new(Cast {
                            this: time_as_string,
                            to: DataType::Time {
                                precision: None,
                                timezone: false,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        // date + time, then cast the sum up to TIMESTAMP.
                        let add_expr =
                            Expression::Add(Box::new(BinaryOp::new(cast_date, cast_time)));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: add_expr,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    } else {
                        // DATETIME('string', 'timezone')
                        let cast_tstz = Expression::Cast(Box::new(Cast {
                            this: first,
                            to: DataType::Timestamp {
                                timezone: true,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        let at_tz =
                            Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                                this: cast_tstz,
                                zone: second,
                            }));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: at_tz,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    }
                } else if args.len() >= 3 {
                    // DATETIME(y, m, d, h, min, s) -> TIMESTAMP_FROM_PARTS for Snowflake
                    // For other targets, use MAKE_TIMESTAMP or similar
                    if matches!(target, DialectType::Snowflake) {
                        Ok(Expression::Function(Box::new(Function::new(
                            "TIMESTAMP_FROM_PARTS".to_string(),
                            args,
                        ))))
                    } else {
                        // Non-Snowflake targets currently just keep DATETIME as-is.
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATETIME".to_string(),
                            args,
                        ))))
                    }
                } else {
                    // Zero args: pass through unchanged.
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATETIME".to_string(),
                        args,
                    ))))
                }
            }
28935
            // TIMESTAMP(x) -> CAST(x AS TIMESTAMP WITH TIME ZONE) for Presto
            // TIMESTAMP(x, tz) -> CAST(x AS TIMESTAMP) AT TIME ZONE tz for DuckDB
            "TIMESTAMP" => {
                if args.len() == 1 {
                    // One arg: cast to TIMESTAMP WITH TIME ZONE for all targets.
                    let arg = args.remove(0);
                    Ok(Expression::Cast(Box::new(Cast {
                        this: arg,
                        to: DataType::Timestamp {
                            timezone: true,
                            precision: None,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                } else if args.len() == 2 {
                    let arg = args.remove(0);
                    let tz = args.remove(0);
                    // Inner cast to plain TIMESTAMP (no zone); zone is applied below.
                    let cast_ts = Expression::Cast(Box::new(Cast {
                        this: arg,
                        to: DataType::Timestamp {
                            timezone: false,
                            precision: None,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    }));
                    if matches!(target, DialectType::Snowflake) {
                        // CONVERT_TIMEZONE('tz', CAST(x AS TIMESTAMP))
                        Ok(Expression::Function(Box::new(Function::new(
                            "CONVERT_TIMEZONE".to_string(),
                            vec![tz, cast_ts],
                        ))))
                    } else {
                        // Everything else: CAST(x AS TIMESTAMP) AT TIME ZONE tz
                        Ok(Expression::AtTimeZone(Box::new(
                            crate::expressions::AtTimeZone {
                                this: cast_ts,
                                zone: tz,
                            },
                        )))
                    }
                } else {
                    // Unexpected arity: pass through unchanged.
                    Ok(Expression::Function(Box::new(Function::new(
                        "TIMESTAMP".to_string(),
                        args,
                    ))))
                }
            }
28989
            // STRING(x) -> CAST(x AS VARCHAR/TEXT)
            // STRING(x, tz) -> CAST(CAST(x AS TIMESTAMP) AT TIME ZONE 'UTC' AT TIME ZONE tz AS VARCHAR/TEXT)
            "STRING" => {
                if args.len() == 1 {
                    let arg = args.remove(0);
                    // DuckDB prefers TEXT; everyone else gets an unsized VARCHAR.
                    let cast_type = match target {
                        DialectType::DuckDB => DataType::Text,
                        _ => DataType::VarChar {
                            length: None,
                            parenthesized_length: false,
                        },
                    };
                    Ok(Expression::Cast(Box::new(Cast {
                        this: arg,
                        to: cast_type,
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                } else if args.len() == 2 {
                    let arg = args.remove(0);
                    let tz = args.remove(0);
                    // Same target-specific string type as the one-arg form.
                    let cast_type = match target {
                        DialectType::DuckDB => DataType::Text,
                        _ => DataType::VarChar {
                            length: None,
                            parenthesized_length: false,
                        },
                    };
                    if matches!(target, DialectType::Snowflake) {
                        // STRING(x, tz) -> CAST(CONVERT_TIMEZONE('UTC', tz, x) AS VARCHAR)
                        let convert_tz = Expression::Function(Box::new(Function::new(
                            "CONVERT_TIMEZONE".to_string(),
                            vec![
                                Expression::Literal(Literal::String("UTC".to_string())),
                                tz,
                                arg,
                            ],
                        )));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: convert_tz,
                            to: cast_type,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    } else {
                        // STRING(x, tz) -> CAST(CAST(x AS TIMESTAMP) AT TIME ZONE 'UTC' AT TIME ZONE tz AS TEXT/VARCHAR)
                        let cast_ts = Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        // Anchor the naive timestamp at UTC first, then shift to tz.
                        let at_utc =
                            Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                                this: cast_ts,
                                zone: Expression::Literal(Literal::String("UTC".to_string())),
                            }));
                        let at_tz =
                            Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                                this: at_utc,
                                zone: tz,
                            }));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: at_tz,
                            to: cast_type,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    }
                } else {
                    // Unexpected arity: pass through unchanged.
                    Ok(Expression::Function(Box::new(Function::new(
                        "STRING".to_string(),
                        args,
                    ))))
                }
            }
29081
            // UNIX_SECONDS, UNIX_MILLIS, UNIX_MICROS as functions (not expressions)
            "UNIX_SECONDS" if args.len() == 1 => {
                let ts = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // CAST(EPOCH(CAST(ts AS TIMESTAMPTZ)) AS BIGINT)
                        // The outer CAST pins the result to an integer second count.
                        let cast_ts = Self::ensure_cast_timestamptz(ts);
                        let epoch = Expression::Function(Box::new(Function::new(
                            "EPOCH".to_string(),
                            vec![cast_ts],
                        )));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: epoch,
                            to: DataType::BigInt { length: None },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    }
                    DialectType::Snowflake => {
                        // TIMESTAMPDIFF(SECONDS, CAST('1970-01-01 00:00:00+00' AS TIMESTAMPTZ), ts)
                        // i.e. seconds elapsed since the Unix epoch literal below.
                        let epoch = Expression::Cast(Box::new(Cast {
                            this: Expression::Literal(Literal::String(
                                "1970-01-01 00:00:00+00".to_string(),
                            )),
                            to: DataType::Timestamp {
                                timezone: true,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        Ok(Expression::TimestampDiff(Box::new(
                            crate::expressions::TimestampDiff {
                                this: Box::new(epoch),
                                expression: Box::new(ts),
                                unit: Some("SECONDS".to_string()),
                            },
                        )))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "UNIX_SECONDS".to_string(),
                        vec![ts],
                    )))),
                }
            }
29133
29134 "UNIX_MILLIS" if args.len() == 1 => {
29135 let ts = args.remove(0);
29136 match target {
29137 DialectType::DuckDB => {
29138 // EPOCH_MS(CAST(ts AS TIMESTAMPTZ))
29139 let cast_ts = Self::ensure_cast_timestamptz(ts);
29140 Ok(Expression::Function(Box::new(Function::new(
29141 "EPOCH_MS".to_string(),
29142 vec![cast_ts],
29143 ))))
29144 }
29145 _ => Ok(Expression::Function(Box::new(Function::new(
29146 "UNIX_MILLIS".to_string(),
29147 vec![ts],
29148 )))),
29149 }
29150 }
29151
29152 "UNIX_MICROS" if args.len() == 1 => {
29153 let ts = args.remove(0);
29154 match target {
29155 DialectType::DuckDB => {
29156 // EPOCH_US(CAST(ts AS TIMESTAMPTZ))
29157 let cast_ts = Self::ensure_cast_timestamptz(ts);
29158 Ok(Expression::Function(Box::new(Function::new(
29159 "EPOCH_US".to_string(),
29160 vec![cast_ts],
29161 ))))
29162 }
29163 _ => Ok(Expression::Function(Box::new(Function::new(
29164 "UNIX_MICROS".to_string(),
29165 vec![ts],
29166 )))),
29167 }
29168 }
29169
29170 // ARRAY_CONCAT / LIST_CONCAT -> target-specific
29171 "ARRAY_CONCAT" | "LIST_CONCAT" => {
29172 match target {
29173 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
29174 // CONCAT(arr1, arr2, ...)
29175 Ok(Expression::Function(Box::new(Function::new(
29176 "CONCAT".to_string(),
29177 args,
29178 ))))
29179 }
29180 DialectType::Presto | DialectType::Trino => {
29181 // CONCAT(arr1, arr2, ...)
29182 Ok(Expression::Function(Box::new(Function::new(
29183 "CONCAT".to_string(),
29184 args,
29185 ))))
29186 }
29187 DialectType::Snowflake => {
29188 // ARRAY_CAT(arr1, ARRAY_CAT(arr2, arr3))
29189 if args.len() == 1 {
29190 // ARRAY_CAT requires 2 args, add empty array as []
29191 let empty_arr = Expression::ArrayFunc(Box::new(
29192 crate::expressions::ArrayConstructor {
29193 expressions: vec![],
29194 bracket_notation: true,
29195 use_list_keyword: false,
29196 },
29197 ));
29198 let mut new_args = args;
29199 new_args.push(empty_arr);
29200 Ok(Expression::Function(Box::new(Function::new(
29201 "ARRAY_CAT".to_string(),
29202 new_args,
29203 ))))
29204 } else if args.is_empty() {
29205 Ok(Expression::Function(Box::new(Function::new(
29206 "ARRAY_CAT".to_string(),
29207 args,
29208 ))))
29209 } else {
29210 let mut it = args.into_iter().rev();
29211 let mut result = it.next().unwrap();
29212 for arr in it {
29213 result = Expression::Function(Box::new(Function::new(
29214 "ARRAY_CAT".to_string(),
29215 vec![arr, result],
29216 )));
29217 }
29218 Ok(result)
29219 }
29220 }
29221 DialectType::PostgreSQL => {
29222 // ARRAY_CAT(arr1, ARRAY_CAT(arr2, arr3))
29223 if args.len() <= 1 {
29224 Ok(Expression::Function(Box::new(Function::new(
29225 "ARRAY_CAT".to_string(),
29226 args,
29227 ))))
29228 } else {
29229 let mut it = args.into_iter().rev();
29230 let mut result = it.next().unwrap();
29231 for arr in it {
29232 result = Expression::Function(Box::new(Function::new(
29233 "ARRAY_CAT".to_string(),
29234 vec![arr, result],
29235 )));
29236 }
29237 Ok(result)
29238 }
29239 }
29240 DialectType::Redshift => {
29241 // ARRAY_CONCAT(arr1, ARRAY_CONCAT(arr2, arr3))
29242 if args.len() <= 2 {
29243 Ok(Expression::Function(Box::new(Function::new(
29244 "ARRAY_CONCAT".to_string(),
29245 args,
29246 ))))
29247 } else {
29248 let mut it = args.into_iter().rev();
29249 let mut result = it.next().unwrap();
29250 for arr in it {
29251 result = Expression::Function(Box::new(Function::new(
29252 "ARRAY_CONCAT".to_string(),
29253 vec![arr, result],
29254 )));
29255 }
29256 Ok(result)
29257 }
29258 }
29259 DialectType::DuckDB => {
29260 // LIST_CONCAT supports multiple args natively in DuckDB
29261 Ok(Expression::Function(Box::new(Function::new(
29262 "LIST_CONCAT".to_string(),
29263 args,
29264 ))))
29265 }
29266 _ => Ok(Expression::Function(Box::new(Function::new(
29267 "ARRAY_CONCAT".to_string(),
29268 args,
29269 )))),
29270 }
29271 }
29272
            // ARRAY_CONCAT_AGG -> Snowflake: ARRAY_FLATTEN(ARRAY_AGG(x))
            "ARRAY_CONCAT_AGG" if args.len() == 1 => {
                let arg = args.remove(0);
                match target {
                    DialectType::Snowflake => {
                        // Aggregate the arrays first, then flatten the result;
                        // all aggregate modifiers are left at their defaults.
                        let array_agg =
                            Expression::ArrayAgg(Box::new(crate::expressions::AggFunc {
                                this: arg,
                                distinct: false,
                                filter: None,
                                order_by: vec![],
                                name: None,
                                ignore_nulls: None,
                                having_max: None,
                                limit: None,
                                inferred_type: None,
                            }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "ARRAY_FLATTEN".to_string(),
                            vec![array_agg],
                        ))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "ARRAY_CONCAT_AGG".to_string(),
                        vec![arg],
                    )))),
                }
            }
29301
            // MD5/SHA1/SHA256/SHA512 -> target-specific hash functions
            "MD5" if args.len() == 1 => {
                let arg = args.remove(0);
                match target {
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
                        // UNHEX(MD5(x)) — the UNHEX wrap converts the hex-text digest
                        // back to binary (presumably to match BigQuery's BYTES result).
                        let md5 = Expression::Function(Box::new(Function::new(
                            "MD5".to_string(),
                            vec![arg],
                        )));
                        Ok(Expression::Function(Box::new(Function::new(
                            "UNHEX".to_string(),
                            vec![md5],
                        ))))
                    }
                    DialectType::Snowflake => {
                        // MD5_BINARY(x)
                        Ok(Expression::Function(Box::new(Function::new(
                            "MD5_BINARY".to_string(),
                            vec![arg],
                        ))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "MD5".to_string(),
                        vec![arg],
                    )))),
                }
            }
29330
29331 "SHA1" if args.len() == 1 => {
29332 let arg = args.remove(0);
29333 match target {
29334 DialectType::DuckDB => {
29335 // UNHEX(SHA1(x))
29336 let sha1 = Expression::Function(Box::new(Function::new(
29337 "SHA1".to_string(),
29338 vec![arg],
29339 )));
29340 Ok(Expression::Function(Box::new(Function::new(
29341 "UNHEX".to_string(),
29342 vec![sha1],
29343 ))))
29344 }
29345 _ => Ok(Expression::Function(Box::new(Function::new(
29346 "SHA1".to_string(),
29347 vec![arg],
29348 )))),
29349 }
29350 }
29351
29352 "SHA256" if args.len() == 1 => {
29353 let arg = args.remove(0);
29354 match target {
29355 DialectType::DuckDB => {
29356 // UNHEX(SHA256(x))
29357 let sha = Expression::Function(Box::new(Function::new(
29358 "SHA256".to_string(),
29359 vec![arg],
29360 )));
29361 Ok(Expression::Function(Box::new(Function::new(
29362 "UNHEX".to_string(),
29363 vec![sha],
29364 ))))
29365 }
29366 DialectType::Snowflake => {
29367 // SHA2_BINARY(x, 256)
29368 Ok(Expression::Function(Box::new(Function::new(
29369 "SHA2_BINARY".to_string(),
29370 vec![arg, Expression::number(256)],
29371 ))))
29372 }
29373 DialectType::Redshift | DialectType::Spark => {
29374 // SHA2(x, 256)
29375 Ok(Expression::Function(Box::new(Function::new(
29376 "SHA2".to_string(),
29377 vec![arg, Expression::number(256)],
29378 ))))
29379 }
29380 _ => Ok(Expression::Function(Box::new(Function::new(
29381 "SHA256".to_string(),
29382 vec![arg],
29383 )))),
29384 }
29385 }
29386
29387 "SHA512" if args.len() == 1 => {
29388 let arg = args.remove(0);
29389 match target {
29390 DialectType::Snowflake => {
29391 // SHA2_BINARY(x, 512)
29392 Ok(Expression::Function(Box::new(Function::new(
29393 "SHA2_BINARY".to_string(),
29394 vec![arg, Expression::number(512)],
29395 ))))
29396 }
29397 DialectType::Redshift | DialectType::Spark => {
29398 // SHA2(x, 512)
29399 Ok(Expression::Function(Box::new(Function::new(
29400 "SHA2".to_string(),
29401 vec![arg, Expression::number(512)],
29402 ))))
29403 }
29404 _ => Ok(Expression::Function(Box::new(Function::new(
29405 "SHA512".to_string(),
29406 vec![arg],
29407 )))),
29408 }
29409 }
29410
            // REGEXP_EXTRACT_ALL(str, pattern) -> add default group arg
            "REGEXP_EXTRACT_ALL" if args.len() == 2 => {
                let str_expr = args.remove(0);
                let pattern = args.remove(0);

                // Check if pattern contains capturing groups (parentheses)
                // NOTE(review): this heuristic counts ANY parentheses — escaped
                // `\(` and non-capturing `(?:...)` groups also match; confirm
                // whether that over-approximation is acceptable here.
                let has_groups = match &pattern {
                    Expression::Literal(Literal::String(s)) => s.contains('(') && s.contains(')'),
                    _ => false,
                };

                match target {
                    DialectType::DuckDB => {
                        // DuckDB needs an explicit group index: 1 when the pattern
                        // captures, 0 (whole match) otherwise.
                        let group = if has_groups {
                            Expression::number(1)
                        } else {
                            Expression::number(0)
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "REGEXP_EXTRACT_ALL".to_string(),
                            vec![str_expr, pattern, group],
                        ))))
                    }
                    DialectType::Spark | DialectType::Databricks => {
                        // Spark's default group_index is 1 (same as BigQuery), so omit for capturing groups
                        if has_groups {
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![str_expr, pattern],
                            ))))
                        } else {
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![str_expr, pattern, Expression::number(0)],
                            ))))
                        }
                    }
                    DialectType::Presto | DialectType::Trino => {
                        // Presto/Trino: pass group 1 explicitly only when capturing.
                        if has_groups {
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![str_expr, pattern, Expression::number(1)],
                            ))))
                        } else {
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![str_expr, pattern],
                            ))))
                        }
                    }
                    DialectType::Snowflake => {
                        if has_groups {
                            // REGEXP_EXTRACT_ALL(str, pattern, 1, 1, 'c', 1)
                            // (position, occurrence, parameters, group — Snowflake's
                            // extended signature; 'c' = case-sensitive matching)
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![
                                    str_expr,
                                    pattern,
                                    Expression::number(1),
                                    Expression::number(1),
                                    Expression::Literal(Literal::String("c".to_string())),
                                    Expression::number(1),
                                ],
                            ))))
                        } else {
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![str_expr, pattern],
                            ))))
                        }
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "REGEXP_EXTRACT_ALL".to_string(),
                        vec![str_expr, pattern],
                    )))),
                }
            }
29488
29489 // MOD(x, y) -> x % y for PostgreSQL/DuckDB
29490 "MOD" if args.len() == 2 => {
29491 match target {
29492 DialectType::PostgreSQL
29493 | DialectType::DuckDB
29494 | DialectType::Presto
29495 | DialectType::Trino
29496 | DialectType::Athena
29497 | DialectType::Snowflake => {
29498 let x = args.remove(0);
29499 let y = args.remove(0);
29500 // Wrap complex expressions in parens to preserve precedence
29501 let needs_paren = |e: &Expression| {
29502 matches!(
29503 e,
29504 Expression::Add(_)
29505 | Expression::Sub(_)
29506 | Expression::Mul(_)
29507 | Expression::Div(_)
29508 )
29509 };
29510 let x = if needs_paren(&x) {
29511 Expression::Paren(Box::new(crate::expressions::Paren {
29512 this: x,
29513 trailing_comments: vec![],
29514 }))
29515 } else {
29516 x
29517 };
29518 let y = if needs_paren(&y) {
29519 Expression::Paren(Box::new(crate::expressions::Paren {
29520 this: y,
29521 trailing_comments: vec![],
29522 }))
29523 } else {
29524 y
29525 };
29526 Ok(Expression::Mod(Box::new(
29527 crate::expressions::BinaryOp::new(x, y),
29528 )))
29529 }
29530 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
29531 // Hive/Spark: a % b
29532 let x = args.remove(0);
29533 let y = args.remove(0);
29534 let needs_paren = |e: &Expression| {
29535 matches!(
29536 e,
29537 Expression::Add(_)
29538 | Expression::Sub(_)
29539 | Expression::Mul(_)
29540 | Expression::Div(_)
29541 )
29542 };
29543 let x = if needs_paren(&x) {
29544 Expression::Paren(Box::new(crate::expressions::Paren {
29545 this: x,
29546 trailing_comments: vec![],
29547 }))
29548 } else {
29549 x
29550 };
29551 let y = if needs_paren(&y) {
29552 Expression::Paren(Box::new(crate::expressions::Paren {
29553 this: y,
29554 trailing_comments: vec![],
29555 }))
29556 } else {
29557 y
29558 };
29559 Ok(Expression::Mod(Box::new(
29560 crate::expressions::BinaryOp::new(x, y),
29561 )))
29562 }
29563 _ => Ok(Expression::Function(Box::new(Function::new(
29564 "MOD".to_string(),
29565 args,
29566 )))),
29567 }
29568 }
29569
29570 // ARRAY_FILTER(arr, lambda) -> FILTER for Hive/Spark/Presto, ARRAY_FILTER for StarRocks
29571 "ARRAY_FILTER" if args.len() == 2 => {
29572 let name = match target {
29573 DialectType::DuckDB => "LIST_FILTER",
29574 DialectType::StarRocks => "ARRAY_FILTER",
29575 _ => "FILTER",
29576 };
29577 Ok(Expression::Function(Box::new(Function::new(
29578 name.to_string(),
29579 args,
29580 ))))
29581 }
29582 // FILTER(arr, lambda) -> ARRAY_FILTER for StarRocks, LIST_FILTER for DuckDB
29583 "FILTER" if args.len() == 2 => {
29584 let name = match target {
29585 DialectType::DuckDB => "LIST_FILTER",
29586 DialectType::StarRocks => "ARRAY_FILTER",
29587 _ => "FILTER",
29588 };
29589 Ok(Expression::Function(Box::new(Function::new(
29590 name.to_string(),
29591 args,
29592 ))))
29593 }
            // REDUCE(arr, init, lambda1, lambda2) -> AGGREGATE for Spark
            // (Spark/Databricks name the higher-order fold AGGREGATE; others keep REDUCE)
            "REDUCE" if args.len() >= 3 => {
                let name = match target {
                    DialectType::Spark | DialectType::Databricks => "AGGREGATE",
                    _ => "REDUCE",
                };
                Ok(Expression::Function(Box::new(Function::new(
                    name.to_string(),
                    args,
                ))))
            }
            // ARRAY_REVERSE(x) -> arrayReverse for ClickHouse (handled by generator)
            // Pure pass-through here: the generator layer owns the renaming.
            "ARRAY_REVERSE" if args.len() == 1 => Ok(Expression::Function(Box::new(
                Function::new("ARRAY_REVERSE".to_string(), args),
            ))),
29609
            // CONCAT(a, b, ...) -> a || b || ... for DuckDB with 3+ args
            "CONCAT" if args.len() > 2 => match target {
                DialectType::DuckDB => {
                    // Left-fold into a chain of `||` nodes: ((a || b) || c) ...
                    // `unwrap` is safe: the guard ensures at least three args.
                    let mut it = args.into_iter();
                    let mut result = it.next().unwrap();
                    for arg in it {
                        result = Expression::DPipe(Box::new(crate::expressions::DPipe {
                            this: Box::new(result),
                            expression: Box::new(arg),
                            safe: None,
                        }));
                    }
                    Ok(result)
                }
                _ => Ok(Expression::Function(Box::new(Function::new(
                    "CONCAT".to_string(),
                    args,
                )))),
            },
29629
29630 // GENERATE_DATE_ARRAY(start, end[, step]) -> target-specific
29631 "GENERATE_DATE_ARRAY" => {
29632 if matches!(target, DialectType::BigQuery) {
29633 // BQ->BQ: add default interval if not present
29634 if args.len() == 2 {
29635 let start = args.remove(0);
29636 let end = args.remove(0);
29637 let default_interval =
29638 Expression::Interval(Box::new(crate::expressions::Interval {
29639 this: Some(Expression::Literal(Literal::String("1".to_string()))),
29640 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
29641 unit: crate::expressions::IntervalUnit::Day,
29642 use_plural: false,
29643 }),
29644 }));
29645 Ok(Expression::Function(Box::new(Function::new(
29646 "GENERATE_DATE_ARRAY".to_string(),
29647 vec![start, end, default_interval],
29648 ))))
29649 } else {
29650 Ok(Expression::Function(Box::new(Function::new(
29651 "GENERATE_DATE_ARRAY".to_string(),
29652 args,
29653 ))))
29654 }
29655 } else if matches!(target, DialectType::DuckDB) {
29656 // DuckDB: CAST(GENERATE_SERIES(CAST(start AS DATE), CAST(end AS DATE), step) AS DATE[])
29657 let start = args.get(0).cloned();
29658 let end = args.get(1).cloned();
29659 let step = args.get(2).cloned().or_else(|| {
29660 Some(Expression::Interval(Box::new(
29661 crate::expressions::Interval {
29662 this: Some(Expression::Literal(Literal::String("1".to_string()))),
29663 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
29664 unit: crate::expressions::IntervalUnit::Day,
29665 use_plural: false,
29666 }),
29667 },
29668 )))
29669 });
29670
29671 // Wrap start/end in CAST(... AS DATE) only for string literals
29672 let maybe_cast_date = |expr: Expression| -> Expression {
29673 if matches!(&expr, Expression::Literal(Literal::String(_))) {
29674 Expression::Cast(Box::new(Cast {
29675 this: expr,
29676 to: DataType::Date,
29677 trailing_comments: vec![],
29678 double_colon_syntax: false,
29679 format: None,
29680 default: None,
29681 inferred_type: None,
29682 }))
29683 } else {
29684 expr
29685 }
29686 };
29687 let cast_start = start.map(maybe_cast_date);
29688 let cast_end = end.map(maybe_cast_date);
29689
29690 let gen_series =
29691 Expression::GenerateSeries(Box::new(crate::expressions::GenerateSeries {
29692 start: cast_start.map(Box::new),
29693 end: cast_end.map(Box::new),
29694 step: step.map(Box::new),
29695 is_end_exclusive: None,
29696 }));
29697
29698 // Wrap in CAST(... AS DATE[])
29699 Ok(Expression::Cast(Box::new(Cast {
29700 this: gen_series,
29701 to: DataType::Array {
29702 element_type: Box::new(DataType::Date),
29703 dimension: None,
29704 },
29705 trailing_comments: vec![],
29706 double_colon_syntax: false,
29707 format: None,
29708 default: None,
29709 inferred_type: None,
29710 })))
29711 } else if matches!(target, DialectType::Snowflake) {
29712 // Snowflake: keep as GENERATE_DATE_ARRAY function for later transform
29713 // (transform_generate_date_array_snowflake will convert to ARRAY_GENERATE_RANGE + DATEADD)
29714 if args.len() == 2 {
29715 let start = args.remove(0);
29716 let end = args.remove(0);
29717 let default_interval =
29718 Expression::Interval(Box::new(crate::expressions::Interval {
29719 this: Some(Expression::Literal(Literal::String("1".to_string()))),
29720 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
29721 unit: crate::expressions::IntervalUnit::Day,
29722 use_plural: false,
29723 }),
29724 }));
29725 Ok(Expression::Function(Box::new(Function::new(
29726 "GENERATE_DATE_ARRAY".to_string(),
29727 vec![start, end, default_interval],
29728 ))))
29729 } else {
29730 Ok(Expression::Function(Box::new(Function::new(
29731 "GENERATE_DATE_ARRAY".to_string(),
29732 args,
29733 ))))
29734 }
29735 } else {
29736 // Convert to GenerateSeries for other targets
29737 let start = args.get(0).cloned();
29738 let end = args.get(1).cloned();
29739 let step = args.get(2).cloned().or_else(|| {
29740 Some(Expression::Interval(Box::new(
29741 crate::expressions::Interval {
29742 this: Some(Expression::Literal(Literal::String("1".to_string()))),
29743 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
29744 unit: crate::expressions::IntervalUnit::Day,
29745 use_plural: false,
29746 }),
29747 },
29748 )))
29749 });
29750 Ok(Expression::GenerateSeries(Box::new(
29751 crate::expressions::GenerateSeries {
29752 start: start.map(Box::new),
29753 end: end.map(Box::new),
29754 step: step.map(Box::new),
29755 is_end_exclusive: None,
29756 },
29757 )))
29758 }
29759 }
29760
// PARSE_DATE(format, str) -> target-specific.
// BigQuery argument order is (format, string); targets below that take
// (string, format) get the arguments swapped.
"PARSE_DATE" if args.len() == 2 => {
    let format = args.remove(0);
    let str_expr = args.remove(0);
    match target {
        DialectType::DuckDB => {
            // CAST(STRPTIME(str, duck_format) AS DATE)
            // STRPTIME returns a timestamp, hence the CAST down to DATE.
            let duck_format = Self::bq_format_to_duckdb(&format);
            let strptime = Expression::Function(Box::new(Function::new(
                "STRPTIME".to_string(),
                vec![str_expr, duck_format],
            )));
            Ok(Expression::Cast(Box::new(Cast {
                this: strptime,
                to: DataType::Date,
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })))
        }
        DialectType::Snowflake => {
            // _POLYGLOT_DATE(str, snowflake_format)
            // Use marker so Snowflake target transform keeps it as DATE() instead of TO_DATE()
            let sf_format = Self::bq_format_to_snowflake(&format);
            Ok(Expression::Function(Box::new(Function::new(
                "_POLYGLOT_DATE".to_string(),
                vec![str_expr, sf_format],
            ))))
        }
        // Everything else keeps PARSE_DATE with the original (format, str) order.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "PARSE_DATE".to_string(),
            vec![format, str_expr],
        )))),
    }
}
29798
// PARSE_TIMESTAMP(format, str[, tz]) -> target-specific.
"PARSE_TIMESTAMP" if args.len() >= 2 => {
    let format = args.remove(0);
    let str_expr = args.remove(0);
    // Optional third argument: a time zone name.
    let tz = if !args.is_empty() {
        Some(args.remove(0))
    } else {
        None
    };
    match target {
        DialectType::DuckDB => {
            // STRPTIME(str, duck_format) — argument order swapped vs BigQuery.
            // NOTE(review): the optional tz argument is silently dropped on
            // this path — confirm that is the intended DuckDB behavior.
            let duck_format = Self::bq_format_to_duckdb(&format);
            let strptime = Expression::Function(Box::new(Function::new(
                "STRPTIME".to_string(),
                vec![str_expr, duck_format],
            )));
            Ok(strptime)
        }
        _ => {
            // Other targets: pass PARSE_TIMESTAMP through, re-appending the
            // tz argument when present.
            let mut result_args = vec![format, str_expr];
            if let Some(tz_arg) = tz {
                result_args.push(tz_arg);
            }
            Ok(Expression::Function(Box::new(Function::new(
                "PARSE_TIMESTAMP".to_string(),
                result_args,
            ))))
        }
    }
}
29829
// FORMAT_DATE(format, date) -> target-specific.
"FORMAT_DATE" if args.len() == 2 => {
    let format = args.remove(0);
    let date_expr = args.remove(0);
    match target {
        DialectType::DuckDB => {
            // STRFTIME(CAST(date AS DATE), format)
            // STRFTIME's order is (value, format), reversed vs BigQuery; the
            // CAST forces a DATE operand even for string inputs.
            // NOTE(review): unlike FORMAT_DATETIME, the format string is NOT
            // run through bq_format_to_duckdb here — confirm intentional.
            let cast_date = Expression::Cast(Box::new(Cast {
                this: date_expr,
                to: DataType::Date,
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }));
            Ok(Expression::Function(Box::new(Function::new(
                "STRFTIME".to_string(),
                vec![cast_date, format],
            ))))
        }
        // Other targets keep FORMAT_DATE unchanged.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "FORMAT_DATE".to_string(),
            vec![format, date_expr],
        )))),
    }
}
29857
// FORMAT_DATETIME(format, datetime) -> target-specific.
"FORMAT_DATETIME" if args.len() == 2 => {
    let format = args.remove(0);
    let dt_expr = args.remove(0);

    if matches!(target, DialectType::BigQuery) {
        // BQ->BQ: normalize %H:%M:%S to %T, %x to %D
        let norm_format = Self::bq_format_normalize_bq(&format);
        // Also strip DATETIME keyword from typed literals: a parsed
        // DATETIME '...' literal is re-emitted as CAST('...' AS DATETIME).
        let norm_dt = match dt_expr {
            Expression::Literal(Literal::Timestamp(s)) => {
                Expression::Cast(Box::new(Cast {
                    this: Expression::Literal(Literal::String(s)),
                    to: DataType::Custom {
                        name: "DATETIME".to_string(),
                    },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                }))
            }
            other => other,
        };
        // Early return: the BigQuery case is fully handled here.
        return Ok(Expression::Function(Box::new(Function::new(
            "FORMAT_DATETIME".to_string(),
            vec![norm_format, norm_dt],
        ))));
    }

    match target {
        DialectType::DuckDB => {
            // STRFTIME(CAST(dt AS TIMESTAMP), duckdb_format)
            let cast_dt = Self::ensure_cast_timestamp(dt_expr);
            let duck_format = Self::bq_format_to_duckdb(&format);
            Ok(Expression::Function(Box::new(Function::new(
                "STRFTIME".to_string(),
                vec![cast_dt, duck_format],
            ))))
        }
        // Other targets keep FORMAT_DATETIME unchanged.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "FORMAT_DATETIME".to_string(),
            vec![format, dt_expr],
        )))),
    }
}
29905
// FORMAT_TIMESTAMP(format, ts) -> target-specific.
// Both special-cased targets first force the operand through
// TIMESTAMPTZ and back to plain TIMESTAMP so the rendered text uses the
// session-local wall-clock value rather than an offset-bearing timestamp.
"FORMAT_TIMESTAMP" if args.len() == 2 => {
    let format = args.remove(0);
    let ts_expr = args.remove(0);
    match target {
        DialectType::DuckDB => {
            // STRFTIME(CAST(CAST(ts AS TIMESTAMPTZ) AS TIMESTAMP), format)
            // NOTE(review): format is passed through unconverted here (no
            // bq_format_to_duckdb) — confirm intentional.
            let cast_tstz = Self::ensure_cast_timestamptz(ts_expr);
            let cast_ts = Expression::Cast(Box::new(Cast {
                this: cast_tstz,
                to: DataType::Timestamp {
                    timezone: false,
                    precision: None,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }));
            Ok(Expression::Function(Box::new(Function::new(
                "STRFTIME".to_string(),
                vec![cast_ts, format],
            ))))
        }
        DialectType::Snowflake => {
            // TO_CHAR(CAST(CAST(ts AS TIMESTAMPTZ) AS TIMESTAMP), snowflake_format)
            let cast_tstz = Self::ensure_cast_timestamptz(ts_expr);
            let cast_ts = Expression::Cast(Box::new(Cast {
                this: cast_tstz,
                to: DataType::Timestamp {
                    timezone: false,
                    precision: None,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }));
            let sf_format = Self::bq_format_to_snowflake(&format);
            Ok(Expression::Function(Box::new(Function::new(
                "TO_CHAR".to_string(),
                vec![cast_ts, sf_format],
            ))))
        }
        // Other targets keep FORMAT_TIMESTAMP unchanged.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "FORMAT_TIMESTAMP".to_string(),
            vec![format, ts_expr],
        )))),
    }
}
29958
// UNIX_DATE(date): days since the Unix epoch.
// DuckDB: DATE_DIFF('DAY', CAST('1970-01-01' AS DATE), date).
"UNIX_DATE" if args.len() == 1 => {
    let date = args.remove(0);
    match target {
        DialectType::DuckDB => {
            // Epoch anchor rendered as CAST so DuckDB sees a DATE operand.
            let epoch = Expression::Cast(Box::new(Cast {
                this: Expression::Literal(Literal::String("1970-01-01".to_string())),
                to: DataType::Date,
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }));
            // DATE_DIFF('DAY', epoch, date) but date might be DATE '...' literal
            // Need to convert DATE literal to CAST
            let norm_date = Self::date_literal_to_cast(date);
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_DIFF".to_string(),
                vec![
                    Expression::Literal(Literal::String("DAY".to_string())),
                    epoch,
                    norm_date,
                ],
            ))))
        }
        // Other targets keep UNIX_DATE unchanged.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "UNIX_DATE".to_string(),
            vec![date],
        )))),
    }
}
29991
// UNIX_SECONDS(ts): whole seconds since the Unix epoch.
"UNIX_SECONDS" if args.len() == 1 => {
    let ts = args.remove(0);
    match target {
        DialectType::DuckDB => {
            // CAST(EPOCH(CAST(ts AS TIMESTAMPTZ)) AS BIGINT)
            // EPOCH returns a DOUBLE in DuckDB, hence the CAST to BIGINT.
            let norm_ts = Self::ts_literal_to_cast_tz(ts);
            let epoch = Expression::Function(Box::new(Function::new(
                "EPOCH".to_string(),
                vec![norm_ts],
            )));
            Ok(Expression::Cast(Box::new(Cast {
                this: epoch,
                to: DataType::BigInt { length: None },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })))
        }
        DialectType::Snowflake => {
            // TIMESTAMPDIFF(SECONDS, CAST('1970-01-01 00:00:00+00' AS TIMESTAMPTZ), ts)
            let epoch = Expression::Cast(Box::new(Cast {
                this: Expression::Literal(Literal::String(
                    "1970-01-01 00:00:00+00".to_string(),
                )),
                to: DataType::Timestamp {
                    timezone: true,
                    precision: None,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }));
            // The unit is an Identifier (bare keyword), not a string literal,
            // so it renders unquoted: TIMESTAMPDIFF(SECONDS, ...).
            Ok(Expression::Function(Box::new(Function::new(
                "TIMESTAMPDIFF".to_string(),
                vec![
                    Expression::Identifier(Identifier::new("SECONDS".to_string())),
                    epoch,
                    ts,
                ],
            ))))
        }
        // Other targets keep UNIX_SECONDS unchanged.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "UNIX_SECONDS".to_string(),
            vec![ts],
        )))),
    }
}
30044
30045 // UNIX_MILLIS(ts) -> target-specific
30046 "UNIX_MILLIS" if args.len() == 1 => {
30047 let ts = args.remove(0);
30048 match target {
30049 DialectType::DuckDB => {
30050 let norm_ts = Self::ts_literal_to_cast_tz(ts);
30051 Ok(Expression::Function(Box::new(Function::new(
30052 "EPOCH_MS".to_string(),
30053 vec![norm_ts],
30054 ))))
30055 }
30056 _ => Ok(Expression::Function(Box::new(Function::new(
30057 "UNIX_MILLIS".to_string(),
30058 vec![ts],
30059 )))),
30060 }
30061 }
30062
30063 // UNIX_MICROS(ts) -> target-specific
30064 "UNIX_MICROS" if args.len() == 1 => {
30065 let ts = args.remove(0);
30066 match target {
30067 DialectType::DuckDB => {
30068 let norm_ts = Self::ts_literal_to_cast_tz(ts);
30069 Ok(Expression::Function(Box::new(Function::new(
30070 "EPOCH_US".to_string(),
30071 vec![norm_ts],
30072 ))))
30073 }
30074 _ => Ok(Expression::Function(Box::new(Function::new(
30075 "UNIX_MICROS".to_string(),
30076 vec![ts],
30077 )))),
30078 }
30079 }
30080
30081 // INSTR(str, substr) -> target-specific
30082 "INSTR" => {
30083 if matches!(target, DialectType::BigQuery) {
30084 // BQ->BQ: keep as INSTR
30085 Ok(Expression::Function(Box::new(Function::new(
30086 "INSTR".to_string(),
30087 args,
30088 ))))
30089 } else if matches!(target, DialectType::Snowflake) && args.len() == 2 {
30090 // Snowflake: CHARINDEX(substr, str) - swap args
30091 let str_expr = args.remove(0);
30092 let substr = args.remove(0);
30093 Ok(Expression::Function(Box::new(Function::new(
30094 "CHARINDEX".to_string(),
30095 vec![substr, str_expr],
30096 ))))
30097 } else {
30098 // Keep as INSTR for other targets
30099 Ok(Expression::Function(Box::new(Function::new(
30100 "INSTR".to_string(),
30101 args,
30102 ))))
30103 }
30104 }
30105
// CURRENT_TIMESTAMP / CURRENT_DATE / CURRENT_DATETIME / CURRENT_TIME:
// parens normalization plus timezone-argument handling. Four cases:
//   1. BQ->BQ: keep the parenthesized function form.
//   2. CURRENT_DATE('tz'): rewrite per target.
//   3. Zero-arg forms for targets that want the bare (no-parens) keyword.
//   4. Everything else: keep the function form.
"CURRENT_TIMESTAMP" | "CURRENT_DATE" | "CURRENT_DATETIME" | "CURRENT_TIME" => {
    if matches!(target, DialectType::BigQuery) {
        // BQ->BQ: always output with parens (function form), keep any timezone arg
        Ok(Expression::Function(Box::new(Function::new(name, args))))
    } else if name == "CURRENT_DATE" && args.len() == 1 {
        // CURRENT_DATE('UTC') - has timezone arg
        let tz_arg = args.remove(0);
        match target {
            DialectType::DuckDB => {
                // CAST(CURRENT_TIMESTAMP AT TIME ZONE 'UTC' AS DATE)
                let ct = Expression::CurrentTimestamp(
                    crate::expressions::CurrentTimestamp {
                        precision: None,
                        sysdate: false,
                    },
                );
                let at_tz =
                    Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                        this: ct,
                        zone: tz_arg,
                    }));
                Ok(Expression::Cast(Box::new(Cast {
                    this: at_tz,
                    to: DataType::Date,
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                })))
            }
            DialectType::Snowflake => {
                // CAST(CONVERT_TIMEZONE('UTC', CURRENT_TIMESTAMP()) AS DATE)
                let ct = Expression::Function(Box::new(Function::new(
                    "CURRENT_TIMESTAMP".to_string(),
                    vec![],
                )));
                let convert = Expression::Function(Box::new(Function::new(
                    "CONVERT_TIMEZONE".to_string(),
                    vec![tz_arg, ct],
                )));
                Ok(Expression::Cast(Box::new(Cast {
                    this: convert,
                    to: DataType::Date,
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                })))
            }
            _ => {
                // PostgreSQL, MySQL, etc.: CURRENT_DATE AT TIME ZONE 'UTC'
                let cd = Expression::CurrentDate(crate::expressions::CurrentDate);
                Ok(Expression::AtTimeZone(Box::new(
                    crate::expressions::AtTimeZone {
                        this: cd,
                        zone: tz_arg,
                    },
                )))
            }
        }
    } else if (name == "CURRENT_TIMESTAMP"
        || name == "CURRENT_TIME"
        || name == "CURRENT_DATE")
        && args.is_empty()
        && matches!(
            target,
            DialectType::PostgreSQL
                | DialectType::DuckDB
                | DialectType::Presto
                | DialectType::Trino
        )
    {
        // These targets want no-parens CURRENT_TIMESTAMP / CURRENT_DATE / CURRENT_TIME,
        // expressed as dedicated AST nodes rather than Function calls.
        if name == "CURRENT_TIMESTAMP" {
            Ok(Expression::CurrentTimestamp(
                crate::expressions::CurrentTimestamp {
                    precision: None,
                    sysdate: false,
                },
            ))
        } else if name == "CURRENT_DATE" {
            Ok(Expression::CurrentDate(crate::expressions::CurrentDate))
        } else {
            // CURRENT_TIME
            Ok(Expression::CurrentTime(crate::expressions::CurrentTime {
                precision: None,
            }))
        }
    } else {
        // All other targets: keep as function (with parens)
        Ok(Expression::Function(Box::new(Function::new(name, args))))
    }
}
30202
// JSON_QUERY(json, path) -> target-specific JSON extraction.
"JSON_QUERY" if args.len() == 2 => {
    match target {
        DialectType::DuckDB | DialectType::SQLite => {
            // json -> path syntax (arrow-style extraction node).
            let json_expr = args.remove(0);
            let path = args.remove(0);
            Ok(Expression::JsonExtract(Box::new(
                crate::expressions::JsonExtractFunc {
                    this: json_expr,
                    path,
                    returning: None,
                    arrow_syntax: true,
                    hash_arrow_syntax: false,
                    wrapper_option: None,
                    quotes_option: None,
                    on_scalar_string: false,
                    on_error: None,
                },
            )))
        }
        DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
            // Hive family: GET_JSON_OBJECT(json, path).
            Ok(Expression::Function(Box::new(Function::new(
                "GET_JSON_OBJECT".to_string(),
                args,
            ))))
        }
        // PostgreSQL/Redshift: JSON_EXTRACT_PATH(json, path).
        // NOTE(review): the path argument is forwarded verbatim — confirm the
        // $.key JSONPath form is acceptable to these targets.
        DialectType::PostgreSQL | DialectType::Redshift => Ok(Expression::Function(
            Box::new(Function::new("JSON_EXTRACT_PATH".to_string(), args)),
        )),
        // Other targets keep JSON_QUERY unchanged.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "JSON_QUERY".to_string(),
            args,
        )))),
    }
}
30239
// JSON_VALUE_ARRAY(json, path): extract a JSON array as an array of strings.
"JSON_VALUE_ARRAY" if args.len() == 2 => {
    match target {
        DialectType::DuckDB => {
            // CAST(json -> path AS TEXT[])
            let json_expr = args.remove(0);
            let path = args.remove(0);
            let arrow = Expression::JsonExtract(Box::new(
                crate::expressions::JsonExtractFunc {
                    this: json_expr,
                    path,
                    returning: None,
                    arrow_syntax: true,
                    hash_arrow_syntax: false,
                    wrapper_option: None,
                    quotes_option: None,
                    on_scalar_string: false,
                    on_error: None,
                },
            ));
            // Outer CAST fixes the element type to TEXT.
            Ok(Expression::Cast(Box::new(Cast {
                this: arrow,
                to: DataType::Array {
                    element_type: Box::new(DataType::Text),
                    dimension: None,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })))
        }
        DialectType::Snowflake => {
            // TRANSFORM(GET_PATH(PARSE_JSON(json), path), x -> CAST(x AS VARCHAR))
            let json_expr = args.remove(0);
            let path_expr = args.remove(0);
            // Convert JSON path from $.path to just path
            // (GET_PATH takes a bare dotted path, no leading "$.").
            let sf_path = if let Expression::Literal(Literal::String(ref s)) = path_expr
            {
                let trimmed = s.trim_start_matches('$').trim_start_matches('.');
                Expression::Literal(Literal::String(trimmed.to_string()))
            } else {
                path_expr
            };
            let parse_json = Expression::Function(Box::new(Function::new(
                "PARSE_JSON".to_string(),
                vec![json_expr],
            )));
            let get_path = Expression::Function(Box::new(Function::new(
                "GET_PATH".to_string(),
                vec![parse_json, sf_path],
            )));
            // TRANSFORM(get_path, x -> CAST(x AS VARCHAR))
            let cast_expr = Expression::Cast(Box::new(Cast {
                this: Expression::Identifier(Identifier::new("x")),
                to: DataType::VarChar {
                    length: None,
                    parenthesized_length: false,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }));
            let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                parameters: vec![Identifier::new("x")],
                body: cast_expr,
                colon: false,
                parameter_types: vec![],
            }));
            Ok(Expression::Function(Box::new(Function::new(
                "TRANSFORM".to_string(),
                vec![get_path, lambda],
            ))))
        }
        // Other targets keep JSON_VALUE_ARRAY unchanged.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "JSON_VALUE_ARRAY".to_string(),
            args,
        )))),
    }
}
30322
// BigQuery REGEXP_EXTRACT(val, regex[, position[, occurrence]]) -> target dialects
// BigQuery's 3rd arg is "position" (starting char index), 4th is "occurrence" (which match to return)
// This is different from Hive/Spark where 3rd arg is "group_index"
"REGEXP_EXTRACT" if matches!(source, DialectType::BigQuery) => {
    match target {
        DialectType::DuckDB
        | DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // These targets interpret the 3rd arg as a capture-group index,
            // so group 1 is appended explicitly and BigQuery's position /
            // occurrence semantics are emulated with SUBSTRING + NULLIF and
            // REGEXP_EXTRACT_ALL respectively.
            if args.len() == 2 {
                // REGEXP_EXTRACT(val, regex) -> REGEXP_EXTRACT(val, regex, 1)
                args.push(Expression::number(1));
                Ok(Expression::Function(Box::new(Function::new(
                    "REGEXP_EXTRACT".to_string(),
                    args,
                ))))
            } else if args.len() == 3 {
                let val = args.remove(0);
                let regex = args.remove(0);
                let position = args.remove(0);
                // position == 1 is a no-op, so skip the SUBSTRING wrapper.
                let is_pos_1 = matches!(&position, Expression::Literal(Literal::Number(n)) if n == "1");
                if is_pos_1 {
                    Ok(Expression::Function(Box::new(Function::new(
                        "REGEXP_EXTRACT".to_string(),
                        vec![val, regex, Expression::number(1)],
                    ))))
                } else {
                    // SUBSTRING(val, position) drops the leading chars;
                    // NULLIF(..., '') preserves BigQuery's NULL-on-empty behavior.
                    let substring_expr = Expression::Function(Box::new(Function::new(
                        "SUBSTRING".to_string(),
                        vec![val, position],
                    )));
                    let nullif_expr = Expression::Function(Box::new(Function::new(
                        "NULLIF".to_string(),
                        vec![
                            substring_expr,
                            Expression::Literal(Literal::String(String::new())),
                        ],
                    )));
                    Ok(Expression::Function(Box::new(Function::new(
                        "REGEXP_EXTRACT".to_string(),
                        vec![nullif_expr, regex, Expression::number(1)],
                    ))))
                }
            } else if args.len() == 4 {
                let val = args.remove(0);
                let regex = args.remove(0);
                let position = args.remove(0);
                let occurrence = args.remove(0);
                let is_pos_1 = matches!(&position, Expression::Literal(Literal::Number(n)) if n == "1");
                let is_occ_1 = matches!(&occurrence, Expression::Literal(Literal::Number(n)) if n == "1");
                if is_pos_1 && is_occ_1 {
                    // Both defaults: collapses to the simple 3-arg form.
                    Ok(Expression::Function(Box::new(Function::new(
                        "REGEXP_EXTRACT".to_string(),
                        vec![val, regex, Expression::number(1)],
                    ))))
                } else {
                    // Apply the position offset only when needed.
                    let subject = if is_pos_1 {
                        val
                    } else {
                        let substring_expr = Expression::Function(Box::new(
                            Function::new("SUBSTRING".to_string(), vec![val, position]),
                        ));
                        Expression::Function(Box::new(Function::new(
                            "NULLIF".to_string(),
                            vec![
                                substring_expr,
                                Expression::Literal(Literal::String(String::new())),
                            ],
                        )))
                    };
                    // Occurrence: extract all group-1 matches, then index by
                    // occurrence. NOTE(review): assumes 1-based array
                    // indexing on these targets — confirm for each.
                    let extract_all = Expression::Function(Box::new(Function::new(
                        "REGEXP_EXTRACT_ALL".to_string(),
                        vec![subject, regex, Expression::number(1)],
                    )));
                    Ok(Expression::Function(Box::new(Function::new(
                        "ARRAY_EXTRACT".to_string(),
                        vec![extract_all, occurrence],
                    ))))
                }
            } else {
                // Unexpected arity: rebuild the original call, preserving the
                // parsed function's flags (distinct/brackets/quoting).
                Ok(Expression::Function(Box::new(Function {
                    name: f.name,
                    args,
                    distinct: f.distinct,
                    trailing_comments: f.trailing_comments,
                    use_bracket_syntax: f.use_bracket_syntax,
                    no_parens: f.no_parens,
                    quoted: f.quoted,
                    span: None,
                    inferred_type: None,
                })))
            }
        }
        DialectType::Snowflake => {
            // BigQuery REGEXP_EXTRACT -> Snowflake REGEXP_SUBSTR
            Ok(Expression::Function(Box::new(Function::new(
                "REGEXP_SUBSTR".to_string(),
                args,
            ))))
        }
        _ => {
            // For other targets (Hive/Spark/BigQuery): pass through as-is
            // BigQuery's default group behavior matches Hive/Spark for 2-arg case
            Ok(Expression::Function(Box::new(Function {
                name: f.name,
                args,
                distinct: f.distinct,
                trailing_comments: f.trailing_comments,
                use_bracket_syntax: f.use_bracket_syntax,
                no_parens: f.no_parens,
                quoted: f.quoted,
                span: None,
                inferred_type: None,
            })))
        }
    }
}
30440
// BigQuery STRUCT(args) -> target-specific struct expression.
// Phase 1 normalizes the argument list into (optional name, value) fields;
// phase 2 renders those fields per target.
"STRUCT" => {
    // Convert Function args to Struct fields
    let mut fields: Vec<(Option<String>, Expression)> = Vec::new();
    for (i, arg) in args.into_iter().enumerate() {
        match arg {
            Expression::Alias(a) => {
                // Named field: expr AS name
                fields.push((Some(a.alias.name.clone()), a.this));
            }
            other => {
                // Unnamed field: for Spark/Hive, keep as None
                // For Snowflake, auto-name as _N
                // For DuckDB, use column name for column refs, _N for others
                // (i is the 0-based argument index)
                if matches!(target, DialectType::Snowflake) {
                    fields.push((Some(format!("_{}", i)), other));
                } else if matches!(target, DialectType::DuckDB) {
                    let auto_name = match &other {
                        Expression::Column(col) => col.name.name.clone(),
                        _ => format!("_{}", i),
                    };
                    fields.push((Some(auto_name), other));
                } else {
                    fields.push((None, other));
                }
            }
        }
    }

    match target {
        DialectType::Snowflake => {
            // OBJECT_CONSTRUCT('name', value, ...)
            // Unnamed fields (should not occur after phase 1 for Snowflake)
            // would contribute only the value.
            let mut oc_args = Vec::new();
            for (name, val) in &fields {
                if let Some(n) = name {
                    oc_args.push(Expression::Literal(Literal::String(n.clone())));
                    oc_args.push(val.clone());
                } else {
                    oc_args.push(val.clone());
                }
            }
            Ok(Expression::Function(Box::new(Function::new(
                "OBJECT_CONSTRUCT".to_string(),
                oc_args,
            ))))
        }
        DialectType::DuckDB => {
            // {'name': value, ...} struct literal
            Ok(Expression::Struct(Box::new(crate::expressions::Struct {
                fields,
            })))
        }
        DialectType::Hive => {
            // STRUCT(val1, val2, ...) - strip aliases
            let hive_fields: Vec<(Option<String>, Expression)> =
                fields.into_iter().map(|(_, v)| (None, v)).collect();
            Ok(Expression::Struct(Box::new(crate::expressions::Struct {
                fields: hive_fields,
            })))
        }
        DialectType::Spark | DialectType::Databricks => {
            // Use Expression::Struct to bypass Spark target transform auto-naming
            Ok(Expression::Struct(Box::new(crate::expressions::Struct {
                fields,
            })))
        }
        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
            // Check if all fields are named AND all have inferable types - if so, wrap in CAST(ROW(...) AS ROW(name TYPE, ...))
            let all_named =
                !fields.is_empty() && fields.iter().all(|(name, _)| name.is_some());
            let all_types_inferable = all_named
                && fields
                    .iter()
                    .all(|(_, val)| Self::can_infer_presto_type(val));
            let row_args: Vec<Expression> =
                fields.iter().map(|(_, v)| v.clone()).collect();
            let row_expr = Expression::Function(Box::new(Function::new(
                "ROW".to_string(),
                row_args,
            )));
            if all_named && all_types_inferable {
                // Build ROW type with inferred types
                let mut row_type_fields = Vec::new();
                for (name, val) in &fields {
                    if let Some(n) = name {
                        let type_str = Self::infer_sql_type_for_presto(val);
                        row_type_fields.push(crate::expressions::StructField::new(
                            n.clone(),
                            crate::expressions::DataType::Custom { name: type_str },
                        ));
                    }
                }
                let row_type = crate::expressions::DataType::Struct {
                    fields: row_type_fields,
                    nested: true,
                };
                Ok(Expression::Cast(Box::new(Cast {
                    this: row_expr,
                    to: row_type,
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                })))
            } else {
                // Types not inferable: emit a bare anonymous ROW(...).
                Ok(row_expr)
            }
        }
        _ => {
            // Default: keep as STRUCT function with original args,
            // re-attaching aliases for the named fields.
            let mut new_args = Vec::new();
            for (name, val) in fields {
                if let Some(n) = name {
                    new_args.push(Expression::Alias(Box::new(
                        crate::expressions::Alias::new(val, Identifier::new(n)),
                    )));
                } else {
                    new_args.push(val);
                }
            }
            Ok(Expression::Function(Box::new(Function::new(
                "STRUCT".to_string(),
                new_args,
            ))))
        }
    }
}
30569
30570 // ROUND(x, n, 'ROUND_HALF_EVEN') -> ROUND_EVEN(x, n) for DuckDB
30571 "ROUND" if args.len() == 3 => {
30572 let x = args.remove(0);
30573 let n = args.remove(0);
30574 let mode = args.remove(0);
30575 // Check if mode is 'ROUND_HALF_EVEN'
30576 let is_half_even = matches!(&mode, Expression::Literal(Literal::String(s)) if s.eq_ignore_ascii_case("ROUND_HALF_EVEN"));
30577 if is_half_even && matches!(target, DialectType::DuckDB) {
30578 Ok(Expression::Function(Box::new(Function::new(
30579 "ROUND_EVEN".to_string(),
30580 vec![x, n],
30581 ))))
30582 } else {
30583 // Pass through with all args
30584 Ok(Expression::Function(Box::new(Function::new(
30585 "ROUND".to_string(),
30586 vec![x, n, mode],
30587 ))))
30588 }
30589 }
30590
// MAKE_INTERVAL(year, month, named_args...) -> INTERVAL string for Snowflake/DuckDB
"MAKE_INTERVAL" => {
    // MAKE_INTERVAL(1, 2, minute => 5, day => 3)
    // The positional args are: year, month
    // Named args are: day =>, minute =>, etc.
    // For Snowflake: INTERVAL '1 year, 2 month, 5 minute, 3 day'
    // For DuckDB: INTERVAL '1 year 2 month 5 minute 3 day'
    // For BigQuery->BigQuery: reorder named args (day before minute)
    if matches!(target, DialectType::Snowflake | DialectType::DuckDB) {
        // Collect (unit, numeric value) pairs in argument order.
        // NOTE(review): non-numeric values are silently skipped here —
        // confirm that dropping them is intended.
        let mut parts: Vec<(String, String)> = Vec::new();
        let mut pos_idx = 0;
        let pos_units = ["year", "month"];
        for arg in &args {
            if let Expression::NamedArgument(na) = arg {
                // Named arg like minute => 5
                let unit = na.name.name.clone();
                if let Expression::Literal(Literal::Number(n)) = &na.value {
                    parts.push((unit, n.clone()));
                }
            } else if pos_idx < pos_units.len() {
                if let Expression::Literal(Literal::Number(n)) = arg {
                    parts.push((pos_units[pos_idx].to_string(), n.clone()));
                }
                pos_idx += 1;
            }
        }
        // Don't sort - preserve original argument order
        let separator = if matches!(target, DialectType::Snowflake) {
            ", "
        } else {
            " "
        };
        // Render as "<value> <unit>" pairs joined by the dialect separator.
        let interval_str = parts
            .iter()
            .map(|(u, v)| format!("{} {}", v, u))
            .collect::<Vec<_>>()
            .join(separator);
        Ok(Expression::Interval(Box::new(
            crate::expressions::Interval {
                this: Some(Expression::Literal(Literal::String(interval_str))),
                unit: None,
            },
        )))
    } else if matches!(target, DialectType::BigQuery) {
        // BigQuery->BigQuery: reorder named args (day, minute, etc.)
        // Positional args keep their place; named args are stably sorted
        // into canonical unit order and re-appended.
        let mut positional = Vec::new();
        let mut named: Vec<(
            String,
            Expression,
            crate::expressions::NamedArgSeparator,
        )> = Vec::new();
        let _pos_units = ["year", "month"];
        let mut _pos_idx = 0;
        for arg in args {
            if let Expression::NamedArgument(na) = arg {
                named.push((na.name.name.clone(), na.value, na.separator));
            } else {
                positional.push(arg);
                _pos_idx += 1;
            }
        }
        // Sort named args by: day, hour, minute, second
        // (unknown units sort last; sort_by_key is stable, so ties keep
        // their original relative order).
        let unit_order = |u: &str| -> usize {
            match u.to_lowercase().as_str() {
                "day" => 0,
                "hour" => 1,
                "minute" => 2,
                "second" => 3,
                _ => 4,
            }
        };
        named.sort_by_key(|(u, _, _)| unit_order(u));
        let mut result_args = positional;
        for (name, value, sep) in named {
            result_args.push(Expression::NamedArgument(Box::new(
                crate::expressions::NamedArgument {
                    name: Identifier::new(&name),
                    value,
                    separator: sep,
                },
            )));
        }
        Ok(Expression::Function(Box::new(Function::new(
            "MAKE_INTERVAL".to_string(),
            result_args,
        ))))
    } else {
        // Other targets keep MAKE_INTERVAL unchanged.
        Ok(Expression::Function(Box::new(Function::new(
            "MAKE_INTERVAL".to_string(),
            args,
        ))))
    }
}
30684
30685 // ARRAY_TO_STRING(array, sep, null_text) -> ARRAY_TO_STRING(LIST_TRANSFORM(array, x -> COALESCE(x, null_text)), sep) for DuckDB
30686 "ARRAY_TO_STRING" if args.len() == 3 => {
30687 let arr = args.remove(0);
30688 let sep = args.remove(0);
30689 let null_text = args.remove(0);
30690 match target {
30691 DialectType::DuckDB => {
30692 // LIST_TRANSFORM(array, x -> COALESCE(x, null_text))
30693 let _lambda_param =
30694 Expression::Identifier(crate::expressions::Identifier::new("x"));
30695 let coalesce =
30696 Expression::Coalesce(Box::new(crate::expressions::VarArgFunc {
30697 original_name: None,
30698 expressions: vec![
30699 Expression::Identifier(crate::expressions::Identifier::new(
30700 "x",
30701 )),
30702 null_text,
30703 ],
30704 inferred_type: None,
30705 }));
30706 let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
30707 parameters: vec![crate::expressions::Identifier::new("x")],
30708 body: coalesce,
30709 colon: false,
30710 parameter_types: vec![],
30711 }));
30712 let list_transform = Expression::Function(Box::new(Function::new(
30713 "LIST_TRANSFORM".to_string(),
30714 vec![arr, lambda],
30715 )));
30716 Ok(Expression::Function(Box::new(Function::new(
30717 "ARRAY_TO_STRING".to_string(),
30718 vec![list_transform, sep],
30719 ))))
30720 }
30721 _ => Ok(Expression::Function(Box::new(Function::new(
30722 "ARRAY_TO_STRING".to_string(),
30723 vec![arr, sep, null_text],
30724 )))),
30725 }
30726 }
30727
30728 // LENGTH(x) -> CASE TYPEOF(x) ... for DuckDB
30729 "LENGTH" if args.len() == 1 => {
30730 let arg = args.remove(0);
30731 match target {
30732 DialectType::DuckDB => {
30733 // CASE TYPEOF(foo) WHEN 'BLOB' THEN OCTET_LENGTH(CAST(foo AS BLOB)) ELSE LENGTH(CAST(foo AS TEXT)) END
30734 let typeof_func = Expression::Function(Box::new(Function::new(
30735 "TYPEOF".to_string(),
30736 vec![arg.clone()],
30737 )));
30738 let blob_cast = Expression::Cast(Box::new(Cast {
30739 this: arg.clone(),
30740 to: DataType::VarBinary { length: None },
30741 trailing_comments: vec![],
30742 double_colon_syntax: false,
30743 format: None,
30744 default: None,
30745 inferred_type: None,
30746 }));
30747 let octet_length = Expression::Function(Box::new(Function::new(
30748 "OCTET_LENGTH".to_string(),
30749 vec![blob_cast],
30750 )));
30751 let text_cast = Expression::Cast(Box::new(Cast {
30752 this: arg,
30753 to: DataType::Text,
30754 trailing_comments: vec![],
30755 double_colon_syntax: false,
30756 format: None,
30757 default: None,
30758 inferred_type: None,
30759 }));
30760 let length_text = Expression::Function(Box::new(Function::new(
30761 "LENGTH".to_string(),
30762 vec![text_cast],
30763 )));
30764 Ok(Expression::Case(Box::new(crate::expressions::Case {
30765 operand: Some(typeof_func),
30766 whens: vec![(
30767 Expression::Literal(Literal::String("BLOB".to_string())),
30768 octet_length,
30769 )],
30770 else_: Some(length_text),
30771 comments: Vec::new(),
30772 inferred_type: None,
30773 })))
30774 }
30775 _ => Ok(Expression::Function(Box::new(Function::new(
30776 "LENGTH".to_string(),
30777 vec![arg],
30778 )))),
30779 }
30780 }
30781
30782 // PERCENTILE_CONT(x, fraction RESPECT NULLS) -> QUANTILE_CONT(x, fraction) for DuckDB
30783 "PERCENTILE_CONT" if args.len() >= 2 && matches!(source, DialectType::BigQuery) => {
30784 // BigQuery PERCENTILE_CONT(x, fraction [RESPECT|IGNORE NULLS]) OVER ()
30785 // The args should be [x, fraction] with the null handling stripped
30786 // For DuckDB: QUANTILE_CONT(x, fraction)
30787 // For Spark: PERCENTILE_CONT(x, fraction) RESPECT NULLS (handled at window level)
30788 match target {
30789 DialectType::DuckDB => {
30790 // Strip down to just 2 args, rename to QUANTILE_CONT
30791 let x = args[0].clone();
30792 let frac = args[1].clone();
30793 Ok(Expression::Function(Box::new(Function::new(
30794 "QUANTILE_CONT".to_string(),
30795 vec![x, frac],
30796 ))))
30797 }
30798 _ => Ok(Expression::Function(Box::new(Function::new(
30799 "PERCENTILE_CONT".to_string(),
30800 args,
30801 )))),
30802 }
30803 }
30804
30805 // All others: pass through
30806 _ => Ok(Expression::Function(Box::new(Function {
30807 name: f.name,
30808 args,
30809 distinct: f.distinct,
30810 trailing_comments: f.trailing_comments,
30811 use_bracket_syntax: f.use_bracket_syntax,
30812 no_parens: f.no_parens,
30813 quoted: f.quoted,
30814 span: None,
30815 inferred_type: None,
30816 }))),
30817 }
30818 }
30819
30820 /// Check if we can reliably infer the SQL type for Presto/Trino ROW CAST.
30821 /// Returns false for column references and other non-literal expressions where the type is unknown.
30822 fn can_infer_presto_type(expr: &Expression) -> bool {
30823 match expr {
30824 Expression::Literal(_) => true,
30825 Expression::Boolean(_) => true,
30826 Expression::Array(_) | Expression::ArrayFunc(_) => true,
30827 Expression::Struct(_) | Expression::StructFunc(_) => true,
30828 Expression::Function(f) => {
30829 let up = f.name.to_uppercase();
30830 up == "STRUCT"
30831 || up == "ROW"
30832 || up == "CURRENT_DATE"
30833 || up == "CURRENT_TIMESTAMP"
30834 || up == "NOW"
30835 }
30836 Expression::Cast(_) => true,
30837 Expression::Neg(inner) => Self::can_infer_presto_type(&inner.this),
30838 _ => false,
30839 }
30840 }
30841
30842 /// Infer SQL type name for a Presto/Trino ROW CAST from a literal expression
30843 fn infer_sql_type_for_presto(expr: &Expression) -> String {
30844 use crate::expressions::Literal;
30845 match expr {
30846 Expression::Literal(Literal::String(_)) => "VARCHAR".to_string(),
30847 Expression::Literal(Literal::Number(n)) => {
30848 if n.contains('.') {
30849 "DOUBLE".to_string()
30850 } else {
30851 "INTEGER".to_string()
30852 }
30853 }
30854 Expression::Boolean(_) => "BOOLEAN".to_string(),
30855 Expression::Literal(Literal::Date(_)) => "DATE".to_string(),
30856 Expression::Literal(Literal::Timestamp(_)) => "TIMESTAMP".to_string(),
30857 Expression::Literal(Literal::Datetime(_)) => "TIMESTAMP".to_string(),
30858 Expression::Array(_) | Expression::ArrayFunc(_) => "ARRAY(VARCHAR)".to_string(),
30859 Expression::Struct(_) | Expression::StructFunc(_) => "ROW".to_string(),
30860 Expression::Function(f) => {
30861 let up = f.name.to_uppercase();
30862 if up == "STRUCT" || up == "ROW" {
30863 "ROW".to_string()
30864 } else if up == "CURRENT_DATE" {
30865 "DATE".to_string()
30866 } else if up == "CURRENT_TIMESTAMP" || up == "NOW" {
30867 "TIMESTAMP".to_string()
30868 } else {
30869 "VARCHAR".to_string()
30870 }
30871 }
30872 Expression::Cast(c) => {
30873 // If already cast, use the target type
30874 Self::data_type_to_presto_string(&c.to)
30875 }
30876 _ => "VARCHAR".to_string(),
30877 }
30878 }
30879
30880 /// Convert a DataType to its Presto/Trino string representation for ROW type
30881 fn data_type_to_presto_string(dt: &crate::expressions::DataType) -> String {
30882 use crate::expressions::DataType;
30883 match dt {
30884 DataType::VarChar { .. } | DataType::Text | DataType::String { .. } => {
30885 "VARCHAR".to_string()
30886 }
30887 DataType::Int { .. }
30888 | DataType::BigInt { .. }
30889 | DataType::SmallInt { .. }
30890 | DataType::TinyInt { .. } => "INTEGER".to_string(),
30891 DataType::Float { .. } | DataType::Double { .. } => "DOUBLE".to_string(),
30892 DataType::Boolean => "BOOLEAN".to_string(),
30893 DataType::Date => "DATE".to_string(),
30894 DataType::Timestamp { .. } => "TIMESTAMP".to_string(),
30895 DataType::Struct { fields, .. } => {
30896 let field_strs: Vec<String> = fields
30897 .iter()
30898 .map(|f| {
30899 format!(
30900 "{} {}",
30901 f.name,
30902 Self::data_type_to_presto_string(&f.data_type)
30903 )
30904 })
30905 .collect();
30906 format!("ROW({})", field_strs.join(", "))
30907 }
30908 DataType::Array { element_type, .. } => {
30909 format!("ARRAY({})", Self::data_type_to_presto_string(element_type))
30910 }
30911 DataType::Custom { name } => {
30912 // Pass through custom type names (e.g., "INTEGER", "VARCHAR" from earlier inference)
30913 name.clone()
30914 }
30915 _ => "VARCHAR".to_string(),
30916 }
30917 }
30918
30919 /// Convert IntervalUnit to string
30920 fn interval_unit_to_string(unit: &crate::expressions::IntervalUnit) -> String {
30921 match unit {
30922 crate::expressions::IntervalUnit::Year => "YEAR".to_string(),
30923 crate::expressions::IntervalUnit::Quarter => "QUARTER".to_string(),
30924 crate::expressions::IntervalUnit::Month => "MONTH".to_string(),
30925 crate::expressions::IntervalUnit::Week => "WEEK".to_string(),
30926 crate::expressions::IntervalUnit::Day => "DAY".to_string(),
30927 crate::expressions::IntervalUnit::Hour => "HOUR".to_string(),
30928 crate::expressions::IntervalUnit::Minute => "MINUTE".to_string(),
30929 crate::expressions::IntervalUnit::Second => "SECOND".to_string(),
30930 crate::expressions::IntervalUnit::Millisecond => "MILLISECOND".to_string(),
30931 crate::expressions::IntervalUnit::Microsecond => "MICROSECOND".to_string(),
30932 crate::expressions::IntervalUnit::Nanosecond => "NANOSECOND".to_string(),
30933 }
30934 }
30935
30936 /// Extract unit string from an expression (uppercased)
30937 fn get_unit_str_static(expr: &Expression) -> String {
30938 use crate::expressions::Literal;
30939 match expr {
30940 Expression::Identifier(id) => id.name.to_uppercase(),
30941 Expression::Literal(Literal::String(s)) => s.to_uppercase(),
30942 Expression::Column(col) => col.name.name.to_uppercase(),
30943 Expression::Function(f) => {
30944 let base = f.name.to_uppercase();
30945 if !f.args.is_empty() {
30946 let inner = Self::get_unit_str_static(&f.args[0]);
30947 format!("{}({})", base, inner)
30948 } else {
30949 base
30950 }
30951 }
30952 _ => "DAY".to_string(),
30953 }
30954 }
30955
30956 /// Parse unit string to IntervalUnit
30957 fn parse_interval_unit_static(s: &str) -> crate::expressions::IntervalUnit {
30958 match s {
30959 "YEAR" | "YY" | "YYYY" => crate::expressions::IntervalUnit::Year,
30960 "QUARTER" | "QQ" | "Q" => crate::expressions::IntervalUnit::Quarter,
30961 "MONTH" | "MM" | "M" => crate::expressions::IntervalUnit::Month,
30962 "WEEK" | "WK" | "WW" | "ISOWEEK" => crate::expressions::IntervalUnit::Week,
30963 "DAY" | "DD" | "D" | "DY" => crate::expressions::IntervalUnit::Day,
30964 "HOUR" | "HH" => crate::expressions::IntervalUnit::Hour,
30965 "MINUTE" | "MI" | "N" => crate::expressions::IntervalUnit::Minute,
30966 "SECOND" | "SS" | "S" => crate::expressions::IntervalUnit::Second,
30967 "MILLISECOND" | "MS" => crate::expressions::IntervalUnit::Millisecond,
30968 "MICROSECOND" | "MCS" | "US" => crate::expressions::IntervalUnit::Microsecond,
30969 _ if s.starts_with("WEEK(") => crate::expressions::IntervalUnit::Week,
30970 _ => crate::expressions::IntervalUnit::Day,
30971 }
30972 }
30973
30974 /// Convert expression to simple string for interval building
30975 fn expr_to_string_static(expr: &Expression) -> String {
30976 use crate::expressions::Literal;
30977 match expr {
30978 Expression::Literal(Literal::Number(s)) => s.clone(),
30979 Expression::Literal(Literal::String(s)) => s.clone(),
30980 Expression::Identifier(id) => id.name.clone(),
30981 Expression::Neg(f) => format!("-{}", Self::expr_to_string_static(&f.this)),
30982 _ => "1".to_string(),
30983 }
30984 }
30985
30986 /// Extract a simple string representation from a literal expression
30987 fn expr_to_string(expr: &Expression) -> String {
30988 use crate::expressions::Literal;
30989 match expr {
30990 Expression::Literal(Literal::Number(s)) => s.clone(),
30991 Expression::Literal(Literal::String(s)) => s.clone(),
30992 Expression::Neg(f) => format!("-{}", Self::expr_to_string(&f.this)),
30993 Expression::Identifier(id) => id.name.clone(),
30994 _ => "1".to_string(),
30995 }
30996 }
30997
30998 /// Quote an interval value expression as a string literal if it's a number (or negated number)
30999 fn quote_interval_val(expr: &Expression) -> Expression {
31000 use crate::expressions::Literal;
31001 match expr {
31002 Expression::Literal(Literal::Number(n)) => {
31003 Expression::Literal(Literal::String(n.clone()))
31004 }
31005 Expression::Literal(Literal::String(_)) => expr.clone(),
31006 Expression::Neg(inner) => {
31007 if let Expression::Literal(Literal::Number(n)) = &inner.this {
31008 Expression::Literal(Literal::String(format!("-{}", n)))
31009 } else {
31010 expr.clone()
31011 }
31012 }
31013 _ => expr.clone(),
31014 }
31015 }
31016
31017 /// Check if a timestamp string contains timezone info (offset like +02:00, or named timezone)
31018 fn timestamp_string_has_timezone(ts: &str) -> bool {
31019 let trimmed = ts.trim();
31020 // Check for numeric timezone offsets: +N, -N, +NN:NN, -NN:NN at end
31021 if let Some(last_space) = trimmed.rfind(' ') {
31022 let suffix = &trimmed[last_space + 1..];
31023 if (suffix.starts_with('+') || suffix.starts_with('-')) && suffix.len() > 1 {
31024 let rest = &suffix[1..];
31025 if rest.chars().all(|c| c.is_ascii_digit() || c == ':') {
31026 return true;
31027 }
31028 }
31029 }
31030 // Check for named timezone abbreviations
31031 let ts_lower = trimmed.to_lowercase();
31032 let tz_abbrevs = [" utc", " gmt", " cet", " est", " pst", " cst", " mst"];
31033 for abbrev in &tz_abbrevs {
31034 if ts_lower.ends_with(abbrev) {
31035 return true;
31036 }
31037 }
31038 false
31039 }
31040
31041 /// Maybe CAST timestamp literal to TIMESTAMPTZ for Snowflake
31042 fn maybe_cast_ts_to_tz(expr: Expression, func_name: &str) -> Expression {
31043 use crate::expressions::{Cast, DataType, Literal};
31044 match expr {
31045 Expression::Literal(Literal::Timestamp(s)) => {
31046 let tz = func_name.starts_with("TIMESTAMP");
31047 Expression::Cast(Box::new(Cast {
31048 this: Expression::Literal(Literal::String(s)),
31049 to: if tz {
31050 DataType::Timestamp {
31051 timezone: true,
31052 precision: None,
31053 }
31054 } else {
31055 DataType::Timestamp {
31056 timezone: false,
31057 precision: None,
31058 }
31059 },
31060 trailing_comments: vec![],
31061 double_colon_syntax: false,
31062 format: None,
31063 default: None,
31064 inferred_type: None,
31065 }))
31066 }
31067 other => other,
31068 }
31069 }
31070
31071 /// Maybe CAST timestamp literal to TIMESTAMP (no tz)
31072 fn maybe_cast_ts(expr: Expression) -> Expression {
31073 use crate::expressions::{Cast, DataType, Literal};
31074 match expr {
31075 Expression::Literal(Literal::Timestamp(s)) => Expression::Cast(Box::new(Cast {
31076 this: Expression::Literal(Literal::String(s)),
31077 to: DataType::Timestamp {
31078 timezone: false,
31079 precision: None,
31080 },
31081 trailing_comments: vec![],
31082 double_colon_syntax: false,
31083 format: None,
31084 default: None,
31085 inferred_type: None,
31086 })),
31087 other => other,
31088 }
31089 }
31090
31091 /// Convert DATE 'x' literal to CAST('x' AS DATE)
31092 fn date_literal_to_cast(expr: Expression) -> Expression {
31093 use crate::expressions::{Cast, DataType, Literal};
31094 match expr {
31095 Expression::Literal(Literal::Date(s)) => Expression::Cast(Box::new(Cast {
31096 this: Expression::Literal(Literal::String(s)),
31097 to: DataType::Date,
31098 trailing_comments: vec![],
31099 double_colon_syntax: false,
31100 format: None,
31101 default: None,
31102 inferred_type: None,
31103 })),
31104 other => other,
31105 }
31106 }
31107
31108 /// Ensure an expression that should be a date is CAST(... AS DATE).
31109 /// Handles both DATE literals and string literals that look like dates.
31110 fn ensure_cast_date(expr: Expression) -> Expression {
31111 use crate::expressions::{Cast, DataType, Literal};
31112 match expr {
31113 Expression::Literal(Literal::Date(s)) => Expression::Cast(Box::new(Cast {
31114 this: Expression::Literal(Literal::String(s)),
31115 to: DataType::Date,
31116 trailing_comments: vec![],
31117 double_colon_syntax: false,
31118 format: None,
31119 default: None,
31120 inferred_type: None,
31121 })),
31122 Expression::Literal(Literal::String(ref _s)) => {
31123 // String literal that should be a date -> CAST('s' AS DATE)
31124 Expression::Cast(Box::new(Cast {
31125 this: expr,
31126 to: DataType::Date,
31127 trailing_comments: vec![],
31128 double_colon_syntax: false,
31129 format: None,
31130 default: None,
31131 inferred_type: None,
31132 }))
31133 }
31134 // Already a CAST or other expression -> leave as-is
31135 other => other,
31136 }
31137 }
31138
31139 /// Force CAST(expr AS DATE) for any expression (not just literals)
31140 /// Skips if the expression is already a CAST to DATE
31141 fn force_cast_date(expr: Expression) -> Expression {
31142 use crate::expressions::{Cast, DataType};
31143 // If it's already a CAST to DATE, don't double-wrap
31144 if let Expression::Cast(ref c) = expr {
31145 if matches!(c.to, DataType::Date) {
31146 return expr;
31147 }
31148 }
31149 Expression::Cast(Box::new(Cast {
31150 this: expr,
31151 to: DataType::Date,
31152 trailing_comments: vec![],
31153 double_colon_syntax: false,
31154 format: None,
31155 default: None,
31156 inferred_type: None,
31157 }))
31158 }
31159
31160 /// Internal TO_DATE function that won't be converted to CAST by the Snowflake handler.
31161 /// Uses the name `_POLYGLOT_TO_DATE` which is not recognized by the TO_DATE -> CAST logic.
31162 /// The Snowflake DATEDIFF handler converts these back to TO_DATE.
31163 const PRESERVED_TO_DATE: &'static str = "_POLYGLOT_TO_DATE";
31164
31165 fn ensure_to_date_preserved(expr: Expression) -> Expression {
31166 use crate::expressions::{Function, Literal};
31167 if matches!(expr, Expression::Literal(Literal::String(_))) {
31168 Expression::Function(Box::new(Function::new(
31169 Self::PRESERVED_TO_DATE.to_string(),
31170 vec![expr],
31171 )))
31172 } else {
31173 expr
31174 }
31175 }
31176
31177 /// TRY_CAST(expr AS DATE) - used for DuckDB when TO_DATE is unwrapped
31178 fn try_cast_date(expr: Expression) -> Expression {
31179 use crate::expressions::{Cast, DataType};
31180 Expression::TryCast(Box::new(Cast {
31181 this: expr,
31182 to: DataType::Date,
31183 trailing_comments: vec![],
31184 double_colon_syntax: false,
31185 format: None,
31186 default: None,
31187 inferred_type: None,
31188 }))
31189 }
31190
31191 /// CAST(CAST(expr AS TIMESTAMP) AS DATE) - used when Hive string dates need to be cast
31192 fn double_cast_timestamp_date(expr: Expression) -> Expression {
31193 use crate::expressions::{Cast, DataType};
31194 let inner = Expression::Cast(Box::new(Cast {
31195 this: expr,
31196 to: DataType::Timestamp {
31197 timezone: false,
31198 precision: None,
31199 },
31200 trailing_comments: vec![],
31201 double_colon_syntax: false,
31202 format: None,
31203 default: None,
31204 inferred_type: None,
31205 }));
31206 Expression::Cast(Box::new(Cast {
31207 this: inner,
31208 to: DataType::Date,
31209 trailing_comments: vec![],
31210 double_colon_syntax: false,
31211 format: None,
31212 default: None,
31213 inferred_type: None,
31214 }))
31215 }
31216
31217 /// CAST(CAST(expr AS DATETIME) AS DATE) - BigQuery variant
31218 fn double_cast_datetime_date(expr: Expression) -> Expression {
31219 use crate::expressions::{Cast, DataType};
31220 let inner = Expression::Cast(Box::new(Cast {
31221 this: expr,
31222 to: DataType::Custom {
31223 name: "DATETIME".to_string(),
31224 },
31225 trailing_comments: vec![],
31226 double_colon_syntax: false,
31227 format: None,
31228 default: None,
31229 inferred_type: None,
31230 }));
31231 Expression::Cast(Box::new(Cast {
31232 this: inner,
31233 to: DataType::Date,
31234 trailing_comments: vec![],
31235 double_colon_syntax: false,
31236 format: None,
31237 default: None,
31238 inferred_type: None,
31239 }))
31240 }
31241
31242 /// CAST(CAST(expr AS DATETIME2) AS DATE) - TSQL variant
31243 fn double_cast_datetime2_date(expr: Expression) -> Expression {
31244 use crate::expressions::{Cast, DataType};
31245 let inner = Expression::Cast(Box::new(Cast {
31246 this: expr,
31247 to: DataType::Custom {
31248 name: "DATETIME2".to_string(),
31249 },
31250 trailing_comments: vec![],
31251 double_colon_syntax: false,
31252 format: None,
31253 default: None,
31254 inferred_type: None,
31255 }));
31256 Expression::Cast(Box::new(Cast {
31257 this: inner,
31258 to: DataType::Date,
31259 trailing_comments: vec![],
31260 double_colon_syntax: false,
31261 format: None,
31262 default: None,
31263 inferred_type: None,
31264 }))
31265 }
31266
31267 /// Convert Hive/Java-style date format strings to C-style (strftime) format
31268 /// e.g., "yyyy-MM-dd'T'HH" -> "%Y-%m-%d'T'%H"
31269 fn hive_format_to_c_format(fmt: &str) -> String {
31270 let mut result = String::new();
31271 let chars: Vec<char> = fmt.chars().collect();
31272 let mut i = 0;
31273 while i < chars.len() {
31274 match chars[i] {
31275 'y' => {
31276 let mut count = 0;
31277 while i < chars.len() && chars[i] == 'y' {
31278 count += 1;
31279 i += 1;
31280 }
31281 if count >= 4 {
31282 result.push_str("%Y");
31283 } else if count == 2 {
31284 result.push_str("%y");
31285 } else {
31286 result.push_str("%Y");
31287 }
31288 }
31289 'M' => {
31290 let mut count = 0;
31291 while i < chars.len() && chars[i] == 'M' {
31292 count += 1;
31293 i += 1;
31294 }
31295 if count >= 3 {
31296 result.push_str("%b");
31297 } else if count == 2 {
31298 result.push_str("%m");
31299 } else {
31300 result.push_str("%m");
31301 }
31302 }
31303 'd' => {
31304 let mut _count = 0;
31305 while i < chars.len() && chars[i] == 'd' {
31306 _count += 1;
31307 i += 1;
31308 }
31309 result.push_str("%d");
31310 }
31311 'H' => {
31312 let mut _count = 0;
31313 while i < chars.len() && chars[i] == 'H' {
31314 _count += 1;
31315 i += 1;
31316 }
31317 result.push_str("%H");
31318 }
31319 'h' => {
31320 let mut _count = 0;
31321 while i < chars.len() && chars[i] == 'h' {
31322 _count += 1;
31323 i += 1;
31324 }
31325 result.push_str("%I");
31326 }
31327 'm' => {
31328 let mut _count = 0;
31329 while i < chars.len() && chars[i] == 'm' {
31330 _count += 1;
31331 i += 1;
31332 }
31333 result.push_str("%M");
31334 }
31335 's' => {
31336 let mut _count = 0;
31337 while i < chars.len() && chars[i] == 's' {
31338 _count += 1;
31339 i += 1;
31340 }
31341 result.push_str("%S");
31342 }
31343 'S' => {
31344 // Fractional seconds - skip
31345 while i < chars.len() && chars[i] == 'S' {
31346 i += 1;
31347 }
31348 result.push_str("%f");
31349 }
31350 'a' => {
31351 // AM/PM
31352 while i < chars.len() && chars[i] == 'a' {
31353 i += 1;
31354 }
31355 result.push_str("%p");
31356 }
31357 'E' => {
31358 let mut count = 0;
31359 while i < chars.len() && chars[i] == 'E' {
31360 count += 1;
31361 i += 1;
31362 }
31363 if count >= 4 {
31364 result.push_str("%A");
31365 } else {
31366 result.push_str("%a");
31367 }
31368 }
31369 '\'' => {
31370 // Quoted literal text - pass through the quotes and content
31371 result.push('\'');
31372 i += 1;
31373 while i < chars.len() && chars[i] != '\'' {
31374 result.push(chars[i]);
31375 i += 1;
31376 }
31377 if i < chars.len() {
31378 result.push('\'');
31379 i += 1;
31380 }
31381 }
31382 c => {
31383 result.push(c);
31384 i += 1;
31385 }
31386 }
31387 }
31388 result
31389 }
31390
31391 /// Convert Hive/Java format to Presto format (uses %T for HH:mm:ss)
31392 fn hive_format_to_presto_format(fmt: &str) -> String {
31393 let c_fmt = Self::hive_format_to_c_format(fmt);
31394 // Presto uses %T for HH:MM:SS
31395 c_fmt.replace("%H:%M:%S", "%T")
31396 }
31397
31398 /// Ensure a timestamp-like expression for DuckDB with CAST(... AS TIMESTAMP)
31399 fn ensure_cast_timestamp(expr: Expression) -> Expression {
31400 use crate::expressions::{Cast, DataType, Literal};
31401 match expr {
31402 Expression::Literal(Literal::Timestamp(s)) => Expression::Cast(Box::new(Cast {
31403 this: Expression::Literal(Literal::String(s)),
31404 to: DataType::Timestamp {
31405 timezone: false,
31406 precision: None,
31407 },
31408 trailing_comments: vec![],
31409 double_colon_syntax: false,
31410 format: None,
31411 default: None,
31412 inferred_type: None,
31413 })),
31414 Expression::Literal(Literal::String(ref _s)) => Expression::Cast(Box::new(Cast {
31415 this: expr,
31416 to: DataType::Timestamp {
31417 timezone: false,
31418 precision: None,
31419 },
31420 trailing_comments: vec![],
31421 double_colon_syntax: false,
31422 format: None,
31423 default: None,
31424 inferred_type: None,
31425 })),
31426 Expression::Literal(Literal::Datetime(s)) => Expression::Cast(Box::new(Cast {
31427 this: Expression::Literal(Literal::String(s)),
31428 to: DataType::Timestamp {
31429 timezone: false,
31430 precision: None,
31431 },
31432 trailing_comments: vec![],
31433 double_colon_syntax: false,
31434 format: None,
31435 default: None,
31436 inferred_type: None,
31437 })),
31438 other => other,
31439 }
31440 }
31441
31442 /// Force CAST to TIMESTAMP for any expression (not just literals)
31443 /// Used when transpiling from Redshift/TSQL where DATEDIFF/DATEADD args need explicit timestamp cast
31444 fn force_cast_timestamp(expr: Expression) -> Expression {
31445 use crate::expressions::{Cast, DataType};
31446 // Don't double-wrap if already a CAST to TIMESTAMP
31447 if let Expression::Cast(ref c) = expr {
31448 if matches!(c.to, DataType::Timestamp { .. }) {
31449 return expr;
31450 }
31451 }
31452 Expression::Cast(Box::new(Cast {
31453 this: expr,
31454 to: DataType::Timestamp {
31455 timezone: false,
31456 precision: None,
31457 },
31458 trailing_comments: vec![],
31459 double_colon_syntax: false,
31460 format: None,
31461 default: None,
31462 inferred_type: None,
31463 }))
31464 }
31465
31466 /// Ensure a timestamp-like expression for DuckDB with CAST(... AS TIMESTAMPTZ)
31467 fn ensure_cast_timestamptz(expr: Expression) -> Expression {
31468 use crate::expressions::{Cast, DataType, Literal};
31469 match expr {
31470 Expression::Literal(Literal::Timestamp(s)) => Expression::Cast(Box::new(Cast {
31471 this: Expression::Literal(Literal::String(s)),
31472 to: DataType::Timestamp {
31473 timezone: true,
31474 precision: None,
31475 },
31476 trailing_comments: vec![],
31477 double_colon_syntax: false,
31478 format: None,
31479 default: None,
31480 inferred_type: None,
31481 })),
31482 Expression::Literal(Literal::String(ref _s)) => Expression::Cast(Box::new(Cast {
31483 this: expr,
31484 to: DataType::Timestamp {
31485 timezone: true,
31486 precision: None,
31487 },
31488 trailing_comments: vec![],
31489 double_colon_syntax: false,
31490 format: None,
31491 default: None,
31492 inferred_type: None,
31493 })),
31494 Expression::Literal(Literal::Datetime(s)) => Expression::Cast(Box::new(Cast {
31495 this: Expression::Literal(Literal::String(s)),
31496 to: DataType::Timestamp {
31497 timezone: true,
31498 precision: None,
31499 },
31500 trailing_comments: vec![],
31501 double_colon_syntax: false,
31502 format: None,
31503 default: None,
31504 inferred_type: None,
31505 })),
31506 other => other,
31507 }
31508 }
31509
31510 /// Ensure expression is CAST to DATETIME (for BigQuery)
31511 fn ensure_cast_datetime(expr: Expression) -> Expression {
31512 use crate::expressions::{Cast, DataType, Literal};
31513 match expr {
31514 Expression::Literal(Literal::String(ref _s)) => Expression::Cast(Box::new(Cast {
31515 this: expr,
31516 to: DataType::Custom {
31517 name: "DATETIME".to_string(),
31518 },
31519 trailing_comments: vec![],
31520 double_colon_syntax: false,
31521 format: None,
31522 default: None,
31523 inferred_type: None,
31524 })),
31525 other => other,
31526 }
31527 }
31528
31529 /// Force CAST expression to DATETIME (for BigQuery) - always wraps unless already DATETIME
31530 fn force_cast_datetime(expr: Expression) -> Expression {
31531 use crate::expressions::{Cast, DataType};
31532 if let Expression::Cast(ref c) = expr {
31533 if let DataType::Custom { ref name } = c.to {
31534 if name.eq_ignore_ascii_case("DATETIME") {
31535 return expr;
31536 }
31537 }
31538 }
31539 Expression::Cast(Box::new(Cast {
31540 this: expr,
31541 to: DataType::Custom {
31542 name: "DATETIME".to_string(),
31543 },
31544 trailing_comments: vec![],
31545 double_colon_syntax: false,
31546 format: None,
31547 default: None,
31548 inferred_type: None,
31549 }))
31550 }
31551
31552 /// Ensure expression is CAST to DATETIME2 (for TSQL)
31553 fn ensure_cast_datetime2(expr: Expression) -> Expression {
31554 use crate::expressions::{Cast, DataType, Literal};
31555 match expr {
31556 Expression::Literal(Literal::String(ref _s)) => Expression::Cast(Box::new(Cast {
31557 this: expr,
31558 to: DataType::Custom {
31559 name: "DATETIME2".to_string(),
31560 },
31561 trailing_comments: vec![],
31562 double_colon_syntax: false,
31563 format: None,
31564 default: None,
31565 inferred_type: None,
31566 })),
31567 other => other,
31568 }
31569 }
31570
31571 /// Convert TIMESTAMP 'x' literal to CAST('x' AS TIMESTAMPTZ) for DuckDB
31572 fn ts_literal_to_cast_tz(expr: Expression) -> Expression {
31573 use crate::expressions::{Cast, DataType, Literal};
31574 match expr {
31575 Expression::Literal(Literal::Timestamp(s)) => Expression::Cast(Box::new(Cast {
31576 this: Expression::Literal(Literal::String(s)),
31577 to: DataType::Timestamp {
31578 timezone: true,
31579 precision: None,
31580 },
31581 trailing_comments: vec![],
31582 double_colon_syntax: false,
31583 format: None,
31584 default: None,
31585 inferred_type: None,
31586 })),
31587 other => other,
31588 }
31589 }
31590
31591 /// Convert BigQuery format string to Snowflake format string
31592 fn bq_format_to_snowflake(format_expr: &Expression) -> Expression {
31593 use crate::expressions::Literal;
31594 if let Expression::Literal(Literal::String(s)) = format_expr {
31595 let sf = s
31596 .replace("%Y", "yyyy")
31597 .replace("%m", "mm")
31598 .replace("%d", "DD")
31599 .replace("%H", "HH24")
31600 .replace("%M", "MI")
31601 .replace("%S", "SS")
31602 .replace("%b", "mon")
31603 .replace("%B", "Month")
31604 .replace("%e", "FMDD");
31605 Expression::Literal(Literal::String(sf))
31606 } else {
31607 format_expr.clone()
31608 }
31609 }
31610
31611 /// Convert BigQuery format string to DuckDB format string
31612 fn bq_format_to_duckdb(format_expr: &Expression) -> Expression {
31613 use crate::expressions::Literal;
31614 if let Expression::Literal(Literal::String(s)) = format_expr {
31615 let duck = s
31616 .replace("%T", "%H:%M:%S")
31617 .replace("%F", "%Y-%m-%d")
31618 .replace("%D", "%m/%d/%y")
31619 .replace("%x", "%m/%d/%y")
31620 .replace("%c", "%a %b %-d %H:%M:%S %Y")
31621 .replace("%e", "%-d")
31622 .replace("%E6S", "%S.%f");
31623 Expression::Literal(Literal::String(duck))
31624 } else {
31625 format_expr.clone()
31626 }
31627 }
31628
31629 /// Convert BigQuery CAST FORMAT elements (like YYYY, MM, DD) to strftime (like %Y, %m, %d)
31630 fn bq_cast_format_to_strftime(format_expr: &Expression) -> Expression {
31631 use crate::expressions::Literal;
31632 if let Expression::Literal(Literal::String(s)) = format_expr {
31633 // Replace format elements from longest to shortest to avoid partial matches
31634 let result = s
31635 .replace("YYYYMMDD", "%Y%m%d")
31636 .replace("YYYY", "%Y")
31637 .replace("YY", "%y")
31638 .replace("MONTH", "%B")
31639 .replace("MON", "%b")
31640 .replace("MM", "%m")
31641 .replace("DD", "%d")
31642 .replace("HH24", "%H")
31643 .replace("HH12", "%I")
31644 .replace("HH", "%I")
31645 .replace("MI", "%M")
31646 .replace("SSTZH", "%S%z")
31647 .replace("SS", "%S")
31648 .replace("TZH", "%z");
31649 Expression::Literal(Literal::String(result))
31650 } else {
31651 format_expr.clone()
31652 }
31653 }
31654
31655 /// Normalize BigQuery format strings for BQ->BQ output
31656 fn bq_format_normalize_bq(format_expr: &Expression) -> Expression {
31657 use crate::expressions::Literal;
31658 if let Expression::Literal(Literal::String(s)) = format_expr {
31659 let norm = s.replace("%H:%M:%S", "%T").replace("%x", "%D");
31660 Expression::Literal(Literal::String(norm))
31661 } else {
31662 format_expr.clone()
31663 }
31664 }
31665}
31666
// Unit tests for the dialect registry (`DialectType` string parsing) and a
// sample of cross-dialect transpilation paths.
//
// NOTE(review): several tests below only `eprintln!` their result and assert
// nothing about the text. They still fail on parse/transpile errors (via
// `.unwrap()`), so they act as smoke tests; the exact output is not pinned.
#[cfg(test)]
mod tests {
    use super::*;

    // Common dialect-name aliases resolve to the expected variants
    // ("postgres" and "postgresql" both map to PostgreSQL).
    #[test]
    fn test_dialect_type_from_str() {
        assert_eq!(
            "postgres".parse::<DialectType>().unwrap(),
            DialectType::PostgreSQL
        );
        assert_eq!(
            "postgresql".parse::<DialectType>().unwrap(),
            DialectType::PostgreSQL
        );
        assert_eq!("mysql".parse::<DialectType>().unwrap(), DialectType::MySQL);
        assert_eq!(
            "bigquery".parse::<DialectType>().unwrap(),
            DialectType::BigQuery
        );
    }

    // A trivial statement round-trips unchanged; transpile_to returns one
    // output string per input statement.
    #[test]
    fn test_basic_transpile() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect
            .transpile_to("SELECT 1", DialectType::PostgreSQL)
            .unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0], "SELECT 1");
    }

    #[test]
    fn test_function_transformation_mysql() {
        // NVL should be transformed to IFNULL in MySQL
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect
            .transpile_to("SELECT NVL(a, b)", DialectType::MySQL)
            .unwrap();
        assert_eq!(result[0], "SELECT IFNULL(a, b)");
    }

    // Smoke test: exercises Snowflake's `:` JSON-path operator and explicit
    // GET_PATH, to Snowflake and DuckDB targets. No output assertions — only
    // checks the pipeline does not error.
    #[test]
    fn test_get_path_duckdb() {
        // Test: step by step
        let snowflake = Dialect::get(DialectType::Snowflake);

        // Step 1: Parse and check what Snowflake produces as intermediate
        let result_sf_sf = snowflake
            .transpile_to(
                "SELECT PARSE_JSON('{\"fruit\":\"banana\"}'):fruit",
                DialectType::Snowflake,
            )
            .unwrap();
        eprintln!("Snowflake->Snowflake colon: {}", result_sf_sf[0]);

        // Step 2: DuckDB target
        let result_sf_dk = snowflake
            .transpile_to(
                "SELECT PARSE_JSON('{\"fruit\":\"banana\"}'):fruit",
                DialectType::DuckDB,
            )
            .unwrap();
        eprintln!("Snowflake->DuckDB colon: {}", result_sf_dk[0]);

        // Step 3: GET_PATH directly
        let result_gp = snowflake
            .transpile_to(
                "SELECT GET_PATH(PARSE_JSON('{\"fruit\":\"banana\"}'), 'fruit')",
                DialectType::DuckDB,
            )
            .unwrap();
        eprintln!("Snowflake->DuckDB explicit GET_PATH: {}", result_gp[0]);
    }

    #[test]
    fn test_function_transformation_postgres() {
        // IFNULL should be transformed to COALESCE in PostgreSQL
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect
            .transpile_to("SELECT IFNULL(a, b)", DialectType::PostgreSQL)
            .unwrap();
        assert_eq!(result[0], "SELECT COALESCE(a, b)");

        // NVL should also be transformed to COALESCE
        let result = dialect
            .transpile_to("SELECT NVL(a, b)", DialectType::PostgreSQL)
            .unwrap();
        assert_eq!(result[0], "SELECT COALESCE(a, b)");
    }

    #[test]
    fn test_hive_cast_to_trycast() {
        // Hive CAST should become TRY_CAST for targets that support it
        // (note Presto also normalizes the type name INT -> INTEGER).
        let hive = Dialect::get(DialectType::Hive);
        let result = hive
            .transpile_to("CAST(1 AS INT)", DialectType::DuckDB)
            .unwrap();
        assert_eq!(result[0], "TRY_CAST(1 AS INT)");

        let result = hive
            .transpile_to("CAST(1 AS INT)", DialectType::Presto)
            .unwrap();
        assert_eq!(result[0], "TRY_CAST(1 AS INTEGER)");
    }

    #[test]
    fn test_hive_array_identity() {
        // Hive ARRAY<DATE> should preserve angle bracket syntax
        let sql = "CREATE EXTERNAL TABLE `my_table` (`a7` ARRAY<DATE>) ROW FORMAT SERDE 'a' STORED AS INPUTFORMAT 'b' OUTPUTFORMAT 'c' LOCATION 'd' TBLPROPERTIES ('e'='f')";
        let hive = Dialect::get(DialectType::Hive);

        // Test via transpile_to (this works)
        let result = hive.transpile_to(sql, DialectType::Hive).unwrap();
        eprintln!("Hive ARRAY via transpile_to: {}", result[0]);
        assert!(
            result[0].contains("ARRAY<DATE>"),
            "transpile_to: Expected ARRAY<DATE>, got: {}",
            result[0]
        );

        // Test via parse -> transform -> generate (identity test path)
        // This covers the same pipeline as transpile_to, but invoked step by
        // step, to catch divergence between the two entry points.
        let ast = hive.parse(sql).unwrap();
        let transformed = hive.transform(ast[0].clone()).unwrap();
        let output = hive.generate(&transformed).unwrap();
        eprintln!("Hive ARRAY via identity path: {}", output);
        assert!(
            output.contains("ARRAY<DATE>"),
            "identity path: Expected ARRAY<DATE>, got: {}",
            output
        );
    }

    #[test]
    fn test_starrocks_delete_between_expansion() {
        // StarRocks doesn't support BETWEEN in DELETE statements
        let dialect = Dialect::get(DialectType::Generic);

        // BETWEEN should be expanded to >= AND <= in DELETE
        let result = dialect
            .transpile_to(
                "DELETE FROM t WHERE a BETWEEN b AND c",
                DialectType::StarRocks,
            )
            .unwrap();
        assert_eq!(result[0], "DELETE FROM t WHERE a >= b AND a <= c");

        // NOT BETWEEN should be expanded to < OR > in DELETE
        let result = dialect
            .transpile_to(
                "DELETE FROM t WHERE a NOT BETWEEN b AND c",
                DialectType::StarRocks,
            )
            .unwrap();
        assert_eq!(result[0], "DELETE FROM t WHERE a < b OR a > c");

        // BETWEEN in SELECT should NOT be expanded (StarRocks supports it there)
        let result = dialect
            .transpile_to(
                "SELECT * FROM t WHERE a BETWEEN b AND c",
                DialectType::StarRocks,
            )
            .unwrap();
        assert!(
            result[0].contains("BETWEEN"),
            "BETWEEN should be preserved in SELECT"
        );
    }

    // Regression test: nested LTRIM(RTRIM(...)) must parse under Snowflake
    // (only checks success, not the generated text).
    #[test]
    fn test_snowflake_ltrim_rtrim_parse() {
        let sf = Dialect::get(DialectType::Snowflake);
        let sql = "SELECT LTRIM(RTRIM(col)) FROM t1";
        let result = sf.transpile_to(sql, DialectType::DuckDB);
        match &result {
            Ok(r) => eprintln!("LTRIM/RTRIM result: {}", r[0]),
            Err(e) => eprintln!("LTRIM/RTRIM error: {}", e),
        }
        assert!(
            result.is_ok(),
            "Expected successful parse of LTRIM(RTRIM(col)), got error: {:?}",
            result.err()
        );
    }

    // Regression test: bare COUNT_IF expression parses under DuckDB.
    #[test]
    fn test_duckdb_count_if_parse() {
        let duck = Dialect::get(DialectType::DuckDB);
        let sql = "COUNT_IF(x)";
        let result = duck.transpile_to(sql, DialectType::DuckDB);
        match &result {
            Ok(r) => eprintln!("COUNT_IF result: {}", r[0]),
            Err(e) => eprintln!("COUNT_IF error: {}", e),
        }
        assert!(
            result.is_ok(),
            "Expected successful parse of COUNT_IF(x), got error: {:?}",
            result.err()
        );
    }

    // Regression test: TSQL TINYINT in a CAST transpiles to DuckDB without
    // error (output text not pinned).
    #[test]
    fn test_tsql_cast_tinyint_parse() {
        let tsql = Dialect::get(DialectType::TSQL);
        let sql = "CAST(X AS TINYINT)";
        let result = tsql.transpile_to(sql, DialectType::DuckDB);
        match &result {
            Ok(r) => eprintln!("TSQL CAST TINYINT result: {}", r[0]),
            Err(e) => eprintln!("TSQL CAST TINYINT error: {}", e),
        }
        assert!(
            result.is_ok(),
            "Expected successful transpile, got error: {:?}",
            result.err()
        );
    }

    // PostgreSQL's `#` (bitwise XOR) operator round-trips unchanged.
    #[test]
    fn test_pg_hash_bitwise_xor() {
        let dialect = Dialect::get(DialectType::PostgreSQL);
        let result = dialect
            .transpile_to("x # y", DialectType::PostgreSQL)
            .unwrap();
        assert_eq!(result[0], "x # y");
    }

    // PostgreSQL ARRAY[...] constructors become DuckDB bracket literals while
    // the `@>` containment operator is preserved.
    #[test]
    fn test_pg_array_to_duckdb() {
        let dialect = Dialect::get(DialectType::PostgreSQL);
        let result = dialect
            .transpile_to("SELECT ARRAY[1, 2, 3] @> ARRAY[1, 2]", DialectType::DuckDB)
            .unwrap();
        assert_eq!(result[0], "SELECT [1, 2, 3] @> [1, 2]");
    }

    // BigQuery has no ARRAY_REMOVE; it is rewritten as an ARRAY(SELECT ...)
    // subquery filtering the unnested elements.
    #[test]
    fn test_array_remove_bigquery() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect
            .transpile_to("ARRAY_REMOVE(the_array, target)", DialectType::BigQuery)
            .unwrap();
        assert_eq!(
            result[0],
            "ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)"
        );
    }

    // Smoke test: MAP literal cast to a MAP type targeting ClickHouse.
    // Prints the parsed AST and the output; asserts nothing beyond success.
    #[test]
    fn test_map_clickhouse_case() {
        let dialect = Dialect::get(DialectType::Generic);
        let parsed = dialect
            .parse("CAST(MAP('a', '1') AS MAP(TEXT, TEXT))")
            .unwrap();
        eprintln!("MAP parsed: {:?}", parsed);
        let result = dialect
            .transpile_to(
                "CAST(MAP('a', '1') AS MAP(TEXT, TEXT))",
                DialectType::ClickHouse,
            )
            .unwrap();
        eprintln!("MAP result: {}", result[0]);
    }

    // GENERATE_DATE_ARRAY lowers to Presto's SEQUENCE with the WEEK interval
    // rewritten as a multiple of DAY.
    #[test]
    fn test_generate_date_array_presto() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile_to(
            "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
            DialectType::Presto,
        ).unwrap();
        eprintln!("GDA -> Presto: {}", result[0]);
        assert_eq!(result[0], "SELECT * FROM UNNEST(SEQUENCE(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), (1 * INTERVAL '7' DAY)))");
    }

    // Smoke test: GENERATE_DATE_ARRAY to PostgreSQL (output not pinned).
    #[test]
    fn test_generate_date_array_postgres() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile_to(
            "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
            DialectType::PostgreSQL,
        ).unwrap();
        eprintln!("GDA -> PostgreSQL: {}", result[0]);
    }

    // Smoke test: GENERATE_DATE_ARRAY to Snowflake. Runs on a dedicated
    // 16 MiB-stack thread — presumably this transformation recurses deeply
    // enough to overflow the default test-thread stack (TODO confirm).
    #[test]
    fn test_generate_date_array_snowflake() {
        std::thread::Builder::new()
            .stack_size(16 * 1024 * 1024)
            .spawn(|| {
                let dialect = Dialect::get(DialectType::Generic);
                let result = dialect.transpile_to(
                    "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
                    DialectType::Snowflake,
                ).unwrap();
                eprintln!("GDA -> Snowflake: {}", result[0]);
            })
            .unwrap()
            .join()
            .unwrap();
    }

    // Smoke test: ARRAY_LENGTH wrapped around GENERATE_DATE_ARRAY -> Snowflake.
    #[test]
    fn test_array_length_generate_date_array_snowflake() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile_to(
            "SELECT ARRAY_LENGTH(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
            DialectType::Snowflake,
        ).unwrap();
        eprintln!("ARRAY_LENGTH(GDA) -> Snowflake: {}", result[0]);
    }

    // Smoke test: GENERATE_DATE_ARRAY to MySQL (output not pinned).
    #[test]
    fn test_generate_date_array_mysql() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile_to(
            "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
            DialectType::MySQL,
        ).unwrap();
        eprintln!("GDA -> MySQL: {}", result[0]);
    }

    // Smoke test: GENERATE_DATE_ARRAY to Redshift (output not pinned).
    #[test]
    fn test_generate_date_array_redshift() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile_to(
            "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
            DialectType::Redshift,
        ).unwrap();
        eprintln!("GDA -> Redshift: {}", result[0]);
    }

    // Smoke test: GENERATE_DATE_ARRAY to TSQL (output not pinned).
    #[test]
    fn test_generate_date_array_tsql() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile_to(
            "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
            DialectType::TSQL,
        ).unwrap();
        eprintln!("GDA -> TSQL: {}", result[0]);
    }

    // Smoke test: STRUCT type syntax with and without field-name colons,
    // targeting ClickHouse. Both variants are printed; neither is asserted,
    // so this only documents current behavior.
    #[test]
    fn test_struct_colon_syntax() {
        let dialect = Dialect::get(DialectType::Generic);
        // Test without colon first
        let result = dialect.transpile_to(
            "CAST((1, 2, 3, 4) AS STRUCT<a TINYINT, b SMALLINT, c INT, d BIGINT>)",
            DialectType::ClickHouse,
        );
        match result {
            Ok(r) => eprintln!("STRUCT no colon -> ClickHouse: {}", r[0]),
            Err(e) => eprintln!("STRUCT no colon error: {}", e),
        }
        // Now test with colon
        let result = dialect.transpile_to(
            "CAST((1, 2, 3, 4) AS STRUCT<a: TINYINT, b: SMALLINT, c: INT, d: BIGINT>)",
            DialectType::ClickHouse,
        );
        match result {
            Ok(r) => eprintln!("STRUCT colon -> ClickHouse: {}", r[0]),
            Err(e) => eprintln!("STRUCT colon error: {}", e),
        }
    }

    // Smoke test: GENERATE_DATE_ARRAY inside a CTE -> MySQL (output not pinned).
    #[test]
    fn test_generate_date_array_cte_wrapped_mysql() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile_to(
            "WITH dates AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))) SELECT * FROM dates",
            DialectType::MySQL,
        ).unwrap();
        eprintln!("GDA CTE -> MySQL: {}", result[0]);
    }

    // Smoke test: GENERATE_DATE_ARRAY inside a CTE -> TSQL (output not pinned).
    #[test]
    fn test_generate_date_array_cte_wrapped_tsql() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile_to(
            "WITH dates AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))) SELECT * FROM dates",
            DialectType::TSQL,
        ).unwrap();
        eprintln!("GDA CTE -> TSQL: {}", result[0]);
    }

    #[test]
    fn test_decode_literal_no_null_check() {
        // Oracle DECODE with all literals should produce simple equality, no IS NULL
        let dialect = Dialect::get(DialectType::Oracle);
        let result = dialect
            .transpile_to("SELECT decode(1,2,3,4)", DialectType::DuckDB)
            .unwrap();
        assert_eq!(
            result[0], "SELECT CASE WHEN 1 = 2 THEN 3 ELSE 4 END",
            "Literal DECODE should not have IS NULL checks"
        );
    }

    #[test]
    fn test_decode_column_vs_literal_no_null_check() {
        // Oracle DECODE with column vs literal should use simple equality (like sqlglot)
        let dialect = Dialect::get(DialectType::Oracle);
        let result = dialect
            .transpile_to("SELECT decode(col, 2, 3, 4) FROM t", DialectType::DuckDB)
            .unwrap();
        assert_eq!(
            result[0], "SELECT CASE WHEN col = 2 THEN 3 ELSE 4 END FROM t",
            "Column vs literal DECODE should not have IS NULL checks"
        );
    }

    #[test]
    fn test_decode_column_vs_column_keeps_null_check() {
        // Oracle DECODE with column vs column should keep null-safe comparison
        let dialect = Dialect::get(DialectType::Oracle);
        let result = dialect
            .transpile_to("SELECT decode(col, col2, 3, 4) FROM t", DialectType::DuckDB)
            .unwrap();
        assert!(
            result[0].contains("IS NULL"),
            "Column vs column DECODE should have IS NULL checks, got: {}",
            result[0]
        );
    }

    #[test]
    fn test_decode_null_search() {
        // Oracle DECODE with NULL search should use IS NULL
        let dialect = Dialect::get(DialectType::Oracle);
        let result = dialect
            .transpile_to("SELECT decode(col, NULL, 3, 4) FROM t", DialectType::DuckDB)
            .unwrap();
        assert_eq!(
            result[0],
            "SELECT CASE WHEN col IS NULL THEN 3 ELSE 4 END FROM t",
        );
    }
}