polyglot_sql/dialects/mod.rs
1//! SQL Dialect System
2//!
3//! This module implements the dialect abstraction layer that enables SQL transpilation
4//! between 30+ database engines. Each dialect encapsulates three concerns:
5//!
//! - **Tokenization**: Dialect-specific lexing rules (e.g., BigQuery and MySQL quote
//!   identifiers with backticks, while TSQL uses square brackets).
8//! - **Generation**: How AST nodes are rendered back to SQL text, including identifier quoting
9//! style, function name casing, and syntax variations.
10//! - **Transformation**: AST-level rewrites that convert dialect-specific constructs to/from
11//! a normalized form (e.g., Snowflake `SQUARE(x)` becomes `POWER(x, 2)`).
12//!
13//! The primary entry point is [`Dialect::get`], which returns a configured [`Dialect`] instance
14//! for a given [`DialectType`]. From there, callers can [`parse`](Dialect::parse),
15//! [`generate`](Dialect::generate), [`transform`](Dialect::transform), or
16//! [`transpile_to`](Dialect::transpile_to) another dialect in a single call.
17//!
18//! Each concrete dialect (e.g., `PostgresDialect`, `BigQueryDialect`) implements the
19//! [`DialectImpl`] trait, which provides configuration hooks and expression-level transforms.
20//! Dialect modules live in submodules of this module and are re-exported here.
21
22mod generic; // Always compiled
23
24#[cfg(feature = "dialect-athena")]
25mod athena;
26#[cfg(feature = "dialect-bigquery")]
27mod bigquery;
28#[cfg(feature = "dialect-clickhouse")]
29mod clickhouse;
30#[cfg(feature = "dialect-cockroachdb")]
31mod cockroachdb;
32#[cfg(feature = "dialect-databricks")]
33mod databricks;
34#[cfg(feature = "dialect-datafusion")]
35mod datafusion;
36#[cfg(feature = "dialect-doris")]
37mod doris;
38#[cfg(feature = "dialect-dremio")]
39mod dremio;
40#[cfg(feature = "dialect-drill")]
41mod drill;
42#[cfg(feature = "dialect-druid")]
43mod druid;
44#[cfg(feature = "dialect-duckdb")]
45mod duckdb;
46#[cfg(feature = "dialect-dune")]
47mod dune;
48#[cfg(feature = "dialect-exasol")]
49mod exasol;
50#[cfg(feature = "dialect-fabric")]
51mod fabric;
52#[cfg(feature = "dialect-hive")]
53mod hive;
54#[cfg(feature = "dialect-materialize")]
55mod materialize;
56#[cfg(feature = "dialect-mysql")]
57mod mysql;
58#[cfg(feature = "dialect-oracle")]
59mod oracle;
60#[cfg(feature = "dialect-postgresql")]
61mod postgres;
62#[cfg(feature = "dialect-presto")]
63mod presto;
64#[cfg(feature = "dialect-redshift")]
65mod redshift;
66#[cfg(feature = "dialect-risingwave")]
67mod risingwave;
68#[cfg(feature = "dialect-singlestore")]
69mod singlestore;
70#[cfg(feature = "dialect-snowflake")]
71mod snowflake;
72#[cfg(feature = "dialect-solr")]
73mod solr;
74#[cfg(feature = "dialect-spark")]
75mod spark;
76#[cfg(feature = "dialect-sqlite")]
77mod sqlite;
78#[cfg(feature = "dialect-starrocks")]
79mod starrocks;
80#[cfg(feature = "dialect-tableau")]
81mod tableau;
82#[cfg(feature = "dialect-teradata")]
83mod teradata;
84#[cfg(feature = "dialect-tidb")]
85mod tidb;
86#[cfg(feature = "dialect-trino")]
87mod trino;
88#[cfg(feature = "dialect-tsql")]
89mod tsql;
90
91pub use generic::GenericDialect; // Always available
92
93#[cfg(feature = "dialect-athena")]
94pub use athena::AthenaDialect;
95#[cfg(feature = "dialect-bigquery")]
96pub use bigquery::BigQueryDialect;
97#[cfg(feature = "dialect-clickhouse")]
98pub use clickhouse::ClickHouseDialect;
99#[cfg(feature = "dialect-cockroachdb")]
100pub use cockroachdb::CockroachDBDialect;
101#[cfg(feature = "dialect-databricks")]
102pub use databricks::DatabricksDialect;
103#[cfg(feature = "dialect-datafusion")]
104pub use datafusion::DataFusionDialect;
105#[cfg(feature = "dialect-doris")]
106pub use doris::DorisDialect;
107#[cfg(feature = "dialect-dremio")]
108pub use dremio::DremioDialect;
109#[cfg(feature = "dialect-drill")]
110pub use drill::DrillDialect;
111#[cfg(feature = "dialect-druid")]
112pub use druid::DruidDialect;
113#[cfg(feature = "dialect-duckdb")]
114pub use duckdb::DuckDBDialect;
115#[cfg(feature = "dialect-dune")]
116pub use dune::DuneDialect;
117#[cfg(feature = "dialect-exasol")]
118pub use exasol::ExasolDialect;
119#[cfg(feature = "dialect-fabric")]
120pub use fabric::FabricDialect;
121#[cfg(feature = "dialect-hive")]
122pub use hive::HiveDialect;
123#[cfg(feature = "dialect-materialize")]
124pub use materialize::MaterializeDialect;
125#[cfg(feature = "dialect-mysql")]
126pub use mysql::MySQLDialect;
127#[cfg(feature = "dialect-oracle")]
128pub use oracle::OracleDialect;
129#[cfg(feature = "dialect-postgresql")]
130pub use postgres::PostgresDialect;
131#[cfg(feature = "dialect-presto")]
132pub use presto::PrestoDialect;
133#[cfg(feature = "dialect-redshift")]
134pub use redshift::RedshiftDialect;
135#[cfg(feature = "dialect-risingwave")]
136pub use risingwave::RisingWaveDialect;
137#[cfg(feature = "dialect-singlestore")]
138pub use singlestore::SingleStoreDialect;
139#[cfg(feature = "dialect-snowflake")]
140pub use snowflake::SnowflakeDialect;
141#[cfg(feature = "dialect-solr")]
142pub use solr::SolrDialect;
143#[cfg(feature = "dialect-spark")]
144pub use spark::SparkDialect;
145#[cfg(feature = "dialect-sqlite")]
146pub use sqlite::SQLiteDialect;
147#[cfg(feature = "dialect-starrocks")]
148pub use starrocks::StarRocksDialect;
149#[cfg(feature = "dialect-tableau")]
150pub use tableau::TableauDialect;
151#[cfg(feature = "dialect-teradata")]
152pub use teradata::TeradataDialect;
153#[cfg(feature = "dialect-tidb")]
154pub use tidb::TiDBDialect;
155#[cfg(feature = "dialect-trino")]
156pub use trino::TrinoDialect;
157#[cfg(feature = "dialect-tsql")]
158pub use tsql::TSQLDialect;
159
160use crate::error::Result;
161use crate::expressions::{Expression, FunctionBody};
162use crate::generator::{Generator, GeneratorConfig};
163use crate::parser::Parser;
164use crate::tokens::{Token, Tokenizer, TokenizerConfig};
165use serde::{Deserialize, Serialize};
166use std::collections::HashMap;
167use std::sync::{Arc, LazyLock, RwLock};
168
/// Enumeration of all supported SQL dialects.
///
/// Each variant corresponds to a specific SQL database engine or query language.
/// The `Generic` variant represents standard SQL with no dialect-specific behavior,
/// and is used as the default when no dialect is specified.
///
/// Dialect names are case-insensitive when parsed from strings via [`FromStr`](std::str::FromStr).
/// Some dialects accept aliases (e.g., "mssql" and "sqlserver" both resolve to
/// [`TSQL`](DialectType::TSQL)); aliases are noted on the individual variants below.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum DialectType {
    /// Standard SQL with no dialect-specific behavior (default; also parsed from the empty string).
    Generic,
    /// PostgreSQL -- advanced open-source relational database (also accepts "postgres").
    PostgreSQL,
    /// MySQL -- widely-used open-source relational database.
    MySQL,
    /// Google BigQuery -- serverless cloud data warehouse with unique syntax (backtick quoting, STRUCT types, QUALIFY).
    BigQuery,
    /// Snowflake -- cloud data platform with QUALIFY clause, FLATTEN, and variant types.
    Snowflake,
    /// DuckDB -- in-process analytical database with modern SQL extensions.
    DuckDB,
    /// SQLite -- lightweight embedded relational database.
    SQLite,
    /// Apache Hive -- data warehouse on Hadoop with HiveQL syntax.
    Hive,
    /// Apache Spark SQL -- distributed query engine (also accepts "spark2").
    Spark,
    /// Trino -- distributed SQL query engine (formerly PrestoSQL).
    Trino,
    /// PrestoDB -- distributed SQL query engine for big data.
    Presto,
    /// Amazon Redshift -- cloud data warehouse based on PostgreSQL.
    Redshift,
    /// Transact-SQL (T-SQL) -- Microsoft SQL Server and Azure SQL (also accepts "mssql", "sqlserver").
    TSQL,
    /// Oracle Database -- commercial relational database with PL/SQL extensions.
    Oracle,
    /// ClickHouse -- column-oriented OLAP database for real-time analytics.
    ClickHouse,
    /// Databricks SQL -- Spark-based lakehouse platform with QUALIFY support.
    Databricks,
    /// Amazon Athena -- serverless query service (hybrid Trino/Hive engine).
    Athena,
    /// Teradata -- enterprise data warehouse with proprietary SQL extensions.
    Teradata,
    /// Apache Doris -- real-time analytical database (MySQL-compatible).
    Doris,
    /// StarRocks -- sub-second OLAP database (MySQL-compatible).
    StarRocks,
    /// Materialize -- streaming SQL database built on differential dataflow.
    Materialize,
    /// RisingWave -- distributed streaming database with PostgreSQL compatibility.
    RisingWave,
    /// SingleStore (formerly MemSQL) -- distributed SQL database (also accepts "memsql").
    SingleStore,
    /// CockroachDB -- distributed SQL database with PostgreSQL compatibility (also accepts "cockroach").
    CockroachDB,
    /// TiDB -- distributed HTAP database with MySQL compatibility.
    TiDB,
    /// Apache Druid -- real-time analytics database.
    Druid,
    /// Apache Solr -- search platform with SQL interface.
    Solr,
    /// Tableau -- data visualization platform with its own SQL dialect.
    Tableau,
    /// Dune Analytics -- blockchain analytics SQL engine.
    Dune,
    /// Microsoft Fabric -- unified analytics platform (T-SQL based).
    Fabric,
    /// Apache Drill -- schema-free SQL query engine for big data.
    Drill,
    /// Dremio -- data lakehouse platform with Arrow-based query engine.
    Dremio,
    /// Exasol -- in-memory analytic database.
    Exasol,
    /// Apache DataFusion -- Arrow-based query engine with modern SQL extensions
    /// (also accepts "arrow-datafusion", "arrow_datafusion").
    DataFusion,
}
249
250impl Default for DialectType {
251 fn default() -> Self {
252 DialectType::Generic
253 }
254}
255
256impl std::fmt::Display for DialectType {
257 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
258 match self {
259 DialectType::Generic => write!(f, "generic"),
260 DialectType::PostgreSQL => write!(f, "postgresql"),
261 DialectType::MySQL => write!(f, "mysql"),
262 DialectType::BigQuery => write!(f, "bigquery"),
263 DialectType::Snowflake => write!(f, "snowflake"),
264 DialectType::DuckDB => write!(f, "duckdb"),
265 DialectType::SQLite => write!(f, "sqlite"),
266 DialectType::Hive => write!(f, "hive"),
267 DialectType::Spark => write!(f, "spark"),
268 DialectType::Trino => write!(f, "trino"),
269 DialectType::Presto => write!(f, "presto"),
270 DialectType::Redshift => write!(f, "redshift"),
271 DialectType::TSQL => write!(f, "tsql"),
272 DialectType::Oracle => write!(f, "oracle"),
273 DialectType::ClickHouse => write!(f, "clickhouse"),
274 DialectType::Databricks => write!(f, "databricks"),
275 DialectType::Athena => write!(f, "athena"),
276 DialectType::Teradata => write!(f, "teradata"),
277 DialectType::Doris => write!(f, "doris"),
278 DialectType::StarRocks => write!(f, "starrocks"),
279 DialectType::Materialize => write!(f, "materialize"),
280 DialectType::RisingWave => write!(f, "risingwave"),
281 DialectType::SingleStore => write!(f, "singlestore"),
282 DialectType::CockroachDB => write!(f, "cockroachdb"),
283 DialectType::TiDB => write!(f, "tidb"),
284 DialectType::Druid => write!(f, "druid"),
285 DialectType::Solr => write!(f, "solr"),
286 DialectType::Tableau => write!(f, "tableau"),
287 DialectType::Dune => write!(f, "dune"),
288 DialectType::Fabric => write!(f, "fabric"),
289 DialectType::Drill => write!(f, "drill"),
290 DialectType::Dremio => write!(f, "dremio"),
291 DialectType::Exasol => write!(f, "exasol"),
292 DialectType::DataFusion => write!(f, "datafusion"),
293 }
294 }
295}
296
297impl std::str::FromStr for DialectType {
298 type Err = crate::error::Error;
299
300 fn from_str(s: &str) -> Result<Self> {
301 match s.to_lowercase().as_str() {
302 "generic" | "" => Ok(DialectType::Generic),
303 "postgres" | "postgresql" => Ok(DialectType::PostgreSQL),
304 "mysql" => Ok(DialectType::MySQL),
305 "bigquery" => Ok(DialectType::BigQuery),
306 "snowflake" => Ok(DialectType::Snowflake),
307 "duckdb" => Ok(DialectType::DuckDB),
308 "sqlite" => Ok(DialectType::SQLite),
309 "hive" => Ok(DialectType::Hive),
310 "spark" | "spark2" => Ok(DialectType::Spark),
311 "trino" => Ok(DialectType::Trino),
312 "presto" => Ok(DialectType::Presto),
313 "redshift" => Ok(DialectType::Redshift),
314 "tsql" | "mssql" | "sqlserver" => Ok(DialectType::TSQL),
315 "oracle" => Ok(DialectType::Oracle),
316 "clickhouse" => Ok(DialectType::ClickHouse),
317 "databricks" => Ok(DialectType::Databricks),
318 "athena" => Ok(DialectType::Athena),
319 "teradata" => Ok(DialectType::Teradata),
320 "doris" => Ok(DialectType::Doris),
321 "starrocks" => Ok(DialectType::StarRocks),
322 "materialize" => Ok(DialectType::Materialize),
323 "risingwave" => Ok(DialectType::RisingWave),
324 "singlestore" | "memsql" => Ok(DialectType::SingleStore),
325 "cockroachdb" | "cockroach" => Ok(DialectType::CockroachDB),
326 "tidb" => Ok(DialectType::TiDB),
327 "druid" => Ok(DialectType::Druid),
328 "solr" => Ok(DialectType::Solr),
329 "tableau" => Ok(DialectType::Tableau),
330 "dune" => Ok(DialectType::Dune),
331 "fabric" => Ok(DialectType::Fabric),
332 "drill" => Ok(DialectType::Drill),
333 "dremio" => Ok(DialectType::Dremio),
334 "exasol" => Ok(DialectType::Exasol),
335 "datafusion" | "arrow-datafusion" | "arrow_datafusion" => Ok(DialectType::DataFusion),
336 _ => Err(crate::error::Error::parse(
337 format!("Unknown dialect: {}", s),
338 0,
339 0,
340 0,
341 0,
342 )),
343 }
344 }
345}
346
/// Trait that each concrete SQL dialect must implement.
///
/// `DialectImpl` provides the configuration hooks and per-expression transform logic
/// that distinguish one dialect from another. Implementors supply:
///
/// - A [`DialectType`] identifier.
/// - Optional overrides for tokenizer and generator configuration (defaults to generic SQL).
/// - An expression-level transform function ([`transform_expr`](DialectImpl::transform_expr))
///   that rewrites individual AST nodes for this dialect (e.g., converting `NVL` to `COALESCE`).
/// - An optional preprocessing step ([`preprocess`](DialectImpl::preprocess)) for whole-tree
///   rewrites that must run before the recursive per-node transform (e.g., eliminating QUALIFY).
///
/// The default implementations are no-ops, so a minimal dialect only needs to provide
/// [`dialect_type`](DialectImpl::dialect_type) and override the methods that differ from
/// standard SQL.
pub trait DialectImpl {
    /// Returns the [`DialectType`] that identifies this dialect.
    ///
    /// This is the only method without a default implementation.
    fn dialect_type(&self) -> DialectType;

    /// Returns the tokenizer configuration for this dialect.
    ///
    /// Override to customize identifier quoting characters, string escape rules,
    /// comment styles, and other lexing behavior. The default is the generic
    /// [`TokenizerConfig`].
    fn tokenizer_config(&self) -> TokenizerConfig {
        TokenizerConfig::default()
    }

    /// Returns the generator configuration for this dialect.
    ///
    /// Override to customize identifier quoting style, function name casing,
    /// keyword casing, and other SQL generation behavior. The default is the
    /// generic [`GeneratorConfig`].
    fn generator_config(&self) -> GeneratorConfig {
        GeneratorConfig::default()
    }

    /// Returns a generator configuration tailored to a specific expression.
    ///
    /// Override this for hybrid dialects like Athena that route to different SQL engines
    /// based on expression type (e.g., Hive-style generation for DDL, Trino-style for DML).
    /// The default ignores the expression and delegates to
    /// [`generator_config`](DialectImpl::generator_config).
    fn generator_config_for_expr(&self, _expr: &Expression) -> GeneratorConfig {
        self.generator_config()
    }

    /// Transforms a single expression node for this dialect, without recursing into children.
    ///
    /// This is the per-node rewrite hook invoked by [`transform_recursive`]. Return the
    /// expression unchanged if no dialect-specific rewrite is needed (the default does
    /// exactly that). Transformations typically include function renaming, operator
    /// substitution, and type mapping.
    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
        Ok(expr)
    }

    /// Applies whole-tree preprocessing transforms before the recursive per-node pass.
    ///
    /// Override this to apply structural rewrites that must see the entire tree at once,
    /// such as `eliminate_qualify`, `eliminate_distinct_on`, `ensure_bools`, or
    /// `explode_projection_to_unnest`. The default is a no-op pass-through.
    fn preprocess(&self, expr: Expression) -> Result<Expression> {
        Ok(expr)
    }
}
409
410/// Recursively transforms a [`DataType`](crate::expressions::DataType), handling nested
411/// parametric types such as `ARRAY<INT>`, `STRUCT<a INT, b TEXT>`, and `MAP<STRING, INT>`.
412///
413/// The outer type is first passed through `transform_fn` as an `Expression::DataType`,
414/// and then nested element/field types are recursed into. This ensures that dialect-level
415/// type mappings (e.g., `INT` to `INTEGER`) propagate into complex nested types.
416fn transform_data_type_recursive<F>(
417 dt: crate::expressions::DataType,
418 transform_fn: &F,
419) -> Result<crate::expressions::DataType>
420where
421 F: Fn(Expression) -> Result<Expression>,
422{
423 use crate::expressions::DataType;
424 // First, transform the outermost type through the expression system
425 let dt_expr = transform_fn(Expression::DataType(dt))?;
426 let dt = match dt_expr {
427 Expression::DataType(d) => d,
428 _ => {
429 return Ok(match dt_expr {
430 _ => DataType::Custom {
431 name: "UNKNOWN".to_string(),
432 },
433 })
434 }
435 };
436 // Then recurse into nested types
437 match dt {
438 DataType::Array {
439 element_type,
440 dimension,
441 } => {
442 let inner = transform_data_type_recursive(*element_type, transform_fn)?;
443 Ok(DataType::Array {
444 element_type: Box::new(inner),
445 dimension,
446 })
447 }
448 DataType::List { element_type } => {
449 let inner = transform_data_type_recursive(*element_type, transform_fn)?;
450 Ok(DataType::List {
451 element_type: Box::new(inner),
452 })
453 }
454 DataType::Struct { fields, nested } => {
455 let mut new_fields = Vec::new();
456 for mut field in fields {
457 field.data_type = transform_data_type_recursive(field.data_type, transform_fn)?;
458 new_fields.push(field);
459 }
460 Ok(DataType::Struct {
461 fields: new_fields,
462 nested,
463 })
464 }
465 DataType::Map {
466 key_type,
467 value_type,
468 } => {
469 let k = transform_data_type_recursive(*key_type, transform_fn)?;
470 let v = transform_data_type_recursive(*value_type, transform_fn)?;
471 Ok(DataType::Map {
472 key_type: Box::new(k),
473 value_type: Box::new(v),
474 })
475 }
476 other => Ok(other),
477 }
478}
479
/// Convert DuckDB C-style format strings to Presto C-style format strings.
///
/// Both engines use `%`-prefixed directives, but a few specifiers differ:
/// DuckDB's minute `%M` / second `%S` become Presto's `%i` / `%s`, and the
/// no-padding forms map to `%c`, `%e`, `%l`, `%k`. Multi-character patterns
/// are temporarily swapped for `\x01`-delimited sentinels so the short
/// single-specifier substitutions cannot corrupt them mid-pattern.
#[cfg(feature = "transpile")]
fn duckdb_to_presto_format(fmt: &str) -> String {
    // Phase 1: shield longer patterns behind sentinel tokens (order matters).
    const SHIELD: [(&str, &str); 6] = [
        ("%-m", "\x01NOPADM\x01"),
        ("%-d", "\x01NOPADD\x01"),
        ("%-I", "\x01NOPADI\x01"),
        ("%-H", "\x01NOPADH\x01"),
        ("%H:%M:%S", "\x01HMS\x01"),
        ("%Y-%m-%d", "\x01YMD\x01"),
    ];
    // Phase 3: replace each sentinel with its Presto spelling.
    const UNSHIELD: [(&str, &str); 6] = [
        ("\x01NOPADM\x01", "%c"),
        ("\x01NOPADD\x01", "%e"),
        ("\x01NOPADI\x01", "%l"),
        ("\x01NOPADH\x01", "%k"),
        ("\x01HMS\x01", "%T"),
        ("\x01YMD\x01", "%Y-%m-%d"),
    ];
    let mut out = fmt.to_owned();
    for (pattern, sentinel) in SHIELD {
        out = out.replace(pattern, sentinel);
    }
    // Phase 2: rewrite the individual specifiers that differ between engines.
    out = out.replace("%M", "%i");
    out = out.replace("%S", "%s");
    for (sentinel, presto) in UNSHIELD {
        out = out.replace(sentinel, presto);
    }
    out
}
505
/// Convert DuckDB C-style format strings to BigQuery format strings.
///
/// BigQuery understands a strftime-like directive set, so only a few patterns
/// need rewriting; the longest composite patterns are listed first so shorter
/// ones cannot partially match inside them.
#[cfg(feature = "transpile")]
fn duckdb_to_bigquery_format(fmt: &str) -> String {
    const MAPPINGS: [(&str, &str); 4] = [
        ("%-d", "%e"),
        ("%Y-%m-%d %H:%M:%S", "%F %T"),
        ("%Y-%m-%d", "%F"),
        ("%H:%M:%S", "%T"),
    ];
    MAPPINGS
        .into_iter()
        .fold(fmt.to_owned(), |acc, (from, to)| acc.replace(from, to))
}
518
519/// Applies a transform function bottom-up through an entire expression tree.
520///
521/// This is the core tree-rewriting engine used by the dialect system. It performs
522/// a post-order (children-first) traversal: for each node, all children are recursively
523/// transformed before the node itself is passed to `transform_fn`. This bottom-up
524/// strategy means that when `transform_fn` sees a node, its children have already
525/// been rewritten, which simplifies pattern matching on sub-expressions.
526///
527/// The function handles all expression variants including SELECT clauses (FROM, WHERE,
528/// GROUP BY, HAVING, ORDER BY, QUALIFY, WITH/CTEs, WINDOW), binary operators,
529/// function calls, CASE expressions, date/time functions, and more.
530///
531/// # Arguments
532///
533/// * `expr` - The root expression to transform (consumed).
534/// * `transform_fn` - A closure that receives each expression node (after its children
535/// have been transformed) and returns a possibly-rewritten expression.
536///
537/// # Errors
538///
539/// Returns an error if `transform_fn` returns an error for any node.
540pub fn transform_recursive<F>(expr: Expression, transform_fn: &F) -> Result<Expression>
541where
542 F: Fn(Expression) -> Result<Expression>,
543{
544 use crate::expressions::BinaryOp;
545
546 // Helper macro to transform binary ops with Box<BinaryOp>
547 macro_rules! transform_binary {
548 ($variant:ident, $op:expr) => {{
549 let left = transform_recursive($op.left, transform_fn)?;
550 let right = transform_recursive($op.right, transform_fn)?;
551 Expression::$variant(Box::new(BinaryOp {
552 left,
553 right,
554 left_comments: $op.left_comments,
555 operator_comments: $op.operator_comments,
556 trailing_comments: $op.trailing_comments,
557 inferred_type: $op.inferred_type,
558 }))
559 }};
560 }
561
562 // First recursively transform children, then apply the transform function
563 let expr = match expr {
564 Expression::Select(mut select) => {
565 select.expressions = select
566 .expressions
567 .into_iter()
568 .map(|e| transform_recursive(e, transform_fn))
569 .collect::<Result<Vec<_>>>()?;
570
571 // Transform FROM clause
572 if let Some(mut from) = select.from.take() {
573 from.expressions = from
574 .expressions
575 .into_iter()
576 .map(|e| transform_recursive(e, transform_fn))
577 .collect::<Result<Vec<_>>>()?;
578 select.from = Some(from);
579 }
580
581 // Transform JOINs - important for CROSS APPLY / LATERAL transformations
582 select.joins = select
583 .joins
584 .into_iter()
585 .map(|mut join| {
586 join.this = transform_recursive(join.this, transform_fn)?;
587 if let Some(on) = join.on.take() {
588 join.on = Some(transform_recursive(on, transform_fn)?);
589 }
590 // Wrap join in Expression::Join to allow transform_fn to transform it
591 match transform_fn(Expression::Join(Box::new(join)))? {
592 Expression::Join(j) => Ok(*j),
593 _ => Err(crate::error::Error::parse(
594 "Join transformation returned non-join expression",
595 0,
596 0,
597 0,
598 0,
599 )),
600 }
601 })
602 .collect::<Result<Vec<_>>>()?;
603
604 // Transform LATERAL VIEW expressions (Hive/Spark)
605 select.lateral_views = select
606 .lateral_views
607 .into_iter()
608 .map(|mut lv| {
609 lv.this = transform_recursive(lv.this, transform_fn)?;
610 Ok(lv)
611 })
612 .collect::<Result<Vec<_>>>()?;
613
614 // Transform WHERE clause
615 if let Some(mut where_clause) = select.where_clause.take() {
616 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
617 select.where_clause = Some(where_clause);
618 }
619
620 // Transform GROUP BY
621 if let Some(mut group_by) = select.group_by.take() {
622 group_by.expressions = group_by
623 .expressions
624 .into_iter()
625 .map(|e| transform_recursive(e, transform_fn))
626 .collect::<Result<Vec<_>>>()?;
627 select.group_by = Some(group_by);
628 }
629
630 // Transform HAVING
631 if let Some(mut having) = select.having.take() {
632 having.this = transform_recursive(having.this, transform_fn)?;
633 select.having = Some(having);
634 }
635
636 // Transform WITH (CTEs)
637 if let Some(mut with) = select.with.take() {
638 with.ctes = with
639 .ctes
640 .into_iter()
641 .map(|mut cte| {
642 let original = cte.this.clone();
643 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
644 cte
645 })
646 .collect();
647 select.with = Some(with);
648 }
649
650 // Transform ORDER BY
651 if let Some(mut order) = select.order_by.take() {
652 order.expressions = order
653 .expressions
654 .into_iter()
655 .map(|o| {
656 let mut o = o;
657 let original = o.this.clone();
658 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
659 // Also apply transform to the Ordered wrapper itself (for NULLS FIRST etc.)
660 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
661 Ok(Expression::Ordered(transformed)) => *transformed,
662 Ok(_) | Err(_) => o,
663 }
664 })
665 .collect();
666 select.order_by = Some(order);
667 }
668
669 // Transform WINDOW clause order_by
670 if let Some(ref mut windows) = select.windows {
671 for nw in windows.iter_mut() {
672 nw.spec.order_by = std::mem::take(&mut nw.spec.order_by)
673 .into_iter()
674 .map(|o| {
675 let mut o = o;
676 let original = o.this.clone();
677 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
678 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
679 Ok(Expression::Ordered(transformed)) => *transformed,
680 Ok(_) | Err(_) => o,
681 }
682 })
683 .collect();
684 }
685 }
686
687 // Transform QUALIFY
688 if let Some(mut qual) = select.qualify.take() {
689 qual.this = transform_recursive(qual.this, transform_fn)?;
690 select.qualify = Some(qual);
691 }
692
693 Expression::Select(select)
694 }
695 Expression::Function(mut f) => {
696 f.args = f
697 .args
698 .into_iter()
699 .map(|e| transform_recursive(e, transform_fn))
700 .collect::<Result<Vec<_>>>()?;
701 Expression::Function(f)
702 }
703 Expression::AggregateFunction(mut f) => {
704 f.args = f
705 .args
706 .into_iter()
707 .map(|e| transform_recursive(e, transform_fn))
708 .collect::<Result<Vec<_>>>()?;
709 if let Some(filter) = f.filter {
710 f.filter = Some(transform_recursive(filter, transform_fn)?);
711 }
712 Expression::AggregateFunction(f)
713 }
714 Expression::WindowFunction(mut wf) => {
715 wf.this = transform_recursive(wf.this, transform_fn)?;
716 wf.over.partition_by = wf
717 .over
718 .partition_by
719 .into_iter()
720 .map(|e| transform_recursive(e, transform_fn))
721 .collect::<Result<Vec<_>>>()?;
722 // Transform order_by items through Expression::Ordered wrapper
723 wf.over.order_by = wf
724 .over
725 .order_by
726 .into_iter()
727 .map(|o| {
728 let mut o = o;
729 o.this = transform_recursive(o.this, transform_fn)?;
730 match transform_fn(Expression::Ordered(Box::new(o)))? {
731 Expression::Ordered(transformed) => Ok(*transformed),
732 _ => Err(crate::error::Error::parse(
733 "Ordered transformation returned non-Ordered expression",
734 0,
735 0,
736 0,
737 0,
738 )),
739 }
740 })
741 .collect::<Result<Vec<_>>>()?;
742 Expression::WindowFunction(wf)
743 }
744 Expression::Alias(mut a) => {
745 a.this = transform_recursive(a.this, transform_fn)?;
746 Expression::Alias(a)
747 }
748 Expression::Cast(mut c) => {
749 c.this = transform_recursive(c.this, transform_fn)?;
750 // Also transform the target data type (recursively for nested types like ARRAY<INT>, STRUCT<a INT>)
751 c.to = transform_data_type_recursive(c.to, transform_fn)?;
752 Expression::Cast(c)
753 }
754 Expression::And(op) => transform_binary!(And, *op),
755 Expression::Or(op) => transform_binary!(Or, *op),
756 Expression::Add(op) => transform_binary!(Add, *op),
757 Expression::Sub(op) => transform_binary!(Sub, *op),
758 Expression::Mul(op) => transform_binary!(Mul, *op),
759 Expression::Div(op) => transform_binary!(Div, *op),
760 Expression::Eq(op) => transform_binary!(Eq, *op),
761 Expression::Lt(op) => transform_binary!(Lt, *op),
762 Expression::Gt(op) => transform_binary!(Gt, *op),
763 Expression::Paren(mut p) => {
764 p.this = transform_recursive(p.this, transform_fn)?;
765 Expression::Paren(p)
766 }
767 Expression::Coalesce(mut f) => {
768 f.expressions = f
769 .expressions
770 .into_iter()
771 .map(|e| transform_recursive(e, transform_fn))
772 .collect::<Result<Vec<_>>>()?;
773 Expression::Coalesce(f)
774 }
775 Expression::IfNull(mut f) => {
776 f.this = transform_recursive(f.this, transform_fn)?;
777 f.expression = transform_recursive(f.expression, transform_fn)?;
778 Expression::IfNull(f)
779 }
780 Expression::Nvl(mut f) => {
781 f.this = transform_recursive(f.this, transform_fn)?;
782 f.expression = transform_recursive(f.expression, transform_fn)?;
783 Expression::Nvl(f)
784 }
785 Expression::In(mut i) => {
786 i.this = transform_recursive(i.this, transform_fn)?;
787 i.expressions = i
788 .expressions
789 .into_iter()
790 .map(|e| transform_recursive(e, transform_fn))
791 .collect::<Result<Vec<_>>>()?;
792 if let Some(query) = i.query {
793 i.query = Some(transform_recursive(query, transform_fn)?);
794 }
795 Expression::In(i)
796 }
797 Expression::Not(mut n) => {
798 n.this = transform_recursive(n.this, transform_fn)?;
799 Expression::Not(n)
800 }
801 Expression::ArraySlice(mut s) => {
802 s.this = transform_recursive(s.this, transform_fn)?;
803 if let Some(start) = s.start {
804 s.start = Some(transform_recursive(start, transform_fn)?);
805 }
806 if let Some(end) = s.end {
807 s.end = Some(transform_recursive(end, transform_fn)?);
808 }
809 Expression::ArraySlice(s)
810 }
811 Expression::Subscript(mut s) => {
812 s.this = transform_recursive(s.this, transform_fn)?;
813 s.index = transform_recursive(s.index, transform_fn)?;
814 Expression::Subscript(s)
815 }
816 Expression::Array(mut a) => {
817 a.expressions = a
818 .expressions
819 .into_iter()
820 .map(|e| transform_recursive(e, transform_fn))
821 .collect::<Result<Vec<_>>>()?;
822 Expression::Array(a)
823 }
824 Expression::Struct(mut s) => {
825 let mut new_fields = Vec::new();
826 for (name, expr) in s.fields {
827 let transformed = transform_recursive(expr, transform_fn)?;
828 new_fields.push((name, transformed));
829 }
830 s.fields = new_fields;
831 Expression::Struct(s)
832 }
833 Expression::NamedArgument(mut na) => {
834 na.value = transform_recursive(na.value, transform_fn)?;
835 Expression::NamedArgument(na)
836 }
837 Expression::MapFunc(mut m) => {
838 m.keys = m
839 .keys
840 .into_iter()
841 .map(|e| transform_recursive(e, transform_fn))
842 .collect::<Result<Vec<_>>>()?;
843 m.values = m
844 .values
845 .into_iter()
846 .map(|e| transform_recursive(e, transform_fn))
847 .collect::<Result<Vec<_>>>()?;
848 Expression::MapFunc(m)
849 }
850 Expression::ArrayFunc(mut a) => {
851 a.expressions = a
852 .expressions
853 .into_iter()
854 .map(|e| transform_recursive(e, transform_fn))
855 .collect::<Result<Vec<_>>>()?;
856 Expression::ArrayFunc(a)
857 }
858 Expression::Lambda(mut l) => {
859 l.body = transform_recursive(l.body, transform_fn)?;
860 Expression::Lambda(l)
861 }
862 Expression::JsonExtract(mut f) => {
863 f.this = transform_recursive(f.this, transform_fn)?;
864 f.path = transform_recursive(f.path, transform_fn)?;
865 Expression::JsonExtract(f)
866 }
867 Expression::JsonExtractScalar(mut f) => {
868 f.this = transform_recursive(f.this, transform_fn)?;
869 f.path = transform_recursive(f.path, transform_fn)?;
870 Expression::JsonExtractScalar(f)
871 }
872
873 // ===== UnaryFunc-based expressions =====
874 // These all have a single `this: Expression` child
875 Expression::Length(mut f) => {
876 f.this = transform_recursive(f.this, transform_fn)?;
877 Expression::Length(f)
878 }
879 Expression::Upper(mut f) => {
880 f.this = transform_recursive(f.this, transform_fn)?;
881 Expression::Upper(f)
882 }
883 Expression::Lower(mut f) => {
884 f.this = transform_recursive(f.this, transform_fn)?;
885 Expression::Lower(f)
886 }
887 Expression::LTrim(mut f) => {
888 f.this = transform_recursive(f.this, transform_fn)?;
889 Expression::LTrim(f)
890 }
891 Expression::RTrim(mut f) => {
892 f.this = transform_recursive(f.this, transform_fn)?;
893 Expression::RTrim(f)
894 }
895 Expression::Reverse(mut f) => {
896 f.this = transform_recursive(f.this, transform_fn)?;
897 Expression::Reverse(f)
898 }
899 Expression::Abs(mut f) => {
900 f.this = transform_recursive(f.this, transform_fn)?;
901 Expression::Abs(f)
902 }
903 Expression::Ceil(mut f) => {
904 f.this = transform_recursive(f.this, transform_fn)?;
905 Expression::Ceil(f)
906 }
907 Expression::Floor(mut f) => {
908 f.this = transform_recursive(f.this, transform_fn)?;
909 Expression::Floor(f)
910 }
911 Expression::Sign(mut f) => {
912 f.this = transform_recursive(f.this, transform_fn)?;
913 Expression::Sign(f)
914 }
915 Expression::Sqrt(mut f) => {
916 f.this = transform_recursive(f.this, transform_fn)?;
917 Expression::Sqrt(f)
918 }
919 Expression::Cbrt(mut f) => {
920 f.this = transform_recursive(f.this, transform_fn)?;
921 Expression::Cbrt(f)
922 }
923 Expression::Ln(mut f) => {
924 f.this = transform_recursive(f.this, transform_fn)?;
925 Expression::Ln(f)
926 }
927 Expression::Log(mut f) => {
928 f.this = transform_recursive(f.this, transform_fn)?;
929 if let Some(base) = f.base {
930 f.base = Some(transform_recursive(base, transform_fn)?);
931 }
932 Expression::Log(f)
933 }
934 Expression::Exp(mut f) => {
935 f.this = transform_recursive(f.this, transform_fn)?;
936 Expression::Exp(f)
937 }
938 Expression::Date(mut f) => {
939 f.this = transform_recursive(f.this, transform_fn)?;
940 Expression::Date(f)
941 }
942 Expression::Stddev(mut f) => {
943 f.this = transform_recursive(f.this, transform_fn)?;
944 Expression::Stddev(f)
945 }
946 Expression::Variance(mut f) => {
947 f.this = transform_recursive(f.this, transform_fn)?;
948 Expression::Variance(f)
949 }
950
951 // ===== BinaryFunc-based expressions =====
952 Expression::ModFunc(mut f) => {
953 f.this = transform_recursive(f.this, transform_fn)?;
954 f.expression = transform_recursive(f.expression, transform_fn)?;
955 Expression::ModFunc(f)
956 }
957 Expression::Power(mut f) => {
958 f.this = transform_recursive(f.this, transform_fn)?;
959 f.expression = transform_recursive(f.expression, transform_fn)?;
960 Expression::Power(f)
961 }
962 Expression::MapFromArrays(mut f) => {
963 f.this = transform_recursive(f.this, transform_fn)?;
964 f.expression = transform_recursive(f.expression, transform_fn)?;
965 Expression::MapFromArrays(f)
966 }
967 Expression::ElementAt(mut f) => {
968 f.this = transform_recursive(f.this, transform_fn)?;
969 f.expression = transform_recursive(f.expression, transform_fn)?;
970 Expression::ElementAt(f)
971 }
972 Expression::MapContainsKey(mut f) => {
973 f.this = transform_recursive(f.this, transform_fn)?;
974 f.expression = transform_recursive(f.expression, transform_fn)?;
975 Expression::MapContainsKey(f)
976 }
977 Expression::Left(mut f) => {
978 f.this = transform_recursive(f.this, transform_fn)?;
979 f.length = transform_recursive(f.length, transform_fn)?;
980 Expression::Left(f)
981 }
982 Expression::Right(mut f) => {
983 f.this = transform_recursive(f.this, transform_fn)?;
984 f.length = transform_recursive(f.length, transform_fn)?;
985 Expression::Right(f)
986 }
987 Expression::Repeat(mut f) => {
988 f.this = transform_recursive(f.this, transform_fn)?;
989 f.times = transform_recursive(f.times, transform_fn)?;
990 Expression::Repeat(f)
991 }
992
993 // ===== Complex function expressions =====
994 Expression::Substring(mut f) => {
995 f.this = transform_recursive(f.this, transform_fn)?;
996 f.start = transform_recursive(f.start, transform_fn)?;
997 if let Some(len) = f.length {
998 f.length = Some(transform_recursive(len, transform_fn)?);
999 }
1000 Expression::Substring(f)
1001 }
1002 Expression::Replace(mut f) => {
1003 f.this = transform_recursive(f.this, transform_fn)?;
1004 f.old = transform_recursive(f.old, transform_fn)?;
1005 f.new = transform_recursive(f.new, transform_fn)?;
1006 Expression::Replace(f)
1007 }
1008 Expression::ConcatWs(mut f) => {
1009 f.separator = transform_recursive(f.separator, transform_fn)?;
1010 f.expressions = f
1011 .expressions
1012 .into_iter()
1013 .map(|e| transform_recursive(e, transform_fn))
1014 .collect::<Result<Vec<_>>>()?;
1015 Expression::ConcatWs(f)
1016 }
1017 Expression::Trim(mut f) => {
1018 f.this = transform_recursive(f.this, transform_fn)?;
1019 if let Some(chars) = f.characters {
1020 f.characters = Some(transform_recursive(chars, transform_fn)?);
1021 }
1022 Expression::Trim(f)
1023 }
1024 Expression::Split(mut f) => {
1025 f.this = transform_recursive(f.this, transform_fn)?;
1026 f.delimiter = transform_recursive(f.delimiter, transform_fn)?;
1027 Expression::Split(f)
1028 }
1029 Expression::Lpad(mut f) => {
1030 f.this = transform_recursive(f.this, transform_fn)?;
1031 f.length = transform_recursive(f.length, transform_fn)?;
1032 if let Some(fill) = f.fill {
1033 f.fill = Some(transform_recursive(fill, transform_fn)?);
1034 }
1035 Expression::Lpad(f)
1036 }
1037 Expression::Rpad(mut f) => {
1038 f.this = transform_recursive(f.this, transform_fn)?;
1039 f.length = transform_recursive(f.length, transform_fn)?;
1040 if let Some(fill) = f.fill {
1041 f.fill = Some(transform_recursive(fill, transform_fn)?);
1042 }
1043 Expression::Rpad(f)
1044 }
1045
1046 // ===== Conditional expressions =====
1047 Expression::Case(mut c) => {
1048 if let Some(operand) = c.operand {
1049 c.operand = Some(transform_recursive(operand, transform_fn)?);
1050 }
1051 c.whens = c
1052 .whens
1053 .into_iter()
1054 .map(|(cond, then)| {
1055 let new_cond = transform_recursive(cond.clone(), transform_fn).unwrap_or(cond);
1056 let new_then = transform_recursive(then.clone(), transform_fn).unwrap_or(then);
1057 (new_cond, new_then)
1058 })
1059 .collect();
1060 if let Some(else_expr) = c.else_ {
1061 c.else_ = Some(transform_recursive(else_expr, transform_fn)?);
1062 }
1063 Expression::Case(c)
1064 }
1065 Expression::IfFunc(mut f) => {
1066 f.condition = transform_recursive(f.condition, transform_fn)?;
1067 f.true_value = transform_recursive(f.true_value, transform_fn)?;
1068 if let Some(false_val) = f.false_value {
1069 f.false_value = Some(transform_recursive(false_val, transform_fn)?);
1070 }
1071 Expression::IfFunc(f)
1072 }
1073
1074 // ===== Date/Time expressions =====
1075 Expression::DateAdd(mut f) => {
1076 f.this = transform_recursive(f.this, transform_fn)?;
1077 f.interval = transform_recursive(f.interval, transform_fn)?;
1078 Expression::DateAdd(f)
1079 }
1080 Expression::DateSub(mut f) => {
1081 f.this = transform_recursive(f.this, transform_fn)?;
1082 f.interval = transform_recursive(f.interval, transform_fn)?;
1083 Expression::DateSub(f)
1084 }
1085 Expression::DateDiff(mut f) => {
1086 f.this = transform_recursive(f.this, transform_fn)?;
1087 f.expression = transform_recursive(f.expression, transform_fn)?;
1088 Expression::DateDiff(f)
1089 }
1090 Expression::DateTrunc(mut f) => {
1091 f.this = transform_recursive(f.this, transform_fn)?;
1092 Expression::DateTrunc(f)
1093 }
1094 Expression::Extract(mut f) => {
1095 f.this = transform_recursive(f.this, transform_fn)?;
1096 Expression::Extract(f)
1097 }
1098
1099 // ===== JSON expressions =====
1100 Expression::JsonObject(mut f) => {
1101 f.pairs = f
1102 .pairs
1103 .into_iter()
1104 .map(|(k, v)| {
1105 let new_k = transform_recursive(k, transform_fn)?;
1106 let new_v = transform_recursive(v, transform_fn)?;
1107 Ok((new_k, new_v))
1108 })
1109 .collect::<Result<Vec<_>>>()?;
1110 Expression::JsonObject(f)
1111 }
1112
1113 // ===== Subquery expressions =====
1114 Expression::Subquery(mut s) => {
1115 s.this = transform_recursive(s.this, transform_fn)?;
1116 Expression::Subquery(s)
1117 }
1118 Expression::Exists(mut e) => {
1119 e.this = transform_recursive(e.this, transform_fn)?;
1120 Expression::Exists(e)
1121 }
1122
1123 // ===== Set operations =====
1124 Expression::Union(mut u) => {
1125 u.left = transform_recursive(u.left, transform_fn)?;
1126 u.right = transform_recursive(u.right, transform_fn)?;
1127 Expression::Union(u)
1128 }
1129 Expression::Intersect(mut i) => {
1130 i.left = transform_recursive(i.left, transform_fn)?;
1131 i.right = transform_recursive(i.right, transform_fn)?;
1132 Expression::Intersect(i)
1133 }
1134 Expression::Except(mut e) => {
1135 e.left = transform_recursive(e.left, transform_fn)?;
1136 e.right = transform_recursive(e.right, transform_fn)?;
1137 Expression::Except(e)
1138 }
1139
1140 // ===== DML expressions =====
1141 Expression::Insert(mut ins) => {
1142 // Transform VALUES clause expressions
1143 let mut new_values = Vec::new();
1144 for row in ins.values {
1145 let mut new_row = Vec::new();
1146 for e in row {
1147 new_row.push(transform_recursive(e, transform_fn)?);
1148 }
1149 new_values.push(new_row);
1150 }
1151 ins.values = new_values;
1152
1153 // Transform query (for INSERT ... SELECT)
1154 if let Some(query) = ins.query {
1155 ins.query = Some(transform_recursive(query, transform_fn)?);
1156 }
1157
1158 // Transform RETURNING clause
1159 let mut new_returning = Vec::new();
1160 for e in ins.returning {
1161 new_returning.push(transform_recursive(e, transform_fn)?);
1162 }
1163 ins.returning = new_returning;
1164
1165 // Transform ON CONFLICT clause
1166 if let Some(on_conflict) = ins.on_conflict {
1167 ins.on_conflict = Some(Box::new(transform_recursive(*on_conflict, transform_fn)?));
1168 }
1169
1170 Expression::Insert(ins)
1171 }
1172 Expression::Update(mut upd) => {
1173 upd.set = upd
1174 .set
1175 .into_iter()
1176 .map(|(id, val)| {
1177 let new_val = transform_recursive(val.clone(), transform_fn).unwrap_or(val);
1178 (id, new_val)
1179 })
1180 .collect();
1181 if let Some(mut where_clause) = upd.where_clause.take() {
1182 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
1183 upd.where_clause = Some(where_clause);
1184 }
1185 Expression::Update(upd)
1186 }
1187 Expression::Delete(mut del) => {
1188 if let Some(mut where_clause) = del.where_clause.take() {
1189 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
1190 del.where_clause = Some(where_clause);
1191 }
1192 Expression::Delete(del)
1193 }
1194
1195 // ===== CTE expressions =====
1196 Expression::With(mut w) => {
1197 w.ctes = w
1198 .ctes
1199 .into_iter()
1200 .map(|mut cte| {
1201 let original = cte.this.clone();
1202 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
1203 cte
1204 })
1205 .collect();
1206 Expression::With(w)
1207 }
1208 Expression::Cte(mut c) => {
1209 c.this = transform_recursive(c.this, transform_fn)?;
1210 Expression::Cte(c)
1211 }
1212
1213 // ===== Order expressions =====
1214 Expression::Ordered(mut o) => {
1215 o.this = transform_recursive(o.this, transform_fn)?;
1216 Expression::Ordered(o)
1217 }
1218
1219 // ===== Negation =====
1220 Expression::Neg(mut n) => {
1221 n.this = transform_recursive(n.this, transform_fn)?;
1222 Expression::Neg(n)
1223 }
1224
1225 // ===== Between =====
1226 Expression::Between(mut b) => {
1227 b.this = transform_recursive(b.this, transform_fn)?;
1228 b.low = transform_recursive(b.low, transform_fn)?;
1229 b.high = transform_recursive(b.high, transform_fn)?;
1230 Expression::Between(b)
1231 }
1232 Expression::IsNull(mut i) => {
1233 i.this = transform_recursive(i.this, transform_fn)?;
1234 Expression::IsNull(i)
1235 }
1236 Expression::IsTrue(mut i) => {
1237 i.this = transform_recursive(i.this, transform_fn)?;
1238 Expression::IsTrue(i)
1239 }
1240 Expression::IsFalse(mut i) => {
1241 i.this = transform_recursive(i.this, transform_fn)?;
1242 Expression::IsFalse(i)
1243 }
1244
1245 // ===== Like expressions =====
1246 Expression::Like(mut l) => {
1247 l.left = transform_recursive(l.left, transform_fn)?;
1248 l.right = transform_recursive(l.right, transform_fn)?;
1249 Expression::Like(l)
1250 }
1251 Expression::ILike(mut l) => {
1252 l.left = transform_recursive(l.left, transform_fn)?;
1253 l.right = transform_recursive(l.right, transform_fn)?;
1254 Expression::ILike(l)
1255 }
1256
1257 // ===== Additional binary ops not covered by macro =====
1258 Expression::Neq(op) => transform_binary!(Neq, *op),
1259 Expression::Lte(op) => transform_binary!(Lte, *op),
1260 Expression::Gte(op) => transform_binary!(Gte, *op),
1261 Expression::Mod(op) => transform_binary!(Mod, *op),
1262 Expression::Concat(op) => transform_binary!(Concat, *op),
1263 Expression::BitwiseAnd(op) => transform_binary!(BitwiseAnd, *op),
1264 Expression::BitwiseOr(op) => transform_binary!(BitwiseOr, *op),
1265 Expression::BitwiseXor(op) => transform_binary!(BitwiseXor, *op),
1266 Expression::Is(op) => transform_binary!(Is, *op),
1267
1268 // ===== TryCast / SafeCast =====
1269 Expression::TryCast(mut c) => {
1270 c.this = transform_recursive(c.this, transform_fn)?;
1271 c.to = transform_data_type_recursive(c.to, transform_fn)?;
1272 Expression::TryCast(c)
1273 }
1274 Expression::SafeCast(mut c) => {
1275 c.this = transform_recursive(c.this, transform_fn)?;
1276 c.to = transform_data_type_recursive(c.to, transform_fn)?;
1277 Expression::SafeCast(c)
1278 }
1279
1280 // ===== Misc =====
1281 Expression::Unnest(mut f) => {
1282 f.this = transform_recursive(f.this, transform_fn)?;
1283 f.expressions = f
1284 .expressions
1285 .into_iter()
1286 .map(|e| transform_recursive(e, transform_fn))
1287 .collect::<Result<Vec<_>>>()?;
1288 Expression::Unnest(f)
1289 }
1290 Expression::Explode(mut f) => {
1291 f.this = transform_recursive(f.this, transform_fn)?;
1292 Expression::Explode(f)
1293 }
1294 Expression::GroupConcat(mut f) => {
1295 f.this = transform_recursive(f.this, transform_fn)?;
1296 Expression::GroupConcat(f)
1297 }
1298 Expression::StringAgg(mut f) => {
1299 f.this = transform_recursive(f.this, transform_fn)?;
1300 Expression::StringAgg(f)
1301 }
1302 Expression::ListAgg(mut f) => {
1303 f.this = transform_recursive(f.this, transform_fn)?;
1304 Expression::ListAgg(f)
1305 }
1306 Expression::ArrayAgg(mut f) => {
1307 f.this = transform_recursive(f.this, transform_fn)?;
1308 Expression::ArrayAgg(f)
1309 }
1310 Expression::ParseJson(mut f) => {
1311 f.this = transform_recursive(f.this, transform_fn)?;
1312 Expression::ParseJson(f)
1313 }
1314 Expression::ToJson(mut f) => {
1315 f.this = transform_recursive(f.this, transform_fn)?;
1316 Expression::ToJson(f)
1317 }
1318 Expression::JSONExtract(mut e) => {
1319 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1320 e.expression = Box::new(transform_recursive(*e.expression, transform_fn)?);
1321 Expression::JSONExtract(e)
1322 }
1323 Expression::JSONExtractScalar(mut e) => {
1324 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1325 e.expression = Box::new(transform_recursive(*e.expression, transform_fn)?);
1326 Expression::JSONExtractScalar(e)
1327 }
1328
1329 // StrToTime: recurse into this
1330 Expression::StrToTime(mut e) => {
1331 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1332 Expression::StrToTime(e)
1333 }
1334
1335 // UnixToTime: recurse into this
1336 Expression::UnixToTime(mut e) => {
1337 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1338 Expression::UnixToTime(e)
1339 }
1340
1341 // CreateTable: recurse into column defaults, on_update expressions, and data types
1342 Expression::CreateTable(mut ct) => {
1343 for col in &mut ct.columns {
1344 if let Some(default_expr) = col.default.take() {
1345 col.default = Some(transform_recursive(default_expr, transform_fn)?);
1346 }
1347 if let Some(on_update_expr) = col.on_update.take() {
1348 col.on_update = Some(transform_recursive(on_update_expr, transform_fn)?);
1349 }
1350 // Note: Column data type transformations (INT -> INT64 for BigQuery, etc.)
1351 // are NOT applied here because per-dialect transforms are designed for CAST/expression
1352 // contexts and may not produce correct results for DDL column definitions.
1353 // The DDL type mappings would need dedicated handling per source/target pair.
1354 }
1355 if let Some(as_select) = ct.as_select.take() {
1356 ct.as_select = Some(transform_recursive(as_select, transform_fn)?);
1357 }
1358 Expression::CreateTable(ct)
1359 }
1360
1361 // CreateProcedure: recurse into body expressions
1362 Expression::CreateProcedure(mut cp) => {
1363 if let Some(body) = cp.body.take() {
1364 cp.body = Some(match body {
1365 FunctionBody::Expression(expr) => {
1366 FunctionBody::Expression(transform_recursive(expr, transform_fn)?)
1367 }
1368 FunctionBody::Return(expr) => {
1369 FunctionBody::Return(transform_recursive(expr, transform_fn)?)
1370 }
1371 FunctionBody::Statements(stmts) => {
1372 let transformed_stmts = stmts
1373 .into_iter()
1374 .map(|s| transform_recursive(s, transform_fn))
1375 .collect::<Result<Vec<_>>>()?;
1376 FunctionBody::Statements(transformed_stmts)
1377 }
1378 other => other,
1379 });
1380 }
1381 Expression::CreateProcedure(cp)
1382 }
1383
1384 // CreateFunction: recurse into body expressions
1385 Expression::CreateFunction(mut cf) => {
1386 if let Some(body) = cf.body.take() {
1387 cf.body = Some(match body {
1388 FunctionBody::Expression(expr) => {
1389 FunctionBody::Expression(transform_recursive(expr, transform_fn)?)
1390 }
1391 FunctionBody::Return(expr) => {
1392 FunctionBody::Return(transform_recursive(expr, transform_fn)?)
1393 }
1394 FunctionBody::Statements(stmts) => {
1395 let transformed_stmts = stmts
1396 .into_iter()
1397 .map(|s| transform_recursive(s, transform_fn))
1398 .collect::<Result<Vec<_>>>()?;
1399 FunctionBody::Statements(transformed_stmts)
1400 }
1401 other => other,
1402 });
1403 }
1404 Expression::CreateFunction(cf)
1405 }
1406
1407 // MemberOf: recurse into left and right operands
1408 Expression::MemberOf(op) => transform_binary!(MemberOf, *op),
1409 // ArrayContainsAll (@>): recurse into left and right operands
1410 Expression::ArrayContainsAll(op) => transform_binary!(ArrayContainsAll, *op),
1411 // ArrayContainedBy (<@): recurse into left and right operands
1412 Expression::ArrayContainedBy(op) => transform_binary!(ArrayContainedBy, *op),
1413 // ArrayOverlaps (&&): recurse into left and right operands
1414 Expression::ArrayOverlaps(op) => transform_binary!(ArrayOverlaps, *op),
1415 // TsMatch (@@): recurse into left and right operands
1416 Expression::TsMatch(op) => transform_binary!(TsMatch, *op),
1417 // Adjacent (-|-): recurse into left and right operands
1418 Expression::Adjacent(op) => transform_binary!(Adjacent, *op),
1419
1420 // Table: recurse into when (HistoricalData) and changes fields
1421 Expression::Table(mut t) => {
1422 if let Some(when) = t.when.take() {
1423 let transformed =
1424 transform_recursive(Expression::HistoricalData(when), transform_fn)?;
1425 if let Expression::HistoricalData(hd) = transformed {
1426 t.when = Some(hd);
1427 }
1428 }
1429 if let Some(changes) = t.changes.take() {
1430 let transformed = transform_recursive(Expression::Changes(changes), transform_fn)?;
1431 if let Expression::Changes(c) = transformed {
1432 t.changes = Some(c);
1433 }
1434 }
1435 Expression::Table(t)
1436 }
1437
1438 // HistoricalData (Snowflake time travel): recurse into expression
1439 Expression::HistoricalData(mut hd) => {
1440 *hd.expression = transform_recursive(*hd.expression, transform_fn)?;
1441 Expression::HistoricalData(hd)
1442 }
1443
1444 // Changes (Snowflake CHANGES clause): recurse into at_before and end
1445 Expression::Changes(mut c) => {
1446 if let Some(at_before) = c.at_before.take() {
1447 c.at_before = Some(Box::new(transform_recursive(*at_before, transform_fn)?));
1448 }
1449 if let Some(end) = c.end.take() {
1450 c.end = Some(Box::new(transform_recursive(*end, transform_fn)?));
1451 }
1452 Expression::Changes(c)
1453 }
1454
1455 // TableArgument: TABLE(expr) or MODEL(expr)
1456 Expression::TableArgument(mut ta) => {
1457 ta.this = transform_recursive(ta.this, transform_fn)?;
1458 Expression::TableArgument(ta)
1459 }
1460
1461 // JoinedTable: (tbl1 JOIN tbl2 ON ...) - recurse into left and join tables
1462 Expression::JoinedTable(mut jt) => {
1463 jt.left = transform_recursive(jt.left, transform_fn)?;
1464 for join in &mut jt.joins {
1465 join.this = transform_recursive(
1466 std::mem::replace(&mut join.this, Expression::Null(crate::expressions::Null)),
1467 transform_fn,
1468 )?;
1469 if let Some(on) = join.on.take() {
1470 join.on = Some(transform_recursive(on, transform_fn)?);
1471 }
1472 }
1473 jt.lateral_views = jt
1474 .lateral_views
1475 .into_iter()
1476 .map(|mut lv| {
1477 lv.this = transform_recursive(lv.this, transform_fn)?;
1478 Ok(lv)
1479 })
1480 .collect::<Result<Vec<_>>>()?;
1481 Expression::JoinedTable(jt)
1482 }
1483
1484 // Lateral: LATERAL func() - recurse into the function expression
1485 Expression::Lateral(mut lat) => {
1486 *lat.this = transform_recursive(*lat.this, transform_fn)?;
1487 Expression::Lateral(lat)
1488 }
1489
1490 // WithinGroup: recurse into order_by items (for NULLS FIRST/LAST etc.)
1491 // but NOT into wg.this - the inner function is handled by StringAggConvert/GroupConcatConvert
1492 // as a unit together with the WithinGroup wrapper
1493 Expression::WithinGroup(mut wg) => {
1494 wg.order_by = wg
1495 .order_by
1496 .into_iter()
1497 .map(|mut o| {
1498 let original = o.this.clone();
1499 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
1500 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
1501 Ok(Expression::Ordered(transformed)) => *transformed,
1502 Ok(_) | Err(_) => o,
1503 }
1504 })
1505 .collect();
1506 Expression::WithinGroup(wg)
1507 }
1508
1509 // Filter: recurse into both the aggregate and the filter condition
1510 Expression::Filter(mut f) => {
1511 f.this = Box::new(transform_recursive(*f.this, transform_fn)?);
1512 f.expression = Box::new(transform_recursive(*f.expression, transform_fn)?);
1513 Expression::Filter(f)
1514 }
1515
1516 // BitwiseOrAgg/BitwiseAndAgg/BitwiseXorAgg: recurse into the aggregate argument
1517 Expression::BitwiseOrAgg(mut f) => {
1518 f.this = transform_recursive(f.this, transform_fn)?;
1519 Expression::BitwiseOrAgg(f)
1520 }
1521 Expression::BitwiseAndAgg(mut f) => {
1522 f.this = transform_recursive(f.this, transform_fn)?;
1523 Expression::BitwiseAndAgg(f)
1524 }
1525 Expression::BitwiseXorAgg(mut f) => {
1526 f.this = transform_recursive(f.this, transform_fn)?;
1527 Expression::BitwiseXorAgg(f)
1528 }
1529 Expression::PipeOperator(mut pipe) => {
1530 pipe.this = transform_recursive(pipe.this, transform_fn)?;
1531 pipe.expression = transform_recursive(pipe.expression, transform_fn)?;
1532 Expression::PipeOperator(pipe)
1533 }
1534
1535 // ArrayExcept/ArrayContains/ArrayDistinct: recurse into children
1536 Expression::ArrayExcept(mut f) => {
1537 f.this = transform_recursive(f.this, transform_fn)?;
1538 f.expression = transform_recursive(f.expression, transform_fn)?;
1539 Expression::ArrayExcept(f)
1540 }
1541 Expression::ArrayContains(mut f) => {
1542 f.this = transform_recursive(f.this, transform_fn)?;
1543 f.expression = transform_recursive(f.expression, transform_fn)?;
1544 Expression::ArrayContains(f)
1545 }
1546 Expression::ArrayDistinct(mut f) => {
1547 f.this = transform_recursive(f.this, transform_fn)?;
1548 Expression::ArrayDistinct(f)
1549 }
1550
1551 // Pass through leaf nodes unchanged
1552 other => other,
1553 };
1554
1555 // Then apply the transform function
1556 transform_fn(expr)
1557}
1558
1559/// Returns the tokenizer config, generator config, and expression transform closure
1560/// for a built-in dialect type. This is the shared implementation used by both
1561/// `Dialect::get()` and custom dialect construction.
1562fn configs_for_dialect_type(
1563 dt: DialectType,
1564) -> (
1565 TokenizerConfig,
1566 GeneratorConfig,
1567 Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>,
1568) {
1569 macro_rules! dialect_configs {
1570 ($dialect_struct:ident) => {{
1571 let d = $dialect_struct;
1572 (
1573 d.tokenizer_config(),
1574 d.generator_config(),
1575 Box::new(move |e| $dialect_struct.transform_expr(e)),
1576 )
1577 }};
1578 }
1579 match dt {
1580 #[cfg(feature = "dialect-postgresql")]
1581 DialectType::PostgreSQL => dialect_configs!(PostgresDialect),
1582 #[cfg(feature = "dialect-mysql")]
1583 DialectType::MySQL => dialect_configs!(MySQLDialect),
1584 #[cfg(feature = "dialect-bigquery")]
1585 DialectType::BigQuery => dialect_configs!(BigQueryDialect),
1586 #[cfg(feature = "dialect-snowflake")]
1587 DialectType::Snowflake => dialect_configs!(SnowflakeDialect),
1588 #[cfg(feature = "dialect-duckdb")]
1589 DialectType::DuckDB => dialect_configs!(DuckDBDialect),
1590 #[cfg(feature = "dialect-tsql")]
1591 DialectType::TSQL => dialect_configs!(TSQLDialect),
1592 #[cfg(feature = "dialect-oracle")]
1593 DialectType::Oracle => dialect_configs!(OracleDialect),
1594 #[cfg(feature = "dialect-hive")]
1595 DialectType::Hive => dialect_configs!(HiveDialect),
1596 #[cfg(feature = "dialect-spark")]
1597 DialectType::Spark => dialect_configs!(SparkDialect),
1598 #[cfg(feature = "dialect-sqlite")]
1599 DialectType::SQLite => dialect_configs!(SQLiteDialect),
1600 #[cfg(feature = "dialect-presto")]
1601 DialectType::Presto => dialect_configs!(PrestoDialect),
1602 #[cfg(feature = "dialect-trino")]
1603 DialectType::Trino => dialect_configs!(TrinoDialect),
1604 #[cfg(feature = "dialect-redshift")]
1605 DialectType::Redshift => dialect_configs!(RedshiftDialect),
1606 #[cfg(feature = "dialect-clickhouse")]
1607 DialectType::ClickHouse => dialect_configs!(ClickHouseDialect),
1608 #[cfg(feature = "dialect-databricks")]
1609 DialectType::Databricks => dialect_configs!(DatabricksDialect),
1610 #[cfg(feature = "dialect-athena")]
1611 DialectType::Athena => dialect_configs!(AthenaDialect),
1612 #[cfg(feature = "dialect-teradata")]
1613 DialectType::Teradata => dialect_configs!(TeradataDialect),
1614 #[cfg(feature = "dialect-doris")]
1615 DialectType::Doris => dialect_configs!(DorisDialect),
1616 #[cfg(feature = "dialect-starrocks")]
1617 DialectType::StarRocks => dialect_configs!(StarRocksDialect),
1618 #[cfg(feature = "dialect-materialize")]
1619 DialectType::Materialize => dialect_configs!(MaterializeDialect),
1620 #[cfg(feature = "dialect-risingwave")]
1621 DialectType::RisingWave => dialect_configs!(RisingWaveDialect),
1622 #[cfg(feature = "dialect-singlestore")]
1623 DialectType::SingleStore => dialect_configs!(SingleStoreDialect),
1624 #[cfg(feature = "dialect-cockroachdb")]
1625 DialectType::CockroachDB => dialect_configs!(CockroachDBDialect),
1626 #[cfg(feature = "dialect-tidb")]
1627 DialectType::TiDB => dialect_configs!(TiDBDialect),
1628 #[cfg(feature = "dialect-druid")]
1629 DialectType::Druid => dialect_configs!(DruidDialect),
1630 #[cfg(feature = "dialect-solr")]
1631 DialectType::Solr => dialect_configs!(SolrDialect),
1632 #[cfg(feature = "dialect-tableau")]
1633 DialectType::Tableau => dialect_configs!(TableauDialect),
1634 #[cfg(feature = "dialect-dune")]
1635 DialectType::Dune => dialect_configs!(DuneDialect),
1636 #[cfg(feature = "dialect-fabric")]
1637 DialectType::Fabric => dialect_configs!(FabricDialect),
1638 #[cfg(feature = "dialect-drill")]
1639 DialectType::Drill => dialect_configs!(DrillDialect),
1640 #[cfg(feature = "dialect-dremio")]
1641 DialectType::Dremio => dialect_configs!(DremioDialect),
1642 #[cfg(feature = "dialect-exasol")]
1643 DialectType::Exasol => dialect_configs!(ExasolDialect),
1644 #[cfg(feature = "dialect-datafusion")]
1645 DialectType::DataFusion => dialect_configs!(DataFusionDialect),
1646 _ => dialect_configs!(GenericDialect),
1647 }
1648}
1649
1650// ---------------------------------------------------------------------------
1651// Custom dialect registry
1652// ---------------------------------------------------------------------------
1653
1654static CUSTOM_DIALECT_REGISTRY: LazyLock<RwLock<HashMap<String, Arc<CustomDialectConfig>>>> =
1655 LazyLock::new(|| RwLock::new(HashMap::new()));
1656
1657struct CustomDialectConfig {
1658 name: String,
1659 base_dialect: DialectType,
1660 tokenizer_config: TokenizerConfig,
1661 generator_config: GeneratorConfig,
1662 transform: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
1663 preprocess: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
1664}
1665
/// Fluent builder for creating and registering custom SQL dialects.
///
/// A custom dialect is based on an existing built-in dialect and allows selective
/// overrides of tokenizer configuration, generator configuration, and expression
/// transforms.
///
/// # Example
///
/// ```rust,ignore
/// use polyglot_sql::dialects::{CustomDialectBuilder, DialectType, Dialect};
/// use polyglot_sql::generator::NormalizeFunctions;
///
/// CustomDialectBuilder::new("my_postgres")
///     .based_on(DialectType::PostgreSQL)
///     .generator_config_modifier(|gc| {
///         gc.normalize_functions = NormalizeFunctions::Lower;
///     })
///     .register()
///     .unwrap();
///
/// let d = Dialect::get_by_name("my_postgres").unwrap();
/// let exprs = d.parse("SELECT COUNT(*)").unwrap();
/// let sql = d.generate(&exprs[0]).unwrap();
/// assert_eq!(sql, "select count(*)");
///
/// polyglot_sql::unregister_custom_dialect("my_postgres");
/// ```
pub struct CustomDialectBuilder {
    /// Name the dialect will be registered under.
    name: String,
    /// Built-in dialect whose configs are inherited (defaults to `Generic`).
    base_dialect: DialectType,
    /// One-shot closure that tweaks the inherited tokenizer configuration.
    tokenizer_modifier: Option<Box<dyn FnOnce(&mut TokenizerConfig)>>,
    /// One-shot closure that tweaks the inherited generator configuration.
    generator_modifier: Option<Box<dyn FnOnce(&mut GeneratorConfig)>>,
    /// Replacement per-node expression transform (overrides the base dialect's).
    transform: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
    /// Replacement whole-tree preprocessing step (overrides the base dialect's).
    preprocess: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
1701
1702impl CustomDialectBuilder {
1703 /// Create a new builder with the given name. Defaults to `Generic` as the base dialect.
1704 pub fn new(name: impl Into<String>) -> Self {
1705 Self {
1706 name: name.into(),
1707 base_dialect: DialectType::Generic,
1708 tokenizer_modifier: None,
1709 generator_modifier: None,
1710 transform: None,
1711 preprocess: None,
1712 }
1713 }
1714
1715 /// Set the base built-in dialect to inherit configuration from.
1716 pub fn based_on(mut self, dialect: DialectType) -> Self {
1717 self.base_dialect = dialect;
1718 self
1719 }
1720
1721 /// Provide a closure that modifies the tokenizer configuration inherited from the base dialect.
1722 pub fn tokenizer_config_modifier<F>(mut self, f: F) -> Self
1723 where
1724 F: FnOnce(&mut TokenizerConfig) + 'static,
1725 {
1726 self.tokenizer_modifier = Some(Box::new(f));
1727 self
1728 }
1729
1730 /// Provide a closure that modifies the generator configuration inherited from the base dialect.
1731 pub fn generator_config_modifier<F>(mut self, f: F) -> Self
1732 where
1733 F: FnOnce(&mut GeneratorConfig) + 'static,
1734 {
1735 self.generator_modifier = Some(Box::new(f));
1736 self
1737 }
1738
1739 /// Set a custom per-node expression transform function.
1740 ///
1741 /// This replaces the base dialect's transform. It is called on every expression
1742 /// node during the recursive transform pass.
1743 pub fn transform_fn<F>(mut self, f: F) -> Self
1744 where
1745 F: Fn(Expression) -> Result<Expression> + Send + Sync + 'static,
1746 {
1747 self.transform = Some(Arc::new(f));
1748 self
1749 }
1750
1751 /// Set a custom whole-tree preprocessing function.
1752 ///
1753 /// This replaces the base dialect's built-in preprocessing. It is called once
1754 /// on the entire expression tree before the recursive per-node transform.
1755 pub fn preprocess_fn<F>(mut self, f: F) -> Self
1756 where
1757 F: Fn(Expression) -> Result<Expression> + Send + Sync + 'static,
1758 {
1759 self.preprocess = Some(Arc::new(f));
1760 self
1761 }
1762
1763 /// Build the custom dialect configuration and register it in the global registry.
1764 ///
1765 /// Returns an error if:
1766 /// - The name collides with a built-in dialect name
1767 /// - A custom dialect with the same name is already registered
1768 pub fn register(self) -> Result<()> {
1769 // Reject names that collide with built-in dialects
1770 if DialectType::from_str(&self.name).is_ok() {
1771 return Err(crate::error::Error::parse(
1772 format!(
1773 "Cannot register custom dialect '{}': name collides with built-in dialect",
1774 self.name
1775 ),
1776 0,
1777 0,
1778 0,
1779 0,
1780 ));
1781 }
1782
1783 // Get base configs
1784 let (mut tok_config, mut gen_config, _base_transform) =
1785 configs_for_dialect_type(self.base_dialect);
1786
1787 // Apply modifiers
1788 if let Some(tok_mod) = self.tokenizer_modifier {
1789 tok_mod(&mut tok_config);
1790 }
1791 if let Some(gen_mod) = self.generator_modifier {
1792 gen_mod(&mut gen_config);
1793 }
1794
1795 let config = CustomDialectConfig {
1796 name: self.name.clone(),
1797 base_dialect: self.base_dialect,
1798 tokenizer_config: tok_config,
1799 generator_config: gen_config,
1800 transform: self.transform,
1801 preprocess: self.preprocess,
1802 };
1803
1804 register_custom_dialect(config)
1805 }
1806}
1807
1808use std::str::FromStr;
1809
1810fn register_custom_dialect(config: CustomDialectConfig) -> Result<()> {
1811 let mut registry = CUSTOM_DIALECT_REGISTRY.write().map_err(|e| {
1812 crate::error::Error::parse(format!("Registry lock poisoned: {}", e), 0, 0, 0, 0)
1813 })?;
1814
1815 if registry.contains_key(&config.name) {
1816 return Err(crate::error::Error::parse(
1817 format!("Custom dialect '{}' is already registered", config.name),
1818 0,
1819 0,
1820 0,
1821 0,
1822 ));
1823 }
1824
1825 registry.insert(config.name.clone(), Arc::new(config));
1826 Ok(())
1827}
1828
1829/// Remove a custom dialect from the global registry.
1830///
1831/// Returns `true` if a dialect with that name was found and removed,
1832/// `false` if no such custom dialect existed.
1833pub fn unregister_custom_dialect(name: &str) -> bool {
1834 if let Ok(mut registry) = CUSTOM_DIALECT_REGISTRY.write() {
1835 registry.remove(name).is_some()
1836 } else {
1837 false
1838 }
1839}
1840
1841fn get_custom_dialect_config(name: &str) -> Option<Arc<CustomDialectConfig>> {
1842 CUSTOM_DIALECT_REGISTRY
1843 .read()
1844 .ok()
1845 .and_then(|registry| registry.get(name).cloned())
1846}
1847
/// Main entry point for dialect-specific SQL operations.
///
/// A `Dialect` bundles together a tokenizer, generator configuration, and expression
/// transformer for a specific SQL database engine. It is the high-level API through
/// which callers parse, generate, transform, and transpile SQL.
///
/// # Usage
///
/// ```rust,ignore
/// use polyglot_sql::dialects::{Dialect, DialectType};
///
/// // Parse PostgreSQL SQL into an AST
/// let pg = Dialect::get(DialectType::PostgreSQL);
/// let exprs = pg.parse("SELECT id, name FROM users WHERE active")?;
///
/// // Transpile from PostgreSQL to BigQuery
/// let results = pg.transpile_to("SELECT NOW()", DialectType::BigQuery)?;
/// assert_eq!(results[0], "SELECT CURRENT_TIMESTAMP()");
/// ```
///
/// Obtain an instance via [`Dialect::get`] or [`Dialect::get_by_name`].
/// The struct is `Send + Sync` safe so it can be shared across threads.
pub struct Dialect {
    /// Which dialect this instance represents. For custom dialects this is the
    /// *base* dialect the custom config inherits from (see `from_custom_config`).
    dialect_type: DialectType,
    /// Tokenizer preconfigured with this dialect's lexing rules.
    tokenizer: Tokenizer,
    /// Default generator configuration used when rendering SQL.
    generator_config: GeneratorConfig,
    /// Per-node expression transform applied during the recursive transform pass.
    transformer: Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>,
    /// Optional function to get expression-specific generator config (for hybrid dialects like Athena).
    generator_config_for_expr: Option<Box<dyn Fn(&Expression) -> GeneratorConfig + Send + Sync>>,
    /// Optional custom preprocessing function (overrides built-in preprocess for custom dialects).
    custom_preprocess: Option<Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
1880
1881impl Dialect {
1882 /// Creates a fully configured [`Dialect`] instance for the given [`DialectType`].
1883 ///
1884 /// This is the primary constructor. It initializes the tokenizer, generator config,
1885 /// and expression transformer based on the dialect's [`DialectImpl`] implementation.
1886 /// For hybrid dialects like Athena, it also sets up expression-specific generator
1887 /// config routing.
1888 pub fn get(dialect_type: DialectType) -> Self {
1889 let (tokenizer_config, generator_config, transformer) =
1890 configs_for_dialect_type(dialect_type);
1891
1892 // Set up expression-specific generator config for hybrid dialects
1893 let generator_config_for_expr: Option<
1894 Box<dyn Fn(&Expression) -> GeneratorConfig + Send + Sync>,
1895 > = match dialect_type {
1896 #[cfg(feature = "dialect-athena")]
1897 DialectType::Athena => Some(Box::new(|expr| {
1898 AthenaDialect.generator_config_for_expr(expr)
1899 })),
1900 _ => None,
1901 };
1902
1903 Self {
1904 dialect_type,
1905 tokenizer: Tokenizer::new(tokenizer_config),
1906 generator_config,
1907 transformer,
1908 generator_config_for_expr,
1909 custom_preprocess: None,
1910 }
1911 }
1912
1913 /// Look up a dialect by string name.
1914 ///
1915 /// Checks built-in dialect names first (via [`DialectType::from_str`]), then
1916 /// falls back to the custom dialect registry. Returns `None` if no dialect
1917 /// with the given name exists.
1918 pub fn get_by_name(name: &str) -> Option<Self> {
1919 // Try built-in first
1920 if let Ok(dt) = DialectType::from_str(name) {
1921 return Some(Self::get(dt));
1922 }
1923
1924 // Try custom registry
1925 let config = get_custom_dialect_config(name)?;
1926 Some(Self::from_custom_config(&config))
1927 }
1928
1929 /// Construct a `Dialect` from a custom dialect configuration.
1930 fn from_custom_config(config: &CustomDialectConfig) -> Self {
1931 // Build the transformer: use custom if provided, else use base dialect's
1932 let transformer: Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync> =
1933 if let Some(ref custom_transform) = config.transform {
1934 let t = Arc::clone(custom_transform);
1935 Box::new(move |e| t(e))
1936 } else {
1937 let (_, _, base_transform) = configs_for_dialect_type(config.base_dialect);
1938 base_transform
1939 };
1940
1941 // Build the custom preprocess: use custom if provided
1942 let custom_preprocess: Option<Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>> =
1943 config.preprocess.as_ref().map(|p| {
1944 let p = Arc::clone(p);
1945 Box::new(move |e: Expression| p(e))
1946 as Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>
1947 });
1948
1949 Self {
1950 dialect_type: config.base_dialect,
1951 tokenizer: Tokenizer::new(config.tokenizer_config.clone()),
1952 generator_config: config.generator_config.clone(),
1953 transformer,
1954 generator_config_for_expr: None,
1955 custom_preprocess,
1956 }
1957 }
1958
    /// Get the dialect type this instance was built for.
    ///
    /// For custom dialects this is the *base* dialect they inherit from
    /// (set in `from_custom_config`).
    pub fn dialect_type(&self) -> DialectType {
        self.dialect_type
    }

    /// Get the default generator configuration used when rendering SQL.
    ///
    /// Note: hybrid dialects may override this per expression via
    /// `get_config_for_expr`.
    pub fn generator_config(&self) -> &GeneratorConfig {
        &self.generator_config
    }
1968
1969 /// Parses a SQL string into a list of [`Expression`] AST nodes.
1970 ///
1971 /// The input may contain multiple semicolon-separated statements; each one
1972 /// produces a separate element in the returned vector. Tokenization uses
1973 /// this dialect's configured tokenizer, and parsing uses the dialect-aware parser.
1974 pub fn parse(&self, sql: &str) -> Result<Vec<Expression>> {
1975 let tokens = self.tokenizer.tokenize(sql)?;
1976 let config = crate::parser::ParserConfig {
1977 dialect: Some(self.dialect_type),
1978 ..Default::default()
1979 };
1980 let mut parser = Parser::with_source(tokens, config, sql.to_string());
1981 parser.parse()
1982 }
1983
    /// Tokenize SQL using this dialect's tokenizer configuration.
    ///
    /// Returns the raw token stream from the dialect's tokenizer; no parsing
    /// is performed.
    pub fn tokenize(&self, sql: &str) -> Result<Vec<Token>> {
        self.tokenizer.tokenize(sql)
    }
1988
1989 /// Get the generator config for a specific expression (supports hybrid dialects)
1990 fn get_config_for_expr(&self, expr: &Expression) -> GeneratorConfig {
1991 if let Some(ref config_fn) = self.generator_config_for_expr {
1992 config_fn(expr)
1993 } else {
1994 self.generator_config.clone()
1995 }
1996 }
1997
1998 /// Generates a SQL string from an [`Expression`] AST node.
1999 ///
2000 /// The output uses this dialect's generator configuration for identifier quoting,
2001 /// keyword casing, function name normalization, and syntax style. The result is
2002 /// a single-line (non-pretty) SQL string.
2003 pub fn generate(&self, expr: &Expression) -> Result<String> {
2004 let config = self.get_config_for_expr(expr);
2005 let mut generator = Generator::with_config(config);
2006 generator.generate(expr)
2007 }
2008
2009 /// Generate SQL from an expression with pretty printing enabled
2010 pub fn generate_pretty(&self, expr: &Expression) -> Result<String> {
2011 let mut config = self.get_config_for_expr(expr);
2012 config.pretty = true;
2013 let mut generator = Generator::with_config(config);
2014 generator.generate(expr)
2015 }
2016
2017 /// Generate SQL from an expression with source dialect info (for transpilation)
2018 pub fn generate_with_source(&self, expr: &Expression, source: DialectType) -> Result<String> {
2019 let mut config = self.get_config_for_expr(expr);
2020 config.source_dialect = Some(source);
2021 let mut generator = Generator::with_config(config);
2022 generator.generate(expr)
2023 }
2024
2025 /// Generate SQL from an expression with pretty printing and source dialect info
2026 pub fn generate_pretty_with_source(
2027 &self,
2028 expr: &Expression,
2029 source: DialectType,
2030 ) -> Result<String> {
2031 let mut config = self.get_config_for_expr(expr);
2032 config.pretty = true;
2033 config.source_dialect = Some(source);
2034 let mut generator = Generator::with_config(config);
2035 generator.generate(expr)
2036 }
2037
2038 /// Generate SQL from an expression with forced identifier quoting (identify=True)
2039 pub fn generate_with_identify(&self, expr: &Expression) -> Result<String> {
2040 let mut config = self.get_config_for_expr(expr);
2041 config.always_quote_identifiers = true;
2042 let mut generator = Generator::with_config(config);
2043 generator.generate(expr)
2044 }
2045
2046 /// Generate SQL from an expression with pretty printing and forced identifier quoting
2047 pub fn generate_pretty_with_identify(&self, expr: &Expression) -> Result<String> {
2048 let mut config = self.generator_config.clone();
2049 config.pretty = true;
2050 config.always_quote_identifiers = true;
2051 let mut generator = Generator::with_config(config);
2052 generator.generate(expr)
2053 }
2054
2055 /// Generate SQL from an expression with caller-specified config overrides
2056 pub fn generate_with_overrides(
2057 &self,
2058 expr: &Expression,
2059 overrides: impl FnOnce(&mut GeneratorConfig),
2060 ) -> Result<String> {
2061 let mut config = self.get_config_for_expr(expr);
2062 overrides(&mut config);
2063 let mut generator = Generator::with_config(config);
2064 generator.generate(expr)
2065 }
2066
2067 /// Transforms an expression tree to conform to this dialect's syntax and semantics.
2068 ///
2069 /// The transformation proceeds in two phases:
2070 /// 1. **Preprocessing** -- whole-tree structural rewrites such as eliminating QUALIFY,
2071 /// ensuring boolean predicates, or converting DISTINCT ON to a window-function pattern.
2072 /// 2. **Recursive per-node transform** -- a bottom-up pass via [`transform_recursive`]
2073 /// that applies this dialect's [`DialectImpl::transform_expr`] to every node.
2074 ///
2075 /// This method is used both during transpilation (to rewrite an AST for a target dialect)
2076 /// and for identity transforms (normalizing SQL within the same dialect).
2077 pub fn transform(&self, expr: Expression) -> Result<Expression> {
2078 // Apply preprocessing transforms based on dialect
2079 let preprocessed = self.preprocess(expr)?;
2080 // Then apply recursive transformation
2081 transform_recursive(preprocessed, &self.transformer)
2082 }
2083
    /// Apply dialect-specific preprocessing transforms.
    ///
    /// Runs once over the whole expression tree before the recursive per-node
    /// transform (see [`Dialect::transform`]). Each branch rewrites constructs
    /// the dialect cannot express (QUALIFY, FULL OUTER JOIN, WINDOW clause,
    /// CTEs in subqueries, ...) into equivalent supported forms. The rewrites
    /// within a branch run in a fixed order.
    fn preprocess(&self, expr: Expression) -> Result<Expression> {
        // If a custom preprocess function is set, use it instead of the built-in logic
        if let Some(ref custom_preprocess) = self.custom_preprocess {
            return custom_preprocess(expr);
        }

        // Import gated on the same features as the match arms below —
        // presumably to avoid an unused-import warning in minimal builds.
        #[cfg(any(
            feature = "dialect-mysql",
            feature = "dialect-postgresql",
            feature = "dialect-bigquery",
            feature = "dialect-snowflake",
            feature = "dialect-tsql",
            feature = "dialect-spark",
            feature = "dialect-databricks",
            feature = "dialect-hive",
            feature = "dialect-sqlite",
            feature = "dialect-trino",
            feature = "dialect-presto",
            feature = "dialect-duckdb",
            feature = "dialect-redshift",
            feature = "dialect-starrocks",
            feature = "dialect-oracle",
            feature = "dialect-clickhouse",
        ))]
        use crate::transforms;

        match self.dialect_type {
            // MySQL doesn't support QUALIFY, DISTINCT ON, FULL OUTER JOIN
            // MySQL doesn't natively support GENERATE_DATE_ARRAY (expand to recursive CTE)
            #[cfg(feature = "dialect-mysql")]
            DialectType::MySQL => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_full_outer_join(expr)?;
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
                Ok(expr)
            }
            // PostgreSQL doesn't support QUALIFY
            // PostgreSQL: UNNEST(GENERATE_SERIES) -> subquery wrapping
            // PostgreSQL: Normalize SET ... TO to SET ... = in CREATE FUNCTION
            #[cfg(feature = "dialect-postgresql")]
            DialectType::PostgreSQL => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::unwrap_unnest_generate_series_for_postgres(expr)?;
                // Normalize SET ... TO to SET ... = in CREATE FUNCTION
                // Only normalize when sqlglot would fully parse (no body) —
                // sqlglot falls back to Command for complex function bodies,
                // preserving the original text including TO.
                let expr = if let Expression::CreateFunction(mut cf) = expr {
                    if cf.body.is_none() {
                        for opt in &mut cf.set_options {
                            if let crate::expressions::FunctionSetValue::Value { use_to, .. } =
                                &mut opt.value
                            {
                                *use_to = false;
                            }
                        }
                    }
                    Expression::CreateFunction(cf)
                } else {
                    expr
                };
                Ok(expr)
            }
            // BigQuery doesn't support DISTINCT ON or CTE column aliases
            #[cfg(feature = "dialect-bigquery")]
            DialectType::BigQuery => {
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::pushdown_cte_column_names(expr)?;
                let expr = transforms::explode_projection_to_unnest(expr, DialectType::BigQuery)?;
                Ok(expr)
            }
            // Snowflake
            #[cfg(feature = "dialect-snowflake")]
            DialectType::Snowflake => {
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::eliminate_window_clause(expr)?;
                let expr = transforms::snowflake_flatten_projection_to_unnest(expr)?;
                Ok(expr)
            }
            // TSQL doesn't support QUALIFY
            // TSQL requires boolean expressions in WHERE/HAVING (no implicit truthiness)
            // TSQL doesn't support CTEs in subqueries (hoist to top level)
            // NOTE: no_limit_order_by_union is handled in cross_dialect_normalize (not preprocess)
            // to avoid breaking TSQL identity tests where ORDER BY on UNION is valid
            #[cfg(feature = "dialect-tsql")]
            DialectType::TSQL => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::ensure_bools(expr)?;
                let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
                let expr = transforms::move_ctes_to_top_level(expr)?;
                let expr = transforms::qualify_derived_table_outputs(expr)?;
                Ok(expr)
            }
            // Spark doesn't support QUALIFY (but Databricks does)
            // Spark doesn't support CTEs in subqueries (hoist to top level)
            #[cfg(feature = "dialect-spark")]
            DialectType::Spark => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::add_auto_table_alias(expr)?;
                let expr = transforms::simplify_nested_paren_values(expr)?;
                let expr = transforms::move_ctes_to_top_level(expr)?;
                Ok(expr)
            }
            // Databricks supports QUALIFY natively
            // Databricks doesn't support CTEs in subqueries (hoist to top level)
            #[cfg(feature = "dialect-databricks")]
            DialectType::Databricks => {
                let expr = transforms::add_auto_table_alias(expr)?;
                let expr = transforms::simplify_nested_paren_values(expr)?;
                let expr = transforms::move_ctes_to_top_level(expr)?;
                Ok(expr)
            }
            // Hive doesn't support QUALIFY or CTEs in subqueries
            #[cfg(feature = "dialect-hive")]
            DialectType::Hive => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::move_ctes_to_top_level(expr)?;
                Ok(expr)
            }
            // SQLite doesn't support QUALIFY
            #[cfg(feature = "dialect-sqlite")]
            DialectType::SQLite => {
                let expr = transforms::eliminate_qualify(expr)?;
                Ok(expr)
            }
            // Trino doesn't support QUALIFY
            #[cfg(feature = "dialect-trino")]
            DialectType::Trino => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::explode_projection_to_unnest(expr, DialectType::Trino)?;
                Ok(expr)
            }
            // Presto doesn't support QUALIFY or WINDOW clause
            #[cfg(feature = "dialect-presto")]
            DialectType::Presto => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_window_clause(expr)?;
                let expr = transforms::explode_projection_to_unnest(expr, DialectType::Presto)?;
                Ok(expr)
            }
            // DuckDB supports QUALIFY - no elimination needed
            // Expand POSEXPLODE to GENERATE_SUBSCRIPTS + UNNEST
            // Expand LIKE ANY / ILIKE ANY to OR chains (DuckDB doesn't support quantifiers)
            #[cfg(feature = "dialect-duckdb")]
            DialectType::DuckDB => {
                let expr = transforms::expand_posexplode_duckdb(expr)?;
                let expr = transforms::expand_like_any(expr)?;
                Ok(expr)
            }
            // Redshift doesn't support QUALIFY, WINDOW clause, or GENERATE_DATE_ARRAY
            #[cfg(feature = "dialect-redshift")]
            DialectType::Redshift => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_window_clause(expr)?;
                let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
                Ok(expr)
            }
            // StarRocks doesn't support BETWEEN in DELETE statements or QUALIFY
            #[cfg(feature = "dialect-starrocks")]
            DialectType::StarRocks => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::expand_between_in_delete(expr)?;
                Ok(expr)
            }
            // DataFusion supports QUALIFY and semi/anti joins natively
            #[cfg(feature = "dialect-datafusion")]
            DialectType::DataFusion => Ok(expr),
            // Oracle doesn't support QUALIFY
            #[cfg(feature = "dialect-oracle")]
            DialectType::Oracle => {
                let expr = transforms::eliminate_qualify(expr)?;
                Ok(expr)
            }
            // Drill - no special preprocessing needed
            #[cfg(feature = "dialect-drill")]
            DialectType::Drill => Ok(expr),
            // Teradata - no special preprocessing needed
            #[cfg(feature = "dialect-teradata")]
            DialectType::Teradata => Ok(expr),
            // ClickHouse doesn't support ORDER BY/LIMIT directly on UNION
            #[cfg(feature = "dialect-clickhouse")]
            DialectType::ClickHouse => {
                let expr = transforms::no_limit_order_by_union(expr)?;
                Ok(expr)
            }
            // Other dialects - no preprocessing
            _ => Ok(expr),
        }
    }
2277
    /// Transpile SQL from this dialect to another.
    ///
    /// Parses `sql` with this (source) dialect, rewrites the AST for `target`,
    /// and returns one rendered string per input statement.
    pub fn transpile_to(&self, sql: &str, target: DialectType) -> Result<Vec<String>> {
        self.transpile_to_inner(sql, target, false)
    }

    /// Transpile SQL from this dialect to another with pretty printing enabled.
    pub fn transpile_to_pretty(&self, sql: &str, target: DialectType) -> Result<Vec<String>> {
        self.transpile_to_inner(sql, target, true)
    }
2287
2288 #[cfg(not(feature = "transpile"))]
2289 fn transpile_to_inner(
2290 &self,
2291 sql: &str,
2292 target: DialectType,
2293 pretty: bool,
2294 ) -> Result<Vec<String>> {
2295 // Without the transpile feature, only same-dialect or to/from generic is supported
2296 if self.dialect_type != target
2297 && self.dialect_type != DialectType::Generic
2298 && target != DialectType::Generic
2299 {
2300 return Err(crate::error::Error::parse(
2301 "Cross-dialect transpilation not available in this build",
2302 0,
2303 0,
2304 0,
2305 0,
2306 ));
2307 }
2308
2309 let expressions = self.parse(sql)?;
2310 let target_dialect = Dialect::get(target);
2311 let generic_identity =
2312 self.dialect_type == DialectType::Generic && target == DialectType::Generic;
2313
2314 if generic_identity {
2315 return expressions
2316 .into_iter()
2317 .map(|expr| {
2318 if pretty {
2319 target_dialect.generate_pretty_with_source(&expr, self.dialect_type)
2320 } else {
2321 target_dialect.generate_with_source(&expr, self.dialect_type)
2322 }
2323 })
2324 .collect();
2325 }
2326
2327 expressions
2328 .into_iter()
2329 .map(|expr| {
2330 let transformed = target_dialect.transform(expr)?;
2331 if pretty {
2332 target_dialect.generate_pretty_with_source(&transformed, self.dialect_type)
2333 } else {
2334 target_dialect.generate_with_source(&transformed, self.dialect_type)
2335 }
2336 })
2337 .collect()
2338 }
2339
    /// Full cross-dialect transpilation pipeline (enabled by the `transpile` feature).
    ///
    /// Parses `sql` with the source dialect, then — per statement — runs a
    /// sequence of source→target normalization passes before applying the
    /// target dialect's own transform and generating the output string.
    /// The pass order is significant: several rewrites must run before or
    /// after `cross_dialect_normalize` (see the inline NOTE comments).
    #[cfg(feature = "transpile")]
    fn transpile_to_inner(
        &self,
        sql: &str,
        target: DialectType,
        pretty: bool,
    ) -> Result<Vec<String>> {
        let expressions = self.parse(sql)?;
        let target_dialect = Dialect::get(target);
        let generic_identity =
            self.dialect_type == DialectType::Generic && target == DialectType::Generic;

        // Generic -> Generic: render the parsed AST verbatim, no transforms.
        if generic_identity {
            return expressions
                .into_iter()
                .map(|expr| {
                    if pretty {
                        target_dialect.generate_pretty_with_source(&expr, self.dialect_type)
                    } else {
                        target_dialect.generate_with_source(&expr, self.dialect_type)
                    }
                })
                .collect();
        }

        expressions
            .into_iter()
            .map(|expr| {
                // DuckDB source: normalize VARCHAR/CHAR to TEXT (DuckDB doesn't support
                // VARCHAR length constraints). This emulates Python sqlglot's DuckDB parser
                // where VARCHAR_LENGTH = None and VARCHAR maps to TEXT.
                let expr = if matches!(self.dialect_type, DialectType::DuckDB) {
                    use crate::expressions::DataType as DT;
                    transform_recursive(expr, &|e| match e {
                        Expression::DataType(DT::VarChar { .. }) => {
                            Ok(Expression::DataType(DT::Text))
                        }
                        Expression::DataType(DT::Char { .. }) => Ok(Expression::DataType(DT::Text)),
                        _ => Ok(e),
                    })?
                } else {
                    expr
                };

                // When source and target differ, first normalize the source dialect's
                // AST constructs to standard SQL, so that the target dialect can handle them.
                // This handles cases like Snowflake's SQUARE -> POWER, DIV0 -> CASE, etc.
                let normalized =
                    if self.dialect_type != target && self.dialect_type != DialectType::Generic {
                        self.transform(expr)?
                    } else {
                        expr
                    };

                // For TSQL source targeting non-TSQL: unwrap ISNULL(JSON_QUERY(...), JSON_VALUE(...))
                // to just JSON_QUERY(...) so cross_dialect_normalize can convert it cleanly.
                // The TSQL read transform wraps JsonQuery in ISNULL for identity, but for
                // cross-dialect transpilation we need the unwrapped JSON_QUERY.
                let normalized =
                    if matches!(self.dialect_type, DialectType::TSQL | DialectType::Fabric)
                        && !matches!(target, DialectType::TSQL | DialectType::Fabric)
                    {
                        transform_recursive(normalized, &|e| {
                            if let Expression::Function(ref f) = e {
                                if f.name.eq_ignore_ascii_case("ISNULL") && f.args.len() == 2 {
                                    // Check if first arg is JSON_QUERY and second is JSON_VALUE
                                    if let (
                                        Expression::Function(ref jq),
                                        Expression::Function(ref jv),
                                    ) = (&f.args[0], &f.args[1])
                                    {
                                        if jq.name.eq_ignore_ascii_case("JSON_QUERY")
                                            && jv.name.eq_ignore_ascii_case("JSON_VALUE")
                                        {
                                            // Unwrap: return just JSON_QUERY(...)
                                            return Ok(f.args[0].clone());
                                        }
                                    }
                                }
                            }
                            Ok(e)
                        })?
                    } else {
                        normalized
                    };

                // Snowflake source to non-Snowflake target: CURRENT_TIME -> LOCALTIME
                // Snowflake's CURRENT_TIME is equivalent to LOCALTIME in other dialects.
                // Python sqlglot parses Snowflake's CURRENT_TIME as Localtime expression.
                let normalized = if matches!(self.dialect_type, DialectType::Snowflake)
                    && !matches!(target, DialectType::Snowflake)
                {
                    transform_recursive(normalized, &|e| {
                        if let Expression::Function(ref f) = e {
                            if f.name.eq_ignore_ascii_case("CURRENT_TIME") {
                                return Ok(Expression::Localtime(Box::new(
                                    crate::expressions::Localtime { this: None },
                                )));
                            }
                        }
                        Ok(e)
                    })?
                } else {
                    normalized
                };

                // Snowflake source to DuckDB target: REPEAT(' ', n) -> REPEAT(' ', CAST(n AS BIGINT))
                // Snowflake's SPACE(n) is converted to REPEAT(' ', n) by the Snowflake source
                // transform. DuckDB requires the count argument to be BIGINT.
                let normalized = if matches!(self.dialect_type, DialectType::Snowflake)
                    && matches!(target, DialectType::DuckDB)
                {
                    transform_recursive(normalized, &|e| {
                        if let Expression::Function(ref f) = e {
                            if f.name.eq_ignore_ascii_case("REPEAT") && f.args.len() == 2 {
                                // Check if first arg is space string literal
                                if let Expression::Literal(crate::expressions::Literal::String(
                                    ref s,
                                )) = f.args[0]
                                {
                                    if s == " " {
                                        // Wrap second arg in CAST(... AS BIGINT) if not already
                                        if !matches!(f.args[1], Expression::Cast(_)) {
                                            let mut new_args = f.args.clone();
                                            new_args[1] = Expression::Cast(Box::new(
                                                crate::expressions::Cast {
                                                    this: new_args[1].clone(),
                                                    to: crate::expressions::DataType::BigInt {
                                                        length: None,
                                                    },
                                                    trailing_comments: Vec::new(),
                                                    double_colon_syntax: false,
                                                    format: None,
                                                    default: None,
                                                    inferred_type: None,
                                                },
                                            ));
                                            return Ok(Expression::Function(Box::new(
                                                crate::expressions::Function {
                                                    name: f.name.clone(),
                                                    args: new_args,
                                                    distinct: f.distinct,
                                                    trailing_comments: f.trailing_comments.clone(),
                                                    use_bracket_syntax: f.use_bracket_syntax,
                                                    no_parens: f.no_parens,
                                                    quoted: f.quoted,
                                                    span: None,
                                                    inferred_type: None,
                                                },
                                            )));
                                        }
                                    }
                                }
                            }
                        }
                        Ok(e)
                    })?
                } else {
                    normalized
                };

                // Propagate struct field names in arrays (for BigQuery source to non-BigQuery target)
                // BigQuery->BigQuery should NOT propagate names (BigQuery handles implicit inheritance)
                let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
                    && !matches!(target, DialectType::BigQuery)
                {
                    crate::transforms::propagate_struct_field_names(normalized)?
                } else {
                    normalized
                };

                // Apply cross-dialect semantic normalizations
                let normalized =
                    Self::cross_dialect_normalize(normalized, self.dialect_type, target)?;

                // For DuckDB target from BigQuery source: wrap UNNEST of struct arrays in
                // (SELECT UNNEST(..., max_depth => 2)) subquery
                // Must run BEFORE unnest_alias_to_column_alias since it changes alias structure
                let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
                    && matches!(target, DialectType::DuckDB)
                {
                    crate::transforms::wrap_duckdb_unnest_struct(normalized)?
                } else {
                    normalized
                };

                // Convert BigQuery UNNEST aliases to column-alias format for DuckDB/Presto/Spark
                // UNNEST(arr) AS x -> UNNEST(arr) AS _t0(x)
                let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
                    && matches!(
                        target,
                        DialectType::DuckDB
                            | DialectType::Presto
                            | DialectType::Trino
                            | DialectType::Athena
                            | DialectType::Spark
                            | DialectType::Databricks
                    ) {
                    crate::transforms::unnest_alias_to_column_alias(normalized)?
                } else if matches!(self.dialect_type, DialectType::BigQuery)
                    && matches!(target, DialectType::BigQuery | DialectType::Redshift)
                {
                    // For BigQuery/Redshift targets: move UNNEST FROM items to CROSS JOINs
                    // but don't convert alias format (no _t0 wrapper)
                    let result = crate::transforms::unnest_from_to_cross_join(normalized)?;
                    // For Redshift: strip UNNEST when arg is a column reference path
                    if matches!(target, DialectType::Redshift) {
                        crate::transforms::strip_unnest_column_refs(result)?
                    } else {
                        result
                    }
                } else {
                    normalized
                };

                // For Presto/Trino targets from PostgreSQL/Redshift source:
                // Wrap UNNEST aliases from GENERATE_SERIES conversion: AS s -> AS _u(s)
                let normalized = if matches!(
                    self.dialect_type,
                    DialectType::PostgreSQL | DialectType::Redshift
                ) && matches!(
                    target,
                    DialectType::Presto | DialectType::Trino | DialectType::Athena
                ) {
                    crate::transforms::wrap_unnest_join_aliases(normalized)?
                } else {
                    normalized
                };

                // Eliminate DISTINCT ON with target-dialect awareness
                // This must happen after source transform (which may produce DISTINCT ON)
                // and before target transform, with knowledge of the target dialect's NULL ordering behavior
                let normalized =
                    crate::transforms::eliminate_distinct_on_for_dialect(normalized, Some(target))?;

                // GENERATE_DATE_ARRAY in UNNEST -> Snowflake ARRAY_GENERATE_RANGE + DATEADD
                let normalized = if matches!(target, DialectType::Snowflake) {
                    Self::transform_generate_date_array_snowflake(normalized)?
                } else {
                    normalized
                };

                // CROSS JOIN UNNEST -> LATERAL VIEW EXPLODE/INLINE for Spark/Hive/Databricks
                let normalized = if matches!(
                    target,
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive
                ) {
                    crate::transforms::unnest_to_explode_select(normalized)?
                } else {
                    normalized
                };

                // Wrap UNION with ORDER BY/LIMIT in a subquery for dialects that require it
                let normalized = if matches!(target, DialectType::ClickHouse | DialectType::TSQL) {
                    crate::transforms::no_limit_order_by_union(normalized)?
                } else {
                    normalized
                };

                // TSQL: Convert COUNT(*) -> COUNT_BIG(*) when source is not TSQL/Fabric
                // Python sqlglot does this in the TSQL generator, but we can't do it there
                // because it would break TSQL -> TSQL identity
                let normalized = if matches!(target, DialectType::TSQL | DialectType::Fabric)
                    && !matches!(self.dialect_type, DialectType::TSQL | DialectType::Fabric)
                {
                    transform_recursive(normalized, &|e| {
                        if let Expression::Count(ref c) = e {
                            // Build COUNT_BIG(...) as an AggregateFunction
                            let args = if c.star {
                                vec![Expression::Star(crate::expressions::Star {
                                    table: None,
                                    except: None,
                                    replace: None,
                                    rename: None,
                                    trailing_comments: Vec::new(),
                                    span: None,
                                })]
                            } else if let Some(ref this) = c.this {
                                vec![this.clone()]
                            } else {
                                vec![]
                            };
                            Ok(Expression::AggregateFunction(Box::new(
                                crate::expressions::AggregateFunction {
                                    name: "COUNT_BIG".to_string(),
                                    args,
                                    distinct: c.distinct,
                                    filter: c.filter.clone(),
                                    order_by: Vec::new(),
                                    limit: None,
                                    ignore_nulls: None,
                                    inferred_type: None,
                                },
                            )))
                        } else {
                            Ok(e)
                        }
                    })?
                } else {
                    normalized
                };

                // Finally, let the target dialect apply its own preprocess + per-node transform.
                let transformed = target_dialect.transform(normalized)?;

                // DuckDB target: when FROM is RANGE(n), replace SEQ's ROW_NUMBER pattern with `range`
                let transformed = if matches!(target, DialectType::DuckDB) {
                    Self::seq_rownum_to_range(transformed)?
                } else {
                    transformed
                };

                let mut sql = if pretty {
                    target_dialect.generate_pretty_with_source(&transformed, self.dialect_type)?
                } else {
                    target_dialect.generate_with_source(&transformed, self.dialect_type)?
                };

                // Align a known Snowflake pretty-print edge case with Python sqlglot output.
                if pretty && target == DialectType::Snowflake {
                    sql = Self::normalize_snowflake_pretty(sql);
                }

                Ok(sql)
            })
            .collect()
    }
2666}
2667
2668// Transpile-only methods: cross-dialect normalization and helpers
2669#[cfg(feature = "transpile")]
2670impl Dialect {
2671 /// For DuckDB target: when FROM clause contains RANGE(n), replace
2672 /// `(ROW_NUMBER() OVER (ORDER BY 1 NULLS FIRST) - 1)` with `range` in select expressions.
2673 /// This handles SEQ1/2/4/8 → RANGE transpilation from Snowflake.
2674 fn seq_rownum_to_range(expr: Expression) -> Result<Expression> {
2675 if let Expression::Select(mut select) = expr {
2676 // Check if FROM contains a RANGE function
2677 let has_range_from = if let Some(ref from) = select.from {
2678 from.expressions.iter().any(|e| {
2679 // Check for direct RANGE(...) or aliased RANGE(...)
2680 match e {
2681 Expression::Function(f) => f.name.eq_ignore_ascii_case("RANGE"),
2682 Expression::Alias(a) => {
2683 matches!(&a.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("RANGE"))
2684 }
2685 _ => false,
2686 }
2687 })
2688 } else {
2689 false
2690 };
2691
2692 if has_range_from {
2693 // Replace the ROW_NUMBER pattern in select expressions
2694 select.expressions = select.expressions.into_iter().map(|e| {
2695 Self::replace_rownum_with_range(e)
2696 }).collect();
2697 }
2698
2699 Ok(Expression::Select(select))
2700 } else {
2701 Ok(expr)
2702 }
2703 }
2704
2705 /// Replace `(ROW_NUMBER() OVER (...) - 1)` with `range` column reference
2706 fn replace_rownum_with_range(expr: Expression) -> Expression {
2707 match expr {
2708 // Match: (ROW_NUMBER() OVER (...) - 1) % N → range % N
2709 Expression::Mod(op) => {
2710 let new_left = Self::try_replace_rownum_paren(&op.left);
2711 Expression::Mod(Box::new(crate::expressions::BinaryOp {
2712 left: new_left,
2713 right: op.right,
2714 left_comments: op.left_comments,
2715 operator_comments: op.operator_comments,
2716 trailing_comments: op.trailing_comments,
2717 inferred_type: op.inferred_type,
2718 }))
2719 }
2720 // Match: (CASE WHEN (ROW...) % N >= ... THEN ... ELSE ... END)
2721 Expression::Paren(p) => {
2722 let inner = Self::replace_rownum_with_range(p.this);
2723 Expression::Paren(Box::new(crate::expressions::Paren {
2724 this: inner,
2725 trailing_comments: p.trailing_comments,
2726 }))
2727 }
2728 Expression::Case(mut c) => {
2729 // Replace ROW_NUMBER in WHEN conditions and THEN expressions
2730 c.whens = c.whens.into_iter().map(|(cond, then)| {
2731 (Self::replace_rownum_with_range(cond), Self::replace_rownum_with_range(then))
2732 }).collect();
2733 if let Some(else_) = c.else_ {
2734 c.else_ = Some(Self::replace_rownum_with_range(else_));
2735 }
2736 Expression::Case(c)
2737 }
2738 Expression::Gte(op) => {
2739 Expression::Gte(Box::new(crate::expressions::BinaryOp {
2740 left: Self::replace_rownum_with_range(op.left),
2741 right: op.right,
2742 left_comments: op.left_comments,
2743 operator_comments: op.operator_comments,
2744 trailing_comments: op.trailing_comments,
2745 inferred_type: op.inferred_type,
2746 }))
2747 }
2748 Expression::Sub(op) => {
2749 Expression::Sub(Box::new(crate::expressions::BinaryOp {
2750 left: Self::replace_rownum_with_range(op.left),
2751 right: op.right,
2752 left_comments: op.left_comments,
2753 operator_comments: op.operator_comments,
2754 trailing_comments: op.trailing_comments,
2755 inferred_type: op.inferred_type,
2756 }))
2757 }
2758 Expression::Alias(mut a) => {
2759 a.this = Self::replace_rownum_with_range(a.this);
2760 Expression::Alias(a)
2761 }
2762 other => other,
2763 }
2764 }
2765
2766 /// Check if an expression is `(ROW_NUMBER() OVER (...) - 1)` and replace with `range`
2767 fn try_replace_rownum_paren(expr: &Expression) -> Expression {
2768 if let Expression::Paren(ref p) = expr {
2769 if let Expression::Sub(ref sub) = p.this {
2770 if let Expression::WindowFunction(ref wf) = sub.left {
2771 if let Expression::Function(ref f) = wf.this {
2772 if f.name.eq_ignore_ascii_case("ROW_NUMBER") {
2773 if let Expression::Literal(crate::expressions::Literal::Number(ref n)) = sub.right {
2774 if n == "1" {
2775 return Expression::column("range");
2776 }
2777 }
2778 }
2779 }
2780 }
2781 }
2782 }
2783 expr.clone()
2784 }
2785
    /// Transform BigQuery GENERATE_DATE_ARRAY in UNNEST for Snowflake target.
    /// Converts:
    /// SELECT ..., alias, ... FROM t CROSS JOIN UNNEST(GENERATE_DATE_ARRAY(start, end, INTERVAL '1' unit)) AS alias
    /// To:
    /// SELECT ..., DATEADD(unit, CAST(alias AS INT), CAST(start AS DATE)) AS alias, ...
    /// FROM t, LATERAL FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (DATEDIFF(unit, start, end) + 1 - 1) + 1)) AS _t0(seq, key, path, index, alias, this)
    ///
    /// The rewrite is applied bottom-up over the whole tree via `transform_recursive`.
    /// Only the first matching join per SELECT is rewritten (the scan breaks after a hit).
    fn transform_generate_date_array_snowflake(expr: Expression) -> Result<Expression> {
        use crate::expressions::*;
        transform_recursive(expr, &|e| {
            // Handle ARRAY_SIZE(GENERATE_DATE_ARRAY(...)) -> ARRAY_SIZE((SELECT ARRAY_AGG(*) FROM subquery))
            // This scalar form is independent of the UNNEST/join rewrite below.
            if let Expression::ArraySize(ref af) = e {
                if let Expression::Function(ref f) = af.this {
                    if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY") && f.args.len() >= 2 {
                        let result = Self::convert_array_size_gda_snowflake(f)?;
                        return Ok(result);
                    }
                }
            }

            // Everything below only applies to SELECT nodes.
            let Expression::Select(mut sel) = e else {
                return Ok(e);
            };

            // Find joins with UNNEST containing GenerateSeries (from GENERATE_DATE_ARRAY conversion)
            let mut gda_info: Option<(String, Expression, Expression, String)> = None; // (alias_name, start_expr, end_expr, unit)
            let mut gda_join_idx: Option<usize> = None;

            for (idx, join) in sel.joins.iter().enumerate() {
                // The join.this may be:
                // 1. Unnest(UnnestFunc { alias: Some("mnth"), ... })
                // 2. Alias(Alias { this: Unnest(UnnestFunc { alias: None, ... }), alias: "mnth", ... })
                let (unnest_ref, alias_name) = match &join.this {
                    Expression::Unnest(ref unnest) => {
                        let alias = unnest.alias.as_ref().map(|id| id.name.clone());
                        (Some(unnest.as_ref()), alias)
                    }
                    Expression::Alias(ref a) => {
                        if let Expression::Unnest(ref unnest) = a.this {
                            (Some(unnest.as_ref()), Some(a.alias.name.clone()))
                        } else {
                            (None, None)
                        }
                    }
                    _ => (None, None),
                };

                if let (Some(unnest), Some(alias)) = (unnest_ref, alias_name) {
                    // Check the main expression (this) of the UNNEST for GENERATE_DATE_ARRAY function
                    if let Expression::Function(ref f) = unnest.this {
                        if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY") && f.args.len() >= 2 {
                            let start_expr = f.args[0].clone();
                            let end_expr = f.args[1].clone();
                            let step = f.args.get(2).cloned();

                            // Extract unit from step interval
                            // NOTE(review): this duplicates `extract_interval_unit_str`, except
                            // that here a missing step yields None (no DAY default), so the join
                            // is left untouched — confirm whether defaulting to DAY is desired.
                            let unit = if let Some(Expression::Interval(ref iv)) = step {
                                if let Some(IntervalUnitSpec::Simple { ref unit, .. }) = iv.unit {
                                    Some(format!("{:?}", unit).to_uppercase())
                                } else if let Some(ref this) = iv.this {
                                    // The interval may be stored as a string like "1 MONTH"
                                    if let Expression::Literal(Literal::String(ref s)) = this {
                                        let parts: Vec<&str> = s.split_whitespace().collect();
                                        if parts.len() == 2 {
                                            Some(parts[1].to_uppercase())
                                        } else if parts.len() == 1 {
                                            // Single word like "MONTH" or just "1"
                                            let upper = parts[0].to_uppercase();
                                            if matches!(
                                                upper.as_str(),
                                                "YEAR"
                                                    | "QUARTER"
                                                    | "MONTH"
                                                    | "WEEK"
                                                    | "DAY"
                                                    | "HOUR"
                                                    | "MINUTE"
                                                    | "SECOND"
                                            ) {
                                                Some(upper)
                                            } else {
                                                None
                                            }
                                        } else {
                                            None
                                        }
                                    } else {
                                        None
                                    }
                                } else {
                                    None
                                }
                            } else {
                                None
                            };

                            if let Some(unit_str) = unit {
                                gda_info = Some((alias, start_expr, end_expr, unit_str));
                                gda_join_idx = Some(idx);
                            }
                        }
                    }
                }
                if gda_info.is_some() {
                    break;
                }
            }

            let Some((alias_name, start_expr, end_expr, unit_str)) = gda_info else {
                // Also check FROM clause for UNNEST(GENERATE_DATE_ARRAY(...)) patterns
                // This handles Generic->Snowflake where GENERATE_DATE_ARRAY is in FROM, not in JOIN
                let result = Self::try_transform_from_gda_snowflake(sel);
                return result;
            };
            // Safe: gda_join_idx is always set together with gda_info above.
            let join_idx = gda_join_idx.unwrap();

            // Build ARRAY_GENERATE_RANGE(0, (DATEDIFF(unit, start, end) + 1 - 1) + 1)
            let datediff = Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![
                    Expression::Column(Column {
                        name: Identifier::new(&unit_str),
                        table: None,
                        join_mark: false,
                        trailing_comments: vec![],
                        span: None,
                        inferred_type: None,
                    }),
                    start_expr.clone(),
                    end_expr.clone(),
                ],
            )));
            // (DATEDIFF(...) + 1 - 1) + 1
            // Deliberately left un-simplified — presumably to match Python sqlglot's
            // textual output for this rewrite; TODO confirm before folding constants.
            let plus_one = Expression::Add(Box::new(BinaryOp {
                left: datediff,
                right: Expression::Literal(Literal::Number("1".to_string())),
                left_comments: vec![],
                operator_comments: vec![],
                trailing_comments: vec![],
                inferred_type: None,
            }));
            let minus_one = Expression::Sub(Box::new(BinaryOp {
                left: plus_one,
                right: Expression::Literal(Literal::Number("1".to_string())),
                left_comments: vec![],
                operator_comments: vec![],
                trailing_comments: vec![],
                inferred_type: None,
            }));
            let paren_inner = Expression::Paren(Box::new(Paren {
                this: minus_one,
                trailing_comments: vec![],
            }));
            let outer_plus_one = Expression::Add(Box::new(BinaryOp {
                left: paren_inner,
                right: Expression::Literal(Literal::Number("1".to_string())),
                left_comments: vec![],
                operator_comments: vec![],
                trailing_comments: vec![],
                inferred_type: None,
            }));

            let array_gen_range = Expression::Function(Box::new(Function::new(
                "ARRAY_GENERATE_RANGE".to_string(),
                vec![
                    Expression::Literal(Literal::Number("0".to_string())),
                    outer_plus_one,
                ],
            )));

            // Build FLATTEN(INPUT => ARRAY_GENERATE_RANGE(...))
            let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
                name: Identifier::new("INPUT"),
                value: array_gen_range,
                separator: crate::expressions::NamedArgSeparator::DArrow,
            }));
            let flatten = Expression::Function(Box::new(Function::new(
                "FLATTEN".to_string(),
                vec![flatten_input],
            )));

            // Build LATERAL FLATTEN(...) AS _t0(seq, key, path, index, alias, this)
            // The column-alias positions mirror FLATTEN's output columns, with the
            // VALUE slot renamed to the original UNNEST alias.
            let alias_table = Alias {
                this: flatten,
                alias: Identifier::new("_t0"),
                column_aliases: vec![
                    Identifier::new("seq"),
                    Identifier::new("key"),
                    Identifier::new("path"),
                    Identifier::new("index"),
                    Identifier::new(&alias_name),
                    Identifier::new("this"),
                ],
                pre_alias_comments: vec![],
                trailing_comments: vec![],
                inferred_type: None,
            };
            let lateral_expr = Expression::Lateral(Box::new(Lateral {
                this: Box::new(Expression::Alias(Box::new(alias_table))),
                view: None,
                outer: None,
                alias: None,
                alias_quoted: false,
                cross_apply: None,
                ordinality: None,
                column_aliases: vec![],
            }));

            // Remove the original join and add to FROM expressions
            sel.joins.remove(join_idx);
            if let Some(ref mut from) = sel.from {
                from.expressions.push(lateral_expr);
            }

            // Build DATEADD(unit, CAST(alias AS INT), CAST(start AS DATE))
            let dateadd_expr = Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Column(Column {
                        name: Identifier::new(&unit_str),
                        table: None,
                        join_mark: false,
                        trailing_comments: vec![],
                        span: None,
                        inferred_type: None,
                    }),
                    Expression::Cast(Box::new(Cast {
                        this: Expression::Column(Column {
                            name: Identifier::new(&alias_name),
                            table: None,
                            join_mark: false,
                            trailing_comments: vec![],
                            span: None,
                            inferred_type: None,
                        }),
                        to: DataType::Int {
                            length: None,
                            integer_spelling: false,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    })),
                    Expression::Cast(Box::new(Cast {
                        this: start_expr.clone(),
                        to: DataType::Date,
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    })),
                ],
            )));

            // Replace references to the alias in the SELECT list
            let new_exprs: Vec<Expression> = sel
                .expressions
                .iter()
                .map(|expr| Self::replace_column_ref_with_dateadd(expr, &alias_name, &dateadd_expr))
                .collect();
            sel.expressions = new_exprs;

            Ok(Expression::Select(sel))
        })
    }
3053
3054 /// Helper: replace column references to `alias_name` with dateadd expression
3055 fn replace_column_ref_with_dateadd(
3056 expr: &Expression,
3057 alias_name: &str,
3058 dateadd: &Expression,
3059 ) -> Expression {
3060 use crate::expressions::*;
3061 match expr {
3062 Expression::Column(c) if c.name.name == alias_name && c.table.is_none() => {
3063 // Plain column reference -> DATEADD(...) AS alias_name
3064 Expression::Alias(Box::new(Alias {
3065 this: dateadd.clone(),
3066 alias: Identifier::new(alias_name),
3067 column_aliases: vec![],
3068 pre_alias_comments: vec![],
3069 trailing_comments: vec![],
3070 inferred_type: None,
3071 }))
3072 }
3073 Expression::Alias(a) => {
3074 // Check if the inner expression references the alias
3075 let new_this = Self::replace_column_ref_inner(&a.this, alias_name, dateadd);
3076 Expression::Alias(Box::new(Alias {
3077 this: new_this,
3078 alias: a.alias.clone(),
3079 column_aliases: a.column_aliases.clone(),
3080 pre_alias_comments: a.pre_alias_comments.clone(),
3081 trailing_comments: a.trailing_comments.clone(),
3082 inferred_type: None,
3083 }))
3084 }
3085 _ => expr.clone(),
3086 }
3087 }
3088
3089 /// Helper: replace column references in inner expression (not top-level)
3090 fn replace_column_ref_inner(
3091 expr: &Expression,
3092 alias_name: &str,
3093 dateadd: &Expression,
3094 ) -> Expression {
3095 use crate::expressions::*;
3096 match expr {
3097 Expression::Column(c) if c.name.name == alias_name && c.table.is_none() => {
3098 dateadd.clone()
3099 }
3100 Expression::Add(op) => {
3101 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
3102 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
3103 Expression::Add(Box::new(BinaryOp {
3104 left,
3105 right,
3106 left_comments: op.left_comments.clone(),
3107 operator_comments: op.operator_comments.clone(),
3108 trailing_comments: op.trailing_comments.clone(),
3109 inferred_type: None,
3110 }))
3111 }
3112 Expression::Sub(op) => {
3113 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
3114 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
3115 Expression::Sub(Box::new(BinaryOp {
3116 left,
3117 right,
3118 left_comments: op.left_comments.clone(),
3119 operator_comments: op.operator_comments.clone(),
3120 trailing_comments: op.trailing_comments.clone(),
3121 inferred_type: None,
3122 }))
3123 }
3124 Expression::Mul(op) => {
3125 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
3126 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
3127 Expression::Mul(Box::new(BinaryOp {
3128 left,
3129 right,
3130 left_comments: op.left_comments.clone(),
3131 operator_comments: op.operator_comments.clone(),
3132 trailing_comments: op.trailing_comments.clone(),
3133 inferred_type: None,
3134 }))
3135 }
3136 _ => expr.clone(),
3137 }
3138 }
3139
    /// Handle UNNEST(GENERATE_DATE_ARRAY(...)) in FROM clause for Snowflake target.
    /// Converts to a subquery with DATEADD + TABLE(FLATTEN(ARRAY_GENERATE_RANGE(...))).
    ///
    /// Counterpart of the join-based rewrite in `transform_generate_date_array_snowflake`;
    /// invoked from there when no matching join is found. Only the first matching FROM
    /// expression is rewritten.
    fn try_transform_from_gda_snowflake(
        mut sel: Box<crate::expressions::Select>,
    ) -> Result<Expression> {
        use crate::expressions::*;

        // Extract GDA info from FROM clause
        let mut gda_info: Option<(
            usize,
            String,
            Expression,
            Expression,
            String,
            Option<(String, Vec<Identifier>)>,
        )> = None; // (from_idx, col_name, start, end, unit, outer_alias)

        if let Some(ref from) = sel.from {
            for (idx, table_expr) in from.expressions.iter().enumerate() {
                // Pattern 1: UNNEST(GENERATE_DATE_ARRAY(...))
                // Pattern 2: Alias(UNNEST(GENERATE_DATE_ARRAY(...))) AS _q(date_week)
                let (unnest_opt, outer_alias_info) = match table_expr {
                    Expression::Unnest(ref unnest) => (Some(unnest.as_ref()), None),
                    Expression::Alias(ref a) => {
                        if let Expression::Unnest(ref unnest) = a.this {
                            let alias_info = (a.alias.name.clone(), a.column_aliases.clone());
                            (Some(unnest.as_ref()), Some(alias_info))
                        } else {
                            (None, None)
                        }
                    }
                    _ => (None, None),
                };

                if let Some(unnest) = unnest_opt {
                    // Check for GENERATE_DATE_ARRAY function
                    let func_opt = match &unnest.this {
                        Expression::Function(ref f)
                            if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY")
                                && f.args.len() >= 2 =>
                        {
                            Some(f)
                        }
                        // Also check for GenerateSeries (from earlier normalization)
                        _ => None,
                    };

                    if let Some(f) = func_opt {
                        let start_expr = f.args[0].clone();
                        let end_expr = f.args[1].clone();
                        let step = f.args.get(2).cloned();

                        // Extract unit and column name
                        // A missing step defaults to DAY inside extract_interval_unit_str;
                        // with no column alias the output column falls back to "value"
                        // (FLATTEN's VALUE position).
                        let unit = Self::extract_interval_unit_str(&step);
                        let col_name = outer_alias_info
                            .as_ref()
                            .and_then(|(_, cols)| cols.first().map(|id| id.name.clone()))
                            .unwrap_or_else(|| "value".to_string());

                        if let Some(unit_str) = unit {
                            gda_info = Some((
                                idx,
                                col_name,
                                start_expr,
                                end_expr,
                                unit_str,
                                outer_alias_info,
                            ));
                            break;
                        }
                    }
                }
            }
        }

        let Some((from_idx, col_name, start_expr, end_expr, unit_str, outer_alias_info)) = gda_info
        else {
            // Nothing to rewrite: return the SELECT unchanged.
            return Ok(Expression::Select(sel));
        };

        // Build the Snowflake subquery:
        // (SELECT DATEADD(unit, CAST(col_name AS INT), CAST(start AS DATE)) AS col_name
        //  FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (DATEDIFF(unit, start, end) + 1 - 1) + 1))) AS _t0(seq, key, path, index, col_name, this))

        // DATEDIFF(unit, start, end)
        let datediff = Expression::Function(Box::new(Function::new(
            "DATEDIFF".to_string(),
            vec![
                Expression::Column(Column {
                    name: Identifier::new(&unit_str),
                    table: None,
                    join_mark: false,
                    trailing_comments: vec![],
                    span: None,
                    inferred_type: None,
                }),
                start_expr.clone(),
                end_expr.clone(),
            ],
        )));
        // (DATEDIFF(...) + 1 - 1) + 1
        // Left un-simplified — presumably to match Python sqlglot's textual output;
        // TODO confirm before folding constants.
        let plus_one = Expression::Add(Box::new(BinaryOp {
            left: datediff,
            right: Expression::Literal(Literal::Number("1".to_string())),
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        }));
        let minus_one = Expression::Sub(Box::new(BinaryOp {
            left: plus_one,
            right: Expression::Literal(Literal::Number("1".to_string())),
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        }));
        let paren_inner = Expression::Paren(Box::new(Paren {
            this: minus_one,
            trailing_comments: vec![],
        }));
        let outer_plus_one = Expression::Add(Box::new(BinaryOp {
            left: paren_inner,
            right: Expression::Literal(Literal::Number("1".to_string())),
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        }));

        let array_gen_range = Expression::Function(Box::new(Function::new(
            "ARRAY_GENERATE_RANGE".to_string(),
            vec![
                Expression::Literal(Literal::Number("0".to_string())),
                outer_plus_one,
            ],
        )));

        // TABLE(FLATTEN(INPUT => ...))
        let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
            name: Identifier::new("INPUT"),
            value: array_gen_range,
            separator: crate::expressions::NamedArgSeparator::DArrow,
        }));
        let flatten = Expression::Function(Box::new(Function::new(
            "FLATTEN".to_string(),
            vec![flatten_input],
        )));

        // Determine alias name for the table: use outer alias or _t0
        let table_alias_name = outer_alias_info
            .as_ref()
            .map(|(name, _)| name.clone())
            .unwrap_or_else(|| "_t0".to_string());

        // TABLE(FLATTEN(...)) AS _t0(seq, key, path, index, col_name, this)
        // Column-alias positions mirror FLATTEN's output columns, with the VALUE
        // slot renamed to the requested column name.
        let table_func =
            Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])));
        let flatten_aliased = Expression::Alias(Box::new(Alias {
            this: table_func,
            alias: Identifier::new(&table_alias_name),
            column_aliases: vec![
                Identifier::new("seq"),
                Identifier::new("key"),
                Identifier::new("path"),
                Identifier::new("index"),
                Identifier::new(&col_name),
                Identifier::new("this"),
            ],
            pre_alias_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        }));

        // SELECT DATEADD(unit, CAST(col_name AS INT), CAST(start AS DATE)) AS col_name
        let dateadd_expr = Expression::Function(Box::new(Function::new(
            "DATEADD".to_string(),
            vec![
                Expression::Column(Column {
                    name: Identifier::new(&unit_str),
                    table: None,
                    join_mark: false,
                    trailing_comments: vec![],
                    span: None,
                    inferred_type: None,
                }),
                Expression::Cast(Box::new(Cast {
                    this: Expression::Column(Column {
                        name: Identifier::new(&col_name),
                        table: None,
                        join_mark: false,
                        trailing_comments: vec![],
                        span: None,
                        inferred_type: None,
                    }),
                    to: DataType::Int {
                        length: None,
                        integer_spelling: false,
                    },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                })),
                // Use start_expr directly - it's already been normalized (DATE literal -> CAST)
                start_expr.clone(),
            ],
        )));
        let dateadd_aliased = Expression::Alias(Box::new(Alias {
            this: dateadd_expr,
            alias: Identifier::new(&col_name),
            column_aliases: vec![],
            pre_alias_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        }));

        // Build inner SELECT
        let mut inner_select = Select::new();
        inner_select.expressions = vec![dateadd_aliased];
        inner_select.from = Some(From {
            expressions: vec![flatten_aliased],
        });

        let inner_select_expr = Expression::Select(Box::new(inner_select));
        let subquery = Expression::Subquery(Box::new(Subquery {
            this: inner_select_expr,
            alias: None,
            column_aliases: vec![],
            order_by: None,
            limit: None,
            offset: None,
            distribute_by: None,
            sort_by: None,
            cluster_by: None,
            lateral: false,
            modifiers_inside: false,
            trailing_comments: vec![],
            inferred_type: None,
        }));

        // If there was an outer alias (e.g., AS _q(date_week)), wrap with alias
        let replacement = if let Some((alias_name, col_aliases)) = outer_alias_info {
            Expression::Alias(Box::new(Alias {
                this: subquery,
                alias: Identifier::new(&alias_name),
                column_aliases: col_aliases,
                pre_alias_comments: vec![],
                trailing_comments: vec![],
                inferred_type: None,
            }))
        } else {
            subquery
        };

        // Replace the FROM expression
        if let Some(ref mut from) = sel.from {
            from.expressions[from_idx] = replacement;
        }

        Ok(Expression::Select(sel))
    }
3403
    /// Convert ARRAY_SIZE(GENERATE_DATE_ARRAY(start, end, step)) for Snowflake.
    /// Produces: ARRAY_SIZE((SELECT ARRAY_AGG(*) FROM (SELECT DATEADD(unit, CAST(value AS INT), start) AS value
    /// FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (DATEDIFF(unit, start, end) + 1 - 1) + 1))) AS _t0(...))))
    ///
    /// Caller guarantees `f.args.len() >= 2` (checked in
    /// `transform_generate_date_array_snowflake`), so the indexing below is safe.
    fn convert_array_size_gda_snowflake(f: &crate::expressions::Function) -> Result<Expression> {
        use crate::expressions::*;

        let start_expr = f.args[0].clone();
        let end_expr = f.args[1].clone();
        let step = f.args.get(2).cloned();
        // An unrecognized step falls back to DAY here (unlike the join rewrite).
        let unit_str = Self::extract_interval_unit_str(&step).unwrap_or_else(|| "DAY".to_string());
        // Fixed output column name; maps to FLATTEN's VALUE position below.
        let col_name = "value";

        // Build the inner subquery: same as try_transform_from_gda_snowflake
        let datediff = Expression::Function(Box::new(Function::new(
            "DATEDIFF".to_string(),
            vec![
                Expression::Column(Column {
                    name: Identifier::new(&unit_str),
                    table: None,
                    join_mark: false,
                    trailing_comments: vec![],
                    span: None,
                    inferred_type: None,
                }),
                start_expr.clone(),
                end_expr.clone(),
            ],
        )));
        // (DATEDIFF(...) + 1 - 1) + 1 — kept un-simplified to mirror the other
        // GDA rewrites (presumably matching Python sqlglot output; TODO confirm).
        let plus_one = Expression::Add(Box::new(BinaryOp {
            left: datediff,
            right: Expression::Literal(Literal::Number("1".to_string())),
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        }));
        let minus_one = Expression::Sub(Box::new(BinaryOp {
            left: plus_one,
            right: Expression::Literal(Literal::Number("1".to_string())),
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        }));
        let paren_inner = Expression::Paren(Box::new(Paren {
            this: minus_one,
            trailing_comments: vec![],
        }));
        let outer_plus_one = Expression::Add(Box::new(BinaryOp {
            left: paren_inner,
            right: Expression::Literal(Literal::Number("1".to_string())),
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        }));

        let array_gen_range = Expression::Function(Box::new(Function::new(
            "ARRAY_GENERATE_RANGE".to_string(),
            vec![
                Expression::Literal(Literal::Number("0".to_string())),
                outer_plus_one,
            ],
        )));

        // FLATTEN(INPUT => ARRAY_GENERATE_RANGE(...)) wrapped in TABLE(...).
        let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
            name: Identifier::new("INPUT"),
            value: array_gen_range,
            separator: crate::expressions::NamedArgSeparator::DArrow,
        }));
        let flatten = Expression::Function(Box::new(Function::new(
            "FLATTEN".to_string(),
            vec![flatten_input],
        )));

        let table_func =
            Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])));
        // Column-alias positions mirror FLATTEN's output columns; VALUE -> "value".
        let flatten_aliased = Expression::Alias(Box::new(Alias {
            this: table_func,
            alias: Identifier::new("_t0"),
            column_aliases: vec![
                Identifier::new("seq"),
                Identifier::new("key"),
                Identifier::new("path"),
                Identifier::new("index"),
                Identifier::new(col_name),
                Identifier::new("this"),
            ],
            pre_alias_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        }));

        // DATEADD(unit, CAST(value AS INT), start) — note: start is used as-is,
        // without the CAST(... AS DATE) the join rewrite adds.
        let dateadd_expr = Expression::Function(Box::new(Function::new(
            "DATEADD".to_string(),
            vec![
                Expression::Column(Column {
                    name: Identifier::new(&unit_str),
                    table: None,
                    join_mark: false,
                    trailing_comments: vec![],
                    span: None,
                    inferred_type: None,
                }),
                Expression::Cast(Box::new(Cast {
                    this: Expression::Column(Column {
                        name: Identifier::new(col_name),
                        table: None,
                        join_mark: false,
                        trailing_comments: vec![],
                        span: None,
                        inferred_type: None,
                    }),
                    to: DataType::Int {
                        length: None,
                        integer_spelling: false,
                    },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                })),
                start_expr.clone(),
            ],
        )));
        let dateadd_aliased = Expression::Alias(Box::new(Alias {
            this: dateadd_expr,
            alias: Identifier::new(col_name),
            column_aliases: vec![],
            pre_alias_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        }));

        // Inner SELECT: SELECT DATEADD(...) AS value FROM TABLE(FLATTEN(...)) AS _t0(...)
        let mut inner_select = Select::new();
        inner_select.expressions = vec![dateadd_aliased];
        inner_select.from = Some(From {
            expressions: vec![flatten_aliased],
        });

        // Wrap in subquery for the inner part
        let inner_subquery = Expression::Subquery(Box::new(Subquery {
            this: Expression::Select(Box::new(inner_select)),
            alias: None,
            column_aliases: vec![],
            order_by: None,
            limit: None,
            offset: None,
            distribute_by: None,
            sort_by: None,
            cluster_by: None,
            lateral: false,
            modifiers_inside: false,
            trailing_comments: vec![],
            inferred_type: None,
        }));

        // Outer: SELECT ARRAY_AGG(*) FROM (inner_subquery)
        let star = Expression::Star(Star {
            table: None,
            except: None,
            replace: None,
            rename: None,
            trailing_comments: vec![],
            span: None,
        });
        let array_agg = Expression::ArrayAgg(Box::new(AggFunc {
            this: star,
            distinct: false,
            filter: None,
            order_by: vec![],
            name: Some("ARRAY_AGG".to_string()),
            ignore_nulls: None,
            having_max: None,
            limit: None,
            inferred_type: None,
        }));

        let mut outer_select = Select::new();
        outer_select.expressions = vec![array_agg];
        outer_select.from = Some(From {
            expressions: vec![inner_subquery],
        });

        // Wrap in a subquery
        let outer_subquery = Expression::Subquery(Box::new(Subquery {
            this: Expression::Select(Box::new(outer_select)),
            alias: None,
            column_aliases: vec![],
            order_by: None,
            limit: None,
            offset: None,
            distribute_by: None,
            sort_by: None,
            cluster_by: None,
            lateral: false,
            modifiers_inside: false,
            trailing_comments: vec![],
            inferred_type: None,
        }));

        // ARRAY_SIZE(subquery)
        Ok(Expression::ArraySize(Box::new(UnaryFunc::new(
            outer_subquery,
        ))))
    }
3612
3613 /// Extract interval unit string from an optional step expression.
3614 fn extract_interval_unit_str(step: &Option<Expression>) -> Option<String> {
3615 use crate::expressions::*;
3616 if let Some(Expression::Interval(ref iv)) = step {
3617 if let Some(IntervalUnitSpec::Simple { ref unit, .. }) = iv.unit {
3618 return Some(format!("{:?}", unit).to_uppercase());
3619 }
3620 if let Some(ref this) = iv.this {
3621 if let Expression::Literal(Literal::String(ref s)) = this {
3622 let parts: Vec<&str> = s.split_whitespace().collect();
3623 if parts.len() == 2 {
3624 return Some(parts[1].to_uppercase());
3625 } else if parts.len() == 1 {
3626 let upper = parts[0].to_uppercase();
3627 if matches!(
3628 upper.as_str(),
3629 "YEAR"
3630 | "QUARTER"
3631 | "MONTH"
3632 | "WEEK"
3633 | "DAY"
3634 | "HOUR"
3635 | "MINUTE"
3636 | "SECOND"
3637 ) {
3638 return Some(upper);
3639 }
3640 }
3641 }
3642 }
3643 }
3644 // Default to DAY if no step or no interval
3645 if step.is_none() {
3646 return Some("DAY".to_string());
3647 }
3648 None
3649 }
3650
3651 fn normalize_snowflake_pretty(mut sql: String) -> String {
3652 if sql.contains("LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)")
3653 && sql.contains("ARRAY_GENERATE_RANGE(0, (GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1) + 1)")
3654 {
3655 sql = sql.replace(
3656 "AND uc.user_id <> ALL (SELECT DISTINCT\n _id\n FROM users, LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)\n WHERE\n GET_PATH(datasource.value, 'name') = 'something')",
3657 "AND uc.user_id <> ALL (\n SELECT DISTINCT\n _id\n FROM users, LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)\n WHERE\n GET_PATH(datasource.value, 'name') = 'something'\n )",
3658 );
3659
3660 sql = sql.replace(
3661 "CROSS JOIN TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1) + 1))) AS _u(seq, key, path, index, pos, this)",
3662 "CROSS JOIN TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (\n GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1\n) + 1))) AS _u(seq, key, path, index, pos, this)",
3663 );
3664
3665 sql = sql.replace(
3666 "OR (_u.pos > (ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1)\n AND _u_2.pos_2 = (ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1))",
3667 "OR (\n _u.pos > (\n ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1\n )\n AND _u_2.pos_2 = (\n ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1\n )\n )",
3668 );
3669 }
3670
3671 sql
3672 }
3673
3674 /// Apply cross-dialect semantic normalizations that depend on knowing both source and target.
3675 /// This handles cases where the same syntax has different semantics across dialects.
3676 fn cross_dialect_normalize(
3677 expr: Expression,
3678 source: DialectType,
3679 target: DialectType,
3680 ) -> Result<Expression> {
3681 use crate::expressions::{
3682 AggFunc, BinaryOp, Case, Cast, ConvertTimezone, DataType, DateTimeField, DateTruncFunc,
3683 Function, Identifier, IsNull, Literal, Null, Paren,
3684 };
3685
3686 // Helper to tag which kind of transform to apply
3687 #[derive(Debug)]
3688 enum Action {
3689 None,
3690 GreatestLeastNull,
3691 ArrayGenerateRange,
3692 Div0TypedDivision,
3693 ArrayAggCollectList,
3694 ArrayAggWithinGroupFilter,
3695 ArrayAggFilter,
3696 CastTimestampToDatetime,
3697 DateTruncWrapCast,
3698 ToDateToCast,
3699 ConvertTimezoneToExpr,
3700 SetToVariable,
3701 RegexpReplaceSnowflakeToDuckDB,
3702 BigQueryFunctionNormalize,
3703 BigQuerySafeDivide,
3704 BigQueryCastType,
3705 BigQueryToHexBare, // _BQ_TO_HEX(x) with no LOWER/UPPER wrapper
3706 BigQueryToHexLower, // LOWER(_BQ_TO_HEX(x))
3707 BigQueryToHexUpper, // UPPER(_BQ_TO_HEX(x))
3708 BigQueryLastDayStripUnit, // LAST_DAY(date, MONTH) -> LAST_DAY(date)
3709 BigQueryCastFormat, // CAST(x AS type FORMAT 'fmt') -> PARSE_DATE/PARSE_TIMESTAMP etc.
3710 BigQueryAnyValueHaving, // ANY_VALUE(x HAVING MAX/MIN y) -> ARG_MAX_NULL/ARG_MIN_NULL for DuckDB
3711 BigQueryApproxQuantiles, // APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
3712 GenericFunctionNormalize, // Cross-dialect function renaming (non-BigQuery sources)
3713 RegexpLikeToDuckDB, // RegexpLike -> REGEXP_MATCHES for DuckDB target (partial match)
3714 EpochConvert, // Expression::Epoch -> target-specific epoch function
3715 EpochMsConvert, // Expression::EpochMs -> target-specific epoch ms function
3716 TSQLTypeNormalize, // TSQL types (MONEY, SMALLMONEY, REAL, DATETIME2) -> standard types
3717 MySQLSafeDivide, // MySQL a/b -> a / NULLIF(b, 0) with optional CAST
3718 NullsOrdering, // Add NULLS FIRST/LAST for ORDER BY
3719 AlterTableRenameStripSchema, // ALTER TABLE db.t1 RENAME TO db.t2 -> ALTER TABLE db.t1 RENAME TO t2
3720 StringAggConvert, // STRING_AGG/WITHIN GROUP -> target-specific aggregate
3721 GroupConcatConvert, // GROUP_CONCAT -> target-specific aggregate
3722 TempTableHash, // TSQL #table -> temp table normalization
3723 ArrayLengthConvert, // CARDINALITY/ARRAY_LENGTH/ARRAY_SIZE -> target-specific
3724 DatePartUnquote, // DATE_PART('month', x) -> DATE_PART(month, x) for Snowflake target
3725 NvlClearOriginal, // Clear NVL original_name for cross-dialect transpilation
3726 HiveCastToTryCast, // Hive/Spark CAST -> TRY_CAST for targets that support it
3727 XorExpand, // MySQL XOR -> (a AND NOT b) OR (NOT a AND b) for non-XOR targets
3728 CastTimestampStripTz, // CAST(x AS TIMESTAMP WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark
3729 JsonExtractToGetJsonObject, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
3730 JsonExtractScalarToGetJsonObject, // JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
3731 JsonQueryValueConvert, // JsonQuery/JsonValue -> target-specific (ISNULL wrapper for TSQL, GET_JSON_OBJECT for Spark, etc.)
3732 JsonLiteralToJsonParse, // JSON 'x' -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake
3733 ArraySyntaxConvert, // ARRAY[x] -> ARRAY(x) for Spark, [x] for BigQuery/DuckDB
3734 AtTimeZoneConvert, // AT TIME ZONE -> AT_TIMEZONE (Presto) / FROM_UTC_TIMESTAMP (Spark)
3735 DayOfWeekConvert, // DAY_OF_WEEK -> dialect-specific
3736 MaxByMinByConvert, // MAX_BY/MIN_BY -> argMax/argMin for ClickHouse
3737 ArrayAggToCollectList, // ARRAY_AGG(x ORDER BY ...) -> COLLECT_LIST(x) for Hive/Spark
3738 ArrayAggToGroupConcat, // ARRAY_AGG(x) -> GROUP_CONCAT(x) for MySQL-like targets
3739 ElementAtConvert, // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
3740 CurrentUserParens, // CURRENT_USER -> CURRENT_USER() for Snowflake
3741 CastToJsonForSpark, // CAST(x AS JSON) -> TO_JSON(x) for Spark
3742 CastJsonToFromJson, // CAST(JSON_PARSE(literal) AS ARRAY/MAP) -> FROM_JSON(literal, type_string)
3743 ToJsonConvert, // TO_JSON(x) -> JSON_FORMAT(CAST(x AS JSON)) for Presto etc.
3744 ArrayAggNullFilter, // ARRAY_AGG(x) FILTER(WHERE cond) -> add AND NOT x IS NULL for DuckDB
3745 ArrayAggIgnoreNullsDuckDB, // ARRAY_AGG(x IGNORE NULLS ORDER BY ...) -> ARRAY_AGG(x ORDER BY a NULLS FIRST, ...) for DuckDB
3746 BigQueryPercentileContToDuckDB, // PERCENTILE_CONT(x, frac RESPECT NULLS) -> QUANTILE_CONT(x, frac) for DuckDB
3747 BigQueryArraySelectAsStructToSnowflake, // ARRAY(SELECT AS STRUCT ...) -> (SELECT ARRAY_AGG(OBJECT_CONSTRUCT(...)))
3748 CountDistinctMultiArg, // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN ... END)
3749 VarianceToClickHouse, // Expression::Variance -> varSamp for ClickHouse
3750 StddevToClickHouse, // Expression::Stddev -> stddevSamp for ClickHouse
3751 ApproxQuantileConvert, // Expression::ApproxQuantile -> APPROX_PERCENTILE for Snowflake
3752 ArrayIndexConvert, // array[1] -> array[0] for BigQuery (1-based to 0-based)
3753 DollarParamConvert, // $foo -> @foo for BigQuery
3754 TablesampleReservoir, // TABLESAMPLE (n ROWS) -> TABLESAMPLE RESERVOIR (n ROWS) for DuckDB
3755 BitAggFloatCast, // BIT_OR/BIT_AND/BIT_XOR float arg -> CAST(ROUND(CAST(arg)) AS INT) for DuckDB
3756 BitAggSnowflakeRename, // BIT_OR -> BITORAGG, BIT_AND -> BITANDAGG etc. for Snowflake
3757 StrftimeCastTimestamp, // CAST TIMESTAMP -> TIMESTAMP_NTZ for Spark in STRFTIME
3758 AnyValueIgnoreNulls, // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
3759 CreateTableStripComment, // Strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
3760 EscapeStringNormalize, // e'Hello\nworld' literal newline -> \n
3761 AnyToExists, // PostgreSQL x <op> ANY(array) -> EXISTS(array, x -> ...)
3762 ArrayConcatBracketConvert, // [1,2] -> ARRAY[1,2] for PostgreSQL in ARRAY_CAT
3763 SnowflakeIntervalFormat, // INTERVAL '2' HOUR -> INTERVAL '2 HOUR' for Snowflake
3764 AlterTableToSpRename, // ALTER TABLE RENAME -> EXEC sp_rename for TSQL
3765 StraightJoinCase, // STRAIGHT_JOIN -> straight_join for DuckDB
3766 RespectNullsConvert, // RESPECT NULLS window function handling
3767 MysqlNullsOrdering, // MySQL doesn't support NULLS ordering
3768 MysqlNullsLastRewrite, // Add CASE WHEN to ORDER BY for DuckDB -> MySQL (NULLS LAST simulation)
3769 BigQueryNullsOrdering, // BigQuery doesn't support NULLS FIRST/LAST - strip
3770 SnowflakeFloatProtect, // Protect FLOAT from being converted to DOUBLE by Snowflake target transform
3771 JsonToGetPath, // JSON arrow -> GET_PATH/PARSE_JSON for Snowflake
3772 FilterToIff, // FILTER(WHERE) -> IFF wrapping for Snowflake
3773 AggFilterToIff, // AggFunc.filter -> IFF wrapping for Snowflake (e.g., AVG(x) FILTER(WHERE cond))
3774 StructToRow, // DuckDB struct -> Presto ROW / BigQuery STRUCT
3775 SparkStructConvert, // Spark STRUCT(x AS col1, ...) -> ROW/DuckDB struct
3776 DecimalDefaultPrecision, // DECIMAL -> DECIMAL(18, 3) for Snowflake in BIT agg
3777 ApproxCountDistinctToApproxDistinct, // APPROX_COUNT_DISTINCT -> APPROX_DISTINCT for Presto/Trino
3778 CollectListToArrayAgg, // COLLECT_LIST -> ARRAY_AGG for Presto/DuckDB
3779 CollectSetConvert, // COLLECT_SET -> SET_AGG/ARRAY_AGG(DISTINCT)/ARRAY_UNIQUE_AGG
3780 PercentileConvert, // PERCENTILE -> QUANTILE/APPROX_PERCENTILE
3781 CorrIsnanWrap, // CORR(a,b) -> CASE WHEN ISNAN(CORR(a,b)) THEN NULL ELSE CORR(a,b) END
3782 TruncToDateTrunc, // TRUNC(ts, unit) -> DATE_TRUNC(unit, ts)
3783 ArrayContainsConvert, // ARRAY_CONTAINS -> CONTAINS/target-specific
3784 StrPositionExpand, // StrPosition with position -> complex STRPOS expansion for Presto/DuckDB
3785 TablesampleSnowflakeStrip, // Strip method and PERCENT for Snowflake target
3786 FirstToAnyValue, // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
3787 MonthsBetweenConvert, // Expression::MonthsBetween -> target-specific
3788 CurrentUserSparkParens, // CURRENT_USER -> CURRENT_USER() for Spark
3789 SparkDateFuncCast, // MONTH/YEAR/DAY('str') -> MONTH/YEAR/DAY(CAST('str' AS DATE)) from Spark
3790 MapFromArraysConvert, // Expression::MapFromArrays -> MAP/OBJECT_CONSTRUCT/MAP_FROM_ARRAYS
3791 AddMonthsConvert, // Expression::AddMonths -> target-specific DATEADD/DATE_ADD
3792 PercentileContConvert, // PERCENTILE_CONT/DISC WITHIN GROUP -> APPROX_PERCENTILE/PERCENTILE_APPROX
3793 GenerateSeriesConvert, // GENERATE_SERIES -> SEQUENCE/UNNEST(SEQUENCE)/EXPLODE(SEQUENCE)
3794 ConcatCoalesceWrap, // CONCAT(a, b) -> CONCAT(COALESCE(CAST(a), ''), ...) for Presto/ClickHouse
3795 PipeConcatToConcat, // a || b -> CONCAT(CAST(a), CAST(b)) for Presto
3796 DivFuncConvert, // DIV(a, b) -> a // b for DuckDB, CAST for BigQuery
3797 JsonObjectAggConvert, // JSON_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
3798 JsonbExistsConvert, // JSONB_EXISTS -> JSON_EXISTS for DuckDB
3799 DateBinConvert, // DATE_BIN -> TIME_BUCKET for DuckDB
3800 MysqlCastCharToText, // MySQL CAST(x AS CHAR) -> CAST(x AS TEXT/VARCHAR/STRING) for targets
3801 SparkCastVarcharToString, // Spark CAST(x AS VARCHAR/CHAR) -> CAST(x AS STRING) for Spark targets
3802 JsonExtractToArrow, // JSON_EXTRACT(x, path) -> x -> path for SQLite/DuckDB
3803 JsonExtractToTsql, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
3804 JsonExtractToClickHouse, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> JSONExtractString for ClickHouse
3805 JsonExtractScalarConvert, // JSON_EXTRACT_SCALAR -> target-specific (PostgreSQL, Snowflake, SQLite)
3806 JsonPathNormalize, // Normalize JSON path format (brackets, wildcards, quotes) for various dialects
3807 MinMaxToLeastGreatest, // Multi-arg MIN(a,b,c) -> LEAST(a,b,c), MAX(a,b,c) -> GREATEST(a,b,c)
3808 ClickHouseUniqToApproxCountDistinct, // uniq(x) -> APPROX_COUNT_DISTINCT(x) for non-ClickHouse targets
3809 ClickHouseAnyToAnyValue, // any(x) -> ANY_VALUE(x) for non-ClickHouse targets
3810 OracleVarchar2ToVarchar, // VARCHAR2(N CHAR/BYTE) -> VARCHAR(N) for non-Oracle targets
3811 Nvl2Expand, // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b ELSE c END
3812 IfnullToCoalesce, // IFNULL(a, b) -> COALESCE(a, b)
3813 IsAsciiConvert, // IS_ASCII(x) -> dialect-specific ASCII check
3814 StrPositionConvert, // STR_POSITION(haystack, needle[, pos]) -> dialect-specific
3815 DecodeSimplify, // DECODE with null-safe -> simple = comparison
3816 ArraySumConvert, // ARRAY_SUM -> target-specific
3817 ArraySizeConvert, // ARRAY_SIZE -> target-specific
3818 ArrayAnyConvert, // ARRAY_ANY -> target-specific
3819 CastTimestamptzToFunc, // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) for MySQL/StarRocks
3820 TsOrDsToDateConvert, // TS_OR_DS_TO_DATE(x[, fmt]) -> dialect-specific
3821 TsOrDsToDateStrConvert, // TS_OR_DS_TO_DATE_STR(x) -> SUBSTRING(CAST(x AS type), 1, 10)
3822 DateStrToDateConvert, // DATE_STR_TO_DATE(x) -> CAST(x AS DATE)
3823 TimeStrToDateConvert, // TIME_STR_TO_DATE(x) -> CAST(x AS DATE)
3824 TimeStrToTimeConvert, // TIME_STR_TO_TIME(x) -> CAST(x AS TIMESTAMP)
3825 DateToDateStrConvert, // DATE_TO_DATE_STR(x) -> CAST(x AS TEXT/VARCHAR/STRING)
3826 DateToDiConvert, // DATE_TO_DI(x) -> dialect-specific (CAST date to YYYYMMDD integer)
3827 DiToDateConvert, // DI_TO_DATE(x) -> dialect-specific (integer YYYYMMDD to date)
3828 TsOrDiToDiConvert, // TS_OR_DI_TO_DI(x) -> dialect-specific
3829 UnixToStrConvert, // UNIX_TO_STR(x, fmt) -> dialect-specific
3830 UnixToTimeConvert, // UNIX_TO_TIME(x) -> dialect-specific
3831 UnixToTimeStrConvert, // UNIX_TO_TIME_STR(x) -> dialect-specific
3832 TimeToUnixConvert, // TIME_TO_UNIX(x) -> dialect-specific
3833 TimeToStrConvert, // TIME_TO_STR(x, fmt) -> dialect-specific
3834 StrToUnixConvert, // STR_TO_UNIX(x, fmt) -> dialect-specific
3835 DateTruncSwapArgs, // DATE_TRUNC('unit', x) -> DATE_TRUNC(x, unit) / TRUNC(x, unit)
3836 TimestampTruncConvert, // TIMESTAMP_TRUNC(x, UNIT[, tz]) -> dialect-specific
3837 StrToDateConvert, // STR_TO_DATE(x, fmt) from Generic -> CAST(StrToTime(x,fmt) AS DATE)
3838 TsOrDsAddConvert, // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> DATE_ADD per dialect
3839 DateFromUnixDateConvert, // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, '1970-01-01')
3840 TimeStrToUnixConvert, // TIME_STR_TO_UNIX(x) -> dialect-specific
3841 TimeToTimeStrConvert, // TIME_TO_TIME_STR(x) -> CAST(x AS type)
3842 CreateTableLikeToCtas, // CREATE TABLE a LIKE b -> CREATE TABLE a AS SELECT * FROM b LIMIT 0
3843 CreateTableLikeToSelectInto, // CREATE TABLE a LIKE b -> SELECT TOP 0 * INTO a FROM b AS temp
3844 CreateTableLikeToAs, // CREATE TABLE a LIKE b -> CREATE TABLE a AS b (ClickHouse)
3845 ArrayRemoveConvert, // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter/ARRAY subquery
3846 ArrayReverseConvert, // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
3847 JsonKeysConvert, // JSON_KEYS -> JSON_OBJECT_KEYS/OBJECT_KEYS
3848 ParseJsonStrip, // PARSE_JSON(x) -> x (strip wrapper)
3849 ArraySizeDrill, // ARRAY_SIZE -> REPEATED_COUNT for Drill
3850 WeekOfYearToWeekIso, // WEEKOFYEAR -> WEEKISO for Snowflake cross-dialect
3851 RegexpSubstrSnowflakeToDuckDB, // REGEXP_SUBSTR(s, p, ...) -> REGEXP_EXTRACT variants for DuckDB
3852 RegexpSubstrSnowflakeIdentity, // REGEXP_SUBSTR/REGEXP_SUBSTR_ALL strip trailing group=0 for Snowflake identity
3853 RegexpSubstrAllSnowflakeToDuckDB, // REGEXP_SUBSTR_ALL(s, p, ...) -> REGEXP_EXTRACT_ALL variants for DuckDB
3854 RegexpCountSnowflakeToDuckDB, // REGEXP_COUNT(s, p, ...) -> LENGTH(REGEXP_EXTRACT_ALL(...)) for DuckDB
3855 RegexpInstrSnowflakeToDuckDB, // REGEXP_INSTR(s, p, ...) -> complex CASE expression for DuckDB
3856 RegexpReplacePositionSnowflakeToDuckDB, // REGEXP_REPLACE(s, p, r, pos, occ) -> DuckDB form
3857 RlikeSnowflakeToDuckDB, // RLIKE(a, b[, flags]) -> REGEXP_MATCHES(a, anchored_pattern) for DuckDB
3858 RegexpExtractAllToSnowflake, // BigQuery REGEXP_EXTRACT_ALL -> REGEXP_SUBSTR_ALL for Snowflake
3859 ArrayExceptConvert, // ARRAY_EXCEPT -> DuckDB complex CASE / Snowflake ARRAY_EXCEPT / Presto ARRAY_EXCEPT
3860 ArrayDistinctConvert, // ARRAY_DISTINCT -> DuckDB LIST_DISTINCT with NULL-aware CASE
3861 ArrayContainsDuckDBConvert, // ARRAY_CONTAINS -> DuckDB CASE with NULL-aware check
3862 }
3863
3864 // Handle SELECT INTO -> CREATE TABLE AS for DuckDB/Snowflake/etc.
3865 let expr = if matches!(source, DialectType::TSQL | DialectType::Fabric) {
3866 Self::transform_select_into(expr, source, target)
3867 } else {
3868 expr
3869 };
3870
3871 // Strip OFFSET ROWS for non-TSQL/Oracle targets
3872 let expr = if !matches!(
3873 target,
3874 DialectType::TSQL | DialectType::Oracle | DialectType::Fabric
3875 ) {
3876 if let Expression::Select(mut select) = expr {
3877 if let Some(ref mut offset) = select.offset {
3878 offset.rows = None;
3879 }
3880 Expression::Select(select)
3881 } else {
3882 expr
3883 }
3884 } else {
3885 expr
3886 };
3887
3888 // Oracle: LIMIT -> FETCH FIRST, OFFSET -> OFFSET ROWS
3889 let expr = if matches!(target, DialectType::Oracle) {
3890 if let Expression::Select(mut select) = expr {
3891 if let Some(limit) = select.limit.take() {
3892 // Convert LIMIT to FETCH FIRST n ROWS ONLY
3893 select.fetch = Some(crate::expressions::Fetch {
3894 direction: "FIRST".to_string(),
3895 count: Some(limit.this),
3896 percent: false,
3897 rows: true,
3898 with_ties: false,
3899 });
3900 }
3901 // Add ROWS to OFFSET if present
3902 if let Some(ref mut offset) = select.offset {
3903 offset.rows = Some(true);
3904 }
3905 Expression::Select(select)
3906 } else {
3907 expr
3908 }
3909 } else {
3910 expr
3911 };
3912
3913 // Handle CreateTable WITH properties transformation before recursive transforms
3914 let expr = if let Expression::CreateTable(mut ct) = expr {
3915 Self::transform_create_table_properties(&mut ct, source, target);
3916
3917 // Handle Hive-style PARTITIONED BY (col_name type, ...) -> target-specific
3918 // When the PARTITIONED BY clause contains column definitions, merge them into the
3919 // main column list and adjust the PARTITIONED BY clause for the target dialect.
3920 if matches!(
3921 source,
3922 DialectType::Hive | DialectType::Spark | DialectType::Databricks
3923 ) {
3924 let mut partition_col_names: Vec<String> = Vec::new();
3925 let mut partition_col_defs: Vec<crate::expressions::ColumnDef> = Vec::new();
3926 let mut has_col_def_partitions = false;
3927
3928 // Check if any PARTITIONED BY property contains ColumnDef expressions
3929 for prop in &ct.properties {
3930 if let Expression::PartitionedByProperty(ref pbp) = prop {
3931 if let Expression::Tuple(ref tuple) = *pbp.this {
3932 for expr in &tuple.expressions {
3933 if let Expression::ColumnDef(ref cd) = expr {
3934 has_col_def_partitions = true;
3935 partition_col_names.push(cd.name.name.clone());
3936 partition_col_defs.push(*cd.clone());
3937 }
3938 }
3939 }
3940 }
3941 }
3942
3943 if has_col_def_partitions && !matches!(target, DialectType::Hive) {
3944 // Merge partition columns into main column list
3945 for cd in partition_col_defs {
3946 ct.columns.push(cd);
3947 }
3948
3949 // Replace PARTITIONED BY property with column-name-only version
3950 ct.properties
3951 .retain(|p| !matches!(p, Expression::PartitionedByProperty(_)));
3952
3953 if matches!(
3954 target,
3955 DialectType::Presto | DialectType::Trino | DialectType::Athena
3956 ) {
3957 // Presto: WITH (PARTITIONED_BY=ARRAY['y', 'z'])
3958 let array_elements: Vec<String> = partition_col_names
3959 .iter()
3960 .map(|n| format!("'{}'", n))
3961 .collect();
3962 let array_value = format!("ARRAY[{}]", array_elements.join(", "));
3963 ct.with_properties
3964 .push(("PARTITIONED_BY".to_string(), array_value));
3965 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
3966 // Spark: PARTITIONED BY (y, z) - just column names
3967 let name_exprs: Vec<Expression> = partition_col_names
3968 .iter()
3969 .map(|n| {
3970 Expression::Column(crate::expressions::Column {
3971 name: crate::expressions::Identifier::new(n.clone()),
3972 table: None,
3973 join_mark: false,
3974 trailing_comments: Vec::new(),
3975 span: None,
3976 inferred_type: None,
3977 })
3978 })
3979 .collect();
3980 ct.properties.insert(
3981 0,
3982 Expression::PartitionedByProperty(Box::new(
3983 crate::expressions::PartitionedByProperty {
3984 this: Box::new(Expression::Tuple(Box::new(
3985 crate::expressions::Tuple {
3986 expressions: name_exprs,
3987 },
3988 ))),
3989 },
3990 )),
3991 );
3992 }
3993 // For DuckDB and other targets, just drop the PARTITIONED BY (already retained above)
3994 }
3995
3996 // Note: Non-ColumnDef partitions (e.g., function expressions like MONTHS(y))
3997 // are handled by transform_create_table_properties which runs first
3998 }
3999
4000 // Strip LOCATION property for Presto/Trino (not supported)
4001 if matches!(
4002 target,
4003 DialectType::Presto | DialectType::Trino | DialectType::Athena
4004 ) {
4005 ct.properties
4006 .retain(|p| !matches!(p, Expression::LocationProperty(_)));
4007 }
4008
4009 // Strip table-level constraints for Spark/Hive/Databricks
4010 // Keep PRIMARY KEY and LIKE constraints but strip TSQL-specific modifiers; remove all others
4011 if matches!(
4012 target,
4013 DialectType::Spark | DialectType::Databricks | DialectType::Hive
4014 ) {
4015 ct.constraints.retain(|c| {
4016 matches!(
4017 c,
4018 crate::expressions::TableConstraint::PrimaryKey { .. }
4019 | crate::expressions::TableConstraint::Like { .. }
4020 )
4021 });
4022 for constraint in &mut ct.constraints {
4023 if let crate::expressions::TableConstraint::PrimaryKey {
4024 columns,
4025 modifiers,
4026 ..
4027 } = constraint
4028 {
4029 // Strip ASC/DESC from column names
4030 for col in columns.iter_mut() {
4031 if col.name.ends_with(" ASC") {
4032 col.name = col.name[..col.name.len() - 4].to_string();
4033 } else if col.name.ends_with(" DESC") {
4034 col.name = col.name[..col.name.len() - 5].to_string();
4035 }
4036 }
4037 // Strip TSQL-specific modifiers
4038 modifiers.clustered = None;
4039 modifiers.with_options.clear();
4040 modifiers.on_filegroup = None;
4041 }
4042 }
4043 }
4044
4045 // Databricks: IDENTITY columns with INT/INTEGER -> BIGINT
4046 if matches!(target, DialectType::Databricks) {
4047 for col in &mut ct.columns {
4048 if col.auto_increment {
4049 if matches!(col.data_type, crate::expressions::DataType::Int { .. }) {
4050 col.data_type = crate::expressions::DataType::BigInt { length: None };
4051 }
4052 }
4053 }
4054 }
4055
4056 // Spark/Databricks: INTEGER -> INT in column definitions
4057 // Python sqlglot always outputs INT for Spark/Databricks
4058 if matches!(target, DialectType::Spark | DialectType::Databricks) {
4059 for col in &mut ct.columns {
4060 if let crate::expressions::DataType::Int {
4061 integer_spelling, ..
4062 } = &mut col.data_type
4063 {
4064 *integer_spelling = false;
4065 }
4066 }
4067 }
4068
4069 // Strip explicit NULL constraints for Hive/Spark (B INTEGER NULL -> B INTEGER)
4070 if matches!(target, DialectType::Hive | DialectType::Spark) {
4071 for col in &mut ct.columns {
4072 // If nullable is explicitly true (NULL), change to None (omit it)
4073 if col.nullable == Some(true) {
4074 col.nullable = None;
4075 }
4076 // Also remove from constraints if stored there
4077 col.constraints
4078 .retain(|c| !matches!(c, crate::expressions::ColumnConstraint::Null));
4079 }
4080 }
4081
4082 // Strip TSQL ON filegroup for non-TSQL/Fabric targets
4083 if ct.on_property.is_some()
4084 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
4085 {
4086 ct.on_property = None;
4087 }
4088
4089 // Snowflake: strip ARRAY type parameters (ARRAY<INT> -> ARRAY, ARRAY<ARRAY<INT>> -> ARRAY)
4090 // Snowflake doesn't support typed arrays in DDL
4091 if matches!(target, DialectType::Snowflake) {
4092 fn strip_array_type_params(dt: &mut crate::expressions::DataType) {
4093 if let crate::expressions::DataType::Array { .. } = dt {
4094 *dt = crate::expressions::DataType::Custom {
4095 name: "ARRAY".to_string(),
4096 };
4097 }
4098 }
4099 for col in &mut ct.columns {
4100 strip_array_type_params(&mut col.data_type);
4101 }
4102 }
4103
4104 // PostgreSQL target: ensure IDENTITY columns have NOT NULL
4105 // If NOT NULL was explicit in source (present in constraint_order), preserve original order.
4106 // If NOT NULL was not explicit, add it after IDENTITY (GENERATED BY DEFAULT AS IDENTITY NOT NULL).
4107 if matches!(target, DialectType::PostgreSQL) {
4108 for col in &mut ct.columns {
4109 if col.auto_increment && !col.constraint_order.is_empty() {
4110 use crate::expressions::ConstraintType;
4111 let has_explicit_not_null = col
4112 .constraint_order
4113 .iter()
4114 .any(|ct| *ct == ConstraintType::NotNull);
4115
4116 if has_explicit_not_null {
4117 // Source had explicit NOT NULL - preserve original order
4118 // Just ensure nullable is set
4119 if col.nullable != Some(false) {
4120 col.nullable = Some(false);
4121 }
4122 } else {
4123 // Source didn't have explicit NOT NULL - build order with
4124 // AutoIncrement + NotNull first, then remaining constraints
4125 let mut new_order = Vec::new();
4126 // Put AutoIncrement (IDENTITY) first, followed by synthetic NotNull
4127 new_order.push(ConstraintType::AutoIncrement);
4128 new_order.push(ConstraintType::NotNull);
4129 // Add remaining constraints in original order (except AutoIncrement)
4130 for ct_type in &col.constraint_order {
4131 if *ct_type != ConstraintType::AutoIncrement {
4132 new_order.push(ct_type.clone());
4133 }
4134 }
4135 col.constraint_order = new_order;
4136 col.nullable = Some(false);
4137 }
4138 }
4139 }
4140 }
4141
4142 Expression::CreateTable(ct)
4143 } else {
4144 expr
4145 };
4146
4147 // Handle CreateView column stripping for Presto/Trino target
4148 let expr = if let Expression::CreateView(mut cv) = expr {
4149 // Presto/Trino: drop column list when view has a SELECT body
4150 if matches!(target, DialectType::Presto | DialectType::Trino) && !cv.columns.is_empty()
4151 {
4152 if !matches!(&cv.query, Expression::Null(_)) {
4153 cv.columns.clear();
4154 }
4155 }
4156 Expression::CreateView(cv)
4157 } else {
4158 expr
4159 };
4160
4161 // Wrap bare VALUES in CTE bodies with SELECT * FROM (...) AS _values for generic/non-Presto targets
4162 let expr = if !matches!(
4163 target,
4164 DialectType::Presto | DialectType::Trino | DialectType::Athena
4165 ) {
4166 if let Expression::Select(mut select) = expr {
4167 if let Some(ref mut with) = select.with {
4168 for cte in &mut with.ctes {
4169 if let Expression::Values(ref vals) = cte.this {
4170 // Build: SELECT * FROM (VALUES ...) AS _values
4171 let values_subquery =
4172 Expression::Subquery(Box::new(crate::expressions::Subquery {
4173 this: Expression::Values(vals.clone()),
4174 alias: Some(Identifier::new("_values".to_string())),
4175 column_aliases: Vec::new(),
4176 order_by: None,
4177 limit: None,
4178 offset: None,
4179 distribute_by: None,
4180 sort_by: None,
4181 cluster_by: None,
4182 lateral: false,
4183 modifiers_inside: false,
4184 trailing_comments: Vec::new(),
4185 inferred_type: None,
4186 }));
4187 let mut new_select = crate::expressions::Select::new();
4188 new_select.expressions =
4189 vec![Expression::Star(crate::expressions::Star {
4190 table: None,
4191 except: None,
4192 replace: None,
4193 rename: None,
4194 trailing_comments: Vec::new(),
4195 span: None,
4196 })];
4197 new_select.from = Some(crate::expressions::From {
4198 expressions: vec![values_subquery],
4199 });
4200 cte.this = Expression::Select(Box::new(new_select));
4201 }
4202 }
4203 }
4204 Expression::Select(select)
4205 } else {
4206 expr
4207 }
4208 } else {
4209 expr
4210 };
4211
4212 // PostgreSQL CREATE INDEX: add NULLS FIRST to index columns that don't have nulls ordering
4213 let expr = if matches!(target, DialectType::PostgreSQL) {
4214 if let Expression::CreateIndex(mut ci) = expr {
4215 for col in &mut ci.columns {
4216 if col.nulls_first.is_none() {
4217 col.nulls_first = Some(true);
4218 }
4219 }
4220 Expression::CreateIndex(ci)
4221 } else {
4222 expr
4223 }
4224 } else {
4225 expr
4226 };
4227
4228 transform_recursive(expr, &|e| {
4229 // BigQuery CAST(ARRAY[STRUCT(...)] AS STRUCT_TYPE[]) -> DuckDB: convert unnamed Structs to ROW()
4230 // This converts auto-named struct literals {'_0': x, '_1': y} inside typed arrays to ROW(x, y)
4231 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
4232 if let Expression::Cast(ref c) = e {
4233 // Check if this is a CAST of an array to a struct array type
4234 let is_struct_array_cast =
4235 matches!(&c.to, crate::expressions::DataType::Array { .. });
4236 if is_struct_array_cast {
4237 let has_auto_named_structs = match &c.this {
4238 Expression::Array(arr) => arr.expressions.iter().any(|elem| {
4239 if let Expression::Struct(s) = elem {
4240 s.fields.iter().all(|(name, _)| {
4241 name.as_ref().map_or(true, |n| {
4242 n.starts_with('_') && n[1..].parse::<usize>().is_ok()
4243 })
4244 })
4245 } else {
4246 false
4247 }
4248 }),
4249 Expression::ArrayFunc(arr) => arr.expressions.iter().any(|elem| {
4250 if let Expression::Struct(s) = elem {
4251 s.fields.iter().all(|(name, _)| {
4252 name.as_ref().map_or(true, |n| {
4253 n.starts_with('_') && n[1..].parse::<usize>().is_ok()
4254 })
4255 })
4256 } else {
4257 false
4258 }
4259 }),
4260 _ => false,
4261 };
4262 if has_auto_named_structs {
4263 let convert_struct_to_row = |elem: Expression| -> Expression {
4264 if let Expression::Struct(s) = elem {
4265 let row_args: Vec<Expression> =
4266 s.fields.into_iter().map(|(_, v)| v).collect();
4267 Expression::Function(Box::new(Function::new(
4268 "ROW".to_string(),
4269 row_args,
4270 )))
4271 } else {
4272 elem
4273 }
4274 };
4275 let mut c_clone = c.as_ref().clone();
4276 match &mut c_clone.this {
4277 Expression::Array(arr) => {
4278 arr.expressions = arr
4279 .expressions
4280 .drain(..)
4281 .map(convert_struct_to_row)
4282 .collect();
4283 }
4284 Expression::ArrayFunc(arr) => {
4285 arr.expressions = arr
4286 .expressions
4287 .drain(..)
4288 .map(convert_struct_to_row)
4289 .collect();
4290 }
4291 _ => {}
4292 }
4293 return Ok(Expression::Cast(Box::new(c_clone)));
4294 }
4295 }
4296 }
4297 }
4298
4299 // BigQuery SELECT AS STRUCT -> DuckDB struct literal {'key': value, ...}
4300 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
4301 if let Expression::Select(ref sel) = e {
4302 if sel.kind.as_deref() == Some("STRUCT") {
4303 let mut fields = Vec::new();
4304 for expr in &sel.expressions {
4305 match expr {
4306 Expression::Alias(a) => {
4307 fields.push((Some(a.alias.name.clone()), a.this.clone()));
4308 }
4309 Expression::Column(c) => {
4310 fields.push((Some(c.name.name.clone()), expr.clone()));
4311 }
4312 _ => {
4313 fields.push((None, expr.clone()));
4314 }
4315 }
4316 }
4317 let struct_lit =
4318 Expression::Struct(Box::new(crate::expressions::Struct { fields }));
4319 let mut new_select = sel.as_ref().clone();
4320 new_select.kind = None;
4321 new_select.expressions = vec![struct_lit];
4322 return Ok(Expression::Select(Box::new(new_select)));
4323 }
4324 }
4325 }
4326
4327 // Convert @variable -> ${variable} for Spark/Hive/Databricks
4328 if matches!(source, DialectType::TSQL | DialectType::Fabric)
4329 && matches!(
4330 target,
4331 DialectType::Spark | DialectType::Databricks | DialectType::Hive
4332 )
4333 {
4334 if let Expression::Parameter(ref p) = e {
4335 if p.style == crate::expressions::ParameterStyle::At {
4336 if let Some(ref name) = p.name {
4337 return Ok(Expression::Parameter(Box::new(
4338 crate::expressions::Parameter {
4339 name: Some(name.clone()),
4340 index: p.index,
4341 style: crate::expressions::ParameterStyle::DollarBrace,
4342 quoted: p.quoted,
4343 string_quoted: p.string_quoted,
4344 expression: None,
4345 },
4346 )));
4347 }
4348 }
4349 }
4350 // Also handle Column("@x") -> Parameter("x", DollarBrace) for TSQL vars
4351 if let Expression::Column(ref col) = e {
4352 if col.name.name.starts_with('@') && col.table.is_none() {
4353 let var_name = col.name.name.trim_start_matches('@').to_string();
4354 return Ok(Expression::Parameter(Box::new(
4355 crate::expressions::Parameter {
4356 name: Some(var_name),
4357 index: None,
4358 style: crate::expressions::ParameterStyle::DollarBrace,
4359 quoted: false,
4360 string_quoted: false,
4361 expression: None,
4362 },
4363 )));
4364 }
4365 }
4366 }
4367
4368 // Convert @variable -> variable in SET statements for Spark/Databricks
4369 if matches!(source, DialectType::TSQL | DialectType::Fabric)
4370 && matches!(target, DialectType::Spark | DialectType::Databricks)
4371 {
4372 if let Expression::SetStatement(ref s) = e {
4373 let mut new_items = s.items.clone();
4374 let mut changed = false;
4375 for item in &mut new_items {
4376 // Strip @ from the SET name (Parameter style)
4377 if let Expression::Parameter(ref p) = item.name {
4378 if p.style == crate::expressions::ParameterStyle::At {
4379 if let Some(ref name) = p.name {
4380 item.name = Expression::Identifier(Identifier::new(name));
4381 changed = true;
4382 }
4383 }
4384 }
4385 // Strip @ from the SET name (Identifier style - SET parser)
4386 if let Expression::Identifier(ref id) = item.name {
4387 if id.name.starts_with('@') {
4388 let var_name = id.name.trim_start_matches('@').to_string();
4389 item.name = Expression::Identifier(Identifier::new(&var_name));
4390 changed = true;
4391 }
4392 }
4393 // Strip @ from the SET name (Column style - alternative parsing)
4394 if let Expression::Column(ref col) = item.name {
4395 if col.name.name.starts_with('@') && col.table.is_none() {
4396 let var_name = col.name.name.trim_start_matches('@').to_string();
4397 item.name = Expression::Identifier(Identifier::new(&var_name));
4398 changed = true;
4399 }
4400 }
4401 }
4402 if changed {
4403 let mut new_set = (**s).clone();
4404 new_set.items = new_items;
4405 return Ok(Expression::SetStatement(Box::new(new_set)));
4406 }
4407 }
4408 }
4409
        // Strip NOLOCK hint for non-TSQL targets.
        // NOTE(review): this clears *all* table hints on the table reference,
        // not just NOLOCK — confirm no other T-SQL hint should be translated
        // rather than silently dropped.
        if matches!(source, DialectType::TSQL | DialectType::Fabric)
            && !matches!(target, DialectType::TSQL | DialectType::Fabric)
        {
            if let Expression::Table(ref tr) = e {
                if !tr.hints.is_empty() {
                    let mut new_tr = tr.clone();
                    new_tr.hints.clear();
                    return Ok(Expression::Table(new_tr));
                }
            }
        }
4422
        // Snowflake: TRUE IS TRUE -> TRUE, FALSE IS FALSE -> FALSE
        // Snowflake simplifies IS TRUE/IS FALSE on boolean literals.
        // Folding only fires on literal booleans, so three-valued NULL
        // semantics are not affected.
        if matches!(target, DialectType::Snowflake) {
            // `b IS TRUE` -> b ; `b IS NOT TRUE` -> NOT b
            if let Expression::IsTrue(ref itf) = e {
                if let Expression::Boolean(ref b) = itf.this {
                    if !itf.not {
                        return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
                            value: b.value,
                        }));
                    } else {
                        return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
                            value: !b.value,
                        }));
                    }
                }
            }
            // `b IS FALSE` -> NOT b ; `b IS NOT FALSE` -> b
            if let Expression::IsFalse(ref itf) = e {
                if let Expression::Boolean(ref b) = itf.this {
                    if !itf.not {
                        return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
                            value: !b.value,
                        }));
                    } else {
                        return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
                            value: b.value,
                        }));
                    }
                }
            }
        }
4453
4454 // BigQuery: split dotted backtick identifiers in table names
4455 // e.g., `a.b.c` -> "a"."b"."c" when source is BigQuery and target is not BigQuery
4456 if matches!(source, DialectType::BigQuery) && !matches!(target, DialectType::BigQuery) {
4457 if let Expression::CreateTable(ref ct) = e {
4458 let mut changed = false;
4459 let mut new_ct = ct.clone();
4460 // Split the table name
4461 if ct.name.schema.is_none() && ct.name.name.name.contains('.') {
4462 let parts: Vec<&str> = ct.name.name.name.split('.').collect();
4463 // Use quoted identifiers when the original was quoted (backtick in BigQuery)
4464 let was_quoted = ct.name.name.quoted;
4465 let mk_id = |s: &str| {
4466 if was_quoted {
4467 Identifier::quoted(s)
4468 } else {
4469 Identifier::new(s)
4470 }
4471 };
4472 if parts.len() == 3 {
4473 new_ct.name.catalog = Some(mk_id(parts[0]));
4474 new_ct.name.schema = Some(mk_id(parts[1]));
4475 new_ct.name.name = mk_id(parts[2]);
4476 changed = true;
4477 } else if parts.len() == 2 {
4478 new_ct.name.schema = Some(mk_id(parts[0]));
4479 new_ct.name.name = mk_id(parts[1]);
4480 changed = true;
4481 }
4482 }
4483 // Split the clone source name
4484 if let Some(ref clone_src) = ct.clone_source {
4485 if clone_src.schema.is_none() && clone_src.name.name.contains('.') {
4486 let parts: Vec<&str> = clone_src.name.name.split('.').collect();
4487 let was_quoted = clone_src.name.quoted;
4488 let mk_id = |s: &str| {
4489 if was_quoted {
4490 Identifier::quoted(s)
4491 } else {
4492 Identifier::new(s)
4493 }
4494 };
4495 let mut new_src = clone_src.clone();
4496 if parts.len() == 3 {
4497 new_src.catalog = Some(mk_id(parts[0]));
4498 new_src.schema = Some(mk_id(parts[1]));
4499 new_src.name = mk_id(parts[2]);
4500 new_ct.clone_source = Some(new_src);
4501 changed = true;
4502 } else if parts.len() == 2 {
4503 new_src.schema = Some(mk_id(parts[0]));
4504 new_src.name = mk_id(parts[1]);
4505 new_ct.clone_source = Some(new_src);
4506 changed = true;
4507 }
4508 }
4509 }
4510 if changed {
4511 return Ok(Expression::CreateTable(new_ct));
4512 }
4513 }
4514 }
4515
4516 // BigQuery array subscript: a[1], b[OFFSET(1)], c[ORDINAL(1)], d[SAFE_OFFSET(1)], e[SAFE_ORDINAL(1)]
4517 // -> DuckDB/Presto: convert 0-based to 1-based, handle SAFE_* -> ELEMENT_AT for Presto
4518 if matches!(source, DialectType::BigQuery)
4519 && matches!(
4520 target,
4521 DialectType::DuckDB
4522 | DialectType::Presto
4523 | DialectType::Trino
4524 | DialectType::Athena
4525 )
4526 {
4527 if let Expression::Subscript(ref sub) = e {
4528 let (new_index, is_safe) = match &sub.index {
4529 // a[1] -> a[1+1] = a[2] (plain index is 0-based in BQ)
4530 Expression::Literal(Literal::Number(n)) => {
4531 if let Ok(val) = n.parse::<i64>() {
4532 (
4533 Some(Expression::Literal(Literal::Number(
4534 (val + 1).to_string(),
4535 ))),
4536 false,
4537 )
4538 } else {
4539 (None, false)
4540 }
4541 }
4542 // OFFSET(n) -> n+1 (0-based)
4543 Expression::Function(ref f)
4544 if f.name.eq_ignore_ascii_case("OFFSET") && f.args.len() == 1 =>
4545 {
4546 if let Expression::Literal(Literal::Number(n)) = &f.args[0] {
4547 if let Ok(val) = n.parse::<i64>() {
4548 (
4549 Some(Expression::Literal(Literal::Number(
4550 (val + 1).to_string(),
4551 ))),
4552 false,
4553 )
4554 } else {
4555 (
4556 Some(Expression::Add(Box::new(
4557 crate::expressions::BinaryOp::new(
4558 f.args[0].clone(),
4559 Expression::number(1),
4560 ),
4561 ))),
4562 false,
4563 )
4564 }
4565 } else {
4566 (
4567 Some(Expression::Add(Box::new(
4568 crate::expressions::BinaryOp::new(
4569 f.args[0].clone(),
4570 Expression::number(1),
4571 ),
4572 ))),
4573 false,
4574 )
4575 }
4576 }
4577 // ORDINAL(n) -> n (already 1-based)
4578 Expression::Function(ref f)
4579 if f.name.eq_ignore_ascii_case("ORDINAL") && f.args.len() == 1 =>
4580 {
4581 (Some(f.args[0].clone()), false)
4582 }
4583 // SAFE_OFFSET(n) -> n+1 (0-based, safe)
4584 Expression::Function(ref f)
4585 if f.name.eq_ignore_ascii_case("SAFE_OFFSET") && f.args.len() == 1 =>
4586 {
4587 if let Expression::Literal(Literal::Number(n)) = &f.args[0] {
4588 if let Ok(val) = n.parse::<i64>() {
4589 (
4590 Some(Expression::Literal(Literal::Number(
4591 (val + 1).to_string(),
4592 ))),
4593 true,
4594 )
4595 } else {
4596 (
4597 Some(Expression::Add(Box::new(
4598 crate::expressions::BinaryOp::new(
4599 f.args[0].clone(),
4600 Expression::number(1),
4601 ),
4602 ))),
4603 true,
4604 )
4605 }
4606 } else {
4607 (
4608 Some(Expression::Add(Box::new(
4609 crate::expressions::BinaryOp::new(
4610 f.args[0].clone(),
4611 Expression::number(1),
4612 ),
4613 ))),
4614 true,
4615 )
4616 }
4617 }
4618 // SAFE_ORDINAL(n) -> n (already 1-based, safe)
4619 Expression::Function(ref f)
4620 if f.name.eq_ignore_ascii_case("SAFE_ORDINAL") && f.args.len() == 1 =>
4621 {
4622 (Some(f.args[0].clone()), true)
4623 }
4624 _ => (None, false),
4625 };
4626 if let Some(idx) = new_index {
4627 if is_safe
4628 && matches!(
4629 target,
4630 DialectType::Presto | DialectType::Trino | DialectType::Athena
4631 )
4632 {
4633 // Presto: SAFE_OFFSET/SAFE_ORDINAL -> ELEMENT_AT(arr, idx)
4634 return Ok(Expression::Function(Box::new(Function::new(
4635 "ELEMENT_AT".to_string(),
4636 vec![sub.this.clone(), idx],
4637 ))));
4638 } else {
4639 // DuckDB or non-safe: just use subscript with converted index
4640 return Ok(Expression::Subscript(Box::new(
4641 crate::expressions::Subscript {
4642 this: sub.this.clone(),
4643 index: idx,
4644 },
4645 )));
4646 }
4647 }
4648 }
4649 }
4650
4651 // BigQuery LENGTH(x) -> DuckDB CASE TYPEOF(x) WHEN 'BLOB' THEN OCTET_LENGTH(...) ELSE LENGTH(...) END
4652 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
4653 if let Expression::Length(ref uf) = e {
4654 let arg = uf.this.clone();
4655 let typeof_func = Expression::Function(Box::new(Function::new(
4656 "TYPEOF".to_string(),
4657 vec![arg.clone()],
4658 )));
4659 let blob_cast = Expression::Cast(Box::new(Cast {
4660 this: arg.clone(),
4661 to: DataType::VarBinary { length: None },
4662 trailing_comments: vec![],
4663 double_colon_syntax: false,
4664 format: None,
4665 default: None,
4666 inferred_type: None,
4667 }));
4668 let octet_length = Expression::Function(Box::new(Function::new(
4669 "OCTET_LENGTH".to_string(),
4670 vec![blob_cast],
4671 )));
4672 let text_cast = Expression::Cast(Box::new(Cast {
4673 this: arg,
4674 to: DataType::Text,
4675 trailing_comments: vec![],
4676 double_colon_syntax: false,
4677 format: None,
4678 default: None,
4679 inferred_type: None,
4680 }));
4681 let length_text = Expression::Length(Box::new(crate::expressions::UnaryFunc {
4682 this: text_cast,
4683 original_name: None,
4684 inferred_type: None,
4685 }));
4686 return Ok(Expression::Case(Box::new(Case {
4687 operand: Some(typeof_func),
4688 whens: vec![(
4689 Expression::Literal(Literal::String("BLOB".to_string())),
4690 octet_length,
4691 )],
4692 else_: Some(length_text),
4693 comments: Vec::new(),
4694 inferred_type: None,
4695 })));
4696 }
4697 }
4698
        // BigQuery UNNEST alias handling (only for non-BigQuery sources):
        // UNNEST(...) AS x -> UNNEST(...) (drop unused table alias)
        // UNNEST(...) AS x(y) -> UNNEST(...) AS y (use column alias as main alias)
        if matches!(target, DialectType::BigQuery) && !matches!(source, DialectType::BigQuery) {
            if let Expression::Alias(ref a) = e {
                if matches!(&a.this, Expression::Unnest(_)) {
                    if a.column_aliases.is_empty() {
                        // Drop the entire alias, return just the UNNEST expression.
                        return Ok(a.this.clone());
                    } else {
                        // Promote the first column alias to the main alias; any
                        // additional column aliases are discarded here.
                        let mut new_alias = a.as_ref().clone();
                        new_alias.alias = a.column_aliases[0].clone();
                        new_alias.column_aliases.clear();
                        return Ok(Expression::Alias(Box::new(new_alias)));
                    }
                }
            }
        }
4718
4719 // BigQuery IN UNNEST(expr) -> IN (SELECT UNNEST/EXPLODE(expr)) for non-BigQuery targets
4720 if matches!(source, DialectType::BigQuery) && !matches!(target, DialectType::BigQuery) {
4721 if let Expression::In(ref in_expr) = e {
4722 if let Some(ref unnest_inner) = in_expr.unnest {
4723 // Build the function call for the target dialect
4724 let func_expr = if matches!(
4725 target,
4726 DialectType::Hive | DialectType::Spark | DialectType::Databricks
4727 ) {
4728 // Use EXPLODE for Hive/Spark
4729 Expression::Function(Box::new(Function::new(
4730 "EXPLODE".to_string(),
4731 vec![*unnest_inner.clone()],
4732 )))
4733 } else {
4734 // Use UNNEST for Presto/Trino/DuckDB/etc.
4735 Expression::Unnest(Box::new(crate::expressions::UnnestFunc {
4736 this: *unnest_inner.clone(),
4737 expressions: Vec::new(),
4738 with_ordinality: false,
4739 alias: None,
4740 offset_alias: None,
4741 }))
4742 };
4743
4744 // Wrap in SELECT
4745 let mut inner_select = crate::expressions::Select::new();
4746 inner_select.expressions = vec![func_expr];
4747
4748 let subquery_expr = Expression::Select(Box::new(inner_select));
4749
4750 return Ok(Expression::In(Box::new(crate::expressions::In {
4751 this: in_expr.this.clone(),
4752 expressions: Vec::new(),
4753 query: Some(subquery_expr),
4754 not: in_expr.not,
4755 global: in_expr.global,
4756 unnest: None,
4757 is_field: false,
4758 })));
4759 }
4760 }
4761 }
4762
4763 // SQLite: GENERATE_SERIES AS t(i) -> (SELECT value AS i FROM GENERATE_SERIES(...)) AS t
4764 // This handles the subquery wrapping for RANGE -> GENERATE_SERIES in FROM context
4765 if matches!(target, DialectType::SQLite) && matches!(source, DialectType::DuckDB) {
4766 if let Expression::Alias(ref a) = e {
4767 if let Expression::Function(ref f) = a.this {
4768 if f.name.eq_ignore_ascii_case("GENERATE_SERIES")
4769 && !a.column_aliases.is_empty()
4770 {
4771 // Build: (SELECT value AS col_alias FROM GENERATE_SERIES(start, end)) AS table_alias
4772 let col_alias = a.column_aliases[0].clone();
4773 let mut inner_select = crate::expressions::Select::new();
4774 inner_select.expressions =
4775 vec![Expression::Alias(Box::new(crate::expressions::Alias::new(
4776 Expression::Identifier(Identifier::new("value".to_string())),
4777 col_alias,
4778 )))];
4779 inner_select.from = Some(crate::expressions::From {
4780 expressions: vec![a.this.clone()],
4781 });
4782 let subquery =
4783 Expression::Subquery(Box::new(crate::expressions::Subquery {
4784 this: Expression::Select(Box::new(inner_select)),
4785 alias: Some(a.alias.clone()),
4786 column_aliases: Vec::new(),
4787 order_by: None,
4788 limit: None,
4789 offset: None,
4790 lateral: false,
4791 modifiers_inside: false,
4792 trailing_comments: Vec::new(),
4793 distribute_by: None,
4794 sort_by: None,
4795 cluster_by: None,
4796 inferred_type: None,
4797 }));
4798 return Ok(subquery);
4799 }
4800 }
4801 }
4802 }
4803
4804 // BigQuery implicit UNNEST: comma-join on array path -> CROSS JOIN UNNEST
4805 // e.g., SELECT results FROM Coordinates, Coordinates.position AS results
4806 // -> SELECT results FROM Coordinates CROSS JOIN UNNEST(Coordinates.position) AS results
4807 if matches!(source, DialectType::BigQuery) {
4808 if let Expression::Select(ref s) = e {
4809 if let Some(ref from) = s.from {
4810 if from.expressions.len() >= 2 {
4811 // Collect table names from first expression
4812 let first_tables: Vec<String> = from
4813 .expressions
4814 .iter()
4815 .take(1)
4816 .filter_map(|expr| {
4817 if let Expression::Table(t) = expr {
4818 Some(t.name.name.to_lowercase())
4819 } else {
4820 None
4821 }
4822 })
4823 .collect();
4824
4825 // Check if any subsequent FROM expressions are schema-qualified with a matching table name
4826 // or have a dotted name matching a table
4827 let mut needs_rewrite = false;
4828 for expr in from.expressions.iter().skip(1) {
4829 if let Expression::Table(t) = expr {
4830 if let Some(ref schema) = t.schema {
4831 if first_tables.contains(&schema.name.to_lowercase()) {
4832 needs_rewrite = true;
4833 break;
4834 }
4835 }
4836 // Also check dotted names in quoted identifiers (e.g., `Coordinates.position`)
4837 if t.schema.is_none() && t.name.name.contains('.') {
4838 let parts: Vec<&str> = t.name.name.split('.').collect();
4839 if parts.len() >= 2
4840 && first_tables.contains(&parts[0].to_lowercase())
4841 {
4842 needs_rewrite = true;
4843 break;
4844 }
4845 }
4846 }
4847 }
4848
4849 if needs_rewrite {
4850 let mut new_select = s.clone();
4851 let mut new_from_exprs = vec![from.expressions[0].clone()];
4852 let mut new_joins = s.joins.clone();
4853
4854 for expr in from.expressions.iter().skip(1) {
4855 if let Expression::Table(ref t) = expr {
4856 if let Some(ref schema) = t.schema {
4857 if first_tables.contains(&schema.name.to_lowercase()) {
4858 // This is an array path reference, convert to CROSS JOIN UNNEST
4859 let col_expr = Expression::Column(
4860 crate::expressions::Column {
4861 name: t.name.clone(),
4862 table: Some(schema.clone()),
4863 join_mark: false,
4864 trailing_comments: vec![],
4865 span: None,
4866 inferred_type: None,
4867 },
4868 );
4869 let unnest_expr = Expression::Unnest(Box::new(
4870 crate::expressions::UnnestFunc {
4871 this: col_expr,
4872 expressions: Vec::new(),
4873 with_ordinality: false,
4874 alias: None,
4875 offset_alias: None,
4876 },
4877 ));
4878 let join_this = if let Some(ref alias) = t.alias {
4879 if matches!(
4880 target,
4881 DialectType::Presto
4882 | DialectType::Trino
4883 | DialectType::Athena
4884 ) {
4885 // Presto: UNNEST(x) AS _t0(results)
4886 Expression::Alias(Box::new(
4887 crate::expressions::Alias {
4888 this: unnest_expr,
4889 alias: Identifier::new("_t0"),
4890 column_aliases: vec![alias.clone()],
4891 pre_alias_comments: vec![],
4892 trailing_comments: vec![],
4893 inferred_type: None,
4894 },
4895 ))
4896 } else {
4897 // BigQuery: UNNEST(x) AS results
4898 Expression::Alias(Box::new(
4899 crate::expressions::Alias {
4900 this: unnest_expr,
4901 alias: alias.clone(),
4902 column_aliases: vec![],
4903 pre_alias_comments: vec![],
4904 trailing_comments: vec![],
4905 inferred_type: None,
4906 },
4907 ))
4908 }
4909 } else {
4910 unnest_expr
4911 };
4912 new_joins.push(crate::expressions::Join {
4913 kind: crate::expressions::JoinKind::Cross,
4914 this: join_this,
4915 on: None,
4916 using: Vec::new(),
4917 use_inner_keyword: false,
4918 use_outer_keyword: false,
4919 deferred_condition: false,
4920 join_hint: None,
4921 match_condition: None,
4922 pivots: Vec::new(),
4923 comments: Vec::new(),
4924 nesting_group: 0,
4925 directed: false,
4926 });
4927 } else {
4928 new_from_exprs.push(expr.clone());
4929 }
4930 } else if t.schema.is_none() && t.name.name.contains('.') {
4931 // Dotted name in quoted identifier: `Coordinates.position`
4932 let parts: Vec<&str> = t.name.name.split('.').collect();
4933 if parts.len() >= 2
4934 && first_tables.contains(&parts[0].to_lowercase())
4935 {
4936 let join_this =
4937 if matches!(target, DialectType::BigQuery) {
4938 // BigQuery: keep as single quoted identifier, just convert comma -> CROSS JOIN
4939 Expression::Table(t.clone())
4940 } else {
4941 // Other targets: split into "schema"."name"
4942 let mut new_t = t.clone();
4943 new_t.schema =
4944 Some(Identifier::quoted(parts[0]));
4945 new_t.name = Identifier::quoted(parts[1]);
4946 Expression::Table(new_t)
4947 };
4948 new_joins.push(crate::expressions::Join {
4949 kind: crate::expressions::JoinKind::Cross,
4950 this: join_this,
4951 on: None,
4952 using: Vec::new(),
4953 use_inner_keyword: false,
4954 use_outer_keyword: false,
4955 deferred_condition: false,
4956 join_hint: None,
4957 match_condition: None,
4958 pivots: Vec::new(),
4959 comments: Vec::new(),
4960 nesting_group: 0,
4961 directed: false,
4962 });
4963 } else {
4964 new_from_exprs.push(expr.clone());
4965 }
4966 } else {
4967 new_from_exprs.push(expr.clone());
4968 }
4969 } else {
4970 new_from_exprs.push(expr.clone());
4971 }
4972 }
4973
4974 new_select.from = Some(crate::expressions::From {
4975 expressions: new_from_exprs,
4976 ..from.clone()
4977 });
4978 new_select.joins = new_joins;
4979 return Ok(Expression::Select(new_select));
4980 }
4981 }
4982 }
4983 }
4984 }
4985
4986 // CROSS JOIN UNNEST -> LATERAL VIEW EXPLODE for Hive/Spark
4987 if matches!(
4988 target,
4989 DialectType::Hive | DialectType::Spark | DialectType::Databricks
4990 ) {
4991 if let Expression::Select(ref s) = e {
4992 // Check if any joins are CROSS JOIN with UNNEST/EXPLODE
4993 let is_unnest_or_explode_expr = |expr: &Expression| -> bool {
4994 matches!(expr, Expression::Unnest(_))
4995 || matches!(expr, Expression::Function(f) if f.name.eq_ignore_ascii_case("EXPLODE"))
4996 };
4997 let has_unnest_join = s.joins.iter().any(|j| {
4998 j.kind == crate::expressions::JoinKind::Cross && (
4999 matches!(&j.this, Expression::Alias(a) if is_unnest_or_explode_expr(&a.this))
5000 || is_unnest_or_explode_expr(&j.this)
5001 )
5002 });
5003 if has_unnest_join {
5004 let mut select = s.clone();
5005 let mut new_joins = Vec::new();
5006 for join in select.joins.drain(..) {
5007 if join.kind == crate::expressions::JoinKind::Cross {
5008 // Extract the UNNEST/EXPLODE from the join
5009 let (func_expr, table_alias, col_aliases) = match &join.this {
5010 Expression::Alias(a) => {
5011 let ta = if a.alias.is_empty() {
5012 None
5013 } else {
5014 Some(a.alias.clone())
5015 };
5016 let cas = a.column_aliases.clone();
5017 match &a.this {
5018 Expression::Unnest(u) => {
5019 // Multi-arg UNNEST(y, z) -> INLINE(ARRAYS_ZIP(y, z))
5020 if !u.expressions.is_empty() {
5021 let mut all_args = vec![u.this.clone()];
5022 all_args.extend(u.expressions.clone());
5023 let arrays_zip =
5024 Expression::Function(Box::new(
5025 crate::expressions::Function::new(
5026 "ARRAYS_ZIP".to_string(),
5027 all_args,
5028 ),
5029 ));
5030 let inline = Expression::Function(Box::new(
5031 crate::expressions::Function::new(
5032 "INLINE".to_string(),
5033 vec![arrays_zip],
5034 ),
5035 ));
5036 (Some(inline), ta, a.column_aliases.clone())
5037 } else {
5038 // Convert UNNEST(x) to EXPLODE(x) or POSEXPLODE(x)
5039 let func_name = if u.with_ordinality {
5040 "POSEXPLODE"
5041 } else {
5042 "EXPLODE"
5043 };
5044 let explode = Expression::Function(Box::new(
5045 crate::expressions::Function::new(
5046 func_name.to_string(),
5047 vec![u.this.clone()],
5048 ),
5049 ));
5050 // For POSEXPLODE, add 'pos' to column aliases
5051 let cas = if u.with_ordinality {
5052 let mut pos_aliases =
5053 vec![Identifier::new(
5054 "pos".to_string(),
5055 )];
5056 pos_aliases
5057 .extend(a.column_aliases.clone());
5058 pos_aliases
5059 } else {
5060 a.column_aliases.clone()
5061 };
5062 (Some(explode), ta, cas)
5063 }
5064 }
5065 Expression::Function(f)
5066 if f.name.eq_ignore_ascii_case("EXPLODE") =>
5067 {
5068 (Some(Expression::Function(f.clone())), ta, cas)
5069 }
5070 _ => (None, None, Vec::new()),
5071 }
5072 }
5073 Expression::Unnest(u) => {
5074 let func_name = if u.with_ordinality {
5075 "POSEXPLODE"
5076 } else {
5077 "EXPLODE"
5078 };
5079 let explode = Expression::Function(Box::new(
5080 crate::expressions::Function::new(
5081 func_name.to_string(),
5082 vec![u.this.clone()],
5083 ),
5084 ));
5085 let ta = u.alias.clone();
5086 let col_aliases = if u.with_ordinality {
5087 vec![Identifier::new("pos".to_string())]
5088 } else {
5089 Vec::new()
5090 };
5091 (Some(explode), ta, col_aliases)
5092 }
5093 _ => (None, None, Vec::new()),
5094 };
5095 if let Some(func) = func_expr {
5096 select.lateral_views.push(crate::expressions::LateralView {
5097 this: func,
5098 table_alias,
5099 column_aliases: col_aliases,
5100 outer: false,
5101 });
5102 } else {
5103 new_joins.push(join);
5104 }
5105 } else {
5106 new_joins.push(join);
5107 }
5108 }
5109 select.joins = new_joins;
5110 return Ok(Expression::Select(select));
5111 }
5112 }
5113 }
5114
5115 // UNNEST expansion: DuckDB SELECT UNNEST(arr) in SELECT list -> expanded query
5116 // for BigQuery, Presto/Trino, Snowflake
5117 if matches!(source, DialectType::DuckDB | DialectType::PostgreSQL)
5118 && matches!(
5119 target,
5120 DialectType::BigQuery
5121 | DialectType::Presto
5122 | DialectType::Trino
5123 | DialectType::Snowflake
5124 )
5125 {
5126 if let Expression::Select(ref s) = e {
5127 // Check if any SELECT expressions contain UNNEST
5128 // Note: UNNEST can appear as Expression::Unnest OR Expression::Function("UNNEST")
5129 let has_unnest_in_select = s.expressions.iter().any(|expr| {
5130 fn contains_unnest(e: &Expression) -> bool {
5131 match e {
5132 Expression::Unnest(_) => true,
5133 Expression::Function(f)
5134 if f.name.eq_ignore_ascii_case("UNNEST") =>
5135 {
5136 true
5137 }
5138 Expression::Alias(a) => contains_unnest(&a.this),
5139 Expression::Add(op)
5140 | Expression::Sub(op)
5141 | Expression::Mul(op)
5142 | Expression::Div(op) => {
5143 contains_unnest(&op.left) || contains_unnest(&op.right)
5144 }
5145 _ => false,
5146 }
5147 }
5148 contains_unnest(expr)
5149 });
5150
5151 if has_unnest_in_select {
5152 let rewritten = Self::rewrite_unnest_expansion(s, target);
5153 if let Some(new_select) = rewritten {
5154 return Ok(Expression::Select(Box::new(new_select)));
5155 }
5156 }
5157 }
5158 }
5159
5160 // BigQuery -> PostgreSQL: convert escape sequences in string literals to actual characters
5161 // BigQuery '\n' -> PostgreSQL literal newline in string
5162 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::PostgreSQL)
5163 {
5164 if let Expression::Literal(Literal::String(ref s)) = e {
5165 if s.contains("\\n")
5166 || s.contains("\\t")
5167 || s.contains("\\r")
5168 || s.contains("\\\\")
5169 {
5170 let converted = s
5171 .replace("\\n", "\n")
5172 .replace("\\t", "\t")
5173 .replace("\\r", "\r")
5174 .replace("\\\\", "\\");
5175 return Ok(Expression::Literal(Literal::String(converted)));
5176 }
5177 }
5178 }
5179
5180 // Cross-dialect: convert Literal::Timestamp to target-specific CAST form
5181 // when source != target (identity tests keep the Literal::Timestamp for native handling)
5182 if source != target {
5183 if let Expression::Literal(Literal::Timestamp(ref s)) = e {
5184 let s = s.clone();
5185 // MySQL: TIMESTAMP handling depends on source dialect
5186 // BigQuery TIMESTAMP is timezone-aware -> TIMESTAMP() function in MySQL
5187 // Other sources' TIMESTAMP is non-timezone -> CAST('x' AS DATETIME) in MySQL
5188 if matches!(target, DialectType::MySQL) {
5189 if matches!(source, DialectType::BigQuery) {
5190 // BigQuery TIMESTAMP is timezone-aware -> MySQL TIMESTAMP() function
5191 return Ok(Expression::Function(Box::new(Function::new(
5192 "TIMESTAMP".to_string(),
5193 vec![Expression::Literal(Literal::String(s))],
5194 ))));
5195 } else {
5196 // Non-timezone TIMESTAMP -> CAST('x' AS DATETIME) in MySQL
5197 return Ok(Expression::Cast(Box::new(Cast {
5198 this: Expression::Literal(Literal::String(s)),
5199 to: DataType::Custom {
5200 name: "DATETIME".to_string(),
5201 },
5202 trailing_comments: Vec::new(),
5203 double_colon_syntax: false,
5204 format: None,
5205 default: None,
5206 inferred_type: None,
5207 })));
5208 }
5209 }
5210 let dt = match target {
5211 DialectType::BigQuery | DialectType::StarRocks => DataType::Custom {
5212 name: "DATETIME".to_string(),
5213 },
5214 DialectType::Snowflake => {
5215 // BigQuery TIMESTAMP is timezone-aware -> use TIMESTAMPTZ for Snowflake
5216 if matches!(source, DialectType::BigQuery) {
5217 DataType::Custom {
5218 name: "TIMESTAMPTZ".to_string(),
5219 }
5220 } else if matches!(
5221 source,
5222 DialectType::PostgreSQL
5223 | DialectType::Redshift
5224 | DialectType::Snowflake
5225 ) {
5226 DataType::Timestamp {
5227 precision: None,
5228 timezone: false,
5229 }
5230 } else {
5231 DataType::Custom {
5232 name: "TIMESTAMPNTZ".to_string(),
5233 }
5234 }
5235 }
5236 DialectType::Spark | DialectType::Databricks => {
5237 // BigQuery TIMESTAMP is timezone-aware -> use plain TIMESTAMP for Spark/Databricks
5238 if matches!(source, DialectType::BigQuery) {
5239 DataType::Timestamp {
5240 precision: None,
5241 timezone: false,
5242 }
5243 } else {
5244 DataType::Custom {
5245 name: "TIMESTAMP_NTZ".to_string(),
5246 }
5247 }
5248 }
5249 DialectType::ClickHouse => DataType::Nullable {
5250 inner: Box::new(DataType::Custom {
5251 name: "DateTime".to_string(),
5252 }),
5253 },
5254 DialectType::TSQL | DialectType::Fabric => DataType::Custom {
5255 name: "DATETIME2".to_string(),
5256 },
5257 DialectType::DuckDB => {
5258 // DuckDB: use TIMESTAMPTZ when source is BigQuery (BQ TIMESTAMP is always UTC/tz-aware)
5259 // or when the timestamp string explicitly has timezone info
5260 if matches!(source, DialectType::BigQuery)
5261 || Self::timestamp_string_has_timezone(&s)
5262 {
5263 DataType::Custom {
5264 name: "TIMESTAMPTZ".to_string(),
5265 }
5266 } else {
5267 DataType::Timestamp {
5268 precision: None,
5269 timezone: false,
5270 }
5271 }
5272 }
5273 _ => DataType::Timestamp {
5274 precision: None,
5275 timezone: false,
5276 },
5277 };
5278 return Ok(Expression::Cast(Box::new(Cast {
5279 this: Expression::Literal(Literal::String(s)),
5280 to: dt,
5281 trailing_comments: vec![],
5282 double_colon_syntax: false,
5283 format: None,
5284 default: None,
5285 inferred_type: None,
5286 })));
5287 }
5288 }
5289
        // PostgreSQL DELETE requires explicit AS for table aliases, so force
        // the explicit-AS flag whenever the parsed statement has an alias but
        // omitted the keyword. Redshift gets the same treatment.
        if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
            if let Expression::Delete(ref del) = e {
                if del.alias.is_some() && !del.alias_explicit_as {
                    let mut new_del = del.clone();
                    new_del.alias_explicit_as = true;
                    return Ok(Expression::Delete(new_del));
                }
            }
        }
5300
5301 // UNION/INTERSECT/EXCEPT DISTINCT handling:
5302 // Some dialects require explicit DISTINCT (BigQuery, ClickHouse),
5303 // while others don't support it (Presto, Spark, DuckDB, etc.)
5304 {
5305 let needs_distinct =
5306 matches!(target, DialectType::BigQuery | DialectType::ClickHouse);
5307 let drop_distinct = matches!(
5308 target,
5309 DialectType::Presto
5310 | DialectType::Trino
5311 | DialectType::Athena
5312 | DialectType::Spark
5313 | DialectType::Databricks
5314 | DialectType::DuckDB
5315 | DialectType::Hive
5316 | DialectType::MySQL
5317 | DialectType::PostgreSQL
5318 | DialectType::SQLite
5319 | DialectType::TSQL
5320 | DialectType::Redshift
5321 | DialectType::Snowflake
5322 | DialectType::Oracle
5323 | DialectType::Teradata
5324 | DialectType::Drill
5325 | DialectType::Doris
5326 | DialectType::StarRocks
5327 );
5328 match &e {
5329 Expression::Union(u) if !u.all && needs_distinct && !u.distinct => {
5330 let mut new_u = (**u).clone();
5331 new_u.distinct = true;
5332 return Ok(Expression::Union(Box::new(new_u)));
5333 }
5334 Expression::Intersect(i) if !i.all && needs_distinct && !i.distinct => {
5335 let mut new_i = (**i).clone();
5336 new_i.distinct = true;
5337 return Ok(Expression::Intersect(Box::new(new_i)));
5338 }
5339 Expression::Except(ex) if !ex.all && needs_distinct && !ex.distinct => {
5340 let mut new_ex = (**ex).clone();
5341 new_ex.distinct = true;
5342 return Ok(Expression::Except(Box::new(new_ex)));
5343 }
5344 Expression::Union(u) if u.distinct && drop_distinct => {
5345 let mut new_u = (**u).clone();
5346 new_u.distinct = false;
5347 return Ok(Expression::Union(Box::new(new_u)));
5348 }
5349 Expression::Intersect(i) if i.distinct && drop_distinct => {
5350 let mut new_i = (**i).clone();
5351 new_i.distinct = false;
5352 return Ok(Expression::Intersect(Box::new(new_i)));
5353 }
5354 Expression::Except(ex) if ex.distinct && drop_distinct => {
5355 let mut new_ex = (**ex).clone();
5356 new_ex.distinct = false;
5357 return Ok(Expression::Except(Box::new(new_ex)));
5358 }
5359 _ => {}
5360 }
5361 }
5362
        // ClickHouse: MAP('a', '1') -> map('a', '1') (lowercase function name).
        // Only applies when there are arguments; a bare MAP() is left alone.
        if matches!(target, DialectType::ClickHouse) {
            if let Expression::Function(ref f) = e {
                if f.name.eq_ignore_ascii_case("MAP") && !f.args.is_empty() {
                    let mut new_f = f.as_ref().clone();
                    new_f.name = "map".to_string();
                    return Ok(Expression::Function(Box::new(new_f)));
                }
            }
        }
5373
        // ClickHouse: INTERSECT ALL -> INTERSECT (ClickHouse doesn't support ALL on INTERSECT)
        // NOTE(review): assumes bare INTERSECT in ClickHouse carries the
        // multiplicity semantics the caller expects — confirm against the
        // ClickHouse set-operation docs.
        if matches!(target, DialectType::ClickHouse) {
            if let Expression::Intersect(ref i) = e {
                if i.all {
                    let mut new_i = (**i).clone();
                    new_i.all = false;
                    return Ok(Expression::Intersect(Box::new(new_i)));
                }
            }
        }
5384
5385 // Integer division: a / b -> CAST(a AS DOUBLE) / b for dialects that need it
5386 // Only from Generic source, to prevent double-wrapping
5387 if matches!(source, DialectType::Generic) {
5388 if let Expression::Div(ref op) = e {
5389 let cast_type = match target {
5390 DialectType::TSQL | DialectType::Fabric => Some(DataType::Float {
5391 precision: None,
5392 scale: None,
5393 real_spelling: false,
5394 }),
5395 DialectType::Drill
5396 | DialectType::Trino
5397 | DialectType::Athena
5398 | DialectType::Presto => Some(DataType::Double {
5399 precision: None,
5400 scale: None,
5401 }),
5402 DialectType::PostgreSQL
5403 | DialectType::Redshift
5404 | DialectType::Materialize
5405 | DialectType::Teradata
5406 | DialectType::RisingWave => Some(DataType::Double {
5407 precision: None,
5408 scale: None,
5409 }),
5410 _ => None,
5411 };
5412 if let Some(dt) = cast_type {
5413 let cast_left = Expression::Cast(Box::new(Cast {
5414 this: op.left.clone(),
5415 to: dt,
5416 double_colon_syntax: false,
5417 trailing_comments: Vec::new(),
5418 format: None,
5419 default: None,
5420 inferred_type: None,
5421 }));
5422 let new_op = crate::expressions::BinaryOp {
5423 left: cast_left,
5424 right: op.right.clone(),
5425 left_comments: op.left_comments.clone(),
5426 operator_comments: op.operator_comments.clone(),
5427 trailing_comments: op.trailing_comments.clone(),
5428 inferred_type: None,
5429 };
5430 return Ok(Expression::Div(Box::new(new_op)));
5431 }
5432 }
5433 }
5434
5435 // CREATE DATABASE -> CREATE SCHEMA for DuckDB target
5436 if matches!(target, DialectType::DuckDB) {
5437 if let Expression::CreateDatabase(db) = e {
5438 let mut schema = crate::expressions::CreateSchema::new(db.name.name.clone());
5439 schema.if_not_exists = db.if_not_exists;
5440 return Ok(Expression::CreateSchema(Box::new(schema)));
5441 }
5442 if let Expression::DropDatabase(db) = e {
5443 let mut schema = crate::expressions::DropSchema::new(db.name.name.clone());
5444 schema.if_exists = db.if_exists;
5445 return Ok(Expression::DropSchema(Box::new(schema)));
5446 }
5447 }
5448
5449 // Strip ClickHouse Nullable(...) wrapper for non-ClickHouse targets
5450 if matches!(source, DialectType::ClickHouse)
5451 && !matches!(target, DialectType::ClickHouse)
5452 {
5453 if let Expression::Cast(ref c) = e {
5454 if let DataType::Custom { ref name } = c.to {
5455 let upper = name.to_uppercase();
5456 if upper.starts_with("NULLABLE(") && upper.ends_with(")") {
5457 let inner = &name[9..name.len() - 1]; // strip "Nullable(" and ")"
5458 let inner_upper = inner.to_uppercase();
5459 let new_dt = match inner_upper.as_str() {
5460 "DATETIME" | "DATETIME64" => DataType::Timestamp {
5461 precision: None,
5462 timezone: false,
5463 },
5464 "DATE" => DataType::Date,
5465 "INT64" | "BIGINT" => DataType::BigInt { length: None },
5466 "INT32" | "INT" | "INTEGER" => DataType::Int {
5467 length: None,
5468 integer_spelling: false,
5469 },
5470 "FLOAT64" | "DOUBLE" => DataType::Double {
5471 precision: None,
5472 scale: None,
5473 },
5474 "STRING" => DataType::Text,
5475 _ => DataType::Custom {
5476 name: inner.to_string(),
5477 },
5478 };
5479 let mut new_cast = c.clone();
5480 new_cast.to = new_dt;
5481 return Ok(Expression::Cast(new_cast));
5482 }
5483 }
5484 }
5485 }
5486
5487 // ARRAY_CONCAT_AGG -> Snowflake: ARRAY_FLATTEN(ARRAY_AGG(...))
5488 if matches!(target, DialectType::Snowflake) {
5489 if let Expression::ArrayConcatAgg(ref agg) = e {
5490 let mut agg_clone = agg.as_ref().clone();
5491 agg_clone.name = None; // Clear name so generator uses default "ARRAY_AGG"
5492 let array_agg = Expression::ArrayAgg(Box::new(agg_clone));
5493 let flatten = Expression::Function(Box::new(Function::new(
5494 "ARRAY_FLATTEN".to_string(),
5495 vec![array_agg],
5496 )));
5497 return Ok(flatten);
5498 }
5499 }
5500
5501 // ARRAY_CONCAT_AGG -> others: keep as function for cross-dialect
5502 if !matches!(target, DialectType::BigQuery | DialectType::Snowflake) {
5503 if let Expression::ArrayConcatAgg(agg) = e {
5504 let arg = agg.this;
5505 return Ok(Expression::Function(Box::new(Function::new(
5506 "ARRAY_CONCAT_AGG".to_string(),
5507 vec![arg],
5508 ))));
5509 }
5510 }
5511
5512 // Determine what action to take by inspecting e immutably
5513 let action = {
5514 let source_propagates_nulls =
5515 matches!(source, DialectType::Snowflake | DialectType::BigQuery);
5516 let target_ignores_nulls =
5517 matches!(target, DialectType::DuckDB | DialectType::PostgreSQL);
5518
5519 match &e {
5520 Expression::Function(f) => {
5521 let name = f.name.to_uppercase();
5522 // DATE_PART: strip quotes from first arg when target is Snowflake (source != Snowflake)
5523 if (name == "DATE_PART" || name == "DATEPART")
5524 && f.args.len() == 2
5525 && matches!(target, DialectType::Snowflake)
5526 && !matches!(source, DialectType::Snowflake)
5527 && matches!(
5528 &f.args[0],
5529 Expression::Literal(crate::expressions::Literal::String(_))
5530 )
5531 {
5532 Action::DatePartUnquote
5533 } else if source_propagates_nulls
5534 && target_ignores_nulls
5535 && (name == "GREATEST" || name == "LEAST")
5536 && f.args.len() >= 2
5537 {
5538 Action::GreatestLeastNull
5539 } else if matches!(source, DialectType::Snowflake)
5540 && name == "ARRAY_GENERATE_RANGE"
5541 && f.args.len() >= 2
5542 {
5543 Action::ArrayGenerateRange
5544 } else if matches!(source, DialectType::Snowflake)
5545 && matches!(target, DialectType::DuckDB)
5546 && name == "DATE_TRUNC"
5547 && f.args.len() == 2
5548 {
5549 // Determine if DuckDB DATE_TRUNC needs CAST wrapping to preserve input type.
5550 // Logic based on Python sqlglot's input_type_preserved flag:
5551 // - DATE + non-date-unit (HOUR, MINUTE, etc.) -> wrap
5552 // - TIMESTAMP + date-unit (YEAR, QUARTER, MONTH, WEEK, DAY) -> wrap
5553 // - TIMESTAMPTZ/TIMESTAMPLTZ/TIME -> always wrap
5554 let unit_str = match &f.args[0] {
5555 Expression::Literal(crate::expressions::Literal::String(s)) => {
5556 Some(s.to_uppercase())
5557 }
5558 _ => None,
5559 };
5560 let is_date_unit = unit_str.as_ref().map_or(false, |u| {
5561 matches!(u.as_str(), "YEAR" | "QUARTER" | "MONTH" | "WEEK" | "DAY")
5562 });
5563 match &f.args[1] {
5564 Expression::Cast(c) => match &c.to {
5565 DataType::Time { .. } => Action::DateTruncWrapCast,
5566 DataType::Custom { name }
5567 if name.eq_ignore_ascii_case("TIMESTAMPTZ")
5568 || name.eq_ignore_ascii_case("TIMESTAMPLTZ") =>
5569 {
5570 Action::DateTruncWrapCast
5571 }
5572 DataType::Timestamp { timezone: true, .. } => {
5573 Action::DateTruncWrapCast
5574 }
5575 DataType::Date if !is_date_unit => Action::DateTruncWrapCast,
5576 DataType::Timestamp {
5577 timezone: false, ..
5578 } if is_date_unit => Action::DateTruncWrapCast,
5579 _ => Action::None,
5580 },
5581 _ => Action::None,
5582 }
5583 } else if matches!(source, DialectType::Snowflake)
5584 && matches!(target, DialectType::DuckDB)
5585 && name == "TO_DATE"
5586 && f.args.len() == 1
5587 && !matches!(
5588 &f.args[0],
5589 Expression::Literal(crate::expressions::Literal::String(_))
5590 )
5591 {
5592 Action::ToDateToCast
5593 } else if !matches!(source, DialectType::Redshift)
5594 && matches!(target, DialectType::Redshift)
5595 && name == "CONVERT_TIMEZONE"
5596 && (f.args.len() == 2 || f.args.len() == 3)
5597 {
5598 // Convert Function("CONVERT_TIMEZONE") to Expression::ConvertTimezone
5599 // so Redshift's transform_expr won't expand 2-arg to 3-arg with 'UTC'.
5600 // The Redshift parser adds 'UTC' as default source_tz, but when
5601 // transpiling from other dialects, we should preserve the original form.
5602 Action::ConvertTimezoneToExpr
5603 } else if matches!(source, DialectType::Snowflake)
5604 && matches!(target, DialectType::DuckDB)
5605 && name == "REGEXP_REPLACE"
5606 && f.args.len() == 4
5607 && !matches!(
5608 &f.args[3],
5609 Expression::Literal(crate::expressions::Literal::String(_))
5610 )
5611 {
5612 // Snowflake REGEXP_REPLACE with position arg -> DuckDB needs 'g' flag
5613 Action::RegexpReplaceSnowflakeToDuckDB
5614 } else if matches!(source, DialectType::Snowflake)
5615 && matches!(target, DialectType::DuckDB)
5616 && name == "REGEXP_REPLACE"
5617 && f.args.len() == 5
5618 {
5619 // Snowflake REGEXP_REPLACE(s, p, r, pos, occ) -> DuckDB
5620 Action::RegexpReplacePositionSnowflakeToDuckDB
5621 } else if matches!(source, DialectType::Snowflake)
5622 && matches!(target, DialectType::DuckDB)
5623 && name == "REGEXP_SUBSTR"
5624 {
5625 // Snowflake REGEXP_SUBSTR -> DuckDB REGEXP_EXTRACT variants
5626 Action::RegexpSubstrSnowflakeToDuckDB
5627 } else if matches!(source, DialectType::Snowflake)
5628 && matches!(target, DialectType::Snowflake)
5629 && (name == "REGEXP_SUBSTR" || name == "REGEXP_SUBSTR_ALL")
5630 && f.args.len() == 6
5631 {
5632 // Snowflake identity: strip trailing group=0
5633 Action::RegexpSubstrSnowflakeIdentity
5634 } else if matches!(source, DialectType::Snowflake)
5635 && matches!(target, DialectType::DuckDB)
5636 && name == "REGEXP_SUBSTR_ALL"
5637 {
5638 // Snowflake REGEXP_SUBSTR_ALL -> DuckDB REGEXP_EXTRACT_ALL variants
5639 Action::RegexpSubstrAllSnowflakeToDuckDB
5640 } else if matches!(source, DialectType::Snowflake)
5641 && matches!(target, DialectType::DuckDB)
5642 && name == "REGEXP_COUNT"
5643 {
5644 // Snowflake REGEXP_COUNT -> DuckDB LENGTH(REGEXP_EXTRACT_ALL(...))
5645 Action::RegexpCountSnowflakeToDuckDB
5646 } else if matches!(source, DialectType::Snowflake)
5647 && matches!(target, DialectType::DuckDB)
5648 && name == "REGEXP_INSTR"
5649 {
5650 // Snowflake REGEXP_INSTR -> DuckDB complex CASE expression
5651 Action::RegexpInstrSnowflakeToDuckDB
5652 } else if matches!(source, DialectType::BigQuery)
5653 && matches!(target, DialectType::Snowflake)
5654 && name == "REGEXP_EXTRACT_ALL"
5655 {
5656 // BigQuery REGEXP_EXTRACT_ALL -> Snowflake REGEXP_SUBSTR_ALL
5657 Action::RegexpExtractAllToSnowflake
5658 } else if name == "_BQ_TO_HEX" {
5659 // Internal marker from TO_HEX conversion - bare (no LOWER/UPPER wrapper)
5660 Action::BigQueryToHexBare
5661 } else if matches!(source, DialectType::BigQuery)
5662 && !matches!(target, DialectType::BigQuery)
5663 {
5664 // BigQuery-specific functions that need to be converted to standard forms
5665 match name.as_str() {
5666 "TIMESTAMP_DIFF" | "DATETIME_DIFF" | "TIME_DIFF"
5667 | "DATE_DIFF"
5668 | "TIMESTAMP_ADD" | "TIMESTAMP_SUB"
5669 | "DATETIME_ADD" | "DATETIME_SUB"
5670 | "TIME_ADD" | "TIME_SUB"
5671 | "DATE_ADD" | "DATE_SUB"
5672 | "SAFE_DIVIDE"
5673 | "GENERATE_UUID"
5674 | "COUNTIF"
5675 | "EDIT_DISTANCE"
5676 | "TIMESTAMP_SECONDS" | "TIMESTAMP_MILLIS" | "TIMESTAMP_MICROS"
5677 | "TIMESTAMP_TRUNC" | "DATETIME_TRUNC" | "DATE_TRUNC"
5678 | "TO_HEX"
5679 | "TO_JSON_STRING"
5680 | "GENERATE_ARRAY" | "GENERATE_TIMESTAMP_ARRAY"
5681 | "DIV"
5682 | "UNIX_DATE" | "UNIX_SECONDS" | "UNIX_MILLIS" | "UNIX_MICROS"
5683 | "LAST_DAY"
5684 | "TIME" | "DATETIME" | "TIMESTAMP" | "STRING"
5685 | "REGEXP_CONTAINS"
5686 | "CONTAINS_SUBSTR"
5687 | "SAFE_ADD" | "SAFE_SUBTRACT" | "SAFE_MULTIPLY"
5688 | "SAFE_CAST"
5689 | "GENERATE_DATE_ARRAY"
5690 | "PARSE_DATE" | "PARSE_TIMESTAMP"
5691 | "FORMAT_DATE" | "FORMAT_DATETIME" | "FORMAT_TIMESTAMP"
5692 | "ARRAY_CONCAT"
5693 | "JSON_QUERY" | "JSON_VALUE_ARRAY"
5694 | "INSTR"
5695 | "MD5" | "SHA1" | "SHA256" | "SHA512"
5696 | "GENERATE_UUID()" // just in case
5697 | "REGEXP_EXTRACT_ALL"
5698 | "REGEXP_EXTRACT"
5699 | "INT64"
5700 | "ARRAY_CONCAT_AGG"
5701 | "DATE_DIFF(" // just in case
5702 | "TO_HEX_MD5" // internal
5703 | "MOD"
5704 | "CONCAT"
5705 | "CURRENT_TIMESTAMP" | "CURRENT_DATE" | "CURRENT_DATETIME" | "CURRENT_TIME"
5706 | "STRUCT"
5707 | "ROUND"
5708 | "MAKE_INTERVAL"
5709 | "ARRAY_TO_STRING"
5710 | "PERCENTILE_CONT"
5711 => Action::BigQueryFunctionNormalize,
5712 "ARRAY" if matches!(target, DialectType::Snowflake)
5713 && f.args.len() == 1
5714 && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT"))
5715 => Action::BigQueryArraySelectAsStructToSnowflake,
5716 _ => Action::None,
5717 }
5718 } else if matches!(source, DialectType::BigQuery)
5719 && matches!(target, DialectType::BigQuery)
5720 {
5721 // BigQuery -> BigQuery normalizations
5722 match name.as_str() {
5723 "TIMESTAMP_DIFF"
5724 | "DATETIME_DIFF"
5725 | "TIME_DIFF"
5726 | "DATE_DIFF"
5727 | "DATE_ADD"
5728 | "TO_HEX"
5729 | "CURRENT_TIMESTAMP"
5730 | "CURRENT_DATE"
5731 | "CURRENT_TIME"
5732 | "CURRENT_DATETIME"
5733 | "GENERATE_DATE_ARRAY"
5734 | "INSTR"
5735 | "FORMAT_DATETIME"
5736 | "DATETIME"
5737 | "MAKE_INTERVAL" => Action::BigQueryFunctionNormalize,
5738 _ => Action::None,
5739 }
5740 } else {
5741 // Generic function normalization for non-BigQuery sources
5742 match name.as_str() {
5743 "ARBITRARY" | "AGGREGATE"
5744 | "REGEXP_MATCHES" | "REGEXP_FULL_MATCH"
5745 | "STRUCT_EXTRACT"
5746 | "LIST_FILTER" | "LIST_TRANSFORM" | "LIST_SORT" | "LIST_REVERSE_SORT"
5747 | "STRING_TO_ARRAY" | "STR_SPLIT" | "STR_SPLIT_REGEX" | "SPLIT_TO_ARRAY"
5748 | "SUBSTRINGINDEX"
5749 | "ARRAY_LENGTH" | "SIZE" | "CARDINALITY"
5750 | "UNICODE"
5751 | "XOR"
5752 | "ARRAY_REVERSE_SORT"
5753 | "ENCODE" | "DECODE"
5754 | "QUANTILE"
5755 | "EPOCH" | "EPOCH_MS"
5756 | "HASHBYTES"
5757 | "JSON_EXTRACT_PATH" | "JSON_EXTRACT_PATH_TEXT"
5758 | "APPROX_DISTINCT"
5759 | "DATE_PARSE" | "FORMAT_DATETIME"
5760 | "REGEXP_EXTRACT" | "REGEXP_SUBSTR" | "TO_DAYS"
5761 | "RLIKE"
5762 | "DATEDIFF" | "DATE_DIFF" | "MONTHS_BETWEEN"
5763 | "ADD_MONTHS" | "DATEADD" | "DATE_ADD" | "DATE_SUB" | "DATETRUNC"
5764 | "LAST_DAY" | "LAST_DAY_OF_MONTH" | "EOMONTH"
5765 | "ARRAY_CONSTRUCT" | "ARRAY_CAT" | "ARRAY_COMPACT"
5766 | "ARRAY_FILTER" | "FILTER" | "REDUCE" | "ARRAY_REVERSE"
5767 | "MAP" | "MAP_FROM_ENTRIES"
5768 | "COLLECT_LIST" | "COLLECT_SET"
5769 | "ISNAN" | "IS_NAN"
5770 | "TO_UTC_TIMESTAMP" | "FROM_UTC_TIMESTAMP"
5771 | "FORMAT_NUMBER"
5772 | "TOMONDAY" | "TOSTARTOFWEEK" | "TOSTARTOFMONTH" | "TOSTARTOFYEAR"
5773 | "ELEMENT_AT"
5774 | "EXPLODE" | "EXPLODE_OUTER" | "POSEXPLODE"
5775 | "SPLIT_PART"
5776 // GENERATE_SERIES: handled separately below
5777 | "JSON_EXTRACT" | "JSON_EXTRACT_SCALAR"
5778 | "JSON_QUERY" | "JSON_VALUE"
5779 | "JSON_SEARCH"
5780 | "JSON_EXTRACT_JSON" | "BSON_EXTRACT_BSON"
5781 | "TO_UNIX_TIMESTAMP" | "UNIX_TIMESTAMP"
5782 | "CURDATE" | "CURTIME"
5783 | "ARRAY_TO_STRING"
5784 | "ARRAY_SORT" | "SORT_ARRAY"
5785 | "LEFT" | "RIGHT"
5786 | "MAP_FROM_ARRAYS"
5787 | "LIKE" | "ILIKE"
5788 | "ARRAY_CONCAT" | "LIST_CONCAT"
5789 | "QUANTILE_CONT" | "QUANTILE_DISC"
5790 | "PERCENTILE_CONT" | "PERCENTILE_DISC"
5791 | "PERCENTILE_APPROX" | "APPROX_PERCENTILE"
5792 | "LOCATE" | "STRPOS" | "INSTR"
5793 | "CHAR"
5794 // CONCAT: handled separately for COALESCE wrapping
5795 | "ARRAY_JOIN"
5796 | "ARRAY_CONTAINS" | "HAS" | "CONTAINS"
5797 | "ISNULL"
5798 | "MONTHNAME"
5799 | "TO_TIMESTAMP"
5800 | "TO_DATE"
5801 | "TO_JSON"
5802 | "REGEXP_SPLIT"
5803 | "SPLIT"
5804 | "FORMATDATETIME"
5805 | "ARRAYJOIN"
5806 | "SPLITBYSTRING" | "SPLITBYREGEXP"
5807 | "NVL"
5808 | "TO_CHAR"
5809 | "DBMS_RANDOM.VALUE"
5810 | "REGEXP_LIKE"
5811 | "REPLICATE"
5812 | "LEN"
5813 | "COUNT_BIG"
5814 | "DATEFROMPARTS"
5815 | "DATETIMEFROMPARTS"
5816 | "CONVERT" | "TRY_CONVERT"
5817 | "STRFTIME" | "STRPTIME"
5818 | "DATE_FORMAT" | "FORMAT_DATE"
5819 | "PARSE_TIMESTAMP" | "PARSE_DATE"
5820 | "FROM_BASE64" | "TO_BASE64"
5821 | "GETDATE"
5822 | "TO_HEX" | "FROM_HEX" | "UNHEX" | "HEX"
5823 | "TO_UTF8" | "FROM_UTF8"
5824 | "STARTS_WITH" | "STARTSWITH"
5825 | "APPROX_COUNT_DISTINCT"
5826 | "JSON_FORMAT"
5827 | "SYSDATE"
5828 | "LOGICAL_OR" | "LOGICAL_AND"
5829 | "MONTHS_ADD"
5830 | "SCHEMA_NAME"
5831 | "STRTOL"
5832 | "EDITDIST3"
5833 | "FORMAT"
5834 | "LIST_CONTAINS" | "LIST_HAS"
5835 | "VARIANCE" | "STDDEV"
5836 | "ISINF"
5837 | "TO_UNIXTIME"
5838 | "FROM_UNIXTIME"
5839 | "DATEPART" | "DATE_PART"
5840 | "DATENAME"
5841 | "STRING_AGG"
5842 | "JSON_ARRAYAGG"
5843 | "APPROX_QUANTILE"
5844 | "MAKE_DATE"
5845 | "LIST_HAS_ANY" | "ARRAY_HAS_ANY"
5846 | "RANGE"
5847 | "TRY_ELEMENT_AT"
5848 | "STR_TO_MAP"
5849 | "STRING"
5850 | "STR_TO_TIME"
5851 | "CURRENT_SCHEMA"
5852 | "LTRIM" | "RTRIM"
5853 | "UUID"
5854 | "FARM_FINGERPRINT"
5855 | "JSON_KEYS"
5856 | "WEEKOFYEAR"
5857 | "CONCAT_WS"
5858 | "ARRAY_SLICE"
5859 | "ARRAY_PREPEND"
5860 | "ARRAY_REMOVE"
5861 | "GENERATE_DATE_ARRAY"
5862 | "PARSE_JSON"
5863 | "JSON_REMOVE"
5864 | "JSON_SET"
5865 | "LEVENSHTEIN"
5866 | "CURRENT_VERSION"
5867 | "ARRAY_MAX"
5868 | "ARRAY_MIN"
5869 | "JAROWINKLER_SIMILARITY"
5870 | "CURRENT_SCHEMAS"
5871 => Action::GenericFunctionNormalize,
5872 // Canonical date functions -> dialect-specific
5873 "TS_OR_DS_TO_DATE" => Action::TsOrDsToDateConvert,
5874 "TS_OR_DS_TO_DATE_STR" if f.args.len() == 1 => Action::TsOrDsToDateStrConvert,
5875 "DATE_STR_TO_DATE" if f.args.len() == 1 => Action::DateStrToDateConvert,
5876 "TIME_STR_TO_DATE" if f.args.len() == 1 => Action::TimeStrToDateConvert,
5877 "TIME_STR_TO_TIME" if f.args.len() <= 2 => Action::TimeStrToTimeConvert,
5878 "TIME_STR_TO_UNIX" if f.args.len() == 1 => Action::TimeStrToUnixConvert,
5879 "TIME_TO_TIME_STR" if f.args.len() == 1 => Action::TimeToTimeStrConvert,
5880 "DATE_TO_DATE_STR" if f.args.len() == 1 => Action::DateToDateStrConvert,
5881 "DATE_TO_DI" if f.args.len() == 1 => Action::DateToDiConvert,
5882 "DI_TO_DATE" if f.args.len() == 1 => Action::DiToDateConvert,
5883 "TS_OR_DI_TO_DI" if f.args.len() == 1 => Action::TsOrDiToDiConvert,
5884 "UNIX_TO_STR" if f.args.len() == 2 => Action::UnixToStrConvert,
5885 "UNIX_TO_TIME" if f.args.len() == 1 => Action::UnixToTimeConvert,
5886 "UNIX_TO_TIME_STR" if f.args.len() == 1 => Action::UnixToTimeStrConvert,
5887 "TIME_TO_UNIX" if f.args.len() == 1 => Action::TimeToUnixConvert,
5888 "TIME_TO_STR" if f.args.len() == 2 => Action::TimeToStrConvert,
5889 "STR_TO_UNIX" if f.args.len() == 2 => Action::StrToUnixConvert,
5890 // STR_TO_DATE(x, fmt) -> dialect-specific
5891 "STR_TO_DATE" if f.args.len() == 2
5892 && matches!(source, DialectType::Generic) => Action::StrToDateConvert,
5893 "STR_TO_DATE" => Action::GenericFunctionNormalize,
5894 // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> dialect-specific DATE_ADD
5895 "TS_OR_DS_ADD" if f.args.len() == 3
5896 && matches!(source, DialectType::Generic) => Action::TsOrDsAddConvert,
5897 // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, '1970-01-01')
5898 "DATE_FROM_UNIX_DATE" if f.args.len() == 1 => Action::DateFromUnixDateConvert,
5899 // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b [ELSE c] END
5900 "NVL2" if (f.args.len() == 2 || f.args.len() == 3) => Action::Nvl2Expand,
5901 // IFNULL(a, b) -> COALESCE(a, b) when coming from Generic source
5902 "IFNULL" if f.args.len() == 2 => Action::IfnullToCoalesce,
5903 // IS_ASCII(x) -> dialect-specific
5904 "IS_ASCII" if f.args.len() == 1 => Action::IsAsciiConvert,
5905 // STR_POSITION(haystack, needle[, pos[, occ]]) -> dialect-specific
5906 "STR_POSITION" => Action::StrPositionConvert,
5907 // ARRAY_SUM -> dialect-specific
5908 "ARRAY_SUM" => Action::ArraySumConvert,
5909 // ARRAY_SIZE -> dialect-specific (Drill only)
5910 "ARRAY_SIZE" if matches!(target, DialectType::Drill) => Action::ArraySizeConvert,
5911 // ARRAY_ANY -> dialect-specific
5912 "ARRAY_ANY" if f.args.len() == 2 => Action::ArrayAnyConvert,
5913 // Functions needing specific cross-dialect transforms
5914 "MAX_BY" | "MIN_BY" if matches!(target, DialectType::ClickHouse | DialectType::Spark | DialectType::Databricks | DialectType::DuckDB) => Action::MaxByMinByConvert,
5915 "STRUCT" if matches!(source, DialectType::Spark | DialectType::Databricks)
5916 && !matches!(target, DialectType::Spark | DialectType::Databricks | DialectType::Hive) => Action::SparkStructConvert,
5917 "ARRAY" if matches!(source, DialectType::BigQuery)
5918 && matches!(target, DialectType::Snowflake)
5919 && f.args.len() == 1
5920 && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT")) => Action::BigQueryArraySelectAsStructToSnowflake,
5921 "ARRAY" if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena | DialectType::BigQuery | DialectType::DuckDB | DialectType::Snowflake | DialectType::ClickHouse | DialectType::StarRocks) => Action::ArraySyntaxConvert,
5922 "TRUNC" if f.args.len() == 2 && matches!(&f.args[1], Expression::Literal(Literal::String(_))) && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::ClickHouse) => Action::TruncToDateTrunc,
5923 "TRUNC" | "TRUNCATE" if f.args.len() <= 2 && !matches!(f.args.get(1), Some(Expression::Literal(Literal::String(_)))) => Action::GenericFunctionNormalize,
5924 // DATE_TRUNC('unit', x) from Generic source -> arg swap for BigQuery/Doris/Spark/MySQL
5925 "DATE_TRUNC" if f.args.len() == 2
5926 && matches!(source, DialectType::Generic)
5927 && matches!(target, DialectType::BigQuery | DialectType::Doris | DialectType::StarRocks
5928 | DialectType::Spark | DialectType::Databricks | DialectType::MySQL) => Action::DateTruncSwapArgs,
5929 // TIMESTAMP_TRUNC(x, UNIT) from Generic source -> convert to per-dialect
5930 "TIMESTAMP_TRUNC" if f.args.len() >= 2
5931 && matches!(source, DialectType::Generic) => Action::TimestampTruncConvert,
5932 "UNIFORM" if matches!(target, DialectType::Snowflake) => Action::GenericFunctionNormalize,
5933 // GENERATE_SERIES -> SEQUENCE/UNNEST/EXPLODE for target dialects
5934 "GENERATE_SERIES" if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
5935 && !matches!(target, DialectType::PostgreSQL | DialectType::Redshift | DialectType::TSQL | DialectType::Fabric) => Action::GenerateSeriesConvert,
5936 // GENERATE_SERIES with interval normalization for PG target
5937 "GENERATE_SERIES" if f.args.len() >= 3
5938 && matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
5939 && matches!(target, DialectType::PostgreSQL | DialectType::Redshift) => Action::GenerateSeriesConvert,
5940 "GENERATE_SERIES" => Action::None, // passthrough for other cases
5941 // CONCAT(a, b) -> COALESCE wrapping for Presto/ClickHouse from PostgreSQL
5942 "CONCAT" if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
5943 && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::ClickHouse) => Action::ConcatCoalesceWrap,
5944 "CONCAT" => Action::GenericFunctionNormalize,
5945 // DIV(a, b) -> target-specific integer division
5946 "DIV" if f.args.len() == 2
5947 && matches!(source, DialectType::PostgreSQL)
5948 && matches!(target, DialectType::DuckDB | DialectType::BigQuery | DialectType::SQLite) => Action::DivFuncConvert,
5949 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
5950 "JSON_OBJECT_AGG" | "JSONB_OBJECT_AGG" if f.args.len() == 2
5951 && matches!(target, DialectType::DuckDB) => Action::JsonObjectAggConvert,
5952 // JSONB_EXISTS -> JSON_EXISTS for DuckDB
5953 "JSONB_EXISTS" if f.args.len() == 2
5954 && matches!(target, DialectType::DuckDB) => Action::JsonbExistsConvert,
5955 // DATE_BIN -> TIME_BUCKET for DuckDB
5956 "DATE_BIN" if matches!(target, DialectType::DuckDB) => Action::DateBinConvert,
5957 // Multi-arg MIN(a,b,c) -> LEAST, MAX(a,b,c) -> GREATEST
5958 "MIN" | "MAX" if f.args.len() > 1 && !matches!(target, DialectType::SQLite) => Action::MinMaxToLeastGreatest,
5959 // ClickHouse uniq -> APPROX_COUNT_DISTINCT for other dialects
5960 "UNIQ" if matches!(source, DialectType::ClickHouse) && !matches!(target, DialectType::ClickHouse) => Action::ClickHouseUniqToApproxCountDistinct,
5961 // ClickHouse any -> ANY_VALUE for other dialects
5962 "ANY" if f.args.len() == 1 && matches!(source, DialectType::ClickHouse) && !matches!(target, DialectType::ClickHouse) => Action::ClickHouseAnyToAnyValue,
5963 _ => Action::None,
5964 }
5965 }
5966 }
5967 Expression::AggregateFunction(af) => {
5968 let name = af.name.to_uppercase();
5969 match name.as_str() {
5970 "ARBITRARY" | "AGGREGATE" => Action::GenericFunctionNormalize,
5971 "JSON_ARRAYAGG" => Action::GenericFunctionNormalize,
5972 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
5973 "JSON_OBJECT_AGG" | "JSONB_OBJECT_AGG"
5974 if matches!(target, DialectType::DuckDB) =>
5975 {
5976 Action::JsonObjectAggConvert
5977 }
5978 "ARRAY_AGG"
5979 if matches!(
5980 target,
5981 DialectType::Hive
5982 | DialectType::Spark
5983 | DialectType::Databricks
5984 ) =>
5985 {
5986 Action::ArrayAggToCollectList
5987 }
5988 "MAX_BY" | "MIN_BY"
5989 if matches!(
5990 target,
5991 DialectType::ClickHouse
5992 | DialectType::Spark
5993 | DialectType::Databricks
5994 | DialectType::DuckDB
5995 ) =>
5996 {
5997 Action::MaxByMinByConvert
5998 }
5999 "COLLECT_LIST"
6000 if matches!(
6001 target,
6002 DialectType::Presto | DialectType::Trino | DialectType::DuckDB
6003 ) =>
6004 {
6005 Action::CollectListToArrayAgg
6006 }
6007 "COLLECT_SET"
6008 if matches!(
6009 target,
6010 DialectType::Presto
6011 | DialectType::Trino
6012 | DialectType::Snowflake
6013 | DialectType::DuckDB
6014 ) =>
6015 {
6016 Action::CollectSetConvert
6017 }
6018 "PERCENTILE"
6019 if matches!(
6020 target,
6021 DialectType::DuckDB | DialectType::Presto | DialectType::Trino
6022 ) =>
6023 {
6024 Action::PercentileConvert
6025 }
6026 // CORR -> CASE WHEN ISNAN(CORR(a,b)) THEN NULL ELSE CORR(a,b) END for DuckDB
6027 "CORR"
6028 if matches!(target, DialectType::DuckDB)
6029 && matches!(source, DialectType::Snowflake) =>
6030 {
6031 Action::CorrIsnanWrap
6032 }
6033 // BigQuery APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
6034 "APPROX_QUANTILES"
6035 if matches!(source, DialectType::BigQuery)
6036 && matches!(target, DialectType::DuckDB) =>
6037 {
6038 Action::BigQueryApproxQuantiles
6039 }
6040 // BigQuery PERCENTILE_CONT(x, frac RESPECT NULLS) -> QUANTILE_CONT(x, frac) for DuckDB
6041 "PERCENTILE_CONT"
6042 if matches!(source, DialectType::BigQuery)
6043 && matches!(target, DialectType::DuckDB)
6044 && af.args.len() >= 2 =>
6045 {
6046 Action::BigQueryPercentileContToDuckDB
6047 }
6048 _ => Action::None,
6049 }
6050 }
6051 Expression::JSONArrayAgg(_) => match target {
6052 DialectType::PostgreSQL => Action::GenericFunctionNormalize,
6053 _ => Action::None,
6054 },
6055 Expression::ToNumber(tn) => {
6056 // TO_NUMBER(x) with 1 arg -> CAST(x AS DOUBLE) for most targets
6057 if tn.format.is_none() && tn.precision.is_none() && tn.scale.is_none() {
6058 match target {
6059 DialectType::Oracle
6060 | DialectType::Snowflake
6061 | DialectType::Teradata => Action::None,
6062 _ => Action::GenericFunctionNormalize,
6063 }
6064 } else {
6065 Action::None
6066 }
6067 }
6068 Expression::Nvl2(_) => {
6069 // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b ELSE c END for most dialects
6070 // Keep as NVL2 for dialects that support it natively
6071 match target {
6072 DialectType::Oracle
6073 | DialectType::Snowflake
6074 | DialectType::Teradata
6075 | DialectType::Spark
6076 | DialectType::Databricks
6077 | DialectType::Redshift => Action::None,
6078 _ => Action::Nvl2Expand,
6079 }
6080 }
6081 Expression::Decode(_) | Expression::DecodeCase(_) => {
6082 // DECODE(a, b, c[, d, e[, ...]]) -> CASE WHEN with null-safe comparisons
6083 // Keep as DECODE for Oracle/Snowflake
6084 match target {
6085 DialectType::Oracle | DialectType::Snowflake => Action::None,
6086 _ => Action::DecodeSimplify,
6087 }
6088 }
6089 Expression::Coalesce(ref cf) => {
6090 // IFNULL(a, b) -> COALESCE(a, b): clear original_name for cross-dialect
6091 // BigQuery keeps IFNULL natively when source is also BigQuery
6092 if cf.original_name.as_deref() == Some("IFNULL")
6093 && !(matches!(source, DialectType::BigQuery)
6094 && matches!(target, DialectType::BigQuery))
6095 {
6096 Action::IfnullToCoalesce
6097 } else {
6098 Action::None
6099 }
6100 }
6101 Expression::IfFunc(if_func) => {
6102 if matches!(source, DialectType::Snowflake)
6103 && matches!(
6104 target,
6105 DialectType::Presto | DialectType::Trino | DialectType::SQLite
6106 )
6107 && matches!(if_func.false_value, Some(Expression::Div(_)))
6108 {
6109 Action::Div0TypedDivision
6110 } else {
6111 Action::None
6112 }
6113 }
6114 Expression::ToJson(_) => match target {
6115 DialectType::Presto | DialectType::Trino => Action::ToJsonConvert,
6116 DialectType::BigQuery => Action::ToJsonConvert,
6117 DialectType::DuckDB => Action::ToJsonConvert,
6118 _ => Action::None,
6119 },
6120 Expression::ArrayAgg(ref agg) => {
6121 if matches!(target, DialectType::MySQL | DialectType::SingleStore) {
6122 Action::ArrayAggToGroupConcat
6123 } else if matches!(
6124 target,
6125 DialectType::Hive | DialectType::Spark | DialectType::Databricks
6126 ) {
6127 // Any source -> Hive/Spark: convert ARRAY_AGG to COLLECT_LIST
6128 Action::ArrayAggToCollectList
6129 } else if matches!(
6130 source,
6131 DialectType::Spark | DialectType::Databricks | DialectType::Hive
6132 ) && matches!(target, DialectType::DuckDB)
6133 && agg.filter.is_some()
6134 {
6135 // Spark/Hive ARRAY_AGG excludes NULLs, DuckDB includes them
6136 // Need to add NOT x IS NULL to existing filter
6137 Action::ArrayAggNullFilter
6138 } else if matches!(target, DialectType::DuckDB)
6139 && agg.ignore_nulls == Some(true)
6140 && !agg.order_by.is_empty()
6141 {
6142 // BigQuery ARRAY_AGG(x IGNORE NULLS ORDER BY ...) -> DuckDB ARRAY_AGG(x ORDER BY a NULLS FIRST, ...)
6143 Action::ArrayAggIgnoreNullsDuckDB
6144 } else if !matches!(source, DialectType::Snowflake) {
6145 Action::None
6146 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
6147 let is_array_agg = agg.name.as_deref().map(|n| n.to_uppercase())
6148 == Some("ARRAY_AGG".to_string())
6149 || agg.name.is_none();
6150 if is_array_agg {
6151 Action::ArrayAggCollectList
6152 } else {
6153 Action::None
6154 }
6155 } else if matches!(
6156 target,
6157 DialectType::DuckDB | DialectType::Presto | DialectType::Trino
6158 ) && agg.filter.is_none()
6159 {
6160 Action::ArrayAggFilter
6161 } else {
6162 Action::None
6163 }
6164 }
6165 Expression::WithinGroup(wg) => {
6166 if matches!(source, DialectType::Snowflake)
6167 && matches!(
6168 target,
6169 DialectType::DuckDB | DialectType::Presto | DialectType::Trino
6170 )
6171 && matches!(wg.this, Expression::ArrayAgg(_))
6172 {
6173 Action::ArrayAggWithinGroupFilter
6174 } else if matches!(&wg.this, Expression::AggregateFunction(af) if af.name.eq_ignore_ascii_case("STRING_AGG"))
6175 || matches!(&wg.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("STRING_AGG"))
6176 || matches!(&wg.this, Expression::StringAgg(_))
6177 {
6178 Action::StringAggConvert
6179 } else if matches!(
6180 target,
6181 DialectType::Presto
6182 | DialectType::Trino
6183 | DialectType::Athena
6184 | DialectType::Spark
6185 | DialectType::Databricks
6186 ) && (matches!(&wg.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("PERCENTILE_CONT") || f.name.eq_ignore_ascii_case("PERCENTILE_DISC"))
6187 || matches!(&wg.this, Expression::AggregateFunction(af) if af.name.eq_ignore_ascii_case("PERCENTILE_CONT") || af.name.eq_ignore_ascii_case("PERCENTILE_DISC"))
6188 || matches!(&wg.this, Expression::PercentileCont(_)))
6189 {
6190 Action::PercentileContConvert
6191 } else {
6192 Action::None
6193 }
6194 }
6195 // For BigQuery: CAST(x AS TIMESTAMP) -> CAST(x AS DATETIME)
6196 // because BigQuery's TIMESTAMP is really TIMESTAMPTZ, and
6197 // DATETIME is the timezone-unaware type
6198 Expression::Cast(ref c) => {
6199 if c.format.is_some()
6200 && (matches!(source, DialectType::BigQuery)
6201 || matches!(source, DialectType::Teradata))
6202 {
6203 Action::BigQueryCastFormat
6204 } else if matches!(target, DialectType::BigQuery)
6205 && !matches!(source, DialectType::BigQuery)
6206 && matches!(
6207 c.to,
6208 DataType::Timestamp {
6209 timezone: false,
6210 ..
6211 }
6212 )
6213 {
6214 Action::CastTimestampToDatetime
6215 } else if matches!(target, DialectType::MySQL | DialectType::StarRocks)
6216 && !matches!(source, DialectType::MySQL | DialectType::StarRocks)
6217 && matches!(
6218 c.to,
6219 DataType::Timestamp {
6220 timezone: false,
6221 ..
6222 }
6223 )
6224 {
6225 // Generic/other -> MySQL/StarRocks: CAST(x AS TIMESTAMP) -> CAST(x AS DATETIME)
6226 // but MySQL-native CAST(x AS TIMESTAMP) stays as TIMESTAMP(x) via transform_cast
6227 Action::CastTimestampToDatetime
6228 } else if matches!(
6229 source,
6230 DialectType::Hive | DialectType::Spark | DialectType::Databricks
6231 ) && matches!(
6232 target,
6233 DialectType::Presto
6234 | DialectType::Trino
6235 | DialectType::Athena
6236 | DialectType::DuckDB
6237 | DialectType::Snowflake
6238 | DialectType::BigQuery
6239 | DialectType::Databricks
6240 | DialectType::TSQL
6241 ) {
6242 Action::HiveCastToTryCast
6243 } else if matches!(c.to, DataType::Timestamp { timezone: true, .. })
6244 && matches!(target, DialectType::MySQL | DialectType::StarRocks)
6245 {
6246 // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) function for MySQL/StarRocks
6247 Action::CastTimestamptzToFunc
6248 } else if matches!(c.to, DataType::Timestamp { timezone: true, .. })
6249 && matches!(
6250 target,
6251 DialectType::Hive
6252 | DialectType::Spark
6253 | DialectType::Databricks
6254 | DialectType::BigQuery
6255 )
6256 {
6257 // CAST(x AS TIMESTAMP WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark/BigQuery
6258 Action::CastTimestampStripTz
6259 } else if matches!(&c.to, DataType::Json)
6260 && matches!(&c.this, Expression::Literal(Literal::String(_)))
6261 && matches!(
6262 target,
6263 DialectType::Presto
6264 | DialectType::Trino
6265 | DialectType::Athena
6266 | DialectType::Snowflake
6267 )
6268 {
6269 // CAST('x' AS JSON) -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake
6270 // Only when the input is a string literal (JSON 'value' syntax)
6271 Action::JsonLiteralToJsonParse
6272 } else if matches!(&c.to, DataType::Json | DataType::JsonB)
6273 && matches!(target, DialectType::Spark | DialectType::Databricks)
6274 {
6275 // CAST(x AS JSON) -> TO_JSON(x) for Spark
6276 Action::CastToJsonForSpark
6277 } else if (matches!(
6278 &c.to,
6279 DataType::Array { .. } | DataType::Map { .. } | DataType::Struct { .. }
6280 )) && matches!(
6281 target,
6282 DialectType::Spark | DialectType::Databricks
6283 ) && (matches!(&c.this, Expression::ParseJson(_))
6284 || matches!(
6285 &c.this,
6286 Expression::Function(f)
6287 if f.name.eq_ignore_ascii_case("JSON_EXTRACT")
6288 || f.name.eq_ignore_ascii_case("JSON_EXTRACT_SCALAR")
6289 || f.name.eq_ignore_ascii_case("GET_JSON_OBJECT")
6290 ))
6291 {
6292 // CAST(JSON_PARSE(...) AS ARRAY/MAP) or CAST(JSON_EXTRACT/GET_JSON_OBJECT(...) AS ARRAY/MAP)
6293 // -> FROM_JSON(..., type_string) for Spark
6294 Action::CastJsonToFromJson
6295 } else if matches!(target, DialectType::Spark | DialectType::Databricks)
6296 && matches!(
6297 c.to,
6298 DataType::Timestamp {
6299 timezone: false,
6300 ..
6301 }
6302 )
6303 && matches!(source, DialectType::DuckDB)
6304 {
6305 Action::StrftimeCastTimestamp
6306 } else if matches!(source, DialectType::DuckDB)
6307 && matches!(
6308 c.to,
6309 DataType::Decimal {
6310 precision: None,
6311 ..
6312 }
6313 )
6314 {
6315 Action::DecimalDefaultPrecision
6316 } else if matches!(source, DialectType::MySQL | DialectType::SingleStore)
6317 && matches!(c.to, DataType::Char { length: None })
6318 && !matches!(target, DialectType::MySQL | DialectType::SingleStore)
6319 {
6320 // MySQL CAST(x AS CHAR) was originally TEXT - convert to target text type
6321 Action::MysqlCastCharToText
6322 } else if matches!(
6323 source,
6324 DialectType::Spark | DialectType::Databricks | DialectType::Hive
6325 ) && matches!(
6326 target,
6327 DialectType::Spark | DialectType::Databricks | DialectType::Hive
6328 ) && Self::has_varchar_char_type(&c.to)
6329 {
6330 // Spark parses VARCHAR(n)/CHAR(n) as TEXT, so normalize back to STRING
6331 Action::SparkCastVarcharToString
6332 } else {
6333 Action::None
6334 }
6335 }
6336 Expression::SafeCast(ref c) => {
6337 if c.format.is_some()
6338 && matches!(source, DialectType::BigQuery)
6339 && !matches!(target, DialectType::BigQuery)
6340 {
6341 Action::BigQueryCastFormat
6342 } else {
6343 Action::None
6344 }
6345 }
6346 // For DuckDB: DATE_TRUNC should preserve the input type
6347 Expression::DateTrunc(_) | Expression::TimestampTrunc(_) => {
6348 if matches!(source, DialectType::Snowflake)
6349 && matches!(target, DialectType::DuckDB)
6350 {
6351 Action::DateTruncWrapCast
6352 } else {
6353 Action::None
6354 }
6355 }
6356 // For DuckDB: SET a = 1 -> SET VARIABLE a = 1
6357 Expression::SetStatement(s) => {
6358 if matches!(target, DialectType::DuckDB)
6359 && !matches!(source, DialectType::TSQL | DialectType::Fabric)
6360 && s.items.iter().any(|item| item.kind.is_none())
6361 {
6362 Action::SetToVariable
6363 } else {
6364 Action::None
6365 }
6366 }
6367 // Cross-dialect NULL ordering normalization.
6368 // When nulls_first is not specified, fill in the source dialect's implied
6369 // default so the target generator can correctly add/strip NULLS FIRST/LAST.
6370 Expression::Ordered(o) => {
6371 // MySQL doesn't support NULLS FIRST/LAST - strip or rewrite
6372 if matches!(target, DialectType::MySQL) && o.nulls_first.is_some() {
6373 Action::MysqlNullsOrdering
6374 } else {
6375 // Skip targets that don't support NULLS FIRST/LAST syntax
6376 let target_supports_nulls = !matches!(
6377 target,
6378 DialectType::MySQL
6379 | DialectType::TSQL
6380 | DialectType::StarRocks
6381 | DialectType::Doris
6382 );
6383 if o.nulls_first.is_none() && source != target && target_supports_nulls
6384 {
6385 Action::NullsOrdering
6386 } else {
6387 Action::None
6388 }
6389 }
6390 }
6391 // BigQuery data types: convert INT64, BYTES, NUMERIC etc. to standard types
6392 Expression::DataType(dt) => {
6393 if matches!(source, DialectType::BigQuery)
6394 && !matches!(target, DialectType::BigQuery)
6395 {
6396 match dt {
6397 DataType::Custom { ref name }
6398 if name.eq_ignore_ascii_case("INT64")
6399 || name.eq_ignore_ascii_case("FLOAT64")
6400 || name.eq_ignore_ascii_case("BOOL")
6401 || name.eq_ignore_ascii_case("BYTES")
6402 || name.eq_ignore_ascii_case("NUMERIC")
6403 || name.eq_ignore_ascii_case("STRING")
6404 || name.eq_ignore_ascii_case("DATETIME") =>
6405 {
6406 Action::BigQueryCastType
6407 }
6408 _ => Action::None,
6409 }
6410 } else if matches!(source, DialectType::TSQL) {
6411 // For TSQL source -> any target (including TSQL itself for REAL)
6412 match dt {
6413 // REAL -> FLOAT even for TSQL->TSQL
6414 DataType::Custom { ref name }
6415 if name.eq_ignore_ascii_case("REAL") =>
6416 {
6417 Action::TSQLTypeNormalize
6418 }
6419 DataType::Float {
6420 real_spelling: true,
6421 ..
6422 } => Action::TSQLTypeNormalize,
6423 // Other TSQL type normalizations only for non-TSQL targets
6424 DataType::Custom { ref name }
6425 if !matches!(target, DialectType::TSQL)
6426 && (name.eq_ignore_ascii_case("MONEY")
6427 || name.eq_ignore_ascii_case("SMALLMONEY")
6428 || name.eq_ignore_ascii_case("DATETIME2")
6429 || name.eq_ignore_ascii_case("IMAGE")
6430 || name.eq_ignore_ascii_case("BIT")
6431 || name.eq_ignore_ascii_case("ROWVERSION")
6432 || name.eq_ignore_ascii_case("UNIQUEIDENTIFIER")
6433 || name.eq_ignore_ascii_case("DATETIMEOFFSET")
6434 || name.to_uppercase().starts_with("NUMERIC")
6435 || name.to_uppercase().starts_with("DATETIME2(")
6436 || name.to_uppercase().starts_with("TIME(")) =>
6437 {
6438 Action::TSQLTypeNormalize
6439 }
6440 DataType::Float {
6441 precision: Some(_), ..
6442 } if !matches!(target, DialectType::TSQL) => {
6443 Action::TSQLTypeNormalize
6444 }
6445 DataType::TinyInt { .. }
6446 if !matches!(target, DialectType::TSQL) =>
6447 {
6448 Action::TSQLTypeNormalize
6449 }
6450 // INTEGER -> INT for Databricks/Spark targets
6451 DataType::Int {
6452 integer_spelling: true,
6453 ..
6454 } if matches!(
6455 target,
6456 DialectType::Databricks | DialectType::Spark
6457 ) =>
6458 {
6459 Action::TSQLTypeNormalize
6460 }
6461 _ => Action::None,
6462 }
6463 } else if (matches!(source, DialectType::Oracle)
6464 || matches!(source, DialectType::Generic))
6465 && !matches!(target, DialectType::Oracle)
6466 {
6467 match dt {
6468 DataType::Custom { ref name }
6469 if name.to_uppercase().starts_with("VARCHAR2(")
6470 || name.to_uppercase().starts_with("NVARCHAR2(")
6471 || name.eq_ignore_ascii_case("VARCHAR2")
6472 || name.eq_ignore_ascii_case("NVARCHAR2") =>
6473 {
6474 Action::OracleVarchar2ToVarchar
6475 }
6476 _ => Action::None,
6477 }
6478 } else if matches!(target, DialectType::Snowflake)
6479 && !matches!(source, DialectType::Snowflake)
6480 {
6481 // When target is Snowflake but source is NOT Snowflake,
6482 // protect FLOAT from being converted to DOUBLE by Snowflake's transform.
6483 // Snowflake treats FLOAT=DOUBLE internally, but non-Snowflake sources
6484 // should keep their FLOAT spelling.
6485 match dt {
6486 DataType::Float { .. } => Action::SnowflakeFloatProtect,
6487 _ => Action::None,
6488 }
6489 } else {
6490 Action::None
6491 }
6492 }
6493 // LOWER patterns from BigQuery TO_HEX conversions:
6494 // - LOWER(LOWER(HEX(x))) from non-BQ targets: flatten
6495 // - LOWER(Function("TO_HEX")) for BQ->BQ: strip LOWER
6496 Expression::Lower(uf) => {
6497 if matches!(source, DialectType::BigQuery) {
6498 match &uf.this {
6499 Expression::Lower(_) => Action::BigQueryToHexLower,
6500 Expression::Function(f)
6501 if f.name == "TO_HEX"
6502 && matches!(target, DialectType::BigQuery) =>
6503 {
6504 // BQ->BQ: LOWER(TO_HEX(x)) -> TO_HEX(x)
6505 Action::BigQueryToHexLower
6506 }
6507 _ => Action::None,
6508 }
6509 } else {
6510 Action::None
6511 }
6512 }
6513 // UPPER patterns from BigQuery TO_HEX conversions:
6514 // - UPPER(LOWER(HEX(x))) from non-BQ targets: extract inner
6515 // - UPPER(Function("TO_HEX")) for BQ->BQ: keep as UPPER(TO_HEX(x))
6516 Expression::Upper(uf) => {
6517 if matches!(source, DialectType::BigQuery) {
6518 match &uf.this {
6519 Expression::Lower(_) => Action::BigQueryToHexUpper,
6520 _ => Action::None,
6521 }
6522 } else {
6523 Action::None
6524 }
6525 }
6526 // BigQuery LAST_DAY(date, unit) -> strip unit for non-BigQuery targets
6527 // Snowflake supports LAST_DAY with unit, so keep it there
6528 Expression::LastDay(ld) => {
6529 if matches!(source, DialectType::BigQuery)
6530 && !matches!(target, DialectType::BigQuery | DialectType::Snowflake)
6531 && ld.unit.is_some()
6532 {
6533 Action::BigQueryLastDayStripUnit
6534 } else {
6535 Action::None
6536 }
6537 }
6538 // BigQuery SafeDivide expressions (already parsed as SafeDivide)
6539 Expression::SafeDivide(_) => {
6540 if matches!(source, DialectType::BigQuery)
6541 && !matches!(target, DialectType::BigQuery)
6542 {
6543 Action::BigQuerySafeDivide
6544 } else {
6545 Action::None
6546 }
6547 }
6548 // BigQuery ANY_VALUE(x HAVING MAX/MIN y) -> ARG_MAX_NULL/ARG_MIN_NULL for DuckDB
6549 // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
6550 Expression::AnyValue(ref agg) => {
6551 if matches!(source, DialectType::BigQuery)
6552 && matches!(target, DialectType::DuckDB)
6553 && agg.having_max.is_some()
6554 {
6555 Action::BigQueryAnyValueHaving
6556 } else if matches!(target, DialectType::Spark | DialectType::Databricks)
6557 && !matches!(source, DialectType::Spark | DialectType::Databricks)
6558 && agg.ignore_nulls.is_none()
6559 {
6560 Action::AnyValueIgnoreNulls
6561 } else {
6562 Action::None
6563 }
6564 }
6565 Expression::Any(ref q) => {
6566 if matches!(source, DialectType::PostgreSQL)
6567 && matches!(
6568 target,
6569 DialectType::Spark | DialectType::Databricks | DialectType::Hive
6570 )
6571 && q.op.is_some()
6572 && !matches!(
6573 q.subquery,
6574 Expression::Select(_) | Expression::Subquery(_)
6575 )
6576 {
6577 Action::AnyToExists
6578 } else {
6579 Action::None
6580 }
6581 }
6582 // BigQuery APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
6583 // Snowflake RLIKE does full-string match; DuckDB REGEXP_MATCHES is partial
6584 // So anchor the pattern with ^(...) $ for Snowflake -> DuckDB
6585 Expression::RegexpLike(_)
6586 if matches!(source, DialectType::Snowflake)
6587 && matches!(target, DialectType::DuckDB) =>
6588 {
6589 Action::RlikeSnowflakeToDuckDB
6590 }
6591 // RegexpLike from non-DuckDB sources -> REGEXP_MATCHES for DuckDB target
6592 // DuckDB's ~ is a full match, but other dialects' REGEXP/RLIKE is a partial match
6593 Expression::RegexpLike(_)
6594 if !matches!(source, DialectType::DuckDB)
6595 && matches!(target, DialectType::DuckDB) =>
6596 {
6597 Action::RegexpLikeToDuckDB
6598 }
6599 // Safe-division source -> non-safe target: NULLIF wrapping and/or CAST
6600 // Safe-division dialects: MySQL, DuckDB, SingleStore, TiDB, ClickHouse, Doris
6601 Expression::Div(ref op)
6602 if matches!(
6603 source,
6604 DialectType::MySQL
6605 | DialectType::DuckDB
6606 | DialectType::SingleStore
6607 | DialectType::TiDB
6608 | DialectType::ClickHouse
6609 | DialectType::Doris
6610 ) && matches!(
6611 target,
6612 DialectType::PostgreSQL
6613 | DialectType::Redshift
6614 | DialectType::Drill
6615 | DialectType::Trino
6616 | DialectType::Presto
6617 | DialectType::Athena
6618 | DialectType::TSQL
6619 | DialectType::Teradata
6620 | DialectType::SQLite
6621 | DialectType::BigQuery
6622 | DialectType::Snowflake
6623 | DialectType::Databricks
6624 | DialectType::Oracle
6625 | DialectType::Materialize
6626 | DialectType::RisingWave
6627 ) =>
6628 {
6629 // Only wrap if RHS is not already NULLIF
6630 if !matches!(&op.right, Expression::Function(f) if f.name.eq_ignore_ascii_case("NULLIF"))
6631 {
6632 Action::MySQLSafeDivide
6633 } else {
6634 Action::None
6635 }
6636 }
6637 // ALTER TABLE ... RENAME TO <schema>.<table> -> strip schema for most targets
6638 // For TSQL/Fabric, convert to sp_rename instead
6639 Expression::AlterTable(ref at) if !at.actions.is_empty() => {
6640 if let Some(crate::expressions::AlterTableAction::RenameTable(
6641 ref new_tbl,
6642 )) = at.actions.first()
6643 {
6644 if matches!(target, DialectType::TSQL | DialectType::Fabric) {
6645 // TSQL: ALTER TABLE RENAME -> EXEC sp_rename
6646 Action::AlterTableToSpRename
6647 } else if new_tbl.schema.is_some()
6648 && matches!(
6649 target,
6650 DialectType::BigQuery
6651 | DialectType::Doris
6652 | DialectType::StarRocks
6653 | DialectType::DuckDB
6654 | DialectType::PostgreSQL
6655 | DialectType::Redshift
6656 )
6657 {
6658 Action::AlterTableRenameStripSchema
6659 } else {
6660 Action::None
6661 }
6662 } else {
6663 Action::None
6664 }
6665 }
6666 // EPOCH(x) expression -> target-specific epoch conversion
6667 Expression::Epoch(_) if !matches!(target, DialectType::DuckDB) => {
6668 Action::EpochConvert
6669 }
6670 // EPOCH_MS(x) expression -> target-specific epoch ms conversion
6671 Expression::EpochMs(_) if !matches!(target, DialectType::DuckDB) => {
6672 Action::EpochMsConvert
6673 }
6674 // STRING_AGG -> GROUP_CONCAT for MySQL/SQLite
6675 Expression::StringAgg(_) => {
6676 if matches!(
6677 target,
6678 DialectType::MySQL
6679 | DialectType::SingleStore
6680 | DialectType::Doris
6681 | DialectType::StarRocks
6682 | DialectType::SQLite
6683 ) {
6684 Action::StringAggConvert
6685 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
6686 Action::StringAggConvert
6687 } else {
6688 Action::None
6689 }
6690 }
6691 // GROUP_CONCAT -> STRING_AGG for PostgreSQL/Presto/etc.
6692 // Also handles GROUP_CONCAT normalization for MySQL/SQLite targets
6693 Expression::GroupConcat(_) => Action::GroupConcatConvert,
6694 // CARDINALITY/ARRAY_LENGTH/ARRAY_SIZE -> target-specific array length
6695 Expression::Cardinality(_) | Expression::ArrayLength(_) => {
6696 Action::ArrayLengthConvert
6697 }
6698 Expression::ArraySize(_) => {
6699 if matches!(target, DialectType::Drill) {
6700 Action::ArraySizeDrill
6701 } else {
6702 Action::ArrayLengthConvert
6703 }
6704 }
6705 // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter/ARRAY subquery
6706 Expression::ArrayRemove(_) => match target {
6707 DialectType::DuckDB | DialectType::ClickHouse | DialectType::BigQuery => {
6708 Action::ArrayRemoveConvert
6709 }
6710 _ => Action::None,
6711 },
6712 // ARRAY_REVERSE(x) -> arrayReverse for ClickHouse
6713 Expression::ArrayReverse(_) => match target {
6714 DialectType::ClickHouse => Action::ArrayReverseConvert,
6715 _ => Action::None,
6716 },
6717 // JSON_KEYS(x) -> JSON_OBJECT_KEYS/OBJECT_KEYS for Spark/Databricks/Snowflake
6718 Expression::JsonKeys(_) => match target {
6719 DialectType::Spark | DialectType::Databricks | DialectType::Snowflake => {
6720 Action::JsonKeysConvert
6721 }
6722 _ => Action::None,
6723 },
6724 // PARSE_JSON(x) -> strip for SQLite/Doris/MySQL/StarRocks
6725 Expression::ParseJson(_) => match target {
6726 DialectType::SQLite
6727 | DialectType::Doris
6728 | DialectType::MySQL
6729 | DialectType::StarRocks => Action::ParseJsonStrip,
6730 _ => Action::None,
6731 },
6732 // WeekOfYear -> WEEKISO for Snowflake (cross-dialect only)
6733 Expression::WeekOfYear(_)
6734 if matches!(target, DialectType::Snowflake)
6735 && !matches!(source, DialectType::Snowflake) =>
6736 {
6737 Action::WeekOfYearToWeekIso
6738 }
6739 // NVL: clear original_name so generator uses dialect-specific function names
6740 Expression::Nvl(f) if f.original_name.is_some() => Action::NvlClearOriginal,
6741 // XOR: expand for dialects that don't support the XOR keyword
6742 Expression::Xor(_) => {
6743 let target_supports_xor = matches!(
6744 target,
6745 DialectType::MySQL
6746 | DialectType::SingleStore
6747 | DialectType::Doris
6748 | DialectType::StarRocks
6749 );
6750 if !target_supports_xor {
6751 Action::XorExpand
6752 } else {
6753 Action::None
6754 }
6755 }
6756 // TSQL #table -> temp table normalization (CREATE TABLE)
6757 Expression::CreateTable(ct)
6758 if matches!(source, DialectType::TSQL | DialectType::Fabric)
6759 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
6760 && ct.name.name.name.starts_with('#') =>
6761 {
6762 Action::TempTableHash
6763 }
6764 // TSQL #table -> strip # from table references in SELECT/etc.
6765 Expression::Table(tr)
6766 if matches!(source, DialectType::TSQL | DialectType::Fabric)
6767 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
6768 && tr.name.name.starts_with('#') =>
6769 {
6770 Action::TempTableHash
6771 }
6772 // TSQL #table -> strip # from DROP TABLE names
6773 Expression::DropTable(ref dt)
6774 if matches!(source, DialectType::TSQL | DialectType::Fabric)
6775 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
6776 && dt.names.iter().any(|n| n.name.name.starts_with('#')) =>
6777 {
6778 Action::TempTableHash
6779 }
6780 // JSON_EXTRACT -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
6781 Expression::JsonExtract(_)
6782 if matches!(target, DialectType::TSQL | DialectType::Fabric) =>
6783 {
6784 Action::JsonExtractToTsql
6785 }
6786 // JSON_EXTRACT_SCALAR -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
6787 Expression::JsonExtractScalar(_)
6788 if matches!(target, DialectType::TSQL | DialectType::Fabric) =>
6789 {
6790 Action::JsonExtractToTsql
6791 }
6792 // JSON_EXTRACT -> JSONExtractString for ClickHouse
6793 Expression::JsonExtract(_) if matches!(target, DialectType::ClickHouse) => {
6794 Action::JsonExtractToClickHouse
6795 }
6796 // JSON_EXTRACT_SCALAR -> JSONExtractString for ClickHouse
6797 Expression::JsonExtractScalar(_)
6798 if matches!(target, DialectType::ClickHouse) =>
6799 {
6800 Action::JsonExtractToClickHouse
6801 }
6802 // JSON_EXTRACT -> arrow syntax for SQLite/DuckDB
6803 Expression::JsonExtract(ref f)
6804 if !f.arrow_syntax
6805 && matches!(target, DialectType::SQLite | DialectType::DuckDB) =>
6806 {
6807 Action::JsonExtractToArrow
6808 }
6809 // JSON_EXTRACT with JSONPath -> JSON_EXTRACT_PATH for PostgreSQL (non-PG sources only)
6810 Expression::JsonExtract(ref f)
6811 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift)
6812 && !matches!(
6813 source,
6814 DialectType::PostgreSQL
6815 | DialectType::Redshift
6816 | DialectType::Materialize
6817 )
6818 && matches!(&f.path, Expression::Literal(Literal::String(s)) if s.starts_with('$')) =>
6819 {
6820 Action::JsonExtractToGetJsonObject
6821 }
6822 // JSON_EXTRACT -> GET_JSON_OBJECT for Hive/Spark
6823 Expression::JsonExtract(_)
6824 if matches!(
6825 target,
6826 DialectType::Hive | DialectType::Spark | DialectType::Databricks
6827 ) =>
6828 {
6829 Action::JsonExtractToGetJsonObject
6830 }
6831 // JSON_EXTRACT_SCALAR -> target-specific for PostgreSQL, Snowflake, SQLite
6832 // Skip if already in arrow/hash_arrow syntax (same-dialect identity case)
6833 Expression::JsonExtractScalar(ref f)
6834 if !f.arrow_syntax
6835 && !f.hash_arrow_syntax
6836 && matches!(
6837 target,
6838 DialectType::PostgreSQL
6839 | DialectType::Redshift
6840 | DialectType::Snowflake
6841 | DialectType::SQLite
6842 | DialectType::DuckDB
6843 ) =>
6844 {
6845 Action::JsonExtractScalarConvert
6846 }
6847 // JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
6848 Expression::JsonExtractScalar(_)
6849 if matches!(
6850 target,
6851 DialectType::Hive | DialectType::Spark | DialectType::Databricks
6852 ) =>
6853 {
6854 Action::JsonExtractScalarToGetJsonObject
6855 }
6856 // JSON_EXTRACT path normalization for BigQuery, MySQL (bracket/wildcard handling)
6857 Expression::JsonExtract(ref f)
6858 if !f.arrow_syntax
6859 && matches!(target, DialectType::BigQuery | DialectType::MySQL) =>
6860 {
6861 Action::JsonPathNormalize
6862 }
6863 // JsonQuery (parsed JSON_QUERY) -> target-specific
6864 Expression::JsonQuery(_) => Action::JsonQueryValueConvert,
6865 // JsonValue (parsed JSON_VALUE) -> target-specific
6866 Expression::JsonValue(_) => Action::JsonQueryValueConvert,
6867 // AT TIME ZONE -> AT_TIMEZONE for Presto, FROM_UTC_TIMESTAMP for Spark,
6868 // TIMESTAMP(DATETIME(...)) for BigQuery, CONVERT_TIMEZONE for Snowflake
6869 Expression::AtTimeZone(_)
6870 if matches!(
6871 target,
6872 DialectType::Presto
6873 | DialectType::Trino
6874 | DialectType::Athena
6875 | DialectType::Spark
6876 | DialectType::Databricks
6877 | DialectType::BigQuery
6878 | DialectType::Snowflake
6879 ) =>
6880 {
6881 Action::AtTimeZoneConvert
6882 }
6883 // DAY_OF_WEEK -> dialect-specific
6884 Expression::DayOfWeek(_)
6885 if matches!(
6886 target,
6887 DialectType::DuckDB | DialectType::Spark | DialectType::Databricks
6888 ) =>
6889 {
6890 Action::DayOfWeekConvert
6891 }
6892 // CURRENT_USER -> CURRENT_USER() for Snowflake
6893 Expression::CurrentUser(_) if matches!(target, DialectType::Snowflake) => {
6894 Action::CurrentUserParens
6895 }
6896 // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
6897 Expression::ElementAt(_)
6898 if matches!(target, DialectType::PostgreSQL | DialectType::BigQuery) =>
6899 {
6900 Action::ElementAtConvert
6901 }
6902 // ARRAY[...] (ArrayFunc bracket_notation=false) -> convert for target dialect
6903 Expression::ArrayFunc(ref arr)
6904 if !arr.bracket_notation
6905 && matches!(
6906 target,
6907 DialectType::Spark
6908 | DialectType::Databricks
6909 | DialectType::Hive
6910 | DialectType::BigQuery
6911 | DialectType::DuckDB
6912 | DialectType::Snowflake
6913 | DialectType::Presto
6914 | DialectType::Trino
6915 | DialectType::Athena
6916 | DialectType::ClickHouse
6917 | DialectType::StarRocks
6918 ) =>
6919 {
6920 Action::ArraySyntaxConvert
6921 }
6922 // VARIANCE expression -> varSamp for ClickHouse
6923 Expression::Variance(_) if matches!(target, DialectType::ClickHouse) => {
6924 Action::VarianceToClickHouse
6925 }
6926 // STDDEV expression -> stddevSamp for ClickHouse
6927 Expression::Stddev(_) if matches!(target, DialectType::ClickHouse) => {
6928 Action::StddevToClickHouse
6929 }
6930 // ApproxQuantile -> APPROX_PERCENTILE for Snowflake
6931 Expression::ApproxQuantile(_) if matches!(target, DialectType::Snowflake) => {
6932 Action::ApproxQuantileConvert
6933 }
6934 // MonthsBetween -> target-specific
6935 Expression::MonthsBetween(_)
6936 if !matches!(
6937 target,
6938 DialectType::Spark | DialectType::Databricks | DialectType::Hive
6939 ) =>
6940 {
6941 Action::MonthsBetweenConvert
6942 }
6943 // AddMonths -> target-specific DATEADD/DATE_ADD
6944 Expression::AddMonths(_) => Action::AddMonthsConvert,
6945 // MapFromArrays -> target-specific (MAP, OBJECT_CONSTRUCT, MAP_FROM_ARRAYS)
6946 Expression::MapFromArrays(_)
6947 if !matches!(target, DialectType::Spark | DialectType::Databricks) =>
6948 {
6949 Action::MapFromArraysConvert
6950 }
6951 // CURRENT_USER -> CURRENT_USER() for Spark
6952 Expression::CurrentUser(_)
6953 if matches!(target, DialectType::Spark | DialectType::Databricks) =>
6954 {
6955 Action::CurrentUserSparkParens
6956 }
6957 // MONTH/YEAR/DAY('string') from Spark -> cast string to DATE for DuckDB/Presto
6958 Expression::Month(ref f) | Expression::Year(ref f) | Expression::Day(ref f)
6959 if matches!(
6960 source,
6961 DialectType::Spark | DialectType::Databricks | DialectType::Hive
6962 ) && matches!(&f.this, Expression::Literal(Literal::String(_)))
6963 && matches!(
6964 target,
6965 DialectType::DuckDB
6966 | DialectType::Presto
6967 | DialectType::Trino
6968 | DialectType::Athena
6969 | DialectType::PostgreSQL
6970 | DialectType::Redshift
6971 ) =>
6972 {
6973 Action::SparkDateFuncCast
6974 }
6975 // $parameter -> @parameter for BigQuery
6976 Expression::Parameter(ref p)
6977 if matches!(target, DialectType::BigQuery)
6978 && matches!(source, DialectType::DuckDB)
6979 && (p.style == crate::expressions::ParameterStyle::Dollar
6980 || p.style == crate::expressions::ParameterStyle::DoubleDollar) =>
6981 {
6982 Action::DollarParamConvert
6983 }
6984 // EscapeString literal: normalize literal newlines to \n
6985 Expression::Literal(Literal::EscapeString(ref s))
6986 if s.contains('\n') || s.contains('\r') || s.contains('\t') =>
6987 {
6988 Action::EscapeStringNormalize
6989 }
6990 // straight_join: keep lowercase for DuckDB, quote for MySQL
6991 Expression::Column(ref col)
6992 if col.name.name == "STRAIGHT_JOIN"
6993 && col.table.is_none()
6994 && matches!(source, DialectType::DuckDB)
6995 && matches!(target, DialectType::DuckDB | DialectType::MySQL) =>
6996 {
6997 Action::StraightJoinCase
6998 }
6999 // DATE and TIMESTAMP literal type conversions are now handled in the generator directly
7000 // Snowflake INTERVAL format: INTERVAL '2' HOUR -> INTERVAL '2 HOUR'
7001 Expression::Interval(ref iv)
7002 if matches!(
7003 target,
7004 DialectType::Snowflake
7005 | DialectType::PostgreSQL
7006 | DialectType::Redshift
7007 ) && iv.unit.is_some()
7008 && matches!(
7009 &iv.this,
7010 Some(Expression::Literal(Literal::String(_)))
7011 ) =>
7012 {
7013 Action::SnowflakeIntervalFormat
7014 }
7015 // TABLESAMPLE -> TABLESAMPLE RESERVOIR for DuckDB target
7016 Expression::TableSample(ref ts) if matches!(target, DialectType::DuckDB) => {
7017 if let Some(ref sample) = ts.sample {
7018 if !sample.explicit_method {
7019 Action::TablesampleReservoir
7020 } else {
7021 Action::None
7022 }
7023 } else {
7024 Action::None
7025 }
7026 }
7027 // TABLESAMPLE from non-Snowflake source to Snowflake: strip method and PERCENT
7028 // Handles both Expression::TableSample wrapper and Expression::Table with table_sample
7029 Expression::TableSample(ref ts)
7030 if matches!(target, DialectType::Snowflake)
7031 && !matches!(source, DialectType::Snowflake)
7032 && ts.sample.is_some() =>
7033 {
7034 if let Some(ref sample) = ts.sample {
7035 if !sample.explicit_method {
7036 Action::TablesampleSnowflakeStrip
7037 } else {
7038 Action::None
7039 }
7040 } else {
7041 Action::None
7042 }
7043 }
7044 Expression::Table(ref t)
7045 if matches!(target, DialectType::Snowflake)
7046 && !matches!(source, DialectType::Snowflake)
7047 && t.table_sample.is_some() =>
7048 {
7049 if let Some(ref sample) = t.table_sample {
7050 if !sample.explicit_method {
7051 Action::TablesampleSnowflakeStrip
7052 } else {
7053 Action::None
7054 }
7055 } else {
7056 Action::None
7057 }
7058 }
7059 // ALTER TABLE RENAME -> EXEC sp_rename for TSQL
7060 Expression::AlterTable(ref at)
7061 if matches!(target, DialectType::TSQL | DialectType::Fabric)
7062 && !at.actions.is_empty()
7063 && matches!(
7064 at.actions.first(),
7065 Some(crate::expressions::AlterTableAction::RenameTable(_))
7066 ) =>
7067 {
7068 Action::AlterTableToSpRename
7069 }
7070 // Subscript index: 1-based to 0-based for BigQuery/Hive/Spark
7071 Expression::Subscript(ref sub)
7072 if matches!(
7073 target,
7074 DialectType::BigQuery
7075 | DialectType::Hive
7076 | DialectType::Spark
7077 | DialectType::Databricks
7078 ) && matches!(
7079 source,
7080 DialectType::DuckDB
7081 | DialectType::PostgreSQL
7082 | DialectType::Presto
7083 | DialectType::Trino
7084 | DialectType::Redshift
7085 | DialectType::ClickHouse
7086 ) && matches!(&sub.index, Expression::Literal(Literal::Number(ref n)) if n.parse::<i64>().unwrap_or(0) > 0) =>
7087 {
7088 Action::ArrayIndexConvert
7089 }
7090 // ANY_VALUE IGNORE NULLS detection moved to the AnyValue arm above
7091 // MysqlNullsOrdering for Ordered is now handled in the Ordered arm above
7092 // RESPECT NULLS handling for SQLite (strip it, add NULLS LAST to ORDER BY)
7093 // and for MySQL (rewrite ORDER BY with CASE WHEN for null ordering)
7094 Expression::WindowFunction(ref wf) => {
7095 // BigQuery doesn't support NULLS FIRST/LAST in window function ORDER BY
7096 // EXCEPT for ROW_NUMBER which keeps NULLS LAST
7097 let is_row_number = matches!(wf.this, Expression::RowNumber(_));
7098 if matches!(target, DialectType::BigQuery)
7099 && !is_row_number
7100 && !wf.over.order_by.is_empty()
7101 && wf.over.order_by.iter().any(|o| o.nulls_first.is_some())
7102 {
7103 Action::BigQueryNullsOrdering
7104 // DuckDB -> MySQL: Add CASE WHEN for NULLS LAST simulation in window ORDER BY
7105 // But NOT when frame is RANGE/GROUPS, since adding CASE WHEN would break value-based frames
7106 } else {
7107 let source_nulls_last = matches!(source, DialectType::DuckDB);
7108 let has_range_frame = wf.over.frame.as_ref().map_or(false, |f| {
7109 matches!(
7110 f.kind,
7111 crate::expressions::WindowFrameKind::Range
7112 | crate::expressions::WindowFrameKind::Groups
7113 )
7114 });
7115 if source_nulls_last
7116 && matches!(target, DialectType::MySQL)
7117 && !wf.over.order_by.is_empty()
7118 && wf.over.order_by.iter().any(|o| !o.desc)
7119 && !has_range_frame
7120 {
7121 Action::MysqlNullsLastRewrite
7122 } else {
7123 match &wf.this {
7124 Expression::FirstValue(ref vf)
7125 | Expression::LastValue(ref vf)
7126 if vf.ignore_nulls == Some(false) =>
7127 {
7128 // RESPECT NULLS
7129 match target {
7130 DialectType::SQLite => Action::RespectNullsConvert,
7131 _ => Action::None,
7132 }
7133 }
7134 _ => Action::None,
7135 }
7136 }
7137 }
7138 }
7139 // CREATE TABLE a LIKE b -> dialect-specific transformations
7140 Expression::CreateTable(ref ct)
7141 if ct.columns.is_empty()
7142 && ct.constraints.iter().any(|c| {
7143 matches!(c, crate::expressions::TableConstraint::Like { .. })
7144 })
7145 && matches!(
7146 target,
7147 DialectType::DuckDB | DialectType::SQLite | DialectType::Drill
7148 ) =>
7149 {
7150 Action::CreateTableLikeToCtas
7151 }
7152 Expression::CreateTable(ref ct)
7153 if ct.columns.is_empty()
7154 && ct.constraints.iter().any(|c| {
7155 matches!(c, crate::expressions::TableConstraint::Like { .. })
7156 })
7157 && matches!(target, DialectType::TSQL | DialectType::Fabric) =>
7158 {
7159 Action::CreateTableLikeToSelectInto
7160 }
7161 Expression::CreateTable(ref ct)
7162 if ct.columns.is_empty()
7163 && ct.constraints.iter().any(|c| {
7164 matches!(c, crate::expressions::TableConstraint::Like { .. })
7165 })
7166 && matches!(target, DialectType::ClickHouse) =>
7167 {
7168 Action::CreateTableLikeToAs
7169 }
7170 // CREATE TABLE: strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
7171 Expression::CreateTable(ref ct)
7172 if matches!(target, DialectType::DuckDB)
7173 && matches!(
7174 source,
7175 DialectType::DuckDB
7176 | DialectType::Spark
7177 | DialectType::Databricks
7178 | DialectType::Hive
7179 ) =>
7180 {
7181 let has_comment = ct.columns.iter().any(|c| {
7182 c.comment.is_some()
7183 || c.constraints.iter().any(|con| {
7184 matches!(con, crate::expressions::ColumnConstraint::Comment(_))
7185 })
7186 });
7187 let has_props = !ct.properties.is_empty();
7188 if has_comment || has_props {
7189 Action::CreateTableStripComment
7190 } else {
7191 Action::None
7192 }
7193 }
7194 // Array conversion: Expression::Array -> Expression::ArrayFunc for PostgreSQL
7195 Expression::Array(_)
7196 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) =>
7197 {
7198 Action::ArrayConcatBracketConvert
7199 }
7200 // ArrayFunc (bracket notation) -> Function("ARRAY") for Redshift (from BigQuery source)
7201 Expression::ArrayFunc(ref arr)
7202 if arr.bracket_notation
7203 && matches!(source, DialectType::BigQuery)
7204 && matches!(target, DialectType::Redshift) =>
7205 {
7206 Action::ArrayConcatBracketConvert
7207 }
7208 // BIT_OR/BIT_AND/BIT_XOR: float/decimal arg cast for DuckDB, or rename for Snowflake
7209 Expression::BitwiseOrAgg(ref f)
7210 | Expression::BitwiseAndAgg(ref f)
7211 | Expression::BitwiseXorAgg(ref f) => {
7212 if matches!(target, DialectType::DuckDB) {
7213 // Check if the arg is CAST(val AS FLOAT/DOUBLE/DECIMAL/REAL)
7214 if let Expression::Cast(ref c) = f.this {
7215 match &c.to {
7216 DataType::Float { .. }
7217 | DataType::Double { .. }
7218 | DataType::Decimal { .. } => Action::BitAggFloatCast,
7219 DataType::Custom { ref name }
7220 if name.eq_ignore_ascii_case("REAL") =>
7221 {
7222 Action::BitAggFloatCast
7223 }
7224 _ => Action::None,
7225 }
7226 } else {
7227 Action::None
7228 }
7229 } else if matches!(target, DialectType::Snowflake) {
7230 Action::BitAggSnowflakeRename
7231 } else {
7232 Action::None
7233 }
7234 }
7235 // FILTER -> IFF for Snowflake (aggregate functions with FILTER clause)
7236 Expression::Filter(ref _f) if matches!(target, DialectType::Snowflake) => {
7237 Action::FilterToIff
7238 }
7239 // AggFunc.filter -> IFF wrapping for Snowflake (e.g., AVG(x) FILTER(WHERE cond))
7240 Expression::Avg(ref f)
7241 | Expression::Sum(ref f)
7242 | Expression::Min(ref f)
7243 | Expression::Max(ref f)
7244 | Expression::CountIf(ref f)
7245 | Expression::Stddev(ref f)
7246 | Expression::StddevPop(ref f)
7247 | Expression::StddevSamp(ref f)
7248 | Expression::Variance(ref f)
7249 | Expression::VarPop(ref f)
7250 | Expression::VarSamp(ref f)
7251 | Expression::Median(ref f)
7252 | Expression::Mode(ref f)
7253 | Expression::First(ref f)
7254 | Expression::Last(ref f)
7255 | Expression::ApproxDistinct(ref f)
7256 if f.filter.is_some() && matches!(target, DialectType::Snowflake) =>
7257 {
7258 Action::AggFilterToIff
7259 }
7260 Expression::Count(ref c)
7261 if c.filter.is_some() && matches!(target, DialectType::Snowflake) =>
7262 {
7263 Action::AggFilterToIff
7264 }
7265 // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN ... END) for dialects that don't support multi-arg DISTINCT
7266 Expression::Count(ref c)
7267 if c.distinct
7268 && matches!(&c.this, Some(Expression::Tuple(_)))
7269 && matches!(
7270 target,
7271 DialectType::Presto
7272 | DialectType::Trino
7273 | DialectType::DuckDB
7274 | DialectType::PostgreSQL
7275 ) =>
7276 {
7277 Action::CountDistinctMultiArg
7278 }
7279 // JSON arrow -> GET_PATH/PARSE_JSON for Snowflake
7280 Expression::JsonExtract(_) if matches!(target, DialectType::Snowflake) => {
7281 Action::JsonToGetPath
7282 }
7283 // DuckDB struct/dict -> BigQuery STRUCT / Presto ROW
7284 Expression::Struct(_)
7285 if matches!(
7286 target,
7287 DialectType::BigQuery | DialectType::Presto | DialectType::Trino
7288 ) && matches!(source, DialectType::DuckDB) =>
7289 {
7290 Action::StructToRow
7291 }
7292 // DuckDB curly-brace dict {'key': value} -> BigQuery STRUCT / Presto ROW
7293 Expression::MapFunc(ref m)
7294 if m.curly_brace_syntax
7295 && matches!(
7296 target,
7297 DialectType::BigQuery | DialectType::Presto | DialectType::Trino
7298 )
7299 && matches!(source, DialectType::DuckDB) =>
7300 {
7301 Action::StructToRow
7302 }
7303 // APPROX_COUNT_DISTINCT -> APPROX_DISTINCT for Presto/Trino
7304 Expression::ApproxCountDistinct(_)
7305 if matches!(
7306 target,
7307 DialectType::Presto | DialectType::Trino | DialectType::Athena
7308 ) =>
7309 {
7310 Action::ApproxCountDistinctToApproxDistinct
7311 }
7312 // ARRAY_CONTAINS(arr, val) -> CONTAINS(arr, val) for Presto, ARRAY_CONTAINS(CAST(val AS VARIANT), arr) for Snowflake
7313 Expression::ArrayContains(_)
7314 if matches!(
7315 target,
7316 DialectType::Presto | DialectType::Trino | DialectType::Snowflake
7317 ) && !(matches!(source, DialectType::Snowflake) && matches!(target, DialectType::Snowflake)) =>
7318 {
7319 Action::ArrayContainsConvert
7320 }
7321 // ARRAY_CONTAINS -> DuckDB NULL-aware CASE (from Snowflake source with check_null semantics)
7322 Expression::ArrayContains(_)
7323 if matches!(target, DialectType::DuckDB)
7324 && matches!(source, DialectType::Snowflake) =>
7325 {
7326 Action::ArrayContainsDuckDBConvert
7327 }
7328 // ARRAY_EXCEPT -> target-specific conversion
7329 Expression::ArrayExcept(_)
7330 if matches!(
7331 target,
7332 DialectType::DuckDB | DialectType::Snowflake | DialectType::Presto | DialectType::Trino | DialectType::Athena
7333 ) =>
7334 {
7335 Action::ArrayExceptConvert
7336 }
7337 // ARRAY_DISTINCT -> DuckDB LIST_DISTINCT with NULL-aware CASE
7338 Expression::ArrayDistinct(_)
7339 if matches!(target, DialectType::DuckDB)
7340 && matches!(source, DialectType::Snowflake) =>
7341 {
7342 Action::ArrayDistinctConvert
7343 }
7344 // StrPosition with position -> complex expansion for Presto/DuckDB
7345 // STRPOS doesn't support a position arg in these dialects
7346 Expression::StrPosition(ref sp)
7347 if sp.position.is_some()
7348 && matches!(
7349 target,
7350 DialectType::Presto
7351 | DialectType::Trino
7352 | DialectType::Athena
7353 | DialectType::DuckDB
7354 ) =>
7355 {
7356 Action::StrPositionExpand
7357 }
7358 // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
7359 Expression::First(ref f)
7360 if f.ignore_nulls == Some(true)
7361 && matches!(target, DialectType::DuckDB) =>
7362 {
7363 Action::FirstToAnyValue
7364 }
7365 // BEGIN -> START TRANSACTION for Presto/Trino
7366 Expression::Command(ref cmd)
7367 if cmd.this.eq_ignore_ascii_case("BEGIN")
7368 && matches!(
7369 target,
7370 DialectType::Presto | DialectType::Trino | DialectType::Athena
7371 ) =>
7372 {
7373 // Handled inline below
7374 Action::None // We'll handle it directly
7375 }
7376 // Note: PostgreSQL ^ is now parsed as Power directly (not BitwiseXor).
7377 // PostgreSQL # is parsed as BitwiseXor (which is correct).
7378 // a || b (Concat operator) -> CONCAT function for Presto/Trino
7379 Expression::Concat(ref _op)
7380 if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
7381 && matches!(target, DialectType::Presto | DialectType::Trino) =>
7382 {
7383 Action::PipeConcatToConcat
7384 }
7385 _ => Action::None,
7386 }
7387 };
7388
7389 match action {
7390 Action::None => {
7391 // Handle inline transforms that don't need a dedicated action
7392
7393 // BETWEEN SYMMETRIC/ASYMMETRIC expansion for non-PostgreSQL/Dremio targets
7394 if let Expression::Between(ref b) = e {
7395 if let Some(sym) = b.symmetric {
7396 let keeps_symmetric =
7397 matches!(target, DialectType::PostgreSQL | DialectType::Dremio);
7398 if !keeps_symmetric {
7399 if sym {
7400 // SYMMETRIC: expand to (x BETWEEN a AND b OR x BETWEEN b AND a)
7401 let b = if let Expression::Between(b) = e {
7402 *b
7403 } else {
7404 unreachable!()
7405 };
7406 let between1 = Expression::Between(Box::new(
7407 crate::expressions::Between {
7408 this: b.this.clone(),
7409 low: b.low.clone(),
7410 high: b.high.clone(),
7411 not: b.not,
7412 symmetric: None,
7413 },
7414 ));
7415 let between2 = Expression::Between(Box::new(
7416 crate::expressions::Between {
7417 this: b.this,
7418 low: b.high,
7419 high: b.low,
7420 not: b.not,
7421 symmetric: None,
7422 },
7423 ));
7424 return Ok(Expression::Paren(Box::new(
7425 crate::expressions::Paren {
7426 this: Expression::Or(Box::new(
7427 crate::expressions::BinaryOp::new(
7428 between1, between2,
7429 ),
7430 )),
7431 trailing_comments: vec![],
7432 },
7433 )));
7434 } else {
7435 // ASYMMETRIC: strip qualifier, keep as regular BETWEEN
7436 let b = if let Expression::Between(b) = e {
7437 *b
7438 } else {
7439 unreachable!()
7440 };
7441 return Ok(Expression::Between(Box::new(
7442 crate::expressions::Between {
7443 this: b.this,
7444 low: b.low,
7445 high: b.high,
7446 not: b.not,
7447 symmetric: None,
7448 },
7449 )));
7450 }
7451 }
7452 }
7453 }
7454
7455 // ILIKE -> LOWER(x) LIKE LOWER(y) for StarRocks/Doris
7456 if let Expression::ILike(ref _like) = e {
7457 if matches!(target, DialectType::StarRocks | DialectType::Doris) {
7458 let like = if let Expression::ILike(l) = e {
7459 *l
7460 } else {
7461 unreachable!()
7462 };
7463 let lower_left = Expression::Function(Box::new(Function::new(
7464 "LOWER".to_string(),
7465 vec![like.left],
7466 )));
7467 let lower_right = Expression::Function(Box::new(Function::new(
7468 "LOWER".to_string(),
7469 vec![like.right],
7470 )));
7471 return Ok(Expression::Like(Box::new(crate::expressions::LikeOp {
7472 left: lower_left,
7473 right: lower_right,
7474 escape: like.escape,
7475 quantifier: like.quantifier,
7476 inferred_type: None,
7477 })));
7478 }
7479 }
7480
7481 // Oracle DBMS_RANDOM.VALUE() -> RANDOM() for PostgreSQL, RAND() for others
7482 if let Expression::MethodCall(ref mc) = e {
7483 if matches!(source, DialectType::Oracle)
7484 && mc.method.name.eq_ignore_ascii_case("VALUE")
7485 && mc.args.is_empty()
7486 {
7487 let is_dbms_random = match &mc.this {
7488 Expression::Identifier(id) => {
7489 id.name.eq_ignore_ascii_case("DBMS_RANDOM")
7490 }
7491 Expression::Column(col) => {
7492 col.table.is_none()
7493 && col.name.name.eq_ignore_ascii_case("DBMS_RANDOM")
7494 }
7495 _ => false,
7496 };
7497 if is_dbms_random {
7498 let func_name = match target {
7499 DialectType::PostgreSQL
7500 | DialectType::Redshift
7501 | DialectType::DuckDB
7502 | DialectType::SQLite => "RANDOM",
7503 DialectType::Oracle => "DBMS_RANDOM.VALUE",
7504 _ => "RAND",
7505 };
7506 return Ok(Expression::Function(Box::new(Function::new(
7507 func_name.to_string(),
7508 vec![],
7509 ))));
7510 }
7511 }
7512 }
7513 // TRIM without explicit position -> add BOTH for ClickHouse
7514 if let Expression::Trim(ref trim) = e {
7515 if matches!(target, DialectType::ClickHouse)
7516 && trim.sql_standard_syntax
7517 && trim.characters.is_some()
7518 && !trim.position_explicit
7519 {
7520 let mut new_trim = (**trim).clone();
7521 new_trim.position_explicit = true;
7522 return Ok(Expression::Trim(Box::new(new_trim)));
7523 }
7524 }
7525 // BEGIN -> START TRANSACTION for Presto/Trino
7526 if let Expression::Transaction(ref txn) = e {
7527 if matches!(
7528 target,
7529 DialectType::Presto | DialectType::Trino | DialectType::Athena
7530 ) {
7531 // Convert BEGIN to START TRANSACTION by setting mark to "START"
7532 let mut txn = txn.clone();
7533 txn.mark = Some(Box::new(Expression::Identifier(Identifier::new(
7534 "START".to_string(),
7535 ))));
7536 return Ok(Expression::Transaction(Box::new(*txn)));
7537 }
7538 }
7539 // IS TRUE/FALSE -> simplified forms for Presto/Trino
7540 if matches!(
7541 target,
7542 DialectType::Presto | DialectType::Trino | DialectType::Athena
7543 ) {
7544 match &e {
7545 Expression::IsTrue(itf) if !itf.not => {
7546 // x IS TRUE -> x
7547 return Ok(itf.this.clone());
7548 }
7549 Expression::IsTrue(itf) if itf.not => {
7550 // x IS NOT TRUE -> NOT x
7551 return Ok(Expression::Not(Box::new(
7552 crate::expressions::UnaryOp {
7553 this: itf.this.clone(),
7554 inferred_type: None,
7555 },
7556 )));
7557 }
7558 Expression::IsFalse(itf) if !itf.not => {
7559 // x IS FALSE -> NOT x
7560 return Ok(Expression::Not(Box::new(
7561 crate::expressions::UnaryOp {
7562 this: itf.this.clone(),
7563 inferred_type: None,
7564 },
7565 )));
7566 }
7567 Expression::IsFalse(itf) if itf.not => {
7568 // x IS NOT FALSE -> NOT NOT x
7569 let not_x =
7570 Expression::Not(Box::new(crate::expressions::UnaryOp {
7571 this: itf.this.clone(),
7572 inferred_type: None,
7573 }));
7574 return Ok(Expression::Not(Box::new(
7575 crate::expressions::UnaryOp {
7576 this: not_x,
7577 inferred_type: None,
7578 },
7579 )));
7580 }
7581 _ => {}
7582 }
7583 }
7584 // x IS NOT FALSE -> NOT x IS FALSE for Redshift
7585 if matches!(target, DialectType::Redshift) {
7586 if let Expression::IsFalse(ref itf) = e {
7587 if itf.not {
7588 return Ok(Expression::Not(Box::new(
7589 crate::expressions::UnaryOp {
7590 this: Expression::IsFalse(Box::new(
7591 crate::expressions::IsTrueFalse {
7592 this: itf.this.clone(),
7593 not: false,
7594 },
7595 )),
7596 inferred_type: None,
7597 },
7598 )));
7599 }
7600 }
7601 }
7602 // REGEXP_REPLACE: add 'g' flag when source defaults to global replacement
7603 // Snowflake default is global, PostgreSQL/DuckDB default is first-match-only
7604 if let Expression::Function(ref f) = e {
7605 if f.name.eq_ignore_ascii_case("REGEXP_REPLACE")
7606 && matches!(source, DialectType::Snowflake)
7607 && matches!(target, DialectType::PostgreSQL | DialectType::DuckDB)
7608 {
7609 if f.args.len() == 3 {
7610 let mut args = f.args.clone();
7611 args.push(Expression::string("g"));
7612 return Ok(Expression::Function(Box::new(Function::new(
7613 "REGEXP_REPLACE".to_string(),
7614 args,
7615 ))));
7616 } else if f.args.len() == 4 {
7617 // 4th arg might be position, add 'g' as 5th
7618 let mut args = f.args.clone();
7619 args.push(Expression::string("g"));
7620 return Ok(Expression::Function(Box::new(Function::new(
7621 "REGEXP_REPLACE".to_string(),
7622 args,
7623 ))));
7624 }
7625 }
7626 }
7627 Ok(e)
7628 }
7629
7630 Action::GreatestLeastNull => {
7631 let f = if let Expression::Function(f) = e {
7632 *f
7633 } else {
7634 unreachable!("action only triggered for Function expressions")
7635 };
7636 let mut null_checks: Vec<Expression> = f
7637 .args
7638 .iter()
7639 .map(|a| {
7640 Expression::IsNull(Box::new(IsNull {
7641 this: a.clone(),
7642 not: false,
7643 postfix_form: false,
7644 }))
7645 })
7646 .collect();
7647 let condition = if null_checks.len() == 1 {
7648 null_checks.remove(0)
7649 } else {
7650 let first = null_checks.remove(0);
7651 null_checks.into_iter().fold(first, |acc, check| {
7652 Expression::Or(Box::new(BinaryOp::new(acc, check)))
7653 })
7654 };
7655 Ok(Expression::Case(Box::new(Case {
7656 operand: None,
7657 whens: vec![(condition, Expression::Null(Null))],
7658 else_: Some(Expression::Function(Box::new(Function::new(
7659 f.name, f.args,
7660 )))),
7661 comments: Vec::new(),
7662 inferred_type: None,
7663 })))
7664 }
7665
7666 Action::ArrayGenerateRange => {
7667 let f = if let Expression::Function(f) = e {
7668 *f
7669 } else {
7670 unreachable!("action only triggered for Function expressions")
7671 };
7672 let start = f.args[0].clone();
7673 let end = f.args[1].clone();
7674 let step = f.args.get(2).cloned();
7675
7676 let end_minus_1 = Expression::Sub(Box::new(BinaryOp::new(
7677 end.clone(),
7678 Expression::number(1),
7679 )));
7680
7681 match target {
7682 DialectType::PostgreSQL | DialectType::Redshift => {
7683 let mut args = vec![start, end_minus_1];
7684 if let Some(s) = step {
7685 args.push(s);
7686 }
7687 Ok(Expression::Function(Box::new(Function::new(
7688 "GENERATE_SERIES".to_string(),
7689 args,
7690 ))))
7691 }
7692 DialectType::Presto | DialectType::Trino => {
7693 let mut args = vec![start, end_minus_1];
7694 if let Some(s) = step {
7695 args.push(s);
7696 }
7697 Ok(Expression::Function(Box::new(Function::new(
7698 "SEQUENCE".to_string(),
7699 args,
7700 ))))
7701 }
7702 DialectType::BigQuery => {
7703 let mut args = vec![start, end_minus_1];
7704 if let Some(s) = step {
7705 args.push(s);
7706 }
7707 Ok(Expression::Function(Box::new(Function::new(
7708 "GENERATE_ARRAY".to_string(),
7709 args,
7710 ))))
7711 }
7712 DialectType::Snowflake => {
7713 let normalized_end = Expression::Add(Box::new(BinaryOp::new(
7714 Expression::Paren(Box::new(Paren {
7715 this: end_minus_1,
7716 trailing_comments: vec![],
7717 })),
7718 Expression::number(1),
7719 )));
7720 let mut args = vec![start, normalized_end];
7721 if let Some(s) = step {
7722 args.push(s);
7723 }
7724 Ok(Expression::Function(Box::new(Function::new(
7725 "ARRAY_GENERATE_RANGE".to_string(),
7726 args,
7727 ))))
7728 }
7729 _ => Ok(Expression::Function(Box::new(Function::new(
7730 f.name, f.args,
7731 )))),
7732 }
7733 }
7734
7735 Action::Div0TypedDivision => {
7736 let if_func = if let Expression::IfFunc(f) = e {
7737 *f
7738 } else {
7739 unreachable!("action only triggered for IfFunc expressions")
7740 };
7741 if let Some(Expression::Div(div)) = if_func.false_value {
7742 let cast_type = if matches!(target, DialectType::SQLite) {
7743 DataType::Float {
7744 precision: None,
7745 scale: None,
7746 real_spelling: true,
7747 }
7748 } else {
7749 DataType::Double {
7750 precision: None,
7751 scale: None,
7752 }
7753 };
7754 let casted_left = Expression::Cast(Box::new(Cast {
7755 this: div.left,
7756 to: cast_type,
7757 trailing_comments: vec![],
7758 double_colon_syntax: false,
7759 format: None,
7760 default: None,
7761 inferred_type: None,
7762 }));
7763 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
7764 condition: if_func.condition,
7765 true_value: if_func.true_value,
7766 false_value: Some(Expression::Div(Box::new(BinaryOp::new(
7767 casted_left,
7768 div.right,
7769 )))),
7770 original_name: if_func.original_name,
7771 inferred_type: None,
7772 })))
7773 } else {
7774 // Not actually a Div, reconstruct
7775 Ok(Expression::IfFunc(Box::new(if_func)))
7776 }
7777 }
7778
7779 Action::ArrayAggCollectList => {
7780 let agg = if let Expression::ArrayAgg(a) = e {
7781 *a
7782 } else {
7783 unreachable!("action only triggered for ArrayAgg expressions")
7784 };
7785 Ok(Expression::ArrayAgg(Box::new(AggFunc {
7786 name: Some("COLLECT_LIST".to_string()),
7787 ..agg
7788 })))
7789 }
7790
7791 Action::ArrayAggToGroupConcat => {
7792 let agg = if let Expression::ArrayAgg(a) = e {
7793 *a
7794 } else {
7795 unreachable!("action only triggered for ArrayAgg expressions")
7796 };
7797 Ok(Expression::ArrayAgg(Box::new(AggFunc {
7798 name: Some("GROUP_CONCAT".to_string()),
7799 ..agg
7800 })))
7801 }
7802
7803 Action::ArrayAggWithinGroupFilter => {
7804 let wg = if let Expression::WithinGroup(w) = e {
7805 *w
7806 } else {
7807 unreachable!("action only triggered for WithinGroup expressions")
7808 };
7809 if let Expression::ArrayAgg(inner_agg) = wg.this {
7810 let col = inner_agg.this.clone();
7811 let filter = Expression::IsNull(Box::new(IsNull {
7812 this: col,
7813 not: true,
7814 postfix_form: false,
7815 }));
7816 // For DuckDB, add explicit NULLS FIRST for DESC ordering
7817 let order_by = if matches!(target, DialectType::DuckDB) {
7818 wg.order_by
7819 .into_iter()
7820 .map(|mut o| {
7821 if o.desc && o.nulls_first.is_none() {
7822 o.nulls_first = Some(true);
7823 }
7824 o
7825 })
7826 .collect()
7827 } else {
7828 wg.order_by
7829 };
7830 Ok(Expression::ArrayAgg(Box::new(AggFunc {
7831 this: inner_agg.this,
7832 distinct: inner_agg.distinct,
7833 filter: Some(filter),
7834 order_by,
7835 name: inner_agg.name,
7836 ignore_nulls: inner_agg.ignore_nulls,
7837 having_max: inner_agg.having_max,
7838 limit: inner_agg.limit,
7839 inferred_type: None,
7840 })))
7841 } else {
7842 Ok(Expression::WithinGroup(Box::new(wg)))
7843 }
7844 }
7845
7846 Action::ArrayAggFilter => {
7847 let agg = if let Expression::ArrayAgg(a) = e {
7848 *a
7849 } else {
7850 unreachable!("action only triggered for ArrayAgg expressions")
7851 };
7852 let col = agg.this.clone();
7853 let filter = Expression::IsNull(Box::new(IsNull {
7854 this: col,
7855 not: true,
7856 postfix_form: false,
7857 }));
7858 Ok(Expression::ArrayAgg(Box::new(AggFunc {
7859 filter: Some(filter),
7860 ..agg
7861 })))
7862 }
7863
7864 Action::ArrayAggNullFilter => {
7865 // ARRAY_AGG(x) FILTER(WHERE cond) -> ARRAY_AGG(x) FILTER(WHERE cond AND NOT x IS NULL)
7866 // For source dialects that exclude NULLs (Spark/Hive) targeting DuckDB which includes them
7867 let agg = if let Expression::ArrayAgg(a) = e {
7868 *a
7869 } else {
7870 unreachable!("action only triggered for ArrayAgg expressions")
7871 };
7872 let col = agg.this.clone();
7873 let not_null = Expression::IsNull(Box::new(IsNull {
7874 this: col,
7875 not: true,
7876 postfix_form: true, // Use "NOT x IS NULL" form (prefix NOT)
7877 }));
7878 let new_filter = if let Some(existing_filter) = agg.filter {
7879 // AND the NOT IS NULL with existing filter
7880 Expression::And(Box::new(crate::expressions::BinaryOp::new(
7881 existing_filter,
7882 not_null,
7883 )))
7884 } else {
7885 not_null
7886 };
7887 Ok(Expression::ArrayAgg(Box::new(AggFunc {
7888 filter: Some(new_filter),
7889 ..agg
7890 })))
7891 }
7892
7893 Action::BigQueryArraySelectAsStructToSnowflake => {
7894 // ARRAY(SELECT AS STRUCT x1 AS x1, x2 AS x2 FROM t)
7895 // -> (SELECT ARRAY_AGG(OBJECT_CONSTRUCT('x1', x1, 'x2', x2)) FROM t)
7896 if let Expression::Function(mut f) = e {
7897 let is_match = f.args.len() == 1
7898 && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT"));
7899 if is_match {
7900 let inner_select = match f.args.remove(0) {
7901 Expression::Select(s) => *s,
7902 _ => unreachable!(
7903 "argument already verified to be a Select expression"
7904 ),
7905 };
7906 // Build OBJECT_CONSTRUCT args from SELECT expressions
7907 let mut oc_args = Vec::new();
7908 for expr in &inner_select.expressions {
7909 match expr {
7910 Expression::Alias(a) => {
7911 let key = Expression::Literal(Literal::String(
7912 a.alias.name.clone(),
7913 ));
7914 let value = a.this.clone();
7915 oc_args.push(key);
7916 oc_args.push(value);
7917 }
7918 Expression::Column(c) => {
7919 let key = Expression::Literal(Literal::String(
7920 c.name.name.clone(),
7921 ));
7922 oc_args.push(key);
7923 oc_args.push(expr.clone());
7924 }
7925 _ => {
7926 oc_args.push(expr.clone());
7927 }
7928 }
7929 }
7930 let object_construct = Expression::Function(Box::new(Function::new(
7931 "OBJECT_CONSTRUCT".to_string(),
7932 oc_args,
7933 )));
7934 let array_agg = Expression::Function(Box::new(Function::new(
7935 "ARRAY_AGG".to_string(),
7936 vec![object_construct],
7937 )));
7938 let mut new_select = crate::expressions::Select::new();
7939 new_select.expressions = vec![array_agg];
7940 new_select.from = inner_select.from.clone();
7941 new_select.where_clause = inner_select.where_clause.clone();
7942 new_select.group_by = inner_select.group_by.clone();
7943 new_select.having = inner_select.having.clone();
7944 new_select.joins = inner_select.joins.clone();
7945 Ok(Expression::Subquery(Box::new(
7946 crate::expressions::Subquery {
7947 this: Expression::Select(Box::new(new_select)),
7948 alias: None,
7949 column_aliases: Vec::new(),
7950 order_by: None,
7951 limit: None,
7952 offset: None,
7953 distribute_by: None,
7954 sort_by: None,
7955 cluster_by: None,
7956 lateral: false,
7957 modifiers_inside: false,
7958 trailing_comments: Vec::new(),
7959 inferred_type: None,
7960 },
7961 )))
7962 } else {
7963 Ok(Expression::Function(f))
7964 }
7965 } else {
7966 Ok(e)
7967 }
7968 }
7969
7970 Action::BigQueryPercentileContToDuckDB => {
7971 // PERCENTILE_CONT(x, frac [RESPECT NULLS]) -> QUANTILE_CONT(x, frac) for DuckDB
7972 if let Expression::AggregateFunction(mut af) = e {
7973 af.name = "QUANTILE_CONT".to_string();
7974 af.ignore_nulls = None; // Strip RESPECT/IGNORE NULLS
7975 // Keep only first 2 args
7976 if af.args.len() > 2 {
7977 af.args.truncate(2);
7978 }
7979 Ok(Expression::AggregateFunction(af))
7980 } else {
7981 Ok(e)
7982 }
7983 }
7984
7985 Action::ArrayAggIgnoreNullsDuckDB => {
7986 // ARRAY_AGG(x IGNORE NULLS ORDER BY a, b DESC) -> ARRAY_AGG(x ORDER BY a NULLS FIRST, b DESC)
7987 // Strip IGNORE NULLS, add NULLS FIRST to first ORDER BY column
7988 let mut agg = if let Expression::ArrayAgg(a) = e {
7989 *a
7990 } else {
7991 unreachable!("action only triggered for ArrayAgg expressions")
7992 };
7993 agg.ignore_nulls = None; // Strip IGNORE NULLS
7994 if !agg.order_by.is_empty() {
7995 agg.order_by[0].nulls_first = Some(true);
7996 }
7997 Ok(Expression::ArrayAgg(Box::new(agg)))
7998 }
7999
8000 Action::CountDistinctMultiArg => {
8001 // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN a IS NULL THEN NULL WHEN b IS NULL THEN NULL ELSE (a, b) END)
8002 if let Expression::Count(c) = e {
8003 if let Some(Expression::Tuple(t)) = c.this {
8004 let args = t.expressions;
8005 // Build CASE expression:
8006 // WHEN a IS NULL THEN NULL WHEN b IS NULL THEN NULL ELSE (a, b) END
8007 let mut whens = Vec::new();
8008 for arg in &args {
8009 whens.push((
8010 Expression::IsNull(Box::new(IsNull {
8011 this: arg.clone(),
8012 not: false,
8013 postfix_form: false,
8014 })),
8015 Expression::Null(crate::expressions::Null),
8016 ));
8017 }
8018 // Build the tuple for ELSE
8019 let tuple_expr =
8020 Expression::Tuple(Box::new(crate::expressions::Tuple {
8021 expressions: args,
8022 }));
8023 let case_expr = Expression::Case(Box::new(crate::expressions::Case {
8024 operand: None,
8025 whens,
8026 else_: Some(tuple_expr),
8027 comments: Vec::new(),
8028 inferred_type: None,
8029 }));
8030 Ok(Expression::Count(Box::new(crate::expressions::CountFunc {
8031 this: Some(case_expr),
8032 star: false,
8033 distinct: true,
8034 filter: c.filter,
8035 ignore_nulls: c.ignore_nulls,
8036 original_name: c.original_name,
8037 inferred_type: None,
8038 })))
8039 } else {
8040 Ok(Expression::Count(c))
8041 }
8042 } else {
8043 Ok(e)
8044 }
8045 }
8046
8047 Action::CastTimestampToDatetime => {
8048 let c = if let Expression::Cast(c) = e {
8049 *c
8050 } else {
8051 unreachable!("action only triggered for Cast expressions")
8052 };
8053 Ok(Expression::Cast(Box::new(Cast {
8054 to: DataType::Custom {
8055 name: "DATETIME".to_string(),
8056 },
8057 ..c
8058 })))
8059 }
8060
8061 Action::CastTimestampStripTz => {
8062 // CAST(x AS TIMESTAMP(n) WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark/BigQuery
8063 let c = if let Expression::Cast(c) = e {
8064 *c
8065 } else {
8066 unreachable!("action only triggered for Cast expressions")
8067 };
8068 Ok(Expression::Cast(Box::new(Cast {
8069 to: DataType::Timestamp {
8070 precision: None,
8071 timezone: false,
8072 },
8073 ..c
8074 })))
8075 }
8076
8077 Action::CastTimestamptzToFunc => {
8078 // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) function for MySQL/StarRocks
8079 let c = if let Expression::Cast(c) = e {
8080 *c
8081 } else {
8082 unreachable!("action only triggered for Cast expressions")
8083 };
8084 Ok(Expression::Function(Box::new(Function::new(
8085 "TIMESTAMP".to_string(),
8086 vec![c.this],
8087 ))))
8088 }
8089
8090 Action::ToDateToCast => {
8091 // Convert TO_DATE(x) -> CAST(x AS DATE) for DuckDB
8092 if let Expression::Function(f) = e {
8093 let arg = f.args.into_iter().next().unwrap();
8094 Ok(Expression::Cast(Box::new(Cast {
8095 this: arg,
8096 to: DataType::Date,
8097 double_colon_syntax: false,
8098 trailing_comments: vec![],
8099 format: None,
8100 default: None,
8101 inferred_type: None,
8102 })))
8103 } else {
8104 Ok(e)
8105 }
8106 }
8107 Action::DateTruncWrapCast => {
8108 // Handle both Expression::DateTrunc/TimestampTrunc and
8109 // Expression::Function("DATE_TRUNC", [unit, expr])
8110 match e {
8111 Expression::DateTrunc(d) | Expression::TimestampTrunc(d) => {
8112 let input_type = match &d.this {
8113 Expression::Cast(c) => Some(c.to.clone()),
8114 _ => None,
8115 };
8116 if let Some(cast_type) = input_type {
8117 let is_time = matches!(cast_type, DataType::Time { .. });
8118 if is_time {
8119 let date_expr = Expression::Cast(Box::new(Cast {
8120 this: Expression::Literal(
8121 crate::expressions::Literal::String(
8122 "1970-01-01".to_string(),
8123 ),
8124 ),
8125 to: DataType::Date,
8126 double_colon_syntax: false,
8127 trailing_comments: vec![],
8128 format: None,
8129 default: None,
8130 inferred_type: None,
8131 }));
8132 let add_expr =
8133 Expression::Add(Box::new(BinaryOp::new(date_expr, d.this)));
8134 let inner = Expression::DateTrunc(Box::new(DateTruncFunc {
8135 this: add_expr,
8136 unit: d.unit,
8137 }));
8138 Ok(Expression::Cast(Box::new(Cast {
8139 this: inner,
8140 to: cast_type,
8141 double_colon_syntax: false,
8142 trailing_comments: vec![],
8143 format: None,
8144 default: None,
8145 inferred_type: None,
8146 })))
8147 } else {
8148 let inner = Expression::DateTrunc(Box::new(*d));
8149 Ok(Expression::Cast(Box::new(Cast {
8150 this: inner,
8151 to: cast_type,
8152 double_colon_syntax: false,
8153 trailing_comments: vec![],
8154 format: None,
8155 default: None,
8156 inferred_type: None,
8157 })))
8158 }
8159 } else {
8160 Ok(Expression::DateTrunc(d))
8161 }
8162 }
8163 Expression::Function(f) if f.args.len() == 2 => {
8164 // Function-based DATE_TRUNC(unit, expr)
8165 let input_type = match &f.args[1] {
8166 Expression::Cast(c) => Some(c.to.clone()),
8167 _ => None,
8168 };
8169 if let Some(cast_type) = input_type {
8170 let is_time = matches!(cast_type, DataType::Time { .. });
8171 if is_time {
8172 let date_expr = Expression::Cast(Box::new(Cast {
8173 this: Expression::Literal(
8174 crate::expressions::Literal::String(
8175 "1970-01-01".to_string(),
8176 ),
8177 ),
8178 to: DataType::Date,
8179 double_colon_syntax: false,
8180 trailing_comments: vec![],
8181 format: None,
8182 default: None,
8183 inferred_type: None,
8184 }));
8185 let mut args = f.args;
8186 let unit_arg = args.remove(0);
8187 let time_expr = args.remove(0);
8188 let add_expr = Expression::Add(Box::new(BinaryOp::new(
8189 date_expr, time_expr,
8190 )));
8191 let inner = Expression::Function(Box::new(Function::new(
8192 "DATE_TRUNC".to_string(),
8193 vec![unit_arg, add_expr],
8194 )));
8195 Ok(Expression::Cast(Box::new(Cast {
8196 this: inner,
8197 to: cast_type,
8198 double_colon_syntax: false,
8199 trailing_comments: vec![],
8200 format: None,
8201 default: None,
8202 inferred_type: None,
8203 })))
8204 } else {
8205 // Wrap the function in CAST
8206 Ok(Expression::Cast(Box::new(Cast {
8207 this: Expression::Function(f),
8208 to: cast_type,
8209 double_colon_syntax: false,
8210 trailing_comments: vec![],
8211 format: None,
8212 default: None,
8213 inferred_type: None,
8214 })))
8215 }
8216 } else {
8217 Ok(Expression::Function(f))
8218 }
8219 }
8220 other => Ok(other),
8221 }
8222 }
8223
8224 Action::RegexpReplaceSnowflakeToDuckDB => {
8225 // Snowflake REGEXP_REPLACE(s, p, r, position) -> REGEXP_REPLACE(s, p, r, 'g')
8226 if let Expression::Function(f) = e {
8227 let mut args = f.args;
8228 let subject = args.remove(0);
8229 let pattern = args.remove(0);
8230 let replacement = args.remove(0);
8231 Ok(Expression::Function(Box::new(Function::new(
8232 "REGEXP_REPLACE".to_string(),
8233 vec![
8234 subject,
8235 pattern,
8236 replacement,
8237 Expression::Literal(crate::expressions::Literal::String(
8238 "g".to_string(),
8239 )),
8240 ],
8241 ))))
8242 } else {
8243 Ok(e)
8244 }
8245 }
8246
8247 Action::RegexpReplacePositionSnowflakeToDuckDB => {
8248 // Snowflake REGEXP_REPLACE(s, p, r, pos, occ) -> DuckDB form
8249 // pos=1, occ=1 -> REGEXP_REPLACE(s, p, r) (single replace, no 'g')
8250 // pos>1, occ=0 -> SUBSTRING(s, 1, pos-1) || REGEXP_REPLACE(SUBSTRING(s, pos), p, r, 'g')
8251 // pos>1, occ=1 -> SUBSTRING(s, 1, pos-1) || REGEXP_REPLACE(SUBSTRING(s, pos), p, r)
8252 // pos=1, occ=0 -> REGEXP_REPLACE(s, p, r, 'g') (replace all)
8253 if let Expression::Function(f) = e {
8254 let mut args = f.args;
8255 let subject = args.remove(0);
8256 let pattern = args.remove(0);
8257 let replacement = args.remove(0);
8258 let position = args.remove(0);
8259 let occurrence = args.remove(0);
8260
8261 let is_pos_1 = matches!(&position, Expression::Literal(Literal::Number(n)) if n == "1");
8262 let is_occ_0 = matches!(&occurrence, Expression::Literal(Literal::Number(n)) if n == "0");
8263 let is_occ_1 = matches!(&occurrence, Expression::Literal(Literal::Number(n)) if n == "1");
8264
8265 if is_pos_1 && is_occ_1 {
8266 // REGEXP_REPLACE(s, p, r) - single replace, no flags
8267 Ok(Expression::Function(Box::new(Function::new(
8268 "REGEXP_REPLACE".to_string(),
8269 vec![subject, pattern, replacement],
8270 ))))
8271 } else if is_pos_1 && is_occ_0 {
8272 // REGEXP_REPLACE(s, p, r, 'g') - global replace
8273 Ok(Expression::Function(Box::new(Function::new(
8274 "REGEXP_REPLACE".to_string(),
8275 vec![
8276 subject,
8277 pattern,
8278 replacement,
8279 Expression::Literal(Literal::String("g".to_string())),
8280 ],
8281 ))))
8282 } else {
8283 // pos>1: SUBSTRING(s, 1, pos-1) || REGEXP_REPLACE(SUBSTRING(s, pos), p, r[, 'g'])
8284 // Pre-compute pos-1 when position is a numeric literal
8285 let pos_minus_1 = if let Expression::Literal(Literal::Number(ref n)) = position {
8286 if let Ok(val) = n.parse::<i64>() {
8287 Expression::number(val - 1)
8288 } else {
8289 Expression::Sub(Box::new(BinaryOp::new(
8290 position.clone(),
8291 Expression::number(1),
8292 )))
8293 }
8294 } else {
8295 Expression::Sub(Box::new(BinaryOp::new(
8296 position.clone(),
8297 Expression::number(1),
8298 )))
8299 };
8300 let prefix = Expression::Function(Box::new(Function::new(
8301 "SUBSTRING".to_string(),
8302 vec![subject.clone(), Expression::number(1), pos_minus_1],
8303 )));
8304 let suffix_subject = Expression::Function(Box::new(Function::new(
8305 "SUBSTRING".to_string(),
8306 vec![subject, position],
8307 )));
8308 let mut replace_args = vec![suffix_subject, pattern, replacement];
8309 if is_occ_0 {
8310 replace_args.push(Expression::Literal(Literal::String(
8311 "g".to_string(),
8312 )));
8313 }
8314 let replace_expr = Expression::Function(Box::new(Function::new(
8315 "REGEXP_REPLACE".to_string(),
8316 replace_args,
8317 )));
8318 Ok(Expression::DPipe(Box::new(crate::expressions::DPipe {
8319 this: Box::new(prefix),
8320 expression: Box::new(replace_expr),
8321 safe: None,
8322 })))
8323 }
8324 } else {
8325 Ok(e)
8326 }
8327 }
8328
            Action::RegexpSubstrSnowflakeToDuckDB => {
                // Snowflake REGEXP_SUBSTR -> DuckDB REGEXP_EXTRACT variants.
                //
                // Dispatches on argument count: each optional Snowflake argument
                // (position, occurrence, flags, group) gets its own emulation.
                // NULLIF(SUBSTRING(s, pos), '') turns an out-of-range position
                // into NULL so the result matches Snowflake's NULL behavior.
                if let Expression::Function(f) = e {
                    let mut args = f.args;
                    let arg_count = args.len();
                    match arg_count {
                        // REGEXP_SUBSTR(s, p) -> REGEXP_EXTRACT(s, p)
                        // (covers 0..=2 args: a pure rename, args forwarded as-is)
                        0..=2 => {
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT".to_string(),
                                args,
                            ))))
                        }
                        // REGEXP_SUBSTR(s, p, pos) -> REGEXP_EXTRACT(NULLIF(SUBSTRING(s, pos), ''), p)
                        3 => {
                            let subject = args.remove(0);
                            let pattern = args.remove(0);
                            let position = args.remove(0);
                            // A literal position of 1 is the default: no offset needed.
                            let is_pos_1 = matches!(&position, Expression::Literal(Literal::Number(n)) if n == "1");
                            if is_pos_1 {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "REGEXP_EXTRACT".to_string(),
                                    vec![subject, pattern],
                                ))))
                            } else {
                                let substring_expr = Expression::Function(Box::new(
                                    Function::new(
                                        "SUBSTRING".to_string(),
                                        vec![subject, position],
                                    ),
                                ));
                                // NULLIF guard: SUBSTRING past the end yields '',
                                // which must become NULL to match Snowflake.
                                let nullif_expr = Expression::Function(Box::new(
                                    Function::new(
                                        "NULLIF".to_string(),
                                        vec![
                                            substring_expr,
                                            Expression::Literal(Literal::String(
                                                String::new(),
                                            )),
                                        ],
                                    ),
                                ));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "REGEXP_EXTRACT".to_string(),
                                    vec![nullif_expr, pattern],
                                ))))
                            }
                        }
                        // REGEXP_SUBSTR(s, p, pos, occ) -> depends on pos and occ
                        4 => {
                            let subject = args.remove(0);
                            let pattern = args.remove(0);
                            let position = args.remove(0);
                            let occurrence = args.remove(0);
                            let is_pos_1 = matches!(&position, Expression::Literal(Literal::Number(n)) if n == "1");
                            let is_occ_1 = matches!(&occurrence, Expression::Literal(Literal::Number(n)) if n == "1");

                            // Apply the position offset (with the NULLIF guard)
                            // before handling the occurrence.
                            let effective_subject = if is_pos_1 {
                                subject
                            } else {
                                let substring_expr = Expression::Function(Box::new(
                                    Function::new(
                                        "SUBSTRING".to_string(),
                                        vec![subject, position],
                                    ),
                                ));
                                Expression::Function(Box::new(Function::new(
                                    "NULLIF".to_string(),
                                    vec![
                                        substring_expr,
                                        Expression::Literal(Literal::String(String::new())),
                                    ],
                                )))
                            };

                            if is_occ_1 {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "REGEXP_EXTRACT".to_string(),
                                    vec![effective_subject, pattern],
                                ))))
                            } else {
                                // ARRAY_EXTRACT(REGEXP_EXTRACT_ALL(s, p), occ)
                                // NOTE(review): relies on DuckDB list indexing being
                                // 1-based so `occ` maps directly — confirm.
                                let extract_all = Expression::Function(Box::new(
                                    Function::new(
                                        "REGEXP_EXTRACT_ALL".to_string(),
                                        vec![effective_subject, pattern],
                                    ),
                                ));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "ARRAY_EXTRACT".to_string(),
                                    vec![extract_all, occurrence],
                                ))))
                            }
                        }
                        // REGEXP_SUBSTR(s, p, 1, 1, 'e') -> REGEXP_EXTRACT(s, p)
                        5 => {
                            let subject = args.remove(0);
                            let pattern = args.remove(0);
                            // NOTE(review): position/occurrence/flags are dropped
                            // unconditionally here — assumes they hold the default
                            // values (1, 1, 'e'); verify for non-default literals.
                            let _position = args.remove(0);
                            let _occurrence = args.remove(0);
                            let _flags = args.remove(0);
                            // Strip 'e' flag, convert to REGEXP_EXTRACT
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT".to_string(),
                                vec![subject, pattern],
                            ))))
                        }
                        // REGEXP_SUBSTR(s, p, 1, 1, 'e', group) -> REGEXP_EXTRACT(s, p[, group])
                        _ => {
                            let subject = args.remove(0);
                            let pattern = args.remove(0);
                            let _position = args.remove(0);
                            let _occurrence = args.remove(0);
                            let _flags = args.remove(0);
                            let group = args.remove(0);
                            // group = 0 means "whole match", which is the default,
                            // so the argument can simply be omitted.
                            let is_group_0 = matches!(&group, Expression::Literal(Literal::Number(n)) if n == "0");
                            if is_group_0 {
                                // Strip group=0 (default)
                                Ok(Expression::Function(Box::new(Function::new(
                                    "REGEXP_EXTRACT".to_string(),
                                    vec![subject, pattern],
                                ))))
                            } else {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "REGEXP_EXTRACT".to_string(),
                                    vec![subject, pattern, group],
                                ))))
                            }
                        }
                    }
                } else {
                    Ok(e)
                }
            }
8463
8464 Action::RegexpSubstrSnowflakeIdentity => {
8465 // Snowflake→Snowflake: REGEXP_SUBSTR/REGEXP_SUBSTR_ALL with 6 args
8466 // Strip trailing group=0
8467 if let Expression::Function(f) = e {
8468 let func_name = f.name.clone();
8469 let mut args = f.args;
8470 if args.len() == 6 {
8471 let is_group_0 = matches!(&args[5], Expression::Literal(Literal::Number(n)) if n == "0");
8472 if is_group_0 {
8473 args.truncate(5);
8474 }
8475 }
8476 Ok(Expression::Function(Box::new(Function::new(
8477 func_name,
8478 args,
8479 ))))
8480 } else {
8481 Ok(e)
8482 }
8483 }
8484
            Action::RegexpSubstrAllSnowflakeToDuckDB => {
                // Snowflake REGEXP_SUBSTR_ALL -> DuckDB REGEXP_EXTRACT_ALL variants.
                // Same argument-count dispatch as the REGEXP_SUBSTR action, but
                // returning the full list of matches (no NULLIF guard needed —
                // an empty list is already the correct "no match" result).
                if let Expression::Function(f) = e {
                    let mut args = f.args;
                    let arg_count = args.len();
                    match arg_count {
                        // REGEXP_SUBSTR_ALL(s, p) -> REGEXP_EXTRACT_ALL(s, p)
                        // (covers 0..=2 args: a pure rename, args forwarded as-is)
                        0..=2 => {
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                args,
                            ))))
                        }
                        // REGEXP_SUBSTR_ALL(s, p, pos) -> REGEXP_EXTRACT_ALL(SUBSTRING(s, pos), p)
                        3 => {
                            let subject = args.remove(0);
                            let pattern = args.remove(0);
                            let position = args.remove(0);
                            // A literal position of 1 is the default: no offset needed.
                            let is_pos_1 = matches!(&position, Expression::Literal(Literal::Number(n)) if n == "1");
                            if is_pos_1 {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "REGEXP_EXTRACT_ALL".to_string(),
                                    vec![subject, pattern],
                                ))))
                            } else {
                                let substring_expr = Expression::Function(Box::new(
                                    Function::new(
                                        "SUBSTRING".to_string(),
                                        vec![subject, position],
                                    ),
                                ));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "REGEXP_EXTRACT_ALL".to_string(),
                                    vec![substring_expr, pattern],
                                ))))
                            }
                        }
                        // REGEXP_SUBSTR_ALL(s, p, 1, occ) -> REGEXP_EXTRACT_ALL(s, p)[occ:]
                        4 => {
                            let subject = args.remove(0);
                            let pattern = args.remove(0);
                            let position = args.remove(0);
                            let occurrence = args.remove(0);
                            let is_pos_1 = matches!(&position, Expression::Literal(Literal::Number(n)) if n == "1");
                            let is_occ_1 = matches!(&occurrence, Expression::Literal(Literal::Number(n)) if n == "1");

                            let effective_subject = if is_pos_1 {
                                subject
                            } else {
                                Expression::Function(Box::new(Function::new(
                                    "SUBSTRING".to_string(),
                                    vec![subject, position],
                                )))
                            };

                            if is_occ_1 {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "REGEXP_EXTRACT_ALL".to_string(),
                                    vec![effective_subject, pattern],
                                ))))
                            } else {
                                // REGEXP_EXTRACT_ALL(s, p)[occ:]
                                // Open-ended slice: drop matches before `occ`.
                                // NOTE(review): assumes 1-based list slicing in the
                                // target — confirm against the DuckDB generator.
                                let extract_all = Expression::Function(Box::new(
                                    Function::new(
                                        "REGEXP_EXTRACT_ALL".to_string(),
                                        vec![effective_subject, pattern],
                                    ),
                                ));
                                Ok(Expression::ArraySlice(Box::new(
                                    crate::expressions::ArraySlice {
                                        this: extract_all,
                                        start: Some(occurrence),
                                        end: None,
                                    },
                                )))
                            }
                        }
                        // REGEXP_SUBSTR_ALL(s, p, 1, 1, 'e') -> REGEXP_EXTRACT_ALL(s, p)
                        5 => {
                            let subject = args.remove(0);
                            let pattern = args.remove(0);
                            // NOTE(review): position/occurrence/flags are dropped
                            // unconditionally — assumes they hold the default
                            // values; verify for non-default literals.
                            let _position = args.remove(0);
                            let _occurrence = args.remove(0);
                            let _flags = args.remove(0);
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![subject, pattern],
                            ))))
                        }
                        // REGEXP_SUBSTR_ALL(s, p, 1, 1, 'e', 0) -> REGEXP_EXTRACT_ALL(s, p)
                        _ => {
                            let subject = args.remove(0);
                            let pattern = args.remove(0);
                            let _position = args.remove(0);
                            let _occurrence = args.remove(0);
                            let _flags = args.remove(0);
                            let group = args.remove(0);
                            // group = 0 selects the whole match (the default).
                            let is_group_0 = matches!(&group, Expression::Literal(Literal::Number(n)) if n == "0");
                            if is_group_0 {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "REGEXP_EXTRACT_ALL".to_string(),
                                    vec![subject, pattern],
                                ))))
                            } else {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "REGEXP_EXTRACT_ALL".to_string(),
                                    vec![subject, pattern, group],
                                ))))
                            }
                        }
                    }
                } else {
                    Ok(e)
                }
            }
8600
8601 Action::RegexpCountSnowflakeToDuckDB => {
8602 // Snowflake REGEXP_COUNT(s, p[, pos[, flags]]) ->
8603 // DuckDB: CASE WHEN p = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(s, p)) END
8604 if let Expression::Function(f) = e {
8605 let mut args = f.args;
8606 let arg_count = args.len();
8607 let subject = args.remove(0);
8608 let pattern = args.remove(0);
8609
8610 // Handle position arg
8611 let effective_subject = if arg_count >= 3 {
8612 let position = args.remove(0);
8613 Expression::Function(Box::new(Function::new(
8614 "SUBSTRING".to_string(),
8615 vec![subject, position],
8616 )))
8617 } else {
8618 subject
8619 };
8620
8621 // Handle flags arg -> embed as (?flags) prefix in pattern
8622 let effective_pattern = if arg_count >= 4 {
8623 let flags = args.remove(0);
8624 match &flags {
8625 Expression::Literal(Literal::String(f_str)) if !f_str.is_empty() => {
8626 // Always use concatenation: '(?flags)' || pattern
8627 let prefix = Expression::Literal(Literal::String(
8628 format!("(?{})", f_str),
8629 ));
8630 Expression::DPipe(Box::new(crate::expressions::DPipe {
8631 this: Box::new(prefix),
8632 expression: Box::new(pattern.clone()),
8633 safe: None,
8634 }))
8635 }
8636 _ => pattern.clone(),
8637 }
8638 } else {
8639 pattern.clone()
8640 };
8641
8642 // Build: CASE WHEN p = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(s, p)) END
8643 let extract_all = Expression::Function(Box::new(Function::new(
8644 "REGEXP_EXTRACT_ALL".to_string(),
8645 vec![effective_subject, effective_pattern.clone()],
8646 )));
8647 let length_expr = Expression::Length(Box::new(
8648 crate::expressions::UnaryFunc {
8649 this: extract_all,
8650 original_name: None,
8651 inferred_type: None,
8652 },
8653 ));
8654 let condition = Expression::Eq(Box::new(BinaryOp::new(
8655 effective_pattern,
8656 Expression::Literal(Literal::String(String::new())),
8657 )));
8658 Ok(Expression::Case(Box::new(Case {
8659 operand: None,
8660 whens: vec![(condition, Expression::number(0))],
8661 else_: Some(length_expr),
8662 comments: vec![],
8663 inferred_type: None,
8664 })))
8665 } else {
8666 Ok(e)
8667 }
8668 }
8669
            Action::RegexpInstrSnowflakeToDuckDB => {
                // Snowflake REGEXP_INSTR(s, p[, pos[, occ[, option[, flags[, group]]]]]) ->
                // DuckDB: CASE WHEN s IS NULL OR p IS NULL [OR ...] THEN NULL
                //              WHEN p = '' THEN 0
                //              WHEN LENGTH(REGEXP_EXTRACT_ALL(eff_s, eff_p)) < occ THEN 0
                //              ELSE 1 + COALESCE(LIST_SUM(LIST_TRANSFORM(STRING_SPLIT_REGEX(eff_s, eff_p)[1:occ], x -> LENGTH(x))), 0)
                //                     + COALESCE(LIST_SUM(LIST_TRANSFORM(REGEXP_EXTRACT_ALL(eff_s, eff_p)[1:occ - 1], x -> LENGTH(x))), 0)
                //                     + pos_offset
                //              END
                // The ELSE branch reconstructs the 1-based start of the occ-th
                // match by summing the lengths of the inter-match text (split
                // pieces) before it and the lengths of the occ-1 matches before it.
                if let Expression::Function(f) = e {
                    let mut args = f.args;
                    // NOTE(review): assumes at least one argument; a zero-arg
                    // call would panic here — presumably guaranteed at the
                    // dispatch site; confirm.
                    let subject = args.remove(0);
                    let pattern = if !args.is_empty() { args.remove(0) } else {
                        Expression::Literal(Literal::String(String::new()))
                    };

                    // Collect all original args for NULL checks
                    let position = if !args.is_empty() { Some(args.remove(0)) } else { None };
                    let occurrence = if !args.is_empty() { Some(args.remove(0)) } else { None };
                    let option = if !args.is_empty() { Some(args.remove(0)) } else { None };
                    let flags = if !args.is_empty() { Some(args.remove(0)) } else { None };
                    let _group = if !args.is_empty() { Some(args.remove(0)) } else { None };

                    // NOTE(review): `option` and `group` only participate in the
                    // NULL-propagation checks below; option=1 (return match end)
                    // and capture-group selection are not otherwise emulated —
                    // confirm only default forms are routed here.
                    let is_pos_1 = position.as_ref().map_or(true, |p| matches!(p, Expression::Literal(Literal::Number(n)) if n == "1"));
                    let occurrence_expr = occurrence.clone().unwrap_or(Expression::number(1));

                    // Build NULL check: subject IS NULL OR pattern IS NULL [OR pos IS NULL ...]
                    let mut null_checks: Vec<Expression> = vec![
                        Expression::Is(Box::new(BinaryOp::new(
                            subject.clone(),
                            Expression::Null(Null),
                        ))),
                        Expression::Is(Box::new(BinaryOp::new(
                            pattern.clone(),
                            Expression::Null(Null),
                        ))),
                    ];
                    // Add NULL checks for all provided optional args
                    for opt_arg in [&position, &occurrence, &option, &flags].iter() {
                        if let Some(arg) = opt_arg {
                            null_checks.push(Expression::Is(Box::new(BinaryOp::new(
                                (*arg).clone(),
                                Expression::Null(Null),
                            ))));
                        }
                    }
                    // Chain with OR
                    // (unwrap is safe: the vec always holds the two mandatory checks)
                    let null_condition = null_checks.into_iter().reduce(|a, b| {
                        Expression::Or(Box::new(BinaryOp::new(a, b)))
                    }).unwrap();

                    // Effective subject (apply position offset)
                    let effective_subject = if is_pos_1 {
                        subject.clone()
                    } else {
                        let pos = position.clone().unwrap_or(Expression::number(1));
                        Expression::Function(Box::new(Function::new(
                            "SUBSTRING".to_string(),
                            vec![subject.clone(), pos],
                        )))
                    };

                    // Effective pattern (apply flags if present)
                    // Non-literal flag expressions are ignored rather than embedded.
                    let effective_pattern = if let Some(ref fl) = flags {
                        if let Expression::Literal(Literal::String(f_str)) = fl {
                            if !f_str.is_empty() {
                                let prefix = Expression::Literal(Literal::String(
                                    format!("(?{})", f_str),
                                ));
                                Expression::DPipe(Box::new(crate::expressions::DPipe {
                                    this: Box::new(prefix),
                                    expression: Box::new(pattern.clone()),
                                    safe: None,
                                }))
                            } else {
                                pattern.clone()
                            }
                        } else {
                            pattern.clone()
                        }
                    } else {
                        pattern.clone()
                    };

                    // WHEN pattern = '' THEN 0
                    let empty_pattern_check = Expression::Eq(Box::new(BinaryOp::new(
                        effective_pattern.clone(),
                        Expression::Literal(Literal::String(String::new())),
                    )));

                    // WHEN LENGTH(REGEXP_EXTRACT_ALL(eff_s, eff_p)) < occ THEN 0
                    // (fewer matches than the requested occurrence -> position 0)
                    let match_count_check = Expression::Lt(Box::new(BinaryOp::new(
                        Expression::Length(Box::new(crate::expressions::UnaryFunc {
                            this: Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![effective_subject.clone(), effective_pattern.clone()],
                            ))),
                            original_name: None,
                            inferred_type: None,
                        })),
                        occurrence_expr.clone(),
                    )));

                    // Helper: build LENGTH lambda for LIST_TRANSFORM
                    let make_len_lambda = || Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                        parameters: vec![crate::expressions::Identifier::new("x")],
                        body: Expression::Length(Box::new(crate::expressions::UnaryFunc {
                            this: Expression::Identifier(crate::expressions::Identifier::new("x")),
                            original_name: None,
                            inferred_type: None,
                        })),
                        colon: false,
                        parameter_types: vec![],
                    }));

                    // COALESCE(LIST_SUM(LIST_TRANSFORM(STRING_SPLIT_REGEX(s, p)[1:occ], x -> LENGTH(x))), 0)
                    // Total length of the text between/before matches, up to
                    // (and including) the piece preceding the occ-th match.
                    let split_sliced = Expression::ArraySlice(Box::new(
                        crate::expressions::ArraySlice {
                            this: Expression::Function(Box::new(Function::new(
                                "STRING_SPLIT_REGEX".to_string(),
                                vec![effective_subject.clone(), effective_pattern.clone()],
                            ))),
                            start: Some(Expression::number(1)),
                            end: Some(occurrence_expr.clone()),
                        },
                    ));
                    let split_sum = Expression::Function(Box::new(Function::new(
                        "COALESCE".to_string(),
                        vec![
                            Expression::Function(Box::new(Function::new(
                                "LIST_SUM".to_string(),
                                vec![Expression::Function(Box::new(Function::new(
                                    "LIST_TRANSFORM".to_string(),
                                    vec![split_sliced, make_len_lambda()],
                                )))],
                            ))),
                            Expression::number(0),
                        ],
                    )));

                    // COALESCE(LIST_SUM(LIST_TRANSFORM(REGEXP_EXTRACT_ALL(s, p)[1:occ - 1], x -> LENGTH(x))), 0)
                    // Total length of the occ-1 matches preceding the occ-th one.
                    let extract_sliced = Expression::ArraySlice(Box::new(
                        crate::expressions::ArraySlice {
                            this: Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![effective_subject.clone(), effective_pattern.clone()],
                            ))),
                            start: Some(Expression::number(1)),
                            end: Some(Expression::Sub(Box::new(BinaryOp::new(
                                occurrence_expr.clone(),
                                Expression::number(1),
                            )))),
                        },
                    ));
                    let extract_sum = Expression::Function(Box::new(Function::new(
                        "COALESCE".to_string(),
                        vec![
                            Expression::Function(Box::new(Function::new(
                                "LIST_SUM".to_string(),
                                vec![Expression::Function(Box::new(Function::new(
                                    "LIST_TRANSFORM".to_string(),
                                    vec![extract_sliced, make_len_lambda()],
                                )))],
                            ))),
                            Expression::number(0),
                        ],
                    )));

                    // Position offset: pos - 1 when pos > 1, else 0
                    // (the search started at `pos`, so results shift back up)
                    let pos_offset: Expression = if !is_pos_1 {
                        let pos = position.clone().unwrap_or(Expression::number(1));
                        Expression::Sub(Box::new(BinaryOp::new(
                            pos,
                            Expression::number(1),
                        )))
                    } else {
                        Expression::number(0)
                    };

                    // ELSE: 1 + split_sum + extract_sum + pos_offset
                    let else_expr = Expression::Add(Box::new(BinaryOp::new(
                        Expression::Add(Box::new(BinaryOp::new(
                            Expression::Add(Box::new(BinaryOp::new(
                                Expression::number(1),
                                split_sum,
                            ))),
                            extract_sum,
                        ))),
                        pos_offset,
                    )));

                    Ok(Expression::Case(Box::new(Case {
                        operand: None,
                        whens: vec![
                            (null_condition, Expression::Null(Null)),
                            (empty_pattern_check, Expression::number(0)),
                            (match_count_check, Expression::number(0)),
                        ],
                        else_: Some(else_expr),
                        comments: vec![],
                        inferred_type: None,
                    })))
                } else {
                    Ok(e)
                }
            }
8876
8877 Action::RlikeSnowflakeToDuckDB => {
8878 // Snowflake RLIKE(a, b[, flags]) -> DuckDB REGEXP_MATCHES(a, '^(' || (b) || ')$'[, flags])
8879 // Snowflake RLIKE does full-string match; DuckDB REGEXP_MATCHES does partial match
8880 // So we anchor the pattern with ^ and $
8881 // Can come as Expression::RegexpLike (from Snowflake transform_expr) or
8882 // Expression::Function("RLIKE", args) (if not transformed yet)
8883 let (subject, pattern, flags) = match e {
8884 Expression::RegexpLike(ref rl) => {
8885 (rl.this.clone(), rl.pattern.clone(), rl.flags.clone())
8886 }
8887 Expression::Function(ref f) if f.args.len() >= 2 => {
8888 let s = f.args[0].clone();
8889 let p = f.args[1].clone();
8890 let fl = f.args.get(2).cloned();
8891 (s, p, fl)
8892 }
8893 _ => return Ok(e),
8894 };
8895
8896 // Build anchored pattern: '^(' || (pattern) || ')$'
8897 let prefix = Expression::Literal(Literal::String("^(".to_string()));
8898 let suffix = Expression::Literal(Literal::String(")$".to_string()));
8899 let paren_pattern = Expression::Paren(Box::new(Paren {
8900 this: pattern,
8901 trailing_comments: vec![],
8902 }));
8903 let left_concat = Expression::DPipe(Box::new(
8904 crate::expressions::DPipe {
8905 this: Box::new(prefix),
8906 expression: Box::new(paren_pattern),
8907 safe: None,
8908 },
8909 ));
8910 let anchored = Expression::DPipe(Box::new(
8911 crate::expressions::DPipe {
8912 this: Box::new(left_concat),
8913 expression: Box::new(suffix),
8914 safe: None,
8915 },
8916 ));
8917
8918 let mut result_args = vec![subject, anchored];
8919 if let Some(fl) = flags {
8920 result_args.push(fl);
8921 }
8922 Ok(Expression::Function(Box::new(Function::new(
8923 "REGEXP_MATCHES".to_string(),
8924 result_args,
8925 ))))
8926 }
8927
8928 Action::RegexpExtractAllToSnowflake => {
8929 // BigQuery REGEXP_EXTRACT_ALL(s, p) -> Snowflake REGEXP_SUBSTR_ALL(s, p)
8930 // With capture group: REGEXP_SUBSTR_ALL(s, p, 1, 1, 'c', 1)
8931 if let Expression::Function(f) = e {
8932 let mut args = f.args;
8933 if args.len() >= 2 {
8934 let str_expr = args.remove(0);
8935 let pattern = args.remove(0);
8936
8937 let has_groups = match &pattern {
8938 Expression::Literal(Literal::String(s)) => {
8939 s.contains('(') && s.contains(')')
8940 }
8941 _ => false,
8942 };
8943
8944 if has_groups {
8945 Ok(Expression::Function(Box::new(Function::new(
8946 "REGEXP_SUBSTR_ALL".to_string(),
8947 vec![
8948 str_expr,
8949 pattern,
8950 Expression::number(1),
8951 Expression::number(1),
8952 Expression::Literal(Literal::String("c".to_string())),
8953 Expression::number(1),
8954 ],
8955 ))))
8956 } else {
8957 Ok(Expression::Function(Box::new(Function::new(
8958 "REGEXP_SUBSTR_ALL".to_string(),
8959 vec![str_expr, pattern],
8960 ))))
8961 }
8962 } else {
8963 Ok(Expression::Function(Box::new(Function::new(
8964 "REGEXP_SUBSTR_ALL".to_string(),
8965 args,
8966 ))))
8967 }
8968 } else {
8969 Ok(e)
8970 }
8971 }
8972
8973 Action::SetToVariable => {
8974 // For DuckDB: SET a = 1 -> SET VARIABLE a = 1
8975 if let Expression::SetStatement(mut s) = e {
8976 for item in &mut s.items {
8977 if item.kind.is_none() {
8978 // Check if name already has VARIABLE prefix (from DuckDB source parsing)
8979 let already_variable = match &item.name {
8980 Expression::Identifier(id) => id.name.starts_with("VARIABLE "),
8981 _ => false,
8982 };
8983 if already_variable {
8984 // Extract the actual name and set kind
8985 if let Expression::Identifier(ref mut id) = item.name {
8986 let actual_name = id.name["VARIABLE ".len()..].to_string();
8987 id.name = actual_name;
8988 }
8989 }
8990 item.kind = Some("VARIABLE".to_string());
8991 }
8992 }
8993 Ok(Expression::SetStatement(s))
8994 } else {
8995 Ok(e)
8996 }
8997 }
8998
8999 Action::ConvertTimezoneToExpr => {
9000 // Convert Function("CONVERT_TIMEZONE", args) to Expression::ConvertTimezone
9001 // This prevents Redshift's transform_expr from expanding 2-arg to 3-arg with 'UTC'
9002 if let Expression::Function(f) = e {
9003 if f.args.len() == 2 {
9004 let mut args = f.args;
9005 let target_tz = args.remove(0);
9006 let timestamp = args.remove(0);
9007 Ok(Expression::ConvertTimezone(Box::new(ConvertTimezone {
9008 source_tz: None,
9009 target_tz: Some(Box::new(target_tz)),
9010 timestamp: Some(Box::new(timestamp)),
9011 options: vec![],
9012 })))
9013 } else if f.args.len() == 3 {
9014 let mut args = f.args;
9015 let source_tz = args.remove(0);
9016 let target_tz = args.remove(0);
9017 let timestamp = args.remove(0);
9018 Ok(Expression::ConvertTimezone(Box::new(ConvertTimezone {
9019 source_tz: Some(Box::new(source_tz)),
9020 target_tz: Some(Box::new(target_tz)),
9021 timestamp: Some(Box::new(timestamp)),
9022 options: vec![],
9023 })))
9024 } else {
9025 Ok(Expression::Function(f))
9026 }
9027 } else {
9028 Ok(e)
9029 }
9030 }
9031
9032 Action::BigQueryCastType => {
9033 // Convert BigQuery types to standard SQL types
9034 if let Expression::DataType(dt) = e {
9035 match dt {
9036 DataType::Custom { ref name } if name.eq_ignore_ascii_case("INT64") => {
9037 Ok(Expression::DataType(DataType::BigInt { length: None }))
9038 }
9039 DataType::Custom { ref name }
9040 if name.eq_ignore_ascii_case("FLOAT64") =>
9041 {
9042 Ok(Expression::DataType(DataType::Double {
9043 precision: None,
9044 scale: None,
9045 }))
9046 }
9047 DataType::Custom { ref name } if name.eq_ignore_ascii_case("BOOL") => {
9048 Ok(Expression::DataType(DataType::Boolean))
9049 }
9050 DataType::Custom { ref name } if name.eq_ignore_ascii_case("BYTES") => {
9051 Ok(Expression::DataType(DataType::VarBinary { length: None }))
9052 }
9053 DataType::Custom { ref name }
9054 if name.eq_ignore_ascii_case("NUMERIC") =>
9055 {
9056 // For DuckDB target, use Custom("DECIMAL") to avoid DuckDB's
9057 // default precision (18, 3) being added to bare DECIMAL
9058 if matches!(target, DialectType::DuckDB) {
9059 Ok(Expression::DataType(DataType::Custom {
9060 name: "DECIMAL".to_string(),
9061 }))
9062 } else {
9063 Ok(Expression::DataType(DataType::Decimal {
9064 precision: None,
9065 scale: None,
9066 }))
9067 }
9068 }
9069 DataType::Custom { ref name }
9070 if name.eq_ignore_ascii_case("STRING") =>
9071 {
9072 Ok(Expression::DataType(DataType::String { length: None }))
9073 }
9074 DataType::Custom { ref name }
9075 if name.eq_ignore_ascii_case("DATETIME") =>
9076 {
9077 Ok(Expression::DataType(DataType::Timestamp {
9078 precision: None,
9079 timezone: false,
9080 }))
9081 }
9082 _ => Ok(Expression::DataType(dt)),
9083 }
9084 } else {
9085 Ok(e)
9086 }
9087 }
9088
9089 Action::BigQuerySafeDivide => {
9090 // Convert SafeDivide expression to IF/CASE form for most targets
9091 if let Expression::SafeDivide(sd) = e {
9092 let x = *sd.this;
9093 let y = *sd.expression;
9094 // Wrap x and y in parens if they're complex expressions
9095 let y_ref = match &y {
9096 Expression::Column(_)
9097 | Expression::Literal(_)
9098 | Expression::Identifier(_) => y.clone(),
9099 _ => Expression::Paren(Box::new(Paren {
9100 this: y.clone(),
9101 trailing_comments: vec![],
9102 })),
9103 };
9104 let x_ref = match &x {
9105 Expression::Column(_)
9106 | Expression::Literal(_)
9107 | Expression::Identifier(_) => x.clone(),
9108 _ => Expression::Paren(Box::new(Paren {
9109 this: x.clone(),
9110 trailing_comments: vec![],
9111 })),
9112 };
9113 let condition = Expression::Neq(Box::new(BinaryOp::new(
9114 y_ref.clone(),
9115 Expression::number(0),
9116 )));
9117 let div_expr = Expression::Div(Box::new(BinaryOp::new(x_ref, y_ref)));
9118
9119 if matches!(target, DialectType::Presto | DialectType::Trino) {
9120 // Presto/Trino: IF(y <> 0, CAST(x AS DOUBLE) / y, NULL)
9121 let cast_x = Expression::Cast(Box::new(Cast {
9122 this: match &x {
9123 Expression::Column(_)
9124 | Expression::Literal(_)
9125 | Expression::Identifier(_) => x,
9126 _ => Expression::Paren(Box::new(Paren {
9127 this: x,
9128 trailing_comments: vec![],
9129 })),
9130 },
9131 to: DataType::Double {
9132 precision: None,
9133 scale: None,
9134 },
9135 trailing_comments: vec![],
9136 double_colon_syntax: false,
9137 format: None,
9138 default: None,
9139 inferred_type: None,
9140 }));
9141 let cast_div = Expression::Div(Box::new(BinaryOp::new(
9142 cast_x,
9143 match &y {
9144 Expression::Column(_)
9145 | Expression::Literal(_)
9146 | Expression::Identifier(_) => y,
9147 _ => Expression::Paren(Box::new(Paren {
9148 this: y,
9149 trailing_comments: vec![],
9150 })),
9151 },
9152 )));
9153 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
9154 condition,
9155 true_value: cast_div,
9156 false_value: Some(Expression::Null(Null)),
9157 original_name: None,
9158 inferred_type: None,
9159 })))
9160 } else if matches!(target, DialectType::PostgreSQL) {
9161 // PostgreSQL: CASE WHEN y <> 0 THEN CAST(x AS DOUBLE PRECISION) / y ELSE NULL END
9162 let cast_x = Expression::Cast(Box::new(Cast {
9163 this: match &x {
9164 Expression::Column(_)
9165 | Expression::Literal(_)
9166 | Expression::Identifier(_) => x,
9167 _ => Expression::Paren(Box::new(Paren {
9168 this: x,
9169 trailing_comments: vec![],
9170 })),
9171 },
9172 to: DataType::Custom {
9173 name: "DOUBLE PRECISION".to_string(),
9174 },
9175 trailing_comments: vec![],
9176 double_colon_syntax: false,
9177 format: None,
9178 default: None,
9179 inferred_type: None,
9180 }));
9181 let y_paren = match &y {
9182 Expression::Column(_)
9183 | Expression::Literal(_)
9184 | Expression::Identifier(_) => y,
9185 _ => Expression::Paren(Box::new(Paren {
9186 this: y,
9187 trailing_comments: vec![],
9188 })),
9189 };
9190 let cast_div =
9191 Expression::Div(Box::new(BinaryOp::new(cast_x, y_paren)));
9192 Ok(Expression::Case(Box::new(Case {
9193 operand: None,
9194 whens: vec![(condition, cast_div)],
9195 else_: Some(Expression::Null(Null)),
9196 comments: Vec::new(),
9197 inferred_type: None,
9198 })))
9199 } else if matches!(target, DialectType::DuckDB) {
9200 // DuckDB: CASE WHEN y <> 0 THEN x / y ELSE NULL END
9201 Ok(Expression::Case(Box::new(Case {
9202 operand: None,
9203 whens: vec![(condition, div_expr)],
9204 else_: Some(Expression::Null(Null)),
9205 comments: Vec::new(),
9206 inferred_type: None,
9207 })))
9208 } else if matches!(target, DialectType::Snowflake) {
9209 // Snowflake: IFF(y <> 0, x / y, NULL)
9210 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
9211 condition,
9212 true_value: div_expr,
9213 false_value: Some(Expression::Null(Null)),
9214 original_name: Some("IFF".to_string()),
9215 inferred_type: None,
9216 })))
9217 } else {
9218 // All others: IF(y <> 0, x / y, NULL)
9219 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
9220 condition,
9221 true_value: div_expr,
9222 false_value: Some(Expression::Null(Null)),
9223 original_name: None,
9224 inferred_type: None,
9225 })))
9226 }
9227 } else {
9228 Ok(e)
9229 }
9230 }
9231
            Action::BigQueryLastDayStripUnit => {
                // BigQuery LAST_DAY(date[, unit]) -> target-specific forms.
                // The unit is removed first (MONTH is BigQuery's default).
                // NOTE(review): the unit is discarded unconditionally, so this
                // assumes the action is only dispatched for the MONTH unit —
                // WEEK/QUARTER/YEAR would silently change meaning; confirm at
                // the dispatch site.
                if let Expression::LastDay(mut ld) = e {
                    ld.unit = None; // Strip the unit (MONTH is default)
                    match target {
                        DialectType::PostgreSQL => {
                            // LAST_DAY(date) -> CAST(DATE_TRUNC('MONTH', date) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)
                            // i.e. first day of next month, minus one day.
                            let date_trunc = Expression::Function(Box::new(Function::new(
                                "DATE_TRUNC".to_string(),
                                vec![
                                    Expression::Literal(crate::expressions::Literal::String(
                                        "MONTH".to_string(),
                                    )),
                                    ld.this.clone(),
                                ],
                            )));
                            let plus_month =
                                Expression::Add(Box::new(crate::expressions::BinaryOp::new(
                                    date_trunc,
                                    Expression::Interval(Box::new(
                                        crate::expressions::Interval {
                                            this: Some(Expression::Literal(
                                                crate::expressions::Literal::String(
                                                    "1 MONTH".to_string(),
                                                ),
                                            )),
                                            unit: None,
                                        },
                                    )),
                                )));
                            let minus_day =
                                Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
                                    plus_month,
                                    Expression::Interval(Box::new(
                                        crate::expressions::Interval {
                                            this: Some(Expression::Literal(
                                                crate::expressions::Literal::String(
                                                    "1 DAY".to_string(),
                                                ),
                                            )),
                                            unit: None,
                                        },
                                    )),
                                )));
                            // The interval arithmetic yields a timestamp; cast
                            // back to DATE to match LAST_DAY's return type.
                            Ok(Expression::Cast(Box::new(Cast {
                                this: minus_day,
                                to: DataType::Date,
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            })))
                        }
                        DialectType::Presto => {
                            // LAST_DAY(date) -> LAST_DAY_OF_MONTH(date)
                            Ok(Expression::Function(Box::new(Function::new(
                                "LAST_DAY_OF_MONTH".to_string(),
                                vec![ld.this],
                            ))))
                        }
                        DialectType::ClickHouse => {
                            // ClickHouse LAST_DAY(CAST(x AS Nullable(DATE)))
                            // Need to wrap the DATE type in Nullable
                            // (only rewrites an existing inner CAST; any other
                            // operand expression is left untouched).
                            let nullable_date = match ld.this {
                                Expression::Cast(mut c) => {
                                    c.to = DataType::Nullable {
                                        inner: Box::new(DataType::Date),
                                    };
                                    Expression::Cast(c)
                                }
                                other => other,
                            };
                            ld.this = nullable_date;
                            Ok(Expression::LastDay(ld))
                        }
                        // All other targets keep the (unit-stripped) LAST_DAY node.
                        _ => Ok(Expression::LastDay(ld)),
                    }
                } else {
                    Ok(e)
                }
            }
9313
9314 Action::BigQueryCastFormat => {
9315 // CAST(x AS DATE FORMAT 'fmt') -> PARSE_DATE('%m/%d/%Y', x) for BigQuery
9316 // CAST(x AS TIMESTAMP FORMAT 'fmt') -> PARSE_TIMESTAMP(...) for BigQuery
9317 // SAFE_CAST(x AS DATE FORMAT 'fmt') -> CAST(TRY_STRPTIME(x, ...) AS DATE) for DuckDB
9318 let (this, to, format_expr, is_safe) = match e {
9319 Expression::Cast(ref c) if c.format.is_some() => (
9320 c.this.clone(),
9321 c.to.clone(),
9322 c.format.as_ref().unwrap().as_ref().clone(),
9323 false,
9324 ),
9325 Expression::SafeCast(ref c) if c.format.is_some() => (
9326 c.this.clone(),
9327 c.to.clone(),
9328 c.format.as_ref().unwrap().as_ref().clone(),
9329 true,
9330 ),
9331 _ => return Ok(e),
9332 };
9333 // For CAST(x AS STRING FORMAT ...) when target is BigQuery, keep as-is
9334 if matches!(target, DialectType::BigQuery) {
9335 match &to {
9336 DataType::String { .. } | DataType::VarChar { .. } | DataType::Text => {
9337 // CAST(x AS STRING FORMAT 'fmt') stays as CAST expression for BigQuery
9338 return Ok(e);
9339 }
9340 _ => {}
9341 }
9342 }
9343 // Extract timezone from format if AT TIME ZONE is present
9344 let (actual_format_expr, timezone) = match &format_expr {
9345 Expression::AtTimeZone(ref atz) => {
9346 (atz.this.clone(), Some(atz.zone.clone()))
9347 }
9348 _ => (format_expr.clone(), None),
9349 };
9350 let strftime_fmt = Self::bq_cast_format_to_strftime(&actual_format_expr);
9351 match target {
9352 DialectType::BigQuery => {
9353 // CAST(x AS DATE FORMAT 'fmt') -> PARSE_DATE(strftime_fmt, x)
9354 // CAST(x AS TIMESTAMP FORMAT 'fmt' AT TIME ZONE 'tz') -> PARSE_TIMESTAMP(strftime_fmt, x, tz)
9355 let func_name = match &to {
9356 DataType::Date => "PARSE_DATE",
9357 DataType::Timestamp { .. } => "PARSE_TIMESTAMP",
9358 DataType::Time { .. } => "PARSE_TIMESTAMP",
9359 _ => "PARSE_TIMESTAMP",
9360 };
9361 let mut func_args = vec![strftime_fmt, this];
9362 if let Some(tz) = timezone {
9363 func_args.push(tz);
9364 }
9365 Ok(Expression::Function(Box::new(Function::new(
9366 func_name.to_string(),
9367 func_args,
9368 ))))
9369 }
9370 DialectType::DuckDB => {
9371 // SAFE_CAST(x AS DATE FORMAT 'fmt') -> CAST(TRY_STRPTIME(x, fmt) AS DATE)
9372 // CAST(x AS DATE FORMAT 'fmt') -> CAST(STRPTIME(x, fmt) AS DATE)
9373 let duck_fmt = Self::bq_format_to_duckdb(&strftime_fmt);
9374 let parse_fn_name = if is_safe { "TRY_STRPTIME" } else { "STRPTIME" };
9375 let parse_call = Expression::Function(Box::new(Function::new(
9376 parse_fn_name.to_string(),
9377 vec![this, duck_fmt],
9378 )));
9379 Ok(Expression::Cast(Box::new(Cast {
9380 this: parse_call,
9381 to,
9382 trailing_comments: vec![],
9383 double_colon_syntax: false,
9384 format: None,
9385 default: None,
9386 inferred_type: None,
9387 })))
9388 }
9389 _ => Ok(e),
9390 }
9391 }
9392
9393 Action::BigQueryFunctionNormalize => {
9394 Self::normalize_bigquery_function(e, source, target)
9395 }
9396
9397 Action::BigQueryToHexBare => {
9398 // Not used anymore - handled directly in normalize_bigquery_function
9399 Ok(e)
9400 }
9401
9402 Action::BigQueryToHexLower => {
9403 if let Expression::Lower(uf) = e {
9404 match uf.this {
9405 // BQ->BQ: LOWER(TO_HEX(x)) -> TO_HEX(x)
9406 Expression::Function(f)
9407 if matches!(target, DialectType::BigQuery)
9408 && f.name == "TO_HEX" =>
9409 {
9410 Ok(Expression::Function(f))
9411 }
9412 // LOWER(LOWER(HEX/TO_HEX(x))) patterns
9413 Expression::Lower(inner_uf) => {
9414 if matches!(target, DialectType::BigQuery) {
9415 // BQ->BQ: extract TO_HEX
9416 if let Expression::Function(f) = inner_uf.this {
9417 Ok(Expression::Function(Box::new(Function::new(
9418 "TO_HEX".to_string(),
9419 f.args,
9420 ))))
9421 } else {
9422 Ok(Expression::Lower(inner_uf))
9423 }
9424 } else {
9425 // Flatten: LOWER(LOWER(x)) -> LOWER(x)
9426 Ok(Expression::Lower(inner_uf))
9427 }
9428 }
9429 other => {
9430 Ok(Expression::Lower(Box::new(crate::expressions::UnaryFunc {
9431 this: other,
9432 original_name: None,
9433 inferred_type: None,
9434 })))
9435 }
9436 }
9437 } else {
9438 Ok(e)
9439 }
9440 }
9441
9442 Action::BigQueryToHexUpper => {
9443 // UPPER(LOWER(HEX(x))) -> HEX(x) (UPPER cancels LOWER, HEX is already uppercase)
9444 // UPPER(LOWER(TO_HEX(x))) -> TO_HEX(x) for Presto/Trino
9445 if let Expression::Upper(uf) = e {
9446 if let Expression::Lower(inner_uf) = uf.this {
9447 // For BQ->BQ: UPPER(TO_HEX(x)) should stay as UPPER(TO_HEX(x))
9448 if matches!(target, DialectType::BigQuery) {
9449 // Restore TO_HEX name in inner function
9450 if let Expression::Function(f) = inner_uf.this {
9451 let restored = Expression::Function(Box::new(Function::new(
9452 "TO_HEX".to_string(),
9453 f.args,
9454 )));
9455 Ok(Expression::Upper(Box::new(
9456 crate::expressions::UnaryFunc::new(restored),
9457 )))
9458 } else {
9459 Ok(Expression::Upper(inner_uf))
9460 }
9461 } else {
9462 // Extract the inner HEX/TO_HEX function (UPPER(LOWER(x)) = x when HEX is uppercase)
9463 Ok(inner_uf.this)
9464 }
9465 } else {
9466 Ok(Expression::Upper(uf))
9467 }
9468 } else {
9469 Ok(e)
9470 }
9471 }
9472
9473 Action::BigQueryAnyValueHaving => {
9474 // ANY_VALUE(x HAVING MAX y) -> ARG_MAX_NULL(x, y)
9475 // ANY_VALUE(x HAVING MIN y) -> ARG_MIN_NULL(x, y)
9476 if let Expression::AnyValue(agg) = e {
9477 if let Some((having_expr, is_max)) = agg.having_max {
9478 let func_name = if is_max {
9479 "ARG_MAX_NULL"
9480 } else {
9481 "ARG_MIN_NULL"
9482 };
9483 Ok(Expression::Function(Box::new(Function::new(
9484 func_name.to_string(),
9485 vec![agg.this, *having_expr],
9486 ))))
9487 } else {
9488 Ok(Expression::AnyValue(agg))
9489 }
9490 } else {
9491 Ok(e)
9492 }
9493 }
9494
9495 Action::BigQueryApproxQuantiles => {
9496 // APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [0, 1/n, 2/n, ..., 1])
9497 // APPROX_QUANTILES(DISTINCT x, n) -> APPROX_QUANTILE(DISTINCT x, [0, 1/n, ..., 1])
9498 if let Expression::AggregateFunction(agg) = e {
9499 if agg.args.len() >= 2 {
9500 let x_expr = agg.args[0].clone();
9501 let n_expr = &agg.args[1];
9502
9503 // Extract the numeric value from n_expr
9504 let n = match n_expr {
9505 Expression::Literal(crate::expressions::Literal::Number(s)) => {
9506 s.parse::<usize>().unwrap_or(2)
9507 }
9508 _ => 2,
9509 };
9510
9511 // Generate quantile array: [0, 1/n, 2/n, ..., 1]
9512 let mut quantiles = Vec::new();
9513 for i in 0..=n {
9514 let q = i as f64 / n as f64;
9515 // Format nicely: 0 -> 0, 0.25 -> 0.25, 1 -> 1
9516 if q == 0.0 {
9517 quantiles.push(Expression::number(0));
9518 } else if q == 1.0 {
9519 quantiles.push(Expression::number(1));
9520 } else {
9521 quantiles.push(Expression::Literal(
9522 crate::expressions::Literal::Number(format!("{}", q)),
9523 ));
9524 }
9525 }
9526
9527 let array_expr =
9528 Expression::Array(Box::new(crate::expressions::Array {
9529 expressions: quantiles,
9530 }));
9531
9532 // Preserve DISTINCT modifier
9533 let mut new_func = Function::new(
9534 "APPROX_QUANTILE".to_string(),
9535 vec![x_expr, array_expr],
9536 );
9537 new_func.distinct = agg.distinct;
9538 Ok(Expression::Function(Box::new(new_func)))
9539 } else {
9540 Ok(Expression::AggregateFunction(agg))
9541 }
9542 } else {
9543 Ok(e)
9544 }
9545 }
9546
9547 Action::GenericFunctionNormalize => {
9548 // Helper closure to convert ARBITRARY to target-specific function
9549 fn convert_arbitrary(arg: Expression, target: DialectType) -> Expression {
9550 let name = match target {
9551 DialectType::ClickHouse => "any",
9552 DialectType::TSQL | DialectType::SQLite => "MAX",
9553 DialectType::Hive => "FIRST",
9554 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
9555 "ARBITRARY"
9556 }
9557 _ => "ANY_VALUE",
9558 };
9559 Expression::Function(Box::new(Function::new(name.to_string(), vec![arg])))
9560 }
9561
9562 if let Expression::Function(f) = e {
9563 let name = f.name.to_uppercase();
9564 match name.as_str() {
9565 "ARBITRARY" if f.args.len() == 1 => {
9566 let arg = f.args.into_iter().next().unwrap();
9567 Ok(convert_arbitrary(arg, target))
9568 }
9569 "TO_NUMBER" if f.args.len() == 1 => {
9570 let arg = f.args.into_iter().next().unwrap();
9571 match target {
9572 DialectType::Oracle | DialectType::Snowflake => {
9573 Ok(Expression::Function(Box::new(Function::new(
9574 "TO_NUMBER".to_string(),
9575 vec![arg],
9576 ))))
9577 }
9578 _ => Ok(Expression::Cast(Box::new(crate::expressions::Cast {
9579 this: arg,
9580 to: crate::expressions::DataType::Double {
9581 precision: None,
9582 scale: None,
9583 },
9584 double_colon_syntax: false,
9585 trailing_comments: Vec::new(),
9586 format: None,
9587 default: None,
9588 inferred_type: None,
9589 }))),
9590 }
9591 }
9592 "AGGREGATE" if f.args.len() >= 3 => match target {
9593 DialectType::DuckDB
9594 | DialectType::Hive
9595 | DialectType::Presto
9596 | DialectType::Trino => Ok(Expression::Function(Box::new(
9597 Function::new("REDUCE".to_string(), f.args),
9598 ))),
9599 _ => Ok(Expression::Function(f)),
9600 },
9601 // REGEXP_MATCHES(x, y) -> RegexpLike for most targets, keep for DuckDB
9602 "REGEXP_MATCHES" if f.args.len() >= 2 => {
9603 if matches!(target, DialectType::DuckDB) {
9604 Ok(Expression::Function(f))
9605 } else {
9606 let mut args = f.args;
9607 let this = args.remove(0);
9608 let pattern = args.remove(0);
9609 let flags = if args.is_empty() {
9610 None
9611 } else {
9612 Some(args.remove(0))
9613 };
9614 Ok(Expression::RegexpLike(Box::new(
9615 crate::expressions::RegexpFunc {
9616 this,
9617 pattern,
9618 flags,
9619 },
9620 )))
9621 }
9622 }
9623 // REGEXP_FULL_MATCH (Hive REGEXP) -> RegexpLike
9624 "REGEXP_FULL_MATCH" if f.args.len() >= 2 => {
9625 if matches!(target, DialectType::DuckDB) {
9626 Ok(Expression::Function(f))
9627 } else {
9628 let mut args = f.args;
9629 let this = args.remove(0);
9630 let pattern = args.remove(0);
9631 let flags = if args.is_empty() {
9632 None
9633 } else {
9634 Some(args.remove(0))
9635 };
9636 Ok(Expression::RegexpLike(Box::new(
9637 crate::expressions::RegexpFunc {
9638 this,
9639 pattern,
9640 flags,
9641 },
9642 )))
9643 }
9644 }
9645 // STRUCT_EXTRACT(x, 'field') -> x.field (StructExtract expression)
9646 "STRUCT_EXTRACT" if f.args.len() == 2 => {
9647 let mut args = f.args;
9648 let this = args.remove(0);
9649 let field_expr = args.remove(0);
9650 // Extract string literal to get field name
9651 let field_name = match &field_expr {
9652 Expression::Literal(crate::expressions::Literal::String(s)) => {
9653 s.clone()
9654 }
9655 Expression::Identifier(id) => id.name.clone(),
9656 _ => {
9657 return Ok(Expression::Function(Box::new(Function::new(
9658 "STRUCT_EXTRACT".to_string(),
9659 vec![this, field_expr],
9660 ))))
9661 }
9662 };
9663 Ok(Expression::StructExtract(Box::new(
9664 crate::expressions::StructExtractFunc {
9665 this,
9666 field: crate::expressions::Identifier::new(field_name),
9667 },
9668 )))
9669 }
9670 // LIST_FILTER([4,5,6], x -> x > 4) -> FILTER(ARRAY(4,5,6), x -> x > 4)
9671 "LIST_FILTER" if f.args.len() == 2 => {
9672 let name = match target {
9673 DialectType::DuckDB => "LIST_FILTER",
9674 _ => "FILTER",
9675 };
9676 Ok(Expression::Function(Box::new(Function::new(
9677 name.to_string(),
9678 f.args,
9679 ))))
9680 }
9681 // LIST_TRANSFORM(x, y -> y + 1) -> TRANSFORM(x, y -> y + 1)
9682 "LIST_TRANSFORM" if f.args.len() == 2 => {
9683 let name = match target {
9684 DialectType::DuckDB => "LIST_TRANSFORM",
9685 _ => "TRANSFORM",
9686 };
9687 Ok(Expression::Function(Box::new(Function::new(
9688 name.to_string(),
9689 f.args,
9690 ))))
9691 }
9692 // LIST_SORT(x) -> SORT_ARRAY(x) / ARRAY_SORT(x)
9693 "LIST_SORT" if f.args.len() >= 1 => {
9694 let name = match target {
9695 DialectType::DuckDB
9696 | DialectType::Presto
9697 | DialectType::Trino => "ARRAY_SORT",
9698 _ => "SORT_ARRAY",
9699 };
9700 Ok(Expression::Function(Box::new(Function::new(
9701 name.to_string(),
9702 f.args,
9703 ))))
9704 }
9705 // LIST_REVERSE_SORT(x) -> SORT_ARRAY(x, FALSE) for Spark/Hive, ARRAY_SORT(x, lambda) for Presto
9706 "LIST_REVERSE_SORT" if f.args.len() >= 1 => {
9707 match target {
9708 DialectType::DuckDB => Ok(Expression::Function(Box::new(
9709 Function::new("ARRAY_REVERSE_SORT".to_string(), f.args),
9710 ))),
9711 DialectType::Spark
9712 | DialectType::Databricks
9713 | DialectType::Hive => {
9714 let mut args = f.args;
9715 args.push(Expression::Identifier(
9716 crate::expressions::Identifier::new("FALSE"),
9717 ));
9718 Ok(Expression::Function(Box::new(Function::new(
9719 "SORT_ARRAY".to_string(),
9720 args,
9721 ))))
9722 }
9723 DialectType::Presto
9724 | DialectType::Trino
9725 | DialectType::Athena => {
9726 // ARRAY_SORT(x, (a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END)
9727 let arr = f.args.into_iter().next().unwrap();
9728 let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
9729 parameters: vec![
9730 crate::expressions::Identifier::new("a"),
9731 crate::expressions::Identifier::new("b"),
9732 ],
9733 body: Expression::Case(Box::new(Case {
9734 operand: None,
9735 whens: vec![
9736 (
9737 Expression::Lt(Box::new(BinaryOp::new(
9738 Expression::Identifier(crate::expressions::Identifier::new("a")),
9739 Expression::Identifier(crate::expressions::Identifier::new("b")),
9740 ))),
9741 Expression::number(1),
9742 ),
9743 (
9744 Expression::Gt(Box::new(BinaryOp::new(
9745 Expression::Identifier(crate::expressions::Identifier::new("a")),
9746 Expression::Identifier(crate::expressions::Identifier::new("b")),
9747 ))),
9748 Expression::Literal(Literal::Number("-1".to_string())),
9749 ),
9750 ],
9751 else_: Some(Expression::number(0)),
9752 comments: Vec::new(),
9753 inferred_type: None,
9754 })),
9755 colon: false,
9756 parameter_types: Vec::new(),
9757 }));
9758 Ok(Expression::Function(Box::new(Function::new(
9759 "ARRAY_SORT".to_string(),
9760 vec![arr, lambda],
9761 ))))
9762 }
9763 _ => Ok(Expression::Function(Box::new(Function::new(
9764 "LIST_REVERSE_SORT".to_string(),
9765 f.args,
9766 )))),
9767 }
9768 }
9769 // SPLIT_TO_ARRAY(x) with 1 arg -> add default ',' separator and rename
9770 "SPLIT_TO_ARRAY" if f.args.len() == 1 => {
9771 let mut args = f.args;
9772 args.push(Expression::string(","));
9773 let name = match target {
9774 DialectType::DuckDB => "STR_SPLIT",
9775 DialectType::Presto | DialectType::Trino => "SPLIT",
9776 DialectType::Spark
9777 | DialectType::Databricks
9778 | DialectType::Hive => "SPLIT",
9779 DialectType::PostgreSQL => "STRING_TO_ARRAY",
9780 DialectType::Redshift => "SPLIT_TO_ARRAY",
9781 _ => "SPLIT",
9782 };
9783 Ok(Expression::Function(Box::new(Function::new(
9784 name.to_string(),
9785 args,
9786 ))))
9787 }
9788 // SPLIT_TO_ARRAY(x, sep) with 2 args -> rename based on target
9789 "SPLIT_TO_ARRAY" if f.args.len() == 2 => {
9790 let name = match target {
9791 DialectType::DuckDB => "STR_SPLIT",
9792 DialectType::Presto | DialectType::Trino => "SPLIT",
9793 DialectType::Spark
9794 | DialectType::Databricks
9795 | DialectType::Hive => "SPLIT",
9796 DialectType::PostgreSQL => "STRING_TO_ARRAY",
9797 DialectType::Redshift => "SPLIT_TO_ARRAY",
9798 _ => "SPLIT",
9799 };
9800 Ok(Expression::Function(Box::new(Function::new(
9801 name.to_string(),
9802 f.args,
9803 ))))
9804 }
9805 // STRING_TO_ARRAY/STR_SPLIT -> target-specific split function
9806 "STRING_TO_ARRAY" | "STR_SPLIT" if f.args.len() >= 2 => {
9807 let name = match target {
9808 DialectType::DuckDB => "STR_SPLIT",
9809 DialectType::Presto | DialectType::Trino => "SPLIT",
9810 DialectType::Spark
9811 | DialectType::Databricks
9812 | DialectType::Hive => "SPLIT",
9813 DialectType::Doris | DialectType::StarRocks => {
9814 "SPLIT_BY_STRING"
9815 }
9816 DialectType::PostgreSQL | DialectType::Redshift => {
9817 "STRING_TO_ARRAY"
9818 }
9819 _ => "SPLIT",
9820 };
9821 // For Spark/Hive, SPLIT uses regex - need to escape literal with \Q...\E
9822 if matches!(
9823 target,
9824 DialectType::Spark
9825 | DialectType::Databricks
9826 | DialectType::Hive
9827 ) {
9828 let mut args = f.args;
9829 let x = args.remove(0);
9830 let sep = args.remove(0);
9831 // Wrap separator in CONCAT('\\Q', sep, '\\E')
9832 let escaped_sep =
9833 Expression::Function(Box::new(Function::new(
9834 "CONCAT".to_string(),
9835 vec![
9836 Expression::string("\\Q"),
9837 sep,
9838 Expression::string("\\E"),
9839 ],
9840 )));
9841 Ok(Expression::Function(Box::new(Function::new(
9842 name.to_string(),
9843 vec![x, escaped_sep],
9844 ))))
9845 } else {
9846 Ok(Expression::Function(Box::new(Function::new(
9847 name.to_string(),
9848 f.args,
9849 ))))
9850 }
9851 }
9852 // STR_SPLIT_REGEX(x, 'a') / REGEXP_SPLIT(x, 'a') -> target-specific regex split
9853 "STR_SPLIT_REGEX" | "REGEXP_SPLIT" if f.args.len() == 2 => {
9854 let name = match target {
9855 DialectType::DuckDB => "STR_SPLIT_REGEX",
9856 DialectType::Presto | DialectType::Trino => "REGEXP_SPLIT",
9857 DialectType::Spark
9858 | DialectType::Databricks
9859 | DialectType::Hive => "SPLIT",
9860 _ => "REGEXP_SPLIT",
9861 };
9862 Ok(Expression::Function(Box::new(Function::new(
9863 name.to_string(),
9864 f.args,
9865 ))))
9866 }
9867 // SPLIT(x, sep) from Presto/StarRocks/Doris -> target-specific split with regex escaping for Hive/Spark
9868 "SPLIT"
9869 if f.args.len() == 2
9870 && matches!(
9871 source,
9872 DialectType::Presto
9873 | DialectType::Trino
9874 | DialectType::Athena
9875 | DialectType::StarRocks
9876 | DialectType::Doris
9877 )
9878 && matches!(
9879 target,
9880 DialectType::Spark
9881 | DialectType::Databricks
9882 | DialectType::Hive
9883 ) =>
9884 {
9885 // Presto/StarRocks SPLIT is literal, Hive/Spark SPLIT is regex
9886 let mut args = f.args;
9887 let x = args.remove(0);
9888 let sep = args.remove(0);
9889 let escaped_sep = Expression::Function(Box::new(Function::new(
9890 "CONCAT".to_string(),
9891 vec![Expression::string("\\Q"), sep, Expression::string("\\E")],
9892 )));
9893 Ok(Expression::Function(Box::new(Function::new(
9894 "SPLIT".to_string(),
9895 vec![x, escaped_sep],
9896 ))))
9897 }
9898 // SUBSTRINGINDEX -> SUBSTRING_INDEX (ClickHouse camelCase to standard)
9899 // For ClickHouse target, preserve original name to maintain camelCase
9900 "SUBSTRINGINDEX" => {
9901 let name = if matches!(target, DialectType::ClickHouse) {
9902 f.name.clone()
9903 } else {
9904 "SUBSTRING_INDEX".to_string()
9905 };
9906 Ok(Expression::Function(Box::new(Function::new(name, f.args))))
9907 }
9908 // ARRAY_LENGTH/SIZE/CARDINALITY -> target-specific array length function
9909 "ARRAY_LENGTH" | "SIZE" | "CARDINALITY" => {
9910 // Get the array argument (first arg, drop dimension args)
9911 let mut args = f.args;
9912 let arr = if args.is_empty() {
9913 return Ok(Expression::Function(Box::new(Function::new(
9914 name.to_string(),
9915 args,
9916 ))));
9917 } else {
9918 args.remove(0)
9919 };
9920 let name =
9921 match target {
9922 DialectType::Spark
9923 | DialectType::Databricks
9924 | DialectType::Hive => "SIZE",
9925 DialectType::Presto | DialectType::Trino => "CARDINALITY",
9926 DialectType::BigQuery => "ARRAY_LENGTH",
9927 DialectType::DuckDB => {
9928 // DuckDB: use ARRAY_LENGTH with all args
9929 let mut all_args = vec![arr];
9930 all_args.extend(args);
9931 return Ok(Expression::Function(Box::new(
9932 Function::new("ARRAY_LENGTH".to_string(), all_args),
9933 )));
9934 }
9935 DialectType::PostgreSQL | DialectType::Redshift => {
9936 // Keep ARRAY_LENGTH with dimension arg
9937 let mut all_args = vec![arr];
9938 all_args.extend(args);
9939 return Ok(Expression::Function(Box::new(
9940 Function::new("ARRAY_LENGTH".to_string(), all_args),
9941 )));
9942 }
9943 DialectType::ClickHouse => "LENGTH",
9944 _ => "ARRAY_LENGTH",
9945 };
9946 Ok(Expression::Function(Box::new(Function::new(
9947 name.to_string(),
9948 vec![arr],
9949 ))))
9950 }
9951 // UNICODE(x) -> target-specific codepoint function
9952 "UNICODE" if f.args.len() == 1 => {
9953 match target {
9954 DialectType::SQLite | DialectType::DuckDB => {
9955 Ok(Expression::Function(Box::new(Function::new(
9956 "UNICODE".to_string(),
9957 f.args,
9958 ))))
9959 }
9960 DialectType::Oracle => {
9961 // ASCII(UNISTR(x))
9962 let inner = Expression::Function(Box::new(Function::new(
9963 "UNISTR".to_string(),
9964 f.args,
9965 )));
9966 Ok(Expression::Function(Box::new(Function::new(
9967 "ASCII".to_string(),
9968 vec![inner],
9969 ))))
9970 }
9971 DialectType::MySQL => {
9972 // ORD(CONVERT(x USING utf32))
9973 let arg = f.args.into_iter().next().unwrap();
9974 let convert_expr = Expression::ConvertToCharset(Box::new(
9975 crate::expressions::ConvertToCharset {
9976 this: Box::new(arg),
9977 dest: Some(Box::new(Expression::Identifier(
9978 crate::expressions::Identifier::new("utf32"),
9979 ))),
9980 source: None,
9981 },
9982 ));
9983 Ok(Expression::Function(Box::new(Function::new(
9984 "ORD".to_string(),
9985 vec![convert_expr],
9986 ))))
9987 }
9988 _ => Ok(Expression::Function(Box::new(Function::new(
9989 "ASCII".to_string(),
9990 f.args,
9991 )))),
9992 }
9993 }
9994 // XOR(a, b, ...) -> a XOR b XOR ... for MySQL, BITWISE_XOR for Presto/Trino, # for PostgreSQL, ^ for BigQuery
9995 "XOR" if f.args.len() >= 2 => {
9996 match target {
9997 DialectType::ClickHouse => {
9998 // ClickHouse: keep as xor() function with lowercase name
9999 Ok(Expression::Function(Box::new(Function::new(
10000 "xor".to_string(),
10001 f.args,
10002 ))))
10003 }
10004 DialectType::Presto | DialectType::Trino => {
10005 if f.args.len() == 2 {
10006 Ok(Expression::Function(Box::new(Function::new(
10007 "BITWISE_XOR".to_string(),
10008 f.args,
10009 ))))
10010 } else {
10011 // Nest: BITWISE_XOR(BITWISE_XOR(a, b), c)
10012 let mut args = f.args;
10013 let first = args.remove(0);
10014 let second = args.remove(0);
10015 let mut result =
10016 Expression::Function(Box::new(Function::new(
10017 "BITWISE_XOR".to_string(),
10018 vec![first, second],
10019 )));
10020 for arg in args {
10021 result =
10022 Expression::Function(Box::new(Function::new(
10023 "BITWISE_XOR".to_string(),
10024 vec![result, arg],
10025 )));
10026 }
10027 Ok(result)
10028 }
10029 }
10030 DialectType::MySQL
10031 | DialectType::SingleStore
10032 | DialectType::Doris
10033 | DialectType::StarRocks => {
10034 // Convert XOR(a, b, c) -> Expression::Xor with expressions list
10035 let args = f.args;
10036 Ok(Expression::Xor(Box::new(crate::expressions::Xor {
10037 this: None,
10038 expression: None,
10039 expressions: args,
10040 })))
10041 }
10042 DialectType::PostgreSQL | DialectType::Redshift => {
10043 // PostgreSQL: a # b (hash operator for XOR)
10044 let mut args = f.args;
10045 let first = args.remove(0);
10046 let second = args.remove(0);
10047 let mut result = Expression::BitwiseXor(Box::new(
10048 BinaryOp::new(first, second),
10049 ));
10050 for arg in args {
10051 result = Expression::BitwiseXor(Box::new(
10052 BinaryOp::new(result, arg),
10053 ));
10054 }
10055 Ok(result)
10056 }
10057 DialectType::DuckDB => {
10058 // DuckDB: keep as XOR function (DuckDB ^ is Power, not XOR)
10059 Ok(Expression::Function(Box::new(Function::new(
10060 "XOR".to_string(),
10061 f.args,
10062 ))))
10063 }
10064 DialectType::BigQuery => {
10065 // BigQuery: a ^ b (caret operator for XOR)
10066 let mut args = f.args;
10067 let first = args.remove(0);
10068 let second = args.remove(0);
10069 let mut result = Expression::BitwiseXor(Box::new(
10070 BinaryOp::new(first, second),
10071 ));
10072 for arg in args {
10073 result = Expression::BitwiseXor(Box::new(
10074 BinaryOp::new(result, arg),
10075 ));
10076 }
10077 Ok(result)
10078 }
10079 _ => Ok(Expression::Function(Box::new(Function::new(
10080 "XOR".to_string(),
10081 f.args,
10082 )))),
10083 }
10084 }
10085 // ARRAY_REVERSE_SORT(x) -> SORT_ARRAY(x, FALSE) for Spark/Hive, ARRAY_SORT(x, lambda) for Presto
10086 "ARRAY_REVERSE_SORT" if f.args.len() >= 1 => {
10087 match target {
10088 DialectType::Spark
10089 | DialectType::Databricks
10090 | DialectType::Hive => {
10091 let mut args = f.args;
10092 args.push(Expression::Identifier(
10093 crate::expressions::Identifier::new("FALSE"),
10094 ));
10095 Ok(Expression::Function(Box::new(Function::new(
10096 "SORT_ARRAY".to_string(),
10097 args,
10098 ))))
10099 }
10100 DialectType::Presto
10101 | DialectType::Trino
10102 | DialectType::Athena => {
10103 // ARRAY_SORT(x, (a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END)
10104 let arr = f.args.into_iter().next().unwrap();
10105 let lambda = Expression::Lambda(Box::new(
10106 crate::expressions::LambdaExpr {
10107 parameters: vec![
10108 Identifier::new("a"),
10109 Identifier::new("b"),
10110 ],
10111 colon: false,
10112 parameter_types: Vec::new(),
10113 body: Expression::Case(Box::new(Case {
10114 operand: None,
10115 whens: vec![
10116 (
10117 Expression::Lt(Box::new(
10118 BinaryOp::new(
10119 Expression::Identifier(
10120 Identifier::new("a"),
10121 ),
10122 Expression::Identifier(
10123 Identifier::new("b"),
10124 ),
10125 ),
10126 )),
10127 Expression::number(1),
10128 ),
10129 (
10130 Expression::Gt(Box::new(
10131 BinaryOp::new(
10132 Expression::Identifier(
10133 Identifier::new("a"),
10134 ),
10135 Expression::Identifier(
10136 Identifier::new("b"),
10137 ),
10138 ),
10139 )),
10140 Expression::Neg(Box::new(
10141 crate::expressions::UnaryOp {
10142 this: Expression::number(1),
10143 inferred_type: None,
10144 },
10145 )),
10146 ),
10147 ],
10148 else_: Some(Expression::number(0)),
10149 comments: Vec::new(),
10150 inferred_type: None,
10151 })),
10152 },
10153 ));
10154 Ok(Expression::Function(Box::new(Function::new(
10155 "ARRAY_SORT".to_string(),
10156 vec![arr, lambda],
10157 ))))
10158 }
10159 _ => Ok(Expression::Function(Box::new(Function::new(
10160 "ARRAY_REVERSE_SORT".to_string(),
10161 f.args,
10162 )))),
10163 }
10164 }
10165 // ENCODE(x) -> ENCODE(x, 'utf-8') for Spark/Hive, TO_UTF8(x) for Presto
10166 "ENCODE" if f.args.len() == 1 => match target {
10167 DialectType::Spark
10168 | DialectType::Databricks
10169 | DialectType::Hive => {
10170 let mut args = f.args;
10171 args.push(Expression::string("utf-8"));
10172 Ok(Expression::Function(Box::new(Function::new(
10173 "ENCODE".to_string(),
10174 args,
10175 ))))
10176 }
10177 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
10178 Ok(Expression::Function(Box::new(Function::new(
10179 "TO_UTF8".to_string(),
10180 f.args,
10181 ))))
10182 }
10183 _ => Ok(Expression::Function(Box::new(Function::new(
10184 "ENCODE".to_string(),
10185 f.args,
10186 )))),
10187 },
10188 // DECODE(x) -> DECODE(x, 'utf-8') for Spark/Hive, FROM_UTF8(x) for Presto
10189 "DECODE" if f.args.len() == 1 => match target {
10190 DialectType::Spark
10191 | DialectType::Databricks
10192 | DialectType::Hive => {
10193 let mut args = f.args;
10194 args.push(Expression::string("utf-8"));
10195 Ok(Expression::Function(Box::new(Function::new(
10196 "DECODE".to_string(),
10197 args,
10198 ))))
10199 }
10200 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
10201 Ok(Expression::Function(Box::new(Function::new(
10202 "FROM_UTF8".to_string(),
10203 f.args,
10204 ))))
10205 }
10206 _ => Ok(Expression::Function(Box::new(Function::new(
10207 "DECODE".to_string(),
10208 f.args,
10209 )))),
10210 },
10211 // QUANTILE(x, p) -> PERCENTILE(x, p) for Spark/Hive
10212 "QUANTILE" if f.args.len() == 2 => {
10213 let name = match target {
10214 DialectType::Spark
10215 | DialectType::Databricks
10216 | DialectType::Hive => "PERCENTILE",
10217 DialectType::Presto | DialectType::Trino => "APPROX_PERCENTILE",
10218 DialectType::BigQuery => "PERCENTILE_CONT",
10219 _ => "QUANTILE",
10220 };
10221 Ok(Expression::Function(Box::new(Function::new(
10222 name.to_string(),
10223 f.args,
10224 ))))
10225 }
10226 // QUANTILE_CONT(x, q) -> PERCENTILE_CONT(q) WITHIN GROUP (ORDER BY x) for PostgreSQL/Snowflake
10227 "QUANTILE_CONT" if f.args.len() == 2 => {
10228 let mut args = f.args;
10229 let column = args.remove(0);
10230 let quantile = args.remove(0);
10231 match target {
10232 DialectType::DuckDB => {
10233 Ok(Expression::Function(Box::new(Function::new(
10234 "QUANTILE_CONT".to_string(),
10235 vec![column, quantile],
10236 ))))
10237 }
10238 DialectType::PostgreSQL
10239 | DialectType::Redshift
10240 | DialectType::Snowflake => {
10241 // PERCENTILE_CONT(q) WITHIN GROUP (ORDER BY x)
10242 let inner = Expression::PercentileCont(Box::new(
10243 crate::expressions::PercentileFunc {
10244 this: column.clone(),
10245 percentile: quantile,
10246 order_by: None,
10247 filter: None,
10248 },
10249 ));
10250 Ok(Expression::WithinGroup(Box::new(
10251 crate::expressions::WithinGroup {
10252 this: inner,
10253 order_by: vec![crate::expressions::Ordered {
10254 this: column,
10255 desc: false,
10256 nulls_first: None,
10257 explicit_asc: false,
10258 with_fill: None,
10259 }],
10260 },
10261 )))
10262 }
10263 _ => Ok(Expression::Function(Box::new(Function::new(
10264 "QUANTILE_CONT".to_string(),
10265 vec![column, quantile],
10266 )))),
10267 }
10268 }
10269 // QUANTILE_DISC(x, q) -> PERCENTILE_DISC(q) WITHIN GROUP (ORDER BY x) for PostgreSQL/Snowflake
10270 "QUANTILE_DISC" if f.args.len() == 2 => {
10271 let mut args = f.args;
10272 let column = args.remove(0);
10273 let quantile = args.remove(0);
10274 match target {
10275 DialectType::DuckDB => {
10276 Ok(Expression::Function(Box::new(Function::new(
10277 "QUANTILE_DISC".to_string(),
10278 vec![column, quantile],
10279 ))))
10280 }
10281 DialectType::PostgreSQL
10282 | DialectType::Redshift
10283 | DialectType::Snowflake => {
10284 // PERCENTILE_DISC(q) WITHIN GROUP (ORDER BY x)
10285 let inner = Expression::PercentileDisc(Box::new(
10286 crate::expressions::PercentileFunc {
10287 this: column.clone(),
10288 percentile: quantile,
10289 order_by: None,
10290 filter: None,
10291 },
10292 ));
10293 Ok(Expression::WithinGroup(Box::new(
10294 crate::expressions::WithinGroup {
10295 this: inner,
10296 order_by: vec![crate::expressions::Ordered {
10297 this: column,
10298 desc: false,
10299 nulls_first: None,
10300 explicit_asc: false,
10301 with_fill: None,
10302 }],
10303 },
10304 )))
10305 }
10306 _ => Ok(Expression::Function(Box::new(Function::new(
10307 "QUANTILE_DISC".to_string(),
10308 vec![column, quantile],
10309 )))),
10310 }
10311 }
10312 // PERCENTILE_APPROX(x, p) / APPROX_PERCENTILE(x, p) -> target-specific
10313 "PERCENTILE_APPROX" | "APPROX_PERCENTILE" if f.args.len() >= 2 => {
10314 let name = match target {
10315 DialectType::Presto
10316 | DialectType::Trino
10317 | DialectType::Athena => "APPROX_PERCENTILE",
10318 DialectType::Spark
10319 | DialectType::Databricks
10320 | DialectType::Hive => "PERCENTILE_APPROX",
10321 DialectType::DuckDB => "APPROX_QUANTILE",
10322 DialectType::PostgreSQL | DialectType::Redshift => {
10323 "PERCENTILE_CONT"
10324 }
10325 _ => &f.name,
10326 };
10327 Ok(Expression::Function(Box::new(Function::new(
10328 name.to_string(),
10329 f.args,
10330 ))))
10331 }
            // EPOCH(x) -> UNIX_TIMESTAMP(x) for Spark/Hive
            // (DuckDB-style EPOCH: seconds since the Unix epoch.)
            "EPOCH" if f.args.len() == 1 => {
                // Pick the target's native epoch-seconds function; targets
                // without a known equivalent keep EPOCH unchanged.
                let name = match target {
                    DialectType::Spark
                    | DialectType::Databricks
                    | DialectType::Hive => "UNIX_TIMESTAMP",
                    DialectType::Presto | DialectType::Trino => "TO_UNIXTIME",
                    _ => "EPOCH",
                };
                Ok(Expression::Function(Box::new(Function::new(
                    name.to_string(),
                    f.args,
                ))))
            }
            // EPOCH_MS(x) -> target-specific epoch milliseconds conversion
            "EPOCH_MS" if f.args.len() == 1 => {
                match target {
                    DialectType::Spark | DialectType::Databricks => {
                        // Spark has a direct millis -> timestamp builtin.
                        Ok(Expression::Function(Box::new(Function::new(
                            "TIMESTAMP_MILLIS".to_string(),
                            f.args,
                        ))))
                    }
                    DialectType::Hive => {
                        // Hive: FROM_UNIXTIME(x / 1000)
                        // The single-arg guard above makes this unwrap safe.
                        let arg = f.args.into_iter().next().unwrap();
                        let div_expr = Expression::Div(Box::new(
                            crate::expressions::BinaryOp::new(
                                arg,
                                Expression::number(1000),
                            ),
                        ));
                        Ok(Expression::Function(Box::new(Function::new(
                            "FROM_UNIXTIME".to_string(),
                            vec![div_expr],
                        ))))
                    }
                    DialectType::Presto | DialectType::Trino => {
                        // Presto/Trino: FROM_UNIXTIME(x / 1000) (seconds-based input).
                        Ok(Expression::Function(Box::new(Function::new(
                            "FROM_UNIXTIME".to_string(),
                            vec![Expression::Div(Box::new(
                                crate::expressions::BinaryOp::new(
                                    f.args.into_iter().next().unwrap(),
                                    Expression::number(1000),
                                ),
                            ))],
                        ))))
                    }
                    // All other targets: leave EPOCH_MS untouched.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "EPOCH_MS".to_string(),
                        f.args,
                    )))),
                }
            }
            // HASHBYTES('algorithm', x) -> target-specific hash function
            "HASHBYTES" if f.args.len() == 2 => {
                // Keep HASHBYTES as-is for TSQL target
                if matches!(target, DialectType::TSQL) {
                    return Ok(Expression::Function(f));
                }
                // The algorithm must be a string literal; anything else
                // (column ref, expression) passes through unchanged.
                let algo_expr = &f.args[0];
                let algo = match algo_expr {
                    Expression::Literal(crate::expressions::Literal::String(s)) => {
                        s.to_uppercase()
                    }
                    _ => return Ok(Expression::Function(f)),
                };
                // Two-arg guard above makes nth(1) safe.
                let data_arg = f.args.into_iter().nth(1).unwrap();
                match algo.as_str() {
                    "SHA1" => {
                        let name = match target {
                            // Spark/Databricks spell it SHA.
                            DialectType::Spark | DialectType::Databricks => "SHA",
                            DialectType::Hive => "SHA1",
                            _ => "SHA1",
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            name.to_string(),
                            vec![data_arg],
                        ))))
                    }
                    // SHA2_* variants map to SHA2(data, bit_length).
                    "SHA2_256" => {
                        Ok(Expression::Function(Box::new(Function::new(
                            "SHA2".to_string(),
                            vec![data_arg, Expression::number(256)],
                        ))))
                    }
                    "SHA2_512" => {
                        Ok(Expression::Function(Box::new(Function::new(
                            "SHA2".to_string(),
                            vec![data_arg, Expression::number(512)],
                        ))))
                    }
                    "MD5" => Ok(Expression::Function(Box::new(Function::new(
                        "MD5".to_string(),
                        vec![data_arg],
                    )))),
                    // Unknown algorithm: rebuild HASHBYTES, keeping the
                    // (uppercased) algorithm name.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "HASHBYTES".to_string(),
                        vec![Expression::string(&algo), data_arg],
                    )))),
                }
            }
            // JSON_EXTRACT_PATH(json, key1, key2, ...) -> target-specific JSON extraction
            "JSON_EXTRACT_PATH" | "JSON_EXTRACT_PATH_TEXT" if f.args.len() >= 2 => {
                // The _TEXT variant extracts a scalar as text; the plain
                // variant keeps the JSON representation.
                let is_text = name == "JSON_EXTRACT_PATH_TEXT";
                let mut args = f.args;
                let json_expr = args.remove(0);
                // Build JSON path from remaining keys: $.key1.key2 or $.key1[0]
                let mut json_path = "$".to_string();
                for a in &args {
                    match a {
                        Expression::Literal(
                            crate::expressions::Literal::String(s),
                        ) => {
                            // Numeric string keys become array indices: [0]
                            if s.chars().all(|c| c.is_ascii_digit()) {
                                json_path.push('[');
                                json_path.push_str(s);
                                json_path.push(']');
                            } else {
                                json_path.push('.');
                                json_path.push_str(s);
                            }
                        }
                        _ => {
                            // Non-literal key: a static path can't be built.
                            // NOTE(review): `.?` is emitted as a placeholder
                            // segment — confirm downstream targets accept it.
                            json_path.push_str(".?");
                        }
                    }
                }
                match target {
                    DialectType::Spark
                    | DialectType::Databricks
                    | DialectType::Hive => {
                        // Spark/Hive: GET_JSON_OBJECT(json, '$.path')
                        Ok(Expression::Function(Box::new(Function::new(
                            "GET_JSON_OBJECT".to_string(),
                            vec![json_expr, Expression::string(&json_path)],
                        ))))
                    }
                    DialectType::Presto | DialectType::Trino => {
                        let func_name = if is_text {
                            "JSON_EXTRACT_SCALAR"
                        } else {
                            "JSON_EXTRACT"
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            func_name.to_string(),
                            vec![json_expr, Expression::string(&json_path)],
                        ))))
                    }
                    DialectType::BigQuery | DialectType::MySQL => {
                        let func_name = if is_text {
                            "JSON_EXTRACT_SCALAR"
                        } else {
                            "JSON_EXTRACT"
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            func_name.to_string(),
                            vec![json_expr, Expression::string(&json_path)],
                        ))))
                    }
                    DialectType::PostgreSQL | DialectType::Materialize => {
                        // Keep as JSON_EXTRACT_PATH_TEXT / JSON_EXTRACT_PATH for PostgreSQL/Materialize
                        // (these take variadic key args, so the original keys
                        // are passed through rather than a single path string).
                        let func_name = if is_text {
                            "JSON_EXTRACT_PATH_TEXT"
                        } else {
                            "JSON_EXTRACT_PATH"
                        };
                        let mut new_args = vec![json_expr];
                        new_args.extend(args);
                        Ok(Expression::Function(Box::new(Function::new(
                            func_name.to_string(),
                            new_args,
                        ))))
                    }
                    DialectType::DuckDB | DialectType::SQLite => {
                        // Use -> for JSON_EXTRACT_PATH, ->> for JSON_EXTRACT_PATH_TEXT
                        if is_text {
                            Ok(Expression::JsonExtractScalar(Box::new(
                                crate::expressions::JsonExtractFunc {
                                    this: json_expr,
                                    path: Expression::string(&json_path),
                                    returning: None,
                                    arrow_syntax: true,
                                    hash_arrow_syntax: false,
                                    wrapper_option: None,
                                    quotes_option: None,
                                    on_scalar_string: false,
                                    on_error: None,
                                },
                            )))
                        } else {
                            Ok(Expression::JsonExtract(Box::new(
                                crate::expressions::JsonExtractFunc {
                                    this: json_expr,
                                    path: Expression::string(&json_path),
                                    returning: None,
                                    arrow_syntax: true,
                                    hash_arrow_syntax: false,
                                    wrapper_option: None,
                                    quotes_option: None,
                                    on_scalar_string: false,
                                    on_error: None,
                                },
                            )))
                        }
                    }
                    DialectType::Redshift => {
                        // Keep as JSON_EXTRACT_PATH_TEXT for Redshift
                        // (variadic keys, like the PostgreSQL branch above).
                        let mut new_args = vec![json_expr];
                        new_args.extend(args);
                        Ok(Expression::Function(Box::new(Function::new(
                            "JSON_EXTRACT_PATH_TEXT".to_string(),
                            new_args,
                        ))))
                    }
                    DialectType::TSQL => {
                        // ISNULL(JSON_QUERY(json, '$.path'), JSON_VALUE(json, '$.path'))
                        // JSON_QUERY handles objects/arrays, JSON_VALUE scalars;
                        // ISNULL picks whichever one matched.
                        let jq = Expression::Function(Box::new(Function::new(
                            "JSON_QUERY".to_string(),
                            vec![json_expr.clone(), Expression::string(&json_path)],
                        )));
                        let jv = Expression::Function(Box::new(Function::new(
                            "JSON_VALUE".to_string(),
                            vec![json_expr, Expression::string(&json_path)],
                        )));
                        Ok(Expression::Function(Box::new(Function::new(
                            "ISNULL".to_string(),
                            vec![jq, jv],
                        ))))
                    }
                    DialectType::ClickHouse => {
                        // ClickHouse takes variadic keys, not a path string.
                        let func_name = if is_text {
                            "JSONExtractString"
                        } else {
                            "JSONExtractRaw"
                        };
                        let mut new_args = vec![json_expr];
                        new_args.extend(args);
                        Ok(Expression::Function(Box::new(Function::new(
                            func_name.to_string(),
                            new_args,
                        ))))
                    }
                    _ => {
                        // Default: Presto-style names with a single path string.
                        let func_name = if is_text {
                            "JSON_EXTRACT_SCALAR"
                        } else {
                            "JSON_EXTRACT"
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            func_name.to_string(),
                            vec![json_expr, Expression::string(&json_path)],
                        ))))
                    }
                }
            }
            // APPROX_DISTINCT(x) -> APPROX_COUNT_DISTINCT(x) for Spark/Hive/BigQuery
            "APPROX_DISTINCT" if f.args.len() >= 1 => {
                let name = match target {
                    DialectType::Spark
                    | DialectType::Databricks
                    | DialectType::Hive
                    | DialectType::BigQuery => "APPROX_COUNT_DISTINCT",
                    _ => "APPROX_DISTINCT",
                };
                let mut args = f.args;
                // Hive doesn't support the accuracy parameter
                if name == "APPROX_COUNT_DISTINCT"
                    && matches!(target, DialectType::Hive)
                {
                    args.truncate(1);
                }
                Ok(Expression::Function(Box::new(Function::new(
                    name.to_string(),
                    args,
                ))))
            }
            // REGEXP_EXTRACT(x, pattern) - normalize default group index
            // Dialects disagree on which capture group a two-arg call returns:
            // Presto/Trino/DuckDB default to group 0 (whole match), while the
            // Hive family defaults to group 1.
            "REGEXP_EXTRACT" if f.args.len() == 2 => {
                // Determine source default group index
                let source_default = match source {
                    DialectType::Presto
                    | DialectType::Trino
                    | DialectType::DuckDB => 0,
                    _ => 1, // Hive/Spark/Databricks default = 1
                };
                // Determine target default group index
                let target_default = match target {
                    DialectType::Presto
                    | DialectType::Trino
                    | DialectType::DuckDB
                    | DialectType::BigQuery => 0,
                    DialectType::Snowflake => {
                        // Snowflake uses REGEXP_SUBSTR
                        return Ok(Expression::Function(Box::new(Function::new(
                            "REGEXP_SUBSTR".to_string(),
                            f.args,
                        ))));
                    }
                    _ => 1, // Hive/Spark/Databricks default = 1
                };
                if source_default != target_default {
                    // Defaults differ: make the source's implicit group index
                    // explicit so the target returns the same group.
                    let mut args = f.args;
                    args.push(Expression::number(source_default));
                    Ok(Expression::Function(Box::new(Function::new(
                        "REGEXP_EXTRACT".to_string(),
                        args,
                    ))))
                } else {
                    Ok(Expression::Function(Box::new(Function::new(
                        "REGEXP_EXTRACT".to_string(),
                        f.args,
                    ))))
                }
            }
            // RLIKE(str, pattern) -> RegexpLike expression (generates as target-specific form)
            "RLIKE" if f.args.len() == 2 => {
                let mut args = f.args;
                let str_expr = args.remove(0);
                let pattern = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // REGEXP_MATCHES(str, pattern)
                        Ok(Expression::Function(Box::new(Function::new(
                            "REGEXP_MATCHES".to_string(),
                            vec![str_expr, pattern],
                        ))))
                    }
                    _ => {
                        // Convert to RegexpLike which generates as RLIKE/~/REGEXP_LIKE per dialect
                        Ok(Expression::RegexpLike(Box::new(
                            crate::expressions::RegexpFunc {
                                this: str_expr,
                                pattern,
                                flags: None,
                            },
                        )))
                    }
                }
            }
            // EOMONTH(date[, month_offset]) -> target-specific
            // TSQL's end-of-month function; other dialects spell it as some
            // combination of LAST_DAY and date arithmetic.
            "EOMONTH" if f.args.len() >= 1 => {
                let mut args = f.args;
                let date_arg = args.remove(0);
                // Optional second argument shifts the date by N months first.
                let month_offset = if !args.is_empty() {
                    Some(args.remove(0))
                } else {
                    None
                };

                // Helper: wrap date in CAST to DATE
                let cast_to_date = |e: Expression| -> Expression {
                    Expression::Cast(Box::new(Cast {
                        this: e,
                        to: DataType::Date,
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    }))
                };

                match target {
                    DialectType::TSQL | DialectType::Fabric => {
                        // TSQL: EOMONTH(CAST(date AS DATE)) or EOMONTH(DATEADD(MONTH, offset, CAST(date AS DATE)))
                        let date = cast_to_date(date_arg);
                        let date = if let Some(offset) = month_offset {
                            Expression::Function(Box::new(Function::new(
                                "DATEADD".to_string(),
                                vec![
                                    Expression::Identifier(Identifier::new(
                                        "MONTH",
                                    )),
                                    offset,
                                    date,
                                ],
                            )))
                        } else {
                            date
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "EOMONTH".to_string(),
                            vec![date],
                        ))))
                    }
                    DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Athena => {
                        // Presto: LAST_DAY_OF_MONTH(CAST(CAST(date AS TIMESTAMP) AS DATE))
                        // or with offset: LAST_DAY_OF_MONTH(DATE_ADD('MONTH', offset, CAST(CAST(date AS TIMESTAMP) AS DATE)))
                        let cast_ts = Expression::Cast(Box::new(Cast {
                            this: date_arg,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        let date = cast_to_date(cast_ts);
                        let date = if let Some(offset) = month_offset {
                            Expression::Function(Box::new(Function::new(
                                "DATE_ADD".to_string(),
                                vec![Expression::string("MONTH"), offset, date],
                            )))
                        } else {
                            date
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "LAST_DAY_OF_MONTH".to_string(),
                            vec![date],
                        ))))
                    }
                    DialectType::PostgreSQL => {
                        // PostgreSQL: CAST(DATE_TRUNC('MONTH', CAST(date AS DATE) [+ INTERVAL 'offset MONTH']) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)
                        let date = cast_to_date(date_arg);
                        let date = if let Some(offset) = month_offset {
                            // Render the offset into the interval literal text,
                            // e.g. '3 MONTH'.
                            let interval_str = format!(
                                "{} MONTH",
                                Self::expr_to_string_static(&offset)
                            );
                            Expression::Add(Box::new(
                                crate::expressions::BinaryOp::new(
                                    date,
                                    Expression::Interval(Box::new(
                                        crate::expressions::Interval {
                                            this: Some(Expression::string(
                                                &interval_str,
                                            )),
                                            unit: None,
                                        },
                                    )),
                                ),
                            ))
                        } else {
                            date
                        };
                        // First day of the (possibly shifted) month...
                        let truncated =
                            Expression::Function(Box::new(Function::new(
                                "DATE_TRUNC".to_string(),
                                vec![Expression::string("MONTH"), date],
                            )));
                        // ...advance one month, then step back one day to get
                        // the last day of the original month.
                        let plus_month = Expression::Add(Box::new(
                            crate::expressions::BinaryOp::new(
                                truncated,
                                Expression::Interval(Box::new(
                                    crate::expressions::Interval {
                                        this: Some(Expression::string("1 MONTH")),
                                        unit: None,
                                    },
                                )),
                            ),
                        ));
                        let minus_day = Expression::Sub(Box::new(
                            crate::expressions::BinaryOp::new(
                                plus_month,
                                Expression::Interval(Box::new(
                                    crate::expressions::Interval {
                                        this: Some(Expression::string("1 DAY")),
                                        unit: None,
                                    },
                                )),
                            ),
                        ));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: minus_day,
                            to: DataType::Date,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    }
                    DialectType::DuckDB => {
                        // DuckDB: LAST_DAY(CAST(date AS DATE) [+ INTERVAL (offset) MONTH])
                        let date = cast_to_date(date_arg);
                        let date = if let Some(offset) = month_offset {
                            // Wrap negative numbers in parentheses for DuckDB INTERVAL
                            let interval_val =
                                if matches!(&offset, Expression::Neg(_)) {
                                    Expression::Paren(Box::new(
                                        crate::expressions::Paren {
                                            this: offset,
                                            trailing_comments: Vec::new(),
                                        },
                                    ))
                                } else {
                                    offset
                                };
                            Expression::Add(Box::new(crate::expressions::BinaryOp::new(
                                date,
                                Expression::Interval(Box::new(crate::expressions::Interval {
                                    this: Some(interval_val),
                                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                        unit: crate::expressions::IntervalUnit::Month,
                                        use_plural: false,
                                    }),
                                })),
                            )))
                        } else {
                            date
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "LAST_DAY".to_string(),
                            vec![date],
                        ))))
                    }
                    DialectType::Snowflake | DialectType::Redshift => {
                        // Snowflake/Redshift: LAST_DAY(TO_DATE(date) or CAST(date AS DATE))
                        // With offset: LAST_DAY(DATEADD(MONTH, offset, TO_DATE(date)))
                        let date = if matches!(target, DialectType::Snowflake) {
                            Expression::Function(Box::new(Function::new(
                                "TO_DATE".to_string(),
                                vec![date_arg],
                            )))
                        } else {
                            cast_to_date(date_arg)
                        };
                        let date = if let Some(offset) = month_offset {
                            Expression::Function(Box::new(Function::new(
                                "DATEADD".to_string(),
                                vec![
                                    Expression::Identifier(Identifier::new(
                                        "MONTH",
                                    )),
                                    offset,
                                    date,
                                ],
                            )))
                        } else {
                            date
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "LAST_DAY".to_string(),
                            vec![date],
                        ))))
                    }
                    DialectType::Spark | DialectType::Databricks => {
                        // Spark: LAST_DAY(TO_DATE(date))
                        // With offset: LAST_DAY(ADD_MONTHS(TO_DATE(date), offset))
                        let date = Expression::Function(Box::new(Function::new(
                            "TO_DATE".to_string(),
                            vec![date_arg],
                        )));
                        let date = if let Some(offset) = month_offset {
                            Expression::Function(Box::new(Function::new(
                                "ADD_MONTHS".to_string(),
                                vec![date, offset],
                            )))
                        } else {
                            date
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "LAST_DAY".to_string(),
                            vec![date],
                        ))))
                    }
                    DialectType::MySQL => {
                        // MySQL: LAST_DAY(DATE(date)) - no offset
                        // With offset: LAST_DAY(DATE_ADD(date, INTERVAL offset MONTH)) - no DATE() wrapper
                        let date = if let Some(offset) = month_offset {
                            let iu = crate::expressions::IntervalUnit::Month;
                            Expression::DateAdd(Box::new(
                                crate::expressions::DateAddFunc {
                                    this: date_arg,
                                    interval: offset,
                                    unit: iu,
                                },
                            ))
                        } else {
                            Expression::Function(Box::new(Function::new(
                                "DATE".to_string(),
                                vec![date_arg],
                            )))
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "LAST_DAY".to_string(),
                            vec![date],
                        ))))
                    }
                    DialectType::BigQuery => {
                        // BigQuery: LAST_DAY(CAST(date AS DATE))
                        // With offset: LAST_DAY(DATE_ADD(CAST(date AS DATE), INTERVAL offset MONTH))
                        let date = cast_to_date(date_arg);
                        let date = if let Some(offset) = month_offset {
                            let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                                this: Some(offset),
                                unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                    unit: crate::expressions::IntervalUnit::Month,
                                    use_plural: false,
                                }),
                            }));
                            Expression::Function(Box::new(Function::new(
                                "DATE_ADD".to_string(),
                                vec![date, interval],
                            )))
                        } else {
                            date
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "LAST_DAY".to_string(),
                            vec![date],
                        ))))
                    }
                    DialectType::ClickHouse => {
                        // ClickHouse: LAST_DAY(CAST(date AS Nullable(DATE)))
                        // NOTE(review): the Nullable wrapper presumably makes
                        // unparseable inputs yield NULL instead of erroring —
                        // inferred from the cast type only; confirm intent.
                        let date = Expression::Cast(Box::new(Cast {
                            this: date_arg,
                            to: DataType::Nullable {
                                inner: Box::new(DataType::Date),
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        let date = if let Some(offset) = month_offset {
                            Expression::Function(Box::new(Function::new(
                                "DATE_ADD".to_string(),
                                vec![
                                    Expression::Identifier(Identifier::new(
                                        "MONTH",
                                    )),
                                    offset,
                                    date,
                                ],
                            )))
                        } else {
                            date
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "LAST_DAY".to_string(),
                            vec![date],
                        ))))
                    }
                    DialectType::Hive => {
                        // Hive: LAST_DAY(date)
                        // With offset: LAST_DAY(ADD_MONTHS(date, offset))
                        let date = if let Some(offset) = month_offset {
                            Expression::Function(Box::new(Function::new(
                                "ADD_MONTHS".to_string(),
                                vec![date_arg, offset],
                            )))
                        } else {
                            date_arg
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "LAST_DAY".to_string(),
                            vec![date],
                        ))))
                    }
                    _ => {
                        // Default: LAST_DAY(date)
                        // With offset: LAST_DAY(DATEADD(MONTH, offset, date))
                        let date = if let Some(offset) = month_offset {
                            let unit =
                                Expression::Identifier(Identifier::new("MONTH"));
                            Expression::Function(Box::new(Function::new(
                                "DATEADD".to_string(),
                                vec![unit, offset, date_arg],
                            )))
                        } else {
                            date_arg
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "LAST_DAY".to_string(),
                            vec![date],
                        ))))
                    }
                }
            }
            // LAST_DAY(x) / LAST_DAY_OF_MONTH(x) -> target-specific
            // NOTE(review): BigQuery sources are excluded here — presumably
            // BigQuery's LAST_DAY (which accepts an extra date-part argument)
            // is handled by another rule; confirm.
            "LAST_DAY" | "LAST_DAY_OF_MONTH"
                if !matches!(source, DialectType::BigQuery)
                    && f.args.len() >= 1 =>
            {
                // Only the first (date) argument is carried over.
                let first_arg = f.args.into_iter().next().unwrap();
                match target {
                    DialectType::TSQL | DialectType::Fabric => {
                        Ok(Expression::Function(Box::new(Function::new(
                            "EOMONTH".to_string(),
                            vec![first_arg],
                        ))))
                    }
                    DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Athena => {
                        Ok(Expression::Function(Box::new(Function::new(
                            "LAST_DAY_OF_MONTH".to_string(),
                            vec![first_arg],
                        ))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "LAST_DAY".to_string(),
                        vec![first_arg],
                    )))),
                }
            }
            // MAP(keys_array, vals_array) from Presto (2-arg form) -> target-specific
            "MAP"
                if f.args.len() == 2
                    && matches!(
                        source,
                        DialectType::Presto
                            | DialectType::Trino
                            | DialectType::Athena
                    ) =>
            {
                // Clone so `f.args` stays movable in the pass-through branches.
                let keys_arg = f.args[0].clone();
                let vals_arg = f.args[1].clone();

                // Helper: extract array elements from Array/ArrayFunc/Function("ARRAY") expressions
                // Returns None when the argument is not a literal array
                // constructor (e.g. a column reference), in which case the
                // interleaving rewrites below are skipped.
                fn extract_array_elements(
                    expr: &Expression,
                ) -> Option<&Vec<Expression>> {
                    match expr {
                        Expression::Array(arr) => Some(&arr.expressions),
                        Expression::ArrayFunc(arr) => Some(&arr.expressions),
                        Expression::Function(f)
                            if f.name.eq_ignore_ascii_case("ARRAY") =>
                        {
                            Some(&f.args)
                        }
                        _ => None,
                    }
                }

                match target {
                    DialectType::Spark | DialectType::Databricks => {
                        // Presto MAP(keys, vals) -> Spark MAP_FROM_ARRAYS(keys, vals)
                        Ok(Expression::Function(Box::new(Function::new(
                            "MAP_FROM_ARRAYS".to_string(),
                            f.args,
                        ))))
                    }
                    DialectType::Hive => {
                        // Presto MAP(ARRAY[k1,k2], ARRAY[v1,v2]) -> Hive MAP(k1, v1, k2, v2)
                        if let (Some(keys), Some(vals)) = (
                            extract_array_elements(&keys_arg),
                            extract_array_elements(&vals_arg),
                        ) {
                            if keys.len() == vals.len() {
                                let mut interleaved = Vec::new();
                                for (k, v) in keys.iter().zip(vals.iter()) {
                                    interleaved.push(k.clone());
                                    interleaved.push(v.clone());
                                }
                                Ok(Expression::Function(Box::new(Function::new(
                                    "MAP".to_string(),
                                    interleaved,
                                ))))
                            } else {
                                // Length mismatch: leave the 2-array form alone.
                                Ok(Expression::Function(Box::new(Function::new(
                                    "MAP".to_string(),
                                    f.args,
                                ))))
                            }
                        } else {
                            // Non-literal arrays can't be interleaved statically.
                            Ok(Expression::Function(Box::new(Function::new(
                                "MAP".to_string(),
                                f.args,
                            ))))
                        }
                    }
                    DialectType::Snowflake => {
                        // Presto MAP(ARRAY[k1,k2], ARRAY[v1,v2]) -> Snowflake OBJECT_CONSTRUCT(k1, v1, k2, v2)
                        if let (Some(keys), Some(vals)) = (
                            extract_array_elements(&keys_arg),
                            extract_array_elements(&vals_arg),
                        ) {
                            if keys.len() == vals.len() {
                                let mut interleaved = Vec::new();
                                for (k, v) in keys.iter().zip(vals.iter()) {
                                    interleaved.push(k.clone());
                                    interleaved.push(v.clone());
                                }
                                Ok(Expression::Function(Box::new(Function::new(
                                    "OBJECT_CONSTRUCT".to_string(),
                                    interleaved,
                                ))))
                            } else {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "MAP".to_string(),
                                    f.args,
                                ))))
                            }
                        } else {
                            Ok(Expression::Function(Box::new(Function::new(
                                "MAP".to_string(),
                                f.args,
                            ))))
                        }
                    }
                    // All other targets: pass the call through unchanged.
                    _ => Ok(Expression::Function(f)),
                }
            }
            // MAP() with 0 args from Spark -> MAP(ARRAY[], ARRAY[]) for Presto/Trino
            // (Presto's MAP requires the two-array form even when empty.)
            "MAP"
                if f.args.is_empty()
                    && matches!(
                        source,
                        DialectType::Hive
                            | DialectType::Spark
                            | DialectType::Databricks
                    )
                    && matches!(
                        target,
                        DialectType::Presto
                            | DialectType::Trino
                            | DialectType::Athena
                    ) =>
            {
                let empty_keys =
                    Expression::Array(Box::new(crate::expressions::Array {
                        expressions: vec![],
                    }));
                let empty_vals =
                    Expression::Array(Box::new(crate::expressions::Array {
                        expressions: vec![],
                    }));
                Ok(Expression::Function(Box::new(Function::new(
                    "MAP".to_string(),
                    vec![empty_keys, empty_vals],
                ))))
            }
            // MAP(k1, v1, k2, v2, ...) from Hive/Spark -> target-specific
            "MAP"
                if f.args.len() >= 2
                    && f.args.len() % 2 == 0
                    && matches!(
                        source,
                        DialectType::Hive
                            | DialectType::Spark
                            | DialectType::Databricks
                            | DialectType::ClickHouse
                    ) =>
            {
                let args = f.args;
                match target {
                    DialectType::DuckDB => {
                        // MAP([k1, k2], [v1, v2])
                        // De-interleave: even positions are keys, odd are values.
                        let mut keys = Vec::new();
                        let mut vals = Vec::new();
                        for (i, arg) in args.into_iter().enumerate() {
                            if i % 2 == 0 {
                                keys.push(arg);
                            } else {
                                vals.push(arg);
                            }
                        }
                        let keys_arr = Expression::Array(Box::new(
                            crate::expressions::Array { expressions: keys },
                        ));
                        let vals_arr = Expression::Array(Box::new(
                            crate::expressions::Array { expressions: vals },
                        ));
                        Ok(Expression::Function(Box::new(Function::new(
                            "MAP".to_string(),
                            vec![keys_arr, vals_arr],
                        ))))
                    }
                    DialectType::Presto | DialectType::Trino => {
                        // MAP(ARRAY[k1, k2], ARRAY[v1, v2])
                        let mut keys = Vec::new();
                        let mut vals = Vec::new();
                        for (i, arg) in args.into_iter().enumerate() {
                            if i % 2 == 0 {
                                keys.push(arg);
                            } else {
                                vals.push(arg);
                            }
                        }
                        let keys_arr = Expression::Array(Box::new(
                            crate::expressions::Array { expressions: keys },
                        ));
                        let vals_arr = Expression::Array(Box::new(
                            crate::expressions::Array { expressions: vals },
                        ));
                        Ok(Expression::Function(Box::new(Function::new(
                            "MAP".to_string(),
                            vec![keys_arr, vals_arr],
                        ))))
                    }
                    // Snowflake accepts interleaved key/value pairs directly.
                    DialectType::Snowflake => Ok(Expression::Function(Box::new(
                        Function::new("OBJECT_CONSTRUCT".to_string(), args),
                    ))),
                    // ClickHouse's pair-based constructor is lowercase `map`.
                    DialectType::ClickHouse => Ok(Expression::Function(Box::new(
                        Function::new("map".to_string(), args),
                    ))),
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "MAP".to_string(),
                        args,
                    )))),
                }
            }
            // COLLECT_LIST(x) -> ARRAY_AGG(x) for most targets
            "COLLECT_LIST" if f.args.len() >= 1 => {
                let name = match target {
                    // Hive-family targets keep the native name.
                    DialectType::Spark
                    | DialectType::Databricks
                    | DialectType::Hive => "COLLECT_LIST",
                    DialectType::DuckDB
                    | DialectType::PostgreSQL
                    | DialectType::Redshift
                    | DialectType::Snowflake
                    | DialectType::BigQuery => "ARRAY_AGG",
                    DialectType::Presto | DialectType::Trino => "ARRAY_AGG",
                    _ => "ARRAY_AGG",
                };
                Ok(Expression::Function(Box::new(Function::new(
                    name.to_string(),
                    f.args,
                ))))
            }
            // COLLECT_SET(x) -> target-specific distinct array aggregation
            "COLLECT_SET" if f.args.len() >= 1 => {
                let name = match target {
                    DialectType::Spark
                    | DialectType::Databricks
                    | DialectType::Hive => "COLLECT_SET",
                    DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Athena => "SET_AGG",
                    DialectType::Snowflake => "ARRAY_UNIQUE_AGG",
                    // NOTE(review): the fallback ARRAY_AGG does not deduplicate
                    // — other targets may need ARRAY_AGG(DISTINCT x); confirm.
                    _ => "ARRAY_AGG",
                };
                Ok(Expression::Function(Box::new(Function::new(
                    name.to_string(),
                    f.args,
                ))))
            }
            // ISNAN(x) / IS_NAN(x) - normalize
            "ISNAN" | "IS_NAN" => {
                let name = match target {
                    DialectType::Spark
                    | DialectType::Databricks
                    | DialectType::Hive => "ISNAN",
                    DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Athena => "IS_NAN",
                    DialectType::BigQuery
                    | DialectType::PostgreSQL
                    | DialectType::Redshift => "IS_NAN",
                    DialectType::ClickHouse => "IS_NAN",
                    _ => "ISNAN",
                };
                Ok(Expression::Function(Box::new(Function::new(
                    name.to_string(),
                    f.args,
                ))))
            }
            // SPLIT_PART(str, delim, index) -> target-specific
            // Every branch currently keeps the name; the match documents which
            // targets support SPLIT_PART natively.
            "SPLIT_PART" if f.args.len() == 3 => {
                match target {
                    DialectType::Spark | DialectType::Databricks => {
                        // Keep as SPLIT_PART (Spark 3.4+)
                        Ok(Expression::Function(Box::new(Function::new(
                            "SPLIT_PART".to_string(),
                            f.args,
                        ))))
                    }
                    DialectType::DuckDB
                    | DialectType::PostgreSQL
                    | DialectType::Snowflake
                    | DialectType::Redshift
                    | DialectType::Trino
                    | DialectType::Presto => Ok(Expression::Function(Box::new(
                        Function::new("SPLIT_PART".to_string(), f.args),
                    ))),
                    DialectType::Hive => {
                        // SPLIT(str, delim)[index]
                        // Complex conversion, just keep as-is for now
                        Ok(Expression::Function(Box::new(Function::new(
                            "SPLIT_PART".to_string(),
                            f.args,
                        ))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "SPLIT_PART".to_string(),
                        f.args,
                    )))),
                }
            }
            // JSON_EXTRACT(json, path) -> target-specific JSON extraction
            "JSON_EXTRACT" | "JSON_EXTRACT_SCALAR" if f.args.len() == 2 => {
                let is_scalar = name == "JSON_EXTRACT_SCALAR";
                match target {
                    DialectType::Spark
                    | DialectType::Databricks
                    | DialectType::Hive => {
                        let mut args = f.args;
                        // Spark/Hive don't support Presto's TRY(expr) wrapper form here.
                        // Mirror sqlglot by unwrapping TRY(expr) to expr before GET_JSON_OBJECT.
                        if let Some(Expression::Function(inner)) = args.first() {
                            if inner.name.eq_ignore_ascii_case("TRY")
                                && inner.args.len() == 1
                            {
                                let mut inner_args = inner.args.clone();
                                args[0] = inner_args.remove(0);
                            }
                        }
                        Ok(Expression::Function(Box::new(Function::new(
                            "GET_JSON_OBJECT".to_string(),
                            args,
                        ))))
                    }
                    DialectType::DuckDB | DialectType::SQLite => {
                        // json -> path syntax
                        let mut args = f.args;
                        let json_expr = args.remove(0);
                        let path = args.remove(0);
                        Ok(Expression::JsonExtract(Box::new(
                            crate::expressions::JsonExtractFunc {
                                this: json_expr,
                                path,
                                returning: None,
                                arrow_syntax: true,
                                hash_arrow_syntax: false,
                                wrapper_option: None,
                                quotes_option: None,
                                on_scalar_string: false,
                                on_error: None,
                            },
                        )))
                    }
                    DialectType::TSQL => {
                        // Scalar extraction maps to JSON_VALUE, structural
                        // extraction to JSON_QUERY.
                        let func_name = if is_scalar {
                            "JSON_VALUE"
                        } else {
                            "JSON_QUERY"
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            func_name.to_string(),
                            f.args,
                        ))))
                    }
                    DialectType::PostgreSQL | DialectType::Redshift => {
                        // NOTE(review): the second argument here is a JSONPath
                        // string while JSON_EXTRACT_PATH* expects individual
                        // keys — confirm a later pass rewrites the path form.
                        let func_name = if is_scalar {
                            "JSON_EXTRACT_PATH_TEXT"
                        } else {
                            "JSON_EXTRACT_PATH"
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            func_name.to_string(),
                            f.args,
                        ))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        name.to_string(),
                        f.args,
                    )))),
                }
            }
            // MySQL JSON_SEARCH(json_doc, mode, search[, escape_char[, path]]) -> DuckDB json_tree-based lookup
            "JSON_SEARCH"
                if matches!(target, DialectType::DuckDB)
                    && (3..=5).contains(&f.args.len()) =>
            {
                let args = &f.args;

                // Only rewrite deterministic modes and NULL/no escape-char variant.
                let mode = match &args[1] {
                    Expression::Literal(crate::expressions::Literal::String(s)) => {
                        s.to_ascii_lowercase()
                    }
                    _ => return Ok(Expression::Function(f)),
                };
                if mode != "one" && mode != "all" {
                    return Ok(Expression::Function(f));
                }
                // A non-NULL escape character carries MySQL-specific semantics
                // this rewrite does not reproduce; keep the call unchanged.
                if args.len() >= 4 && !matches!(&args[3], Expression::Null(_)) {
                    return Ok(Expression::Function(f));
                }

                // Render the sub-expressions to SQL text so they can be
                // spliced into the raw rewrite below; any render failure
                // falls back to the original call.
                let json_doc_sql = match Generator::sql(&args[0]) {
                    Ok(sql) => sql,
                    Err(_) => return Ok(Expression::Function(f)),
                };
                let search_sql = match Generator::sql(&args[2]) {
                    Ok(sql) => sql,
                    Err(_) => return Ok(Expression::Function(f)),
                };
                let path_sql = if args.len() == 5 {
                    match Generator::sql(&args[4]) {
                        Ok(sql) => sql,
                        Err(_) => return Ok(Expression::Function(f)),
                    }
                } else {
                    // Default search scope: the document root.
                    "'$'".to_string()
                };

                // 'all' collects every matching path as a JSON array; 'one'
                // takes the first match in document order.
                let rewrite_sql = if mode == "all" {
                    format!(
                        "(SELECT TO_JSON(LIST(__jt.fullkey)) FROM json_tree({}, {}) AS __jt WHERE __jt.atom = TO_JSON({}))",
                        json_doc_sql, path_sql, search_sql
                    )
                } else {
                    format!(
                        "(SELECT TO_JSON(__jt.fullkey) FROM json_tree({}, {}) AS __jt WHERE __jt.atom = TO_JSON({}) ORDER BY __jt.id LIMIT 1)",
                        json_doc_sql, path_sql, search_sql
                    )
                };

                // Emit as raw SQL: the scalar-subquery shape has no dedicated
                // AST node in this rewrite.
                Ok(Expression::Raw(crate::expressions::Raw {
                    sql: rewrite_sql,
                }))
            }
            // SingleStore JSON_EXTRACT_JSON(json, key1, key2, ...) -> JSON_EXTRACT(json, '$.key1.key2' or '$.key1[key2]')
            // BSON_EXTRACT_BSON(json, key1, ...) -> JSONB_EXTRACT(json, '$.key1')
            "JSON_EXTRACT_JSON" | "BSON_EXTRACT_BSON"
                if f.args.len() >= 2
                    && matches!(source, DialectType::SingleStore) =>
            {
                let is_bson = name == "BSON_EXTRACT_BSON";
                let mut args = f.args;
                let json_expr = args.remove(0);

                // Build JSONPath from remaining arguments
                let mut path = String::from("$");
                for arg in &args {
                    if let Expression::Literal(
                        crate::expressions::Literal::String(s),
                    ) = arg
                    {
                        // Check if it's a numeric string (array index)
                        if s.parse::<i64>().is_ok() {
                            path.push('[');
                            path.push_str(s);
                            path.push(']');
                        } else {
                            path.push('.');
                            path.push_str(s);
                        }
                    }
                    // Non-literal keys are silently skipped here.
                    // NOTE(review): this drops dynamic path segments — confirm
                    // whether bailing out to the original call would be safer.
                }

                let target_func = if is_bson {
                    "JSONB_EXTRACT"
                } else {
                    "JSON_EXTRACT"
                };
                Ok(Expression::Function(Box::new(Function::new(
                    target_func.to_string(),
                    vec![json_expr, Expression::string(&path)],
                ))))
            }
            // ARRAY_SUM(lambda, array) from Doris -> ClickHouse arraySum
            "ARRAY_SUM" if matches!(target, DialectType::ClickHouse) => {
                // Rebuilt field-by-field (not via Function::new) so flags such
                // as DISTINCT, trailing comments, and bracket syntax survive
                // the rename to ClickHouse's camelCase form.
                Ok(Expression::Function(Box::new(Function {
                    name: "arraySum".to_string(),
                    args: f.args,
                    distinct: f.distinct,
                    trailing_comments: f.trailing_comments,
                    use_bracket_syntax: f.use_bracket_syntax,
                    no_parens: f.no_parens,
                    quoted: f.quoted,
                    span: None,
                    inferred_type: None,
                })))
            }
            // TSQL JSON_QUERY/JSON_VALUE -> target-specific
            // Note: For TSQL->TSQL, JsonQuery stays as Expression::JsonQuery (source transform not called)
            // and is handled by JsonQueryValueConvert action. This handles the case where
            // TSQL read transform converted JsonQuery to Function("JSON_QUERY") for cross-dialect.
            "JSON_QUERY" | "JSON_VALUE"
                if f.args.len() == 2
                    && matches!(
                        source,
                        DialectType::TSQL | DialectType::Fabric
                    ) =>
            {
                match target {
                    DialectType::Spark
                    | DialectType::Databricks
                    | DialectType::Hive => Ok(Expression::Function(Box::new(
                        Function::new("GET_JSON_OBJECT".to_string(), f.args),
                    ))),
                    // Other targets keep the original name and arguments.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        name.to_string(),
                        f.args,
                    )))),
                }
            }
            // UNIX_TIMESTAMP(x) -> TO_UNIXTIME(x) for Presto
            "UNIX_TIMESTAMP" if f.args.len() == 1 => {
                let arg = f.args.into_iter().next().unwrap();
                // Hive-family sources accept a 'yyyy-MM-dd HH:mm:ss' string
                // argument, which stricter targets must parse explicitly.
                let is_hive_source = matches!(
                    source,
                    DialectType::Hive
                        | DialectType::Spark
                        | DialectType::Databricks
                );
                match target {
                    DialectType::DuckDB if is_hive_source => {
                        // DuckDB: EPOCH(STRPTIME(x, '%Y-%m-%d %H:%M:%S'))
                        let strptime =
                            Expression::Function(Box::new(Function::new(
                                "STRPTIME".to_string(),
                                vec![arg, Expression::string("%Y-%m-%d %H:%M:%S")],
                            )));
                        Ok(Expression::Function(Box::new(Function::new(
                            "EPOCH".to_string(),
                            vec![strptime],
                        ))))
                    }
                    DialectType::Presto | DialectType::Trino if is_hive_source => {
                        // Presto: TO_UNIXTIME(COALESCE(TRY(DATE_PARSE(CAST(x AS VARCHAR), '%Y-%m-%d %T')), PARSE_DATETIME(DATE_FORMAT(x, '%Y-%m-%d %T'), 'yyyy-MM-dd HH:mm:ss')))
                        // Primary attempt: treat x as a datetime string.
                        let cast_varchar =
                            Expression::Cast(Box::new(crate::expressions::Cast {
                                this: arg.clone(),
                                to: DataType::VarChar {
                                    length: None,
                                    parenthesized_length: false,
                                },
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            }));
                        let date_parse =
                            Expression::Function(Box::new(Function::new(
                                "DATE_PARSE".to_string(),
                                vec![
                                    cast_varchar,
                                    Expression::string("%Y-%m-%d %T"),
                                ],
                            )));
                        // TRY(...) converts a parse failure into NULL so the
                        // COALESCE can fall back to the timestamp-typed path.
                        let try_expr = Expression::Function(Box::new(
                            Function::new("TRY".to_string(), vec![date_parse]),
                        ));
                        // Fallback: x is already a timestamp; round-trip it
                        // through DATE_FORMAT/PARSE_DATETIME (Joda pattern).
                        let date_format =
                            Expression::Function(Box::new(Function::new(
                                "DATE_FORMAT".to_string(),
                                vec![arg, Expression::string("%Y-%m-%d %T")],
                            )));
                        let parse_datetime =
                            Expression::Function(Box::new(Function::new(
                                "PARSE_DATETIME".to_string(),
                                vec![
                                    date_format,
                                    Expression::string("yyyy-MM-dd HH:mm:ss"),
                                ],
                            )));
                        let coalesce =
                            Expression::Function(Box::new(Function::new(
                                "COALESCE".to_string(),
                                vec![try_expr, parse_datetime],
                            )));
                        Ok(Expression::Function(Box::new(Function::new(
                            "TO_UNIXTIME".to_string(),
                            vec![coalesce],
                        ))))
                    }
                    DialectType::Presto | DialectType::Trino => {
                        // Non-Hive source: argument is already a timestamp.
                        Ok(Expression::Function(Box::new(Function::new(
                            "TO_UNIXTIME".to_string(),
                            vec![arg],
                        ))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "UNIX_TIMESTAMP".to_string(),
                        vec![arg],
                    )))),
                }
            }
            // TO_UNIX_TIMESTAMP(x) -> UNIX_TIMESTAMP(x) for Spark/Hive
            "TO_UNIX_TIMESTAMP" if f.args.len() >= 1 => match target {
                DialectType::Spark
                | DialectType::Databricks
                | DialectType::Hive => Ok(Expression::Function(Box::new(
                    Function::new("UNIX_TIMESTAMP".to_string(), f.args),
                ))),
                _ => Ok(Expression::Function(Box::new(Function::new(
                    "TO_UNIX_TIMESTAMP".to_string(),
                    f.args,
                )))),
            },
            // CURDATE() -> CURRENT_DATE (normalized to the dedicated AST node)
            "CURDATE" => {
                Ok(Expression::CurrentDate(crate::expressions::CurrentDate))
            }
            // CURTIME() -> CURRENT_TIME (no precision specified)
            "CURTIME" => {
                Ok(Expression::CurrentTime(crate::expressions::CurrentTime {
                    precision: None,
                }))
            }
            // ARRAY_SORT(x) or ARRAY_SORT(x, lambda) -> SORT_ARRAY(x) for Hive (drop lambda)
            "ARRAY_SORT" if f.args.len() >= 1 => {
                match target {
                    DialectType::Hive => {
                        let mut args = f.args;
                        args.truncate(1); // Drop lambda comparator: Hive's SORT_ARRAY takes no comparator
                        Ok(Expression::Function(Box::new(Function::new(
                            "SORT_ARRAY".to_string(),
                            args,
                        ))))
                    }
                    _ => Ok(Expression::Function(f)),
                }
            }
            // SORT_ARRAY(x) -> ARRAY_SORT(x) for non-Hive/Spark
            "SORT_ARRAY" if f.args.len() == 1 => match target {
                // Hive-family targets understand SORT_ARRAY natively.
                DialectType::Hive
                | DialectType::Spark
                | DialectType::Databricks => Ok(Expression::Function(f)),
                _ => Ok(Expression::Function(Box::new(Function::new(
                    "ARRAY_SORT".to_string(),
                    f.args,
                )))),
            },
            // SORT_ARRAY(x, FALSE) -> ARRAY_REVERSE_SORT(x) for DuckDB, ARRAY_SORT(x, lambda) for Presto
            "SORT_ARRAY" if f.args.len() == 2 => {
                // Second argument is the ascending flag; FALSE means descending.
                let is_desc =
                    matches!(&f.args[1], Expression::Boolean(b) if !b.value);
                if is_desc {
                    match target {
                        DialectType::DuckDB => {
                            Ok(Expression::Function(Box::new(Function::new(
                                "ARRAY_REVERSE_SORT".to_string(),
                                vec![f.args.into_iter().next().unwrap()],
                            ))))
                        }
                        DialectType::Presto | DialectType::Trino => {
                            // Presto has no descending flag; synthesize a
                            // comparator lambda (a, b) -> CASE WHEN a < b THEN 1
                            // WHEN a > b THEN -1 ELSE 0 END (inverted order).
                            let arr_arg = f.args.into_iter().next().unwrap();
                            let a =
                                Expression::Column(crate::expressions::Column {
                                    name: crate::expressions::Identifier::new("a"),
                                    table: None,
                                    join_mark: false,
                                    trailing_comments: Vec::new(),
                                    span: None,
                                    inferred_type: None,
                                });
                            let b =
                                Expression::Column(crate::expressions::Column {
                                    name: crate::expressions::Identifier::new("b"),
                                    table: None,
                                    join_mark: false,
                                    trailing_comments: Vec::new(),
                                    span: None,
                                    inferred_type: None,
                                });
                            let case_expr = Expression::Case(Box::new(
                                crate::expressions::Case {
                                    operand: None,
                                    whens: vec![
                                        (
                                            Expression::Lt(Box::new(
                                                BinaryOp::new(a.clone(), b.clone()),
                                            )),
                                            Expression::Literal(Literal::Number(
                                                "1".to_string(),
                                            )),
                                        ),
                                        (
                                            Expression::Gt(Box::new(
                                                BinaryOp::new(a.clone(), b.clone()),
                                            )),
                                            Expression::Literal(Literal::Number(
                                                "-1".to_string(),
                                            )),
                                        ),
                                    ],
                                    else_: Some(Expression::Literal(
                                        Literal::Number("0".to_string()),
                                    )),
                                    comments: Vec::new(),
                                    inferred_type: None,
                                },
                            ));
                            let lambda = Expression::Lambda(Box::new(
                                crate::expressions::LambdaExpr {
                                    parameters: vec![
                                        crate::expressions::Identifier::new("a"),
                                        crate::expressions::Identifier::new("b"),
                                    ],
                                    body: case_expr,
                                    colon: false,
                                    parameter_types: Vec::new(),
                                },
                            ));
                            Ok(Expression::Function(Box::new(Function::new(
                                "ARRAY_SORT".to_string(),
                                vec![arr_arg, lambda],
                            ))))
                        }
                        _ => Ok(Expression::Function(f)),
                    }
                } else {
                    // SORT_ARRAY(x, TRUE) -> ARRAY_SORT(x): ascending is the
                    // default everywhere, so the flag can simply be dropped.
                    match target {
                        DialectType::Hive => Ok(Expression::Function(f)),
                        _ => Ok(Expression::Function(Box::new(Function::new(
                            "ARRAY_SORT".to_string(),
                            vec![f.args.into_iter().next().unwrap()],
                        )))),
                    }
                }
            }
            // LEFT(x, n), RIGHT(x, n) -> SUBSTRING for targets without LEFT/RIGHT
            "LEFT" if f.args.len() == 2 => {
                match target {
                    DialectType::Hive
                    | DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Athena => {
                        // LEFT(x, n) -> SUBSTRING(x, 1, n)
                        let x = f.args[0].clone();
                        let n = f.args[1].clone();
                        Ok(Expression::Function(Box::new(Function::new(
                            "SUBSTRING".to_string(),
                            vec![x, Expression::number(1), n],
                        ))))
                    }
                    DialectType::Spark | DialectType::Databricks
                        if matches!(
                            source,
                            DialectType::TSQL | DialectType::Fabric
                        ) =>
                    {
                        // TSQL LEFT(x, n) -> LEFT(CAST(x AS STRING), n) for Spark
                        let x = f.args[0].clone();
                        let n = f.args[1].clone();
                        let cast_x = Expression::Cast(Box::new(Cast {
                            this: x,
                            to: DataType::VarChar {
                                length: None,
                                parenthesized_length: false,
                            },
                            double_colon_syntax: false,
                            trailing_comments: Vec::new(),
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "LEFT".to_string(),
                            vec![cast_x, n],
                        ))))
                    }
                    _ => Ok(Expression::Function(f)),
                }
            }
            "RIGHT" if f.args.len() == 2 => {
                match target {
                    DialectType::Hive
                    | DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Athena => {
                        let x = f.args[0].clone();
                        let n = f.args[1].clone();
                        // SUBSTRING(x, LENGTH(x) - (n - 1))
                        let len_x = Expression::Function(Box::new(Function::new(
                            "LENGTH".to_string(),
                            vec![x.clone()],
                        )));
                        let n_minus_1 = Expression::Sub(Box::new(
                            crate::expressions::BinaryOp::new(
                                n,
                                Expression::number(1),
                            ),
                        ));
                        // Parenthesize (n - 1) so the generated SQL keeps the
                        // intended precedence when rendered as text.
                        let n_minus_1_paren = Expression::Paren(Box::new(
                            crate::expressions::Paren {
                                this: n_minus_1,
                                trailing_comments: Vec::new(),
                            },
                        ));
                        let offset = Expression::Sub(Box::new(
                            crate::expressions::BinaryOp::new(
                                len_x,
                                n_minus_1_paren,
                            ),
                        ));
                        Ok(Expression::Function(Box::new(Function::new(
                            "SUBSTRING".to_string(),
                            vec![x, offset],
                        ))))
                    }
                    DialectType::Spark | DialectType::Databricks
                        if matches!(
                            source,
                            DialectType::TSQL | DialectType::Fabric
                        ) =>
                    {
                        // TSQL RIGHT(x, n) -> RIGHT(CAST(x AS STRING), n) for Spark
                        let x = f.args[0].clone();
                        let n = f.args[1].clone();
                        let cast_x = Expression::Cast(Box::new(Cast {
                            this: x,
                            to: DataType::VarChar {
                                length: None,
                                parenthesized_length: false,
                            },
                            double_colon_syntax: false,
                            trailing_comments: Vec::new(),
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "RIGHT".to_string(),
                            vec![cast_x, n],
                        ))))
                    }
                    _ => Ok(Expression::Function(f)),
                }
            }
            // MAP_FROM_ARRAYS(keys, vals) -> target-specific map construction
            "MAP_FROM_ARRAYS" if f.args.len() == 2 => match target {
                DialectType::Snowflake => Ok(Expression::Function(Box::new(
                    Function::new("OBJECT_CONSTRUCT".to_string(), f.args),
                ))),
                // Spark-family supports MAP_FROM_ARRAYS natively; keep it.
                DialectType::Spark | DialectType::Databricks => {
                    Ok(Expression::Function(Box::new(Function::new(
                        "MAP_FROM_ARRAYS".to_string(),
                        f.args,
                    ))))
                }
                _ => Ok(Expression::Function(Box::new(Function::new(
                    "MAP".to_string(),
                    f.args,
                )))),
            },
            // LIKE(foo, 'pat') -> foo LIKE 'pat'; LIKE(foo, 'pat', '!') -> foo LIKE 'pat' ESCAPE '!'
            // SQLite uses LIKE(pattern, string[, escape]) with args in reverse order
            "LIKE" if f.args.len() >= 2 => {
                let (this, pattern) = if matches!(source, DialectType::SQLite) {
                    // SQLite: LIKE(pattern, string) -> string LIKE pattern
                    (f.args[1].clone(), f.args[0].clone())
                } else {
                    // Standard: LIKE(string, pattern) -> string LIKE pattern
                    (f.args[0].clone(), f.args[1].clone())
                };
                // Optional third argument becomes the ESCAPE clause.
                let escape = if f.args.len() >= 3 {
                    Some(f.args[2].clone())
                } else {
                    None
                };
                Ok(Expression::Like(Box::new(crate::expressions::LikeOp {
                    left: this,
                    right: pattern,
                    escape,
                    quantifier: None,
                    inferred_type: None,
                })))
            }
            // ILIKE(foo, 'pat') -> foo ILIKE 'pat'
            // Unlike LIKE above, no SQLite argument-order special case here.
            "ILIKE" if f.args.len() >= 2 => {
                let this = f.args[0].clone();
                let pattern = f.args[1].clone();
                let escape = if f.args.len() >= 3 {
                    Some(f.args[2].clone())
                } else {
                    None
                };
                Ok(Expression::ILike(Box::new(crate::expressions::LikeOp {
                    left: this,
                    right: pattern,
                    escape,
                    quantifier: None,
                    inferred_type: None,
                })))
            }
            // CHAR(n) -> CHR(n) for non-MySQL/non-TSQL targets
            "CHAR" if f.args.len() == 1 => match target {
                DialectType::MySQL
                | DialectType::SingleStore
                | DialectType::TSQL => Ok(Expression::Function(f)),
                _ => Ok(Expression::Function(Box::new(Function::new(
                    "CHR".to_string(),
                    f.args,
                )))),
            },
            // CONCAT(a, b) -> a || b for PostgreSQL
            // Only for ClickHouse/MySQL sources; other sources keep CONCAT.
            "CONCAT"
                if f.args.len() == 2
                    && matches!(target, DialectType::PostgreSQL)
                    && matches!(
                        source,
                        DialectType::ClickHouse | DialectType::MySQL
                    ) =>
            {
                // Pop right then left to consume args without cloning.
                let mut args = f.args;
                let right = args.pop().unwrap();
                let left = args.pop().unwrap();
                Ok(Expression::DPipe(Box::new(crate::expressions::DPipe {
                    this: Box::new(left),
                    expression: Box::new(right),
                    safe: None,
                })))
            }
            // ARRAY_TO_STRING(arr, delim) -> target-specific
            "ARRAY_TO_STRING" if f.args.len() >= 2 => match target {
                DialectType::Presto | DialectType::Trino => {
                    Ok(Expression::Function(Box::new(Function::new(
                        "ARRAY_JOIN".to_string(),
                        f.args,
                    ))))
                }
                DialectType::TSQL => Ok(Expression::Function(Box::new(
                    Function::new("STRING_AGG".to_string(), f.args),
                ))),
                _ => Ok(Expression::Function(f)),
            },
            // ARRAY_CONCAT / LIST_CONCAT -> target-specific
            "ARRAY_CONCAT" | "LIST_CONCAT" if f.args.len() == 2 => match target {
                DialectType::Spark
                | DialectType::Databricks
                | DialectType::Hive => Ok(Expression::Function(Box::new(
                    Function::new("CONCAT".to_string(), f.args),
                ))),
                DialectType::Snowflake => Ok(Expression::Function(Box::new(
                    Function::new("ARRAY_CAT".to_string(), f.args),
                ))),
                DialectType::Redshift => Ok(Expression::Function(Box::new(
                    Function::new("ARRAY_CONCAT".to_string(), f.args),
                ))),
                DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
                    Function::new("ARRAY_CAT".to_string(), f.args),
                ))),
                DialectType::DuckDB => Ok(Expression::Function(Box::new(
                    Function::new("LIST_CONCAT".to_string(), f.args),
                ))),
                DialectType::Presto | DialectType::Trino => {
                    Ok(Expression::Function(Box::new(Function::new(
                        "CONCAT".to_string(),
                        f.args,
                    ))))
                }
                DialectType::BigQuery => Ok(Expression::Function(Box::new(
                    Function::new("ARRAY_CONCAT".to_string(), f.args),
                ))),
                _ => Ok(Expression::Function(f)),
            },
            // ARRAY_CONTAINS(arr, x) / HAS(arr, x) / CONTAINS(arr, x) normalization
            "HAS" if f.args.len() == 2 => match target {
                DialectType::Spark
                | DialectType::Databricks
                | DialectType::Hive => Ok(Expression::Function(Box::new(
                    Function::new("ARRAY_CONTAINS".to_string(), f.args),
                ))),
                DialectType::Presto | DialectType::Trino => {
                    Ok(Expression::Function(Box::new(Function::new(
                        "CONTAINS".to_string(),
                        f.args,
                    ))))
                }
                _ => Ok(Expression::Function(f)),
            },
            // NVL(a, b, c, d) -> COALESCE(a, b, c, d) - NVL should keep all args
            // (2-arg NVL is presumably handled elsewhere - TODO confirm)
            "NVL" if f.args.len() > 2 => Ok(Expression::Function(Box::new(
                Function::new("COALESCE".to_string(), f.args),
            ))),
            // ISNULL(x) in MySQL -> (x IS NULL)
            // Only for MySQL->MySQL round-trips; normalizes the function call
            // into the parenthesized IS NULL predicate form.
            "ISNULL"
                if f.args.len() == 1
                    && matches!(source, DialectType::MySQL)
                    && matches!(target, DialectType::MySQL) =>
            {
                let arg = f.args.into_iter().next().unwrap();
                Ok(Expression::Paren(Box::new(crate::expressions::Paren {
                    this: Expression::IsNull(Box::new(
                        crate::expressions::IsNull {
                            this: arg,
                            not: false,
                            postfix_form: false,
                        },
                    )),
                    trailing_comments: Vec::new(),
                })))
            }
            // MONTHNAME(x) -> DATE_FORMAT(x, '%M') for MySQL -> MySQL
            "MONTHNAME"
                if f.args.len() == 1 && matches!(target, DialectType::MySQL) =>
            {
                let arg = f.args.into_iter().next().unwrap();
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_FORMAT".to_string(),
                    vec![arg, Expression::string("%M")],
                ))))
            }
            // ClickHouse splitByString('s', x) -> DuckDB STR_SPLIT(x, 's') / Hive SPLIT(x, CONCAT('\\Q', 's', '\\E'))
            // Note the ClickHouse argument order is (separator, string); most
            // targets expect (string, separator), hence the swap below.
            "SPLITBYSTRING" if f.args.len() == 2 => {
                let sep = f.args[0].clone();
                let str_arg = f.args[1].clone();
                match target {
                    DialectType::DuckDB => Ok(Expression::Function(Box::new(
                        Function::new("STR_SPLIT".to_string(), vec![str_arg, sep]),
                    ))),
                    DialectType::Doris => {
                        Ok(Expression::Function(Box::new(Function::new(
                            "SPLIT_BY_STRING".to_string(),
                            vec![str_arg, sep],
                        ))))
                    }
                    DialectType::Hive
                    | DialectType::Spark
                    | DialectType::Databricks => {
                        // SPLIT takes a regex; quote the literal separator with
                        // \Q...\E so regex metacharacters are matched verbatim:
                        // SPLIT(x, CONCAT('\\Q', sep, '\\E'))
                        let escaped =
                            Expression::Function(Box::new(Function::new(
                                "CONCAT".to_string(),
                                vec![
                                    Expression::string("\\Q"),
                                    sep,
                                    Expression::string("\\E"),
                                ],
                            )));
                        Ok(Expression::Function(Box::new(Function::new(
                            "SPLIT".to_string(),
                            vec![str_arg, escaped],
                        ))))
                    }
                    _ => Ok(Expression::Function(f)),
                }
            }
            // ClickHouse splitByRegexp('pattern', x) -> DuckDB STR_SPLIT_REGEX(x, 'pattern')
            "SPLITBYREGEXP" if f.args.len() == 2 => {
                let sep = f.args[0].clone();
                let str_arg = f.args[1].clone();
                match target {
                    DialectType::DuckDB => {
                        Ok(Expression::Function(Box::new(Function::new(
                            "STR_SPLIT_REGEX".to_string(),
                            vec![str_arg, sep],
                        ))))
                    }
                    DialectType::Hive
                    | DialectType::Spark
                    | DialectType::Databricks => {
                        // SPLIT already takes a regex pattern; no quoting needed.
                        Ok(Expression::Function(Box::new(Function::new(
                            "SPLIT".to_string(),
                            vec![str_arg, sep],
                        ))))
                    }
                    _ => Ok(Expression::Function(f)),
                }
            }
            // ClickHouse toMonday(x) -> DATE_TRUNC('WEEK', x) / DATE_TRUNC(x, 'WEEK') for Doris
            // Doris puts the unit second; everyone else puts it first.
            "TOMONDAY" => {
                if f.args.len() == 1 {
                    let arg = f.args.into_iter().next().unwrap();
                    match target {
                        DialectType::Doris => {
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_TRUNC".to_string(),
                                vec![arg, Expression::string("WEEK")],
                            ))))
                        }
                        _ => Ok(Expression::Function(Box::new(Function::new(
                            "DATE_TRUNC".to_string(),
                            vec![Expression::string("WEEK"), arg],
                        )))),
                    }
                } else {
                    Ok(Expression::Function(f))
                }
            }
            // COLLECT_LIST with FILTER(WHERE x IS NOT NULL) for targets that need it
            "COLLECT_LIST" if f.args.len() == 1 => match target {
                DialectType::Spark
                | DialectType::Databricks
                | DialectType::Hive => Ok(Expression::Function(f)),
                _ => Ok(Expression::Function(Box::new(Function::new(
                    "ARRAY_AGG".to_string(),
                    f.args,
                )))),
            },
            // TO_CHAR(x) with 1 arg -> CAST(x AS STRING) for Doris
            "TO_CHAR"
                if f.args.len() == 1 && matches!(target, DialectType::Doris) =>
            {
                let arg = f.args.into_iter().next().unwrap();
                // STRING is not a standard DataType variant here, so it is
                // emitted as a Custom type for the Doris generator.
                Ok(Expression::Cast(Box::new(crate::expressions::Cast {
                    this: arg,
                    to: DataType::Custom {
                        name: "STRING".to_string(),
                    },
                    double_colon_syntax: false,
                    trailing_comments: Vec::new(),
                    format: None,
                    default: None,
                    inferred_type: None,
                })))
            }
            // DBMS_RANDOM.VALUE() -> RANDOM() for PostgreSQL (Oracle package call)
            "DBMS_RANDOM.VALUE" if f.args.is_empty() => match target {
                DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
                    Function::new("RANDOM".to_string(), vec![]),
                ))),
                _ => Ok(Expression::Function(f)),
            },
            // ClickHouse formatDateTime -> target-specific
            "FORMATDATETIME" if f.args.len() >= 2 => match target {
                DialectType::MySQL => Ok(Expression::Function(Box::new(
                    Function::new("DATE_FORMAT".to_string(), f.args),
                ))),
                _ => Ok(Expression::Function(f)),
            },
            // REPLICATE('x', n) -> REPEAT('x', n) for non-TSQL targets
            "REPLICATE" if f.args.len() == 2 => match target {
                DialectType::TSQL => Ok(Expression::Function(f)),
                _ => Ok(Expression::Function(Box::new(Function::new(
                    "REPEAT".to_string(),
                    f.args,
                )))),
            },
            // LEN(x) -> LENGTH(x) for non-TSQL targets
            // No CAST needed when arg is already a string literal
            "LEN" if f.args.len() == 1 => {
                match target {
                    DialectType::TSQL => Ok(Expression::Function(f)),
                    DialectType::Spark | DialectType::Databricks => {
                        let arg = f.args.into_iter().next().unwrap();
                        // Don't wrap string literals with CAST - they're already strings
                        let is_string = matches!(
                            &arg,
                            Expression::Literal(
                                crate::expressions::Literal::String(_)
                            )
                        );
                        // Non-literal arguments get CAST(x AS VARCHAR) so the
                        // TSQL semantics of LEN on non-string values carry over.
                        let final_arg = if is_string {
                            arg
                        } else {
                            Expression::Cast(Box::new(Cast {
                                this: arg,
                                to: DataType::VarChar {
                                    length: None,
                                    parenthesized_length: false,
                                },
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                                inferred_type: None,
                            }))
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "LENGTH".to_string(),
                            vec![final_arg],
                        ))))
                    }
                    _ => {
                        let arg = f.args.into_iter().next().unwrap();
                        Ok(Expression::Function(Box::new(Function::new(
                            "LENGTH".to_string(),
                            vec![arg],
                        ))))
                    }
                }
            }
            // COUNT_BIG(x) -> COUNT(x) for non-TSQL targets
            "COUNT_BIG" if f.args.len() == 1 => match target {
                DialectType::TSQL => Ok(Expression::Function(f)),
                _ => Ok(Expression::Function(Box::new(Function::new(
                    "COUNT".to_string(),
                    f.args,
                )))),
            },
            // DATEFROMPARTS(y, m, d) -> MAKE_DATE(y, m, d) for non-TSQL targets
            "DATEFROMPARTS" if f.args.len() == 3 => match target {
                DialectType::TSQL => Ok(Expression::Function(f)),
                _ => Ok(Expression::Function(Box::new(Function::new(
                    "MAKE_DATE".to_string(),
                    f.args,
                )))),
            },
            // REGEXP_LIKE(str, pattern) -> RegexpLike expression (target-specific output)
            "REGEXP_LIKE" if f.args.len() >= 2 => {
                let str_expr = f.args[0].clone();
                let pattern = f.args[1].clone();
                // Optional third argument: regex match flags.
                let flags = if f.args.len() >= 3 {
                    Some(f.args[2].clone())
                } else {
                    None
                };
                match target {
                    DialectType::DuckDB => {
                        let mut new_args = vec![str_expr, pattern];
                        if let Some(fl) = flags {
                            new_args.push(fl);
                        }
                        Ok(Expression::Function(Box::new(Function::new(
                            "REGEXP_MATCHES".to_string(),
                            new_args,
                        ))))
                    }
                    // Other targets get the dedicated AST node so each
                    // generator can pick its own spelling.
                    _ => Ok(Expression::RegexpLike(Box::new(
                        crate::expressions::RegexpFunc {
                            this: str_expr,
                            pattern,
                            flags,
                        },
                    ))),
                }
            }
            // ClickHouse arrayJoin -> UNNEST for PostgreSQL
            "ARRAYJOIN" if f.args.len() == 1 => match target {
                DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
                    Function::new("UNNEST".to_string(), f.args),
                ))),
                _ => Ok(Expression::Function(f)),
            },
            // DATETIMEFROMPARTS(y, m, d, h, mi, s, ms) -> MAKE_TIMESTAMP / TIMESTAMP_FROM_PARTS
            "DATETIMEFROMPARTS" if f.args.len() == 7 => {
                match target {
                    DialectType::TSQL => Ok(Expression::Function(f)),
                    DialectType::DuckDB => {
                        // MAKE_TIMESTAMP(y, m, d, h, mi, s + (ms / 1000.0))
                        // DuckDB takes 6 args with fractional seconds, so fold
                        // the millisecond argument into the seconds argument.
                        let mut args = f.args;
                        let ms = args.pop().unwrap();
                        let s = args.pop().unwrap();
                        // s + (ms / 1000.0)
                        let ms_frac = Expression::Div(Box::new(BinaryOp::new(
                            ms,
                            Expression::Literal(
                                crate::expressions::Literal::Number(
                                    "1000.0".to_string(),
                                ),
                            ),
                        )));
                        let s_with_ms = Expression::Add(Box::new(BinaryOp::new(
                            s,
                            Expression::Paren(Box::new(Paren {
                                this: ms_frac,
                                trailing_comments: vec![],
                            })),
                        )));
                        args.push(s_with_ms);
                        Ok(Expression::Function(Box::new(Function::new(
                            "MAKE_TIMESTAMP".to_string(),
                            args,
                        ))))
                    }
                    DialectType::Snowflake => {
                        // TIMESTAMP_FROM_PARTS(y, m, d, h, mi, s, ms * 1000000)
                        // Snowflake's final argument is nanoseconds.
                        let mut args = f.args;
                        let ms = args.pop().unwrap();
                        // ms * 1000000
                        let ns = Expression::Mul(Box::new(BinaryOp::new(
                            ms,
                            Expression::number(1000000),
                        )));
                        args.push(ns);
                        Ok(Expression::Function(Box::new(Function::new(
                            "TIMESTAMP_FROM_PARTS".to_string(),
                            args,
                        ))))
                    }
                    _ => {
                        // Default: keep function name for other targets
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATETIMEFROMPARTS".to_string(),
                            f.args,
                        ))))
                    }
                }
            }
12296 // CONVERT(type, expr [, style]) -> CAST(expr AS type) for non-TSQL targets
12297 // TRY_CONVERT(type, expr [, style]) -> TRY_CAST(expr AS type) for non-TSQL targets
12298 "CONVERT" | "TRY_CONVERT" if f.args.len() >= 2 => {
12299 let is_try = name == "TRY_CONVERT";
12300 let type_expr = f.args[0].clone();
12301 let value_expr = f.args[1].clone();
12302 let style = if f.args.len() >= 3 {
12303 Some(&f.args[2])
12304 } else {
12305 None
12306 };
12307
12308 // For TSQL->TSQL, normalize types and preserve CONVERT/TRY_CONVERT
12309 if matches!(target, DialectType::TSQL) {
12310 let normalized_type = match &type_expr {
12311 Expression::DataType(dt) => {
12312 let new_dt = match dt {
12313 DataType::Int { .. } => DataType::Custom {
12314 name: "INTEGER".to_string(),
12315 },
12316 _ => dt.clone(),
12317 };
12318 Expression::DataType(new_dt)
12319 }
12320 Expression::Identifier(id) => {
12321 let upper = id.name.to_uppercase();
12322 let normalized = match upper.as_str() {
12323 "INT" => "INTEGER",
12324 _ => &upper,
12325 };
12326 Expression::Identifier(
12327 crate::expressions::Identifier::new(normalized),
12328 )
12329 }
12330 Expression::Column(col) => {
12331 let upper = col.name.name.to_uppercase();
12332 let normalized = match upper.as_str() {
12333 "INT" => "INTEGER",
12334 _ => &upper,
12335 };
12336 Expression::Identifier(
12337 crate::expressions::Identifier::new(normalized),
12338 )
12339 }
12340 _ => type_expr.clone(),
12341 };
12342 let func_name = if is_try { "TRY_CONVERT" } else { "CONVERT" };
12343 let mut new_args = vec![normalized_type, value_expr];
12344 if let Some(s) = style {
12345 new_args.push(s.clone());
12346 }
12347 return Ok(Expression::Function(Box::new(Function::new(
12348 func_name.to_string(),
12349 new_args,
12350 ))));
12351 }
12352
12353 // For other targets: CONVERT(type, expr) -> CAST(expr AS type)
12354 fn expr_to_datatype(e: &Expression) -> Option<DataType> {
12355 match e {
12356 Expression::DataType(dt) => {
12357 // Convert NVARCHAR/NCHAR Custom types to standard VarChar/Char
12358 match dt {
12359 DataType::Custom { name }
12360 if name.starts_with("NVARCHAR(")
12361 || name.starts_with("NCHAR(") =>
12362 {
12363 // Extract the length from "NVARCHAR(200)" or "NCHAR(40)"
12364 let inner = &name[name.find('(').unwrap() + 1
12365 ..name.len() - 1];
12366 if inner.eq_ignore_ascii_case("MAX") {
12367 Some(DataType::Text)
12368 } else if let Ok(len) = inner.parse::<u32>() {
12369 if name.starts_with("NCHAR") {
12370 Some(DataType::Char {
12371 length: Some(len),
12372 })
12373 } else {
12374 Some(DataType::VarChar {
12375 length: Some(len),
12376 parenthesized_length: false,
12377 })
12378 }
12379 } else {
12380 Some(dt.clone())
12381 }
12382 }
12383 DataType::Custom { name } if name == "NVARCHAR" => {
12384 Some(DataType::VarChar {
12385 length: None,
12386 parenthesized_length: false,
12387 })
12388 }
12389 DataType::Custom { name } if name == "NCHAR" => {
12390 Some(DataType::Char { length: None })
12391 }
12392 DataType::Custom { name }
12393 if name == "NVARCHAR(MAX)"
12394 || name == "VARCHAR(MAX)" =>
12395 {
12396 Some(DataType::Text)
12397 }
12398 _ => Some(dt.clone()),
12399 }
12400 }
12401 Expression::Identifier(id) => {
12402 let name = id.name.to_uppercase();
12403 match name.as_str() {
12404 "INT" | "INTEGER" => Some(DataType::Int {
12405 length: None,
12406 integer_spelling: false,
12407 }),
12408 "BIGINT" => Some(DataType::BigInt { length: None }),
12409 "SMALLINT" => {
12410 Some(DataType::SmallInt { length: None })
12411 }
12412 "TINYINT" => {
12413 Some(DataType::TinyInt { length: None })
12414 }
12415 "FLOAT" => Some(DataType::Float {
12416 precision: None,
12417 scale: None,
12418 real_spelling: false,
12419 }),
12420 "REAL" => Some(DataType::Float {
12421 precision: None,
12422 scale: None,
12423 real_spelling: true,
12424 }),
12425 "DATETIME" | "DATETIME2" => {
12426 Some(DataType::Timestamp {
12427 timezone: false,
12428 precision: None,
12429 })
12430 }
12431 "DATE" => Some(DataType::Date),
12432 "BIT" => Some(DataType::Boolean),
12433 "TEXT" => Some(DataType::Text),
12434 "NUMERIC" => Some(DataType::Decimal {
12435 precision: None,
12436 scale: None,
12437 }),
12438 "MONEY" => Some(DataType::Decimal {
12439 precision: Some(15),
12440 scale: Some(4),
12441 }),
12442 "SMALLMONEY" => Some(DataType::Decimal {
12443 precision: Some(6),
12444 scale: Some(4),
12445 }),
12446 "VARCHAR" => Some(DataType::VarChar {
12447 length: None,
12448 parenthesized_length: false,
12449 }),
12450 "NVARCHAR" => Some(DataType::VarChar {
12451 length: None,
12452 parenthesized_length: false,
12453 }),
12454 "CHAR" => Some(DataType::Char { length: None }),
12455 "NCHAR" => Some(DataType::Char { length: None }),
12456 _ => Some(DataType::Custom { name }),
12457 }
12458 }
12459 Expression::Column(col) => {
12460 let name = col.name.name.to_uppercase();
12461 match name.as_str() {
12462 "INT" | "INTEGER" => Some(DataType::Int {
12463 length: None,
12464 integer_spelling: false,
12465 }),
12466 "BIGINT" => Some(DataType::BigInt { length: None }),
12467 "FLOAT" => Some(DataType::Float {
12468 precision: None,
12469 scale: None,
12470 real_spelling: false,
12471 }),
12472 "DATETIME" | "DATETIME2" => {
12473 Some(DataType::Timestamp {
12474 timezone: false,
12475 precision: None,
12476 })
12477 }
12478 "DATE" => Some(DataType::Date),
12479 "NUMERIC" => Some(DataType::Decimal {
12480 precision: None,
12481 scale: None,
12482 }),
12483 "VARCHAR" => Some(DataType::VarChar {
12484 length: None,
12485 parenthesized_length: false,
12486 }),
12487 "NVARCHAR" => Some(DataType::VarChar {
12488 length: None,
12489 parenthesized_length: false,
12490 }),
12491 "CHAR" => Some(DataType::Char { length: None }),
12492 "NCHAR" => Some(DataType::Char { length: None }),
12493 _ => Some(DataType::Custom { name }),
12494 }
12495 }
12496 // NVARCHAR(200) parsed as Function("NVARCHAR", [200])
12497 Expression::Function(f) => {
12498 let fname = f.name.to_uppercase();
12499 match fname.as_str() {
12500 "VARCHAR" | "NVARCHAR" => {
12501 let len = f.args.first().and_then(|a| {
12502 if let Expression::Literal(
12503 crate::expressions::Literal::Number(n),
12504 ) = a
12505 {
12506 n.parse::<u32>().ok()
12507 } else if let Expression::Identifier(id) = a
12508 {
12509 if id.name.eq_ignore_ascii_case("MAX") {
12510 None
12511 } else {
12512 None
12513 }
12514 } else {
12515 None
12516 }
12517 });
12518 // Check for VARCHAR(MAX) -> TEXT
12519 let is_max = f.args.first().map_or(false, |a| {
12520 matches!(a, Expression::Identifier(id) if id.name.eq_ignore_ascii_case("MAX"))
12521 || matches!(a, Expression::Column(col) if col.name.name.eq_ignore_ascii_case("MAX"))
12522 });
12523 if is_max {
12524 Some(DataType::Text)
12525 } else {
12526 Some(DataType::VarChar {
12527 length: len,
12528 parenthesized_length: false,
12529 })
12530 }
12531 }
12532 "NCHAR" | "CHAR" => {
12533 let len = f.args.first().and_then(|a| {
12534 if let Expression::Literal(
12535 crate::expressions::Literal::Number(n),
12536 ) = a
12537 {
12538 n.parse::<u32>().ok()
12539 } else {
12540 None
12541 }
12542 });
12543 Some(DataType::Char { length: len })
12544 }
12545 "NUMERIC" | "DECIMAL" => {
12546 let precision = f.args.first().and_then(|a| {
12547 if let Expression::Literal(
12548 crate::expressions::Literal::Number(n),
12549 ) = a
12550 {
12551 n.parse::<u32>().ok()
12552 } else {
12553 None
12554 }
12555 });
12556 let scale = f.args.get(1).and_then(|a| {
12557 if let Expression::Literal(
12558 crate::expressions::Literal::Number(n),
12559 ) = a
12560 {
12561 n.parse::<u32>().ok()
12562 } else {
12563 None
12564 }
12565 });
12566 Some(DataType::Decimal { precision, scale })
12567 }
12568 _ => None,
12569 }
12570 }
12571 _ => None,
12572 }
12573 }
12574
12575 if let Some(mut dt) = expr_to_datatype(&type_expr) {
12576 // For TSQL source: VARCHAR/CHAR without length defaults to 30
12577 let is_tsql_source =
12578 matches!(source, DialectType::TSQL | DialectType::Fabric);
12579 if is_tsql_source {
12580 match &dt {
12581 DataType::VarChar { length: None, .. } => {
12582 dt = DataType::VarChar {
12583 length: Some(30),
12584 parenthesized_length: false,
12585 };
12586 }
12587 DataType::Char { length: None } => {
12588 dt = DataType::Char { length: Some(30) };
12589 }
12590 _ => {}
12591 }
12592 }
12593
12594 // Determine if this is a string type
12595 let is_string_type = matches!(
12596 dt,
12597 DataType::VarChar { .. }
12598 | DataType::Char { .. }
12599 | DataType::Text
12600 ) || matches!(&dt, DataType::Custom { name } if name == "NVARCHAR" || name == "NCHAR"
12601 || name.starts_with("NVARCHAR(") || name.starts_with("NCHAR(")
12602 || name.starts_with("VARCHAR(") || name == "VARCHAR"
12603 || name == "STRING");
12604
12605 // Determine if this is a date/time type
12606 let is_datetime_type = matches!(
12607 dt,
12608 DataType::Timestamp { .. } | DataType::Date
12609 ) || matches!(&dt, DataType::Custom { name } if name == "DATETIME"
12610 || name == "DATETIME2" || name == "SMALLDATETIME");
12611
12612 // Check for date conversion with style
12613 if style.is_some() {
12614 let style_num = style.and_then(|s| {
12615 if let Expression::Literal(
12616 crate::expressions::Literal::Number(n),
12617 ) = s
12618 {
12619 n.parse::<u32>().ok()
12620 } else {
12621 None
12622 }
12623 });
12624
12625 // TSQL CONVERT date styles (Java format)
12626 let format_str = style_num.and_then(|n| match n {
12627 101 => Some("MM/dd/yyyy"),
12628 102 => Some("yyyy.MM.dd"),
12629 103 => Some("dd/MM/yyyy"),
12630 104 => Some("dd.MM.yyyy"),
12631 105 => Some("dd-MM-yyyy"),
12632 108 => Some("HH:mm:ss"),
12633 110 => Some("MM-dd-yyyy"),
12634 112 => Some("yyyyMMdd"),
12635 120 | 20 => Some("yyyy-MM-dd HH:mm:ss"),
12636 121 | 21 => Some("yyyy-MM-dd HH:mm:ss.SSSSSS"),
12637 126 | 127 => Some("yyyy-MM-dd'T'HH:mm:ss.SSS"),
12638 _ => None,
12639 });
12640
12641 // Non-string, non-datetime types with style: just CAST, ignore the style
12642 if !is_string_type && !is_datetime_type {
12643 let cast_expr = if is_try {
12644 Expression::TryCast(Box::new(
12645 crate::expressions::Cast {
12646 this: value_expr,
12647 to: dt,
12648 trailing_comments: Vec::new(),
12649 double_colon_syntax: false,
12650 format: None,
12651 default: None,
12652 inferred_type: None,
12653 },
12654 ))
12655 } else {
12656 Expression::Cast(Box::new(
12657 crate::expressions::Cast {
12658 this: value_expr,
12659 to: dt,
12660 trailing_comments: Vec::new(),
12661 double_colon_syntax: false,
12662 format: None,
12663 default: None,
12664 inferred_type: None,
12665 },
12666 ))
12667 };
12668 return Ok(cast_expr);
12669 }
12670
12671 if let Some(java_fmt) = format_str {
12672 let c_fmt = java_fmt
12673 .replace("yyyy", "%Y")
12674 .replace("MM", "%m")
12675 .replace("dd", "%d")
12676 .replace("HH", "%H")
12677 .replace("mm", "%M")
12678 .replace("ss", "%S")
12679 .replace("SSSSSS", "%f")
12680 .replace("SSS", "%f")
12681 .replace("'T'", "T");
12682
12683 // For datetime target types: style is the INPUT format for parsing strings -> dates
12684 if is_datetime_type {
12685 match target {
12686 DialectType::DuckDB => {
12687 return Ok(Expression::Function(Box::new(
12688 Function::new(
12689 "STRPTIME".to_string(),
12690 vec![
12691 value_expr,
12692 Expression::string(&c_fmt),
12693 ],
12694 ),
12695 )));
12696 }
12697 DialectType::Spark
12698 | DialectType::Databricks => {
12699 // CONVERT(DATETIME, x, style) -> TO_TIMESTAMP(x, fmt)
12700 // CONVERT(DATE, x, style) -> TO_DATE(x, fmt)
12701 let func_name =
12702 if matches!(dt, DataType::Date) {
12703 "TO_DATE"
12704 } else {
12705 "TO_TIMESTAMP"
12706 };
12707 return Ok(Expression::Function(Box::new(
12708 Function::new(
12709 func_name.to_string(),
12710 vec![
12711 value_expr,
12712 Expression::string(java_fmt),
12713 ],
12714 ),
12715 )));
12716 }
12717 DialectType::Hive => {
12718 return Ok(Expression::Function(Box::new(
12719 Function::new(
12720 "TO_TIMESTAMP".to_string(),
12721 vec![
12722 value_expr,
12723 Expression::string(java_fmt),
12724 ],
12725 ),
12726 )));
12727 }
12728 _ => {
12729 return Ok(Expression::Cast(Box::new(
12730 crate::expressions::Cast {
12731 this: value_expr,
12732 to: dt,
12733 trailing_comments: Vec::new(),
12734 double_colon_syntax: false,
12735 format: None,
12736 default: None,
12737 inferred_type: None,
12738 },
12739 )));
12740 }
12741 }
12742 }
12743
12744 // For string target types: style is the OUTPUT format for dates -> strings
12745 match target {
12746 DialectType::DuckDB => Ok(Expression::Function(
12747 Box::new(Function::new(
12748 "STRPTIME".to_string(),
12749 vec![
12750 value_expr,
12751 Expression::string(&c_fmt),
12752 ],
12753 )),
12754 )),
12755 DialectType::Spark | DialectType::Databricks => {
12756 // For string target types with style: CAST(DATE_FORMAT(x, fmt) AS type)
12757 // Determine the target string type
12758 let string_dt = match &dt {
12759 DataType::VarChar {
12760 length: Some(l),
12761 ..
12762 } => DataType::VarChar {
12763 length: Some(*l),
12764 parenthesized_length: false,
12765 },
12766 DataType::Text => DataType::Custom {
12767 name: "STRING".to_string(),
12768 },
12769 _ => DataType::Custom {
12770 name: "STRING".to_string(),
12771 },
12772 };
12773 let date_format_expr = Expression::Function(
12774 Box::new(Function::new(
12775 "DATE_FORMAT".to_string(),
12776 vec![
12777 value_expr,
12778 Expression::string(java_fmt),
12779 ],
12780 )),
12781 );
12782 let cast_expr = if is_try {
12783 Expression::TryCast(Box::new(
12784 crate::expressions::Cast {
12785 this: date_format_expr,
12786 to: string_dt,
12787 trailing_comments: Vec::new(),
12788 double_colon_syntax: false,
12789 format: None,
12790 default: None,
12791 inferred_type: None,
12792 },
12793 ))
12794 } else {
12795 Expression::Cast(Box::new(
12796 crate::expressions::Cast {
12797 this: date_format_expr,
12798 to: string_dt,
12799 trailing_comments: Vec::new(),
12800 double_colon_syntax: false,
12801 format: None,
12802 default: None,
12803 inferred_type: None,
12804 },
12805 ))
12806 };
12807 Ok(cast_expr)
12808 }
12809 DialectType::MySQL | DialectType::SingleStore => {
12810 // For MySQL: CAST(DATE_FORMAT(x, mysql_fmt) AS CHAR(n))
12811 let mysql_fmt = java_fmt
12812 .replace("yyyy", "%Y")
12813 .replace("MM", "%m")
12814 .replace("dd", "%d")
12815 .replace("HH:mm:ss.SSSSSS", "%T")
12816 .replace("HH:mm:ss", "%T")
12817 .replace("HH", "%H")
12818 .replace("mm", "%i")
12819 .replace("ss", "%S");
12820 let date_format_expr = Expression::Function(
12821 Box::new(Function::new(
12822 "DATE_FORMAT".to_string(),
12823 vec![
12824 value_expr,
12825 Expression::string(&mysql_fmt),
12826 ],
12827 )),
12828 );
12829 // MySQL uses CHAR for string casts
12830 let mysql_dt = match &dt {
12831 DataType::VarChar { length, .. } => {
12832 DataType::Char { length: *length }
12833 }
12834 _ => dt,
12835 };
12836 Ok(Expression::Cast(Box::new(
12837 crate::expressions::Cast {
12838 this: date_format_expr,
12839 to: mysql_dt,
12840 trailing_comments: Vec::new(),
12841 double_colon_syntax: false,
12842 format: None,
12843 default: None,
12844 inferred_type: None,
12845 },
12846 )))
12847 }
12848 DialectType::Hive => {
12849 let func_name = "TO_TIMESTAMP";
12850 Ok(Expression::Function(Box::new(
12851 Function::new(
12852 func_name.to_string(),
12853 vec![
12854 value_expr,
12855 Expression::string(java_fmt),
12856 ],
12857 ),
12858 )))
12859 }
12860 _ => Ok(Expression::Cast(Box::new(
12861 crate::expressions::Cast {
12862 this: value_expr,
12863 to: dt,
12864 trailing_comments: Vec::new(),
12865 double_colon_syntax: false,
12866 format: None,
12867 default: None,
12868 inferred_type: None,
12869 },
12870 ))),
12871 }
12872 } else {
12873 // Unknown style, just CAST
12874 let cast_expr = if is_try {
12875 Expression::TryCast(Box::new(
12876 crate::expressions::Cast {
12877 this: value_expr,
12878 to: dt,
12879 trailing_comments: Vec::new(),
12880 double_colon_syntax: false,
12881 format: None,
12882 default: None,
12883 inferred_type: None,
12884 },
12885 ))
12886 } else {
12887 Expression::Cast(Box::new(
12888 crate::expressions::Cast {
12889 this: value_expr,
12890 to: dt,
12891 trailing_comments: Vec::new(),
12892 double_colon_syntax: false,
12893 format: None,
12894 default: None,
12895 inferred_type: None,
12896 },
12897 ))
12898 };
12899 Ok(cast_expr)
12900 }
12901 } else {
12902 // No style - simple CAST
12903 let final_dt = if matches!(
12904 target,
12905 DialectType::MySQL | DialectType::SingleStore
12906 ) {
12907 match &dt {
12908 DataType::Int { .. }
12909 | DataType::BigInt { .. }
12910 | DataType::SmallInt { .. }
12911 | DataType::TinyInt { .. } => DataType::Custom {
12912 name: "SIGNED".to_string(),
12913 },
12914 DataType::VarChar { length, .. } => {
12915 DataType::Char { length: *length }
12916 }
12917 _ => dt,
12918 }
12919 } else {
12920 dt
12921 };
12922 let cast_expr = if is_try {
12923 Expression::TryCast(Box::new(
12924 crate::expressions::Cast {
12925 this: value_expr,
12926 to: final_dt,
12927 trailing_comments: Vec::new(),
12928 double_colon_syntax: false,
12929 format: None,
12930 default: None,
12931 inferred_type: None,
12932 },
12933 ))
12934 } else {
12935 Expression::Cast(Box::new(crate::expressions::Cast {
12936 this: value_expr,
12937 to: final_dt,
12938 trailing_comments: Vec::new(),
12939 double_colon_syntax: false,
12940 format: None,
12941 default: None,
12942 inferred_type: None,
12943 }))
12944 };
12945 Ok(cast_expr)
12946 }
12947 } else {
12948 // Can't convert type expression - keep as CONVERT/TRY_CONVERT function
12949 Ok(Expression::Function(f))
12950 }
12951 }
12952 // STRFTIME(val, fmt) from DuckDB / STRFTIME(fmt, val) from SQLite -> target-specific
12953 "STRFTIME" if f.args.len() == 2 => {
12954 // SQLite uses STRFTIME(fmt, val); DuckDB uses STRFTIME(val, fmt)
12955 let (val, fmt_expr) = if matches!(source, DialectType::SQLite) {
12956 // SQLite: args[0] = format, args[1] = value
12957 (f.args[1].clone(), &f.args[0])
12958 } else {
12959 // DuckDB and others: args[0] = value, args[1] = format
12960 (f.args[0].clone(), &f.args[1])
12961 };
12962
12963 // Helper to convert C-style format to Java-style
// Translate C strftime-style specifiers into Java SimpleDateFormat
// patterns (e.g. "%Y-%m-%d" -> "yyyy-MM-dd").
//
// The rewrite table is applied strictly in order: dash-padded variants
// such as "%-I" must be consumed before the bare "%I", and the composite
// shorthands "%F"/"%T" expand to full Java sequences. No replacement
// output contains '%', so later rules never re-match earlier output.
fn c_to_java_format(fmt: &str) -> String {
    let rules: [(&str, &str); 19] = [
        ("%Y", "yyyy"),
        ("%m", "MM"),
        ("%d", "dd"),
        ("%H", "HH"),
        ("%M", "mm"),
        ("%S", "ss"),
        ("%f", "SSSSSS"),
        ("%y", "yy"),
        ("%-m", "M"),
        ("%-d", "d"),
        ("%-H", "H"),
        ("%-I", "h"),
        ("%I", "hh"),
        ("%p", "a"),
        ("%j", "DDD"),
        ("%a", "EEE"),
        ("%b", "MMM"),
        ("%F", "yyyy-MM-dd"),
        ("%T", "HH:mm:ss"),
    ];
    rules
        .iter()
        .fold(fmt.to_string(), |acc, (c_pat, java_pat)| {
            acc.replace(c_pat, java_pat)
        })
}
12985
12986 // Helper: recursively convert format strings within expressions (handles CONCAT)
12987 fn convert_fmt_expr(
12988 expr: &Expression,
12989 converter: &dyn Fn(&str) -> String,
12990 ) -> Expression {
12991 match expr {
12992 Expression::Literal(
12993 crate::expressions::Literal::String(s),
12994 ) => Expression::string(&converter(s)),
12995 Expression::Function(func)
12996 if func.name.eq_ignore_ascii_case("CONCAT") =>
12997 {
12998 let new_args: Vec<Expression> = func
12999 .args
13000 .iter()
13001 .map(|a| convert_fmt_expr(a, converter))
13002 .collect();
13003 Expression::Function(Box::new(Function::new(
13004 "CONCAT".to_string(),
13005 new_args,
13006 )))
13007 }
13008 other => other.clone(),
13009 }
13010 }
13011
13012 match target {
13013 DialectType::DuckDB => {
13014 if matches!(source, DialectType::SQLite) {
13015 // SQLite STRFTIME(fmt, val) -> DuckDB STRFTIME(CAST(val AS TIMESTAMP), fmt)
13016 let cast_val = Expression::Cast(Box::new(Cast {
13017 this: val,
13018 to: crate::expressions::DataType::Timestamp {
13019 precision: None,
13020 timezone: false,
13021 },
13022 trailing_comments: Vec::new(),
13023 double_colon_syntax: false,
13024 format: None,
13025 default: None,
13026 inferred_type: None,
13027 }));
13028 Ok(Expression::Function(Box::new(Function::new(
13029 "STRFTIME".to_string(),
13030 vec![cast_val, fmt_expr.clone()],
13031 ))))
13032 } else {
13033 Ok(Expression::Function(f))
13034 }
13035 }
13036 DialectType::Spark
13037 | DialectType::Databricks
13038 | DialectType::Hive => {
13039 // STRFTIME(val, fmt) -> DATE_FORMAT(val, java_fmt)
13040 let converted_fmt =
13041 convert_fmt_expr(fmt_expr, &c_to_java_format);
13042 Ok(Expression::Function(Box::new(Function::new(
13043 "DATE_FORMAT".to_string(),
13044 vec![val, converted_fmt],
13045 ))))
13046 }
13047 DialectType::TSQL | DialectType::Fabric => {
13048 // STRFTIME(val, fmt) -> FORMAT(val, java_fmt)
13049 let converted_fmt =
13050 convert_fmt_expr(fmt_expr, &c_to_java_format);
13051 Ok(Expression::Function(Box::new(Function::new(
13052 "FORMAT".to_string(),
13053 vec![val, converted_fmt],
13054 ))))
13055 }
13056 DialectType::Presto
13057 | DialectType::Trino
13058 | DialectType::Athena => {
13059 // STRFTIME(val, fmt) -> DATE_FORMAT(val, presto_fmt) (convert DuckDB format to Presto)
13060 if let Expression::Literal(
13061 crate::expressions::Literal::String(s),
13062 ) = fmt_expr
13063 {
13064 let presto_fmt = duckdb_to_presto_format(s);
13065 Ok(Expression::Function(Box::new(Function::new(
13066 "DATE_FORMAT".to_string(),
13067 vec![val, Expression::string(&presto_fmt)],
13068 ))))
13069 } else {
13070 Ok(Expression::Function(Box::new(Function::new(
13071 "DATE_FORMAT".to_string(),
13072 vec![val, fmt_expr.clone()],
13073 ))))
13074 }
13075 }
13076 DialectType::BigQuery => {
13077 // STRFTIME(val, fmt) -> FORMAT_DATE(bq_fmt, val) - note reversed arg order
13078 if let Expression::Literal(
13079 crate::expressions::Literal::String(s),
13080 ) = fmt_expr
13081 {
13082 let bq_fmt = duckdb_to_bigquery_format(s);
13083 Ok(Expression::Function(Box::new(Function::new(
13084 "FORMAT_DATE".to_string(),
13085 vec![Expression::string(&bq_fmt), val],
13086 ))))
13087 } else {
13088 Ok(Expression::Function(Box::new(Function::new(
13089 "FORMAT_DATE".to_string(),
13090 vec![fmt_expr.clone(), val],
13091 ))))
13092 }
13093 }
13094 DialectType::PostgreSQL | DialectType::Redshift => {
13095 // STRFTIME(val, fmt) -> TO_CHAR(val, pg_fmt)
13096 if let Expression::Literal(
13097 crate::expressions::Literal::String(s),
13098 ) = fmt_expr
13099 {
13100 let pg_fmt = s
13101 .replace("%Y", "YYYY")
13102 .replace("%m", "MM")
13103 .replace("%d", "DD")
13104 .replace("%H", "HH24")
13105 .replace("%M", "MI")
13106 .replace("%S", "SS")
13107 .replace("%y", "YY")
13108 .replace("%-m", "FMMM")
13109 .replace("%-d", "FMDD")
13110 .replace("%-H", "FMHH24")
13111 .replace("%-I", "FMHH12")
13112 .replace("%p", "AM")
13113 .replace("%F", "YYYY-MM-DD")
13114 .replace("%T", "HH24:MI:SS");
13115 Ok(Expression::Function(Box::new(Function::new(
13116 "TO_CHAR".to_string(),
13117 vec![val, Expression::string(&pg_fmt)],
13118 ))))
13119 } else {
13120 Ok(Expression::Function(Box::new(Function::new(
13121 "TO_CHAR".to_string(),
13122 vec![val, fmt_expr.clone()],
13123 ))))
13124 }
13125 }
13126 _ => Ok(Expression::Function(f)),
13127 }
13128 }
13129 // STRPTIME(val, fmt) from DuckDB -> target-specific date parse function
13130 "STRPTIME" if f.args.len() == 2 => {
13131 let val = f.args[0].clone();
13132 let fmt_expr = &f.args[1];
13133
// Translate C strftime-style parse specifiers into Java SimpleDateFormat
// patterns. Like the STRFTIME variant, but without the day-of-year /
// weekday-name / month-name tokens, which are not meaningful for parsing.
//
// Rules run in order: dash-padded "%-I" is rewritten before "%I", and the
// composite "%F"/"%T" shorthands expand last. Outputs never contain '%',
// so no rule can re-match a previous rule's output.
fn c_to_java_format_parse(fmt: &str) -> String {
    let rules: [(&str, &str); 16] = [
        ("%Y", "yyyy"),
        ("%m", "MM"),
        ("%d", "dd"),
        ("%H", "HH"),
        ("%M", "mm"),
        ("%S", "ss"),
        ("%f", "SSSSSS"),
        ("%y", "yy"),
        ("%-m", "M"),
        ("%-d", "d"),
        ("%-H", "H"),
        ("%-I", "h"),
        ("%I", "hh"),
        ("%p", "a"),
        ("%F", "yyyy-MM-dd"),
        ("%T", "HH:mm:ss"),
    ];
    rules
        .iter()
        .fold(fmt.to_string(), |acc, (c_pat, java_pat)| {
            acc.replace(c_pat, java_pat)
        })
}
13152
13153 match target {
13154 DialectType::DuckDB => Ok(Expression::Function(f)),
13155 DialectType::Spark | DialectType::Databricks => {
13156 // STRPTIME(val, fmt) -> TO_TIMESTAMP(val, java_fmt)
13157 if let Expression::Literal(
13158 crate::expressions::Literal::String(s),
13159 ) = fmt_expr
13160 {
13161 let java_fmt = c_to_java_format_parse(s);
13162 Ok(Expression::Function(Box::new(Function::new(
13163 "TO_TIMESTAMP".to_string(),
13164 vec![val, Expression::string(&java_fmt)],
13165 ))))
13166 } else {
13167 Ok(Expression::Function(Box::new(Function::new(
13168 "TO_TIMESTAMP".to_string(),
13169 vec![val, fmt_expr.clone()],
13170 ))))
13171 }
13172 }
13173 DialectType::Hive => {
13174 // STRPTIME(val, fmt) -> CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(val, java_fmt)) AS TIMESTAMP)
13175 if let Expression::Literal(
13176 crate::expressions::Literal::String(s),
13177 ) = fmt_expr
13178 {
13179 let java_fmt = c_to_java_format_parse(s);
13180 let unix_ts =
13181 Expression::Function(Box::new(Function::new(
13182 "UNIX_TIMESTAMP".to_string(),
13183 vec![val, Expression::string(&java_fmt)],
13184 )));
13185 let from_unix =
13186 Expression::Function(Box::new(Function::new(
13187 "FROM_UNIXTIME".to_string(),
13188 vec![unix_ts],
13189 )));
13190 Ok(Expression::Cast(Box::new(
13191 crate::expressions::Cast {
13192 this: from_unix,
13193 to: DataType::Timestamp {
13194 timezone: false,
13195 precision: None,
13196 },
13197 trailing_comments: Vec::new(),
13198 double_colon_syntax: false,
13199 format: None,
13200 default: None,
13201 inferred_type: None,
13202 },
13203 )))
13204 } else {
13205 Ok(Expression::Function(f))
13206 }
13207 }
13208 DialectType::Presto
13209 | DialectType::Trino
13210 | DialectType::Athena => {
13211 // STRPTIME(val, fmt) -> DATE_PARSE(val, presto_fmt) (convert DuckDB format to Presto)
13212 if let Expression::Literal(
13213 crate::expressions::Literal::String(s),
13214 ) = fmt_expr
13215 {
13216 let presto_fmt = duckdb_to_presto_format(s);
13217 Ok(Expression::Function(Box::new(Function::new(
13218 "DATE_PARSE".to_string(),
13219 vec![val, Expression::string(&presto_fmt)],
13220 ))))
13221 } else {
13222 Ok(Expression::Function(Box::new(Function::new(
13223 "DATE_PARSE".to_string(),
13224 vec![val, fmt_expr.clone()],
13225 ))))
13226 }
13227 }
13228 DialectType::BigQuery => {
13229 // STRPTIME(val, fmt) -> PARSE_TIMESTAMP(bq_fmt, val) - note reversed arg order
13230 if let Expression::Literal(
13231 crate::expressions::Literal::String(s),
13232 ) = fmt_expr
13233 {
13234 let bq_fmt = duckdb_to_bigquery_format(s);
13235 Ok(Expression::Function(Box::new(Function::new(
13236 "PARSE_TIMESTAMP".to_string(),
13237 vec![Expression::string(&bq_fmt), val],
13238 ))))
13239 } else {
13240 Ok(Expression::Function(Box::new(Function::new(
13241 "PARSE_TIMESTAMP".to_string(),
13242 vec![fmt_expr.clone(), val],
13243 ))))
13244 }
13245 }
13246 _ => Ok(Expression::Function(f)),
13247 }
13248 }
13249 // DATE_FORMAT(val, fmt) from Presto source (C-style format) -> target-specific
13250 "DATE_FORMAT"
13251 if f.args.len() >= 2
13252 && matches!(
13253 source,
13254 DialectType::Presto
13255 | DialectType::Trino
13256 | DialectType::Athena
13257 ) =>
13258 {
13259 let val = f.args[0].clone();
13260 let fmt_expr = &f.args[1];
13261
13262 match target {
13263 DialectType::Presto
13264 | DialectType::Trino
13265 | DialectType::Athena => {
13266 // Presto -> Presto: normalize format (e.g., %H:%i:%S -> %T)
13267 if let Expression::Literal(
13268 crate::expressions::Literal::String(s),
13269 ) = fmt_expr
13270 {
13271 let normalized = crate::dialects::presto::PrestoDialect::normalize_presto_format(s);
13272 Ok(Expression::Function(Box::new(Function::new(
13273 "DATE_FORMAT".to_string(),
13274 vec![val, Expression::string(&normalized)],
13275 ))))
13276 } else {
13277 Ok(Expression::Function(f))
13278 }
13279 }
13280 DialectType::Hive
13281 | DialectType::Spark
13282 | DialectType::Databricks => {
13283 // Convert Presto C-style to Java-style format
13284 if let Expression::Literal(
13285 crate::expressions::Literal::String(s),
13286 ) = fmt_expr
13287 {
13288 let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
13289 Ok(Expression::Function(Box::new(Function::new(
13290 "DATE_FORMAT".to_string(),
13291 vec![val, Expression::string(&java_fmt)],
13292 ))))
13293 } else {
13294 Ok(Expression::Function(f))
13295 }
13296 }
13297 DialectType::DuckDB => {
13298 // Convert to STRFTIME(val, duckdb_fmt)
13299 if let Expression::Literal(
13300 crate::expressions::Literal::String(s),
13301 ) = fmt_expr
13302 {
13303 let duckdb_fmt = crate::dialects::presto::PrestoDialect::presto_to_duckdb_format(s);
13304 Ok(Expression::Function(Box::new(Function::new(
13305 "STRFTIME".to_string(),
13306 vec![val, Expression::string(&duckdb_fmt)],
13307 ))))
13308 } else {
13309 Ok(Expression::Function(Box::new(Function::new(
13310 "STRFTIME".to_string(),
13311 vec![val, fmt_expr.clone()],
13312 ))))
13313 }
13314 }
13315 DialectType::BigQuery => {
13316 // Convert to FORMAT_DATE(bq_fmt, val) - reversed args
13317 if let Expression::Literal(
13318 crate::expressions::Literal::String(s),
13319 ) = fmt_expr
13320 {
13321 let bq_fmt = crate::dialects::presto::PrestoDialect::presto_to_bigquery_format(s);
13322 Ok(Expression::Function(Box::new(Function::new(
13323 "FORMAT_DATE".to_string(),
13324 vec![Expression::string(&bq_fmt), val],
13325 ))))
13326 } else {
13327 Ok(Expression::Function(Box::new(Function::new(
13328 "FORMAT_DATE".to_string(),
13329 vec![fmt_expr.clone(), val],
13330 ))))
13331 }
13332 }
13333 _ => Ok(Expression::Function(f)),
13334 }
13335 }
13336 // DATE_PARSE(val, fmt) from Presto source -> target-specific parse function
13337 "DATE_PARSE"
13338 if f.args.len() >= 2
13339 && matches!(
13340 source,
13341 DialectType::Presto
13342 | DialectType::Trino
13343 | DialectType::Athena
13344 ) =>
13345 {
13346 let val = f.args[0].clone();
13347 let fmt_expr = &f.args[1];
13348
13349 match target {
13350 DialectType::Presto
13351 | DialectType::Trino
13352 | DialectType::Athena => {
13353 // Presto -> Presto: normalize format
13354 if let Expression::Literal(
13355 crate::expressions::Literal::String(s),
13356 ) = fmt_expr
13357 {
13358 let normalized = crate::dialects::presto::PrestoDialect::normalize_presto_format(s);
13359 Ok(Expression::Function(Box::new(Function::new(
13360 "DATE_PARSE".to_string(),
13361 vec![val, Expression::string(&normalized)],
13362 ))))
13363 } else {
13364 Ok(Expression::Function(f))
13365 }
13366 }
13367 DialectType::Hive => {
13368 // Presto -> Hive: if default format, just CAST(x AS TIMESTAMP)
13369 if let Expression::Literal(
13370 crate::expressions::Literal::String(s),
13371 ) = fmt_expr
13372 {
13373 if crate::dialects::presto::PrestoDialect::is_default_timestamp_format(s)
13374 || crate::dialects::presto::PrestoDialect::is_default_date_format(s) {
13375 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
13376 this: val,
13377 to: DataType::Timestamp { timezone: false, precision: None },
13378 trailing_comments: Vec::new(),
13379 double_colon_syntax: false,
13380 format: None,
13381 default: None,
13382 inferred_type: None,
13383 })))
13384 } else {
13385 let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
13386 Ok(Expression::Function(Box::new(Function::new(
13387 "TO_TIMESTAMP".to_string(),
13388 vec![val, Expression::string(&java_fmt)],
13389 ))))
13390 }
13391 } else {
13392 Ok(Expression::Function(f))
13393 }
13394 }
13395 DialectType::Spark | DialectType::Databricks => {
13396 // Presto -> Spark: TO_TIMESTAMP(val, java_fmt)
13397 if let Expression::Literal(
13398 crate::expressions::Literal::String(s),
13399 ) = fmt_expr
13400 {
13401 let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
13402 Ok(Expression::Function(Box::new(Function::new(
13403 "TO_TIMESTAMP".to_string(),
13404 vec![val, Expression::string(&java_fmt)],
13405 ))))
13406 } else {
13407 Ok(Expression::Function(f))
13408 }
13409 }
13410 DialectType::DuckDB => {
13411 // Presto -> DuckDB: STRPTIME(val, duckdb_fmt)
13412 if let Expression::Literal(
13413 crate::expressions::Literal::String(s),
13414 ) = fmt_expr
13415 {
13416 let duckdb_fmt = crate::dialects::presto::PrestoDialect::presto_to_duckdb_format(s);
13417 Ok(Expression::Function(Box::new(Function::new(
13418 "STRPTIME".to_string(),
13419 vec![val, Expression::string(&duckdb_fmt)],
13420 ))))
13421 } else {
13422 Ok(Expression::Function(Box::new(Function::new(
13423 "STRPTIME".to_string(),
13424 vec![val, fmt_expr.clone()],
13425 ))))
13426 }
13427 }
13428 _ => Ok(Expression::Function(f)),
13429 }
13430 }
13431 // FROM_BASE64(x) / TO_BASE64(x) from Presto -> Hive-specific renames
13432 "FROM_BASE64"
13433 if f.args.len() == 1 && matches!(target, DialectType::Hive) =>
13434 {
13435 Ok(Expression::Function(Box::new(Function::new(
13436 "UNBASE64".to_string(),
13437 f.args,
13438 ))))
13439 }
13440 "TO_BASE64"
13441 if f.args.len() == 1 && matches!(target, DialectType::Hive) =>
13442 {
13443 Ok(Expression::Function(Box::new(Function::new(
13444 "BASE64".to_string(),
13445 f.args,
13446 ))))
13447 }
13448 // FROM_UNIXTIME(x) -> CAST(FROM_UNIXTIME(x) AS TIMESTAMP) for Spark
13449 "FROM_UNIXTIME"
13450 if f.args.len() == 1
13451 && matches!(
13452 source,
13453 DialectType::Presto
13454 | DialectType::Trino
13455 | DialectType::Athena
13456 )
13457 && matches!(
13458 target,
13459 DialectType::Spark | DialectType::Databricks
13460 ) =>
13461 {
13462 // Wrap FROM_UNIXTIME(x) in CAST(... AS TIMESTAMP)
13463 let from_unix = Expression::Function(Box::new(Function::new(
13464 "FROM_UNIXTIME".to_string(),
13465 f.args,
13466 )));
13467 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
13468 this: from_unix,
13469 to: DataType::Timestamp {
13470 timezone: false,
13471 precision: None,
13472 },
13473 trailing_comments: Vec::new(),
13474 double_colon_syntax: false,
13475 format: None,
13476 default: None,
13477 inferred_type: None,
13478 })))
13479 }
13480 // DATE_FORMAT(val, fmt) from Hive/Spark/MySQL -> target-specific format function
13481 "DATE_FORMAT"
13482 if f.args.len() >= 2
13483 && !matches!(
13484 target,
13485 DialectType::Hive
13486 | DialectType::Spark
13487 | DialectType::Databricks
13488 | DialectType::MySQL
13489 | DialectType::SingleStore
13490 ) =>
13491 {
13492 let val = f.args[0].clone();
13493 let fmt_expr = &f.args[1];
13494 let is_hive_source = matches!(
13495 source,
13496 DialectType::Hive
13497 | DialectType::Spark
13498 | DialectType::Databricks
13499 );
13500
// Translate Java SimpleDateFormat patterns into C strftime specifiers.
//
// Pass 1 rewrites multi-character patterns, longest first, so that
// "yyyy" is consumed before "yy" and "SSSSSS" before "SSS". Pass 2
// scans character-by-character to map the single-char timezone tokens
// (z -> %Z name, Z -> %z offset) without corrupting the '%X' pairs
// produced by pass 1.
fn java_to_c_format(fmt: &str) -> String {
    let result = fmt
        .replace("yyyy", "%Y")
        .replace("SSSSSS", "%f")
        // Java SSS (milliseconds) also maps to %f; this matches how the
        // TSQL CONVERT style handling in this module treats SSS/SSSSSS,
        // and prevents a literal "SSS" from leaking into the output.
        .replace("SSS", "%f")
        .replace("EEEE", "%W")
        .replace("MM", "%m")
        .replace("dd", "%d")
        .replace("HH", "%H")
        .replace("mm", "%M")
        .replace("ss", "%S")
        .replace("yy", "%y");
    // Second pass: handle single-char timezone patterns, skipping over
    // any "%X" specifier already emitted by the first pass.
    let mut out = String::new();
    let chars: Vec<char> = result.chars().collect();
    let mut i = 0;
    while i < chars.len() {
        if chars[i] == '%' && i + 1 < chars.len() {
            // Already a format specifier: copy both chars verbatim.
            out.push(chars[i]);
            out.push(chars[i + 1]);
            i += 2;
        } else if chars[i] == 'z' {
            out.push_str("%Z");
            i += 1;
        } else if chars[i] == 'Z' {
            out.push_str("%z");
            i += 1;
        } else {
            out.push(chars[i]);
            i += 1;
        }
    }
    out
}

// Presto flavor of the C translation: Presto's date_format supports the
// %T shorthand for HH:MM:SS, so collapse that sequence.
fn java_to_presto_format(fmt: &str) -> String {
    let c_fmt = java_to_c_format(fmt);
    c_fmt.replace("%H:%M:%S", "%T")
}

// BigQuery flavor of the C translation: BigQuery format elements include
// %F (yyyy-MM-dd) and %T (HH:mm:ss) shorthands.
fn java_to_bq_format(fmt: &str) -> String {
    let c_fmt = java_to_c_format(fmt);
    c_fmt.replace("%Y-%m-%d", "%F").replace("%H:%M:%S", "%T")
}
13552
13553 // For Hive source, CAST string literals to appropriate type
13554 let cast_val = if is_hive_source {
13555 match &val {
13556 Expression::Literal(
13557 crate::expressions::Literal::String(_),
13558 ) => {
13559 match target {
13560 DialectType::DuckDB
13561 | DialectType::Presto
13562 | DialectType::Trino
13563 | DialectType::Athena => {
13564 Self::ensure_cast_timestamp(val.clone())
13565 }
13566 DialectType::BigQuery => {
13567 // BigQuery: CAST(val AS DATETIME)
13568 Expression::Cast(Box::new(
13569 crate::expressions::Cast {
13570 this: val.clone(),
13571 to: DataType::Custom {
13572 name: "DATETIME".to_string(),
13573 },
13574 trailing_comments: vec![],
13575 double_colon_syntax: false,
13576 format: None,
13577 default: None,
13578 inferred_type: None,
13579 },
13580 ))
13581 }
13582 _ => val.clone(),
13583 }
13584 }
13585 // For CAST(x AS DATE) or DATE literal, Presto needs CAST(CAST(x AS DATE) AS TIMESTAMP)
13586 Expression::Cast(c)
13587 if matches!(c.to, DataType::Date)
13588 && matches!(
13589 target,
13590 DialectType::Presto
13591 | DialectType::Trino
13592 | DialectType::Athena
13593 ) =>
13594 {
13595 Expression::Cast(Box::new(crate::expressions::Cast {
13596 this: val.clone(),
13597 to: DataType::Timestamp {
13598 timezone: false,
13599 precision: None,
13600 },
13601 trailing_comments: vec![],
13602 double_colon_syntax: false,
13603 format: None,
13604 default: None,
13605 inferred_type: None,
13606 }))
13607 }
13608 Expression::Literal(crate::expressions::Literal::Date(
13609 _,
13610 )) if matches!(
13611 target,
13612 DialectType::Presto
13613 | DialectType::Trino
13614 | DialectType::Athena
13615 ) =>
13616 {
13617 // DATE 'x' -> CAST(CAST('x' AS DATE) AS TIMESTAMP)
13618 let cast_date = Self::date_literal_to_cast(val.clone());
13619 Expression::Cast(Box::new(crate::expressions::Cast {
13620 this: cast_date,
13621 to: DataType::Timestamp {
13622 timezone: false,
13623 precision: None,
13624 },
13625 trailing_comments: vec![],
13626 double_colon_syntax: false,
13627 format: None,
13628 default: None,
13629 inferred_type: None,
13630 }))
13631 }
13632 _ => val.clone(),
13633 }
13634 } else {
13635 val.clone()
13636 };
13637
13638 match target {
13639 DialectType::DuckDB => {
13640 if let Expression::Literal(
13641 crate::expressions::Literal::String(s),
13642 ) = fmt_expr
13643 {
13644 let c_fmt = if is_hive_source {
13645 java_to_c_format(s)
13646 } else {
13647 s.clone()
13648 };
13649 Ok(Expression::Function(Box::new(Function::new(
13650 "STRFTIME".to_string(),
13651 vec![cast_val, Expression::string(&c_fmt)],
13652 ))))
13653 } else {
13654 Ok(Expression::Function(Box::new(Function::new(
13655 "STRFTIME".to_string(),
13656 vec![cast_val, fmt_expr.clone()],
13657 ))))
13658 }
13659 }
13660 DialectType::Presto
13661 | DialectType::Trino
13662 | DialectType::Athena => {
13663 if is_hive_source {
13664 if let Expression::Literal(
13665 crate::expressions::Literal::String(s),
13666 ) = fmt_expr
13667 {
13668 let p_fmt = java_to_presto_format(s);
13669 Ok(Expression::Function(Box::new(Function::new(
13670 "DATE_FORMAT".to_string(),
13671 vec![cast_val, Expression::string(&p_fmt)],
13672 ))))
13673 } else {
13674 Ok(Expression::Function(Box::new(Function::new(
13675 "DATE_FORMAT".to_string(),
13676 vec![cast_val, fmt_expr.clone()],
13677 ))))
13678 }
13679 } else {
13680 Ok(Expression::Function(Box::new(Function::new(
13681 "DATE_FORMAT".to_string(),
13682 f.args,
13683 ))))
13684 }
13685 }
13686 DialectType::BigQuery => {
13687 // DATE_FORMAT(val, fmt) -> FORMAT_DATE(fmt, val)
13688 if let Expression::Literal(
13689 crate::expressions::Literal::String(s),
13690 ) = fmt_expr
13691 {
13692 let bq_fmt = if is_hive_source {
13693 java_to_bq_format(s)
13694 } else {
13695 java_to_c_format(s)
13696 };
13697 Ok(Expression::Function(Box::new(Function::new(
13698 "FORMAT_DATE".to_string(),
13699 vec![Expression::string(&bq_fmt), cast_val],
13700 ))))
13701 } else {
13702 Ok(Expression::Function(Box::new(Function::new(
13703 "FORMAT_DATE".to_string(),
13704 vec![fmt_expr.clone(), cast_val],
13705 ))))
13706 }
13707 }
13708 DialectType::PostgreSQL | DialectType::Redshift => {
13709 if let Expression::Literal(
13710 crate::expressions::Literal::String(s),
13711 ) = fmt_expr
13712 {
13713 let pg_fmt = s
13714 .replace("yyyy", "YYYY")
13715 .replace("MM", "MM")
13716 .replace("dd", "DD")
13717 .replace("HH", "HH24")
13718 .replace("mm", "MI")
13719 .replace("ss", "SS")
13720 .replace("yy", "YY");
13721 Ok(Expression::Function(Box::new(Function::new(
13722 "TO_CHAR".to_string(),
13723 vec![val, Expression::string(&pg_fmt)],
13724 ))))
13725 } else {
13726 Ok(Expression::Function(Box::new(Function::new(
13727 "TO_CHAR".to_string(),
13728 vec![val, fmt_expr.clone()],
13729 ))))
13730 }
13731 }
13732 _ => Ok(Expression::Function(f)),
13733 }
13734 }
// DATEDIFF(unit, start, end) - 3-arg form
// SQLite uses DATEDIFF(date1, date2, unit_string) instead
//
// Normalizes the arguments to (unit, start, end) regardless of the source
// dialect, then emits the target dialect's preferred spelling, inserting
// CASTs where the target requires typed date/timestamp arguments.
"DATEDIFF" if f.args.len() == 3 => {
    let mut args = f.args;
    // SQLite source: args = (date1, date2, unit_string)
    // Standard source: args = (unit, start, end)
    let (_arg0, arg1, arg2, unit_str) =
        if matches!(source, DialectType::SQLite) {
            let date1 = args.remove(0);
            let date2 = args.remove(0);
            let unit_expr = args.remove(0);
            let unit_s = Self::get_unit_str_static(&unit_expr);

            // For SQLite target, generate JULIANDAY arithmetic directly
            if matches!(target, DialectType::SQLite) {
                let jd_first = Expression::Function(Box::new(
                    Function::new("JULIANDAY".to_string(), vec![date1]),
                ));
                let jd_second = Expression::Function(Box::new(
                    Function::new("JULIANDAY".to_string(), vec![date2]),
                ));
                // (JULIANDAY(date1) - JULIANDAY(date2)): fractional day delta
                let diff = Expression::Sub(Box::new(
                    crate::expressions::BinaryOp::new(
                        jd_first, jd_second,
                    ),
                ));
                let paren_diff = Expression::Paren(Box::new(
                    crate::expressions::Paren {
                        this: diff,
                        trailing_comments: Vec::new(),
                    },
                ));
                // Scale the day delta into the requested unit.
                // MONTH/YEAR use fixed 30.0 / 365.0 divisors
                // (approximate, calendar-naive).
                let adjusted = match unit_s.as_str() {
                    "HOUR" => Expression::Mul(Box::new(
                        crate::expressions::BinaryOp::new(
                            paren_diff,
                            Expression::Literal(Literal::Number(
                                "24.0".to_string(),
                            )),
                        ),
                    )),
                    "MINUTE" => Expression::Mul(Box::new(
                        crate::expressions::BinaryOp::new(
                            paren_diff,
                            Expression::Literal(Literal::Number(
                                "1440.0".to_string(),
                            )),
                        ),
                    )),
                    "SECOND" => Expression::Mul(Box::new(
                        crate::expressions::BinaryOp::new(
                            paren_diff,
                            Expression::Literal(Literal::Number(
                                "86400.0".to_string(),
                            )),
                        ),
                    )),
                    "MONTH" => Expression::Div(Box::new(
                        crate::expressions::BinaryOp::new(
                            paren_diff,
                            Expression::Literal(Literal::Number(
                                "30.0".to_string(),
                            )),
                        ),
                    )),
                    "YEAR" => Expression::Div(Box::new(
                        crate::expressions::BinaryOp::new(
                            paren_diff,
                            Expression::Literal(Literal::Number(
                                "365.0".to_string(),
                            )),
                        ),
                    )),
                    // DAY and any unrecognized unit: raw day delta
                    _ => paren_diff,
                };
                // Truncate to an integer (spelled "INTEGER")
                return Ok(Expression::Cast(Box::new(Cast {
                    this: adjusted,
                    to: DataType::Int {
                        length: None,
                        integer_spelling: true,
                    },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                })));
            }

            // For other targets, remap to standard (unit, start, end) form
            let unit_ident =
                Expression::Identifier(Identifier::new(&unit_s));
            (unit_ident, date1, date2, unit_s)
        } else {
            let arg0 = args.remove(0);
            let arg1 = args.remove(0);
            let arg2 = args.remove(0);
            let unit_s = Self::get_unit_str_static(&arg0);
            (arg0, arg1, arg2, unit_s)
        };

    // For Hive/Spark source, string literal dates need to be cast
    // Note: Databricks is excluded - it handles string args like standard SQL
    let is_hive_spark =
        matches!(source, DialectType::Hive | DialectType::Spark);

    match target {
        DialectType::Snowflake => {
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            // Use ensure_to_date_preserved to add TO_DATE with a marker
            // that prevents the Snowflake TO_DATE handler from converting it to CAST
            let d1 = if is_hive_spark {
                Self::ensure_to_date_preserved(arg1)
            } else {
                arg1
            };
            let d2 = if is_hive_spark {
                Self::ensure_to_date_preserved(arg2)
            } else {
                arg2
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![unit, d1, d2],
            ))))
        }
        DialectType::Redshift => {
            // Redshift keeps DATEDIFF(unit, start, end); only
            // Hive/Spark string literals get an explicit DATE cast.
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            let d1 = if is_hive_spark {
                Self::ensure_cast_date(arg1)
            } else {
                arg1
            };
            let d2 = if is_hive_spark {
                Self::ensure_cast_date(arg2)
            } else {
                arg2
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![unit, d1, d2],
            ))))
        }
        DialectType::TSQL => {
            // TSQL: native DATEDIFF(unit, start, end), no casting added
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
        DialectType::DuckDB => {
            let is_redshift_tsql = matches!(
                source,
                DialectType::Redshift | DialectType::TSQL
            );
            if is_hive_spark {
                // For Hive/Spark source, CAST string args to DATE and emit DATE_DIFF directly
                let d1 = Self::ensure_cast_date(arg1);
                let d2 = Self::ensure_cast_date(arg2);
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_DIFF".to_string(),
                    vec![Expression::string(&unit_str), d1, d2],
                ))))
            } else if matches!(source, DialectType::Snowflake) {
                // For Snowflake source: special handling per unit
                match unit_str.as_str() {
                    "NANOSECOND" => {
                        // DATEDIFF(NANOSECOND, start, end) -> EPOCH_NS(CAST(end AS TIMESTAMP_NS)) - EPOCH_NS(CAST(start AS TIMESTAMP_NS))
                        fn cast_to_timestamp_ns(
                            expr: Expression,
                        ) -> Expression
                        {
                            Expression::Cast(Box::new(Cast {
                                this: expr,
                                to: DataType::Custom {
                                    name: "TIMESTAMP_NS".to_string(),
                                },
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            }))
                        }
                        let epoch_end = Expression::Function(Box::new(
                            Function::new(
                                "EPOCH_NS".to_string(),
                                vec![cast_to_timestamp_ns(arg2)],
                            ),
                        ));
                        let epoch_start = Expression::Function(
                            Box::new(Function::new(
                                "EPOCH_NS".to_string(),
                                vec![cast_to_timestamp_ns(arg1)],
                            )),
                        );
                        Ok(Expression::Sub(Box::new(BinaryOp::new(
                            epoch_end,
                            epoch_start,
                        ))))
                    }
                    "WEEK" => {
                        // DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST(x AS DATE)), DATE_TRUNC('WEEK', CAST(y AS DATE)))
                        let d1 = Self::force_cast_date(arg1);
                        let d2 = Self::force_cast_date(arg2);
                        let dt1 = Expression::Function(Box::new(
                            Function::new(
                                "DATE_TRUNC".to_string(),
                                vec![Expression::string("WEEK"), d1],
                            ),
                        ));
                        let dt2 = Expression::Function(Box::new(
                            Function::new(
                                "DATE_TRUNC".to_string(),
                                vec![Expression::string("WEEK"), d2],
                            ),
                        ));
                        Ok(Expression::Function(Box::new(
                            Function::new(
                                "DATE_DIFF".to_string(),
                                vec![
                                    Expression::string(&unit_str),
                                    dt1,
                                    dt2,
                                ],
                            ),
                        )))
                    }
                    _ => {
                        // YEAR, MONTH, QUARTER, DAY, etc.: CAST to DATE
                        let d1 = Self::force_cast_date(arg1);
                        let d2 = Self::force_cast_date(arg2);
                        Ok(Expression::Function(Box::new(
                            Function::new(
                                "DATE_DIFF".to_string(),
                                vec![
                                    Expression::string(&unit_str),
                                    d1,
                                    d2,
                                ],
                            ),
                        )))
                    }
                }
            } else if is_redshift_tsql {
                // For Redshift/TSQL source, CAST args to TIMESTAMP (always)
                let d1 = Self::force_cast_timestamp(arg1);
                let d2 = Self::force_cast_timestamp(arg2);
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_DIFF".to_string(),
                    vec![Expression::string(&unit_str), d1, d2],
                ))))
            } else {
                // Keep as DATEDIFF so DuckDB's transform_datediff handles
                // DATE_TRUNC for WEEK, CAST for string literals, etc.
                let unit =
                    Expression::Identifier(Identifier::new(&unit_str));
                Ok(Expression::Function(Box::new(Function::new(
                    "DATEDIFF".to_string(),
                    vec![unit, arg1, arg2],
                ))))
            }
        }
        DialectType::BigQuery => {
            // NOTE(review): despite its name, this flag also matches
            // Snowflake in this branch (unlike the DuckDB branch above).
            let is_redshift_tsql = matches!(
                source,
                DialectType::Redshift
                    | DialectType::TSQL
                    | DialectType::Snowflake
            );
            let cast_d1 = if is_hive_spark {
                Self::ensure_cast_date(arg1)
            } else if is_redshift_tsql {
                Self::force_cast_datetime(arg1)
            } else {
                Self::ensure_cast_datetime(arg1)
            };
            let cast_d2 = if is_hive_spark {
                Self::ensure_cast_date(arg2)
            } else if is_redshift_tsql {
                Self::force_cast_datetime(arg2)
            } else {
                Self::ensure_cast_datetime(arg2)
            };
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            // BigQuery's argument order is DATE_DIFF(end, start, unit)
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_DIFF".to_string(),
                vec![cast_d2, cast_d1, unit],
            ))))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // For Hive/Spark source, string literals need double-cast: CAST(CAST(x AS TIMESTAMP) AS DATE)
            // For Redshift/TSQL source, args need CAST to TIMESTAMP (always)
            // NOTE(review): this flag also matches Snowflake here.
            let is_redshift_tsql = matches!(
                source,
                DialectType::Redshift
                    | DialectType::TSQL
                    | DialectType::Snowflake
            );
            let d1 = if is_hive_spark {
                Self::double_cast_timestamp_date(arg1)
            } else if is_redshift_tsql {
                Self::force_cast_timestamp(arg1)
            } else {
                arg1
            };
            let d2 = if is_hive_spark {
                Self::double_cast_timestamp_date(arg2)
            } else if is_redshift_tsql {
                Self::force_cast_timestamp(arg2)
            } else {
                arg2
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_DIFF".to_string(),
                vec![Expression::string(&unit_str), d1, d2],
            ))))
        }
        // Hive's DATEDIFF is 2-arg and day-based, so other units are
        // emulated: MONTH via MONTHS_BETWEEN, WEEK via day-diff / 7.
        DialectType::Hive => match unit_str.as_str() {
            "MONTH" => Ok(Expression::Cast(Box::new(Cast {
                this: Expression::Function(Box::new(Function::new(
                    "MONTHS_BETWEEN".to_string(),
                    vec![arg2, arg1],
                ))),
                to: DataType::Int {
                    length: None,
                    integer_spelling: false,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }))),
            "WEEK" => Ok(Expression::Cast(Box::new(Cast {
                this: Expression::Div(Box::new(
                    crate::expressions::BinaryOp::new(
                        Expression::Function(Box::new(Function::new(
                            "DATEDIFF".to_string(),
                            vec![arg2, arg1],
                        ))),
                        Expression::number(7),
                    ),
                )),
                to: DataType::Int {
                    length: None,
                    integer_spelling: false,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }))),
            _ => Ok(Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![arg2, arg1],
            )))),
        },
        DialectType::Spark | DialectType::Databricks => {
            // Spark/Databricks accept the standard 3-arg DATEDIFF
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
        _ => {
            // For Hive/Spark source targeting PostgreSQL etc., cast string literals to DATE
            let d1 = if is_hive_spark {
                Self::ensure_cast_date(arg1)
            } else {
                arg1
            };
            let d2 = if is_hive_spark {
                Self::ensure_cast_date(arg2)
            } else {
                arg2
            };
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![unit, d1, d2],
            ))))
        }
    }
}
// DATEDIFF(end, start) - 2-arg form from Hive/MySQL
// Note the 2-arg order is (end, start); targets using the 3-arg
// (unit, start, end) signature therefore receive the args swapped.
"DATEDIFF" if f.args.len() == 2 => {
    let mut args = f.args;
    let arg0 = args.remove(0); // end date (minuend)
    let arg1 = args.remove(0); // start date (subtrahend)

    // Helper: unwrap TO_DATE(x) -> x (extracts inner arg)
    // Also recognizes TryCast/Cast to DATE that may have been produced by
    // cross-dialect TO_DATE -> TRY_CAST conversion
    let unwrap_to_date = |e: Expression| -> (Expression, bool) {
        if let Expression::Function(ref f) = e {
            if f.name.eq_ignore_ascii_case("TO_DATE")
                && f.args.len() == 1
            {
                return (f.args[0].clone(), true);
            }
        }
        // Also recognize TryCast(x, Date) as an already-converted TO_DATE
        if let Expression::TryCast(ref c) = e {
            if matches!(c.to, DataType::Date) {
                return (e, true); // Already properly cast, return as-is
            }
        }
        (e, false)
    };

    match target {
        DialectType::DuckDB => {
            // For Hive source, always CAST to DATE
            // If arg is TO_DATE(x) or TRY_CAST(x AS DATE), use it directly
            let cast_d0 = if matches!(
                source,
                DialectType::Hive
                    | DialectType::Spark
                    | DialectType::Databricks
            ) {
                let (inner, was_to_date) = unwrap_to_date(arg1);
                if was_to_date {
                    // Already a date expression, use directly
                    if matches!(&inner, Expression::TryCast(_)) {
                        inner // Already TRY_CAST(x AS DATE)
                    } else {
                        Self::try_cast_date(inner)
                    }
                } else {
                    Self::force_cast_date(inner)
                }
            } else {
                Self::ensure_cast_date(arg1)
            };
            let cast_d1 = if matches!(
                source,
                DialectType::Hive
                    | DialectType::Spark
                    | DialectType::Databricks
            ) {
                let (inner, was_to_date) = unwrap_to_date(arg0);
                if was_to_date {
                    if matches!(&inner, Expression::TryCast(_)) {
                        inner
                    } else {
                        Self::try_cast_date(inner)
                    }
                } else {
                    Self::force_cast_date(inner)
                }
            } else {
                Self::ensure_cast_date(arg0)
            };
            // DATE_DIFF('DAY', start, end): cast_d0 came from arg1 (start),
            // cast_d1 from arg0 (end) - the 2-arg order is swapped here.
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_DIFF".to_string(),
                vec![Expression::string("DAY"), cast_d0, cast_d1],
            ))))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // For Hive/Spark source, apply double_cast_timestamp_date
            // For other sources (MySQL etc.), just swap args without casting
            if matches!(
                source,
                DialectType::Hive
                    | DialectType::Spark
                    | DialectType::Databricks
            ) {
                let cast_fn = |e: Expression| -> Expression {
                    let (inner, was_to_date) = unwrap_to_date(e);
                    if was_to_date {
                        // NOTE(review): double_cast_timestamp_date is
                        // applied twice on the TO_DATE path; presumably
                        // the second application is a no-op on an
                        // already-cast expression - confirm against the
                        // expected Presto output.
                        let first_cast =
                            Self::double_cast_timestamp_date(inner);
                        Self::double_cast_timestamp_date(first_cast)
                    } else {
                        Self::double_cast_timestamp_date(inner)
                    }
                };
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_DIFF".to_string(),
                    vec![
                        Expression::string("DAY"),
                        cast_fn(arg1),
                        cast_fn(arg0),
                    ],
                ))))
            } else {
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_DIFF".to_string(),
                    vec![Expression::string("DAY"), arg1, arg0],
                ))))
            }
        }
        DialectType::Redshift => {
            // Redshift: DATEDIFF(DAY, start, end)
            let unit = Expression::Identifier(Identifier::new("DAY"));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![unit, arg1, arg0],
            ))))
        }
        // Targets with a native 2-arg DATEDIFF keep the original order
        _ => Ok(Expression::Function(Box::new(Function::new(
            "DATEDIFF".to_string(),
            vec![arg0, arg1],
        )))),
    }
}
14252 // DATE_DIFF(unit, start, end) - 3-arg with string unit (ClickHouse/DuckDB style)
14253 "DATE_DIFF" if f.args.len() == 3 => {
14254 let mut args = f.args;
14255 let arg0 = args.remove(0);
14256 let arg1 = args.remove(0);
14257 let arg2 = args.remove(0);
14258 let unit_str = Self::get_unit_str_static(&arg0);
14259
14260 match target {
14261 DialectType::DuckDB => {
14262 // DuckDB: DATE_DIFF('UNIT', start, end)
14263 Ok(Expression::Function(Box::new(Function::new(
14264 "DATE_DIFF".to_string(),
14265 vec![Expression::string(&unit_str), arg1, arg2],
14266 ))))
14267 }
14268 DialectType::Presto
14269 | DialectType::Trino
14270 | DialectType::Athena => {
14271 Ok(Expression::Function(Box::new(Function::new(
14272 "DATE_DIFF".to_string(),
14273 vec![Expression::string(&unit_str), arg1, arg2],
14274 ))))
14275 }
14276 DialectType::ClickHouse => {
14277 // ClickHouse: DATE_DIFF(UNIT, start, end) - identifier unit
14278 let unit =
14279 Expression::Identifier(Identifier::new(&unit_str));
14280 Ok(Expression::Function(Box::new(Function::new(
14281 "DATE_DIFF".to_string(),
14282 vec![unit, arg1, arg2],
14283 ))))
14284 }
14285 DialectType::Snowflake | DialectType::Redshift => {
14286 let unit =
14287 Expression::Identifier(Identifier::new(&unit_str));
14288 Ok(Expression::Function(Box::new(Function::new(
14289 "DATEDIFF".to_string(),
14290 vec![unit, arg1, arg2],
14291 ))))
14292 }
14293 _ => {
14294 let unit =
14295 Expression::Identifier(Identifier::new(&unit_str));
14296 Ok(Expression::Function(Box::new(Function::new(
14297 "DATEDIFF".to_string(),
14298 vec![unit, arg1, arg2],
14299 ))))
14300 }
14301 }
14302 }
// DATEADD(unit, val, date) - 3-arg form
//
// Normalizes TSQL unit abbreviations up front, then rewrites per target:
// DATEADD-style targets keep the function, interval-style targets
// (DuckDB/PostgreSQL/MySQL/BigQuery) get `date + INTERVAL` or their
// dedicated AST node, and Spark/Hive get ADD_MONTHS/DATE_ADD emulation.
"DATEADD" if f.args.len() == 3 => {
    let mut args = f.args;
    let arg0 = args.remove(0); // unit
    let arg1 = args.remove(0); // amount to add
    let arg2 = args.remove(0); // date/timestamp expression
    let unit_str = Self::get_unit_str_static(&arg0);

    // Normalize TSQL unit abbreviations to standard names
    let unit_str = match unit_str.as_str() {
        "YY" | "YYYY" => "YEAR".to_string(),
        "QQ" | "Q" => "QUARTER".to_string(),
        "MM" | "M" => "MONTH".to_string(),
        "WK" | "WW" => "WEEK".to_string(),
        "DD" | "D" | "DY" => "DAY".to_string(),
        "HH" => "HOUR".to_string(),
        "MI" | "N" => "MINUTE".to_string(),
        "SS" | "S" => "SECOND".to_string(),
        "MS" => "MILLISECOND".to_string(),
        "MCS" | "US" => "MICROSECOND".to_string(),
        _ => unit_str,
    };
    match target {
        DialectType::Snowflake => {
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            // Cast string literal to TIMESTAMP, but not for Snowflake source
            // (Snowflake natively accepts string literals in DATEADD)
            let arg2 = if matches!(
                &arg2,
                Expression::Literal(Literal::String(_))
            ) && !matches!(source, DialectType::Snowflake)
            {
                Expression::Cast(Box::new(Cast {
                    this: arg2,
                    to: DataType::Timestamp {
                        precision: None,
                        timezone: false,
                    },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                }))
            } else {
                arg2
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
        DialectType::TSQL => {
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            // Cast string literal to DATETIME2, but not when source is Spark/Databricks family
            let arg2 = if matches!(
                &arg2,
                Expression::Literal(Literal::String(_))
            ) && !matches!(
                source,
                DialectType::Spark
                    | DialectType::Databricks
                    | DialectType::Hive
            ) {
                Expression::Cast(Box::new(Cast {
                    this: arg2,
                    to: DataType::Custom {
                        name: "DATETIME2".to_string(),
                    },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                }))
            } else {
                arg2
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
        DialectType::Redshift => {
            // Redshift: DATEADD(unit, amount, date), no casting added
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
        DialectType::Databricks => {
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            // Sources with native DATEADD (TSQL, Databricks, Snowflake) -> DATEADD
            // Other sources (Redshift TsOrDsAdd, etc.) -> DATE_ADD
            let func_name = if matches!(
                source,
                DialectType::TSQL
                    | DialectType::Fabric
                    | DialectType::Databricks
                    | DialectType::Snowflake
            ) {
                "DATEADD"
            } else {
                "DATE_ADD"
            };
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
        DialectType::DuckDB => {
            // Special handling for NANOSECOND from Snowflake
            if unit_str == "NANOSECOND"
                && matches!(source, DialectType::Snowflake)
            {
                // DATEADD(NANOSECOND, offset, ts) -> MAKE_TIMESTAMP_NS(EPOCH_NS(CAST(ts AS TIMESTAMP_NS)) + offset)
                let cast_ts = Expression::Cast(Box::new(Cast {
                    this: arg2,
                    to: DataType::Custom {
                        name: "TIMESTAMP_NS".to_string(),
                    },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                }));
                let epoch_ns =
                    Expression::Function(Box::new(Function::new(
                        "EPOCH_NS".to_string(),
                        vec![cast_ts],
                    )));
                let sum = Expression::Add(Box::new(BinaryOp::new(
                    epoch_ns, arg1,
                )));
                Ok(Expression::Function(Box::new(Function::new(
                    "MAKE_TIMESTAMP_NS".to_string(),
                    vec![sum],
                ))))
            } else {
                // DuckDB: convert to date + INTERVAL syntax with CAST
                let iu = Self::parse_interval_unit_static(&unit_str);
                let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                    this: Some(arg1),
                    unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: iu, use_plural: false }),
                }));
                // Cast string literal to TIMESTAMP
                let arg2 = if matches!(
                    &arg2,
                    Expression::Literal(Literal::String(_))
                ) {
                    Expression::Cast(Box::new(Cast {
                        this: arg2,
                        to: DataType::Timestamp {
                            precision: None,
                            timezone: false,
                        },
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    }))
                } else {
                    arg2
                };
                Ok(Expression::Add(Box::new(
                    crate::expressions::BinaryOp::new(arg2, interval),
                )))
            }
        }
        DialectType::Spark => {
            // For TSQL source: convert to ADD_MONTHS/DATE_ADD(date, val)
            // For other sources: keep 3-arg DATE_ADD(UNIT, val, date) form
            if matches!(source, DialectType::TSQL | DialectType::Fabric)
            {
                // Multiply an amount expression by a constant factor,
                // folding the product when the amount is a numeric literal.
                fn multiply_expr_spark(
                    expr: Expression,
                    factor: i64,
                ) -> Expression
                {
                    if let Expression::Literal(
                        crate::expressions::Literal::Number(n),
                    ) = &expr
                    {
                        if let Ok(val) = n.parse::<i64>() {
                            return Expression::Literal(
                                crate::expressions::Literal::Number(
                                    (val * factor).to_string(),
                                ),
                            );
                        }
                    }
                    Expression::Mul(Box::new(
                        crate::expressions::BinaryOp::new(
                            expr,
                            Expression::Literal(
                                crate::expressions::Literal::Number(
                                    factor.to_string(),
                                ),
                            ),
                        ),
                    ))
                }
                // NOTE(review): unit_str was already normalized above, so
                // the abbreviation arms here look unreachable; kept as-is.
                let normalized_unit = match unit_str.as_str() {
                    "YEAR" | "YY" | "YYYY" => "YEAR",
                    "QUARTER" | "QQ" | "Q" => "QUARTER",
                    "MONTH" | "MM" | "M" => "MONTH",
                    "WEEK" | "WK" | "WW" => "WEEK",
                    "DAY" | "DD" | "D" | "DY" => "DAY",
                    _ => &unit_str,
                };
                match normalized_unit {
                    // YEAR/QUARTER are scaled into months for ADD_MONTHS
                    "YEAR" => {
                        let months = multiply_expr_spark(arg1, 12);
                        Ok(Expression::Function(Box::new(
                            Function::new(
                                "ADD_MONTHS".to_string(),
                                vec![arg2, months],
                            ),
                        )))
                    }
                    "QUARTER" => {
                        let months = multiply_expr_spark(arg1, 3);
                        Ok(Expression::Function(Box::new(
                            Function::new(
                                "ADD_MONTHS".to_string(),
                                vec![arg2, months],
                            ),
                        )))
                    }
                    "MONTH" => Ok(Expression::Function(Box::new(
                        Function::new(
                            "ADD_MONTHS".to_string(),
                            vec![arg2, arg1],
                        ),
                    ))),
                    // WEEK is scaled into days for Spark's day-based DATE_ADD
                    "WEEK" => {
                        let days = multiply_expr_spark(arg1, 7);
                        Ok(Expression::Function(Box::new(
                            Function::new(
                                "DATE_ADD".to_string(),
                                vec![arg2, days],
                            ),
                        )))
                    }
                    "DAY" => Ok(Expression::Function(Box::new(
                        Function::new(
                            "DATE_ADD".to_string(),
                            vec![arg2, arg1],
                        ),
                    ))),
                    // Sub-day units: keep the 3-arg DATE_ADD form
                    _ => {
                        let unit = Expression::Identifier(
                            Identifier::new(&unit_str),
                        );
                        Ok(Expression::Function(Box::new(
                            Function::new(
                                "DATE_ADD".to_string(),
                                vec![unit, arg1, arg2],
                            ),
                        )))
                    }
                }
            } else {
                // Non-TSQL source: keep 3-arg DATE_ADD(UNIT, val, date)
                let unit =
                    Expression::Identifier(Identifier::new(&unit_str));
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![unit, arg1, arg2],
                ))))
            }
        }
        DialectType::Hive => match unit_str.as_str() {
            "MONTH" => {
                Ok(Expression::Function(Box::new(Function::new(
                    "ADD_MONTHS".to_string(),
                    vec![arg2, arg1],
                ))))
            }
            // NOTE(review): Hive DATE_ADD adds days; units other than
            // MONTH (e.g. YEAR, HOUR) fall through here without
            // conversion - confirm this is the intended behavior.
            _ => Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![arg2, arg1],
            )))),
        },
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // Cast string literal date to TIMESTAMP
            let arg2 = if matches!(
                &arg2,
                Expression::Literal(Literal::String(_))
            ) {
                Expression::Cast(Box::new(Cast {
                    this: arg2,
                    to: DataType::Timestamp {
                        precision: None,
                        timezone: false,
                    },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                }))
            } else {
                arg2
            };
            // Presto family: DATE_ADD('unit', amount, date)
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![Expression::string(&unit_str), arg1, arg2],
            ))))
        }
        DialectType::MySQL => {
            // MySQL has a dedicated DATE_ADD(date, INTERVAL n UNIT) node
            let iu = Self::parse_interval_unit_static(&unit_str);
            Ok(Expression::DateAdd(Box::new(
                crate::expressions::DateAddFunc {
                    this: arg2,
                    interval: arg1,
                    unit: iu,
                },
            )))
        }
        DialectType::PostgreSQL => {
            // Cast string literal date to TIMESTAMP
            let arg2 = if matches!(
                &arg2,
                Expression::Literal(Literal::String(_))
            ) {
                Expression::Cast(Box::new(Cast {
                    this: arg2,
                    to: DataType::Timestamp {
                        precision: None,
                        timezone: false,
                    },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                }))
            } else {
                arg2
            };
            // Render as date + INTERVAL '<amount> <unit>'.
            // NOTE(review): expr_to_string_static bakes the rendered
            // amount expression into the interval string - presumably
            // only simple amounts reach this path; confirm.
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(Expression::string(&format!(
                        "{} {}",
                        Self::expr_to_string_static(&arg1),
                        unit_str
                    ))),
                    unit: None,
                },
            ));
            Ok(Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(arg2, interval),
            )))
        }
        DialectType::BigQuery => {
            // BigQuery: DATE_ADD(date, INTERVAL amount UNIT)
            let iu = Self::parse_interval_unit_static(&unit_str);
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(arg1),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit: iu,
                            use_plural: false,
                        },
                    ),
                },
            ));
            // Non-TSQL sources: CAST string literal to DATETIME
            let arg2 = if !matches!(
                source,
                DialectType::TSQL | DialectType::Fabric
            ) && matches!(
                &arg2,
                Expression::Literal(Literal::String(_))
            ) {
                Expression::Cast(Box::new(Cast {
                    this: arg2,
                    to: DataType::Custom {
                        name: "DATETIME".to_string(),
                    },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                }))
            } else {
                arg2
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![arg2, interval],
            ))))
        }
        // Default: keep the DATEADD(unit, amount, date) spelling
        _ => {
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
    }
}
            // DATE_ADD - 3-arg: either (unit, val, date) from Presto/ClickHouse
            // or (date, val, 'UNIT') from Generic canonical form
            "DATE_ADD" if f.args.len() == 3 => {
                let mut args = f.args;
                let arg0 = args.remove(0);
                let arg1 = args.remove(0);
                let arg2 = args.remove(0);
                // Detect Generic canonical form: DATE_ADD(date, amount, 'UNIT')
                // where arg2 is a string literal matching a unit name
                let arg2_unit = match &arg2 {
                    Expression::Literal(Literal::String(s)) => {
                        let u = s.to_uppercase();
                        if matches!(
                            u.as_str(),
                            "DAY"
                                | "MONTH"
                                | "YEAR"
                                | "HOUR"
                                | "MINUTE"
                                | "SECOND"
                                | "WEEK"
                                | "QUARTER"
                                | "MILLISECOND"
                                | "MICROSECOND"
                        ) {
                            Some(u)
                        } else {
                            None
                        }
                    }
                    _ => None,
                };
                // Reorder: if arg2 is the unit, swap to (unit, val, date) form
                let (unit_str, val, date) = if let Some(u) = arg2_unit {
                    (u, arg1, arg0)
                } else {
                    (Self::get_unit_str_static(&arg0), arg1, arg2)
                };
                // Alias for backward compat with the rest of the match:
                // from here on, arg1 is always the amount and arg2 the date.
                let arg1 = val;
                let arg2 = date;

                match target {
                    DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Athena => {
                        // Presto family: DATE_ADD('UNIT', amount, date),
                        // with the unit rendered as a string literal.
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![Expression::string(&unit_str), arg1, arg2],
                        ))))
                    }
                    DialectType::DuckDB => {
                        // DuckDB: date + INTERVAL amount UNIT
                        let iu = Self::parse_interval_unit_static(&unit_str);
                        let interval = Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(arg1),
                                unit: Some(
                                    crate::expressions::IntervalUnitSpec::Simple {
                                        unit: iu,
                                        use_plural: false,
                                    },
                                ),
                            },
                        ));
                        Ok(Expression::Add(Box::new(
                            crate::expressions::BinaryOp::new(arg2, interval),
                        )))
                    }
                    DialectType::PostgreSQL
                    | DialectType::Materialize
                    | DialectType::RisingWave => {
                        // PostgreSQL: x + INTERVAL '1 DAY'
                        // (amount and unit folded into a single string literal)
                        let amount_str = Self::expr_to_string_static(&arg1);
                        let interval = Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(Expression::string(&format!(
                                    "{} {}",
                                    amount_str, unit_str
                                ))),
                                unit: None,
                            },
                        ));
                        Ok(Expression::Add(Box::new(
                            crate::expressions::BinaryOp::new(arg2, interval),
                        )))
                    }
                    DialectType::Snowflake
                    | DialectType::TSQL
                    | DialectType::Redshift => {
                        // DATEADD(UNIT, amount, date) with a bare identifier unit.
                        let unit =
                            Expression::Identifier(Identifier::new(&unit_str));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(),
                            vec![unit, arg1, arg2],
                        ))))
                    }
                    DialectType::BigQuery
                    | DialectType::MySQL
                    | DialectType::Doris
                    | DialectType::StarRocks
                    | DialectType::Drill => {
                        // DATE_ADD(date, INTERVAL amount UNIT)
                        let iu = Self::parse_interval_unit_static(&unit_str);
                        let interval = Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(arg1),
                                unit: Some(
                                    crate::expressions::IntervalUnitSpec::Simple {
                                        unit: iu,
                                        use_plural: false,
                                    },
                                ),
                            },
                        ));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![arg2, interval],
                        ))))
                    }
                    DialectType::SQLite => {
                        // SQLite: DATE(x, '1 DAY')
                        // Build the string '1 DAY' from amount and unit
                        // Non-numeric amounts fall back to "1" — TODO confirm
                        // this fallback is intended rather than a silent loss.
                        let amount_str = match &arg1 {
                            Expression::Literal(Literal::Number(n)) => n.clone(),
                            _ => "1".to_string(),
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE".to_string(),
                            vec![
                                arg2,
                                Expression::string(format!(
                                    "{} {}",
                                    amount_str, unit_str
                                )),
                            ],
                        ))))
                    }
                    DialectType::Dremio => {
                        // Dremio: DATE_ADD(date, amount) - drops unit
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![arg2, arg1],
                        ))))
                    }
                    DialectType::Spark => {
                        // Spark: DATE_ADD(date, val) for DAY, or DATEADD(UNIT, val, date)
                        // NOTE(review): the non-DAY branch below actually emits
                        // "DATE_ADD", not "DATEADD" as this comment says — confirm
                        // which spelling is intended for Spark.
                        if unit_str == "DAY" {
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_ADD".to_string(),
                                vec![arg2, arg1],
                            ))))
                        } else {
                            let unit =
                                Expression::Identifier(Identifier::new(&unit_str));
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_ADD".to_string(),
                                vec![unit, arg1, arg2],
                            ))))
                        }
                    }
                    DialectType::Databricks => {
                        // Databricks: 3-arg DATE_ADD(UNIT, amount, date).
                        let unit =
                            Expression::Identifier(Identifier::new(&unit_str));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![unit, arg1, arg2],
                        ))))
                    }
                    DialectType::Hive => {
                        // Hive: DATE_ADD(date, val) for DAY
                        // NOTE(review): the unit is dropped unconditionally here,
                        // even when it is not DAY — confirm non-DAY units cannot
                        // reach this branch.
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![arg2, arg1],
                        ))))
                    }
                    _ => {
                        // Default: DATE_ADD(UNIT, amount, date) with identifier unit.
                        let unit =
                            Expression::Identifier(Identifier::new(&unit_str));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![unit, arg1, arg2],
                        ))))
                    }
                }
            }
            // DATE_ADD(date, days) - 2-arg Hive/Spark/Generic form (add days)
            "DATE_ADD"
                if f.args.len() == 2
                    && matches!(
                        source,
                        DialectType::Hive
                            | DialectType::Spark
                            | DialectType::Databricks
                            | DialectType::Generic
                    ) =>
            {
                let mut args = f.args;
                let date = args.remove(0);
                let days = args.remove(0);
                match target {
                    DialectType::Hive | DialectType::Spark => {
                        // Keep as DATE_ADD(date, days) for Hive/Spark
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![date, days],
                        ))))
                    }
                    DialectType::Databricks => {
                        // Databricks: DATEADD(DAY, days, date)
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(),
                            vec![
                                Expression::Identifier(Identifier::new("DAY")),
                                days,
                                date,
                            ],
                        ))))
                    }
                    DialectType::DuckDB => {
                        // DuckDB: CAST(date AS DATE) + INTERVAL days DAY
                        let cast_date = Self::ensure_cast_date(date);
                        // Wrap complex expressions (like Mul from DATE_SUB negation) in Paren
                        // so the rendered INTERVAL operand keeps its precedence.
                        let interval_val = if matches!(
                            days,
                            Expression::Mul(_)
                                | Expression::Sub(_)
                                | Expression::Add(_)
                        ) {
                            Expression::Paren(Box::new(crate::expressions::Paren {
                                this: days,
                                trailing_comments: vec![],
                            }))
                        } else {
                            days
                        };
                        let interval = Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(interval_val),
                                unit: Some(
                                    crate::expressions::IntervalUnitSpec::Simple {
                                        unit: crate::expressions::IntervalUnit::Day,
                                        use_plural: false,
                                    },
                                ),
                            },
                        ));
                        Ok(Expression::Add(Box::new(
                            crate::expressions::BinaryOp::new(cast_date, interval),
                        )))
                    }
                    DialectType::Snowflake => {
                        // For Hive source with string literal date, use CAST(CAST(date AS TIMESTAMP) AS DATE)
                        let cast_date = if matches!(
                            source,
                            DialectType::Hive
                                | DialectType::Spark
                                | DialectType::Databricks
                        ) {
                            if matches!(
                                date,
                                Expression::Literal(Literal::String(_))
                            ) {
                                Self::double_cast_timestamp_date(date)
                            } else {
                                date
                            }
                        } else {
                            date
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(),
                            vec![
                                Expression::Identifier(Identifier::new("DAY")),
                                days,
                                cast_date,
                            ],
                        ))))
                    }
                    DialectType::Redshift => {
                        // Redshift: DATEADD(DAY, days, date), no extra casting.
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(),
                            vec![
                                Expression::Identifier(Identifier::new("DAY")),
                                days,
                                date,
                            ],
                        ))))
                    }
                    DialectType::TSQL | DialectType::Fabric => {
                        // For Hive source with string literal date, use CAST(CAST(date AS DATETIME2) AS DATE)
                        // But Databricks DATE_ADD doesn't need this wrapping for TSQL
                        let cast_date = if matches!(
                            source,
                            DialectType::Hive | DialectType::Spark
                        ) {
                            if matches!(
                                date,
                                Expression::Literal(Literal::String(_))
                            ) {
                                Self::double_cast_datetime2_date(date)
                            } else {
                                date
                            }
                        } else {
                            date
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(),
                            vec![
                                Expression::Identifier(Identifier::new("DAY")),
                                days,
                                cast_date,
                            ],
                        ))))
                    }
                    DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Athena => {
                        // For Hive source with string literal date, use CAST(CAST(date AS TIMESTAMP) AS DATE)
                        let cast_date = if matches!(
                            source,
                            DialectType::Hive
                                | DialectType::Spark
                                | DialectType::Databricks
                        ) {
                            if matches!(
                                date,
                                Expression::Literal(Literal::String(_))
                            ) {
                                Self::double_cast_timestamp_date(date)
                            } else {
                                date
                            }
                        } else {
                            date
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![Expression::string("DAY"), days, cast_date],
                        ))))
                    }
                    DialectType::BigQuery => {
                        // For Hive/Spark source, wrap date in CAST(CAST(date AS DATETIME) AS DATE)
                        // NOTE(review): this wraps unconditionally, unlike the
                        // branches above which only wrap string literals —
                        // confirm the asymmetry is intended.
                        let cast_date = if matches!(
                            source,
                            DialectType::Hive
                                | DialectType::Spark
                                | DialectType::Databricks
                        ) {
                            Self::double_cast_datetime_date(date)
                        } else {
                            date
                        };
                        // Wrap complex expressions in Paren for interval
                        let interval_val = if matches!(
                            days,
                            Expression::Mul(_)
                                | Expression::Sub(_)
                                | Expression::Add(_)
                        ) {
                            Expression::Paren(Box::new(crate::expressions::Paren {
                                this: days,
                                trailing_comments: vec![],
                            }))
                        } else {
                            days
                        };
                        let interval = Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(interval_val),
                                unit: Some(
                                    crate::expressions::IntervalUnitSpec::Simple {
                                        unit: crate::expressions::IntervalUnit::Day,
                                        use_plural: false,
                                    },
                                ),
                            },
                        ));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![cast_date, interval],
                        ))))
                    }
                    DialectType::MySQL => {
                        // MySQL: dedicated DateAdd AST node (DATE_ADD(date, INTERVAL days DAY)).
                        let iu = crate::expressions::IntervalUnit::Day;
                        Ok(Expression::DateAdd(Box::new(
                            crate::expressions::DateAddFunc {
                                this: date,
                                interval: days,
                                unit: iu,
                            },
                        )))
                    }
                    DialectType::PostgreSQL => {
                        // PostgreSQL: date + INTERVAL 'N DAY'
                        let interval = Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(Expression::string(&format!(
                                    "{} DAY",
                                    Self::expr_to_string_static(&days)
                                ))),
                                unit: None,
                            },
                        ));
                        Ok(Expression::Add(Box::new(
                            crate::expressions::BinaryOp::new(date, interval),
                        )))
                    }
                    DialectType::Doris
                    | DialectType::StarRocks
                    | DialectType::Drill => {
                        // DATE_ADD(date, INTERVAL days DAY)
                        let interval = Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(days),
                                unit: Some(
                                    crate::expressions::IntervalUnitSpec::Simple {
                                        unit: crate::expressions::IntervalUnit::Day,
                                        use_plural: false,
                                    },
                                ),
                            },
                        ));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![date, interval],
                        ))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "DATE_ADD".to_string(),
                        vec![date, days],
                    )))),
                }
            }
            // DATE_SUB(date, days) - 2-arg Hive/Spark form (subtract days)
            "DATE_SUB"
                if f.args.len() == 2
                    && matches!(
                        source,
                        DialectType::Hive
                            | DialectType::Spark
                            | DialectType::Databricks
                    ) =>
            {
                let mut args = f.args;
                let date = args.remove(0);
                let days = args.remove(0);
                // Helper to create days * -1
                // (targets without DATE_SUB get DATE_ADD of a negated amount)
                let make_neg_days = |d: Expression| -> Expression {
                    Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
                        d,
                        Expression::Literal(Literal::Number("-1".to_string())),
                    )))
                };
                // String-literal dates are double-cast on targets that would
                // otherwise treat the literal as plain text.
                let is_string_literal =
                    matches!(date, Expression::Literal(Literal::String(_)));
                match target {
                    DialectType::Hive
                    | DialectType::Spark
                    | DialectType::Databricks => {
                        // Keep as DATE_SUB(date, days) for Hive/Spark
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_SUB".to_string(),
                            vec![date, days],
                        ))))
                    }
                    DialectType::DuckDB => {
                        // DuckDB: CAST(date AS DATE) + INTERVAL (days * -1) DAY
                        let cast_date = Self::ensure_cast_date(date);
                        let neg = make_neg_days(days);
                        let interval = Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(Expression::Paren(Box::new(
                                    crate::expressions::Paren {
                                        this: neg,
                                        trailing_comments: vec![],
                                    },
                                ))),
                                unit: Some(
                                    crate::expressions::IntervalUnitSpec::Simple {
                                        unit: crate::expressions::IntervalUnit::Day,
                                        use_plural: false,
                                    },
                                ),
                            },
                        ));
                        Ok(Expression::Add(Box::new(
                            crate::expressions::BinaryOp::new(cast_date, interval),
                        )))
                    }
                    DialectType::Snowflake => {
                        // Snowflake: DATEADD(DAY, days * -1, date)
                        let cast_date = if is_string_literal {
                            Self::double_cast_timestamp_date(date)
                        } else {
                            date
                        };
                        let neg = make_neg_days(days);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(),
                            vec![
                                Expression::Identifier(Identifier::new("DAY")),
                                neg,
                                cast_date,
                            ],
                        ))))
                    }
                    DialectType::Redshift => {
                        // Redshift: DATEADD(DAY, days * -1, date), no casting.
                        let neg = make_neg_days(days);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(),
                            vec![
                                Expression::Identifier(Identifier::new("DAY")),
                                neg,
                                date,
                            ],
                        ))))
                    }
                    DialectType::TSQL | DialectType::Fabric => {
                        // TSQL casts string literals through DATETIME2 to DATE.
                        let cast_date = if is_string_literal {
                            Self::double_cast_datetime2_date(date)
                        } else {
                            date
                        };
                        let neg = make_neg_days(days);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(),
                            vec![
                                Expression::Identifier(Identifier::new("DAY")),
                                neg,
                                cast_date,
                            ],
                        ))))
                    }
                    DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Athena => {
                        // Presto family: DATE_ADD('DAY', days * -1, date).
                        let cast_date = if is_string_literal {
                            Self::double_cast_timestamp_date(date)
                        } else {
                            date
                        };
                        let neg = make_neg_days(days);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![Expression::string("DAY"), neg, cast_date],
                        ))))
                    }
                    DialectType::BigQuery => {
                        // BigQuery: DATE_ADD(date, INTERVAL (days * -1) DAY).
                        let cast_date = if is_string_literal {
                            Self::double_cast_datetime_date(date)
                        } else {
                            date
                        };
                        let neg = make_neg_days(days);
                        let interval = Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(Expression::Paren(Box::new(
                                    crate::expressions::Paren {
                                        this: neg,
                                        trailing_comments: vec![],
                                    },
                                ))),
                                unit: Some(
                                    crate::expressions::IntervalUnitSpec::Simple {
                                        unit: crate::expressions::IntervalUnit::Day,
                                        use_plural: false,
                                    },
                                ),
                            },
                        ));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![cast_date, interval],
                        ))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "DATE_SUB".to_string(),
                        vec![date, days],
                    )))),
                }
            }
            // ADD_MONTHS(date, val) -> target-specific
            "ADD_MONTHS" if f.args.len() == 2 => {
                let mut args = f.args;
                let date = args.remove(0);
                let val = args.remove(0);
                match target {
                    DialectType::TSQL => {
                        // TSQL: DATEADD(MONTH, val, CAST(date AS DATETIME2))
                        let cast_date = Self::ensure_cast_datetime2(date);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(),
                            vec![
                                Expression::Identifier(Identifier::new("MONTH")),
                                val,
                                cast_date,
                            ],
                        ))))
                    }
                    DialectType::DuckDB => {
                        // DuckDB: date + INTERVAL val MONTH
                        let interval = Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(val),
                                unit: Some(
                                    crate::expressions::IntervalUnitSpec::Simple {
                                        unit:
                                            crate::expressions::IntervalUnit::Month,
                                        use_plural: false,
                                    },
                                ),
                            },
                        ));
                        Ok(Expression::Add(Box::new(
                            crate::expressions::BinaryOp::new(date, interval),
                        )))
                    }
                    DialectType::Snowflake => {
                        // Keep ADD_MONTHS when source is Snowflake
                        // (round-trips should not rewrite a native function).
                        if matches!(source, DialectType::Snowflake) {
                            Ok(Expression::Function(Box::new(Function::new(
                                "ADD_MONTHS".to_string(),
                                vec![date, val],
                            ))))
                        } else {
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATEADD".to_string(),
                                vec![
                                    Expression::Identifier(Identifier::new(
                                        "MONTH",
                                    )),
                                    val,
                                    date,
                                ],
                            ))))
                        }
                    }
                    DialectType::Redshift => {
                        // Redshift: DATEADD(MONTH, val, date)
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(),
                            vec![
                                Expression::Identifier(Identifier::new("MONTH")),
                                val,
                                date,
                            ],
                        ))))
                    }
                    DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Athena => {
                        // Presto family: DATE_ADD('MONTH', val, date)
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![Expression::string("MONTH"), val, date],
                        ))))
                    }
                    DialectType::BigQuery => {
                        // BigQuery: DATE_ADD(date, INTERVAL val MONTH)
                        let interval = Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(val),
                                unit: Some(
                                    crate::expressions::IntervalUnitSpec::Simple {
                                        unit:
                                            crate::expressions::IntervalUnit::Month,
                                        use_plural: false,
                                    },
                                ),
                            },
                        ));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![date, interval],
                        ))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "ADD_MONTHS".to_string(),
                        vec![date, val],
                    )))),
                }
            }
15391 // DATETRUNC(unit, date) - TSQL form -> DATE_TRUNC for other targets
15392 "DATETRUNC" if f.args.len() == 2 => {
15393 let mut args = f.args;
15394 let arg0 = args.remove(0);
15395 let arg1 = args.remove(0);
15396 let unit_str = Self::get_unit_str_static(&arg0);
15397 match target {
15398 DialectType::TSQL | DialectType::Fabric => {
15399 // Keep as DATETRUNC for TSQL - the target handler will uppercase the unit
15400 Ok(Expression::Function(Box::new(Function::new(
15401 "DATETRUNC".to_string(),
15402 vec![
15403 Expression::Identifier(Identifier::new(&unit_str)),
15404 arg1,
15405 ],
15406 ))))
15407 }
15408 DialectType::DuckDB => {
15409 // DuckDB: DATE_TRUNC('UNIT', expr) with CAST for string literals
15410 let date = Self::ensure_cast_timestamp(arg1);
15411 Ok(Expression::Function(Box::new(Function::new(
15412 "DATE_TRUNC".to_string(),
15413 vec![Expression::string(&unit_str), date],
15414 ))))
15415 }
15416 DialectType::ClickHouse => {
15417 // ClickHouse: dateTrunc('UNIT', expr)
15418 Ok(Expression::Function(Box::new(Function::new(
15419 "dateTrunc".to_string(),
15420 vec![Expression::string(&unit_str), arg1],
15421 ))))
15422 }
15423 _ => {
15424 // Standard: DATE_TRUNC('UNIT', expr)
15425 let unit = Expression::string(&unit_str);
15426 Ok(Expression::Function(Box::new(Function::new(
15427 "DATE_TRUNC".to_string(),
15428 vec![unit, arg1],
15429 ))))
15430 }
15431 }
15432 }
15433 // GETDATE() -> CURRENT_TIMESTAMP for non-TSQL targets
15434 "GETDATE" if f.args.is_empty() => match target {
15435 DialectType::TSQL => Ok(Expression::Function(f)),
15436 DialectType::Redshift => Ok(Expression::Function(Box::new(
15437 Function::new("GETDATE".to_string(), vec![]),
15438 ))),
15439 _ => Ok(Expression::CurrentTimestamp(
15440 crate::expressions::CurrentTimestamp {
15441 precision: None,
15442 sysdate: false,
15443 },
15444 )),
15445 },
15446 // TO_HEX(x) / HEX(x) -> target-specific hex function
15447 "TO_HEX" | "HEX" if f.args.len() == 1 => {
15448 let name = match target {
15449 DialectType::Presto | DialectType::Trino => "TO_HEX",
15450 DialectType::Spark
15451 | DialectType::Databricks
15452 | DialectType::Hive => "HEX",
15453 DialectType::DuckDB
15454 | DialectType::PostgreSQL
15455 | DialectType::Redshift => "TO_HEX",
15456 _ => &f.name,
15457 };
15458 Ok(Expression::Function(Box::new(Function::new(
15459 name.to_string(),
15460 f.args,
15461 ))))
15462 }
15463 // FROM_HEX(x) / UNHEX(x) -> target-specific hex decode function
15464 "FROM_HEX" | "UNHEX" if f.args.len() == 1 => {
15465 match target {
15466 DialectType::BigQuery => {
15467 // BigQuery: UNHEX(x) -> FROM_HEX(x)
15468 // Special case: UNHEX(MD5(x)) -> FROM_HEX(TO_HEX(MD5(x)))
15469 // because BigQuery MD5 returns BYTES, not hex string
15470 let arg = &f.args[0];
15471 let wrapped_arg = match arg {
15472 Expression::Function(inner_f)
15473 if inner_f.name.to_uppercase() == "MD5"
15474 || inner_f.name.to_uppercase() == "SHA1"
15475 || inner_f.name.to_uppercase() == "SHA256"
15476 || inner_f.name.to_uppercase() == "SHA512" =>
15477 {
15478 // Wrap hash function in TO_HEX for BigQuery
15479 Expression::Function(Box::new(Function::new(
15480 "TO_HEX".to_string(),
15481 vec![arg.clone()],
15482 )))
15483 }
15484 _ => f.args.into_iter().next().unwrap(),
15485 };
15486 Ok(Expression::Function(Box::new(Function::new(
15487 "FROM_HEX".to_string(),
15488 vec![wrapped_arg],
15489 ))))
15490 }
15491 _ => {
15492 let name = match target {
15493 DialectType::Presto | DialectType::Trino => "FROM_HEX",
15494 DialectType::Spark
15495 | DialectType::Databricks
15496 | DialectType::Hive => "UNHEX",
15497 _ => &f.name,
15498 };
15499 Ok(Expression::Function(Box::new(Function::new(
15500 name.to_string(),
15501 f.args,
15502 ))))
15503 }
15504 }
15505 }
15506 // TO_UTF8(x) -> ENCODE(x, 'utf-8') for Spark
15507 "TO_UTF8" if f.args.len() == 1 => match target {
15508 DialectType::Spark | DialectType::Databricks => {
15509 let mut args = f.args;
15510 args.push(Expression::string("utf-8"));
15511 Ok(Expression::Function(Box::new(Function::new(
15512 "ENCODE".to_string(),
15513 args,
15514 ))))
15515 }
15516 _ => Ok(Expression::Function(f)),
15517 },
15518 // FROM_UTF8(x) -> DECODE(x, 'utf-8') for Spark
15519 "FROM_UTF8" if f.args.len() == 1 => match target {
15520 DialectType::Spark | DialectType::Databricks => {
15521 let mut args = f.args;
15522 args.push(Expression::string("utf-8"));
15523 Ok(Expression::Function(Box::new(Function::new(
15524 "DECODE".to_string(),
15525 args,
15526 ))))
15527 }
15528 _ => Ok(Expression::Function(f)),
15529 },
15530 // STARTS_WITH(x, y) / STARTSWITH(x, y) -> target-specific
15531 "STARTS_WITH" | "STARTSWITH" if f.args.len() == 2 => {
15532 let name = match target {
15533 DialectType::Spark | DialectType::Databricks => "STARTSWITH",
15534 DialectType::Presto | DialectType::Trino => "STARTS_WITH",
15535 DialectType::PostgreSQL | DialectType::Redshift => {
15536 "STARTS_WITH"
15537 }
15538 _ => &f.name,
15539 };
15540 Ok(Expression::Function(Box::new(Function::new(
15541 name.to_string(),
15542 f.args,
15543 ))))
15544 }
15545 // APPROX_COUNT_DISTINCT(x) <-> APPROX_DISTINCT(x)
15546 "APPROX_COUNT_DISTINCT" if f.args.len() >= 1 => {
15547 let name = match target {
15548 DialectType::Presto
15549 | DialectType::Trino
15550 | DialectType::Athena => "APPROX_DISTINCT",
15551 _ => "APPROX_COUNT_DISTINCT",
15552 };
15553 Ok(Expression::Function(Box::new(Function::new(
15554 name.to_string(),
15555 f.args,
15556 ))))
15557 }
            // JSON_EXTRACT -> GET_JSON_OBJECT for Spark/Hive
            // Skipped when the source is BigQuery — NOTE(review): presumably
            // because BigQuery's JSON_EXTRACT path semantics are handled
            // elsewhere; confirm against the BigQuery-specific handling.
            "JSON_EXTRACT"
                if f.args.len() == 2
                    && !matches!(source, DialectType::BigQuery)
                    && matches!(
                        target,
                        DialectType::Spark
                            | DialectType::Databricks
                            | DialectType::Hive
                    ) =>
            {
                // Arguments (value, path) carry over positionally unchanged.
                Ok(Expression::Function(Box::new(Function::new(
                    "GET_JSON_OBJECT".to_string(),
                    f.args,
                ))))
            }
15574 // JSON_EXTRACT(x, path) -> x -> path for SQLite (arrow syntax)
15575 "JSON_EXTRACT"
15576 if f.args.len() == 2 && matches!(target, DialectType::SQLite) =>
15577 {
15578 let mut args = f.args;
15579 let path = args.remove(1);
15580 let this = args.remove(0);
15581 Ok(Expression::JsonExtract(Box::new(
15582 crate::expressions::JsonExtractFunc {
15583 this,
15584 path,
15585 returning: None,
15586 arrow_syntax: true,
15587 hash_arrow_syntax: false,
15588 wrapper_option: None,
15589 quotes_option: None,
15590 on_scalar_string: false,
15591 on_error: None,
15592 },
15593 )))
15594 }
            // JSON_FORMAT(x) -> TO_JSON(x) for Spark, TO_JSON_STRING for BigQuery, CAST(TO_JSON(x) AS TEXT) for DuckDB
            "JSON_FORMAT" if f.args.len() == 1 => {
                match target {
                    DialectType::Spark | DialectType::Databricks => {
                        // Presto JSON_FORMAT(JSON '...') needs Spark's string-unquoting flow:
                        // REGEXP_EXTRACT(TO_JSON(FROM_JSON('[...]', SCHEMA_OF_JSON('[...]'))), '^.(.*).$', 1)
                        // The literal is wrapped in [...] so FROM_JSON parses it
                        // as an array, and the outer brackets are then stripped
                        // again by the REGEXP_EXTRACT group.
                        if matches!(
                            source,
                            DialectType::Presto
                                | DialectType::Trino
                                | DialectType::Athena
                        ) {
                            if let Some(Expression::ParseJson(pj)) = f.args.first()
                            {
                                if let Expression::Literal(Literal::String(s)) =
                                    &pj.this
                                {
                                    let wrapped = Expression::Literal(
                                        Literal::String(format!("[{}]", s)),
                                    );
                                    let schema_of_json = Expression::Function(
                                        Box::new(Function::new(
                                            "SCHEMA_OF_JSON".to_string(),
                                            vec![wrapped.clone()],
                                        )),
                                    );
                                    let from_json = Expression::Function(Box::new(
                                        Function::new(
                                            "FROM_JSON".to_string(),
                                            vec![wrapped, schema_of_json],
                                        ),
                                    ));
                                    let to_json = Expression::Function(Box::new(
                                        Function::new(
                                            "TO_JSON".to_string(),
                                            vec![from_json],
                                        ),
                                    ));
                                    // Early return: this fully replaces the
                                    // JSON_FORMAT call, so skip the generic
                                    // TO_JSON fallback below.
                                    return Ok(Expression::Function(Box::new(
                                        Function::new(
                                            "REGEXP_EXTRACT".to_string(),
                                            vec![
                                                to_json,
                                                Expression::Literal(
                                                    Literal::String(
                                                        "^.(.*).$".to_string(),
                                                    ),
                                                ),
                                                Expression::Literal(
                                                    Literal::Number(
                                                        "1".to_string(),
                                                    ),
                                                ),
                                            ],
                                        ),
                                    )));
                                }
                            }
                        }

                        // Strip inner CAST(... AS JSON) or TO_JSON() if present
                        // The CastToJsonForSpark may have already converted CAST(x AS JSON) to TO_JSON(x)
                        let mut args = f.args;
                        if let Some(Expression::Cast(ref c)) = args.first() {
                            if matches!(&c.to, DataType::Json | DataType::JsonB) {
                                args = vec![c.this.clone()];
                            }
                        } else if let Some(Expression::Function(ref inner_f)) =
                            args.first()
                        {
                            if inner_f.name.eq_ignore_ascii_case("TO_JSON")
                                && inner_f.args.len() == 1
                            {
                                // Already TO_JSON(x) from CastToJsonForSpark, just use the inner arg
                                args = inner_f.args.clone();
                            }
                        }
                        Ok(Expression::Function(Box::new(Function::new(
                            "TO_JSON".to_string(),
                            args,
                        ))))
                    }
                    DialectType::BigQuery => Ok(Expression::Function(Box::new(
                        Function::new("TO_JSON_STRING".to_string(), f.args),
                    ))),
                    DialectType::DuckDB => {
                        // CAST(TO_JSON(x) AS TEXT)
                        let to_json = Expression::Function(Box::new(
                            Function::new("TO_JSON".to_string(), f.args),
                        ));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: to_json,
                            to: DataType::Text,
                            trailing_comments: Vec::new(),
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    }
                    _ => Ok(Expression::Function(f)),
                }
            }
            // SYSDATE -> CURRENT_TIMESTAMP for non-Oracle/Redshift/Snowflake targets
            "SYSDATE" if f.args.is_empty() => {
                match target {
                    DialectType::Oracle | DialectType::Redshift => {
                        // Oracle/Redshift accept bare SYSDATE; pass through unchanged.
                        Ok(Expression::Function(f))
                    }
                    DialectType::Snowflake => {
                        // Snowflake uses SYSDATE() with parens
                        // (clear no_parens so the generator emits "SYSDATE()").
                        let mut f = *f;
                        f.no_parens = false;
                        Ok(Expression::Function(Box::new(f)))
                    }
                    DialectType::DuckDB => {
                        // DuckDB: SYSDATE() -> CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
                        // NOTE(review): this pins the result to UTC; confirm it
                        // matches the source engine's SYSDATE semantics.
                        Ok(Expression::AtTimeZone(Box::new(
                            crate::expressions::AtTimeZone {
                                this: Expression::CurrentTimestamp(
                                    crate::expressions::CurrentTimestamp {
                                        precision: None,
                                        sysdate: false,
                                    },
                                ),
                                zone: Expression::Literal(Literal::String(
                                    "UTC".to_string(),
                                )),
                            },
                        )))
                    }
                    // Default: CURRENT_TIMESTAMP, with sysdate: true so the
                    // generator can remember the original spelling.
                    _ => Ok(Expression::CurrentTimestamp(
                        crate::expressions::CurrentTimestamp {
                            precision: None,
                            sysdate: true,
                        },
                    )),
                }
            }
15734 // LOGICAL_OR(x) -> BOOL_OR(x)
15735 "LOGICAL_OR" if f.args.len() == 1 => {
15736 let name = match target {
15737 DialectType::Spark | DialectType::Databricks => "BOOL_OR",
15738 _ => &f.name,
15739 };
15740 Ok(Expression::Function(Box::new(Function::new(
15741 name.to_string(),
15742 f.args,
15743 ))))
15744 }
15745 // LOGICAL_AND(x) -> BOOL_AND(x)
15746 "LOGICAL_AND" if f.args.len() == 1 => {
15747 let name = match target {
15748 DialectType::Spark | DialectType::Databricks => "BOOL_AND",
15749 _ => &f.name,
15750 };
15751 Ok(Expression::Function(Box::new(Function::new(
15752 name.to_string(),
15753 f.args,
15754 ))))
15755 }
15756 // MONTHS_ADD(d, n) -> ADD_MONTHS(d, n) for Oracle
15757 "MONTHS_ADD" if f.args.len() == 2 => match target {
15758 DialectType::Oracle => Ok(Expression::Function(Box::new(
15759 Function::new("ADD_MONTHS".to_string(), f.args),
15760 ))),
15761 _ => Ok(Expression::Function(f)),
15762 },
15763 // ARRAY_JOIN(arr, sep[, null_replacement]) -> target-specific
15764 "ARRAY_JOIN" if f.args.len() >= 2 => {
15765 match target {
15766 DialectType::Spark | DialectType::Databricks => {
15767 // Keep as ARRAY_JOIN for Spark (it supports null_replacement)
15768 Ok(Expression::Function(f))
15769 }
15770 DialectType::Hive => {
15771 // ARRAY_JOIN(arr, sep[, null_rep]) -> CONCAT_WS(sep, arr) (drop null_replacement)
15772 let mut args = f.args;
15773 let arr = args.remove(0);
15774 let sep = args.remove(0);
15775 // Drop any remaining args (null_replacement)
15776 Ok(Expression::Function(Box::new(Function::new(
15777 "CONCAT_WS".to_string(),
15778 vec![sep, arr],
15779 ))))
15780 }
15781 DialectType::Presto | DialectType::Trino => {
15782 Ok(Expression::Function(f))
15783 }
15784 _ => Ok(Expression::Function(f)),
15785 }
15786 }
            // LOCATE(substr, str, pos) 3-arg -> target-specific
            // For Presto/DuckDB: STRPOS doesn't support 3-arg, need complex expansion
            "LOCATE"
                if f.args.len() == 3
                    && matches!(
                        target,
                        DialectType::Presto
                            | DialectType::Trino
                            | DialectType::Athena
                            | DialectType::DuckDB
                    ) =>
            {
                let mut args = f.args;
                let substr = args.remove(0);
                let string = args.remove(0);
                let pos = args.remove(0);
                // STRPOS(SUBSTRING(string, pos), substr)
                // — search only the suffix starting at pos.
                let substring_call = Expression::Function(Box::new(Function::new(
                    "SUBSTRING".to_string(),
                    vec![string.clone(), pos.clone()],
                )));
                let strpos_call = Expression::Function(Box::new(Function::new(
                    "STRPOS".to_string(),
                    vec![substring_call, substr.clone()],
                )));
                // STRPOS(...) + pos - 1
                // — re-base the suffix-relative match index onto the full string.
                let pos_adjusted =
                    Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
                        Expression::Add(Box::new(
                            crate::expressions::BinaryOp::new(
                                strpos_call.clone(),
                                pos.clone(),
                            ),
                        )),
                        Expression::number(1),
                    )));
                // STRPOS(...) = 0
                // — preserve the "not found" sentinel 0 instead of shifting it.
                let is_zero =
                    Expression::Eq(Box::new(crate::expressions::BinaryOp::new(
                        strpos_call.clone(),
                        Expression::number(0),
                    )));

                match target {
                    DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Athena => {
                        // IF(STRPOS(...) = 0, 0, STRPOS(...) + pos - 1)
                        Ok(Expression::Function(Box::new(Function::new(
                            "IF".to_string(),
                            vec![is_zero, Expression::number(0), pos_adjusted],
                        ))))
                    }
                    DialectType::DuckDB => {
                        // CASE WHEN STRPOS(...) = 0 THEN 0 ELSE STRPOS(...) + pos - 1 END
                        Ok(Expression::Case(Box::new(crate::expressions::Case {
                            operand: None,
                            whens: vec![(is_zero, Expression::number(0))],
                            else_: Some(pos_adjusted),
                            comments: Vec::new(),
                            inferred_type: None,
                        })))
                    }
                    // Unreachable given the arm's target guard; kept as a
                    // defensive fallback that re-emits the original call.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "LOCATE".to_string(),
                        vec![substr, string, pos],
                    )))),
                }
            }
            // STRPOS(haystack, needle, occurrence) 3-arg -> INSTR(haystack, needle, 1, occurrence)
            // Targets with a 4-arg INSTR(str, substr, position, occurrence):
            // the literal 1 supplies "search from the start of the string",
            // and the caller's third argument becomes the occurrence index.
            "STRPOS"
                if f.args.len() == 3
                    && matches!(
                        target,
                        DialectType::BigQuery
                            | DialectType::Oracle
                            | DialectType::Teradata
                    ) =>
            {
                // Take ownership of the args and peel them off in order;
                // each remove(0) shifts the next argument to the front.
                let mut args = f.args;
                let haystack = args.remove(0);
                let needle = args.remove(0);
                let occurrence = args.remove(0);
                Ok(Expression::Function(Box::new(Function::new(
                    "INSTR".to_string(),
                    vec![haystack, needle, Expression::number(1), occurrence],
                ))))
            }
15875 // SCHEMA_NAME(id) -> target-specific
15876 "SCHEMA_NAME" if f.args.len() <= 1 => match target {
15877 DialectType::MySQL | DialectType::SingleStore => {
15878 Ok(Expression::Function(Box::new(Function::new(
15879 "SCHEMA".to_string(),
15880 vec![],
15881 ))))
15882 }
15883 DialectType::PostgreSQL => Ok(Expression::CurrentSchema(Box::new(
15884 crate::expressions::CurrentSchema { this: None },
15885 ))),
15886 DialectType::SQLite => Ok(Expression::string("main")),
15887 _ => Ok(Expression::Function(f)),
15888 },
15889 // STRTOL(str, base) -> FROM_BASE(str, base) for Trino/Presto
15890 "STRTOL" if f.args.len() == 2 => match target {
15891 DialectType::Presto | DialectType::Trino => {
15892 Ok(Expression::Function(Box::new(Function::new(
15893 "FROM_BASE".to_string(),
15894 f.args,
15895 ))))
15896 }
15897 _ => Ok(Expression::Function(f)),
15898 },
15899 // EDITDIST3(a, b) -> LEVENSHTEIN(a, b) for Spark
15900 "EDITDIST3" if f.args.len() == 2 => match target {
15901 DialectType::Spark | DialectType::Databricks => {
15902 Ok(Expression::Function(Box::new(Function::new(
15903 "LEVENSHTEIN".to_string(),
15904 f.args,
15905 ))))
15906 }
15907 _ => Ok(Expression::Function(f)),
15908 },
            // FORMAT(num, decimals) from MySQL -> DuckDB FORMAT('{:,.Xf}', num)
            // MySQL's FORMAT(x, d) renders x with thousands separators and d
            // decimal places; DuckDB expresses the same via a fmt-style spec.
            "FORMAT"
                if f.args.len() == 2
                    && matches!(
                        source,
                        DialectType::MySQL | DialectType::SingleStore
                    )
                    && matches!(target, DialectType::DuckDB) =>
            {
                let mut args = f.args;
                let num_expr = args.remove(0);
                let decimals_expr = args.remove(0);
                // Extract decimal count
                // NOTE(review): only a literal number is honoured here; any
                // other decimals expression (column, parameter, arithmetic)
                // silently degrades to 0 decimal places — confirm this lossy
                // fallback is intentional.
                let dec_count = match &decimals_expr {
                    Expression::Literal(Literal::Number(n)) => n.clone(),
                    _ => "0".to_string(),
                };
                // '{:,.Nf}' = comma thousands-grouping plus N fixed decimals,
                // mirroring MySQL FORMAT()'s output shape.
                let fmt_str = format!("{{:,.{}f}}", dec_count);
                Ok(Expression::Function(Box::new(Function::new(
                    "FORMAT".to_string(),
                    vec![Expression::string(&fmt_str), num_expr],
                ))))
            }
15932 // FORMAT(x, fmt) from TSQL -> DATE_FORMAT for Spark, or expand short codes
15933 "FORMAT"
15934 if f.args.len() == 2
15935 && matches!(
15936 source,
15937 DialectType::TSQL | DialectType::Fabric
15938 ) =>
15939 {
15940 let val_expr = f.args[0].clone();
15941 let fmt_expr = f.args[1].clone();
15942 // Expand unambiguous .NET single-char date format shortcodes to full patterns.
15943 // Only expand shortcodes that are NOT also valid numeric format specifiers.
15944 // Ambiguous: d, D, f, F, g, G (used for both dates and numbers)
15945 // Unambiguous date: m/M (Month day), t/T (Time), y/Y (Year month)
15946 let (expanded_fmt, is_shortcode) = match &fmt_expr {
15947 Expression::Literal(crate::expressions::Literal::String(s)) => {
15948 match s.as_str() {
15949 "m" | "M" => (Expression::string("MMMM d"), true),
15950 "t" => (Expression::string("h:mm tt"), true),
15951 "T" => (Expression::string("h:mm:ss tt"), true),
15952 "y" | "Y" => (Expression::string("MMMM yyyy"), true),
15953 _ => (fmt_expr.clone(), false),
15954 }
15955 }
15956 _ => (fmt_expr.clone(), false),
15957 };
15958 // Check if the format looks like a date format
15959 let is_date_format = is_shortcode
15960 || match &expanded_fmt {
15961 Expression::Literal(
15962 crate::expressions::Literal::String(s),
15963 ) => {
15964 // Date formats typically contain yyyy, MM, dd, MMMM, HH, etc.
15965 s.contains("yyyy")
15966 || s.contains("YYYY")
15967 || s.contains("MM")
15968 || s.contains("dd")
15969 || s.contains("MMMM")
15970 || s.contains("HH")
15971 || s.contains("hh")
15972 || s.contains("ss")
15973 }
15974 _ => false,
15975 };
15976 match target {
15977 DialectType::Spark | DialectType::Databricks => {
15978 let func_name = if is_date_format {
15979 "DATE_FORMAT"
15980 } else {
15981 "FORMAT_NUMBER"
15982 };
15983 Ok(Expression::Function(Box::new(Function::new(
15984 func_name.to_string(),
15985 vec![val_expr, expanded_fmt],
15986 ))))
15987 }
15988 _ => {
15989 // For TSQL and other targets, expand shortcodes but keep FORMAT
15990 if is_shortcode {
15991 Ok(Expression::Function(Box::new(Function::new(
15992 "FORMAT".to_string(),
15993 vec![val_expr, expanded_fmt],
15994 ))))
15995 } else {
15996 Ok(Expression::Function(f))
15997 }
15998 }
15999 }
16000 }
16001 // FORMAT('%s', x) from Trino/Presto -> target-specific
16002 "FORMAT"
16003 if f.args.len() >= 2
16004 && matches!(
16005 source,
16006 DialectType::Trino
16007 | DialectType::Presto
16008 | DialectType::Athena
16009 ) =>
16010 {
16011 let fmt_expr = f.args[0].clone();
16012 let value_args: Vec<Expression> = f.args[1..].to_vec();
16013 match target {
16014 // DuckDB: replace %s with {} in format string
16015 DialectType::DuckDB => {
16016 let new_fmt = match &fmt_expr {
16017 Expression::Literal(Literal::String(s)) => {
16018 Expression::Literal(Literal::String(
16019 s.replace("%s", "{}"),
16020 ))
16021 }
16022 _ => fmt_expr,
16023 };
16024 let mut args = vec![new_fmt];
16025 args.extend(value_args);
16026 Ok(Expression::Function(Box::new(Function::new(
16027 "FORMAT".to_string(),
16028 args,
16029 ))))
16030 }
16031 // Snowflake: FORMAT('%s', x) -> TO_CHAR(x) when just %s
16032 DialectType::Snowflake => match &fmt_expr {
16033 Expression::Literal(Literal::String(s))
16034 if s == "%s" && value_args.len() == 1 =>
16035 {
16036 Ok(Expression::Function(Box::new(Function::new(
16037 "TO_CHAR".to_string(),
16038 value_args,
16039 ))))
16040 }
16041 _ => Ok(Expression::Function(f)),
16042 },
16043 // Default: keep FORMAT as-is
16044 _ => Ok(Expression::Function(f)),
16045 }
16046 }
16047 // LIST_CONTAINS / LIST_HAS / ARRAY_CONTAINS -> target-specific
16048 "LIST_CONTAINS" | "LIST_HAS" | "ARRAY_CONTAINS"
16049 if f.args.len() == 2 =>
16050 {
16051 // When coming from Snowflake source: ARRAY_CONTAINS(value, array)
16052 // args[0]=value, args[1]=array. For DuckDB target, swap and add NULL-aware CASE.
16053 if matches!(target, DialectType::DuckDB)
16054 && matches!(source, DialectType::Snowflake)
16055 && f.name.eq_ignore_ascii_case("ARRAY_CONTAINS")
16056 {
16057 let value = f.args[0].clone();
16058 let array = f.args[1].clone();
16059
16060 // value IS NULL
16061 let value_is_null = Expression::IsNull(Box::new(crate::expressions::IsNull {
16062 this: value.clone(),
16063 not: false,
16064 postfix_form: false,
16065 }));
16066
16067 // ARRAY_LENGTH(array)
16068 let array_length = Expression::Function(Box::new(Function::new(
16069 "ARRAY_LENGTH".to_string(),
16070 vec![array.clone()],
16071 )));
16072 // LIST_COUNT(array)
16073 let list_count = Expression::Function(Box::new(Function::new(
16074 "LIST_COUNT".to_string(),
16075 vec![array.clone()],
16076 )));
16077 // ARRAY_LENGTH(array) <> LIST_COUNT(array)
16078 let neq = Expression::Neq(Box::new(crate::expressions::BinaryOp {
16079 left: array_length,
16080 right: list_count,
16081 left_comments: vec![],
16082 operator_comments: vec![],
16083 trailing_comments: vec![],
16084 inferred_type: None,
16085 }));
16086 // NULLIF(ARRAY_LENGTH(array) <> LIST_COUNT(array), FALSE)
16087 let nullif = Expression::Nullif(Box::new(crate::expressions::Nullif {
16088 this: Box::new(neq),
16089 expression: Box::new(Expression::Boolean(crate::expressions::BooleanLiteral { value: false })),
16090 }));
16091
16092 // ARRAY_CONTAINS(array, value) - DuckDB syntax: array first, value second
16093 let array_contains = Expression::Function(Box::new(Function::new(
16094 "ARRAY_CONTAINS".to_string(),
16095 vec![array, value],
16096 )));
16097
16098 // CASE WHEN value IS NULL THEN NULLIF(...) ELSE ARRAY_CONTAINS(array, value) END
16099 return Ok(Expression::Case(Box::new(Case {
16100 operand: None,
16101 whens: vec![(value_is_null, nullif)],
16102 else_: Some(array_contains),
16103 comments: Vec::new(),
16104 inferred_type: None,
16105 })));
16106 }
16107 match target {
16108 DialectType::PostgreSQL | DialectType::Redshift => {
16109 // CASE WHEN needle IS NULL THEN NULL ELSE COALESCE(needle = ANY(arr), FALSE) END
16110 let arr = f.args[0].clone();
16111 let needle = f.args[1].clone();
16112 // Convert [] to ARRAY[] for PostgreSQL
16113 let pg_arr = match arr {
16114 Expression::Array(a) => Expression::ArrayFunc(
16115 Box::new(crate::expressions::ArrayConstructor {
16116 expressions: a.expressions,
16117 bracket_notation: false,
16118 use_list_keyword: false,
16119 }),
16120 ),
16121 _ => arr,
16122 };
16123 // needle = ANY(arr) using the Any quantified expression
16124 let any_expr = Expression::Any(Box::new(
16125 crate::expressions::QuantifiedExpr {
16126 this: needle.clone(),
16127 subquery: pg_arr,
16128 op: Some(crate::expressions::QuantifiedOp::Eq),
16129 },
16130 ));
16131 let coalesce = Expression::Coalesce(Box::new(
16132 crate::expressions::VarArgFunc {
16133 expressions: vec![
16134 any_expr,
16135 Expression::Boolean(
16136 crate::expressions::BooleanLiteral {
16137 value: false,
16138 },
16139 ),
16140 ],
16141 original_name: None,
16142 inferred_type: None,
16143 },
16144 ));
16145 let is_null_check = Expression::IsNull(Box::new(
16146 crate::expressions::IsNull {
16147 this: needle,
16148 not: false,
16149 postfix_form: false,
16150 },
16151 ));
16152 Ok(Expression::Case(Box::new(Case {
16153 operand: None,
16154 whens: vec![(
16155 is_null_check,
16156 Expression::Null(crate::expressions::Null),
16157 )],
16158 else_: Some(coalesce),
16159 comments: Vec::new(),
16160 inferred_type: None,
16161 })))
16162 }
16163 _ => Ok(Expression::Function(Box::new(Function::new(
16164 "ARRAY_CONTAINS".to_string(),
16165 f.args,
16166 )))),
16167 }
16168 }
16169 // LIST_HAS_ANY / ARRAY_HAS_ANY -> target-specific overlap operator
16170 "LIST_HAS_ANY" | "ARRAY_HAS_ANY" if f.args.len() == 2 => {
16171 match target {
16172 DialectType::PostgreSQL | DialectType::Redshift => {
16173 // arr1 && arr2 with ARRAY[] syntax
16174 let mut args = f.args;
16175 let arr1 = args.remove(0);
16176 let arr2 = args.remove(0);
16177 let pg_arr1 = match arr1 {
16178 Expression::Array(a) => Expression::ArrayFunc(
16179 Box::new(crate::expressions::ArrayConstructor {
16180 expressions: a.expressions,
16181 bracket_notation: false,
16182 use_list_keyword: false,
16183 }),
16184 ),
16185 _ => arr1,
16186 };
16187 let pg_arr2 = match arr2 {
16188 Expression::Array(a) => Expression::ArrayFunc(
16189 Box::new(crate::expressions::ArrayConstructor {
16190 expressions: a.expressions,
16191 bracket_notation: false,
16192 use_list_keyword: false,
16193 }),
16194 ),
16195 _ => arr2,
16196 };
16197 Ok(Expression::ArrayOverlaps(Box::new(BinaryOp::new(
16198 pg_arr1, pg_arr2,
16199 ))))
16200 }
16201 DialectType::DuckDB => {
16202 // DuckDB: arr1 && arr2 (native support)
16203 let mut args = f.args;
16204 let arr1 = args.remove(0);
16205 let arr2 = args.remove(0);
16206 Ok(Expression::ArrayOverlaps(Box::new(BinaryOp::new(
16207 arr1, arr2,
16208 ))))
16209 }
16210 _ => Ok(Expression::Function(Box::new(Function::new(
16211 "LIST_HAS_ANY".to_string(),
16212 f.args,
16213 )))),
16214 }
16215 }
16216 // APPROX_QUANTILE(x, q) -> target-specific
16217 "APPROX_QUANTILE" if f.args.len() == 2 => match target {
16218 DialectType::Snowflake => Ok(Expression::Function(Box::new(
16219 Function::new("APPROX_PERCENTILE".to_string(), f.args),
16220 ))),
16221 DialectType::DuckDB => Ok(Expression::Function(f)),
16222 _ => Ok(Expression::Function(f)),
16223 },
16224 // MAKE_DATE(y, m, d) -> DATE(y, m, d) for BigQuery
16225 "MAKE_DATE" if f.args.len() == 3 => match target {
16226 DialectType::BigQuery => Ok(Expression::Function(Box::new(
16227 Function::new("DATE".to_string(), f.args),
16228 ))),
16229 _ => Ok(Expression::Function(f)),
16230 },
16231 // RANGE(start, end[, step]) -> target-specific
16232 "RANGE"
16233 if f.args.len() >= 2 && !matches!(target, DialectType::DuckDB) =>
16234 {
16235 let start = f.args[0].clone();
16236 let end = f.args[1].clone();
16237 let step = f.args.get(2).cloned();
16238 match target {
16239 DialectType::Spark | DialectType::Databricks => {
16240 // RANGE(start, end) -> SEQUENCE(start, end-1)
16241 // RANGE(start, end, step) -> SEQUENCE(start, end-step, step) when step constant
16242 // RANGE(start, start) -> ARRAY() (empty)
16243 // RANGE(start, end, 0) -> ARRAY() (empty)
16244 // When end is variable: IF((end - 1) <= start, ARRAY(), SEQUENCE(start, (end - 1)))
16245
16246 // Check for constant args
16247 fn extract_i64(e: &Expression) -> Option<i64> {
16248 match e {
16249 Expression::Literal(Literal::Number(n)) => {
16250 n.parse::<i64>().ok()
16251 }
16252 Expression::Neg(u) => {
16253 if let Expression::Literal(Literal::Number(n)) =
16254 &u.this
16255 {
16256 n.parse::<i64>().ok().map(|v| -v)
16257 } else {
16258 None
16259 }
16260 }
16261 _ => None,
16262 }
16263 }
16264 let start_val = extract_i64(&start);
16265 let end_val = extract_i64(&end);
16266 let step_val = step.as_ref().and_then(|s| extract_i64(s));
16267
16268 // Check for RANGE(x, x) or RANGE(x, y, 0) -> empty array
16269 if step_val == Some(0) {
16270 return Ok(Expression::Function(Box::new(
16271 Function::new("ARRAY".to_string(), vec![]),
16272 )));
16273 }
16274 if let (Some(s), Some(e_val)) = (start_val, end_val) {
16275 if s == e_val {
16276 return Ok(Expression::Function(Box::new(
16277 Function::new("ARRAY".to_string(), vec![]),
16278 )));
16279 }
16280 }
16281
16282 if let (Some(_s_val), Some(e_val)) = (start_val, end_val) {
16283 // All constants - compute new end = end - step (if step provided) or end - 1
16284 match step_val {
16285 Some(st) if st < 0 => {
16286 // Negative step: SEQUENCE(start, end - step, step)
16287 let new_end = e_val - st; // end - step (= end + |step|)
16288 let mut args =
16289 vec![start, Expression::number(new_end)];
16290 if let Some(s) = step {
16291 args.push(s);
16292 }
16293 Ok(Expression::Function(Box::new(
16294 Function::new("SEQUENCE".to_string(), args),
16295 )))
16296 }
16297 Some(st) => {
16298 let new_end = e_val - st;
16299 let mut args =
16300 vec![start, Expression::number(new_end)];
16301 if let Some(s) = step {
16302 args.push(s);
16303 }
16304 Ok(Expression::Function(Box::new(
16305 Function::new("SEQUENCE".to_string(), args),
16306 )))
16307 }
16308 None => {
16309 // No step: SEQUENCE(start, end - 1)
16310 let new_end = e_val - 1;
16311 Ok(Expression::Function(Box::new(
16312 Function::new(
16313 "SEQUENCE".to_string(),
16314 vec![
16315 start,
16316 Expression::number(new_end),
16317 ],
16318 ),
16319 )))
16320 }
16321 }
16322 } else {
16323 // Variable end: IF((end - 1) <= start, ARRAY(), SEQUENCE(start, (end - 1)))
16324 let end_m1 = Expression::Sub(Box::new(BinaryOp::new(
16325 end.clone(),
16326 Expression::number(1),
16327 )));
16328 let cond = Expression::Lte(Box::new(BinaryOp::new(
16329 Expression::Paren(Box::new(Paren {
16330 this: end_m1.clone(),
16331 trailing_comments: Vec::new(),
16332 })),
16333 start.clone(),
16334 )));
16335 let empty = Expression::Function(Box::new(
16336 Function::new("ARRAY".to_string(), vec![]),
16337 ));
16338 let mut seq_args = vec![
16339 start,
16340 Expression::Paren(Box::new(Paren {
16341 this: end_m1,
16342 trailing_comments: Vec::new(),
16343 })),
16344 ];
16345 if let Some(s) = step {
16346 seq_args.push(s);
16347 }
16348 let seq = Expression::Function(Box::new(
16349 Function::new("SEQUENCE".to_string(), seq_args),
16350 ));
16351 Ok(Expression::IfFunc(Box::new(
16352 crate::expressions::IfFunc {
16353 condition: cond,
16354 true_value: empty,
16355 false_value: Some(seq),
16356 original_name: None,
16357 inferred_type: None,
16358 },
16359 )))
16360 }
16361 }
16362 DialectType::SQLite => {
16363 // RANGE(start, end) -> GENERATE_SERIES(start, end)
16364 // The subquery wrapping is handled at the Alias level
16365 let mut args = vec![start, end];
16366 if let Some(s) = step {
16367 args.push(s);
16368 }
16369 Ok(Expression::Function(Box::new(Function::new(
16370 "GENERATE_SERIES".to_string(),
16371 args,
16372 ))))
16373 }
16374 _ => Ok(Expression::Function(f)),
16375 }
16376 }
16377 // ARRAY_REVERSE_SORT -> target-specific
16378 // (handled above as well, but also need DuckDB self-normalization)
16379 // MAP_FROM_ARRAYS(keys, values) -> target-specific map construction
16380 "MAP_FROM_ARRAYS" if f.args.len() == 2 => match target {
16381 DialectType::Snowflake => Ok(Expression::Function(Box::new(
16382 Function::new("OBJECT_CONSTRUCT".to_string(), f.args),
16383 ))),
16384 DialectType::Spark | DialectType::Databricks => {
16385 Ok(Expression::Function(Box::new(Function::new(
16386 "MAP_FROM_ARRAYS".to_string(),
16387 f.args,
16388 ))))
16389 }
16390 _ => Ok(Expression::Function(Box::new(Function::new(
16391 "MAP".to_string(),
16392 f.args,
16393 )))),
16394 },
16395 // VARIANCE(x) -> varSamp(x) for ClickHouse
16396 "VARIANCE" if f.args.len() == 1 => match target {
16397 DialectType::ClickHouse => Ok(Expression::Function(Box::new(
16398 Function::new("varSamp".to_string(), f.args),
16399 ))),
16400 _ => Ok(Expression::Function(f)),
16401 },
16402 // STDDEV(x) -> stddevSamp(x) for ClickHouse
16403 "STDDEV" if f.args.len() == 1 => match target {
16404 DialectType::ClickHouse => Ok(Expression::Function(Box::new(
16405 Function::new("stddevSamp".to_string(), f.args),
16406 ))),
16407 _ => Ok(Expression::Function(f)),
16408 },
16409 // ISINF(x) -> IS_INF(x) for BigQuery
16410 "ISINF" if f.args.len() == 1 => match target {
16411 DialectType::BigQuery => Ok(Expression::Function(Box::new(
16412 Function::new("IS_INF".to_string(), f.args),
16413 ))),
16414 _ => Ok(Expression::Function(f)),
16415 },
16416 // CONTAINS(arr, x) -> ARRAY_CONTAINS(arr, x) for Spark/Hive
16417 "CONTAINS" if f.args.len() == 2 => match target {
16418 DialectType::Spark
16419 | DialectType::Databricks
16420 | DialectType::Hive => Ok(Expression::Function(Box::new(
16421 Function::new("ARRAY_CONTAINS".to_string(), f.args),
16422 ))),
16423 _ => Ok(Expression::Function(f)),
16424 },
16425 // ARRAY_CONTAINS(arr, x) -> CONTAINS(arr, x) for Presto
16426 "ARRAY_CONTAINS" if f.args.len() == 2 => match target {
16427 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
16428 Ok(Expression::Function(Box::new(Function::new(
16429 "CONTAINS".to_string(),
16430 f.args,
16431 ))))
16432 }
16433 DialectType::DuckDB => Ok(Expression::Function(Box::new(
16434 Function::new("ARRAY_CONTAINS".to_string(), f.args),
16435 ))),
16436 _ => Ok(Expression::Function(f)),
16437 },
16438 // TO_UNIXTIME(x) -> UNIX_TIMESTAMP(x) for Hive/Spark
16439 "TO_UNIXTIME" if f.args.len() == 1 => match target {
16440 DialectType::Hive
16441 | DialectType::Spark
16442 | DialectType::Databricks => Ok(Expression::Function(Box::new(
16443 Function::new("UNIX_TIMESTAMP".to_string(), f.args),
16444 ))),
16445 _ => Ok(Expression::Function(f)),
16446 },
16447 // FROM_UNIXTIME(x) -> target-specific
16448 "FROM_UNIXTIME" if f.args.len() == 1 => {
16449 match target {
16450 DialectType::Hive
16451 | DialectType::Spark
16452 | DialectType::Databricks
16453 | DialectType::Presto
16454 | DialectType::Trino => Ok(Expression::Function(f)),
16455 DialectType::DuckDB => {
16456 // DuckDB: TO_TIMESTAMP(x)
16457 let arg = f.args.into_iter().next().unwrap();
16458 Ok(Expression::Function(Box::new(Function::new(
16459 "TO_TIMESTAMP".to_string(),
16460 vec![arg],
16461 ))))
16462 }
16463 DialectType::PostgreSQL => {
16464 // PG: TO_TIMESTAMP(col)
16465 let arg = f.args.into_iter().next().unwrap();
16466 Ok(Expression::Function(Box::new(Function::new(
16467 "TO_TIMESTAMP".to_string(),
16468 vec![arg],
16469 ))))
16470 }
16471 DialectType::Redshift => {
16472 // Redshift: (TIMESTAMP 'epoch' + col * INTERVAL '1 SECOND')
16473 let arg = f.args.into_iter().next().unwrap();
16474 let epoch_ts = Expression::Literal(Literal::Timestamp(
16475 "epoch".to_string(),
16476 ));
16477 let interval = Expression::Interval(Box::new(
16478 crate::expressions::Interval {
16479 this: Some(Expression::string("1 SECOND")),
16480 unit: None,
16481 },
16482 ));
16483 let mul =
16484 Expression::Mul(Box::new(BinaryOp::new(arg, interval)));
16485 let add =
16486 Expression::Add(Box::new(BinaryOp::new(epoch_ts, mul)));
16487 Ok(Expression::Paren(Box::new(crate::expressions::Paren {
16488 this: add,
16489 trailing_comments: Vec::new(),
16490 })))
16491 }
16492 _ => Ok(Expression::Function(f)),
16493 }
16494 }
16495 // FROM_UNIXTIME(x, fmt) with 2 args from Hive/Spark -> target-specific
16496 "FROM_UNIXTIME"
16497 if f.args.len() == 2
16498 && matches!(
16499 source,
16500 DialectType::Hive
16501 | DialectType::Spark
16502 | DialectType::Databricks
16503 ) =>
16504 {
16505 let mut args = f.args;
16506 let unix_ts = args.remove(0);
16507 let fmt_expr = args.remove(0);
16508 match target {
16509 DialectType::DuckDB => {
16510 // DuckDB: STRFTIME(TO_TIMESTAMP(x), c_fmt)
16511 let to_ts = Expression::Function(Box::new(Function::new(
16512 "TO_TIMESTAMP".to_string(),
16513 vec![unix_ts],
16514 )));
16515 if let Expression::Literal(
16516 crate::expressions::Literal::String(s),
16517 ) = &fmt_expr
16518 {
16519 let c_fmt = Self::hive_format_to_c_format(s);
16520 Ok(Expression::Function(Box::new(Function::new(
16521 "STRFTIME".to_string(),
16522 vec![to_ts, Expression::string(&c_fmt)],
16523 ))))
16524 } else {
16525 Ok(Expression::Function(Box::new(Function::new(
16526 "STRFTIME".to_string(),
16527 vec![to_ts, fmt_expr],
16528 ))))
16529 }
16530 }
16531 DialectType::Presto
16532 | DialectType::Trino
16533 | DialectType::Athena => {
16534 // Presto: DATE_FORMAT(FROM_UNIXTIME(x), presto_fmt)
16535 let from_unix =
16536 Expression::Function(Box::new(Function::new(
16537 "FROM_UNIXTIME".to_string(),
16538 vec![unix_ts],
16539 )));
16540 if let Expression::Literal(
16541 crate::expressions::Literal::String(s),
16542 ) = &fmt_expr
16543 {
16544 let p_fmt = Self::hive_format_to_presto_format(s);
16545 Ok(Expression::Function(Box::new(Function::new(
16546 "DATE_FORMAT".to_string(),
16547 vec![from_unix, Expression::string(&p_fmt)],
16548 ))))
16549 } else {
16550 Ok(Expression::Function(Box::new(Function::new(
16551 "DATE_FORMAT".to_string(),
16552 vec![from_unix, fmt_expr],
16553 ))))
16554 }
16555 }
16556 _ => {
16557 // Keep as FROM_UNIXTIME(x, fmt) for other targets
16558 Ok(Expression::Function(Box::new(Function::new(
16559 "FROM_UNIXTIME".to_string(),
16560 vec![unix_ts, fmt_expr],
16561 ))))
16562 }
16563 }
16564 }
16565 // DATEPART(unit, expr) -> EXTRACT(unit FROM expr) for Spark
16566 "DATEPART" | "DATE_PART" if f.args.len() == 2 => {
16567 let unit_str = Self::get_unit_str_static(&f.args[0]);
16568 // Get the raw unit text preserving original case
16569 let raw_unit = match &f.args[0] {
16570 Expression::Identifier(id) => id.name.clone(),
16571 Expression::Literal(crate::expressions::Literal::String(s)) => {
16572 s.clone()
16573 }
16574 Expression::Column(col) => col.name.name.clone(),
16575 _ => unit_str.clone(),
16576 };
16577 match target {
16578 DialectType::TSQL | DialectType::Fabric => {
16579 // Preserve original case of unit for TSQL
16580 let unit_name = match unit_str.as_str() {
16581 "YY" | "YYYY" => "YEAR".to_string(),
16582 "QQ" | "Q" => "QUARTER".to_string(),
16583 "MM" | "M" => "MONTH".to_string(),
16584 "WK" | "WW" => "WEEK".to_string(),
16585 "DD" | "D" | "DY" => "DAY".to_string(),
16586 "HH" => "HOUR".to_string(),
16587 "MI" | "N" => "MINUTE".to_string(),
16588 "SS" | "S" => "SECOND".to_string(),
16589 _ => raw_unit.clone(), // preserve original case
16590 };
16591 let mut args = f.args;
16592 args[0] =
16593 Expression::Identifier(Identifier::new(&unit_name));
16594 Ok(Expression::Function(Box::new(Function::new(
16595 "DATEPART".to_string(),
16596 args,
16597 ))))
16598 }
16599 DialectType::Spark | DialectType::Databricks => {
16600 // DATEPART(unit, expr) -> EXTRACT(unit FROM expr)
16601 // Preserve original case for non-abbreviation units
16602 let unit = match unit_str.as_str() {
16603 "YY" | "YYYY" => "YEAR".to_string(),
16604 "QQ" | "Q" => "QUARTER".to_string(),
16605 "MM" | "M" => "MONTH".to_string(),
16606 "WK" | "WW" => "WEEK".to_string(),
16607 "DD" | "D" | "DY" => "DAY".to_string(),
16608 "HH" => "HOUR".to_string(),
16609 "MI" | "N" => "MINUTE".to_string(),
16610 "SS" | "S" => "SECOND".to_string(),
16611 _ => raw_unit, // preserve original case
16612 };
16613 Ok(Expression::Extract(Box::new(
16614 crate::expressions::ExtractFunc {
16615 this: f.args[1].clone(),
16616 field: crate::expressions::DateTimeField::Custom(
16617 unit,
16618 ),
16619 },
16620 )))
16621 }
16622 _ => Ok(Expression::Function(Box::new(Function::new(
16623 "DATE_PART".to_string(),
16624 f.args,
16625 )))),
16626 }
16627 }
16628 // DATENAME(mm, date) -> FORMAT(CAST(date AS DATETIME2), 'MMMM') for TSQL
16629 // DATENAME(dw, date) -> FORMAT(CAST(date AS DATETIME2), 'dddd') for TSQL
16630 // DATENAME(mm, date) -> DATE_FORMAT(CAST(date AS TIMESTAMP), 'MMMM') for Spark
16631 // DATENAME(dw, date) -> DATE_FORMAT(CAST(date AS TIMESTAMP), 'EEEE') for Spark
16632 "DATENAME" if f.args.len() == 2 => {
16633 let unit_str = Self::get_unit_str_static(&f.args[0]);
16634 let date_expr = f.args[1].clone();
16635 match unit_str.as_str() {
16636 "MM" | "M" | "MONTH" => match target {
16637 DialectType::TSQL => {
16638 let cast_date = Expression::Cast(Box::new(
16639 crate::expressions::Cast {
16640 this: date_expr,
16641 to: DataType::Custom {
16642 name: "DATETIME2".to_string(),
16643 },
16644 trailing_comments: Vec::new(),
16645 double_colon_syntax: false,
16646 format: None,
16647 default: None,
16648 inferred_type: None,
16649 },
16650 ));
16651 Ok(Expression::Function(Box::new(Function::new(
16652 "FORMAT".to_string(),
16653 vec![cast_date, Expression::string("MMMM")],
16654 ))))
16655 }
16656 DialectType::Spark | DialectType::Databricks => {
16657 let cast_date = Expression::Cast(Box::new(
16658 crate::expressions::Cast {
16659 this: date_expr,
16660 to: DataType::Timestamp {
16661 timezone: false,
16662 precision: None,
16663 },
16664 trailing_comments: Vec::new(),
16665 double_colon_syntax: false,
16666 format: None,
16667 default: None,
16668 inferred_type: None,
16669 },
16670 ));
16671 Ok(Expression::Function(Box::new(Function::new(
16672 "DATE_FORMAT".to_string(),
16673 vec![cast_date, Expression::string("MMMM")],
16674 ))))
16675 }
16676 _ => Ok(Expression::Function(f)),
16677 },
16678 "DW" | "WEEKDAY" => match target {
16679 DialectType::TSQL => {
16680 let cast_date = Expression::Cast(Box::new(
16681 crate::expressions::Cast {
16682 this: date_expr,
16683 to: DataType::Custom {
16684 name: "DATETIME2".to_string(),
16685 },
16686 trailing_comments: Vec::new(),
16687 double_colon_syntax: false,
16688 format: None,
16689 default: None,
16690 inferred_type: None,
16691 },
16692 ));
16693 Ok(Expression::Function(Box::new(Function::new(
16694 "FORMAT".to_string(),
16695 vec![cast_date, Expression::string("dddd")],
16696 ))))
16697 }
16698 DialectType::Spark | DialectType::Databricks => {
16699 let cast_date = Expression::Cast(Box::new(
16700 crate::expressions::Cast {
16701 this: date_expr,
16702 to: DataType::Timestamp {
16703 timezone: false,
16704 precision: None,
16705 },
16706 trailing_comments: Vec::new(),
16707 double_colon_syntax: false,
16708 format: None,
16709 default: None,
16710 inferred_type: None,
16711 },
16712 ));
16713 Ok(Expression::Function(Box::new(Function::new(
16714 "DATE_FORMAT".to_string(),
16715 vec![cast_date, Expression::string("EEEE")],
16716 ))))
16717 }
16718 _ => Ok(Expression::Function(f)),
16719 },
16720 _ => Ok(Expression::Function(f)),
16721 }
16722 }
16723 // STRING_AGG(x, sep) without WITHIN GROUP -> target-specific
16724 "STRING_AGG" if f.args.len() >= 2 => {
16725 let x = f.args[0].clone();
16726 let sep = f.args[1].clone();
16727 match target {
16728 DialectType::MySQL
16729 | DialectType::SingleStore
16730 | DialectType::Doris
16731 | DialectType::StarRocks => Ok(Expression::GroupConcat(
16732 Box::new(crate::expressions::GroupConcatFunc {
16733 this: x,
16734 separator: Some(sep),
16735 order_by: None,
16736 distinct: false,
16737 filter: None,
16738 inferred_type: None,
16739 }),
16740 )),
16741 DialectType::SQLite => Ok(Expression::GroupConcat(Box::new(
16742 crate::expressions::GroupConcatFunc {
16743 this: x,
16744 separator: Some(sep),
16745 order_by: None,
16746 distinct: false,
16747 filter: None,
16748 inferred_type: None,
16749 },
16750 ))),
16751 DialectType::PostgreSQL | DialectType::Redshift => {
16752 Ok(Expression::StringAgg(Box::new(
16753 crate::expressions::StringAggFunc {
16754 this: x,
16755 separator: Some(sep),
16756 order_by: None,
16757 distinct: false,
16758 filter: None,
16759 limit: None,
16760 inferred_type: None,
16761 },
16762 )))
16763 }
16764 _ => Ok(Expression::Function(f)),
16765 }
16766 }
16767 // JSON_ARRAYAGG -> JSON_AGG for PostgreSQL
16768 "JSON_ARRAYAGG" => match target {
16769 DialectType::PostgreSQL => {
16770 Ok(Expression::Function(Box::new(Function {
16771 name: "JSON_AGG".to_string(),
16772 ..(*f)
16773 })))
16774 }
16775 _ => Ok(Expression::Function(f)),
16776 },
            // SCHEMA_NAME(id) -> CURRENT_SCHEMA for PostgreSQL, 'main' for SQLite
            // NOTE(review): near-duplicate of the earlier guarded arm
            // `"SCHEMA_NAME" if f.args.len() <= 1`; since that arm matches
            // first, this one is only reachable when SCHEMA_NAME is called
            // with 2+ arguments — confirm that is the intended split.
            "SCHEMA_NAME" => match target {
                DialectType::PostgreSQL => Ok(Expression::CurrentSchema(Box::new(
                    crate::expressions::CurrentSchema { this: None },
                ))),
                // SQLite's primary attached database is always named 'main'.
                DialectType::SQLite => Ok(Expression::string("main")),
                _ => Ok(Expression::Function(f)),
            },
16785 // TO_TIMESTAMP(x, fmt) 2-arg from Spark/Hive: convert Java format to target format
16786 "TO_TIMESTAMP"
16787 if f.args.len() == 2
16788 && matches!(
16789 source,
16790 DialectType::Spark
16791 | DialectType::Databricks
16792 | DialectType::Hive
16793 )
16794 && matches!(target, DialectType::DuckDB) =>
16795 {
16796 let mut args = f.args;
16797 let val = args.remove(0);
16798 let fmt_expr = args.remove(0);
16799 if let Expression::Literal(Literal::String(ref s)) = fmt_expr {
16800 // Convert Java/Spark format to C strptime format
16801 fn java_to_c_fmt(fmt: &str) -> String {
16802 let result = fmt
16803 .replace("yyyy", "%Y")
16804 .replace("SSSSSS", "%f")
16805 .replace("EEEE", "%W")
16806 .replace("MM", "%m")
16807 .replace("dd", "%d")
16808 .replace("HH", "%H")
16809 .replace("mm", "%M")
16810 .replace("ss", "%S")
16811 .replace("yy", "%y");
16812 let mut out = String::new();
16813 let chars: Vec<char> = result.chars().collect();
16814 let mut i = 0;
16815 while i < chars.len() {
16816 if chars[i] == '%' && i + 1 < chars.len() {
16817 out.push(chars[i]);
16818 out.push(chars[i + 1]);
16819 i += 2;
16820 } else if chars[i] == 'z' {
16821 out.push_str("%Z");
16822 i += 1;
16823 } else if chars[i] == 'Z' {
16824 out.push_str("%z");
16825 i += 1;
16826 } else {
16827 out.push(chars[i]);
16828 i += 1;
16829 }
16830 }
16831 out
16832 }
16833 let c_fmt = java_to_c_fmt(s);
16834 Ok(Expression::Function(Box::new(Function::new(
16835 "STRPTIME".to_string(),
16836 vec![val, Expression::string(&c_fmt)],
16837 ))))
16838 } else {
16839 Ok(Expression::Function(Box::new(Function::new(
16840 "STRPTIME".to_string(),
16841 vec![val, fmt_expr],
16842 ))))
16843 }
16844 }
16845 // TO_DATE(x) 1-arg from Doris: date conversion
16846 "TO_DATE"
16847 if f.args.len() == 1
16848 && matches!(
16849 source,
16850 DialectType::Doris | DialectType::StarRocks
16851 ) =>
16852 {
16853 let arg = f.args.into_iter().next().unwrap();
16854 match target {
16855 DialectType::Oracle
16856 | DialectType::DuckDB
16857 | DialectType::TSQL => {
16858 // CAST(x AS DATE)
16859 Ok(Expression::Cast(Box::new(Cast {
16860 this: arg,
16861 to: DataType::Date,
16862 double_colon_syntax: false,
16863 trailing_comments: vec![],
16864 format: None,
16865 default: None,
16866 inferred_type: None,
16867 })))
16868 }
16869 DialectType::MySQL | DialectType::SingleStore => {
16870 // DATE(x)
16871 Ok(Expression::Function(Box::new(Function::new(
16872 "DATE".to_string(),
16873 vec![arg],
16874 ))))
16875 }
16876 _ => {
16877 // Default: keep as TO_DATE(x) (Spark, PostgreSQL, etc.)
16878 Ok(Expression::Function(Box::new(Function::new(
16879 "TO_DATE".to_string(),
16880 vec![arg],
16881 ))))
16882 }
16883 }
16884 }
16885 // TO_DATE(x) 1-arg from Spark/Hive: safe date conversion
16886 "TO_DATE"
16887 if f.args.len() == 1
16888 && matches!(
16889 source,
16890 DialectType::Spark
16891 | DialectType::Databricks
16892 | DialectType::Hive
16893 ) =>
16894 {
16895 let arg = f.args.into_iter().next().unwrap();
16896 match target {
16897 DialectType::DuckDB => {
16898 // Spark TO_DATE is safe -> TRY_CAST(x AS DATE)
16899 Ok(Expression::TryCast(Box::new(Cast {
16900 this: arg,
16901 to: DataType::Date,
16902 double_colon_syntax: false,
16903 trailing_comments: vec![],
16904 format: None,
16905 default: None,
16906 inferred_type: None,
16907 })))
16908 }
16909 DialectType::Presto
16910 | DialectType::Trino
16911 | DialectType::Athena => {
16912 // CAST(CAST(x AS TIMESTAMP) AS DATE)
16913 Ok(Self::double_cast_timestamp_date(arg))
16914 }
16915 DialectType::Snowflake => {
16916 // Spark's TO_DATE is safe -> TRY_TO_DATE(x, 'yyyy-mm-DD')
16917 // The default Spark format 'yyyy-MM-dd' maps to Snowflake 'yyyy-mm-DD'
16918 Ok(Expression::Function(Box::new(Function::new(
16919 "TRY_TO_DATE".to_string(),
16920 vec![arg, Expression::string("yyyy-mm-DD")],
16921 ))))
16922 }
16923 _ => {
16924 // Default: keep as TO_DATE(x)
16925 Ok(Expression::Function(Box::new(Function::new(
16926 "TO_DATE".to_string(),
16927 vec![arg],
16928 ))))
16929 }
16930 }
16931 }
16932 // TO_DATE(x, fmt) 2-arg from Spark/Hive: format-based date conversion
16933 "TO_DATE"
16934 if f.args.len() == 2
16935 && matches!(
16936 source,
16937 DialectType::Spark
16938 | DialectType::Databricks
16939 | DialectType::Hive
16940 ) =>
16941 {
16942 let mut args = f.args;
16943 let val = args.remove(0);
16944 let fmt_expr = args.remove(0);
16945 let is_default_format = matches!(&fmt_expr, Expression::Literal(Literal::String(s)) if s == "yyyy-MM-dd");
16946
16947 if is_default_format {
16948 // Default format: same as 1-arg form
16949 match target {
16950 DialectType::DuckDB => {
16951 Ok(Expression::TryCast(Box::new(Cast {
16952 this: val,
16953 to: DataType::Date,
16954 double_colon_syntax: false,
16955 trailing_comments: vec![],
16956 format: None,
16957 default: None,
16958 inferred_type: None,
16959 })))
16960 }
16961 DialectType::Presto
16962 | DialectType::Trino
16963 | DialectType::Athena => {
16964 Ok(Self::double_cast_timestamp_date(val))
16965 }
16966 DialectType::Snowflake => {
16967 // TRY_TO_DATE(x, format) with Snowflake format mapping
16968 let sf_fmt = "yyyy-MM-dd"
16969 .replace("yyyy", "yyyy")
16970 .replace("MM", "mm")
16971 .replace("dd", "DD");
16972 Ok(Expression::Function(Box::new(Function::new(
16973 "TRY_TO_DATE".to_string(),
16974 vec![val, Expression::string(&sf_fmt)],
16975 ))))
16976 }
16977 _ => Ok(Expression::Function(Box::new(Function::new(
16978 "TO_DATE".to_string(),
16979 vec![val],
16980 )))),
16981 }
16982 } else {
16983 // Non-default format: use format-based parsing
16984 if let Expression::Literal(Literal::String(ref s)) = fmt_expr {
16985 match target {
16986 DialectType::DuckDB => {
16987 // CAST(CAST(TRY_STRPTIME(x, c_fmt) AS TIMESTAMP) AS DATE)
fn java_to_c_fmt_todate(fmt: &str) -> String {
    // Translate Java/Spark SimpleDateFormat tokens into C strptime specifiers
    // for DuckDB's TRY_STRPTIME. Longer tokens ("yyyy", "SSSSSS") are replaced
    // before their shorter substrings ("yy", "ss") so replacements don't
    // cannibalize each other.
    let result = fmt
        .replace("yyyy", "%Y")
        .replace("SSSSSS", "%f")
        // "EEEE" is the full weekday name (e.g. "Monday"); strptime spells
        // that %A. (%W is the week-of-year number and cannot parse a name.)
        .replace("EEEE", "%A")
        .replace("MM", "%m")
        .replace("dd", "%d")
        .replace("HH", "%H")
        .replace("mm", "%M")
        .replace("ss", "%S")
        .replace("yy", "%y");
    // Second pass maps the single-character timezone tokens, skipping over
    // the "%x" specifiers emitted above so their letters are not remapped.
    let mut out = String::new();
    let chars: Vec<char> = result.chars().collect();
    let mut i = 0;
    while i < chars.len() {
        if chars[i] == '%' && i + 1 < chars.len() {
            // Copy an already-emitted specifier verbatim.
            out.push(chars[i]);
            out.push(chars[i + 1]);
            i += 2;
        } else if chars[i] == 'z' {
            // Java 'z' = timezone name (e.g. "PST") -> %Z
            out.push_str("%Z");
            i += 1;
        } else if chars[i] == 'Z' {
            // Java 'Z' = numeric offset (e.g. "-0800") -> %z
            out.push_str("%z");
            i += 1;
        } else {
            out.push(chars[i]);
            i += 1;
        }
    }
    out
}
17020 let c_fmt = java_to_c_fmt_todate(s);
17021 // CAST(CAST(TRY_STRPTIME(x, fmt) AS TIMESTAMP) AS DATE)
17022 let try_strptime =
17023 Expression::Function(Box::new(Function::new(
17024 "TRY_STRPTIME".to_string(),
17025 vec![val, Expression::string(&c_fmt)],
17026 )));
17027 let cast_ts = Expression::Cast(Box::new(Cast {
17028 this: try_strptime,
17029 to: DataType::Timestamp {
17030 precision: None,
17031 timezone: false,
17032 },
17033 double_colon_syntax: false,
17034 trailing_comments: vec![],
17035 format: None,
17036 default: None,
17037 inferred_type: None,
17038 }));
17039 Ok(Expression::Cast(Box::new(Cast {
17040 this: cast_ts,
17041 to: DataType::Date,
17042 double_colon_syntax: false,
17043 trailing_comments: vec![],
17044 format: None,
17045 default: None,
17046 inferred_type: None,
17047 })))
17048 }
17049 DialectType::Presto
17050 | DialectType::Trino
17051 | DialectType::Athena => {
17052 // CAST(DATE_PARSE(x, presto_fmt) AS DATE)
17053 let p_fmt = s
17054 .replace("yyyy", "%Y")
17055 .replace("SSSSSS", "%f")
17056 .replace("MM", "%m")
17057 .replace("dd", "%d")
17058 .replace("HH", "%H")
17059 .replace("mm", "%M")
17060 .replace("ss", "%S")
17061 .replace("yy", "%y");
17062 let date_parse =
17063 Expression::Function(Box::new(Function::new(
17064 "DATE_PARSE".to_string(),
17065 vec![val, Expression::string(&p_fmt)],
17066 )));
17067 Ok(Expression::Cast(Box::new(Cast {
17068 this: date_parse,
17069 to: DataType::Date,
17070 double_colon_syntax: false,
17071 trailing_comments: vec![],
17072 format: None,
17073 default: None,
17074 inferred_type: None,
17075 })))
17076 }
17077 DialectType::Snowflake => {
17078 // TRY_TO_DATE(x, snowflake_fmt)
17079 Ok(Expression::Function(Box::new(Function::new(
17080 "TRY_TO_DATE".to_string(),
17081 vec![val, Expression::string(s)],
17082 ))))
17083 }
17084 _ => Ok(Expression::Function(Box::new(Function::new(
17085 "TO_DATE".to_string(),
17086 vec![val, fmt_expr],
17087 )))),
17088 }
17089 } else {
17090 Ok(Expression::Function(Box::new(Function::new(
17091 "TO_DATE".to_string(),
17092 vec![val, fmt_expr],
17093 ))))
17094 }
17095 }
17096 }
17097 // TO_TIMESTAMP(x) 1-arg: epoch conversion
17098 "TO_TIMESTAMP"
17099 if f.args.len() == 1
17100 && matches!(source, DialectType::DuckDB)
17101 && matches!(
17102 target,
17103 DialectType::BigQuery
17104 | DialectType::Presto
17105 | DialectType::Trino
17106 | DialectType::Hive
17107 | DialectType::Spark
17108 | DialectType::Databricks
17109 | DialectType::Athena
17110 ) =>
17111 {
17112 let arg = f.args.into_iter().next().unwrap();
17113 let func_name = match target {
17114 DialectType::BigQuery => "TIMESTAMP_SECONDS",
17115 DialectType::Presto
17116 | DialectType::Trino
17117 | DialectType::Athena
17118 | DialectType::Hive
17119 | DialectType::Spark
17120 | DialectType::Databricks => "FROM_UNIXTIME",
17121 _ => "TO_TIMESTAMP",
17122 };
17123 Ok(Expression::Function(Box::new(Function::new(
17124 func_name.to_string(),
17125 vec![arg],
17126 ))))
17127 }
17128 // CONCAT(x) single-arg: -> CONCAT(COALESCE(x, '')) for Spark
17129 "CONCAT" if f.args.len() == 1 => {
17130 let arg = f.args.into_iter().next().unwrap();
17131 match target {
17132 DialectType::Presto
17133 | DialectType::Trino
17134 | DialectType::Athena => {
17135 // CONCAT(a) -> CAST(a AS VARCHAR)
17136 Ok(Expression::Cast(Box::new(Cast {
17137 this: arg,
17138 to: DataType::VarChar {
17139 length: None,
17140 parenthesized_length: false,
17141 },
17142 trailing_comments: vec![],
17143 double_colon_syntax: false,
17144 format: None,
17145 default: None,
17146 inferred_type: None,
17147 })))
17148 }
17149 DialectType::TSQL => {
17150 // CONCAT(a) -> a
17151 Ok(arg)
17152 }
17153 DialectType::DuckDB => {
17154 // Keep CONCAT(a) for DuckDB (native support)
17155 Ok(Expression::Function(Box::new(Function::new(
17156 "CONCAT".to_string(),
17157 vec![arg],
17158 ))))
17159 }
17160 DialectType::Spark | DialectType::Databricks => {
17161 let coalesced = Expression::Coalesce(Box::new(
17162 crate::expressions::VarArgFunc {
17163 expressions: vec![arg, Expression::string("")],
17164 original_name: None,
17165 inferred_type: None,
17166 },
17167 ));
17168 Ok(Expression::Function(Box::new(Function::new(
17169 "CONCAT".to_string(),
17170 vec![coalesced],
17171 ))))
17172 }
17173 _ => Ok(Expression::Function(Box::new(Function::new(
17174 "CONCAT".to_string(),
17175 vec![arg],
17176 )))),
17177 }
17178 }
17179 // REGEXP_EXTRACT(a, p) 2-arg: BigQuery default group is 0 (no 3rd arg needed)
17180 "REGEXP_EXTRACT"
17181 if f.args.len() == 3 && matches!(target, DialectType::BigQuery) =>
17182 {
17183 // If group_index is 0, drop it
17184 let drop_group = match &f.args[2] {
17185 Expression::Literal(Literal::Number(n)) => n == "0",
17186 _ => false,
17187 };
17188 if drop_group {
17189 let mut args = f.args;
17190 args.truncate(2);
17191 Ok(Expression::Function(Box::new(Function::new(
17192 "REGEXP_EXTRACT".to_string(),
17193 args,
17194 ))))
17195 } else {
17196 Ok(Expression::Function(f))
17197 }
17198 }
17199 // REGEXP_EXTRACT(a, pattern, group, flags) 4-arg -> REGEXP_SUBSTR for Snowflake
17200 "REGEXP_EXTRACT"
17201 if f.args.len() == 4
17202 && matches!(target, DialectType::Snowflake) =>
17203 {
17204 // REGEXP_EXTRACT(a, 'pattern', 2, 'i') -> REGEXP_SUBSTR(a, 'pattern', 1, 1, 'i', 2)
17205 let mut args = f.args;
17206 let this = args.remove(0);
17207 let pattern = args.remove(0);
17208 let group = args.remove(0);
17209 let flags = args.remove(0);
17210 Ok(Expression::Function(Box::new(Function::new(
17211 "REGEXP_SUBSTR".to_string(),
17212 vec![
17213 this,
17214 pattern,
17215 Expression::number(1),
17216 Expression::number(1),
17217 flags,
17218 group,
17219 ],
17220 ))))
17221 }
17222 // REGEXP_SUBSTR(a, pattern, position) 3-arg -> REGEXP_EXTRACT(SUBSTRING(a, pos), pattern)
17223 "REGEXP_SUBSTR"
17224 if f.args.len() == 3
17225 && matches!(
17226 target,
17227 DialectType::DuckDB
17228 | DialectType::Presto
17229 | DialectType::Trino
17230 | DialectType::Spark
17231 | DialectType::Databricks
17232 ) =>
17233 {
17234 let mut args = f.args;
17235 let this = args.remove(0);
17236 let pattern = args.remove(0);
17237 let position = args.remove(0);
17238 // Wrap subject in SUBSTRING(this, position) to apply the offset
17239 let substring_expr = Expression::Function(Box::new(Function::new(
17240 "SUBSTRING".to_string(),
17241 vec![this, position],
17242 )));
17243 let target_name = match target {
17244 DialectType::DuckDB => "REGEXP_EXTRACT",
17245 _ => "REGEXP_EXTRACT",
17246 };
17247 Ok(Expression::Function(Box::new(Function::new(
17248 target_name.to_string(),
17249 vec![substring_expr, pattern],
17250 ))))
17251 }
17252 // TO_DAYS(x) -> (DATEDIFF(x, '0000-01-01') + 1) or target-specific
17253 "TO_DAYS" if f.args.len() == 1 => {
17254 let x = f.args.into_iter().next().unwrap();
17255 let epoch = Expression::string("0000-01-01");
17256 // Build the final target-specific expression directly
17257 let datediff_expr = match target {
17258 DialectType::MySQL | DialectType::SingleStore => {
17259 // MySQL: (DATEDIFF(x, '0000-01-01') + 1)
17260 Expression::Function(Box::new(Function::new(
17261 "DATEDIFF".to_string(),
17262 vec![x, epoch],
17263 )))
17264 }
17265 DialectType::DuckDB => {
17266 // DuckDB: (DATE_DIFF('DAY', CAST('0000-01-01' AS DATE), CAST(x AS DATE)) + 1)
17267 let cast_epoch = Expression::Cast(Box::new(Cast {
17268 this: epoch,
17269 to: DataType::Date,
17270 trailing_comments: Vec::new(),
17271 double_colon_syntax: false,
17272 format: None,
17273 default: None,
17274 inferred_type: None,
17275 }));
17276 let cast_x = Expression::Cast(Box::new(Cast {
17277 this: x,
17278 to: DataType::Date,
17279 trailing_comments: Vec::new(),
17280 double_colon_syntax: false,
17281 format: None,
17282 default: None,
17283 inferred_type: None,
17284 }));
17285 Expression::Function(Box::new(Function::new(
17286 "DATE_DIFF".to_string(),
17287 vec![Expression::string("DAY"), cast_epoch, cast_x],
17288 )))
17289 }
17290 DialectType::Presto
17291 | DialectType::Trino
17292 | DialectType::Athena => {
17293 // Presto: (DATE_DIFF('DAY', CAST(CAST('0000-01-01' AS TIMESTAMP) AS DATE), CAST(CAST(x AS TIMESTAMP) AS DATE)) + 1)
17294 let cast_epoch = Self::double_cast_timestamp_date(epoch);
17295 let cast_x = Self::double_cast_timestamp_date(x);
17296 Expression::Function(Box::new(Function::new(
17297 "DATE_DIFF".to_string(),
17298 vec![Expression::string("DAY"), cast_epoch, cast_x],
17299 )))
17300 }
17301 _ => {
17302 // Default: (DATEDIFF(x, '0000-01-01') + 1)
17303 Expression::Function(Box::new(Function::new(
17304 "DATEDIFF".to_string(),
17305 vec![x, epoch],
17306 )))
17307 }
17308 };
17309 let add_one = Expression::Add(Box::new(BinaryOp::new(
17310 datediff_expr,
17311 Expression::number(1),
17312 )));
17313 Ok(Expression::Paren(Box::new(crate::expressions::Paren {
17314 this: add_one,
17315 trailing_comments: Vec::new(),
17316 })))
17317 }
17318 // STR_TO_DATE(x, format) -> DATE_PARSE / STRPTIME / TO_DATE etc.
17319 "STR_TO_DATE"
17320 if f.args.len() == 2
17321 && matches!(
17322 target,
17323 DialectType::Presto | DialectType::Trino
17324 ) =>
17325 {
17326 let mut args = f.args;
17327 let x = args.remove(0);
17328 let format_expr = args.remove(0);
17329 // Check if the format contains time components
17330 let has_time =
17331 if let Expression::Literal(Literal::String(ref fmt)) =
17332 format_expr
17333 {
17334 fmt.contains("%H")
17335 || fmt.contains("%T")
17336 || fmt.contains("%M")
17337 || fmt.contains("%S")
17338 || fmt.contains("%I")
17339 || fmt.contains("%p")
17340 } else {
17341 false
17342 };
17343 let date_parse = Expression::Function(Box::new(Function::new(
17344 "DATE_PARSE".to_string(),
17345 vec![x, format_expr],
17346 )));
17347 if has_time {
17348 // Has time components: just DATE_PARSE
17349 Ok(date_parse)
17350 } else {
17351 // Date-only: CAST(DATE_PARSE(...) AS DATE)
17352 Ok(Expression::Cast(Box::new(Cast {
17353 this: date_parse,
17354 to: DataType::Date,
17355 trailing_comments: Vec::new(),
17356 double_colon_syntax: false,
17357 format: None,
17358 default: None,
17359 inferred_type: None,
17360 })))
17361 }
17362 }
17363 "STR_TO_DATE"
17364 if f.args.len() == 2
17365 && matches!(
17366 target,
17367 DialectType::PostgreSQL | DialectType::Redshift
17368 ) =>
17369 {
17370 let mut args = f.args;
17371 let x = args.remove(0);
17372 let fmt = args.remove(0);
17373 let pg_fmt = match fmt {
17374 Expression::Literal(Literal::String(s)) => Expression::string(
17375 &s.replace("%Y", "YYYY")
17376 .replace("%m", "MM")
17377 .replace("%d", "DD")
17378 .replace("%H", "HH24")
17379 .replace("%M", "MI")
17380 .replace("%S", "SS"),
17381 ),
17382 other => other,
17383 };
17384 let to_date = Expression::Function(Box::new(Function::new(
17385 "TO_DATE".to_string(),
17386 vec![x, pg_fmt],
17387 )));
17388 Ok(Expression::Cast(Box::new(Cast {
17389 this: to_date,
17390 to: DataType::Timestamp {
17391 timezone: false,
17392 precision: None,
17393 },
17394 trailing_comments: Vec::new(),
17395 double_colon_syntax: false,
17396 format: None,
17397 default: None,
17398 inferred_type: None,
17399 })))
17400 }
17401 // RANGE(start, end) -> GENERATE_SERIES for SQLite
17402 "RANGE"
17403 if (f.args.len() == 1 || f.args.len() == 2)
17404 && matches!(target, DialectType::SQLite) =>
17405 {
17406 if f.args.len() == 2 {
17407 // RANGE(start, end) -> (SELECT value AS col_alias FROM GENERATE_SERIES(start, end))
17408 // For SQLite, RANGE is exclusive on end, GENERATE_SERIES is inclusive
17409 let mut args = f.args;
17410 let start = args.remove(0);
17411 let end = args.remove(0);
17412 Ok(Expression::Function(Box::new(Function::new(
17413 "GENERATE_SERIES".to_string(),
17414 vec![start, end],
17415 ))))
17416 } else {
17417 Ok(Expression::Function(f))
17418 }
17419 }
17420 // UNIFORM(low, high[, seed]) -> UNIFORM(low, high, RANDOM([seed])) for Snowflake
17421 // When source is Snowflake, keep as-is (args already in correct form)
17422 "UNIFORM"
17423 if matches!(target, DialectType::Snowflake)
17424 && (f.args.len() == 2 || f.args.len() == 3) =>
17425 {
17426 if matches!(source, DialectType::Snowflake) {
17427 // Snowflake -> Snowflake: keep as-is
17428 Ok(Expression::Function(f))
17429 } else {
17430 let mut args = f.args;
17431 let low = args.remove(0);
17432 let high = args.remove(0);
17433 let random = if !args.is_empty() {
17434 let seed = args.remove(0);
17435 Expression::Function(Box::new(Function::new(
17436 "RANDOM".to_string(),
17437 vec![seed],
17438 )))
17439 } else {
17440 Expression::Function(Box::new(Function::new(
17441 "RANDOM".to_string(),
17442 vec![],
17443 )))
17444 };
17445 Ok(Expression::Function(Box::new(Function::new(
17446 "UNIFORM".to_string(),
17447 vec![low, high, random],
17448 ))))
17449 }
17450 }
17451 // TO_UTC_TIMESTAMP(ts, tz) -> target-specific UTC conversion
17452 "TO_UTC_TIMESTAMP" if f.args.len() == 2 => {
17453 let mut args = f.args;
17454 let ts_arg = args.remove(0);
17455 let tz_arg = args.remove(0);
17456 // Cast string literal to TIMESTAMP for all targets
17457 let ts_cast =
17458 if matches!(&ts_arg, Expression::Literal(Literal::String(_))) {
17459 Expression::Cast(Box::new(Cast {
17460 this: ts_arg,
17461 to: DataType::Timestamp {
17462 timezone: false,
17463 precision: None,
17464 },
17465 trailing_comments: vec![],
17466 double_colon_syntax: false,
17467 format: None,
17468 default: None,
17469 inferred_type: None,
17470 }))
17471 } else {
17472 ts_arg
17473 };
17474 match target {
17475 DialectType::Spark | DialectType::Databricks => {
17476 Ok(Expression::Function(Box::new(Function::new(
17477 "TO_UTC_TIMESTAMP".to_string(),
17478 vec![ts_cast, tz_arg],
17479 ))))
17480 }
17481 DialectType::Snowflake => {
17482 // CONVERT_TIMEZONE(tz, 'UTC', CAST(ts AS TIMESTAMP))
17483 Ok(Expression::Function(Box::new(Function::new(
17484 "CONVERT_TIMEZONE".to_string(),
17485 vec![tz_arg, Expression::string("UTC"), ts_cast],
17486 ))))
17487 }
17488 DialectType::Presto
17489 | DialectType::Trino
17490 | DialectType::Athena => {
17491 // WITH_TIMEZONE(CAST(ts AS TIMESTAMP), tz) AT TIME ZONE 'UTC'
17492 let wtz = Expression::Function(Box::new(Function::new(
17493 "WITH_TIMEZONE".to_string(),
17494 vec![ts_cast, tz_arg],
17495 )));
17496 Ok(Expression::AtTimeZone(Box::new(
17497 crate::expressions::AtTimeZone {
17498 this: wtz,
17499 zone: Expression::string("UTC"),
17500 },
17501 )))
17502 }
17503 DialectType::BigQuery => {
17504 // DATETIME(TIMESTAMP(CAST(ts AS DATETIME), tz), 'UTC')
17505 let cast_dt = Expression::Cast(Box::new(Cast {
17506 this: if let Expression::Cast(c) = ts_cast {
17507 c.this
17508 } else {
17509 ts_cast.clone()
17510 },
17511 to: DataType::Custom {
17512 name: "DATETIME".to_string(),
17513 },
17514 trailing_comments: vec![],
17515 double_colon_syntax: false,
17516 format: None,
17517 default: None,
17518 inferred_type: None,
17519 }));
17520 let ts_func =
17521 Expression::Function(Box::new(Function::new(
17522 "TIMESTAMP".to_string(),
17523 vec![cast_dt, tz_arg],
17524 )));
17525 Ok(Expression::Function(Box::new(Function::new(
17526 "DATETIME".to_string(),
17527 vec![ts_func, Expression::string("UTC")],
17528 ))))
17529 }
17530 _ => {
17531 // DuckDB, PostgreSQL, Redshift: CAST(ts AS TIMESTAMP) AT TIME ZONE tz AT TIME ZONE 'UTC'
17532 let atz1 = Expression::AtTimeZone(Box::new(
17533 crate::expressions::AtTimeZone {
17534 this: ts_cast,
17535 zone: tz_arg,
17536 },
17537 ));
17538 Ok(Expression::AtTimeZone(Box::new(
17539 crate::expressions::AtTimeZone {
17540 this: atz1,
17541 zone: Expression::string("UTC"),
17542 },
17543 )))
17544 }
17545 }
17546 }
17547 // FROM_UTC_TIMESTAMP(ts, tz) -> target-specific UTC conversion
17548 "FROM_UTC_TIMESTAMP" if f.args.len() == 2 => {
17549 let mut args = f.args;
17550 let ts_arg = args.remove(0);
17551 let tz_arg = args.remove(0);
17552 // Cast string literal to TIMESTAMP
17553 let ts_cast =
17554 if matches!(&ts_arg, Expression::Literal(Literal::String(_))) {
17555 Expression::Cast(Box::new(Cast {
17556 this: ts_arg,
17557 to: DataType::Timestamp {
17558 timezone: false,
17559 precision: None,
17560 },
17561 trailing_comments: vec![],
17562 double_colon_syntax: false,
17563 format: None,
17564 default: None,
17565 inferred_type: None,
17566 }))
17567 } else {
17568 ts_arg
17569 };
17570 match target {
17571 DialectType::Spark | DialectType::Databricks => {
17572 Ok(Expression::Function(Box::new(Function::new(
17573 "FROM_UTC_TIMESTAMP".to_string(),
17574 vec![ts_cast, tz_arg],
17575 ))))
17576 }
17577 DialectType::Presto
17578 | DialectType::Trino
17579 | DialectType::Athena => {
17580 // AT_TIMEZONE(CAST(ts AS TIMESTAMP), tz)
17581 Ok(Expression::Function(Box::new(Function::new(
17582 "AT_TIMEZONE".to_string(),
17583 vec![ts_cast, tz_arg],
17584 ))))
17585 }
17586 DialectType::Snowflake => {
17587 // CONVERT_TIMEZONE('UTC', tz, CAST(ts AS TIMESTAMP))
17588 Ok(Expression::Function(Box::new(Function::new(
17589 "CONVERT_TIMEZONE".to_string(),
17590 vec![Expression::string("UTC"), tz_arg, ts_cast],
17591 ))))
17592 }
17593 _ => {
17594 // DuckDB, PostgreSQL, Redshift: CAST(ts AS TIMESTAMP) AT TIME ZONE tz
17595 Ok(Expression::AtTimeZone(Box::new(
17596 crate::expressions::AtTimeZone {
17597 this: ts_cast,
17598 zone: tz_arg,
17599 },
17600 )))
17601 }
17602 }
17603 }
17604 // MAP_FROM_ARRAYS(keys, values) -> target-specific map construction
17605 "MAP_FROM_ARRAYS" if f.args.len() == 2 => {
17606 let name = match target {
17607 DialectType::Snowflake => "OBJECT_CONSTRUCT",
17608 _ => "MAP",
17609 };
17610 Ok(Expression::Function(Box::new(Function::new(
17611 name.to_string(),
17612 f.args,
17613 ))))
17614 }
17615 // STR_TO_MAP(s, pair_delim, kv_delim) -> SPLIT_TO_MAP for Presto
17616 "STR_TO_MAP" if f.args.len() >= 1 => match target {
17617 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
17618 Ok(Expression::Function(Box::new(Function::new(
17619 "SPLIT_TO_MAP".to_string(),
17620 f.args,
17621 ))))
17622 }
17623 _ => Ok(Expression::Function(f)),
17624 },
17625 // TIME_TO_STR(x, fmt) -> Expression::TimeToStr for proper generation
17626 "TIME_TO_STR" if f.args.len() == 2 => {
17627 let mut args = f.args;
17628 let this = args.remove(0);
17629 let fmt_expr = args.remove(0);
17630 let format =
17631 if let Expression::Literal(Literal::String(s)) = fmt_expr {
17632 s
17633 } else {
17634 "%Y-%m-%d %H:%M:%S".to_string()
17635 };
17636 Ok(Expression::TimeToStr(Box::new(
17637 crate::expressions::TimeToStr {
17638 this: Box::new(this),
17639 format,
17640 culture: None,
17641 zone: None,
17642 },
17643 )))
17644 }
17645 // STR_TO_TIME(x, fmt) -> Expression::StrToTime for proper generation
17646 "STR_TO_TIME" if f.args.len() == 2 => {
17647 let mut args = f.args;
17648 let this = args.remove(0);
17649 let fmt_expr = args.remove(0);
17650 let format =
17651 if let Expression::Literal(Literal::String(s)) = fmt_expr {
17652 s
17653 } else {
17654 "%Y-%m-%d %H:%M:%S".to_string()
17655 };
17656 Ok(Expression::StrToTime(Box::new(
17657 crate::expressions::StrToTime {
17658 this: Box::new(this),
17659 format,
17660 zone: None,
17661 safe: None,
17662 target_type: None,
17663 },
17664 )))
17665 }
17666 // STR_TO_UNIX(x, fmt) -> Expression::StrToUnix for proper generation
17667 "STR_TO_UNIX" if f.args.len() >= 1 => {
17668 let mut args = f.args;
17669 let this = args.remove(0);
17670 let format = if !args.is_empty() {
17671 if let Expression::Literal(Literal::String(s)) = args.remove(0)
17672 {
17673 Some(s)
17674 } else {
17675 None
17676 }
17677 } else {
17678 None
17679 };
17680 Ok(Expression::StrToUnix(Box::new(
17681 crate::expressions::StrToUnix {
17682 this: Some(Box::new(this)),
17683 format,
17684 },
17685 )))
17686 }
17687 // TIME_TO_UNIX(x) -> Expression::TimeToUnix for proper generation
17688 "TIME_TO_UNIX" if f.args.len() == 1 => {
17689 let mut args = f.args;
17690 let this = args.remove(0);
17691 Ok(Expression::TimeToUnix(Box::new(
17692 crate::expressions::UnaryFunc {
17693 this,
17694 original_name: None,
17695 inferred_type: None,
17696 },
17697 )))
17698 }
17699 // UNIX_TO_STR(x, fmt) -> Expression::UnixToStr for proper generation
17700 "UNIX_TO_STR" if f.args.len() >= 1 => {
17701 let mut args = f.args;
17702 let this = args.remove(0);
17703 let format = if !args.is_empty() {
17704 if let Expression::Literal(Literal::String(s)) = args.remove(0)
17705 {
17706 Some(s)
17707 } else {
17708 None
17709 }
17710 } else {
17711 None
17712 };
17713 Ok(Expression::UnixToStr(Box::new(
17714 crate::expressions::UnixToStr {
17715 this: Box::new(this),
17716 format,
17717 },
17718 )))
17719 }
17720 // UNIX_TO_TIME(x) -> Expression::UnixToTime for proper generation
17721 "UNIX_TO_TIME" if f.args.len() == 1 => {
17722 let mut args = f.args;
17723 let this = args.remove(0);
17724 Ok(Expression::UnixToTime(Box::new(
17725 crate::expressions::UnixToTime {
17726 this: Box::new(this),
17727 scale: None,
17728 zone: None,
17729 hours: None,
17730 minutes: None,
17731 format: None,
17732 target_type: None,
17733 },
17734 )))
17735 }
17736 // TIME_STR_TO_DATE(x) -> Expression::TimeStrToDate for proper generation
17737 "TIME_STR_TO_DATE" if f.args.len() == 1 => {
17738 let mut args = f.args;
17739 let this = args.remove(0);
17740 Ok(Expression::TimeStrToDate(Box::new(
17741 crate::expressions::UnaryFunc {
17742 this,
17743 original_name: None,
17744 inferred_type: None,
17745 },
17746 )))
17747 }
17748 // TIME_STR_TO_TIME(x) -> Expression::TimeStrToTime for proper generation
17749 "TIME_STR_TO_TIME" if f.args.len() == 1 => {
17750 let mut args = f.args;
17751 let this = args.remove(0);
17752 Ok(Expression::TimeStrToTime(Box::new(
17753 crate::expressions::TimeStrToTime {
17754 this: Box::new(this),
17755 zone: None,
17756 },
17757 )))
17758 }
17759 // MONTHS_BETWEEN(end, start) -> DuckDB complex expansion
17760 "MONTHS_BETWEEN" if f.args.len() == 2 => {
17761 match target {
17762 DialectType::DuckDB => {
17763 let mut args = f.args;
17764 let end_date = args.remove(0);
17765 let start_date = args.remove(0);
17766 let cast_end = Self::ensure_cast_date(end_date);
17767 let cast_start = Self::ensure_cast_date(start_date);
17768 // DATE_DIFF('MONTH', start, end) + CASE WHEN DAY(end) = DAY(LAST_DAY(end)) AND DAY(start) = DAY(LAST_DAY(start)) THEN 0 ELSE (DAY(end) - DAY(start)) / 31.0 END
17769 let dd = Expression::Function(Box::new(Function::new(
17770 "DATE_DIFF".to_string(),
17771 vec![
17772 Expression::string("MONTH"),
17773 cast_start.clone(),
17774 cast_end.clone(),
17775 ],
17776 )));
17777 let day_end =
17778 Expression::Function(Box::new(Function::new(
17779 "DAY".to_string(),
17780 vec![cast_end.clone()],
17781 )));
17782 let day_start =
17783 Expression::Function(Box::new(Function::new(
17784 "DAY".to_string(),
17785 vec![cast_start.clone()],
17786 )));
17787 let last_day_end =
17788 Expression::Function(Box::new(Function::new(
17789 "LAST_DAY".to_string(),
17790 vec![cast_end.clone()],
17791 )));
17792 let last_day_start =
17793 Expression::Function(Box::new(Function::new(
17794 "LAST_DAY".to_string(),
17795 vec![cast_start.clone()],
17796 )));
17797 let day_last_end = Expression::Function(Box::new(
17798 Function::new("DAY".to_string(), vec![last_day_end]),
17799 ));
17800 let day_last_start = Expression::Function(Box::new(
17801 Function::new("DAY".to_string(), vec![last_day_start]),
17802 ));
17803 let cond1 = Expression::Eq(Box::new(BinaryOp::new(
17804 day_end.clone(),
17805 day_last_end,
17806 )));
17807 let cond2 = Expression::Eq(Box::new(BinaryOp::new(
17808 day_start.clone(),
17809 day_last_start,
17810 )));
17811 let both_cond =
17812 Expression::And(Box::new(BinaryOp::new(cond1, cond2)));
17813 let day_diff = Expression::Sub(Box::new(BinaryOp::new(
17814 day_end, day_start,
17815 )));
17816 let day_diff_paren = Expression::Paren(Box::new(
17817 crate::expressions::Paren {
17818 this: day_diff,
17819 trailing_comments: Vec::new(),
17820 },
17821 ));
17822 let frac = Expression::Div(Box::new(BinaryOp::new(
17823 day_diff_paren,
17824 Expression::Literal(Literal::Number(
17825 "31.0".to_string(),
17826 )),
17827 )));
17828 let case_expr = Expression::Case(Box::new(Case {
17829 operand: None,
17830 whens: vec![(both_cond, Expression::number(0))],
17831 else_: Some(frac),
17832 comments: Vec::new(),
17833 inferred_type: None,
17834 }));
17835 Ok(Expression::Add(Box::new(BinaryOp::new(dd, case_expr))))
17836 }
17837 DialectType::Snowflake | DialectType::Redshift => {
17838 let mut args = f.args;
17839 let end_date = args.remove(0);
17840 let start_date = args.remove(0);
17841 let unit = Expression::Identifier(Identifier::new("MONTH"));
17842 Ok(Expression::Function(Box::new(Function::new(
17843 "DATEDIFF".to_string(),
17844 vec![unit, start_date, end_date],
17845 ))))
17846 }
17847 DialectType::Presto
17848 | DialectType::Trino
17849 | DialectType::Athena => {
17850 let mut args = f.args;
17851 let end_date = args.remove(0);
17852 let start_date = args.remove(0);
17853 Ok(Expression::Function(Box::new(Function::new(
17854 "DATE_DIFF".to_string(),
17855 vec![Expression::string("MONTH"), start_date, end_date],
17856 ))))
17857 }
17858 _ => Ok(Expression::Function(f)),
17859 }
17860 }
17861 // MONTHS_BETWEEN(end, start, roundOff) - 3-arg form (Spark-specific)
17862 // Drop the roundOff arg for non-Spark targets, keep it for Spark
17863 "MONTHS_BETWEEN" if f.args.len() == 3 => {
17864 match target {
17865 DialectType::Spark | DialectType::Databricks => {
17866 Ok(Expression::Function(f))
17867 }
17868 _ => {
17869 // Drop the 3rd arg and delegate to the 2-arg logic
17870 let mut args = f.args;
17871 let end_date = args.remove(0);
17872 let start_date = args.remove(0);
17873 // Re-create as 2-arg and process
17874 let f2 = Function::new(
17875 "MONTHS_BETWEEN".to_string(),
17876 vec![end_date, start_date],
17877 );
17878 let e2 = Expression::Function(Box::new(f2));
17879 Self::cross_dialect_normalize(e2, source, target)
17880 }
17881 }
17882 }
17883 // TO_TIMESTAMP(x) with 1 arg -> CAST(x AS TIMESTAMP) for most targets
17884 "TO_TIMESTAMP"
17885 if f.args.len() == 1
17886 && matches!(
17887 source,
17888 DialectType::Spark
17889 | DialectType::Databricks
17890 | DialectType::Hive
17891 ) =>
17892 {
17893 let arg = f.args.into_iter().next().unwrap();
17894 Ok(Expression::Cast(Box::new(Cast {
17895 this: arg,
17896 to: DataType::Timestamp {
17897 timezone: false,
17898 precision: None,
17899 },
17900 trailing_comments: vec![],
17901 double_colon_syntax: false,
17902 format: None,
17903 default: None,
17904 inferred_type: None,
17905 })))
17906 }
17907 // STRING(x) -> CAST(x AS STRING) for Spark target
17908 "STRING"
17909 if f.args.len() == 1
17910 && matches!(
17911 source,
17912 DialectType::Spark | DialectType::Databricks
17913 ) =>
17914 {
17915 let arg = f.args.into_iter().next().unwrap();
17916 let dt = match target {
17917 DialectType::Spark
17918 | DialectType::Databricks
17919 | DialectType::Hive => DataType::Custom {
17920 name: "STRING".to_string(),
17921 },
17922 _ => DataType::Text,
17923 };
17924 Ok(Expression::Cast(Box::new(Cast {
17925 this: arg,
17926 to: dt,
17927 trailing_comments: vec![],
17928 double_colon_syntax: false,
17929 format: None,
17930 default: None,
17931 inferred_type: None,
17932 })))
17933 }
17934 // LOGICAL_OR(x) -> BOOL_OR(x) for Spark target
17935 "LOGICAL_OR" if f.args.len() == 1 => {
17936 let name = match target {
17937 DialectType::Spark | DialectType::Databricks => "BOOL_OR",
17938 _ => "LOGICAL_OR",
17939 };
17940 Ok(Expression::Function(Box::new(Function::new(
17941 name.to_string(),
17942 f.args,
17943 ))))
17944 }
17945 // SPLIT(x, pattern) from Spark -> STR_SPLIT_REGEX for DuckDB, REGEXP_SPLIT for Presto
17946 "SPLIT"
17947 if f.args.len() == 2
17948 && matches!(
17949 source,
17950 DialectType::Spark
17951 | DialectType::Databricks
17952 | DialectType::Hive
17953 ) =>
17954 {
17955 let name = match target {
17956 DialectType::DuckDB => "STR_SPLIT_REGEX",
17957 DialectType::Presto
17958 | DialectType::Trino
17959 | DialectType::Athena => "REGEXP_SPLIT",
17960 DialectType::Spark
17961 | DialectType::Databricks
17962 | DialectType::Hive => "SPLIT",
17963 _ => "SPLIT",
17964 };
17965 Ok(Expression::Function(Box::new(Function::new(
17966 name.to_string(),
17967 f.args,
17968 ))))
17969 }
17970 // TRY_ELEMENT_AT -> ELEMENT_AT for Presto, array[idx] for DuckDB
17971 "TRY_ELEMENT_AT" if f.args.len() == 2 => match target {
17972 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
17973 Ok(Expression::Function(Box::new(Function::new(
17974 "ELEMENT_AT".to_string(),
17975 f.args,
17976 ))))
17977 }
17978 DialectType::DuckDB => {
17979 let mut args = f.args;
17980 let arr = args.remove(0);
17981 let idx = args.remove(0);
17982 Ok(Expression::Subscript(Box::new(
17983 crate::expressions::Subscript {
17984 this: arr,
17985 index: idx,
17986 },
17987 )))
17988 }
17989 _ => Ok(Expression::Function(f)),
17990 },
17991 // ARRAY_FILTER(arr, lambda) -> FILTER for Hive/Spark/Presto, LIST_FILTER for DuckDB
17992 "ARRAY_FILTER" if f.args.len() == 2 => {
17993 let name = match target {
17994 DialectType::DuckDB => "LIST_FILTER",
17995 DialectType::StarRocks => "ARRAY_FILTER",
17996 _ => "FILTER",
17997 };
17998 Ok(Expression::Function(Box::new(Function::new(
17999 name.to_string(),
18000 f.args,
18001 ))))
18002 }
18003 // FILTER(arr, lambda) -> ARRAY_FILTER for StarRocks, LIST_FILTER for DuckDB
18004 "FILTER" if f.args.len() == 2 => {
18005 let name = match target {
18006 DialectType::DuckDB => "LIST_FILTER",
18007 DialectType::StarRocks => "ARRAY_FILTER",
18008 _ => "FILTER",
18009 };
18010 Ok(Expression::Function(Box::new(Function::new(
18011 name.to_string(),
18012 f.args,
18013 ))))
18014 }
18015 // REDUCE(arr, init, lambda1, lambda2) -> AGGREGATE for Spark
18016 "REDUCE" if f.args.len() >= 3 => {
18017 let name = match target {
18018 DialectType::Spark | DialectType::Databricks => "AGGREGATE",
18019 _ => "REDUCE",
18020 };
18021 Ok(Expression::Function(Box::new(Function::new(
18022 name.to_string(),
18023 f.args,
18024 ))))
18025 }
18026 // CURRENT_SCHEMA() -> dialect-specific
18027 "CURRENT_SCHEMA" => {
18028 match target {
18029 DialectType::PostgreSQL => {
18030 // PostgreSQL: CURRENT_SCHEMA (no parens)
18031 Ok(Expression::Function(Box::new(Function {
18032 name: "CURRENT_SCHEMA".to_string(),
18033 args: vec![],
18034 distinct: false,
18035 trailing_comments: vec![],
18036 use_bracket_syntax: false,
18037 no_parens: true,
18038 quoted: false,
18039 span: None,
18040 inferred_type: None,
18041 })))
18042 }
18043 DialectType::MySQL
18044 | DialectType::Doris
18045 | DialectType::StarRocks => Ok(Expression::Function(Box::new(
18046 Function::new("SCHEMA".to_string(), vec![]),
18047 ))),
18048 DialectType::TSQL => Ok(Expression::Function(Box::new(
18049 Function::new("SCHEMA_NAME".to_string(), vec![]),
18050 ))),
18051 DialectType::SQLite => {
18052 Ok(Expression::Literal(Literal::String("main".to_string())))
18053 }
18054 _ => Ok(Expression::Function(f)),
18055 }
18056 }
18057 // LTRIM(str, chars) 2-arg -> TRIM(LEADING chars FROM str) for Spark/Hive/Databricks/ClickHouse
18058 "LTRIM" if f.args.len() == 2 => match target {
18059 DialectType::Spark
18060 | DialectType::Hive
18061 | DialectType::Databricks
18062 | DialectType::ClickHouse => {
18063 let mut args = f.args;
18064 let str_expr = args.remove(0);
18065 let chars = args.remove(0);
18066 Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
18067 this: str_expr,
18068 characters: Some(chars),
18069 position: crate::expressions::TrimPosition::Leading,
18070 sql_standard_syntax: true,
18071 position_explicit: true,
18072 })))
18073 }
18074 _ => Ok(Expression::Function(f)),
18075 },
18076 // RTRIM(str, chars) 2-arg -> TRIM(TRAILING chars FROM str) for Spark/Hive/Databricks/ClickHouse
18077 "RTRIM" if f.args.len() == 2 => match target {
18078 DialectType::Spark
18079 | DialectType::Hive
18080 | DialectType::Databricks
18081 | DialectType::ClickHouse => {
18082 let mut args = f.args;
18083 let str_expr = args.remove(0);
18084 let chars = args.remove(0);
18085 Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
18086 this: str_expr,
18087 characters: Some(chars),
18088 position: crate::expressions::TrimPosition::Trailing,
18089 sql_standard_syntax: true,
18090 position_explicit: true,
18091 })))
18092 }
18093 _ => Ok(Expression::Function(f)),
18094 },
18095 // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
18096 "ARRAY_REVERSE" if f.args.len() == 1 => match target {
18097 DialectType::ClickHouse => {
18098 let mut new_f = *f;
18099 new_f.name = "arrayReverse".to_string();
18100 Ok(Expression::Function(Box::new(new_f)))
18101 }
18102 _ => Ok(Expression::Function(f)),
18103 },
18104 // UUID() -> NEWID() for TSQL
18105 "UUID" if f.args.is_empty() => match target {
18106 DialectType::TSQL | DialectType::Fabric => {
18107 Ok(Expression::Function(Box::new(Function::new(
18108 "NEWID".to_string(),
18109 vec![],
18110 ))))
18111 }
18112 _ => Ok(Expression::Function(f)),
18113 },
18114 // FARM_FINGERPRINT(x) -> farmFingerprint64(x) for ClickHouse, FARMFINGERPRINT64(x) for Redshift
18115 "FARM_FINGERPRINT" if f.args.len() == 1 => match target {
18116 DialectType::ClickHouse => {
18117 let mut new_f = *f;
18118 new_f.name = "farmFingerprint64".to_string();
18119 Ok(Expression::Function(Box::new(new_f)))
18120 }
18121 DialectType::Redshift => {
18122 let mut new_f = *f;
18123 new_f.name = "FARMFINGERPRINT64".to_string();
18124 Ok(Expression::Function(Box::new(new_f)))
18125 }
18126 _ => Ok(Expression::Function(f)),
18127 },
18128 // JSON_KEYS(x) -> JSON_OBJECT_KEYS(x) for Databricks/Spark, OBJECT_KEYS(x) for Snowflake
18129 "JSON_KEYS" => match target {
18130 DialectType::Databricks | DialectType::Spark => {
18131 let mut new_f = *f;
18132 new_f.name = "JSON_OBJECT_KEYS".to_string();
18133 Ok(Expression::Function(Box::new(new_f)))
18134 }
18135 DialectType::Snowflake => {
18136 let mut new_f = *f;
18137 new_f.name = "OBJECT_KEYS".to_string();
18138 Ok(Expression::Function(Box::new(new_f)))
18139 }
18140 _ => Ok(Expression::Function(f)),
18141 },
18142 // WEEKOFYEAR(x) -> WEEKISO(x) for Snowflake
18143 "WEEKOFYEAR" => match target {
18144 DialectType::Snowflake => {
18145 let mut new_f = *f;
18146 new_f.name = "WEEKISO".to_string();
18147 Ok(Expression::Function(Box::new(new_f)))
18148 }
18149 _ => Ok(Expression::Function(f)),
18150 },
18151 // FORMAT(fmt, args...) -> FORMAT_STRING(fmt, args...) for Databricks
18152 "FORMAT"
18153 if f.args.len() >= 2 && matches!(source, DialectType::Generic) =>
18154 {
18155 match target {
18156 DialectType::Databricks | DialectType::Spark => {
18157 let mut new_f = *f;
18158 new_f.name = "FORMAT_STRING".to_string();
18159 Ok(Expression::Function(Box::new(new_f)))
18160 }
18161 _ => Ok(Expression::Function(f)),
18162 }
18163 }
18164 // CONCAT_WS('-', args...) -> CONCAT_WS('-', CAST(arg AS VARCHAR), ...) for Presto/Trino
18165 "CONCAT_WS" if f.args.len() >= 2 => match target {
18166 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
18167 let mut args = f.args;
18168 let sep = args.remove(0);
18169 let cast_args: Vec<Expression> = args
18170 .into_iter()
18171 .map(|a| {
18172 Expression::Cast(Box::new(Cast {
18173 this: a,
18174 to: DataType::VarChar {
18175 length: None,
18176 parenthesized_length: false,
18177 },
18178 double_colon_syntax: false,
18179 trailing_comments: Vec::new(),
18180 format: None,
18181 default: None,
18182 inferred_type: None,
18183 }))
18184 })
18185 .collect();
18186 let mut new_args = vec![sep];
18187 new_args.extend(cast_args);
18188 Ok(Expression::Function(Box::new(Function::new(
18189 "CONCAT_WS".to_string(),
18190 new_args,
18191 ))))
18192 }
18193 _ => Ok(Expression::Function(f)),
18194 },
18195 // ARRAY_SLICE(x, start, end) -> SLICE(x, start, end) for Presto/Trino/Databricks, arraySlice for ClickHouse
18196 "ARRAY_SLICE" if f.args.len() >= 2 => match target {
18197 DialectType::Presto
18198 | DialectType::Trino
18199 | DialectType::Athena
18200 | DialectType::Databricks
18201 | DialectType::Spark => {
18202 let mut new_f = *f;
18203 new_f.name = "SLICE".to_string();
18204 Ok(Expression::Function(Box::new(new_f)))
18205 }
18206 DialectType::ClickHouse => {
18207 let mut new_f = *f;
18208 new_f.name = "arraySlice".to_string();
18209 Ok(Expression::Function(Box::new(new_f)))
18210 }
18211 _ => Ok(Expression::Function(f)),
18212 },
18213 // ARRAY_PREPEND(arr, x) -> LIST_PREPEND(x, arr) for DuckDB (swap args)
18214 "ARRAY_PREPEND" if f.args.len() == 2 => match target {
18215 DialectType::DuckDB => {
18216 let mut args = f.args;
18217 let arr = args.remove(0);
18218 let val = args.remove(0);
18219 Ok(Expression::Function(Box::new(Function::new(
18220 "LIST_PREPEND".to_string(),
18221 vec![val, arr],
18222 ))))
18223 }
18224 _ => Ok(Expression::Function(f)),
18225 },
18226 // ARRAY_REMOVE(arr, target) -> dialect-specific
18227 "ARRAY_REMOVE" if f.args.len() == 2 => {
18228 match target {
18229 DialectType::DuckDB => {
18230 let mut args = f.args;
18231 let arr = args.remove(0);
18232 let target_val = args.remove(0);
18233 let u_id = crate::expressions::Identifier::new("_u");
18234 // LIST_FILTER(arr, _u -> _u <> target)
18235 let lambda = Expression::Lambda(Box::new(
18236 crate::expressions::LambdaExpr {
18237 parameters: vec![u_id.clone()],
18238 body: Expression::Neq(Box::new(BinaryOp {
18239 left: Expression::Identifier(u_id),
18240 right: target_val,
18241 left_comments: Vec::new(),
18242 operator_comments: Vec::new(),
18243 trailing_comments: Vec::new(),
18244 inferred_type: None,
18245 })),
18246 colon: false,
18247 parameter_types: Vec::new(),
18248 },
18249 ));
18250 Ok(Expression::Function(Box::new(Function::new(
18251 "LIST_FILTER".to_string(),
18252 vec![arr, lambda],
18253 ))))
18254 }
18255 DialectType::ClickHouse => {
18256 let mut args = f.args;
18257 let arr = args.remove(0);
18258 let target_val = args.remove(0);
18259 let u_id = crate::expressions::Identifier::new("_u");
18260 // arrayFilter(_u -> _u <> target, arr)
18261 let lambda = Expression::Lambda(Box::new(
18262 crate::expressions::LambdaExpr {
18263 parameters: vec![u_id.clone()],
18264 body: Expression::Neq(Box::new(BinaryOp {
18265 left: Expression::Identifier(u_id),
18266 right: target_val,
18267 left_comments: Vec::new(),
18268 operator_comments: Vec::new(),
18269 trailing_comments: Vec::new(),
18270 inferred_type: None,
18271 })),
18272 colon: false,
18273 parameter_types: Vec::new(),
18274 },
18275 ));
18276 Ok(Expression::Function(Box::new(Function::new(
18277 "arrayFilter".to_string(),
18278 vec![lambda, arr],
18279 ))))
18280 }
18281 DialectType::BigQuery => {
18282 // ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)
18283 let mut args = f.args;
18284 let arr = args.remove(0);
18285 let target_val = args.remove(0);
18286 let u_id = crate::expressions::Identifier::new("_u");
18287 let u_col =
18288 Expression::Column(crate::expressions::Column {
18289 name: u_id.clone(),
18290 table: None,
18291 join_mark: false,
18292 trailing_comments: Vec::new(),
18293 span: None,
18294 inferred_type: None,
18295 });
18296 // UNNEST(the_array) AS _u
18297 let unnest_expr = Expression::Unnest(Box::new(
18298 crate::expressions::UnnestFunc {
18299 this: arr,
18300 expressions: Vec::new(),
18301 with_ordinality: false,
18302 alias: None,
18303 offset_alias: None,
18304 },
18305 ));
18306 let aliased_unnest = Expression::Alias(Box::new(
18307 crate::expressions::Alias {
18308 this: unnest_expr,
18309 alias: u_id.clone(),
18310 column_aliases: Vec::new(),
18311 pre_alias_comments: Vec::new(),
18312 trailing_comments: Vec::new(),
18313 inferred_type: None,
18314 },
18315 ));
18316 // _u <> target
18317 let where_cond = Expression::Neq(Box::new(BinaryOp {
18318 left: u_col.clone(),
18319 right: target_val,
18320 left_comments: Vec::new(),
18321 operator_comments: Vec::new(),
18322 trailing_comments: Vec::new(),
18323 inferred_type: None,
18324 }));
18325 // SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target
18326 let subquery = Expression::Select(Box::new(
18327 crate::expressions::Select::new()
18328 .column(u_col)
18329 .from(aliased_unnest)
18330 .where_(where_cond),
18331 ));
18332 // ARRAY(subquery) -- use ArrayFunc with subquery as single element
18333 Ok(Expression::ArrayFunc(Box::new(
18334 crate::expressions::ArrayConstructor {
18335 expressions: vec![subquery],
18336 bracket_notation: false,
18337 use_list_keyword: false,
18338 },
18339 )))
18340 }
18341 _ => Ok(Expression::Function(f)),
18342 }
18343 }
18344 // PARSE_JSON(str) -> remove for SQLite/Doris (just use the string literal)
18345 "PARSE_JSON" if f.args.len() == 1 => {
18346 match target {
18347 DialectType::SQLite
18348 | DialectType::Doris
18349 | DialectType::MySQL
18350 | DialectType::StarRocks => {
18351 // Strip PARSE_JSON, return the inner argument
18352 Ok(f.args.into_iter().next().unwrap())
18353 }
18354 _ => Ok(Expression::Function(f)),
18355 }
18356 }
18357 // JSON_REMOVE(PARSE_JSON(str), path...) -> for SQLite strip PARSE_JSON
18358 // This is handled by PARSE_JSON stripping above; JSON_REMOVE is passed through
18359 "JSON_REMOVE" => Ok(Expression::Function(f)),
18360 // JSON_SET(PARSE_JSON(str), path, PARSE_JSON(val)) -> for SQLite strip PARSE_JSON
18361 // This is handled by PARSE_JSON stripping above; JSON_SET is passed through
18362 "JSON_SET" => Ok(Expression::Function(f)),
18363 // DECODE(x, search1, result1, ..., default) -> CASE WHEN
18364 // Behavior per search value type:
18365 // NULL literal -> CASE WHEN x IS NULL THEN result
18366 // Literal (number, string, bool) -> CASE WHEN x = literal THEN result
18367 // Non-literal (column, expr) -> CASE WHEN x = search OR (x IS NULL AND search IS NULL) THEN result
18368 "DECODE" if f.args.len() >= 3 => {
18369 // Keep as DECODE for targets that support it natively
18370 let keep_as_decode = matches!(
18371 target,
18372 DialectType::Oracle
18373 | DialectType::Snowflake
18374 | DialectType::Redshift
18375 | DialectType::Teradata
18376 | DialectType::Spark
18377 | DialectType::Databricks
18378 );
18379 if keep_as_decode {
18380 return Ok(Expression::Function(f));
18381 }
18382
18383 let mut args = f.args;
18384 let this_expr = args.remove(0);
18385 let mut pairs = Vec::new();
18386 let mut default = None;
18387 let mut i = 0;
18388 while i + 1 < args.len() {
18389 pairs.push((args[i].clone(), args[i + 1].clone()));
18390 i += 2;
18391 }
18392 if i < args.len() {
18393 default = Some(args[i].clone());
18394 }
18395 // Helper: check if expression is a literal value
18396 fn is_literal(e: &Expression) -> bool {
18397 matches!(
18398 e,
18399 Expression::Literal(_)
18400 | Expression::Boolean(_)
18401 | Expression::Neg(_)
18402 )
18403 }
18404 let whens: Vec<(Expression, Expression)> = pairs
18405 .into_iter()
18406 .map(|(search, result)| {
18407 if matches!(&search, Expression::Null(_)) {
18408 // NULL search -> IS NULL
18409 let condition = Expression::Is(Box::new(BinaryOp {
18410 left: this_expr.clone(),
18411 right: Expression::Null(crate::expressions::Null),
18412 left_comments: Vec::new(),
18413 operator_comments: Vec::new(),
18414 trailing_comments: Vec::new(),
18415 inferred_type: None,
18416 }));
18417 (condition, result)
18418 } else if is_literal(&search) {
18419 // Literal search -> simple equality
18420 let eq = Expression::Eq(Box::new(BinaryOp {
18421 left: this_expr.clone(),
18422 right: search,
18423 left_comments: Vec::new(),
18424 operator_comments: Vec::new(),
18425 trailing_comments: Vec::new(),
18426 inferred_type: None,
18427 }));
18428 (eq, result)
18429 } else {
18430 // Non-literal (column ref, expression) -> null-safe comparison
18431 let needs_paren = matches!(
18432 &search,
18433 Expression::Eq(_)
18434 | Expression::Neq(_)
18435 | Expression::Gt(_)
18436 | Expression::Gte(_)
18437 | Expression::Lt(_)
18438 | Expression::Lte(_)
18439 );
18440 let search_for_eq = if needs_paren {
18441 Expression::Paren(Box::new(
18442 crate::expressions::Paren {
18443 this: search.clone(),
18444 trailing_comments: Vec::new(),
18445 },
18446 ))
18447 } else {
18448 search.clone()
18449 };
18450 let eq = Expression::Eq(Box::new(BinaryOp {
18451 left: this_expr.clone(),
18452 right: search_for_eq,
18453 left_comments: Vec::new(),
18454 operator_comments: Vec::new(),
18455 trailing_comments: Vec::new(),
18456 inferred_type: None,
18457 }));
18458 let search_for_null = if needs_paren {
18459 Expression::Paren(Box::new(
18460 crate::expressions::Paren {
18461 this: search.clone(),
18462 trailing_comments: Vec::new(),
18463 },
18464 ))
18465 } else {
18466 search.clone()
18467 };
18468 let x_is_null = Expression::Is(Box::new(BinaryOp {
18469 left: this_expr.clone(),
18470 right: Expression::Null(crate::expressions::Null),
18471 left_comments: Vec::new(),
18472 operator_comments: Vec::new(),
18473 trailing_comments: Vec::new(),
18474 inferred_type: None,
18475 }));
18476 let s_is_null = Expression::Is(Box::new(BinaryOp {
18477 left: search_for_null,
18478 right: Expression::Null(crate::expressions::Null),
18479 left_comments: Vec::new(),
18480 operator_comments: Vec::new(),
18481 trailing_comments: Vec::new(),
18482 inferred_type: None,
18483 }));
18484 let both_null = Expression::And(Box::new(BinaryOp {
18485 left: x_is_null,
18486 right: s_is_null,
18487 left_comments: Vec::new(),
18488 operator_comments: Vec::new(),
18489 trailing_comments: Vec::new(),
18490 inferred_type: None,
18491 }));
18492 let condition = Expression::Or(Box::new(BinaryOp {
18493 left: eq,
18494 right: Expression::Paren(Box::new(
18495 crate::expressions::Paren {
18496 this: both_null,
18497 trailing_comments: Vec::new(),
18498 },
18499 )),
18500 left_comments: Vec::new(),
18501 operator_comments: Vec::new(),
18502 trailing_comments: Vec::new(),
18503 inferred_type: None,
18504 }));
18505 (condition, result)
18506 }
18507 })
18508 .collect();
18509 Ok(Expression::Case(Box::new(Case {
18510 operand: None,
18511 whens,
18512 else_: default,
18513 comments: Vec::new(),
18514 inferred_type: None,
18515 })))
18516 }
18517 // LEVENSHTEIN(a, b, ...) -> dialect-specific
18518 "LEVENSHTEIN" => {
18519 match target {
18520 DialectType::BigQuery => {
18521 let mut new_f = *f;
18522 new_f.name = "EDIT_DISTANCE".to_string();
18523 Ok(Expression::Function(Box::new(new_f)))
18524 }
18525 DialectType::Drill => {
18526 let mut new_f = *f;
18527 new_f.name = "LEVENSHTEIN_DISTANCE".to_string();
18528 Ok(Expression::Function(Box::new(new_f)))
18529 }
18530 DialectType::PostgreSQL if f.args.len() == 6 => {
18531 // PostgreSQL: LEVENSHTEIN(src, tgt, ins, del, sub, max_d) -> LEVENSHTEIN_LESS_EQUAL
18532 // 2 args: basic, 5 args: with costs, 6 args: with costs + max_distance
18533 let mut new_f = *f;
18534 new_f.name = "LEVENSHTEIN_LESS_EQUAL".to_string();
18535 Ok(Expression::Function(Box::new(new_f)))
18536 }
18537 _ => Ok(Expression::Function(f)),
18538 }
18539 }
18540 // ARRAY_MAX(x) -> arrayMax(x) for ClickHouse, LIST_MAX(x) for DuckDB
18541 "ARRAY_MAX" => {
18542 let name = match target {
18543 DialectType::ClickHouse => "arrayMax",
18544 DialectType::DuckDB => "LIST_MAX",
18545 _ => "ARRAY_MAX",
18546 };
18547 let mut new_f = *f;
18548 new_f.name = name.to_string();
18549 Ok(Expression::Function(Box::new(new_f)))
18550 }
18551 // ARRAY_MIN(x) -> arrayMin(x) for ClickHouse, LIST_MIN(x) for DuckDB
18552 "ARRAY_MIN" => {
18553 let name = match target {
18554 DialectType::ClickHouse => "arrayMin",
18555 DialectType::DuckDB => "LIST_MIN",
18556 _ => "ARRAY_MIN",
18557 };
18558 let mut new_f = *f;
18559 new_f.name = name.to_string();
18560 Ok(Expression::Function(Box::new(new_f)))
18561 }
18562 // JAROWINKLER_SIMILARITY(a, b) -> jaroWinklerSimilarity(UPPER(a), UPPER(b)) for ClickHouse
18563 // -> JARO_WINKLER_SIMILARITY(UPPER(a), UPPER(b)) for DuckDB
18564 "JAROWINKLER_SIMILARITY" if f.args.len() == 2 => {
18565 let mut args = f.args;
18566 let b = args.pop().unwrap();
18567 let a = args.pop().unwrap();
18568 match target {
18569 DialectType::ClickHouse => {
18570 let upper_a = Expression::Upper(Box::new(crate::expressions::UnaryFunc::new(a)));
18571 let upper_b = Expression::Upper(Box::new(crate::expressions::UnaryFunc::new(b)));
18572 Ok(Expression::Function(Box::new(Function::new(
18573 "jaroWinklerSimilarity".to_string(),
18574 vec![upper_a, upper_b],
18575 ))))
18576 }
18577 DialectType::DuckDB => {
18578 let upper_a = Expression::Upper(Box::new(crate::expressions::UnaryFunc::new(a)));
18579 let upper_b = Expression::Upper(Box::new(crate::expressions::UnaryFunc::new(b)));
18580 Ok(Expression::Function(Box::new(Function::new(
18581 "JARO_WINKLER_SIMILARITY".to_string(),
18582 vec![upper_a, upper_b],
18583 ))))
18584 }
18585 _ => {
18586 Ok(Expression::Function(Box::new(Function::new(
18587 "JAROWINKLER_SIMILARITY".to_string(),
18588 vec![a, b],
18589 ))))
18590 }
18591 }
18592 }
18593 // CURRENT_SCHEMAS(x) -> CURRENT_SCHEMAS() for Snowflake (drop arg)
18594 "CURRENT_SCHEMAS" => match target {
18595 DialectType::Snowflake => {
18596 Ok(Expression::Function(Box::new(Function::new(
18597 "CURRENT_SCHEMAS".to_string(),
18598 vec![],
18599 ))))
18600 }
18601 _ => Ok(Expression::Function(f)),
18602 },
18603 // TRUNC/TRUNCATE (numeric) -> dialect-specific
18604 "TRUNC" | "TRUNCATE" if f.args.len() <= 2 => {
18605 match target {
18606 DialectType::TSQL | DialectType::Fabric => {
18607 // ROUND(x, decimals, 1) - the 1 flag means truncation
18608 let mut args = f.args;
18609 let this = if args.is_empty() {
18610 return Ok(Expression::Function(Box::new(Function::new(
18611 "TRUNC".to_string(), args,
18612 ))));
18613 } else {
18614 args.remove(0)
18615 };
18616 let decimals = if args.is_empty() {
18617 Expression::Literal(Literal::Number("0".to_string()))
18618 } else {
18619 args.remove(0)
18620 };
18621 Ok(Expression::Function(Box::new(Function::new(
18622 "ROUND".to_string(),
18623 vec![this, decimals, Expression::Literal(Literal::Number("1".to_string()))],
18624 ))))
18625 }
18626 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
18627 // TRUNCATE(x, decimals)
18628 let mut new_f = *f;
18629 new_f.name = "TRUNCATE".to_string();
18630 Ok(Expression::Function(Box::new(new_f)))
18631 }
18632 DialectType::MySQL | DialectType::SingleStore | DialectType::TiDB => {
18633 // TRUNCATE(x, decimals)
18634 let mut new_f = *f;
18635 new_f.name = "TRUNCATE".to_string();
18636 Ok(Expression::Function(Box::new(new_f)))
18637 }
18638 DialectType::DuckDB => {
18639 // TRUNC(x) - drop decimals
18640 let this = f.args.into_iter().next().unwrap_or(
18641 Expression::Literal(Literal::Number("0".to_string()))
18642 );
18643 Ok(Expression::Function(Box::new(Function::new(
18644 "TRUNC".to_string(),
18645 vec![this],
18646 ))))
18647 }
18648 DialectType::ClickHouse => {
18649 // trunc(x, decimals) - lowercase
18650 let mut new_f = *f;
18651 new_f.name = "trunc".to_string();
18652 Ok(Expression::Function(Box::new(new_f)))
18653 }
18654 DialectType::Spark | DialectType::Databricks => {
18655 // Spark: TRUNC is date-only; numeric TRUNC → CAST(x AS BIGINT)
18656 let this = f.args.into_iter().next().unwrap_or(
18657 Expression::Literal(Literal::Number("0".to_string()))
18658 );
18659 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
18660 this,
18661 to: crate::expressions::DataType::BigInt { length: None },
18662 double_colon_syntax: false,
18663 trailing_comments: Vec::new(),
18664 format: None,
18665 default: None,
18666 inferred_type: None,
18667 })))
18668 }
18669 _ => {
18670 // TRUNC(x, decimals) for PostgreSQL, Oracle, Snowflake, etc.
18671 let mut new_f = *f;
18672 new_f.name = "TRUNC".to_string();
18673 Ok(Expression::Function(Box::new(new_f)))
18674 }
18675 }
18676 }
18677 // CURRENT_VERSION() -> VERSION() for most dialects
18678 "CURRENT_VERSION" => match target {
18679 DialectType::Snowflake
18680 | DialectType::Databricks
18681 | DialectType::StarRocks => {
18682 Ok(Expression::Function(f))
18683 }
18684 DialectType::SQLite => {
18685 let mut new_f = *f;
18686 new_f.name = "SQLITE_VERSION".to_string();
18687 Ok(Expression::Function(Box::new(new_f)))
18688 }
18689 _ => {
18690 let mut new_f = *f;
18691 new_f.name = "VERSION".to_string();
18692 Ok(Expression::Function(Box::new(new_f)))
18693 }
18694 },
18695 // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
18696 "ARRAY_REVERSE" => match target {
18697 DialectType::ClickHouse => {
18698 let mut new_f = *f;
18699 new_f.name = "arrayReverse".to_string();
18700 Ok(Expression::Function(Box::new(new_f)))
18701 }
18702 _ => Ok(Expression::Function(f)),
18703 },
18704 // GENERATE_DATE_ARRAY(start, end[, step]) -> target-specific
18705 "GENERATE_DATE_ARRAY" => {
18706 let mut args = f.args;
18707 if matches!(target, DialectType::BigQuery) {
18708 // BigQuery keeps GENERATE_DATE_ARRAY; add default interval if not present
18709 if args.len() == 2 {
18710 let default_interval = Expression::Interval(Box::new(
18711 crate::expressions::Interval {
18712 this: Some(Expression::Literal(Literal::String(
18713 "1".to_string(),
18714 ))),
18715 unit: Some(
18716 crate::expressions::IntervalUnitSpec::Simple {
18717 unit: crate::expressions::IntervalUnit::Day,
18718 use_plural: false,
18719 },
18720 ),
18721 },
18722 ));
18723 args.push(default_interval);
18724 }
18725 Ok(Expression::Function(Box::new(Function::new(
18726 "GENERATE_DATE_ARRAY".to_string(),
18727 args,
18728 ))))
18729 } else if matches!(target, DialectType::DuckDB) {
18730 // DuckDB: CAST(GENERATE_SERIES(start, end, step) AS DATE[])
18731 let start = args.get(0).cloned();
18732 let end = args.get(1).cloned();
18733 let step = args.get(2).cloned().or_else(|| {
18734 Some(Expression::Interval(Box::new(
18735 crate::expressions::Interval {
18736 this: Some(Expression::Literal(Literal::String(
18737 "1".to_string(),
18738 ))),
18739 unit: Some(
18740 crate::expressions::IntervalUnitSpec::Simple {
18741 unit: crate::expressions::IntervalUnit::Day,
18742 use_plural: false,
18743 },
18744 ),
18745 },
18746 )))
18747 });
18748 let gen_series = Expression::GenerateSeries(Box::new(
18749 crate::expressions::GenerateSeries {
18750 start: start.map(Box::new),
18751 end: end.map(Box::new),
18752 step: step.map(Box::new),
18753 is_end_exclusive: None,
18754 },
18755 ));
18756 Ok(Expression::Cast(Box::new(Cast {
18757 this: gen_series,
18758 to: DataType::Array {
18759 element_type: Box::new(DataType::Date),
18760 dimension: None,
18761 },
18762 trailing_comments: vec![],
18763 double_colon_syntax: false,
18764 format: None,
18765 default: None,
18766 inferred_type: None,
18767 })))
18768 } else if matches!(
18769 target,
18770 DialectType::Presto | DialectType::Trino | DialectType::Athena
18771 ) {
18772 // Presto/Trino: SEQUENCE(start, end, interval) with interval normalization
18773 let start = args.get(0).cloned();
18774 let end = args.get(1).cloned();
18775 let step = args.get(2).cloned().or_else(|| {
18776 Some(Expression::Interval(Box::new(
18777 crate::expressions::Interval {
18778 this: Some(Expression::Literal(Literal::String(
18779 "1".to_string(),
18780 ))),
18781 unit: Some(
18782 crate::expressions::IntervalUnitSpec::Simple {
18783 unit: crate::expressions::IntervalUnit::Day,
18784 use_plural: false,
18785 },
18786 ),
18787 },
18788 )))
18789 });
18790 let gen_series = Expression::GenerateSeries(Box::new(
18791 crate::expressions::GenerateSeries {
18792 start: start.map(Box::new),
18793 end: end.map(Box::new),
18794 step: step.map(Box::new),
18795 is_end_exclusive: None,
18796 },
18797 ));
18798 Ok(gen_series)
18799 } else if matches!(
18800 target,
18801 DialectType::Spark | DialectType::Databricks
18802 ) {
18803 // Spark/Databricks: SEQUENCE(start, end, step) - keep step as-is
18804 let start = args.get(0).cloned();
18805 let end = args.get(1).cloned();
18806 let step = args.get(2).cloned().or_else(|| {
18807 Some(Expression::Interval(Box::new(
18808 crate::expressions::Interval {
18809 this: Some(Expression::Literal(Literal::String(
18810 "1".to_string(),
18811 ))),
18812 unit: Some(
18813 crate::expressions::IntervalUnitSpec::Simple {
18814 unit: crate::expressions::IntervalUnit::Day,
18815 use_plural: false,
18816 },
18817 ),
18818 },
18819 )))
18820 });
18821 let gen_series = Expression::GenerateSeries(Box::new(
18822 crate::expressions::GenerateSeries {
18823 start: start.map(Box::new),
18824 end: end.map(Box::new),
18825 step: step.map(Box::new),
18826 is_end_exclusive: None,
18827 },
18828 ));
18829 Ok(gen_series)
18830 } else if matches!(target, DialectType::Snowflake) {
18831 // Snowflake: keep as GENERATE_DATE_ARRAY for later transform
18832 if args.len() == 2 {
18833 let default_interval = Expression::Interval(Box::new(
18834 crate::expressions::Interval {
18835 this: Some(Expression::Literal(Literal::String(
18836 "1".to_string(),
18837 ))),
18838 unit: Some(
18839 crate::expressions::IntervalUnitSpec::Simple {
18840 unit: crate::expressions::IntervalUnit::Day,
18841 use_plural: false,
18842 },
18843 ),
18844 },
18845 ));
18846 args.push(default_interval);
18847 }
18848 Ok(Expression::Function(Box::new(Function::new(
18849 "GENERATE_DATE_ARRAY".to_string(),
18850 args,
18851 ))))
18852 } else if matches!(
18853 target,
18854 DialectType::MySQL
18855 | DialectType::TSQL
18856 | DialectType::Fabric
18857 | DialectType::Redshift
18858 ) {
18859 // MySQL/TSQL/Redshift: keep as GENERATE_DATE_ARRAY for the preprocess
18860 // step (unnest_generate_date_array_using_recursive_cte) to convert to CTE
18861 Ok(Expression::Function(Box::new(Function::new(
18862 "GENERATE_DATE_ARRAY".to_string(),
18863 args,
18864 ))))
18865 } else {
18866 // PostgreSQL/others: convert to GenerateSeries
18867 let start = args.get(0).cloned();
18868 let end = args.get(1).cloned();
18869 let step = args.get(2).cloned().or_else(|| {
18870 Some(Expression::Interval(Box::new(
18871 crate::expressions::Interval {
18872 this: Some(Expression::Literal(Literal::String(
18873 "1".to_string(),
18874 ))),
18875 unit: Some(
18876 crate::expressions::IntervalUnitSpec::Simple {
18877 unit: crate::expressions::IntervalUnit::Day,
18878 use_plural: false,
18879 },
18880 ),
18881 },
18882 )))
18883 });
18884 Ok(Expression::GenerateSeries(Box::new(
18885 crate::expressions::GenerateSeries {
18886 start: start.map(Box::new),
18887 end: end.map(Box::new),
18888 step: step.map(Box::new),
18889 is_end_exclusive: None,
18890 },
18891 )))
18892 }
18893 }
18894 _ => Ok(Expression::Function(f)),
18895 }
18896 } else if let Expression::AggregateFunction(mut af) = e {
18897 let name = af.name.to_uppercase();
18898 match name.as_str() {
18899 "ARBITRARY" if af.args.len() == 1 => {
18900 let arg = af.args.into_iter().next().unwrap();
18901 Ok(convert_arbitrary(arg, target))
18902 }
18903 "JSON_ARRAYAGG" => {
18904 match target {
18905 DialectType::PostgreSQL => {
18906 af.name = "JSON_AGG".to_string();
18907 // Add NULLS FIRST to ORDER BY items for PostgreSQL
18908 for ordered in af.order_by.iter_mut() {
18909 if ordered.nulls_first.is_none() {
18910 ordered.nulls_first = Some(true);
18911 }
18912 }
18913 Ok(Expression::AggregateFunction(af))
18914 }
18915 _ => Ok(Expression::AggregateFunction(af)),
18916 }
18917 }
18918 _ => Ok(Expression::AggregateFunction(af)),
18919 }
18920 } else if let Expression::JSONArrayAgg(ja) = e {
18921 // JSONArrayAgg -> JSON_AGG for PostgreSQL, JSON_ARRAYAGG for others
18922 match target {
18923 DialectType::PostgreSQL => {
18924 let mut order_by = Vec::new();
18925 if let Some(order_expr) = ja.order {
18926 if let Expression::OrderBy(ob) = *order_expr {
18927 for mut ordered in ob.expressions {
18928 if ordered.nulls_first.is_none() {
18929 ordered.nulls_first = Some(true);
18930 }
18931 order_by.push(ordered);
18932 }
18933 }
18934 }
18935 Ok(Expression::AggregateFunction(Box::new(
18936 crate::expressions::AggregateFunction {
18937 name: "JSON_AGG".to_string(),
18938 args: vec![*ja.this],
18939 distinct: false,
18940 filter: None,
18941 order_by,
18942 limit: None,
18943 ignore_nulls: None,
18944 inferred_type: None,
18945 },
18946 )))
18947 }
18948 _ => Ok(Expression::JSONArrayAgg(ja)),
18949 }
18950 } else if let Expression::ToNumber(tn) = e {
18951 // TO_NUMBER(x) with no format/precision/scale -> CAST(x AS DOUBLE)
18952 let arg = *tn.this;
18953 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
18954 this: arg,
18955 to: crate::expressions::DataType::Double {
18956 precision: None,
18957 scale: None,
18958 },
18959 double_colon_syntax: false,
18960 trailing_comments: Vec::new(),
18961 format: None,
18962 default: None,
18963 inferred_type: None,
18964 })))
18965 } else {
18966 Ok(e)
18967 }
18968 }
18969
18970 Action::RegexpLikeToDuckDB => {
18971 if let Expression::RegexpLike(f) = e {
18972 let mut args = vec![f.this, f.pattern];
18973 if let Some(flags) = f.flags {
18974 args.push(flags);
18975 }
18976 Ok(Expression::Function(Box::new(Function::new(
18977 "REGEXP_MATCHES".to_string(),
18978 args,
18979 ))))
18980 } else {
18981 Ok(e)
18982 }
18983 }
18984 Action::EpochConvert => {
18985 if let Expression::Epoch(f) = e {
18986 let arg = f.this;
18987 let name = match target {
18988 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
18989 "UNIX_TIMESTAMP"
18990 }
18991 DialectType::Presto | DialectType::Trino => "TO_UNIXTIME",
18992 DialectType::BigQuery => "TIME_TO_UNIX",
18993 _ => "EPOCH",
18994 };
18995 Ok(Expression::Function(Box::new(Function::new(
18996 name.to_string(),
18997 vec![arg],
18998 ))))
18999 } else {
19000 Ok(e)
19001 }
19002 }
                Action::EpochMsConvert => {
                    // EPOCH_MS(x): milliseconds-since-epoch -> timestamp, rendered with
                    // each target dialect's native conversion idiom. Non-EpochMs nodes
                    // pass through unchanged.
                    use crate::expressions::{BinaryOp, Cast};
                    if let Expression::EpochMs(f) = e {
                        let arg = f.this;
                        match target {
                            // Spark/Databricks and BigQuery both take milliseconds
                            // directly via TIMESTAMP_MILLIS(x).
                            DialectType::Spark | DialectType::Databricks => {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "TIMESTAMP_MILLIS".to_string(),
                                    vec![arg],
                                ))))
                            }
                            DialectType::BigQuery => Ok(Expression::Function(Box::new(
                                Function::new("TIMESTAMP_MILLIS".to_string(), vec![arg]),
                            ))),
                            DialectType::Presto | DialectType::Trino => {
                                // FROM_UNIXTIME(CAST(x AS DOUBLE) / POW(10, 3))
                                // The cast forces non-integer division before scaling
                                // milliseconds down to seconds.
                                let cast_arg = Expression::Cast(Box::new(Cast {
                                    this: arg,
                                    to: DataType::Double {
                                        precision: None,
                                        scale: None,
                                    },
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                }));
                                let div = Expression::Div(Box::new(BinaryOp::new(
                                    cast_arg,
                                    Expression::Function(Box::new(Function::new(
                                        "POW".to_string(),
                                        vec![Expression::number(10), Expression::number(3)],
                                    ))),
                                )));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "FROM_UNIXTIME".to_string(),
                                    vec![div],
                                ))))
                            }
                            DialectType::MySQL => {
                                // FROM_UNIXTIME(x / POWER(10, 3)) — no cast needed here;
                                // MySQL's `/` already produces a non-integer result.
                                let div = Expression::Div(Box::new(BinaryOp::new(
                                    arg,
                                    Expression::Function(Box::new(Function::new(
                                        "POWER".to_string(),
                                        vec![Expression::number(10), Expression::number(3)],
                                    ))),
                                )));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "FROM_UNIXTIME".to_string(),
                                    vec![div],
                                ))))
                            }
                            DialectType::PostgreSQL | DialectType::Redshift => {
                                // TO_TIMESTAMP(CAST(x AS DOUBLE PRECISION) / POWER(10, 3))
                                // DOUBLE PRECISION is emitted verbatim via DataType::Custom
                                // since it is a two-word type name.
                                let cast_arg = Expression::Cast(Box::new(Cast {
                                    this: arg,
                                    to: DataType::Custom {
                                        name: "DOUBLE PRECISION".to_string(),
                                    },
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                }));
                                let div = Expression::Div(Box::new(BinaryOp::new(
                                    cast_arg,
                                    Expression::Function(Box::new(Function::new(
                                        "POWER".to_string(),
                                        vec![Expression::number(10), Expression::number(3)],
                                    ))),
                                )));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "TO_TIMESTAMP".to_string(),
                                    vec![div],
                                ))))
                            }
                            DialectType::ClickHouse => {
                                // fromUnixTimestamp64Milli(CAST(x AS Nullable(Int64)))
                                let cast_arg = Expression::Cast(Box::new(Cast {
                                    this: arg,
                                    to: DataType::Nullable {
                                        inner: Box::new(DataType::BigInt { length: None }),
                                    },
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                }));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "fromUnixTimestamp64Milli".to_string(),
                                    vec![cast_arg],
                                ))))
                            }
                            // Fallback: keep DuckDB's EPOCH_MS spelling.
                            _ => Ok(Expression::Function(Box::new(Function::new(
                                "EPOCH_MS".to_string(),
                                vec![arg],
                            )))),
                        }
                    } else {
                        Ok(e)
                    }
                }
                Action::TSQLTypeNormalize => {
                    // Normalize TSQL-specific data types (MONEY, DATETIME2, IMAGE, ...)
                    // into portable equivalents for the target dialect. Types that need
                    // no rewrite early-return the original expression unchanged.
                    if let Expression::DataType(dt) = e {
                        let new_dt = match &dt {
                            // MONEY -> DECIMAL(15, 4)
                            DataType::Custom { name } if name.eq_ignore_ascii_case("MONEY") => {
                                DataType::Decimal {
                                    precision: Some(15),
                                    scale: Some(4),
                                }
                            }
                            // SMALLMONEY -> DECIMAL(6, 4)
                            DataType::Custom { name }
                                if name.eq_ignore_ascii_case("SMALLMONEY") =>
                            {
                                DataType::Decimal {
                                    precision: Some(6),
                                    scale: Some(4),
                                }
                            }
                            // DATETIME2 (no precision) -> TIMESTAMP
                            DataType::Custom { name } if name.eq_ignore_ascii_case("DATETIME2") => {
                                DataType::Timestamp {
                                    timezone: false,
                                    precision: None,
                                }
                            }
                            // REAL spelled as a custom type -> plain FLOAT
                            DataType::Custom { name } if name.eq_ignore_ascii_case("REAL") => {
                                DataType::Float {
                                    precision: None,
                                    scale: None,
                                    real_spelling: false,
                                }
                            }
                            // FLOAT parsed with the REAL spelling -> canonical FLOAT
                            DataType::Float {
                                real_spelling: true,
                                ..
                            } => DataType::Float {
                                precision: None,
                                scale: None,
                                real_spelling: false,
                            },
                            // IMAGE -> BLOB
                            DataType::Custom { name } if name.eq_ignore_ascii_case("IMAGE") => {
                                DataType::Custom {
                                    name: "BLOB".to_string(),
                                }
                            }
                            // BIT -> BOOLEAN
                            DataType::Custom { name } if name.eq_ignore_ascii_case("BIT") => {
                                DataType::Boolean
                            }
                            // ROWVERSION -> BINARY
                            DataType::Custom { name }
                                if name.eq_ignore_ascii_case("ROWVERSION") =>
                            {
                                DataType::Custom {
                                    name: "BINARY".to_string(),
                                }
                            }
                            // UNIQUEIDENTIFIER: STRING for the Spark family, otherwise
                            // VARCHAR(36) (the textual GUID length).
                            DataType::Custom { name }
                                if name.eq_ignore_ascii_case("UNIQUEIDENTIFIER") =>
                            {
                                match target {
                                    DialectType::Spark
                                    | DialectType::Databricks
                                    | DialectType::Hive => DataType::Custom {
                                        name: "STRING".to_string(),
                                    },
                                    _ => DataType::VarChar {
                                        length: Some(36),
                                        parenthesized_length: true,
                                    },
                                }
                            }
                            // DATETIMEOFFSET: the Spark family has no TIMESTAMP WITH
                            // TIME ZONE, so it degrades to a plain TIMESTAMP there.
                            DataType::Custom { name }
                                if name.eq_ignore_ascii_case("DATETIMEOFFSET") =>
                            {
                                match target {
                                    DialectType::Spark
                                    | DialectType::Databricks
                                    | DialectType::Hive => DataType::Timestamp {
                                        timezone: false,
                                        precision: None,
                                    },
                                    _ => DataType::Timestamp {
                                        timezone: true,
                                        precision: None,
                                    },
                                }
                            }
                            DataType::Custom { ref name }
                                if name.to_uppercase().starts_with("DATETIME2(") =>
                            {
                                // DATETIME2(n) -> TIMESTAMP (fractional precision dropped)
                                DataType::Timestamp {
                                    timezone: false,
                                    precision: None,
                                }
                            }
                            DataType::Custom { ref name }
                                if name.to_uppercase().starts_with("TIME(") =>
                            {
                                // TIME(n) -> TIMESTAMP for Spark, keep as TIME for others
                                match target {
                                    DialectType::Spark
                                    | DialectType::Databricks
                                    | DialectType::Hive => DataType::Timestamp {
                                        timezone: false,
                                        precision: None,
                                    },
                                    _ => return Ok(Expression::DataType(dt)),
                                }
                            }
                            DataType::Custom { ref name }
                                if name.to_uppercase().starts_with("NUMERIC") =>
                            {
                                // Parse NUMERIC(p,s) back to Decimal(p,s); a missing or
                                // unparsable component becomes None rather than an error.
                                let upper = name.to_uppercase();
                                if let Some(inner) = upper
                                    .strip_prefix("NUMERIC(")
                                    .and_then(|s| s.strip_suffix(')'))
                                {
                                    let parts: Vec<&str> = inner.split(',').collect();
                                    let precision =
                                        parts.first().and_then(|s| s.trim().parse::<u32>().ok());
                                    let scale =
                                        parts.get(1).and_then(|s| s.trim().parse::<u32>().ok());
                                    DataType::Decimal { precision, scale }
                                } else if upper == "NUMERIC" {
                                    DataType::Decimal {
                                        precision: None,
                                        scale: None,
                                    }
                                } else {
                                    // Anything else (e.g. malformed parens) is left alone.
                                    return Ok(Expression::DataType(dt));
                                }
                            }
                            DataType::Float {
                                precision: Some(p), ..
                            } => {
                                // For Hive/Spark: FLOAT(1-32) -> FLOAT, FLOAT(33+) -> DOUBLE (IEEE 754 boundary)
                                // For other targets: FLOAT(1-24) -> FLOAT, FLOAT(25+) -> DOUBLE (TSQL boundary)
                                let boundary = match target {
                                    DialectType::Hive
                                    | DialectType::Spark
                                    | DialectType::Databricks => 32,
                                    _ => 24,
                                };
                                if *p <= boundary {
                                    DataType::Float {
                                        precision: None,
                                        scale: None,
                                        real_spelling: false,
                                    }
                                } else {
                                    DataType::Double {
                                        precision: None,
                                        scale: None,
                                    }
                                }
                            }
                            // TINYINT: DuckDB's TINYINT is signed while TSQL's is
                            // unsigned, hence UTINYINT; the Spark family widens to
                            // SMALLINT instead.
                            DataType::TinyInt { .. } => match target {
                                DialectType::DuckDB => DataType::Custom {
                                    name: "UTINYINT".to_string(),
                                },
                                DialectType::Hive
                                | DialectType::Spark
                                | DialectType::Databricks => DataType::SmallInt { length: None },
                                _ => return Ok(Expression::DataType(dt)),
                            },
                            // INTEGER -> INT for Spark/Databricks
                            DataType::Int {
                                length,
                                integer_spelling: true,
                            } => DataType::Int {
                                length: *length,
                                integer_spelling: false,
                            },
                            // Every other type is already portable.
                            _ => return Ok(Expression::DataType(dt)),
                        };
                        Ok(Expression::DataType(new_dt))
                    } else {
                        Ok(e)
                    }
                }
                Action::MySQLSafeDivide => {
                    // Emulate MySQL's division semantics (x / 0 yields NULL, result is
                    // non-integer) in targets that would otherwise error or truncate:
                    // the divisor is wrapped in NULLIF(right, 0) and, per target, the
                    // dividend is cast to a floating-point type.
                    use crate::expressions::{BinaryOp, Cast};
                    if let Expression::Div(op) = e {
                        let left = op.left;
                        let right = op.right;
                        // For SQLite: CAST left as REAL but NO NULLIF wrapping
                        // (SQLite already returns NULL on division by zero).
                        if matches!(target, DialectType::SQLite) {
                            let new_left = Expression::Cast(Box::new(Cast {
                                this: left,
                                to: DataType::Float {
                                    precision: None,
                                    scale: None,
                                    real_spelling: true,
                                },
                                trailing_comments: Vec::new(),
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            }));
                            return Ok(Expression::Div(Box::new(BinaryOp::new(new_left, right))));
                        }
                        // Wrap right in NULLIF(right, 0) so division by zero yields NULL
                        let nullif_right = Expression::Function(Box::new(Function::new(
                            "NULLIF".to_string(),
                            vec![right, Expression::number(0)],
                        )));
                        // For some dialects, also CAST the left side so `/` produces a
                        // floating-point result rather than integer division.
                        let new_left = match target {
                            // DOUBLE PRECISION spelled as a custom (two-word) type.
                            DialectType::PostgreSQL
                            | DialectType::Redshift
                            | DialectType::Teradata
                            | DialectType::Materialize
                            | DialectType::RisingWave => Expression::Cast(Box::new(Cast {
                                this: left,
                                to: DataType::Custom {
                                    name: "DOUBLE PRECISION".to_string(),
                                },
                                trailing_comments: Vec::new(),
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            })),
                            DialectType::Drill
                            | DialectType::Trino
                            | DialectType::Presto
                            | DialectType::Athena => Expression::Cast(Box::new(Cast {
                                this: left,
                                to: DataType::Double {
                                    precision: None,
                                    scale: None,
                                },
                                trailing_comments: Vec::new(),
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            })),
                            DialectType::TSQL => Expression::Cast(Box::new(Cast {
                                this: left,
                                to: DataType::Float {
                                    precision: None,
                                    scale: None,
                                    real_spelling: false,
                                },
                                trailing_comments: Vec::new(),
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            })),
                            // Other targets keep the dividend uncast.
                            _ => left,
                        };
                        Ok(Expression::Div(Box::new(BinaryOp::new(
                            new_left,
                            nullif_right,
                        ))))
                    } else {
                        Ok(e)
                    }
                }
19370 Action::AlterTableRenameStripSchema => {
19371 if let Expression::AlterTable(mut at) = e {
19372 if let Some(crate::expressions::AlterTableAction::RenameTable(
19373 ref mut new_tbl,
19374 )) = at.actions.first_mut()
19375 {
19376 new_tbl.schema = None;
19377 new_tbl.catalog = None;
19378 }
19379 Ok(Expression::AlterTable(at))
19380 } else {
19381 Ok(e)
19382 }
19383 }
                Action::NullsOrdering => {
                    // Fill in the source dialect's implied null ordering default.
                    // This makes implicit null ordering explicit so the target generator
                    // can correctly strip or keep it.
                    //
                    // Dialect null ordering categories (per the matches! below):
                    // nulls_are_large (Oracle, PostgreSQL, Redshift, Snowflake):
                    //     ASC -> NULLS LAST, DESC -> NULLS FIRST
                    // nulls_are_last (DuckDB, Presto, Trino, Dremio, Athena,
                    //                 ClickHouse, Drill, Exasol, DataFusion):
                    //     NULLS LAST always (both ASC and DESC)
                    // nulls_are_small (everything else: Spark, Hive, BigQuery, MySQL,
                    //                  Databricks, ...):
                    //     ASC -> NULLS FIRST, DESC -> NULLS LAST
                    if let Expression::Ordered(mut o) = e {
                        let is_asc = !o.desc;

                        let is_source_nulls_large = matches!(
                            source,
                            DialectType::Oracle
                                | DialectType::PostgreSQL
                                | DialectType::Redshift
                                | DialectType::Snowflake
                        );
                        let is_source_nulls_last = matches!(
                            source,
                            DialectType::DuckDB
                                | DialectType::Presto
                                | DialectType::Trino
                                | DialectType::Dremio
                                | DialectType::Athena
                                | DialectType::ClickHouse
                                | DialectType::Drill
                                | DialectType::Exasol
                                | DialectType::DataFusion
                        );

                        // Determine target category to check if default matches
                        let is_target_nulls_large = matches!(
                            target,
                            DialectType::Oracle
                                | DialectType::PostgreSQL
                                | DialectType::Redshift
                                | DialectType::Snowflake
                        );
                        let is_target_nulls_last = matches!(
                            target,
                            DialectType::DuckDB
                                | DialectType::Presto
                                | DialectType::Trino
                                | DialectType::Dremio
                                | DialectType::Athena
                                | DialectType::ClickHouse
                                | DialectType::Drill
                                | DialectType::Exasol
                                | DialectType::DataFusion
                        );

                        // Compute the implied nulls_first for source
                        let source_nulls_first = if is_source_nulls_large {
                            !is_asc // ASC -> NULLS LAST (false), DESC -> NULLS FIRST (true)
                        } else if is_source_nulls_last {
                            false // NULLS LAST always
                        } else {
                            is_asc // nulls_are_small: ASC -> NULLS FIRST (true), DESC -> NULLS LAST (false)
                        };

                        // Compute the target's default the same way
                        let target_nulls_first = if is_target_nulls_large {
                            !is_asc
                        } else if is_target_nulls_last {
                            false
                        } else {
                            is_asc
                        };

                        // Only add explicit nulls ordering if source and target defaults differ
                        if source_nulls_first != target_nulls_first {
                            o.nulls_first = Some(source_nulls_first);
                        }
                        // If they match, leave nulls_first as None so the generator won't output it

                        Ok(Expression::Ordered(o))
                    } else {
                        Ok(e)
                    }
                }
                Action::StringAggConvert => {
                    // Convert STRING_AGG — either bare or wrapped in WITHIN GROUP —
                    // into the target dialect's string-aggregation construct.
                    match e {
                        Expression::WithinGroup(wg) => {
                            // STRING_AGG(x, sep) WITHIN GROUP (ORDER BY z) -> target-specific
                            // Extract args and distinct flag from either Function, AggregateFunction, or StringAgg
                            // (the parser may have produced any of the three shapes).
                            let (x_opt, sep_opt, distinct) = match wg.this {
                                Expression::AggregateFunction(ref af)
                                    if af.name.eq_ignore_ascii_case("STRING_AGG")
                                        && af.args.len() >= 2 =>
                                {
                                    (
                                        Some(af.args[0].clone()),
                                        Some(af.args[1].clone()),
                                        af.distinct,
                                    )
                                }
                                Expression::Function(ref f)
                                    if f.name.eq_ignore_ascii_case("STRING_AGG")
                                        && f.args.len() >= 2 =>
                                {
                                    // Plain Function carries no DISTINCT flag.
                                    (Some(f.args[0].clone()), Some(f.args[1].clone()), false)
                                }
                                Expression::StringAgg(ref sa) => {
                                    (Some(sa.this.clone()), sa.separator.clone(), sa.distinct)
                                }
                                _ => (None, None, false),
                            };
                            if let (Some(x), Some(sep)) = (x_opt, sep_opt) {
                                let order_by = wg.order_by;

                                match target {
                                    DialectType::TSQL | DialectType::Fabric => {
                                        // Keep as WithinGroup(StringAgg) for TSQL
                                        Ok(Expression::WithinGroup(Box::new(
                                            crate::expressions::WithinGroup {
                                                this: Expression::StringAgg(Box::new(
                                                    crate::expressions::StringAggFunc {
                                                        this: x,
                                                        separator: Some(sep),
                                                        order_by: None, // order_by goes in WithinGroup, not StringAgg
                                                        distinct,
                                                        filter: None,
                                                        limit: None,
                                                        inferred_type: None,
                                                    },
                                                )),
                                                order_by,
                                            },
                                        )))
                                    }
                                    DialectType::MySQL
                                    | DialectType::SingleStore
                                    | DialectType::Doris
                                    | DialectType::StarRocks => {
                                        // GROUP_CONCAT(x ORDER BY z SEPARATOR sep)
                                        Ok(Expression::GroupConcat(Box::new(
                                            crate::expressions::GroupConcatFunc {
                                                this: x,
                                                separator: Some(sep),
                                                order_by: Some(order_by),
                                                distinct,
                                                filter: None,
                                                inferred_type: None,
                                            },
                                        )))
                                    }
                                    DialectType::SQLite => {
                                        // GROUP_CONCAT(x, sep) - no ORDER BY support,
                                        // so the ordering is dropped.
                                        Ok(Expression::GroupConcat(Box::new(
                                            crate::expressions::GroupConcatFunc {
                                                this: x,
                                                separator: Some(sep),
                                                order_by: None,
                                                distinct,
                                                filter: None,
                                                inferred_type: None,
                                            },
                                        )))
                                    }
                                    DialectType::PostgreSQL | DialectType::Redshift => {
                                        // STRING_AGG(x, sep ORDER BY z)
                                        Ok(Expression::StringAgg(Box::new(
                                            crate::expressions::StringAggFunc {
                                                this: x,
                                                separator: Some(sep),
                                                order_by: Some(order_by),
                                                distinct,
                                                filter: None,
                                                limit: None,
                                                inferred_type: None,
                                            },
                                        )))
                                    }
                                    _ => {
                                        // Default: keep as STRING_AGG(x, sep) with ORDER BY inside
                                        Ok(Expression::StringAgg(Box::new(
                                            crate::expressions::StringAggFunc {
                                                this: x,
                                                separator: Some(sep),
                                                order_by: Some(order_by),
                                                distinct,
                                                filter: None,
                                                limit: None,
                                                inferred_type: None,
                                            },
                                        )))
                                    }
                                }
                            } else {
                                // Not a STRING_AGG within-group: leave untouched.
                                Ok(Expression::WithinGroup(wg))
                            }
                        }
                        Expression::StringAgg(sa) => {
                            match target {
                                DialectType::MySQL
                                | DialectType::SingleStore
                                | DialectType::Doris
                                | DialectType::StarRocks => {
                                    // STRING_AGG(x, sep) -> GROUP_CONCAT(x SEPARATOR sep)
                                    Ok(Expression::GroupConcat(Box::new(
                                        crate::expressions::GroupConcatFunc {
                                            this: sa.this,
                                            separator: sa.separator,
                                            order_by: sa.order_by,
                                            distinct: sa.distinct,
                                            filter: sa.filter,
                                            inferred_type: None,
                                        },
                                    )))
                                }
                                DialectType::SQLite => {
                                    // STRING_AGG(x, sep) -> GROUP_CONCAT(x, sep)
                                    Ok(Expression::GroupConcat(Box::new(
                                        crate::expressions::GroupConcatFunc {
                                            this: sa.this,
                                            separator: sa.separator,
                                            order_by: None, // SQLite doesn't support ORDER BY in GROUP_CONCAT
                                            distinct: sa.distinct,
                                            filter: sa.filter,
                                            inferred_type: None,
                                        },
                                    )))
                                }
                                DialectType::Spark | DialectType::Databricks => {
                                    // STRING_AGG(x, sep) -> LISTAGG(x, sep)
                                    Ok(Expression::ListAgg(Box::new(
                                        crate::expressions::ListAggFunc {
                                            this: sa.this,
                                            separator: sa.separator,
                                            on_overflow: None,
                                            order_by: sa.order_by,
                                            distinct: sa.distinct,
                                            filter: None,
                                            inferred_type: None,
                                        },
                                    )))
                                }
                                _ => Ok(Expression::StringAgg(sa)),
                            }
                        }
                        _ => Ok(e),
                    }
                }
19632 Action::GroupConcatConvert => {
19633 // Helper to expand CONCAT(a, b, c) -> a || b || c (for PostgreSQL/SQLite)
19634 // or CONCAT(a, b, c) -> a + b + c (for TSQL)
19635 fn expand_concat_to_dpipe(expr: Expression) -> Expression {
19636 if let Expression::Function(ref f) = expr {
19637 if f.name.to_uppercase() == "CONCAT" && f.args.len() > 1 {
19638 let mut result = f.args[0].clone();
19639 for arg in &f.args[1..] {
19640 result = Expression::Concat(Box::new(BinaryOp {
19641 left: result,
19642 right: arg.clone(),
19643 left_comments: vec![],
19644 operator_comments: vec![],
19645 trailing_comments: vec![],
19646 inferred_type: None,
19647 }));
19648 }
19649 return result;
19650 }
19651 }
19652 expr
19653 }
19654 fn expand_concat_to_plus(expr: Expression) -> Expression {
19655 if let Expression::Function(ref f) = expr {
19656 if f.name.to_uppercase() == "CONCAT" && f.args.len() > 1 {
19657 let mut result = f.args[0].clone();
19658 for arg in &f.args[1..] {
19659 result = Expression::Add(Box::new(BinaryOp {
19660 left: result,
19661 right: arg.clone(),
19662 left_comments: vec![],
19663 operator_comments: vec![],
19664 trailing_comments: vec![],
19665 inferred_type: None,
19666 }));
19667 }
19668 return result;
19669 }
19670 }
19671 expr
19672 }
19673 // Helper to wrap each arg in CAST(arg AS VARCHAR) for Presto/Trino CONCAT
19674 fn wrap_concat_args_in_varchar_cast(expr: Expression) -> Expression {
19675 if let Expression::Function(ref f) = expr {
19676 if f.name.to_uppercase() == "CONCAT" && f.args.len() > 1 {
19677 let new_args: Vec<Expression> = f
19678 .args
19679 .iter()
19680 .map(|arg| {
19681 Expression::Cast(Box::new(crate::expressions::Cast {
19682 this: arg.clone(),
19683 to: crate::expressions::DataType::VarChar {
19684 length: None,
19685 parenthesized_length: false,
19686 },
19687 trailing_comments: Vec::new(),
19688 double_colon_syntax: false,
19689 format: None,
19690 default: None,
19691 inferred_type: None,
19692 }))
19693 })
19694 .collect();
19695 return Expression::Function(Box::new(
19696 crate::expressions::Function::new(
19697 "CONCAT".to_string(),
19698 new_args,
19699 ),
19700 ));
19701 }
19702 }
19703 expr
19704 }
19705 if let Expression::GroupConcat(gc) = e {
19706 match target {
19707 DialectType::Presto => {
19708 // GROUP_CONCAT(x [, sep]) -> ARRAY_JOIN(ARRAY_AGG(x), sep)
19709 let sep = gc.separator.unwrap_or(Expression::string(","));
19710 // For multi-arg CONCAT, wrap each arg in CAST(... AS VARCHAR)
19711 let this = wrap_concat_args_in_varchar_cast(gc.this);
19712 let array_agg =
19713 Expression::ArrayAgg(Box::new(crate::expressions::AggFunc {
19714 this,
19715 distinct: gc.distinct,
19716 filter: gc.filter,
19717 order_by: gc.order_by.unwrap_or_default(),
19718 name: None,
19719 ignore_nulls: None,
19720 having_max: None,
19721 limit: None,
19722 inferred_type: None,
19723 }));
19724 Ok(Expression::ArrayJoin(Box::new(
19725 crate::expressions::ArrayJoinFunc {
19726 this: array_agg,
19727 separator: sep,
19728 null_replacement: None,
19729 },
19730 )))
19731 }
19732 DialectType::Trino => {
19733 // GROUP_CONCAT(x [, sep]) -> LISTAGG(x, sep)
19734 let sep = gc.separator.unwrap_or(Expression::string(","));
19735 // For multi-arg CONCAT, wrap each arg in CAST(... AS VARCHAR)
19736 let this = wrap_concat_args_in_varchar_cast(gc.this);
19737 Ok(Expression::ListAgg(Box::new(
19738 crate::expressions::ListAggFunc {
19739 this,
19740 separator: Some(sep),
19741 on_overflow: None,
19742 order_by: gc.order_by,
19743 distinct: gc.distinct,
19744 filter: gc.filter,
19745 inferred_type: None,
19746 },
19747 )))
19748 }
19749 DialectType::PostgreSQL
19750 | DialectType::Redshift
19751 | DialectType::Snowflake
19752 | DialectType::DuckDB
19753 | DialectType::Hive
19754 | DialectType::ClickHouse => {
19755 // GROUP_CONCAT(x [, sep]) -> STRING_AGG(x, sep)
19756 let sep = gc.separator.unwrap_or(Expression::string(","));
19757 // Expand CONCAT(a,b,c) -> a || b || c for || dialects
19758 let this = expand_concat_to_dpipe(gc.this);
19759 // For PostgreSQL, add NULLS LAST for DESC / NULLS FIRST for ASC
19760 let order_by = if target == DialectType::PostgreSQL {
19761 gc.order_by.map(|ords| {
19762 ords.into_iter()
19763 .map(|mut o| {
19764 if o.nulls_first.is_none() {
19765 if o.desc {
19766 o.nulls_first = Some(false);
19767 // NULLS LAST
19768 } else {
19769 o.nulls_first = Some(true);
19770 // NULLS FIRST
19771 }
19772 }
19773 o
19774 })
19775 .collect()
19776 })
19777 } else {
19778 gc.order_by
19779 };
19780 Ok(Expression::StringAgg(Box::new(
19781 crate::expressions::StringAggFunc {
19782 this,
19783 separator: Some(sep),
19784 order_by,
19785 distinct: gc.distinct,
19786 filter: gc.filter,
19787 limit: None,
19788 inferred_type: None,
19789 },
19790 )))
19791 }
19792 DialectType::TSQL => {
19793 // GROUP_CONCAT(x [, sep]) -> STRING_AGG(x, sep) WITHIN GROUP (ORDER BY ...)
19794 // TSQL doesn't support DISTINCT in STRING_AGG
19795 let sep = gc.separator.unwrap_or(Expression::string(","));
19796 // Expand CONCAT(a,b,c) -> a + b + c for TSQL
19797 let this = expand_concat_to_plus(gc.this);
19798 Ok(Expression::StringAgg(Box::new(
19799 crate::expressions::StringAggFunc {
19800 this,
19801 separator: Some(sep),
19802 order_by: gc.order_by,
19803 distinct: false, // TSQL doesn't support DISTINCT in STRING_AGG
19804 filter: gc.filter,
19805 limit: None,
19806 inferred_type: None,
19807 },
19808 )))
19809 }
19810 DialectType::SQLite => {
19811 // GROUP_CONCAT stays as GROUP_CONCAT but ORDER BY is removed
19812 // SQLite GROUP_CONCAT doesn't support ORDER BY
19813 // Expand CONCAT(a,b,c) -> a || b || c
19814 let this = expand_concat_to_dpipe(gc.this);
19815 Ok(Expression::GroupConcat(Box::new(
19816 crate::expressions::GroupConcatFunc {
19817 this,
19818 separator: gc.separator,
19819 order_by: None, // SQLite doesn't support ORDER BY in GROUP_CONCAT
19820 distinct: gc.distinct,
19821 filter: gc.filter,
19822 inferred_type: None,
19823 },
19824 )))
19825 }
19826 DialectType::Spark | DialectType::Databricks => {
19827 // GROUP_CONCAT(x [, sep]) -> LISTAGG(x, sep)
19828 let sep = gc.separator.unwrap_or(Expression::string(","));
19829 Ok(Expression::ListAgg(Box::new(
19830 crate::expressions::ListAggFunc {
19831 this: gc.this,
19832 separator: Some(sep),
19833 on_overflow: None,
19834 order_by: gc.order_by,
19835 distinct: gc.distinct,
19836 filter: None,
19837 inferred_type: None,
19838 },
19839 )))
19840 }
19841 DialectType::MySQL
19842 | DialectType::SingleStore
19843 | DialectType::StarRocks => {
19844 // MySQL GROUP_CONCAT should have explicit SEPARATOR (default ',')
19845 if gc.separator.is_none() {
19846 let mut gc = gc;
19847 gc.separator = Some(Expression::string(","));
19848 Ok(Expression::GroupConcat(gc))
19849 } else {
19850 Ok(Expression::GroupConcat(gc))
19851 }
19852 }
19853 _ => Ok(Expression::GroupConcat(gc)),
19854 }
19855 } else {
19856 Ok(e)
19857 }
19858 }
                Action::TempTableHash => {
                    // Translate TSQL's `#name` temp-table convention: strip the leading
                    // '#' from CREATE/DROP/reference names and mark created tables
                    // TEMPORARY.
                    match e {
                        Expression::CreateTable(mut ct) => {
                            // TSQL #table -> TEMPORARY TABLE with # stripped from name
                            let name = &ct.name.name.name;
                            if name.starts_with('#') {
                                ct.name.name.name = name.trim_start_matches('#').to_string();
                            }
                            // Set temporary flag
                            // NOTE(review): this is set even when the name had no '#'
                            // prefix — presumably the action only runs on statements
                            // already known to involve temp tables; confirm at the
                            // call site that registers this action.
                            ct.temporary = true;
                            Ok(Expression::CreateTable(ct))
                        }
                        Expression::Table(mut tr) => {
                            // Strip # from table references
                            let name = &tr.name.name;
                            if name.starts_with('#') {
                                tr.name.name = name.trim_start_matches('#').to_string();
                            }
                            Ok(Expression::Table(tr))
                        }
                        Expression::DropTable(mut dt) => {
                            // Strip # from DROP TABLE names
                            for table_ref in &mut dt.names {
                                if table_ref.name.name.starts_with('#') {
                                    table_ref.name.name =
                                        table_ref.name.name.trim_start_matches('#').to_string();
                                }
                            }
                            Ok(Expression::DropTable(dt))
                        }
                        _ => Ok(e),
                    }
                }
19892 Action::NvlClearOriginal => {
19893 if let Expression::Nvl(mut f) = e {
19894 f.original_name = None;
19895 Ok(Expression::Nvl(f))
19896 } else {
19897 Ok(e)
19898 }
19899 }
19900 Action::HiveCastToTryCast => {
19901 // Convert Hive/Spark CAST to TRY_CAST for targets that support it
19902 if let Expression::Cast(mut c) = e {
19903 // For Spark/Hive -> DuckDB: TIMESTAMP -> TIMESTAMPTZ
19904 // (Spark's TIMESTAMP is always timezone-aware)
19905 if matches!(target, DialectType::DuckDB)
19906 && matches!(source, DialectType::Spark | DialectType::Databricks)
19907 && matches!(
19908 c.to,
19909 DataType::Timestamp {
19910 timezone: false,
19911 ..
19912 }
19913 )
19914 {
19915 c.to = DataType::Custom {
19916 name: "TIMESTAMPTZ".to_string(),
19917 };
19918 }
19919 // For Spark source -> Databricks: VARCHAR/CHAR -> STRING
19920 // Spark parses VARCHAR(n)/CHAR(n) as TEXT, normalize to STRING
19921 if matches!(target, DialectType::Databricks | DialectType::Spark)
19922 && matches!(
19923 source,
19924 DialectType::Spark | DialectType::Databricks | DialectType::Hive
19925 )
19926 && Self::has_varchar_char_type(&c.to)
19927 {
19928 c.to = Self::normalize_varchar_to_string(c.to);
19929 }
19930 Ok(Expression::TryCast(c))
19931 } else {
19932 Ok(e)
19933 }
19934 }
                Action::XorExpand => {
                    // Expand XOR to (a AND NOT b) OR (NOT a AND b) for dialects without XOR keyword
                    // Snowflake: use BOOLXOR(a, b) instead
                    // Multi-operand XOR is folded left-to-right: ((a XOR b) XOR c) ...
                    if let Expression::Xor(xor) = e {
                        // Collect all XOR operands from the node's three slots.
                        let mut operands = Vec::new();
                        if let Some(this) = xor.this {
                            operands.push(*this);
                        }
                        if let Some(expr) = xor.expression {
                            operands.push(*expr);
                        }
                        operands.extend(xor.expressions);

                        // Snowflake: use BOOLXOR(a, b) — binary form only; XOR chains
                        // with more than two operands fall through to the expansion.
                        if matches!(target, DialectType::Snowflake) && operands.len() == 2 {
                            let a = operands.remove(0);
                            let b = operands.remove(0);
                            return Ok(Expression::Function(Box::new(Function::new(
                                "BOOLXOR".to_string(),
                                vec![a, b],
                            ))));
                        }

                        // Helper to build (a AND NOT b) OR (NOT a AND b); operands are
                        // cloned because each appears twice in the expansion, and each
                        // compound sub-expression is parenthesized to fix precedence.
                        let make_xor = |a: Expression, b: Expression| -> Expression {
                            let not_b = Expression::Not(Box::new(
                                crate::expressions::UnaryOp::new(b.clone()),
                            ));
                            let not_a = Expression::Not(Box::new(
                                crate::expressions::UnaryOp::new(a.clone()),
                            ));
                            let left_and = Expression::And(Box::new(BinaryOp {
                                left: a,
                                right: Expression::Paren(Box::new(Paren {
                                    this: not_b,
                                    trailing_comments: Vec::new(),
                                })),
                                left_comments: Vec::new(),
                                operator_comments: Vec::new(),
                                trailing_comments: Vec::new(),
                                inferred_type: None,
                            }));
                            let right_and = Expression::And(Box::new(BinaryOp {
                                left: Expression::Paren(Box::new(Paren {
                                    this: not_a,
                                    trailing_comments: Vec::new(),
                                })),
                                right: b,
                                left_comments: Vec::new(),
                                operator_comments: Vec::new(),
                                trailing_comments: Vec::new(),
                                inferred_type: None,
                            }));
                            Expression::Or(Box::new(BinaryOp {
                                left: Expression::Paren(Box::new(Paren {
                                    this: left_and,
                                    trailing_comments: Vec::new(),
                                })),
                                right: Expression::Paren(Box::new(Paren {
                                    this: right_and,
                                    trailing_comments: Vec::new(),
                                })),
                                left_comments: Vec::new(),
                                operator_comments: Vec::new(),
                                trailing_comments: Vec::new(),
                                inferred_type: None,
                            }))
                        };

                        if operands.len() >= 2 {
                            let mut result = make_xor(operands.remove(0), operands.remove(0));
                            for operand in operands {
                                result = make_xor(result, operand);
                            }
                            Ok(result)
                        } else if operands.len() == 1 {
                            // Degenerate single-operand XOR is just the operand.
                            Ok(operands.remove(0))
                        } else {
                            // No operands - return FALSE (shouldn't happen)
                            Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
                                value: false,
                            }))
                        }
                    } else {
                        Ok(e)
                    }
                }
            Action::DatePartUnquote => {
                // DATE_PART('month', x) -> DATE_PART(month, x) for Snowflake target
                // Convert the quoted string first arg to a bare Column/Identifier
                if let Expression::Function(mut f) = e {
                    // Only rewrite when the first argument is a plain string
                    // literal; any other first-argument shape is left alone.
                    if let Some(Expression::Literal(crate::expressions::Literal::String(s))) =
                        f.args.first()
                    {
                        // Lowercase the part name before emitting it as a bare
                        // (unquoted) identifier.
                        let bare_name = s.to_lowercase();
                        f.args[0] = Expression::Column(crate::expressions::Column {
                            name: Identifier::new(bare_name),
                            table: None,
                            join_mark: false,
                            trailing_comments: Vec::new(),
                            span: None,
                            inferred_type: None,
                        });
                    }
                    Ok(Expression::Function(f))
                } else {
                    // Not a function call: pass through unchanged.
                    Ok(e)
                }
            }
20045 Action::ArrayLengthConvert => {
20046 // Extract the argument from the expression
20047 let arg = match e {
20048 Expression::Cardinality(ref f) => f.this.clone(),
20049 Expression::ArrayLength(ref f) => f.this.clone(),
20050 Expression::ArraySize(ref f) => f.this.clone(),
20051 _ => return Ok(e),
20052 };
20053 match target {
20054 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
20055 Ok(Expression::Function(Box::new(Function::new(
20056 "SIZE".to_string(),
20057 vec![arg],
20058 ))))
20059 }
20060 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
20061 Ok(Expression::Cardinality(Box::new(
20062 crate::expressions::UnaryFunc::new(arg),
20063 )))
20064 }
20065 DialectType::BigQuery => Ok(Expression::ArrayLength(Box::new(
20066 crate::expressions::UnaryFunc::new(arg),
20067 ))),
20068 DialectType::DuckDB => Ok(Expression::ArrayLength(Box::new(
20069 crate::expressions::UnaryFunc::new(arg),
20070 ))),
20071 DialectType::PostgreSQL | DialectType::Redshift => {
20072 // PostgreSQL ARRAY_LENGTH requires dimension arg
20073 Ok(Expression::Function(Box::new(Function::new(
20074 "ARRAY_LENGTH".to_string(),
20075 vec![arg, Expression::number(1)],
20076 ))))
20077 }
20078 DialectType::Snowflake => Ok(Expression::ArraySize(Box::new(
20079 crate::expressions::UnaryFunc::new(arg),
20080 ))),
20081 _ => Ok(e), // Keep original
20082 }
20083 }
20084
20085 Action::JsonExtractToArrow => {
20086 // JSON_EXTRACT(x, path) -> x -> path for SQLite/DuckDB (set arrow_syntax = true)
20087 if let Expression::JsonExtract(mut f) = e {
20088 f.arrow_syntax = true;
20089 // Transform path: convert bracket notation to dot notation
20090 // SQLite strips wildcards, DuckDB preserves them
20091 if let Expression::Literal(Literal::String(ref s)) = f.path {
20092 let mut transformed = s.clone();
20093 if matches!(target, DialectType::SQLite) {
20094 transformed = Self::strip_json_wildcards(&transformed);
20095 }
20096 transformed = Self::bracket_to_dot_notation(&transformed);
20097 if transformed != *s {
20098 f.path = Expression::string(&transformed);
20099 }
20100 }
20101 Ok(Expression::JsonExtract(f))
20102 } else {
20103 Ok(e)
20104 }
20105 }
20106
20107 Action::JsonExtractToGetJsonObject => {
20108 if let Expression::JsonExtract(f) = e {
20109 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
20110 // JSON_EXTRACT(x, '$.key') -> JSON_EXTRACT_PATH(x, 'key') for PostgreSQL
20111 // Use proper decomposition that handles brackets
20112 let keys: Vec<Expression> =
20113 if let Expression::Literal(Literal::String(ref s)) = f.path {
20114 let parts = Self::decompose_json_path(s);
20115 parts.into_iter().map(|k| Expression::string(&k)).collect()
20116 } else {
20117 vec![f.path]
20118 };
20119 let func_name = if matches!(target, DialectType::Redshift) {
20120 "JSON_EXTRACT_PATH_TEXT"
20121 } else {
20122 "JSON_EXTRACT_PATH"
20123 };
20124 let mut args = vec![f.this];
20125 args.extend(keys);
20126 Ok(Expression::Function(Box::new(Function::new(
20127 func_name.to_string(),
20128 args,
20129 ))))
20130 } else {
20131 // GET_JSON_OBJECT(x, '$.path') for Hive/Spark
20132 // Convert bracket double quotes to single quotes
20133 let path = if let Expression::Literal(Literal::String(ref s)) = f.path {
20134 let normalized = Self::bracket_to_single_quotes(s);
20135 if normalized != *s {
20136 Expression::string(&normalized)
20137 } else {
20138 f.path
20139 }
20140 } else {
20141 f.path
20142 };
20143 Ok(Expression::Function(Box::new(Function::new(
20144 "GET_JSON_OBJECT".to_string(),
20145 vec![f.this, path],
20146 ))))
20147 }
20148 } else {
20149 Ok(e)
20150 }
20151 }
20152
20153 Action::JsonExtractScalarToGetJsonObject => {
20154 // JSON_EXTRACT_SCALAR(x, '$.path') -> GET_JSON_OBJECT(x, '$.path') for Hive/Spark
20155 if let Expression::JsonExtractScalar(f) = e {
20156 Ok(Expression::Function(Box::new(Function::new(
20157 "GET_JSON_OBJECT".to_string(),
20158 vec![f.this, f.path],
20159 ))))
20160 } else {
20161 Ok(e)
20162 }
20163 }
20164
20165 Action::JsonExtractToTsql => {
20166 // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> ISNULL(JSON_QUERY(x, path), JSON_VALUE(x, path)) for TSQL
20167 let (this, path) = match e {
20168 Expression::JsonExtract(f) => (f.this, f.path),
20169 Expression::JsonExtractScalar(f) => (f.this, f.path),
20170 _ => return Ok(e),
20171 };
20172 // Transform path: strip wildcards, convert bracket notation to dot notation
20173 let transformed_path = if let Expression::Literal(Literal::String(ref s)) = path
20174 {
20175 let stripped = Self::strip_json_wildcards(s);
20176 let dotted = Self::bracket_to_dot_notation(&stripped);
20177 Expression::string(&dotted)
20178 } else {
20179 path
20180 };
20181 let json_query = Expression::Function(Box::new(Function::new(
20182 "JSON_QUERY".to_string(),
20183 vec![this.clone(), transformed_path.clone()],
20184 )));
20185 let json_value = Expression::Function(Box::new(Function::new(
20186 "JSON_VALUE".to_string(),
20187 vec![this, transformed_path],
20188 )));
20189 Ok(Expression::Function(Box::new(Function::new(
20190 "ISNULL".to_string(),
20191 vec![json_query, json_value],
20192 ))))
20193 }
20194
20195 Action::JsonExtractToClickHouse => {
20196 // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> JSONExtractString(x, 'key1', idx, 'key2') for ClickHouse
20197 let (this, path) = match e {
20198 Expression::JsonExtract(f) => (f.this, f.path),
20199 Expression::JsonExtractScalar(f) => (f.this, f.path),
20200 _ => return Ok(e),
20201 };
20202 let args: Vec<Expression> =
20203 if let Expression::Literal(Literal::String(ref s)) = path {
20204 let parts = Self::decompose_json_path(s);
20205 let mut result = vec![this];
20206 for part in parts {
20207 // ClickHouse uses 1-based integer indices for array access
20208 if let Ok(idx) = part.parse::<i64>() {
20209 result.push(Expression::number(idx + 1));
20210 } else {
20211 result.push(Expression::string(&part));
20212 }
20213 }
20214 result
20215 } else {
20216 vec![this, path]
20217 };
20218 Ok(Expression::Function(Box::new(Function::new(
20219 "JSONExtractString".to_string(),
20220 args,
20221 ))))
20222 }
20223
20224 Action::JsonExtractScalarConvert => {
20225 // JSON_EXTRACT_SCALAR -> target-specific
20226 if let Expression::JsonExtractScalar(f) = e {
20227 match target {
20228 DialectType::PostgreSQL | DialectType::Redshift => {
20229 // JSON_EXTRACT_SCALAR(x, '$.path') -> JSON_EXTRACT_PATH_TEXT(x, 'key1', 'key2')
20230 let keys: Vec<Expression> =
20231 if let Expression::Literal(Literal::String(ref s)) = f.path {
20232 let parts = Self::decompose_json_path(s);
20233 parts.into_iter().map(|k| Expression::string(&k)).collect()
20234 } else {
20235 vec![f.path]
20236 };
20237 let mut args = vec![f.this];
20238 args.extend(keys);
20239 Ok(Expression::Function(Box::new(Function::new(
20240 "JSON_EXTRACT_PATH_TEXT".to_string(),
20241 args,
20242 ))))
20243 }
20244 DialectType::Snowflake => {
20245 // JSON_EXTRACT_SCALAR(x, '$.path') -> JSON_EXTRACT_PATH_TEXT(x, 'stripped_path')
20246 let stripped_path =
20247 if let Expression::Literal(Literal::String(ref s)) = f.path {
20248 let stripped = Self::strip_json_dollar_prefix(s);
20249 Expression::string(&stripped)
20250 } else {
20251 f.path
20252 };
20253 Ok(Expression::Function(Box::new(Function::new(
20254 "JSON_EXTRACT_PATH_TEXT".to_string(),
20255 vec![f.this, stripped_path],
20256 ))))
20257 }
20258 DialectType::SQLite | DialectType::DuckDB => {
20259 // JSON_EXTRACT_SCALAR(x, '$.path') -> x ->> '$.path'
20260 Ok(Expression::JsonExtractScalar(Box::new(
20261 crate::expressions::JsonExtractFunc {
20262 this: f.this,
20263 path: f.path,
20264 returning: f.returning,
20265 arrow_syntax: true,
20266 hash_arrow_syntax: false,
20267 wrapper_option: None,
20268 quotes_option: None,
20269 on_scalar_string: false,
20270 on_error: None,
20271 },
20272 )))
20273 }
20274 _ => Ok(Expression::JsonExtractScalar(f)),
20275 }
20276 } else {
20277 Ok(e)
20278 }
20279 }
20280
20281 Action::JsonPathNormalize => {
20282 // Normalize JSON path format for BigQuery, MySQL, etc.
20283 if let Expression::JsonExtract(mut f) = e {
20284 if let Expression::Literal(Literal::String(ref s)) = f.path {
20285 let mut normalized = s.clone();
20286 // Convert bracket notation and handle wildcards per dialect
20287 match target {
20288 DialectType::BigQuery => {
20289 // BigQuery strips wildcards and uses single quotes in brackets
20290 normalized = Self::strip_json_wildcards(&normalized);
20291 normalized = Self::bracket_to_single_quotes(&normalized);
20292 }
20293 DialectType::MySQL => {
20294 // MySQL preserves wildcards, converts brackets to dot notation
20295 normalized = Self::bracket_to_dot_notation(&normalized);
20296 }
20297 _ => {}
20298 }
20299 if normalized != *s {
20300 f.path = Expression::string(&normalized);
20301 }
20302 }
20303 Ok(Expression::JsonExtract(f))
20304 } else {
20305 Ok(e)
20306 }
20307 }
20308
20309 Action::JsonQueryValueConvert => {
20310 // JsonQuery/JsonValue -> target-specific
20311 let (f, is_query) = match e {
20312 Expression::JsonQuery(f) => (f, true),
20313 Expression::JsonValue(f) => (f, false),
20314 _ => return Ok(e),
20315 };
20316 match target {
20317 DialectType::TSQL | DialectType::Fabric => {
20318 // ISNULL(JSON_QUERY(...), JSON_VALUE(...))
20319 let json_query = Expression::Function(Box::new(Function::new(
20320 "JSON_QUERY".to_string(),
20321 vec![f.this.clone(), f.path.clone()],
20322 )));
20323 let json_value = Expression::Function(Box::new(Function::new(
20324 "JSON_VALUE".to_string(),
20325 vec![f.this, f.path],
20326 )));
20327 Ok(Expression::Function(Box::new(Function::new(
20328 "ISNULL".to_string(),
20329 vec![json_query, json_value],
20330 ))))
20331 }
20332 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
20333 Ok(Expression::Function(Box::new(Function::new(
20334 "GET_JSON_OBJECT".to_string(),
20335 vec![f.this, f.path],
20336 ))))
20337 }
20338 DialectType::PostgreSQL | DialectType::Redshift => {
20339 Ok(Expression::Function(Box::new(Function::new(
20340 "JSON_EXTRACT_PATH_TEXT".to_string(),
20341 vec![f.this, f.path],
20342 ))))
20343 }
20344 DialectType::DuckDB | DialectType::SQLite => {
20345 // json -> path arrow syntax
20346 Ok(Expression::JsonExtract(Box::new(
20347 crate::expressions::JsonExtractFunc {
20348 this: f.this,
20349 path: f.path,
20350 returning: f.returning,
20351 arrow_syntax: true,
20352 hash_arrow_syntax: false,
20353 wrapper_option: f.wrapper_option,
20354 quotes_option: f.quotes_option,
20355 on_scalar_string: f.on_scalar_string,
20356 on_error: f.on_error,
20357 },
20358 )))
20359 }
20360 DialectType::Snowflake => {
20361 // GET_PATH(PARSE_JSON(json), 'path')
20362 // Strip $. prefix from path
20363 // Only wrap in PARSE_JSON if not already a PARSE_JSON call or ParseJson expression
20364 let json_expr = match &f.this {
20365 Expression::Function(ref inner_f)
20366 if inner_f.name.eq_ignore_ascii_case("PARSE_JSON") =>
20367 {
20368 f.this
20369 }
20370 Expression::ParseJson(_) => {
20371 // Already a ParseJson expression, which generates as PARSE_JSON(...)
20372 f.this
20373 }
20374 _ => Expression::Function(Box::new(Function::new(
20375 "PARSE_JSON".to_string(),
20376 vec![f.this],
20377 ))),
20378 };
20379 let path_str = match &f.path {
20380 Expression::Literal(Literal::String(s)) => {
20381 let stripped = s.strip_prefix("$.").unwrap_or(s);
20382 Expression::Literal(Literal::String(stripped.to_string()))
20383 }
20384 other => other.clone(),
20385 };
20386 Ok(Expression::Function(Box::new(Function::new(
20387 "GET_PATH".to_string(),
20388 vec![json_expr, path_str],
20389 ))))
20390 }
20391 _ => {
20392 // Default: keep as JSON_QUERY/JSON_VALUE function
20393 let func_name = if is_query { "JSON_QUERY" } else { "JSON_VALUE" };
20394 Ok(Expression::Function(Box::new(Function::new(
20395 func_name.to_string(),
20396 vec![f.this, f.path],
20397 ))))
20398 }
20399 }
20400 }
20401
20402 Action::JsonLiteralToJsonParse => {
20403 // CAST('x' AS JSON) -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake
20404 if let Expression::Cast(c) = e {
20405 let func_name = if matches!(target, DialectType::Snowflake) {
20406 "PARSE_JSON"
20407 } else {
20408 "JSON_PARSE"
20409 };
20410 Ok(Expression::Function(Box::new(Function::new(
20411 func_name.to_string(),
20412 vec![c.this],
20413 ))))
20414 } else {
20415 Ok(e)
20416 }
20417 }
20418
20419 Action::AtTimeZoneConvert => {
20420 // AT TIME ZONE -> target-specific conversion
20421 if let Expression::AtTimeZone(atz) = e {
20422 match target {
20423 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
20424 Ok(Expression::Function(Box::new(Function::new(
20425 "AT_TIMEZONE".to_string(),
20426 vec![atz.this, atz.zone],
20427 ))))
20428 }
20429 DialectType::Spark | DialectType::Databricks => {
20430 Ok(Expression::Function(Box::new(Function::new(
20431 "FROM_UTC_TIMESTAMP".to_string(),
20432 vec![atz.this, atz.zone],
20433 ))))
20434 }
20435 DialectType::Snowflake => {
20436 // CONVERT_TIMEZONE('zone', expr)
20437 Ok(Expression::Function(Box::new(Function::new(
20438 "CONVERT_TIMEZONE".to_string(),
20439 vec![atz.zone, atz.this],
20440 ))))
20441 }
20442 DialectType::BigQuery => {
20443 // TIMESTAMP(DATETIME(expr, 'zone'))
20444 let datetime_call = Expression::Function(Box::new(Function::new(
20445 "DATETIME".to_string(),
20446 vec![atz.this, atz.zone],
20447 )));
20448 Ok(Expression::Function(Box::new(Function::new(
20449 "TIMESTAMP".to_string(),
20450 vec![datetime_call],
20451 ))))
20452 }
20453 _ => Ok(Expression::Function(Box::new(Function::new(
20454 "AT_TIMEZONE".to_string(),
20455 vec![atz.this, atz.zone],
20456 )))),
20457 }
20458 } else {
20459 Ok(e)
20460 }
20461 }
20462
20463 Action::DayOfWeekConvert => {
20464 // DAY_OF_WEEK -> ISODOW for DuckDB, ((DAYOFWEEK(x) % 7) + 1) for Spark
20465 if let Expression::DayOfWeek(f) = e {
20466 match target {
20467 DialectType::DuckDB => Ok(Expression::Function(Box::new(
20468 Function::new("ISODOW".to_string(), vec![f.this]),
20469 ))),
20470 DialectType::Spark | DialectType::Databricks => {
20471 // ((DAYOFWEEK(x) % 7) + 1)
20472 let dayofweek = Expression::Function(Box::new(Function::new(
20473 "DAYOFWEEK".to_string(),
20474 vec![f.this],
20475 )));
20476 let modulo = Expression::Mod(Box::new(BinaryOp {
20477 left: dayofweek,
20478 right: Expression::number(7),
20479 left_comments: Vec::new(),
20480 operator_comments: Vec::new(),
20481 trailing_comments: Vec::new(),
20482 inferred_type: None,
20483 }));
20484 let paren_mod = Expression::Paren(Box::new(Paren {
20485 this: modulo,
20486 trailing_comments: Vec::new(),
20487 }));
20488 let add_one = Expression::Add(Box::new(BinaryOp {
20489 left: paren_mod,
20490 right: Expression::number(1),
20491 left_comments: Vec::new(),
20492 operator_comments: Vec::new(),
20493 trailing_comments: Vec::new(),
20494 inferred_type: None,
20495 }));
20496 Ok(Expression::Paren(Box::new(Paren {
20497 this: add_one,
20498 trailing_comments: Vec::new(),
20499 })))
20500 }
20501 _ => Ok(Expression::DayOfWeek(f)),
20502 }
20503 } else {
20504 Ok(e)
20505 }
20506 }
20507
20508 Action::MaxByMinByConvert => {
20509 // MAX_BY -> argMax for ClickHouse, drop 3rd arg for Spark
20510 // MIN_BY -> argMin for ClickHouse, ARG_MIN for DuckDB, drop 3rd arg for Spark/ClickHouse
20511 // Handle both Expression::Function and Expression::AggregateFunction
20512 let (is_max, args) = match &e {
20513 Expression::Function(f) => {
20514 (f.name.eq_ignore_ascii_case("MAX_BY"), f.args.clone())
20515 }
20516 Expression::AggregateFunction(af) => {
20517 (af.name.eq_ignore_ascii_case("MAX_BY"), af.args.clone())
20518 }
20519 _ => return Ok(e),
20520 };
20521 match target {
20522 DialectType::ClickHouse => {
20523 let name = if is_max { "argMax" } else { "argMin" };
20524 let mut args = args;
20525 args.truncate(2);
20526 Ok(Expression::Function(Box::new(Function::new(
20527 name.to_string(),
20528 args,
20529 ))))
20530 }
20531 DialectType::DuckDB => {
20532 let name = if is_max { "ARG_MAX" } else { "ARG_MIN" };
20533 Ok(Expression::Function(Box::new(Function::new(
20534 name.to_string(),
20535 args,
20536 ))))
20537 }
20538 DialectType::Spark | DialectType::Databricks => {
20539 let mut args = args;
20540 args.truncate(2);
20541 let name = if is_max { "MAX_BY" } else { "MIN_BY" };
20542 Ok(Expression::Function(Box::new(Function::new(
20543 name.to_string(),
20544 args,
20545 ))))
20546 }
20547 _ => Ok(e),
20548 }
20549 }
20550
20551 Action::ElementAtConvert => {
20552 // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
20553 let (arr, idx) = if let Expression::ElementAt(bf) = e {
20554 (bf.this, bf.expression)
20555 } else if let Expression::Function(ref f) = e {
20556 if f.args.len() >= 2 {
20557 if let Expression::Function(f) = e {
20558 let mut args = f.args;
20559 let arr = args.remove(0);
20560 let idx = args.remove(0);
20561 (arr, idx)
20562 } else {
20563 unreachable!("outer condition already matched Expression::Function")
20564 }
20565 } else {
20566 return Ok(e);
20567 }
20568 } else {
20569 return Ok(e);
20570 };
20571 match target {
20572 DialectType::PostgreSQL => {
20573 // Wrap array in parens for PostgreSQL: (ARRAY[1,2,3])[4]
20574 let arr_expr = Expression::Paren(Box::new(Paren {
20575 this: arr,
20576 trailing_comments: vec![],
20577 }));
20578 Ok(Expression::Subscript(Box::new(
20579 crate::expressions::Subscript {
20580 this: arr_expr,
20581 index: idx,
20582 },
20583 )))
20584 }
20585 DialectType::BigQuery => {
20586 // BigQuery: convert ARRAY[...] to bare [...] for subscript
20587 let arr_expr = match arr {
20588 Expression::ArrayFunc(af) => Expression::ArrayFunc(Box::new(
20589 crate::expressions::ArrayConstructor {
20590 expressions: af.expressions,
20591 bracket_notation: true,
20592 use_list_keyword: false,
20593 },
20594 )),
20595 other => other,
20596 };
20597 let safe_ordinal = Expression::Function(Box::new(Function::new(
20598 "SAFE_ORDINAL".to_string(),
20599 vec![idx],
20600 )));
20601 Ok(Expression::Subscript(Box::new(
20602 crate::expressions::Subscript {
20603 this: arr_expr,
20604 index: safe_ordinal,
20605 },
20606 )))
20607 }
20608 _ => Ok(Expression::Function(Box::new(Function::new(
20609 "ELEMENT_AT".to_string(),
20610 vec![arr, idx],
20611 )))),
20612 }
20613 }
20614
20615 Action::CurrentUserParens => {
20616 // CURRENT_USER -> CURRENT_USER() for Snowflake
20617 Ok(Expression::Function(Box::new(Function::new(
20618 "CURRENT_USER".to_string(),
20619 vec![],
20620 ))))
20621 }
20622
20623 Action::ArrayAggToCollectList => {
20624 // ARRAY_AGG(x ORDER BY ...) -> COLLECT_LIST(x) for Hive/Spark
20625 // Python sqlglot Hive.arrayagg_sql strips ORDER BY for simple cases
20626 // but preserves it when DISTINCT/IGNORE NULLS/LIMIT are present
20627 match e {
20628 Expression::AggregateFunction(mut af) => {
20629 let is_simple =
20630 !af.distinct && af.ignore_nulls.is_none() && af.limit.is_none();
20631 let args = if af.args.is_empty() {
20632 vec![]
20633 } else {
20634 vec![af.args[0].clone()]
20635 };
20636 af.name = "COLLECT_LIST".to_string();
20637 af.args = args;
20638 if is_simple {
20639 af.order_by = Vec::new();
20640 }
20641 Ok(Expression::AggregateFunction(af))
20642 }
20643 Expression::ArrayAgg(agg) => {
20644 let is_simple =
20645 !agg.distinct && agg.ignore_nulls.is_none() && agg.limit.is_none();
20646 Ok(Expression::AggregateFunction(Box::new(
20647 crate::expressions::AggregateFunction {
20648 name: "COLLECT_LIST".to_string(),
20649 args: vec![agg.this.clone()],
20650 distinct: agg.distinct,
20651 filter: agg.filter.clone(),
20652 order_by: if is_simple {
20653 Vec::new()
20654 } else {
20655 agg.order_by.clone()
20656 },
20657 limit: agg.limit.clone(),
20658 ignore_nulls: agg.ignore_nulls,
20659 inferred_type: None,
20660 },
20661 )))
20662 }
20663 _ => Ok(e),
20664 }
20665 }
20666
20667 Action::ArraySyntaxConvert => {
20668 match e {
20669 // ARRAY[1, 2] (ArrayFunc bracket_notation=false) -> set bracket_notation=true
20670 // so the generator uses dialect-specific output (ARRAY() for Spark, [] for BigQuery)
20671 Expression::ArrayFunc(arr) if !arr.bracket_notation => Ok(
20672 Expression::ArrayFunc(Box::new(crate::expressions::ArrayConstructor {
20673 expressions: arr.expressions,
20674 bracket_notation: true,
20675 use_list_keyword: false,
20676 })),
20677 ),
20678 // ARRAY(y) function style -> ArrayFunc for target dialect
20679 // bracket_notation=true for BigQuery/DuckDB/ClickHouse/StarRocks (output []), false for Presto (output ARRAY[])
20680 Expression::Function(f) if f.name.eq_ignore_ascii_case("ARRAY") => {
20681 let bracket = matches!(
20682 target,
20683 DialectType::BigQuery
20684 | DialectType::DuckDB
20685 | DialectType::Snowflake
20686 | DialectType::ClickHouse
20687 | DialectType::StarRocks
20688 );
20689 Ok(Expression::ArrayFunc(Box::new(
20690 crate::expressions::ArrayConstructor {
20691 expressions: f.args,
20692 bracket_notation: bracket,
20693 use_list_keyword: false,
20694 },
20695 )))
20696 }
20697 _ => Ok(e),
20698 }
20699 }
20700
20701 Action::CastToJsonForSpark => {
20702 // CAST(x AS JSON) -> TO_JSON(x) for Spark
20703 if let Expression::Cast(c) = e {
20704 Ok(Expression::Function(Box::new(Function::new(
20705 "TO_JSON".to_string(),
20706 vec![c.this],
20707 ))))
20708 } else {
20709 Ok(e)
20710 }
20711 }
20712
20713 Action::CastJsonToFromJson => {
20714 // CAST(ParseJson(literal) AS ARRAY/MAP/STRUCT) -> FROM_JSON(literal, type_string) for Spark
20715 if let Expression::Cast(c) = e {
20716 // Extract the string literal from ParseJson
20717 let literal_expr = if let Expression::ParseJson(pj) = c.this {
20718 pj.this
20719 } else {
20720 c.this
20721 };
20722 // Convert the target DataType to Spark's type string format
20723 let type_str = Self::data_type_to_spark_string(&c.to);
20724 Ok(Expression::Function(Box::new(Function::new(
20725 "FROM_JSON".to_string(),
20726 vec![literal_expr, Expression::Literal(Literal::String(type_str))],
20727 ))))
20728 } else {
20729 Ok(e)
20730 }
20731 }
20732
20733 Action::ToJsonConvert => {
20734 // TO_JSON(x) -> target-specific conversion
20735 if let Expression::ToJson(f) = e {
20736 let arg = f.this;
20737 match target {
20738 DialectType::Presto | DialectType::Trino => {
20739 // JSON_FORMAT(CAST(x AS JSON))
20740 let cast_json = Expression::Cast(Box::new(Cast {
20741 this: arg,
20742 to: DataType::Custom {
20743 name: "JSON".to_string(),
20744 },
20745 trailing_comments: vec![],
20746 double_colon_syntax: false,
20747 format: None,
20748 default: None,
20749 inferred_type: None,
20750 }));
20751 Ok(Expression::Function(Box::new(Function::new(
20752 "JSON_FORMAT".to_string(),
20753 vec![cast_json],
20754 ))))
20755 }
20756 DialectType::BigQuery => Ok(Expression::Function(Box::new(
20757 Function::new("TO_JSON_STRING".to_string(), vec![arg]),
20758 ))),
20759 DialectType::DuckDB => {
20760 // CAST(TO_JSON(x) AS TEXT)
20761 let to_json =
20762 Expression::ToJson(Box::new(crate::expressions::UnaryFunc {
20763 this: arg,
20764 original_name: None,
20765 inferred_type: None,
20766 }));
20767 Ok(Expression::Cast(Box::new(Cast {
20768 this: to_json,
20769 to: DataType::Text,
20770 trailing_comments: vec![],
20771 double_colon_syntax: false,
20772 format: None,
20773 default: None,
20774 inferred_type: None,
20775 })))
20776 }
20777 _ => Ok(Expression::ToJson(Box::new(
20778 crate::expressions::UnaryFunc {
20779 this: arg,
20780 original_name: None,
20781 inferred_type: None,
20782 },
20783 ))),
20784 }
20785 } else {
20786 Ok(e)
20787 }
20788 }
20789
20790 Action::VarianceToClickHouse => {
20791 if let Expression::Variance(f) = e {
20792 Ok(Expression::Function(Box::new(Function::new(
20793 "varSamp".to_string(),
20794 vec![f.this],
20795 ))))
20796 } else {
20797 Ok(e)
20798 }
20799 }
20800
20801 Action::StddevToClickHouse => {
20802 if let Expression::Stddev(f) = e {
20803 Ok(Expression::Function(Box::new(Function::new(
20804 "stddevSamp".to_string(),
20805 vec![f.this],
20806 ))))
20807 } else {
20808 Ok(e)
20809 }
20810 }
20811
20812 Action::ApproxQuantileConvert => {
20813 if let Expression::ApproxQuantile(aq) = e {
20814 let mut args = vec![*aq.this];
20815 if let Some(q) = aq.quantile {
20816 args.push(*q);
20817 }
20818 Ok(Expression::Function(Box::new(Function::new(
20819 "APPROX_PERCENTILE".to_string(),
20820 args,
20821 ))))
20822 } else {
20823 Ok(e)
20824 }
20825 }
20826
20827 Action::DollarParamConvert => {
20828 if let Expression::Parameter(p) = e {
20829 Ok(Expression::Parameter(Box::new(
20830 crate::expressions::Parameter {
20831 name: p.name,
20832 index: p.index,
20833 style: crate::expressions::ParameterStyle::At,
20834 quoted: p.quoted,
20835 string_quoted: p.string_quoted,
20836 expression: p.expression,
20837 },
20838 )))
20839 } else {
20840 Ok(e)
20841 }
20842 }
20843
20844 Action::EscapeStringNormalize => {
20845 if let Expression::Literal(Literal::EscapeString(s)) = e {
20846 // Strip prefix (e.g., "e:" or "E:") if present from tokenizer
20847 let stripped = if s.starts_with("e:") || s.starts_with("E:") {
20848 s[2..].to_string()
20849 } else {
20850 s
20851 };
20852 let normalized = stripped
20853 .replace('\n', "\\n")
20854 .replace('\r', "\\r")
20855 .replace('\t', "\\t");
20856 match target {
20857 DialectType::BigQuery => {
20858 // BigQuery: e'...' -> CAST(b'...' AS STRING)
20859 // Use Raw for the b'...' part to avoid double-escaping
20860 let raw_sql = format!("CAST(b'{}' AS STRING)", normalized);
20861 Ok(Expression::Raw(crate::expressions::Raw { sql: raw_sql }))
20862 }
20863 _ => Ok(Expression::Literal(Literal::EscapeString(normalized))),
20864 }
20865 } else {
20866 Ok(e)
20867 }
20868 }
20869
20870 Action::StraightJoinCase => {
20871 // straight_join: keep lowercase for DuckDB, quote for MySQL
20872 if let Expression::Column(col) = e {
20873 if col.name.name == "STRAIGHT_JOIN" {
20874 let mut new_col = col;
20875 new_col.name.name = "straight_join".to_string();
20876 if matches!(target, DialectType::MySQL) {
20877 // MySQL: needs quoting since it's a reserved keyword
20878 new_col.name.quoted = true;
20879 }
20880 Ok(Expression::Column(new_col))
20881 } else {
20882 Ok(Expression::Column(col))
20883 }
20884 } else {
20885 Ok(e)
20886 }
20887 }
20888
            Action::TablesampleReservoir => {
                // TABLESAMPLE -> TABLESAMPLE RESERVOIR for DuckDB
                if let Expression::TableSample(mut ts) = e {
                    if let Some(ref mut sample) = ts.sample {
                        // Force the RESERVOIR method and mark it explicit so the
                        // generator prints the method keyword in the output SQL.
                        sample.method = crate::expressions::SampleMethod::Reservoir;
                        sample.explicit_method = true;
                    }
                    Ok(Expression::TableSample(ts))
                } else {
                    Ok(e)
                }
            }
20901
20902 Action::TablesampleSnowflakeStrip => {
20903 // Strip method and PERCENT for Snowflake target from non-Snowflake source
20904 match e {
20905 Expression::TableSample(mut ts) => {
20906 if let Some(ref mut sample) = ts.sample {
20907 sample.suppress_method_output = true;
20908 sample.unit_after_size = false;
20909 sample.is_percent = false;
20910 }
20911 Ok(Expression::TableSample(ts))
20912 }
20913 Expression::Table(mut t) => {
20914 if let Some(ref mut sample) = t.table_sample {
20915 sample.suppress_method_output = true;
20916 sample.unit_after_size = false;
20917 sample.is_percent = false;
20918 }
20919 Ok(Expression::Table(t))
20920 }
20921 _ => Ok(e),
20922 }
20923 }
20924
            Action::FirstToAnyValue => {
                // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
                if let Expression::First(mut agg) = e {
                    // Drop the IGNORE NULLS modifier and rename; the rest of the
                    // aggregate payload is reused as-is for the AnyValue node.
                    agg.ignore_nulls = None;
                    agg.name = Some("ANY_VALUE".to_string());
                    Ok(Expression::AnyValue(agg))
                } else {
                    Ok(e)
                }
            }
20935
            Action::ArrayIndexConvert => {
                // Subscript index: 1-based to 0-based for BigQuery
                if let Expression::Subscript(mut sub) = e {
                    // Only literal integer indices are rewritten; dynamic index
                    // expressions are left untouched (translating those would
                    // require emitting a runtime `- 1`).
                    if let Expression::Literal(Literal::Number(ref n)) = sub.index {
                        if let Ok(val) = n.parse::<i64>() {
                            sub.index =
                                Expression::Literal(Literal::Number((val - 1).to_string()));
                        }
                    }
                    Ok(Expression::Subscript(sub))
                } else {
                    Ok(e)
                }
            }
20950
20951 Action::AnyValueIgnoreNulls => {
20952 // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
20953 if let Expression::AnyValue(mut av) = e {
20954 if av.ignore_nulls.is_none() {
20955 av.ignore_nulls = Some(true);
20956 }
20957 Ok(Expression::AnyValue(av))
20958 } else {
20959 Ok(e)
20960 }
20961 }
20962
            Action::BigQueryNullsOrdering => {
                // BigQuery doesn't support NULLS FIRST/LAST in window function ORDER BY
                if let Expression::WindowFunction(mut wf) = e {
                    // Strip the modifier from every ORDER BY key of the OVER clause.
                    for o in &mut wf.over.order_by {
                        o.nulls_first = None;
                    }
                    Ok(Expression::WindowFunction(wf))
                } else if let Expression::Ordered(mut o) = e {
                    // Standalone Ordered node outside a window function.
                    o.nulls_first = None;
                    Ok(Expression::Ordered(o))
                } else {
                    Ok(e)
                }
            }
20977
            Action::SnowflakeFloatProtect => {
                // Convert DataType::Float to DataType::Custom("FLOAT") to prevent
                // Snowflake's target transform from converting it to DOUBLE.
                // Non-Snowflake sources should keep their FLOAT spelling.
                // NOTE(review): this relies on Custom type names being emitted
                // verbatim by the generator — confirm if the generator changes.
                if let Expression::DataType(DataType::Float { .. }) = e {
                    Ok(Expression::DataType(DataType::Custom {
                        name: "FLOAT".to_string(),
                    }))
                } else {
                    Ok(e)
                }
            }
20990
20991 Action::MysqlNullsOrdering => {
20992 // MySQL doesn't support NULLS FIRST/LAST - strip or rewrite
20993 if let Expression::Ordered(mut o) = e {
20994 let nulls_last = o.nulls_first == Some(false);
20995 let desc = o.desc;
20996 // MySQL default: ASC -> NULLS LAST, DESC -> NULLS FIRST
20997 // If requested ordering matches default, just strip NULLS clause
20998 let matches_default = if desc {
20999 // DESC default is NULLS FIRST, so nulls_first=true matches
21000 o.nulls_first == Some(true)
21001 } else {
21002 // ASC default is NULLS LAST, so nulls_first=false matches
21003 nulls_last
21004 };
21005 if matches_default {
21006 o.nulls_first = None;
21007 Ok(Expression::Ordered(o))
21008 } else {
21009 // Need CASE WHEN x IS NULL THEN 0/1 ELSE 0/1 END, x
21010 // For ASC NULLS FIRST: ORDER BY CASE WHEN x IS NULL THEN 0 ELSE 1 END, x ASC
21011 // For DESC NULLS LAST: ORDER BY CASE WHEN x IS NULL THEN 1 ELSE 0 END, x DESC
21012 let null_val = if desc { 1 } else { 0 };
21013 let non_null_val = if desc { 0 } else { 1 };
21014 let _case_expr = Expression::Case(Box::new(Case {
21015 operand: None,
21016 whens: vec![(
21017 Expression::IsNull(Box::new(crate::expressions::IsNull {
21018 this: o.this.clone(),
21019 not: false,
21020 postfix_form: false,
21021 })),
21022 Expression::number(null_val),
21023 )],
21024 else_: Some(Expression::number(non_null_val)),
21025 comments: Vec::new(),
21026 inferred_type: None,
21027 }));
21028 o.nulls_first = None;
21029 // Return a tuple of [case_expr, ordered_expr]
21030 // We need to return both as part of the ORDER BY
21031 // But since transform_recursive processes individual expressions,
21032 // we can't easily add extra ORDER BY items here.
21033 // Instead, strip the nulls_first
21034 o.nulls_first = None;
21035 Ok(Expression::Ordered(o))
21036 }
21037 } else {
21038 Ok(e)
21039 }
21040 }
21041
            Action::MysqlNullsLastRewrite => {
                // DuckDB -> MySQL: MySQL has no NULLS FIRST/LAST syntax and places
                // NULLs first on ASC. To emulate DuckDB's default (NULLS LAST on ASC),
                // prepend a synthetic sort key CASE WHEN expr IS NULL THEN 1 ELSE 0 END
                // so NULL rows sort after non-NULL rows; the original key then orders
                // the rest. Only window-function ORDER BY clauses are rewritten here.
                if let Expression::WindowFunction(mut wf) = e {
                    let mut new_order_by = Vec::new();
                    for o in wf.over.order_by {
                        if !o.desc {
                            // ASC: DuckDB defaults to NULLS LAST, MySQL to NULLS FIRST.
                            // Insert CASE WHEN expr IS NULL THEN 1 ELSE 0 END before expr.
                            let case_expr = Expression::Case(Box::new(Case {
                                operand: None,
                                whens: vec![(
                                    Expression::IsNull(Box::new(crate::expressions::IsNull {
                                        this: o.this.clone(),
                                        not: false,
                                        postfix_form: false,
                                    })),
                                    Expression::Literal(Literal::Number("1".to_string())),
                                )],
                                else_: Some(Expression::Literal(Literal::Number(
                                    "0".to_string(),
                                ))),
                                comments: Vec::new(),
                                inferred_type: None,
                            }));
                            new_order_by.push(crate::expressions::Ordered {
                                this: case_expr,
                                desc: false,
                                nulls_first: None,
                                explicit_asc: false,
                                with_fill: None,
                            });
                            // Keep the original key after the NULL discriminator, with
                            // the NULLS qualifier (unsupported in MySQL) stripped.
                            let mut ordered = o;
                            ordered.nulls_first = None;
                            new_order_by.push(ordered);
                        } else {
                            // DESC: DuckDB has NULLS LAST, MySQL also has NULLS LAST
                            // (NULLs sort smallest in DESC), so only the NULLS
                            // qualifier needs to be stripped.
                            let mut ordered = o;
                            ordered.nulls_first = None;
                            new_order_by.push(ordered);
                        }
                    }
                    wf.over.order_by = new_order_by;
                    Ok(Expression::WindowFunction(wf))
                } else {
                    Ok(e)
                }
            }
21091
21092 Action::RespectNullsConvert => {
21093 // RESPECT NULLS -> strip for SQLite (FIRST_VALUE(c) OVER (...))
21094 if let Expression::WindowFunction(mut wf) = e {
21095 match &mut wf.this {
21096 Expression::FirstValue(ref mut vf) => {
21097 if vf.ignore_nulls == Some(false) {
21098 vf.ignore_nulls = None;
21099 // For SQLite, we'd need to add NULLS LAST to ORDER BY in the OVER clause
21100 // but that's handled by the generator's NULLS ordering
21101 }
21102 }
21103 Expression::LastValue(ref mut vf) => {
21104 if vf.ignore_nulls == Some(false) {
21105 vf.ignore_nulls = None;
21106 }
21107 }
21108 _ => {}
21109 }
21110 Ok(Expression::WindowFunction(wf))
21111 } else {
21112 Ok(e)
21113 }
21114 }
21115
21116 Action::CreateTableStripComment => {
21117 // Strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
21118 if let Expression::CreateTable(mut ct) = e {
21119 for col in &mut ct.columns {
21120 col.comment = None;
21121 col.constraints.retain(|c| {
21122 !matches!(c, crate::expressions::ColumnConstraint::Comment(_))
21123 });
21124 // Also remove Comment from constraint_order
21125 col.constraint_order.retain(|c| {
21126 !matches!(c, crate::expressions::ConstraintType::Comment)
21127 });
21128 }
21129 // Strip properties (USING, PARTITIONED BY, etc.)
21130 ct.properties.clear();
21131 Ok(Expression::CreateTable(ct))
21132 } else {
21133 Ok(e)
21134 }
21135 }
21136
21137 Action::AlterTableToSpRename => {
21138 // ALTER TABLE db.t1 RENAME TO db.t2 -> EXEC sp_rename 'db.t1', 't2'
21139 if let Expression::AlterTable(ref at) = e {
21140 if let Some(crate::expressions::AlterTableAction::RenameTable(
21141 ref new_tbl,
21142 )) = at.actions.first()
21143 {
21144 // Build the old table name using TSQL bracket quoting
21145 let old_name = if let Some(ref schema) = at.name.schema {
21146 if at.name.name.quoted || schema.quoted {
21147 format!("[{}].[{}]", schema.name, at.name.name.name)
21148 } else {
21149 format!("{}.{}", schema.name, at.name.name.name)
21150 }
21151 } else {
21152 if at.name.name.quoted {
21153 format!("[{}]", at.name.name.name)
21154 } else {
21155 at.name.name.name.clone()
21156 }
21157 };
21158 let new_name = new_tbl.name.name.clone();
21159 // EXEC sp_rename 'old_name', 'new_name'
21160 let sql = format!("EXEC sp_rename '{}', '{}'", old_name, new_name);
21161 Ok(Expression::Raw(crate::expressions::Raw { sql }))
21162 } else {
21163 Ok(e)
21164 }
21165 } else {
21166 Ok(e)
21167 }
21168 }
21169
            Action::SnowflakeIntervalFormat => {
                // Snowflake spells intervals as a single quoted string embedding the
                // unit: INTERVAL '2' HOUR -> INTERVAL '2 HOUR'. Fold the unit keyword
                // into the string literal and clear the structured unit field.
                if let Expression::Interval(mut iv) = e {
                    if let (
                        Some(Expression::Literal(Literal::String(ref val))),
                        Some(ref unit_spec),
                    ) = (&iv.this, &iv.unit)
                    {
                        // Map the structured unit to its SQL keyword; non-Simple unit
                        // specs yield "" and leave the interval untouched.
                        let unit_str = match unit_spec {
                            crate::expressions::IntervalUnitSpec::Simple { unit, .. } => {
                                match unit {
                                    crate::expressions::IntervalUnit::Year => "YEAR",
                                    crate::expressions::IntervalUnit::Quarter => "QUARTER",
                                    crate::expressions::IntervalUnit::Month => "MONTH",
                                    crate::expressions::IntervalUnit::Week => "WEEK",
                                    crate::expressions::IntervalUnit::Day => "DAY",
                                    crate::expressions::IntervalUnit::Hour => "HOUR",
                                    crate::expressions::IntervalUnit::Minute => "MINUTE",
                                    crate::expressions::IntervalUnit::Second => "SECOND",
                                    crate::expressions::IntervalUnit::Millisecond => {
                                        "MILLISECOND"
                                    }
                                    crate::expressions::IntervalUnit::Microsecond => {
                                        "MICROSECOND"
                                    }
                                    crate::expressions::IntervalUnit::Nanosecond => {
                                        "NANOSECOND"
                                    }
                                }
                            }
                            _ => "",
                        };
                        if !unit_str.is_empty() {
                            // '2' + HOUR collapse into the single literal '2 HOUR'.
                            let combined = format!("{} {}", val, unit_str);
                            iv.this = Some(Expression::Literal(Literal::String(combined)));
                            iv.unit = None;
                        }
                    }
                    Ok(Expression::Interval(iv))
                } else {
                    Ok(e)
                }
            }
21213
21214 Action::ArrayConcatBracketConvert => {
21215 // Expression::Array/ArrayFunc -> target-specific
21216 // For PostgreSQL: Array -> ArrayFunc (bracket_notation: false)
21217 // For Redshift: Array/ArrayFunc -> Function("ARRAY", args) to produce ARRAY(1, 2) with parens
21218 match e {
21219 Expression::Array(arr) => {
21220 if matches!(target, DialectType::Redshift) {
21221 Ok(Expression::Function(Box::new(Function::new(
21222 "ARRAY".to_string(),
21223 arr.expressions,
21224 ))))
21225 } else {
21226 Ok(Expression::ArrayFunc(Box::new(
21227 crate::expressions::ArrayConstructor {
21228 expressions: arr.expressions,
21229 bracket_notation: false,
21230 use_list_keyword: false,
21231 },
21232 )))
21233 }
21234 }
21235 Expression::ArrayFunc(arr) => {
21236 // Only for Redshift: convert bracket-notation ArrayFunc to Function("ARRAY")
21237 if matches!(target, DialectType::Redshift) {
21238 Ok(Expression::Function(Box::new(Function::new(
21239 "ARRAY".to_string(),
21240 arr.expressions,
21241 ))))
21242 } else {
21243 Ok(Expression::ArrayFunc(arr))
21244 }
21245 }
21246 _ => Ok(e),
21247 }
21248 }
21249
21250 Action::BitAggFloatCast => {
21251 // BIT_OR/BIT_AND/BIT_XOR with float/decimal cast arg -> wrap with ROUND+INT cast for DuckDB
21252 // For FLOAT/DOUBLE/REAL: CAST(ROUND(CAST(val AS type)) AS INT)
21253 // For DECIMAL: CAST(CAST(val AS DECIMAL(p,s)) AS INT)
21254 let int_type = DataType::Int {
21255 length: None,
21256 integer_spelling: false,
21257 };
21258 let wrap_agg = |agg_this: Expression, int_dt: DataType| -> Expression {
21259 if let Expression::Cast(c) = agg_this {
21260 match &c.to {
21261 DataType::Float { .. }
21262 | DataType::Double { .. }
21263 | DataType::Custom { .. } => {
21264 // FLOAT/DOUBLE/REAL: CAST(ROUND(CAST(val AS type)) AS INT)
21265 // Change FLOAT to REAL (Float with real_spelling=true) for DuckDB generator
21266 let inner_type = match &c.to {
21267 DataType::Float {
21268 precision, scale, ..
21269 } => DataType::Float {
21270 precision: *precision,
21271 scale: *scale,
21272 real_spelling: true,
21273 },
21274 other => other.clone(),
21275 };
21276 let inner_cast =
21277 Expression::Cast(Box::new(crate::expressions::Cast {
21278 this: c.this.clone(),
21279 to: inner_type,
21280 trailing_comments: Vec::new(),
21281 double_colon_syntax: false,
21282 format: None,
21283 default: None,
21284 inferred_type: None,
21285 }));
21286 let rounded = Expression::Function(Box::new(Function::new(
21287 "ROUND".to_string(),
21288 vec![inner_cast],
21289 )));
21290 Expression::Cast(Box::new(crate::expressions::Cast {
21291 this: rounded,
21292 to: int_dt,
21293 trailing_comments: Vec::new(),
21294 double_colon_syntax: false,
21295 format: None,
21296 default: None,
21297 inferred_type: None,
21298 }))
21299 }
21300 DataType::Decimal { .. } => {
21301 // DECIMAL: CAST(CAST(val AS DECIMAL(p,s)) AS INT)
21302 Expression::Cast(Box::new(crate::expressions::Cast {
21303 this: Expression::Cast(c),
21304 to: int_dt,
21305 trailing_comments: Vec::new(),
21306 double_colon_syntax: false,
21307 format: None,
21308 default: None,
21309 inferred_type: None,
21310 }))
21311 }
21312 _ => Expression::Cast(c),
21313 }
21314 } else {
21315 agg_this
21316 }
21317 };
21318 match e {
21319 Expression::BitwiseOrAgg(mut f) => {
21320 f.this = wrap_agg(f.this, int_type);
21321 Ok(Expression::BitwiseOrAgg(f))
21322 }
21323 Expression::BitwiseAndAgg(mut f) => {
21324 let int_type = DataType::Int {
21325 length: None,
21326 integer_spelling: false,
21327 };
21328 f.this = wrap_agg(f.this, int_type);
21329 Ok(Expression::BitwiseAndAgg(f))
21330 }
21331 Expression::BitwiseXorAgg(mut f) => {
21332 let int_type = DataType::Int {
21333 length: None,
21334 integer_spelling: false,
21335 };
21336 f.this = wrap_agg(f.this, int_type);
21337 Ok(Expression::BitwiseXorAgg(f))
21338 }
21339 _ => Ok(e),
21340 }
21341 }
21342
21343 Action::BitAggSnowflakeRename => {
21344 // BIT_OR -> BITORAGG, BIT_AND -> BITANDAGG, BIT_XOR -> BITXORAGG for Snowflake
21345 match e {
21346 Expression::BitwiseOrAgg(f) => Ok(Expression::Function(Box::new(
21347 Function::new("BITORAGG".to_string(), vec![f.this]),
21348 ))),
21349 Expression::BitwiseAndAgg(f) => Ok(Expression::Function(Box::new(
21350 Function::new("BITANDAGG".to_string(), vec![f.this]),
21351 ))),
21352 Expression::BitwiseXorAgg(f) => Ok(Expression::Function(Box::new(
21353 Function::new("BITXORAGG".to_string(), vec![f.this]),
21354 ))),
21355 _ => Ok(e),
21356 }
21357 }
21358
21359 Action::StrftimeCastTimestamp => {
21360 // CAST(x AS TIMESTAMP) -> CAST(x AS TIMESTAMP_NTZ) for Spark
21361 if let Expression::Cast(mut c) = e {
21362 if matches!(
21363 c.to,
21364 DataType::Timestamp {
21365 timezone: false,
21366 ..
21367 }
21368 ) {
21369 c.to = DataType::Custom {
21370 name: "TIMESTAMP_NTZ".to_string(),
21371 };
21372 }
21373 Ok(Expression::Cast(c))
21374 } else {
21375 Ok(e)
21376 }
21377 }
21378
21379 Action::DecimalDefaultPrecision => {
21380 // DECIMAL without precision -> DECIMAL(18, 3) for Snowflake
21381 if let Expression::Cast(mut c) = e {
21382 if matches!(
21383 c.to,
21384 DataType::Decimal {
21385 precision: None,
21386 ..
21387 }
21388 ) {
21389 c.to = DataType::Decimal {
21390 precision: Some(18),
21391 scale: Some(3),
21392 };
21393 }
21394 Ok(Expression::Cast(c))
21395 } else {
21396 Ok(e)
21397 }
21398 }
21399
21400 Action::FilterToIff => {
21401 // FILTER(WHERE cond) -> rewrite aggregate: AGG(IFF(cond, val, NULL))
21402 if let Expression::Filter(f) = e {
21403 let condition = *f.expression;
21404 let agg = *f.this;
21405 // Strip WHERE from condition
21406 let cond = match condition {
21407 Expression::Where(w) => w.this,
21408 other => other,
21409 };
21410 // Extract the aggregate function and its argument
21411 // We want AVG(IFF(condition, x, NULL))
21412 match agg {
21413 Expression::Function(mut func) => {
21414 if !func.args.is_empty() {
21415 let orig_arg = func.args[0].clone();
21416 let iff_call = Expression::Function(Box::new(Function::new(
21417 "IFF".to_string(),
21418 vec![cond, orig_arg, Expression::Null(Null)],
21419 )));
21420 func.args[0] = iff_call;
21421 Ok(Expression::Function(func))
21422 } else {
21423 Ok(Expression::Filter(Box::new(crate::expressions::Filter {
21424 this: Box::new(Expression::Function(func)),
21425 expression: Box::new(cond),
21426 })))
21427 }
21428 }
21429 Expression::Avg(mut avg) => {
21430 let iff_call = Expression::Function(Box::new(Function::new(
21431 "IFF".to_string(),
21432 vec![cond, avg.this.clone(), Expression::Null(Null)],
21433 )));
21434 avg.this = iff_call;
21435 Ok(Expression::Avg(avg))
21436 }
21437 Expression::Sum(mut s) => {
21438 let iff_call = Expression::Function(Box::new(Function::new(
21439 "IFF".to_string(),
21440 vec![cond, s.this.clone(), Expression::Null(Null)],
21441 )));
21442 s.this = iff_call;
21443 Ok(Expression::Sum(s))
21444 }
21445 Expression::Count(mut c) => {
21446 if let Some(ref this_expr) = c.this {
21447 let iff_call = Expression::Function(Box::new(Function::new(
21448 "IFF".to_string(),
21449 vec![cond, this_expr.clone(), Expression::Null(Null)],
21450 )));
21451 c.this = Some(iff_call);
21452 }
21453 Ok(Expression::Count(c))
21454 }
21455 other => {
21456 // Fallback: keep as Filter
21457 Ok(Expression::Filter(Box::new(crate::expressions::Filter {
21458 this: Box::new(other),
21459 expression: Box::new(cond),
21460 })))
21461 }
21462 }
21463 } else {
21464 Ok(e)
21465 }
21466 }
21467
            Action::AggFilterToIff => {
                // AggFunc.filter -> IFF wrapping for targets without FILTER support:
                // AVG(x) FILTER (WHERE cond) -> AVG(IFF(cond, x, NULL)).
                // Macro: takes the filter out of an AggFunc-shaped node and wraps its
                // argument in IFF(filter, arg, NULL); a no-op when no filter is set.
                macro_rules! handle_agg_filter_to_iff {
                    ($variant:ident, $agg:expr) => {{
                        let mut agg = $agg;
                        if let Some(filter_cond) = agg.filter.take() {
                            let iff_call = Expression::Function(Box::new(Function::new(
                                "IFF".to_string(),
                                vec![filter_cond, agg.this.clone(), Expression::Null(Null)],
                            )));
                            agg.this = iff_call;
                        }
                        Ok(Expression::$variant(agg))
                    }};
                }

                match e {
                    Expression::Avg(agg) => handle_agg_filter_to_iff!(Avg, agg),
                    Expression::Sum(agg) => handle_agg_filter_to_iff!(Sum, agg),
                    Expression::Min(agg) => handle_agg_filter_to_iff!(Min, agg),
                    Expression::Max(agg) => handle_agg_filter_to_iff!(Max, agg),
                    Expression::ArrayAgg(agg) => handle_agg_filter_to_iff!(ArrayAgg, agg),
                    Expression::CountIf(agg) => handle_agg_filter_to_iff!(CountIf, agg),
                    Expression::Stddev(agg) => handle_agg_filter_to_iff!(Stddev, agg),
                    Expression::StddevPop(agg) => handle_agg_filter_to_iff!(StddevPop, agg),
                    Expression::StddevSamp(agg) => handle_agg_filter_to_iff!(StddevSamp, agg),
                    Expression::Variance(agg) => handle_agg_filter_to_iff!(Variance, agg),
                    Expression::VarPop(agg) => handle_agg_filter_to_iff!(VarPop, agg),
                    Expression::VarSamp(agg) => handle_agg_filter_to_iff!(VarSamp, agg),
                    Expression::Median(agg) => handle_agg_filter_to_iff!(Median, agg),
                    Expression::Mode(agg) => handle_agg_filter_to_iff!(Mode, agg),
                    Expression::First(agg) => handle_agg_filter_to_iff!(First, agg),
                    Expression::Last(agg) => handle_agg_filter_to_iff!(Last, agg),
                    Expression::AnyValue(agg) => handle_agg_filter_to_iff!(AnyValue, agg),
                    Expression::ApproxDistinct(agg) => {
                        handle_agg_filter_to_iff!(ApproxDistinct, agg)
                    }
                    // Count stores its argument as an Option (COUNT(*) has none), so
                    // it cannot go through the macro; wrap only an explicit argument.
                    Expression::Count(mut c) => {
                        if let Some(filter_cond) = c.filter.take() {
                            if let Some(ref this_expr) = c.this {
                                let iff_call = Expression::Function(Box::new(Function::new(
                                    "IFF".to_string(),
                                    vec![
                                        filter_cond,
                                        this_expr.clone(),
                                        Expression::Null(Null),
                                    ],
                                )));
                                c.this = Some(iff_call);
                            }
                        }
                        Ok(Expression::Count(c))
                    }
                    other => Ok(other),
                }
            }
21525
            Action::JsonToGetPath => {
                // JSON_EXTRACT(x, '$.key') -> GET_PATH(PARSE_JSON(x), 'key').
                // Two steps: normalize the subject into a PARSE_JSON-producing
                // expression, then rewrite the path into a dotted key string.
                if let Expression::JsonExtract(je) = e {
                    // Subject normalization:
                    // - JSON(x)       -> PARSE_JSON(x)
                    // - PARSE_JSON(x) -> keep as-is
                    // - GET_PATH(..)  -> keep as-is (result is already JSON)
                    // - anything else -> wrap in PARSE_JSON(...)
                    let this = match &je.this {
                        Expression::Function(f)
                            if f.name.eq_ignore_ascii_case("JSON") && f.args.len() == 1 =>
                        {
                            Expression::Function(Box::new(Function::new(
                                "PARSE_JSON".to_string(),
                                f.args.clone(),
                            )))
                        }
                        Expression::Function(f)
                            if f.name.eq_ignore_ascii_case("PARSE_JSON") =>
                        {
                            je.this.clone()
                        }
                        // GET_PATH result is already JSON, don't wrap
                        Expression::Function(f) if f.name.eq_ignore_ascii_case("GET_PATH") => {
                            je.this.clone()
                        }
                        other => {
                            // Wrap non-JSON expressions in PARSE_JSON()
                            Expression::Function(Box::new(Function::new(
                                "PARSE_JSON".to_string(),
                                vec![other.clone()],
                            )))
                        }
                    };
                    // Path normalization: structured JSONPath -> dotted key string;
                    // string literal -> '$.'/'$' prefix stripped (plus wildcard
                    // cleanup); anything else passes through unchanged.
                    let path = match &je.path {
                        Expression::JSONPath(jp) => {
                            // Collect string keys: $root.a.b -> "a.b". Root markers and
                            // non-key path steps are skipped.
                            let mut key_parts = Vec::new();
                            for expr in &jp.expressions {
                                match expr {
                                    Expression::JSONPathRoot(_) => {} // skip root
                                    Expression::JSONPathKey(k) => {
                                        if let Expression::Literal(Literal::String(s)) =
                                            &*k.this
                                        {
                                            key_parts.push(s.clone());
                                        }
                                    }
                                    _ => {}
                                }
                            }
                            if !key_parts.is_empty() {
                                Expression::Literal(Literal::String(key_parts.join(".")))
                            } else {
                                // No usable keys found: keep the original path node.
                                je.path.clone()
                            }
                        }
                        Expression::Literal(Literal::String(s)) if s.starts_with("$.") => {
                            let stripped = Self::strip_json_wildcards(&s[2..].to_string());
                            Expression::Literal(Literal::String(stripped))
                        }
                        Expression::Literal(Literal::String(s)) if s.starts_with('$') => {
                            let stripped = Self::strip_json_wildcards(&s[1..].to_string());
                            Expression::Literal(Literal::String(stripped))
                        }
                        _ => je.path.clone(),
                    };
                    Ok(Expression::Function(Box::new(Function::new(
                        "GET_PATH".to_string(),
                        vec![this, path],
                    ))))
                } else {
                    Ok(e)
                }
            }
21601
21602 Action::StructToRow => {
21603 // DuckDB struct/dict -> BigQuery STRUCT(value AS key, ...) / Presto ROW
21604 // Handles both Expression::Struct and Expression::MapFunc(curly_brace_syntax=true)
21605
21606 // Extract key-value pairs from either Struct or MapFunc
21607 let kv_pairs: Option<Vec<(String, Expression)>> = match &e {
21608 Expression::Struct(s) => Some(
21609 s.fields
21610 .iter()
21611 .map(|(opt_name, field_expr)| {
21612 if let Some(name) = opt_name {
21613 (name.clone(), field_expr.clone())
21614 } else if let Expression::NamedArgument(na) = field_expr {
21615 (na.name.name.clone(), na.value.clone())
21616 } else {
21617 (String::new(), field_expr.clone())
21618 }
21619 })
21620 .collect(),
21621 ),
21622 Expression::MapFunc(m) if m.curly_brace_syntax => Some(
21623 m.keys
21624 .iter()
21625 .zip(m.values.iter())
21626 .map(|(key, value)| {
21627 let key_name = match key {
21628 Expression::Literal(Literal::String(s)) => s.clone(),
21629 Expression::Identifier(id) => id.name.clone(),
21630 _ => String::new(),
21631 };
21632 (key_name, value.clone())
21633 })
21634 .collect(),
21635 ),
21636 _ => None,
21637 };
21638
21639 if let Some(pairs) = kv_pairs {
21640 let mut named_args = Vec::new();
21641 for (key_name, value) in pairs {
21642 if matches!(target, DialectType::BigQuery) && !key_name.is_empty() {
21643 named_args.push(Expression::Alias(Box::new(
21644 crate::expressions::Alias::new(
21645 value,
21646 Identifier::new(key_name),
21647 ),
21648 )));
21649 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
21650 named_args.push(value);
21651 } else {
21652 named_args.push(value);
21653 }
21654 }
21655
21656 if matches!(target, DialectType::BigQuery) {
21657 Ok(Expression::Function(Box::new(Function::new(
21658 "STRUCT".to_string(),
21659 named_args,
21660 ))))
21661 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
21662 // For Presto/Trino, infer types and wrap in CAST(ROW(...) AS ROW(name TYPE, ...))
21663 let row_func = Expression::Function(Box::new(Function::new(
21664 "ROW".to_string(),
21665 named_args,
21666 )));
21667
21668 // Try to infer types for each pair
21669 let kv_pairs_again: Option<Vec<(String, Expression)>> = match &e {
21670 Expression::Struct(s) => Some(
21671 s.fields
21672 .iter()
21673 .map(|(opt_name, field_expr)| {
21674 if let Some(name) = opt_name {
21675 (name.clone(), field_expr.clone())
21676 } else if let Expression::NamedArgument(na) = field_expr
21677 {
21678 (na.name.name.clone(), na.value.clone())
21679 } else {
21680 (String::new(), field_expr.clone())
21681 }
21682 })
21683 .collect(),
21684 ),
21685 Expression::MapFunc(m) if m.curly_brace_syntax => Some(
21686 m.keys
21687 .iter()
21688 .zip(m.values.iter())
21689 .map(|(key, value)| {
21690 let key_name = match key {
21691 Expression::Literal(Literal::String(s)) => {
21692 s.clone()
21693 }
21694 Expression::Identifier(id) => id.name.clone(),
21695 _ => String::new(),
21696 };
21697 (key_name, value.clone())
21698 })
21699 .collect(),
21700 ),
21701 _ => None,
21702 };
21703
21704 if let Some(pairs) = kv_pairs_again {
21705 // Infer types for all values
21706 let mut all_inferred = true;
21707 let mut fields = Vec::new();
21708 for (name, value) in &pairs {
21709 let inferred_type = match value {
21710 Expression::Literal(Literal::Number(n)) => {
21711 if n.contains('.') {
21712 Some(DataType::Double {
21713 precision: None,
21714 scale: None,
21715 })
21716 } else {
21717 Some(DataType::Int {
21718 length: None,
21719 integer_spelling: true,
21720 })
21721 }
21722 }
21723 Expression::Literal(Literal::String(_)) => {
21724 Some(DataType::VarChar {
21725 length: None,
21726 parenthesized_length: false,
21727 })
21728 }
21729 Expression::Boolean(_) => Some(DataType::Boolean),
21730 _ => None,
21731 };
21732 if let Some(dt) = inferred_type {
21733 fields.push(crate::expressions::StructField::new(
21734 name.clone(),
21735 dt,
21736 ));
21737 } else {
21738 all_inferred = false;
21739 break;
21740 }
21741 }
21742
21743 if all_inferred && !fields.is_empty() {
21744 let row_type = DataType::Struct {
21745 fields,
21746 nested: true,
21747 };
21748 Ok(Expression::Cast(Box::new(Cast {
21749 this: row_func,
21750 to: row_type,
21751 trailing_comments: Vec::new(),
21752 double_colon_syntax: false,
21753 format: None,
21754 default: None,
21755 inferred_type: None,
21756 })))
21757 } else {
21758 Ok(row_func)
21759 }
21760 } else {
21761 Ok(row_func)
21762 }
21763 } else {
21764 Ok(Expression::Function(Box::new(Function::new(
21765 "ROW".to_string(),
21766 named_args,
21767 ))))
21768 }
21769 } else {
21770 Ok(e)
21771 }
21772 }
21773
            Action::SparkStructConvert => {
                // Spark STRUCT(val AS name, ...) translated per target:
                //   DuckDB        -> {'name': val, ...} struct literal
                //   Presto-family -> CAST(ROW(val, ...) AS ROW(name TYPE, ...))
                //   anything else -> original function call kept unchanged
                if let Expression::Function(f) = e {
                    // Collect (name, value) pairs from the aliased arguments;
                    // unaliased arguments get an empty name.
                    let mut pairs: Vec<(String, Expression)> = Vec::new();
                    for arg in &f.args {
                        match arg {
                            Expression::Alias(a) => {
                                pairs.push((a.alias.name.clone(), a.this.clone()));
                            }
                            _ => {
                                pairs.push((String::new(), arg.clone()));
                            }
                        }
                    }

                    match target {
                        DialectType::DuckDB => {
                            // DuckDB struct literal {'name': value, ...}: field names
                            // become string-literal keys, emitted with curly-brace
                            // syntax.
                            let mut keys = Vec::new();
                            let mut values = Vec::new();
                            for (name, value) in &pairs {
                                keys.push(Expression::Literal(Literal::String(name.clone())));
                                values.push(value.clone());
                            }
                            Ok(Expression::MapFunc(Box::new(
                                crate::expressions::MapConstructor {
                                    keys,
                                    values,
                                    curly_brace_syntax: true,
                                    with_map_keyword: false,
                                },
                            )))
                        }
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // ROW values are positional; field names survive only in
                            // the CAST target type ROW(name1 TYPE1, name2 TYPE2).
                            let row_args: Vec<Expression> =
                                pairs.iter().map(|(_, v)| v.clone()).collect();
                            let row_func = Expression::Function(Box::new(Function::new(
                                "ROW".to_string(),
                                row_args,
                            )));

                            // Infer a type per value from its literal shape; if any
                            // value is not a plain number/string/boolean literal, skip
                            // the CAST and emit the bare ROW call.
                            let mut all_inferred = true;
                            let mut fields = Vec::new();
                            for (name, value) in &pairs {
                                let inferred_type = match value {
                                    Expression::Literal(Literal::Number(n)) => {
                                        if n.contains('.') {
                                            Some(DataType::Double {
                                                precision: None,
                                                scale: None,
                                            })
                                        } else {
                                            Some(DataType::Int {
                                                length: None,
                                                integer_spelling: true,
                                            })
                                        }
                                    }
                                    Expression::Literal(Literal::String(_)) => {
                                        Some(DataType::VarChar {
                                            length: None,
                                            parenthesized_length: false,
                                        })
                                    }
                                    Expression::Boolean(_) => Some(DataType::Boolean),
                                    _ => None,
                                };
                                if let Some(dt) = inferred_type {
                                    fields.push(crate::expressions::StructField::new(
                                        name.clone(),
                                        dt,
                                    ));
                                } else {
                                    all_inferred = false;
                                    break;
                                }
                            }

                            if all_inferred && !fields.is_empty() {
                                let row_type = DataType::Struct {
                                    fields,
                                    nested: true,
                                };
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: row_func,
                                    to: row_type,
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                })))
                            } else {
                                Ok(row_func)
                            }
                        }
                        _ => Ok(Expression::Function(f)),
                    }
                } else {
                    Ok(e)
                }
            }
21880
21881 Action::ApproxCountDistinctToApproxDistinct => {
21882 // APPROX_COUNT_DISTINCT(x) -> APPROX_DISTINCT(x)
21883 if let Expression::ApproxCountDistinct(f) = e {
21884 Ok(Expression::ApproxDistinct(f))
21885 } else {
21886 Ok(e)
21887 }
21888 }
21889
21890 Action::CollectListToArrayAgg => {
21891 // COLLECT_LIST(x) -> ARRAY_AGG(x) FILTER(WHERE x IS NOT NULL)
21892 if let Expression::AggregateFunction(f) = e {
21893 let filter_expr = if !f.args.is_empty() {
21894 let arg = f.args[0].clone();
21895 Some(Expression::IsNull(Box::new(crate::expressions::IsNull {
21896 this: arg,
21897 not: true,
21898 postfix_form: false,
21899 })))
21900 } else {
21901 None
21902 };
21903 let agg = crate::expressions::AggFunc {
21904 this: if f.args.is_empty() {
21905 Expression::Null(crate::expressions::Null)
21906 } else {
21907 f.args[0].clone()
21908 },
21909 distinct: f.distinct,
21910 order_by: f.order_by.clone(),
21911 filter: filter_expr,
21912 ignore_nulls: None,
21913 name: None,
21914 having_max: None,
21915 limit: None,
21916 inferred_type: None,
21917 };
21918 Ok(Expression::ArrayAgg(Box::new(agg)))
21919 } else {
21920 Ok(e)
21921 }
21922 }
21923
21924 Action::CollectSetConvert => {
21925 // COLLECT_SET(x) -> target-specific
21926 if let Expression::AggregateFunction(f) = e {
21927 match target {
21928 DialectType::Presto => Ok(Expression::AggregateFunction(Box::new(
21929 crate::expressions::AggregateFunction {
21930 name: "SET_AGG".to_string(),
21931 args: f.args,
21932 distinct: false,
21933 order_by: f.order_by,
21934 filter: f.filter,
21935 limit: f.limit,
21936 ignore_nulls: f.ignore_nulls,
21937 inferred_type: None,
21938 },
21939 ))),
21940 DialectType::Snowflake => Ok(Expression::AggregateFunction(Box::new(
21941 crate::expressions::AggregateFunction {
21942 name: "ARRAY_UNIQUE_AGG".to_string(),
21943 args: f.args,
21944 distinct: false,
21945 order_by: f.order_by,
21946 filter: f.filter,
21947 limit: f.limit,
21948 ignore_nulls: f.ignore_nulls,
21949 inferred_type: None,
21950 },
21951 ))),
21952 DialectType::Trino | DialectType::DuckDB => {
21953 let agg = crate::expressions::AggFunc {
21954 this: if f.args.is_empty() {
21955 Expression::Null(crate::expressions::Null)
21956 } else {
21957 f.args[0].clone()
21958 },
21959 distinct: true,
21960 order_by: Vec::new(),
21961 filter: None,
21962 ignore_nulls: None,
21963 name: None,
21964 having_max: None,
21965 limit: None,
21966 inferred_type: None,
21967 };
21968 Ok(Expression::ArrayAgg(Box::new(agg)))
21969 }
21970 _ => Ok(Expression::AggregateFunction(f)),
21971 }
21972 } else {
21973 Ok(e)
21974 }
21975 }
21976
21977 Action::PercentileConvert => {
21978 // PERCENTILE(x, 0.5) -> QUANTILE(x, 0.5) / APPROX_PERCENTILE(x, 0.5)
21979 if let Expression::AggregateFunction(f) = e {
21980 let name = match target {
21981 DialectType::DuckDB => "QUANTILE",
21982 DialectType::Presto | DialectType::Trino => "APPROX_PERCENTILE",
21983 _ => "PERCENTILE",
21984 };
21985 Ok(Expression::AggregateFunction(Box::new(
21986 crate::expressions::AggregateFunction {
21987 name: name.to_string(),
21988 args: f.args,
21989 distinct: f.distinct,
21990 order_by: f.order_by,
21991 filter: f.filter,
21992 limit: f.limit,
21993 ignore_nulls: f.ignore_nulls,
21994 inferred_type: None,
21995 },
21996 )))
21997 } else {
21998 Ok(e)
21999 }
22000 }
22001
22002 Action::CorrIsnanWrap => {
22003 // CORR(a, b) -> CASE WHEN ISNAN(CORR(a, b)) THEN NULL ELSE CORR(a, b) END
22004 // The CORR expression could be AggregateFunction, WindowFunction, or Filter-wrapped
22005 let corr_clone = e.clone();
22006 let isnan = Expression::Function(Box::new(Function::new(
22007 "ISNAN".to_string(),
22008 vec![corr_clone.clone()],
22009 )));
22010 let case_expr = Expression::Case(Box::new(Case {
22011 operand: None,
22012 whens: vec![(isnan, Expression::Null(crate::expressions::Null))],
22013 else_: Some(corr_clone),
22014 comments: Vec::new(),
22015 inferred_type: None,
22016 }));
22017 Ok(case_expr)
22018 }
22019
            Action::TruncToDateTrunc => {
                // TRUNC(timestamp, 'MONTH') -> DATE_TRUNC('MONTH', timestamp).
                // Note the argument order flips: TRUNC is (value, unit) while
                // DATE_TRUNC is (unit, value). TRUNC calls with any other arity are
                // left untouched.
                if let Expression::Function(f) = e {
                    if f.args.len() == 2 {
                        let timestamp = f.args[0].clone();
                        let unit_expr = f.args[1].clone();

                        if matches!(target, DialectType::ClickHouse) {
                            // For ClickHouse, produce Expression::DateTrunc which the generator
                            // outputs as DATE_TRUNC(...) without going through the ClickHouse
                            // target transform that would convert it to dateTrunc
                            let unit_str = Self::get_unit_str_static(&unit_expr);
                            let dt_field = match unit_str.as_str() {
                                "YEAR" => DateTimeField::Year,
                                "MONTH" => DateTimeField::Month,
                                "DAY" => DateTimeField::Day,
                                "HOUR" => DateTimeField::Hour,
                                "MINUTE" => DateTimeField::Minute,
                                "SECOND" => DateTimeField::Second,
                                "WEEK" => DateTimeField::Week,
                                "QUARTER" => DateTimeField::Quarter,
                                // Unrecognized units are carried through verbatim.
                                _ => DateTimeField::Custom(unit_str),
                            };
                            Ok(Expression::DateTrunc(Box::new(
                                crate::expressions::DateTruncFunc {
                                    this: timestamp,
                                    unit: dt_field,
                                },
                            )))
                        } else {
                            // Other targets: plain DATE_TRUNC(unit, value) call.
                            let new_args = vec![unit_expr, timestamp];
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_TRUNC".to_string(),
                                new_args,
                            ))))
                        }
                    } else {
                        Ok(Expression::Function(f))
                    }
                } else {
                    Ok(e)
                }
            }
22063
22064 Action::ArrayContainsConvert => {
22065 if let Expression::ArrayContains(f) = e {
22066 match target {
22067 DialectType::Presto | DialectType::Trino => {
22068 // ARRAY_CONTAINS(arr, val) -> CONTAINS(arr, val)
22069 Ok(Expression::Function(Box::new(Function::new(
22070 "CONTAINS".to_string(),
22071 vec![f.this, f.expression],
22072 ))))
22073 }
22074 DialectType::Snowflake => {
22075 // ARRAY_CONTAINS(arr, val) -> ARRAY_CONTAINS(CAST(val AS VARIANT), arr)
22076 let cast_val =
22077 Expression::Cast(Box::new(crate::expressions::Cast {
22078 this: f.expression,
22079 to: crate::expressions::DataType::Custom {
22080 name: "VARIANT".to_string(),
22081 },
22082 trailing_comments: Vec::new(),
22083 double_colon_syntax: false,
22084 format: None,
22085 default: None,
22086 inferred_type: None,
22087 }));
22088 Ok(Expression::Function(Box::new(Function::new(
22089 "ARRAY_CONTAINS".to_string(),
22090 vec![cast_val, f.this],
22091 ))))
22092 }
22093 _ => Ok(Expression::ArrayContains(f)),
22094 }
22095 } else {
22096 Ok(e)
22097 }
22098 }
22099
            // Rewrites `ArrayExcept(source, exclude)` for the target dialect.
            // DuckDB gets a NULL-aware, multiplicity-preserving CASE expansion;
            // Presto/Trino/Athena keep the plain ARRAY_EXCEPT function call;
            // everything else (including Snowflake) keeps the dedicated AST node.
            Action::ArrayExceptConvert => {
                if let Expression::ArrayExcept(f) = e {
                    let source_arr = f.this;
                    let exclude_arr = f.expression;
                    match target {
                        DialectType::DuckDB => {
                            // ARRAY_EXCEPT(source, exclude) -> complex CASE expression for DuckDB:
                            //   CASE WHEN source IS NULL OR exclude IS NULL THEN NULL
                            //   ELSE LIST_TRANSFORM(LIST_FILTER(LIST_ZIP(source, GENERATE_SERIES(1, LENGTH(source))),
                            //                                   pair -> (LENGTH(LIST_FILTER(source[1:pair[2]], e -> e IS NOT DISTINCT FROM pair[1]))
                            //                                            > LENGTH(LIST_FILTER(exclude, e -> e IS NOT DISTINCT FROM pair[1])))),
                            //                      pair -> pair[1])
                            //   END
                            // The zip-with-index plus prefix-count trick keeps an element only while
                            // its running occurrence count in `source` exceeds its total count in
                            // `exclude` (IS NOT DISTINCT FROM makes NULL elements compare equal).

                            // Build: source IS NULL
                            let source_is_null = Expression::IsNull(Box::new(crate::expressions::IsNull {
                                this: source_arr.clone(),
                                not: false,
                                postfix_form: false,
                            }));
                            // Build: exclude IS NULL
                            let exclude_is_null = Expression::IsNull(Box::new(crate::expressions::IsNull {
                                this: exclude_arr.clone(),
                                not: false,
                                postfix_form: false,
                            }));
                            // source IS NULL OR exclude IS NULL
                            let null_check = Expression::Or(Box::new(crate::expressions::BinaryOp {
                                left: source_is_null,
                                right: exclude_is_null,
                                left_comments: vec![],
                                operator_comments: vec![],
                                trailing_comments: vec![],
                                inferred_type: None,
                            }));

                            // GENERATE_SERIES(1, LENGTH(source)) — 1-based index stream
                            let length_source = Expression::Function(Box::new(Function::new(
                                "LENGTH".to_string(),
                                vec![source_arr.clone()],
                            )));
                            let gen_series = Expression::Function(Box::new(Function::new(
                                "GENERATE_SERIES".to_string(),
                                vec![Expression::number(1), length_source],
                            )));

                            // LIST_ZIP(source, GENERATE_SERIES(1, LENGTH(source)))
                            // Pairs each element with its 1-based position.
                            let list_zip = Expression::Function(Box::new(Function::new(
                                "LIST_ZIP".to_string(),
                                vec![source_arr.clone(), gen_series],
                            )));

                            // pair[1] - first element of pair (the value)
                            let pair_col = Expression::column("pair");
                            let pair_1 = Expression::Subscript(Box::new(crate::expressions::Subscript {
                                this: pair_col.clone(),
                                index: Expression::number(1),
                            }));
                            // pair[2] - second element of pair (the index)
                            let pair_2 = Expression::Subscript(Box::new(crate::expressions::Subscript {
                                this: pair_col.clone(),
                                index: Expression::number(2),
                            }));

                            // source[1:pair[2]] - prefix slice up to and including this position
                            let source_slice = Expression::ArraySlice(Box::new(crate::expressions::ArraySlice {
                                this: source_arr.clone(),
                                start: Some(Expression::number(1)),
                                end: Some(pair_2.clone()),
                            }));

                            // e column for lambda parameters below
                            let e_col = Expression::column("e");

                            // e IS NOT DISTINCT FROM pair[1] (for source slice filter)
                            let is_not_distinct_1 = Expression::NullSafeEq(Box::new(crate::expressions::BinaryOp {
                                left: e_col.clone(),
                                right: pair_1.clone(),
                                left_comments: vec![],
                                operator_comments: vec![],
                                trailing_comments: vec![],
                                inferred_type: None,
                            }));

                            // LIST_FILTER(source[1:pair[2]], e -> e IS NOT DISTINCT FROM pair[1])
                            let lambda_1 = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                                parameters: vec![crate::expressions::Identifier::new("e")],
                                body: is_not_distinct_1,
                                colon: false,
                                parameter_types: vec![],
                            }));
                            let list_filter_source_slice = Expression::Function(Box::new(Function::new(
                                "LIST_FILTER".to_string(),
                                vec![source_slice, lambda_1],
                            )));
                            // LENGTH(LIST_FILTER(source[1:pair[2]], e -> ...))
                            // = running occurrence count of this value within the prefix.
                            let len_source_slice = Expression::Function(Box::new(Function::new(
                                "LENGTH".to_string(),
                                vec![list_filter_source_slice],
                            )));

                            // e IS NOT DISTINCT FROM pair[1] (for exclude filter)
                            let is_not_distinct_2 = Expression::NullSafeEq(Box::new(crate::expressions::BinaryOp {
                                left: e_col.clone(),
                                right: pair_1.clone(),
                                left_comments: vec![],
                                operator_comments: vec![],
                                trailing_comments: vec![],
                                inferred_type: None,
                            }));

                            // LIST_FILTER(exclude, e -> e IS NOT DISTINCT FROM pair[1])
                            let lambda_2 = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                                parameters: vec![crate::expressions::Identifier::new("e")],
                                body: is_not_distinct_2,
                                colon: false,
                                parameter_types: vec![],
                            }));
                            let list_filter_exclude = Expression::Function(Box::new(Function::new(
                                "LIST_FILTER".to_string(),
                                vec![exclude_arr.clone(), lambda_2],
                            )));
                            // LENGTH(LIST_FILTER(exclude, e -> ...))
                            // = total occurrence count of this value in `exclude`.
                            let len_exclude = Expression::Function(Box::new(Function::new(
                                "LENGTH".to_string(),
                                vec![list_filter_exclude],
                            )));

                            // LENGTH(...) > LENGTH(...) — keep while prefix count exceeds exclude count
                            let gt_expr = Expression::Gt(Box::new(crate::expressions::BinaryOp {
                                left: len_source_slice,
                                right: len_exclude,
                                left_comments: vec![],
                                operator_comments: vec![],
                                trailing_comments: vec![],
                                inferred_type: None,
                            }));

                            // Wrap in parens for the lambda body
                            let gt_paren = Expression::Paren(Box::new(crate::expressions::Paren {
                                this: gt_expr,
                                trailing_comments: vec![],
                            }));

                            // pair -> (LENGTH(...) > LENGTH(...))
                            let filter_lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                                parameters: vec![crate::expressions::Identifier::new("pair")],
                                body: gt_paren,
                                colon: false,
                                parameter_types: vec![],
                            }));

                            // LIST_FILTER(LIST_ZIP(...), pair -> (...))
                            let list_filter_outer = Expression::Function(Box::new(Function::new(
                                "LIST_FILTER".to_string(),
                                vec![list_zip, filter_lambda],
                            )));

                            // pair -> pair[1] (for LIST_TRANSFORM) — project back to the value
                            let transform_lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                                parameters: vec![crate::expressions::Identifier::new("pair")],
                                body: pair_1.clone(),
                                colon: false,
                                parameter_types: vec![],
                            }));

                            // LIST_TRANSFORM(LIST_FILTER(...), pair -> pair[1])
                            let list_transform = Expression::Function(Box::new(Function::new(
                                "LIST_TRANSFORM".to_string(),
                                vec![list_filter_outer, transform_lambda],
                            )));

                            // CASE WHEN ... IS NULL ... THEN NULL ELSE LIST_TRANSFORM(...) END
                            Ok(Expression::Case(Box::new(Case {
                                operand: None,
                                whens: vec![(
                                    null_check,
                                    Expression::Null(Null),
                                )],
                                else_: Some(list_transform),
                                comments: Vec::new(),
                                inferred_type: None,
                            })))
                        }
                        DialectType::Snowflake => {
                            // Snowflake: ARRAY_EXCEPT(source, exclude) - keep as-is
                            // (node rebuilt because `this`/`expression` were moved out above)
                            Ok(Expression::ArrayExcept(Box::new(crate::expressions::BinaryFunc {
                                this: source_arr,
                                expression: exclude_arr,
                                original_name: None,
                                inferred_type: None,
                            })))
                        }
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // Presto/Trino: ARRAY_EXCEPT(source, exclude) - keep function name, array syntax already converted
                            Ok(Expression::Function(Box::new(Function::new(
                                "ARRAY_EXCEPT".to_string(),
                                vec![source_arr, exclude_arr],
                            ))))
                        }
                        // Default: reconstruct the dedicated AST node unchanged.
                        _ => Ok(Expression::ArrayExcept(Box::new(crate::expressions::BinaryFunc {
                            this: source_arr,
                            expression: exclude_arr,
                            original_name: None,
                            inferred_type: None,
                        }))),
                    }
                } else {
                    // Non-matching node shape: pass through unchanged.
                    Ok(e)
                }
            }
22311
            Action::ArrayDistinctConvert => {
                // ARRAY_DISTINCT(arr) -> DuckDB NULL-aware CASE:
                //   CASE WHEN ARRAY_LENGTH(arr) <> LIST_COUNT(arr)
                //     THEN LIST_APPEND(LIST_DISTINCT(LIST_FILTER(arr, _u -> NOT _u IS NULL)), NULL)
                //     ELSE LIST_DISTINCT(arr)
                //   END
                // Rationale (visible in the construction below): the length/count
                // mismatch branch detects NULL elements, strips them, dedupes, then
                // re-appends a single NULL so one NULL survives deduplication.
                if let Expression::ArrayDistinct(f) = e {
                    let arr = f.this;

                    // ARRAY_LENGTH(arr)
                    let array_length = Expression::Function(Box::new(Function::new(
                        "ARRAY_LENGTH".to_string(),
                        vec![arr.clone()],
                    )));
                    // LIST_COUNT(arr) — NOTE(review): presumably counts non-NULL
                    // elements in DuckDB, making the <> test a NULL-presence check;
                    // confirm against DuckDB docs.
                    let list_count = Expression::Function(Box::new(Function::new(
                        "LIST_COUNT".to_string(),
                        vec![arr.clone()],
                    )));
                    // ARRAY_LENGTH(arr) <> LIST_COUNT(arr)
                    let neq = Expression::Neq(Box::new(crate::expressions::BinaryOp {
                        left: array_length,
                        right: list_count,
                        left_comments: vec![],
                        operator_comments: vec![],
                        trailing_comments: vec![],
                        inferred_type: None,
                    }));

                    // _u column (lambda parameter)
                    let u_col = Expression::column("_u");
                    // NOT _u IS NULL
                    let u_is_null = Expression::IsNull(Box::new(crate::expressions::IsNull {
                        this: u_col.clone(),
                        not: false,
                        postfix_form: false,
                    }));
                    let not_u_is_null = Expression::Not(Box::new(crate::expressions::UnaryOp {
                        this: u_is_null,
                        inferred_type: None,
                    }));
                    // _u -> NOT _u IS NULL
                    let filter_lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                        parameters: vec![crate::expressions::Identifier::new("_u")],
                        body: not_u_is_null,
                        colon: false,
                        parameter_types: vec![],
                    }));
                    // LIST_FILTER(arr, _u -> NOT _u IS NULL)
                    let list_filter = Expression::Function(Box::new(Function::new(
                        "LIST_FILTER".to_string(),
                        vec![arr.clone(), filter_lambda],
                    )));
                    // LIST_DISTINCT(LIST_FILTER(arr, ...))
                    let list_distinct_filtered = Expression::Function(Box::new(Function::new(
                        "LIST_DISTINCT".to_string(),
                        vec![list_filter],
                    )));
                    // LIST_APPEND(LIST_DISTINCT(LIST_FILTER(...)), NULL)
                    let list_append = Expression::Function(Box::new(Function::new(
                        "LIST_APPEND".to_string(),
                        vec![list_distinct_filtered, Expression::Null(Null)],
                    )));

                    // LIST_DISTINCT(arr) — fast path when no NULLs are present
                    let list_distinct = Expression::Function(Box::new(Function::new(
                        "LIST_DISTINCT".to_string(),
                        vec![arr],
                    )));

                    // CASE WHEN neq THEN list_append ELSE list_distinct END
                    Ok(Expression::Case(Box::new(Case {
                        operand: None,
                        whens: vec![(neq, list_append)],
                        else_: Some(list_distinct),
                        comments: Vec::new(),
                        inferred_type: None,
                    })))
                } else {
                    // Non-matching node shape: pass through unchanged.
                    Ok(e)
                }
            }
22394
            Action::ArrayContainsDuckDBConvert => {
                // Snowflake ARRAY_CONTAINS(value, array) -> DuckDB NULL-aware:
                //   CASE WHEN value IS NULL
                //     THEN NULLIF(ARRAY_LENGTH(array) <> LIST_COUNT(array), FALSE)
                //     ELSE ARRAY_CONTAINS(array, value)
                //   END
                // Note: In Rust AST from Snowflake parse, this=value (first arg), expression=array (second arg)
                // The THEN branch yields TRUE when the array holds a NULL element and
                // NULL otherwise (NULLIF(FALSE, FALSE) -> NULL), matching three-valued
                // containment semantics for a NULL probe value.
                if let Expression::ArrayContains(f) = e {
                    let value = f.this;
                    let array = f.expression;

                    // value IS NULL
                    let value_is_null = Expression::IsNull(Box::new(crate::expressions::IsNull {
                        this: value.clone(),
                        not: false,
                        postfix_form: false,
                    }));

                    // ARRAY_LENGTH(array)
                    let array_length = Expression::Function(Box::new(Function::new(
                        "ARRAY_LENGTH".to_string(),
                        vec![array.clone()],
                    )));
                    // LIST_COUNT(array)
                    let list_count = Expression::Function(Box::new(Function::new(
                        "LIST_COUNT".to_string(),
                        vec![array.clone()],
                    )));
                    // ARRAY_LENGTH(array) <> LIST_COUNT(array)
                    let neq = Expression::Neq(Box::new(crate::expressions::BinaryOp {
                        left: array_length,
                        right: list_count,
                        left_comments: vec![],
                        operator_comments: vec![],
                        trailing_comments: vec![],
                        inferred_type: None,
                    }));
                    // NULLIF(ARRAY_LENGTH(array) <> LIST_COUNT(array), FALSE)
                    let nullif = Expression::Nullif(Box::new(crate::expressions::Nullif {
                        this: Box::new(neq),
                        expression: Box::new(Expression::Boolean(crate::expressions::BooleanLiteral { value: false })),
                    }));

                    // ARRAY_CONTAINS(array, value) - DuckDB syntax: array first, value second
                    let array_contains = Expression::Function(Box::new(Function::new(
                        "ARRAY_CONTAINS".to_string(),
                        vec![array, value],
                    )));

                    // CASE WHEN value IS NULL THEN NULLIF(...) ELSE ARRAY_CONTAINS(array, value) END
                    Ok(Expression::Case(Box::new(Case {
                        operand: None,
                        whens: vec![(value_is_null, nullif)],
                        else_: Some(array_contains),
                        comments: Vec::new(),
                        inferred_type: None,
                    })))
                } else {
                    // Non-matching node shape: pass through unchanged.
                    Ok(e)
                }
            }
22456
            Action::StrPositionExpand => {
                // StrPosition with position arg -> complex STRPOS expansion for Presto/DuckDB
                // For Presto: IF(STRPOS(SUBSTRING(str, pos), substr) = 0, 0, STRPOS(SUBSTRING(str, pos), substr) + pos - 1)
                // For DuckDB: CASE WHEN STRPOS(SUBSTRING(str, pos), substr) = 0 THEN 0 ELSE STRPOS(SUBSTRING(str, pos), substr) + pos - 1 END
                // The search starts at `pos`, then the found offset is shifted back
                // into the coordinate system of the full string (+ pos - 1); a miss
                // stays 0.
                if let Expression::StrPosition(sp) = e {
                    // Destructure the node; `occurrence` is only carried through to
                    // the fallback reconstruction below — the Presto/DuckDB
                    // expansions do not encode it.
                    let crate::expressions::StrPosition {
                        this,
                        substr,
                        position,
                        occurrence,
                    } = *sp;
                    let string = *this;
                    // Missing substring degrades to NULL (propagates through STRPOS).
                    let substr_expr = match substr {
                        Some(s) => *s,
                        None => Expression::Null(Null),
                    };
                    // Default start position is 1 (SQL strings are 1-indexed).
                    let pos = match position {
                        Some(p) => *p,
                        None => Expression::number(1),
                    };

                    // SUBSTRING(string, pos)
                    let substring_call = Expression::Function(Box::new(Function::new(
                        "SUBSTRING".to_string(),
                        vec![string.clone(), pos.clone()],
                    )));
                    // STRPOS(SUBSTRING(string, pos), substr)
                    let strpos_call = Expression::Function(Box::new(Function::new(
                        "STRPOS".to_string(),
                        vec![substring_call, substr_expr.clone()],
                    )));
                    // STRPOS(...) + pos - 1
                    let pos_adjusted =
                        Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
                            Expression::Add(Box::new(crate::expressions::BinaryOp::new(
                                strpos_call.clone(),
                                pos.clone(),
                            ))),
                            Expression::number(1),
                        )));
                    // STRPOS(...) = 0  (not-found sentinel)
                    let is_zero = Expression::Eq(Box::new(crate::expressions::BinaryOp::new(
                        strpos_call.clone(),
                        Expression::number(0),
                    )));

                    match target {
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // IF(STRPOS(SUBSTRING(str, pos), substr) = 0, 0, STRPOS(SUBSTRING(str, pos), substr) + pos - 1)
                            Ok(Expression::Function(Box::new(Function::new(
                                "IF".to_string(),
                                vec![is_zero, Expression::number(0), pos_adjusted],
                            ))))
                        }
                        DialectType::DuckDB => {
                            // CASE WHEN STRPOS(SUBSTRING(str, pos), substr) = 0 THEN 0 ELSE STRPOS(SUBSTRING(str, pos), substr) + pos - 1 END
                            Ok(Expression::Case(Box::new(Case {
                                operand: None,
                                whens: vec![(is_zero, Expression::number(0))],
                                else_: Some(pos_adjusted),
                                comments: Vec::new(),
                                inferred_type: None,
                            })))
                        }
                        _ => {
                            // Reconstruct StrPosition for all other targets; note the
                            // defaults applied above (NULL substr, position 1) are
                            // baked into the rebuilt node.
                            Ok(Expression::StrPosition(Box::new(
                                crate::expressions::StrPosition {
                                    this: Box::new(string),
                                    substr: Some(Box::new(substr_expr)),
                                    position: Some(Box::new(pos)),
                                    occurrence,
                                },
                            )))
                        }
                    }
                } else {
                    // Non-matching node shape: pass through unchanged.
                    Ok(e)
                }
            }
22537
            // Rewrites MONTHS_BETWEEN(end, start) per target. DuckDB gets a full
            // expansion: whole-month DATE_DIFF plus a fractional day component;
            // Snowflake/Redshift and Presto-family map to their DATEDIFF variants.
            Action::MonthsBetweenConvert => {
                if let Expression::MonthsBetween(mb) = e {
                    // In this AST, `this` is the end date and `expression` the start.
                    let crate::expressions::BinaryFunc {
                        this: end_date,
                        expression: start_date,
                        ..
                    } = *mb;
                    match target {
                        DialectType::DuckDB => {
                            // DATE_DIFF('MONTH', start, end) gives the whole-month part.
                            let cast_end = Self::ensure_cast_date(end_date);
                            let cast_start = Self::ensure_cast_date(start_date);
                            let dd = Expression::Function(Box::new(Function::new(
                                "DATE_DIFF".to_string(),
                                vec![
                                    Expression::string("MONTH"),
                                    cast_start.clone(),
                                    cast_end.clone(),
                                ],
                            )));
                            // Day-of-month of each endpoint.
                            let day_end = Expression::Function(Box::new(Function::new(
                                "DAY".to_string(),
                                vec![cast_end.clone()],
                            )));
                            let day_start = Expression::Function(Box::new(Function::new(
                                "DAY".to_string(),
                                vec![cast_start.clone()],
                            )));
                            // Day-of-month of each endpoint's last day (month length).
                            let last_day_end = Expression::Function(Box::new(Function::new(
                                "LAST_DAY".to_string(),
                                vec![cast_end.clone()],
                            )));
                            let last_day_start = Expression::Function(Box::new(Function::new(
                                "LAST_DAY".to_string(),
                                vec![cast_start.clone()],
                            )));
                            let day_last_end = Expression::Function(Box::new(Function::new(
                                "DAY".to_string(),
                                vec![last_day_end],
                            )));
                            let day_last_start = Expression::Function(Box::new(Function::new(
                                "DAY".to_string(),
                                vec![last_day_start],
                            )));
                            // Both endpoints fall on their month's last day?
                            let cond1 = Expression::Eq(Box::new(BinaryOp::new(
                                day_end.clone(),
                                day_last_end,
                            )));
                            let cond2 = Expression::Eq(Box::new(BinaryOp::new(
                                day_start.clone(),
                                day_last_start,
                            )));
                            let both_cond =
                                Expression::And(Box::new(BinaryOp::new(cond1, cond2)));
                            // Fractional part: (DAY(end) - DAY(start)) / 31.0,
                            // zeroed when both endpoints are month-ends.
                            // NOTE(review): the /31.0 rule mirrors Oracle-style
                            // MONTHS_BETWEEN fractional semantics — confirm against
                            // the source dialect's documentation.
                            let day_diff =
                                Expression::Sub(Box::new(BinaryOp::new(day_end, day_start)));
                            let day_diff_paren =
                                Expression::Paren(Box::new(crate::expressions::Paren {
                                    this: day_diff,
                                    trailing_comments: Vec::new(),
                                }));
                            let frac = Expression::Div(Box::new(BinaryOp::new(
                                day_diff_paren,
                                Expression::Literal(Literal::Number("31.0".to_string())),
                            )));
                            let case_expr = Expression::Case(Box::new(Case {
                                operand: None,
                                whens: vec![(both_cond, Expression::number(0))],
                                else_: Some(frac),
                                comments: Vec::new(),
                                inferred_type: None,
                            }));
                            // whole months + fractional component
                            Ok(Expression::Add(Box::new(BinaryOp::new(dd, case_expr))))
                        }
                        DialectType::Snowflake | DialectType::Redshift => {
                            // DATEDIFF(MONTH, start, end) — bare identifier unit
                            let unit = Expression::Identifier(Identifier::new("MONTH"));
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATEDIFF".to_string(),
                                vec![unit, start_date, end_date],
                            ))))
                        }
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // DATE_DIFF('MONTH', start, end) — string unit
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_DIFF".to_string(),
                                vec![Expression::string("MONTH"), start_date, end_date],
                            ))))
                        }
                        // Default: rebuild the dedicated node unchanged.
                        _ => Ok(Expression::MonthsBetween(Box::new(
                            crate::expressions::BinaryFunc {
                                this: end_date,
                                expression: start_date,
                                original_name: None,
                                inferred_type: None,
                            },
                        ))),
                    }
                } else {
                    // Non-matching node shape: pass through unchanged.
                    Ok(e)
                }
            }
22637
            // Rewrites ADD_MONTHS(date, n) per target dialect. The Snowflake->DuckDB
            // path is the elaborate one: it reproduces Snowflake's end-of-month
            // snapping behavior with a CASE on LAST_DAY.
            Action::AddMonthsConvert => {
                if let Expression::AddMonths(am) = e {
                    let date = am.this;
                    let val = am.expression;
                    match target {
                        DialectType::TSQL | DialectType::Fabric => {
                            // DATEADD(MONTH, n, CAST(date AS DATETIME2))
                            let cast_date = Self::ensure_cast_datetime2(date);
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATEADD".to_string(),
                                vec![
                                    Expression::Identifier(Identifier::new("MONTH")),
                                    val,
                                    cast_date,
                                ],
                            ))))
                        }
                        DialectType::DuckDB if matches!(source, DialectType::Snowflake) => {
                            // DuckDB ADD_MONTHS from Snowflake: CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(date + interval) ELSE date + interval END
                            // Optionally wrapped in CAST(... AS type) if the input had a specific type

                            // Determine the cast type from the date expression
                            let (cast_date, return_type) = match &date {
                                Expression::Literal(Literal::String(_)) => {
                                    // String literal: CAST(str AS TIMESTAMP), no outer CAST
                                    (
                                        Expression::Cast(Box::new(Cast {
                                            this: date.clone(),
                                            to: DataType::Timestamp {
                                                precision: None,
                                                timezone: false,
                                            },
                                            trailing_comments: Vec::new(),
                                            double_colon_syntax: false,
                                            format: None,
                                            default: None,
                                            inferred_type: None,
                                        })),
                                        None,
                                    )
                                }
                                Expression::Cast(c) => {
                                    // Already cast (e.g., '2023-01-31'::DATE) - keep the cast, wrap result in CAST(... AS type)
                                    (date.clone(), Some(c.to.clone()))
                                }
                                _ => {
                                    // Expression or NULL::TYPE - keep as-is, check for cast type
                                    // NOTE(review): this inner `if let Expression::Cast`
                                    // is unreachable — the `Expression::Cast(c)` arm
                                    // above already captures every Cast, so this
                                    // branch always yields (date.clone(), None).
                                    // Candidate for simplification.
                                    if let Expression::Cast(c) = &date {
                                        (date.clone(), Some(c.to.clone()))
                                    } else {
                                        (date.clone(), None)
                                    }
                                }
                            };

                            // Build the interval expression
                            // For non-integer values (float, decimal, cast), use TO_MONTHS(CAST(ROUND(val) AS INT))
                            // For integer values, use INTERVAL val MONTH
                            let is_non_integer_val = match &val {
                                Expression::Literal(Literal::Number(n)) => n.contains('.'),
                                Expression::Cast(_) => true, // e.g., 3.2::DECIMAL(10,2)
                                Expression::Neg(n) => {
                                    // Negated literal: inspect the inner number for a decimal point.
                                    if let Expression::Literal(Literal::Number(s)) = &n.this {
                                        s.contains('.')
                                    } else {
                                        false
                                    }
                                }
                                _ => false,
                            };

                            let add_interval = if is_non_integer_val {
                                // TO_MONTHS(CAST(ROUND(val) AS INT))
                                let round_val = Expression::Function(Box::new(Function::new(
                                    "ROUND".to_string(),
                                    vec![val.clone()],
                                )));
                                let cast_int = Expression::Cast(Box::new(Cast {
                                    this: round_val,
                                    to: DataType::Int {
                                        length: None,
                                        integer_spelling: false,
                                    },
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                }));
                                Expression::Function(Box::new(Function::new(
                                    "TO_MONTHS".to_string(),
                                    vec![cast_int],
                                )))
                            } else {
                                // INTERVAL val MONTH
                                // For negative numbers, negations, and NULL, wrap in
                                // parens so the rendered INTERVAL parses correctly.
                                let interval_val = match &val {
                                    Expression::Literal(Literal::Number(n))
                                        if n.starts_with('-') =>
                                    {
                                        Expression::Paren(Box::new(Paren {
                                            this: val.clone(),
                                            trailing_comments: Vec::new(),
                                        }))
                                    }
                                    Expression::Neg(_) => Expression::Paren(Box::new(Paren {
                                        this: val.clone(),
                                        trailing_comments: Vec::new(),
                                    })),
                                    Expression::Null(_) => Expression::Paren(Box::new(Paren {
                                        this: val.clone(),
                                        trailing_comments: Vec::new(),
                                    })),
                                    _ => val.clone(),
                                };
                                Expression::Interval(Box::new(crate::expressions::Interval {
                                    this: Some(interval_val),
                                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                        unit: crate::expressions::IntervalUnit::Month,
                                        use_plural: false,
                                    }),
                                }))
                            };

                            // Build: date + interval
                            let date_plus_interval = Expression::Add(Box::new(BinaryOp::new(
                                cast_date.clone(),
                                add_interval.clone(),
                            )));

                            // Build LAST_DAY(date)
                            let last_day_date = Expression::Function(Box::new(Function::new(
                                "LAST_DAY".to_string(),
                                vec![cast_date.clone()],
                            )));

                            // Build LAST_DAY(date + interval)
                            let last_day_date_plus =
                                Expression::Function(Box::new(Function::new(
                                    "LAST_DAY".to_string(),
                                    vec![date_plus_interval.clone()],
                                )));

                            // Build: CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(date + interval) ELSE date + interval END
                            let case_expr = Expression::Case(Box::new(Case {
                                operand: None,
                                whens: vec![(
                                    Expression::Eq(Box::new(BinaryOp::new(
                                        last_day_date,
                                        cast_date.clone(),
                                    ))),
                                    last_day_date_plus,
                                )],
                                else_: Some(date_plus_interval),
                                comments: Vec::new(),
                                inferred_type: None,
                            }));

                            // Wrap in CAST(... AS type) if needed to preserve the
                            // input's declared type on the result.
                            if let Some(dt) = return_type {
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: case_expr,
                                    to: dt,
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                })))
                            } else {
                                Ok(case_expr)
                            }
                        }
                        DialectType::DuckDB => {
                            // Non-Snowflake source: simple date + INTERVAL
                            // (string literals are first cast to TIMESTAMP)
                            let cast_date =
                                if matches!(&date, Expression::Literal(Literal::String(_))) {
                                    Expression::Cast(Box::new(Cast {
                                        this: date,
                                        to: DataType::Timestamp {
                                            precision: None,
                                            timezone: false,
                                        },
                                        trailing_comments: Vec::new(),
                                        double_colon_syntax: false,
                                        format: None,
                                        default: None,
                                        inferred_type: None,
                                    }))
                                } else {
                                    date
                                };
                            let interval =
                                Expression::Interval(Box::new(crate::expressions::Interval {
                                    this: Some(val),
                                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                        unit: crate::expressions::IntervalUnit::Month,
                                        use_plural: false,
                                    }),
                                }));
                            Ok(Expression::Add(Box::new(BinaryOp::new(
                                cast_date, interval,
                            ))))
                        }
                        DialectType::Snowflake => {
                            // Keep ADD_MONTHS when source is also Snowflake;
                            // otherwise render as DATEADD(MONTH, n, date).
                            if matches!(source, DialectType::Snowflake) {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "ADD_MONTHS".to_string(),
                                    vec![date, val],
                                ))))
                            } else {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATEADD".to_string(),
                                    vec![
                                        Expression::Identifier(Identifier::new("MONTH")),
                                        val,
                                        date,
                                    ],
                                ))))
                            }
                        }
                        DialectType::Redshift => {
                            // DATEADD(MONTH, n, date)
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATEADD".to_string(),
                                vec![
                                    Expression::Identifier(Identifier::new("MONTH")),
                                    val,
                                    date,
                                ],
                            ))))
                        }
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // DATE_ADD('MONTH', n, date) — string literal dates are
                            // cast to TIMESTAMP first.
                            let cast_date =
                                if matches!(&date, Expression::Literal(Literal::String(_))) {
                                    Expression::Cast(Box::new(Cast {
                                        this: date,
                                        to: DataType::Timestamp {
                                            precision: None,
                                            timezone: false,
                                        },
                                        trailing_comments: Vec::new(),
                                        double_colon_syntax: false,
                                        format: None,
                                        default: None,
                                        inferred_type: None,
                                    }))
                                } else {
                                    date
                                };
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_ADD".to_string(),
                                vec![Expression::string("MONTH"), val, cast_date],
                            ))))
                        }
                        DialectType::BigQuery => {
                            // DATE_ADD(date, INTERVAL n MONTH) — BigQuery takes the
                            // date first and an INTERVAL second.
                            let interval =
                                Expression::Interval(Box::new(crate::expressions::Interval {
                                    this: Some(val),
                                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                        unit: crate::expressions::IntervalUnit::Month,
                                        use_plural: false,
                                    }),
                                }));
                            let cast_date =
                                if matches!(&date, Expression::Literal(Literal::String(_))) {
                                    Expression::Cast(Box::new(Cast {
                                        this: date,
                                        // DATETIME has no dedicated DataType variant here,
                                        // so it is emitted via Custom.
                                        to: DataType::Custom {
                                            name: "DATETIME".to_string(),
                                        },
                                        trailing_comments: Vec::new(),
                                        double_colon_syntax: false,
                                        format: None,
                                        default: None,
                                        inferred_type: None,
                                    }))
                                } else {
                                    date
                                };
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_ADD".to_string(),
                                vec![cast_date, interval],
                            ))))
                        }
                        DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
                            // Native ADD_MONTHS(date, n)
                            Ok(Expression::Function(Box::new(Function::new(
                                "ADD_MONTHS".to_string(),
                                vec![date, val],
                            ))))
                        }
                        _ => {
                            // Default: keep as AddMonths expression
                            Ok(Expression::AddMonths(Box::new(
                                crate::expressions::BinaryFunc {
                                    this: date,
                                    expression: val,
                                    original_name: None,
                                    inferred_type: None,
                                },
                            )))
                        }
                    }
                } else {
                    // Non-matching node shape: pass through unchanged.
                    Ok(e)
                }
            }
22944
            Action::PercentileContConvert => {
                // PERCENTILE_CONT(p) WITHIN GROUP (ORDER BY col) ->
                //   Presto/Trino: APPROX_PERCENTILE(col, p)
                //   Spark/Databricks: PERCENTILE_APPROX(col, p)
                // NOTE(review): `_is_disc` is computed but never used below, so
                // PERCENTILE_DISC and PERCENTILE_CONT currently map to the same
                // approximate function — confirm that is intended.
                if let Expression::WithinGroup(wg) = e {
                    // Extract percentile value (defaulting to 0.5) from whichever
                    // node shape the parser produced for the aggregate.
                    let (percentile, _is_disc) = match &wg.this {
                        Expression::Function(f) => {
                            let is_disc = f.name.eq_ignore_ascii_case("PERCENTILE_DISC");
                            let pct = f.args.first().cloned().unwrap_or(Expression::Literal(
                                Literal::Number("0.5".to_string()),
                            ));
                            (pct, is_disc)
                        }
                        Expression::AggregateFunction(af) => {
                            let is_disc = af.name.eq_ignore_ascii_case("PERCENTILE_DISC");
                            let pct = af.args.first().cloned().unwrap_or(Expression::Literal(
                                Literal::Number("0.5".to_string()),
                            ));
                            (pct, is_disc)
                        }
                        Expression::PercentileCont(pc) => (pc.percentile.clone(), false),
                        // Unknown inner shape: leave the WITHIN GROUP untouched.
                        _ => return Ok(Expression::WithinGroup(wg)),
                    };
                    // The ordered-by column becomes the first argument; a missing
                    // ORDER BY degrades to the literal 1.
                    let col = wg
                        .order_by
                        .first()
                        .map(|o| o.this.clone())
                        .unwrap_or(Expression::Literal(Literal::Number("1".to_string())));

                    let func_name = match target {
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            "APPROX_PERCENTILE"
                        }
                        _ => "PERCENTILE_APPROX", // Spark, Databricks
                    };
                    Ok(Expression::Function(Box::new(Function::new(
                        func_name.to_string(),
                        vec![col, percentile],
                    ))))
                } else {
                    // Non-matching node shape: pass through unchanged.
                    Ok(e)
                }
            }
22989
            Action::CurrentUserSparkParens => {
                // CURRENT_USER -> CURRENT_USER() for Spark: replace the bare keyword
                // node with a zero-argument function call so parentheses render.
                if let Expression::CurrentUser(_) = e {
                    Ok(Expression::Function(Box::new(Function::new(
                        "CURRENT_USER".to_string(),
                        vec![],
                    ))))
                } else {
                    // Non-matching node shape: pass through unchanged.
                    Ok(e)
                }
            }
23001
            Action::SparkDateFuncCast => {
                // MONTH/YEAR/DAY('string') from Spark -> wrap arg in CAST to DATE
                // so the extraction functions receive a date, not bare text.
                let cast_arg = |arg: Expression| -> Expression {
                    match target {
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // Presto-family needs the double cast (TIMESTAMP then DATE).
                            Self::double_cast_timestamp_date(arg)
                        }
                        _ => {
                            // DuckDB, PostgreSQL, etc: CAST(arg AS DATE)
                            Self::ensure_cast_date(arg)
                        }
                    }
                };
                // Apply to the three date-part extraction nodes; anything else
                // passes through unchanged.
                match e {
                    Expression::Month(f) => Ok(Expression::Month(Box::new(
                        crate::expressions::UnaryFunc::new(cast_arg(f.this)),
                    ))),
                    Expression::Year(f) => Ok(Expression::Year(Box::new(
                        crate::expressions::UnaryFunc::new(cast_arg(f.this)),
                    ))),
                    Expression::Day(f) => Ok(Expression::Day(Box::new(
                        crate::expressions::UnaryFunc::new(cast_arg(f.this)),
                    ))),
                    other => Ok(other),
                }
            }
23028
            Action::MapFromArraysConvert => {
                // Expression::MapFromArrays(keys, values) -> target-specific call:
                // Snowflake uses OBJECT_CONSTRUCT; everyone else uses MAP(keys, values).
                if let Expression::MapFromArrays(mfa) = e {
                    let keys = mfa.this;
                    let values = mfa.expression;
                    match target {
                        DialectType::Snowflake => Ok(Expression::Function(Box::new(
                            Function::new("OBJECT_CONSTRUCT".to_string(), vec![keys, values]),
                        ))),
                        _ => {
                            // Hive, Presto, DuckDB, etc.: MAP(keys, values)
                            Ok(Expression::Function(Box::new(Function::new(
                                "MAP".to_string(),
                                vec![keys, values],
                            ))))
                        }
                    }
                } else {
                    // Non-matching node shape: pass through unchanged.
                    Ok(e)
                }
            }
23050
            // Rewrites `expr <op> ANY (subquery)` into a lambda-based
            // EXISTS(subquery, x -> expr <op> x) call. Only fires when the ANY node
            // carries a comparison operator; otherwise the node is left as-is.
            Action::AnyToExists => {
                if let Expression::Any(q) = e {
                    if let Some(op) = q.op.clone() {
                        // Fresh lambda parameter `x` stands for each element produced
                        // by the subquery/array side.
                        let lambda_param = crate::expressions::Identifier::new("x");
                        let rhs = Expression::Identifier(lambda_param.clone());
                        // Rebuild the original comparison with `x` on the right.
                        let body = match op {
                            crate::expressions::QuantifiedOp::Eq => {
                                Expression::Eq(Box::new(BinaryOp::new(q.this, rhs)))
                            }
                            crate::expressions::QuantifiedOp::Neq => {
                                Expression::Neq(Box::new(BinaryOp::new(q.this, rhs)))
                            }
                            crate::expressions::QuantifiedOp::Lt => {
                                Expression::Lt(Box::new(BinaryOp::new(q.this, rhs)))
                            }
                            crate::expressions::QuantifiedOp::Lte => {
                                Expression::Lte(Box::new(BinaryOp::new(q.this, rhs)))
                            }
                            crate::expressions::QuantifiedOp::Gt => {
                                Expression::Gt(Box::new(BinaryOp::new(q.this, rhs)))
                            }
                            crate::expressions::QuantifiedOp::Gte => {
                                Expression::Gte(Box::new(BinaryOp::new(q.this, rhs)))
                            }
                        };
                        let lambda =
                            Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                                parameters: vec![lambda_param],
                                body,
                                colon: false,
                                parameter_types: Vec::new(),
                            }));
                        // EXISTS(subquery, x -> <comparison>)
                        Ok(Expression::Function(Box::new(Function::new(
                            "EXISTS".to_string(),
                            vec![q.subquery, lambda],
                        ))))
                    } else {
                        // No operator recorded on the ANY node: leave it untouched.
                        Ok(Expression::Any(q))
                    }
                } else {
                    // Non-matching node shape: pass through unchanged.
                    Ok(e)
                }
            }
23094
            Action::GenerateSeriesConvert => {
                // GENERATE_SERIES(start, end[, step]) -> SEQUENCE for Spark/Databricks/Hive, wrapped in UNNEST/EXPLODE
                // For DuckDB target: wrap in UNNEST(GENERATE_SERIES(...))
                // For PG/Redshift target: keep as GENERATE_SERIES but normalize interval string step
                if let Expression::Function(f) = e {
                    // Only rewrite a GENERATE_SERIES call with at least (start, end);
                    // any other function, or an under-supplied call, passes through.
                    if f.name.eq_ignore_ascii_case("GENERATE_SERIES") && f.args.len() >= 2 {
                        let start = f.args[0].clone();
                        let end = f.args[1].clone();
                        // The third (step) argument is optional.
                        let step = f.args.get(2).cloned();

                        // Normalize step: convert string interval like '1day' or ' 2 days ' to INTERVAL expression
                        let step = step.map(|s| Self::normalize_interval_string(s, target));

                        // Helper: wrap CURRENT_TIMESTAMP in CAST(... AS TIMESTAMP) for Presto/Trino/Spark
                        // (these engines need an explicit TIMESTAMP for sequence endpoints;
                        // all other expressions are passed through unchanged).
                        let maybe_cast_timestamp = |arg: Expression| -> Expression {
                            if matches!(
                                target,
                                DialectType::Presto
                                    | DialectType::Trino
                                    | DialectType::Athena
                                    | DialectType::Spark
                                    | DialectType::Databricks
                                    | DialectType::Hive
                            ) {
                                match &arg {
                                    Expression::CurrentTimestamp(_) => {
                                        Expression::Cast(Box::new(Cast {
                                            this: arg,
                                            to: DataType::Timestamp {
                                                precision: None,
                                                timezone: false,
                                            },
                                            trailing_comments: Vec::new(),
                                            double_colon_syntax: false,
                                            format: None,
                                            default: None,
                                            inferred_type: None,
                                        }))
                                    }
                                    _ => arg,
                                }
                            } else {
                                arg
                            }
                        };

                        let start = maybe_cast_timestamp(start);
                        let end = maybe_cast_timestamp(end);

                        // For PostgreSQL/Redshift target, keep as GENERATE_SERIES
                        // (native support); only the step normalization above applies.
                        if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
                            let mut gs_args = vec![start, end];
                            if let Some(step) = step {
                                gs_args.push(step);
                            }
                            return Ok(Expression::Function(Box::new(Function::new(
                                "GENERATE_SERIES".to_string(),
                                gs_args,
                            ))));
                        }

                        // For DuckDB target: wrap in UNNEST(GENERATE_SERIES(...))
                        // because DuckDB's GENERATE_SERIES returns a list value.
                        if matches!(target, DialectType::DuckDB) {
                            let mut gs_args = vec![start, end];
                            if let Some(step) = step {
                                gs_args.push(step);
                            }
                            let gs = Expression::Function(Box::new(Function::new(
                                "GENERATE_SERIES".to_string(),
                                gs_args,
                            )));
                            return Ok(Expression::Function(Box::new(Function::new(
                                "UNNEST".to_string(),
                                vec![gs],
                            ))));
                        }

                        // Remaining targets use a SEQUENCE(...) call, possibly wrapped below.
                        let mut seq_args = vec![start, end];
                        if let Some(step) = step {
                            seq_args.push(step);
                        }

                        let seq = Expression::Function(Box::new(Function::new(
                            "SEQUENCE".to_string(),
                            seq_args,
                        )));

                        match target {
                            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                                // Wrap in UNNEST
                                Ok(Expression::Function(Box::new(Function::new(
                                    "UNNEST".to_string(),
                                    vec![seq],
                                ))))
                            }
                            DialectType::Spark
                            | DialectType::Databricks
                            | DialectType::Hive => {
                                // Wrap in EXPLODE
                                Ok(Expression::Function(Box::new(Function::new(
                                    "EXPLODE".to_string(),
                                    vec![seq],
                                ))))
                            }
                            _ => {
                                // Just SEQUENCE for others
                                Ok(seq)
                            }
                        }
                    } else {
                        Ok(Expression::Function(f))
                    }
                } else {
                    Ok(e)
                }
            }
23211
23212 Action::ConcatCoalesceWrap => {
23213 // CONCAT(a, b) function -> CONCAT(COALESCE(CAST(a AS VARCHAR), ''), ...) for Presto
23214 // CONCAT(a, b) function -> CONCAT(COALESCE(a, ''), ...) for ClickHouse
23215 if let Expression::Function(f) = e {
23216 if f.name.eq_ignore_ascii_case("CONCAT") {
23217 let new_args: Vec<Expression> = f
23218 .args
23219 .into_iter()
23220 .map(|arg| {
23221 let cast_arg = if matches!(
23222 target,
23223 DialectType::Presto
23224 | DialectType::Trino
23225 | DialectType::Athena
23226 ) {
23227 Expression::Cast(Box::new(Cast {
23228 this: arg,
23229 to: DataType::VarChar {
23230 length: None,
23231 parenthesized_length: false,
23232 },
23233 trailing_comments: Vec::new(),
23234 double_colon_syntax: false,
23235 format: None,
23236 default: None,
23237 inferred_type: None,
23238 }))
23239 } else {
23240 arg
23241 };
23242 Expression::Function(Box::new(Function::new(
23243 "COALESCE".to_string(),
23244 vec![cast_arg, Expression::string("")],
23245 )))
23246 })
23247 .collect();
23248 Ok(Expression::Function(Box::new(Function::new(
23249 "CONCAT".to_string(),
23250 new_args,
23251 ))))
23252 } else {
23253 Ok(Expression::Function(f))
23254 }
23255 } else {
23256 Ok(e)
23257 }
23258 }
23259
23260 Action::PipeConcatToConcat => {
23261 // a || b (Concat operator) -> CONCAT(CAST(a AS VARCHAR), CAST(b AS VARCHAR)) for Presto/Trino
23262 if let Expression::Concat(op) = e {
23263 let cast_left = Expression::Cast(Box::new(Cast {
23264 this: op.left,
23265 to: DataType::VarChar {
23266 length: None,
23267 parenthesized_length: false,
23268 },
23269 trailing_comments: Vec::new(),
23270 double_colon_syntax: false,
23271 format: None,
23272 default: None,
23273 inferred_type: None,
23274 }));
23275 let cast_right = Expression::Cast(Box::new(Cast {
23276 this: op.right,
23277 to: DataType::VarChar {
23278 length: None,
23279 parenthesized_length: false,
23280 },
23281 trailing_comments: Vec::new(),
23282 double_colon_syntax: false,
23283 format: None,
23284 default: None,
23285 inferred_type: None,
23286 }));
23287 Ok(Expression::Function(Box::new(Function::new(
23288 "CONCAT".to_string(),
23289 vec![cast_left, cast_right],
23290 ))))
23291 } else {
23292 Ok(e)
23293 }
23294 }
23295
            Action::DivFuncConvert => {
                // DIV(a, b) -> target-specific integer division
                if let Expression::Function(f) = e {
                    // Only the exact two-argument DIV(a, b) form is rewritten;
                    // anything else passes through unchanged.
                    if f.name.eq_ignore_ascii_case("DIV") && f.args.len() == 2 {
                        let a = f.args[0].clone();
                        let b = f.args[1].clone();
                        match target {
                            DialectType::DuckDB => {
                                // DIV(a, b) -> CAST(a // b AS DECIMAL)
                                // `//` is DuckDB's integer-division operator; the outer
                                // CAST restores a numeric (non-integer) result type.
                                let int_div = Expression::IntDiv(Box::new(
                                    crate::expressions::BinaryFunc {
                                        this: a,
                                        expression: b,
                                        original_name: None,
                                        inferred_type: None,
                                    },
                                ));
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: int_div,
                                    to: DataType::Decimal {
                                        precision: None,
                                        scale: None,
                                    },
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                })))
                            }
                            DialectType::BigQuery => {
                                // DIV(a, b) -> CAST(DIV(a, b) AS NUMERIC)
                                // BigQuery has a native DIV; only the result type is adjusted.
                                let div_func = Expression::Function(Box::new(Function::new(
                                    "DIV".to_string(),
                                    vec![a, b],
                                )));
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: div_func,
                                    to: DataType::Custom {
                                        name: "NUMERIC".to_string(),
                                    },
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                })))
                            }
                            DialectType::SQLite => {
                                // DIV(a, b) -> CAST(CAST(CAST(a AS REAL) / b AS INTEGER) AS REAL)
                                // SQLite has no DIV: force a real division, truncate via the
                                // INTEGER cast, then cast back to REAL for the result type.
                                let cast_a = Expression::Cast(Box::new(Cast {
                                    this: a,
                                    to: DataType::Custom {
                                        name: "REAL".to_string(),
                                    },
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                }));
                                let div = Expression::Div(Box::new(BinaryOp::new(cast_a, b)));
                                let cast_int = Expression::Cast(Box::new(Cast {
                                    this: div,
                                    to: DataType::Int {
                                        length: None,
                                        // Emit the INTEGER spelling rather than INT.
                                        integer_spelling: true,
                                    },
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                }));
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: cast_int,
                                    to: DataType::Custom {
                                        name: "REAL".to_string(),
                                    },
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                })))
                            }
                            _ => Ok(Expression::Function(f)),
                        }
                    } else {
                        Ok(Expression::Function(f))
                    }
                } else {
                    Ok(e)
                }
            }
23391
23392 Action::JsonObjectAggConvert => {
23393 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
23394 match e {
23395 Expression::Function(f) => Ok(Expression::Function(Box::new(
23396 Function::new("JSON_GROUP_OBJECT".to_string(), f.args),
23397 ))),
23398 Expression::AggregateFunction(af) => {
23399 // AggregateFunction stores all args in the `args` vec
23400 Ok(Expression::Function(Box::new(Function::new(
23401 "JSON_GROUP_OBJECT".to_string(),
23402 af.args,
23403 ))))
23404 }
23405 other => Ok(other),
23406 }
23407 }
23408
23409 Action::JsonbExistsConvert => {
23410 // JSONB_EXISTS('json', 'key') -> JSON_EXISTS('json', '$.key') for DuckDB
23411 if let Expression::Function(f) = e {
23412 if f.args.len() == 2 {
23413 let json_expr = f.args[0].clone();
23414 let key = match &f.args[1] {
23415 Expression::Literal(crate::expressions::Literal::String(s)) => {
23416 format!("$.{}", s)
23417 }
23418 _ => return Ok(Expression::Function(f)),
23419 };
23420 Ok(Expression::Function(Box::new(Function::new(
23421 "JSON_EXISTS".to_string(),
23422 vec![json_expr, Expression::string(&key)],
23423 ))))
23424 } else {
23425 Ok(Expression::Function(f))
23426 }
23427 } else {
23428 Ok(e)
23429 }
23430 }
23431
23432 Action::DateBinConvert => {
23433 // DATE_BIN('interval', ts, origin) -> TIME_BUCKET('interval', ts, origin) for DuckDB
23434 if let Expression::Function(f) = e {
23435 Ok(Expression::Function(Box::new(Function::new(
23436 "TIME_BUCKET".to_string(),
23437 f.args,
23438 ))))
23439 } else {
23440 Ok(e)
23441 }
23442 }
23443
23444 Action::MysqlCastCharToText => {
23445 // MySQL CAST(x AS CHAR) was originally TEXT -> convert to target text type
23446 if let Expression::Cast(mut c) = e {
23447 c.to = DataType::Text;
23448 Ok(Expression::Cast(c))
23449 } else {
23450 Ok(e)
23451 }
23452 }
23453
23454 Action::SparkCastVarcharToString => {
23455 // Spark parses VARCHAR(n)/CHAR(n) as TEXT -> normalize to STRING
23456 match e {
23457 Expression::Cast(mut c) => {
23458 c.to = Self::normalize_varchar_to_string(c.to);
23459 Ok(Expression::Cast(c))
23460 }
23461 Expression::TryCast(mut c) => {
23462 c.to = Self::normalize_varchar_to_string(c.to);
23463 Ok(Expression::TryCast(c))
23464 }
23465 _ => Ok(e),
23466 }
23467 }
23468
23469 Action::MinMaxToLeastGreatest => {
23470 // Multi-arg MIN(a,b,c) -> LEAST(a,b,c), MAX(a,b,c) -> GREATEST(a,b,c)
23471 if let Expression::Function(f) = e {
23472 let name = f.name.to_uppercase();
23473 let new_name = match name.as_str() {
23474 "MIN" => "LEAST",
23475 "MAX" => "GREATEST",
23476 _ => return Ok(Expression::Function(f)),
23477 };
23478 Ok(Expression::Function(Box::new(Function::new(
23479 new_name.to_string(),
23480 f.args,
23481 ))))
23482 } else {
23483 Ok(e)
23484 }
23485 }
23486
23487 Action::ClickHouseUniqToApproxCountDistinct => {
23488 // ClickHouse uniq(x) -> APPROX_COUNT_DISTINCT(x) for non-ClickHouse targets
23489 if let Expression::Function(f) = e {
23490 Ok(Expression::Function(Box::new(Function::new(
23491 "APPROX_COUNT_DISTINCT".to_string(),
23492 f.args,
23493 ))))
23494 } else {
23495 Ok(e)
23496 }
23497 }
23498
23499 Action::ClickHouseAnyToAnyValue => {
23500 // ClickHouse any(x) -> ANY_VALUE(x) for non-ClickHouse targets
23501 if let Expression::Function(f) = e {
23502 Ok(Expression::Function(Box::new(Function::new(
23503 "ANY_VALUE".to_string(),
23504 f.args,
23505 ))))
23506 } else {
23507 Ok(e)
23508 }
23509 }
23510
23511 Action::OracleVarchar2ToVarchar => {
23512 // Oracle VARCHAR2(N CHAR/BYTE) / NVARCHAR2(N) -> VarChar(N) for non-Oracle targets
23513 if let Expression::DataType(DataType::Custom { ref name }) = e {
23514 let upper = name.to_uppercase();
23515 // Extract length from VARCHAR2(N ...) or NVARCHAR2(N ...)
23516 let inner =
23517 if upper.starts_with("VARCHAR2(") || upper.starts_with("NVARCHAR2(") {
23518 let start = if upper.starts_with("N") { 10 } else { 9 }; // skip "NVARCHAR2(" or "VARCHAR2("
23519 let end = name.len() - 1; // skip trailing ")"
23520 Some(&name[start..end])
23521 } else {
23522 Option::None
23523 };
23524 if let Some(inner_str) = inner {
23525 // Parse the number part, ignoring BYTE/CHAR qualifier
23526 let num_str = inner_str.split_whitespace().next().unwrap_or("");
23527 if let Ok(n) = num_str.parse::<u32>() {
23528 Ok(Expression::DataType(DataType::VarChar {
23529 length: Some(n),
23530 parenthesized_length: false,
23531 }))
23532 } else {
23533 Ok(e)
23534 }
23535 } else {
23536 // Plain VARCHAR2 / NVARCHAR2 without parens
23537 Ok(Expression::DataType(DataType::VarChar {
23538 length: Option::None,
23539 parenthesized_length: false,
23540 }))
23541 }
23542 } else {
23543 Ok(e)
23544 }
23545 }
23546
            Action::Nvl2Expand => {
                // NVL2(a, b[, c]) -> CASE WHEN NOT a IS NULL THEN b [ELSE c] END
                // But keep as NVL2 for dialects that support it natively
                let nvl2_native = matches!(
                    target,
                    DialectType::Oracle
                        | DialectType::Snowflake
                        | DialectType::Redshift
                        | DialectType::Teradata
                        | DialectType::Spark
                        | DialectType::Databricks
                );
                // NVL2 may arrive either as a dedicated Nvl2 node or as a generic
                // Function call; both are normalized to an (a, b, Option<c>) triple.
                let (a, b, c) = if let Expression::Nvl2(nvl2) = e {
                    if nvl2_native {
                        return Ok(Expression::Nvl2(nvl2));
                    }
                    (nvl2.this, nvl2.true_value, Some(nvl2.false_value))
                } else if let Expression::Function(f) = e {
                    if nvl2_native {
                        // Re-emit with the canonical NVL2 name for native targets.
                        return Ok(Expression::Function(Box::new(Function::new(
                            "NVL2".to_string(),
                            f.args,
                        ))));
                    }
                    if f.args.len() < 2 {
                        // Under-supplied call: leave it for the generator to surface.
                        return Ok(Expression::Function(f));
                    }
                    let mut args = f.args;
                    let a = args.remove(0);
                    let b = args.remove(0);
                    // The third argument (ELSE value) is optional in the function form.
                    let c = if !args.is_empty() {
                        Some(args.remove(0))
                    } else {
                        Option::None
                    };
                    (a, b, c)
                } else {
                    return Ok(e);
                };
                // Build: NOT (a IS NULL)
                let is_null = Expression::IsNull(Box::new(IsNull {
                    this: a,
                    not: false,
                    postfix_form: false,
                }));
                let not_null = Expression::Not(Box::new(crate::expressions::UnaryOp {
                    this: is_null,
                    inferred_type: None,
                }));
                // CASE WHEN NOT a IS NULL THEN b [ELSE c] END
                Ok(Expression::Case(Box::new(Case {
                    operand: Option::None,
                    whens: vec![(not_null, b)],
                    else_: c,
                    comments: Vec::new(),
                    inferred_type: None,
                })))
            }
23604
23605 Action::IfnullToCoalesce => {
23606 // IFNULL(a, b) -> COALESCE(a, b): clear original_name to output COALESCE
23607 if let Expression::Coalesce(mut cf) = e {
23608 cf.original_name = Option::None;
23609 Ok(Expression::Coalesce(cf))
23610 } else if let Expression::Function(f) = e {
23611 Ok(Expression::Function(Box::new(Function::new(
23612 "COALESCE".to_string(),
23613 f.args,
23614 ))))
23615 } else {
23616 Ok(e)
23617 }
23618 }
23619
23620 Action::IsAsciiConvert => {
23621 // IS_ASCII(x) -> dialect-specific ASCII check
23622 if let Expression::Function(f) = e {
23623 let arg = f.args.into_iter().next().unwrap();
23624 match target {
23625 DialectType::MySQL | DialectType::SingleStore | DialectType::TiDB => {
23626 // REGEXP_LIKE(x, '^[[:ascii:]]*$')
23627 Ok(Expression::Function(Box::new(Function::new(
23628 "REGEXP_LIKE".to_string(),
23629 vec![
23630 arg,
23631 Expression::Literal(Literal::String(
23632 "^[[:ascii:]]*$".to_string(),
23633 )),
23634 ],
23635 ))))
23636 }
23637 DialectType::PostgreSQL
23638 | DialectType::Redshift
23639 | DialectType::Materialize
23640 | DialectType::RisingWave => {
23641 // (x ~ '^[[:ascii:]]*$')
23642 Ok(Expression::Paren(Box::new(Paren {
23643 this: Expression::RegexpLike(Box::new(
23644 crate::expressions::RegexpFunc {
23645 this: arg,
23646 pattern: Expression::Literal(Literal::String(
23647 "^[[:ascii:]]*$".to_string(),
23648 )),
23649 flags: Option::None,
23650 },
23651 )),
23652 trailing_comments: Vec::new(),
23653 })))
23654 }
23655 DialectType::SQLite => {
23656 // (NOT x GLOB CAST(x'2a5b5e012d7f5d2a' AS TEXT))
23657 let hex_lit = Expression::Literal(Literal::HexString(
23658 "2a5b5e012d7f5d2a".to_string(),
23659 ));
23660 let cast_expr = Expression::Cast(Box::new(Cast {
23661 this: hex_lit,
23662 to: DataType::Text,
23663 trailing_comments: Vec::new(),
23664 double_colon_syntax: false,
23665 format: Option::None,
23666 default: Option::None,
23667 inferred_type: None,
23668 }));
23669 let glob = Expression::Glob(Box::new(BinaryOp {
23670 left: arg,
23671 right: cast_expr,
23672 left_comments: Vec::new(),
23673 operator_comments: Vec::new(),
23674 trailing_comments: Vec::new(),
23675 inferred_type: None,
23676 }));
23677 Ok(Expression::Paren(Box::new(Paren {
23678 this: Expression::Not(Box::new(crate::expressions::UnaryOp {
23679 this: glob,
23680 inferred_type: None,
23681 })),
23682 trailing_comments: Vec::new(),
23683 })))
23684 }
23685 DialectType::TSQL | DialectType::Fabric => {
23686 // (PATINDEX(CONVERT(VARCHAR(MAX), 0x255b5e002d7f5d25) COLLATE Latin1_General_BIN, x) = 0)
23687 let hex_lit = Expression::Literal(Literal::HexNumber(
23688 "255b5e002d7f5d25".to_string(),
23689 ));
23690 let convert_expr = Expression::Convert(Box::new(
23691 crate::expressions::ConvertFunc {
23692 this: hex_lit,
23693 to: DataType::Text, // Text generates as VARCHAR(MAX) for TSQL
23694 style: None,
23695 },
23696 ));
23697 let collated = Expression::Collation(Box::new(
23698 crate::expressions::CollationExpr {
23699 this: convert_expr,
23700 collation: "Latin1_General_BIN".to_string(),
23701 quoted: false,
23702 double_quoted: false,
23703 },
23704 ));
23705 let patindex = Expression::Function(Box::new(Function::new(
23706 "PATINDEX".to_string(),
23707 vec![collated, arg],
23708 )));
23709 let zero = Expression::Literal(Literal::Number("0".to_string()));
23710 let eq_zero = Expression::Eq(Box::new(BinaryOp {
23711 left: patindex,
23712 right: zero,
23713 left_comments: Vec::new(),
23714 operator_comments: Vec::new(),
23715 trailing_comments: Vec::new(),
23716 inferred_type: None,
23717 }));
23718 Ok(Expression::Paren(Box::new(Paren {
23719 this: eq_zero,
23720 trailing_comments: Vec::new(),
23721 })))
23722 }
23723 DialectType::Oracle => {
23724 // NVL(REGEXP_LIKE(x, '^[' || CHR(1) || '-' || CHR(127) || ']*$'), TRUE)
23725 // Build the pattern: '^[' || CHR(1) || '-' || CHR(127) || ']*$'
23726 let s1 = Expression::Literal(Literal::String("^[".to_string()));
23727 let chr1 = Expression::Function(Box::new(Function::new(
23728 "CHR".to_string(),
23729 vec![Expression::Literal(Literal::Number("1".to_string()))],
23730 )));
23731 let dash = Expression::Literal(Literal::String("-".to_string()));
23732 let chr127 = Expression::Function(Box::new(Function::new(
23733 "CHR".to_string(),
23734 vec![Expression::Literal(Literal::Number("127".to_string()))],
23735 )));
23736 let s2 = Expression::Literal(Literal::String("]*$".to_string()));
23737 // Build: '^[' || CHR(1) || '-' || CHR(127) || ']*$'
23738 let concat1 =
23739 Expression::DPipe(Box::new(crate::expressions::DPipe {
23740 this: Box::new(s1),
23741 expression: Box::new(chr1),
23742 safe: None,
23743 }));
23744 let concat2 =
23745 Expression::DPipe(Box::new(crate::expressions::DPipe {
23746 this: Box::new(concat1),
23747 expression: Box::new(dash),
23748 safe: None,
23749 }));
23750 let concat3 =
23751 Expression::DPipe(Box::new(crate::expressions::DPipe {
23752 this: Box::new(concat2),
23753 expression: Box::new(chr127),
23754 safe: None,
23755 }));
23756 let concat4 =
23757 Expression::DPipe(Box::new(crate::expressions::DPipe {
23758 this: Box::new(concat3),
23759 expression: Box::new(s2),
23760 safe: None,
23761 }));
23762 let regexp_like = Expression::Function(Box::new(Function::new(
23763 "REGEXP_LIKE".to_string(),
23764 vec![arg, concat4],
23765 )));
23766 // Use Column("TRUE") to output literal TRUE keyword (not boolean 1/0)
23767 let true_expr = Expression::Column(crate::expressions::Column {
23768 name: Identifier {
23769 name: "TRUE".to_string(),
23770 quoted: false,
23771 trailing_comments: Vec::new(),
23772 span: None,
23773 },
23774 table: None,
23775 join_mark: false,
23776 trailing_comments: Vec::new(),
23777 span: None,
23778 inferred_type: None,
23779 });
23780 let nvl = Expression::Function(Box::new(Function::new(
23781 "NVL".to_string(),
23782 vec![regexp_like, true_expr],
23783 )));
23784 Ok(nvl)
23785 }
23786 _ => Ok(Expression::Function(Box::new(Function::new(
23787 "IS_ASCII".to_string(),
23788 vec![arg],
23789 )))),
23790 }
23791 } else {
23792 Ok(e)
23793 }
23794 }
23795
            Action::StrPositionConvert => {
                // STR_POSITION(haystack, needle[, position[, occurrence]]) -> dialect-specific
                if let Expression::Function(f) = e {
                    // Require at least (haystack, needle); otherwise pass through.
                    if f.args.len() < 2 {
                        return Ok(Expression::Function(f));
                    }
                    let mut args = f.args;

                    // Destructure positionally; position and occurrence are optional.
                    let haystack = args.remove(0);
                    let needle = args.remove(0);
                    let position = if !args.is_empty() {
                        Some(args.remove(0))
                    } else {
                        Option::None
                    };
                    let occurrence = if !args.is_empty() {
                        Some(args.remove(0))
                    } else {
                        Option::None
                    };

                    // Helper to build: STRPOS/INSTR(SUBSTRING(haystack, pos), needle) expansion
                    // Returns: CASE/IF WHEN func(SUBSTRING(haystack, pos), needle[, occ]) = 0 THEN 0 ELSE ... + pos - 1 END
                    // Used by targets whose position function has no start-offset
                    // parameter: search in the suffix, then re-add the offset,
                    // mapping a no-match result (0) back to 0.
                    fn build_position_expansion(
                        haystack: Expression,
                        needle: Expression,
                        pos: Expression,
                        occurrence: Option<Expression>,
                        inner_func: &str,
                        wrapper: &str, // "CASE", "IF", "IIF"
                    ) -> Expression {
                        let substr = Expression::Function(Box::new(Function::new(
                            "SUBSTRING".to_string(),
                            vec![haystack, pos.clone()],
                        )));
                        let mut inner_args = vec![substr, needle];
                        if let Some(occ) = occurrence {
                            inner_args.push(occ);
                        }
                        let inner_call = Expression::Function(Box::new(Function::new(
                            inner_func.to_string(),
                            inner_args,
                        )));
                        let zero = Expression::Literal(Literal::Number("0".to_string()));
                        let one = Expression::Literal(Literal::Number("1".to_string()));
                        // inner_call = 0  (no match in the suffix)
                        let eq_zero = Expression::Eq(Box::new(BinaryOp {
                            left: inner_call.clone(),
                            right: zero.clone(),
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                            inferred_type: None,
                        }));
                        // inner_call + pos - 1  (translate suffix offset back to
                        // a 1-based position in the full haystack)
                        let add_pos = Expression::Add(Box::new(BinaryOp {
                            left: inner_call,
                            right: pos,
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                            inferred_type: None,
                        }));
                        let sub_one = Expression::Sub(Box::new(BinaryOp {
                            left: add_pos,
                            right: one,
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                            inferred_type: None,
                        }));

                        match wrapper {
                            "CASE" => Expression::Case(Box::new(Case {
                                operand: Option::None,
                                whens: vec![(eq_zero, zero)],
                                else_: Some(sub_one),
                                comments: Vec::new(),
                                inferred_type: None,
                            })),
                            "IIF" => Expression::Function(Box::new(Function::new(
                                "IIF".to_string(),
                                vec![eq_zero, zero, sub_one],
                            ))),
                            _ => Expression::Function(Box::new(Function::new(
                                "IF".to_string(),
                                vec![eq_zero, zero, sub_one],
                            ))),
                        }
                    }

                    match target {
                        // STRPOS group: Athena, DuckDB, Presto, Trino, Drill
                        DialectType::Athena
                        | DialectType::DuckDB
                        | DialectType::Presto
                        | DialectType::Trino
                        | DialectType::Drill => {
                            if let Some(pos) = position {
                                let wrapper = if matches!(target, DialectType::DuckDB) {
                                    "CASE"
                                } else {
                                    "IF"
                                };
                                let result = build_position_expansion(
                                    haystack, needle, pos, occurrence, "STRPOS", wrapper,
                                );
                                if matches!(target, DialectType::Drill) {
                                    // Drill uses backtick-quoted `IF`
                                    if let Expression::Function(mut f) = result {
                                        f.name = "`IF`".to_string();
                                        Ok(Expression::Function(f))
                                    } else {
                                        Ok(result)
                                    }
                                } else {
                                    Ok(result)
                                }
                            } else {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "STRPOS".to_string(),
                                    vec![haystack, needle],
                                ))))
                            }
                        }
                        // SQLite: IIF wrapper
                        DialectType::SQLite => {
                            if let Some(pos) = position {
                                Ok(build_position_expansion(
                                    haystack, needle, pos, occurrence, "INSTR", "IIF",
                                ))
                            } else {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "INSTR".to_string(),
                                    vec![haystack, needle],
                                ))))
                            }
                        }
                        // INSTR group: Teradata, BigQuery, Oracle
                        // These accept (haystack, needle[, position[, occurrence]])
                        // natively, so all four args map through directly.
                        DialectType::Teradata | DialectType::BigQuery | DialectType::Oracle => {
                            let mut a = vec![haystack, needle];
                            if let Some(pos) = position {
                                a.push(pos);
                            }
                            if let Some(occ) = occurrence {
                                a.push(occ);
                            }
                            Ok(Expression::Function(Box::new(Function::new(
                                "INSTR".to_string(),
                                a,
                            ))))
                        }
                        // CHARINDEX group: Snowflake, TSQL
                        // Note: argument order flips to (needle, haystack[, position]);
                        // any occurrence argument is dropped (CHARINDEX has none).
                        DialectType::Snowflake | DialectType::TSQL | DialectType::Fabric => {
                            let mut a = vec![needle, haystack];
                            if let Some(pos) = position {
                                a.push(pos);
                            }
                            Ok(Expression::Function(Box::new(Function::new(
                                "CHARINDEX".to_string(),
                                a,
                            ))))
                        }
                        // POSITION(needle IN haystack): PostgreSQL, Materialize, RisingWave, Redshift
                        DialectType::PostgreSQL
                        | DialectType::Materialize
                        | DialectType::RisingWave
                        | DialectType::Redshift => {
                            if let Some(pos) = position {
                                // Build: CASE WHEN POSITION(needle IN SUBSTRING(haystack FROM pos)) = 0 THEN 0
                                //             ELSE POSITION(...) + pos - 1 END
                                // Same offset-translation idea as build_position_expansion,
                                // but using the dedicated StrPosition/Substring AST nodes
                                // so the generator emits the IN/FROM keyword syntax.
                                let substr = Expression::Substring(Box::new(
                                    crate::expressions::SubstringFunc {
                                        this: haystack,
                                        start: pos.clone(),
                                        length: Option::None,
                                        from_for_syntax: true,
                                    },
                                ));
                                let pos_in = Expression::StrPosition(Box::new(
                                    crate::expressions::StrPosition {
                                        this: Box::new(substr),
                                        substr: Some(Box::new(needle)),
                                        position: Option::None,
                                        occurrence: Option::None,
                                    },
                                ));
                                let zero =
                                    Expression::Literal(Literal::Number("0".to_string()));
                                let one = Expression::Literal(Literal::Number("1".to_string()));
                                let eq_zero = Expression::Eq(Box::new(BinaryOp {
                                    left: pos_in.clone(),
                                    right: zero.clone(),
                                    left_comments: Vec::new(),
                                    operator_comments: Vec::new(),
                                    trailing_comments: Vec::new(),
                                    inferred_type: None,
                                }));
                                let add_pos = Expression::Add(Box::new(BinaryOp {
                                    left: pos_in,
                                    right: pos,
                                    left_comments: Vec::new(),
                                    operator_comments: Vec::new(),
                                    trailing_comments: Vec::new(),
                                    inferred_type: None,
                                }));
                                let sub_one = Expression::Sub(Box::new(BinaryOp {
                                    left: add_pos,
                                    right: one,
                                    left_comments: Vec::new(),
                                    operator_comments: Vec::new(),
                                    trailing_comments: Vec::new(),
                                    inferred_type: None,
                                }));
                                Ok(Expression::Case(Box::new(Case {
                                    operand: Option::None,
                                    whens: vec![(eq_zero, zero)],
                                    else_: Some(sub_one),
                                    comments: Vec::new(),
                                    inferred_type: None,
                                })))
                            } else {
                                Ok(Expression::StrPosition(Box::new(
                                    crate::expressions::StrPosition {
                                        this: Box::new(haystack),
                                        substr: Some(Box::new(needle)),
                                        position: Option::None,
                                        occurrence: Option::None,
                                    },
                                )))
                            }
                        }
                        // LOCATE group: MySQL, Hive, Spark, Databricks, Doris
                        // LOCATE(needle, haystack[, position]); occurrence is dropped.
                        DialectType::MySQL
                        | DialectType::SingleStore
                        | DialectType::TiDB
                        | DialectType::Hive
                        | DialectType::Spark
                        | DialectType::Databricks
                        | DialectType::Doris
                        | DialectType::StarRocks => {
                            let mut a = vec![needle, haystack];
                            if let Some(pos) = position {
                                a.push(pos);
                            }
                            Ok(Expression::Function(Box::new(Function::new(
                                "LOCATE".to_string(),
                                a,
                            ))))
                        }
                        // ClickHouse: POSITION(haystack, needle[, position])
                        DialectType::ClickHouse => {
                            let mut a = vec![haystack, needle];
                            if let Some(pos) = position {
                                a.push(pos);
                            }
                            Ok(Expression::Function(Box::new(Function::new(
                                "POSITION".to_string(),
                                a,
                            ))))
                        }
                        _ => {
                            // Unknown target: keep the normalized STR_POSITION form
                            // with every argument preserved.
                            let mut a = vec![haystack, needle];
                            if let Some(pos) = position {
                                a.push(pos);
                            }
                            if let Some(occ) = occurrence {
                                a.push(occ);
                            }
                            Ok(Expression::Function(Box::new(Function::new(
                                "STR_POSITION".to_string(),
                                a,
                            ))))
                        }
                    }
                } else {
                    Ok(e)
                }
            }
24073
24074 Action::ArraySumConvert => {
24075 // ARRAY_SUM(arr) -> dialect-specific
24076 if let Expression::Function(f) = e {
24077 let args = f.args;
24078 match target {
24079 DialectType::DuckDB => Ok(Expression::Function(Box::new(
24080 Function::new("LIST_SUM".to_string(), args),
24081 ))),
24082 DialectType::Spark | DialectType::Databricks => {
24083 // AGGREGATE(arr, 0, (acc, x) -> acc + x, acc -> acc)
24084 let arr = args.into_iter().next().unwrap();
24085 let zero = Expression::Literal(Literal::Number("0".to_string()));
24086 let acc_id = Identifier::new("acc");
24087 let x_id = Identifier::new("x");
24088 let acc = Expression::Identifier(acc_id.clone());
24089 let x = Expression::Identifier(x_id.clone());
24090 let add = Expression::Add(Box::new(BinaryOp {
24091 left: acc.clone(),
24092 right: x,
24093 left_comments: Vec::new(),
24094 operator_comments: Vec::new(),
24095 trailing_comments: Vec::new(),
24096 inferred_type: None,
24097 }));
24098 let lambda1 =
24099 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
24100 parameters: vec![acc_id.clone(), x_id],
24101 body: add,
24102 colon: false,
24103 parameter_types: Vec::new(),
24104 }));
24105 let lambda2 =
24106 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
24107 parameters: vec![acc_id],
24108 body: acc,
24109 colon: false,
24110 parameter_types: Vec::new(),
24111 }));
24112 Ok(Expression::Function(Box::new(Function::new(
24113 "AGGREGATE".to_string(),
24114 vec![arr, zero, lambda1, lambda2],
24115 ))))
24116 }
24117 DialectType::Presto | DialectType::Athena => {
24118 // Presto/Athena keep ARRAY_SUM natively
24119 Ok(Expression::Function(Box::new(Function::new(
24120 "ARRAY_SUM".to_string(),
24121 args,
24122 ))))
24123 }
24124 DialectType::Trino => {
24125 // REDUCE(arr, 0, (acc, x) -> acc + x, acc -> acc)
24126 if args.len() == 1 {
24127 let arr = args.into_iter().next().unwrap();
24128 let zero =
24129 Expression::Literal(Literal::Number("0".to_string()));
24130 let acc_id = Identifier::new("acc");
24131 let x_id = Identifier::new("x");
24132 let acc = Expression::Identifier(acc_id.clone());
24133 let x = Expression::Identifier(x_id.clone());
24134 let add = Expression::Add(Box::new(BinaryOp {
24135 left: acc.clone(),
24136 right: x,
24137 left_comments: Vec::new(),
24138 operator_comments: Vec::new(),
24139 trailing_comments: Vec::new(),
24140 inferred_type: None,
24141 }));
24142 let lambda1 = Expression::Lambda(Box::new(
24143 crate::expressions::LambdaExpr {
24144 parameters: vec![acc_id.clone(), x_id],
24145 body: add,
24146 colon: false,
24147 parameter_types: Vec::new(),
24148 },
24149 ));
24150 let lambda2 = Expression::Lambda(Box::new(
24151 crate::expressions::LambdaExpr {
24152 parameters: vec![acc_id],
24153 body: acc,
24154 colon: false,
24155 parameter_types: Vec::new(),
24156 },
24157 ));
24158 Ok(Expression::Function(Box::new(Function::new(
24159 "REDUCE".to_string(),
24160 vec![arr, zero, lambda1, lambda2],
24161 ))))
24162 } else {
24163 Ok(Expression::Function(Box::new(Function::new(
24164 "ARRAY_SUM".to_string(),
24165 args,
24166 ))))
24167 }
24168 }
24169 DialectType::ClickHouse => {
24170 // arraySum(lambda, arr) or arraySum(arr)
24171 Ok(Expression::Function(Box::new(Function::new(
24172 "arraySum".to_string(),
24173 args,
24174 ))))
24175 }
24176 _ => Ok(Expression::Function(Box::new(Function::new(
24177 "ARRAY_SUM".to_string(),
24178 args,
24179 )))),
24180 }
24181 } else {
24182 Ok(e)
24183 }
24184 }
24185
24186 Action::ArraySizeConvert => {
24187 if let Expression::Function(f) = e {
24188 Ok(Expression::Function(Box::new(Function::new(
24189 "REPEATED_COUNT".to_string(),
24190 f.args,
24191 ))))
24192 } else {
24193 Ok(e)
24194 }
24195 }
24196
            Action::ArrayAnyConvert => {
                // ARRAY_ANY(arr, lambda) -> dialect-specific "does any element
                // match" construct. Dialects without a native ANY_MATCH get the
                // pattern (len(arr) = 0 OR len(filter(arr, lambda)) <> 0),
                // which is vacuously true for an empty array.
                if let Expression::Function(f) = e {
                    let mut args = f.args;
                    if args.len() == 2 {
                        let arr = args.remove(0);
                        let lambda = args.remove(0);

                        // Extract lambda parameter name and body; a non-lambda
                        // second argument is treated as a bare predicate with a
                        // default parameter name of "x".
                        let (param_name, pred_body) =
                            if let Expression::Lambda(ref lam) = lambda {
                                let name = if let Some(p) = lam.parameters.first() {
                                    p.name.clone()
                                } else {
                                    "x".to_string()
                                };
                                (name, lam.body.clone())
                            } else {
                                ("x".to_string(), lambda.clone())
                            };

                        // Helper: build a function call Expression
                        let make_func = |name: &str, args: Vec<Expression>| -> Expression {
                            Expression::Function(Box::new(Function::new(
                                name.to_string(),
                                args,
                            )))
                        };

                        // Helper: build (len_func(arr) = 0 OR len_func(filter_expr) <> 0) wrapped in Paren
                        // `len_args_extra` carries trailing arguments some
                        // dialects need on the length function (e.g. the
                        // dimension argument of PostgreSQL's ARRAY_LENGTH).
                        let build_filter_pattern = |len_func: &str,
                                                    len_args_extra: Vec<Expression>,
                                                    filter_expr: Expression|
                         -> Expression {
                            // len_func(arr, ...extra) = 0
                            let mut len_arr_args = vec![arr.clone()];
                            len_arr_args.extend(len_args_extra.clone());
                            let len_arr = make_func(len_func, len_arr_args);
                            let eq_zero = Expression::Eq(Box::new(BinaryOp::new(
                                len_arr,
                                Expression::number(0),
                            )));

                            // len_func(filter_expr, ...extra) <> 0
                            let mut len_filter_args = vec![filter_expr];
                            len_filter_args.extend(len_args_extra);
                            let len_filter = make_func(len_func, len_filter_args);
                            let neq_zero = Expression::Neq(Box::new(BinaryOp::new(
                                len_filter,
                                Expression::number(0),
                            )));

                            // (eq_zero OR neq_zero)
                            let or_expr =
                                Expression::Or(Box::new(BinaryOp::new(eq_zero, neq_zero)));
                            Expression::Paren(Box::new(Paren {
                                this: or_expr,
                                trailing_comments: Vec::new(),
                            }))
                        };

                        match target {
                            // Native short form: ANY_MATCH(arr, lambda).
                            DialectType::Trino | DialectType::Presto | DialectType::Athena => {
                                Ok(make_func("ANY_MATCH", vec![arr, lambda]))
                            }
                            DialectType::ClickHouse => {
                                // (LENGTH(arr) = 0 OR LENGTH(arrayFilter(x -> pred, arr)) <> 0)
                                // ClickHouse arrayFilter takes lambda first, then array
                                let filter_expr =
                                    make_func("arrayFilter", vec![lambda, arr.clone()]);
                                Ok(build_filter_pattern("LENGTH", vec![], filter_expr))
                            }
                            DialectType::Databricks | DialectType::Spark => {
                                // (SIZE(arr) = 0 OR SIZE(FILTER(arr, x -> pred)) <> 0)
                                let filter_expr =
                                    make_func("FILTER", vec![arr.clone(), lambda]);
                                Ok(build_filter_pattern("SIZE", vec![], filter_expr))
                            }
                            DialectType::DuckDB => {
                                // (ARRAY_LENGTH(arr) = 0 OR ARRAY_LENGTH(LIST_FILTER(arr, x -> pred)) <> 0)
                                let filter_expr =
                                    make_func("LIST_FILTER", vec![arr.clone(), lambda]);
                                Ok(build_filter_pattern("ARRAY_LENGTH", vec![], filter_expr))
                            }
                            DialectType::Teradata => {
                                // (CARDINALITY(arr) = 0 OR CARDINALITY(FILTER(arr, x -> pred)) <> 0)
                                let filter_expr =
                                    make_func("FILTER", vec![arr.clone(), lambda]);
                                Ok(build_filter_pattern("CARDINALITY", vec![], filter_expr))
                            }
                            DialectType::BigQuery => {
                                // (ARRAY_LENGTH(arr) = 0 OR ARRAY_LENGTH(ARRAY(SELECT x FROM UNNEST(arr) AS x WHERE pred)) <> 0)
                                // Build: SELECT x FROM UNNEST(arr) AS x WHERE pred
                                let param_col = Expression::column(&param_name);
                                let unnest_expr = Expression::Unnest(Box::new(
                                    crate::expressions::UnnestFunc {
                                        this: arr.clone(),
                                        expressions: vec![],
                                        with_ordinality: false,
                                        // BigQuery aliases the unnested value
                                        // with the lambda's parameter name so
                                        // pred_body resolves unchanged.
                                        alias: Some(Identifier::new(&param_name)),
                                        offset_alias: None,
                                    },
                                ));
                                let mut sel = crate::expressions::Select::default();
                                sel.expressions = vec![param_col];
                                sel.from = Some(crate::expressions::From {
                                    expressions: vec![unnest_expr],
                                });
                                sel.where_clause =
                                    Some(crate::expressions::Where { this: pred_body });
                                let array_subquery =
                                    make_func("ARRAY", vec![Expression::Select(Box::new(sel))]);
                                Ok(build_filter_pattern("ARRAY_LENGTH", vec![], array_subquery))
                            }
                            DialectType::PostgreSQL => {
                                // (ARRAY_LENGTH(arr, 1) = 0 OR ARRAY_LENGTH(ARRAY(SELECT x FROM UNNEST(arr) AS _t0(x) WHERE pred), 1) <> 0)
                                // Build: SELECT x FROM UNNEST(arr) AS _t0(x) WHERE pred
                                let param_col = Expression::column(&param_name);
                                // For PostgreSQL, UNNEST uses AS _t0(x) syntax - use TableAlias
                                let unnest_with_alias =
                                    Expression::Alias(Box::new(crate::expressions::Alias {
                                        this: Expression::Unnest(Box::new(
                                            crate::expressions::UnnestFunc {
                                                this: arr.clone(),
                                                expressions: vec![],
                                                with_ordinality: false,
                                                alias: None,
                                                offset_alias: None,
                                            },
                                        )),
                                        alias: Identifier::new("_t0"),
                                        // The column alias carries the lambda
                                        // parameter name so pred_body resolves.
                                        column_aliases: vec![Identifier::new(&param_name)],
                                        pre_alias_comments: Vec::new(),
                                        trailing_comments: Vec::new(),
                                        inferred_type: None,
                                    }));
                                let mut sel = crate::expressions::Select::default();
                                sel.expressions = vec![param_col];
                                sel.from = Some(crate::expressions::From {
                                    expressions: vec![unnest_with_alias],
                                });
                                sel.where_clause =
                                    Some(crate::expressions::Where { this: pred_body });
                                let array_subquery =
                                    make_func("ARRAY", vec![Expression::Select(Box::new(sel))]);
                                // PostgreSQL's ARRAY_LENGTH needs the dimension
                                // argument (1) appended to both length calls.
                                Ok(build_filter_pattern(
                                    "ARRAY_LENGTH",
                                    vec![Expression::number(1)],
                                    array_subquery,
                                ))
                            }
                            // Fallback: keep a generic ARRAY_ANY call.
                            _ => Ok(Expression::Function(Box::new(Function::new(
                                "ARRAY_ANY".to_string(),
                                vec![arr, lambda],
                            )))),
                        }
                    } else {
                        // Unexpected arity: re-emit as a generic ARRAY_ANY call.
                        Ok(Expression::Function(Box::new(Function::new(
                            "ARRAY_ANY".to_string(),
                            args,
                        ))))
                    }
                } else {
                    Ok(e)
                }
            }
24362
            Action::DecodeSimplify => {
                // DECODE(x, search1, result1, ..., default) -> CASE WHEN ... THEN result1 ... [ELSE default] END
                // For literal search values: CASE WHEN x = search THEN result
                // For NULL search: CASE WHEN x IS NULL THEN result
                // For non-literal (column, expr): CASE WHEN x = search OR (x IS NULL AND search IS NULL) THEN result

                // True for expressions DECODE can safely compare with a plain
                // `=` (literals, booleans, negated values): no NULL-safe
                // handling is required for these.
                fn is_decode_literal(e: &Expression) -> bool {
                    matches!(
                        e,
                        Expression::Literal(_) | Expression::Boolean(_) | Expression::Neg(_)
                    )
                }

                // Build the final CASE from (search, result) pairs plus an
                // optional default that becomes the ELSE branch.
                let build_decode_case =
                    |this_expr: Expression,
                     pairs: Vec<(Expression, Expression)>,
                     default: Option<Expression>| {
                        let whens: Vec<(Expression, Expression)> = pairs
                            .into_iter()
                            .map(|(search, result)| {
                                if matches!(&search, Expression::Null(_)) {
                                    // NULL search -> IS NULL
                                    let condition = Expression::Is(Box::new(BinaryOp {
                                        left: this_expr.clone(),
                                        right: Expression::Null(crate::expressions::Null),
                                        left_comments: Vec::new(),
                                        operator_comments: Vec::new(),
                                        trailing_comments: Vec::new(),
                                        inferred_type: None,
                                    }));
                                    (condition, result)
                                } else if is_decode_literal(&search)
                                    || is_decode_literal(&this_expr)
                                {
                                    // At least one side is a literal -> simple equality (no NULL check needed)
                                    let eq = Expression::Eq(Box::new(BinaryOp {
                                        left: this_expr.clone(),
                                        right: search,
                                        left_comments: Vec::new(),
                                        operator_comments: Vec::new(),
                                        trailing_comments: Vec::new(),
                                        inferred_type: None,
                                    }));
                                    (eq, result)
                                } else {
                                    // Non-literal -> null-safe comparison
                                    // A comparison expression used as the right
                                    // side of `=` / `IS NULL` must be wrapped
                                    // in parentheses to keep precedence.
                                    let needs_paren = matches!(
                                        &search,
                                        Expression::Eq(_)
                                            | Expression::Neq(_)
                                            | Expression::Gt(_)
                                            | Expression::Gte(_)
                                            | Expression::Lt(_)
                                            | Expression::Lte(_)
                                    );
                                    let search_ref = if needs_paren {
                                        Expression::Paren(Box::new(crate::expressions::Paren {
                                            this: search.clone(),
                                            trailing_comments: Vec::new(),
                                        }))
                                    } else {
                                        search.clone()
                                    };
                                    // Build: x = search OR (x IS NULL AND search IS NULL)
                                    let eq = Expression::Eq(Box::new(BinaryOp {
                                        left: this_expr.clone(),
                                        right: search_ref,
                                        left_comments: Vec::new(),
                                        operator_comments: Vec::new(),
                                        trailing_comments: Vec::new(),
                                        inferred_type: None,
                                    }));
                                    // Second (independently parenthesized) copy
                                    // of `search` for the IS NULL side.
                                    let search_in_null = if needs_paren {
                                        Expression::Paren(Box::new(crate::expressions::Paren {
                                            this: search.clone(),
                                            trailing_comments: Vec::new(),
                                        }))
                                    } else {
                                        search.clone()
                                    };
                                    let x_is_null = Expression::Is(Box::new(BinaryOp {
                                        left: this_expr.clone(),
                                        right: Expression::Null(crate::expressions::Null),
                                        left_comments: Vec::new(),
                                        operator_comments: Vec::new(),
                                        trailing_comments: Vec::new(),
                                        inferred_type: None,
                                    }));
                                    let search_is_null = Expression::Is(Box::new(BinaryOp {
                                        left: search_in_null,
                                        right: Expression::Null(crate::expressions::Null),
                                        left_comments: Vec::new(),
                                        operator_comments: Vec::new(),
                                        trailing_comments: Vec::new(),
                                        inferred_type: None,
                                    }));
                                    let both_null = Expression::And(Box::new(BinaryOp {
                                        left: x_is_null,
                                        right: search_is_null,
                                        left_comments: Vec::new(),
                                        operator_comments: Vec::new(),
                                        trailing_comments: Vec::new(),
                                        inferred_type: None,
                                    }));
                                    let condition = Expression::Or(Box::new(BinaryOp {
                                        left: eq,
                                        right: Expression::Paren(Box::new(
                                            crate::expressions::Paren {
                                                this: both_null,
                                                trailing_comments: Vec::new(),
                                            },
                                        )),
                                        left_comments: Vec::new(),
                                        operator_comments: Vec::new(),
                                        trailing_comments: Vec::new(),
                                        inferred_type: None,
                                    }));
                                    (condition, result)
                                }
                            })
                            .collect();
                        Expression::Case(Box::new(Case {
                            operand: None,
                            whens,
                            else_: default,
                            comments: Vec::new(),
                            inferred_type: None,
                        }))
                    };

                if let Expression::Decode(decode) = e {
                    // Structured DECODE node: pairs and default are already
                    // separated by the parser.
                    Ok(build_decode_case(
                        decode.this,
                        decode.search_results,
                        decode.default,
                    ))
                } else if let Expression::DecodeCase(dc) = e {
                    // DecodeCase has flat expressions: [x, s1, r1, s2, r2, ..., default?]
                    let mut exprs = dc.expressions;
                    if exprs.len() < 3 {
                        // Too few operands to form even one WHEN pair:
                        // leave the node untouched.
                        return Ok(Expression::DecodeCase(Box::new(
                            crate::expressions::DecodeCase { expressions: exprs },
                        )));
                    }
                    let this_expr = exprs.remove(0);
                    let mut pairs = Vec::new();
                    let mut default = None;
                    let mut i = 0;
                    while i + 1 < exprs.len() {
                        pairs.push((exprs[i].clone(), exprs[i + 1].clone()));
                        i += 2;
                    }
                    if i < exprs.len() {
                        // Odd remaining element is the default
                        default = Some(exprs[i].clone());
                    }
                    Ok(build_decode_case(this_expr, pairs, default))
                } else {
                    Ok(e)
                }
            }
24523
            Action::CreateTableLikeToCtas => {
                // CREATE TABLE a LIKE b -> CREATE TABLE a AS SELECT * FROM b LIMIT 0
                if let Expression::CreateTable(ct) = e {
                    // First LIKE constraint, if any, names the source table.
                    let like_source = ct.constraints.iter().find_map(|c| {
                        if let crate::expressions::TableConstraint::Like { source, .. } = c {
                            Some(source.clone())
                        } else {
                            None
                        }
                    });
                    if let Some(source_table) = like_source {
                        let mut new_ct = *ct;
                        // The LIKE constraint is replaced by the AS SELECT clause.
                        new_ct.constraints.clear();
                        // Build: SELECT * FROM b LIMIT 0
                        let select = Expression::Select(Box::new(crate::expressions::Select {
                            expressions: vec![Expression::Star(crate::expressions::Star {
                                table: None,
                                except: None,
                                replace: None,
                                rename: None,
                                trailing_comments: Vec::new(),
                                span: None,
                            })],
                            from: Some(crate::expressions::From {
                                expressions: vec![Expression::Table(source_table)],
                            }),
                            // LIMIT 0: copy only the schema, never any rows.
                            limit: Some(crate::expressions::Limit {
                                this: Expression::Literal(Literal::Number("0".to_string())),
                                percent: false,
                                comments: Vec::new(),
                            }),
                            ..Default::default()
                        }));
                        new_ct.as_select = Some(select);
                        Ok(Expression::CreateTable(Box::new(new_ct)))
                    } else {
                        // No LIKE constraint: nothing to rewrite.
                        Ok(Expression::CreateTable(ct))
                    }
                } else {
                    Ok(e)
                }
            }
24566
            Action::CreateTableLikeToSelectInto => {
                // CREATE TABLE a LIKE b -> SELECT TOP 0 * INTO a FROM b AS temp
                if let Expression::CreateTable(ct) = e {
                    // First LIKE constraint, if any, names the source table.
                    let like_source = ct.constraints.iter().find_map(|c| {
                        if let crate::expressions::TableConstraint::Like { source, .. } = c {
                            Some(source.clone())
                        } else {
                            None
                        }
                    });
                    if let Some(source_table) = like_source {
                        let mut aliased_source = source_table;
                        aliased_source.alias = Some(Identifier::new("temp"));
                        // Build: SELECT TOP 0 * INTO a FROM b AS temp
                        let select = Expression::Select(Box::new(crate::expressions::Select {
                            expressions: vec![Expression::Star(crate::expressions::Star {
                                table: None,
                                except: None,
                                replace: None,
                                rename: None,
                                trailing_comments: Vec::new(),
                                span: None,
                            })],
                            from: Some(crate::expressions::From {
                                expressions: vec![Expression::Table(aliased_source)],
                            }),
                            // INTO <target>: the table the CREATE would have made.
                            into: Some(crate::expressions::SelectInto {
                                this: Expression::Table(ct.name.clone()),
                                temporary: false,
                                unlogged: false,
                                bulk_collect: false,
                                expressions: Vec::new(),
                            }),
                            // TOP 0: copy only the schema, never any rows.
                            top: Some(crate::expressions::Top {
                                this: Expression::Literal(Literal::Number("0".to_string())),
                                percent: false,
                                with_ties: false,
                                parenthesized: false,
                            }),
                            ..Default::default()
                        }));
                        Ok(select)
                    } else {
                        // No LIKE constraint: nothing to rewrite.
                        Ok(Expression::CreateTable(ct))
                    }
                } else {
                    Ok(e)
                }
            }
24616
24617 Action::CreateTableLikeToAs => {
24618 // CREATE TABLE a LIKE b -> CREATE TABLE a AS b (ClickHouse)
24619 if let Expression::CreateTable(ct) = e {
24620 let like_source = ct.constraints.iter().find_map(|c| {
24621 if let crate::expressions::TableConstraint::Like { source, .. } = c {
24622 Some(source.clone())
24623 } else {
24624 None
24625 }
24626 });
24627 if let Some(source_table) = like_source {
24628 let mut new_ct = *ct;
24629 new_ct.constraints.clear();
24630 // AS b (just a table reference, not a SELECT)
24631 new_ct.as_select = Some(Expression::Table(source_table));
24632 Ok(Expression::CreateTable(Box::new(new_ct)))
24633 } else {
24634 Ok(Expression::CreateTable(ct))
24635 }
24636 } else {
24637 Ok(e)
24638 }
24639 }
24640
24641 Action::TsOrDsToDateConvert => {
24642 // TS_OR_DS_TO_DATE(x[, fmt]) -> dialect-specific date conversion
24643 if let Expression::Function(f) = e {
24644 let mut args = f.args;
24645 let this = args.remove(0);
24646 let fmt = if !args.is_empty() {
24647 match &args[0] {
24648 Expression::Literal(Literal::String(s)) => Some(s.clone()),
24649 _ => None,
24650 }
24651 } else {
24652 None
24653 };
24654 Ok(Expression::TsOrDsToDate(Box::new(
24655 crate::expressions::TsOrDsToDate {
24656 this: Box::new(this),
24657 format: fmt,
24658 safe: None,
24659 },
24660 )))
24661 } else {
24662 Ok(e)
24663 }
24664 }
24665
24666 Action::TsOrDsToDateStrConvert => {
24667 // TS_OR_DS_TO_DATE_STR(x) -> SUBSTRING(CAST(x AS type), 1, 10)
24668 if let Expression::Function(f) = e {
24669 let arg = f.args.into_iter().next().unwrap();
24670 let str_type = match target {
24671 DialectType::DuckDB
24672 | DialectType::PostgreSQL
24673 | DialectType::Materialize => DataType::Text,
24674 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
24675 DataType::Custom {
24676 name: "STRING".to_string(),
24677 }
24678 }
24679 DialectType::Presto
24680 | DialectType::Trino
24681 | DialectType::Athena
24682 | DialectType::Drill => DataType::VarChar {
24683 length: None,
24684 parenthesized_length: false,
24685 },
24686 DialectType::MySQL | DialectType::Doris | DialectType::StarRocks => {
24687 DataType::Custom {
24688 name: "STRING".to_string(),
24689 }
24690 }
24691 _ => DataType::VarChar {
24692 length: None,
24693 parenthesized_length: false,
24694 },
24695 };
24696 let cast_expr = Expression::Cast(Box::new(Cast {
24697 this: arg,
24698 to: str_type,
24699 double_colon_syntax: false,
24700 trailing_comments: Vec::new(),
24701 format: None,
24702 default: None,
24703 inferred_type: None,
24704 }));
24705 Ok(Expression::Substring(Box::new(
24706 crate::expressions::SubstringFunc {
24707 this: cast_expr,
24708 start: Expression::number(1),
24709 length: Some(Expression::number(10)),
24710 from_for_syntax: false,
24711 },
24712 )))
24713 } else {
24714 Ok(e)
24715 }
24716 }
24717
24718 Action::DateStrToDateConvert => {
24719 // DATE_STR_TO_DATE(x) -> dialect-specific
24720 if let Expression::Function(f) = e {
24721 let arg = f.args.into_iter().next().unwrap();
24722 match target {
24723 DialectType::SQLite => {
24724 // SQLite: just the bare expression (dates are strings)
24725 Ok(arg)
24726 }
24727 _ => Ok(Expression::Cast(Box::new(Cast {
24728 this: arg,
24729 to: DataType::Date,
24730 double_colon_syntax: false,
24731 trailing_comments: Vec::new(),
24732 format: None,
24733 default: None,
24734 inferred_type: None,
24735 }))),
24736 }
24737 } else {
24738 Ok(e)
24739 }
24740 }
24741
24742 Action::TimeStrToDateConvert => {
24743 // TIME_STR_TO_DATE(x) -> dialect-specific
24744 if let Expression::Function(f) = e {
24745 let arg = f.args.into_iter().next().unwrap();
24746 match target {
24747 DialectType::Hive
24748 | DialectType::Doris
24749 | DialectType::StarRocks
24750 | DialectType::Snowflake => Ok(Expression::Function(Box::new(
24751 Function::new("TO_DATE".to_string(), vec![arg]),
24752 ))),
24753 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
24754 // Presto: CAST(x AS TIMESTAMP)
24755 Ok(Expression::Cast(Box::new(Cast {
24756 this: arg,
24757 to: DataType::Timestamp {
24758 timezone: false,
24759 precision: None,
24760 },
24761 double_colon_syntax: false,
24762 trailing_comments: Vec::new(),
24763 format: None,
24764 default: None,
24765 inferred_type: None,
24766 })))
24767 }
24768 _ => {
24769 // Default: CAST(x AS DATE)
24770 Ok(Expression::Cast(Box::new(Cast {
24771 this: arg,
24772 to: DataType::Date,
24773 double_colon_syntax: false,
24774 trailing_comments: Vec::new(),
24775 format: None,
24776 default: None,
24777 inferred_type: None,
24778 })))
24779 }
24780 }
24781 } else {
24782 Ok(e)
24783 }
24784 }
24785
24786 Action::TimeStrToTimeConvert => {
24787 // TIME_STR_TO_TIME(x[, zone]) -> dialect-specific CAST to timestamp type
24788 if let Expression::Function(f) = e {
24789 let mut args = f.args;
24790 let this = args.remove(0);
24791 let zone = if !args.is_empty() {
24792 match &args[0] {
24793 Expression::Literal(Literal::String(s)) => Some(s.clone()),
24794 _ => None,
24795 }
24796 } else {
24797 None
24798 };
24799 let has_zone = zone.is_some();
24800
24801 match target {
24802 DialectType::SQLite => {
24803 // SQLite: just the bare expression
24804 Ok(this)
24805 }
24806 DialectType::MySQL => {
24807 if has_zone {
24808 // MySQL with zone: TIMESTAMP(x)
24809 Ok(Expression::Function(Box::new(Function::new(
24810 "TIMESTAMP".to_string(),
24811 vec![this],
24812 ))))
24813 } else {
24814 // MySQL: CAST(x AS DATETIME) or with precision
24815 // Use DataType::Custom to avoid MySQL's transform_cast converting
24816 // CAST(x AS TIMESTAMP) -> TIMESTAMP(x)
24817 let precision =
24818 if let Expression::Literal(Literal::String(ref s)) = this {
24819 if let Some(dot_pos) = s.rfind('.') {
24820 let frac = &s[dot_pos + 1..];
24821 let digit_count = frac
24822 .chars()
24823 .take_while(|c| c.is_ascii_digit())
24824 .count();
24825 if digit_count > 0 {
24826 Some(digit_count)
24827 } else {
24828 None
24829 }
24830 } else {
24831 None
24832 }
24833 } else {
24834 None
24835 };
24836 let type_name = match precision {
24837 Some(p) => format!("DATETIME({})", p),
24838 None => "DATETIME".to_string(),
24839 };
24840 Ok(Expression::Cast(Box::new(Cast {
24841 this,
24842 to: DataType::Custom { name: type_name },
24843 double_colon_syntax: false,
24844 trailing_comments: Vec::new(),
24845 format: None,
24846 default: None,
24847 inferred_type: None,
24848 })))
24849 }
24850 }
24851 DialectType::ClickHouse => {
24852 if has_zone {
24853 // ClickHouse with zone: CAST(x AS DateTime64(6, 'zone'))
24854 // We need to strip the timezone offset from the literal if present
24855 let clean_this =
24856 if let Expression::Literal(Literal::String(ref s)) = this {
24857 // Strip timezone offset like "-08:00" or "+00:00"
24858 let re_offset = s.rfind(|c: char| c == '+' || c == '-');
24859 if let Some(offset_pos) = re_offset {
24860 if offset_pos > 10 {
24861 // After the date part
24862 let trimmed = s[..offset_pos].to_string();
24863 Expression::Literal(Literal::String(trimmed))
24864 } else {
24865 this.clone()
24866 }
24867 } else {
24868 this.clone()
24869 }
24870 } else {
24871 this.clone()
24872 };
24873 let zone_str = zone.unwrap();
24874 // Build: CAST(x AS DateTime64(6, 'zone'))
24875 let type_name = format!("DateTime64(6, '{}')", zone_str);
24876 Ok(Expression::Cast(Box::new(Cast {
24877 this: clean_this,
24878 to: DataType::Custom { name: type_name },
24879 double_colon_syntax: false,
24880 trailing_comments: Vec::new(),
24881 format: None,
24882 default: None,
24883 inferred_type: None,
24884 })))
24885 } else {
24886 Ok(Expression::Cast(Box::new(Cast {
24887 this,
24888 to: DataType::Custom {
24889 name: "DateTime64(6)".to_string(),
24890 },
24891 double_colon_syntax: false,
24892 trailing_comments: Vec::new(),
24893 format: None,
24894 default: None,
24895 inferred_type: None,
24896 })))
24897 }
24898 }
24899 DialectType::BigQuery => {
24900 if has_zone {
24901 // BigQuery with zone: CAST(x AS TIMESTAMP)
24902 Ok(Expression::Cast(Box::new(Cast {
24903 this,
24904 to: DataType::Timestamp {
24905 timezone: false,
24906 precision: None,
24907 },
24908 double_colon_syntax: false,
24909 trailing_comments: Vec::new(),
24910 format: None,
24911 default: None,
24912 inferred_type: None,
24913 })))
24914 } else {
24915 // BigQuery: CAST(x AS DATETIME) - Timestamp{tz:false} renders as DATETIME for BigQuery
24916 Ok(Expression::Cast(Box::new(Cast {
24917 this,
24918 to: DataType::Custom {
24919 name: "DATETIME".to_string(),
24920 },
24921 double_colon_syntax: false,
24922 trailing_comments: Vec::new(),
24923 format: None,
24924 default: None,
24925 inferred_type: None,
24926 })))
24927 }
24928 }
24929 DialectType::Doris => {
24930 // Doris: CAST(x AS DATETIME)
24931 Ok(Expression::Cast(Box::new(Cast {
24932 this,
24933 to: DataType::Custom {
24934 name: "DATETIME".to_string(),
24935 },
24936 double_colon_syntax: false,
24937 trailing_comments: Vec::new(),
24938 format: None,
24939 default: None,
24940 inferred_type: None,
24941 })))
24942 }
24943 DialectType::TSQL | DialectType::Fabric => {
24944 if has_zone {
24945 // TSQL with zone: CAST(x AS DATETIMEOFFSET) AT TIME ZONE 'UTC'
24946 let cast_expr = Expression::Cast(Box::new(Cast {
24947 this,
24948 to: DataType::Custom {
24949 name: "DATETIMEOFFSET".to_string(),
24950 },
24951 double_colon_syntax: false,
24952 trailing_comments: Vec::new(),
24953 format: None,
24954 default: None,
24955 inferred_type: None,
24956 }));
24957 Ok(Expression::AtTimeZone(Box::new(
24958 crate::expressions::AtTimeZone {
24959 this: cast_expr,
24960 zone: Expression::Literal(Literal::String(
24961 "UTC".to_string(),
24962 )),
24963 },
24964 )))
24965 } else {
24966 // TSQL: CAST(x AS DATETIME2)
24967 Ok(Expression::Cast(Box::new(Cast {
24968 this,
24969 to: DataType::Custom {
24970 name: "DATETIME2".to_string(),
24971 },
24972 double_colon_syntax: false,
24973 trailing_comments: Vec::new(),
24974 format: None,
24975 default: None,
24976 inferred_type: None,
24977 })))
24978 }
24979 }
24980 DialectType::DuckDB => {
24981 if has_zone {
24982 // DuckDB with zone: CAST(x AS TIMESTAMPTZ)
24983 Ok(Expression::Cast(Box::new(Cast {
24984 this,
24985 to: DataType::Timestamp {
24986 timezone: true,
24987 precision: None,
24988 },
24989 double_colon_syntax: false,
24990 trailing_comments: Vec::new(),
24991 format: None,
24992 default: None,
24993 inferred_type: None,
24994 })))
24995 } else {
24996 // DuckDB: CAST(x AS TIMESTAMP)
24997 Ok(Expression::Cast(Box::new(Cast {
24998 this,
24999 to: DataType::Timestamp {
25000 timezone: false,
25001 precision: None,
25002 },
25003 double_colon_syntax: false,
25004 trailing_comments: Vec::new(),
25005 format: None,
25006 default: None,
25007 inferred_type: None,
25008 })))
25009 }
25010 }
25011 DialectType::PostgreSQL
25012 | DialectType::Materialize
25013 | DialectType::RisingWave => {
25014 if has_zone {
25015 // PostgreSQL with zone: CAST(x AS TIMESTAMPTZ)
25016 Ok(Expression::Cast(Box::new(Cast {
25017 this,
25018 to: DataType::Timestamp {
25019 timezone: true,
25020 precision: None,
25021 },
25022 double_colon_syntax: false,
25023 trailing_comments: Vec::new(),
25024 format: None,
25025 default: None,
25026 inferred_type: None,
25027 })))
25028 } else {
25029 // PostgreSQL: CAST(x AS TIMESTAMP)
25030 Ok(Expression::Cast(Box::new(Cast {
25031 this,
25032 to: DataType::Timestamp {
25033 timezone: false,
25034 precision: None,
25035 },
25036 double_colon_syntax: false,
25037 trailing_comments: Vec::new(),
25038 format: None,
25039 default: None,
25040 inferred_type: None,
25041 })))
25042 }
25043 }
25044 DialectType::Snowflake => {
25045 if has_zone {
25046 // Snowflake with zone: CAST(x AS TIMESTAMPTZ)
25047 Ok(Expression::Cast(Box::new(Cast {
25048 this,
25049 to: DataType::Timestamp {
25050 timezone: true,
25051 precision: None,
25052 },
25053 double_colon_syntax: false,
25054 trailing_comments: Vec::new(),
25055 format: None,
25056 default: None,
25057 inferred_type: None,
25058 })))
25059 } else {
25060 // Snowflake: CAST(x AS TIMESTAMP)
25061 Ok(Expression::Cast(Box::new(Cast {
25062 this,
25063 to: DataType::Timestamp {
25064 timezone: false,
25065 precision: None,
25066 },
25067 double_colon_syntax: false,
25068 trailing_comments: Vec::new(),
25069 format: None,
25070 default: None,
25071 inferred_type: None,
25072 })))
25073 }
25074 }
25075 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
25076 if has_zone {
25077 // Presto/Trino with zone: CAST(x AS TIMESTAMP WITH TIME ZONE)
25078 // Check for precision from sub-second digits
25079 let precision =
25080 if let Expression::Literal(Literal::String(ref s)) = this {
25081 if let Some(dot_pos) = s.rfind('.') {
25082 let frac = &s[dot_pos + 1..];
25083 let digit_count = frac
25084 .chars()
25085 .take_while(|c| c.is_ascii_digit())
25086 .count();
25087 if digit_count > 0
25088 && matches!(target, DialectType::Trino)
25089 {
25090 Some(digit_count as u32)
25091 } else {
25092 None
25093 }
25094 } else {
25095 None
25096 }
25097 } else {
25098 None
25099 };
25100 let dt = if let Some(prec) = precision {
25101 DataType::Timestamp {
25102 timezone: true,
25103 precision: Some(prec),
25104 }
25105 } else {
25106 DataType::Timestamp {
25107 timezone: true,
25108 precision: None,
25109 }
25110 };
25111 Ok(Expression::Cast(Box::new(Cast {
25112 this,
25113 to: dt,
25114 double_colon_syntax: false,
25115 trailing_comments: Vec::new(),
25116 format: None,
25117 default: None,
25118 inferred_type: None,
25119 })))
25120 } else {
25121 // Check for sub-second precision for Trino
25122 let precision =
25123 if let Expression::Literal(Literal::String(ref s)) = this {
25124 if let Some(dot_pos) = s.rfind('.') {
25125 let frac = &s[dot_pos + 1..];
25126 let digit_count = frac
25127 .chars()
25128 .take_while(|c| c.is_ascii_digit())
25129 .count();
25130 if digit_count > 0
25131 && matches!(target, DialectType::Trino)
25132 {
25133 Some(digit_count as u32)
25134 } else {
25135 None
25136 }
25137 } else {
25138 None
25139 }
25140 } else {
25141 None
25142 };
25143 let dt = DataType::Timestamp {
25144 timezone: false,
25145 precision,
25146 };
25147 Ok(Expression::Cast(Box::new(Cast {
25148 this,
25149 to: dt,
25150 double_colon_syntax: false,
25151 trailing_comments: Vec::new(),
25152 format: None,
25153 default: None,
25154 inferred_type: None,
25155 })))
25156 }
25157 }
25158 DialectType::Redshift => {
25159 if has_zone {
25160 // Redshift with zone: CAST(x AS TIMESTAMP WITH TIME ZONE)
25161 Ok(Expression::Cast(Box::new(Cast {
25162 this,
25163 to: DataType::Timestamp {
25164 timezone: true,
25165 precision: None,
25166 },
25167 double_colon_syntax: false,
25168 trailing_comments: Vec::new(),
25169 format: None,
25170 default: None,
25171 inferred_type: None,
25172 })))
25173 } else {
25174 // Redshift: CAST(x AS TIMESTAMP)
25175 Ok(Expression::Cast(Box::new(Cast {
25176 this,
25177 to: DataType::Timestamp {
25178 timezone: false,
25179 precision: None,
25180 },
25181 double_colon_syntax: false,
25182 trailing_comments: Vec::new(),
25183 format: None,
25184 default: None,
25185 inferred_type: None,
25186 })))
25187 }
25188 }
25189 _ => {
25190 // Default: CAST(x AS TIMESTAMP)
25191 Ok(Expression::Cast(Box::new(Cast {
25192 this,
25193 to: DataType::Timestamp {
25194 timezone: false,
25195 precision: None,
25196 },
25197 double_colon_syntax: false,
25198 trailing_comments: Vec::new(),
25199 format: None,
25200 default: None,
25201 inferred_type: None,
25202 })))
25203 }
25204 }
25205 } else {
25206 Ok(e)
25207 }
25208 }
25209
25210 Action::DateToDateStrConvert => {
25211 // DATE_TO_DATE_STR(x) -> CAST(x AS text_type) per dialect
25212 if let Expression::Function(f) = e {
25213 let arg = f.args.into_iter().next().unwrap();
25214 let str_type = match target {
25215 DialectType::DuckDB => DataType::Text,
25216 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
25217 DataType::Custom {
25218 name: "STRING".to_string(),
25219 }
25220 }
25221 DialectType::Presto
25222 | DialectType::Trino
25223 | DialectType::Athena
25224 | DialectType::Drill => DataType::VarChar {
25225 length: None,
25226 parenthesized_length: false,
25227 },
25228 _ => DataType::VarChar {
25229 length: None,
25230 parenthesized_length: false,
25231 },
25232 };
25233 Ok(Expression::Cast(Box::new(Cast {
25234 this: arg,
25235 to: str_type,
25236 double_colon_syntax: false,
25237 trailing_comments: Vec::new(),
25238 format: None,
25239 default: None,
25240 inferred_type: None,
25241 })))
25242 } else {
25243 Ok(e)
25244 }
25245 }
25246
25247 Action::DateToDiConvert => {
25248 // DATE_TO_DI(x) -> CAST(format_func(x, fmt) AS INT)
25249 if let Expression::Function(f) = e {
25250 let arg = f.args.into_iter().next().unwrap();
25251 let inner = match target {
25252 DialectType::DuckDB => {
25253 // STRFTIME(x, '%Y%m%d')
25254 Expression::Function(Box::new(Function::new(
25255 "STRFTIME".to_string(),
25256 vec![arg, Expression::string("%Y%m%d")],
25257 )))
25258 }
25259 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
25260 // DATE_FORMAT(x, 'yyyyMMdd')
25261 Expression::Function(Box::new(Function::new(
25262 "DATE_FORMAT".to_string(),
25263 vec![arg, Expression::string("yyyyMMdd")],
25264 )))
25265 }
25266 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
25267 // DATE_FORMAT(x, '%Y%m%d')
25268 Expression::Function(Box::new(Function::new(
25269 "DATE_FORMAT".to_string(),
25270 vec![arg, Expression::string("%Y%m%d")],
25271 )))
25272 }
25273 DialectType::Drill => {
25274 // TO_DATE(x, 'yyyyMMdd')
25275 Expression::Function(Box::new(Function::new(
25276 "TO_DATE".to_string(),
25277 vec![arg, Expression::string("yyyyMMdd")],
25278 )))
25279 }
25280 _ => {
25281 // Default: STRFTIME(x, '%Y%m%d')
25282 Expression::Function(Box::new(Function::new(
25283 "STRFTIME".to_string(),
25284 vec![arg, Expression::string("%Y%m%d")],
25285 )))
25286 }
25287 };
25288 // Use INT (not INTEGER) for Presto/Trino
25289 let int_type = match target {
25290 DialectType::Presto
25291 | DialectType::Trino
25292 | DialectType::Athena
25293 | DialectType::TSQL
25294 | DialectType::Fabric
25295 | DialectType::SQLite
25296 | DialectType::Redshift => DataType::Custom {
25297 name: "INT".to_string(),
25298 },
25299 _ => DataType::Int {
25300 length: None,
25301 integer_spelling: false,
25302 },
25303 };
25304 Ok(Expression::Cast(Box::new(Cast {
25305 this: inner,
25306 to: int_type,
25307 double_colon_syntax: false,
25308 trailing_comments: Vec::new(),
25309 format: None,
25310 default: None,
25311 inferred_type: None,
25312 })))
25313 } else {
25314 Ok(e)
25315 }
25316 }
25317
25318 Action::DiToDateConvert => {
25319 // DI_TO_DATE(x) -> dialect-specific integer-to-date conversion
25320 if let Expression::Function(f) = e {
25321 let arg = f.args.into_iter().next().unwrap();
25322 match target {
25323 DialectType::DuckDB => {
25324 // CAST(STRPTIME(CAST(x AS TEXT), '%Y%m%d') AS DATE)
25325 let cast_text = Expression::Cast(Box::new(Cast {
25326 this: arg,
25327 to: DataType::Text,
25328 double_colon_syntax: false,
25329 trailing_comments: Vec::new(),
25330 format: None,
25331 default: None,
25332 inferred_type: None,
25333 }));
25334 let strptime = Expression::Function(Box::new(Function::new(
25335 "STRPTIME".to_string(),
25336 vec![cast_text, Expression::string("%Y%m%d")],
25337 )));
25338 Ok(Expression::Cast(Box::new(Cast {
25339 this: strptime,
25340 to: DataType::Date,
25341 double_colon_syntax: false,
25342 trailing_comments: Vec::new(),
25343 format: None,
25344 default: None,
25345 inferred_type: None,
25346 })))
25347 }
25348 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
25349 // TO_DATE(CAST(x AS STRING), 'yyyyMMdd')
25350 let cast_str = Expression::Cast(Box::new(Cast {
25351 this: arg,
25352 to: DataType::Custom {
25353 name: "STRING".to_string(),
25354 },
25355 double_colon_syntax: false,
25356 trailing_comments: Vec::new(),
25357 format: None,
25358 default: None,
25359 inferred_type: None,
25360 }));
25361 Ok(Expression::Function(Box::new(Function::new(
25362 "TO_DATE".to_string(),
25363 vec![cast_str, Expression::string("yyyyMMdd")],
25364 ))))
25365 }
25366 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
25367 // CAST(DATE_PARSE(CAST(x AS VARCHAR), '%Y%m%d') AS DATE)
25368 let cast_varchar = Expression::Cast(Box::new(Cast {
25369 this: arg,
25370 to: DataType::VarChar {
25371 length: None,
25372 parenthesized_length: false,
25373 },
25374 double_colon_syntax: false,
25375 trailing_comments: Vec::new(),
25376 format: None,
25377 default: None,
25378 inferred_type: None,
25379 }));
25380 let date_parse = Expression::Function(Box::new(Function::new(
25381 "DATE_PARSE".to_string(),
25382 vec![cast_varchar, Expression::string("%Y%m%d")],
25383 )));
25384 Ok(Expression::Cast(Box::new(Cast {
25385 this: date_parse,
25386 to: DataType::Date,
25387 double_colon_syntax: false,
25388 trailing_comments: Vec::new(),
25389 format: None,
25390 default: None,
25391 inferred_type: None,
25392 })))
25393 }
25394 DialectType::Drill => {
25395 // TO_DATE(CAST(x AS VARCHAR), 'yyyyMMdd')
25396 let cast_varchar = Expression::Cast(Box::new(Cast {
25397 this: arg,
25398 to: DataType::VarChar {
25399 length: None,
25400 parenthesized_length: false,
25401 },
25402 double_colon_syntax: false,
25403 trailing_comments: Vec::new(),
25404 format: None,
25405 default: None,
25406 inferred_type: None,
25407 }));
25408 Ok(Expression::Function(Box::new(Function::new(
25409 "TO_DATE".to_string(),
25410 vec![cast_varchar, Expression::string("yyyyMMdd")],
25411 ))))
25412 }
25413 _ => Ok(Expression::Function(Box::new(Function::new(
25414 "DI_TO_DATE".to_string(),
25415 vec![arg],
25416 )))),
25417 }
25418 } else {
25419 Ok(e)
25420 }
25421 }
25422
25423 Action::TsOrDiToDiConvert => {
25424 // TS_OR_DI_TO_DI(x) -> CAST(SUBSTR(REPLACE(CAST(x AS type), '-', ''), 1, 8) AS INT)
25425 if let Expression::Function(f) = e {
25426 let arg = f.args.into_iter().next().unwrap();
25427 let str_type = match target {
25428 DialectType::DuckDB => DataType::Text,
25429 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
25430 DataType::Custom {
25431 name: "STRING".to_string(),
25432 }
25433 }
25434 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
25435 DataType::VarChar {
25436 length: None,
25437 parenthesized_length: false,
25438 }
25439 }
25440 _ => DataType::VarChar {
25441 length: None,
25442 parenthesized_length: false,
25443 },
25444 };
25445 let cast_str = Expression::Cast(Box::new(Cast {
25446 this: arg,
25447 to: str_type,
25448 double_colon_syntax: false,
25449 trailing_comments: Vec::new(),
25450 format: None,
25451 default: None,
25452 inferred_type: None,
25453 }));
25454 let replace_expr = Expression::Function(Box::new(Function::new(
25455 "REPLACE".to_string(),
25456 vec![cast_str, Expression::string("-"), Expression::string("")],
25457 )));
25458 let substr_name = match target {
25459 DialectType::DuckDB
25460 | DialectType::Hive
25461 | DialectType::Spark
25462 | DialectType::Databricks => "SUBSTR",
25463 _ => "SUBSTR",
25464 };
25465 let substr = Expression::Function(Box::new(Function::new(
25466 substr_name.to_string(),
25467 vec![replace_expr, Expression::number(1), Expression::number(8)],
25468 )));
25469 // Use INT (not INTEGER) for Presto/Trino etc.
25470 let int_type = match target {
25471 DialectType::Presto
25472 | DialectType::Trino
25473 | DialectType::Athena
25474 | DialectType::TSQL
25475 | DialectType::Fabric
25476 | DialectType::SQLite
25477 | DialectType::Redshift => DataType::Custom {
25478 name: "INT".to_string(),
25479 },
25480 _ => DataType::Int {
25481 length: None,
25482 integer_spelling: false,
25483 },
25484 };
25485 Ok(Expression::Cast(Box::new(Cast {
25486 this: substr,
25487 to: int_type,
25488 double_colon_syntax: false,
25489 trailing_comments: Vec::new(),
25490 format: None,
25491 default: None,
25492 inferred_type: None,
25493 })))
25494 } else {
25495 Ok(e)
25496 }
25497 }
25498
25499 Action::UnixToStrConvert => {
25500 // UNIX_TO_STR(x, fmt) -> convert to Expression::UnixToStr for generator
25501 if let Expression::Function(f) = e {
25502 let mut args = f.args;
25503 let this = args.remove(0);
25504 let fmt_expr = if !args.is_empty() {
25505 Some(args.remove(0))
25506 } else {
25507 None
25508 };
25509
25510 // Check if format is a string literal
25511 let fmt_str = fmt_expr.as_ref().and_then(|f| {
25512 if let Expression::Literal(Literal::String(s)) = f {
25513 Some(s.clone())
25514 } else {
25515 None
25516 }
25517 });
25518
25519 if let Some(fmt_string) = fmt_str {
25520 // String literal format -> use UnixToStr expression (generator handles it)
25521 Ok(Expression::UnixToStr(Box::new(
25522 crate::expressions::UnixToStr {
25523 this: Box::new(this),
25524 format: Some(fmt_string),
25525 },
25526 )))
25527 } else if let Some(fmt_e) = fmt_expr {
25528 // Non-literal format (e.g., identifier `y`) -> build target expression directly
25529 match target {
25530 DialectType::DuckDB => {
25531 // STRFTIME(TO_TIMESTAMP(x), y)
25532 let to_ts = Expression::Function(Box::new(Function::new(
25533 "TO_TIMESTAMP".to_string(),
25534 vec![this],
25535 )));
25536 Ok(Expression::Function(Box::new(Function::new(
25537 "STRFTIME".to_string(),
25538 vec![to_ts, fmt_e],
25539 ))))
25540 }
25541 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
25542 // DATE_FORMAT(FROM_UNIXTIME(x), y)
25543 let from_unix = Expression::Function(Box::new(Function::new(
25544 "FROM_UNIXTIME".to_string(),
25545 vec![this],
25546 )));
25547 Ok(Expression::Function(Box::new(Function::new(
25548 "DATE_FORMAT".to_string(),
25549 vec![from_unix, fmt_e],
25550 ))))
25551 }
25552 DialectType::Hive
25553 | DialectType::Spark
25554 | DialectType::Databricks
25555 | DialectType::Doris
25556 | DialectType::StarRocks => {
25557 // FROM_UNIXTIME(x, y)
25558 Ok(Expression::Function(Box::new(Function::new(
25559 "FROM_UNIXTIME".to_string(),
25560 vec![this, fmt_e],
25561 ))))
25562 }
25563 _ => {
25564 // Default: keep as UNIX_TO_STR(x, y)
25565 Ok(Expression::Function(Box::new(Function::new(
25566 "UNIX_TO_STR".to_string(),
25567 vec![this, fmt_e],
25568 ))))
25569 }
25570 }
25571 } else {
25572 Ok(Expression::UnixToStr(Box::new(
25573 crate::expressions::UnixToStr {
25574 this: Box::new(this),
25575 format: None,
25576 },
25577 )))
25578 }
25579 } else {
25580 Ok(e)
25581 }
25582 }
25583
25584 Action::UnixToTimeConvert => {
25585 // UNIX_TO_TIME(x) -> convert to Expression::UnixToTime for generator
25586 if let Expression::Function(f) = e {
25587 let arg = f.args.into_iter().next().unwrap();
25588 Ok(Expression::UnixToTime(Box::new(
25589 crate::expressions::UnixToTime {
25590 this: Box::new(arg),
25591 scale: None,
25592 zone: None,
25593 hours: None,
25594 minutes: None,
25595 format: None,
25596 target_type: None,
25597 },
25598 )))
25599 } else {
25600 Ok(e)
25601 }
25602 }
25603
25604 Action::UnixToTimeStrConvert => {
25605 // UNIX_TO_TIME_STR(x) -> dialect-specific
25606 if let Expression::Function(f) = e {
25607 let arg = f.args.into_iter().next().unwrap();
25608 match target {
25609 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
25610 // FROM_UNIXTIME(x)
25611 Ok(Expression::Function(Box::new(Function::new(
25612 "FROM_UNIXTIME".to_string(),
25613 vec![arg],
25614 ))))
25615 }
25616 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
25617 // CAST(FROM_UNIXTIME(x) AS VARCHAR)
25618 let from_unix = Expression::Function(Box::new(Function::new(
25619 "FROM_UNIXTIME".to_string(),
25620 vec![arg],
25621 )));
25622 Ok(Expression::Cast(Box::new(Cast {
25623 this: from_unix,
25624 to: DataType::VarChar {
25625 length: None,
25626 parenthesized_length: false,
25627 },
25628 double_colon_syntax: false,
25629 trailing_comments: Vec::new(),
25630 format: None,
25631 default: None,
25632 inferred_type: None,
25633 })))
25634 }
25635 DialectType::DuckDB => {
25636 // CAST(TO_TIMESTAMP(x) AS TEXT)
25637 let to_ts = Expression::Function(Box::new(Function::new(
25638 "TO_TIMESTAMP".to_string(),
25639 vec![arg],
25640 )));
25641 Ok(Expression::Cast(Box::new(Cast {
25642 this: to_ts,
25643 to: DataType::Text,
25644 double_colon_syntax: false,
25645 trailing_comments: Vec::new(),
25646 format: None,
25647 default: None,
25648 inferred_type: None,
25649 })))
25650 }
25651 _ => Ok(Expression::Function(Box::new(Function::new(
25652 "UNIX_TO_TIME_STR".to_string(),
25653 vec![arg],
25654 )))),
25655 }
25656 } else {
25657 Ok(e)
25658 }
25659 }
25660
25661 Action::TimeToUnixConvert => {
25662 // TIME_TO_UNIX(x) -> convert to Expression::TimeToUnix for generator
25663 if let Expression::Function(f) = e {
25664 let arg = f.args.into_iter().next().unwrap();
25665 Ok(Expression::TimeToUnix(Box::new(
25666 crate::expressions::UnaryFunc {
25667 this: arg,
25668 original_name: None,
25669 inferred_type: None,
25670 },
25671 )))
25672 } else {
25673 Ok(e)
25674 }
25675 }
25676
25677 Action::TimeToStrConvert => {
25678 // TIME_TO_STR(x, fmt) -> convert to Expression::TimeToStr for generator
25679 if let Expression::Function(f) = e {
25680 let mut args = f.args;
25681 let this = args.remove(0);
25682 let fmt = match args.remove(0) {
25683 Expression::Literal(Literal::String(s)) => s,
25684 other => {
25685 return Ok(Expression::Function(Box::new(Function::new(
25686 "TIME_TO_STR".to_string(),
25687 vec![this, other],
25688 ))));
25689 }
25690 };
25691 Ok(Expression::TimeToStr(Box::new(
25692 crate::expressions::TimeToStr {
25693 this: Box::new(this),
25694 format: fmt,
25695 culture: None,
25696 zone: None,
25697 },
25698 )))
25699 } else {
25700 Ok(e)
25701 }
25702 }
25703
25704 Action::StrToUnixConvert => {
25705 // STR_TO_UNIX(x, fmt) -> convert to Expression::StrToUnix for generator
25706 if let Expression::Function(f) = e {
25707 let mut args = f.args;
25708 let this = args.remove(0);
25709 let fmt = match args.remove(0) {
25710 Expression::Literal(Literal::String(s)) => s,
25711 other => {
25712 return Ok(Expression::Function(Box::new(Function::new(
25713 "STR_TO_UNIX".to_string(),
25714 vec![this, other],
25715 ))));
25716 }
25717 };
25718 Ok(Expression::StrToUnix(Box::new(
25719 crate::expressions::StrToUnix {
25720 this: Some(Box::new(this)),
25721 format: Some(fmt),
25722 },
25723 )))
25724 } else {
25725 Ok(e)
25726 }
25727 }
25728
25729 Action::TimeStrToUnixConvert => {
25730 // TIME_STR_TO_UNIX(x) -> dialect-specific
25731 if let Expression::Function(f) = e {
25732 let arg = f.args.into_iter().next().unwrap();
25733 match target {
25734 DialectType::DuckDB => {
25735 // EPOCH(CAST(x AS TIMESTAMP))
25736 let cast_ts = Expression::Cast(Box::new(Cast {
25737 this: arg,
25738 to: DataType::Timestamp {
25739 timezone: false,
25740 precision: None,
25741 },
25742 double_colon_syntax: false,
25743 trailing_comments: Vec::new(),
25744 format: None,
25745 default: None,
25746 inferred_type: None,
25747 }));
25748 Ok(Expression::Function(Box::new(Function::new(
25749 "EPOCH".to_string(),
25750 vec![cast_ts],
25751 ))))
25752 }
25753 DialectType::Hive
25754 | DialectType::Doris
25755 | DialectType::StarRocks
25756 | DialectType::MySQL => {
25757 // UNIX_TIMESTAMP(x)
25758 Ok(Expression::Function(Box::new(Function::new(
25759 "UNIX_TIMESTAMP".to_string(),
25760 vec![arg],
25761 ))))
25762 }
25763 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
25764 // TO_UNIXTIME(DATE_PARSE(x, '%Y-%m-%d %T'))
25765 let date_parse = Expression::Function(Box::new(Function::new(
25766 "DATE_PARSE".to_string(),
25767 vec![arg, Expression::string("%Y-%m-%d %T")],
25768 )));
25769 Ok(Expression::Function(Box::new(Function::new(
25770 "TO_UNIXTIME".to_string(),
25771 vec![date_parse],
25772 ))))
25773 }
25774 _ => Ok(Expression::Function(Box::new(Function::new(
25775 "TIME_STR_TO_UNIX".to_string(),
25776 vec![arg],
25777 )))),
25778 }
25779 } else {
25780 Ok(e)
25781 }
25782 }
25783
25784 Action::TimeToTimeStrConvert => {
25785 // TIME_TO_TIME_STR(x) -> CAST(x AS str_type) per dialect
25786 if let Expression::Function(f) = e {
25787 let arg = f.args.into_iter().next().unwrap();
25788 let str_type = match target {
25789 DialectType::DuckDB => DataType::Text,
25790 DialectType::Hive
25791 | DialectType::Spark
25792 | DialectType::Databricks
25793 | DialectType::Doris
25794 | DialectType::StarRocks => DataType::Custom {
25795 name: "STRING".to_string(),
25796 },
25797 DialectType::Redshift => DataType::Custom {
25798 name: "VARCHAR(MAX)".to_string(),
25799 },
25800 _ => DataType::VarChar {
25801 length: None,
25802 parenthesized_length: false,
25803 },
25804 };
25805 Ok(Expression::Cast(Box::new(Cast {
25806 this: arg,
25807 to: str_type,
25808 double_colon_syntax: false,
25809 trailing_comments: Vec::new(),
25810 format: None,
25811 default: None,
25812 inferred_type: None,
25813 })))
25814 } else {
25815 Ok(e)
25816 }
25817 }
25818
Action::DateTruncSwapArgs => {
    // DATE_TRUNC('unit', x) from Generic -> target-specific
    //
    // Generic's argument order is (unit, expr). Some targets want the
    // arguments swapped, an unquoted unit, a different function name, or a
    // full expansion (MySQL). Anything that is not the exact two-argument
    // form with a string-literal unit is passed through unchanged.
    if let Expression::Function(f) = e {
        if f.args.len() == 2 {
            // Cloned (rather than moved) so `f` stays whole for the
            // pass-through fallbacks below.
            let unit_arg = f.args[0].clone();
            let expr_arg = f.args[1].clone();
            // Extract unit string from the first arg
            let unit_str = match &unit_arg {
                Expression::Literal(Literal::String(s)) => s.to_uppercase(),
                // Non-literal unit: leave the call untouched.
                _ => return Ok(Expression::Function(f)),
            };
            match target {
                DialectType::BigQuery => {
                    // BigQuery: DATE_TRUNC(x, UNIT) - unquoted unit
                    // The unit is emitted as a bare identifier (Column node),
                    // not as a string literal.
                    let unit_ident =
                        Expression::Column(crate::expressions::Column {
                            name: crate::expressions::Identifier::new(unit_str),
                            table: None,
                            join_mark: false,
                            trailing_comments: Vec::new(),
                            span: None,
                            inferred_type: None,
                        });
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATE_TRUNC".to_string(),
                        vec![expr_arg, unit_ident],
                    ))))
                }
                DialectType::Doris => {
                    // Doris: DATE_TRUNC(x, 'UNIT')
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATE_TRUNC".to_string(),
                        vec![expr_arg, Expression::string(&unit_str)],
                    ))))
                }
                DialectType::StarRocks => {
                    // StarRocks: DATE_TRUNC('UNIT', x) - keep standard order
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATE_TRUNC".to_string(),
                        vec![Expression::string(&unit_str), expr_arg],
                    ))))
                }
                DialectType::Spark | DialectType::Databricks => {
                    // Spark: TRUNC(x, 'UNIT')
                    Ok(Expression::Function(Box::new(Function::new(
                        "TRUNC".to_string(),
                        vec![expr_arg, Expression::string(&unit_str)],
                    ))))
                }
                DialectType::MySQL => {
                    // MySQL: complex expansion based on unit (see helper).
                    Self::date_trunc_to_mysql(&unit_str, &expr_arg)
                }
                // Remaining targets keep Generic's DATE_TRUNC('unit', x).
                _ => Ok(Expression::Function(f)),
            }
        } else {
            // Unexpected arity: pass through unchanged.
            Ok(Expression::Function(f))
        }
    } else {
        Ok(e)
    }
}
25881
Action::TimestampTruncConvert => {
    // TIMESTAMP_TRUNC(x, UNIT[, tz]) from Generic -> target-specific
    //
    // Source argument order is (expr, unit[, timezone]). The unit may arrive
    // as a string literal or a bare identifier (Column node); anything else
    // passes through unchanged. The optional timezone is kept, dropped, or
    // expanded depending on the target.
    if let Expression::Function(f) = e {
        if f.args.len() >= 2 {
            // Cloned (rather than moved) so `f` stays whole for the
            // pass-through fallback below.
            let expr_arg = f.args[0].clone();
            let unit_arg = f.args[1].clone();
            let tz_arg = if f.args.len() >= 3 {
                Some(f.args[2].clone())
            } else {
                None
            };
            // Extract unit string
            let unit_str = match &unit_arg {
                Expression::Literal(Literal::String(s)) => s.to_uppercase(),
                Expression::Column(c) => c.name.name.to_uppercase(),
                _ => {
                    return Ok(Expression::Function(f));
                }
            };
            match target {
                DialectType::Spark | DialectType::Databricks => {
                    // Spark: DATE_TRUNC('UNIT', x)
                    // (any timezone argument is dropped)
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATE_TRUNC".to_string(),
                        vec![Expression::string(&unit_str), expr_arg],
                    ))))
                }
                DialectType::Doris | DialectType::StarRocks => {
                    // Doris: DATE_TRUNC(x, 'UNIT')
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATE_TRUNC".to_string(),
                        vec![expr_arg, Expression::string(&unit_str)],
                    ))))
                }
                DialectType::BigQuery => {
                    // BigQuery: TIMESTAMP_TRUNC(x, UNIT) - keep but with unquoted unit
                    let unit_ident =
                        Expression::Column(crate::expressions::Column {
                            name: crate::expressions::Identifier::new(unit_str),
                            table: None,
                            join_mark: false,
                            trailing_comments: Vec::new(),
                            span: None,
                            inferred_type: None,
                        });
                    let mut args = vec![expr_arg, unit_ident];
                    // BigQuery keeps the timezone as a third argument.
                    if let Some(tz) = tz_arg {
                        args.push(tz);
                    }
                    Ok(Expression::Function(Box::new(Function::new(
                        "TIMESTAMP_TRUNC".to_string(),
                        args,
                    ))))
                }
                DialectType::DuckDB => {
                    // DuckDB with timezone: DATE_TRUNC('UNIT', x AT TIME ZONE 'tz') AT TIME ZONE 'tz'
                    if let Some(tz) = tz_arg {
                        // Non-literal timezone arguments fall back to 'UTC'.
                        let tz_str = match &tz {
                            Expression::Literal(Literal::String(s)) => s.clone(),
                            _ => "UTC".to_string(),
                        };
                        // x AT TIME ZONE 'tz'
                        let at_tz = Expression::AtTimeZone(Box::new(
                            crate::expressions::AtTimeZone {
                                this: expr_arg,
                                zone: Expression::string(&tz_str),
                            },
                        ));
                        // DATE_TRUNC('UNIT', x AT TIME ZONE 'tz')
                        let trunc = Expression::Function(Box::new(Function::new(
                            "DATE_TRUNC".to_string(),
                            vec![Expression::string(&unit_str), at_tz],
                        )));
                        // DATE_TRUNC(...) AT TIME ZONE 'tz'
                        Ok(Expression::AtTimeZone(Box::new(
                            crate::expressions::AtTimeZone {
                                this: trunc,
                                zone: Expression::string(&tz_str),
                            },
                        )))
                    } else {
                        // No timezone: plain DATE_TRUNC('UNIT', x).
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_TRUNC".to_string(),
                            vec![Expression::string(&unit_str), expr_arg],
                        ))))
                    }
                }
                DialectType::Presto
                | DialectType::Trino
                | DialectType::Athena
                | DialectType::Snowflake => {
                    // Presto/Snowflake: DATE_TRUNC('UNIT', x) - drop timezone
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATE_TRUNC".to_string(),
                        vec![Expression::string(&unit_str), expr_arg],
                    ))))
                }
                _ => {
                    // For most dialects: DATE_TRUNC('UNIT', x) + tz handling
                    // (the timezone, if present, is forwarded as a third arg).
                    let mut args = vec![Expression::string(&unit_str), expr_arg];
                    if let Some(tz) = tz_arg {
                        args.push(tz);
                    }
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATE_TRUNC".to_string(),
                        args,
                    ))))
                }
            }
        } else {
            // Unexpected arity: pass through unchanged.
            Ok(Expression::Function(f))
        }
    } else {
        Ok(e)
    }
}
25998
Action::StrToDateConvert => {
    // STR_TO_DATE(x, fmt) from Generic -> dialect-specific date parsing
    //
    // Two regimes:
    //  * `fmt` equals one of the canonical default formats -> several targets
    //    can use a plain CAST (or keep STR_TO_DATE verbatim);
    //  * any other string-literal format -> the strftime-style pattern is
    //    translated (e.g. to a Java pattern for Hive/Spark/Drill).
    // Non-literal formats and unexpected arities pass through unchanged.
    if let Expression::Function(f) = e {
        if f.args.len() == 2 {
            let mut args = f.args;
            let this = args.remove(0);
            let fmt_expr = args.remove(0);
            // Only a string literal can be inspected/normalized here.
            let fmt_str = match &fmt_expr {
                Expression::Literal(Literal::String(s)) => Some(s.clone()),
                _ => None,
            };
            // Canonical default formats emitted by the Generic dialect.
            let default_date = "%Y-%m-%d";
            let default_time = "%Y-%m-%d %H:%M:%S";
            let is_default = fmt_str
                .as_ref()
                .map_or(false, |f| f == default_date || f == default_time);

            if is_default {
                // Default format: handle per-dialect
                match target {
                    DialectType::MySQL
                    | DialectType::Doris
                    | DialectType::StarRocks => {
                        // Keep STR_TO_DATE(x, fmt) as-is
                        Ok(Expression::Function(Box::new(Function::new(
                            "STR_TO_DATE".to_string(),
                            vec![this, fmt_expr],
                        ))))
                    }
                    DialectType::Hive => {
                        // Hive: CAST(x AS DATE)
                        Ok(Expression::Cast(Box::new(Cast {
                            this,
                            to: DataType::Date,
                            double_colon_syntax: false,
                            trailing_comments: Vec::new(),
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    }
                    DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Athena => {
                        // Presto: CAST(DATE_PARSE(x, '%Y-%m-%d') AS DATE)
                        let date_parse =
                            Expression::Function(Box::new(Function::new(
                                "DATE_PARSE".to_string(),
                                vec![this, fmt_expr],
                            )));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: date_parse,
                            to: DataType::Date,
                            double_colon_syntax: false,
                            trailing_comments: Vec::new(),
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    }
                    _ => {
                        // Others: TsOrDsToDate (delegates to generator)
                        Ok(Expression::TsOrDsToDate(Box::new(
                            crate::expressions::TsOrDsToDate {
                                this: Box::new(this),
                                format: None,
                                safe: None,
                            },
                        )))
                    }
                }
            } else if let Some(fmt) = fmt_str {
                // Non-default string-literal format.
                match target {
                    DialectType::Doris
                    | DialectType::StarRocks
                    | DialectType::MySQL => {
                        // Keep STR_TO_DATE but with normalized format
                        // (%H:%M:%S -> %T, %-d -> %e, %-m -> %c).
                        let mut normalized = fmt.clone();
                        normalized = normalized.replace("%-d", "%e");
                        normalized = normalized.replace("%-m", "%c");
                        normalized = normalized.replace("%H:%M:%S", "%T");
                        Ok(Expression::Function(Box::new(Function::new(
                            "STR_TO_DATE".to_string(),
                            vec![this, Expression::string(&normalized)],
                        ))))
                    }
                    DialectType::Hive => {
                        // Hive: CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(x, java_fmt)) AS DATE)
                        // strftime pattern is translated to a Java pattern.
                        let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
                        let unix_ts =
                            Expression::Function(Box::new(Function::new(
                                "UNIX_TIMESTAMP".to_string(),
                                vec![this, Expression::string(&java_fmt)],
                            )));
                        let from_unix =
                            Expression::Function(Box::new(Function::new(
                                "FROM_UNIXTIME".to_string(),
                                vec![unix_ts],
                            )));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: from_unix,
                            to: DataType::Date,
                            double_colon_syntax: false,
                            trailing_comments: Vec::new(),
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    }
                    DialectType::Spark | DialectType::Databricks => {
                        // Spark: TO_DATE(x, java_fmt)
                        let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
                        Ok(Expression::Function(Box::new(Function::new(
                            "TO_DATE".to_string(),
                            vec![this, Expression::string(&java_fmt)],
                        ))))
                    }
                    DialectType::Drill => {
                        // Drill: TO_DATE(x, java_fmt) with T quoted as 'T' in Java format
                        // The generator's string literal escaping will double the quotes: 'T' -> ''T''
                        let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
                        let java_fmt = java_fmt.replace('T', "'T'");
                        Ok(Expression::Function(Box::new(Function::new(
                            "TO_DATE".to_string(),
                            vec![this, Expression::string(&java_fmt)],
                        ))))
                    }
                    _ => {
                        // For other dialects: use TsOrDsToDate which delegates to generator
                        Ok(Expression::TsOrDsToDate(Box::new(
                            crate::expressions::TsOrDsToDate {
                                this: Box::new(this),
                                format: Some(fmt),
                                safe: None,
                            },
                        )))
                    }
                }
            } else {
                // Non-string format - keep as-is
                let mut new_args = Vec::new();
                new_args.push(this);
                new_args.push(fmt_expr);
                Ok(Expression::Function(Box::new(Function::new(
                    "STR_TO_DATE".to_string(),
                    new_args,
                ))))
            }
        } else {
            // Unexpected arity: pass through unchanged.
            Ok(Expression::Function(f))
        }
    } else {
        Ok(e)
    }
}
26154
26155 Action::TsOrDsAddConvert => {
26156 // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> dialect-specific DATE_ADD
26157 if let Expression::Function(f) = e {
26158 if f.args.len() == 3 {
26159 let mut args = f.args;
26160 let x = args.remove(0);
26161 let n = args.remove(0);
26162 let unit_expr = args.remove(0);
26163 let unit_str = match &unit_expr {
26164 Expression::Literal(Literal::String(s)) => s.to_uppercase(),
26165 _ => "DAY".to_string(),
26166 };
26167
26168 match target {
26169 DialectType::Hive
26170 | DialectType::Spark
26171 | DialectType::Databricks => {
26172 // DATE_ADD(x, n) - only supports DAY unit
26173 Ok(Expression::Function(Box::new(Function::new(
26174 "DATE_ADD".to_string(),
26175 vec![x, n],
26176 ))))
26177 }
26178 DialectType::MySQL => {
26179 // DATE_ADD(x, INTERVAL n UNIT)
26180 let iu = match unit_str.to_uppercase().as_str() {
26181 "YEAR" => crate::expressions::IntervalUnit::Year,
26182 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
26183 "MONTH" => crate::expressions::IntervalUnit::Month,
26184 "WEEK" => crate::expressions::IntervalUnit::Week,
26185 "HOUR" => crate::expressions::IntervalUnit::Hour,
26186 "MINUTE" => crate::expressions::IntervalUnit::Minute,
26187 "SECOND" => crate::expressions::IntervalUnit::Second,
26188 _ => crate::expressions::IntervalUnit::Day,
26189 };
26190 let interval = Expression::Interval(Box::new(
26191 crate::expressions::Interval {
26192 this: Some(n),
26193 unit: Some(
26194 crate::expressions::IntervalUnitSpec::Simple {
26195 unit: iu,
26196 use_plural: false,
26197 },
26198 ),
26199 },
26200 ));
26201 Ok(Expression::Function(Box::new(Function::new(
26202 "DATE_ADD".to_string(),
26203 vec![x, interval],
26204 ))))
26205 }
26206 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
26207 // DATE_ADD('UNIT', n, CAST(CAST(x AS TIMESTAMP) AS DATE))
26208 let cast_ts = Expression::Cast(Box::new(Cast {
26209 this: x,
26210 to: DataType::Timestamp {
26211 precision: None,
26212 timezone: false,
26213 },
26214 double_colon_syntax: false,
26215 trailing_comments: Vec::new(),
26216 format: None,
26217 default: None,
26218 inferred_type: None,
26219 }));
26220 let cast_date = Expression::Cast(Box::new(Cast {
26221 this: cast_ts,
26222 to: DataType::Date,
26223 double_colon_syntax: false,
26224 trailing_comments: Vec::new(),
26225 format: None,
26226 default: None,
26227 inferred_type: None,
26228 }));
26229 Ok(Expression::Function(Box::new(Function::new(
26230 "DATE_ADD".to_string(),
26231 vec![Expression::string(&unit_str), n, cast_date],
26232 ))))
26233 }
26234 DialectType::DuckDB => {
26235 // CAST(x AS DATE) + INTERVAL n UNIT
26236 let cast_date = Expression::Cast(Box::new(Cast {
26237 this: x,
26238 to: DataType::Date,
26239 double_colon_syntax: false,
26240 trailing_comments: Vec::new(),
26241 format: None,
26242 default: None,
26243 inferred_type: None,
26244 }));
26245 let iu = match unit_str.to_uppercase().as_str() {
26246 "YEAR" => crate::expressions::IntervalUnit::Year,
26247 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
26248 "MONTH" => crate::expressions::IntervalUnit::Month,
26249 "WEEK" => crate::expressions::IntervalUnit::Week,
26250 "HOUR" => crate::expressions::IntervalUnit::Hour,
26251 "MINUTE" => crate::expressions::IntervalUnit::Minute,
26252 "SECOND" => crate::expressions::IntervalUnit::Second,
26253 _ => crate::expressions::IntervalUnit::Day,
26254 };
26255 let interval = Expression::Interval(Box::new(
26256 crate::expressions::Interval {
26257 this: Some(n),
26258 unit: Some(
26259 crate::expressions::IntervalUnitSpec::Simple {
26260 unit: iu,
26261 use_plural: false,
26262 },
26263 ),
26264 },
26265 ));
26266 Ok(Expression::Add(Box::new(crate::expressions::BinaryOp {
26267 left: cast_date,
26268 right: interval,
26269 left_comments: Vec::new(),
26270 operator_comments: Vec::new(),
26271 trailing_comments: Vec::new(),
26272 inferred_type: None,
26273 })))
26274 }
26275 DialectType::Drill => {
26276 // DATE_ADD(CAST(x AS DATE), INTERVAL n UNIT)
26277 let cast_date = Expression::Cast(Box::new(Cast {
26278 this: x,
26279 to: DataType::Date,
26280 double_colon_syntax: false,
26281 trailing_comments: Vec::new(),
26282 format: None,
26283 default: None,
26284 inferred_type: None,
26285 }));
26286 let iu = match unit_str.to_uppercase().as_str() {
26287 "YEAR" => crate::expressions::IntervalUnit::Year,
26288 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
26289 "MONTH" => crate::expressions::IntervalUnit::Month,
26290 "WEEK" => crate::expressions::IntervalUnit::Week,
26291 "HOUR" => crate::expressions::IntervalUnit::Hour,
26292 "MINUTE" => crate::expressions::IntervalUnit::Minute,
26293 "SECOND" => crate::expressions::IntervalUnit::Second,
26294 _ => crate::expressions::IntervalUnit::Day,
26295 };
26296 let interval = Expression::Interval(Box::new(
26297 crate::expressions::Interval {
26298 this: Some(n),
26299 unit: Some(
26300 crate::expressions::IntervalUnitSpec::Simple {
26301 unit: iu,
26302 use_plural: false,
26303 },
26304 ),
26305 },
26306 ));
26307 Ok(Expression::Function(Box::new(Function::new(
26308 "DATE_ADD".to_string(),
26309 vec![cast_date, interval],
26310 ))))
26311 }
26312 _ => {
26313 // Default: keep as TS_OR_DS_ADD
26314 Ok(Expression::Function(Box::new(Function::new(
26315 "TS_OR_DS_ADD".to_string(),
26316 vec![x, n, unit_expr],
26317 ))))
26318 }
26319 }
26320 } else {
26321 Ok(Expression::Function(f))
26322 }
26323 } else {
26324 Ok(e)
26325 }
26326 }
26327
26328 Action::DateFromUnixDateConvert => {
26329 // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, CAST('1970-01-01' AS DATE))
26330 if let Expression::Function(f) = e {
26331 // Keep as-is for dialects that support DATE_FROM_UNIX_DATE natively
26332 if matches!(
26333 target,
26334 DialectType::Spark | DialectType::Databricks | DialectType::BigQuery
26335 ) {
26336 return Ok(Expression::Function(Box::new(Function::new(
26337 "DATE_FROM_UNIX_DATE".to_string(),
26338 f.args,
26339 ))));
26340 }
26341 let n = f.args.into_iter().next().unwrap();
26342 let epoch_date = Expression::Cast(Box::new(Cast {
26343 this: Expression::string("1970-01-01"),
26344 to: DataType::Date,
26345 double_colon_syntax: false,
26346 trailing_comments: Vec::new(),
26347 format: None,
26348 default: None,
26349 inferred_type: None,
26350 }));
26351 match target {
26352 DialectType::DuckDB => {
26353 // CAST('1970-01-01' AS DATE) + INTERVAL n DAY
26354 let interval =
26355 Expression::Interval(Box::new(crate::expressions::Interval {
26356 this: Some(n),
26357 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
26358 unit: crate::expressions::IntervalUnit::Day,
26359 use_plural: false,
26360 }),
26361 }));
26362 Ok(Expression::Add(Box::new(
26363 crate::expressions::BinaryOp::new(epoch_date, interval),
26364 )))
26365 }
26366 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
26367 // DATE_ADD('DAY', n, CAST('1970-01-01' AS DATE))
26368 Ok(Expression::Function(Box::new(Function::new(
26369 "DATE_ADD".to_string(),
26370 vec![Expression::string("DAY"), n, epoch_date],
26371 ))))
26372 }
26373 DialectType::Snowflake | DialectType::Redshift | DialectType::TSQL => {
26374 // DATEADD(DAY, n, CAST('1970-01-01' AS DATE))
26375 Ok(Expression::Function(Box::new(Function::new(
26376 "DATEADD".to_string(),
26377 vec![
26378 Expression::Identifier(Identifier::new("DAY")),
26379 n,
26380 epoch_date,
26381 ],
26382 ))))
26383 }
26384 DialectType::BigQuery => {
26385 // DATE_ADD(CAST('1970-01-01' AS DATE), INTERVAL n DAY)
26386 let interval =
26387 Expression::Interval(Box::new(crate::expressions::Interval {
26388 this: Some(n),
26389 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
26390 unit: crate::expressions::IntervalUnit::Day,
26391 use_plural: false,
26392 }),
26393 }));
26394 Ok(Expression::Function(Box::new(Function::new(
26395 "DATE_ADD".to_string(),
26396 vec![epoch_date, interval],
26397 ))))
26398 }
26399 DialectType::MySQL
26400 | DialectType::Doris
26401 | DialectType::StarRocks
26402 | DialectType::Drill => {
26403 // DATE_ADD(CAST('1970-01-01' AS DATE), INTERVAL n DAY)
26404 let interval =
26405 Expression::Interval(Box::new(crate::expressions::Interval {
26406 this: Some(n),
26407 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
26408 unit: crate::expressions::IntervalUnit::Day,
26409 use_plural: false,
26410 }),
26411 }));
26412 Ok(Expression::Function(Box::new(Function::new(
26413 "DATE_ADD".to_string(),
26414 vec![epoch_date, interval],
26415 ))))
26416 }
26417 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
26418 // DATE_ADD(CAST('1970-01-01' AS DATE), n)
26419 Ok(Expression::Function(Box::new(Function::new(
26420 "DATE_ADD".to_string(),
26421 vec![epoch_date, n],
26422 ))))
26423 }
26424 DialectType::PostgreSQL
26425 | DialectType::Materialize
26426 | DialectType::RisingWave => {
26427 // CAST('1970-01-01' AS DATE) + INTERVAL 'n DAY'
26428 let n_str = match &n {
26429 Expression::Literal(Literal::Number(s)) => s.clone(),
26430 _ => Self::expr_to_string_static(&n),
26431 };
26432 let interval =
26433 Expression::Interval(Box::new(crate::expressions::Interval {
26434 this: Some(Expression::string(&format!("{} DAY", n_str))),
26435 unit: None,
26436 }));
26437 Ok(Expression::Add(Box::new(
26438 crate::expressions::BinaryOp::new(epoch_date, interval),
26439 )))
26440 }
26441 _ => {
26442 // Default: keep as-is
26443 Ok(Expression::Function(Box::new(Function::new(
26444 "DATE_FROM_UNIX_DATE".to_string(),
26445 vec![n],
26446 ))))
26447 }
26448 }
26449 } else {
26450 Ok(e)
26451 }
26452 }
26453
26454 Action::ArrayRemoveConvert => {
26455 // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter
26456 if let Expression::ArrayRemove(bf) = e {
26457 let arr = bf.this;
26458 let target_val = bf.expression;
26459 match target {
26460 DialectType::DuckDB => {
26461 let u_id = crate::expressions::Identifier::new("_u");
26462 let lambda =
26463 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
26464 parameters: vec![u_id.clone()],
26465 body: Expression::Neq(Box::new(BinaryOp {
26466 left: Expression::Identifier(u_id),
26467 right: target_val,
26468 left_comments: Vec::new(),
26469 operator_comments: Vec::new(),
26470 trailing_comments: Vec::new(),
26471 inferred_type: None,
26472 })),
26473 colon: false,
26474 parameter_types: Vec::new(),
26475 }));
26476 Ok(Expression::Function(Box::new(Function::new(
26477 "LIST_FILTER".to_string(),
26478 vec![arr, lambda],
26479 ))))
26480 }
26481 DialectType::ClickHouse => {
26482 let u_id = crate::expressions::Identifier::new("_u");
26483 let lambda =
26484 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
26485 parameters: vec![u_id.clone()],
26486 body: Expression::Neq(Box::new(BinaryOp {
26487 left: Expression::Identifier(u_id),
26488 right: target_val,
26489 left_comments: Vec::new(),
26490 operator_comments: Vec::new(),
26491 trailing_comments: Vec::new(),
26492 inferred_type: None,
26493 })),
26494 colon: false,
26495 parameter_types: Vec::new(),
26496 }));
26497 Ok(Expression::Function(Box::new(Function::new(
26498 "arrayFilter".to_string(),
26499 vec![lambda, arr],
26500 ))))
26501 }
26502 DialectType::BigQuery => {
26503 // ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)
26504 let u_id = crate::expressions::Identifier::new("_u");
26505 let u_col = Expression::Column(crate::expressions::Column {
26506 name: u_id.clone(),
26507 table: None,
26508 join_mark: false,
26509 trailing_comments: Vec::new(),
26510 span: None,
26511 inferred_type: None,
26512 });
26513 let unnest_expr =
26514 Expression::Unnest(Box::new(crate::expressions::UnnestFunc {
26515 this: arr,
26516 expressions: Vec::new(),
26517 with_ordinality: false,
26518 alias: None,
26519 offset_alias: None,
26520 }));
26521 let aliased_unnest =
26522 Expression::Alias(Box::new(crate::expressions::Alias {
26523 this: unnest_expr,
26524 alias: u_id.clone(),
26525 column_aliases: Vec::new(),
26526 pre_alias_comments: Vec::new(),
26527 trailing_comments: Vec::new(),
26528 inferred_type: None,
26529 }));
26530 let where_cond = Expression::Neq(Box::new(BinaryOp {
26531 left: u_col.clone(),
26532 right: target_val,
26533 left_comments: Vec::new(),
26534 operator_comments: Vec::new(),
26535 trailing_comments: Vec::new(),
26536 inferred_type: None,
26537 }));
26538 let subquery = Expression::Select(Box::new(
26539 crate::expressions::Select::new()
26540 .column(u_col)
26541 .from(aliased_unnest)
26542 .where_(where_cond),
26543 ));
26544 Ok(Expression::ArrayFunc(Box::new(
26545 crate::expressions::ArrayConstructor {
26546 expressions: vec![subquery],
26547 bracket_notation: false,
26548 use_list_keyword: false,
26549 },
26550 )))
26551 }
26552 _ => Ok(Expression::ArrayRemove(Box::new(
26553 crate::expressions::BinaryFunc {
26554 original_name: None,
26555 this: arr,
26556 expression: target_val,
26557 inferred_type: None,
26558 },
26559 ))),
26560 }
26561 } else {
26562 Ok(e)
26563 }
26564 }
26565
26566 Action::ArrayReverseConvert => {
26567 // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
26568 if let Expression::ArrayReverse(af) = e {
26569 Ok(Expression::Function(Box::new(Function::new(
26570 "arrayReverse".to_string(),
26571 vec![af.this],
26572 ))))
26573 } else {
26574 Ok(e)
26575 }
26576 }
26577
26578 Action::JsonKeysConvert => {
26579 // JSON_KEYS(x) -> JSON_OBJECT_KEYS/OBJECT_KEYS
26580 if let Expression::JsonKeys(uf) = e {
26581 match target {
26582 DialectType::Spark | DialectType::Databricks => {
26583 Ok(Expression::Function(Box::new(Function::new(
26584 "JSON_OBJECT_KEYS".to_string(),
26585 vec![uf.this],
26586 ))))
26587 }
26588 DialectType::Snowflake => Ok(Expression::Function(Box::new(
26589 Function::new("OBJECT_KEYS".to_string(), vec![uf.this]),
26590 ))),
26591 _ => Ok(Expression::JsonKeys(uf)),
26592 }
26593 } else {
26594 Ok(e)
26595 }
26596 }
26597
26598 Action::ParseJsonStrip => {
26599 // PARSE_JSON(x) -> x (strip wrapper for SQLite/Doris)
26600 if let Expression::ParseJson(uf) = e {
26601 Ok(uf.this)
26602 } else {
26603 Ok(e)
26604 }
26605 }
26606
26607 Action::ArraySizeDrill => {
26608 // ARRAY_SIZE(x) -> REPEATED_COUNT(x) for Drill
26609 if let Expression::ArraySize(uf) = e {
26610 Ok(Expression::Function(Box::new(Function::new(
26611 "REPEATED_COUNT".to_string(),
26612 vec![uf.this],
26613 ))))
26614 } else {
26615 Ok(e)
26616 }
26617 }
26618
26619 Action::WeekOfYearToWeekIso => {
26620 // WEEKOFYEAR(x) -> WEEKISO(x) for Snowflake (cross-dialect normalization)
26621 if let Expression::WeekOfYear(uf) = e {
26622 Ok(Expression::Function(Box::new(Function::new(
26623 "WEEKISO".to_string(),
26624 vec![uf.this],
26625 ))))
26626 } else {
26627 Ok(e)
26628 }
26629 }
26630 }
26631 })
26632 }
26633
26634 /// Convert DATE_TRUNC('unit', x) to MySQL-specific expansion
26635 fn date_trunc_to_mysql(unit: &str, expr: &Expression) -> Result<Expression> {
26636 use crate::expressions::Function;
26637 match unit {
26638 "DAY" => {
26639 // DATE(x)
26640 Ok(Expression::Function(Box::new(Function::new(
26641 "DATE".to_string(),
26642 vec![expr.clone()],
26643 ))))
26644 }
26645 "WEEK" => {
26646 // STR_TO_DATE(CONCAT(YEAR(x), ' ', WEEK(x, 1), ' 1'), '%Y %u %w')
26647 let year_x = Expression::Function(Box::new(Function::new(
26648 "YEAR".to_string(),
26649 vec![expr.clone()],
26650 )));
26651 let week_x = Expression::Function(Box::new(Function::new(
26652 "WEEK".to_string(),
26653 vec![expr.clone(), Expression::number(1)],
26654 )));
26655 let concat_args = vec![
26656 year_x,
26657 Expression::string(" "),
26658 week_x,
26659 Expression::string(" 1"),
26660 ];
26661 let concat = Expression::Function(Box::new(Function::new(
26662 "CONCAT".to_string(),
26663 concat_args,
26664 )));
26665 Ok(Expression::Function(Box::new(Function::new(
26666 "STR_TO_DATE".to_string(),
26667 vec![concat, Expression::string("%Y %u %w")],
26668 ))))
26669 }
26670 "MONTH" => {
26671 // STR_TO_DATE(CONCAT(YEAR(x), ' ', MONTH(x), ' 1'), '%Y %c %e')
26672 let year_x = Expression::Function(Box::new(Function::new(
26673 "YEAR".to_string(),
26674 vec![expr.clone()],
26675 )));
26676 let month_x = Expression::Function(Box::new(Function::new(
26677 "MONTH".to_string(),
26678 vec![expr.clone()],
26679 )));
26680 let concat_args = vec![
26681 year_x,
26682 Expression::string(" "),
26683 month_x,
26684 Expression::string(" 1"),
26685 ];
26686 let concat = Expression::Function(Box::new(Function::new(
26687 "CONCAT".to_string(),
26688 concat_args,
26689 )));
26690 Ok(Expression::Function(Box::new(Function::new(
26691 "STR_TO_DATE".to_string(),
26692 vec![concat, Expression::string("%Y %c %e")],
26693 ))))
26694 }
26695 "QUARTER" => {
26696 // STR_TO_DATE(CONCAT(YEAR(x), ' ', QUARTER(x) * 3 - 2, ' 1'), '%Y %c %e')
26697 let year_x = Expression::Function(Box::new(Function::new(
26698 "YEAR".to_string(),
26699 vec![expr.clone()],
26700 )));
26701 let quarter_x = Expression::Function(Box::new(Function::new(
26702 "QUARTER".to_string(),
26703 vec![expr.clone()],
26704 )));
26705 // QUARTER(x) * 3 - 2
26706 let mul = Expression::Mul(Box::new(crate::expressions::BinaryOp {
26707 left: quarter_x,
26708 right: Expression::number(3),
26709 left_comments: Vec::new(),
26710 operator_comments: Vec::new(),
26711 trailing_comments: Vec::new(),
26712 inferred_type: None,
26713 }));
26714 let sub = Expression::Sub(Box::new(crate::expressions::BinaryOp {
26715 left: mul,
26716 right: Expression::number(2),
26717 left_comments: Vec::new(),
26718 operator_comments: Vec::new(),
26719 trailing_comments: Vec::new(),
26720 inferred_type: None,
26721 }));
26722 let concat_args = vec![
26723 year_x,
26724 Expression::string(" "),
26725 sub,
26726 Expression::string(" 1"),
26727 ];
26728 let concat = Expression::Function(Box::new(Function::new(
26729 "CONCAT".to_string(),
26730 concat_args,
26731 )));
26732 Ok(Expression::Function(Box::new(Function::new(
26733 "STR_TO_DATE".to_string(),
26734 vec![concat, Expression::string("%Y %c %e")],
26735 ))))
26736 }
26737 "YEAR" => {
26738 // STR_TO_DATE(CONCAT(YEAR(x), ' 1 1'), '%Y %c %e')
26739 let year_x = Expression::Function(Box::new(Function::new(
26740 "YEAR".to_string(),
26741 vec![expr.clone()],
26742 )));
26743 let concat_args = vec![year_x, Expression::string(" 1 1")];
26744 let concat = Expression::Function(Box::new(Function::new(
26745 "CONCAT".to_string(),
26746 concat_args,
26747 )));
26748 Ok(Expression::Function(Box::new(Function::new(
26749 "STR_TO_DATE".to_string(),
26750 vec![concat, Expression::string("%Y %c %e")],
26751 ))))
26752 }
26753 _ => {
26754 // Unsupported unit -> keep as DATE_TRUNC
26755 Ok(Expression::Function(Box::new(Function::new(
26756 "DATE_TRUNC".to_string(),
26757 vec![Expression::string(unit), expr.clone()],
26758 ))))
26759 }
26760 }
26761 }
26762
26763 /// Check if a DataType is or contains VARCHAR/CHAR (for Spark VARCHAR->STRING normalization)
26764 fn has_varchar_char_type(dt: &crate::expressions::DataType) -> bool {
26765 use crate::expressions::DataType;
26766 match dt {
26767 DataType::VarChar { .. } | DataType::Char { .. } => true,
26768 DataType::Struct { fields, .. } => fields
26769 .iter()
26770 .any(|f| Self::has_varchar_char_type(&f.data_type)),
26771 _ => false,
26772 }
26773 }
26774
26775 /// Recursively normalize VARCHAR/CHAR to STRING in a DataType (for Spark)
26776 fn normalize_varchar_to_string(
26777 dt: crate::expressions::DataType,
26778 ) -> crate::expressions::DataType {
26779 use crate::expressions::DataType;
26780 match dt {
26781 DataType::VarChar { .. } | DataType::Char { .. } => DataType::Custom {
26782 name: "STRING".to_string(),
26783 },
26784 DataType::Struct { fields, nested } => {
26785 let fields = fields
26786 .into_iter()
26787 .map(|mut f| {
26788 f.data_type = Self::normalize_varchar_to_string(f.data_type);
26789 f
26790 })
26791 .collect();
26792 DataType::Struct { fields, nested }
26793 }
26794 other => other,
26795 }
26796 }
26797
26798 /// Normalize an interval string like '1day' or ' 2 days ' to proper INTERVAL expression
26799 fn normalize_interval_string(expr: Expression, target: DialectType) -> Expression {
26800 if let Expression::Literal(crate::expressions::Literal::String(ref s)) = expr {
26801 // Try to parse patterns like '1day', '1 day', '2 days', ' 2 days '
26802 let trimmed = s.trim();
26803
26804 // Find where digits end and unit text begins
26805 let digit_end = trimmed
26806 .find(|c: char| !c.is_ascii_digit())
26807 .unwrap_or(trimmed.len());
26808 if digit_end == 0 || digit_end == trimmed.len() {
26809 return expr;
26810 }
26811 let num = &trimmed[..digit_end];
26812 let unit_text = trimmed[digit_end..].trim().to_uppercase();
26813 if unit_text.is_empty() {
26814 return expr;
26815 }
26816
26817 let known_units = [
26818 "DAY", "DAYS", "HOUR", "HOURS", "MINUTE", "MINUTES", "SECOND", "SECONDS", "WEEK",
26819 "WEEKS", "MONTH", "MONTHS", "YEAR", "YEARS",
26820 ];
26821 if !known_units.contains(&unit_text.as_str()) {
26822 return expr;
26823 }
26824
26825 let unit_str = unit_text.clone();
26826 // Singularize
26827 let unit_singular = if unit_str.ends_with('S') && unit_str.len() > 3 {
26828 &unit_str[..unit_str.len() - 1]
26829 } else {
26830 &unit_str
26831 };
26832 let unit = unit_singular;
26833
26834 match target {
26835 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
26836 // INTERVAL '2' DAY
26837 let iu = match unit {
26838 "DAY" => crate::expressions::IntervalUnit::Day,
26839 "HOUR" => crate::expressions::IntervalUnit::Hour,
26840 "MINUTE" => crate::expressions::IntervalUnit::Minute,
26841 "SECOND" => crate::expressions::IntervalUnit::Second,
26842 "WEEK" => crate::expressions::IntervalUnit::Week,
26843 "MONTH" => crate::expressions::IntervalUnit::Month,
26844 "YEAR" => crate::expressions::IntervalUnit::Year,
26845 _ => return expr,
26846 };
26847 return Expression::Interval(Box::new(crate::expressions::Interval {
26848 this: Some(Expression::string(num)),
26849 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
26850 unit: iu,
26851 use_plural: false,
26852 }),
26853 }));
26854 }
26855 DialectType::PostgreSQL | DialectType::Redshift | DialectType::DuckDB => {
26856 // INTERVAL '2 DAYS'
26857 let plural = if num != "1" && !unit_str.ends_with('S') {
26858 format!("{} {}S", num, unit)
26859 } else if unit_str.ends_with('S') {
26860 format!("{} {}", num, unit_str)
26861 } else {
26862 format!("{} {}", num, unit)
26863 };
26864 return Expression::Interval(Box::new(crate::expressions::Interval {
26865 this: Some(Expression::string(&plural)),
26866 unit: None,
26867 }));
26868 }
26869 _ => {
26870 // Spark/Databricks/Hive: INTERVAL '1' DAY
26871 let iu = match unit {
26872 "DAY" => crate::expressions::IntervalUnit::Day,
26873 "HOUR" => crate::expressions::IntervalUnit::Hour,
26874 "MINUTE" => crate::expressions::IntervalUnit::Minute,
26875 "SECOND" => crate::expressions::IntervalUnit::Second,
26876 "WEEK" => crate::expressions::IntervalUnit::Week,
26877 "MONTH" => crate::expressions::IntervalUnit::Month,
26878 "YEAR" => crate::expressions::IntervalUnit::Year,
26879 _ => return expr,
26880 };
26881 return Expression::Interval(Box::new(crate::expressions::Interval {
26882 this: Some(Expression::string(num)),
26883 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
26884 unit: iu,
26885 use_plural: false,
26886 }),
26887 }));
26888 }
26889 }
26890 }
26891 // If it's already an INTERVAL expression, pass through
26892 expr
26893 }
26894
    /// Rewrite SELECT expressions containing UNNEST into expanded form with CROSS JOINs.
    ///
    /// Input is a DuckDB-style `SELECT UNNEST(arr1), UNNEST(arr2)` and output is a
    /// positional expansion driven by a shared position series:
    /// - BigQuery: SELECT IF(pos = pos_2, col, NULL) AS col, ... FROM UNNEST(GENERATE_ARRAY(0, ...)) AS pos CROSS JOIN ...
    /// - Presto: SELECT IF(_u.pos = _u_2.pos_2, _u_2.col) AS col, ... FROM UNNEST(SEQUENCE(1, ...)) AS _u(pos) CROSS JOIN ...
    /// - Snowflake: same shape via TABLE(FLATTEN(INPUT => ...)) and IFF.
    ///
    /// Returns `None` when the SELECT contains no UNNEST expressions or the
    /// target dialect is not one this expansion supports.
    fn rewrite_unnest_expansion(
        select: &crate::expressions::Select,
        target: DialectType,
    ) -> Option<crate::expressions::Select> {
        use crate::expressions::{
            Alias, BinaryOp, Column, From, Function, Identifier, Join, JoinKind, Literal,
            UnnestFunc,
        };

        // Base of the generated position series: SEQUENCE(1, ...) is 1-based for
        // Presto/Trino; GENERATE_ARRAY/ARRAY_GENERATE_RANGE start at 0 elsewhere.
        let index_offset: i64 = match target {
            DialectType::Presto | DialectType::Trino => 1,
            _ => 0, // BigQuery, Snowflake
        };

        // Conditional function spelling differs per dialect.
        let if_func_name = match target {
            DialectType::Snowflake => "IFF",
            _ => "IF",
        };

        // Array-length function spelling differs per dialect.
        let array_length_func = match target {
            DialectType::BigQuery => "ARRAY_LENGTH",
            DialectType::Presto | DialectType::Trino => "CARDINALITY",
            DialectType::Snowflake => "ARRAY_SIZE",
            _ => "ARRAY_LENGTH",
        };

        // Whether generated column references are qualified with table aliases
        // (`_u.pos`) instead of bare identifiers (`pos`).
        let use_table_aliases = matches!(
            target,
            DialectType::Presto | DialectType::Trino | DialectType::Snowflake
        );
        // Whether the IF/IFF call carries an explicit NULL as its third argument.
        let null_third_arg = matches!(target, DialectType::BigQuery | DialectType::Snowflake);

        // Build a column reference, optionally table-qualified.
        fn make_col(name: &str, table: Option<&str>) -> Expression {
            if let Some(tbl) = table {
                Expression::Column(Column {
                    name: Identifier::new(name.to_string()),
                    table: Some(Identifier::new(tbl.to_string())),
                    join_mark: false,
                    trailing_comments: Vec::new(),
                    span: None,
                    inferred_type: None,
                })
            } else {
                Expression::Identifier(Identifier::new(name.to_string()))
            }
        }

        // Wrap an expression as a bare CROSS JOIN (no ON/USING condition).
        fn make_join(this: Expression) -> Join {
            Join {
                this,
                on: None,
                using: Vec::new(),
                kind: JoinKind::Cross,
                use_inner_keyword: false,
                use_outer_keyword: false,
                deferred_condition: false,
                join_hint: None,
                match_condition: None,
                pivots: Vec::new(),
                comments: Vec::new(),
                nesting_group: 0,
                directed: false,
            }
        }

        // Collect UNNEST info from SELECT expressions
        struct UnnestInfo {
            arr_expr: Expression,            // the array being unnested
            col_alias: String,               // generated element alias: col, col_2, ...
            pos_alias: String,               // generated position alias: pos_2, pos_3, ...
            source_alias: String,            // generated source alias: _u_2, _u_3, ...
            original_expr: Expression,       // the original SELECT item (may wrap UNNEST)
            has_outer_alias: Option<String>, // user-written alias, if any, wins over col_alias
        }

        let mut unnest_infos: Vec<UnnestInfo> = Vec::new();
        // col starts at 0 so the first element alias is plain "col"; pos/source
        // start at 1 so their first generated aliases are "pos_2"/"_u_2"
        // (the shared series itself owns "pos"/"_u").
        let mut col_counter = 0usize;
        let mut pos_counter = 1usize;
        let mut source_counter = 1usize;

        // Find the array argument of an UNNEST nested anywhere inside an alias
        // or +,-,*,/ arithmetic wrapper; None if the item holds no UNNEST.
        fn extract_unnest_arg(expr: &Expression) -> Option<Expression> {
            match expr {
                Expression::Unnest(u) => Some(u.this.clone()),
                Expression::Function(f)
                    if f.name.eq_ignore_ascii_case("UNNEST") && !f.args.is_empty() =>
                {
                    Some(f.args[0].clone())
                }
                Expression::Alias(a) => extract_unnest_arg(&a.this),
                Expression::Add(op)
                | Expression::Sub(op)
                | Expression::Mul(op)
                | Expression::Div(op) => {
                    extract_unnest_arg(&op.left).or_else(|| extract_unnest_arg(&op.right))
                }
                _ => None,
            }
        }

        // The user-written alias of a SELECT item, if it has one.
        fn get_alias_name(expr: &Expression) -> Option<String> {
            if let Expression::Alias(a) = expr {
                Some(a.alias.name.clone())
            } else {
                None
            }
        }

        for sel_expr in &select.expressions {
            if let Some(arr) = extract_unnest_arg(sel_expr) {
                col_counter += 1;
                pos_counter += 1;
                source_counter += 1;

                let col_alias = if col_counter == 1 {
                    "col".to_string()
                } else {
                    format!("col_{}", col_counter)
                };
                let pos_alias = format!("pos_{}", pos_counter);
                let source_alias = format!("_u_{}", source_counter);
                let has_outer_alias = get_alias_name(sel_expr);

                unnest_infos.push(UnnestInfo {
                    arr_expr: arr,
                    col_alias,
                    pos_alias,
                    source_alias,
                    original_expr: sel_expr.clone(),
                    has_outer_alias,
                });
            }
        }

        // Nothing to expand: leave the SELECT untouched.
        if unnest_infos.is_empty() {
            return None;
        }

        // Aliases for the shared position series that drives the expansion.
        let series_alias = "pos".to_string();
        let series_source_alias = "_u".to_string();
        let tbl_ref = if use_table_aliases {
            Some(series_source_alias.as_str())
        } else {
            None
        };

        // Build new SELECT expressions: each original item becomes
        // IF(pos = pos_k, col_k[, NULL]) with the UNNEST spliced out.
        let mut new_select_exprs = Vec::new();
        for info in &unnest_infos {
            let actual_col_name = info.has_outer_alias.as_ref().unwrap_or(&info.col_alias);
            let src_ref = if use_table_aliases {
                Some(info.source_alias.as_str())
            } else {
                None
            };

            let pos_col = make_col(&series_alias, tbl_ref);
            let unnest_pos_col = make_col(&info.pos_alias, src_ref);
            let col_ref = make_col(actual_col_name, src_ref);

            let eq_cond = Expression::Eq(Box::new(BinaryOp::new(
                pos_col.clone(),
                unnest_pos_col.clone(),
            )));
            let mut if_args = vec![eq_cond, col_ref];
            if null_third_arg {
                if_args.push(Expression::Null(crate::expressions::Null));
            }

            let if_expr =
                Expression::Function(Box::new(Function::new(if_func_name.to_string(), if_args)));
            // Preserve any arithmetic/alias wrapping around the original UNNEST.
            let final_expr = Self::replace_unnest_with_if(&info.original_expr, &if_expr);

            new_select_exprs.push(Expression::Alias(Box::new(Alias::new(
                final_expr,
                Identifier::new(actual_col_name.clone()),
            ))));
        }

        // Build array size expressions for GREATEST: the series must be long
        // enough to cover the longest array.
        let size_exprs: Vec<Expression> = unnest_infos
            .iter()
            .map(|info| {
                Expression::Function(Box::new(Function::new(
                    array_length_func.to_string(),
                    vec![info.arr_expr.clone()],
                )))
            })
            .collect();

        let greatest =
            Expression::Function(Box::new(Function::new("GREATEST".to_string(), size_exprs)));

        // 0-based series end is GREATEST(...) - 1; 1-based uses GREATEST(...) as-is.
        let series_end = if index_offset == 0 {
            Expression::Sub(Box::new(BinaryOp::new(
                greatest,
                Expression::Literal(Literal::Number("1".to_string())),
            )))
        } else {
            greatest
        };

        // Build the position array source (the table expression producing `pos`).
        let series_unnest_expr = match target {
            DialectType::BigQuery => {
                // UNNEST(GENERATE_ARRAY(0, end))
                let gen_array = Expression::Function(Box::new(Function::new(
                    "GENERATE_ARRAY".to_string(),
                    vec![
                        Expression::Literal(Literal::Number("0".to_string())),
                        series_end,
                    ],
                )));
                Expression::Unnest(Box::new(UnnestFunc {
                    this: gen_array,
                    expressions: Vec::new(),
                    with_ordinality: false,
                    alias: None,
                    offset_alias: None,
                }))
            }
            DialectType::Presto | DialectType::Trino => {
                // UNNEST(SEQUENCE(1, end))
                let sequence = Expression::Function(Box::new(Function::new(
                    "SEQUENCE".to_string(),
                    vec![
                        Expression::Literal(Literal::Number("1".to_string())),
                        series_end,
                    ],
                )));
                Expression::Unnest(Box::new(UnnestFunc {
                    this: sequence,
                    expressions: Vec::new(),
                    with_ordinality: false,
                    alias: None,
                    offset_alias: None,
                }))
            }
            DialectType::Snowflake => {
                // TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (end) + 1)));
                // the range's upper bound is exclusive, hence the + 1.
                let range_end = Expression::Add(Box::new(BinaryOp::new(
                    Expression::Paren(Box::new(crate::expressions::Paren {
                        this: series_end,
                        trailing_comments: Vec::new(),
                    })),
                    Expression::Literal(Literal::Number("1".to_string())),
                )));
                let gen_range = Expression::Function(Box::new(Function::new(
                    "ARRAY_GENERATE_RANGE".to_string(),
                    vec![
                        Expression::Literal(Literal::Number("0".to_string())),
                        range_end,
                    ],
                )));
                let flatten_arg =
                    Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
                        name: Identifier::new("INPUT".to_string()),
                        value: gen_range,
                        separator: crate::expressions::NamedArgSeparator::DArrow,
                    }));
                let flatten = Expression::Function(Box::new(Function::new(
                    "FLATTEN".to_string(),
                    vec![flatten_arg],
                )));
                Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])))
            }
            // Unsupported target: abandon the rewrite entirely.
            _ => return None,
        };

        // Build series alias expression: `AS _u(pos)` / `AS _u(seq, key, ...)`
        // for table-alias dialects, plain `AS pos` otherwise.
        let series_alias_expr = if use_table_aliases {
            let col_aliases = if matches!(target, DialectType::Snowflake) {
                // FLATTEN emits six fixed columns; `pos` maps onto the 5th (value slot).
                vec![
                    Identifier::new("seq".to_string()),
                    Identifier::new("key".to_string()),
                    Identifier::new("path".to_string()),
                    Identifier::new("index".to_string()),
                    Identifier::new(series_alias.clone()),
                    Identifier::new("this".to_string()),
                ]
            } else {
                vec![Identifier::new(series_alias.clone())]
            };
            Expression::Alias(Box::new(Alias {
                this: series_unnest_expr,
                alias: Identifier::new(series_source_alias.clone()),
                column_aliases: col_aliases,
                pre_alias_comments: Vec::new(),
                trailing_comments: Vec::new(),
                inferred_type: None,
            }))
        } else {
            Expression::Alias(Box::new(Alias::new(
                series_unnest_expr,
                Identifier::new(series_alias.clone()),
            )))
        };

        // Build CROSS JOINs for each UNNEST (one positional source per array).
        let mut joins = Vec::new();
        for info in &unnest_infos {
            let actual_col_name = info.has_outer_alias.as_ref().unwrap_or(&info.col_alias);

            let unnest_join_expr = match target {
                DialectType::BigQuery => {
                    // UNNEST([1,2,3]) AS col WITH OFFSET AS pos_2
                    let unnest = UnnestFunc {
                        this: info.arr_expr.clone(),
                        expressions: Vec::new(),
                        with_ordinality: true,
                        alias: Some(Identifier::new(actual_col_name.clone())),
                        offset_alias: Some(Identifier::new(info.pos_alias.clone())),
                    };
                    Expression::Unnest(Box::new(unnest))
                }
                DialectType::Presto | DialectType::Trino => {
                    // UNNEST(arr) WITH ORDINALITY AS _u_2(col, pos_2)
                    let unnest = UnnestFunc {
                        this: info.arr_expr.clone(),
                        expressions: Vec::new(),
                        with_ordinality: true,
                        alias: None,
                        offset_alias: None,
                    };
                    Expression::Alias(Box::new(Alias {
                        this: Expression::Unnest(Box::new(unnest)),
                        alias: Identifier::new(info.source_alias.clone()),
                        column_aliases: vec![
                            Identifier::new(actual_col_name.clone()),
                            Identifier::new(info.pos_alias.clone()),
                        ],
                        pre_alias_comments: Vec::new(),
                        trailing_comments: Vec::new(),
                        inferred_type: None,
                    }))
                }
                DialectType::Snowflake => {
                    // TABLE(FLATTEN(INPUT => arr)) AS _u_2(seq, key, path, pos_2, col, this)
                    let flatten_arg =
                        Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
                            name: Identifier::new("INPUT".to_string()),
                            value: info.arr_expr.clone(),
                            separator: crate::expressions::NamedArgSeparator::DArrow,
                        }));
                    let flatten = Expression::Function(Box::new(Function::new(
                        "FLATTEN".to_string(),
                        vec![flatten_arg],
                    )));
                    let table_fn = Expression::Function(Box::new(Function::new(
                        "TABLE".to_string(),
                        vec![flatten],
                    )));
                    Expression::Alias(Box::new(Alias {
                        this: table_fn,
                        alias: Identifier::new(info.source_alias.clone()),
                        column_aliases: vec![
                            Identifier::new("seq".to_string()),
                            Identifier::new("key".to_string()),
                            Identifier::new("path".to_string()),
                            Identifier::new(info.pos_alias.clone()),
                            Identifier::new(actual_col_name.clone()),
                            Identifier::new("this".to_string()),
                        ],
                        pre_alias_comments: Vec::new(),
                        trailing_comments: Vec::new(),
                        inferred_type: None,
                    }))
                }
                _ => return None,
            };

            joins.push(make_join(unnest_join_expr));
        }

        // Build WHERE clause: one OR-condition per UNNEST keeping the cross
        // product aligned. For each array k:
        //   pos = pos_k                       -> the element at this position, or
        //   (pos > last_k AND pos_k = last_k) -> past this array's end: pin pos_k
        //                                        to its final index so the IF()
        //                                        above yields NULL
        // where last_k is (length - 1) for 0-based dialects, length for 1-based.
        let mut where_conditions: Vec<Expression> = Vec::new();
        for info in &unnest_infos {
            let src_ref = if use_table_aliases {
                Some(info.source_alias.as_str())
            } else {
                None
            };
            let pos_col = make_col(&series_alias, tbl_ref);
            let unnest_pos_col = make_col(&info.pos_alias, src_ref);

            let arr_size = Expression::Function(Box::new(Function::new(
                array_length_func.to_string(),
                vec![info.arr_expr.clone()],
            )));

            let size_ref = if index_offset == 0 {
                Expression::Paren(Box::new(crate::expressions::Paren {
                    this: Expression::Sub(Box::new(BinaryOp::new(
                        arr_size,
                        Expression::Literal(Literal::Number("1".to_string())),
                    ))),
                    trailing_comments: Vec::new(),
                }))
            } else {
                arr_size
            };

            let eq = Expression::Eq(Box::new(BinaryOp::new(
                pos_col.clone(),
                unnest_pos_col.clone(),
            )));
            let gt = Expression::Gt(Box::new(BinaryOp::new(pos_col, size_ref.clone())));
            let pos_eq_size = Expression::Eq(Box::new(BinaryOp::new(unnest_pos_col, size_ref)));
            let and_cond = Expression::And(Box::new(BinaryOp::new(gt, pos_eq_size)));
            let paren_and = Expression::Paren(Box::new(crate::expressions::Paren {
                this: and_cond,
                trailing_comments: Vec::new(),
            }));
            let or_cond = Expression::Or(Box::new(BinaryOp::new(eq, paren_and)));

            where_conditions.push(or_cond);
        }

        let where_expr = if where_conditions.len() == 1 {
            // Single condition: no parens needed
            where_conditions.into_iter().next().unwrap()
        } else {
            // Multiple conditions: wrap each OR in parens, then combine with AND
            let wrap = |e: Expression| {
                Expression::Paren(Box::new(crate::expressions::Paren {
                    this: e,
                    trailing_comments: Vec::new(),
                }))
            };
            let mut iter = where_conditions.into_iter();
            // Safe: this branch only runs with >= 2 conditions.
            let first = wrap(iter.next().unwrap());
            let second = wrap(iter.next().unwrap());
            let mut combined = Expression::Paren(Box::new(crate::expressions::Paren {
                this: Expression::And(Box::new(BinaryOp::new(first, second))),
                trailing_comments: Vec::new(),
            }));
            for cond in iter {
                combined = Expression::And(Box::new(BinaryOp::new(combined, wrap(cond))));
            }
            combined
        };

        // Build the new SELECT: swap in the rewritten projection, attach the
        // series and per-array sources, and merge the WHERE clause.
        let mut new_select = select.clone();
        new_select.expressions = new_select_exprs;

        if new_select.from.is_some() {
            // Existing FROM: everything becomes an extra CROSS JOIN.
            let mut all_joins = vec![make_join(series_alias_expr)];
            all_joins.extend(joins);
            new_select.joins.extend(all_joins);
        } else {
            // No FROM: the position series becomes the FROM itself.
            new_select.from = Some(From {
                expressions: vec![series_alias_expr],
            });
            new_select.joins.extend(joins);
        }

        if let Some(ref existing_where) = new_select.where_clause {
            // AND the alignment predicate onto any pre-existing WHERE.
            let combined = Expression::And(Box::new(BinaryOp::new(
                existing_where.this.clone(),
                where_expr,
            )));
            new_select.where_clause = Some(crate::expressions::Where { this: combined });
        } else {
            new_select.where_clause = Some(crate::expressions::Where { this: where_expr });
        }

        Some(new_select)
    }
27362
27363 /// Helper to replace UNNEST(...) inside an expression with a replacement expression.
27364 fn replace_unnest_with_if(original: &Expression, replacement: &Expression) -> Expression {
27365 match original {
27366 Expression::Unnest(_) => replacement.clone(),
27367 Expression::Function(f) if f.name.eq_ignore_ascii_case("UNNEST") => replacement.clone(),
27368 Expression::Alias(a) => Self::replace_unnest_with_if(&a.this, replacement),
27369 Expression::Add(op) => {
27370 let left = Self::replace_unnest_with_if(&op.left, replacement);
27371 let right = Self::replace_unnest_with_if(&op.right, replacement);
27372 Expression::Add(Box::new(crate::expressions::BinaryOp::new(left, right)))
27373 }
27374 Expression::Sub(op) => {
27375 let left = Self::replace_unnest_with_if(&op.left, replacement);
27376 let right = Self::replace_unnest_with_if(&op.right, replacement);
27377 Expression::Sub(Box::new(crate::expressions::BinaryOp::new(left, right)))
27378 }
27379 Expression::Mul(op) => {
27380 let left = Self::replace_unnest_with_if(&op.left, replacement);
27381 let right = Self::replace_unnest_with_if(&op.right, replacement);
27382 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(left, right)))
27383 }
27384 Expression::Div(op) => {
27385 let left = Self::replace_unnest_with_if(&op.left, replacement);
27386 let right = Self::replace_unnest_with_if(&op.right, replacement);
27387 Expression::Div(Box::new(crate::expressions::BinaryOp::new(left, right)))
27388 }
27389 _ => original.clone(),
27390 }
27391 }
27392
27393 /// Decompose a JSON path like `$.y[0].z` into individual parts: `["y", "0", "z"]`.
27394 /// Strips `$` prefix, handles bracket notation, quoted strings, and removes `[*]` wildcards.
27395 fn decompose_json_path(path: &str) -> Vec<String> {
27396 let mut parts = Vec::new();
27397 let path = if path.starts_with("$.") {
27398 &path[2..]
27399 } else if path.starts_with('$') {
27400 &path[1..]
27401 } else {
27402 path
27403 };
27404 if path.is_empty() {
27405 return parts;
27406 }
27407 let mut current = String::new();
27408 let chars: Vec<char> = path.chars().collect();
27409 let mut i = 0;
27410 while i < chars.len() {
27411 match chars[i] {
27412 '.' => {
27413 if !current.is_empty() {
27414 parts.push(current.clone());
27415 current.clear();
27416 }
27417 i += 1;
27418 }
27419 '[' => {
27420 if !current.is_empty() {
27421 parts.push(current.clone());
27422 current.clear();
27423 }
27424 i += 1;
27425 let mut bracket_content = String::new();
27426 while i < chars.len() && chars[i] != ']' {
27427 if chars[i] == '"' || chars[i] == '\'' {
27428 let quote = chars[i];
27429 i += 1;
27430 while i < chars.len() && chars[i] != quote {
27431 bracket_content.push(chars[i]);
27432 i += 1;
27433 }
27434 if i < chars.len() {
27435 i += 1;
27436 }
27437 } else {
27438 bracket_content.push(chars[i]);
27439 i += 1;
27440 }
27441 }
27442 if i < chars.len() {
27443 i += 1;
27444 }
27445 if bracket_content != "*" {
27446 parts.push(bracket_content);
27447 }
27448 }
27449 _ => {
27450 current.push(chars[i]);
27451 i += 1;
27452 }
27453 }
27454 }
27455 if !current.is_empty() {
27456 parts.push(current);
27457 }
27458 parts
27459 }
27460
27461 /// Strip `$` prefix from a JSON path, keeping the rest.
27462 /// `$.y[0].z` -> `y[0].z`, `$["a b"]` -> `["a b"]`
27463 fn strip_json_dollar_prefix(path: &str) -> String {
27464 if path.starts_with("$.") {
27465 path[2..].to_string()
27466 } else if path.starts_with('$') {
27467 path[1..].to_string()
27468 } else {
27469 path.to_string()
27470 }
27471 }
27472
27473 /// Strip `[*]` wildcards from a JSON path.
27474 /// `$.y[*]` -> `$.y`, `$.y[*].z` -> `$.y.z`
27475 fn strip_json_wildcards(path: &str) -> String {
27476 path.replace("[*]", "")
27477 .replace("..", ".") // Clean double dots from `$.y[*].z` -> `$.y..z`
27478 .trim_end_matches('.')
27479 .to_string()
27480 }
27481
27482 /// Convert bracket notation to dot notation for JSON paths.
27483 /// `$["a b"]` -> `$."a b"`, `$["key"]` -> `$.key`
27484 fn bracket_to_dot_notation(path: &str) -> String {
27485 let mut result = String::new();
27486 let chars: Vec<char> = path.chars().collect();
27487 let mut i = 0;
27488 while i < chars.len() {
27489 if chars[i] == '[' {
27490 // Read bracket content
27491 i += 1;
27492 let mut bracket_content = String::new();
27493 let mut is_quoted = false;
27494 let mut _quote_char = '"';
27495 while i < chars.len() && chars[i] != ']' {
27496 if chars[i] == '"' || chars[i] == '\'' {
27497 is_quoted = true;
27498 _quote_char = chars[i];
27499 i += 1;
27500 while i < chars.len() && chars[i] != _quote_char {
27501 bracket_content.push(chars[i]);
27502 i += 1;
27503 }
27504 if i < chars.len() {
27505 i += 1;
27506 }
27507 } else {
27508 bracket_content.push(chars[i]);
27509 i += 1;
27510 }
27511 }
27512 if i < chars.len() {
27513 i += 1;
27514 } // skip ]
27515 if bracket_content == "*" {
27516 // Keep wildcard as-is
27517 result.push_str("[*]");
27518 } else if is_quoted {
27519 // Quoted bracket -> dot notation with quotes
27520 result.push('.');
27521 result.push('"');
27522 result.push_str(&bracket_content);
27523 result.push('"');
27524 } else {
27525 // Numeric index -> keep as bracket
27526 result.push('[');
27527 result.push_str(&bracket_content);
27528 result.push(']');
27529 }
27530 } else {
27531 result.push(chars[i]);
27532 i += 1;
27533 }
27534 }
27535 result
27536 }
27537
27538 /// Convert JSON path bracket quoted strings to use single quotes instead of double quotes.
27539 /// `$["a b"]` -> `$['a b']`
27540 fn bracket_to_single_quotes(path: &str) -> String {
27541 let mut result = String::new();
27542 let chars: Vec<char> = path.chars().collect();
27543 let mut i = 0;
27544 while i < chars.len() {
27545 if chars[i] == '[' && i + 1 < chars.len() && chars[i + 1] == '"' {
27546 result.push('[');
27547 result.push('\'');
27548 i += 2; // skip [ and "
27549 while i < chars.len() && chars[i] != '"' {
27550 result.push(chars[i]);
27551 i += 1;
27552 }
27553 if i < chars.len() {
27554 i += 1;
27555 } // skip closing "
27556 result.push('\'');
27557 } else {
27558 result.push(chars[i]);
27559 i += 1;
27560 }
27561 }
27562 result
27563 }
27564
    /// Transform TSQL SELECT INTO -> CREATE TABLE AS for DuckDB/Snowflake
    /// or PostgreSQL #temp -> TEMPORARY.
    /// Also strips # from INSERT INTO #table for non-TSQL targets.
    ///
    /// Returns the rewritten statement, or the input unchanged when no rewrite
    /// applies for the given target dialect.
    fn transform_select_into(
        expr: Expression,
        _source: DialectType,
        target: DialectType,
    ) -> Expression {
        use crate::expressions::{CreateTable, Expression, TableRef};

        // Handle INSERT INTO #temp -> INSERT INTO temp for non-TSQL targets
        if let Expression::Insert(ref insert) = expr {
            if insert.table.name.name.starts_with('#')
                && !matches!(target, DialectType::TSQL | DialectType::Fabric)
            {
                // Drop the TSQL temp-table marker; other dialects don't accept `#`.
                let mut new_insert = insert.clone();
                new_insert.table.name.name =
                    insert.table.name.name.trim_start_matches('#').to_string();
                return Expression::Insert(new_insert);
            }
            return expr;
        }

        if let Expression::Select(ref select) = expr {
            if let Some(ref into) = select.into {
                // Resolve the INTO target's name; unsupported shapes yield an
                // empty name and the rewrite effectively becomes a no-op.
                let table_name_raw = match &into.this {
                    Expression::Table(tr) => tr.name.name.clone(),
                    Expression::Identifier(id) => id.name.clone(),
                    _ => String::new(),
                };
                // `#name` is TSQL's temp-table convention; `into.temporary`
                // covers an explicit INTO TEMPORARY in the source SQL.
                let is_temp = table_name_raw.starts_with('#') || into.temporary;
                let clean_name = table_name_raw.trim_start_matches('#').to_string();

                match target {
                    DialectType::DuckDB | DialectType::Snowflake => {
                        // SELECT INTO -> CREATE TABLE AS SELECT
                        let mut new_select = select.clone();
                        new_select.into = None;
                        let ct = CreateTable {
                            name: TableRef::new(clean_name),
                            on_cluster: None,
                            columns: Vec::new(),
                            constraints: Vec::new(),
                            if_not_exists: false,
                            temporary: is_temp,
                            or_replace: false,
                            table_modifier: None,
                            as_select: Some(Expression::Select(new_select)),
                            as_select_parenthesized: false,
                            on_commit: None,
                            clone_source: None,
                            clone_at_clause: None,
                            shallow_clone: false,
                            is_copy: false,
                            leading_comments: Vec::new(),
                            with_properties: Vec::new(),
                            teradata_post_name_options: Vec::new(),
                            with_data: None,
                            with_statistics: None,
                            teradata_indexes: Vec::new(),
                            with_cte: None,
                            properties: Vec::new(),
                            partition_of: None,
                            post_table_properties: Vec::new(),
                            mysql_table_options: Vec::new(),
                            inherits: Vec::new(),
                            on_property: None,
                            copy_grants: false,
                            using_template: None,
                            rollup: None,
                        };
                        return Expression::CreateTable(Box::new(ct));
                    }
                    DialectType::PostgreSQL | DialectType::Redshift => {
                        // PostgreSQL: #foo -> INTO TEMPORARY foo
                        // Only rewrite when the temp-ness comes from the `#` prefix;
                        // an already-explicit INTO TEMPORARY is left alone.
                        if is_temp && !into.temporary {
                            let mut new_select = select.clone();
                            let mut new_into = into.clone();
                            new_into.temporary = true;
                            new_into.unlogged = false;
                            new_into.this = Expression::Table(TableRef::new(clean_name));
                            new_select.into = Some(new_into);
                            Expression::Select(new_select)
                        } else {
                            expr
                        }
                    }
                    _ => expr,
                }
            } else {
                expr
            }
        } else {
            expr
        }
    }
27661
    /// Transform CREATE TABLE WITH properties for cross-dialect transpilation.
    /// Handles FORMAT, PARTITIONED_BY, and other Presto WITH properties.
    ///
    /// Mutates `ct` in place. First pass: Presto-style `with_properties` are
    /// drained and re-routed per target (Presto keeps WITH, Hive gets STORED AS /
    /// TBLPROPERTIES, Spark gets USING / TBLPROPERTIES, DuckDB strips them).
    /// Second pass: existing `properties` (STORED AS, TBLPROPERTIES,
    /// PARTITIONED BY nodes) are converted back to WITH form for Presto targets
    /// or stripped for DuckDB.
    fn transform_create_table_properties(
        ct: &mut crate::expressions::CreateTable,
        _source: DialectType,
        target: DialectType,
    ) {
        use crate::expressions::{
            BinaryOp, BooleanLiteral, Expression, FileFormatProperty, Identifier, Literal,
            Properties,
        };

        // Helper to convert a raw property value string to the correct Expression
        let value_to_expr = |v: &str| -> Expression {
            let trimmed = v.trim();
            // Check if it's a quoted string (starts and ends with ')
            if trimmed.starts_with('\'') && trimmed.ends_with('\'') {
                Expression::Literal(Literal::String(trimmed[1..trimmed.len() - 1].to_string()))
            }
            // Check if it's a number
            else if trimmed.parse::<i64>().is_ok() || trimmed.parse::<f64>().is_ok() {
                Expression::Literal(Literal::Number(trimmed.to_string()))
            }
            // Check if it's ARRAY[...] or ARRAY(...)
            else if trimmed.to_uppercase().starts_with("ARRAY") {
                // Convert ARRAY['y'] to ARRAY('y') for Hive/Spark
                let inner = trimmed
                    .trim_start_matches(|c: char| c.is_alphabetic()) // Remove ARRAY
                    .trim_start_matches('[')
                    .trim_start_matches('(')
                    .trim_end_matches(']')
                    .trim_end_matches(')');
                let elements: Vec<Expression> = inner
                    .split(',')
                    .map(|e| {
                        let elem = e.trim().trim_matches('\'');
                        Expression::Literal(Literal::String(elem.to_string()))
                    })
                    .collect();
                Expression::Function(Box::new(crate::expressions::Function::new(
                    "ARRAY".to_string(),
                    elements,
                )))
            }
            // Otherwise, just output as identifier (unquoted)
            else {
                Expression::Identifier(Identifier::new(trimmed.to_string()))
            }
        };

        if ct.with_properties.is_empty() && ct.properties.is_empty() {
            return;
        }

        // Handle Presto-style WITH properties
        if !ct.with_properties.is_empty() {
            // Extract FORMAT property and remaining properties
            let mut format_value: Option<String> = None;
            let mut partitioned_by: Option<String> = None;
            let mut other_props: Vec<(String, String)> = Vec::new();

            // Drain so each property is re-added (or dropped) exactly once below.
            for (key, value) in ct.with_properties.drain(..) {
                let key_upper = key.to_uppercase();
                if key_upper == "FORMAT" {
                    // Strip surrounding quotes from value if present
                    format_value = Some(value.trim_matches('\'').to_string());
                } else if key_upper == "PARTITIONED_BY" {
                    partitioned_by = Some(value);
                } else {
                    other_props.push((key, value));
                }
            }

            match target {
                DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                    // Presto: keep WITH properties but lowercase 'format' key
                    if let Some(fmt) = format_value {
                        ct.with_properties
                            .push(("format".to_string(), format!("'{}'", fmt)));
                    }
                    if let Some(part) = partitioned_by {
                        // Convert (col1, col2) to ARRAY['col1', 'col2'] format
                        let trimmed = part.trim();
                        let inner = trimmed.trim_start_matches('(').trim_end_matches(')');
                        // Also handle ARRAY['...'] format - keep as-is
                        if trimmed.to_uppercase().starts_with("ARRAY") {
                            ct.with_properties
                                .push(("PARTITIONED_BY".to_string(), part));
                        } else {
                            // Parse column names from the parenthesized list
                            let cols: Vec<&str> = inner
                                .split(',')
                                .map(|c| c.trim().trim_matches('"').trim_matches('\''))
                                .collect();
                            let array_val = format!(
                                "ARRAY[{}]",
                                cols.iter()
                                    .map(|c| format!("'{}'", c))
                                    .collect::<Vec<_>>()
                                    .join(", ")
                            );
                            ct.with_properties
                                .push(("PARTITIONED_BY".to_string(), array_val));
                        }
                    }
                    ct.with_properties.extend(other_props);
                }
                DialectType::Hive => {
                    // Hive: FORMAT -> STORED AS, other props -> TBLPROPERTIES
                    if let Some(fmt) = format_value {
                        ct.properties.push(Expression::FileFormatProperty(Box::new(
                            FileFormatProperty {
                                this: Some(Box::new(Expression::Identifier(Identifier::new(fmt)))),
                                expressions: vec![],
                                // hive_format = true selects STORED AS rendering.
                                hive_format: Some(Box::new(Expression::Boolean(BooleanLiteral {
                                    value: true,
                                }))),
                            },
                        )));
                    }
                    if let Some(_part) = partitioned_by {
                        // PARTITIONED_BY handling is complex - move columns to partitioned by
                        // For now, the partition columns are extracted from the column list
                        Self::apply_partitioned_by(ct, &_part, target);
                    }
                    if !other_props.is_empty() {
                        // Remaining key/value pairs become TBLPROPERTIES ('k' = v, ...).
                        let eq_exprs: Vec<Expression> = other_props
                            .into_iter()
                            .map(|(k, v)| {
                                Expression::Eq(Box::new(BinaryOp::new(
                                    Expression::Literal(Literal::String(k)),
                                    value_to_expr(&v),
                                )))
                            })
                            .collect();
                        ct.properties
                            .push(Expression::Properties(Box::new(Properties {
                                expressions: eq_exprs,
                            })));
                    }
                }
                DialectType::Spark | DialectType::Databricks => {
                    // Spark: FORMAT -> USING, other props -> TBLPROPERTIES
                    if let Some(fmt) = format_value {
                        ct.properties.push(Expression::FileFormatProperty(Box::new(
                            FileFormatProperty {
                                this: Some(Box::new(Expression::Identifier(Identifier::new(fmt)))),
                                expressions: vec![],
                                hive_format: None, // None means USING syntax
                            },
                        )));
                    }
                    if let Some(_part) = partitioned_by {
                        Self::apply_partitioned_by(ct, &_part, target);
                    }
                    if !other_props.is_empty() {
                        let eq_exprs: Vec<Expression> = other_props
                            .into_iter()
                            .map(|(k, v)| {
                                Expression::Eq(Box::new(BinaryOp::new(
                                    Expression::Literal(Literal::String(k)),
                                    value_to_expr(&v),
                                )))
                            })
                            .collect();
                        ct.properties
                            .push(Expression::Properties(Box::new(Properties {
                                expressions: eq_exprs,
                            })));
                    }
                }
                DialectType::DuckDB => {
                    // DuckDB: strip all WITH properties (FORMAT, PARTITIONED_BY, etc.)
                    // Keep nothing
                }
                _ => {
                    // For other dialects, keep WITH properties as-is
                    if let Some(fmt) = format_value {
                        ct.with_properties
                            .push(("FORMAT".to_string(), format!("'{}'", fmt)));
                    }
                    if let Some(part) = partitioned_by {
                        ct.with_properties
                            .push(("PARTITIONED_BY".to_string(), part));
                    }
                    ct.with_properties.extend(other_props);
                }
            }
        }

        // Handle STORED AS 'PARQUET' (quoted format name) -> STORED AS PARQUET (unquoted)
        // and Hive STORED AS -> Presto WITH (format=...) conversion
        if !ct.properties.is_empty() {
            let is_presto_target = matches!(
                target,
                DialectType::Presto | DialectType::Trino | DialectType::Athena
            );
            let is_duckdb_target = matches!(target, DialectType::DuckDB);

            if is_presto_target || is_duckdb_target {
                // Rebuild `properties`, moving convertible entries into
                // `with_properties` (Presto) or dropping them (DuckDB).
                let mut new_properties = Vec::new();
                for prop in ct.properties.drain(..) {
                    match &prop {
                        Expression::FileFormatProperty(ffp) => {
                            if is_presto_target {
                                // Convert STORED AS/USING to WITH (format=...)
                                if let Some(ref fmt_expr) = ffp.this {
                                    let fmt_str = match fmt_expr.as_ref() {
                                        Expression::Identifier(id) => id.name.clone(),
                                        Expression::Literal(Literal::String(s)) => s.clone(),
                                        _ => {
                                            // Unrecognized format expression: keep it untouched.
                                            new_properties.push(prop);
                                            continue;
                                        }
                                    };
                                    ct.with_properties
                                        .push(("format".to_string(), format!("'{}'", fmt_str)));
                                }
                            }
                            // DuckDB: just strip file format properties
                        }
                        // Convert TBLPROPERTIES to WITH properties for Presto target
                        Expression::Properties(props) if is_presto_target => {
                            for expr in &props.expressions {
                                if let Expression::Eq(eq) = expr {
                                    // Extract key and value from the Eq expression
                                    let key = match &eq.left {
                                        Expression::Literal(Literal::String(s)) => s.clone(),
                                        Expression::Identifier(id) => id.name.clone(),
                                        _ => continue,
                                    };
                                    let value = match &eq.right {
                                        Expression::Literal(Literal::String(s)) => {
                                            format!("'{}'", s)
                                        }
                                        Expression::Literal(Literal::Number(n)) => n.clone(),
                                        Expression::Identifier(id) => id.name.clone(),
                                        _ => continue,
                                    };
                                    ct.with_properties.push((key, value));
                                }
                            }
                        }
                        // Convert PartitionedByProperty for Presto target
                        Expression::PartitionedByProperty(ref pbp) if is_presto_target => {
                            // Check if it contains ColumnDef expressions (Hive-style with types)
                            if let Expression::Tuple(ref tuple) = *pbp.this {
                                let mut col_names: Vec<String> = Vec::new();
                                let mut col_defs: Vec<crate::expressions::ColumnDef> = Vec::new();
                                let mut has_col_defs = false;
                                for expr in &tuple.expressions {
                                    if let Expression::ColumnDef(ref cd) = expr {
                                        has_col_defs = true;
                                        col_names.push(cd.name.name.clone());
                                        col_defs.push(*cd.clone());
                                    } else if let Expression::Column(ref col) = expr {
                                        col_names.push(col.name.name.clone());
                                    } else if let Expression::Identifier(ref id) = expr {
                                        col_names.push(id.name.clone());
                                    } else {
                                        // For function expressions like MONTHS(y), serialize to SQL
                                        let generic = Dialect::get(DialectType::Generic);
                                        if let Ok(sql) = generic.generate(expr) {
                                            col_names.push(sql);
                                        }
                                    }
                                }
                                if has_col_defs {
                                    // Merge partition column defs into the main column list
                                    for cd in col_defs {
                                        ct.columns.push(cd);
                                    }
                                }
                                if !col_names.is_empty() {
                                    // Add PARTITIONED_BY property
                                    let array_val = format!(
                                        "ARRAY[{}]",
                                        col_names
                                            .iter()
                                            .map(|n| format!("'{}'", n))
                                            .collect::<Vec<_>>()
                                            .join(", ")
                                    );
                                    ct.with_properties
                                        .push(("PARTITIONED_BY".to_string(), array_val));
                                }
                            }
                            // Skip - don't keep in properties
                        }
                        _ => {
                            if !is_duckdb_target {
                                new_properties.push(prop);
                            }
                        }
                    }
                }
                ct.properties = new_properties;
            } else {
                // For Hive/Spark targets, unquote format names in STORED AS
                for prop in &mut ct.properties {
                    if let Expression::FileFormatProperty(ref mut ffp) = prop {
                        if let Some(ref mut fmt_expr) = ffp.this {
                            if let Expression::Literal(Literal::String(s)) = fmt_expr.as_ref() {
                                // Convert STORED AS 'PARQUET' to STORED AS PARQUET (unquote)
                                let unquoted = s.clone();
                                *fmt_expr =
                                    Box::new(Expression::Identifier(Identifier::new(unquoted)));
                            }
                        }
                    }
                }
            }
        }
    }
27976
    /// Apply PARTITIONED_BY conversion: move partition columns from column list to PARTITIONED BY
    ///
    /// `partitioned_by_value` is the raw WITH-property value, e.g.
    /// `ARRAY['y', 'z']` or `(y, z)`. For Hive targets the matching column
    /// definitions are removed from `ct.columns` and re-emitted with their
    /// types; for Spark/Databricks only the column names are emitted and the
    /// columns stay in the list. Other targets are untouched.
    fn apply_partitioned_by(
        ct: &mut crate::expressions::CreateTable,
        partitioned_by_value: &str,
        target: DialectType,
    ) {
        use crate::expressions::{Column, Expression, Identifier, PartitionedByProperty, Tuple};

        // Parse the ARRAY['col1', 'col2'] value to extract column names
        let mut col_names: Vec<String> = Vec::new();
        // The value looks like ARRAY['y', 'z'] or ARRAY('y', 'z')
        // NOTE(review): `trim_start_matches("ARRAY")` is case-sensitive, so a
        // lowercase `array[...]` would not be stripped — confirm inputs are
        // always uppercased before reaching here.
        let inner = partitioned_by_value
            .trim()
            .trim_start_matches("ARRAY")
            .trim_start_matches('[')
            .trim_start_matches('(')
            .trim_end_matches(']')
            .trim_end_matches(')');
        for part in inner.split(',') {
            let col = part.trim().trim_matches('\'').trim_matches('"');
            if !col.is_empty() {
                col_names.push(col.to_string());
            }
        }

        if col_names.is_empty() {
            return;
        }

        if matches!(target, DialectType::Hive) {
            // Hive: PARTITIONED BY (col_name type, ...) - move columns out of column list
            let mut partition_col_defs = Vec::new();
            for col_name in &col_names {
                // Find and remove from columns
                if let Some(pos) = ct
                    .columns
                    .iter()
                    .position(|c| c.name.name.eq_ignore_ascii_case(col_name))
                {
                    let col_def = ct.columns.remove(pos);
                    partition_col_defs.push(Expression::ColumnDef(Box::new(col_def)));
                }
            }
            // Only emit PARTITIONED BY if at least one column was actually found.
            if !partition_col_defs.is_empty() {
                ct.properties
                    .push(Expression::PartitionedByProperty(Box::new(
                        PartitionedByProperty {
                            this: Box::new(Expression::Tuple(Box::new(Tuple {
                                expressions: partition_col_defs,
                            }))),
                        },
                    )));
            }
        } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
            // Spark: PARTITIONED BY (col1, col2) - just column names, keep in column list
            // Use quoted identifiers to match the quoting style of the original column definitions
            let partition_exprs: Vec<Expression> = col_names
                .iter()
                .map(|name| {
                    // Check if the column exists in the column list and use its quoting
                    let is_quoted = ct
                        .columns
                        .iter()
                        .any(|c| c.name.name.eq_ignore_ascii_case(name) && c.name.quoted);
                    let ident = if is_quoted {
                        Identifier::quoted(name.clone())
                    } else {
                        Identifier::new(name.clone())
                    };
                    Expression::Column(Column {
                        name: ident,
                        table: None,
                        join_mark: false,
                        trailing_comments: Vec::new(),
                        span: None,
                        inferred_type: None,
                    })
                })
                .collect();
            ct.properties
                .push(Expression::PartitionedByProperty(Box::new(
                    PartitionedByProperty {
                        this: Box::new(Expression::Tuple(Box::new(Tuple {
                            expressions: partition_exprs,
                        }))),
                    },
                )));
        }
        // DuckDB: strip partitioned_by entirely (already handled)
    }
28067
    /// Convert a DataType to Spark's type string format (using angle brackets)
    ///
    /// Scalar types map to their Spark keyword; character/JSON types collapse
    /// to STRING; ARRAY/MAP/STRUCT recurse into their element types using
    /// Spark's `ARRAY<...>` / `MAP<k, v>` / `STRUCT<name: type>` syntax.
    fn data_type_to_spark_string(dt: &crate::expressions::DataType) -> String {
        use crate::expressions::DataType;
        match dt {
            DataType::Int { .. } => "INT".to_string(),
            DataType::BigInt { .. } => "BIGINT".to_string(),
            DataType::SmallInt { .. } => "SMALLINT".to_string(),
            DataType::TinyInt { .. } => "TINYINT".to_string(),
            DataType::Float { .. } => "FLOAT".to_string(),
            DataType::Double { .. } => "DOUBLE".to_string(),
            // DECIMAL keeps whatever precision/scale was specified.
            DataType::Decimal {
                precision: Some(p),
                scale: Some(s),
            } => format!("DECIMAL({}, {})", p, s),
            DataType::Decimal {
                precision: Some(p), ..
            } => format!("DECIMAL({})", p),
            DataType::Decimal { .. } => "DECIMAL".to_string(),
            // All character-like types collapse to Spark's STRING.
            DataType::VarChar { .. } | DataType::Text | DataType::String { .. } => {
                "STRING".to_string()
            }
            DataType::Char { .. } => "STRING".to_string(),
            DataType::Boolean => "BOOLEAN".to_string(),
            DataType::Date => "DATE".to_string(),
            DataType::Timestamp { .. } => "TIMESTAMP".to_string(),
            // Spark has no native JSON type; represent as STRING.
            DataType::Json | DataType::JsonB => "STRING".to_string(),
            DataType::Binary { .. } => "BINARY".to_string(),
            DataType::Array { element_type, .. } => {
                format!("ARRAY<{}>", Self::data_type_to_spark_string(element_type))
            }
            DataType::Map {
                key_type,
                value_type,
            } => format!(
                "MAP<{}, {}>",
                Self::data_type_to_spark_string(key_type),
                Self::data_type_to_spark_string(value_type)
            ),
            DataType::Struct { fields, .. } => {
                // Unnamed fields emit just the type; named fields use `name: type`.
                let field_strs: Vec<String> = fields
                    .iter()
                    .map(|f| {
                        if f.name.is_empty() {
                            Self::data_type_to_spark_string(&f.data_type)
                        } else {
                            format!(
                                "{}: {}",
                                f.name,
                                Self::data_type_to_spark_string(&f.data_type)
                            )
                        }
                    })
                    .collect();
                format!("STRUCT<{}>", field_strs.join(", "))
            }
            DataType::Custom { name } => name.clone(),
            // NOTE(review): fallback uses Debug formatting, which is unlikely
            // to be valid Spark SQL — presumably unreachable for supported
            // inputs; confirm before relying on it.
            _ => format!("{:?}", dt),
        }
    }
28127
    /// Extract value and unit from an Interval expression
    /// Returns (value_expression, IntervalUnit)
    ///
    /// Three cases: an explicit simple unit on the interval; a Snowflake-style
    /// string value with the unit embedded ('5 DAY'), which is split apart;
    /// otherwise the unit defaults to Day. Non-interval inputs pass through
    /// unchanged with a Day unit.
    fn extract_interval_parts(
        interval_expr: &Expression,
    ) -> (Expression, crate::expressions::IntervalUnit) {
        use crate::expressions::{IntervalUnit, IntervalUnitSpec};

        if let Expression::Interval(iv) = interval_expr {
            // Missing value defaults to 0.
            let val = iv.this.clone().unwrap_or(Expression::number(0));
            let unit = match &iv.unit {
                Some(IntervalUnitSpec::Simple { unit, .. }) => *unit,
                None => {
                    // Unit might be embedded in the string value (Snowflake format: '5 DAY')
                    if let Expression::Literal(crate::expressions::Literal::String(s)) = &val {
                        // Split at the first space: "5 DAY" -> ["5", "DAY"].
                        let parts: Vec<&str> = s.trim().splitn(2, ' ').collect();
                        if parts.len() == 2 {
                            let unit_str = parts[1].trim().to_uppercase();
                            // Accept singular and plural forms; unknown units fall back to Day.
                            let parsed_unit = match unit_str.as_str() {
                                "YEAR" | "YEARS" => IntervalUnit::Year,
                                "QUARTER" | "QUARTERS" => IntervalUnit::Quarter,
                                "MONTH" | "MONTHS" => IntervalUnit::Month,
                                "WEEK" | "WEEKS" | "ISOWEEK" => IntervalUnit::Week,
                                "DAY" | "DAYS" => IntervalUnit::Day,
                                "HOUR" | "HOURS" => IntervalUnit::Hour,
                                "MINUTE" | "MINUTES" => IntervalUnit::Minute,
                                "SECOND" | "SECONDS" => IntervalUnit::Second,
                                "MILLISECOND" | "MILLISECONDS" => IntervalUnit::Millisecond,
                                "MICROSECOND" | "MICROSECONDS" => IntervalUnit::Microsecond,
                                _ => IntervalUnit::Day,
                            };
                            // Return just the numeric part as value and parsed unit
                            return (
                                Expression::Literal(crate::expressions::Literal::String(
                                    parts[0].to_string(),
                                )),
                                parsed_unit,
                            );
                        }
                        IntervalUnit::Day
                    } else {
                        IntervalUnit::Day
                    }
                }
                _ => IntervalUnit::Day,
            };
            (val, unit)
        } else {
            // Not an interval - pass through
            (interval_expr.clone(), crate::expressions::IntervalUnit::Day)
        }
    }
28179
28180 /// Normalize BigQuery-specific functions to standard forms that target dialects can handle
28181 fn normalize_bigquery_function(
28182 e: Expression,
28183 source: DialectType,
28184 target: DialectType,
28185 ) -> Result<Expression> {
28186 use crate::expressions::{BinaryOp, Cast, DataType, Function, Identifier, Literal, Paren};
28187
28188 let f = if let Expression::Function(f) = e {
28189 *f
28190 } else {
28191 return Ok(e);
28192 };
28193 let name = f.name.to_uppercase();
28194 let mut args = f.args;
28195
28196 /// Helper to extract unit string from an identifier, column, or literal expression
28197 fn get_unit_str(expr: &Expression) -> String {
28198 match expr {
28199 Expression::Identifier(id) => id.name.to_uppercase(),
28200 Expression::Literal(Literal::String(s)) => s.to_uppercase(),
28201 Expression::Column(col) => col.name.name.to_uppercase(),
28202 // Handle WEEK(MONDAY), WEEK(SUNDAY) etc. which are parsed as Function("WEEK", [Column("MONDAY")])
28203 Expression::Function(f) => {
28204 let base = f.name.to_uppercase();
28205 if !f.args.is_empty() {
28206 // e.g., WEEK(MONDAY) -> "WEEK(MONDAY)"
28207 let inner = get_unit_str(&f.args[0]);
28208 format!("{}({})", base, inner)
28209 } else {
28210 base
28211 }
28212 }
28213 _ => "DAY".to_string(),
28214 }
28215 }
28216
28217 /// Parse unit string to IntervalUnit
28218 fn parse_interval_unit(s: &str) -> crate::expressions::IntervalUnit {
28219 match s {
28220 "YEAR" => crate::expressions::IntervalUnit::Year,
28221 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
28222 "MONTH" => crate::expressions::IntervalUnit::Month,
28223 "WEEK" | "ISOWEEK" => crate::expressions::IntervalUnit::Week,
28224 "DAY" => crate::expressions::IntervalUnit::Day,
28225 "HOUR" => crate::expressions::IntervalUnit::Hour,
28226 "MINUTE" => crate::expressions::IntervalUnit::Minute,
28227 "SECOND" => crate::expressions::IntervalUnit::Second,
28228 "MILLISECOND" => crate::expressions::IntervalUnit::Millisecond,
28229 "MICROSECOND" => crate::expressions::IntervalUnit::Microsecond,
28230 _ if s.starts_with("WEEK(") => crate::expressions::IntervalUnit::Week,
28231 _ => crate::expressions::IntervalUnit::Day,
28232 }
28233 }
28234
28235 match name.as_str() {
28236 // TIMESTAMP_DIFF(date1, date2, unit) -> TIMESTAMPDIFF(unit, date2, date1)
28237 // (BigQuery: result = date1 - date2, Standard: result = end - start)
28238 "TIMESTAMP_DIFF" | "DATETIME_DIFF" | "TIME_DIFF" if args.len() == 3 => {
28239 let date1 = args.remove(0);
28240 let date2 = args.remove(0);
28241 let unit_expr = args.remove(0);
28242 let unit_str = get_unit_str(&unit_expr);
28243
28244 if matches!(target, DialectType::BigQuery) {
28245 // BigQuery -> BigQuery: just uppercase the unit
28246 let unit = Expression::Identifier(Identifier::new(unit_str.clone()));
28247 return Ok(Expression::Function(Box::new(Function::new(
28248 f.name,
28249 vec![date1, date2, unit],
28250 ))));
28251 }
28252
28253 // For Snowflake: use TimestampDiff expression so it generates TIMESTAMPDIFF
28254 // (Function("TIMESTAMPDIFF") would be converted to DATEDIFF by Snowflake's function normalization)
28255 if matches!(target, DialectType::Snowflake) {
28256 return Ok(Expression::TimestampDiff(Box::new(
28257 crate::expressions::TimestampDiff {
28258 this: Box::new(date2),
28259 expression: Box::new(date1),
28260 unit: Some(unit_str),
28261 },
28262 )));
28263 }
28264
28265 // For DuckDB: DATE_DIFF('UNIT', start, end) with proper CAST
28266 if matches!(target, DialectType::DuckDB) {
28267 let (cast_d1, cast_d2) = if name == "TIME_DIFF" {
28268 // CAST to TIME
28269 let cast_fn = |e: Expression| -> Expression {
28270 match e {
28271 Expression::Literal(Literal::String(s)) => {
28272 Expression::Cast(Box::new(Cast {
28273 this: Expression::Literal(Literal::String(s)),
28274 to: DataType::Custom {
28275 name: "TIME".to_string(),
28276 },
28277 trailing_comments: vec![],
28278 double_colon_syntax: false,
28279 format: None,
28280 default: None,
28281 inferred_type: None,
28282 }))
28283 }
28284 other => other,
28285 }
28286 };
28287 (cast_fn(date1), cast_fn(date2))
28288 } else if name == "DATETIME_DIFF" {
28289 // CAST to TIMESTAMP
28290 (
28291 Self::ensure_cast_timestamp(date1),
28292 Self::ensure_cast_timestamp(date2),
28293 )
28294 } else {
28295 // TIMESTAMP_DIFF: CAST to TIMESTAMPTZ
28296 (
28297 Self::ensure_cast_timestamptz(date1),
28298 Self::ensure_cast_timestamptz(date2),
28299 )
28300 };
28301 return Ok(Expression::Function(Box::new(Function::new(
28302 "DATE_DIFF".to_string(),
28303 vec![
28304 Expression::Literal(Literal::String(unit_str)),
28305 cast_d2,
28306 cast_d1,
28307 ],
28308 ))));
28309 }
28310
28311 // Convert to standard TIMESTAMPDIFF(unit, start, end)
28312 let unit = Expression::Identifier(Identifier::new(unit_str));
28313 Ok(Expression::Function(Box::new(Function::new(
28314 "TIMESTAMPDIFF".to_string(),
28315 vec![unit, date2, date1],
28316 ))))
28317 }
28318
28319 // DATEDIFF(unit, start, end) -> target-specific form
28320 // Used by: Redshift, Snowflake, TSQL, Databricks, Spark
28321 "DATEDIFF" if args.len() == 3 => {
28322 let arg0 = args.remove(0);
28323 let arg1 = args.remove(0);
28324 let arg2 = args.remove(0);
28325 let unit_str = get_unit_str(&arg0);
28326
28327 // Redshift DATEDIFF(unit, start, end) order: result = end - start
28328 // Snowflake DATEDIFF(unit, start, end) order: result = end - start
28329 // TSQL DATEDIFF(unit, start, end) order: result = end - start
28330
28331 if matches!(target, DialectType::Snowflake) {
28332 // Snowflake: DATEDIFF(UNIT, start, end) - uppercase unit
28333 let unit = Expression::Identifier(Identifier::new(unit_str));
28334 return Ok(Expression::Function(Box::new(Function::new(
28335 "DATEDIFF".to_string(),
28336 vec![unit, arg1, arg2],
28337 ))));
28338 }
28339
28340 if matches!(target, DialectType::DuckDB) {
28341 // DuckDB: DATE_DIFF('UNIT', start, end) with CAST
28342 let cast_d1 = Self::ensure_cast_timestamp(arg1);
28343 let cast_d2 = Self::ensure_cast_timestamp(arg2);
28344 return Ok(Expression::Function(Box::new(Function::new(
28345 "DATE_DIFF".to_string(),
28346 vec![
28347 Expression::Literal(Literal::String(unit_str)),
28348 cast_d1,
28349 cast_d2,
28350 ],
28351 ))));
28352 }
28353
28354 if matches!(target, DialectType::BigQuery) {
28355 // BigQuery: DATE_DIFF(end_date, start_date, UNIT) - reversed args, CAST to DATETIME
28356 let cast_d1 = Self::ensure_cast_datetime(arg1);
28357 let cast_d2 = Self::ensure_cast_datetime(arg2);
28358 let unit = Expression::Identifier(Identifier::new(unit_str));
28359 return Ok(Expression::Function(Box::new(Function::new(
28360 "DATE_DIFF".to_string(),
28361 vec![cast_d2, cast_d1, unit],
28362 ))));
28363 }
28364
28365 if matches!(target, DialectType::Spark | DialectType::Databricks) {
28366 // Spark/Databricks: DATEDIFF(UNIT, start, end) - uppercase unit
28367 let unit = Expression::Identifier(Identifier::new(unit_str));
28368 return Ok(Expression::Function(Box::new(Function::new(
28369 "DATEDIFF".to_string(),
28370 vec![unit, arg1, arg2],
28371 ))));
28372 }
28373
28374 if matches!(target, DialectType::Hive) {
28375 // Hive: DATEDIFF(end, start) for DAY only, use MONTHS_BETWEEN for MONTH
28376 match unit_str.as_str() {
28377 "MONTH" => {
28378 return Ok(Expression::Function(Box::new(Function::new(
28379 "CAST".to_string(),
28380 vec![Expression::Function(Box::new(Function::new(
28381 "MONTHS_BETWEEN".to_string(),
28382 vec![arg2, arg1],
28383 )))],
28384 ))));
28385 }
28386 "WEEK" => {
28387 return Ok(Expression::Cast(Box::new(Cast {
28388 this: Expression::Div(Box::new(crate::expressions::BinaryOp::new(
28389 Expression::Function(Box::new(Function::new(
28390 "DATEDIFF".to_string(),
28391 vec![arg2, arg1],
28392 ))),
28393 Expression::Literal(Literal::Number("7".to_string())),
28394 ))),
28395 to: DataType::Int {
28396 length: None,
28397 integer_spelling: false,
28398 },
28399 trailing_comments: vec![],
28400 double_colon_syntax: false,
28401 format: None,
28402 default: None,
28403 inferred_type: None,
28404 })));
28405 }
28406 _ => {
28407 // Default: DATEDIFF(end, start) for DAY
28408 return Ok(Expression::Function(Box::new(Function::new(
28409 "DATEDIFF".to_string(),
28410 vec![arg2, arg1],
28411 ))));
28412 }
28413 }
28414 }
28415
28416 if matches!(
28417 target,
28418 DialectType::Presto | DialectType::Trino | DialectType::Athena
28419 ) {
28420 // Presto/Trino: DATE_DIFF('UNIT', start, end)
28421 return Ok(Expression::Function(Box::new(Function::new(
28422 "DATE_DIFF".to_string(),
28423 vec![Expression::Literal(Literal::String(unit_str)), arg1, arg2],
28424 ))));
28425 }
28426
28427 if matches!(target, DialectType::TSQL) {
28428 // TSQL: DATEDIFF(UNIT, start, CAST(end AS DATETIME2))
28429 let cast_d2 = Self::ensure_cast_datetime2(arg2);
28430 let unit = Expression::Identifier(Identifier::new(unit_str));
28431 return Ok(Expression::Function(Box::new(Function::new(
28432 "DATEDIFF".to_string(),
28433 vec![unit, arg1, cast_d2],
28434 ))));
28435 }
28436
28437 if matches!(target, DialectType::PostgreSQL) {
28438 // PostgreSQL doesn't have DATEDIFF - use date subtraction or EXTRACT
28439 // For now, use DATEDIFF (passthrough) with uppercased unit
28440 let unit = Expression::Identifier(Identifier::new(unit_str));
28441 return Ok(Expression::Function(Box::new(Function::new(
28442 "DATEDIFF".to_string(),
28443 vec![unit, arg1, arg2],
28444 ))));
28445 }
28446
28447 // Default: DATEDIFF(UNIT, start, end) with uppercase unit
28448 let unit = Expression::Identifier(Identifier::new(unit_str));
28449 Ok(Expression::Function(Box::new(Function::new(
28450 "DATEDIFF".to_string(),
28451 vec![unit, arg1, arg2],
28452 ))))
28453 }
28454
28455 // DATE_DIFF(date1, date2, unit) -> standard form
// DATE_DIFF(date1, date2, unit) — BigQuery-style 3-arg form; result = date1 - date2.
// Rewritten per target dialect; the fallback swaps the operands into
// DATEDIFF(unit, date2, date1), which preserves the date1 - date2 semantics.
"DATE_DIFF" if args.len() == 3 => {
    let date1 = args.remove(0);
    let date2 = args.remove(0);
    let unit_expr = args.remove(0);
    let unit_str = get_unit_str(&unit_expr);

    if matches!(target, DialectType::BigQuery) {
        // BigQuery -> BigQuery: just uppercase the unit, normalize WEEK(SUNDAY) -> WEEK
        let norm_unit = if unit_str == "WEEK(SUNDAY)" {
            "WEEK".to_string()
        } else {
            unit_str
        };
        let norm_d1 = Self::date_literal_to_cast(date1);
        let norm_d2 = Self::date_literal_to_cast(date2);
        let unit = Expression::Identifier(Identifier::new(norm_unit));
        // Keep the original function name (f.name) so the round trip is lossless.
        return Ok(Expression::Function(Box::new(Function::new(
            f.name,
            vec![norm_d1, norm_d2, unit],
        ))));
    }

    if matches!(target, DialectType::MySQL) {
        // MySQL DATEDIFF only takes 2 args (date1, date2), returns day difference
        // NOTE(review): the unit is dropped here — presumably only DAY reaches this
        // path; verify behavior for non-DAY units against callers.
        let norm_d1 = Self::date_literal_to_cast(date1);
        let norm_d2 = Self::date_literal_to_cast(date2);
        return Ok(Expression::Function(Box::new(Function::new(
            "DATEDIFF".to_string(),
            vec![norm_d1, norm_d2],
        ))));
    }

    if matches!(target, DialectType::StarRocks) {
        // StarRocks: DATE_DIFF('UNIT', date1, date2) - unit as string, args NOT swapped
        let norm_d1 = Self::date_literal_to_cast(date1);
        let norm_d2 = Self::date_literal_to_cast(date2);
        return Ok(Expression::Function(Box::new(Function::new(
            "DATE_DIFF".to_string(),
            vec![
                Expression::Literal(Literal::String(unit_str)),
                norm_d1,
                norm_d2,
            ],
        ))));
    }

    if matches!(target, DialectType::DuckDB) {
        // DuckDB: DATE_DIFF('UNIT', date2, date1) with proper CAST for dates
        let norm_d1 = Self::ensure_cast_date(date1);
        let norm_d2 = Self::ensure_cast_date(date2);

        // Handle WEEK variants: WEEK(MONDAY)/WEEK(SUNDAY)/ISOWEEK/WEEK
        let is_week_variant = unit_str == "WEEK"
            || unit_str.starts_with("WEEK(")
            || unit_str == "ISOWEEK";
        if is_week_variant {
            // For DuckDB, WEEK-based diffs use DATE_TRUNC approach
            // WEEK(MONDAY) / ISOWEEK: DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2), DATE_TRUNC('WEEK', d1))
            // WEEK / WEEK(SUNDAY): DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2 + INTERVAL '1' DAY), DATE_TRUNC('WEEK', d1 + INTERVAL '1' DAY))
            // WEEK(SATURDAY): DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2 + INTERVAL '-5' DAY), DATE_TRUNC('WEEK', d1 + INTERVAL '-5' DAY))
            // The day offset shifts each operand so DuckDB's Monday-anchored
            // DATE_TRUNC('WEEK') lines up with the requested week-start day.
            let day_offset = if unit_str == "WEEK(MONDAY)" || unit_str == "ISOWEEK" {
                None // ISO weeks start on Monday, aligned with DATE_TRUNC('WEEK')
            } else if unit_str == "WEEK" || unit_str == "WEEK(SUNDAY)" {
                Some("1") // Shift Sunday to Monday alignment
            } else if unit_str == "WEEK(SATURDAY)" {
                Some("-5")
            } else if unit_str == "WEEK(TUESDAY)" {
                Some("-1")
            } else if unit_str == "WEEK(WEDNESDAY)" {
                Some("-2")
            } else if unit_str == "WEEK(THURSDAY)" {
                Some("-3")
            } else if unit_str == "WEEK(FRIDAY)" {
                Some("-4")
            } else {
                Some("1") // default to Sunday
            };

            // Builds DATE_TRUNC('WEEK', date [+ INTERVAL 'offset' DAY]).
            let make_trunc = |date: Expression, offset: Option<&str>| -> Expression {
                let shifted = if let Some(off) = offset {
                    let interval =
                        Expression::Interval(Box::new(crate::expressions::Interval {
                            this: Some(Expression::Literal(Literal::String(
                                off.to_string(),
                            ))),
                            unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                unit: crate::expressions::IntervalUnit::Day,
                                use_plural: false,
                            }),
                        }));
                    Expression::Add(Box::new(crate::expressions::BinaryOp::new(
                        date, interval,
                    )))
                } else {
                    date
                };
                Expression::Function(Box::new(Function::new(
                    "DATE_TRUNC".to_string(),
                    vec![
                        Expression::Literal(Literal::String("WEEK".to_string())),
                        shifted,
                    ],
                )))
            };

            let trunc_d2 = make_trunc(norm_d2, day_offset);
            let trunc_d1 = make_trunc(norm_d1, day_offset);
            return Ok(Expression::Function(Box::new(Function::new(
                "DATE_DIFF".to_string(),
                vec![
                    Expression::Literal(Literal::String("WEEK".to_string())),
                    trunc_d2,
                    trunc_d1,
                ],
            ))));
        }

        // Non-week units: operands are swapped because DuckDB's DATE_DIFF
        // computes end - start while BigQuery's computes date1 - date2.
        return Ok(Expression::Function(Box::new(Function::new(
            "DATE_DIFF".to_string(),
            vec![
                Expression::Literal(Literal::String(unit_str)),
                norm_d2,
                norm_d1,
            ],
        ))));
    }

    // Default: DATEDIFF(unit, date2, date1)
    let unit = Expression::Identifier(Identifier::new(unit_str));
    Ok(Expression::Function(Box::new(Function::new(
        "DATEDIFF".to_string(),
        vec![unit, date2, date1],
    ))))
}
28590
28591 // TIMESTAMP_ADD(ts, INTERVAL n UNIT) -> target-specific
// TIMESTAMP_ADD / DATETIME_ADD / TIME_ADD (ts, INTERVAL n UNIT) -> target-specific.
// The interval argument is decomposed into (value, unit) first; each target then
// gets either a native add function, `ts + INTERVAL` arithmetic, or a DateAdd node.
"TIMESTAMP_ADD" | "DATETIME_ADD" | "TIME_ADD" if args.len() == 2 => {
    let ts = args.remove(0);
    let interval_expr = args.remove(0);
    let (val, unit) = Self::extract_interval_parts(&interval_expr);

    match target {
        DialectType::Snowflake => {
            // TIMESTAMPADD(UNIT, val, CAST(ts AS TIMESTAMPTZ))
            // Use TimestampAdd expression so Snowflake generates TIMESTAMPADD
            // (Function("TIMESTAMPADD") would be converted to DATEADD by Snowflake's function normalization)
            let unit_str = Self::interval_unit_to_string(&unit);
            let cast_ts = Self::maybe_cast_ts_to_tz(ts, &name);
            Ok(Expression::TimestampAdd(Box::new(
                crate::expressions::TimestampAdd {
                    this: Box::new(val),
                    expression: Box::new(cast_ts),
                    unit: Some(unit_str),
                },
            )))
        }
        DialectType::Spark | DialectType::Databricks => {
            if name == "DATETIME_ADD" && matches!(target, DialectType::Spark) {
                // Spark DATETIME_ADD: ts + INTERVAL val UNIT
                let interval =
                    Expression::Interval(Box::new(crate::expressions::Interval {
                        this: Some(val),
                        unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                            unit,
                            use_plural: false,
                        }),
                    }));
                Ok(Expression::Add(Box::new(
                    crate::expressions::BinaryOp::new(ts, interval),
                )))
            } else if name == "DATETIME_ADD"
                && matches!(target, DialectType::Databricks)
            {
                // Databricks DATETIME_ADD: TIMESTAMPADD(UNIT, val, ts)
                let unit_str = Self::interval_unit_to_string(&unit);
                Ok(Expression::Function(Box::new(Function::new(
                    "TIMESTAMPADD".to_string(),
                    vec![Expression::Identifier(Identifier::new(unit_str)), val, ts],
                ))))
            } else {
                // Presto-style: DATE_ADD('unit', val, CAST(ts AS TIMESTAMP))
                // TIME_ADD intentionally skips the cast (only TIMESTAMP_*/DATETIME_* get it).
                let unit_str = Self::interval_unit_to_string(&unit);
                let cast_ts =
                    if name.starts_with("TIMESTAMP") || name.starts_with("DATETIME") {
                        Self::maybe_cast_ts(ts)
                    } else {
                        ts
                    };
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![
                        Expression::Identifier(Identifier::new(unit_str)),
                        val,
                        cast_ts,
                    ],
                ))))
            }
        }
        DialectType::MySQL => {
            // DATE_ADD(TIMESTAMP(ts), INTERVAL val UNIT) for MySQL
            let mysql_ts = if name.starts_with("TIMESTAMP") {
                // Check if already wrapped in TIMESTAMP() function (from cross-dialect normalization)
                match &ts {
                    Expression::Function(ref inner_f)
                        if inner_f.name.eq_ignore_ascii_case("TIMESTAMP") =>
                    {
                        // Already wrapped, keep as-is
                        ts
                    }
                    _ => {
                        // Unwrap typed literals: TIMESTAMP '...' -> '...' for TIMESTAMP() wrapper
                        let unwrapped = match ts {
                            Expression::Literal(Literal::Timestamp(s)) => {
                                Expression::Literal(Literal::String(s))
                            }
                            other => other,
                        };
                        Expression::Function(Box::new(Function::new(
                            "TIMESTAMP".to_string(),
                            vec![unwrapped],
                        )))
                    }
                }
            } else {
                ts
            };
            Ok(Expression::DateAdd(Box::new(
                crate::expressions::DateAddFunc {
                    this: mysql_ts,
                    interval: val,
                    unit,
                },
            )))
        }
        _ => {
            // DuckDB and others use DateAdd expression (DuckDB converts to + INTERVAL)
            let cast_ts = if matches!(target, DialectType::DuckDB) {
                if name == "DATETIME_ADD" {
                    Self::ensure_cast_timestamp(ts)
                } else if name.starts_with("TIMESTAMP") {
                    Self::maybe_cast_ts_to_tz(ts, &name)
                } else {
                    ts
                }
            } else {
                ts
            };
            Ok(Expression::DateAdd(Box::new(
                crate::expressions::DateAddFunc {
                    this: cast_ts,
                    interval: val,
                    unit,
                },
            )))
        }
    }
}
28713
28714 // TIMESTAMP_SUB(ts, INTERVAL n UNIT) -> target-specific
// TIMESTAMP_SUB / DATETIME_SUB / TIME_SUB (ts, INTERVAL n UNIT) -> target-specific.
// Mirrors the *_ADD arm above; subtraction is expressed either as `- INTERVAL`,
// as an add of `val * -1`, or as a DateSub node, depending on the target.
"TIMESTAMP_SUB" | "DATETIME_SUB" | "TIME_SUB" if args.len() == 2 => {
    let ts = args.remove(0);
    let interval_expr = args.remove(0);
    let (val, unit) = Self::extract_interval_parts(&interval_expr);

    match target {
        DialectType::Snowflake => {
            // TIMESTAMPADD(UNIT, val * -1, CAST(ts AS TIMESTAMPTZ))
            let unit_str = Self::interval_unit_to_string(&unit);
            let cast_ts = Self::maybe_cast_ts_to_tz(ts, &name);
            // Negation is spelled `val * -1` rather than a plain Neg so that
            // non-literal vals stay syntactically valid in the generated SQL.
            let neg_val = Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
                val,
                Expression::Neg(Box::new(crate::expressions::UnaryOp {
                    this: Expression::number(1),
                    inferred_type: None,
                })),
            )));
            Ok(Expression::TimestampAdd(Box::new(
                crate::expressions::TimestampAdd {
                    this: Box::new(neg_val),
                    expression: Box::new(cast_ts),
                    unit: Some(unit_str),
                },
            )))
        }
        DialectType::Spark | DialectType::Databricks => {
            if (name == "DATETIME_SUB" && matches!(target, DialectType::Spark))
                || (name == "TIMESTAMP_SUB" && matches!(target, DialectType::Spark))
            {
                // Spark: ts - INTERVAL val UNIT
                let cast_ts = if name.starts_with("TIMESTAMP") {
                    Self::maybe_cast_ts(ts)
                } else {
                    ts
                };
                let interval =
                    Expression::Interval(Box::new(crate::expressions::Interval {
                        this: Some(val),
                        unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                            unit,
                            use_plural: false,
                        }),
                    }));
                Ok(Expression::Sub(Box::new(
                    crate::expressions::BinaryOp::new(cast_ts, interval),
                )))
            } else {
                // Databricks: TIMESTAMPADD(UNIT, val * -1, ts)
                let unit_str = Self::interval_unit_to_string(&unit);
                let neg_val =
                    Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
                        val,
                        Expression::Neg(Box::new(crate::expressions::UnaryOp {
                            this: Expression::number(1),
                            inferred_type: None,
                        })),
                    )));
                Ok(Expression::Function(Box::new(Function::new(
                    "TIMESTAMPADD".to_string(),
                    vec![
                        Expression::Identifier(Identifier::new(unit_str)),
                        neg_val,
                        ts,
                    ],
                ))))
            }
        }
        DialectType::MySQL => {
            // Same TIMESTAMP() wrapping as the *_ADD arm: only wrap when the input
            // is not already a TIMESTAMP() call, unwrapping typed literals first.
            let mysql_ts = if name.starts_with("TIMESTAMP") {
                // Check if already wrapped in TIMESTAMP() function (from cross-dialect normalization)
                match &ts {
                    Expression::Function(ref inner_f)
                        if inner_f.name.eq_ignore_ascii_case("TIMESTAMP") =>
                    {
                        // Already wrapped, keep as-is
                        ts
                    }
                    _ => {
                        let unwrapped = match ts {
                            Expression::Literal(Literal::Timestamp(s)) => {
                                Expression::Literal(Literal::String(s))
                            }
                            other => other,
                        };
                        Expression::Function(Box::new(Function::new(
                            "TIMESTAMP".to_string(),
                            vec![unwrapped],
                        )))
                    }
                }
            } else {
                ts
            };
            Ok(Expression::DateSub(Box::new(
                crate::expressions::DateAddFunc {
                    this: mysql_ts,
                    interval: val,
                    unit,
                },
            )))
        }
        _ => {
            // DuckDB gets an explicit cast; all other targets pass ts through
            // and rely on their own DateSub generation.
            let cast_ts = if matches!(target, DialectType::DuckDB) {
                if name == "DATETIME_SUB" {
                    Self::ensure_cast_timestamp(ts)
                } else if name.starts_with("TIMESTAMP") {
                    Self::maybe_cast_ts_to_tz(ts, &name)
                } else {
                    ts
                }
            } else {
                ts
            };
            Ok(Expression::DateSub(Box::new(
                crate::expressions::DateAddFunc {
                    this: cast_ts,
                    interval: val,
                    unit,
                },
            )))
        }
    }
}
28838
28839 // DATE_SUB(date, INTERVAL n UNIT) -> target-specific
// DATE_SUB(date, INTERVAL n UNIT) -> target-specific subtraction.
"DATE_SUB" if args.len() == 2 => {
    let date = args.remove(0);
    let interval_expr = args.remove(0);
    let (val, unit) = Self::extract_interval_parts(&interval_expr);

    match target {
        DialectType::Databricks | DialectType::Spark => {
            // Databricks/Spark: DATE_ADD(date, -val)
            // Use DateAdd expression with negative val so it generates correctly
            // The generator will output DATE_ADD(date, INTERVAL -val DAY)
            // Then Databricks transform converts 2-arg DATE_ADD(date, interval) to DATEADD(DAY, interval, date)
            // Instead, we directly output as a simple negated DateSub
            Ok(Expression::DateSub(Box::new(
                crate::expressions::DateAddFunc {
                    this: date,
                    interval: val,
                    unit,
                },
            )))
        }
        DialectType::DuckDB => {
            // DuckDB: CAST(date AS DATE) - INTERVAL 'val' UNIT
            let cast_date = Self::ensure_cast_date(date);
            let interval =
                Expression::Interval(Box::new(crate::expressions::Interval {
                    this: Some(val),
                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                        unit,
                        use_plural: false,
                    }),
                }));
            Ok(Expression::Sub(Box::new(
                crate::expressions::BinaryOp::new(cast_date, interval),
            )))
        }
        DialectType::Snowflake => {
            // Snowflake: Let Snowflake's own DateSub -> DATEADD(UNIT, val * -1, date) handler work
            // Just ensure the date is cast properly
            let cast_date = Self::ensure_cast_date(date);
            Ok(Expression::DateSub(Box::new(
                crate::expressions::DateAddFunc {
                    this: cast_date,
                    interval: val,
                    unit,
                },
            )))
        }
        DialectType::PostgreSQL => {
            // PostgreSQL: date - INTERVAL 'val UNIT'
            // Value and unit are collapsed into one string literal ('3 DAY' style).
            let unit_str = Self::interval_unit_to_string(&unit);
            let interval =
                Expression::Interval(Box::new(crate::expressions::Interval {
                    this: Some(Expression::Literal(Literal::String(format!(
                        "{} {}",
                        Self::expr_to_string(&val),
                        unit_str
                    )))),
                    unit: None,
                }));
            Ok(Expression::Sub(Box::new(
                crate::expressions::BinaryOp::new(date, interval),
            )))
        }
        // Everything else keeps a generic DateSub node for the target's generator.
        _ => Ok(Expression::DateSub(Box::new(
            crate::expressions::DateAddFunc {
                this: date,
                interval: val,
                unit,
            },
        ))),
    }
}
28912
28913 // DATEADD(unit, val, date) -> target-specific form
28914 // Used by: Redshift, Snowflake, TSQL, ClickHouse
// DATEADD(unit, val, date) -> target-specific form
// Used by: Redshift, Snowflake, TSQL, ClickHouse
"DATEADD" if args.len() == 3 => {
    let arg0 = args.remove(0);
    let arg1 = args.remove(0);
    let arg2 = args.remove(0);
    let unit_str = get_unit_str(&arg0);

    if matches!(target, DialectType::Snowflake | DialectType::TSQL) {
        // Keep DATEADD(UNIT, val, date) with uppercased unit
        let unit = Expression::Identifier(Identifier::new(unit_str));
        // Only CAST to DATETIME2 for TSQL target when source is NOT Spark/Databricks family
        let date = if matches!(target, DialectType::TSQL)
            && !matches!(
                source,
                DialectType::Spark | DialectType::Databricks | DialectType::Hive
            ) {
            Self::ensure_cast_datetime2(arg2)
        } else {
            arg2
        };
        return Ok(Expression::Function(Box::new(Function::new(
            "DATEADD".to_string(),
            vec![unit, arg1, date],
        ))));
    }

    if matches!(target, DialectType::DuckDB) {
        // DuckDB: date + INTERVAL 'val' UNIT
        // NOTE(review): this path casts the date to TIMESTAMP while the 3-arg
        // DATE_ADD arm below does not — confirm the asymmetry is intentional.
        let iu = parse_interval_unit(&unit_str);
        let interval = Expression::Interval(Box::new(crate::expressions::Interval {
            this: Some(arg1),
            unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                unit: iu,
                use_plural: false,
            }),
        }));
        let cast_date = Self::ensure_cast_timestamp(arg2);
        return Ok(Expression::Add(Box::new(
            crate::expressions::BinaryOp::new(cast_date, interval),
        )));
    }

    if matches!(target, DialectType::BigQuery) {
        // BigQuery: DATE_ADD(date, INTERVAL val UNIT) or TIMESTAMP_ADD(ts, INTERVAL val UNIT)
        // NOTE(review): only DATE_ADD is emitted here; TIMESTAMP_ADD is not produced
        // on this path even for time-of-day units — verify against callers.
        let iu = parse_interval_unit(&unit_str);
        let interval = Expression::Interval(Box::new(crate::expressions::Interval {
            this: Some(arg1),
            unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                unit: iu,
                use_plural: false,
            }),
        }));
        return Ok(Expression::Function(Box::new(Function::new(
            "DATE_ADD".to_string(),
            vec![arg2, interval],
        ))));
    }

    if matches!(target, DialectType::Databricks) {
        // Databricks: keep DATEADD(UNIT, val, date) format
        let unit = Expression::Identifier(Identifier::new(unit_str));
        return Ok(Expression::Function(Box::new(Function::new(
            "DATEADD".to_string(),
            vec![unit, arg1, arg2],
        ))));
    }

    if matches!(target, DialectType::Spark) {
        // Spark: convert month-based units to ADD_MONTHS, rest to DATE_ADD
        // Multiplies a numeric expression by `factor`, folding the product at
        // build time when the operand is an integer literal.
        fn multiply_expr_dateadd(expr: Expression, factor: i64) -> Expression {
            if let Expression::Literal(crate::expressions::Literal::Number(n)) = &expr {
                if let Ok(val) = n.parse::<i64>() {
                    return Expression::Literal(crate::expressions::Literal::Number(
                        (val * factor).to_string(),
                    ));
                }
            }
            Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
                expr,
                Expression::Literal(crate::expressions::Literal::Number(
                    factor.to_string(),
                )),
            )))
        }
        match unit_str.as_str() {
            "YEAR" => {
                // 1 year = 12 months
                let months = multiply_expr_dateadd(arg1, 12);
                return Ok(Expression::Function(Box::new(Function::new(
                    "ADD_MONTHS".to_string(),
                    vec![arg2, months],
                ))));
            }
            "QUARTER" => {
                // 1 quarter = 3 months
                let months = multiply_expr_dateadd(arg1, 3);
                return Ok(Expression::Function(Box::new(Function::new(
                    "ADD_MONTHS".to_string(),
                    vec![arg2, months],
                ))));
            }
            "MONTH" => {
                return Ok(Expression::Function(Box::new(Function::new(
                    "ADD_MONTHS".to_string(),
                    vec![arg2, arg1],
                ))));
            }
            "WEEK" => {
                // 1 week = 7 days
                let days = multiply_expr_dateadd(arg1, 7);
                return Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![arg2, days],
                ))));
            }
            "DAY" => {
                return Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![arg2, arg1],
                ))));
            }
            _ => {
                // Sub-day units: keep a 3-arg DATE_ADD with an identifier unit.
                let unit = Expression::Identifier(Identifier::new(unit_str));
                return Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![unit, arg1, arg2],
                ))));
            }
        }
    }

    if matches!(target, DialectType::Hive) {
        // Hive: DATE_ADD(date, val) for DAY, or date + INTERVAL for others
        match unit_str.as_str() {
            "DAY" => {
                return Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![arg2, arg1],
                ))));
            }
            "MONTH" => {
                return Ok(Expression::Function(Box::new(Function::new(
                    "ADD_MONTHS".to_string(),
                    vec![arg2, arg1],
                ))));
            }
            _ => {
                let iu = parse_interval_unit(&unit_str);
                let interval =
                    Expression::Interval(Box::new(crate::expressions::Interval {
                        this: Some(arg1),
                        unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                            unit: iu,
                            use_plural: false,
                        }),
                    }));
                return Ok(Expression::Add(Box::new(
                    crate::expressions::BinaryOp::new(arg2, interval),
                )));
            }
        }
    }

    if matches!(target, DialectType::PostgreSQL) {
        // PostgreSQL: date + INTERVAL 'val UNIT' (single quoted string literal)
        let interval = Expression::Interval(Box::new(crate::expressions::Interval {
            this: Some(Expression::Literal(Literal::String(format!(
                "{} {}",
                Self::expr_to_string(&arg1),
                unit_str
            )))),
            unit: None,
        }));
        return Ok(Expression::Add(Box::new(
            crate::expressions::BinaryOp::new(arg2, interval),
        )));
    }

    if matches!(
        target,
        DialectType::Presto | DialectType::Trino | DialectType::Athena
    ) {
        // Presto/Trino: DATE_ADD('UNIT', val, date) - unit as a string literal
        return Ok(Expression::Function(Box::new(Function::new(
            "DATE_ADD".to_string(),
            vec![Expression::Literal(Literal::String(unit_str)), arg1, arg2],
        ))));
    }

    if matches!(target, DialectType::ClickHouse) {
        // ClickHouse: DATE_ADD(UNIT, val, date) - unit as a bare identifier
        let unit = Expression::Identifier(Identifier::new(unit_str));
        return Ok(Expression::Function(Box::new(Function::new(
            "DATE_ADD".to_string(),
            vec![unit, arg1, arg2],
        ))));
    }

    // Default: keep DATEADD with uppercased unit
    let unit = Expression::Identifier(Identifier::new(unit_str));
    Ok(Expression::Function(Box::new(Function::new(
        "DATEADD".to_string(),
        vec![unit, arg1, arg2],
    ))))
}
29116
29117 // DATE_ADD(unit, val, date) - 3 arg form from ClickHouse/Presto
// DATE_ADD(unit, val, date) - 3 arg form from ClickHouse/Presto
"DATE_ADD" if args.len() == 3 => {
    let arg0 = args.remove(0);
    let arg1 = args.remove(0);
    let arg2 = args.remove(0);
    let unit_str = get_unit_str(&arg0);

    if matches!(
        target,
        DialectType::Presto | DialectType::Trino | DialectType::Athena
    ) {
        // Presto/Trino: DATE_ADD('UNIT', val, date) - unit as a string literal
        return Ok(Expression::Function(Box::new(Function::new(
            "DATE_ADD".to_string(),
            vec![Expression::Literal(Literal::String(unit_str)), arg1, arg2],
        ))));
    }

    if matches!(
        target,
        DialectType::Snowflake | DialectType::TSQL | DialectType::Redshift
    ) {
        // DATEADD(UNIT, val, date); TSQL additionally casts the date to DATETIME2.
        let unit = Expression::Identifier(Identifier::new(unit_str));
        let date = if matches!(target, DialectType::TSQL) {
            Self::ensure_cast_datetime2(arg2)
        } else {
            arg2
        };
        return Ok(Expression::Function(Box::new(Function::new(
            "DATEADD".to_string(),
            vec![unit, arg1, date],
        ))));
    }

    if matches!(target, DialectType::DuckDB) {
        // DuckDB: date + INTERVAL val UNIT (no cast on this path)
        let iu = parse_interval_unit(&unit_str);
        let interval = Expression::Interval(Box::new(crate::expressions::Interval {
            this: Some(arg1),
            unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                unit: iu,
                use_plural: false,
            }),
        }));
        return Ok(Expression::Add(Box::new(
            crate::expressions::BinaryOp::new(arg2, interval),
        )));
    }

    if matches!(target, DialectType::Spark | DialectType::Databricks) {
        // Spark: DATE_ADD(UNIT, val, date) with uppercased unit
        let unit = Expression::Identifier(Identifier::new(unit_str));
        return Ok(Expression::Function(Box::new(Function::new(
            "DATE_ADD".to_string(),
            vec![unit, arg1, arg2],
        ))));
    }

    // Default: DATE_ADD(UNIT, val, date)
    let unit = Expression::Identifier(Identifier::new(unit_str));
    Ok(Expression::Function(Box::new(Function::new(
        "DATE_ADD".to_string(),
        vec![unit, arg1, arg2],
    ))))
}
29183
29184 // DATE_ADD(date, INTERVAL val UNIT) - 2 arg BigQuery form
// DATE_ADD(date, INTERVAL val UNIT) - 2 arg BigQuery form
"DATE_ADD" if args.len() == 2 => {
    let date = args.remove(0);
    let interval_expr = args.remove(0);
    let (val, unit) = Self::extract_interval_parts(&interval_expr);
    let unit_str = Self::interval_unit_to_string(&unit);

    match target {
        DialectType::DuckDB => {
            // DuckDB: CAST(date AS DATE) + INTERVAL 'val' UNIT
            let cast_date = Self::ensure_cast_date(date);
            let quoted_val = Self::quote_interval_val(&val);
            let interval =
                Expression::Interval(Box::new(crate::expressions::Interval {
                    this: Some(quoted_val),
                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                        unit,
                        use_plural: false,
                    }),
                }));
            Ok(Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(cast_date, interval),
            )))
        }
        DialectType::PostgreSQL => {
            // PostgreSQL: date + INTERVAL 'val UNIT' (value and unit in one string)
            let interval =
                Expression::Interval(Box::new(crate::expressions::Interval {
                    this: Some(Expression::Literal(Literal::String(format!(
                        "{} {}",
                        Self::expr_to_string(&val),
                        unit_str
                    )))),
                    unit: None,
                }));
            Ok(Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(date, interval),
            )))
        }
        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
            // Presto: DATE_ADD('UNIT', CAST('val' AS BIGINT), date)
            // The value is stringified then cast back to BIGINT — deliberate,
            // per the pattern documented above.
            let val_str = Self::expr_to_string(&val);
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![
                    Expression::Literal(Literal::String(unit_str)),
                    Expression::Cast(Box::new(Cast {
                        this: Expression::Literal(Literal::String(val_str)),
                        to: DataType::BigInt { length: None },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    })),
                    date,
                ],
            ))))
        }
        DialectType::Spark | DialectType::Hive => {
            // Spark/Hive: DATE_ADD(date, val) for DAY
            match unit_str.as_str() {
                "DAY" => Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![date, val],
                )))),
                "MONTH" => Ok(Expression::Function(Box::new(Function::new(
                    "ADD_MONTHS".to_string(),
                    vec![date, val],
                )))),
                _ => {
                    // Other units keep the 2-arg interval form.
                    let iu = parse_interval_unit(&unit_str);
                    let interval =
                        Expression::Interval(Box::new(crate::expressions::Interval {
                            this: Some(val),
                            unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                unit: iu,
                                use_plural: false,
                            }),
                        }));
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATE_ADD".to_string(),
                        vec![date, interval],
                    ))))
                }
            }
        }
        DialectType::Snowflake => {
            // Snowflake: DATEADD(UNIT, 'val', CAST(date AS DATE))
            // NOTE(review): the value is emitted as a quoted string literal —
            // matches the comment's stated intent, but verify Snowflake accepts it
            // for non-numeric expressions.
            let cast_date = Self::ensure_cast_date(date);
            let val_str = Self::expr_to_string(&val);
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new(unit_str)),
                    Expression::Literal(Literal::String(val_str)),
                    cast_date,
                ],
            ))))
        }
        DialectType::TSQL | DialectType::Fabric => {
            // TSQL/Fabric: DATEADD(UNIT, val, CAST(date AS DATETIME2))
            let cast_date = Self::ensure_cast_datetime2(date);
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new(unit_str)),
                    val,
                    cast_date,
                ],
            ))))
        }
        // Redshift: DATEADD(UNIT, val, date) with no cast.
        DialectType::Redshift => Ok(Expression::Function(Box::new(Function::new(
            "DATEADD".to_string(),
            vec![Expression::Identifier(Identifier::new(unit_str)), val, date],
        )))),
        DialectType::MySQL => {
            // MySQL: DATE_ADD(date, INTERVAL 'val' UNIT)
            let quoted_val = Self::quote_interval_val(&val);
            let iu = parse_interval_unit(&unit_str);
            let interval =
                Expression::Interval(Box::new(crate::expressions::Interval {
                    this: Some(quoted_val),
                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                        unit: iu,
                        use_plural: false,
                    }),
                }));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![date, interval],
            ))))
        }
        DialectType::BigQuery => {
            // BigQuery: DATE_ADD(date, INTERVAL 'val' UNIT) — round-trip passthrough
            let quoted_val = Self::quote_interval_val(&val);
            let iu = parse_interval_unit(&unit_str);
            let interval =
                Expression::Interval(Box::new(crate::expressions::Interval {
                    this: Some(quoted_val),
                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                        unit: iu,
                        use_plural: false,
                    }),
                }));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![date, interval],
            ))))
        }
        // Databricks: DATEADD(UNIT, val, date).
        DialectType::Databricks => Ok(Expression::Function(Box::new(Function::new(
            "DATEADD".to_string(),
            vec![Expression::Identifier(Identifier::new(unit_str)), val, date],
        )))),
        _ => {
            // Default: keep as DATE_ADD with decomposed interval
            Ok(Expression::DateAdd(Box::new(
                crate::expressions::DateAddFunc {
                    this: date,
                    interval: val,
                    unit,
                },
            )))
        }
    }
}
29349
            // ADD_MONTHS(date, val) -> target-specific form
            //
            // Targets fall into three groups:
            //   * keep ADD_MONTHS verbatim (Spark, Databricks, Hive, default,
            //     and Snowflake->Snowflake round-trips),
            //   * rewrite to a DATEADD/DATE_ADD variant (TSQL, Snowflake from
            //     other sources, Presto/Trino/Athena),
            //   * rewrite to interval arithmetic (DuckDB).
            //
            // NOTE(review): the TSQL branch checks only DialectType::TSQL,
            // while the DATE_ADD arm above treats TSQL | Fabric together —
            // confirm whether Fabric should also take the DATEADD path here.
            "ADD_MONTHS" if args.len() == 2 => {
                let date = args.remove(0);
                let val = args.remove(0);

                if matches!(target, DialectType::TSQL) {
                    // TSQL: DATEADD(MONTH, val, CAST(date AS DATETIME2))
                    let cast_date = Self::ensure_cast_datetime2(date);
                    return Ok(Expression::Function(Box::new(Function::new(
                        "DATEADD".to_string(),
                        vec![
                            Expression::Identifier(Identifier::new("MONTH")),
                            val,
                            cast_date,
                        ],
                    ))));
                }

                if matches!(target, DialectType::DuckDB) {
                    // DuckDB: date + INTERVAL val MONTH
                    let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                        this: Some(val),
                        unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                            unit: crate::expressions::IntervalUnit::Month,
                            use_plural: false,
                        }),
                    }));
                    return Ok(Expression::Add(Box::new(
                        crate::expressions::BinaryOp::new(date, interval),
                    )));
                }

                if matches!(target, DialectType::Snowflake) {
                    // Snowflake: keep ADD_MONTHS when source is also Snowflake, else DATEADD
                    // (ADD_MONTHS is native there, so a round-trip stays untouched).
                    if matches!(source, DialectType::Snowflake) {
                        return Ok(Expression::Function(Box::new(Function::new(
                            "ADD_MONTHS".to_string(),
                            vec![date, val],
                        ))));
                    }
                    return Ok(Expression::Function(Box::new(Function::new(
                        "DATEADD".to_string(),
                        vec![Expression::Identifier(Identifier::new("MONTH")), val, date],
                    ))));
                }

                if matches!(target, DialectType::Spark | DialectType::Databricks) {
                    // Spark: ADD_MONTHS(date, val) - keep as is
                    return Ok(Expression::Function(Box::new(Function::new(
                        "ADD_MONTHS".to_string(),
                        vec![date, val],
                    ))));
                }

                if matches!(target, DialectType::Hive) {
                    // Hive supports ADD_MONTHS natively - keep as is.
                    return Ok(Expression::Function(Box::new(Function::new(
                        "ADD_MONTHS".to_string(),
                        vec![date, val],
                    ))));
                }

                if matches!(
                    target,
                    DialectType::Presto | DialectType::Trino | DialectType::Athena
                ) {
                    // Presto: DATE_ADD('MONTH', val, date) - unit is a string literal here,
                    // unlike the identifier used by the DATEADD family above.
                    return Ok(Expression::Function(Box::new(Function::new(
                        "DATE_ADD".to_string(),
                        vec![
                            Expression::Literal(Literal::String("MONTH".to_string())),
                            val,
                            date,
                        ],
                    ))));
                }

                // Default: keep ADD_MONTHS
                Ok(Expression::Function(Box::new(Function::new(
                    "ADD_MONTHS".to_string(),
                    vec![date, val],
                ))))
            }
29432
            // SAFE_DIVIDE(x, y) -> target-specific form directly
            //
            // BigQuery's SAFE_DIVIDE returns NULL instead of erroring when y is 0.
            // Every rewrite below preserves that contract: test `y <> 0`, divide on
            // the true branch, and produce NULL otherwise.
            "SAFE_DIVIDE" if args.len() == 2 => {
                let x = args.remove(0);
                let y = args.remove(0);
                // Wrap x and y in parens if they're complex expressions, so the
                // rebuilt `x / y` and `y <> 0` keep the original precedence.
                let y_ref = match &y {
                    Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => {
                        y.clone()
                    }
                    _ => Expression::Paren(Box::new(Paren {
                        this: y.clone(),
                        trailing_comments: vec![],
                    })),
                };
                let x_ref = match &x {
                    Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => {
                        x.clone()
                    }
                    _ => Expression::Paren(Box::new(Paren {
                        this: x.clone(),
                        trailing_comments: vec![],
                    })),
                };
                // Shared building blocks: `y <> 0` guard and plain `x / y`.
                let condition = Expression::Neq(Box::new(crate::expressions::BinaryOp::new(
                    y_ref.clone(),
                    Expression::number(0),
                )));
                let div_expr = Expression::Div(Box::new(crate::expressions::BinaryOp::new(
                    x_ref.clone(),
                    y_ref.clone(),
                )));

                match target {
                    DialectType::DuckDB | DialectType::PostgreSQL => {
                        // CASE WHEN y <> 0 THEN x / y ELSE NULL END
                        let result_div = if matches!(target, DialectType::PostgreSQL) {
                            // Cast the numerator to DOUBLE PRECISION — presumably to
                            // avoid integer division semantics in Postgres; TODO confirm.
                            let cast_x = Expression::Cast(Box::new(Cast {
                                this: x_ref,
                                to: DataType::Custom {
                                    name: "DOUBLE PRECISION".to_string(),
                                },
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            }));
                            Expression::Div(Box::new(crate::expressions::BinaryOp::new(
                                cast_x, y_ref,
                            )))
                        } else {
                            div_expr
                        };
                        Ok(Expression::Case(Box::new(crate::expressions::Case {
                            operand: None,
                            whens: vec![(condition, result_div)],
                            else_: Some(Expression::Null(crate::expressions::Null)),
                            comments: Vec::new(),
                            inferred_type: None,
                        })))
                    }
                    DialectType::Snowflake => {
                        // IFF(y <> 0, x / y, NULL)
                        Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
                            condition,
                            true_value: div_expr,
                            false_value: Some(Expression::Null(crate::expressions::Null)),
                            original_name: Some("IFF".to_string()),
                            inferred_type: None,
                        })))
                    }
                    DialectType::Presto | DialectType::Trino => {
                        // IF(y <> 0, CAST(x AS DOUBLE) / y, NULL)
                        let cast_x = Expression::Cast(Box::new(Cast {
                            this: x_ref,
                            to: DataType::Double {
                                precision: None,
                                scale: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        let cast_div = Expression::Div(Box::new(
                            crate::expressions::BinaryOp::new(cast_x, y_ref),
                        ));
                        Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
                            condition,
                            true_value: cast_div,
                            false_value: Some(Expression::Null(crate::expressions::Null)),
                            original_name: None,
                            inferred_type: None,
                        })))
                    }
                    _ => {
                        // IF(y <> 0, x / y, NULL)
                        Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
                            condition,
                            true_value: div_expr,
                            false_value: Some(Expression::Null(crate::expressions::Null)),
                            original_name: None,
                            inferred_type: None,
                        })))
                    }
                }
            }
29541
            // GENERATE_UUID() -> UUID() with CAST to string
            //
            // BigQuery's GENERATE_UUID returns a STRING, so targets whose UUID()
            // yields a native UUID type get a cast to their string type; targets
            // not listed below emit a bare UUID() expression.
            "GENERATE_UUID" => {
                let uuid_expr = Expression::Uuid(Box::new(crate::expressions::Uuid {
                    this: None,
                    name: None,
                    is_string: None,
                }));
                // Most targets need CAST(UUID() AS TEXT/VARCHAR/STRING)
                let cast_type = match target {
                    DialectType::DuckDB => Some(DataType::Text),
                    DialectType::Presto | DialectType::Trino => Some(DataType::VarChar {
                        length: None,
                        parenthesized_length: false,
                    }),
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
                        Some(DataType::String { length: None })
                    }
                    _ => None,
                };
                if let Some(dt) = cast_type {
                    Ok(Expression::Cast(Box::new(Cast {
                        this: uuid_expr,
                        to: dt,
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                } else {
                    Ok(uuid_expr)
                }
            }
29575
            // COUNTIF(x) -> CountIf expression
            //
            // Normalizes to the dedicated CountIf AST node (an AggFunc with no
            // DISTINCT/FILTER/ORDER BY modifiers); the generator picks the
            // target-appropriate rendering from there.
            "COUNTIF" if args.len() == 1 => {
                let arg = args.remove(0);
                Ok(Expression::CountIf(Box::new(crate::expressions::AggFunc {
                    this: arg,
                    distinct: false,
                    filter: None,
                    order_by: vec![],
                    name: None,
                    ignore_nulls: None,
                    having_max: None,
                    limit: None,
                    inferred_type: None,
                })))
            }
29591
            // EDIT_DISTANCE(col1, col2, ...) -> Levenshtein expression
            //
            // Two-argument calls become the generic Levenshtein AST node.  A third
            // argument (BigQuery's max_distance, possibly written as the named
            // argument `max_distance => N`) is handled specially per target:
            //   * DuckDB has no max-distance parameter, so the cap is emulated
            //     with LEAST, plus an explicit NULL-propagation CASE (LEAST would
            //     otherwise ignore a NULL operand rather than return NULL).
            //   * PostgreSQL has a native LEVENSHTEIN_LESS_EQUAL(a, b, max).
            //   * Everything else gets LEVENSHTEIN with the extra args appended.
            "EDIT_DISTANCE" => {
                // Strip named arguments (max_distance => N) and pass as positional
                let mut positional_args: Vec<Expression> = vec![];
                for arg in args {
                    match arg {
                        Expression::NamedArgument(na) => {
                            positional_args.push(na.value);
                        }
                        other => positional_args.push(other),
                    }
                }
                if positional_args.len() >= 2 {
                    let col1 = positional_args.remove(0);
                    let col2 = positional_args.remove(0);
                    let levenshtein = crate::expressions::BinaryFunc {
                        this: col1,
                        expression: col2,
                        original_name: None,
                        inferred_type: None,
                    };
                    // Pass extra args through a function wrapper with all args
                    if !positional_args.is_empty() {
                        let max_dist = positional_args.remove(0);
                        // DuckDB: CASE WHEN LEVENSHTEIN(a, b) IS NULL OR max IS NULL THEN NULL ELSE LEAST(LEVENSHTEIN(a, b), max) END
                        if matches!(target, DialectType::DuckDB) {
                            let lev = Expression::Function(Box::new(Function::new(
                                "LEVENSHTEIN".to_string(),
                                vec![levenshtein.this, levenshtein.expression],
                            )));
                            let lev_is_null =
                                Expression::IsNull(Box::new(crate::expressions::IsNull {
                                    this: lev.clone(),
                                    not: false,
                                    postfix_form: false,
                                }));
                            let max_is_null =
                                Expression::IsNull(Box::new(crate::expressions::IsNull {
                                    this: max_dist.clone(),
                                    not: false,
                                    postfix_form: false,
                                }));
                            let null_check =
                                Expression::Or(Box::new(crate::expressions::BinaryOp {
                                    left: lev_is_null,
                                    right: max_is_null,
                                    left_comments: Vec::new(),
                                    operator_comments: Vec::new(),
                                    trailing_comments: Vec::new(),
                                    inferred_type: None,
                                }));
                            let least =
                                Expression::Least(Box::new(crate::expressions::VarArgFunc {
                                    expressions: vec![lev, max_dist],
                                    original_name: None,
                                    inferred_type: None,
                                }));
                            return Ok(Expression::Case(Box::new(crate::expressions::Case {
                                operand: None,
                                whens: vec![(
                                    null_check,
                                    Expression::Null(crate::expressions::Null),
                                )],
                                else_: Some(least),
                                comments: Vec::new(),
                                inferred_type: None,
                            })));
                        }
                        let mut all_args = vec![levenshtein.this, levenshtein.expression, max_dist];
                        all_args.extend(positional_args);
                        // PostgreSQL: use LEVENSHTEIN_LESS_EQUAL when max_distance is provided
                        let func_name = if matches!(target, DialectType::PostgreSQL) {
                            "LEVENSHTEIN_LESS_EQUAL"
                        } else {
                            "LEVENSHTEIN"
                        };
                        return Ok(Expression::Function(Box::new(Function::new(
                            func_name.to_string(),
                            all_args,
                        ))));
                    }
                    Ok(Expression::Levenshtein(Box::new(levenshtein)))
                } else {
                    // Fewer than 2 usable args: leave the call untouched for the
                    // target to reject or handle.
                    Ok(Expression::Function(Box::new(Function::new(
                        "EDIT_DISTANCE".to_string(),
                        positional_args,
                    ))))
                }
            }
29681
            // TIMESTAMP_SECONDS(x) -> UnixToTime with scale 0
            // (scale 0 = the epoch value is in whole seconds; the generator
            // renders the target-specific epoch-to-timestamp conversion)
            "TIMESTAMP_SECONDS" if args.len() == 1 => {
                let arg = args.remove(0);
                Ok(Expression::UnixToTime(Box::new(
                    crate::expressions::UnixToTime {
                        this: Box::new(arg),
                        scale: Some(0),
                        zone: None,
                        hours: None,
                        minutes: None,
                        format: None,
                        target_type: None,
                    },
                )))
            }
29697
            // TIMESTAMP_MILLIS(x) -> UnixToTime with scale 3
            // (scale 3 = epoch value in milliseconds, i.e. 10^3 per second)
            "TIMESTAMP_MILLIS" if args.len() == 1 => {
                let arg = args.remove(0);
                Ok(Expression::UnixToTime(Box::new(
                    crate::expressions::UnixToTime {
                        this: Box::new(arg),
                        scale: Some(3),
                        zone: None,
                        hours: None,
                        minutes: None,
                        format: None,
                        target_type: None,
                    },
                )))
            }
29713
            // TIMESTAMP_MICROS(x) -> UnixToTime with scale 6
            // (scale 6 = epoch value in microseconds, i.e. 10^6 per second)
            "TIMESTAMP_MICROS" if args.len() == 1 => {
                let arg = args.remove(0);
                Ok(Expression::UnixToTime(Box::new(
                    crate::expressions::UnixToTime {
                        this: Box::new(arg),
                        scale: Some(6),
                        zone: None,
                        hours: None,
                        minutes: None,
                        format: None,
                        target_type: None,
                    },
                )))
            }
29729
            // DIV(x, y) -> IntDiv expression
            //
            // Normalizes BigQuery's integer division to the IntDiv AST node;
            // the generator emits each target's integer-division syntax.
            "DIV" if args.len() == 2 => {
                let x = args.remove(0);
                let y = args.remove(0);
                Ok(Expression::IntDiv(Box::new(
                    crate::expressions::BinaryFunc {
                        this: x,
                        expression: y,
                        original_name: None,
                        inferred_type: None,
                    },
                )))
            }
29743
            // TO_HEX(x) -> target-specific form
            //
            // BigQuery's TO_HEX(BYTES) returns lowercase hex.  Rewrites:
            //   * BigQuery: untouched.
            //   * DuckDB with a hash inside: dropped entirely (those hash
            //     functions already return hex strings there).
            //   * Snowflake with a hash inside: switch to the *_BINARY hash
            //     variant and stringify via TO_CHAR.
            //   * Presto/Trino: LOWER(TO_HEX(x)) (their TO_HEX is uppercase).
            //   * Everything else: LOWER(HEX(x)).
            "TO_HEX" if args.len() == 1 => {
                let arg = args.remove(0);
                // Check if inner function already returns hex string in certain targets
                let inner_returns_hex = matches!(&arg, Expression::Function(f) if matches!(f.name.as_str(), "MD5" | "SHA1" | "SHA256" | "SHA512"));
                if matches!(target, DialectType::BigQuery) {
                    // BQ->BQ: keep as TO_HEX
                    Ok(Expression::Function(Box::new(Function::new(
                        "TO_HEX".to_string(),
                        vec![arg],
                    ))))
                } else if matches!(target, DialectType::DuckDB) && inner_returns_hex {
                    // DuckDB: MD5/SHA already return hex strings, so TO_HEX is redundant
                    Ok(arg)
                } else if matches!(target, DialectType::Snowflake) && inner_returns_hex {
                    // Snowflake: TO_HEX(SHA1(x)) -> TO_CHAR(SHA1_BINARY(x))
                    // TO_HEX(MD5(x)) -> TO_CHAR(MD5_BINARY(x))
                    // TO_HEX(SHA256(x)) -> TO_CHAR(SHA2_BINARY(x, 256))
                    // TO_HEX(SHA512(x)) -> TO_CHAR(SHA2_BINARY(x, 512))
                    if let Expression::Function(ref inner_f) = arg {
                        let inner_args = inner_f.args.clone();
                        let binary_func = match inner_f.name.to_uppercase().as_str() {
                            "SHA1" => Expression::Function(Box::new(Function::new(
                                "SHA1_BINARY".to_string(),
                                inner_args,
                            ))),
                            "MD5" => Expression::Function(Box::new(Function::new(
                                "MD5_BINARY".to_string(),
                                inner_args,
                            ))),
                            "SHA256" => {
                                // SHA2_BINARY takes the digest size as a trailing arg.
                                let mut a = inner_args;
                                a.push(Expression::number(256));
                                Expression::Function(Box::new(Function::new(
                                    "SHA2_BINARY".to_string(),
                                    a,
                                )))
                            }
                            "SHA512" => {
                                let mut a = inner_args;
                                a.push(Expression::number(512));
                                Expression::Function(Box::new(Function::new(
                                    "SHA2_BINARY".to_string(),
                                    a,
                                )))
                            }
                            _ => arg.clone(),
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "TO_CHAR".to_string(),
                            vec![binary_func],
                        ))))
                    } else {
                        // Unreachable in practice: inner_returns_hex implies arg is
                        // a Function, but fall back to LOWER(HEX(x)) defensively.
                        let inner = Expression::Function(Box::new(Function::new(
                            "HEX".to_string(),
                            vec![arg],
                        )));
                        Ok(Expression::Lower(Box::new(
                            crate::expressions::UnaryFunc::new(inner),
                        )))
                    }
                } else if matches!(target, DialectType::Presto | DialectType::Trino) {
                    let inner = Expression::Function(Box::new(Function::new(
                        "TO_HEX".to_string(),
                        vec![arg],
                    )));
                    Ok(Expression::Lower(Box::new(
                        crate::expressions::UnaryFunc::new(inner),
                    )))
                } else {
                    let inner =
                        Expression::Function(Box::new(Function::new("HEX".to_string(), vec![arg])));
                    Ok(Expression::Lower(Box::new(
                        crate::expressions::UnaryFunc::new(inner),
                    )))
                }
            }
29821
            // LAST_DAY(date, unit) -> LAST_DAY(date): the unit argument is dropped
            // for every target.
            //
            // NOTE(review): the unit is stripped unconditionally, so a non-MONTH
            // unit (e.g. LAST_DAY(d, YEAR)) silently becomes month semantics —
            // confirm that upstream validation restricts the unit to MONTH.
            "LAST_DAY" if args.len() == 2 => {
                let date = args.remove(0);
                let _unit = args.remove(0); // Strip the unit (MONTH is default)
                Ok(Expression::Function(Box::new(Function::new(
                    "LAST_DAY".to_string(),
                    vec![date],
                ))))
            }
29831
            // GENERATE_ARRAY(start, end, step?) -> GenerateSeries expression
            //
            // Uses `get(..).cloned()` so missing arguments simply become None
            // fields rather than panicking; the generator renders the target's
            // series/array-generation syntax.
            "GENERATE_ARRAY" => {
                let start = args.get(0).cloned();
                let end = args.get(1).cloned();
                let step = args.get(2).cloned();
                Ok(Expression::GenerateSeries(Box::new(
                    crate::expressions::GenerateSeries {
                        start: start.map(Box::new),
                        end: end.map(Box::new),
                        step: step.map(Box::new),
                        is_end_exclusive: None,
                    },
                )))
            }
29846
            // GENERATE_TIMESTAMP_ARRAY(start, end, step) -> GenerateSeries expression
            "GENERATE_TIMESTAMP_ARRAY" => {
                let start = args.get(0).cloned();
                let end = args.get(1).cloned();
                let step = args.get(2).cloned();

                if matches!(target, DialectType::DuckDB) {
                    // DuckDB: GENERATE_SERIES(CAST(start AS TIMESTAMP), CAST(end AS TIMESTAMP), step)
                    // Only cast string literals - leave columns/expressions as-is
                    let maybe_cast_ts = |expr: Expression| -> Expression {
                        if matches!(&expr, Expression::Literal(Literal::String(_))) {
                            Expression::Cast(Box::new(Cast {
                                this: expr,
                                to: DataType::Timestamp {
                                    precision: None,
                                    timezone: false,
                                },
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            }))
                        } else {
                            expr
                        }
                    };
                    // The closure captures nothing, so it can be reused for both ends.
                    let cast_start = start.map(maybe_cast_ts);
                    let cast_end = end.map(maybe_cast_ts);
                    Ok(Expression::GenerateSeries(Box::new(
                        crate::expressions::GenerateSeries {
                            start: cast_start.map(Box::new),
                            end: cast_end.map(Box::new),
                            step: step.map(Box::new),
                            is_end_exclusive: None,
                        },
                    )))
                } else {
                    // All other targets: plain GenerateSeries with the raw bounds.
                    Ok(Expression::GenerateSeries(Box::new(
                        crate::expressions::GenerateSeries {
                            start: start.map(Box::new),
                            end: end.map(Box::new),
                            step: step.map(Box::new),
                            is_end_exclusive: None,
                        },
                    )))
                }
            }
29895
            // TO_JSON(x) -> target-specific (from Spark/Hive)
            //
            // NOTE(review): the Presto/Trino and DuckDB branches are duplicated
            // verbatim in the TO_JSON_STRING arm below — a shared helper would
            // keep the two in sync.
            "TO_JSON" => {
                match target {
                    DialectType::Presto | DialectType::Trino => {
                        // JSON_FORMAT(CAST(x AS JSON))
                        let arg = args
                            .into_iter()
                            .next()
                            .unwrap_or(Expression::Null(crate::expressions::Null));
                        let cast_json = Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Custom {
                                name: "JSON".to_string(),
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "JSON_FORMAT".to_string(),
                            vec![cast_json],
                        ))))
                    }
                    DialectType::BigQuery => Ok(Expression::Function(Box::new(Function::new(
                        "TO_JSON_STRING".to_string(),
                        args,
                    )))),
                    DialectType::DuckDB => {
                        // CAST(TO_JSON(x) AS TEXT) — DuckDB's TO_JSON yields a JSON
                        // value; the cast produces the string the source expects.
                        let arg = args
                            .into_iter()
                            .next()
                            .unwrap_or(Expression::Null(crate::expressions::Null));
                        let to_json = Expression::Function(Box::new(Function::new(
                            "TO_JSON".to_string(),
                            vec![arg],
                        )));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: to_json,
                            to: DataType::Text,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "TO_JSON".to_string(),
                        args,
                    )))),
                }
            }
29951
            // TO_JSON_STRING(x) -> target-specific
            //
            // BigQuery's TO_JSON_STRING maps to TO_JSON on the Spark family and
            // Snowflake; Presto/Trino and DuckDB mirror the TO_JSON arm above.
            "TO_JSON_STRING" => {
                match target {
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive => Ok(
                        Expression::Function(Box::new(Function::new("TO_JSON".to_string(), args))),
                    ),
                    DialectType::Presto | DialectType::Trino => {
                        // JSON_FORMAT(CAST(x AS JSON))
                        let arg = args
                            .into_iter()
                            .next()
                            .unwrap_or(Expression::Null(crate::expressions::Null));
                        let cast_json = Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Custom {
                                name: "JSON".to_string(),
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "JSON_FORMAT".to_string(),
                            vec![cast_json],
                        ))))
                    }
                    DialectType::DuckDB => {
                        // CAST(TO_JSON(x) AS TEXT)
                        let arg = args
                            .into_iter()
                            .next()
                            .unwrap_or(Expression::Null(crate::expressions::Null));
                        let to_json = Expression::Function(Box::new(Function::new(
                            "TO_JSON".to_string(),
                            vec![arg],
                        )));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: to_json,
                            to: DataType::Text,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    }
                    DialectType::Snowflake => {
                        // TO_JSON(x)
                        Ok(Expression::Function(Box::new(Function::new(
                            "TO_JSON".to_string(),
                            args,
                        ))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "TO_JSON_STRING".to_string(),
                        args,
                    )))),
                }
            }
30013
            // SAFE_ADD(x, y) -> SafeAdd expression
            // (dedicated AST node; the generator picks each target's rendering)
            "SAFE_ADD" if args.len() == 2 => {
                let x = args.remove(0);
                let y = args.remove(0);
                Ok(Expression::SafeAdd(Box::new(crate::expressions::SafeAdd {
                    this: Box::new(x),
                    expression: Box::new(y),
                })))
            }
30023
            // SAFE_SUBTRACT(x, y) -> SafeSubtract expression
            // (parallel to SAFE_ADD above)
            "SAFE_SUBTRACT" if args.len() == 2 => {
                let x = args.remove(0);
                let y = args.remove(0);
                Ok(Expression::SafeSubtract(Box::new(
                    crate::expressions::SafeSubtract {
                        this: Box::new(x),
                        expression: Box::new(y),
                    },
                )))
            }
30035
            // SAFE_MULTIPLY(x, y) -> SafeMultiply expression
            // (parallel to SAFE_ADD / SAFE_SUBTRACT above)
            "SAFE_MULTIPLY" if args.len() == 2 => {
                let x = args.remove(0);
                let y = args.remove(0);
                Ok(Expression::SafeMultiply(Box::new(
                    crate::expressions::SafeMultiply {
                        this: Box::new(x),
                        expression: Box::new(y),
                    },
                )))
            }
30047
            // REGEXP_CONTAINS(str, pattern) -> RegexpLike expression
            // (no flags argument in the BigQuery form, hence flags: None)
            "REGEXP_CONTAINS" if args.len() == 2 => {
                let str_expr = args.remove(0);
                let pattern = args.remove(0);
                Ok(Expression::RegexpLike(Box::new(
                    crate::expressions::RegexpFunc {
                        this: str_expr,
                        pattern,
                        flags: None,
                    },
                )))
            }
30060
            // CONTAINS_SUBSTR(a, b) -> CONTAINS(LOWER(a), LOWER(b))
            // Lower-casing both operands reproduces CONTAINS_SUBSTR's
            // case-insensitive matching on targets whose CONTAINS is
            // case-sensitive.
            "CONTAINS_SUBSTR" if args.len() == 2 => {
                let a = args.remove(0);
                let b = args.remove(0);
                let lower_a = Expression::Lower(Box::new(crate::expressions::UnaryFunc::new(a)));
                let lower_b = Expression::Lower(Box::new(crate::expressions::UnaryFunc::new(b)));
                Ok(Expression::Function(Box::new(Function::new(
                    "CONTAINS".to_string(),
                    vec![lower_a, lower_b],
                ))))
            }
30072
            // INT64(x) -> CAST(x AS BIGINT)
            // (BigQuery JSON scalar extractor rewritten as a plain cast)
            "INT64" if args.len() == 1 => {
                let arg = args.remove(0);
                Ok(Expression::Cast(Box::new(Cast {
                    this: arg,
                    to: DataType::BigInt { length: None },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                })))
            }
30086
30087 // INSTR(str, substr) -> target-specific
30088 "INSTR" if args.len() >= 2 => {
30089 let str_expr = args.remove(0);
30090 let substr = args.remove(0);
30091 if matches!(target, DialectType::Snowflake) {
30092 // CHARINDEX(substr, str)
30093 Ok(Expression::Function(Box::new(Function::new(
30094 "CHARINDEX".to_string(),
30095 vec![substr, str_expr],
30096 ))))
30097 } else if matches!(target, DialectType::BigQuery) {
30098 // Keep as INSTR
30099 Ok(Expression::Function(Box::new(Function::new(
30100 "INSTR".to_string(),
30101 vec![str_expr, substr],
30102 ))))
30103 } else {
30104 // Default: keep as INSTR
30105 Ok(Expression::Function(Box::new(Function::new(
30106 "INSTR".to_string(),
30107 vec![str_expr, substr],
30108 ))))
30109 }
30110 }
30111
            // BigQuery DATE_TRUNC(expr, unit) -> DATE_TRUNC('unit', expr) for standard SQL
            //
            // BigQuery puts the expression first and the unit second; the
            // "standard" group below swaps to unit-first with the unit as a
            // string literal.  Targets outside that group keep BigQuery's order.
            //
            // NOTE(review): TSQL is in the string-literal group, but SQL Server's
            // DATE_TRUNC takes a bare datepart keyword, not a quoted string —
            // confirm the TSQL generator unquotes it.
            "DATE_TRUNC" if args.len() == 2 => {
                let expr = args.remove(0);
                let unit_expr = args.remove(0);
                let unit_str = get_unit_str(&unit_expr);

                match target {
                    DialectType::DuckDB
                    | DialectType::Snowflake
                    | DialectType::PostgreSQL
                    | DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Databricks
                    | DialectType::Spark
                    | DialectType::Redshift
                    | DialectType::ClickHouse
                    | DialectType::TSQL => {
                        // Standard: DATE_TRUNC('UNIT', expr)
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_TRUNC".to_string(),
                            vec![Expression::Literal(Literal::String(unit_str)), expr],
                        ))))
                    }
                    _ => {
                        // Keep BigQuery arg order: DATE_TRUNC(expr, unit)
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_TRUNC".to_string(),
                            vec![expr, unit_expr],
                        ))))
                    }
                }
            }
30144
            // TIMESTAMP_TRUNC / DATETIME_TRUNC -> target-specific
            //
            // Accepts 2 or 3 args: (ts, unit) or (ts, unit, timezone).  DuckDB
            // gets special handling: for coarse units (DAY and larger) the
            // timezone must be applied before truncation and re-applied after,
            // so the expression becomes
            //   DATE_TRUNC('UNIT', ts AT TIME ZONE 'tz') AT TIME ZONE 'tz';
            // for finer units the timezone is dropped entirely.
            "TIMESTAMP_TRUNC" | "DATETIME_TRUNC" if args.len() >= 2 => {
                // TIMESTAMP_TRUNC(ts, unit) or TIMESTAMP_TRUNC(ts, unit, timezone)
                let ts = args.remove(0);
                let unit_expr = args.remove(0);
                let tz = if !args.is_empty() {
                    Some(args.remove(0))
                } else {
                    None
                };
                let unit_str = get_unit_str(&unit_expr);

                match target {
                    DialectType::DuckDB => {
                        // DuckDB: DATE_TRUNC('UNIT', CAST(ts AS TIMESTAMPTZ))
                        // With timezone: DATE_TRUNC('UNIT', ts AT TIME ZONE 'tz') AT TIME ZONE 'tz' (for DAY granularity)
                        // Without timezone for MINUTE+ granularity: just DATE_TRUNC
                        let is_coarse = matches!(
                            unit_str.as_str(),
                            "DAY" | "WEEK" | "MONTH" | "QUARTER" | "YEAR"
                        );
                        // For DATETIME_TRUNC, cast string args to TIMESTAMP
                        // (plain TIMESTAMP, not TIMESTAMPTZ, since DATETIME has
                        // no timezone); everything else goes through the shared
                        // maybe_cast_ts_to_tz helper.
                        let cast_ts = if name == "DATETIME_TRUNC" {
                            match ts {
                                Expression::Literal(Literal::String(ref _s)) => {
                                    Expression::Cast(Box::new(Cast {
                                        this: ts,
                                        to: DataType::Timestamp {
                                            precision: None,
                                            timezone: false,
                                        },
                                        trailing_comments: vec![],
                                        double_colon_syntax: false,
                                        format: None,
                                        default: None,
                                        inferred_type: None,
                                    }))
                                }
                                _ => Self::maybe_cast_ts_to_tz(ts, &name),
                            }
                        } else {
                            Self::maybe_cast_ts_to_tz(ts, &name)
                        };

                        if let Some(tz_arg) = tz {
                            if is_coarse {
                                // DATE_TRUNC('UNIT', ts AT TIME ZONE 'tz') AT TIME ZONE 'tz'
                                let at_tz = Expression::AtTimeZone(Box::new(
                                    crate::expressions::AtTimeZone {
                                        this: cast_ts,
                                        zone: tz_arg.clone(),
                                    },
                                ));
                                let date_trunc = Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![Expression::Literal(Literal::String(unit_str)), at_tz],
                                )));
                                Ok(Expression::AtTimeZone(Box::new(
                                    crate::expressions::AtTimeZone {
                                        this: date_trunc,
                                        zone: tz_arg,
                                    },
                                )))
                            } else {
                                // For MINUTE/HOUR: no AT TIME ZONE wrapper, just DATE_TRUNC('UNIT', ts)
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![Expression::Literal(Literal::String(unit_str)), cast_ts],
                                ))))
                            }
                        } else {
                            // No timezone: DATE_TRUNC('UNIT', CAST(ts AS TIMESTAMPTZ))
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_TRUNC".to_string(),
                                vec![Expression::Literal(Literal::String(unit_str)), cast_ts],
                            ))))
                        }
                    }
                    DialectType::Databricks | DialectType::Spark => {
                        // Databricks/Spark: DATE_TRUNC('UNIT', ts)
                        // NOTE(review): a third timezone argument is dropped on
                        // this path — confirm that is intended.
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_TRUNC".to_string(),
                            vec![Expression::Literal(Literal::String(unit_str)), ts],
                        ))))
                    }
                    _ => {
                        // Default: keep as TIMESTAMP_TRUNC('UNIT', ts, [tz])
                        let unit = Expression::Literal(Literal::String(unit_str));
                        let mut date_trunc_args = vec![unit, ts];
                        if let Some(tz_arg) = tz {
                            date_trunc_args.push(tz_arg);
                        }
                        Ok(Expression::Function(Box::new(Function::new(
                            "TIMESTAMP_TRUNC".to_string(),
                            date_trunc_args,
                        ))))
                    }
                }
            }
30244
            // TIME(h, m, s) -> target-specific, TIME('string') -> CAST('string' AS TIME)
            //
            // Dispatches on arity:
            //   3 args -> constructor (TIMEFROMPARTS / MAKETIME / MAKE_TIME),
            //   1 arg  -> cast to TIME (TIMESTAMP on Spark, which lacks TIME),
            //   2 args -> (expr, timezone) conversion via AT TIME ZONE,
            //   other  -> passed through unchanged.
            "TIME" => {
                if args.len() == 3 {
                    // TIME(h, m, s) constructor
                    match target {
                        DialectType::TSQL => {
                            // TIMEFROMPARTS(h, m, s, 0, 0) — the two appended zeros
                            // are the fractional-seconds value and its precision.
                            args.push(Expression::number(0));
                            args.push(Expression::number(0));
                            Ok(Expression::Function(Box::new(Function::new(
                                "TIMEFROMPARTS".to_string(),
                                args,
                            ))))
                        }
                        DialectType::MySQL => Ok(Expression::Function(Box::new(Function::new(
                            "MAKETIME".to_string(),
                            args,
                        )))),
                        DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
                            Function::new("MAKE_TIME".to_string(), args),
                        ))),
                        _ => Ok(Expression::Function(Box::new(Function::new(
                            "TIME".to_string(),
                            args,
                        )))),
                    }
                } else if args.len() == 1 {
                    let arg = args.remove(0);
                    if matches!(target, DialectType::Spark) {
                        // Spark: CAST(x AS TIMESTAMP) (yes, TIMESTAMP not TIME)
                        Ok(Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    } else {
                        // Most targets: CAST(x AS TIME)
                        Ok(Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Time {
                                precision: None,
                                timezone: false,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    }
                } else if args.len() == 2 {
                    // TIME(expr, timezone) -> CAST(CAST(expr AS TIMESTAMPTZ) AT TIME ZONE tz AS TIME)
                    let expr = args.remove(0);
                    let tz = args.remove(0);
                    let cast_tstz = Expression::Cast(Box::new(Cast {
                        this: expr,
                        to: DataType::Timestamp {
                            timezone: true,
                            precision: None,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    }));
                    let at_tz = Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                        this: cast_tstz,
                        zone: tz,
                    }));
                    Ok(Expression::Cast(Box::new(Cast {
                        this: at_tz,
                        to: DataType::Time {
                            precision: None,
                            timezone: false,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                } else {
                    // Zero args or 4+: leave untouched for the target to handle.
                    Ok(Expression::Function(Box::new(Function::new(
                        "TIME".to_string(),
                        args,
                    ))))
                }
            }
30341
30342 // DATETIME('string') -> CAST('string' AS TIMESTAMP)
30343 // DATETIME('date', TIME 'time') -> CAST(CAST('date' AS DATE) + CAST('time' AS TIME) AS TIMESTAMP)
30344 // DATETIME('string', 'timezone') -> CAST(CAST('string' AS TIMESTAMPTZ) AT TIME ZONE tz AS TIMESTAMP)
30345 // DATETIME(y, m, d, h, min, s) -> target-specific
30346 "DATETIME" => {
30347 // For BigQuery target: keep DATETIME function but convert TIME literal to CAST
30348 if matches!(target, DialectType::BigQuery) {
30349 if args.len() == 2 {
30350 let has_time_literal =
30351 matches!(&args[1], Expression::Literal(Literal::Time(_)));
30352 if has_time_literal {
30353 let first = args.remove(0);
30354 let second = args.remove(0);
30355 let time_as_cast = match second {
30356 Expression::Literal(Literal::Time(s)) => {
30357 Expression::Cast(Box::new(Cast {
30358 this: Expression::Literal(Literal::String(s)),
30359 to: DataType::Time {
30360 precision: None,
30361 timezone: false,
30362 },
30363 trailing_comments: vec![],
30364 double_colon_syntax: false,
30365 format: None,
30366 default: None,
30367 inferred_type: None,
30368 }))
30369 }
30370 other => other,
30371 };
30372 return Ok(Expression::Function(Box::new(Function::new(
30373 "DATETIME".to_string(),
30374 vec![first, time_as_cast],
30375 ))));
30376 }
30377 }
30378 return Ok(Expression::Function(Box::new(Function::new(
30379 "DATETIME".to_string(),
30380 args,
30381 ))));
30382 }
30383
30384 if args.len() == 1 {
30385 let arg = args.remove(0);
30386 Ok(Expression::Cast(Box::new(Cast {
30387 this: arg,
30388 to: DataType::Timestamp {
30389 timezone: false,
30390 precision: None,
30391 },
30392 trailing_comments: vec![],
30393 double_colon_syntax: false,
30394 format: None,
30395 default: None,
30396 inferred_type: None,
30397 })))
30398 } else if args.len() == 2 {
30399 let first = args.remove(0);
30400 let second = args.remove(0);
30401 // Check if second arg is a TIME literal
30402 let is_time_literal = matches!(&second, Expression::Literal(Literal::Time(_)));
30403 if is_time_literal {
30404 // DATETIME('date', TIME 'time') -> CAST(CAST(date AS DATE) + CAST('time' AS TIME) AS TIMESTAMP)
30405 let cast_date = Expression::Cast(Box::new(Cast {
30406 this: first,
30407 to: DataType::Date,
30408 trailing_comments: vec![],
30409 double_colon_syntax: false,
30410 format: None,
30411 default: None,
30412 inferred_type: None,
30413 }));
30414 // Convert TIME 'x' literal to string 'x' so CAST produces CAST('x' AS TIME) not CAST(TIME 'x' AS TIME)
30415 let time_as_string = match second {
30416 Expression::Literal(Literal::Time(s)) => {
30417 Expression::Literal(Literal::String(s))
30418 }
30419 other => other,
30420 };
30421 let cast_time = Expression::Cast(Box::new(Cast {
30422 this: time_as_string,
30423 to: DataType::Time {
30424 precision: None,
30425 timezone: false,
30426 },
30427 trailing_comments: vec![],
30428 double_colon_syntax: false,
30429 format: None,
30430 default: None,
30431 inferred_type: None,
30432 }));
30433 let add_expr =
30434 Expression::Add(Box::new(BinaryOp::new(cast_date, cast_time)));
30435 Ok(Expression::Cast(Box::new(Cast {
30436 this: add_expr,
30437 to: DataType::Timestamp {
30438 timezone: false,
30439 precision: None,
30440 },
30441 trailing_comments: vec![],
30442 double_colon_syntax: false,
30443 format: None,
30444 default: None,
30445 inferred_type: None,
30446 })))
30447 } else {
30448 // DATETIME('string', 'timezone')
30449 let cast_tstz = Expression::Cast(Box::new(Cast {
30450 this: first,
30451 to: DataType::Timestamp {
30452 timezone: true,
30453 precision: None,
30454 },
30455 trailing_comments: vec![],
30456 double_colon_syntax: false,
30457 format: None,
30458 default: None,
30459 inferred_type: None,
30460 }));
30461 let at_tz =
30462 Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
30463 this: cast_tstz,
30464 zone: second,
30465 }));
30466 Ok(Expression::Cast(Box::new(Cast {
30467 this: at_tz,
30468 to: DataType::Timestamp {
30469 timezone: false,
30470 precision: None,
30471 },
30472 trailing_comments: vec![],
30473 double_colon_syntax: false,
30474 format: None,
30475 default: None,
30476 inferred_type: None,
30477 })))
30478 }
30479 } else if args.len() >= 3 {
30480 // DATETIME(y, m, d, h, min, s) -> TIMESTAMP_FROM_PARTS for Snowflake
30481 // For other targets, use MAKE_TIMESTAMP or similar
30482 if matches!(target, DialectType::Snowflake) {
30483 Ok(Expression::Function(Box::new(Function::new(
30484 "TIMESTAMP_FROM_PARTS".to_string(),
30485 args,
30486 ))))
30487 } else {
30488 Ok(Expression::Function(Box::new(Function::new(
30489 "DATETIME".to_string(),
30490 args,
30491 ))))
30492 }
30493 } else {
30494 Ok(Expression::Function(Box::new(Function::new(
30495 "DATETIME".to_string(),
30496 args,
30497 ))))
30498 }
30499 }
30500
30501 // TIMESTAMP(x) -> CAST(x AS TIMESTAMP WITH TIME ZONE) for Presto
30502 // TIMESTAMP(x, tz) -> CAST(x AS TIMESTAMP) AT TIME ZONE tz for DuckDB
30503 "TIMESTAMP" => {
30504 if args.len() == 1 {
30505 let arg = args.remove(0);
30506 Ok(Expression::Cast(Box::new(Cast {
30507 this: arg,
30508 to: DataType::Timestamp {
30509 timezone: true,
30510 precision: None,
30511 },
30512 trailing_comments: vec![],
30513 double_colon_syntax: false,
30514 format: None,
30515 default: None,
30516 inferred_type: None,
30517 })))
30518 } else if args.len() == 2 {
30519 let arg = args.remove(0);
30520 let tz = args.remove(0);
30521 let cast_ts = Expression::Cast(Box::new(Cast {
30522 this: arg,
30523 to: DataType::Timestamp {
30524 timezone: false,
30525 precision: None,
30526 },
30527 trailing_comments: vec![],
30528 double_colon_syntax: false,
30529 format: None,
30530 default: None,
30531 inferred_type: None,
30532 }));
30533 if matches!(target, DialectType::Snowflake) {
30534 // CONVERT_TIMEZONE('tz', CAST(x AS TIMESTAMP))
30535 Ok(Expression::Function(Box::new(Function::new(
30536 "CONVERT_TIMEZONE".to_string(),
30537 vec![tz, cast_ts],
30538 ))))
30539 } else {
30540 Ok(Expression::AtTimeZone(Box::new(
30541 crate::expressions::AtTimeZone {
30542 this: cast_ts,
30543 zone: tz,
30544 },
30545 )))
30546 }
30547 } else {
30548 Ok(Expression::Function(Box::new(Function::new(
30549 "TIMESTAMP".to_string(),
30550 args,
30551 ))))
30552 }
30553 }
30554
30555 // STRING(x) -> CAST(x AS VARCHAR/TEXT)
30556 // STRING(x, tz) -> CAST(CAST(x AS TIMESTAMP) AT TIME ZONE 'UTC' AT TIME ZONE tz AS VARCHAR/TEXT)
30557 "STRING" => {
30558 if args.len() == 1 {
30559 let arg = args.remove(0);
30560 let cast_type = match target {
30561 DialectType::DuckDB => DataType::Text,
30562 _ => DataType::VarChar {
30563 length: None,
30564 parenthesized_length: false,
30565 },
30566 };
30567 Ok(Expression::Cast(Box::new(Cast {
30568 this: arg,
30569 to: cast_type,
30570 trailing_comments: vec![],
30571 double_colon_syntax: false,
30572 format: None,
30573 default: None,
30574 inferred_type: None,
30575 })))
30576 } else if args.len() == 2 {
30577 let arg = args.remove(0);
30578 let tz = args.remove(0);
30579 let cast_type = match target {
30580 DialectType::DuckDB => DataType::Text,
30581 _ => DataType::VarChar {
30582 length: None,
30583 parenthesized_length: false,
30584 },
30585 };
30586 if matches!(target, DialectType::Snowflake) {
30587 // STRING(x, tz) -> CAST(CONVERT_TIMEZONE('UTC', tz, x) AS VARCHAR)
30588 let convert_tz = Expression::Function(Box::new(Function::new(
30589 "CONVERT_TIMEZONE".to_string(),
30590 vec![
30591 Expression::Literal(Literal::String("UTC".to_string())),
30592 tz,
30593 arg,
30594 ],
30595 )));
30596 Ok(Expression::Cast(Box::new(Cast {
30597 this: convert_tz,
30598 to: cast_type,
30599 trailing_comments: vec![],
30600 double_colon_syntax: false,
30601 format: None,
30602 default: None,
30603 inferred_type: None,
30604 })))
30605 } else {
30606 // STRING(x, tz) -> CAST(CAST(x AS TIMESTAMP) AT TIME ZONE 'UTC' AT TIME ZONE tz AS TEXT/VARCHAR)
30607 let cast_ts = Expression::Cast(Box::new(Cast {
30608 this: arg,
30609 to: DataType::Timestamp {
30610 timezone: false,
30611 precision: None,
30612 },
30613 trailing_comments: vec![],
30614 double_colon_syntax: false,
30615 format: None,
30616 default: None,
30617 inferred_type: None,
30618 }));
30619 let at_utc =
30620 Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
30621 this: cast_ts,
30622 zone: Expression::Literal(Literal::String("UTC".to_string())),
30623 }));
30624 let at_tz =
30625 Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
30626 this: at_utc,
30627 zone: tz,
30628 }));
30629 Ok(Expression::Cast(Box::new(Cast {
30630 this: at_tz,
30631 to: cast_type,
30632 trailing_comments: vec![],
30633 double_colon_syntax: false,
30634 format: None,
30635 default: None,
30636 inferred_type: None,
30637 })))
30638 }
30639 } else {
30640 Ok(Expression::Function(Box::new(Function::new(
30641 "STRING".to_string(),
30642 args,
30643 ))))
30644 }
30645 }
30646
            // UNIX_SECONDS, UNIX_MILLIS, UNIX_MICROS as functions (not expressions)
            "UNIX_SECONDS" if args.len() == 1 => {
                let ts = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // CAST(EPOCH(CAST(ts AS TIMESTAMPTZ)) AS BIGINT)
                        // The outer BIGINT cast pins the result to an integer type.
                        let cast_ts = Self::ensure_cast_timestamptz(ts);
                        let epoch = Expression::Function(Box::new(Function::new(
                            "EPOCH".to_string(),
                            vec![cast_ts],
                        )));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: epoch,
                            to: DataType::BigInt { length: None },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    }
                    DialectType::Snowflake => {
                        // TIMESTAMPDIFF(SECONDS, CAST('1970-01-01 00:00:00+00' AS TIMESTAMPTZ), ts)
                        // i.e. seconds elapsed between the Unix epoch and `ts`.
                        let epoch = Expression::Cast(Box::new(Cast {
                            this: Expression::Literal(Literal::String(
                                "1970-01-01 00:00:00+00".to_string(),
                            )),
                            to: DataType::Timestamp {
                                timezone: true,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        Ok(Expression::TimestampDiff(Box::new(
                            crate::expressions::TimestampDiff {
                                this: Box::new(epoch),
                                expression: Box::new(ts),
                                unit: Some("SECONDS".to_string()),
                            },
                        )))
                    }
                    // Other targets: leave the function call unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "UNIX_SECONDS".to_string(),
                        vec![ts],
                    )))),
                }
            }
30698
30699 "UNIX_MILLIS" if args.len() == 1 => {
30700 let ts = args.remove(0);
30701 match target {
30702 DialectType::DuckDB => {
30703 // EPOCH_MS(CAST(ts AS TIMESTAMPTZ))
30704 let cast_ts = Self::ensure_cast_timestamptz(ts);
30705 Ok(Expression::Function(Box::new(Function::new(
30706 "EPOCH_MS".to_string(),
30707 vec![cast_ts],
30708 ))))
30709 }
30710 _ => Ok(Expression::Function(Box::new(Function::new(
30711 "UNIX_MILLIS".to_string(),
30712 vec![ts],
30713 )))),
30714 }
30715 }
30716
30717 "UNIX_MICROS" if args.len() == 1 => {
30718 let ts = args.remove(0);
30719 match target {
30720 DialectType::DuckDB => {
30721 // EPOCH_US(CAST(ts AS TIMESTAMPTZ))
30722 let cast_ts = Self::ensure_cast_timestamptz(ts);
30723 Ok(Expression::Function(Box::new(Function::new(
30724 "EPOCH_US".to_string(),
30725 vec![cast_ts],
30726 ))))
30727 }
30728 _ => Ok(Expression::Function(Box::new(Function::new(
30729 "UNIX_MICROS".to_string(),
30730 vec![ts],
30731 )))),
30732 }
30733 }
30734
30735 // ARRAY_CONCAT / LIST_CONCAT -> target-specific
30736 "ARRAY_CONCAT" | "LIST_CONCAT" => {
30737 match target {
30738 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
30739 // CONCAT(arr1, arr2, ...)
30740 Ok(Expression::Function(Box::new(Function::new(
30741 "CONCAT".to_string(),
30742 args,
30743 ))))
30744 }
30745 DialectType::Presto | DialectType::Trino => {
30746 // CONCAT(arr1, arr2, ...)
30747 Ok(Expression::Function(Box::new(Function::new(
30748 "CONCAT".to_string(),
30749 args,
30750 ))))
30751 }
30752 DialectType::Snowflake => {
30753 // ARRAY_CAT(arr1, ARRAY_CAT(arr2, arr3))
30754 if args.len() == 1 {
30755 // ARRAY_CAT requires 2 args, add empty array as []
30756 let empty_arr = Expression::ArrayFunc(Box::new(
30757 crate::expressions::ArrayConstructor {
30758 expressions: vec![],
30759 bracket_notation: true,
30760 use_list_keyword: false,
30761 },
30762 ));
30763 let mut new_args = args;
30764 new_args.push(empty_arr);
30765 Ok(Expression::Function(Box::new(Function::new(
30766 "ARRAY_CAT".to_string(),
30767 new_args,
30768 ))))
30769 } else if args.is_empty() {
30770 Ok(Expression::Function(Box::new(Function::new(
30771 "ARRAY_CAT".to_string(),
30772 args,
30773 ))))
30774 } else {
30775 let mut it = args.into_iter().rev();
30776 let mut result = it.next().unwrap();
30777 for arr in it {
30778 result = Expression::Function(Box::new(Function::new(
30779 "ARRAY_CAT".to_string(),
30780 vec![arr, result],
30781 )));
30782 }
30783 Ok(result)
30784 }
30785 }
30786 DialectType::PostgreSQL => {
30787 // ARRAY_CAT(arr1, ARRAY_CAT(arr2, arr3))
30788 if args.len() <= 1 {
30789 Ok(Expression::Function(Box::new(Function::new(
30790 "ARRAY_CAT".to_string(),
30791 args,
30792 ))))
30793 } else {
30794 let mut it = args.into_iter().rev();
30795 let mut result = it.next().unwrap();
30796 for arr in it {
30797 result = Expression::Function(Box::new(Function::new(
30798 "ARRAY_CAT".to_string(),
30799 vec![arr, result],
30800 )));
30801 }
30802 Ok(result)
30803 }
30804 }
30805 DialectType::Redshift => {
30806 // ARRAY_CONCAT(arr1, ARRAY_CONCAT(arr2, arr3))
30807 if args.len() <= 2 {
30808 Ok(Expression::Function(Box::new(Function::new(
30809 "ARRAY_CONCAT".to_string(),
30810 args,
30811 ))))
30812 } else {
30813 let mut it = args.into_iter().rev();
30814 let mut result = it.next().unwrap();
30815 for arr in it {
30816 result = Expression::Function(Box::new(Function::new(
30817 "ARRAY_CONCAT".to_string(),
30818 vec![arr, result],
30819 )));
30820 }
30821 Ok(result)
30822 }
30823 }
30824 DialectType::DuckDB => {
30825 // LIST_CONCAT supports multiple args natively in DuckDB
30826 Ok(Expression::Function(Box::new(Function::new(
30827 "LIST_CONCAT".to_string(),
30828 args,
30829 ))))
30830 }
30831 _ => Ok(Expression::Function(Box::new(Function::new(
30832 "ARRAY_CONCAT".to_string(),
30833 args,
30834 )))),
30835 }
30836 }
30837
30838 // ARRAY_CONCAT_AGG -> Snowflake: ARRAY_FLATTEN(ARRAY_AGG(x))
30839 "ARRAY_CONCAT_AGG" if args.len() == 1 => {
30840 let arg = args.remove(0);
30841 match target {
30842 DialectType::Snowflake => {
30843 let array_agg =
30844 Expression::ArrayAgg(Box::new(crate::expressions::AggFunc {
30845 this: arg,
30846 distinct: false,
30847 filter: None,
30848 order_by: vec![],
30849 name: None,
30850 ignore_nulls: None,
30851 having_max: None,
30852 limit: None,
30853 inferred_type: None,
30854 }));
30855 Ok(Expression::Function(Box::new(Function::new(
30856 "ARRAY_FLATTEN".to_string(),
30857 vec![array_agg],
30858 ))))
30859 }
30860 _ => Ok(Expression::Function(Box::new(Function::new(
30861 "ARRAY_CONCAT_AGG".to_string(),
30862 vec![arg],
30863 )))),
30864 }
30865 }
30866
30867 // MD5/SHA1/SHA256/SHA512 -> target-specific hash functions
30868 "MD5" if args.len() == 1 => {
30869 let arg = args.remove(0);
30870 match target {
30871 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
30872 // UNHEX(MD5(x))
30873 let md5 = Expression::Function(Box::new(Function::new(
30874 "MD5".to_string(),
30875 vec![arg],
30876 )));
30877 Ok(Expression::Function(Box::new(Function::new(
30878 "UNHEX".to_string(),
30879 vec![md5],
30880 ))))
30881 }
30882 DialectType::Snowflake => {
30883 // MD5_BINARY(x)
30884 Ok(Expression::Function(Box::new(Function::new(
30885 "MD5_BINARY".to_string(),
30886 vec![arg],
30887 ))))
30888 }
30889 _ => Ok(Expression::Function(Box::new(Function::new(
30890 "MD5".to_string(),
30891 vec![arg],
30892 )))),
30893 }
30894 }
30895
30896 "SHA1" if args.len() == 1 => {
30897 let arg = args.remove(0);
30898 match target {
30899 DialectType::DuckDB => {
30900 // UNHEX(SHA1(x))
30901 let sha1 = Expression::Function(Box::new(Function::new(
30902 "SHA1".to_string(),
30903 vec![arg],
30904 )));
30905 Ok(Expression::Function(Box::new(Function::new(
30906 "UNHEX".to_string(),
30907 vec![sha1],
30908 ))))
30909 }
30910 _ => Ok(Expression::Function(Box::new(Function::new(
30911 "SHA1".to_string(),
30912 vec![arg],
30913 )))),
30914 }
30915 }
30916
30917 "SHA256" if args.len() == 1 => {
30918 let arg = args.remove(0);
30919 match target {
30920 DialectType::DuckDB => {
30921 // UNHEX(SHA256(x))
30922 let sha = Expression::Function(Box::new(Function::new(
30923 "SHA256".to_string(),
30924 vec![arg],
30925 )));
30926 Ok(Expression::Function(Box::new(Function::new(
30927 "UNHEX".to_string(),
30928 vec![sha],
30929 ))))
30930 }
30931 DialectType::Snowflake => {
30932 // SHA2_BINARY(x, 256)
30933 Ok(Expression::Function(Box::new(Function::new(
30934 "SHA2_BINARY".to_string(),
30935 vec![arg, Expression::number(256)],
30936 ))))
30937 }
30938 DialectType::Redshift | DialectType::Spark => {
30939 // SHA2(x, 256)
30940 Ok(Expression::Function(Box::new(Function::new(
30941 "SHA2".to_string(),
30942 vec![arg, Expression::number(256)],
30943 ))))
30944 }
30945 _ => Ok(Expression::Function(Box::new(Function::new(
30946 "SHA256".to_string(),
30947 vec![arg],
30948 )))),
30949 }
30950 }
30951
30952 "SHA512" if args.len() == 1 => {
30953 let arg = args.remove(0);
30954 match target {
30955 DialectType::Snowflake => {
30956 // SHA2_BINARY(x, 512)
30957 Ok(Expression::Function(Box::new(Function::new(
30958 "SHA2_BINARY".to_string(),
30959 vec![arg, Expression::number(512)],
30960 ))))
30961 }
30962 DialectType::Redshift | DialectType::Spark => {
30963 // SHA2(x, 512)
30964 Ok(Expression::Function(Box::new(Function::new(
30965 "SHA2".to_string(),
30966 vec![arg, Expression::number(512)],
30967 ))))
30968 }
30969 _ => Ok(Expression::Function(Box::new(Function::new(
30970 "SHA512".to_string(),
30971 vec![arg],
30972 )))),
30973 }
30974 }
30975
            // REGEXP_EXTRACT_ALL(str, pattern) -> add default group arg
            "REGEXP_EXTRACT_ALL" if args.len() == 2 => {
                let str_expr = args.remove(0);
                let pattern = args.remove(0);

                // Check if pattern contains capturing groups (parentheses)
                // NOTE(review): this literal scan also matches escaped `\(` and
                // non-capturing `(?:...)` groups — confirm that is acceptable.
                // Non-literal patterns are conservatively treated as group-free.
                let has_groups = match &pattern {
                    Expression::Literal(Literal::String(s)) => s.contains('(') && s.contains(')'),
                    _ => false,
                };

                match target {
                    DialectType::DuckDB => {
                        // DuckDB gets an explicit group index: 1 when the pattern
                        // captures, 0 (whole match) otherwise.
                        let group = if has_groups {
                            Expression::number(1)
                        } else {
                            Expression::number(0)
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "REGEXP_EXTRACT_ALL".to_string(),
                            vec![str_expr, pattern, group],
                        ))))
                    }
                    DialectType::Spark | DialectType::Databricks => {
                        // Spark's default group_index is 1 (same as BigQuery), so omit for capturing groups
                        if has_groups {
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![str_expr, pattern],
                            ))))
                        } else {
                            // No capture groups: force group 0 (the whole match).
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![str_expr, pattern, Expression::number(0)],
                            ))))
                        }
                    }
                    DialectType::Presto | DialectType::Trino => {
                        // Presto/Trino: pass group 1 explicitly only when the
                        // pattern captures; otherwise use the two-argument form.
                        if has_groups {
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![str_expr, pattern, Expression::number(1)],
                            ))))
                        } else {
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![str_expr, pattern],
                            ))))
                        }
                    }
                    DialectType::Snowflake => {
                        if has_groups {
                            // REGEXP_EXTRACT_ALL(str, pattern, 1, 1, 'c', 1)
                            // presumably (position, occurrence, parameters, group) per
                            // Snowflake's regexp signature — verify against docs.
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![
                                    str_expr,
                                    pattern,
                                    Expression::number(1),
                                    Expression::number(1),
                                    Expression::Literal(Literal::String("c".to_string())),
                                    Expression::number(1),
                                ],
                            ))))
                        } else {
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![str_expr, pattern],
                            ))))
                        }
                    }
                    // Other targets: keep the two-argument form unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "REGEXP_EXTRACT_ALL".to_string(),
                        vec![str_expr, pattern],
                    )))),
                }
            }
31053
31054 // MOD(x, y) -> x % y for PostgreSQL/DuckDB
31055 "MOD" if args.len() == 2 => {
31056 match target {
31057 DialectType::PostgreSQL
31058 | DialectType::DuckDB
31059 | DialectType::Presto
31060 | DialectType::Trino
31061 | DialectType::Athena
31062 | DialectType::Snowflake => {
31063 let x = args.remove(0);
31064 let y = args.remove(0);
31065 // Wrap complex expressions in parens to preserve precedence
31066 let needs_paren = |e: &Expression| {
31067 matches!(
31068 e,
31069 Expression::Add(_)
31070 | Expression::Sub(_)
31071 | Expression::Mul(_)
31072 | Expression::Div(_)
31073 )
31074 };
31075 let x = if needs_paren(&x) {
31076 Expression::Paren(Box::new(crate::expressions::Paren {
31077 this: x,
31078 trailing_comments: vec![],
31079 }))
31080 } else {
31081 x
31082 };
31083 let y = if needs_paren(&y) {
31084 Expression::Paren(Box::new(crate::expressions::Paren {
31085 this: y,
31086 trailing_comments: vec![],
31087 }))
31088 } else {
31089 y
31090 };
31091 Ok(Expression::Mod(Box::new(
31092 crate::expressions::BinaryOp::new(x, y),
31093 )))
31094 }
31095 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
31096 // Hive/Spark: a % b
31097 let x = args.remove(0);
31098 let y = args.remove(0);
31099 let needs_paren = |e: &Expression| {
31100 matches!(
31101 e,
31102 Expression::Add(_)
31103 | Expression::Sub(_)
31104 | Expression::Mul(_)
31105 | Expression::Div(_)
31106 )
31107 };
31108 let x = if needs_paren(&x) {
31109 Expression::Paren(Box::new(crate::expressions::Paren {
31110 this: x,
31111 trailing_comments: vec![],
31112 }))
31113 } else {
31114 x
31115 };
31116 let y = if needs_paren(&y) {
31117 Expression::Paren(Box::new(crate::expressions::Paren {
31118 this: y,
31119 trailing_comments: vec![],
31120 }))
31121 } else {
31122 y
31123 };
31124 Ok(Expression::Mod(Box::new(
31125 crate::expressions::BinaryOp::new(x, y),
31126 )))
31127 }
31128 _ => Ok(Expression::Function(Box::new(Function::new(
31129 "MOD".to_string(),
31130 args,
31131 )))),
31132 }
31133 }
31134
31135 // ARRAY_FILTER(arr, lambda) -> FILTER for Hive/Spark/Presto, ARRAY_FILTER for StarRocks
31136 "ARRAY_FILTER" if args.len() == 2 => {
31137 let name = match target {
31138 DialectType::DuckDB => "LIST_FILTER",
31139 DialectType::StarRocks => "ARRAY_FILTER",
31140 _ => "FILTER",
31141 };
31142 Ok(Expression::Function(Box::new(Function::new(
31143 name.to_string(),
31144 args,
31145 ))))
31146 }
31147 // FILTER(arr, lambda) -> ARRAY_FILTER for StarRocks, LIST_FILTER for DuckDB
31148 "FILTER" if args.len() == 2 => {
31149 let name = match target {
31150 DialectType::DuckDB => "LIST_FILTER",
31151 DialectType::StarRocks => "ARRAY_FILTER",
31152 _ => "FILTER",
31153 };
31154 Ok(Expression::Function(Box::new(Function::new(
31155 name.to_string(),
31156 args,
31157 ))))
31158 }
31159 // REDUCE(arr, init, lambda1, lambda2) -> AGGREGATE for Spark
31160 "REDUCE" if args.len() >= 3 => {
31161 let name = match target {
31162 DialectType::Spark | DialectType::Databricks => "AGGREGATE",
31163 _ => "REDUCE",
31164 };
31165 Ok(Expression::Function(Box::new(Function::new(
31166 name.to_string(),
31167 args,
31168 ))))
31169 }
31170 // ARRAY_REVERSE(x) -> arrayReverse for ClickHouse (handled by generator)
31171 "ARRAY_REVERSE" if args.len() == 1 => Ok(Expression::Function(Box::new(
31172 Function::new("ARRAY_REVERSE".to_string(), args),
31173 ))),
31174
            // CONCAT(a, b, ...) -> a || b || ... for DuckDB with 3+ args
            "CONCAT" if args.len() > 2 => match target {
                DialectType::DuckDB => {
                    // Left-fold into a chain of concat operators: ((a || b) || c) ...
                    // NOTE(review): CONCAT and `||` can differ on NULL handling in
                    // some engines — confirm this rewrite matches source semantics.
                    let mut it = args.into_iter();
                    // Safe: the arm guard guarantees at least three arguments.
                    let mut result = it.next().unwrap();
                    for arg in it {
                        result = Expression::DPipe(Box::new(crate::expressions::DPipe {
                            this: Box::new(result),
                            expression: Box::new(arg),
                            safe: None,
                        }));
                    }
                    Ok(result)
                }
                // Other targets: keep the variadic CONCAT call unchanged.
                _ => Ok(Expression::Function(Box::new(Function::new(
                    "CONCAT".to_string(),
                    args,
                )))),
            },
31194
31195 // GENERATE_DATE_ARRAY(start, end[, step]) -> target-specific
31196 "GENERATE_DATE_ARRAY" => {
31197 if matches!(target, DialectType::BigQuery) {
31198 // BQ->BQ: add default interval if not present
31199 if args.len() == 2 {
31200 let start = args.remove(0);
31201 let end = args.remove(0);
31202 let default_interval =
31203 Expression::Interval(Box::new(crate::expressions::Interval {
31204 this: Some(Expression::Literal(Literal::String("1".to_string()))),
31205 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
31206 unit: crate::expressions::IntervalUnit::Day,
31207 use_plural: false,
31208 }),
31209 }));
31210 Ok(Expression::Function(Box::new(Function::new(
31211 "GENERATE_DATE_ARRAY".to_string(),
31212 vec![start, end, default_interval],
31213 ))))
31214 } else {
31215 Ok(Expression::Function(Box::new(Function::new(
31216 "GENERATE_DATE_ARRAY".to_string(),
31217 args,
31218 ))))
31219 }
31220 } else if matches!(target, DialectType::DuckDB) {
31221 // DuckDB: CAST(GENERATE_SERIES(CAST(start AS DATE), CAST(end AS DATE), step) AS DATE[])
31222 let start = args.get(0).cloned();
31223 let end = args.get(1).cloned();
31224 let step = args.get(2).cloned().or_else(|| {
31225 Some(Expression::Interval(Box::new(
31226 crate::expressions::Interval {
31227 this: Some(Expression::Literal(Literal::String("1".to_string()))),
31228 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
31229 unit: crate::expressions::IntervalUnit::Day,
31230 use_plural: false,
31231 }),
31232 },
31233 )))
31234 });
31235
31236 // Wrap start/end in CAST(... AS DATE) only for string literals
31237 let maybe_cast_date = |expr: Expression| -> Expression {
31238 if matches!(&expr, Expression::Literal(Literal::String(_))) {
31239 Expression::Cast(Box::new(Cast {
31240 this: expr,
31241 to: DataType::Date,
31242 trailing_comments: vec![],
31243 double_colon_syntax: false,
31244 format: None,
31245 default: None,
31246 inferred_type: None,
31247 }))
31248 } else {
31249 expr
31250 }
31251 };
31252 let cast_start = start.map(maybe_cast_date);
31253 let cast_end = end.map(maybe_cast_date);
31254
31255 let gen_series =
31256 Expression::GenerateSeries(Box::new(crate::expressions::GenerateSeries {
31257 start: cast_start.map(Box::new),
31258 end: cast_end.map(Box::new),
31259 step: step.map(Box::new),
31260 is_end_exclusive: None,
31261 }));
31262
31263 // Wrap in CAST(... AS DATE[])
31264 Ok(Expression::Cast(Box::new(Cast {
31265 this: gen_series,
31266 to: DataType::Array {
31267 element_type: Box::new(DataType::Date),
31268 dimension: None,
31269 },
31270 trailing_comments: vec![],
31271 double_colon_syntax: false,
31272 format: None,
31273 default: None,
31274 inferred_type: None,
31275 })))
31276 } else if matches!(target, DialectType::Snowflake) {
31277 // Snowflake: keep as GENERATE_DATE_ARRAY function for later transform
31278 // (transform_generate_date_array_snowflake will convert to ARRAY_GENERATE_RANGE + DATEADD)
31279 if args.len() == 2 {
31280 let start = args.remove(0);
31281 let end = args.remove(0);
31282 let default_interval =
31283 Expression::Interval(Box::new(crate::expressions::Interval {
31284 this: Some(Expression::Literal(Literal::String("1".to_string()))),
31285 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
31286 unit: crate::expressions::IntervalUnit::Day,
31287 use_plural: false,
31288 }),
31289 }));
31290 Ok(Expression::Function(Box::new(Function::new(
31291 "GENERATE_DATE_ARRAY".to_string(),
31292 vec![start, end, default_interval],
31293 ))))
31294 } else {
31295 Ok(Expression::Function(Box::new(Function::new(
31296 "GENERATE_DATE_ARRAY".to_string(),
31297 args,
31298 ))))
31299 }
31300 } else {
31301 // Convert to GenerateSeries for other targets
31302 let start = args.get(0).cloned();
31303 let end = args.get(1).cloned();
31304 let step = args.get(2).cloned().or_else(|| {
31305 Some(Expression::Interval(Box::new(
31306 crate::expressions::Interval {
31307 this: Some(Expression::Literal(Literal::String("1".to_string()))),
31308 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
31309 unit: crate::expressions::IntervalUnit::Day,
31310 use_plural: false,
31311 }),
31312 },
31313 )))
31314 });
31315 Ok(Expression::GenerateSeries(Box::new(
31316 crate::expressions::GenerateSeries {
31317 start: start.map(Box::new),
31318 end: end.map(Box::new),
31319 step: step.map(Box::new),
31320 is_end_exclusive: None,
31321 },
31322 )))
31323 }
31324 }
31325
            // PARSE_DATE(format, str) -> target-specific
            "PARSE_DATE" if args.len() == 2 => {
                // Source argument order is (format, string).
                let format = args.remove(0);
                let str_expr = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // CAST(STRPTIME(str, duck_format) AS DATE)
                        // Note STRPTIME takes (string, format) — the reverse order.
                        let duck_format = Self::bq_format_to_duckdb(&format);
                        let strptime = Expression::Function(Box::new(Function::new(
                            "STRPTIME".to_string(),
                            vec![str_expr, duck_format],
                        )));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: strptime,
                            to: DataType::Date,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    }
                    DialectType::Snowflake => {
                        // _POLYGLOT_DATE(str, snowflake_format)
                        // Use marker so Snowflake target transform keeps it as DATE() instead of TO_DATE()
                        let sf_format = Self::bq_format_to_snowflake(&format);
                        Ok(Expression::Function(Box::new(Function::new(
                            "_POLYGLOT_DATE".to_string(),
                            vec![str_expr, sf_format],
                        ))))
                    }
                    // Other targets: keep PARSE_DATE with the original argument order.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "PARSE_DATE".to_string(),
                        vec![format, str_expr],
                    )))),
                }
            }
31363
            // PARSE_TIMESTAMP(format, str) -> target-specific
            "PARSE_TIMESTAMP" if args.len() >= 2 => {
                let format = args.remove(0);
                let str_expr = args.remove(0);
                // Optional third argument: a time zone.
                let tz = if !args.is_empty() {
                    Some(args.remove(0))
                } else {
                    None
                };
                match target {
                    DialectType::DuckDB => {
                        // STRPTIME(str, duck_format)
                        // NOTE(review): any timezone argument is dropped on this
                        // path — confirm the lossy conversion is intentional.
                        let duck_format = Self::bq_format_to_duckdb(&format);
                        let strptime = Expression::Function(Box::new(Function::new(
                            "STRPTIME".to_string(),
                            vec![str_expr, duck_format],
                        )));
                        Ok(strptime)
                    }
                    _ => {
                        // Reassemble (format, str[, tz]) in the original order.
                        let mut result_args = vec![format, str_expr];
                        if let Some(tz_arg) = tz {
                            result_args.push(tz_arg);
                        }
                        Ok(Expression::Function(Box::new(Function::new(
                            "PARSE_TIMESTAMP".to_string(),
                            result_args,
                        ))))
                    }
                }
            }
31394
31395 // FORMAT_DATE(format, date) -> target-specific
31396 "FORMAT_DATE" if args.len() == 2 => {
31397 let format = args.remove(0);
31398 let date_expr = args.remove(0);
31399 match target {
31400 DialectType::DuckDB => {
31401 // STRFTIME(CAST(date AS DATE), format)
31402 let cast_date = Expression::Cast(Box::new(Cast {
31403 this: date_expr,
31404 to: DataType::Date,
31405 trailing_comments: vec![],
31406 double_colon_syntax: false,
31407 format: None,
31408 default: None,
31409 inferred_type: None,
31410 }));
31411 Ok(Expression::Function(Box::new(Function::new(
31412 "STRFTIME".to_string(),
31413 vec![cast_date, format],
31414 ))))
31415 }
31416 _ => Ok(Expression::Function(Box::new(Function::new(
31417 "FORMAT_DATE".to_string(),
31418 vec![format, date_expr],
31419 )))),
31420 }
31421 }
31422
            // FORMAT_DATETIME(format, datetime) -> target-specific
            "FORMAT_DATETIME" if args.len() == 2 => {
                let format = args.remove(0);
                let dt_expr = args.remove(0);

                if matches!(target, DialectType::BigQuery) {
                    // BQ->BQ: normalize %H:%M:%S to %T, %x to %D
                    let norm_format = Self::bq_format_normalize_bq(&format);
                    // Also strip DATETIME keyword from typed literals:
                    // DATETIME 'x' becomes CAST('x' AS DATETIME).
                    let norm_dt = match dt_expr {
                        Expression::Literal(Literal::Timestamp(s)) => {
                            Expression::Cast(Box::new(Cast {
                                this: Expression::Literal(Literal::String(s)),
                                // DATETIME is rendered via a custom named type here.
                                to: DataType::Custom {
                                    name: "DATETIME".to_string(),
                                },
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            }))
                        }
                        other => other,
                    };
                    return Ok(Expression::Function(Box::new(Function::new(
                        "FORMAT_DATETIME".to_string(),
                        vec![norm_format, norm_dt],
                    ))));
                }

                match target {
                    DialectType::DuckDB => {
                        // STRFTIME(CAST(dt AS TIMESTAMP), duckdb_format)
                        let cast_dt = Self::ensure_cast_timestamp(dt_expr);
                        let duck_format = Self::bq_format_to_duckdb(&format);
                        Ok(Expression::Function(Box::new(Function::new(
                            "STRFTIME".to_string(),
                            vec![cast_dt, duck_format],
                        ))))
                    }
                    // Other targets: keep FORMAT_DATETIME unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "FORMAT_DATETIME".to_string(),
                        vec![format, dt_expr],
                    )))),
                }
            }
31470
31471 // FORMAT_TIMESTAMP(format, ts) -> target-specific
31472 "FORMAT_TIMESTAMP" if args.len() == 2 => {
31473 let format = args.remove(0);
31474 let ts_expr = args.remove(0);
31475 match target {
31476 DialectType::DuckDB => {
31477 // STRFTIME(CAST(CAST(ts AS TIMESTAMPTZ) AS TIMESTAMP), format)
31478 let cast_tstz = Self::ensure_cast_timestamptz(ts_expr);
31479 let cast_ts = Expression::Cast(Box::new(Cast {
31480 this: cast_tstz,
31481 to: DataType::Timestamp {
31482 timezone: false,
31483 precision: None,
31484 },
31485 trailing_comments: vec![],
31486 double_colon_syntax: false,
31487 format: None,
31488 default: None,
31489 inferred_type: None,
31490 }));
31491 Ok(Expression::Function(Box::new(Function::new(
31492 "STRFTIME".to_string(),
31493 vec![cast_ts, format],
31494 ))))
31495 }
31496 DialectType::Snowflake => {
31497 // TO_CHAR(CAST(CAST(ts AS TIMESTAMPTZ) AS TIMESTAMP), snowflake_format)
31498 let cast_tstz = Self::ensure_cast_timestamptz(ts_expr);
31499 let cast_ts = Expression::Cast(Box::new(Cast {
31500 this: cast_tstz,
31501 to: DataType::Timestamp {
31502 timezone: false,
31503 precision: None,
31504 },
31505 trailing_comments: vec![],
31506 double_colon_syntax: false,
31507 format: None,
31508 default: None,
31509 inferred_type: None,
31510 }));
31511 let sf_format = Self::bq_format_to_snowflake(&format);
31512 Ok(Expression::Function(Box::new(Function::new(
31513 "TO_CHAR".to_string(),
31514 vec![cast_ts, sf_format],
31515 ))))
31516 }
31517 _ => Ok(Expression::Function(Box::new(Function::new(
31518 "FORMAT_TIMESTAMP".to_string(),
31519 vec![format, ts_expr],
31520 )))),
31521 }
31522 }
31523
31524 // UNIX_DATE(date) -> DATE_DIFF('DAY', '1970-01-01', date) for DuckDB
31525 "UNIX_DATE" if args.len() == 1 => {
31526 let date = args.remove(0);
31527 match target {
31528 DialectType::DuckDB => {
31529 let epoch = Expression::Cast(Box::new(Cast {
31530 this: Expression::Literal(Literal::String("1970-01-01".to_string())),
31531 to: DataType::Date,
31532 trailing_comments: vec![],
31533 double_colon_syntax: false,
31534 format: None,
31535 default: None,
31536 inferred_type: None,
31537 }));
31538 // DATE_DIFF('DAY', epoch, date) but date might be DATE '...' literal
31539 // Need to convert DATE literal to CAST
31540 let norm_date = Self::date_literal_to_cast(date);
31541 Ok(Expression::Function(Box::new(Function::new(
31542 "DATE_DIFF".to_string(),
31543 vec![
31544 Expression::Literal(Literal::String("DAY".to_string())),
31545 epoch,
31546 norm_date,
31547 ],
31548 ))))
31549 }
31550 _ => Ok(Expression::Function(Box::new(Function::new(
31551 "UNIX_DATE".to_string(),
31552 vec![date],
31553 )))),
31554 }
31555 }
31556
31557 // UNIX_SECONDS(ts) -> target-specific
31558 "UNIX_SECONDS" if args.len() == 1 => {
31559 let ts = args.remove(0);
31560 match target {
31561 DialectType::DuckDB => {
31562 // CAST(EPOCH(CAST(ts AS TIMESTAMPTZ)) AS BIGINT)
31563 let norm_ts = Self::ts_literal_to_cast_tz(ts);
31564 let epoch = Expression::Function(Box::new(Function::new(
31565 "EPOCH".to_string(),
31566 vec![norm_ts],
31567 )));
31568 Ok(Expression::Cast(Box::new(Cast {
31569 this: epoch,
31570 to: DataType::BigInt { length: None },
31571 trailing_comments: vec![],
31572 double_colon_syntax: false,
31573 format: None,
31574 default: None,
31575 inferred_type: None,
31576 })))
31577 }
31578 DialectType::Snowflake => {
31579 // TIMESTAMPDIFF(SECONDS, CAST('1970-01-01 00:00:00+00' AS TIMESTAMPTZ), ts)
31580 let epoch = Expression::Cast(Box::new(Cast {
31581 this: Expression::Literal(Literal::String(
31582 "1970-01-01 00:00:00+00".to_string(),
31583 )),
31584 to: DataType::Timestamp {
31585 timezone: true,
31586 precision: None,
31587 },
31588 trailing_comments: vec![],
31589 double_colon_syntax: false,
31590 format: None,
31591 default: None,
31592 inferred_type: None,
31593 }));
31594 Ok(Expression::Function(Box::new(Function::new(
31595 "TIMESTAMPDIFF".to_string(),
31596 vec![
31597 Expression::Identifier(Identifier::new("SECONDS".to_string())),
31598 epoch,
31599 ts,
31600 ],
31601 ))))
31602 }
31603 _ => Ok(Expression::Function(Box::new(Function::new(
31604 "UNIX_SECONDS".to_string(),
31605 vec![ts],
31606 )))),
31607 }
31608 }
31609
31610 // UNIX_MILLIS(ts) -> target-specific
31611 "UNIX_MILLIS" if args.len() == 1 => {
31612 let ts = args.remove(0);
31613 match target {
31614 DialectType::DuckDB => {
31615 let norm_ts = Self::ts_literal_to_cast_tz(ts);
31616 Ok(Expression::Function(Box::new(Function::new(
31617 "EPOCH_MS".to_string(),
31618 vec![norm_ts],
31619 ))))
31620 }
31621 _ => Ok(Expression::Function(Box::new(Function::new(
31622 "UNIX_MILLIS".to_string(),
31623 vec![ts],
31624 )))),
31625 }
31626 }
31627
31628 // UNIX_MICROS(ts) -> target-specific
31629 "UNIX_MICROS" if args.len() == 1 => {
31630 let ts = args.remove(0);
31631 match target {
31632 DialectType::DuckDB => {
31633 let norm_ts = Self::ts_literal_to_cast_tz(ts);
31634 Ok(Expression::Function(Box::new(Function::new(
31635 "EPOCH_US".to_string(),
31636 vec![norm_ts],
31637 ))))
31638 }
31639 _ => Ok(Expression::Function(Box::new(Function::new(
31640 "UNIX_MICROS".to_string(),
31641 vec![ts],
31642 )))),
31643 }
31644 }
31645
31646 // INSTR(str, substr) -> target-specific
31647 "INSTR" => {
31648 if matches!(target, DialectType::BigQuery) {
31649 // BQ->BQ: keep as INSTR
31650 Ok(Expression::Function(Box::new(Function::new(
31651 "INSTR".to_string(),
31652 args,
31653 ))))
31654 } else if matches!(target, DialectType::Snowflake) && args.len() == 2 {
31655 // Snowflake: CHARINDEX(substr, str) - swap args
31656 let str_expr = args.remove(0);
31657 let substr = args.remove(0);
31658 Ok(Expression::Function(Box::new(Function::new(
31659 "CHARINDEX".to_string(),
31660 vec![substr, str_expr],
31661 ))))
31662 } else {
31663 // Keep as INSTR for other targets
31664 Ok(Expression::Function(Box::new(Function::new(
31665 "INSTR".to_string(),
31666 args,
31667 ))))
31668 }
31669 }
31670
31671 // CURRENT_TIMESTAMP / CURRENT_DATE handling - parens normalization and timezone
31672 "CURRENT_TIMESTAMP" | "CURRENT_DATE" | "CURRENT_DATETIME" | "CURRENT_TIME" => {
31673 if matches!(target, DialectType::BigQuery) {
31674 // BQ->BQ: always output with parens (function form), keep any timezone arg
31675 Ok(Expression::Function(Box::new(Function::new(name, args))))
31676 } else if name == "CURRENT_DATE" && args.len() == 1 {
31677 // CURRENT_DATE('UTC') - has timezone arg
31678 let tz_arg = args.remove(0);
31679 match target {
31680 DialectType::DuckDB => {
31681 // CAST(CURRENT_TIMESTAMP AT TIME ZONE 'UTC' AS DATE)
31682 let ct = Expression::CurrentTimestamp(
31683 crate::expressions::CurrentTimestamp {
31684 precision: None,
31685 sysdate: false,
31686 },
31687 );
31688 let at_tz =
31689 Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
31690 this: ct,
31691 zone: tz_arg,
31692 }));
31693 Ok(Expression::Cast(Box::new(Cast {
31694 this: at_tz,
31695 to: DataType::Date,
31696 trailing_comments: vec![],
31697 double_colon_syntax: false,
31698 format: None,
31699 default: None,
31700 inferred_type: None,
31701 })))
31702 }
31703 DialectType::Snowflake => {
31704 // CAST(CONVERT_TIMEZONE('UTC', CURRENT_TIMESTAMP()) AS DATE)
31705 let ct = Expression::Function(Box::new(Function::new(
31706 "CURRENT_TIMESTAMP".to_string(),
31707 vec![],
31708 )));
31709 let convert = Expression::Function(Box::new(Function::new(
31710 "CONVERT_TIMEZONE".to_string(),
31711 vec![tz_arg, ct],
31712 )));
31713 Ok(Expression::Cast(Box::new(Cast {
31714 this: convert,
31715 to: DataType::Date,
31716 trailing_comments: vec![],
31717 double_colon_syntax: false,
31718 format: None,
31719 default: None,
31720 inferred_type: None,
31721 })))
31722 }
31723 _ => {
31724 // PostgreSQL, MySQL, etc.: CURRENT_DATE AT TIME ZONE 'UTC'
31725 let cd = Expression::CurrentDate(crate::expressions::CurrentDate);
31726 Ok(Expression::AtTimeZone(Box::new(
31727 crate::expressions::AtTimeZone {
31728 this: cd,
31729 zone: tz_arg,
31730 },
31731 )))
31732 }
31733 }
31734 } else if (name == "CURRENT_TIMESTAMP"
31735 || name == "CURRENT_TIME"
31736 || name == "CURRENT_DATE")
31737 && args.is_empty()
31738 && matches!(
31739 target,
31740 DialectType::PostgreSQL
31741 | DialectType::DuckDB
31742 | DialectType::Presto
31743 | DialectType::Trino
31744 )
31745 {
31746 // These targets want no-parens CURRENT_TIMESTAMP / CURRENT_DATE / CURRENT_TIME
31747 if name == "CURRENT_TIMESTAMP" {
31748 Ok(Expression::CurrentTimestamp(
31749 crate::expressions::CurrentTimestamp {
31750 precision: None,
31751 sysdate: false,
31752 },
31753 ))
31754 } else if name == "CURRENT_DATE" {
31755 Ok(Expression::CurrentDate(crate::expressions::CurrentDate))
31756 } else {
31757 // CURRENT_TIME
31758 Ok(Expression::CurrentTime(crate::expressions::CurrentTime {
31759 precision: None,
31760 }))
31761 }
31762 } else {
31763 // All other targets: keep as function (with parens)
31764 Ok(Expression::Function(Box::new(Function::new(name, args))))
31765 }
31766 }
31767
31768 // JSON_QUERY(json, path) -> target-specific
31769 "JSON_QUERY" if args.len() == 2 => {
31770 match target {
31771 DialectType::DuckDB | DialectType::SQLite => {
31772 // json -> path syntax
31773 let json_expr = args.remove(0);
31774 let path = args.remove(0);
31775 Ok(Expression::JsonExtract(Box::new(
31776 crate::expressions::JsonExtractFunc {
31777 this: json_expr,
31778 path,
31779 returning: None,
31780 arrow_syntax: true,
31781 hash_arrow_syntax: false,
31782 wrapper_option: None,
31783 quotes_option: None,
31784 on_scalar_string: false,
31785 on_error: None,
31786 },
31787 )))
31788 }
31789 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
31790 Ok(Expression::Function(Box::new(Function::new(
31791 "GET_JSON_OBJECT".to_string(),
31792 args,
31793 ))))
31794 }
31795 DialectType::PostgreSQL | DialectType::Redshift => Ok(Expression::Function(
31796 Box::new(Function::new("JSON_EXTRACT_PATH".to_string(), args)),
31797 )),
31798 _ => Ok(Expression::Function(Box::new(Function::new(
31799 "JSON_QUERY".to_string(),
31800 args,
31801 )))),
31802 }
31803 }
31804
31805 // JSON_VALUE_ARRAY(json, path) -> target-specific
31806 "JSON_VALUE_ARRAY" if args.len() == 2 => {
31807 match target {
31808 DialectType::DuckDB => {
31809 // CAST(json -> path AS TEXT[])
31810 let json_expr = args.remove(0);
31811 let path = args.remove(0);
31812 let arrow = Expression::JsonExtract(Box::new(
31813 crate::expressions::JsonExtractFunc {
31814 this: json_expr,
31815 path,
31816 returning: None,
31817 arrow_syntax: true,
31818 hash_arrow_syntax: false,
31819 wrapper_option: None,
31820 quotes_option: None,
31821 on_scalar_string: false,
31822 on_error: None,
31823 },
31824 ));
31825 Ok(Expression::Cast(Box::new(Cast {
31826 this: arrow,
31827 to: DataType::Array {
31828 element_type: Box::new(DataType::Text),
31829 dimension: None,
31830 },
31831 trailing_comments: vec![],
31832 double_colon_syntax: false,
31833 format: None,
31834 default: None,
31835 inferred_type: None,
31836 })))
31837 }
31838 DialectType::Snowflake => {
31839 let json_expr = args.remove(0);
31840 let path_expr = args.remove(0);
31841 // Convert JSON path from $.path to just path
31842 let sf_path = if let Expression::Literal(Literal::String(ref s)) = path_expr
31843 {
31844 let trimmed = s.trim_start_matches('$').trim_start_matches('.');
31845 Expression::Literal(Literal::String(trimmed.to_string()))
31846 } else {
31847 path_expr
31848 };
31849 let parse_json = Expression::Function(Box::new(Function::new(
31850 "PARSE_JSON".to_string(),
31851 vec![json_expr],
31852 )));
31853 let get_path = Expression::Function(Box::new(Function::new(
31854 "GET_PATH".to_string(),
31855 vec![parse_json, sf_path],
31856 )));
31857 // TRANSFORM(get_path, x -> CAST(x AS VARCHAR))
31858 let cast_expr = Expression::Cast(Box::new(Cast {
31859 this: Expression::Identifier(Identifier::new("x")),
31860 to: DataType::VarChar {
31861 length: None,
31862 parenthesized_length: false,
31863 },
31864 trailing_comments: vec![],
31865 double_colon_syntax: false,
31866 format: None,
31867 default: None,
31868 inferred_type: None,
31869 }));
31870 let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
31871 parameters: vec![Identifier::new("x")],
31872 body: cast_expr,
31873 colon: false,
31874 parameter_types: vec![],
31875 }));
31876 Ok(Expression::Function(Box::new(Function::new(
31877 "TRANSFORM".to_string(),
31878 vec![get_path, lambda],
31879 ))))
31880 }
31881 _ => Ok(Expression::Function(Box::new(Function::new(
31882 "JSON_VALUE_ARRAY".to_string(),
31883 args,
31884 )))),
31885 }
31886 }
31887
31888 // BigQuery REGEXP_EXTRACT(val, regex[, position[, occurrence]]) -> target dialects
31889 // BigQuery's 3rd arg is "position" (starting char index), 4th is "occurrence" (which match to return)
31890 // This is different from Hive/Spark where 3rd arg is "group_index"
31891 "REGEXP_EXTRACT" if matches!(source, DialectType::BigQuery) => {
31892 match target {
31893 DialectType::DuckDB
31894 | DialectType::Presto
31895 | DialectType::Trino
31896 | DialectType::Athena => {
31897 if args.len() == 2 {
31898 // REGEXP_EXTRACT(val, regex) -> REGEXP_EXTRACT(val, regex, 1)
31899 args.push(Expression::number(1));
31900 Ok(Expression::Function(Box::new(Function::new(
31901 "REGEXP_EXTRACT".to_string(),
31902 args,
31903 ))))
31904 } else if args.len() == 3 {
31905 let val = args.remove(0);
31906 let regex = args.remove(0);
31907 let position = args.remove(0);
31908 let is_pos_1 = matches!(&position, Expression::Literal(Literal::Number(n)) if n == "1");
31909 if is_pos_1 {
31910 Ok(Expression::Function(Box::new(Function::new(
31911 "REGEXP_EXTRACT".to_string(),
31912 vec![val, regex, Expression::number(1)],
31913 ))))
31914 } else {
31915 let substring_expr = Expression::Function(Box::new(Function::new(
31916 "SUBSTRING".to_string(),
31917 vec![val, position],
31918 )));
31919 let nullif_expr = Expression::Function(Box::new(Function::new(
31920 "NULLIF".to_string(),
31921 vec![
31922 substring_expr,
31923 Expression::Literal(Literal::String(String::new())),
31924 ],
31925 )));
31926 Ok(Expression::Function(Box::new(Function::new(
31927 "REGEXP_EXTRACT".to_string(),
31928 vec![nullif_expr, regex, Expression::number(1)],
31929 ))))
31930 }
31931 } else if args.len() == 4 {
31932 let val = args.remove(0);
31933 let regex = args.remove(0);
31934 let position = args.remove(0);
31935 let occurrence = args.remove(0);
31936 let is_pos_1 = matches!(&position, Expression::Literal(Literal::Number(n)) if n == "1");
31937 let is_occ_1 = matches!(&occurrence, Expression::Literal(Literal::Number(n)) if n == "1");
31938 if is_pos_1 && is_occ_1 {
31939 Ok(Expression::Function(Box::new(Function::new(
31940 "REGEXP_EXTRACT".to_string(),
31941 vec![val, regex, Expression::number(1)],
31942 ))))
31943 } else {
31944 let subject = if is_pos_1 {
31945 val
31946 } else {
31947 let substring_expr = Expression::Function(Box::new(
31948 Function::new("SUBSTRING".to_string(), vec![val, position]),
31949 ));
31950 Expression::Function(Box::new(Function::new(
31951 "NULLIF".to_string(),
31952 vec![
31953 substring_expr,
31954 Expression::Literal(Literal::String(String::new())),
31955 ],
31956 )))
31957 };
31958 let extract_all = Expression::Function(Box::new(Function::new(
31959 "REGEXP_EXTRACT_ALL".to_string(),
31960 vec![subject, regex, Expression::number(1)],
31961 )));
31962 Ok(Expression::Function(Box::new(Function::new(
31963 "ARRAY_EXTRACT".to_string(),
31964 vec![extract_all, occurrence],
31965 ))))
31966 }
31967 } else {
31968 Ok(Expression::Function(Box::new(Function {
31969 name: f.name,
31970 args,
31971 distinct: f.distinct,
31972 trailing_comments: f.trailing_comments,
31973 use_bracket_syntax: f.use_bracket_syntax,
31974 no_parens: f.no_parens,
31975 quoted: f.quoted,
31976 span: None,
31977 inferred_type: None,
31978 })))
31979 }
31980 }
31981 DialectType::Snowflake => {
31982 // BigQuery REGEXP_EXTRACT -> Snowflake REGEXP_SUBSTR
31983 Ok(Expression::Function(Box::new(Function::new(
31984 "REGEXP_SUBSTR".to_string(),
31985 args,
31986 ))))
31987 }
31988 _ => {
31989 // For other targets (Hive/Spark/BigQuery): pass through as-is
31990 // BigQuery's default group behavior matches Hive/Spark for 2-arg case
31991 Ok(Expression::Function(Box::new(Function {
31992 name: f.name,
31993 args,
31994 distinct: f.distinct,
31995 trailing_comments: f.trailing_comments,
31996 use_bracket_syntax: f.use_bracket_syntax,
31997 no_parens: f.no_parens,
31998 quoted: f.quoted,
31999 span: None,
32000 inferred_type: None,
32001 })))
32002 }
32003 }
32004 }
32005
32006 // BigQuery STRUCT(args) -> target-specific struct expression
32007 "STRUCT" => {
32008 // Convert Function args to Struct fields
32009 let mut fields: Vec<(Option<String>, Expression)> = Vec::new();
32010 for (i, arg) in args.into_iter().enumerate() {
32011 match arg {
32012 Expression::Alias(a) => {
32013 // Named field: expr AS name
32014 fields.push((Some(a.alias.name.clone()), a.this));
32015 }
32016 other => {
32017 // Unnamed field: for Spark/Hive, keep as None
32018 // For Snowflake, auto-name as _N
32019 // For DuckDB, use column name for column refs, _N for others
32020 if matches!(target, DialectType::Snowflake) {
32021 fields.push((Some(format!("_{}", i)), other));
32022 } else if matches!(target, DialectType::DuckDB) {
32023 let auto_name = match &other {
32024 Expression::Column(col) => col.name.name.clone(),
32025 _ => format!("_{}", i),
32026 };
32027 fields.push((Some(auto_name), other));
32028 } else {
32029 fields.push((None, other));
32030 }
32031 }
32032 }
32033 }
32034
32035 match target {
32036 DialectType::Snowflake => {
32037 // OBJECT_CONSTRUCT('name', value, ...)
32038 let mut oc_args = Vec::new();
32039 for (name, val) in &fields {
32040 if let Some(n) = name {
32041 oc_args.push(Expression::Literal(Literal::String(n.clone())));
32042 oc_args.push(val.clone());
32043 } else {
32044 oc_args.push(val.clone());
32045 }
32046 }
32047 Ok(Expression::Function(Box::new(Function::new(
32048 "OBJECT_CONSTRUCT".to_string(),
32049 oc_args,
32050 ))))
32051 }
32052 DialectType::DuckDB => {
32053 // {'name': value, ...}
32054 Ok(Expression::Struct(Box::new(crate::expressions::Struct {
32055 fields,
32056 })))
32057 }
32058 DialectType::Hive => {
32059 // STRUCT(val1, val2, ...) - strip aliases
32060 let hive_fields: Vec<(Option<String>, Expression)> =
32061 fields.into_iter().map(|(_, v)| (None, v)).collect();
32062 Ok(Expression::Struct(Box::new(crate::expressions::Struct {
32063 fields: hive_fields,
32064 })))
32065 }
32066 DialectType::Spark | DialectType::Databricks => {
32067 // Use Expression::Struct to bypass Spark target transform auto-naming
32068 Ok(Expression::Struct(Box::new(crate::expressions::Struct {
32069 fields,
32070 })))
32071 }
32072 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
32073 // Check if all fields are named AND all have inferable types - if so, wrap in CAST(ROW(...) AS ROW(name TYPE, ...))
32074 let all_named =
32075 !fields.is_empty() && fields.iter().all(|(name, _)| name.is_some());
32076 let all_types_inferable = all_named
32077 && fields
32078 .iter()
32079 .all(|(_, val)| Self::can_infer_presto_type(val));
32080 let row_args: Vec<Expression> =
32081 fields.iter().map(|(_, v)| v.clone()).collect();
32082 let row_expr = Expression::Function(Box::new(Function::new(
32083 "ROW".to_string(),
32084 row_args,
32085 )));
32086 if all_named && all_types_inferable {
32087 // Build ROW type with inferred types
32088 let mut row_type_fields = Vec::new();
32089 for (name, val) in &fields {
32090 if let Some(n) = name {
32091 let type_str = Self::infer_sql_type_for_presto(val);
32092 row_type_fields.push(crate::expressions::StructField::new(
32093 n.clone(),
32094 crate::expressions::DataType::Custom { name: type_str },
32095 ));
32096 }
32097 }
32098 let row_type = crate::expressions::DataType::Struct {
32099 fields: row_type_fields,
32100 nested: true,
32101 };
32102 Ok(Expression::Cast(Box::new(Cast {
32103 this: row_expr,
32104 to: row_type,
32105 trailing_comments: Vec::new(),
32106 double_colon_syntax: false,
32107 format: None,
32108 default: None,
32109 inferred_type: None,
32110 })))
32111 } else {
32112 Ok(row_expr)
32113 }
32114 }
32115 _ => {
32116 // Default: keep as STRUCT function with original args
32117 let mut new_args = Vec::new();
32118 for (name, val) in fields {
32119 if let Some(n) = name {
32120 new_args.push(Expression::Alias(Box::new(
32121 crate::expressions::Alias::new(val, Identifier::new(n)),
32122 )));
32123 } else {
32124 new_args.push(val);
32125 }
32126 }
32127 Ok(Expression::Function(Box::new(Function::new(
32128 "STRUCT".to_string(),
32129 new_args,
32130 ))))
32131 }
32132 }
32133 }
32134
32135 // ROUND(x, n, 'ROUND_HALF_EVEN') -> ROUND_EVEN(x, n) for DuckDB
32136 "ROUND" if args.len() == 3 => {
32137 let x = args.remove(0);
32138 let n = args.remove(0);
32139 let mode = args.remove(0);
32140 // Check if mode is 'ROUND_HALF_EVEN'
32141 let is_half_even = matches!(&mode, Expression::Literal(Literal::String(s)) if s.eq_ignore_ascii_case("ROUND_HALF_EVEN"));
32142 if is_half_even && matches!(target, DialectType::DuckDB) {
32143 Ok(Expression::Function(Box::new(Function::new(
32144 "ROUND_EVEN".to_string(),
32145 vec![x, n],
32146 ))))
32147 } else {
32148 // Pass through with all args
32149 Ok(Expression::Function(Box::new(Function::new(
32150 "ROUND".to_string(),
32151 vec![x, n, mode],
32152 ))))
32153 }
32154 }
32155
32156 // MAKE_INTERVAL(year, month, named_args...) -> INTERVAL string for Snowflake/DuckDB
32157 "MAKE_INTERVAL" => {
32158 // MAKE_INTERVAL(1, 2, minute => 5, day => 3)
32159 // The positional args are: year, month
32160 // Named args are: day =>, minute =>, etc.
32161 // For Snowflake: INTERVAL '1 year, 2 month, 5 minute, 3 day'
32162 // For DuckDB: INTERVAL '1 year 2 month 5 minute 3 day'
32163 // For BigQuery->BigQuery: reorder named args (day before minute)
32164 if matches!(target, DialectType::Snowflake | DialectType::DuckDB) {
32165 let mut parts: Vec<(String, String)> = Vec::new();
32166 let mut pos_idx = 0;
32167 let pos_units = ["year", "month"];
32168 for arg in &args {
32169 if let Expression::NamedArgument(na) = arg {
32170 // Named arg like minute => 5
32171 let unit = na.name.name.clone();
32172 if let Expression::Literal(Literal::Number(n)) = &na.value {
32173 parts.push((unit, n.clone()));
32174 }
32175 } else if pos_idx < pos_units.len() {
32176 if let Expression::Literal(Literal::Number(n)) = arg {
32177 parts.push((pos_units[pos_idx].to_string(), n.clone()));
32178 }
32179 pos_idx += 1;
32180 }
32181 }
32182 // Don't sort - preserve original argument order
32183 let separator = if matches!(target, DialectType::Snowflake) {
32184 ", "
32185 } else {
32186 " "
32187 };
32188 let interval_str = parts
32189 .iter()
32190 .map(|(u, v)| format!("{} {}", v, u))
32191 .collect::<Vec<_>>()
32192 .join(separator);
32193 Ok(Expression::Interval(Box::new(
32194 crate::expressions::Interval {
32195 this: Some(Expression::Literal(Literal::String(interval_str))),
32196 unit: None,
32197 },
32198 )))
32199 } else if matches!(target, DialectType::BigQuery) {
32200 // BigQuery->BigQuery: reorder named args (day, minute, etc.)
32201 let mut positional = Vec::new();
32202 let mut named: Vec<(
32203 String,
32204 Expression,
32205 crate::expressions::NamedArgSeparator,
32206 )> = Vec::new();
32207 let _pos_units = ["year", "month"];
32208 let mut _pos_idx = 0;
32209 for arg in args {
32210 if let Expression::NamedArgument(na) = arg {
32211 named.push((na.name.name.clone(), na.value, na.separator));
32212 } else {
32213 positional.push(arg);
32214 _pos_idx += 1;
32215 }
32216 }
32217 // Sort named args by: day, hour, minute, second
32218 let unit_order = |u: &str| -> usize {
32219 match u.to_lowercase().as_str() {
32220 "day" => 0,
32221 "hour" => 1,
32222 "minute" => 2,
32223 "second" => 3,
32224 _ => 4,
32225 }
32226 };
32227 named.sort_by_key(|(u, _, _)| unit_order(u));
32228 let mut result_args = positional;
32229 for (name, value, sep) in named {
32230 result_args.push(Expression::NamedArgument(Box::new(
32231 crate::expressions::NamedArgument {
32232 name: Identifier::new(&name),
32233 value,
32234 separator: sep,
32235 },
32236 )));
32237 }
32238 Ok(Expression::Function(Box::new(Function::new(
32239 "MAKE_INTERVAL".to_string(),
32240 result_args,
32241 ))))
32242 } else {
32243 Ok(Expression::Function(Box::new(Function::new(
32244 "MAKE_INTERVAL".to_string(),
32245 args,
32246 ))))
32247 }
32248 }
32249
32250 // ARRAY_TO_STRING(array, sep, null_text) -> ARRAY_TO_STRING(LIST_TRANSFORM(array, x -> COALESCE(x, null_text)), sep) for DuckDB
32251 "ARRAY_TO_STRING" if args.len() == 3 => {
32252 let arr = args.remove(0);
32253 let sep = args.remove(0);
32254 let null_text = args.remove(0);
32255 match target {
32256 DialectType::DuckDB => {
32257 // LIST_TRANSFORM(array, x -> COALESCE(x, null_text))
32258 let _lambda_param =
32259 Expression::Identifier(crate::expressions::Identifier::new("x"));
32260 let coalesce =
32261 Expression::Coalesce(Box::new(crate::expressions::VarArgFunc {
32262 original_name: None,
32263 expressions: vec![
32264 Expression::Identifier(crate::expressions::Identifier::new(
32265 "x",
32266 )),
32267 null_text,
32268 ],
32269 inferred_type: None,
32270 }));
32271 let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
32272 parameters: vec![crate::expressions::Identifier::new("x")],
32273 body: coalesce,
32274 colon: false,
32275 parameter_types: vec![],
32276 }));
32277 let list_transform = Expression::Function(Box::new(Function::new(
32278 "LIST_TRANSFORM".to_string(),
32279 vec![arr, lambda],
32280 )));
32281 Ok(Expression::Function(Box::new(Function::new(
32282 "ARRAY_TO_STRING".to_string(),
32283 vec![list_transform, sep],
32284 ))))
32285 }
32286 _ => Ok(Expression::Function(Box::new(Function::new(
32287 "ARRAY_TO_STRING".to_string(),
32288 vec![arr, sep, null_text],
32289 )))),
32290 }
32291 }
32292
32293 // LENGTH(x) -> CASE TYPEOF(x) ... for DuckDB
32294 "LENGTH" if args.len() == 1 => {
32295 let arg = args.remove(0);
32296 match target {
32297 DialectType::DuckDB => {
32298 // CASE TYPEOF(foo) WHEN 'BLOB' THEN OCTET_LENGTH(CAST(foo AS BLOB)) ELSE LENGTH(CAST(foo AS TEXT)) END
32299 let typeof_func = Expression::Function(Box::new(Function::new(
32300 "TYPEOF".to_string(),
32301 vec![arg.clone()],
32302 )));
32303 let blob_cast = Expression::Cast(Box::new(Cast {
32304 this: arg.clone(),
32305 to: DataType::VarBinary { length: None },
32306 trailing_comments: vec![],
32307 double_colon_syntax: false,
32308 format: None,
32309 default: None,
32310 inferred_type: None,
32311 }));
32312 let octet_length = Expression::Function(Box::new(Function::new(
32313 "OCTET_LENGTH".to_string(),
32314 vec![blob_cast],
32315 )));
32316 let text_cast = Expression::Cast(Box::new(Cast {
32317 this: arg,
32318 to: DataType::Text,
32319 trailing_comments: vec![],
32320 double_colon_syntax: false,
32321 format: None,
32322 default: None,
32323 inferred_type: None,
32324 }));
32325 let length_text = Expression::Function(Box::new(Function::new(
32326 "LENGTH".to_string(),
32327 vec![text_cast],
32328 )));
32329 Ok(Expression::Case(Box::new(crate::expressions::Case {
32330 operand: Some(typeof_func),
32331 whens: vec![(
32332 Expression::Literal(Literal::String("BLOB".to_string())),
32333 octet_length,
32334 )],
32335 else_: Some(length_text),
32336 comments: Vec::new(),
32337 inferred_type: None,
32338 })))
32339 }
32340 _ => Ok(Expression::Function(Box::new(Function::new(
32341 "LENGTH".to_string(),
32342 vec![arg],
32343 )))),
32344 }
32345 }
32346
32347 // PERCENTILE_CONT(x, fraction RESPECT NULLS) -> QUANTILE_CONT(x, fraction) for DuckDB
32348 "PERCENTILE_CONT" if args.len() >= 2 && matches!(source, DialectType::BigQuery) => {
32349 // BigQuery PERCENTILE_CONT(x, fraction [RESPECT|IGNORE NULLS]) OVER ()
32350 // The args should be [x, fraction] with the null handling stripped
32351 // For DuckDB: QUANTILE_CONT(x, fraction)
32352 // For Spark: PERCENTILE_CONT(x, fraction) RESPECT NULLS (handled at window level)
32353 match target {
32354 DialectType::DuckDB => {
32355 // Strip down to just 2 args, rename to QUANTILE_CONT
32356 let x = args[0].clone();
32357 let frac = args[1].clone();
32358 Ok(Expression::Function(Box::new(Function::new(
32359 "QUANTILE_CONT".to_string(),
32360 vec![x, frac],
32361 ))))
32362 }
32363 _ => Ok(Expression::Function(Box::new(Function::new(
32364 "PERCENTILE_CONT".to_string(),
32365 args,
32366 )))),
32367 }
32368 }
32369
32370 // All others: pass through
32371 _ => Ok(Expression::Function(Box::new(Function {
32372 name: f.name,
32373 args,
32374 distinct: f.distinct,
32375 trailing_comments: f.trailing_comments,
32376 use_bracket_syntax: f.use_bracket_syntax,
32377 no_parens: f.no_parens,
32378 quoted: f.quoted,
32379 span: None,
32380 inferred_type: None,
32381 }))),
32382 }
32383 }
32384
32385 /// Check if we can reliably infer the SQL type for Presto/Trino ROW CAST.
32386 /// Returns false for column references and other non-literal expressions where the type is unknown.
32387 fn can_infer_presto_type(expr: &Expression) -> bool {
32388 match expr {
32389 Expression::Literal(_) => true,
32390 Expression::Boolean(_) => true,
32391 Expression::Array(_) | Expression::ArrayFunc(_) => true,
32392 Expression::Struct(_) | Expression::StructFunc(_) => true,
32393 Expression::Function(f) => {
32394 let up = f.name.to_uppercase();
32395 up == "STRUCT"
32396 || up == "ROW"
32397 || up == "CURRENT_DATE"
32398 || up == "CURRENT_TIMESTAMP"
32399 || up == "NOW"
32400 }
32401 Expression::Cast(_) => true,
32402 Expression::Neg(inner) => Self::can_infer_presto_type(&inner.this),
32403 _ => false,
32404 }
32405 }
32406
32407 /// Infer SQL type name for a Presto/Trino ROW CAST from a literal expression
32408 fn infer_sql_type_for_presto(expr: &Expression) -> String {
32409 use crate::expressions::Literal;
32410 match expr {
32411 Expression::Literal(Literal::String(_)) => "VARCHAR".to_string(),
32412 Expression::Literal(Literal::Number(n)) => {
32413 if n.contains('.') {
32414 "DOUBLE".to_string()
32415 } else {
32416 "INTEGER".to_string()
32417 }
32418 }
32419 Expression::Boolean(_) => "BOOLEAN".to_string(),
32420 Expression::Literal(Literal::Date(_)) => "DATE".to_string(),
32421 Expression::Literal(Literal::Timestamp(_)) => "TIMESTAMP".to_string(),
32422 Expression::Literal(Literal::Datetime(_)) => "TIMESTAMP".to_string(),
32423 Expression::Array(_) | Expression::ArrayFunc(_) => "ARRAY(VARCHAR)".to_string(),
32424 Expression::Struct(_) | Expression::StructFunc(_) => "ROW".to_string(),
32425 Expression::Function(f) => {
32426 let up = f.name.to_uppercase();
32427 if up == "STRUCT" || up == "ROW" {
32428 "ROW".to_string()
32429 } else if up == "CURRENT_DATE" {
32430 "DATE".to_string()
32431 } else if up == "CURRENT_TIMESTAMP" || up == "NOW" {
32432 "TIMESTAMP".to_string()
32433 } else {
32434 "VARCHAR".to_string()
32435 }
32436 }
32437 Expression::Cast(c) => {
32438 // If already cast, use the target type
32439 Self::data_type_to_presto_string(&c.to)
32440 }
32441 _ => "VARCHAR".to_string(),
32442 }
32443 }
32444
32445 /// Convert a DataType to its Presto/Trino string representation for ROW type
32446 fn data_type_to_presto_string(dt: &crate::expressions::DataType) -> String {
32447 use crate::expressions::DataType;
32448 match dt {
32449 DataType::VarChar { .. } | DataType::Text | DataType::String { .. } => {
32450 "VARCHAR".to_string()
32451 }
32452 DataType::Int { .. }
32453 | DataType::BigInt { .. }
32454 | DataType::SmallInt { .. }
32455 | DataType::TinyInt { .. } => "INTEGER".to_string(),
32456 DataType::Float { .. } | DataType::Double { .. } => "DOUBLE".to_string(),
32457 DataType::Boolean => "BOOLEAN".to_string(),
32458 DataType::Date => "DATE".to_string(),
32459 DataType::Timestamp { .. } => "TIMESTAMP".to_string(),
32460 DataType::Struct { fields, .. } => {
32461 let field_strs: Vec<String> = fields
32462 .iter()
32463 .map(|f| {
32464 format!(
32465 "{} {}",
32466 f.name,
32467 Self::data_type_to_presto_string(&f.data_type)
32468 )
32469 })
32470 .collect();
32471 format!("ROW({})", field_strs.join(", "))
32472 }
32473 DataType::Array { element_type, .. } => {
32474 format!("ARRAY({})", Self::data_type_to_presto_string(element_type))
32475 }
32476 DataType::Custom { name } => {
32477 // Pass through custom type names (e.g., "INTEGER", "VARCHAR" from earlier inference)
32478 name.clone()
32479 }
32480 _ => "VARCHAR".to_string(),
32481 }
32482 }
32483
32484 /// Convert IntervalUnit to string
32485 fn interval_unit_to_string(unit: &crate::expressions::IntervalUnit) -> String {
32486 match unit {
32487 crate::expressions::IntervalUnit::Year => "YEAR".to_string(),
32488 crate::expressions::IntervalUnit::Quarter => "QUARTER".to_string(),
32489 crate::expressions::IntervalUnit::Month => "MONTH".to_string(),
32490 crate::expressions::IntervalUnit::Week => "WEEK".to_string(),
32491 crate::expressions::IntervalUnit::Day => "DAY".to_string(),
32492 crate::expressions::IntervalUnit::Hour => "HOUR".to_string(),
32493 crate::expressions::IntervalUnit::Minute => "MINUTE".to_string(),
32494 crate::expressions::IntervalUnit::Second => "SECOND".to_string(),
32495 crate::expressions::IntervalUnit::Millisecond => "MILLISECOND".to_string(),
32496 crate::expressions::IntervalUnit::Microsecond => "MICROSECOND".to_string(),
32497 crate::expressions::IntervalUnit::Nanosecond => "NANOSECOND".to_string(),
32498 }
32499 }
32500
32501 /// Extract unit string from an expression (uppercased)
32502 fn get_unit_str_static(expr: &Expression) -> String {
32503 use crate::expressions::Literal;
32504 match expr {
32505 Expression::Identifier(id) => id.name.to_uppercase(),
32506 Expression::Literal(Literal::String(s)) => s.to_uppercase(),
32507 Expression::Column(col) => col.name.name.to_uppercase(),
32508 Expression::Function(f) => {
32509 let base = f.name.to_uppercase();
32510 if !f.args.is_empty() {
32511 let inner = Self::get_unit_str_static(&f.args[0]);
32512 format!("{}({})", base, inner)
32513 } else {
32514 base
32515 }
32516 }
32517 _ => "DAY".to_string(),
32518 }
32519 }
32520
32521 /// Parse unit string to IntervalUnit
32522 fn parse_interval_unit_static(s: &str) -> crate::expressions::IntervalUnit {
32523 match s {
32524 "YEAR" | "YY" | "YYYY" => crate::expressions::IntervalUnit::Year,
32525 "QUARTER" | "QQ" | "Q" => crate::expressions::IntervalUnit::Quarter,
32526 "MONTH" | "MM" | "M" => crate::expressions::IntervalUnit::Month,
32527 "WEEK" | "WK" | "WW" | "ISOWEEK" => crate::expressions::IntervalUnit::Week,
32528 "DAY" | "DD" | "D" | "DY" => crate::expressions::IntervalUnit::Day,
32529 "HOUR" | "HH" => crate::expressions::IntervalUnit::Hour,
32530 "MINUTE" | "MI" | "N" => crate::expressions::IntervalUnit::Minute,
32531 "SECOND" | "SS" | "S" => crate::expressions::IntervalUnit::Second,
32532 "MILLISECOND" | "MS" => crate::expressions::IntervalUnit::Millisecond,
32533 "MICROSECOND" | "MCS" | "US" => crate::expressions::IntervalUnit::Microsecond,
32534 _ if s.starts_with("WEEK(") => crate::expressions::IntervalUnit::Week,
32535 _ => crate::expressions::IntervalUnit::Day,
32536 }
32537 }
32538
32539 /// Convert expression to simple string for interval building
32540 fn expr_to_string_static(expr: &Expression) -> String {
32541 use crate::expressions::Literal;
32542 match expr {
32543 Expression::Literal(Literal::Number(s)) => s.clone(),
32544 Expression::Literal(Literal::String(s)) => s.clone(),
32545 Expression::Identifier(id) => id.name.clone(),
32546 Expression::Neg(f) => format!("-{}", Self::expr_to_string_static(&f.this)),
32547 _ => "1".to_string(),
32548 }
32549 }
32550
32551 /// Extract a simple string representation from a literal expression
32552 fn expr_to_string(expr: &Expression) -> String {
32553 use crate::expressions::Literal;
32554 match expr {
32555 Expression::Literal(Literal::Number(s)) => s.clone(),
32556 Expression::Literal(Literal::String(s)) => s.clone(),
32557 Expression::Neg(f) => format!("-{}", Self::expr_to_string(&f.this)),
32558 Expression::Identifier(id) => id.name.clone(),
32559 _ => "1".to_string(),
32560 }
32561 }
32562
32563 /// Quote an interval value expression as a string literal if it's a number (or negated number)
32564 fn quote_interval_val(expr: &Expression) -> Expression {
32565 use crate::expressions::Literal;
32566 match expr {
32567 Expression::Literal(Literal::Number(n)) => {
32568 Expression::Literal(Literal::String(n.clone()))
32569 }
32570 Expression::Literal(Literal::String(_)) => expr.clone(),
32571 Expression::Neg(inner) => {
32572 if let Expression::Literal(Literal::Number(n)) = &inner.this {
32573 Expression::Literal(Literal::String(format!("-{}", n)))
32574 } else {
32575 expr.clone()
32576 }
32577 }
32578 _ => expr.clone(),
32579 }
32580 }
32581
32582 /// Check if a timestamp string contains timezone info (offset like +02:00, or named timezone)
32583 fn timestamp_string_has_timezone(ts: &str) -> bool {
32584 let trimmed = ts.trim();
32585 // Check for numeric timezone offsets: +N, -N, +NN:NN, -NN:NN at end
32586 if let Some(last_space) = trimmed.rfind(' ') {
32587 let suffix = &trimmed[last_space + 1..];
32588 if (suffix.starts_with('+') || suffix.starts_with('-')) && suffix.len() > 1 {
32589 let rest = &suffix[1..];
32590 if rest.chars().all(|c| c.is_ascii_digit() || c == ':') {
32591 return true;
32592 }
32593 }
32594 }
32595 // Check for named timezone abbreviations
32596 let ts_lower = trimmed.to_lowercase();
32597 let tz_abbrevs = [" utc", " gmt", " cet", " est", " pst", " cst", " mst"];
32598 for abbrev in &tz_abbrevs {
32599 if ts_lower.ends_with(abbrev) {
32600 return true;
32601 }
32602 }
32603 false
32604 }
32605
32606 /// Maybe CAST timestamp literal to TIMESTAMPTZ for Snowflake
32607 fn maybe_cast_ts_to_tz(expr: Expression, func_name: &str) -> Expression {
32608 use crate::expressions::{Cast, DataType, Literal};
32609 match expr {
32610 Expression::Literal(Literal::Timestamp(s)) => {
32611 let tz = func_name.starts_with("TIMESTAMP");
32612 Expression::Cast(Box::new(Cast {
32613 this: Expression::Literal(Literal::String(s)),
32614 to: if tz {
32615 DataType::Timestamp {
32616 timezone: true,
32617 precision: None,
32618 }
32619 } else {
32620 DataType::Timestamp {
32621 timezone: false,
32622 precision: None,
32623 }
32624 },
32625 trailing_comments: vec![],
32626 double_colon_syntax: false,
32627 format: None,
32628 default: None,
32629 inferred_type: None,
32630 }))
32631 }
32632 other => other,
32633 }
32634 }
32635
32636 /// Maybe CAST timestamp literal to TIMESTAMP (no tz)
32637 fn maybe_cast_ts(expr: Expression) -> Expression {
32638 use crate::expressions::{Cast, DataType, Literal};
32639 match expr {
32640 Expression::Literal(Literal::Timestamp(s)) => Expression::Cast(Box::new(Cast {
32641 this: Expression::Literal(Literal::String(s)),
32642 to: DataType::Timestamp {
32643 timezone: false,
32644 precision: None,
32645 },
32646 trailing_comments: vec![],
32647 double_colon_syntax: false,
32648 format: None,
32649 default: None,
32650 inferred_type: None,
32651 })),
32652 other => other,
32653 }
32654 }
32655
32656 /// Convert DATE 'x' literal to CAST('x' AS DATE)
32657 fn date_literal_to_cast(expr: Expression) -> Expression {
32658 use crate::expressions::{Cast, DataType, Literal};
32659 match expr {
32660 Expression::Literal(Literal::Date(s)) => Expression::Cast(Box::new(Cast {
32661 this: Expression::Literal(Literal::String(s)),
32662 to: DataType::Date,
32663 trailing_comments: vec![],
32664 double_colon_syntax: false,
32665 format: None,
32666 default: None,
32667 inferred_type: None,
32668 })),
32669 other => other,
32670 }
32671 }
32672
32673 /// Ensure an expression that should be a date is CAST(... AS DATE).
32674 /// Handles both DATE literals and string literals that look like dates.
32675 fn ensure_cast_date(expr: Expression) -> Expression {
32676 use crate::expressions::{Cast, DataType, Literal};
32677 match expr {
32678 Expression::Literal(Literal::Date(s)) => Expression::Cast(Box::new(Cast {
32679 this: Expression::Literal(Literal::String(s)),
32680 to: DataType::Date,
32681 trailing_comments: vec![],
32682 double_colon_syntax: false,
32683 format: None,
32684 default: None,
32685 inferred_type: None,
32686 })),
32687 Expression::Literal(Literal::String(ref _s)) => {
32688 // String literal that should be a date -> CAST('s' AS DATE)
32689 Expression::Cast(Box::new(Cast {
32690 this: expr,
32691 to: DataType::Date,
32692 trailing_comments: vec![],
32693 double_colon_syntax: false,
32694 format: None,
32695 default: None,
32696 inferred_type: None,
32697 }))
32698 }
32699 // Already a CAST or other expression -> leave as-is
32700 other => other,
32701 }
32702 }
32703
32704 /// Force CAST(expr AS DATE) for any expression (not just literals)
32705 /// Skips if the expression is already a CAST to DATE
32706 fn force_cast_date(expr: Expression) -> Expression {
32707 use crate::expressions::{Cast, DataType};
32708 // If it's already a CAST to DATE, don't double-wrap
32709 if let Expression::Cast(ref c) = expr {
32710 if matches!(c.to, DataType::Date) {
32711 return expr;
32712 }
32713 }
32714 Expression::Cast(Box::new(Cast {
32715 this: expr,
32716 to: DataType::Date,
32717 trailing_comments: vec![],
32718 double_colon_syntax: false,
32719 format: None,
32720 default: None,
32721 inferred_type: None,
32722 }))
32723 }
32724
32725 /// Internal TO_DATE function that won't be converted to CAST by the Snowflake handler.
32726 /// Uses the name `_POLYGLOT_TO_DATE` which is not recognized by the TO_DATE -> CAST logic.
32727 /// The Snowflake DATEDIFF handler converts these back to TO_DATE.
32728 const PRESERVED_TO_DATE: &'static str = "_POLYGLOT_TO_DATE";
32729
32730 fn ensure_to_date_preserved(expr: Expression) -> Expression {
32731 use crate::expressions::{Function, Literal};
32732 if matches!(expr, Expression::Literal(Literal::String(_))) {
32733 Expression::Function(Box::new(Function::new(
32734 Self::PRESERVED_TO_DATE.to_string(),
32735 vec![expr],
32736 )))
32737 } else {
32738 expr
32739 }
32740 }
32741
32742 /// TRY_CAST(expr AS DATE) - used for DuckDB when TO_DATE is unwrapped
32743 fn try_cast_date(expr: Expression) -> Expression {
32744 use crate::expressions::{Cast, DataType};
32745 Expression::TryCast(Box::new(Cast {
32746 this: expr,
32747 to: DataType::Date,
32748 trailing_comments: vec![],
32749 double_colon_syntax: false,
32750 format: None,
32751 default: None,
32752 inferred_type: None,
32753 }))
32754 }
32755
32756 /// CAST(CAST(expr AS TIMESTAMP) AS DATE) - used when Hive string dates need to be cast
32757 fn double_cast_timestamp_date(expr: Expression) -> Expression {
32758 use crate::expressions::{Cast, DataType};
32759 let inner = Expression::Cast(Box::new(Cast {
32760 this: expr,
32761 to: DataType::Timestamp {
32762 timezone: false,
32763 precision: None,
32764 },
32765 trailing_comments: vec![],
32766 double_colon_syntax: false,
32767 format: None,
32768 default: None,
32769 inferred_type: None,
32770 }));
32771 Expression::Cast(Box::new(Cast {
32772 this: inner,
32773 to: DataType::Date,
32774 trailing_comments: vec![],
32775 double_colon_syntax: false,
32776 format: None,
32777 default: None,
32778 inferred_type: None,
32779 }))
32780 }
32781
32782 /// CAST(CAST(expr AS DATETIME) AS DATE) - BigQuery variant
32783 fn double_cast_datetime_date(expr: Expression) -> Expression {
32784 use crate::expressions::{Cast, DataType};
32785 let inner = Expression::Cast(Box::new(Cast {
32786 this: expr,
32787 to: DataType::Custom {
32788 name: "DATETIME".to_string(),
32789 },
32790 trailing_comments: vec![],
32791 double_colon_syntax: false,
32792 format: None,
32793 default: None,
32794 inferred_type: None,
32795 }));
32796 Expression::Cast(Box::new(Cast {
32797 this: inner,
32798 to: DataType::Date,
32799 trailing_comments: vec![],
32800 double_colon_syntax: false,
32801 format: None,
32802 default: None,
32803 inferred_type: None,
32804 }))
32805 }
32806
32807 /// CAST(CAST(expr AS DATETIME2) AS DATE) - TSQL variant
32808 fn double_cast_datetime2_date(expr: Expression) -> Expression {
32809 use crate::expressions::{Cast, DataType};
32810 let inner = Expression::Cast(Box::new(Cast {
32811 this: expr,
32812 to: DataType::Custom {
32813 name: "DATETIME2".to_string(),
32814 },
32815 trailing_comments: vec![],
32816 double_colon_syntax: false,
32817 format: None,
32818 default: None,
32819 inferred_type: None,
32820 }));
32821 Expression::Cast(Box::new(Cast {
32822 this: inner,
32823 to: DataType::Date,
32824 trailing_comments: vec![],
32825 double_colon_syntax: false,
32826 format: None,
32827 default: None,
32828 inferred_type: None,
32829 }))
32830 }
32831
32832 /// Convert Hive/Java-style date format strings to C-style (strftime) format
32833 /// e.g., "yyyy-MM-dd'T'HH" -> "%Y-%m-%d'T'%H"
32834 fn hive_format_to_c_format(fmt: &str) -> String {
32835 let mut result = String::new();
32836 let chars: Vec<char> = fmt.chars().collect();
32837 let mut i = 0;
32838 while i < chars.len() {
32839 match chars[i] {
32840 'y' => {
32841 let mut count = 0;
32842 while i < chars.len() && chars[i] == 'y' {
32843 count += 1;
32844 i += 1;
32845 }
32846 if count >= 4 {
32847 result.push_str("%Y");
32848 } else if count == 2 {
32849 result.push_str("%y");
32850 } else {
32851 result.push_str("%Y");
32852 }
32853 }
32854 'M' => {
32855 let mut count = 0;
32856 while i < chars.len() && chars[i] == 'M' {
32857 count += 1;
32858 i += 1;
32859 }
32860 if count >= 3 {
32861 result.push_str("%b");
32862 } else if count == 2 {
32863 result.push_str("%m");
32864 } else {
32865 result.push_str("%m");
32866 }
32867 }
32868 'd' => {
32869 let mut _count = 0;
32870 while i < chars.len() && chars[i] == 'd' {
32871 _count += 1;
32872 i += 1;
32873 }
32874 result.push_str("%d");
32875 }
32876 'H' => {
32877 let mut _count = 0;
32878 while i < chars.len() && chars[i] == 'H' {
32879 _count += 1;
32880 i += 1;
32881 }
32882 result.push_str("%H");
32883 }
32884 'h' => {
32885 let mut _count = 0;
32886 while i < chars.len() && chars[i] == 'h' {
32887 _count += 1;
32888 i += 1;
32889 }
32890 result.push_str("%I");
32891 }
32892 'm' => {
32893 let mut _count = 0;
32894 while i < chars.len() && chars[i] == 'm' {
32895 _count += 1;
32896 i += 1;
32897 }
32898 result.push_str("%M");
32899 }
32900 's' => {
32901 let mut _count = 0;
32902 while i < chars.len() && chars[i] == 's' {
32903 _count += 1;
32904 i += 1;
32905 }
32906 result.push_str("%S");
32907 }
32908 'S' => {
32909 // Fractional seconds - skip
32910 while i < chars.len() && chars[i] == 'S' {
32911 i += 1;
32912 }
32913 result.push_str("%f");
32914 }
32915 'a' => {
32916 // AM/PM
32917 while i < chars.len() && chars[i] == 'a' {
32918 i += 1;
32919 }
32920 result.push_str("%p");
32921 }
32922 'E' => {
32923 let mut count = 0;
32924 while i < chars.len() && chars[i] == 'E' {
32925 count += 1;
32926 i += 1;
32927 }
32928 if count >= 4 {
32929 result.push_str("%A");
32930 } else {
32931 result.push_str("%a");
32932 }
32933 }
32934 '\'' => {
32935 // Quoted literal text - pass through the quotes and content
32936 result.push('\'');
32937 i += 1;
32938 while i < chars.len() && chars[i] != '\'' {
32939 result.push(chars[i]);
32940 i += 1;
32941 }
32942 if i < chars.len() {
32943 result.push('\'');
32944 i += 1;
32945 }
32946 }
32947 c => {
32948 result.push(c);
32949 i += 1;
32950 }
32951 }
32952 }
32953 result
32954 }
32955
32956 /// Convert Hive/Java format to Presto format (uses %T for HH:mm:ss)
32957 fn hive_format_to_presto_format(fmt: &str) -> String {
32958 let c_fmt = Self::hive_format_to_c_format(fmt);
32959 // Presto uses %T for HH:MM:SS
32960 c_fmt.replace("%H:%M:%S", "%T")
32961 }
32962
32963 /// Ensure a timestamp-like expression for DuckDB with CAST(... AS TIMESTAMP)
32964 fn ensure_cast_timestamp(expr: Expression) -> Expression {
32965 use crate::expressions::{Cast, DataType, Literal};
32966 match expr {
32967 Expression::Literal(Literal::Timestamp(s)) => Expression::Cast(Box::new(Cast {
32968 this: Expression::Literal(Literal::String(s)),
32969 to: DataType::Timestamp {
32970 timezone: false,
32971 precision: None,
32972 },
32973 trailing_comments: vec![],
32974 double_colon_syntax: false,
32975 format: None,
32976 default: None,
32977 inferred_type: None,
32978 })),
32979 Expression::Literal(Literal::String(ref _s)) => Expression::Cast(Box::new(Cast {
32980 this: expr,
32981 to: DataType::Timestamp {
32982 timezone: false,
32983 precision: None,
32984 },
32985 trailing_comments: vec![],
32986 double_colon_syntax: false,
32987 format: None,
32988 default: None,
32989 inferred_type: None,
32990 })),
32991 Expression::Literal(Literal::Datetime(s)) => Expression::Cast(Box::new(Cast {
32992 this: Expression::Literal(Literal::String(s)),
32993 to: DataType::Timestamp {
32994 timezone: false,
32995 precision: None,
32996 },
32997 trailing_comments: vec![],
32998 double_colon_syntax: false,
32999 format: None,
33000 default: None,
33001 inferred_type: None,
33002 })),
33003 other => other,
33004 }
33005 }
33006
33007 /// Force CAST to TIMESTAMP for any expression (not just literals)
33008 /// Used when transpiling from Redshift/TSQL where DATEDIFF/DATEADD args need explicit timestamp cast
33009 fn force_cast_timestamp(expr: Expression) -> Expression {
33010 use crate::expressions::{Cast, DataType};
33011 // Don't double-wrap if already a CAST to TIMESTAMP
33012 if let Expression::Cast(ref c) = expr {
33013 if matches!(c.to, DataType::Timestamp { .. }) {
33014 return expr;
33015 }
33016 }
33017 Expression::Cast(Box::new(Cast {
33018 this: expr,
33019 to: DataType::Timestamp {
33020 timezone: false,
33021 precision: None,
33022 },
33023 trailing_comments: vec![],
33024 double_colon_syntax: false,
33025 format: None,
33026 default: None,
33027 inferred_type: None,
33028 }))
33029 }
33030
33031 /// Ensure a timestamp-like expression for DuckDB with CAST(... AS TIMESTAMPTZ)
33032 fn ensure_cast_timestamptz(expr: Expression) -> Expression {
33033 use crate::expressions::{Cast, DataType, Literal};
33034 match expr {
33035 Expression::Literal(Literal::Timestamp(s)) => Expression::Cast(Box::new(Cast {
33036 this: Expression::Literal(Literal::String(s)),
33037 to: DataType::Timestamp {
33038 timezone: true,
33039 precision: None,
33040 },
33041 trailing_comments: vec![],
33042 double_colon_syntax: false,
33043 format: None,
33044 default: None,
33045 inferred_type: None,
33046 })),
33047 Expression::Literal(Literal::String(ref _s)) => Expression::Cast(Box::new(Cast {
33048 this: expr,
33049 to: DataType::Timestamp {
33050 timezone: true,
33051 precision: None,
33052 },
33053 trailing_comments: vec![],
33054 double_colon_syntax: false,
33055 format: None,
33056 default: None,
33057 inferred_type: None,
33058 })),
33059 Expression::Literal(Literal::Datetime(s)) => Expression::Cast(Box::new(Cast {
33060 this: Expression::Literal(Literal::String(s)),
33061 to: DataType::Timestamp {
33062 timezone: true,
33063 precision: None,
33064 },
33065 trailing_comments: vec![],
33066 double_colon_syntax: false,
33067 format: None,
33068 default: None,
33069 inferred_type: None,
33070 })),
33071 other => other,
33072 }
33073 }
33074
33075 /// Ensure expression is CAST to DATETIME (for BigQuery)
33076 fn ensure_cast_datetime(expr: Expression) -> Expression {
33077 use crate::expressions::{Cast, DataType, Literal};
33078 match expr {
33079 Expression::Literal(Literal::String(ref _s)) => Expression::Cast(Box::new(Cast {
33080 this: expr,
33081 to: DataType::Custom {
33082 name: "DATETIME".to_string(),
33083 },
33084 trailing_comments: vec![],
33085 double_colon_syntax: false,
33086 format: None,
33087 default: None,
33088 inferred_type: None,
33089 })),
33090 other => other,
33091 }
33092 }
33093
33094 /// Force CAST expression to DATETIME (for BigQuery) - always wraps unless already DATETIME
33095 fn force_cast_datetime(expr: Expression) -> Expression {
33096 use crate::expressions::{Cast, DataType};
33097 if let Expression::Cast(ref c) = expr {
33098 if let DataType::Custom { ref name } = c.to {
33099 if name.eq_ignore_ascii_case("DATETIME") {
33100 return expr;
33101 }
33102 }
33103 }
33104 Expression::Cast(Box::new(Cast {
33105 this: expr,
33106 to: DataType::Custom {
33107 name: "DATETIME".to_string(),
33108 },
33109 trailing_comments: vec![],
33110 double_colon_syntax: false,
33111 format: None,
33112 default: None,
33113 inferred_type: None,
33114 }))
33115 }
33116
33117 /// Ensure expression is CAST to DATETIME2 (for TSQL)
33118 fn ensure_cast_datetime2(expr: Expression) -> Expression {
33119 use crate::expressions::{Cast, DataType, Literal};
33120 match expr {
33121 Expression::Literal(Literal::String(ref _s)) => Expression::Cast(Box::new(Cast {
33122 this: expr,
33123 to: DataType::Custom {
33124 name: "DATETIME2".to_string(),
33125 },
33126 trailing_comments: vec![],
33127 double_colon_syntax: false,
33128 format: None,
33129 default: None,
33130 inferred_type: None,
33131 })),
33132 other => other,
33133 }
33134 }
33135
33136 /// Convert TIMESTAMP 'x' literal to CAST('x' AS TIMESTAMPTZ) for DuckDB
33137 fn ts_literal_to_cast_tz(expr: Expression) -> Expression {
33138 use crate::expressions::{Cast, DataType, Literal};
33139 match expr {
33140 Expression::Literal(Literal::Timestamp(s)) => Expression::Cast(Box::new(Cast {
33141 this: Expression::Literal(Literal::String(s)),
33142 to: DataType::Timestamp {
33143 timezone: true,
33144 precision: None,
33145 },
33146 trailing_comments: vec![],
33147 double_colon_syntax: false,
33148 format: None,
33149 default: None,
33150 inferred_type: None,
33151 })),
33152 other => other,
33153 }
33154 }
33155
33156 /// Convert BigQuery format string to Snowflake format string
33157 fn bq_format_to_snowflake(format_expr: &Expression) -> Expression {
33158 use crate::expressions::Literal;
33159 if let Expression::Literal(Literal::String(s)) = format_expr {
33160 let sf = s
33161 .replace("%Y", "yyyy")
33162 .replace("%m", "mm")
33163 .replace("%d", "DD")
33164 .replace("%H", "HH24")
33165 .replace("%M", "MI")
33166 .replace("%S", "SS")
33167 .replace("%b", "mon")
33168 .replace("%B", "Month")
33169 .replace("%e", "FMDD");
33170 Expression::Literal(Literal::String(sf))
33171 } else {
33172 format_expr.clone()
33173 }
33174 }
33175
33176 /// Convert BigQuery format string to DuckDB format string
33177 fn bq_format_to_duckdb(format_expr: &Expression) -> Expression {
33178 use crate::expressions::Literal;
33179 if let Expression::Literal(Literal::String(s)) = format_expr {
33180 let duck = s
33181 .replace("%T", "%H:%M:%S")
33182 .replace("%F", "%Y-%m-%d")
33183 .replace("%D", "%m/%d/%y")
33184 .replace("%x", "%m/%d/%y")
33185 .replace("%c", "%a %b %-d %H:%M:%S %Y")
33186 .replace("%e", "%-d")
33187 .replace("%E6S", "%S.%f");
33188 Expression::Literal(Literal::String(duck))
33189 } else {
33190 format_expr.clone()
33191 }
33192 }
33193
33194 /// Convert BigQuery CAST FORMAT elements (like YYYY, MM, DD) to strftime (like %Y, %m, %d)
33195 fn bq_cast_format_to_strftime(format_expr: &Expression) -> Expression {
33196 use crate::expressions::Literal;
33197 if let Expression::Literal(Literal::String(s)) = format_expr {
33198 // Replace format elements from longest to shortest to avoid partial matches
33199 let result = s
33200 .replace("YYYYMMDD", "%Y%m%d")
33201 .replace("YYYY", "%Y")
33202 .replace("YY", "%y")
33203 .replace("MONTH", "%B")
33204 .replace("MON", "%b")
33205 .replace("MM", "%m")
33206 .replace("DD", "%d")
33207 .replace("HH24", "%H")
33208 .replace("HH12", "%I")
33209 .replace("HH", "%I")
33210 .replace("MI", "%M")
33211 .replace("SSTZH", "%S%z")
33212 .replace("SS", "%S")
33213 .replace("TZH", "%z");
33214 Expression::Literal(Literal::String(result))
33215 } else {
33216 format_expr.clone()
33217 }
33218 }
33219
33220 /// Normalize BigQuery format strings for BQ->BQ output
33221 fn bq_format_normalize_bq(format_expr: &Expression) -> Expression {
33222 use crate::expressions::Literal;
33223 if let Expression::Literal(Literal::String(s)) = format_expr {
33224 let norm = s.replace("%H:%M:%S", "%T").replace("%x", "%D");
33225 Expression::Literal(Literal::String(norm))
33226 } else {
33227 format_expr.clone()
33228 }
33229 }
33230}
33231
33232#[cfg(test)]
33233mod tests {
33234 use super::*;
33235
33236 #[test]
33237 fn test_dialect_type_from_str() {
33238 assert_eq!(
33239 "postgres".parse::<DialectType>().unwrap(),
33240 DialectType::PostgreSQL
33241 );
33242 assert_eq!(
33243 "postgresql".parse::<DialectType>().unwrap(),
33244 DialectType::PostgreSQL
33245 );
33246 assert_eq!("mysql".parse::<DialectType>().unwrap(), DialectType::MySQL);
33247 assert_eq!(
33248 "bigquery".parse::<DialectType>().unwrap(),
33249 DialectType::BigQuery
33250 );
33251 }
33252
33253 #[test]
33254 fn test_basic_transpile() {
33255 let dialect = Dialect::get(DialectType::Generic);
33256 let result = dialect
33257 .transpile_to("SELECT 1", DialectType::PostgreSQL)
33258 .unwrap();
33259 assert_eq!(result.len(), 1);
33260 assert_eq!(result[0], "SELECT 1");
33261 }
33262
33263 #[test]
33264 fn test_function_transformation_mysql() {
33265 // NVL should be transformed to IFNULL in MySQL
33266 let dialect = Dialect::get(DialectType::Generic);
33267 let result = dialect
33268 .transpile_to("SELECT NVL(a, b)", DialectType::MySQL)
33269 .unwrap();
33270 assert_eq!(result[0], "SELECT IFNULL(a, b)");
33271 }
33272
33273 #[test]
33274 fn test_get_path_duckdb() {
33275 // Test: step by step
33276 let snowflake = Dialect::get(DialectType::Snowflake);
33277
33278 // Step 1: Parse and check what Snowflake produces as intermediate
33279 let result_sf_sf = snowflake
33280 .transpile_to(
33281 "SELECT PARSE_JSON('{\"fruit\":\"banana\"}'):fruit",
33282 DialectType::Snowflake,
33283 )
33284 .unwrap();
33285 eprintln!("Snowflake->Snowflake colon: {}", result_sf_sf[0]);
33286
33287 // Step 2: DuckDB target
33288 let result_sf_dk = snowflake
33289 .transpile_to(
33290 "SELECT PARSE_JSON('{\"fruit\":\"banana\"}'):fruit",
33291 DialectType::DuckDB,
33292 )
33293 .unwrap();
33294 eprintln!("Snowflake->DuckDB colon: {}", result_sf_dk[0]);
33295
33296 // Step 3: GET_PATH directly
33297 let result_gp = snowflake
33298 .transpile_to(
33299 "SELECT GET_PATH(PARSE_JSON('{\"fruit\":\"banana\"}'), 'fruit')",
33300 DialectType::DuckDB,
33301 )
33302 .unwrap();
33303 eprintln!("Snowflake->DuckDB explicit GET_PATH: {}", result_gp[0]);
33304 }
33305
33306 #[test]
33307 fn test_function_transformation_postgres() {
33308 // IFNULL should be transformed to COALESCE in PostgreSQL
33309 let dialect = Dialect::get(DialectType::Generic);
33310 let result = dialect
33311 .transpile_to("SELECT IFNULL(a, b)", DialectType::PostgreSQL)
33312 .unwrap();
33313 assert_eq!(result[0], "SELECT COALESCE(a, b)");
33314
33315 // NVL should also be transformed to COALESCE
33316 let result = dialect
33317 .transpile_to("SELECT NVL(a, b)", DialectType::PostgreSQL)
33318 .unwrap();
33319 assert_eq!(result[0], "SELECT COALESCE(a, b)");
33320 }
33321
33322 #[test]
33323 fn test_hive_cast_to_trycast() {
33324 // Hive CAST should become TRY_CAST for targets that support it
33325 let hive = Dialect::get(DialectType::Hive);
33326 let result = hive
33327 .transpile_to("CAST(1 AS INT)", DialectType::DuckDB)
33328 .unwrap();
33329 assert_eq!(result[0], "TRY_CAST(1 AS INT)");
33330
33331 let result = hive
33332 .transpile_to("CAST(1 AS INT)", DialectType::Presto)
33333 .unwrap();
33334 assert_eq!(result[0], "TRY_CAST(1 AS INTEGER)");
33335 }
33336
33337 #[test]
33338 fn test_hive_array_identity() {
33339 // Hive ARRAY<DATE> should preserve angle bracket syntax
33340 let sql = "CREATE EXTERNAL TABLE `my_table` (`a7` ARRAY<DATE>) ROW FORMAT SERDE 'a' STORED AS INPUTFORMAT 'b' OUTPUTFORMAT 'c' LOCATION 'd' TBLPROPERTIES ('e'='f')";
33341 let hive = Dialect::get(DialectType::Hive);
33342
33343 // Test via transpile_to (this works)
33344 let result = hive.transpile_to(sql, DialectType::Hive).unwrap();
33345 eprintln!("Hive ARRAY via transpile_to: {}", result[0]);
33346 assert!(
33347 result[0].contains("ARRAY<DATE>"),
33348 "transpile_to: Expected ARRAY<DATE>, got: {}",
33349 result[0]
33350 );
33351
33352 // Test via parse -> transform -> generate (identity test path)
33353 let ast = hive.parse(sql).unwrap();
33354 let transformed = hive.transform(ast[0].clone()).unwrap();
33355 let output = hive.generate(&transformed).unwrap();
33356 eprintln!("Hive ARRAY via identity path: {}", output);
33357 assert!(
33358 output.contains("ARRAY<DATE>"),
33359 "identity path: Expected ARRAY<DATE>, got: {}",
33360 output
33361 );
33362 }
33363
33364 #[test]
33365 fn test_starrocks_delete_between_expansion() {
33366 // StarRocks doesn't support BETWEEN in DELETE statements
33367 let dialect = Dialect::get(DialectType::Generic);
33368
33369 // BETWEEN should be expanded to >= AND <= in DELETE
33370 let result = dialect
33371 .transpile_to(
33372 "DELETE FROM t WHERE a BETWEEN b AND c",
33373 DialectType::StarRocks,
33374 )
33375 .unwrap();
33376 assert_eq!(result[0], "DELETE FROM t WHERE a >= b AND a <= c");
33377
33378 // NOT BETWEEN should be expanded to < OR > in DELETE
33379 let result = dialect
33380 .transpile_to(
33381 "DELETE FROM t WHERE a NOT BETWEEN b AND c",
33382 DialectType::StarRocks,
33383 )
33384 .unwrap();
33385 assert_eq!(result[0], "DELETE FROM t WHERE a < b OR a > c");
33386
33387 // BETWEEN in SELECT should NOT be expanded (StarRocks supports it there)
33388 let result = dialect
33389 .transpile_to(
33390 "SELECT * FROM t WHERE a BETWEEN b AND c",
33391 DialectType::StarRocks,
33392 )
33393 .unwrap();
33394 assert!(
33395 result[0].contains("BETWEEN"),
33396 "BETWEEN should be preserved in SELECT"
33397 );
33398 }
33399
33400 #[test]
33401 fn test_snowflake_ltrim_rtrim_parse() {
33402 let sf = Dialect::get(DialectType::Snowflake);
33403 let sql = "SELECT LTRIM(RTRIM(col)) FROM t1";
33404 let result = sf.transpile_to(sql, DialectType::DuckDB);
33405 match &result {
33406 Ok(r) => eprintln!("LTRIM/RTRIM result: {}", r[0]),
33407 Err(e) => eprintln!("LTRIM/RTRIM error: {}", e),
33408 }
33409 assert!(
33410 result.is_ok(),
33411 "Expected successful parse of LTRIM(RTRIM(col)), got error: {:?}",
33412 result.err()
33413 );
33414 }
33415
33416 #[test]
33417 fn test_duckdb_count_if_parse() {
33418 let duck = Dialect::get(DialectType::DuckDB);
33419 let sql = "COUNT_IF(x)";
33420 let result = duck.transpile_to(sql, DialectType::DuckDB);
33421 match &result {
33422 Ok(r) => eprintln!("COUNT_IF result: {}", r[0]),
33423 Err(e) => eprintln!("COUNT_IF error: {}", e),
33424 }
33425 assert!(
33426 result.is_ok(),
33427 "Expected successful parse of COUNT_IF(x), got error: {:?}",
33428 result.err()
33429 );
33430 }
33431
33432 #[test]
33433 fn test_tsql_cast_tinyint_parse() {
33434 let tsql = Dialect::get(DialectType::TSQL);
33435 let sql = "CAST(X AS TINYINT)";
33436 let result = tsql.transpile_to(sql, DialectType::DuckDB);
33437 match &result {
33438 Ok(r) => eprintln!("TSQL CAST TINYINT result: {}", r[0]),
33439 Err(e) => eprintln!("TSQL CAST TINYINT error: {}", e),
33440 }
33441 assert!(
33442 result.is_ok(),
33443 "Expected successful transpile, got error: {:?}",
33444 result.err()
33445 );
33446 }
33447
33448 #[test]
33449 fn test_pg_hash_bitwise_xor() {
33450 let dialect = Dialect::get(DialectType::PostgreSQL);
33451 let result = dialect
33452 .transpile_to("x # y", DialectType::PostgreSQL)
33453 .unwrap();
33454 assert_eq!(result[0], "x # y");
33455 }
33456
33457 #[test]
33458 fn test_pg_array_to_duckdb() {
33459 let dialect = Dialect::get(DialectType::PostgreSQL);
33460 let result = dialect
33461 .transpile_to("SELECT ARRAY[1, 2, 3] @> ARRAY[1, 2]", DialectType::DuckDB)
33462 .unwrap();
33463 assert_eq!(result[0], "SELECT [1, 2, 3] @> [1, 2]");
33464 }
33465
33466 #[test]
33467 fn test_array_remove_bigquery() {
33468 let dialect = Dialect::get(DialectType::Generic);
33469 let result = dialect
33470 .transpile_to("ARRAY_REMOVE(the_array, target)", DialectType::BigQuery)
33471 .unwrap();
33472 assert_eq!(
33473 result[0],
33474 "ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)"
33475 );
33476 }
33477
33478 #[test]
33479 fn test_map_clickhouse_case() {
33480 let dialect = Dialect::get(DialectType::Generic);
33481 let parsed = dialect
33482 .parse("CAST(MAP('a', '1') AS MAP(TEXT, TEXT))")
33483 .unwrap();
33484 eprintln!("MAP parsed: {:?}", parsed);
33485 let result = dialect
33486 .transpile_to(
33487 "CAST(MAP('a', '1') AS MAP(TEXT, TEXT))",
33488 DialectType::ClickHouse,
33489 )
33490 .unwrap();
33491 eprintln!("MAP result: {}", result[0]);
33492 }
33493
33494 #[test]
33495 fn test_generate_date_array_presto() {
33496 let dialect = Dialect::get(DialectType::Generic);
33497 let result = dialect.transpile_to(
33498 "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
33499 DialectType::Presto,
33500 ).unwrap();
33501 eprintln!("GDA -> Presto: {}", result[0]);
33502 assert_eq!(result[0], "SELECT * FROM UNNEST(SEQUENCE(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), (1 * INTERVAL '7' DAY)))");
33503 }
33504
33505 #[test]
33506 fn test_generate_date_array_postgres() {
33507 let dialect = Dialect::get(DialectType::Generic);
33508 let result = dialect.transpile_to(
33509 "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
33510 DialectType::PostgreSQL,
33511 ).unwrap();
33512 eprintln!("GDA -> PostgreSQL: {}", result[0]);
33513 }
33514
33515 #[test]
33516 fn test_generate_date_array_snowflake() {
33517 std::thread::Builder::new()
33518 .stack_size(16 * 1024 * 1024)
33519 .spawn(|| {
33520 let dialect = Dialect::get(DialectType::Generic);
33521 let result = dialect.transpile_to(
33522 "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
33523 DialectType::Snowflake,
33524 ).unwrap();
33525 eprintln!("GDA -> Snowflake: {}", result[0]);
33526 })
33527 .unwrap()
33528 .join()
33529 .unwrap();
33530 }
33531
33532 #[test]
33533 fn test_array_length_generate_date_array_snowflake() {
33534 let dialect = Dialect::get(DialectType::Generic);
33535 let result = dialect.transpile_to(
33536 "SELECT ARRAY_LENGTH(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
33537 DialectType::Snowflake,
33538 ).unwrap();
33539 eprintln!("ARRAY_LENGTH(GDA) -> Snowflake: {}", result[0]);
33540 }
33541
33542 #[test]
33543 fn test_generate_date_array_mysql() {
33544 let dialect = Dialect::get(DialectType::Generic);
33545 let result = dialect.transpile_to(
33546 "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
33547 DialectType::MySQL,
33548 ).unwrap();
33549 eprintln!("GDA -> MySQL: {}", result[0]);
33550 }
33551
33552 #[test]
33553 fn test_generate_date_array_redshift() {
33554 let dialect = Dialect::get(DialectType::Generic);
33555 let result = dialect.transpile_to(
33556 "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
33557 DialectType::Redshift,
33558 ).unwrap();
33559 eprintln!("GDA -> Redshift: {}", result[0]);
33560 }
33561
33562 #[test]
33563 fn test_generate_date_array_tsql() {
33564 let dialect = Dialect::get(DialectType::Generic);
33565 let result = dialect.transpile_to(
33566 "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
33567 DialectType::TSQL,
33568 ).unwrap();
33569 eprintln!("GDA -> TSQL: {}", result[0]);
33570 }
33571
33572 #[test]
33573 fn test_struct_colon_syntax() {
33574 let dialect = Dialect::get(DialectType::Generic);
33575 // Test without colon first
33576 let result = dialect.transpile_to(
33577 "CAST((1, 2, 3, 4) AS STRUCT<a TINYINT, b SMALLINT, c INT, d BIGINT>)",
33578 DialectType::ClickHouse,
33579 );
33580 match result {
33581 Ok(r) => eprintln!("STRUCT no colon -> ClickHouse: {}", r[0]),
33582 Err(e) => eprintln!("STRUCT no colon error: {}", e),
33583 }
33584 // Now test with colon
33585 let result = dialect.transpile_to(
33586 "CAST((1, 2, 3, 4) AS STRUCT<a: TINYINT, b: SMALLINT, c: INT, d: BIGINT>)",
33587 DialectType::ClickHouse,
33588 );
33589 match result {
33590 Ok(r) => eprintln!("STRUCT colon -> ClickHouse: {}", r[0]),
33591 Err(e) => eprintln!("STRUCT colon error: {}", e),
33592 }
33593 }
33594
33595 #[test]
33596 fn test_generate_date_array_cte_wrapped_mysql() {
33597 let dialect = Dialect::get(DialectType::Generic);
33598 let result = dialect.transpile_to(
33599 "WITH dates AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))) SELECT * FROM dates",
33600 DialectType::MySQL,
33601 ).unwrap();
33602 eprintln!("GDA CTE -> MySQL: {}", result[0]);
33603 }
33604
33605 #[test]
33606 fn test_generate_date_array_cte_wrapped_tsql() {
33607 let dialect = Dialect::get(DialectType::Generic);
33608 let result = dialect.transpile_to(
33609 "WITH dates AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))) SELECT * FROM dates",
33610 DialectType::TSQL,
33611 ).unwrap();
33612 eprintln!("GDA CTE -> TSQL: {}", result[0]);
33613 }
33614
33615 #[test]
33616 fn test_decode_literal_no_null_check() {
33617 // Oracle DECODE with all literals should produce simple equality, no IS NULL
33618 let dialect = Dialect::get(DialectType::Oracle);
33619 let result = dialect
33620 .transpile_to("SELECT decode(1,2,3,4)", DialectType::DuckDB)
33621 .unwrap();
33622 assert_eq!(
33623 result[0], "SELECT CASE WHEN 1 = 2 THEN 3 ELSE 4 END",
33624 "Literal DECODE should not have IS NULL checks"
33625 );
33626 }
33627
33628 #[test]
33629 fn test_decode_column_vs_literal_no_null_check() {
33630 // Oracle DECODE with column vs literal should use simple equality (like sqlglot)
33631 let dialect = Dialect::get(DialectType::Oracle);
33632 let result = dialect
33633 .transpile_to("SELECT decode(col, 2, 3, 4) FROM t", DialectType::DuckDB)
33634 .unwrap();
33635 assert_eq!(
33636 result[0], "SELECT CASE WHEN col = 2 THEN 3 ELSE 4 END FROM t",
33637 "Column vs literal DECODE should not have IS NULL checks"
33638 );
33639 }
33640
33641 #[test]
33642 fn test_decode_column_vs_column_keeps_null_check() {
33643 // Oracle DECODE with column vs column should keep null-safe comparison
33644 let dialect = Dialect::get(DialectType::Oracle);
33645 let result = dialect
33646 .transpile_to("SELECT decode(col, col2, 3, 4) FROM t", DialectType::DuckDB)
33647 .unwrap();
33648 assert!(
33649 result[0].contains("IS NULL"),
33650 "Column vs column DECODE should have IS NULL checks, got: {}",
33651 result[0]
33652 );
33653 }
33654
33655 #[test]
33656 fn test_decode_null_search() {
33657 // Oracle DECODE with NULL search should use IS NULL
33658 let dialect = Dialect::get(DialectType::Oracle);
33659 let result = dialect
33660 .transpile_to("SELECT decode(col, NULL, 3, 4) FROM t", DialectType::DuckDB)
33661 .unwrap();
33662 assert_eq!(
33663 result[0],
33664 "SELECT CASE WHEN col IS NULL THEN 3 ELSE 4 END FROM t",
33665 );
33666 }
33667
33668 // =========================================================================
33669 // REGEXP function transpilation tests
33670 // =========================================================================
33671
33672 #[test]
33673 fn test_regexp_substr_snowflake_to_duckdb_2arg() {
33674 let dialect = Dialect::get(DialectType::Snowflake);
33675 let result = dialect
33676 .transpile_to("SELECT REGEXP_SUBSTR(s, 'pattern')", DialectType::DuckDB)
33677 .unwrap();
33678 assert_eq!(result[0], "SELECT REGEXP_EXTRACT(s, 'pattern')");
33679 }
33680
33681 #[test]
33682 fn test_regexp_substr_snowflake_to_duckdb_3arg_pos1() {
33683 let dialect = Dialect::get(DialectType::Snowflake);
33684 let result = dialect
33685 .transpile_to("SELECT REGEXP_SUBSTR(s, 'pattern', 1)", DialectType::DuckDB)
33686 .unwrap();
33687 assert_eq!(result[0], "SELECT REGEXP_EXTRACT(s, 'pattern')");
33688 }
33689
33690 #[test]
33691 fn test_regexp_substr_snowflake_to_duckdb_3arg_pos_gt1() {
33692 let dialect = Dialect::get(DialectType::Snowflake);
33693 let result = dialect
33694 .transpile_to("SELECT REGEXP_SUBSTR(s, 'pattern', 3)", DialectType::DuckDB)
33695 .unwrap();
33696 assert_eq!(
33697 result[0],
33698 "SELECT REGEXP_EXTRACT(NULLIF(SUBSTRING(s, 3), ''), 'pattern')"
33699 );
33700 }
33701
33702 #[test]
33703 fn test_regexp_substr_snowflake_to_duckdb_4arg_occ_gt1() {
33704 let dialect = Dialect::get(DialectType::Snowflake);
33705 let result = dialect
33706 .transpile_to("SELECT REGEXP_SUBSTR(s, 'pattern', 1, 3)", DialectType::DuckDB)
33707 .unwrap();
33708 assert_eq!(
33709 result[0],
33710 "SELECT ARRAY_EXTRACT(REGEXP_EXTRACT_ALL(s, 'pattern'), 3)"
33711 );
33712 }
33713
33714 #[test]
33715 fn test_regexp_substr_snowflake_to_duckdb_5arg_e_flag() {
33716 let dialect = Dialect::get(DialectType::Snowflake);
33717 let result = dialect
33718 .transpile_to(
33719 "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 1, 'e')",
33720 DialectType::DuckDB,
33721 )
33722 .unwrap();
33723 assert_eq!(result[0], "SELECT REGEXP_EXTRACT(s, 'pattern')");
33724 }
33725
33726 #[test]
33727 fn test_regexp_substr_snowflake_to_duckdb_6arg_group0() {
33728 let dialect = Dialect::get(DialectType::Snowflake);
33729 let result = dialect
33730 .transpile_to(
33731 "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 1, 'e', 0)",
33732 DialectType::DuckDB,
33733 )
33734 .unwrap();
33735 assert_eq!(result[0], "SELECT REGEXP_EXTRACT(s, 'pattern')");
33736 }
33737
33738 #[test]
33739 fn test_regexp_substr_snowflake_identity_strip_group0() {
33740 let dialect = Dialect::get(DialectType::Snowflake);
33741 let result = dialect
33742 .transpile_to(
33743 "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 1, 'e', 0)",
33744 DialectType::Snowflake,
33745 )
33746 .unwrap();
33747 assert_eq!(
33748 result[0],
33749 "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 1, 'e')"
33750 );
33751 }
33752
33753 #[test]
33754 fn test_regexp_substr_all_snowflake_to_duckdb_2arg() {
33755 let dialect = Dialect::get(DialectType::Snowflake);
33756 let result = dialect
33757 .transpile_to("SELECT REGEXP_SUBSTR_ALL(s, 'pattern')", DialectType::DuckDB)
33758 .unwrap();
33759 assert_eq!(result[0], "SELECT REGEXP_EXTRACT_ALL(s, 'pattern')");
33760 }
33761
33762 #[test]
33763 fn test_regexp_substr_all_snowflake_to_duckdb_3arg_pos_gt1() {
33764 let dialect = Dialect::get(DialectType::Snowflake);
33765 let result = dialect
33766 .transpile_to(
33767 "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 3)",
33768 DialectType::DuckDB,
33769 )
33770 .unwrap();
33771 assert_eq!(
33772 result[0],
33773 "SELECT REGEXP_EXTRACT_ALL(SUBSTRING(s, 3), 'pattern')"
33774 );
33775 }
33776
33777 #[test]
33778 fn test_regexp_substr_all_snowflake_to_duckdb_5arg_e_flag() {
33779 let dialect = Dialect::get(DialectType::Snowflake);
33780 let result = dialect
33781 .transpile_to(
33782 "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 1, 1, 'e')",
33783 DialectType::DuckDB,
33784 )
33785 .unwrap();
33786 assert_eq!(result[0], "SELECT REGEXP_EXTRACT_ALL(s, 'pattern')");
33787 }
33788
33789 #[test]
33790 fn test_regexp_substr_all_snowflake_to_duckdb_6arg_group0() {
33791 let dialect = Dialect::get(DialectType::Snowflake);
33792 let result = dialect
33793 .transpile_to(
33794 "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 1, 1, 'e', 0)",
33795 DialectType::DuckDB,
33796 )
33797 .unwrap();
33798 assert_eq!(result[0], "SELECT REGEXP_EXTRACT_ALL(s, 'pattern')");
33799 }
33800
33801 #[test]
33802 fn test_regexp_substr_all_snowflake_identity_strip_group0() {
33803 let dialect = Dialect::get(DialectType::Snowflake);
33804 let result = dialect
33805 .transpile_to(
33806 "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 1, 1, 'e', 0)",
33807 DialectType::Snowflake,
33808 )
33809 .unwrap();
33810 assert_eq!(
33811 result[0],
33812 "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 1, 1, 'e')"
33813 );
33814 }
33815
33816 #[test]
33817 fn test_regexp_count_snowflake_to_duckdb_2arg() {
33818 let dialect = Dialect::get(DialectType::Snowflake);
33819 let result = dialect
33820 .transpile_to("SELECT REGEXP_COUNT(s, 'pattern')", DialectType::DuckDB)
33821 .unwrap();
33822 assert_eq!(
33823 result[0],
33824 "SELECT CASE WHEN 'pattern' = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(s, 'pattern')) END"
33825 );
33826 }
33827
33828 #[test]
33829 fn test_regexp_count_snowflake_to_duckdb_3arg() {
33830 let dialect = Dialect::get(DialectType::Snowflake);
33831 let result = dialect
33832 .transpile_to("SELECT REGEXP_COUNT(s, 'pattern', 3)", DialectType::DuckDB)
33833 .unwrap();
33834 assert_eq!(
33835 result[0],
33836 "SELECT CASE WHEN 'pattern' = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(SUBSTRING(s, 3), 'pattern')) END"
33837 );
33838 }
33839
33840 #[test]
33841 fn test_regexp_count_snowflake_to_duckdb_4arg_flags() {
33842 let dialect = Dialect::get(DialectType::Snowflake);
33843 let result = dialect
33844 .transpile_to(
33845 "SELECT REGEXP_COUNT(s, 'pattern', 1, 'i')",
33846 DialectType::DuckDB,
33847 )
33848 .unwrap();
33849 assert_eq!(
33850 result[0],
33851 "SELECT CASE WHEN '(?i)' || 'pattern' = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(SUBSTRING(s, 1), '(?i)' || 'pattern')) END"
33852 );
33853 }
33854
33855 #[test]
33856 fn test_regexp_count_snowflake_to_duckdb_4arg_flags_literal_string() {
33857 let dialect = Dialect::get(DialectType::Snowflake);
33858 let result = dialect
33859 .transpile_to(
33860 "SELECT REGEXP_COUNT('Hello World', 'L', 1, 'im')",
33861 DialectType::DuckDB,
33862 )
33863 .unwrap();
33864 assert_eq!(
33865 result[0],
33866 "SELECT CASE WHEN '(?im)' || 'L' = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(SUBSTRING('Hello World', 1), '(?im)' || 'L')) END"
33867 );
33868 }
33869
33870 #[test]
33871 fn test_regexp_replace_snowflake_to_duckdb_5arg_pos1_occ1() {
33872 let dialect = Dialect::get(DialectType::Snowflake);
33873 let result = dialect
33874 .transpile_to(
33875 "SELECT REGEXP_REPLACE(s, 'pattern', 'repl', 1, 1)",
33876 DialectType::DuckDB,
33877 )
33878 .unwrap();
33879 assert_eq!(result[0], "SELECT REGEXP_REPLACE(s, 'pattern', 'repl')");
33880 }
33881
33882 #[test]
33883 fn test_regexp_replace_snowflake_to_duckdb_5arg_pos_gt1_occ0() {
33884 let dialect = Dialect::get(DialectType::Snowflake);
33885 let result = dialect
33886 .transpile_to(
33887 "SELECT REGEXP_REPLACE(s, 'pattern', 'repl', 3, 0)",
33888 DialectType::DuckDB,
33889 )
33890 .unwrap();
33891 assert_eq!(
33892 result[0],
33893 "SELECT SUBSTRING(s, 1, 2) || REGEXP_REPLACE(SUBSTRING(s, 3), 'pattern', 'repl', 'g')"
33894 );
33895 }
33896
33897 #[test]
33898 fn test_regexp_replace_snowflake_to_duckdb_5arg_pos_gt1_occ1() {
33899 let dialect = Dialect::get(DialectType::Snowflake);
33900 let result = dialect
33901 .transpile_to(
33902 "SELECT REGEXP_REPLACE(s, 'pattern', 'repl', 3, 1)",
33903 DialectType::DuckDB,
33904 )
33905 .unwrap();
33906 assert_eq!(
33907 result[0],
33908 "SELECT SUBSTRING(s, 1, 2) || REGEXP_REPLACE(SUBSTRING(s, 3), 'pattern', 'repl')"
33909 );
33910 }
33911
33912 #[test]
33913 fn test_rlike_snowflake_to_duckdb_2arg() {
33914 let dialect = Dialect::get(DialectType::Snowflake);
33915 let result = dialect
33916 .transpile_to("SELECT RLIKE(a, b)", DialectType::DuckDB)
33917 .unwrap();
33918 assert_eq!(
33919 result[0],
33920 "SELECT REGEXP_MATCHES(a, '^(' || (b) || ')$')"
33921 );
33922 }
33923
33924 #[test]
33925 fn test_rlike_snowflake_to_duckdb_3arg_flags() {
33926 let dialect = Dialect::get(DialectType::Snowflake);
33927 let result = dialect
33928 .transpile_to("SELECT RLIKE(a, b, 'i')", DialectType::DuckDB)
33929 .unwrap();
33930 assert_eq!(
33931 result[0],
33932 "SELECT REGEXP_MATCHES(a, '^(' || (b) || ')$', 'i')"
33933 );
33934 }
33935
33936 #[test]
33937 fn test_regexp_extract_all_bigquery_to_snowflake_no_capture() {
33938 let dialect = Dialect::get(DialectType::BigQuery);
33939 let result = dialect
33940 .transpile_to(
33941 "SELECT REGEXP_EXTRACT_ALL(s, 'pattern')",
33942 DialectType::Snowflake,
33943 )
33944 .unwrap();
33945 assert_eq!(result[0], "SELECT REGEXP_SUBSTR_ALL(s, 'pattern')");
33946 }
33947
33948 #[test]
33949 fn test_regexp_extract_all_bigquery_to_snowflake_with_capture() {
33950 let dialect = Dialect::get(DialectType::BigQuery);
33951 let result = dialect
33952 .transpile_to(
33953 "SELECT REGEXP_EXTRACT_ALL(s, '(a)[0-9]')",
33954 DialectType::Snowflake,
33955 )
33956 .unwrap();
33957 assert_eq!(
33958 result[0],
33959 "SELECT REGEXP_SUBSTR_ALL(s, '(a)[0-9]', 1, 1, 'c', 1)"
33960 );
33961 }
33962
33963 #[test]
33964 fn test_regexp_instr_snowflake_to_duckdb_2arg() {
33965 let handle = std::thread::Builder::new()
33966 .stack_size(16 * 1024 * 1024)
33967 .spawn(|| {
33968 let dialect = Dialect::get(DialectType::Snowflake);
33969 let result = dialect
33970 .transpile_to("SELECT REGEXP_INSTR(s, 'pattern')", DialectType::DuckDB)
33971 .unwrap();
33972 // Should produce a CASE WHEN expression
33973 assert!(result[0].contains("CASE WHEN"), "Expected CASE WHEN in result: {}", result[0]);
33974 assert!(result[0].contains("LIST_SUM"), "Expected LIST_SUM in result: {}", result[0]);
33975 })
33976 .unwrap();
33977 handle.join().unwrap();
33978 }
33979
33980 #[test]
33981 fn test_array_except_generic_to_duckdb() {
33982 // Use larger stack to avoid overflow from deeply nested expression Drop
33983 let handle = std::thread::Builder::new()
33984 .stack_size(16 * 1024 * 1024)
33985 .spawn(|| {
33986 let dialect = Dialect::get(DialectType::Generic);
33987 let result = dialect
33988 .transpile_to("SELECT ARRAY_EXCEPT(ARRAY(1, 2, 3), ARRAY(2))", DialectType::DuckDB)
33989 .unwrap();
33990 eprintln!("ARRAY_EXCEPT Generic->DuckDB: {}", result[0]);
33991 assert!(result[0].contains("CASE WHEN"), "Expected CASE WHEN: {}", result[0]);
33992 assert!(result[0].contains("LIST_TRANSFORM"), "Expected LIST_TRANSFORM: {}", result[0]);
33993 assert!(result[0].contains("LIST_FILTER"), "Expected LIST_FILTER: {}", result[0]);
33994 assert!(result[0].contains("LIST_ZIP"), "Expected LIST_ZIP: {}", result[0]);
33995 assert!(result[0].contains("GENERATE_SERIES"), "Expected GENERATE_SERIES: {}", result[0]);
33996 assert!(result[0].contains("IS NOT DISTINCT FROM"), "Expected IS NOT DISTINCT FROM: {}", result[0]);
33997 })
33998 .unwrap();
33999 handle.join().unwrap();
34000 }
34001
34002 #[test]
34003 fn test_array_except_generic_to_snowflake() {
34004 let dialect = Dialect::get(DialectType::Generic);
34005 let result = dialect
34006 .transpile_to("SELECT ARRAY_EXCEPT(ARRAY(1, 2, 3), ARRAY(2))", DialectType::Snowflake)
34007 .unwrap();
34008 eprintln!("ARRAY_EXCEPT Generic->Snowflake: {}", result[0]);
34009 assert_eq!(result[0], "SELECT ARRAY_EXCEPT([1, 2, 3], [2])");
34010 }
34011
34012 #[test]
34013 fn test_array_except_generic_to_presto() {
34014 let dialect = Dialect::get(DialectType::Generic);
34015 let result = dialect
34016 .transpile_to("SELECT ARRAY_EXCEPT(ARRAY(1, 2, 3), ARRAY(2))", DialectType::Presto)
34017 .unwrap();
34018 eprintln!("ARRAY_EXCEPT Generic->Presto: {}", result[0]);
34019 assert_eq!(result[0], "SELECT ARRAY_EXCEPT(ARRAY[1, 2, 3], ARRAY[2])");
34020 }
34021
34022 #[test]
34023 fn test_array_except_snowflake_to_duckdb() {
34024 let handle = std::thread::Builder::new()
34025 .stack_size(16 * 1024 * 1024)
34026 .spawn(|| {
34027 let dialect = Dialect::get(DialectType::Snowflake);
34028 let result = dialect
34029 .transpile_to("SELECT ARRAY_EXCEPT([1, 2, 3], [2])", DialectType::DuckDB)
34030 .unwrap();
34031 eprintln!("ARRAY_EXCEPT Snowflake->DuckDB: {}", result[0]);
34032 assert!(result[0].contains("CASE WHEN"), "Expected CASE WHEN: {}", result[0]);
34033 assert!(result[0].contains("LIST_TRANSFORM"), "Expected LIST_TRANSFORM: {}", result[0]);
34034 })
34035 .unwrap();
34036 handle.join().unwrap();
34037 }
34038
34039 #[test]
34040 fn test_array_contains_snowflake_to_snowflake() {
34041 let dialect = Dialect::get(DialectType::Snowflake);
34042 let result = dialect
34043 .transpile_to("SELECT ARRAY_CONTAINS(x, [1, NULL, 3])", DialectType::Snowflake)
34044 .unwrap();
34045 eprintln!("ARRAY_CONTAINS Snowflake->Snowflake: {}", result[0]);
34046 assert_eq!(result[0], "SELECT ARRAY_CONTAINS(x, [1, NULL, 3])");
34047 }
34048
34049 #[test]
34050 fn test_array_contains_snowflake_to_duckdb() {
34051 let dialect = Dialect::get(DialectType::Snowflake);
34052 let result = dialect
34053 .transpile_to("SELECT ARRAY_CONTAINS(x, [1, NULL, 3])", DialectType::DuckDB)
34054 .unwrap();
34055 eprintln!("ARRAY_CONTAINS Snowflake->DuckDB: {}", result[0]);
34056 assert!(result[0].contains("CASE WHEN"), "Expected CASE WHEN: {}", result[0]);
34057 assert!(result[0].contains("NULLIF"), "Expected NULLIF: {}", result[0]);
34058 assert!(result[0].contains("ARRAY_CONTAINS"), "Expected ARRAY_CONTAINS: {}", result[0]);
34059 }
34060
34061 #[test]
34062 fn test_array_distinct_snowflake_to_duckdb() {
34063 let dialect = Dialect::get(DialectType::Snowflake);
34064 let result = dialect
34065 .transpile_to("SELECT ARRAY_DISTINCT([1, 2, 2, 3, 1])", DialectType::DuckDB)
34066 .unwrap();
34067 eprintln!("ARRAY_DISTINCT Snowflake->DuckDB: {}", result[0]);
34068 assert!(result[0].contains("CASE WHEN"), "Expected CASE WHEN: {}", result[0]);
34069 assert!(result[0].contains("LIST_DISTINCT"), "Expected LIST_DISTINCT: {}", result[0]);
34070 assert!(result[0].contains("LIST_APPEND"), "Expected LIST_APPEND: {}", result[0]);
34071 assert!(result[0].contains("LIST_FILTER"), "Expected LIST_FILTER: {}", result[0]);
34072 }
34073}