// polyglot_sql/dialects/mod.rs
1//! SQL Dialect System
2//!
3//! This module implements the dialect abstraction layer that enables SQL transpilation
4//! between 30+ database engines. Each dialect encapsulates three concerns:
5//!
//! - **Tokenization**: Dialect-specific lexing rules (e.g., BigQuery and MySQL quote
//!   identifiers with backticks, while TSQL uses square brackets).
8//! - **Generation**: How AST nodes are rendered back to SQL text, including identifier quoting
9//! style, function name casing, and syntax variations.
10//! - **Transformation**: AST-level rewrites that convert dialect-specific constructs to/from
11//! a normalized form (e.g., Snowflake `SQUARE(x)` becomes `POWER(x, 2)`).
12//!
13//! The primary entry point is [`Dialect::get`], which returns a configured [`Dialect`] instance
14//! for a given [`DialectType`]. From there, callers can [`parse`](Dialect::parse),
15//! [`generate`](Dialect::generate), [`transform`](Dialect::transform), or
16//! [`transpile_to`](Dialect::transpile_to) another dialect in a single call.
17//!
18//! Each concrete dialect (e.g., `PostgresDialect`, `BigQueryDialect`) implements the
19//! [`DialectImpl`] trait, which provides configuration hooks and expression-level transforms.
20//! Dialect modules live in submodules of this module and are re-exported here.
21
22mod generic; // Always compiled
23
24#[cfg(feature = "dialect-athena")]
25mod athena;
26#[cfg(feature = "dialect-bigquery")]
27mod bigquery;
28#[cfg(feature = "dialect-clickhouse")]
29mod clickhouse;
30#[cfg(feature = "dialect-cockroachdb")]
31mod cockroachdb;
32#[cfg(feature = "dialect-databricks")]
33mod databricks;
34#[cfg(feature = "dialect-datafusion")]
35mod datafusion;
36#[cfg(feature = "dialect-doris")]
37mod doris;
38#[cfg(feature = "dialect-dremio")]
39mod dremio;
40#[cfg(feature = "dialect-drill")]
41mod drill;
42#[cfg(feature = "dialect-druid")]
43mod druid;
44#[cfg(feature = "dialect-duckdb")]
45mod duckdb;
46#[cfg(feature = "dialect-dune")]
47mod dune;
48#[cfg(feature = "dialect-exasol")]
49mod exasol;
50#[cfg(feature = "dialect-fabric")]
51mod fabric;
52#[cfg(feature = "dialect-hive")]
53mod hive;
54#[cfg(feature = "dialect-materialize")]
55mod materialize;
56#[cfg(feature = "dialect-mysql")]
57mod mysql;
58#[cfg(feature = "dialect-oracle")]
59mod oracle;
60#[cfg(feature = "dialect-postgresql")]
61mod postgres;
62#[cfg(feature = "dialect-presto")]
63mod presto;
64#[cfg(feature = "dialect-redshift")]
65mod redshift;
66#[cfg(feature = "dialect-risingwave")]
67mod risingwave;
68#[cfg(feature = "dialect-singlestore")]
69mod singlestore;
70#[cfg(feature = "dialect-snowflake")]
71mod snowflake;
72#[cfg(feature = "dialect-solr")]
73mod solr;
74#[cfg(feature = "dialect-spark")]
75mod spark;
76#[cfg(feature = "dialect-sqlite")]
77mod sqlite;
78#[cfg(feature = "dialect-starrocks")]
79mod starrocks;
80#[cfg(feature = "dialect-tableau")]
81mod tableau;
82#[cfg(feature = "dialect-teradata")]
83mod teradata;
84#[cfg(feature = "dialect-tidb")]
85mod tidb;
86#[cfg(feature = "dialect-trino")]
87mod trino;
88#[cfg(feature = "dialect-tsql")]
89mod tsql;
90
91pub use generic::GenericDialect; // Always available
92
93#[cfg(feature = "dialect-athena")]
94pub use athena::AthenaDialect;
95#[cfg(feature = "dialect-bigquery")]
96pub use bigquery::BigQueryDialect;
97#[cfg(feature = "dialect-clickhouse")]
98pub use clickhouse::ClickHouseDialect;
99#[cfg(feature = "dialect-cockroachdb")]
100pub use cockroachdb::CockroachDBDialect;
101#[cfg(feature = "dialect-databricks")]
102pub use databricks::DatabricksDialect;
103#[cfg(feature = "dialect-datafusion")]
104pub use datafusion::DataFusionDialect;
105#[cfg(feature = "dialect-doris")]
106pub use doris::DorisDialect;
107#[cfg(feature = "dialect-dremio")]
108pub use dremio::DremioDialect;
109#[cfg(feature = "dialect-drill")]
110pub use drill::DrillDialect;
111#[cfg(feature = "dialect-druid")]
112pub use druid::DruidDialect;
113#[cfg(feature = "dialect-duckdb")]
114pub use duckdb::DuckDBDialect;
115#[cfg(feature = "dialect-dune")]
116pub use dune::DuneDialect;
117#[cfg(feature = "dialect-exasol")]
118pub use exasol::ExasolDialect;
119#[cfg(feature = "dialect-fabric")]
120pub use fabric::FabricDialect;
121#[cfg(feature = "dialect-hive")]
122pub use hive::HiveDialect;
123#[cfg(feature = "dialect-materialize")]
124pub use materialize::MaterializeDialect;
125#[cfg(feature = "dialect-mysql")]
126pub use mysql::MySQLDialect;
127#[cfg(feature = "dialect-oracle")]
128pub use oracle::OracleDialect;
129#[cfg(feature = "dialect-postgresql")]
130pub use postgres::PostgresDialect;
131#[cfg(feature = "dialect-presto")]
132pub use presto::PrestoDialect;
133#[cfg(feature = "dialect-redshift")]
134pub use redshift::RedshiftDialect;
135#[cfg(feature = "dialect-risingwave")]
136pub use risingwave::RisingWaveDialect;
137#[cfg(feature = "dialect-singlestore")]
138pub use singlestore::SingleStoreDialect;
139#[cfg(feature = "dialect-snowflake")]
140pub use snowflake::SnowflakeDialect;
141#[cfg(feature = "dialect-solr")]
142pub use solr::SolrDialect;
143#[cfg(feature = "dialect-spark")]
144pub use spark::SparkDialect;
145#[cfg(feature = "dialect-sqlite")]
146pub use sqlite::SQLiteDialect;
147#[cfg(feature = "dialect-starrocks")]
148pub use starrocks::StarRocksDialect;
149#[cfg(feature = "dialect-tableau")]
150pub use tableau::TableauDialect;
151#[cfg(feature = "dialect-teradata")]
152pub use teradata::TeradataDialect;
153#[cfg(feature = "dialect-tidb")]
154pub use tidb::TiDBDialect;
155#[cfg(feature = "dialect-trino")]
156pub use trino::TrinoDialect;
157#[cfg(feature = "dialect-tsql")]
158pub use tsql::TSQLDialect;
159
160use crate::error::Result;
161use crate::expressions::{Expression, FunctionBody, Null};
162use crate::generator::{Generator, GeneratorConfig};
163use crate::parser::Parser;
164use crate::tokens::{Token, Tokenizer, TokenizerConfig};
165use serde::{Deserialize, Serialize};
166use std::collections::HashMap;
167use std::sync::{Arc, LazyLock, RwLock};
168
/// Enumeration of all supported SQL dialects.
///
/// Each variant corresponds to a specific SQL database engine or query language.
/// The `Generic` variant represents standard SQL with no dialect-specific behavior,
/// and is used as the default when no dialect is specified.
///
/// Dialect names are case-insensitive when parsed from strings via [`FromStr`](std::str::FromStr).
/// Some dialects accept aliases (e.g., "mssql" and "sqlserver" both resolve to [`TSQL`](DialectType::TSQL)).
/// Serde (de)serialization uses the lowercase variant name (`rename_all = "lowercase"`),
/// which matches the canonical spelling produced by [`Display`](std::fmt::Display).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum DialectType {
    /// Standard SQL with no dialect-specific behavior (default).
    Generic,
    /// PostgreSQL -- advanced open-source relational database (also accepts "postgres").
    PostgreSQL,
    /// MySQL -- widely-used open-source relational database.
    MySQL,
    /// Google BigQuery -- serverless cloud data warehouse with unique syntax (backtick quoting, STRUCT types, QUALIFY).
    BigQuery,
    /// Snowflake -- cloud data platform with QUALIFY clause, FLATTEN, and variant types.
    Snowflake,
    /// DuckDB -- in-process analytical database with modern SQL extensions.
    DuckDB,
    /// SQLite -- lightweight embedded relational database.
    SQLite,
    /// Apache Hive -- data warehouse on Hadoop with HiveQL syntax.
    Hive,
    /// Apache Spark SQL -- distributed query engine (also accepts "spark2").
    Spark,
    /// Trino -- distributed SQL query engine (formerly PrestoSQL).
    Trino,
    /// PrestoDB -- distributed SQL query engine for big data.
    Presto,
    /// Amazon Redshift -- cloud data warehouse based on PostgreSQL.
    Redshift,
    /// Transact-SQL (T-SQL) -- Microsoft SQL Server and Azure SQL (also accepts "mssql", "sqlserver").
    TSQL,
    /// Oracle Database -- commercial relational database with PL/SQL extensions.
    Oracle,
    /// ClickHouse -- column-oriented OLAP database for real-time analytics.
    ClickHouse,
    /// Databricks SQL -- Spark-based lakehouse platform with QUALIFY support.
    Databricks,
    /// Amazon Athena -- serverless query service (hybrid Trino/Hive engine).
    Athena,
    /// Teradata -- enterprise data warehouse with proprietary SQL extensions.
    Teradata,
    /// Apache Doris -- real-time analytical database (MySQL-compatible).
    Doris,
    /// StarRocks -- sub-second OLAP database (MySQL-compatible).
    StarRocks,
    /// Materialize -- streaming SQL database built on differential dataflow.
    Materialize,
    /// RisingWave -- distributed streaming database with PostgreSQL compatibility.
    RisingWave,
    /// SingleStore (formerly MemSQL) -- distributed SQL database (also accepts "memsql").
    SingleStore,
    /// CockroachDB -- distributed SQL database with PostgreSQL compatibility (also accepts "cockroach").
    CockroachDB,
    /// TiDB -- distributed HTAP database with MySQL compatibility.
    TiDB,
    /// Apache Druid -- real-time analytics database.
    Druid,
    /// Apache Solr -- search platform with SQL interface.
    Solr,
    /// Tableau -- data visualization platform with its own SQL dialect.
    Tableau,
    /// Dune Analytics -- blockchain analytics SQL engine.
    Dune,
    /// Microsoft Fabric -- unified analytics platform (T-SQL based).
    Fabric,
    /// Apache Drill -- schema-free SQL query engine for big data.
    Drill,
    /// Dremio -- data lakehouse platform with Arrow-based query engine.
    Dremio,
    /// Exasol -- in-memory analytic database.
    Exasol,
    /// Apache DataFusion -- Arrow-based query engine with modern SQL extensions (also accepts "arrow-datafusion").
    DataFusion,
}
249
250impl Default for DialectType {
251 fn default() -> Self {
252 DialectType::Generic
253 }
254}
255
256impl std::fmt::Display for DialectType {
257 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
258 match self {
259 DialectType::Generic => write!(f, "generic"),
260 DialectType::PostgreSQL => write!(f, "postgresql"),
261 DialectType::MySQL => write!(f, "mysql"),
262 DialectType::BigQuery => write!(f, "bigquery"),
263 DialectType::Snowflake => write!(f, "snowflake"),
264 DialectType::DuckDB => write!(f, "duckdb"),
265 DialectType::SQLite => write!(f, "sqlite"),
266 DialectType::Hive => write!(f, "hive"),
267 DialectType::Spark => write!(f, "spark"),
268 DialectType::Trino => write!(f, "trino"),
269 DialectType::Presto => write!(f, "presto"),
270 DialectType::Redshift => write!(f, "redshift"),
271 DialectType::TSQL => write!(f, "tsql"),
272 DialectType::Oracle => write!(f, "oracle"),
273 DialectType::ClickHouse => write!(f, "clickhouse"),
274 DialectType::Databricks => write!(f, "databricks"),
275 DialectType::Athena => write!(f, "athena"),
276 DialectType::Teradata => write!(f, "teradata"),
277 DialectType::Doris => write!(f, "doris"),
278 DialectType::StarRocks => write!(f, "starrocks"),
279 DialectType::Materialize => write!(f, "materialize"),
280 DialectType::RisingWave => write!(f, "risingwave"),
281 DialectType::SingleStore => write!(f, "singlestore"),
282 DialectType::CockroachDB => write!(f, "cockroachdb"),
283 DialectType::TiDB => write!(f, "tidb"),
284 DialectType::Druid => write!(f, "druid"),
285 DialectType::Solr => write!(f, "solr"),
286 DialectType::Tableau => write!(f, "tableau"),
287 DialectType::Dune => write!(f, "dune"),
288 DialectType::Fabric => write!(f, "fabric"),
289 DialectType::Drill => write!(f, "drill"),
290 DialectType::Dremio => write!(f, "dremio"),
291 DialectType::Exasol => write!(f, "exasol"),
292 DialectType::DataFusion => write!(f, "datafusion"),
293 }
294 }
295}
296
297impl std::str::FromStr for DialectType {
298 type Err = crate::error::Error;
299
300 fn from_str(s: &str) -> Result<Self> {
301 match s.to_ascii_lowercase().as_str() {
302 "generic" | "" => Ok(DialectType::Generic),
303 "postgres" | "postgresql" => Ok(DialectType::PostgreSQL),
304 "mysql" => Ok(DialectType::MySQL),
305 "bigquery" => Ok(DialectType::BigQuery),
306 "snowflake" => Ok(DialectType::Snowflake),
307 "duckdb" => Ok(DialectType::DuckDB),
308 "sqlite" => Ok(DialectType::SQLite),
309 "hive" => Ok(DialectType::Hive),
310 "spark" | "spark2" => Ok(DialectType::Spark),
311 "trino" => Ok(DialectType::Trino),
312 "presto" => Ok(DialectType::Presto),
313 "redshift" => Ok(DialectType::Redshift),
314 "tsql" | "mssql" | "sqlserver" => Ok(DialectType::TSQL),
315 "oracle" => Ok(DialectType::Oracle),
316 "clickhouse" => Ok(DialectType::ClickHouse),
317 "databricks" => Ok(DialectType::Databricks),
318 "athena" => Ok(DialectType::Athena),
319 "teradata" => Ok(DialectType::Teradata),
320 "doris" => Ok(DialectType::Doris),
321 "starrocks" => Ok(DialectType::StarRocks),
322 "materialize" => Ok(DialectType::Materialize),
323 "risingwave" => Ok(DialectType::RisingWave),
324 "singlestore" | "memsql" => Ok(DialectType::SingleStore),
325 "cockroachdb" | "cockroach" => Ok(DialectType::CockroachDB),
326 "tidb" => Ok(DialectType::TiDB),
327 "druid" => Ok(DialectType::Druid),
328 "solr" => Ok(DialectType::Solr),
329 "tableau" => Ok(DialectType::Tableau),
330 "dune" => Ok(DialectType::Dune),
331 "fabric" => Ok(DialectType::Fabric),
332 "drill" => Ok(DialectType::Drill),
333 "dremio" => Ok(DialectType::Dremio),
334 "exasol" => Ok(DialectType::Exasol),
335 "datafusion" | "arrow-datafusion" | "arrow_datafusion" => Ok(DialectType::DataFusion),
336 _ => Err(crate::error::Error::parse(
337 format!("Unknown dialect: {}", s),
338 0,
339 0,
340 0,
341 0,
342 )),
343 }
344 }
345}
346
/// Trait that each concrete SQL dialect must implement.
///
/// `DialectImpl` provides the configuration hooks and per-expression transform logic
/// that distinguish one dialect from another. Implementors supply:
///
/// - A [`DialectType`] identifier.
/// - Optional overrides for tokenizer and generator configuration (defaults to generic SQL).
/// - An expression-level transform function ([`transform_expr`](DialectImpl::transform_expr))
///   that rewrites individual AST nodes for this dialect (e.g., converting `NVL` to `COALESCE`).
/// - An optional preprocessing step ([`preprocess`](DialectImpl::preprocess)) for whole-tree
///   rewrites that must run before the recursive per-node transform (e.g., eliminating QUALIFY).
///
/// The default implementations are no-ops, so a minimal dialect only needs to provide
/// [`dialect_type`](DialectImpl::dialect_type) and override the methods that differ from
/// standard SQL.
pub trait DialectImpl {
    /// Returns the [`DialectType`] that identifies this dialect.
    ///
    /// This is the only required method; every other method has a
    /// generic-SQL default implementation.
    fn dialect_type(&self) -> DialectType;

    /// Returns the tokenizer configuration for this dialect.
    ///
    /// Override to customize identifier quoting characters, string escape rules,
    /// comment styles, and other lexing behavior. The default is the plain
    /// [`TokenizerConfig::default`] (generic SQL lexing).
    fn tokenizer_config(&self) -> TokenizerConfig {
        TokenizerConfig::default()
    }

    /// Returns the generator configuration for this dialect.
    ///
    /// Override to customize identifier quoting style, function name casing,
    /// keyword casing, and other SQL generation behavior. The default is the
    /// plain [`GeneratorConfig::default`] (generic SQL output).
    fn generator_config(&self) -> GeneratorConfig {
        GeneratorConfig::default()
    }

    /// Returns a generator configuration tailored to a specific expression.
    ///
    /// Override this for hybrid dialects like Athena that route to different SQL engines
    /// based on expression type (e.g., Hive-style generation for DDL, Trino-style for DML).
    /// The default ignores the expression and delegates to
    /// [`generator_config`](DialectImpl::generator_config).
    fn generator_config_for_expr(&self, _expr: &Expression) -> GeneratorConfig {
        self.generator_config()
    }

    /// Transforms a single expression node for this dialect, without recursing into children.
    ///
    /// This is the per-node rewrite hook invoked by [`transform_recursive`], which
    /// traverses bottom-up, so children have already been transformed when this runs.
    /// Return the expression unchanged if no dialect-specific rewrite is needed.
    /// Transformations typically include function renaming, operator substitution,
    /// and type mapping. The default is an identity pass-through.
    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
        Ok(expr)
    }

    /// Applies whole-tree preprocessing transforms before the recursive per-node pass.
    ///
    /// Override this to apply structural rewrites that must see the entire tree at once,
    /// such as `eliminate_qualify`, `eliminate_distinct_on`, `ensure_bools`, or
    /// `explode_projection_to_unnest`. The default is a no-op pass-through.
    fn preprocess(&self, expr: Expression) -> Result<Expression> {
        Ok(expr)
    }
}
409
410/// Recursively transforms a [`DataType`](crate::expressions::DataType), handling nested
411/// parametric types such as `ARRAY<INT>`, `STRUCT<a INT, b TEXT>`, and `MAP<STRING, INT>`.
412///
413/// The outer type is first passed through `transform_fn` as an `Expression::DataType`,
414/// and then nested element/field types are recursed into. This ensures that dialect-level
415/// type mappings (e.g., `INT` to `INTEGER`) propagate into complex nested types.
416fn transform_data_type_recursive<F>(
417 dt: crate::expressions::DataType,
418 transform_fn: &F,
419) -> Result<crate::expressions::DataType>
420where
421 F: Fn(Expression) -> Result<Expression>,
422{
423 use crate::expressions::DataType;
424 // First, transform the outermost type through the expression system
425 let dt_expr = transform_fn(Expression::DataType(dt))?;
426 let dt = match dt_expr {
427 Expression::DataType(d) => d,
428 _ => {
429 return Ok(match dt_expr {
430 _ => DataType::Custom {
431 name: "UNKNOWN".to_string(),
432 },
433 })
434 }
435 };
436 // Then recurse into nested types
437 match dt {
438 DataType::Array {
439 element_type,
440 dimension,
441 } => {
442 let inner = transform_data_type_recursive(*element_type, transform_fn)?;
443 Ok(DataType::Array {
444 element_type: Box::new(inner),
445 dimension,
446 })
447 }
448 DataType::List { element_type } => {
449 let inner = transform_data_type_recursive(*element_type, transform_fn)?;
450 Ok(DataType::List {
451 element_type: Box::new(inner),
452 })
453 }
454 DataType::Struct { fields, nested } => {
455 let mut new_fields = Vec::new();
456 for mut field in fields {
457 field.data_type = transform_data_type_recursive(field.data_type, transform_fn)?;
458 new_fields.push(field);
459 }
460 Ok(DataType::Struct {
461 fields: new_fields,
462 nested,
463 })
464 }
465 DataType::Map {
466 key_type,
467 value_type,
468 } => {
469 let k = transform_data_type_recursive(*key_type, transform_fn)?;
470 let v = transform_data_type_recursive(*value_type, transform_fn)?;
471 Ok(DataType::Map {
472 key_type: Box::new(k),
473 value_type: Box::new(v),
474 })
475 }
476 other => Ok(other),
477 }
478}
479
480/// Convert DuckDB C-style format strings to Presto C-style format strings.
481/// DuckDB and Presto both use C-style % directives but with different specifiers for some cases.
482#[cfg(feature = "transpile")]
fn duckdb_to_presto_format(fmt: &str) -> String {
    // Each entry is (DuckDB pattern, temporary sentinel, Presto replacement).
    // Multi-character patterns are masked with \x01-delimited sentinels before
    // the single-specifier conversions run, so e.g. the `%M` inside `%H:%M:%S`
    // is not rewritten to `%i`. `%Y-%m-%d` masks to itself (it contains no
    // specifier that the single-char pass would touch, but masking keeps it
    // uniformly protected). Ordering of the table matches the original
    // replacement order exactly.
    const PROTECTED: [(&str, &str, &str); 6] = [
        ("%-m", "\x01NOPADM\x01", "%c"),
        ("%-d", "\x01NOPADD\x01", "%e"),
        ("%-I", "\x01NOPADI\x01", "%l"),
        ("%-H", "\x01NOPADH\x01", "%k"),
        ("%H:%M:%S", "\x01HMS\x01", "%T"),
        ("%Y-%m-%d", "\x01YMD\x01", "%Y-%m-%d"),
    ];

    // Pass 1: mask multi-char patterns.
    let mut out = fmt.to_string();
    for (pattern, sentinel, _) in PROTECTED {
        out = out.replace(pattern, sentinel);
    }

    // Pass 2: convert individual specifiers (minutes and seconds).
    out = out.replace("%M", "%i").replace("%S", "%s");

    // Pass 3: restore masked patterns with their Presto equivalents.
    for (_, sentinel, presto) in PROTECTED {
        out = out.replace(sentinel, presto);
    }
    out
}
505
506/// Convert DuckDB C-style format strings to BigQuery format strings.
507/// BigQuery uses a mix of strftime-like directives.
508#[cfg(feature = "transpile")]
fn duckdb_to_bigquery_format(fmt: &str) -> String {
    // Composite date/time patterns collapse to BigQuery shorthand elements
    // (%F for date, %T for time); `%-d` maps to the space-padded `%e`.
    // The replacement order matches the original: `%-d` cannot occur inside
    // any of the composite patterns, and the longest composite is handled
    // before its sub-patterns.
    fmt.replace("%-d", "%e")
        .replace("%Y-%m-%d %H:%M:%S", "%F %T")
        .replace("%Y-%m-%d", "%F")
        .replace("%H:%M:%S", "%T")
}
518
519/// Applies a transform function bottom-up through an entire expression tree.
520///
521/// This is the core tree-rewriting engine used by the dialect system. It performs
522/// a post-order (children-first) traversal: for each node, all children are recursively
523/// transformed before the node itself is passed to `transform_fn`. This bottom-up
524/// strategy means that when `transform_fn` sees a node, its children have already
525/// been rewritten, which simplifies pattern matching on sub-expressions.
526///
527/// The function handles all expression variants including SELECT clauses (FROM, WHERE,
528/// GROUP BY, HAVING, ORDER BY, QUALIFY, WITH/CTEs, WINDOW), binary operators,
529/// function calls, CASE expressions, date/time functions, and more.
530///
531/// # Arguments
532///
533/// * `expr` - The root expression to transform (consumed).
534/// * `transform_fn` - A closure that receives each expression node (after its children
535/// have been transformed) and returns a possibly-rewritten expression.
536///
537/// # Errors
538///
539/// Returns an error if `transform_fn` returns an error for any node.
540pub fn transform_recursive<F>(expr: Expression, transform_fn: &F) -> Result<Expression>
541where
542 F: Fn(Expression) -> Result<Expression>,
543{
544 use crate::expressions::BinaryOp;
545
546 // Helper macro to transform binary ops with Box<BinaryOp>
547 macro_rules! transform_binary {
548 ($variant:ident, $op:expr) => {{
549 let left = transform_recursive($op.left, transform_fn)?;
550 let right = transform_recursive($op.right, transform_fn)?;
551 Expression::$variant(Box::new(BinaryOp {
552 left,
553 right,
554 left_comments: $op.left_comments,
555 operator_comments: $op.operator_comments,
556 trailing_comments: $op.trailing_comments,
557 inferred_type: $op.inferred_type,
558 }))
559 }};
560 }
561
562 // Fast path: leaf nodes never need child traversal, apply transform directly
563 if matches!(
564 &expr,
565 Expression::Literal(_)
566 | Expression::Boolean(_)
567 | Expression::Null(_)
568 | Expression::Identifier(_)
569 | Expression::Star(_)
570 | Expression::Parameter(_)
571 | Expression::Placeholder(_)
572 | Expression::SessionParameter(_)
573 ) {
574 return transform_fn(expr);
575 }
576
577 // First recursively transform children, then apply the transform function
578 let expr = match expr {
579 Expression::Select(mut select) => {
580 select.expressions = select
581 .expressions
582 .into_iter()
583 .map(|e| transform_recursive(e, transform_fn))
584 .collect::<Result<Vec<_>>>()?;
585
586 // Transform FROM clause
587 if let Some(mut from) = select.from.take() {
588 from.expressions = from
589 .expressions
590 .into_iter()
591 .map(|e| transform_recursive(e, transform_fn))
592 .collect::<Result<Vec<_>>>()?;
593 select.from = Some(from);
594 }
595
596 // Transform JOINs - important for CROSS APPLY / LATERAL transformations
597 select.joins = select
598 .joins
599 .into_iter()
600 .map(|mut join| {
601 join.this = transform_recursive(join.this, transform_fn)?;
602 if let Some(on) = join.on.take() {
603 join.on = Some(transform_recursive(on, transform_fn)?);
604 }
605 // Wrap join in Expression::Join to allow transform_fn to transform it
606 match transform_fn(Expression::Join(Box::new(join)))? {
607 Expression::Join(j) => Ok(*j),
608 _ => Err(crate::error::Error::parse(
609 "Join transformation returned non-join expression",
610 0,
611 0,
612 0,
613 0,
614 )),
615 }
616 })
617 .collect::<Result<Vec<_>>>()?;
618
619 // Transform LATERAL VIEW expressions (Hive/Spark)
620 select.lateral_views = select
621 .lateral_views
622 .into_iter()
623 .map(|mut lv| {
624 lv.this = transform_recursive(lv.this, transform_fn)?;
625 Ok(lv)
626 })
627 .collect::<Result<Vec<_>>>()?;
628
629 // Transform WHERE clause
630 if let Some(mut where_clause) = select.where_clause.take() {
631 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
632 select.where_clause = Some(where_clause);
633 }
634
635 // Transform GROUP BY
636 if let Some(mut group_by) = select.group_by.take() {
637 group_by.expressions = group_by
638 .expressions
639 .into_iter()
640 .map(|e| transform_recursive(e, transform_fn))
641 .collect::<Result<Vec<_>>>()?;
642 select.group_by = Some(group_by);
643 }
644
645 // Transform HAVING
646 if let Some(mut having) = select.having.take() {
647 having.this = transform_recursive(having.this, transform_fn)?;
648 select.having = Some(having);
649 }
650
651 // Transform WITH (CTEs)
652 if let Some(mut with) = select.with.take() {
653 with.ctes = with
654 .ctes
655 .into_iter()
656 .map(|mut cte| {
657 let original = cte.this.clone();
658 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
659 cte
660 })
661 .collect();
662 select.with = Some(with);
663 }
664
665 // Transform ORDER BY
666 if let Some(mut order) = select.order_by.take() {
667 order.expressions = order
668 .expressions
669 .into_iter()
670 .map(|o| {
671 let mut o = o;
672 let original = o.this.clone();
673 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
674 // Also apply transform to the Ordered wrapper itself (for NULLS FIRST etc.)
675 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
676 Ok(Expression::Ordered(transformed)) => *transformed,
677 Ok(_) | Err(_) => o,
678 }
679 })
680 .collect();
681 select.order_by = Some(order);
682 }
683
684 // Transform WINDOW clause order_by
685 if let Some(ref mut windows) = select.windows {
686 for nw in windows.iter_mut() {
687 nw.spec.order_by = std::mem::take(&mut nw.spec.order_by)
688 .into_iter()
689 .map(|o| {
690 let mut o = o;
691 let original = o.this.clone();
692 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
693 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
694 Ok(Expression::Ordered(transformed)) => *transformed,
695 Ok(_) | Err(_) => o,
696 }
697 })
698 .collect();
699 }
700 }
701
702 // Transform QUALIFY
703 if let Some(mut qual) = select.qualify.take() {
704 qual.this = transform_recursive(qual.this, transform_fn)?;
705 select.qualify = Some(qual);
706 }
707
708 Expression::Select(select)
709 }
710 Expression::Function(mut f) => {
711 f.args = f
712 .args
713 .into_iter()
714 .map(|e| transform_recursive(e, transform_fn))
715 .collect::<Result<Vec<_>>>()?;
716 Expression::Function(f)
717 }
718 Expression::AggregateFunction(mut f) => {
719 f.args = f
720 .args
721 .into_iter()
722 .map(|e| transform_recursive(e, transform_fn))
723 .collect::<Result<Vec<_>>>()?;
724 if let Some(filter) = f.filter {
725 f.filter = Some(transform_recursive(filter, transform_fn)?);
726 }
727 Expression::AggregateFunction(f)
728 }
729 Expression::WindowFunction(mut wf) => {
730 wf.this = transform_recursive(wf.this, transform_fn)?;
731 wf.over.partition_by = wf
732 .over
733 .partition_by
734 .into_iter()
735 .map(|e| transform_recursive(e, transform_fn))
736 .collect::<Result<Vec<_>>>()?;
737 // Transform order_by items through Expression::Ordered wrapper
738 wf.over.order_by = wf
739 .over
740 .order_by
741 .into_iter()
742 .map(|o| {
743 let mut o = o;
744 o.this = transform_recursive(o.this, transform_fn)?;
745 match transform_fn(Expression::Ordered(Box::new(o)))? {
746 Expression::Ordered(transformed) => Ok(*transformed),
747 _ => Err(crate::error::Error::parse(
748 "Ordered transformation returned non-Ordered expression",
749 0,
750 0,
751 0,
752 0,
753 )),
754 }
755 })
756 .collect::<Result<Vec<_>>>()?;
757 Expression::WindowFunction(wf)
758 }
759 Expression::Alias(mut a) => {
760 a.this = transform_recursive(a.this, transform_fn)?;
761 Expression::Alias(a)
762 }
763 Expression::Cast(mut c) => {
764 c.this = transform_recursive(c.this, transform_fn)?;
765 // Also transform the target data type (recursively for nested types like ARRAY<INT>, STRUCT<a INT>)
766 c.to = transform_data_type_recursive(c.to, transform_fn)?;
767 Expression::Cast(c)
768 }
769 Expression::And(op) => transform_binary!(And, *op),
770 Expression::Or(op) => transform_binary!(Or, *op),
771 Expression::Add(op) => transform_binary!(Add, *op),
772 Expression::Sub(op) => transform_binary!(Sub, *op),
773 Expression::Mul(op) => transform_binary!(Mul, *op),
774 Expression::Div(op) => transform_binary!(Div, *op),
775 Expression::Eq(op) => transform_binary!(Eq, *op),
776 Expression::Lt(op) => transform_binary!(Lt, *op),
777 Expression::Gt(op) => transform_binary!(Gt, *op),
778 Expression::Paren(mut p) => {
779 p.this = transform_recursive(p.this, transform_fn)?;
780 Expression::Paren(p)
781 }
782 Expression::Coalesce(mut f) => {
783 f.expressions = f
784 .expressions
785 .into_iter()
786 .map(|e| transform_recursive(e, transform_fn))
787 .collect::<Result<Vec<_>>>()?;
788 Expression::Coalesce(f)
789 }
790 Expression::IfNull(mut f) => {
791 f.this = transform_recursive(f.this, transform_fn)?;
792 f.expression = transform_recursive(f.expression, transform_fn)?;
793 Expression::IfNull(f)
794 }
795 Expression::Nvl(mut f) => {
796 f.this = transform_recursive(f.this, transform_fn)?;
797 f.expression = transform_recursive(f.expression, transform_fn)?;
798 Expression::Nvl(f)
799 }
800 Expression::In(mut i) => {
801 i.this = transform_recursive(i.this, transform_fn)?;
802 i.expressions = i
803 .expressions
804 .into_iter()
805 .map(|e| transform_recursive(e, transform_fn))
806 .collect::<Result<Vec<_>>>()?;
807 if let Some(query) = i.query {
808 i.query = Some(transform_recursive(query, transform_fn)?);
809 }
810 Expression::In(i)
811 }
812 Expression::Not(mut n) => {
813 n.this = transform_recursive(n.this, transform_fn)?;
814 Expression::Not(n)
815 }
816 Expression::ArraySlice(mut s) => {
817 s.this = transform_recursive(s.this, transform_fn)?;
818 if let Some(start) = s.start {
819 s.start = Some(transform_recursive(start, transform_fn)?);
820 }
821 if let Some(end) = s.end {
822 s.end = Some(transform_recursive(end, transform_fn)?);
823 }
824 Expression::ArraySlice(s)
825 }
826 Expression::Subscript(mut s) => {
827 s.this = transform_recursive(s.this, transform_fn)?;
828 s.index = transform_recursive(s.index, transform_fn)?;
829 Expression::Subscript(s)
830 }
831 Expression::Array(mut a) => {
832 a.expressions = a
833 .expressions
834 .into_iter()
835 .map(|e| transform_recursive(e, transform_fn))
836 .collect::<Result<Vec<_>>>()?;
837 Expression::Array(a)
838 }
839 Expression::Struct(mut s) => {
840 let mut new_fields = Vec::new();
841 for (name, expr) in s.fields {
842 let transformed = transform_recursive(expr, transform_fn)?;
843 new_fields.push((name, transformed));
844 }
845 s.fields = new_fields;
846 Expression::Struct(s)
847 }
848 Expression::NamedArgument(mut na) => {
849 na.value = transform_recursive(na.value, transform_fn)?;
850 Expression::NamedArgument(na)
851 }
852 Expression::MapFunc(mut m) => {
853 m.keys = m
854 .keys
855 .into_iter()
856 .map(|e| transform_recursive(e, transform_fn))
857 .collect::<Result<Vec<_>>>()?;
858 m.values = m
859 .values
860 .into_iter()
861 .map(|e| transform_recursive(e, transform_fn))
862 .collect::<Result<Vec<_>>>()?;
863 Expression::MapFunc(m)
864 }
865 Expression::ArrayFunc(mut a) => {
866 a.expressions = a
867 .expressions
868 .into_iter()
869 .map(|e| transform_recursive(e, transform_fn))
870 .collect::<Result<Vec<_>>>()?;
871 Expression::ArrayFunc(a)
872 }
873 Expression::Lambda(mut l) => {
874 l.body = transform_recursive(l.body, transform_fn)?;
875 Expression::Lambda(l)
876 }
877 Expression::JsonExtract(mut f) => {
878 f.this = transform_recursive(f.this, transform_fn)?;
879 f.path = transform_recursive(f.path, transform_fn)?;
880 Expression::JsonExtract(f)
881 }
882 Expression::JsonExtractScalar(mut f) => {
883 f.this = transform_recursive(f.this, transform_fn)?;
884 f.path = transform_recursive(f.path, transform_fn)?;
885 Expression::JsonExtractScalar(f)
886 }
887
888 // ===== UnaryFunc-based expressions =====
889 // These all have a single `this: Expression` child
890 Expression::Length(mut f) => {
891 f.this = transform_recursive(f.this, transform_fn)?;
892 Expression::Length(f)
893 }
894 Expression::Upper(mut f) => {
895 f.this = transform_recursive(f.this, transform_fn)?;
896 Expression::Upper(f)
897 }
898 Expression::Lower(mut f) => {
899 f.this = transform_recursive(f.this, transform_fn)?;
900 Expression::Lower(f)
901 }
902 Expression::LTrim(mut f) => {
903 f.this = transform_recursive(f.this, transform_fn)?;
904 Expression::LTrim(f)
905 }
906 Expression::RTrim(mut f) => {
907 f.this = transform_recursive(f.this, transform_fn)?;
908 Expression::RTrim(f)
909 }
910 Expression::Reverse(mut f) => {
911 f.this = transform_recursive(f.this, transform_fn)?;
912 Expression::Reverse(f)
913 }
914 Expression::Abs(mut f) => {
915 f.this = transform_recursive(f.this, transform_fn)?;
916 Expression::Abs(f)
917 }
918 Expression::Ceil(mut f) => {
919 f.this = transform_recursive(f.this, transform_fn)?;
920 Expression::Ceil(f)
921 }
922 Expression::Floor(mut f) => {
923 f.this = transform_recursive(f.this, transform_fn)?;
924 Expression::Floor(f)
925 }
926 Expression::Sign(mut f) => {
927 f.this = transform_recursive(f.this, transform_fn)?;
928 Expression::Sign(f)
929 }
930 Expression::Sqrt(mut f) => {
931 f.this = transform_recursive(f.this, transform_fn)?;
932 Expression::Sqrt(f)
933 }
934 Expression::Cbrt(mut f) => {
935 f.this = transform_recursive(f.this, transform_fn)?;
936 Expression::Cbrt(f)
937 }
938 Expression::Ln(mut f) => {
939 f.this = transform_recursive(f.this, transform_fn)?;
940 Expression::Ln(f)
941 }
942 Expression::Log(mut f) => {
943 f.this = transform_recursive(f.this, transform_fn)?;
944 if let Some(base) = f.base {
945 f.base = Some(transform_recursive(base, transform_fn)?);
946 }
947 Expression::Log(f)
948 }
949 Expression::Exp(mut f) => {
950 f.this = transform_recursive(f.this, transform_fn)?;
951 Expression::Exp(f)
952 }
953 Expression::Date(mut f) => {
954 f.this = transform_recursive(f.this, transform_fn)?;
955 Expression::Date(f)
956 }
957 Expression::Stddev(mut f) => {
958 f.this = transform_recursive(f.this, transform_fn)?;
959 Expression::Stddev(f)
960 }
961 Expression::Variance(mut f) => {
962 f.this = transform_recursive(f.this, transform_fn)?;
963 Expression::Variance(f)
964 }
965
966 // ===== BinaryFunc-based expressions =====
967 Expression::ModFunc(mut f) => {
968 f.this = transform_recursive(f.this, transform_fn)?;
969 f.expression = transform_recursive(f.expression, transform_fn)?;
970 Expression::ModFunc(f)
971 }
972 Expression::Power(mut f) => {
973 f.this = transform_recursive(f.this, transform_fn)?;
974 f.expression = transform_recursive(f.expression, transform_fn)?;
975 Expression::Power(f)
976 }
977 Expression::MapFromArrays(mut f) => {
978 f.this = transform_recursive(f.this, transform_fn)?;
979 f.expression = transform_recursive(f.expression, transform_fn)?;
980 Expression::MapFromArrays(f)
981 }
982 Expression::ElementAt(mut f) => {
983 f.this = transform_recursive(f.this, transform_fn)?;
984 f.expression = transform_recursive(f.expression, transform_fn)?;
985 Expression::ElementAt(f)
986 }
987 Expression::MapContainsKey(mut f) => {
988 f.this = transform_recursive(f.this, transform_fn)?;
989 f.expression = transform_recursive(f.expression, transform_fn)?;
990 Expression::MapContainsKey(f)
991 }
992 Expression::Left(mut f) => {
993 f.this = transform_recursive(f.this, transform_fn)?;
994 f.length = transform_recursive(f.length, transform_fn)?;
995 Expression::Left(f)
996 }
997 Expression::Right(mut f) => {
998 f.this = transform_recursive(f.this, transform_fn)?;
999 f.length = transform_recursive(f.length, transform_fn)?;
1000 Expression::Right(f)
1001 }
1002 Expression::Repeat(mut f) => {
1003 f.this = transform_recursive(f.this, transform_fn)?;
1004 f.times = transform_recursive(f.times, transform_fn)?;
1005 Expression::Repeat(f)
1006 }
1007
1008 // ===== Complex function expressions =====
1009 Expression::Substring(mut f) => {
1010 f.this = transform_recursive(f.this, transform_fn)?;
1011 f.start = transform_recursive(f.start, transform_fn)?;
1012 if let Some(len) = f.length {
1013 f.length = Some(transform_recursive(len, transform_fn)?);
1014 }
1015 Expression::Substring(f)
1016 }
1017 Expression::Replace(mut f) => {
1018 f.this = transform_recursive(f.this, transform_fn)?;
1019 f.old = transform_recursive(f.old, transform_fn)?;
1020 f.new = transform_recursive(f.new, transform_fn)?;
1021 Expression::Replace(f)
1022 }
1023 Expression::ConcatWs(mut f) => {
1024 f.separator = transform_recursive(f.separator, transform_fn)?;
1025 f.expressions = f
1026 .expressions
1027 .into_iter()
1028 .map(|e| transform_recursive(e, transform_fn))
1029 .collect::<Result<Vec<_>>>()?;
1030 Expression::ConcatWs(f)
1031 }
1032 Expression::Trim(mut f) => {
1033 f.this = transform_recursive(f.this, transform_fn)?;
1034 if let Some(chars) = f.characters {
1035 f.characters = Some(transform_recursive(chars, transform_fn)?);
1036 }
1037 Expression::Trim(f)
1038 }
1039 Expression::Split(mut f) => {
1040 f.this = transform_recursive(f.this, transform_fn)?;
1041 f.delimiter = transform_recursive(f.delimiter, transform_fn)?;
1042 Expression::Split(f)
1043 }
1044 Expression::Lpad(mut f) => {
1045 f.this = transform_recursive(f.this, transform_fn)?;
1046 f.length = transform_recursive(f.length, transform_fn)?;
1047 if let Some(fill) = f.fill {
1048 f.fill = Some(transform_recursive(fill, transform_fn)?);
1049 }
1050 Expression::Lpad(f)
1051 }
1052 Expression::Rpad(mut f) => {
1053 f.this = transform_recursive(f.this, transform_fn)?;
1054 f.length = transform_recursive(f.length, transform_fn)?;
1055 if let Some(fill) = f.fill {
1056 f.fill = Some(transform_recursive(fill, transform_fn)?);
1057 }
1058 Expression::Rpad(f)
1059 }
1060
1061 // ===== Conditional expressions =====
1062 Expression::Case(mut c) => {
1063 if let Some(operand) = c.operand {
1064 c.operand = Some(transform_recursive(operand, transform_fn)?);
1065 }
1066 c.whens = c
1067 .whens
1068 .into_iter()
1069 .map(|(cond, then)| {
1070 let new_cond = transform_recursive(cond.clone(), transform_fn).unwrap_or(cond);
1071 let new_then = transform_recursive(then.clone(), transform_fn).unwrap_or(then);
1072 (new_cond, new_then)
1073 })
1074 .collect();
1075 if let Some(else_expr) = c.else_ {
1076 c.else_ = Some(transform_recursive(else_expr, transform_fn)?);
1077 }
1078 Expression::Case(c)
1079 }
1080 Expression::IfFunc(mut f) => {
1081 f.condition = transform_recursive(f.condition, transform_fn)?;
1082 f.true_value = transform_recursive(f.true_value, transform_fn)?;
1083 if let Some(false_val) = f.false_value {
1084 f.false_value = Some(transform_recursive(false_val, transform_fn)?);
1085 }
1086 Expression::IfFunc(f)
1087 }
1088
1089 // ===== Date/Time expressions =====
1090 Expression::DateAdd(mut f) => {
1091 f.this = transform_recursive(f.this, transform_fn)?;
1092 f.interval = transform_recursive(f.interval, transform_fn)?;
1093 Expression::DateAdd(f)
1094 }
1095 Expression::DateSub(mut f) => {
1096 f.this = transform_recursive(f.this, transform_fn)?;
1097 f.interval = transform_recursive(f.interval, transform_fn)?;
1098 Expression::DateSub(f)
1099 }
1100 Expression::DateDiff(mut f) => {
1101 f.this = transform_recursive(f.this, transform_fn)?;
1102 f.expression = transform_recursive(f.expression, transform_fn)?;
1103 Expression::DateDiff(f)
1104 }
1105 Expression::DateTrunc(mut f) => {
1106 f.this = transform_recursive(f.this, transform_fn)?;
1107 Expression::DateTrunc(f)
1108 }
1109 Expression::Extract(mut f) => {
1110 f.this = transform_recursive(f.this, transform_fn)?;
1111 Expression::Extract(f)
1112 }
1113
1114 // ===== JSON expressions =====
1115 Expression::JsonObject(mut f) => {
1116 f.pairs = f
1117 .pairs
1118 .into_iter()
1119 .map(|(k, v)| {
1120 let new_k = transform_recursive(k, transform_fn)?;
1121 let new_v = transform_recursive(v, transform_fn)?;
1122 Ok((new_k, new_v))
1123 })
1124 .collect::<Result<Vec<_>>>()?;
1125 Expression::JsonObject(f)
1126 }
1127
1128 // ===== Subquery expressions =====
1129 Expression::Subquery(mut s) => {
1130 s.this = transform_recursive(s.this, transform_fn)?;
1131 Expression::Subquery(s)
1132 }
1133 Expression::Exists(mut e) => {
1134 e.this = transform_recursive(e.this, transform_fn)?;
1135 Expression::Exists(e)
1136 }
1137 Expression::Describe(mut d) => {
1138 d.target = transform_recursive(d.target, transform_fn)?;
1139 Expression::Describe(d)
1140 }
1141
1142 // ===== Set operations =====
1143 Expression::Union(mut u) => {
1144 let left = std::mem::replace(&mut u.left, Expression::Null(Null));
1145 u.left = transform_recursive(left, transform_fn)?;
1146 let right = std::mem::replace(&mut u.right, Expression::Null(Null));
1147 u.right = transform_recursive(right, transform_fn)?;
1148 if let Some(mut with) = u.with.take() {
1149 with.ctes = with
1150 .ctes
1151 .into_iter()
1152 .map(|mut cte| {
1153 let original = cte.this.clone();
1154 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
1155 cte
1156 })
1157 .collect();
1158 u.with = Some(with);
1159 }
1160 Expression::Union(u)
1161 }
1162 Expression::Intersect(mut i) => {
1163 let left = std::mem::replace(&mut i.left, Expression::Null(Null));
1164 i.left = transform_recursive(left, transform_fn)?;
1165 let right = std::mem::replace(&mut i.right, Expression::Null(Null));
1166 i.right = transform_recursive(right, transform_fn)?;
1167 if let Some(mut with) = i.with.take() {
1168 with.ctes = with
1169 .ctes
1170 .into_iter()
1171 .map(|mut cte| {
1172 let original = cte.this.clone();
1173 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
1174 cte
1175 })
1176 .collect();
1177 i.with = Some(with);
1178 }
1179 Expression::Intersect(i)
1180 }
1181 Expression::Except(mut e) => {
1182 let left = std::mem::replace(&mut e.left, Expression::Null(Null));
1183 e.left = transform_recursive(left, transform_fn)?;
1184 let right = std::mem::replace(&mut e.right, Expression::Null(Null));
1185 e.right = transform_recursive(right, transform_fn)?;
1186 if let Some(mut with) = e.with.take() {
1187 with.ctes = with
1188 .ctes
1189 .into_iter()
1190 .map(|mut cte| {
1191 let original = cte.this.clone();
1192 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
1193 cte
1194 })
1195 .collect();
1196 e.with = Some(with);
1197 }
1198 Expression::Except(e)
1199 }
1200
1201 // ===== DML expressions =====
1202 Expression::Insert(mut ins) => {
1203 // Transform VALUES clause expressions
1204 let mut new_values = Vec::new();
1205 for row in ins.values {
1206 let mut new_row = Vec::new();
1207 for e in row {
1208 new_row.push(transform_recursive(e, transform_fn)?);
1209 }
1210 new_values.push(new_row);
1211 }
1212 ins.values = new_values;
1213
1214 // Transform query (for INSERT ... SELECT)
1215 if let Some(query) = ins.query {
1216 ins.query = Some(transform_recursive(query, transform_fn)?);
1217 }
1218
1219 // Transform RETURNING clause
1220 let mut new_returning = Vec::new();
1221 for e in ins.returning {
1222 new_returning.push(transform_recursive(e, transform_fn)?);
1223 }
1224 ins.returning = new_returning;
1225
1226 // Transform ON CONFLICT clause
1227 if let Some(on_conflict) = ins.on_conflict {
1228 ins.on_conflict = Some(Box::new(transform_recursive(*on_conflict, transform_fn)?));
1229 }
1230
1231 Expression::Insert(ins)
1232 }
1233 Expression::Update(mut upd) => {
1234 upd.set = upd
1235 .set
1236 .into_iter()
1237 .map(|(id, val)| {
1238 let new_val = transform_recursive(val.clone(), transform_fn).unwrap_or(val);
1239 (id, new_val)
1240 })
1241 .collect();
1242 if let Some(mut where_clause) = upd.where_clause.take() {
1243 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
1244 upd.where_clause = Some(where_clause);
1245 }
1246 Expression::Update(upd)
1247 }
1248 Expression::Delete(mut del) => {
1249 if let Some(mut where_clause) = del.where_clause.take() {
1250 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
1251 del.where_clause = Some(where_clause);
1252 }
1253 Expression::Delete(del)
1254 }
1255
1256 // ===== CTE expressions =====
1257 Expression::With(mut w) => {
1258 w.ctes = w
1259 .ctes
1260 .into_iter()
1261 .map(|mut cte| {
1262 let original = cte.this.clone();
1263 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
1264 cte
1265 })
1266 .collect();
1267 Expression::With(w)
1268 }
1269 Expression::Cte(mut c) => {
1270 c.this = transform_recursive(c.this, transform_fn)?;
1271 Expression::Cte(c)
1272 }
1273
1274 // ===== Order expressions =====
1275 Expression::Ordered(mut o) => {
1276 o.this = transform_recursive(o.this, transform_fn)?;
1277 Expression::Ordered(o)
1278 }
1279
1280 // ===== Negation =====
1281 Expression::Neg(mut n) => {
1282 n.this = transform_recursive(n.this, transform_fn)?;
1283 Expression::Neg(n)
1284 }
1285
1286 // ===== Between =====
1287 Expression::Between(mut b) => {
1288 b.this = transform_recursive(b.this, transform_fn)?;
1289 b.low = transform_recursive(b.low, transform_fn)?;
1290 b.high = transform_recursive(b.high, transform_fn)?;
1291 Expression::Between(b)
1292 }
1293 Expression::IsNull(mut i) => {
1294 i.this = transform_recursive(i.this, transform_fn)?;
1295 Expression::IsNull(i)
1296 }
1297 Expression::IsTrue(mut i) => {
1298 i.this = transform_recursive(i.this, transform_fn)?;
1299 Expression::IsTrue(i)
1300 }
1301 Expression::IsFalse(mut i) => {
1302 i.this = transform_recursive(i.this, transform_fn)?;
1303 Expression::IsFalse(i)
1304 }
1305
1306 // ===== Like expressions =====
1307 Expression::Like(mut l) => {
1308 l.left = transform_recursive(l.left, transform_fn)?;
1309 l.right = transform_recursive(l.right, transform_fn)?;
1310 Expression::Like(l)
1311 }
1312 Expression::ILike(mut l) => {
1313 l.left = transform_recursive(l.left, transform_fn)?;
1314 l.right = transform_recursive(l.right, transform_fn)?;
1315 Expression::ILike(l)
1316 }
1317
1318 // ===== Additional binary ops not covered by macro =====
1319 Expression::Neq(op) => transform_binary!(Neq, *op),
1320 Expression::Lte(op) => transform_binary!(Lte, *op),
1321 Expression::Gte(op) => transform_binary!(Gte, *op),
1322 Expression::Mod(op) => transform_binary!(Mod, *op),
1323 Expression::Concat(op) => transform_binary!(Concat, *op),
1324 Expression::BitwiseAnd(op) => transform_binary!(BitwiseAnd, *op),
1325 Expression::BitwiseOr(op) => transform_binary!(BitwiseOr, *op),
1326 Expression::BitwiseXor(op) => transform_binary!(BitwiseXor, *op),
1327 Expression::Is(op) => transform_binary!(Is, *op),
1328
1329 // ===== TryCast / SafeCast =====
1330 Expression::TryCast(mut c) => {
1331 c.this = transform_recursive(c.this, transform_fn)?;
1332 c.to = transform_data_type_recursive(c.to, transform_fn)?;
1333 Expression::TryCast(c)
1334 }
1335 Expression::SafeCast(mut c) => {
1336 c.this = transform_recursive(c.this, transform_fn)?;
1337 c.to = transform_data_type_recursive(c.to, transform_fn)?;
1338 Expression::SafeCast(c)
1339 }
1340
1341 // ===== Misc =====
1342 Expression::Unnest(mut f) => {
1343 f.this = transform_recursive(f.this, transform_fn)?;
1344 f.expressions = f
1345 .expressions
1346 .into_iter()
1347 .map(|e| transform_recursive(e, transform_fn))
1348 .collect::<Result<Vec<_>>>()?;
1349 Expression::Unnest(f)
1350 }
1351 Expression::Explode(mut f) => {
1352 f.this = transform_recursive(f.this, transform_fn)?;
1353 Expression::Explode(f)
1354 }
1355 Expression::GroupConcat(mut f) => {
1356 f.this = transform_recursive(f.this, transform_fn)?;
1357 Expression::GroupConcat(f)
1358 }
1359 Expression::StringAgg(mut f) => {
1360 f.this = transform_recursive(f.this, transform_fn)?;
1361 Expression::StringAgg(f)
1362 }
1363 Expression::ListAgg(mut f) => {
1364 f.this = transform_recursive(f.this, transform_fn)?;
1365 Expression::ListAgg(f)
1366 }
1367 Expression::ArrayAgg(mut f) => {
1368 f.this = transform_recursive(f.this, transform_fn)?;
1369 Expression::ArrayAgg(f)
1370 }
1371 Expression::ParseJson(mut f) => {
1372 f.this = transform_recursive(f.this, transform_fn)?;
1373 Expression::ParseJson(f)
1374 }
1375 Expression::ToJson(mut f) => {
1376 f.this = transform_recursive(f.this, transform_fn)?;
1377 Expression::ToJson(f)
1378 }
1379 Expression::JSONExtract(mut e) => {
1380 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1381 e.expression = Box::new(transform_recursive(*e.expression, transform_fn)?);
1382 Expression::JSONExtract(e)
1383 }
1384 Expression::JSONExtractScalar(mut e) => {
1385 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1386 e.expression = Box::new(transform_recursive(*e.expression, transform_fn)?);
1387 Expression::JSONExtractScalar(e)
1388 }
1389
1390 // StrToTime: recurse into this
1391 Expression::StrToTime(mut e) => {
1392 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1393 Expression::StrToTime(e)
1394 }
1395
1396 // UnixToTime: recurse into this
1397 Expression::UnixToTime(mut e) => {
1398 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1399 Expression::UnixToTime(e)
1400 }
1401
1402 // CreateTable: recurse into column defaults, on_update expressions, and data types
1403 Expression::CreateTable(mut ct) => {
1404 for col in &mut ct.columns {
1405 if let Some(default_expr) = col.default.take() {
1406 col.default = Some(transform_recursive(default_expr, transform_fn)?);
1407 }
1408 if let Some(on_update_expr) = col.on_update.take() {
1409 col.on_update = Some(transform_recursive(on_update_expr, transform_fn)?);
1410 }
1411 // Note: Column data type transformations (INT -> INT64 for BigQuery, etc.)
1412 // are NOT applied here because per-dialect transforms are designed for CAST/expression
1413 // contexts and may not produce correct results for DDL column definitions.
1414 // The DDL type mappings would need dedicated handling per source/target pair.
1415 }
1416 if let Some(as_select) = ct.as_select.take() {
1417 ct.as_select = Some(transform_recursive(as_select, transform_fn)?);
1418 }
1419 Expression::CreateTable(ct)
1420 }
1421
1422 // CreateView: recurse into the view body query
1423 Expression::CreateView(mut cv) => {
1424 cv.query = transform_recursive(cv.query, transform_fn)?;
1425 Expression::CreateView(cv)
1426 }
1427
1428 // CreateTask: recurse into the task body
1429 Expression::CreateTask(mut ct) => {
1430 ct.body = transform_recursive(ct.body, transform_fn)?;
1431 Expression::CreateTask(ct)
1432 }
1433
1434 // CreateProcedure: recurse into body expressions
1435 Expression::CreateProcedure(mut cp) => {
1436 if let Some(body) = cp.body.take() {
1437 cp.body = Some(match body {
1438 FunctionBody::Expression(expr) => {
1439 FunctionBody::Expression(transform_recursive(expr, transform_fn)?)
1440 }
1441 FunctionBody::Return(expr) => {
1442 FunctionBody::Return(transform_recursive(expr, transform_fn)?)
1443 }
1444 FunctionBody::Statements(stmts) => {
1445 let transformed_stmts = stmts
1446 .into_iter()
1447 .map(|s| transform_recursive(s, transform_fn))
1448 .collect::<Result<Vec<_>>>()?;
1449 FunctionBody::Statements(transformed_stmts)
1450 }
1451 other => other,
1452 });
1453 }
1454 Expression::CreateProcedure(cp)
1455 }
1456
1457 // CreateFunction: recurse into body expressions
1458 Expression::CreateFunction(mut cf) => {
1459 if let Some(body) = cf.body.take() {
1460 cf.body = Some(match body {
1461 FunctionBody::Expression(expr) => {
1462 FunctionBody::Expression(transform_recursive(expr, transform_fn)?)
1463 }
1464 FunctionBody::Return(expr) => {
1465 FunctionBody::Return(transform_recursive(expr, transform_fn)?)
1466 }
1467 FunctionBody::Statements(stmts) => {
1468 let transformed_stmts = stmts
1469 .into_iter()
1470 .map(|s| transform_recursive(s, transform_fn))
1471 .collect::<Result<Vec<_>>>()?;
1472 FunctionBody::Statements(transformed_stmts)
1473 }
1474 other => other,
1475 });
1476 }
1477 Expression::CreateFunction(cf)
1478 }
1479
1480 // MemberOf: recurse into left and right operands
1481 Expression::MemberOf(op) => transform_binary!(MemberOf, *op),
1482 // ArrayContainsAll (@>): recurse into left and right operands
1483 Expression::ArrayContainsAll(op) => transform_binary!(ArrayContainsAll, *op),
1484 // ArrayContainedBy (<@): recurse into left and right operands
1485 Expression::ArrayContainedBy(op) => transform_binary!(ArrayContainedBy, *op),
1486 // ArrayOverlaps (&&): recurse into left and right operands
1487 Expression::ArrayOverlaps(op) => transform_binary!(ArrayOverlaps, *op),
1488 // TsMatch (@@): recurse into left and right operands
1489 Expression::TsMatch(op) => transform_binary!(TsMatch, *op),
1490 // Adjacent (-|-): recurse into left and right operands
1491 Expression::Adjacent(op) => transform_binary!(Adjacent, *op),
1492
1493 // Table: recurse into when (HistoricalData) and changes fields
1494 Expression::Table(mut t) => {
1495 if let Some(when) = t.when.take() {
1496 let transformed =
1497 transform_recursive(Expression::HistoricalData(when), transform_fn)?;
1498 if let Expression::HistoricalData(hd) = transformed {
1499 t.when = Some(hd);
1500 }
1501 }
1502 if let Some(changes) = t.changes.take() {
1503 let transformed = transform_recursive(Expression::Changes(changes), transform_fn)?;
1504 if let Expression::Changes(c) = transformed {
1505 t.changes = Some(c);
1506 }
1507 }
1508 Expression::Table(t)
1509 }
1510
1511 // HistoricalData (Snowflake time travel): recurse into expression
1512 Expression::HistoricalData(mut hd) => {
1513 *hd.expression = transform_recursive(*hd.expression, transform_fn)?;
1514 Expression::HistoricalData(hd)
1515 }
1516
1517 // Changes (Snowflake CHANGES clause): recurse into at_before and end
1518 Expression::Changes(mut c) => {
1519 if let Some(at_before) = c.at_before.take() {
1520 c.at_before = Some(Box::new(transform_recursive(*at_before, transform_fn)?));
1521 }
1522 if let Some(end) = c.end.take() {
1523 c.end = Some(Box::new(transform_recursive(*end, transform_fn)?));
1524 }
1525 Expression::Changes(c)
1526 }
1527
1528 // TableArgument: TABLE(expr) or MODEL(expr)
1529 Expression::TableArgument(mut ta) => {
1530 ta.this = transform_recursive(ta.this, transform_fn)?;
1531 Expression::TableArgument(ta)
1532 }
1533
1534 // JoinedTable: (tbl1 JOIN tbl2 ON ...) - recurse into left and join tables
1535 Expression::JoinedTable(mut jt) => {
1536 jt.left = transform_recursive(jt.left, transform_fn)?;
1537 for join in &mut jt.joins {
1538 join.this = transform_recursive(
1539 std::mem::replace(&mut join.this, Expression::Null(crate::expressions::Null)),
1540 transform_fn,
1541 )?;
1542 if let Some(on) = join.on.take() {
1543 join.on = Some(transform_recursive(on, transform_fn)?);
1544 }
1545 }
1546 jt.lateral_views = jt
1547 .lateral_views
1548 .into_iter()
1549 .map(|mut lv| {
1550 lv.this = transform_recursive(lv.this, transform_fn)?;
1551 Ok(lv)
1552 })
1553 .collect::<Result<Vec<_>>>()?;
1554 Expression::JoinedTable(jt)
1555 }
1556
1557 // Lateral: LATERAL func() - recurse into the function expression
1558 Expression::Lateral(mut lat) => {
1559 *lat.this = transform_recursive(*lat.this, transform_fn)?;
1560 Expression::Lateral(lat)
1561 }
1562
1563 // WithinGroup: recurse into order_by items (for NULLS FIRST/LAST etc.)
1564 // but NOT into wg.this - the inner function is handled by StringAggConvert/GroupConcatConvert
1565 // as a unit together with the WithinGroup wrapper
1566 Expression::WithinGroup(mut wg) => {
1567 wg.order_by = wg
1568 .order_by
1569 .into_iter()
1570 .map(|mut o| {
1571 let original = o.this.clone();
1572 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
1573 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
1574 Ok(Expression::Ordered(transformed)) => *transformed,
1575 Ok(_) | Err(_) => o,
1576 }
1577 })
1578 .collect();
1579 Expression::WithinGroup(wg)
1580 }
1581
1582 // Filter: recurse into both the aggregate and the filter condition
1583 Expression::Filter(mut f) => {
1584 f.this = Box::new(transform_recursive(*f.this, transform_fn)?);
1585 f.expression = Box::new(transform_recursive(*f.expression, transform_fn)?);
1586 Expression::Filter(f)
1587 }
1588
1589 // BitwiseOrAgg/BitwiseAndAgg/BitwiseXorAgg: recurse into the aggregate argument
1590 Expression::BitwiseOrAgg(mut f) => {
1591 f.this = transform_recursive(f.this, transform_fn)?;
1592 Expression::BitwiseOrAgg(f)
1593 }
1594 Expression::BitwiseAndAgg(mut f) => {
1595 f.this = transform_recursive(f.this, transform_fn)?;
1596 Expression::BitwiseAndAgg(f)
1597 }
1598 Expression::BitwiseXorAgg(mut f) => {
1599 f.this = transform_recursive(f.this, transform_fn)?;
1600 Expression::BitwiseXorAgg(f)
1601 }
1602 Expression::PipeOperator(mut pipe) => {
1603 pipe.this = transform_recursive(pipe.this, transform_fn)?;
1604 pipe.expression = transform_recursive(pipe.expression, transform_fn)?;
1605 Expression::PipeOperator(pipe)
1606 }
1607
1608 // ArrayExcept/ArrayContains/ArrayDistinct: recurse into children
1609 Expression::ArrayExcept(mut f) => {
1610 f.this = transform_recursive(f.this, transform_fn)?;
1611 f.expression = transform_recursive(f.expression, transform_fn)?;
1612 Expression::ArrayExcept(f)
1613 }
1614 Expression::ArrayContains(mut f) => {
1615 f.this = transform_recursive(f.this, transform_fn)?;
1616 f.expression = transform_recursive(f.expression, transform_fn)?;
1617 Expression::ArrayContains(f)
1618 }
1619 Expression::ArrayDistinct(mut f) => {
1620 f.this = transform_recursive(f.this, transform_fn)?;
1621 Expression::ArrayDistinct(f)
1622 }
1623 Expression::ArrayPosition(mut f) => {
1624 f.this = transform_recursive(f.this, transform_fn)?;
1625 f.expression = transform_recursive(f.expression, transform_fn)?;
1626 Expression::ArrayPosition(f)
1627 }
1628
1629 // Pass through leaf nodes unchanged
1630 other => other,
1631 };
1632
1633 // Then apply the transform function
1634 transform_fn(expr)
1635}
1636
// `configs_for_dialect_type` (defined below, after the cached configs) returns the
// tokenizer config, generator config, and expression transform closure for a
// built-in dialect type. It is the shared implementation used by both
// `Dialect::get()` and custom dialect construction.
// (Plain comment, not `///`: a doc comment here would attach to the next item,
// `CachedDialectConfig`, rather than to the function it describes.)
1640// ---------------------------------------------------------------------------
1641// Cached dialect configurations
1642// ---------------------------------------------------------------------------
1643
1644/// Pre-computed tokenizer + generator configs for a dialect, cached via `LazyLock`.
1645/// Transform closures are cheap (unit-struct method calls) and created fresh each time.
1646struct CachedDialectConfig {
1647 tokenizer_config: TokenizerConfig,
1648 generator_config: Arc<GeneratorConfig>,
1649}
1650
/// Expands to one feature-gated `LazyLock<CachedDialectConfig>` static.
///
/// The static is only compiled when the named cargo feature is enabled; its
/// initializer builds both configs from a fresh instance of the dialect struct.
macro_rules! cached_dialect {
    ($name:ident, $dialect:expr, $feature:literal) => {
        #[cfg(feature = $feature)]
        static $name: LazyLock<CachedDialectConfig> = LazyLock::new(|| {
            let dialect = $dialect;
            CachedDialectConfig {
                tokenizer_config: dialect.tokenizer_config(),
                generator_config: Arc::new(dialect.generator_config()),
            }
        });
    };
}
1664
1665static CACHED_GENERIC: LazyLock<CachedDialectConfig> = LazyLock::new(|| {
1666 let d = GenericDialect;
1667 CachedDialectConfig {
1668 tokenizer_config: d.tokenizer_config(),
1669 generator_config: Arc::new(d.generator_config()),
1670 }
1671});
1672
1673cached_dialect!(CACHED_POSTGRESQL, PostgresDialect, "dialect-postgresql");
1674cached_dialect!(CACHED_MYSQL, MySQLDialect, "dialect-mysql");
1675cached_dialect!(CACHED_BIGQUERY, BigQueryDialect, "dialect-bigquery");
1676cached_dialect!(CACHED_SNOWFLAKE, SnowflakeDialect, "dialect-snowflake");
1677cached_dialect!(CACHED_DUCKDB, DuckDBDialect, "dialect-duckdb");
1678cached_dialect!(CACHED_TSQL, TSQLDialect, "dialect-tsql");
1679cached_dialect!(CACHED_ORACLE, OracleDialect, "dialect-oracle");
1680cached_dialect!(CACHED_HIVE, HiveDialect, "dialect-hive");
1681cached_dialect!(CACHED_SPARK, SparkDialect, "dialect-spark");
1682cached_dialect!(CACHED_SQLITE, SQLiteDialect, "dialect-sqlite");
1683cached_dialect!(CACHED_PRESTO, PrestoDialect, "dialect-presto");
1684cached_dialect!(CACHED_TRINO, TrinoDialect, "dialect-trino");
1685cached_dialect!(CACHED_REDSHIFT, RedshiftDialect, "dialect-redshift");
1686cached_dialect!(CACHED_CLICKHOUSE, ClickHouseDialect, "dialect-clickhouse");
1687cached_dialect!(CACHED_DATABRICKS, DatabricksDialect, "dialect-databricks");
1688cached_dialect!(CACHED_ATHENA, AthenaDialect, "dialect-athena");
1689cached_dialect!(CACHED_TERADATA, TeradataDialect, "dialect-teradata");
1690cached_dialect!(CACHED_DORIS, DorisDialect, "dialect-doris");
1691cached_dialect!(CACHED_STARROCKS, StarRocksDialect, "dialect-starrocks");
1692cached_dialect!(
1693 CACHED_MATERIALIZE,
1694 MaterializeDialect,
1695 "dialect-materialize"
1696);
1697cached_dialect!(CACHED_RISINGWAVE, RisingWaveDialect, "dialect-risingwave");
1698cached_dialect!(
1699 CACHED_SINGLESTORE,
1700 SingleStoreDialect,
1701 "dialect-singlestore"
1702);
1703cached_dialect!(
1704 CACHED_COCKROACHDB,
1705 CockroachDBDialect,
1706 "dialect-cockroachdb"
1707);
1708cached_dialect!(CACHED_TIDB, TiDBDialect, "dialect-tidb");
1709cached_dialect!(CACHED_DRUID, DruidDialect, "dialect-druid");
1710cached_dialect!(CACHED_SOLR, SolrDialect, "dialect-solr");
1711cached_dialect!(CACHED_TABLEAU, TableauDialect, "dialect-tableau");
1712cached_dialect!(CACHED_DUNE, DuneDialect, "dialect-dune");
1713cached_dialect!(CACHED_FABRIC, FabricDialect, "dialect-fabric");
1714cached_dialect!(CACHED_DRILL, DrillDialect, "dialect-drill");
1715cached_dialect!(CACHED_DREMIO, DremioDialect, "dialect-dremio");
1716cached_dialect!(CACHED_EXASOL, ExasolDialect, "dialect-exasol");
1717cached_dialect!(CACHED_DATAFUSION, DataFusionDialect, "dialect-datafusion");
1718
/// Resolve the tokenizer config, generator config, and per-node transform
/// closure for a built-in dialect.
///
/// Configs are cloned from per-dialect cached statics (cheap: the generator
/// config is behind an `Arc`), while the transform closure is rebuilt fresh
/// on each call. Feature-disabled or otherwise unmatched dialect types fall
/// back to the generic dialect's cached configuration.
fn configs_for_dialect_type(
    dt: DialectType,
) -> (
    TokenizerConfig,
    Arc<GeneratorConfig>,
    Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>,
) {
    /// Clone configs from a cached static and pair with a fresh transform closure.
    macro_rules! from_cache {
        ($cache:expr, $dialect_struct:expr) => {{
            let c = &*$cache;
            (
                c.tokenizer_config.clone(),
                c.generator_config.clone(),
                Box::new(move |e| $dialect_struct.transform_expr(e)),
            )
        }};
    }
    // Each arm is compiled only when its dialect feature is enabled; disabled
    // dialects fall through to the `_ => Generic` arm below.
    match dt {
        #[cfg(feature = "dialect-postgresql")]
        DialectType::PostgreSQL => from_cache!(CACHED_POSTGRESQL, PostgresDialect),
        #[cfg(feature = "dialect-mysql")]
        DialectType::MySQL => from_cache!(CACHED_MYSQL, MySQLDialect),
        #[cfg(feature = "dialect-bigquery")]
        DialectType::BigQuery => from_cache!(CACHED_BIGQUERY, BigQueryDialect),
        #[cfg(feature = "dialect-snowflake")]
        DialectType::Snowflake => from_cache!(CACHED_SNOWFLAKE, SnowflakeDialect),
        #[cfg(feature = "dialect-duckdb")]
        DialectType::DuckDB => from_cache!(CACHED_DUCKDB, DuckDBDialect),
        #[cfg(feature = "dialect-tsql")]
        DialectType::TSQL => from_cache!(CACHED_TSQL, TSQLDialect),
        #[cfg(feature = "dialect-oracle")]
        DialectType::Oracle => from_cache!(CACHED_ORACLE, OracleDialect),
        #[cfg(feature = "dialect-hive")]
        DialectType::Hive => from_cache!(CACHED_HIVE, HiveDialect),
        #[cfg(feature = "dialect-spark")]
        DialectType::Spark => from_cache!(CACHED_SPARK, SparkDialect),
        #[cfg(feature = "dialect-sqlite")]
        DialectType::SQLite => from_cache!(CACHED_SQLITE, SQLiteDialect),
        #[cfg(feature = "dialect-presto")]
        DialectType::Presto => from_cache!(CACHED_PRESTO, PrestoDialect),
        #[cfg(feature = "dialect-trino")]
        DialectType::Trino => from_cache!(CACHED_TRINO, TrinoDialect),
        #[cfg(feature = "dialect-redshift")]
        DialectType::Redshift => from_cache!(CACHED_REDSHIFT, RedshiftDialect),
        #[cfg(feature = "dialect-clickhouse")]
        DialectType::ClickHouse => from_cache!(CACHED_CLICKHOUSE, ClickHouseDialect),
        #[cfg(feature = "dialect-databricks")]
        DialectType::Databricks => from_cache!(CACHED_DATABRICKS, DatabricksDialect),
        #[cfg(feature = "dialect-athena")]
        DialectType::Athena => from_cache!(CACHED_ATHENA, AthenaDialect),
        #[cfg(feature = "dialect-teradata")]
        DialectType::Teradata => from_cache!(CACHED_TERADATA, TeradataDialect),
        #[cfg(feature = "dialect-doris")]
        DialectType::Doris => from_cache!(CACHED_DORIS, DorisDialect),
        #[cfg(feature = "dialect-starrocks")]
        DialectType::StarRocks => from_cache!(CACHED_STARROCKS, StarRocksDialect),
        #[cfg(feature = "dialect-materialize")]
        DialectType::Materialize => from_cache!(CACHED_MATERIALIZE, MaterializeDialect),
        #[cfg(feature = "dialect-risingwave")]
        DialectType::RisingWave => from_cache!(CACHED_RISINGWAVE, RisingWaveDialect),
        #[cfg(feature = "dialect-singlestore")]
        DialectType::SingleStore => from_cache!(CACHED_SINGLESTORE, SingleStoreDialect),
        #[cfg(feature = "dialect-cockroachdb")]
        DialectType::CockroachDB => from_cache!(CACHED_COCKROACHDB, CockroachDBDialect),
        #[cfg(feature = "dialect-tidb")]
        DialectType::TiDB => from_cache!(CACHED_TIDB, TiDBDialect),
        #[cfg(feature = "dialect-druid")]
        DialectType::Druid => from_cache!(CACHED_DRUID, DruidDialect),
        #[cfg(feature = "dialect-solr")]
        DialectType::Solr => from_cache!(CACHED_SOLR, SolrDialect),
        #[cfg(feature = "dialect-tableau")]
        DialectType::Tableau => from_cache!(CACHED_TABLEAU, TableauDialect),
        #[cfg(feature = "dialect-dune")]
        DialectType::Dune => from_cache!(CACHED_DUNE, DuneDialect),
        #[cfg(feature = "dialect-fabric")]
        DialectType::Fabric => from_cache!(CACHED_FABRIC, FabricDialect),
        #[cfg(feature = "dialect-drill")]
        DialectType::Drill => from_cache!(CACHED_DRILL, DrillDialect),
        #[cfg(feature = "dialect-dremio")]
        DialectType::Dremio => from_cache!(CACHED_DREMIO, DremioDialect),
        #[cfg(feature = "dialect-exasol")]
        DialectType::Exasol => from_cache!(CACHED_EXASOL, ExasolDialect),
        #[cfg(feature = "dialect-datafusion")]
        DialectType::DataFusion => from_cache!(CACHED_DATAFUSION, DataFusionDialect),
        _ => from_cache!(CACHED_GENERIC, GenericDialect),
    }
}
1807
1808// ---------------------------------------------------------------------------
1809// Custom dialect registry
1810// ---------------------------------------------------------------------------
1811
/// Global name -> config map for user-registered custom dialects.
/// Guarded by an `RwLock`: lookups take a read lock, (un)registration a write lock.
static CUSTOM_DIALECT_REGISTRY: LazyLock<RwLock<HashMap<String, Arc<CustomDialectConfig>>>> =
    LazyLock::new(|| RwLock::new(HashMap::new()));
1814
/// Stored configuration for a registered custom dialect.
struct CustomDialectConfig {
    /// Registry key; guaranteed not to collide with a built-in dialect name.
    name: String,
    /// Built-in dialect whose configuration this custom dialect inherits.
    base_dialect: DialectType,
    /// Tokenizer config after applying the builder's modifier (if any).
    tokenizer_config: TokenizerConfig,
    /// Generator config after applying the builder's modifier (if any).
    generator_config: GeneratorConfig,
    /// Optional per-node transform that replaces the base dialect's.
    transform: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
    /// Optional whole-tree preprocess that replaces the built-in preprocessing.
    preprocess: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
1823
1824/// Fluent builder for creating and registering custom SQL dialects.
1825///
1826/// A custom dialect is based on an existing built-in dialect and allows selective
1827/// overrides of tokenizer configuration, generator configuration, and expression
1828/// transforms.
1829///
1830/// # Example
1831///
1832/// ```rust,ignore
1833/// use polyglot_sql::dialects::{CustomDialectBuilder, DialectType, Dialect};
1834/// use polyglot_sql::generator::NormalizeFunctions;
1835///
1836/// CustomDialectBuilder::new("my_postgres")
1837/// .based_on(DialectType::PostgreSQL)
1838/// .generator_config_modifier(|gc| {
1839/// gc.normalize_functions = NormalizeFunctions::Lower;
1840/// })
1841/// .register()
1842/// .unwrap();
1843///
1844/// let d = Dialect::get_by_name("my_postgres").unwrap();
1845/// let exprs = d.parse("SELECT COUNT(*)").unwrap();
1846/// let sql = d.generate(&exprs[0]).unwrap();
1847/// assert_eq!(sql, "select count(*)");
1848///
1849/// polyglot_sql::unregister_custom_dialect("my_postgres");
1850/// ```
pub struct CustomDialectBuilder {
    /// Registry key for the new dialect; must not match a built-in name.
    name: String,
    /// Built-in dialect whose tokenizer/generator configs are inherited.
    base_dialect: DialectType,
    /// One-shot tweak applied to the inherited tokenizer config at registration.
    tokenizer_modifier: Option<Box<dyn FnOnce(&mut TokenizerConfig)>>,
    /// One-shot tweak applied to the inherited generator config at registration.
    generator_modifier: Option<Box<dyn FnOnce(&mut GeneratorConfig)>>,
    /// Replacement per-node expression transform (overrides the base dialect's).
    transform: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
    /// Replacement whole-tree preprocess (overrides the built-in preprocessing).
    preprocess: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
1859
1860impl CustomDialectBuilder {
1861 /// Create a new builder with the given name. Defaults to `Generic` as the base dialect.
1862 pub fn new(name: impl Into<String>) -> Self {
1863 Self {
1864 name: name.into(),
1865 base_dialect: DialectType::Generic,
1866 tokenizer_modifier: None,
1867 generator_modifier: None,
1868 transform: None,
1869 preprocess: None,
1870 }
1871 }
1872
1873 /// Set the base built-in dialect to inherit configuration from.
1874 pub fn based_on(mut self, dialect: DialectType) -> Self {
1875 self.base_dialect = dialect;
1876 self
1877 }
1878
1879 /// Provide a closure that modifies the tokenizer configuration inherited from the base dialect.
1880 pub fn tokenizer_config_modifier<F>(mut self, f: F) -> Self
1881 where
1882 F: FnOnce(&mut TokenizerConfig) + 'static,
1883 {
1884 self.tokenizer_modifier = Some(Box::new(f));
1885 self
1886 }
1887
1888 /// Provide a closure that modifies the generator configuration inherited from the base dialect.
1889 pub fn generator_config_modifier<F>(mut self, f: F) -> Self
1890 where
1891 F: FnOnce(&mut GeneratorConfig) + 'static,
1892 {
1893 self.generator_modifier = Some(Box::new(f));
1894 self
1895 }
1896
1897 /// Set a custom per-node expression transform function.
1898 ///
1899 /// This replaces the base dialect's transform. It is called on every expression
1900 /// node during the recursive transform pass.
1901 pub fn transform_fn<F>(mut self, f: F) -> Self
1902 where
1903 F: Fn(Expression) -> Result<Expression> + Send + Sync + 'static,
1904 {
1905 self.transform = Some(Arc::new(f));
1906 self
1907 }
1908
1909 /// Set a custom whole-tree preprocessing function.
1910 ///
1911 /// This replaces the base dialect's built-in preprocessing. It is called once
1912 /// on the entire expression tree before the recursive per-node transform.
1913 pub fn preprocess_fn<F>(mut self, f: F) -> Self
1914 where
1915 F: Fn(Expression) -> Result<Expression> + Send + Sync + 'static,
1916 {
1917 self.preprocess = Some(Arc::new(f));
1918 self
1919 }
1920
1921 /// Build the custom dialect configuration and register it in the global registry.
1922 ///
1923 /// Returns an error if:
1924 /// - The name collides with a built-in dialect name
1925 /// - A custom dialect with the same name is already registered
1926 pub fn register(self) -> Result<()> {
1927 // Reject names that collide with built-in dialects
1928 if DialectType::from_str(&self.name).is_ok() {
1929 return Err(crate::error::Error::parse(
1930 format!(
1931 "Cannot register custom dialect '{}': name collides with built-in dialect",
1932 self.name
1933 ),
1934 0,
1935 0,
1936 0,
1937 0,
1938 ));
1939 }
1940
1941 // Get base configs
1942 let (mut tok_config, arc_gen_config, _base_transform) =
1943 configs_for_dialect_type(self.base_dialect);
1944 let mut gen_config = (*arc_gen_config).clone();
1945
1946 // Apply modifiers
1947 if let Some(tok_mod) = self.tokenizer_modifier {
1948 tok_mod(&mut tok_config);
1949 }
1950 if let Some(gen_mod) = self.generator_modifier {
1951 gen_mod(&mut gen_config);
1952 }
1953
1954 let config = CustomDialectConfig {
1955 name: self.name.clone(),
1956 base_dialect: self.base_dialect,
1957 tokenizer_config: tok_config,
1958 generator_config: gen_config,
1959 transform: self.transform,
1960 preprocess: self.preprocess,
1961 };
1962
1963 register_custom_dialect(config)
1964 }
1965}
1966
1967use std::str::FromStr;
1968
1969fn register_custom_dialect(config: CustomDialectConfig) -> Result<()> {
1970 let mut registry = CUSTOM_DIALECT_REGISTRY.write().map_err(|e| {
1971 crate::error::Error::parse(format!("Registry lock poisoned: {}", e), 0, 0, 0, 0)
1972 })?;
1973
1974 if registry.contains_key(&config.name) {
1975 return Err(crate::error::Error::parse(
1976 format!("Custom dialect '{}' is already registered", config.name),
1977 0,
1978 0,
1979 0,
1980 0,
1981 ));
1982 }
1983
1984 registry.insert(config.name.clone(), Arc::new(config));
1985 Ok(())
1986}
1987
1988/// Remove a custom dialect from the global registry.
1989///
1990/// Returns `true` if a dialect with that name was found and removed,
1991/// `false` if no such custom dialect existed.
1992pub fn unregister_custom_dialect(name: &str) -> bool {
1993 if let Ok(mut registry) = CUSTOM_DIALECT_REGISTRY.write() {
1994 registry.remove(name).is_some()
1995 } else {
1996 false
1997 }
1998}
1999
2000fn get_custom_dialect_config(name: &str) -> Option<Arc<CustomDialectConfig>> {
2001 CUSTOM_DIALECT_REGISTRY
2002 .read()
2003 .ok()
2004 .and_then(|registry| registry.get(name).cloned())
2005}
2006
2007/// Main entry point for dialect-specific SQL operations.
2008///
2009/// A `Dialect` bundles together a tokenizer, generator configuration, and expression
2010/// transformer for a specific SQL database engine. It is the high-level API through
2011/// which callers parse, generate, transform, and transpile SQL.
2012///
2013/// # Usage
2014///
2015/// ```rust,ignore
2016/// use polyglot_sql::dialects::{Dialect, DialectType};
2017///
2018/// // Parse PostgreSQL SQL into an AST
2019/// let pg = Dialect::get(DialectType::PostgreSQL);
2020/// let exprs = pg.parse("SELECT id, name FROM users WHERE active")?;
2021///
2022/// // Transpile from PostgreSQL to BigQuery
2023/// let results = pg.transpile_to("SELECT NOW()", DialectType::BigQuery)?;
2024/// assert_eq!(results[0], "SELECT CURRENT_TIMESTAMP()");
2025/// ```
2026///
2027/// Obtain an instance via [`Dialect::get`] or [`Dialect::get_by_name`].
2028/// The struct is `Send + Sync` safe so it can be shared across threads.
pub struct Dialect {
    /// Engine this instance is configured for. Custom dialects report their
    /// base dialect's type here.
    dialect_type: DialectType,
    /// Tokenizer pre-configured with this dialect's lexing rules.
    tokenizer: Tokenizer,
    /// Shared generator configuration; the fast path in `generate` reuses this
    /// `Arc` without cloning the config.
    generator_config: Arc<GeneratorConfig>,
    /// Per-node expression transform applied recursively by `transform`.
    transformer: Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>,
    /// Optional function to get expression-specific generator config (for hybrid dialects like Athena).
    generator_config_for_expr: Option<Box<dyn Fn(&Expression) -> GeneratorConfig + Send + Sync>>,
    /// Optional custom preprocessing function (overrides built-in preprocess for custom dialects).
    custom_preprocess: Option<Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
2039
2040impl Dialect {
2041 /// Creates a fully configured [`Dialect`] instance for the given [`DialectType`].
2042 ///
2043 /// This is the primary constructor. It initializes the tokenizer, generator config,
2044 /// and expression transformer based on the dialect's [`DialectImpl`] implementation.
2045 /// For hybrid dialects like Athena, it also sets up expression-specific generator
2046 /// config routing.
2047 pub fn get(dialect_type: DialectType) -> Self {
2048 let (tokenizer_config, generator_config, transformer) =
2049 configs_for_dialect_type(dialect_type);
2050
2051 // Set up expression-specific generator config for hybrid dialects
2052 let generator_config_for_expr: Option<
2053 Box<dyn Fn(&Expression) -> GeneratorConfig + Send + Sync>,
2054 > = match dialect_type {
2055 #[cfg(feature = "dialect-athena")]
2056 DialectType::Athena => Some(Box::new(|expr| {
2057 AthenaDialect.generator_config_for_expr(expr)
2058 })),
2059 _ => None,
2060 };
2061
2062 Self {
2063 dialect_type,
2064 tokenizer: Tokenizer::new(tokenizer_config),
2065 generator_config,
2066 transformer,
2067 generator_config_for_expr,
2068 custom_preprocess: None,
2069 }
2070 }
2071
2072 /// Look up a dialect by string name.
2073 ///
2074 /// Checks built-in dialect names first (via [`DialectType::from_str`]), then
2075 /// falls back to the custom dialect registry. Returns `None` if no dialect
2076 /// with the given name exists.
2077 pub fn get_by_name(name: &str) -> Option<Self> {
2078 // Try built-in first
2079 if let Ok(dt) = DialectType::from_str(name) {
2080 return Some(Self::get(dt));
2081 }
2082
2083 // Try custom registry
2084 let config = get_custom_dialect_config(name)?;
2085 Some(Self::from_custom_config(&config))
2086 }
2087
2088 /// Construct a `Dialect` from a custom dialect configuration.
2089 fn from_custom_config(config: &CustomDialectConfig) -> Self {
2090 // Build the transformer: use custom if provided, else use base dialect's
2091 let transformer: Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync> =
2092 if let Some(ref custom_transform) = config.transform {
2093 let t = Arc::clone(custom_transform);
2094 Box::new(move |e| t(e))
2095 } else {
2096 let (_, _, base_transform) = configs_for_dialect_type(config.base_dialect);
2097 base_transform
2098 };
2099
2100 // Build the custom preprocess: use custom if provided
2101 let custom_preprocess: Option<Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>> =
2102 config.preprocess.as_ref().map(|p| {
2103 let p = Arc::clone(p);
2104 Box::new(move |e: Expression| p(e))
2105 as Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>
2106 });
2107
2108 Self {
2109 dialect_type: config.base_dialect,
2110 tokenizer: Tokenizer::new(config.tokenizer_config.clone()),
2111 generator_config: Arc::new(config.generator_config.clone()),
2112 transformer,
2113 generator_config_for_expr: None,
2114 custom_preprocess,
2115 }
2116 }
2117
    /// The [`DialectType`] this instance was configured for.
    ///
    /// For custom dialects this is the base dialect's type.
    pub fn dialect_type(&self) -> DialectType {
        self.dialect_type
    }
2122
    /// Borrow this dialect's base generator configuration.
    ///
    /// Note: hybrid dialects may use a different, per-expression config during
    /// generation (see `generator_config_for_expr`).
    pub fn generator_config(&self) -> &GeneratorConfig {
        &self.generator_config
    }
2127
2128 /// Parses a SQL string into a list of [`Expression`] AST nodes.
2129 ///
2130 /// The input may contain multiple semicolon-separated statements; each one
2131 /// produces a separate element in the returned vector. Tokenization uses
2132 /// this dialect's configured tokenizer, and parsing uses the dialect-aware parser.
2133 pub fn parse(&self, sql: &str) -> Result<Vec<Expression>> {
2134 let tokens = self.tokenizer.tokenize(sql)?;
2135 let config = crate::parser::ParserConfig {
2136 dialect: Some(self.dialect_type),
2137 ..Default::default()
2138 };
2139 let mut parser = Parser::with_source(tokens, config, sql.to_string());
2140 parser.parse()
2141 }
2142
    /// Tokenize SQL using this dialect's tokenizer configuration.
    ///
    /// Exposes the raw token stream without invoking the parser.
    pub fn tokenize(&self, sql: &str) -> Result<Vec<Token>> {
        self.tokenizer.tokenize(sql)
    }
2147
2148 /// Get the generator config for a specific expression (supports hybrid dialects).
2149 /// Returns an owned `GeneratorConfig` suitable for mutation before generation.
2150 fn get_config_for_expr(&self, expr: &Expression) -> GeneratorConfig {
2151 if let Some(ref config_fn) = self.generator_config_for_expr {
2152 config_fn(expr)
2153 } else {
2154 (*self.generator_config).clone()
2155 }
2156 }
2157
2158 /// Generates a SQL string from an [`Expression`] AST node.
2159 ///
2160 /// The output uses this dialect's generator configuration for identifier quoting,
2161 /// keyword casing, function name normalization, and syntax style. The result is
2162 /// a single-line (non-pretty) SQL string.
2163 pub fn generate(&self, expr: &Expression) -> Result<String> {
2164 // Fast path: when no per-expression config override, share the Arc cheaply.
2165 if self.generator_config_for_expr.is_none() {
2166 let mut generator = Generator::with_arc_config(self.generator_config.clone());
2167 return generator.generate(expr);
2168 }
2169 let config = self.get_config_for_expr(expr);
2170 let mut generator = Generator::with_config(config);
2171 generator.generate(expr)
2172 }
2173
2174 /// Generate SQL from an expression with pretty printing enabled
2175 pub fn generate_pretty(&self, expr: &Expression) -> Result<String> {
2176 let mut config = self.get_config_for_expr(expr);
2177 config.pretty = true;
2178 let mut generator = Generator::with_config(config);
2179 generator.generate(expr)
2180 }
2181
2182 /// Generate SQL from an expression with source dialect info (for transpilation)
2183 pub fn generate_with_source(&self, expr: &Expression, source: DialectType) -> Result<String> {
2184 let mut config = self.get_config_for_expr(expr);
2185 config.source_dialect = Some(source);
2186 let mut generator = Generator::with_config(config);
2187 generator.generate(expr)
2188 }
2189
2190 /// Generate SQL from an expression with pretty printing and source dialect info
2191 pub fn generate_pretty_with_source(
2192 &self,
2193 expr: &Expression,
2194 source: DialectType,
2195 ) -> Result<String> {
2196 let mut config = self.get_config_for_expr(expr);
2197 config.pretty = true;
2198 config.source_dialect = Some(source);
2199 let mut generator = Generator::with_config(config);
2200 generator.generate(expr)
2201 }
2202
2203 /// Generate SQL from an expression with forced identifier quoting (identify=True)
2204 pub fn generate_with_identify(&self, expr: &Expression) -> Result<String> {
2205 let mut config = self.get_config_for_expr(expr);
2206 config.always_quote_identifiers = true;
2207 let mut generator = Generator::with_config(config);
2208 generator.generate(expr)
2209 }
2210
2211 /// Generate SQL from an expression with pretty printing and forced identifier quoting
2212 pub fn generate_pretty_with_identify(&self, expr: &Expression) -> Result<String> {
2213 let mut config = (*self.generator_config).clone();
2214 config.pretty = true;
2215 config.always_quote_identifiers = true;
2216 let mut generator = Generator::with_config(config);
2217 generator.generate(expr)
2218 }
2219
2220 /// Generate SQL from an expression with caller-specified config overrides
2221 pub fn generate_with_overrides(
2222 &self,
2223 expr: &Expression,
2224 overrides: impl FnOnce(&mut GeneratorConfig),
2225 ) -> Result<String> {
2226 let mut config = self.get_config_for_expr(expr);
2227 overrides(&mut config);
2228 let mut generator = Generator::with_config(config);
2229 generator.generate(expr)
2230 }
2231
2232 /// Transforms an expression tree to conform to this dialect's syntax and semantics.
2233 ///
2234 /// The transformation proceeds in two phases:
2235 /// 1. **Preprocessing** -- whole-tree structural rewrites such as eliminating QUALIFY,
2236 /// ensuring boolean predicates, or converting DISTINCT ON to a window-function pattern.
2237 /// 2. **Recursive per-node transform** -- a bottom-up pass via [`transform_recursive`]
2238 /// that applies this dialect's [`DialectImpl::transform_expr`] to every node.
2239 ///
2240 /// This method is used both during transpilation (to rewrite an AST for a target dialect)
2241 /// and for identity transforms (normalizing SQL within the same dialect).
2242 pub fn transform(&self, expr: Expression) -> Result<Expression> {
2243 // Apply preprocessing transforms based on dialect
2244 let preprocessed = self.preprocess(expr)?;
2245 // Then apply recursive transformation
2246 transform_recursive(preprocessed, &self.transformer)
2247 }
2248
    /// Apply dialect-specific whole-tree preprocessing transforms.
    ///
    /// Phase 1 of [`transform`](Self::transform): rewrites that must see the
    /// entire statement (QUALIFY elimination, CTE hoisting, join lowering, ...)
    /// before the recursive per-node pass. A custom dialect's registered
    /// preprocess function, when present, replaces all built-in logic below.
    fn preprocess(&self, expr: Expression) -> Result<Expression> {
        // If a custom preprocess function is set, use it instead of the built-in logic
        if let Some(ref custom_preprocess) = self.custom_preprocess {
            return custom_preprocess(expr);
        }

        // The import is itself feature-gated so builds with none of these
        // dialects enabled do not pull in (or warn about) the transforms module.
        #[cfg(any(
            feature = "dialect-mysql",
            feature = "dialect-postgresql",
            feature = "dialect-bigquery",
            feature = "dialect-snowflake",
            feature = "dialect-tsql",
            feature = "dialect-spark",
            feature = "dialect-databricks",
            feature = "dialect-hive",
            feature = "dialect-sqlite",
            feature = "dialect-trino",
            feature = "dialect-presto",
            feature = "dialect-duckdb",
            feature = "dialect-redshift",
            feature = "dialect-starrocks",
            feature = "dialect-oracle",
            feature = "dialect-clickhouse",
        ))]
        use crate::transforms;

        // NOTE: within each arm, transform order is load-bearing — later passes
        // may depend on the shape produced by earlier ones.
        match self.dialect_type {
            // MySQL doesn't support QUALIFY, DISTINCT ON, FULL OUTER JOIN
            // MySQL doesn't natively support GENERATE_DATE_ARRAY (expand to recursive CTE)
            #[cfg(feature = "dialect-mysql")]
            DialectType::MySQL => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_full_outer_join(expr)?;
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
                Ok(expr)
            }
            // PostgreSQL doesn't support QUALIFY
            // PostgreSQL: UNNEST(GENERATE_SERIES) -> subquery wrapping
            // PostgreSQL: Normalize SET ... TO to SET ... = in CREATE FUNCTION
            #[cfg(feature = "dialect-postgresql")]
            DialectType::PostgreSQL => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::unwrap_unnest_generate_series_for_postgres(expr)?;
                // Normalize SET ... TO to SET ... = in CREATE FUNCTION
                // Only normalize when sqlglot would fully parse (no body) —
                // sqlglot falls back to Command for complex function bodies,
                // preserving the original text including TO.
                let expr = if let Expression::CreateFunction(mut cf) = expr {
                    if cf.body.is_none() {
                        for opt in &mut cf.set_options {
                            if let crate::expressions::FunctionSetValue::Value { use_to, .. } =
                                &mut opt.value
                            {
                                *use_to = false;
                            }
                        }
                    }
                    Expression::CreateFunction(cf)
                } else {
                    expr
                };
                Ok(expr)
            }
            // BigQuery doesn't support DISTINCT ON or CTE column aliases
            #[cfg(feature = "dialect-bigquery")]
            DialectType::BigQuery => {
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::pushdown_cte_column_names(expr)?;
                let expr = transforms::explode_projection_to_unnest(expr, DialectType::BigQuery)?;
                Ok(expr)
            }
            // Snowflake
            #[cfg(feature = "dialect-snowflake")]
            DialectType::Snowflake => {
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::eliminate_window_clause(expr)?;
                let expr = transforms::snowflake_flatten_projection_to_unnest(expr)?;
                Ok(expr)
            }
            // TSQL doesn't support QUALIFY
            // TSQL requires boolean expressions in WHERE/HAVING (no implicit truthiness)
            // TSQL doesn't support CTEs in subqueries (hoist to top level)
            // NOTE: no_limit_order_by_union is handled in cross_dialect_normalize (not preprocess)
            // to avoid breaking TSQL identity tests where ORDER BY on UNION is valid
            #[cfg(feature = "dialect-tsql")]
            DialectType::TSQL => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::ensure_bools(expr)?;
                let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
                let expr = transforms::move_ctes_to_top_level(expr)?;
                let expr = transforms::qualify_derived_table_outputs(expr)?;
                Ok(expr)
            }
            // Spark doesn't support QUALIFY (but Databricks does)
            // Spark doesn't support CTEs in subqueries (hoist to top level)
            #[cfg(feature = "dialect-spark")]
            DialectType::Spark => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::add_auto_table_alias(expr)?;
                let expr = transforms::simplify_nested_paren_values(expr)?;
                let expr = transforms::move_ctes_to_top_level(expr)?;
                Ok(expr)
            }
            // Databricks supports QUALIFY natively
            // Databricks doesn't support CTEs in subqueries (hoist to top level)
            #[cfg(feature = "dialect-databricks")]
            DialectType::Databricks => {
                let expr = transforms::add_auto_table_alias(expr)?;
                let expr = transforms::simplify_nested_paren_values(expr)?;
                let expr = transforms::move_ctes_to_top_level(expr)?;
                Ok(expr)
            }
            // Hive doesn't support QUALIFY or CTEs in subqueries
            #[cfg(feature = "dialect-hive")]
            DialectType::Hive => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::move_ctes_to_top_level(expr)?;
                Ok(expr)
            }
            // SQLite doesn't support QUALIFY
            #[cfg(feature = "dialect-sqlite")]
            DialectType::SQLite => {
                let expr = transforms::eliminate_qualify(expr)?;
                Ok(expr)
            }
            // Trino doesn't support QUALIFY
            #[cfg(feature = "dialect-trino")]
            DialectType::Trino => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::explode_projection_to_unnest(expr, DialectType::Trino)?;
                Ok(expr)
            }
            // Presto doesn't support QUALIFY or WINDOW clause
            #[cfg(feature = "dialect-presto")]
            DialectType::Presto => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_window_clause(expr)?;
                let expr = transforms::explode_projection_to_unnest(expr, DialectType::Presto)?;
                Ok(expr)
            }
            // DuckDB supports QUALIFY - no elimination needed
            // Expand POSEXPLODE to GENERATE_SUBSCRIPTS + UNNEST
            // Expand LIKE ANY / ILIKE ANY to OR chains (DuckDB doesn't support quantifiers)
            #[cfg(feature = "dialect-duckdb")]
            DialectType::DuckDB => {
                let expr = transforms::expand_posexplode_duckdb(expr)?;
                let expr = transforms::expand_like_any(expr)?;
                Ok(expr)
            }
            // Redshift doesn't support QUALIFY, WINDOW clause, or GENERATE_DATE_ARRAY
            #[cfg(feature = "dialect-redshift")]
            DialectType::Redshift => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_window_clause(expr)?;
                let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
                Ok(expr)
            }
            // StarRocks doesn't support BETWEEN in DELETE statements or QUALIFY
            #[cfg(feature = "dialect-starrocks")]
            DialectType::StarRocks => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::expand_between_in_delete(expr)?;
                Ok(expr)
            }
            // DataFusion supports QUALIFY and semi/anti joins natively
            #[cfg(feature = "dialect-datafusion")]
            DialectType::DataFusion => Ok(expr),
            // Oracle doesn't support QUALIFY
            #[cfg(feature = "dialect-oracle")]
            DialectType::Oracle => {
                let expr = transforms::eliminate_qualify(expr)?;
                Ok(expr)
            }
            // Drill - no special preprocessing needed
            #[cfg(feature = "dialect-drill")]
            DialectType::Drill => Ok(expr),
            // Teradata - no special preprocessing needed
            #[cfg(feature = "dialect-teradata")]
            DialectType::Teradata => Ok(expr),
            // ClickHouse doesn't support ORDER BY/LIMIT directly on UNION
            #[cfg(feature = "dialect-clickhouse")]
            DialectType::ClickHouse => {
                let expr = transforms::no_limit_order_by_union(expr)?;
                Ok(expr)
            }
            // Other dialects - no preprocessing
            _ => Ok(expr),
        }
    }
2442
    /// Transpile SQL from this dialect to another, one output string per
    /// input statement. Output is single-line (non-pretty).
    pub fn transpile_to(&self, sql: &str, target: DialectType) -> Result<Vec<String>> {
        self.transpile_to_inner(sql, target, false)
    }
2447
    /// Transpile SQL from this dialect to another with pretty printing enabled,
    /// one output string per input statement.
    pub fn transpile_to_pretty(&self, sql: &str, target: DialectType) -> Result<Vec<String>> {
        self.transpile_to_inner(sql, target, true)
    }
2452
2453 #[cfg(not(feature = "transpile"))]
2454 fn transpile_to_inner(
2455 &self,
2456 sql: &str,
2457 target: DialectType,
2458 pretty: bool,
2459 ) -> Result<Vec<String>> {
2460 // Without the transpile feature, only same-dialect or to/from generic is supported
2461 if self.dialect_type != target
2462 && self.dialect_type != DialectType::Generic
2463 && target != DialectType::Generic
2464 {
2465 return Err(crate::error::Error::parse(
2466 "Cross-dialect transpilation not available in this build",
2467 0,
2468 0,
2469 0,
2470 0,
2471 ));
2472 }
2473
2474 let expressions = self.parse(sql)?;
2475 let target_dialect = Dialect::get(target);
2476 let generic_identity =
2477 self.dialect_type == DialectType::Generic && target == DialectType::Generic;
2478
2479 if generic_identity {
2480 return expressions
2481 .into_iter()
2482 .map(|expr| {
2483 if pretty {
2484 target_dialect.generate_pretty_with_source(&expr, self.dialect_type)
2485 } else {
2486 target_dialect.generate_with_source(&expr, self.dialect_type)
2487 }
2488 })
2489 .collect();
2490 }
2491
2492 expressions
2493 .into_iter()
2494 .map(|expr| {
2495 let transformed = target_dialect.transform(expr)?;
2496 if pretty {
2497 target_dialect.generate_pretty_with_source(&transformed, self.dialect_type)
2498 } else {
2499 target_dialect.generate_with_source(&transformed, self.dialect_type)
2500 }
2501 })
2502 .collect()
2503 }
2504
2505 #[cfg(feature = "transpile")]
2506 fn transpile_to_inner(
2507 &self,
2508 sql: &str,
2509 target: DialectType,
2510 pretty: bool,
2511 ) -> Result<Vec<String>> {
2512 let expressions = self.parse(sql)?;
2513 let target_dialect = Dialect::get(target);
2514 let generic_identity =
2515 self.dialect_type == DialectType::Generic && target == DialectType::Generic;
2516
2517 if generic_identity {
2518 return expressions
2519 .into_iter()
2520 .map(|expr| {
2521 if pretty {
2522 target_dialect.generate_pretty_with_source(&expr, self.dialect_type)
2523 } else {
2524 target_dialect.generate_with_source(&expr, self.dialect_type)
2525 }
2526 })
2527 .collect();
2528 }
2529
2530 expressions
2531 .into_iter()
2532 .map(|expr| {
2533 // DuckDB source: normalize VARCHAR/CHAR to TEXT (DuckDB doesn't support
2534 // VARCHAR length constraints). This emulates Python sqlglot's DuckDB parser
2535 // where VARCHAR_LENGTH = None and VARCHAR maps to TEXT.
2536 let expr = if matches!(self.dialect_type, DialectType::DuckDB) {
2537 use crate::expressions::DataType as DT;
2538 transform_recursive(expr, &|e| match e {
2539 Expression::DataType(DT::VarChar { .. }) => {
2540 Ok(Expression::DataType(DT::Text))
2541 }
2542 Expression::DataType(DT::Char { .. }) => Ok(Expression::DataType(DT::Text)),
2543 _ => Ok(e),
2544 })?
2545 } else {
2546 expr
2547 };
2548
2549 // When source and target differ, first normalize the source dialect's
2550 // AST constructs to standard SQL, so that the target dialect can handle them.
2551 // This handles cases like Snowflake's SQUARE -> POWER, DIV0 -> CASE, etc.
2552 let normalized =
2553 if self.dialect_type != target && self.dialect_type != DialectType::Generic {
2554 self.transform(expr)?
2555 } else {
2556 expr
2557 };
2558
2559 // For TSQL source targeting non-TSQL: unwrap ISNULL(JSON_QUERY(...), JSON_VALUE(...))
2560 // to just JSON_QUERY(...) so cross_dialect_normalize can convert it cleanly.
2561 // The TSQL read transform wraps JsonQuery in ISNULL for identity, but for
2562 // cross-dialect transpilation we need the unwrapped JSON_QUERY.
2563 let normalized =
2564 if matches!(self.dialect_type, DialectType::TSQL | DialectType::Fabric)
2565 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
2566 {
2567 transform_recursive(normalized, &|e| {
2568 if let Expression::Function(ref f) = e {
2569 if f.name.eq_ignore_ascii_case("ISNULL") && f.args.len() == 2 {
2570 // Check if first arg is JSON_QUERY and second is JSON_VALUE
2571 if let (
2572 Expression::Function(ref jq),
2573 Expression::Function(ref jv),
2574 ) = (&f.args[0], &f.args[1])
2575 {
2576 if jq.name.eq_ignore_ascii_case("JSON_QUERY")
2577 && jv.name.eq_ignore_ascii_case("JSON_VALUE")
2578 {
2579 // Unwrap: return just JSON_QUERY(...)
2580 return Ok(f.args[0].clone());
2581 }
2582 }
2583 }
2584 }
2585 Ok(e)
2586 })?
2587 } else {
2588 normalized
2589 };
2590
2591 // Snowflake source to non-Snowflake target: CURRENT_TIME -> LOCALTIME
2592 // Snowflake's CURRENT_TIME is equivalent to LOCALTIME in other dialects.
2593 // Python sqlglot parses Snowflake's CURRENT_TIME as Localtime expression.
2594 let normalized = if matches!(self.dialect_type, DialectType::Snowflake)
2595 && !matches!(target, DialectType::Snowflake)
2596 {
2597 transform_recursive(normalized, &|e| {
2598 if let Expression::Function(ref f) = e {
2599 if f.name.eq_ignore_ascii_case("CURRENT_TIME") {
2600 return Ok(Expression::Localtime(Box::new(
2601 crate::expressions::Localtime { this: None },
2602 )));
2603 }
2604 }
2605 Ok(e)
2606 })?
2607 } else {
2608 normalized
2609 };
2610
2611 // Snowflake source to DuckDB target: REPEAT(' ', n) -> REPEAT(' ', CAST(n AS BIGINT))
2612 // Snowflake's SPACE(n) is converted to REPEAT(' ', n) by the Snowflake source
2613 // transform. DuckDB requires the count argument to be BIGINT.
2614 let normalized = if matches!(self.dialect_type, DialectType::Snowflake)
2615 && matches!(target, DialectType::DuckDB)
2616 {
2617 transform_recursive(normalized, &|e| {
2618 if let Expression::Function(ref f) = e {
2619 if f.name.eq_ignore_ascii_case("REPEAT") && f.args.len() == 2 {
2620 // Check if first arg is space string literal
2621 if let Expression::Literal(ref lit) = f.args[0] {
2622 if let crate::expressions::Literal::String(ref s) = lit.as_ref()
2623 {
2624 if s == " " {
2625 // Wrap second arg in CAST(... AS BIGINT) if not already
2626 if !matches!(f.args[1], Expression::Cast(_)) {
2627 let mut new_args = f.args.clone();
2628 new_args[1] = Expression::Cast(Box::new(
2629 crate::expressions::Cast {
2630 this: new_args[1].clone(),
2631 to: crate::expressions::DataType::BigInt {
2632 length: None,
2633 },
2634 trailing_comments: Vec::new(),
2635 double_colon_syntax: false,
2636 format: None,
2637 default: None,
2638 inferred_type: None,
2639 },
2640 ));
2641 return Ok(Expression::Function(Box::new(
2642 crate::expressions::Function {
2643 name: f.name.clone(),
2644 args: new_args,
2645 distinct: f.distinct,
2646 trailing_comments: f
2647 .trailing_comments
2648 .clone(),
2649 use_bracket_syntax: f.use_bracket_syntax,
2650 no_parens: f.no_parens,
2651 quoted: f.quoted,
2652 span: None,
2653 inferred_type: None,
2654 },
2655 )));
2656 }
2657 }
2658 }
2659 }
2660 }
2661 }
2662 Ok(e)
2663 })?
2664 } else {
2665 normalized
2666 };
2667
2668 // Propagate struct field names in arrays (for BigQuery source to non-BigQuery target)
2669 // BigQuery->BigQuery should NOT propagate names (BigQuery handles implicit inheritance)
2670 let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
2671 && !matches!(target, DialectType::BigQuery)
2672 {
2673 crate::transforms::propagate_struct_field_names(normalized)?
2674 } else {
2675 normalized
2676 };
2677
2678 // Snowflake source to DuckDB target: RANDOM()/RANDOM(seed) -> scaled RANDOM()
2679 // Snowflake RANDOM() returns integer in [-2^63, 2^63-1], DuckDB RANDOM() returns float [0, 1)
2680 // Skip RANDOM inside UNIFORM/NORMAL/ZIPF/RANDSTR generator args since those
2681 // functions handle their generator args differently (as float seeds).
2682 let normalized = if matches!(self.dialect_type, DialectType::Snowflake)
2683 && matches!(target, DialectType::DuckDB)
2684 {
2685 fn make_scaled_random() -> Expression {
2686 let lower =
2687 Expression::Literal(Box::new(crate::expressions::Literal::Number(
2688 "-9.223372036854776E+18".to_string(),
2689 )));
2690 let upper =
2691 Expression::Literal(Box::new(crate::expressions::Literal::Number(
2692 "9.223372036854776e+18".to_string(),
2693 )));
2694 let random_call = Expression::Random(crate::expressions::Random);
2695 let range_size = Expression::Paren(Box::new(crate::expressions::Paren {
2696 this: Expression::Sub(Box::new(crate::expressions::BinaryOp {
2697 left: upper,
2698 right: lower.clone(),
2699 left_comments: vec![],
2700 operator_comments: vec![],
2701 trailing_comments: vec![],
2702 inferred_type: None,
2703 })),
2704 trailing_comments: vec![],
2705 }));
2706 let scaled = Expression::Mul(Box::new(crate::expressions::BinaryOp {
2707 left: random_call,
2708 right: range_size,
2709 left_comments: vec![],
2710 operator_comments: vec![],
2711 trailing_comments: vec![],
2712 inferred_type: None,
2713 }));
2714 let shifted = Expression::Add(Box::new(crate::expressions::BinaryOp {
2715 left: lower,
2716 right: scaled,
2717 left_comments: vec![],
2718 operator_comments: vec![],
2719 trailing_comments: vec![],
2720 inferred_type: None,
2721 }));
2722 Expression::Cast(Box::new(crate::expressions::Cast {
2723 this: shifted,
2724 to: crate::expressions::DataType::BigInt { length: None },
2725 trailing_comments: vec![],
2726 double_colon_syntax: false,
2727 format: None,
2728 default: None,
2729 inferred_type: None,
2730 }))
2731 }
2732
2733 // Pre-process: protect seeded RANDOM(seed) inside UNIFORM/NORMAL/ZIPF/RANDSTR
2734 // by converting Rand{seed: Some(s)} to Function{name:"RANDOM", args:[s]}.
2735 // This prevents transform_recursive (which is bottom-up) from expanding
2736 // seeded RANDOM into make_scaled_random() and losing the seed value.
2737 // Unseeded RANDOM()/Rand{seed:None} is left as-is so it gets expanded
2738 // and then un-expanded back to Expression::Random by the code below.
2739 let normalized = transform_recursive(normalized, &|e| {
2740 if let Expression::Function(ref f) = e {
2741 let n = f.name.to_ascii_uppercase();
2742 if n == "UNIFORM" || n == "NORMAL" || n == "ZIPF" || n == "RANDSTR" {
2743 if let Expression::Function(mut f) = e {
2744 for arg in f.args.iter_mut() {
2745 if let Expression::Rand(ref r) = arg {
2746 if r.lower.is_none() && r.upper.is_none() {
2747 if let Some(ref seed) = r.seed {
2748 // Convert Rand{seed: Some(s)} to Function("RANDOM", [s])
2749 // so it won't be expanded by the RANDOM expansion below
2750 *arg = Expression::Function(Box::new(
2751 crate::expressions::Function::new(
2752 "RANDOM".to_string(),
2753 vec![*seed.clone()],
2754 ),
2755 ));
2756 }
2757 }
2758 }
2759 }
2760 return Ok(Expression::Function(f));
2761 }
2762 }
2763 }
2764 Ok(e)
2765 })?;
2766
2767 // transform_recursive processes bottom-up, so RANDOM() (unseeded) inside
2768 // generator functions (UNIFORM, NORMAL, ZIPF) gets expanded before
2769 // we see the parent. We detect this and undo the expansion by replacing
2770 // the expanded pattern back with Expression::Random.
2771 // Seeded RANDOM(seed) was already protected above as Function("RANDOM", [seed]).
2772 // Note: RANDSTR is NOT included here — it needs the expanded form for unseeded
2773 // RANDOM() since the DuckDB handler uses the expanded SQL as-is in the hash.
2774 transform_recursive(normalized, &|e| {
2775 if let Expression::Function(ref f) = e {
2776 let n = f.name.to_ascii_uppercase();
2777 if n == "UNIFORM" || n == "NORMAL" || n == "ZIPF" {
2778 if let Expression::Function(mut f) = e {
2779 for arg in f.args.iter_mut() {
2780 // Detect expanded RANDOM pattern: CAST(-9.22... + RANDOM() * (...) AS BIGINT)
2781 if let Expression::Cast(ref cast) = arg {
2782 if matches!(
2783 cast.to,
2784 crate::expressions::DataType::BigInt { .. }
2785 ) {
2786 if let Expression::Add(ref add) = cast.this {
2787 if let Expression::Literal(ref lit) = add.left {
2788 if let crate::expressions::Literal::Number(
2789 ref num,
2790 ) = lit.as_ref()
2791 {
2792 if num == "-9.223372036854776E+18" {
2793 *arg = Expression::Random(
2794 crate::expressions::Random,
2795 );
2796 }
2797 }
2798 }
2799 }
2800 }
2801 }
2802 }
2803 return Ok(Expression::Function(f));
2804 }
2805 return Ok(e);
2806 }
2807 }
2808 match e {
2809 Expression::Random(_) => Ok(make_scaled_random()),
2810 // Rand(seed) with no bounds: drop seed and expand
2811 // (DuckDB RANDOM doesn't support seeds)
2812 Expression::Rand(ref r) if r.lower.is_none() && r.upper.is_none() => {
2813 Ok(make_scaled_random())
2814 }
2815 _ => Ok(e),
2816 }
2817 })?
2818 } else {
2819 normalized
2820 };
2821
2822 // Apply cross-dialect semantic normalizations
2823 let normalized =
2824 Self::cross_dialect_normalize(normalized, self.dialect_type, target)?;
2825
2826 // For DuckDB target from BigQuery source: wrap UNNEST of struct arrays in
2827 // (SELECT UNNEST(..., max_depth => 2)) subquery
2828 // Must run BEFORE unnest_alias_to_column_alias since it changes alias structure
2829 let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
2830 && matches!(target, DialectType::DuckDB)
2831 {
2832 crate::transforms::wrap_duckdb_unnest_struct(normalized)?
2833 } else {
2834 normalized
2835 };
2836
2837 // Convert BigQuery UNNEST aliases to column-alias format for DuckDB/Presto/Spark
2838 // UNNEST(arr) AS x -> UNNEST(arr) AS _t0(x)
2839 let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
2840 && matches!(
2841 target,
2842 DialectType::DuckDB
2843 | DialectType::Presto
2844 | DialectType::Trino
2845 | DialectType::Athena
2846 | DialectType::Spark
2847 | DialectType::Databricks
2848 ) {
2849 crate::transforms::unnest_alias_to_column_alias(normalized)?
2850 } else if matches!(self.dialect_type, DialectType::BigQuery)
2851 && matches!(target, DialectType::BigQuery | DialectType::Redshift)
2852 {
2853 // For BigQuery/Redshift targets: move UNNEST FROM items to CROSS JOINs
2854 // but don't convert alias format (no _t0 wrapper)
2855 let result = crate::transforms::unnest_from_to_cross_join(normalized)?;
2856 // For Redshift: strip UNNEST when arg is a column reference path
2857 if matches!(target, DialectType::Redshift) {
2858 crate::transforms::strip_unnest_column_refs(result)?
2859 } else {
2860 result
2861 }
2862 } else {
2863 normalized
2864 };
2865
2866 // For Presto/Trino targets from PostgreSQL/Redshift source:
2867 // Wrap UNNEST aliases from GENERATE_SERIES conversion: AS s -> AS _u(s)
2868 let normalized = if matches!(
2869 self.dialect_type,
2870 DialectType::PostgreSQL | DialectType::Redshift
2871 ) && matches!(
2872 target,
2873 DialectType::Presto | DialectType::Trino | DialectType::Athena
2874 ) {
2875 crate::transforms::wrap_unnest_join_aliases(normalized)?
2876 } else {
2877 normalized
2878 };
2879
2880 // Eliminate DISTINCT ON with target-dialect awareness
2881 // This must happen after source transform (which may produce DISTINCT ON)
2882 // and before target transform, with knowledge of the target dialect's NULL ordering behavior
2883 let normalized =
2884 crate::transforms::eliminate_distinct_on_for_dialect(normalized, Some(target))?;
2885
2886 // GENERATE_DATE_ARRAY in UNNEST -> Snowflake ARRAY_GENERATE_RANGE + DATEADD
2887 let normalized = if matches!(target, DialectType::Snowflake) {
2888 Self::transform_generate_date_array_snowflake(normalized)?
2889 } else {
2890 normalized
2891 };
2892
2893 // CROSS JOIN UNNEST -> LATERAL VIEW EXPLODE/INLINE for Spark/Hive/Databricks
2894 let normalized = if matches!(
2895 target,
2896 DialectType::Spark | DialectType::Databricks | DialectType::Hive
2897 ) {
2898 crate::transforms::unnest_to_explode_select(normalized)?
2899 } else {
2900 normalized
2901 };
2902
2903 // Wrap UNION with ORDER BY/LIMIT in a subquery for dialects that require it
2904 let normalized = if matches!(target, DialectType::ClickHouse | DialectType::TSQL) {
2905 crate::transforms::no_limit_order_by_union(normalized)?
2906 } else {
2907 normalized
2908 };
2909
2910 // TSQL: Convert COUNT(*) -> COUNT_BIG(*) when source is not TSQL/Fabric
2911 // Python sqlglot does this in the TSQL generator, but we can't do it there
2912 // because it would break TSQL -> TSQL identity
2913 let normalized = if matches!(target, DialectType::TSQL | DialectType::Fabric)
2914 && !matches!(self.dialect_type, DialectType::TSQL | DialectType::Fabric)
2915 {
2916 transform_recursive(normalized, &|e| {
2917 if let Expression::Count(ref c) = e {
2918 // Build COUNT_BIG(...) as an AggregateFunction
2919 let args = if c.star {
2920 vec![Expression::Star(crate::expressions::Star {
2921 table: None,
2922 except: None,
2923 replace: None,
2924 rename: None,
2925 trailing_comments: Vec::new(),
2926 span: None,
2927 })]
2928 } else if let Some(ref this) = c.this {
2929 vec![this.clone()]
2930 } else {
2931 vec![]
2932 };
2933 Ok(Expression::AggregateFunction(Box::new(
2934 crate::expressions::AggregateFunction {
2935 name: "COUNT_BIG".to_string(),
2936 args,
2937 distinct: c.distinct,
2938 filter: c.filter.clone(),
2939 order_by: Vec::new(),
2940 limit: None,
2941 ignore_nulls: None,
2942 inferred_type: None,
2943 },
2944 )))
2945 } else {
2946 Ok(e)
2947 }
2948 })?
2949 } else {
2950 normalized
2951 };
2952
2953 let transformed = target_dialect.transform(normalized)?;
2954
2955 // DuckDB target: when FROM is RANGE(n), replace SEQ's ROW_NUMBER pattern with `range`
2956 let transformed = if matches!(target, DialectType::DuckDB) {
2957 Self::seq_rownum_to_range(transformed)?
2958 } else {
2959 transformed
2960 };
2961
2962 let mut sql = if pretty {
2963 target_dialect.generate_pretty_with_source(&transformed, self.dialect_type)?
2964 } else {
2965 target_dialect.generate_with_source(&transformed, self.dialect_type)?
2966 };
2967
2968 // Align a known Snowflake pretty-print edge case with Python sqlglot output.
2969 if pretty && target == DialectType::Snowflake {
2970 sql = Self::normalize_snowflake_pretty(sql);
2971 }
2972
2973 Ok(sql)
2974 })
2975 .collect()
2976 }
2977}
2978
2979// Transpile-only methods: cross-dialect normalization and helpers
2980#[cfg(feature = "transpile")]
2981impl Dialect {
2982 /// For DuckDB target: when FROM clause contains RANGE(n), replace
2983 /// `(ROW_NUMBER() OVER (ORDER BY 1 NULLS FIRST) - 1)` with `range` in select expressions.
2984 /// This handles SEQ1/2/4/8 → RANGE transpilation from Snowflake.
2985 fn seq_rownum_to_range(expr: Expression) -> Result<Expression> {
2986 if let Expression::Select(mut select) = expr {
2987 // Check if FROM contains a RANGE function
2988 let has_range_from = if let Some(ref from) = select.from {
2989 from.expressions.iter().any(|e| {
2990 // Check for direct RANGE(...) or aliased RANGE(...)
2991 match e {
2992 Expression::Function(f) => f.name.eq_ignore_ascii_case("RANGE"),
2993 Expression::Alias(a) => {
2994 matches!(&a.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("RANGE"))
2995 }
2996 _ => false,
2997 }
2998 })
2999 } else {
3000 false
3001 };
3002
3003 if has_range_from {
3004 // Replace the ROW_NUMBER pattern in select expressions
3005 select.expressions = select
3006 .expressions
3007 .into_iter()
3008 .map(|e| Self::replace_rownum_with_range(e))
3009 .collect();
3010 }
3011
3012 Ok(Expression::Select(select))
3013 } else {
3014 Ok(expr)
3015 }
3016 }
3017
3018 /// Replace `(ROW_NUMBER() OVER (...) - 1)` with `range` column reference
3019 fn replace_rownum_with_range(expr: Expression) -> Expression {
3020 match expr {
3021 // Match: (ROW_NUMBER() OVER (...) - 1) % N → range % N
3022 Expression::Mod(op) => {
3023 let new_left = Self::try_replace_rownum_paren(&op.left);
3024 Expression::Mod(Box::new(crate::expressions::BinaryOp {
3025 left: new_left,
3026 right: op.right,
3027 left_comments: op.left_comments,
3028 operator_comments: op.operator_comments,
3029 trailing_comments: op.trailing_comments,
3030 inferred_type: op.inferred_type,
3031 }))
3032 }
3033 // Match: (CASE WHEN (ROW...) % N >= ... THEN ... ELSE ... END)
3034 Expression::Paren(p) => {
3035 let inner = Self::replace_rownum_with_range(p.this);
3036 Expression::Paren(Box::new(crate::expressions::Paren {
3037 this: inner,
3038 trailing_comments: p.trailing_comments,
3039 }))
3040 }
3041 Expression::Case(mut c) => {
3042 // Replace ROW_NUMBER in WHEN conditions and THEN expressions
3043 c.whens = c
3044 .whens
3045 .into_iter()
3046 .map(|(cond, then)| {
3047 (
3048 Self::replace_rownum_with_range(cond),
3049 Self::replace_rownum_with_range(then),
3050 )
3051 })
3052 .collect();
3053 if let Some(else_) = c.else_ {
3054 c.else_ = Some(Self::replace_rownum_with_range(else_));
3055 }
3056 Expression::Case(c)
3057 }
3058 Expression::Gte(op) => Expression::Gte(Box::new(crate::expressions::BinaryOp {
3059 left: Self::replace_rownum_with_range(op.left),
3060 right: op.right,
3061 left_comments: op.left_comments,
3062 operator_comments: op.operator_comments,
3063 trailing_comments: op.trailing_comments,
3064 inferred_type: op.inferred_type,
3065 })),
3066 Expression::Sub(op) => Expression::Sub(Box::new(crate::expressions::BinaryOp {
3067 left: Self::replace_rownum_with_range(op.left),
3068 right: op.right,
3069 left_comments: op.left_comments,
3070 operator_comments: op.operator_comments,
3071 trailing_comments: op.trailing_comments,
3072 inferred_type: op.inferred_type,
3073 })),
3074 Expression::Alias(mut a) => {
3075 a.this = Self::replace_rownum_with_range(a.this);
3076 Expression::Alias(a)
3077 }
3078 other => other,
3079 }
3080 }
3081
3082 /// Check if an expression is `(ROW_NUMBER() OVER (...) - 1)` and replace with `range`
3083 fn try_replace_rownum_paren(expr: &Expression) -> Expression {
3084 if let Expression::Paren(ref p) = expr {
3085 if let Expression::Sub(ref sub) = p.this {
3086 if let Expression::WindowFunction(ref wf) = sub.left {
3087 if let Expression::Function(ref f) = wf.this {
3088 if f.name.eq_ignore_ascii_case("ROW_NUMBER") {
3089 if let Expression::Literal(ref lit) = sub.right {
3090 if let crate::expressions::Literal::Number(ref n) = lit.as_ref() {
3091 if n == "1" {
3092 return Expression::column("range");
3093 }
3094 }
3095 }
3096 }
3097 }
3098 }
3099 }
3100 }
3101 expr.clone()
3102 }
3103
    /// Transform BigQuery GENERATE_DATE_ARRAY in UNNEST for Snowflake target.
    /// Converts:
    /// SELECT ..., alias, ... FROM t CROSS JOIN UNNEST(GENERATE_DATE_ARRAY(start, end, INTERVAL '1' unit)) AS alias
    /// To:
    /// SELECT ..., DATEADD(unit, CAST(alias AS INT), CAST(start AS DATE)) AS alias, ...
    /// FROM t, LATERAL FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, DATEDIFF(unit, start, end) + 1)) AS _t0(seq, key, path, index, alias, this)
    ///
    /// Only the first matching join per SELECT is rewritten (the scan breaks
    /// after the first hit). SELECTs without a matching join fall through to
    /// `try_transform_from_gda_snowflake`, which handles the same pattern in
    /// the FROM clause instead of a JOIN.
    fn transform_generate_date_array_snowflake(expr: Expression) -> Result<Expression> {
        use crate::expressions::*;
        transform_recursive(expr, &|e| {
            // Handle ARRAY_SIZE(GENERATE_DATE_ARRAY(...)) -> ARRAY_SIZE((SELECT ARRAY_AGG(*) FROM subquery))
            if let Expression::ArraySize(ref af) = e {
                if let Expression::Function(ref f) = af.this {
                    if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY") && f.args.len() >= 2 {
                        let result = Self::convert_array_size_gda_snowflake(f)?;
                        return Ok(result);
                    }
                }
            }

            // Only SELECT nodes can carry the UNNEST join pattern; everything
            // else passes through unchanged.
            let Expression::Select(mut sel) = e else {
                return Ok(e);
            };

            // Find joins with UNNEST containing GenerateSeries (from GENERATE_DATE_ARRAY conversion)
            let mut gda_info: Option<(String, Expression, Expression, String)> = None; // (alias_name, start_expr, end_expr, unit)
            let mut gda_join_idx: Option<usize> = None;

            for (idx, join) in sel.joins.iter().enumerate() {
                // The join.this may be:
                // 1. Unnest(UnnestFunc { alias: Some("mnth"), ... })
                // 2. Alias(Alias { this: Unnest(UnnestFunc { alias: None, ... }), alias: "mnth", ... })
                let (unnest_ref, alias_name) = match &join.this {
                    Expression::Unnest(ref unnest) => {
                        let alias = unnest.alias.as_ref().map(|id| id.name.clone());
                        (Some(unnest.as_ref()), alias)
                    }
                    Expression::Alias(ref a) => {
                        if let Expression::Unnest(ref unnest) = a.this {
                            (Some(unnest.as_ref()), Some(a.alias.name.clone()))
                        } else {
                            (None, None)
                        }
                    }
                    _ => (None, None),
                };

                if let (Some(unnest), Some(alias)) = (unnest_ref, alias_name) {
                    // Check the main expression (this) of the UNNEST for GENERATE_DATE_ARRAY function
                    if let Expression::Function(ref f) = unnest.this {
                        if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY") && f.args.len() >= 2 {
                            let start_expr = f.args[0].clone();
                            let end_expr = f.args[1].clone();
                            let step = f.args.get(2).cloned();

                            // Extract unit from step interval
                            let unit = if let Some(Expression::Interval(ref iv)) = step {
                                if let Some(IntervalUnitSpec::Simple { ref unit, .. }) = iv.unit {
                                    // NOTE(review): relies on the unit enum's Debug name
                                    // matching the SQL keyword (e.g. Month -> MONTH) —
                                    // confirm against the IntervalUnit definition.
                                    Some(format!("{:?}", unit).to_ascii_uppercase())
                                } else if let Some(ref this) = iv.this {
                                    // The interval may be stored as a string like "1 MONTH"
                                    if let Expression::Literal(lit) = this {
                                        if let Literal::String(ref s) = lit.as_ref() {
                                            let parts: Vec<&str> = s.split_whitespace().collect();
                                            if parts.len() == 2 {
                                                // "1 MONTH" form: the second word is the unit.
                                                Some(parts[1].to_ascii_uppercase())
                                            } else if parts.len() == 1 {
                                                // Single word like "MONTH" or just "1"
                                                let upper = parts[0].to_ascii_uppercase();
                                                if matches!(
                                                    upper.as_str(),
                                                    "YEAR"
                                                        | "QUARTER"
                                                        | "MONTH"
                                                        | "WEEK"
                                                        | "DAY"
                                                        | "HOUR"
                                                        | "MINUTE"
                                                        | "SECOND"
                                                ) {
                                                    Some(upper)
                                                } else {
                                                    // Bare number like "1": no unit to extract.
                                                    None
                                                }
                                            } else {
                                                None
                                            }
                                        } else {
                                            None
                                        }
                                    } else {
                                        None
                                    }
                                } else {
                                    None
                                }
                            } else {
                                // No step interval at all: skip — a unit is required
                                // to build the DATEDIFF/DATEADD rewrite below.
                                None
                            };

                            if let Some(unit_str) = unit {
                                gda_info = Some((alias, start_expr, end_expr, unit_str));
                                gda_join_idx = Some(idx);
                            }
                        }
                    }
                }
                // Stop at the first matching join; only one is rewritten per SELECT.
                if gda_info.is_some() {
                    break;
                }
            }

            let Some((alias_name, start_expr, end_expr, unit_str)) = gda_info else {
                // Also check FROM clause for UNNEST(GENERATE_DATE_ARRAY(...)) patterns
                // This handles Generic->Snowflake where GENERATE_DATE_ARRAY is in FROM, not in JOIN
                let result = Self::try_transform_from_gda_snowflake(sel);
                return result;
            };
            // Safe: gda_join_idx is always set together with gda_info above.
            let join_idx = gda_join_idx.unwrap();

            // Build ARRAY_GENERATE_RANGE(0, DATEDIFF(unit, start, end) + 1)
            // ARRAY_GENERATE_RANGE uses exclusive end, and we need DATEDIFF + 1 values
            // (inclusive date range), so the exclusive end is DATEDIFF + 1.
            let datediff = Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![
                    // The unit is emitted as a bare identifier (DATEDIFF(MONTH, ...)),
                    // so it is modeled as a column reference here.
                    Expression::boxed_column(Column {
                        name: Identifier::new(&unit_str),
                        table: None,
                        join_mark: false,
                        trailing_comments: vec![],
                        span: None,
                        inferred_type: None,
                    }),
                    start_expr.clone(),
                    end_expr.clone(),
                ],
            )));
            let datediff_plus_one = Expression::Add(Box::new(BinaryOp {
                left: datediff,
                right: Expression::Literal(Box::new(Literal::Number("1".to_string()))),
                left_comments: vec![],
                operator_comments: vec![],
                trailing_comments: vec![],
                inferred_type: None,
            }));

            let array_gen_range = Expression::Function(Box::new(Function::new(
                "ARRAY_GENERATE_RANGE".to_string(),
                vec![
                    Expression::Literal(Box::new(Literal::Number("0".to_string()))),
                    datediff_plus_one,
                ],
            )));

            // Build FLATTEN(INPUT => ARRAY_GENERATE_RANGE(...))
            let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
                name: Identifier::new("INPUT"),
                value: array_gen_range,
                separator: crate::expressions::NamedArgSeparator::DArrow,
            }));
            let flatten = Expression::Function(Box::new(Function::new(
                "FLATTEN".to_string(),
                vec![flatten_input],
            )));

            // Build LATERAL FLATTEN(...) AS _t0(seq, key, path, index, alias, this)
            // The six column aliases mirror FLATTEN's output columns, with the
            // user's alias name standing in for the VALUE position.
            let alias_table = Alias {
                this: flatten,
                alias: Identifier::new("_t0"),
                column_aliases: vec![
                    Identifier::new("seq"),
                    Identifier::new("key"),
                    Identifier::new("path"),
                    Identifier::new("index"),
                    Identifier::new(&alias_name),
                    Identifier::new("this"),
                ],
                pre_alias_comments: vec![],
                trailing_comments: vec![],
                inferred_type: None,
            };
            let lateral_expr = Expression::Lateral(Box::new(Lateral {
                this: Box::new(Expression::Alias(Box::new(alias_table))),
                view: None,
                outer: None,
                alias: None,
                alias_quoted: false,
                cross_apply: None,
                ordinality: None,
                column_aliases: vec![],
            }));

            // Remove the original join and add to FROM expressions
            sel.joins.remove(join_idx);
            if let Some(ref mut from) = sel.from {
                from.expressions.push(lateral_expr);
            }

            // Build DATEADD(unit, CAST(alias AS INT), CAST(start AS DATE))
            // FLATTEN yields 0-based offsets in the alias column; DATEADD maps
            // each offset back to a date starting at `start`.
            let dateadd_expr = Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::boxed_column(Column {
                        name: Identifier::new(&unit_str),
                        table: None,
                        join_mark: false,
                        trailing_comments: vec![],
                        span: None,
                        inferred_type: None,
                    }),
                    Expression::Cast(Box::new(Cast {
                        this: Expression::boxed_column(Column {
                            name: Identifier::new(&alias_name),
                            table: None,
                            join_mark: false,
                            trailing_comments: vec![],
                            span: None,
                            inferred_type: None,
                        }),
                        to: DataType::Int {
                            length: None,
                            integer_spelling: false,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    })),
                    Expression::Cast(Box::new(Cast {
                        this: start_expr.clone(),
                        to: DataType::Date,
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    })),
                ],
            )));

            // Replace references to the alias in the SELECT list
            let new_exprs: Vec<Expression> = sel
                .expressions
                .iter()
                .map(|expr| Self::replace_column_ref_with_dateadd(expr, &alias_name, &dateadd_expr))
                .collect();
            sel.expressions = new_exprs;

            Ok(Expression::Select(sel))
        })
    }
3356
3357 /// Helper: replace column references to `alias_name` with dateadd expression
3358 fn replace_column_ref_with_dateadd(
3359 expr: &Expression,
3360 alias_name: &str,
3361 dateadd: &Expression,
3362 ) -> Expression {
3363 use crate::expressions::*;
3364 match expr {
3365 Expression::Column(c) if c.name.name == alias_name && c.table.is_none() => {
3366 // Plain column reference -> DATEADD(...) AS alias_name
3367 Expression::Alias(Box::new(Alias {
3368 this: dateadd.clone(),
3369 alias: Identifier::new(alias_name),
3370 column_aliases: vec![],
3371 pre_alias_comments: vec![],
3372 trailing_comments: vec![],
3373 inferred_type: None,
3374 }))
3375 }
3376 Expression::Alias(a) => {
3377 // Check if the inner expression references the alias
3378 let new_this = Self::replace_column_ref_inner(&a.this, alias_name, dateadd);
3379 Expression::Alias(Box::new(Alias {
3380 this: new_this,
3381 alias: a.alias.clone(),
3382 column_aliases: a.column_aliases.clone(),
3383 pre_alias_comments: a.pre_alias_comments.clone(),
3384 trailing_comments: a.trailing_comments.clone(),
3385 inferred_type: None,
3386 }))
3387 }
3388 _ => expr.clone(),
3389 }
3390 }
3391
3392 /// Helper: replace column references in inner expression (not top-level)
3393 fn replace_column_ref_inner(
3394 expr: &Expression,
3395 alias_name: &str,
3396 dateadd: &Expression,
3397 ) -> Expression {
3398 use crate::expressions::*;
3399 match expr {
3400 Expression::Column(c) if c.name.name == alias_name && c.table.is_none() => {
3401 dateadd.clone()
3402 }
3403 Expression::Add(op) => {
3404 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
3405 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
3406 Expression::Add(Box::new(BinaryOp {
3407 left,
3408 right,
3409 left_comments: op.left_comments.clone(),
3410 operator_comments: op.operator_comments.clone(),
3411 trailing_comments: op.trailing_comments.clone(),
3412 inferred_type: None,
3413 }))
3414 }
3415 Expression::Sub(op) => {
3416 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
3417 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
3418 Expression::Sub(Box::new(BinaryOp {
3419 left,
3420 right,
3421 left_comments: op.left_comments.clone(),
3422 operator_comments: op.operator_comments.clone(),
3423 trailing_comments: op.trailing_comments.clone(),
3424 inferred_type: None,
3425 }))
3426 }
3427 Expression::Mul(op) => {
3428 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
3429 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
3430 Expression::Mul(Box::new(BinaryOp {
3431 left,
3432 right,
3433 left_comments: op.left_comments.clone(),
3434 operator_comments: op.operator_comments.clone(),
3435 trailing_comments: op.trailing_comments.clone(),
3436 inferred_type: None,
3437 }))
3438 }
3439 _ => expr.clone(),
3440 }
3441 }
3442
3443 /// Handle UNNEST(GENERATE_DATE_ARRAY(...)) in FROM clause for Snowflake target.
3444 /// Converts to a subquery with DATEADD + TABLE(FLATTEN(ARRAY_GENERATE_RANGE(...))).
3445 fn try_transform_from_gda_snowflake(
3446 mut sel: Box<crate::expressions::Select>,
3447 ) -> Result<Expression> {
3448 use crate::expressions::*;
3449
3450 // Extract GDA info from FROM clause
3451 let mut gda_info: Option<(
3452 usize,
3453 String,
3454 Expression,
3455 Expression,
3456 String,
3457 Option<(String, Vec<Identifier>)>,
3458 )> = None; // (from_idx, col_name, start, end, unit, outer_alias)
3459
3460 if let Some(ref from) = sel.from {
3461 for (idx, table_expr) in from.expressions.iter().enumerate() {
3462 // Pattern 1: UNNEST(GENERATE_DATE_ARRAY(...))
3463 // Pattern 2: Alias(UNNEST(GENERATE_DATE_ARRAY(...))) AS _q(date_week)
3464 let (unnest_opt, outer_alias_info) = match table_expr {
3465 Expression::Unnest(ref unnest) => (Some(unnest.as_ref()), None),
3466 Expression::Alias(ref a) => {
3467 if let Expression::Unnest(ref unnest) = a.this {
3468 let alias_info = (a.alias.name.clone(), a.column_aliases.clone());
3469 (Some(unnest.as_ref()), Some(alias_info))
3470 } else {
3471 (None, None)
3472 }
3473 }
3474 _ => (None, None),
3475 };
3476
3477 if let Some(unnest) = unnest_opt {
3478 // Check for GENERATE_DATE_ARRAY function
3479 let func_opt = match &unnest.this {
3480 Expression::Function(ref f)
3481 if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY")
3482 && f.args.len() >= 2 =>
3483 {
3484 Some(f)
3485 }
3486 // Also check for GenerateSeries (from earlier normalization)
3487 _ => None,
3488 };
3489
3490 if let Some(f) = func_opt {
3491 let start_expr = f.args[0].clone();
3492 let end_expr = f.args[1].clone();
3493 let step = f.args.get(2).cloned();
3494
3495 // Extract unit and column name
3496 let unit = Self::extract_interval_unit_str(&step);
3497 let col_name = outer_alias_info
3498 .as_ref()
3499 .and_then(|(_, cols)| cols.first().map(|id| id.name.clone()))
3500 .unwrap_or_else(|| "value".to_string());
3501
3502 if let Some(unit_str) = unit {
3503 gda_info = Some((
3504 idx,
3505 col_name,
3506 start_expr,
3507 end_expr,
3508 unit_str,
3509 outer_alias_info,
3510 ));
3511 break;
3512 }
3513 }
3514 }
3515 }
3516 }
3517
3518 let Some((from_idx, col_name, start_expr, end_expr, unit_str, outer_alias_info)) = gda_info
3519 else {
3520 return Ok(Expression::Select(sel));
3521 };
3522
3523 // Build the Snowflake subquery:
3524 // (SELECT DATEADD(unit, CAST(col_name AS INT), CAST(start AS DATE)) AS col_name
3525 // FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, DATEDIFF(unit, start, end) + 1))) AS _t0(seq, key, path, index, col_name, this))
3526
3527 // DATEDIFF(unit, start, end)
3528 let datediff = Expression::Function(Box::new(Function::new(
3529 "DATEDIFF".to_string(),
3530 vec![
3531 Expression::boxed_column(Column {
3532 name: Identifier::new(&unit_str),
3533 table: None,
3534 join_mark: false,
3535 trailing_comments: vec![],
3536 span: None,
3537 inferred_type: None,
3538 }),
3539 start_expr.clone(),
3540 end_expr.clone(),
3541 ],
3542 )));
3543 // DATEDIFF(...) + 1
3544 let datediff_plus_one = Expression::Add(Box::new(BinaryOp {
3545 left: datediff,
3546 right: Expression::Literal(Box::new(Literal::Number("1".to_string()))),
3547 left_comments: vec![],
3548 operator_comments: vec![],
3549 trailing_comments: vec![],
3550 inferred_type: None,
3551 }));
3552
3553 let array_gen_range = Expression::Function(Box::new(Function::new(
3554 "ARRAY_GENERATE_RANGE".to_string(),
3555 vec![
3556 Expression::Literal(Box::new(Literal::Number("0".to_string()))),
3557 datediff_plus_one,
3558 ],
3559 )));
3560
3561 // TABLE(FLATTEN(INPUT => ...))
3562 let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
3563 name: Identifier::new("INPUT"),
3564 value: array_gen_range,
3565 separator: crate::expressions::NamedArgSeparator::DArrow,
3566 }));
3567 let flatten = Expression::Function(Box::new(Function::new(
3568 "FLATTEN".to_string(),
3569 vec![flatten_input],
3570 )));
3571
3572 // Determine alias name for the table: use outer alias or _t0
3573 let table_alias_name = outer_alias_info
3574 .as_ref()
3575 .map(|(name, _)| name.clone())
3576 .unwrap_or_else(|| "_t0".to_string());
3577
3578 // TABLE(FLATTEN(...)) AS _t0(seq, key, path, index, col_name, this)
3579 let table_func =
3580 Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])));
3581 let flatten_aliased = Expression::Alias(Box::new(Alias {
3582 this: table_func,
3583 alias: Identifier::new(&table_alias_name),
3584 column_aliases: vec![
3585 Identifier::new("seq"),
3586 Identifier::new("key"),
3587 Identifier::new("path"),
3588 Identifier::new("index"),
3589 Identifier::new(&col_name),
3590 Identifier::new("this"),
3591 ],
3592 pre_alias_comments: vec![],
3593 trailing_comments: vec![],
3594 inferred_type: None,
3595 }));
3596
3597 // SELECT DATEADD(unit, CAST(col_name AS INT), CAST(start AS DATE)) AS col_name
3598 let dateadd_expr = Expression::Function(Box::new(Function::new(
3599 "DATEADD".to_string(),
3600 vec![
3601 Expression::boxed_column(Column {
3602 name: Identifier::new(&unit_str),
3603 table: None,
3604 join_mark: false,
3605 trailing_comments: vec![],
3606 span: None,
3607 inferred_type: None,
3608 }),
3609 Expression::Cast(Box::new(Cast {
3610 this: Expression::boxed_column(Column {
3611 name: Identifier::new(&col_name),
3612 table: None,
3613 join_mark: false,
3614 trailing_comments: vec![],
3615 span: None,
3616 inferred_type: None,
3617 }),
3618 to: DataType::Int {
3619 length: None,
3620 integer_spelling: false,
3621 },
3622 trailing_comments: vec![],
3623 double_colon_syntax: false,
3624 format: None,
3625 default: None,
3626 inferred_type: None,
3627 })),
3628 // Use start_expr directly - it's already been normalized (DATE literal -> CAST)
3629 start_expr.clone(),
3630 ],
3631 )));
3632 let dateadd_aliased = Expression::Alias(Box::new(Alias {
3633 this: dateadd_expr,
3634 alias: Identifier::new(&col_name),
3635 column_aliases: vec![],
3636 pre_alias_comments: vec![],
3637 trailing_comments: vec![],
3638 inferred_type: None,
3639 }));
3640
3641 // Build inner SELECT
3642 let mut inner_select = Select::new();
3643 inner_select.expressions = vec![dateadd_aliased];
3644 inner_select.from = Some(From {
3645 expressions: vec![flatten_aliased],
3646 });
3647
3648 let inner_select_expr = Expression::Select(Box::new(inner_select));
3649 let subquery = Expression::Subquery(Box::new(Subquery {
3650 this: inner_select_expr,
3651 alias: None,
3652 column_aliases: vec![],
3653 order_by: None,
3654 limit: None,
3655 offset: None,
3656 distribute_by: None,
3657 sort_by: None,
3658 cluster_by: None,
3659 lateral: false,
3660 modifiers_inside: false,
3661 trailing_comments: vec![],
3662 inferred_type: None,
3663 }));
3664
3665 // If there was an outer alias (e.g., AS _q(date_week)), wrap with alias
3666 let replacement = if let Some((alias_name, col_aliases)) = outer_alias_info {
3667 Expression::Alias(Box::new(Alias {
3668 this: subquery,
3669 alias: Identifier::new(&alias_name),
3670 column_aliases: col_aliases,
3671 pre_alias_comments: vec![],
3672 trailing_comments: vec![],
3673 inferred_type: None,
3674 }))
3675 } else {
3676 subquery
3677 };
3678
3679 // Replace the FROM expression
3680 if let Some(ref mut from) = sel.from {
3681 from.expressions[from_idx] = replacement;
3682 }
3683
3684 Ok(Expression::Select(sel))
3685 }
3686
    /// Convert ARRAY_SIZE(GENERATE_DATE_ARRAY(start, end, step)) for Snowflake.
    /// Produces: ARRAY_SIZE((SELECT ARRAY_AGG(*) FROM (SELECT DATEADD(unit, CAST(value AS INT), start) AS value
    /// FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, DATEDIFF(unit, start, end) + 1))) AS _t0(...))))
    ///
    /// The interval unit comes from the optional third argument of `f`,
    /// falling back to `DAY` when it is absent or unrecognized. The generated
    /// element column is always named `value`. This function always returns
    /// `Ok`; the `Result` return type mirrors the surrounding transform
    /// helpers.
    ///
    /// NOTE(review): indexes `f.args[0]` / `f.args[1]` without a length
    /// check, so this panics if called with fewer than two arguments —
    /// presumably the call site only dispatches here for a well-formed
    /// GENERATE_DATE_ARRAY; confirm against the caller.
    fn convert_array_size_gda_snowflake(f: &crate::expressions::Function) -> Result<Expression> {
        use crate::expressions::*;

        let start_expr = f.args[0].clone();
        let end_expr = f.args[1].clone();
        // Optional step argument, e.g. INTERVAL '1' WEEK.
        let step = f.args.get(2).cloned();
        let unit_str = Self::extract_interval_unit_str(&step).unwrap_or_else(|| "DAY".to_string());
        let col_name = "value";

        // Build the inner subquery: same as try_transform_from_gda_snowflake
        // DATEDIFF(unit, start, end) — number of whole steps between the bounds.
        // The unit is rendered as a bare column reference, matching Snowflake's
        // unquoted date-part argument syntax.
        let datediff = Expression::Function(Box::new(Function::new(
            "DATEDIFF".to_string(),
            vec![
                Expression::boxed_column(Column {
                    name: Identifier::new(&unit_str),
                    table: None,
                    join_mark: false,
                    trailing_comments: vec![],
                    span: None,
                    inferred_type: None,
                }),
                start_expr.clone(),
                end_expr.clone(),
            ],
        )));
        // DATEDIFF(...) + 1
        // (+1 because GENERATE_DATE_ARRAY's range is inclusive of the end bound.)
        let datediff_plus_one = Expression::Add(Box::new(BinaryOp {
            left: datediff,
            right: Expression::Literal(Box::new(Literal::Number("1".to_string()))),
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        }));

        // ARRAY_GENERATE_RANGE(0, DATEDIFF(...) + 1): integer offsets 0..=diff.
        let array_gen_range = Expression::Function(Box::new(Function::new(
            "ARRAY_GENERATE_RANGE".to_string(),
            vec![
                Expression::Literal(Box::new(Literal::Number("0".to_string()))),
                datediff_plus_one,
            ],
        )));

        // FLATTEN(INPUT => ARRAY_GENERATE_RANGE(...)) — named-argument call.
        let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
            name: Identifier::new("INPUT"),
            value: array_gen_range,
            separator: crate::expressions::NamedArgSeparator::DArrow,
        }));
        let flatten = Expression::Function(Box::new(Function::new(
            "FLATTEN".to_string(),
            vec![flatten_input],
        )));

        // TABLE(FLATTEN(...)) AS _t0(seq, key, path, index, value, this).
        // The six column aliases follow FLATTEN's fixed output columns; the
        // fifth (VALUE position) is renamed to our element column.
        let table_func =
            Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])));
        let flatten_aliased = Expression::Alias(Box::new(Alias {
            this: table_func,
            alias: Identifier::new("_t0"),
            column_aliases: vec![
                Identifier::new("seq"),
                Identifier::new("key"),
                Identifier::new("path"),
                Identifier::new("index"),
                Identifier::new(col_name),
                Identifier::new("this"),
            ],
            pre_alias_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        }));

        // DATEADD(unit, CAST(value AS INT), start) — offset the start bound by
        // each generated integer to produce the individual dates.
        let dateadd_expr = Expression::Function(Box::new(Function::new(
            "DATEADD".to_string(),
            vec![
                Expression::boxed_column(Column {
                    name: Identifier::new(&unit_str),
                    table: None,
                    join_mark: false,
                    trailing_comments: vec![],
                    span: None,
                    inferred_type: None,
                }),
                Expression::Cast(Box::new(Cast {
                    this: Expression::boxed_column(Column {
                        name: Identifier::new(col_name),
                        table: None,
                        join_mark: false,
                        trailing_comments: vec![],
                        span: None,
                        inferred_type: None,
                    }),
                    to: DataType::Int {
                        length: None,
                        integer_spelling: false,
                    },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                })),
                start_expr.clone(),
            ],
        )));
        let dateadd_aliased = Expression::Alias(Box::new(Alias {
            this: dateadd_expr,
            alias: Identifier::new(col_name),
            column_aliases: vec![],
            pre_alias_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        }));

        // Inner SELECT: SELECT DATEADD(...) AS value FROM TABLE(FLATTEN(...)) AS _t0(...)
        let mut inner_select = Select::new();
        inner_select.expressions = vec![dateadd_aliased];
        inner_select.from = Some(From {
            expressions: vec![flatten_aliased],
        });

        // Wrap in subquery for the inner part
        let inner_subquery = Expression::Subquery(Box::new(Subquery {
            this: Expression::Select(Box::new(inner_select)),
            alias: None,
            column_aliases: vec![],
            order_by: None,
            limit: None,
            offset: None,
            distribute_by: None,
            sort_by: None,
            cluster_by: None,
            lateral: false,
            modifiers_inside: false,
            trailing_comments: vec![],
            inferred_type: None,
        }));

        // Outer: SELECT ARRAY_AGG(*) FROM (inner_subquery)
        // Re-aggregates the flattened rows back into a single array so the
        // original ARRAY_SIZE call has an array to measure.
        let star = Expression::Star(Star {
            table: None,
            except: None,
            replace: None,
            rename: None,
            trailing_comments: vec![],
            span: None,
        });
        let array_agg = Expression::ArrayAgg(Box::new(AggFunc {
            this: star,
            distinct: false,
            filter: None,
            order_by: vec![],
            name: Some("ARRAY_AGG".to_string()),
            ignore_nulls: None,
            having_max: None,
            limit: None,
            inferred_type: None,
        }));

        let mut outer_select = Select::new();
        outer_select.expressions = vec![array_agg];
        outer_select.from = Some(From {
            expressions: vec![inner_subquery],
        });

        // Wrap in a subquery
        let outer_subquery = Expression::Subquery(Box::new(Subquery {
            this: Expression::Select(Box::new(outer_select)),
            alias: None,
            column_aliases: vec![],
            order_by: None,
            limit: None,
            offset: None,
            distribute_by: None,
            sort_by: None,
            cluster_by: None,
            lateral: false,
            modifiers_inside: false,
            trailing_comments: vec![],
            inferred_type: None,
        }));

        // ARRAY_SIZE(subquery)
        Ok(Expression::ArraySize(Box::new(UnaryFunc::new(
            outer_subquery,
        ))))
    }
3876
3877 /// Extract interval unit string from an optional step expression.
3878 fn extract_interval_unit_str(step: &Option<Expression>) -> Option<String> {
3879 use crate::expressions::*;
3880 if let Some(Expression::Interval(ref iv)) = step {
3881 if let Some(IntervalUnitSpec::Simple { ref unit, .. }) = iv.unit {
3882 return Some(format!("{:?}", unit).to_ascii_uppercase());
3883 }
3884 if let Some(ref this) = iv.this {
3885 if let Expression::Literal(lit) = this {
3886 if let Literal::String(ref s) = lit.as_ref() {
3887 let parts: Vec<&str> = s.split_whitespace().collect();
3888 if parts.len() == 2 {
3889 return Some(parts[1].to_ascii_uppercase());
3890 } else if parts.len() == 1 {
3891 let upper = parts[0].to_ascii_uppercase();
3892 if matches!(
3893 upper.as_str(),
3894 "YEAR"
3895 | "QUARTER"
3896 | "MONTH"
3897 | "WEEK"
3898 | "DAY"
3899 | "HOUR"
3900 | "MINUTE"
3901 | "SECOND"
3902 ) {
3903 return Some(upper);
3904 }
3905 }
3906 }
3907 }
3908 }
3909 }
3910 // Default to DAY if no step or no interval
3911 if step.is_none() {
3912 return Some("DAY".to_string());
3913 }
3914 None
3915 }
3916
3917 fn normalize_snowflake_pretty(mut sql: String) -> String {
3918 if sql.contains("LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)")
3919 && sql.contains("ARRAY_GENERATE_RANGE(0, (GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1) + 1)")
3920 {
3921 sql = sql.replace(
3922 "AND uc.user_id <> ALL (SELECT DISTINCT\n _id\n FROM users, LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)\n WHERE\n GET_PATH(datasource.value, 'name') = 'something')",
3923 "AND uc.user_id <> ALL (\n SELECT DISTINCT\n _id\n FROM users, LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)\n WHERE\n GET_PATH(datasource.value, 'name') = 'something'\n )",
3924 );
3925
3926 sql = sql.replace(
3927 "CROSS JOIN TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1) + 1))) AS _u(seq, key, path, index, pos, this)",
3928 "CROSS JOIN TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (\n GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1\n) + 1))) AS _u(seq, key, path, index, pos, this)",
3929 );
3930
3931 sql = sql.replace(
3932 "OR (_u.pos > (ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1)\n AND _u_2.pos_2 = (ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1))",
3933 "OR (\n _u.pos > (\n ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1\n )\n AND _u_2.pos_2 = (\n ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1\n )\n )",
3934 );
3935 }
3936
3937 sql
3938 }
3939
3940 /// Apply cross-dialect semantic normalizations that depend on knowing both source and target.
3941 /// This handles cases where the same syntax has different semantics across dialects.
3942 fn cross_dialect_normalize(
3943 expr: Expression,
3944 source: DialectType,
3945 target: DialectType,
3946 ) -> Result<Expression> {
3947 use crate::expressions::{
3948 AggFunc, BinaryOp, Case, Cast, ConvertTimezone, DataType, DateTimeField, DateTruncFunc,
3949 Function, Identifier, IsNull, Literal, Null, Paren,
3950 };
3951
3952 // Helper to tag which kind of transform to apply
3953 #[derive(Debug)]
3954 enum Action {
3955 None,
3956 GreatestLeastNull,
3957 ArrayGenerateRange,
3958 Div0TypedDivision,
3959 ArrayAggCollectList,
3960 ArrayAggWithinGroupFilter,
3961 ArrayAggFilter,
3962 CastTimestampToDatetime,
3963 DateTruncWrapCast,
3964 ToDateToCast,
3965 ConvertTimezoneToExpr,
3966 SetToVariable,
3967 RegexpReplaceSnowflakeToDuckDB,
3968 BigQueryFunctionNormalize,
3969 BigQuerySafeDivide,
3970 BigQueryCastType,
3971 BigQueryToHexBare, // _BQ_TO_HEX(x) with no LOWER/UPPER wrapper
3972 BigQueryToHexLower, // LOWER(_BQ_TO_HEX(x))
3973 BigQueryToHexUpper, // UPPER(_BQ_TO_HEX(x))
3974 BigQueryLastDayStripUnit, // LAST_DAY(date, MONTH) -> LAST_DAY(date)
3975 BigQueryCastFormat, // CAST(x AS type FORMAT 'fmt') -> PARSE_DATE/PARSE_TIMESTAMP etc.
3976 BigQueryAnyValueHaving, // ANY_VALUE(x HAVING MAX/MIN y) -> ARG_MAX_NULL/ARG_MIN_NULL for DuckDB
3977 BigQueryApproxQuantiles, // APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
3978 GenericFunctionNormalize, // Cross-dialect function renaming (non-BigQuery sources)
3979 RegexpLikeToDuckDB, // RegexpLike -> REGEXP_MATCHES for DuckDB target
3980 EpochConvert, // Expression::Epoch -> target-specific epoch function
3981 EpochMsConvert, // Expression::EpochMs -> target-specific epoch ms function
3982 TSQLTypeNormalize, // TSQL types (MONEY, SMALLMONEY, REAL, DATETIME2) -> standard types
3983 MySQLSafeDivide, // MySQL a/b -> a / NULLIF(b, 0) with optional CAST
3984 NullsOrdering, // Add NULLS FIRST/LAST for ORDER BY
3985 AlterTableRenameStripSchema, // ALTER TABLE db.t1 RENAME TO db.t2 -> ALTER TABLE db.t1 RENAME TO t2
3986 StringAggConvert, // STRING_AGG/WITHIN GROUP -> target-specific aggregate
3987 GroupConcatConvert, // GROUP_CONCAT -> target-specific aggregate
3988 TempTableHash, // TSQL #table -> temp table normalization
3989 ArrayLengthConvert, // CARDINALITY/ARRAY_LENGTH/ARRAY_SIZE -> target-specific
3990 DatePartUnquote, // DATE_PART('month', x) -> DATE_PART(month, x) for Snowflake target
3991 NvlClearOriginal, // Clear NVL original_name for cross-dialect transpilation
3992 HiveCastToTryCast, // Hive/Spark CAST -> TRY_CAST for targets that support it
3993 XorExpand, // MySQL XOR -> (a AND NOT b) OR (NOT a AND b) for non-XOR targets
3994 CastTimestampStripTz, // CAST(x AS TIMESTAMP WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark
3995 JsonExtractToGetJsonObject, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
3996 JsonExtractScalarToGetJsonObject, // JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
3997 JsonQueryValueConvert, // JsonQuery/JsonValue -> target-specific (ISNULL wrapper for TSQL, GET_JSON_OBJECT for Spark, etc.)
3998 JsonLiteralToJsonParse, // JSON 'x' -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake; also DuckDB CAST(x AS JSON)
3999 DuckDBTryCastJsonToTryJsonParse, // DuckDB TRY_CAST(x AS JSON) -> TRY(JSON_PARSE(x)) for Trino/Presto/Athena
4000 DuckDBJsonFuncToJsonParse, // DuckDB json(x) -> JSON_PARSE(x) for Trino/Presto/Athena
4001 DuckDBJsonValidToIsJson, // DuckDB json_valid(x) -> x IS JSON for Trino/Presto/Athena
4002 ArraySyntaxConvert, // ARRAY[x] -> ARRAY(x) for Spark, [x] for BigQuery/DuckDB
4003 AtTimeZoneConvert, // AT TIME ZONE -> AT_TIMEZONE (Presto) / FROM_UTC_TIMESTAMP (Spark)
4004 DayOfWeekConvert, // DAY_OF_WEEK -> dialect-specific
4005 MaxByMinByConvert, // MAX_BY/MIN_BY -> argMax/argMin for ClickHouse
4006 ArrayAggToCollectList, // ARRAY_AGG(x ORDER BY ...) -> COLLECT_LIST(x) for Hive/Spark
4007 ArrayAggToGroupConcat, // ARRAY_AGG(x) -> GROUP_CONCAT(x) for MySQL-like targets
4008 ElementAtConvert, // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
4009 CurrentUserParens, // CURRENT_USER -> CURRENT_USER() for Snowflake
4010 CastToJsonForSpark, // CAST(x AS JSON) -> TO_JSON(x) for Spark
4011 CastJsonToFromJson, // CAST(JSON_PARSE(literal) AS ARRAY/MAP) -> FROM_JSON(literal, type_string)
4012 ToJsonConvert, // TO_JSON(x) -> JSON_FORMAT(CAST(x AS JSON)) for Presto etc.
4013 ArrayAggNullFilter, // ARRAY_AGG(x) FILTER(WHERE cond) -> add AND NOT x IS NULL for DuckDB
4014 ArrayAggIgnoreNullsDuckDB, // ARRAY_AGG(x IGNORE NULLS ORDER BY ...) -> ARRAY_AGG(x ORDER BY a NULLS FIRST, ...) for DuckDB
4015 BigQueryPercentileContToDuckDB, // PERCENTILE_CONT(x, frac RESPECT NULLS) -> QUANTILE_CONT(x, frac) for DuckDB
4016 BigQueryArraySelectAsStructToSnowflake, // ARRAY(SELECT AS STRUCT ...) -> (SELECT ARRAY_AGG(OBJECT_CONSTRUCT(...)))
4017 CountDistinctMultiArg, // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN ... END)
4018 VarianceToClickHouse, // Expression::Variance -> varSamp for ClickHouse
4019 StddevToClickHouse, // Expression::Stddev -> stddevSamp for ClickHouse
4020 ApproxQuantileConvert, // Expression::ApproxQuantile -> APPROX_PERCENTILE for Snowflake
4021 ArrayIndexConvert, // array[1] -> array[0] for BigQuery (1-based to 0-based)
4022 DollarParamConvert, // $foo -> @foo for BigQuery
4023 TablesampleReservoir, // TABLESAMPLE (n ROWS) -> TABLESAMPLE RESERVOIR (n ROWS) for DuckDB
4024 BitAggFloatCast, // BIT_OR/BIT_AND/BIT_XOR float arg -> CAST(ROUND(CAST(arg)) AS INT) for DuckDB
4025 BitAggSnowflakeRename, // BIT_OR -> BITORAGG, BIT_AND -> BITANDAGG etc. for Snowflake
4026 StrftimeCastTimestamp, // CAST TIMESTAMP -> TIMESTAMP_NTZ for Spark in STRFTIME
4027 AnyValueIgnoreNulls, // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
4028 CreateTableStripComment, // Strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
4029 EscapeStringNormalize, // e'Hello\nworld' literal newline -> \n
4030 AnyToExists, // PostgreSQL x <op> ANY(array) -> EXISTS(array, x -> ...)
4031 ArrayConcatBracketConvert, // [1,2] -> ARRAY[1,2] for PostgreSQL in ARRAY_CAT
4032 SnowflakeIntervalFormat, // INTERVAL '2' HOUR -> INTERVAL '2 HOUR' for Snowflake
4033 AlterTableToSpRename, // ALTER TABLE RENAME -> EXEC sp_rename for TSQL
4034 StraightJoinCase, // STRAIGHT_JOIN -> straight_join for DuckDB
4035 RespectNullsConvert, // RESPECT NULLS window function handling
4036 MysqlNullsOrdering, // MySQL doesn't support NULLS ordering
4037 MysqlNullsLastRewrite, // Add CASE WHEN to ORDER BY for DuckDB -> MySQL (NULLS LAST simulation)
4038 BigQueryNullsOrdering, // BigQuery doesn't support NULLS FIRST/LAST - strip
4039 SnowflakeFloatProtect, // Protect FLOAT from being converted to DOUBLE by Snowflake target transform
4040 JsonToGetPath, // JSON arrow -> GET_PATH/PARSE_JSON for Snowflake
4041 FilterToIff, // FILTER(WHERE) -> IFF wrapping for Snowflake
4042 AggFilterToIff, // AggFunc.filter -> IFF wrapping for Snowflake (e.g., AVG(x) FILTER(WHERE cond))
4043 StructToRow, // DuckDB struct -> Presto ROW / BigQuery STRUCT
4044 SparkStructConvert, // Spark STRUCT(x AS col1, ...) -> ROW/DuckDB struct
4045 DecimalDefaultPrecision, // DECIMAL -> DECIMAL(18, 3) for Snowflake in BIT agg
4046 ApproxCountDistinctToApproxDistinct, // APPROX_COUNT_DISTINCT -> APPROX_DISTINCT for Presto/Trino
4047 CollectListToArrayAgg, // COLLECT_LIST -> ARRAY_AGG for Presto/DuckDB
4048 CollectSetConvert, // COLLECT_SET -> SET_AGG/ARRAY_AGG(DISTINCT)/ARRAY_UNIQUE_AGG
4049 PercentileConvert, // PERCENTILE -> QUANTILE/APPROX_PERCENTILE
4050 CorrIsnanWrap, // CORR(a,b) -> CASE WHEN ISNAN(CORR(a,b)) THEN NULL ELSE CORR(a,b) END
4051 TruncToDateTrunc, // TRUNC(ts, unit) -> DATE_TRUNC(unit, ts)
4052 ArrayContainsConvert, // ARRAY_CONTAINS -> CONTAINS/target-specific
4053 StrPositionExpand, // StrPosition with position -> complex STRPOS expansion for Presto/DuckDB
4054 TablesampleSnowflakeStrip, // Strip method and PERCENT for Snowflake target
4055 FirstToAnyValue, // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
4056 MonthsBetweenConvert, // Expression::MonthsBetween -> target-specific
4057 CurrentUserSparkParens, // CURRENT_USER -> CURRENT_USER() for Spark
4058 SparkDateFuncCast, // MONTH/YEAR/DAY('str') -> MONTH/YEAR/DAY(CAST('str' AS DATE)) from Spark
4059 MapFromArraysConvert, // Expression::MapFromArrays -> MAP/OBJECT_CONSTRUCT/MAP_FROM_ARRAYS
4060 AddMonthsConvert, // Expression::AddMonths -> target-specific DATEADD/DATE_ADD
4061 PercentileContConvert, // PERCENTILE_CONT/DISC WITHIN GROUP -> APPROX_PERCENTILE/PERCENTILE_APPROX
4062 GenerateSeriesConvert, // GENERATE_SERIES -> SEQUENCE/UNNEST(SEQUENCE)/EXPLODE(SEQUENCE)
4063 ConcatCoalesceWrap, // CONCAT(a, b) -> CONCAT(COALESCE(CAST(a), ''), ...) for Presto/ClickHouse
4064 PipeConcatToConcat, // a || b -> CONCAT(CAST(a), CAST(b)) for Presto
4065 DivFuncConvert, // DIV(a, b) -> a // b for DuckDB, CAST for BigQuery
4066 JsonObjectAggConvert, // JSON_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
4067 JsonbExistsConvert, // JSONB_EXISTS -> JSON_EXISTS for DuckDB
4068 DateBinConvert, // DATE_BIN -> TIME_BUCKET for DuckDB
4069 MysqlCastCharToText, // MySQL CAST(x AS CHAR) -> CAST(x AS TEXT/VARCHAR/STRING) for targets
4070 SparkCastVarcharToString, // Spark CAST(x AS VARCHAR/CHAR) -> CAST(x AS STRING) for Spark targets
4071 JsonExtractToArrow, // JSON_EXTRACT(x, path) -> x -> path for SQLite/DuckDB
4072 JsonExtractToTsql, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
4073 JsonExtractToClickHouse, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> JSONExtractString for ClickHouse
4074 JsonExtractScalarConvert, // JSON_EXTRACT_SCALAR -> target-specific (PostgreSQL, Snowflake, SQLite)
4075 JsonPathNormalize, // Normalize JSON path format (brackets, wildcards, quotes) for various dialects
4076 MinMaxToLeastGreatest, // Multi-arg MIN(a,b,c) -> LEAST(a,b,c), MAX(a,b,c) -> GREATEST(a,b,c)
4077 ClickHouseUniqToApproxCountDistinct, // uniq(x) -> APPROX_COUNT_DISTINCT(x) for non-ClickHouse targets
4078 ClickHouseAnyToAnyValue, // any(x) -> ANY_VALUE(x) for non-ClickHouse targets
4079 OracleVarchar2ToVarchar, // VARCHAR2(N CHAR/BYTE) -> VARCHAR(N) for non-Oracle targets
4080 Nvl2Expand, // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b ELSE c END
4081 IfnullToCoalesce, // IFNULL(a, b) -> COALESCE(a, b)
4082 IsAsciiConvert, // IS_ASCII(x) -> dialect-specific ASCII check
4083 StrPositionConvert, // STR_POSITION(haystack, needle[, pos]) -> dialect-specific
4084 DecodeSimplify, // DECODE with null-safe -> simple = comparison
4085 ArraySumConvert, // ARRAY_SUM -> target-specific
4086 ArraySizeConvert, // ARRAY_SIZE -> target-specific
4087 ArrayAnyConvert, // ARRAY_ANY -> target-specific
4088 CastTimestamptzToFunc, // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) for MySQL/StarRocks
4089 TsOrDsToDateConvert, // TS_OR_DS_TO_DATE(x[, fmt]) -> dialect-specific
4090 TsOrDsToDateStrConvert, // TS_OR_DS_TO_DATE_STR(x) -> SUBSTRING(CAST(x AS type), 1, 10)
4091 DateStrToDateConvert, // DATE_STR_TO_DATE(x) -> CAST(x AS DATE)
4092 TimeStrToDateConvert, // TIME_STR_TO_DATE(x) -> CAST(x AS DATE)
4093 TimeStrToTimeConvert, // TIME_STR_TO_TIME(x) -> CAST(x AS TIMESTAMP)
4094 DateToDateStrConvert, // DATE_TO_DATE_STR(x) -> CAST(x AS TEXT/VARCHAR/STRING)
4095 DateToDiConvert, // DATE_TO_DI(x) -> dialect-specific (CAST date to YYYYMMDD integer)
4096 DiToDateConvert, // DI_TO_DATE(x) -> dialect-specific (integer YYYYMMDD to date)
4097 TsOrDiToDiConvert, // TS_OR_DI_TO_DI(x) -> dialect-specific
4098 UnixToStrConvert, // UNIX_TO_STR(x, fmt) -> dialect-specific
4099 UnixToTimeConvert, // UNIX_TO_TIME(x) -> dialect-specific
4100 UnixToTimeStrConvert, // UNIX_TO_TIME_STR(x) -> dialect-specific
4101 TimeToUnixConvert, // TIME_TO_UNIX(x) -> dialect-specific
4102 TimeToStrConvert, // TIME_TO_STR(x, fmt) -> dialect-specific
4103 StrToUnixConvert, // STR_TO_UNIX(x, fmt) -> dialect-specific
4104 DateTruncSwapArgs, // DATE_TRUNC('unit', x) -> DATE_TRUNC(x, unit) / TRUNC(x, unit)
4105 TimestampTruncConvert, // TIMESTAMP_TRUNC(x, UNIT[, tz]) -> dialect-specific
4106 StrToDateConvert, // STR_TO_DATE(x, fmt) from Generic -> CAST(StrToTime(x,fmt) AS DATE)
4107 TsOrDsAddConvert, // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> DATE_ADD per dialect
4108 DateFromUnixDateConvert, // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, '1970-01-01')
4109 TimeStrToUnixConvert, // TIME_STR_TO_UNIX(x) -> dialect-specific
4110 TimeToTimeStrConvert, // TIME_TO_TIME_STR(x) -> CAST(x AS type)
4111 CreateTableLikeToCtas, // CREATE TABLE a LIKE b -> CREATE TABLE a AS SELECT * FROM b LIMIT 0
4112 CreateTableLikeToSelectInto, // CREATE TABLE a LIKE b -> SELECT TOP 0 * INTO a FROM b AS temp
4113 CreateTableLikeToAs, // CREATE TABLE a LIKE b -> CREATE TABLE a AS b (ClickHouse)
4114 ArrayRemoveConvert, // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter/ARRAY subquery
4115 ArrayReverseConvert, // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
4116 JsonKeysConvert, // JSON_KEYS -> JSON_OBJECT_KEYS/OBJECT_KEYS
4117 ParseJsonStrip, // PARSE_JSON(x) -> x (strip wrapper)
4118 ArraySizeDrill, // ARRAY_SIZE -> REPEATED_COUNT for Drill
4119 WeekOfYearToWeekIso, // WEEKOFYEAR -> WEEKISO for Snowflake cross-dialect
4120 RegexpSubstrSnowflakeToDuckDB, // REGEXP_SUBSTR(s, p, ...) -> REGEXP_EXTRACT variants for DuckDB
4121 RegexpSubstrSnowflakeIdentity, // REGEXP_SUBSTR/REGEXP_SUBSTR_ALL strip trailing group=0 for Snowflake identity
4122 RegexpSubstrAllSnowflakeToDuckDB, // REGEXP_SUBSTR_ALL(s, p, ...) -> REGEXP_EXTRACT_ALL variants for DuckDB
4123 RegexpCountSnowflakeToDuckDB, // REGEXP_COUNT(s, p, ...) -> LENGTH(REGEXP_EXTRACT_ALL(...)) for DuckDB
4124 RegexpInstrSnowflakeToDuckDB, // REGEXP_INSTR(s, p, ...) -> complex CASE expression for DuckDB
4125 RegexpReplacePositionSnowflakeToDuckDB, // REGEXP_REPLACE(s, p, r, pos, occ) -> DuckDB form
4126 RlikeSnowflakeToDuckDB, // RLIKE(a, b[, flags]) -> REGEXP_FULL_MATCH(a, b[, flags]) for DuckDB
4127 RegexpExtractAllToSnowflake, // BigQuery REGEXP_EXTRACT_ALL -> REGEXP_SUBSTR_ALL for Snowflake
4128 ArrayExceptConvert, // ARRAY_EXCEPT -> DuckDB complex CASE / Snowflake ARRAY_EXCEPT / Presto ARRAY_EXCEPT
4129 ArrayPositionSnowflakeSwap, // ARRAY_POSITION(arr, elem) -> ARRAY_POSITION(elem, arr) for Snowflake
4130 RegexpLikeExasolAnchor, // RegexpLike -> Exasol REGEXP_LIKE with .*pattern.* anchoring
4131 ArrayDistinctConvert, // ARRAY_DISTINCT -> DuckDB LIST_DISTINCT with NULL-aware CASE
4132 ArrayDistinctClickHouse, // ARRAY_DISTINCT -> arrayDistinct for ClickHouse
4133 ArrayContainsDuckDBConvert, // ARRAY_CONTAINS -> DuckDB CASE with NULL-aware check
4134 SnowflakeWindowFrameStrip, // Strip default ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING for Snowflake target
4135 SnowflakeWindowFrameAdd, // Add default ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING for non-Snowflake target
4136 SnowflakeArrayPositionToDuckDB, // ARRAY_POSITION(val, arr) -> ARRAY_POSITION(arr, val) - 1 for DuckDB
4137 }
4138
4139 // Handle SELECT INTO -> CREATE TABLE AS for DuckDB/Snowflake/etc.
4140 let expr = if matches!(source, DialectType::TSQL | DialectType::Fabric) {
4141 Self::transform_select_into(expr, source, target)
4142 } else {
4143 expr
4144 };
4145
4146 // Strip OFFSET ROWS for non-TSQL/Oracle targets
4147 let expr = if !matches!(
4148 target,
4149 DialectType::TSQL | DialectType::Oracle | DialectType::Fabric
4150 ) {
4151 if let Expression::Select(mut select) = expr {
4152 if let Some(ref mut offset) = select.offset {
4153 offset.rows = None;
4154 }
4155 Expression::Select(select)
4156 } else {
4157 expr
4158 }
4159 } else {
4160 expr
4161 };
4162
4163 // Oracle: LIMIT -> FETCH FIRST, OFFSET -> OFFSET ROWS
4164 let expr = if matches!(target, DialectType::Oracle) {
4165 if let Expression::Select(mut select) = expr {
4166 if let Some(limit) = select.limit.take() {
4167 // Convert LIMIT to FETCH FIRST n ROWS ONLY
4168 select.fetch = Some(crate::expressions::Fetch {
4169 direction: "FIRST".to_string(),
4170 count: Some(limit.this),
4171 percent: false,
4172 rows: true,
4173 with_ties: false,
4174 });
4175 }
4176 // Add ROWS to OFFSET if present
4177 if let Some(ref mut offset) = select.offset {
4178 offset.rows = Some(true);
4179 }
4180 Expression::Select(select)
4181 } else {
4182 expr
4183 }
4184 } else {
4185 expr
4186 };
4187
4188 // Handle CreateTable WITH properties transformation before recursive transforms
4189 let expr = if let Expression::CreateTable(mut ct) = expr {
4190 Self::transform_create_table_properties(&mut ct, source, target);
4191
4192 // Handle Hive-style PARTITIONED BY (col_name type, ...) -> target-specific
4193 // When the PARTITIONED BY clause contains column definitions, merge them into the
4194 // main column list and adjust the PARTITIONED BY clause for the target dialect.
4195 if matches!(
4196 source,
4197 DialectType::Hive | DialectType::Spark | DialectType::Databricks
4198 ) {
4199 let mut partition_col_names: Vec<String> = Vec::new();
4200 let mut partition_col_defs: Vec<crate::expressions::ColumnDef> = Vec::new();
4201 let mut has_col_def_partitions = false;
4202
4203 // Check if any PARTITIONED BY property contains ColumnDef expressions
4204 for prop in &ct.properties {
4205 if let Expression::PartitionedByProperty(ref pbp) = prop {
4206 if let Expression::Tuple(ref tuple) = *pbp.this {
4207 for expr in &tuple.expressions {
4208 if let Expression::ColumnDef(ref cd) = expr {
4209 has_col_def_partitions = true;
4210 partition_col_names.push(cd.name.name.clone());
4211 partition_col_defs.push(*cd.clone());
4212 }
4213 }
4214 }
4215 }
4216 }
4217
4218 if has_col_def_partitions && !matches!(target, DialectType::Hive) {
4219 // Merge partition columns into main column list
4220 for cd in partition_col_defs {
4221 ct.columns.push(cd);
4222 }
4223
4224 // Replace PARTITIONED BY property with column-name-only version
4225 ct.properties
4226 .retain(|p| !matches!(p, Expression::PartitionedByProperty(_)));
4227
4228 if matches!(
4229 target,
4230 DialectType::Presto | DialectType::Trino | DialectType::Athena
4231 ) {
4232 // Presto: WITH (PARTITIONED_BY=ARRAY['y', 'z'])
4233 let array_elements: Vec<String> = partition_col_names
4234 .iter()
4235 .map(|n| format!("'{}'", n))
4236 .collect();
4237 let array_value = format!("ARRAY[{}]", array_elements.join(", "));
4238 ct.with_properties
4239 .push(("PARTITIONED_BY".to_string(), array_value));
4240 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
4241 // Spark: PARTITIONED BY (y, z) - just column names
4242 let name_exprs: Vec<Expression> = partition_col_names
4243 .iter()
4244 .map(|n| {
4245 Expression::Column(Box::new(crate::expressions::Column {
4246 name: crate::expressions::Identifier::new(n.clone()),
4247 table: None,
4248 join_mark: false,
4249 trailing_comments: Vec::new(),
4250 span: None,
4251 inferred_type: None,
4252 }))
4253 })
4254 .collect();
4255 ct.properties.insert(
4256 0,
4257 Expression::PartitionedByProperty(Box::new(
4258 crate::expressions::PartitionedByProperty {
4259 this: Box::new(Expression::Tuple(Box::new(
4260 crate::expressions::Tuple {
4261 expressions: name_exprs,
4262 },
4263 ))),
4264 },
4265 )),
4266 );
4267 }
4268 // For DuckDB and other targets, just drop the PARTITIONED BY (already retained above)
4269 }
4270
4271 // Note: Non-ColumnDef partitions (e.g., function expressions like MONTHS(y))
4272 // are handled by transform_create_table_properties which runs first
4273 }
4274
4275 // Strip LOCATION property for Presto/Trino (not supported)
4276 if matches!(
4277 target,
4278 DialectType::Presto | DialectType::Trino | DialectType::Athena
4279 ) {
4280 ct.properties
4281 .retain(|p| !matches!(p, Expression::LocationProperty(_)));
4282 }
4283
4284 // Strip table-level constraints for Spark/Hive/Databricks
4285 // Keep PRIMARY KEY and LIKE constraints but strip TSQL-specific modifiers; remove all others
4286 if matches!(
4287 target,
4288 DialectType::Spark | DialectType::Databricks | DialectType::Hive
4289 ) {
4290 ct.constraints.retain(|c| {
4291 matches!(
4292 c,
4293 crate::expressions::TableConstraint::PrimaryKey { .. }
4294 | crate::expressions::TableConstraint::Like { .. }
4295 )
4296 });
4297 for constraint in &mut ct.constraints {
4298 if let crate::expressions::TableConstraint::PrimaryKey {
4299 columns,
4300 modifiers,
4301 ..
4302 } = constraint
4303 {
4304 // Strip ASC/DESC from column names
4305 for col in columns.iter_mut() {
4306 if col.name.ends_with(" ASC") {
4307 col.name = col.name[..col.name.len() - 4].to_string();
4308 } else if col.name.ends_with(" DESC") {
4309 col.name = col.name[..col.name.len() - 5].to_string();
4310 }
4311 }
4312 // Strip TSQL-specific modifiers
4313 modifiers.clustered = None;
4314 modifiers.with_options.clear();
4315 modifiers.on_filegroup = None;
4316 }
4317 }
4318 }
4319
4320 // Databricks: IDENTITY columns with INT/INTEGER -> BIGINT
4321 if matches!(target, DialectType::Databricks) {
4322 for col in &mut ct.columns {
4323 if col.auto_increment {
4324 if matches!(col.data_type, crate::expressions::DataType::Int { .. }) {
4325 col.data_type = crate::expressions::DataType::BigInt { length: None };
4326 }
4327 }
4328 }
4329 }
4330
4331 // Spark/Databricks: INTEGER -> INT in column definitions
4332 // Python sqlglot always outputs INT for Spark/Databricks
4333 if matches!(target, DialectType::Spark | DialectType::Databricks) {
4334 for col in &mut ct.columns {
4335 if let crate::expressions::DataType::Int {
4336 integer_spelling, ..
4337 } = &mut col.data_type
4338 {
4339 *integer_spelling = false;
4340 }
4341 }
4342 }
4343
4344 // Strip explicit NULL constraints for Hive/Spark (B INTEGER NULL -> B INTEGER)
4345 if matches!(target, DialectType::Hive | DialectType::Spark) {
4346 for col in &mut ct.columns {
4347 // If nullable is explicitly true (NULL), change to None (omit it)
4348 if col.nullable == Some(true) {
4349 col.nullable = None;
4350 }
4351 // Also remove from constraints if stored there
4352 col.constraints
4353 .retain(|c| !matches!(c, crate::expressions::ColumnConstraint::Null));
4354 }
4355 }
4356
4357 // Strip TSQL ON filegroup for non-TSQL/Fabric targets
4358 if ct.on_property.is_some()
4359 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
4360 {
4361 ct.on_property = None;
4362 }
4363
4364 // Snowflake: strip ARRAY type parameters (ARRAY<INT> -> ARRAY, ARRAY<ARRAY<INT>> -> ARRAY)
4365 // Snowflake doesn't support typed arrays in DDL
4366 if matches!(target, DialectType::Snowflake) {
4367 fn strip_array_type_params(dt: &mut crate::expressions::DataType) {
4368 if let crate::expressions::DataType::Array { .. } = dt {
4369 *dt = crate::expressions::DataType::Custom {
4370 name: "ARRAY".to_string(),
4371 };
4372 }
4373 }
4374 for col in &mut ct.columns {
4375 strip_array_type_params(&mut col.data_type);
4376 }
4377 }
4378
4379 // PostgreSQL target: ensure IDENTITY columns have NOT NULL
4380 // If NOT NULL was explicit in source (present in constraint_order), preserve original order.
4381 // If NOT NULL was not explicit, add it after IDENTITY (GENERATED BY DEFAULT AS IDENTITY NOT NULL).
4382 if matches!(target, DialectType::PostgreSQL) {
4383 for col in &mut ct.columns {
4384 if col.auto_increment && !col.constraint_order.is_empty() {
4385 use crate::expressions::ConstraintType;
4386 let has_explicit_not_null = col
4387 .constraint_order
4388 .iter()
4389 .any(|ct| *ct == ConstraintType::NotNull);
4390
4391 if has_explicit_not_null {
4392 // Source had explicit NOT NULL - preserve original order
4393 // Just ensure nullable is set
4394 if col.nullable != Some(false) {
4395 col.nullable = Some(false);
4396 }
4397 } else {
4398 // Source didn't have explicit NOT NULL - build order with
4399 // AutoIncrement + NotNull first, then remaining constraints
4400 let mut new_order = Vec::new();
4401 // Put AutoIncrement (IDENTITY) first, followed by synthetic NotNull
4402 new_order.push(ConstraintType::AutoIncrement);
4403 new_order.push(ConstraintType::NotNull);
4404 // Add remaining constraints in original order (except AutoIncrement)
4405 for ct_type in &col.constraint_order {
4406 if *ct_type != ConstraintType::AutoIncrement {
4407 new_order.push(ct_type.clone());
4408 }
4409 }
4410 col.constraint_order = new_order;
4411 col.nullable = Some(false);
4412 }
4413 }
4414 }
4415 }
4416
4417 Expression::CreateTable(ct)
4418 } else {
4419 expr
4420 };
4421
4422 // Handle CreateView column stripping for Presto/Trino target
4423 let expr = if let Expression::CreateView(mut cv) = expr {
4424 // Presto/Trino: drop column list when view has a SELECT body
4425 if matches!(target, DialectType::Presto | DialectType::Trino) && !cv.columns.is_empty()
4426 {
4427 if !matches!(&cv.query, Expression::Null(_)) {
4428 cv.columns.clear();
4429 }
4430 }
4431 Expression::CreateView(cv)
4432 } else {
4433 expr
4434 };
4435
4436 // Wrap bare VALUES in CTE bodies with SELECT * FROM (...) AS _values for generic/non-Presto targets
4437 let expr = if !matches!(
4438 target,
4439 DialectType::Presto | DialectType::Trino | DialectType::Athena
4440 ) {
4441 if let Expression::Select(mut select) = expr {
4442 if let Some(ref mut with) = select.with {
4443 for cte in &mut with.ctes {
4444 if let Expression::Values(ref vals) = cte.this {
4445 // Build: SELECT * FROM (VALUES ...) AS _values
4446 let values_subquery =
4447 Expression::Subquery(Box::new(crate::expressions::Subquery {
4448 this: Expression::Values(vals.clone()),
4449 alias: Some(Identifier::new("_values".to_string())),
4450 column_aliases: Vec::new(),
4451 order_by: None,
4452 limit: None,
4453 offset: None,
4454 distribute_by: None,
4455 sort_by: None,
4456 cluster_by: None,
4457 lateral: false,
4458 modifiers_inside: false,
4459 trailing_comments: Vec::new(),
4460 inferred_type: None,
4461 }));
4462 let mut new_select = crate::expressions::Select::new();
4463 new_select.expressions =
4464 vec![Expression::Star(crate::expressions::Star {
4465 table: None,
4466 except: None,
4467 replace: None,
4468 rename: None,
4469 trailing_comments: Vec::new(),
4470 span: None,
4471 })];
4472 new_select.from = Some(crate::expressions::From {
4473 expressions: vec![values_subquery],
4474 });
4475 cte.this = Expression::Select(Box::new(new_select));
4476 }
4477 }
4478 }
4479 Expression::Select(select)
4480 } else {
4481 expr
4482 }
4483 } else {
4484 expr
4485 };
4486
4487 // PostgreSQL CREATE INDEX: add NULLS FIRST to index columns that don't have nulls ordering
4488 let expr = if matches!(target, DialectType::PostgreSQL) {
4489 if let Expression::CreateIndex(mut ci) = expr {
4490 for col in &mut ci.columns {
4491 if col.nulls_first.is_none() {
4492 col.nulls_first = Some(true);
4493 }
4494 }
4495 Expression::CreateIndex(ci)
4496 } else {
4497 expr
4498 }
4499 } else {
4500 expr
4501 };
4502
4503 transform_recursive(expr, &|e| {
4504 // BigQuery CAST(ARRAY[STRUCT(...)] AS STRUCT_TYPE[]) -> DuckDB: convert unnamed Structs to ROW()
4505 // This converts auto-named struct literals {'_0': x, '_1': y} inside typed arrays to ROW(x, y)
4506 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
4507 if let Expression::Cast(ref c) = e {
4508 // Check if this is a CAST of an array to a struct array type
4509 let is_struct_array_cast =
4510 matches!(&c.to, crate::expressions::DataType::Array { .. });
4511 if is_struct_array_cast {
4512 let has_auto_named_structs = match &c.this {
4513 Expression::Array(arr) => arr.expressions.iter().any(|elem| {
4514 if let Expression::Struct(s) = elem {
4515 s.fields.iter().all(|(name, _)| {
4516 name.as_ref().map_or(true, |n| {
4517 n.starts_with('_') && n[1..].parse::<usize>().is_ok()
4518 })
4519 })
4520 } else {
4521 false
4522 }
4523 }),
4524 Expression::ArrayFunc(arr) => arr.expressions.iter().any(|elem| {
4525 if let Expression::Struct(s) = elem {
4526 s.fields.iter().all(|(name, _)| {
4527 name.as_ref().map_or(true, |n| {
4528 n.starts_with('_') && n[1..].parse::<usize>().is_ok()
4529 })
4530 })
4531 } else {
4532 false
4533 }
4534 }),
4535 _ => false,
4536 };
4537 if has_auto_named_structs {
4538 let convert_struct_to_row = |elem: Expression| -> Expression {
4539 if let Expression::Struct(s) = elem {
4540 let row_args: Vec<Expression> =
4541 s.fields.into_iter().map(|(_, v)| v).collect();
4542 Expression::Function(Box::new(Function::new(
4543 "ROW".to_string(),
4544 row_args,
4545 )))
4546 } else {
4547 elem
4548 }
4549 };
4550 let mut c_clone = c.as_ref().clone();
4551 match &mut c_clone.this {
4552 Expression::Array(arr) => {
4553 arr.expressions = arr
4554 .expressions
4555 .drain(..)
4556 .map(convert_struct_to_row)
4557 .collect();
4558 }
4559 Expression::ArrayFunc(arr) => {
4560 arr.expressions = arr
4561 .expressions
4562 .drain(..)
4563 .map(convert_struct_to_row)
4564 .collect();
4565 }
4566 _ => {}
4567 }
4568 return Ok(Expression::Cast(Box::new(c_clone)));
4569 }
4570 }
4571 }
4572 }
4573
4574 // BigQuery SELECT AS STRUCT -> DuckDB struct literal {'key': value, ...}
4575 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
4576 if let Expression::Select(ref sel) = e {
4577 if sel.kind.as_deref() == Some("STRUCT") {
4578 let mut fields = Vec::new();
4579 for expr in &sel.expressions {
4580 match expr {
4581 Expression::Alias(a) => {
4582 fields.push((Some(a.alias.name.clone()), a.this.clone()));
4583 }
4584 Expression::Column(c) => {
4585 fields.push((Some(c.name.name.clone()), expr.clone()));
4586 }
4587 _ => {
4588 fields.push((None, expr.clone()));
4589 }
4590 }
4591 }
4592 let struct_lit =
4593 Expression::Struct(Box::new(crate::expressions::Struct { fields }));
4594 let mut new_select = sel.as_ref().clone();
4595 new_select.kind = None;
4596 new_select.expressions = vec![struct_lit];
4597 return Ok(Expression::Select(Box::new(new_select)));
4598 }
4599 }
4600 }
4601
4602 // Convert @variable -> ${variable} for Spark/Hive/Databricks
4603 if matches!(source, DialectType::TSQL | DialectType::Fabric)
4604 && matches!(
4605 target,
4606 DialectType::Spark | DialectType::Databricks | DialectType::Hive
4607 )
4608 {
4609 if let Expression::Parameter(ref p) = e {
4610 if p.style == crate::expressions::ParameterStyle::At {
4611 if let Some(ref name) = p.name {
4612 return Ok(Expression::Parameter(Box::new(
4613 crate::expressions::Parameter {
4614 name: Some(name.clone()),
4615 index: p.index,
4616 style: crate::expressions::ParameterStyle::DollarBrace,
4617 quoted: p.quoted,
4618 string_quoted: p.string_quoted,
4619 expression: None,
4620 },
4621 )));
4622 }
4623 }
4624 }
4625 // Also handle Column("@x") -> Parameter("x", DollarBrace) for TSQL vars
4626 if let Expression::Column(ref col) = e {
4627 if col.name.name.starts_with('@') && col.table.is_none() {
4628 let var_name = col.name.name.trim_start_matches('@').to_string();
4629 return Ok(Expression::Parameter(Box::new(
4630 crate::expressions::Parameter {
4631 name: Some(var_name),
4632 index: None,
4633 style: crate::expressions::ParameterStyle::DollarBrace,
4634 quoted: false,
4635 string_quoted: false,
4636 expression: None,
4637 },
4638 )));
4639 }
4640 }
4641 }
4642
4643 // Convert @variable -> variable in SET statements for Spark/Databricks
4644 if matches!(source, DialectType::TSQL | DialectType::Fabric)
4645 && matches!(target, DialectType::Spark | DialectType::Databricks)
4646 {
4647 if let Expression::SetStatement(ref s) = e {
4648 let mut new_items = s.items.clone();
4649 let mut changed = false;
4650 for item in &mut new_items {
4651 // Strip @ from the SET name (Parameter style)
4652 if let Expression::Parameter(ref p) = item.name {
4653 if p.style == crate::expressions::ParameterStyle::At {
4654 if let Some(ref name) = p.name {
4655 item.name = Expression::Identifier(Identifier::new(name));
4656 changed = true;
4657 }
4658 }
4659 }
4660 // Strip @ from the SET name (Identifier style - SET parser)
4661 if let Expression::Identifier(ref id) = item.name {
4662 if id.name.starts_with('@') {
4663 let var_name = id.name.trim_start_matches('@').to_string();
4664 item.name = Expression::Identifier(Identifier::new(&var_name));
4665 changed = true;
4666 }
4667 }
4668 // Strip @ from the SET name (Column style - alternative parsing)
4669 if let Expression::Column(ref col) = item.name {
4670 if col.name.name.starts_with('@') && col.table.is_none() {
4671 let var_name = col.name.name.trim_start_matches('@').to_string();
4672 item.name = Expression::Identifier(Identifier::new(&var_name));
4673 changed = true;
4674 }
4675 }
4676 }
4677 if changed {
4678 let mut new_set = (**s).clone();
4679 new_set.items = new_items;
4680 return Ok(Expression::SetStatement(Box::new(new_set)));
4681 }
4682 }
4683 }
4684
4685 // Strip NOLOCK hint for non-TSQL targets
4686 if matches!(source, DialectType::TSQL | DialectType::Fabric)
4687 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
4688 {
4689 if let Expression::Table(ref tr) = e {
4690 if !tr.hints.is_empty() {
4691 let mut new_tr = tr.clone();
4692 new_tr.hints.clear();
4693 return Ok(Expression::Table(new_tr));
4694 }
4695 }
4696 }
4697
4698 // Snowflake: TRUE IS TRUE -> TRUE, FALSE IS FALSE -> FALSE
4699 // Snowflake simplifies IS TRUE/IS FALSE on boolean literals
4700 if matches!(target, DialectType::Snowflake) {
4701 if let Expression::IsTrue(ref itf) = e {
4702 if let Expression::Boolean(ref b) = itf.this {
4703 if !itf.not {
4704 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
4705 value: b.value,
4706 }));
4707 } else {
4708 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
4709 value: !b.value,
4710 }));
4711 }
4712 }
4713 }
4714 if let Expression::IsFalse(ref itf) = e {
4715 if let Expression::Boolean(ref b) = itf.this {
4716 if !itf.not {
4717 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
4718 value: !b.value,
4719 }));
4720 } else {
4721 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
4722 value: b.value,
4723 }));
4724 }
4725 }
4726 }
4727 }
4728
4729 // BigQuery: split dotted backtick identifiers in table names
4730 // e.g., `a.b.c` -> "a"."b"."c" when source is BigQuery and target is not BigQuery
4731 if matches!(source, DialectType::BigQuery) && !matches!(target, DialectType::BigQuery) {
4732 if let Expression::CreateTable(ref ct) = e {
4733 let mut changed = false;
4734 let mut new_ct = ct.clone();
4735 // Split the table name
4736 if ct.name.schema.is_none() && ct.name.name.name.contains('.') {
4737 let parts: Vec<&str> = ct.name.name.name.split('.').collect();
4738 // Use quoted identifiers when the original was quoted (backtick in BigQuery)
4739 let was_quoted = ct.name.name.quoted;
4740 let mk_id = |s: &str| {
4741 if was_quoted {
4742 Identifier::quoted(s)
4743 } else {
4744 Identifier::new(s)
4745 }
4746 };
4747 if parts.len() == 3 {
4748 new_ct.name.catalog = Some(mk_id(parts[0]));
4749 new_ct.name.schema = Some(mk_id(parts[1]));
4750 new_ct.name.name = mk_id(parts[2]);
4751 changed = true;
4752 } else if parts.len() == 2 {
4753 new_ct.name.schema = Some(mk_id(parts[0]));
4754 new_ct.name.name = mk_id(parts[1]);
4755 changed = true;
4756 }
4757 }
4758 // Split the clone source name
4759 if let Some(ref clone_src) = ct.clone_source {
4760 if clone_src.schema.is_none() && clone_src.name.name.contains('.') {
4761 let parts: Vec<&str> = clone_src.name.name.split('.').collect();
4762 let was_quoted = clone_src.name.quoted;
4763 let mk_id = |s: &str| {
4764 if was_quoted {
4765 Identifier::quoted(s)
4766 } else {
4767 Identifier::new(s)
4768 }
4769 };
4770 let mut new_src = clone_src.clone();
4771 if parts.len() == 3 {
4772 new_src.catalog = Some(mk_id(parts[0]));
4773 new_src.schema = Some(mk_id(parts[1]));
4774 new_src.name = mk_id(parts[2]);
4775 new_ct.clone_source = Some(new_src);
4776 changed = true;
4777 } else if parts.len() == 2 {
4778 new_src.schema = Some(mk_id(parts[0]));
4779 new_src.name = mk_id(parts[1]);
4780 new_ct.clone_source = Some(new_src);
4781 changed = true;
4782 }
4783 }
4784 }
4785 if changed {
4786 return Ok(Expression::CreateTable(new_ct));
4787 }
4788 }
4789 }
4790
4791 // BigQuery array subscript: a[1], b[OFFSET(1)], c[ORDINAL(1)], d[SAFE_OFFSET(1)], e[SAFE_ORDINAL(1)]
4792 // -> DuckDB/Presto: convert 0-based to 1-based, handle SAFE_* -> ELEMENT_AT for Presto
4793 if matches!(source, DialectType::BigQuery)
4794 && matches!(
4795 target,
4796 DialectType::DuckDB
4797 | DialectType::Presto
4798 | DialectType::Trino
4799 | DialectType::Athena
4800 )
4801 {
4802 if let Expression::Subscript(ref sub) = e {
4803 let (new_index, is_safe) = match &sub.index {
4804 // a[1] -> a[1+1] = a[2] (plain index is 0-based in BQ)
4805 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
4806 let Literal::Number(n) = lit.as_ref() else {
4807 unreachable!()
4808 };
4809 if let Ok(val) = n.parse::<i64>() {
4810 (
4811 Some(Expression::Literal(Box::new(Literal::Number(
4812 (val + 1).to_string(),
4813 )))),
4814 false,
4815 )
4816 } else {
4817 (None, false)
4818 }
4819 }
4820 // OFFSET(n) -> n+1 (0-based)
4821 Expression::Function(ref f)
4822 if f.name.eq_ignore_ascii_case("OFFSET") && f.args.len() == 1 =>
4823 {
4824 if let Expression::Literal(lit) = &f.args[0] {
4825 if let Literal::Number(n) = lit.as_ref() {
4826 if let Ok(val) = n.parse::<i64>() {
4827 (
4828 Some(Expression::Literal(Box::new(Literal::Number(
4829 (val + 1).to_string(),
4830 )))),
4831 false,
4832 )
4833 } else {
4834 (
4835 Some(Expression::Add(Box::new(
4836 crate::expressions::BinaryOp::new(
4837 f.args[0].clone(),
4838 Expression::number(1),
4839 ),
4840 ))),
4841 false,
4842 )
4843 }
4844 } else {
4845 (None, false)
4846 }
4847 } else {
4848 (
4849 Some(Expression::Add(Box::new(
4850 crate::expressions::BinaryOp::new(
4851 f.args[0].clone(),
4852 Expression::number(1),
4853 ),
4854 ))),
4855 false,
4856 )
4857 }
4858 }
4859 // ORDINAL(n) -> n (already 1-based)
4860 Expression::Function(ref f)
4861 if f.name.eq_ignore_ascii_case("ORDINAL") && f.args.len() == 1 =>
4862 {
4863 (Some(f.args[0].clone()), false)
4864 }
4865 // SAFE_OFFSET(n) -> n+1 (0-based, safe)
4866 Expression::Function(ref f)
4867 if f.name.eq_ignore_ascii_case("SAFE_OFFSET") && f.args.len() == 1 =>
4868 {
4869 if let Expression::Literal(lit) = &f.args[0] {
4870 if let Literal::Number(n) = lit.as_ref() {
4871 if let Ok(val) = n.parse::<i64>() {
4872 (
4873 Some(Expression::Literal(Box::new(Literal::Number(
4874 (val + 1).to_string(),
4875 )))),
4876 true,
4877 )
4878 } else {
4879 (
4880 Some(Expression::Add(Box::new(
4881 crate::expressions::BinaryOp::new(
4882 f.args[0].clone(),
4883 Expression::number(1),
4884 ),
4885 ))),
4886 true,
4887 )
4888 }
4889 } else {
4890 (None, false)
4891 }
4892 } else {
4893 (
4894 Some(Expression::Add(Box::new(
4895 crate::expressions::BinaryOp::new(
4896 f.args[0].clone(),
4897 Expression::number(1),
4898 ),
4899 ))),
4900 true,
4901 )
4902 }
4903 }
4904 // SAFE_ORDINAL(n) -> n (already 1-based, safe)
4905 Expression::Function(ref f)
4906 if f.name.eq_ignore_ascii_case("SAFE_ORDINAL") && f.args.len() == 1 =>
4907 {
4908 (Some(f.args[0].clone()), true)
4909 }
4910 _ => (None, false),
4911 };
4912 if let Some(idx) = new_index {
4913 if is_safe
4914 && matches!(
4915 target,
4916 DialectType::Presto | DialectType::Trino | DialectType::Athena
4917 )
4918 {
4919 // Presto: SAFE_OFFSET/SAFE_ORDINAL -> ELEMENT_AT(arr, idx)
4920 return Ok(Expression::Function(Box::new(Function::new(
4921 "ELEMENT_AT".to_string(),
4922 vec![sub.this.clone(), idx],
4923 ))));
4924 } else {
4925 // DuckDB or non-safe: just use subscript with converted index
4926 return Ok(Expression::Subscript(Box::new(
4927 crate::expressions::Subscript {
4928 this: sub.this.clone(),
4929 index: idx,
4930 },
4931 )));
4932 }
4933 }
4934 }
4935 }
4936
4937 // BigQuery LENGTH(x) -> DuckDB CASE TYPEOF(x) WHEN 'BLOB' THEN OCTET_LENGTH(...) ELSE LENGTH(...) END
4938 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
4939 if let Expression::Length(ref uf) = e {
4940 let arg = uf.this.clone();
4941 let typeof_func = Expression::Function(Box::new(Function::new(
4942 "TYPEOF".to_string(),
4943 vec![arg.clone()],
4944 )));
4945 let blob_cast = Expression::Cast(Box::new(Cast {
4946 this: arg.clone(),
4947 to: DataType::VarBinary { length: None },
4948 trailing_comments: vec![],
4949 double_colon_syntax: false,
4950 format: None,
4951 default: None,
4952 inferred_type: None,
4953 }));
4954 let octet_length = Expression::Function(Box::new(Function::new(
4955 "OCTET_LENGTH".to_string(),
4956 vec![blob_cast],
4957 )));
4958 let text_cast = Expression::Cast(Box::new(Cast {
4959 this: arg,
4960 to: DataType::Text,
4961 trailing_comments: vec![],
4962 double_colon_syntax: false,
4963 format: None,
4964 default: None,
4965 inferred_type: None,
4966 }));
4967 let length_text = Expression::Length(Box::new(crate::expressions::UnaryFunc {
4968 this: text_cast,
4969 original_name: None,
4970 inferred_type: None,
4971 }));
4972 return Ok(Expression::Case(Box::new(Case {
4973 operand: Some(typeof_func),
4974 whens: vec![(
4975 Expression::Literal(Box::new(Literal::String("BLOB".to_string()))),
4976 octet_length,
4977 )],
4978 else_: Some(length_text),
4979 comments: Vec::new(),
4980 inferred_type: None,
4981 })));
4982 }
4983 }
4984
4985 // BigQuery UNNEST alias handling (only for non-BigQuery sources):
4986 // UNNEST(...) AS x -> UNNEST(...) (drop unused table alias)
4987 // UNNEST(...) AS x(y) -> UNNEST(...) AS y (use column alias as main alias)
4988 if matches!(target, DialectType::BigQuery) && !matches!(source, DialectType::BigQuery) {
4989 if let Expression::Alias(ref a) = e {
4990 if matches!(&a.this, Expression::Unnest(_)) {
4991 if a.column_aliases.is_empty() {
4992 // Drop the entire alias, return just the UNNEST expression
4993 return Ok(a.this.clone());
4994 } else {
4995 // Use first column alias as the main alias
4996 let mut new_alias = a.as_ref().clone();
4997 new_alias.alias = a.column_aliases[0].clone();
4998 new_alias.column_aliases.clear();
4999 return Ok(Expression::Alias(Box::new(new_alias)));
5000 }
5001 }
5002 }
5003 }
5004
5005 // BigQuery IN UNNEST(expr) -> IN (SELECT UNNEST/EXPLODE(expr)) for non-BigQuery targets
5006 if matches!(source, DialectType::BigQuery) && !matches!(target, DialectType::BigQuery) {
5007 if let Expression::In(ref in_expr) = e {
5008 if let Some(ref unnest_inner) = in_expr.unnest {
5009 // Build the function call for the target dialect
5010 let func_expr = if matches!(
5011 target,
5012 DialectType::Hive | DialectType::Spark | DialectType::Databricks
5013 ) {
5014 // Use EXPLODE for Hive/Spark
5015 Expression::Function(Box::new(Function::new(
5016 "EXPLODE".to_string(),
5017 vec![*unnest_inner.clone()],
5018 )))
5019 } else {
5020 // Use UNNEST for Presto/Trino/DuckDB/etc.
5021 Expression::Unnest(Box::new(crate::expressions::UnnestFunc {
5022 this: *unnest_inner.clone(),
5023 expressions: Vec::new(),
5024 with_ordinality: false,
5025 alias: None,
5026 offset_alias: None,
5027 }))
5028 };
5029
5030 // Wrap in SELECT
5031 let mut inner_select = crate::expressions::Select::new();
5032 inner_select.expressions = vec![func_expr];
5033
5034 let subquery_expr = Expression::Select(Box::new(inner_select));
5035
5036 return Ok(Expression::In(Box::new(crate::expressions::In {
5037 this: in_expr.this.clone(),
5038 expressions: Vec::new(),
5039 query: Some(subquery_expr),
5040 not: in_expr.not,
5041 global: in_expr.global,
5042 unnest: None,
5043 is_field: false,
5044 })));
5045 }
5046 }
5047 }
5048
5049 // SQLite: GENERATE_SERIES AS t(i) -> (SELECT value AS i FROM GENERATE_SERIES(...)) AS t
5050 // This handles the subquery wrapping for RANGE -> GENERATE_SERIES in FROM context
5051 if matches!(target, DialectType::SQLite) && matches!(source, DialectType::DuckDB) {
5052 if let Expression::Alias(ref a) = e {
5053 if let Expression::Function(ref f) = a.this {
5054 if f.name.eq_ignore_ascii_case("GENERATE_SERIES")
5055 && !a.column_aliases.is_empty()
5056 {
5057 // Build: (SELECT value AS col_alias FROM GENERATE_SERIES(start, end)) AS table_alias
5058 let col_alias = a.column_aliases[0].clone();
5059 let mut inner_select = crate::expressions::Select::new();
5060 inner_select.expressions =
5061 vec![Expression::Alias(Box::new(crate::expressions::Alias::new(
5062 Expression::Identifier(Identifier::new("value".to_string())),
5063 col_alias,
5064 )))];
5065 inner_select.from = Some(crate::expressions::From {
5066 expressions: vec![a.this.clone()],
5067 });
5068 let subquery =
5069 Expression::Subquery(Box::new(crate::expressions::Subquery {
5070 this: Expression::Select(Box::new(inner_select)),
5071 alias: Some(a.alias.clone()),
5072 column_aliases: Vec::new(),
5073 order_by: None,
5074 limit: None,
5075 offset: None,
5076 lateral: false,
5077 modifiers_inside: false,
5078 trailing_comments: Vec::new(),
5079 distribute_by: None,
5080 sort_by: None,
5081 cluster_by: None,
5082 inferred_type: None,
5083 }));
5084 return Ok(subquery);
5085 }
5086 }
5087 }
5088 }
5089
5090 // BigQuery implicit UNNEST: comma-join on array path -> CROSS JOIN UNNEST
5091 // e.g., SELECT results FROM Coordinates, Coordinates.position AS results
5092 // -> SELECT results FROM Coordinates CROSS JOIN UNNEST(Coordinates.position) AS results
5093 if matches!(source, DialectType::BigQuery) {
5094 if let Expression::Select(ref s) = e {
5095 if let Some(ref from) = s.from {
5096 if from.expressions.len() >= 2 {
5097 // Collect table names from first expression
5098 let first_tables: Vec<String> = from
5099 .expressions
5100 .iter()
5101 .take(1)
5102 .filter_map(|expr| {
5103 if let Expression::Table(t) = expr {
5104 Some(t.name.name.to_ascii_lowercase())
5105 } else {
5106 None
5107 }
5108 })
5109 .collect();
5110
5111 // Check if any subsequent FROM expressions are schema-qualified with a matching table name
5112 // or have a dotted name matching a table
5113 let mut needs_rewrite = false;
5114 for expr in from.expressions.iter().skip(1) {
5115 if let Expression::Table(t) = expr {
5116 if let Some(ref schema) = t.schema {
5117 if first_tables.contains(&schema.name.to_ascii_lowercase())
5118 {
5119 needs_rewrite = true;
5120 break;
5121 }
5122 }
5123 // Also check dotted names in quoted identifiers (e.g., `Coordinates.position`)
5124 if t.schema.is_none() && t.name.name.contains('.') {
5125 let parts: Vec<&str> = t.name.name.split('.').collect();
5126 if parts.len() >= 2
5127 && first_tables.contains(&parts[0].to_ascii_lowercase())
5128 {
5129 needs_rewrite = true;
5130 break;
5131 }
5132 }
5133 }
5134 }
5135
5136 if needs_rewrite {
5137 let mut new_select = s.clone();
5138 let mut new_from_exprs = vec![from.expressions[0].clone()];
5139 let mut new_joins = s.joins.clone();
5140
5141 for expr in from.expressions.iter().skip(1) {
5142 if let Expression::Table(ref t) = expr {
5143 if let Some(ref schema) = t.schema {
5144 if first_tables
5145 .contains(&schema.name.to_ascii_lowercase())
5146 {
5147 // This is an array path reference, convert to CROSS JOIN UNNEST
5148 let col_expr = Expression::Column(Box::new(
5149 crate::expressions::Column {
5150 name: t.name.clone(),
5151 table: Some(schema.clone()),
5152 join_mark: false,
5153 trailing_comments: vec![],
5154 span: None,
5155 inferred_type: None,
5156 },
5157 ));
5158 let unnest_expr = Expression::Unnest(Box::new(
5159 crate::expressions::UnnestFunc {
5160 this: col_expr,
5161 expressions: Vec::new(),
5162 with_ordinality: false,
5163 alias: None,
5164 offset_alias: None,
5165 },
5166 ));
5167 let join_this = if let Some(ref alias) = t.alias {
5168 if matches!(
5169 target,
5170 DialectType::Presto
5171 | DialectType::Trino
5172 | DialectType::Athena
5173 ) {
5174 // Presto: UNNEST(x) AS _t0(results)
5175 Expression::Alias(Box::new(
5176 crate::expressions::Alias {
5177 this: unnest_expr,
5178 alias: Identifier::new("_t0"),
5179 column_aliases: vec![alias.clone()],
5180 pre_alias_comments: vec![],
5181 trailing_comments: vec![],
5182 inferred_type: None,
5183 },
5184 ))
5185 } else {
5186 // BigQuery: UNNEST(x) AS results
5187 Expression::Alias(Box::new(
5188 crate::expressions::Alias {
5189 this: unnest_expr,
5190 alias: alias.clone(),
5191 column_aliases: vec![],
5192 pre_alias_comments: vec![],
5193 trailing_comments: vec![],
5194 inferred_type: None,
5195 },
5196 ))
5197 }
5198 } else {
5199 unnest_expr
5200 };
5201 new_joins.push(crate::expressions::Join {
5202 kind: crate::expressions::JoinKind::Cross,
5203 this: join_this,
5204 on: None,
5205 using: Vec::new(),
5206 use_inner_keyword: false,
5207 use_outer_keyword: false,
5208 deferred_condition: false,
5209 join_hint: None,
5210 match_condition: None,
5211 pivots: Vec::new(),
5212 comments: Vec::new(),
5213 nesting_group: 0,
5214 directed: false,
5215 });
5216 } else {
5217 new_from_exprs.push(expr.clone());
5218 }
5219 } else if t.schema.is_none() && t.name.name.contains('.') {
5220 // Dotted name in quoted identifier: `Coordinates.position`
5221 let parts: Vec<&str> = t.name.name.split('.').collect();
5222 if parts.len() >= 2
5223 && first_tables
5224 .contains(&parts[0].to_ascii_lowercase())
5225 {
5226 let join_this =
5227 if matches!(target, DialectType::BigQuery) {
5228 // BigQuery: keep as single quoted identifier, just convert comma -> CROSS JOIN
5229 Expression::Table(t.clone())
5230 } else {
5231 // Other targets: split into "schema"."name"
5232 let mut new_t = t.clone();
5233 new_t.schema =
5234 Some(Identifier::quoted(parts[0]));
5235 new_t.name = Identifier::quoted(parts[1]);
5236 Expression::Table(new_t)
5237 };
5238 new_joins.push(crate::expressions::Join {
5239 kind: crate::expressions::JoinKind::Cross,
5240 this: join_this,
5241 on: None,
5242 using: Vec::new(),
5243 use_inner_keyword: false,
5244 use_outer_keyword: false,
5245 deferred_condition: false,
5246 join_hint: None,
5247 match_condition: None,
5248 pivots: Vec::new(),
5249 comments: Vec::new(),
5250 nesting_group: 0,
5251 directed: false,
5252 });
5253 } else {
5254 new_from_exprs.push(expr.clone());
5255 }
5256 } else {
5257 new_from_exprs.push(expr.clone());
5258 }
5259 } else {
5260 new_from_exprs.push(expr.clone());
5261 }
5262 }
5263
5264 new_select.from = Some(crate::expressions::From {
5265 expressions: new_from_exprs,
5266 ..from.clone()
5267 });
5268 new_select.joins = new_joins;
5269 return Ok(Expression::Select(new_select));
5270 }
5271 }
5272 }
5273 }
5274 }
5275
5276 // CROSS JOIN UNNEST -> LATERAL VIEW EXPLODE for Hive/Spark
5277 if matches!(
5278 target,
5279 DialectType::Hive | DialectType::Spark | DialectType::Databricks
5280 ) {
5281 if let Expression::Select(ref s) = e {
5282 // Check if any joins are CROSS JOIN with UNNEST/EXPLODE
5283 let is_unnest_or_explode_expr = |expr: &Expression| -> bool {
5284 matches!(expr, Expression::Unnest(_))
5285 || matches!(expr, Expression::Function(f) if f.name.eq_ignore_ascii_case("EXPLODE"))
5286 };
5287 let has_unnest_join = s.joins.iter().any(|j| {
5288 j.kind == crate::expressions::JoinKind::Cross && (
5289 matches!(&j.this, Expression::Alias(a) if is_unnest_or_explode_expr(&a.this))
5290 || is_unnest_or_explode_expr(&j.this)
5291 )
5292 });
5293 if has_unnest_join {
5294 let mut select = s.clone();
5295 let mut new_joins = Vec::new();
5296 for join in select.joins.drain(..) {
5297 if join.kind == crate::expressions::JoinKind::Cross {
5298 // Extract the UNNEST/EXPLODE from the join
5299 let (func_expr, table_alias, col_aliases) = match &join.this {
5300 Expression::Alias(a) => {
5301 let ta = if a.alias.is_empty() {
5302 None
5303 } else {
5304 Some(a.alias.clone())
5305 };
5306 let cas = a.column_aliases.clone();
5307 match &a.this {
5308 Expression::Unnest(u) => {
5309 // Multi-arg UNNEST(y, z) -> INLINE(ARRAYS_ZIP(y, z))
5310 if !u.expressions.is_empty() {
5311 let mut all_args = vec![u.this.clone()];
5312 all_args.extend(u.expressions.clone());
5313 let arrays_zip =
5314 Expression::Function(Box::new(
5315 crate::expressions::Function::new(
5316 "ARRAYS_ZIP".to_string(),
5317 all_args,
5318 ),
5319 ));
5320 let inline = Expression::Function(Box::new(
5321 crate::expressions::Function::new(
5322 "INLINE".to_string(),
5323 vec![arrays_zip],
5324 ),
5325 ));
5326 (Some(inline), ta, a.column_aliases.clone())
5327 } else {
5328 // Convert UNNEST(x) to EXPLODE(x) or POSEXPLODE(x)
5329 let func_name = if u.with_ordinality {
5330 "POSEXPLODE"
5331 } else {
5332 "EXPLODE"
5333 };
5334 let explode = Expression::Function(Box::new(
5335 crate::expressions::Function::new(
5336 func_name.to_string(),
5337 vec![u.this.clone()],
5338 ),
5339 ));
5340 // For POSEXPLODE, add 'pos' to column aliases
5341 let cas = if u.with_ordinality {
5342 let mut pos_aliases =
5343 vec![Identifier::new(
5344 "pos".to_string(),
5345 )];
5346 pos_aliases
5347 .extend(a.column_aliases.clone());
5348 pos_aliases
5349 } else {
5350 a.column_aliases.clone()
5351 };
5352 (Some(explode), ta, cas)
5353 }
5354 }
5355 Expression::Function(f)
5356 if f.name.eq_ignore_ascii_case("EXPLODE") =>
5357 {
5358 (Some(Expression::Function(f.clone())), ta, cas)
5359 }
5360 _ => (None, None, Vec::new()),
5361 }
5362 }
5363 Expression::Unnest(u) => {
5364 let func_name = if u.with_ordinality {
5365 "POSEXPLODE"
5366 } else {
5367 "EXPLODE"
5368 };
5369 let explode = Expression::Function(Box::new(
5370 crate::expressions::Function::new(
5371 func_name.to_string(),
5372 vec![u.this.clone()],
5373 ),
5374 ));
5375 let ta = u.alias.clone();
5376 let col_aliases = if u.with_ordinality {
5377 vec![Identifier::new("pos".to_string())]
5378 } else {
5379 Vec::new()
5380 };
5381 (Some(explode), ta, col_aliases)
5382 }
5383 _ => (None, None, Vec::new()),
5384 };
5385 if let Some(func) = func_expr {
5386 select.lateral_views.push(crate::expressions::LateralView {
5387 this: func,
5388 table_alias,
5389 column_aliases: col_aliases,
5390 outer: false,
5391 });
5392 } else {
5393 new_joins.push(join);
5394 }
5395 } else {
5396 new_joins.push(join);
5397 }
5398 }
5399 select.joins = new_joins;
5400 return Ok(Expression::Select(select));
5401 }
5402 }
5403 }
5404
5405 // UNNEST expansion: DuckDB SELECT UNNEST(arr) in SELECT list -> expanded query
5406 // for BigQuery, Presto/Trino, Snowflake
5407 if matches!(source, DialectType::DuckDB | DialectType::PostgreSQL)
5408 && matches!(
5409 target,
5410 DialectType::BigQuery
5411 | DialectType::Presto
5412 | DialectType::Trino
5413 | DialectType::Snowflake
5414 )
5415 {
5416 if let Expression::Select(ref s) = e {
5417 // Check if any SELECT expressions contain UNNEST
5418 // Note: UNNEST can appear as Expression::Unnest OR Expression::Function("UNNEST")
5419 let has_unnest_in_select = s.expressions.iter().any(|expr| {
5420 fn contains_unnest(e: &Expression) -> bool {
5421 match e {
5422 Expression::Unnest(_) => true,
5423 Expression::Function(f)
5424 if f.name.eq_ignore_ascii_case("UNNEST") =>
5425 {
5426 true
5427 }
5428 Expression::Alias(a) => contains_unnest(&a.this),
5429 Expression::Add(op)
5430 | Expression::Sub(op)
5431 | Expression::Mul(op)
5432 | Expression::Div(op) => {
5433 contains_unnest(&op.left) || contains_unnest(&op.right)
5434 }
5435 _ => false,
5436 }
5437 }
5438 contains_unnest(expr)
5439 });
5440
5441 if has_unnest_in_select {
5442 let rewritten = Self::rewrite_unnest_expansion(s, target);
5443 if let Some(new_select) = rewritten {
5444 return Ok(Expression::Select(Box::new(new_select)));
5445 }
5446 }
5447 }
5448 }
5449
5450 // BigQuery -> PostgreSQL: convert escape sequences in string literals to actual characters
5451 // BigQuery '\n' -> PostgreSQL literal newline in string
5452 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::PostgreSQL)
5453 {
5454 if let Expression::Literal(ref lit) = e {
5455 if let Literal::String(ref s) = lit.as_ref() {
5456 if s.contains("\\n")
5457 || s.contains("\\t")
5458 || s.contains("\\r")
5459 || s.contains("\\\\")
5460 {
5461 let converted = s
5462 .replace("\\n", "\n")
5463 .replace("\\t", "\t")
5464 .replace("\\r", "\r")
5465 .replace("\\\\", "\\");
5466 return Ok(Expression::Literal(Box::new(Literal::String(converted))));
5467 }
5468 }
5469 }
5470 }
5471
5472 // Cross-dialect: convert Literal::Timestamp to target-specific CAST form
5473 // when source != target (identity tests keep the Literal::Timestamp for native handling)
5474 if source != target {
5475 if let Expression::Literal(ref lit) = e {
5476 if let Literal::Timestamp(ref s) = lit.as_ref() {
5477 let s = s.clone();
5478 // MySQL: TIMESTAMP handling depends on source dialect
5479 // BigQuery TIMESTAMP is timezone-aware -> TIMESTAMP() function in MySQL
5480 // Other sources' TIMESTAMP is non-timezone -> CAST('x' AS DATETIME) in MySQL
5481 if matches!(target, DialectType::MySQL) {
5482 if matches!(source, DialectType::BigQuery) {
5483 // BigQuery TIMESTAMP is timezone-aware -> MySQL TIMESTAMP() function
5484 return Ok(Expression::Function(Box::new(Function::new(
5485 "TIMESTAMP".to_string(),
5486 vec![Expression::Literal(Box::new(Literal::String(s)))],
5487 ))));
5488 } else {
5489 // Non-timezone TIMESTAMP -> CAST('x' AS DATETIME) in MySQL
5490 return Ok(Expression::Cast(Box::new(Cast {
5491 this: Expression::Literal(Box::new(Literal::String(s))),
5492 to: DataType::Custom {
5493 name: "DATETIME".to_string(),
5494 },
5495 trailing_comments: Vec::new(),
5496 double_colon_syntax: false,
5497 format: None,
5498 default: None,
5499 inferred_type: None,
5500 })));
5501 }
5502 }
5503 let dt = match target {
5504 DialectType::BigQuery | DialectType::StarRocks => DataType::Custom {
5505 name: "DATETIME".to_string(),
5506 },
5507 DialectType::Snowflake => {
5508 // BigQuery TIMESTAMP is timezone-aware -> use TIMESTAMPTZ for Snowflake
5509 if matches!(source, DialectType::BigQuery) {
5510 DataType::Custom {
5511 name: "TIMESTAMPTZ".to_string(),
5512 }
5513 } else if matches!(
5514 source,
5515 DialectType::PostgreSQL
5516 | DialectType::Redshift
5517 | DialectType::Snowflake
5518 ) {
5519 DataType::Timestamp {
5520 precision: None,
5521 timezone: false,
5522 }
5523 } else {
5524 DataType::Custom {
5525 name: "TIMESTAMPNTZ".to_string(),
5526 }
5527 }
5528 }
5529 DialectType::Spark | DialectType::Databricks => {
5530 // BigQuery TIMESTAMP is timezone-aware -> use plain TIMESTAMP for Spark/Databricks
5531 if matches!(source, DialectType::BigQuery) {
5532 DataType::Timestamp {
5533 precision: None,
5534 timezone: false,
5535 }
5536 } else {
5537 DataType::Custom {
5538 name: "TIMESTAMP_NTZ".to_string(),
5539 }
5540 }
5541 }
5542 DialectType::ClickHouse => DataType::Nullable {
5543 inner: Box::new(DataType::Custom {
5544 name: "DateTime".to_string(),
5545 }),
5546 },
5547 DialectType::TSQL | DialectType::Fabric => DataType::Custom {
5548 name: "DATETIME2".to_string(),
5549 },
5550 DialectType::DuckDB => {
5551 // DuckDB: use TIMESTAMPTZ when source is BigQuery (BQ TIMESTAMP is always UTC/tz-aware)
5552 // or when the timestamp string explicitly has timezone info
5553 if matches!(source, DialectType::BigQuery)
5554 || Self::timestamp_string_has_timezone(&s)
5555 {
5556 DataType::Custom {
5557 name: "TIMESTAMPTZ".to_string(),
5558 }
5559 } else {
5560 DataType::Timestamp {
5561 precision: None,
5562 timezone: false,
5563 }
5564 }
5565 }
5566 _ => DataType::Timestamp {
5567 precision: None,
5568 timezone: false,
5569 },
5570 };
5571 return Ok(Expression::Cast(Box::new(Cast {
5572 this: Expression::Literal(Box::new(Literal::String(s))),
5573 to: dt,
5574 trailing_comments: vec![],
5575 double_colon_syntax: false,
5576 format: None,
5577 default: None,
5578 inferred_type: None,
5579 })));
5580 }
5581 }
5582 }
5583
5584 // PostgreSQL DELETE requires explicit AS for table aliases
5585 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
5586 if let Expression::Delete(ref del) = e {
5587 if del.alias.is_some() && !del.alias_explicit_as {
5588 let mut new_del = del.clone();
5589 new_del.alias_explicit_as = true;
5590 return Ok(Expression::Delete(new_del));
5591 }
5592 }
5593 }
5594
5595 // UNION/INTERSECT/EXCEPT DISTINCT handling:
5596 // Some dialects require explicit DISTINCT (BigQuery, ClickHouse),
5597 // while others don't support it (Presto, Spark, DuckDB, etc.)
5598 {
5599 let needs_distinct =
5600 matches!(target, DialectType::BigQuery | DialectType::ClickHouse);
5601 let drop_distinct = matches!(
5602 target,
5603 DialectType::Presto
5604 | DialectType::Trino
5605 | DialectType::Athena
5606 | DialectType::Spark
5607 | DialectType::Databricks
5608 | DialectType::DuckDB
5609 | DialectType::Hive
5610 | DialectType::MySQL
5611 | DialectType::PostgreSQL
5612 | DialectType::SQLite
5613 | DialectType::TSQL
5614 | DialectType::Redshift
5615 | DialectType::Snowflake
5616 | DialectType::Oracle
5617 | DialectType::Teradata
5618 | DialectType::Drill
5619 | DialectType::Doris
5620 | DialectType::StarRocks
5621 );
5622 match &e {
5623 Expression::Union(u) if !u.all && needs_distinct && !u.distinct => {
5624 let mut new_u = (**u).clone();
5625 new_u.distinct = true;
5626 return Ok(Expression::Union(Box::new(new_u)));
5627 }
5628 Expression::Intersect(i) if !i.all && needs_distinct && !i.distinct => {
5629 let mut new_i = (**i).clone();
5630 new_i.distinct = true;
5631 return Ok(Expression::Intersect(Box::new(new_i)));
5632 }
5633 Expression::Except(ex) if !ex.all && needs_distinct && !ex.distinct => {
5634 let mut new_ex = (**ex).clone();
5635 new_ex.distinct = true;
5636 return Ok(Expression::Except(Box::new(new_ex)));
5637 }
5638 Expression::Union(u) if u.distinct && drop_distinct => {
5639 let mut new_u = (**u).clone();
5640 new_u.distinct = false;
5641 return Ok(Expression::Union(Box::new(new_u)));
5642 }
5643 Expression::Intersect(i) if i.distinct && drop_distinct => {
5644 let mut new_i = (**i).clone();
5645 new_i.distinct = false;
5646 return Ok(Expression::Intersect(Box::new(new_i)));
5647 }
5648 Expression::Except(ex) if ex.distinct && drop_distinct => {
5649 let mut new_ex = (**ex).clone();
5650 new_ex.distinct = false;
5651 return Ok(Expression::Except(Box::new(new_ex)));
5652 }
5653 _ => {}
5654 }
5655 }
5656
5657 // ClickHouse: MAP('a', '1') -> map('a', '1') (lowercase function name)
5658 if matches!(target, DialectType::ClickHouse) {
5659 if let Expression::Function(ref f) = e {
5660 if f.name.eq_ignore_ascii_case("MAP") && !f.args.is_empty() {
5661 let mut new_f = f.as_ref().clone();
5662 new_f.name = "map".to_string();
5663 return Ok(Expression::Function(Box::new(new_f)));
5664 }
5665 }
5666 }
5667
5668 // ClickHouse: INTERSECT ALL -> INTERSECT (ClickHouse doesn't support ALL on INTERSECT)
5669 if matches!(target, DialectType::ClickHouse) {
5670 if let Expression::Intersect(ref i) = e {
5671 if i.all {
5672 let mut new_i = (**i).clone();
5673 new_i.all = false;
5674 return Ok(Expression::Intersect(Box::new(new_i)));
5675 }
5676 }
5677 }
5678
5679 // Integer division: a / b -> CAST(a AS DOUBLE) / b for dialects that need it
5680 // Only from Generic source, to prevent double-wrapping
5681 if matches!(source, DialectType::Generic) {
5682 if let Expression::Div(ref op) = e {
5683 let cast_type = match target {
5684 DialectType::TSQL | DialectType::Fabric => Some(DataType::Float {
5685 precision: None,
5686 scale: None,
5687 real_spelling: false,
5688 }),
5689 DialectType::Drill
5690 | DialectType::Trino
5691 | DialectType::Athena
5692 | DialectType::Presto => Some(DataType::Double {
5693 precision: None,
5694 scale: None,
5695 }),
5696 DialectType::PostgreSQL
5697 | DialectType::Redshift
5698 | DialectType::Materialize
5699 | DialectType::Teradata
5700 | DialectType::RisingWave => Some(DataType::Double {
5701 precision: None,
5702 scale: None,
5703 }),
5704 _ => None,
5705 };
5706 if let Some(dt) = cast_type {
5707 let cast_left = Expression::Cast(Box::new(Cast {
5708 this: op.left.clone(),
5709 to: dt,
5710 double_colon_syntax: false,
5711 trailing_comments: Vec::new(),
5712 format: None,
5713 default: None,
5714 inferred_type: None,
5715 }));
5716 let new_op = crate::expressions::BinaryOp {
5717 left: cast_left,
5718 right: op.right.clone(),
5719 left_comments: op.left_comments.clone(),
5720 operator_comments: op.operator_comments.clone(),
5721 trailing_comments: op.trailing_comments.clone(),
5722 inferred_type: None,
5723 };
5724 return Ok(Expression::Div(Box::new(new_op)));
5725 }
5726 }
5727 }
5728
5729 // CREATE DATABASE -> CREATE SCHEMA for DuckDB target
5730 if matches!(target, DialectType::DuckDB) {
5731 if let Expression::CreateDatabase(db) = e {
5732 let mut schema = crate::expressions::CreateSchema::new(db.name.name.clone());
5733 schema.if_not_exists = db.if_not_exists;
5734 return Ok(Expression::CreateSchema(Box::new(schema)));
5735 }
5736 if let Expression::DropDatabase(db) = e {
5737 let mut schema = crate::expressions::DropSchema::new(db.name.name.clone());
5738 schema.if_exists = db.if_exists;
5739 return Ok(Expression::DropSchema(Box::new(schema)));
5740 }
5741 }
5742
5743 // Strip ClickHouse Nullable(...) wrapper for non-ClickHouse targets
5744 if matches!(source, DialectType::ClickHouse)
5745 && !matches!(target, DialectType::ClickHouse)
5746 {
5747 if let Expression::Cast(ref c) = e {
5748 if let DataType::Custom { ref name } = c.to {
5749 if name.len() >= 9
5750 && name[..9].eq_ignore_ascii_case("NULLABLE(")
5751 && name.ends_with(")")
5752 {
5753 let inner = &name[9..name.len() - 1]; // strip "Nullable(" and ")"
5754 let inner_upper = inner.to_ascii_uppercase();
5755 let new_dt = match inner_upper.as_str() {
5756 "DATETIME" | "DATETIME64" => DataType::Timestamp {
5757 precision: None,
5758 timezone: false,
5759 },
5760 "DATE" => DataType::Date,
5761 "INT64" | "BIGINT" => DataType::BigInt { length: None },
5762 "INT32" | "INT" | "INTEGER" => DataType::Int {
5763 length: None,
5764 integer_spelling: false,
5765 },
5766 "FLOAT64" | "DOUBLE" => DataType::Double {
5767 precision: None,
5768 scale: None,
5769 },
5770 "STRING" => DataType::Text,
5771 _ => DataType::Custom {
5772 name: inner.to_string(),
5773 },
5774 };
5775 let mut new_cast = c.clone();
5776 new_cast.to = new_dt;
5777 return Ok(Expression::Cast(new_cast));
5778 }
5779 }
5780 }
5781 }
5782
5783 // ARRAY_CONCAT_AGG -> Snowflake: ARRAY_FLATTEN(ARRAY_AGG(...))
5784 if matches!(target, DialectType::Snowflake) {
5785 if let Expression::ArrayConcatAgg(ref agg) = e {
5786 let mut agg_clone = agg.as_ref().clone();
5787 agg_clone.name = None; // Clear name so generator uses default "ARRAY_AGG"
5788 let array_agg = Expression::ArrayAgg(Box::new(agg_clone));
5789 let flatten = Expression::Function(Box::new(Function::new(
5790 "ARRAY_FLATTEN".to_string(),
5791 vec![array_agg],
5792 )));
5793 return Ok(flatten);
5794 }
5795 }
5796
5797 // ARRAY_CONCAT_AGG -> others: keep as function for cross-dialect
5798 if !matches!(target, DialectType::BigQuery | DialectType::Snowflake) {
5799 if let Expression::ArrayConcatAgg(agg) = e {
5800 let arg = agg.this;
5801 return Ok(Expression::Function(Box::new(Function::new(
5802 "ARRAY_CONCAT_AGG".to_string(),
5803 vec![arg],
5804 ))));
5805 }
5806 }
5807
5808 // Determine what action to take by inspecting e immutably
5809 let action = {
5810 let source_propagates_nulls =
5811 matches!(source, DialectType::Snowflake | DialectType::BigQuery);
5812 let target_ignores_nulls =
5813 matches!(target, DialectType::DuckDB | DialectType::PostgreSQL);
5814
5815 match &e {
5816 Expression::Function(f) => {
5817 let name = f.name.to_ascii_uppercase();
5818 // DuckDB json(x) is a synonym for CAST(x AS JSON) — parses a string.
5819 // Map to JSON_PARSE(x) for Trino/Presto/Athena to preserve semantics.
5820 if name == "JSON"
5821 && f.args.len() == 1
5822 && matches!(source, DialectType::DuckDB)
5823 && matches!(
5824 target,
5825 DialectType::Presto | DialectType::Trino | DialectType::Athena
5826 )
5827 {
5828 Action::DuckDBJsonFuncToJsonParse
5829 // DuckDB json_valid(x) has no direct Trino equivalent; emit the
5830 // SQL:2016 `x IS JSON` predicate which has matching semantics.
5831 } else if name == "JSON_VALID"
5832 && f.args.len() == 1
5833 && matches!(source, DialectType::DuckDB)
5834 && matches!(
5835 target,
5836 DialectType::Presto | DialectType::Trino | DialectType::Athena
5837 )
5838 {
5839 Action::DuckDBJsonValidToIsJson
5840 // DATE_PART: strip quotes from first arg when target is Snowflake (source != Snowflake)
5841 } else if (name == "DATE_PART" || name == "DATEPART")
5842 && f.args.len() == 2
5843 && matches!(target, DialectType::Snowflake)
5844 && !matches!(source, DialectType::Snowflake)
5845 && matches!(
5846 &f.args[0],
5847 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_))
5848 )
5849 {
5850 Action::DatePartUnquote
5851 } else if source_propagates_nulls
5852 && target_ignores_nulls
5853 && (name == "GREATEST" || name == "LEAST")
5854 && f.args.len() >= 2
5855 {
5856 Action::GreatestLeastNull
5857 } else if matches!(source, DialectType::Snowflake)
5858 && name == "ARRAY_GENERATE_RANGE"
5859 && f.args.len() >= 2
5860 {
5861 Action::ArrayGenerateRange
5862 } else if matches!(source, DialectType::Snowflake)
5863 && matches!(target, DialectType::DuckDB)
5864 && name == "DATE_TRUNC"
5865 && f.args.len() == 2
5866 {
5867 // Determine if DuckDB DATE_TRUNC needs CAST wrapping to preserve input type.
5868 // Logic based on Python sqlglot's input_type_preserved flag:
5869 // - DATE + non-date-unit (HOUR, MINUTE, etc.) -> wrap
5870 // - TIMESTAMP + date-unit (YEAR, QUARTER, MONTH, WEEK, DAY) -> wrap
5871 // - TIMESTAMPTZ/TIMESTAMPLTZ/TIME -> always wrap
5872 let unit_str = match &f.args[0] {
5873 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_)) => {
5874 let crate::expressions::Literal::String(s) = lit.as_ref() else { unreachable!() };
5875 Some(s.to_ascii_uppercase())
5876 }
5877 _ => None,
5878 };
5879 let is_date_unit = unit_str.as_ref().map_or(false, |u| {
5880 matches!(u.as_str(), "YEAR" | "QUARTER" | "MONTH" | "WEEK" | "DAY")
5881 });
5882 match &f.args[1] {
5883 Expression::Cast(c) => match &c.to {
5884 DataType::Time { .. } => Action::DateTruncWrapCast,
5885 DataType::Custom { name }
5886 if name.eq_ignore_ascii_case("TIMESTAMPTZ")
5887 || name.eq_ignore_ascii_case("TIMESTAMPLTZ") =>
5888 {
5889 Action::DateTruncWrapCast
5890 }
5891 DataType::Timestamp { timezone: true, .. } => {
5892 Action::DateTruncWrapCast
5893 }
5894 DataType::Date if !is_date_unit => Action::DateTruncWrapCast,
5895 DataType::Timestamp {
5896 timezone: false, ..
5897 } if is_date_unit => Action::DateTruncWrapCast,
5898 _ => Action::None,
5899 },
5900 _ => Action::None,
5901 }
5902 } else if matches!(source, DialectType::Snowflake)
5903 && matches!(target, DialectType::DuckDB)
5904 && name == "TO_DATE"
5905 && f.args.len() == 1
5906 && !matches!(
5907 &f.args[0],
5908 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_))
5909 )
5910 {
5911 Action::ToDateToCast
5912 } else if !matches!(source, DialectType::Redshift)
5913 && matches!(target, DialectType::Redshift)
5914 && name == "CONVERT_TIMEZONE"
5915 && (f.args.len() == 2 || f.args.len() == 3)
5916 {
5917 // Convert Function("CONVERT_TIMEZONE") to Expression::ConvertTimezone
5918 // so Redshift's transform_expr won't expand 2-arg to 3-arg with 'UTC'.
5919 // The Redshift parser adds 'UTC' as default source_tz, but when
5920 // transpiling from other dialects, we should preserve the original form.
5921 Action::ConvertTimezoneToExpr
5922 } else if matches!(source, DialectType::Snowflake)
5923 && matches!(target, DialectType::DuckDB)
5924 && name == "REGEXP_REPLACE"
5925 && f.args.len() == 4
5926 && !matches!(
5927 &f.args[3],
5928 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_))
5929 )
5930 {
5931 // Snowflake REGEXP_REPLACE with position arg -> DuckDB needs 'g' flag
5932 Action::RegexpReplaceSnowflakeToDuckDB
5933 } else if matches!(source, DialectType::Snowflake)
5934 && matches!(target, DialectType::DuckDB)
5935 && name == "REGEXP_REPLACE"
5936 && f.args.len() == 5
5937 {
5938 // Snowflake REGEXP_REPLACE(s, p, r, pos, occ) -> DuckDB
5939 Action::RegexpReplacePositionSnowflakeToDuckDB
5940 } else if matches!(source, DialectType::Snowflake)
5941 && matches!(target, DialectType::DuckDB)
5942 && name == "REGEXP_SUBSTR"
5943 {
5944 // Snowflake REGEXP_SUBSTR -> DuckDB REGEXP_EXTRACT variants
5945 Action::RegexpSubstrSnowflakeToDuckDB
5946 } else if matches!(source, DialectType::Snowflake)
5947 && matches!(target, DialectType::Snowflake)
5948 && (name == "REGEXP_SUBSTR" || name == "REGEXP_SUBSTR_ALL")
5949 && f.args.len() == 6
5950 {
5951 // Snowflake identity: strip trailing group=0
5952 Action::RegexpSubstrSnowflakeIdentity
5953 } else if matches!(source, DialectType::Snowflake)
5954 && matches!(target, DialectType::DuckDB)
5955 && name == "REGEXP_SUBSTR_ALL"
5956 {
5957 // Snowflake REGEXP_SUBSTR_ALL -> DuckDB REGEXP_EXTRACT_ALL variants
5958 Action::RegexpSubstrAllSnowflakeToDuckDB
5959 } else if matches!(source, DialectType::Snowflake)
5960 && matches!(target, DialectType::DuckDB)
5961 && name == "REGEXP_COUNT"
5962 {
5963 // Snowflake REGEXP_COUNT -> DuckDB LENGTH(REGEXP_EXTRACT_ALL(...))
5964 Action::RegexpCountSnowflakeToDuckDB
5965 } else if matches!(source, DialectType::Snowflake)
5966 && matches!(target, DialectType::DuckDB)
5967 && name == "REGEXP_INSTR"
5968 {
5969 // Snowflake REGEXP_INSTR -> DuckDB complex CASE expression
5970 Action::RegexpInstrSnowflakeToDuckDB
5971 } else if matches!(source, DialectType::BigQuery)
5972 && matches!(target, DialectType::Snowflake)
5973 && name == "REGEXP_EXTRACT_ALL"
5974 {
5975 // BigQuery REGEXP_EXTRACT_ALL -> Snowflake REGEXP_SUBSTR_ALL
5976 Action::RegexpExtractAllToSnowflake
5977 } else if name == "_BQ_TO_HEX" {
5978 // Internal marker from TO_HEX conversion - bare (no LOWER/UPPER wrapper)
5979 Action::BigQueryToHexBare
5980 } else if matches!(source, DialectType::BigQuery)
5981 && !matches!(target, DialectType::BigQuery)
5982 {
5983 // BigQuery-specific functions that need to be converted to standard forms
5984 match name.as_str() {
5985 "TIMESTAMP_DIFF" | "DATETIME_DIFF" | "TIME_DIFF"
5986 | "DATE_DIFF"
5987 | "TIMESTAMP_ADD" | "TIMESTAMP_SUB"
5988 | "DATETIME_ADD" | "DATETIME_SUB"
5989 | "TIME_ADD" | "TIME_SUB"
5990 | "DATE_ADD" | "DATE_SUB"
5991 | "SAFE_DIVIDE"
5992 | "GENERATE_UUID"
5993 | "COUNTIF"
5994 | "EDIT_DISTANCE"
5995 | "TIMESTAMP_SECONDS" | "TIMESTAMP_MILLIS" | "TIMESTAMP_MICROS"
5996 | "TIMESTAMP_TRUNC" | "DATETIME_TRUNC" | "DATE_TRUNC"
5997 | "TO_HEX"
5998 | "TO_JSON_STRING"
5999 | "GENERATE_ARRAY" | "GENERATE_TIMESTAMP_ARRAY"
6000 | "DIV"
6001 | "UNIX_DATE" | "UNIX_SECONDS" | "UNIX_MILLIS" | "UNIX_MICROS"
6002 | "LAST_DAY"
6003 | "TIME" | "DATETIME" | "TIMESTAMP" | "STRING"
6004 | "REGEXP_CONTAINS"
6005 | "CONTAINS_SUBSTR"
6006 | "SAFE_ADD" | "SAFE_SUBTRACT" | "SAFE_MULTIPLY"
6007 | "SAFE_CAST"
6008 | "GENERATE_DATE_ARRAY"
6009 | "PARSE_DATE" | "PARSE_TIMESTAMP"
6010 | "FORMAT_DATE" | "FORMAT_DATETIME" | "FORMAT_TIMESTAMP"
6011 | "ARRAY_CONCAT"
6012 | "JSON_QUERY" | "JSON_VALUE_ARRAY"
6013 | "INSTR"
6014 | "MD5" | "SHA1" | "SHA256" | "SHA512"
6015 | "GENERATE_UUID()" // just in case
6016 | "REGEXP_EXTRACT_ALL"
6017 | "REGEXP_EXTRACT"
6018 | "INT64"
6019 | "ARRAY_CONCAT_AGG"
6020 | "DATE_DIFF(" // just in case
6021 | "TO_HEX_MD5" // internal
6022 | "MOD"
6023 | "CONCAT"
6024 | "CURRENT_TIMESTAMP" | "CURRENT_DATE" | "CURRENT_DATETIME" | "CURRENT_TIME"
6025 | "STRUCT"
6026 | "ROUND"
6027 | "MAKE_INTERVAL"
6028 | "ARRAY_TO_STRING"
6029 | "PERCENTILE_CONT"
6030 => Action::BigQueryFunctionNormalize,
6031 "ARRAY" if matches!(target, DialectType::Snowflake)
6032 && f.args.len() == 1
6033 && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT"))
6034 => Action::BigQueryArraySelectAsStructToSnowflake,
6035 _ => Action::None,
6036 }
6037 } else if matches!(source, DialectType::BigQuery)
6038 && matches!(target, DialectType::BigQuery)
6039 {
6040 // BigQuery -> BigQuery normalizations
6041 match name.as_str() {
6042 "TIMESTAMP_DIFF"
6043 | "DATETIME_DIFF"
6044 | "TIME_DIFF"
6045 | "DATE_DIFF"
6046 | "DATE_ADD"
6047 | "TO_HEX"
6048 | "CURRENT_TIMESTAMP"
6049 | "CURRENT_DATE"
6050 | "CURRENT_TIME"
6051 | "CURRENT_DATETIME"
6052 | "GENERATE_DATE_ARRAY"
6053 | "INSTR"
6054 | "FORMAT_DATETIME"
6055 | "DATETIME"
6056 | "MAKE_INTERVAL" => Action::BigQueryFunctionNormalize,
6057 _ => Action::None,
6058 }
6059 } else {
6060 // Generic function normalization for non-BigQuery sources
6061 match name.as_str() {
6062 "ARBITRARY" | "AGGREGATE"
6063 | "REGEXP_MATCHES" | "REGEXP_FULL_MATCH"
6064 | "STRUCT_EXTRACT"
6065 | "LIST_FILTER" | "LIST_TRANSFORM" | "LIST_SORT" | "LIST_REVERSE_SORT"
6066 | "STRING_TO_ARRAY" | "STR_SPLIT" | "STR_SPLIT_REGEX" | "SPLIT_TO_ARRAY"
6067 | "SUBSTRINGINDEX"
6068 | "ARRAY_LENGTH" | "SIZE" | "CARDINALITY"
6069 | "UNICODE"
6070 | "XOR"
6071 | "ARRAY_REVERSE_SORT"
6072 | "ENCODE" | "DECODE"
6073 | "QUANTILE"
6074 | "EPOCH" | "EPOCH_MS"
6075 | "HASHBYTES"
6076 | "JSON_EXTRACT_PATH" | "JSON_EXTRACT_PATH_TEXT"
6077 | "APPROX_DISTINCT"
6078 | "DATE_PARSE" | "FORMAT_DATETIME"
6079 | "REGEXP_EXTRACT" | "REGEXP_SUBSTR" | "TO_DAYS"
6080 | "RLIKE"
6081 | "DATEDIFF" | "DATE_DIFF" | "MONTHS_BETWEEN"
6082 | "ADD_MONTHS" | "DATEADD" | "DATE_ADD" | "DATE_SUB" | "DATETRUNC"
6083 | "LAST_DAY" | "LAST_DAY_OF_MONTH" | "EOMONTH"
6084 | "ARRAY_CONSTRUCT" | "ARRAY_CAT" | "ARRAY_COMPACT"
6085 | "ARRAY_FILTER" | "FILTER" | "REDUCE" | "ARRAY_REVERSE"
6086 | "MAP" | "MAP_FROM_ENTRIES"
6087 | "COLLECT_LIST" | "COLLECT_SET"
6088 | "ISNAN" | "IS_NAN"
6089 | "TO_UTC_TIMESTAMP" | "FROM_UTC_TIMESTAMP"
6090 | "FORMAT_NUMBER"
6091 | "TOMONDAY" | "TOSTARTOFWEEK" | "TOSTARTOFMONTH" | "TOSTARTOFYEAR"
6092 | "ELEMENT_AT"
6093 | "EXPLODE" | "EXPLODE_OUTER" | "POSEXPLODE"
6094 | "SPLIT_PART"
6095 // GENERATE_SERIES: handled separately below
6096 | "JSON_EXTRACT" | "JSON_EXTRACT_SCALAR"
6097 | "JSON_QUERY" | "JSON_VALUE"
6098 | "JSON_SEARCH"
6099 | "JSON_EXTRACT_JSON" | "BSON_EXTRACT_BSON"
6100 | "TO_UNIX_TIMESTAMP" | "UNIX_TIMESTAMP"
6101 | "CURDATE" | "CURTIME"
6102 | "ARRAY_TO_STRING"
6103 | "ARRAY_SORT" | "SORT_ARRAY"
6104 | "LEFT" | "RIGHT"
6105 | "MAP_FROM_ARRAYS"
6106 | "LIKE" | "ILIKE"
6107 | "ARRAY_CONCAT" | "LIST_CONCAT"
6108 | "QUANTILE_CONT" | "QUANTILE_DISC"
6109 | "PERCENTILE_CONT" | "PERCENTILE_DISC"
6110 | "PERCENTILE_APPROX" | "APPROX_PERCENTILE"
6111 | "LOCATE" | "STRPOS" | "INSTR"
6112 | "CHAR"
6113 // CONCAT: handled separately for COALESCE wrapping
6114 | "ARRAY_JOIN"
6115 | "ARRAY_CONTAINS" | "HAS" | "CONTAINS"
6116 | "ISNULL"
6117 | "MONTHNAME"
6118 | "TO_TIMESTAMP"
6119 | "TO_DATE"
6120 | "TO_JSON"
6121 | "REGEXP_SPLIT"
6122 | "SPLIT"
6123 | "FORMATDATETIME"
6124 | "ARRAYJOIN"
6125 | "SPLITBYSTRING" | "SPLITBYREGEXP"
6126 | "NVL"
6127 | "TO_CHAR"
6128 | "DBMS_RANDOM.VALUE"
6129 | "REGEXP_LIKE"
6130 | "REPLICATE"
6131 | "LEN"
6132 | "COUNT_BIG"
6133 | "DATEFROMPARTS"
6134 | "DATETIMEFROMPARTS"
6135 | "CONVERT" | "TRY_CONVERT"
6136 | "STRFTIME" | "STRPTIME"
6137 | "DATE_FORMAT" | "FORMAT_DATE"
6138 | "PARSE_TIMESTAMP" | "PARSE_DATE"
6139 | "FROM_BASE64" | "TO_BASE64"
6140 | "GETDATE"
6141 | "TO_HEX" | "FROM_HEX" | "UNHEX" | "HEX"
6142 | "TO_UTF8" | "FROM_UTF8"
6143 | "STARTS_WITH" | "STARTSWITH"
6144 | "APPROX_COUNT_DISTINCT"
6145 | "JSON_FORMAT"
6146 | "SYSDATE"
6147 | "LOGICAL_OR" | "LOGICAL_AND"
6148 | "MONTHS_ADD"
6149 | "SCHEMA_NAME"
6150 | "STRTOL"
6151 | "EDITDIST3"
6152 | "FORMAT"
6153 | "LIST_CONTAINS" | "LIST_HAS"
6154 | "VARIANCE" | "STDDEV"
6155 | "ISINF"
6156 | "TO_UNIXTIME"
6157 | "FROM_UNIXTIME"
6158 | "DATEPART" | "DATE_PART"
6159 | "DATENAME"
6160 | "STRING_AGG"
6161 | "JSON_ARRAYAGG"
6162 | "APPROX_QUANTILE"
6163 | "MAKE_DATE"
6164 | "LIST_HAS_ANY" | "ARRAY_HAS_ANY"
6165 | "RANGE"
6166 | "TRY_ELEMENT_AT"
6167 | "STR_TO_MAP"
6168 | "STRING"
6169 | "STR_TO_TIME"
6170 | "CURRENT_SCHEMA"
6171 | "LTRIM" | "RTRIM"
6172 | "UUID"
6173 | "FARM_FINGERPRINT"
6174 | "JSON_KEYS"
6175 | "WEEKOFYEAR"
6176 | "CONCAT_WS"
6177 | "ARRAY_SLICE"
6178 | "ARRAY_PREPEND"
6179 | "ARRAY_REMOVE"
6180 | "GENERATE_DATE_ARRAY"
6181 | "PARSE_JSON"
6182 | "JSON_REMOVE"
6183 | "JSON_SET"
6184 | "LEVENSHTEIN"
6185 | "CURRENT_VERSION"
6186 | "ARRAY_MAX"
6187 | "ARRAY_MIN"
6188 | "JAROWINKLER_SIMILARITY"
6189 | "CURRENT_SCHEMAS"
6190 | "TO_VARIANT"
6191 | "JSON_GROUP_ARRAY" | "JSON_GROUP_OBJECT"
6192 | "ARRAYS_OVERLAP" | "ARRAY_INTERSECTION"
6193 => Action::GenericFunctionNormalize,
6194 // Canonical date functions -> dialect-specific
6195 "TS_OR_DS_TO_DATE" => Action::TsOrDsToDateConvert,
6196 "TS_OR_DS_TO_DATE_STR" if f.args.len() == 1 => Action::TsOrDsToDateStrConvert,
6197 "DATE_STR_TO_DATE" if f.args.len() == 1 => Action::DateStrToDateConvert,
6198 "TIME_STR_TO_DATE" if f.args.len() == 1 => Action::TimeStrToDateConvert,
6199 "TIME_STR_TO_TIME" if f.args.len() <= 2 => Action::TimeStrToTimeConvert,
6200 "TIME_STR_TO_UNIX" if f.args.len() == 1 => Action::TimeStrToUnixConvert,
6201 "TIME_TO_TIME_STR" if f.args.len() == 1 => Action::TimeToTimeStrConvert,
6202 "DATE_TO_DATE_STR" if f.args.len() == 1 => Action::DateToDateStrConvert,
6203 "DATE_TO_DI" if f.args.len() == 1 => Action::DateToDiConvert,
6204 "DI_TO_DATE" if f.args.len() == 1 => Action::DiToDateConvert,
6205 "TS_OR_DI_TO_DI" if f.args.len() == 1 => Action::TsOrDiToDiConvert,
6206 "UNIX_TO_STR" if f.args.len() == 2 => Action::UnixToStrConvert,
6207 "UNIX_TO_TIME" if f.args.len() == 1 => Action::UnixToTimeConvert,
6208 "UNIX_TO_TIME_STR" if f.args.len() == 1 => Action::UnixToTimeStrConvert,
6209 "TIME_TO_UNIX" if f.args.len() == 1 => Action::TimeToUnixConvert,
6210 "TIME_TO_STR" if f.args.len() == 2 => Action::TimeToStrConvert,
6211 "STR_TO_UNIX" if f.args.len() == 2 => Action::StrToUnixConvert,
6212 // STR_TO_DATE(x, fmt) -> dialect-specific
6213 "STR_TO_DATE" if f.args.len() == 2
6214 && matches!(source, DialectType::Generic) => Action::StrToDateConvert,
6215 "STR_TO_DATE" => Action::GenericFunctionNormalize,
6216 // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> dialect-specific DATE_ADD
6217 "TS_OR_DS_ADD" if f.args.len() == 3
6218 && matches!(source, DialectType::Generic) => Action::TsOrDsAddConvert,
6219 // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, '1970-01-01')
6220 "DATE_FROM_UNIX_DATE" if f.args.len() == 1 => Action::DateFromUnixDateConvert,
6221 // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b [ELSE c] END
6222 "NVL2" if (f.args.len() == 2 || f.args.len() == 3) => Action::Nvl2Expand,
6223 // IFNULL(a, b) -> COALESCE(a, b) when coming from Generic source
6224 "IFNULL" if f.args.len() == 2 => Action::IfnullToCoalesce,
6225 // IS_ASCII(x) -> dialect-specific
6226 "IS_ASCII" if f.args.len() == 1 => Action::IsAsciiConvert,
6227 // STR_POSITION(haystack, needle[, pos[, occ]]) -> dialect-specific
6228 "STR_POSITION" => Action::StrPositionConvert,
6229 // ARRAY_SUM -> dialect-specific
6230 "ARRAY_SUM" => Action::ArraySumConvert,
6231 // ARRAY_SIZE -> dialect-specific (Drill only)
6232 "ARRAY_SIZE" if matches!(target, DialectType::Drill) => Action::ArraySizeConvert,
6233 // ARRAY_ANY -> dialect-specific
6234 "ARRAY_ANY" if f.args.len() == 2 => Action::ArrayAnyConvert,
6235 // Functions needing specific cross-dialect transforms
6236 "MAX_BY" | "MIN_BY" if matches!(target, DialectType::ClickHouse | DialectType::Spark | DialectType::Databricks | DialectType::DuckDB) => Action::MaxByMinByConvert,
6237 "STRUCT" if matches!(source, DialectType::Spark | DialectType::Databricks)
6238 && !matches!(target, DialectType::Spark | DialectType::Databricks | DialectType::Hive) => Action::SparkStructConvert,
6239 "ARRAY" if matches!(source, DialectType::BigQuery)
6240 && matches!(target, DialectType::Snowflake)
6241 && f.args.len() == 1
6242 && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT")) => Action::BigQueryArraySelectAsStructToSnowflake,
6243 "ARRAY" if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena | DialectType::BigQuery | DialectType::DuckDB | DialectType::Snowflake | DialectType::ClickHouse | DialectType::StarRocks) => Action::ArraySyntaxConvert,
6244 "TRUNC" if f.args.len() == 2 && matches!(&f.args[1], Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))) && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::ClickHouse) => Action::TruncToDateTrunc,
6245 "TRUNC" | "TRUNCATE" if f.args.len() <= 2 && !f.args.get(1).map_or(false, |a| matches!(a, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))) => Action::GenericFunctionNormalize,
                        // DATE_TRUNC('unit', x) from Generic source -> arg swap for
                        // BigQuery/Doris/StarRocks/Spark/Databricks/MySQL
6247 "DATE_TRUNC" if f.args.len() == 2
6248 && matches!(source, DialectType::Generic)
6249 && matches!(target, DialectType::BigQuery | DialectType::Doris | DialectType::StarRocks
6250 | DialectType::Spark | DialectType::Databricks | DialectType::MySQL) => Action::DateTruncSwapArgs,
6251 // TIMESTAMP_TRUNC(x, UNIT) from Generic source -> convert to per-dialect
6252 "TIMESTAMP_TRUNC" if f.args.len() >= 2
6253 && matches!(source, DialectType::Generic) => Action::TimestampTruncConvert,
6254 "UNIFORM" if matches!(target, DialectType::Snowflake) => Action::GenericFunctionNormalize,
6255 // GENERATE_SERIES -> SEQUENCE/UNNEST/EXPLODE for target dialects
6256 "GENERATE_SERIES" if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
6257 && !matches!(target, DialectType::PostgreSQL | DialectType::Redshift | DialectType::TSQL | DialectType::Fabric) => Action::GenerateSeriesConvert,
6258 // GENERATE_SERIES with interval normalization for PG target
6259 "GENERATE_SERIES" if f.args.len() >= 3
6260 && matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
6261 && matches!(target, DialectType::PostgreSQL | DialectType::Redshift) => Action::GenerateSeriesConvert,
6262 "GENERATE_SERIES" => Action::None, // passthrough for other cases
6263 // CONCAT(a, b) -> COALESCE wrapping for Presto/ClickHouse from PostgreSQL
6264 "CONCAT" if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
6265 && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::ClickHouse) => Action::ConcatCoalesceWrap,
6266 "CONCAT" => Action::GenericFunctionNormalize,
6267 // DIV(a, b) -> target-specific integer division
6268 "DIV" if f.args.len() == 2
6269 && matches!(source, DialectType::PostgreSQL)
6270 && matches!(target, DialectType::DuckDB | DialectType::BigQuery | DialectType::SQLite) => Action::DivFuncConvert,
6271 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
6272 "JSON_OBJECT_AGG" | "JSONB_OBJECT_AGG" if f.args.len() == 2
6273 && matches!(target, DialectType::DuckDB) => Action::JsonObjectAggConvert,
6274 // JSONB_EXISTS -> JSON_EXISTS for DuckDB
6275 "JSONB_EXISTS" if f.args.len() == 2
6276 && matches!(target, DialectType::DuckDB) => Action::JsonbExistsConvert,
6277 // DATE_BIN -> TIME_BUCKET for DuckDB
6278 "DATE_BIN" if matches!(target, DialectType::DuckDB) => Action::DateBinConvert,
6279 // Multi-arg MIN(a,b,c) -> LEAST, MAX(a,b,c) -> GREATEST
6280 "MIN" | "MAX" if f.args.len() > 1 && !matches!(target, DialectType::SQLite) => Action::MinMaxToLeastGreatest,
6281 // ClickHouse uniq -> APPROX_COUNT_DISTINCT for other dialects
6282 "UNIQ" if matches!(source, DialectType::ClickHouse) && !matches!(target, DialectType::ClickHouse) => Action::ClickHouseUniqToApproxCountDistinct,
6283 // ClickHouse any -> ANY_VALUE for other dialects
6284 "ANY" if f.args.len() == 1 && matches!(source, DialectType::ClickHouse) && !matches!(target, DialectType::ClickHouse) => Action::ClickHouseAnyToAnyValue,
6285 _ => Action::None,
6286 }
6287 }
6288 }
6289 Expression::AggregateFunction(af) => {
6290 let name = af.name.to_ascii_uppercase();
6291 match name.as_str() {
6292 "ARBITRARY" | "AGGREGATE" => Action::GenericFunctionNormalize,
6293 "JSON_ARRAYAGG" => Action::GenericFunctionNormalize,
6294 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
6295 "JSON_OBJECT_AGG" | "JSONB_OBJECT_AGG"
6296 if matches!(target, DialectType::DuckDB) =>
6297 {
6298 Action::JsonObjectAggConvert
6299 }
6300 "ARRAY_AGG"
6301 if matches!(
6302 target,
6303 DialectType::Hive
6304 | DialectType::Spark
6305 | DialectType::Databricks
6306 ) =>
6307 {
6308 Action::ArrayAggToCollectList
6309 }
6310 "MAX_BY" | "MIN_BY"
6311 if matches!(
6312 target,
6313 DialectType::ClickHouse
6314 | DialectType::Spark
6315 | DialectType::Databricks
6316 | DialectType::DuckDB
6317 ) =>
6318 {
6319 Action::MaxByMinByConvert
6320 }
6321 "COLLECT_LIST"
6322 if matches!(
6323 target,
6324 DialectType::Presto | DialectType::Trino | DialectType::DuckDB
6325 ) =>
6326 {
6327 Action::CollectListToArrayAgg
6328 }
6329 "COLLECT_SET"
6330 if matches!(
6331 target,
6332 DialectType::Presto
6333 | DialectType::Trino
6334 | DialectType::Snowflake
6335 | DialectType::DuckDB
6336 ) =>
6337 {
6338 Action::CollectSetConvert
6339 }
6340 "PERCENTILE"
6341 if matches!(
6342 target,
6343 DialectType::DuckDB | DialectType::Presto | DialectType::Trino
6344 ) =>
6345 {
6346 Action::PercentileConvert
6347 }
6348 // CORR -> CASE WHEN ISNAN(CORR(a,b)) THEN NULL ELSE CORR(a,b) END for DuckDB
6349 "CORR"
6350 if matches!(target, DialectType::DuckDB)
6351 && matches!(source, DialectType::Snowflake) =>
6352 {
6353 Action::CorrIsnanWrap
6354 }
6355 // BigQuery APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
6356 "APPROX_QUANTILES"
6357 if matches!(source, DialectType::BigQuery)
6358 && matches!(target, DialectType::DuckDB) =>
6359 {
6360 Action::BigQueryApproxQuantiles
6361 }
6362 // BigQuery PERCENTILE_CONT(x, frac RESPECT NULLS) -> QUANTILE_CONT(x, frac) for DuckDB
6363 "PERCENTILE_CONT"
6364 if matches!(source, DialectType::BigQuery)
6365 && matches!(target, DialectType::DuckDB)
6366 && af.args.len() >= 2 =>
6367 {
6368 Action::BigQueryPercentileContToDuckDB
6369 }
6370 _ => Action::None,
6371 }
6372 }
6373 Expression::JSONArrayAgg(_) => match target {
6374 DialectType::PostgreSQL => Action::GenericFunctionNormalize,
6375 _ => Action::None,
6376 },
6377 Expression::ToNumber(tn) => {
6378 // TO_NUMBER(x) with 1 arg -> CAST(x AS DOUBLE) for most targets
6379 if tn.format.is_none() && tn.precision.is_none() && tn.scale.is_none() {
6380 match target {
6381 DialectType::Oracle
6382 | DialectType::Snowflake
6383 | DialectType::Teradata => Action::None,
6384 _ => Action::GenericFunctionNormalize,
6385 }
6386 } else {
6387 Action::None
6388 }
6389 }
6390 Expression::Nvl2(_) => {
6391 // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b ELSE c END for most dialects
6392 // Keep as NVL2 for dialects that support it natively
6393 match target {
6394 DialectType::Oracle
6395 | DialectType::Snowflake
6396 | DialectType::Teradata
6397 | DialectType::Spark
6398 | DialectType::Databricks
6399 | DialectType::Redshift => Action::None,
6400 _ => Action::Nvl2Expand,
6401 }
6402 }
6403 Expression::Decode(_) | Expression::DecodeCase(_) => {
6404 // DECODE(a, b, c[, d, e[, ...]]) -> CASE WHEN with null-safe comparisons
6405 // Keep as DECODE for Oracle/Snowflake
6406 match target {
6407 DialectType::Oracle | DialectType::Snowflake => Action::None,
6408 _ => Action::DecodeSimplify,
6409 }
6410 }
6411 Expression::Coalesce(ref cf) => {
6412 // IFNULL(a, b) -> COALESCE(a, b): clear original_name for cross-dialect
6413 // BigQuery keeps IFNULL natively when source is also BigQuery
6414 if cf.original_name.as_deref() == Some("IFNULL")
6415 && !(matches!(source, DialectType::BigQuery)
6416 && matches!(target, DialectType::BigQuery))
6417 {
6418 Action::IfnullToCoalesce
6419 } else {
6420 Action::None
6421 }
6422 }
6423 Expression::IfFunc(if_func) => {
6424 if matches!(source, DialectType::Snowflake)
6425 && matches!(
6426 target,
6427 DialectType::Presto | DialectType::Trino | DialectType::SQLite
6428 )
6429 && matches!(if_func.false_value, Some(Expression::Div(_)))
6430 {
6431 Action::Div0TypedDivision
6432 } else {
6433 Action::None
6434 }
6435 }
6436 Expression::ToJson(_) => match target {
6437 DialectType::Presto | DialectType::Trino => Action::ToJsonConvert,
6438 DialectType::BigQuery => Action::ToJsonConvert,
6439 DialectType::DuckDB => Action::ToJsonConvert,
6440 _ => Action::None,
6441 },
6442 Expression::ArrayAgg(ref agg) => {
6443 if matches!(target, DialectType::MySQL | DialectType::SingleStore) {
6444 Action::ArrayAggToGroupConcat
6445 } else if matches!(
6446 target,
6447 DialectType::Hive | DialectType::Spark | DialectType::Databricks
6448 ) {
6449 // Any source -> Hive/Spark: convert ARRAY_AGG to COLLECT_LIST
6450 Action::ArrayAggToCollectList
6451 } else if matches!(
6452 source,
6453 DialectType::Spark | DialectType::Databricks | DialectType::Hive
6454 ) && matches!(target, DialectType::DuckDB)
6455 && agg.filter.is_some()
6456 {
6457 // Spark/Hive ARRAY_AGG excludes NULLs, DuckDB includes them
6458 // Need to add NOT x IS NULL to existing filter
6459 Action::ArrayAggNullFilter
6460 } else if matches!(target, DialectType::DuckDB)
6461 && agg.ignore_nulls == Some(true)
6462 && !agg.order_by.is_empty()
6463 {
6464 // BigQuery ARRAY_AGG(x IGNORE NULLS ORDER BY ...) -> DuckDB ARRAY_AGG(x ORDER BY a NULLS FIRST, ...)
6465 Action::ArrayAggIgnoreNullsDuckDB
6466 } else if !matches!(source, DialectType::Snowflake) {
6467 Action::None
6468 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
6469 let is_array_agg = agg.name.as_deref().map_or(false, |n| n.eq_ignore_ascii_case("ARRAY_AGG"))
6470 || agg.name.is_none();
6471 if is_array_agg {
6472 Action::ArrayAggCollectList
6473 } else {
6474 Action::None
6475 }
6476 } else if matches!(
6477 target,
6478 DialectType::DuckDB | DialectType::Presto | DialectType::Trino
6479 ) && agg.filter.is_none()
6480 {
6481 Action::ArrayAggFilter
6482 } else {
6483 Action::None
6484 }
6485 }
6486 Expression::WithinGroup(wg) => {
6487 if matches!(source, DialectType::Snowflake)
6488 && matches!(
6489 target,
6490 DialectType::DuckDB | DialectType::Presto | DialectType::Trino
6491 )
6492 && matches!(wg.this, Expression::ArrayAgg(_))
6493 {
6494 Action::ArrayAggWithinGroupFilter
6495 } else if matches!(&wg.this, Expression::AggregateFunction(af) if af.name.eq_ignore_ascii_case("STRING_AGG"))
6496 || matches!(&wg.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("STRING_AGG"))
6497 || matches!(&wg.this, Expression::StringAgg(_))
6498 {
6499 Action::StringAggConvert
6500 } else if matches!(
6501 target,
6502 DialectType::Presto
6503 | DialectType::Trino
6504 | DialectType::Athena
6505 | DialectType::Spark
6506 | DialectType::Databricks
6507 ) && (matches!(&wg.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("PERCENTILE_CONT") || f.name.eq_ignore_ascii_case("PERCENTILE_DISC"))
6508 || matches!(&wg.this, Expression::AggregateFunction(af) if af.name.eq_ignore_ascii_case("PERCENTILE_CONT") || af.name.eq_ignore_ascii_case("PERCENTILE_DISC"))
6509 || matches!(&wg.this, Expression::PercentileCont(_)))
6510 {
6511 Action::PercentileContConvert
6512 } else {
6513 Action::None
6514 }
6515 }
6516 // For BigQuery: CAST(x AS TIMESTAMP) -> CAST(x AS DATETIME)
6517 // because BigQuery's TIMESTAMP is really TIMESTAMPTZ, and
6518 // DATETIME is the timezone-unaware type
6519 Expression::Cast(ref c) => {
6520 if c.format.is_some()
6521 && (matches!(source, DialectType::BigQuery)
6522 || matches!(source, DialectType::Teradata))
6523 {
6524 Action::BigQueryCastFormat
6525 } else if matches!(target, DialectType::BigQuery)
6526 && !matches!(source, DialectType::BigQuery)
6527 && matches!(
6528 c.to,
6529 DataType::Timestamp {
6530 timezone: false,
6531 ..
6532 }
6533 )
6534 {
6535 Action::CastTimestampToDatetime
6536 } else if matches!(target, DialectType::MySQL | DialectType::StarRocks)
6537 && !matches!(source, DialectType::MySQL | DialectType::StarRocks)
6538 && matches!(
6539 c.to,
6540 DataType::Timestamp {
6541 timezone: false,
6542 ..
6543 }
6544 )
6545 {
6546 // Generic/other -> MySQL/StarRocks: CAST(x AS TIMESTAMP) -> CAST(x AS DATETIME)
6547 // but MySQL-native CAST(x AS TIMESTAMP) stays as TIMESTAMP(x) via transform_cast
6548 Action::CastTimestampToDatetime
6549 } else if matches!(
6550 source,
6551 DialectType::Hive | DialectType::Spark | DialectType::Databricks
6552 ) && matches!(
6553 target,
6554 DialectType::Presto
6555 | DialectType::Trino
6556 | DialectType::Athena
6557 | DialectType::DuckDB
6558 | DialectType::Snowflake
6559 | DialectType::BigQuery
6560 | DialectType::Databricks
6561 | DialectType::TSQL
6562 ) {
6563 Action::HiveCastToTryCast
6564 } else if matches!(c.to, DataType::Timestamp { timezone: true, .. })
6565 && matches!(target, DialectType::MySQL | DialectType::StarRocks)
6566 {
6567 // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) function for MySQL/StarRocks
6568 Action::CastTimestamptzToFunc
6569 } else if matches!(c.to, DataType::Timestamp { timezone: true, .. })
6570 && matches!(
6571 target,
6572 DialectType::Hive
6573 | DialectType::Spark
6574 | DialectType::Databricks
6575 | DialectType::BigQuery
6576 )
6577 {
6578 // CAST(x AS TIMESTAMP WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark/BigQuery
6579 Action::CastTimestampStripTz
6580 } else if matches!(&c.to, DataType::Json)
6581 && matches!(&c.this, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
6582 && matches!(
6583 target,
6584 DialectType::Presto
6585 | DialectType::Trino
6586 | DialectType::Athena
6587 | DialectType::Snowflake
6588 )
6589 {
6590 // CAST('x' AS JSON) -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake
6591 // Only when the input is a string literal (JSON 'value' syntax)
6592 Action::JsonLiteralToJsonParse
6593 } else if matches!(&c.to, DataType::Json)
6594 && matches!(source, DialectType::DuckDB)
6595 && matches!(
6596 target,
6597 DialectType::Presto | DialectType::Trino | DialectType::Athena
6598 )
6599 {
6600 // DuckDB's CAST(x AS JSON) parses the string value into a JSON value.
6601 // Trino/Presto/Athena's CAST(x AS JSON) instead wraps the value as a
6602 // JSON string (no parsing) — different semantics. Use JSON_PARSE(x)
6603 // in the target to preserve DuckDB's parse semantics.
6604 Action::JsonLiteralToJsonParse
6605 } else if matches!(&c.to, DataType::Json | DataType::JsonB)
6606 && matches!(target, DialectType::Spark | DialectType::Databricks)
6607 {
6608 // CAST(x AS JSON) -> TO_JSON(x) for Spark
6609 Action::CastToJsonForSpark
6610 } else if (matches!(
6611 &c.to,
6612 DataType::Array { .. } | DataType::Map { .. } | DataType::Struct { .. }
6613 )) && matches!(
6614 target,
6615 DialectType::Spark | DialectType::Databricks
6616 ) && (matches!(&c.this, Expression::ParseJson(_))
6617 || matches!(
6618 &c.this,
6619 Expression::Function(f)
6620 if f.name.eq_ignore_ascii_case("JSON_EXTRACT")
6621 || f.name.eq_ignore_ascii_case("JSON_EXTRACT_SCALAR")
6622 || f.name.eq_ignore_ascii_case("GET_JSON_OBJECT")
6623 ))
6624 {
6625 // CAST(JSON_PARSE(...) AS ARRAY/MAP) or CAST(JSON_EXTRACT/GET_JSON_OBJECT(...) AS ARRAY/MAP)
6626 // -> FROM_JSON(..., type_string) for Spark
6627 Action::CastJsonToFromJson
6628 } else if matches!(target, DialectType::Spark | DialectType::Databricks)
6629 && matches!(
6630 c.to,
6631 DataType::Timestamp {
6632 timezone: false,
6633 ..
6634 }
6635 )
6636 && matches!(source, DialectType::DuckDB)
6637 {
6638 Action::StrftimeCastTimestamp
6639 } else if matches!(source, DialectType::DuckDB)
6640 && matches!(
6641 c.to,
6642 DataType::Decimal {
6643 precision: None,
6644 ..
6645 }
6646 )
6647 {
6648 Action::DecimalDefaultPrecision
6649 } else if matches!(source, DialectType::MySQL | DialectType::SingleStore)
6650 && matches!(c.to, DataType::Char { length: None })
6651 && !matches!(target, DialectType::MySQL | DialectType::SingleStore)
6652 {
6653 // MySQL CAST(x AS CHAR) was originally TEXT - convert to target text type
6654 Action::MysqlCastCharToText
6655 } else if matches!(
6656 source,
6657 DialectType::Spark | DialectType::Databricks | DialectType::Hive
6658 ) && matches!(
6659 target,
6660 DialectType::Spark | DialectType::Databricks | DialectType::Hive
6661 ) && Self::has_varchar_char_type(&c.to)
6662 {
6663 // Spark parses VARCHAR(n)/CHAR(n) as TEXT, so normalize back to STRING
6664 Action::SparkCastVarcharToString
6665 } else {
6666 Action::None
6667 }
6668 }
6669 Expression::SafeCast(ref c) => {
6670 if c.format.is_some()
6671 && matches!(source, DialectType::BigQuery)
6672 && !matches!(target, DialectType::BigQuery)
6673 {
6674 Action::BigQueryCastFormat
6675 } else {
6676 Action::None
6677 }
6678 }
6679 Expression::TryCast(ref c) => {
6680 if matches!(&c.to, DataType::Json)
6681 && matches!(source, DialectType::DuckDB)
6682 && matches!(
6683 target,
6684 DialectType::Presto | DialectType::Trino | DialectType::Athena
6685 )
6686 {
6687 // DuckDB's TRY_CAST(x AS JSON) tries to parse x as JSON, returning
6688 // NULL on parse failure. Trino/Presto/Athena's TRY_CAST(x AS JSON)
6689 // wraps the value as a JSON string (no parse). Emit TRY(JSON_PARSE(x))
6690 // to preserve DuckDB's parse-or-null semantics.
6691 Action::DuckDBTryCastJsonToTryJsonParse
6692 } else {
6693 Action::None
6694 }
6695 }
6696 // For DuckDB: DATE_TRUNC should preserve the input type
6697 Expression::DateTrunc(_) | Expression::TimestampTrunc(_) => {
6698 if matches!(source, DialectType::Snowflake)
6699 && matches!(target, DialectType::DuckDB)
6700 {
6701 Action::DateTruncWrapCast
6702 } else {
6703 Action::None
6704 }
6705 }
6706 // For DuckDB: SET a = 1 -> SET VARIABLE a = 1
6707 Expression::SetStatement(s) => {
6708 if matches!(target, DialectType::DuckDB)
6709 && !matches!(source, DialectType::TSQL | DialectType::Fabric)
6710 && s.items.iter().any(|item| item.kind.is_none())
6711 {
6712 Action::SetToVariable
6713 } else {
6714 Action::None
6715 }
6716 }
6717 // Cross-dialect NULL ordering normalization.
6718 // When nulls_first is not specified, fill in the source dialect's implied
6719 // default so the target generator can correctly add/strip NULLS FIRST/LAST.
6720 Expression::Ordered(o) => {
6721 // MySQL doesn't support NULLS FIRST/LAST - strip or rewrite
6722 if matches!(target, DialectType::MySQL) && o.nulls_first.is_some() {
6723 Action::MysqlNullsOrdering
6724 } else {
6725 // Skip targets that don't support NULLS FIRST/LAST syntax
6726 let target_supports_nulls = !matches!(
6727 target,
6728 DialectType::MySQL
6729 | DialectType::TSQL
6730 | DialectType::StarRocks
6731 | DialectType::Doris
6732 );
6733 if o.nulls_first.is_none() && source != target && target_supports_nulls
6734 {
6735 Action::NullsOrdering
6736 } else {
6737 Action::None
6738 }
6739 }
6740 }
6741 // BigQuery data types: convert INT64, BYTES, NUMERIC etc. to standard types
6742 Expression::DataType(dt) => {
6743 if matches!(source, DialectType::BigQuery)
6744 && !matches!(target, DialectType::BigQuery)
6745 {
6746 match dt {
6747 DataType::Custom { ref name }
6748 if name.eq_ignore_ascii_case("INT64")
6749 || name.eq_ignore_ascii_case("FLOAT64")
6750 || name.eq_ignore_ascii_case("BOOL")
6751 || name.eq_ignore_ascii_case("BYTES")
6752 || name.eq_ignore_ascii_case("NUMERIC")
6753 || name.eq_ignore_ascii_case("STRING")
6754 || name.eq_ignore_ascii_case("DATETIME") =>
6755 {
6756 Action::BigQueryCastType
6757 }
6758 _ => Action::None,
6759 }
6760 } else if matches!(source, DialectType::TSQL) {
6761 // For TSQL source -> any target (including TSQL itself for REAL)
6762 match dt {
6763 // REAL -> FLOAT even for TSQL->TSQL
6764 DataType::Custom { ref name }
6765 if name.eq_ignore_ascii_case("REAL") =>
6766 {
6767 Action::TSQLTypeNormalize
6768 }
6769 DataType::Float {
6770 real_spelling: true,
6771 ..
6772 } => Action::TSQLTypeNormalize,
6773 // Other TSQL type normalizations only for non-TSQL targets
6774 DataType::Custom { ref name }
6775 if !matches!(target, DialectType::TSQL)
6776 && (name.eq_ignore_ascii_case("MONEY")
6777 || name.eq_ignore_ascii_case("SMALLMONEY")
6778 || name.eq_ignore_ascii_case("DATETIME2")
6779 || name.eq_ignore_ascii_case("IMAGE")
6780 || name.eq_ignore_ascii_case("BIT")
6781 || name.eq_ignore_ascii_case("ROWVERSION")
6782 || name.eq_ignore_ascii_case("UNIQUEIDENTIFIER")
6783 || name.eq_ignore_ascii_case("DATETIMEOFFSET")
6784 || (name.len() >= 7 && name[..7].eq_ignore_ascii_case("NUMERIC"))
6785 || (name.len() >= 10 && name[..10].eq_ignore_ascii_case("DATETIME2("))
6786 || (name.len() >= 5 && name[..5].eq_ignore_ascii_case("TIME("))) =>
6787 {
6788 Action::TSQLTypeNormalize
6789 }
6790 DataType::Float {
6791 precision: Some(_), ..
6792 } if !matches!(target, DialectType::TSQL) => {
6793 Action::TSQLTypeNormalize
6794 }
6795 DataType::TinyInt { .. }
6796 if !matches!(target, DialectType::TSQL) =>
6797 {
6798 Action::TSQLTypeNormalize
6799 }
6800 // INTEGER -> INT for Databricks/Spark targets
6801 DataType::Int {
6802 integer_spelling: true,
6803 ..
6804 } if matches!(
6805 target,
6806 DialectType::Databricks | DialectType::Spark
6807 ) =>
6808 {
6809 Action::TSQLTypeNormalize
6810 }
6811 _ => Action::None,
6812 }
6813 } else if (matches!(source, DialectType::Oracle)
6814 || matches!(source, DialectType::Generic))
6815 && !matches!(target, DialectType::Oracle)
6816 {
6817 match dt {
6818 DataType::Custom { ref name }
6819 if (name.len() >= 9 && name[..9].eq_ignore_ascii_case("VARCHAR2("))
6820 || (name.len() >= 10 && name[..10].eq_ignore_ascii_case("NVARCHAR2("))
6821 || name.eq_ignore_ascii_case("VARCHAR2")
6822 || name.eq_ignore_ascii_case("NVARCHAR2") =>
6823 {
6824 Action::OracleVarchar2ToVarchar
6825 }
6826 _ => Action::None,
6827 }
6828 } else if matches!(target, DialectType::Snowflake)
6829 && !matches!(source, DialectType::Snowflake)
6830 {
6831 // When target is Snowflake but source is NOT Snowflake,
6832 // protect FLOAT from being converted to DOUBLE by Snowflake's transform.
6833 // Snowflake treats FLOAT=DOUBLE internally, but non-Snowflake sources
6834 // should keep their FLOAT spelling.
6835 match dt {
6836 DataType::Float { .. } => Action::SnowflakeFloatProtect,
6837 _ => Action::None,
6838 }
6839 } else {
6840 Action::None
6841 }
6842 }
6843 // LOWER patterns from BigQuery TO_HEX conversions:
6844 // - LOWER(LOWER(HEX(x))) from non-BQ targets: flatten
6845 // - LOWER(Function("TO_HEX")) for BQ->BQ: strip LOWER
6846 Expression::Lower(uf) => {
6847 if matches!(source, DialectType::BigQuery) {
6848 match &uf.this {
6849 Expression::Lower(_) => Action::BigQueryToHexLower,
6850 Expression::Function(f)
6851 if f.name == "TO_HEX"
6852 && matches!(target, DialectType::BigQuery) =>
6853 {
6854 // BQ->BQ: LOWER(TO_HEX(x)) -> TO_HEX(x)
6855 Action::BigQueryToHexLower
6856 }
6857 _ => Action::None,
6858 }
6859 } else {
6860 Action::None
6861 }
6862 }
6863 // UPPER patterns from BigQuery TO_HEX conversions:
6864 // - UPPER(LOWER(HEX(x))) from non-BQ targets: extract inner
6865 // - UPPER(Function("TO_HEX")) for BQ->BQ: keep as UPPER(TO_HEX(x))
6866 Expression::Upper(uf) => {
6867 if matches!(source, DialectType::BigQuery) {
6868 match &uf.this {
6869 Expression::Lower(_) => Action::BigQueryToHexUpper,
6870 _ => Action::None,
6871 }
6872 } else {
6873 Action::None
6874 }
6875 }
6876 // BigQuery LAST_DAY(date, unit) -> strip unit for non-BigQuery targets
6877 // Snowflake supports LAST_DAY with unit, so keep it there
6878 Expression::LastDay(ld) => {
6879 if matches!(source, DialectType::BigQuery)
6880 && !matches!(target, DialectType::BigQuery | DialectType::Snowflake)
6881 && ld.unit.is_some()
6882 {
6883 Action::BigQueryLastDayStripUnit
6884 } else {
6885 Action::None
6886 }
6887 }
6888 // BigQuery SafeDivide expressions (already parsed as SafeDivide)
6889 Expression::SafeDivide(_) => {
6890 if matches!(source, DialectType::BigQuery)
6891 && !matches!(target, DialectType::BigQuery)
6892 {
6893 Action::BigQuerySafeDivide
6894 } else {
6895 Action::None
6896 }
6897 }
6898 // BigQuery ANY_VALUE(x HAVING MAX/MIN y) -> ARG_MAX_NULL/ARG_MIN_NULL for DuckDB
6899 // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
6900 Expression::AnyValue(ref agg) => {
6901 if matches!(source, DialectType::BigQuery)
6902 && matches!(target, DialectType::DuckDB)
6903 && agg.having_max.is_some()
6904 {
6905 Action::BigQueryAnyValueHaving
6906 } else if matches!(target, DialectType::Spark | DialectType::Databricks)
6907 && !matches!(source, DialectType::Spark | DialectType::Databricks)
6908 && agg.ignore_nulls.is_none()
6909 {
6910 Action::AnyValueIgnoreNulls
6911 } else {
6912 Action::None
6913 }
6914 }
6915 Expression::Any(ref q) => {
6916 if matches!(source, DialectType::PostgreSQL)
6917 && matches!(
6918 target,
6919 DialectType::Spark | DialectType::Databricks | DialectType::Hive
6920 )
6921 && q.op.is_some()
6922 && !matches!(
6923 q.subquery,
6924 Expression::Select(_) | Expression::Subquery(_)
6925 )
6926 {
6927 Action::AnyToExists
6928 } else {
6929 Action::None
6930 }
6931 }
            // Snowflake RLIKE does full-string match; DuckDB REGEXP_FULL_MATCH also does full-string match
6934 Expression::RegexpLike(_)
6935 if matches!(source, DialectType::Snowflake)
6936 && matches!(target, DialectType::DuckDB) =>
6937 {
6938 Action::RlikeSnowflakeToDuckDB
6939 }
6940 // RegexpLike from non-DuckDB/non-Snowflake sources -> REGEXP_MATCHES for DuckDB target
6941 Expression::RegexpLike(_)
6942 if !matches!(source, DialectType::DuckDB)
6943 && matches!(target, DialectType::DuckDB) =>
6944 {
6945 Action::RegexpLikeToDuckDB
6946 }
6947 // RegexpLike -> Exasol: anchor pattern with .*...*
6948 Expression::RegexpLike(_)
6949 if matches!(target, DialectType::Exasol) =>
6950 {
6951 Action::RegexpLikeExasolAnchor
6952 }
6953 // Safe-division source -> non-safe target: NULLIF wrapping and/or CAST
6954 // Safe-division dialects: MySQL, DuckDB, SingleStore, TiDB, ClickHouse, Doris
6955 Expression::Div(ref op)
6956 if matches!(
6957 source,
6958 DialectType::MySQL
6959 | DialectType::DuckDB
6960 | DialectType::SingleStore
6961 | DialectType::TiDB
6962 | DialectType::ClickHouse
6963 | DialectType::Doris
6964 ) && matches!(
6965 target,
6966 DialectType::PostgreSQL
6967 | DialectType::Redshift
6968 | DialectType::Drill
6969 | DialectType::Trino
6970 | DialectType::Presto
6971 | DialectType::Athena
6972 | DialectType::TSQL
6973 | DialectType::Teradata
6974 | DialectType::SQLite
6975 | DialectType::BigQuery
6976 | DialectType::Snowflake
6977 | DialectType::Databricks
6978 | DialectType::Oracle
6979 | DialectType::Materialize
6980 | DialectType::RisingWave
6981 ) =>
6982 {
6983 // Only wrap if RHS is not already NULLIF
6984 if !matches!(&op.right, Expression::Function(f) if f.name.eq_ignore_ascii_case("NULLIF"))
6985 {
6986 Action::MySQLSafeDivide
6987 } else {
6988 Action::None
6989 }
6990 }
6991 // ALTER TABLE ... RENAME TO <schema>.<table> -> strip schema for most targets
6992 // For TSQL/Fabric, convert to sp_rename instead
6993 Expression::AlterTable(ref at) if !at.actions.is_empty() => {
6994 if let Some(crate::expressions::AlterTableAction::RenameTable(
6995 ref new_tbl,
6996 )) = at.actions.first()
6997 {
6998 if matches!(target, DialectType::TSQL | DialectType::Fabric) {
6999 // TSQL: ALTER TABLE RENAME -> EXEC sp_rename
7000 Action::AlterTableToSpRename
7001 } else if new_tbl.schema.is_some()
7002 && matches!(
7003 target,
7004 DialectType::BigQuery
7005 | DialectType::Doris
7006 | DialectType::StarRocks
7007 | DialectType::DuckDB
7008 | DialectType::PostgreSQL
7009 | DialectType::Redshift
7010 )
7011 {
7012 Action::AlterTableRenameStripSchema
7013 } else {
7014 Action::None
7015 }
7016 } else {
7017 Action::None
7018 }
7019 }
7020 // EPOCH(x) expression -> target-specific epoch conversion
7021 Expression::Epoch(_) if !matches!(target, DialectType::DuckDB) => {
7022 Action::EpochConvert
7023 }
7024 // EPOCH_MS(x) expression -> target-specific epoch ms conversion
7025 Expression::EpochMs(_) if !matches!(target, DialectType::DuckDB) => {
7026 Action::EpochMsConvert
7027 }
            // STRING_AGG -> GROUP_CONCAT for MySQL/SingleStore/Doris/StarRocks/SQLite,
            // and the Spark/Databricks-specific form for those targets
7029 Expression::StringAgg(_) => {
7030 if matches!(
7031 target,
7032 DialectType::MySQL
7033 | DialectType::SingleStore
7034 | DialectType::Doris
7035 | DialectType::StarRocks
7036 | DialectType::SQLite
7037 ) {
7038 Action::StringAggConvert
7039 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
7040 Action::StringAggConvert
7041 } else {
7042 Action::None
7043 }
7044 }
7045 // GROUP_CONCAT -> STRING_AGG for PostgreSQL/Presto/etc.
7046 // Also handles GROUP_CONCAT normalization for MySQL/SQLite targets
7047 Expression::GroupConcat(_) => Action::GroupConcatConvert,
7048 // CARDINALITY/ARRAY_LENGTH/ARRAY_SIZE -> target-specific array length
7049 // DuckDB CARDINALITY -> keep as CARDINALITY for DuckDB target (used for maps)
7050 Expression::Cardinality(_)
7051 if matches!(source, DialectType::DuckDB)
7052 && matches!(target, DialectType::DuckDB) =>
7053 {
7054 Action::None
7055 }
7056 Expression::Cardinality(_) | Expression::ArrayLength(_) => {
7057 Action::ArrayLengthConvert
7058 }
7059 Expression::ArraySize(_) => {
7060 if matches!(target, DialectType::Drill) {
7061 Action::ArraySizeDrill
7062 } else {
7063 Action::ArrayLengthConvert
7064 }
7065 }
7066 // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter/ARRAY subquery
7067 Expression::ArrayRemove(_) => match target {
7068 DialectType::DuckDB | DialectType::ClickHouse | DialectType::BigQuery => {
7069 Action::ArrayRemoveConvert
7070 }
7071 _ => Action::None,
7072 },
7073 // ARRAY_REVERSE(x) -> arrayReverse for ClickHouse
7074 Expression::ArrayReverse(_) => match target {
7075 DialectType::ClickHouse => Action::ArrayReverseConvert,
7076 _ => Action::None,
7077 },
7078 // JSON_KEYS(x) -> JSON_OBJECT_KEYS/OBJECT_KEYS for Spark/Databricks/Snowflake
7079 Expression::JsonKeys(_) => match target {
7080 DialectType::Spark | DialectType::Databricks | DialectType::Snowflake => {
7081 Action::JsonKeysConvert
7082 }
7083 _ => Action::None,
7084 },
7085 // PARSE_JSON(x) -> strip for SQLite/Doris/MySQL/StarRocks
7086 Expression::ParseJson(_) => match target {
7087 DialectType::SQLite
7088 | DialectType::Doris
7089 | DialectType::MySQL
7090 | DialectType::StarRocks => Action::ParseJsonStrip,
7091 _ => Action::None,
7092 },
7093 // WeekOfYear -> WEEKISO for Snowflake (cross-dialect only)
7094 Expression::WeekOfYear(_)
7095 if matches!(target, DialectType::Snowflake)
7096 && !matches!(source, DialectType::Snowflake) =>
7097 {
7098 Action::WeekOfYearToWeekIso
7099 }
7100 // NVL: clear original_name so generator uses dialect-specific function names
7101 Expression::Nvl(f) if f.original_name.is_some() => Action::NvlClearOriginal,
7102 // XOR: expand for dialects that don't support the XOR keyword
7103 Expression::Xor(_) => {
7104 let target_supports_xor = matches!(
7105 target,
7106 DialectType::MySQL
7107 | DialectType::SingleStore
7108 | DialectType::Doris
7109 | DialectType::StarRocks
7110 );
7111 if !target_supports_xor {
7112 Action::XorExpand
7113 } else {
7114 Action::None
7115 }
7116 }
7117 // TSQL #table -> temp table normalization (CREATE TABLE)
7118 Expression::CreateTable(ct)
7119 if matches!(source, DialectType::TSQL | DialectType::Fabric)
7120 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
7121 && ct.name.name.name.starts_with('#') =>
7122 {
7123 Action::TempTableHash
7124 }
7125 // TSQL #table -> strip # from table references in SELECT/etc.
7126 Expression::Table(tr)
7127 if matches!(source, DialectType::TSQL | DialectType::Fabric)
7128 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
7129 && tr.name.name.starts_with('#') =>
7130 {
7131 Action::TempTableHash
7132 }
7133 // TSQL #table -> strip # from DROP TABLE names
7134 Expression::DropTable(ref dt)
7135 if matches!(source, DialectType::TSQL | DialectType::Fabric)
7136 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
7137 && dt.names.iter().any(|n| n.name.name.starts_with('#')) =>
7138 {
7139 Action::TempTableHash
7140 }
7141 // JSON_EXTRACT -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
7142 Expression::JsonExtract(_)
7143 if matches!(target, DialectType::TSQL | DialectType::Fabric) =>
7144 {
7145 Action::JsonExtractToTsql
7146 }
7147 // JSON_EXTRACT_SCALAR -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
7148 Expression::JsonExtractScalar(_)
7149 if matches!(target, DialectType::TSQL | DialectType::Fabric) =>
7150 {
7151 Action::JsonExtractToTsql
7152 }
7153 // JSON_EXTRACT -> JSONExtractString for ClickHouse
7154 Expression::JsonExtract(_) if matches!(target, DialectType::ClickHouse) => {
7155 Action::JsonExtractToClickHouse
7156 }
7157 // JSON_EXTRACT_SCALAR -> JSONExtractString for ClickHouse
7158 Expression::JsonExtractScalar(_)
7159 if matches!(target, DialectType::ClickHouse) =>
7160 {
7161 Action::JsonExtractToClickHouse
7162 }
7163 // JSON_EXTRACT -> arrow syntax for SQLite/DuckDB
7164 Expression::JsonExtract(ref f)
7165 if !f.arrow_syntax
7166 && matches!(target, DialectType::SQLite | DialectType::DuckDB) =>
7167 {
7168 Action::JsonExtractToArrow
7169 }
7170 // JSON_EXTRACT with JSONPath -> JSON_EXTRACT_PATH for PostgreSQL (non-PG sources only)
7171 Expression::JsonExtract(ref f)
7172 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift)
7173 && !matches!(
7174 source,
7175 DialectType::PostgreSQL
7176 | DialectType::Redshift
7177 | DialectType::Materialize
7178 )
7179 && matches!(&f.path, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s.starts_with('$'))) =>
7180 {
7181 Action::JsonExtractToGetJsonObject
7182 }
7183 // JSON_EXTRACT -> GET_JSON_OBJECT for Hive/Spark
7184 Expression::JsonExtract(_)
7185 if matches!(
7186 target,
7187 DialectType::Hive | DialectType::Spark | DialectType::Databricks
7188 ) =>
7189 {
7190 Action::JsonExtractToGetJsonObject
7191 }
7192 // JSON_EXTRACT_SCALAR -> target-specific for PostgreSQL, Snowflake, SQLite
7193 // Skip if already in arrow/hash_arrow syntax (same-dialect identity case)
7194 Expression::JsonExtractScalar(ref f)
7195 if !f.arrow_syntax
7196 && !f.hash_arrow_syntax
7197 && matches!(
7198 target,
7199 DialectType::PostgreSQL
7200 | DialectType::Redshift
7201 | DialectType::Snowflake
7202 | DialectType::SQLite
7203 | DialectType::DuckDB
7204 ) =>
7205 {
7206 Action::JsonExtractScalarConvert
7207 }
7208 // JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
7209 Expression::JsonExtractScalar(_)
7210 if matches!(
7211 target,
7212 DialectType::Hive | DialectType::Spark | DialectType::Databricks
7213 ) =>
7214 {
7215 Action::JsonExtractScalarToGetJsonObject
7216 }
7217 // JSON_EXTRACT path normalization for BigQuery, MySQL (bracket/wildcard handling)
7218 Expression::JsonExtract(ref f)
7219 if !f.arrow_syntax
7220 && matches!(target, DialectType::BigQuery | DialectType::MySQL) =>
7221 {
7222 Action::JsonPathNormalize
7223 }
7224 // JsonQuery (parsed JSON_QUERY) -> target-specific
7225 Expression::JsonQuery(_) => Action::JsonQueryValueConvert,
7226 // JsonValue (parsed JSON_VALUE) -> target-specific
7227 Expression::JsonValue(_) => Action::JsonQueryValueConvert,
7228 // AT TIME ZONE -> AT_TIMEZONE for Presto, FROM_UTC_TIMESTAMP for Spark,
7229 // TIMESTAMP(DATETIME(...)) for BigQuery, CONVERT_TIMEZONE for Snowflake
7230 Expression::AtTimeZone(_)
7231 if matches!(
7232 target,
7233 DialectType::Presto
7234 | DialectType::Trino
7235 | DialectType::Athena
7236 | DialectType::Spark
7237 | DialectType::Databricks
7238 | DialectType::BigQuery
7239 | DialectType::Snowflake
7240 ) =>
7241 {
7242 Action::AtTimeZoneConvert
7243 }
7244 // DAY_OF_WEEK -> dialect-specific
7245 Expression::DayOfWeek(_)
7246 if matches!(
7247 target,
7248 DialectType::DuckDB | DialectType::Spark | DialectType::Databricks
7249 ) =>
7250 {
7251 Action::DayOfWeekConvert
7252 }
7253 // CURRENT_USER -> CURRENT_USER() for Snowflake
7254 Expression::CurrentUser(_) if matches!(target, DialectType::Snowflake) => {
7255 Action::CurrentUserParens
7256 }
7257 // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
7258 Expression::ElementAt(_)
7259 if matches!(target, DialectType::PostgreSQL | DialectType::BigQuery) =>
7260 {
7261 Action::ElementAtConvert
7262 }
7263 // ARRAY[...] (ArrayFunc bracket_notation=false) -> convert for target dialect
7264 Expression::ArrayFunc(ref arr)
7265 if !arr.bracket_notation
7266 && matches!(
7267 target,
7268 DialectType::Spark
7269 | DialectType::Databricks
7270 | DialectType::Hive
7271 | DialectType::BigQuery
7272 | DialectType::DuckDB
7273 | DialectType::Snowflake
7274 | DialectType::Presto
7275 | DialectType::Trino
7276 | DialectType::Athena
7277 | DialectType::ClickHouse
7278 | DialectType::StarRocks
7279 ) =>
7280 {
7281 Action::ArraySyntaxConvert
7282 }
7283 // VARIANCE expression -> varSamp for ClickHouse
7284 Expression::Variance(_) if matches!(target, DialectType::ClickHouse) => {
7285 Action::VarianceToClickHouse
7286 }
7287 // STDDEV expression -> stddevSamp for ClickHouse
7288 Expression::Stddev(_) if matches!(target, DialectType::ClickHouse) => {
7289 Action::StddevToClickHouse
7290 }
7291 // ApproxQuantile -> APPROX_PERCENTILE for Snowflake
7292 Expression::ApproxQuantile(_) if matches!(target, DialectType::Snowflake) => {
7293 Action::ApproxQuantileConvert
7294 }
7295 // MonthsBetween -> target-specific
7296 Expression::MonthsBetween(_)
7297 if !matches!(
7298 target,
7299 DialectType::Spark | DialectType::Databricks | DialectType::Hive
7300 ) =>
7301 {
7302 Action::MonthsBetweenConvert
7303 }
7304 // AddMonths -> target-specific DATEADD/DATE_ADD
7305 Expression::AddMonths(_) => Action::AddMonthsConvert,
7306 // MapFromArrays -> target-specific (MAP, OBJECT_CONSTRUCT, MAP_FROM_ARRAYS)
7307 Expression::MapFromArrays(_)
7308 if !matches!(target, DialectType::Spark | DialectType::Databricks) =>
7309 {
7310 Action::MapFromArraysConvert
7311 }
7312 // CURRENT_USER -> CURRENT_USER() for Spark
7313 Expression::CurrentUser(_)
7314 if matches!(target, DialectType::Spark | DialectType::Databricks) =>
7315 {
7316 Action::CurrentUserSparkParens
7317 }
7318 // MONTH/YEAR/DAY('string') from Spark -> cast string to DATE for DuckDB/Presto
7319 Expression::Month(ref f) | Expression::Year(ref f) | Expression::Day(ref f)
7320 if matches!(
7321 source,
7322 DialectType::Spark | DialectType::Databricks | DialectType::Hive
7323 ) && matches!(&f.this, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
7324 && matches!(
7325 target,
7326 DialectType::DuckDB
7327 | DialectType::Presto
7328 | DialectType::Trino
7329 | DialectType::Athena
7330 | DialectType::PostgreSQL
7331 | DialectType::Redshift
7332 ) =>
7333 {
7334 Action::SparkDateFuncCast
7335 }
7336 // $parameter -> @parameter for BigQuery
7337 Expression::Parameter(ref p)
7338 if matches!(target, DialectType::BigQuery)
7339 && matches!(source, DialectType::DuckDB)
7340 && (p.style == crate::expressions::ParameterStyle::Dollar
7341 || p.style == crate::expressions::ParameterStyle::DoubleDollar) =>
7342 {
7343 Action::DollarParamConvert
7344 }
7345 // EscapeString literal: normalize literal newlines to \n
7346 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::EscapeString(ref s) if s.contains('\n') || s.contains('\r') || s.contains('\t'))
7347 =>
7348 {
7349 Action::EscapeStringNormalize
7350 }
7351 // straight_join: keep lowercase for DuckDB, quote for MySQL
7352 Expression::Column(ref col)
7353 if col.name.name == "STRAIGHT_JOIN"
7354 && col.table.is_none()
7355 && matches!(source, DialectType::DuckDB)
7356 && matches!(target, DialectType::DuckDB | DialectType::MySQL) =>
7357 {
7358 Action::StraightJoinCase
7359 }
7360 // DATE and TIMESTAMP literal type conversions are now handled in the generator directly
7361 // Snowflake INTERVAL format: INTERVAL '2' HOUR -> INTERVAL '2 HOUR'
7362 Expression::Interval(ref iv)
7363 if matches!(
7364 target,
7365 DialectType::Snowflake
7366 | DialectType::PostgreSQL
7367 | DialectType::Redshift
7368 ) && iv.unit.is_some()
7369 && iv.this.as_ref().map_or(false, |t| matches!(t, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))) =>
7370 {
7371 Action::SnowflakeIntervalFormat
7372 }
7373 // TABLESAMPLE -> TABLESAMPLE RESERVOIR for DuckDB target
7374 Expression::TableSample(ref ts) if matches!(target, DialectType::DuckDB) => {
7375 if let Some(ref sample) = ts.sample {
7376 if !sample.explicit_method {
7377 Action::TablesampleReservoir
7378 } else {
7379 Action::None
7380 }
7381 } else {
7382 Action::None
7383 }
7384 }
7385 // TABLESAMPLE from non-Snowflake source to Snowflake: strip method and PERCENT
7386 // Handles both Expression::TableSample wrapper and Expression::Table with table_sample
7387 Expression::TableSample(ref ts)
7388 if matches!(target, DialectType::Snowflake)
7389 && !matches!(source, DialectType::Snowflake)
7390 && ts.sample.is_some() =>
7391 {
7392 if let Some(ref sample) = ts.sample {
7393 if !sample.explicit_method {
7394 Action::TablesampleSnowflakeStrip
7395 } else {
7396 Action::None
7397 }
7398 } else {
7399 Action::None
7400 }
7401 }
7402 Expression::Table(ref t)
7403 if matches!(target, DialectType::Snowflake)
7404 && !matches!(source, DialectType::Snowflake)
7405 && t.table_sample.is_some() =>
7406 {
7407 if let Some(ref sample) = t.table_sample {
7408 if !sample.explicit_method {
7409 Action::TablesampleSnowflakeStrip
7410 } else {
7411 Action::None
7412 }
7413 } else {
7414 Action::None
7415 }
7416 }
7417 // ALTER TABLE RENAME -> EXEC sp_rename for TSQL
7418 Expression::AlterTable(ref at)
7419 if matches!(target, DialectType::TSQL | DialectType::Fabric)
7420 && !at.actions.is_empty()
7421 && matches!(
7422 at.actions.first(),
7423 Some(crate::expressions::AlterTableAction::RenameTable(_))
7424 ) =>
7425 {
7426 Action::AlterTableToSpRename
7427 }
7428 // Subscript index: 1-based to 0-based for BigQuery/Hive/Spark
7429 Expression::Subscript(ref sub)
7430 if matches!(
7431 target,
7432 DialectType::BigQuery
7433 | DialectType::Hive
7434 | DialectType::Spark
7435 | DialectType::Databricks
7436 ) && matches!(
7437 source,
7438 DialectType::DuckDB
7439 | DialectType::PostgreSQL
7440 | DialectType::Presto
7441 | DialectType::Trino
7442 | DialectType::Redshift
7443 | DialectType::ClickHouse
7444 ) && matches!(&sub.index, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(ref n) if n.parse::<i64>().unwrap_or(0) > 0)) =>
7445 {
7446 Action::ArrayIndexConvert
7447 }
7448 // ANY_VALUE IGNORE NULLS detection moved to the AnyValue arm above
7449 // MysqlNullsOrdering for Ordered is now handled in the Ordered arm above
7450 // RESPECT NULLS handling for SQLite (strip it, add NULLS LAST to ORDER BY)
7451 // and for MySQL (rewrite ORDER BY with CASE WHEN for null ordering)
7452 Expression::WindowFunction(ref wf) => {
7453 // BigQuery doesn't support NULLS FIRST/LAST in window function ORDER BY
7454 // EXCEPT for ROW_NUMBER which keeps NULLS LAST
7455 let is_row_number = matches!(wf.this, Expression::RowNumber(_));
7456 if matches!(target, DialectType::BigQuery)
7457 && !is_row_number
7458 && !wf.over.order_by.is_empty()
7459 && wf.over.order_by.iter().any(|o| o.nulls_first.is_some())
7460 {
7461 Action::BigQueryNullsOrdering
7462 // DuckDB -> MySQL: Add CASE WHEN for NULLS LAST simulation in window ORDER BY
7463 // But NOT when frame is RANGE/GROUPS, since adding CASE WHEN would break value-based frames
7464 } else {
7465 let source_nulls_last = matches!(source, DialectType::DuckDB);
7466 let has_range_frame = wf.over.frame.as_ref().map_or(false, |f| {
7467 matches!(
7468 f.kind,
7469 crate::expressions::WindowFrameKind::Range
7470 | crate::expressions::WindowFrameKind::Groups
7471 )
7472 });
7473 if source_nulls_last
7474 && matches!(target, DialectType::MySQL)
7475 && !wf.over.order_by.is_empty()
7476 && wf.over.order_by.iter().any(|o| !o.desc)
7477 && !has_range_frame
7478 {
7479 Action::MysqlNullsLastRewrite
7480 } else {
7481 // Check for Snowflake window frame handling for FIRST_VALUE/LAST_VALUE/NTH_VALUE
7482 let is_ranking_window_func = matches!(
7483 &wf.this,
7484 Expression::FirstValue(_)
7485 | Expression::LastValue(_)
7486 | Expression::NthValue(_)
7487 );
7488 let has_full_unbounded_frame = wf.over.frame.as_ref().map_or(false, |f| {
7489 matches!(f.kind, crate::expressions::WindowFrameKind::Rows)
7490 && matches!(f.start, crate::expressions::WindowFrameBound::UnboundedPreceding)
7491 && matches!(f.end, Some(crate::expressions::WindowFrameBound::UnboundedFollowing))
7492 && f.exclude.is_none()
7493 });
7494 if is_ranking_window_func && matches!(source, DialectType::Snowflake) {
7495 if has_full_unbounded_frame && matches!(target, DialectType::Snowflake) {
7496 // Strip the default frame for Snowflake target
7497 Action::SnowflakeWindowFrameStrip
7498 } else if !has_full_unbounded_frame && wf.over.frame.is_none() && !matches!(target, DialectType::Snowflake) {
7499 // Add default frame for non-Snowflake target
7500 Action::SnowflakeWindowFrameAdd
7501 } else {
7502 match &wf.this {
7503 Expression::FirstValue(ref vf)
7504 | Expression::LastValue(ref vf)
7505 if vf.ignore_nulls == Some(false) =>
7506 {
7507 match target {
7508 DialectType::SQLite => Action::RespectNullsConvert,
7509 _ => Action::None,
7510 }
7511 }
7512 _ => Action::None,
7513 }
7514 }
7515 } else {
7516 match &wf.this {
7517 Expression::FirstValue(ref vf)
7518 | Expression::LastValue(ref vf)
7519 if vf.ignore_nulls == Some(false) =>
7520 {
7521 // RESPECT NULLS
7522 match target {
7523 DialectType::SQLite | DialectType::PostgreSQL => {
7524 Action::RespectNullsConvert
7525 }
7526 _ => Action::None,
7527 }
7528 }
7529 _ => Action::None,
7530 }
7531 }
7532 }
7533 }
7534 }
7535 // CREATE TABLE a LIKE b -> dialect-specific transformations
7536 Expression::CreateTable(ref ct)
7537 if ct.columns.is_empty()
7538 && ct.constraints.iter().any(|c| {
7539 matches!(c, crate::expressions::TableConstraint::Like { .. })
7540 })
7541 && matches!(
7542 target,
7543 DialectType::DuckDB | DialectType::SQLite | DialectType::Drill
7544 ) =>
7545 {
7546 Action::CreateTableLikeToCtas
7547 }
7548 Expression::CreateTable(ref ct)
7549 if ct.columns.is_empty()
7550 && ct.constraints.iter().any(|c| {
7551 matches!(c, crate::expressions::TableConstraint::Like { .. })
7552 })
7553 && matches!(target, DialectType::TSQL | DialectType::Fabric) =>
7554 {
7555 Action::CreateTableLikeToSelectInto
7556 }
7557 Expression::CreateTable(ref ct)
7558 if ct.columns.is_empty()
7559 && ct.constraints.iter().any(|c| {
7560 matches!(c, crate::expressions::TableConstraint::Like { .. })
7561 })
7562 && matches!(target, DialectType::ClickHouse) =>
7563 {
7564 Action::CreateTableLikeToAs
7565 }
7566 // CREATE TABLE: strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
7567 Expression::CreateTable(ref ct)
7568 if matches!(target, DialectType::DuckDB)
7569 && matches!(
7570 source,
7571 DialectType::DuckDB
7572 | DialectType::Spark
7573 | DialectType::Databricks
7574 | DialectType::Hive
7575 ) =>
7576 {
7577 let has_comment = ct.columns.iter().any(|c| {
7578 c.comment.is_some()
7579 || c.constraints.iter().any(|con| {
7580 matches!(con, crate::expressions::ColumnConstraint::Comment(_))
7581 })
7582 });
7583 let has_props = !ct.properties.is_empty();
7584 if has_comment || has_props {
7585 Action::CreateTableStripComment
7586 } else {
7587 Action::None
7588 }
7589 }
7590 // Array conversion: Expression::Array -> Expression::ArrayFunc for PostgreSQL
7591 Expression::Array(_)
7592 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) =>
7593 {
7594 Action::ArrayConcatBracketConvert
7595 }
7596 // ArrayFunc (bracket notation) -> Function("ARRAY") for Redshift (from BigQuery source)
7597 Expression::ArrayFunc(ref arr)
7598 if arr.bracket_notation
7599 && matches!(source, DialectType::BigQuery)
7600 && matches!(target, DialectType::Redshift) =>
7601 {
7602 Action::ArrayConcatBracketConvert
7603 }
7604 // BIT_OR/BIT_AND/BIT_XOR: float/decimal arg cast for DuckDB, or rename for Snowflake
7605 Expression::BitwiseOrAgg(ref f)
7606 | Expression::BitwiseAndAgg(ref f)
7607 | Expression::BitwiseXorAgg(ref f) => {
7608 if matches!(target, DialectType::DuckDB) {
7609 // Check if the arg is CAST(val AS FLOAT/DOUBLE/DECIMAL/REAL)
7610 if let Expression::Cast(ref c) = f.this {
7611 match &c.to {
7612 DataType::Float { .. }
7613 | DataType::Double { .. }
7614 | DataType::Decimal { .. } => Action::BitAggFloatCast,
7615 DataType::Custom { ref name }
7616 if name.eq_ignore_ascii_case("REAL") =>
7617 {
7618 Action::BitAggFloatCast
7619 }
7620 _ => Action::None,
7621 }
7622 } else {
7623 Action::None
7624 }
7625 } else if matches!(target, DialectType::Snowflake) {
7626 Action::BitAggSnowflakeRename
7627 } else {
7628 Action::None
7629 }
7630 }
7631 // FILTER -> IFF for Snowflake (aggregate functions with FILTER clause)
7632 Expression::Filter(ref _f) if matches!(target, DialectType::Snowflake) => {
7633 Action::FilterToIff
7634 }
7635 // AggFunc.filter -> IFF wrapping for Snowflake (e.g., AVG(x) FILTER(WHERE cond))
7636 Expression::Avg(ref f)
7637 | Expression::Sum(ref f)
7638 | Expression::Min(ref f)
7639 | Expression::Max(ref f)
7640 | Expression::CountIf(ref f)
7641 | Expression::Stddev(ref f)
7642 | Expression::StddevPop(ref f)
7643 | Expression::StddevSamp(ref f)
7644 | Expression::Variance(ref f)
7645 | Expression::VarPop(ref f)
7646 | Expression::VarSamp(ref f)
7647 | Expression::Median(ref f)
7648 | Expression::Mode(ref f)
7649 | Expression::First(ref f)
7650 | Expression::Last(ref f)
7651 | Expression::ApproxDistinct(ref f)
7652 if f.filter.is_some() && matches!(target, DialectType::Snowflake) =>
7653 {
7654 Action::AggFilterToIff
7655 }
7656 Expression::Count(ref c)
7657 if c.filter.is_some() && matches!(target, DialectType::Snowflake) =>
7658 {
7659 Action::AggFilterToIff
7660 }
7661 // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN ... END) for dialects that don't support multi-arg DISTINCT
7662 Expression::Count(ref c)
7663 if c.distinct
7664 && matches!(&c.this, Some(Expression::Tuple(_)))
7665 && matches!(
7666 target,
7667 DialectType::Presto
7668 | DialectType::Trino
7669 | DialectType::DuckDB
7670 | DialectType::PostgreSQL
7671 ) =>
7672 {
7673 Action::CountDistinctMultiArg
7674 }
7675 // JSON arrow -> GET_PATH/PARSE_JSON for Snowflake
7676 Expression::JsonExtract(_) if matches!(target, DialectType::Snowflake) => {
7677 Action::JsonToGetPath
7678 }
7679 // DuckDB struct/dict -> BigQuery STRUCT / Presto ROW
7680 Expression::Struct(_)
7681 if matches!(
7682 target,
7683 DialectType::BigQuery | DialectType::Presto | DialectType::Trino
7684 ) && matches!(source, DialectType::DuckDB) =>
7685 {
7686 Action::StructToRow
7687 }
7688 // DuckDB curly-brace dict {'key': value} -> BigQuery STRUCT / Presto ROW
7689 Expression::MapFunc(ref m)
7690 if m.curly_brace_syntax
7691 && matches!(
7692 target,
7693 DialectType::BigQuery | DialectType::Presto | DialectType::Trino
7694 )
7695 && matches!(source, DialectType::DuckDB) =>
7696 {
7697 Action::StructToRow
7698 }
7699 // APPROX_COUNT_DISTINCT -> APPROX_DISTINCT for Presto/Trino
7700 Expression::ApproxCountDistinct(_)
7701 if matches!(
7702 target,
7703 DialectType::Presto | DialectType::Trino | DialectType::Athena
7704 ) =>
7705 {
7706 Action::ApproxCountDistinctToApproxDistinct
7707 }
7708 // ARRAY_CONTAINS(arr, val) -> CONTAINS(arr, val) for Presto, ARRAY_CONTAINS(CAST(val AS VARIANT), arr) for Snowflake
7709 Expression::ArrayContains(_)
7710 if matches!(
7711 target,
7712 DialectType::Presto | DialectType::Trino | DialectType::Snowflake
7713 ) && !(matches!(source, DialectType::Snowflake) && matches!(target, DialectType::Snowflake)) =>
7714 {
7715 Action::ArrayContainsConvert
7716 }
7717 // ARRAY_CONTAINS -> DuckDB NULL-aware CASE (from Snowflake source with check_null semantics)
7718 Expression::ArrayContains(_)
7719 if matches!(target, DialectType::DuckDB)
7720 && matches!(source, DialectType::Snowflake) =>
7721 {
7722 Action::ArrayContainsDuckDBConvert
7723 }
7724 // ARRAY_EXCEPT -> target-specific conversion
7725 Expression::ArrayExcept(_)
7726 if matches!(
7727 target,
7728 DialectType::DuckDB | DialectType::Snowflake | DialectType::Presto | DialectType::Trino | DialectType::Athena
7729 ) =>
7730 {
7731 Action::ArrayExceptConvert
7732 }
7733 // ARRAY_POSITION -> swap args for Snowflake target (only when source is not Snowflake)
7734 Expression::ArrayPosition(_)
7735 if matches!(target, DialectType::Snowflake)
7736 && !matches!(source, DialectType::Snowflake) =>
7737 {
7738 Action::ArrayPositionSnowflakeSwap
7739 }
7740 // ARRAY_POSITION(val, arr) -> ARRAY_POSITION(arr, val) - 1 for DuckDB from Snowflake source
7741 Expression::ArrayPosition(_)
7742 if matches!(target, DialectType::DuckDB)
7743 && matches!(source, DialectType::Snowflake) =>
7744 {
7745 Action::SnowflakeArrayPositionToDuckDB
7746 }
7747 // ARRAY_DISTINCT -> arrayDistinct for ClickHouse
7748 Expression::ArrayDistinct(_)
7749 if matches!(target, DialectType::ClickHouse) =>
7750 {
7751 Action::ArrayDistinctClickHouse
7752 }
7753 // ARRAY_DISTINCT -> DuckDB LIST_DISTINCT with NULL-aware CASE
7754 Expression::ArrayDistinct(_)
7755 if matches!(target, DialectType::DuckDB)
7756 && matches!(source, DialectType::Snowflake) =>
7757 {
7758 Action::ArrayDistinctConvert
7759 }
7760 // StrPosition with position -> complex expansion for Presto/DuckDB
7761 // STRPOS doesn't support a position arg in these dialects
7762 Expression::StrPosition(ref sp)
7763 if sp.position.is_some()
7764 && matches!(
7765 target,
7766 DialectType::Presto
7767 | DialectType::Trino
7768 | DialectType::Athena
7769 | DialectType::DuckDB
7770 ) =>
7771 {
7772 Action::StrPositionExpand
7773 }
7774 // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
7775 Expression::First(ref f)
7776 if f.ignore_nulls == Some(true)
7777 && matches!(target, DialectType::DuckDB) =>
7778 {
7779 Action::FirstToAnyValue
7780 }
7781 // BEGIN -> START TRANSACTION for Presto/Trino
7782 Expression::Command(ref cmd)
7783 if cmd.this.eq_ignore_ascii_case("BEGIN")
7784 && matches!(
7785 target,
7786 DialectType::Presto | DialectType::Trino | DialectType::Athena
7787 ) =>
7788 {
7789 // Handled inline below
7790 Action::None // We'll handle it directly
7791 }
7792 // Note: PostgreSQL ^ is now parsed as Power directly (not BitwiseXor).
7793 // PostgreSQL # is parsed as BitwiseXor (which is correct).
7794 // a || b (Concat operator) -> CONCAT function for Presto/Trino
7795 Expression::Concat(ref _op)
7796 if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
7797 && matches!(target, DialectType::Presto | DialectType::Trino) =>
7798 {
7799 Action::PipeConcatToConcat
7800 }
7801 _ => Action::None,
7802 }
7803 };
7804
7805 match action {
7806 Action::None => {
7807 // Handle inline transforms that don't need a dedicated action
7808
7809 // BETWEEN SYMMETRIC/ASYMMETRIC expansion for non-PostgreSQL/Dremio targets
7810 if let Expression::Between(ref b) = e {
7811 if let Some(sym) = b.symmetric {
7812 let keeps_symmetric =
7813 matches!(target, DialectType::PostgreSQL | DialectType::Dremio);
7814 if !keeps_symmetric {
7815 if sym {
7816 // SYMMETRIC: expand to (x BETWEEN a AND b OR x BETWEEN b AND a)
7817 let b = if let Expression::Between(b) = e {
7818 *b
7819 } else {
7820 unreachable!()
7821 };
7822 let between1 = Expression::Between(Box::new(
7823 crate::expressions::Between {
7824 this: b.this.clone(),
7825 low: b.low.clone(),
7826 high: b.high.clone(),
7827 not: b.not,
7828 symmetric: None,
7829 },
7830 ));
7831 let between2 = Expression::Between(Box::new(
7832 crate::expressions::Between {
7833 this: b.this,
7834 low: b.high,
7835 high: b.low,
7836 not: b.not,
7837 symmetric: None,
7838 },
7839 ));
7840 return Ok(Expression::Paren(Box::new(
7841 crate::expressions::Paren {
7842 this: Expression::Or(Box::new(
7843 crate::expressions::BinaryOp::new(
7844 between1, between2,
7845 ),
7846 )),
7847 trailing_comments: vec![],
7848 },
7849 )));
7850 } else {
7851 // ASYMMETRIC: strip qualifier, keep as regular BETWEEN
7852 let b = if let Expression::Between(b) = e {
7853 *b
7854 } else {
7855 unreachable!()
7856 };
7857 return Ok(Expression::Between(Box::new(
7858 crate::expressions::Between {
7859 this: b.this,
7860 low: b.low,
7861 high: b.high,
7862 not: b.not,
7863 symmetric: None,
7864 },
7865 )));
7866 }
7867 }
7868 }
7869 }
7870
7871 // ILIKE -> LOWER(x) LIKE LOWER(y) for StarRocks/Doris
7872 if let Expression::ILike(ref _like) = e {
7873 if matches!(target, DialectType::StarRocks | DialectType::Doris) {
7874 let like = if let Expression::ILike(l) = e {
7875 *l
7876 } else {
7877 unreachable!()
7878 };
7879 let lower_left = Expression::Function(Box::new(Function::new(
7880 "LOWER".to_string(),
7881 vec![like.left],
7882 )));
7883 let lower_right = Expression::Function(Box::new(Function::new(
7884 "LOWER".to_string(),
7885 vec![like.right],
7886 )));
7887 return Ok(Expression::Like(Box::new(crate::expressions::LikeOp {
7888 left: lower_left,
7889 right: lower_right,
7890 escape: like.escape,
7891 quantifier: like.quantifier,
7892 inferred_type: None,
7893 })));
7894 }
7895 }
7896
7897 // Oracle DBMS_RANDOM.VALUE() -> RANDOM() for PostgreSQL, RAND() for others
7898 if let Expression::MethodCall(ref mc) = e {
7899 if matches!(source, DialectType::Oracle)
7900 && mc.method.name.eq_ignore_ascii_case("VALUE")
7901 && mc.args.is_empty()
7902 {
7903 let is_dbms_random = match &mc.this {
7904 Expression::Identifier(id) => {
7905 id.name.eq_ignore_ascii_case("DBMS_RANDOM")
7906 }
7907 Expression::Column(col) => {
7908 col.table.is_none()
7909 && col.name.name.eq_ignore_ascii_case("DBMS_RANDOM")
7910 }
7911 _ => false,
7912 };
7913 if is_dbms_random {
7914 let func_name = match target {
7915 DialectType::PostgreSQL
7916 | DialectType::Redshift
7917 | DialectType::DuckDB
7918 | DialectType::SQLite => "RANDOM",
7919 DialectType::Oracle => "DBMS_RANDOM.VALUE",
7920 _ => "RAND",
7921 };
7922 return Ok(Expression::Function(Box::new(Function::new(
7923 func_name.to_string(),
7924 vec![],
7925 ))));
7926 }
7927 }
7928 }
7929 // TRIM without explicit position -> add BOTH for ClickHouse
7930 if let Expression::Trim(ref trim) = e {
7931 if matches!(target, DialectType::ClickHouse)
7932 && trim.sql_standard_syntax
7933 && trim.characters.is_some()
7934 && !trim.position_explicit
7935 {
7936 let mut new_trim = (**trim).clone();
7937 new_trim.position_explicit = true;
7938 return Ok(Expression::Trim(Box::new(new_trim)));
7939 }
7940 }
7941 // BEGIN -> START TRANSACTION for Presto/Trino
7942 if let Expression::Transaction(ref txn) = e {
7943 if matches!(
7944 target,
7945 DialectType::Presto | DialectType::Trino | DialectType::Athena
7946 ) {
7947 // Convert BEGIN to START TRANSACTION by setting mark to "START"
7948 let mut txn = txn.clone();
7949 txn.mark = Some(Box::new(Expression::Identifier(Identifier::new(
7950 "START".to_string(),
7951 ))));
7952 return Ok(Expression::Transaction(Box::new(*txn)));
7953 }
7954 }
7955 // IS TRUE/FALSE -> simplified forms for Presto/Trino
7956 if matches!(
7957 target,
7958 DialectType::Presto | DialectType::Trino | DialectType::Athena
7959 ) {
7960 match &e {
7961 Expression::IsTrue(itf) if !itf.not => {
7962 // x IS TRUE -> x
7963 return Ok(itf.this.clone());
7964 }
7965 Expression::IsTrue(itf) if itf.not => {
7966 // x IS NOT TRUE -> NOT x
7967 return Ok(Expression::Not(Box::new(
7968 crate::expressions::UnaryOp {
7969 this: itf.this.clone(),
7970 inferred_type: None,
7971 },
7972 )));
7973 }
7974 Expression::IsFalse(itf) if !itf.not => {
7975 // x IS FALSE -> NOT x
7976 return Ok(Expression::Not(Box::new(
7977 crate::expressions::UnaryOp {
7978 this: itf.this.clone(),
7979 inferred_type: None,
7980 },
7981 )));
7982 }
7983 Expression::IsFalse(itf) if itf.not => {
7984 // x IS NOT FALSE -> NOT NOT x
7985 let not_x =
7986 Expression::Not(Box::new(crate::expressions::UnaryOp {
7987 this: itf.this.clone(),
7988 inferred_type: None,
7989 }));
7990 return Ok(Expression::Not(Box::new(
7991 crate::expressions::UnaryOp {
7992 this: not_x,
7993 inferred_type: None,
7994 },
7995 )));
7996 }
7997 _ => {}
7998 }
7999 }
8000 // x IS NOT FALSE -> NOT x IS FALSE for Redshift
8001 if matches!(target, DialectType::Redshift) {
8002 if let Expression::IsFalse(ref itf) = e {
8003 if itf.not {
8004 return Ok(Expression::Not(Box::new(
8005 crate::expressions::UnaryOp {
8006 this: Expression::IsFalse(Box::new(
8007 crate::expressions::IsTrueFalse {
8008 this: itf.this.clone(),
8009 not: false,
8010 },
8011 )),
8012 inferred_type: None,
8013 },
8014 )));
8015 }
8016 }
8017 }
8018 // REGEXP_REPLACE: add 'g' flag when source defaults to global replacement
8019 // Snowflake default is global, PostgreSQL/DuckDB default is first-match-only
8020 if let Expression::Function(ref f) = e {
8021 if f.name.eq_ignore_ascii_case("REGEXP_REPLACE")
8022 && matches!(source, DialectType::Snowflake)
8023 && matches!(target, DialectType::PostgreSQL | DialectType::DuckDB)
8024 {
8025 if f.args.len() == 3 {
8026 let mut args = f.args.clone();
8027 args.push(Expression::string("g"));
8028 return Ok(Expression::Function(Box::new(Function::new(
8029 "REGEXP_REPLACE".to_string(),
8030 args,
8031 ))));
8032 } else if f.args.len() == 4 {
8033 // 4th arg might be position, add 'g' as 5th
8034 let mut args = f.args.clone();
8035 args.push(Expression::string("g"));
8036 return Ok(Expression::Function(Box::new(Function::new(
8037 "REGEXP_REPLACE".to_string(),
8038 args,
8039 ))));
8040 }
8041 }
8042 }
8043 Ok(e)
8044 }
8045
8046 Action::GreatestLeastNull => {
8047 let f = if let Expression::Function(f) = e {
8048 *f
8049 } else {
8050 unreachable!("action only triggered for Function expressions")
8051 };
8052 let mut null_checks: Vec<Expression> = f
8053 .args
8054 .iter()
8055 .map(|a| {
8056 Expression::IsNull(Box::new(IsNull {
8057 this: a.clone(),
8058 not: false,
8059 postfix_form: false,
8060 }))
8061 })
8062 .collect();
8063 let condition = if null_checks.len() == 1 {
8064 null_checks.remove(0)
8065 } else {
8066 let first = null_checks.remove(0);
8067 null_checks.into_iter().fold(first, |acc, check| {
8068 Expression::Or(Box::new(BinaryOp::new(acc, check)))
8069 })
8070 };
8071 Ok(Expression::Case(Box::new(Case {
8072 operand: None,
8073 whens: vec![(condition, Expression::Null(Null))],
8074 else_: Some(Expression::Function(Box::new(Function::new(
8075 f.name, f.args,
8076 )))),
8077 comments: Vec::new(),
8078 inferred_type: None,
8079 })))
8080 }
8081
8082 Action::ArrayGenerateRange => {
8083 let f = if let Expression::Function(f) = e {
8084 *f
8085 } else {
8086 unreachable!("action only triggered for Function expressions")
8087 };
8088 let start = f.args[0].clone();
8089 let end = f.args[1].clone();
8090 let step = f.args.get(2).cloned();
8091
8092 // Helper: compute end - 1 for converting exclusive→inclusive end.
8093 // When end is a literal number, simplify to a computed literal.
8094 fn exclusive_to_inclusive_end(end: &Expression) -> Expression {
8095 // Try to simplify literal numbers
8096 match end {
8097 Expression::Literal(lit)
8098 if matches!(lit.as_ref(), Literal::Number(_)) =>
8099 {
8100 let Literal::Number(n) = lit.as_ref() else {
8101 unreachable!()
8102 };
8103 if let Ok(val) = n.parse::<i64>() {
8104 return Expression::number(val - 1);
8105 }
8106 }
8107 Expression::Neg(u) => {
8108 if let Expression::Literal(lit) = &u.this {
8109 if let Literal::Number(n) = lit.as_ref() {
8110 if let Ok(val) = n.parse::<i64>() {
8111 return Expression::number(-val - 1);
8112 }
8113 }
8114 }
8115 }
8116 _ => {}
8117 }
8118 // Non-literal: produce end - 1 expression
8119 Expression::Sub(Box::new(BinaryOp::new(end.clone(), Expression::number(1))))
8120 }
8121
8122 match target {
8123 // Snowflake ARRAY_GENERATE_RANGE and DuckDB RANGE both use exclusive end,
8124 // so no adjustment needed — just rename the function.
8125 DialectType::Snowflake => {
8126 let mut args = vec![start, end];
8127 if let Some(s) = step {
8128 args.push(s);
8129 }
8130 Ok(Expression::Function(Box::new(Function::new(
8131 "ARRAY_GENERATE_RANGE".to_string(),
8132 args,
8133 ))))
8134 }
8135 DialectType::DuckDB => {
8136 let mut args = vec![start, end];
8137 if let Some(s) = step {
8138 args.push(s);
8139 }
8140 Ok(Expression::Function(Box::new(Function::new(
8141 "RANGE".to_string(),
8142 args,
8143 ))))
8144 }
8145 // These dialects use inclusive end, so convert exclusive→inclusive.
8146 // Presto/Trino: simplify literal numbers (3 → 2).
8147 DialectType::Presto | DialectType::Trino => {
8148 let end_inclusive = exclusive_to_inclusive_end(&end);
8149 let mut args = vec![start, end_inclusive];
8150 if let Some(s) = step {
8151 args.push(s);
8152 }
8153 Ok(Expression::Function(Box::new(Function::new(
8154 "SEQUENCE".to_string(),
8155 args,
8156 ))))
8157 }
8158 // PostgreSQL, Redshift, BigQuery: keep as end - 1 expression form.
8159 DialectType::PostgreSQL | DialectType::Redshift => {
8160 let end_minus_1 = Expression::Sub(Box::new(BinaryOp::new(
8161 end.clone(),
8162 Expression::number(1),
8163 )));
8164 let mut args = vec![start, end_minus_1];
8165 if let Some(s) = step {
8166 args.push(s);
8167 }
8168 Ok(Expression::Function(Box::new(Function::new(
8169 "GENERATE_SERIES".to_string(),
8170 args,
8171 ))))
8172 }
8173 DialectType::BigQuery => {
8174 let end_minus_1 = Expression::Sub(Box::new(BinaryOp::new(
8175 end.clone(),
8176 Expression::number(1),
8177 )));
8178 let mut args = vec![start, end_minus_1];
8179 if let Some(s) = step {
8180 args.push(s);
8181 }
8182 Ok(Expression::Function(Box::new(Function::new(
8183 "GENERATE_ARRAY".to_string(),
8184 args,
8185 ))))
8186 }
8187 _ => Ok(Expression::Function(Box::new(Function::new(
8188 f.name, f.args,
8189 )))),
8190 }
8191 }
8192
Action::Div0TypedDivision => {
    // DIV0-style guarded division: force floating-point (rather than integer)
    // division in the false branch of IF(cond, t, a / b) by casting the
    // numerator. SQLite spells the floating type REAL; other targets use DOUBLE.
    let if_func = if let Expression::IfFunc(f) = e {
        *f
    } else {
        unreachable!("action only triggered for IfFunc expressions")
    };
    if let Some(Expression::Div(div)) = if_func.false_value {
        // Target-specific floating-point type for the cast.
        let cast_type = if matches!(target, DialectType::SQLite) {
            DataType::Float {
                precision: None,
                scale: None,
                real_spelling: true,
            }
        } else {
            DataType::Double {
                precision: None,
                scale: None,
            }
        };
        // CAST(numerator AS <float type>)
        let casted_left = Expression::Cast(Box::new(Cast {
            this: div.left,
            to: cast_type,
            trailing_comments: vec![],
            double_colon_syntax: false,
            format: None,
            default: None,
            inferred_type: None,
        }));
        // Rebuild the IF with the cast applied to the division's left side.
        Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
            condition: if_func.condition,
            true_value: if_func.true_value,
            false_value: Some(Expression::Div(Box::new(BinaryOp::new(
                casted_left,
                div.right,
            )))),
            original_name: if_func.original_name,
            inferred_type: None,
        })))
    } else {
        // false_value was not a Div (or was absent): reconstruct unchanged.
        Ok(Expression::IfFunc(Box::new(if_func)))
    }
}
8236
8237 Action::ArrayAggCollectList => {
8238 let agg = if let Expression::ArrayAgg(a) = e {
8239 *a
8240 } else {
8241 unreachable!("action only triggered for ArrayAgg expressions")
8242 };
8243 Ok(Expression::ArrayAgg(Box::new(AggFunc {
8244 name: Some("COLLECT_LIST".to_string()),
8245 ..agg
8246 })))
8247 }
8248
8249 Action::ArrayAggToGroupConcat => {
8250 let agg = if let Expression::ArrayAgg(a) = e {
8251 *a
8252 } else {
8253 unreachable!("action only triggered for ArrayAgg expressions")
8254 };
8255 Ok(Expression::ArrayAgg(Box::new(AggFunc {
8256 name: Some("GROUP_CONCAT".to_string()),
8257 ..agg
8258 })))
8259 }
8260
Action::ArrayAggWithinGroupFilter => {
    // Flatten ARRAY_AGG(...) WITHIN GROUP (ORDER BY ...) into a plain
    // ARRAY_AGG with an inline ORDER BY plus FILTER(WHERE x IS NOT NULL).
    let wg = if let Expression::WithinGroup(w) = e {
        *w
    } else {
        unreachable!("action only triggered for WithinGroup expressions")
    };
    if let Expression::ArrayAgg(inner_agg) = wg.this {
        // FILTER(WHERE <aggregated expr> IS NOT NULL)
        let col = inner_agg.this.clone();
        let filter = Expression::IsNull(Box::new(IsNull {
            this: col,
            not: true,
            postfix_form: false,
        }));
        // For DuckDB, add explicit NULLS FIRST for DESC ordering
        let order_by = if matches!(target, DialectType::DuckDB) {
            wg.order_by
                .into_iter()
                .map(|mut o| {
                    if o.desc && o.nulls_first.is_none() {
                        o.nulls_first = Some(true);
                    }
                    o
                })
                .collect()
        } else {
            wg.order_by
        };
        // NOTE(review): any pre-existing FILTER on the inner aggregate is
        // replaced by the NOT NULL filter, and the WITHIN GROUP ordering
        // supersedes the aggregate's own — confirm the action never fires
        // when either is already set.
        Ok(Expression::ArrayAgg(Box::new(AggFunc {
            this: inner_agg.this,
            distinct: inner_agg.distinct,
            filter: Some(filter),
            order_by,
            name: inner_agg.name,
            ignore_nulls: inner_agg.ignore_nulls,
            having_max: inner_agg.having_max,
            limit: inner_agg.limit,
            inferred_type: None,
        })))
    } else {
        // WITHIN GROUP over something other than ARRAY_AGG: leave unchanged.
        Ok(Expression::WithinGroup(Box::new(wg)))
    }
}
8303
8304 Action::ArrayAggFilter => {
8305 let agg = if let Expression::ArrayAgg(a) = e {
8306 *a
8307 } else {
8308 unreachable!("action only triggered for ArrayAgg expressions")
8309 };
8310 let col = agg.this.clone();
8311 let filter = Expression::IsNull(Box::new(IsNull {
8312 this: col,
8313 not: true,
8314 postfix_form: false,
8315 }));
8316 Ok(Expression::ArrayAgg(Box::new(AggFunc {
8317 filter: Some(filter),
8318 ..agg
8319 })))
8320 }
8321
8322 Action::ArrayAggNullFilter => {
8323 // ARRAY_AGG(x) FILTER(WHERE cond) -> ARRAY_AGG(x) FILTER(WHERE cond AND NOT x IS NULL)
8324 // For source dialects that exclude NULLs (Spark/Hive) targeting DuckDB which includes them
8325 let agg = if let Expression::ArrayAgg(a) = e {
8326 *a
8327 } else {
8328 unreachable!("action only triggered for ArrayAgg expressions")
8329 };
8330 let col = agg.this.clone();
8331 let not_null = Expression::IsNull(Box::new(IsNull {
8332 this: col,
8333 not: true,
8334 postfix_form: true, // Use "NOT x IS NULL" form (prefix NOT)
8335 }));
8336 let new_filter = if let Some(existing_filter) = agg.filter {
8337 // AND the NOT IS NULL with existing filter
8338 Expression::And(Box::new(crate::expressions::BinaryOp::new(
8339 existing_filter,
8340 not_null,
8341 )))
8342 } else {
8343 not_null
8344 };
8345 Ok(Expression::ArrayAgg(Box::new(AggFunc {
8346 filter: Some(new_filter),
8347 ..agg
8348 })))
8349 }
8350
Action::BigQueryArraySelectAsStructToSnowflake => {
    // ARRAY(SELECT AS STRUCT x1 AS x1, x2 AS x2 FROM t)
    // -> (SELECT ARRAY_AGG(OBJECT_CONSTRUCT('x1', x1, 'x2', x2)) FROM t)
    if let Expression::Function(mut f) = e {
        // Only rewrite ARRAY(...) whose single argument is a SELECT AS STRUCT.
        let is_match = f.args.len() == 1
            && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT"));
        if is_match {
            let inner_select = match f.args.remove(0) {
                Expression::Select(s) => *s,
                _ => unreachable!(
                    "argument already verified to be a Select expression"
                ),
            };
            // Build OBJECT_CONSTRUCT args from SELECT expressions:
            // alternating 'name' literal, value pairs.
            let mut oc_args = Vec::new();
            for expr in &inner_select.expressions {
                match expr {
                    // x AS alias -> 'alias', x
                    Expression::Alias(a) => {
                        let key = Expression::Literal(Box::new(Literal::String(
                            a.alias.name.clone(),
                        )));
                        let value = a.this.clone();
                        oc_args.push(key);
                        oc_args.push(value);
                    }
                    // Bare column -> 'col', col
                    Expression::Column(c) => {
                        let key = Expression::Literal(Box::new(Literal::String(
                            c.name.name.clone(),
                        )));
                        oc_args.push(key);
                        oc_args.push(expr.clone());
                    }
                    // NOTE(review): an expression with neither alias nor column
                    // name contributes a value without a key, leaving
                    // OBJECT_CONSTRUCT with an odd argument count — confirm
                    // this case is unreachable upstream.
                    _ => {
                        oc_args.push(expr.clone());
                    }
                }
            }
            let object_construct = Expression::Function(Box::new(Function::new(
                "OBJECT_CONSTRUCT".to_string(),
                oc_args,
            )));
            let array_agg = Expression::Function(Box::new(Function::new(
                "ARRAY_AGG".to_string(),
                vec![object_construct],
            )));
            // Carry the inner SELECT's clauses over to the new aggregate query.
            // NOTE(review): ORDER BY / LIMIT of the inner SELECT are not
            // copied here — confirm they cannot occur in this construct.
            let mut new_select = crate::expressions::Select::new();
            new_select.expressions = vec![array_agg];
            new_select.from = inner_select.from.clone();
            new_select.where_clause = inner_select.where_clause.clone();
            new_select.group_by = inner_select.group_by.clone();
            new_select.having = inner_select.having.clone();
            new_select.joins = inner_select.joins.clone();
            // Wrap as a scalar subquery: (SELECT ARRAY_AGG(...) FROM t ...)
            Ok(Expression::Subquery(Box::new(
                crate::expressions::Subquery {
                    this: Expression::Select(Box::new(new_select)),
                    alias: None,
                    column_aliases: Vec::new(),
                    order_by: None,
                    limit: None,
                    offset: None,
                    distribute_by: None,
                    sort_by: None,
                    cluster_by: None,
                    lateral: false,
                    modifiers_inside: false,
                    trailing_comments: Vec::new(),
                    inferred_type: None,
                },
            )))
        } else {
            Ok(Expression::Function(f))
        }
    } else {
        Ok(e)
    }
}
8427
8428 Action::BigQueryPercentileContToDuckDB => {
8429 // PERCENTILE_CONT(x, frac [RESPECT NULLS]) -> QUANTILE_CONT(x, frac) for DuckDB
8430 if let Expression::AggregateFunction(mut af) = e {
8431 af.name = "QUANTILE_CONT".to_string();
8432 af.ignore_nulls = None; // Strip RESPECT/IGNORE NULLS
8433 // Keep only first 2 args
8434 if af.args.len() > 2 {
8435 af.args.truncate(2);
8436 }
8437 Ok(Expression::AggregateFunction(af))
8438 } else {
8439 Ok(e)
8440 }
8441 }
8442
8443 Action::ArrayAggIgnoreNullsDuckDB => {
8444 // ARRAY_AGG(x IGNORE NULLS ORDER BY a, b DESC) -> ARRAY_AGG(x ORDER BY a NULLS FIRST, b DESC)
8445 // Strip IGNORE NULLS, add NULLS FIRST to first ORDER BY column
8446 let mut agg = if let Expression::ArrayAgg(a) = e {
8447 *a
8448 } else {
8449 unreachable!("action only triggered for ArrayAgg expressions")
8450 };
8451 agg.ignore_nulls = None; // Strip IGNORE NULLS
8452 if !agg.order_by.is_empty() {
8453 agg.order_by[0].nulls_first = Some(true);
8454 }
8455 Ok(Expression::ArrayAgg(Box::new(agg)))
8456 }
8457
8458 Action::CountDistinctMultiArg => {
8459 // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN a IS NULL THEN NULL WHEN b IS NULL THEN NULL ELSE (a, b) END)
8460 if let Expression::Count(c) = e {
8461 if let Some(Expression::Tuple(t)) = c.this {
8462 let args = t.expressions;
8463 // Build CASE expression:
8464 // WHEN a IS NULL THEN NULL WHEN b IS NULL THEN NULL ELSE (a, b) END
8465 let mut whens = Vec::new();
8466 for arg in &args {
8467 whens.push((
8468 Expression::IsNull(Box::new(IsNull {
8469 this: arg.clone(),
8470 not: false,
8471 postfix_form: false,
8472 })),
8473 Expression::Null(crate::expressions::Null),
8474 ));
8475 }
8476 // Build the tuple for ELSE
8477 let tuple_expr =
8478 Expression::Tuple(Box::new(crate::expressions::Tuple {
8479 expressions: args,
8480 }));
8481 let case_expr = Expression::Case(Box::new(crate::expressions::Case {
8482 operand: None,
8483 whens,
8484 else_: Some(tuple_expr),
8485 comments: Vec::new(),
8486 inferred_type: None,
8487 }));
8488 Ok(Expression::Count(Box::new(crate::expressions::CountFunc {
8489 this: Some(case_expr),
8490 star: false,
8491 distinct: true,
8492 filter: c.filter,
8493 ignore_nulls: c.ignore_nulls,
8494 original_name: c.original_name,
8495 inferred_type: None,
8496 })))
8497 } else {
8498 Ok(Expression::Count(c))
8499 }
8500 } else {
8501 Ok(e)
8502 }
8503 }
8504
8505 Action::CastTimestampToDatetime => {
8506 let c = if let Expression::Cast(c) = e {
8507 *c
8508 } else {
8509 unreachable!("action only triggered for Cast expressions")
8510 };
8511 Ok(Expression::Cast(Box::new(Cast {
8512 to: DataType::Custom {
8513 name: "DATETIME".to_string(),
8514 },
8515 ..c
8516 })))
8517 }
8518
8519 Action::CastTimestampStripTz => {
8520 // CAST(x AS TIMESTAMP(n) WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark/BigQuery
8521 let c = if let Expression::Cast(c) = e {
8522 *c
8523 } else {
8524 unreachable!("action only triggered for Cast expressions")
8525 };
8526 Ok(Expression::Cast(Box::new(Cast {
8527 to: DataType::Timestamp {
8528 precision: None,
8529 timezone: false,
8530 },
8531 ..c
8532 })))
8533 }
8534
8535 Action::CastTimestamptzToFunc => {
8536 // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) function for MySQL/StarRocks
8537 let c = if let Expression::Cast(c) = e {
8538 *c
8539 } else {
8540 unreachable!("action only triggered for Cast expressions")
8541 };
8542 Ok(Expression::Function(Box::new(Function::new(
8543 "TIMESTAMP".to_string(),
8544 vec![c.this],
8545 ))))
8546 }
8547
8548 Action::ToDateToCast => {
8549 // Convert TO_DATE(x) -> CAST(x AS DATE) for DuckDB
8550 if let Expression::Function(f) = e {
8551 let arg = f.args.into_iter().next().unwrap();
8552 Ok(Expression::Cast(Box::new(Cast {
8553 this: arg,
8554 to: DataType::Date,
8555 double_colon_syntax: false,
8556 trailing_comments: vec![],
8557 format: None,
8558 default: None,
8559 inferred_type: None,
8560 })))
8561 } else {
8562 Ok(e)
8563 }
8564 }
Action::DateTruncWrapCast => {
    // Wrap a DATE_TRUNC over a casted input in an outer CAST back to that
    // input type. For TIME inputs (which DATE_TRUNC cannot handle directly),
    // the value is first anchored to the epoch date via
    // CAST('1970-01-01' AS DATE) + <time>, truncated, then cast back to TIME.
    // Handle both Expression::DateTrunc/TimestampTrunc and
    // Expression::Function("DATE_TRUNC", [unit, expr])
    match e {
        Expression::DateTrunc(d) | Expression::TimestampTrunc(d) => {
            // Only act when the trunc input is itself a CAST; remember its type.
            let input_type = match &d.this {
                Expression::Cast(c) => Some(c.to.clone()),
                _ => None,
            };
            if let Some(cast_type) = input_type {
                let is_time = matches!(cast_type, DataType::Time { .. });
                if is_time {
                    // CAST('1970-01-01' AS DATE) — epoch anchor for the TIME value.
                    let date_expr = Expression::Cast(Box::new(Cast {
                        this: Expression::Literal(Box::new(
                            crate::expressions::Literal::String(
                                "1970-01-01".to_string(),
                            ),
                        )),
                        to: DataType::Date,
                        double_colon_syntax: false,
                        trailing_comments: vec![],
                        format: None,
                        default: None,
                        inferred_type: None,
                    }));
                    let add_expr =
                        Expression::Add(Box::new(BinaryOp::new(date_expr, d.this)));
                    // NOTE(review): a TimestampTrunc input is re-emitted as
                    // DateTrunc here — presumably both render as DATE_TRUNC;
                    // confirm in the generator.
                    let inner = Expression::DateTrunc(Box::new(DateTruncFunc {
                        this: add_expr,
                        unit: d.unit,
                    }));
                    // CAST(DATE_TRUNC(unit, epoch + time) AS TIME)
                    Ok(Expression::Cast(Box::new(Cast {
                        this: inner,
                        to: cast_type,
                        double_colon_syntax: false,
                        trailing_comments: vec![],
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                } else {
                    // Non-TIME cast input: just wrap the trunc in an outer CAST
                    // back to the input's declared type.
                    let inner = Expression::DateTrunc(Box::new(*d));
                    Ok(Expression::Cast(Box::new(Cast {
                        this: inner,
                        to: cast_type,
                        double_colon_syntax: false,
                        trailing_comments: vec![],
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                }
            } else {
                // Input is not a CAST: nothing to wrap.
                Ok(Expression::DateTrunc(d))
            }
        }
        Expression::Function(f) if f.args.len() == 2 => {
            // Function-based DATE_TRUNC(unit, expr)
            let input_type = match &f.args[1] {
                Expression::Cast(c) => Some(c.to.clone()),
                _ => None,
            };
            if let Some(cast_type) = input_type {
                let is_time = matches!(cast_type, DataType::Time { .. });
                if is_time {
                    // Same epoch-anchoring trick as the node-based branch above.
                    let date_expr = Expression::Cast(Box::new(Cast {
                        this: Expression::Literal(Box::new(
                            crate::expressions::Literal::String(
                                "1970-01-01".to_string(),
                            ),
                        )),
                        to: DataType::Date,
                        double_colon_syntax: false,
                        trailing_comments: vec![],
                        format: None,
                        default: None,
                        inferred_type: None,
                    }));
                    let mut args = f.args;
                    let unit_arg = args.remove(0);
                    let time_expr = args.remove(0);
                    let add_expr = Expression::Add(Box::new(BinaryOp::new(
                        date_expr, time_expr,
                    )));
                    let inner = Expression::Function(Box::new(Function::new(
                        "DATE_TRUNC".to_string(),
                        vec![unit_arg, add_expr],
                    )));
                    Ok(Expression::Cast(Box::new(Cast {
                        this: inner,
                        to: cast_type,
                        double_colon_syntax: false,
                        trailing_comments: vec![],
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                } else {
                    // Wrap the function in CAST
                    Ok(Expression::Cast(Box::new(Cast {
                        this: Expression::Function(f),
                        to: cast_type,
                        double_colon_syntax: false,
                        trailing_comments: vec![],
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                }
            } else {
                Ok(Expression::Function(f))
            }
        }
        // Any other shape passes through untouched.
        other => Ok(other),
    }
}
8681
Action::RegexpReplaceSnowflakeToDuckDB => {
    // Snowflake REGEXP_REPLACE(s, p, r, position) -> REGEXP_REPLACE(s, p, r, 'g')
    // Snowflake replaces all occurrences by default, so the DuckDB call gets
    // an explicit 'g' flag.
    // NOTE(review): the position argument (and anything after it) is discarded
    // with the leftover `args` — presumably the action only fires when
    // position is 1; confirm at the trigger site. The three `remove(0)` calls
    // also assume at least three arguments.
    if let Expression::Function(f) = e {
        let mut args = f.args;
        let subject = args.remove(0);
        let pattern = args.remove(0);
        let replacement = args.remove(0);
        Ok(Expression::Function(Box::new(Function::new(
            "REGEXP_REPLACE".to_string(),
            vec![
                subject,
                pattern,
                replacement,
                Expression::Literal(Box::new(crate::expressions::Literal::String(
                    "g".to_string(),
                ))),
            ],
        ))))
    } else {
        Ok(e)
    }
}
8704
8705 Action::RegexpReplacePositionSnowflakeToDuckDB => {
8706 // Snowflake REGEXP_REPLACE(s, p, r, pos, occ) -> DuckDB form
8707 // pos=1, occ=1 -> REGEXP_REPLACE(s, p, r) (single replace, no 'g')
8708 // pos>1, occ=0 -> SUBSTRING(s, 1, pos-1) || REGEXP_REPLACE(SUBSTRING(s, pos), p, r, 'g')
8709 // pos>1, occ=1 -> SUBSTRING(s, 1, pos-1) || REGEXP_REPLACE(SUBSTRING(s, pos), p, r)
8710 // pos=1, occ=0 -> REGEXP_REPLACE(s, p, r, 'g') (replace all)
8711 if let Expression::Function(f) = e {
8712 let mut args = f.args;
8713 let subject = args.remove(0);
8714 let pattern = args.remove(0);
8715 let replacement = args.remove(0);
8716 let position = args.remove(0);
8717 let occurrence = args.remove(0);
8718
8719 let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
8720 let is_occ_0 = matches!(&occurrence, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "0"));
8721 let is_occ_1 = matches!(&occurrence, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
8722
8723 if is_pos_1 && is_occ_1 {
8724 // REGEXP_REPLACE(s, p, r) - single replace, no flags
8725 Ok(Expression::Function(Box::new(Function::new(
8726 "REGEXP_REPLACE".to_string(),
8727 vec![subject, pattern, replacement],
8728 ))))
8729 } else if is_pos_1 && is_occ_0 {
8730 // REGEXP_REPLACE(s, p, r, 'g') - global replace
8731 Ok(Expression::Function(Box::new(Function::new(
8732 "REGEXP_REPLACE".to_string(),
8733 vec![
8734 subject,
8735 pattern,
8736 replacement,
8737 Expression::Literal(Box::new(Literal::String("g".to_string()))),
8738 ],
8739 ))))
8740 } else {
8741 // pos>1: SUBSTRING(s, 1, pos-1) || REGEXP_REPLACE(SUBSTRING(s, pos), p, r[, 'g'])
8742 // Pre-compute pos-1 when position is a numeric literal
8743 let pos_minus_1 = if let Expression::Literal(ref lit) = position {
8744 if let Literal::Number(ref n) = lit.as_ref() {
8745 if let Ok(val) = n.parse::<i64>() {
8746 Expression::number(val - 1)
8747 } else {
8748 Expression::Sub(Box::new(BinaryOp::new(
8749 position.clone(),
8750 Expression::number(1),
8751 )))
8752 }
8753 } else {
8754 position.clone()
8755 }
8756 } else {
8757 Expression::Sub(Box::new(BinaryOp::new(
8758 position.clone(),
8759 Expression::number(1),
8760 )))
8761 };
8762 let prefix = Expression::Function(Box::new(Function::new(
8763 "SUBSTRING".to_string(),
8764 vec![subject.clone(), Expression::number(1), pos_minus_1],
8765 )));
8766 let suffix_subject = Expression::Function(Box::new(Function::new(
8767 "SUBSTRING".to_string(),
8768 vec![subject, position],
8769 )));
8770 let mut replace_args = vec![suffix_subject, pattern, replacement];
8771 if is_occ_0 {
8772 replace_args.push(Expression::Literal(Box::new(Literal::String(
8773 "g".to_string(),
8774 ))));
8775 }
8776 let replace_expr = Expression::Function(Box::new(Function::new(
8777 "REGEXP_REPLACE".to_string(),
8778 replace_args,
8779 )));
8780 Ok(Expression::DPipe(Box::new(crate::expressions::DPipe {
8781 this: Box::new(prefix),
8782 expression: Box::new(replace_expr),
8783 safe: None,
8784 })))
8785 }
8786 } else {
8787 Ok(e)
8788 }
8789 }
8790
Action::RegexpSubstrSnowflakeToDuckDB => {
    // Snowflake REGEXP_SUBSTR -> DuckDB REGEXP_EXTRACT variants.
    // Dispatches on argument count, since each optional Snowflake argument
    // (position, occurrence, flags, group) needs a different DuckDB shape.
    if let Expression::Function(f) = e {
        let mut args = f.args;
        let arg_count = args.len();
        match arg_count {
            // REGEXP_SUBSTR(s, p) -> REGEXP_EXTRACT(s, p)
            0..=2 => Ok(Expression::Function(Box::new(Function::new(
                "REGEXP_EXTRACT".to_string(),
                args,
            )))),
            // REGEXP_SUBSTR(s, p, pos) -> REGEXP_EXTRACT(NULLIF(SUBSTRING(s, pos), ''), p)
            3 => {
                let subject = args.remove(0);
                let pattern = args.remove(0);
                let position = args.remove(0);
                let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
                if is_pos_1 {
                    // pos=1 is the default: no substring shift needed.
                    Ok(Expression::Function(Box::new(Function::new(
                        "REGEXP_EXTRACT".to_string(),
                        vec![subject, pattern],
                    ))))
                } else {
                    // NULLIF(SUBSTRING(s, pos), '') — presumably so an
                    // out-of-range position yields NULL rather than matching
                    // against the empty string; confirm against Snowflake docs.
                    let substring_expr =
                        Expression::Function(Box::new(Function::new(
                            "SUBSTRING".to_string(),
                            vec![subject, position],
                        )));
                    let nullif_expr =
                        Expression::Function(Box::new(Function::new(
                            "NULLIF".to_string(),
                            vec![
                                substring_expr,
                                Expression::Literal(Box::new(Literal::String(
                                    String::new(),
                                ))),
                            ],
                        )));
                    Ok(Expression::Function(Box::new(Function::new(
                        "REGEXP_EXTRACT".to_string(),
                        vec![nullif_expr, pattern],
                    ))))
                }
            }
            // REGEXP_SUBSTR(s, p, pos, occ) -> depends on pos and occ
            4 => {
                let subject = args.remove(0);
                let pattern = args.remove(0);
                let position = args.remove(0);
                let occurrence = args.remove(0);
                let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
                let is_occ_1 = matches!(&occurrence, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));

                // Apply the position shift first (same NULLIF trick as above).
                let effective_subject = if is_pos_1 {
                    subject
                } else {
                    let substring_expr =
                        Expression::Function(Box::new(Function::new(
                            "SUBSTRING".to_string(),
                            vec![subject, position],
                        )));
                    Expression::Function(Box::new(Function::new(
                        "NULLIF".to_string(),
                        vec![
                            substring_expr,
                            Expression::Literal(Box::new(Literal::String(
                                String::new(),
                            ))),
                        ],
                    )))
                };

                if is_occ_1 {
                    Ok(Expression::Function(Box::new(Function::new(
                        "REGEXP_EXTRACT".to_string(),
                        vec![effective_subject, pattern],
                    ))))
                } else {
                    // ARRAY_EXTRACT(REGEXP_EXTRACT_ALL(s, p), occ)
                    let extract_all =
                        Expression::Function(Box::new(Function::new(
                            "REGEXP_EXTRACT_ALL".to_string(),
                            vec![effective_subject, pattern],
                        )));
                    Ok(Expression::Function(Box::new(Function::new(
                        "ARRAY_EXTRACT".to_string(),
                        vec![extract_all, occurrence],
                    ))))
                }
            }
            // REGEXP_SUBSTR(s, p, 1, 1, 'e') -> REGEXP_EXTRACT(s, p)
            // NOTE(review): position/occurrence are discarded without checking
            // they equal 1 — confirm the action only fires for the (1, 1, 'e')
            // shape.
            5 => {
                let subject = args.remove(0);
                let pattern = args.remove(0);
                let _position = args.remove(0);
                let _occurrence = args.remove(0);
                let _flags = args.remove(0);
                // Strip 'e' flag, convert to REGEXP_EXTRACT
                Ok(Expression::Function(Box::new(Function::new(
                    "REGEXP_EXTRACT".to_string(),
                    vec![subject, pattern],
                ))))
            }
            // REGEXP_SUBSTR(s, p, 1, 1, 'e', group) -> REGEXP_EXTRACT(s, p[, group])
            _ => {
                let subject = args.remove(0);
                let pattern = args.remove(0);
                let _position = args.remove(0);
                let _occurrence = args.remove(0);
                let _flags = args.remove(0);
                let group = args.remove(0);
                let is_group_0 = matches!(&group, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "0"));
                if is_group_0 {
                    // Strip group=0 (default)
                    Ok(Expression::Function(Box::new(Function::new(
                        "REGEXP_EXTRACT".to_string(),
                        vec![subject, pattern],
                    ))))
                } else {
                    Ok(Expression::Function(Box::new(Function::new(
                        "REGEXP_EXTRACT".to_string(),
                        vec![subject, pattern, group],
                    ))))
                }
            }
        }
    } else {
        Ok(e)
    }
}
8921
8922 Action::RegexpSubstrSnowflakeIdentity => {
8923 // Snowflake→Snowflake: REGEXP_SUBSTR/REGEXP_SUBSTR_ALL with 6 args
8924 // Strip trailing group=0
8925 if let Expression::Function(f) = e {
8926 let func_name = f.name.clone();
8927 let mut args = f.args;
8928 if args.len() == 6 {
8929 let is_group_0 = matches!(&args[5], Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "0"));
8930 if is_group_0 {
8931 args.truncate(5);
8932 }
8933 }
8934 Ok(Expression::Function(Box::new(Function::new(
8935 func_name, args,
8936 ))))
8937 } else {
8938 Ok(e)
8939 }
8940 }
8941
Action::RegexpSubstrAllSnowflakeToDuckDB => {
    // Snowflake REGEXP_SUBSTR_ALL -> DuckDB REGEXP_EXTRACT_ALL variants.
    // Dispatches on argument count, mirroring the single-match
    // RegexpSubstrSnowflakeToDuckDB action above.
    if let Expression::Function(f) = e {
        let mut args = f.args;
        let arg_count = args.len();
        match arg_count {
            // REGEXP_SUBSTR_ALL(s, p) -> REGEXP_EXTRACT_ALL(s, p)
            0..=2 => Ok(Expression::Function(Box::new(Function::new(
                "REGEXP_EXTRACT_ALL".to_string(),
                args,
            )))),
            // REGEXP_SUBSTR_ALL(s, p, pos) -> REGEXP_EXTRACT_ALL(SUBSTRING(s, pos), p)
            3 => {
                let subject = args.remove(0);
                let pattern = args.remove(0);
                let position = args.remove(0);
                let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
                if is_pos_1 {
                    // pos=1 is the default: no shift needed.
                    Ok(Expression::Function(Box::new(Function::new(
                        "REGEXP_EXTRACT_ALL".to_string(),
                        vec![subject, pattern],
                    ))))
                } else {
                    let substring_expr =
                        Expression::Function(Box::new(Function::new(
                            "SUBSTRING".to_string(),
                            vec![subject, position],
                        )));
                    Ok(Expression::Function(Box::new(Function::new(
                        "REGEXP_EXTRACT_ALL".to_string(),
                        vec![substring_expr, pattern],
                    ))))
                }
            }
            // REGEXP_SUBSTR_ALL(s, p, 1, occ) -> REGEXP_EXTRACT_ALL(s, p)[occ:]
            4 => {
                let subject = args.remove(0);
                let pattern = args.remove(0);
                let position = args.remove(0);
                let occurrence = args.remove(0);
                let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
                let is_occ_1 = matches!(&occurrence, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));

                // Apply the position shift first, if any.
                let effective_subject = if is_pos_1 {
                    subject
                } else {
                    Expression::Function(Box::new(Function::new(
                        "SUBSTRING".to_string(),
                        vec![subject, position],
                    )))
                };

                if is_occ_1 {
                    Ok(Expression::Function(Box::new(Function::new(
                        "REGEXP_EXTRACT_ALL".to_string(),
                        vec![effective_subject, pattern],
                    ))))
                } else {
                    // REGEXP_EXTRACT_ALL(s, p)[occ:] — drop matches before occ.
                    let extract_all =
                        Expression::Function(Box::new(Function::new(
                            "REGEXP_EXTRACT_ALL".to_string(),
                            vec![effective_subject, pattern],
                        )));
                    Ok(Expression::ArraySlice(Box::new(
                        crate::expressions::ArraySlice {
                            this: extract_all,
                            start: Some(occurrence),
                            end: None,
                        },
                    )))
                }
            }
            // REGEXP_SUBSTR_ALL(s, p, 1, 1, 'e') -> REGEXP_EXTRACT_ALL(s, p)
            // NOTE(review): position/occurrence are discarded without checking
            // they equal 1 — confirm the action only fires for that shape.
            5 => {
                let subject = args.remove(0);
                let pattern = args.remove(0);
                let _position = args.remove(0);
                let _occurrence = args.remove(0);
                let _flags = args.remove(0);
                Ok(Expression::Function(Box::new(Function::new(
                    "REGEXP_EXTRACT_ALL".to_string(),
                    vec![subject, pattern],
                ))))
            }
            // REGEXP_SUBSTR_ALL(s, p, 1, 1, 'e', 0) -> REGEXP_EXTRACT_ALL(s, p)
            _ => {
                let subject = args.remove(0);
                let pattern = args.remove(0);
                let _position = args.remove(0);
                let _occurrence = args.remove(0);
                let _flags = args.remove(0);
                let group = args.remove(0);
                let is_group_0 = matches!(&group, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "0"));
                if is_group_0 {
                    // group=0 is the default capture group: strip it.
                    Ok(Expression::Function(Box::new(Function::new(
                        "REGEXP_EXTRACT_ALL".to_string(),
                        vec![subject, pattern],
                    ))))
                } else {
                    Ok(Expression::Function(Box::new(Function::new(
                        "REGEXP_EXTRACT_ALL".to_string(),
                        vec![subject, pattern, group],
                    ))))
                }
            }
        }
    } else {
        Ok(e)
    }
}
9053
Action::RegexpCountSnowflakeToDuckDB => {
    // Snowflake REGEXP_COUNT(s, p[, pos[, flags]]) ->
    // DuckDB: CASE WHEN p = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(s, p)) END
    if let Expression::Function(f) = e {
        let mut args = f.args;
        let arg_count = args.len();
        let subject = args.remove(0);
        let pattern = args.remove(0);

        // Handle position arg: count only within SUBSTRING(s, pos).
        let effective_subject = if arg_count >= 3 {
            let position = args.remove(0);
            Expression::Function(Box::new(Function::new(
                "SUBSTRING".to_string(),
                vec![subject, position],
            )))
        } else {
            subject
        };

        // Handle flags arg -> embed as (?flags) prefix in pattern
        let effective_pattern = if arg_count >= 4 {
            let flags = args.remove(0);
            match &flags {
                Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(f_str) if !f_str.is_empty()) =>
                {
                    let Literal::String(f_str) = lit.as_ref() else {
                        unreachable!()
                    };
                    // Always use concatenation: '(?flags)' || pattern
                    let prefix = Expression::Literal(Box::new(Literal::String(
                        format!("(?{})", f_str),
                    )));
                    Expression::DPipe(Box::new(crate::expressions::DPipe {
                        this: Box::new(prefix),
                        expression: Box::new(pattern.clone()),
                        safe: None,
                    }))
                }
                // Non-literal or empty flags: pattern used unmodified.
                _ => pattern.clone(),
            }
        } else {
            pattern.clone()
        };

        // Build: CASE WHEN p = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(s, p)) END
        // NOTE(review): the empty-pattern guard compares the flag-prefixed
        // pattern, so when flags are present '(?i)...' can never equal '' and
        // the THEN 0 branch is unreachable — confirm whether the guard should
        // test the original pattern instead.
        let extract_all = Expression::Function(Box::new(Function::new(
            "REGEXP_EXTRACT_ALL".to_string(),
            vec![effective_subject, effective_pattern.clone()],
        )));
        let length_expr =
            Expression::Length(Box::new(crate::expressions::UnaryFunc {
                this: extract_all,
                original_name: None,
                inferred_type: None,
            }));
        let condition = Expression::Eq(Box::new(BinaryOp::new(
            effective_pattern,
            Expression::Literal(Box::new(Literal::String(String::new()))),
        )));
        Ok(Expression::Case(Box::new(Case {
            operand: None,
            whens: vec![(condition, Expression::number(0))],
            else_: Some(length_expr),
            comments: vec![],
            inferred_type: None,
        })))
    } else {
        Ok(e)
    }
}
9125
9126 Action::RegexpInstrSnowflakeToDuckDB => {
9127 // Snowflake REGEXP_INSTR(s, p[, pos[, occ[, option[, flags[, group]]]]]) ->
9128 // DuckDB: CASE WHEN s IS NULL OR p IS NULL [OR ...] THEN NULL
9129 // WHEN p = '' THEN 0
9130 // WHEN LENGTH(REGEXP_EXTRACT_ALL(eff_s, eff_p)) < occ THEN 0
9131 // ELSE 1 + COALESCE(LIST_SUM(LIST_TRANSFORM(STRING_SPLIT_REGEX(eff_s, eff_p)[1:occ], x -> LENGTH(x))), 0)
9132 // + COALESCE(LIST_SUM(LIST_TRANSFORM(REGEXP_EXTRACT_ALL(eff_s, eff_p)[1:occ - 1], x -> LENGTH(x))), 0)
9133 // + pos_offset
9134 // END
9135 if let Expression::Function(f) = e {
9136 let mut args = f.args;
9137 let subject = args.remove(0);
9138 let pattern = if !args.is_empty() {
9139 args.remove(0)
9140 } else {
9141 Expression::Literal(Box::new(Literal::String(String::new())))
9142 };
9143
9144 // Collect all original args for NULL checks
9145 let position = if !args.is_empty() {
9146 Some(args.remove(0))
9147 } else {
9148 None
9149 };
9150 let occurrence = if !args.is_empty() {
9151 Some(args.remove(0))
9152 } else {
9153 None
9154 };
9155 let option = if !args.is_empty() {
9156 Some(args.remove(0))
9157 } else {
9158 None
9159 };
9160 let flags = if !args.is_empty() {
9161 Some(args.remove(0))
9162 } else {
9163 None
9164 };
9165 let _group = if !args.is_empty() {
9166 Some(args.remove(0))
9167 } else {
9168 None
9169 };
9170
9171 let is_pos_1 = position.as_ref().map_or(true, |p| matches!(p, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1")));
9172 let occurrence_expr = occurrence.clone().unwrap_or(Expression::number(1));
9173
9174 // Build NULL check: subject IS NULL OR pattern IS NULL [OR pos IS NULL ...]
9175 let mut null_checks: Vec<Expression> = vec![
9176 Expression::Is(Box::new(BinaryOp::new(
9177 subject.clone(),
9178 Expression::Null(Null),
9179 ))),
9180 Expression::Is(Box::new(BinaryOp::new(
9181 pattern.clone(),
9182 Expression::Null(Null),
9183 ))),
9184 ];
9185 // Add NULL checks for all provided optional args
9186 for opt_arg in [&position, &occurrence, &option, &flags].iter() {
9187 if let Some(arg) = opt_arg {
9188 null_checks.push(Expression::Is(Box::new(BinaryOp::new(
9189 (*arg).clone(),
9190 Expression::Null(Null),
9191 ))));
9192 }
9193 }
9194 // Chain with OR
9195 let null_condition = null_checks
9196 .into_iter()
9197 .reduce(|a, b| Expression::Or(Box::new(BinaryOp::new(a, b))))
9198 .unwrap();
9199
9200 // Effective subject (apply position offset)
9201 let effective_subject = if is_pos_1 {
9202 subject.clone()
9203 } else {
9204 let pos = position.clone().unwrap_or(Expression::number(1));
9205 Expression::Function(Box::new(Function::new(
9206 "SUBSTRING".to_string(),
9207 vec![subject.clone(), pos],
9208 )))
9209 };
9210
9211 // Effective pattern (apply flags if present)
9212 let effective_pattern = if let Some(ref fl) = flags {
9213 if let Expression::Literal(lit) = fl {
9214 if let Literal::String(f_str) = lit.as_ref() {
9215 if !f_str.is_empty() {
9216 let prefix = Expression::Literal(Box::new(
9217 Literal::String(format!("(?{})", f_str)),
9218 ));
9219 Expression::DPipe(Box::new(crate::expressions::DPipe {
9220 this: Box::new(prefix),
9221 expression: Box::new(pattern.clone()),
9222 safe: None,
9223 }))
9224 } else {
9225 pattern.clone()
9226 }
9227 } else {
9228 fl.clone()
9229 }
9230 } else {
9231 pattern.clone()
9232 }
9233 } else {
9234 pattern.clone()
9235 };
9236
9237 // WHEN pattern = '' THEN 0
9238 let empty_pattern_check = Expression::Eq(Box::new(BinaryOp::new(
9239 effective_pattern.clone(),
9240 Expression::Literal(Box::new(Literal::String(String::new()))),
9241 )));
9242
9243 // WHEN LENGTH(REGEXP_EXTRACT_ALL(eff_s, eff_p)) < occ THEN 0
9244 let match_count_check = Expression::Lt(Box::new(BinaryOp::new(
9245 Expression::Length(Box::new(crate::expressions::UnaryFunc {
9246 this: Expression::Function(Box::new(Function::new(
9247 "REGEXP_EXTRACT_ALL".to_string(),
9248 vec![effective_subject.clone(), effective_pattern.clone()],
9249 ))),
9250 original_name: None,
9251 inferred_type: None,
9252 })),
9253 occurrence_expr.clone(),
9254 )));
9255
9256 // Helper: build LENGTH lambda for LIST_TRANSFORM
9257 let make_len_lambda = || {
9258 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
9259 parameters: vec![crate::expressions::Identifier::new("x")],
9260 body: Expression::Length(Box::new(crate::expressions::UnaryFunc {
9261 this: Expression::Identifier(
9262 crate::expressions::Identifier::new("x"),
9263 ),
9264 original_name: None,
9265 inferred_type: None,
9266 })),
9267 colon: false,
9268 parameter_types: vec![],
9269 }))
9270 };
9271
9272 // COALESCE(LIST_SUM(LIST_TRANSFORM(STRING_SPLIT_REGEX(s, p)[1:occ], x -> LENGTH(x))), 0)
9273 let split_sliced =
9274 Expression::ArraySlice(Box::new(crate::expressions::ArraySlice {
9275 this: Expression::Function(Box::new(Function::new(
9276 "STRING_SPLIT_REGEX".to_string(),
9277 vec![effective_subject.clone(), effective_pattern.clone()],
9278 ))),
9279 start: Some(Expression::number(1)),
9280 end: Some(occurrence_expr.clone()),
9281 }));
9282 let split_sum = Expression::Function(Box::new(Function::new(
9283 "COALESCE".to_string(),
9284 vec![
9285 Expression::Function(Box::new(Function::new(
9286 "LIST_SUM".to_string(),
9287 vec![Expression::Function(Box::new(Function::new(
9288 "LIST_TRANSFORM".to_string(),
9289 vec![split_sliced, make_len_lambda()],
9290 )))],
9291 ))),
9292 Expression::number(0),
9293 ],
9294 )));
9295
9296 // COALESCE(LIST_SUM(LIST_TRANSFORM(REGEXP_EXTRACT_ALL(s, p)[1:occ - 1], x -> LENGTH(x))), 0)
9297 let extract_sliced =
9298 Expression::ArraySlice(Box::new(crate::expressions::ArraySlice {
9299 this: Expression::Function(Box::new(Function::new(
9300 "REGEXP_EXTRACT_ALL".to_string(),
9301 vec![effective_subject.clone(), effective_pattern.clone()],
9302 ))),
9303 start: Some(Expression::number(1)),
9304 end: Some(Expression::Sub(Box::new(BinaryOp::new(
9305 occurrence_expr.clone(),
9306 Expression::number(1),
9307 )))),
9308 }));
9309 let extract_sum = Expression::Function(Box::new(Function::new(
9310 "COALESCE".to_string(),
9311 vec![
9312 Expression::Function(Box::new(Function::new(
9313 "LIST_SUM".to_string(),
9314 vec![Expression::Function(Box::new(Function::new(
9315 "LIST_TRANSFORM".to_string(),
9316 vec![extract_sliced, make_len_lambda()],
9317 )))],
9318 ))),
9319 Expression::number(0),
9320 ],
9321 )));
9322
9323 // Position offset: pos - 1 when pos > 1, else 0
9324 let pos_offset: Expression = if !is_pos_1 {
9325 let pos = position.clone().unwrap_or(Expression::number(1));
9326 Expression::Sub(Box::new(BinaryOp::new(pos, Expression::number(1))))
9327 } else {
9328 Expression::number(0)
9329 };
9330
9331 // ELSE: 1 + split_sum + extract_sum + pos_offset
9332 let else_expr = Expression::Add(Box::new(BinaryOp::new(
9333 Expression::Add(Box::new(BinaryOp::new(
9334 Expression::Add(Box::new(BinaryOp::new(
9335 Expression::number(1),
9336 split_sum,
9337 ))),
9338 extract_sum,
9339 ))),
9340 pos_offset,
9341 )));
9342
9343 Ok(Expression::Case(Box::new(Case {
9344 operand: None,
9345 whens: vec![
9346 (null_condition, Expression::Null(Null)),
9347 (empty_pattern_check, Expression::number(0)),
9348 (match_count_check, Expression::number(0)),
9349 ],
9350 else_: Some(else_expr),
9351 comments: vec![],
9352 inferred_type: None,
9353 })))
9354 } else {
9355 Ok(e)
9356 }
9357 }
9358
9359 Action::RlikeSnowflakeToDuckDB => {
9360 // Snowflake RLIKE(a, b[, flags]) -> DuckDB REGEXP_FULL_MATCH(a, b[, flags])
9361 // Both do full-string matching, so no anchoring needed
9362 let (subject, pattern, flags) = match e {
9363 Expression::RegexpLike(ref rl) => {
9364 (rl.this.clone(), rl.pattern.clone(), rl.flags.clone())
9365 }
9366 Expression::Function(ref f) if f.args.len() >= 2 => {
9367 let s = f.args[0].clone();
9368 let p = f.args[1].clone();
9369 let fl = f.args.get(2).cloned();
9370 (s, p, fl)
9371 }
9372 _ => return Ok(e),
9373 };
9374
9375 let mut result_args = vec![subject, pattern];
9376 if let Some(fl) = flags {
9377 result_args.push(fl);
9378 }
9379 Ok(Expression::Function(Box::new(Function::new(
9380 "REGEXP_FULL_MATCH".to_string(),
9381 result_args,
9382 ))))
9383 }
9384
9385 Action::RegexpExtractAllToSnowflake => {
9386 // BigQuery REGEXP_EXTRACT_ALL(s, p) -> Snowflake REGEXP_SUBSTR_ALL(s, p)
9387 // With capture group: REGEXP_SUBSTR_ALL(s, p, 1, 1, 'c', 1)
9388 if let Expression::Function(f) = e {
9389 let mut args = f.args;
9390 if args.len() >= 2 {
9391 let str_expr = args.remove(0);
9392 let pattern = args.remove(0);
9393
9394 let has_groups = match &pattern {
9395 Expression::Literal(lit)
9396 if matches!(lit.as_ref(), Literal::String(_)) =>
9397 {
9398 let Literal::String(s) = lit.as_ref() else {
9399 unreachable!()
9400 };
9401 s.contains('(') && s.contains(')')
9402 }
9403 _ => false,
9404 };
9405
9406 if has_groups {
9407 Ok(Expression::Function(Box::new(Function::new(
9408 "REGEXP_SUBSTR_ALL".to_string(),
9409 vec![
9410 str_expr,
9411 pattern,
9412 Expression::number(1),
9413 Expression::number(1),
9414 Expression::Literal(Box::new(Literal::String(
9415 "c".to_string(),
9416 ))),
9417 Expression::number(1),
9418 ],
9419 ))))
9420 } else {
9421 Ok(Expression::Function(Box::new(Function::new(
9422 "REGEXP_SUBSTR_ALL".to_string(),
9423 vec![str_expr, pattern],
9424 ))))
9425 }
9426 } else {
9427 Ok(Expression::Function(Box::new(Function::new(
9428 "REGEXP_SUBSTR_ALL".to_string(),
9429 args,
9430 ))))
9431 }
9432 } else {
9433 Ok(e)
9434 }
9435 }
9436
9437 Action::SetToVariable => {
9438 // For DuckDB: SET a = 1 -> SET VARIABLE a = 1
9439 if let Expression::SetStatement(mut s) = e {
9440 for item in &mut s.items {
9441 if item.kind.is_none() {
9442 // Check if name already has VARIABLE prefix (from DuckDB source parsing)
9443 let already_variable = match &item.name {
9444 Expression::Identifier(id) => id.name.starts_with("VARIABLE "),
9445 _ => false,
9446 };
9447 if already_variable {
9448 // Extract the actual name and set kind
9449 if let Expression::Identifier(ref mut id) = item.name {
9450 let actual_name = id.name["VARIABLE ".len()..].to_string();
9451 id.name = actual_name;
9452 }
9453 }
9454 item.kind = Some("VARIABLE".to_string());
9455 }
9456 }
9457 Ok(Expression::SetStatement(s))
9458 } else {
9459 Ok(e)
9460 }
9461 }
9462
9463 Action::ConvertTimezoneToExpr => {
9464 // Convert Function("CONVERT_TIMEZONE", args) to Expression::ConvertTimezone
9465 // This prevents Redshift's transform_expr from expanding 2-arg to 3-arg with 'UTC'
9466 if let Expression::Function(f) = e {
9467 if f.args.len() == 2 {
9468 let mut args = f.args;
9469 let target_tz = args.remove(0);
9470 let timestamp = args.remove(0);
9471 Ok(Expression::ConvertTimezone(Box::new(ConvertTimezone {
9472 source_tz: None,
9473 target_tz: Some(Box::new(target_tz)),
9474 timestamp: Some(Box::new(timestamp)),
9475 options: vec![],
9476 })))
9477 } else if f.args.len() == 3 {
9478 let mut args = f.args;
9479 let source_tz = args.remove(0);
9480 let target_tz = args.remove(0);
9481 let timestamp = args.remove(0);
9482 Ok(Expression::ConvertTimezone(Box::new(ConvertTimezone {
9483 source_tz: Some(Box::new(source_tz)),
9484 target_tz: Some(Box::new(target_tz)),
9485 timestamp: Some(Box::new(timestamp)),
9486 options: vec![],
9487 })))
9488 } else {
9489 Ok(Expression::Function(f))
9490 }
9491 } else {
9492 Ok(e)
9493 }
9494 }
9495
9496 Action::BigQueryCastType => {
9497 // Convert BigQuery types to standard SQL types
9498 if let Expression::DataType(dt) = e {
9499 match dt {
9500 DataType::Custom { ref name } if name.eq_ignore_ascii_case("INT64") => {
9501 Ok(Expression::DataType(DataType::BigInt { length: None }))
9502 }
9503 DataType::Custom { ref name }
9504 if name.eq_ignore_ascii_case("FLOAT64") =>
9505 {
9506 Ok(Expression::DataType(DataType::Double {
9507 precision: None,
9508 scale: None,
9509 }))
9510 }
9511 DataType::Custom { ref name } if name.eq_ignore_ascii_case("BOOL") => {
9512 Ok(Expression::DataType(DataType::Boolean))
9513 }
9514 DataType::Custom { ref name } if name.eq_ignore_ascii_case("BYTES") => {
9515 Ok(Expression::DataType(DataType::VarBinary { length: None }))
9516 }
9517 DataType::Custom { ref name }
9518 if name.eq_ignore_ascii_case("NUMERIC") =>
9519 {
9520 // For DuckDB target, use Custom("DECIMAL") to avoid DuckDB's
9521 // default precision (18, 3) being added to bare DECIMAL
9522 if matches!(target, DialectType::DuckDB) {
9523 Ok(Expression::DataType(DataType::Custom {
9524 name: "DECIMAL".to_string(),
9525 }))
9526 } else {
9527 Ok(Expression::DataType(DataType::Decimal {
9528 precision: None,
9529 scale: None,
9530 }))
9531 }
9532 }
9533 DataType::Custom { ref name }
9534 if name.eq_ignore_ascii_case("STRING") =>
9535 {
9536 Ok(Expression::DataType(DataType::String { length: None }))
9537 }
9538 DataType::Custom { ref name }
9539 if name.eq_ignore_ascii_case("DATETIME") =>
9540 {
9541 Ok(Expression::DataType(DataType::Timestamp {
9542 precision: None,
9543 timezone: false,
9544 }))
9545 }
9546 _ => Ok(Expression::DataType(dt)),
9547 }
9548 } else {
9549 Ok(e)
9550 }
9551 }
9552
9553 Action::BigQuerySafeDivide => {
9554 // Convert SafeDivide expression to IF/CASE form for most targets
9555 if let Expression::SafeDivide(sd) = e {
9556 let x = *sd.this;
9557 let y = *sd.expression;
9558 // Wrap x and y in parens if they're complex expressions
9559 let y_ref = match &y {
9560 Expression::Column(_)
9561 | Expression::Literal(_)
9562 | Expression::Identifier(_) => y.clone(),
9563 _ => Expression::Paren(Box::new(Paren {
9564 this: y.clone(),
9565 trailing_comments: vec![],
9566 })),
9567 };
9568 let x_ref = match &x {
9569 Expression::Column(_)
9570 | Expression::Literal(_)
9571 | Expression::Identifier(_) => x.clone(),
9572 _ => Expression::Paren(Box::new(Paren {
9573 this: x.clone(),
9574 trailing_comments: vec![],
9575 })),
9576 };
9577 let condition = Expression::Neq(Box::new(BinaryOp::new(
9578 y_ref.clone(),
9579 Expression::number(0),
9580 )));
9581 let div_expr = Expression::Div(Box::new(BinaryOp::new(x_ref, y_ref)));
9582
9583 if matches!(target, DialectType::Presto | DialectType::Trino) {
9584 // Presto/Trino: IF(y <> 0, CAST(x AS DOUBLE) / y, NULL)
9585 let cast_x = Expression::Cast(Box::new(Cast {
9586 this: match &x {
9587 Expression::Column(_)
9588 | Expression::Literal(_)
9589 | Expression::Identifier(_) => x,
9590 _ => Expression::Paren(Box::new(Paren {
9591 this: x,
9592 trailing_comments: vec![],
9593 })),
9594 },
9595 to: DataType::Double {
9596 precision: None,
9597 scale: None,
9598 },
9599 trailing_comments: vec![],
9600 double_colon_syntax: false,
9601 format: None,
9602 default: None,
9603 inferred_type: None,
9604 }));
9605 let cast_div = Expression::Div(Box::new(BinaryOp::new(
9606 cast_x,
9607 match &y {
9608 Expression::Column(_)
9609 | Expression::Literal(_)
9610 | Expression::Identifier(_) => y,
9611 _ => Expression::Paren(Box::new(Paren {
9612 this: y,
9613 trailing_comments: vec![],
9614 })),
9615 },
9616 )));
9617 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
9618 condition,
9619 true_value: cast_div,
9620 false_value: Some(Expression::Null(Null)),
9621 original_name: None,
9622 inferred_type: None,
9623 })))
9624 } else if matches!(target, DialectType::PostgreSQL) {
9625 // PostgreSQL: CASE WHEN y <> 0 THEN CAST(x AS DOUBLE PRECISION) / y ELSE NULL END
9626 let cast_x = Expression::Cast(Box::new(Cast {
9627 this: match &x {
9628 Expression::Column(_)
9629 | Expression::Literal(_)
9630 | Expression::Identifier(_) => x,
9631 _ => Expression::Paren(Box::new(Paren {
9632 this: x,
9633 trailing_comments: vec![],
9634 })),
9635 },
9636 to: DataType::Custom {
9637 name: "DOUBLE PRECISION".to_string(),
9638 },
9639 trailing_comments: vec![],
9640 double_colon_syntax: false,
9641 format: None,
9642 default: None,
9643 inferred_type: None,
9644 }));
9645 let y_paren = match &y {
9646 Expression::Column(_)
9647 | Expression::Literal(_)
9648 | Expression::Identifier(_) => y,
9649 _ => Expression::Paren(Box::new(Paren {
9650 this: y,
9651 trailing_comments: vec![],
9652 })),
9653 };
9654 let cast_div =
9655 Expression::Div(Box::new(BinaryOp::new(cast_x, y_paren)));
9656 Ok(Expression::Case(Box::new(Case {
9657 operand: None,
9658 whens: vec![(condition, cast_div)],
9659 else_: Some(Expression::Null(Null)),
9660 comments: Vec::new(),
9661 inferred_type: None,
9662 })))
9663 } else if matches!(target, DialectType::DuckDB) {
9664 // DuckDB: CASE WHEN y <> 0 THEN x / y ELSE NULL END
9665 Ok(Expression::Case(Box::new(Case {
9666 operand: None,
9667 whens: vec![(condition, div_expr)],
9668 else_: Some(Expression::Null(Null)),
9669 comments: Vec::new(),
9670 inferred_type: None,
9671 })))
9672 } else if matches!(target, DialectType::Snowflake) {
9673 // Snowflake: IFF(y <> 0, x / y, NULL)
9674 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
9675 condition,
9676 true_value: div_expr,
9677 false_value: Some(Expression::Null(Null)),
9678 original_name: Some("IFF".to_string()),
9679 inferred_type: None,
9680 })))
9681 } else {
9682 // All others: IF(y <> 0, x / y, NULL)
9683 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
9684 condition,
9685 true_value: div_expr,
9686 false_value: Some(Expression::Null(Null)),
9687 original_name: None,
9688 inferred_type: None,
9689 })))
9690 }
9691 } else {
9692 Ok(e)
9693 }
9694 }
9695
            Action::BigQueryLastDayStripUnit => {
                // BigQuery LAST_DAY(date [, unit]) defaults the unit to MONTH.
                // Strip the explicit unit, then lower to the target's month-end idiom.
                if let Expression::LastDay(mut ld) = e {
                    ld.unit = None; // Strip the unit (MONTH is default)
                    match target {
                        DialectType::PostgreSQL => {
                            // LAST_DAY(date) -> CAST(DATE_TRUNC('MONTH', date) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)
                            // DATE_TRUNC('MONTH', date): first day of the month.
                            let date_trunc = Expression::Function(Box::new(Function::new(
                                "DATE_TRUNC".to_string(),
                                vec![
                                    Expression::Literal(Box::new(
                                        crate::expressions::Literal::String(
                                            "MONTH".to_string(),
                                        ),
                                    )),
                                    ld.this.clone(),
                                ],
                            )));
                            // + INTERVAL '1 MONTH': first day of the next month.
                            let plus_month =
                                Expression::Add(Box::new(crate::expressions::BinaryOp::new(
                                    date_trunc,
                                    Expression::Interval(Box::new(
                                        crate::expressions::Interval {
                                            this: Some(Expression::Literal(Box::new(
                                                crate::expressions::Literal::String(
                                                    "1 MONTH".to_string(),
                                                ),
                                            ))),
                                            unit: None,
                                        },
                                    )),
                                )));
                            // - INTERVAL '1 DAY': last day of the original month.
                            let minus_day =
                                Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
                                    plus_month,
                                    Expression::Interval(Box::new(
                                        crate::expressions::Interval {
                                            this: Some(Expression::Literal(Box::new(
                                                crate::expressions::Literal::String(
                                                    "1 DAY".to_string(),
                                                ),
                                            ))),
                                            unit: None,
                                        },
                                    )),
                                )));
                            // Cast back to DATE (interval arithmetic widens the type).
                            Ok(Expression::Cast(Box::new(Cast {
                                this: minus_day,
                                to: DataType::Date,
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            })))
                        }
                        DialectType::Presto => {
                            // LAST_DAY(date) -> LAST_DAY_OF_MONTH(date)
                            Ok(Expression::Function(Box::new(Function::new(
                                "LAST_DAY_OF_MONTH".to_string(),
                                vec![ld.this],
                            ))))
                        }
                        DialectType::ClickHouse => {
                            // ClickHouse LAST_DAY(CAST(x AS Nullable(DATE)))
                            // Need to wrap the DATE type in Nullable
                            // NOTE(review): only an argument that is already a CAST is
                            // rewrapped; other argument shapes pass through unchanged.
                            let nullable_date = match ld.this {
                                Expression::Cast(mut c) => {
                                    c.to = DataType::Nullable {
                                        inner: Box::new(DataType::Date),
                                    };
                                    Expression::Cast(c)
                                }
                                other => other,
                            };
                            ld.this = nullable_date;
                            Ok(Expression::LastDay(ld))
                        }
                        // All other targets keep the (unit-stripped) LAST_DAY node.
                        _ => Ok(Expression::LastDay(ld)),
                    }
                } else {
                    Ok(e)
                }
            }
9779
            Action::BigQueryCastFormat => {
                // CAST(x AS DATE FORMAT 'fmt') -> PARSE_DATE('%m/%d/%Y', x) for BigQuery
                // CAST(x AS TIMESTAMP FORMAT 'fmt') -> PARSE_TIMESTAMP(...) for BigQuery
                // SAFE_CAST(x AS DATE FORMAT 'fmt') -> CAST(TRY_STRPTIME(x, ...) AS DATE) for DuckDB
                // Pull apart the (SAFE_)CAST only when a FORMAT clause is present;
                // `is_safe` records SAFE_CAST so DuckDB can use TRY_STRPTIME.
                let (this, to, format_expr, is_safe) = match e {
                    Expression::Cast(ref c) if c.format.is_some() => (
                        c.this.clone(),
                        c.to.clone(),
                        c.format.as_ref().unwrap().as_ref().clone(),
                        false,
                    ),
                    Expression::SafeCast(ref c) if c.format.is_some() => (
                        c.this.clone(),
                        c.to.clone(),
                        c.format.as_ref().unwrap().as_ref().clone(),
                        true,
                    ),
                    _ => return Ok(e),
                };
                // For CAST(x AS STRING FORMAT ...) when target is BigQuery, keep as-is
                if matches!(target, DialectType::BigQuery) {
                    match &to {
                        DataType::String { .. } | DataType::VarChar { .. } | DataType::Text => {
                            // CAST(x AS STRING FORMAT 'fmt') stays as CAST expression for BigQuery
                            return Ok(e);
                        }
                        _ => {}
                    }
                }
                // Extract timezone from format if AT TIME ZONE is present
                let (actual_format_expr, timezone) = match &format_expr {
                    Expression::AtTimeZone(ref atz) => {
                        (atz.this.clone(), Some(atz.zone.clone()))
                    }
                    _ => (format_expr.clone(), None),
                };
                // Translate BigQuery format elements into strftime-style syntax.
                let strftime_fmt = Self::bq_cast_format_to_strftime(&actual_format_expr);
                match target {
                    DialectType::BigQuery => {
                        // CAST(x AS DATE FORMAT 'fmt') -> PARSE_DATE(strftime_fmt, x)
                        // CAST(x AS TIMESTAMP FORMAT 'fmt' AT TIME ZONE 'tz') -> PARSE_TIMESTAMP(strftime_fmt, x, tz)
                        // Non-date/timestamp types default to PARSE_TIMESTAMP.
                        let func_name = match &to {
                            DataType::Date => "PARSE_DATE",
                            DataType::Timestamp { .. } => "PARSE_TIMESTAMP",
                            DataType::Time { .. } => "PARSE_TIMESTAMP",
                            _ => "PARSE_TIMESTAMP",
                        };
                        let mut func_args = vec![strftime_fmt, this];
                        if let Some(tz) = timezone {
                            func_args.push(tz);
                        }
                        Ok(Expression::Function(Box::new(Function::new(
                            func_name.to_string(),
                            func_args,
                        ))))
                    }
                    DialectType::DuckDB => {
                        // SAFE_CAST(x AS DATE FORMAT 'fmt') -> CAST(TRY_STRPTIME(x, fmt) AS DATE)
                        // CAST(x AS DATE FORMAT 'fmt') -> CAST(STRPTIME(x, fmt) AS DATE)
                        let duck_fmt = Self::bq_format_to_duckdb(&strftime_fmt);
                        let parse_fn_name = if is_safe { "TRY_STRPTIME" } else { "STRPTIME" };
                        let parse_call = Expression::Function(Box::new(Function::new(
                            parse_fn_name.to_string(),
                            vec![this, duck_fmt],
                        )));
                        // Re-wrap the parsed value in a plain CAST to the original type.
                        Ok(Expression::Cast(Box::new(Cast {
                            this: parse_call,
                            to,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    }
                    // Other targets keep the original (SAFE_)CAST untouched.
                    _ => Ok(e),
                }
            }
9858
            Action::BigQueryFunctionNormalize => {
                // Delegates all BigQuery function-name normalization to the
                // dedicated helper, which is aware of both source and target.
                Self::normalize_bigquery_function(e, source, target)
            }

            Action::BigQueryToHexBare => {
                // Not used anymore - handled directly in normalize_bigquery_function
                // Kept as a no-op so existing action tables remain valid.
                Ok(e)
            }
9867
9868 Action::BigQueryToHexLower => {
9869 if let Expression::Lower(uf) = e {
9870 match uf.this {
9871 // BQ->BQ: LOWER(TO_HEX(x)) -> TO_HEX(x)
9872 Expression::Function(f)
9873 if matches!(target, DialectType::BigQuery)
9874 && f.name == "TO_HEX" =>
9875 {
9876 Ok(Expression::Function(f))
9877 }
9878 // LOWER(LOWER(HEX/TO_HEX(x))) patterns
9879 Expression::Lower(inner_uf) => {
9880 if matches!(target, DialectType::BigQuery) {
9881 // BQ->BQ: extract TO_HEX
9882 if let Expression::Function(f) = inner_uf.this {
9883 Ok(Expression::Function(Box::new(Function::new(
9884 "TO_HEX".to_string(),
9885 f.args,
9886 ))))
9887 } else {
9888 Ok(Expression::Lower(inner_uf))
9889 }
9890 } else {
9891 // Flatten: LOWER(LOWER(x)) -> LOWER(x)
9892 Ok(Expression::Lower(inner_uf))
9893 }
9894 }
9895 other => {
9896 Ok(Expression::Lower(Box::new(crate::expressions::UnaryFunc {
9897 this: other,
9898 original_name: None,
9899 inferred_type: None,
9900 })))
9901 }
9902 }
9903 } else {
9904 Ok(e)
9905 }
9906 }
9907
9908 Action::BigQueryToHexUpper => {
9909 // UPPER(LOWER(HEX(x))) -> HEX(x) (UPPER cancels LOWER, HEX is already uppercase)
9910 // UPPER(LOWER(TO_HEX(x))) -> TO_HEX(x) for Presto/Trino
9911 if let Expression::Upper(uf) = e {
9912 if let Expression::Lower(inner_uf) = uf.this {
9913 // For BQ->BQ: UPPER(TO_HEX(x)) should stay as UPPER(TO_HEX(x))
9914 if matches!(target, DialectType::BigQuery) {
9915 // Restore TO_HEX name in inner function
9916 if let Expression::Function(f) = inner_uf.this {
9917 let restored = Expression::Function(Box::new(Function::new(
9918 "TO_HEX".to_string(),
9919 f.args,
9920 )));
9921 Ok(Expression::Upper(Box::new(
9922 crate::expressions::UnaryFunc::new(restored),
9923 )))
9924 } else {
9925 Ok(Expression::Upper(inner_uf))
9926 }
9927 } else {
9928 // Extract the inner HEX/TO_HEX function (UPPER(LOWER(x)) = x when HEX is uppercase)
9929 Ok(inner_uf.this)
9930 }
9931 } else {
9932 Ok(Expression::Upper(uf))
9933 }
9934 } else {
9935 Ok(e)
9936 }
9937 }
9938
9939 Action::BigQueryAnyValueHaving => {
9940 // ANY_VALUE(x HAVING MAX y) -> ARG_MAX_NULL(x, y)
9941 // ANY_VALUE(x HAVING MIN y) -> ARG_MIN_NULL(x, y)
9942 if let Expression::AnyValue(agg) = e {
9943 if let Some((having_expr, is_max)) = agg.having_max {
9944 let func_name = if is_max {
9945 "ARG_MAX_NULL"
9946 } else {
9947 "ARG_MIN_NULL"
9948 };
9949 Ok(Expression::Function(Box::new(Function::new(
9950 func_name.to_string(),
9951 vec![agg.this, *having_expr],
9952 ))))
9953 } else {
9954 Ok(Expression::AnyValue(agg))
9955 }
9956 } else {
9957 Ok(e)
9958 }
9959 }
9960
9961 Action::BigQueryApproxQuantiles => {
9962 // APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [0, 1/n, 2/n, ..., 1])
9963 // APPROX_QUANTILES(DISTINCT x, n) -> APPROX_QUANTILE(DISTINCT x, [0, 1/n, ..., 1])
9964 if let Expression::AggregateFunction(agg) = e {
9965 if agg.args.len() >= 2 {
9966 let x_expr = agg.args[0].clone();
9967 let n_expr = &agg.args[1];
9968
9969 // Extract the numeric value from n_expr
9970 let n = match n_expr {
9971 Expression::Literal(lit)
9972 if matches!(
9973 lit.as_ref(),
9974 crate::expressions::Literal::Number(_)
9975 ) =>
9976 {
9977 let crate::expressions::Literal::Number(s) = lit.as_ref()
9978 else {
9979 unreachable!()
9980 };
9981 s.parse::<usize>().unwrap_or(2)
9982 }
9983 _ => 2,
9984 };
9985
9986 // Generate quantile array: [0, 1/n, 2/n, ..., 1]
9987 let mut quantiles = Vec::new();
9988 for i in 0..=n {
9989 let q = i as f64 / n as f64;
9990 // Format nicely: 0 -> 0, 0.25 -> 0.25, 1 -> 1
9991 if q == 0.0 {
9992 quantiles.push(Expression::number(0));
9993 } else if q == 1.0 {
9994 quantiles.push(Expression::number(1));
9995 } else {
9996 quantiles.push(Expression::Literal(Box::new(
9997 crate::expressions::Literal::Number(format!("{}", q)),
9998 )));
9999 }
10000 }
10001
10002 let array_expr =
10003 Expression::Array(Box::new(crate::expressions::Array {
10004 expressions: quantiles,
10005 }));
10006
10007 // Preserve DISTINCT modifier
10008 let mut new_func = Function::new(
10009 "APPROX_QUANTILE".to_string(),
10010 vec![x_expr, array_expr],
10011 );
10012 new_func.distinct = agg.distinct;
10013 Ok(Expression::Function(Box::new(new_func)))
10014 } else {
10015 Ok(Expression::AggregateFunction(agg))
10016 }
10017 } else {
10018 Ok(e)
10019 }
10020 }
10021
10022 Action::GenericFunctionNormalize => {
10023 // Helper closure to convert ARBITRARY to target-specific function
10024 fn convert_arbitrary(arg: Expression, target: DialectType) -> Expression {
10025 let name = match target {
10026 DialectType::ClickHouse => "any",
10027 DialectType::TSQL | DialectType::SQLite => "MAX",
10028 DialectType::Hive => "FIRST",
10029 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
10030 "ARBITRARY"
10031 }
10032 _ => "ANY_VALUE",
10033 };
10034 Expression::Function(Box::new(Function::new(name.to_string(), vec![arg])))
10035 }
10036
10037 if let Expression::Function(f) = e {
10038 let name = f.name.to_ascii_uppercase();
10039 match name.as_str() {
10040 "ARBITRARY" if f.args.len() == 1 => {
10041 let arg = f.args.into_iter().next().unwrap();
10042 Ok(convert_arbitrary(arg, target))
10043 }
10044 "TO_NUMBER" if f.args.len() == 1 => {
10045 let arg = f.args.into_iter().next().unwrap();
10046 match target {
10047 DialectType::Oracle | DialectType::Snowflake => {
10048 Ok(Expression::Function(Box::new(Function::new(
10049 "TO_NUMBER".to_string(),
10050 vec![arg],
10051 ))))
10052 }
10053 _ => Ok(Expression::Cast(Box::new(crate::expressions::Cast {
10054 this: arg,
10055 to: crate::expressions::DataType::Double {
10056 precision: None,
10057 scale: None,
10058 },
10059 double_colon_syntax: false,
10060 trailing_comments: Vec::new(),
10061 format: None,
10062 default: None,
10063 inferred_type: None,
10064 }))),
10065 }
10066 }
10067 "AGGREGATE" if f.args.len() >= 3 => match target {
10068 DialectType::DuckDB
10069 | DialectType::Hive
10070 | DialectType::Presto
10071 | DialectType::Trino => Ok(Expression::Function(Box::new(
10072 Function::new("REDUCE".to_string(), f.args),
10073 ))),
10074 _ => Ok(Expression::Function(f)),
10075 },
10076 // REGEXP_MATCHES(x, y) -> RegexpLike for most targets, keep as-is for DuckDB
10077 "REGEXP_MATCHES" if f.args.len() >= 2 => {
10078 if matches!(target, DialectType::DuckDB) {
10079 Ok(Expression::Function(f))
10080 } else {
10081 let mut args = f.args;
10082 let this = args.remove(0);
10083 let pattern = args.remove(0);
10084 let flags = if args.is_empty() {
10085 None
10086 } else {
10087 Some(args.remove(0))
10088 };
10089 Ok(Expression::RegexpLike(Box::new(
10090 crate::expressions::RegexpFunc {
10091 this,
10092 pattern,
10093 flags,
10094 },
10095 )))
10096 }
10097 }
10098 // REGEXP_FULL_MATCH (Hive REGEXP) -> RegexpLike
10099 "REGEXP_FULL_MATCH" if f.args.len() >= 2 => {
10100 if matches!(target, DialectType::DuckDB) {
10101 Ok(Expression::Function(f))
10102 } else {
10103 let mut args = f.args;
10104 let this = args.remove(0);
10105 let pattern = args.remove(0);
10106 let flags = if args.is_empty() {
10107 None
10108 } else {
10109 Some(args.remove(0))
10110 };
10111 Ok(Expression::RegexpLike(Box::new(
10112 crate::expressions::RegexpFunc {
10113 this,
10114 pattern,
10115 flags,
10116 },
10117 )))
10118 }
10119 }
10120 // STRUCT_EXTRACT(x, 'field') -> x.field (StructExtract expression)
10121 "STRUCT_EXTRACT" if f.args.len() == 2 => {
10122 let mut args = f.args;
10123 let this = args.remove(0);
10124 let field_expr = args.remove(0);
10125 // Extract string literal to get field name
10126 let field_name = match &field_expr {
10127 Expression::Literal(lit)
10128 if matches!(
10129 lit.as_ref(),
10130 crate::expressions::Literal::String(_)
10131 ) =>
10132 {
10133 let crate::expressions::Literal::String(s) = lit.as_ref()
10134 else {
10135 unreachable!()
10136 };
10137 s.clone()
10138 }
10139 Expression::Identifier(id) => id.name.clone(),
10140 _ => {
10141 return Ok(Expression::Function(Box::new(Function::new(
10142 "STRUCT_EXTRACT".to_string(),
10143 vec![this, field_expr],
10144 ))))
10145 }
10146 };
10147 Ok(Expression::StructExtract(Box::new(
10148 crate::expressions::StructExtractFunc {
10149 this,
10150 field: crate::expressions::Identifier::new(field_name),
10151 },
10152 )))
10153 }
10154 // LIST_FILTER([4,5,6], x -> x > 4) -> FILTER(ARRAY(4,5,6), x -> x > 4)
10155 "LIST_FILTER" if f.args.len() == 2 => {
10156 let name = match target {
10157 DialectType::DuckDB => "LIST_FILTER",
10158 _ => "FILTER",
10159 };
10160 Ok(Expression::Function(Box::new(Function::new(
10161 name.to_string(),
10162 f.args,
10163 ))))
10164 }
10165 // LIST_TRANSFORM(x, y -> y + 1) -> TRANSFORM(x, y -> y + 1)
10166 "LIST_TRANSFORM" if f.args.len() == 2 => {
10167 let name = match target {
10168 DialectType::DuckDB => "LIST_TRANSFORM",
10169 _ => "TRANSFORM",
10170 };
10171 Ok(Expression::Function(Box::new(Function::new(
10172 name.to_string(),
10173 f.args,
10174 ))))
10175 }
10176 // LIST_SORT(x) -> LIST_SORT(x) for DuckDB, ARRAY_SORT(x) for Presto/Trino, SORT_ARRAY(x) for others
10177 "LIST_SORT" if f.args.len() >= 1 => {
10178 let name = match target {
10179 DialectType::DuckDB => "LIST_SORT",
10180 DialectType::Presto | DialectType::Trino => "ARRAY_SORT",
10181 _ => "SORT_ARRAY",
10182 };
10183 Ok(Expression::Function(Box::new(Function::new(
10184 name.to_string(),
10185 f.args,
10186 ))))
10187 }
// LIST_REVERSE_SORT(x) -> SORT_ARRAY(x, FALSE) for Spark/Hive, ARRAY_SORT(x, lambda) for Presto
"LIST_REVERSE_SORT" if f.args.len() >= 1 => {
    match target {
        // DuckDB has a dedicated descending-sort function; keep all args.
        DialectType::DuckDB => Ok(Expression::Function(Box::new(
            Function::new("ARRAY_REVERSE_SORT".to_string(), f.args),
        ))),
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => {
            // SORT_ARRAY's second argument is the ascending flag; appending
            // FALSE yields a descending sort. Emitted as a bare identifier
            // rather than a boolean literal.
            let mut args = f.args;
            args.push(Expression::Identifier(
                crate::expressions::Identifier::new("FALSE"),
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "SORT_ARRAY".to_string(),
                args,
            ))))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // ARRAY_SORT(x, (a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END)
            // Build a comparator lambda that inverts the natural ordering.
            // Only the first argument (the array) is kept; extras are dropped.
            let arr = f.args.into_iter().next().unwrap();
            let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                parameters: vec![
                    crate::expressions::Identifier::new("a"),
                    crate::expressions::Identifier::new("b"),
                ],
                body: Expression::Case(Box::new(Case {
                    operand: None,
                    whens: vec![
                        (
                            // a < b => 1 (sorts larger values first)
                            Expression::Lt(Box::new(BinaryOp::new(
                                Expression::Identifier(crate::expressions::Identifier::new("a")),
                                Expression::Identifier(crate::expressions::Identifier::new("b")),
                            ))),
                            Expression::number(1),
                        ),
                        (
                            // a > b => -1
                            Expression::Gt(Box::new(BinaryOp::new(
                                Expression::Identifier(crate::expressions::Identifier::new("a")),
                                Expression::Identifier(crate::expressions::Identifier::new("b")),
                            ))),
                            // NOTE(review): built as the number literal "-1" here,
                            // while the ARRAY_REVERSE_SORT arm builds Neg(1) for the
                            // same comparator — presumably both render identically;
                            // worth unifying after confirming generator output.
                            Expression::Literal(Box::new(Literal::Number("-1".to_string()))),
                        ),
                    ],
                    else_: Some(Expression::number(0)),
                    comments: Vec::new(),
                    inferred_type: None,
                })),
                colon: false,
                parameter_types: Vec::new(),
            }));
            Ok(Expression::Function(Box::new(Function::new(
                "ARRAY_SORT".to_string(),
                vec![arr, lambda],
            ))))
        }
        // Other targets: keep the DuckDB-style name unchanged.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "LIST_REVERSE_SORT".to_string(),
            f.args,
        )))),
    }
}
10252 // SPLIT_TO_ARRAY(x) with 1 arg -> add default ',' separator and rename
10253 "SPLIT_TO_ARRAY" if f.args.len() == 1 => {
10254 let mut args = f.args;
10255 args.push(Expression::string(","));
10256 let name = match target {
10257 DialectType::DuckDB => "STR_SPLIT",
10258 DialectType::Presto | DialectType::Trino => "SPLIT",
10259 DialectType::Spark
10260 | DialectType::Databricks
10261 | DialectType::Hive => "SPLIT",
10262 DialectType::PostgreSQL => "STRING_TO_ARRAY",
10263 DialectType::Redshift => "SPLIT_TO_ARRAY",
10264 _ => "SPLIT",
10265 };
10266 Ok(Expression::Function(Box::new(Function::new(
10267 name.to_string(),
10268 args,
10269 ))))
10270 }
10271 // SPLIT_TO_ARRAY(x, sep) with 2 args -> rename based on target
10272 "SPLIT_TO_ARRAY" if f.args.len() == 2 => {
10273 let name = match target {
10274 DialectType::DuckDB => "STR_SPLIT",
10275 DialectType::Presto | DialectType::Trino => "SPLIT",
10276 DialectType::Spark
10277 | DialectType::Databricks
10278 | DialectType::Hive => "SPLIT",
10279 DialectType::PostgreSQL => "STRING_TO_ARRAY",
10280 DialectType::Redshift => "SPLIT_TO_ARRAY",
10281 _ => "SPLIT",
10282 };
10283 Ok(Expression::Function(Box::new(Function::new(
10284 name.to_string(),
10285 f.args,
10286 ))))
10287 }
// STRING_TO_ARRAY/STR_SPLIT -> target-specific split function
"STRING_TO_ARRAY" | "STR_SPLIT" if f.args.len() >= 2 => {
    let name = match target {
        DialectType::DuckDB => "STR_SPLIT",
        DialectType::Presto | DialectType::Trino => "SPLIT",
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => "SPLIT",
        DialectType::Doris | DialectType::StarRocks => {
            "SPLIT_BY_STRING"
        }
        // PostgreSQL/Redshift keep the source name.
        DialectType::PostgreSQL | DialectType::Redshift => {
            "STRING_TO_ARRAY"
        }
        _ => "SPLIT",
    };
    // For Spark/Hive, SPLIT uses regex - need to escape literal with \Q...\E
    if matches!(
        target,
        DialectType::Spark
            | DialectType::Databricks
            | DialectType::Hive
    ) {
        // The source split is literal, so the separator must be quoted for a
        // regex-based SPLIT. Any arguments beyond (string, separator) are
        // intentionally dropped in this branch.
        let mut args = f.args;
        let x = args.remove(0);
        let sep = args.remove(0);
        // Wrap separator in CONCAT('\\Q', sep, '\\E')
        // (\Q...\E is regex literal-quoting, disabling metacharacters).
        let escaped_sep =
            Expression::Function(Box::new(Function::new(
                "CONCAT".to_string(),
                vec![
                    Expression::string("\\Q"),
                    sep,
                    Expression::string("\\E"),
                ],
            )));
        Ok(Expression::Function(Box::new(Function::new(
            name.to_string(),
            vec![x, escaped_sep],
        ))))
    } else {
        // Non-regex targets: simple rename, args passed through unchanged.
        Ok(Expression::Function(Box::new(Function::new(
            name.to_string(),
            f.args,
        ))))
    }
}
10335 // STR_SPLIT_REGEX(x, 'a') / REGEXP_SPLIT(x, 'a') -> target-specific regex split
10336 "STR_SPLIT_REGEX" | "REGEXP_SPLIT" if f.args.len() == 2 => {
10337 let name = match target {
10338 DialectType::DuckDB => "STR_SPLIT_REGEX",
10339 DialectType::Presto | DialectType::Trino => "REGEXP_SPLIT",
10340 DialectType::Spark
10341 | DialectType::Databricks
10342 | DialectType::Hive => "SPLIT",
10343 _ => "REGEXP_SPLIT",
10344 };
10345 Ok(Expression::Function(Box::new(Function::new(
10346 name.to_string(),
10347 f.args,
10348 ))))
10349 }
// SPLIT(str, delim) from Snowflake -> DuckDB with CASE wrapper
"SPLIT"
    if f.args.len() == 2
        && matches!(source, DialectType::Snowflake)
        && matches!(target, DialectType::DuckDB) =>
{
    let mut args = f.args;
    let str_arg = args.remove(0);
    let delim_arg = args.remove(0);

    // STR_SPLIT(str, delim) as the base
    let base_func = Expression::Function(Box::new(Function::new(
        "STR_SPLIT".to_string(),
        vec![str_arg.clone(), delim_arg.clone()],
    )));

    // [str] - array with single element
    let array_with_input =
        Expression::Array(Box::new(crate::expressions::Array {
            expressions: vec![str_arg],
        }));

    // CASE
    //   WHEN delim IS NULL THEN NULL
    //   WHEN delim = '' THEN [str]
    //   ELSE STR_SPLIT(str, delim)
    // END
    // NOTE(review): the NULL and empty-delimiter special cases presumably
    // mirror Snowflake's SPLIT edge-case behavior — confirm against its docs.
    Ok(Expression::Case(Box::new(Case {
        operand: None,
        whens: vec![
            (
                // delim IS NULL -> NULL
                Expression::Is(Box::new(BinaryOp {
                    left: delim_arg.clone(),
                    right: Expression::Null(Null),
                    left_comments: vec![],
                    operator_comments: vec![],
                    trailing_comments: vec![],
                    inferred_type: None,
                })),
                Expression::Null(Null),
            ),
            (
                // delim = '' -> single-element array containing the input
                Expression::Eq(Box::new(BinaryOp {
                    left: delim_arg,
                    right: Expression::string(""),
                    left_comments: vec![],
                    operator_comments: vec![],
                    trailing_comments: vec![],
                    inferred_type: None,
                })),
                array_with_input,
            ),
        ],
        else_: Some(base_func),
        comments: vec![],
        inferred_type: None,
    })))
}
// SPLIT(x, sep) from Presto/StarRocks/Doris -> target-specific split with regex escaping for Hive/Spark
"SPLIT"
    if f.args.len() == 2
        && matches!(
            source,
            DialectType::Presto
                | DialectType::Trino
                | DialectType::Athena
                | DialectType::StarRocks
                | DialectType::Doris
        )
        && matches!(
            target,
            DialectType::Spark
                | DialectType::Databricks
                | DialectType::Hive
        ) =>
{
    // Presto/StarRocks SPLIT is literal, Hive/Spark SPLIT is regex
    // so the separator is wrapped in \Q...\E regex literal-quoting.
    let mut args = f.args;
    let x = args.remove(0);
    let sep = args.remove(0);
    let escaped_sep = Expression::Function(Box::new(Function::new(
        "CONCAT".to_string(),
        vec![Expression::string("\\Q"), sep, Expression::string("\\E")],
    )));
    Ok(Expression::Function(Box::new(Function::new(
        "SPLIT".to_string(),
        vec![x, escaped_sep],
    ))))
}
10439 // SUBSTRINGINDEX -> SUBSTRING_INDEX (ClickHouse camelCase to standard)
10440 // For ClickHouse target, preserve original name to maintain camelCase
10441 "SUBSTRINGINDEX" => {
10442 let name = if matches!(target, DialectType::ClickHouse) {
10443 f.name.clone()
10444 } else {
10445 "SUBSTRING_INDEX".to_string()
10446 };
10447 Ok(Expression::Function(Box::new(Function::new(name, f.args))))
10448 }
// ARRAY_LENGTH/SIZE/CARDINALITY -> target-specific array length function
"ARRAY_LENGTH" | "SIZE" | "CARDINALITY" => {
    // DuckDB source CARDINALITY -> DuckDB target: keep as CARDINALITY (used for maps)
    if name == "CARDINALITY"
        && matches!(source, DialectType::DuckDB)
        && matches!(target, DialectType::DuckDB)
    {
        return Ok(Expression::Function(f));
    }
    // Get the array argument (first arg, drop dimension args)
    let mut args = f.args;
    let arr = if args.is_empty() {
        // Zero-arg call: pass through unchanged under the original name.
        return Ok(Expression::Function(Box::new(Function::new(
            name.to_string(),
            args,
        ))));
    } else {
        args.remove(0)
    };
    // Shadows the outer `name` with the target-specific function name.
    let name =
        match target {
            DialectType::Spark
            | DialectType::Databricks
            | DialectType::Hive => "SIZE",
            DialectType::Presto | DialectType::Trino => "CARDINALITY",
            DialectType::BigQuery => "ARRAY_LENGTH",
            DialectType::DuckDB => {
                // DuckDB: use ARRAY_LENGTH with all args
                // (early return keeps any dimension arguments).
                let mut all_args = vec![arr];
                all_args.extend(args);
                return Ok(Expression::Function(Box::new(
                    Function::new("ARRAY_LENGTH".to_string(), all_args),
                )));
            }
            DialectType::PostgreSQL | DialectType::Redshift => {
                // Keep ARRAY_LENGTH with dimension arg
                let mut all_args = vec![arr];
                all_args.extend(args);
                return Ok(Expression::Function(Box::new(
                    Function::new("ARRAY_LENGTH".to_string(), all_args),
                )));
            }
            DialectType::ClickHouse => "LENGTH",
            _ => "ARRAY_LENGTH",
        };
    // Fall-through targets take only the array; dimension args are dropped.
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        vec![arr],
    ))))
}
// TO_VARIANT(x) -> CAST(x AS VARIANT) for DuckDB
"TO_VARIANT" if f.args.len() == 1 => match target {
    DialectType::DuckDB => {
        let arg = f.args.into_iter().next().unwrap();
        // VARIANT is not a built-in DataType variant, so a Custom type
        // carries the name through to the generator.
        Ok(Expression::Cast(Box::new(Cast {
            this: arg,
            to: DataType::Custom {
                name: "VARIANT".to_string(),
            },
            double_colon_syntax: false,
            trailing_comments: Vec::new(),
            format: None,
            default: None,
            inferred_type: None,
        })))
    }
    // Non-DuckDB targets keep the function call untouched.
    _ => Ok(Expression::Function(f)),
},
// JSON_GROUP_ARRAY(x) -> JSON_AGG(x) for PostgreSQL
"JSON_GROUP_ARRAY" if f.args.len() == 1 => match target {
    DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
        Function::new("JSON_AGG".to_string(), f.args),
    ))),
    _ => Ok(Expression::Function(f)),
},
// JSON_GROUP_OBJECT(key, value) -> JSON_OBJECT_AGG(key, value) for PostgreSQL
"JSON_GROUP_OBJECT" if f.args.len() == 2 => match target {
    DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
        Function::new("JSON_OBJECT_AGG".to_string(), f.args),
    ))),
    _ => Ok(Expression::Function(f)),
},
// UNICODE(x) -> target-specific codepoint function
"UNICODE" if f.args.len() == 1 => {
    match target {
        // SQLite/DuckDB have UNICODE natively; keep as-is.
        DialectType::SQLite | DialectType::DuckDB => {
            Ok(Expression::Function(Box::new(Function::new(
                "UNICODE".to_string(),
                f.args,
            ))))
        }
        DialectType::Oracle => {
            // ASCII(UNISTR(x))
            let inner = Expression::Function(Box::new(Function::new(
                "UNISTR".to_string(),
                f.args,
            )));
            Ok(Expression::Function(Box::new(Function::new(
                "ASCII".to_string(),
                vec![inner],
            ))))
        }
        DialectType::MySQL => {
            // ORD(CONVERT(x USING utf32))
            // NOTE(review): converting to utf32 presumably makes ORD return
            // the full codepoint rather than the first byte — confirm.
            let arg = f.args.into_iter().next().unwrap();
            let convert_expr = Expression::ConvertToCharset(Box::new(
                crate::expressions::ConvertToCharset {
                    this: Box::new(arg),
                    dest: Some(Box::new(Expression::Identifier(
                        crate::expressions::Identifier::new("utf32"),
                    ))),
                    source: None,
                },
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "ORD".to_string(),
                vec![convert_expr],
            ))))
        }
        // Default: fall back to ASCII(x).
        _ => Ok(Expression::Function(Box::new(Function::new(
            "ASCII".to_string(),
            f.args,
        )))),
    }
}
10574 // XOR(a, b, ...) -> a XOR b XOR ... for MySQL, BITWISE_XOR for Presto/Trino, # for PostgreSQL, ^ for BigQuery
10575 "XOR" if f.args.len() >= 2 => {
10576 match target {
10577 DialectType::ClickHouse => {
10578 // ClickHouse: keep as xor() function with lowercase name
10579 Ok(Expression::Function(Box::new(Function::new(
10580 "xor".to_string(),
10581 f.args,
10582 ))))
10583 }
10584 DialectType::Presto | DialectType::Trino => {
10585 if f.args.len() == 2 {
10586 Ok(Expression::Function(Box::new(Function::new(
10587 "BITWISE_XOR".to_string(),
10588 f.args,
10589 ))))
10590 } else {
10591 // Nest: BITWISE_XOR(BITWISE_XOR(a, b), c)
10592 let mut args = f.args;
10593 let first = args.remove(0);
10594 let second = args.remove(0);
10595 let mut result =
10596 Expression::Function(Box::new(Function::new(
10597 "BITWISE_XOR".to_string(),
10598 vec![first, second],
10599 )));
10600 for arg in args {
10601 result =
10602 Expression::Function(Box::new(Function::new(
10603 "BITWISE_XOR".to_string(),
10604 vec![result, arg],
10605 )));
10606 }
10607 Ok(result)
10608 }
10609 }
10610 DialectType::MySQL
10611 | DialectType::SingleStore
10612 | DialectType::Doris
10613 | DialectType::StarRocks => {
10614 // Convert XOR(a, b, c) -> Expression::Xor with expressions list
10615 let args = f.args;
10616 Ok(Expression::Xor(Box::new(crate::expressions::Xor {
10617 this: None,
10618 expression: None,
10619 expressions: args,
10620 })))
10621 }
10622 DialectType::PostgreSQL | DialectType::Redshift => {
10623 // PostgreSQL: a # b (hash operator for XOR)
10624 let mut args = f.args;
10625 let first = args.remove(0);
10626 let second = args.remove(0);
10627 let mut result = Expression::BitwiseXor(Box::new(
10628 BinaryOp::new(first, second),
10629 ));
10630 for arg in args {
10631 result = Expression::BitwiseXor(Box::new(
10632 BinaryOp::new(result, arg),
10633 ));
10634 }
10635 Ok(result)
10636 }
10637 DialectType::DuckDB => {
10638 // DuckDB: keep as XOR function (DuckDB ^ is Power, not XOR)
10639 Ok(Expression::Function(Box::new(Function::new(
10640 "XOR".to_string(),
10641 f.args,
10642 ))))
10643 }
10644 DialectType::BigQuery => {
10645 // BigQuery: a ^ b (caret operator for XOR)
10646 let mut args = f.args;
10647 let first = args.remove(0);
10648 let second = args.remove(0);
10649 let mut result = Expression::BitwiseXor(Box::new(
10650 BinaryOp::new(first, second),
10651 ));
10652 for arg in args {
10653 result = Expression::BitwiseXor(Box::new(
10654 BinaryOp::new(result, arg),
10655 ));
10656 }
10657 Ok(result)
10658 }
10659 _ => Ok(Expression::Function(Box::new(Function::new(
10660 "XOR".to_string(),
10661 f.args,
10662 )))),
10663 }
10664 }
// ARRAY_REVERSE_SORT(x) -> SORT_ARRAY(x, FALSE) for Spark/Hive, ARRAY_SORT(x, lambda) for Presto
// (No explicit DuckDB branch: the fall-through keeps the name, which is
// already DuckDB's ARRAY_REVERSE_SORT.)
"ARRAY_REVERSE_SORT" if f.args.len() >= 1 => {
    match target {
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => {
            // Append the FALSE ascending-flag to get a descending SORT_ARRAY.
            let mut args = f.args;
            args.push(Expression::Identifier(
                crate::expressions::Identifier::new("FALSE"),
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "SORT_ARRAY".to_string(),
                args,
            ))))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // ARRAY_SORT(x, (a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END)
            // Inverted comparator lambda; extra args beyond the array are dropped.
            let arr = f.args.into_iter().next().unwrap();
            let lambda = Expression::Lambda(Box::new(
                crate::expressions::LambdaExpr {
                    parameters: vec![
                        Identifier::new("a"),
                        Identifier::new("b"),
                    ],
                    colon: false,
                    parameter_types: Vec::new(),
                    body: Expression::Case(Box::new(Case {
                        operand: None,
                        whens: vec![
                            (
                                // a < b => 1
                                Expression::Lt(Box::new(
                                    BinaryOp::new(
                                        Expression::Identifier(
                                            Identifier::new("a"),
                                        ),
                                        Expression::Identifier(
                                            Identifier::new("b"),
                                        ),
                                    ),
                                )),
                                Expression::number(1),
                            ),
                            (
                                // a > b => -1
                                Expression::Gt(Box::new(
                                    BinaryOp::new(
                                        Expression::Identifier(
                                            Identifier::new("a"),
                                        ),
                                        Expression::Identifier(
                                            Identifier::new("b"),
                                        ),
                                    ),
                                )),
                                // NOTE(review): -1 is built as Neg(1) here but as
                                // the literal "-1" in the LIST_REVERSE_SORT arm;
                                // presumably both render the same — worth unifying.
                                Expression::Neg(Box::new(
                                    crate::expressions::UnaryOp {
                                        this: Expression::number(1),
                                        inferred_type: None,
                                    },
                                )),
                            ),
                        ],
                        else_: Some(Expression::number(0)),
                        comments: Vec::new(),
                        inferred_type: None,
                    })),
                },
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "ARRAY_SORT".to_string(),
                vec![arr, lambda],
            ))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new(
            "ARRAY_REVERSE_SORT".to_string(),
            f.args,
        )))),
    }
}
10745 // ENCODE(x) -> ENCODE(x, 'utf-8') for Spark/Hive, TO_UTF8(x) for Presto
10746 "ENCODE" if f.args.len() == 1 => match target {
10747 DialectType::Spark
10748 | DialectType::Databricks
10749 | DialectType::Hive => {
10750 let mut args = f.args;
10751 args.push(Expression::string("utf-8"));
10752 Ok(Expression::Function(Box::new(Function::new(
10753 "ENCODE".to_string(),
10754 args,
10755 ))))
10756 }
10757 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
10758 Ok(Expression::Function(Box::new(Function::new(
10759 "TO_UTF8".to_string(),
10760 f.args,
10761 ))))
10762 }
10763 _ => Ok(Expression::Function(Box::new(Function::new(
10764 "ENCODE".to_string(),
10765 f.args,
10766 )))),
10767 },
10768 // DECODE(x) -> DECODE(x, 'utf-8') for Spark/Hive, FROM_UTF8(x) for Presto
10769 "DECODE" if f.args.len() == 1 => match target {
10770 DialectType::Spark
10771 | DialectType::Databricks
10772 | DialectType::Hive => {
10773 let mut args = f.args;
10774 args.push(Expression::string("utf-8"));
10775 Ok(Expression::Function(Box::new(Function::new(
10776 "DECODE".to_string(),
10777 args,
10778 ))))
10779 }
10780 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
10781 Ok(Expression::Function(Box::new(Function::new(
10782 "FROM_UTF8".to_string(),
10783 f.args,
10784 ))))
10785 }
10786 _ => Ok(Expression::Function(Box::new(Function::new(
10787 "DECODE".to_string(),
10788 f.args,
10789 )))),
10790 },
10791 // QUANTILE(x, p) -> PERCENTILE(x, p) for Spark/Hive
10792 "QUANTILE" if f.args.len() == 2 => {
10793 let name = match target {
10794 DialectType::Spark
10795 | DialectType::Databricks
10796 | DialectType::Hive => "PERCENTILE",
10797 DialectType::Presto | DialectType::Trino => "APPROX_PERCENTILE",
10798 DialectType::BigQuery => "PERCENTILE_CONT",
10799 _ => "QUANTILE",
10800 };
10801 Ok(Expression::Function(Box::new(Function::new(
10802 name.to_string(),
10803 f.args,
10804 ))))
10805 }
// QUANTILE_CONT(x, q) -> PERCENTILE_CONT(q) WITHIN GROUP (ORDER BY x) for PostgreSQL/Snowflake
"QUANTILE_CONT" if f.args.len() == 2 => {
    let mut args = f.args;
    let column = args.remove(0);
    let quantile = args.remove(0);
    match target {
        // DuckDB keeps its native two-argument form.
        DialectType::DuckDB => {
            Ok(Expression::Function(Box::new(Function::new(
                "QUANTILE_CONT".to_string(),
                vec![column, quantile],
            ))))
        }
        DialectType::PostgreSQL
        | DialectType::Redshift
        | DialectType::Snowflake => {
            // PERCENTILE_CONT(q) WITHIN GROUP (ORDER BY x)
            // The column is stored both inside the percentile node and,
            // cloned, as the WITHIN GROUP ordering key.
            let inner = Expression::PercentileCont(Box::new(
                crate::expressions::PercentileFunc {
                    this: column.clone(),
                    percentile: quantile,
                    order_by: None,
                    filter: None,
                },
            ));
            Ok(Expression::WithinGroup(Box::new(
                crate::expressions::WithinGroup {
                    this: inner,
                    order_by: vec![crate::expressions::Ordered {
                        this: column,
                        desc: false,
                        nulls_first: None,
                        explicit_asc: false,
                        with_fill: None,
                    }],
                },
            )))
        }
        // Other targets: keep the DuckDB-style call unchanged.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "QUANTILE_CONT".to_string(),
            vec![column, quantile],
        )))),
    }
}
// QUANTILE_DISC(x, q) -> PERCENTILE_DISC(q) WITHIN GROUP (ORDER BY x) for PostgreSQL/Snowflake
// Structurally identical to the QUANTILE_CONT arm above, discrete variant.
"QUANTILE_DISC" if f.args.len() == 2 => {
    let mut args = f.args;
    let column = args.remove(0);
    let quantile = args.remove(0);
    match target {
        DialectType::DuckDB => {
            Ok(Expression::Function(Box::new(Function::new(
                "QUANTILE_DISC".to_string(),
                vec![column, quantile],
            ))))
        }
        DialectType::PostgreSQL
        | DialectType::Redshift
        | DialectType::Snowflake => {
            // PERCENTILE_DISC(q) WITHIN GROUP (ORDER BY x)
            let inner = Expression::PercentileDisc(Box::new(
                crate::expressions::PercentileFunc {
                    this: column.clone(),
                    percentile: quantile,
                    order_by: None,
                    filter: None,
                },
            ));
            Ok(Expression::WithinGroup(Box::new(
                crate::expressions::WithinGroup {
                    this: inner,
                    order_by: vec![crate::expressions::Ordered {
                        this: column,
                        desc: false,
                        nulls_first: None,
                        explicit_asc: false,
                        with_fill: None,
                    }],
                },
            )))
        }
        _ => Ok(Expression::Function(Box::new(Function::new(
            "QUANTILE_DISC".to_string(),
            vec![column, quantile],
        )))),
    }
}
10892 // PERCENTILE_APPROX(x, p) / APPROX_PERCENTILE(x, p) -> target-specific
10893 "PERCENTILE_APPROX" | "APPROX_PERCENTILE" if f.args.len() >= 2 => {
10894 let name = match target {
10895 DialectType::Presto
10896 | DialectType::Trino
10897 | DialectType::Athena => "APPROX_PERCENTILE",
10898 DialectType::Spark
10899 | DialectType::Databricks
10900 | DialectType::Hive => "PERCENTILE_APPROX",
10901 DialectType::DuckDB => "APPROX_QUANTILE",
10902 DialectType::PostgreSQL | DialectType::Redshift => {
10903 "PERCENTILE_CONT"
10904 }
10905 _ => &f.name,
10906 };
10907 Ok(Expression::Function(Box::new(Function::new(
10908 name.to_string(),
10909 f.args,
10910 ))))
10911 }
10912 // EPOCH(x) -> UNIX_TIMESTAMP(x) for Spark/Hive
10913 "EPOCH" if f.args.len() == 1 => {
10914 let name = match target {
10915 DialectType::Spark
10916 | DialectType::Databricks
10917 | DialectType::Hive => "UNIX_TIMESTAMP",
10918 DialectType::Presto | DialectType::Trino => "TO_UNIXTIME",
10919 _ => "EPOCH",
10920 };
10921 Ok(Expression::Function(Box::new(Function::new(
10922 name.to_string(),
10923 f.args,
10924 ))))
10925 }
// EPOCH_MS(x) -> target-specific epoch milliseconds conversion
"EPOCH_MS" if f.args.len() == 1 => {
    match target {
        // Spark/Databricks have a dedicated millisecond constructor.
        DialectType::Spark | DialectType::Databricks => {
            Ok(Expression::Function(Box::new(Function::new(
                "TIMESTAMP_MILLIS".to_string(),
                f.args,
            ))))
        }
        DialectType::Hive => {
            // Hive: FROM_UNIXTIME(x / 1000)
            let arg = f.args.into_iter().next().unwrap();
            let div_expr = Expression::Div(Box::new(
                crate::expressions::BinaryOp::new(
                    arg,
                    Expression::number(1000),
                ),
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "FROM_UNIXTIME".to_string(),
                vec![div_expr],
            ))))
        }
        // Presto/Trino: same FROM_UNIXTIME(x / 1000) shape as the Hive
        // branch, built inline rather than via intermediate bindings.
        DialectType::Presto | DialectType::Trino => {
            Ok(Expression::Function(Box::new(Function::new(
                "FROM_UNIXTIME".to_string(),
                vec![Expression::Div(Box::new(
                    crate::expressions::BinaryOp::new(
                        f.args.into_iter().next().unwrap(),
                        Expression::number(1000),
                    ),
                ))],
            ))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new(
            "EPOCH_MS".to_string(),
            f.args,
        )))),
    }
}
// HASHBYTES('algorithm', x) -> target-specific hash function
"HASHBYTES" if f.args.len() == 2 => {
    // Keep HASHBYTES as-is for TSQL target
    if matches!(target, DialectType::TSQL) {
        return Ok(Expression::Function(f));
    }
    // The algorithm must be a string literal to be rewritten; anything else
    // (parameter, expression) passes through unchanged below.
    let algo_expr = &f.args[0];
    let algo = match algo_expr {
        Expression::Literal(lit)
            if matches!(
                lit.as_ref(),
                crate::expressions::Literal::String(_)
            ) =>
        {
            // Re-match to destructure; the guard above makes this irrefutable.
            let crate::expressions::Literal::String(s) = lit.as_ref()
            else {
                unreachable!()
            };
            // Normalize case so 'sha1' and 'SHA1' dispatch identically.
            s.to_ascii_uppercase()
        }
        _ => return Ok(Expression::Function(f)),
    };
    let data_arg = f.args.into_iter().nth(1).unwrap();
    match algo.as_str() {
        "SHA1" => {
            // Spark/Databricks call it SHA; everything else keeps SHA1.
            let name = match target {
                DialectType::Spark | DialectType::Databricks => "SHA",
                DialectType::Hive => "SHA1",
                _ => "SHA1",
            };
            Ok(Expression::Function(Box::new(Function::new(
                name.to_string(),
                vec![data_arg],
            ))))
        }
        // SHA2 family: the bit length moves into a second argument.
        "SHA2_256" => {
            Ok(Expression::Function(Box::new(Function::new(
                "SHA2".to_string(),
                vec![data_arg, Expression::number(256)],
            ))))
        }
        "SHA2_512" => {
            Ok(Expression::Function(Box::new(Function::new(
                "SHA2".to_string(),
                vec![data_arg, Expression::number(512)],
            ))))
        }
        "MD5" => Ok(Expression::Function(Box::new(Function::new(
            "MD5".to_string(),
            vec![data_arg],
        )))),
        // Unknown algorithm: rebuild HASHBYTES with the uppercased name.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "HASHBYTES".to_string(),
            vec![Expression::string(&algo), data_arg],
        )))),
    }
}
// JSON_EXTRACT_PATH(json, key1, key2, ...) -> target-specific JSON extraction
"JSON_EXTRACT_PATH" | "JSON_EXTRACT_PATH_TEXT" if f.args.len() >= 2 => {
    // _TEXT variants extract a scalar/string; the plain variants keep JSON.
    let is_text = name == "JSON_EXTRACT_PATH_TEXT";
    let mut args = f.args;
    let json_expr = args.remove(0);
    // Build JSON path from remaining keys: $.key1.key2 or $.key1[0]
    let mut json_path = "$".to_string();
    for a in &args {
        match a {
            Expression::Literal(lit)
                if matches!(
                    lit.as_ref(),
                    crate::expressions::Literal::String(_)
                ) =>
            {
                let crate::expressions::Literal::String(s) =
                    lit.as_ref()
                else {
                    unreachable!()
                };
                // Numeric string keys become array indices: [0]
                if s.chars().all(|c| c.is_ascii_digit()) {
                    json_path.push('[');
                    json_path.push_str(s);
                    json_path.push(']');
                } else {
                    json_path.push('.');
                    json_path.push_str(s);
                }
            }
            _ => {
                // Non-literal key: no value available at transpile time.
                // NOTE(review): '.?' is emitted as a placeholder segment —
                // confirm downstream targets tolerate it.
                json_path.push_str(".?");
            }
        }
    }
    match target {
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => {
            // Single path-string API; text vs JSON distinction is lost here.
            Ok(Expression::Function(Box::new(Function::new(
                "GET_JSON_OBJECT".to_string(),
                vec![json_expr, Expression::string(&json_path)],
            ))))
        }
        DialectType::Presto | DialectType::Trino => {
            let func_name = if is_text {
                "JSON_EXTRACT_SCALAR"
            } else {
                "JSON_EXTRACT"
            };
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                vec![json_expr, Expression::string(&json_path)],
            ))))
        }
        DialectType::BigQuery | DialectType::MySQL => {
            // Same naming scheme as Presto/Trino.
            let func_name = if is_text {
                "JSON_EXTRACT_SCALAR"
            } else {
                "JSON_EXTRACT"
            };
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                vec![json_expr, Expression::string(&json_path)],
            ))))
        }
        DialectType::PostgreSQL | DialectType::Materialize => {
            // Keep as JSON_EXTRACT_PATH_TEXT / JSON_EXTRACT_PATH for PostgreSQL/Materialize
            // These take variadic key arguments, so the original keys are
            // passed through instead of the synthesized path string.
            let func_name = if is_text {
                "JSON_EXTRACT_PATH_TEXT"
            } else {
                "JSON_EXTRACT_PATH"
            };
            let mut new_args = vec![json_expr];
            new_args.extend(args);
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                new_args,
            ))))
        }
        DialectType::DuckDB | DialectType::SQLite => {
            // Use -> for JSON_EXTRACT_PATH, ->> for JSON_EXTRACT_PATH_TEXT
            if is_text {
                Ok(Expression::JsonExtractScalar(Box::new(
                    crate::expressions::JsonExtractFunc {
                        this: json_expr,
                        path: Expression::string(&json_path),
                        returning: None,
                        arrow_syntax: true,
                        hash_arrow_syntax: false,
                        wrapper_option: None,
                        quotes_option: None,
                        on_scalar_string: false,
                        on_error: None,
                    },
                )))
            } else {
                Ok(Expression::JsonExtract(Box::new(
                    crate::expressions::JsonExtractFunc {
                        this: json_expr,
                        path: Expression::string(&json_path),
                        returning: None,
                        arrow_syntax: true,
                        hash_arrow_syntax: false,
                        wrapper_option: None,
                        quotes_option: None,
                        on_scalar_string: false,
                        on_error: None,
                    },
                )))
            }
        }
        DialectType::Redshift => {
            // Keep as JSON_EXTRACT_PATH_TEXT for Redshift
            // (both text and non-text variants map to the _TEXT form).
            let mut new_args = vec![json_expr];
            new_args.extend(args);
            Ok(Expression::Function(Box::new(Function::new(
                "JSON_EXTRACT_PATH_TEXT".to_string(),
                new_args,
            ))))
        }
        DialectType::TSQL => {
            // ISNULL(JSON_QUERY(json, '$.path'), JSON_VALUE(json, '$.path'))
            // JSON_QUERY returns objects/arrays, JSON_VALUE returns scalars;
            // ISNULL combines them so either shape is extracted.
            let jq = Expression::Function(Box::new(Function::new(
                "JSON_QUERY".to_string(),
                vec![json_expr.clone(), Expression::string(&json_path)],
            )));
            let jv = Expression::Function(Box::new(Function::new(
                "JSON_VALUE".to_string(),
                vec![json_expr, Expression::string(&json_path)],
            )));
            Ok(Expression::Function(Box::new(Function::new(
                "ISNULL".to_string(),
                vec![jq, jv],
            ))))
        }
        DialectType::ClickHouse => {
            // ClickHouse JSONExtract* take the keys as separate arguments.
            let func_name = if is_text {
                "JSONExtractString"
            } else {
                "JSONExtractRaw"
            };
            let mut new_args = vec![json_expr];
            new_args.extend(args);
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                new_args,
            ))))
        }
        _ => {
            // Default: Presto-style names with the synthesized path string.
            let func_name = if is_text {
                "JSON_EXTRACT_SCALAR"
            } else {
                "JSON_EXTRACT"
            };
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                vec![json_expr, Expression::string(&json_path)],
            ))))
        }
    }
}
11185 // APPROX_DISTINCT(x) -> APPROX_COUNT_DISTINCT(x) for Spark/Hive/BigQuery
11186 "APPROX_DISTINCT" if f.args.len() >= 1 => {
11187 let name = match target {
11188 DialectType::Spark
11189 | DialectType::Databricks
11190 | DialectType::Hive
11191 | DialectType::BigQuery => "APPROX_COUNT_DISTINCT",
11192 _ => "APPROX_DISTINCT",
11193 };
11194 let mut args = f.args;
11195 // Hive doesn't support the accuracy parameter
11196 if name == "APPROX_COUNT_DISTINCT"
11197 && matches!(target, DialectType::Hive)
11198 {
11199 args.truncate(1);
11200 }
11201 Ok(Expression::Function(Box::new(Function::new(
11202 name.to_string(),
11203 args,
11204 ))))
11205 }
// REGEXP_EXTRACT(x, pattern) - normalize default group index
// When the 2-arg form is used, dialects disagree on which capture group is
// implied (0 = whole match vs 1 = first group). If source and target
// defaults differ, the source's default is appended as an explicit third
// argument so the target evaluates the same group.
"REGEXP_EXTRACT" if f.args.len() == 2 => {
    // Determine source default group index
    let source_default = match source {
        DialectType::Presto
        | DialectType::Trino
        | DialectType::DuckDB => 0,
        _ => 1, // Hive/Spark/Databricks default = 1
    };
    // Determine target default group index
    let target_default = match target {
        DialectType::Presto
        | DialectType::Trino
        | DialectType::DuckDB
        | DialectType::BigQuery => 0,
        DialectType::Snowflake => {
            // Snowflake uses REGEXP_SUBSTR
            // (early return: no group-index normalization applies).
            return Ok(Expression::Function(Box::new(Function::new(
                "REGEXP_SUBSTR".to_string(),
                f.args,
            ))));
        }
        _ => 1, // Hive/Spark/Databricks default = 1
    };
    if source_default != target_default {
        // Make the source's implicit group explicit for the target.
        let mut args = f.args;
        args.push(Expression::number(source_default));
        Ok(Expression::Function(Box::new(Function::new(
            "REGEXP_EXTRACT".to_string(),
            args,
        ))))
    } else {
        Ok(Expression::Function(Box::new(Function::new(
            "REGEXP_EXTRACT".to_string(),
            f.args,
        ))))
    }
}
// RLIKE(str, pattern) -> RegexpLike expression (generates as target-specific form)
"RLIKE" if f.args.len() == 2 => {
    let mut args = f.args;
    let str_expr = args.remove(0);
    let pattern = args.remove(0);
    match target {
        DialectType::DuckDB => {
            // REGEXP_MATCHES(str, pattern)
            Ok(Expression::Function(Box::new(Function::new(
                "REGEXP_MATCHES".to_string(),
                vec![str_expr, pattern],
            ))))
        }
        _ => {
            // Convert to RegexpLike which generates as RLIKE/~/REGEXP_LIKE per dialect
            Ok(Expression::RegexpLike(Box::new(
                crate::expressions::RegexpFunc {
                    this: str_expr,
                    pattern,
                    flags: None,
                },
            )))
        }
    }
}
11269 // EOMONTH(date[, month_offset]) -> target-specific
11270 "EOMONTH" if f.args.len() >= 1 => {
11271 let mut args = f.args;
11272 let date_arg = args.remove(0);
11273 let month_offset = if !args.is_empty() {
11274 Some(args.remove(0))
11275 } else {
11276 None
11277 };
11278
11279 // Helper: wrap date in CAST to DATE
11280 let cast_to_date = |e: Expression| -> Expression {
11281 Expression::Cast(Box::new(Cast {
11282 this: e,
11283 to: DataType::Date,
11284 trailing_comments: vec![],
11285 double_colon_syntax: false,
11286 format: None,
11287 default: None,
11288 inferred_type: None,
11289 }))
11290 };
11291
11292 match target {
11293 DialectType::TSQL | DialectType::Fabric => {
11294 // TSQL: EOMONTH(CAST(date AS DATE)) or EOMONTH(DATEADD(MONTH, offset, CAST(date AS DATE)))
11295 let date = cast_to_date(date_arg);
11296 let date = if let Some(offset) = month_offset {
11297 Expression::Function(Box::new(Function::new(
11298 "DATEADD".to_string(),
11299 vec![
11300 Expression::Identifier(Identifier::new(
11301 "MONTH",
11302 )),
11303 offset,
11304 date,
11305 ],
11306 )))
11307 } else {
11308 date
11309 };
11310 Ok(Expression::Function(Box::new(Function::new(
11311 "EOMONTH".to_string(),
11312 vec![date],
11313 ))))
11314 }
11315 DialectType::Presto
11316 | DialectType::Trino
11317 | DialectType::Athena => {
11318 // Presto: LAST_DAY_OF_MONTH(CAST(CAST(date AS TIMESTAMP) AS DATE))
11319 // or with offset: LAST_DAY_OF_MONTH(DATE_ADD('MONTH', offset, CAST(CAST(date AS TIMESTAMP) AS DATE)))
11320 let cast_ts = Expression::Cast(Box::new(Cast {
11321 this: date_arg,
11322 to: DataType::Timestamp {
11323 timezone: false,
11324 precision: None,
11325 },
11326 trailing_comments: vec![],
11327 double_colon_syntax: false,
11328 format: None,
11329 default: None,
11330 inferred_type: None,
11331 }));
11332 let date = cast_to_date(cast_ts);
11333 let date = if let Some(offset) = month_offset {
11334 Expression::Function(Box::new(Function::new(
11335 "DATE_ADD".to_string(),
11336 vec![Expression::string("MONTH"), offset, date],
11337 )))
11338 } else {
11339 date
11340 };
11341 Ok(Expression::Function(Box::new(Function::new(
11342 "LAST_DAY_OF_MONTH".to_string(),
11343 vec![date],
11344 ))))
11345 }
11346 DialectType::PostgreSQL => {
11347 // PostgreSQL: CAST(DATE_TRUNC('MONTH', CAST(date AS DATE) [+ INTERVAL 'offset MONTH']) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)
11348 let date = cast_to_date(date_arg);
11349 let date = if let Some(offset) = month_offset {
11350 let interval_str = format!(
11351 "{} MONTH",
11352 Self::expr_to_string_static(&offset)
11353 );
11354 Expression::Add(Box::new(
11355 crate::expressions::BinaryOp::new(
11356 date,
11357 Expression::Interval(Box::new(
11358 crate::expressions::Interval {
11359 this: Some(Expression::string(
11360 &interval_str,
11361 )),
11362 unit: None,
11363 },
11364 )),
11365 ),
11366 ))
11367 } else {
11368 date
11369 };
11370 let truncated =
11371 Expression::Function(Box::new(Function::new(
11372 "DATE_TRUNC".to_string(),
11373 vec![Expression::string("MONTH"), date],
11374 )));
11375 let plus_month = Expression::Add(Box::new(
11376 crate::expressions::BinaryOp::new(
11377 truncated,
11378 Expression::Interval(Box::new(
11379 crate::expressions::Interval {
11380 this: Some(Expression::string("1 MONTH")),
11381 unit: None,
11382 },
11383 )),
11384 ),
11385 ));
11386 let minus_day = Expression::Sub(Box::new(
11387 crate::expressions::BinaryOp::new(
11388 plus_month,
11389 Expression::Interval(Box::new(
11390 crate::expressions::Interval {
11391 this: Some(Expression::string("1 DAY")),
11392 unit: None,
11393 },
11394 )),
11395 ),
11396 ));
11397 Ok(Expression::Cast(Box::new(Cast {
11398 this: minus_day,
11399 to: DataType::Date,
11400 trailing_comments: vec![],
11401 double_colon_syntax: false,
11402 format: None,
11403 default: None,
11404 inferred_type: None,
11405 })))
11406 }
11407 DialectType::DuckDB => {
11408 // DuckDB: LAST_DAY(CAST(date AS DATE) [+ INTERVAL (offset) MONTH])
11409 let date = cast_to_date(date_arg);
11410 let date = if let Some(offset) = month_offset {
11411 // Wrap negative numbers in parentheses for DuckDB INTERVAL
11412 let interval_val =
11413 if matches!(&offset, Expression::Neg(_)) {
11414 Expression::Paren(Box::new(
11415 crate::expressions::Paren {
11416 this: offset,
11417 trailing_comments: Vec::new(),
11418 },
11419 ))
11420 } else {
11421 offset
11422 };
11423 Expression::Add(Box::new(crate::expressions::BinaryOp::new(
11424 date,
11425 Expression::Interval(Box::new(crate::expressions::Interval {
11426 this: Some(interval_val),
11427 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
11428 unit: crate::expressions::IntervalUnit::Month,
11429 use_plural: false,
11430 }),
11431 })),
11432 )))
11433 } else {
11434 date
11435 };
11436 Ok(Expression::Function(Box::new(Function::new(
11437 "LAST_DAY".to_string(),
11438 vec![date],
11439 ))))
11440 }
11441 DialectType::Snowflake | DialectType::Redshift => {
11442 // Snowflake/Redshift: LAST_DAY(TO_DATE(date) or CAST(date AS DATE))
11443 // With offset: LAST_DAY(DATEADD(MONTH, offset, TO_DATE(date)))
11444 let date = if matches!(target, DialectType::Snowflake) {
11445 Expression::Function(Box::new(Function::new(
11446 "TO_DATE".to_string(),
11447 vec![date_arg],
11448 )))
11449 } else {
11450 cast_to_date(date_arg)
11451 };
11452 let date = if let Some(offset) = month_offset {
11453 Expression::Function(Box::new(Function::new(
11454 "DATEADD".to_string(),
11455 vec![
11456 Expression::Identifier(Identifier::new(
11457 "MONTH",
11458 )),
11459 offset,
11460 date,
11461 ],
11462 )))
11463 } else {
11464 date
11465 };
11466 Ok(Expression::Function(Box::new(Function::new(
11467 "LAST_DAY".to_string(),
11468 vec![date],
11469 ))))
11470 }
11471 DialectType::Spark | DialectType::Databricks => {
11472 // Spark: LAST_DAY(TO_DATE(date))
11473 // With offset: LAST_DAY(ADD_MONTHS(TO_DATE(date), offset))
11474 let date = Expression::Function(Box::new(Function::new(
11475 "TO_DATE".to_string(),
11476 vec![date_arg],
11477 )));
11478 let date = if let Some(offset) = month_offset {
11479 Expression::Function(Box::new(Function::new(
11480 "ADD_MONTHS".to_string(),
11481 vec![date, offset],
11482 )))
11483 } else {
11484 date
11485 };
11486 Ok(Expression::Function(Box::new(Function::new(
11487 "LAST_DAY".to_string(),
11488 vec![date],
11489 ))))
11490 }
11491 DialectType::MySQL => {
11492 // MySQL: LAST_DAY(DATE(date)) - no offset
11493 // With offset: LAST_DAY(DATE_ADD(date, INTERVAL offset MONTH)) - no DATE() wrapper
11494 let date = if let Some(offset) = month_offset {
11495 let iu = crate::expressions::IntervalUnit::Month;
11496 Expression::DateAdd(Box::new(
11497 crate::expressions::DateAddFunc {
11498 this: date_arg,
11499 interval: offset,
11500 unit: iu,
11501 },
11502 ))
11503 } else {
11504 Expression::Function(Box::new(Function::new(
11505 "DATE".to_string(),
11506 vec![date_arg],
11507 )))
11508 };
11509 Ok(Expression::Function(Box::new(Function::new(
11510 "LAST_DAY".to_string(),
11511 vec![date],
11512 ))))
11513 }
11514 DialectType::BigQuery => {
11515 // BigQuery: LAST_DAY(CAST(date AS DATE))
11516 // With offset: LAST_DAY(DATE_ADD(CAST(date AS DATE), INTERVAL offset MONTH))
11517 let date = cast_to_date(date_arg);
11518 let date = if let Some(offset) = month_offset {
11519 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
11520 this: Some(offset),
11521 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
11522 unit: crate::expressions::IntervalUnit::Month,
11523 use_plural: false,
11524 }),
11525 }));
11526 Expression::Function(Box::new(Function::new(
11527 "DATE_ADD".to_string(),
11528 vec![date, interval],
11529 )))
11530 } else {
11531 date
11532 };
11533 Ok(Expression::Function(Box::new(Function::new(
11534 "LAST_DAY".to_string(),
11535 vec![date],
11536 ))))
11537 }
11538 DialectType::ClickHouse => {
11539 // ClickHouse: LAST_DAY(CAST(date AS Nullable(DATE)))
11540 let date = Expression::Cast(Box::new(Cast {
11541 this: date_arg,
11542 to: DataType::Nullable {
11543 inner: Box::new(DataType::Date),
11544 },
11545 trailing_comments: vec![],
11546 double_colon_syntax: false,
11547 format: None,
11548 default: None,
11549 inferred_type: None,
11550 }));
11551 let date = if let Some(offset) = month_offset {
11552 Expression::Function(Box::new(Function::new(
11553 "DATE_ADD".to_string(),
11554 vec![
11555 Expression::Identifier(Identifier::new(
11556 "MONTH",
11557 )),
11558 offset,
11559 date,
11560 ],
11561 )))
11562 } else {
11563 date
11564 };
11565 Ok(Expression::Function(Box::new(Function::new(
11566 "LAST_DAY".to_string(),
11567 vec![date],
11568 ))))
11569 }
11570 DialectType::Hive => {
11571 // Hive: LAST_DAY(date)
11572 let date = if let Some(offset) = month_offset {
11573 Expression::Function(Box::new(Function::new(
11574 "ADD_MONTHS".to_string(),
11575 vec![date_arg, offset],
11576 )))
11577 } else {
11578 date_arg
11579 };
11580 Ok(Expression::Function(Box::new(Function::new(
11581 "LAST_DAY".to_string(),
11582 vec![date],
11583 ))))
11584 }
11585 _ => {
11586 // Default: LAST_DAY(date)
11587 let date = if let Some(offset) = month_offset {
11588 let unit =
11589 Expression::Identifier(Identifier::new("MONTH"));
11590 Expression::Function(Box::new(Function::new(
11591 "DATEADD".to_string(),
11592 vec![unit, offset, date_arg],
11593 )))
11594 } else {
11595 date_arg
11596 };
11597 Ok(Expression::Function(Box::new(Function::new(
11598 "LAST_DAY".to_string(),
11599 vec![date],
11600 ))))
11601 }
11602 }
11603 }
11604 // LAST_DAY(x) / LAST_DAY_OF_MONTH(x) -> target-specific
11605 "LAST_DAY" | "LAST_DAY_OF_MONTH"
11606 if !matches!(source, DialectType::BigQuery)
11607 && f.args.len() >= 1 =>
11608 {
11609 let first_arg = f.args.into_iter().next().unwrap();
11610 match target {
11611 DialectType::TSQL | DialectType::Fabric => {
11612 Ok(Expression::Function(Box::new(Function::new(
11613 "EOMONTH".to_string(),
11614 vec![first_arg],
11615 ))))
11616 }
11617 DialectType::Presto
11618 | DialectType::Trino
11619 | DialectType::Athena => {
11620 Ok(Expression::Function(Box::new(Function::new(
11621 "LAST_DAY_OF_MONTH".to_string(),
11622 vec![first_arg],
11623 ))))
11624 }
11625 _ => Ok(Expression::Function(Box::new(Function::new(
11626 "LAST_DAY".to_string(),
11627 vec![first_arg],
11628 )))),
11629 }
11630 }
11631 // MAP(keys_array, vals_array) from Presto (2-arg form) -> target-specific
11632 "MAP"
11633 if f.args.len() == 2
11634 && matches!(
11635 source,
11636 DialectType::Presto
11637 | DialectType::Trino
11638 | DialectType::Athena
11639 ) =>
11640 {
11641 let keys_arg = f.args[0].clone();
11642 let vals_arg = f.args[1].clone();
11643
11644 // Helper: extract array elements from Array/ArrayFunc/Function("ARRAY") expressions
11645 fn extract_array_elements(
11646 expr: &Expression,
11647 ) -> Option<&Vec<Expression>> {
11648 match expr {
11649 Expression::Array(arr) => Some(&arr.expressions),
11650 Expression::ArrayFunc(arr) => Some(&arr.expressions),
11651 Expression::Function(f)
11652 if f.name.eq_ignore_ascii_case("ARRAY") =>
11653 {
11654 Some(&f.args)
11655 }
11656 _ => None,
11657 }
11658 }
11659
11660 match target {
11661 DialectType::Spark | DialectType::Databricks => {
11662 // Presto MAP(keys, vals) -> Spark MAP_FROM_ARRAYS(keys, vals)
11663 Ok(Expression::Function(Box::new(Function::new(
11664 "MAP_FROM_ARRAYS".to_string(),
11665 f.args,
11666 ))))
11667 }
11668 DialectType::Hive => {
11669 // Presto MAP(ARRAY[k1,k2], ARRAY[v1,v2]) -> Hive MAP(k1, v1, k2, v2)
11670 if let (Some(keys), Some(vals)) = (
11671 extract_array_elements(&keys_arg),
11672 extract_array_elements(&vals_arg),
11673 ) {
11674 if keys.len() == vals.len() {
11675 let mut interleaved = Vec::new();
11676 for (k, v) in keys.iter().zip(vals.iter()) {
11677 interleaved.push(k.clone());
11678 interleaved.push(v.clone());
11679 }
11680 Ok(Expression::Function(Box::new(Function::new(
11681 "MAP".to_string(),
11682 interleaved,
11683 ))))
11684 } else {
11685 Ok(Expression::Function(Box::new(Function::new(
11686 "MAP".to_string(),
11687 f.args,
11688 ))))
11689 }
11690 } else {
11691 Ok(Expression::Function(Box::new(Function::new(
11692 "MAP".to_string(),
11693 f.args,
11694 ))))
11695 }
11696 }
11697 DialectType::Snowflake => {
11698 // Presto MAP(ARRAY[k1,k2], ARRAY[v1,v2]) -> Snowflake OBJECT_CONSTRUCT(k1, v1, k2, v2)
11699 if let (Some(keys), Some(vals)) = (
11700 extract_array_elements(&keys_arg),
11701 extract_array_elements(&vals_arg),
11702 ) {
11703 if keys.len() == vals.len() {
11704 let mut interleaved = Vec::new();
11705 for (k, v) in keys.iter().zip(vals.iter()) {
11706 interleaved.push(k.clone());
11707 interleaved.push(v.clone());
11708 }
11709 Ok(Expression::Function(Box::new(Function::new(
11710 "OBJECT_CONSTRUCT".to_string(),
11711 interleaved,
11712 ))))
11713 } else {
11714 Ok(Expression::Function(Box::new(Function::new(
11715 "MAP".to_string(),
11716 f.args,
11717 ))))
11718 }
11719 } else {
11720 Ok(Expression::Function(Box::new(Function::new(
11721 "MAP".to_string(),
11722 f.args,
11723 ))))
11724 }
11725 }
11726 _ => Ok(Expression::Function(f)),
11727 }
11728 }
11729 // MAP() with 0 args from Spark -> MAP(ARRAY[], ARRAY[]) for Presto/Trino
11730 "MAP"
11731 if f.args.is_empty()
11732 && matches!(
11733 source,
11734 DialectType::Hive
11735 | DialectType::Spark
11736 | DialectType::Databricks
11737 )
11738 && matches!(
11739 target,
11740 DialectType::Presto
11741 | DialectType::Trino
11742 | DialectType::Athena
11743 ) =>
11744 {
11745 let empty_keys =
11746 Expression::Array(Box::new(crate::expressions::Array {
11747 expressions: vec![],
11748 }));
11749 let empty_vals =
11750 Expression::Array(Box::new(crate::expressions::Array {
11751 expressions: vec![],
11752 }));
11753 Ok(Expression::Function(Box::new(Function::new(
11754 "MAP".to_string(),
11755 vec![empty_keys, empty_vals],
11756 ))))
11757 }
11758 // MAP(k1, v1, k2, v2, ...) from Hive/Spark -> target-specific
11759 "MAP"
11760 if f.args.len() >= 2
11761 && f.args.len() % 2 == 0
11762 && matches!(
11763 source,
11764 DialectType::Hive
11765 | DialectType::Spark
11766 | DialectType::Databricks
11767 | DialectType::ClickHouse
11768 ) =>
11769 {
11770 let args = f.args;
11771 match target {
11772 DialectType::DuckDB => {
11773 // MAP([k1, k2], [v1, v2])
11774 let mut keys = Vec::new();
11775 let mut vals = Vec::new();
11776 for (i, arg) in args.into_iter().enumerate() {
11777 if i % 2 == 0 {
11778 keys.push(arg);
11779 } else {
11780 vals.push(arg);
11781 }
11782 }
11783 let keys_arr = Expression::Array(Box::new(
11784 crate::expressions::Array { expressions: keys },
11785 ));
11786 let vals_arr = Expression::Array(Box::new(
11787 crate::expressions::Array { expressions: vals },
11788 ));
11789 Ok(Expression::Function(Box::new(Function::new(
11790 "MAP".to_string(),
11791 vec![keys_arr, vals_arr],
11792 ))))
11793 }
11794 DialectType::Presto | DialectType::Trino => {
11795 // MAP(ARRAY[k1, k2], ARRAY[v1, v2])
11796 let mut keys = Vec::new();
11797 let mut vals = Vec::new();
11798 for (i, arg) in args.into_iter().enumerate() {
11799 if i % 2 == 0 {
11800 keys.push(arg);
11801 } else {
11802 vals.push(arg);
11803 }
11804 }
11805 let keys_arr = Expression::Array(Box::new(
11806 crate::expressions::Array { expressions: keys },
11807 ));
11808 let vals_arr = Expression::Array(Box::new(
11809 crate::expressions::Array { expressions: vals },
11810 ));
11811 Ok(Expression::Function(Box::new(Function::new(
11812 "MAP".to_string(),
11813 vec![keys_arr, vals_arr],
11814 ))))
11815 }
11816 DialectType::Snowflake => Ok(Expression::Function(Box::new(
11817 Function::new("OBJECT_CONSTRUCT".to_string(), args),
11818 ))),
11819 DialectType::ClickHouse => Ok(Expression::Function(Box::new(
11820 Function::new("map".to_string(), args),
11821 ))),
11822 _ => Ok(Expression::Function(Box::new(Function::new(
11823 "MAP".to_string(),
11824 args,
11825 )))),
11826 }
11827 }
11828 // COLLECT_LIST(x) -> ARRAY_AGG(x) for most targets
11829 "COLLECT_LIST" if f.args.len() >= 1 => {
11830 let name = match target {
11831 DialectType::Spark
11832 | DialectType::Databricks
11833 | DialectType::Hive => "COLLECT_LIST",
11834 DialectType::DuckDB
11835 | DialectType::PostgreSQL
11836 | DialectType::Redshift
11837 | DialectType::Snowflake
11838 | DialectType::BigQuery => "ARRAY_AGG",
11839 DialectType::Presto | DialectType::Trino => "ARRAY_AGG",
11840 _ => "ARRAY_AGG",
11841 };
11842 Ok(Expression::Function(Box::new(Function::new(
11843 name.to_string(),
11844 f.args,
11845 ))))
11846 }
11847 // COLLECT_SET(x) -> target-specific distinct array aggregation
11848 "COLLECT_SET" if f.args.len() >= 1 => {
11849 let name = match target {
11850 DialectType::Spark
11851 | DialectType::Databricks
11852 | DialectType::Hive => "COLLECT_SET",
11853 DialectType::Presto
11854 | DialectType::Trino
11855 | DialectType::Athena => "SET_AGG",
11856 DialectType::Snowflake => "ARRAY_UNIQUE_AGG",
11857 _ => "ARRAY_AGG",
11858 };
11859 Ok(Expression::Function(Box::new(Function::new(
11860 name.to_string(),
11861 f.args,
11862 ))))
11863 }
11864 // ISNAN(x) / IS_NAN(x) - normalize
11865 "ISNAN" | "IS_NAN" => {
11866 let name = match target {
11867 DialectType::Spark
11868 | DialectType::Databricks
11869 | DialectType::Hive => "ISNAN",
11870 DialectType::Presto
11871 | DialectType::Trino
11872 | DialectType::Athena => "IS_NAN",
11873 DialectType::BigQuery
11874 | DialectType::PostgreSQL
11875 | DialectType::Redshift => "IS_NAN",
11876 DialectType::ClickHouse => "IS_NAN",
11877 _ => "ISNAN",
11878 };
11879 Ok(Expression::Function(Box::new(Function::new(
11880 name.to_string(),
11881 f.args,
11882 ))))
11883 }
11884 // SPLIT_PART(str, delim, index) -> target-specific
11885 "SPLIT_PART" if f.args.len() == 3 => {
11886 match target {
11887 DialectType::Spark | DialectType::Databricks => {
11888 // Keep as SPLIT_PART (Spark 3.4+)
11889 Ok(Expression::Function(Box::new(Function::new(
11890 "SPLIT_PART".to_string(),
11891 f.args,
11892 ))))
11893 }
11894 DialectType::DuckDB
11895 if matches!(source, DialectType::Snowflake) =>
11896 {
11897 // Snowflake SPLIT_PART -> DuckDB with CASE wrapper:
11898 // - part_index 0 treated as 1
11899 // - empty delimiter: return whole string if index 1 or -1, else ''
11900 let mut args = f.args;
11901 let str_arg = args.remove(0);
11902 let delim_arg = args.remove(0);
11903 let idx_arg = args.remove(0);
11904
11905 // (CASE WHEN idx = 0 THEN 1 ELSE idx END)
11906 let adjusted_idx = Expression::Paren(Box::new(Paren {
11907 this: Expression::Case(Box::new(Case {
11908 operand: None,
11909 whens: vec![(
11910 Expression::Eq(Box::new(BinaryOp {
11911 left: idx_arg.clone(),
11912 right: Expression::number(0),
11913 left_comments: vec![],
11914 operator_comments: vec![],
11915 trailing_comments: vec![],
11916 inferred_type: None,
11917 })),
11918 Expression::number(1),
11919 )],
11920 else_: Some(idx_arg.clone()),
11921 comments: vec![],
11922 inferred_type: None,
11923 })),
11924 trailing_comments: vec![],
11925 }));
11926
11927 // SPLIT_PART(str, delim, adjusted_idx)
11928 let base_func =
11929 Expression::Function(Box::new(Function::new(
11930 "SPLIT_PART".to_string(),
11931 vec![
11932 str_arg.clone(),
11933 delim_arg.clone(),
11934 adjusted_idx.clone(),
11935 ],
11936 )));
11937
11938 // (CASE WHEN adjusted_idx = 1 OR adjusted_idx = -1 THEN str ELSE '' END)
11939 let empty_delim_case = Expression::Paren(Box::new(Paren {
11940 this: Expression::Case(Box::new(Case {
11941 operand: None,
11942 whens: vec![(
11943 Expression::Or(Box::new(BinaryOp {
11944 left: Expression::Eq(Box::new(BinaryOp {
11945 left: adjusted_idx.clone(),
11946 right: Expression::number(1),
11947 left_comments: vec![],
11948 operator_comments: vec![],
11949 trailing_comments: vec![],
11950 inferred_type: None,
11951 })),
11952 right: Expression::Eq(Box::new(BinaryOp {
11953 left: adjusted_idx,
11954 right: Expression::number(-1),
11955 left_comments: vec![],
11956 operator_comments: vec![],
11957 trailing_comments: vec![],
11958 inferred_type: None,
11959 })),
11960 left_comments: vec![],
11961 operator_comments: vec![],
11962 trailing_comments: vec![],
11963 inferred_type: None,
11964 })),
11965 str_arg,
11966 )],
11967 else_: Some(Expression::string("")),
11968 comments: vec![],
11969 inferred_type: None,
11970 })),
11971 trailing_comments: vec![],
11972 }));
11973
11974 // CASE WHEN delim = '' THEN (empty case) ELSE SPLIT_PART(...) END
11975 Ok(Expression::Case(Box::new(Case {
11976 operand: None,
11977 whens: vec![(
11978 Expression::Eq(Box::new(BinaryOp {
11979 left: delim_arg,
11980 right: Expression::string(""),
11981 left_comments: vec![],
11982 operator_comments: vec![],
11983 trailing_comments: vec![],
11984 inferred_type: None,
11985 })),
11986 empty_delim_case,
11987 )],
11988 else_: Some(base_func),
11989 comments: vec![],
11990 inferred_type: None,
11991 })))
11992 }
11993 DialectType::DuckDB
11994 | DialectType::PostgreSQL
11995 | DialectType::Snowflake
11996 | DialectType::Redshift
11997 | DialectType::Trino
11998 | DialectType::Presto => Ok(Expression::Function(Box::new(
11999 Function::new("SPLIT_PART".to_string(), f.args),
12000 ))),
12001 DialectType::Hive => {
12002 // SPLIT(str, delim)[index]
12003 // Complex conversion, just keep as-is for now
12004 Ok(Expression::Function(Box::new(Function::new(
12005 "SPLIT_PART".to_string(),
12006 f.args,
12007 ))))
12008 }
12009 _ => Ok(Expression::Function(Box::new(Function::new(
12010 "SPLIT_PART".to_string(),
12011 f.args,
12012 )))),
12013 }
12014 }
12015 // JSON_EXTRACT(json, path) -> target-specific JSON extraction
12016 "JSON_EXTRACT" | "JSON_EXTRACT_SCALAR" if f.args.len() == 2 => {
12017 let is_scalar = name == "JSON_EXTRACT_SCALAR";
12018 match target {
12019 DialectType::Spark
12020 | DialectType::Databricks
12021 | DialectType::Hive => {
12022 let mut args = f.args;
12023 // Spark/Hive don't support Presto's TRY(expr) wrapper form here.
12024 // Mirror sqlglot by unwrapping TRY(expr) to expr before GET_JSON_OBJECT.
12025 if let Some(Expression::Function(inner)) = args.first() {
12026 if inner.name.eq_ignore_ascii_case("TRY")
12027 && inner.args.len() == 1
12028 {
12029 let mut inner_args = inner.args.clone();
12030 args[0] = inner_args.remove(0);
12031 }
12032 }
12033 Ok(Expression::Function(Box::new(Function::new(
12034 "GET_JSON_OBJECT".to_string(),
12035 args,
12036 ))))
12037 }
12038 DialectType::DuckDB | DialectType::SQLite => {
12039 // json -> path syntax
12040 let mut args = f.args;
12041 let json_expr = args.remove(0);
12042 let path = args.remove(0);
12043 Ok(Expression::JsonExtract(Box::new(
12044 crate::expressions::JsonExtractFunc {
12045 this: json_expr,
12046 path,
12047 returning: None,
12048 arrow_syntax: true,
12049 hash_arrow_syntax: false,
12050 wrapper_option: None,
12051 quotes_option: None,
12052 on_scalar_string: false,
12053 on_error: None,
12054 },
12055 )))
12056 }
12057 DialectType::TSQL => {
12058 let func_name = if is_scalar {
12059 "JSON_VALUE"
12060 } else {
12061 "JSON_QUERY"
12062 };
12063 Ok(Expression::Function(Box::new(Function::new(
12064 func_name.to_string(),
12065 f.args,
12066 ))))
12067 }
12068 DialectType::PostgreSQL | DialectType::Redshift => {
12069 let func_name = if is_scalar {
12070 "JSON_EXTRACT_PATH_TEXT"
12071 } else {
12072 "JSON_EXTRACT_PATH"
12073 };
12074 Ok(Expression::Function(Box::new(Function::new(
12075 func_name.to_string(),
12076 f.args,
12077 ))))
12078 }
12079 _ => Ok(Expression::Function(Box::new(Function::new(
12080 name.to_string(),
12081 f.args,
12082 )))),
12083 }
12084 }
12085 // MySQL JSON_SEARCH(json_doc, mode, search[, escape_char[, path]]) -> DuckDB json_tree-based lookup
12086 "JSON_SEARCH"
12087 if matches!(target, DialectType::DuckDB)
12088 && (3..=5).contains(&f.args.len()) =>
12089 {
12090 let args = &f.args;
12091
12092 // Only rewrite deterministic modes and NULL/no escape-char variant.
12093 let mode = match &args[1] {
12094 Expression::Literal(lit)
12095 if matches!(
12096 lit.as_ref(),
12097 crate::expressions::Literal::String(_)
12098 ) =>
12099 {
12100 let crate::expressions::Literal::String(s) = lit.as_ref()
12101 else {
12102 unreachable!()
12103 };
12104 s.to_ascii_lowercase()
12105 }
12106 _ => return Ok(Expression::Function(f)),
12107 };
12108 if mode != "one" && mode != "all" {
12109 return Ok(Expression::Function(f));
12110 }
12111 if args.len() >= 4 && !matches!(&args[3], Expression::Null(_)) {
12112 return Ok(Expression::Function(f));
12113 }
12114
12115 let json_doc_sql = match Generator::sql(&args[0]) {
12116 Ok(sql) => sql,
12117 Err(_) => return Ok(Expression::Function(f)),
12118 };
12119 let search_sql = match Generator::sql(&args[2]) {
12120 Ok(sql) => sql,
12121 Err(_) => return Ok(Expression::Function(f)),
12122 };
12123 let path_sql = if args.len() == 5 {
12124 match Generator::sql(&args[4]) {
12125 Ok(sql) => sql,
12126 Err(_) => return Ok(Expression::Function(f)),
12127 }
12128 } else {
12129 "'$'".to_string()
12130 };
12131
12132 let rewrite_sql = if mode == "all" {
12133 format!(
12134 "(SELECT TO_JSON(LIST(__jt.fullkey)) FROM json_tree({}, {}) AS __jt WHERE __jt.atom = TO_JSON({}))",
12135 json_doc_sql, path_sql, search_sql
12136 )
12137 } else {
12138 format!(
12139 "(SELECT TO_JSON(__jt.fullkey) FROM json_tree({}, {}) AS __jt WHERE __jt.atom = TO_JSON({}) ORDER BY __jt.id LIMIT 1)",
12140 json_doc_sql, path_sql, search_sql
12141 )
12142 };
12143
12144 Ok(Expression::Raw(crate::expressions::Raw {
12145 sql: rewrite_sql,
12146 }))
12147 }
12148 // SingleStore JSON_EXTRACT_JSON(json, key1, key2, ...) -> JSON_EXTRACT(json, '$.key1.key2' or '$.key1[key2]')
12149 // BSON_EXTRACT_BSON(json, key1, ...) -> JSONB_EXTRACT(json, '$.key1')
12150 "JSON_EXTRACT_JSON" | "BSON_EXTRACT_BSON"
12151 if f.args.len() >= 2
12152 && matches!(source, DialectType::SingleStore) =>
12153 {
12154 let is_bson = name == "BSON_EXTRACT_BSON";
12155 let mut args = f.args;
12156 let json_expr = args.remove(0);
12157
12158 // Build JSONPath from remaining arguments
12159 let mut path = String::from("$");
12160 for arg in &args {
12161 if let Expression::Literal(lit) = arg {
12162 if let crate::expressions::Literal::String(s) = lit.as_ref()
12163 {
12164 // Check if it's a numeric string (array index)
12165 if s.parse::<i64>().is_ok() {
12166 path.push('[');
12167 path.push_str(s);
12168 path.push(']');
12169 } else {
12170 path.push('.');
12171 path.push_str(s);
12172 }
12173 }
12174 }
12175 }
12176
12177 let target_func = if is_bson {
12178 "JSONB_EXTRACT"
12179 } else {
12180 "JSON_EXTRACT"
12181 };
12182 Ok(Expression::Function(Box::new(Function::new(
12183 target_func.to_string(),
12184 vec![json_expr, Expression::string(&path)],
12185 ))))
12186 }
12187 // ARRAY_SUM(lambda, array) from Doris -> ClickHouse arraySum
12188 "ARRAY_SUM" if matches!(target, DialectType::ClickHouse) => {
12189 Ok(Expression::Function(Box::new(Function {
12190 name: "arraySum".to_string(),
12191 args: f.args,
12192 distinct: f.distinct,
12193 trailing_comments: f.trailing_comments,
12194 use_bracket_syntax: f.use_bracket_syntax,
12195 no_parens: f.no_parens,
12196 quoted: f.quoted,
12197 span: None,
12198 inferred_type: None,
12199 })))
12200 }
12201 // TSQL JSON_QUERY/JSON_VALUE -> target-specific
12202 // Note: For TSQL->TSQL, JsonQuery stays as Expression::JsonQuery (source transform not called)
12203 // and is handled by JsonQueryValueConvert action. This handles the case where
12204 // TSQL read transform converted JsonQuery to Function("JSON_QUERY") for cross-dialect.
12205 "JSON_QUERY" | "JSON_VALUE"
12206 if f.args.len() == 2
12207 && matches!(
12208 source,
12209 DialectType::TSQL | DialectType::Fabric
12210 ) =>
12211 {
12212 match target {
12213 DialectType::Spark
12214 | DialectType::Databricks
12215 | DialectType::Hive => Ok(Expression::Function(Box::new(
12216 Function::new("GET_JSON_OBJECT".to_string(), f.args),
12217 ))),
12218 _ => Ok(Expression::Function(Box::new(Function::new(
12219 name.to_string(),
12220 f.args,
12221 )))),
12222 }
12223 }
12224 // UNIX_TIMESTAMP(x) -> TO_UNIXTIME(x) for Presto
12225 "UNIX_TIMESTAMP" if f.args.len() == 1 => {
12226 let arg = f.args.into_iter().next().unwrap();
12227 let is_hive_source = matches!(
12228 source,
12229 DialectType::Hive
12230 | DialectType::Spark
12231 | DialectType::Databricks
12232 );
12233 match target {
12234 DialectType::DuckDB if is_hive_source => {
12235 // DuckDB: EPOCH(STRPTIME(x, '%Y-%m-%d %H:%M:%S'))
12236 let strptime =
12237 Expression::Function(Box::new(Function::new(
12238 "STRPTIME".to_string(),
12239 vec![arg, Expression::string("%Y-%m-%d %H:%M:%S")],
12240 )));
12241 Ok(Expression::Function(Box::new(Function::new(
12242 "EPOCH".to_string(),
12243 vec![strptime],
12244 ))))
12245 }
12246 DialectType::Presto | DialectType::Trino if is_hive_source => {
12247 // Presto: TO_UNIXTIME(COALESCE(TRY(DATE_PARSE(CAST(x AS VARCHAR), '%Y-%m-%d %T')), PARSE_DATETIME(DATE_FORMAT(x, '%Y-%m-%d %T'), 'yyyy-MM-dd HH:mm:ss')))
12248 let cast_varchar =
12249 Expression::Cast(Box::new(crate::expressions::Cast {
12250 this: arg.clone(),
12251 to: DataType::VarChar {
12252 length: None,
12253 parenthesized_length: false,
12254 },
12255 trailing_comments: vec![],
12256 double_colon_syntax: false,
12257 format: None,
12258 default: None,
12259 inferred_type: None,
12260 }));
12261 let date_parse =
12262 Expression::Function(Box::new(Function::new(
12263 "DATE_PARSE".to_string(),
12264 vec![
12265 cast_varchar,
12266 Expression::string("%Y-%m-%d %T"),
12267 ],
12268 )));
12269 let try_expr = Expression::Function(Box::new(
12270 Function::new("TRY".to_string(), vec![date_parse]),
12271 ));
12272 let date_format =
12273 Expression::Function(Box::new(Function::new(
12274 "DATE_FORMAT".to_string(),
12275 vec![arg, Expression::string("%Y-%m-%d %T")],
12276 )));
12277 let parse_datetime =
12278 Expression::Function(Box::new(Function::new(
12279 "PARSE_DATETIME".to_string(),
12280 vec![
12281 date_format,
12282 Expression::string("yyyy-MM-dd HH:mm:ss"),
12283 ],
12284 )));
12285 let coalesce =
12286 Expression::Function(Box::new(Function::new(
12287 "COALESCE".to_string(),
12288 vec![try_expr, parse_datetime],
12289 )));
12290 Ok(Expression::Function(Box::new(Function::new(
12291 "TO_UNIXTIME".to_string(),
12292 vec![coalesce],
12293 ))))
12294 }
12295 DialectType::Presto | DialectType::Trino => {
12296 Ok(Expression::Function(Box::new(Function::new(
12297 "TO_UNIXTIME".to_string(),
12298 vec![arg],
12299 ))))
12300 }
12301 _ => Ok(Expression::Function(Box::new(Function::new(
12302 "UNIX_TIMESTAMP".to_string(),
12303 vec![arg],
12304 )))),
12305 }
12306 }
12307 // TO_UNIX_TIMESTAMP(x) -> UNIX_TIMESTAMP(x) for Spark/Hive
12308 "TO_UNIX_TIMESTAMP" if f.args.len() >= 1 => match target {
12309 DialectType::Spark
12310 | DialectType::Databricks
12311 | DialectType::Hive => Ok(Expression::Function(Box::new(
12312 Function::new("UNIX_TIMESTAMP".to_string(), f.args),
12313 ))),
12314 _ => Ok(Expression::Function(Box::new(Function::new(
12315 "TO_UNIX_TIMESTAMP".to_string(),
12316 f.args,
12317 )))),
12318 },
12319 // CURDATE() -> CURRENT_DATE
12320 "CURDATE" => {
12321 Ok(Expression::CurrentDate(crate::expressions::CurrentDate))
12322 }
12323 // CURTIME() -> CURRENT_TIME
12324 "CURTIME" => {
12325 Ok(Expression::CurrentTime(crate::expressions::CurrentTime {
12326 precision: None,
12327 }))
12328 }
12329 // ARRAY_SORT(x) or ARRAY_SORT(x, lambda) -> SORT_ARRAY(x) for Hive, LIST_SORT for DuckDB
12330 "ARRAY_SORT" if f.args.len() >= 1 => {
12331 match target {
12332 DialectType::Hive => {
12333 let mut args = f.args;
12334 args.truncate(1); // Drop lambda comparator
12335 Ok(Expression::Function(Box::new(Function::new(
12336 "SORT_ARRAY".to_string(),
12337 args,
12338 ))))
12339 }
12340 DialectType::DuckDB
12341 if matches!(source, DialectType::Snowflake) =>
12342 {
12343 // Snowflake ARRAY_SORT(arr[, asc_bool[, nulls_first_bool]]) -> DuckDB LIST_SORT(arr[, 'ASC'/'DESC'[, 'NULLS FIRST']])
12344 let mut args_iter = f.args.into_iter();
12345 let arr = args_iter.next().unwrap();
12346 let asc_arg = args_iter.next();
12347 let nulls_first_arg = args_iter.next();
12348
12349 let is_asc_bool = asc_arg
12350 .as_ref()
12351 .map(|a| matches!(a, Expression::Boolean(_)))
12352 .unwrap_or(false);
12353 let is_nf_bool = nulls_first_arg
12354 .as_ref()
12355 .map(|a| matches!(a, Expression::Boolean(_)))
12356 .unwrap_or(false);
12357
12358 // No boolean args: pass through as-is
12359 if !is_asc_bool && !is_nf_bool {
12360 let mut result_args = vec![arr];
12361 if let Some(asc) = asc_arg {
12362 result_args.push(asc);
12363 if let Some(nf) = nulls_first_arg {
12364 result_args.push(nf);
12365 }
12366 }
12367 Ok(Expression::Function(Box::new(Function::new(
12368 "LIST_SORT".to_string(),
12369 result_args,
12370 ))))
12371 } else {
12372 // Has boolean args: convert to DuckDB LIST_SORT format
12373 let descending = matches!(&asc_arg, Some(Expression::Boolean(b)) if !b.value);
12374
12375 // Snowflake defaults: nulls_first = TRUE for DESC, FALSE for ASC
12376 let nulls_are_first = match &nulls_first_arg {
12377 Some(Expression::Boolean(b)) => b.value,
12378 None if is_asc_bool => descending, // Snowflake default
12379 _ => false,
12380 };
12381 let nulls_first_sql = if nulls_are_first {
12382 Some(Expression::string("NULLS FIRST"))
12383 } else {
12384 None
12385 };
12386
12387 if !is_asc_bool {
12388 // asc is non-boolean expression, nulls_first is boolean
12389 let mut result_args = vec![arr];
12390 if let Some(asc) = asc_arg {
12391 result_args.push(asc);
12392 }
12393 if let Some(nf) = nulls_first_sql {
12394 result_args.push(nf);
12395 }
12396 Ok(Expression::Function(Box::new(Function::new(
12397 "LIST_SORT".to_string(),
12398 result_args,
12399 ))))
12400 } else {
12401 if !descending && !nulls_are_first {
12402 // ASC, NULLS LAST (default) -> LIST_SORT(arr)
12403 Ok(Expression::Function(Box::new(
12404 Function::new(
12405 "LIST_SORT".to_string(),
12406 vec![arr],
12407 ),
12408 )))
12409 } else if descending && !nulls_are_first {
12410 // DESC, NULLS LAST -> ARRAY_REVERSE_SORT(arr)
12411 Ok(Expression::Function(Box::new(
12412 Function::new(
12413 "ARRAY_REVERSE_SORT".to_string(),
12414 vec![arr],
12415 ),
12416 )))
12417 } else {
12418 // NULLS FIRST -> LIST_SORT(arr, 'ASC'/'DESC', 'NULLS FIRST')
12419 let order_str =
12420 if descending { "DESC" } else { "ASC" };
12421 Ok(Expression::Function(Box::new(
12422 Function::new(
12423 "LIST_SORT".to_string(),
12424 vec![
12425 arr,
12426 Expression::string(order_str),
12427 Expression::string("NULLS FIRST"),
12428 ],
12429 ),
12430 )))
12431 }
12432 }
12433 }
12434 }
12435 DialectType::DuckDB => {
12436 // Non-Snowflake source: ARRAY_SORT(x, lambda) -> ARRAY_SORT(x) (drop comparator)
12437 let mut args = f.args;
12438 args.truncate(1); // Drop lambda comparator for DuckDB
12439 Ok(Expression::Function(Box::new(Function::new(
12440 "ARRAY_SORT".to_string(),
12441 args,
12442 ))))
12443 }
12444 _ => Ok(Expression::Function(f)),
12445 }
12446 }
12447 // SORT_ARRAY(x) -> LIST_SORT(x) for DuckDB, ARRAY_SORT(x) for Presto/Trino, keep for Hive/Spark
12448 "SORT_ARRAY" if f.args.len() == 1 => match target {
12449 DialectType::Hive
12450 | DialectType::Spark
12451 | DialectType::Databricks => Ok(Expression::Function(f)),
12452 DialectType::DuckDB => Ok(Expression::Function(Box::new(
12453 Function::new("LIST_SORT".to_string(), f.args),
12454 ))),
12455 _ => Ok(Expression::Function(Box::new(Function::new(
12456 "ARRAY_SORT".to_string(),
12457 f.args,
12458 )))),
12459 },
12460 // SORT_ARRAY(x, FALSE) -> ARRAY_REVERSE_SORT(x) for DuckDB, ARRAY_SORT(x, lambda) for Presto
12461 "SORT_ARRAY" if f.args.len() == 2 => {
12462 let is_desc =
12463 matches!(&f.args[1], Expression::Boolean(b) if !b.value);
12464 if is_desc {
12465 match target {
12466 DialectType::DuckDB => {
12467 Ok(Expression::Function(Box::new(Function::new(
12468 "ARRAY_REVERSE_SORT".to_string(),
12469 vec![f.args.into_iter().next().unwrap()],
12470 ))))
12471 }
12472 DialectType::Presto | DialectType::Trino => {
12473 let arr_arg = f.args.into_iter().next().unwrap();
12474 let a = Expression::Column(Box::new(
12475 crate::expressions::Column {
12476 name: crate::expressions::Identifier::new("a"),
12477 table: None,
12478 join_mark: false,
12479 trailing_comments: Vec::new(),
12480 span: None,
12481 inferred_type: None,
12482 },
12483 ));
12484 let b = Expression::Column(Box::new(
12485 crate::expressions::Column {
12486 name: crate::expressions::Identifier::new("b"),
12487 table: None,
12488 join_mark: false,
12489 trailing_comments: Vec::new(),
12490 span: None,
12491 inferred_type: None,
12492 },
12493 ));
12494 let case_expr = Expression::Case(Box::new(
12495 crate::expressions::Case {
12496 operand: None,
12497 whens: vec![
12498 (
12499 Expression::Lt(Box::new(
12500 BinaryOp::new(a.clone(), b.clone()),
12501 )),
12502 Expression::Literal(Box::new(
12503 Literal::Number("1".to_string()),
12504 )),
12505 ),
12506 (
12507 Expression::Gt(Box::new(
12508 BinaryOp::new(a.clone(), b.clone()),
12509 )),
12510 Expression::Literal(Box::new(
12511 Literal::Number("-1".to_string()),
12512 )),
12513 ),
12514 ],
12515 else_: Some(Expression::Literal(Box::new(
12516 Literal::Number("0".to_string()),
12517 ))),
12518 comments: Vec::new(),
12519 inferred_type: None,
12520 },
12521 ));
12522 let lambda = Expression::Lambda(Box::new(
12523 crate::expressions::LambdaExpr {
12524 parameters: vec![
12525 crate::expressions::Identifier::new("a"),
12526 crate::expressions::Identifier::new("b"),
12527 ],
12528 body: case_expr,
12529 colon: false,
12530 parameter_types: Vec::new(),
12531 },
12532 ));
12533 Ok(Expression::Function(Box::new(Function::new(
12534 "ARRAY_SORT".to_string(),
12535 vec![arr_arg, lambda],
12536 ))))
12537 }
12538 _ => Ok(Expression::Function(f)),
12539 }
12540 } else {
12541 // SORT_ARRAY(x, TRUE) -> LIST_SORT(x) for DuckDB, ARRAY_SORT(x) for others
12542 match target {
12543 DialectType::Hive => Ok(Expression::Function(f)),
12544 DialectType::DuckDB => {
12545 Ok(Expression::Function(Box::new(Function::new(
12546 "LIST_SORT".to_string(),
12547 vec![f.args.into_iter().next().unwrap()],
12548 ))))
12549 }
12550 _ => Ok(Expression::Function(Box::new(Function::new(
12551 "ARRAY_SORT".to_string(),
12552 vec![f.args.into_iter().next().unwrap()],
12553 )))),
12554 }
12555 }
12556 }
12557 // LEFT(x, n), RIGHT(x, n) -> SUBSTRING for targets without LEFT/RIGHT
12558 "LEFT" if f.args.len() == 2 => {
12559 match target {
12560 DialectType::Hive
12561 | DialectType::Presto
12562 | DialectType::Trino
12563 | DialectType::Athena => {
12564 let x = f.args[0].clone();
12565 let n = f.args[1].clone();
12566 Ok(Expression::Function(Box::new(Function::new(
12567 "SUBSTRING".to_string(),
12568 vec![x, Expression::number(1), n],
12569 ))))
12570 }
12571 DialectType::Spark | DialectType::Databricks
12572 if matches!(
12573 source,
12574 DialectType::TSQL | DialectType::Fabric
12575 ) =>
12576 {
12577 // TSQL LEFT(x, n) -> LEFT(CAST(x AS STRING), n) for Spark
12578 let x = f.args[0].clone();
12579 let n = f.args[1].clone();
12580 let cast_x = Expression::Cast(Box::new(Cast {
12581 this: x,
12582 to: DataType::VarChar {
12583 length: None,
12584 parenthesized_length: false,
12585 },
12586 double_colon_syntax: false,
12587 trailing_comments: Vec::new(),
12588 format: None,
12589 default: None,
12590 inferred_type: None,
12591 }));
12592 Ok(Expression::Function(Box::new(Function::new(
12593 "LEFT".to_string(),
12594 vec![cast_x, n],
12595 ))))
12596 }
12597 _ => Ok(Expression::Function(f)),
12598 }
12599 }
12600 "RIGHT" if f.args.len() == 2 => {
12601 match target {
12602 DialectType::Hive
12603 | DialectType::Presto
12604 | DialectType::Trino
12605 | DialectType::Athena => {
12606 let x = f.args[0].clone();
12607 let n = f.args[1].clone();
12608 // SUBSTRING(x, LENGTH(x) - (n - 1))
12609 let len_x = Expression::Function(Box::new(Function::new(
12610 "LENGTH".to_string(),
12611 vec![x.clone()],
12612 )));
12613 let n_minus_1 = Expression::Sub(Box::new(
12614 crate::expressions::BinaryOp::new(
12615 n,
12616 Expression::number(1),
12617 ),
12618 ));
12619 let n_minus_1_paren = Expression::Paren(Box::new(
12620 crate::expressions::Paren {
12621 this: n_minus_1,
12622 trailing_comments: Vec::new(),
12623 },
12624 ));
12625 let offset = Expression::Sub(Box::new(
12626 crate::expressions::BinaryOp::new(
12627 len_x,
12628 n_minus_1_paren,
12629 ),
12630 ));
12631 Ok(Expression::Function(Box::new(Function::new(
12632 "SUBSTRING".to_string(),
12633 vec![x, offset],
12634 ))))
12635 }
12636 DialectType::Spark | DialectType::Databricks
12637 if matches!(
12638 source,
12639 DialectType::TSQL | DialectType::Fabric
12640 ) =>
12641 {
12642 // TSQL RIGHT(x, n) -> RIGHT(CAST(x AS STRING), n) for Spark
12643 let x = f.args[0].clone();
12644 let n = f.args[1].clone();
12645 let cast_x = Expression::Cast(Box::new(Cast {
12646 this: x,
12647 to: DataType::VarChar {
12648 length: None,
12649 parenthesized_length: false,
12650 },
12651 double_colon_syntax: false,
12652 trailing_comments: Vec::new(),
12653 format: None,
12654 default: None,
12655 inferred_type: None,
12656 }));
12657 Ok(Expression::Function(Box::new(Function::new(
12658 "RIGHT".to_string(),
12659 vec![cast_x, n],
12660 ))))
12661 }
12662 _ => Ok(Expression::Function(f)),
12663 }
12664 }
12665 // MAP_FROM_ARRAYS(keys, vals) -> target-specific map construction
12666 "MAP_FROM_ARRAYS" if f.args.len() == 2 => match target {
12667 DialectType::Snowflake => Ok(Expression::Function(Box::new(
12668 Function::new("OBJECT_CONSTRUCT".to_string(), f.args),
12669 ))),
12670 DialectType::Spark | DialectType::Databricks => {
12671 Ok(Expression::Function(Box::new(Function::new(
12672 "MAP_FROM_ARRAYS".to_string(),
12673 f.args,
12674 ))))
12675 }
12676 _ => Ok(Expression::Function(Box::new(Function::new(
12677 "MAP".to_string(),
12678 f.args,
12679 )))),
12680 },
12681 // LIKE(foo, 'pat') -> foo LIKE 'pat'; LIKE(foo, 'pat', '!') -> foo LIKE 'pat' ESCAPE '!'
12682 // SQLite uses LIKE(pattern, string[, escape]) with args in reverse order
12683 "LIKE" if f.args.len() >= 2 => {
12684 let (this, pattern) = if matches!(source, DialectType::SQLite) {
12685 // SQLite: LIKE(pattern, string) -> string LIKE pattern
12686 (f.args[1].clone(), f.args[0].clone())
12687 } else {
12688 // Standard: LIKE(string, pattern) -> string LIKE pattern
12689 (f.args[0].clone(), f.args[1].clone())
12690 };
12691 let escape = if f.args.len() >= 3 {
12692 Some(f.args[2].clone())
12693 } else {
12694 None
12695 };
12696 Ok(Expression::Like(Box::new(crate::expressions::LikeOp {
12697 left: this,
12698 right: pattern,
12699 escape,
12700 quantifier: None,
12701 inferred_type: None,
12702 })))
12703 }
12704 // ILIKE(foo, 'pat') -> foo ILIKE 'pat'
12705 "ILIKE" if f.args.len() >= 2 => {
12706 let this = f.args[0].clone();
12707 let pattern = f.args[1].clone();
12708 let escape = if f.args.len() >= 3 {
12709 Some(f.args[2].clone())
12710 } else {
12711 None
12712 };
12713 Ok(Expression::ILike(Box::new(crate::expressions::LikeOp {
12714 left: this,
12715 right: pattern,
12716 escape,
12717 quantifier: None,
12718 inferred_type: None,
12719 })))
12720 }
12721 // CHAR(n) -> CHR(n) for non-MySQL/non-TSQL targets
12722 "CHAR" if f.args.len() == 1 => match target {
12723 DialectType::MySQL
12724 | DialectType::SingleStore
12725 | DialectType::TSQL => Ok(Expression::Function(f)),
12726 _ => Ok(Expression::Function(Box::new(Function::new(
12727 "CHR".to_string(),
12728 f.args,
12729 )))),
12730 },
12731 // CONCAT(a, b) -> a || b for PostgreSQL
12732 "CONCAT"
12733 if f.args.len() == 2
12734 && matches!(target, DialectType::PostgreSQL)
12735 && matches!(
12736 source,
12737 DialectType::ClickHouse | DialectType::MySQL
12738 ) =>
12739 {
12740 let mut args = f.args;
12741 let right = args.pop().unwrap();
12742 let left = args.pop().unwrap();
12743 Ok(Expression::DPipe(Box::new(crate::expressions::DPipe {
12744 this: Box::new(left),
12745 expression: Box::new(right),
12746 safe: None,
12747 })))
12748 }
12749 // ARRAY_TO_STRING(arr, delim) -> target-specific
12750 "ARRAY_TO_STRING"
12751 if f.args.len() == 2
12752 && matches!(target, DialectType::DuckDB)
12753 && matches!(source, DialectType::Snowflake) =>
12754 {
12755 let mut args = f.args;
12756 let arr = args.remove(0);
12757 let sep = args.remove(0);
12758 // sep IS NULL
12759 let sep_is_null = Expression::IsNull(Box::new(IsNull {
12760 this: sep.clone(),
12761 not: false,
12762 postfix_form: false,
12763 }));
12764 // COALESCE(CAST(x AS TEXT), '')
12765 let cast_x = Expression::Cast(Box::new(Cast {
12766 this: Expression::Identifier(Identifier::new("x")),
12767 to: DataType::Text,
12768 trailing_comments: Vec::new(),
12769 double_colon_syntax: false,
12770 format: None,
12771 default: None,
12772 inferred_type: None,
12773 }));
12774 let coalesce = Expression::Coalesce(Box::new(
12775 crate::expressions::VarArgFunc {
12776 original_name: None,
12777 expressions: vec![
12778 cast_x,
12779 Expression::Literal(Box::new(Literal::String(
12780 String::new(),
12781 ))),
12782 ],
12783 inferred_type: None,
12784 },
12785 ));
12786 let lambda =
12787 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
12788 parameters: vec![Identifier::new("x")],
12789 body: coalesce,
12790 colon: false,
12791 parameter_types: Vec::new(),
12792 }));
12793 let list_transform = Expression::Function(Box::new(Function::new(
12794 "LIST_TRANSFORM".to_string(),
12795 vec![arr, lambda],
12796 )));
12797 let array_to_string =
12798 Expression::Function(Box::new(Function::new(
12799 "ARRAY_TO_STRING".to_string(),
12800 vec![list_transform, sep],
12801 )));
12802 Ok(Expression::Case(Box::new(Case {
12803 operand: None,
12804 whens: vec![(sep_is_null, Expression::Null(Null))],
12805 else_: Some(array_to_string),
12806 comments: Vec::new(),
12807 inferred_type: None,
12808 })))
12809 }
12810 "ARRAY_TO_STRING" if f.args.len() >= 2 => match target {
12811 DialectType::Presto | DialectType::Trino => {
12812 Ok(Expression::Function(Box::new(Function::new(
12813 "ARRAY_JOIN".to_string(),
12814 f.args,
12815 ))))
12816 }
12817 DialectType::TSQL => Ok(Expression::Function(Box::new(
12818 Function::new("STRING_AGG".to_string(), f.args),
12819 ))),
12820 _ => Ok(Expression::Function(f)),
12821 },
12822 // ARRAY_CONCAT / LIST_CONCAT -> target-specific
12823 "ARRAY_CONCAT" | "LIST_CONCAT" if f.args.len() == 2 => match target {
12824 DialectType::Spark
12825 | DialectType::Databricks
12826 | DialectType::Hive => Ok(Expression::Function(Box::new(
12827 Function::new("CONCAT".to_string(), f.args),
12828 ))),
12829 DialectType::Snowflake => Ok(Expression::Function(Box::new(
12830 Function::new("ARRAY_CAT".to_string(), f.args),
12831 ))),
12832 DialectType::Redshift => Ok(Expression::Function(Box::new(
12833 Function::new("ARRAY_CONCAT".to_string(), f.args),
12834 ))),
12835 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
12836 Function::new("ARRAY_CAT".to_string(), f.args),
12837 ))),
12838 DialectType::DuckDB => Ok(Expression::Function(Box::new(
12839 Function::new("LIST_CONCAT".to_string(), f.args),
12840 ))),
12841 DialectType::Presto | DialectType::Trino => {
12842 Ok(Expression::Function(Box::new(Function::new(
12843 "CONCAT".to_string(),
12844 f.args,
12845 ))))
12846 }
12847 DialectType::BigQuery => Ok(Expression::Function(Box::new(
12848 Function::new("ARRAY_CONCAT".to_string(), f.args),
12849 ))),
12850 _ => Ok(Expression::Function(f)),
12851 },
12852 // ARRAY_CONTAINS(arr, x) / HAS(arr, x) / CONTAINS(arr, x) normalization
12853 "HAS" if f.args.len() == 2 => match target {
12854 DialectType::Spark
12855 | DialectType::Databricks
12856 | DialectType::Hive => Ok(Expression::Function(Box::new(
12857 Function::new("ARRAY_CONTAINS".to_string(), f.args),
12858 ))),
12859 DialectType::Presto | DialectType::Trino => {
12860 Ok(Expression::Function(Box::new(Function::new(
12861 "CONTAINS".to_string(),
12862 f.args,
12863 ))))
12864 }
12865 _ => Ok(Expression::Function(f)),
12866 },
12867 // NVL(a, b, c, d) -> COALESCE(a, b, c, d) - NVL should keep all args
12868 "NVL" if f.args.len() > 2 => Ok(Expression::Function(Box::new(
12869 Function::new("COALESCE".to_string(), f.args),
12870 ))),
12871 // ISNULL(x) in MySQL -> (x IS NULL)
12872 "ISNULL"
12873 if f.args.len() == 1
12874 && matches!(source, DialectType::MySQL)
12875 && matches!(target, DialectType::MySQL) =>
12876 {
12877 let arg = f.args.into_iter().next().unwrap();
12878 Ok(Expression::Paren(Box::new(crate::expressions::Paren {
12879 this: Expression::IsNull(Box::new(
12880 crate::expressions::IsNull {
12881 this: arg,
12882 not: false,
12883 postfix_form: false,
12884 },
12885 )),
12886 trailing_comments: Vec::new(),
12887 })))
12888 }
12889 // MONTHNAME(x) -> DATE_FORMAT(x, '%M') for MySQL -> MySQL
12890 "MONTHNAME"
12891 if f.args.len() == 1 && matches!(target, DialectType::MySQL) =>
12892 {
12893 let arg = f.args.into_iter().next().unwrap();
12894 Ok(Expression::Function(Box::new(Function::new(
12895 "DATE_FORMAT".to_string(),
12896 vec![arg, Expression::string("%M")],
12897 ))))
12898 }
12899 // ClickHouse splitByString('s', x) -> DuckDB STR_SPLIT(x, 's') / Hive SPLIT(x, CONCAT('\\Q', 's', '\\E'))
12900 "SPLITBYSTRING" if f.args.len() == 2 => {
12901 let sep = f.args[0].clone();
12902 let str_arg = f.args[1].clone();
12903 match target {
12904 DialectType::DuckDB => Ok(Expression::Function(Box::new(
12905 Function::new("STR_SPLIT".to_string(), vec![str_arg, sep]),
12906 ))),
12907 DialectType::Doris => {
12908 Ok(Expression::Function(Box::new(Function::new(
12909 "SPLIT_BY_STRING".to_string(),
12910 vec![str_arg, sep],
12911 ))))
12912 }
12913 DialectType::Hive
12914 | DialectType::Spark
12915 | DialectType::Databricks => {
12916 // SPLIT(x, CONCAT('\\Q', sep, '\\E'))
12917 let escaped =
12918 Expression::Function(Box::new(Function::new(
12919 "CONCAT".to_string(),
12920 vec![
12921 Expression::string("\\Q"),
12922 sep,
12923 Expression::string("\\E"),
12924 ],
12925 )));
12926 Ok(Expression::Function(Box::new(Function::new(
12927 "SPLIT".to_string(),
12928 vec![str_arg, escaped],
12929 ))))
12930 }
12931 _ => Ok(Expression::Function(f)),
12932 }
12933 }
12934 // ClickHouse splitByRegexp('pattern', x) -> DuckDB STR_SPLIT_REGEX(x, 'pattern')
12935 "SPLITBYREGEXP" if f.args.len() == 2 => {
12936 let sep = f.args[0].clone();
12937 let str_arg = f.args[1].clone();
12938 match target {
12939 DialectType::DuckDB => {
12940 Ok(Expression::Function(Box::new(Function::new(
12941 "STR_SPLIT_REGEX".to_string(),
12942 vec![str_arg, sep],
12943 ))))
12944 }
12945 DialectType::Hive
12946 | DialectType::Spark
12947 | DialectType::Databricks => {
12948 Ok(Expression::Function(Box::new(Function::new(
12949 "SPLIT".to_string(),
12950 vec![str_arg, sep],
12951 ))))
12952 }
12953 _ => Ok(Expression::Function(f)),
12954 }
12955 }
12956 // ClickHouse toMonday(x) -> DATE_TRUNC('WEEK', x) / DATE_TRUNC(x, 'WEEK') for Doris
12957 "TOMONDAY" => {
12958 if f.args.len() == 1 {
12959 let arg = f.args.into_iter().next().unwrap();
12960 match target {
12961 DialectType::Doris => {
12962 Ok(Expression::Function(Box::new(Function::new(
12963 "DATE_TRUNC".to_string(),
12964 vec![arg, Expression::string("WEEK")],
12965 ))))
12966 }
12967 _ => Ok(Expression::Function(Box::new(Function::new(
12968 "DATE_TRUNC".to_string(),
12969 vec![Expression::string("WEEK"), arg],
12970 )))),
12971 }
12972 } else {
12973 Ok(Expression::Function(f))
12974 }
12975 }
12976 // COLLECT_LIST with FILTER(WHERE x IS NOT NULL) for targets that need it
12977 "COLLECT_LIST" if f.args.len() == 1 => match target {
12978 DialectType::Spark
12979 | DialectType::Databricks
12980 | DialectType::Hive => Ok(Expression::Function(f)),
12981 _ => Ok(Expression::Function(Box::new(Function::new(
12982 "ARRAY_AGG".to_string(),
12983 f.args,
12984 )))),
12985 },
12986 // TO_CHAR(x) with 1 arg -> CAST(x AS STRING) for Doris
12987 "TO_CHAR"
12988 if f.args.len() == 1 && matches!(target, DialectType::Doris) =>
12989 {
12990 let arg = f.args.into_iter().next().unwrap();
12991 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
12992 this: arg,
12993 to: DataType::Custom {
12994 name: "STRING".to_string(),
12995 },
12996 double_colon_syntax: false,
12997 trailing_comments: Vec::new(),
12998 format: None,
12999 default: None,
13000 inferred_type: None,
13001 })))
13002 }
13003 // DBMS_RANDOM.VALUE() -> RANDOM() for PostgreSQL
13004 "DBMS_RANDOM.VALUE" if f.args.is_empty() => match target {
13005 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
13006 Function::new("RANDOM".to_string(), vec![]),
13007 ))),
13008 _ => Ok(Expression::Function(f)),
13009 },
13010 // ClickHouse formatDateTime -> target-specific
13011 "FORMATDATETIME" if f.args.len() >= 2 => match target {
13012 DialectType::MySQL => Ok(Expression::Function(Box::new(
13013 Function::new("DATE_FORMAT".to_string(), f.args),
13014 ))),
13015 _ => Ok(Expression::Function(f)),
13016 },
13017 // REPLICATE('x', n) -> REPEAT('x', n) for non-TSQL targets
13018 "REPLICATE" if f.args.len() == 2 => match target {
13019 DialectType::TSQL => Ok(Expression::Function(f)),
13020 _ => Ok(Expression::Function(Box::new(Function::new(
13021 "REPEAT".to_string(),
13022 f.args,
13023 )))),
13024 },
13025 // LEN(x) -> LENGTH(x) for non-TSQL targets
13026 // No CAST needed when arg is already a string literal
13027 "LEN" if f.args.len() == 1 => {
13028 match target {
13029 DialectType::TSQL => Ok(Expression::Function(f)),
13030 DialectType::Spark | DialectType::Databricks => {
13031 let arg = f.args.into_iter().next().unwrap();
13032 // Don't wrap string literals with CAST - they're already strings
13033 let is_string = matches!(
13034 &arg,
13035 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_))
13036 );
13037 let final_arg = if is_string {
13038 arg
13039 } else {
13040 Expression::Cast(Box::new(Cast {
13041 this: arg,
13042 to: DataType::VarChar {
13043 length: None,
13044 parenthesized_length: false,
13045 },
13046 double_colon_syntax: false,
13047 trailing_comments: Vec::new(),
13048 format: None,
13049 default: None,
13050 inferred_type: None,
13051 }))
13052 };
13053 Ok(Expression::Function(Box::new(Function::new(
13054 "LENGTH".to_string(),
13055 vec![final_arg],
13056 ))))
13057 }
13058 _ => {
13059 let arg = f.args.into_iter().next().unwrap();
13060 Ok(Expression::Function(Box::new(Function::new(
13061 "LENGTH".to_string(),
13062 vec![arg],
13063 ))))
13064 }
13065 }
13066 }
13067 // COUNT_BIG(x) -> COUNT(x) for non-TSQL targets
13068 "COUNT_BIG" if f.args.len() == 1 => match target {
13069 DialectType::TSQL => Ok(Expression::Function(f)),
13070 _ => Ok(Expression::Function(Box::new(Function::new(
13071 "COUNT".to_string(),
13072 f.args,
13073 )))),
13074 },
13075 // DATEFROMPARTS(y, m, d) -> MAKE_DATE(y, m, d) for non-TSQL targets
13076 "DATEFROMPARTS" if f.args.len() == 3 => match target {
13077 DialectType::TSQL => Ok(Expression::Function(f)),
13078 _ => Ok(Expression::Function(Box::new(Function::new(
13079 "MAKE_DATE".to_string(),
13080 f.args,
13081 )))),
13082 },
13083 // REGEXP_LIKE(str, pattern) -> RegexpLike expression (target-specific output)
13084 "REGEXP_LIKE" if f.args.len() >= 2 => {
13085 let str_expr = f.args[0].clone();
13086 let pattern = f.args[1].clone();
13087 let flags = if f.args.len() >= 3 {
13088 Some(f.args[2].clone())
13089 } else {
13090 None
13091 };
13092 match target {
13093 DialectType::DuckDB => {
13094 let mut new_args = vec![str_expr, pattern];
13095 if let Some(fl) = flags {
13096 new_args.push(fl);
13097 }
13098 Ok(Expression::Function(Box::new(Function::new(
13099 "REGEXP_MATCHES".to_string(),
13100 new_args,
13101 ))))
13102 }
13103 _ => Ok(Expression::RegexpLike(Box::new(
13104 crate::expressions::RegexpFunc {
13105 this: str_expr,
13106 pattern,
13107 flags,
13108 },
13109 ))),
13110 }
13111 }
13112 // ClickHouse arrayJoin -> UNNEST for PostgreSQL
13113 "ARRAYJOIN" if f.args.len() == 1 => match target {
13114 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
13115 Function::new("UNNEST".to_string(), f.args),
13116 ))),
13117 _ => Ok(Expression::Function(f)),
13118 },
13119 // DATETIMEFROMPARTS(y, m, d, h, mi, s, ms) -> MAKE_TIMESTAMP / TIMESTAMP_FROM_PARTS
13120 "DATETIMEFROMPARTS" if f.args.len() == 7 => {
13121 match target {
13122 DialectType::TSQL => Ok(Expression::Function(f)),
13123 DialectType::DuckDB => {
13124 // MAKE_TIMESTAMP(y, m, d, h, mi, s + (ms / 1000.0))
13125 let mut args = f.args;
13126 let ms = args.pop().unwrap();
13127 let s = args.pop().unwrap();
13128 // s + (ms / 1000.0)
13129 let ms_frac = Expression::Div(Box::new(BinaryOp::new(
13130 ms,
13131 Expression::Literal(Box::new(
13132 crate::expressions::Literal::Number(
13133 "1000.0".to_string(),
13134 ),
13135 )),
13136 )));
13137 let s_with_ms = Expression::Add(Box::new(BinaryOp::new(
13138 s,
13139 Expression::Paren(Box::new(Paren {
13140 this: ms_frac,
13141 trailing_comments: vec![],
13142 })),
13143 )));
13144 args.push(s_with_ms);
13145 Ok(Expression::Function(Box::new(Function::new(
13146 "MAKE_TIMESTAMP".to_string(),
13147 args,
13148 ))))
13149 }
13150 DialectType::Snowflake => {
13151 // TIMESTAMP_FROM_PARTS(y, m, d, h, mi, s, ms * 1000000)
13152 let mut args = f.args;
13153 let ms = args.pop().unwrap();
13154 // ms * 1000000
13155 let ns = Expression::Mul(Box::new(BinaryOp::new(
13156 ms,
13157 Expression::number(1000000),
13158 )));
13159 args.push(ns);
13160 Ok(Expression::Function(Box::new(Function::new(
13161 "TIMESTAMP_FROM_PARTS".to_string(),
13162 args,
13163 ))))
13164 }
13165 _ => {
13166 // Default: keep function name for other targets
13167 Ok(Expression::Function(Box::new(Function::new(
13168 "DATETIMEFROMPARTS".to_string(),
13169 f.args,
13170 ))))
13171 }
13172 }
13173 }
13174 // CONVERT(type, expr [, style]) -> CAST(expr AS type) for non-TSQL targets
13175 // TRY_CONVERT(type, expr [, style]) -> TRY_CAST(expr AS type) for non-TSQL targets
13176 "CONVERT" | "TRY_CONVERT" if f.args.len() >= 2 => {
13177 let is_try = name == "TRY_CONVERT";
13178 let type_expr = f.args[0].clone();
13179 let value_expr = f.args[1].clone();
13180 let style = if f.args.len() >= 3 {
13181 Some(&f.args[2])
13182 } else {
13183 None
13184 };
13185
13186 // For TSQL->TSQL, normalize types and preserve CONVERT/TRY_CONVERT
13187 if matches!(target, DialectType::TSQL) {
13188 let normalized_type = match &type_expr {
13189 Expression::DataType(dt) => {
13190 let new_dt = match dt {
13191 DataType::Int { .. } => DataType::Custom {
13192 name: "INTEGER".to_string(),
13193 },
13194 _ => dt.clone(),
13195 };
13196 Expression::DataType(new_dt)
13197 }
13198 Expression::Identifier(id) => {
13199 if id.name.eq_ignore_ascii_case("INT") {
13200 Expression::Identifier(
13201 crate::expressions::Identifier::new("INTEGER"),
13202 )
13203 } else {
13204 let upper = id.name.to_ascii_uppercase();
13205 Expression::Identifier(
13206 crate::expressions::Identifier::new(upper),
13207 )
13208 }
13209 }
13210 Expression::Column(col) => {
13211 if col.name.name.eq_ignore_ascii_case("INT") {
13212 Expression::Identifier(
13213 crate::expressions::Identifier::new("INTEGER"),
13214 )
13215 } else {
13216 let upper = col.name.name.to_ascii_uppercase();
13217 Expression::Identifier(
13218 crate::expressions::Identifier::new(upper),
13219 )
13220 }
13221 }
13222 _ => type_expr.clone(),
13223 };
13224 let func_name = if is_try { "TRY_CONVERT" } else { "CONVERT" };
13225 let mut new_args = vec![normalized_type, value_expr];
13226 if let Some(s) = style {
13227 new_args.push(s.clone());
13228 }
13229 return Ok(Expression::Function(Box::new(Function::new(
13230 func_name.to_string(),
13231 new_args,
13232 ))));
13233 }
13234
13235 // For other targets: CONVERT(type, expr) -> CAST(expr AS type)
13236 fn expr_to_datatype(e: &Expression) -> Option<DataType> {
13237 match e {
13238 Expression::DataType(dt) => {
13239 // Convert NVARCHAR/NCHAR Custom types to standard VarChar/Char
13240 match dt {
13241 DataType::Custom { name }
13242 if name.starts_with("NVARCHAR(")
13243 || name.starts_with("NCHAR(") =>
13244 {
13245 // Extract the length from "NVARCHAR(200)" or "NCHAR(40)"
13246 let inner = &name[name.find('(').unwrap() + 1
13247 ..name.len() - 1];
13248 if inner.eq_ignore_ascii_case("MAX") {
13249 Some(DataType::Text)
13250 } else if let Ok(len) = inner.parse::<u32>() {
13251 if name.starts_with("NCHAR") {
13252 Some(DataType::Char {
13253 length: Some(len),
13254 })
13255 } else {
13256 Some(DataType::VarChar {
13257 length: Some(len),
13258 parenthesized_length: false,
13259 })
13260 }
13261 } else {
13262 Some(dt.clone())
13263 }
13264 }
13265 DataType::Custom { name } if name == "NVARCHAR" => {
13266 Some(DataType::VarChar {
13267 length: None,
13268 parenthesized_length: false,
13269 })
13270 }
13271 DataType::Custom { name } if name == "NCHAR" => {
13272 Some(DataType::Char { length: None })
13273 }
13274 DataType::Custom { name }
13275 if name == "NVARCHAR(MAX)"
13276 || name == "VARCHAR(MAX)" =>
13277 {
13278 Some(DataType::Text)
13279 }
13280 _ => Some(dt.clone()),
13281 }
13282 }
13283 Expression::Identifier(id) => {
13284 let name = id.name.to_ascii_uppercase();
13285 match name.as_str() {
13286 "INT" | "INTEGER" => Some(DataType::Int {
13287 length: None,
13288 integer_spelling: false,
13289 }),
13290 "BIGINT" => Some(DataType::BigInt { length: None }),
13291 "SMALLINT" => {
13292 Some(DataType::SmallInt { length: None })
13293 }
13294 "TINYINT" => {
13295 Some(DataType::TinyInt { length: None })
13296 }
13297 "FLOAT" => Some(DataType::Float {
13298 precision: None,
13299 scale: None,
13300 real_spelling: false,
13301 }),
13302 "REAL" => Some(DataType::Float {
13303 precision: None,
13304 scale: None,
13305 real_spelling: true,
13306 }),
13307 "DATETIME" | "DATETIME2" => {
13308 Some(DataType::Timestamp {
13309 timezone: false,
13310 precision: None,
13311 })
13312 }
13313 "DATE" => Some(DataType::Date),
13314 "BIT" => Some(DataType::Boolean),
13315 "TEXT" => Some(DataType::Text),
13316 "NUMERIC" => Some(DataType::Decimal {
13317 precision: None,
13318 scale: None,
13319 }),
13320 "MONEY" => Some(DataType::Decimal {
13321 precision: Some(15),
13322 scale: Some(4),
13323 }),
13324 "SMALLMONEY" => Some(DataType::Decimal {
13325 precision: Some(6),
13326 scale: Some(4),
13327 }),
13328 "VARCHAR" => Some(DataType::VarChar {
13329 length: None,
13330 parenthesized_length: false,
13331 }),
13332 "NVARCHAR" => Some(DataType::VarChar {
13333 length: None,
13334 parenthesized_length: false,
13335 }),
13336 "CHAR" => Some(DataType::Char { length: None }),
13337 "NCHAR" => Some(DataType::Char { length: None }),
13338 _ => Some(DataType::Custom { name }),
13339 }
13340 }
13341 Expression::Column(col) => {
13342 let name = col.name.name.to_ascii_uppercase();
13343 match name.as_str() {
13344 "INT" | "INTEGER" => Some(DataType::Int {
13345 length: None,
13346 integer_spelling: false,
13347 }),
13348 "BIGINT" => Some(DataType::BigInt { length: None }),
13349 "FLOAT" => Some(DataType::Float {
13350 precision: None,
13351 scale: None,
13352 real_spelling: false,
13353 }),
13354 "DATETIME" | "DATETIME2" => {
13355 Some(DataType::Timestamp {
13356 timezone: false,
13357 precision: None,
13358 })
13359 }
13360 "DATE" => Some(DataType::Date),
13361 "NUMERIC" => Some(DataType::Decimal {
13362 precision: None,
13363 scale: None,
13364 }),
13365 "VARCHAR" => Some(DataType::VarChar {
13366 length: None,
13367 parenthesized_length: false,
13368 }),
13369 "NVARCHAR" => Some(DataType::VarChar {
13370 length: None,
13371 parenthesized_length: false,
13372 }),
13373 "CHAR" => Some(DataType::Char { length: None }),
13374 "NCHAR" => Some(DataType::Char { length: None }),
13375 _ => Some(DataType::Custom { name }),
13376 }
13377 }
13378 // NVARCHAR(200) parsed as Function("NVARCHAR", [200])
13379 Expression::Function(f) => {
13380 let fname = f.name.to_ascii_uppercase();
13381 match fname.as_str() {
13382 "VARCHAR" | "NVARCHAR" => {
13383 let len = f.args.first().and_then(|a| {
13384 if let Expression::Literal(lit) = a
13385 {
13386 if let crate::expressions::Literal::Number(n) = lit.as_ref() {
13387 n.parse::<u32>().ok()
13388 } else { None }
13389 } else if let Expression::Identifier(id) = a
13390 {
13391 if id.name.eq_ignore_ascii_case("MAX") {
13392 None
13393 } else {
13394 None
13395 }
13396 } else {
13397 None
13398 }
13399 });
13400 // Check for VARCHAR(MAX) -> TEXT
13401 let is_max = f.args.first().map_or(false, |a| {
13402 matches!(a, Expression::Identifier(id) if id.name.eq_ignore_ascii_case("MAX"))
13403 || matches!(a, Expression::Column(col) if col.name.name.eq_ignore_ascii_case("MAX"))
13404 });
13405 if is_max {
13406 Some(DataType::Text)
13407 } else {
13408 Some(DataType::VarChar {
13409 length: len,
13410 parenthesized_length: false,
13411 })
13412 }
13413 }
13414 "NCHAR" | "CHAR" => {
13415 let len = f.args.first().and_then(|a| {
13416 if let Expression::Literal(lit) = a
13417 {
13418 if let crate::expressions::Literal::Number(n) = lit.as_ref() {
13419 n.parse::<u32>().ok()
13420 } else { None }
13421 } else {
13422 None
13423 }
13424 });
13425 Some(DataType::Char { length: len })
13426 }
13427 "NUMERIC" | "DECIMAL" => {
13428 let precision = f.args.first().and_then(|a| {
13429 if let Expression::Literal(lit) = a
13430 {
13431 if let crate::expressions::Literal::Number(n) = lit.as_ref() {
13432 n.parse::<u32>().ok()
13433 } else { None }
13434 } else {
13435 None
13436 }
13437 });
13438 let scale = f.args.get(1).and_then(|a| {
13439 if let Expression::Literal(lit) = a
13440 {
13441 if let crate::expressions::Literal::Number(n) = lit.as_ref() {
13442 n.parse::<u32>().ok()
13443 } else { None }
13444 } else {
13445 None
13446 }
13447 });
13448 Some(DataType::Decimal { precision, scale })
13449 }
13450 _ => None,
13451 }
13452 }
13453 _ => None,
13454 }
13455 }
13456
13457 if let Some(mut dt) = expr_to_datatype(&type_expr) {
13458 // For TSQL source: VARCHAR/CHAR without length defaults to 30
13459 let is_tsql_source =
13460 matches!(source, DialectType::TSQL | DialectType::Fabric);
13461 if is_tsql_source {
13462 match &dt {
13463 DataType::VarChar { length: None, .. } => {
13464 dt = DataType::VarChar {
13465 length: Some(30),
13466 parenthesized_length: false,
13467 };
13468 }
13469 DataType::Char { length: None } => {
13470 dt = DataType::Char { length: Some(30) };
13471 }
13472 _ => {}
13473 }
13474 }
13475
13476 // Determine if this is a string type
13477 let is_string_type = matches!(
13478 dt,
13479 DataType::VarChar { .. }
13480 | DataType::Char { .. }
13481 | DataType::Text
13482 ) || matches!(&dt, DataType::Custom { name } if name == "NVARCHAR" || name == "NCHAR"
13483 || name.starts_with("NVARCHAR(") || name.starts_with("NCHAR(")
13484 || name.starts_with("VARCHAR(") || name == "VARCHAR"
13485 || name == "STRING");
13486
13487 // Determine if this is a date/time type
13488 let is_datetime_type = matches!(
13489 dt,
13490 DataType::Timestamp { .. } | DataType::Date
13491 ) || matches!(&dt, DataType::Custom { name } if name == "DATETIME"
13492 || name == "DATETIME2" || name == "SMALLDATETIME");
13493
13494 // Check for date conversion with style
13495 if style.is_some() {
13496 let style_num = style.and_then(|s| {
13497 if let Expression::Literal(lit) = s {
13498 if let crate::expressions::Literal::Number(n) =
13499 lit.as_ref()
13500 {
13501 n.parse::<u32>().ok()
13502 } else {
13503 None
13504 }
13505 } else {
13506 None
13507 }
13508 });
13509
13510 // TSQL CONVERT date styles (Java format)
13511 let format_str = style_num.and_then(|n| match n {
13512 101 => Some("MM/dd/yyyy"),
13513 102 => Some("yyyy.MM.dd"),
13514 103 => Some("dd/MM/yyyy"),
13515 104 => Some("dd.MM.yyyy"),
13516 105 => Some("dd-MM-yyyy"),
13517 108 => Some("HH:mm:ss"),
13518 110 => Some("MM-dd-yyyy"),
13519 112 => Some("yyyyMMdd"),
13520 120 | 20 => Some("yyyy-MM-dd HH:mm:ss"),
13521 121 | 21 => Some("yyyy-MM-dd HH:mm:ss.SSSSSS"),
13522 126 | 127 => Some("yyyy-MM-dd'T'HH:mm:ss.SSS"),
13523 _ => None,
13524 });
13525
13526 // Non-string, non-datetime types with style: just CAST, ignore the style
13527 if !is_string_type && !is_datetime_type {
13528 let cast_expr = if is_try {
13529 Expression::TryCast(Box::new(
13530 crate::expressions::Cast {
13531 this: value_expr,
13532 to: dt,
13533 trailing_comments: Vec::new(),
13534 double_colon_syntax: false,
13535 format: None,
13536 default: None,
13537 inferred_type: None,
13538 },
13539 ))
13540 } else {
13541 Expression::Cast(Box::new(
13542 crate::expressions::Cast {
13543 this: value_expr,
13544 to: dt,
13545 trailing_comments: Vec::new(),
13546 double_colon_syntax: false,
13547 format: None,
13548 default: None,
13549 inferred_type: None,
13550 },
13551 ))
13552 };
13553 return Ok(cast_expr);
13554 }
13555
13556 if let Some(java_fmt) = format_str {
13557 let c_fmt = java_fmt
13558 .replace("yyyy", "%Y")
13559 .replace("MM", "%m")
13560 .replace("dd", "%d")
13561 .replace("HH", "%H")
13562 .replace("mm", "%M")
13563 .replace("ss", "%S")
13564 .replace("SSSSSS", "%f")
13565 .replace("SSS", "%f")
13566 .replace("'T'", "T");
13567
13568 // For datetime target types: style is the INPUT format for parsing strings -> dates
13569 if is_datetime_type {
13570 match target {
13571 DialectType::DuckDB => {
13572 return Ok(Expression::Function(Box::new(
13573 Function::new(
13574 "STRPTIME".to_string(),
13575 vec![
13576 value_expr,
13577 Expression::string(&c_fmt),
13578 ],
13579 ),
13580 )));
13581 }
13582 DialectType::Spark
13583 | DialectType::Databricks => {
13584 // CONVERT(DATETIME, x, style) -> TO_TIMESTAMP(x, fmt)
13585 // CONVERT(DATE, x, style) -> TO_DATE(x, fmt)
13586 let func_name =
13587 if matches!(dt, DataType::Date) {
13588 "TO_DATE"
13589 } else {
13590 "TO_TIMESTAMP"
13591 };
13592 return Ok(Expression::Function(Box::new(
13593 Function::new(
13594 func_name.to_string(),
13595 vec![
13596 value_expr,
13597 Expression::string(java_fmt),
13598 ],
13599 ),
13600 )));
13601 }
13602 DialectType::Hive => {
13603 return Ok(Expression::Function(Box::new(
13604 Function::new(
13605 "TO_TIMESTAMP".to_string(),
13606 vec![
13607 value_expr,
13608 Expression::string(java_fmt),
13609 ],
13610 ),
13611 )));
13612 }
13613 _ => {
13614 return Ok(Expression::Cast(Box::new(
13615 crate::expressions::Cast {
13616 this: value_expr,
13617 to: dt,
13618 trailing_comments: Vec::new(),
13619 double_colon_syntax: false,
13620 format: None,
13621 default: None,
13622 inferred_type: None,
13623 },
13624 )));
13625 }
13626 }
13627 }
13628
13629 // For string target types: style is the OUTPUT format for dates -> strings
13630 match target {
13631 DialectType::DuckDB => Ok(Expression::Function(
13632 Box::new(Function::new(
13633 "STRPTIME".to_string(),
13634 vec![
13635 value_expr,
13636 Expression::string(&c_fmt),
13637 ],
13638 )),
13639 )),
13640 DialectType::Spark | DialectType::Databricks => {
13641 // For string target types with style: CAST(DATE_FORMAT(x, fmt) AS type)
13642 // Determine the target string type
13643 let string_dt = match &dt {
13644 DataType::VarChar {
13645 length: Some(l),
13646 ..
13647 } => DataType::VarChar {
13648 length: Some(*l),
13649 parenthesized_length: false,
13650 },
13651 DataType::Text => DataType::Custom {
13652 name: "STRING".to_string(),
13653 },
13654 _ => DataType::Custom {
13655 name: "STRING".to_string(),
13656 },
13657 };
13658 let date_format_expr = Expression::Function(
13659 Box::new(Function::new(
13660 "DATE_FORMAT".to_string(),
13661 vec![
13662 value_expr,
13663 Expression::string(java_fmt),
13664 ],
13665 )),
13666 );
13667 let cast_expr = if is_try {
13668 Expression::TryCast(Box::new(
13669 crate::expressions::Cast {
13670 this: date_format_expr,
13671 to: string_dt,
13672 trailing_comments: Vec::new(),
13673 double_colon_syntax: false,
13674 format: None,
13675 default: None,
13676 inferred_type: None,
13677 },
13678 ))
13679 } else {
13680 Expression::Cast(Box::new(
13681 crate::expressions::Cast {
13682 this: date_format_expr,
13683 to: string_dt,
13684 trailing_comments: Vec::new(),
13685 double_colon_syntax: false,
13686 format: None,
13687 default: None,
13688 inferred_type: None,
13689 },
13690 ))
13691 };
13692 Ok(cast_expr)
13693 }
13694 DialectType::MySQL | DialectType::SingleStore => {
13695 // For MySQL: CAST(DATE_FORMAT(x, mysql_fmt) AS CHAR(n))
13696 let mysql_fmt = java_fmt
13697 .replace("yyyy", "%Y")
13698 .replace("MM", "%m")
13699 .replace("dd", "%d")
13700 .replace("HH:mm:ss.SSSSSS", "%T")
13701 .replace("HH:mm:ss", "%T")
13702 .replace("HH", "%H")
13703 .replace("mm", "%i")
13704 .replace("ss", "%S");
13705 let date_format_expr = Expression::Function(
13706 Box::new(Function::new(
13707 "DATE_FORMAT".to_string(),
13708 vec![
13709 value_expr,
13710 Expression::string(&mysql_fmt),
13711 ],
13712 )),
13713 );
13714 // MySQL uses CHAR for string casts
13715 let mysql_dt = match &dt {
13716 DataType::VarChar { length, .. } => {
13717 DataType::Char { length: *length }
13718 }
13719 _ => dt,
13720 };
13721 Ok(Expression::Cast(Box::new(
13722 crate::expressions::Cast {
13723 this: date_format_expr,
13724 to: mysql_dt,
13725 trailing_comments: Vec::new(),
13726 double_colon_syntax: false,
13727 format: None,
13728 default: None,
13729 inferred_type: None,
13730 },
13731 )))
13732 }
13733 DialectType::Hive => {
13734 let func_name = "TO_TIMESTAMP";
13735 Ok(Expression::Function(Box::new(
13736 Function::new(
13737 func_name.to_string(),
13738 vec![
13739 value_expr,
13740 Expression::string(java_fmt),
13741 ],
13742 ),
13743 )))
13744 }
13745 _ => Ok(Expression::Cast(Box::new(
13746 crate::expressions::Cast {
13747 this: value_expr,
13748 to: dt,
13749 trailing_comments: Vec::new(),
13750 double_colon_syntax: false,
13751 format: None,
13752 default: None,
13753 inferred_type: None,
13754 },
13755 ))),
13756 }
13757 } else {
13758 // Unknown style, just CAST
13759 let cast_expr = if is_try {
13760 Expression::TryCast(Box::new(
13761 crate::expressions::Cast {
13762 this: value_expr,
13763 to: dt,
13764 trailing_comments: Vec::new(),
13765 double_colon_syntax: false,
13766 format: None,
13767 default: None,
13768 inferred_type: None,
13769 },
13770 ))
13771 } else {
13772 Expression::Cast(Box::new(
13773 crate::expressions::Cast {
13774 this: value_expr,
13775 to: dt,
13776 trailing_comments: Vec::new(),
13777 double_colon_syntax: false,
13778 format: None,
13779 default: None,
13780 inferred_type: None,
13781 },
13782 ))
13783 };
13784 Ok(cast_expr)
13785 }
13786 } else {
13787 // No style - simple CAST
13788 let final_dt = if matches!(
13789 target,
13790 DialectType::MySQL | DialectType::SingleStore
13791 ) {
13792 match &dt {
13793 DataType::Int { .. }
13794 | DataType::BigInt { .. }
13795 | DataType::SmallInt { .. }
13796 | DataType::TinyInt { .. } => DataType::Custom {
13797 name: "SIGNED".to_string(),
13798 },
13799 DataType::VarChar { length, .. } => {
13800 DataType::Char { length: *length }
13801 }
13802 _ => dt,
13803 }
13804 } else {
13805 dt
13806 };
13807 let cast_expr = if is_try {
13808 Expression::TryCast(Box::new(
13809 crate::expressions::Cast {
13810 this: value_expr,
13811 to: final_dt,
13812 trailing_comments: Vec::new(),
13813 double_colon_syntax: false,
13814 format: None,
13815 default: None,
13816 inferred_type: None,
13817 },
13818 ))
13819 } else {
13820 Expression::Cast(Box::new(crate::expressions::Cast {
13821 this: value_expr,
13822 to: final_dt,
13823 trailing_comments: Vec::new(),
13824 double_colon_syntax: false,
13825 format: None,
13826 default: None,
13827 inferred_type: None,
13828 }))
13829 };
13830 Ok(cast_expr)
13831 }
13832 } else {
13833 // Can't convert type expression - keep as CONVERT/TRY_CONVERT function
13834 Ok(Expression::Function(f))
13835 }
13836 }
13837 // STRFTIME(val, fmt) from DuckDB / STRFTIME(fmt, val) from SQLite -> target-specific
13838 "STRFTIME" if f.args.len() == 2 => {
13839 // SQLite uses STRFTIME(fmt, val); DuckDB uses STRFTIME(val, fmt)
13840 let (val, fmt_expr) = if matches!(source, DialectType::SQLite) {
13841 // SQLite: args[0] = format, args[1] = value
13842 (f.args[1].clone(), &f.args[0])
13843 } else {
13844 // DuckDB and others: args[0] = value, args[1] = format
13845 (f.args[0].clone(), &f.args[1])
13846 };
13847
13848 // Helper to convert C-style format to Java-style
/// Translate a C `strftime`-style pattern (DuckDB/SQLite flavor) into a
/// Java `SimpleDateFormat`-style pattern, e.g. `%Y-%m-%d` -> `yyyy-MM-dd`.
///
/// The table is applied strictly in order; `%-m`/`%-d`/`%-H`/`%-I` contain a
/// `-` so they are never clobbered by the earlier two-character rules, and
/// composite tokens (`%F`, `%T`) expand to their full Java equivalents.
fn c_to_java_format(fmt: &str) -> String {
    const TOKEN_MAP: [(&str, &str); 19] = [
        ("%Y", "yyyy"),
        ("%m", "MM"),
        ("%d", "dd"),
        ("%H", "HH"),
        ("%M", "mm"),
        ("%S", "ss"),
        ("%f", "SSSSSS"),
        ("%y", "yy"),
        ("%-m", "M"),
        ("%-d", "d"),
        ("%-H", "H"),
        ("%-I", "h"),
        ("%I", "hh"),
        ("%p", "a"),
        ("%j", "DDD"),
        ("%a", "EEE"),
        ("%b", "MMM"),
        ("%F", "yyyy-MM-dd"),
        ("%T", "HH:mm:ss"),
    ];
    TOKEN_MAP
        .iter()
        .fold(fmt.to_string(), |acc, (c_tok, java_tok)| {
            acc.replace(c_tok, java_tok)
        })
}
13870
13871 // Helper: recursively convert format strings within expressions (handles CONCAT)
13872 fn convert_fmt_expr(
13873 expr: &Expression,
13874 converter: &dyn Fn(&str) -> String,
13875 ) -> Expression {
13876 match expr {
13877 Expression::Literal(lit)
13878 if matches!(
13879 lit.as_ref(),
13880 crate::expressions::Literal::String(_)
13881 ) =>
13882 {
13883 let crate::expressions::Literal::String(s) =
13884 lit.as_ref()
13885 else {
13886 unreachable!()
13887 };
13888 Expression::string(&converter(s))
13889 }
13890 Expression::Function(func)
13891 if func.name.eq_ignore_ascii_case("CONCAT") =>
13892 {
13893 let new_args: Vec<Expression> = func
13894 .args
13895 .iter()
13896 .map(|a| convert_fmt_expr(a, converter))
13897 .collect();
13898 Expression::Function(Box::new(Function::new(
13899 "CONCAT".to_string(),
13900 new_args,
13901 )))
13902 }
13903 other => other.clone(),
13904 }
13905 }
13906
13907 match target {
13908 DialectType::DuckDB => {
13909 if matches!(source, DialectType::SQLite) {
13910 // SQLite STRFTIME(fmt, val) -> DuckDB STRFTIME(CAST(val AS TIMESTAMP), fmt)
13911 let cast_val = Expression::Cast(Box::new(Cast {
13912 this: val,
13913 to: crate::expressions::DataType::Timestamp {
13914 precision: None,
13915 timezone: false,
13916 },
13917 trailing_comments: Vec::new(),
13918 double_colon_syntax: false,
13919 format: None,
13920 default: None,
13921 inferred_type: None,
13922 }));
13923 Ok(Expression::Function(Box::new(Function::new(
13924 "STRFTIME".to_string(),
13925 vec![cast_val, fmt_expr.clone()],
13926 ))))
13927 } else {
13928 Ok(Expression::Function(f))
13929 }
13930 }
13931 DialectType::Spark
13932 | DialectType::Databricks
13933 | DialectType::Hive => {
13934 // STRFTIME(val, fmt) -> DATE_FORMAT(val, java_fmt)
13935 let converted_fmt =
13936 convert_fmt_expr(fmt_expr, &c_to_java_format);
13937 Ok(Expression::Function(Box::new(Function::new(
13938 "DATE_FORMAT".to_string(),
13939 vec![val, converted_fmt],
13940 ))))
13941 }
13942 DialectType::TSQL | DialectType::Fabric => {
13943 // STRFTIME(val, fmt) -> FORMAT(val, java_fmt)
13944 let converted_fmt =
13945 convert_fmt_expr(fmt_expr, &c_to_java_format);
13946 Ok(Expression::Function(Box::new(Function::new(
13947 "FORMAT".to_string(),
13948 vec![val, converted_fmt],
13949 ))))
13950 }
13951 DialectType::Presto
13952 | DialectType::Trino
13953 | DialectType::Athena => {
13954 // STRFTIME(val, fmt) -> DATE_FORMAT(val, presto_fmt) (convert DuckDB format to Presto)
13955 if let Expression::Literal(lit) = fmt_expr {
13956 if let crate::expressions::Literal::String(s) =
13957 lit.as_ref()
13958 {
13959 let presto_fmt = duckdb_to_presto_format(s);
13960 Ok(Expression::Function(Box::new(Function::new(
13961 "DATE_FORMAT".to_string(),
13962 vec![val, Expression::string(&presto_fmt)],
13963 ))))
13964 } else {
13965 Ok(Expression::Function(Box::new(Function::new(
13966 "DATE_FORMAT".to_string(),
13967 vec![val, fmt_expr.clone()],
13968 ))))
13969 }
13970 } else {
13971 Ok(Expression::Function(Box::new(Function::new(
13972 "DATE_FORMAT".to_string(),
13973 vec![val, fmt_expr.clone()],
13974 ))))
13975 }
13976 }
13977 DialectType::BigQuery => {
13978 // STRFTIME(val, fmt) -> FORMAT_DATE(bq_fmt, val) - note reversed arg order
13979 if let Expression::Literal(lit) = fmt_expr {
13980 if let crate::expressions::Literal::String(s) =
13981 lit.as_ref()
13982 {
13983 let bq_fmt = duckdb_to_bigquery_format(s);
13984 Ok(Expression::Function(Box::new(Function::new(
13985 "FORMAT_DATE".to_string(),
13986 vec![Expression::string(&bq_fmt), val],
13987 ))))
13988 } else {
13989 Ok(Expression::Function(Box::new(Function::new(
13990 "FORMAT_DATE".to_string(),
13991 vec![fmt_expr.clone(), val],
13992 ))))
13993 }
13994 } else {
13995 Ok(Expression::Function(Box::new(Function::new(
13996 "FORMAT_DATE".to_string(),
13997 vec![fmt_expr.clone(), val],
13998 ))))
13999 }
14000 }
14001 DialectType::PostgreSQL | DialectType::Redshift => {
14002 // STRFTIME(val, fmt) -> TO_CHAR(val, pg_fmt)
14003 if let Expression::Literal(lit) = fmt_expr {
14004 if let crate::expressions::Literal::String(s) =
14005 lit.as_ref()
14006 {
14007 let pg_fmt = s
14008 .replace("%Y", "YYYY")
14009 .replace("%m", "MM")
14010 .replace("%d", "DD")
14011 .replace("%H", "HH24")
14012 .replace("%M", "MI")
14013 .replace("%S", "SS")
14014 .replace("%y", "YY")
14015 .replace("%-m", "FMMM")
14016 .replace("%-d", "FMDD")
14017 .replace("%-H", "FMHH24")
14018 .replace("%-I", "FMHH12")
14019 .replace("%p", "AM")
14020 .replace("%F", "YYYY-MM-DD")
14021 .replace("%T", "HH24:MI:SS");
14022 Ok(Expression::Function(Box::new(Function::new(
14023 "TO_CHAR".to_string(),
14024 vec![val, Expression::string(&pg_fmt)],
14025 ))))
14026 } else {
14027 Ok(Expression::Function(Box::new(Function::new(
14028 "TO_CHAR".to_string(),
14029 vec![val, fmt_expr.clone()],
14030 ))))
14031 }
14032 } else {
14033 Ok(Expression::Function(Box::new(Function::new(
14034 "TO_CHAR".to_string(),
14035 vec![val, fmt_expr.clone()],
14036 ))))
14037 }
14038 }
14039 _ => Ok(Expression::Function(f)),
14040 }
14041 }
14042 // STRPTIME(val, fmt) from DuckDB -> target-specific date parse function
14043 "STRPTIME" if f.args.len() == 2 => {
14044 let val = f.args[0].clone();
14045 let fmt_expr = &f.args[1];
14046
/// Translate a C `strftime`-style *parse* pattern into a Java
/// `SimpleDateFormat`-style pattern for TO_TIMESTAMP-like functions.
///
/// Same token table as the formatting direction, minus the day-of-year /
/// weekday / month-name tokens, which are not used when parsing here.
/// Rules run in order so the dash-prefixed tokens survive the plain ones.
fn c_to_java_format_parse(fmt: &str) -> String {
    let mut converted = fmt.to_string();
    for (c_tok, java_tok) in [
        ("%Y", "yyyy"),
        ("%m", "MM"),
        ("%d", "dd"),
        ("%H", "HH"),
        ("%M", "mm"),
        ("%S", "ss"),
        ("%f", "SSSSSS"),
        ("%y", "yy"),
        ("%-m", "M"),
        ("%-d", "d"),
        ("%-H", "H"),
        ("%-I", "h"),
        ("%I", "hh"),
        ("%p", "a"),
        ("%F", "yyyy-MM-dd"),
        ("%T", "HH:mm:ss"),
    ] {
        converted = converted.replace(c_tok, java_tok);
    }
    converted
}
14065
14066 match target {
14067 DialectType::DuckDB => Ok(Expression::Function(f)),
14068 DialectType::Spark | DialectType::Databricks => {
14069 // STRPTIME(val, fmt) -> TO_TIMESTAMP(val, java_fmt)
14070 if let Expression::Literal(lit) = fmt_expr {
14071 if let crate::expressions::Literal::String(s) =
14072 lit.as_ref()
14073 {
14074 let java_fmt = c_to_java_format_parse(s);
14075 Ok(Expression::Function(Box::new(Function::new(
14076 "TO_TIMESTAMP".to_string(),
14077 vec![val, Expression::string(&java_fmt)],
14078 ))))
14079 } else {
14080 Ok(Expression::Function(Box::new(Function::new(
14081 "TO_TIMESTAMP".to_string(),
14082 vec![val, fmt_expr.clone()],
14083 ))))
14084 }
14085 } else {
14086 Ok(Expression::Function(Box::new(Function::new(
14087 "TO_TIMESTAMP".to_string(),
14088 vec![val, fmt_expr.clone()],
14089 ))))
14090 }
14091 }
14092 DialectType::Hive => {
14093 // STRPTIME(val, fmt) -> CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(val, java_fmt)) AS TIMESTAMP)
14094 if let Expression::Literal(lit) = fmt_expr {
14095 if let crate::expressions::Literal::String(s) =
14096 lit.as_ref()
14097 {
14098 let java_fmt = c_to_java_format_parse(s);
14099 let unix_ts =
14100 Expression::Function(Box::new(Function::new(
14101 "UNIX_TIMESTAMP".to_string(),
14102 vec![val, Expression::string(&java_fmt)],
14103 )));
14104 let from_unix =
14105 Expression::Function(Box::new(Function::new(
14106 "FROM_UNIXTIME".to_string(),
14107 vec![unix_ts],
14108 )));
14109 Ok(Expression::Cast(Box::new(
14110 crate::expressions::Cast {
14111 this: from_unix,
14112 to: DataType::Timestamp {
14113 timezone: false,
14114 precision: None,
14115 },
14116 trailing_comments: Vec::new(),
14117 double_colon_syntax: false,
14118 format: None,
14119 default: None,
14120 inferred_type: None,
14121 },
14122 )))
14123 } else {
14124 Ok(Expression::Function(f))
14125 }
14126 } else {
14127 Ok(Expression::Function(f))
14128 }
14129 }
14130 DialectType::Presto
14131 | DialectType::Trino
14132 | DialectType::Athena => {
14133 // STRPTIME(val, fmt) -> DATE_PARSE(val, presto_fmt) (convert DuckDB format to Presto)
14134 if let Expression::Literal(lit) = fmt_expr {
14135 if let crate::expressions::Literal::String(s) =
14136 lit.as_ref()
14137 {
14138 let presto_fmt = duckdb_to_presto_format(s);
14139 Ok(Expression::Function(Box::new(Function::new(
14140 "DATE_PARSE".to_string(),
14141 vec![val, Expression::string(&presto_fmt)],
14142 ))))
14143 } else {
14144 Ok(Expression::Function(Box::new(Function::new(
14145 "DATE_PARSE".to_string(),
14146 vec![val, fmt_expr.clone()],
14147 ))))
14148 }
14149 } else {
14150 Ok(Expression::Function(Box::new(Function::new(
14151 "DATE_PARSE".to_string(),
14152 vec![val, fmt_expr.clone()],
14153 ))))
14154 }
14155 }
14156 DialectType::BigQuery => {
14157 // STRPTIME(val, fmt) -> PARSE_TIMESTAMP(bq_fmt, val) - note reversed arg order
14158 if let Expression::Literal(lit) = fmt_expr {
14159 if let crate::expressions::Literal::String(s) =
14160 lit.as_ref()
14161 {
14162 let bq_fmt = duckdb_to_bigquery_format(s);
14163 Ok(Expression::Function(Box::new(Function::new(
14164 "PARSE_TIMESTAMP".to_string(),
14165 vec![Expression::string(&bq_fmt), val],
14166 ))))
14167 } else {
14168 Ok(Expression::Function(Box::new(Function::new(
14169 "PARSE_TIMESTAMP".to_string(),
14170 vec![fmt_expr.clone(), val],
14171 ))))
14172 }
14173 } else {
14174 Ok(Expression::Function(Box::new(Function::new(
14175 "PARSE_TIMESTAMP".to_string(),
14176 vec![fmt_expr.clone(), val],
14177 ))))
14178 }
14179 }
14180 _ => Ok(Expression::Function(f)),
14181 }
14182 }
14183 // DATE_FORMAT(val, fmt) from Presto source (C-style format) -> target-specific
14184 "DATE_FORMAT"
14185 if f.args.len() >= 2
14186 && matches!(
14187 source,
14188 DialectType::Presto
14189 | DialectType::Trino
14190 | DialectType::Athena
14191 ) =>
14192 {
14193 let val = f.args[0].clone();
14194 let fmt_expr = &f.args[1];
14195
14196 match target {
14197 DialectType::Presto
14198 | DialectType::Trino
14199 | DialectType::Athena => {
14200 // Presto -> Presto: normalize format (e.g., %H:%i:%S -> %T)
14201 if let Expression::Literal(lit) = fmt_expr {
14202 if let crate::expressions::Literal::String(s) =
14203 lit.as_ref()
14204 {
14205 let normalized = crate::dialects::presto::PrestoDialect::normalize_presto_format(s);
14206 Ok(Expression::Function(Box::new(Function::new(
14207 "DATE_FORMAT".to_string(),
14208 vec![val, Expression::string(&normalized)],
14209 ))))
14210 } else {
14211 Ok(Expression::Function(f))
14212 }
14213 } else {
14214 Ok(Expression::Function(f))
14215 }
14216 }
14217 DialectType::Hive
14218 | DialectType::Spark
14219 | DialectType::Databricks => {
14220 // Convert Presto C-style to Java-style format
14221 if let Expression::Literal(lit) = fmt_expr {
14222 if let crate::expressions::Literal::String(s) =
14223 lit.as_ref()
14224 {
14225 let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
14226 Ok(Expression::Function(Box::new(Function::new(
14227 "DATE_FORMAT".to_string(),
14228 vec![val, Expression::string(&java_fmt)],
14229 ))))
14230 } else {
14231 Ok(Expression::Function(f))
14232 }
14233 } else {
14234 Ok(Expression::Function(f))
14235 }
14236 }
14237 DialectType::DuckDB => {
14238 // Convert to STRFTIME(val, duckdb_fmt)
14239 if let Expression::Literal(lit) = fmt_expr {
14240 if let crate::expressions::Literal::String(s) =
14241 lit.as_ref()
14242 {
14243 let duckdb_fmt = crate::dialects::presto::PrestoDialect::presto_to_duckdb_format(s);
14244 Ok(Expression::Function(Box::new(Function::new(
14245 "STRFTIME".to_string(),
14246 vec![val, Expression::string(&duckdb_fmt)],
14247 ))))
14248 } else {
14249 Ok(Expression::Function(Box::new(Function::new(
14250 "STRFTIME".to_string(),
14251 vec![val, fmt_expr.clone()],
14252 ))))
14253 }
14254 } else {
14255 Ok(Expression::Function(Box::new(Function::new(
14256 "STRFTIME".to_string(),
14257 vec![val, fmt_expr.clone()],
14258 ))))
14259 }
14260 }
14261 DialectType::BigQuery => {
14262 // Convert to FORMAT_DATE(bq_fmt, val) - reversed args
14263 if let Expression::Literal(lit) = fmt_expr {
14264 if let crate::expressions::Literal::String(s) =
14265 lit.as_ref()
14266 {
14267 let bq_fmt = crate::dialects::presto::PrestoDialect::presto_to_bigquery_format(s);
14268 Ok(Expression::Function(Box::new(Function::new(
14269 "FORMAT_DATE".to_string(),
14270 vec![Expression::string(&bq_fmt), val],
14271 ))))
14272 } else {
14273 Ok(Expression::Function(Box::new(Function::new(
14274 "FORMAT_DATE".to_string(),
14275 vec![fmt_expr.clone(), val],
14276 ))))
14277 }
14278 } else {
14279 Ok(Expression::Function(Box::new(Function::new(
14280 "FORMAT_DATE".to_string(),
14281 vec![fmt_expr.clone(), val],
14282 ))))
14283 }
14284 }
14285 _ => Ok(Expression::Function(f)),
14286 }
14287 }
14288 // DATE_PARSE(val, fmt) from Presto source -> target-specific parse function
14289 "DATE_PARSE"
14290 if f.args.len() >= 2
14291 && matches!(
14292 source,
14293 DialectType::Presto
14294 | DialectType::Trino
14295 | DialectType::Athena
14296 ) =>
14297 {
14298 let val = f.args[0].clone();
14299 let fmt_expr = &f.args[1];
14300
14301 match target {
14302 DialectType::Presto
14303 | DialectType::Trino
14304 | DialectType::Athena => {
14305 // Presto -> Presto: normalize format
14306 if let Expression::Literal(lit) = fmt_expr {
14307 if let crate::expressions::Literal::String(s) =
14308 lit.as_ref()
14309 {
14310 let normalized = crate::dialects::presto::PrestoDialect::normalize_presto_format(s);
14311 Ok(Expression::Function(Box::new(Function::new(
14312 "DATE_PARSE".to_string(),
14313 vec![val, Expression::string(&normalized)],
14314 ))))
14315 } else {
14316 Ok(Expression::Function(f))
14317 }
14318 } else {
14319 Ok(Expression::Function(f))
14320 }
14321 }
14322 DialectType::Hive => {
14323 // Presto -> Hive: if default format, just CAST(x AS TIMESTAMP)
14324 if let Expression::Literal(lit) = fmt_expr {
14325 if let crate::expressions::Literal::String(s) =
14326 lit.as_ref()
14327 {
14328 if crate::dialects::presto::PrestoDialect::is_default_timestamp_format(s)
14329 || crate::dialects::presto::PrestoDialect::is_default_date_format(s) {
14330 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
14331 this: val,
14332 to: DataType::Timestamp { timezone: false, precision: None },
14333 trailing_comments: Vec::new(),
14334 double_colon_syntax: false,
14335 format: None,
14336 default: None,
14337 inferred_type: None,
14338 })))
14339 } else {
14340 let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
14341 Ok(Expression::Function(Box::new(Function::new(
14342 "TO_TIMESTAMP".to_string(),
14343 vec![val, Expression::string(&java_fmt)],
14344 ))))
14345 }
14346 } else {
14347 Ok(Expression::Function(f))
14348 }
14349 } else {
14350 Ok(Expression::Function(f))
14351 }
14352 }
14353 DialectType::Spark | DialectType::Databricks => {
14354 // Presto -> Spark: TO_TIMESTAMP(val, java_fmt)
14355 if let Expression::Literal(lit) = fmt_expr {
14356 if let crate::expressions::Literal::String(s) =
14357 lit.as_ref()
14358 {
14359 let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
14360 Ok(Expression::Function(Box::new(Function::new(
14361 "TO_TIMESTAMP".to_string(),
14362 vec![val, Expression::string(&java_fmt)],
14363 ))))
14364 } else {
14365 Ok(Expression::Function(f))
14366 }
14367 } else {
14368 Ok(Expression::Function(f))
14369 }
14370 }
14371 DialectType::DuckDB => {
14372 // Presto -> DuckDB: STRPTIME(val, duckdb_fmt)
14373 if let Expression::Literal(lit) = fmt_expr {
14374 if let crate::expressions::Literal::String(s) =
14375 lit.as_ref()
14376 {
14377 let duckdb_fmt = crate::dialects::presto::PrestoDialect::presto_to_duckdb_format(s);
14378 Ok(Expression::Function(Box::new(Function::new(
14379 "STRPTIME".to_string(),
14380 vec![val, Expression::string(&duckdb_fmt)],
14381 ))))
14382 } else {
14383 Ok(Expression::Function(Box::new(Function::new(
14384 "STRPTIME".to_string(),
14385 vec![val, fmt_expr.clone()],
14386 ))))
14387 }
14388 } else {
14389 Ok(Expression::Function(Box::new(Function::new(
14390 "STRPTIME".to_string(),
14391 vec![val, fmt_expr.clone()],
14392 ))))
14393 }
14394 }
14395 _ => Ok(Expression::Function(f)),
14396 }
14397 }
14398 // FROM_BASE64(x) / TO_BASE64(x) from Presto -> Hive-specific renames
14399 "FROM_BASE64"
14400 if f.args.len() == 1 && matches!(target, DialectType::Hive) =>
14401 {
14402 Ok(Expression::Function(Box::new(Function::new(
14403 "UNBASE64".to_string(),
14404 f.args,
14405 ))))
14406 }
14407 "TO_BASE64"
14408 if f.args.len() == 1 && matches!(target, DialectType::Hive) =>
14409 {
14410 Ok(Expression::Function(Box::new(Function::new(
14411 "BASE64".to_string(),
14412 f.args,
14413 ))))
14414 }
14415 // FROM_UNIXTIME(x) -> CAST(FROM_UNIXTIME(x) AS TIMESTAMP) for Spark
14416 "FROM_UNIXTIME"
14417 if f.args.len() == 1
14418 && matches!(
14419 source,
14420 DialectType::Presto
14421 | DialectType::Trino
14422 | DialectType::Athena
14423 )
14424 && matches!(
14425 target,
14426 DialectType::Spark | DialectType::Databricks
14427 ) =>
14428 {
14429 // Wrap FROM_UNIXTIME(x) in CAST(... AS TIMESTAMP)
14430 let from_unix = Expression::Function(Box::new(Function::new(
14431 "FROM_UNIXTIME".to_string(),
14432 f.args,
14433 )));
14434 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
14435 this: from_unix,
14436 to: DataType::Timestamp {
14437 timezone: false,
14438 precision: None,
14439 },
14440 trailing_comments: Vec::new(),
14441 double_colon_syntax: false,
14442 format: None,
14443 default: None,
14444 inferred_type: None,
14445 })))
14446 }
14447 // DATE_FORMAT(val, fmt) from Hive/Spark/MySQL -> target-specific format function
14448 "DATE_FORMAT"
14449 if f.args.len() >= 2
14450 && !matches!(
14451 target,
14452 DialectType::Hive
14453 | DialectType::Spark
14454 | DialectType::Databricks
14455 | DialectType::MySQL
14456 | DialectType::SingleStore
14457 ) =>
14458 {
14459 let val = f.args[0].clone();
14460 let fmt_expr = &f.args[1];
14461 let is_hive_source = matches!(
14462 source,
14463 DialectType::Hive
14464 | DialectType::Spark
14465 | DialectType::Databricks
14466 );
14467
/// Translate a Java `SimpleDateFormat`-style pattern into a C
/// `strftime`-style pattern, e.g. `yyyy-MM-dd` -> `%Y-%m-%d`.
///
/// Two passes:
/// 1. Multi-character tokens, longest first, so `SSSSSS` wins over `SSS`
///    and `yyyy` over `yy` (and `MM` is consumed before `mm` can't collide).
/// 2. Single-character timezone tokens `z`/`Z`, done with a character scan
///    so a `z`/`Z` inside an already-emitted `%X` specifier is not touched.
fn java_to_c_format(fmt: &str) -> String {
    // Pass 1: multi-char patterns (longer first).
    let result = fmt
        .replace("yyyy", "%Y")
        .replace("SSSSSS", "%f")
        // Fix: map the 3-digit millisecond token too, consistent with the
        // CONVERT-style handling elsewhere in this module (SSS -> %f).
        .replace("SSS", "%f")
        .replace("EEEE", "%W")
        .replace("MM", "%m")
        .replace("dd", "%d")
        .replace("HH", "%H")
        .replace("mm", "%M")
        .replace("ss", "%S")
        .replace("yy", "%y");
    // Pass 2: z -> %Z (timezone name), Z -> %z (timezone offset).
    // Skip over any "%X" pair so replaced specifiers are left intact.
    let mut out = String::with_capacity(result.len() + 4);
    let mut chars = result.chars();
    while let Some(c) = chars.next() {
        match c {
            '%' => {
                out.push('%');
                // A lone trailing '%' is passed through unchanged.
                if let Some(next) = chars.next() {
                    out.push(next);
                }
            }
            'z' => out.push_str("%Z"),
            'Z' => out.push_str("%z"),
            other => out.push(other),
        }
    }
    out
}
14507
14508 fn java_to_presto_format(fmt: &str) -> String {
14509 // Presto uses %T for HH:MM:SS
14510 let c_fmt = java_to_c_format(fmt);
14511 c_fmt.replace("%H:%M:%S", "%T")
14512 }
14513
14514 fn java_to_bq_format(fmt: &str) -> String {
14515 // BigQuery uses %F for yyyy-MM-dd and %T for HH:mm:ss
14516 let c_fmt = java_to_c_format(fmt);
14517 c_fmt.replace("%Y-%m-%d", "%F").replace("%H:%M:%S", "%T")
14518 }
14519
14520 // For Hive source, CAST string literals to appropriate type
14521 let cast_val = if is_hive_source {
14522 match &val {
14523 Expression::Literal(lit)
14524 if matches!(
14525 lit.as_ref(),
14526 crate::expressions::Literal::String(_)
14527 ) =>
14528 {
14529 match target {
14530 DialectType::DuckDB
14531 | DialectType::Presto
14532 | DialectType::Trino
14533 | DialectType::Athena => {
14534 Self::ensure_cast_timestamp(val.clone())
14535 }
14536 DialectType::BigQuery => {
14537 // BigQuery: CAST(val AS DATETIME)
14538 Expression::Cast(Box::new(
14539 crate::expressions::Cast {
14540 this: val.clone(),
14541 to: DataType::Custom {
14542 name: "DATETIME".to_string(),
14543 },
14544 trailing_comments: vec![],
14545 double_colon_syntax: false,
14546 format: None,
14547 default: None,
14548 inferred_type: None,
14549 },
14550 ))
14551 }
14552 _ => val.clone(),
14553 }
14554 }
14555 // For CAST(x AS DATE) or DATE literal, Presto needs CAST(CAST(x AS DATE) AS TIMESTAMP)
14556 Expression::Cast(c)
14557 if matches!(c.to, DataType::Date)
14558 && matches!(
14559 target,
14560 DialectType::Presto
14561 | DialectType::Trino
14562 | DialectType::Athena
14563 ) =>
14564 {
14565 Expression::Cast(Box::new(crate::expressions::Cast {
14566 this: val.clone(),
14567 to: DataType::Timestamp {
14568 timezone: false,
14569 precision: None,
14570 },
14571 trailing_comments: vec![],
14572 double_colon_syntax: false,
14573 format: None,
14574 default: None,
14575 inferred_type: None,
14576 }))
14577 }
14578 Expression::Literal(lit)
14579 if matches!(
14580 lit.as_ref(),
14581 crate::expressions::Literal::Date(_)
14582 ) && matches!(
14583 target,
14584 DialectType::Presto
14585 | DialectType::Trino
14586 | DialectType::Athena
14587 ) =>
14588 {
14589 // DATE 'x' -> CAST(CAST('x' AS DATE) AS TIMESTAMP)
14590 let cast_date = Self::date_literal_to_cast(val.clone());
14591 Expression::Cast(Box::new(crate::expressions::Cast {
14592 this: cast_date,
14593 to: DataType::Timestamp {
14594 timezone: false,
14595 precision: None,
14596 },
14597 trailing_comments: vec![],
14598 double_colon_syntax: false,
14599 format: None,
14600 default: None,
14601 inferred_type: None,
14602 }))
14603 }
14604 _ => val.clone(),
14605 }
14606 } else {
14607 val.clone()
14608 };
14609
14610 match target {
14611 DialectType::DuckDB => {
14612 if let Expression::Literal(lit) = fmt_expr {
14613 if let crate::expressions::Literal::String(s) =
14614 lit.as_ref()
14615 {
14616 let c_fmt = if is_hive_source {
14617 java_to_c_format(s)
14618 } else {
14619 s.clone()
14620 };
14621 Ok(Expression::Function(Box::new(Function::new(
14622 "STRFTIME".to_string(),
14623 vec![cast_val, Expression::string(&c_fmt)],
14624 ))))
14625 } else {
14626 Ok(Expression::Function(Box::new(Function::new(
14627 "STRFTIME".to_string(),
14628 vec![cast_val, fmt_expr.clone()],
14629 ))))
14630 }
14631 } else {
14632 Ok(Expression::Function(Box::new(Function::new(
14633 "STRFTIME".to_string(),
14634 vec![cast_val, fmt_expr.clone()],
14635 ))))
14636 }
14637 }
14638 DialectType::Presto
14639 | DialectType::Trino
14640 | DialectType::Athena => {
14641 if is_hive_source {
14642 if let Expression::Literal(lit) = fmt_expr {
14643 if let crate::expressions::Literal::String(s) =
14644 lit.as_ref()
14645 {
14646 let p_fmt = java_to_presto_format(s);
14647 Ok(Expression::Function(Box::new(
14648 Function::new(
14649 "DATE_FORMAT".to_string(),
14650 vec![
14651 cast_val,
14652 Expression::string(&p_fmt),
14653 ],
14654 ),
14655 )))
14656 } else {
14657 Ok(Expression::Function(Box::new(
14658 Function::new(
14659 "DATE_FORMAT".to_string(),
14660 vec![cast_val, fmt_expr.clone()],
14661 ),
14662 )))
14663 }
14664 } else {
14665 Ok(Expression::Function(Box::new(Function::new(
14666 "DATE_FORMAT".to_string(),
14667 vec![cast_val, fmt_expr.clone()],
14668 ))))
14669 }
14670 } else {
14671 Ok(Expression::Function(Box::new(Function::new(
14672 "DATE_FORMAT".to_string(),
14673 f.args,
14674 ))))
14675 }
14676 }
14677 DialectType::BigQuery => {
14678 // DATE_FORMAT(val, fmt) -> FORMAT_DATE(fmt, val)
14679 if let Expression::Literal(lit) = fmt_expr {
14680 if let crate::expressions::Literal::String(s) =
14681 lit.as_ref()
14682 {
14683 let bq_fmt = if is_hive_source {
14684 java_to_bq_format(s)
14685 } else {
14686 java_to_c_format(s)
14687 };
14688 Ok(Expression::Function(Box::new(Function::new(
14689 "FORMAT_DATE".to_string(),
14690 vec![Expression::string(&bq_fmt), cast_val],
14691 ))))
14692 } else {
14693 Ok(Expression::Function(Box::new(Function::new(
14694 "FORMAT_DATE".to_string(),
14695 vec![fmt_expr.clone(), cast_val],
14696 ))))
14697 }
14698 } else {
14699 Ok(Expression::Function(Box::new(Function::new(
14700 "FORMAT_DATE".to_string(),
14701 vec![fmt_expr.clone(), cast_val],
14702 ))))
14703 }
14704 }
14705 DialectType::PostgreSQL | DialectType::Redshift => {
14706 if let Expression::Literal(lit) = fmt_expr {
14707 if let crate::expressions::Literal::String(s) =
14708 lit.as_ref()
14709 {
14710 let pg_fmt = s
14711 .replace("yyyy", "YYYY")
14712 .replace("MM", "MM")
14713 .replace("dd", "DD")
14714 .replace("HH", "HH24")
14715 .replace("mm", "MI")
14716 .replace("ss", "SS")
14717 .replace("yy", "YY");
14718 Ok(Expression::Function(Box::new(Function::new(
14719 "TO_CHAR".to_string(),
14720 vec![val, Expression::string(&pg_fmt)],
14721 ))))
14722 } else {
14723 Ok(Expression::Function(Box::new(Function::new(
14724 "TO_CHAR".to_string(),
14725 vec![val, fmt_expr.clone()],
14726 ))))
14727 }
14728 } else {
14729 Ok(Expression::Function(Box::new(Function::new(
14730 "TO_CHAR".to_string(),
14731 vec![val, fmt_expr.clone()],
14732 ))))
14733 }
14734 }
14735 _ => Ok(Expression::Function(f)),
14736 }
14737 }
14738 // DATEDIFF(unit, start, end) - 3-arg form
14739 // SQLite uses DATEDIFF(date1, date2, unit_string) instead
"DATEDIFF" if f.args.len() == 3 => {
    // Rewrites a 3-argument DATEDIFF for the target dialect, adjusting
    // argument order, unit spelling, and adding CASTs that the target
    // needs for string-literal date arguments.
    let mut args = f.args;
    // SQLite source: args = (date1, date2, unit_string)
    // Standard source: args = (unit, start, end)
    let (_arg0, arg1, arg2, unit_str) =
        if matches!(source, DialectType::SQLite) {
            let date1 = args.remove(0);
            let date2 = args.remove(0);
            let unit_expr = args.remove(0);
            let unit_s = Self::get_unit_str_static(&unit_expr);

            // For SQLite target, generate JULIANDAY arithmetic directly
            if matches!(target, DialectType::SQLite) {
                let jd_first = Expression::Function(Box::new(
                    Function::new("JULIANDAY".to_string(), vec![date1]),
                ));
                let jd_second = Expression::Function(Box::new(
                    Function::new("JULIANDAY".to_string(), vec![date2]),
                ));
                let diff = Expression::Sub(Box::new(
                    crate::expressions::BinaryOp::new(
                        jd_first, jd_second,
                    ),
                ));
                // Parenthesize before scaling so the emitted SQL reads
                // (JULIANDAY(a) - JULIANDAY(b)) * factor.
                let paren_diff = Expression::Paren(Box::new(
                    crate::expressions::Paren {
                        this: diff,
                        trailing_comments: Vec::new(),
                    },
                ));
                // Scale the day difference into the requested unit.
                // MONTH/YEAR use 30.0/365.0 — deliberate approximations.
                let adjusted = match unit_s.as_str() {
                    "HOUR" => Expression::Mul(Box::new(
                        crate::expressions::BinaryOp::new(
                            paren_diff,
                            Expression::Literal(Box::new(
                                Literal::Number("24.0".to_string()),
                            )),
                        ),
                    )),
                    "MINUTE" => Expression::Mul(Box::new(
                        crate::expressions::BinaryOp::new(
                            paren_diff,
                            Expression::Literal(Box::new(
                                Literal::Number("1440.0".to_string()),
                            )),
                        ),
                    )),
                    "SECOND" => Expression::Mul(Box::new(
                        crate::expressions::BinaryOp::new(
                            paren_diff,
                            Expression::Literal(Box::new(
                                Literal::Number("86400.0".to_string()),
                            )),
                        ),
                    )),
                    "MONTH" => Expression::Div(Box::new(
                        crate::expressions::BinaryOp::new(
                            paren_diff,
                            Expression::Literal(Box::new(
                                Literal::Number("30.0".to_string()),
                            )),
                        ),
                    )),
                    "YEAR" => Expression::Div(Box::new(
                        crate::expressions::BinaryOp::new(
                            paren_diff,
                            Expression::Literal(Box::new(
                                Literal::Number("365.0".to_string()),
                            )),
                        ),
                    )),
                    _ => paren_diff, // DAY and unknown units: raw day diff
                };
                // Truncate the fractional result to an INTEGER.
                return Ok(Expression::Cast(Box::new(Cast {
                    this: adjusted,
                    to: DataType::Int {
                        length: None,
                        integer_spelling: true,
                    },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                })));
            }

            // For other targets, remap to standard (unit, start, end) form
            let unit_ident =
                Expression::Identifier(Identifier::new(&unit_s));
            (unit_ident, date1, date2, unit_s)
        } else {
            let arg0 = args.remove(0);
            let arg1 = args.remove(0);
            let arg2 = args.remove(0);
            let unit_s = Self::get_unit_str_static(&arg0);
            (arg0, arg1, arg2, unit_s)
        };

    // For Hive/Spark source, string literal dates need to be cast
    // Note: Databricks is excluded - it handles string args like standard SQL
    let is_hive_spark =
        matches!(source, DialectType::Hive | DialectType::Spark);

    match target {
        DialectType::Snowflake => {
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            // Use ensure_to_date_preserved to add TO_DATE with a marker
            // that prevents the Snowflake TO_DATE handler from converting it to CAST
            let d1 = if is_hive_spark {
                Self::ensure_to_date_preserved(arg1)
            } else {
                arg1
            };
            let d2 = if is_hive_spark {
                Self::ensure_to_date_preserved(arg2)
            } else {
                arg2
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![unit, d1, d2],
            ))))
        }
        DialectType::Redshift => {
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            let d1 = if is_hive_spark {
                Self::ensure_cast_date(arg1)
            } else {
                arg1
            };
            let d2 = if is_hive_spark {
                Self::ensure_cast_date(arg2)
            } else {
                arg2
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![unit, d1, d2],
            ))))
        }
        DialectType::TSQL => {
            // TSQL accepts the standard (unit, start, end) form as-is.
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
        DialectType::DuckDB => {
            let is_redshift_tsql = matches!(
                source,
                DialectType::Redshift | DialectType::TSQL
            );
            if is_hive_spark {
                // For Hive/Spark source, CAST string args to DATE and emit DATE_DIFF directly
                let d1 = Self::ensure_cast_date(arg1);
                let d2 = Self::ensure_cast_date(arg2);
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_DIFF".to_string(),
                    vec![Expression::string(&unit_str), d1, d2],
                ))))
            } else if matches!(source, DialectType::Snowflake) {
                // For Snowflake source: special handling per unit
                match unit_str.as_str() {
                    "NANOSECOND" => {
                        // DATEDIFF(NANOSECOND, start, end) -> EPOCH_NS(CAST(end AS TIMESTAMP_NS)) - EPOCH_NS(CAST(start AS TIMESTAMP_NS))
                        fn cast_to_timestamp_ns(
                            expr: Expression,
                        ) -> Expression
                        {
                            Expression::Cast(Box::new(Cast {
                                this: expr,
                                to: DataType::Custom {
                                    name: "TIMESTAMP_NS".to_string(),
                                },
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            }))
                        }
                        let epoch_end = Expression::Function(Box::new(
                            Function::new(
                                "EPOCH_NS".to_string(),
                                vec![cast_to_timestamp_ns(arg2)],
                            ),
                        ));
                        let epoch_start = Expression::Function(
                            Box::new(Function::new(
                                "EPOCH_NS".to_string(),
                                vec![cast_to_timestamp_ns(arg1)],
                            )),
                        );
                        Ok(Expression::Sub(Box::new(BinaryOp::new(
                            epoch_end,
                            epoch_start,
                        ))))
                    }
                    "WEEK" => {
                        // DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST(x AS DATE)), DATE_TRUNC('WEEK', CAST(y AS DATE)))
                        // Truncating both sides to week boundaries
                        // matches Snowflake's week-counting semantics.
                        let d1 = Self::force_cast_date(arg1);
                        let d2 = Self::force_cast_date(arg2);
                        let dt1 = Expression::Function(Box::new(
                            Function::new(
                                "DATE_TRUNC".to_string(),
                                vec![Expression::string("WEEK"), d1],
                            ),
                        ));
                        let dt2 = Expression::Function(Box::new(
                            Function::new(
                                "DATE_TRUNC".to_string(),
                                vec![Expression::string("WEEK"), d2],
                            ),
                        ));
                        Ok(Expression::Function(Box::new(
                            Function::new(
                                "DATE_DIFF".to_string(),
                                vec![
                                    Expression::string(&unit_str),
                                    dt1,
                                    dt2,
                                ],
                            ),
                        )))
                    }
                    _ => {
                        // YEAR, MONTH, QUARTER, DAY, etc.: CAST to DATE
                        let d1 = Self::force_cast_date(arg1);
                        let d2 = Self::force_cast_date(arg2);
                        Ok(Expression::Function(Box::new(
                            Function::new(
                                "DATE_DIFF".to_string(),
                                vec![
                                    Expression::string(&unit_str),
                                    d1,
                                    d2,
                                ],
                            ),
                        )))
                    }
                }
            } else if is_redshift_tsql {
                // For Redshift/TSQL source, CAST args to TIMESTAMP (always)
                let d1 = Self::force_cast_timestamp(arg1);
                let d2 = Self::force_cast_timestamp(arg2);
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_DIFF".to_string(),
                    vec![Expression::string(&unit_str), d1, d2],
                ))))
            } else {
                // Keep as DATEDIFF so DuckDB's transform_datediff handles
                // DATE_TRUNC for WEEK, CAST for string literals, etc.
                let unit =
                    Expression::Identifier(Identifier::new(&unit_str));
                Ok(Expression::Function(Box::new(Function::new(
                    "DATEDIFF".to_string(),
                    vec![unit, arg1, arg2],
                ))))
            }
        }
        DialectType::BigQuery => {
            // NOTE(review): despite its name, this flag also matches
            // Snowflake sources here — consider renaming.
            let is_redshift_tsql = matches!(
                source,
                DialectType::Redshift
                    | DialectType::TSQL
                    | DialectType::Snowflake
            );
            let cast_d1 = if is_hive_spark {
                Self::ensure_cast_date(arg1)
            } else if is_redshift_tsql {
                Self::force_cast_datetime(arg1)
            } else {
                Self::ensure_cast_datetime(arg1)
            };
            let cast_d2 = if is_hive_spark {
                Self::ensure_cast_date(arg2)
            } else if is_redshift_tsql {
                Self::force_cast_datetime(arg2)
            } else {
                Self::ensure_cast_datetime(arg2)
            };
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            // BigQuery's argument order is DATE_DIFF(end, start, unit).
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_DIFF".to_string(),
                vec![cast_d2, cast_d1, unit],
            ))))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // For Hive/Spark source, string literals need double-cast: CAST(CAST(x AS TIMESTAMP) AS DATE)
            // For Redshift/TSQL source, args need CAST to TIMESTAMP (always)
            // NOTE(review): this flag also matches Snowflake sources.
            let is_redshift_tsql = matches!(
                source,
                DialectType::Redshift
                    | DialectType::TSQL
                    | DialectType::Snowflake
            );
            let d1 = if is_hive_spark {
                Self::double_cast_timestamp_date(arg1)
            } else if is_redshift_tsql {
                Self::force_cast_timestamp(arg1)
            } else {
                arg1
            };
            let d2 = if is_hive_spark {
                Self::double_cast_timestamp_date(arg2)
            } else if is_redshift_tsql {
                Self::force_cast_timestamp(arg2)
            } else {
                arg2
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_DIFF".to_string(),
                vec![Expression::string(&unit_str), d1, d2],
            ))))
        }
        DialectType::Hive => match unit_str.as_str() {
            // Hive's 2-arg DATEDIFF only counts days, so other units
            // are rebuilt from MONTHS_BETWEEN / day-diff arithmetic.
            "MONTH" => Ok(Expression::Cast(Box::new(Cast {
                this: Expression::Function(Box::new(Function::new(
                    "MONTHS_BETWEEN".to_string(),
                    vec![arg2, arg1],
                ))),
                to: DataType::Int {
                    length: None,
                    integer_spelling: false,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }))),
            "WEEK" => Ok(Expression::Cast(Box::new(Cast {
                this: Expression::Div(Box::new(
                    crate::expressions::BinaryOp::new(
                        Expression::Function(Box::new(Function::new(
                            "DATEDIFF".to_string(),
                            vec![arg2, arg1],
                        ))),
                        Expression::number(7),
                    ),
                )),
                to: DataType::Int {
                    length: None,
                    integer_spelling: false,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }))),
            _ => Ok(Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![arg2, arg1],
            )))),
        },
        DialectType::Spark | DialectType::Databricks => {
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
        _ => {
            // For Hive/Spark source targeting PostgreSQL etc., cast string literals to DATE
            let d1 = if is_hive_spark {
                Self::ensure_cast_date(arg1)
            } else {
                arg1
            };
            let d2 = if is_hive_spark {
                Self::ensure_cast_date(arg2)
            } else {
                arg2
            };
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![unit, d1, d2],
            ))))
        }
    }
}
15132 // DATEDIFF(end, start) - 2-arg form from Hive/MySQL
"DATEDIFF" if f.args.len() == 2 => {
    // 2-argument DATEDIFF(end, start) as used by Hive/MySQL: the
    // result is a day count. arg0 = end date, arg1 = start date.
    let mut args = f.args;
    let arg0 = args.remove(0);
    let arg1 = args.remove(0);

    // Helper: unwrap TO_DATE(x) -> x (extracts inner arg)
    // Also recognizes TryCast/Cast to DATE that may have been produced by
    // cross-dialect TO_DATE -> TRY_CAST conversion
    let unwrap_to_date = |e: Expression| -> (Expression, bool) {
        if let Expression::Function(ref f) = e {
            if f.name.eq_ignore_ascii_case("TO_DATE")
                && f.args.len() == 1
            {
                return (f.args[0].clone(), true);
            }
        }
        // Also recognize TryCast(x, Date) as an already-converted TO_DATE
        if let Expression::TryCast(ref c) = e {
            if matches!(c.to, DataType::Date) {
                return (e, true); // Already properly cast, return as-is
            }
        }
        (e, false)
    };

    match target {
        DialectType::DuckDB => {
            // For Hive source, always CAST to DATE
            // If arg is TO_DATE(x) or TRY_CAST(x AS DATE), use it directly
            let cast_d0 = if matches!(
                source,
                DialectType::Hive
                    | DialectType::Spark
                    | DialectType::Databricks
            ) {
                let (inner, was_to_date) = unwrap_to_date(arg1);
                if was_to_date {
                    // Already a date expression, use directly
                    if matches!(&inner, Expression::TryCast(_)) {
                        inner // Already TRY_CAST(x AS DATE)
                    } else {
                        Self::try_cast_date(inner)
                    }
                } else {
                    Self::force_cast_date(inner)
                }
            } else {
                Self::ensure_cast_date(arg1)
            };
            let cast_d1 = if matches!(
                source,
                DialectType::Hive
                    | DialectType::Spark
                    | DialectType::Databricks
            ) {
                let (inner, was_to_date) = unwrap_to_date(arg0);
                if was_to_date {
                    if matches!(&inner, Expression::TryCast(_)) {
                        inner
                    } else {
                        Self::try_cast_date(inner)
                    }
                } else {
                    Self::force_cast_date(inner)
                }
            } else {
                Self::ensure_cast_date(arg0)
            };
            // DuckDB takes (unit, start, end), so the 2-arg order
            // (end, start) is swapped here.
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_DIFF".to_string(),
                vec![Expression::string("DAY"), cast_d0, cast_d1],
            ))))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // For Hive/Spark source, apply double_cast_timestamp_date
            // For other sources (MySQL etc.), just swap args without casting
            if matches!(
                source,
                DialectType::Hive
                    | DialectType::Spark
                    | DialectType::Databricks
            ) {
                let cast_fn = |e: Expression| -> Expression {
                    let (inner, was_to_date) = unwrap_to_date(e);
                    if was_to_date {
                        // NOTE(review): double_cast_timestamp_date is
                        // applied twice for unwrapped TO_DATE args —
                        // presumably intentional to add an extra outer
                        // cast layer; confirm against golden output.
                        let first_cast =
                            Self::double_cast_timestamp_date(inner);
                        Self::double_cast_timestamp_date(first_cast)
                    } else {
                        Self::double_cast_timestamp_date(inner)
                    }
                };
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_DIFF".to_string(),
                    vec![
                        Expression::string("DAY"),
                        cast_fn(arg1),
                        cast_fn(arg0),
                    ],
                ))))
            } else {
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_DIFF".to_string(),
                    vec![Expression::string("DAY"), arg1, arg0],
                ))))
            }
        }
        DialectType::Redshift => {
            // Redshift DATEDIFF(unit, start, end) with identifier unit.
            let unit = Expression::Identifier(Identifier::new("DAY"));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![unit, arg1, arg0],
            ))))
        }
        // Other targets keep the 2-arg (end, start) form unchanged.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "DATEDIFF".to_string(),
            vec![arg0, arg1],
        )))),
    }
}
15255 // DATE_DIFF(unit, start, end) - 3-arg with string unit (ClickHouse/DuckDB style)
15256 "DATE_DIFF" if f.args.len() == 3 => {
15257 let mut args = f.args;
15258 let arg0 = args.remove(0);
15259 let arg1 = args.remove(0);
15260 let arg2 = args.remove(0);
15261 let unit_str = Self::get_unit_str_static(&arg0);
15262
15263 match target {
15264 DialectType::DuckDB => {
15265 // DuckDB: DATE_DIFF('UNIT', start, end)
15266 Ok(Expression::Function(Box::new(Function::new(
15267 "DATE_DIFF".to_string(),
15268 vec![Expression::string(&unit_str), arg1, arg2],
15269 ))))
15270 }
15271 DialectType::Presto
15272 | DialectType::Trino
15273 | DialectType::Athena => {
15274 Ok(Expression::Function(Box::new(Function::new(
15275 "DATE_DIFF".to_string(),
15276 vec![Expression::string(&unit_str), arg1, arg2],
15277 ))))
15278 }
15279 DialectType::ClickHouse => {
15280 // ClickHouse: DATE_DIFF(UNIT, start, end) - identifier unit
15281 let unit =
15282 Expression::Identifier(Identifier::new(&unit_str));
15283 Ok(Expression::Function(Box::new(Function::new(
15284 "DATE_DIFF".to_string(),
15285 vec![unit, arg1, arg2],
15286 ))))
15287 }
15288 DialectType::Snowflake | DialectType::Redshift => {
15289 let unit =
15290 Expression::Identifier(Identifier::new(&unit_str));
15291 Ok(Expression::Function(Box::new(Function::new(
15292 "DATEDIFF".to_string(),
15293 vec![unit, arg1, arg2],
15294 ))))
15295 }
15296 _ => {
15297 let unit =
15298 Expression::Identifier(Identifier::new(&unit_str));
15299 Ok(Expression::Function(Box::new(Function::new(
15300 "DATEDIFF".to_string(),
15301 vec![unit, arg1, arg2],
15302 ))))
15303 }
15304 }
15305 }
15306 // DATEADD(unit, val, date) - 3-arg form
15307 "DATEADD" if f.args.len() == 3 => {
15308 let mut args = f.args;
15309 let arg0 = args.remove(0);
15310 let arg1 = args.remove(0);
15311 let arg2 = args.remove(0);
15312 let unit_str = Self::get_unit_str_static(&arg0);
15313
15314 // Normalize TSQL unit abbreviations to standard names
15315 let unit_str = match unit_str.as_str() {
15316 "YY" | "YYYY" => "YEAR".to_string(),
15317 "QQ" | "Q" => "QUARTER".to_string(),
15318 "MM" | "M" => "MONTH".to_string(),
15319 "WK" | "WW" => "WEEK".to_string(),
15320 "DD" | "D" | "DY" => "DAY".to_string(),
15321 "HH" => "HOUR".to_string(),
15322 "MI" | "N" => "MINUTE".to_string(),
15323 "SS" | "S" => "SECOND".to_string(),
15324 "MS" => "MILLISECOND".to_string(),
15325 "MCS" | "US" => "MICROSECOND".to_string(),
15326 _ => unit_str,
15327 };
15328 match target {
15329 DialectType::Snowflake => {
15330 let unit =
15331 Expression::Identifier(Identifier::new(&unit_str));
15332 // Cast string literal to TIMESTAMP, but not for Snowflake source
15333 // (Snowflake natively accepts string literals in DATEADD)
15334 let arg2 = if matches!(
15335 &arg2,
15336 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
15337 ) && !matches!(source, DialectType::Snowflake)
15338 {
15339 Expression::Cast(Box::new(Cast {
15340 this: arg2,
15341 to: DataType::Timestamp {
15342 precision: None,
15343 timezone: false,
15344 },
15345 trailing_comments: Vec::new(),
15346 double_colon_syntax: false,
15347 format: None,
15348 default: None,
15349 inferred_type: None,
15350 }))
15351 } else {
15352 arg2
15353 };
15354 Ok(Expression::Function(Box::new(Function::new(
15355 "DATEADD".to_string(),
15356 vec![unit, arg1, arg2],
15357 ))))
15358 }
15359 DialectType::TSQL => {
15360 let unit =
15361 Expression::Identifier(Identifier::new(&unit_str));
15362 // Cast string literal to DATETIME2, but not when source is Spark/Databricks family
15363 let arg2 = if matches!(
15364 &arg2,
15365 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
15366 ) && !matches!(
15367 source,
15368 DialectType::Spark
15369 | DialectType::Databricks
15370 | DialectType::Hive
15371 ) {
15372 Expression::Cast(Box::new(Cast {
15373 this: arg2,
15374 to: DataType::Custom {
15375 name: "DATETIME2".to_string(),
15376 },
15377 trailing_comments: Vec::new(),
15378 double_colon_syntax: false,
15379 format: None,
15380 default: None,
15381 inferred_type: None,
15382 }))
15383 } else {
15384 arg2
15385 };
15386 Ok(Expression::Function(Box::new(Function::new(
15387 "DATEADD".to_string(),
15388 vec![unit, arg1, arg2],
15389 ))))
15390 }
15391 DialectType::Redshift => {
15392 let unit =
15393 Expression::Identifier(Identifier::new(&unit_str));
15394 Ok(Expression::Function(Box::new(Function::new(
15395 "DATEADD".to_string(),
15396 vec![unit, arg1, arg2],
15397 ))))
15398 }
15399 DialectType::Databricks => {
15400 let unit =
15401 Expression::Identifier(Identifier::new(&unit_str));
15402 // Sources with native DATEADD (TSQL, Databricks, Snowflake) -> DATEADD
15403 // Other sources (Redshift TsOrDsAdd, etc.) -> DATE_ADD
15404 let func_name = if matches!(
15405 source,
15406 DialectType::TSQL
15407 | DialectType::Fabric
15408 | DialectType::Databricks
15409 | DialectType::Snowflake
15410 ) {
15411 "DATEADD"
15412 } else {
15413 "DATE_ADD"
15414 };
15415 Ok(Expression::Function(Box::new(Function::new(
15416 func_name.to_string(),
15417 vec![unit, arg1, arg2],
15418 ))))
15419 }
15420 DialectType::DuckDB => {
15421 // Special handling for NANOSECOND from Snowflake
15422 if unit_str == "NANOSECOND"
15423 && matches!(source, DialectType::Snowflake)
15424 {
15425 // DATEADD(NANOSECOND, offset, ts) -> MAKE_TIMESTAMP_NS(EPOCH_NS(CAST(ts AS TIMESTAMP_NS)) + offset)
15426 let cast_ts = Expression::Cast(Box::new(Cast {
15427 this: arg2,
15428 to: DataType::Custom {
15429 name: "TIMESTAMP_NS".to_string(),
15430 },
15431 trailing_comments: vec![],
15432 double_colon_syntax: false,
15433 format: None,
15434 default: None,
15435 inferred_type: None,
15436 }));
15437 let epoch_ns =
15438 Expression::Function(Box::new(Function::new(
15439 "EPOCH_NS".to_string(),
15440 vec![cast_ts],
15441 )));
15442 let sum = Expression::Add(Box::new(BinaryOp::new(
15443 epoch_ns, arg1,
15444 )));
15445 Ok(Expression::Function(Box::new(Function::new(
15446 "MAKE_TIMESTAMP_NS".to_string(),
15447 vec![sum],
15448 ))))
15449 } else {
15450 // DuckDB: convert to date + INTERVAL syntax with CAST
15451 let iu = Self::parse_interval_unit_static(&unit_str);
15452 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
15453 this: Some(arg1),
15454 unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: iu, use_plural: false }),
15455 }));
15456 // Cast string literal to TIMESTAMP
15457 let arg2 = if matches!(
15458 &arg2,
15459 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
15460 ) {
15461 Expression::Cast(Box::new(Cast {
15462 this: arg2,
15463 to: DataType::Timestamp {
15464 precision: None,
15465 timezone: false,
15466 },
15467 trailing_comments: Vec::new(),
15468 double_colon_syntax: false,
15469 format: None,
15470 default: None,
15471 inferred_type: None,
15472 }))
15473 } else {
15474 arg2
15475 };
15476 Ok(Expression::Add(Box::new(
15477 crate::expressions::BinaryOp::new(arg2, interval),
15478 )))
15479 }
15480 }
15481 DialectType::Spark => {
15482 // For TSQL source: convert to ADD_MONTHS/DATE_ADD(date, val)
15483 // For other sources: keep 3-arg DATE_ADD(UNIT, val, date) form
15484 if matches!(source, DialectType::TSQL | DialectType::Fabric)
15485 {
// Multiply `expr` by `factor`, constant-folding when `expr` is an
// integer numeric literal (e.g. 2 * 12 -> literal 24) so the emitted
// SQL stays clean; otherwise emit an explicit `expr * factor` node.
fn multiply_expr_spark(
    expr: Expression,
    factor: i64,
) -> Expression
{
    if let Expression::Literal(lit) = &expr {
        if let crate::expressions::Literal::Number(n) =
            lit.as_ref()
        {
            // Only fold when the literal parses as i64; non-integer
            // literals fall through to the multiplication below.
            if let Ok(val) = n.parse::<i64>() {
                return Expression::Literal(Box::new(
                    crate::expressions::Literal::Number(
                        (val * factor).to_string(),
                    ),
                ));
            }
        }
    }
    Expression::Mul(Box::new(
        crate::expressions::BinaryOp::new(
            expr,
            Expression::Literal(Box::new(
                crate::expressions::Literal::Number(
                    factor.to_string(),
                ),
            )),
        ),
    ))
}
15515 let normalized_unit = match unit_str.as_str() {
15516 "YEAR" | "YY" | "YYYY" => "YEAR",
15517 "QUARTER" | "QQ" | "Q" => "QUARTER",
15518 "MONTH" | "MM" | "M" => "MONTH",
15519 "WEEK" | "WK" | "WW" => "WEEK",
15520 "DAY" | "DD" | "D" | "DY" => "DAY",
15521 _ => &unit_str,
15522 };
15523 match normalized_unit {
15524 "YEAR" => {
15525 let months = multiply_expr_spark(arg1, 12);
15526 Ok(Expression::Function(Box::new(
15527 Function::new(
15528 "ADD_MONTHS".to_string(),
15529 vec![arg2, months],
15530 ),
15531 )))
15532 }
15533 "QUARTER" => {
15534 let months = multiply_expr_spark(arg1, 3);
15535 Ok(Expression::Function(Box::new(
15536 Function::new(
15537 "ADD_MONTHS".to_string(),
15538 vec![arg2, months],
15539 ),
15540 )))
15541 }
15542 "MONTH" => Ok(Expression::Function(Box::new(
15543 Function::new(
15544 "ADD_MONTHS".to_string(),
15545 vec![arg2, arg1],
15546 ),
15547 ))),
15548 "WEEK" => {
15549 let days = multiply_expr_spark(arg1, 7);
15550 Ok(Expression::Function(Box::new(
15551 Function::new(
15552 "DATE_ADD".to_string(),
15553 vec![arg2, days],
15554 ),
15555 )))
15556 }
15557 "DAY" => Ok(Expression::Function(Box::new(
15558 Function::new(
15559 "DATE_ADD".to_string(),
15560 vec![arg2, arg1],
15561 ),
15562 ))),
15563 _ => {
15564 let unit = Expression::Identifier(
15565 Identifier::new(&unit_str),
15566 );
15567 Ok(Expression::Function(Box::new(
15568 Function::new(
15569 "DATE_ADD".to_string(),
15570 vec![unit, arg1, arg2],
15571 ),
15572 )))
15573 }
15574 }
15575 } else {
15576 // Non-TSQL source: keep 3-arg DATE_ADD(UNIT, val, date)
15577 let unit =
15578 Expression::Identifier(Identifier::new(&unit_str));
15579 Ok(Expression::Function(Box::new(Function::new(
15580 "DATE_ADD".to_string(),
15581 vec![unit, arg1, arg2],
15582 ))))
15583 }
15584 }
15585 DialectType::Hive => match unit_str.as_str() {
15586 "MONTH" => {
15587 Ok(Expression::Function(Box::new(Function::new(
15588 "ADD_MONTHS".to_string(),
15589 vec![arg2, arg1],
15590 ))))
15591 }
15592 _ => Ok(Expression::Function(Box::new(Function::new(
15593 "DATE_ADD".to_string(),
15594 vec![arg2, arg1],
15595 )))),
15596 },
15597 DialectType::Presto
15598 | DialectType::Trino
15599 | DialectType::Athena => {
15600 // Cast string literal date to TIMESTAMP
15601 let arg2 = if matches!(
15602 &arg2,
15603 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
15604 ) {
15605 Expression::Cast(Box::new(Cast {
15606 this: arg2,
15607 to: DataType::Timestamp {
15608 precision: None,
15609 timezone: false,
15610 },
15611 trailing_comments: Vec::new(),
15612 double_colon_syntax: false,
15613 format: None,
15614 default: None,
15615 inferred_type: None,
15616 }))
15617 } else {
15618 arg2
15619 };
15620 Ok(Expression::Function(Box::new(Function::new(
15621 "DATE_ADD".to_string(),
15622 vec![Expression::string(&unit_str), arg1, arg2],
15623 ))))
15624 }
15625 DialectType::MySQL => {
15626 let iu = Self::parse_interval_unit_static(&unit_str);
15627 Ok(Expression::DateAdd(Box::new(
15628 crate::expressions::DateAddFunc {
15629 this: arg2,
15630 interval: arg1,
15631 unit: iu,
15632 },
15633 )))
15634 }
15635 DialectType::PostgreSQL => {
15636 // Cast string literal date to TIMESTAMP
15637 let arg2 = if matches!(
15638 &arg2,
15639 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
15640 ) {
15641 Expression::Cast(Box::new(Cast {
15642 this: arg2,
15643 to: DataType::Timestamp {
15644 precision: None,
15645 timezone: false,
15646 },
15647 trailing_comments: Vec::new(),
15648 double_colon_syntax: false,
15649 format: None,
15650 default: None,
15651 inferred_type: None,
15652 }))
15653 } else {
15654 arg2
15655 };
15656 let interval = Expression::Interval(Box::new(
15657 crate::expressions::Interval {
15658 this: Some(Expression::string(&format!(
15659 "{} {}",
15660 Self::expr_to_string_static(&arg1),
15661 unit_str
15662 ))),
15663 unit: None,
15664 },
15665 ));
15666 Ok(Expression::Add(Box::new(
15667 crate::expressions::BinaryOp::new(arg2, interval),
15668 )))
15669 }
15670 DialectType::BigQuery => {
15671 let iu = Self::parse_interval_unit_static(&unit_str);
15672 let interval = Expression::Interval(Box::new(
15673 crate::expressions::Interval {
15674 this: Some(arg1),
15675 unit: Some(
15676 crate::expressions::IntervalUnitSpec::Simple {
15677 unit: iu,
15678 use_plural: false,
15679 },
15680 ),
15681 },
15682 ));
15683 // Non-TSQL sources: CAST string literal to DATETIME
15684 let arg2 = if !matches!(
15685 source,
15686 DialectType::TSQL | DialectType::Fabric
15687 ) && matches!(
15688 &arg2,
15689 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
15690 ) {
15691 Expression::Cast(Box::new(Cast {
15692 this: arg2,
15693 to: DataType::Custom {
15694 name: "DATETIME".to_string(),
15695 },
15696 trailing_comments: Vec::new(),
15697 double_colon_syntax: false,
15698 format: None,
15699 default: None,
15700 inferred_type: None,
15701 }))
15702 } else {
15703 arg2
15704 };
15705 Ok(Expression::Function(Box::new(Function::new(
15706 "DATE_ADD".to_string(),
15707 vec![arg2, interval],
15708 ))))
15709 }
15710 _ => {
15711 let unit =
15712 Expression::Identifier(Identifier::new(&unit_str));
15713 Ok(Expression::Function(Box::new(Function::new(
15714 "DATEADD".to_string(),
15715 vec![unit, arg1, arg2],
15716 ))))
15717 }
15718 }
15719 }
// DATE_ADD - 3-arg: either (unit, val, date) from Presto/ClickHouse
// or (date, val, 'UNIT') from Generic canonical form.
// Normalizes the argument order, then emits the target dialect's
// preferred date-addition syntax (function call, INTERVAL arithmetic,
// or a dialect-specific AST node).
"DATE_ADD" if f.args.len() == 3 => {
    let mut args = f.args;
    let arg0 = args.remove(0);
    let arg1 = args.remove(0);
    let arg2 = args.remove(0);
    // Detect Generic canonical form: DATE_ADD(date, amount, 'UNIT')
    // where arg2 is a string literal matching a unit name.
    let arg2_unit = match &arg2 {
        Expression::Literal(lit)
            if matches!(lit.as_ref(), Literal::String(_)) =>
        {
            let Literal::String(s) = lit.as_ref() else {
                unreachable!()
            };
            let u = s.to_ascii_uppercase();
            if matches!(
                u.as_str(),
                "DAY"
                    | "MONTH"
                    | "YEAR"
                    | "HOUR"
                    | "MINUTE"
                    | "SECOND"
                    | "WEEK"
                    | "QUARTER"
                    | "MILLISECOND"
                    | "MICROSECOND"
            ) {
                Some(u)
            } else {
                None
            }
        }
        _ => None,
    };
    // Reorder: if arg2 is the unit, swap to (unit, val, date) form.
    let (unit_str, val, date) = if let Some(u) = arg2_unit {
        (u, arg1, arg0)
    } else {
        (Self::get_unit_str_static(&arg0), arg1, arg2)
    };
    // Alias for backward compat with the rest of the match:
    // arg1 = amount, arg2 = date from here on.
    let arg1 = val;
    let arg2 = date;

    match target {
        // Presto family keeps the 3-arg form with a quoted unit string.
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![Expression::string(&unit_str), arg1, arg2],
            ))))
        }
        // DuckDB: date + INTERVAL amount UNIT (arithmetic form).
        DialectType::DuckDB => {
            let iu = Self::parse_interval_unit_static(&unit_str);
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(arg1),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit: iu,
                            use_plural: false,
                        },
                    ),
                },
            ));
            Ok(Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(arg2, interval),
            )))
        }
        DialectType::PostgreSQL
        | DialectType::Materialize
        | DialectType::RisingWave => {
            // PostgreSQL: x + INTERVAL '<amount> <UNIT>' (single-quoted
            // string combining amount and unit).
            let amount_str = Self::expr_to_string_static(&arg1);
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(Expression::string(&format!(
                        "{} {}",
                        amount_str, unit_str
                    ))),
                    unit: None,
                },
            ));
            Ok(Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(arg2, interval),
            )))
        }
        // DATEADD(UNIT, amount, date) with a bare identifier unit.
        DialectType::Snowflake
        | DialectType::TSQL
        | DialectType::Redshift => {
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
        DialectType::BigQuery
        | DialectType::MySQL
        | DialectType::Doris
        | DialectType::StarRocks
        | DialectType::Drill => {
            // DATE_ADD(date, INTERVAL amount UNIT)
            let iu = Self::parse_interval_unit_static(&unit_str);
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(arg1),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit: iu,
                            use_plural: false,
                        },
                    ),
                },
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![arg2, interval],
            ))))
        }
        DialectType::SQLite => {
            // SQLite: DATE(x, '1 DAY')
            // Build the string '1 DAY' from amount and unit.
            let amount_str = match &arg1 {
                Expression::Literal(lit)
                    if matches!(lit.as_ref(), Literal::Number(_)) =>
                {
                    let Literal::Number(n) = lit.as_ref() else {
                        unreachable!()
                    };
                    n.clone()
                }
                // NOTE(review): non-numeric amounts fall back to "1",
                // silently dropping the original expression — confirm
                // this lossy behavior is intentional.
                _ => "1".to_string(),
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATE".to_string(),
                vec![
                    arg2,
                    Expression::string(format!(
                        "{} {}",
                        amount_str, unit_str
                    )),
                ],
            ))))
        }
        DialectType::Dremio => {
            // Dremio: DATE_ADD(date, amount) - drops unit
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![arg2, arg1],
            ))))
        }
        DialectType::Spark => {
            // Spark: DATE_ADD(date, val) for DAY; otherwise keep the
            // 3-arg DATE_ADD(UNIT, val, date) form.
            if unit_str == "DAY" {
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![arg2, arg1],
                ))))
            } else {
                let unit =
                    Expression::Identifier(Identifier::new(&unit_str));
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![unit, arg1, arg2],
                ))))
            }
        }
        // Databricks always takes the 3-arg identifier-unit form.
        DialectType::Databricks => {
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
        DialectType::Hive => {
            // Hive: DATE_ADD(date, val) for DAY
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![arg2, arg1],
            ))))
        }
        // Fallback: 3-arg DATE_ADD(UNIT, val, date) with identifier unit.
        _ => {
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
    }
}
// DATE_ADD(date, days) - 2-arg Hive/Spark/Generic form (add days).
// The unit is implicitly DAY; each target branch rewrites to that
// dialect's day-addition syntax, adding CASTs when the date argument
// is a string literal the target would not implicitly coerce.
"DATE_ADD"
    if f.args.len() == 2
        && matches!(
            source,
            DialectType::Hive
                | DialectType::Spark
                | DialectType::Databricks
                | DialectType::Generic
        ) =>
{
    let mut args = f.args;
    let date = args.remove(0);
    let days = args.remove(0);
    match target {
        DialectType::Hive | DialectType::Spark => {
            // Keep as DATE_ADD(date, days) for Hive/Spark
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![date, days],
            ))))
        }
        DialectType::Databricks => {
            // Databricks: DATEADD(DAY, days, date)
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new("DAY")),
                    days,
                    date,
                ],
            ))))
        }
        DialectType::DuckDB => {
            // DuckDB: CAST(date AS DATE) + INTERVAL days DAY
            let cast_date = Self::ensure_cast_date(date);
            // Wrap complex expressions (like Mul from DATE_SUB negation)
            // in Paren so operator precedence survives re-rendering.
            let interval_val = if matches!(
                days,
                Expression::Mul(_)
                    | Expression::Sub(_)
                    | Expression::Add(_)
            ) {
                Expression::Paren(Box::new(crate::expressions::Paren {
                    this: days,
                    trailing_comments: vec![],
                }))
            } else {
                days
            };
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(interval_val),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit: crate::expressions::IntervalUnit::Day,
                            use_plural: false,
                        },
                    ),
                },
            ));
            Ok(Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(cast_date, interval),
            )))
        }
        DialectType::Snowflake => {
            // For Hive source with string literal date, use
            // CAST(CAST(date AS TIMESTAMP) AS DATE).
            let cast_date = if matches!(
                source,
                DialectType::Hive
                    | DialectType::Spark
                    | DialectType::Databricks
            ) {
                if matches!(
                    date,
                    Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(_))
                ) {
                    Self::double_cast_timestamp_date(date)
                } else {
                    date
                }
            } else {
                date
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new("DAY")),
                    days,
                    cast_date,
                ],
            ))))
        }
        DialectType::Redshift => {
            // Redshift: DATEADD(DAY, days, date) — no cast needed.
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new("DAY")),
                    days,
                    date,
                ],
            ))))
        }
        DialectType::TSQL | DialectType::Fabric => {
            // For Hive source with string literal date, use
            // CAST(CAST(date AS DATETIME2) AS DATE).
            // But Databricks DATE_ADD doesn't need this wrapping for TSQL.
            let cast_date = if matches!(
                source,
                DialectType::Hive | DialectType::Spark
            ) {
                if matches!(
                    date,
                    Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(_))
                ) {
                    Self::double_cast_datetime2_date(date)
                } else {
                    date
                }
            } else {
                date
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new("DAY")),
                    days,
                    cast_date,
                ],
            ))))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // For Hive source with string literal date, use
            // CAST(CAST(date AS TIMESTAMP) AS DATE).
            let cast_date = if matches!(
                source,
                DialectType::Hive
                    | DialectType::Spark
                    | DialectType::Databricks
            ) {
                if matches!(
                    date,
                    Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(_))
                ) {
                    Self::double_cast_timestamp_date(date)
                } else {
                    date
                }
            } else {
                date
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![Expression::string("DAY"), days, cast_date],
            ))))
        }
        DialectType::BigQuery => {
            // For Hive/Spark source, wrap date in
            // CAST(CAST(date AS DATETIME) AS DATE).
            // Note: unlike the branches above, this wraps unconditionally
            // (no string-literal check) for these sources.
            let cast_date = if matches!(
                source,
                DialectType::Hive
                    | DialectType::Spark
                    | DialectType::Databricks
            ) {
                Self::double_cast_datetime_date(date)
            } else {
                date
            };
            // Wrap complex expressions in Paren for interval.
            let interval_val = if matches!(
                days,
                Expression::Mul(_)
                    | Expression::Sub(_)
                    | Expression::Add(_)
            ) {
                Expression::Paren(Box::new(crate::expressions::Paren {
                    this: days,
                    trailing_comments: vec![],
                }))
            } else {
                days
            };
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(interval_val),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit: crate::expressions::IntervalUnit::Day,
                            use_plural: false,
                        },
                    ),
                },
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![cast_date, interval],
            ))))
        }
        DialectType::MySQL => {
            // MySQL gets a dedicated DateAdd AST node with an explicit
            // DAY unit instead of a plain function call.
            let iu = crate::expressions::IntervalUnit::Day;
            Ok(Expression::DateAdd(Box::new(
                crate::expressions::DateAddFunc {
                    this: date,
                    interval: days,
                    unit: iu,
                },
            )))
        }
        DialectType::PostgreSQL => {
            // PostgreSQL: date + INTERVAL '<days> DAY'
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(Expression::string(&format!(
                        "{} DAY",
                        Self::expr_to_string_static(&days)
                    ))),
                    unit: None,
                },
            ));
            Ok(Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(date, interval),
            )))
        }
        DialectType::Doris
        | DialectType::StarRocks
        | DialectType::Drill => {
            // DATE_ADD(date, INTERVAL days DAY)
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(days),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit: crate::expressions::IntervalUnit::Day,
                            use_plural: false,
                        },
                    ),
                },
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![date, interval],
            ))))
        }
        // Fallback: keep the 2-arg form unchanged.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "DATE_ADD".to_string(),
            vec![date, days],
        )))),
    }
}
// DATE_ADD(date, INTERVAL val UNIT) - MySQL 2-arg form with INTERVAL as 2nd arg.
// MySQL-family targets keep the form; PostgreSQL converts to interval
// arithmetic (with a multiplication trick for non-literal amounts);
// all other targets pass the call through unchanged.
"DATE_ADD"
    if f.args.len() == 2
        && matches!(
            source,
            DialectType::MySQL | DialectType::SingleStore
        )
        && matches!(&f.args[1], Expression::Interval(_)) =>
{
    let mut args = f.args;
    let date = args.remove(0);
    let interval_expr = args.remove(0);
    // Pull the amount expression and unit out of the INTERVAL node.
    let (val, unit) = Self::extract_interval_parts(&interval_expr);
    let unit_str = Self::interval_unit_to_string(&unit);
    // Literal amounts can be folded into the interval string directly;
    // anything else (e.g. a column reference) cannot.
    let is_literal = matches!(&val,
        Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_) | Literal::String(_))
    );

    match target {
        DialectType::MySQL | DialectType::SingleStore => {
            // Keep as DATE_ADD(date, INTERVAL val UNIT)
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![date, interval_expr],
            ))))
        }
        DialectType::PostgreSQL => {
            if is_literal {
                // Literal: date + INTERVAL 'val UNIT'
                // NOTE(review): uses Self::expr_to_string here while
                // sibling arms use expr_to_string_static — confirm the
                // two helpers render identically.
                let interval = Expression::Interval(Box::new(
                    crate::expressions::Interval {
                        this: Some(Expression::Literal(Box::new(
                            Literal::String(format!(
                                "{} {}",
                                Self::expr_to_string(&val),
                                unit_str
                            )),
                        ))),
                        unit: None,
                    },
                ));
                Ok(Expression::Add(Box::new(
                    crate::expressions::BinaryOp::new(date, interval),
                )))
            } else {
                // Non-literal (column ref): date + INTERVAL '1 UNIT' * val
                let interval_one = Expression::Interval(Box::new(
                    crate::expressions::Interval {
                        this: Some(Expression::Literal(Box::new(
                            Literal::String(format!("1 {}", unit_str)),
                        ))),
                        unit: None,
                    },
                ));
                let mul = Expression::Mul(Box::new(
                    crate::expressions::BinaryOp::new(
                        interval_one,
                        val,
                    ),
                ));
                Ok(Expression::Add(Box::new(
                    crate::expressions::BinaryOp::new(date, mul),
                )))
            }
        }
        _ => {
            // Default: keep as DATE_ADD(date, interval)
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![date, interval_expr],
            ))))
        }
    }
}
// DATE_SUB(date, days) - 2-arg Hive/Spark form (subtract days).
// Targets without a native DATE_SUB emit their DATE_ADD equivalent
// with the day count negated (days * -1).
"DATE_SUB"
    if f.args.len() == 2
        && matches!(
            source,
            DialectType::Hive
                | DialectType::Spark
                | DialectType::Databricks
        ) =>
{
    let mut args = f.args;
    let date = args.remove(0);
    let days = args.remove(0);
    // Helper to create days * -1 (negation via multiplication so the
    // original expression shape is preserved in the output SQL).
    let make_neg_days = |d: Expression| -> Expression {
        Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
            d,
            Expression::Literal(Box::new(Literal::Number(
                "-1".to_string(),
            ))),
        )))
    };
    // String-literal dates need an explicit cast chain on several targets.
    let is_string_literal = matches!(date, Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(_)));
    match target {
        DialectType::Hive
        | DialectType::Spark
        | DialectType::Databricks => {
            // Keep as DATE_SUB(date, days) for Hive/Spark
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_SUB".to_string(),
                vec![date, days],
            ))))
        }
        DialectType::DuckDB => {
            // DuckDB: CAST(date AS DATE) + INTERVAL (days * -1) DAY
            let cast_date = Self::ensure_cast_date(date);
            let neg = make_neg_days(days);
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(Expression::Paren(Box::new(
                        crate::expressions::Paren {
                            this: neg,
                            trailing_comments: vec![],
                        },
                    ))),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit: crate::expressions::IntervalUnit::Day,
                            use_plural: false,
                        },
                    ),
                },
            ));
            Ok(Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(cast_date, interval),
            )))
        }
        DialectType::Snowflake => {
            // Snowflake: DATEADD(DAY, days * -1, date), casting string
            // literal dates via CAST(CAST(x AS TIMESTAMP) AS DATE).
            let cast_date = if is_string_literal {
                Self::double_cast_timestamp_date(date)
            } else {
                date
            };
            let neg = make_neg_days(days);
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new("DAY")),
                    neg,
                    cast_date,
                ],
            ))))
        }
        DialectType::Redshift => {
            // Redshift: DATEADD(DAY, days * -1, date)
            let neg = make_neg_days(days);
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new("DAY")),
                    neg,
                    date,
                ],
            ))))
        }
        DialectType::TSQL | DialectType::Fabric => {
            // TSQL: DATEADD(DAY, days * -1, date), casting string
            // literal dates via DATETIME2.
            let cast_date = if is_string_literal {
                Self::double_cast_datetime2_date(date)
            } else {
                date
            };
            let neg = make_neg_days(days);
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new("DAY")),
                    neg,
                    cast_date,
                ],
            ))))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // Presto family: DATE_ADD('DAY', days * -1, date)
            let cast_date = if is_string_literal {
                Self::double_cast_timestamp_date(date)
            } else {
                date
            };
            let neg = make_neg_days(days);
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![Expression::string("DAY"), neg, cast_date],
            ))))
        }
        DialectType::BigQuery => {
            // BigQuery: DATE_ADD(date, INTERVAL (days * -1) DAY)
            let cast_date = if is_string_literal {
                Self::double_cast_datetime_date(date)
            } else {
                date
            };
            let neg = make_neg_days(days);
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(Expression::Paren(Box::new(
                        crate::expressions::Paren {
                            this: neg,
                            trailing_comments: vec![],
                        },
                    ))),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit: crate::expressions::IntervalUnit::Day,
                            use_plural: false,
                        },
                    ),
                },
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![cast_date, interval],
            ))))
        }
        // Fallback: keep DATE_SUB unchanged.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "DATE_SUB".to_string(),
            vec![date, days],
        )))),
    }
}
// ADD_MONTHS(date, val) -> target-specific month-addition syntax.
// Only Snowflake (when it is also the source) and the fallback keep
// ADD_MONTHS; other targets rewrite to DATEADD / DATE_ADD / INTERVAL.
"ADD_MONTHS" if f.args.len() == 2 => {
    let mut args = f.args;
    let date = args.remove(0);
    let val = args.remove(0);
    match target {
        DialectType::TSQL => {
            // TSQL: DATEADD(MONTH, val, CAST(date AS DATETIME2))
            let cast_date = Self::ensure_cast_datetime2(date);
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new("MONTH")),
                    val,
                    cast_date,
                ],
            ))))
        }
        DialectType::DuckDB => {
            // DuckDB: date + INTERVAL val MONTH
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(val),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit:
                                crate::expressions::IntervalUnit::Month,
                            use_plural: false,
                        },
                    ),
                },
            ));
            Ok(Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(date, interval),
            )))
        }
        DialectType::Snowflake => {
            // Keep ADD_MONTHS when source is Snowflake; otherwise
            // normalize to DATEADD(MONTH, val, date).
            if matches!(source, DialectType::Snowflake) {
                Ok(Expression::Function(Box::new(Function::new(
                    "ADD_MONTHS".to_string(),
                    vec![date, val],
                ))))
            } else {
                Ok(Expression::Function(Box::new(Function::new(
                    "DATEADD".to_string(),
                    vec![
                        Expression::Identifier(Identifier::new(
                            "MONTH",
                        )),
                        val,
                        date,
                    ],
                ))))
            }
        }
        DialectType::Redshift => {
            // Redshift: DATEADD(MONTH, val, date)
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new("MONTH")),
                    val,
                    date,
                ],
            ))))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // Presto family: DATE_ADD('MONTH', val, date)
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![Expression::string("MONTH"), val, date],
            ))))
        }
        DialectType::BigQuery => {
            // BigQuery: DATE_ADD(date, INTERVAL val MONTH)
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(val),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit:
                                crate::expressions::IntervalUnit::Month,
                            use_plural: false,
                        },
                    ),
                },
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![date, interval],
            ))))
        }
        // Fallback: keep ADD_MONTHS unchanged.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "ADD_MONTHS".to_string(),
            vec![date, val],
        )))),
    }
}
// DATETRUNC(unit, date) - TSQL form -> DATE_TRUNC for other targets.
// Only the unit-argument representation varies: bare identifier for
// TSQL/Fabric, quoted string everywhere else.
"DATETRUNC" if f.args.len() == 2 => {
    let mut args = f.args;
    let arg0 = args.remove(0);
    let arg1 = args.remove(0);
    let unit_str = Self::get_unit_str_static(&arg0);
    match target {
        DialectType::TSQL | DialectType::Fabric => {
            // Keep as DATETRUNC for TSQL - the target handler will
            // uppercase the unit.
            Ok(Expression::Function(Box::new(Function::new(
                "DATETRUNC".to_string(),
                vec![
                    Expression::Identifier(Identifier::new(&unit_str)),
                    arg1,
                ],
            ))))
        }
        DialectType::DuckDB => {
            // DuckDB: DATE_TRUNC('UNIT', expr) with CAST for string literals
            let date = Self::ensure_cast_timestamp(arg1);
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_TRUNC".to_string(),
                vec![Expression::string(&unit_str), date],
            ))))
        }
        DialectType::ClickHouse => {
            // ClickHouse: dateTrunc('UNIT', expr) — camelCase name.
            Ok(Expression::Function(Box::new(Function::new(
                "dateTrunc".to_string(),
                vec![Expression::string(&unit_str), arg1],
            ))))
        }
        _ => {
            // Standard: DATE_TRUNC('UNIT', expr)
            let unit = Expression::string(&unit_str);
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_TRUNC".to_string(),
                vec![unit, arg1],
            ))))
        }
    }
}
16524 // GETDATE() -> CURRENT_TIMESTAMP for non-TSQL targets
16525 "GETDATE" if f.args.is_empty() => match target {
16526 DialectType::TSQL => Ok(Expression::Function(f)),
16527 DialectType::Redshift => Ok(Expression::Function(Box::new(
16528 Function::new("GETDATE".to_string(), vec![]),
16529 ))),
16530 _ => Ok(Expression::CurrentTimestamp(
16531 crate::expressions::CurrentTimestamp {
16532 precision: None,
16533 sysdate: false,
16534 },
16535 )),
16536 },
16537 // TO_HEX(x) / HEX(x) -> target-specific hex function
16538 "TO_HEX" | "HEX" if f.args.len() == 1 => {
16539 let name = match target {
16540 DialectType::Presto | DialectType::Trino => "TO_HEX",
16541 DialectType::Spark
16542 | DialectType::Databricks
16543 | DialectType::Hive => "HEX",
16544 DialectType::DuckDB
16545 | DialectType::PostgreSQL
16546 | DialectType::Redshift => "TO_HEX",
16547 _ => &f.name,
16548 };
16549 Ok(Expression::Function(Box::new(Function::new(
16550 name.to_string(),
16551 f.args,
16552 ))))
16553 }
// FROM_HEX(x) / UNHEX(x) -> target-specific hex decode function.
"FROM_HEX" | "UNHEX" if f.args.len() == 1 => {
    match target {
        DialectType::BigQuery => {
            // BigQuery: UNHEX(x) -> FROM_HEX(x)
            // Special case: UNHEX(MD5(x)) -> FROM_HEX(TO_HEX(MD5(x)))
            // because BigQuery MD5 returns BYTES, not hex string
            let arg = &f.args[0];
            let wrapped_arg = match arg {
                Expression::Function(inner_f)
                    if inner_f.name.eq_ignore_ascii_case("MD5")
                        || inner_f
                            .name
                            .eq_ignore_ascii_case("SHA1")
                        || inner_f
                            .name
                            .eq_ignore_ascii_case("SHA256")
                        || inner_f
                            .name
                            .eq_ignore_ascii_case("SHA512") =>
                {
                    // Wrap hash function in TO_HEX for BigQuery
                    Expression::Function(Box::new(Function::new(
                        "TO_HEX".to_string(),
                        vec![arg.clone()],
                    )))
                }
                // Not a known hash call: take ownership of the single
                // argument (the borrow of `arg` ends before this move).
                _ => f.args.into_iter().next().unwrap(),
            };
            Ok(Expression::Function(Box::new(Function::new(
                "FROM_HEX".to_string(),
                vec![wrapped_arg],
            ))))
        }
        _ => {
            // Simple rename for the remaining targets; unknown targets
            // keep the source spelling.
            let name = match target {
                DialectType::Presto | DialectType::Trino => "FROM_HEX",
                DialectType::Spark
                | DialectType::Databricks
                | DialectType::Hive => "UNHEX",
                _ => &f.name,
            };
            Ok(Expression::Function(Box::new(Function::new(
                name.to_string(),
                f.args,
            ))))
        }
    }
}
16603 // TO_UTF8(x) -> ENCODE(x, 'utf-8') for Spark
16604 "TO_UTF8" if f.args.len() == 1 => match target {
16605 DialectType::Spark | DialectType::Databricks => {
16606 let mut args = f.args;
16607 args.push(Expression::string("utf-8"));
16608 Ok(Expression::Function(Box::new(Function::new(
16609 "ENCODE".to_string(),
16610 args,
16611 ))))
16612 }
16613 _ => Ok(Expression::Function(f)),
16614 },
16615 // FROM_UTF8(x) -> DECODE(x, 'utf-8') for Spark
16616 "FROM_UTF8" if f.args.len() == 1 => match target {
16617 DialectType::Spark | DialectType::Databricks => {
16618 let mut args = f.args;
16619 args.push(Expression::string("utf-8"));
16620 Ok(Expression::Function(Box::new(Function::new(
16621 "DECODE".to_string(),
16622 args,
16623 ))))
16624 }
16625 _ => Ok(Expression::Function(f)),
16626 },
16627 // STARTS_WITH(x, y) / STARTSWITH(x, y) -> target-specific
16628 "STARTS_WITH" | "STARTSWITH" if f.args.len() == 2 => {
16629 let name = match target {
16630 DialectType::Spark | DialectType::Databricks => "STARTSWITH",
16631 DialectType::Presto | DialectType::Trino => "STARTS_WITH",
16632 DialectType::PostgreSQL | DialectType::Redshift => {
16633 "STARTS_WITH"
16634 }
16635 _ => &f.name,
16636 };
16637 Ok(Expression::Function(Box::new(Function::new(
16638 name.to_string(),
16639 f.args,
16640 ))))
16641 }
16642 // APPROX_COUNT_DISTINCT(x) <-> APPROX_DISTINCT(x)
16643 "APPROX_COUNT_DISTINCT" if f.args.len() >= 1 => {
16644 let name = match target {
16645 DialectType::Presto
16646 | DialectType::Trino
16647 | DialectType::Athena => "APPROX_DISTINCT",
16648 _ => "APPROX_COUNT_DISTINCT",
16649 };
16650 Ok(Expression::Function(Box::new(Function::new(
16651 name.to_string(),
16652 f.args,
16653 ))))
16654 }
16655 // JSON_EXTRACT -> GET_JSON_OBJECT for Spark/Hive
16656 "JSON_EXTRACT"
16657 if f.args.len() == 2
16658 && !matches!(source, DialectType::BigQuery)
16659 && matches!(
16660 target,
16661 DialectType::Spark
16662 | DialectType::Databricks
16663 | DialectType::Hive
16664 ) =>
16665 {
16666 Ok(Expression::Function(Box::new(Function::new(
16667 "GET_JSON_OBJECT".to_string(),
16668 f.args,
16669 ))))
16670 }
// JSON_EXTRACT(x, path) -> x -> path for SQLite (arrow syntax).
// Builds a JsonExtract AST node with arrow_syntax enabled so the
// SQLite generator renders the `->` operator instead of a function call.
"JSON_EXTRACT"
    if f.args.len() == 2 && matches!(target, DialectType::SQLite) =>
{
    let mut args = f.args;
    // Remove index 1 first so index 0 is still valid afterwards.
    let path = args.remove(1);
    let this = args.remove(0);
    Ok(Expression::JsonExtract(Box::new(
        crate::expressions::JsonExtractFunc {
            this,
            path,
            returning: None,
            arrow_syntax: true,        // render as `x -> path`
            hash_arrow_syntax: false,  // not the `#>` variant
            wrapper_option: None,
            quotes_option: None,
            on_scalar_string: false,
            on_error: None,
        },
    )))
}
// JSON_FORMAT(x) -> TO_JSON(x) for Spark, TO_JSON_STRING for BigQuery,
// CAST(TO_JSON(x) AS TEXT) for DuckDB; other targets pass through.
"JSON_FORMAT" if f.args.len() == 1 => {
    match target {
        DialectType::Spark | DialectType::Databricks => {
            // Presto JSON_FORMAT(JSON '...') needs Spark's string-unquoting flow:
            // REGEXP_EXTRACT(TO_JSON(FROM_JSON('[...]', SCHEMA_OF_JSON('[...]'))), '^.(.*).$', 1)
            // The literal is wrapped in [...] and the outer brackets are
            // stripped again by the REGEXP_EXTRACT pattern.
            if matches!(
                source,
                DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Athena
            ) {
                if let Some(Expression::ParseJson(pj)) = f.args.first()
                {
                    if let Expression::Literal(lit) = &pj.this {
                        if let Literal::String(s) = lit.as_ref() {
                            let wrapped =
                                Expression::Literal(Box::new(
                                    Literal::String(format!("[{}]", s)),
                                ));
                            let schema_of_json = Expression::Function(
                                Box::new(Function::new(
                                    "SCHEMA_OF_JSON".to_string(),
                                    vec![wrapped.clone()],
                                )),
                            );
                            let from_json = Expression::Function(
                                Box::new(Function::new(
                                    "FROM_JSON".to_string(),
                                    vec![wrapped, schema_of_json],
                                )),
                            );
                            let to_json = Expression::Function(
                                Box::new(Function::new(
                                    "TO_JSON".to_string(),
                                    vec![from_json],
                                )),
                            );
                            // Early return: this branch bypasses the
                            // CAST/TO_JSON stripping logic below.
                            return Ok(Expression::Function(Box::new(
                                Function::new(
                                    "REGEXP_EXTRACT".to_string(),
                                    vec![
                                        to_json,
                                        Expression::Literal(Box::new(
                                            Literal::String(
                                                "^.(.*).$".to_string(),
                                            ),
                                        )),
                                        Expression::Literal(Box::new(
                                            Literal::Number(
                                                "1".to_string(),
                                            ),
                                        )),
                                    ],
                                ),
                            )));
                        }
                    }
                }
            }

            // Strip inner CAST(... AS JSON) or TO_JSON() if present.
            // The CastToJsonForSpark may have already converted
            // CAST(x AS JSON) to TO_JSON(x); avoid double wrapping.
            let mut args = f.args;
            if let Some(Expression::Cast(ref c)) = args.first() {
                if matches!(&c.to, DataType::Json | DataType::JsonB) {
                    args = vec![c.this.clone()];
                }
            } else if let Some(Expression::Function(ref inner_f)) =
                args.first()
            {
                if inner_f.name.eq_ignore_ascii_case("TO_JSON")
                    && inner_f.args.len() == 1
                {
                    // Already TO_JSON(x) from CastToJsonForSpark, just use the inner arg
                    args = inner_f.args.clone();
                }
            }
            Ok(Expression::Function(Box::new(Function::new(
                "TO_JSON".to_string(),
                args,
            ))))
        }
        DialectType::BigQuery => Ok(Expression::Function(Box::new(
            Function::new("TO_JSON_STRING".to_string(), f.args),
        ))),
        DialectType::DuckDB => {
            // CAST(TO_JSON(x) AS TEXT)
            let to_json = Expression::Function(Box::new(
                Function::new("TO_JSON".to_string(), f.args),
            ));
            Ok(Expression::Cast(Box::new(Cast {
                this: to_json,
                to: DataType::Text,
                trailing_comments: Vec::new(),
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })))
        }
        // Fallback: keep JSON_FORMAT unchanged.
        _ => Ok(Expression::Function(f)),
    }
}
16796 // SYSDATE -> CURRENT_TIMESTAMP for non-Oracle/Redshift/Snowflake targets
16797 "SYSDATE" if f.args.is_empty() => {
16798 match target {
16799 DialectType::Oracle | DialectType::Redshift => {
16800 Ok(Expression::Function(f))
16801 }
16802 DialectType::Snowflake => {
16803 // Snowflake uses SYSDATE() with parens
16804 let mut f = *f;
16805 f.no_parens = false;
16806 Ok(Expression::Function(Box::new(f)))
16807 }
16808 DialectType::DuckDB => {
16809 // DuckDB: SYSDATE() -> CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
16810 Ok(Expression::AtTimeZone(Box::new(
16811 crate::expressions::AtTimeZone {
16812 this: Expression::CurrentTimestamp(
16813 crate::expressions::CurrentTimestamp {
16814 precision: None,
16815 sysdate: false,
16816 },
16817 ),
16818 zone: Expression::Literal(Box::new(
16819 Literal::String("UTC".to_string()),
16820 )),
16821 },
16822 )))
16823 }
16824 _ => Ok(Expression::CurrentTimestamp(
16825 crate::expressions::CurrentTimestamp {
16826 precision: None,
16827 sysdate: true,
16828 },
16829 )),
16830 }
16831 }
16832 // LOGICAL_OR(x) -> BOOL_OR(x)
16833 "LOGICAL_OR" if f.args.len() == 1 => {
16834 let name = match target {
16835 DialectType::Spark | DialectType::Databricks => "BOOL_OR",
16836 _ => &f.name,
16837 };
16838 Ok(Expression::Function(Box::new(Function::new(
16839 name.to_string(),
16840 f.args,
16841 ))))
16842 }
16843 // LOGICAL_AND(x) -> BOOL_AND(x)
16844 "LOGICAL_AND" if f.args.len() == 1 => {
16845 let name = match target {
16846 DialectType::Spark | DialectType::Databricks => "BOOL_AND",
16847 _ => &f.name,
16848 };
16849 Ok(Expression::Function(Box::new(Function::new(
16850 name.to_string(),
16851 f.args,
16852 ))))
16853 }
16854 // MONTHS_ADD(d, n) -> ADD_MONTHS(d, n) for Oracle
16855 "MONTHS_ADD" if f.args.len() == 2 => match target {
16856 DialectType::Oracle => Ok(Expression::Function(Box::new(
16857 Function::new("ADD_MONTHS".to_string(), f.args),
16858 ))),
16859 _ => Ok(Expression::Function(f)),
16860 },
16861 // ARRAY_JOIN(arr, sep[, null_replacement]) -> target-specific
16862 "ARRAY_JOIN" if f.args.len() >= 2 => {
16863 match target {
16864 DialectType::Spark | DialectType::Databricks => {
16865 // Keep as ARRAY_JOIN for Spark (it supports null_replacement)
16866 Ok(Expression::Function(f))
16867 }
16868 DialectType::Hive => {
16869 // ARRAY_JOIN(arr, sep[, null_rep]) -> CONCAT_WS(sep, arr) (drop null_replacement)
16870 let mut args = f.args;
16871 let arr = args.remove(0);
16872 let sep = args.remove(0);
16873 // Drop any remaining args (null_replacement)
16874 Ok(Expression::Function(Box::new(Function::new(
16875 "CONCAT_WS".to_string(),
16876 vec![sep, arr],
16877 ))))
16878 }
16879 DialectType::Presto | DialectType::Trino => {
16880 Ok(Expression::Function(f))
16881 }
16882 _ => Ok(Expression::Function(f)),
16883 }
16884 }
            // LOCATE(substr, str, pos) 3-arg -> target-specific
            // For Presto/DuckDB: STRPOS doesn't support 3-arg, need complex expansion
            "LOCATE"
                if f.args.len() == 3
                    && matches!(
                        target,
                        DialectType::Presto
                            | DialectType::Trino
                            | DialectType::Athena
                            | DialectType::DuckDB
                    ) =>
            {
                let mut args = f.args;
                let substr = args.remove(0);
                let string = args.remove(0);
                let pos = args.remove(0);
                // Search only the suffix beginning at `pos`:
                // STRPOS(SUBSTRING(string, pos), substr)
                let substring_call = Expression::Function(Box::new(Function::new(
                    "SUBSTRING".to_string(),
                    vec![string.clone(), pos.clone()],
                )));
                let strpos_call = Expression::Function(Box::new(Function::new(
                    "STRPOS".to_string(),
                    vec![substring_call, substr.clone()],
                )));
                // Re-offset the suffix-relative hit back to a full-string
                // 1-based index: STRPOS(...) + pos - 1
                let pos_adjusted =
                    Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
                        Expression::Add(Box::new(
                            crate::expressions::BinaryOp::new(
                                strpos_call.clone(),
                                pos.clone(),
                            ),
                        )),
                        Expression::number(1),
                    )));
                // STRPOS returns 0 for "not found"; that case must stay 0 and
                // not be shifted by the offset arithmetic above.
                // STRPOS(...) = 0
                let is_zero =
                    Expression::Eq(Box::new(crate::expressions::BinaryOp::new(
                        strpos_call.clone(),
                        Expression::number(0),
                    )));

                match target {
                    DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Athena => {
                        // IF(STRPOS(...) = 0, 0, STRPOS(...) + pos - 1)
                        Ok(Expression::Function(Box::new(Function::new(
                            "IF".to_string(),
                            vec![is_zero, Expression::number(0), pos_adjusted],
                        ))))
                    }
                    DialectType::DuckDB => {
                        // CASE WHEN STRPOS(...) = 0 THEN 0 ELSE STRPOS(...) + pos - 1 END
                        Ok(Expression::Case(Box::new(crate::expressions::Case {
                            operand: None,
                            whens: vec![(is_zero, Expression::number(0))],
                            else_: Some(pos_adjusted),
                            comments: Vec::new(),
                            inferred_type: None,
                        })))
                    }
                    // Unreachable given the arm's guard, but kept so the inner
                    // match stays exhaustive: reassemble the original call.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "LOCATE".to_string(),
                        vec![substr, string, pos],
                    )))),
                }
            }
16954 // STRPOS(haystack, needle, occurrence) 3-arg -> INSTR(haystack, needle, 1, occurrence)
16955 "STRPOS"
16956 if f.args.len() == 3
16957 && matches!(
16958 target,
16959 DialectType::BigQuery
16960 | DialectType::Oracle
16961 | DialectType::Teradata
16962 ) =>
16963 {
16964 let mut args = f.args;
16965 let haystack = args.remove(0);
16966 let needle = args.remove(0);
16967 let occurrence = args.remove(0);
16968 Ok(Expression::Function(Box::new(Function::new(
16969 "INSTR".to_string(),
16970 vec![haystack, needle, Expression::number(1), occurrence],
16971 ))))
16972 }
16973 // SCHEMA_NAME(id) -> target-specific
16974 "SCHEMA_NAME" if f.args.len() <= 1 => match target {
16975 DialectType::MySQL | DialectType::SingleStore => {
16976 Ok(Expression::Function(Box::new(Function::new(
16977 "SCHEMA".to_string(),
16978 vec![],
16979 ))))
16980 }
16981 DialectType::PostgreSQL => Ok(Expression::CurrentSchema(Box::new(
16982 crate::expressions::CurrentSchema { this: None },
16983 ))),
16984 DialectType::SQLite => Ok(Expression::string("main")),
16985 _ => Ok(Expression::Function(f)),
16986 },
16987 // STRTOL(str, base) -> FROM_BASE(str, base) for Trino/Presto
16988 "STRTOL" if f.args.len() == 2 => match target {
16989 DialectType::Presto | DialectType::Trino => {
16990 Ok(Expression::Function(Box::new(Function::new(
16991 "FROM_BASE".to_string(),
16992 f.args,
16993 ))))
16994 }
16995 _ => Ok(Expression::Function(f)),
16996 },
16997 // EDITDIST3(a, b) -> LEVENSHTEIN(a, b) for Spark
16998 "EDITDIST3" if f.args.len() == 2 => match target {
16999 DialectType::Spark | DialectType::Databricks => {
17000 Ok(Expression::Function(Box::new(Function::new(
17001 "LEVENSHTEIN".to_string(),
17002 f.args,
17003 ))))
17004 }
17005 _ => Ok(Expression::Function(f)),
17006 },
17007 // FORMAT(num, decimals) from MySQL -> DuckDB FORMAT('{:,.Xf}', num)
17008 "FORMAT"
17009 if f.args.len() == 2
17010 && matches!(
17011 source,
17012 DialectType::MySQL | DialectType::SingleStore
17013 )
17014 && matches!(target, DialectType::DuckDB) =>
17015 {
17016 let mut args = f.args;
17017 let num_expr = args.remove(0);
17018 let decimals_expr = args.remove(0);
17019 // Extract decimal count
17020 let dec_count = match &decimals_expr {
17021 Expression::Literal(lit)
17022 if matches!(lit.as_ref(), Literal::Number(_)) =>
17023 {
17024 let Literal::Number(n) = lit.as_ref() else {
17025 unreachable!()
17026 };
17027 n.clone()
17028 }
17029 _ => "0".to_string(),
17030 };
17031 let fmt_str = format!("{{:,.{}f}}", dec_count);
17032 Ok(Expression::Function(Box::new(Function::new(
17033 "FORMAT".to_string(),
17034 vec![Expression::string(&fmt_str), num_expr],
17035 ))))
17036 }
            // FORMAT(x, fmt) from TSQL -> DATE_FORMAT for Spark, or expand short codes
            "FORMAT"
                if f.args.len() == 2
                    && matches!(
                        source,
                        DialectType::TSQL | DialectType::Fabric
                    ) =>
            {
                let val_expr = f.args[0].clone();
                let fmt_expr = f.args[1].clone();
                // Expand unambiguous .NET single-char date format shortcodes to full patterns.
                // Only expand shortcodes that are NOT also valid numeric format specifiers.
                // Ambiguous: d, D, f, F, g, G (used for both dates and numbers)
                // Unambiguous date: m/M (Month day), t/T (Time), y/Y (Year month)
                let (expanded_fmt, is_shortcode) = match &fmt_expr {
                    Expression::Literal(lit)
                        if matches!(
                            lit.as_ref(),
                            crate::expressions::Literal::String(_)
                        ) =>
                    {
                        let crate::expressions::Literal::String(s) = lit.as_ref()
                        else {
                            unreachable!()
                        };
                        match s.as_str() {
                            "m" | "M" => (Expression::string("MMMM d"), true),
                            "t" => (Expression::string("h:mm tt"), true),
                            "T" => (Expression::string("h:mm:ss tt"), true),
                            "y" | "Y" => (Expression::string("MMMM yyyy"), true),
                            _ => (fmt_expr.clone(), false),
                        }
                    }
                    _ => (fmt_expr.clone(), false),
                };
                // Heuristic: a literal format containing the usual date pattern
                // fragments is treated as a date format; an expanded shortcode
                // is a date format by construction. Non-literal formats are
                // assumed numeric.
                let is_date_format = is_shortcode
                    || match &expanded_fmt {
                        Expression::Literal(lit)
                            if matches!(
                                lit.as_ref(),
                                crate::expressions::Literal::String(_)
                            ) =>
                        {
                            let crate::expressions::Literal::String(s) =
                                lit.as_ref()
                            else {
                                unreachable!()
                            };
                            // Date formats typically contain yyyy, MM, dd, MMMM, HH, etc.
                            s.contains("yyyy")
                                || s.contains("YYYY")
                                || s.contains("MM")
                                || s.contains("dd")
                                || s.contains("MMMM")
                                || s.contains("HH")
                                || s.contains("hh")
                                || s.contains("ss")
                        }
                        _ => false,
                    };
                match target {
                    DialectType::Spark | DialectType::Databricks => {
                        // Dates go through DATE_FORMAT; everything else is
                        // assumed numeric and routed to FORMAT_NUMBER.
                        let func_name = if is_date_format {
                            "DATE_FORMAT"
                        } else {
                            "FORMAT_NUMBER"
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            func_name.to_string(),
                            vec![val_expr, expanded_fmt],
                        ))))
                    }
                    _ => {
                        // For TSQL and other targets, expand shortcodes but keep FORMAT
                        if is_shortcode {
                            Ok(Expression::Function(Box::new(Function::new(
                                "FORMAT".to_string(),
                                vec![val_expr, expanded_fmt],
                            ))))
                        } else {
                            Ok(Expression::Function(f))
                        }
                    }
                }
            }
            // FORMAT('%s', x) from Trino/Presto -> target-specific
            "FORMAT"
                if f.args.len() >= 2
                    && matches!(
                        source,
                        DialectType::Trino
                            | DialectType::Presto
                            | DialectType::Athena
                    ) =>
            {
                let fmt_expr = f.args[0].clone();
                let value_args: Vec<Expression> = f.args[1..].to_vec();
                match target {
                    // DuckDB: replace %s with {} in format string
                    DialectType::DuckDB => {
                        let new_fmt = match &fmt_expr {
                            Expression::Literal(lit)
                                if matches!(lit.as_ref(), Literal::String(_)) =>
                            {
                                let Literal::String(s) = lit.as_ref() else {
                                    unreachable!()
                                };
                                // NOTE(review): only %s is rewritten; other
                                // printf-style specifiers (%d, %f, ...) pass
                                // through unchanged — confirm DuckDB accepts them.
                                Expression::Literal(Box::new(Literal::String(
                                    s.replace("%s", "{}"),
                                )))
                            }
                            _ => fmt_expr,
                        };
                        let mut args = vec![new_fmt];
                        args.extend(value_args);
                        Ok(Expression::Function(Box::new(Function::new(
                            "FORMAT".to_string(),
                            args,
                        ))))
                    }
                    // Snowflake: FORMAT('%s', x) -> TO_CHAR(x) when just %s
                    DialectType::Snowflake => match &fmt_expr {
                        // The matches! guard pins the literal to exactly "%s"
                        // with a single value argument; the let-else below is
                        // therefore only a re-destructuring, never taken.
                        Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s == "%s" && value_args.len() == 1) =>
                        {
                            let Literal::String(_) = lit.as_ref() else {
                                unreachable!()
                            };
                            Ok(Expression::Function(Box::new(Function::new(
                                "TO_CHAR".to_string(),
                                value_args,
                            ))))
                        }
                        _ => Ok(Expression::Function(f)),
                    },
                    // Default: keep FORMAT as-is
                    _ => Ok(Expression::Function(f)),
                }
            }
            // LIST_CONTAINS / LIST_HAS / ARRAY_CONTAINS -> target-specific
            "LIST_CONTAINS" | "LIST_HAS" | "ARRAY_CONTAINS"
                if f.args.len() == 2 =>
            {
                // When coming from Snowflake source: ARRAY_CONTAINS(value, array)
                // args[0]=value, args[1]=array. For DuckDB target, swap and add NULL-aware CASE.
                if matches!(target, DialectType::DuckDB)
                    && matches!(source, DialectType::Snowflake)
                    && f.name.eq_ignore_ascii_case("ARRAY_CONTAINS")
                {
                    let value = f.args[0].clone();
                    let array = f.args[1].clone();

                    // value IS NULL
                    let value_is_null =
                        Expression::IsNull(Box::new(crate::expressions::IsNull {
                            this: value.clone(),
                            not: false,
                            postfix_form: false,
                        }));

                    // ARRAY_LENGTH(array)
                    let array_length =
                        Expression::Function(Box::new(Function::new(
                            "ARRAY_LENGTH".to_string(),
                            vec![array.clone()],
                        )));
                    // LIST_COUNT(array)
                    let list_count = Expression::Function(Box::new(Function::new(
                        "LIST_COUNT".to_string(),
                        vec![array.clone()],
                    )));
                    // ARRAY_LENGTH(array) <> LIST_COUNT(array)
                    // (presumably LIST_COUNT skips NULL elements, making this
                    // TRUE exactly when the array holds a NULL — TODO confirm)
                    let neq =
                        Expression::Neq(Box::new(crate::expressions::BinaryOp {
                            left: array_length,
                            right: list_count,
                            left_comments: vec![],
                            operator_comments: vec![],
                            trailing_comments: vec![],
                            inferred_type: None,
                        }));
                    // NULLIF(ARRAY_LENGTH(array) <> LIST_COUNT(array), FALSE)
                    // -> TRUE when the comparison is TRUE, NULL when FALSE.
                    let nullif =
                        Expression::Nullif(Box::new(crate::expressions::Nullif {
                            this: Box::new(neq),
                            expression: Box::new(Expression::Boolean(
                                crate::expressions::BooleanLiteral { value: false },
                            )),
                        }));

                    // ARRAY_CONTAINS(array, value) - DuckDB syntax: array first, value second
                    let array_contains =
                        Expression::Function(Box::new(Function::new(
                            "ARRAY_CONTAINS".to_string(),
                            vec![array, value],
                        )));

                    // CASE WHEN value IS NULL THEN NULLIF(...) ELSE ARRAY_CONTAINS(array, value) END
                    return Ok(Expression::Case(Box::new(Case {
                        operand: None,
                        whens: vec![(value_is_null, nullif)],
                        else_: Some(array_contains),
                        comments: Vec::new(),
                        inferred_type: None,
                    })));
                }
                match target {
                    DialectType::PostgreSQL | DialectType::Redshift => {
                        // CASE WHEN needle IS NULL THEN NULL ELSE COALESCE(needle = ANY(arr), FALSE) END
                        // NOTE(review): here args[0] is taken as the array and
                        // args[1] as the needle — the opposite of the
                        // Snowflake-source ordering handled above; confirm the
                        // source-side normalization guarantees this layout.
                        let arr = f.args[0].clone();
                        let needle = f.args[1].clone();
                        // Convert [] to ARRAY[] for PostgreSQL
                        let pg_arr = match arr {
                            Expression::Array(a) => Expression::ArrayFunc(
                                Box::new(crate::expressions::ArrayConstructor {
                                    expressions: a.expressions,
                                    bracket_notation: false,
                                    use_list_keyword: false,
                                }),
                            ),
                            _ => arr,
                        };
                        // needle = ANY(arr) using the Any quantified expression
                        let any_expr = Expression::Any(Box::new(
                            crate::expressions::QuantifiedExpr {
                                this: needle.clone(),
                                subquery: pg_arr,
                                op: Some(crate::expressions::QuantifiedOp::Eq),
                            },
                        ));
                        // COALESCE(..., FALSE): force a NULL comparison result
                        // (e.g. array containing NULLs, no match) down to FALSE.
                        let coalesce = Expression::Coalesce(Box::new(
                            crate::expressions::VarArgFunc {
                                expressions: vec![
                                    any_expr,
                                    Expression::Boolean(
                                        crate::expressions::BooleanLiteral {
                                            value: false,
                                        },
                                    ),
                                ],
                                original_name: None,
                                inferred_type: None,
                            },
                        ));
                        let is_null_check = Expression::IsNull(Box::new(
                            crate::expressions::IsNull {
                                this: needle,
                                not: false,
                                postfix_form: false,
                            },
                        ));
                        Ok(Expression::Case(Box::new(Case {
                            operand: None,
                            whens: vec![(
                                is_null_check,
                                Expression::Null(crate::expressions::Null),
                            )],
                            else_: Some(coalesce),
                            comments: Vec::new(),
                            inferred_type: None,
                        })))
                    }
                    // All other targets: normalize the name to ARRAY_CONTAINS.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "ARRAY_CONTAINS".to_string(),
                        f.args,
                    )))),
                }
            }
17305 // LIST_HAS_ANY / ARRAY_HAS_ANY -> target-specific overlap operator
17306 "LIST_HAS_ANY" | "ARRAY_HAS_ANY" if f.args.len() == 2 => {
17307 match target {
17308 DialectType::PostgreSQL | DialectType::Redshift => {
17309 // arr1 && arr2 with ARRAY[] syntax
17310 let mut args = f.args;
17311 let arr1 = args.remove(0);
17312 let arr2 = args.remove(0);
17313 let pg_arr1 = match arr1 {
17314 Expression::Array(a) => Expression::ArrayFunc(
17315 Box::new(crate::expressions::ArrayConstructor {
17316 expressions: a.expressions,
17317 bracket_notation: false,
17318 use_list_keyword: false,
17319 }),
17320 ),
17321 _ => arr1,
17322 };
17323 let pg_arr2 = match arr2 {
17324 Expression::Array(a) => Expression::ArrayFunc(
17325 Box::new(crate::expressions::ArrayConstructor {
17326 expressions: a.expressions,
17327 bracket_notation: false,
17328 use_list_keyword: false,
17329 }),
17330 ),
17331 _ => arr2,
17332 };
17333 Ok(Expression::ArrayOverlaps(Box::new(BinaryOp::new(
17334 pg_arr1, pg_arr2,
17335 ))))
17336 }
17337 DialectType::DuckDB => {
17338 // DuckDB: arr1 && arr2 (native support)
17339 let mut args = f.args;
17340 let arr1 = args.remove(0);
17341 let arr2 = args.remove(0);
17342 Ok(Expression::ArrayOverlaps(Box::new(BinaryOp::new(
17343 arr1, arr2,
17344 ))))
17345 }
17346 _ => Ok(Expression::Function(Box::new(Function::new(
17347 "LIST_HAS_ANY".to_string(),
17348 f.args,
17349 )))),
17350 }
17351 }
17352 // APPROX_QUANTILE(x, q) -> target-specific
17353 "APPROX_QUANTILE" if f.args.len() == 2 => match target {
17354 DialectType::Snowflake => Ok(Expression::Function(Box::new(
17355 Function::new("APPROX_PERCENTILE".to_string(), f.args),
17356 ))),
17357 DialectType::DuckDB => Ok(Expression::Function(f)),
17358 _ => Ok(Expression::Function(f)),
17359 },
17360 // MAKE_DATE(y, m, d) -> DATE(y, m, d) for BigQuery
17361 "MAKE_DATE" if f.args.len() == 3 => match target {
17362 DialectType::BigQuery => Ok(Expression::Function(Box::new(
17363 Function::new("DATE".to_string(), f.args),
17364 ))),
17365 _ => Ok(Expression::Function(f)),
17366 },
            // RANGE(start, end[, step]) -> target-specific
            "RANGE"
                if f.args.len() >= 2 && !matches!(target, DialectType::DuckDB) =>
            {
                let start = f.args[0].clone();
                let end = f.args[1].clone();
                // Optional third argument; None when the 2-arg form was used.
                let step = f.args.get(2).cloned();
                match target {
                    // Snowflake ARRAY_GENERATE_RANGE uses exclusive end (same as DuckDB RANGE),
                    // so just rename without adjusting the end argument.
                    DialectType::Snowflake => {
                        let mut args = vec![start, end];
                        if let Some(s) = step {
                            args.push(s);
                        }
                        Ok(Expression::Function(Box::new(Function::new(
                            "ARRAY_GENERATE_RANGE".to_string(),
                            args,
                        ))))
                    }
                    DialectType::Spark | DialectType::Databricks => {
                        // RANGE(start, end) -> SEQUENCE(start, end-1)
                        // RANGE(start, end, step) -> SEQUENCE(start, end-step, step) when step constant
                        // RANGE(start, start) -> ARRAY() (empty)
                        // RANGE(start, end, 0) -> ARRAY() (empty)
                        // When end is variable: IF((end - 1) < start, ARRAY(), SEQUENCE(start, (end - 1)))

                        // Fold a numeric literal (or a negated one) into an i64;
                        // returns None for any other expression shape.
                        fn extract_i64(e: &Expression) -> Option<i64> {
                            match e {
                                Expression::Literal(lit)
                                    if matches!(
                                        lit.as_ref(),
                                        Literal::Number(_)
                                    ) =>
                                {
                                    let Literal::Number(n) = lit.as_ref() else {
                                        unreachable!()
                                    };
                                    n.parse::<i64>().ok()
                                }
                                Expression::Neg(u) => {
                                    if let Expression::Literal(lit) = &u.this {
                                        if let Literal::Number(n) = lit.as_ref() {
                                            n.parse::<i64>().ok().map(|v| -v)
                                        } else {
                                            None
                                        }
                                    } else {
                                        None
                                    }
                                }
                                _ => None,
                            }
                        }
                        let start_val = extract_i64(&start);
                        let end_val = extract_i64(&end);
                        let step_val = step.as_ref().and_then(|s| extract_i64(s));

                        // Check for RANGE(x, x) or RANGE(x, y, 0) -> empty array
                        if step_val == Some(0) {
                            return Ok(Expression::Function(Box::new(
                                Function::new("ARRAY".to_string(), vec![]),
                            )));
                        }
                        if let (Some(s), Some(e_val)) = (start_val, end_val) {
                            if s == e_val {
                                return Ok(Expression::Function(Box::new(
                                    Function::new("ARRAY".to_string(), vec![]),
                                )));
                            }
                        }

                        if let (Some(_s_val), Some(e_val)) = (start_val, end_val) {
                            // All constants - compute new end = end - step (if step provided) or end - 1
                            // NOTE(review): the `st < 0` and plain `Some(st)`
                            // arms below compute identical results and could be
                            // merged into one arm.
                            match step_val {
                                Some(st) if st < 0 => {
                                    // Negative step: SEQUENCE(start, end - step, step)
                                    let new_end = e_val - st; // end - step (= end + |step|)
                                    let mut args =
                                        vec![start, Expression::number(new_end)];
                                    if let Some(s) = step {
                                        args.push(s);
                                    }
                                    Ok(Expression::Function(Box::new(
                                        Function::new("SEQUENCE".to_string(), args),
                                    )))
                                }
                                Some(st) => {
                                    // Positive step: same end - step adjustment.
                                    let new_end = e_val - st;
                                    let mut args =
                                        vec![start, Expression::number(new_end)];
                                    if let Some(s) = step {
                                        args.push(s);
                                    }
                                    Ok(Expression::Function(Box::new(
                                        Function::new("SEQUENCE".to_string(), args),
                                    )))
                                }
                                None => {
                                    // No step: SEQUENCE(start, end - 1)
                                    let new_end = e_val - 1;
                                    Ok(Expression::Function(Box::new(
                                        Function::new(
                                            "SEQUENCE".to_string(),
                                            vec![
                                                start,
                                                Expression::number(new_end),
                                            ],
                                        ),
                                    )))
                                }
                            }
                        } else {
                            // Variable end: IF((end - 1) < start, ARRAY(), SEQUENCE(start, (end - 1)))
                            // NOTE(review): this guard assumes an ascending
                            // range; a variable end combined with a negative
                            // step is not handled here — confirm upstream
                            // guarantees or acceptable behavior.
                            let end_m1 = Expression::Sub(Box::new(BinaryOp::new(
                                end.clone(),
                                Expression::number(1),
                            )));
                            let cond = Expression::Lt(Box::new(BinaryOp::new(
                                Expression::Paren(Box::new(Paren {
                                    this: end_m1.clone(),
                                    trailing_comments: Vec::new(),
                                })),
                                start.clone(),
                            )));
                            let empty = Expression::Function(Box::new(
                                Function::new("ARRAY".to_string(), vec![]),
                            ));
                            let mut seq_args = vec![
                                start,
                                Expression::Paren(Box::new(Paren {
                                    this: end_m1,
                                    trailing_comments: Vec::new(),
                                })),
                            ];
                            if let Some(s) = step {
                                seq_args.push(s);
                            }
                            let seq = Expression::Function(Box::new(
                                Function::new("SEQUENCE".to_string(), seq_args),
                            ));
                            Ok(Expression::IfFunc(Box::new(
                                crate::expressions::IfFunc {
                                    condition: cond,
                                    true_value: empty,
                                    false_value: Some(seq),
                                    original_name: None,
                                    inferred_type: None,
                                },
                            )))
                        }
                    }
                    DialectType::SQLite => {
                        // RANGE(start, end) -> GENERATE_SERIES(start, end)
                        // The subquery wrapping is handled at the Alias level
                        let mut args = vec![start, end];
                        if let Some(s) = step {
                            args.push(s);
                        }
                        Ok(Expression::Function(Box::new(Function::new(
                            "GENERATE_SERIES".to_string(),
                            args,
                        ))))
                    }
                    _ => Ok(Expression::Function(f)),
                }
            }
17535 // ARRAY_REVERSE_SORT -> target-specific
17536 // (handled above as well, but also need DuckDB self-normalization)
17537 // MAP_FROM_ARRAYS(keys, values) -> target-specific map construction
17538 "MAP_FROM_ARRAYS" if f.args.len() == 2 => match target {
17539 DialectType::Snowflake => Ok(Expression::Function(Box::new(
17540 Function::new("OBJECT_CONSTRUCT".to_string(), f.args),
17541 ))),
17542 DialectType::Spark | DialectType::Databricks => {
17543 Ok(Expression::Function(Box::new(Function::new(
17544 "MAP_FROM_ARRAYS".to_string(),
17545 f.args,
17546 ))))
17547 }
17548 _ => Ok(Expression::Function(Box::new(Function::new(
17549 "MAP".to_string(),
17550 f.args,
17551 )))),
17552 },
17553 // VARIANCE(x) -> varSamp(x) for ClickHouse
17554 "VARIANCE" if f.args.len() == 1 => match target {
17555 DialectType::ClickHouse => Ok(Expression::Function(Box::new(
17556 Function::new("varSamp".to_string(), f.args),
17557 ))),
17558 _ => Ok(Expression::Function(f)),
17559 },
17560 // STDDEV(x) -> stddevSamp(x) for ClickHouse
17561 "STDDEV" if f.args.len() == 1 => match target {
17562 DialectType::ClickHouse => Ok(Expression::Function(Box::new(
17563 Function::new("stddevSamp".to_string(), f.args),
17564 ))),
17565 _ => Ok(Expression::Function(f)),
17566 },
17567 // ISINF(x) -> IS_INF(x) for BigQuery
17568 "ISINF" if f.args.len() == 1 => match target {
17569 DialectType::BigQuery => Ok(Expression::Function(Box::new(
17570 Function::new("IS_INF".to_string(), f.args),
17571 ))),
17572 _ => Ok(Expression::Function(f)),
17573 },
17574 // CONTAINS(arr, x) -> ARRAY_CONTAINS(arr, x) for Spark/Hive
17575 "CONTAINS" if f.args.len() == 2 => match target {
17576 DialectType::Spark
17577 | DialectType::Databricks
17578 | DialectType::Hive => Ok(Expression::Function(Box::new(
17579 Function::new("ARRAY_CONTAINS".to_string(), f.args),
17580 ))),
17581 _ => Ok(Expression::Function(f)),
17582 },
17583 // ARRAY_CONTAINS(arr, x) -> CONTAINS(arr, x) for Presto
17584 "ARRAY_CONTAINS" if f.args.len() == 2 => match target {
17585 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
17586 Ok(Expression::Function(Box::new(Function::new(
17587 "CONTAINS".to_string(),
17588 f.args,
17589 ))))
17590 }
17591 DialectType::DuckDB => Ok(Expression::Function(Box::new(
17592 Function::new("ARRAY_CONTAINS".to_string(), f.args),
17593 ))),
17594 _ => Ok(Expression::Function(f)),
17595 },
17596 // TO_UNIXTIME(x) -> UNIX_TIMESTAMP(x) for Hive/Spark
17597 "TO_UNIXTIME" if f.args.len() == 1 => match target {
17598 DialectType::Hive
17599 | DialectType::Spark
17600 | DialectType::Databricks => Ok(Expression::Function(Box::new(
17601 Function::new("UNIX_TIMESTAMP".to_string(), f.args),
17602 ))),
17603 _ => Ok(Expression::Function(f)),
17604 },
17605 // FROM_UNIXTIME(x) -> target-specific
17606 "FROM_UNIXTIME" if f.args.len() == 1 => {
17607 match target {
17608 DialectType::Hive
17609 | DialectType::Spark
17610 | DialectType::Databricks
17611 | DialectType::Presto
17612 | DialectType::Trino => Ok(Expression::Function(f)),
17613 DialectType::DuckDB => {
17614 // DuckDB: TO_TIMESTAMP(x)
17615 let arg = f.args.into_iter().next().unwrap();
17616 Ok(Expression::Function(Box::new(Function::new(
17617 "TO_TIMESTAMP".to_string(),
17618 vec![arg],
17619 ))))
17620 }
17621 DialectType::PostgreSQL => {
17622 // PG: TO_TIMESTAMP(col)
17623 let arg = f.args.into_iter().next().unwrap();
17624 Ok(Expression::Function(Box::new(Function::new(
17625 "TO_TIMESTAMP".to_string(),
17626 vec![arg],
17627 ))))
17628 }
17629 DialectType::Redshift => {
17630 // Redshift: (TIMESTAMP 'epoch' + col * INTERVAL '1 SECOND')
17631 let arg = f.args.into_iter().next().unwrap();
17632 let epoch_ts = Expression::Literal(Box::new(
17633 Literal::Timestamp("epoch".to_string()),
17634 ));
17635 let interval = Expression::Interval(Box::new(
17636 crate::expressions::Interval {
17637 this: Some(Expression::string("1 SECOND")),
17638 unit: None,
17639 },
17640 ));
17641 let mul =
17642 Expression::Mul(Box::new(BinaryOp::new(arg, interval)));
17643 let add =
17644 Expression::Add(Box::new(BinaryOp::new(epoch_ts, mul)));
17645 Ok(Expression::Paren(Box::new(crate::expressions::Paren {
17646 this: add,
17647 trailing_comments: Vec::new(),
17648 })))
17649 }
17650 _ => Ok(Expression::Function(f)),
17651 }
17652 }
17653 // FROM_UNIXTIME(x, fmt) with 2 args from Hive/Spark -> target-specific
17654 "FROM_UNIXTIME"
17655 if f.args.len() == 2
17656 && matches!(
17657 source,
17658 DialectType::Hive
17659 | DialectType::Spark
17660 | DialectType::Databricks
17661 ) =>
17662 {
17663 let mut args = f.args;
17664 let unix_ts = args.remove(0);
17665 let fmt_expr = args.remove(0);
17666 match target {
17667 DialectType::DuckDB => {
17668 // DuckDB: STRFTIME(TO_TIMESTAMP(x), c_fmt)
17669 let to_ts = Expression::Function(Box::new(Function::new(
17670 "TO_TIMESTAMP".to_string(),
17671 vec![unix_ts],
17672 )));
17673 if let Expression::Literal(lit) = &fmt_expr {
17674 if let crate::expressions::Literal::String(s) =
17675 lit.as_ref()
17676 {
17677 let c_fmt = Self::hive_format_to_c_format(s);
17678 Ok(Expression::Function(Box::new(Function::new(
17679 "STRFTIME".to_string(),
17680 vec![to_ts, Expression::string(&c_fmt)],
17681 ))))
17682 } else {
17683 Ok(Expression::Function(Box::new(Function::new(
17684 "STRFTIME".to_string(),
17685 vec![to_ts, fmt_expr],
17686 ))))
17687 }
17688 } else {
17689 Ok(Expression::Function(Box::new(Function::new(
17690 "STRFTIME".to_string(),
17691 vec![to_ts, fmt_expr],
17692 ))))
17693 }
17694 }
17695 DialectType::Presto
17696 | DialectType::Trino
17697 | DialectType::Athena => {
17698 // Presto: DATE_FORMAT(FROM_UNIXTIME(x), presto_fmt)
17699 let from_unix =
17700 Expression::Function(Box::new(Function::new(
17701 "FROM_UNIXTIME".to_string(),
17702 vec![unix_ts],
17703 )));
17704 if let Expression::Literal(lit) = &fmt_expr {
17705 if let crate::expressions::Literal::String(s) =
17706 lit.as_ref()
17707 {
17708 let p_fmt = Self::hive_format_to_presto_format(s);
17709 Ok(Expression::Function(Box::new(Function::new(
17710 "DATE_FORMAT".to_string(),
17711 vec![from_unix, Expression::string(&p_fmt)],
17712 ))))
17713 } else {
17714 Ok(Expression::Function(Box::new(Function::new(
17715 "DATE_FORMAT".to_string(),
17716 vec![from_unix, fmt_expr],
17717 ))))
17718 }
17719 } else {
17720 Ok(Expression::Function(Box::new(Function::new(
17721 "DATE_FORMAT".to_string(),
17722 vec![from_unix, fmt_expr],
17723 ))))
17724 }
17725 }
17726 _ => {
17727 // Keep as FROM_UNIXTIME(x, fmt) for other targets
17728 Ok(Expression::Function(Box::new(Function::new(
17729 "FROM_UNIXTIME".to_string(),
17730 vec![unix_ts, fmt_expr],
17731 ))))
17732 }
17733 }
17734 }
            // DATEPART(unit, expr) -> EXTRACT(unit FROM expr) for Spark
            "DATEPART" | "DATE_PART" if f.args.len() == 2 => {
                // Normalized (upper-cased) unit token used for abbreviation lookup.
                let unit_str = Self::get_unit_str_static(&f.args[0]);
                // Get the raw unit text preserving original case
                let raw_unit = match &f.args[0] {
                    Expression::Identifier(id) => id.name.clone(),
                    Expression::Var(v) => v.this.clone(),
                    Expression::Literal(lit)
                        if matches!(
                            lit.as_ref(),
                            crate::expressions::Literal::String(_)
                        ) =>
                    {
                        let crate::expressions::Literal::String(s) = lit.as_ref()
                        else {
                            unreachable!()
                        };
                        s.clone()
                    }
                    Expression::Column(col) => col.name.name.clone(),
                    // Fallback for unusual unit expressions: reuse the
                    // normalized text.
                    _ => unit_str.clone(),
                };
                match target {
                    DialectType::TSQL | DialectType::Fabric => {
                        // Expand TSQL unit abbreviations (yy, qq, mi, ...) to
                        // full unit names; unknown units keep their original case.
                        let unit_name = match unit_str.as_str() {
                            "YY" | "YYYY" => "YEAR".to_string(),
                            "QQ" | "Q" => "QUARTER".to_string(),
                            "MM" | "M" => "MONTH".to_string(),
                            "WK" | "WW" => "WEEK".to_string(),
                            "DD" | "D" | "DY" => "DAY".to_string(),
                            "HH" => "HOUR".to_string(),
                            "MI" | "N" => "MINUTE".to_string(),
                            "SS" | "S" => "SECOND".to_string(),
                            _ => raw_unit.clone(), // preserve original case
                        };
                        // Only the unit argument is rewritten; the date
                        // expression stays in place as args[1].
                        let mut args = f.args;
                        args[0] =
                            Expression::Identifier(Identifier::new(&unit_name));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEPART".to_string(),
                            args,
                        ))))
                    }
                    DialectType::Spark | DialectType::Databricks => {
                        // DATEPART(unit, expr) -> EXTRACT(unit FROM expr)
                        // Preserve original case for non-abbreviation units
                        let unit = match unit_str.as_str() {
                            "YY" | "YYYY" => "YEAR".to_string(),
                            "QQ" | "Q" => "QUARTER".to_string(),
                            "MM" | "M" => "MONTH".to_string(),
                            "WK" | "WW" => "WEEK".to_string(),
                            "DD" | "D" | "DY" => "DAY".to_string(),
                            "HH" => "HOUR".to_string(),
                            "MI" | "N" => "MINUTE".to_string(),
                            "SS" | "S" => "SECOND".to_string(),
                            _ => raw_unit, // preserve original case
                        };
                        Ok(Expression::Extract(Box::new(
                            crate::expressions::ExtractFunc {
                                this: f.args[1].clone(),
                                field: crate::expressions::DateTimeField::Custom(
                                    unit,
                                ),
                            },
                        )))
                    }
                    // Other targets: normalize the name to DATE_PART, leaving
                    // the unit argument untouched.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "DATE_PART".to_string(),
                        f.args,
                    )))),
                }
            }
17808 // DATENAME(mm, date) -> FORMAT(CAST(date AS DATETIME2), 'MMMM') for TSQL
17809 // DATENAME(dw, date) -> FORMAT(CAST(date AS DATETIME2), 'dddd') for TSQL
17810 // DATENAME(mm, date) -> DATE_FORMAT(CAST(date AS TIMESTAMP), 'MMMM') for Spark
17811 // DATENAME(dw, date) -> DATE_FORMAT(CAST(date AS TIMESTAMP), 'EEEE') for Spark
17812 "DATENAME" if f.args.len() == 2 => {
17813 let unit_str = Self::get_unit_str_static(&f.args[0]);
17814 let date_expr = f.args[1].clone();
17815 match unit_str.as_str() {
17816 "MM" | "M" | "MONTH" => match target {
17817 DialectType::TSQL => {
17818 let cast_date = Expression::Cast(Box::new(
17819 crate::expressions::Cast {
17820 this: date_expr,
17821 to: DataType::Custom {
17822 name: "DATETIME2".to_string(),
17823 },
17824 trailing_comments: Vec::new(),
17825 double_colon_syntax: false,
17826 format: None,
17827 default: None,
17828 inferred_type: None,
17829 },
17830 ));
17831 Ok(Expression::Function(Box::new(Function::new(
17832 "FORMAT".to_string(),
17833 vec![cast_date, Expression::string("MMMM")],
17834 ))))
17835 }
17836 DialectType::Spark | DialectType::Databricks => {
17837 let cast_date = Expression::Cast(Box::new(
17838 crate::expressions::Cast {
17839 this: date_expr,
17840 to: DataType::Timestamp {
17841 timezone: false,
17842 precision: None,
17843 },
17844 trailing_comments: Vec::new(),
17845 double_colon_syntax: false,
17846 format: None,
17847 default: None,
17848 inferred_type: None,
17849 },
17850 ));
17851 Ok(Expression::Function(Box::new(Function::new(
17852 "DATE_FORMAT".to_string(),
17853 vec![cast_date, Expression::string("MMMM")],
17854 ))))
17855 }
17856 _ => Ok(Expression::Function(f)),
17857 },
17858 "DW" | "WEEKDAY" => match target {
17859 DialectType::TSQL => {
17860 let cast_date = Expression::Cast(Box::new(
17861 crate::expressions::Cast {
17862 this: date_expr,
17863 to: DataType::Custom {
17864 name: "DATETIME2".to_string(),
17865 },
17866 trailing_comments: Vec::new(),
17867 double_colon_syntax: false,
17868 format: None,
17869 default: None,
17870 inferred_type: None,
17871 },
17872 ));
17873 Ok(Expression::Function(Box::new(Function::new(
17874 "FORMAT".to_string(),
17875 vec![cast_date, Expression::string("dddd")],
17876 ))))
17877 }
17878 DialectType::Spark | DialectType::Databricks => {
17879 let cast_date = Expression::Cast(Box::new(
17880 crate::expressions::Cast {
17881 this: date_expr,
17882 to: DataType::Timestamp {
17883 timezone: false,
17884 precision: None,
17885 },
17886 trailing_comments: Vec::new(),
17887 double_colon_syntax: false,
17888 format: None,
17889 default: None,
17890 inferred_type: None,
17891 },
17892 ));
17893 Ok(Expression::Function(Box::new(Function::new(
17894 "DATE_FORMAT".to_string(),
17895 vec![cast_date, Expression::string("EEEE")],
17896 ))))
17897 }
17898 _ => Ok(Expression::Function(f)),
17899 },
17900 _ => Ok(Expression::Function(f)),
17901 }
17902 }
17903 // STRING_AGG(x, sep) without WITHIN GROUP -> target-specific
17904 "STRING_AGG" if f.args.len() >= 2 => {
17905 let x = f.args[0].clone();
17906 let sep = f.args[1].clone();
17907 match target {
17908 DialectType::MySQL
17909 | DialectType::SingleStore
17910 | DialectType::Doris
17911 | DialectType::StarRocks => Ok(Expression::GroupConcat(
17912 Box::new(crate::expressions::GroupConcatFunc {
17913 this: x,
17914 separator: Some(sep),
17915 order_by: None,
17916 distinct: false,
17917 filter: None,
17918 inferred_type: None,
17919 }),
17920 )),
17921 DialectType::SQLite => Ok(Expression::GroupConcat(Box::new(
17922 crate::expressions::GroupConcatFunc {
17923 this: x,
17924 separator: Some(sep),
17925 order_by: None,
17926 distinct: false,
17927 filter: None,
17928 inferred_type: None,
17929 },
17930 ))),
17931 DialectType::PostgreSQL | DialectType::Redshift => {
17932 Ok(Expression::StringAgg(Box::new(
17933 crate::expressions::StringAggFunc {
17934 this: x,
17935 separator: Some(sep),
17936 order_by: None,
17937 distinct: false,
17938 filter: None,
17939 limit: None,
17940 inferred_type: None,
17941 },
17942 )))
17943 }
17944 _ => Ok(Expression::Function(f)),
17945 }
17946 }
17947 // JSON_ARRAYAGG -> JSON_AGG for PostgreSQL
17948 "JSON_ARRAYAGG" => match target {
17949 DialectType::PostgreSQL => {
17950 Ok(Expression::Function(Box::new(Function {
17951 name: "JSON_AGG".to_string(),
17952 ..(*f)
17953 })))
17954 }
17955 _ => Ok(Expression::Function(f)),
17956 },
17957 // SCHEMA_NAME(id) -> CURRENT_SCHEMA for PostgreSQL, 'main' for SQLite
17958 "SCHEMA_NAME" => match target {
17959 DialectType::PostgreSQL => Ok(Expression::CurrentSchema(Box::new(
17960 crate::expressions::CurrentSchema { this: None },
17961 ))),
17962 DialectType::SQLite => Ok(Expression::string("main")),
17963 _ => Ok(Expression::Function(f)),
17964 },
17965 // TO_TIMESTAMP(x, fmt) 2-arg from Spark/Hive: convert Java format to target format
17966 "TO_TIMESTAMP"
17967 if f.args.len() == 2
17968 && matches!(
17969 source,
17970 DialectType::Spark
17971 | DialectType::Databricks
17972 | DialectType::Hive
17973 )
17974 && matches!(target, DialectType::DuckDB) =>
17975 {
17976 let mut args = f.args;
17977 let val = args.remove(0);
17978 let fmt_expr = args.remove(0);
17979 if let Expression::Literal(ref lit) = fmt_expr {
17980 if let Literal::String(ref s) = lit.as_ref() {
17981 // Convert Java/Spark format to C strptime format
17982 fn java_to_c_fmt(fmt: &str) -> String {
17983 let result = fmt
17984 .replace("yyyy", "%Y")
17985 .replace("SSSSSS", "%f")
17986 .replace("EEEE", "%W")
17987 .replace("MM", "%m")
17988 .replace("dd", "%d")
17989 .replace("HH", "%H")
17990 .replace("mm", "%M")
17991 .replace("ss", "%S")
17992 .replace("yy", "%y");
17993 let mut out = String::new();
17994 let chars: Vec<char> = result.chars().collect();
17995 let mut i = 0;
17996 while i < chars.len() {
17997 if chars[i] == '%' && i + 1 < chars.len() {
17998 out.push(chars[i]);
17999 out.push(chars[i + 1]);
18000 i += 2;
18001 } else if chars[i] == 'z' {
18002 out.push_str("%Z");
18003 i += 1;
18004 } else if chars[i] == 'Z' {
18005 out.push_str("%z");
18006 i += 1;
18007 } else {
18008 out.push(chars[i]);
18009 i += 1;
18010 }
18011 }
18012 out
18013 }
18014 let c_fmt = java_to_c_fmt(s);
18015 Ok(Expression::Function(Box::new(Function::new(
18016 "STRPTIME".to_string(),
18017 vec![val, Expression::string(&c_fmt)],
18018 ))))
18019 } else {
18020 Ok(Expression::Function(Box::new(Function::new(
18021 "STRPTIME".to_string(),
18022 vec![val, fmt_expr],
18023 ))))
18024 }
18025 } else {
18026 Ok(Expression::Function(Box::new(Function::new(
18027 "STRPTIME".to_string(),
18028 vec![val, fmt_expr],
18029 ))))
18030 }
18031 }
18032 // TO_DATE(x) 1-arg from Doris: date conversion
18033 "TO_DATE"
18034 if f.args.len() == 1
18035 && matches!(
18036 source,
18037 DialectType::Doris | DialectType::StarRocks
18038 ) =>
18039 {
18040 let arg = f.args.into_iter().next().unwrap();
18041 match target {
18042 DialectType::Oracle
18043 | DialectType::DuckDB
18044 | DialectType::TSQL => {
18045 // CAST(x AS DATE)
18046 Ok(Expression::Cast(Box::new(Cast {
18047 this: arg,
18048 to: DataType::Date,
18049 double_colon_syntax: false,
18050 trailing_comments: vec![],
18051 format: None,
18052 default: None,
18053 inferred_type: None,
18054 })))
18055 }
18056 DialectType::MySQL | DialectType::SingleStore => {
18057 // DATE(x)
18058 Ok(Expression::Function(Box::new(Function::new(
18059 "DATE".to_string(),
18060 vec![arg],
18061 ))))
18062 }
18063 _ => {
18064 // Default: keep as TO_DATE(x) (Spark, PostgreSQL, etc.)
18065 Ok(Expression::Function(Box::new(Function::new(
18066 "TO_DATE".to_string(),
18067 vec![arg],
18068 ))))
18069 }
18070 }
18071 }
18072 // TO_DATE(x) 1-arg from Spark/Hive: safe date conversion
18073 "TO_DATE"
18074 if f.args.len() == 1
18075 && matches!(
18076 source,
18077 DialectType::Spark
18078 | DialectType::Databricks
18079 | DialectType::Hive
18080 ) =>
18081 {
18082 let arg = f.args.into_iter().next().unwrap();
18083 match target {
18084 DialectType::DuckDB => {
18085 // Spark TO_DATE is safe -> TRY_CAST(x AS DATE)
18086 Ok(Expression::TryCast(Box::new(Cast {
18087 this: arg,
18088 to: DataType::Date,
18089 double_colon_syntax: false,
18090 trailing_comments: vec![],
18091 format: None,
18092 default: None,
18093 inferred_type: None,
18094 })))
18095 }
18096 DialectType::Presto
18097 | DialectType::Trino
18098 | DialectType::Athena => {
18099 // CAST(CAST(x AS TIMESTAMP) AS DATE)
18100 Ok(Self::double_cast_timestamp_date(arg))
18101 }
18102 DialectType::Snowflake => {
18103 // Spark's TO_DATE is safe -> TRY_TO_DATE(x, 'yyyy-mm-DD')
18104 // The default Spark format 'yyyy-MM-dd' maps to Snowflake 'yyyy-mm-DD'
18105 Ok(Expression::Function(Box::new(Function::new(
18106 "TRY_TO_DATE".to_string(),
18107 vec![arg, Expression::string("yyyy-mm-DD")],
18108 ))))
18109 }
18110 _ => {
18111 // Default: keep as TO_DATE(x)
18112 Ok(Expression::Function(Box::new(Function::new(
18113 "TO_DATE".to_string(),
18114 vec![arg],
18115 ))))
18116 }
18117 }
18118 }
18119 // TO_DATE(x, fmt) 2-arg from Spark/Hive: format-based date conversion
18120 "TO_DATE"
18121 if f.args.len() == 2
18122 && matches!(
18123 source,
18124 DialectType::Spark
18125 | DialectType::Databricks
18126 | DialectType::Hive
18127 ) =>
18128 {
18129 let mut args = f.args;
18130 let val = args.remove(0);
18131 let fmt_expr = args.remove(0);
18132 let is_default_format = matches!(&fmt_expr, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s == "yyyy-MM-dd"));
18133
18134 if is_default_format {
18135 // Default format: same as 1-arg form
18136 match target {
18137 DialectType::DuckDB => {
18138 Ok(Expression::TryCast(Box::new(Cast {
18139 this: val,
18140 to: DataType::Date,
18141 double_colon_syntax: false,
18142 trailing_comments: vec![],
18143 format: None,
18144 default: None,
18145 inferred_type: None,
18146 })))
18147 }
18148 DialectType::Presto
18149 | DialectType::Trino
18150 | DialectType::Athena => {
18151 Ok(Self::double_cast_timestamp_date(val))
18152 }
18153 DialectType::Snowflake => {
18154 // TRY_TO_DATE(x, format) with Snowflake format mapping
18155 let sf_fmt = "yyyy-MM-dd"
18156 .replace("yyyy", "yyyy")
18157 .replace("MM", "mm")
18158 .replace("dd", "DD");
18159 Ok(Expression::Function(Box::new(Function::new(
18160 "TRY_TO_DATE".to_string(),
18161 vec![val, Expression::string(&sf_fmt)],
18162 ))))
18163 }
18164 _ => Ok(Expression::Function(Box::new(Function::new(
18165 "TO_DATE".to_string(),
18166 vec![val],
18167 )))),
18168 }
18169 } else {
18170 // Non-default format: use format-based parsing
18171 if let Expression::Literal(ref lit) = fmt_expr {
18172 if let Literal::String(ref s) = lit.as_ref() {
18173 match target {
18174 DialectType::DuckDB => {
18175 // CAST(CAST(TRY_STRPTIME(x, c_fmt) AS TIMESTAMP) AS DATE)
18176 fn java_to_c_fmt_todate(fmt: &str) -> String {
18177 let result = fmt
18178 .replace("yyyy", "%Y")
18179 .replace("SSSSSS", "%f")
18180 .replace("EEEE", "%W")
18181 .replace("MM", "%m")
18182 .replace("dd", "%d")
18183 .replace("HH", "%H")
18184 .replace("mm", "%M")
18185 .replace("ss", "%S")
18186 .replace("yy", "%y");
18187 let mut out = String::new();
18188 let chars: Vec<char> =
18189 result.chars().collect();
18190 let mut i = 0;
18191 while i < chars.len() {
18192 if chars[i] == '%'
18193 && i + 1 < chars.len()
18194 {
18195 out.push(chars[i]);
18196 out.push(chars[i + 1]);
18197 i += 2;
18198 } else if chars[i] == 'z' {
18199 out.push_str("%Z");
18200 i += 1;
18201 } else if chars[i] == 'Z' {
18202 out.push_str("%z");
18203 i += 1;
18204 } else {
18205 out.push(chars[i]);
18206 i += 1;
18207 }
18208 }
18209 out
18210 }
18211 let c_fmt = java_to_c_fmt_todate(s);
18212 // CAST(CAST(TRY_STRPTIME(x, fmt) AS TIMESTAMP) AS DATE)
18213 let try_strptime = Expression::Function(
18214 Box::new(Function::new(
18215 "TRY_STRPTIME".to_string(),
18216 vec![val, Expression::string(&c_fmt)],
18217 )),
18218 );
18219 let cast_ts =
18220 Expression::Cast(Box::new(Cast {
18221 this: try_strptime,
18222 to: DataType::Timestamp {
18223 precision: None,
18224 timezone: false,
18225 },
18226 double_colon_syntax: false,
18227 trailing_comments: vec![],
18228 format: None,
18229 default: None,
18230 inferred_type: None,
18231 }));
18232 Ok(Expression::Cast(Box::new(Cast {
18233 this: cast_ts,
18234 to: DataType::Date,
18235 double_colon_syntax: false,
18236 trailing_comments: vec![],
18237 format: None,
18238 default: None,
18239 inferred_type: None,
18240 })))
18241 }
18242 DialectType::Presto
18243 | DialectType::Trino
18244 | DialectType::Athena => {
18245 // CAST(DATE_PARSE(x, presto_fmt) AS DATE)
18246 let p_fmt = s
18247 .replace("yyyy", "%Y")
18248 .replace("SSSSSS", "%f")
18249 .replace("MM", "%m")
18250 .replace("dd", "%d")
18251 .replace("HH", "%H")
18252 .replace("mm", "%M")
18253 .replace("ss", "%S")
18254 .replace("yy", "%y");
18255 let date_parse = Expression::Function(
18256 Box::new(Function::new(
18257 "DATE_PARSE".to_string(),
18258 vec![val, Expression::string(&p_fmt)],
18259 )),
18260 );
18261 Ok(Expression::Cast(Box::new(Cast {
18262 this: date_parse,
18263 to: DataType::Date,
18264 double_colon_syntax: false,
18265 trailing_comments: vec![],
18266 format: None,
18267 default: None,
18268 inferred_type: None,
18269 })))
18270 }
18271 DialectType::Snowflake => {
18272 // TRY_TO_DATE(x, snowflake_fmt)
18273 Ok(Expression::Function(Box::new(
18274 Function::new(
18275 "TRY_TO_DATE".to_string(),
18276 vec![val, Expression::string(s)],
18277 ),
18278 )))
18279 }
18280 _ => Ok(Expression::Function(Box::new(
18281 Function::new(
18282 "TO_DATE".to_string(),
18283 vec![val, fmt_expr],
18284 ),
18285 ))),
18286 }
18287 } else {
18288 Ok(Expression::Function(Box::new(Function::new(
18289 "TO_DATE".to_string(),
18290 vec![val, fmt_expr],
18291 ))))
18292 }
18293 } else {
18294 Ok(Expression::Function(Box::new(Function::new(
18295 "TO_DATE".to_string(),
18296 vec![val, fmt_expr],
18297 ))))
18298 }
18299 }
18300 }
18301 // TO_TIMESTAMP(x) 1-arg: epoch conversion
18302 "TO_TIMESTAMP"
18303 if f.args.len() == 1
18304 && matches!(source, DialectType::DuckDB)
18305 && matches!(
18306 target,
18307 DialectType::BigQuery
18308 | DialectType::Presto
18309 | DialectType::Trino
18310 | DialectType::Hive
18311 | DialectType::Spark
18312 | DialectType::Databricks
18313 | DialectType::Athena
18314 ) =>
18315 {
18316 let arg = f.args.into_iter().next().unwrap();
18317 let func_name = match target {
18318 DialectType::BigQuery => "TIMESTAMP_SECONDS",
18319 DialectType::Presto
18320 | DialectType::Trino
18321 | DialectType::Athena
18322 | DialectType::Hive
18323 | DialectType::Spark
18324 | DialectType::Databricks => "FROM_UNIXTIME",
18325 _ => "TO_TIMESTAMP",
18326 };
18327 Ok(Expression::Function(Box::new(Function::new(
18328 func_name.to_string(),
18329 vec![arg],
18330 ))))
18331 }
18332 // CONCAT(x) single-arg: -> CONCAT(COALESCE(x, '')) for Spark
18333 "CONCAT" if f.args.len() == 1 => {
18334 let arg = f.args.into_iter().next().unwrap();
18335 match target {
18336 DialectType::Presto
18337 | DialectType::Trino
18338 | DialectType::Athena => {
18339 // CONCAT(a) -> CAST(a AS VARCHAR)
18340 Ok(Expression::Cast(Box::new(Cast {
18341 this: arg,
18342 to: DataType::VarChar {
18343 length: None,
18344 parenthesized_length: false,
18345 },
18346 trailing_comments: vec![],
18347 double_colon_syntax: false,
18348 format: None,
18349 default: None,
18350 inferred_type: None,
18351 })))
18352 }
18353 DialectType::TSQL => {
18354 // CONCAT(a) -> a
18355 Ok(arg)
18356 }
18357 DialectType::DuckDB => {
18358 // Keep CONCAT(a) for DuckDB (native support)
18359 Ok(Expression::Function(Box::new(Function::new(
18360 "CONCAT".to_string(),
18361 vec![arg],
18362 ))))
18363 }
18364 DialectType::Spark | DialectType::Databricks => {
18365 let coalesced = Expression::Coalesce(Box::new(
18366 crate::expressions::VarArgFunc {
18367 expressions: vec![arg, Expression::string("")],
18368 original_name: None,
18369 inferred_type: None,
18370 },
18371 ));
18372 Ok(Expression::Function(Box::new(Function::new(
18373 "CONCAT".to_string(),
18374 vec![coalesced],
18375 ))))
18376 }
18377 _ => Ok(Expression::Function(Box::new(Function::new(
18378 "CONCAT".to_string(),
18379 vec![arg],
18380 )))),
18381 }
18382 }
18383 // REGEXP_EXTRACT(a, p) 2-arg: BigQuery default group is 0 (no 3rd arg needed)
18384 "REGEXP_EXTRACT"
18385 if f.args.len() == 3 && matches!(target, DialectType::BigQuery) =>
18386 {
18387 // If group_index is 0, drop it
18388 let drop_group = match &f.args[2] {
18389 Expression::Literal(lit)
18390 if matches!(lit.as_ref(), Literal::Number(_)) =>
18391 {
18392 let Literal::Number(n) = lit.as_ref() else {
18393 unreachable!()
18394 };
18395 n == "0"
18396 }
18397 _ => false,
18398 };
18399 if drop_group {
18400 let mut args = f.args;
18401 args.truncate(2);
18402 Ok(Expression::Function(Box::new(Function::new(
18403 "REGEXP_EXTRACT".to_string(),
18404 args,
18405 ))))
18406 } else {
18407 Ok(Expression::Function(f))
18408 }
18409 }
18410 // REGEXP_EXTRACT(a, pattern, group, flags) 4-arg -> REGEXP_SUBSTR for Snowflake
18411 "REGEXP_EXTRACT"
18412 if f.args.len() == 4
18413 && matches!(target, DialectType::Snowflake) =>
18414 {
18415 // REGEXP_EXTRACT(a, 'pattern', 2, 'i') -> REGEXP_SUBSTR(a, 'pattern', 1, 1, 'i', 2)
18416 let mut args = f.args;
18417 let this = args.remove(0);
18418 let pattern = args.remove(0);
18419 let group = args.remove(0);
18420 let flags = args.remove(0);
18421 Ok(Expression::Function(Box::new(Function::new(
18422 "REGEXP_SUBSTR".to_string(),
18423 vec![
18424 this,
18425 pattern,
18426 Expression::number(1),
18427 Expression::number(1),
18428 flags,
18429 group,
18430 ],
18431 ))))
18432 }
18433 // REGEXP_SUBSTR(a, pattern, position) 3-arg -> REGEXP_EXTRACT(SUBSTRING(a, pos), pattern)
18434 "REGEXP_SUBSTR"
18435 if f.args.len() == 3
18436 && matches!(
18437 target,
18438 DialectType::DuckDB
18439 | DialectType::Presto
18440 | DialectType::Trino
18441 | DialectType::Spark
18442 | DialectType::Databricks
18443 ) =>
18444 {
18445 let mut args = f.args;
18446 let this = args.remove(0);
18447 let pattern = args.remove(0);
18448 let position = args.remove(0);
18449 // Wrap subject in SUBSTRING(this, position) to apply the offset
18450 let substring_expr = Expression::Function(Box::new(Function::new(
18451 "SUBSTRING".to_string(),
18452 vec![this, position],
18453 )));
18454 let target_name = match target {
18455 DialectType::DuckDB => "REGEXP_EXTRACT",
18456 _ => "REGEXP_EXTRACT",
18457 };
18458 Ok(Expression::Function(Box::new(Function::new(
18459 target_name.to_string(),
18460 vec![substring_expr, pattern],
18461 ))))
18462 }
18463 // TO_DAYS(x) -> (DATEDIFF(x, '0000-01-01') + 1) or target-specific
18464 "TO_DAYS" if f.args.len() == 1 => {
18465 let x = f.args.into_iter().next().unwrap();
18466 let epoch = Expression::string("0000-01-01");
18467 // Build the final target-specific expression directly
18468 let datediff_expr = match target {
18469 DialectType::MySQL | DialectType::SingleStore => {
18470 // MySQL: (DATEDIFF(x, '0000-01-01') + 1)
18471 Expression::Function(Box::new(Function::new(
18472 "DATEDIFF".to_string(),
18473 vec![x, epoch],
18474 )))
18475 }
18476 DialectType::DuckDB => {
18477 // DuckDB: (DATE_DIFF('DAY', CAST('0000-01-01' AS DATE), CAST(x AS DATE)) + 1)
18478 let cast_epoch = Expression::Cast(Box::new(Cast {
18479 this: epoch,
18480 to: DataType::Date,
18481 trailing_comments: Vec::new(),
18482 double_colon_syntax: false,
18483 format: None,
18484 default: None,
18485 inferred_type: None,
18486 }));
18487 let cast_x = Expression::Cast(Box::new(Cast {
18488 this: x,
18489 to: DataType::Date,
18490 trailing_comments: Vec::new(),
18491 double_colon_syntax: false,
18492 format: None,
18493 default: None,
18494 inferred_type: None,
18495 }));
18496 Expression::Function(Box::new(Function::new(
18497 "DATE_DIFF".to_string(),
18498 vec![Expression::string("DAY"), cast_epoch, cast_x],
18499 )))
18500 }
18501 DialectType::Presto
18502 | DialectType::Trino
18503 | DialectType::Athena => {
18504 // Presto: (DATE_DIFF('DAY', CAST(CAST('0000-01-01' AS TIMESTAMP) AS DATE), CAST(CAST(x AS TIMESTAMP) AS DATE)) + 1)
18505 let cast_epoch = Self::double_cast_timestamp_date(epoch);
18506 let cast_x = Self::double_cast_timestamp_date(x);
18507 Expression::Function(Box::new(Function::new(
18508 "DATE_DIFF".to_string(),
18509 vec![Expression::string("DAY"), cast_epoch, cast_x],
18510 )))
18511 }
18512 _ => {
18513 // Default: (DATEDIFF(x, '0000-01-01') + 1)
18514 Expression::Function(Box::new(Function::new(
18515 "DATEDIFF".to_string(),
18516 vec![x, epoch],
18517 )))
18518 }
18519 };
18520 let add_one = Expression::Add(Box::new(BinaryOp::new(
18521 datediff_expr,
18522 Expression::number(1),
18523 )));
18524 Ok(Expression::Paren(Box::new(crate::expressions::Paren {
18525 this: add_one,
18526 trailing_comments: Vec::new(),
18527 })))
18528 }
18529 // STR_TO_DATE(x, format) -> DATE_PARSE / STRPTIME / TO_DATE etc.
18530 "STR_TO_DATE"
18531 if f.args.len() == 2
18532 && matches!(
18533 target,
18534 DialectType::Presto | DialectType::Trino
18535 ) =>
18536 {
18537 let mut args = f.args;
18538 let x = args.remove(0);
18539 let format_expr = args.remove(0);
18540 // Check if the format contains time components
18541 let has_time = if let Expression::Literal(ref lit) = format_expr {
18542 if let Literal::String(ref fmt) = lit.as_ref() {
18543 fmt.contains("%H")
18544 || fmt.contains("%T")
18545 || fmt.contains("%M")
18546 || fmt.contains("%S")
18547 || fmt.contains("%I")
18548 || fmt.contains("%p")
18549 } else {
18550 false
18551 }
18552 } else {
18553 false
18554 };
18555 let date_parse = Expression::Function(Box::new(Function::new(
18556 "DATE_PARSE".to_string(),
18557 vec![x, format_expr],
18558 )));
18559 if has_time {
18560 // Has time components: just DATE_PARSE
18561 Ok(date_parse)
18562 } else {
18563 // Date-only: CAST(DATE_PARSE(...) AS DATE)
18564 Ok(Expression::Cast(Box::new(Cast {
18565 this: date_parse,
18566 to: DataType::Date,
18567 trailing_comments: Vec::new(),
18568 double_colon_syntax: false,
18569 format: None,
18570 default: None,
18571 inferred_type: None,
18572 })))
18573 }
18574 }
18575 "STR_TO_DATE"
18576 if f.args.len() == 2
18577 && matches!(
18578 target,
18579 DialectType::PostgreSQL | DialectType::Redshift
18580 ) =>
18581 {
18582 let mut args = f.args;
18583 let x = args.remove(0);
18584 let fmt = args.remove(0);
18585 let pg_fmt = match fmt {
18586 Expression::Literal(lit)
18587 if matches!(lit.as_ref(), Literal::String(_)) =>
18588 {
18589 let Literal::String(s) = lit.as_ref() else {
18590 unreachable!()
18591 };
18592 Expression::string(
18593 &s.replace("%Y", "YYYY")
18594 .replace("%m", "MM")
18595 .replace("%d", "DD")
18596 .replace("%H", "HH24")
18597 .replace("%M", "MI")
18598 .replace("%S", "SS"),
18599 )
18600 }
18601 other => other,
18602 };
18603 let to_date = Expression::Function(Box::new(Function::new(
18604 "TO_DATE".to_string(),
18605 vec![x, pg_fmt],
18606 )));
18607 Ok(Expression::Cast(Box::new(Cast {
18608 this: to_date,
18609 to: DataType::Timestamp {
18610 timezone: false,
18611 precision: None,
18612 },
18613 trailing_comments: Vec::new(),
18614 double_colon_syntax: false,
18615 format: None,
18616 default: None,
18617 inferred_type: None,
18618 })))
18619 }
18620 // RANGE(start, end) -> GENERATE_SERIES for SQLite
18621 "RANGE"
18622 if (f.args.len() == 1 || f.args.len() == 2)
18623 && matches!(target, DialectType::SQLite) =>
18624 {
18625 if f.args.len() == 2 {
18626 // RANGE(start, end) -> (SELECT value AS col_alias FROM GENERATE_SERIES(start, end))
18627 // For SQLite, RANGE is exclusive on end, GENERATE_SERIES is inclusive
18628 let mut args = f.args;
18629 let start = args.remove(0);
18630 let end = args.remove(0);
18631 Ok(Expression::Function(Box::new(Function::new(
18632 "GENERATE_SERIES".to_string(),
18633 vec![start, end],
18634 ))))
18635 } else {
18636 Ok(Expression::Function(f))
18637 }
18638 }
18639 // UNIFORM(low, high[, seed]) -> UNIFORM(low, high, RANDOM([seed])) for Snowflake
18640 // When source is Snowflake, keep as-is (args already in correct form)
18641 "UNIFORM"
18642 if matches!(target, DialectType::Snowflake)
18643 && (f.args.len() == 2 || f.args.len() == 3) =>
18644 {
18645 if matches!(source, DialectType::Snowflake) {
18646 // Snowflake -> Snowflake: keep as-is
18647 Ok(Expression::Function(f))
18648 } else {
18649 let mut args = f.args;
18650 let low = args.remove(0);
18651 let high = args.remove(0);
18652 let random = if !args.is_empty() {
18653 let seed = args.remove(0);
18654 Expression::Function(Box::new(Function::new(
18655 "RANDOM".to_string(),
18656 vec![seed],
18657 )))
18658 } else {
18659 Expression::Function(Box::new(Function::new(
18660 "RANDOM".to_string(),
18661 vec![],
18662 )))
18663 };
18664 Ok(Expression::Function(Box::new(Function::new(
18665 "UNIFORM".to_string(),
18666 vec![low, high, random],
18667 ))))
18668 }
18669 }
            // TO_UTC_TIMESTAMP(ts, tz) -> target-specific UTC conversion.
            // The timestamp argument is interpreted as local time in `tz` and
            // converted to UTC using each target's native construct.
            "TO_UTC_TIMESTAMP" if f.args.len() == 2 => {
                let mut args = f.args;
                let ts_arg = args.remove(0);
                let tz_arg = args.remove(0);
                // Cast string literal to TIMESTAMP for all targets
                let ts_cast = if matches!(&ts_arg, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
                {
                    Expression::Cast(Box::new(Cast {
                        this: ts_arg,
                        to: DataType::Timestamp {
                            timezone: false,
                            precision: None,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    }))
                } else {
                    ts_arg
                };
                match target {
                    DialectType::Spark | DialectType::Databricks => {
                        // Native spelling; only the literal cast above changes.
                        Ok(Expression::Function(Box::new(Function::new(
                            "TO_UTC_TIMESTAMP".to_string(),
                            vec![ts_cast, tz_arg],
                        ))))
                    }
                    DialectType::Snowflake => {
                        // CONVERT_TIMEZONE(tz, 'UTC', CAST(ts AS TIMESTAMP))
                        Ok(Expression::Function(Box::new(Function::new(
                            "CONVERT_TIMEZONE".to_string(),
                            vec![tz_arg, Expression::string("UTC"), ts_cast],
                        ))))
                    }
                    DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Athena => {
                        // WITH_TIMEZONE(CAST(ts AS TIMESTAMP), tz) AT TIME ZONE 'UTC'
                        let wtz = Expression::Function(Box::new(Function::new(
                            "WITH_TIMEZONE".to_string(),
                            vec![ts_cast, tz_arg],
                        )));
                        Ok(Expression::AtTimeZone(Box::new(
                            crate::expressions::AtTimeZone {
                                this: wtz,
                                zone: Expression::string("UTC"),
                            },
                        )))
                    }
                    DialectType::BigQuery => {
                        // DATETIME(TIMESTAMP(CAST(ts AS DATETIME), tz), 'UTC')
                        // If the literal cast was added above, unwrap it and
                        // re-cast the inner value to DATETIME instead; otherwise
                        // the original expression is cloned and cast as-is.
                        let cast_dt = Expression::Cast(Box::new(Cast {
                            this: if let Expression::Cast(c) = ts_cast {
                                c.this
                            } else {
                                ts_cast.clone()
                            },
                            to: DataType::Custom {
                                name: "DATETIME".to_string(),
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        let ts_func =
                            Expression::Function(Box::new(Function::new(
                                "TIMESTAMP".to_string(),
                                vec![cast_dt, tz_arg],
                            )));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATETIME".to_string(),
                            vec![ts_func, Expression::string("UTC")],
                        ))))
                    }
                    _ => {
                        // DuckDB, PostgreSQL, Redshift: CAST(ts AS TIMESTAMP) AT TIME ZONE tz AT TIME ZONE 'UTC'
                        let atz1 = Expression::AtTimeZone(Box::new(
                            crate::expressions::AtTimeZone {
                                this: ts_cast,
                                zone: tz_arg,
                            },
                        ));
                        Ok(Expression::AtTimeZone(Box::new(
                            crate::expressions::AtTimeZone {
                                this: atz1,
                                zone: Expression::string("UTC"),
                            },
                        )))
                    }
                }
            }
18766 // FROM_UTC_TIMESTAMP(ts, tz) -> target-specific UTC conversion
18767 "FROM_UTC_TIMESTAMP" if f.args.len() == 2 => {
18768 let mut args = f.args;
18769 let ts_arg = args.remove(0);
18770 let tz_arg = args.remove(0);
18771 // Cast string literal to TIMESTAMP
18772 let ts_cast = if matches!(&ts_arg, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
18773 {
18774 Expression::Cast(Box::new(Cast {
18775 this: ts_arg,
18776 to: DataType::Timestamp {
18777 timezone: false,
18778 precision: None,
18779 },
18780 trailing_comments: vec![],
18781 double_colon_syntax: false,
18782 format: None,
18783 default: None,
18784 inferred_type: None,
18785 }))
18786 } else {
18787 ts_arg
18788 };
18789 match target {
18790 DialectType::Spark | DialectType::Databricks => {
18791 Ok(Expression::Function(Box::new(Function::new(
18792 "FROM_UTC_TIMESTAMP".to_string(),
18793 vec![ts_cast, tz_arg],
18794 ))))
18795 }
18796 DialectType::Presto
18797 | DialectType::Trino
18798 | DialectType::Athena => {
18799 // AT_TIMEZONE(CAST(ts AS TIMESTAMP), tz)
18800 Ok(Expression::Function(Box::new(Function::new(
18801 "AT_TIMEZONE".to_string(),
18802 vec![ts_cast, tz_arg],
18803 ))))
18804 }
18805 DialectType::Snowflake => {
18806 // CONVERT_TIMEZONE('UTC', tz, CAST(ts AS TIMESTAMP))
18807 Ok(Expression::Function(Box::new(Function::new(
18808 "CONVERT_TIMEZONE".to_string(),
18809 vec![Expression::string("UTC"), tz_arg, ts_cast],
18810 ))))
18811 }
18812 _ => {
18813 // DuckDB, PostgreSQL, Redshift: CAST(ts AS TIMESTAMP) AT TIME ZONE tz
18814 Ok(Expression::AtTimeZone(Box::new(
18815 crate::expressions::AtTimeZone {
18816 this: ts_cast,
18817 zone: tz_arg,
18818 },
18819 )))
18820 }
18821 }
18822 }
18823 // MAP_FROM_ARRAYS(keys, values) -> target-specific map construction
18824 "MAP_FROM_ARRAYS" if f.args.len() == 2 => {
18825 let name = match target {
18826 DialectType::Snowflake => "OBJECT_CONSTRUCT",
18827 _ => "MAP",
18828 };
18829 Ok(Expression::Function(Box::new(Function::new(
18830 name.to_string(),
18831 f.args,
18832 ))))
18833 }
18834 // STR_TO_MAP(s, pair_delim, kv_delim) -> SPLIT_TO_MAP for Presto
18835 "STR_TO_MAP" if f.args.len() >= 1 => match target {
18836 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
18837 Ok(Expression::Function(Box::new(Function::new(
18838 "SPLIT_TO_MAP".to_string(),
18839 f.args,
18840 ))))
18841 }
18842 _ => Ok(Expression::Function(f)),
18843 },
// TIME_TO_STR(x, fmt) -> Expression::TimeToStr so the generator can emit
// the target dialect's formatting function with the right format string.
"TIME_TO_STR" if f.args.len() == 2 => {
    let mut args = f.args;
    let this = args.remove(0);
    let fmt_expr = args.remove(0);
    // Format extraction policy: a string literal is used verbatim; a
    // non-string literal yields an empty format; any non-literal
    // expression falls back to the ISO-like default pattern.
    let format = if let Expression::Literal(lit) = fmt_expr {
        if let Literal::String(s) = lit.as_ref() {
            s.clone()
        } else {
            String::new()
        }
    } else {
        "%Y-%m-%d %H:%M:%S".to_string()
    };
    Ok(Expression::TimeToStr(Box::new(
        crate::expressions::TimeToStr {
            this: Box::new(this),
            format,
            culture: None,
            zone: None,
        },
    )))
}
// STR_TO_TIME(x, fmt) -> Expression::StrToTime for proper generation.
// Uses the same format-extraction policy as TIME_TO_STR in this arm group.
"STR_TO_TIME" if f.args.len() == 2 => {
    let mut args = f.args;
    let this = args.remove(0);
    let fmt_expr = args.remove(0);
    let format = if let Expression::Literal(lit) = fmt_expr {
        if let Literal::String(s) = lit.as_ref() {
            s.clone()
        } else {
            String::new()
        }
    } else {
        "%Y-%m-%d %H:%M:%S".to_string()
    };
    Ok(Expression::StrToTime(Box::new(
        crate::expressions::StrToTime {
            this: Box::new(this),
            format,
            zone: None,
            safe: None,
            target_type: None,
        },
    )))
}
// STR_TO_UNIX(x[, fmt]) -> Expression::StrToUnix for proper generation.
"STR_TO_UNIX" if f.args.len() >= 1 => {
    let mut args = f.args;
    let this = args.remove(0);
    // Optional second arg: only a string literal is kept as the format;
    // a non-string literal, a non-literal expression, or a missing arg
    // all yield None (the generator then uses its own default).
    let format = if !args.is_empty() {
        if let Expression::Literal(lit) = args.remove(0) {
            if let Literal::String(s) = lit.as_ref() {
                Some(s.clone())
            } else {
                None
            }
        } else {
            None
        }
    } else {
        None
    };
    Ok(Expression::StrToUnix(Box::new(
        crate::expressions::StrToUnix {
            this: Some(Box::new(this)),
            format,
        },
    )))
}
// TIME_TO_UNIX(x) -> Expression::TimeToUnix for proper generation.
"TIME_TO_UNIX" if f.args.len() == 1 => {
    let mut args = f.args;
    let this = args.remove(0);
    Ok(Expression::TimeToUnix(Box::new(
        crate::expressions::UnaryFunc {
            this,
            original_name: None,
            inferred_type: None,
        },
    )))
}
// UNIX_TO_STR(x[, fmt]) -> Expression::UnixToStr for proper generation.
// Same optional-format handling as STR_TO_UNIX: only string literals
// survive as the format.
"UNIX_TO_STR" if f.args.len() >= 1 => {
    let mut args = f.args;
    let this = args.remove(0);
    let format = if !args.is_empty() {
        if let Expression::Literal(lit) = args.remove(0) {
            if let Literal::String(s) = lit.as_ref() {
                Some(s.clone())
            } else {
                None
            }
        } else {
            None
        }
    } else {
        None
    };
    Ok(Expression::UnixToStr(Box::new(
        crate::expressions::UnixToStr {
            this: Box::new(this),
            format,
        },
    )))
}
// UNIX_TO_TIME(x) -> Expression::UnixToTime for proper generation.
// All optional fields (scale, zone, offsets, format, target type) default
// to None; the target generator supplies dialect-appropriate defaults.
"UNIX_TO_TIME" if f.args.len() == 1 => {
    let mut args = f.args;
    let this = args.remove(0);
    Ok(Expression::UnixToTime(Box::new(
        crate::expressions::UnixToTime {
            this: Box::new(this),
            scale: None,
            zone: None,
            hours: None,
            minutes: None,
            format: None,
            target_type: None,
        },
    )))
}
// TIME_STR_TO_DATE(x) -> Expression::TimeStrToDate for proper generation.
"TIME_STR_TO_DATE" if f.args.len() == 1 => {
    let mut args = f.args;
    let this = args.remove(0);
    Ok(Expression::TimeStrToDate(Box::new(
        crate::expressions::UnaryFunc {
            this,
            original_name: None,
            inferred_type: None,
        },
    )))
}
// TIME_STR_TO_TIME(x) -> Expression::TimeStrToTime for proper generation.
"TIME_STR_TO_TIME" if f.args.len() == 1 => {
    let mut args = f.args;
    let this = args.remove(0);
    Ok(Expression::TimeStrToTime(Box::new(
        crate::expressions::TimeStrToTime {
            this: Box::new(this),
            zone: None,
        },
    )))
}
// MONTHS_BETWEEN(end, start) -> target-specific rewrites. Note the
// argument order: the first arg is the END date, the second the START.
"MONTHS_BETWEEN" if f.args.len() == 2 => {
    match target {
        DialectType::DuckDB => {
            let mut args = f.args;
            let end_date = args.remove(0);
            let start_date = args.remove(0);
            // Coerce both operands to DATE before any arithmetic.
            let cast_end = Self::ensure_cast_date(end_date);
            let cast_start = Self::ensure_cast_date(start_date);
            // DATE_DIFF('MONTH', start, end) + CASE WHEN DAY(end) = DAY(LAST_DAY(end)) AND DAY(start) = DAY(LAST_DAY(start)) THEN 0 ELSE (DAY(end) - DAY(start)) / 31.0 END
            // Whole-month difference plus a day-of-month fraction, except
            // when BOTH dates fall on their month's last day (then the
            // result is exact). The /31.0 divisor mirrors the classic
            // MONTHS_BETWEEN convention — TODO confirm exact parity with
            // the source dialect's rounding behavior.
            let dd = Expression::Function(Box::new(Function::new(
                "DATE_DIFF".to_string(),
                vec![
                    Expression::string("MONTH"),
                    cast_start.clone(),
                    cast_end.clone(),
                ],
            )));
            let day_end =
                Expression::Function(Box::new(Function::new(
                    "DAY".to_string(),
                    vec![cast_end.clone()],
                )));
            let day_start =
                Expression::Function(Box::new(Function::new(
                    "DAY".to_string(),
                    vec![cast_start.clone()],
                )));
            let last_day_end =
                Expression::Function(Box::new(Function::new(
                    "LAST_DAY".to_string(),
                    vec![cast_end.clone()],
                )));
            let last_day_start =
                Expression::Function(Box::new(Function::new(
                    "LAST_DAY".to_string(),
                    vec![cast_start.clone()],
                )));
            let day_last_end = Expression::Function(Box::new(
                Function::new("DAY".to_string(), vec![last_day_end]),
            ));
            let day_last_start = Expression::Function(Box::new(
                Function::new("DAY".to_string(), vec![last_day_start]),
            ));
            // DAY(end) = DAY(LAST_DAY(end))
            let cond1 = Expression::Eq(Box::new(BinaryOp::new(
                day_end.clone(),
                day_last_end,
            )));
            // DAY(start) = DAY(LAST_DAY(start))
            let cond2 = Expression::Eq(Box::new(BinaryOp::new(
                day_start.clone(),
                day_last_start,
            )));
            let both_cond =
                Expression::And(Box::new(BinaryOp::new(cond1, cond2)));
            // (DAY(end) - DAY(start)) — parenthesized so the division
            // below binds correctly when rendered.
            let day_diff = Expression::Sub(Box::new(BinaryOp::new(
                day_end, day_start,
            )));
            let day_diff_paren = Expression::Paren(Box::new(
                crate::expressions::Paren {
                    this: day_diff,
                    trailing_comments: Vec::new(),
                },
            ));
            let frac = Expression::Div(Box::new(BinaryOp::new(
                day_diff_paren,
                Expression::Literal(Box::new(Literal::Number(
                    "31.0".to_string(),
                ))),
            )));
            let case_expr = Expression::Case(Box::new(Case {
                operand: None,
                whens: vec![(both_cond, Expression::number(0))],
                else_: Some(frac),
                comments: Vec::new(),
                inferred_type: None,
            }));
            Ok(Expression::Add(Box::new(BinaryOp::new(dd, case_expr))))
        }
        DialectType::Snowflake | DialectType::Redshift => {
            // DATEDIFF(MONTH, start, end) — unit as a bare identifier.
            let mut args = f.args;
            let end_date = args.remove(0);
            let start_date = args.remove(0);
            let unit = Expression::Identifier(Identifier::new("MONTH"));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![unit, start_date, end_date],
            ))))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // DATE_DIFF('MONTH', start, end) — unit as a string literal.
            let mut args = f.args;
            let end_date = args.remove(0);
            let start_date = args.remove(0);
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_DIFF".to_string(),
                vec![Expression::string("MONTH"), start_date, end_date],
            ))))
        }
        _ => Ok(Expression::Function(f)),
    }
}
// MONTHS_BETWEEN(end, start, roundOff) - 3-arg form (Spark-specific).
// Drop the roundOff arg for non-Spark targets, keep it for Spark.
"MONTHS_BETWEEN" if f.args.len() == 3 => {
    match target {
        DialectType::Spark | DialectType::Databricks => {
            Ok(Expression::Function(f))
        }
        _ => {
            // Drop the 3rd arg and delegate to the 2-arg logic by
            // re-normalizing the rebuilt 2-arg call.
            let mut args = f.args;
            let end_date = args.remove(0);
            let start_date = args.remove(0);
            // Re-create as 2-arg and process
            let f2 = Function::new(
                "MONTHS_BETWEEN".to_string(),
                vec![end_date, start_date],
            );
            let e2 = Expression::Function(Box::new(f2));
            Self::cross_dialect_normalize(e2, source, target)
        }
    }
}
// TO_TIMESTAMP(x) with 1 arg -> CAST(x AS TIMESTAMP). Applies only when
// the SOURCE dialect is the Spark family / Hive, where the one-arg form
// is a plain cast.
"TO_TIMESTAMP"
    if f.args.len() == 1
        && matches!(
            source,
            DialectType::Spark
                | DialectType::Databricks
                | DialectType::Hive
        ) =>
{
    let arg = f.args.into_iter().next().unwrap();
    Ok(Expression::Cast(Box::new(Cast {
        this: arg,
        // Timestamp without time zone, default precision.
        to: DataType::Timestamp {
            timezone: false,
            precision: None,
        },
        trailing_comments: vec![],
        double_colon_syntax: false,
        format: None,
        default: None,
        inferred_type: None,
    })))
}
// STRING(x) -> CAST(x AS STRING/TEXT). Applies when the SOURCE is the
// Spark family; Spark/Databricks/Hive targets keep the STRING type name,
// everything else gets TEXT.
"STRING"
    if f.args.len() == 1
        && matches!(
            source,
            DialectType::Spark | DialectType::Databricks
        ) =>
{
    let arg = f.args.into_iter().next().unwrap();
    let dt = match target {
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => DataType::Custom {
            name: "STRING".to_string(),
        },
        _ => DataType::Text,
    };
    Ok(Expression::Cast(Box::new(Cast {
        this: arg,
        to: dt,
        trailing_comments: vec![],
        double_colon_syntax: false,
        format: None,
        default: None,
        inferred_type: None,
    })))
}
// LOGICAL_OR(x) -> BOOL_OR(x) for Spark-family targets; all other targets
// keep the LOGICAL_OR name.
"LOGICAL_OR" if f.args.len() == 1 => {
    let name = match target {
        DialectType::Spark | DialectType::Databricks => "BOOL_OR",
        _ => "LOGICAL_OR",
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        f.args,
    ))))
}
19176 // SPLIT(x, pattern) from Spark -> STR_SPLIT_REGEX for DuckDB, REGEXP_SPLIT for Presto
19177 "SPLIT"
19178 if f.args.len() == 2
19179 && matches!(
19180 source,
19181 DialectType::Spark
19182 | DialectType::Databricks
19183 | DialectType::Hive
19184 ) =>
19185 {
19186 let name = match target {
19187 DialectType::DuckDB => "STR_SPLIT_REGEX",
19188 DialectType::Presto
19189 | DialectType::Trino
19190 | DialectType::Athena => "REGEXP_SPLIT",
19191 DialectType::Spark
19192 | DialectType::Databricks
19193 | DialectType::Hive => "SPLIT",
19194 _ => "SPLIT",
19195 };
19196 Ok(Expression::Function(Box::new(Function::new(
19197 name.to_string(),
19198 f.args,
19199 ))))
19200 }
// TRY_ELEMENT_AT(arr, idx) -> ELEMENT_AT for the Presto family,
// subscript syntax arr[idx] for DuckDB; other targets pass through.
"TRY_ELEMENT_AT" if f.args.len() == 2 => match target {
    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
        // Presumably ELEMENT_AT already returns NULL on out-of-bounds,
        // matching TRY_ELEMENT_AT semantics — TODO confirm per dialect.
        Ok(Expression::Function(Box::new(Function::new(
            "ELEMENT_AT".to_string(),
            f.args,
        ))))
    }
    DialectType::DuckDB => {
        let mut args = f.args;
        let arr = args.remove(0);
        let idx = args.remove(0);
        Ok(Expression::Subscript(Box::new(
            crate::expressions::Subscript {
                this: arr,
                index: idx,
            },
        )))
    }
    _ => Ok(Expression::Function(f)),
},
// ARRAY_FILTER(arr, lambda) -> FILTER for most targets, LIST_FILTER for
// DuckDB, unchanged for StarRocks.
"ARRAY_FILTER" if f.args.len() == 2 => {
    let name = match target {
        DialectType::DuckDB => "LIST_FILTER",
        DialectType::StarRocks => "ARRAY_FILTER",
        _ => "FILTER",
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        f.args,
    ))))
}
// FILTER(arr, lambda) -> ARRAY_FILTER for StarRocks, LIST_FILTER for
// DuckDB. Mirror of the ARRAY_FILTER arm so both spellings normalize to
// the same target name.
"FILTER" if f.args.len() == 2 => {
    let name = match target {
        DialectType::DuckDB => "LIST_FILTER",
        DialectType::StarRocks => "ARRAY_FILTER",
        _ => "FILTER",
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        f.args,
    ))))
}
// REDUCE(arr, init, lambda1[, lambda2]) -> AGGREGATE for Spark-family
// targets; everything else keeps REDUCE. Arguments pass through as-is.
"REDUCE" if f.args.len() >= 3 => {
    let name = match target {
        DialectType::Spark | DialectType::Databricks => "AGGREGATE",
        _ => "REDUCE",
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        f.args,
    ))))
}
// CURRENT_SCHEMA() -> dialect-specific spelling.
"CURRENT_SCHEMA" => {
    match target {
        DialectType::PostgreSQL => {
            // PostgreSQL: CURRENT_SCHEMA (no parens) — the Function node
            // is built by hand so no_parens can be set.
            Ok(Expression::Function(Box::new(Function {
                name: "CURRENT_SCHEMA".to_string(),
                args: vec![],
                distinct: false,
                trailing_comments: vec![],
                use_bracket_syntax: false,
                no_parens: true,
                quoted: false,
                span: None,
                inferred_type: None,
            })))
        }
        // MySQL family: SCHEMA().
        DialectType::MySQL
        | DialectType::Doris
        | DialectType::StarRocks => Ok(Expression::Function(Box::new(
            Function::new("SCHEMA".to_string(), vec![]),
        ))),
        // TSQL: SCHEMA_NAME().
        DialectType::TSQL => Ok(Expression::Function(Box::new(
            Function::new("SCHEMA_NAME".to_string(), vec![]),
        ))),
        // SQLite has no schemas; fold to the literal 'main' database name.
        DialectType::SQLite => Ok(Expression::Literal(Box::new(
            Literal::String("main".to_string()),
        ))),
        _ => Ok(Expression::Function(f)),
    }
}
// LTRIM(str, chars) 2-arg -> TRIM(LEADING chars FROM str) for
// Spark/Hive/Databricks/ClickHouse (these spell the 2-arg form via the
// SQL-standard TRIM syntax).
"LTRIM" if f.args.len() == 2 => match target {
    DialectType::Spark
    | DialectType::Hive
    | DialectType::Databricks
    | DialectType::ClickHouse => {
        let mut args = f.args;
        let str_expr = args.remove(0);
        let chars = args.remove(0);
        Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
            this: str_expr,
            characters: Some(chars),
            position: crate::expressions::TrimPosition::Leading,
            sql_standard_syntax: true,
            position_explicit: true,
        })))
    }
    _ => Ok(Expression::Function(f)),
},
// RTRIM(str, chars) 2-arg -> TRIM(TRAILING chars FROM str); same targets
// and structure as the LTRIM arm, with Trailing position.
"RTRIM" if f.args.len() == 2 => match target {
    DialectType::Spark
    | DialectType::Hive
    | DialectType::Databricks
    | DialectType::ClickHouse => {
        let mut args = f.args;
        let str_expr = args.remove(0);
        let chars = args.remove(0);
        Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
            this: str_expr,
            characters: Some(chars),
            position: crate::expressions::TrimPosition::Trailing,
            sql_standard_syntax: true,
            position_explicit: true,
        })))
    }
    _ => Ok(Expression::Function(f)),
},
// ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse (camelCase builtin).
"ARRAY_REVERSE" if f.args.len() == 1 => match target {
    DialectType::ClickHouse => {
        let mut new_f = *f;
        new_f.name = "arrayReverse".to_string();
        Ok(Expression::Function(Box::new(new_f)))
    }
    _ => Ok(Expression::Function(f)),
},
// UUID() -> NEWID() for TSQL/Fabric.
"UUID" if f.args.is_empty() => match target {
    DialectType::TSQL | DialectType::Fabric => {
        Ok(Expression::Function(Box::new(Function::new(
            "NEWID".to_string(),
            vec![],
        ))))
    }
    _ => Ok(Expression::Function(f)),
},
// FARM_FINGERPRINT(x) -> farmFingerprint64(x) for ClickHouse,
// FARMFINGERPRINT64(x) for Redshift; other targets keep the name.
"FARM_FINGERPRINT" if f.args.len() == 1 => match target {
    DialectType::ClickHouse => {
        let mut new_f = *f;
        new_f.name = "farmFingerprint64".to_string();
        Ok(Expression::Function(Box::new(new_f)))
    }
    DialectType::Redshift => {
        let mut new_f = *f;
        new_f.name = "FARMFINGERPRINT64".to_string();
        Ok(Expression::Function(Box::new(new_f)))
    }
    _ => Ok(Expression::Function(f)),
},
// JSON_KEYS(x) -> JSON_OBJECT_KEYS(x) for Databricks/Spark,
// OBJECT_KEYS(x) for Snowflake; arguments unchanged.
"JSON_KEYS" => match target {
    DialectType::Databricks | DialectType::Spark => {
        let mut new_f = *f;
        new_f.name = "JSON_OBJECT_KEYS".to_string();
        Ok(Expression::Function(Box::new(new_f)))
    }
    DialectType::Snowflake => {
        let mut new_f = *f;
        new_f.name = "OBJECT_KEYS".to_string();
        Ok(Expression::Function(Box::new(new_f)))
    }
    _ => Ok(Expression::Function(f)),
},
// WEEKOFYEAR(x) -> WEEKISO(x) for Snowflake (ISO week numbering —
// assumed equivalent to WEEKOFYEAR's semantics; TODO confirm).
"WEEKOFYEAR" => match target {
    DialectType::Snowflake => {
        let mut new_f = *f;
        new_f.name = "WEEKISO".to_string();
        Ok(Expression::Function(Box::new(new_f)))
    }
    _ => Ok(Expression::Function(f)),
},
// FORMAT(fmt, args...) -> FORMAT_STRING(fmt, args...) for Spark-family
// targets, but only when the SOURCE dialect is Generic (dialect-specific
// FORMATs are handled elsewhere or passed through).
"FORMAT"
    if f.args.len() >= 2 && matches!(source, DialectType::Generic) =>
{
    match target {
        DialectType::Databricks | DialectType::Spark => {
            let mut new_f = *f;
            new_f.name = "FORMAT_STRING".to_string();
            Ok(Expression::Function(Box::new(new_f)))
        }
        _ => Ok(Expression::Function(f)),
    }
}
// CONCAT_WS(sep, args...) for the Presto family: wrap every argument
// after the separator in CAST(arg AS VARCHAR), since Presto's CONCAT_WS
// rejects non-varchar arguments.
"CONCAT_WS" if f.args.len() >= 2 => match target {
    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
        let mut args = f.args;
        // The separator (first arg) is left uncast.
        let sep = args.remove(0);
        let cast_args: Vec<Expression> = args
            .into_iter()
            .map(|a| {
                Expression::Cast(Box::new(Cast {
                    this: a,
                    to: DataType::VarChar {
                        length: None,
                        parenthesized_length: false,
                    },
                    double_colon_syntax: false,
                    trailing_comments: Vec::new(),
                    format: None,
                    default: None,
                    inferred_type: None,
                }))
            })
            .collect();
        let mut new_args = vec![sep];
        new_args.extend(cast_args);
        Ok(Expression::Function(Box::new(Function::new(
            "CONCAT_WS".to_string(),
            new_args,
        ))))
    }
    _ => Ok(Expression::Function(f)),
},
// ARRAY_SLICE(x, start, end) -> SLICE for Presto/Trino/Athena/Spark/
// Databricks, arraySlice for ClickHouse, and an index-converting rewrite
// for the Snowflake -> DuckDB case.
"ARRAY_SLICE" if f.args.len() >= 2 => match target {
    DialectType::DuckDB
        if f.args.len() == 3
            && matches!(source, DialectType::Snowflake) =>
    {
        // Snowflake ARRAY_SLICE (0-indexed, exclusive end)
        // -> DuckDB ARRAY_SLICE (1-indexed, inclusive end)
        let mut args = f.args;
        let arr = args.remove(0);
        let start = args.remove(0);
        let end = args.remove(0);

        // CASE WHEN start >= 0 THEN start + 1 ELSE start END
        // Non-negative starts shift by +1 (0-indexed -> 1-indexed);
        // negative starts pass through unshifted (assumed to count from
        // the end in both dialects — TODO confirm).
        let adjusted_start = Expression::Case(Box::new(Case {
            operand: None,
            whens: vec![(
                Expression::Gte(Box::new(BinaryOp {
                    left: start.clone(),
                    right: Expression::number(0),
                    left_comments: vec![],
                    operator_comments: vec![],
                    trailing_comments: vec![],
                    inferred_type: None,
                })),
                Expression::Add(Box::new(BinaryOp {
                    left: start.clone(),
                    right: Expression::number(1),
                    left_comments: vec![],
                    operator_comments: vec![],
                    trailing_comments: vec![],
                    inferred_type: None,
                })),
            )],
            else_: Some(start),
            comments: vec![],
            inferred_type: None,
        }));

        // CASE WHEN end < 0 THEN end - 1 ELSE end END
        // Converts Snowflake's exclusive end to DuckDB's inclusive end
        // for negative bounds; non-negative ends pass through (the +1
        // index shift and the exclusive->inclusive shift cancel out).
        let adjusted_end = Expression::Case(Box::new(Case {
            operand: None,
            whens: vec![(
                Expression::Lt(Box::new(BinaryOp {
                    left: end.clone(),
                    right: Expression::number(0),
                    left_comments: vec![],
                    operator_comments: vec![],
                    trailing_comments: vec![],
                    inferred_type: None,
                })),
                Expression::Sub(Box::new(BinaryOp {
                    left: end.clone(),
                    right: Expression::number(1),
                    left_comments: vec![],
                    operator_comments: vec![],
                    trailing_comments: vec![],
                    inferred_type: None,
                })),
            )],
            else_: Some(end),
            comments: vec![],
            inferred_type: None,
        }));

        Ok(Expression::Function(Box::new(Function::new(
            "ARRAY_SLICE".to_string(),
            vec![arr, adjusted_start, adjusted_end],
        ))))
    }
    DialectType::Presto
    | DialectType::Trino
    | DialectType::Athena
    | DialectType::Databricks
    | DialectType::Spark => {
        let mut new_f = *f;
        new_f.name = "SLICE".to_string();
        Ok(Expression::Function(Box::new(new_f)))
    }
    DialectType::ClickHouse => {
        let mut new_f = *f;
        new_f.name = "arraySlice".to_string();
        Ok(Expression::Function(Box::new(new_f)))
    }
    _ => Ok(Expression::Function(f)),
},
// ARRAY_PREPEND(arr, x) -> LIST_PREPEND(x, arr) for DuckDB.
// Note the argument swap: LIST_PREPEND takes (value, list).
"ARRAY_PREPEND" if f.args.len() == 2 => match target {
    DialectType::DuckDB => {
        let mut args = f.args;
        let arr = args.remove(0);
        let val = args.remove(0);
        Ok(Expression::Function(Box::new(Function::new(
            "LIST_PREPEND".to_string(),
            vec![val, arr],
        ))))
    }
    _ => Ok(Expression::Function(f)),
},
// ARRAY_REMOVE(arr, target) -> dialect-specific filtering construct.
"ARRAY_REMOVE" if f.args.len() == 2 => {
    match target {
        DialectType::DuckDB => {
            let mut args = f.args;
            let arr = args.remove(0);
            let target_val = args.remove(0);
            // `_u` is the synthetic lambda parameter name.
            let u_id = crate::expressions::Identifier::new("_u");
            // LIST_FILTER(arr, _u -> _u <> target)
            let lambda = Expression::Lambda(Box::new(
                crate::expressions::LambdaExpr {
                    parameters: vec![u_id.clone()],
                    body: Expression::Neq(Box::new(BinaryOp {
                        left: Expression::Identifier(u_id),
                        right: target_val,
                        left_comments: Vec::new(),
                        operator_comments: Vec::new(),
                        trailing_comments: Vec::new(),
                        inferred_type: None,
                    })),
                    colon: false,
                    parameter_types: Vec::new(),
                },
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "LIST_FILTER".to_string(),
                vec![arr, lambda],
            ))))
        }
        DialectType::ClickHouse => {
            let mut args = f.args;
            let arr = args.remove(0);
            let target_val = args.remove(0);
            let u_id = crate::expressions::Identifier::new("_u");
            // arrayFilter(_u -> _u <> target, arr)
            // Same lambda as the DuckDB case, but ClickHouse takes the
            // lambda FIRST and the array SECOND.
            let lambda = Expression::Lambda(Box::new(
                crate::expressions::LambdaExpr {
                    parameters: vec![u_id.clone()],
                    body: Expression::Neq(Box::new(BinaryOp {
                        left: Expression::Identifier(u_id),
                        right: target_val,
                        left_comments: Vec::new(),
                        operator_comments: Vec::new(),
                        trailing_comments: Vec::new(),
                        inferred_type: None,
                    })),
                    colon: false,
                    parameter_types: Vec::new(),
                },
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "arrayFilter".to_string(),
                vec![lambda, arr],
            ))))
        }
        DialectType::BigQuery => {
            // ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)
            // BigQuery has no array-filter builtin, so build a correlated
            // ARRAY(subquery) over UNNEST.
            let mut args = f.args;
            let arr = args.remove(0);
            let target_val = args.remove(0);
            let u_id = crate::expressions::Identifier::new("_u");
            let u_col = Expression::Column(Box::new(
                crate::expressions::Column {
                    name: u_id.clone(),
                    table: None,
                    join_mark: false,
                    trailing_comments: Vec::new(),
                    span: None,
                    inferred_type: None,
                },
            ));
            // UNNEST(the_array) AS _u
            let unnest_expr = Expression::Unnest(Box::new(
                crate::expressions::UnnestFunc {
                    this: arr,
                    expressions: Vec::new(),
                    with_ordinality: false,
                    alias: None,
                    offset_alias: None,
                },
            ));
            let aliased_unnest = Expression::Alias(Box::new(
                crate::expressions::Alias {
                    this: unnest_expr,
                    alias: u_id.clone(),
                    column_aliases: Vec::new(),
                    pre_alias_comments: Vec::new(),
                    trailing_comments: Vec::new(),
                    inferred_type: None,
                },
            ));
            // _u <> target
            let where_cond = Expression::Neq(Box::new(BinaryOp {
                left: u_col.clone(),
                right: target_val,
                left_comments: Vec::new(),
                operator_comments: Vec::new(),
                trailing_comments: Vec::new(),
                inferred_type: None,
            }));
            // SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target
            let subquery = Expression::Select(Box::new(
                crate::expressions::Select::new()
                    .column(u_col)
                    .from(aliased_unnest)
                    .where_(where_cond),
            ));
            // ARRAY(subquery) -- use ArrayFunc with subquery as single element
            Ok(Expression::ArrayFunc(Box::new(
                crate::expressions::ArrayConstructor {
                    expressions: vec![subquery],
                    bracket_notation: false,
                    use_list_keyword: false,
                },
            )))
        }
        _ => Ok(Expression::Function(f)),
    }
}
// PARSE_JSON(str) -> removed for targets without a PARSE_JSON builtin;
// the inner argument is used directly.
"PARSE_JSON" if f.args.len() == 1 => {
    match target {
        DialectType::SQLite
        | DialectType::Doris
        | DialectType::MySQL
        | DialectType::StarRocks => {
            // Strip PARSE_JSON, return the inner argument
            // (guard guarantees exactly one arg, so unwrap is safe).
            Ok(f.args.into_iter().next().unwrap())
        }
        _ => Ok(Expression::Function(f)),
    }
}
// JSON_REMOVE(PARSE_JSON(str), path...) -> for SQLite strip PARSE_JSON.
// This is handled by PARSE_JSON stripping above; JSON_REMOVE is passed through.
"JSON_REMOVE" => Ok(Expression::Function(f)),
// JSON_SET(PARSE_JSON(str), path, PARSE_JSON(val)) -> for SQLite strip PARSE_JSON.
// This is handled by PARSE_JSON stripping above; JSON_SET is passed through.
"JSON_SET" => Ok(Expression::Function(f)),
// DECODE(x, search1, result1, ..., default) -> CASE WHEN
// Behavior per search value type:
// NULL literal -> CASE WHEN x IS NULL THEN result
// Literal (number, string, bool) -> CASE WHEN x = literal THEN result
// Non-literal (column, expr) -> CASE WHEN x = search OR (x IS NULL AND search IS NULL) THEN result
"DECODE" if f.args.len() >= 3 => {
    // Keep as DECODE for targets that support it natively
    let keep_as_decode = matches!(
        target,
        DialectType::Oracle
            | DialectType::Snowflake
            | DialectType::Redshift
            | DialectType::Teradata
            | DialectType::Spark
            | DialectType::Databricks
    );
    if keep_as_decode {
        return Ok(Expression::Function(f));
    }

    let mut args = f.args;
    // First arg is the discriminant compared against every search value.
    let this_expr = args.remove(0);
    // Walk the remaining args two at a time as (search, result) pairs;
    // a trailing unpaired arg becomes the CASE's ELSE default.
    let mut pairs = Vec::new();
    let mut default = None;
    let mut i = 0;
    while i + 1 < args.len() {
        pairs.push((args[i].clone(), args[i + 1].clone()));
        i += 2;
    }
    if i < args.len() {
        default = Some(args[i].clone());
    }
19695 // Helper: check if expression is a literal value
19696 fn is_literal(e: &Expression) -> bool {
19697 matches!(
19698 e,
19699 Expression::Literal(_)
19700 | Expression::Boolean(_)
19701 | Expression::Neg(_)
19702 )
19703 }
// Build one CASE WHEN branch per (search, result) pair, choosing the
// comparison strategy from the search value's shape (see the DECODE
// comment at the top of this arm).
let whens: Vec<(Expression, Expression)> = pairs
    .into_iter()
    .map(|(search, result)| {
        if matches!(&search, Expression::Null(_)) {
            // NULL search -> IS NULL
            let condition = Expression::Is(Box::new(BinaryOp {
                left: this_expr.clone(),
                right: Expression::Null(crate::expressions::Null),
                left_comments: Vec::new(),
                operator_comments: Vec::new(),
                trailing_comments: Vec::new(),
                inferred_type: None,
            }));
            (condition, result)
        } else if is_literal(&search) {
            // Literal search -> simple equality
            let eq = Expression::Eq(Box::new(BinaryOp {
                left: this_expr.clone(),
                right: search,
                left_comments: Vec::new(),
                operator_comments: Vec::new(),
                trailing_comments: Vec::new(),
                inferred_type: None,
            }));
            (eq, result)
        } else {
            // Non-literal (column ref, expression) -> null-safe comparison:
            // x = search OR (x IS NULL AND search IS NULL)
            // Comparison-operator searches get wrapped in parens so the
            // generated SQL keeps the intended precedence.
            let needs_paren = matches!(
                &search,
                Expression::Eq(_)
                    | Expression::Neq(_)
                    | Expression::Gt(_)
                    | Expression::Gte(_)
                    | Expression::Lt(_)
                    | Expression::Lte(_)
            );
            let search_for_eq = if needs_paren {
                Expression::Paren(Box::new(
                    crate::expressions::Paren {
                        this: search.clone(),
                        trailing_comments: Vec::new(),
                    },
                ))
            } else {
                search.clone()
            };
            let eq = Expression::Eq(Box::new(BinaryOp {
                left: this_expr.clone(),
                right: search_for_eq,
                left_comments: Vec::new(),
                operator_comments: Vec::new(),
                trailing_comments: Vec::new(),
                inferred_type: None,
            }));
            // Second, independently-parenthesized copy of the search for
            // the IS NULL half of the condition.
            let search_for_null = if needs_paren {
                Expression::Paren(Box::new(
                    crate::expressions::Paren {
                        this: search.clone(),
                        trailing_comments: Vec::new(),
                    },
                ))
            } else {
                search.clone()
            };
            let x_is_null = Expression::Is(Box::new(BinaryOp {
                left: this_expr.clone(),
                right: Expression::Null(crate::expressions::Null),
                left_comments: Vec::new(),
                operator_comments: Vec::new(),
                trailing_comments: Vec::new(),
                inferred_type: None,
            }));
            let s_is_null = Expression::Is(Box::new(BinaryOp {
                left: search_for_null,
                right: Expression::Null(crate::expressions::Null),
                left_comments: Vec::new(),
                operator_comments: Vec::new(),
                trailing_comments: Vec::new(),
                inferred_type: None,
            }));
            let both_null = Expression::And(Box::new(BinaryOp {
                left: x_is_null,
                right: s_is_null,
                left_comments: Vec::new(),
                operator_comments: Vec::new(),
                trailing_comments: Vec::new(),
                inferred_type: None,
            }));
            // OR the equality with the parenthesized both-null check.
            let condition = Expression::Or(Box::new(BinaryOp {
                left: eq,
                right: Expression::Paren(Box::new(
                    crate::expressions::Paren {
                        this: both_null,
                        trailing_comments: Vec::new(),
                    },
                )),
                left_comments: Vec::new(),
                operator_comments: Vec::new(),
                trailing_comments: Vec::new(),
                inferred_type: None,
            }));
            (condition, result)
        }
    })
    .collect();
// Assemble the final searched CASE; the trailing unpaired DECODE arg
// (if any) became `default` and lands in ELSE.
Ok(Expression::Case(Box::new(Case {
    operand: None,
    whens,
    else_: default,
    comments: Vec::new(),
    inferred_type: None,
})))
}
// LEVENSHTEIN(a, b, ...) -> dialect-specific spelling; arguments are
// always forwarded unchanged.
"LEVENSHTEIN" => {
    match target {
        DialectType::BigQuery => {
            let mut new_f = *f;
            new_f.name = "EDIT_DISTANCE".to_string();
            Ok(Expression::Function(Box::new(new_f)))
        }
        DialectType::Drill => {
            let mut new_f = *f;
            new_f.name = "LEVENSHTEIN_DISTANCE".to_string();
            Ok(Expression::Function(Box::new(new_f)))
        }
        DialectType::PostgreSQL if f.args.len() == 6 => {
            // PostgreSQL: LEVENSHTEIN(src, tgt, ins, del, sub, max_d) -> LEVENSHTEIN_LESS_EQUAL
            // 2 args: basic, 5 args: with costs, 6 args: with costs + max_distance
            // Only the 6-arg (max_distance) form is renamed here; the
            // 2- and 5-arg forms fall through to the catch-all unchanged.
            let mut new_f = *f;
            new_f.name = "LEVENSHTEIN_LESS_EQUAL".to_string();
            Ok(Expression::Function(Box::new(new_f)))
        }
        _ => Ok(Expression::Function(f)),
    }
}
19840 // ARRAY_MAX(x) -> arrayMax(x) for ClickHouse, LIST_MAX(x) for DuckDB
19841 "ARRAY_MAX" => {
19842 let name = match target {
19843 DialectType::ClickHouse => "arrayMax",
19844 DialectType::DuckDB => "LIST_MAX",
19845 _ => "ARRAY_MAX",
19846 };
19847 let mut new_f = *f;
19848 new_f.name = name.to_string();
19849 Ok(Expression::Function(Box::new(new_f)))
19850 }
19851 // ARRAY_MIN(x) -> arrayMin(x) for ClickHouse, LIST_MIN(x) for DuckDB
19852 "ARRAY_MIN" => {
19853 let name = match target {
19854 DialectType::ClickHouse => "arrayMin",
19855 DialectType::DuckDB => "LIST_MIN",
19856 _ => "ARRAY_MIN",
19857 };
19858 let mut new_f = *f;
19859 new_f.name = name.to_string();
19860 Ok(Expression::Function(Box::new(new_f)))
19861 }
// JAROWINKLER_SIMILARITY(a, b) -> jaroWinklerSimilarity(UPPER(a), UPPER(b)) for ClickHouse
// -> JARO_WINKLER_SIMILARITY(UPPER(a), UPPER(b)) for DuckDB
// Both targets get UPPER() wrappers — presumably to emulate a
// case-insensitive source semantics; TODO confirm against the source
// dialect's definition.
"JAROWINKLER_SIMILARITY" if f.args.len() == 2 => {
    let mut args = f.args;
    // pop() yields the args in reverse order: b first, then a.
    let b = args.pop().unwrap();
    let a = args.pop().unwrap();
    match target {
        DialectType::ClickHouse => {
            let upper_a = Expression::Upper(Box::new(
                crate::expressions::UnaryFunc::new(a),
            ));
            let upper_b = Expression::Upper(Box::new(
                crate::expressions::UnaryFunc::new(b),
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "jaroWinklerSimilarity".to_string(),
                vec![upper_a, upper_b],
            ))))
        }
        DialectType::DuckDB => {
            let upper_a = Expression::Upper(Box::new(
                crate::expressions::UnaryFunc::new(a),
            ));
            let upper_b = Expression::Upper(Box::new(
                crate::expressions::UnaryFunc::new(b),
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "JARO_WINKLER_SIMILARITY".to_string(),
                vec![upper_a, upper_b],
            ))))
        }
        // Other targets: rebuild the original call (args already moved
        // out of `f`, so a fresh Function is constructed).
        _ => Ok(Expression::Function(Box::new(Function::new(
            "JAROWINKLER_SIMILARITY".to_string(),
            vec![a, b],
        )))),
    }
}
// CURRENT_SCHEMAS(x) -> CURRENT_SCHEMAS() for Snowflake (drop arg).
"CURRENT_SCHEMAS" => match target {
    DialectType::Snowflake => Ok(Expression::Function(Box::new(
        Function::new("CURRENT_SCHEMAS".to_string(), vec![]),
    ))),
    _ => Ok(Expression::Function(f)),
},
// TRUNC/TRUNCATE (numeric) -> dialect-specific rewrite.
// Guard: at most two args (value, optional decimals); larger arities fall through.
"TRUNC" | "TRUNCATE" if f.args.len() <= 2 => {
    match target {
        DialectType::TSQL | DialectType::Fabric => {
            // T-SQL has no TRUNC; ROUND(x, decimals, 1) — a non-zero third
            // argument makes ROUND truncate instead of round.
            let mut args = f.args;
            let this = if args.is_empty() {
                // Zero-arg call is malformed; emit TRUNC() unchanged rather than panic.
                return Ok(Expression::Function(Box::new(
                    Function::new("TRUNC".to_string(), args),
                )));
            } else {
                args.remove(0)
            };
            // Missing decimals defaults to 0 (truncate to integer).
            let decimals = if args.is_empty() {
                Expression::Literal(Box::new(Literal::Number(
                    "0".to_string(),
                )))
            } else {
                args.remove(0)
            };
            Ok(Expression::Function(Box::new(Function::new(
                "ROUND".to_string(),
                vec![
                    this,
                    decimals,
                    Expression::Literal(Box::new(Literal::Number(
                        "1".to_string(),
                    ))),
                ],
            ))))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // TRUNCATE(x, decimals)
            let mut new_f = *f;
            new_f.name = "TRUNCATE".to_string();
            Ok(Expression::Function(Box::new(new_f)))
        }
        DialectType::MySQL
        | DialectType::SingleStore
        | DialectType::TiDB => {
            // TRUNCATE(x, decimals)
            let mut new_f = *f;
            new_f.name = "TRUNCATE".to_string();
            Ok(Expression::Function(Box::new(new_f)))
        }
        DialectType::DuckDB => {
            // DuckDB supports TRUNC(x, decimals) — preserve both args
            let mut args = f.args;
            // Snowflake fractions_supported: wrap non-INT decimals in CAST(... AS INT).
            // "Already integer" is approximated syntactically: a numeric literal
            // or an existing CAST to an integer type needs no extra wrapping.
            if args.len() == 2 && matches!(source, DialectType::Snowflake) {
                let decimals = args.remove(1);
                let is_int = matches!(&decimals, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)))
                    || matches!(&decimals, Expression::Cast(c) if matches!(c.to, DataType::Int { .. } | DataType::SmallInt { .. } | DataType::BigInt { .. } | DataType::TinyInt { .. }));
                let wrapped = if !is_int {
                    Expression::Cast(Box::new(crate::expressions::Cast {
                        this: decimals,
                        to: DataType::Int { length: None, integer_spelling: false },
                        double_colon_syntax: false,
                        trailing_comments: Vec::new(),
                        format: None,
                        default: None,
                        inferred_type: None,
                    }))
                } else {
                    decimals
                };
                args.push(wrapped);
            }
            Ok(Expression::Function(Box::new(Function::new(
                "TRUNC".to_string(),
                args,
            ))))
        }
        DialectType::ClickHouse => {
            // trunc(x, decimals) - lowercase spelling
            let mut new_f = *f;
            new_f.name = "trunc".to_string();
            Ok(Expression::Function(Box::new(new_f)))
        }
        DialectType::Spark | DialectType::Databricks => {
            // Spark: TRUNC is date-only; numeric TRUNC → CAST(x AS BIGINT).
            // NOTE(review): this discards a second `decimals` argument, so
            // TRUNC(x, 2) loses its fractional digits — confirm intended.
            let this = f.args.into_iter().next().unwrap_or(
                Expression::Literal(Box::new(Literal::Number(
                    "0".to_string(),
                ))),
            );
            Ok(Expression::Cast(Box::new(crate::expressions::Cast {
                this,
                to: crate::expressions::DataType::BigInt {
                    length: None,
                },
                double_colon_syntax: false,
                trailing_comments: Vec::new(),
                format: None,
                default: None,
                inferred_type: None,
            })))
        }
        _ => {
            // TRUNC(x, decimals) for PostgreSQL, Oracle, Snowflake, etc.
            let mut new_f = *f;
            new_f.name = "TRUNC".to_string();
            Ok(Expression::Function(Box::new(new_f)))
        }
    }
}
// CURRENT_VERSION() -> kept for Snowflake/Databricks/StarRocks,
// SQLITE_VERSION() for SQLite, VERSION() everywhere else.
"CURRENT_VERSION" => match target {
    DialectType::Snowflake
    | DialectType::Databricks
    | DialectType::StarRocks => Ok(Expression::Function(f)),
    DialectType::SQLite => {
        let mut new_f = *f;
        new_f.name = "SQLITE_VERSION".to_string();
        Ok(Expression::Function(Box::new(new_f)))
    }
    _ => {
        let mut new_f = *f;
        new_f.name = "VERSION".to_string();
        Ok(Expression::Function(Box::new(new_f)))
    }
},
// ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse; unchanged elsewhere.
"ARRAY_REVERSE" => match target {
    DialectType::ClickHouse => {
        let mut new_f = *f;
        new_f.name = "arrayReverse".to_string();
        Ok(Expression::Function(Box::new(new_f)))
    }
    _ => Ok(Expression::Function(f)),
},
// GENERATE_DATE_ARRAY(start, end[, step]) -> target-specific.
// Every branch that synthesizes a default step uses INTERVAL '1' DAY; targets
// that cannot express the construct directly get an Expression::GenerateSeries
// node (rendered by the target generator — presumably as SEQUENCE on
// Presto/Trino/Spark; TODO confirm generator behavior) or are kept as-is for a
// later preprocessing pass.
"GENERATE_DATE_ARRAY" => {
    let mut args = f.args;
    if matches!(target, DialectType::BigQuery) {
        // BigQuery keeps GENERATE_DATE_ARRAY; add default interval if not present
        if args.len() == 2 {
            let default_interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(Expression::Literal(Box::new(
                        Literal::String("1".to_string()),
                    ))),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit: crate::expressions::IntervalUnit::Day,
                            use_plural: false,
                        },
                    ),
                },
            ));
            args.push(default_interval);
        }
        Ok(Expression::Function(Box::new(Function::new(
            "GENERATE_DATE_ARRAY".to_string(),
            args,
        ))))
    } else if matches!(target, DialectType::DuckDB) {
        // DuckDB: CAST(GENERATE_SERIES(start, end, step) AS DATE[])
        let start = args.get(0).cloned();
        let end = args.get(1).cloned();
        // Default step: INTERVAL '1' DAY when the caller gave only start/end.
        let step = args.get(2).cloned().or_else(|| {
            Some(Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(Expression::Literal(Box::new(
                        Literal::String("1".to_string()),
                    ))),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit: crate::expressions::IntervalUnit::Day,
                            use_plural: false,
                        },
                    ),
                },
            )))
        });
        let gen_series = Expression::GenerateSeries(Box::new(
            crate::expressions::GenerateSeries {
                start: start.map(Box::new),
                end: end.map(Box::new),
                step: step.map(Box::new),
                is_end_exclusive: None,
            },
        ));
        // The outer cast restores DATE element typing lost by GENERATE_SERIES.
        Ok(Expression::Cast(Box::new(Cast {
            this: gen_series,
            to: DataType::Array {
                element_type: Box::new(DataType::Date),
                dimension: None,
            },
            trailing_comments: vec![],
            double_colon_syntax: false,
            format: None,
            default: None,
            inferred_type: None,
        })))
    } else if matches!(
        target,
        DialectType::Presto | DialectType::Trino | DialectType::Athena
    ) {
        // Presto/Trino: SEQUENCE(start, end, interval) with interval normalization
        let start = args.get(0).cloned();
        let end = args.get(1).cloned();
        let step = args.get(2).cloned().or_else(|| {
            Some(Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(Expression::Literal(Box::new(
                        Literal::String("1".to_string()),
                    ))),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit: crate::expressions::IntervalUnit::Day,
                            use_plural: false,
                        },
                    ),
                },
            )))
        });
        let gen_series = Expression::GenerateSeries(Box::new(
            crate::expressions::GenerateSeries {
                start: start.map(Box::new),
                end: end.map(Box::new),
                step: step.map(Box::new),
                is_end_exclusive: None,
            },
        ));
        Ok(gen_series)
    } else if matches!(
        target,
        DialectType::Spark | DialectType::Databricks
    ) {
        // Spark/Databricks: SEQUENCE(start, end, step) - keep step as-is
        let start = args.get(0).cloned();
        let end = args.get(1).cloned();
        let step = args.get(2).cloned().or_else(|| {
            Some(Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(Expression::Literal(Box::new(
                        Literal::String("1".to_string()),
                    ))),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit: crate::expressions::IntervalUnit::Day,
                            use_plural: false,
                        },
                    ),
                },
            )))
        });
        let gen_series = Expression::GenerateSeries(Box::new(
            crate::expressions::GenerateSeries {
                start: start.map(Box::new),
                end: end.map(Box::new),
                step: step.map(Box::new),
                is_end_exclusive: None,
            },
        ));
        Ok(gen_series)
    } else if matches!(target, DialectType::Snowflake) {
        // Snowflake: keep as GENERATE_DATE_ARRAY for later transform,
        // but normalize to the 3-arg form first (same as the BigQuery branch).
        if args.len() == 2 {
            let default_interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(Expression::Literal(Box::new(
                        Literal::String("1".to_string()),
                    ))),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit: crate::expressions::IntervalUnit::Day,
                            use_plural: false,
                        },
                    ),
                },
            ));
            args.push(default_interval);
        }
        Ok(Expression::Function(Box::new(Function::new(
            "GENERATE_DATE_ARRAY".to_string(),
            args,
        ))))
    } else if matches!(
        target,
        DialectType::MySQL
            | DialectType::TSQL
            | DialectType::Fabric
            | DialectType::Redshift
    ) {
        // MySQL/TSQL/Redshift: keep as GENERATE_DATE_ARRAY for the preprocess
        // step (unnest_generate_date_array_using_recursive_cte) to convert to CTE
        Ok(Expression::Function(Box::new(Function::new(
            "GENERATE_DATE_ARRAY".to_string(),
            args,
        ))))
    } else {
        // PostgreSQL/others: convert to GenerateSeries
        let start = args.get(0).cloned();
        let end = args.get(1).cloned();
        let step = args.get(2).cloned().or_else(|| {
            Some(Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(Expression::Literal(Box::new(
                        Literal::String("1".to_string()),
                    ))),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit: crate::expressions::IntervalUnit::Day,
                            use_plural: false,
                        },
                    ),
                },
            )))
        });
        Ok(Expression::GenerateSeries(Box::new(
            crate::expressions::GenerateSeries {
                start: start.map(Box::new),
                end: end.map(Box::new),
                step: step.map(Box::new),
                is_end_exclusive: None,
            },
        )))
    }
}
// ARRAYS_OVERLAP(arr1, arr2) from Snowflake -> DuckDB:
// (arr1 && arr2) OR (ARRAY_LENGTH(arr1) <> LIST_COUNT(arr1) AND ARRAY_LENGTH(arr2) <> LIST_COUNT(arr2))
// The second disjunct treats two arrays that BOTH contain a NULL element as
// overlapping; ARRAY_LENGTH <> LIST_COUNT presumably detects NULL elements
// (assumes DuckDB's LIST_COUNT excludes NULLs — TODO confirm).
// Guard: exactly two args and the Snowflake→DuckDB direction only.
"ARRAYS_OVERLAP"
    if f.args.len() == 2
        && matches!(source, DialectType::Snowflake)
        && matches!(target, DialectType::DuckDB) =>
{
    let mut args = f.args;
    let arr1 = args.remove(0);
    // After the first remove(0), the former second arg is now at index 0.
    let arr2 = args.remove(0);

    // (arr1 && arr2) — DuckDB's array-overlap operator, parenthesized so the
    // OR below binds correctly in the generated SQL.
    let overlap = Expression::Paren(Box::new(Paren {
        this: Expression::ArrayOverlaps(Box::new(BinaryOp {
            left: arr1.clone(),
            right: arr2.clone(),
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        })),
        trailing_comments: vec![],
    }));

    // ARRAY_LENGTH(arr1) <> LIST_COUNT(arr1)
    let arr1_has_null = Expression::Neq(Box::new(BinaryOp {
        left: Expression::Function(Box::new(Function::new(
            "ARRAY_LENGTH".to_string(),
            vec![arr1.clone()],
        ))),
        right: Expression::Function(Box::new(Function::new(
            "LIST_COUNT".to_string(),
            vec![arr1],
        ))),
        left_comments: vec![],
        operator_comments: vec![],
        trailing_comments: vec![],
        inferred_type: None,
    }));

    // ARRAY_LENGTH(arr2) <> LIST_COUNT(arr2)
    let arr2_has_null = Expression::Neq(Box::new(BinaryOp {
        left: Expression::Function(Box::new(Function::new(
            "ARRAY_LENGTH".to_string(),
            vec![arr2.clone()],
        ))),
        right: Expression::Function(Box::new(Function::new(
            "LIST_COUNT".to_string(),
            vec![arr2],
        ))),
        left_comments: vec![],
        operator_comments: vec![],
        trailing_comments: vec![],
        inferred_type: None,
    }));

    // (ARRAY_LENGTH(arr1) <> LIST_COUNT(arr1) AND ARRAY_LENGTH(arr2) <> LIST_COUNT(arr2))
    let null_check = Expression::Paren(Box::new(Paren {
        this: Expression::And(Box::new(BinaryOp {
            left: arr1_has_null,
            right: arr2_has_null,
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        })),
        trailing_comments: vec![],
    }));

    // (arr1 && arr2) OR (null_check)
    Ok(Expression::Or(Box::new(BinaryOp {
        left: overlap,
        right: null_check,
        left_comments: vec![],
        operator_comments: vec![],
        trailing_comments: vec![],
        inferred_type: None,
    })))
}
// ARRAY_INTERSECTION([1, 2], [2, 3]) from Snowflake -> DuckDB:
// Bag (multiset) semantics using LIST_TRANSFORM/LIST_FILTER with GENERATE_SERIES.
// Shape of the generated expression:
//   CASE WHEN arr1 IS NULL OR arr2 IS NULL THEN NULL
//        ELSE LIST_TRANSFORM(
//               LIST_FILTER(LIST_ZIP(arr1, GENERATE_SERIES(1, LENGTH(arr1))),
//                           pair -> (<count of pair[1] in arr1[1:pair[2]]>
//                                      <= <count of pair[1] in arr2>)),
//               pair -> pair[1])
//   END
// i.e. the k-th occurrence of a value in arr1 survives only if arr2 contains
// at least k matching elements — that is bag-intersection. NULL elements match
// via IS NOT DISTINCT FROM.
"ARRAY_INTERSECTION"
    if f.args.len() == 2
        && matches!(source, DialectType::Snowflake)
        && matches!(target, DialectType::DuckDB) =>
{
    let mut args = f.args;
    let arr1 = args.remove(0);
    let arr2 = args.remove(0);

    // Build: arr1 IS NULL / arr2 IS NULL — Snowflake returns NULL if either input is NULL.
    let arr1_is_null = Expression::IsNull(Box::new(IsNull {
        this: arr1.clone(),
        not: false,
        postfix_form: false,
    }));
    let arr2_is_null = Expression::IsNull(Box::new(IsNull {
        this: arr2.clone(),
        not: false,
        postfix_form: false,
    }));
    let null_check = Expression::Or(Box::new(BinaryOp {
        left: arr1_is_null,
        right: arr2_is_null,
        left_comments: vec![],
        operator_comments: vec![],
        trailing_comments: vec![],
        inferred_type: None,
    }));

    // GENERATE_SERIES(1, LENGTH(arr1)) — 1-based positions for each element of arr1.
    let gen_series = Expression::Function(Box::new(Function::new(
        "GENERATE_SERIES".to_string(),
        vec![
            Expression::number(1),
            Expression::Function(Box::new(Function::new(
                "LENGTH".to_string(),
                vec![arr1.clone()],
            ))),
        ],
    )));

    // LIST_ZIP(arr1, GENERATE_SERIES(1, LENGTH(arr1))) — (element, position) pairs.
    let list_zip = Expression::Function(Box::new(Function::new(
        "LIST_ZIP".to_string(),
        vec![arr1.clone(), gen_series],
    )));

    // pair[1] (the element) and pair[2] (its 1-based position).
    let pair_col = Expression::column("pair");
    let pair_1 = Expression::Subscript(Box::new(
        crate::expressions::Subscript {
            this: pair_col.clone(),
            index: Expression::number(1),
        },
    ));
    let pair_2 = Expression::Subscript(Box::new(
        crate::expressions::Subscript {
            this: pair_col.clone(),
            index: Expression::number(2),
        },
    ));

    // arr1[1:pair[2]] — prefix of arr1 up to and including the current position.
    let arr1_slice = Expression::ArraySlice(Box::new(
        crate::expressions::ArraySlice {
            this: arr1.clone(),
            start: Some(Expression::number(1)),
            end: Some(pair_2),
        },
    ));

    // e IS NOT DISTINCT FROM pair[1] — NULL-safe equality so NULL elements match NULL.
    let e_col = Expression::column("e");
    let is_not_distinct = Expression::NullSafeEq(Box::new(BinaryOp {
        left: e_col.clone(),
        right: pair_1.clone(),
        left_comments: vec![],
        operator_comments: vec![],
        trailing_comments: vec![],
        inferred_type: None,
    }));

    // e -> e IS NOT DISTINCT FROM pair[1]
    let inner_lambda1 =
        Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
            parameters: vec![crate::expressions::Identifier::new("e")],
            body: is_not_distinct,
            colon: false,
            parameter_types: vec![],
        }));

    // LIST_FILTER(arr1[1:pair[2]], e -> e IS NOT DISTINCT FROM pair[1])
    let inner_filter1 = Expression::Function(Box::new(Function::new(
        "LIST_FILTER".to_string(),
        vec![arr1_slice, inner_lambda1],
    )));

    // LENGTH(...) = occurrence count of pair[1] within the arr1 prefix.
    let len1 = Expression::Function(Box::new(Function::new(
        "LENGTH".to_string(),
        vec![inner_filter1],
    )));

    // Second copy of `e -> e IS NOT DISTINCT FROM pair[1]`; rebuilt inline
    // because `is_not_distinct` was moved into inner_lambda1 above.
    let inner_lambda2 =
        Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
            parameters: vec![crate::expressions::Identifier::new("e")],
            body: Expression::NullSafeEq(Box::new(BinaryOp {
                left: e_col,
                right: pair_1.clone(),
                left_comments: vec![],
                operator_comments: vec![],
                trailing_comments: vec![],
                inferred_type: None,
            })),
            colon: false,
            parameter_types: vec![],
        }));

    // LIST_FILTER(arr2, e -> e IS NOT DISTINCT FROM pair[1])
    let inner_filter2 = Expression::Function(Box::new(Function::new(
        "LIST_FILTER".to_string(),
        vec![arr2.clone(), inner_lambda2],
    )));

    // LENGTH(...) = occurrence count of pair[1] in all of arr2.
    let len2 = Expression::Function(Box::new(Function::new(
        "LENGTH".to_string(),
        vec![inner_filter2],
    )));

    // (count in arr1 prefix) <= (count in arr2): keep this occurrence.
    let cond = Expression::Paren(Box::new(Paren {
        this: Expression::Lte(Box::new(BinaryOp {
            left: len1,
            right: len2,
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        })),
        trailing_comments: vec![],
    }));

    // pair -> (condition)
    let filter_lambda =
        Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
            parameters: vec![crate::expressions::Identifier::new(
                "pair",
            )],
            body: cond,
            colon: false,
            parameter_types: vec![],
        }));

    // LIST_FILTER(LIST_ZIP(...), pair -> ...)
    let outer_filter = Expression::Function(Box::new(Function::new(
        "LIST_FILTER".to_string(),
        vec![list_zip, filter_lambda],
    )));

    // pair -> pair[1] — project surviving pairs back to their element.
    let transform_lambda =
        Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
            parameters: vec![crate::expressions::Identifier::new(
                "pair",
            )],
            body: pair_1,
            colon: false,
            parameter_types: vec![],
        }));

    // LIST_TRANSFORM(LIST_FILTER(...), pair -> pair[1])
    let list_transform = Expression::Function(Box::new(Function::new(
        "LIST_TRANSFORM".to_string(),
        vec![outer_filter, transform_lambda],
    )));

    // CASE WHEN arr1 IS NULL OR arr2 IS NULL THEN NULL
    //      ELSE LIST_TRANSFORM(LIST_FILTER(...), pair -> pair[1])
    // END
    Ok(Expression::Case(Box::new(Case {
        operand: None,
        whens: vec![(null_check, Expression::Null(Null))],
        else_: Some(list_transform),
        comments: vec![],
        inferred_type: None,
    })))
}
// ARRAY_CONSTRUCT(args) -> Expression::Array for all targets
// (the target generator then renders the array in its native syntax).
"ARRAY_CONSTRUCT" => {
    Ok(Expression::Array(Box::new(crate::expressions::Array {
        expressions: f.args,
    })))
}
// ARRAY(args) function -> Expression::Array for DuckDB/Snowflake/Presto/Trino/Athena.
// Guard: skipped when any argument is a SELECT/subquery, since ARRAY(SELECT ...)
// is a different construct (array-from-subquery) that must stay a function call.
"ARRAY"
    if !f.args.iter().any(|a| {
        matches!(a, Expression::Select(_) | Expression::Subquery(_))
    }) =>
{
    match target {
        DialectType::DuckDB
        | DialectType::Snowflake
        | DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            Ok(Expression::Array(Box::new(crate::expressions::Array {
                expressions: f.args,
            })))
        }
        _ => Ok(Expression::Function(f)),
    }
}
// Any function name not matched above passes through unchanged.
_ => Ok(Expression::Function(f)),
}
} else if let Expression::AggregateFunction(mut af) = e {
    // Aggregate-function conversions; name comparison is case-insensitive.
    let name = af.name.to_ascii_uppercase();
    match name.as_str() {
        // ARBITRARY(x): delegate to the shared helper for the target dialect.
        "ARBITRARY" if af.args.len() == 1 => {
            let arg = af.args.into_iter().next().unwrap();
            Ok(convert_arbitrary(arg, target))
        }
        "JSON_ARRAYAGG" => {
            match target {
                DialectType::PostgreSQL => {
                    af.name = "JSON_AGG".to_string();
                    // Add NULLS FIRST to ORDER BY items for PostgreSQL,
                    // but only where the input left null ordering unspecified.
                    for ordered in af.order_by.iter_mut() {
                        if ordered.nulls_first.is_none() {
                            ordered.nulls_first = Some(true);
                        }
                    }
                    Ok(Expression::AggregateFunction(af))
                }
                _ => Ok(Expression::AggregateFunction(af)),
            }
        }
        _ => Ok(Expression::AggregateFunction(af)),
    }
} else if let Expression::JSONArrayAgg(ja) = e {
    // JSONArrayAgg -> JSON_AGG for PostgreSQL, JSON_ARRAYAGG for others.
    // For PostgreSQL the dedicated node is lowered to a plain aggregate call,
    // carrying over any ORDER BY with NULLS FIRST defaulted (as above).
    match target {
        DialectType::PostgreSQL => {
            let mut order_by = Vec::new();
            if let Some(order_expr) = ja.order {
                if let Expression::OrderBy(ob) = *order_expr {
                    for mut ordered in ob.expressions {
                        if ordered.nulls_first.is_none() {
                            ordered.nulls_first = Some(true);
                        }
                        order_by.push(ordered);
                    }
                }
            }
            Ok(Expression::AggregateFunction(Box::new(
                crate::expressions::AggregateFunction {
                    name: "JSON_AGG".to_string(),
                    args: vec![*ja.this],
                    distinct: false,
                    filter: None,
                    order_by,
                    limit: None,
                    ignore_nulls: None,
                    inferred_type: None,
                },
            )))
        }
        _ => Ok(Expression::JSONArrayAgg(ja)),
    }
} else if let Expression::ToNumber(tn) = e {
    // TO_NUMBER(x) with no format/precision/scale -> CAST(x AS DOUBLE)
    let arg = *tn.this;
    Ok(Expression::Cast(Box::new(crate::expressions::Cast {
        this: arg,
        to: crate::expressions::DataType::Double {
            precision: None,
            scale: None,
        },
        double_colon_syntax: false,
        trailing_comments: Vec::new(),
        format: None,
        default: None,
        inferred_type: None,
    })))
} else {
    // Not an expression kind this action handles: return it untouched.
    Ok(e)
}
}
20599
// REGEXP_LIKE(this, pattern[, flags]) -> DuckDB's REGEXP_MATCHES, preserving
// the optional flags argument. Non-RegexpLike expressions pass through.
Action::RegexpLikeToDuckDB => {
    if let Expression::RegexpLike(f) = e {
        let mut args = vec![f.this, f.pattern];
        if let Some(flags) = f.flags {
            args.push(flags);
        }
        Ok(Expression::Function(Box::new(Function::new(
            "REGEXP_MATCHES".to_string(),
            args,
        ))))
    } else {
        Ok(e)
    }
}
// Epoch(x) (seconds-since-epoch extraction) -> target-specific function name;
// the single argument is passed through unchanged.
Action::EpochConvert => {
    if let Expression::Epoch(f) = e {
        let arg = f.this;
        let name = match target {
            DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
                "UNIX_TIMESTAMP"
            }
            DialectType::Presto | DialectType::Trino => "TO_UNIXTIME",
            // NOTE(review): BigQuery's TIME_TO_UNIX only accepts TIME values;
            // TIMESTAMP inputs would need UNIX_SECONDS — confirm the expected
            // input type for this action.
            DialectType::BigQuery => "TIME_TO_UNIX",
            _ => "EPOCH",
        };
        Ok(Expression::Function(Box::new(Function::new(
            name.to_string(),
            vec![arg],
        ))))
    } else {
        Ok(e)
    }
}
// EpochMs(x) (milliseconds-since-epoch -> timestamp) -> target-specific form.
// Targets without a millisecond constructor divide by POWER(10, 3) and use
// their seconds-based converter.
Action::EpochMsConvert => {
    use crate::expressions::{BinaryOp, Cast};
    if let Expression::EpochMs(f) = e {
        let arg = f.this;
        match target {
            DialectType::Spark | DialectType::Databricks => {
                Ok(Expression::Function(Box::new(Function::new(
                    "TIMESTAMP_MILLIS".to_string(),
                    vec![arg],
                ))))
            }
            DialectType::BigQuery => Ok(Expression::Function(Box::new(
                Function::new("TIMESTAMP_MILLIS".to_string(), vec![arg]),
            ))),
            DialectType::Presto | DialectType::Trino => {
                // FROM_UNIXTIME(CAST(x AS DOUBLE) / POW(10, 3))
                // The CAST avoids integer division truncating sub-second precision.
                let cast_arg = Expression::Cast(Box::new(Cast {
                    this: arg,
                    to: DataType::Double {
                        precision: None,
                        scale: None,
                    },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                }));
                let div = Expression::Div(Box::new(BinaryOp::new(
                    cast_arg,
                    Expression::Function(Box::new(Function::new(
                        "POW".to_string(),
                        vec![Expression::number(10), Expression::number(3)],
                    ))),
                )));
                Ok(Expression::Function(Box::new(Function::new(
                    "FROM_UNIXTIME".to_string(),
                    vec![div],
                ))))
            }
            DialectType::MySQL => {
                // FROM_UNIXTIME(x / POWER(10, 3)) — MySQL's / is already
                // non-truncating, so no CAST is needed.
                let div = Expression::Div(Box::new(BinaryOp::new(
                    arg,
                    Expression::Function(Box::new(Function::new(
                        "POWER".to_string(),
                        vec![Expression::number(10), Expression::number(3)],
                    ))),
                )));
                Ok(Expression::Function(Box::new(Function::new(
                    "FROM_UNIXTIME".to_string(),
                    vec![div],
                ))))
            }
            DialectType::PostgreSQL | DialectType::Redshift => {
                // TO_TIMESTAMP(CAST(x AS DOUBLE PRECISION) / POWER(10, 3))
                let cast_arg = Expression::Cast(Box::new(Cast {
                    this: arg,
                    // Custom type: "DOUBLE PRECISION" is PostgreSQL's spelling
                    // and has no dedicated DataType variant here.
                    to: DataType::Custom {
                        name: "DOUBLE PRECISION".to_string(),
                    },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                }));
                let div = Expression::Div(Box::new(BinaryOp::new(
                    cast_arg,
                    Expression::Function(Box::new(Function::new(
                        "POWER".to_string(),
                        vec![Expression::number(10), Expression::number(3)],
                    ))),
                )));
                Ok(Expression::Function(Box::new(Function::new(
                    "TO_TIMESTAMP".to_string(),
                    vec![div],
                ))))
            }
            DialectType::ClickHouse => {
                // fromUnixTimestamp64Milli(CAST(x AS Nullable(Int64)))
                let cast_arg = Expression::Cast(Box::new(Cast {
                    this: arg,
                    to: DataType::Nullable {
                        inner: Box::new(DataType::BigInt { length: None }),
                    },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                }));
                Ok(Expression::Function(Box::new(Function::new(
                    "fromUnixTimestamp64Milli".to_string(),
                    vec![cast_arg],
                ))))
            }
            // Fallback (e.g. DuckDB): keep EPOCH_MS as a plain call.
            _ => Ok(Expression::Function(Box::new(Function::new(
                "EPOCH_MS".to_string(),
                vec![arg],
            )))),
        }
    } else {
        Ok(e)
    }
}
// Normalize T-SQL-specific data types (parsed as DataType::Custom or with
// TSQL-only spellings) into portable DataType variants for the target dialect.
// Arms that don't apply `return` the original DataType unchanged.
Action::TSQLTypeNormalize => {
    if let Expression::DataType(dt) = e {
        let new_dt = match &dt {
            // MONEY is fixed-point with 4 decimal places in T-SQL.
            DataType::Custom { name } if name.eq_ignore_ascii_case("MONEY") => {
                DataType::Decimal {
                    precision: Some(15),
                    scale: Some(4),
                }
            }
            DataType::Custom { name }
                if name.eq_ignore_ascii_case("SMALLMONEY") =>
            {
                DataType::Decimal {
                    precision: Some(6),
                    scale: Some(4),
                }
            }
            DataType::Custom { name } if name.eq_ignore_ascii_case("DATETIME2") => {
                DataType::Timestamp {
                    timezone: false,
                    precision: None,
                }
            }
            // REAL -> FLOAT (drop the REAL spelling).
            DataType::Custom { name } if name.eq_ignore_ascii_case("REAL") => {
                DataType::Float {
                    precision: None,
                    scale: None,
                    real_spelling: false,
                }
            }
            // A FLOAT already parsed with real_spelling also loses the spelling.
            DataType::Float {
                real_spelling: true,
                ..
            } => DataType::Float {
                precision: None,
                scale: None,
                real_spelling: false,
            },
            DataType::Custom { name } if name.eq_ignore_ascii_case("IMAGE") => {
                DataType::Custom {
                    name: "BLOB".to_string(),
                }
            }
            DataType::Custom { name } if name.eq_ignore_ascii_case("BIT") => {
                DataType::Boolean
            }
            DataType::Custom { name }
                if name.eq_ignore_ascii_case("ROWVERSION") =>
            {
                DataType::Custom {
                    name: "BINARY".to_string(),
                }
            }
            // UNIQUEIDENTIFIER: STRING on Hive-family targets, else VARCHAR(36)
            // (36 chars = canonical hyphenated UUID text form).
            DataType::Custom { name }
                if name.eq_ignore_ascii_case("UNIQUEIDENTIFIER") =>
            {
                match target {
                    DialectType::Spark
                    | DialectType::Databricks
                    | DialectType::Hive => DataType::Custom {
                        name: "STRING".to_string(),
                    },
                    _ => DataType::VarChar {
                        length: Some(36),
                        parenthesized_length: true,
                    },
                }
            }
            // DATETIMEOFFSET: Hive-family targets have no TZ-aware timestamp,
            // so the timezone flag is dropped there.
            DataType::Custom { name }
                if name.eq_ignore_ascii_case("DATETIMEOFFSET") =>
            {
                match target {
                    DialectType::Spark
                    | DialectType::Databricks
                    | DialectType::Hive => DataType::Timestamp {
                        timezone: false,
                        precision: None,
                    },
                    _ => DataType::Timestamp {
                        timezone: true,
                        precision: None,
                    },
                }
            }
            // Parenthesized forms arrive as raw Custom names; match on prefix.
            DataType::Custom { ref name }
                if name.len() >= 10
                    && name[..10].eq_ignore_ascii_case("DATETIME2(") =>
            {
                // DATETIME2(n) -> TIMESTAMP (precision n is discarded)
                DataType::Timestamp {
                    timezone: false,
                    precision: None,
                }
            }
            DataType::Custom { ref name }
                if name.len() >= 5 && name[..5].eq_ignore_ascii_case("TIME(") =>
            {
                // TIME(n) -> TIMESTAMP for Spark, keep as TIME for others
                match target {
                    DialectType::Spark
                    | DialectType::Databricks
                    | DialectType::Hive => DataType::Timestamp {
                        timezone: false,
                        precision: None,
                    },
                    _ => return Ok(Expression::DataType(dt)),
                }
            }
            DataType::Custom { ref name }
                if name.len() >= 7 && name[..7].eq_ignore_ascii_case("NUMERIC") =>
            {
                // Parse NUMERIC(p,s) back to Decimal(p,s); bare NUMERIC maps to
                // Decimal with no precision/scale; anything else is left alone.
                let upper = name.to_ascii_uppercase();
                if let Some(inner) = upper
                    .strip_prefix("NUMERIC(")
                    .and_then(|s| s.strip_suffix(')'))
                {
                    let parts: Vec<&str> = inner.split(',').collect();
                    let precision =
                        parts.first().and_then(|s| s.trim().parse::<u32>().ok());
                    let scale =
                        parts.get(1).and_then(|s| s.trim().parse::<u32>().ok());
                    DataType::Decimal { precision, scale }
                } else if upper == "NUMERIC" {
                    DataType::Decimal {
                        precision: None,
                        scale: None,
                    }
                } else {
                    return Ok(Expression::DataType(dt));
                }
            }
            DataType::Float {
                precision: Some(p), ..
            } => {
                // For Hive/Spark: FLOAT(1-32) -> FLOAT, FLOAT(33+) -> DOUBLE (IEEE 754 boundary)
                // For other targets: FLOAT(1-24) -> FLOAT, FLOAT(25+) -> DOUBLE (TSQL boundary)
                let boundary = match target {
                    DialectType::Hive
                    | DialectType::Spark
                    | DialectType::Databricks => 32,
                    _ => 24,
                };
                if *p <= boundary {
                    DataType::Float {
                        precision: None,
                        scale: None,
                        real_spelling: false,
                    }
                } else {
                    DataType::Double {
                        precision: None,
                        scale: None,
                    }
                }
            }
            // T-SQL TINYINT is unsigned (0-255): DuckDB gets UTINYINT to keep
            // the range; Hive-family targets widen to SMALLINT (their TINYINT
            // is signed); others keep the type unchanged.
            DataType::TinyInt { .. } => match target {
                DialectType::DuckDB => DataType::Custom {
                    name: "UTINYINT".to_string(),
                },
                DialectType::Hive
                | DialectType::Spark
                | DialectType::Databricks => DataType::SmallInt { length: None },
                _ => return Ok(Expression::DataType(dt)),
            },
            // INTEGER -> INT for Spark/Databricks
            // NOTE(review): this arm has no `target` guard, so the INTEGER
            // spelling is dropped for every target — confirm intended.
            DataType::Int {
                length,
                integer_spelling: true,
            } => DataType::Int {
                length: *length,
                integer_spelling: false,
            },
            _ => return Ok(Expression::DataType(dt)),
        };
        Ok(Expression::DataType(new_dt))
    } else {
        Ok(e)
    }
}
// Emulate MySQL's division semantics (NULL on divide-by-zero, real-valued
// result) in targets that would error or integer-divide instead:
// right operand is wrapped in NULLIF(right, 0), and the left operand is cast
// to a floating type on targets whose `/` does integer division.
Action::MySQLSafeDivide => {
    use crate::expressions::{BinaryOp, Cast};
    if let Expression::Div(op) = e {
        let left = op.left;
        let right = op.right;
        // For SQLite: CAST left as REAL but NO NULLIF wrapping
        // (SQLite already yields NULL for division by zero).
        if matches!(target, DialectType::SQLite) {
            let new_left = Expression::Cast(Box::new(Cast {
                this: left,
                to: DataType::Float {
                    precision: None,
                    scale: None,
                    real_spelling: true,
                },
                trailing_comments: Vec::new(),
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }));
            return Ok(Expression::Div(Box::new(BinaryOp::new(new_left, right))));
        }
        // Wrap right in NULLIF(right, 0) so x / 0 becomes x / NULL -> NULL.
        let nullif_right = Expression::Function(Box::new(Function::new(
            "NULLIF".to_string(),
            vec![right, Expression::number(0)],
        )));
        // For some dialects, also CAST the left side to force real division.
        let new_left = match target {
            // These spell the type "DOUBLE PRECISION" (no dedicated variant).
            DialectType::PostgreSQL
            | DialectType::Redshift
            | DialectType::Teradata
            | DialectType::Materialize
            | DialectType::RisingWave => Expression::Cast(Box::new(Cast {
                this: left,
                to: DataType::Custom {
                    name: "DOUBLE PRECISION".to_string(),
                },
                trailing_comments: Vec::new(),
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })),
            DialectType::Drill
            | DialectType::Trino
            | DialectType::Presto
            | DialectType::Athena => Expression::Cast(Box::new(Cast {
                this: left,
                to: DataType::Double {
                    precision: None,
                    scale: None,
                },
                trailing_comments: Vec::new(),
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })),
            DialectType::TSQL => Expression::Cast(Box::new(Cast {
                this: left,
                to: DataType::Float {
                    precision: None,
                    scale: None,
                    real_spelling: false,
                },
                trailing_comments: Vec::new(),
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })),
            // Targets whose `/` already does real division: leave left as-is.
            _ => left,
        };
        Ok(Expression::Div(Box::new(BinaryOp::new(
            new_left,
            nullif_right,
        ))))
    } else {
        Ok(e)
    }
}
21001 Action::AlterTableRenameStripSchema => {
21002 if let Expression::AlterTable(mut at) = e {
21003 if let Some(crate::expressions::AlterTableAction::RenameTable(
21004 ref mut new_tbl,
21005 )) = at.actions.first_mut()
21006 {
21007 new_tbl.schema = None;
21008 new_tbl.catalog = None;
21009 }
21010 Ok(Expression::AlterTable(at))
21011 } else {
21012 Ok(e)
21013 }
21014 }
21015 Action::NullsOrdering => {
21016 // Fill in the source dialect's implied null ordering default.
21017 // This makes implicit null ordering explicit so the target generator
21018 // can correctly strip or keep it.
21019 //
21020 // Dialect null ordering categories:
21021 // nulls_are_large (Oracle, PostgreSQL, Redshift, Snowflake):
21022 // ASC -> NULLS LAST, DESC -> NULLS FIRST
21023 // nulls_are_small (Spark, Hive, BigQuery, MySQL, Databricks, ClickHouse, etc.):
21024 // ASC -> NULLS FIRST, DESC -> NULLS LAST
21025 // nulls_are_last (DuckDB, Presto, Trino, Dremio, Athena):
21026 // NULLS LAST always (both ASC and DESC)
21027 if let Expression::Ordered(mut o) = e {
21028 let is_asc = !o.desc;
21029
21030 let is_source_nulls_large = matches!(
21031 source,
21032 DialectType::Oracle
21033 | DialectType::PostgreSQL
21034 | DialectType::Redshift
21035 | DialectType::Snowflake
21036 );
21037 let is_source_nulls_last = matches!(
21038 source,
21039 DialectType::DuckDB
21040 | DialectType::Presto
21041 | DialectType::Trino
21042 | DialectType::Dremio
21043 | DialectType::Athena
21044 | DialectType::ClickHouse
21045 | DialectType::Drill
21046 | DialectType::Exasol
21047 | DialectType::DataFusion
21048 );
21049
21050 // Determine target category to check if default matches
21051 let is_target_nulls_large = matches!(
21052 target,
21053 DialectType::Oracle
21054 | DialectType::PostgreSQL
21055 | DialectType::Redshift
21056 | DialectType::Snowflake
21057 );
21058 let is_target_nulls_last = matches!(
21059 target,
21060 DialectType::DuckDB
21061 | DialectType::Presto
21062 | DialectType::Trino
21063 | DialectType::Dremio
21064 | DialectType::Athena
21065 | DialectType::ClickHouse
21066 | DialectType::Drill
21067 | DialectType::Exasol
21068 | DialectType::DataFusion
21069 );
21070
21071 // Compute the implied nulls_first for source
21072 let source_nulls_first = if is_source_nulls_large {
21073 !is_asc // ASC -> NULLS LAST (false), DESC -> NULLS FIRST (true)
21074 } else if is_source_nulls_last {
21075 false // NULLS LAST always
21076 } else {
21077 is_asc // nulls_are_small: ASC -> NULLS FIRST (true), DESC -> NULLS LAST (false)
21078 };
21079
21080 // Compute the target's default
21081 let target_nulls_first = if is_target_nulls_large {
21082 !is_asc
21083 } else if is_target_nulls_last {
21084 false
21085 } else {
21086 is_asc
21087 };
21088
21089 // Only add explicit nulls ordering if source and target defaults differ
21090 if source_nulls_first != target_nulls_first {
21091 o.nulls_first = Some(source_nulls_first);
21092 }
21093 // If they match, leave nulls_first as None so the generator won't output it
21094
21095 Ok(Expression::Ordered(o))
21096 } else {
21097 Ok(e)
21098 }
21099 }
21100 Action::StringAggConvert => {
21101 match e {
21102 Expression::WithinGroup(wg) => {
21103 // STRING_AGG(x, sep) WITHIN GROUP (ORDER BY z) -> target-specific
21104 // Extract args and distinct flag from either Function, AggregateFunction, or StringAgg
21105 let (x_opt, sep_opt, distinct) = match wg.this {
21106 Expression::AggregateFunction(ref af)
21107 if af.name.eq_ignore_ascii_case("STRING_AGG")
21108 && af.args.len() >= 2 =>
21109 {
21110 (
21111 Some(af.args[0].clone()),
21112 Some(af.args[1].clone()),
21113 af.distinct,
21114 )
21115 }
21116 Expression::Function(ref f)
21117 if f.name.eq_ignore_ascii_case("STRING_AGG")
21118 && f.args.len() >= 2 =>
21119 {
21120 (Some(f.args[0].clone()), Some(f.args[1].clone()), false)
21121 }
21122 Expression::StringAgg(ref sa) => {
21123 (Some(sa.this.clone()), sa.separator.clone(), sa.distinct)
21124 }
21125 _ => (None, None, false),
21126 };
21127 if let (Some(x), Some(sep)) = (x_opt, sep_opt) {
21128 let order_by = wg.order_by;
21129
21130 match target {
21131 DialectType::TSQL | DialectType::Fabric => {
21132 // Keep as WithinGroup(StringAgg) for TSQL
21133 Ok(Expression::WithinGroup(Box::new(
21134 crate::expressions::WithinGroup {
21135 this: Expression::StringAgg(Box::new(
21136 crate::expressions::StringAggFunc {
21137 this: x,
21138 separator: Some(sep),
21139 order_by: None, // order_by goes in WithinGroup, not StringAgg
21140 distinct,
21141 filter: None,
21142 limit: None,
21143 inferred_type: None,
21144 },
21145 )),
21146 order_by,
21147 },
21148 )))
21149 }
21150 DialectType::MySQL
21151 | DialectType::SingleStore
21152 | DialectType::Doris
21153 | DialectType::StarRocks => {
21154 // GROUP_CONCAT(x ORDER BY z SEPARATOR sep)
21155 Ok(Expression::GroupConcat(Box::new(
21156 crate::expressions::GroupConcatFunc {
21157 this: x,
21158 separator: Some(sep),
21159 order_by: Some(order_by),
21160 distinct,
21161 filter: None,
21162 inferred_type: None,
21163 },
21164 )))
21165 }
21166 DialectType::SQLite => {
21167 // GROUP_CONCAT(x, sep) - no ORDER BY support
21168 Ok(Expression::GroupConcat(Box::new(
21169 crate::expressions::GroupConcatFunc {
21170 this: x,
21171 separator: Some(sep),
21172 order_by: None,
21173 distinct,
21174 filter: None,
21175 inferred_type: None,
21176 },
21177 )))
21178 }
21179 DialectType::PostgreSQL | DialectType::Redshift => {
21180 // STRING_AGG(x, sep ORDER BY z)
21181 Ok(Expression::StringAgg(Box::new(
21182 crate::expressions::StringAggFunc {
21183 this: x,
21184 separator: Some(sep),
21185 order_by: Some(order_by),
21186 distinct,
21187 filter: None,
21188 limit: None,
21189 inferred_type: None,
21190 },
21191 )))
21192 }
21193 _ => {
21194 // Default: keep as STRING_AGG(x, sep) with ORDER BY inside
21195 Ok(Expression::StringAgg(Box::new(
21196 crate::expressions::StringAggFunc {
21197 this: x,
21198 separator: Some(sep),
21199 order_by: Some(order_by),
21200 distinct,
21201 filter: None,
21202 limit: None,
21203 inferred_type: None,
21204 },
21205 )))
21206 }
21207 }
21208 } else {
21209 Ok(Expression::WithinGroup(wg))
21210 }
21211 }
21212 Expression::StringAgg(sa) => {
21213 match target {
21214 DialectType::MySQL
21215 | DialectType::SingleStore
21216 | DialectType::Doris
21217 | DialectType::StarRocks => {
21218 // STRING_AGG(x, sep) -> GROUP_CONCAT(x SEPARATOR sep)
21219 Ok(Expression::GroupConcat(Box::new(
21220 crate::expressions::GroupConcatFunc {
21221 this: sa.this,
21222 separator: sa.separator,
21223 order_by: sa.order_by,
21224 distinct: sa.distinct,
21225 filter: sa.filter,
21226 inferred_type: None,
21227 },
21228 )))
21229 }
21230 DialectType::SQLite => {
21231 // STRING_AGG(x, sep) -> GROUP_CONCAT(x, sep)
21232 Ok(Expression::GroupConcat(Box::new(
21233 crate::expressions::GroupConcatFunc {
21234 this: sa.this,
21235 separator: sa.separator,
21236 order_by: None, // SQLite doesn't support ORDER BY in GROUP_CONCAT
21237 distinct: sa.distinct,
21238 filter: sa.filter,
21239 inferred_type: None,
21240 },
21241 )))
21242 }
21243 DialectType::Spark | DialectType::Databricks => {
21244 // STRING_AGG(x, sep) -> LISTAGG(x, sep)
21245 Ok(Expression::ListAgg(Box::new(
21246 crate::expressions::ListAggFunc {
21247 this: sa.this,
21248 separator: sa.separator,
21249 on_overflow: None,
21250 order_by: sa.order_by,
21251 distinct: sa.distinct,
21252 filter: None,
21253 inferred_type: None,
21254 },
21255 )))
21256 }
21257 _ => Ok(Expression::StringAgg(sa)),
21258 }
21259 }
21260 _ => Ok(e),
21261 }
21262 }
21263 Action::GroupConcatConvert => {
21264 // Helper to expand CONCAT(a, b, c) -> a || b || c (for PostgreSQL/SQLite)
21265 // or CONCAT(a, b, c) -> a + b + c (for TSQL)
21266 fn expand_concat_to_dpipe(expr: Expression) -> Expression {
21267 if let Expression::Function(ref f) = expr {
21268 if f.name.eq_ignore_ascii_case("CONCAT") && f.args.len() > 1 {
21269 let mut result = f.args[0].clone();
21270 for arg in &f.args[1..] {
21271 result = Expression::Concat(Box::new(BinaryOp {
21272 left: result,
21273 right: arg.clone(),
21274 left_comments: vec![],
21275 operator_comments: vec![],
21276 trailing_comments: vec![],
21277 inferred_type: None,
21278 }));
21279 }
21280 return result;
21281 }
21282 }
21283 expr
21284 }
21285 fn expand_concat_to_plus(expr: Expression) -> Expression {
21286 if let Expression::Function(ref f) = expr {
21287 if f.name.eq_ignore_ascii_case("CONCAT") && f.args.len() > 1 {
21288 let mut result = f.args[0].clone();
21289 for arg in &f.args[1..] {
21290 result = Expression::Add(Box::new(BinaryOp {
21291 left: result,
21292 right: arg.clone(),
21293 left_comments: vec![],
21294 operator_comments: vec![],
21295 trailing_comments: vec![],
21296 inferred_type: None,
21297 }));
21298 }
21299 return result;
21300 }
21301 }
21302 expr
21303 }
21304 // Helper to wrap each arg in CAST(arg AS VARCHAR) for Presto/Trino CONCAT
21305 fn wrap_concat_args_in_varchar_cast(expr: Expression) -> Expression {
21306 if let Expression::Function(ref f) = expr {
21307 if f.name.eq_ignore_ascii_case("CONCAT") && f.args.len() > 1 {
21308 let new_args: Vec<Expression> = f
21309 .args
21310 .iter()
21311 .map(|arg| {
21312 Expression::Cast(Box::new(crate::expressions::Cast {
21313 this: arg.clone(),
21314 to: crate::expressions::DataType::VarChar {
21315 length: None,
21316 parenthesized_length: false,
21317 },
21318 trailing_comments: Vec::new(),
21319 double_colon_syntax: false,
21320 format: None,
21321 default: None,
21322 inferred_type: None,
21323 }))
21324 })
21325 .collect();
21326 return Expression::Function(Box::new(
21327 crate::expressions::Function::new(
21328 "CONCAT".to_string(),
21329 new_args,
21330 ),
21331 ));
21332 }
21333 }
21334 expr
21335 }
21336 if let Expression::GroupConcat(gc) = e {
21337 match target {
21338 DialectType::Presto => {
21339 // GROUP_CONCAT(x [, sep]) -> ARRAY_JOIN(ARRAY_AGG(x), sep)
21340 let sep = gc.separator.unwrap_or(Expression::string(","));
21341 // For multi-arg CONCAT, wrap each arg in CAST(... AS VARCHAR)
21342 let this = wrap_concat_args_in_varchar_cast(gc.this);
21343 let array_agg =
21344 Expression::ArrayAgg(Box::new(crate::expressions::AggFunc {
21345 this,
21346 distinct: gc.distinct,
21347 filter: gc.filter,
21348 order_by: gc.order_by.unwrap_or_default(),
21349 name: None,
21350 ignore_nulls: None,
21351 having_max: None,
21352 limit: None,
21353 inferred_type: None,
21354 }));
21355 Ok(Expression::ArrayJoin(Box::new(
21356 crate::expressions::ArrayJoinFunc {
21357 this: array_agg,
21358 separator: sep,
21359 null_replacement: None,
21360 },
21361 )))
21362 }
21363 DialectType::Trino => {
21364 // GROUP_CONCAT(x [, sep]) -> LISTAGG(x, sep)
21365 let sep = gc.separator.unwrap_or(Expression::string(","));
21366 // For multi-arg CONCAT, wrap each arg in CAST(... AS VARCHAR)
21367 let this = wrap_concat_args_in_varchar_cast(gc.this);
21368 Ok(Expression::ListAgg(Box::new(
21369 crate::expressions::ListAggFunc {
21370 this,
21371 separator: Some(sep),
21372 on_overflow: None,
21373 order_by: gc.order_by,
21374 distinct: gc.distinct,
21375 filter: gc.filter,
21376 inferred_type: None,
21377 },
21378 )))
21379 }
21380 DialectType::PostgreSQL
21381 | DialectType::Redshift
21382 | DialectType::Snowflake
21383 | DialectType::DuckDB
21384 | DialectType::Hive
21385 | DialectType::ClickHouse => {
21386 // GROUP_CONCAT(x [, sep]) -> STRING_AGG(x, sep)
21387 let sep = gc.separator.unwrap_or(Expression::string(","));
21388 // Expand CONCAT(a,b,c) -> a || b || c for || dialects
21389 let this = expand_concat_to_dpipe(gc.this);
21390 // For PostgreSQL, add NULLS LAST for DESC / NULLS FIRST for ASC
21391 let order_by = if target == DialectType::PostgreSQL {
21392 gc.order_by.map(|ords| {
21393 ords.into_iter()
21394 .map(|mut o| {
21395 if o.nulls_first.is_none() {
21396 if o.desc {
21397 o.nulls_first = Some(false);
21398 // NULLS LAST
21399 } else {
21400 o.nulls_first = Some(true);
21401 // NULLS FIRST
21402 }
21403 }
21404 o
21405 })
21406 .collect()
21407 })
21408 } else {
21409 gc.order_by
21410 };
21411 Ok(Expression::StringAgg(Box::new(
21412 crate::expressions::StringAggFunc {
21413 this,
21414 separator: Some(sep),
21415 order_by,
21416 distinct: gc.distinct,
21417 filter: gc.filter,
21418 limit: None,
21419 inferred_type: None,
21420 },
21421 )))
21422 }
21423 DialectType::TSQL => {
21424 // GROUP_CONCAT(x [, sep]) -> STRING_AGG(x, sep) WITHIN GROUP (ORDER BY ...)
21425 // TSQL doesn't support DISTINCT in STRING_AGG
21426 let sep = gc.separator.unwrap_or(Expression::string(","));
21427 // Expand CONCAT(a,b,c) -> a + b + c for TSQL
21428 let this = expand_concat_to_plus(gc.this);
21429 Ok(Expression::StringAgg(Box::new(
21430 crate::expressions::StringAggFunc {
21431 this,
21432 separator: Some(sep),
21433 order_by: gc.order_by,
21434 distinct: false, // TSQL doesn't support DISTINCT in STRING_AGG
21435 filter: gc.filter,
21436 limit: None,
21437 inferred_type: None,
21438 },
21439 )))
21440 }
21441 DialectType::SQLite => {
21442 // GROUP_CONCAT stays as GROUP_CONCAT but ORDER BY is removed
21443 // SQLite GROUP_CONCAT doesn't support ORDER BY
21444 // Expand CONCAT(a,b,c) -> a || b || c
21445 let this = expand_concat_to_dpipe(gc.this);
21446 Ok(Expression::GroupConcat(Box::new(
21447 crate::expressions::GroupConcatFunc {
21448 this,
21449 separator: gc.separator,
21450 order_by: None, // SQLite doesn't support ORDER BY in GROUP_CONCAT
21451 distinct: gc.distinct,
21452 filter: gc.filter,
21453 inferred_type: None,
21454 },
21455 )))
21456 }
21457 DialectType::Spark | DialectType::Databricks => {
21458 // GROUP_CONCAT(x [, sep]) -> LISTAGG(x, sep)
21459 let sep = gc.separator.unwrap_or(Expression::string(","));
21460 Ok(Expression::ListAgg(Box::new(
21461 crate::expressions::ListAggFunc {
21462 this: gc.this,
21463 separator: Some(sep),
21464 on_overflow: None,
21465 order_by: gc.order_by,
21466 distinct: gc.distinct,
21467 filter: None,
21468 inferred_type: None,
21469 },
21470 )))
21471 }
21472 DialectType::MySQL
21473 | DialectType::SingleStore
21474 | DialectType::StarRocks => {
21475 // MySQL GROUP_CONCAT should have explicit SEPARATOR (default ',')
21476 if gc.separator.is_none() {
21477 let mut gc = gc;
21478 gc.separator = Some(Expression::string(","));
21479 Ok(Expression::GroupConcat(gc))
21480 } else {
21481 Ok(Expression::GroupConcat(gc))
21482 }
21483 }
21484 _ => Ok(Expression::GroupConcat(gc)),
21485 }
21486 } else {
21487 Ok(e)
21488 }
21489 }
21490 Action::TempTableHash => {
21491 match e {
21492 Expression::CreateTable(mut ct) => {
21493 // TSQL #table -> TEMPORARY TABLE with # stripped from name
21494 let name = &ct.name.name.name;
21495 if name.starts_with('#') {
21496 ct.name.name.name = name.trim_start_matches('#').to_string();
21497 }
21498 // Set temporary flag
21499 ct.temporary = true;
21500 Ok(Expression::CreateTable(ct))
21501 }
21502 Expression::Table(mut tr) => {
21503 // Strip # from table references
21504 let name = &tr.name.name;
21505 if name.starts_with('#') {
21506 tr.name.name = name.trim_start_matches('#').to_string();
21507 }
21508 Ok(Expression::Table(tr))
21509 }
21510 Expression::DropTable(mut dt) => {
21511 // Strip # from DROP TABLE names
21512 for table_ref in &mut dt.names {
21513 if table_ref.name.name.starts_with('#') {
21514 table_ref.name.name =
21515 table_ref.name.name.trim_start_matches('#').to_string();
21516 }
21517 }
21518 Ok(Expression::DropTable(dt))
21519 }
21520 _ => Ok(e),
21521 }
21522 }
21523 Action::NvlClearOriginal => {
21524 if let Expression::Nvl(mut f) = e {
21525 f.original_name = None;
21526 Ok(Expression::Nvl(f))
21527 } else {
21528 Ok(e)
21529 }
21530 }
21531 Action::HiveCastToTryCast => {
21532 // Convert Hive/Spark CAST to TRY_CAST for targets that support it
21533 if let Expression::Cast(mut c) = e {
21534 // For Spark/Hive -> DuckDB: TIMESTAMP -> TIMESTAMPTZ
21535 // (Spark's TIMESTAMP is always timezone-aware)
21536 if matches!(target, DialectType::DuckDB)
21537 && matches!(source, DialectType::Spark | DialectType::Databricks)
21538 && matches!(
21539 c.to,
21540 DataType::Timestamp {
21541 timezone: false,
21542 ..
21543 }
21544 )
21545 {
21546 c.to = DataType::Custom {
21547 name: "TIMESTAMPTZ".to_string(),
21548 };
21549 }
21550 // For Spark source -> Databricks: VARCHAR/CHAR -> STRING
21551 // Spark parses VARCHAR(n)/CHAR(n) as TEXT, normalize to STRING
21552 if matches!(target, DialectType::Databricks | DialectType::Spark)
21553 && matches!(
21554 source,
21555 DialectType::Spark | DialectType::Databricks | DialectType::Hive
21556 )
21557 && Self::has_varchar_char_type(&c.to)
21558 {
21559 c.to = Self::normalize_varchar_to_string(c.to);
21560 }
21561 Ok(Expression::TryCast(c))
21562 } else {
21563 Ok(e)
21564 }
21565 }
21566 Action::XorExpand => {
21567 // Expand XOR to (a AND NOT b) OR (NOT a AND b) for dialects without XOR keyword
21568 // Snowflake: use BOOLXOR(a, b) instead
21569 if let Expression::Xor(xor) = e {
21570 // Collect all XOR operands
21571 let mut operands = Vec::new();
21572 if let Some(this) = xor.this {
21573 operands.push(*this);
21574 }
21575 if let Some(expr) = xor.expression {
21576 operands.push(*expr);
21577 }
21578 operands.extend(xor.expressions);
21579
21580 // Snowflake: use BOOLXOR(a, b)
21581 if matches!(target, DialectType::Snowflake) && operands.len() == 2 {
21582 let a = operands.remove(0);
21583 let b = operands.remove(0);
21584 return Ok(Expression::Function(Box::new(Function::new(
21585 "BOOLXOR".to_string(),
21586 vec![a, b],
21587 ))));
21588 }
21589
21590 // Helper to build (a AND NOT b) OR (NOT a AND b)
21591 let make_xor = |a: Expression, b: Expression| -> Expression {
21592 let not_b = Expression::Not(Box::new(
21593 crate::expressions::UnaryOp::new(b.clone()),
21594 ));
21595 let not_a = Expression::Not(Box::new(
21596 crate::expressions::UnaryOp::new(a.clone()),
21597 ));
21598 let left_and = Expression::And(Box::new(BinaryOp {
21599 left: a,
21600 right: Expression::Paren(Box::new(Paren {
21601 this: not_b,
21602 trailing_comments: Vec::new(),
21603 })),
21604 left_comments: Vec::new(),
21605 operator_comments: Vec::new(),
21606 trailing_comments: Vec::new(),
21607 inferred_type: None,
21608 }));
21609 let right_and = Expression::And(Box::new(BinaryOp {
21610 left: Expression::Paren(Box::new(Paren {
21611 this: not_a,
21612 trailing_comments: Vec::new(),
21613 })),
21614 right: b,
21615 left_comments: Vec::new(),
21616 operator_comments: Vec::new(),
21617 trailing_comments: Vec::new(),
21618 inferred_type: None,
21619 }));
21620 Expression::Or(Box::new(BinaryOp {
21621 left: Expression::Paren(Box::new(Paren {
21622 this: left_and,
21623 trailing_comments: Vec::new(),
21624 })),
21625 right: Expression::Paren(Box::new(Paren {
21626 this: right_and,
21627 trailing_comments: Vec::new(),
21628 })),
21629 left_comments: Vec::new(),
21630 operator_comments: Vec::new(),
21631 trailing_comments: Vec::new(),
21632 inferred_type: None,
21633 }))
21634 };
21635
21636 if operands.len() >= 2 {
21637 let mut result = make_xor(operands.remove(0), operands.remove(0));
21638 for operand in operands {
21639 result = make_xor(result, operand);
21640 }
21641 Ok(result)
21642 } else if operands.len() == 1 {
21643 Ok(operands.remove(0))
21644 } else {
21645 // No operands - return FALSE (shouldn't happen)
21646 Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
21647 value: false,
21648 }))
21649 }
21650 } else {
21651 Ok(e)
21652 }
21653 }
21654 Action::DatePartUnquote => {
21655 // DATE_PART('month', x) -> DATE_PART(month, x) for Snowflake target
21656 // Convert the quoted string first arg to a bare Column/Identifier
21657 if let Expression::Function(mut f) = e {
21658 if let Some(Expression::Literal(lit)) = f.args.first() {
21659 if let crate::expressions::Literal::String(s) = lit.as_ref() {
21660 let bare_name = s.to_ascii_lowercase();
21661 f.args[0] =
21662 Expression::Column(Box::new(crate::expressions::Column {
21663 name: Identifier::new(bare_name),
21664 table: None,
21665 join_mark: false,
21666 trailing_comments: Vec::new(),
21667 span: None,
21668 inferred_type: None,
21669 }));
21670 }
21671 }
21672 Ok(Expression::Function(f))
21673 } else {
21674 Ok(e)
21675 }
21676 }
21677 Action::ArrayLengthConvert => {
21678 // Extract the argument from the expression
21679 let arg = match e {
21680 Expression::Cardinality(ref f) => f.this.clone(),
21681 Expression::ArrayLength(ref f) => f.this.clone(),
21682 Expression::ArraySize(ref f) => f.this.clone(),
21683 _ => return Ok(e),
21684 };
21685 match target {
21686 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
21687 Ok(Expression::Function(Box::new(Function::new(
21688 "SIZE".to_string(),
21689 vec![arg],
21690 ))))
21691 }
21692 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
21693 Ok(Expression::Cardinality(Box::new(
21694 crate::expressions::UnaryFunc::new(arg),
21695 )))
21696 }
21697 DialectType::BigQuery => Ok(Expression::ArrayLength(Box::new(
21698 crate::expressions::UnaryFunc::new(arg),
21699 ))),
21700 DialectType::DuckDB => Ok(Expression::ArrayLength(Box::new(
21701 crate::expressions::UnaryFunc::new(arg),
21702 ))),
21703 DialectType::PostgreSQL | DialectType::Redshift => {
21704 // PostgreSQL ARRAY_LENGTH requires dimension arg
21705 Ok(Expression::Function(Box::new(Function::new(
21706 "ARRAY_LENGTH".to_string(),
21707 vec![arg, Expression::number(1)],
21708 ))))
21709 }
21710 DialectType::Snowflake => Ok(Expression::ArraySize(Box::new(
21711 crate::expressions::UnaryFunc::new(arg),
21712 ))),
21713 _ => Ok(e), // Keep original
21714 }
21715 }
21716
21717 Action::JsonExtractToArrow => {
21718 // JSON_EXTRACT(x, path) -> x -> path for SQLite/DuckDB (set arrow_syntax = true)
21719 if let Expression::JsonExtract(mut f) = e {
21720 f.arrow_syntax = true;
21721 // Transform path: convert bracket notation to dot notation
21722 // SQLite strips wildcards, DuckDB preserves them
21723 if let Expression::Literal(ref lit) = f.path {
21724 if let Literal::String(ref s) = lit.as_ref() {
21725 let mut transformed = s.clone();
21726 if matches!(target, DialectType::SQLite) {
21727 transformed = Self::strip_json_wildcards(&transformed);
21728 }
21729 transformed = Self::bracket_to_dot_notation(&transformed);
21730 if transformed != *s {
21731 f.path = Expression::string(&transformed);
21732 }
21733 }
21734 }
21735 Ok(Expression::JsonExtract(f))
21736 } else {
21737 Ok(e)
21738 }
21739 }
21740
21741 Action::JsonExtractToGetJsonObject => {
21742 if let Expression::JsonExtract(f) = e {
21743 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
21744 // JSON_EXTRACT(x, '$.key') -> JSON_EXTRACT_PATH(x, 'key') for PostgreSQL
21745 // Use proper decomposition that handles brackets
21746 let keys: Vec<Expression> = if let Expression::Literal(lit) = f.path {
21747 if let Literal::String(ref s) = lit.as_ref() {
21748 let parts = Self::decompose_json_path(s);
21749 parts.into_iter().map(|k| Expression::string(&k)).collect()
21750 } else {
21751 vec![]
21752 }
21753 } else {
21754 vec![f.path]
21755 };
21756 let func_name = if matches!(target, DialectType::Redshift) {
21757 "JSON_EXTRACT_PATH_TEXT"
21758 } else {
21759 "JSON_EXTRACT_PATH"
21760 };
21761 let mut args = vec![f.this];
21762 args.extend(keys);
21763 Ok(Expression::Function(Box::new(Function::new(
21764 func_name.to_string(),
21765 args,
21766 ))))
21767 } else {
21768 // GET_JSON_OBJECT(x, '$.path') for Hive/Spark
21769 // Convert bracket double quotes to single quotes
21770 let path = if let Expression::Literal(ref lit) = f.path {
21771 if let Literal::String(ref s) = lit.as_ref() {
21772 let normalized = Self::bracket_to_single_quotes(s);
21773 if normalized != *s {
21774 Expression::string(&normalized)
21775 } else {
21776 f.path.clone()
21777 }
21778 } else {
21779 f.path.clone()
21780 }
21781 } else {
21782 f.path.clone()
21783 };
21784 Ok(Expression::Function(Box::new(Function::new(
21785 "GET_JSON_OBJECT".to_string(),
21786 vec![f.this, path],
21787 ))))
21788 }
21789 } else {
21790 Ok(e)
21791 }
21792 }
21793
21794 Action::JsonExtractScalarToGetJsonObject => {
21795 // JSON_EXTRACT_SCALAR(x, '$.path') -> GET_JSON_OBJECT(x, '$.path') for Hive/Spark
21796 if let Expression::JsonExtractScalar(f) = e {
21797 Ok(Expression::Function(Box::new(Function::new(
21798 "GET_JSON_OBJECT".to_string(),
21799 vec![f.this, f.path],
21800 ))))
21801 } else {
21802 Ok(e)
21803 }
21804 }
21805
21806 Action::JsonExtractToTsql => {
21807 // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> ISNULL(JSON_QUERY(x, path), JSON_VALUE(x, path)) for TSQL
21808 let (this, path) = match e {
21809 Expression::JsonExtract(f) => (f.this, f.path),
21810 Expression::JsonExtractScalar(f) => (f.this, f.path),
21811 _ => return Ok(e),
21812 };
21813 // Transform path: strip wildcards, convert bracket notation to dot notation
21814 let transformed_path = if let Expression::Literal(ref lit) = path {
21815 if let Literal::String(ref s) = lit.as_ref() {
21816 let stripped = Self::strip_json_wildcards(s);
21817 let dotted = Self::bracket_to_dot_notation(&stripped);
21818 Expression::string(&dotted)
21819 } else {
21820 path.clone()
21821 }
21822 } else {
21823 path
21824 };
21825 let json_query = Expression::Function(Box::new(Function::new(
21826 "JSON_QUERY".to_string(),
21827 vec![this.clone(), transformed_path.clone()],
21828 )));
21829 let json_value = Expression::Function(Box::new(Function::new(
21830 "JSON_VALUE".to_string(),
21831 vec![this, transformed_path],
21832 )));
21833 Ok(Expression::Function(Box::new(Function::new(
21834 "ISNULL".to_string(),
21835 vec![json_query, json_value],
21836 ))))
21837 }
21838
21839 Action::JsonExtractToClickHouse => {
21840 // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> JSONExtractString(x, 'key1', idx, 'key2') for ClickHouse
21841 let (this, path) = match e {
21842 Expression::JsonExtract(f) => (f.this, f.path),
21843 Expression::JsonExtractScalar(f) => (f.this, f.path),
21844 _ => return Ok(e),
21845 };
21846 let args: Vec<Expression> = if let Expression::Literal(lit) = path {
21847 if let Literal::String(ref s) = lit.as_ref() {
21848 let parts = Self::decompose_json_path(s);
21849 let mut result = vec![this];
21850 for part in parts {
21851 // ClickHouse uses 1-based integer indices for array access
21852 if let Ok(idx) = part.parse::<i64>() {
21853 result.push(Expression::number(idx + 1));
21854 } else {
21855 result.push(Expression::string(&part));
21856 }
21857 }
21858 result
21859 } else {
21860 vec![]
21861 }
21862 } else {
21863 vec![this, path]
21864 };
21865 Ok(Expression::Function(Box::new(Function::new(
21866 "JSONExtractString".to_string(),
21867 args,
21868 ))))
21869 }
21870
21871 Action::JsonExtractScalarConvert => {
21872 // JSON_EXTRACT_SCALAR -> target-specific
21873 if let Expression::JsonExtractScalar(f) = e {
21874 match target {
21875 DialectType::PostgreSQL | DialectType::Redshift => {
21876 // JSON_EXTRACT_SCALAR(x, '$.path') -> JSON_EXTRACT_PATH_TEXT(x, 'key1', 'key2')
21877 let keys: Vec<Expression> = if let Expression::Literal(lit) = f.path
21878 {
21879 if let Literal::String(ref s) = lit.as_ref() {
21880 let parts = Self::decompose_json_path(s);
21881 parts.into_iter().map(|k| Expression::string(&k)).collect()
21882 } else {
21883 vec![]
21884 }
21885 } else {
21886 vec![f.path]
21887 };
21888 let mut args = vec![f.this];
21889 args.extend(keys);
21890 Ok(Expression::Function(Box::new(Function::new(
21891 "JSON_EXTRACT_PATH_TEXT".to_string(),
21892 args,
21893 ))))
21894 }
21895 DialectType::Snowflake => {
21896 // JSON_EXTRACT_SCALAR(x, '$.path') -> JSON_EXTRACT_PATH_TEXT(x, 'stripped_path')
21897 let stripped_path = if let Expression::Literal(ref lit) = f.path {
21898 if let Literal::String(ref s) = lit.as_ref() {
21899 let stripped = Self::strip_json_dollar_prefix(s);
21900 Expression::string(&stripped)
21901 } else {
21902 f.path.clone()
21903 }
21904 } else {
21905 f.path
21906 };
21907 Ok(Expression::Function(Box::new(Function::new(
21908 "JSON_EXTRACT_PATH_TEXT".to_string(),
21909 vec![f.this, stripped_path],
21910 ))))
21911 }
21912 DialectType::SQLite | DialectType::DuckDB => {
21913 // JSON_EXTRACT_SCALAR(x, '$.path') -> x ->> '$.path'
21914 Ok(Expression::JsonExtractScalar(Box::new(
21915 crate::expressions::JsonExtractFunc {
21916 this: f.this,
21917 path: f.path,
21918 returning: f.returning,
21919 arrow_syntax: true,
21920 hash_arrow_syntax: false,
21921 wrapper_option: None,
21922 quotes_option: None,
21923 on_scalar_string: false,
21924 on_error: None,
21925 },
21926 )))
21927 }
21928 _ => Ok(Expression::JsonExtractScalar(f)),
21929 }
21930 } else {
21931 Ok(e)
21932 }
21933 }
21934
                    Action::JsonPathNormalize => {
                        // Normalize a JSON path literal into the target dialect's preferred
                        // format. Only string-literal paths can be rewritten statically;
                        // computed paths pass through untouched.
                        if let Expression::JsonExtract(mut f) = e {
                            if let Expression::Literal(ref lit) = f.path {
                                if let Literal::String(ref s) = lit.as_ref() {
                                    let mut normalized = s.clone();
                                    // Convert bracket notation and handle wildcards per dialect
                                    match target {
                                        DialectType::BigQuery => {
                                            // BigQuery strips wildcards and uses single quotes in brackets
                                            normalized = Self::strip_json_wildcards(&normalized);
                                            normalized = Self::bracket_to_single_quotes(&normalized);
                                        }
                                        DialectType::MySQL => {
                                            // MySQL preserves wildcards, converts brackets to dot notation
                                            normalized = Self::bracket_to_dot_notation(&normalized);
                                        }
                                        // Other targets keep the path exactly as written.
                                        _ => {}
                                    }
                                    // Only rebuild the literal when the text actually changed.
                                    if normalized != *s {
                                        f.path = Expression::string(&normalized);
                                    }
                                }
                            }
                            Ok(Expression::JsonExtract(f))
                        } else {
                            Ok(e)
                        }
                    }

                    Action::JsonQueryValueConvert => {
                        // Rewrite JSON_QUERY/JSON_VALUE into the target dialect's equivalent.
                        // `is_query` remembers which node we started from so the default
                        // branch can regenerate the same function name.
                        let (f, is_query) = match e {
                            Expression::JsonQuery(f) => (f, true),
                            Expression::JsonValue(f) => (f, false),
                            _ => return Ok(e),
                        };
                        match target {
                            DialectType::TSQL | DialectType::Fabric => {
                                // ISNULL(JSON_QUERY(...), JSON_VALUE(...)) — presumably so
                                // both object/array and scalar results are covered; confirm
                                // against TSQL JSON_QUERY/JSON_VALUE semantics.
                                let json_query = Expression::Function(Box::new(Function::new(
                                    "JSON_QUERY".to_string(),
                                    vec![f.this.clone(), f.path.clone()],
                                )));
                                let json_value = Expression::Function(Box::new(Function::new(
                                    "JSON_VALUE".to_string(),
                                    vec![f.this, f.path],
                                )));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "ISNULL".to_string(),
                                    vec![json_query, json_value],
                                ))))
                            }
                            DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "GET_JSON_OBJECT".to_string(),
                                    vec![f.this, f.path],
                                ))))
                            }
                            DialectType::PostgreSQL | DialectType::Redshift => {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "JSON_EXTRACT_PATH_TEXT".to_string(),
                                    vec![f.this, f.path],
                                ))))
                            }
                            DialectType::DuckDB | DialectType::SQLite => {
                                // json -> path arrow syntax
                                Ok(Expression::JsonExtract(Box::new(
                                    crate::expressions::JsonExtractFunc {
                                        this: f.this,
                                        path: f.path,
                                        returning: f.returning,
                                        arrow_syntax: true,
                                        hash_arrow_syntax: false,
                                        wrapper_option: f.wrapper_option,
                                        quotes_option: f.quotes_option,
                                        on_scalar_string: f.on_scalar_string,
                                        on_error: f.on_error,
                                    },
                                )))
                            }
                            DialectType::Snowflake => {
                                // GET_PATH(PARSE_JSON(json), 'path')
                                // Strip $. prefix from path
                                // Only wrap in PARSE_JSON if not already a PARSE_JSON call or ParseJson expression
                                let json_expr = match &f.this {
                                    Expression::Function(ref inner_f)
                                        if inner_f.name.eq_ignore_ascii_case("PARSE_JSON") =>
                                    {
                                        f.this
                                    }
                                    Expression::ParseJson(_) => {
                                        // Already a ParseJson expression, which generates as PARSE_JSON(...)
                                        f.this
                                    }
                                    _ => Expression::Function(Box::new(Function::new(
                                        "PARSE_JSON".to_string(),
                                        vec![f.this],
                                    ))),
                                };
                                let path_str = match &f.path {
                                    Expression::Literal(lit)
                                        if matches!(lit.as_ref(), Literal::String(_)) =>
                                    {
                                        let Literal::String(s) = lit.as_ref() else {
                                            unreachable!()
                                        };
                                        // GET_PATH paths are relative: drop a leading "$.".
                                        let stripped = s.strip_prefix("$.").unwrap_or(s);
                                        Expression::Literal(Box::new(Literal::String(
                                            stripped.to_string(),
                                        )))
                                    }
                                    // Non-literal path: use as-is.
                                    other => other.clone(),
                                };
                                Ok(Expression::Function(Box::new(Function::new(
                                    "GET_PATH".to_string(),
                                    vec![json_expr, path_str],
                                ))))
                            }
                            _ => {
                                // Default: keep as JSON_QUERY/JSON_VALUE function
                                let func_name = if is_query { "JSON_QUERY" } else { "JSON_VALUE" };
                                Ok(Expression::Function(Box::new(Function::new(
                                    func_name.to_string(),
                                    vec![f.this, f.path],
                                ))))
                            }
                        }
                    }
22064
22065 Action::JsonLiteralToJsonParse => {
22066 // CAST('x' AS JSON) -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake
22067 // Also DuckDB CAST(x AS JSON) -> JSON_PARSE(x) for Trino/Presto/Athena
22068 if let Expression::Cast(c) = e {
22069 let func_name = if matches!(target, DialectType::Snowflake) {
22070 "PARSE_JSON"
22071 } else {
22072 "JSON_PARSE"
22073 };
22074 Ok(Expression::Function(Box::new(Function::new(
22075 func_name.to_string(),
22076 vec![c.this],
22077 ))))
22078 } else {
22079 Ok(e)
22080 }
22081 }
22082
22083 Action::DuckDBTryCastJsonToTryJsonParse => {
22084 // DuckDB TRY_CAST(x AS JSON) -> TRY(JSON_PARSE(x)) for Trino/Presto/Athena
22085 if let Expression::TryCast(c) = e {
22086 let json_parse = Expression::Function(Box::new(Function::new(
22087 "JSON_PARSE".to_string(),
22088 vec![c.this],
22089 )));
22090 Ok(Expression::Function(Box::new(Function::new(
22091 "TRY".to_string(),
22092 vec![json_parse],
22093 ))))
22094 } else {
22095 Ok(e)
22096 }
22097 }
22098
                    Action::DuckDBJsonFuncToJsonParse => {
                        // DuckDB json(x) -> JSON_PARSE(x) for Trino/Presto/Athena
                        // NOTE(review): the function name is not checked here — this arm
                        // rewrites ANY Function node it receives. Presumably the action is
                        // only ever dispatched for DuckDB's json(); confirm at the dispatch
                        // site that selected this Action.
                        if let Expression::Function(f) = e {
                            let args = f.args;
                            Ok(Expression::Function(Box::new(Function::new(
                                "JSON_PARSE".to_string(),
                                args,
                            ))))
                        } else {
                            Ok(e)
                        }
                    }
22111
22112 Action::DuckDBJsonValidToIsJson => {
22113 // DuckDB json_valid(x) -> x IS JSON (SQL:2016 predicate) for Trino/Presto/Athena
22114 if let Expression::Function(mut f) = e {
22115 let arg = f.args.remove(0);
22116 Ok(Expression::IsJson(Box::new(
22117 crate::expressions::IsJson {
22118 this: arg,
22119 json_type: None,
22120 unique_keys: None,
22121 negated: false,
22122 },
22123 )))
22124 } else {
22125 Ok(e)
22126 }
22127 }
22128
                    Action::AtTimeZoneConvert => {
                        // AT TIME ZONE -> target-specific conversion
                        if let Expression::AtTimeZone(atz) = e {
                            match target {
                                DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "AT_TIMEZONE".to_string(),
                                        vec![atz.this, atz.zone],
                                    ))))
                                }
                                DialectType::Spark | DialectType::Databricks => {
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "FROM_UTC_TIMESTAMP".to_string(),
                                        vec![atz.this, atz.zone],
                                    ))))
                                }
                                DialectType::Snowflake => {
                                    // CONVERT_TIMEZONE('zone', expr) — note the argument
                                    // order is reversed: zone comes first.
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "CONVERT_TIMEZONE".to_string(),
                                        vec![atz.zone, atz.this],
                                    ))))
                                }
                                DialectType::BigQuery => {
                                    // TIMESTAMP(DATETIME(expr, 'zone'))
                                    let datetime_call = Expression::Function(Box::new(Function::new(
                                        "DATETIME".to_string(),
                                        vec![atz.this, atz.zone],
                                    )));
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "TIMESTAMP".to_string(),
                                        vec![datetime_call],
                                    ))))
                                }
                                // Fallback: Presto-style AT_TIMEZONE.
                                _ => Ok(Expression::Function(Box::new(Function::new(
                                    "AT_TIMEZONE".to_string(),
                                    vec![atz.this, atz.zone],
                                )))),
                            }
                        } else {
                            Ok(e)
                        }
                    }

                    Action::DayOfWeekConvert => {
                        // DAY_OF_WEEK -> ISODOW for DuckDB, ((DAYOFWEEK(x) % 7) + 1) for Spark
                        if let Expression::DayOfWeek(f) = e {
                            match target {
                                DialectType::DuckDB => Ok(Expression::Function(Box::new(
                                    Function::new("ISODOW".to_string(), vec![f.this]),
                                ))),
                                DialectType::Spark | DialectType::Databricks => {
                                    // ((DAYOFWEEK(x) % 7) + 1) — remaps Spark's day numbering
                                    // to the source's; TODO confirm the intended conventions.
                                    let dayofweek = Expression::Function(Box::new(Function::new(
                                        "DAYOFWEEK".to_string(),
                                        vec![f.this],
                                    )));
                                    let modulo = Expression::Mod(Box::new(BinaryOp {
                                        left: dayofweek,
                                        right: Expression::number(7),
                                        left_comments: Vec::new(),
                                        operator_comments: Vec::new(),
                                        trailing_comments: Vec::new(),
                                        inferred_type: None,
                                    }));
                                    // Explicit parens so operator precedence survives
                                    // round-tripping through the generator.
                                    let paren_mod = Expression::Paren(Box::new(Paren {
                                        this: modulo,
                                        trailing_comments: Vec::new(),
                                    }));
                                    let add_one = Expression::Add(Box::new(BinaryOp {
                                        left: paren_mod,
                                        right: Expression::number(1),
                                        left_comments: Vec::new(),
                                        operator_comments: Vec::new(),
                                        trailing_comments: Vec::new(),
                                        inferred_type: None,
                                    }));
                                    Ok(Expression::Paren(Box::new(Paren {
                                        this: add_one,
                                        trailing_comments: Vec::new(),
                                    })))
                                }
                                // Other targets keep the DAY_OF_WEEK node unchanged.
                                _ => Ok(Expression::DayOfWeek(f)),
                            }
                        } else {
                            Ok(e)
                        }
                    }

                    Action::MaxByMinByConvert => {
                        // MAX_BY -> argMax for ClickHouse, drop 3rd arg for Spark
                        // MIN_BY -> argMin for ClickHouse, ARG_MIN for DuckDB, drop 3rd arg for Spark/ClickHouse
                        // Handle both Expression::Function and Expression::AggregateFunction
                        let (is_max, args) = match &e {
                            Expression::Function(f) => {
                                (f.name.eq_ignore_ascii_case("MAX_BY"), f.args.clone())
                            }
                            Expression::AggregateFunction(af) => {
                                (af.name.eq_ignore_ascii_case("MAX_BY"), af.args.clone())
                            }
                            _ => return Ok(e),
                        };
                        match target {
                            DialectType::ClickHouse => {
                                let name = if is_max { "argMax" } else { "argMin" };
                                let mut args = args;
                                // Drop any optional 3rd argument: truncate keeps at most two.
                                args.truncate(2);
                                Ok(Expression::Function(Box::new(Function::new(
                                    name.to_string(),
                                    args,
                                ))))
                            }
                            DialectType::DuckDB => {
                                // DuckDB keeps the full argument list, just renames.
                                let name = if is_max { "ARG_MAX" } else { "ARG_MIN" };
                                Ok(Expression::Function(Box::new(Function::new(
                                    name.to_string(),
                                    args,
                                ))))
                            }
                            DialectType::Spark | DialectType::Databricks => {
                                let mut args = args;
                                args.truncate(2);
                                let name = if is_max { "MAX_BY" } else { "MIN_BY" };
                                Ok(Expression::Function(Box::new(Function::new(
                                    name.to_string(),
                                    args,
                                ))))
                            }
                            _ => Ok(e),
                        }
                    }
22260
22261 Action::ElementAtConvert => {
22262 // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
22263 let (arr, idx) = if let Expression::ElementAt(bf) = e {
22264 (bf.this, bf.expression)
22265 } else if let Expression::Function(ref f) = e {
22266 if f.args.len() >= 2 {
22267 if let Expression::Function(f) = e {
22268 let mut args = f.args;
22269 let arr = args.remove(0);
22270 let idx = args.remove(0);
22271 (arr, idx)
22272 } else {
22273 unreachable!("outer condition already matched Expression::Function")
22274 }
22275 } else {
22276 return Ok(e);
22277 }
22278 } else {
22279 return Ok(e);
22280 };
22281 match target {
22282 DialectType::PostgreSQL => {
22283 // Wrap array in parens for PostgreSQL: (ARRAY[1,2,3])[4]
22284 let arr_expr = Expression::Paren(Box::new(Paren {
22285 this: arr,
22286 trailing_comments: vec![],
22287 }));
22288 Ok(Expression::Subscript(Box::new(
22289 crate::expressions::Subscript {
22290 this: arr_expr,
22291 index: idx,
22292 },
22293 )))
22294 }
22295 DialectType::BigQuery => {
22296 // BigQuery: convert ARRAY[...] to bare [...] for subscript
22297 let arr_expr = match arr {
22298 Expression::ArrayFunc(af) => Expression::ArrayFunc(Box::new(
22299 crate::expressions::ArrayConstructor {
22300 expressions: af.expressions,
22301 bracket_notation: true,
22302 use_list_keyword: false,
22303 },
22304 )),
22305 other => other,
22306 };
22307 let safe_ordinal = Expression::Function(Box::new(Function::new(
22308 "SAFE_ORDINAL".to_string(),
22309 vec![idx],
22310 )));
22311 Ok(Expression::Subscript(Box::new(
22312 crate::expressions::Subscript {
22313 this: arr_expr,
22314 index: safe_ordinal,
22315 },
22316 )))
22317 }
22318 _ => Ok(Expression::Function(Box::new(Function::new(
22319 "ELEMENT_AT".to_string(),
22320 vec![arr, idx],
22321 )))),
22322 }
22323 }
22324
22325 Action::CurrentUserParens => {
22326 // CURRENT_USER -> CURRENT_USER() for Snowflake
22327 Ok(Expression::Function(Box::new(Function::new(
22328 "CURRENT_USER".to_string(),
22329 vec![],
22330 ))))
22331 }
22332
                    Action::ArrayAggToCollectList => {
                        // ARRAY_AGG(x ORDER BY ...) -> COLLECT_LIST(x) for Hive/Spark
                        // Python sqlglot Hive.arrayagg_sql strips ORDER BY for simple cases
                        // but preserves it when DISTINCT/IGNORE NULLS/LIMIT are present
                        match e {
                            Expression::AggregateFunction(mut af) => {
                                // "Simple" = no modifier present; only then is ORDER BY dropped.
                                let is_simple =
                                    !af.distinct && af.ignore_nulls.is_none() && af.limit.is_none();
                                // COLLECT_LIST takes a single argument; keep only the first.
                                let args = if af.args.is_empty() {
                                    vec![]
                                } else {
                                    vec![af.args[0].clone()]
                                };
                                af.name = "COLLECT_LIST".to_string();
                                af.args = args;
                                if is_simple {
                                    af.order_by = Vec::new();
                                }
                                Ok(Expression::AggregateFunction(af))
                            }
                            Expression::ArrayAgg(agg) => {
                                let is_simple =
                                    !agg.distinct && agg.ignore_nulls.is_none() && agg.limit.is_none();
                                // Rebuild as a generic AggregateFunction so the generator
                                // emits a plain COLLECT_LIST(...) call.
                                Ok(Expression::AggregateFunction(Box::new(
                                    crate::expressions::AggregateFunction {
                                        name: "COLLECT_LIST".to_string(),
                                        args: vec![agg.this.clone()],
                                        distinct: agg.distinct,
                                        filter: agg.filter.clone(),
                                        order_by: if is_simple {
                                            Vec::new()
                                        } else {
                                            agg.order_by.clone()
                                        },
                                        limit: agg.limit.clone(),
                                        ignore_nulls: agg.ignore_nulls,
                                        inferred_type: None,
                                    },
                                )))
                            }
                            _ => Ok(e),
                        }
                    }

                    Action::ArraySyntaxConvert => {
                        match e {
                            // ARRAY[1, 2] (ArrayFunc bracket_notation=false) -> set bracket_notation=true
                            // so the generator uses dialect-specific output (ARRAY() for Spark, [] for BigQuery)
                            Expression::ArrayFunc(arr) if !arr.bracket_notation => Ok(
                                Expression::ArrayFunc(Box::new(crate::expressions::ArrayConstructor {
                                    expressions: arr.expressions,
                                    bracket_notation: true,
                                    use_list_keyword: false,
                                })),
                            ),
                            // ARRAY(y) function style -> ArrayFunc for target dialect
                            // bracket_notation=true for BigQuery/DuckDB/ClickHouse/StarRocks (output []), false for Presto (output ARRAY[])
                            Expression::Function(f) if f.name.eq_ignore_ascii_case("ARRAY") => {
                                let bracket = matches!(
                                    target,
                                    DialectType::BigQuery
                                        | DialectType::DuckDB
                                        | DialectType::Snowflake
                                        | DialectType::ClickHouse
                                        | DialectType::StarRocks
                                );
                                Ok(Expression::ArrayFunc(Box::new(
                                    crate::expressions::ArrayConstructor {
                                        expressions: f.args,
                                        bracket_notation: bracket,
                                        use_list_keyword: false,
                                    },
                                )))
                            }
                            _ => Ok(e),
                        }
                    }
22410
22411 Action::CastToJsonForSpark => {
22412 // CAST(x AS JSON) -> TO_JSON(x) for Spark
22413 if let Expression::Cast(c) = e {
22414 Ok(Expression::Function(Box::new(Function::new(
22415 "TO_JSON".to_string(),
22416 vec![c.this],
22417 ))))
22418 } else {
22419 Ok(e)
22420 }
22421 }
22422
                    Action::CastJsonToFromJson => {
                        // CAST(ParseJson(literal) AS ARRAY/MAP/STRUCT) -> FROM_JSON(literal, type_string) for Spark
                        if let Expression::Cast(c) = e {
                            // Extract the string literal from ParseJson; if the cast operand
                            // is not a ParseJson wrapper, use it directly.
                            let literal_expr = if let Expression::ParseJson(pj) = c.this {
                                pj.this
                            } else {
                                c.this
                            };
                            // Convert the target DataType to Spark's type string format
                            let type_str = Self::data_type_to_spark_string(&c.to);
                            Ok(Expression::Function(Box::new(Function::new(
                                "FROM_JSON".to_string(),
                                vec![
                                    literal_expr,
                                    Expression::Literal(Box::new(Literal::String(type_str))),
                                ],
                            ))))
                        } else {
                            Ok(e)
                        }
                    }

                    Action::ToJsonConvert => {
                        // TO_JSON(x) -> target-specific conversion
                        if let Expression::ToJson(f) = e {
                            let arg = f.this;
                            match target {
                                DialectType::Presto | DialectType::Trino => {
                                    // JSON_FORMAT(CAST(x AS JSON))
                                    // JSON is not a first-class DataType variant here, so a
                                    // Custom type carries the spelling through the generator.
                                    let cast_json = Expression::Cast(Box::new(Cast {
                                        this: arg,
                                        to: DataType::Custom {
                                            name: "JSON".to_string(),
                                        },
                                        trailing_comments: vec![],
                                        double_colon_syntax: false,
                                        format: None,
                                        default: None,
                                        inferred_type: None,
                                    }));
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "JSON_FORMAT".to_string(),
                                        vec![cast_json],
                                    ))))
                                }
                                DialectType::BigQuery => Ok(Expression::Function(Box::new(
                                    Function::new("TO_JSON_STRING".to_string(), vec![arg]),
                                ))),
                                DialectType::DuckDB => {
                                    // CAST(TO_JSON(x) AS TEXT)
                                    let to_json =
                                        Expression::ToJson(Box::new(crate::expressions::UnaryFunc {
                                            this: arg,
                                            original_name: None,
                                            inferred_type: None,
                                        }));
                                    Ok(Expression::Cast(Box::new(Cast {
                                        this: to_json,
                                        to: DataType::Text,
                                        trailing_comments: vec![],
                                        double_colon_syntax: false,
                                        format: None,
                                        default: None,
                                        inferred_type: None,
                                    })))
                                }
                                // Other targets: rebuild the ToJson node unchanged.
                                _ => Ok(Expression::ToJson(Box::new(
                                    crate::expressions::UnaryFunc {
                                        this: arg,
                                        original_name: None,
                                        inferred_type: None,
                                    },
                                ))),
                            }
                        } else {
                            Ok(e)
                        }
                    }
22502
22503 Action::VarianceToClickHouse => {
22504 if let Expression::Variance(f) = e {
22505 Ok(Expression::Function(Box::new(Function::new(
22506 "varSamp".to_string(),
22507 vec![f.this],
22508 ))))
22509 } else {
22510 Ok(e)
22511 }
22512 }
22513
22514 Action::StddevToClickHouse => {
22515 if let Expression::Stddev(f) = e {
22516 Ok(Expression::Function(Box::new(Function::new(
22517 "stddevSamp".to_string(),
22518 vec![f.this],
22519 ))))
22520 } else {
22521 Ok(e)
22522 }
22523 }
22524
22525 Action::ApproxQuantileConvert => {
22526 if let Expression::ApproxQuantile(aq) = e {
22527 let mut args = vec![*aq.this];
22528 if let Some(q) = aq.quantile {
22529 args.push(*q);
22530 }
22531 Ok(Expression::Function(Box::new(Function::new(
22532 "APPROX_PERCENTILE".to_string(),
22533 args,
22534 ))))
22535 } else {
22536 Ok(e)
22537 }
22538 }
22539
22540 Action::DollarParamConvert => {
22541 if let Expression::Parameter(p) = e {
22542 Ok(Expression::Parameter(Box::new(
22543 crate::expressions::Parameter {
22544 name: p.name,
22545 index: p.index,
22546 style: crate::expressions::ParameterStyle::At,
22547 quoted: p.quoted,
22548 string_quoted: p.string_quoted,
22549 expression: p.expression,
22550 },
22551 )))
22552 } else {
22553 Ok(e)
22554 }
22555 }
22556
                    Action::EscapeStringNormalize => {
                        // Normalize an e'...' escape-string literal for the target dialect.
                        if let Expression::Literal(ref lit) = e {
                            if let Literal::EscapeString(s) = lit.as_ref() {
                                // Strip prefix (e.g., "e:" or "E:") if present from tokenizer
                                let stripped = if s.starts_with("e:") || s.starts_with("E:") {
                                    s[2..].to_string()
                                } else {
                                    s.clone()
                                };
                                // Re-escape control characters. Only \n, \r and \t are
                                // handled here — other control characters pass through as-is.
                                let normalized = stripped
                                    .replace('\n', "\\n")
                                    .replace('\r', "\\r")
                                    .replace('\t', "\\t");
                                match target {
                                    DialectType::BigQuery => {
                                        // BigQuery: e'...' -> CAST(b'...' AS STRING)
                                        // Use Raw for the b'...' part to avoid double-escaping
                                        let raw_sql = format!("CAST(b'{}' AS STRING)", normalized);
                                        Ok(Expression::Raw(crate::expressions::Raw { sql: raw_sql }))
                                    }
                                    _ => Ok(Expression::Literal(Box::new(Literal::EscapeString(
                                        normalized,
                                    )))),
                                }
                            } else {
                                Ok(e)
                            }
                        } else {
                            Ok(e)
                        }
                    }

                    Action::StraightJoinCase => {
                        // straight_join: keep lowercase for DuckDB, quote for MySQL
                        if let Expression::Column(col) = e {
                            // Only the exact uppercase spelling is rewritten.
                            if col.name.name == "STRAIGHT_JOIN" {
                                let mut new_col = col;
                                new_col.name.name = "straight_join".to_string();
                                if matches!(target, DialectType::MySQL) {
                                    // MySQL: needs quoting since it's a reserved keyword
                                    new_col.name.quoted = true;
                                }
                                Ok(Expression::Column(new_col))
                            } else {
                                Ok(Expression::Column(col))
                            }
                        } else {
                            Ok(e)
                        }
                    }

                    Action::TablesampleReservoir => {
                        // TABLESAMPLE -> TABLESAMPLE RESERVOIR for DuckDB
                        if let Expression::TableSample(mut ts) = e {
                            if let Some(ref mut sample) = ts.sample {
                                // Force the explicit RESERVOIR method spelling in output.
                                sample.method = crate::expressions::SampleMethod::Reservoir;
                                sample.explicit_method = true;
                            }
                            Ok(Expression::TableSample(ts))
                        } else {
                            Ok(e)
                        }
                    }

                    Action::TablesampleSnowflakeStrip => {
                        // Strip method and PERCENT for Snowflake target from non-Snowflake source
                        // Applies whether the sample hangs off a TableSample node or a Table.
                        match e {
                            Expression::TableSample(mut ts) => {
                                if let Some(ref mut sample) = ts.sample {
                                    sample.suppress_method_output = true;
                                    sample.unit_after_size = false;
                                    sample.is_percent = false;
                                }
                                Ok(Expression::TableSample(ts))
                            }
                            Expression::Table(mut t) => {
                                if let Some(ref mut sample) = t.table_sample {
                                    sample.suppress_method_output = true;
                                    sample.unit_after_size = false;
                                    sample.is_percent = false;
                                }
                                Ok(Expression::Table(t))
                            }
                            _ => Ok(e),
                        }
                    }
22643
22644 Action::FirstToAnyValue => {
22645 // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
22646 if let Expression::First(mut agg) = e {
22647 agg.ignore_nulls = None;
22648 agg.name = Some("ANY_VALUE".to_string());
22649 Ok(Expression::AnyValue(agg))
22650 } else {
22651 Ok(e)
22652 }
22653 }
22654
22655 Action::ArrayIndexConvert => {
22656 // Subscript index: 1-based to 0-based for BigQuery
22657 if let Expression::Subscript(mut sub) = e {
22658 if let Expression::Literal(ref lit) = sub.index {
22659 if let Literal::Number(ref n) = lit.as_ref() {
22660 if let Ok(val) = n.parse::<i64>() {
22661 sub.index = Expression::Literal(Box::new(Literal::Number(
22662 (val - 1).to_string(),
22663 )));
22664 }
22665 }
22666 }
22667 Ok(Expression::Subscript(sub))
22668 } else {
22669 Ok(e)
22670 }
22671 }
22672
22673 Action::AnyValueIgnoreNulls => {
22674 // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
22675 if let Expression::AnyValue(mut av) = e {
22676 if av.ignore_nulls.is_none() {
22677 av.ignore_nulls = Some(true);
22678 }
22679 Ok(Expression::AnyValue(av))
22680 } else {
22681 Ok(e)
22682 }
22683 }
22684
22685 Action::BigQueryNullsOrdering => {
22686 // BigQuery doesn't support NULLS FIRST/LAST in window function ORDER BY
22687 if let Expression::WindowFunction(mut wf) = e {
22688 for o in &mut wf.over.order_by {
22689 o.nulls_first = None;
22690 }
22691 Ok(Expression::WindowFunction(wf))
22692 } else if let Expression::Ordered(mut o) = e {
22693 o.nulls_first = None;
22694 Ok(Expression::Ordered(o))
22695 } else {
22696 Ok(e)
22697 }
22698 }
22699
22700 Action::SnowflakeFloatProtect => {
22701 // Convert DataType::Float to DataType::Custom("FLOAT") to prevent
22702 // Snowflake's target transform from converting it to DOUBLE.
22703 // Non-Snowflake sources should keep their FLOAT spelling.
22704 if let Expression::DataType(DataType::Float { .. }) = e {
22705 Ok(Expression::DataType(DataType::Custom {
22706 name: "FLOAT".to_string(),
22707 }))
22708 } else {
22709 Ok(e)
22710 }
22711 }
22712
22713 Action::MysqlNullsOrdering => {
22714 // MySQL doesn't support NULLS FIRST/LAST - strip or rewrite
22715 if let Expression::Ordered(mut o) = e {
22716 let nulls_last = o.nulls_first == Some(false);
22717 let desc = o.desc;
22718 // MySQL default: ASC -> NULLS LAST, DESC -> NULLS FIRST
22719 // If requested ordering matches default, just strip NULLS clause
22720 let matches_default = if desc {
22721 // DESC default is NULLS FIRST, so nulls_first=true matches
22722 o.nulls_first == Some(true)
22723 } else {
22724 // ASC default is NULLS LAST, so nulls_first=false matches
22725 nulls_last
22726 };
22727 if matches_default {
22728 o.nulls_first = None;
22729 Ok(Expression::Ordered(o))
22730 } else {
22731 // Need CASE WHEN x IS NULL THEN 0/1 ELSE 0/1 END, x
22732 // For ASC NULLS FIRST: ORDER BY CASE WHEN x IS NULL THEN 0 ELSE 1 END, x ASC
22733 // For DESC NULLS LAST: ORDER BY CASE WHEN x IS NULL THEN 1 ELSE 0 END, x DESC
22734 let null_val = if desc { 1 } else { 0 };
22735 let non_null_val = if desc { 0 } else { 1 };
22736 let _case_expr = Expression::Case(Box::new(Case {
22737 operand: None,
22738 whens: vec![(
22739 Expression::IsNull(Box::new(crate::expressions::IsNull {
22740 this: o.this.clone(),
22741 not: false,
22742 postfix_form: false,
22743 })),
22744 Expression::number(null_val),
22745 )],
22746 else_: Some(Expression::number(non_null_val)),
22747 comments: Vec::new(),
22748 inferred_type: None,
22749 }));
22750 o.nulls_first = None;
22751 // Return a tuple of [case_expr, ordered_expr]
22752 // We need to return both as part of the ORDER BY
22753 // But since transform_recursive processes individual expressions,
22754 // we can't easily add extra ORDER BY items here.
22755 // Instead, strip the nulls_first
22756 o.nulls_first = None;
22757 Ok(Expression::Ordered(o))
22758 }
22759 } else {
22760 Ok(e)
22761 }
22762 }
22763
                    Action::MysqlNullsLastRewrite => {
                        // DuckDB -> MySQL: Add CASE WHEN IS NULL THEN 1 ELSE 0 END to ORDER BY
                        // to simulate NULLS LAST for ASC ordering
                        if let Expression::WindowFunction(mut wf) = e {
                            // Rebuild the ORDER BY list, possibly inserting a synthetic key
                            // before each ASC item.
                            let mut new_order_by = Vec::new();
                            for o in wf.over.order_by {
                                if !o.desc {
                                    // ASC: DuckDB has NULLS LAST, MySQL has NULLS FIRST
                                    // Add CASE WHEN expr IS NULL THEN 1 ELSE 0 END before expr
                                    let case_expr = Expression::Case(Box::new(Case {
                                        operand: None,
                                        whens: vec![(
                                            Expression::IsNull(Box::new(crate::expressions::IsNull {
                                                this: o.this.clone(),
                                                not: false,
                                                postfix_form: false,
                                            })),
                                            Expression::Literal(Box::new(Literal::Number(
                                                "1".to_string(),
                                            ))),
                                        )],
                                        else_: Some(Expression::Literal(Box::new(Literal::Number(
                                            "0".to_string(),
                                        )))),
                                        comments: Vec::new(),
                                        inferred_type: None,
                                    }));
                                    new_order_by.push(crate::expressions::Ordered {
                                        this: case_expr,
                                        desc: false,
                                        nulls_first: None,
                                        explicit_asc: false,
                                        with_fill: None,
                                    });
                                    // The original key follows, with its NULLS clause removed.
                                    let mut ordered = o;
                                    ordered.nulls_first = None;
                                    new_order_by.push(ordered);
                                } else {
                                    // DESC: DuckDB has NULLS LAST, MySQL also has NULLS LAST (NULLs smallest in DESC)
                                    // No change needed
                                    let mut ordered = o;
                                    ordered.nulls_first = None;
                                    new_order_by.push(ordered);
                                }
                            }
                            wf.over.order_by = new_order_by;
                            Ok(Expression::WindowFunction(wf))
                        } else {
                            Ok(e)
                        }
                    }

                    Action::RespectNullsConvert => {
                        // RESPECT NULLS -> strip for SQLite (FIRST_VALUE(c) OVER (...))
                        // `ignore_nulls == Some(false)` encodes an explicit RESPECT NULLS.
                        if let Expression::WindowFunction(mut wf) = e {
                            match &mut wf.this {
                                Expression::FirstValue(ref mut vf) => {
                                    if vf.ignore_nulls == Some(false) {
                                        vf.ignore_nulls = None;
                                        // For SQLite, we'd need to add NULLS LAST to ORDER BY in the OVER clause
                                        // but that's handled by the generator's NULLS ordering
                                    }
                                }
                                Expression::LastValue(ref mut vf) => {
                                    if vf.ignore_nulls == Some(false) {
                                        vf.ignore_nulls = None;
                                    }
                                }
                                _ => {}
                            }
                            Ok(Expression::WindowFunction(wf))
                        } else {
                            Ok(e)
                        }
                    }
22839
22840 Action::SnowflakeWindowFrameStrip => {
22841 // Strip the default ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
22842 // for FIRST_VALUE/LAST_VALUE/NTH_VALUE when targeting Snowflake
22843 if let Expression::WindowFunction(mut wf) = e {
22844 wf.over.frame = None;
22845 Ok(Expression::WindowFunction(wf))
22846 } else {
22847 Ok(e)
22848 }
22849 }
22850
                    Action::SnowflakeWindowFrameAdd => {
                        // Add default ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
                        // for FIRST_VALUE/LAST_VALUE/NTH_VALUE when transpiling from Snowflake to non-Snowflake
                        // Note: any existing frame on the window is overwritten.
                        if let Expression::WindowFunction(mut wf) = e {
                            wf.over.frame = Some(crate::expressions::WindowFrame {
                                kind: crate::expressions::WindowFrameKind::Rows,
                                start: crate::expressions::WindowFrameBound::UnboundedPreceding,
                                end: Some(crate::expressions::WindowFrameBound::UnboundedFollowing),
                                exclude: None,
                                kind_text: None,
                                start_side_text: None,
                                end_side_text: None,
                            });
                            Ok(Expression::WindowFunction(wf))
                        } else {
                            Ok(e)
                        }
                    }

                    Action::CreateTableStripComment => {
                        // Strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
                        if let Expression::CreateTable(mut ct) = e {
                            for col in &mut ct.columns {
                                col.comment = None;
                                col.constraints.retain(|c| {
                                    !matches!(c, crate::expressions::ColumnConstraint::Comment(_))
                                });
                                // Also remove Comment from constraint_order
                                col.constraint_order.retain(|c| {
                                    !matches!(c, crate::expressions::ConstraintType::Comment)
                                });
                            }
                            // Strip properties (USING, PARTITIONED BY, etc.)
                            // NOTE(review): this clears ALL table properties, not only the
                            // two mentioned above.
                            ct.properties.clear();
                            Ok(Expression::CreateTable(ct))
                        } else {
                            Ok(e)
                        }
                    }

                    Action::AlterTableToSpRename => {
                        // ALTER TABLE db.t1 RENAME TO db.t2 -> EXEC sp_rename 'db.t1', 't2'
                        if let Expression::AlterTable(ref at) = e {
                            if let Some(crate::expressions::AlterTableAction::RenameTable(
                                ref new_tbl,
                            )) = at.actions.first()
                            {
                                // Build the old table name using TSQL bracket quoting
                                let old_name = if let Some(ref schema) = at.name.schema {
                                    if at.name.name.quoted || schema.quoted {
                                        format!("[{}].[{}]", schema.name, at.name.name.name)
                                    } else {
                                        format!("{}.{}", schema.name, at.name.name.name)
                                    }
                                } else {
                                    if at.name.name.quoted {
                                        format!("[{}]", at.name.name.name)
                                    } else {
                                        at.name.name.name.clone()
                                    }
                                };
                                // NOTE(review): only the new table's bare name is used (any
                                // schema on the RENAME TO side is dropped), and single
                                // quotes inside identifiers are not escaped — confirm
                                // upstream validation makes that safe.
                                let new_name = new_tbl.name.name.clone();
                                // EXEC sp_rename 'old_name', 'new_name'
                                let sql = format!("EXEC sp_rename '{}', '{}'", old_name, new_name);
                                Ok(Expression::Raw(crate::expressions::Raw { sql }))
                            } else {
                                Ok(e)
                            }
                        } else {
                            Ok(e)
                        }
                    }
22923
                    Action::SnowflakeIntervalFormat => {
                        // INTERVAL '2' HOUR -> INTERVAL '2 HOUR' for Snowflake
                        // Applies only when the value is a string literal AND the unit is a
                        // simple (non-composite) unit spec.
                        if let Expression::Interval(mut iv) = e {
                            if let (Some(Expression::Literal(lit)), Some(ref unit_spec)) =
                                (&iv.this, &iv.unit)
                            {
                                if let Literal::String(ref val) = lit.as_ref() {
                                    let unit_str = match unit_spec {
                                        crate::expressions::IntervalUnitSpec::Simple {
                                            unit, ..
                                        } => match unit {
                                            crate::expressions::IntervalUnit::Year => "YEAR",
                                            crate::expressions::IntervalUnit::Quarter => "QUARTER",
                                            crate::expressions::IntervalUnit::Month => "MONTH",
                                            crate::expressions::IntervalUnit::Week => "WEEK",
                                            crate::expressions::IntervalUnit::Day => "DAY",
                                            crate::expressions::IntervalUnit::Hour => "HOUR",
                                            crate::expressions::IntervalUnit::Minute => "MINUTE",
                                            crate::expressions::IntervalUnit::Second => "SECOND",
                                            crate::expressions::IntervalUnit::Millisecond => {
                                                "MILLISECOND"
                                            }
                                            crate::expressions::IntervalUnit::Microsecond => {
                                                "MICROSECOND"
                                            }
                                            crate::expressions::IntervalUnit::Nanosecond => {
                                                "NANOSECOND"
                                            }
                                        },
                                        // Composite unit specs are left untouched ("" below
                                        // short-circuits the rewrite).
                                        _ => "",
                                    };
                                    if !unit_str.is_empty() {
                                        // Fold the unit into the string literal and clear the
                                        // separate unit so the generator emits one quoted token.
                                        let combined = format!("{} {}", val, unit_str);
                                        iv.this = Some(Expression::Literal(Box::new(Literal::String(
                                            combined,
                                        ))));
                                        iv.unit = None;
                                    }
                                }
                            }
                            Ok(Expression::Interval(iv))
                        } else {
                            Ok(e)
                        }
                    }

                    Action::ArrayConcatBracketConvert => {
                        // Expression::Array/ArrayFunc -> target-specific
                        // For PostgreSQL: Array -> ArrayFunc (bracket_notation: false)
                        // For Redshift: Array/ArrayFunc -> Function("ARRAY", args) to produce ARRAY(1, 2) with parens
                        match e {
                            Expression::Array(arr) => {
                                if matches!(target, DialectType::Redshift) {
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "ARRAY".to_string(),
                                        arr.expressions,
                                    ))))
                                } else {
                                    Ok(Expression::ArrayFunc(Box::new(
                                        crate::expressions::ArrayConstructor {
                                            expressions: arr.expressions,
                                            bracket_notation: false,
                                            use_list_keyword: false,
                                        },
                                    )))
                                }
                            }
                            Expression::ArrayFunc(arr) => {
                                // Only for Redshift: convert bracket-notation ArrayFunc to Function("ARRAY")
                                if matches!(target, DialectType::Redshift) {
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "ARRAY".to_string(),
                                        arr.expressions,
                                    ))))
                                } else {
                                    Ok(Expression::ArrayFunc(arr))
                                }
                            }
                            _ => Ok(e),
                        }
                    }
23005
23006 Action::BitAggFloatCast => {
23007 // BIT_OR/BIT_AND/BIT_XOR with float/decimal cast arg -> wrap with ROUND+INT cast for DuckDB
23008 // For FLOAT/DOUBLE/REAL: CAST(ROUND(CAST(val AS type)) AS INT)
23009 // For DECIMAL: CAST(CAST(val AS DECIMAL(p,s)) AS INT)
23010 let int_type = DataType::Int {
23011 length: None,
23012 integer_spelling: false,
23013 };
23014 let wrap_agg = |agg_this: Expression, int_dt: DataType| -> Expression {
23015 if let Expression::Cast(c) = agg_this {
23016 match &c.to {
23017 DataType::Float { .. }
23018 | DataType::Double { .. }
23019 | DataType::Custom { .. } => {
23020 // FLOAT/DOUBLE/REAL: CAST(ROUND(CAST(val AS type)) AS INT)
23021 // Change FLOAT to REAL (Float with real_spelling=true) for DuckDB generator
23022 let inner_type = match &c.to {
23023 DataType::Float {
23024 precision, scale, ..
23025 } => DataType::Float {
23026 precision: *precision,
23027 scale: *scale,
23028 real_spelling: true,
23029 },
23030 other => other.clone(),
23031 };
23032 let inner_cast =
23033 Expression::Cast(Box::new(crate::expressions::Cast {
23034 this: c.this.clone(),
23035 to: inner_type,
23036 trailing_comments: Vec::new(),
23037 double_colon_syntax: false,
23038 format: None,
23039 default: None,
23040 inferred_type: None,
23041 }));
23042 let rounded = Expression::Function(Box::new(Function::new(
23043 "ROUND".to_string(),
23044 vec![inner_cast],
23045 )));
23046 Expression::Cast(Box::new(crate::expressions::Cast {
23047 this: rounded,
23048 to: int_dt,
23049 trailing_comments: Vec::new(),
23050 double_colon_syntax: false,
23051 format: None,
23052 default: None,
23053 inferred_type: None,
23054 }))
23055 }
23056 DataType::Decimal { .. } => {
23057 // DECIMAL: CAST(CAST(val AS DECIMAL(p,s)) AS INT)
23058 Expression::Cast(Box::new(crate::expressions::Cast {
23059 this: Expression::Cast(c),
23060 to: int_dt,
23061 trailing_comments: Vec::new(),
23062 double_colon_syntax: false,
23063 format: None,
23064 default: None,
23065 inferred_type: None,
23066 }))
23067 }
23068 _ => Expression::Cast(c),
23069 }
23070 } else {
23071 agg_this
23072 }
23073 };
23074 match e {
23075 Expression::BitwiseOrAgg(mut f) => {
23076 f.this = wrap_agg(f.this, int_type);
23077 Ok(Expression::BitwiseOrAgg(f))
23078 }
23079 Expression::BitwiseAndAgg(mut f) => {
23080 let int_type = DataType::Int {
23081 length: None,
23082 integer_spelling: false,
23083 };
23084 f.this = wrap_agg(f.this, int_type);
23085 Ok(Expression::BitwiseAndAgg(f))
23086 }
23087 Expression::BitwiseXorAgg(mut f) => {
23088 let int_type = DataType::Int {
23089 length: None,
23090 integer_spelling: false,
23091 };
23092 f.this = wrap_agg(f.this, int_type);
23093 Ok(Expression::BitwiseXorAgg(f))
23094 }
23095 _ => Ok(e),
23096 }
23097 }
23098
23099 Action::BitAggSnowflakeRename => {
23100 // BIT_OR -> BITORAGG, BIT_AND -> BITANDAGG, BIT_XOR -> BITXORAGG for Snowflake
23101 match e {
23102 Expression::BitwiseOrAgg(f) => Ok(Expression::Function(Box::new(
23103 Function::new("BITORAGG".to_string(), vec![f.this]),
23104 ))),
23105 Expression::BitwiseAndAgg(f) => Ok(Expression::Function(Box::new(
23106 Function::new("BITANDAGG".to_string(), vec![f.this]),
23107 ))),
23108 Expression::BitwiseXorAgg(f) => Ok(Expression::Function(Box::new(
23109 Function::new("BITXORAGG".to_string(), vec![f.this]),
23110 ))),
23111 _ => Ok(e),
23112 }
23113 }
23114
23115 Action::StrftimeCastTimestamp => {
23116 // CAST(x AS TIMESTAMP) -> CAST(x AS TIMESTAMP_NTZ) for Spark
23117 if let Expression::Cast(mut c) = e {
23118 if matches!(
23119 c.to,
23120 DataType::Timestamp {
23121 timezone: false,
23122 ..
23123 }
23124 ) {
23125 c.to = DataType::Custom {
23126 name: "TIMESTAMP_NTZ".to_string(),
23127 };
23128 }
23129 Ok(Expression::Cast(c))
23130 } else {
23131 Ok(e)
23132 }
23133 }
23134
23135 Action::DecimalDefaultPrecision => {
23136 // DECIMAL without precision -> DECIMAL(18, 3) for Snowflake
23137 if let Expression::Cast(mut c) = e {
23138 if matches!(
23139 c.to,
23140 DataType::Decimal {
23141 precision: None,
23142 ..
23143 }
23144 ) {
23145 c.to = DataType::Decimal {
23146 precision: Some(18),
23147 scale: Some(3),
23148 };
23149 }
23150 Ok(Expression::Cast(c))
23151 } else {
23152 Ok(e)
23153 }
23154 }
23155
Action::FilterToIff => {
    // FILTER(WHERE cond) -> rewrite aggregate: AGG(IFF(cond, val, NULL))
    // Targets dialects without FILTER support: the filter condition is
    // folded into the aggregate's argument via IFF(cond, arg, NULL).
    if let Expression::Filter(f) = e {
        let condition = *f.expression;
        let agg = *f.this;
        // Strip WHERE from condition
        let cond = match condition {
            Expression::Where(w) => w.this,
            other => other,
        };
        // Extract the aggregate function and its argument
        // We want AVG(IFF(condition, x, NULL))
        match agg {
            // Generic function call: wrap its first argument in IFF.
            Expression::Function(mut func) => {
                if !func.args.is_empty() {
                    let orig_arg = func.args[0].clone();
                    let iff_call = Expression::Function(Box::new(Function::new(
                        "IFF".to_string(),
                        vec![cond, orig_arg, Expression::Null(Null)],
                    )));
                    func.args[0] = iff_call;
                    Ok(Expression::Function(func))
                } else {
                    // Zero-arg function: nothing to wrap, so reconstruct
                    // the original Filter node unchanged.
                    Ok(Expression::Filter(Box::new(crate::expressions::Filter {
                        this: Box::new(Expression::Function(func)),
                        expression: Box::new(cond),
                    })))
                }
            }
            // Dedicated aggregate variants: rewrite `this` in place.
            Expression::Avg(mut avg) => {
                let iff_call = Expression::Function(Box::new(Function::new(
                    "IFF".to_string(),
                    vec![cond, avg.this.clone(), Expression::Null(Null)],
                )));
                avg.this = iff_call;
                Ok(Expression::Avg(avg))
            }
            Expression::Sum(mut s) => {
                let iff_call = Expression::Function(Box::new(Function::new(
                    "IFF".to_string(),
                    vec![cond, s.this.clone(), Expression::Null(Null)],
                )));
                s.this = iff_call;
                Ok(Expression::Sum(s))
            }
            Expression::Count(mut c) => {
                // COUNT's argument is optional (COUNT(*) has none); only
                // wrap when there is an argument to wrap.
                if let Some(ref this_expr) = c.this {
                    let iff_call = Expression::Function(Box::new(Function::new(
                        "IFF".to_string(),
                        vec![cond, this_expr.clone(), Expression::Null(Null)],
                    )));
                    c.this = Some(iff_call);
                }
                Ok(Expression::Count(c))
            }
            other => {
                // Fallback: keep as Filter
                Ok(Expression::Filter(Box::new(crate::expressions::Filter {
                    this: Box::new(other),
                    expression: Box::new(cond),
                })))
            }
        }
    } else {
        Ok(e)
    }
}
23223
Action::AggFilterToIff => {
    // AggFunc.filter -> IFF wrapping: AVG(x) FILTER(WHERE cond) -> AVG(IFF(cond, x, NULL))
    // Unlike FilterToIff (which handles a wrapping Filter node), this acts
    // on the `filter` field stored directly on AggFunc-shaped variants.
    // Helper macro to handle the common AggFunc case
    macro_rules! handle_agg_filter_to_iff {
        ($variant:ident, $agg:expr) => {{
            let mut agg = $agg;
            // `take()` removes the filter so the generator won't also
            // emit a FILTER clause after the rewrite.
            if let Some(filter_cond) = agg.filter.take() {
                let iff_call = Expression::Function(Box::new(Function::new(
                    "IFF".to_string(),
                    vec![filter_cond, agg.this.clone(), Expression::Null(Null)],
                )));
                agg.this = iff_call;
            }
            Ok(Expression::$variant(agg))
        }};
    }

    match e {
        Expression::Avg(agg) => handle_agg_filter_to_iff!(Avg, agg),
        Expression::Sum(agg) => handle_agg_filter_to_iff!(Sum, agg),
        Expression::Min(agg) => handle_agg_filter_to_iff!(Min, agg),
        Expression::Max(agg) => handle_agg_filter_to_iff!(Max, agg),
        Expression::ArrayAgg(agg) => handle_agg_filter_to_iff!(ArrayAgg, agg),
        Expression::CountIf(agg) => handle_agg_filter_to_iff!(CountIf, agg),
        Expression::Stddev(agg) => handle_agg_filter_to_iff!(Stddev, agg),
        Expression::StddevPop(agg) => handle_agg_filter_to_iff!(StddevPop, agg),
        Expression::StddevSamp(agg) => handle_agg_filter_to_iff!(StddevSamp, agg),
        Expression::Variance(agg) => handle_agg_filter_to_iff!(Variance, agg),
        Expression::VarPop(agg) => handle_agg_filter_to_iff!(VarPop, agg),
        Expression::VarSamp(agg) => handle_agg_filter_to_iff!(VarSamp, agg),
        Expression::Median(agg) => handle_agg_filter_to_iff!(Median, agg),
        Expression::Mode(agg) => handle_agg_filter_to_iff!(Mode, agg),
        Expression::First(agg) => handle_agg_filter_to_iff!(First, agg),
        Expression::Last(agg) => handle_agg_filter_to_iff!(Last, agg),
        Expression::AnyValue(agg) => handle_agg_filter_to_iff!(AnyValue, agg),
        Expression::ApproxDistinct(agg) => {
            handle_agg_filter_to_iff!(ApproxDistinct, agg)
        }
        // COUNT is special-cased: its argument is Option-al (COUNT(*)),
        // so the macro's unconditional `agg.this` access doesn't fit.
        Expression::Count(mut c) => {
            if let Some(filter_cond) = c.filter.take() {
                if let Some(ref this_expr) = c.this {
                    let iff_call = Expression::Function(Box::new(Function::new(
                        "IFF".to_string(),
                        vec![
                            filter_cond,
                            this_expr.clone(),
                            Expression::Null(Null),
                        ],
                    )));
                    c.this = Some(iff_call);
                }
            }
            Ok(Expression::Count(c))
        }
        other => Ok(other),
    }
}
23281
Action::JsonToGetPath => {
    // JSON_EXTRACT(x, '$.key') -> GET_PATH(PARSE_JSON(x), 'key')
    if let Expression::JsonExtract(je) = e {
        // Convert to PARSE_JSON() wrapper:
        // - JSON(x) -> PARSE_JSON(x)
        // - PARSE_JSON(x) -> keep as-is
        // - anything else -> wrap in PARSE_JSON()
        let this = match &je.this {
            Expression::Function(f)
                if f.name.eq_ignore_ascii_case("JSON") && f.args.len() == 1 =>
            {
                Expression::Function(Box::new(Function::new(
                    "PARSE_JSON".to_string(),
                    f.args.clone(),
                )))
            }
            Expression::Function(f)
                if f.name.eq_ignore_ascii_case("PARSE_JSON") =>
            {
                je.this.clone()
            }
            // GET_PATH result is already JSON, don't wrap
            Expression::Function(f) if f.name.eq_ignore_ascii_case("GET_PATH") => {
                je.this.clone()
            }
            other => {
                // Wrap non-JSON expressions in PARSE_JSON()
                Expression::Function(Box::new(Function::new(
                    "PARSE_JSON".to_string(),
                    vec![other.clone()],
                )))
            }
        };
        // Convert path: extract key from JSONPath or strip $. prefix from string
        let path = match &je.path {
            Expression::JSONPath(jp) => {
                // Extract the key from JSONPath: $root.key -> 'key'
                // Only string-literal key segments are collected; array
                // indices and other segment kinds are silently skipped.
                let mut key_parts = Vec::new();
                for expr in &jp.expressions {
                    match expr {
                        Expression::JSONPathRoot(_) => {} // skip root
                        Expression::JSONPathKey(k) => {
                            if let Expression::Literal(lit) = &*k.this {
                                if let Literal::String(s) = lit.as_ref() {
                                    key_parts.push(s.clone());
                                }
                            }
                        }
                        _ => {}
                    }
                }
                if !key_parts.is_empty() {
                    // Dotted Snowflake path syntax: 'a.b.c'
                    Expression::Literal(Box::new(Literal::String(
                        key_parts.join("."),
                    )))
                } else {
                    je.path.clone()
                }
            }
            // String path beginning '$.': drop the two-char prefix.
            Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s.starts_with("$.")) =>
            {
                let Literal::String(s) = lit.as_ref() else {
                    unreachable!()
                };
                // NOTE(review): `.to_string()` allocates an intermediate
                // String here; `&s[2..]` alone would do if
                // strip_json_wildcards accepts &str — confirm its signature.
                let stripped = Self::strip_json_wildcards(&s[2..].to_string());
                Expression::Literal(Box::new(Literal::String(stripped)))
            }
            // Bare '$' root prefix: drop only the dollar sign.
            Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s.starts_with('$')) =>
            {
                let Literal::String(s) = lit.as_ref() else {
                    unreachable!()
                };
                let stripped = Self::strip_json_wildcards(&s[1..].to_string());
                Expression::Literal(Box::new(Literal::String(stripped)))
            }
            _ => je.path.clone(),
        };
        Ok(Expression::Function(Box::new(Function::new(
            "GET_PATH".to_string(),
            vec![this, path],
        ))))
    } else {
        Ok(e)
    }
}
23367
23368 Action::StructToRow => {
23369 // DuckDB struct/dict -> BigQuery STRUCT(value AS key, ...) / Presto ROW
23370 // Handles both Expression::Struct and Expression::MapFunc(curly_brace_syntax=true)
23371
23372 // Extract key-value pairs from either Struct or MapFunc
23373 let kv_pairs: Option<Vec<(String, Expression)>> = match &e {
23374 Expression::Struct(s) => Some(
23375 s.fields
23376 .iter()
23377 .map(|(opt_name, field_expr)| {
23378 if let Some(name) = opt_name {
23379 (name.clone(), field_expr.clone())
23380 } else if let Expression::NamedArgument(na) = field_expr {
23381 (na.name.name.clone(), na.value.clone())
23382 } else {
23383 (String::new(), field_expr.clone())
23384 }
23385 })
23386 .collect(),
23387 ),
23388 Expression::MapFunc(m) if m.curly_brace_syntax => Some(
23389 m.keys
23390 .iter()
23391 .zip(m.values.iter())
23392 .map(|(key, value)| {
23393 let key_name = match key {
23394 Expression::Literal(lit)
23395 if matches!(lit.as_ref(), Literal::String(_)) =>
23396 {
23397 let Literal::String(s) = lit.as_ref() else {
23398 unreachable!()
23399 };
23400 s.clone()
23401 }
23402 Expression::Identifier(id) => id.name.clone(),
23403 _ => String::new(),
23404 };
23405 (key_name, value.clone())
23406 })
23407 .collect(),
23408 ),
23409 _ => None,
23410 };
23411
23412 if let Some(pairs) = kv_pairs {
23413 let mut named_args = Vec::new();
23414 for (key_name, value) in pairs {
23415 if matches!(target, DialectType::BigQuery) && !key_name.is_empty() {
23416 named_args.push(Expression::Alias(Box::new(
23417 crate::expressions::Alias::new(
23418 value,
23419 Identifier::new(key_name),
23420 ),
23421 )));
23422 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
23423 named_args.push(value);
23424 } else {
23425 named_args.push(value);
23426 }
23427 }
23428
23429 if matches!(target, DialectType::BigQuery) {
23430 Ok(Expression::Function(Box::new(Function::new(
23431 "STRUCT".to_string(),
23432 named_args,
23433 ))))
23434 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
23435 // For Presto/Trino, infer types and wrap in CAST(ROW(...) AS ROW(name TYPE, ...))
23436 let row_func = Expression::Function(Box::new(Function::new(
23437 "ROW".to_string(),
23438 named_args,
23439 )));
23440
23441 // Try to infer types for each pair
23442 let kv_pairs_again: Option<Vec<(String, Expression)>> = match &e {
23443 Expression::Struct(s) => Some(
23444 s.fields
23445 .iter()
23446 .map(|(opt_name, field_expr)| {
23447 if let Some(name) = opt_name {
23448 (name.clone(), field_expr.clone())
23449 } else if let Expression::NamedArgument(na) = field_expr
23450 {
23451 (na.name.name.clone(), na.value.clone())
23452 } else {
23453 (String::new(), field_expr.clone())
23454 }
23455 })
23456 .collect(),
23457 ),
23458 Expression::MapFunc(m) if m.curly_brace_syntax => Some(
23459 m.keys
23460 .iter()
23461 .zip(m.values.iter())
23462 .map(|(key, value)| {
23463 let key_name = match key {
23464 Expression::Literal(lit)
23465 if matches!(
23466 lit.as_ref(),
23467 Literal::String(_)
23468 ) =>
23469 {
23470 let Literal::String(s) = lit.as_ref() else {
23471 unreachable!()
23472 };
23473 s.clone()
23474 }
23475 Expression::Identifier(id) => id.name.clone(),
23476 _ => String::new(),
23477 };
23478 (key_name, value.clone())
23479 })
23480 .collect(),
23481 ),
23482 _ => None,
23483 };
23484
23485 if let Some(pairs) = kv_pairs_again {
23486 // Infer types for all values
23487 let mut all_inferred = true;
23488 let mut fields = Vec::new();
23489 for (name, value) in &pairs {
23490 let inferred_type = match value {
23491 Expression::Literal(lit)
23492 if matches!(lit.as_ref(), Literal::Number(_)) =>
23493 {
23494 let Literal::Number(n) = lit.as_ref() else {
23495 unreachable!()
23496 };
23497 if n.contains('.') {
23498 Some(DataType::Double {
23499 precision: None,
23500 scale: None,
23501 })
23502 } else {
23503 Some(DataType::Int {
23504 length: None,
23505 integer_spelling: true,
23506 })
23507 }
23508 }
23509 Expression::Literal(lit)
23510 if matches!(lit.as_ref(), Literal::String(_)) =>
23511 {
23512 Some(DataType::VarChar {
23513 length: None,
23514 parenthesized_length: false,
23515 })
23516 }
23517 Expression::Boolean(_) => Some(DataType::Boolean),
23518 _ => None,
23519 };
23520 if let Some(dt) = inferred_type {
23521 fields.push(crate::expressions::StructField::new(
23522 name.clone(),
23523 dt,
23524 ));
23525 } else {
23526 all_inferred = false;
23527 break;
23528 }
23529 }
23530
23531 if all_inferred && !fields.is_empty() {
23532 let row_type = DataType::Struct {
23533 fields,
23534 nested: true,
23535 };
23536 Ok(Expression::Cast(Box::new(Cast {
23537 this: row_func,
23538 to: row_type,
23539 trailing_comments: Vec::new(),
23540 double_colon_syntax: false,
23541 format: None,
23542 default: None,
23543 inferred_type: None,
23544 })))
23545 } else {
23546 Ok(row_func)
23547 }
23548 } else {
23549 Ok(row_func)
23550 }
23551 } else {
23552 Ok(Expression::Function(Box::new(Function::new(
23553 "ROW".to_string(),
23554 named_args,
23555 ))))
23556 }
23557 } else {
23558 Ok(e)
23559 }
23560 }
23561
Action::SparkStructConvert => {
    // Spark STRUCT(val AS name, ...) -> Presto CAST(ROW(...) AS ROW(name TYPE, ...))
    // or DuckDB {'name': val, ...}
    if let Expression::Function(f) = e {
        // Extract name-value pairs from aliased args
        // Unaliased arguments keep their value with an empty-string name.
        let mut pairs: Vec<(String, Expression)> = Vec::new();
        for arg in &f.args {
            match arg {
                Expression::Alias(a) => {
                    pairs.push((a.alias.name.clone(), a.this.clone()));
                }
                _ => {
                    pairs.push((String::new(), arg.clone()));
                }
            }
        }

        match target {
            DialectType::DuckDB => {
                // Convert to DuckDB struct literal {'name': value, ...}
                let mut keys = Vec::new();
                let mut values = Vec::new();
                for (name, value) in &pairs {
                    keys.push(Expression::Literal(Box::new(Literal::String(
                        name.clone(),
                    ))));
                    values.push(value.clone());
                }
                Ok(Expression::MapFunc(Box::new(
                    crate::expressions::MapConstructor {
                        keys,
                        values,
                        // curly_brace_syntax selects the {...} literal form
                        // rather than a MAP(...) call in the generator.
                        curly_brace_syntax: true,
                        with_map_keyword: false,
                    },
                )))
            }
            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                // Convert to CAST(ROW(val1, val2) AS ROW(name1 TYPE1, name2 TYPE2))
                let row_args: Vec<Expression> =
                    pairs.iter().map(|(_, v)| v.clone()).collect();
                let row_func = Expression::Function(Box::new(Function::new(
                    "ROW".to_string(),
                    row_args,
                )));

                // Infer types
                // Only simple literal/boolean values can be typed; one
                // uninferrable value aborts the CAST and keeps bare ROW.
                let mut all_inferred = true;
                let mut fields = Vec::new();
                for (name, value) in &pairs {
                    let inferred_type = match value {
                        Expression::Literal(lit)
                            if matches!(lit.as_ref(), Literal::Number(_)) =>
                        {
                            let Literal::Number(n) = lit.as_ref() else {
                                unreachable!()
                            };
                            // Decimal point -> DOUBLE, otherwise INTEGER.
                            if n.contains('.') {
                                Some(DataType::Double {
                                    precision: None,
                                    scale: None,
                                })
                            } else {
                                Some(DataType::Int {
                                    length: None,
                                    integer_spelling: true,
                                })
                            }
                        }
                        Expression::Literal(lit)
                            if matches!(lit.as_ref(), Literal::String(_)) =>
                        {
                            Some(DataType::VarChar {
                                length: None,
                                parenthesized_length: false,
                            })
                        }
                        Expression::Boolean(_) => Some(DataType::Boolean),
                        _ => None,
                    };
                    if let Some(dt) = inferred_type {
                        fields.push(crate::expressions::StructField::new(
                            name.clone(),
                            dt,
                        ));
                    } else {
                        all_inferred = false;
                        break;
                    }
                }

                if all_inferred && !fields.is_empty() {
                    let row_type = DataType::Struct {
                        fields,
                        nested: true,
                    };
                    Ok(Expression::Cast(Box::new(Cast {
                        this: row_func,
                        to: row_type,
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                } else {
                    Ok(row_func)
                }
            }
            // Other targets keep the original STRUCT(...) call untouched.
            _ => Ok(Expression::Function(f)),
        }
    } else {
        Ok(e)
    }
}
23677
23678 Action::ApproxCountDistinctToApproxDistinct => {
23679 // APPROX_COUNT_DISTINCT(x) -> APPROX_DISTINCT(x)
23680 if let Expression::ApproxCountDistinct(f) = e {
23681 Ok(Expression::ApproxDistinct(f))
23682 } else {
23683 Ok(e)
23684 }
23685 }
23686
Action::CollectListToArrayAgg => {
    // COLLECT_LIST(x) -> ARRAY_AGG(x) FILTER(WHERE x IS NOT NULL)
    if let Expression::AggregateFunction(f) = e {
        // Build the IS NOT NULL filter from the first argument;
        // `not: true` encodes the NOT in IS NOT NULL.
        let filter_expr = if !f.args.is_empty() {
            let arg = f.args[0].clone();
            Some(Expression::IsNull(Box::new(crate::expressions::IsNull {
                this: arg,
                not: true,
                postfix_form: false,
            })))
        } else {
            None
        };
        let agg = crate::expressions::AggFunc {
            // Degenerate zero-arg call falls back to NULL as the
            // aggregated expression (and gets no filter).
            this: if f.args.is_empty() {
                Expression::Null(crate::expressions::Null)
            } else {
                f.args[0].clone()
            },
            // DISTINCT and ORDER BY carry over from the original call.
            distinct: f.distinct,
            order_by: f.order_by.clone(),
            filter: filter_expr,
            ignore_nulls: None,
            name: None,
            having_max: None,
            limit: None,
            inferred_type: None,
        };
        Ok(Expression::ArrayAgg(Box::new(agg)))
    } else {
        Ok(e)
    }
}
23720
Action::CollectSetConvert => {
    // COLLECT_SET(x) -> target-specific
    if let Expression::AggregateFunction(f) = e {
        match target {
            // Presto: rename to SET_AGG, preserving the call's modifiers.
            DialectType::Presto => Ok(Expression::AggregateFunction(Box::new(
                crate::expressions::AggregateFunction {
                    name: "SET_AGG".to_string(),
                    args: f.args,
                    distinct: false,
                    order_by: f.order_by,
                    filter: f.filter,
                    limit: f.limit,
                    ignore_nulls: f.ignore_nulls,
                    inferred_type: None,
                },
            ))),
            // Snowflake: rename to ARRAY_UNIQUE_AGG.
            DialectType::Snowflake => Ok(Expression::AggregateFunction(Box::new(
                crate::expressions::AggregateFunction {
                    name: "ARRAY_UNIQUE_AGG".to_string(),
                    args: f.args,
                    distinct: false,
                    order_by: f.order_by,
                    filter: f.filter,
                    limit: f.limit,
                    ignore_nulls: f.ignore_nulls,
                    inferred_type: None,
                },
            ))),
            // Trino/DuckDB: ARRAY_AGG(DISTINCT x) expresses set semantics.
            // Note: order_by/filter/limit from the original call are
            // intentionally dropped here, unlike the rename targets above.
            DialectType::Trino | DialectType::DuckDB => {
                let agg = crate::expressions::AggFunc {
                    this: if f.args.is_empty() {
                        Expression::Null(crate::expressions::Null)
                    } else {
                        f.args[0].clone()
                    },
                    distinct: true,
                    order_by: Vec::new(),
                    filter: None,
                    ignore_nulls: None,
                    name: None,
                    having_max: None,
                    limit: None,
                    inferred_type: None,
                };
                Ok(Expression::ArrayAgg(Box::new(agg)))
            }
            // Any other dialect keeps COLLECT_SET as-is.
            _ => Ok(Expression::AggregateFunction(f)),
        }
    } else {
        Ok(e)
    }
}
23773
23774 Action::PercentileConvert => {
23775 // PERCENTILE(x, 0.5) -> QUANTILE(x, 0.5) / APPROX_PERCENTILE(x, 0.5)
23776 if let Expression::AggregateFunction(f) = e {
23777 let name = match target {
23778 DialectType::DuckDB => "QUANTILE",
23779 DialectType::Presto | DialectType::Trino => "APPROX_PERCENTILE",
23780 _ => "PERCENTILE",
23781 };
23782 Ok(Expression::AggregateFunction(Box::new(
23783 crate::expressions::AggregateFunction {
23784 name: name.to_string(),
23785 args: f.args,
23786 distinct: f.distinct,
23787 order_by: f.order_by,
23788 filter: f.filter,
23789 limit: f.limit,
23790 ignore_nulls: f.ignore_nulls,
23791 inferred_type: None,
23792 },
23793 )))
23794 } else {
23795 Ok(e)
23796 }
23797 }
23798
23799 Action::CorrIsnanWrap => {
23800 // CORR(a, b) -> CASE WHEN ISNAN(CORR(a, b)) THEN NULL ELSE CORR(a, b) END
23801 // The CORR expression could be AggregateFunction, WindowFunction, or Filter-wrapped
23802 let corr_clone = e.clone();
23803 let isnan = Expression::Function(Box::new(Function::new(
23804 "ISNAN".to_string(),
23805 vec![corr_clone.clone()],
23806 )));
23807 let case_expr = Expression::Case(Box::new(Case {
23808 operand: None,
23809 whens: vec![(isnan, Expression::Null(crate::expressions::Null))],
23810 else_: Some(corr_clone),
23811 comments: Vec::new(),
23812 inferred_type: None,
23813 }));
23814 Ok(case_expr)
23815 }
23816
Action::TruncToDateTrunc => {
    // TRUNC(timestamp, 'MONTH') -> DATE_TRUNC('MONTH', timestamp)
    // Note the argument swap: TRUNC takes (value, unit), DATE_TRUNC takes
    // (unit, value).
    if let Expression::Function(f) = e {
        if f.args.len() == 2 {
            let timestamp = f.args[0].clone();
            let unit_expr = f.args[1].clone();

            if matches!(target, DialectType::ClickHouse) {
                // For ClickHouse, produce Expression::DateTrunc which the generator
                // outputs as DATE_TRUNC(...) without going through the ClickHouse
                // target transform that would convert it to dateTrunc
                let unit_str = Self::get_unit_str_static(&unit_expr);
                // Map the unit text onto the typed DateTimeField enum;
                // unrecognized units are carried through as Custom.
                let dt_field = match unit_str.as_str() {
                    "YEAR" => DateTimeField::Year,
                    "MONTH" => DateTimeField::Month,
                    "DAY" => DateTimeField::Day,
                    "HOUR" => DateTimeField::Hour,
                    "MINUTE" => DateTimeField::Minute,
                    "SECOND" => DateTimeField::Second,
                    "WEEK" => DateTimeField::Week,
                    "QUARTER" => DateTimeField::Quarter,
                    _ => DateTimeField::Custom(unit_str),
                };
                Ok(Expression::DateTrunc(Box::new(
                    crate::expressions::DateTruncFunc {
                        this: timestamp,
                        unit: dt_field,
                    },
                )))
            } else {
                // Other targets: plain DATE_TRUNC(unit, value) call.
                let new_args = vec![unit_expr, timestamp];
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_TRUNC".to_string(),
                    new_args,
                ))))
            }
        } else {
            // Wrong arity (e.g. numeric TRUNC(x)): leave untouched.
            Ok(Expression::Function(f))
        }
    } else {
        Ok(e)
    }
}
23860
Action::ArrayContainsConvert => {
    // Rewrites ARRAY_CONTAINS per target dialect; non-matching targets
    // keep the typed ArrayContains node unchanged.
    if let Expression::ArrayContains(f) = e {
        match target {
            DialectType::Presto | DialectType::Trino => {
                // ARRAY_CONTAINS(arr, val) -> CONTAINS(arr, val)
                Ok(Expression::Function(Box::new(Function::new(
                    "CONTAINS".to_string(),
                    vec![f.this, f.expression],
                ))))
            }
            DialectType::Snowflake => {
                // ARRAY_CONTAINS(arr, val) -> ARRAY_CONTAINS(CAST(val AS VARIANT), arr)
                // Snowflake's argument order is (value, array) and the value
                // must be a VARIANT, hence the cast and the swap.
                let cast_val =
                    Expression::Cast(Box::new(crate::expressions::Cast {
                        this: f.expression,
                        to: crate::expressions::DataType::Custom {
                            name: "VARIANT".to_string(),
                        },
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    }));
                Ok(Expression::Function(Box::new(Function::new(
                    "ARRAY_CONTAINS".to_string(),
                    vec![cast_val, f.this],
                ))))
            }
            _ => Ok(Expression::ArrayContains(f)),
        }
    } else {
        Ok(e)
    }
}
23896
23897 Action::ArrayExceptConvert => {
23898 if let Expression::ArrayExcept(f) = e {
23899 let source_arr = f.this;
23900 let exclude_arr = f.expression;
23901 match target {
23902 DialectType::DuckDB if matches!(source, DialectType::Snowflake) => {
23903 // Snowflake ARRAY_EXCEPT -> DuckDB bag semantics:
23904 // CASE WHEN source IS NULL OR exclude IS NULL THEN NULL
23905 // ELSE LIST_TRANSFORM(LIST_FILTER(
23906 // LIST_ZIP(source, GENERATE_SERIES(1, LENGTH(source))),
23907 // pair -> (LENGTH(LIST_FILTER(source[1:pair[2]], e -> e IS NOT DISTINCT FROM pair[1]))
23908 // > LENGTH(LIST_FILTER(exclude, e -> e IS NOT DISTINCT FROM pair[1])))),
23909 // pair -> pair[1])
23910 // END
23911
23912 // Build null check
23913 let source_is_null =
23914 Expression::IsNull(Box::new(crate::expressions::IsNull {
23915 this: source_arr.clone(),
23916 not: false,
23917 postfix_form: false,
23918 }));
23919 let exclude_is_null =
23920 Expression::IsNull(Box::new(crate::expressions::IsNull {
23921 this: exclude_arr.clone(),
23922 not: false,
23923 postfix_form: false,
23924 }));
23925 let null_check =
23926 Expression::Or(Box::new(crate::expressions::BinaryOp {
23927 left: source_is_null,
23928 right: exclude_is_null,
23929 left_comments: vec![],
23930 operator_comments: vec![],
23931 trailing_comments: vec![],
23932 inferred_type: None,
23933 }));
23934
23935 // GENERATE_SERIES(1, LENGTH(source))
23936 let gen_series = Expression::Function(Box::new(Function::new(
23937 "GENERATE_SERIES".to_string(),
23938 vec![
23939 Expression::number(1),
23940 Expression::Function(Box::new(Function::new(
23941 "LENGTH".to_string(),
23942 vec![source_arr.clone()],
23943 ))),
23944 ],
23945 )));
23946
23947 // LIST_ZIP(source, GENERATE_SERIES(1, LENGTH(source)))
23948 let list_zip = Expression::Function(Box::new(Function::new(
23949 "LIST_ZIP".to_string(),
23950 vec![source_arr.clone(), gen_series],
23951 )));
23952
23953 // pair[1] and pair[2]
23954 let pair_col = Expression::column("pair");
23955 let pair_1 = Expression::Subscript(Box::new(
23956 crate::expressions::Subscript {
23957 this: pair_col.clone(),
23958 index: Expression::number(1),
23959 },
23960 ));
23961 let pair_2 = Expression::Subscript(Box::new(
23962 crate::expressions::Subscript {
23963 this: pair_col.clone(),
23964 index: Expression::number(2),
23965 },
23966 ));
23967
23968 // source[1:pair[2]]
23969 let source_slice = Expression::ArraySlice(Box::new(
23970 crate::expressions::ArraySlice {
23971 this: source_arr.clone(),
23972 start: Some(Expression::number(1)),
23973 end: Some(pair_2),
23974 },
23975 ));
23976
23977 let e_col = Expression::column("e");
23978
23979 // e -> e IS NOT DISTINCT FROM pair[1]
23980 let inner_lambda1 =
23981 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
23982 parameters: vec![crate::expressions::Identifier::new("e")],
23983 body: Expression::NullSafeEq(Box::new(
23984 crate::expressions::BinaryOp {
23985 left: e_col.clone(),
23986 right: pair_1.clone(),
23987 left_comments: vec![],
23988 operator_comments: vec![],
23989 trailing_comments: vec![],
23990 inferred_type: None,
23991 },
23992 )),
23993 colon: false,
23994 parameter_types: vec![],
23995 }));
23996
23997 // LIST_FILTER(source[1:pair[2]], e -> e IS NOT DISTINCT FROM pair[1])
23998 let inner_filter1 = Expression::Function(Box::new(Function::new(
23999 "LIST_FILTER".to_string(),
24000 vec![source_slice, inner_lambda1],
24001 )));
24002
24003 // LENGTH(LIST_FILTER(source[1:pair[2]], ...))
24004 let len1 = Expression::Function(Box::new(Function::new(
24005 "LENGTH".to_string(),
24006 vec![inner_filter1],
24007 )));
24008
24009 // e -> e IS NOT DISTINCT FROM pair[1]
24010 let inner_lambda2 =
24011 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
24012 parameters: vec![crate::expressions::Identifier::new("e")],
24013 body: Expression::NullSafeEq(Box::new(
24014 crate::expressions::BinaryOp {
24015 left: e_col,
24016 right: pair_1.clone(),
24017 left_comments: vec![],
24018 operator_comments: vec![],
24019 trailing_comments: vec![],
24020 inferred_type: None,
24021 },
24022 )),
24023 colon: false,
24024 parameter_types: vec![],
24025 }));
24026
24027 // LIST_FILTER(exclude, e -> e IS NOT DISTINCT FROM pair[1])
24028 let inner_filter2 = Expression::Function(Box::new(Function::new(
24029 "LIST_FILTER".to_string(),
24030 vec![exclude_arr.clone(), inner_lambda2],
24031 )));
24032
24033 // LENGTH(LIST_FILTER(exclude, ...))
24034 let len2 = Expression::Function(Box::new(Function::new(
24035 "LENGTH".to_string(),
24036 vec![inner_filter2],
24037 )));
24038
24039 // (LENGTH(...) > LENGTH(...))
24040 let cond = Expression::Paren(Box::new(Paren {
24041 this: Expression::Gt(Box::new(crate::expressions::BinaryOp {
24042 left: len1,
24043 right: len2,
24044 left_comments: vec![],
24045 operator_comments: vec![],
24046 trailing_comments: vec![],
24047 inferred_type: None,
24048 })),
24049 trailing_comments: vec![],
24050 }));
24051
24052 // pair -> (condition)
24053 let filter_lambda =
24054 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
24055 parameters: vec![crate::expressions::Identifier::new(
24056 "pair",
24057 )],
24058 body: cond,
24059 colon: false,
24060 parameter_types: vec![],
24061 }));
24062
24063 // LIST_FILTER(LIST_ZIP(...), pair -> ...)
24064 let outer_filter = Expression::Function(Box::new(Function::new(
24065 "LIST_FILTER".to_string(),
24066 vec![list_zip, filter_lambda],
24067 )));
24068
24069 // pair -> pair[1]
24070 let transform_lambda =
24071 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
24072 parameters: vec![crate::expressions::Identifier::new(
24073 "pair",
24074 )],
24075 body: pair_1,
24076 colon: false,
24077 parameter_types: vec![],
24078 }));
24079
24080 // LIST_TRANSFORM(LIST_FILTER(...), pair -> pair[1])
24081 let list_transform = Expression::Function(Box::new(Function::new(
24082 "LIST_TRANSFORM".to_string(),
24083 vec![outer_filter, transform_lambda],
24084 )));
24085
24086 Ok(Expression::Case(Box::new(Case {
24087 operand: None,
24088 whens: vec![(null_check, Expression::Null(Null))],
24089 else_: Some(list_transform),
24090 comments: Vec::new(),
24091 inferred_type: None,
24092 })))
24093 }
24094 DialectType::DuckDB => {
24095 // ARRAY_EXCEPT(source, exclude) -> set semantics for DuckDB:
24096 // CASE WHEN source IS NULL OR exclude IS NULL THEN NULL
24097 // ELSE LIST_FILTER(LIST_DISTINCT(source),
24098 // e -> LENGTH(LIST_FILTER(exclude, x -> x IS NOT DISTINCT FROM e)) = 0)
24099 // END
24100
24101 // Build: source IS NULL
24102 let source_is_null =
24103 Expression::IsNull(Box::new(crate::expressions::IsNull {
24104 this: source_arr.clone(),
24105 not: false,
24106 postfix_form: false,
24107 }));
24108 // Build: exclude IS NULL
24109 let exclude_is_null =
24110 Expression::IsNull(Box::new(crate::expressions::IsNull {
24111 this: exclude_arr.clone(),
24112 not: false,
24113 postfix_form: false,
24114 }));
24115 // source IS NULL OR exclude IS NULL
24116 let null_check =
24117 Expression::Or(Box::new(crate::expressions::BinaryOp {
24118 left: source_is_null,
24119 right: exclude_is_null,
24120 left_comments: vec![],
24121 operator_comments: vec![],
24122 trailing_comments: vec![],
24123 inferred_type: None,
24124 }));
24125
24126 // LIST_DISTINCT(source)
24127 let list_distinct = Expression::Function(Box::new(Function::new(
24128 "LIST_DISTINCT".to_string(),
24129 vec![source_arr.clone()],
24130 )));
24131
24132 // x IS NOT DISTINCT FROM e
24133 let x_col = Expression::column("x");
24134 let e_col = Expression::column("e");
24135 let is_not_distinct = Expression::NullSafeEq(Box::new(
24136 crate::expressions::BinaryOp {
24137 left: x_col,
24138 right: e_col.clone(),
24139 left_comments: vec![],
24140 operator_comments: vec![],
24141 trailing_comments: vec![],
24142 inferred_type: None,
24143 },
24144 ));
24145
24146 // x -> x IS NOT DISTINCT FROM e
24147 let inner_lambda =
24148 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
24149 parameters: vec![crate::expressions::Identifier::new("x")],
24150 body: is_not_distinct,
24151 colon: false,
24152 parameter_types: vec![],
24153 }));
24154
24155 // LIST_FILTER(exclude, x -> x IS NOT DISTINCT FROM e)
24156 let inner_list_filter =
24157 Expression::Function(Box::new(Function::new(
24158 "LIST_FILTER".to_string(),
24159 vec![exclude_arr.clone(), inner_lambda],
24160 )));
24161
24162 // LENGTH(LIST_FILTER(exclude, x -> x IS NOT DISTINCT FROM e))
24163 let len_inner = Expression::Function(Box::new(Function::new(
24164 "LENGTH".to_string(),
24165 vec![inner_list_filter],
24166 )));
24167
24168 // LENGTH(...) = 0
24169 let eq_zero =
24170 Expression::Eq(Box::new(crate::expressions::BinaryOp {
24171 left: len_inner,
24172 right: Expression::number(0),
24173 left_comments: vec![],
24174 operator_comments: vec![],
24175 trailing_comments: vec![],
24176 inferred_type: None,
24177 }));
24178
24179 // e -> LENGTH(LIST_FILTER(...)) = 0
24180 let outer_lambda =
24181 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
24182 parameters: vec![crate::expressions::Identifier::new("e")],
24183 body: eq_zero,
24184 colon: false,
24185 parameter_types: vec![],
24186 }));
24187
24188 // LIST_FILTER(LIST_DISTINCT(source), e -> ...)
24189 let outer_list_filter =
24190 Expression::Function(Box::new(Function::new(
24191 "LIST_FILTER".to_string(),
24192 vec![list_distinct, outer_lambda],
24193 )));
24194
24195 // CASE WHEN ... IS NULL ... THEN NULL ELSE LIST_FILTER(...) END
24196 Ok(Expression::Case(Box::new(Case {
24197 operand: None,
24198 whens: vec![(null_check, Expression::Null(Null))],
24199 else_: Some(outer_list_filter),
24200 comments: Vec::new(),
24201 inferred_type: None,
24202 })))
24203 }
24204 DialectType::Snowflake => {
24205 // Snowflake: ARRAY_EXCEPT(source, exclude) - keep as-is
24206 Ok(Expression::ArrayExcept(Box::new(
24207 crate::expressions::BinaryFunc {
24208 this: source_arr,
24209 expression: exclude_arr,
24210 original_name: None,
24211 inferred_type: None,
24212 },
24213 )))
24214 }
24215 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
24216 // Presto/Trino: ARRAY_EXCEPT(source, exclude) - keep function name, array syntax already converted
24217 Ok(Expression::Function(Box::new(Function::new(
24218 "ARRAY_EXCEPT".to_string(),
24219 vec![source_arr, exclude_arr],
24220 ))))
24221 }
24222 _ => Ok(Expression::ArrayExcept(Box::new(
24223 crate::expressions::BinaryFunc {
24224 this: source_arr,
24225 expression: exclude_arr,
24226 original_name: None,
24227 inferred_type: None,
24228 },
24229 ))),
24230 }
24231 } else {
24232 Ok(e)
24233 }
24234 }
24235
            Action::RegexpLikeExasolAnchor => {
                // RegexpLike -> Exasol: wrap pattern with .*...*
                // Exasol REGEXP_LIKE does full-string match, but RLIKE/REGEXP from other
                // dialects does partial match, so we need to anchor with .* on both sides
                if let Expression::RegexpLike(mut f) = e {
                    match &f.pattern {
                        Expression::Literal(lit)
                            if matches!(lit.as_ref(), Literal::String(_)) =>
                        {
                            // The guard already proved this is a string literal, so
                            // the diverging branch of the let-else cannot be taken.
                            let Literal::String(s) = lit.as_ref() else {
                                unreachable!()
                            };
                            // String literal: wrap with .*...*
                            f.pattern = Expression::Literal(Box::new(Literal::String(
                                format!(".*{}.*", s),
                            )));
                        }
                        _ => {
                            // Non-literal: wrap with CONCAT('.*', pattern, '.*')
                            // built as a left-nested pair of Concat nodes, i.e.
                            // ('.*' || pattern) || '.*', parenthesized so later
                            // generation preserves the grouping.
                            // `f.pattern` is moved into the RHS and immediately
                            // re-initialized by this assignment; valid because `f`
                            // is owned here.
                            f.pattern =
                                Expression::Paren(Box::new(crate::expressions::Paren {
                                    this: Expression::Concat(Box::new(
                                        crate::expressions::BinaryOp {
                                            left: Expression::Concat(Box::new(
                                                crate::expressions::BinaryOp {
                                                    left: Expression::Literal(Box::new(
                                                        Literal::String(".*".to_string()),
                                                    )),
                                                    right: f.pattern,
                                                    left_comments: vec![],
                                                    operator_comments: vec![],
                                                    trailing_comments: vec![],
                                                    inferred_type: None,
                                                },
                                            )),
                                            right: Expression::Literal(Box::new(
                                                Literal::String(".*".to_string()),
                                            )),
                                            left_comments: vec![],
                                            operator_comments: vec![],
                                            trailing_comments: vec![],
                                            inferred_type: None,
                                        },
                                    )),
                                    trailing_comments: vec![],
                                }));
                        }
                    }
                    Ok(Expression::RegexpLike(f))
                } else {
                    // Not a RegexpLike node: pass through unchanged.
                    Ok(e)
                }
            }
24289
24290 Action::ArrayPositionSnowflakeSwap => {
24291 // ARRAY_POSITION(arr, elem) -> ARRAY_POSITION(elem, arr) for Snowflake
24292 if let Expression::ArrayPosition(f) = e {
24293 Ok(Expression::ArrayPosition(Box::new(
24294 crate::expressions::BinaryFunc {
24295 this: f.expression,
24296 expression: f.this,
24297 original_name: f.original_name,
24298 inferred_type: f.inferred_type,
24299 },
24300 )))
24301 } else {
24302 Ok(e)
24303 }
24304 }
24305
24306 Action::SnowflakeArrayPositionToDuckDB => {
24307 // Snowflake ARRAY_POSITION(value, array) -> DuckDB ARRAY_POSITION(array, value) - 1
24308 // Snowflake uses 0-based indexing, DuckDB uses 1-based
24309 // The parser has this=value, expression=array (Snowflake order)
24310 if let Expression::ArrayPosition(f) = e {
24311 // Create ARRAY_POSITION(array, value) in standard order
24312 let standard_pos =
24313 Expression::ArrayPosition(Box::new(crate::expressions::BinaryFunc {
24314 this: f.expression, // array
24315 expression: f.this, // value
24316 original_name: f.original_name,
24317 inferred_type: f.inferred_type,
24318 }));
24319 // Subtract 1 for zero-based indexing
24320 Ok(Expression::Sub(Box::new(BinaryOp {
24321 left: standard_pos,
24322 right: Expression::number(1),
24323 left_comments: vec![],
24324 operator_comments: vec![],
24325 trailing_comments: vec![],
24326 inferred_type: None,
24327 })))
24328 } else {
24329 Ok(e)
24330 }
24331 }
24332
            Action::ArrayDistinctConvert => {
                // ARRAY_DISTINCT(arr) -> DuckDB NULL-aware CASE:
                //   CASE WHEN ARRAY_LENGTH(arr) <> LIST_COUNT(arr)
                //        THEN LIST_APPEND(LIST_DISTINCT(LIST_FILTER(arr, _u -> NOT _u IS NULL)), NULL)
                //        ELSE LIST_DISTINCT(arr)
                //   END
                // The length-vs-count mismatch detects the presence of NULL elements;
                // when present, distinct non-NULLs are kept and a single NULL is
                // re-appended. Assumes DuckDB's LIST_COUNT excludes NULL elements
                // while ARRAY_LENGTH counts them — TODO confirm against DuckDB docs.
                if let Expression::ArrayDistinct(f) = e {
                    let arr = f.this;

                    // ARRAY_LENGTH(arr)
                    let array_length = Expression::Function(Box::new(Function::new(
                        "ARRAY_LENGTH".to_string(),
                        vec![arr.clone()],
                    )));
                    // LIST_COUNT(arr)
                    let list_count = Expression::Function(Box::new(Function::new(
                        "LIST_COUNT".to_string(),
                        vec![arr.clone()],
                    )));
                    // ARRAY_LENGTH(arr) <> LIST_COUNT(arr)
                    let neq = Expression::Neq(Box::new(crate::expressions::BinaryOp {
                        left: array_length,
                        right: list_count,
                        left_comments: vec![],
                        operator_comments: vec![],
                        trailing_comments: vec![],
                        inferred_type: None,
                    }));

                    // _u column (the lambda parameter reference)
                    let u_col = Expression::column("_u");
                    // NOT _u IS NULL
                    // NOTE(review): the clone leaves `u_col` otherwise unused.
                    let u_is_null = Expression::IsNull(Box::new(crate::expressions::IsNull {
                        this: u_col.clone(),
                        not: false,
                        postfix_form: false,
                    }));
                    let not_u_is_null =
                        Expression::Not(Box::new(crate::expressions::UnaryOp {
                            this: u_is_null,
                            inferred_type: None,
                        }));
                    // _u -> NOT _u IS NULL
                    let filter_lambda =
                        Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                            parameters: vec![crate::expressions::Identifier::new("_u")],
                            body: not_u_is_null,
                            colon: false,
                            parameter_types: vec![],
                        }));
                    // LIST_FILTER(arr, _u -> NOT _u IS NULL) — drop the NULLs
                    let list_filter = Expression::Function(Box::new(Function::new(
                        "LIST_FILTER".to_string(),
                        vec![arr.clone(), filter_lambda],
                    )));
                    // LIST_DISTINCT(LIST_FILTER(arr, ...))
                    let list_distinct_filtered = Expression::Function(Box::new(Function::new(
                        "LIST_DISTINCT".to_string(),
                        vec![list_filter],
                    )));
                    // LIST_APPEND(LIST_DISTINCT(LIST_FILTER(...)), NULL) — restore one NULL
                    let list_append = Expression::Function(Box::new(Function::new(
                        "LIST_APPEND".to_string(),
                        vec![list_distinct_filtered, Expression::Null(Null)],
                    )));

                    // LIST_DISTINCT(arr) — fast path when no NULLs were detected
                    let list_distinct = Expression::Function(Box::new(Function::new(
                        "LIST_DISTINCT".to_string(),
                        vec![arr],
                    )));

                    // CASE WHEN neq THEN list_append ELSE list_distinct END
                    Ok(Expression::Case(Box::new(Case {
                        operand: None,
                        whens: vec![(neq, list_append)],
                        else_: Some(list_distinct),
                        comments: Vec::new(),
                        inferred_type: None,
                    })))
                } else {
                    // Not an ArrayDistinct node: pass through unchanged.
                    Ok(e)
                }
            }
24417
24418 Action::ArrayDistinctClickHouse => {
24419 // ARRAY_DISTINCT(arr) -> arrayDistinct(arr) for ClickHouse
24420 if let Expression::ArrayDistinct(f) = e {
24421 Ok(Expression::Function(Box::new(Function::new(
24422 "arrayDistinct".to_string(),
24423 vec![f.this],
24424 ))))
24425 } else {
24426 Ok(e)
24427 }
24428 }
24429
            Action::ArrayContainsDuckDBConvert => {
                // Snowflake ARRAY_CONTAINS(value, array) -> DuckDB NULL-aware:
                //   CASE WHEN value IS NULL
                //        THEN NULLIF(ARRAY_LENGTH(array) <> LIST_COUNT(array), FALSE)
                //        ELSE ARRAY_CONTAINS(array, value)
                //   END
                // Note: In Rust AST from Snowflake parse, this=value (first arg), expression=array (second arg)
                //
                // NULL semantics produced by this rewrite: a NULL probe yields TRUE
                // when the array holds at least one NULL element, and NULL otherwise
                // (NULLIF collapses the FALSE case to NULL). Assumes DuckDB's
                // LIST_COUNT excludes NULL elements while ARRAY_LENGTH counts them —
                // TODO confirm against DuckDB docs.
                if let Expression::ArrayContains(f) = e {
                    let value = f.this;
                    let array = f.expression;

                    // value IS NULL
                    let value_is_null =
                        Expression::IsNull(Box::new(crate::expressions::IsNull {
                            this: value.clone(),
                            not: false,
                            postfix_form: false,
                        }));

                    // ARRAY_LENGTH(array)
                    let array_length = Expression::Function(Box::new(Function::new(
                        "ARRAY_LENGTH".to_string(),
                        vec![array.clone()],
                    )));
                    // LIST_COUNT(array)
                    let list_count = Expression::Function(Box::new(Function::new(
                        "LIST_COUNT".to_string(),
                        vec![array.clone()],
                    )));
                    // ARRAY_LENGTH(array) <> LIST_COUNT(array) — TRUE iff the array
                    // contains a NULL element (under the assumption above)
                    let neq = Expression::Neq(Box::new(crate::expressions::BinaryOp {
                        left: array_length,
                        right: list_count,
                        left_comments: vec![],
                        operator_comments: vec![],
                        trailing_comments: vec![],
                        inferred_type: None,
                    }));
                    // NULLIF(ARRAY_LENGTH(array) <> LIST_COUNT(array), FALSE)
                    let nullif = Expression::Nullif(Box::new(crate::expressions::Nullif {
                        this: Box::new(neq),
                        expression: Box::new(Expression::Boolean(
                            crate::expressions::BooleanLiteral { value: false },
                        )),
                    }));

                    // ARRAY_CONTAINS(array, value) - DuckDB syntax: array first, value second
                    let array_contains = Expression::Function(Box::new(Function::new(
                        "ARRAY_CONTAINS".to_string(),
                        vec![array, value],
                    )));

                    // CASE WHEN value IS NULL THEN NULLIF(...) ELSE ARRAY_CONTAINS(array, value) END
                    Ok(Expression::Case(Box::new(Case {
                        operand: None,
                        whens: vec![(value_is_null, nullif)],
                        else_: Some(array_contains),
                        comments: Vec::new(),
                        inferred_type: None,
                    })))
                } else {
                    // Not an ArrayContains node: pass through unchanged.
                    Ok(e)
                }
            }
24494
            Action::StrPositionExpand => {
                // StrPosition with position arg -> complex STRPOS expansion for Presto/DuckDB
                // For Presto: IF(STRPOS(SUBSTRING(str, pos), substr) = 0, 0, STRPOS(SUBSTRING(str, pos), substr) + pos - 1)
                // For DuckDB: CASE WHEN STRPOS(SUBSTRING(str, pos), substr) = 0 THEN 0 ELSE STRPOS(SUBSTRING(str, pos), substr) + pos - 1 END
                //
                // The trick: search in the suffix starting at `pos`, then shift the
                // match index back into whole-string coordinates (+ pos - 1); a miss
                // (STRPOS = 0) stays 0.
                // NOTE(review): `occurrence` is only preserved by the fallback arm;
                // the Presto/DuckDB expansions silently drop it — confirm callers
                // never pass an occurrence for those targets.
                if let Expression::StrPosition(sp) = e {
                    let crate::expressions::StrPosition {
                        this,
                        substr,
                        position,
                        occurrence,
                    } = *sp;
                    let string = *this;
                    // Missing substring degenerates to a NULL placeholder argument.
                    let substr_expr = match substr {
                        Some(s) => *s,
                        None => Expression::Null(Null),
                    };
                    // Default start position is 1 (SQL strings are 1-indexed).
                    let pos = match position {
                        Some(p) => *p,
                        None => Expression::number(1),
                    };

                    // SUBSTRING(string, pos)
                    let substring_call = Expression::Function(Box::new(Function::new(
                        "SUBSTRING".to_string(),
                        vec![string.clone(), pos.clone()],
                    )));
                    // STRPOS(SUBSTRING(string, pos), substr)
                    let strpos_call = Expression::Function(Box::new(Function::new(
                        "STRPOS".to_string(),
                        vec![substring_call, substr_expr.clone()],
                    )));
                    // STRPOS(...) + pos - 1
                    let pos_adjusted =
                        Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
                            Expression::Add(Box::new(crate::expressions::BinaryOp::new(
                                strpos_call.clone(),
                                pos.clone(),
                            ))),
                            Expression::number(1),
                        )));
                    // STRPOS(...) = 0 — the "not found" test
                    let is_zero = Expression::Eq(Box::new(crate::expressions::BinaryOp::new(
                        strpos_call.clone(),
                        Expression::number(0),
                    )));

                    match target {
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // IF(STRPOS(SUBSTRING(str, pos), substr) = 0, 0, STRPOS(SUBSTRING(str, pos), substr) + pos - 1)
                            Ok(Expression::Function(Box::new(Function::new(
                                "IF".to_string(),
                                vec![is_zero, Expression::number(0), pos_adjusted],
                            ))))
                        }
                        DialectType::DuckDB => {
                            // CASE WHEN STRPOS(SUBSTRING(str, pos), substr) = 0 THEN 0 ELSE STRPOS(SUBSTRING(str, pos), substr) + pos - 1 END
                            Ok(Expression::Case(Box::new(Case {
                                operand: None,
                                whens: vec![(is_zero, Expression::number(0))],
                                else_: Some(pos_adjusted),
                                comments: Vec::new(),
                                inferred_type: None,
                            })))
                        }
                        _ => {
                            // Reconstruct StrPosition unchanged for other targets.
                            Ok(Expression::StrPosition(Box::new(
                                crate::expressions::StrPosition {
                                    this: Box::new(string),
                                    substr: Some(Box::new(substr_expr)),
                                    position: Some(Box::new(pos)),
                                    occurrence,
                                },
                            )))
                        }
                    }
                } else {
                    // Not a StrPosition node: pass through unchanged.
                    Ok(e)
                }
            }
24575
            Action::MonthsBetweenConvert => {
                // MONTHS_BETWEEN(end, start) -> target-specific rewrite.
                // The parser stores this=end_date, expression=start_date.
                if let Expression::MonthsBetween(mb) = e {
                    let crate::expressions::BinaryFunc {
                        this: end_date,
                        expression: start_date,
                        ..
                    } = *mb;
                    match target {
                        DialectType::DuckDB => {
                            // Emulate Oracle/Snowflake-style MONTHS_BETWEEN:
                            //   DATE_DIFF('MONTH', start, end)
                            //   + CASE WHEN both dates are the last day of their month
                            //          THEN 0
                            //          ELSE (DAY(end) - DAY(start)) / 31.0
                            //     END
                            // i.e. an integral month count when both dates sit on a
                            // month boundary, otherwise a 31-day-based fraction —
                            // presumably matching Oracle's definition; TODO confirm.
                            let cast_end = Self::ensure_cast_date(end_date);
                            let cast_start = Self::ensure_cast_date(start_date);
                            // DATE_DIFF('MONTH', start, end)
                            let dd = Expression::Function(Box::new(Function::new(
                                "DATE_DIFF".to_string(),
                                vec![
                                    Expression::string("MONTH"),
                                    cast_start.clone(),
                                    cast_end.clone(),
                                ],
                            )));
                            // DAY(end) / DAY(start)
                            let day_end = Expression::Function(Box::new(Function::new(
                                "DAY".to_string(),
                                vec![cast_end.clone()],
                            )));
                            let day_start = Expression::Function(Box::new(Function::new(
                                "DAY".to_string(),
                                vec![cast_start.clone()],
                            )));
                            // LAST_DAY(end) / LAST_DAY(start)
                            let last_day_end = Expression::Function(Box::new(Function::new(
                                "LAST_DAY".to_string(),
                                vec![cast_end.clone()],
                            )));
                            let last_day_start = Expression::Function(Box::new(Function::new(
                                "LAST_DAY".to_string(),
                                vec![cast_start.clone()],
                            )));
                            // DAY(LAST_DAY(end)) / DAY(LAST_DAY(start))
                            let day_last_end = Expression::Function(Box::new(Function::new(
                                "DAY".to_string(),
                                vec![last_day_end],
                            )));
                            let day_last_start = Expression::Function(Box::new(Function::new(
                                "DAY".to_string(),
                                vec![last_day_start],
                            )));
                            // DAY(end) = DAY(LAST_DAY(end)) — end is a month-end
                            let cond1 = Expression::Eq(Box::new(BinaryOp::new(
                                day_end.clone(),
                                day_last_end,
                            )));
                            // DAY(start) = DAY(LAST_DAY(start)) — start is a month-end
                            let cond2 = Expression::Eq(Box::new(BinaryOp::new(
                                day_start.clone(),
                                day_last_start,
                            )));
                            let both_cond =
                                Expression::And(Box::new(BinaryOp::new(cond1, cond2)));
                            // (DAY(end) - DAY(start)) / 31.0 — the fractional part
                            let day_diff =
                                Expression::Sub(Box::new(BinaryOp::new(day_end, day_start)));
                            let day_diff_paren =
                                Expression::Paren(Box::new(crate::expressions::Paren {
                                    this: day_diff,
                                    trailing_comments: Vec::new(),
                                }));
                            let frac = Expression::Div(Box::new(BinaryOp::new(
                                day_diff_paren,
                                Expression::Literal(Box::new(Literal::Number(
                                    "31.0".to_string(),
                                ))),
                            )));
                            let case_expr = Expression::Case(Box::new(Case {
                                operand: None,
                                whens: vec![(both_cond, Expression::number(0))],
                                else_: Some(frac),
                                comments: Vec::new(),
                                inferred_type: None,
                            }));
                            // whole months + fractional correction
                            Ok(Expression::Add(Box::new(BinaryOp::new(dd, case_expr))))
                        }
                        DialectType::Snowflake | DialectType::Redshift => {
                            // DATEDIFF(MONTH, start, end) — bare identifier unit
                            let unit = Expression::Identifier(Identifier::new("MONTH"));
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATEDIFF".to_string(),
                                vec![unit, start_date, end_date],
                            ))))
                        }
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // DATE_DIFF('MONTH', start, end) — string-literal unit
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_DIFF".to_string(),
                                vec![Expression::string("MONTH"), start_date, end_date],
                            ))))
                        }
                        _ => Ok(Expression::MonthsBetween(Box::new(
                            crate::expressions::BinaryFunc {
                                this: end_date,
                                expression: start_date,
                                original_name: None,
                                inferred_type: None,
                            },
                        ))),
                    }
                } else {
                    // Not a MonthsBetween node: pass through unchanged.
                    Ok(e)
                }
            }
24677
            Action::AddMonthsConvert => {
                // ADD_MONTHS(date, n): add n months to a date, rewritten per target.
                if let Expression::AddMonths(am) = e {
                    let date = am.this;
                    let val = am.expression;
                    match target {
                        DialectType::TSQL | DialectType::Fabric => {
                            // T-SQL: DATEADD(MONTH, n, CAST(date AS DATETIME2)).
                            let cast_date = Self::ensure_cast_datetime2(date);
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATEADD".to_string(),
                                vec![
                                    Expression::Identifier(Identifier::new("MONTH")),
                                    val,
                                    cast_date,
                                ],
                            ))))
                        }
                        DialectType::DuckDB if matches!(source, DialectType::Snowflake) => {
                            // DuckDB ADD_MONTHS from Snowflake: CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(date + interval) ELSE date + interval END
                            // Optionally wrapped in CAST(... AS type) if the input had a specific type
                            // (month-end inputs are pinned to month-end outputs,
                            // presumably matching Snowflake's ADD_MONTHS; TODO confirm).

                            // Determine the cast type from the date expression
                            let (cast_date, return_type) = match &date {
                                Expression::Literal(lit)
                                    if matches!(lit.as_ref(), Literal::String(_)) =>
                                {
                                    // String literal: CAST(str AS TIMESTAMP), no outer CAST
                                    (
                                        Expression::Cast(Box::new(Cast {
                                            this: date.clone(),
                                            to: DataType::Timestamp {
                                                precision: None,
                                                timezone: false,
                                            },
                                            trailing_comments: Vec::new(),
                                            double_colon_syntax: false,
                                            format: None,
                                            default: None,
                                            inferred_type: None,
                                        })),
                                        None,
                                    )
                                }
                                Expression::Cast(c) => {
                                    // Already cast (e.g., '2023-01-31'::DATE) - keep the cast, wrap result in CAST(... AS type)
                                    (date.clone(), Some(c.to.clone()))
                                }
                                _ => {
                                    // Expression or NULL::TYPE - keep as-is, check for cast type
                                    // NOTE(review): this inner Cast check looks
                                    // unreachable — Expression::Cast is already matched
                                    // by the arm above, so this `_` arm should always
                                    // take the else branch.
                                    if let Expression::Cast(c) = &date {
                                        (date.clone(), Some(c.to.clone()))
                                    } else {
                                        (date.clone(), None)
                                    }
                                }
                            };

                            // Build the interval expression
                            // For non-integer values (float, decimal, cast), use TO_MONTHS(CAST(ROUND(val) AS INT))
                            // For integer values, use INTERVAL val MONTH
                            let is_non_integer_val = match &val {
                                Expression::Literal(lit)
                                    if matches!(lit.as_ref(), Literal::Number(_)) =>
                                {
                                    let Literal::Number(n) = lit.as_ref() else {
                                        unreachable!()
                                    };
                                    n.contains('.')
                                }
                                Expression::Cast(_) => true, // e.g., 3.2::DECIMAL(10,2)
                                Expression::Neg(n) => {
                                    // Negated literal: non-integer iff the magnitude
                                    // contains a decimal point.
                                    if let Expression::Literal(lit) = &n.this {
                                        if let Literal::Number(s) = lit.as_ref() {
                                            s.contains('.')
                                        } else {
                                            false
                                        }
                                    } else {
                                        false
                                    }
                                }
                                _ => false,
                            };

                            let add_interval = if is_non_integer_val {
                                // TO_MONTHS(CAST(ROUND(val) AS INT)) — round to the
                                // nearest whole month first.
                                let round_val = Expression::Function(Box::new(Function::new(
                                    "ROUND".to_string(),
                                    vec![val.clone()],
                                )));
                                let cast_int = Expression::Cast(Box::new(Cast {
                                    this: round_val,
                                    to: DataType::Int {
                                        length: None,
                                        integer_spelling: false,
                                    },
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                }));
                                Expression::Function(Box::new(Function::new(
                                    "TO_MONTHS".to_string(),
                                    vec![cast_int],
                                )))
                            } else {
                                // INTERVAL val MONTH
                                // For negative numbers, wrap in parens
                                let interval_val = match &val {
                                    Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n.starts_with('-')) =>
                                    {
                                        let Literal::Number(_) = lit.as_ref() else {
                                            unreachable!()
                                        };
                                        Expression::Paren(Box::new(Paren {
                                            this: val.clone(),
                                            trailing_comments: Vec::new(),
                                        }))
                                    }
                                    Expression::Neg(_) => Expression::Paren(Box::new(Paren {
                                        this: val.clone(),
                                        trailing_comments: Vec::new(),
                                    })),
                                    Expression::Null(_) => Expression::Paren(Box::new(Paren {
                                        this: val.clone(),
                                        trailing_comments: Vec::new(),
                                    })),
                                    _ => val.clone(),
                                };
                                Expression::Interval(Box::new(crate::expressions::Interval {
                                    this: Some(interval_val),
                                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                        unit: crate::expressions::IntervalUnit::Month,
                                        use_plural: false,
                                    }),
                                }))
                            };

                            // Build: date + interval
                            let date_plus_interval = Expression::Add(Box::new(BinaryOp::new(
                                cast_date.clone(),
                                add_interval.clone(),
                            )));

                            // Build LAST_DAY(date)
                            let last_day_date = Expression::Function(Box::new(Function::new(
                                "LAST_DAY".to_string(),
                                vec![cast_date.clone()],
                            )));

                            // Build LAST_DAY(date + interval)
                            let last_day_date_plus =
                                Expression::Function(Box::new(Function::new(
                                    "LAST_DAY".to_string(),
                                    vec![date_plus_interval.clone()],
                                )));

                            // Build: CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(date + interval) ELSE date + interval END
                            let case_expr = Expression::Case(Box::new(Case {
                                operand: None,
                                whens: vec![(
                                    Expression::Eq(Box::new(BinaryOp::new(
                                        last_day_date,
                                        cast_date.clone(),
                                    ))),
                                    last_day_date_plus,
                                )],
                                else_: Some(date_plus_interval),
                                comments: Vec::new(),
                                inferred_type: None,
                            }));

                            // Wrap in CAST(... AS type) if needed
                            if let Some(dt) = return_type {
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: case_expr,
                                    to: dt,
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                })))
                            } else {
                                Ok(case_expr)
                            }
                        }
                        DialectType::DuckDB => {
                            // Non-Snowflake source: simple date + INTERVAL
                            let cast_date = if matches!(&date, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
                            {
                                Expression::Cast(Box::new(Cast {
                                    this: date,
                                    to: DataType::Timestamp {
                                        precision: None,
                                        timezone: false,
                                    },
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                }))
                            } else {
                                date
                            };
                            let interval =
                                Expression::Interval(Box::new(crate::expressions::Interval {
                                    this: Some(val),
                                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                        unit: crate::expressions::IntervalUnit::Month,
                                        use_plural: false,
                                    }),
                                }));
                            Ok(Expression::Add(Box::new(BinaryOp::new(
                                cast_date, interval,
                            ))))
                        }
                        DialectType::Snowflake => {
                            // Keep ADD_MONTHS when source is also Snowflake
                            if matches!(source, DialectType::Snowflake) {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "ADD_MONTHS".to_string(),
                                    vec![date, val],
                                ))))
                            } else {
                                // Other sources: Snowflake DATEADD(MONTH, n, date)
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATEADD".to_string(),
                                    vec![
                                        Expression::Identifier(Identifier::new("MONTH")),
                                        val,
                                        date,
                                    ],
                                ))))
                            }
                        }
                        DialectType::Redshift => {
                            // Redshift: DATEADD(MONTH, n, date)
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATEADD".to_string(),
                                vec![
                                    Expression::Identifier(Identifier::new("MONTH")),
                                    val,
                                    date,
                                ],
                            ))))
                        }
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // Presto-family: DATE_ADD('MONTH', n, date); string
                            // literals are cast to TIMESTAMP first.
                            let cast_date = if matches!(&date, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
                            {
                                Expression::Cast(Box::new(Cast {
                                    this: date,
                                    to: DataType::Timestamp {
                                        precision: None,
                                        timezone: false,
                                    },
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                }))
                            } else {
                                date
                            };
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_ADD".to_string(),
                                vec![Expression::string("MONTH"), val, cast_date],
                            ))))
                        }
                        DialectType::BigQuery => {
                            // BigQuery: DATE_ADD(date, INTERVAL n MONTH); string
                            // literals are cast to DATETIME first.
                            let interval =
                                Expression::Interval(Box::new(crate::expressions::Interval {
                                    this: Some(val),
                                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                        unit: crate::expressions::IntervalUnit::Month,
                                        use_plural: false,
                                    }),
                                }));
                            let cast_date = if matches!(&date, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
                            {
                                Expression::Cast(Box::new(Cast {
                                    this: date,
                                    to: DataType::Custom {
                                        name: "DATETIME".to_string(),
                                    },
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                }))
                            } else {
                                date
                            };
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_ADD".to_string(),
                                vec![cast_date, interval],
                            ))))
                        }
                        DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
                            // Spark family supports ADD_MONTHS natively.
                            Ok(Expression::Function(Box::new(Function::new(
                                "ADD_MONTHS".to_string(),
                                vec![date, val],
                            ))))
                        }
                        _ => {
                            // Default: keep as AddMonths expression
                            Ok(Expression::AddMonths(Box::new(
                                crate::expressions::BinaryFunc {
                                    this: date,
                                    expression: val,
                                    original_name: None,
                                    inferred_type: None,
                                },
                            )))
                        }
                    }
                } else {
                    // Not an AddMonths node: pass through unchanged.
                    Ok(e)
                }
            }
24999
            Action::PercentileContConvert => {
                // PERCENTILE_CONT(p) WITHIN GROUP (ORDER BY col) ->
                // Presto/Trino: APPROX_PERCENTILE(col, p)
                // Spark/Databricks: PERCENTILE_APPROX(col, p)
                //
                // NOTE(review): `_is_disc` is computed but never used — the exact
                // PERCENTILE_DISC is rewritten to the same approximate function as
                // PERCENTILE_CONT. Confirm this loss of precision is intended.
                if let Expression::WithinGroup(wg) = e {
                    // Extract percentile value and order by column
                    let (percentile, _is_disc) = match &wg.this {
                        Expression::Function(f) => {
                            let is_disc = f.name.eq_ignore_ascii_case("PERCENTILE_DISC");
                            // Missing percentile argument defaults to the median (0.5).
                            let pct = f.args.first().cloned().unwrap_or(Expression::Literal(
                                Box::new(Literal::Number("0.5".to_string())),
                            ));
                            (pct, is_disc)
                        }
                        Expression::AggregateFunction(af) => {
                            let is_disc = af.name.eq_ignore_ascii_case("PERCENTILE_DISC");
                            let pct = af.args.first().cloned().unwrap_or(Expression::Literal(
                                Box::new(Literal::Number("0.5".to_string())),
                            ));
                            (pct, is_disc)
                        }
                        Expression::PercentileCont(pc) => (pc.percentile.clone(), false),
                        // Unrecognized inner node: leave the WITHIN GROUP intact.
                        _ => return Ok(Expression::WithinGroup(wg)),
                    };
                    // Missing ORDER BY degenerates to the literal 1 as the column.
                    let col = wg.order_by.first().map(|o| o.this.clone()).unwrap_or(
                        Expression::Literal(Box::new(Literal::Number("1".to_string()))),
                    );

                    let func_name = match target {
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            "APPROX_PERCENTILE"
                        }
                        _ => "PERCENTILE_APPROX", // Spark, Databricks
                    };
                    Ok(Expression::Function(Box::new(Function::new(
                        func_name.to_string(),
                        vec![col, percentile],
                    ))))
                } else {
                    // Not a WithinGroup node: pass through unchanged.
                    Ok(e)
                }
            }
25042
25043 Action::CurrentUserSparkParens => {
25044 // CURRENT_USER -> CURRENT_USER() for Spark
25045 if let Expression::CurrentUser(_) = e {
25046 Ok(Expression::Function(Box::new(Function::new(
25047 "CURRENT_USER".to_string(),
25048 vec![],
25049 ))))
25050 } else {
25051 Ok(e)
25052 }
25053 }
25054
25055 Action::SparkDateFuncCast => {
25056 // MONTH/YEAR/DAY('string') from Spark -> wrap arg in CAST to DATE
25057 let cast_arg = |arg: Expression| -> Expression {
25058 match target {
25059 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
25060 Self::double_cast_timestamp_date(arg)
25061 }
25062 _ => {
25063 // DuckDB, PostgreSQL, etc: CAST(arg AS DATE)
25064 Self::ensure_cast_date(arg)
25065 }
25066 }
25067 };
25068 match e {
25069 Expression::Month(f) => Ok(Expression::Month(Box::new(
25070 crate::expressions::UnaryFunc::new(cast_arg(f.this)),
25071 ))),
25072 Expression::Year(f) => Ok(Expression::Year(Box::new(
25073 crate::expressions::UnaryFunc::new(cast_arg(f.this)),
25074 ))),
25075 Expression::Day(f) => Ok(Expression::Day(Box::new(
25076 crate::expressions::UnaryFunc::new(cast_arg(f.this)),
25077 ))),
25078 other => Ok(other),
25079 }
25080 }
25081
25082 Action::MapFromArraysConvert => {
25083 // Expression::MapFromArrays -> target-specific
25084 if let Expression::MapFromArrays(mfa) = e {
25085 let keys = mfa.this;
25086 let values = mfa.expression;
25087 match target {
25088 DialectType::Snowflake => Ok(Expression::Function(Box::new(
25089 Function::new("OBJECT_CONSTRUCT".to_string(), vec![keys, values]),
25090 ))),
25091 _ => {
25092 // Hive, Presto, DuckDB, etc.: MAP(keys, values)
25093 Ok(Expression::Function(Box::new(Function::new(
25094 "MAP".to_string(),
25095 vec![keys, values],
25096 ))))
25097 }
25098 }
25099 } else {
25100 Ok(e)
25101 }
25102 }
25103
25104 Action::AnyToExists => {
25105 if let Expression::Any(q) = e {
25106 if let Some(op) = q.op.clone() {
25107 let lambda_param = crate::expressions::Identifier::new("x");
25108 let rhs = Expression::Identifier(lambda_param.clone());
25109 let body = match op {
25110 crate::expressions::QuantifiedOp::Eq => {
25111 Expression::Eq(Box::new(BinaryOp::new(q.this, rhs)))
25112 }
25113 crate::expressions::QuantifiedOp::Neq => {
25114 Expression::Neq(Box::new(BinaryOp::new(q.this, rhs)))
25115 }
25116 crate::expressions::QuantifiedOp::Lt => {
25117 Expression::Lt(Box::new(BinaryOp::new(q.this, rhs)))
25118 }
25119 crate::expressions::QuantifiedOp::Lte => {
25120 Expression::Lte(Box::new(BinaryOp::new(q.this, rhs)))
25121 }
25122 crate::expressions::QuantifiedOp::Gt => {
25123 Expression::Gt(Box::new(BinaryOp::new(q.this, rhs)))
25124 }
25125 crate::expressions::QuantifiedOp::Gte => {
25126 Expression::Gte(Box::new(BinaryOp::new(q.this, rhs)))
25127 }
25128 };
25129 let lambda =
25130 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
25131 parameters: vec![lambda_param],
25132 body,
25133 colon: false,
25134 parameter_types: Vec::new(),
25135 }));
25136 Ok(Expression::Function(Box::new(Function::new(
25137 "EXISTS".to_string(),
25138 vec![q.subquery, lambda],
25139 ))))
25140 } else {
25141 Ok(Expression::Any(q))
25142 }
25143 } else {
25144 Ok(e)
25145 }
25146 }
25147
25148 Action::GenerateSeriesConvert => {
25149 // GENERATE_SERIES(start, end[, step]) -> SEQUENCE for Spark/Databricks/Hive, wrapped in UNNEST/EXPLODE
25150 // For DuckDB target: wrap in UNNEST(GENERATE_SERIES(...))
25151 // For PG/Redshift target: keep as GENERATE_SERIES but normalize interval string step
25152 if let Expression::Function(f) = e {
25153 if f.name.eq_ignore_ascii_case("GENERATE_SERIES") && f.args.len() >= 2 {
25154 let start = f.args[0].clone();
25155 let end = f.args[1].clone();
25156 let step = f.args.get(2).cloned();
25157
25158 // Normalize step: convert string interval like '1day' or ' 2 days ' to INTERVAL expression
25159 let step = step.map(|s| Self::normalize_interval_string(s, target));
25160
25161 // Helper: wrap CURRENT_TIMESTAMP in CAST(... AS TIMESTAMP) for Presto/Trino/Spark
25162 let maybe_cast_timestamp = |arg: Expression| -> Expression {
25163 if matches!(
25164 target,
25165 DialectType::Presto
25166 | DialectType::Trino
25167 | DialectType::Athena
25168 | DialectType::Spark
25169 | DialectType::Databricks
25170 | DialectType::Hive
25171 ) {
25172 match &arg {
25173 Expression::CurrentTimestamp(_) => {
25174 Expression::Cast(Box::new(Cast {
25175 this: arg,
25176 to: DataType::Timestamp {
25177 precision: None,
25178 timezone: false,
25179 },
25180 trailing_comments: Vec::new(),
25181 double_colon_syntax: false,
25182 format: None,
25183 default: None,
25184 inferred_type: None,
25185 }))
25186 }
25187 _ => arg,
25188 }
25189 } else {
25190 arg
25191 }
25192 };
25193
25194 let start = maybe_cast_timestamp(start);
25195 let end = maybe_cast_timestamp(end);
25196
25197 // For PostgreSQL/Redshift target, keep as GENERATE_SERIES
25198 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
25199 let mut gs_args = vec![start, end];
25200 if let Some(step) = step {
25201 gs_args.push(step);
25202 }
25203 return Ok(Expression::Function(Box::new(Function::new(
25204 "GENERATE_SERIES".to_string(),
25205 gs_args,
25206 ))));
25207 }
25208
25209 // For DuckDB target: wrap in UNNEST(GENERATE_SERIES(...))
25210 if matches!(target, DialectType::DuckDB) {
25211 let mut gs_args = vec![start, end];
25212 if let Some(step) = step {
25213 gs_args.push(step);
25214 }
25215 let gs = Expression::Function(Box::new(Function::new(
25216 "GENERATE_SERIES".to_string(),
25217 gs_args,
25218 )));
25219 return Ok(Expression::Function(Box::new(Function::new(
25220 "UNNEST".to_string(),
25221 vec![gs],
25222 ))));
25223 }
25224
25225 let mut seq_args = vec![start, end];
25226 if let Some(step) = step {
25227 seq_args.push(step);
25228 }
25229
25230 let seq = Expression::Function(Box::new(Function::new(
25231 "SEQUENCE".to_string(),
25232 seq_args,
25233 )));
25234
25235 match target {
25236 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
25237 // Wrap in UNNEST
25238 Ok(Expression::Function(Box::new(Function::new(
25239 "UNNEST".to_string(),
25240 vec![seq],
25241 ))))
25242 }
25243 DialectType::Spark
25244 | DialectType::Databricks
25245 | DialectType::Hive => {
25246 // Wrap in EXPLODE
25247 Ok(Expression::Function(Box::new(Function::new(
25248 "EXPLODE".to_string(),
25249 vec![seq],
25250 ))))
25251 }
25252 _ => {
25253 // Just SEQUENCE for others
25254 Ok(seq)
25255 }
25256 }
25257 } else {
25258 Ok(Expression::Function(f))
25259 }
25260 } else {
25261 Ok(e)
25262 }
25263 }
25264
25265 Action::ConcatCoalesceWrap => {
25266 // CONCAT(a, b) function -> CONCAT(COALESCE(CAST(a AS VARCHAR), ''), ...) for Presto
25267 // CONCAT(a, b) function -> CONCAT(COALESCE(a, ''), ...) for ClickHouse
25268 if let Expression::Function(f) = e {
25269 if f.name.eq_ignore_ascii_case("CONCAT") {
25270 let new_args: Vec<Expression> = f
25271 .args
25272 .into_iter()
25273 .map(|arg| {
25274 let cast_arg = if matches!(
25275 target,
25276 DialectType::Presto
25277 | DialectType::Trino
25278 | DialectType::Athena
25279 ) {
25280 Expression::Cast(Box::new(Cast {
25281 this: arg,
25282 to: DataType::VarChar {
25283 length: None,
25284 parenthesized_length: false,
25285 },
25286 trailing_comments: Vec::new(),
25287 double_colon_syntax: false,
25288 format: None,
25289 default: None,
25290 inferred_type: None,
25291 }))
25292 } else {
25293 arg
25294 };
25295 Expression::Function(Box::new(Function::new(
25296 "COALESCE".to_string(),
25297 vec![cast_arg, Expression::string("")],
25298 )))
25299 })
25300 .collect();
25301 Ok(Expression::Function(Box::new(Function::new(
25302 "CONCAT".to_string(),
25303 new_args,
25304 ))))
25305 } else {
25306 Ok(Expression::Function(f))
25307 }
25308 } else {
25309 Ok(e)
25310 }
25311 }
25312
25313 Action::PipeConcatToConcat => {
25314 // a || b (Concat operator) -> CONCAT(CAST(a AS VARCHAR), CAST(b AS VARCHAR)) for Presto/Trino
25315 if let Expression::Concat(op) = e {
25316 let cast_left = Expression::Cast(Box::new(Cast {
25317 this: op.left,
25318 to: DataType::VarChar {
25319 length: None,
25320 parenthesized_length: false,
25321 },
25322 trailing_comments: Vec::new(),
25323 double_colon_syntax: false,
25324 format: None,
25325 default: None,
25326 inferred_type: None,
25327 }));
25328 let cast_right = Expression::Cast(Box::new(Cast {
25329 this: op.right,
25330 to: DataType::VarChar {
25331 length: None,
25332 parenthesized_length: false,
25333 },
25334 trailing_comments: Vec::new(),
25335 double_colon_syntax: false,
25336 format: None,
25337 default: None,
25338 inferred_type: None,
25339 }));
25340 Ok(Expression::Function(Box::new(Function::new(
25341 "CONCAT".to_string(),
25342 vec![cast_left, cast_right],
25343 ))))
25344 } else {
25345 Ok(e)
25346 }
25347 }
25348
25349 Action::DivFuncConvert => {
25350 // DIV(a, b) -> target-specific integer division
25351 if let Expression::Function(f) = e {
25352 if f.name.eq_ignore_ascii_case("DIV") && f.args.len() == 2 {
25353 let a = f.args[0].clone();
25354 let b = f.args[1].clone();
25355 match target {
25356 DialectType::DuckDB => {
25357 // DIV(a, b) -> CAST(a // b AS DECIMAL)
25358 let int_div = Expression::IntDiv(Box::new(
25359 crate::expressions::BinaryFunc {
25360 this: a,
25361 expression: b,
25362 original_name: None,
25363 inferred_type: None,
25364 },
25365 ));
25366 Ok(Expression::Cast(Box::new(Cast {
25367 this: int_div,
25368 to: DataType::Decimal {
25369 precision: None,
25370 scale: None,
25371 },
25372 trailing_comments: Vec::new(),
25373 double_colon_syntax: false,
25374 format: None,
25375 default: None,
25376 inferred_type: None,
25377 })))
25378 }
25379 DialectType::BigQuery => {
25380 // DIV(a, b) -> CAST(DIV(a, b) AS NUMERIC)
25381 let div_func = Expression::Function(Box::new(Function::new(
25382 "DIV".to_string(),
25383 vec![a, b],
25384 )));
25385 Ok(Expression::Cast(Box::new(Cast {
25386 this: div_func,
25387 to: DataType::Custom {
25388 name: "NUMERIC".to_string(),
25389 },
25390 trailing_comments: Vec::new(),
25391 double_colon_syntax: false,
25392 format: None,
25393 default: None,
25394 inferred_type: None,
25395 })))
25396 }
25397 DialectType::SQLite => {
25398 // DIV(a, b) -> CAST(CAST(CAST(a AS REAL) / b AS INTEGER) AS REAL)
25399 let cast_a = Expression::Cast(Box::new(Cast {
25400 this: a,
25401 to: DataType::Custom {
25402 name: "REAL".to_string(),
25403 },
25404 trailing_comments: Vec::new(),
25405 double_colon_syntax: false,
25406 format: None,
25407 default: None,
25408 inferred_type: None,
25409 }));
25410 let div = Expression::Div(Box::new(BinaryOp::new(cast_a, b)));
25411 let cast_int = Expression::Cast(Box::new(Cast {
25412 this: div,
25413 to: DataType::Int {
25414 length: None,
25415 integer_spelling: true,
25416 },
25417 trailing_comments: Vec::new(),
25418 double_colon_syntax: false,
25419 format: None,
25420 default: None,
25421 inferred_type: None,
25422 }));
25423 Ok(Expression::Cast(Box::new(Cast {
25424 this: cast_int,
25425 to: DataType::Custom {
25426 name: "REAL".to_string(),
25427 },
25428 trailing_comments: Vec::new(),
25429 double_colon_syntax: false,
25430 format: None,
25431 default: None,
25432 inferred_type: None,
25433 })))
25434 }
25435 _ => Ok(Expression::Function(f)),
25436 }
25437 } else {
25438 Ok(Expression::Function(f))
25439 }
25440 } else {
25441 Ok(e)
25442 }
25443 }
25444
25445 Action::JsonObjectAggConvert => {
25446 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
25447 match e {
25448 Expression::Function(f) => Ok(Expression::Function(Box::new(
25449 Function::new("JSON_GROUP_OBJECT".to_string(), f.args),
25450 ))),
25451 Expression::AggregateFunction(af) => {
25452 // AggregateFunction stores all args in the `args` vec
25453 Ok(Expression::Function(Box::new(Function::new(
25454 "JSON_GROUP_OBJECT".to_string(),
25455 af.args,
25456 ))))
25457 }
25458 other => Ok(other),
25459 }
25460 }
25461
25462 Action::JsonbExistsConvert => {
25463 // JSONB_EXISTS('json', 'key') -> JSON_EXISTS('json', '$.key') for DuckDB
25464 if let Expression::Function(f) = e {
25465 if f.args.len() == 2 {
25466 let json_expr = f.args[0].clone();
25467 let key = match &f.args[1] {
25468 Expression::Literal(lit)
25469 if matches!(
25470 lit.as_ref(),
25471 crate::expressions::Literal::String(_)
25472 ) =>
25473 {
25474 let crate::expressions::Literal::String(s) = lit.as_ref()
25475 else {
25476 unreachable!()
25477 };
25478 format!("$.{}", s)
25479 }
25480 _ => return Ok(Expression::Function(f)),
25481 };
25482 Ok(Expression::Function(Box::new(Function::new(
25483 "JSON_EXISTS".to_string(),
25484 vec![json_expr, Expression::string(&key)],
25485 ))))
25486 } else {
25487 Ok(Expression::Function(f))
25488 }
25489 } else {
25490 Ok(e)
25491 }
25492 }
25493
25494 Action::DateBinConvert => {
25495 // DATE_BIN('interval', ts, origin) -> TIME_BUCKET('interval', ts, origin) for DuckDB
25496 if let Expression::Function(f) = e {
25497 Ok(Expression::Function(Box::new(Function::new(
25498 "TIME_BUCKET".to_string(),
25499 f.args,
25500 ))))
25501 } else {
25502 Ok(e)
25503 }
25504 }
25505
25506 Action::MysqlCastCharToText => {
25507 // MySQL CAST(x AS CHAR) was originally TEXT -> convert to target text type
25508 if let Expression::Cast(mut c) = e {
25509 c.to = DataType::Text;
25510 Ok(Expression::Cast(c))
25511 } else {
25512 Ok(e)
25513 }
25514 }
25515
25516 Action::SparkCastVarcharToString => {
25517 // Spark parses VARCHAR(n)/CHAR(n) as TEXT -> normalize to STRING
25518 match e {
25519 Expression::Cast(mut c) => {
25520 c.to = Self::normalize_varchar_to_string(c.to);
25521 Ok(Expression::Cast(c))
25522 }
25523 Expression::TryCast(mut c) => {
25524 c.to = Self::normalize_varchar_to_string(c.to);
25525 Ok(Expression::TryCast(c))
25526 }
25527 _ => Ok(e),
25528 }
25529 }
25530
25531 Action::MinMaxToLeastGreatest => {
25532 // Multi-arg MIN(a,b,c) -> LEAST(a,b,c), MAX(a,b,c) -> GREATEST(a,b,c)
25533 if let Expression::Function(f) = e {
25534 let new_name = if f.name.eq_ignore_ascii_case("MIN") {
25535 "LEAST"
25536 } else if f.name.eq_ignore_ascii_case("MAX") {
25537 "GREATEST"
25538 } else {
25539 return Ok(Expression::Function(f));
25540 };
25541 Ok(Expression::Function(Box::new(Function::new(
25542 new_name.to_string(),
25543 f.args,
25544 ))))
25545 } else {
25546 Ok(e)
25547 }
25548 }
25549
25550 Action::ClickHouseUniqToApproxCountDistinct => {
25551 // ClickHouse uniq(x) -> APPROX_COUNT_DISTINCT(x) for non-ClickHouse targets
25552 if let Expression::Function(f) = e {
25553 Ok(Expression::Function(Box::new(Function::new(
25554 "APPROX_COUNT_DISTINCT".to_string(),
25555 f.args,
25556 ))))
25557 } else {
25558 Ok(e)
25559 }
25560 }
25561
25562 Action::ClickHouseAnyToAnyValue => {
25563 // ClickHouse any(x) -> ANY_VALUE(x) for non-ClickHouse targets
25564 if let Expression::Function(f) = e {
25565 Ok(Expression::Function(Box::new(Function::new(
25566 "ANY_VALUE".to_string(),
25567 f.args,
25568 ))))
25569 } else {
25570 Ok(e)
25571 }
25572 }
25573
25574 Action::OracleVarchar2ToVarchar => {
25575 // Oracle VARCHAR2(N CHAR/BYTE) / NVARCHAR2(N) -> VarChar(N) for non-Oracle targets
25576 if let Expression::DataType(DataType::Custom { ref name }) = e {
25577 // Extract length from VARCHAR2(N ...) or NVARCHAR2(N ...)
25578 let starts_varchar2 =
25579 name.len() >= 9 && name[..9].eq_ignore_ascii_case("VARCHAR2(");
25580 let starts_nvarchar2 =
25581 name.len() >= 10 && name[..10].eq_ignore_ascii_case("NVARCHAR2(");
25582 let inner = if starts_varchar2 || starts_nvarchar2 {
25583 let start = if starts_nvarchar2 { 10 } else { 9 }; // skip "NVARCHAR2(" or "VARCHAR2("
25584 let end = name.len() - 1; // skip trailing ")"
25585 Some(&name[start..end])
25586 } else {
25587 Option::None
25588 };
25589 if let Some(inner_str) = inner {
25590 // Parse the number part, ignoring BYTE/CHAR qualifier
25591 let num_str = inner_str.split_whitespace().next().unwrap_or("");
25592 if let Ok(n) = num_str.parse::<u32>() {
25593 Ok(Expression::DataType(DataType::VarChar {
25594 length: Some(n),
25595 parenthesized_length: false,
25596 }))
25597 } else {
25598 Ok(e)
25599 }
25600 } else {
25601 // Plain VARCHAR2 / NVARCHAR2 without parens
25602 Ok(Expression::DataType(DataType::VarChar {
25603 length: Option::None,
25604 parenthesized_length: false,
25605 }))
25606 }
25607 } else {
25608 Ok(e)
25609 }
25610 }
25611
25612 Action::Nvl2Expand => {
25613 // NVL2(a, b[, c]) -> CASE WHEN NOT a IS NULL THEN b [ELSE c] END
25614 // But keep as NVL2 for dialects that support it natively
25615 let nvl2_native = matches!(
25616 target,
25617 DialectType::Oracle
25618 | DialectType::Snowflake
25619 | DialectType::Redshift
25620 | DialectType::Teradata
25621 | DialectType::Spark
25622 | DialectType::Databricks
25623 );
25624 let (a, b, c) = if let Expression::Nvl2(nvl2) = e {
25625 if nvl2_native {
25626 return Ok(Expression::Nvl2(nvl2));
25627 }
25628 (nvl2.this, nvl2.true_value, Some(nvl2.false_value))
25629 } else if let Expression::Function(f) = e {
25630 if nvl2_native {
25631 return Ok(Expression::Function(Box::new(Function::new(
25632 "NVL2".to_string(),
25633 f.args,
25634 ))));
25635 }
25636 if f.args.len() < 2 {
25637 return Ok(Expression::Function(f));
25638 }
25639 let mut args = f.args;
25640 let a = args.remove(0);
25641 let b = args.remove(0);
25642 let c = if !args.is_empty() {
25643 Some(args.remove(0))
25644 } else {
25645 Option::None
25646 };
25647 (a, b, c)
25648 } else {
25649 return Ok(e);
25650 };
25651 // Build: NOT (a IS NULL)
25652 let is_null = Expression::IsNull(Box::new(IsNull {
25653 this: a,
25654 not: false,
25655 postfix_form: false,
25656 }));
25657 let not_null = Expression::Not(Box::new(crate::expressions::UnaryOp {
25658 this: is_null,
25659 inferred_type: None,
25660 }));
25661 Ok(Expression::Case(Box::new(Case {
25662 operand: Option::None,
25663 whens: vec![(not_null, b)],
25664 else_: c,
25665 comments: Vec::new(),
25666 inferred_type: None,
25667 })))
25668 }
25669
25670 Action::IfnullToCoalesce => {
25671 // IFNULL(a, b) -> COALESCE(a, b): clear original_name to output COALESCE
25672 if let Expression::Coalesce(mut cf) = e {
25673 cf.original_name = Option::None;
25674 Ok(Expression::Coalesce(cf))
25675 } else if let Expression::Function(f) = e {
25676 Ok(Expression::Function(Box::new(Function::new(
25677 "COALESCE".to_string(),
25678 f.args,
25679 ))))
25680 } else {
25681 Ok(e)
25682 }
25683 }
25684
25685 Action::IsAsciiConvert => {
25686 // IS_ASCII(x) -> dialect-specific ASCII check
25687 if let Expression::Function(f) = e {
25688 let arg = f.args.into_iter().next().unwrap();
25689 match target {
25690 DialectType::MySQL | DialectType::SingleStore | DialectType::TiDB => {
25691 // REGEXP_LIKE(x, '^[[:ascii:]]*$')
25692 Ok(Expression::Function(Box::new(Function::new(
25693 "REGEXP_LIKE".to_string(),
25694 vec![
25695 arg,
25696 Expression::Literal(Box::new(Literal::String(
25697 "^[[:ascii:]]*$".to_string(),
25698 ))),
25699 ],
25700 ))))
25701 }
25702 DialectType::PostgreSQL
25703 | DialectType::Redshift
25704 | DialectType::Materialize
25705 | DialectType::RisingWave => {
25706 // (x ~ '^[[:ascii:]]*$')
25707 Ok(Expression::Paren(Box::new(Paren {
25708 this: Expression::RegexpLike(Box::new(
25709 crate::expressions::RegexpFunc {
25710 this: arg,
25711 pattern: Expression::Literal(Box::new(
25712 Literal::String("^[[:ascii:]]*$".to_string()),
25713 )),
25714 flags: Option::None,
25715 },
25716 )),
25717 trailing_comments: Vec::new(),
25718 })))
25719 }
25720 DialectType::SQLite => {
25721 // (NOT x GLOB CAST(x'2a5b5e012d7f5d2a' AS TEXT))
25722 let hex_lit = Expression::Literal(Box::new(Literal::HexString(
25723 "2a5b5e012d7f5d2a".to_string(),
25724 )));
25725 let cast_expr = Expression::Cast(Box::new(Cast {
25726 this: hex_lit,
25727 to: DataType::Text,
25728 trailing_comments: Vec::new(),
25729 double_colon_syntax: false,
25730 format: Option::None,
25731 default: Option::None,
25732 inferred_type: None,
25733 }));
25734 let glob = Expression::Glob(Box::new(BinaryOp {
25735 left: arg,
25736 right: cast_expr,
25737 left_comments: Vec::new(),
25738 operator_comments: Vec::new(),
25739 trailing_comments: Vec::new(),
25740 inferred_type: None,
25741 }));
25742 Ok(Expression::Paren(Box::new(Paren {
25743 this: Expression::Not(Box::new(crate::expressions::UnaryOp {
25744 this: glob,
25745 inferred_type: None,
25746 })),
25747 trailing_comments: Vec::new(),
25748 })))
25749 }
25750 DialectType::TSQL | DialectType::Fabric => {
25751 // (PATINDEX(CONVERT(VARCHAR(MAX), 0x255b5e002d7f5d25) COLLATE Latin1_General_BIN, x) = 0)
25752 let hex_lit = Expression::Literal(Box::new(Literal::HexNumber(
25753 "255b5e002d7f5d25".to_string(),
25754 )));
25755 let convert_expr = Expression::Convert(Box::new(
25756 crate::expressions::ConvertFunc {
25757 this: hex_lit,
25758 to: DataType::Text, // Text generates as VARCHAR(MAX) for TSQL
25759 style: None,
25760 },
25761 ));
25762 let collated = Expression::Collation(Box::new(
25763 crate::expressions::CollationExpr {
25764 this: convert_expr,
25765 collation: "Latin1_General_BIN".to_string(),
25766 quoted: false,
25767 double_quoted: false,
25768 },
25769 ));
25770 let patindex = Expression::Function(Box::new(Function::new(
25771 "PATINDEX".to_string(),
25772 vec![collated, arg],
25773 )));
25774 let zero =
25775 Expression::Literal(Box::new(Literal::Number("0".to_string())));
25776 let eq_zero = Expression::Eq(Box::new(BinaryOp {
25777 left: patindex,
25778 right: zero,
25779 left_comments: Vec::new(),
25780 operator_comments: Vec::new(),
25781 trailing_comments: Vec::new(),
25782 inferred_type: None,
25783 }));
25784 Ok(Expression::Paren(Box::new(Paren {
25785 this: eq_zero,
25786 trailing_comments: Vec::new(),
25787 })))
25788 }
25789 DialectType::Oracle => {
25790 // NVL(REGEXP_LIKE(x, '^[' || CHR(1) || '-' || CHR(127) || ']*$'), TRUE)
25791 // Build the pattern: '^[' || CHR(1) || '-' || CHR(127) || ']*$'
25792 let s1 = Expression::Literal(Box::new(Literal::String(
25793 "^[".to_string(),
25794 )));
25795 let chr1 = Expression::Function(Box::new(Function::new(
25796 "CHR".to_string(),
25797 vec![Expression::Literal(Box::new(Literal::Number(
25798 "1".to_string(),
25799 )))],
25800 )));
25801 let dash =
25802 Expression::Literal(Box::new(Literal::String("-".to_string())));
25803 let chr127 = Expression::Function(Box::new(Function::new(
25804 "CHR".to_string(),
25805 vec![Expression::Literal(Box::new(Literal::Number(
25806 "127".to_string(),
25807 )))],
25808 )));
25809 let s2 = Expression::Literal(Box::new(Literal::String(
25810 "]*$".to_string(),
25811 )));
25812 // Build: '^[' || CHR(1) || '-' || CHR(127) || ']*$'
25813 let concat1 =
25814 Expression::DPipe(Box::new(crate::expressions::DPipe {
25815 this: Box::new(s1),
25816 expression: Box::new(chr1),
25817 safe: None,
25818 }));
25819 let concat2 =
25820 Expression::DPipe(Box::new(crate::expressions::DPipe {
25821 this: Box::new(concat1),
25822 expression: Box::new(dash),
25823 safe: None,
25824 }));
25825 let concat3 =
25826 Expression::DPipe(Box::new(crate::expressions::DPipe {
25827 this: Box::new(concat2),
25828 expression: Box::new(chr127),
25829 safe: None,
25830 }));
25831 let concat4 =
25832 Expression::DPipe(Box::new(crate::expressions::DPipe {
25833 this: Box::new(concat3),
25834 expression: Box::new(s2),
25835 safe: None,
25836 }));
25837 let regexp_like = Expression::Function(Box::new(Function::new(
25838 "REGEXP_LIKE".to_string(),
25839 vec![arg, concat4],
25840 )));
25841 // Use Column("TRUE") to output literal TRUE keyword (not boolean 1/0)
25842 let true_expr =
25843 Expression::Column(Box::new(crate::expressions::Column {
25844 name: Identifier {
25845 name: "TRUE".to_string(),
25846 quoted: false,
25847 trailing_comments: Vec::new(),
25848 span: None,
25849 },
25850 table: None,
25851 join_mark: false,
25852 trailing_comments: Vec::new(),
25853 span: None,
25854 inferred_type: None,
25855 }));
25856 let nvl = Expression::Function(Box::new(Function::new(
25857 "NVL".to_string(),
25858 vec![regexp_like, true_expr],
25859 )));
25860 Ok(nvl)
25861 }
25862 _ => Ok(Expression::Function(Box::new(Function::new(
25863 "IS_ASCII".to_string(),
25864 vec![arg],
25865 )))),
25866 }
25867 } else {
25868 Ok(e)
25869 }
25870 }
25871
25872 Action::StrPositionConvert => {
25873 // STR_POSITION(haystack, needle[, position[, occurrence]]) -> dialect-specific
25874 if let Expression::Function(f) = e {
25875 if f.args.len() < 2 {
25876 return Ok(Expression::Function(f));
25877 }
25878 let mut args = f.args;
25879
25880 let haystack = args.remove(0);
25881 let needle = args.remove(0);
25882 let position = if !args.is_empty() {
25883 Some(args.remove(0))
25884 } else {
25885 Option::None
25886 };
25887 let occurrence = if !args.is_empty() {
25888 Some(args.remove(0))
25889 } else {
25890 Option::None
25891 };
25892
25893 // Helper to build: STRPOS/INSTR(SUBSTRING(haystack, pos), needle) expansion
25894 // Returns: CASE/IF WHEN func(SUBSTRING(haystack, pos), needle[, occ]) = 0 THEN 0 ELSE ... + pos - 1 END
25895 fn build_position_expansion(
25896 haystack: Expression,
25897 needle: Expression,
25898 pos: Expression,
25899 occurrence: Option<Expression>,
25900 inner_func: &str,
25901 wrapper: &str, // "CASE", "IF", "IIF"
25902 ) -> Expression {
25903 let substr = Expression::Function(Box::new(Function::new(
25904 "SUBSTRING".to_string(),
25905 vec![haystack, pos.clone()],
25906 )));
25907 let mut inner_args = vec![substr, needle];
25908 if let Some(occ) = occurrence {
25909 inner_args.push(occ);
25910 }
25911 let inner_call = Expression::Function(Box::new(Function::new(
25912 inner_func.to_string(),
25913 inner_args,
25914 )));
25915 let zero =
25916 Expression::Literal(Box::new(Literal::Number("0".to_string())));
25917 let one =
25918 Expression::Literal(Box::new(Literal::Number("1".to_string())));
25919 let eq_zero = Expression::Eq(Box::new(BinaryOp {
25920 left: inner_call.clone(),
25921 right: zero.clone(),
25922 left_comments: Vec::new(),
25923 operator_comments: Vec::new(),
25924 trailing_comments: Vec::new(),
25925 inferred_type: None,
25926 }));
25927 let add_pos = Expression::Add(Box::new(BinaryOp {
25928 left: inner_call,
25929 right: pos,
25930 left_comments: Vec::new(),
25931 operator_comments: Vec::new(),
25932 trailing_comments: Vec::new(),
25933 inferred_type: None,
25934 }));
25935 let sub_one = Expression::Sub(Box::new(BinaryOp {
25936 left: add_pos,
25937 right: one,
25938 left_comments: Vec::new(),
25939 operator_comments: Vec::new(),
25940 trailing_comments: Vec::new(),
25941 inferred_type: None,
25942 }));
25943
25944 match wrapper {
25945 "CASE" => Expression::Case(Box::new(Case {
25946 operand: Option::None,
25947 whens: vec![(eq_zero, zero)],
25948 else_: Some(sub_one),
25949 comments: Vec::new(),
25950 inferred_type: None,
25951 })),
25952 "IIF" => Expression::Function(Box::new(Function::new(
25953 "IIF".to_string(),
25954 vec![eq_zero, zero, sub_one],
25955 ))),
25956 _ => Expression::Function(Box::new(Function::new(
25957 "IF".to_string(),
25958 vec![eq_zero, zero, sub_one],
25959 ))),
25960 }
25961 }
25962
25963 match target {
25964 // STRPOS group: Athena, DuckDB, Presto, Trino, Drill
25965 DialectType::Athena
25966 | DialectType::DuckDB
25967 | DialectType::Presto
25968 | DialectType::Trino
25969 | DialectType::Drill => {
25970 if let Some(pos) = position {
25971 let wrapper = if matches!(target, DialectType::DuckDB) {
25972 "CASE"
25973 } else {
25974 "IF"
25975 };
25976 let result = build_position_expansion(
25977 haystack, needle, pos, occurrence, "STRPOS", wrapper,
25978 );
25979 if matches!(target, DialectType::Drill) {
25980 // Drill uses backtick-quoted `IF`
25981 if let Expression::Function(mut f) = result {
25982 f.name = "`IF`".to_string();
25983 Ok(Expression::Function(f))
25984 } else {
25985 Ok(result)
25986 }
25987 } else {
25988 Ok(result)
25989 }
25990 } else {
25991 Ok(Expression::Function(Box::new(Function::new(
25992 "STRPOS".to_string(),
25993 vec![haystack, needle],
25994 ))))
25995 }
25996 }
25997 // SQLite: IIF wrapper
25998 DialectType::SQLite => {
25999 if let Some(pos) = position {
26000 Ok(build_position_expansion(
26001 haystack, needle, pos, occurrence, "INSTR", "IIF",
26002 ))
26003 } else {
26004 Ok(Expression::Function(Box::new(Function::new(
26005 "INSTR".to_string(),
26006 vec![haystack, needle],
26007 ))))
26008 }
26009 }
26010 // INSTR group: Teradata, BigQuery, Oracle
26011 DialectType::Teradata | DialectType::BigQuery | DialectType::Oracle => {
26012 let mut a = vec![haystack, needle];
26013 if let Some(pos) = position {
26014 a.push(pos);
26015 }
26016 if let Some(occ) = occurrence {
26017 a.push(occ);
26018 }
26019 Ok(Expression::Function(Box::new(Function::new(
26020 "INSTR".to_string(),
26021 a,
26022 ))))
26023 }
26024 // CHARINDEX group: Snowflake, TSQL
26025 DialectType::Snowflake | DialectType::TSQL | DialectType::Fabric => {
26026 let mut a = vec![needle, haystack];
26027 if let Some(pos) = position {
26028 a.push(pos);
26029 }
26030 Ok(Expression::Function(Box::new(Function::new(
26031 "CHARINDEX".to_string(),
26032 a,
26033 ))))
26034 }
26035 // POSITION(needle IN haystack): PostgreSQL, Materialize, RisingWave, Redshift
26036 DialectType::PostgreSQL
26037 | DialectType::Materialize
26038 | DialectType::RisingWave
26039 | DialectType::Redshift => {
26040 if let Some(pos) = position {
26041 // Build: CASE WHEN POSITION(needle IN SUBSTRING(haystack FROM pos)) = 0 THEN 0
26042 // ELSE POSITION(...) + pos - 1 END
26043 let substr = Expression::Substring(Box::new(
26044 crate::expressions::SubstringFunc {
26045 this: haystack,
26046 start: pos.clone(),
26047 length: Option::None,
26048 from_for_syntax: true,
26049 },
26050 ));
26051 let pos_in = Expression::StrPosition(Box::new(
26052 crate::expressions::StrPosition {
26053 this: Box::new(substr),
26054 substr: Some(Box::new(needle)),
26055 position: Option::None,
26056 occurrence: Option::None,
26057 },
26058 ));
26059 let zero = Expression::Literal(Box::new(Literal::Number(
26060 "0".to_string(),
26061 )));
26062 let one = Expression::Literal(Box::new(Literal::Number(
26063 "1".to_string(),
26064 )));
26065 let eq_zero = Expression::Eq(Box::new(BinaryOp {
26066 left: pos_in.clone(),
26067 right: zero.clone(),
26068 left_comments: Vec::new(),
26069 operator_comments: Vec::new(),
26070 trailing_comments: Vec::new(),
26071 inferred_type: None,
26072 }));
26073 let add_pos = Expression::Add(Box::new(BinaryOp {
26074 left: pos_in,
26075 right: pos,
26076 left_comments: Vec::new(),
26077 operator_comments: Vec::new(),
26078 trailing_comments: Vec::new(),
26079 inferred_type: None,
26080 }));
26081 let sub_one = Expression::Sub(Box::new(BinaryOp {
26082 left: add_pos,
26083 right: one,
26084 left_comments: Vec::new(),
26085 operator_comments: Vec::new(),
26086 trailing_comments: Vec::new(),
26087 inferred_type: None,
26088 }));
26089 Ok(Expression::Case(Box::new(Case {
26090 operand: Option::None,
26091 whens: vec![(eq_zero, zero)],
26092 else_: Some(sub_one),
26093 comments: Vec::new(),
26094 inferred_type: None,
26095 })))
26096 } else {
26097 Ok(Expression::StrPosition(Box::new(
26098 crate::expressions::StrPosition {
26099 this: Box::new(haystack),
26100 substr: Some(Box::new(needle)),
26101 position: Option::None,
26102 occurrence: Option::None,
26103 },
26104 )))
26105 }
26106 }
26107 // LOCATE group: MySQL, Hive, Spark, Databricks, Doris
26108 DialectType::MySQL
26109 | DialectType::SingleStore
26110 | DialectType::TiDB
26111 | DialectType::Hive
26112 | DialectType::Spark
26113 | DialectType::Databricks
26114 | DialectType::Doris
26115 | DialectType::StarRocks => {
26116 let mut a = vec![needle, haystack];
26117 if let Some(pos) = position {
26118 a.push(pos);
26119 }
26120 Ok(Expression::Function(Box::new(Function::new(
26121 "LOCATE".to_string(),
26122 a,
26123 ))))
26124 }
26125 // ClickHouse: POSITION(haystack, needle[, position])
26126 DialectType::ClickHouse => {
26127 let mut a = vec![haystack, needle];
26128 if let Some(pos) = position {
26129 a.push(pos);
26130 }
26131 Ok(Expression::Function(Box::new(Function::new(
26132 "POSITION".to_string(),
26133 a,
26134 ))))
26135 }
26136 _ => {
26137 let mut a = vec![haystack, needle];
26138 if let Some(pos) = position {
26139 a.push(pos);
26140 }
26141 if let Some(occ) = occurrence {
26142 a.push(occ);
26143 }
26144 Ok(Expression::Function(Box::new(Function::new(
26145 "STR_POSITION".to_string(),
26146 a,
26147 ))))
26148 }
26149 }
26150 } else {
26151 Ok(e)
26152 }
26153 }
26154
26155 Action::ArraySumConvert => {
26156 // ARRAY_SUM(arr) -> dialect-specific
26157 if let Expression::Function(f) = e {
26158 let args = f.args;
26159 match target {
26160 DialectType::DuckDB => Ok(Expression::Function(Box::new(
26161 Function::new("LIST_SUM".to_string(), args),
26162 ))),
26163 DialectType::Spark | DialectType::Databricks => {
26164 // AGGREGATE(arr, 0, (acc, x) -> acc + x, acc -> acc)
26165 let arr = args.into_iter().next().unwrap();
26166 let zero =
26167 Expression::Literal(Box::new(Literal::Number("0".to_string())));
26168 let acc_id = Identifier::new("acc");
26169 let x_id = Identifier::new("x");
26170 let acc = Expression::Identifier(acc_id.clone());
26171 let x = Expression::Identifier(x_id.clone());
26172 let add = Expression::Add(Box::new(BinaryOp {
26173 left: acc.clone(),
26174 right: x,
26175 left_comments: Vec::new(),
26176 operator_comments: Vec::new(),
26177 trailing_comments: Vec::new(),
26178 inferred_type: None,
26179 }));
26180 let lambda1 =
26181 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
26182 parameters: vec![acc_id.clone(), x_id],
26183 body: add,
26184 colon: false,
26185 parameter_types: Vec::new(),
26186 }));
26187 let lambda2 =
26188 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
26189 parameters: vec![acc_id],
26190 body: acc,
26191 colon: false,
26192 parameter_types: Vec::new(),
26193 }));
26194 Ok(Expression::Function(Box::new(Function::new(
26195 "AGGREGATE".to_string(),
26196 vec![arr, zero, lambda1, lambda2],
26197 ))))
26198 }
26199 DialectType::Presto | DialectType::Athena => {
26200 // Presto/Athena keep ARRAY_SUM natively
26201 Ok(Expression::Function(Box::new(Function::new(
26202 "ARRAY_SUM".to_string(),
26203 args,
26204 ))))
26205 }
26206 DialectType::Trino => {
26207 // REDUCE(arr, 0, (acc, x) -> acc + x, acc -> acc)
26208 if args.len() == 1 {
26209 let arr = args.into_iter().next().unwrap();
26210 let zero = Expression::Literal(Box::new(Literal::Number(
26211 "0".to_string(),
26212 )));
26213 let acc_id = Identifier::new("acc");
26214 let x_id = Identifier::new("x");
26215 let acc = Expression::Identifier(acc_id.clone());
26216 let x = Expression::Identifier(x_id.clone());
26217 let add = Expression::Add(Box::new(BinaryOp {
26218 left: acc.clone(),
26219 right: x,
26220 left_comments: Vec::new(),
26221 operator_comments: Vec::new(),
26222 trailing_comments: Vec::new(),
26223 inferred_type: None,
26224 }));
26225 let lambda1 = Expression::Lambda(Box::new(
26226 crate::expressions::LambdaExpr {
26227 parameters: vec![acc_id.clone(), x_id],
26228 body: add,
26229 colon: false,
26230 parameter_types: Vec::new(),
26231 },
26232 ));
26233 let lambda2 = Expression::Lambda(Box::new(
26234 crate::expressions::LambdaExpr {
26235 parameters: vec![acc_id],
26236 body: acc,
26237 colon: false,
26238 parameter_types: Vec::new(),
26239 },
26240 ));
26241 Ok(Expression::Function(Box::new(Function::new(
26242 "REDUCE".to_string(),
26243 vec![arr, zero, lambda1, lambda2],
26244 ))))
26245 } else {
26246 Ok(Expression::Function(Box::new(Function::new(
26247 "ARRAY_SUM".to_string(),
26248 args,
26249 ))))
26250 }
26251 }
26252 DialectType::ClickHouse => {
26253 // arraySum(lambda, arr) or arraySum(arr)
26254 Ok(Expression::Function(Box::new(Function::new(
26255 "arraySum".to_string(),
26256 args,
26257 ))))
26258 }
26259 _ => Ok(Expression::Function(Box::new(Function::new(
26260 "ARRAY_SUM".to_string(),
26261 args,
26262 )))),
26263 }
26264 } else {
26265 Ok(e)
26266 }
26267 }
26268
26269 Action::ArraySizeConvert => {
26270 if let Expression::Function(f) = e {
26271 Ok(Expression::Function(Box::new(Function::new(
26272 "REPEATED_COUNT".to_string(),
26273 f.args,
26274 ))))
26275 } else {
26276 Ok(e)
26277 }
26278 }
26279
Action::ArrayAnyConvert => {
    // ARRAY_ANY(arr, lambda) -> dialect-specific "does any element match" form.
    // Dialects with a native predicate function get a direct call (ANY_MATCH);
    // the rest get the pattern (len(arr) = 0 OR len(filter(arr, pred)) <> 0),
    // which is vacuously true for empty arrays.
    if let Expression::Function(f) = e {
        let mut args = f.args;
        if args.len() == 2 {
            let arr = args.remove(0);
            let lambda = args.remove(0);

            // Extract lambda parameter name and body. A non-lambda second
            // argument is treated as a bare predicate with implicit param "x".
            let (param_name, pred_body) =
                if let Expression::Lambda(ref lam) = lambda {
                    let name = if let Some(p) = lam.parameters.first() {
                        p.name.clone()
                    } else {
                        "x".to_string()
                    };
                    (name, lam.body.clone())
                } else {
                    ("x".to_string(), lambda.clone())
                };

            // Helper: build a function call Expression
            let make_func = |name: &str, args: Vec<Expression>| -> Expression {
                Expression::Function(Box::new(Function::new(
                    name.to_string(),
                    args,
                )))
            };

            // Helper: build (len_func(arr) = 0 OR len_func(filter_expr) <> 0) wrapped in Paren.
            // `len_args_extra` carries trailing args some dialects need
            // (e.g. PostgreSQL ARRAY_LENGTH's dimension argument).
            let build_filter_pattern = |len_func: &str,
                                        len_args_extra: Vec<Expression>,
                                        filter_expr: Expression|
             -> Expression {
                // len_func(arr, ...extra) = 0
                let mut len_arr_args = vec![arr.clone()];
                len_arr_args.extend(len_args_extra.clone());
                let len_arr = make_func(len_func, len_arr_args);
                let eq_zero = Expression::Eq(Box::new(BinaryOp::new(
                    len_arr,
                    Expression::number(0),
                )));

                // len_func(filter_expr, ...extra) <> 0
                let mut len_filter_args = vec![filter_expr];
                len_filter_args.extend(len_args_extra);
                let len_filter = make_func(len_func, len_filter_args);
                let neq_zero = Expression::Neq(Box::new(BinaryOp::new(
                    len_filter,
                    Expression::number(0),
                )));

                // (eq_zero OR neq_zero) — parenthesized so the OR survives
                // embedding in a larger boolean expression.
                let or_expr =
                    Expression::Or(Box::new(BinaryOp::new(eq_zero, neq_zero)));
                Expression::Paren(Box::new(Paren {
                    this: or_expr,
                    trailing_comments: Vec::new(),
                }))
            };

            match target {
                DialectType::Trino | DialectType::Presto | DialectType::Athena => {
                    // Native predicate function: ANY_MATCH(arr, lambda).
                    Ok(make_func("ANY_MATCH", vec![arr, lambda]))
                }
                DialectType::ClickHouse => {
                    // (LENGTH(arr) = 0 OR LENGTH(arrayFilter(x -> pred, arr)) <> 0)
                    // ClickHouse arrayFilter takes lambda first, then array
                    let filter_expr =
                        make_func("arrayFilter", vec![lambda, arr.clone()]);
                    Ok(build_filter_pattern("LENGTH", vec![], filter_expr))
                }
                DialectType::Databricks | DialectType::Spark => {
                    // (SIZE(arr) = 0 OR SIZE(FILTER(arr, x -> pred)) <> 0)
                    let filter_expr =
                        make_func("FILTER", vec![arr.clone(), lambda]);
                    Ok(build_filter_pattern("SIZE", vec![], filter_expr))
                }
                DialectType::DuckDB => {
                    // (ARRAY_LENGTH(arr) = 0 OR ARRAY_LENGTH(LIST_FILTER(arr, x -> pred)) <> 0)
                    let filter_expr =
                        make_func("LIST_FILTER", vec![arr.clone(), lambda]);
                    Ok(build_filter_pattern("ARRAY_LENGTH", vec![], filter_expr))
                }
                DialectType::Teradata => {
                    // (CARDINALITY(arr) = 0 OR CARDINALITY(FILTER(arr, x -> pred)) <> 0)
                    let filter_expr =
                        make_func("FILTER", vec![arr.clone(), lambda]);
                    Ok(build_filter_pattern("CARDINALITY", vec![], filter_expr))
                }
                DialectType::BigQuery => {
                    // (ARRAY_LENGTH(arr) = 0 OR ARRAY_LENGTH(ARRAY(SELECT x FROM UNNEST(arr) AS x WHERE pred)) <> 0)
                    // BigQuery has no lambda filter, so the predicate is applied
                    // via a correlated ARRAY(SELECT ...) subquery instead.
                    // Build: SELECT x FROM UNNEST(arr) AS x WHERE pred
                    let param_col = Expression::column(&param_name);
                    let unnest_expr = Expression::Unnest(Box::new(
                        crate::expressions::UnnestFunc {
                            this: arr.clone(),
                            expressions: vec![],
                            with_ordinality: false,
                            alias: Some(Identifier::new(&param_name)),
                            offset_alias: None,
                        },
                    ));
                    let mut sel = crate::expressions::Select::default();
                    sel.expressions = vec![param_col];
                    sel.from = Some(crate::expressions::From {
                        expressions: vec![unnest_expr],
                    });
                    sel.where_clause =
                        Some(crate::expressions::Where { this: pred_body });
                    let array_subquery =
                        make_func("ARRAY", vec![Expression::Select(Box::new(sel))]);
                    Ok(build_filter_pattern("ARRAY_LENGTH", vec![], array_subquery))
                }
                DialectType::PostgreSQL => {
                    // (ARRAY_LENGTH(arr, 1) = 0 OR ARRAY_LENGTH(ARRAY(SELECT x FROM UNNEST(arr) AS _t0(x) WHERE pred), 1) <> 0)
                    // Build: SELECT x FROM UNNEST(arr) AS _t0(x) WHERE pred
                    let param_col = Expression::column(&param_name);
                    // For PostgreSQL, UNNEST uses AS _t0(x) syntax - use TableAlias
                    let unnest_with_alias =
                        Expression::Alias(Box::new(crate::expressions::Alias {
                            this: Expression::Unnest(Box::new(
                                crate::expressions::UnnestFunc {
                                    this: arr.clone(),
                                    expressions: vec![],
                                    with_ordinality: false,
                                    alias: None,
                                    offset_alias: None,
                                },
                            )),
                            alias: Identifier::new("_t0"),
                            column_aliases: vec![Identifier::new(&param_name)],
                            pre_alias_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                            inferred_type: None,
                        }));
                    let mut sel = crate::expressions::Select::default();
                    sel.expressions = vec![param_col];
                    sel.from = Some(crate::expressions::From {
                        expressions: vec![unnest_with_alias],
                    });
                    sel.where_clause =
                        Some(crate::expressions::Where { this: pred_body });
                    let array_subquery =
                        make_func("ARRAY", vec![Expression::Select(Box::new(sel))]);
                    // PostgreSQL ARRAY_LENGTH requires the dimension argument (1).
                    Ok(build_filter_pattern(
                        "ARRAY_LENGTH",
                        vec![Expression::number(1)],
                        array_subquery,
                    ))
                }
                // Unknown target: keep a normalized ARRAY_ANY call.
                _ => Ok(Expression::Function(Box::new(Function::new(
                    "ARRAY_ANY".to_string(),
                    vec![arr, lambda],
                )))),
            }
        } else {
            // Wrong arity: re-emit as ARRAY_ANY untouched.
            Ok(Expression::Function(Box::new(Function::new(
                "ARRAY_ANY".to_string(),
                args,
            ))))
        }
    } else {
        Ok(e)
    }
}
26445
Action::DecodeSimplify => {
    // DECODE(x, search1, result1, ..., default) -> CASE WHEN ... THEN result1 ... [ELSE default] END
    // For literal search values: CASE WHEN x = search THEN result
    // For NULL search: CASE WHEN x IS NULL THEN result
    // For non-literal (column, expr): CASE WHEN x = search OR (x IS NULL AND search IS NULL) THEN result
    // (Oracle DECODE treats NULL = NULL as a match, so non-literal operands
    // need the explicit null-safe OR clause.)
    // True when the expression can never be NULL at runtime, so the
    // null-safe comparison can be skipped.
    fn is_decode_literal(e: &Expression) -> bool {
        matches!(
            e,
            Expression::Literal(_) | Expression::Boolean(_) | Expression::Neg(_)
        )
    }

    // Assembles the CASE from the operand, the (search, result) pairs, and
    // the optional trailing default.
    let build_decode_case =
        |this_expr: Expression,
         pairs: Vec<(Expression, Expression)>,
         default: Option<Expression>| {
            let whens: Vec<(Expression, Expression)> = pairs
                .into_iter()
                .map(|(search, result)| {
                    if matches!(&search, Expression::Null(_)) {
                        // NULL search -> IS NULL
                        let condition = Expression::Is(Box::new(BinaryOp {
                            left: this_expr.clone(),
                            right: Expression::Null(crate::expressions::Null),
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                            inferred_type: None,
                        }));
                        (condition, result)
                    } else if is_decode_literal(&search)
                        || is_decode_literal(&this_expr)
                    {
                        // At least one side is a literal -> simple equality (no NULL check needed)
                        let eq = Expression::Eq(Box::new(BinaryOp {
                            left: this_expr.clone(),
                            right: search,
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                            inferred_type: None,
                        }));
                        (eq, result)
                    } else {
                        // Non-literal -> null-safe comparison
                        // A comparison-expression search value must be wrapped in
                        // parens or it would re-associate with the outer = / IS.
                        let needs_paren = matches!(
                            &search,
                            Expression::Eq(_)
                                | Expression::Neq(_)
                                | Expression::Gt(_)
                                | Expression::Gte(_)
                                | Expression::Lt(_)
                                | Expression::Lte(_)
                        );
                        let search_ref = if needs_paren {
                            Expression::Paren(Box::new(crate::expressions::Paren {
                                this: search.clone(),
                                trailing_comments: Vec::new(),
                            }))
                        } else {
                            search.clone()
                        };
                        // Build: x = search OR (x IS NULL AND search IS NULL)
                        let eq = Expression::Eq(Box::new(BinaryOp {
                            left: this_expr.clone(),
                            right: search_ref,
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                            inferred_type: None,
                        }));
                        // Second (independently wrapped) copy of search for the
                        // IS NULL half of the condition.
                        let search_in_null = if needs_paren {
                            Expression::Paren(Box::new(crate::expressions::Paren {
                                this: search.clone(),
                                trailing_comments: Vec::new(),
                            }))
                        } else {
                            search.clone()
                        };
                        let x_is_null = Expression::Is(Box::new(BinaryOp {
                            left: this_expr.clone(),
                            right: Expression::Null(crate::expressions::Null),
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                            inferred_type: None,
                        }));
                        let search_is_null = Expression::Is(Box::new(BinaryOp {
                            left: search_in_null,
                            right: Expression::Null(crate::expressions::Null),
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                            inferred_type: None,
                        }));
                        let both_null = Expression::And(Box::new(BinaryOp {
                            left: x_is_null,
                            right: search_is_null,
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                            inferred_type: None,
                        }));
                        // The AND half is parenthesized so OR/AND precedence is explicit.
                        let condition = Expression::Or(Box::new(BinaryOp {
                            left: eq,
                            right: Expression::Paren(Box::new(
                                crate::expressions::Paren {
                                    this: both_null,
                                    trailing_comments: Vec::new(),
                                },
                            )),
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                            inferred_type: None,
                        }));
                        (condition, result)
                    }
                })
                .collect();
            Expression::Case(Box::new(Case {
                operand: None,
                whens,
                else_: default,
                comments: Vec::new(),
                inferred_type: None,
            }))
        };

    if let Expression::Decode(decode) = e {
        // Structured node: parser already split operand / pairs / default.
        Ok(build_decode_case(
            decode.this,
            decode.search_results,
            decode.default,
        ))
    } else if let Expression::DecodeCase(dc) = e {
        // DecodeCase has flat expressions: [x, s1, r1, s2, r2, ..., default?]
        let mut exprs = dc.expressions;
        if exprs.len() < 3 {
            // Not enough operands for even one WHEN pair; leave untouched.
            return Ok(Expression::DecodeCase(Box::new(
                crate::expressions::DecodeCase { expressions: exprs },
            )));
        }
        let this_expr = exprs.remove(0);
        let mut pairs = Vec::new();
        let mut default = None;
        let mut i = 0;
        while i + 1 < exprs.len() {
            pairs.push((exprs[i].clone(), exprs[i + 1].clone()));
            i += 2;
        }
        if i < exprs.len() {
            // Odd remaining element is the default
            default = Some(exprs[i].clone());
        }
        Ok(build_decode_case(this_expr, pairs, default))
    } else {
        Ok(e)
    }
}
26606
26607 Action::CreateTableLikeToCtas => {
26608 // CREATE TABLE a LIKE b -> CREATE TABLE a AS SELECT * FROM b LIMIT 0
26609 if let Expression::CreateTable(ct) = e {
26610 let like_source = ct.constraints.iter().find_map(|c| {
26611 if let crate::expressions::TableConstraint::Like { source, .. } = c {
26612 Some(source.clone())
26613 } else {
26614 None
26615 }
26616 });
26617 if let Some(source_table) = like_source {
26618 let mut new_ct = *ct;
26619 new_ct.constraints.clear();
26620 // Build: SELECT * FROM b LIMIT 0
26621 let select = Expression::Select(Box::new(crate::expressions::Select {
26622 expressions: vec![Expression::Star(crate::expressions::Star {
26623 table: None,
26624 except: None,
26625 replace: None,
26626 rename: None,
26627 trailing_comments: Vec::new(),
26628 span: None,
26629 })],
26630 from: Some(crate::expressions::From {
26631 expressions: vec![Expression::Table(Box::new(source_table))],
26632 }),
26633 limit: Some(crate::expressions::Limit {
26634 this: Expression::Literal(Box::new(Literal::Number(
26635 "0".to_string(),
26636 ))),
26637 percent: false,
26638 comments: Vec::new(),
26639 }),
26640 ..Default::default()
26641 }));
26642 new_ct.as_select = Some(select);
26643 Ok(Expression::CreateTable(Box::new(new_ct)))
26644 } else {
26645 Ok(Expression::CreateTable(ct))
26646 }
26647 } else {
26648 Ok(e)
26649 }
26650 }
26651
26652 Action::CreateTableLikeToSelectInto => {
26653 // CREATE TABLE a LIKE b -> SELECT TOP 0 * INTO a FROM b AS temp
26654 if let Expression::CreateTable(ct) = e {
26655 let like_source = ct.constraints.iter().find_map(|c| {
26656 if let crate::expressions::TableConstraint::Like { source, .. } = c {
26657 Some(source.clone())
26658 } else {
26659 None
26660 }
26661 });
26662 if let Some(source_table) = like_source {
26663 let mut aliased_source = source_table;
26664 aliased_source.alias = Some(Identifier::new("temp"));
26665 // Build: SELECT TOP 0 * INTO a FROM b AS temp
26666 let select = Expression::Select(Box::new(crate::expressions::Select {
26667 expressions: vec![Expression::Star(crate::expressions::Star {
26668 table: None,
26669 except: None,
26670 replace: None,
26671 rename: None,
26672 trailing_comments: Vec::new(),
26673 span: None,
26674 })],
26675 from: Some(crate::expressions::From {
26676 expressions: vec![Expression::Table(Box::new(aliased_source))],
26677 }),
26678 into: Some(crate::expressions::SelectInto {
26679 this: Expression::Table(Box::new(ct.name.clone())),
26680 temporary: false,
26681 unlogged: false,
26682 bulk_collect: false,
26683 expressions: Vec::new(),
26684 }),
26685 top: Some(crate::expressions::Top {
26686 this: Expression::Literal(Box::new(Literal::Number(
26687 "0".to_string(),
26688 ))),
26689 percent: false,
26690 with_ties: false,
26691 parenthesized: false,
26692 }),
26693 ..Default::default()
26694 }));
26695 Ok(select)
26696 } else {
26697 Ok(Expression::CreateTable(ct))
26698 }
26699 } else {
26700 Ok(e)
26701 }
26702 }
26703
26704 Action::CreateTableLikeToAs => {
26705 // CREATE TABLE a LIKE b -> CREATE TABLE a AS b (ClickHouse)
26706 if let Expression::CreateTable(ct) = e {
26707 let like_source = ct.constraints.iter().find_map(|c| {
26708 if let crate::expressions::TableConstraint::Like { source, .. } = c {
26709 Some(source.clone())
26710 } else {
26711 None
26712 }
26713 });
26714 if let Some(source_table) = like_source {
26715 let mut new_ct = *ct;
26716 new_ct.constraints.clear();
26717 // AS b (just a table reference, not a SELECT)
26718 new_ct.as_select = Some(Expression::Table(Box::new(source_table)));
26719 Ok(Expression::CreateTable(Box::new(new_ct)))
26720 } else {
26721 Ok(Expression::CreateTable(ct))
26722 }
26723 } else {
26724 Ok(e)
26725 }
26726 }
26727
26728 Action::TsOrDsToDateConvert => {
26729 // TS_OR_DS_TO_DATE(x[, fmt]) -> dialect-specific date conversion
26730 if let Expression::Function(f) = e {
26731 let mut args = f.args;
26732 let this = args.remove(0);
26733 let fmt = if !args.is_empty() {
26734 match &args[0] {
26735 Expression::Literal(lit)
26736 if matches!(lit.as_ref(), Literal::String(_)) =>
26737 {
26738 let Literal::String(s) = lit.as_ref() else {
26739 unreachable!()
26740 };
26741 Some(s.clone())
26742 }
26743 _ => None,
26744 }
26745 } else {
26746 None
26747 };
26748 Ok(Expression::TsOrDsToDate(Box::new(
26749 crate::expressions::TsOrDsToDate {
26750 this: Box::new(this),
26751 format: fmt,
26752 safe: None,
26753 },
26754 )))
26755 } else {
26756 Ok(e)
26757 }
26758 }
26759
26760 Action::TsOrDsToDateStrConvert => {
26761 // TS_OR_DS_TO_DATE_STR(x) -> SUBSTRING(CAST(x AS type), 1, 10)
26762 if let Expression::Function(f) = e {
26763 let arg = f.args.into_iter().next().unwrap();
26764 let str_type = match target {
26765 DialectType::DuckDB
26766 | DialectType::PostgreSQL
26767 | DialectType::Materialize => DataType::Text,
26768 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
26769 DataType::Custom {
26770 name: "STRING".to_string(),
26771 }
26772 }
26773 DialectType::Presto
26774 | DialectType::Trino
26775 | DialectType::Athena
26776 | DialectType::Drill => DataType::VarChar {
26777 length: None,
26778 parenthesized_length: false,
26779 },
26780 DialectType::MySQL | DialectType::Doris | DialectType::StarRocks => {
26781 DataType::Custom {
26782 name: "STRING".to_string(),
26783 }
26784 }
26785 _ => DataType::VarChar {
26786 length: None,
26787 parenthesized_length: false,
26788 },
26789 };
26790 let cast_expr = Expression::Cast(Box::new(Cast {
26791 this: arg,
26792 to: str_type,
26793 double_colon_syntax: false,
26794 trailing_comments: Vec::new(),
26795 format: None,
26796 default: None,
26797 inferred_type: None,
26798 }));
26799 Ok(Expression::Substring(Box::new(
26800 crate::expressions::SubstringFunc {
26801 this: cast_expr,
26802 start: Expression::number(1),
26803 length: Some(Expression::number(10)),
26804 from_for_syntax: false,
26805 },
26806 )))
26807 } else {
26808 Ok(e)
26809 }
26810 }
26811
26812 Action::DateStrToDateConvert => {
26813 // DATE_STR_TO_DATE(x) -> dialect-specific
26814 if let Expression::Function(f) = e {
26815 let arg = f.args.into_iter().next().unwrap();
26816 match target {
26817 DialectType::SQLite => {
26818 // SQLite: just the bare expression (dates are strings)
26819 Ok(arg)
26820 }
26821 _ => Ok(Expression::Cast(Box::new(Cast {
26822 this: arg,
26823 to: DataType::Date,
26824 double_colon_syntax: false,
26825 trailing_comments: Vec::new(),
26826 format: None,
26827 default: None,
26828 inferred_type: None,
26829 }))),
26830 }
26831 } else {
26832 Ok(e)
26833 }
26834 }
26835
26836 Action::TimeStrToDateConvert => {
26837 // TIME_STR_TO_DATE(x) -> dialect-specific
26838 if let Expression::Function(f) = e {
26839 let arg = f.args.into_iter().next().unwrap();
26840 match target {
26841 DialectType::Hive
26842 | DialectType::Doris
26843 | DialectType::StarRocks
26844 | DialectType::Snowflake => Ok(Expression::Function(Box::new(
26845 Function::new("TO_DATE".to_string(), vec![arg]),
26846 ))),
26847 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
26848 // Presto: CAST(x AS TIMESTAMP)
26849 Ok(Expression::Cast(Box::new(Cast {
26850 this: arg,
26851 to: DataType::Timestamp {
26852 timezone: false,
26853 precision: None,
26854 },
26855 double_colon_syntax: false,
26856 trailing_comments: Vec::new(),
26857 format: None,
26858 default: None,
26859 inferred_type: None,
26860 })))
26861 }
26862 _ => {
26863 // Default: CAST(x AS DATE)
26864 Ok(Expression::Cast(Box::new(Cast {
26865 this: arg,
26866 to: DataType::Date,
26867 double_colon_syntax: false,
26868 trailing_comments: Vec::new(),
26869 format: None,
26870 default: None,
26871 inferred_type: None,
26872 })))
26873 }
26874 }
26875 } else {
26876 Ok(e)
26877 }
26878 }
26879
Action::TimeStrToTimeConvert => {
    // TIME_STR_TO_TIME(x[, zone]) -> dialect-specific CAST to timestamp type
    // The optional zone argument switches most dialects to their
    // timezone-aware timestamp type.
    if let Expression::Function(f) = e {
        let mut args = f.args;
        // NOTE(review): remove(0) panics if the call somehow has zero
        // arguments — other arms guard this; TODO add the same guard here.
        let this = args.remove(0);
        // Only a string-literal second argument is honored as a zone name.
        let zone = if !args.is_empty() {
            match &args[0] {
                Expression::Literal(lit)
                    if matches!(lit.as_ref(), Literal::String(_)) =>
                {
                    let Literal::String(s) = lit.as_ref() else {
                        unreachable!()
                    };
                    Some(s.clone())
                }
                _ => None,
            }
        } else {
            None
        };
        let has_zone = zone.is_some();

        match target {
            DialectType::SQLite => {
                // SQLite: just the bare expression
                Ok(this)
            }
            DialectType::MySQL => {
                if has_zone {
                    // MySQL with zone: TIMESTAMP(x)
                    Ok(Expression::Function(Box::new(Function::new(
                        "TIMESTAMP".to_string(),
                        vec![this],
                    ))))
                } else {
                    // MySQL: CAST(x AS DATETIME) or with precision
                    // Use DataType::Custom to avoid MySQL's transform_cast converting
                    // CAST(x AS TIMESTAMP) -> TIMESTAMP(x)
                    // Infer fractional-second precision by counting digits
                    // after the last '.' in a string literal.
                    let precision = if let Expression::Literal(ref lit) = this {
                        if let Literal::String(ref s) = lit.as_ref() {
                            if let Some(dot_pos) = s.rfind('.') {
                                let frac = &s[dot_pos + 1..];
                                let digit_count = frac
                                    .chars()
                                    .take_while(|c| c.is_ascii_digit())
                                    .count();
                                if digit_count > 0 {
                                    Some(digit_count)
                                } else {
                                    None
                                }
                            } else {
                                None
                            }
                        } else {
                            None
                        }
                    } else {
                        None
                    };
                    let type_name = match precision {
                        Some(p) => format!("DATETIME({})", p),
                        None => "DATETIME".to_string(),
                    };
                    Ok(Expression::Cast(Box::new(Cast {
                        this,
                        to: DataType::Custom { name: type_name },
                        double_colon_syntax: false,
                        trailing_comments: Vec::new(),
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                }
            }
            DialectType::ClickHouse => {
                if has_zone {
                    // ClickHouse with zone: CAST(x AS DateTime64(6, 'zone'))
                    // We need to strip the timezone offset from the literal if present
                    let clean_this = if let Expression::Literal(ref lit) = this {
                        if let Literal::String(ref s) = lit.as_ref() {
                            // Strip timezone offset like "-08:00" or "+00:00"
                            let re_offset = s.rfind(|c: char| c == '+' || c == '-');
                            if let Some(offset_pos) = re_offset {
                                // offset_pos > 10 skips the '-' separators in
                                // the leading YYYY-MM-DD date part.
                                if offset_pos > 10 {
                                    // After the date part
                                    let trimmed = s[..offset_pos].to_string();
                                    Expression::Literal(Box::new(Literal::String(
                                        trimmed,
                                    )))
                                } else {
                                    this.clone()
                                }
                            } else {
                                this.clone()
                            }
                        } else {
                            this.clone()
                        }
                    } else {
                        this.clone()
                    };
                    let zone_str = zone.unwrap();
                    // Build: CAST(x AS DateTime64(6, 'zone'))
                    let type_name = format!("DateTime64(6, '{}')", zone_str);
                    Ok(Expression::Cast(Box::new(Cast {
                        this: clean_this,
                        to: DataType::Custom { name: type_name },
                        double_colon_syntax: false,
                        trailing_comments: Vec::new(),
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                } else {
                    // No zone: microsecond-precision DateTime64(6).
                    Ok(Expression::Cast(Box::new(Cast {
                        this,
                        to: DataType::Custom {
                            name: "DateTime64(6)".to_string(),
                        },
                        double_colon_syntax: false,
                        trailing_comments: Vec::new(),
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                }
            }
            DialectType::BigQuery => {
                if has_zone {
                    // BigQuery with zone: CAST(x AS TIMESTAMP)
                    Ok(Expression::Cast(Box::new(Cast {
                        this,
                        to: DataType::Timestamp {
                            timezone: false,
                            precision: None,
                        },
                        double_colon_syntax: false,
                        trailing_comments: Vec::new(),
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                } else {
                    // BigQuery: CAST(x AS DATETIME) - Timestamp{tz:false} renders as DATETIME for BigQuery
                    Ok(Expression::Cast(Box::new(Cast {
                        this,
                        to: DataType::Custom {
                            name: "DATETIME".to_string(),
                        },
                        double_colon_syntax: false,
                        trailing_comments: Vec::new(),
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                }
            }
            DialectType::Doris => {
                // Doris: CAST(x AS DATETIME)
                Ok(Expression::Cast(Box::new(Cast {
                    this,
                    to: DataType::Custom {
                        name: "DATETIME".to_string(),
                    },
                    double_colon_syntax: false,
                    trailing_comments: Vec::new(),
                    format: None,
                    default: None,
                    inferred_type: None,
                })))
            }
            DialectType::TSQL | DialectType::Fabric => {
                if has_zone {
                    // TSQL with zone: CAST(x AS DATETIMEOFFSET) AT TIME ZONE 'UTC'
                    let cast_expr = Expression::Cast(Box::new(Cast {
                        this,
                        to: DataType::Custom {
                            name: "DATETIMEOFFSET".to_string(),
                        },
                        double_colon_syntax: false,
                        trailing_comments: Vec::new(),
                        format: None,
                        default: None,
                        inferred_type: None,
                    }));
                    Ok(Expression::AtTimeZone(Box::new(
                        crate::expressions::AtTimeZone {
                            this: cast_expr,
                            zone: Expression::Literal(Box::new(Literal::String(
                                "UTC".to_string(),
                            ))),
                        },
                    )))
                } else {
                    // TSQL: CAST(x AS DATETIME2)
                    Ok(Expression::Cast(Box::new(Cast {
                        this,
                        to: DataType::Custom {
                            name: "DATETIME2".to_string(),
                        },
                        double_colon_syntax: false,
                        trailing_comments: Vec::new(),
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                }
            }
            DialectType::DuckDB => {
                if has_zone {
                    // DuckDB with zone: CAST(x AS TIMESTAMPTZ)
                    Ok(Expression::Cast(Box::new(Cast {
                        this,
                        to: DataType::Timestamp {
                            timezone: true,
                            precision: None,
                        },
                        double_colon_syntax: false,
                        trailing_comments: Vec::new(),
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                } else {
                    // DuckDB: CAST(x AS TIMESTAMP)
                    Ok(Expression::Cast(Box::new(Cast {
                        this,
                        to: DataType::Timestamp {
                            timezone: false,
                            precision: None,
                        },
                        double_colon_syntax: false,
                        trailing_comments: Vec::new(),
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                }
            }
            DialectType::PostgreSQL
            | DialectType::Materialize
            | DialectType::RisingWave => {
                if has_zone {
                    // PostgreSQL with zone: CAST(x AS TIMESTAMPTZ)
                    Ok(Expression::Cast(Box::new(Cast {
                        this,
                        to: DataType::Timestamp {
                            timezone: true,
                            precision: None,
                        },
                        double_colon_syntax: false,
                        trailing_comments: Vec::new(),
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                } else {
                    // PostgreSQL: CAST(x AS TIMESTAMP)
                    Ok(Expression::Cast(Box::new(Cast {
                        this,
                        to: DataType::Timestamp {
                            timezone: false,
                            precision: None,
                        },
                        double_colon_syntax: false,
                        trailing_comments: Vec::new(),
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                }
            }
            DialectType::Snowflake => {
                if has_zone {
                    // Snowflake with zone: CAST(x AS TIMESTAMPTZ)
                    Ok(Expression::Cast(Box::new(Cast {
                        this,
                        to: DataType::Timestamp {
                            timezone: true,
                            precision: None,
                        },
                        double_colon_syntax: false,
                        trailing_comments: Vec::new(),
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                } else {
                    // Snowflake: CAST(x AS TIMESTAMP)
                    Ok(Expression::Cast(Box::new(Cast {
                        this,
                        to: DataType::Timestamp {
                            timezone: false,
                            precision: None,
                        },
                        double_colon_syntax: false,
                        trailing_comments: Vec::new(),
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                }
            }
            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                if has_zone {
                    // Presto/Trino with zone: CAST(x AS TIMESTAMP WITH TIME ZONE)
                    // Check for precision from sub-second digits
                    // (only Trino renders an explicit precision).
                    let precision = if let Expression::Literal(ref lit) = this {
                        if let Literal::String(ref s) = lit.as_ref() {
                            if let Some(dot_pos) = s.rfind('.') {
                                let frac = &s[dot_pos + 1..];
                                let digit_count = frac
                                    .chars()
                                    .take_while(|c| c.is_ascii_digit())
                                    .count();
                                if digit_count > 0
                                    && matches!(target, DialectType::Trino)
                                {
                                    Some(digit_count as u32)
                                } else {
                                    None
                                }
                            } else {
                                None
                            }
                        } else {
                            None
                        }
                    } else {
                        None
                    };
                    let dt = if let Some(prec) = precision {
                        DataType::Timestamp {
                            timezone: true,
                            precision: Some(prec),
                        }
                    } else {
                        DataType::Timestamp {
                            timezone: true,
                            precision: None,
                        }
                    };
                    Ok(Expression::Cast(Box::new(Cast {
                        this,
                        to: dt,
                        double_colon_syntax: false,
                        trailing_comments: Vec::new(),
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                } else {
                    // Check for sub-second precision for Trino
                    let precision = if let Expression::Literal(ref lit) = this {
                        if let Literal::String(ref s) = lit.as_ref() {
                            if let Some(dot_pos) = s.rfind('.') {
                                let frac = &s[dot_pos + 1..];
                                let digit_count = frac
                                    .chars()
                                    .take_while(|c| c.is_ascii_digit())
                                    .count();
                                if digit_count > 0
                                    && matches!(target, DialectType::Trino)
                                {
                                    Some(digit_count as u32)
                                } else {
                                    None
                                }
                            } else {
                                None
                            }
                        } else {
                            None
                        }
                    } else {
                        None
                    };
                    let dt = DataType::Timestamp {
                        timezone: false,
                        precision,
                    };
                    Ok(Expression::Cast(Box::new(Cast {
                        this,
                        to: dt,
                        double_colon_syntax: false,
                        trailing_comments: Vec::new(),
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                }
            }
            DialectType::Redshift => {
                if has_zone {
                    // Redshift with zone: CAST(x AS TIMESTAMP WITH TIME ZONE)
                    Ok(Expression::Cast(Box::new(Cast {
                        this,
                        to: DataType::Timestamp {
                            timezone: true,
                            precision: None,
                        },
                        double_colon_syntax: false,
                        trailing_comments: Vec::new(),
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                } else {
                    // Redshift: CAST(x AS TIMESTAMP)
                    Ok(Expression::Cast(Box::new(Cast {
                        this,
                        to: DataType::Timestamp {
                            timezone: false,
                            precision: None,
                        },
                        double_colon_syntax: false,
                        trailing_comments: Vec::new(),
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                }
            }
            _ => {
                // Default: CAST(x AS TIMESTAMP)
                Ok(Expression::Cast(Box::new(Cast {
                    this,
                    to: DataType::Timestamp {
                        timezone: false,
                        precision: None,
                    },
                    double_colon_syntax: false,
                    trailing_comments: Vec::new(),
                    format: None,
                    default: None,
                    inferred_type: None,
                })))
            }
        }
    } else {
        Ok(e)
    }
}
27324
            Action::DateToDateStrConvert => {
                // DATE_TO_DATE_STR(x) -> CAST(x AS text_type) per dialect
                //
                // Rewrites the canonical DATE_TO_DATE_STR function into a plain
                // cast to the target dialect's preferred "string" type.
                if let Expression::Function(f) = e {
                    // Canonical form carries exactly one argument; .unwrap()
                    // panics otherwise (same arity convention as sibling arms).
                    let arg = f.args.into_iter().next().unwrap();
                    let str_type = match target {
                        DialectType::DuckDB => DataType::Text,
                        DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
                            DataType::Custom {
                                name: "STRING".to_string(),
                            }
                        }
                        // NOTE(review): this arm and the catch-all below build the
                        // same VarChar — the explicit dialect list is documentation
                        // only and could be folded into `_`.
                        DialectType::Presto
                        | DialectType::Trino
                        | DialectType::Athena
                        | DialectType::Drill => DataType::VarChar {
                            length: None,
                            parenthesized_length: false,
                        },
                        _ => DataType::VarChar {
                            length: None,
                            parenthesized_length: false,
                        },
                    };
                    Ok(Expression::Cast(Box::new(Cast {
                        this: arg,
                        to: str_type,
                        double_colon_syntax: false,
                        trailing_comments: Vec::new(),
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                } else {
                    // Not a function node: leave the expression untouched.
                    Ok(e)
                }
            }
27361
            Action::DateToDiConvert => {
                // DATE_TO_DI(x) -> CAST(format_func(x, fmt) AS INT)
                //
                // Renders a date as an 8-digit integer (YYYYMMDD): format the date
                // with the target dialect's formatting function, then cast to int.
                if let Expression::Function(f) = e {
                    let arg = f.args.into_iter().next().unwrap();
                    // Dialect-specific formatting call producing the 'YYYYMMDD' string.
                    let inner = match target {
                        DialectType::DuckDB => {
                            // STRFTIME(x, '%Y%m%d')
                            Expression::Function(Box::new(Function::new(
                                "STRFTIME".to_string(),
                                vec![arg, Expression::string("%Y%m%d")],
                            )))
                        }
                        DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
                            // DATE_FORMAT(x, 'yyyyMMdd') — Java-style format tokens
                            Expression::Function(Box::new(Function::new(
                                "DATE_FORMAT".to_string(),
                                vec![arg, Expression::string("yyyyMMdd")],
                            )))
                        }
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // DATE_FORMAT(x, '%Y%m%d') — MySQL-style format tokens
                            Expression::Function(Box::new(Function::new(
                                "DATE_FORMAT".to_string(),
                                vec![arg, Expression::string("%Y%m%d")],
                            )))
                        }
                        DialectType::Drill => {
                            // TO_DATE(x, 'yyyyMMdd')
                            Expression::Function(Box::new(Function::new(
                                "TO_DATE".to_string(),
                                vec![arg, Expression::string("yyyyMMdd")],
                            )))
                        }
                        _ => {
                            // Default: STRFTIME(x, '%Y%m%d')
                            Expression::Function(Box::new(Function::new(
                                "STRFTIME".to_string(),
                                vec![arg, Expression::string("%Y%m%d")],
                            )))
                        }
                    };
                    // Use INT (not INTEGER) for Presto/Trino
                    // Custom("INT") forces the literal spelling; DataType::Int may
                    // render differently for other dialects.
                    let int_type = match target {
                        DialectType::Presto
                        | DialectType::Trino
                        | DialectType::Athena
                        | DialectType::TSQL
                        | DialectType::Fabric
                        | DialectType::SQLite
                        | DialectType::Redshift => DataType::Custom {
                            name: "INT".to_string(),
                        },
                        _ => DataType::Int {
                            length: None,
                            integer_spelling: false,
                        },
                    };
                    Ok(Expression::Cast(Box::new(Cast {
                        this: inner,
                        to: int_type,
                        double_colon_syntax: false,
                        trailing_comments: Vec::new(),
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                } else {
                    Ok(e)
                }
            }
27432
            Action::DiToDateConvert => {
                // DI_TO_DATE(x) -> dialect-specific integer-to-date conversion
                //
                // Inverse of DATE_TO_DI: parse an 8-digit YYYYMMDD integer back
                // into a DATE value using the target dialect's parsing idiom.
                if let Expression::Function(f) = e {
                    let arg = f.args.into_iter().next().unwrap();
                    match target {
                        DialectType::DuckDB => {
                            // CAST(STRPTIME(CAST(x AS TEXT), '%Y%m%d') AS DATE)
                            let cast_text = Expression::Cast(Box::new(Cast {
                                this: arg,
                                to: DataType::Text,
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                                inferred_type: None,
                            }));
                            let strptime = Expression::Function(Box::new(Function::new(
                                "STRPTIME".to_string(),
                                vec![cast_text, Expression::string("%Y%m%d")],
                            )));
                            // STRPTIME yields a timestamp; outer cast truncates to DATE.
                            Ok(Expression::Cast(Box::new(Cast {
                                this: strptime,
                                to: DataType::Date,
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                                inferred_type: None,
                            })))
                        }
                        DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
                            // TO_DATE(CAST(x AS STRING), 'yyyyMMdd')
                            let cast_str = Expression::Cast(Box::new(Cast {
                                this: arg,
                                to: DataType::Custom {
                                    name: "STRING".to_string(),
                                },
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                                inferred_type: None,
                            }));
                            Ok(Expression::Function(Box::new(Function::new(
                                "TO_DATE".to_string(),
                                vec![cast_str, Expression::string("yyyyMMdd")],
                            ))))
                        }
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // CAST(DATE_PARSE(CAST(x AS VARCHAR), '%Y%m%d') AS DATE)
                            let cast_varchar = Expression::Cast(Box::new(Cast {
                                this: arg,
                                to: DataType::VarChar {
                                    length: None,
                                    parenthesized_length: false,
                                },
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                                inferred_type: None,
                            }));
                            let date_parse = Expression::Function(Box::new(Function::new(
                                "DATE_PARSE".to_string(),
                                vec![cast_varchar, Expression::string("%Y%m%d")],
                            )));
                            Ok(Expression::Cast(Box::new(Cast {
                                this: date_parse,
                                to: DataType::Date,
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                                inferred_type: None,
                            })))
                        }
                        DialectType::Drill => {
                            // TO_DATE(CAST(x AS VARCHAR), 'yyyyMMdd')
                            let cast_varchar = Expression::Cast(Box::new(Cast {
                                this: arg,
                                to: DataType::VarChar {
                                    length: None,
                                    parenthesized_length: false,
                                },
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                                inferred_type: None,
                            }));
                            Ok(Expression::Function(Box::new(Function::new(
                                "TO_DATE".to_string(),
                                vec![cast_varchar, Expression::string("yyyyMMdd")],
                            ))))
                        }
                        // No known conversion: emit the canonical name verbatim and
                        // let the target's generator (or the user) deal with it.
                        _ => Ok(Expression::Function(Box::new(Function::new(
                            "DI_TO_DATE".to_string(),
                            vec![arg],
                        )))),
                    }
                } else {
                    Ok(e)
                }
            }
27537
27538 Action::TsOrDiToDiConvert => {
27539 // TS_OR_DI_TO_DI(x) -> CAST(SUBSTR(REPLACE(CAST(x AS type), '-', ''), 1, 8) AS INT)
27540 if let Expression::Function(f) = e {
27541 let arg = f.args.into_iter().next().unwrap();
27542 let str_type = match target {
27543 DialectType::DuckDB => DataType::Text,
27544 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
27545 DataType::Custom {
27546 name: "STRING".to_string(),
27547 }
27548 }
27549 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
27550 DataType::VarChar {
27551 length: None,
27552 parenthesized_length: false,
27553 }
27554 }
27555 _ => DataType::VarChar {
27556 length: None,
27557 parenthesized_length: false,
27558 },
27559 };
27560 let cast_str = Expression::Cast(Box::new(Cast {
27561 this: arg,
27562 to: str_type,
27563 double_colon_syntax: false,
27564 trailing_comments: Vec::new(),
27565 format: None,
27566 default: None,
27567 inferred_type: None,
27568 }));
27569 let replace_expr = Expression::Function(Box::new(Function::new(
27570 "REPLACE".to_string(),
27571 vec![cast_str, Expression::string("-"), Expression::string("")],
27572 )));
27573 let substr_name = match target {
27574 DialectType::DuckDB
27575 | DialectType::Hive
27576 | DialectType::Spark
27577 | DialectType::Databricks => "SUBSTR",
27578 _ => "SUBSTR",
27579 };
27580 let substr = Expression::Function(Box::new(Function::new(
27581 substr_name.to_string(),
27582 vec![replace_expr, Expression::number(1), Expression::number(8)],
27583 )));
27584 // Use INT (not INTEGER) for Presto/Trino etc.
27585 let int_type = match target {
27586 DialectType::Presto
27587 | DialectType::Trino
27588 | DialectType::Athena
27589 | DialectType::TSQL
27590 | DialectType::Fabric
27591 | DialectType::SQLite
27592 | DialectType::Redshift => DataType::Custom {
27593 name: "INT".to_string(),
27594 },
27595 _ => DataType::Int {
27596 length: None,
27597 integer_spelling: false,
27598 },
27599 };
27600 Ok(Expression::Cast(Box::new(Cast {
27601 this: substr,
27602 to: int_type,
27603 double_colon_syntax: false,
27604 trailing_comments: Vec::new(),
27605 format: None,
27606 default: None,
27607 inferred_type: None,
27608 })))
27609 } else {
27610 Ok(e)
27611 }
27612 }
27613
            Action::UnixToStrConvert => {
                // UNIX_TO_STR(x, fmt) -> convert to Expression::UnixToStr for generator
                //
                // If `fmt` is a string literal, the dedicated UnixToStr node lets
                // each generator apply its own format translation; otherwise the
                // target expression is built here directly.
                if let Expression::Function(f) = e {
                    let mut args = f.args;
                    let this = args.remove(0);
                    // Optional second argument: the format expression.
                    let fmt_expr = if !args.is_empty() {
                        Some(args.remove(0))
                    } else {
                        None
                    };

                    // Check if format is a string literal
                    let fmt_str = fmt_expr.as_ref().and_then(|f| {
                        if let Expression::Literal(lit) = f {
                            if let Literal::String(s) = lit.as_ref() {
                                Some(s.clone())
                            } else {
                                None
                            }
                        } else {
                            None
                        }
                    });

                    if let Some(fmt_string) = fmt_str {
                        // String literal format -> use UnixToStr expression (generator handles it)
                        Ok(Expression::UnixToStr(Box::new(
                            crate::expressions::UnixToStr {
                                this: Box::new(this),
                                format: Some(fmt_string),
                            },
                        )))
                    } else if let Some(fmt_e) = fmt_expr {
                        // Non-literal format (e.g., identifier `y`) -> build target expression directly
                        match target {
                            DialectType::DuckDB => {
                                // STRFTIME(TO_TIMESTAMP(x), y)
                                let to_ts = Expression::Function(Box::new(Function::new(
                                    "TO_TIMESTAMP".to_string(),
                                    vec![this],
                                )));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "STRFTIME".to_string(),
                                    vec![to_ts, fmt_e],
                                ))))
                            }
                            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                                // DATE_FORMAT(FROM_UNIXTIME(x), y)
                                let from_unix = Expression::Function(Box::new(Function::new(
                                    "FROM_UNIXTIME".to_string(),
                                    vec![this],
                                )));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_FORMAT".to_string(),
                                    vec![from_unix, fmt_e],
                                ))))
                            }
                            DialectType::Hive
                            | DialectType::Spark
                            | DialectType::Databricks
                            | DialectType::Doris
                            | DialectType::StarRocks => {
                                // FROM_UNIXTIME(x, y) — these dialects take the format inline
                                Ok(Expression::Function(Box::new(Function::new(
                                    "FROM_UNIXTIME".to_string(),
                                    vec![this, fmt_e],
                                ))))
                            }
                            _ => {
                                // Default: keep as UNIX_TO_STR(x, y)
                                Ok(Expression::Function(Box::new(Function::new(
                                    "UNIX_TO_STR".to_string(),
                                    vec![this, fmt_e],
                                ))))
                            }
                        }
                    } else {
                        // No format argument at all: UnixToStr with format: None.
                        Ok(Expression::UnixToStr(Box::new(
                            crate::expressions::UnixToStr {
                                this: Box::new(this),
                                format: None,
                            },
                        )))
                    }
                } else {
                    Ok(e)
                }
            }
27702
            Action::UnixToTimeConvert => {
                // UNIX_TO_TIME(x) -> convert to Expression::UnixToTime for generator
                //
                // Purely structural rewrite: wrap the single argument in a
                // UnixToTime node with all modifiers unset, so the target
                // generator decides how to render the epoch conversion.
                if let Expression::Function(f) = e {
                    let arg = f.args.into_iter().next().unwrap();
                    Ok(Expression::UnixToTime(Box::new(
                        crate::expressions::UnixToTime {
                            this: Box::new(arg),
                            scale: None,
                            zone: None,
                            hours: None,
                            minutes: None,
                            format: None,
                            target_type: None,
                        },
                    )))
                } else {
                    Ok(e)
                }
            }
27722
            Action::UnixToTimeStrConvert => {
                // UNIX_TO_TIME_STR(x) -> dialect-specific
                //
                // Converts a unix epoch to a human-readable timestamp string using
                // the target's native epoch function, casting to a string type
                // where that function returns a timestamp instead of text.
                if let Expression::Function(f) = e {
                    let arg = f.args.into_iter().next().unwrap();
                    match target {
                        DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
                            // FROM_UNIXTIME(x) — already returns a string here
                            Ok(Expression::Function(Box::new(Function::new(
                                "FROM_UNIXTIME".to_string(),
                                vec![arg],
                            ))))
                        }
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // CAST(FROM_UNIXTIME(x) AS VARCHAR)
                            let from_unix = Expression::Function(Box::new(Function::new(
                                "FROM_UNIXTIME".to_string(),
                                vec![arg],
                            )));
                            Ok(Expression::Cast(Box::new(Cast {
                                this: from_unix,
                                to: DataType::VarChar {
                                    length: None,
                                    parenthesized_length: false,
                                },
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                                inferred_type: None,
                            })))
                        }
                        DialectType::DuckDB => {
                            // CAST(TO_TIMESTAMP(x) AS TEXT)
                            let to_ts = Expression::Function(Box::new(Function::new(
                                "TO_TIMESTAMP".to_string(),
                                vec![arg],
                            )));
                            Ok(Expression::Cast(Box::new(Cast {
                                this: to_ts,
                                to: DataType::Text,
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                                inferred_type: None,
                            })))
                        }
                        // No mapping: keep the canonical spelling.
                        _ => Ok(Expression::Function(Box::new(Function::new(
                            "UNIX_TO_TIME_STR".to_string(),
                            vec![arg],
                        )))),
                    }
                } else {
                    Ok(e)
                }
            }
27779
            Action::TimeToUnixConvert => {
                // TIME_TO_UNIX(x) -> convert to Expression::TimeToUnix for generator
                //
                // Structural rewrite only; the generator renders the actual
                // epoch-extraction syntax for the target dialect.
                if let Expression::Function(f) = e {
                    let arg = f.args.into_iter().next().unwrap();
                    Ok(Expression::TimeToUnix(Box::new(
                        crate::expressions::UnaryFunc {
                            this: arg,
                            original_name: None,
                            inferred_type: None,
                        },
                    )))
                } else {
                    Ok(e)
                }
            }
27795
            Action::TimeToStrConvert => {
                // TIME_TO_STR(x, fmt) -> convert to Expression::TimeToStr for generator
                //
                // Only rewrites when the format is a string literal; any other
                // format expression keeps the canonical function spelling.
                if let Expression::Function(f) = e {
                    let mut args = f.args;
                    // NOTE(review): two unchecked remove(0) calls assume arity >= 2;
                    // presumably the canonical form guarantees this — confirm upstream.
                    let this = args.remove(0);
                    let fmt = match args.remove(0) {
                        Expression::Literal(lit)
                            if matches!(lit.as_ref(), Literal::String(_)) =>
                        {
                            // Guard above proves the variant; let-else is unreachable.
                            let Literal::String(s) = lit.as_ref() else {
                                unreachable!()
                            };
                            s.clone()
                        }
                        other => {
                            // Non-literal format: leave as TIME_TO_STR(x, other).
                            return Ok(Expression::Function(Box::new(Function::new(
                                "TIME_TO_STR".to_string(),
                                vec![this, other],
                            ))));
                        }
                    };
                    Ok(Expression::TimeToStr(Box::new(
                        crate::expressions::TimeToStr {
                            this: Box::new(this),
                            format: fmt,
                            culture: None,
                            zone: None,
                        },
                    )))
                } else {
                    Ok(e)
                }
            }
27829
            Action::StrToUnixConvert => {
                // STR_TO_UNIX(x, fmt) -> convert to Expression::StrToUnix for generator
                //
                // Mirrors TimeToStrConvert: literal formats become a dedicated AST
                // node, non-literal formats keep the canonical function.
                if let Expression::Function(f) = e {
                    let mut args = f.args;
                    // NOTE(review): assumes arity >= 2 (unchecked remove(0) twice);
                    // matches the convention of the surrounding arms.
                    let this = args.remove(0);
                    let fmt = match args.remove(0) {
                        Expression::Literal(lit)
                            if matches!(lit.as_ref(), Literal::String(_)) =>
                        {
                            let Literal::String(s) = lit.as_ref() else {
                                unreachable!()
                            };
                            s.clone()
                        }
                        other => {
                            return Ok(Expression::Function(Box::new(Function::new(
                                "STR_TO_UNIX".to_string(),
                                vec![this, other],
                            ))));
                        }
                    };
                    Ok(Expression::StrToUnix(Box::new(
                        crate::expressions::StrToUnix {
                            this: Some(Box::new(this)),
                            format: Some(fmt),
                        },
                    )))
                } else {
                    Ok(e)
                }
            }
27861
            Action::TimeStrToUnixConvert => {
                // TIME_STR_TO_UNIX(x) -> dialect-specific
                //
                // Parses a timestamp string and converts it to a unix epoch using
                // the target's native idiom.
                if let Expression::Function(f) = e {
                    let arg = f.args.into_iter().next().unwrap();
                    match target {
                        DialectType::DuckDB => {
                            // EPOCH(CAST(x AS TIMESTAMP))
                            let cast_ts = Expression::Cast(Box::new(Cast {
                                this: arg,
                                to: DataType::Timestamp {
                                    timezone: false,
                                    precision: None,
                                },
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                                inferred_type: None,
                            }));
                            Ok(Expression::Function(Box::new(Function::new(
                                "EPOCH".to_string(),
                                vec![cast_ts],
                            ))))
                        }
                        DialectType::Hive
                        | DialectType::Doris
                        | DialectType::StarRocks
                        | DialectType::MySQL => {
                            // UNIX_TIMESTAMP(x) — parses the string directly
                            Ok(Expression::Function(Box::new(Function::new(
                                "UNIX_TIMESTAMP".to_string(),
                                vec![arg],
                            ))))
                        }
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // TO_UNIXTIME(DATE_PARSE(x, '%Y-%m-%d %T'))
                            // %T is shorthand for %H:%i:%s in MySQL-style formats.
                            let date_parse = Expression::Function(Box::new(Function::new(
                                "DATE_PARSE".to_string(),
                                vec![arg, Expression::string("%Y-%m-%d %T")],
                            )));
                            Ok(Expression::Function(Box::new(Function::new(
                                "TO_UNIXTIME".to_string(),
                                vec![date_parse],
                            ))))
                        }
                        // No mapping: keep the canonical spelling.
                        _ => Ok(Expression::Function(Box::new(Function::new(
                            "TIME_STR_TO_UNIX".to_string(),
                            vec![arg],
                        )))),
                    }
                } else {
                    Ok(e)
                }
            }
27916
            Action::TimeToTimeStrConvert => {
                // TIME_TO_TIME_STR(x) -> CAST(x AS str_type) per dialect
                //
                // Same shape as DateToDateStrConvert, but with per-dialect string
                // types appropriate for timestamps (e.g. Redshift's VARCHAR(MAX)).
                if let Expression::Function(f) = e {
                    let arg = f.args.into_iter().next().unwrap();
                    let str_type = match target {
                        DialectType::DuckDB => DataType::Text,
                        DialectType::Hive
                        | DialectType::Spark
                        | DialectType::Databricks
                        | DialectType::Doris
                        | DialectType::StarRocks => DataType::Custom {
                            name: "STRING".to_string(),
                        },
                        // Redshift's plain VARCHAR truncates; MAX avoids that.
                        DialectType::Redshift => DataType::Custom {
                            name: "VARCHAR(MAX)".to_string(),
                        },
                        _ => DataType::VarChar {
                            length: None,
                            parenthesized_length: false,
                        },
                    };
                    Ok(Expression::Cast(Box::new(Cast {
                        this: arg,
                        to: str_type,
                        double_colon_syntax: false,
                        trailing_comments: Vec::new(),
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                } else {
                    Ok(e)
                }
            }
27951
            Action::DateTruncSwapArgs => {
                // DATE_TRUNC('unit', x) from Generic -> target-specific
                //
                // Some dialects take the arguments in (expr, unit) order, some
                // want an unquoted unit identifier, and MySQL has no DATE_TRUNC
                // at all; rewrite accordingly.
                if let Expression::Function(f) = e {
                    if f.args.len() == 2 {
                        let unit_arg = f.args[0].clone();
                        let expr_arg = f.args[1].clone();
                        // Extract unit string from the first arg
                        let unit_str = match &unit_arg {
                            Expression::Literal(lit)
                                if matches!(lit.as_ref(), Literal::String(_)) =>
                            {
                                let Literal::String(s) = lit.as_ref() else {
                                    unreachable!()
                                };
                                s.to_ascii_uppercase()
                            }
                            // Non-literal unit: leave the call untouched.
                            _ => return Ok(Expression::Function(f)),
                        };
                        match target {
                            DialectType::BigQuery => {
                                // BigQuery: DATE_TRUNC(x, UNIT) - unquoted unit
                                // Modeled as a bare Column so the generator emits
                                // the unit without quotes.
                                let unit_ident =
                                    Expression::Column(Box::new(crate::expressions::Column {
                                        name: crate::expressions::Identifier::new(unit_str),
                                        table: None,
                                        join_mark: false,
                                        trailing_comments: Vec::new(),
                                        span: None,
                                        inferred_type: None,
                                    }));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![expr_arg, unit_ident],
                                ))))
                            }
                            DialectType::Doris => {
                                // Doris: DATE_TRUNC(x, 'UNIT')
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![expr_arg, Expression::string(&unit_str)],
                                ))))
                            }
                            DialectType::StarRocks => {
                                // StarRocks: DATE_TRUNC('UNIT', x) - keep standard order
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![Expression::string(&unit_str), expr_arg],
                                ))))
                            }
                            DialectType::Spark | DialectType::Databricks => {
                                // Spark: TRUNC(x, 'UNIT')
                                Ok(Expression::Function(Box::new(Function::new(
                                    "TRUNC".to_string(),
                                    vec![expr_arg, Expression::string(&unit_str)],
                                ))))
                            }
                            DialectType::MySQL => {
                                // MySQL: complex expansion based on unit
                                Self::date_trunc_to_mysql(&unit_str, &expr_arg)
                            }
                            _ => Ok(Expression::Function(f)),
                        }
                    } else {
                        // Unexpected arity: pass through unchanged.
                        Ok(Expression::Function(f))
                    }
                } else {
                    Ok(e)
                }
            }
28021
            Action::TimestampTruncConvert => {
                // TIMESTAMP_TRUNC(x, UNIT[, tz]) from Generic -> target-specific
                //
                // Rewrites BigQuery-style TIMESTAMP_TRUNC into the target's
                // DATE_TRUNC form, preserving or dropping the optional timezone
                // argument depending on what the target supports.
                if let Expression::Function(f) = e {
                    if f.args.len() >= 2 {
                        let expr_arg = f.args[0].clone();
                        let unit_arg = f.args[1].clone();
                        let tz_arg = if f.args.len() >= 3 {
                            Some(f.args[2].clone())
                        } else {
                            None
                        };
                        // Extract unit string
                        let unit_str = match &unit_arg {
                            Expression::Literal(lit)
                                if matches!(lit.as_ref(), Literal::String(_)) =>
                            {
                                let Literal::String(s) = lit.as_ref() else {
                                    unreachable!()
                                };
                                s.to_ascii_uppercase()
                            }
                            // Unit may also arrive as a bare identifier (BigQuery style).
                            Expression::Column(c) => c.name.name.to_ascii_uppercase(),
                            _ => {
                                return Ok(Expression::Function(f));
                            }
                        };
                        match target {
                            DialectType::Spark | DialectType::Databricks => {
                                // Spark: DATE_TRUNC('UNIT', x)
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![Expression::string(&unit_str), expr_arg],
                                ))))
                            }
                            DialectType::Doris | DialectType::StarRocks => {
                                // Doris: DATE_TRUNC(x, 'UNIT')
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![expr_arg, Expression::string(&unit_str)],
                                ))))
                            }
                            DialectType::BigQuery => {
                                // BigQuery: TIMESTAMP_TRUNC(x, UNIT) - keep but with unquoted unit
                                let unit_ident =
                                    Expression::Column(Box::new(crate::expressions::Column {
                                        name: crate::expressions::Identifier::new(unit_str),
                                        table: None,
                                        join_mark: false,
                                        trailing_comments: Vec::new(),
                                        span: None,
                                        inferred_type: None,
                                    }));
                                let mut args = vec![expr_arg, unit_ident];
                                if let Some(tz) = tz_arg {
                                    args.push(tz);
                                }
                                Ok(Expression::Function(Box::new(Function::new(
                                    "TIMESTAMP_TRUNC".to_string(),
                                    args,
                                ))))
                            }
                            DialectType::DuckDB => {
                                // DuckDB with timezone: DATE_TRUNC('UNIT', x AT TIME ZONE 'tz') AT TIME ZONE 'tz'
                                if let Some(tz) = tz_arg {
                                    // Non-literal timezone falls back to 'UTC'.
                                    let tz_str = match &tz {
                                        Expression::Literal(lit)
                                            if matches!(lit.as_ref(), Literal::String(_)) =>
                                        {
                                            let Literal::String(s) = lit.as_ref() else {
                                                unreachable!()
                                            };
                                            s.clone()
                                        }
                                        _ => "UTC".to_string(),
                                    };
                                    // x AT TIME ZONE 'tz'
                                    let at_tz = Expression::AtTimeZone(Box::new(
                                        crate::expressions::AtTimeZone {
                                            this: expr_arg,
                                            zone: Expression::string(&tz_str),
                                        },
                                    ));
                                    // DATE_TRUNC('UNIT', x AT TIME ZONE 'tz')
                                    let trunc = Expression::Function(Box::new(Function::new(
                                        "DATE_TRUNC".to_string(),
                                        vec![Expression::string(&unit_str), at_tz],
                                    )));
                                    // DATE_TRUNC(...) AT TIME ZONE 'tz'
                                    Ok(Expression::AtTimeZone(Box::new(
                                        crate::expressions::AtTimeZone {
                                            this: trunc,
                                            zone: Expression::string(&tz_str),
                                        },
                                    )))
                                } else {
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "DATE_TRUNC".to_string(),
                                        vec![Expression::string(&unit_str), expr_arg],
                                    ))))
                                }
                            }
                            DialectType::Presto
                            | DialectType::Trino
                            | DialectType::Athena
                            | DialectType::Snowflake => {
                                // Presto/Snowflake: DATE_TRUNC('UNIT', x) - drop timezone
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![Expression::string(&unit_str), expr_arg],
                                ))))
                            }
                            _ => {
                                // For most dialects: DATE_TRUNC('UNIT', x) + tz handling
                                let mut args = vec![Expression::string(&unit_str), expr_arg];
                                if let Some(tz) = tz_arg {
                                    args.push(tz);
                                }
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    args,
                                ))))
                            }
                        }
                    } else {
                        // Fewer than two args: pass through unchanged.
                        Ok(Expression::Function(f))
                    }
                } else {
                    Ok(e)
                }
            }
28152
28153 Action::StrToDateConvert => {
28154 // STR_TO_DATE(x, fmt) from Generic -> dialect-specific date parsing
28155 if let Expression::Function(f) = e {
28156 if f.args.len() == 2 {
28157 let mut args = f.args;
28158 let this = args.remove(0);
28159 let fmt_expr = args.remove(0);
28160 let fmt_str = match &fmt_expr {
28161 Expression::Literal(lit)
28162 if matches!(lit.as_ref(), Literal::String(_)) =>
28163 {
28164 let Literal::String(s) = lit.as_ref() else {
28165 unreachable!()
28166 };
28167 Some(s.clone())
28168 }
28169 _ => None,
28170 };
28171 let default_date = "%Y-%m-%d";
28172 let default_time = "%Y-%m-%d %H:%M:%S";
28173 let is_default = fmt_str
28174 .as_ref()
28175 .map_or(false, |f| f == default_date || f == default_time);
28176
28177 if is_default {
28178 // Default format: handle per-dialect
28179 match target {
28180 DialectType::MySQL
28181 | DialectType::Doris
28182 | DialectType::StarRocks => {
28183 // Keep STR_TO_DATE(x, fmt) as-is
28184 Ok(Expression::Function(Box::new(Function::new(
28185 "STR_TO_DATE".to_string(),
28186 vec![this, fmt_expr],
28187 ))))
28188 }
28189 DialectType::Hive => {
28190 // Hive: CAST(x AS DATE)
28191 Ok(Expression::Cast(Box::new(Cast {
28192 this,
28193 to: DataType::Date,
28194 double_colon_syntax: false,
28195 trailing_comments: Vec::new(),
28196 format: None,
28197 default: None,
28198 inferred_type: None,
28199 })))
28200 }
28201 DialectType::Presto
28202 | DialectType::Trino
28203 | DialectType::Athena => {
28204 // Presto: CAST(DATE_PARSE(x, '%Y-%m-%d') AS DATE)
28205 let date_parse =
28206 Expression::Function(Box::new(Function::new(
28207 "DATE_PARSE".to_string(),
28208 vec![this, fmt_expr],
28209 )));
28210 Ok(Expression::Cast(Box::new(Cast {
28211 this: date_parse,
28212 to: DataType::Date,
28213 double_colon_syntax: false,
28214 trailing_comments: Vec::new(),
28215 format: None,
28216 default: None,
28217 inferred_type: None,
28218 })))
28219 }
28220 _ => {
28221 // Others: TsOrDsToDate (delegates to generator)
28222 Ok(Expression::TsOrDsToDate(Box::new(
28223 crate::expressions::TsOrDsToDate {
28224 this: Box::new(this),
28225 format: None,
28226 safe: None,
28227 },
28228 )))
28229 }
28230 }
28231 } else if let Some(fmt) = fmt_str {
28232 match target {
28233 DialectType::Doris
28234 | DialectType::StarRocks
28235 | DialectType::MySQL => {
28236 // Keep STR_TO_DATE but with normalized format (%H:%M:%S -> %T, %-d -> %e)
28237 let mut normalized = fmt.clone();
28238 normalized = normalized.replace("%-d", "%e");
28239 normalized = normalized.replace("%-m", "%c");
28240 normalized = normalized.replace("%H:%M:%S", "%T");
28241 Ok(Expression::Function(Box::new(Function::new(
28242 "STR_TO_DATE".to_string(),
28243 vec![this, Expression::string(&normalized)],
28244 ))))
28245 }
28246 DialectType::Hive => {
28247 // Hive: CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(x, java_fmt)) AS DATE)
28248 let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
28249 let unix_ts =
28250 Expression::Function(Box::new(Function::new(
28251 "UNIX_TIMESTAMP".to_string(),
28252 vec![this, Expression::string(&java_fmt)],
28253 )));
28254 let from_unix =
28255 Expression::Function(Box::new(Function::new(
28256 "FROM_UNIXTIME".to_string(),
28257 vec![unix_ts],
28258 )));
28259 Ok(Expression::Cast(Box::new(Cast {
28260 this: from_unix,
28261 to: DataType::Date,
28262 double_colon_syntax: false,
28263 trailing_comments: Vec::new(),
28264 format: None,
28265 default: None,
28266 inferred_type: None,
28267 })))
28268 }
28269 DialectType::Spark | DialectType::Databricks => {
28270 // Spark: TO_DATE(x, java_fmt)
28271 let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
28272 Ok(Expression::Function(Box::new(Function::new(
28273 "TO_DATE".to_string(),
28274 vec![this, Expression::string(&java_fmt)],
28275 ))))
28276 }
28277 DialectType::Drill => {
28278 // Drill: TO_DATE(x, java_fmt) with T quoted as 'T' in Java format
28279 // The generator's string literal escaping will double the quotes: 'T' -> ''T''
28280 let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
28281 let java_fmt = java_fmt.replace('T', "'T'");
28282 Ok(Expression::Function(Box::new(Function::new(
28283 "TO_DATE".to_string(),
28284 vec![this, Expression::string(&java_fmt)],
28285 ))))
28286 }
28287 _ => {
28288 // For other dialects: use TsOrDsToDate which delegates to generator
28289 Ok(Expression::TsOrDsToDate(Box::new(
28290 crate::expressions::TsOrDsToDate {
28291 this: Box::new(this),
28292 format: Some(fmt),
28293 safe: None,
28294 },
28295 )))
28296 }
28297 }
28298 } else {
28299 // Non-string format - keep as-is
28300 let mut new_args = Vec::new();
28301 new_args.push(this);
28302 new_args.push(fmt_expr);
28303 Ok(Expression::Function(Box::new(Function::new(
28304 "STR_TO_DATE".to_string(),
28305 new_args,
28306 ))))
28307 }
28308 } else {
28309 Ok(Expression::Function(f))
28310 }
28311 } else {
28312 Ok(e)
28313 }
28314 }
28315
28316 Action::TsOrDsAddConvert => {
28317 // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> dialect-specific DATE_ADD
28318 if let Expression::Function(f) = e {
28319 if f.args.len() == 3 {
28320 let mut args = f.args;
28321 let x = args.remove(0);
28322 let n = args.remove(0);
28323 let unit_expr = args.remove(0);
28324 let unit_str = match &unit_expr {
28325 Expression::Literal(lit)
28326 if matches!(lit.as_ref(), Literal::String(_)) =>
28327 {
28328 let Literal::String(s) = lit.as_ref() else {
28329 unreachable!()
28330 };
28331 s.to_ascii_uppercase()
28332 }
28333 _ => "DAY".to_string(),
28334 };
28335
28336 match target {
28337 DialectType::Hive
28338 | DialectType::Spark
28339 | DialectType::Databricks => {
28340 // DATE_ADD(x, n) - only supports DAY unit
28341 Ok(Expression::Function(Box::new(Function::new(
28342 "DATE_ADD".to_string(),
28343 vec![x, n],
28344 ))))
28345 }
28346 DialectType::MySQL => {
28347 // DATE_ADD(x, INTERVAL n UNIT)
28348 let iu = match unit_str.as_str() {
28349 "YEAR" => crate::expressions::IntervalUnit::Year,
28350 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
28351 "MONTH" => crate::expressions::IntervalUnit::Month,
28352 "WEEK" => crate::expressions::IntervalUnit::Week,
28353 "HOUR" => crate::expressions::IntervalUnit::Hour,
28354 "MINUTE" => crate::expressions::IntervalUnit::Minute,
28355 "SECOND" => crate::expressions::IntervalUnit::Second,
28356 _ => crate::expressions::IntervalUnit::Day,
28357 };
28358 let interval = Expression::Interval(Box::new(
28359 crate::expressions::Interval {
28360 this: Some(n),
28361 unit: Some(
28362 crate::expressions::IntervalUnitSpec::Simple {
28363 unit: iu,
28364 use_plural: false,
28365 },
28366 ),
28367 },
28368 ));
28369 Ok(Expression::Function(Box::new(Function::new(
28370 "DATE_ADD".to_string(),
28371 vec![x, interval],
28372 ))))
28373 }
28374 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
28375 // DATE_ADD('UNIT', n, CAST(CAST(x AS TIMESTAMP) AS DATE))
28376 let cast_ts = Expression::Cast(Box::new(Cast {
28377 this: x,
28378 to: DataType::Timestamp {
28379 precision: None,
28380 timezone: false,
28381 },
28382 double_colon_syntax: false,
28383 trailing_comments: Vec::new(),
28384 format: None,
28385 default: None,
28386 inferred_type: None,
28387 }));
28388 let cast_date = Expression::Cast(Box::new(Cast {
28389 this: cast_ts,
28390 to: DataType::Date,
28391 double_colon_syntax: false,
28392 trailing_comments: Vec::new(),
28393 format: None,
28394 default: None,
28395 inferred_type: None,
28396 }));
28397 Ok(Expression::Function(Box::new(Function::new(
28398 "DATE_ADD".to_string(),
28399 vec![Expression::string(&unit_str), n, cast_date],
28400 ))))
28401 }
28402 DialectType::DuckDB => {
28403 // CAST(x AS DATE) + INTERVAL n UNIT
28404 let cast_date = Expression::Cast(Box::new(Cast {
28405 this: x,
28406 to: DataType::Date,
28407 double_colon_syntax: false,
28408 trailing_comments: Vec::new(),
28409 format: None,
28410 default: None,
28411 inferred_type: None,
28412 }));
28413 let iu = match unit_str.as_str() {
28414 "YEAR" => crate::expressions::IntervalUnit::Year,
28415 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
28416 "MONTH" => crate::expressions::IntervalUnit::Month,
28417 "WEEK" => crate::expressions::IntervalUnit::Week,
28418 "HOUR" => crate::expressions::IntervalUnit::Hour,
28419 "MINUTE" => crate::expressions::IntervalUnit::Minute,
28420 "SECOND" => crate::expressions::IntervalUnit::Second,
28421 _ => crate::expressions::IntervalUnit::Day,
28422 };
28423 let interval = Expression::Interval(Box::new(
28424 crate::expressions::Interval {
28425 this: Some(n),
28426 unit: Some(
28427 crate::expressions::IntervalUnitSpec::Simple {
28428 unit: iu,
28429 use_plural: false,
28430 },
28431 ),
28432 },
28433 ));
28434 Ok(Expression::Add(Box::new(crate::expressions::BinaryOp {
28435 left: cast_date,
28436 right: interval,
28437 left_comments: Vec::new(),
28438 operator_comments: Vec::new(),
28439 trailing_comments: Vec::new(),
28440 inferred_type: None,
28441 })))
28442 }
28443 DialectType::Drill => {
28444 // DATE_ADD(CAST(x AS DATE), INTERVAL n UNIT)
28445 let cast_date = Expression::Cast(Box::new(Cast {
28446 this: x,
28447 to: DataType::Date,
28448 double_colon_syntax: false,
28449 trailing_comments: Vec::new(),
28450 format: None,
28451 default: None,
28452 inferred_type: None,
28453 }));
28454 let iu = match unit_str.as_str() {
28455 "YEAR" => crate::expressions::IntervalUnit::Year,
28456 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
28457 "MONTH" => crate::expressions::IntervalUnit::Month,
28458 "WEEK" => crate::expressions::IntervalUnit::Week,
28459 "HOUR" => crate::expressions::IntervalUnit::Hour,
28460 "MINUTE" => crate::expressions::IntervalUnit::Minute,
28461 "SECOND" => crate::expressions::IntervalUnit::Second,
28462 _ => crate::expressions::IntervalUnit::Day,
28463 };
28464 let interval = Expression::Interval(Box::new(
28465 crate::expressions::Interval {
28466 this: Some(n),
28467 unit: Some(
28468 crate::expressions::IntervalUnitSpec::Simple {
28469 unit: iu,
28470 use_plural: false,
28471 },
28472 ),
28473 },
28474 ));
28475 Ok(Expression::Function(Box::new(Function::new(
28476 "DATE_ADD".to_string(),
28477 vec![cast_date, interval],
28478 ))))
28479 }
28480 _ => {
28481 // Default: keep as TS_OR_DS_ADD
28482 Ok(Expression::Function(Box::new(Function::new(
28483 "TS_OR_DS_ADD".to_string(),
28484 vec![x, n, unit_expr],
28485 ))))
28486 }
28487 }
28488 } else {
28489 Ok(Expression::Function(f))
28490 }
28491 } else {
28492 Ok(e)
28493 }
28494 }
28495
28496 Action::DateFromUnixDateConvert => {
28497 // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, CAST('1970-01-01' AS DATE))
28498 if let Expression::Function(f) = e {
28499 // Keep as-is for dialects that support DATE_FROM_UNIX_DATE natively
28500 if matches!(
28501 target,
28502 DialectType::Spark | DialectType::Databricks | DialectType::BigQuery
28503 ) {
28504 return Ok(Expression::Function(Box::new(Function::new(
28505 "DATE_FROM_UNIX_DATE".to_string(),
28506 f.args,
28507 ))));
28508 }
28509 let n = f.args.into_iter().next().unwrap();
28510 let epoch_date = Expression::Cast(Box::new(Cast {
28511 this: Expression::string("1970-01-01"),
28512 to: DataType::Date,
28513 double_colon_syntax: false,
28514 trailing_comments: Vec::new(),
28515 format: None,
28516 default: None,
28517 inferred_type: None,
28518 }));
28519 match target {
28520 DialectType::DuckDB => {
28521 // CAST('1970-01-01' AS DATE) + INTERVAL n DAY
28522 let interval =
28523 Expression::Interval(Box::new(crate::expressions::Interval {
28524 this: Some(n),
28525 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
28526 unit: crate::expressions::IntervalUnit::Day,
28527 use_plural: false,
28528 }),
28529 }));
28530 Ok(Expression::Add(Box::new(
28531 crate::expressions::BinaryOp::new(epoch_date, interval),
28532 )))
28533 }
28534 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
28535 // DATE_ADD('DAY', n, CAST('1970-01-01' AS DATE))
28536 Ok(Expression::Function(Box::new(Function::new(
28537 "DATE_ADD".to_string(),
28538 vec![Expression::string("DAY"), n, epoch_date],
28539 ))))
28540 }
28541 DialectType::Snowflake | DialectType::Redshift | DialectType::TSQL => {
28542 // DATEADD(DAY, n, CAST('1970-01-01' AS DATE))
28543 Ok(Expression::Function(Box::new(Function::new(
28544 "DATEADD".to_string(),
28545 vec![
28546 Expression::Identifier(Identifier::new("DAY")),
28547 n,
28548 epoch_date,
28549 ],
28550 ))))
28551 }
28552 DialectType::BigQuery => {
28553 // DATE_ADD(CAST('1970-01-01' AS DATE), INTERVAL n DAY)
28554 let interval =
28555 Expression::Interval(Box::new(crate::expressions::Interval {
28556 this: Some(n),
28557 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
28558 unit: crate::expressions::IntervalUnit::Day,
28559 use_plural: false,
28560 }),
28561 }));
28562 Ok(Expression::Function(Box::new(Function::new(
28563 "DATE_ADD".to_string(),
28564 vec![epoch_date, interval],
28565 ))))
28566 }
28567 DialectType::MySQL
28568 | DialectType::Doris
28569 | DialectType::StarRocks
28570 | DialectType::Drill => {
28571 // DATE_ADD(CAST('1970-01-01' AS DATE), INTERVAL n DAY)
28572 let interval =
28573 Expression::Interval(Box::new(crate::expressions::Interval {
28574 this: Some(n),
28575 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
28576 unit: crate::expressions::IntervalUnit::Day,
28577 use_plural: false,
28578 }),
28579 }));
28580 Ok(Expression::Function(Box::new(Function::new(
28581 "DATE_ADD".to_string(),
28582 vec![epoch_date, interval],
28583 ))))
28584 }
28585 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
28586 // DATE_ADD(CAST('1970-01-01' AS DATE), n)
28587 Ok(Expression::Function(Box::new(Function::new(
28588 "DATE_ADD".to_string(),
28589 vec![epoch_date, n],
28590 ))))
28591 }
28592 DialectType::PostgreSQL
28593 | DialectType::Materialize
28594 | DialectType::RisingWave => {
28595 // CAST('1970-01-01' AS DATE) + INTERVAL 'n DAY'
28596 let n_str = match &n {
28597 Expression::Literal(lit)
28598 if matches!(lit.as_ref(), Literal::Number(_)) =>
28599 {
28600 let Literal::Number(s) = lit.as_ref() else {
28601 unreachable!()
28602 };
28603 s.clone()
28604 }
28605 _ => Self::expr_to_string_static(&n),
28606 };
28607 let interval =
28608 Expression::Interval(Box::new(crate::expressions::Interval {
28609 this: Some(Expression::string(&format!("{} DAY", n_str))),
28610 unit: None,
28611 }));
28612 Ok(Expression::Add(Box::new(
28613 crate::expressions::BinaryOp::new(epoch_date, interval),
28614 )))
28615 }
28616 _ => {
28617 // Default: keep as-is
28618 Ok(Expression::Function(Box::new(Function::new(
28619 "DATE_FROM_UNIX_DATE".to_string(),
28620 vec![n],
28621 ))))
28622 }
28623 }
28624 } else {
28625 Ok(e)
28626 }
28627 }
28628
28629 Action::ArrayRemoveConvert => {
28630 // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter
28631 if let Expression::ArrayRemove(bf) = e {
28632 let arr = bf.this;
28633 let target_val = bf.expression;
28634 match target {
28635 DialectType::DuckDB => {
28636 let u_id = crate::expressions::Identifier::new("_u");
28637 let lambda =
28638 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
28639 parameters: vec![u_id.clone()],
28640 body: Expression::Neq(Box::new(BinaryOp {
28641 left: Expression::Identifier(u_id),
28642 right: target_val,
28643 left_comments: Vec::new(),
28644 operator_comments: Vec::new(),
28645 trailing_comments: Vec::new(),
28646 inferred_type: None,
28647 })),
28648 colon: false,
28649 parameter_types: Vec::new(),
28650 }));
28651 Ok(Expression::Function(Box::new(Function::new(
28652 "LIST_FILTER".to_string(),
28653 vec![arr, lambda],
28654 ))))
28655 }
28656 DialectType::ClickHouse => {
28657 let u_id = crate::expressions::Identifier::new("_u");
28658 let lambda =
28659 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
28660 parameters: vec![u_id.clone()],
28661 body: Expression::Neq(Box::new(BinaryOp {
28662 left: Expression::Identifier(u_id),
28663 right: target_val,
28664 left_comments: Vec::new(),
28665 operator_comments: Vec::new(),
28666 trailing_comments: Vec::new(),
28667 inferred_type: None,
28668 })),
28669 colon: false,
28670 parameter_types: Vec::new(),
28671 }));
28672 Ok(Expression::Function(Box::new(Function::new(
28673 "arrayFilter".to_string(),
28674 vec![lambda, arr],
28675 ))))
28676 }
28677 DialectType::BigQuery => {
28678 // ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)
28679 let u_id = crate::expressions::Identifier::new("_u");
28680 let u_col =
28681 Expression::Column(Box::new(crate::expressions::Column {
28682 name: u_id.clone(),
28683 table: None,
28684 join_mark: false,
28685 trailing_comments: Vec::new(),
28686 span: None,
28687 inferred_type: None,
28688 }));
28689 let unnest_expr =
28690 Expression::Unnest(Box::new(crate::expressions::UnnestFunc {
28691 this: arr,
28692 expressions: Vec::new(),
28693 with_ordinality: false,
28694 alias: None,
28695 offset_alias: None,
28696 }));
28697 let aliased_unnest =
28698 Expression::Alias(Box::new(crate::expressions::Alias {
28699 this: unnest_expr,
28700 alias: u_id.clone(),
28701 column_aliases: Vec::new(),
28702 pre_alias_comments: Vec::new(),
28703 trailing_comments: Vec::new(),
28704 inferred_type: None,
28705 }));
28706 let where_cond = Expression::Neq(Box::new(BinaryOp {
28707 left: u_col.clone(),
28708 right: target_val,
28709 left_comments: Vec::new(),
28710 operator_comments: Vec::new(),
28711 trailing_comments: Vec::new(),
28712 inferred_type: None,
28713 }));
28714 let subquery = Expression::Select(Box::new(
28715 crate::expressions::Select::new()
28716 .column(u_col)
28717 .from(aliased_unnest)
28718 .where_(where_cond),
28719 ));
28720 Ok(Expression::ArrayFunc(Box::new(
28721 crate::expressions::ArrayConstructor {
28722 expressions: vec![subquery],
28723 bracket_notation: false,
28724 use_list_keyword: false,
28725 },
28726 )))
28727 }
28728 _ => Ok(Expression::ArrayRemove(Box::new(
28729 crate::expressions::BinaryFunc {
28730 original_name: None,
28731 this: arr,
28732 expression: target_val,
28733 inferred_type: None,
28734 },
28735 ))),
28736 }
28737 } else {
28738 Ok(e)
28739 }
28740 }
28741
28742 Action::ArrayReverseConvert => {
28743 // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
28744 if let Expression::ArrayReverse(af) = e {
28745 Ok(Expression::Function(Box::new(Function::new(
28746 "arrayReverse".to_string(),
28747 vec![af.this],
28748 ))))
28749 } else {
28750 Ok(e)
28751 }
28752 }
28753
28754 Action::JsonKeysConvert => {
28755 // JSON_KEYS(x) -> JSON_OBJECT_KEYS/OBJECT_KEYS
28756 if let Expression::JsonKeys(uf) = e {
28757 match target {
28758 DialectType::Spark | DialectType::Databricks => {
28759 Ok(Expression::Function(Box::new(Function::new(
28760 "JSON_OBJECT_KEYS".to_string(),
28761 vec![uf.this],
28762 ))))
28763 }
28764 DialectType::Snowflake => Ok(Expression::Function(Box::new(
28765 Function::new("OBJECT_KEYS".to_string(), vec![uf.this]),
28766 ))),
28767 _ => Ok(Expression::JsonKeys(uf)),
28768 }
28769 } else {
28770 Ok(e)
28771 }
28772 }
28773
28774 Action::ParseJsonStrip => {
28775 // PARSE_JSON(x) -> x (strip wrapper for SQLite/Doris)
28776 if let Expression::ParseJson(uf) = e {
28777 Ok(uf.this)
28778 } else {
28779 Ok(e)
28780 }
28781 }
28782
28783 Action::ArraySizeDrill => {
28784 // ARRAY_SIZE(x) -> REPEATED_COUNT(x) for Drill
28785 if let Expression::ArraySize(uf) = e {
28786 Ok(Expression::Function(Box::new(Function::new(
28787 "REPEATED_COUNT".to_string(),
28788 vec![uf.this],
28789 ))))
28790 } else {
28791 Ok(e)
28792 }
28793 }
28794
28795 Action::WeekOfYearToWeekIso => {
28796 // WEEKOFYEAR(x) -> WEEKISO(x) for Snowflake (cross-dialect normalization)
28797 if let Expression::WeekOfYear(uf) = e {
28798 Ok(Expression::Function(Box::new(Function::new(
28799 "WEEKISO".to_string(),
28800 vec![uf.this],
28801 ))))
28802 } else {
28803 Ok(e)
28804 }
28805 }
28806 }
28807 })
28808 }
28809
28810 /// Convert DATE_TRUNC('unit', x) to MySQL-specific expansion
28811 fn date_trunc_to_mysql(unit: &str, expr: &Expression) -> Result<Expression> {
28812 use crate::expressions::Function;
28813 match unit {
28814 "DAY" => {
28815 // DATE(x)
28816 Ok(Expression::Function(Box::new(Function::new(
28817 "DATE".to_string(),
28818 vec![expr.clone()],
28819 ))))
28820 }
28821 "WEEK" => {
28822 // STR_TO_DATE(CONCAT(YEAR(x), ' ', WEEK(x, 1), ' 1'), '%Y %u %w')
28823 let year_x = Expression::Function(Box::new(Function::new(
28824 "YEAR".to_string(),
28825 vec![expr.clone()],
28826 )));
28827 let week_x = Expression::Function(Box::new(Function::new(
28828 "WEEK".to_string(),
28829 vec![expr.clone(), Expression::number(1)],
28830 )));
28831 let concat_args = vec![
28832 year_x,
28833 Expression::string(" "),
28834 week_x,
28835 Expression::string(" 1"),
28836 ];
28837 let concat = Expression::Function(Box::new(Function::new(
28838 "CONCAT".to_string(),
28839 concat_args,
28840 )));
28841 Ok(Expression::Function(Box::new(Function::new(
28842 "STR_TO_DATE".to_string(),
28843 vec![concat, Expression::string("%Y %u %w")],
28844 ))))
28845 }
28846 "MONTH" => {
28847 // STR_TO_DATE(CONCAT(YEAR(x), ' ', MONTH(x), ' 1'), '%Y %c %e')
28848 let year_x = Expression::Function(Box::new(Function::new(
28849 "YEAR".to_string(),
28850 vec![expr.clone()],
28851 )));
28852 let month_x = Expression::Function(Box::new(Function::new(
28853 "MONTH".to_string(),
28854 vec![expr.clone()],
28855 )));
28856 let concat_args = vec![
28857 year_x,
28858 Expression::string(" "),
28859 month_x,
28860 Expression::string(" 1"),
28861 ];
28862 let concat = Expression::Function(Box::new(Function::new(
28863 "CONCAT".to_string(),
28864 concat_args,
28865 )));
28866 Ok(Expression::Function(Box::new(Function::new(
28867 "STR_TO_DATE".to_string(),
28868 vec![concat, Expression::string("%Y %c %e")],
28869 ))))
28870 }
28871 "QUARTER" => {
28872 // STR_TO_DATE(CONCAT(YEAR(x), ' ', QUARTER(x) * 3 - 2, ' 1'), '%Y %c %e')
28873 let year_x = Expression::Function(Box::new(Function::new(
28874 "YEAR".to_string(),
28875 vec![expr.clone()],
28876 )));
28877 let quarter_x = Expression::Function(Box::new(Function::new(
28878 "QUARTER".to_string(),
28879 vec![expr.clone()],
28880 )));
28881 // QUARTER(x) * 3 - 2
28882 let mul = Expression::Mul(Box::new(crate::expressions::BinaryOp {
28883 left: quarter_x,
28884 right: Expression::number(3),
28885 left_comments: Vec::new(),
28886 operator_comments: Vec::new(),
28887 trailing_comments: Vec::new(),
28888 inferred_type: None,
28889 }));
28890 let sub = Expression::Sub(Box::new(crate::expressions::BinaryOp {
28891 left: mul,
28892 right: Expression::number(2),
28893 left_comments: Vec::new(),
28894 operator_comments: Vec::new(),
28895 trailing_comments: Vec::new(),
28896 inferred_type: None,
28897 }));
28898 let concat_args = vec![
28899 year_x,
28900 Expression::string(" "),
28901 sub,
28902 Expression::string(" 1"),
28903 ];
28904 let concat = Expression::Function(Box::new(Function::new(
28905 "CONCAT".to_string(),
28906 concat_args,
28907 )));
28908 Ok(Expression::Function(Box::new(Function::new(
28909 "STR_TO_DATE".to_string(),
28910 vec![concat, Expression::string("%Y %c %e")],
28911 ))))
28912 }
28913 "YEAR" => {
28914 // STR_TO_DATE(CONCAT(YEAR(x), ' 1 1'), '%Y %c %e')
28915 let year_x = Expression::Function(Box::new(Function::new(
28916 "YEAR".to_string(),
28917 vec![expr.clone()],
28918 )));
28919 let concat_args = vec![year_x, Expression::string(" 1 1")];
28920 let concat = Expression::Function(Box::new(Function::new(
28921 "CONCAT".to_string(),
28922 concat_args,
28923 )));
28924 Ok(Expression::Function(Box::new(Function::new(
28925 "STR_TO_DATE".to_string(),
28926 vec![concat, Expression::string("%Y %c %e")],
28927 ))))
28928 }
28929 _ => {
28930 // Unsupported unit -> keep as DATE_TRUNC
28931 Ok(Expression::Function(Box::new(Function::new(
28932 "DATE_TRUNC".to_string(),
28933 vec![Expression::string(unit), expr.clone()],
28934 ))))
28935 }
28936 }
28937 }
28938
28939 /// Check if a DataType is or contains VARCHAR/CHAR (for Spark VARCHAR->STRING normalization)
28940 fn has_varchar_char_type(dt: &crate::expressions::DataType) -> bool {
28941 use crate::expressions::DataType;
28942 match dt {
28943 DataType::VarChar { .. } | DataType::Char { .. } => true,
28944 DataType::Struct { fields, .. } => fields
28945 .iter()
28946 .any(|f| Self::has_varchar_char_type(&f.data_type)),
28947 _ => false,
28948 }
28949 }
28950
28951 /// Recursively normalize VARCHAR/CHAR to STRING in a DataType (for Spark)
28952 fn normalize_varchar_to_string(
28953 dt: crate::expressions::DataType,
28954 ) -> crate::expressions::DataType {
28955 use crate::expressions::DataType;
28956 match dt {
28957 DataType::VarChar { .. } | DataType::Char { .. } => DataType::Custom {
28958 name: "STRING".to_string(),
28959 },
28960 DataType::Struct { fields, nested } => {
28961 let fields = fields
28962 .into_iter()
28963 .map(|mut f| {
28964 f.data_type = Self::normalize_varchar_to_string(f.data_type);
28965 f
28966 })
28967 .collect();
28968 DataType::Struct { fields, nested }
28969 }
28970 other => other,
28971 }
28972 }
28973
28974 /// Normalize an interval string like '1day' or ' 2 days ' to proper INTERVAL expression
28975 fn normalize_interval_string(expr: Expression, target: DialectType) -> Expression {
28976 if let Expression::Literal(ref lit) = expr {
28977 if let crate::expressions::Literal::String(ref s) = lit.as_ref() {
28978 // Try to parse patterns like '1day', '1 day', '2 days', ' 2 days '
28979 let trimmed = s.trim();
28980
28981 // Find where digits end and unit text begins
28982 let digit_end = trimmed
28983 .find(|c: char| !c.is_ascii_digit())
28984 .unwrap_or(trimmed.len());
28985 if digit_end == 0 || digit_end == trimmed.len() {
28986 return expr;
28987 }
28988 let num = &trimmed[..digit_end];
28989 let unit_text = trimmed[digit_end..].trim().to_ascii_uppercase();
28990 if unit_text.is_empty() {
28991 return expr;
28992 }
28993
28994 let known_units = [
28995 "DAY", "DAYS", "HOUR", "HOURS", "MINUTE", "MINUTES", "SECOND", "SECONDS",
28996 "WEEK", "WEEKS", "MONTH", "MONTHS", "YEAR", "YEARS",
28997 ];
28998 if !known_units.contains(&unit_text.as_str()) {
28999 return expr;
29000 }
29001
29002 let unit_str = unit_text.clone();
29003 // Singularize
29004 let unit_singular = if unit_str.ends_with('S') && unit_str.len() > 3 {
29005 &unit_str[..unit_str.len() - 1]
29006 } else {
29007 &unit_str
29008 };
29009 let unit = unit_singular;
29010
29011 match target {
29012 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
29013 // INTERVAL '2' DAY
29014 let iu = match unit {
29015 "DAY" => crate::expressions::IntervalUnit::Day,
29016 "HOUR" => crate::expressions::IntervalUnit::Hour,
29017 "MINUTE" => crate::expressions::IntervalUnit::Minute,
29018 "SECOND" => crate::expressions::IntervalUnit::Second,
29019 "WEEK" => crate::expressions::IntervalUnit::Week,
29020 "MONTH" => crate::expressions::IntervalUnit::Month,
29021 "YEAR" => crate::expressions::IntervalUnit::Year,
29022 _ => return expr,
29023 };
29024 return Expression::Interval(Box::new(crate::expressions::Interval {
29025 this: Some(Expression::string(num)),
29026 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
29027 unit: iu,
29028 use_plural: false,
29029 }),
29030 }));
29031 }
29032 DialectType::PostgreSQL | DialectType::Redshift | DialectType::DuckDB => {
29033 // INTERVAL '2 DAYS'
29034 let plural = if num != "1" && !unit_str.ends_with('S') {
29035 format!("{} {}S", num, unit)
29036 } else if unit_str.ends_with('S') {
29037 format!("{} {}", num, unit_str)
29038 } else {
29039 format!("{} {}", num, unit)
29040 };
29041 return Expression::Interval(Box::new(crate::expressions::Interval {
29042 this: Some(Expression::string(&plural)),
29043 unit: None,
29044 }));
29045 }
29046 _ => {
29047 // Spark/Databricks/Hive: INTERVAL '1' DAY
29048 let iu = match unit {
29049 "DAY" => crate::expressions::IntervalUnit::Day,
29050 "HOUR" => crate::expressions::IntervalUnit::Hour,
29051 "MINUTE" => crate::expressions::IntervalUnit::Minute,
29052 "SECOND" => crate::expressions::IntervalUnit::Second,
29053 "WEEK" => crate::expressions::IntervalUnit::Week,
29054 "MONTH" => crate::expressions::IntervalUnit::Month,
29055 "YEAR" => crate::expressions::IntervalUnit::Year,
29056 _ => return expr,
29057 };
29058 return Expression::Interval(Box::new(crate::expressions::Interval {
29059 this: Some(Expression::string(num)),
29060 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
29061 unit: iu,
29062 use_plural: false,
29063 }),
29064 }));
29065 }
29066 }
29067 }
29068 }
29069 // If it's already an INTERVAL expression, pass through
29070 expr
29071 }
29072
29073 /// Rewrite SELECT expressions containing UNNEST into expanded form with CROSS JOINs.
29074 /// DuckDB: SELECT UNNEST(arr1), UNNEST(arr2) ->
29075 /// BigQuery: SELECT IF(pos = pos_2, col, NULL) AS col, ... FROM UNNEST(GENERATE_ARRAY(0, ...)) AS pos CROSS JOIN ...
29076 /// Presto: SELECT IF(_u.pos = _u_2.pos_2, _u_2.col) AS col, ... FROM UNNEST(SEQUENCE(1, ...)) AS _u(pos) CROSS JOIN ...
29077 fn rewrite_unnest_expansion(
29078 select: &crate::expressions::Select,
29079 target: DialectType,
29080 ) -> Option<crate::expressions::Select> {
29081 use crate::expressions::{
29082 Alias, BinaryOp, Column, From, Function, Identifier, Join, JoinKind, Literal,
29083 UnnestFunc,
29084 };
29085
29086 let index_offset: i64 = match target {
29087 DialectType::Presto | DialectType::Trino => 1,
29088 _ => 0, // BigQuery, Snowflake
29089 };
29090
29091 let if_func_name = match target {
29092 DialectType::Snowflake => "IFF",
29093 _ => "IF",
29094 };
29095
29096 let array_length_func = match target {
29097 DialectType::BigQuery => "ARRAY_LENGTH",
29098 DialectType::Presto | DialectType::Trino => "CARDINALITY",
29099 DialectType::Snowflake => "ARRAY_SIZE",
29100 _ => "ARRAY_LENGTH",
29101 };
29102
29103 let use_table_aliases = matches!(
29104 target,
29105 DialectType::Presto | DialectType::Trino | DialectType::Snowflake
29106 );
29107 let null_third_arg = matches!(target, DialectType::BigQuery | DialectType::Snowflake);
29108
29109 fn make_col(name: &str, table: Option<&str>) -> Expression {
29110 if let Some(tbl) = table {
29111 Expression::boxed_column(Column {
29112 name: Identifier::new(name.to_string()),
29113 table: Some(Identifier::new(tbl.to_string())),
29114 join_mark: false,
29115 trailing_comments: Vec::new(),
29116 span: None,
29117 inferred_type: None,
29118 })
29119 } else {
29120 Expression::Identifier(Identifier::new(name.to_string()))
29121 }
29122 }
29123
29124 fn make_join(this: Expression) -> Join {
29125 Join {
29126 this,
29127 on: None,
29128 using: Vec::new(),
29129 kind: JoinKind::Cross,
29130 use_inner_keyword: false,
29131 use_outer_keyword: false,
29132 deferred_condition: false,
29133 join_hint: None,
29134 match_condition: None,
29135 pivots: Vec::new(),
29136 comments: Vec::new(),
29137 nesting_group: 0,
29138 directed: false,
29139 }
29140 }
29141
29142 // Collect UNNEST info from SELECT expressions
29143 struct UnnestInfo {
29144 arr_expr: Expression,
29145 col_alias: String,
29146 pos_alias: String,
29147 source_alias: String,
29148 original_expr: Expression,
29149 has_outer_alias: Option<String>,
29150 }
29151
29152 let mut unnest_infos: Vec<UnnestInfo> = Vec::new();
29153 let mut col_counter = 0usize;
29154 let mut pos_counter = 1usize;
29155 let mut source_counter = 1usize;
29156
29157 fn extract_unnest_arg(expr: &Expression) -> Option<Expression> {
29158 match expr {
29159 Expression::Unnest(u) => Some(u.this.clone()),
29160 Expression::Function(f)
29161 if f.name.eq_ignore_ascii_case("UNNEST") && !f.args.is_empty() =>
29162 {
29163 Some(f.args[0].clone())
29164 }
29165 Expression::Alias(a) => extract_unnest_arg(&a.this),
29166 Expression::Add(op)
29167 | Expression::Sub(op)
29168 | Expression::Mul(op)
29169 | Expression::Div(op) => {
29170 extract_unnest_arg(&op.left).or_else(|| extract_unnest_arg(&op.right))
29171 }
29172 _ => None,
29173 }
29174 }
29175
29176 fn get_alias_name(expr: &Expression) -> Option<String> {
29177 if let Expression::Alias(a) = expr {
29178 Some(a.alias.name.clone())
29179 } else {
29180 None
29181 }
29182 }
29183
29184 for sel_expr in &select.expressions {
29185 if let Some(arr) = extract_unnest_arg(sel_expr) {
29186 col_counter += 1;
29187 pos_counter += 1;
29188 source_counter += 1;
29189
29190 let col_alias = if col_counter == 1 {
29191 "col".to_string()
29192 } else {
29193 format!("col_{}", col_counter)
29194 };
29195 let pos_alias = format!("pos_{}", pos_counter);
29196 let source_alias = format!("_u_{}", source_counter);
29197 let has_outer_alias = get_alias_name(sel_expr);
29198
29199 unnest_infos.push(UnnestInfo {
29200 arr_expr: arr,
29201 col_alias,
29202 pos_alias,
29203 source_alias,
29204 original_expr: sel_expr.clone(),
29205 has_outer_alias,
29206 });
29207 }
29208 }
29209
29210 if unnest_infos.is_empty() {
29211 return None;
29212 }
29213
29214 let series_alias = "pos".to_string();
29215 let series_source_alias = "_u".to_string();
29216 let tbl_ref = if use_table_aliases {
29217 Some(series_source_alias.as_str())
29218 } else {
29219 None
29220 };
29221
29222 // Build new SELECT expressions
29223 let mut new_select_exprs = Vec::new();
29224 for info in &unnest_infos {
29225 let actual_col_name = info.has_outer_alias.as_ref().unwrap_or(&info.col_alias);
29226 let src_ref = if use_table_aliases {
29227 Some(info.source_alias.as_str())
29228 } else {
29229 None
29230 };
29231
29232 let pos_col = make_col(&series_alias, tbl_ref);
29233 let unnest_pos_col = make_col(&info.pos_alias, src_ref);
29234 let col_ref = make_col(actual_col_name, src_ref);
29235
29236 let eq_cond = Expression::Eq(Box::new(BinaryOp::new(
29237 pos_col.clone(),
29238 unnest_pos_col.clone(),
29239 )));
29240 let mut if_args = vec![eq_cond, col_ref];
29241 if null_third_arg {
29242 if_args.push(Expression::Null(crate::expressions::Null));
29243 }
29244
29245 let if_expr =
29246 Expression::Function(Box::new(Function::new(if_func_name.to_string(), if_args)));
29247 let final_expr = Self::replace_unnest_with_if(&info.original_expr, &if_expr);
29248
29249 new_select_exprs.push(Expression::Alias(Box::new(Alias::new(
29250 final_expr,
29251 Identifier::new(actual_col_name.clone()),
29252 ))));
29253 }
29254
29255 // Build array size expressions for GREATEST
29256 let size_exprs: Vec<Expression> = unnest_infos
29257 .iter()
29258 .map(|info| {
29259 Expression::Function(Box::new(Function::new(
29260 array_length_func.to_string(),
29261 vec![info.arr_expr.clone()],
29262 )))
29263 })
29264 .collect();
29265
29266 let greatest =
29267 Expression::Function(Box::new(Function::new("GREATEST".to_string(), size_exprs)));
29268
29269 let series_end = if index_offset == 0 {
29270 Expression::Sub(Box::new(BinaryOp::new(
29271 greatest,
29272 Expression::Literal(Box::new(Literal::Number("1".to_string()))),
29273 )))
29274 } else {
29275 greatest
29276 };
29277
29278 // Build the position array source
29279 let series_unnest_expr = match target {
29280 DialectType::BigQuery => {
29281 let gen_array = Expression::Function(Box::new(Function::new(
29282 "GENERATE_ARRAY".to_string(),
29283 vec![
29284 Expression::Literal(Box::new(Literal::Number("0".to_string()))),
29285 series_end,
29286 ],
29287 )));
29288 Expression::Unnest(Box::new(UnnestFunc {
29289 this: gen_array,
29290 expressions: Vec::new(),
29291 with_ordinality: false,
29292 alias: None,
29293 offset_alias: None,
29294 }))
29295 }
29296 DialectType::Presto | DialectType::Trino => {
29297 let sequence = Expression::Function(Box::new(Function::new(
29298 "SEQUENCE".to_string(),
29299 vec![
29300 Expression::Literal(Box::new(Literal::Number("1".to_string()))),
29301 series_end,
29302 ],
29303 )));
29304 Expression::Unnest(Box::new(UnnestFunc {
29305 this: sequence,
29306 expressions: Vec::new(),
29307 with_ordinality: false,
29308 alias: None,
29309 offset_alias: None,
29310 }))
29311 }
29312 DialectType::Snowflake => {
29313 let range_end = Expression::Add(Box::new(BinaryOp::new(
29314 Expression::Paren(Box::new(crate::expressions::Paren {
29315 this: series_end,
29316 trailing_comments: Vec::new(),
29317 })),
29318 Expression::Literal(Box::new(Literal::Number("1".to_string()))),
29319 )));
29320 let gen_range = Expression::Function(Box::new(Function::new(
29321 "ARRAY_GENERATE_RANGE".to_string(),
29322 vec![
29323 Expression::Literal(Box::new(Literal::Number("0".to_string()))),
29324 range_end,
29325 ],
29326 )));
29327 let flatten_arg =
29328 Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
29329 name: Identifier::new("INPUT".to_string()),
29330 value: gen_range,
29331 separator: crate::expressions::NamedArgSeparator::DArrow,
29332 }));
29333 let flatten = Expression::Function(Box::new(Function::new(
29334 "FLATTEN".to_string(),
29335 vec![flatten_arg],
29336 )));
29337 Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])))
29338 }
29339 _ => return None,
29340 };
29341
29342 // Build series alias expression
29343 let series_alias_expr = if use_table_aliases {
29344 let col_aliases = if matches!(target, DialectType::Snowflake) {
29345 vec![
29346 Identifier::new("seq".to_string()),
29347 Identifier::new("key".to_string()),
29348 Identifier::new("path".to_string()),
29349 Identifier::new("index".to_string()),
29350 Identifier::new(series_alias.clone()),
29351 Identifier::new("this".to_string()),
29352 ]
29353 } else {
29354 vec![Identifier::new(series_alias.clone())]
29355 };
29356 Expression::Alias(Box::new(Alias {
29357 this: series_unnest_expr,
29358 alias: Identifier::new(series_source_alias.clone()),
29359 column_aliases: col_aliases,
29360 pre_alias_comments: Vec::new(),
29361 trailing_comments: Vec::new(),
29362 inferred_type: None,
29363 }))
29364 } else {
29365 Expression::Alias(Box::new(Alias::new(
29366 series_unnest_expr,
29367 Identifier::new(series_alias.clone()),
29368 )))
29369 };
29370
29371 // Build CROSS JOINs for each UNNEST
29372 let mut joins = Vec::new();
29373 for info in &unnest_infos {
29374 let actual_col_name = info.has_outer_alias.as_ref().unwrap_or(&info.col_alias);
29375
29376 let unnest_join_expr = match target {
29377 DialectType::BigQuery => {
29378 // UNNEST([1,2,3]) AS col WITH OFFSET AS pos_2
29379 let unnest = UnnestFunc {
29380 this: info.arr_expr.clone(),
29381 expressions: Vec::new(),
29382 with_ordinality: true,
29383 alias: Some(Identifier::new(actual_col_name.clone())),
29384 offset_alias: Some(Identifier::new(info.pos_alias.clone())),
29385 };
29386 Expression::Unnest(Box::new(unnest))
29387 }
29388 DialectType::Presto | DialectType::Trino => {
29389 let unnest = UnnestFunc {
29390 this: info.arr_expr.clone(),
29391 expressions: Vec::new(),
29392 with_ordinality: true,
29393 alias: None,
29394 offset_alias: None,
29395 };
29396 Expression::Alias(Box::new(Alias {
29397 this: Expression::Unnest(Box::new(unnest)),
29398 alias: Identifier::new(info.source_alias.clone()),
29399 column_aliases: vec![
29400 Identifier::new(actual_col_name.clone()),
29401 Identifier::new(info.pos_alias.clone()),
29402 ],
29403 pre_alias_comments: Vec::new(),
29404 trailing_comments: Vec::new(),
29405 inferred_type: None,
29406 }))
29407 }
29408 DialectType::Snowflake => {
29409 let flatten_arg =
29410 Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
29411 name: Identifier::new("INPUT".to_string()),
29412 value: info.arr_expr.clone(),
29413 separator: crate::expressions::NamedArgSeparator::DArrow,
29414 }));
29415 let flatten = Expression::Function(Box::new(Function::new(
29416 "FLATTEN".to_string(),
29417 vec![flatten_arg],
29418 )));
29419 let table_fn = Expression::Function(Box::new(Function::new(
29420 "TABLE".to_string(),
29421 vec![flatten],
29422 )));
29423 Expression::Alias(Box::new(Alias {
29424 this: table_fn,
29425 alias: Identifier::new(info.source_alias.clone()),
29426 column_aliases: vec![
29427 Identifier::new("seq".to_string()),
29428 Identifier::new("key".to_string()),
29429 Identifier::new("path".to_string()),
29430 Identifier::new(info.pos_alias.clone()),
29431 Identifier::new(actual_col_name.clone()),
29432 Identifier::new("this".to_string()),
29433 ],
29434 pre_alias_comments: Vec::new(),
29435 trailing_comments: Vec::new(),
29436 inferred_type: None,
29437 }))
29438 }
29439 _ => return None,
29440 };
29441
29442 joins.push(make_join(unnest_join_expr));
29443 }
29444
29445 // Build WHERE clause
29446 let mut where_conditions: Vec<Expression> = Vec::new();
29447 for info in &unnest_infos {
29448 let src_ref = if use_table_aliases {
29449 Some(info.source_alias.as_str())
29450 } else {
29451 None
29452 };
29453 let pos_col = make_col(&series_alias, tbl_ref);
29454 let unnest_pos_col = make_col(&info.pos_alias, src_ref);
29455
29456 let arr_size = Expression::Function(Box::new(Function::new(
29457 array_length_func.to_string(),
29458 vec![info.arr_expr.clone()],
29459 )));
29460
29461 let size_ref = if index_offset == 0 {
29462 Expression::Paren(Box::new(crate::expressions::Paren {
29463 this: Expression::Sub(Box::new(BinaryOp::new(
29464 arr_size,
29465 Expression::Literal(Box::new(Literal::Number("1".to_string()))),
29466 ))),
29467 trailing_comments: Vec::new(),
29468 }))
29469 } else {
29470 arr_size
29471 };
29472
29473 let eq = Expression::Eq(Box::new(BinaryOp::new(
29474 pos_col.clone(),
29475 unnest_pos_col.clone(),
29476 )));
29477 let gt = Expression::Gt(Box::new(BinaryOp::new(pos_col, size_ref.clone())));
29478 let pos_eq_size = Expression::Eq(Box::new(BinaryOp::new(unnest_pos_col, size_ref)));
29479 let and_cond = Expression::And(Box::new(BinaryOp::new(gt, pos_eq_size)));
29480 let paren_and = Expression::Paren(Box::new(crate::expressions::Paren {
29481 this: and_cond,
29482 trailing_comments: Vec::new(),
29483 }));
29484 let or_cond = Expression::Or(Box::new(BinaryOp::new(eq, paren_and)));
29485
29486 where_conditions.push(or_cond);
29487 }
29488
29489 let where_expr = if where_conditions.len() == 1 {
29490 // Single condition: no parens needed
29491 where_conditions.into_iter().next().unwrap()
29492 } else {
29493 // Multiple conditions: wrap each OR in parens, then combine with AND
29494 let wrap = |e: Expression| {
29495 Expression::Paren(Box::new(crate::expressions::Paren {
29496 this: e,
29497 trailing_comments: Vec::new(),
29498 }))
29499 };
29500 let mut iter = where_conditions.into_iter();
29501 let first = wrap(iter.next().unwrap());
29502 let second = wrap(iter.next().unwrap());
29503 let mut combined = Expression::Paren(Box::new(crate::expressions::Paren {
29504 this: Expression::And(Box::new(BinaryOp::new(first, second))),
29505 trailing_comments: Vec::new(),
29506 }));
29507 for cond in iter {
29508 combined = Expression::And(Box::new(BinaryOp::new(combined, wrap(cond))));
29509 }
29510 combined
29511 };
29512
29513 // Build the new SELECT
29514 let mut new_select = select.clone();
29515 new_select.expressions = new_select_exprs;
29516
29517 if new_select.from.is_some() {
29518 let mut all_joins = vec![make_join(series_alias_expr)];
29519 all_joins.extend(joins);
29520 new_select.joins.extend(all_joins);
29521 } else {
29522 new_select.from = Some(From {
29523 expressions: vec![series_alias_expr],
29524 });
29525 new_select.joins.extend(joins);
29526 }
29527
29528 if let Some(ref existing_where) = new_select.where_clause {
29529 let combined = Expression::And(Box::new(BinaryOp::new(
29530 existing_where.this.clone(),
29531 where_expr,
29532 )));
29533 new_select.where_clause = Some(crate::expressions::Where { this: combined });
29534 } else {
29535 new_select.where_clause = Some(crate::expressions::Where { this: where_expr });
29536 }
29537
29538 Some(new_select)
29539 }
29540
29541 /// Helper to replace UNNEST(...) inside an expression with a replacement expression.
29542 fn replace_unnest_with_if(original: &Expression, replacement: &Expression) -> Expression {
29543 match original {
29544 Expression::Unnest(_) => replacement.clone(),
29545 Expression::Function(f) if f.name.eq_ignore_ascii_case("UNNEST") => replacement.clone(),
29546 Expression::Alias(a) => Self::replace_unnest_with_if(&a.this, replacement),
29547 Expression::Add(op) => {
29548 let left = Self::replace_unnest_with_if(&op.left, replacement);
29549 let right = Self::replace_unnest_with_if(&op.right, replacement);
29550 Expression::Add(Box::new(crate::expressions::BinaryOp::new(left, right)))
29551 }
29552 Expression::Sub(op) => {
29553 let left = Self::replace_unnest_with_if(&op.left, replacement);
29554 let right = Self::replace_unnest_with_if(&op.right, replacement);
29555 Expression::Sub(Box::new(crate::expressions::BinaryOp::new(left, right)))
29556 }
29557 Expression::Mul(op) => {
29558 let left = Self::replace_unnest_with_if(&op.left, replacement);
29559 let right = Self::replace_unnest_with_if(&op.right, replacement);
29560 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(left, right)))
29561 }
29562 Expression::Div(op) => {
29563 let left = Self::replace_unnest_with_if(&op.left, replacement);
29564 let right = Self::replace_unnest_with_if(&op.right, replacement);
29565 Expression::Div(Box::new(crate::expressions::BinaryOp::new(left, right)))
29566 }
29567 _ => original.clone(),
29568 }
29569 }
29570
29571 /// Decompose a JSON path like `$.y[0].z` into individual parts: `["y", "0", "z"]`.
29572 /// Strips `$` prefix, handles bracket notation, quoted strings, and removes `[*]` wildcards.
29573 fn decompose_json_path(path: &str) -> Vec<String> {
29574 let mut parts = Vec::new();
29575 let path = if path.starts_with("$.") {
29576 &path[2..]
29577 } else if path.starts_with('$') {
29578 &path[1..]
29579 } else {
29580 path
29581 };
29582 if path.is_empty() {
29583 return parts;
29584 }
29585 let mut current = String::new();
29586 let chars: Vec<char> = path.chars().collect();
29587 let mut i = 0;
29588 while i < chars.len() {
29589 match chars[i] {
29590 '.' => {
29591 if !current.is_empty() {
29592 parts.push(current.clone());
29593 current.clear();
29594 }
29595 i += 1;
29596 }
29597 '[' => {
29598 if !current.is_empty() {
29599 parts.push(current.clone());
29600 current.clear();
29601 }
29602 i += 1;
29603 let mut bracket_content = String::new();
29604 while i < chars.len() && chars[i] != ']' {
29605 if chars[i] == '"' || chars[i] == '\'' {
29606 let quote = chars[i];
29607 i += 1;
29608 while i < chars.len() && chars[i] != quote {
29609 bracket_content.push(chars[i]);
29610 i += 1;
29611 }
29612 if i < chars.len() {
29613 i += 1;
29614 }
29615 } else {
29616 bracket_content.push(chars[i]);
29617 i += 1;
29618 }
29619 }
29620 if i < chars.len() {
29621 i += 1;
29622 }
29623 if bracket_content != "*" {
29624 parts.push(bracket_content);
29625 }
29626 }
29627 _ => {
29628 current.push(chars[i]);
29629 i += 1;
29630 }
29631 }
29632 }
29633 if !current.is_empty() {
29634 parts.push(current);
29635 }
29636 parts
29637 }
29638
29639 /// Strip `$` prefix from a JSON path, keeping the rest.
29640 /// `$.y[0].z` -> `y[0].z`, `$["a b"]` -> `["a b"]`
29641 fn strip_json_dollar_prefix(path: &str) -> String {
29642 if path.starts_with("$.") {
29643 path[2..].to_string()
29644 } else if path.starts_with('$') {
29645 path[1..].to_string()
29646 } else {
29647 path.to_string()
29648 }
29649 }
29650
29651 /// Strip `[*]` wildcards from a JSON path.
29652 /// `$.y[*]` -> `$.y`, `$.y[*].z` -> `$.y.z`
29653 fn strip_json_wildcards(path: &str) -> String {
29654 path.replace("[*]", "")
29655 .replace("..", ".") // Clean double dots from `$.y[*].z` -> `$.y..z`
29656 .trim_end_matches('.')
29657 .to_string()
29658 }
29659
29660 /// Convert bracket notation to dot notation for JSON paths.
29661 /// `$["a b"]` -> `$."a b"`, `$["key"]` -> `$.key`
29662 fn bracket_to_dot_notation(path: &str) -> String {
29663 let mut result = String::new();
29664 let chars: Vec<char> = path.chars().collect();
29665 let mut i = 0;
29666 while i < chars.len() {
29667 if chars[i] == '[' {
29668 // Read bracket content
29669 i += 1;
29670 let mut bracket_content = String::new();
29671 let mut is_quoted = false;
29672 let mut _quote_char = '"';
29673 while i < chars.len() && chars[i] != ']' {
29674 if chars[i] == '"' || chars[i] == '\'' {
29675 is_quoted = true;
29676 _quote_char = chars[i];
29677 i += 1;
29678 while i < chars.len() && chars[i] != _quote_char {
29679 bracket_content.push(chars[i]);
29680 i += 1;
29681 }
29682 if i < chars.len() {
29683 i += 1;
29684 }
29685 } else {
29686 bracket_content.push(chars[i]);
29687 i += 1;
29688 }
29689 }
29690 if i < chars.len() {
29691 i += 1;
29692 } // skip ]
29693 if bracket_content == "*" {
29694 // Keep wildcard as-is
29695 result.push_str("[*]");
29696 } else if is_quoted {
29697 // Quoted bracket -> dot notation with quotes
29698 result.push('.');
29699 result.push('"');
29700 result.push_str(&bracket_content);
29701 result.push('"');
29702 } else {
29703 // Numeric index -> keep as bracket
29704 result.push('[');
29705 result.push_str(&bracket_content);
29706 result.push(']');
29707 }
29708 } else {
29709 result.push(chars[i]);
29710 i += 1;
29711 }
29712 }
29713 result
29714 }
29715
29716 /// Convert JSON path bracket quoted strings to use single quotes instead of double quotes.
29717 /// `$["a b"]` -> `$['a b']`
29718 fn bracket_to_single_quotes(path: &str) -> String {
29719 let mut result = String::new();
29720 let chars: Vec<char> = path.chars().collect();
29721 let mut i = 0;
29722 while i < chars.len() {
29723 if chars[i] == '[' && i + 1 < chars.len() && chars[i + 1] == '"' {
29724 result.push('[');
29725 result.push('\'');
29726 i += 2; // skip [ and "
29727 while i < chars.len() && chars[i] != '"' {
29728 result.push(chars[i]);
29729 i += 1;
29730 }
29731 if i < chars.len() {
29732 i += 1;
29733 } // skip closing "
29734 result.push('\'');
29735 } else {
29736 result.push(chars[i]);
29737 i += 1;
29738 }
29739 }
29740 result
29741 }
29742
    /// Transform TSQL SELECT INTO -> CREATE TABLE AS for DuckDB/Snowflake
    /// or PostgreSQL #temp -> TEMPORARY.
    /// Also strips # from INSERT INTO #table for non-TSQL targets.
    ///
    /// Non-SELECT/INSERT expressions, and targets not listed below, pass
    /// through unchanged.
    fn transform_select_into(
        expr: Expression,
        _source: DialectType,
        target: DialectType,
    ) -> Expression {
        use crate::expressions::{CreateTable, Expression, TableRef};

        // Handle INSERT INTO #temp -> INSERT INTO temp for non-TSQL targets.
        // TSQL and Fabric understand the `#` temp-table prefix; others do not.
        if let Expression::Insert(ref insert) = expr {
            if insert.table.name.name.starts_with('#')
                && !matches!(target, DialectType::TSQL | DialectType::Fabric)
            {
                let mut new_insert = insert.clone();
                new_insert.table.name.name =
                    insert.table.name.name.trim_start_matches('#').to_string();
                return Expression::Insert(new_insert);
            }
            return expr;
        }

        if let Expression::Select(ref select) = expr {
            if let Some(ref into) = select.into {
                // The INTO target may be a table reference or a bare identifier;
                // anything else yields an empty name (treated as non-temp below).
                let table_name_raw = match &into.this {
                    Expression::Table(tr) => tr.name.name.clone(),
                    Expression::Identifier(id) => id.name.clone(),
                    _ => String::new(),
                };
                // A leading `#` (TSQL convention) or an explicit TEMPORARY flag
                // marks the target as a temp table.
                let is_temp = table_name_raw.starts_with('#') || into.temporary;
                let clean_name = table_name_raw.trim_start_matches('#').to_string();

                match target {
                    DialectType::DuckDB | DialectType::Snowflake => {
                        // SELECT INTO -> CREATE TABLE AS SELECT
                        let mut new_select = select.clone();
                        new_select.into = None;
                        // Build a minimal CREATE TABLE carrying only the name,
                        // temp flag, and the SELECT as its AS-clause.
                        let ct = CreateTable {
                            name: TableRef::new(clean_name),
                            on_cluster: None,
                            columns: Vec::new(),
                            constraints: Vec::new(),
                            if_not_exists: false,
                            temporary: is_temp,
                            or_replace: false,
                            table_modifier: None,
                            as_select: Some(Expression::Select(new_select)),
                            as_select_parenthesized: false,
                            on_commit: None,
                            clone_source: None,
                            clone_at_clause: None,
                            shallow_clone: false,
                            is_copy: false,
                            leading_comments: Vec::new(),
                            with_properties: Vec::new(),
                            teradata_post_name_options: Vec::new(),
                            with_data: None,
                            with_statistics: None,
                            teradata_indexes: Vec::new(),
                            with_cte: None,
                            properties: Vec::new(),
                            partition_of: None,
                            post_table_properties: Vec::new(),
                            mysql_table_options: Vec::new(),
                            inherits: Vec::new(),
                            on_property: None,
                            copy_grants: false,
                            using_template: None,
                            rollup: None,
                            uuid: None,
                        };
                        return Expression::CreateTable(Box::new(ct));
                    }
                    DialectType::PostgreSQL | DialectType::Redshift => {
                        // PostgreSQL: #foo -> INTO TEMPORARY foo
                        // Only rewrite when the temp-ness came from the `#`
                        // prefix (an already-TEMPORARY INTO is left as-is).
                        if is_temp && !into.temporary {
                            let mut new_select = select.clone();
                            let mut new_into = into.clone();
                            new_into.temporary = true;
                            new_into.unlogged = false;
                            new_into.this = Expression::Table(Box::new(TableRef::new(clean_name)));
                            new_select.into = Some(new_into);
                            Expression::Select(new_select)
                        } else {
                            expr
                        }
                    }
                    _ => expr,
                }
            } else {
                expr
            }
        } else {
            expr
        }
    }
29840
    /// Transform CREATE TABLE WITH properties for cross-dialect transpilation.
    /// Handles FORMAT, PARTITIONED_BY, and other Presto WITH properties.
    ///
    /// Mutates `ct` in place. Two passes:
    /// 1. Presto-style `with_properties` are kept (Presto family), converted to
    ///    STORED AS / TBLPROPERTIES (Hive), USING / TBLPROPERTIES (Spark and
    ///    Databricks), or stripped (DuckDB).
    /// 2. Hive-style `properties` (STORED AS, TBLPROPERTIES, PARTITIONED BY)
    ///    are converted into Presto WITH properties (or stripped for DuckDB);
    ///    for other targets only quoted STORED AS format names are unquoted.
    fn transform_create_table_properties(
        ct: &mut crate::expressions::CreateTable,
        _source: DialectType,
        target: DialectType,
    ) {
        use crate::expressions::{
            BinaryOp, BooleanLiteral, Expression, FileFormatProperty, Identifier, Literal,
            Properties,
        };

        // Helper to convert a raw property value string to the correct Expression
        let value_to_expr = |v: &str| -> Expression {
            let trimmed = v.trim();
            // Check if it's a quoted string (starts and ends with ')
            if trimmed.starts_with('\'') && trimmed.ends_with('\'') {
                Expression::Literal(Box::new(Literal::String(
                    trimmed[1..trimmed.len() - 1].to_string(),
                )))
            }
            // Check if it's a number
            else if trimmed.parse::<i64>().is_ok() || trimmed.parse::<f64>().is_ok() {
                Expression::Literal(Box::new(Literal::Number(trimmed.to_string())))
            }
            // Check if it's ARRAY[...] or ARRAY(...)
            else if trimmed.len() >= 5 && trimmed[..5].eq_ignore_ascii_case("ARRAY") {
                // Convert ARRAY['y'] to ARRAY('y') for Hive/Spark
                let inner = trimmed
                    .trim_start_matches(|c: char| c.is_alphabetic()) // Remove ARRAY
                    .trim_start_matches('[')
                    .trim_start_matches('(')
                    .trim_end_matches(']')
                    .trim_end_matches(')');
                let elements: Vec<Expression> = inner
                    .split(',')
                    .map(|e| {
                        let elem = e.trim().trim_matches('\'');
                        Expression::Literal(Box::new(Literal::String(elem.to_string())))
                    })
                    .collect();
                Expression::Function(Box::new(crate::expressions::Function::new(
                    "ARRAY".to_string(),
                    elements,
                )))
            }
            // Otherwise, just output as identifier (unquoted)
            else {
                Expression::Identifier(Identifier::new(trimmed.to_string()))
            }
        };

        // Nothing to do when the table carries no properties of either style.
        if ct.with_properties.is_empty() && ct.properties.is_empty() {
            return;
        }

        // Handle Presto-style WITH properties
        if !ct.with_properties.is_empty() {
            // Extract FORMAT property and remaining properties
            let mut format_value: Option<String> = None;
            let mut partitioned_by: Option<String> = None;
            let mut other_props: Vec<(String, String)> = Vec::new();

            // drain(..) empties with_properties; the target-specific arms
            // below re-push whatever should survive.
            for (key, value) in ct.with_properties.drain(..) {
                if key.eq_ignore_ascii_case("FORMAT") {
                    // Strip surrounding quotes from value if present
                    format_value = Some(value.trim_matches('\'').to_string());
                } else if key.eq_ignore_ascii_case("PARTITIONED_BY") {
                    partitioned_by = Some(value);
                } else {
                    other_props.push((key, value));
                }
            }

            match target {
                DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                    // Presto: keep WITH properties but lowercase 'format' key
                    if let Some(fmt) = format_value {
                        ct.with_properties
                            .push(("format".to_string(), format!("'{}'", fmt)));
                    }
                    if let Some(part) = partitioned_by {
                        // Convert (col1, col2) to ARRAY['col1', 'col2'] format
                        let trimmed = part.trim();
                        let inner = trimmed.trim_start_matches('(').trim_end_matches(')');
                        // Also handle ARRAY['...'] format - keep as-is
                        if trimmed.len() >= 5 && trimmed[..5].eq_ignore_ascii_case("ARRAY") {
                            ct.with_properties
                                .push(("PARTITIONED_BY".to_string(), part));
                        } else {
                            // Parse column names from the parenthesized list
                            let cols: Vec<&str> = inner
                                .split(',')
                                .map(|c| c.trim().trim_matches('"').trim_matches('\''))
                                .collect();
                            let array_val = format!(
                                "ARRAY[{}]",
                                cols.iter()
                                    .map(|c| format!("'{}'", c))
                                    .collect::<Vec<_>>()
                                    .join(", ")
                            );
                            ct.with_properties
                                .push(("PARTITIONED_BY".to_string(), array_val));
                        }
                    }
                    ct.with_properties.extend(other_props);
                }
                DialectType::Hive => {
                    // Hive: FORMAT -> STORED AS, other props -> TBLPROPERTIES
                    if let Some(fmt) = format_value {
                        ct.properties.push(Expression::FileFormatProperty(Box::new(
                            FileFormatProperty {
                                this: Some(Box::new(Expression::Identifier(Identifier::new(fmt)))),
                                expressions: vec![],
                                // hive_format = Some(true) selects STORED AS rendering.
                                hive_format: Some(Box::new(Expression::Boolean(BooleanLiteral {
                                    value: true,
                                }))),
                            },
                        )));
                    }
                    if let Some(_part) = partitioned_by {
                        // PARTITIONED_BY handling is complex - move columns to partitioned by
                        // For now, the partition columns are extracted from the column list
                        Self::apply_partitioned_by(ct, &_part, target);
                    }
                    if !other_props.is_empty() {
                        // Remaining key/value pairs become TBLPROPERTIES ('k' = v, ...).
                        let eq_exprs: Vec<Expression> = other_props
                            .into_iter()
                            .map(|(k, v)| {
                                Expression::Eq(Box::new(BinaryOp::new(
                                    Expression::Literal(Box::new(Literal::String(k))),
                                    value_to_expr(&v),
                                )))
                            })
                            .collect();
                        ct.properties
                            .push(Expression::Properties(Box::new(Properties {
                                expressions: eq_exprs,
                            })));
                    }
                }
                DialectType::Spark | DialectType::Databricks => {
                    // Spark: FORMAT -> USING, other props -> TBLPROPERTIES
                    if let Some(fmt) = format_value {
                        ct.properties.push(Expression::FileFormatProperty(Box::new(
                            FileFormatProperty {
                                this: Some(Box::new(Expression::Identifier(Identifier::new(fmt)))),
                                expressions: vec![],
                                hive_format: None, // None means USING syntax
                            },
                        )));
                    }
                    if let Some(_part) = partitioned_by {
                        Self::apply_partitioned_by(ct, &_part, target);
                    }
                    if !other_props.is_empty() {
                        let eq_exprs: Vec<Expression> = other_props
                            .into_iter()
                            .map(|(k, v)| {
                                Expression::Eq(Box::new(BinaryOp::new(
                                    Expression::Literal(Box::new(Literal::String(k))),
                                    value_to_expr(&v),
                                )))
                            })
                            .collect();
                        ct.properties
                            .push(Expression::Properties(Box::new(Properties {
                                expressions: eq_exprs,
                            })));
                    }
                }
                DialectType::DuckDB => {
                    // DuckDB: strip all WITH properties (FORMAT, PARTITIONED_BY, etc.)
                    // Keep nothing
                }
                _ => {
                    // For other dialects, keep WITH properties as-is
                    if let Some(fmt) = format_value {
                        ct.with_properties
                            .push(("FORMAT".to_string(), format!("'{}'", fmt)));
                    }
                    if let Some(part) = partitioned_by {
                        ct.with_properties
                            .push(("PARTITIONED_BY".to_string(), part));
                    }
                    ct.with_properties.extend(other_props);
                }
            }
        }

        // Handle STORED AS 'PARQUET' (quoted format name) -> STORED AS PARQUET (unquoted)
        // and Hive STORED AS -> Presto WITH (format=...) conversion
        if !ct.properties.is_empty() {
            let is_presto_target = matches!(
                target,
                DialectType::Presto | DialectType::Trino | DialectType::Athena
            );
            let is_duckdb_target = matches!(target, DialectType::DuckDB);

            if is_presto_target || is_duckdb_target {
                // Rebuild the properties list: converted entries move into
                // with_properties; unconverted ones are kept (except for DuckDB,
                // which drops everything).
                let mut new_properties = Vec::new();
                for prop in ct.properties.drain(..) {
                    match &prop {
                        Expression::FileFormatProperty(ffp) => {
                            if is_presto_target {
                                // Convert STORED AS/USING to WITH (format=...)
                                if let Some(ref fmt_expr) = ffp.this {
                                    let fmt_str = match fmt_expr.as_ref() {
                                        Expression::Identifier(id) => id.name.clone(),
                                        Expression::Literal(lit)
                                            if matches!(lit.as_ref(), Literal::String(_)) =>
                                        {
                                            let Literal::String(s) = lit.as_ref() else {
                                                unreachable!()
                                            };
                                            s.clone()
                                        }
                                        _ => {
                                            // Unrecognized format expression: keep the
                                            // property untouched and move on.
                                            new_properties.push(prop);
                                            continue;
                                        }
                                    };
                                    ct.with_properties
                                        .push(("format".to_string(), format!("'{}'", fmt_str)));
                                }
                            }
                            // DuckDB: just strip file format properties
                        }
                        // Convert TBLPROPERTIES to WITH properties for Presto target
                        Expression::Properties(props) if is_presto_target => {
                            for expr in &props.expressions {
                                if let Expression::Eq(eq) = expr {
                                    // Extract key and value from the Eq expression
                                    let key = match &eq.left {
                                        Expression::Literal(lit)
                                            if matches!(lit.as_ref(), Literal::String(_)) =>
                                        {
                                            let Literal::String(s) = lit.as_ref() else {
                                                unreachable!()
                                            };
                                            s.clone()
                                        }
                                        Expression::Identifier(id) => id.name.clone(),
                                        _ => continue,
                                    };
                                    let value = match &eq.right {
                                        Expression::Literal(lit)
                                            if matches!(lit.as_ref(), Literal::String(_)) =>
                                        {
                                            let Literal::String(s) = lit.as_ref() else {
                                                unreachable!()
                                            };
                                            // String values are re-quoted for WITH syntax.
                                            format!("'{}'", s)
                                        }
                                        Expression::Literal(lit)
                                            if matches!(lit.as_ref(), Literal::Number(_)) =>
                                        {
                                            let Literal::Number(n) = lit.as_ref() else {
                                                unreachable!()
                                            };
                                            n.clone()
                                        }
                                        Expression::Identifier(id) => id.name.clone(),
                                        _ => continue,
                                    };
                                    ct.with_properties.push((key, value));
                                }
                            }
                        }
                        // Convert PartitionedByProperty for Presto target
                        Expression::PartitionedByProperty(ref pbp) if is_presto_target => {
                            // Check if it contains ColumnDef expressions (Hive-style with types)
                            if let Expression::Tuple(ref tuple) = *pbp.this {
                                let mut col_names: Vec<String> = Vec::new();
                                let mut col_defs: Vec<crate::expressions::ColumnDef> = Vec::new();
                                let mut has_col_defs = false;
                                for expr in &tuple.expressions {
                                    if let Expression::ColumnDef(ref cd) = expr {
                                        has_col_defs = true;
                                        col_names.push(cd.name.name.clone());
                                        col_defs.push(*cd.clone());
                                    } else if let Expression::Column(ref col) = expr {
                                        col_names.push(col.name.name.clone());
                                    } else if let Expression::Identifier(ref id) = expr {
                                        col_names.push(id.name.clone());
                                    } else {
                                        // For function expressions like MONTHS(y), serialize to SQL
                                        let generic = Dialect::get(DialectType::Generic);
                                        if let Ok(sql) = generic.generate(expr) {
                                            col_names.push(sql);
                                        }
                                    }
                                }
                                if has_col_defs {
                                    // Merge partition column defs into the main column list
                                    // (Presto keeps partition columns in the schema).
                                    for cd in col_defs {
                                        ct.columns.push(cd);
                                    }
                                }
                                if !col_names.is_empty() {
                                    // Add PARTITIONED_BY property
                                    let array_val = format!(
                                        "ARRAY[{}]",
                                        col_names
                                            .iter()
                                            .map(|n| format!("'{}'", n))
                                            .collect::<Vec<_>>()
                                            .join(", ")
                                    );
                                    ct.with_properties
                                        .push(("PARTITIONED_BY".to_string(), array_val));
                                }
                            }
                            // Skip - don't keep in properties
                        }
                        _ => {
                            if !is_duckdb_target {
                                new_properties.push(prop);
                            }
                        }
                    }
                }
                ct.properties = new_properties;
            } else {
                // For Hive/Spark targets, unquote format names in STORED AS
                for prop in &mut ct.properties {
                    if let Expression::FileFormatProperty(ref mut ffp) = prop {
                        if let Some(ref mut fmt_expr) = ffp.this {
                            if let Expression::Literal(lit) = fmt_expr.as_ref() {
                                if let Literal::String(s) = lit.as_ref() {
                                    // Convert STORED AS 'PARQUET' to STORED AS PARQUET (unquote)
                                    let unquoted = s.clone();
                                    *fmt_expr =
                                        Box::new(Expression::Identifier(Identifier::new(unquoted)));
                                }
                            }
                        }
                    }
                }
            }
        }
    }
30184
30185 /// Apply PARTITIONED_BY conversion: move partition columns from column list to PARTITIONED BY
30186 fn apply_partitioned_by(
30187 ct: &mut crate::expressions::CreateTable,
30188 partitioned_by_value: &str,
30189 target: DialectType,
30190 ) {
30191 use crate::expressions::{Column, Expression, Identifier, PartitionedByProperty, Tuple};
30192
30193 // Parse the ARRAY['col1', 'col2'] value to extract column names
30194 let mut col_names: Vec<String> = Vec::new();
30195 // The value looks like ARRAY['y', 'z'] or ARRAY('y', 'z')
30196 let inner = partitioned_by_value
30197 .trim()
30198 .trim_start_matches("ARRAY")
30199 .trim_start_matches('[')
30200 .trim_start_matches('(')
30201 .trim_end_matches(']')
30202 .trim_end_matches(')');
30203 for part in inner.split(',') {
30204 let col = part.trim().trim_matches('\'').trim_matches('"');
30205 if !col.is_empty() {
30206 col_names.push(col.to_string());
30207 }
30208 }
30209
30210 if col_names.is_empty() {
30211 return;
30212 }
30213
30214 if matches!(target, DialectType::Hive) {
30215 // Hive: PARTITIONED BY (col_name type, ...) - move columns out of column list
30216 let mut partition_col_defs = Vec::new();
30217 for col_name in &col_names {
30218 // Find and remove from columns
30219 if let Some(pos) = ct
30220 .columns
30221 .iter()
30222 .position(|c| c.name.name.eq_ignore_ascii_case(col_name))
30223 {
30224 let col_def = ct.columns.remove(pos);
30225 partition_col_defs.push(Expression::ColumnDef(Box::new(col_def)));
30226 }
30227 }
30228 if !partition_col_defs.is_empty() {
30229 ct.properties
30230 .push(Expression::PartitionedByProperty(Box::new(
30231 PartitionedByProperty {
30232 this: Box::new(Expression::Tuple(Box::new(Tuple {
30233 expressions: partition_col_defs,
30234 }))),
30235 },
30236 )));
30237 }
30238 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
30239 // Spark: PARTITIONED BY (col1, col2) - just column names, keep in column list
30240 // Use quoted identifiers to match the quoting style of the original column definitions
30241 let partition_exprs: Vec<Expression> = col_names
30242 .iter()
30243 .map(|name| {
30244 // Check if the column exists in the column list and use its quoting
30245 let is_quoted = ct
30246 .columns
30247 .iter()
30248 .any(|c| c.name.name.eq_ignore_ascii_case(name) && c.name.quoted);
30249 let ident = if is_quoted {
30250 Identifier::quoted(name.clone())
30251 } else {
30252 Identifier::new(name.clone())
30253 };
30254 Expression::boxed_column(Column {
30255 name: ident,
30256 table: None,
30257 join_mark: false,
30258 trailing_comments: Vec::new(),
30259 span: None,
30260 inferred_type: None,
30261 })
30262 })
30263 .collect();
30264 ct.properties
30265 .push(Expression::PartitionedByProperty(Box::new(
30266 PartitionedByProperty {
30267 this: Box::new(Expression::Tuple(Box::new(Tuple {
30268 expressions: partition_exprs,
30269 }))),
30270 },
30271 )));
30272 }
30273 // DuckDB: strip partitioned_by entirely (already handled)
30274 }
30275
30276 /// Convert a DataType to Spark's type string format (using angle brackets)
30277 fn data_type_to_spark_string(dt: &crate::expressions::DataType) -> String {
30278 use crate::expressions::DataType;
30279 match dt {
30280 DataType::Int { .. } => "INT".to_string(),
30281 DataType::BigInt { .. } => "BIGINT".to_string(),
30282 DataType::SmallInt { .. } => "SMALLINT".to_string(),
30283 DataType::TinyInt { .. } => "TINYINT".to_string(),
30284 DataType::Float { .. } => "FLOAT".to_string(),
30285 DataType::Double { .. } => "DOUBLE".to_string(),
30286 DataType::Decimal {
30287 precision: Some(p),
30288 scale: Some(s),
30289 } => format!("DECIMAL({}, {})", p, s),
30290 DataType::Decimal {
30291 precision: Some(p), ..
30292 } => format!("DECIMAL({})", p),
30293 DataType::Decimal { .. } => "DECIMAL".to_string(),
30294 DataType::VarChar { .. } | DataType::Text | DataType::String { .. } => {
30295 "STRING".to_string()
30296 }
30297 DataType::Char { .. } => "STRING".to_string(),
30298 DataType::Boolean => "BOOLEAN".to_string(),
30299 DataType::Date => "DATE".to_string(),
30300 DataType::Timestamp { .. } => "TIMESTAMP".to_string(),
30301 DataType::Json | DataType::JsonB => "STRING".to_string(),
30302 DataType::Binary { .. } => "BINARY".to_string(),
30303 DataType::Array { element_type, .. } => {
30304 format!("ARRAY<{}>", Self::data_type_to_spark_string(element_type))
30305 }
30306 DataType::Map {
30307 key_type,
30308 value_type,
30309 } => format!(
30310 "MAP<{}, {}>",
30311 Self::data_type_to_spark_string(key_type),
30312 Self::data_type_to_spark_string(value_type)
30313 ),
30314 DataType::Struct { fields, .. } => {
30315 let field_strs: Vec<String> = fields
30316 .iter()
30317 .map(|f| {
30318 if f.name.is_empty() {
30319 Self::data_type_to_spark_string(&f.data_type)
30320 } else {
30321 format!(
30322 "{}: {}",
30323 f.name,
30324 Self::data_type_to_spark_string(&f.data_type)
30325 )
30326 }
30327 })
30328 .collect();
30329 format!("STRUCT<{}>", field_strs.join(", "))
30330 }
30331 DataType::Custom { name } => name.clone(),
30332 _ => format!("{:?}", dt),
30333 }
30334 }
30335
30336 /// Extract value and unit from an Interval expression
30337 /// Returns (value_expression, IntervalUnit)
30338 fn extract_interval_parts(
30339 interval_expr: &Expression,
30340 ) -> (Expression, crate::expressions::IntervalUnit) {
30341 use crate::expressions::{IntervalUnit, IntervalUnitSpec};
30342
30343 if let Expression::Interval(iv) = interval_expr {
30344 let val = iv.this.clone().unwrap_or(Expression::number(0));
30345 let unit = match &iv.unit {
30346 Some(IntervalUnitSpec::Simple { unit, .. }) => *unit,
30347 None => {
30348 // Unit might be embedded in the string value (Snowflake format: '5 DAY')
30349 if let Expression::Literal(lit) = &val {
30350 if let crate::expressions::Literal::String(s) = lit.as_ref() {
30351 let parts: Vec<&str> = s.trim().splitn(2, ' ').collect();
30352 if parts.len() == 2 {
30353 let unit_str = parts[1].trim().to_ascii_uppercase();
30354 let parsed_unit = match unit_str.as_str() {
30355 "YEAR" | "YEARS" => IntervalUnit::Year,
30356 "QUARTER" | "QUARTERS" => IntervalUnit::Quarter,
30357 "MONTH" | "MONTHS" => IntervalUnit::Month,
30358 "WEEK" | "WEEKS" | "ISOWEEK" => IntervalUnit::Week,
30359 "DAY" | "DAYS" => IntervalUnit::Day,
30360 "HOUR" | "HOURS" => IntervalUnit::Hour,
30361 "MINUTE" | "MINUTES" => IntervalUnit::Minute,
30362 "SECOND" | "SECONDS" => IntervalUnit::Second,
30363 "MILLISECOND" | "MILLISECONDS" => IntervalUnit::Millisecond,
30364 "MICROSECOND" | "MICROSECONDS" => IntervalUnit::Microsecond,
30365 _ => IntervalUnit::Day,
30366 };
30367 // Return just the numeric part as value and parsed unit
30368 return (
30369 Expression::Literal(Box::new(
30370 crate::expressions::Literal::String(parts[0].to_string()),
30371 )),
30372 parsed_unit,
30373 );
30374 }
30375 IntervalUnit::Day
30376 } else {
30377 IntervalUnit::Day
30378 }
30379 } else {
30380 IntervalUnit::Day
30381 }
30382 }
30383 _ => IntervalUnit::Day,
30384 };
30385 (val, unit)
30386 } else {
30387 // Not an interval - pass through
30388 (interval_expr.clone(), crate::expressions::IntervalUnit::Day)
30389 }
30390 }
30391
30392 /// Normalize BigQuery-specific functions to standard forms that target dialects can handle
30393 fn normalize_bigquery_function(
30394 e: Expression,
30395 source: DialectType,
30396 target: DialectType,
30397 ) -> Result<Expression> {
30398 use crate::expressions::{BinaryOp, Cast, DataType, Function, Identifier, Literal, Paren};
30399
30400 let f = if let Expression::Function(f) = e {
30401 *f
30402 } else {
30403 return Ok(e);
30404 };
30405 let name = f.name.to_ascii_uppercase();
30406 let mut args = f.args;
30407
        /// Helper to extract unit string from an identifier, column, or literal expression.
        /// The result is always ASCII-uppercased; unrecognized node kinds fall back to "DAY".
        fn get_unit_str(expr: &Expression) -> String {
            match expr {
                Expression::Identifier(id) => id.name.to_ascii_uppercase(),
                Expression::Var(v) => v.this.to_ascii_uppercase(),
                Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
                    let Literal::String(s) = lit.as_ref() else {
                        unreachable!()
                    };
                    s.to_ascii_uppercase()
                }
                Expression::Column(col) => col.name.name.to_ascii_uppercase(),
                // Handle WEEK(MONDAY), WEEK(SUNDAY) etc. which are parsed as Function("WEEK", [Column("MONDAY")])
                Expression::Function(f) => {
                    let base = f.name.to_ascii_uppercase();
                    if !f.args.is_empty() {
                        // e.g., WEEK(MONDAY) -> "WEEK(MONDAY)"
                        // Only the first argument is used; recurse to stringify it.
                        let inner = get_unit_str(&f.args[0]);
                        format!("{}({})", base, inner)
                    } else {
                        base
                    }
                }
                // Default when the unit cannot be determined from the node.
                _ => "DAY".to_string(),
            }
        }
30434
30435 /// Parse unit string to IntervalUnit
30436 fn parse_interval_unit(s: &str) -> crate::expressions::IntervalUnit {
30437 match s {
30438 "YEAR" => crate::expressions::IntervalUnit::Year,
30439 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
30440 "MONTH" => crate::expressions::IntervalUnit::Month,
30441 "WEEK" | "ISOWEEK" => crate::expressions::IntervalUnit::Week,
30442 "DAY" => crate::expressions::IntervalUnit::Day,
30443 "HOUR" => crate::expressions::IntervalUnit::Hour,
30444 "MINUTE" => crate::expressions::IntervalUnit::Minute,
30445 "SECOND" => crate::expressions::IntervalUnit::Second,
30446 "MILLISECOND" => crate::expressions::IntervalUnit::Millisecond,
30447 "MICROSECOND" => crate::expressions::IntervalUnit::Microsecond,
30448 _ if s.starts_with("WEEK(") => crate::expressions::IntervalUnit::Week,
30449 _ => crate::expressions::IntervalUnit::Day,
30450 }
30451 }
30452
30453 match name.as_str() {
30454 // TIMESTAMP_DIFF(date1, date2, unit) -> TIMESTAMPDIFF(unit, date2, date1)
30455 // (BigQuery: result = date1 - date2, Standard: result = end - start)
30456 "TIMESTAMP_DIFF" | "DATETIME_DIFF" | "TIME_DIFF" if args.len() == 3 => {
30457 let date1 = args.remove(0);
30458 let date2 = args.remove(0);
30459 let unit_expr = args.remove(0);
30460 let unit_str = get_unit_str(&unit_expr);
30461
30462 if matches!(target, DialectType::BigQuery) {
30463 // BigQuery -> BigQuery: just uppercase the unit
30464 let unit = Expression::Identifier(Identifier::new(unit_str.clone()));
30465 return Ok(Expression::Function(Box::new(Function::new(
30466 f.name,
30467 vec![date1, date2, unit],
30468 ))));
30469 }
30470
30471 // For Snowflake: use TimestampDiff expression so it generates TIMESTAMPDIFF
30472 // (Function("TIMESTAMPDIFF") would be converted to DATEDIFF by Snowflake's function normalization)
30473 if matches!(target, DialectType::Snowflake) {
30474 return Ok(Expression::TimestampDiff(Box::new(
30475 crate::expressions::TimestampDiff {
30476 this: Box::new(date2),
30477 expression: Box::new(date1),
30478 unit: Some(unit_str),
30479 },
30480 )));
30481 }
30482
30483 // For DuckDB: DATE_DIFF('UNIT', start, end) with proper CAST
30484 if matches!(target, DialectType::DuckDB) {
30485 let (cast_d1, cast_d2) = if name == "TIME_DIFF" {
30486 // CAST to TIME
30487 let cast_fn = |e: Expression| -> Expression {
30488 match e {
30489 Expression::Literal(lit)
30490 if matches!(lit.as_ref(), Literal::String(_)) =>
30491 {
30492 let Literal::String(s) = lit.as_ref() else {
30493 unreachable!()
30494 };
30495 Expression::Cast(Box::new(Cast {
30496 this: Expression::Literal(Box::new(Literal::String(
30497 s.clone(),
30498 ))),
30499 to: DataType::Custom {
30500 name: "TIME".to_string(),
30501 },
30502 trailing_comments: vec![],
30503 double_colon_syntax: false,
30504 format: None,
30505 default: None,
30506 inferred_type: None,
30507 }))
30508 }
30509 other => other,
30510 }
30511 };
30512 (cast_fn(date1), cast_fn(date2))
30513 } else if name == "DATETIME_DIFF" {
30514 // CAST to TIMESTAMP
30515 (
30516 Self::ensure_cast_timestamp(date1),
30517 Self::ensure_cast_timestamp(date2),
30518 )
30519 } else {
30520 // TIMESTAMP_DIFF: CAST to TIMESTAMPTZ
30521 (
30522 Self::ensure_cast_timestamptz(date1),
30523 Self::ensure_cast_timestamptz(date2),
30524 )
30525 };
30526 return Ok(Expression::Function(Box::new(Function::new(
30527 "DATE_DIFF".to_string(),
30528 vec![
30529 Expression::Literal(Box::new(Literal::String(unit_str))),
30530 cast_d2,
30531 cast_d1,
30532 ],
30533 ))));
30534 }
30535
30536 // Convert to standard TIMESTAMPDIFF(unit, start, end)
30537 let unit = Expression::Identifier(Identifier::new(unit_str));
30538 Ok(Expression::Function(Box::new(Function::new(
30539 "TIMESTAMPDIFF".to_string(),
30540 vec![unit, date2, date1],
30541 ))))
30542 }
30543
30544 // DATEDIFF(unit, start, end) -> target-specific form
30545 // Used by: Redshift, Snowflake, TSQL, Databricks, Spark
30546 "DATEDIFF" if args.len() == 3 => {
30547 let arg0 = args.remove(0);
30548 let arg1 = args.remove(0);
30549 let arg2 = args.remove(0);
30550 let unit_str = get_unit_str(&arg0);
30551
30552 // Redshift DATEDIFF(unit, start, end) order: result = end - start
30553 // Snowflake DATEDIFF(unit, start, end) order: result = end - start
30554 // TSQL DATEDIFF(unit, start, end) order: result = end - start
30555
30556 if matches!(target, DialectType::Snowflake) {
30557 // Snowflake: DATEDIFF(UNIT, start, end) - uppercase unit
30558 let unit = Expression::Identifier(Identifier::new(unit_str));
30559 return Ok(Expression::Function(Box::new(Function::new(
30560 "DATEDIFF".to_string(),
30561 vec![unit, arg1, arg2],
30562 ))));
30563 }
30564
30565 if matches!(target, DialectType::DuckDB) {
30566 // DuckDB: DATE_DIFF('UNIT', start, end) with CAST
30567 let cast_d1 = Self::ensure_cast_timestamp(arg1);
30568 let cast_d2 = Self::ensure_cast_timestamp(arg2);
30569 return Ok(Expression::Function(Box::new(Function::new(
30570 "DATE_DIFF".to_string(),
30571 vec![
30572 Expression::Literal(Box::new(Literal::String(unit_str))),
30573 cast_d1,
30574 cast_d2,
30575 ],
30576 ))));
30577 }
30578
30579 if matches!(target, DialectType::BigQuery) {
30580 // BigQuery: DATE_DIFF(end_date, start_date, UNIT) - reversed args, CAST to DATETIME
30581 let cast_d1 = Self::ensure_cast_datetime(arg1);
30582 let cast_d2 = Self::ensure_cast_datetime(arg2);
30583 let unit = Expression::Identifier(Identifier::new(unit_str));
30584 return Ok(Expression::Function(Box::new(Function::new(
30585 "DATE_DIFF".to_string(),
30586 vec![cast_d2, cast_d1, unit],
30587 ))));
30588 }
30589
30590 if matches!(target, DialectType::Spark | DialectType::Databricks) {
30591 // Spark/Databricks: DATEDIFF(UNIT, start, end) - uppercase unit
30592 let unit = Expression::Identifier(Identifier::new(unit_str));
30593 return Ok(Expression::Function(Box::new(Function::new(
30594 "DATEDIFF".to_string(),
30595 vec![unit, arg1, arg2],
30596 ))));
30597 }
30598
30599 if matches!(target, DialectType::Hive) {
30600 // Hive: DATEDIFF(end, start) for DAY only, use MONTHS_BETWEEN for MONTH
30601 match unit_str.as_str() {
30602 "MONTH" => {
30603 return Ok(Expression::Function(Box::new(Function::new(
30604 "CAST".to_string(),
30605 vec![Expression::Function(Box::new(Function::new(
30606 "MONTHS_BETWEEN".to_string(),
30607 vec![arg2, arg1],
30608 )))],
30609 ))));
30610 }
30611 "WEEK" => {
30612 return Ok(Expression::Cast(Box::new(Cast {
30613 this: Expression::Div(Box::new(crate::expressions::BinaryOp::new(
30614 Expression::Function(Box::new(Function::new(
30615 "DATEDIFF".to_string(),
30616 vec![arg2, arg1],
30617 ))),
30618 Expression::Literal(Box::new(Literal::Number("7".to_string()))),
30619 ))),
30620 to: DataType::Int {
30621 length: None,
30622 integer_spelling: false,
30623 },
30624 trailing_comments: vec![],
30625 double_colon_syntax: false,
30626 format: None,
30627 default: None,
30628 inferred_type: None,
30629 })));
30630 }
30631 _ => {
30632 // Default: DATEDIFF(end, start) for DAY
30633 return Ok(Expression::Function(Box::new(Function::new(
30634 "DATEDIFF".to_string(),
30635 vec![arg2, arg1],
30636 ))));
30637 }
30638 }
30639 }
30640
30641 if matches!(
30642 target,
30643 DialectType::Presto | DialectType::Trino | DialectType::Athena
30644 ) {
30645 // Presto/Trino: DATE_DIFF('UNIT', start, end)
30646 return Ok(Expression::Function(Box::new(Function::new(
30647 "DATE_DIFF".to_string(),
30648 vec![
30649 Expression::Literal(Box::new(Literal::String(unit_str))),
30650 arg1,
30651 arg2,
30652 ],
30653 ))));
30654 }
30655
30656 if matches!(target, DialectType::TSQL) {
30657 // TSQL: DATEDIFF(UNIT, start, CAST(end AS DATETIME2))
30658 let cast_d2 = Self::ensure_cast_datetime2(arg2);
30659 let unit = Expression::Identifier(Identifier::new(unit_str));
30660 return Ok(Expression::Function(Box::new(Function::new(
30661 "DATEDIFF".to_string(),
30662 vec![unit, arg1, cast_d2],
30663 ))));
30664 }
30665
30666 if matches!(target, DialectType::PostgreSQL) {
30667 // PostgreSQL doesn't have DATEDIFF - use date subtraction or EXTRACT
30668 // For now, use DATEDIFF (passthrough) with uppercased unit
30669 let unit = Expression::Identifier(Identifier::new(unit_str));
30670 return Ok(Expression::Function(Box::new(Function::new(
30671 "DATEDIFF".to_string(),
30672 vec![unit, arg1, arg2],
30673 ))));
30674 }
30675
30676 // Default: DATEDIFF(UNIT, start, end) with uppercase unit
30677 let unit = Expression::Identifier(Identifier::new(unit_str));
30678 Ok(Expression::Function(Box::new(Function::new(
30679 "DATEDIFF".to_string(),
30680 vec![unit, arg1, arg2],
30681 ))))
30682 }
30683
30684 // DATE_DIFF(date1, date2, unit) -> standard form
30685 "DATE_DIFF" if args.len() == 3 => {
30686 let date1 = args.remove(0);
30687 let date2 = args.remove(0);
30688 let unit_expr = args.remove(0);
30689 let unit_str = get_unit_str(&unit_expr);
30690
30691 if matches!(target, DialectType::BigQuery) {
30692 // BigQuery -> BigQuery: just uppercase the unit, normalize WEEK(SUNDAY) -> WEEK
30693 let norm_unit = if unit_str == "WEEK(SUNDAY)" {
30694 "WEEK".to_string()
30695 } else {
30696 unit_str
30697 };
30698 let norm_d1 = Self::date_literal_to_cast(date1);
30699 let norm_d2 = Self::date_literal_to_cast(date2);
30700 let unit = Expression::Identifier(Identifier::new(norm_unit));
30701 return Ok(Expression::Function(Box::new(Function::new(
30702 f.name,
30703 vec![norm_d1, norm_d2, unit],
30704 ))));
30705 }
30706
30707 if matches!(target, DialectType::MySQL) {
30708 // MySQL DATEDIFF only takes 2 args (date1, date2), returns day difference
30709 let norm_d1 = Self::date_literal_to_cast(date1);
30710 let norm_d2 = Self::date_literal_to_cast(date2);
30711 return Ok(Expression::Function(Box::new(Function::new(
30712 "DATEDIFF".to_string(),
30713 vec![norm_d1, norm_d2],
30714 ))));
30715 }
30716
30717 if matches!(target, DialectType::StarRocks) {
30718 // StarRocks: DATE_DIFF('UNIT', date1, date2) - unit as string, args NOT swapped
30719 let norm_d1 = Self::date_literal_to_cast(date1);
30720 let norm_d2 = Self::date_literal_to_cast(date2);
30721 return Ok(Expression::Function(Box::new(Function::new(
30722 "DATE_DIFF".to_string(),
30723 vec![
30724 Expression::Literal(Box::new(Literal::String(unit_str))),
30725 norm_d1,
30726 norm_d2,
30727 ],
30728 ))));
30729 }
30730
30731 if matches!(target, DialectType::DuckDB) {
30732 // DuckDB: DATE_DIFF('UNIT', date2, date1) with proper CAST for dates
30733 let norm_d1 = Self::ensure_cast_date(date1);
30734 let norm_d2 = Self::ensure_cast_date(date2);
30735
30736 // Handle WEEK variants: WEEK(MONDAY)/WEEK(SUNDAY)/ISOWEEK/WEEK
30737 let is_week_variant = unit_str == "WEEK"
30738 || unit_str.starts_with("WEEK(")
30739 || unit_str == "ISOWEEK";
30740 if is_week_variant {
30741 // For DuckDB, WEEK-based diffs use DATE_TRUNC approach
30742 // WEEK(MONDAY) / ISOWEEK: DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2), DATE_TRUNC('WEEK', d1))
30743 // WEEK / WEEK(SUNDAY): DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2 + INTERVAL '1' DAY), DATE_TRUNC('WEEK', d1 + INTERVAL '1' DAY))
30744 // WEEK(SATURDAY): DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2 + INTERVAL '-5' DAY), DATE_TRUNC('WEEK', d1 + INTERVAL '-5' DAY))
30745 let day_offset = if unit_str == "WEEK(MONDAY)" || unit_str == "ISOWEEK" {
30746 None // ISO weeks start on Monday, aligned with DATE_TRUNC('WEEK')
30747 } else if unit_str == "WEEK" || unit_str == "WEEK(SUNDAY)" {
30748 Some("1") // Shift Sunday to Monday alignment
30749 } else if unit_str == "WEEK(SATURDAY)" {
30750 Some("-5")
30751 } else if unit_str == "WEEK(TUESDAY)" {
30752 Some("-1")
30753 } else if unit_str == "WEEK(WEDNESDAY)" {
30754 Some("-2")
30755 } else if unit_str == "WEEK(THURSDAY)" {
30756 Some("-3")
30757 } else if unit_str == "WEEK(FRIDAY)" {
30758 Some("-4")
30759 } else {
30760 Some("1") // default to Sunday
30761 };
30762
30763 let make_trunc = |date: Expression, offset: Option<&str>| -> Expression {
30764 let shifted = if let Some(off) = offset {
30765 let interval =
30766 Expression::Interval(Box::new(crate::expressions::Interval {
30767 this: Some(Expression::Literal(Box::new(Literal::String(
30768 off.to_string(),
30769 )))),
30770 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
30771 unit: crate::expressions::IntervalUnit::Day,
30772 use_plural: false,
30773 }),
30774 }));
30775 Expression::Add(Box::new(crate::expressions::BinaryOp::new(
30776 date, interval,
30777 )))
30778 } else {
30779 date
30780 };
30781 Expression::Function(Box::new(Function::new(
30782 "DATE_TRUNC".to_string(),
30783 vec![
30784 Expression::Literal(Box::new(Literal::String(
30785 "WEEK".to_string(),
30786 ))),
30787 shifted,
30788 ],
30789 )))
30790 };
30791
30792 let trunc_d2 = make_trunc(norm_d2, day_offset);
30793 let trunc_d1 = make_trunc(norm_d1, day_offset);
30794 return Ok(Expression::Function(Box::new(Function::new(
30795 "DATE_DIFF".to_string(),
30796 vec![
30797 Expression::Literal(Box::new(Literal::String("WEEK".to_string()))),
30798 trunc_d2,
30799 trunc_d1,
30800 ],
30801 ))));
30802 }
30803
30804 return Ok(Expression::Function(Box::new(Function::new(
30805 "DATE_DIFF".to_string(),
30806 vec![
30807 Expression::Literal(Box::new(Literal::String(unit_str))),
30808 norm_d2,
30809 norm_d1,
30810 ],
30811 ))));
30812 }
30813
30814 // Default: DATEDIFF(unit, date2, date1)
30815 let unit = Expression::Identifier(Identifier::new(unit_str));
30816 Ok(Expression::Function(Box::new(Function::new(
30817 "DATEDIFF".to_string(),
30818 vec![unit, date2, date1],
30819 ))))
30820 }
30821
30822 // TIMESTAMP_ADD(ts, INTERVAL n UNIT) -> target-specific
30823 "TIMESTAMP_ADD" | "DATETIME_ADD" | "TIME_ADD" if args.len() == 2 => {
30824 let ts = args.remove(0);
30825 let interval_expr = args.remove(0);
30826 let (val, unit) = Self::extract_interval_parts(&interval_expr);
30827
30828 match target {
30829 DialectType::Snowflake => {
30830 // TIMESTAMPADD(UNIT, val, CAST(ts AS TIMESTAMPTZ))
30831 // Use TimestampAdd expression so Snowflake generates TIMESTAMPADD
30832 // (Function("TIMESTAMPADD") would be converted to DATEADD by Snowflake's function normalization)
30833 let unit_str = Self::interval_unit_to_string(&unit);
30834 let cast_ts = Self::maybe_cast_ts_to_tz(ts, &name);
30835 Ok(Expression::TimestampAdd(Box::new(
30836 crate::expressions::TimestampAdd {
30837 this: Box::new(val),
30838 expression: Box::new(cast_ts),
30839 unit: Some(unit_str.to_string()),
30840 },
30841 )))
30842 }
30843 DialectType::Spark | DialectType::Databricks => {
30844 if name == "DATETIME_ADD" && matches!(target, DialectType::Spark) {
30845 // Spark DATETIME_ADD: ts + INTERVAL val UNIT
30846 let interval =
30847 Expression::Interval(Box::new(crate::expressions::Interval {
30848 this: Some(val),
30849 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
30850 unit,
30851 use_plural: false,
30852 }),
30853 }));
30854 Ok(Expression::Add(Box::new(
30855 crate::expressions::BinaryOp::new(ts, interval),
30856 )))
30857 } else if name == "DATETIME_ADD"
30858 && matches!(target, DialectType::Databricks)
30859 {
30860 // Databricks DATETIME_ADD: TIMESTAMPADD(UNIT, val, ts)
30861 let unit_str = Self::interval_unit_to_string(&unit);
30862 Ok(Expression::Function(Box::new(Function::new(
30863 "TIMESTAMPADD".to_string(),
30864 vec![Expression::Identifier(Identifier::new(unit_str)), val, ts],
30865 ))))
30866 } else {
30867 // Presto-style: DATE_ADD('unit', val, CAST(ts AS TIMESTAMP))
30868 let unit_str = Self::interval_unit_to_string(&unit);
30869 let cast_ts =
30870 if name.starts_with("TIMESTAMP") || name.starts_with("DATETIME") {
30871 Self::maybe_cast_ts(ts)
30872 } else {
30873 ts
30874 };
30875 Ok(Expression::Function(Box::new(Function::new(
30876 "DATE_ADD".to_string(),
30877 vec![
30878 Expression::Identifier(Identifier::new(unit_str)),
30879 val,
30880 cast_ts,
30881 ],
30882 ))))
30883 }
30884 }
30885 DialectType::MySQL => {
30886 // DATE_ADD(TIMESTAMP(ts), INTERVAL val UNIT) for MySQL
30887 let mysql_ts = if name.starts_with("TIMESTAMP") {
30888 // Check if already wrapped in TIMESTAMP() function (from cross-dialect normalization)
30889 match &ts {
30890 Expression::Function(ref inner_f)
30891 if inner_f.name.eq_ignore_ascii_case("TIMESTAMP") =>
30892 {
30893 // Already wrapped, keep as-is
30894 ts
30895 }
30896 _ => {
30897 // Unwrap typed literals: TIMESTAMP '...' -> '...' for TIMESTAMP() wrapper
30898 let unwrapped = match ts {
30899 Expression::Literal(lit)
30900 if matches!(lit.as_ref(), Literal::Timestamp(_)) =>
30901 {
30902 let Literal::Timestamp(s) = lit.as_ref() else {
30903 unreachable!()
30904 };
30905 Expression::Literal(Box::new(Literal::String(
30906 s.clone(),
30907 )))
30908 }
30909 other => other,
30910 };
30911 Expression::Function(Box::new(Function::new(
30912 "TIMESTAMP".to_string(),
30913 vec![unwrapped],
30914 )))
30915 }
30916 }
30917 } else {
30918 ts
30919 };
30920 Ok(Expression::DateAdd(Box::new(
30921 crate::expressions::DateAddFunc {
30922 this: mysql_ts,
30923 interval: val,
30924 unit,
30925 },
30926 )))
30927 }
30928 _ => {
30929 // DuckDB and others use DateAdd expression (DuckDB converts to + INTERVAL)
30930 let cast_ts = if matches!(target, DialectType::DuckDB) {
30931 if name == "DATETIME_ADD" {
30932 Self::ensure_cast_timestamp(ts)
30933 } else if name.starts_with("TIMESTAMP") {
30934 Self::maybe_cast_ts_to_tz(ts, &name)
30935 } else {
30936 ts
30937 }
30938 } else {
30939 ts
30940 };
30941 Ok(Expression::DateAdd(Box::new(
30942 crate::expressions::DateAddFunc {
30943 this: cast_ts,
30944 interval: val,
30945 unit,
30946 },
30947 )))
30948 }
30949 }
30950 }
30951
30952 // TIMESTAMP_SUB(ts, INTERVAL n UNIT) -> target-specific
30953 "TIMESTAMP_SUB" | "DATETIME_SUB" | "TIME_SUB" if args.len() == 2 => {
30954 let ts = args.remove(0);
30955 let interval_expr = args.remove(0);
30956 let (val, unit) = Self::extract_interval_parts(&interval_expr);
30957
30958 match target {
30959 DialectType::Snowflake => {
30960 // TIMESTAMPADD(UNIT, val * -1, CAST(ts AS TIMESTAMPTZ))
30961 let unit_str = Self::interval_unit_to_string(&unit);
30962 let cast_ts = Self::maybe_cast_ts_to_tz(ts, &name);
30963 let neg_val = Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
30964 val,
30965 Expression::Neg(Box::new(crate::expressions::UnaryOp {
30966 this: Expression::number(1),
30967 inferred_type: None,
30968 })),
30969 )));
30970 Ok(Expression::TimestampAdd(Box::new(
30971 crate::expressions::TimestampAdd {
30972 this: Box::new(neg_val),
30973 expression: Box::new(cast_ts),
30974 unit: Some(unit_str.to_string()),
30975 },
30976 )))
30977 }
30978 DialectType::Spark | DialectType::Databricks => {
30979 if (name == "DATETIME_SUB" && matches!(target, DialectType::Spark))
30980 || (name == "TIMESTAMP_SUB" && matches!(target, DialectType::Spark))
30981 {
30982 // Spark: ts - INTERVAL val UNIT
30983 let cast_ts = if name.starts_with("TIMESTAMP") {
30984 Self::maybe_cast_ts(ts)
30985 } else {
30986 ts
30987 };
30988 let interval =
30989 Expression::Interval(Box::new(crate::expressions::Interval {
30990 this: Some(val),
30991 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
30992 unit,
30993 use_plural: false,
30994 }),
30995 }));
30996 Ok(Expression::Sub(Box::new(
30997 crate::expressions::BinaryOp::new(cast_ts, interval),
30998 )))
30999 } else {
31000 // Databricks: TIMESTAMPADD(UNIT, val * -1, ts)
31001 let unit_str = Self::interval_unit_to_string(&unit);
31002 let neg_val =
31003 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
31004 val,
31005 Expression::Neg(Box::new(crate::expressions::UnaryOp {
31006 this: Expression::number(1),
31007 inferred_type: None,
31008 })),
31009 )));
31010 Ok(Expression::Function(Box::new(Function::new(
31011 "TIMESTAMPADD".to_string(),
31012 vec![
31013 Expression::Identifier(Identifier::new(unit_str)),
31014 neg_val,
31015 ts,
31016 ],
31017 ))))
31018 }
31019 }
31020 DialectType::MySQL => {
31021 let mysql_ts = if name.starts_with("TIMESTAMP") {
31022 // Check if already wrapped in TIMESTAMP() function (from cross-dialect normalization)
31023 match &ts {
31024 Expression::Function(ref inner_f)
31025 if inner_f.name.eq_ignore_ascii_case("TIMESTAMP") =>
31026 {
31027 // Already wrapped, keep as-is
31028 ts
31029 }
31030 _ => {
31031 let unwrapped = match ts {
31032 Expression::Literal(lit)
31033 if matches!(lit.as_ref(), Literal::Timestamp(_)) =>
31034 {
31035 let Literal::Timestamp(s) = lit.as_ref() else {
31036 unreachable!()
31037 };
31038 Expression::Literal(Box::new(Literal::String(
31039 s.clone(),
31040 )))
31041 }
31042 other => other,
31043 };
31044 Expression::Function(Box::new(Function::new(
31045 "TIMESTAMP".to_string(),
31046 vec![unwrapped],
31047 )))
31048 }
31049 }
31050 } else {
31051 ts
31052 };
31053 Ok(Expression::DateSub(Box::new(
31054 crate::expressions::DateAddFunc {
31055 this: mysql_ts,
31056 interval: val,
31057 unit,
31058 },
31059 )))
31060 }
31061 _ => {
31062 let cast_ts = if matches!(target, DialectType::DuckDB) {
31063 if name == "DATETIME_SUB" {
31064 Self::ensure_cast_timestamp(ts)
31065 } else if name.starts_with("TIMESTAMP") {
31066 Self::maybe_cast_ts_to_tz(ts, &name)
31067 } else {
31068 ts
31069 }
31070 } else {
31071 ts
31072 };
31073 Ok(Expression::DateSub(Box::new(
31074 crate::expressions::DateAddFunc {
31075 this: cast_ts,
31076 interval: val,
31077 unit,
31078 },
31079 )))
31080 }
31081 }
31082 }
31083
31084 // DATE_SUB(date, INTERVAL n UNIT) -> target-specific
31085 "DATE_SUB" if args.len() == 2 => {
31086 let date = args.remove(0);
31087 let interval_expr = args.remove(0);
31088 let (val, unit) = Self::extract_interval_parts(&interval_expr);
31089
31090 match target {
31091 DialectType::Databricks | DialectType::Spark => {
31092 // Databricks/Spark: DATE_ADD(date, -val)
31093 // Use DateAdd expression with negative val so it generates correctly
31094 // The generator will output DATE_ADD(date, INTERVAL -val DAY)
31095 // Then Databricks transform converts 2-arg DATE_ADD(date, interval) to DATEADD(DAY, interval, date)
31096 // Instead, we directly output as a simple negated DateSub
31097 Ok(Expression::DateSub(Box::new(
31098 crate::expressions::DateAddFunc {
31099 this: date,
31100 interval: val,
31101 unit,
31102 },
31103 )))
31104 }
31105 DialectType::DuckDB => {
31106 // DuckDB: CAST(date AS DATE) - INTERVAL 'val' UNIT
31107 let cast_date = Self::ensure_cast_date(date);
31108 let interval =
31109 Expression::Interval(Box::new(crate::expressions::Interval {
31110 this: Some(val),
31111 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
31112 unit,
31113 use_plural: false,
31114 }),
31115 }));
31116 Ok(Expression::Sub(Box::new(
31117 crate::expressions::BinaryOp::new(cast_date, interval),
31118 )))
31119 }
31120 DialectType::Snowflake => {
31121 // Snowflake: Let Snowflake's own DateSub -> DATEADD(UNIT, val * -1, date) handler work
31122 // Just ensure the date is cast properly
31123 let cast_date = Self::ensure_cast_date(date);
31124 Ok(Expression::DateSub(Box::new(
31125 crate::expressions::DateAddFunc {
31126 this: cast_date,
31127 interval: val,
31128 unit,
31129 },
31130 )))
31131 }
31132 DialectType::PostgreSQL => {
31133 // PostgreSQL: date - INTERVAL 'val UNIT'
31134 let unit_str = Self::interval_unit_to_string(&unit);
31135 let interval =
31136 Expression::Interval(Box::new(crate::expressions::Interval {
31137 this: Some(Expression::Literal(Box::new(Literal::String(
31138 format!("{} {}", Self::expr_to_string(&val), unit_str),
31139 )))),
31140 unit: None,
31141 }));
31142 Ok(Expression::Sub(Box::new(
31143 crate::expressions::BinaryOp::new(date, interval),
31144 )))
31145 }
31146 _ => Ok(Expression::DateSub(Box::new(
31147 crate::expressions::DateAddFunc {
31148 this: date,
31149 interval: val,
31150 unit,
31151 },
31152 ))),
31153 }
31154 }
31155
31156 // DATEADD(unit, val, date) -> target-specific form
31157 // Used by: Redshift, Snowflake, TSQL, ClickHouse
31158 "DATEADD" if args.len() == 3 => {
31159 let arg0 = args.remove(0);
31160 let arg1 = args.remove(0);
31161 let arg2 = args.remove(0);
31162 let unit_str = get_unit_str(&arg0);
31163
31164 if matches!(target, DialectType::Snowflake | DialectType::TSQL) {
31165 // Keep DATEADD(UNIT, val, date) with uppercased unit
31166 let unit = Expression::Identifier(Identifier::new(unit_str));
31167 // Only CAST to DATETIME2 for TSQL target when source is NOT Spark/Databricks family
31168 let date = if matches!(target, DialectType::TSQL)
31169 && !matches!(
31170 source,
31171 DialectType::Spark | DialectType::Databricks | DialectType::Hive
31172 ) {
31173 Self::ensure_cast_datetime2(arg2)
31174 } else {
31175 arg2
31176 };
31177 return Ok(Expression::Function(Box::new(Function::new(
31178 "DATEADD".to_string(),
31179 vec![unit, arg1, date],
31180 ))));
31181 }
31182
31183 if matches!(target, DialectType::DuckDB) {
31184 // DuckDB: date + INTERVAL 'val' UNIT
31185 let iu = parse_interval_unit(&unit_str);
31186 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
31187 this: Some(arg1),
31188 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
31189 unit: iu,
31190 use_plural: false,
31191 }),
31192 }));
31193 let cast_date = Self::ensure_cast_timestamp(arg2);
31194 return Ok(Expression::Add(Box::new(
31195 crate::expressions::BinaryOp::new(cast_date, interval),
31196 )));
31197 }
31198
31199 if matches!(target, DialectType::BigQuery) {
31200 // BigQuery: DATE_ADD(date, INTERVAL val UNIT) or TIMESTAMP_ADD(ts, INTERVAL val UNIT)
31201 let iu = parse_interval_unit(&unit_str);
31202 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
31203 this: Some(arg1),
31204 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
31205 unit: iu,
31206 use_plural: false,
31207 }),
31208 }));
31209 return Ok(Expression::Function(Box::new(Function::new(
31210 "DATE_ADD".to_string(),
31211 vec![arg2, interval],
31212 ))));
31213 }
31214
31215 if matches!(target, DialectType::Databricks) {
31216 // Databricks: keep DATEADD(UNIT, val, date) format
31217 let unit = Expression::Identifier(Identifier::new(unit_str));
31218 return Ok(Expression::Function(Box::new(Function::new(
31219 "DATEADD".to_string(),
31220 vec![unit, arg1, arg2],
31221 ))));
31222 }
31223
31224 if matches!(target, DialectType::Spark) {
31225 // Spark: convert month-based units to ADD_MONTHS, rest to DATE_ADD
31226 fn multiply_expr_dateadd(expr: Expression, factor: i64) -> Expression {
31227 if let Expression::Literal(lit) = &expr {
31228 if let crate::expressions::Literal::Number(n) = lit.as_ref() {
31229 if let Ok(val) = n.parse::<i64>() {
31230 return Expression::Literal(Box::new(
31231 crate::expressions::Literal::Number(
31232 (val * factor).to_string(),
31233 ),
31234 ));
31235 }
31236 }
31237 }
31238 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
31239 expr,
31240 Expression::Literal(Box::new(crate::expressions::Literal::Number(
31241 factor.to_string(),
31242 ))),
31243 )))
31244 }
31245 match unit_str.as_str() {
31246 "YEAR" => {
31247 let months = multiply_expr_dateadd(arg1, 12);
31248 return Ok(Expression::Function(Box::new(Function::new(
31249 "ADD_MONTHS".to_string(),
31250 vec![arg2, months],
31251 ))));
31252 }
31253 "QUARTER" => {
31254 let months = multiply_expr_dateadd(arg1, 3);
31255 return Ok(Expression::Function(Box::new(Function::new(
31256 "ADD_MONTHS".to_string(),
31257 vec![arg2, months],
31258 ))));
31259 }
31260 "MONTH" => {
31261 return Ok(Expression::Function(Box::new(Function::new(
31262 "ADD_MONTHS".to_string(),
31263 vec![arg2, arg1],
31264 ))));
31265 }
31266 "WEEK" => {
31267 let days = multiply_expr_dateadd(arg1, 7);
31268 return Ok(Expression::Function(Box::new(Function::new(
31269 "DATE_ADD".to_string(),
31270 vec![arg2, days],
31271 ))));
31272 }
31273 "DAY" => {
31274 return Ok(Expression::Function(Box::new(Function::new(
31275 "DATE_ADD".to_string(),
31276 vec![arg2, arg1],
31277 ))));
31278 }
31279 _ => {
31280 let unit = Expression::Identifier(Identifier::new(unit_str));
31281 return Ok(Expression::Function(Box::new(Function::new(
31282 "DATE_ADD".to_string(),
31283 vec![unit, arg1, arg2],
31284 ))));
31285 }
31286 }
31287 }
31288
31289 if matches!(target, DialectType::Hive) {
31290 // Hive: DATE_ADD(date, val) for DAY, or date + INTERVAL for others
31291 match unit_str.as_str() {
31292 "DAY" => {
31293 return Ok(Expression::Function(Box::new(Function::new(
31294 "DATE_ADD".to_string(),
31295 vec![arg2, arg1],
31296 ))));
31297 }
31298 "MONTH" => {
31299 return Ok(Expression::Function(Box::new(Function::new(
31300 "ADD_MONTHS".to_string(),
31301 vec![arg2, arg1],
31302 ))));
31303 }
31304 _ => {
31305 let iu = parse_interval_unit(&unit_str);
31306 let interval =
31307 Expression::Interval(Box::new(crate::expressions::Interval {
31308 this: Some(arg1),
31309 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
31310 unit: iu,
31311 use_plural: false,
31312 }),
31313 }));
31314 return Ok(Expression::Add(Box::new(
31315 crate::expressions::BinaryOp::new(arg2, interval),
31316 )));
31317 }
31318 }
31319 }
31320
31321 if matches!(target, DialectType::PostgreSQL) {
31322 // PostgreSQL: date + INTERVAL 'val UNIT'
31323 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
31324 this: Some(Expression::Literal(Box::new(Literal::String(format!(
31325 "{} {}",
31326 Self::expr_to_string(&arg1),
31327 unit_str
31328 ))))),
31329 unit: None,
31330 }));
31331 return Ok(Expression::Add(Box::new(
31332 crate::expressions::BinaryOp::new(arg2, interval),
31333 )));
31334 }
31335
31336 if matches!(
31337 target,
31338 DialectType::Presto | DialectType::Trino | DialectType::Athena
31339 ) {
31340 // Presto/Trino: DATE_ADD('UNIT', val, date)
31341 return Ok(Expression::Function(Box::new(Function::new(
31342 "DATE_ADD".to_string(),
31343 vec![
31344 Expression::Literal(Box::new(Literal::String(unit_str))),
31345 arg1,
31346 arg2,
31347 ],
31348 ))));
31349 }
31350
31351 if matches!(target, DialectType::ClickHouse) {
31352 // ClickHouse: DATE_ADD(UNIT, val, date)
31353 let unit = Expression::Identifier(Identifier::new(unit_str));
31354 return Ok(Expression::Function(Box::new(Function::new(
31355 "DATE_ADD".to_string(),
31356 vec![unit, arg1, arg2],
31357 ))));
31358 }
31359
31360 // Default: keep DATEADD with uppercased unit
31361 let unit = Expression::Identifier(Identifier::new(unit_str));
31362 Ok(Expression::Function(Box::new(Function::new(
31363 "DATEADD".to_string(),
31364 vec![unit, arg1, arg2],
31365 ))))
31366 }
31367
// DATE_ADD(unit, val, date) - 3 arg form from ClickHouse/Presto
"DATE_ADD" if args.len() == 3 => {
    // Positional layout of the source call: (unit, amount, date).
    let arg0 = args.remove(0);
    let arg1 = args.remove(0);
    let arg2 = args.remove(0);
    // Normalized unit name (e.g. "DAY") extracted from the unit argument.
    let unit_str = get_unit_str(&arg0);

    if matches!(
        target,
        DialectType::Presto | DialectType::Trino | DialectType::Athena
    ) {
        // Presto/Trino: DATE_ADD('UNIT', val, date) - unit as a string literal.
        return Ok(Expression::Function(Box::new(Function::new(
            "DATE_ADD".to_string(),
            vec![
                Expression::Literal(Box::new(Literal::String(unit_str))),
                arg1,
                arg2,
            ],
        ))));
    }

    if matches!(
        target,
        DialectType::Snowflake | DialectType::TSQL | DialectType::Redshift
    ) {
        // DATEADD(UNIT, val, date) - unit as a bare identifier.
        let unit = Expression::Identifier(Identifier::new(unit_str));
        // TSQL only: wrap the date argument in CAST(... AS DATETIME2) so that
        // string literals are accepted by DATEADD.
        let date = if matches!(target, DialectType::TSQL) {
            Self::ensure_cast_datetime2(arg2)
        } else {
            arg2
        };
        return Ok(Expression::Function(Box::new(Function::new(
            "DATEADD".to_string(),
            vec![unit, arg1, date],
        ))));
    }

    if matches!(target, DialectType::DuckDB) {
        // DuckDB: rewrite as interval arithmetic - date + INTERVAL val UNIT.
        let iu = parse_interval_unit(&unit_str);
        let interval = Expression::Interval(Box::new(crate::expressions::Interval {
            this: Some(arg1),
            unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                unit: iu,
                use_plural: false,
            }),
        }));
        return Ok(Expression::Add(Box::new(
            crate::expressions::BinaryOp::new(arg2, interval),
        )));
    }

    if matches!(target, DialectType::Spark | DialectType::Databricks) {
        // Spark: DATE_ADD(UNIT, val, date) with uppercased unit identifier.
        let unit = Expression::Identifier(Identifier::new(unit_str));
        return Ok(Expression::Function(Box::new(Function::new(
            "DATE_ADD".to_string(),
            vec![unit, arg1, arg2],
        ))));
    }

    // Default: DATE_ADD(UNIT, val, date) with the unit as an identifier.
    let unit = Expression::Identifier(Identifier::new(unit_str));
    Ok(Expression::Function(Box::new(Function::new(
        "DATE_ADD".to_string(),
        vec![unit, arg1, arg2],
    ))))
}
31438
// DATE_ADD(date, INTERVAL val UNIT) - 2 arg BigQuery form
"DATE_ADD" if args.len() == 2 => {
    let date = args.remove(0);
    let interval_expr = args.remove(0);
    // Split the INTERVAL expression into its amount (`val`) and unit; both a
    // structured unit value and its string rendering are needed below.
    let (val, unit) = Self::extract_interval_parts(&interval_expr);
    let unit_str = Self::interval_unit_to_string(&unit);

    match target {
        DialectType::DuckDB => {
            // DuckDB: CAST(date AS DATE) + INTERVAL 'val' UNIT
            let cast_date = Self::ensure_cast_date(date);
            let quoted_val = Self::quote_interval_val(&val);
            let interval =
                Expression::Interval(Box::new(crate::expressions::Interval {
                    this: Some(quoted_val),
                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                        unit,
                        use_plural: false,
                    }),
                }));
            Ok(Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(cast_date, interval),
            )))
        }
        DialectType::PostgreSQL => {
            // PostgreSQL: date + INTERVAL 'val UNIT' (amount and unit folded
            // into a single quoted interval literal).
            let interval =
                Expression::Interval(Box::new(crate::expressions::Interval {
                    this: Some(Expression::Literal(Box::new(Literal::String(
                        format!("{} {}", Self::expr_to_string(&val), unit_str),
                    )))),
                    unit: None,
                }));
            Ok(Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(date, interval),
            )))
        }
        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
            // Presto: DATE_ADD('UNIT', CAST('val' AS BIGINT), date)
            let val_str = Self::expr_to_string(&val);
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![
                    Expression::Literal(Box::new(Literal::String(
                        unit_str.to_string(),
                    ))),
                    Expression::Cast(Box::new(Cast {
                        this: Expression::Literal(Box::new(Literal::String(val_str))),
                        to: DataType::BigInt { length: None },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    })),
                    date,
                ],
            ))))
        }
        DialectType::Spark | DialectType::Hive => {
            // Spark/Hive have unit-specific helpers: DATE_ADD(date, val) for
            // DAY, ADD_MONTHS(date, val) for MONTH; any other unit falls back
            // to DATE_ADD(date, INTERVAL val UNIT).
            match unit_str {
                "DAY" => Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![date, val],
                )))),
                "MONTH" => Ok(Expression::Function(Box::new(Function::new(
                    "ADD_MONTHS".to_string(),
                    vec![date, val],
                )))),
                _ => {
                    let iu = parse_interval_unit(&unit_str);
                    let interval =
                        Expression::Interval(Box::new(crate::expressions::Interval {
                            this: Some(val),
                            unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                unit: iu,
                                use_plural: false,
                            }),
                        }));
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATE_ADD".to_string(),
                        vec![date, interval],
                    ))))
                }
            }
        }
        DialectType::Snowflake => {
            // Snowflake: DATEADD(UNIT, 'val', CAST(date AS DATE))
            let cast_date = Self::ensure_cast_date(date);
            let val_str = Self::expr_to_string(&val);
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new(unit_str)),
                    Expression::Literal(Box::new(Literal::String(val_str))),
                    cast_date,
                ],
            ))))
        }
        DialectType::TSQL | DialectType::Fabric => {
            // TSQL/Fabric: DATEADD(UNIT, val, CAST(date AS DATETIME2))
            let cast_date = Self::ensure_cast_datetime2(date);
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new(unit_str)),
                    val,
                    cast_date,
                ],
            ))))
        }
        // Redshift: DATEADD(UNIT, val, date) without any cast.
        DialectType::Redshift => Ok(Expression::Function(Box::new(Function::new(
            "DATEADD".to_string(),
            vec![Expression::Identifier(Identifier::new(unit_str)), val, date],
        )))),
        DialectType::MySQL => {
            // MySQL: DATE_ADD(date, INTERVAL 'val' UNIT)
            let quoted_val = Self::quote_interval_val(&val);
            let iu = parse_interval_unit(&unit_str);
            let interval =
                Expression::Interval(Box::new(crate::expressions::Interval {
                    this: Some(quoted_val),
                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                        unit: iu,
                        use_plural: false,
                    }),
                }));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![date, interval],
            ))))
        }
        DialectType::BigQuery => {
            // BigQuery: DATE_ADD(date, INTERVAL 'val' UNIT) - round-trip form.
            let quoted_val = Self::quote_interval_val(&val);
            let iu = parse_interval_unit(&unit_str);
            let interval =
                Expression::Interval(Box::new(crate::expressions::Interval {
                    this: Some(quoted_val),
                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                        unit: iu,
                        use_plural: false,
                    }),
                }));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![date, interval],
            ))))
        }
        // Databricks: DATEADD(UNIT, val, date).
        DialectType::Databricks => Ok(Expression::Function(Box::new(Function::new(
            "DATEADD".to_string(),
            vec![Expression::Identifier(Identifier::new(unit_str)), val, date],
        )))),
        _ => {
            // Default: keep as DATE_ADD with decomposed interval, represented
            // by the dedicated DateAdd AST node.
            Ok(Expression::DateAdd(Box::new(
                crate::expressions::DateAddFunc {
                    this: date,
                    interval: val,
                    unit,
                },
            )))
        }
    }
}
31604
31605 // ADD_MONTHS(date, val) -> target-specific form
31606 "ADD_MONTHS" if args.len() == 2 => {
31607 let date = args.remove(0);
31608 let val = args.remove(0);
31609
31610 if matches!(target, DialectType::TSQL) {
31611 // TSQL: DATEADD(MONTH, val, CAST(date AS DATETIME2))
31612 let cast_date = Self::ensure_cast_datetime2(date);
31613 return Ok(Expression::Function(Box::new(Function::new(
31614 "DATEADD".to_string(),
31615 vec![
31616 Expression::Identifier(Identifier::new("MONTH")),
31617 val,
31618 cast_date,
31619 ],
31620 ))));
31621 }
31622
31623 if matches!(target, DialectType::DuckDB) {
31624 // DuckDB: date + INTERVAL val MONTH
31625 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
31626 this: Some(val),
31627 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
31628 unit: crate::expressions::IntervalUnit::Month,
31629 use_plural: false,
31630 }),
31631 }));
31632 return Ok(Expression::Add(Box::new(
31633 crate::expressions::BinaryOp::new(date, interval),
31634 )));
31635 }
31636
31637 if matches!(target, DialectType::Snowflake) {
31638 // Snowflake: keep ADD_MONTHS when source is also Snowflake, else DATEADD
31639 if matches!(source, DialectType::Snowflake) {
31640 return Ok(Expression::Function(Box::new(Function::new(
31641 "ADD_MONTHS".to_string(),
31642 vec![date, val],
31643 ))));
31644 }
31645 return Ok(Expression::Function(Box::new(Function::new(
31646 "DATEADD".to_string(),
31647 vec![Expression::Identifier(Identifier::new("MONTH")), val, date],
31648 ))));
31649 }
31650
31651 if matches!(target, DialectType::Spark | DialectType::Databricks) {
31652 // Spark: ADD_MONTHS(date, val) - keep as is
31653 return Ok(Expression::Function(Box::new(Function::new(
31654 "ADD_MONTHS".to_string(),
31655 vec![date, val],
31656 ))));
31657 }
31658
31659 if matches!(target, DialectType::Hive) {
31660 return Ok(Expression::Function(Box::new(Function::new(
31661 "ADD_MONTHS".to_string(),
31662 vec![date, val],
31663 ))));
31664 }
31665
31666 if matches!(
31667 target,
31668 DialectType::Presto | DialectType::Trino | DialectType::Athena
31669 ) {
31670 // Presto: DATE_ADD('MONTH', val, date)
31671 return Ok(Expression::Function(Box::new(Function::new(
31672 "DATE_ADD".to_string(),
31673 vec![
31674 Expression::Literal(Box::new(Literal::String("MONTH".to_string()))),
31675 val,
31676 date,
31677 ],
31678 ))));
31679 }
31680
31681 // Default: keep ADD_MONTHS
31682 Ok(Expression::Function(Box::new(Function::new(
31683 "ADD_MONTHS".to_string(),
31684 vec![date, val],
31685 ))))
31686 }
31687
// SAFE_DIVIDE(x, y) -> target-specific form directly
// All targets emit a NULL-on-zero-divisor conditional; the shape of the
// conditional (CASE vs IFF vs IF) and whether x is cast to a float type
// vary per dialect.
"SAFE_DIVIDE" if args.len() == 2 => {
    let x = args.remove(0);
    let y = args.remove(0);
    // Wrap x and y in parens if they're complex expressions, so that the
    // generated division and comparison keep the intended precedence.
    let y_ref = match &y {
        Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => {
            y.clone()
        }
        _ => Expression::Paren(Box::new(Paren {
            this: y.clone(),
            trailing_comments: vec![],
        })),
    };
    let x_ref = match &x {
        Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => {
            x.clone()
        }
        _ => Expression::Paren(Box::new(Paren {
            this: x.clone(),
            trailing_comments: vec![],
        })),
    };
    // Shared guard: y <> 0.
    let condition = Expression::Neq(Box::new(crate::expressions::BinaryOp::new(
        y_ref.clone(),
        Expression::number(0),
    )));
    // Plain x / y, reused by targets that need no float cast.
    let div_expr = Expression::Div(Box::new(crate::expressions::BinaryOp::new(
        x_ref.clone(),
        y_ref.clone(),
    )));

    match target {
        DialectType::DuckDB | DialectType::PostgreSQL => {
            // CASE WHEN y <> 0 THEN x / y ELSE NULL END
            // PostgreSQL additionally casts x to DOUBLE PRECISION to avoid
            // integer division.
            let result_div = if matches!(target, DialectType::PostgreSQL) {
                let cast_x = Expression::Cast(Box::new(Cast {
                    this: x_ref,
                    to: DataType::Custom {
                        name: "DOUBLE PRECISION".to_string(),
                    },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                }));
                Expression::Div(Box::new(crate::expressions::BinaryOp::new(
                    cast_x, y_ref,
                )))
            } else {
                div_expr
            };
            Ok(Expression::Case(Box::new(crate::expressions::Case {
                operand: None,
                whens: vec![(condition, result_div)],
                else_: Some(Expression::Null(crate::expressions::Null)),
                comments: Vec::new(),
                inferred_type: None,
            })))
        }
        DialectType::Snowflake => {
            // IFF(y <> 0, x / y, NULL)
            Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
                condition,
                true_value: div_expr,
                false_value: Some(Expression::Null(crate::expressions::Null)),
                original_name: Some("IFF".to_string()),
                inferred_type: None,
            })))
        }
        DialectType::Presto | DialectType::Trino => {
            // IF(y <> 0, CAST(x AS DOUBLE) / y, NULL)
            let cast_x = Expression::Cast(Box::new(Cast {
                this: x_ref,
                to: DataType::Double {
                    precision: None,
                    scale: None,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }));
            let cast_div = Expression::Div(Box::new(
                crate::expressions::BinaryOp::new(cast_x, y_ref),
            ));
            Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
                condition,
                true_value: cast_div,
                false_value: Some(Expression::Null(crate::expressions::Null)),
                original_name: None,
                inferred_type: None,
            })))
        }
        _ => {
            // IF(y <> 0, x / y, NULL)
            Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
                condition,
                true_value: div_expr,
                false_value: Some(Expression::Null(crate::expressions::Null)),
                original_name: None,
                inferred_type: None,
            })))
        }
    }
}
31796
31797 // GENERATE_UUID() -> UUID() with CAST to string
31798 "GENERATE_UUID" => {
31799 let uuid_expr = Expression::Uuid(Box::new(crate::expressions::Uuid {
31800 this: None,
31801 name: None,
31802 is_string: None,
31803 }));
31804 // Most targets need CAST(UUID() AS TEXT/VARCHAR/STRING)
31805 let cast_type = match target {
31806 DialectType::DuckDB => Some(DataType::Text),
31807 DialectType::Presto | DialectType::Trino => Some(DataType::VarChar {
31808 length: None,
31809 parenthesized_length: false,
31810 }),
31811 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
31812 Some(DataType::String { length: None })
31813 }
31814 _ => None,
31815 };
31816 if let Some(dt) = cast_type {
31817 Ok(Expression::Cast(Box::new(Cast {
31818 this: uuid_expr,
31819 to: dt,
31820 trailing_comments: vec![],
31821 double_colon_syntax: false,
31822 format: None,
31823 default: None,
31824 inferred_type: None,
31825 })))
31826 } else {
31827 Ok(uuid_expr)
31828 }
31829 }
31830
31831 // COUNTIF(x) -> CountIf expression
31832 "COUNTIF" if args.len() == 1 => {
31833 let arg = args.remove(0);
31834 Ok(Expression::CountIf(Box::new(crate::expressions::AggFunc {
31835 this: arg,
31836 distinct: false,
31837 filter: None,
31838 order_by: vec![],
31839 name: None,
31840 ignore_nulls: None,
31841 having_max: None,
31842 limit: None,
31843 inferred_type: None,
31844 })))
31845 }
31846
// EDIT_DISTANCE(col1, col2, ...) -> Levenshtein expression
"EDIT_DISTANCE" => {
    // Strip named arguments (max_distance => N) and pass as positional
    let mut positional_args: Vec<Expression> = vec![];
    for arg in args {
        match arg {
            Expression::NamedArgument(na) => {
                positional_args.push(na.value);
            }
            other => positional_args.push(other),
        }
    }
    if positional_args.len() >= 2 {
        let col1 = positional_args.remove(0);
        let col2 = positional_args.remove(0);
        // Base two-argument LEVENSHTEIN node; extra args (max distance)
        // are handled separately below.
        let levenshtein = crate::expressions::BinaryFunc {
            this: col1,
            expression: col2,
            original_name: None,
            inferred_type: None,
        };
        // Pass extra args through a function wrapper with all args
        if !positional_args.is_empty() {
            let max_dist = positional_args.remove(0);
            // DuckDB: CASE WHEN LEVENSHTEIN(a, b) IS NULL OR max IS NULL THEN NULL ELSE LEAST(LEVENSHTEIN(a, b), max) END
            // (emulates BigQuery's NULL propagation plus the max_distance cap).
            if matches!(target, DialectType::DuckDB) {
                let lev = Expression::Function(Box::new(Function::new(
                    "LEVENSHTEIN".to_string(),
                    vec![levenshtein.this, levenshtein.expression],
                )));
                let lev_is_null =
                    Expression::IsNull(Box::new(crate::expressions::IsNull {
                        this: lev.clone(),
                        not: false,
                        postfix_form: false,
                    }));
                let max_is_null =
                    Expression::IsNull(Box::new(crate::expressions::IsNull {
                        this: max_dist.clone(),
                        not: false,
                        postfix_form: false,
                    }));
                let null_check =
                    Expression::Or(Box::new(crate::expressions::BinaryOp {
                        left: lev_is_null,
                        right: max_is_null,
                        left_comments: Vec::new(),
                        operator_comments: Vec::new(),
                        trailing_comments: Vec::new(),
                        inferred_type: None,
                    }));
                let least =
                    Expression::Least(Box::new(crate::expressions::VarArgFunc {
                        expressions: vec![lev, max_dist],
                        original_name: None,
                        inferred_type: None,
                    }));
                return Ok(Expression::Case(Box::new(crate::expressions::Case {
                    operand: None,
                    whens: vec![(
                        null_check,
                        Expression::Null(crate::expressions::Null),
                    )],
                    else_: Some(least),
                    comments: Vec::new(),
                    inferred_type: None,
                })));
            }
            // Other targets: forward all positional args to a plain call.
            let mut all_args = vec![levenshtein.this, levenshtein.expression, max_dist];
            all_args.extend(positional_args);
            // PostgreSQL: use LEVENSHTEIN_LESS_EQUAL when max_distance is provided
            let func_name = if matches!(target, DialectType::PostgreSQL) {
                "LEVENSHTEIN_LESS_EQUAL"
            } else {
                "LEVENSHTEIN"
            };
            return Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                all_args,
            ))));
        }
        // Exactly two args: emit the dedicated Levenshtein AST node.
        Ok(Expression::Levenshtein(Box::new(levenshtein)))
    } else {
        // Fewer than two usable args: keep the original call untouched.
        Ok(Expression::Function(Box::new(Function::new(
            "EDIT_DISTANCE".to_string(),
            positional_args,
        ))))
    }
}
31936
31937 // TIMESTAMP_SECONDS(x) -> UnixToTime with scale 0
31938 "TIMESTAMP_SECONDS" if args.len() == 1 => {
31939 let arg = args.remove(0);
31940 Ok(Expression::UnixToTime(Box::new(
31941 crate::expressions::UnixToTime {
31942 this: Box::new(arg),
31943 scale: Some(0),
31944 zone: None,
31945 hours: None,
31946 minutes: None,
31947 format: None,
31948 target_type: None,
31949 },
31950 )))
31951 }
31952
31953 // TIMESTAMP_MILLIS(x) -> UnixToTime with scale 3
31954 "TIMESTAMP_MILLIS" if args.len() == 1 => {
31955 let arg = args.remove(0);
31956 Ok(Expression::UnixToTime(Box::new(
31957 crate::expressions::UnixToTime {
31958 this: Box::new(arg),
31959 scale: Some(3),
31960 zone: None,
31961 hours: None,
31962 minutes: None,
31963 format: None,
31964 target_type: None,
31965 },
31966 )))
31967 }
31968
31969 // TIMESTAMP_MICROS(x) -> UnixToTime with scale 6
31970 "TIMESTAMP_MICROS" if args.len() == 1 => {
31971 let arg = args.remove(0);
31972 Ok(Expression::UnixToTime(Box::new(
31973 crate::expressions::UnixToTime {
31974 this: Box::new(arg),
31975 scale: Some(6),
31976 zone: None,
31977 hours: None,
31978 minutes: None,
31979 format: None,
31980 target_type: None,
31981 },
31982 )))
31983 }
31984
31985 // DIV(x, y) -> IntDiv expression
31986 "DIV" if args.len() == 2 => {
31987 let x = args.remove(0);
31988 let y = args.remove(0);
31989 Ok(Expression::IntDiv(Box::new(
31990 crate::expressions::BinaryFunc {
31991 this: x,
31992 expression: y,
31993 original_name: None,
31994 inferred_type: None,
31995 },
31996 )))
31997 }
31998
// TO_HEX(x) -> target-specific form
"TO_HEX" if args.len() == 1 => {
    let arg = args.remove(0);
    // Check if inner function already returns hex string in certain targets
    // NOTE(review): this check is case-sensitive — a lowercase "md5" inner
    // call would not be detected; confirm function names are uppercased
    // upstream (the Snowflake branch below uses to_ascii_uppercase instead).
    let inner_returns_hex = matches!(&arg, Expression::Function(f) if matches!(f.name.as_str(), "MD5" | "SHA1" | "SHA256" | "SHA512"));
    if matches!(target, DialectType::BigQuery) {
        // BQ->BQ: keep as TO_HEX
        Ok(Expression::Function(Box::new(Function::new(
            "TO_HEX".to_string(),
            vec![arg],
        ))))
    } else if matches!(target, DialectType::DuckDB) && inner_returns_hex {
        // DuckDB: MD5/SHA already return hex strings, so TO_HEX is redundant
        Ok(arg)
    } else if matches!(target, DialectType::Snowflake) && inner_returns_hex {
        // Snowflake: TO_HEX(SHA1(x)) -> TO_CHAR(SHA1_BINARY(x))
        // TO_HEX(MD5(x)) -> TO_CHAR(MD5_BINARY(x))
        // TO_HEX(SHA256(x)) -> TO_CHAR(SHA2_BINARY(x, 256))
        // TO_HEX(SHA512(x)) -> TO_CHAR(SHA2_BINARY(x, 512))
        if let Expression::Function(ref inner_f) = arg {
            let inner_args = inner_f.args.clone();
            // Map the hash function to its *_BINARY variant; SHA256/SHA512
            // collapse into SHA2_BINARY with an explicit bit-width argument.
            let binary_func = match inner_f.name.to_ascii_uppercase().as_str() {
                "SHA1" => Expression::Function(Box::new(Function::new(
                    "SHA1_BINARY".to_string(),
                    inner_args,
                ))),
                "MD5" => Expression::Function(Box::new(Function::new(
                    "MD5_BINARY".to_string(),
                    inner_args,
                ))),
                "SHA256" => {
                    let mut a = inner_args;
                    a.push(Expression::number(256));
                    Expression::Function(Box::new(Function::new(
                        "SHA2_BINARY".to_string(),
                        a,
                    )))
                }
                "SHA512" => {
                    let mut a = inner_args;
                    a.push(Expression::number(512));
                    Expression::Function(Box::new(Function::new(
                        "SHA2_BINARY".to_string(),
                        a,
                    )))
                }
                _ => arg.clone(),
            };
            Ok(Expression::Function(Box::new(Function::new(
                "TO_CHAR".to_string(),
                vec![binary_func],
            ))))
        } else {
            // Non-function inner expression: LOWER(HEX(arg)).
            let inner = Expression::Function(Box::new(Function::new(
                "HEX".to_string(),
                vec![arg],
            )));
            Ok(Expression::Lower(Box::new(
                crate::expressions::UnaryFunc::new(inner),
            )))
        }
    } else if matches!(target, DialectType::Presto | DialectType::Trino) {
        // Presto/Trino: TO_HEX returns uppercase, so wrap in LOWER to match
        // BigQuery's lowercase output.
        let inner = Expression::Function(Box::new(Function::new(
            "TO_HEX".to_string(),
            vec![arg],
        )));
        Ok(Expression::Lower(Box::new(
            crate::expressions::UnaryFunc::new(inner),
        )))
    } else {
        // Default: LOWER(HEX(arg)).
        let inner =
            Expression::Function(Box::new(Function::new("HEX".to_string(), vec![arg])));
        Ok(Expression::Lower(Box::new(
            crate::expressions::UnaryFunc::new(inner),
        )))
    }
}
32076
// LAST_DAY(date, unit) -> LAST_DAY(date): the optional unit argument is
// dropped because the emitted form only supports the 1-arg variant (MONTH is
// BigQuery's default unit).
// NOTE(review): non-MONTH units (e.g. YEAR, WEEK) are silently discarded
// here, which changes semantics for those inputs — confirm this is intended.
"LAST_DAY" if args.len() == 2 => {
    let date = args.remove(0);
    let _unit = args.remove(0); // Strip the unit (MONTH is default)
    Ok(Expression::Function(Box::new(Function::new(
        "LAST_DAY".to_string(),
        vec![date],
    ))))
}
32086
32087 // GENERATE_ARRAY(start, end, step?) -> GenerateSeries expression
32088 "GENERATE_ARRAY" => {
32089 let start = args.get(0).cloned();
32090 let end = args.get(1).cloned();
32091 let step = args.get(2).cloned();
32092 Ok(Expression::GenerateSeries(Box::new(
32093 crate::expressions::GenerateSeries {
32094 start: start.map(Box::new),
32095 end: end.map(Box::new),
32096 step: step.map(Box::new),
32097 is_end_exclusive: None,
32098 },
32099 )))
32100 }
32101
// GENERATE_TIMESTAMP_ARRAY(start, end, step) -> GenerateSeries expression
"GENERATE_TIMESTAMP_ARRAY" => {
    let start = args.get(0).cloned();
    let end = args.get(1).cloned();
    let step = args.get(2).cloned();

    if matches!(target, DialectType::DuckDB) {
        // DuckDB: GENERATE_SERIES(CAST(start AS TIMESTAMP), CAST(end AS TIMESTAMP), step)
        // Only cast string literals - leave columns/expressions as-is
        let maybe_cast_ts = |expr: Expression| -> Expression {
            if matches!(&expr, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
            {
                Expression::Cast(Box::new(Cast {
                    this: expr,
                    to: DataType::Timestamp {
                        precision: None,
                        timezone: false,
                    },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                }))
            } else {
                expr
            }
        };
        let cast_start = start.map(maybe_cast_ts);
        let cast_end = end.map(maybe_cast_ts);
        Ok(Expression::GenerateSeries(Box::new(
            crate::expressions::GenerateSeries {
                start: cast_start.map(Box::new),
                end: cast_end.map(Box::new),
                step: step.map(Box::new),
                is_end_exclusive: None,
            },
        )))
    } else {
        // All other targets: pass the bounds through unchanged.
        Ok(Expression::GenerateSeries(Box::new(
            crate::expressions::GenerateSeries {
                start: start.map(Box::new),
                end: end.map(Box::new),
                step: step.map(Box::new),
                is_end_exclusive: None,
            },
        )))
    }
}
32151
// TO_JSON(x) -> target-specific (from Spark/Hive)
"TO_JSON" => {
    match target {
        DialectType::Presto | DialectType::Trino => {
            // JSON_FORMAT(CAST(x AS JSON))
            // A missing argument degrades to NULL rather than panicking.
            let arg = args
                .into_iter()
                .next()
                .unwrap_or(Expression::Null(crate::expressions::Null));
            let cast_json = Expression::Cast(Box::new(Cast {
                this: arg,
                to: DataType::Custom {
                    name: "JSON".to_string(),
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }));
            Ok(Expression::Function(Box::new(Function::new(
                "JSON_FORMAT".to_string(),
                vec![cast_json],
            ))))
        }
        // BigQuery: TO_JSON_STRING(x) is the string-returning equivalent.
        DialectType::BigQuery => Ok(Expression::Function(Box::new(Function::new(
            "TO_JSON_STRING".to_string(),
            args,
        )))),
        DialectType::DuckDB => {
            // CAST(TO_JSON(x) AS TEXT)
            let arg = args
                .into_iter()
                .next()
                .unwrap_or(Expression::Null(crate::expressions::Null));
            let to_json = Expression::Function(Box::new(Function::new(
                "TO_JSON".to_string(),
                vec![arg],
            )));
            Ok(Expression::Cast(Box::new(Cast {
                this: to_json,
                to: DataType::Text,
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })))
        }
        // Everything else keeps TO_JSON untouched.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "TO_JSON".to_string(),
            args,
        )))),
    }
}
32207
// TO_JSON_STRING(x) -> target-specific
"TO_JSON_STRING" => {
    match target {
        // Spark family: TO_JSON is the native equivalent.
        DialectType::Spark | DialectType::Databricks | DialectType::Hive => Ok(
            Expression::Function(Box::new(Function::new("TO_JSON".to_string(), args))),
        ),
        DialectType::Presto | DialectType::Trino => {
            // JSON_FORMAT(CAST(x AS JSON))
            // A missing argument degrades to NULL rather than panicking.
            let arg = args
                .into_iter()
                .next()
                .unwrap_or(Expression::Null(crate::expressions::Null));
            let cast_json = Expression::Cast(Box::new(Cast {
                this: arg,
                to: DataType::Custom {
                    name: "JSON".to_string(),
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }));
            Ok(Expression::Function(Box::new(Function::new(
                "JSON_FORMAT".to_string(),
                vec![cast_json],
            ))))
        }
        DialectType::DuckDB => {
            // CAST(TO_JSON(x) AS TEXT)
            let arg = args
                .into_iter()
                .next()
                .unwrap_or(Expression::Null(crate::expressions::Null));
            let to_json = Expression::Function(Box::new(Function::new(
                "TO_JSON".to_string(),
                vec![arg],
            )));
            Ok(Expression::Cast(Box::new(Cast {
                this: to_json,
                to: DataType::Text,
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })))
        }
        DialectType::Snowflake => {
            // TO_JSON(x)
            Ok(Expression::Function(Box::new(Function::new(
                "TO_JSON".to_string(),
                args,
            ))))
        }
        // Everything else keeps TO_JSON_STRING untouched.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "TO_JSON_STRING".to_string(),
            args,
        )))),
    }
}
32269
32270 // SAFE_ADD(x, y) -> SafeAdd expression
32271 "SAFE_ADD" if args.len() == 2 => {
32272 let x = args.remove(0);
32273 let y = args.remove(0);
32274 Ok(Expression::SafeAdd(Box::new(crate::expressions::SafeAdd {
32275 this: Box::new(x),
32276 expression: Box::new(y),
32277 })))
32278 }
32279
32280 // SAFE_SUBTRACT(x, y) -> SafeSubtract expression
32281 "SAFE_SUBTRACT" if args.len() == 2 => {
32282 let x = args.remove(0);
32283 let y = args.remove(0);
32284 Ok(Expression::SafeSubtract(Box::new(
32285 crate::expressions::SafeSubtract {
32286 this: Box::new(x),
32287 expression: Box::new(y),
32288 },
32289 )))
32290 }
32291
32292 // SAFE_MULTIPLY(x, y) -> SafeMultiply expression
32293 "SAFE_MULTIPLY" if args.len() == 2 => {
32294 let x = args.remove(0);
32295 let y = args.remove(0);
32296 Ok(Expression::SafeMultiply(Box::new(
32297 crate::expressions::SafeMultiply {
32298 this: Box::new(x),
32299 expression: Box::new(y),
32300 },
32301 )))
32302 }
32303
32304 // REGEXP_CONTAINS(str, pattern) -> RegexpLike expression
32305 "REGEXP_CONTAINS" if args.len() == 2 => {
32306 let str_expr = args.remove(0);
32307 let pattern = args.remove(0);
32308 Ok(Expression::RegexpLike(Box::new(
32309 crate::expressions::RegexpFunc {
32310 this: str_expr,
32311 pattern,
32312 flags: None,
32313 },
32314 )))
32315 }
32316
32317 // CONTAINS_SUBSTR(a, b) -> CONTAINS(LOWER(a), LOWER(b))
32318 "CONTAINS_SUBSTR" if args.len() == 2 => {
32319 let a = args.remove(0);
32320 let b = args.remove(0);
32321 let lower_a = Expression::Lower(Box::new(crate::expressions::UnaryFunc::new(a)));
32322 let lower_b = Expression::Lower(Box::new(crate::expressions::UnaryFunc::new(b)));
32323 Ok(Expression::Function(Box::new(Function::new(
32324 "CONTAINS".to_string(),
32325 vec![lower_a, lower_b],
32326 ))))
32327 }
32328
// INT64(x) -> CAST(x AS BIGINT): normalized to a plain cast that every
// target can render.
"INT64" if args.len() == 1 => {
    let arg = args.remove(0);
    Ok(Expression::Cast(Box::new(Cast {
        this: arg,
        to: DataType::BigInt { length: None },
        trailing_comments: vec![],
        double_colon_syntax: false,
        format: None,
        default: None,
        inferred_type: None,
    })))
}
32342
32343 // INSTR(str, substr) -> target-specific
32344 "INSTR" if args.len() >= 2 => {
32345 let str_expr = args.remove(0);
32346 let substr = args.remove(0);
32347 if matches!(target, DialectType::Snowflake) {
32348 // CHARINDEX(substr, str)
32349 Ok(Expression::Function(Box::new(Function::new(
32350 "CHARINDEX".to_string(),
32351 vec![substr, str_expr],
32352 ))))
32353 } else if matches!(target, DialectType::BigQuery) {
32354 // Keep as INSTR
32355 Ok(Expression::Function(Box::new(Function::new(
32356 "INSTR".to_string(),
32357 vec![str_expr, substr],
32358 ))))
32359 } else {
32360 // Default: keep as INSTR
32361 Ok(Expression::Function(Box::new(Function::new(
32362 "INSTR".to_string(),
32363 vec![str_expr, substr],
32364 ))))
32365 }
32366 }
32367
// BigQuery DATE_TRUNC(expr, unit) -> DATE_TRUNC('unit', expr) for standard SQL
// (BigQuery puts the expression first; most other dialects put the unit first
// as a string literal).
"DATE_TRUNC" if args.len() == 2 => {
    let expr = args.remove(0);
    let unit_expr = args.remove(0);
    // Normalized unit name (e.g. "MONTH").
    let unit_str = get_unit_str(&unit_expr);

    match target {
        DialectType::DuckDB
        | DialectType::Snowflake
        | DialectType::PostgreSQL
        | DialectType::Presto
        | DialectType::Trino
        | DialectType::Databricks
        | DialectType::Spark
        | DialectType::Redshift
        | DialectType::ClickHouse
        | DialectType::TSQL => {
            // Standard: DATE_TRUNC('UNIT', expr)
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_TRUNC".to_string(),
                vec![
                    Expression::Literal(Box::new(Literal::String(unit_str))),
                    expr,
                ],
            ))))
        }
        _ => {
            // Keep BigQuery arg order: DATE_TRUNC(expr, unit)
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_TRUNC".to_string(),
                vec![expr, unit_expr],
            ))))
        }
    }
}
32403
// TIMESTAMP_TRUNC / DATETIME_TRUNC -> target-specific
"TIMESTAMP_TRUNC" | "DATETIME_TRUNC" if args.len() >= 2 => {
    // TIMESTAMP_TRUNC(ts, unit) or TIMESTAMP_TRUNC(ts, unit, timezone)
    let ts = args.remove(0);
    let unit_expr = args.remove(0);
    // Optional third argument: a timezone string.
    let tz = if !args.is_empty() {
        Some(args.remove(0))
    } else {
        None
    };
    let unit_str = get_unit_str(&unit_expr);

    match target {
        DialectType::DuckDB => {
            // DuckDB: DATE_TRUNC('UNIT', CAST(ts AS TIMESTAMPTZ))
            // With timezone: DATE_TRUNC('UNIT', ts AT TIME ZONE 'tz') AT TIME ZONE 'tz' (for DAY granularity)
            // Without timezone for MINUTE+ granularity: just DATE_TRUNC
            // "Coarse" units are the ones where the truncation result depends
            // on the timezone, so the AT TIME ZONE round-trip is required.
            let is_coarse = matches!(
                unit_str.as_str(),
                "DAY" | "WEEK" | "MONTH" | "QUARTER" | "YEAR"
            );
            // For DATETIME_TRUNC, cast string args to TIMESTAMP
            // (DATETIME_TRUNC operates on timezone-less datetimes, so string
            // literals become plain TIMESTAMP; other expressions go through
            // the shared maybe_cast_ts_to_tz helper).
            let cast_ts = if name == "DATETIME_TRUNC" {
                match ts {
                    Expression::Literal(ref lit)
                        if matches!(lit.as_ref(), Literal::String(ref _s)) =>
                    {
                        Expression::Cast(Box::new(Cast {
                            this: ts,
                            to: DataType::Timestamp {
                                precision: None,
                                timezone: false,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }))
                    }
                    _ => Self::maybe_cast_ts_to_tz(ts, &name),
                }
            } else {
                Self::maybe_cast_ts_to_tz(ts, &name)
            };

            if let Some(tz_arg) = tz {
                if is_coarse {
                    // DATE_TRUNC('UNIT', ts AT TIME ZONE 'tz') AT TIME ZONE 'tz'
                    let at_tz = Expression::AtTimeZone(Box::new(
                        crate::expressions::AtTimeZone {
                            this: cast_ts,
                            zone: tz_arg.clone(),
                        },
                    ));
                    let date_trunc = Expression::Function(Box::new(Function::new(
                        "DATE_TRUNC".to_string(),
                        vec![
                            Expression::Literal(Box::new(Literal::String(unit_str))),
                            at_tz,
                        ],
                    )));
                    Ok(Expression::AtTimeZone(Box::new(
                        crate::expressions::AtTimeZone {
                            this: date_trunc,
                            zone: tz_arg,
                        },
                    )))
                } else {
                    // For MINUTE/HOUR: no AT TIME ZONE wrapper, just DATE_TRUNC('UNIT', ts)
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATE_TRUNC".to_string(),
                        vec![
                            Expression::Literal(Box::new(Literal::String(unit_str))),
                            cast_ts,
                        ],
                    ))))
                }
            } else {
                // No timezone: DATE_TRUNC('UNIT', CAST(ts AS TIMESTAMPTZ))
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_TRUNC".to_string(),
                    vec![
                        Expression::Literal(Box::new(Literal::String(unit_str))),
                        cast_ts,
                    ],
                ))))
            }
        }
        DialectType::Databricks | DialectType::Spark => {
            // Databricks/Spark: DATE_TRUNC('UNIT', ts)
            // NOTE(review): a third (timezone) argument is dropped on this
            // path — confirm that is acceptable for Spark targets.
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_TRUNC".to_string(),
                vec![Expression::Literal(Box::new(Literal::String(unit_str))), ts],
            ))))
        }
        _ => {
            // Default: keep as TIMESTAMP_TRUNC('UNIT', ts, [tz])
            let unit = Expression::Literal(Box::new(Literal::String(unit_str)));
            let mut date_trunc_args = vec![unit, ts];
            if let Some(tz_arg) = tz {
                date_trunc_args.push(tz_arg);
            }
            Ok(Expression::Function(Box::new(Function::new(
                "TIMESTAMP_TRUNC".to_string(),
                date_trunc_args,
            ))))
        }
    }
}
32514
// TIME(h, m, s) -> target-specific, TIME('string') -> CAST('string' AS TIME)
"TIME" => {
    if args.len() == 3 {
        // TIME(h, m, s) constructor form.
        match target {
            DialectType::TSQL => {
                // TIMEFROMPARTS(h, m, s, 0, 0) — pad fractions/precision with 0.
                args.push(Expression::number(0));
                args.push(Expression::number(0));
                Ok(Expression::Function(Box::new(Function::new(
                    "TIMEFROMPARTS".to_string(),
                    args,
                ))))
            }
            // MySQL builds a time value with MAKETIME(h, m, s).
            DialectType::MySQL => Ok(Expression::Function(Box::new(Function::new(
                "MAKETIME".to_string(),
                args,
            )))),
            // PostgreSQL equivalent: MAKE_TIME(h, m, s).
            DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
                Function::new("MAKE_TIME".to_string(), args),
            ))),
            _ => Ok(Expression::Function(Box::new(Function::new(
                "TIME".to_string(),
                args,
            )))),
        }
    } else if args.len() == 1 {
        let arg = args.remove(0);
        if matches!(target, DialectType::Spark) {
            // Spark: CAST(x AS TIMESTAMP) (yes, TIMESTAMP not TIME)
            Ok(Expression::Cast(Box::new(Cast {
                this: arg,
                to: DataType::Timestamp {
                    timezone: false,
                    precision: None,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })))
        } else {
            // Most targets: CAST(x AS TIME)
            Ok(Expression::Cast(Box::new(Cast {
                this: arg,
                to: DataType::Time {
                    precision: None,
                    timezone: false,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })))
        }
    } else if args.len() == 2 {
        // TIME(expr, timezone) -> CAST(CAST(expr AS TIMESTAMPTZ) AT TIME ZONE tz AS TIME)
        let expr = args.remove(0);
        let tz = args.remove(0);
        let cast_tstz = Expression::Cast(Box::new(Cast {
            this: expr,
            to: DataType::Timestamp {
                timezone: true,
                precision: None,
            },
            trailing_comments: vec![],
            double_colon_syntax: false,
            format: None,
            default: None,
            inferred_type: None,
        }));
        let at_tz = Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
            this: cast_tstz,
            zone: tz,
        }));
        Ok(Expression::Cast(Box::new(Cast {
            this: at_tz,
            to: DataType::Time {
                precision: None,
                timezone: false,
            },
            trailing_comments: vec![],
            double_colon_syntax: false,
            format: None,
            default: None,
            inferred_type: None,
        })))
    } else {
        // Any other arity: pass the call through unchanged.
        Ok(Expression::Function(Box::new(Function::new(
            "TIME".to_string(),
            args,
        ))))
    }
}
32611
// DATETIME('string') -> CAST('string' AS TIMESTAMP)
// DATETIME('date', TIME 'time') -> CAST(CAST('date' AS DATE) + CAST('time' AS TIME) AS TIMESTAMP)
// DATETIME('string', 'timezone') -> CAST(CAST('string' AS TIMESTAMPTZ) AT TIME ZONE tz AS TIMESTAMP)
// DATETIME(y, m, d, h, min, s) -> target-specific
"DATETIME" => {
    // For BigQuery target: keep DATETIME function but convert TIME literal to CAST
    // (early return for every BigQuery arity).
    if matches!(target, DialectType::BigQuery) {
        if args.len() == 2 {
            let has_time_literal = matches!(&args[1], Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Time(_)));
            if has_time_literal {
                let first = args.remove(0);
                let second = args.remove(0);
                // Rewrite TIME 'x' as CAST('x' AS TIME); any other shape passes through.
                let time_as_cast = match second {
                    Expression::Literal(lit)
                        if matches!(lit.as_ref(), Literal::Time(_)) =>
                    {
                        // The match guard proved this is Literal::Time.
                        let Literal::Time(s) = lit.as_ref() else {
                            unreachable!()
                        };
                        Expression::Cast(Box::new(Cast {
                            this: Expression::Literal(Box::new(Literal::String(
                                s.clone(),
                            ))),
                            to: DataType::Time {
                                precision: None,
                                timezone: false,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }))
                    }
                    other => other,
                };
                return Ok(Expression::Function(Box::new(Function::new(
                    "DATETIME".to_string(),
                    vec![first, time_as_cast],
                ))));
            }
        }
        // All other BigQuery shapes: leave the call untouched.
        return Ok(Expression::Function(Box::new(Function::new(
            "DATETIME".to_string(),
            args,
        ))));
    }

    if args.len() == 1 {
        // DATETIME('string') -> CAST('string' AS TIMESTAMP)
        let arg = args.remove(0);
        Ok(Expression::Cast(Box::new(Cast {
            this: arg,
            to: DataType::Timestamp {
                timezone: false,
                precision: None,
            },
            trailing_comments: vec![],
            double_colon_syntax: false,
            format: None,
            default: None,
            inferred_type: None,
        })))
    } else if args.len() == 2 {
        let first = args.remove(0);
        let second = args.remove(0);
        // Check if second arg is a TIME literal — distinguishes the
        // date+time form from the string+timezone form.
        let is_time_literal = matches!(&second, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Time(_)));
        if is_time_literal {
            // DATETIME('date', TIME 'time') -> CAST(CAST(date AS DATE) + CAST('time' AS TIME) AS TIMESTAMP)
            let cast_date = Expression::Cast(Box::new(Cast {
                this: first,
                to: DataType::Date,
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }));
            // Convert TIME 'x' literal to string 'x' so CAST produces CAST('x' AS TIME) not CAST(TIME 'x' AS TIME)
            let time_as_string = match second {
                Expression::Literal(lit)
                    if matches!(lit.as_ref(), Literal::Time(_)) =>
                {
                    let Literal::Time(s) = lit.as_ref() else {
                        unreachable!()
                    };
                    Expression::Literal(Box::new(Literal::String(s.clone())))
                }
                other => other,
            };
            let cast_time = Expression::Cast(Box::new(Cast {
                this: time_as_string,
                to: DataType::Time {
                    precision: None,
                    timezone: false,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }));
            // DATE + TIME addition, then cast the sum to TIMESTAMP.
            let add_expr =
                Expression::Add(Box::new(BinaryOp::new(cast_date, cast_time)));
            Ok(Expression::Cast(Box::new(Cast {
                this: add_expr,
                to: DataType::Timestamp {
                    timezone: false,
                    precision: None,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })))
        } else {
            // DATETIME('string', 'timezone') -> CAST(CAST('string' AS TIMESTAMPTZ) AT TIME ZONE tz AS TIMESTAMP)
            let cast_tstz = Expression::Cast(Box::new(Cast {
                this: first,
                to: DataType::Timestamp {
                    timezone: true,
                    precision: None,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }));
            let at_tz =
                Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                    this: cast_tstz,
                    zone: second,
                }));
            Ok(Expression::Cast(Box::new(Cast {
                this: at_tz,
                to: DataType::Timestamp {
                    timezone: false,
                    precision: None,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })))
        }
    } else if args.len() >= 3 {
        // DATETIME(y, m, d, h, min, s) -> TIMESTAMP_FROM_PARTS for Snowflake.
        // All other targets currently keep DATETIME unchanged (no
        // MAKE_TIMESTAMP mapping is implemented here).
        if matches!(target, DialectType::Snowflake) {
            Ok(Expression::Function(Box::new(Function::new(
                "TIMESTAMP_FROM_PARTS".to_string(),
                args,
            ))))
        } else {
            Ok(Expression::Function(Box::new(Function::new(
                "DATETIME".to_string(),
                args,
            ))))
        }
    } else {
        // Zero args: pass through unchanged.
        Ok(Expression::Function(Box::new(Function::new(
            "DATETIME".to_string(),
            args,
        ))))
    }
}
32781
// TIMESTAMP(x) -> CAST(x AS TIMESTAMP WITH TIME ZONE) for Presto
// TIMESTAMP(x, tz) -> CAST(x AS TIMESTAMP) AT TIME ZONE tz for DuckDB
"TIMESTAMP" => {
    if args.len() == 1 {
        // One arg: cast to a timezone-aware TIMESTAMP.
        let arg = args.remove(0);
        Ok(Expression::Cast(Box::new(Cast {
            this: arg,
            to: DataType::Timestamp {
                timezone: true,
                precision: None,
            },
            trailing_comments: vec![],
            double_colon_syntax: false,
            format: None,
            default: None,
            inferred_type: None,
        })))
    } else if args.len() == 2 {
        // Two args: value plus timezone. Inner cast is timezone-less.
        let arg = args.remove(0);
        let tz = args.remove(0);
        let cast_ts = Expression::Cast(Box::new(Cast {
            this: arg,
            to: DataType::Timestamp {
                timezone: false,
                precision: None,
            },
            trailing_comments: vec![],
            double_colon_syntax: false,
            format: None,
            default: None,
            inferred_type: None,
        }));
        if matches!(target, DialectType::Snowflake) {
            // CONVERT_TIMEZONE('tz', CAST(x AS TIMESTAMP))
            Ok(Expression::Function(Box::new(Function::new(
                "CONVERT_TIMEZONE".to_string(),
                vec![tz, cast_ts],
            ))))
        } else {
            // Everyone else: CAST(x AS TIMESTAMP) AT TIME ZONE tz
            Ok(Expression::AtTimeZone(Box::new(
                crate::expressions::AtTimeZone {
                    this: cast_ts,
                    zone: tz,
                },
            )))
        }
    } else {
        // 0 or 3+ args: leave the call unchanged.
        Ok(Expression::Function(Box::new(Function::new(
            "TIMESTAMP".to_string(),
            args,
        ))))
    }
}
32835
// STRING(x) -> CAST(x AS VARCHAR/TEXT)
// STRING(x, tz) -> CAST(CAST(x AS TIMESTAMP) AT TIME ZONE 'UTC' AT TIME ZONE tz AS VARCHAR/TEXT)
"STRING" => {
    if args.len() == 1 {
        // Plain conversion: cast to the target's preferred text type.
        let arg = args.remove(0);
        let cast_type = match target {
            DialectType::DuckDB => DataType::Text,
            _ => DataType::VarChar {
                length: None,
                parenthesized_length: false,
            },
        };
        Ok(Expression::Cast(Box::new(Cast {
            this: arg,
            to: cast_type,
            trailing_comments: vec![],
            double_colon_syntax: false,
            format: None,
            default: None,
            inferred_type: None,
        })))
    } else if args.len() == 2 {
        // Timestamp-to-string rendered in a specific timezone.
        let arg = args.remove(0);
        let tz = args.remove(0);
        let cast_type = match target {
            DialectType::DuckDB => DataType::Text,
            _ => DataType::VarChar {
                length: None,
                parenthesized_length: false,
            },
        };
        if matches!(target, DialectType::Snowflake) {
            // STRING(x, tz) -> CAST(CONVERT_TIMEZONE('UTC', tz, x) AS VARCHAR)
            let convert_tz = Expression::Function(Box::new(Function::new(
                "CONVERT_TIMEZONE".to_string(),
                vec![
                    Expression::Literal(Box::new(Literal::String("UTC".to_string()))),
                    tz,
                    arg,
                ],
            )));
            Ok(Expression::Cast(Box::new(Cast {
                this: convert_tz,
                to: cast_type,
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })))
        } else {
            // STRING(x, tz) -> CAST(CAST(x AS TIMESTAMP) AT TIME ZONE 'UTC' AT TIME ZONE tz AS TEXT/VARCHAR)
            // — treat the naive timestamp as UTC, then shift into tz.
            let cast_ts = Expression::Cast(Box::new(Cast {
                this: arg,
                to: DataType::Timestamp {
                    timezone: false,
                    precision: None,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }));
            let at_utc =
                Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                    this: cast_ts,
                    zone: Expression::Literal(Box::new(Literal::String(
                        "UTC".to_string(),
                    ))),
                }));
            let at_tz =
                Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                    this: at_utc,
                    zone: tz,
                }));
            Ok(Expression::Cast(Box::new(Cast {
                this: at_tz,
                to: cast_type,
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })))
        }
    } else {
        // Unexpected arity: pass through unchanged.
        Ok(Expression::Function(Box::new(Function::new(
            "STRING".to_string(),
            args,
        ))))
    }
}
32929
// UNIX_SECONDS, UNIX_MILLIS, UNIX_MICROS as functions (not expressions)
"UNIX_SECONDS" if args.len() == 1 => {
    let ts = args.remove(0);
    match target {
        DialectType::DuckDB => {
            // CAST(EPOCH(CAST(ts AS TIMESTAMPTZ)) AS BIGINT)
            // — outer cast pins the result to an integer second count.
            let cast_ts = Self::ensure_cast_timestamptz(ts);
            let epoch = Expression::Function(Box::new(Function::new(
                "EPOCH".to_string(),
                vec![cast_ts],
            )));
            Ok(Expression::Cast(Box::new(Cast {
                this: epoch,
                to: DataType::BigInt { length: None },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })))
        }
        DialectType::Snowflake => {
            // TIMESTAMPDIFF(SECONDS, CAST('1970-01-01 00:00:00+00' AS TIMESTAMPTZ), ts)
            // i.e. seconds elapsed since the Unix epoch.
            let epoch = Expression::Cast(Box::new(Cast {
                this: Expression::Literal(Box::new(Literal::String(
                    "1970-01-01 00:00:00+00".to_string(),
                ))),
                to: DataType::Timestamp {
                    timezone: true,
                    precision: None,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }));
            Ok(Expression::TimestampDiff(Box::new(
                crate::expressions::TimestampDiff {
                    this: Box::new(epoch),
                    expression: Box::new(ts),
                    unit: Some("SECONDS".to_string()),
                },
            )))
        }
        _ => Ok(Expression::Function(Box::new(Function::new(
            "UNIX_SECONDS".to_string(),
            vec![ts],
        )))),
    }
}
32981
32982 "UNIX_MILLIS" if args.len() == 1 => {
32983 let ts = args.remove(0);
32984 match target {
32985 DialectType::DuckDB => {
32986 // EPOCH_MS(CAST(ts AS TIMESTAMPTZ))
32987 let cast_ts = Self::ensure_cast_timestamptz(ts);
32988 Ok(Expression::Function(Box::new(Function::new(
32989 "EPOCH_MS".to_string(),
32990 vec![cast_ts],
32991 ))))
32992 }
32993 _ => Ok(Expression::Function(Box::new(Function::new(
32994 "UNIX_MILLIS".to_string(),
32995 vec![ts],
32996 )))),
32997 }
32998 }
32999
33000 "UNIX_MICROS" if args.len() == 1 => {
33001 let ts = args.remove(0);
33002 match target {
33003 DialectType::DuckDB => {
33004 // EPOCH_US(CAST(ts AS TIMESTAMPTZ))
33005 let cast_ts = Self::ensure_cast_timestamptz(ts);
33006 Ok(Expression::Function(Box::new(Function::new(
33007 "EPOCH_US".to_string(),
33008 vec![cast_ts],
33009 ))))
33010 }
33011 _ => Ok(Expression::Function(Box::new(Function::new(
33012 "UNIX_MICROS".to_string(),
33013 vec![ts],
33014 )))),
33015 }
33016 }
33017
33018 // ARRAY_CONCAT / LIST_CONCAT -> target-specific
33019 "ARRAY_CONCAT" | "LIST_CONCAT" => {
33020 match target {
33021 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
33022 // CONCAT(arr1, arr2, ...)
33023 Ok(Expression::Function(Box::new(Function::new(
33024 "CONCAT".to_string(),
33025 args,
33026 ))))
33027 }
33028 DialectType::Presto | DialectType::Trino => {
33029 // CONCAT(arr1, arr2, ...)
33030 Ok(Expression::Function(Box::new(Function::new(
33031 "CONCAT".to_string(),
33032 args,
33033 ))))
33034 }
33035 DialectType::Snowflake => {
33036 // ARRAY_CAT(arr1, ARRAY_CAT(arr2, arr3))
33037 if args.len() == 1 {
33038 // ARRAY_CAT requires 2 args, add empty array as []
33039 let empty_arr = Expression::ArrayFunc(Box::new(
33040 crate::expressions::ArrayConstructor {
33041 expressions: vec![],
33042 bracket_notation: true,
33043 use_list_keyword: false,
33044 },
33045 ));
33046 let mut new_args = args;
33047 new_args.push(empty_arr);
33048 Ok(Expression::Function(Box::new(Function::new(
33049 "ARRAY_CAT".to_string(),
33050 new_args,
33051 ))))
33052 } else if args.is_empty() {
33053 Ok(Expression::Function(Box::new(Function::new(
33054 "ARRAY_CAT".to_string(),
33055 args,
33056 ))))
33057 } else {
33058 let mut it = args.into_iter().rev();
33059 let mut result = it.next().unwrap();
33060 for arr in it {
33061 result = Expression::Function(Box::new(Function::new(
33062 "ARRAY_CAT".to_string(),
33063 vec![arr, result],
33064 )));
33065 }
33066 Ok(result)
33067 }
33068 }
33069 DialectType::PostgreSQL => {
33070 // ARRAY_CAT(arr1, ARRAY_CAT(arr2, arr3))
33071 if args.len() <= 1 {
33072 Ok(Expression::Function(Box::new(Function::new(
33073 "ARRAY_CAT".to_string(),
33074 args,
33075 ))))
33076 } else {
33077 let mut it = args.into_iter().rev();
33078 let mut result = it.next().unwrap();
33079 for arr in it {
33080 result = Expression::Function(Box::new(Function::new(
33081 "ARRAY_CAT".to_string(),
33082 vec![arr, result],
33083 )));
33084 }
33085 Ok(result)
33086 }
33087 }
33088 DialectType::Redshift => {
33089 // ARRAY_CONCAT(arr1, ARRAY_CONCAT(arr2, arr3))
33090 if args.len() <= 2 {
33091 Ok(Expression::Function(Box::new(Function::new(
33092 "ARRAY_CONCAT".to_string(),
33093 args,
33094 ))))
33095 } else {
33096 let mut it = args.into_iter().rev();
33097 let mut result = it.next().unwrap();
33098 for arr in it {
33099 result = Expression::Function(Box::new(Function::new(
33100 "ARRAY_CONCAT".to_string(),
33101 vec![arr, result],
33102 )));
33103 }
33104 Ok(result)
33105 }
33106 }
33107 DialectType::DuckDB => {
33108 // LIST_CONCAT supports multiple args natively in DuckDB
33109 Ok(Expression::Function(Box::new(Function::new(
33110 "LIST_CONCAT".to_string(),
33111 args,
33112 ))))
33113 }
33114 _ => Ok(Expression::Function(Box::new(Function::new(
33115 "ARRAY_CONCAT".to_string(),
33116 args,
33117 )))),
33118 }
33119 }
33120
33121 // ARRAY_CONCAT_AGG -> Snowflake: ARRAY_FLATTEN(ARRAY_AGG(x))
33122 "ARRAY_CONCAT_AGG" if args.len() == 1 => {
33123 let arg = args.remove(0);
33124 match target {
33125 DialectType::Snowflake => {
33126 let array_agg =
33127 Expression::ArrayAgg(Box::new(crate::expressions::AggFunc {
33128 this: arg,
33129 distinct: false,
33130 filter: None,
33131 order_by: vec![],
33132 name: None,
33133 ignore_nulls: None,
33134 having_max: None,
33135 limit: None,
33136 inferred_type: None,
33137 }));
33138 Ok(Expression::Function(Box::new(Function::new(
33139 "ARRAY_FLATTEN".to_string(),
33140 vec![array_agg],
33141 ))))
33142 }
33143 _ => Ok(Expression::Function(Box::new(Function::new(
33144 "ARRAY_CONCAT_AGG".to_string(),
33145 vec![arg],
33146 )))),
33147 }
33148 }
33149
33150 // MD5/SHA1/SHA256/SHA512 -> target-specific hash functions
33151 "MD5" if args.len() == 1 => {
33152 let arg = args.remove(0);
33153 match target {
33154 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
33155 // UNHEX(MD5(x))
33156 let md5 = Expression::Function(Box::new(Function::new(
33157 "MD5".to_string(),
33158 vec![arg],
33159 )));
33160 Ok(Expression::Function(Box::new(Function::new(
33161 "UNHEX".to_string(),
33162 vec![md5],
33163 ))))
33164 }
33165 DialectType::Snowflake => {
33166 // MD5_BINARY(x)
33167 Ok(Expression::Function(Box::new(Function::new(
33168 "MD5_BINARY".to_string(),
33169 vec![arg],
33170 ))))
33171 }
33172 _ => Ok(Expression::Function(Box::new(Function::new(
33173 "MD5".to_string(),
33174 vec![arg],
33175 )))),
33176 }
33177 }
33178
33179 "SHA1" if args.len() == 1 => {
33180 let arg = args.remove(0);
33181 match target {
33182 DialectType::DuckDB => {
33183 // UNHEX(SHA1(x))
33184 let sha1 = Expression::Function(Box::new(Function::new(
33185 "SHA1".to_string(),
33186 vec![arg],
33187 )));
33188 Ok(Expression::Function(Box::new(Function::new(
33189 "UNHEX".to_string(),
33190 vec![sha1],
33191 ))))
33192 }
33193 _ => Ok(Expression::Function(Box::new(Function::new(
33194 "SHA1".to_string(),
33195 vec![arg],
33196 )))),
33197 }
33198 }
33199
33200 "SHA256" if args.len() == 1 => {
33201 let arg = args.remove(0);
33202 match target {
33203 DialectType::DuckDB => {
33204 // UNHEX(SHA256(x))
33205 let sha = Expression::Function(Box::new(Function::new(
33206 "SHA256".to_string(),
33207 vec![arg],
33208 )));
33209 Ok(Expression::Function(Box::new(Function::new(
33210 "UNHEX".to_string(),
33211 vec![sha],
33212 ))))
33213 }
33214 DialectType::Snowflake => {
33215 // SHA2_BINARY(x, 256)
33216 Ok(Expression::Function(Box::new(Function::new(
33217 "SHA2_BINARY".to_string(),
33218 vec![arg, Expression::number(256)],
33219 ))))
33220 }
33221 DialectType::Redshift | DialectType::Spark => {
33222 // SHA2(x, 256)
33223 Ok(Expression::Function(Box::new(Function::new(
33224 "SHA2".to_string(),
33225 vec![arg, Expression::number(256)],
33226 ))))
33227 }
33228 _ => Ok(Expression::Function(Box::new(Function::new(
33229 "SHA256".to_string(),
33230 vec![arg],
33231 )))),
33232 }
33233 }
33234
33235 "SHA512" if args.len() == 1 => {
33236 let arg = args.remove(0);
33237 match target {
33238 DialectType::Snowflake => {
33239 // SHA2_BINARY(x, 512)
33240 Ok(Expression::Function(Box::new(Function::new(
33241 "SHA2_BINARY".to_string(),
33242 vec![arg, Expression::number(512)],
33243 ))))
33244 }
33245 DialectType::Redshift | DialectType::Spark => {
33246 // SHA2(x, 512)
33247 Ok(Expression::Function(Box::new(Function::new(
33248 "SHA2".to_string(),
33249 vec![arg, Expression::number(512)],
33250 ))))
33251 }
33252 _ => Ok(Expression::Function(Box::new(Function::new(
33253 "SHA512".to_string(),
33254 vec![arg],
33255 )))),
33256 }
33257 }
33258
// REGEXP_EXTRACT_ALL(str, pattern) -> add the group argument each target expects
"REGEXP_EXTRACT_ALL" if args.len() == 2 => {
    let str_expr = args.remove(0);
    let pattern = args.remove(0);

    // Heuristic: a string-literal pattern containing both '(' and ')' is
    // assumed to have a capturing group.
    // NOTE(review): this also matches non-capturing `(?:...)` and escaped
    // parens — confirm that is acceptable for these targets.
    let has_groups = match &pattern {
        Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
            let Literal::String(s) = lit.as_ref() else {
                unreachable!()
            };
            s.contains('(') && s.contains(')')
        }
        _ => false,
    };

    match target {
        DialectType::DuckDB => {
            // DuckDB takes an explicit group index: 1 = first capture,
            // 0 = whole match.
            let group = if has_groups {
                Expression::number(1)
            } else {
                Expression::number(0)
            };
            Ok(Expression::Function(Box::new(Function::new(
                "REGEXP_EXTRACT_ALL".to_string(),
                vec![str_expr, pattern, group],
            ))))
        }
        DialectType::Spark | DialectType::Databricks => {
            // Spark's default group_index is 1 (same as BigQuery), so omit for capturing groups
            if has_groups {
                Ok(Expression::Function(Box::new(Function::new(
                    "REGEXP_EXTRACT_ALL".to_string(),
                    vec![str_expr, pattern],
                ))))
            } else {
                // No capture group: ask for the whole match explicitly.
                Ok(Expression::Function(Box::new(Function::new(
                    "REGEXP_EXTRACT_ALL".to_string(),
                    vec![str_expr, pattern, Expression::number(0)],
                ))))
            }
        }
        DialectType::Presto | DialectType::Trino => {
            // Presto/Trino: pass group 1 only when the pattern captures.
            if has_groups {
                Ok(Expression::Function(Box::new(Function::new(
                    "REGEXP_EXTRACT_ALL".to_string(),
                    vec![str_expr, pattern, Expression::number(1)],
                ))))
            } else {
                Ok(Expression::Function(Box::new(Function::new(
                    "REGEXP_EXTRACT_ALL".to_string(),
                    vec![str_expr, pattern],
                ))))
            }
        }
        DialectType::Snowflake => {
            if has_groups {
                // REGEXP_EXTRACT_ALL(str, pattern, 1, 1, 'c', 1)
                // — position=1, occurrence=1, parameters='c', group_num=1,
                // following Snowflake's regexp argument convention.
                Ok(Expression::Function(Box::new(Function::new(
                    "REGEXP_EXTRACT_ALL".to_string(),
                    vec![
                        str_expr,
                        pattern,
                        Expression::number(1),
                        Expression::number(1),
                        Expression::Literal(Box::new(Literal::String("c".to_string()))),
                        Expression::number(1),
                    ],
                ))))
            } else {
                Ok(Expression::Function(Box::new(Function::new(
                    "REGEXP_EXTRACT_ALL".to_string(),
                    vec![str_expr, pattern],
                ))))
            }
        }
        _ => Ok(Expression::Function(Box::new(Function::new(
            "REGEXP_EXTRACT_ALL".to_string(),
            vec![str_expr, pattern],
        )))),
    }
}
33341
33342 // MOD(x, y) -> x % y for PostgreSQL/DuckDB
33343 "MOD" if args.len() == 2 => {
33344 match target {
33345 DialectType::PostgreSQL
33346 | DialectType::DuckDB
33347 | DialectType::Presto
33348 | DialectType::Trino
33349 | DialectType::Athena
33350 | DialectType::Snowflake => {
33351 let x = args.remove(0);
33352 let y = args.remove(0);
33353 // Wrap complex expressions in parens to preserve precedence
33354 let needs_paren = |e: &Expression| {
33355 matches!(
33356 e,
33357 Expression::Add(_)
33358 | Expression::Sub(_)
33359 | Expression::Mul(_)
33360 | Expression::Div(_)
33361 )
33362 };
33363 let x = if needs_paren(&x) {
33364 Expression::Paren(Box::new(crate::expressions::Paren {
33365 this: x,
33366 trailing_comments: vec![],
33367 }))
33368 } else {
33369 x
33370 };
33371 let y = if needs_paren(&y) {
33372 Expression::Paren(Box::new(crate::expressions::Paren {
33373 this: y,
33374 trailing_comments: vec![],
33375 }))
33376 } else {
33377 y
33378 };
33379 Ok(Expression::Mod(Box::new(
33380 crate::expressions::BinaryOp::new(x, y),
33381 )))
33382 }
33383 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
33384 // Hive/Spark: a % b
33385 let x = args.remove(0);
33386 let y = args.remove(0);
33387 let needs_paren = |e: &Expression| {
33388 matches!(
33389 e,
33390 Expression::Add(_)
33391 | Expression::Sub(_)
33392 | Expression::Mul(_)
33393 | Expression::Div(_)
33394 )
33395 };
33396 let x = if needs_paren(&x) {
33397 Expression::Paren(Box::new(crate::expressions::Paren {
33398 this: x,
33399 trailing_comments: vec![],
33400 }))
33401 } else {
33402 x
33403 };
33404 let y = if needs_paren(&y) {
33405 Expression::Paren(Box::new(crate::expressions::Paren {
33406 this: y,
33407 trailing_comments: vec![],
33408 }))
33409 } else {
33410 y
33411 };
33412 Ok(Expression::Mod(Box::new(
33413 crate::expressions::BinaryOp::new(x, y),
33414 )))
33415 }
33416 _ => Ok(Expression::Function(Box::new(Function::new(
33417 "MOD".to_string(),
33418 args,
33419 )))),
33420 }
33421 }
33422
33423 // ARRAY_FILTER(arr, lambda) -> FILTER for Hive/Spark/Presto, ARRAY_FILTER for StarRocks
33424 "ARRAY_FILTER" if args.len() == 2 => {
33425 let name = match target {
33426 DialectType::DuckDB => "LIST_FILTER",
33427 DialectType::StarRocks => "ARRAY_FILTER",
33428 _ => "FILTER",
33429 };
33430 Ok(Expression::Function(Box::new(Function::new(
33431 name.to_string(),
33432 args,
33433 ))))
33434 }
33435 // FILTER(arr, lambda) -> ARRAY_FILTER for StarRocks, LIST_FILTER for DuckDB
33436 "FILTER" if args.len() == 2 => {
33437 let name = match target {
33438 DialectType::DuckDB => "LIST_FILTER",
33439 DialectType::StarRocks => "ARRAY_FILTER",
33440 _ => "FILTER",
33441 };
33442 Ok(Expression::Function(Box::new(Function::new(
33443 name.to_string(),
33444 args,
33445 ))))
33446 }
33447 // REDUCE(arr, init, lambda1, lambda2) -> AGGREGATE for Spark
33448 "REDUCE" if args.len() >= 3 => {
33449 let name = match target {
33450 DialectType::Spark | DialectType::Databricks => "AGGREGATE",
33451 _ => "REDUCE",
33452 };
33453 Ok(Expression::Function(Box::new(Function::new(
33454 name.to_string(),
33455 args,
33456 ))))
33457 }
33458 // ARRAY_REVERSE(x) -> arrayReverse for ClickHouse (handled by generator)
33459 "ARRAY_REVERSE" if args.len() == 1 => Ok(Expression::Function(Box::new(
33460 Function::new("ARRAY_REVERSE".to_string(), args),
33461 ))),
33462
33463 // CONCAT(a, b, ...) -> a || b || ... for DuckDB with 3+ args
33464 "CONCAT" if args.len() > 2 => match target {
33465 DialectType::DuckDB => {
33466 let mut it = args.into_iter();
33467 let mut result = it.next().unwrap();
33468 for arg in it {
33469 result = Expression::DPipe(Box::new(crate::expressions::DPipe {
33470 this: Box::new(result),
33471 expression: Box::new(arg),
33472 safe: None,
33473 }));
33474 }
33475 Ok(result)
33476 }
33477 _ => Ok(Expression::Function(Box::new(Function::new(
33478 "CONCAT".to_string(),
33479 args,
33480 )))),
33481 },
33482
33483 // GENERATE_DATE_ARRAY(start, end[, step]) -> target-specific
33484 "GENERATE_DATE_ARRAY" => {
33485 if matches!(target, DialectType::BigQuery) {
33486 // BQ->BQ: add default interval if not present
33487 if args.len() == 2 {
33488 let start = args.remove(0);
33489 let end = args.remove(0);
33490 let default_interval =
33491 Expression::Interval(Box::new(crate::expressions::Interval {
33492 this: Some(Expression::Literal(Box::new(Literal::String(
33493 "1".to_string(),
33494 )))),
33495 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
33496 unit: crate::expressions::IntervalUnit::Day,
33497 use_plural: false,
33498 }),
33499 }));
33500 Ok(Expression::Function(Box::new(Function::new(
33501 "GENERATE_DATE_ARRAY".to_string(),
33502 vec![start, end, default_interval],
33503 ))))
33504 } else {
33505 Ok(Expression::Function(Box::new(Function::new(
33506 "GENERATE_DATE_ARRAY".to_string(),
33507 args,
33508 ))))
33509 }
33510 } else if matches!(target, DialectType::DuckDB) {
33511 // DuckDB: CAST(GENERATE_SERIES(CAST(start AS DATE), CAST(end AS DATE), step) AS DATE[])
33512 let start = args.get(0).cloned();
33513 let end = args.get(1).cloned();
33514 let step = args.get(2).cloned().or_else(|| {
33515 Some(Expression::Interval(Box::new(
33516 crate::expressions::Interval {
33517 this: Some(Expression::Literal(Box::new(Literal::String(
33518 "1".to_string(),
33519 )))),
33520 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
33521 unit: crate::expressions::IntervalUnit::Day,
33522 use_plural: false,
33523 }),
33524 },
33525 )))
33526 });
33527
33528 // Wrap start/end in CAST(... AS DATE) only for string literals
33529 let maybe_cast_date = |expr: Expression| -> Expression {
33530 if matches!(&expr, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
33531 {
33532 Expression::Cast(Box::new(Cast {
33533 this: expr,
33534 to: DataType::Date,
33535 trailing_comments: vec![],
33536 double_colon_syntax: false,
33537 format: None,
33538 default: None,
33539 inferred_type: None,
33540 }))
33541 } else {
33542 expr
33543 }
33544 };
33545 let cast_start = start.map(maybe_cast_date);
33546 let cast_end = end.map(maybe_cast_date);
33547
33548 let gen_series =
33549 Expression::GenerateSeries(Box::new(crate::expressions::GenerateSeries {
33550 start: cast_start.map(Box::new),
33551 end: cast_end.map(Box::new),
33552 step: step.map(Box::new),
33553 is_end_exclusive: None,
33554 }));
33555
33556 // Wrap in CAST(... AS DATE[])
33557 Ok(Expression::Cast(Box::new(Cast {
33558 this: gen_series,
33559 to: DataType::Array {
33560 element_type: Box::new(DataType::Date),
33561 dimension: None,
33562 },
33563 trailing_comments: vec![],
33564 double_colon_syntax: false,
33565 format: None,
33566 default: None,
33567 inferred_type: None,
33568 })))
33569 } else if matches!(target, DialectType::Snowflake) {
33570 // Snowflake: keep as GENERATE_DATE_ARRAY function for later transform
33571 // (transform_generate_date_array_snowflake will convert to ARRAY_GENERATE_RANGE + DATEADD)
33572 if args.len() == 2 {
33573 let start = args.remove(0);
33574 let end = args.remove(0);
33575 let default_interval =
33576 Expression::Interval(Box::new(crate::expressions::Interval {
33577 this: Some(Expression::Literal(Box::new(Literal::String(
33578 "1".to_string(),
33579 )))),
33580 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
33581 unit: crate::expressions::IntervalUnit::Day,
33582 use_plural: false,
33583 }),
33584 }));
33585 Ok(Expression::Function(Box::new(Function::new(
33586 "GENERATE_DATE_ARRAY".to_string(),
33587 vec![start, end, default_interval],
33588 ))))
33589 } else {
33590 Ok(Expression::Function(Box::new(Function::new(
33591 "GENERATE_DATE_ARRAY".to_string(),
33592 args,
33593 ))))
33594 }
33595 } else {
33596 // Convert to GenerateSeries for other targets
33597 let start = args.get(0).cloned();
33598 let end = args.get(1).cloned();
33599 let step = args.get(2).cloned().or_else(|| {
33600 Some(Expression::Interval(Box::new(
33601 crate::expressions::Interval {
33602 this: Some(Expression::Literal(Box::new(Literal::String(
33603 "1".to_string(),
33604 )))),
33605 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
33606 unit: crate::expressions::IntervalUnit::Day,
33607 use_plural: false,
33608 }),
33609 },
33610 )))
33611 });
33612 Ok(Expression::GenerateSeries(Box::new(
33613 crate::expressions::GenerateSeries {
33614 start: start.map(Box::new),
33615 end: end.map(Box::new),
33616 step: step.map(Box::new),
33617 is_end_exclusive: None,
33618 },
33619 )))
33620 }
33621 }
33622
// PARSE_DATE(format, str) -> target-specific
// BigQuery's argument order is (format_string, date_string); the format string
// is rewritten into the target dialect's strftime-style vocabulary.
"PARSE_DATE" if args.len() == 2 => {
    let format = args.remove(0);
    let str_expr = args.remove(0);
    match target {
        DialectType::DuckDB => {
            // CAST(STRPTIME(str, duck_format) AS DATE)
            // STRPTIME yields a TIMESTAMP, so the outer CAST truncates to DATE.
            let duck_format = Self::bq_format_to_duckdb(&format);
            let strptime = Expression::Function(Box::new(Function::new(
                "STRPTIME".to_string(),
                vec![str_expr, duck_format],
            )));
            Ok(Expression::Cast(Box::new(Cast {
                this: strptime,
                to: DataType::Date,
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })))
        }
        DialectType::Snowflake => {
            // _POLYGLOT_DATE(str, snowflake_format)
            // Use marker so Snowflake target transform keeps it as DATE() instead of TO_DATE()
            let sf_format = Self::bq_format_to_snowflake(&format);
            Ok(Expression::Function(Box::new(Function::new(
                "_POLYGLOT_DATE".to_string(),
                vec![str_expr, sf_format],
            ))))
        }
        // Other targets: pass through unchanged with BigQuery's (format, value) order.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "PARSE_DATE".to_string(),
            vec![format, str_expr],
        )))),
    }
}
33660
// PARSE_TIMESTAMP(format, str) -> target-specific
// Accepts an optional third timezone argument (BigQuery signature).
"PARSE_TIMESTAMP" if args.len() >= 2 => {
    let format = args.remove(0);
    let str_expr = args.remove(0);
    // Optional timezone argument, if present.
    let tz = if !args.is_empty() {
        Some(args.remove(0))
    } else {
        None
    };
    match target {
        DialectType::DuckDB => {
            // STRPTIME(str, duck_format)
            // NOTE(review): the optional `tz` argument is dropped on this path —
            // confirm whether DuckDB output should apply the timezone.
            let duck_format = Self::bq_format_to_duckdb(&format);
            let strptime = Expression::Function(Box::new(Function::new(
                "STRPTIME".to_string(),
                vec![str_expr, duck_format],
            )));
            Ok(strptime)
        }
        _ => {
            // Pass through, re-appending the timezone argument when it was given.
            let mut result_args = vec![format, str_expr];
            if let Some(tz_arg) = tz {
                result_args.push(tz_arg);
            }
            Ok(Expression::Function(Box::new(Function::new(
                "PARSE_TIMESTAMP".to_string(),
                result_args,
            ))))
        }
    }
}
33691
// FORMAT_DATE(format, date) -> target-specific
"FORMAT_DATE" if args.len() == 2 => {
    let format = args.remove(0);
    let date_expr = args.remove(0);
    match target {
        DialectType::DuckDB => {
            // STRFTIME(CAST(date AS DATE), format)
            // The CAST guards against string-typed date operands.
            let cast_date = Expression::Cast(Box::new(Cast {
                this: date_expr,
                to: DataType::Date,
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }));
            // NOTE(review): the format string is passed through verbatim here
            // (no bq_format_to_duckdb rewrite) — confirm intended.
            Ok(Expression::Function(Box::new(Function::new(
                "STRFTIME".to_string(),
                vec![cast_date, format],
            ))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new(
            "FORMAT_DATE".to_string(),
            vec![format, date_expr],
        )))),
    }
}
33719
// FORMAT_DATETIME(format, datetime) -> target-specific
"FORMAT_DATETIME" if args.len() == 2 => {
    let format = args.remove(0);
    let dt_expr = args.remove(0);

    if matches!(target, DialectType::BigQuery) {
        // BQ->BQ: normalize %H:%M:%S to %T, %x to %D
        let norm_format = Self::bq_format_normalize_bq(&format);
        // Also strip DATETIME keyword from typed literals
        // (a Timestamp literal is re-emitted as CAST('...' AS DATETIME)).
        let norm_dt = match dt_expr {
            Expression::Literal(lit)
                if matches!(lit.as_ref(), Literal::Timestamp(_)) =>
            {
                // Guard above proves the variant; the let-else cannot fail.
                let Literal::Timestamp(s) = lit.as_ref() else {
                    unreachable!()
                };
                Expression::Cast(Box::new(Cast {
                    this: Expression::Literal(Box::new(Literal::String(s.clone()))),
                    to: DataType::Custom {
                        name: "DATETIME".to_string(),
                    },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                }))
            }
            other => other,
        };
        // Early return from the enclosing transform function for the BQ->BQ case.
        return Ok(Expression::Function(Box::new(Function::new(
            "FORMAT_DATETIME".to_string(),
            vec![norm_format, norm_dt],
        ))));
    }

    match target {
        DialectType::DuckDB => {
            // STRFTIME(CAST(dt AS TIMESTAMP), duckdb_format)
            let cast_dt = Self::ensure_cast_timestamp(dt_expr);
            let duck_format = Self::bq_format_to_duckdb(&format);
            Ok(Expression::Function(Box::new(Function::new(
                "STRFTIME".to_string(),
                vec![cast_dt, duck_format],
            ))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new(
            "FORMAT_DATETIME".to_string(),
            vec![format, dt_expr],
        )))),
    }
}
33772
// FORMAT_TIMESTAMP(format, ts) -> target-specific
// The double CAST (ts -> TIMESTAMPTZ -> TIMESTAMP) mirrors BigQuery semantics:
// interpret the operand as a zoned timestamp, then format its local wall-clock value.
"FORMAT_TIMESTAMP" if args.len() == 2 => {
    let format = args.remove(0);
    let ts_expr = args.remove(0);
    match target {
        DialectType::DuckDB => {
            // STRFTIME(CAST(CAST(ts AS TIMESTAMPTZ) AS TIMESTAMP), format)
            let cast_tstz = Self::ensure_cast_timestamptz(ts_expr);
            let cast_ts = Expression::Cast(Box::new(Cast {
                this: cast_tstz,
                to: DataType::Timestamp {
                    timezone: false,
                    precision: None,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }));
            Ok(Expression::Function(Box::new(Function::new(
                "STRFTIME".to_string(),
                vec![cast_ts, format],
            ))))
        }
        DialectType::Snowflake => {
            // TO_CHAR(CAST(CAST(ts AS TIMESTAMPTZ) AS TIMESTAMP), snowflake_format)
            let cast_tstz = Self::ensure_cast_timestamptz(ts_expr);
            let cast_ts = Expression::Cast(Box::new(Cast {
                this: cast_tstz,
                to: DataType::Timestamp {
                    timezone: false,
                    precision: None,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }));
            let sf_format = Self::bq_format_to_snowflake(&format);
            Ok(Expression::Function(Box::new(Function::new(
                "TO_CHAR".to_string(),
                vec![cast_ts, sf_format],
            ))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new(
            "FORMAT_TIMESTAMP".to_string(),
            vec![format, ts_expr],
        )))),
    }
}
33825
// UNIX_DATE(date) -> DATE_DIFF('DAY', '1970-01-01', date) for DuckDB
// i.e. the number of whole days since the Unix epoch.
"UNIX_DATE" if args.len() == 1 => {
    let date = args.remove(0);
    match target {
        DialectType::DuckDB => {
            // Epoch anchor: CAST('1970-01-01' AS DATE).
            let epoch = Expression::Cast(Box::new(Cast {
                this: Expression::Literal(Box::new(Literal::String(
                    "1970-01-01".to_string(),
                ))),
                to: DataType::Date,
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }));
            // DATE_DIFF('DAY', epoch, date) but date might be DATE '...' literal
            // Need to convert DATE literal to CAST
            let norm_date = Self::date_literal_to_cast(date);
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_DIFF".to_string(),
                vec![
                    Expression::Literal(Box::new(Literal::String("DAY".to_string()))),
                    epoch,
                    norm_date,
                ],
            ))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new(
            "UNIX_DATE".to_string(),
            vec![date],
        )))),
    }
}
33860
// UNIX_SECONDS(ts) -> target-specific
// Seconds elapsed since the Unix epoch for a zoned timestamp.
"UNIX_SECONDS" if args.len() == 1 => {
    let ts = args.remove(0);
    match target {
        DialectType::DuckDB => {
            // CAST(EPOCH(CAST(ts AS TIMESTAMPTZ)) AS BIGINT)
            // The BIGINT cast truncates EPOCH()'s fractional-second part.
            let norm_ts = Self::ts_literal_to_cast_tz(ts);
            let epoch = Expression::Function(Box::new(Function::new(
                "EPOCH".to_string(),
                vec![norm_ts],
            )));
            Ok(Expression::Cast(Box::new(Cast {
                this: epoch,
                to: DataType::BigInt { length: None },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })))
        }
        DialectType::Snowflake => {
            // TIMESTAMPDIFF(SECONDS, CAST('1970-01-01 00:00:00+00' AS TIMESTAMPTZ), ts)
            // NOTE(review): the date part is emitted as the plural identifier
            // "SECONDS" — confirm Snowflake accepts this alias in TIMESTAMPDIFF.
            let epoch = Expression::Cast(Box::new(Cast {
                this: Expression::Literal(Box::new(Literal::String(
                    "1970-01-01 00:00:00+00".to_string(),
                ))),
                to: DataType::Timestamp {
                    timezone: true,
                    precision: None,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }));
            Ok(Expression::Function(Box::new(Function::new(
                "TIMESTAMPDIFF".to_string(),
                vec![
                    Expression::Identifier(Identifier::new("SECONDS".to_string())),
                    epoch,
                    ts,
                ],
            ))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new(
            "UNIX_SECONDS".to_string(),
            vec![ts],
        )))),
    }
}
33913
33914 // UNIX_MILLIS(ts) -> target-specific
33915 "UNIX_MILLIS" if args.len() == 1 => {
33916 let ts = args.remove(0);
33917 match target {
33918 DialectType::DuckDB => {
33919 let norm_ts = Self::ts_literal_to_cast_tz(ts);
33920 Ok(Expression::Function(Box::new(Function::new(
33921 "EPOCH_MS".to_string(),
33922 vec![norm_ts],
33923 ))))
33924 }
33925 _ => Ok(Expression::Function(Box::new(Function::new(
33926 "UNIX_MILLIS".to_string(),
33927 vec![ts],
33928 )))),
33929 }
33930 }
33931
33932 // UNIX_MICROS(ts) -> target-specific
33933 "UNIX_MICROS" if args.len() == 1 => {
33934 let ts = args.remove(0);
33935 match target {
33936 DialectType::DuckDB => {
33937 let norm_ts = Self::ts_literal_to_cast_tz(ts);
33938 Ok(Expression::Function(Box::new(Function::new(
33939 "EPOCH_US".to_string(),
33940 vec![norm_ts],
33941 ))))
33942 }
33943 _ => Ok(Expression::Function(Box::new(Function::new(
33944 "UNIX_MICROS".to_string(),
33945 vec![ts],
33946 )))),
33947 }
33948 }
33949
33950 // INSTR(str, substr) -> target-specific
33951 "INSTR" => {
33952 if matches!(target, DialectType::BigQuery) {
33953 // BQ->BQ: keep as INSTR
33954 Ok(Expression::Function(Box::new(Function::new(
33955 "INSTR".to_string(),
33956 args,
33957 ))))
33958 } else if matches!(target, DialectType::Snowflake) && args.len() == 2 {
33959 // Snowflake: CHARINDEX(substr, str) - swap args
33960 let str_expr = args.remove(0);
33961 let substr = args.remove(0);
33962 Ok(Expression::Function(Box::new(Function::new(
33963 "CHARINDEX".to_string(),
33964 vec![substr, str_expr],
33965 ))))
33966 } else {
33967 // Keep as INSTR for other targets
33968 Ok(Expression::Function(Box::new(Function::new(
33969 "INSTR".to_string(),
33970 args,
33971 ))))
33972 }
33973 }
33974
// CURRENT_TIMESTAMP / CURRENT_DATE handling - parens normalization and timezone
// Three concerns: (1) BQ->BQ keeps the parenthesized function form;
// (2) CURRENT_DATE('tz') needs a per-target timezone rewrite;
// (3) some targets want the bare (no-parens) keyword form.
"CURRENT_TIMESTAMP" | "CURRENT_DATE" | "CURRENT_DATETIME" | "CURRENT_TIME" => {
    if matches!(target, DialectType::BigQuery) {
        // BQ->BQ: always output with parens (function form), keep any timezone arg
        Ok(Expression::Function(Box::new(Function::new(name, args))))
    } else if name == "CURRENT_DATE" && args.len() == 1 {
        // CURRENT_DATE('UTC') - has timezone arg
        let tz_arg = args.remove(0);
        match target {
            DialectType::DuckDB => {
                // CAST(CURRENT_TIMESTAMP AT TIME ZONE 'UTC' AS DATE)
                let ct = Expression::CurrentTimestamp(
                    crate::expressions::CurrentTimestamp {
                        precision: None,
                        sysdate: false,
                    },
                );
                let at_tz =
                    Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                        this: ct,
                        zone: tz_arg,
                    }));
                Ok(Expression::Cast(Box::new(Cast {
                    this: at_tz,
                    to: DataType::Date,
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                })))
            }
            DialectType::Snowflake => {
                // CAST(CONVERT_TIMEZONE('UTC', CURRENT_TIMESTAMP()) AS DATE)
                let ct = Expression::Function(Box::new(Function::new(
                    "CURRENT_TIMESTAMP".to_string(),
                    vec![],
                )));
                let convert = Expression::Function(Box::new(Function::new(
                    "CONVERT_TIMEZONE".to_string(),
                    vec![tz_arg, ct],
                )));
                Ok(Expression::Cast(Box::new(Cast {
                    this: convert,
                    to: DataType::Date,
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                })))
            }
            _ => {
                // PostgreSQL, MySQL, etc.: CURRENT_DATE AT TIME ZONE 'UTC'
                let cd = Expression::CurrentDate(crate::expressions::CurrentDate);
                Ok(Expression::AtTimeZone(Box::new(
                    crate::expressions::AtTimeZone {
                        this: cd,
                        zone: tz_arg,
                    },
                )))
            }
        }
    } else if (name == "CURRENT_TIMESTAMP"
        || name == "CURRENT_TIME"
        || name == "CURRENT_DATE")
        && args.is_empty()
        && matches!(
            target,
            DialectType::PostgreSQL
                | DialectType::DuckDB
                | DialectType::Presto
                | DialectType::Trino
        )
    {
        // These targets want no-parens CURRENT_TIMESTAMP / CURRENT_DATE / CURRENT_TIME
        // (emitted as dedicated keyword AST nodes rather than Function calls).
        if name == "CURRENT_TIMESTAMP" {
            Ok(Expression::CurrentTimestamp(
                crate::expressions::CurrentTimestamp {
                    precision: None,
                    sysdate: false,
                },
            ))
        } else if name == "CURRENT_DATE" {
            Ok(Expression::CurrentDate(crate::expressions::CurrentDate))
        } else {
            // CURRENT_TIME
            Ok(Expression::CurrentTime(crate::expressions::CurrentTime {
                precision: None,
            }))
        }
    } else {
        // All other targets: keep as function (with parens)
        Ok(Expression::Function(Box::new(Function::new(name, args))))
    }
}
34071
// JSON_QUERY(json, path) -> target-specific
"JSON_QUERY" if args.len() == 2 => {
    match target {
        DialectType::DuckDB | DialectType::SQLite => {
            // json -> path syntax (arrow-style JSON extraction).
            let json_expr = args.remove(0);
            let path = args.remove(0);
            Ok(Expression::JsonExtract(Box::new(
                crate::expressions::JsonExtractFunc {
                    this: json_expr,
                    path,
                    returning: None,
                    arrow_syntax: true,
                    hash_arrow_syntax: false,
                    wrapper_option: None,
                    quotes_option: None,
                    on_scalar_string: false,
                    on_error: None,
                },
            )))
        }
        DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
            // Spark family: GET_JSON_OBJECT(json, path) takes the same arg order.
            Ok(Expression::Function(Box::new(Function::new(
                "GET_JSON_OBJECT".to_string(),
                args,
            ))))
        }
        // NOTE(review): JSON_EXTRACT_PATH expects individual key arguments,
        // while the incoming path is a '$...' JSONPath string — confirm a later
        // transform adjusts the path for PostgreSQL/Redshift.
        DialectType::PostgreSQL | DialectType::Redshift => Ok(Expression::Function(
            Box::new(Function::new("JSON_EXTRACT_PATH".to_string(), args)),
        )),
        _ => Ok(Expression::Function(Box::new(Function::new(
            "JSON_QUERY".to_string(),
            args,
        )))),
    }
}
34108
// JSON_VALUE_ARRAY(json, path) -> target-specific
// BigQuery returns an ARRAY<STRING> of the scalar values at `path`.
"JSON_VALUE_ARRAY" if args.len() == 2 => {
    match target {
        DialectType::DuckDB => {
            // CAST(json -> path AS TEXT[])
            let json_expr = args.remove(0);
            let path = args.remove(0);
            let arrow = Expression::JsonExtract(Box::new(
                crate::expressions::JsonExtractFunc {
                    this: json_expr,
                    path,
                    returning: None,
                    arrow_syntax: true,
                    hash_arrow_syntax: false,
                    wrapper_option: None,
                    quotes_option: None,
                    on_scalar_string: false,
                    on_error: None,
                },
            ));
            Ok(Expression::Cast(Box::new(Cast {
                this: arrow,
                to: DataType::Array {
                    element_type: Box::new(DataType::Text),
                    dimension: None,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })))
        }
        DialectType::Snowflake => {
            // TRANSFORM(GET_PATH(PARSE_JSON(json), path), x -> CAST(x AS VARCHAR))
            let json_expr = args.remove(0);
            let path_expr = args.remove(0);
            // Convert JSON path from $.path to just path
            // (GET_PATH takes a bare dotted path, not a JSONPath).
            let sf_path = if let Expression::Literal(ref lit) = path_expr {
                if let Literal::String(ref s) = lit.as_ref() {
                    let trimmed = s.trim_start_matches('$').trim_start_matches('.');
                    Expression::Literal(Box::new(Literal::String(trimmed.to_string())))
                } else {
                    path_expr.clone()
                }
            } else {
                path_expr
            };
            let parse_json = Expression::Function(Box::new(Function::new(
                "PARSE_JSON".to_string(),
                vec![json_expr],
            )));
            let get_path = Expression::Function(Box::new(Function::new(
                "GET_PATH".to_string(),
                vec![parse_json, sf_path],
            )));
            // TRANSFORM(get_path, x -> CAST(x AS VARCHAR))
            let cast_expr = Expression::Cast(Box::new(Cast {
                this: Expression::Identifier(Identifier::new("x")),
                to: DataType::VarChar {
                    length: None,
                    parenthesized_length: false,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }));
            let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                parameters: vec![Identifier::new("x")],
                body: cast_expr,
                colon: false,
                parameter_types: vec![],
            }));
            Ok(Expression::Function(Box::new(Function::new(
                "TRANSFORM".to_string(),
                vec![get_path, lambda],
            ))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new(
            "JSON_VALUE_ARRAY".to_string(),
            args,
        )))),
    }
}
34194
// BigQuery REGEXP_EXTRACT(val, regex[, position[, occurrence]]) -> target dialects
// BigQuery's 3rd arg is "position" (starting char index), 4th is "occurrence" (which match to return)
// This is different from Hive/Spark where 3rd arg is "group_index"
"REGEXP_EXTRACT" if matches!(source, DialectType::BigQuery) => {
    match target {
        DialectType::DuckDB
        | DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            if args.len() == 2 {
                // REGEXP_EXTRACT(val, regex) -> REGEXP_EXTRACT(val, regex, 1)
                // (explicit group 1, matching BigQuery's default capture-group behavior)
                args.push(Expression::number(1));
                Ok(Expression::Function(Box::new(Function::new(
                    "REGEXP_EXTRACT".to_string(),
                    args,
                ))))
            } else if args.len() == 3 {
                // 3-arg form: third argument is a start position, not a group.
                let val = args.remove(0);
                let regex = args.remove(0);
                let position = args.remove(0);
                let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
                if is_pos_1 {
                    // position=1 is a no-op; emit the plain 3-arg group-1 call.
                    Ok(Expression::Function(Box::new(Function::new(
                        "REGEXP_EXTRACT".to_string(),
                        vec![val, regex, Expression::number(1)],
                    ))))
                } else {
                    // Emulate the start position via SUBSTRING; NULLIF('') keeps
                    // the NULL-on-no-input behavior when the substring is empty.
                    let substring_expr = Expression::Function(Box::new(Function::new(
                        "SUBSTRING".to_string(),
                        vec![val, position],
                    )));
                    let nullif_expr = Expression::Function(Box::new(Function::new(
                        "NULLIF".to_string(),
                        vec![
                            substring_expr,
                            Expression::Literal(Box::new(Literal::String(
                                String::new(),
                            ))),
                        ],
                    )));
                    Ok(Expression::Function(Box::new(Function::new(
                        "REGEXP_EXTRACT".to_string(),
                        vec![nullif_expr, regex, Expression::number(1)],
                    ))))
                }
            } else if args.len() == 4 {
                // 4-arg form: position + occurrence.
                let val = args.remove(0);
                let regex = args.remove(0);
                let position = args.remove(0);
                let occurrence = args.remove(0);
                let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
                let is_occ_1 = matches!(&occurrence, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
                if is_pos_1 && is_occ_1 {
                    // Both defaults: collapses to the simple 3-arg call.
                    Ok(Expression::Function(Box::new(Function::new(
                        "REGEXP_EXTRACT".to_string(),
                        vec![val, regex, Expression::number(1)],
                    ))))
                } else {
                    // Apply the position offset (if non-default) as above…
                    let subject = if is_pos_1 {
                        val
                    } else {
                        let substring_expr = Expression::Function(Box::new(
                            Function::new("SUBSTRING".to_string(), vec![val, position]),
                        ));
                        Expression::Function(Box::new(Function::new(
                            "NULLIF".to_string(),
                            vec![
                                substring_expr,
                                Expression::Literal(Box::new(Literal::String(
                                    String::new(),
                                ))),
                            ],
                        )))
                    };
                    // …then pick the Nth match: ARRAY_EXTRACT(REGEXP_EXTRACT_ALL(...), occurrence).
                    let extract_all = Expression::Function(Box::new(Function::new(
                        "REGEXP_EXTRACT_ALL".to_string(),
                        vec![subject, regex, Expression::number(1)],
                    )));
                    Ok(Expression::Function(Box::new(Function::new(
                        "ARRAY_EXTRACT".to_string(),
                        vec![extract_all, occurrence],
                    ))))
                }
            } else {
                // Unexpected arity: rebuild the call preserving the original
                // function's flags (distinct, bracket syntax, etc.).
                Ok(Expression::Function(Box::new(Function {
                    name: f.name,
                    args,
                    distinct: f.distinct,
                    trailing_comments: f.trailing_comments,
                    use_bracket_syntax: f.use_bracket_syntax,
                    no_parens: f.no_parens,
                    quoted: f.quoted,
                    span: None,
                    inferred_type: None,
                })))
            }
        }
        DialectType::Snowflake => {
            // BigQuery REGEXP_EXTRACT -> Snowflake REGEXP_SUBSTR
            Ok(Expression::Function(Box::new(Function::new(
                "REGEXP_SUBSTR".to_string(),
                args,
            ))))
        }
        _ => {
            // For other targets (Hive/Spark/BigQuery): pass through as-is
            // BigQuery's default group behavior matches Hive/Spark for 2-arg case
            Ok(Expression::Function(Box::new(Function {
                name: f.name,
                args,
                distinct: f.distinct,
                trailing_comments: f.trailing_comments,
                use_bracket_syntax: f.use_bracket_syntax,
                no_parens: f.no_parens,
                quoted: f.quoted,
                span: None,
                inferred_type: None,
            })))
        }
    }
}
34316
// BigQuery STRUCT(args) -> target-specific struct expression
// Step 1 below normalizes the argument list into (optional_name, value) pairs,
// applying each target's field-naming policy; step 2 renders per target.
"STRUCT" => {
    // Convert Function args to Struct fields
    let mut fields: Vec<(Option<String>, Expression)> = Vec::new();
    for (i, arg) in args.into_iter().enumerate() {
        match arg {
            Expression::Alias(a) => {
                // Named field: expr AS name
                fields.push((Some(a.alias.name.clone()), a.this));
            }
            other => {
                // Unnamed field: for Spark/Hive, keep as None
                // For Snowflake, auto-name as _N
                // For DuckDB, use column name for column refs, _N for others
                if matches!(target, DialectType::Snowflake) {
                    fields.push((Some(format!("_{}", i)), other));
                } else if matches!(target, DialectType::DuckDB) {
                    let auto_name = match &other {
                        Expression::Column(col) => col.name.name.clone(),
                        _ => format!("_{}", i),
                    };
                    fields.push((Some(auto_name), other));
                } else {
                    fields.push((None, other));
                }
            }
        }
    }

    match target {
        DialectType::Snowflake => {
            // OBJECT_CONSTRUCT('name', value, ...)
            // (alternating key/value argument list; unnamed values get no key)
            let mut oc_args = Vec::new();
            for (name, val) in &fields {
                if let Some(n) = name {
                    oc_args.push(Expression::Literal(Box::new(Literal::String(
                        n.clone(),
                    ))));
                    oc_args.push(val.clone());
                } else {
                    oc_args.push(val.clone());
                }
            }
            Ok(Expression::Function(Box::new(Function::new(
                "OBJECT_CONSTRUCT".to_string(),
                oc_args,
            ))))
        }
        DialectType::DuckDB => {
            // {'name': value, ...}
            Ok(Expression::Struct(Box::new(crate::expressions::Struct {
                fields,
            })))
        }
        DialectType::Hive => {
            // STRUCT(val1, val2, ...) - strip aliases
            let hive_fields: Vec<(Option<String>, Expression)> =
                fields.into_iter().map(|(_, v)| (None, v)).collect();
            Ok(Expression::Struct(Box::new(crate::expressions::Struct {
                fields: hive_fields,
            })))
        }
        DialectType::Spark | DialectType::Databricks => {
            // Use Expression::Struct to bypass Spark target transform auto-naming
            Ok(Expression::Struct(Box::new(crate::expressions::Struct {
                fields,
            })))
        }
        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
            // Check if all fields are named AND all have inferable types - if so, wrap in CAST(ROW(...) AS ROW(name TYPE, ...))
            let all_named =
                !fields.is_empty() && fields.iter().all(|(name, _)| name.is_some());
            let all_types_inferable = all_named
                && fields
                    .iter()
                    .all(|(_, val)| Self::can_infer_presto_type(val));
            let row_args: Vec<Expression> =
                fields.iter().map(|(_, v)| v.clone()).collect();
            let row_expr = Expression::Function(Box::new(Function::new(
                "ROW".to_string(),
                row_args,
            )));
            if all_named && all_types_inferable {
                // Build ROW type with inferred types
                let mut row_type_fields = Vec::new();
                for (name, val) in &fields {
                    if let Some(n) = name {
                        let type_str = Self::infer_sql_type_for_presto(val);
                        row_type_fields.push(crate::expressions::StructField::new(
                            n.clone(),
                            crate::expressions::DataType::Custom { name: type_str },
                        ));
                    }
                }
                let row_type = crate::expressions::DataType::Struct {
                    fields: row_type_fields,
                    nested: true,
                };
                Ok(Expression::Cast(Box::new(Cast {
                    this: row_expr,
                    to: row_type,
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                })))
            } else {
                // Can't type every field: emit a bare ROW(...) and drop the names.
                Ok(row_expr)
            }
        }
        _ => {
            // Default: keep as STRUCT function with original args
            // (re-wrapping named fields back into Alias expressions).
            let mut new_args = Vec::new();
            for (name, val) in fields {
                if let Some(n) = name {
                    new_args.push(Expression::Alias(Box::new(
                        crate::expressions::Alias::new(val, Identifier::new(n)),
                    )));
                } else {
                    new_args.push(val);
                }
            }
            Ok(Expression::Function(Box::new(Function::new(
                "STRUCT".to_string(),
                new_args,
            ))))
        }
    }
}
34447
34448 // ROUND(x, n, 'ROUND_HALF_EVEN') -> ROUND_EVEN(x, n) for DuckDB
34449 "ROUND" if args.len() == 3 => {
34450 let x = args.remove(0);
34451 let n = args.remove(0);
34452 let mode = args.remove(0);
34453 // Check if mode is 'ROUND_HALF_EVEN'
34454 let is_half_even = matches!(&mode, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s.eq_ignore_ascii_case("ROUND_HALF_EVEN")));
34455 if is_half_even && matches!(target, DialectType::DuckDB) {
34456 Ok(Expression::Function(Box::new(Function::new(
34457 "ROUND_EVEN".to_string(),
34458 vec![x, n],
34459 ))))
34460 } else {
34461 // Pass through with all args
34462 Ok(Expression::Function(Box::new(Function::new(
34463 "ROUND".to_string(),
34464 vec![x, n, mode],
34465 ))))
34466 }
34467 }
34468
// MAKE_INTERVAL(year, month, named_args...) -> INTERVAL string for Snowflake/DuckDB
"MAKE_INTERVAL" => {
    // MAKE_INTERVAL(1, 2, minute => 5, day => 3)
    // The positional args are: year, month
    // Named args are: day =>, minute =>, etc.
    // For Snowflake: INTERVAL '1 year, 2 month, 5 minute, 3 day'
    // For DuckDB: INTERVAL '1 year 2 month 5 minute 3 day'
    // For BigQuery->BigQuery: reorder named args (day before minute)
    if matches!(target, DialectType::Snowflake | DialectType::DuckDB) {
        // Collect (unit, numeric-value) pairs; non-numeric-literal args are
        // silently skipped (only literal intervals can be folded into a string).
        let mut parts: Vec<(String, String)> = Vec::new();
        let mut pos_idx = 0;
        let pos_units = ["year", "month"];
        for arg in &args {
            if let Expression::NamedArgument(na) = arg {
                // Named arg like minute => 5
                let unit = na.name.name.clone();
                if let Expression::Literal(lit) = &na.value {
                    if let Literal::Number(n) = lit.as_ref() {
                        parts.push((unit, n.clone()));
                    }
                }
            } else if pos_idx < pos_units.len() {
                // Positional args map to year, then month.
                if let Expression::Literal(lit) = arg {
                    if let Literal::Number(n) = lit.as_ref() {
                        parts.push((pos_units[pos_idx].to_string(), n.clone()));
                    }
                }
                pos_idx += 1;
            }
        }
        // Don't sort - preserve original argument order
        let separator = if matches!(target, DialectType::Snowflake) {
            ", "
        } else {
            " "
        };
        let interval_str = parts
            .iter()
            .map(|(u, v)| format!("{} {}", v, u))
            .collect::<Vec<_>>()
            .join(separator);
        Ok(Expression::Interval(Box::new(
            crate::expressions::Interval {
                this: Some(Expression::Literal(Box::new(Literal::String(
                    interval_str,
                )))),
                unit: None,
            },
        )))
    } else if matches!(target, DialectType::BigQuery) {
        // BigQuery->BigQuery: reorder named args (day, minute, etc.)
        let mut positional = Vec::new();
        let mut named: Vec<(
            String,
            Expression,
            crate::expressions::NamedArgSeparator,
        )> = Vec::new();
        let _pos_units = ["year", "month"];
        let mut _pos_idx = 0;
        for arg in args {
            if let Expression::NamedArgument(na) = arg {
                named.push((na.name.name.clone(), na.value, na.separator));
            } else {
                positional.push(arg);
                _pos_idx += 1;
            }
        }
        // Sort named args by: day, hour, minute, second
        // (unknown units sort last; sort_by_key is stable, preserving ties).
        let unit_order = |u: &str| -> usize {
            match u.to_ascii_lowercase().as_str() {
                "day" => 0,
                "hour" => 1,
                "minute" => 2,
                "second" => 3,
                _ => 4,
            }
        };
        named.sort_by_key(|(u, _, _)| unit_order(u));
        let mut result_args = positional;
        for (name, value, sep) in named {
            result_args.push(Expression::NamedArgument(Box::new(
                crate::expressions::NamedArgument {
                    name: Identifier::new(&name),
                    value,
                    separator: sep,
                },
            )));
        }
        Ok(Expression::Function(Box::new(Function::new(
            "MAKE_INTERVAL".to_string(),
            result_args,
        ))))
    } else {
        // Other targets: pass through unchanged.
        Ok(Expression::Function(Box::new(Function::new(
            "MAKE_INTERVAL".to_string(),
            args,
        ))))
    }
}
34568
// ARRAY_TO_STRING(array, sep, null_text) -> ARRAY_TO_STRING(LIST_TRANSFORM(array, x -> COALESCE(x, null_text)), sep) for DuckDB
// DuckDB's 2-arg ARRAY_TO_STRING has no null-replacement parameter, so NULLs
// are substituted up front via a lambda before joining.
"ARRAY_TO_STRING" if args.len() == 3 => {
    let arr = args.remove(0);
    let sep = args.remove(0);
    let null_text = args.remove(0);
    match target {
        DialectType::DuckDB => {
            // LIST_TRANSFORM(array, x -> COALESCE(x, null_text))
            let _lambda_param =
                Expression::Identifier(crate::expressions::Identifier::new("x"));
            let coalesce =
                Expression::Coalesce(Box::new(crate::expressions::VarArgFunc {
                    original_name: None,
                    expressions: vec![
                        Expression::Identifier(crate::expressions::Identifier::new(
                            "x",
                        )),
                        null_text,
                    ],
                    inferred_type: None,
                }));
            let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                parameters: vec![crate::expressions::Identifier::new("x")],
                body: coalesce,
                colon: false,
                parameter_types: vec![],
            }));
            let list_transform = Expression::Function(Box::new(Function::new(
                "LIST_TRANSFORM".to_string(),
                vec![arr, lambda],
            )));
            Ok(Expression::Function(Box::new(Function::new(
                "ARRAY_TO_STRING".to_string(),
                vec![list_transform, sep],
            ))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new(
            "ARRAY_TO_STRING".to_string(),
            vec![arr, sep, null_text],
        )))),
    }
}
34611
34612 // LENGTH(x) -> CASE TYPEOF(x) ... for DuckDB
34613 "LENGTH" if args.len() == 1 => {
34614 let arg = args.remove(0);
34615 match target {
34616 DialectType::DuckDB => {
34617 // CASE TYPEOF(foo) WHEN 'BLOB' THEN OCTET_LENGTH(CAST(foo AS BLOB)) ELSE LENGTH(CAST(foo AS TEXT)) END
34618 let typeof_func = Expression::Function(Box::new(Function::new(
34619 "TYPEOF".to_string(),
34620 vec![arg.clone()],
34621 )));
34622 let blob_cast = Expression::Cast(Box::new(Cast {
34623 this: arg.clone(),
34624 to: DataType::VarBinary { length: None },
34625 trailing_comments: vec![],
34626 double_colon_syntax: false,
34627 format: None,
34628 default: None,
34629 inferred_type: None,
34630 }));
34631 let octet_length = Expression::Function(Box::new(Function::new(
34632 "OCTET_LENGTH".to_string(),
34633 vec![blob_cast],
34634 )));
34635 let text_cast = Expression::Cast(Box::new(Cast {
34636 this: arg,
34637 to: DataType::Text,
34638 trailing_comments: vec![],
34639 double_colon_syntax: false,
34640 format: None,
34641 default: None,
34642 inferred_type: None,
34643 }));
34644 let length_text = Expression::Function(Box::new(Function::new(
34645 "LENGTH".to_string(),
34646 vec![text_cast],
34647 )));
34648 Ok(Expression::Case(Box::new(crate::expressions::Case {
34649 operand: Some(typeof_func),
34650 whens: vec![(
34651 Expression::Literal(Box::new(Literal::String("BLOB".to_string()))),
34652 octet_length,
34653 )],
34654 else_: Some(length_text),
34655 comments: Vec::new(),
34656 inferred_type: None,
34657 })))
34658 }
34659 _ => Ok(Expression::Function(Box::new(Function::new(
34660 "LENGTH".to_string(),
34661 vec![arg],
34662 )))),
34663 }
34664 }
34665
34666 // PERCENTILE_CONT(x, fraction RESPECT NULLS) -> QUANTILE_CONT(x, fraction) for DuckDB
34667 "PERCENTILE_CONT" if args.len() >= 2 && matches!(source, DialectType::BigQuery) => {
34668 // BigQuery PERCENTILE_CONT(x, fraction [RESPECT|IGNORE NULLS]) OVER ()
34669 // The args should be [x, fraction] with the null handling stripped
34670 // For DuckDB: QUANTILE_CONT(x, fraction)
34671 // For Spark: PERCENTILE_CONT(x, fraction) RESPECT NULLS (handled at window level)
34672 match target {
34673 DialectType::DuckDB => {
34674 // Strip down to just 2 args, rename to QUANTILE_CONT
34675 let x = args[0].clone();
34676 let frac = args[1].clone();
34677 Ok(Expression::Function(Box::new(Function::new(
34678 "QUANTILE_CONT".to_string(),
34679 vec![x, frac],
34680 ))))
34681 }
34682 _ => Ok(Expression::Function(Box::new(Function::new(
34683 "PERCENTILE_CONT".to_string(),
34684 args,
34685 )))),
34686 }
34687 }
34688
34689 // All others: pass through
34690 _ => Ok(Expression::Function(Box::new(Function {
34691 name: f.name,
34692 args,
34693 distinct: f.distinct,
34694 trailing_comments: f.trailing_comments,
34695 use_bracket_syntax: f.use_bracket_syntax,
34696 no_parens: f.no_parens,
34697 quoted: f.quoted,
34698 span: None,
34699 inferred_type: None,
34700 }))),
34701 }
34702 }
34703
34704 /// Check if we can reliably infer the SQL type for Presto/Trino ROW CAST.
34705 /// Returns false for column references and other non-literal expressions where the type is unknown.
34706 fn can_infer_presto_type(expr: &Expression) -> bool {
34707 match expr {
34708 Expression::Literal(_) => true,
34709 Expression::Boolean(_) => true,
34710 Expression::Array(_) | Expression::ArrayFunc(_) => true,
34711 Expression::Struct(_) | Expression::StructFunc(_) => true,
34712 Expression::Function(f) => {
34713 f.name.eq_ignore_ascii_case("STRUCT")
34714 || f.name.eq_ignore_ascii_case("ROW")
34715 || f.name.eq_ignore_ascii_case("CURRENT_DATE")
34716 || f.name.eq_ignore_ascii_case("CURRENT_TIMESTAMP")
34717 || f.name.eq_ignore_ascii_case("NOW")
34718 }
34719 Expression::Cast(_) => true,
34720 Expression::Neg(inner) => Self::can_infer_presto_type(&inner.this),
34721 _ => false,
34722 }
34723 }
34724
34725 /// Infer SQL type name for a Presto/Trino ROW CAST from a literal expression
34726 fn infer_sql_type_for_presto(expr: &Expression) -> String {
34727 use crate::expressions::Literal;
34728 match expr {
34729 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
34730 "VARCHAR".to_string()
34731 }
34732 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
34733 let Literal::Number(n) = lit.as_ref() else {
34734 unreachable!()
34735 };
34736 if n.contains('.') {
34737 "DOUBLE".to_string()
34738 } else {
34739 "INTEGER".to_string()
34740 }
34741 }
34742 Expression::Boolean(_) => "BOOLEAN".to_string(),
34743 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Date(_)) => {
34744 "DATE".to_string()
34745 }
34746 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
34747 "TIMESTAMP".to_string()
34748 }
34749 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Datetime(_)) => {
34750 "TIMESTAMP".to_string()
34751 }
34752 Expression::Array(_) | Expression::ArrayFunc(_) => "ARRAY(VARCHAR)".to_string(),
34753 Expression::Struct(_) | Expression::StructFunc(_) => "ROW".to_string(),
34754 Expression::Function(f) => {
34755 if f.name.eq_ignore_ascii_case("STRUCT") || f.name.eq_ignore_ascii_case("ROW") {
34756 "ROW".to_string()
34757 } else if f.name.eq_ignore_ascii_case("CURRENT_DATE") {
34758 "DATE".to_string()
34759 } else if f.name.eq_ignore_ascii_case("CURRENT_TIMESTAMP")
34760 || f.name.eq_ignore_ascii_case("NOW")
34761 {
34762 "TIMESTAMP".to_string()
34763 } else {
34764 "VARCHAR".to_string()
34765 }
34766 }
34767 Expression::Cast(c) => {
34768 // If already cast, use the target type
34769 Self::data_type_to_presto_string(&c.to)
34770 }
34771 _ => "VARCHAR".to_string(),
34772 }
34773 }
34774
34775 /// Convert a DataType to its Presto/Trino string representation for ROW type
34776 fn data_type_to_presto_string(dt: &crate::expressions::DataType) -> String {
34777 use crate::expressions::DataType;
34778 match dt {
34779 DataType::VarChar { .. } | DataType::Text | DataType::String { .. } => {
34780 "VARCHAR".to_string()
34781 }
34782 DataType::Int { .. }
34783 | DataType::BigInt { .. }
34784 | DataType::SmallInt { .. }
34785 | DataType::TinyInt { .. } => "INTEGER".to_string(),
34786 DataType::Float { .. } | DataType::Double { .. } => "DOUBLE".to_string(),
34787 DataType::Boolean => "BOOLEAN".to_string(),
34788 DataType::Date => "DATE".to_string(),
34789 DataType::Timestamp { .. } => "TIMESTAMP".to_string(),
34790 DataType::Struct { fields, .. } => {
34791 let field_strs: Vec<String> = fields
34792 .iter()
34793 .map(|f| {
34794 format!(
34795 "{} {}",
34796 f.name,
34797 Self::data_type_to_presto_string(&f.data_type)
34798 )
34799 })
34800 .collect();
34801 format!("ROW({})", field_strs.join(", "))
34802 }
34803 DataType::Array { element_type, .. } => {
34804 format!("ARRAY({})", Self::data_type_to_presto_string(element_type))
34805 }
34806 DataType::Custom { name } => {
34807 // Pass through custom type names (e.g., "INTEGER", "VARCHAR" from earlier inference)
34808 name.clone()
34809 }
34810 _ => "VARCHAR".to_string(),
34811 }
34812 }
34813
34814 /// Convert IntervalUnit to string
34815 fn interval_unit_to_string(unit: &crate::expressions::IntervalUnit) -> &'static str {
34816 match unit {
34817 crate::expressions::IntervalUnit::Year => "YEAR",
34818 crate::expressions::IntervalUnit::Quarter => "QUARTER",
34819 crate::expressions::IntervalUnit::Month => "MONTH",
34820 crate::expressions::IntervalUnit::Week => "WEEK",
34821 crate::expressions::IntervalUnit::Day => "DAY",
34822 crate::expressions::IntervalUnit::Hour => "HOUR",
34823 crate::expressions::IntervalUnit::Minute => "MINUTE",
34824 crate::expressions::IntervalUnit::Second => "SECOND",
34825 crate::expressions::IntervalUnit::Millisecond => "MILLISECOND",
34826 crate::expressions::IntervalUnit::Microsecond => "MICROSECOND",
34827 crate::expressions::IntervalUnit::Nanosecond => "NANOSECOND",
34828 }
34829 }
34830
34831 /// Extract unit string from an expression (uppercased)
34832 fn get_unit_str_static(expr: &Expression) -> String {
34833 use crate::expressions::Literal;
34834 match expr {
34835 Expression::Identifier(id) => id.name.to_ascii_uppercase(),
34836 Expression::Var(v) => v.this.to_ascii_uppercase(),
34837 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
34838 let Literal::String(s) = lit.as_ref() else {
34839 unreachable!()
34840 };
34841 s.to_ascii_uppercase()
34842 }
34843 Expression::Column(col) => col.name.name.to_ascii_uppercase(),
34844 Expression::Function(f) => {
34845 let base = f.name.to_ascii_uppercase();
34846 if !f.args.is_empty() {
34847 let inner = Self::get_unit_str_static(&f.args[0]);
34848 format!("{}({})", base, inner)
34849 } else {
34850 base
34851 }
34852 }
34853 _ => "DAY".to_string(),
34854 }
34855 }
34856
34857 /// Parse unit string to IntervalUnit
34858 fn parse_interval_unit_static(s: &str) -> crate::expressions::IntervalUnit {
34859 match s {
34860 "YEAR" | "YY" | "YYYY" => crate::expressions::IntervalUnit::Year,
34861 "QUARTER" | "QQ" | "Q" => crate::expressions::IntervalUnit::Quarter,
34862 "MONTH" | "MM" | "M" => crate::expressions::IntervalUnit::Month,
34863 "WEEK" | "WK" | "WW" | "ISOWEEK" => crate::expressions::IntervalUnit::Week,
34864 "DAY" | "DD" | "D" | "DY" => crate::expressions::IntervalUnit::Day,
34865 "HOUR" | "HH" => crate::expressions::IntervalUnit::Hour,
34866 "MINUTE" | "MI" | "N" => crate::expressions::IntervalUnit::Minute,
34867 "SECOND" | "SS" | "S" => crate::expressions::IntervalUnit::Second,
34868 "MILLISECOND" | "MS" => crate::expressions::IntervalUnit::Millisecond,
34869 "MICROSECOND" | "MCS" | "US" => crate::expressions::IntervalUnit::Microsecond,
34870 _ if s.starts_with("WEEK(") => crate::expressions::IntervalUnit::Week,
34871 _ => crate::expressions::IntervalUnit::Day,
34872 }
34873 }
34874
34875 /// Convert expression to simple string for interval building
34876 fn expr_to_string_static(expr: &Expression) -> String {
34877 use crate::expressions::Literal;
34878 match expr {
34879 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
34880 let Literal::Number(s) = lit.as_ref() else {
34881 unreachable!()
34882 };
34883 s.clone()
34884 }
34885 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
34886 let Literal::String(s) = lit.as_ref() else {
34887 unreachable!()
34888 };
34889 s.clone()
34890 }
34891 Expression::Identifier(id) => id.name.clone(),
34892 Expression::Neg(f) => format!("-{}", Self::expr_to_string_static(&f.this)),
34893 _ => "1".to_string(),
34894 }
34895 }
34896
34897 /// Extract a simple string representation from a literal expression
34898 fn expr_to_string(expr: &Expression) -> String {
34899 use crate::expressions::Literal;
34900 match expr {
34901 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
34902 let Literal::Number(s) = lit.as_ref() else {
34903 unreachable!()
34904 };
34905 s.clone()
34906 }
34907 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
34908 let Literal::String(s) = lit.as_ref() else {
34909 unreachable!()
34910 };
34911 s.clone()
34912 }
34913 Expression::Neg(f) => format!("-{}", Self::expr_to_string(&f.this)),
34914 Expression::Identifier(id) => id.name.clone(),
34915 _ => "1".to_string(),
34916 }
34917 }
34918
34919 /// Quote an interval value expression as a string literal if it's a number (or negated number)
34920 fn quote_interval_val(expr: &Expression) -> Expression {
34921 use crate::expressions::Literal;
34922 match expr {
34923 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
34924 let Literal::Number(n) = lit.as_ref() else {
34925 unreachable!()
34926 };
34927 Expression::Literal(Box::new(Literal::String(n.clone())))
34928 }
34929 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => expr.clone(),
34930 Expression::Neg(inner) => {
34931 if let Expression::Literal(lit) = &inner.this {
34932 if let Literal::Number(n) = lit.as_ref() {
34933 Expression::Literal(Box::new(Literal::String(format!("-{}", n))))
34934 } else {
34935 inner.this.clone()
34936 }
34937 } else {
34938 expr.clone()
34939 }
34940 }
34941 _ => expr.clone(),
34942 }
34943 }
34944
34945 /// Check if a timestamp string contains timezone info (offset like +02:00, or named timezone)
34946 fn timestamp_string_has_timezone(ts: &str) -> bool {
34947 let trimmed = ts.trim();
34948 // Check for numeric timezone offsets: +N, -N, +NN:NN, -NN:NN at end
34949 if let Some(last_space) = trimmed.rfind(' ') {
34950 let suffix = &trimmed[last_space + 1..];
34951 if (suffix.starts_with('+') || suffix.starts_with('-')) && suffix.len() > 1 {
34952 let rest = &suffix[1..];
34953 if rest.chars().all(|c| c.is_ascii_digit() || c == ':') {
34954 return true;
34955 }
34956 }
34957 }
34958 // Check for named timezone abbreviations
34959 let ts_lower = trimmed.to_ascii_lowercase();
34960 let tz_abbrevs = [" utc", " gmt", " cet", " est", " pst", " cst", " mst"];
34961 for abbrev in &tz_abbrevs {
34962 if ts_lower.ends_with(abbrev) {
34963 return true;
34964 }
34965 }
34966 false
34967 }
34968
34969 /// Maybe CAST timestamp literal to TIMESTAMPTZ for Snowflake
34970 fn maybe_cast_ts_to_tz(expr: Expression, func_name: &str) -> Expression {
34971 use crate::expressions::{Cast, DataType, Literal};
34972 match expr {
34973 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
34974 let Literal::Timestamp(s) = lit.as_ref() else {
34975 unreachable!()
34976 };
34977 let tz = func_name.starts_with("TIMESTAMP");
34978 Expression::Cast(Box::new(Cast {
34979 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
34980 to: if tz {
34981 DataType::Timestamp {
34982 timezone: true,
34983 precision: None,
34984 }
34985 } else {
34986 DataType::Timestamp {
34987 timezone: false,
34988 precision: None,
34989 }
34990 },
34991 trailing_comments: vec![],
34992 double_colon_syntax: false,
34993 format: None,
34994 default: None,
34995 inferred_type: None,
34996 }))
34997 }
34998 other => other,
34999 }
35000 }
35001
35002 /// Maybe CAST timestamp literal to TIMESTAMP (no tz)
35003 fn maybe_cast_ts(expr: Expression) -> Expression {
35004 use crate::expressions::{Cast, DataType, Literal};
35005 match expr {
35006 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
35007 let Literal::Timestamp(s) = lit.as_ref() else {
35008 unreachable!()
35009 };
35010 Expression::Cast(Box::new(Cast {
35011 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
35012 to: DataType::Timestamp {
35013 timezone: false,
35014 precision: None,
35015 },
35016 trailing_comments: vec![],
35017 double_colon_syntax: false,
35018 format: None,
35019 default: None,
35020 inferred_type: None,
35021 }))
35022 }
35023 other => other,
35024 }
35025 }
35026
35027 /// Convert DATE 'x' literal to CAST('x' AS DATE)
35028 fn date_literal_to_cast(expr: Expression) -> Expression {
35029 use crate::expressions::{Cast, DataType, Literal};
35030 match expr {
35031 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Date(_)) => {
35032 let Literal::Date(s) = lit.as_ref() else {
35033 unreachable!()
35034 };
35035 Expression::Cast(Box::new(Cast {
35036 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
35037 to: DataType::Date,
35038 trailing_comments: vec![],
35039 double_colon_syntax: false,
35040 format: None,
35041 default: None,
35042 inferred_type: None,
35043 }))
35044 }
35045 other => other,
35046 }
35047 }
35048
35049 /// Ensure an expression that should be a date is CAST(... AS DATE).
35050 /// Handles both DATE literals and string literals that look like dates.
35051 fn ensure_cast_date(expr: Expression) -> Expression {
35052 use crate::expressions::{Cast, DataType, Literal};
35053 match expr {
35054 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Date(_)) => {
35055 let Literal::Date(s) = lit.as_ref() else {
35056 unreachable!()
35057 };
35058 Expression::Cast(Box::new(Cast {
35059 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
35060 to: DataType::Date,
35061 trailing_comments: vec![],
35062 double_colon_syntax: false,
35063 format: None,
35064 default: None,
35065 inferred_type: None,
35066 }))
35067 }
35068 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(ref _s)) => {
35069 // String literal that should be a date -> CAST('s' AS DATE)
35070 Expression::Cast(Box::new(Cast {
35071 this: expr,
35072 to: DataType::Date,
35073 trailing_comments: vec![],
35074 double_colon_syntax: false,
35075 format: None,
35076 default: None,
35077 inferred_type: None,
35078 }))
35079 }
35080 // Already a CAST or other expression -> leave as-is
35081 other => other,
35082 }
35083 }
35084
35085 /// Force CAST(expr AS DATE) for any expression (not just literals)
35086 /// Skips if the expression is already a CAST to DATE
35087 fn force_cast_date(expr: Expression) -> Expression {
35088 use crate::expressions::{Cast, DataType};
35089 // If it's already a CAST to DATE, don't double-wrap
35090 if let Expression::Cast(ref c) = expr {
35091 if matches!(c.to, DataType::Date) {
35092 return expr;
35093 }
35094 }
35095 Expression::Cast(Box::new(Cast {
35096 this: expr,
35097 to: DataType::Date,
35098 trailing_comments: vec![],
35099 double_colon_syntax: false,
35100 format: None,
35101 default: None,
35102 inferred_type: None,
35103 }))
35104 }
35105
35106 /// Internal TO_DATE function that won't be converted to CAST by the Snowflake handler.
35107 /// Uses the name `_POLYGLOT_TO_DATE` which is not recognized by the TO_DATE -> CAST logic.
35108 /// The Snowflake DATEDIFF handler converts these back to TO_DATE.
35109 const PRESERVED_TO_DATE: &'static str = "_POLYGLOT_TO_DATE";
35110
35111 fn ensure_to_date_preserved(expr: Expression) -> Expression {
35112 use crate::expressions::{Function, Literal};
35113 if matches!(expr, Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(_)))
35114 {
35115 Expression::Function(Box::new(Function::new(
35116 Self::PRESERVED_TO_DATE.to_string(),
35117 vec![expr],
35118 )))
35119 } else {
35120 expr
35121 }
35122 }
35123
35124 /// TRY_CAST(expr AS DATE) - used for DuckDB when TO_DATE is unwrapped
35125 fn try_cast_date(expr: Expression) -> Expression {
35126 use crate::expressions::{Cast, DataType};
35127 Expression::TryCast(Box::new(Cast {
35128 this: expr,
35129 to: DataType::Date,
35130 trailing_comments: vec![],
35131 double_colon_syntax: false,
35132 format: None,
35133 default: None,
35134 inferred_type: None,
35135 }))
35136 }
35137
35138 /// CAST(CAST(expr AS TIMESTAMP) AS DATE) - used when Hive string dates need to be cast
35139 fn double_cast_timestamp_date(expr: Expression) -> Expression {
35140 use crate::expressions::{Cast, DataType};
35141 let inner = Expression::Cast(Box::new(Cast {
35142 this: expr,
35143 to: DataType::Timestamp {
35144 timezone: false,
35145 precision: None,
35146 },
35147 trailing_comments: vec![],
35148 double_colon_syntax: false,
35149 format: None,
35150 default: None,
35151 inferred_type: None,
35152 }));
35153 Expression::Cast(Box::new(Cast {
35154 this: inner,
35155 to: DataType::Date,
35156 trailing_comments: vec![],
35157 double_colon_syntax: false,
35158 format: None,
35159 default: None,
35160 inferred_type: None,
35161 }))
35162 }
35163
35164 /// CAST(CAST(expr AS DATETIME) AS DATE) - BigQuery variant
35165 fn double_cast_datetime_date(expr: Expression) -> Expression {
35166 use crate::expressions::{Cast, DataType};
35167 let inner = Expression::Cast(Box::new(Cast {
35168 this: expr,
35169 to: DataType::Custom {
35170 name: "DATETIME".to_string(),
35171 },
35172 trailing_comments: vec![],
35173 double_colon_syntax: false,
35174 format: None,
35175 default: None,
35176 inferred_type: None,
35177 }));
35178 Expression::Cast(Box::new(Cast {
35179 this: inner,
35180 to: DataType::Date,
35181 trailing_comments: vec![],
35182 double_colon_syntax: false,
35183 format: None,
35184 default: None,
35185 inferred_type: None,
35186 }))
35187 }
35188
35189 /// CAST(CAST(expr AS DATETIME2) AS DATE) - TSQL variant
35190 fn double_cast_datetime2_date(expr: Expression) -> Expression {
35191 use crate::expressions::{Cast, DataType};
35192 let inner = Expression::Cast(Box::new(Cast {
35193 this: expr,
35194 to: DataType::Custom {
35195 name: "DATETIME2".to_string(),
35196 },
35197 trailing_comments: vec![],
35198 double_colon_syntax: false,
35199 format: None,
35200 default: None,
35201 inferred_type: None,
35202 }));
35203 Expression::Cast(Box::new(Cast {
35204 this: inner,
35205 to: DataType::Date,
35206 trailing_comments: vec![],
35207 double_colon_syntax: false,
35208 format: None,
35209 default: None,
35210 inferred_type: None,
35211 }))
35212 }
35213
35214 /// Convert Hive/Java-style date format strings to C-style (strftime) format
35215 /// e.g., "yyyy-MM-dd'T'HH" -> "%Y-%m-%d'T'%H"
35216 fn hive_format_to_c_format(fmt: &str) -> String {
35217 let mut result = String::new();
35218 let chars: Vec<char> = fmt.chars().collect();
35219 let mut i = 0;
35220 while i < chars.len() {
35221 match chars[i] {
35222 'y' => {
35223 let mut count = 0;
35224 while i < chars.len() && chars[i] == 'y' {
35225 count += 1;
35226 i += 1;
35227 }
35228 if count >= 4 {
35229 result.push_str("%Y");
35230 } else if count == 2 {
35231 result.push_str("%y");
35232 } else {
35233 result.push_str("%Y");
35234 }
35235 }
35236 'M' => {
35237 let mut count = 0;
35238 while i < chars.len() && chars[i] == 'M' {
35239 count += 1;
35240 i += 1;
35241 }
35242 if count >= 3 {
35243 result.push_str("%b");
35244 } else if count == 2 {
35245 result.push_str("%m");
35246 } else {
35247 result.push_str("%m");
35248 }
35249 }
35250 'd' => {
35251 let mut _count = 0;
35252 while i < chars.len() && chars[i] == 'd' {
35253 _count += 1;
35254 i += 1;
35255 }
35256 result.push_str("%d");
35257 }
35258 'H' => {
35259 let mut _count = 0;
35260 while i < chars.len() && chars[i] == 'H' {
35261 _count += 1;
35262 i += 1;
35263 }
35264 result.push_str("%H");
35265 }
35266 'h' => {
35267 let mut _count = 0;
35268 while i < chars.len() && chars[i] == 'h' {
35269 _count += 1;
35270 i += 1;
35271 }
35272 result.push_str("%I");
35273 }
35274 'm' => {
35275 let mut _count = 0;
35276 while i < chars.len() && chars[i] == 'm' {
35277 _count += 1;
35278 i += 1;
35279 }
35280 result.push_str("%M");
35281 }
35282 's' => {
35283 let mut _count = 0;
35284 while i < chars.len() && chars[i] == 's' {
35285 _count += 1;
35286 i += 1;
35287 }
35288 result.push_str("%S");
35289 }
35290 'S' => {
35291 // Fractional seconds - skip
35292 while i < chars.len() && chars[i] == 'S' {
35293 i += 1;
35294 }
35295 result.push_str("%f");
35296 }
35297 'a' => {
35298 // AM/PM
35299 while i < chars.len() && chars[i] == 'a' {
35300 i += 1;
35301 }
35302 result.push_str("%p");
35303 }
35304 'E' => {
35305 let mut count = 0;
35306 while i < chars.len() && chars[i] == 'E' {
35307 count += 1;
35308 i += 1;
35309 }
35310 if count >= 4 {
35311 result.push_str("%A");
35312 } else {
35313 result.push_str("%a");
35314 }
35315 }
35316 '\'' => {
35317 // Quoted literal text - pass through the quotes and content
35318 result.push('\'');
35319 i += 1;
35320 while i < chars.len() && chars[i] != '\'' {
35321 result.push(chars[i]);
35322 i += 1;
35323 }
35324 if i < chars.len() {
35325 result.push('\'');
35326 i += 1;
35327 }
35328 }
35329 c => {
35330 result.push(c);
35331 i += 1;
35332 }
35333 }
35334 }
35335 result
35336 }
35337
35338 /// Convert Hive/Java format to Presto format (uses %T for HH:mm:ss)
35339 fn hive_format_to_presto_format(fmt: &str) -> String {
35340 let c_fmt = Self::hive_format_to_c_format(fmt);
35341 // Presto uses %T for HH:MM:SS
35342 c_fmt.replace("%H:%M:%S", "%T")
35343 }
35344
35345 /// Ensure a timestamp-like expression for DuckDB with CAST(... AS TIMESTAMP)
35346 fn ensure_cast_timestamp(expr: Expression) -> Expression {
35347 use crate::expressions::{Cast, DataType, Literal};
35348 match expr {
35349 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
35350 let Literal::Timestamp(s) = lit.as_ref() else {
35351 unreachable!()
35352 };
35353 Expression::Cast(Box::new(Cast {
35354 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
35355 to: DataType::Timestamp {
35356 timezone: false,
35357 precision: None,
35358 },
35359 trailing_comments: vec![],
35360 double_colon_syntax: false,
35361 format: None,
35362 default: None,
35363 inferred_type: None,
35364 }))
35365 }
35366 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(ref _s)) => {
35367 Expression::Cast(Box::new(Cast {
35368 this: expr,
35369 to: DataType::Timestamp {
35370 timezone: false,
35371 precision: None,
35372 },
35373 trailing_comments: vec![],
35374 double_colon_syntax: false,
35375 format: None,
35376 default: None,
35377 inferred_type: None,
35378 }))
35379 }
35380 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Datetime(_)) => {
35381 let Literal::Datetime(s) = lit.as_ref() else {
35382 unreachable!()
35383 };
35384 Expression::Cast(Box::new(Cast {
35385 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
35386 to: DataType::Timestamp {
35387 timezone: false,
35388 precision: None,
35389 },
35390 trailing_comments: vec![],
35391 double_colon_syntax: false,
35392 format: None,
35393 default: None,
35394 inferred_type: None,
35395 }))
35396 }
35397 other => other,
35398 }
35399 }
35400
35401 /// Force CAST to TIMESTAMP for any expression (not just literals)
35402 /// Used when transpiling from Redshift/TSQL where DATEDIFF/DATEADD args need explicit timestamp cast
35403 fn force_cast_timestamp(expr: Expression) -> Expression {
35404 use crate::expressions::{Cast, DataType};
35405 // Don't double-wrap if already a CAST to TIMESTAMP
35406 if let Expression::Cast(ref c) = expr {
35407 if matches!(c.to, DataType::Timestamp { .. }) {
35408 return expr;
35409 }
35410 }
35411 Expression::Cast(Box::new(Cast {
35412 this: expr,
35413 to: DataType::Timestamp {
35414 timezone: false,
35415 precision: None,
35416 },
35417 trailing_comments: vec![],
35418 double_colon_syntax: false,
35419 format: None,
35420 default: None,
35421 inferred_type: None,
35422 }))
35423 }
35424
35425 /// Ensure a timestamp-like expression for DuckDB with CAST(... AS TIMESTAMPTZ)
35426 fn ensure_cast_timestamptz(expr: Expression) -> Expression {
35427 use crate::expressions::{Cast, DataType, Literal};
35428 match expr {
35429 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
35430 let Literal::Timestamp(s) = lit.as_ref() else {
35431 unreachable!()
35432 };
35433 Expression::Cast(Box::new(Cast {
35434 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
35435 to: DataType::Timestamp {
35436 timezone: true,
35437 precision: None,
35438 },
35439 trailing_comments: vec![],
35440 double_colon_syntax: false,
35441 format: None,
35442 default: None,
35443 inferred_type: None,
35444 }))
35445 }
35446 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(ref _s)) => {
35447 Expression::Cast(Box::new(Cast {
35448 this: expr,
35449 to: DataType::Timestamp {
35450 timezone: true,
35451 precision: None,
35452 },
35453 trailing_comments: vec![],
35454 double_colon_syntax: false,
35455 format: None,
35456 default: None,
35457 inferred_type: None,
35458 }))
35459 }
35460 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Datetime(_)) => {
35461 let Literal::Datetime(s) = lit.as_ref() else {
35462 unreachable!()
35463 };
35464 Expression::Cast(Box::new(Cast {
35465 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
35466 to: DataType::Timestamp {
35467 timezone: true,
35468 precision: None,
35469 },
35470 trailing_comments: vec![],
35471 double_colon_syntax: false,
35472 format: None,
35473 default: None,
35474 inferred_type: None,
35475 }))
35476 }
35477 other => other,
35478 }
35479 }
35480
35481 /// Ensure expression is CAST to DATETIME (for BigQuery)
35482 fn ensure_cast_datetime(expr: Expression) -> Expression {
35483 use crate::expressions::{Cast, DataType, Literal};
35484 match expr {
35485 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(ref _s)) => {
35486 Expression::Cast(Box::new(Cast {
35487 this: expr,
35488 to: DataType::Custom {
35489 name: "DATETIME".to_string(),
35490 },
35491 trailing_comments: vec![],
35492 double_colon_syntax: false,
35493 format: None,
35494 default: None,
35495 inferred_type: None,
35496 }))
35497 }
35498 other => other,
35499 }
35500 }
35501
35502 /// Force CAST expression to DATETIME (for BigQuery) - always wraps unless already DATETIME
35503 fn force_cast_datetime(expr: Expression) -> Expression {
35504 use crate::expressions::{Cast, DataType};
35505 if let Expression::Cast(ref c) = expr {
35506 if let DataType::Custom { ref name } = c.to {
35507 if name.eq_ignore_ascii_case("DATETIME") {
35508 return expr;
35509 }
35510 }
35511 }
35512 Expression::Cast(Box::new(Cast {
35513 this: expr,
35514 to: DataType::Custom {
35515 name: "DATETIME".to_string(),
35516 },
35517 trailing_comments: vec![],
35518 double_colon_syntax: false,
35519 format: None,
35520 default: None,
35521 inferred_type: None,
35522 }))
35523 }
35524
35525 /// Ensure expression is CAST to DATETIME2 (for TSQL)
35526 fn ensure_cast_datetime2(expr: Expression) -> Expression {
35527 use crate::expressions::{Cast, DataType, Literal};
35528 match expr {
35529 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(ref _s)) => {
35530 Expression::Cast(Box::new(Cast {
35531 this: expr,
35532 to: DataType::Custom {
35533 name: "DATETIME2".to_string(),
35534 },
35535 trailing_comments: vec![],
35536 double_colon_syntax: false,
35537 format: None,
35538 default: None,
35539 inferred_type: None,
35540 }))
35541 }
35542 other => other,
35543 }
35544 }
35545
35546 /// Convert TIMESTAMP 'x' literal to CAST('x' AS TIMESTAMPTZ) for DuckDB
35547 fn ts_literal_to_cast_tz(expr: Expression) -> Expression {
35548 use crate::expressions::{Cast, DataType, Literal};
35549 match expr {
35550 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
35551 let Literal::Timestamp(s) = lit.as_ref() else {
35552 unreachable!()
35553 };
35554 Expression::Cast(Box::new(Cast {
35555 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
35556 to: DataType::Timestamp {
35557 timezone: true,
35558 precision: None,
35559 },
35560 trailing_comments: vec![],
35561 double_colon_syntax: false,
35562 format: None,
35563 default: None,
35564 inferred_type: None,
35565 }))
35566 }
35567 other => other,
35568 }
35569 }
35570
35571 /// Convert BigQuery format string to Snowflake format string
35572 fn bq_format_to_snowflake(format_expr: &Expression) -> Expression {
35573 use crate::expressions::Literal;
35574 if let Expression::Literal(lit) = format_expr {
35575 if let Literal::String(s) = lit.as_ref() {
35576 let sf = s
35577 .replace("%Y", "yyyy")
35578 .replace("%m", "mm")
35579 .replace("%d", "DD")
35580 .replace("%H", "HH24")
35581 .replace("%M", "MI")
35582 .replace("%S", "SS")
35583 .replace("%b", "mon")
35584 .replace("%B", "Month")
35585 .replace("%e", "FMDD");
35586 Expression::Literal(Box::new(Literal::String(sf)))
35587 } else {
35588 format_expr.clone()
35589 }
35590 } else {
35591 format_expr.clone()
35592 }
35593 }
35594
35595 /// Convert BigQuery format string to DuckDB format string
35596 fn bq_format_to_duckdb(format_expr: &Expression) -> Expression {
35597 use crate::expressions::Literal;
35598 if let Expression::Literal(lit) = format_expr {
35599 if let Literal::String(s) = lit.as_ref() {
35600 let duck = s
35601 .replace("%T", "%H:%M:%S")
35602 .replace("%F", "%Y-%m-%d")
35603 .replace("%D", "%m/%d/%y")
35604 .replace("%x", "%m/%d/%y")
35605 .replace("%c", "%a %b %-d %H:%M:%S %Y")
35606 .replace("%e", "%-d")
35607 .replace("%E6S", "%S.%f");
35608 Expression::Literal(Box::new(Literal::String(duck)))
35609 } else {
35610 format_expr.clone()
35611 }
35612 } else {
35613 format_expr.clone()
35614 }
35615 }
35616
35617 /// Convert BigQuery CAST FORMAT elements (like YYYY, MM, DD) to strftime (like %Y, %m, %d)
35618 fn bq_cast_format_to_strftime(format_expr: &Expression) -> Expression {
35619 use crate::expressions::Literal;
35620 if let Expression::Literal(lit) = format_expr {
35621 if let Literal::String(s) = lit.as_ref() {
35622 // Replace format elements from longest to shortest to avoid partial matches
35623 let result = s
35624 .replace("YYYYMMDD", "%Y%m%d")
35625 .replace("YYYY", "%Y")
35626 .replace("YY", "%y")
35627 .replace("MONTH", "%B")
35628 .replace("MON", "%b")
35629 .replace("MM", "%m")
35630 .replace("DD", "%d")
35631 .replace("HH24", "%H")
35632 .replace("HH12", "%I")
35633 .replace("HH", "%I")
35634 .replace("MI", "%M")
35635 .replace("SSTZH", "%S%z")
35636 .replace("SS", "%S")
35637 .replace("TZH", "%z");
35638 Expression::Literal(Box::new(Literal::String(result)))
35639 } else {
35640 format_expr.clone()
35641 }
35642 } else {
35643 format_expr.clone()
35644 }
35645 }
35646
35647 /// Normalize BigQuery format strings for BQ->BQ output
35648 fn bq_format_normalize_bq(format_expr: &Expression) -> Expression {
35649 use crate::expressions::Literal;
35650 if let Expression::Literal(lit) = format_expr {
35651 if let Literal::String(s) = lit.as_ref() {
35652 let norm = s.replace("%H:%M:%S", "%T").replace("%x", "%D");
35653 Expression::Literal(Box::new(Literal::String(norm)))
35654 } else {
35655 format_expr.clone()
35656 }
35657 } else {
35658 format_expr.clone()
35659 }
35660 }
35661}
35662
// Unit tests for the dialect layer: DialectType parsing, transpilation between
// dialects, and function/operator rewrites. Tests that only `eprintln!` (no
// assertions) are exploratory/debug tests and are flagged below.
#[cfg(test)]
mod tests {
    use super::*;

    // Dialect-name aliases must round-trip through `FromStr`.
    #[test]
    fn test_dialect_type_from_str() {
        assert_eq!(
            "postgres".parse::<DialectType>().unwrap(),
            DialectType::PostgreSQL
        );
        assert_eq!(
            "postgresql".parse::<DialectType>().unwrap(),
            DialectType::PostgreSQL
        );
        assert_eq!("mysql".parse::<DialectType>().unwrap(), DialectType::MySQL);
        assert_eq!(
            "bigquery".parse::<DialectType>().unwrap(),
            DialectType::BigQuery
        );
    }

    // A trivial statement survives Generic -> PostgreSQL untouched.
    #[test]
    fn test_basic_transpile() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect
            .transpile_to("SELECT 1", DialectType::PostgreSQL)
            .unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0], "SELECT 1");
    }

    #[test]
    fn test_function_transformation_mysql() {
        // NVL should be transformed to IFNULL in MySQL
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect
            .transpile_to("SELECT NVL(a, b)", DialectType::MySQL)
            .unwrap();
        assert_eq!(result[0], "SELECT IFNULL(a, b)");
    }

    // NOTE(review): debug-only test — prints intermediate outputs for the
    // Snowflake `:` path operator but asserts nothing; consider adding
    // assertions or removing.
    #[test]
    fn test_get_path_duckdb() {
        // Test: step by step
        let snowflake = Dialect::get(DialectType::Snowflake);

        // Step 1: Parse and check what Snowflake produces as intermediate
        let result_sf_sf = snowflake
            .transpile_to(
                "SELECT PARSE_JSON('{\"fruit\":\"banana\"}'):fruit",
                DialectType::Snowflake,
            )
            .unwrap();
        eprintln!("Snowflake->Snowflake colon: {}", result_sf_sf[0]);

        // Step 2: DuckDB target
        let result_sf_dk = snowflake
            .transpile_to(
                "SELECT PARSE_JSON('{\"fruit\":\"banana\"}'):fruit",
                DialectType::DuckDB,
            )
            .unwrap();
        eprintln!("Snowflake->DuckDB colon: {}", result_sf_dk[0]);

        // Step 3: GET_PATH directly
        let result_gp = snowflake
            .transpile_to(
                "SELECT GET_PATH(PARSE_JSON('{\"fruit\":\"banana\"}'), 'fruit')",
                DialectType::DuckDB,
            )
            .unwrap();
        eprintln!("Snowflake->DuckDB explicit GET_PATH: {}", result_gp[0]);
    }

    #[test]
    fn test_function_transformation_postgres() {
        // IFNULL should be transformed to COALESCE in PostgreSQL
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect
            .transpile_to("SELECT IFNULL(a, b)", DialectType::PostgreSQL)
            .unwrap();
        assert_eq!(result[0], "SELECT COALESCE(a, b)");

        // NVL should also be transformed to COALESCE
        let result = dialect
            .transpile_to("SELECT NVL(a, b)", DialectType::PostgreSQL)
            .unwrap();
        assert_eq!(result[0], "SELECT COALESCE(a, b)");
    }

    #[test]
    fn test_hive_cast_to_trycast() {
        // Hive CAST should become TRY_CAST for targets that support it
        let hive = Dialect::get(DialectType::Hive);
        let result = hive
            .transpile_to("CAST(1 AS INT)", DialectType::DuckDB)
            .unwrap();
        assert_eq!(result[0], "TRY_CAST(1 AS INT)");

        // Presto additionally normalizes INT to INTEGER.
        let result = hive
            .transpile_to("CAST(1 AS INT)", DialectType::Presto)
            .unwrap();
        assert_eq!(result[0], "TRY_CAST(1 AS INTEGER)");
    }

    #[test]
    fn test_hive_array_identity() {
        // Hive ARRAY<DATE> should preserve angle bracket syntax
        let sql = "CREATE EXTERNAL TABLE `my_table` (`a7` ARRAY<DATE>) ROW FORMAT SERDE 'a' STORED AS INPUTFORMAT 'b' OUTPUTFORMAT 'c' LOCATION 'd' TBLPROPERTIES ('e'='f')";
        let hive = Dialect::get(DialectType::Hive);

        // Test via transpile_to (this works)
        let result = hive.transpile_to(sql, DialectType::Hive).unwrap();
        eprintln!("Hive ARRAY via transpile_to: {}", result[0]);
        assert!(
            result[0].contains("ARRAY<DATE>"),
            "transpile_to: Expected ARRAY<DATE>, got: {}",
            result[0]
        );

        // Test via parse -> transform -> generate (identity test path)
        let ast = hive.parse(sql).unwrap();
        let transformed = hive.transform(ast[0].clone()).unwrap();
        let output = hive.generate(&transformed).unwrap();
        eprintln!("Hive ARRAY via identity path: {}", output);
        assert!(
            output.contains("ARRAY<DATE>"),
            "identity path: Expected ARRAY<DATE>, got: {}",
            output
        );
    }

    #[test]
    fn test_starrocks_delete_between_expansion() {
        // StarRocks doesn't support BETWEEN in DELETE statements
        let dialect = Dialect::get(DialectType::Generic);

        // BETWEEN should be expanded to >= AND <= in DELETE
        let result = dialect
            .transpile_to(
                "DELETE FROM t WHERE a BETWEEN b AND c",
                DialectType::StarRocks,
            )
            .unwrap();
        assert_eq!(result[0], "DELETE FROM t WHERE a >= b AND a <= c");

        // NOT BETWEEN should be expanded to < OR > in DELETE
        let result = dialect
            .transpile_to(
                "DELETE FROM t WHERE a NOT BETWEEN b AND c",
                DialectType::StarRocks,
            )
            .unwrap();
        assert_eq!(result[0], "DELETE FROM t WHERE a < b OR a > c");

        // BETWEEN in SELECT should NOT be expanded (StarRocks supports it there)
        let result = dialect
            .transpile_to(
                "SELECT * FROM t WHERE a BETWEEN b AND c",
                DialectType::StarRocks,
            )
            .unwrap();
        assert!(
            result[0].contains("BETWEEN"),
            "BETWEEN should be preserved in SELECT"
        );
    }

    // Nested LTRIM/RTRIM must parse and transpile without error; the exact
    // output is only printed for inspection.
    #[test]
    fn test_snowflake_ltrim_rtrim_parse() {
        let sf = Dialect::get(DialectType::Snowflake);
        let sql = "SELECT LTRIM(RTRIM(col)) FROM t1";
        let result = sf.transpile_to(sql, DialectType::DuckDB);
        match &result {
            Ok(r) => eprintln!("LTRIM/RTRIM result: {}", r[0]),
            Err(e) => eprintln!("LTRIM/RTRIM error: {}", e),
        }
        assert!(
            result.is_ok(),
            "Expected successful parse of LTRIM(RTRIM(col)), got error: {:?}",
            result.err()
        );
    }

    // COUNT_IF must be accepted by the DuckDB parser (smoke test only).
    #[test]
    fn test_duckdb_count_if_parse() {
        let duck = Dialect::get(DialectType::DuckDB);
        let sql = "COUNT_IF(x)";
        let result = duck.transpile_to(sql, DialectType::DuckDB);
        match &result {
            Ok(r) => eprintln!("COUNT_IF result: {}", r[0]),
            Err(e) => eprintln!("COUNT_IF error: {}", e),
        }
        assert!(
            result.is_ok(),
            "Expected successful parse of COUNT_IF(x), got error: {:?}",
            result.err()
        );
    }

    // TSQL TINYINT in a CAST must transpile to DuckDB without error.
    #[test]
    fn test_tsql_cast_tinyint_parse() {
        let tsql = Dialect::get(DialectType::TSQL);
        let sql = "CAST(X AS TINYINT)";
        let result = tsql.transpile_to(sql, DialectType::DuckDB);
        match &result {
            Ok(r) => eprintln!("TSQL CAST TINYINT result: {}", r[0]),
            Err(e) => eprintln!("TSQL CAST TINYINT error: {}", e),
        }
        assert!(
            result.is_ok(),
            "Expected successful transpile, got error: {:?}",
            result.err()
        );
    }

    // Postgres `#` (bitwise XOR) survives a PG -> PG round trip.
    #[test]
    fn test_pg_hash_bitwise_xor() {
        let dialect = Dialect::get(DialectType::PostgreSQL);
        let result = dialect
            .transpile_to("x # y", DialectType::PostgreSQL)
            .unwrap();
        assert_eq!(result[0], "x # y");
    }

    // PG ARRAY[...] literals become DuckDB bracket lists; `@>` is kept.
    #[test]
    fn test_pg_array_to_duckdb() {
        let dialect = Dialect::get(DialectType::PostgreSQL);
        let result = dialect
            .transpile_to("SELECT ARRAY[1, 2, 3] @> ARRAY[1, 2]", DialectType::DuckDB)
            .unwrap();
        assert_eq!(result[0], "SELECT [1, 2, 3] @> [1, 2]");
    }

    // ARRAY_REMOVE has no BigQuery builtin; it is expanded to an
    // ARRAY(SELECT ... FROM UNNEST(...)) subquery.
    #[test]
    fn test_array_remove_bigquery() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect
            .transpile_to("ARRAY_REMOVE(the_array, target)", DialectType::BigQuery)
            .unwrap();
        assert_eq!(
            result[0],
            "ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)"
        );
    }

    // NOTE(review): debug-only test — prints the parsed AST and ClickHouse
    // output for a MAP cast but asserts nothing.
    #[test]
    fn test_map_clickhouse_case() {
        let dialect = Dialect::get(DialectType::Generic);
        let parsed = dialect
            .parse("CAST(MAP('a', '1') AS MAP(TEXT, TEXT))")
            .unwrap();
        eprintln!("MAP parsed: {:?}", parsed);
        let result = dialect
            .transpile_to(
                "CAST(MAP('a', '1') AS MAP(TEXT, TEXT))",
                DialectType::ClickHouse,
            )
            .unwrap();
        eprintln!("MAP result: {}", result[0]);
    }

    // GENERATE_DATE_ARRAY maps to Presto SEQUENCE with the week interval
    // rewritten as a day-interval multiplication.
    #[test]
    fn test_generate_date_array_presto() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile_to(
            "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
            DialectType::Presto,
        ).unwrap();
        eprintln!("GDA -> Presto: {}", result[0]);
        assert_eq!(result[0], "SELECT * FROM UNNEST(SEQUENCE(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), (1 * INTERVAL '7' DAY)))");
    }

    // NOTE(review): debug-only test — no assertion on the PostgreSQL output.
    #[test]
    fn test_generate_date_array_postgres() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile_to(
            "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
            DialectType::PostgreSQL,
        ).unwrap();
        eprintln!("GDA -> PostgreSQL: {}", result[0]);
    }

    // NOTE(review): debug-only test — runs on a 16 MiB stack (the Snowflake
    // expansion recurses deeply) but asserts nothing beyond not panicking.
    #[test]
    fn test_generate_date_array_snowflake() {
        std::thread::Builder::new()
            .stack_size(16 * 1024 * 1024)
            .spawn(|| {
                let dialect = Dialect::get(DialectType::Generic);
                let result = dialect.transpile_to(
                    "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
                    DialectType::Snowflake,
                ).unwrap();
                eprintln!("GDA -> Snowflake: {}", result[0]);
            })
            .unwrap()
            .join()
            .unwrap();
    }

    // NOTE(review): debug-only test — no assertion on the Snowflake output.
    #[test]
    fn test_array_length_generate_date_array_snowflake() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile_to(
            "SELECT ARRAY_LENGTH(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
            DialectType::Snowflake,
        ).unwrap();
        eprintln!("ARRAY_LENGTH(GDA) -> Snowflake: {}", result[0]);
    }

    // NOTE(review): debug-only test — no assertion on the MySQL output.
    #[test]
    fn test_generate_date_array_mysql() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile_to(
            "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
            DialectType::MySQL,
        ).unwrap();
        eprintln!("GDA -> MySQL: {}", result[0]);
    }

    // NOTE(review): debug-only test — no assertion on the Redshift output.
    #[test]
    fn test_generate_date_array_redshift() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile_to(
            "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
            DialectType::Redshift,
        ).unwrap();
        eprintln!("GDA -> Redshift: {}", result[0]);
    }

    // NOTE(review): debug-only test — no assertion on the TSQL output.
    #[test]
    fn test_generate_date_array_tsql() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile_to(
            "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
            DialectType::TSQL,
        ).unwrap();
        eprintln!("GDA -> TSQL: {}", result[0]);
    }

    // NOTE(review): debug-only test — exercises STRUCT<...> with and without
    // the `name: TYPE` colon form, printing outcomes; no assertions.
    #[test]
    fn test_struct_colon_syntax() {
        let dialect = Dialect::get(DialectType::Generic);
        // Test without colon first
        let result = dialect.transpile_to(
            "CAST((1, 2, 3, 4) AS STRUCT<a TINYINT, b SMALLINT, c INT, d BIGINT>)",
            DialectType::ClickHouse,
        );
        match result {
            Ok(r) => eprintln!("STRUCT no colon -> ClickHouse: {}", r[0]),
            Err(e) => eprintln!("STRUCT no colon error: {}", e),
        }
        // Now test with colon
        let result = dialect.transpile_to(
            "CAST((1, 2, 3, 4) AS STRUCT<a: TINYINT, b: SMALLINT, c: INT, d: BIGINT>)",
            DialectType::ClickHouse,
        );
        match result {
            Ok(r) => eprintln!("STRUCT colon -> ClickHouse: {}", r[0]),
            Err(e) => eprintln!("STRUCT colon error: {}", e),
        }
    }

    // NOTE(review): debug-only test — GDA inside a CTE targeting MySQL;
    // no assertions.
    #[test]
    fn test_generate_date_array_cte_wrapped_mysql() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile_to(
            "WITH dates AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))) SELECT * FROM dates",
            DialectType::MySQL,
        ).unwrap();
        eprintln!("GDA CTE -> MySQL: {}", result[0]);
    }

    // NOTE(review): debug-only test — GDA inside a CTE targeting TSQL;
    // no assertions.
    #[test]
    fn test_generate_date_array_cte_wrapped_tsql() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile_to(
            "WITH dates AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))) SELECT * FROM dates",
            DialectType::TSQL,
        ).unwrap();
        eprintln!("GDA CTE -> TSQL: {}", result[0]);
    }

    #[test]
    fn test_decode_literal_no_null_check() {
        // Oracle DECODE with all literals should produce simple equality, no IS NULL
        let dialect = Dialect::get(DialectType::Oracle);
        let result = dialect
            .transpile_to("SELECT decode(1,2,3,4)", DialectType::DuckDB)
            .unwrap();
        assert_eq!(
            result[0], "SELECT CASE WHEN 1 = 2 THEN 3 ELSE 4 END",
            "Literal DECODE should not have IS NULL checks"
        );
    }

    #[test]
    fn test_decode_column_vs_literal_no_null_check() {
        // Oracle DECODE with column vs literal should use simple equality (like sqlglot)
        let dialect = Dialect::get(DialectType::Oracle);
        let result = dialect
            .transpile_to("SELECT decode(col, 2, 3, 4) FROM t", DialectType::DuckDB)
            .unwrap();
        assert_eq!(
            result[0], "SELECT CASE WHEN col = 2 THEN 3 ELSE 4 END FROM t",
            "Column vs literal DECODE should not have IS NULL checks"
        );
    }

    #[test]
    fn test_decode_column_vs_column_keeps_null_check() {
        // Oracle DECODE with column vs column should keep null-safe comparison
        let dialect = Dialect::get(DialectType::Oracle);
        let result = dialect
            .transpile_to("SELECT decode(col, col2, 3, 4) FROM t", DialectType::DuckDB)
            .unwrap();
        assert!(
            result[0].contains("IS NULL"),
            "Column vs column DECODE should have IS NULL checks, got: {}",
            result[0]
        );
    }

    #[test]
    fn test_decode_null_search() {
        // Oracle DECODE with NULL search should use IS NULL
        let dialect = Dialect::get(DialectType::Oracle);
        let result = dialect
            .transpile_to("SELECT decode(col, NULL, 3, 4) FROM t", DialectType::DuckDB)
            .unwrap();
        assert_eq!(
            result[0],
            "SELECT CASE WHEN col IS NULL THEN 3 ELSE 4 END FROM t",
        );
    }

    // =========================================================================
    // REGEXP function transpilation tests
    // =========================================================================

    // Two-arg REGEXP_SUBSTR maps directly to DuckDB REGEXP_EXTRACT.
    #[test]
    fn test_regexp_substr_snowflake_to_duckdb_2arg() {
        let dialect = Dialect::get(DialectType::Snowflake);
        let result = dialect
            .transpile_to("SELECT REGEXP_SUBSTR(s, 'pattern')", DialectType::DuckDB)
            .unwrap();
        assert_eq!(result[0], "SELECT REGEXP_EXTRACT(s, 'pattern')");
    }

    // Position 1 is the default and is dropped from the output.
    #[test]
    fn test_regexp_substr_snowflake_to_duckdb_3arg_pos1() {
        let dialect = Dialect::get(DialectType::Snowflake);
        let result = dialect
            .transpile_to("SELECT REGEXP_SUBSTR(s, 'pattern', 1)", DialectType::DuckDB)
            .unwrap();
        assert_eq!(result[0], "SELECT REGEXP_EXTRACT(s, 'pattern')");
    }

    // Position > 1 is emulated with SUBSTRING; NULLIF guards the empty tail.
    #[test]
    fn test_regexp_substr_snowflake_to_duckdb_3arg_pos_gt1() {
        let dialect = Dialect::get(DialectType::Snowflake);
        let result = dialect
            .transpile_to("SELECT REGEXP_SUBSTR(s, 'pattern', 3)", DialectType::DuckDB)
            .unwrap();
        assert_eq!(
            result[0],
            "SELECT REGEXP_EXTRACT(NULLIF(SUBSTRING(s, 3), ''), 'pattern')"
        );
    }

    // Occurrence > 1 is emulated via REGEXP_EXTRACT_ALL + ARRAY_EXTRACT.
    #[test]
    fn test_regexp_substr_snowflake_to_duckdb_4arg_occ_gt1() {
        let dialect = Dialect::get(DialectType::Snowflake);
        let result = dialect
            .transpile_to(
                "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 3)",
                DialectType::DuckDB,
            )
            .unwrap();
        assert_eq!(
            result[0],
            "SELECT ARRAY_EXTRACT(REGEXP_EXTRACT_ALL(s, 'pattern'), 3)"
        );
    }

    // The 'e' (extract) flag with default position/occurrence is a no-op.
    #[test]
    fn test_regexp_substr_snowflake_to_duckdb_5arg_e_flag() {
        let dialect = Dialect::get(DialectType::Snowflake);
        let result = dialect
            .transpile_to(
                "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 1, 'e')",
                DialectType::DuckDB,
            )
            .unwrap();
        assert_eq!(result[0], "SELECT REGEXP_EXTRACT(s, 'pattern')");
    }

    // Group 0 means "whole match" and also collapses to the two-arg form.
    #[test]
    fn test_regexp_substr_snowflake_to_duckdb_6arg_group0() {
        let dialect = Dialect::get(DialectType::Snowflake);
        let result = dialect
            .transpile_to(
                "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 1, 'e', 0)",
                DialectType::DuckDB,
            )
            .unwrap();
        assert_eq!(result[0], "SELECT REGEXP_EXTRACT(s, 'pattern')");
    }

    // Snowflake identity strips only the redundant trailing group-0 argument.
    #[test]
    fn test_regexp_substr_snowflake_identity_strip_group0() {
        let dialect = Dialect::get(DialectType::Snowflake);
        let result = dialect
            .transpile_to(
                "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 1, 'e', 0)",
                DialectType::Snowflake,
            )
            .unwrap();
        assert_eq!(result[0], "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 1, 'e')");
    }

    // REGEXP_SUBSTR_ALL maps to DuckDB REGEXP_EXTRACT_ALL.
    #[test]
    fn test_regexp_substr_all_snowflake_to_duckdb_2arg() {
        let dialect = Dialect::get(DialectType::Snowflake);
        let result = dialect
            .transpile_to(
                "SELECT REGEXP_SUBSTR_ALL(s, 'pattern')",
                DialectType::DuckDB,
            )
            .unwrap();
        assert_eq!(result[0], "SELECT REGEXP_EXTRACT_ALL(s, 'pattern')");
    }

    // Position > 1 is emulated with SUBSTRING (no NULLIF for the _ALL form).
    #[test]
    fn test_regexp_substr_all_snowflake_to_duckdb_3arg_pos_gt1() {
        let dialect = Dialect::get(DialectType::Snowflake);
        let result = dialect
            .transpile_to(
                "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 3)",
                DialectType::DuckDB,
            )
            .unwrap();
        assert_eq!(
            result[0],
            "SELECT REGEXP_EXTRACT_ALL(SUBSTRING(s, 3), 'pattern')"
        );
    }

    // Default position/occurrence plus 'e' flag collapses to the two-arg form.
    #[test]
    fn test_regexp_substr_all_snowflake_to_duckdb_5arg_e_flag() {
        let dialect = Dialect::get(DialectType::Snowflake);
        let result = dialect
            .transpile_to(
                "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 1, 1, 'e')",
                DialectType::DuckDB,
            )
            .unwrap();
        assert_eq!(result[0], "SELECT REGEXP_EXTRACT_ALL(s, 'pattern')");
    }

    // Group 0 is equivalent to the whole match and is dropped.
    #[test]
    fn test_regexp_substr_all_snowflake_to_duckdb_6arg_group0() {
        let dialect = Dialect::get(DialectType::Snowflake);
        let result = dialect
            .transpile_to(
                "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 1, 1, 'e', 0)",
                DialectType::DuckDB,
            )
            .unwrap();
        assert_eq!(result[0], "SELECT REGEXP_EXTRACT_ALL(s, 'pattern')");
    }

    // Snowflake identity strips only the redundant group-0 argument.
    #[test]
    fn test_regexp_substr_all_snowflake_identity_strip_group0() {
        let dialect = Dialect::get(DialectType::Snowflake);
        let result = dialect
            .transpile_to(
                "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 1, 1, 'e', 0)",
                DialectType::Snowflake,
            )
            .unwrap();
        assert_eq!(
            result[0],
            "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 1, 1, 'e')"
        );
    }

    // REGEXP_COUNT is emulated with LENGTH(REGEXP_EXTRACT_ALL(...)), guarded
    // against the empty-pattern edge case.
    #[test]
    fn test_regexp_count_snowflake_to_duckdb_2arg() {
        let dialect = Dialect::get(DialectType::Snowflake);
        let result = dialect
            .transpile_to("SELECT REGEXP_COUNT(s, 'pattern')", DialectType::DuckDB)
            .unwrap();
        assert_eq!(
            result[0],
            "SELECT CASE WHEN 'pattern' = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(s, 'pattern')) END"
        );
    }

    // Start position is applied via SUBSTRING before counting.
    #[test]
    fn test_regexp_count_snowflake_to_duckdb_3arg() {
        let dialect = Dialect::get(DialectType::Snowflake);
        let result = dialect
            .transpile_to("SELECT REGEXP_COUNT(s, 'pattern', 3)", DialectType::DuckDB)
            .unwrap();
        assert_eq!(
            result[0],
            "SELECT CASE WHEN 'pattern' = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(SUBSTRING(s, 3), 'pattern')) END"
        );
    }

    // Snowflake regex flags are translated to an inline `(?...)` prefix.
    #[test]
    fn test_regexp_count_snowflake_to_duckdb_4arg_flags() {
        let dialect = Dialect::get(DialectType::Snowflake);
        let result = dialect
            .transpile_to(
                "SELECT REGEXP_COUNT(s, 'pattern', 1, 'i')",
                DialectType::DuckDB,
            )
            .unwrap();
        assert_eq!(
            result[0],
            "SELECT CASE WHEN '(?i)' || 'pattern' = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(SUBSTRING(s, 1), '(?i)' || 'pattern')) END"
        );
    }

    // Multiple flags combine into one inline prefix (e.g. 'im' -> `(?im)`).
    #[test]
    fn test_regexp_count_snowflake_to_duckdb_4arg_flags_literal_string() {
        let dialect = Dialect::get(DialectType::Snowflake);
        let result = dialect
            .transpile_to(
                "SELECT REGEXP_COUNT('Hello World', 'L', 1, 'im')",
                DialectType::DuckDB,
            )
            .unwrap();
        assert_eq!(
            result[0],
            "SELECT CASE WHEN '(?im)' || 'L' = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(SUBSTRING('Hello World', 1), '(?im)' || 'L')) END"
        );
    }

    // Default position=1, occurrence=1 collapses to the plain three-arg form.
    #[test]
    fn test_regexp_replace_snowflake_to_duckdb_5arg_pos1_occ1() {
        let dialect = Dialect::get(DialectType::Snowflake);
        let result = dialect
            .transpile_to(
                "SELECT REGEXP_REPLACE(s, 'pattern', 'repl', 1, 1)",
                DialectType::DuckDB,
            )
            .unwrap();
        assert_eq!(result[0], "SELECT REGEXP_REPLACE(s, 'pattern', 'repl')");
    }

    // Position > 1 with occurrence 0 (replace all) keeps the untouched prefix
    // and applies a global replace to the tail.
    #[test]
    fn test_regexp_replace_snowflake_to_duckdb_5arg_pos_gt1_occ0() {
        let dialect = Dialect::get(DialectType::Snowflake);
        let result = dialect
            .transpile_to(
                "SELECT REGEXP_REPLACE(s, 'pattern', 'repl', 3, 0)",
                DialectType::DuckDB,
            )
            .unwrap();
        assert_eq!(
            result[0],
            "SELECT SUBSTRING(s, 1, 2) || REGEXP_REPLACE(SUBSTRING(s, 3), 'pattern', 'repl', 'g')"
        );
    }

    // Same as above but occurrence 1: no 'g' flag on the tail replace.
    #[test]
    fn test_regexp_replace_snowflake_to_duckdb_5arg_pos_gt1_occ1() {
        let dialect = Dialect::get(DialectType::Snowflake);
        let result = dialect
            .transpile_to(
                "SELECT REGEXP_REPLACE(s, 'pattern', 'repl', 3, 1)",
                DialectType::DuckDB,
            )
            .unwrap();
        assert_eq!(
            result[0],
            "SELECT SUBSTRING(s, 1, 2) || REGEXP_REPLACE(SUBSTRING(s, 3), 'pattern', 'repl')"
        );
    }

    // Snowflake RLIKE anchors the whole string -> DuckDB REGEXP_FULL_MATCH.
    #[test]
    fn test_rlike_snowflake_to_duckdb_2arg() {
        let dialect = Dialect::get(DialectType::Snowflake);
        let result = dialect
            .transpile_to("SELECT RLIKE(a, b)", DialectType::DuckDB)
            .unwrap();
        assert_eq!(result[0], "SELECT REGEXP_FULL_MATCH(a, b)");
    }

    // The optional flags argument is passed through.
    #[test]
    fn test_rlike_snowflake_to_duckdb_3arg_flags() {
        let dialect = Dialect::get(DialectType::Snowflake);
        let result = dialect
            .transpile_to("SELECT RLIKE(a, b, 'i')", DialectType::DuckDB)
            .unwrap();
        assert_eq!(result[0], "SELECT REGEXP_FULL_MATCH(a, b, 'i')");
    }

    // Pattern without a capture group maps to plain REGEXP_SUBSTR_ALL.
    #[test]
    fn test_regexp_extract_all_bigquery_to_snowflake_no_capture() {
        let dialect = Dialect::get(DialectType::BigQuery);
        let result = dialect
            .transpile_to(
                "SELECT REGEXP_EXTRACT_ALL(s, 'pattern')",
                DialectType::Snowflake,
            )
            .unwrap();
        assert_eq!(result[0], "SELECT REGEXP_SUBSTR_ALL(s, 'pattern')");
    }

    // A capture group forces the explicit group-extraction argument form.
    #[test]
    fn test_regexp_extract_all_bigquery_to_snowflake_with_capture() {
        let dialect = Dialect::get(DialectType::BigQuery);
        let result = dialect
            .transpile_to(
                "SELECT REGEXP_EXTRACT_ALL(s, '(a)[0-9]')",
                DialectType::Snowflake,
            )
            .unwrap();
        assert_eq!(
            result[0],
            "SELECT REGEXP_SUBSTR_ALL(s, '(a)[0-9]', 1, 1, 'c', 1)"
        );
    }

    // REGEXP_INSTR has no DuckDB builtin; the expansion is deep, so this runs
    // on a 16 MiB stack and only checks for its structural markers.
    #[test]
    fn test_regexp_instr_snowflake_to_duckdb_2arg() {
        let handle = std::thread::Builder::new()
            .stack_size(16 * 1024 * 1024)
            .spawn(|| {
                let dialect = Dialect::get(DialectType::Snowflake);
                let result = dialect
                    .transpile_to("SELECT REGEXP_INSTR(s, 'pattern')", DialectType::DuckDB)
                    .unwrap();
                // Should produce a CASE WHEN expression
                assert!(
                    result[0].contains("CASE WHEN"),
                    "Expected CASE WHEN in result: {}",
                    result[0]
                );
                assert!(
                    result[0].contains("LIST_SUM"),
                    "Expected LIST_SUM in result: {}",
                    result[0]
                );
            })
            .unwrap();
        handle.join().unwrap();
    }

    // ARRAY_EXCEPT is emulated in DuckDB with list functions; structural
    // markers of the expansion are checked rather than the full string.
    #[test]
    fn test_array_except_generic_to_duckdb() {
        // Use larger stack to avoid overflow from deeply nested expression Drop
        let handle = std::thread::Builder::new()
            .stack_size(16 * 1024 * 1024)
            .spawn(|| {
                let dialect = Dialect::get(DialectType::Generic);
                let result = dialect
                    .transpile_to(
                        "SELECT ARRAY_EXCEPT(ARRAY(1, 2, 3), ARRAY(2))",
                        DialectType::DuckDB,
                    )
                    .unwrap();
                eprintln!("ARRAY_EXCEPT Generic->DuckDB: {}", result[0]);
                assert!(
                    result[0].contains("CASE WHEN"),
                    "Expected CASE WHEN: {}",
                    result[0]
                );
                assert!(
                    result[0].contains("LIST_FILTER"),
                    "Expected LIST_FILTER: {}",
                    result[0]
                );
                assert!(
                    result[0].contains("LIST_DISTINCT"),
                    "Expected LIST_DISTINCT: {}",
                    result[0]
                );
                assert!(
                    result[0].contains("IS NOT DISTINCT FROM"),
                    "Expected IS NOT DISTINCT FROM: {}",
                    result[0]
                );
                assert!(
                    result[0].contains("= 0"),
                    "Expected = 0 filter: {}",
                    result[0]
                );
            })
            .unwrap();
        handle.join().unwrap();
    }

    // Snowflake has native ARRAY_EXCEPT; arrays become bracket literals.
    #[test]
    fn test_array_except_generic_to_snowflake() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect
            .transpile_to(
                "SELECT ARRAY_EXCEPT(ARRAY(1, 2, 3), ARRAY(2))",
                DialectType::Snowflake,
            )
            .unwrap();
        eprintln!("ARRAY_EXCEPT Generic->Snowflake: {}", result[0]);
        assert_eq!(result[0], "SELECT ARRAY_EXCEPT([1, 2, 3], [2])");
    }

    // Presto has native ARRAY_EXCEPT; arrays keep the ARRAY[...] form.
    #[test]
    fn test_array_except_generic_to_presto() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect
            .transpile_to(
                "SELECT ARRAY_EXCEPT(ARRAY(1, 2, 3), ARRAY(2))",
                DialectType::Presto,
            )
            .unwrap();
        eprintln!("ARRAY_EXCEPT Generic->Presto: {}", result[0]);
        assert_eq!(result[0], "SELECT ARRAY_EXCEPT(ARRAY[1, 2, 3], ARRAY[2])");
    }

    // Snowflake-sourced ARRAY_EXCEPT to DuckDB: the Snowflake-specific
    // expansion also relies on LIST_TRANSFORM. Deep Drop -> big stack.
    #[test]
    fn test_array_except_snowflake_to_duckdb() {
        let handle = std::thread::Builder::new()
            .stack_size(16 * 1024 * 1024)
            .spawn(|| {
                let dialect = Dialect::get(DialectType::Snowflake);
                let result = dialect
                    .transpile_to("SELECT ARRAY_EXCEPT([1, 2, 3], [2])", DialectType::DuckDB)
                    .unwrap();
                eprintln!("ARRAY_EXCEPT Snowflake->DuckDB: {}", result[0]);
                assert!(
                    result[0].contains("CASE WHEN"),
                    "Expected CASE WHEN: {}",
                    result[0]
                );
                assert!(
                    result[0].contains("LIST_TRANSFORM"),
                    "Expected LIST_TRANSFORM: {}",
                    result[0]
                );
            })
            .unwrap();
        handle.join().unwrap();
    }

    // ARRAY_CONTAINS with a NULL element is preserved verbatim in identity.
    #[test]
    fn test_array_contains_snowflake_to_snowflake() {
        let dialect = Dialect::get(DialectType::Snowflake);
        let result = dialect
            .transpile_to(
                "SELECT ARRAY_CONTAINS(x, [1, NULL, 3])",
                DialectType::Snowflake,
            )
            .unwrap();
        eprintln!("ARRAY_CONTAINS Snowflake->Snowflake: {}", result[0]);
        assert_eq!(result[0], "SELECT ARRAY_CONTAINS(x, [1, NULL, 3])");
    }

    // DuckDB target needs NULL-safe handling (CASE WHEN / NULLIF wrapping).
    #[test]
    fn test_array_contains_snowflake_to_duckdb() {
        let dialect = Dialect::get(DialectType::Snowflake);
        let result = dialect
            .transpile_to(
                "SELECT ARRAY_CONTAINS(x, [1, NULL, 3])",
                DialectType::DuckDB,
            )
            .unwrap();
        eprintln!("ARRAY_CONTAINS Snowflake->DuckDB: {}", result[0]);
        assert!(
            result[0].contains("CASE WHEN"),
            "Expected CASE WHEN: {}",
            result[0]
        );
        assert!(
            result[0].contains("NULLIF"),
            "Expected NULLIF: {}",
            result[0]
        );
        assert!(
            result[0].contains("ARRAY_CONTAINS"),
            "Expected ARRAY_CONTAINS: {}",
            result[0]
        );
    }

    // ARRAY_DISTINCT is emulated with LIST_DISTINCT plus NULL re-insertion
    // via LIST_APPEND/LIST_FILTER; structural markers are asserted.
    #[test]
    fn test_array_distinct_snowflake_to_duckdb() {
        let dialect = Dialect::get(DialectType::Snowflake);
        let result = dialect
            .transpile_to(
                "SELECT ARRAY_DISTINCT([1, 2, 2, 3, 1])",
                DialectType::DuckDB,
            )
            .unwrap();
        eprintln!("ARRAY_DISTINCT Snowflake->DuckDB: {}", result[0]);
        assert!(
            result[0].contains("CASE WHEN"),
            "Expected CASE WHEN: {}",
            result[0]
        );
        assert!(
            result[0].contains("LIST_DISTINCT"),
            "Expected LIST_DISTINCT: {}",
            result[0]
        );
        assert!(
            result[0].contains("LIST_APPEND"),
            "Expected LIST_APPEND: {}",
            result[0]
        );
        assert!(
            result[0].contains("LIST_FILTER"),
            "Expected LIST_FILTER: {}",
            result[0]
        );
    }
}